diff options
Diffstat (limited to 'llvm/test/CodeGen/X86')
| -rw-r--r-- | llvm/test/CodeGen/X86/isel-llvm.sincos.ll | 133 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/llvm.sincos.vec.ll | 404 |
2 files changed, 454 insertions, 83 deletions
diff --git a/llvm/test/CodeGen/X86/isel-llvm.sincos.ll b/llvm/test/CodeGen/X86/isel-llvm.sincos.ll index 065710f..8576f8f 100644 --- a/llvm/test/CodeGen/X86/isel-llvm.sincos.ll +++ b/llvm/test/CodeGen/X86/isel-llvm.sincos.ll @@ -3,6 +3,9 @@ ; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X64,FASTISEL-X64 ; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel=0 -fast-isel=0 | FileCheck %s --check-prefixes=X86,SDAG-X86 ; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel=0 -fast-isel=0 | FileCheck %s --check-prefixes=X64,SDAG-X64 +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9.0 -mcpu=core2 | FileCheck %s --check-prefix=MACOS-SINCOS-STRET +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core2 | FileCheck %s --check-prefix=MACOS-NOSINCOS-STRET + ; TODO: The below RUN line will fails GISEL selection and will fallback to DAG selection due to lack of support for loads/stores in i686 mode, support is expected soon enough, for this reason the llvm/test/CodeGen/X86/GlobalISel/llvm.sincos.mir test is added for now because of the lack of support for i686 in GlobalISel. ; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel=1 -global-isel-abort=2 | FileCheck %s --check-prefixes=GISEL-X86 ; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel=1 -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X64 @@ -34,6 +37,29 @@ define { float, float } @test_sincos_f32(float %Val) nounwind { ; X64-NEXT: popq %rax ; X64-NEXT: retq ; +; MACOS-SINCOS-STRET-LABEL: test_sincos_f32: +; MACOS-SINCOS-STRET: ## %bb.0: +; MACOS-SINCOS-STRET-NEXT: pushq %rax +; MACOS-SINCOS-STRET-NEXT: callq ___sincosf_stret +; MACOS-SINCOS-STRET-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; MACOS-SINCOS-STRET-NEXT: popq %rax +; MACOS-SINCOS-STRET-NEXT: retq +; +; MACOS-NOSINCOS-STRET-LABEL: test_sincos_f32: +; MACOS-NOSINCOS-STRET: ## %bb.0: +; MACOS-NOSINCOS-STRET-NEXT: pushq %rax +; MACOS-NOSINCOS-STRET-NEXT: movss %xmm0, (%rsp) ## 4-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: callq _sinf +; MACOS-NOSINCOS-STRET-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: movss (%rsp), %xmm0 ## 4-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: ## xmm0 = mem[0],zero,zero,zero +; MACOS-NOSINCOS-STRET-NEXT: callq _cosf +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, %xmm1 +; MACOS-NOSINCOS-STRET-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 4-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: ## xmm0 = mem[0],zero,zero,zero +; MACOS-NOSINCOS-STRET-NEXT: popq %rax +; MACOS-NOSINCOS-STRET-NEXT: retq +; ; GISEL-X86-LABEL: test_sincos_f32: ; GISEL-X86: # %bb.0: ; GISEL-X86-NEXT: subl $28, %esp @@ -93,6 +119,28 @@ define { double, double } @test_sincos_f64(double %Val) nounwind { ; X64-NEXT: addq $24, %rsp ; X64-NEXT: retq ; +; MACOS-SINCOS-STRET-LABEL: test_sincos_f64: +; MACOS-SINCOS-STRET: ## %bb.0: +; MACOS-SINCOS-STRET-NEXT: pushq %rax +; MACOS-SINCOS-STRET-NEXT: callq ___sincos_stret +; MACOS-SINCOS-STRET-NEXT: popq %rax +; MACOS-SINCOS-STRET-NEXT: retq +; +; MACOS-NOSINCOS-STRET-LABEL: test_sincos_f64: +; MACOS-NOSINCOS-STRET: ## %bb.0: +; MACOS-NOSINCOS-STRET-NEXT: subq $24, %rsp +; MACOS-NOSINCOS-STRET-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: callq _sin +; MACOS-NOSINCOS-STRET-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 8-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: ## xmm0 = mem[0],zero +; MACOS-NOSINCOS-STRET-NEXT: callq _cos +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, %xmm1 +; MACOS-NOSINCOS-STRET-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 8-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: ## xmm0 = mem[0],zero +; MACOS-NOSINCOS-STRET-NEXT: addq $24, %rsp +; MACOS-NOSINCOS-STRET-NEXT: retq +; ; GISEL-X86-LABEL: test_sincos_f64: ; GISEL-X86: # %bb.0: ; GISEL-X86-NEXT: subl $44, %esp @@ -153,6 +201,40 @@ define { x86_fp80, x86_fp80 } @test_sincos_f80(x86_fp80 %Val) nounwind { ; X64-NEXT: addq $56, %rsp ; X64-NEXT: retq ; +; MACOS-SINCOS-STRET-LABEL: test_sincos_f80: +; MACOS-SINCOS-STRET: ## %bb.0: +; MACOS-SINCOS-STRET-NEXT: subq $40, %rsp +; MACOS-SINCOS-STRET-NEXT: fldt {{[0-9]+}}(%rsp) +; MACOS-SINCOS-STRET-NEXT: fld %st(0) +; MACOS-SINCOS-STRET-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) ## 10-byte Folded Spill +; MACOS-SINCOS-STRET-NEXT: fstpt (%rsp) +; MACOS-SINCOS-STRET-NEXT: callq _cosl +; MACOS-SINCOS-STRET-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) ## 10-byte Folded Spill +; MACOS-SINCOS-STRET-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) ## 10-byte Folded Reload +; MACOS-SINCOS-STRET-NEXT: fstpt (%rsp) +; MACOS-SINCOS-STRET-NEXT: callq _sinl +; MACOS-SINCOS-STRET-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) ## 10-byte Folded Reload +; MACOS-SINCOS-STRET-NEXT: fxch %st(1) +; MACOS-SINCOS-STRET-NEXT: addq $40, %rsp +; MACOS-SINCOS-STRET-NEXT: retq +; +; MACOS-NOSINCOS-STRET-LABEL: test_sincos_f80: +; MACOS-NOSINCOS-STRET: ## %bb.0: +; MACOS-NOSINCOS-STRET-NEXT: subq $40, %rsp +; MACOS-NOSINCOS-STRET-NEXT: fldt {{[0-9]+}}(%rsp) +; MACOS-NOSINCOS-STRET-NEXT: fld %st(0) +; MACOS-NOSINCOS-STRET-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) ## 10-byte Folded Spill +; MACOS-NOSINCOS-STRET-NEXT: fstpt (%rsp) +; MACOS-NOSINCOS-STRET-NEXT: callq _cosl +; MACOS-NOSINCOS-STRET-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) ## 10-byte Folded Spill +; MACOS-NOSINCOS-STRET-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) ## 10-byte Folded Reload +; MACOS-NOSINCOS-STRET-NEXT: fstpt (%rsp) +; MACOS-NOSINCOS-STRET-NEXT: callq _sinl +; MACOS-NOSINCOS-STRET-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) ## 10-byte Folded Reload +; MACOS-NOSINCOS-STRET-NEXT: fxch %st(1) +; MACOS-NOSINCOS-STRET-NEXT: addq $40, %rsp +; MACOS-NOSINCOS-STRET-NEXT: retq +; ; GISEL-X86-LABEL: test_sincos_f80: ; GISEL-X86: # %bb.0: ; GISEL-X86-NEXT: subl $60, %esp @@ -288,6 +370,57 @@ define void @can_fold_with_call_in_chain(float %x, ptr noalias %a, ptr noalias % ; SDAG-X64-NEXT: popq %r14 ; SDAG-X64-NEXT: retq ; +; MACOS-SINCOS-STRET-LABEL: can_fold_with_call_in_chain: +; MACOS-SINCOS-STRET: ## %bb.0: ## %entry +; MACOS-SINCOS-STRET-NEXT: pushq %r14 +; MACOS-SINCOS-STRET-NEXT: pushq %rbx +; MACOS-SINCOS-STRET-NEXT: subq $40, %rsp +; MACOS-SINCOS-STRET-NEXT: movq %rsi, %rbx +; MACOS-SINCOS-STRET-NEXT: movq %rdi, %r14 +; MACOS-SINCOS-STRET-NEXT: callq ___sincosf_stret +; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT: movq %r14, %rdi +; MACOS-SINCOS-STRET-NEXT: movq %rbx, %rsi +; MACOS-SINCOS-STRET-NEXT: callq _foo +; MACOS-SINCOS-STRET-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT: movss %xmm0, (%r14) +; MACOS-SINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT: movss %xmm0, (%rbx) +; MACOS-SINCOS-STRET-NEXT: addq $40, %rsp +; MACOS-SINCOS-STRET-NEXT: popq %rbx +; MACOS-SINCOS-STRET-NEXT: popq %r14 +; MACOS-SINCOS-STRET-NEXT: retq +; +; MACOS-NOSINCOS-STRET-LABEL: can_fold_with_call_in_chain: +; MACOS-NOSINCOS-STRET: ## %bb.0: ## %entry +; MACOS-NOSINCOS-STRET-NEXT: pushq %r14 +; MACOS-NOSINCOS-STRET-NEXT: pushq %rbx +; MACOS-NOSINCOS-STRET-NEXT: pushq %rax +; MACOS-NOSINCOS-STRET-NEXT: movq %rsi, %rbx +; MACOS-NOSINCOS-STRET-NEXT: movq %rdi, %r14 +; MACOS-NOSINCOS-STRET-NEXT: movss %xmm0, (%rsp) ## 4-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: callq _sinf +; MACOS-NOSINCOS-STRET-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: movss (%rsp), %xmm0 ## 4-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: ## xmm0 = mem[0],zero,zero,zero +; MACOS-NOSINCOS-STRET-NEXT: callq _cosf +; MACOS-NOSINCOS-STRET-NEXT: movss %xmm0, (%rsp) ## 4-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: movq %r14, %rdi +; MACOS-NOSINCOS-STRET-NEXT: movq %rbx, %rsi +; MACOS-NOSINCOS-STRET-NEXT: callq _foo +; MACOS-NOSINCOS-STRET-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 4-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: ## xmm0 = mem[0],zero,zero,zero +; MACOS-NOSINCOS-STRET-NEXT: movss %xmm0, (%r14) +; MACOS-NOSINCOS-STRET-NEXT: movss (%rsp), %xmm0 ## 4-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: ## xmm0 = mem[0],zero,zero,zero +; MACOS-NOSINCOS-STRET-NEXT: movss %xmm0, (%rbx) +; MACOS-NOSINCOS-STRET-NEXT: addq $8, %rsp +; MACOS-NOSINCOS-STRET-NEXT: popq %rbx +; MACOS-NOSINCOS-STRET-NEXT: popq %r14 +; MACOS-NOSINCOS-STRET-NEXT: retq +; ; GISEL-X86-LABEL: can_fold_with_call_in_chain: ; GISEL-X86: # %bb.0: # %entry ; GISEL-X86-NEXT: pushl %ebx diff --git a/llvm/test/CodeGen/X86/llvm.sincos.vec.ll b/llvm/test/CodeGen/X86/llvm.sincos.vec.ll index 834dd78..9b02438 100644 --- a/llvm/test/CodeGen/X86/llvm.sincos.vec.ll +++ b/llvm/test/CodeGen/X86/llvm.sincos.vec.ll @@ -1,59 +1,213 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp --version 5 -; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck -check-prefix=X86 %s +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck -check-prefix=X64 %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9.0 | FileCheck --check-prefix=MACOS-SINCOS-STRET %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 | FileCheck --check-prefix=MACOS-NOSINCOS-STRET %s define void @test_sincos_v4f32(<4 x float> %x, ptr noalias %out_sin, ptr noalias %out_cos) nounwind { -; CHECK-LABEL: test_sincos_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: pushl %edi -; CHECK-NEXT: pushl %esi -; CHECK-NEXT: subl $52, %esp -; CHECK-NEXT: movl 84(%esp), %esi -; CHECK-NEXT: flds 76(%esp) -; CHECK-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; CHECK-NEXT: flds 64(%esp) -; CHECK-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; CHECK-NEXT: flds 72(%esp) -; CHECK-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; CHECK-NEXT: flds 68(%esp) -; CHECK-NEXT: movl 80(%esp), %edi -; CHECK-NEXT: leal 40(%esp), %eax -; CHECK-NEXT: movl %eax, 8(%esp) -; CHECK-NEXT: leal 4(%edi), %eax -; CHECK-NEXT: movl %eax, 4(%esp) -; CHECK-NEXT: fstps (%esp) -; CHECK-NEXT: calll sincosf -; CHECK-NEXT: leal 44(%esp), %eax -; CHECK-NEXT: movl %eax, 8(%esp) -; CHECK-NEXT: leal 8(%edi), %eax -; CHECK-NEXT: movl %eax, 4(%esp) -; CHECK-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; CHECK-NEXT: fstps (%esp) -; CHECK-NEXT: calll sincosf -; CHECK-NEXT: leal 36(%esp), %eax -; CHECK-NEXT: movl %eax, 8(%esp) -; CHECK-NEXT: movl %edi, 4(%esp) -; CHECK-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; CHECK-NEXT: fstps (%esp) -; CHECK-NEXT: calll sincosf -; CHECK-NEXT: leal 48(%esp), %eax -; CHECK-NEXT: movl %eax, 8(%esp) -; CHECK-NEXT: addl $12, %edi -; CHECK-NEXT: movl %edi, 4(%esp) -; CHECK-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; CHECK-NEXT: fstps (%esp) -; CHECK-NEXT: calll sincosf -; CHECK-NEXT: flds 36(%esp) -; CHECK-NEXT: flds 40(%esp) -; CHECK-NEXT: flds 44(%esp) -; CHECK-NEXT: flds 48(%esp) -; CHECK-NEXT: fstps 12(%esi) -; CHECK-NEXT: fstps 8(%esi) -; CHECK-NEXT: fstps 4(%esi) -; CHECK-NEXT: fstps (%esi) -; CHECK-NEXT: addl $52, %esp -; CHECK-NEXT: popl %esi -; CHECK-NEXT: popl %edi -; CHECK-NEXT: retl +; X86-LABEL: test_sincos_v4f32: +; X86: # %bb.0: +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $52, %esp +; X86-NEXT: movl 84(%esp), %esi +; X86-NEXT: flds 76(%esp) +; X86-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: flds 64(%esp) +; X86-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: flds 72(%esp) +; X86-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: flds 68(%esp) +; X86-NEXT: movl 80(%esp), %edi +; X86-NEXT: leal 40(%esp), %eax +; X86-NEXT: movl %eax, 8(%esp) +; X86-NEXT: leal 4(%edi), %eax +; X86-NEXT: movl %eax, 4(%esp) +; X86-NEXT: fstps (%esp) +; X86-NEXT: calll sincosf +; X86-NEXT: leal 44(%esp), %eax +; X86-NEXT: movl %eax, 8(%esp) +; X86-NEXT: leal 8(%edi), %eax +; X86-NEXT: movl %eax, 4(%esp) +; X86-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: fstps (%esp) +; X86-NEXT: calll sincosf +; X86-NEXT: leal 36(%esp), %eax +; X86-NEXT: movl %eax, 8(%esp) +; X86-NEXT: movl %edi, 4(%esp) +; X86-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: fstps (%esp) +; X86-NEXT: calll sincosf +; X86-NEXT: leal 48(%esp), %eax +; X86-NEXT: movl %eax, 8(%esp) +; X86-NEXT: addl $12, %edi +; X86-NEXT: movl %edi, 4(%esp) +; X86-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: fstps (%esp) +; X86-NEXT: calll sincosf +; X86-NEXT: flds 36(%esp) +; X86-NEXT: flds 40(%esp) +; X86-NEXT: flds 44(%esp) +; X86-NEXT: flds 48(%esp) +; X86-NEXT: fstps 12(%esi) +; X86-NEXT: fstps 8(%esi) +; X86-NEXT: fstps 4(%esi) +; X86-NEXT: fstps (%esi) +; X86-NEXT: addl $52, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: test_sincos_v4f32: +; X64: # %bb.0: +; X64-NEXT: pushq %r14 +; X64-NEXT: pushq %rbx +; X64-NEXT: subq $56, %rsp +; X64-NEXT: movq %rsi, %rbx +; X64-NEXT: movq %rdi, %r14 +; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X64-NEXT: leaq 4(%rsp), %rdi +; X64-NEXT: movq %rsp, %rsi +; X64-NEXT: callq sincosf@PLT +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; X64-NEXT: leaq 12(%rsp), %rdi +; X64-NEXT: leaq 8(%rsp), %rsi +; X64-NEXT: callq sincosf@PLT +; X64-NEXT: leaq 28(%rsp), %rdi +; X64-NEXT: leaq 24(%rsp), %rsi +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: callq sincosf@PLT +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X64-NEXT: leaq 20(%rsp), %rdi +; X64-NEXT: leaq 16(%rsp), %rsi +; X64-NEXT: callq sincosf@PLT +; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] +; X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; X64-NEXT: movups %xmm1, (%r14) +; X64-NEXT: movups %xmm0, (%rbx) +; X64-NEXT: addq $56, %rsp +; X64-NEXT: popq %rbx +; X64-NEXT: popq %r14 +; X64-NEXT: retq +; +; MACOS-SINCOS-STRET-LABEL: test_sincos_v4f32: +; MACOS-SINCOS-STRET: ## %bb.0: +; MACOS-SINCOS-STRET-NEXT: pushq %r14 +; MACOS-SINCOS-STRET-NEXT: pushq %rbx +; MACOS-SINCOS-STRET-NEXT: subq $104, %rsp +; MACOS-SINCOS-STRET-NEXT: movq %rsi, %rbx +; MACOS-SINCOS-STRET-NEXT: movq %rdi, %r14 +; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; MACOS-SINCOS-STRET-NEXT: callq ___sincosf_stret +; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; MACOS-SINCOS-STRET-NEXT: callq ___sincosf_stret +; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; MACOS-SINCOS-STRET-NEXT: unpcklps (%rsp), %xmm0 ## 16-byte Folded Reload +; MACOS-SINCOS-STRET-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT: callq ___sincosf_stret +; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, %xmm1 +; MACOS-SINCOS-STRET-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] +; MACOS-SINCOS-STRET-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; MACOS-SINCOS-STRET-NEXT: callq ___sincosf_stret +; MACOS-SINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; MACOS-SINCOS-STRET-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; MACOS-SINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] +; MACOS-SINCOS-STRET-NEXT: unpcklpd (%rsp), %xmm2 ## 16-byte Folded Reload +; MACOS-SINCOS-STRET-NEXT: ## xmm2 = xmm2[0],mem[0] +; MACOS-SINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload +; MACOS-SINCOS-STRET-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; MACOS-SINCOS-STRET-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; MACOS-SINCOS-STRET-NEXT: movups %xmm1, (%r14) +; MACOS-SINCOS-STRET-NEXT: movups %xmm2, (%rbx) +; MACOS-SINCOS-STRET-NEXT: addq $104, %rsp +; MACOS-SINCOS-STRET-NEXT: popq %rbx +; MACOS-SINCOS-STRET-NEXT: popq %r14 +; MACOS-SINCOS-STRET-NEXT: retq +; +; MACOS-NOSINCOS-STRET-LABEL: test_sincos_v4f32: +; MACOS-NOSINCOS-STRET: ## %bb.0: +; MACOS-NOSINCOS-STRET-NEXT: pushq %r14 +; MACOS-NOSINCOS-STRET-NEXT: pushq %rbx +; MACOS-NOSINCOS-STRET-NEXT: subq $104, %rsp +; MACOS-NOSINCOS-STRET-NEXT: movq %rsi, %rbx +; MACOS-NOSINCOS-STRET-NEXT: movq %rdi, %r14 +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: callq _cosf +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: callq _cosf +; MACOS-NOSINCOS-STRET-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload +; MACOS-NOSINCOS-STRET-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: callq _cosf +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: callq _cosf +; MACOS-NOSINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; MACOS-NOSINCOS-STRET-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Folded Reload +; MACOS-NOSINCOS-STRET-NEXT: ## xmm1 = xmm1[0],mem[0] +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: callq _sinf +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: callq _sinf +; MACOS-NOSINCOS-STRET-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload +; MACOS-NOSINCOS-STRET-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: callq _sinf +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: callq _sinf +; MACOS-NOSINCOS-STRET-NEXT: movaps (%rsp), %xmm1 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; MACOS-NOSINCOS-STRET-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Folded Reload +; MACOS-NOSINCOS-STRET-NEXT: ## xmm1 = xmm1[0],mem[0] +; MACOS-NOSINCOS-STRET-NEXT: movups %xmm1, (%r14) +; MACOS-NOSINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: movups %xmm0, (%rbx) +; MACOS-NOSINCOS-STRET-NEXT: addq $104, %rsp +; MACOS-NOSINCOS-STRET-NEXT: popq %rbx +; MACOS-NOSINCOS-STRET-NEXT: popq %r14 +; MACOS-NOSINCOS-STRET-NEXT: retq %result = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> %x) %result.0 = extractvalue { <4 x float>, <4 x float> } %result, 0 %result.1 = extractvalue { <4 x float>, <4 x float> } %result, 1 @@ -63,36 +217,120 @@ define void @test_sincos_v4f32(<4 x float> %x, ptr noalias %out_sin, ptr noalias } define void @test_sincos_v2f64(<2 x double> %x, ptr noalias %out_sin, ptr noalias %out_cos) nounwind { -; CHECK-LABEL: test_sincos_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: pushl %edi -; CHECK-NEXT: pushl %esi -; CHECK-NEXT: subl $52, %esp -; CHECK-NEXT: movl 84(%esp), %esi -; CHECK-NEXT: fldl 72(%esp) -; CHECK-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill -; CHECK-NEXT: fldl 64(%esp) -; CHECK-NEXT: movl 80(%esp), %edi -; CHECK-NEXT: leal 24(%esp), %eax -; CHECK-NEXT: movl %eax, 12(%esp) -; CHECK-NEXT: movl %edi, 8(%esp) -; CHECK-NEXT: fstpl (%esp) -; CHECK-NEXT: calll sincos -; CHECK-NEXT: leal 32(%esp), %eax -; CHECK-NEXT: movl %eax, 12(%esp) -; CHECK-NEXT: addl $8, %edi -; CHECK-NEXT: movl %edi, 8(%esp) -; CHECK-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload -; CHECK-NEXT: fstpl (%esp) -; CHECK-NEXT: calll sincos -; CHECK-NEXT: fldl 24(%esp) -; CHECK-NEXT: fldl 32(%esp) -; CHECK-NEXT: fstpl 8(%esi) -; CHECK-NEXT: fstpl (%esi) -; CHECK-NEXT: addl $52, %esp -; CHECK-NEXT: popl %esi -; CHECK-NEXT: popl %edi -; CHECK-NEXT: retl +; X86-LABEL: test_sincos_v2f64: +; X86: # %bb.0: +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $52, %esp +; X86-NEXT: movl 84(%esp), %esi +; X86-NEXT: fldl 72(%esp) +; X86-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill +; X86-NEXT: fldl 64(%esp) +; X86-NEXT: movl 80(%esp), %edi +; X86-NEXT: leal 24(%esp), %eax +; X86-NEXT: movl %eax, 12(%esp) +; X86-NEXT: movl %edi, 8(%esp) +; X86-NEXT: fstpl (%esp) +; X86-NEXT: calll sincos +; X86-NEXT: leal 32(%esp), %eax +; X86-NEXT: movl %eax, 12(%esp) +; X86-NEXT: addl $8, %edi +; X86-NEXT: movl %edi, 8(%esp) +; X86-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload +; X86-NEXT: fstpl (%esp) +; X86-NEXT: calll sincos +; X86-NEXT: fldl 24(%esp) +; X86-NEXT: fldl 32(%esp) +; X86-NEXT: fstpl 8(%esi) +; X86-NEXT: fstpl (%esi) +; X86-NEXT: addl $52, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: test_sincos_v2f64: +; X64: # %bb.0: +; X64-NEXT: pushq %r14 +; X64-NEXT: pushq %rbx +; X64-NEXT: subq $56, %rsp +; X64-NEXT: movq %rsi, %rbx +; X64-NEXT: movq %rdi, %r14 +; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: leaq 24(%rsp), %rdi +; X64-NEXT: leaq 16(%rsp), %rsi +; X64-NEXT: callq sincos@PLT +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; X64-NEXT: leaq 8(%rsp), %rdi +; X64-NEXT: movq %rsp, %rsi +; X64-NEXT: callq sincos@PLT +; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] +; X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X64-NEXT: movhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] +; X64-NEXT: movups %xmm1, (%r14) +; X64-NEXT: movups %xmm0, (%rbx) +; X64-NEXT: addq $56, %rsp +; X64-NEXT: popq %rbx +; X64-NEXT: popq %r14 +; X64-NEXT: retq +; +; MACOS-SINCOS-STRET-LABEL: test_sincos_v2f64: +; MACOS-SINCOS-STRET: ## %bb.0: +; MACOS-SINCOS-STRET-NEXT: pushq %r14 +; MACOS-SINCOS-STRET-NEXT: pushq %rbx +; MACOS-SINCOS-STRET-NEXT: subq $56, %rsp +; MACOS-SINCOS-STRET-NEXT: movq %rsi, %rbx +; MACOS-SINCOS-STRET-NEXT: movq %rdi, %r14 +; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT: callq ___sincos_stret +; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; MACOS-SINCOS-STRET-NEXT: callq ___sincos_stret +; MACOS-SINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm1[0] +; MACOS-SINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; MACOS-SINCOS-STRET-NEXT: movups %xmm1, (%r14) +; MACOS-SINCOS-STRET-NEXT: movups %xmm2, (%rbx) +; MACOS-SINCOS-STRET-NEXT: addq $56, %rsp +; MACOS-SINCOS-STRET-NEXT: popq %rbx +; MACOS-SINCOS-STRET-NEXT: popq %r14 +; MACOS-SINCOS-STRET-NEXT: retq +; +; MACOS-NOSINCOS-STRET-LABEL: test_sincos_v2f64: +; MACOS-NOSINCOS-STRET: ## %bb.0: +; MACOS-NOSINCOS-STRET-NEXT: pushq %r14 +; MACOS-NOSINCOS-STRET-NEXT: pushq %rbx +; MACOS-NOSINCOS-STRET-NEXT: subq $56, %rsp +; MACOS-NOSINCOS-STRET-NEXT: movq %rsi, %rbx +; MACOS-NOSINCOS-STRET-NEXT: movq %rdi, %r14 +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: callq _cos +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: callq _cos +; MACOS-NOSINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: callq _sin +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: callq _sin +; MACOS-NOSINCOS-STRET-NEXT: movaps (%rsp), %xmm1 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; MACOS-NOSINCOS-STRET-NEXT: movups %xmm1, (%r14) +; MACOS-NOSINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: movups %xmm0, (%rbx) +; MACOS-NOSINCOS-STRET-NEXT: addq $56, %rsp +; MACOS-NOSINCOS-STRET-NEXT: popq %rbx +; MACOS-NOSINCOS-STRET-NEXT: popq %r14 +; MACOS-NOSINCOS-STRET-NEXT: retq %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %x) %result.0 = extractvalue { <2 x double>, <2 x double> } %result, 0 %result.1 = extractvalue { <2 x double>, <2 x double> } %result, 1 |
