Diffstat (limited to 'llvm/test/CodeGen/X86')
97 files changed, 10221 insertions, 4114 deletions
diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-memop-scalar-32.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-memop-scalar-32.mir
index ba72c4f..bbb09c6 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/legalize-memop-scalar-32.mir
+++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-memop-scalar-32.mir
@@ -10,18 +10,18 @@ body: |
   bb.0:
     ; X32-LABEL: name: test_memop_s8tos32
     ; X32: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
-    ; X32: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p0) :: (load (s1))
-    ; X32: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p0) :: (load (s8))
-    ; X32: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p0) :: (load (s16))
-    ; X32: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p0) :: (load (s32))
-    ; X32: [[LOAD4:%[0-9]+]]:_(p0) = G_LOAD [[DEF]](p0) :: (load (p0))
-    ; X32: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
-    ; X32: [[AND:%[0-9]+]]:_(s8) = G_AND [[LOAD]], [[C]]
-    ; X32: G_STORE [[AND]](s8), [[DEF]](p0) :: (store (s8))
-    ; X32: G_STORE [[LOAD1]](s8), [[DEF]](p0) :: (store (s8))
-    ; X32: G_STORE [[LOAD2]](s16), [[DEF]](p0) :: (store (s16))
-    ; X32: G_STORE [[LOAD3]](s32), [[DEF]](p0) :: (store (s32))
-    ; X32: G_STORE [[LOAD4]](p0), [[DEF]](p0) :: (store (p0))
+    ; X32-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p0) :: (load (s1))
+    ; X32-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p0) :: (load (s8))
+    ; X32-NEXT: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p0) :: (load (s16))
+    ; X32-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p0) :: (load (s32))
+    ; X32-NEXT: [[LOAD4:%[0-9]+]]:_(p0) = G_LOAD [[DEF]](p0) :: (load (p0))
+    ; X32-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
+    ; X32-NEXT: [[AND:%[0-9]+]]:_(s8) = G_AND [[LOAD]], [[C]]
+    ; X32-NEXT: G_STORE [[AND]](s8), [[DEF]](p0) :: (store (s8))
+    ; X32-NEXT: G_STORE [[LOAD1]](s8), [[DEF]](p0) :: (store (s8))
+    ; X32-NEXT: G_STORE [[LOAD2]](s16), [[DEF]](p0) :: (store (s16))
+    ; X32-NEXT: G_STORE [[LOAD3]](s32), [[DEF]](p0) :: (store (s32))
+    ; X32-NEXT: G_STORE [[LOAD4]](p0), [[DEF]](p0) :: (store (p0))
     %0:_(p0) = IMPLICIT_DEF
     %9:_(s1) = G_LOAD %0 :: (load (s1))
     %1:_(s8) = G_LOAD %0 :: (load (s8))
@@ -46,13 +46,13 @@ body: |
     ; X32-LABEL: name: test_memop_s64
     ; X32: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
-    ; X32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p0) :: (load (s32), align 8)
-    ; X32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; X32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32)
-    ; X32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4)
-    ; X32: G_STORE [[LOAD]](s32), [[DEF]](p0) :: (store (s32), align 8)
-    ; X32: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32)
-    ; X32: G_STORE [[LOAD1]](s32), [[PTR_ADD1]](p0) :: (store (s32) into unknown-address + 4)
+    ; X32-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p0) :: (load (s32), align 8)
+    ; X32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; X32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[DEF]], [[C]](s32)
+    ; X32-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4)
+    ; X32-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p0) :: (store (s32), align 8)
+    ; X32-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[DEF]], [[C]](s32)
+    ; X32-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD1]](p0) :: (store (s32) into unknown-address + 4)
     %0:_(p0) = IMPLICIT_DEF
     %1:_(s64) = G_LOAD %0 :: (load (s64))
diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-undef.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-undef.mir
index 8711d84..b16fe3e 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/legalize-undef.mir
+++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-undef.mir
@@ -21,6 +21,7 @@ body: |
     ; X64-NEXT: G_STORE [[DEF3]](s32), [[DEF]](p0) :: (store (s32))
     ; X64-NEXT: [[DEF4:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
     ; X64-NEXT: G_STORE [[DEF4]](s64), [[DEF]](p0) :: (store (s64))
+    ;
     ; X32-LABEL: name: test_implicit_def
     ; X32: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF
     ; X32-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
@@ -35,7 +36,7 @@ body: |
     ; X32-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF4]](s64)
     ; X32-NEXT: G_STORE [[UV]](s32), [[DEF]](p0) :: (store (s32), align 8)
     ; X32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; X32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C1]](s32)
+    ; X32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[DEF]], [[C1]](s32)
     ; X32-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD]](p0) :: (store (s32) into unknown-address + 4)
     %5:_(p0) = G_IMPLICIT_DEF
     %0:_(s1) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-x87.ll b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-x87.ll
index 99d458a..83c319b 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-x87.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-x87.ll
@@ -164,12 +164,12 @@ define void @f5(ptr %a, ptr %b) {
   ; X86-NEXT: [[LOAD1:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (p0) from %fixed-stack.0)
   ; X86-NEXT: [[LOAD2:%[0-9]+]]:gpr(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.a, align 8)
   ; X86-NEXT: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 4
-  ; X86-NEXT: [[PTR_ADD:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[LOAD]], [[C]](s32)
+  ; X86-NEXT: [[PTR_ADD:%[0-9]+]]:gpr(p0) = nuw inbounds G_PTR_ADD [[LOAD]], [[C]](s32)
   ; X86-NEXT: [[COPY:%[0-9]+]]:gpr(p0) = COPY [[PTR_ADD]](p0)
   ; X86-NEXT: [[LOAD3:%[0-9]+]]:gpr(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.a + 4, basealign 8)
   ; X86-NEXT: [[MV:%[0-9]+]]:gpr(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
   ; X86-NEXT: [[LOAD4:%[0-9]+]]:gpr(s32) = G_LOAD [[LOAD1]](p0) :: (load (s32) from %ir.b, align 8)
-  ; X86-NEXT: [[PTR_ADD1:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[LOAD1]], [[C]](s32)
+  ; X86-NEXT: [[PTR_ADD1:%[0-9]+]]:gpr(p0) = nuw inbounds G_PTR_ADD [[LOAD1]], [[C]](s32)
   ; X86-NEXT: [[LOAD5:%[0-9]+]]:gpr(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from %ir.b + 4, basealign 8)
   ; X86-NEXT: [[MV1:%[0-9]+]]:gpr(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
   ; X86-NEXT: [[COPY1:%[0-9]+]]:psr(s64) = COPY [[MV]](s64)
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll b/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll
index 171ccb2..2f1f8bc 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll
@@ -77,12 +77,12 @@ define { double, double } @test_return_d2(double %d.coerce0, double %d.coerce1)
   ; ALL-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.d
   ; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.1)
   ; ALL-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; ALL-NEXT: %5:_(p0) = nuw nusw G_PTR_ADD [[FRAME_INDEX1]], [[C1]](s64)
-  ; ALL-NEXT: G_STORE [[COPY1]](s64), %5(p0) :: (store (s64) into %ir.2)
+  ; ALL-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[FRAME_INDEX1]], [[C1]](s64)
+  ; ALL-NEXT: G_STORE [[COPY1]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.2)
   ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.3, align 8), (load (s8) from %ir.4, align 8)
   ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.5)
-  ; ALL-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64)
-  ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s64) from %ir.5 + 8)
+  ; ALL-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64)
+  ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (dereferenceable load (s64) from %ir.5 + 8)
   ; ALL-NEXT: $xmm0 = COPY [[LOAD]](s64)
   ; ALL-NEXT: $xmm1 = COPY [[LOAD1]](s64)
   ; ALL-NEXT: RET 0, implicit $xmm0, implicit $xmm1
@@ -170,14 +170,14 @@ define { i64, i32 } @test_return_i3(i64 %i.coerce0, i32 %i.coerce1) {
   ; ALL-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.3.tmp
   ; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX2]](p0) :: (store (s64) into %ir.0, align 4)
   ; ALL-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; ALL-NEXT: %7:_(p0) = nuw nusw G_PTR_ADD [[FRAME_INDEX2]], [[C1]](s64)
-  ; ALL-NEXT: G_STORE [[COPY1]](s32), %7(p0) :: (store (s32) into %ir.1)
+  ; ALL-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[FRAME_INDEX2]], [[C1]](s64)
+  ; ALL-NEXT: G_STORE [[COPY1]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.1)
   ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX1]](p0), [[FRAME_INDEX2]](p0), [[C]](s64), 0 :: (store (s8) into %ir.2, align 4), (load (s8) from %ir.3, align 4)
   ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.4, align 4), (load (s8) from %ir.5, align 4)
   ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX3]](p0), [[FRAME_INDEX]](p0), [[C]](s64), 0 :: (store (s8) into %ir.6, align 8), (load (s8) from %ir.7, align 4)
   ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX3]](p0) :: (dereferenceable load (s64) from %ir.tmp)
-  ; ALL-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s64)
-  ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s32) from %ir.tmp + 8, align 8)
+  ; ALL-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s64)
+  ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (dereferenceable load (s32) from %ir.tmp + 8, align 8)
   ; ALL-NEXT: $rax = COPY [[LOAD]](s64)
   ; ALL-NEXT: $edx = COPY [[LOAD1]](s32)
   ; ALL-NEXT: RET 0, implicit $rax, implicit $edx
@@ -215,12 +215,12 @@ define { i64, i64 } @test_return_i4(i64 %i.coerce0, i64 %i.coerce1) {
   ; ALL-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.i
   ; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.1, align 4)
   ; ALL-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; ALL-NEXT: %5:_(p0) = nuw nusw G_PTR_ADD [[FRAME_INDEX1]], [[C1]](s64)
-  ; ALL-NEXT: G_STORE [[COPY1]](s64), %5(p0) :: (store (s64) into %ir.2, align 4)
+  ; ALL-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[FRAME_INDEX1]], [[C1]](s64)
+  ; ALL-NEXT: G_STORE [[COPY1]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.2, align 4)
   ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.3, align 4), (load (s8) from %ir.4, align 4)
   ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.5, align 4)
-  ; ALL-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64)
-  ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s64) from %ir.5 + 8, align 4)
+  ; ALL-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64)
+  ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (dereferenceable load (s64) from %ir.5 + 8, align 4)
   ; ALL-NEXT: $rax = COPY [[LOAD]](s64)
   ; ALL-NEXT: $rdx = COPY [[LOAD1]](s64)
   ; ALL-NEXT: RET 0, implicit $rax, implicit $rdx
diff --git a/llvm/test/CodeGen/X86/abds-neg.ll b/llvm/test/CodeGen/X86/abds-neg.ll
index f6d66ab..d9064c6 100644
--- a/llvm/test/CodeGen/X86/abds-neg.ll
+++ b/llvm/test/CodeGen/X86/abds-neg.ll
@@ -367,44 +367,49 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
 ; X86-LABEL: abd_ext_i128:
 ; X86: # %bb.0:
 ; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: pushl %ebx
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: sbbl %edx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: sbbl %edi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %esi, %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovll %eax, %esi
-; X86-NEXT: cmovll %ebx, %edi
-; X86-NEXT: cmovll %ebp, %edx
-; X86-NEXT: cmovll (%esp), %ecx # 4-byte Folded Reload
-; X86-NEXT: xorl %ebx, %ebx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 44(%ebp), %eax
+; X86-NEXT: movl 24(%ebp), %edx
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %edx
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%ebp), %edx
+; X86-NEXT: movl 32(%ebp), %ebx
+; X86-NEXT: sbbl %edx, %ebx
+; X86-NEXT: movl 52(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: subl 24(%ebp), %ecx
+; X86-NEXT: sbbl 28(%ebp), %eax
+; X86-NEXT: sbbl 32(%ebp), %edx
+; X86-NEXT: sbbl 36(%ebp), %esi
+; X86-NEXT: cmovll %edi, %esi
+; X86-NEXT: cmovll %ebx, %edx
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: xorl %edi, %edi
 ; X86-NEXT: negl %ecx
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %edx, %ebp
-; X86-NEXT: movl $0, %edx
-; X86-NEXT: sbbl %edi, %edx
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %ecx, (%eax)
-; X86-NEXT: movl %ebp, 4(%eax)
-; X86-NEXT: movl %edx, 8(%eax)
-; X86-NEXT: movl %ebx, 12(%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: sbbl %eax, %ebx
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: movl 8(%ebp), %edx
+; X86-NEXT: movl %ecx, (%edx)
+; X86-NEXT: movl %ebx, 4(%edx)
+; X86-NEXT: movl %eax, 8(%edx)
+; X86-NEXT: movl %edi, 12(%edx)
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: leal -12(%ebp), %esp
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
 ; X86-NEXT: popl %ebx
@@ -438,44 +443,49 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
 ; X86-LABEL: abd_ext_i128_undef:
 ; X86: # %bb.0:
 ; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: pushl %ebx
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: sbbl %edx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: sbbl %edi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %esi, %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovll %eax, %esi
-; X86-NEXT: cmovll %ebx, %edi
-; X86-NEXT: cmovll %ebp, %edx
-; X86-NEXT: cmovll (%esp), %ecx # 4-byte Folded Reload
-; X86-NEXT: xorl %ebx, %ebx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 44(%ebp), %eax
+; X86-NEXT: movl 24(%ebp), %edx
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %edx
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%ebp), %edx
+; X86-NEXT: movl 32(%ebp), %ebx
+; X86-NEXT: sbbl %edx, %ebx
+; X86-NEXT: movl 52(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: subl 24(%ebp), %ecx
+; X86-NEXT: sbbl 28(%ebp), %eax
+; X86-NEXT: sbbl 32(%ebp), %edx
+; X86-NEXT: sbbl 36(%ebp), %esi
+; X86-NEXT: cmovll %edi, %esi
+; X86-NEXT: cmovll %ebx, %edx
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: xorl %edi, %edi
 ; X86-NEXT: negl %ecx
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %edx, %ebp
-; X86-NEXT: movl $0, %edx
-; X86-NEXT: sbbl %edi, %edx
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %ecx, (%eax)
-; X86-NEXT: movl %ebp, 4(%eax)
-; X86-NEXT: movl %edx, 8(%eax)
-; X86-NEXT: movl %ebx, 12(%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: sbbl %eax, %ebx
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: movl 8(%ebp), %edx
+; X86-NEXT: movl %ecx, (%edx)
+; X86-NEXT: movl %ebx, 4(%edx)
+; X86-NEXT: movl %eax, 8(%edx)
+; X86-NEXT: movl %edi, 12(%edx)
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: leal -12(%ebp), %esp
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
 ; X86-NEXT: popl %ebx
@@ -639,55 +649,59 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
 ; X86-LABEL: abd_minmax_i128:
 ; X86: # %bb.0:
 ; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: pushl %ebx
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpl %eax, %esi
-; X86-NEXT: sbbl %ebx, %ecx
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: sbbl %ebp, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: sbbl %edi, %ecx
-; X86-NEXT: movl %edi, %ecx
-; X86-NEXT: cmovll %edx, %ecx
-; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl %ebx, %ecx
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: cmovll %esi, %edx
-; X86-NEXT: cmpl %esi, %eax
-; X86-NEXT: movl %ebx, %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %edi, %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %eax
-; X86-NEXT: subl %eax, %edx
-; X86-NEXT: sbbl %ebx, %ecx
-; X86-NEXT: sbbl %esi, %ebp
-; X86-NEXT: movl (%esp), %esi # 4-byte Reload
-; X86-NEXT: sbbl %edi, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: movl %ecx, 4(%eax)
-; X86-NEXT: movl %ebp, 8(%eax)
-; X86-NEXT: movl %esi, 12(%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %esi
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: cmpl %esi, %edi
+; X86-NEXT: sbbl 44(%ebp), %eax
+; X86-NEXT: movl 48(%ebp), %edx
+; X86-NEXT: movl 32(%ebp), %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: movl 52(%ebp), %ebx
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: cmovll %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: cmovll 32(%ebp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 44(%ebp), %eax
+; X86-NEXT: cmovll 28(%ebp), %eax
+; X86-NEXT: movl %esi, %ecx
+; X86-NEXT: cmovll %edi, %ecx
+; X86-NEXT: cmpl %edi, %esi
+; X86-NEXT: movl 44(%ebp), %edi
+; X86-NEXT: sbbl 28(%ebp), %edi
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: sbbl 32(%ebp), %edi
+; X86-NEXT: movl %ebx, %edi
+; X86-NEXT: sbbl 36(%ebp), %edi
+; X86-NEXT: cmovll 36(%ebp), %ebx
+; X86-NEXT: cmovll 32(%ebp), %edx
+; X86-NEXT: movl 44(%ebp), %edi
+; X86-NEXT: cmovll 28(%ebp), %edi
+; X86-NEXT: cmovll 24(%ebp), %esi
+; X86-NEXT: subl %esi, %ecx
+; X86-NEXT: sbbl %edi, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: sbbl %edx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: sbbl %ebx, %esi
+; X86-NEXT: movl 8(%ebp), %edx
+; X86-NEXT: movl %ecx, (%edx)
+; X86-NEXT: movl %eax, 4(%edx)
+; X86-NEXT: movl %edi, 8(%edx)
+; X86-NEXT: movl %esi, 12(%edx)
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: leal -12(%ebp), %esp
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
 ; X86-NEXT: popl %ebx
@@ -848,37 +862,41 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
 ; X86-LABEL: abd_cmp_i128:
 ; X86: # %bb.0:
 ; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: pushl %ebx
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: subl %edx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: sbbl %ecx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %edi, %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmovgel (%esp), %edx # 4-byte Folded Reload
-; X86-NEXT: cmovgel %ebx, %esi
-; X86-NEXT: cmovgel %ebp, %ecx
-; X86-NEXT: cmovgel %eax, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: movl 28(%ebp), %edx
+; X86-NEXT: movl 40(%ebp), %eax
+; X86-NEXT: movl 44(%ebp), %esi
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: movl 48(%ebp), %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: movl 36(%ebp), %ebx
+; X86-NEXT: movl 52(%ebp), %eax
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: subl 40(%ebp), %ecx
+; X86-NEXT: sbbl 44(%ebp), %edx
+; X86-NEXT: sbbl 48(%ebp), %esi
+; X86-NEXT: sbbl 52(%ebp), %ebx
+; X86-NEXT: cmovgel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: cmovgel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovgel %edi, %esi
+; X86-NEXT: cmovgel %eax, %ebx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
 ; X86-NEXT: popl %ebx
@@ -1058,15 +1076,15 @@ define i64 @abd_subnsw_i64(i64 %a, i64 %b) nounwind {
 ; X86-NEXT: pushl %esi
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, %edx
 ; X86-NEXT: sarl $31, %edx
-; X86-NEXT: xorl %edx, %ecx
 ; X86-NEXT: xorl %edx, %esi
+; X86-NEXT: xorl %edx, %ecx
 ; X86-NEXT: movl %edx, %eax
-; X86-NEXT: subl %esi, %eax
-; X86-NEXT: sbbl %ecx, %edx
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: sbbl %esi, %edx
 ; X86-NEXT: popl %esi
 ; X86-NEXT: retl
 ;
@@ -1089,15 +1107,15 @@ define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind {
 ; X86-NEXT: pushl %esi
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, %edx
 ; X86-NEXT: sarl $31, %edx
-; X86-NEXT: xorl %edx, %ecx
 ; X86-NEXT: xorl %edx, %esi
+; X86-NEXT: xorl %edx, %ecx
 ; X86-NEXT: movl %edx, %eax
-; X86-NEXT: subl %esi, %eax
-; X86-NEXT: sbbl %ecx, %edx
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: sbbl %esi, %edx
 ; X86-NEXT: popl %esi
 ; X86-NEXT: retl
 ;
@@ -1118,35 +1136,39 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
 ; X86-LABEL: abd_subnsw_i128:
 ; X86: # %bb.0:
 ; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: pushl %ebx
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %ebx
-; X86-NEXT: sarl $31, %ebx
-; X86-NEXT: xorl %ebx, %ecx
-; X86-NEXT: xorl %ebx, %edx
-; X86-NEXT: xorl %ebx, %esi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: movl %ebx, %ebp
-; X86-NEXT: subl %edi, %ebp
-; X86-NEXT: movl %ebx, %edi
-; X86-NEXT: sbbl %esi, %edi
-; X86-NEXT: movl %ebx, %esi
-; X86-NEXT: sbbl %edx, %esi
-; X86-NEXT: sbbl %ecx, %ebx
-; X86-NEXT: movl %ebp, (%eax)
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 32(%ebp), %ecx
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %edx
+; X86-NEXT: subl 40(%ebp), %edi
+; X86-NEXT: sbbl 44(%ebp), %edx
+; X86-NEXT: sbbl 48(%ebp), %ecx
+; X86-NEXT: sbbl 52(%ebp), %eax
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: sarl $31, %esi
+; X86-NEXT: xorl %esi, %eax
+; X86-NEXT: xorl %esi, %ecx
+; X86-NEXT: xorl %esi, %edx
+; X86-NEXT: xorl %esi, %edi
+; X86-NEXT: movl %esi, %ebx
+; X86-NEXT: subl %edi, %ebx
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: sbbl %edx, %edi
+; X86-NEXT: movl %esi, %edx
+; X86-NEXT: sbbl %ecx, %edx
+; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, (%eax)
 ; X86-NEXT: movl %edi, 4(%eax)
-; X86-NEXT: movl %esi, 8(%eax)
-; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: movl %esi, 12(%eax)
+; X86-NEXT: leal -12(%ebp), %esp
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
 ; X86-NEXT: popl %ebx
@@ -1175,35 +1197,39 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind {
 ; X86-LABEL: abd_subnsw_i128_undef:
 ; X86: # %bb.0:
 ; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: pushl %ebx
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %ebx
-; X86-NEXT: sarl $31, %ebx
-; X86-NEXT: xorl %ebx, %ecx
-; X86-NEXT: xorl %ebx, %edx
-; X86-NEXT: xorl %ebx, %esi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: movl %ebx, %ebp
-; X86-NEXT: subl %edi, %ebp
-; X86-NEXT: movl %ebx, %edi
-; X86-NEXT: sbbl %esi, %edi
-; X86-NEXT: movl %ebx, %esi
-; X86-NEXT: sbbl %edx, %esi
-; X86-NEXT: sbbl %ecx, %ebx
-; X86-NEXT: movl %ebp, (%eax)
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 32(%ebp), %ecx
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %edx
+; X86-NEXT: subl 40(%ebp), %edi
+; X86-NEXT: sbbl 44(%ebp), %edx
+; X86-NEXT: sbbl 48(%ebp), %ecx
+; X86-NEXT: sbbl 52(%ebp), %eax
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: sarl $31, %esi
+; X86-NEXT: xorl %esi, %eax
+; X86-NEXT: xorl %esi, %ecx
+; X86-NEXT: xorl %esi, %edx
+; X86-NEXT: xorl %esi, %edi
+; X86-NEXT: movl %esi, %ebx
+; X86-NEXT: subl %edi, %ebx
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: sbbl %edx, %edi
+; X86-NEXT: movl %esi, %edx
+; X86-NEXT: sbbl %ecx, %edx
+; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, (%eax)
 ; X86-NEXT: movl %edi, 4(%eax)
-; X86-NEXT: movl %esi, 8(%eax)
-; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: movl %esi, 12(%eax)
+; X86-NEXT: leal -12(%ebp), %esp
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
 ; X86-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/abds.ll b/llvm/test/CodeGen/X86/abds.ll
index 0356c27..a1a4ba8 100644
--- a/llvm/test/CodeGen/X86/abds.ll
+++ b/llvm/test/CodeGen/X86/abds.ll
@@ -343,37 +347,41 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
 ; X86-LABEL: abd_ext_i128:
 ; X86: # %bb.0:
 ; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: pushl %ebx
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: subl %edx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: sbbl %ecx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %edi, %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmovll (%esp), %edx # 4-byte Folded Reload
-; X86-NEXT: cmovll %ebx, %esi
-; X86-NEXT: cmovll %ebp, %ecx
-; X86-NEXT: cmovll %eax, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 44(%ebp), %edx
+; X86-NEXT: movl 24(%ebp), %eax
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%ebp), %esi
+; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: movl 52(%ebp), %ebx
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: subl 24(%ebp), %ecx
+; X86-NEXT: sbbl 28(%ebp), %edx
+; X86-NEXT: sbbl 32(%ebp), %esi
+; X86-NEXT: sbbl 36(%ebp), %ebx
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovll %edi, %esi
+; X86-NEXT: cmovll %eax, %ebx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
 ; X86-NEXT: popl %ebx
@@ -404,37 +408,41 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
 ; X86-LABEL: abd_ext_i128_undef:
 ; X86: # %bb.0:
 ; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: pushl %ebx
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: subl %edx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: sbbl %ecx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %edi, %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmovll (%esp), %edx # 4-byte Folded Reload
-; X86-NEXT: cmovll %ebx, %esi
-; X86-NEXT: cmovll %ebp, %ecx
-; X86-NEXT: cmovll %eax, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 44(%ebp), %edx
+; X86-NEXT: movl 24(%ebp), %eax
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%ebp), %esi
+; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: movl 52(%ebp), %ebx
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: subl 24(%ebp), %ecx
+; X86-NEXT: sbbl 28(%ebp), %edx
+; X86-NEXT: sbbl 32(%ebp), %esi
+; X86-NEXT: sbbl 36(%ebp), %ebx
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovll %edi, %esi
+; X86-NEXT: cmovll %eax, %ebx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
 ; X86-NEXT: popl %ebx
@@ -585,37 +593,41 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
 ; X86-LABEL: abd_minmax_i128:
 ; X86: # %bb.0:
 ; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: pushl %ebx
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: subl %edx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: sbbl %ecx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %edi, %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmovll (%esp), %edx # 4-byte Folded Reload
-; X86-NEXT: cmovll %ebx, %esi
-; X86-NEXT: cmovll %ebp, %ecx
-; X86-NEXT: cmovll %eax, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 44(%ebp), %edx
+; X86-NEXT: movl 24(%ebp), %eax
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%ebp), %esi
+; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: movl 52(%ebp), %ebx
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: subl 24(%ebp), %ecx
+; X86-NEXT: sbbl 28(%ebp), %edx
+; X86-NEXT: sbbl 32(%ebp), %esi
+; X86-NEXT: sbbl 36(%ebp), %ebx
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovll %edi, %esi
+; X86-NEXT: cmovll %eax, %ebx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
 ; X86-NEXT: popl %ebx
@@ -768,37 +780,41 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
 ; X86-LABEL: abd_cmp_i128:
 ; X86: # %bb.0:
 ; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: pushl %ebx
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: subl %edx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: sbbl %ecx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %edi, %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmovll (%esp), %edx # 4-byte Folded Reload
-; X86-NEXT: cmovll %ebx, %esi
-; X86-NEXT: cmovll %ebp, %ecx
-; X86-NEXT: cmovll %eax, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 44(%ebp), %edx
+; X86-NEXT: movl 24(%ebp), %eax
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%ebp), %esi
+; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: movl 52(%ebp), %ebx
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: subl 24(%ebp), %ecx
+; X86-NEXT: sbbl 28(%ebp), %edx
+; X86-NEXT: sbbl 32(%ebp), %esi
+; X86-NEXT: sbbl 36(%ebp), %ebx
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovll %edi, %esi
+; X86-NEXT: cmovll %eax, %ebx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
 ; X86-NEXT: popl %ebx
@@ -1027,35 +1043,38 @@ define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind {
 define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
 ; X86-LABEL: abd_subnsw_i128:
 ; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %ebx
-; X86-NEXT: sarl $31, %ebx
-; X86-NEXT: xorl %ebx, %ecx
-; X86-NEXT: xorl %ebx, %edx
-; X86-NEXT: xorl %ebx, %esi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: subl %ebx, %edi
-; X86-NEXT: sbbl %ebx, %esi
-; X86-NEXT: sbbl %ebx, %edx
-; X86-NEXT: sbbl %ebx, %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: subl 40(%ebp), %edi
+; X86-NEXT: sbbl 44(%ebp), %esi
+; X86-NEXT: sbbl 48(%ebp), %edx
+; X86-NEXT: sbbl 52(%ebp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: xorl %eax, %esi
+; X86-NEXT: xorl %eax, %edi
+; X86-NEXT: subl %eax, %edi
+; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl 8(%ebp), %eax
 ; X86-NEXT: movl %edi, (%eax)
 ; X86-NEXT: movl %esi, 4(%eax)
 ; X86-NEXT: movl %edx, 8(%eax)
 ; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
 ; X86-NEXT: retl $4
 ;
 ; X64-LABEL: abd_subnsw_i128:
@@ -1079,35 +1098,38 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
 define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind {
 ; X86-LABEL: abd_subnsw_i128_undef:
 ; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %ebx
-; X86-NEXT: sarl $31, %ebx
-; X86-NEXT: xorl %ebx, %ecx
-; X86-NEXT: xorl %ebx, %edx
-; X86-NEXT: xorl %ebx, %esi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: subl %ebx, %edi
-; X86-NEXT: sbbl %ebx, %esi
-; X86-NEXT: sbbl %ebx, %edx
-; X86-NEXT: sbbl %ebx, %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: subl 40(%ebp), %edi
+; X86-NEXT: sbbl 44(%ebp), %esi
+; X86-NEXT: sbbl 48(%ebp), %edx
+; X86-NEXT: sbbl 52(%ebp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: xorl %eax, %esi
+; X86-NEXT: xorl %eax, %edi
+; X86-NEXT: subl %eax, %edi
+; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl 8(%ebp), %eax
 ; X86-NEXT: movl %edi, (%eax)
 ; X86-NEXT: movl %esi, 4(%eax)
 ; X86-NEXT: movl %edx, 8(%eax)
 ; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
 ; X86-NEXT: retl $4
 ;
 ; X64-LABEL: abd_subnsw_i128_undef:
@@ -1282,37 +1304,41 @@ define i128 @abd_select_i128(i128 %a, i128 %b) nounwind {
 ; X86-LABEL: abd_select_i128:
 ; X86: # %bb.0:
 ; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: pushl %ebx
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: subl %edx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: sbbl %ecx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %edi, %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmovll (%esp), %edx # 4-byte Folded Reload
-; X86-NEXT: cmovll %ebx, %esi
-; X86-NEXT: cmovll %ebp, %ecx
-; X86-NEXT: cmovll %eax, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 44(%ebp), %edx
+; X86-NEXT: movl 24(%ebp), %eax
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%ebp), %esi
+; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: movl 52(%ebp), %ebx
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: subl 24(%ebp), %ecx
+; X86-NEXT: sbbl 28(%ebp), %edx
+; X86-NEXT: sbbl 32(%ebp), %esi
+; X86-NEXT: sbbl 36(%ebp), %ebx
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovll %edi, %esi
+; X86-NEXT: cmovll %eax, %ebx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
 ; X86-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/abdu-neg.ll b/llvm/test/CodeGen/X86/abdu-neg.ll
index 6bda99c..b7c3407 100644
--- a/llvm/test/CodeGen/X86/abdu-neg.ll
+++ b/llvm/test/CodeGen/X86/abdu-neg.ll
@@ -355,39 +355,43 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
 ; X86-LABEL: abd_ext_i128:
 ; X86: # %bb.0:
 ; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: pushl %ebx
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %edi, %edi
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %ebp, %ebp
-; X86-NEXT: xorl %ebp, %ecx
-; X86-NEXT: xorl %ebp, %esi
-; X86-NEXT: xorl %ebp, %ebx
-; X86-NEXT: xorl %ebp, %edx
-; X86-NEXT: subl %ebp, %edx
-; X86-NEXT: sbbl %ebp, %ebx
-; X86-NEXT: sbbl %ebp, %esi
-; X86-NEXT: sbbl %ebp, %ecx
-; X86-NEXT: negl %edx
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %ebx, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: movl 28(%ebp), %edi
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: subl 40(%ebp), %ecx
+; X86-NEXT: sbbl 44(%ebp), %edi
+; X86-NEXT: sbbl 48(%ebp), %esi
+; X86-NEXT: sbbl 52(%ebp), %eax
 ; X86-NEXT: movl $0, %ebx
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: sbbl %ecx, %edi
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: movl %ebp, 4(%eax)
-; X86-NEXT: movl %ebx, 8(%eax)
-; X86-NEXT: movl %edi, 12(%eax)
+; X86-NEXT: sbbl %ebx, %ebx
+; X86-NEXT: xorl %ebx, %eax
+; X86-NEXT: xorl %ebx, %esi
+; X86-NEXT: xorl %ebx, %edi
+; X86-NEXT: xorl %ebx, %ecx
+; X86-NEXT: subl %ebx, %ecx
+; X86-NEXT: sbbl %ebx, %edi
+; X86-NEXT: sbbl %ebx, %esi
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: negl %ecx
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: sbbl %edi, %ebx
+; X86-NEXT: movl $0, %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: movl %ebx, 4(%eax)
+; X86-NEXT: movl %edi, 8(%eax)
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: leal -12(%ebp), %esp
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
 ; X86-NEXT: popl %ebx
@@ -423,39 +427,43 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
 ; X86-LABEL: abd_ext_i128_undef:
 ; X86: # %bb.0:
 ; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: pushl %ebx
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %edi, %edi
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %ebp, %ebp
-; X86-NEXT: xorl %ebp, %ecx
-; X86-NEXT: xorl %ebp, %esi
-; X86-NEXT: xorl %ebp, %ebx
-; X86-NEXT: xorl %ebp, %edx
-; X86-NEXT: subl %ebp, %edx
-; X86-NEXT: sbbl %ebp, %ebx
-; X86-NEXT: sbbl %ebp, %esi
-; X86-NEXT: sbbl %ebp, %ecx
-; X86-NEXT: negl %edx
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %ebx, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: movl 28(%ebp), %edi
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: subl 40(%ebp), %ecx
+; X86-NEXT: sbbl 44(%ebp), %edi
+; X86-NEXT: sbbl 48(%ebp), %esi
+; X86-NEXT: sbbl 52(%ebp), %eax
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: sbbl %ebx, %ebx
+; X86-NEXT: xorl %ebx, %eax
+; X86-NEXT: xorl %ebx, %esi
+; X86-NEXT: xorl %ebx, %edi
+; X86-NEXT: xorl %ebx, %ecx
+; X86-NEXT: subl %ebx, %ecx
+; X86-NEXT: sbbl %ebx, %edi
+; X86-NEXT: sbbl %ebx, %esi
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: negl %ecx
 ; X86-NEXT: movl $0, %ebx
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: sbbl %ecx, %edi
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: movl %ebp, 4(%eax)
-; X86-NEXT: movl %ebx, 8(%eax)
-; X86-NEXT: movl %edi, 12(%eax)
+; X86-NEXT: sbbl %edi, %ebx
+; X86-NEXT: movl $0, %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: movl %ebx, 4(%eax)
+; X86-NEXT: movl %edi, 8(%eax)
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: leal -12(%ebp), %esp
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
 ; X86-NEXT: popl %ebx
@@ -621,55 +629,59 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
 ; X86-LABEL: abd_minmax_i128:
 ; X86: # %bb.0:
 ; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: pushl %ebx
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpl %eax, %esi
-; X86-NEXT: sbbl %ebx, %ecx
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: sbbl %ebp, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: sbbl %edi, %ecx
-; X86-NEXT: movl %edi, %ecx
-; X86-NEXT: cmovbl %edx, %ecx
-; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill
-; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl %ebx, %ecx
-; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: cmovbl %esi, %edx
-; X86-NEXT: cmpl %esi, %eax
-; X86-NEXT: movl %ebx, %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %edi, %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: subl %eax, %edx
-; X86-NEXT: sbbl %ebx, %ecx
-; X86-NEXT: sbbl %esi, %ebp
-; X86-NEXT: movl (%esp), %esi # 4-byte Reload
-; X86-NEXT: sbbl %edi, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: movl %ecx, 4(%eax)
-; X86-NEXT: movl %ebp, 8(%eax)
-; X86-NEXT: movl %esi, 12(%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %esi
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: cmpl %esi, %edi
+; X86-NEXT: sbbl 44(%ebp), %eax
+; X86-NEXT: movl 48(%ebp), %edx
+; X86-NEXT: movl 32(%ebp), %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: movl 52(%ebp), %ebx
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: cmovbl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: cmovbl 32(%ebp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 44(%ebp), %eax
+; X86-NEXT: cmovbl 28(%ebp), %eax
+; X86-NEXT: movl %esi, %ecx
+; X86-NEXT: cmovbl %edi, %ecx
+; X86-NEXT: cmpl %edi, %esi
+; X86-NEXT: movl 44(%ebp), %edi
+; X86-NEXT: sbbl 28(%ebp), %edi
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: sbbl 32(%ebp), %edi
+; X86-NEXT: movl %ebx, %edi
+; X86-NEXT: sbbl 36(%ebp), %edi
+; X86-NEXT: cmovbl 36(%ebp), %ebx
+; X86-NEXT: cmovbl 32(%ebp), %edx
+; X86-NEXT: movl 44(%ebp), %edi
+; X86-NEXT: cmovbl 28(%ebp), %edi
+; X86-NEXT: cmovbl 24(%ebp), %esi
+; X86-NEXT: subl %esi, %ecx
+; X86-NEXT: sbbl %edi, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: sbbl %edx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: sbbl %ebx, %esi
+; X86-NEXT: movl 8(%ebp), %edx
+; X86-NEXT: movl %ecx, (%edx)
+; X86-NEXT: movl %eax, 4(%edx)
+; X86-NEXT: movl %edi, 8(%edx)
+; X86-NEXT: movl %esi, 12(%edx)
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: leal -12(%ebp), %esp
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
 ; X86-NEXT: popl %ebx
@@ -827,39 +839,43 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
 ; X86-LABEL: abd_cmp_i128:
 ; X86: # %bb.0:
 ; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: pushl %ebx
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %edi, %edi
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %ebp, %ebp
-; X86-NEXT: xorl %ebp, %ecx
-; X86-NEXT: xorl %ebp, %esi
-; X86-NEXT: xorl %ebp, %ebx
-; X86-NEXT: xorl %ebp, %edx
-; X86-NEXT: subl %ebp, %edx
-; X86-NEXT: sbbl %ebp, %ebx
-; X86-NEXT: sbbl %ebp, %esi
-; X86-NEXT: sbbl %ebp, %ecx
-; X86-NEXT: negl %edx
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %ebx, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: movl 28(%ebp), %edi
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: subl 40(%ebp), %ecx
+; X86-NEXT: sbbl 44(%ebp), %edi
+; X86-NEXT: sbbl 48(%ebp), %esi
+; X86-NEXT: sbbl 52(%ebp), %eax
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: sbbl %ebx, %ebx
+; X86-NEXT: xorl %ebx, %eax
+; X86-NEXT: xorl %ebx, %esi
+; X86-NEXT: xorl %ebx, %edi
+; X86-NEXT: xorl %ebx, %ecx
+; X86-NEXT: subl %ebx, %ecx
+; X86-NEXT: sbbl %ebx, %edi
+; X86-NEXT: sbbl %ebx, %esi
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: negl %ecx
 ; X86-NEXT: movl $0, %ebx
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: sbbl %ecx, %edi
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: movl %ebp, 4(%eax)
-; X86-NEXT: movl %ebx, 8(%eax)
-; X86-NEXT: movl %edi, 12(%eax)
+; X86-NEXT: sbbl %edi, %ebx
+; X86-NEXT: movl $0, %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: movl %ebx, 4(%eax)
+; X86-NEXT: movl %edi, 8(%eax)
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: leal -12(%ebp), %esp
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
 ; X86-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/abdu.ll b/llvm/test/CodeGen/X86/abdu.ll
index 27acec3..043c915 100644
--- a/llvm/test/CodeGen/X86/abdu.ll
+++ b/llvm/test/CodeGen/X86/abdu.ll
@@ -326,35 +326,38 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
 define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
 ; X86-LABEL: abd_ext_i128:
 ; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl %ebx, %ebx
-; X86-NEXT: xorl %ebx, %ecx
-; X86-NEXT: xorl %ebx, %edx
-; X86-NEXT: xorl %ebx, %esi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: subl %ebx, %edi
-; X86-NEXT: sbbl %ebx, %esi
-; X86-NEXT: sbbl %ebx, %edx
-; X86-NEXT: sbbl %ebx, %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: subl 40(%ebp), %edi
+; X86-NEXT: sbbl 44(%ebp), %esi
+; X86-NEXT: sbbl 48(%ebp), %edx
+; X86-NEXT: sbbl 52(%ebp), %ecx
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: xorl %eax, %esi
+; X86-NEXT: xorl %eax, %edi
+; X86-NEXT: subl %eax, %edi
+; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl 8(%ebp), %eax
 ; X86-NEXT: movl %edi, (%eax)
 ; X86-NEXT: movl %esi, 4(%eax)
 ; X86-NEXT: movl %edx, 8(%eax)
 ; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
 ; X86-NEXT: retl $4
 ;
 ; X64-LABEL: abd_ext_i128:
@@ -381,35 +384,38 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
 define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
 ; X86-LABEL: abd_ext_i128_undef:
 ; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl %ebx, %ebx
-; X86-NEXT: xorl %ebx, %ecx
-; X86-NEXT: xorl %ebx, %edx
-; X86-NEXT: xorl %ebx, %esi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: subl %ebx, %edi
-; X86-NEXT: sbbl %ebx, %esi
-; X86-NEXT: sbbl %ebx, %edx
-; X86-NEXT: sbbl %ebx, %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: subl 40(%ebp), %edi
+; X86-NEXT: sbbl 44(%ebp), %esi
+; X86-NEXT: sbbl 48(%ebp), %edx
+; X86-NEXT: sbbl 52(%ebp), %ecx
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: xorl %eax, %esi
+; X86-NEXT: xorl %eax, %edi
+; X86-NEXT: subl %eax, %edi
+; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl 8(%ebp), %eax
 ; X86-NEXT: movl %edi, (%eax)
 ; X86-NEXT: movl %esi, 4(%eax)
 ; X86-NEXT: movl %edx, 8(%eax)
 ; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
 ; X86-NEXT: retl $4
 ;
 ; X64-LABEL: abd_ext_i128_undef:
@@ -548,35 +554,38 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
 define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
 ; X86-LABEL: abd_minmax_i128:
 ; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl %ebx, %ebx
-; X86-NEXT: xorl %ebx, %ecx
-; X86-NEXT: xorl %ebx, %edx
-; X86-NEXT: xorl %ebx, %esi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: subl %ebx, %edi
-; X86-NEXT: sbbl %ebx, %esi
-; X86-NEXT: sbbl %ebx, %edx
-; X86-NEXT: sbbl %ebx, %ecx
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl
36(%ebp), %ecx +; X86-NEXT: movl 24(%ebp), %edi +; X86-NEXT: movl 28(%ebp), %esi +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: subl 40(%ebp), %edi +; X86-NEXT: sbbl 44(%ebp), %esi +; X86-NEXT: sbbl 48(%ebp), %edx +; X86-NEXT: sbbl 52(%ebp), %ecx +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: xorl %eax, %ecx +; X86-NEXT: xorl %eax, %edx +; X86-NEXT: xorl %eax, %esi +; X86-NEXT: xorl %eax, %edi +; X86-NEXT: subl %eax, %edi +; X86-NEXT: sbbl %eax, %esi +; X86-NEXT: sbbl %eax, %edx +; X86-NEXT: sbbl %eax, %ecx +; X86-NEXT: movl 8(%ebp), %eax ; X86-NEXT: movl %edi, (%eax) ; X86-NEXT: movl %esi, 4(%eax) ; X86-NEXT: movl %edx, 8(%eax) ; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: leal -8(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi -; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl $4 ; ; X64-LABEL: abd_minmax_i128: @@ -717,35 +726,38 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; X86-LABEL: abd_cmp_i128: ; X86: # %bb.0: -; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: xorl %ebx, %ebx -; X86-NEXT: subl {{[0-9]+}}(%esp), %edi -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: sbbl %ebx, %ebx -; X86-NEXT: xorl %ebx, %ecx -; X86-NEXT: xorl %ebx, %edx -; X86-NEXT: xorl %ebx, %esi -; X86-NEXT: xorl %ebx, %edi -; X86-NEXT: subl %ebx, %edi -; X86-NEXT: sbbl %ebx, %esi -; X86-NEXT: sbbl %ebx, %edx -; X86-NEXT: sbbl %ebx, %ecx +; X86-NEXT: andl $-16, %esp +; X86-NEXT: movl 32(%ebp), %edx +; X86-NEXT: movl 36(%ebp), %ecx +; X86-NEXT: movl 24(%ebp), %edi +; X86-NEXT: movl 28(%ebp), %esi +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: subl 40(%ebp), %edi +; X86-NEXT: sbbl 44(%ebp), %esi +; X86-NEXT: sbbl 48(%ebp), %edx +; X86-NEXT: sbbl 52(%ebp), %ecx +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: xorl %eax, %ecx +; X86-NEXT: xorl %eax, %edx +; X86-NEXT: xorl %eax, %esi +; X86-NEXT: xorl %eax, %edi +; X86-NEXT: subl %eax, %edi +; X86-NEXT: sbbl %eax, %esi +; X86-NEXT: sbbl %eax, %edx +; X86-NEXT: sbbl %eax, %ecx +; X86-NEXT: movl 8(%ebp), %eax ; X86-NEXT: movl %edi, (%eax) ; X86-NEXT: movl %esi, 4(%eax) ; X86-NEXT: movl %edx, 8(%eax) ; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: leal -8(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi -; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl $4 ; ; X64-LABEL: abd_cmp_i128: @@ -887,35 +899,38 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind { define i128 @abd_select_i128(i128 %a, i128 %b) nounwind { ; X86-LABEL: abd_select_i128: ; X86: # %bb.0: -; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: xorl %ebx, %ebx -; X86-NEXT: subl {{[0-9]+}}(%esp), %edi -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: sbbl %ebx, %ebx -; X86-NEXT: xorl %ebx, %ecx -; X86-NEXT: xorl %ebx, %edx -; X86-NEXT: xorl %ebx, %esi -; X86-NEXT: xorl %ebx, %edi -; 
X86-NEXT: subl %ebx, %edi -; X86-NEXT: sbbl %ebx, %esi -; X86-NEXT: sbbl %ebx, %edx -; X86-NEXT: sbbl %ebx, %ecx +; X86-NEXT: andl $-16, %esp +; X86-NEXT: movl 32(%ebp), %edx +; X86-NEXT: movl 36(%ebp), %ecx +; X86-NEXT: movl 24(%ebp), %edi +; X86-NEXT: movl 28(%ebp), %esi +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: subl 40(%ebp), %edi +; X86-NEXT: sbbl 44(%ebp), %esi +; X86-NEXT: sbbl 48(%ebp), %edx +; X86-NEXT: sbbl 52(%ebp), %ecx +; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: xorl %eax, %ecx +; X86-NEXT: xorl %eax, %edx +; X86-NEXT: xorl %eax, %esi +; X86-NEXT: xorl %eax, %edi +; X86-NEXT: subl %eax, %edi +; X86-NEXT: sbbl %eax, %esi +; X86-NEXT: sbbl %eax, %edx +; X86-NEXT: sbbl %eax, %ecx +; X86-NEXT: movl 8(%ebp), %eax ; X86-NEXT: movl %edi, (%eax) ; X86-NEXT: movl %esi, 4(%eax) ; X86-NEXT: movl %edx, 8(%eax) ; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: leal -8(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi -; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl $4 ; ; X64-LABEL: abd_select_i128: diff --git a/llvm/test/CodeGen/X86/abs.ll b/llvm/test/CodeGen/X86/abs.ll index bae140a..e252d59 100644 --- a/llvm/test/CodeGen/X86/abs.ll +++ b/llvm/test/CodeGen/X86/abs.ll @@ -144,31 +144,34 @@ define i128 @test_i128(i128 %a) nounwind { ; ; X86-LABEL: test_i128: ; X86: # %bb.0: -; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: sarl $31, %edx -; X86-NEXT: xorl %edx, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: xorl %edx, %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: xorl %edx, %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: xorl %edx, %ebx -; X86-NEXT: subl %edx, %ebx -; X86-NEXT: sbbl %edx, %edi -; X86-NEXT: sbbl %edx, %esi -; X86-NEXT: sbbl %edx, %ecx -; X86-NEXT: movl %ebx, (%eax) -; X86-NEXT: movl %edi, 4(%eax) -; X86-NEXT: movl %esi, 8(%eax) +; X86-NEXT: andl $-16, %esp +; X86-NEXT: movl 36(%ebp), %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: sarl $31, %eax +; X86-NEXT: xorl %eax, %ecx +; X86-NEXT: movl 32(%ebp), %edx +; X86-NEXT: xorl %eax, %edx +; X86-NEXT: movl 28(%ebp), %esi +; X86-NEXT: xorl %eax, %esi +; X86-NEXT: movl 24(%ebp), %edi +; X86-NEXT: xorl %eax, %edi +; X86-NEXT: subl %eax, %edi +; X86-NEXT: sbbl %eax, %esi +; X86-NEXT: sbbl %eax, %edx +; X86-NEXT: sbbl %eax, %ecx +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl %edi, (%eax) +; X86-NEXT: movl %esi, 4(%eax) +; X86-NEXT: movl %edx, 8(%eax) ; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: leal -8(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi -; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl $4 %r = call i128 @llvm.abs.i128(i128 %a, i1 false) ret i128 %r @@ -688,13 +691,17 @@ define i128 @test_sextinreg_i128(i128 %a) nounwind { ; ; X86-LABEL: test_sextinreg_i128: ; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 28(%ebp), %ecx ; X86-NEXT: movl %ecx, %edx ; X86-NEXT: sarl $31, %edx ; X86-NEXT: xorl %edx, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl 24(%ebp), %esi ; X86-NEXT: xorl %edx, %esi ; X86-NEXT: subl %edx, %esi ; X86-NEXT: sbbl %edx, %ecx @@ -702,7 +709,9 @@ define i128 @test_sextinreg_i128(i128 %a) nounwind { ; 
X86-NEXT: movl %ecx, 4(%eax) ; X86-NEXT: movl $0, 12(%eax) ; X86-NEXT: movl $0, 8(%eax) +; X86-NEXT: leal -4(%ebp), %esp ; X86-NEXT: popl %esi +; X86-NEXT: popl %ebp ; X86-NEXT: retl $4 %shl = shl i128 %a, 64 %ashr = ashr exact i128 %shl, 64 diff --git a/llvm/test/CodeGen/X86/add-sub-bool.ll b/llvm/test/CodeGen/X86/add-sub-bool.ll index c2bfcf5..1df284f 100644 --- a/llvm/test/CodeGen/X86/add-sub-bool.ll +++ b/llvm/test/CodeGen/X86/add-sub-bool.ll @@ -104,18 +104,21 @@ define i24 @test_i24_add_add_idx(i24 %x, i24 %y, i24 %z) nounwind { define i128 @test_i128_add_add_idx(i128 %x, i128 %y, i128 %z) nounwind { ; X86-LABEL: test_i128_add_add_idx: ; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: addl {{[0-9]+}}(%esp), %esi -; X86-NEXT: adcl {{[0-9]+}}(%esp), %edi -; X86-NEXT: adcl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx -; X86-NEXT: btl $5, {{[0-9]+}}(%esp) +; X86-NEXT: andl $-16, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 48(%ebp), %ecx +; X86-NEXT: movl 52(%ebp), %edx +; X86-NEXT: movl 40(%ebp), %esi +; X86-NEXT: movl 44(%ebp), %edi +; X86-NEXT: addl 24(%ebp), %esi +; X86-NEXT: adcl 28(%ebp), %edi +; X86-NEXT: adcl 32(%ebp), %ecx +; X86-NEXT: adcl 36(%ebp), %edx +; X86-NEXT: btl $5, 64(%ebp) ; X86-NEXT: adcl $0, %esi ; X86-NEXT: adcl $0, %edi ; X86-NEXT: adcl $0, %ecx @@ -124,8 +127,10 @@ define i128 @test_i128_add_add_idx(i128 %x, i128 %y, i128 %z) nounwind { ; X86-NEXT: movl %esi, (%eax) ; X86-NEXT: movl %ecx, 8(%eax) ; X86-NEXT: movl %edx, 12(%eax) +; X86-NEXT: leal -8(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi +; X86-NEXT: popl %ebp ; X86-NEXT: retl $4 ; ; X64-LABEL: test_i128_add_add_idx: diff --git a/llvm/test/CodeGen/X86/apx/cf.ll b/llvm/test/CodeGen/X86/apx/cf.ll index 1e4ac3f..b111ae5 100644 --- a/llvm/test/CodeGen/X86/apx/cf.ll +++ b/llvm/test/CodeGen/X86/apx/cf.ll @@ -162,7 +162,7 @@ entry: define void @load_zext(i1 %cond, ptr %b, ptr %p) { ; CHECK-LABEL: load_zext: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andb $1, %dil +; CHECK-NEXT: testb $1, %dil ; CHECK-NEXT: cfcmovnew (%rsi), %ax ; CHECK-NEXT: movzwl %ax, %eax ; CHECK-NEXT: cfcmovnel %eax, (%rdx) @@ -180,7 +180,7 @@ entry: define void @load_sext(i1 %cond, ptr %b, ptr %p) { ; CHECK-LABEL: load_sext: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andb $1, %dil +; CHECK-NEXT: testb $1, %dil ; CHECK-NEXT: cfcmovnel (%rsi), %eax ; CHECK-NEXT: cltq ; CHECK-NEXT: cfcmovneq %rax, (%rdx) diff --git a/llvm/test/CodeGen/X86/arg-copy-elide.ll b/llvm/test/CodeGen/X86/arg-copy-elide.ll index 0eb2c63..f13627b 100644 --- a/llvm/test/CodeGen/X86/arg-copy-elide.ll +++ b/llvm/test/CodeGen/X86/arg-copy-elide.ll @@ -188,11 +188,11 @@ define void @split_i128(ptr %sret, i128 %x) { ; CHECK-NEXT: pushl %esi ; CHECK-NEXT: andl $-16, %esp ; CHECK-NEXT: subl $48, %esp -; CHECK-NEXT: movl 12(%ebp), %eax +; CHECK-NEXT: movl 24(%ebp), %eax ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movl 16(%ebp), %ebx -; CHECK-NEXT: movl 20(%ebp), %esi -; CHECK-NEXT: movl 24(%ebp), %edi +; CHECK-NEXT: movl 28(%ebp), %ebx +; CHECK-NEXT: movl 32(%ebp), %esi +; CHECK-NEXT: movl 36(%ebp), %edi ; CHECK-NEXT: movl %edi, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl %esi, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl %ebx, {{[0-9]+}}(%esp) 
diff --git a/llvm/test/CodeGen/X86/avg.ll b/llvm/test/CodeGen/X86/avg.ll
index 217cceb..0de308a 100644
--- a/llvm/test/CodeGen/X86/avg.ll
+++ b/llvm/test/CodeGen/X86/avg.ll
@@ -1734,20 +1734,20 @@ define void @not_avg_v16i8_wide_constants(ptr %a, ptr %b) nounwind {
 ; SSE2-LABEL: not_avg_v16i8_wide_constants:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: movaps (%rdi), %xmm1
-; SSE2-NEXT: movdqa (%rsi), %xmm2
+; SSE2-NEXT: movdqa (%rsi), %xmm0
 ; SSE2-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
 ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
 ; SSE2-NEXT: decl %eax
-; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: movd %eax, %xmm2
 ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
 ; SSE2-NEXT: decl %eax
 ; SSE2-NEXT: movd %eax, %xmm1
 ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
 ; SSE2-NEXT: decl %eax
-; SSE2-NEXT: movd %eax, %xmm4
+; SSE2-NEXT: movd %eax, %xmm3
 ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
 ; SSE2-NEXT: decl %eax
-; SSE2-NEXT: movd %eax, %xmm3
+; SSE2-NEXT: movd %eax, %xmm4
 ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
 ; SSE2-NEXT: decl %eax
 ; SSE2-NEXT: movd %eax, %xmm5
@@ -1762,6 +1762,9 @@ define void @not_avg_v16i8_wide_constants(ptr %a, ptr %b) nounwind {
 ; SSE2-NEXT: movd %eax, %xmm8
 ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
 ; SSE2-NEXT: decl %eax
+; SSE2-NEXT: movd %eax, %xmm10
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE2-NEXT: decl %eax
 ; SSE2-NEXT: movd %eax, %xmm9
 ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
 ; SSE2-NEXT: decl %eax
@@ -1771,9 +1774,6 @@ define void @not_avg_v16i8_wide_constants(ptr %a, ptr %b) nounwind {
 ; SSE2-NEXT: movd %eax, %xmm12
 ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
 ; SSE2-NEXT: decl %eax
-; SSE2-NEXT: movd %eax, %xmm10
-; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE2-NEXT: decl %eax
 ; SSE2-NEXT: movd %eax, %xmm13
 ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
 ; SSE2-NEXT: decl %eax
@@ -1783,43 +1783,45 @@ define void @not_avg_v16i8_wide_constants(ptr %a, ptr %b) nounwind {
 ; SSE2-NEXT: movd %eax, %xmm15
 ; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
 ; SSE2-NEXT: decl %eax
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,0,0,0]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm6[0,0,0,0]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[0,0,0,0]
 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm7[0],xmm8[1],xmm7[1],xmm8[2],xmm7[2],xmm8[3],xmm7[3]
 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm8[0,0,0,0]
-; SSE2-NEXT: punpckhdq {{.*#+}} xmm4 = xmm4[2],xmm1[2],xmm4[3],xmm1[3]
-; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm3[0],xmm4[1]
+; SSE2-NEXT: punpckhdq {{.*#+}} xmm4 = xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm1[0],xmm4[1]
 ; SSE2-NEXT: pxor %xmm3, %xmm3
-; SSE2-NEXT: movdqa %xmm2, %xmm1
+; SSE2-NEXT: movdqa %xmm0, %xmm1
 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
 ; SSE2-NEXT: movapd %xmm4, %xmm5
 ; SSE2-NEXT: andpd %xmm1, %xmm5
 ; SSE2-NEXT: xorpd %xmm4, %xmm1
 ; SSE2-NEXT: psrlw $1, %xmm1
 ; SSE2-NEXT: paddw %xmm5, %xmm1
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm11 = xmm11[0],xmm9[0],xmm11[1],xmm9[1],xmm11[2],xmm9[2],xmm11[3],xmm9[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm10 = xmm10[0],xmm12[0],xmm10[1],xmm12[1],xmm10[2],xmm12[2],xmm10[3],xmm12[3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm10 = xmm10[0],xmm11[0],xmm10[1],xmm11[1]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm9 = xmm9[0],xmm10[0],xmm9[1],xmm10[1],xmm9[2],xmm10[2],xmm9[3],xmm10[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm12 = xmm12[0],xmm11[0],xmm12[1],xmm11[1],xmm12[2],xmm11[2],xmm12[3],xmm11[3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm12[0,0,0,0]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm9 = xmm9[0],xmm4[0],xmm9[1],xmm4[1]
 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm14 = xmm14[0],xmm13[0],xmm14[1],xmm13[1],xmm14[2],xmm13[2],xmm14[3],xmm13[3]
 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm14[0,0,0,0]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm15[0],xmm0[1],xmm15[1],xmm0[2],xmm15[2],xmm0[3],xmm15[3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm10[0],xmm0[1]
-; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm3[8],xmm2[9],xmm3[9],xmm2[10],xmm3[10],xmm2[11],xmm3[11],xmm2[12],xmm3[12],xmm2[13],xmm3[13],xmm2[14],xmm3[14],xmm2[15],xmm3[15]
-; SSE2-NEXT: movapd %xmm0, %xmm3
-; SSE2-NEXT: andpd %xmm2, %xmm3
-; SSE2-NEXT: xorpd %xmm0, %xmm2
-; SSE2-NEXT: psrlw $1, %xmm2
-; SSE2-NEXT: paddw %xmm3, %xmm2
-; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
-; SSE2-NEXT: pand %xmm0, %xmm2
-; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: packuswb %xmm2, %xmm1
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm15[0],xmm2[1],xmm15[1],xmm2[2],xmm15[2],xmm2[3],xmm15[3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
+; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
+; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm9[0],xmm2[1]
+; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm3[8],xmm0[9],xmm3[9],xmm0[10],xmm3[10],xmm0[11],xmm3[11],xmm0[12],xmm3[12],xmm0[13],xmm3[13],xmm0[14],xmm3[14],xmm0[15],xmm3[15]
+; SSE2-NEXT: movapd %xmm2, %xmm3
+; SSE2-NEXT: andpd %xmm0, %xmm3
+; SSE2-NEXT: xorpd %xmm2, %xmm0
+; SSE2-NEXT: psrlw $1, %xmm0
+; SSE2-NEXT: paddw %xmm3, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: packuswb %xmm0, %xmm1
 ; SSE2-NEXT: movdqu %xmm1, (%rax)
 ; SSE2-NEXT: retq
 ;
@@ -1829,74 +1831,75 @@ define void @not_avg_v16i8_wide_constants(ptr %a, ptr %b) nounwind {
 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
-; AVX1-NEXT: vpextrd $2, %xmm5, %ecx
-; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
-; AVX1-NEXT: vpextrd $2, %xmm4, %eax
-; AVX1-NEXT: vpextrw $3, %xmm3, %edx
+; AVX1-NEXT: vpextrw $7, %xmm3, %edx
+; AVX1-NEXT: vpextrw $6, %xmm3, %ecx
+; AVX1-NEXT: vpextrw $5, %xmm3, %eax
 ; AVX1-NEXT: decl %edx
 ; AVX1-NEXT: vmovd %edx, %xmm4
-; AVX1-NEXT: vpextrw $2, %xmm3, %edx
-; AVX1-NEXT: decl %edx
-; AVX1-NEXT: vmovd %edx, %xmm5
-; AVX1-NEXT: vpextrw $1, %xmm3, %edx
-; AVX1-NEXT: decl %edx
-; AVX1-NEXT: vmovd %edx, %xmm6
-; AVX1-NEXT: vpextrw $0, %xmm3, %edx
+; AVX1-NEXT: vpextrw $4, %xmm3, %edx
+; AVX1-NEXT: decl %ecx
+; AVX1-NEXT: vmovd %ecx, %xmm5
+; AVX1-NEXT: vpextrw $1, %xmm3, %ecx
+; AVX1-NEXT: decl %eax
+; AVX1-NEXT: vmovd %eax, %xmm6
+; AVX1-NEXT: vpextrw $0, %xmm3, %eax
 ; AVX1-NEXT: decl %edx
 ; AVX1-NEXT: vmovd %edx, %xmm7
-; AVX1-NEXT: vpextrw $3, %xmm2, %edx
-; AVX1-NEXT: decl %edx
-; AVX1-NEXT: vmovd %edx, %xmm8
-; AVX1-NEXT: vpextrw $2, %xmm2, %edx
+; AVX1-NEXT: vpextrw $3, %xmm3, %edx
+; AVX1-NEXT: decq %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm8
+; AVX1-NEXT: vpextrw $2, %xmm3, %ecx
+; AVX1-NEXT: decq %rax
+; AVX1-NEXT: vmovq %rax, %xmm3
+; AVX1-NEXT: vpextrw $7, %xmm2, %eax
 ; AVX1-NEXT: decl %edx
 ; AVX1-NEXT: vmovd %edx, %xmm9
-; AVX1-NEXT: vpextrw $1, %xmm2, %edx
-; AVX1-NEXT: decl %edx
-; AVX1-NEXT: vmovd %edx, %xmm10
-; AVX1-NEXT: vpextrw $0, %xmm2, %edx
-; AVX1-NEXT: decl %edx
-; AVX1-NEXT: vmovd %edx, %xmm11
-; AVX1-NEXT: vpextrw $5, %xmm3, %edx
+; AVX1-NEXT: vpextrw $6, %xmm2, %edx
+; AVX1-NEXT: decl %ecx
+; AVX1-NEXT: vmovd %ecx, %xmm10
+; AVX1-NEXT: vpextrw $5, %xmm2, %ecx
+; AVX1-NEXT: decl %eax
+; AVX1-NEXT: vmovd %eax, %xmm11
+; AVX1-NEXT: vpextrw $4, %xmm2, %eax
 ; AVX1-NEXT: decl %edx
 ; AVX1-NEXT: vmovd %edx, %xmm12
-; AVX1-NEXT: vpextrw $4, %xmm3, %edx
-; AVX1-NEXT: decl %edx
-; AVX1-NEXT: vmovd %edx, %xmm13
-; AVX1-NEXT: vpextrw $5, %xmm2, %edx
-; AVX1-NEXT: decl %edx
-; AVX1-NEXT: vmovd %edx, %xmm14
-; AVX1-NEXT: vpextrw $4, %xmm2, %edx
-; AVX1-NEXT: decl %edx
-; AVX1-NEXT: vmovd %edx, %xmm15
-; AVX1-NEXT: vpextrw $7, %xmm3, %edx
+; AVX1-NEXT: vpextrw $1, %xmm2, %edx
 ; AVX1-NEXT: decl %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm3
-; AVX1-NEXT: vpextrw $7, %xmm2, %ecx
-; AVX1-NEXT: decl %edx
-; AVX1-NEXT: vmovd %edx, %xmm2
+; AVX1-NEXT: vmovd %ecx, %xmm13
+; AVX1-NEXT: vpextrw $0, %xmm2, %ecx
+; AVX1-NEXT: decl %eax
+; AVX1-NEXT: vmovd %eax, %xmm14
+; AVX1-NEXT: vpextrw $3, %xmm2, %eax
+; AVX1-NEXT: decq %rdx
+; AVX1-NEXT: vmovq %rdx, %xmm15
+; AVX1-NEXT: vpextrw $2, %xmm2, %edx
+; AVX1-NEXT: decq %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm2
 ; AVX1-NEXT: decl %eax
 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
 ; AVX1-NEXT: vmovd %eax, %xmm5
-; AVX1-NEXT: decl %ecx
+; AVX1-NEXT: decl %edx
 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm7[0],xmm6[0],xmm7[1],xmm6[1],xmm7[2],xmm6[2],xmm7[3],xmm6[3]
-; AVX1-NEXT: vmovd %ecx, %xmm7
-; AVX1-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm6[0],xmm4[0],xmm6[1],xmm4[1]
-; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm9[0],xmm8[0],xmm9[1],xmm8[1],xmm9[2],xmm8[2],xmm9[3],xmm8[3]
-; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm8 = xmm11[0],xmm10[0],xmm11[1],xmm10[1],xmm11[2],xmm10[2],xmm11[3],xmm10[3]
-; AVX1-NEXT: vpunpckldq {{.*#+}} xmm6 = xmm8[0],xmm6[0],xmm8[1],xmm6[1]
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm6, %ymm4
-; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm13[0],xmm12[0],xmm13[1],xmm12[1],xmm13[2],xmm12[2],xmm13[3],xmm12[3]
-; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm8 = xmm15[0],xmm14[0],xmm15[1],xmm14[1],xmm15[2],xmm14[2],xmm15[3],xmm14[3]
-; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm8, %ymm6
-; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; AVX1-NEXT: vmovddup {{.*#+}} ymm3 = ymm6[0,0,2,2]
-; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1],xmm5[2],xmm7[2],xmm5[3],xmm7[3]
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm5, %ymm2
-; AVX1-NEXT: vshufps {{.*#+}} ymm2 = ymm2[0,0,0,0,4,4,4,4]
-; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm3[0,1,2],ymm2[3],ymm3[4,5,6],ymm2[7]
-; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm4[0,1],ymm2[2,3],ymm4[4,5],ymm2[6,7]
+; AVX1-NEXT: vmovd %edx, %xmm7
+; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[0,0,0,0]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[0,1,0,1]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm6[0,1,2,3,4,5],xmm4[6,7]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm10[0],xmm9[0],xmm10[1],xmm9[1],xmm10[2],xmm9[2],xmm10[3],xmm9[3]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[0,0,1,1]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm6[2,3],xmm3[4,5,6,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm4[4,5,6,7]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm12[0],xmm11[0],xmm12[1],xmm11[1],xmm12[2],xmm11[2],xmm12[3],xmm11[3]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm14[0],xmm13[0],xmm14[1],xmm13[1],xmm14[2],xmm13[2],xmm14[3],xmm13[3]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[0,0,0,0]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[0,1,0,1]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm6[0,1,2,3,4,5],xmm4[6,7]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm15[0],xmm2[1],xmm15[1],xmm2[2],xmm15[2],xmm2[3],xmm15[3]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm7[0],xmm5[0],xmm7[1],xmm5[1],xmm7[2],xmm5[2],xmm7[3],xmm5[3]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[0,0,1,1]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm5[2,3],xmm2[4,5,6,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm4[4,5,6,7]
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT: vandps %ymm0, %ymm2, %ymm1
 ; AVX1-NEXT: vxorps %ymm0, %ymm2, %ymm0
diff --git a/llvm/test/CodeGen/X86/avx512fp16-cvt.ll b/llvm/test/CodeGen/X86/avx512fp16-cvt.ll
index f66f0c0..cc58bc1 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-cvt.ll
@@ -628,13 +628,19 @@ define half @s128_to_half(i128 %x) {
 ;
 ; X86-LABEL: s128_to_half:
 ; X86: # %bb.0:
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: .cfi_def_cfa_offset 20
-; X86-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: pushl %ebp
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: .cfi_def_cfa_register %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $32, %esp
+; X86-NEXT: vmovups 8(%ebp), %xmm0
 ; X86-NEXT: vmovups %xmm0, (%esp)
 ; X86-NEXT: calll __floattihf
-; X86-NEXT: addl $16, %esp
-; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: .cfi_def_cfa %esp, 4
 ; X86-NEXT: retl
 %a = sitofp i128 %x to half
 ret half %a
@@ -713,13 +719,19 @@ define half @u128_to_half(i128 %x) {
 ;
 ; X86-LABEL: u128_to_half:
 ; X86: # %bb.0:
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: .cfi_def_cfa_offset 20
-; X86-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: pushl %ebp
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: .cfi_def_cfa_register %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $32, %esp
+; X86-NEXT: vmovups 8(%ebp), %xmm0
 ; X86-NEXT: vmovups %xmm0, (%esp)
 ; X86-NEXT: calll __floatuntihf
-; X86-NEXT: addl $16, %esp
-; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: .cfi_def_cfa %esp, 4
 ; X86-NEXT: retl
 %a = uitofp i128 %x to half
 ret half %a
@@ -1020,11 +1032,15 @@ define half @f128_to_half(fp128 %x) nounwind {
 ;
 ; X86-LABEL: f128_to_half:
 ; X86: # %bb.0:
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $32, %esp
+; X86-NEXT: vmovups 8(%ebp), %xmm0
 ; X86-NEXT: vmovups %xmm0, (%esp)
 ; X86-NEXT: calll __trunctfhf2
-; X86-NEXT: addl $16, %esp
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
 ; X86-NEXT: retl
 %a = fptrunc fp128 %x to half
 ret half %a
diff --git a/llvm/test/CodeGen/X86/bitselect.ll b/llvm/test/CodeGen/X86/bitselect.ll
index 4fc0827..3338131 100644
--- a/llvm/test/CodeGen/X86/bitselect.ll
+++ b/llvm/test/CodeGen/X86/bitselect.ll
@@ -146,37 +146,40 @@ define i64 @bitselect_i64(i64 %a, i64 %b, i64 %m) nounwind {
 define i128 @bitselect_i128(i128 %a, i128 %b, i128 %m) nounwind {
 ; X86-LABEL: bitselect_i128:
 ; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: xorl %edi, %ecx
-; X86-NEXT: andl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: xorl %edi, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: andl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: xorl %esi, %ebx
-; X86-NEXT: andl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: xorl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: xorl %edx, %esi
-; X86-NEXT: andl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: xorl %edx, %esi
-; X86-NEXT: movl %esi, 12(%eax)
-; X86-NEXT: movl %ebx, 8(%eax)
-; X86-NEXT: movl %edi, 4(%eax)
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 24(%ebp), %esi
+; X86-NEXT: movl 28(%ebp), %edi
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: xorl %esi, %ecx
+; X86-NEXT: andl 56(%ebp), %ecx
+; X86-NEXT: xorl %esi, %ecx
+; X86-NEXT: movl 44(%ebp), %esi
+; X86-NEXT: xorl %edi, %esi
+; X86-NEXT: andl 60(%ebp), %esi
+; X86-NEXT: xorl %edi, %esi
+; X86-NEXT: movl 48(%ebp), %edi
+; X86-NEXT: xorl %edx, %edi
+; X86-NEXT: andl 64(%ebp), %edi
+; X86-NEXT: xorl %edx, %edi
+; X86-NEXT: movl 52(%ebp), %edx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: andl 68(%ebp), %edx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: movl %edi, 8(%eax)
+; X86-NEXT: movl %esi, 4(%eax)
 ; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -8(%ebp), %esp
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
 ; X86-NEXT: retl $4
 ;
 ; X64-NOBMI-LABEL: bitselect_i128:
diff --git a/llvm/test/CodeGen/X86/bsf.ll b/llvm/test/CodeGen/X86/bsf.ll
index 312f94c..143e10e 100644
--- a/llvm/test/CodeGen/X86/bsf.ll
+++ b/llvm/test/CodeGen/X86/bsf.ll
@@ -263,70 +263,78 @@ define i128 @cmov_bsf128(i128 %x, i128 %y) nounwind {
 ; X86-LABEL: cmov_bsf128:
 ; X86: # %bb.0:
 ; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: pushl %ebx
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, %edx
-; X86-NEXT: orl %ebp, %edx
-; X86-NEXT: movl %ecx, %esi
-; X86-NEXT: orl %eax, %esi
-; X86-NEXT: orl %edx, %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: movl 36(%ebp), %ebx
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: orl %ebx, %eax
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: orl %edi, %edx
+; X86-NEXT: orl %eax, %edx
 ; X86-NEXT: je .LBB8_1
 ; X86-NEXT: # %bb.2: # %cond.false
 ; X86-NEXT: testl %ecx, %ecx
 ; X86-NEXT: jne .LBB8_3
 ; X86-NEXT: # %bb.4: # %cond.false
-; X86-NEXT: rep bsfl %edi, %esi
-; X86-NEXT: addl $32, %esi
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB8_7
-; X86-NEXT: .LBB8_6:
-; X86-NEXT: rep bsfl %eax, %edx
-; X86-NEXT: jmp .LBB8_8
+; X86-NEXT: rep bsfl %esi, %eax
+; X86-NEXT: addl $32, %eax
+; X86-NEXT: jmp .LBB8_5
 ; X86-NEXT: .LBB8_1:
-; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: xorl %ebp, %ebp
-; X86-NEXT: movl $128, %esi
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: xorl %edi, %edi
+; X86-NEXT: movl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X86-NEXT: jmp .LBB8_11
 ; X86-NEXT: .LBB8_3:
-; X86-NEXT: rep bsfl %ecx, %esi
-; X86-NEXT: testl %eax, %eax
+; X86-NEXT: rep bsfl %ecx, %eax
+; X86-NEXT: .LBB8_5: # %cond.false
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: testl %edi, %edi
 ; X86-NEXT: jne .LBB8_6
-; X86-NEXT: .LBB8_7: # %cond.false
-; X86-NEXT: rep bsfl %ebp, %edx
+; X86-NEXT: # %bb.7: # %cond.false
+; X86-NEXT: rep bsfl %ebx, %edx
 ; X86-NEXT: addl $32, %edx
+; X86-NEXT: jmp .LBB8_8
+; X86-NEXT: .LBB8_6:
+; X86-NEXT: rep bsfl %edi, %edx
 ; X86-NEXT: .LBB8_8: # %cond.false
-; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: movl %ecx, %ebx
-; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: movl %ecx, %edi
+; X86-NEXT: orl %esi, %edi
 ; X86-NEXT: jne .LBB8_10
 ; X86-NEXT: # %bb.9: # %cond.false
 ; X86-NEXT: addl $64, %edx
-; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT: .LBB8_10: # %cond.false
-; X86-NEXT: xorl %ebp, %ebp
+; X86-NEXT: xorl %edi, %edi
 ; X86-NEXT: .LBB8_11: # %cond.end
-; X86-NEXT: xorl %ebx, %ebx
 ; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: orl %eax, %edi
-; X86-NEXT: orl %ecx, %edi
-; X86-NEXT: jne .LBB8_13
-; X86-NEXT: # %bb.12:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: .LBB8_13: # %cond.end
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edx, 12(%eax)
-; X86-NEXT: movl %ebx, 8(%eax)
-; X86-NEXT: movl %ebp, 4(%eax)
-; X86-NEXT: movl %esi, (%eax)
+; X86-NEXT: xorl %ebx, %ebx
+; X86-NEXT: orl 32(%ebp), %ecx
+; X86-NEXT: orl %eax, %esi
+; X86-NEXT: orl %ecx, %esi
+; X86-NEXT: je .LBB8_12
+; X86-NEXT: # %bb.13: # %cond.end
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: jmp .LBB8_14
+; X86-NEXT: .LBB8_12:
+; X86-NEXT: movl 52(%ebp), %ebx
+; X86-NEXT: movl 48(%ebp), %edx
+; X86-NEXT: movl 44(%ebp), %edi
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: .LBB8_14: # %cond.end
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: movl %edi, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
 ; X86-NEXT: popl %ebx
@@ -361,46 +369,49 @@ define i128 @cmov_bsf128_undef(i128 %x, i128 %y) nounwind {
 ; X86-LABEL: cmov_bsf128_undef:
 ; X86: # %bb.0:
 ; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: pushl %ebx
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %ecx, %edi
-; X86-NEXT: orl %esi, %edi
-; X86-NEXT: movl %edx, %ebp
-; X86-NEXT: orl %ebx, %ebp
-; X86-NEXT: orl %edi, %ebp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 36(%ebp), %esi
+; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %ecx
+; X86-NEXT: movl 24(%ebp), %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: orl %esi, %eax
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: orl %eax, %ebx
+; X86-NEXT: movl 8(%ebp), %eax
 ; X86-NEXT: je .LBB9_11
 ; X86-NEXT: # %bb.1: # %select.true.sink
 ; X86-NEXT: testl %edx, %edx
 ; X86-NEXT: jne .LBB9_2
 ; X86-NEXT: # %bb.3: # %select.true.sink
-; X86-NEXT: rep bsfl %ecx, %edi
-; X86-NEXT: addl $32, %edi
-; X86-NEXT: testl %ebx, %ebx
+; X86-NEXT: rep bsfl %ecx, %ebx
+; X86-NEXT: addl $32, %ebx
+; X86-NEXT: testl %edi, %edi
 ; X86-NEXT: je .LBB9_6
 ; X86-NEXT: .LBB9_5:
-; X86-NEXT: rep bsfl %ebx, %esi
+; X86-NEXT: rep bsfl %edi, %esi
 ; X86-NEXT: orl %ecx, %edx
 ; X86-NEXT: je .LBB9_8
 ; X86-NEXT: jmp .LBB9_9
 ; X86-NEXT: .LBB9_11: # %select.end
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl 52(%ebp), %ecx
+; X86-NEXT: movl 48(%ebp), %edx
+; X86-NEXT: movl 44(%ebp), %esi
+; X86-NEXT: movl 40(%ebp), %edi
 ; X86-NEXT: movl %edi, (%eax)
 ; X86-NEXT: movl %esi, 4(%eax)
 ; X86-NEXT: movl %edx, 8(%eax)
 ; X86-NEXT: movl %ecx, 12(%eax)
 ; X86-NEXT: jmp .LBB9_10
 ; X86-NEXT: .LBB9_2:
-; X86-NEXT: rep bsfl %edx, %edi
-; X86-NEXT: testl %ebx, %ebx
+; X86-NEXT: rep bsfl %edx, %ebx
+; X86-NEXT: testl %edi, %edi
 ; X86-NEXT: jne .LBB9_5
 ; X86-NEXT: .LBB9_6: # %select.true.sink
 ; X86-NEXT: rep bsfl %esi, %esi
@@ -409,13 +420,14 @@ define i128 @cmov_bsf128_undef(i128 %x, i128 %y) nounwind {
 ; X86-NEXT: jne .LBB9_9
 ; X86-NEXT: .LBB9_8: # %select.true.sink
 ; X86-NEXT: addl $64, %esi
-; X86-NEXT: movl %esi, %edi
+; X86-NEXT: movl %esi, %ebx
 ; X86-NEXT: .LBB9_9: # %select.true.sink
-; X86-NEXT: movl %edi, (%eax)
+; X86-NEXT: movl %ebx, (%eax)
 ; X86-NEXT: movl $0, 12(%eax)
 ; X86-NEXT: movl $0, 8(%eax)
 ; X86-NEXT: movl $0, 4(%eax)
 ; X86-NEXT: .LBB9_10: # %select.true.sink
+; X86-NEXT: leal -12(%ebp), %esp
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
 ; X86-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/bsr.ll b/llvm/test/CodeGen/X86/bsr.ll
index fbca4af..ab0478a 100644
--- a/llvm/test/CodeGen/X86/bsr.ll
+++ b/llvm/test/CodeGen/X86/bsr.ll
@@ -291,79 +291,80 @@ define i128 @cmov_bsr128(i128 %x, i128 %y) nounwind {
 ; X86-LABEL: cmov_bsr128:
 ; X86: # %bb.0:
 ; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: pushl %ebx
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, %edx
-; X86-NEXT: orl %ebp, %edx
-; X86-NEXT: movl %ecx, %esi
-; X86-NEXT: orl %ebx, %esi
-; X86-NEXT: orl %edx, %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 32(%ebp), %ebx
+; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: movl 36(%ebp), %esi
+; X86-NEXT: movl 28(%ebp), %edi
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: orl %esi, %eax
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: orl %ebx, %edx
+; X86-NEXT: orl %eax, %edx
 ; X86-NEXT: je .LBB8_1
 ; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: testl %ebp, %ebp
+; X86-NEXT: testl %esi, %esi
 ; X86-NEXT: jne .LBB8_3
 ; X86-NEXT: # %bb.4: # %cond.false
-; X86-NEXT: bsrl %ebx, %edx
-; X86-NEXT: xorl $31, %edx
-; X86-NEXT: orl $32, %edx
+; X86-NEXT: bsrl %ebx, %esi
+; X86-NEXT: xorl $31, %esi
+; X86-NEXT: orl $32, %esi
 ; X86-NEXT: testl %edi, %edi
 ; X86-NEXT: je .LBB8_7
 ; X86-NEXT: .LBB8_6:
-; X86-NEXT: bsrl %edi, %esi
-; X86-NEXT: xorl $31, %esi
+; X86-NEXT: bsrl %edi, %eax
+; X86-NEXT: xorl $31, %eax
 ; X86-NEXT: jmp .LBB8_8
 ; X86-NEXT: .LBB8_1:
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: movl $0, (%esp) # 4-byte Folded Spill
-; X86-NEXT: movl $128, %edx
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: movl $128, %esi
 ; X86-NEXT: jmp .LBB8_11
 ; X86-NEXT: .LBB8_3:
-; X86-NEXT: bsrl %ebp, %edx
-; X86-NEXT: xorl $31, %edx
+; X86-NEXT: bsrl %esi, %esi
+; X86-NEXT: xorl $31, %esi
 ; X86-NEXT: testl %edi, %edi
 ; X86-NEXT: jne .LBB8_6
 ; X86-NEXT: .LBB8_7: # %cond.false
-; X86-NEXT: bsrl %ecx, %esi
-; X86-NEXT: xorl $31, %esi
-; X86-NEXT: orl $32, %esi
+; X86-NEXT: bsrl %ecx, %eax
+; X86-NEXT: xorl $31, %eax
+; X86-NEXT: orl $32, %eax
 ; X86-NEXT: .LBB8_8: # %cond.false
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: orl %ebp, %ebx
+; X86-NEXT: movl %ebx, %edx
+; X86-NEXT: orl 36(%ebp), %edx
 ; X86-NEXT: jne .LBB8_10
 ; X86-NEXT: # %bb.9: # %cond.false
-; X86-NEXT: orl $64, %esi
-; X86-NEXT: movl %esi, %edx
+; X86-NEXT: orl $64, %eax
+; X86-NEXT: movl %eax, %esi
 ; X86-NEXT: .LBB8_10: # %cond.false
-; X86-NEXT: movl $0, (%esp) # 4-byte Folded Spill
+; X86-NEXT: xorl %eax, %eax
 ; X86-NEXT: .LBB8_11: # %cond.end
-; X86-NEXT: xorl %esi, %esi
 ; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: orl %eax, %ecx
-; X86-NEXT: orl %ebp, %edi
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: orl 32(%ebp), %ecx
+; X86-NEXT: orl 36(%ebp), %edi
 ; X86-NEXT: orl %ecx, %edi
 ; X86-NEXT: je .LBB8_12
 ; X86-NEXT: # %bb.13: # %cond.end
-; X86-NEXT: xorl $127, %edx
-; X86-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT: xorl $127, %esi
+; X86-NEXT: movl %eax, %ecx
 ; X86-NEXT: jmp .LBB8_14
 ; X86-NEXT: .LBB8_12:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl 52(%ebp), %edx
+; X86-NEXT: movl 48(%ebp), %ebx
+; X86-NEXT: movl 44(%ebp), %ecx
+; X86-NEXT: movl 40(%ebp), %esi
 ; X86-NEXT: .LBB8_14: # %cond.end
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %ebx, 12(%eax)
-; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: movl %ebx, 8(%eax)
 ; X86-NEXT: movl %ecx, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: movl %esi, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
 ; X86-NEXT: popl %ebx
@@ -398,62 +399,67 @@ define i128 @cmov_bsr128_undef(i128 %x, i128 %y) nounwind {
 ; X86-LABEL: cmov_bsr128_undef:
 ; X86: # %bb.0:
 ; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: pushl %ebx
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: testl %edi, %edi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 28(%ebp), %edx
+; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: testl %eax, %eax
 ; X86-NEXT: jne .LBB9_1
 ; X86-NEXT: # %bb.2:
-; X86-NEXT: bsrl %esi, %ecx
-; X86-NEXT: xorl $31, %ecx
-; X86-NEXT: orl $32, %ecx
+; X86-NEXT: bsrl %edi, %esi
+; X86-NEXT: xorl $31, %esi
+; X86-NEXT: orl $32, %esi
 ; X86-NEXT: jmp .LBB9_3
 ; X86-NEXT: .LBB9_1:
-; X86-NEXT: bsrl %edi, %ecx
-; X86-NEXT: xorl $31, %ecx
+; X86-NEXT: bsrl %eax, %esi
+; X86-NEXT: xorl $31, %esi
 ; X86-NEXT: .LBB9_3:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl 24(%ebp), %ebx
 ; X86-NEXT: testl %edx, %edx
 ; X86-NEXT: jne .LBB9_4
 ; X86-NEXT: # %bb.5:
-; X86-NEXT: bsrl %ebx, %ebp
-; X86-NEXT: xorl $31, %ebp
-; X86-NEXT: orl $32, %ebp
-; X86-NEXT: jmp .LBB9_6
+; X86-NEXT: bsrl %ebx, %ecx
+; X86-NEXT: xorl $31, %ecx
+; X86-NEXT: orl $32, %ecx
+; X86-NEXT: orl %eax, %edi
+; X86-NEXT: je .LBB9_7
+; X86-NEXT: jmp .LBB9_8
 ; X86-NEXT: .LBB9_4:
-; X86-NEXT: bsrl %edx, %ebp
-; X86-NEXT: xorl $31, %ebp
-; X86-NEXT: .LBB9_6:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: orl %edi, %esi
+; X86-NEXT: bsrl %edx, %ecx
+; X86-NEXT: xorl $31, %ecx
+; X86-NEXT: orl %eax, %edi
 ; X86-NEXT: jne .LBB9_8
-; X86-NEXT: # %bb.7:
-; X86-NEXT: orl $64, %ebp
-; X86-NEXT: movl %ebp, %ecx
+; X86-NEXT: .LBB9_7:
+; X86-NEXT: orl $64, %ecx
+; X86-NEXT: movl %ecx, %esi
 ; X86-NEXT: .LBB9_8:
-; X86-NEXT: orl %edi, %edx
-; X86-NEXT: orl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: orl 32(%ebp), %ebx
 ; X86-NEXT: orl %edx, %ebx
 ; X86-NEXT: jne .LBB9_9
 ; X86-NEXT: # %bb.10:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl 48(%ebp), %edx
+; X86-NEXT: movl 52(%ebp), %edi
+; X86-NEXT: movl 40(%ebp), %esi
+; X86-NEXT: movl 44(%ebp), %ecx
 ; X86-NEXT: jmp .LBB9_11
 ; X86-NEXT: .LBB9_9:
-; X86-NEXT: xorl $127, %ecx
+; X86-NEXT: xorl $127, %esi
+; X86-NEXT: xorl %ecx, %ecx
 ; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: xorl %esi, %esi
 ; X86-NEXT: xorl %edi, %edi
 ; X86-NEXT: .LBB9_11:
+; X86-NEXT: movl 8(%ebp), %eax
 ; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %esi, 8(%eax)
-; X86-NEXT: movl %edx, 4(%eax)
-; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: movl %ecx, 4(%eax)
+; X86-NEXT: movl %esi, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
 ; X86-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/bswap-wide-int.ll b/llvm/test/CodeGen/X86/bswap-wide-int.ll
index 6d5e995a..673b7f1 100644
--- a/llvm/test/CodeGen/X86/bswap-wide-int.ll
+++ b/llvm/test/CodeGen/X86/bswap-wide-int.ll
@@ -41,13 +41,16 @@ define i64 @bswap_i64(i64 %a0) nounwind {
 define i128 @bswap_i128(i128 %a0) nounwind {
 ; X86-LABEL: bswap_i128:
 ; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: movl 28(%ebp), %edx
+; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %edi
 ; X86-NEXT: bswapl %edi
 ; X86-NEXT: bswapl %esi
 ; X86-NEXT: bswapl %edx
@@ -56,25 +59,32 @@ define i128 @bswap_i128(i128 %a0) nounwind {
 ; X86-NEXT: movl %edx, 8(%eax)
 ; X86-NEXT: movl %esi, 4(%eax)
 ; X86-NEXT: movl %edi, (%eax)
+; X86-NEXT: leal -8(%ebp), %esp
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebp
 ; X86-NEXT: retl $4
 ;
 ; X86-MOVBE-LABEL: bswap_i128:
 ; X86-MOVBE: # %bb.0:
+; X86-MOVBE-NEXT: pushl %ebp
+; X86-MOVBE-NEXT: movl %esp, %ebp
 ; X86-MOVBE-NEXT: pushl %edi
 ; X86-MOVBE-NEXT: pushl %esi
-; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-MOVBE-NEXT: andl $-16, %esp
+; X86-MOVBE-NEXT: movl 8(%ebp), %eax
+; X86-MOVBE-NEXT: movl 32(%ebp), %ecx
+; X86-MOVBE-NEXT: movl 36(%ebp), %edx
+; X86-MOVBE-NEXT: movl 24(%ebp), %esi
+; X86-MOVBE-NEXT: movl 28(%ebp), %edi
 ; X86-MOVBE-NEXT: movbel %esi, 12(%eax)
 ; X86-MOVBE-NEXT: movbel %edi, 8(%eax)
 ; X86-MOVBE-NEXT: movbel %ecx, 4(%eax)
 ; X86-MOVBE-NEXT: movbel %edx, (%eax)
+; X86-MOVBE-NEXT: leal -8(%ebp), %esp
 ; X86-MOVBE-NEXT: popl %esi
 ; X86-MOVBE-NEXT: popl %edi
+; X86-MOVBE-NEXT: popl %ebp
 ; X86-MOVBE-NEXT: retl $4
 ;
 ; X64-LABEL: bswap_i128:
diff --git a/llvm/test/CodeGen/X86/catchret-empty-fallthrough.ll b/llvm/test/CodeGen/X86/catchret-empty-fallthrough.ll
index ab9fa22..24d3030 100644
--- a/llvm/test/CodeGen/X86/catchret-empty-fallthrough.ll
+++ b/llvm/test/CodeGen/X86/catchret-empty-fallthrough.ll
@@ -48,6 +48,6 @@ return: ; preds = %catch, %entry
 ; CHECK-NEXT: .long (.Llsda_end0-.Llsda_begin0)/16
 ; CHECK-NEXT: .Llsda_begin0:
 ; CHECK-NEXT: .long .Ltmp0@IMGREL
-; CHECK-NEXT: .long .Ltmp1@IMGREL+1
+; CHECK-NEXT: .long .Ltmp1@IMGREL
 ; CHECK-NEXT: .long 1
 ; CHECK-NEXT: .long .LBB0_[[catch]]@IMGREL
diff --git a/llvm/test/CodeGen/X86/combine-add-ssat.ll b/llvm/test/CodeGen/X86/combine-add-ssat.ll
index 3e21798..75adcdd 100644
--- a/llvm/test/CodeGen/X86/combine-add-ssat.ll
+++ b/llvm/test/CodeGen/X86/combine-add-ssat.ll
@@ -62,12 +62,12 @@ define <8 x i16> @combine_constfold_v8i16() {
 define <8 x i16> @combine_constfold_undef_v8i16() {
 ; SSE-LABEL: combine_constfold_undef_v8i16:
 ; SSE: # %bb.0:
-; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65535,65535,65534,0,65280,32768,0]
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65535,u,65534,0,65280,32768,0]
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: combine_constfold_undef_v8i16:
 ; AVX: # %bb.0:
-; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65535,65535,65534,0,65280,32768,0]
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65535,u,65534,0,65280,32768,0]
 ; AVX-NEXT: retq
 %res = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> <i16 undef, i16 1, i16 undef, i16 65535, i16 -1, i16 -255, i16 -32760, i16 1>, <8 x i16> <i16 1, i16 undef, i16 undef, i16 65535, i16 1, i16 65535, i16 -10, i16 65535>)
 ret <8 x i16> %res
diff --git a/llvm/test/CodeGen/X86/combine-add-usat.ll b/llvm/test/CodeGen/X86/combine-add-usat.ll
index 13bc3b2..5b947dd 100644
--- a/llvm/test/CodeGen/X86/combine-add-usat.ll
+++ b/llvm/test/CodeGen/X86/combine-add-usat.ll
@@ -62,12 +62,13 @@ define <8 x i16> @combine_constfold_v8i16() {
 define <8 x i16> @combine_constfold_undef_v8i16() {
 ; SSE-LABEL: combine_constfold_undef_v8i16:
 ; SSE: # %bb.0:
-; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65535,65535,65535,65535,65535,2,65535]
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65535,u,65535,65535,65535,2,65535]
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: combine_constfold_undef_v8i16:
 ; AVX: # %bb.0:
-; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65535,65535,65535,65535,65535,2,65535]
+; AVX-NEXT: vmovddup {{.*#+}} xmm0 = [65535,65535,2,65535,65535,65535,2,65535]
+; AVX-NEXT: # xmm0 = mem[0,0]
 ; AVX-NEXT: retq
 %res = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> <i16 undef, i16 1, i16 undef, i16 65535, i16 -1, i16 -255, i16 -65535, i16 1>, <8 x i16> <i16 1, i16 undef, i16 undef, i16 65535, i16 1, i16 65535, i16 1, i16 65535>)
 ret <8 x i16> %res
diff --git a/llvm/test/CodeGen/X86/combine-sub-ssat.ll b/llvm/test/CodeGen/X86/combine-sub-ssat.ll
index 979331f..0dab025 100644
--- a/llvm/test/CodeGen/X86/combine-sub-ssat.ll
+++ b/llvm/test/CodeGen/X86/combine-sub-ssat.ll
@@ -62,12 +62,12 @@ define <8 x i16> @combine_constfold_v8i16() {
 define <8 x i16> @combine_constfold_undef_v8i16() {
 ; SSE-LABEL: combine_constfold_undef_v8i16:
 ; SSE: # %bb.0:
-; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,0,0,65534,65282,32786,2]
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,u,0,65534,65282,32786,2]
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: combine_constfold_undef_v8i16:
 ; AVX: # %bb.0:
-; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,0,0,65534,65282,32786,2]
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,u,0,65534,65282,32786,2]
 ; AVX-NEXT: retq
 %res = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> <i16 undef, i16 1, i16 undef, i16 65535, i16 -1, i16 -255, i16 -32760, i16 1>, <8 x i16> <i16 1, i16 undef, i16 undef, i16 65535, i16 1, i16 65535, i16 -10, i16 65535>)
 ret <8 x i16> %res
diff --git a/llvm/test/CodeGen/X86/combine-sub-usat.ll b/llvm/test/CodeGen/X86/combine-sub-usat.ll
index b70e3fc..36e374b 100644
--- a/llvm/test/CodeGen/X86/combine-sub-usat.ll
+++ b/llvm/test/CodeGen/X86/combine-sub-usat.ll
@@ -73,17 +73,17 @@ define <8 x i16> @combine_constfold_v8i16() {
 define <8 x i16> @combine_constfold_undef_v8i16() {
 ; SSE-LABEL: combine_constfold_undef_v8i16:
 ; SSE: # %bb.0:
-; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,0,0,65534,0,0,0]
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,u,0,65534,0,0,0]
 ; SSE-NEXT: retq
 ;
 ; AVX1-LABEL: combine_constfold_undef_v8i16:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,0,0,65534,0,0,0]
+; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,u,0,65534,0,0,0]
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: combine_constfold_undef_v8i16:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,0,0,65534,0,0,0]
+; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,u,0,65534,0,0,0]
 ; AVX2-NEXT: retq
 ;
 ; AVX512-LABEL: combine_constfold_undef_v8i16:
diff --git a/llvm/test/CodeGen/X86/conditional-tailcall-pgso.ll b/llvm/test/CodeGen/X86/conditional-tailcall-pgso.ll
index c4c194e..7855ff2 100644
--- a/llvm/test/CodeGen/X86/conditional-tailcall-pgso.ll
+++ b/llvm/test/CodeGen/X86/conditional-tailcall-pgso.ll
@@ -121,7 +121,6 @@ define void @f_non_leaf(i32 %x, i32 %y) !prof !14 {
 ; WIN64-NEXT: # encoding: [0xeb,A]
 ; WIN64-NEXT: # fixup A - offset: 1, value: foo, kind: FK_PCRel_1
 ; WIN64-NEXT: .LBB1_2: # %bb2
-; WIN64-NEXT: nop # encoding: [0x90]
 ; WIN64-NEXT: .seh_startepilogue
 ; WIN64-NEXT: popq %rbx # encoding: [0x5b]
 ; WIN64-NEXT: .seh_endepilogue
diff --git a/llvm/test/CodeGen/X86/conditional-tailcall.ll b/llvm/test/CodeGen/X86/conditional-tailcall.ll
index 9c1d830..2859a87 100644
--- a/llvm/test/CodeGen/X86/conditional-tailcall.ll
+++ b/llvm/test/CodeGen/X86/conditional-tailcall.ll
@@ -121,7 +121,6 @@ define void @f_non_leaf(i32 %x, i32 %y) optsize {
 ; WIN64-NEXT: # encoding: [0xeb,A]
 ; WIN64-NEXT: # fixup A - offset: 1, value: foo, kind: FK_PCRel_1
 ; WIN64-NEXT: .LBB1_2: # %bb2
-; WIN64-NEXT: nop # encoding: [0x90]
 ; WIN64-NEXT: .seh_startepilogue
 ; WIN64-NEXT: popq %rbx # encoding: [0x5b]
 ; WIN64-NEXT: .seh_endepilogue
diff --git a/llvm/test/CodeGen/X86/constant-pool-partition.ll b/llvm/test/CodeGen/X86/constant-pool-partition.ll
index 515284f..e42b41b 100644
--- a/llvm/test/CodeGen/X86/constant-pool-partition.ll
+++ b/llvm/test/CodeGen/X86/constant-pool-partition.ll
@@ -24,11 +24,11 @@ target triple = "x86_64-grtev4-linux-gnu"
 ; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always
 ;; For function @cold_func
-; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8
+; CHECK: .section .rodata.cst8.hot.,"aM",@progbits,8
 ; CHECK-NEXT: .p2align
 ; CHECK-NEXT: .LCPI0_0:
 ; CHECK-NEXT: .quad 0x3fe5c28f5c28f5c3 # double 0.68000000000000005
-; CHECK-NEXT: .section .rodata.cst8.unlikely,"aM",@progbits,8
+; CHECK-NEXT: .section .rodata.cst8.unlikely.,"aM",@progbits,8
 ; CHECK-NEXT: .p2align
 ; CHECK-NEXT: .LCPI0_1:
 ; CHECK-NEXT: .quad 0x3eb0000000000000 # double 9.5367431640625E-7
@@ -50,11 +50,11 @@ target triple = "x86_64-grtev4-linux-gnu"
 ; CHECK-NEXT: .long 0x3e000000 # float 0.125
 ;; For function @hot_func
-; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8
+; CHECK: .section .rodata.cst8.hot.,"aM",@progbits,8
 ; CHECK-NEXT: .p2align
 ; CHECK-NEXT: .LCPI3_0:
 ; CHECK-NEXT: .quad 0x3fe5c28f5c28f5c3 # double 0.68000000000000005
-; CHECK-NEXT: .section .rodata.cst16.hot,"aM",@progbits,16
+; CHECK-NEXT: .section .rodata.cst16.hot.,"aM",@progbits,16
 ; CHECK-NEXT: .p2align
 ; CHECK-NEXT: .LCPI3_1:
 ; CHECK-NEXT: .long 2147483648 # 0x80000000
diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
index d869f8e..455b72d 100644
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
@@ -152,17 +152,17 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
 ; X86-NEXT: pushl %esi
 ; X86-NEXT: andl $-16, %esp
 ; X86-NEXT: subl $176, %esp
-; X86-NEXT: movl 20(%ebp), %edx
-; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 36(%ebp), %ecx
 ; X86-NEXT: movl %ecx, %eax
 ; X86-NEXT: sarl $31, %eax
 ; X86-NEXT: xorl %eax, %ecx
 ; X86-NEXT: movl %ecx, %edi
 ; X86-NEXT: xorl %eax, %edx
 ; X86-NEXT: movl %edx, %esi
-; X86-NEXT: movl 16(%ebp), %edx
+; X86-NEXT: movl 28(%ebp), %edx
 ; X86-NEXT: xorl %eax, %edx
-; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: movl 24(%ebp), %ecx
 ; X86-NEXT: xorl %eax, %ecx
 ; X86-NEXT: subl %eax, %ecx
 ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -172,16 +172,15 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
 ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT: sbbl %eax, %edi
 ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 40(%ebp), %ecx
-; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: movl 52(%ebp), %esi
+; X86-NEXT: movl %esi, %edx
 ; X86-NEXT: sarl $31, %edx
-; X86-NEXT: movl %ecx, %esi
 ; X86-NEXT: xorl %edx, %esi
-; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl 48(%ebp), %ecx
 ; X86-NEXT: xorl %edx, %ecx
-; X86-NEXT: movl 32(%ebp), %ebx
+; X86-NEXT: movl 44(%ebp), %ebx
 ; X86-NEXT: xorl %edx, %ebx
-; X86-NEXT: movl 28(%ebp), %edi
+; X86-NEXT: movl 40(%ebp), %edi
 ; X86-NEXT: xorl %edx, %edi
 ; X86-NEXT: subl %edx, %edi
 ; X86-NEXT: sbbl %edx, %ebx
@@ -204,45 +203,45 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
 ; X86-NEXT: sete %al
 ; X86-NEXT: orb %cl, %al
 ; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: bsrl %eax, %edx
+; X86-NEXT: bsrl %esi, %edx
 ; X86-NEXT: xorl $31, %edx
-; X86-NEXT: addl $32, %edx
-; X86-NEXT: bsrl %esi, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: bsrl %eax, %ecx
 ; X86-NEXT: xorl $31, %ecx
+; X86-NEXT: orl $32, %ecx
 ; X86-NEXT: testl %esi, %esi
-; X86-NEXT: cmovel %edx, %ecx
+; X86-NEXT: cmovnel %edx, %ecx
 ; X86-NEXT: bsrl %ebx, %edx
 ; X86-NEXT: xorl $31, %edx
 ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT: bsrl %edi, %edi
 ; X86-NEXT: xorl $31, %edi
-; X86-NEXT: addl $32, %edi
+; X86-NEXT: orl $32, %edi
 ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT: testl %ebx, %ebx
 ; X86-NEXT: cmovnel %edx, %edi
-; X86-NEXT: addl $64, %edi
+; X86-NEXT: orl $64, %edi
 ; X86-NEXT: movl %eax, %edx
 ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT: orl %esi, %edx
 ; X86-NEXT: cmovnel %ecx, %edi
-; X86-NEXT: bsrl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT: xorl $31, %edx
-; X86-NEXT: addl $32, %edx
 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: bsrl %eax, %ecx
+; X86-NEXT: bsrl %eax, %edx
+; X86-NEXT: xorl $31, %edx
+; X86-NEXT: bsrl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X86-NEXT: xorl $31, %ecx
+; X86-NEXT: orl $32, %ecx
 ; X86-NEXT: testl %eax, %eax
-; X86-NEXT: cmovel %edx, %ecx
+; X86-NEXT: cmovnel %edx, %ecx
 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X86-NEXT: bsrl %ebx, %esi
 ; X86-NEXT: xorl $31, %esi
 ; X86-NEXT: bsrl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
 ; X86-NEXT: xorl $31, %edx
-; X86-NEXT: addl $32, %edx
+; X86-NEXT: orl $32, %edx
 ; X86-NEXT: testl %ebx, %ebx
 ; X86-NEXT: cmovnel %esi, %edx
-; X86-NEXT: addl $64, %edx
+; X86-NEXT: orl $64, %edx
 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X86-NEXT: orl %eax, %esi
 ; X86-NEXT: cmovnel %ecx, %edx
@@ -380,9 +379,9 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X86-NEXT: adcl $-1, %eax
 ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: adcl $-1, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: adcl $-1, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X86-NEXT: adcl $-1, %ecx
 ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -488,13 +487,13 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
 ; X86-NEXT: sbbl %ecx, %ebx
 ; X86-NEXT: sbbl %ecx, %esi
 ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 44(%ebp), %ecx
+; X86-NEXT: movl 56(%ebp), %ecx
 ; X86-NEXT: movl %edx, (%ecx)
 ; X86-NEXT: movl %eax, 4(%ecx)
 ; X86-NEXT: movl %ebx, 8(%ecx)
 ; X86-NEXT: movl %esi, 12(%ecx)
 ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 28(%ebp), %ecx
+; X86-NEXT: movl 40(%ebp), %ecx
 ; X86-NEXT: movl %ebx, %edi
 ; X86-NEXT: movl %edx, %esi
 ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -508,7 +507,7 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
 ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X86-NEXT: adcl $0, %ebx
 ; X86-NEXT: movl %esi, %eax
-; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: movl 44(%ebp), %esi
 ; X86-NEXT: mull %esi
 ; X86-NEXT: addl %ecx, %eax
 ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -523,17 +522,17 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
 ; X86-NEXT: adcl %eax, %edx
 ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: movl 40(%ebp), %eax
 ; X86-NEXT: imull %eax, %ebx
 ; X86-NEXT: mull %edi
 ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT: imull %esi, %edi
 ; X86-NEXT: addl %edx, %edi
 ; X86-NEXT: addl %ebx, %edi
-; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 48(%ebp), %eax
 ; X86-NEXT: movl %eax, %esi
 ; X86-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT: movl 40(%ebp), %ebx
+; X86-NEXT: movl 52(%ebp), %ebx
 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
 ; X86-NEXT: imull %edx, %ebx
 ; X86-NEXT: mull %edx
@@ -543,13 +542,13 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
 ; X86-NEXT: adcl %edi, %ebx
 ; X86-NEXT: addl %ecx, %eax
 ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X86-NEXT: movl 12(%ebp), %edx
+; X86-NEXT: movl 24(%ebp), %edx
 ; X86-NEXT: subl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT: movl 16(%ebp), %ecx
+; X86-NEXT: movl 28(%ebp), %ecx
 ; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT: movl 20(%ebp), %edi
+; X86-NEXT: movl 32(%ebp), %edi
 ; X86-NEXT: sbbl %eax, %edi
-; X86-NEXT: movl 24(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %esi
 ; X86-NEXT: sbbl %ebx, %esi
 ; X86-NEXT: movl 8(%ebp), %eax
 ; X86-NEXT: movl %edx, (%eax)
diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
index db6136c..859e924 100644
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
@@ -152,60 +152,60 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
 ; X86-NEXT: pushl %esi
 ; X86-NEXT: andl $-16, %esp
 ; X86-NEXT: subl $160, %esp
-; X86-NEXT: movl 28(%ebp), %ebx
-; X86-NEXT: movl 40(%ebp), %esi
-; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: movl 40(%ebp), %ebx
+; X86-NEXT: movl 52(%ebp), %esi
+; X86-NEXT: movl 44(%ebp), %edi
 ; X86-NEXT: movl %edi, %eax
 ; X86-NEXT: orl %esi, %eax
 ; X86-NEXT: movl %ebx, %ecx
-; X86-NEXT: orl 36(%ebp), %ecx
+; X86-NEXT: orl 48(%ebp), %ecx
 ; X86-NEXT: orl %eax, %ecx
 ; X86-NEXT: sete %cl
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: orl 24(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %edx
-; X86-NEXT: orl 20(%ebp), %edx
+; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: orl 36(%ebp), %eax
+; X86-NEXT: movl 24(%ebp), %edx
+; X86-NEXT: orl 32(%ebp), %edx
 ; X86-NEXT: orl %eax, %edx
 ; X86-NEXT: sete %al
 ; X86-NEXT: orb %cl, %al
 ; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
 ; X86-NEXT: bsrl %esi, %edx
 ; X86-NEXT: xorl $31, %edx
-; X86-NEXT: bsrl 36(%ebp), %ecx
+; X86-NEXT: bsrl 48(%ebp), %ecx
 ; X86-NEXT: xorl $31, %ecx
-; X86-NEXT: addl $32, %ecx
+; X86-NEXT: orl $32, %ecx
 ; X86-NEXT: testl %esi, %esi
 ; X86-NEXT: cmovnel %edx, %ecx
 ; X86-NEXT: bsrl %edi, %edx
 ; X86-NEXT: xorl $31, %edx
 ; X86-NEXT: bsrl %ebx, %eax
 ; X86-NEXT: xorl $31, %eax
-; X86-NEXT: addl $32, %eax
+; X86-NEXT: orl $32, %eax
 ; X86-NEXT: testl %edi, %edi
 ; X86-NEXT: cmovnel %edx, %eax
-; X86-NEXT: addl $64, %eax
-; X86-NEXT: movl 36(%ebp), %edx
+; X86-NEXT: orl $64, %eax
+; X86-NEXT: movl 48(%ebp), %edx
 ; X86-NEXT: orl %esi, %edx
 ; X86-NEXT: cmovnel %ecx, %eax
-; X86-NEXT: movl 24(%ebp), %ebx
+; X86-NEXT: movl 36(%ebp), %ebx
 ; X86-NEXT: bsrl %ebx, %edx
 ; X86-NEXT: xorl $31, %edx
-; X86-NEXT: movl 20(%ebp), %ecx
+; X86-NEXT: movl 32(%ebp), %ecx
 ; X86-NEXT: bsrl %ecx, %ecx
 ; X86-NEXT: xorl $31, %ecx
-; X86-NEXT: addl $32, %ecx
+; X86-NEXT: orl $32, %ecx
 ; X86-NEXT: testl %ebx, %ebx
 ; X86-NEXT: cmovnel %edx, %ecx
-; X86-NEXT: movl 16(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %edi
 ; X86-NEXT: bsrl %edi, %esi
 ; X86-NEXT: xorl $31, %esi
-; X86-NEXT: bsrl 12(%ebp), %edx
+; X86-NEXT: bsrl 24(%ebp), %edx
 ; X86-NEXT: xorl $31, %edx
-; X86-NEXT: addl $32, %edx
+; X86-NEXT: orl $32, %edx
 ; X86-NEXT: testl %edi, %edi
 ; X86-NEXT: cmovnel %esi, %edx
-; X86-NEXT: addl $64, %edx
-; X86-NEXT: movl 20(%ebp), %esi
+; X86-NEXT: orl $64, %edx
+; X86-NEXT: movl 32(%ebp), %esi
 ; X86-NEXT: orl %ebx, %esi
 ; X86-NEXT: cmovnel %ecx, %edx
 ; X86-NEXT: xorl %edi, %edi
@@ -230,15 +230,15 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
 ; X86-NEXT: sbbl %esi, %edx
 ; X86-NEXT: setb %dl
 ; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Folded Reload
-; X86-NEXT: movl 24(%ebp), %eax
+; X86-NEXT: movl 36(%ebp), %eax
 ; X86-NEXT: cmovnel %edi, %eax
-; X86-NEXT: movl 20(%ebp), %esi
+; X86-NEXT: movl 32(%ebp), %esi
 ; X86-NEXT: cmovnel %edi, %esi
-; X86-NEXT: movl 16(%ebp), %edx
+; X86-NEXT: movl 28(%ebp), %edx
 ; X86-NEXT: cmovnel %edi, %edx
-; X86-NEXT: movl 12(%ebp), %ebx
+; X86-NEXT: movl 24(%ebp), %ebx
 ; X86-NEXT: cmovnel %edi, %ebx
-; X86-NEXT: movl 44(%ebp), %edi
+; X86-NEXT: movl 56(%ebp), %edi
 ; X86-NEXT: jne .LBB4_8
 ; X86-NEXT: # %bb.1: # %_udiv-special-cases
 ; X86-NEXT: movl %eax, %edi
@@ -249,18 +249,18 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
 ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X86-NEXT: orl %eax, %ecx
 ; X86-NEXT: movl %edi, %eax
-; X86-NEXT: movl 44(%ebp), %edi
-; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: movl 56(%ebp), %edi
+; X86-NEXT: movl 24(%ebp), %ecx
 ; X86-NEXT: je .LBB4_8
 ; X86-NEXT: # %bb.2: # %udiv-bb1
 ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
 ; X86-NEXT: xorps %xmm0, %xmm0
 ; X86-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
+; X86-NEXT: movl 28(%ebp), %eax
 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 20(%ebp), %eax
+; X86-NEXT: movl 32(%ebp), %eax
 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 24(%ebp), %eax
+; X86-NEXT: movl 36(%ebp), %eax
 ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X86-NEXT: movl %eax, %ecx
@@ -293,13 +293,13 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
 ; X86-NEXT: jmp .LBB4_7
 ; X86-NEXT: .LBB4_3: # %udiv-preheader
 ; X86-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 12(%ebp), %edi
+; X86-NEXT: movl 24(%ebp), %edi
 ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %edi
+; X86-NEXT: movl 28(%ebp), %edi
 ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 20(%ebp), %edi
+; X86-NEXT: movl 32(%ebp), %edi
 ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl 36(%ebp), %edi
 ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
 ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
@@ -326,16
+326,16 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: # kill: def $cl killed $cl killed $ecx ; X86-NEXT: shrdl %cl, %eax, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 28(%ebp), %eax +; X86-NEXT: movl 40(%ebp), %eax ; X86-NEXT: addl $-1, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 32(%ebp), %eax +; X86-NEXT: movl 44(%ebp), %eax ; X86-NEXT: adcl $-1, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 36(%ebp), %eax +; X86-NEXT: movl 48(%ebp), %eax ; X86-NEXT: adcl $-1, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 40(%ebp), %eax +; X86-NEXT: movl 52(%ebp), %eax ; X86-NEXT: adcl $-1, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill @@ -378,12 +378,12 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: andl $1, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %ecx, %esi -; X86-NEXT: andl 40(%ebp), %esi +; X86-NEXT: andl 52(%ebp), %esi ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: andl 36(%ebp), %eax +; X86-NEXT: andl 48(%ebp), %eax ; X86-NEXT: movl %ecx, %edx -; X86-NEXT: andl 32(%ebp), %edx -; X86-NEXT: andl 28(%ebp), %ecx +; X86-NEXT: andl 44(%ebp), %edx +; X86-NEXT: andl 40(%ebp), %ecx ; X86-NEXT: subl %ecx, %edi ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl %edx, %ebx @@ -413,7 +413,7 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: movl 44(%ebp), %edi +; X86-NEXT: movl 56(%ebp), %edi ; X86-NEXT: .LBB4_7: # %udiv-loop-exit ; X86-NEXT: shldl $1, %esi, %eax ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload @@ -432,23 +432,23 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: movl %esi, 8(%edi) ; X86-NEXT: movl %eax, 12(%edi) ; X86-NEXT: movl %eax, %ecx -; X86-NEXT: movl 36(%ebp), %eax +; X86-NEXT: movl 48(%ebp), %eax ; X86-NEXT: movl %eax, %esi ; X86-NEXT: imull %edx, %esi ; X86-NEXT: mull %ebx ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: addl %esi, %edx -; X86-NEXT: movl 40(%ebp), %edi +; X86-NEXT: movl 52(%ebp), %edi ; X86-NEXT: imull %ebx, %edi ; X86-NEXT: addl %edx, %edi -; X86-NEXT: movl 28(%ebp), %eax +; X86-NEXT: movl 40(%ebp), %eax ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X86-NEXT: mull %ebx ; X86-NEXT: movl %eax, %esi -; X86-NEXT: imull 28(%ebp), %ecx +; X86-NEXT: imull 40(%ebp), %ecx ; X86-NEXT: addl %edx, %ecx -; X86-NEXT: movl 32(%ebp), %eax +; X86-NEXT: movl 44(%ebp), %eax ; X86-NEXT: imull %eax, %ebx ; X86-NEXT: addl %ecx, %ebx ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload @@ -457,7 +457,7 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-NEXT: movl %esi, %eax -; X86-NEXT: movl 28(%ebp), %ecx +; X86-NEXT: movl 40(%ebp), %ecx ; X86-NEXT: mull %ecx ; X86-NEXT: movl %edx, %edi ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -468,26 +468,26 @@ define i128 @scalar_i128(i128 
%x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: addl %edi, %ecx ; X86-NEXT: adcl $0, %ebx ; X86-NEXT: movl %esi, %eax -; X86-NEXT: mull 32(%ebp) -; X86-NEXT: movl 16(%ebp), %esi +; X86-NEXT: mull 44(%ebp) +; X86-NEXT: movl 28(%ebp), %esi ; X86-NEXT: movl %edx, %edi ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl %ebx, %edi ; X86-NEXT: setb %cl ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: mull 32(%ebp) +; X86-NEXT: mull 44(%ebp) ; X86-NEXT: addl %edi, %eax ; X86-NEXT: movzbl %cl, %ecx ; X86-NEXT: adcl %ecx, %edx ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: movl 12(%ebp), %ebx +; X86-NEXT: movl 24(%ebp), %ebx ; X86-NEXT: subl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT: movl 20(%ebp), %edi +; X86-NEXT: movl 32(%ebp), %edi ; X86-NEXT: sbbl %eax, %edi -; X86-NEXT: movl 24(%ebp), %ecx +; X86-NEXT: movl 36(%ebp), %ecx ; X86-NEXT: sbbl %edx, %ecx ; X86-NEXT: movl 8(%ebp), %eax ; X86-NEXT: movl %ebx, (%eax) diff --git a/llvm/test/CodeGen/X86/embed-bitcode.ll b/llvm/test/CodeGen/X86/embed-bitcode.ll index 0d66ba8..d4af954 100644 --- a/llvm/test/CodeGen/X86/embed-bitcode.ll +++ b/llvm/test/CodeGen/X86/embed-bitcode.ll @@ -1,10 +1,23 @@ ; RUN: llc -filetype=obj -mtriple=x86_64 %s -o %t ; RUN: llvm-readelf -S %t | FileCheck %s +; RUN: llc -filetype=obj -mtriple=x86_64-pc-windows-msvc %s -o %t +; RUN: llvm-readobj -S %t | FileCheck %s --check-prefix=COFF ; CHECK: .text PROGBITS 0000000000000000 [[#%x,OFF:]] 000000 00 AX 0 ; CHECK-NEXT: .llvmbc PROGBITS 0000000000000000 [[#%x,OFF:]] 000004 00 0 ; CHECK-NEXT: .llvmcmd PROGBITS 0000000000000000 [[#%x,OFF:]] 000005 00 0 +; COFF: Name: .llvmbc (2E 6C 6C 76 6D 62 63 00) +; COFF: Characteristics [ +; COFF-NEXT: IMAGE_SCN_ALIGN_1BYTES +; COFF-NEXT: IMAGE_SCN_MEM_DISCARDABLE +; COFF-NEXT: ] +; COFF: Name: .llvmcmd (2E 6C 6C 76 6D 63 6D 64) +; COFF: Characteristics [ +; COFF-NEXT: IMAGE_SCN_ALIGN_1BYTES +; COFF-NEXT: IMAGE_SCN_MEM_DISCARDABLE +; COFF-NEXT: ] + @llvm.embedded.module = private constant [4 x i8] c"BC\C0\DE", section ".llvmbc", align 1 @llvm.cmdline = private constant [5 x i8] c"-cc1\00", section ".llvmcmd", align 1 @llvm.compiler.used = appending global [2 x ptr] [ptr @llvm.embedded.module, ptr @llvm.cmdline], section "llvm.metadata" diff --git a/llvm/test/CodeGen/X86/fp128-cast-strict.ll b/llvm/test/CodeGen/X86/fp128-cast-strict.ll index 707b05f..bb5640a 100644 --- a/llvm/test/CodeGen/X86/fp128-cast-strict.ll +++ b/llvm/test/CodeGen/X86/fp128-cast-strict.ll @@ -481,18 +481,21 @@ define i128 @fptosi_i128(fp128 %x) nounwind strictfp { ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $20, %esp +; X86-NEXT: subl $52, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: subl $12, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; 
X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll __fixtfti -; X86-NEXT: addl $28, %esp -; X86-NEXT: movl (%esp), %eax +; X86-NEXT: subl $4, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi @@ -501,7 +504,7 @@ define i128 @fptosi_i128(fp128 %x) nounwind strictfp { ; X86-NEXT: movl %eax, (%esi) ; X86-NEXT: movl %ecx, 4(%esi) ; X86-NEXT: movl %esi, %eax -; X86-NEXT: addl $20, %esp +; X86-NEXT: addl $52, %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: retl $4 @@ -620,18 +623,21 @@ define i128 @fptoui_i128(fp128 %x) nounwind strictfp { ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $20, %esp +; X86-NEXT: subl $52, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: subl $12, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll __fixunstfti -; X86-NEXT: addl $28, %esp -; X86-NEXT: movl (%esp), %eax +; X86-NEXT: subl $4, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi @@ -640,7 +646,7 @@ define i128 @fptoui_i128(fp128 %x) nounwind strictfp { ; X86-NEXT: movl %eax, (%esi) ; X86-NEXT: movl %ecx, 4(%esi) ; X86-NEXT: movl %esi, %eax -; X86-NEXT: addl $20, %esp +; X86-NEXT: addl $52, %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: retl $4 @@ -818,18 +824,21 @@ define fp128 @sitofp_i128(i128 %x) nounwind strictfp { ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $20, %esp +; X86-NEXT: subl $52, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: subl $12, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll __floattitf -; X86-NEXT: addl $28, %esp -; X86-NEXT: movl (%esp), %eax +; X86-NEXT: subl $4, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi @@ -838,7 +847,7 @@ define fp128 @sitofp_i128(i128 %x) nounwind strictfp { ; X86-NEXT: movl %eax, (%esi) ; X86-NEXT: movl %ecx, 4(%esi) ; X86-NEXT: movl %esi, %eax -; X86-NEXT: addl $20, %esp +; X86-NEXT: addl $52, %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: retl $4 @@ -1016,18 +1025,21 @@ define fp128 @uitofp_i128(i128 %x) nounwind strictfp { ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: 
subl $20, %esp +; X86-NEXT: subl $52, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: subl $12, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll __floatuntitf -; X86-NEXT: addl $28, %esp -; X86-NEXT: movl (%esp), %eax +; X86-NEXT: subl $4, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi @@ -1036,7 +1048,7 @@ define fp128 @uitofp_i128(i128 %x) nounwind strictfp { ; X86-NEXT: movl %eax, (%esi) ; X86-NEXT: movl %ecx, 4(%esi) ; X86-NEXT: movl %esi, %eax -; X86-NEXT: addl $20, %esp +; X86-NEXT: addl $52, %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: retl $4 diff --git a/llvm/test/CodeGen/X86/fp128-cast.ll b/llvm/test/CodeGen/X86/fp128-cast.ll index 1de2484..6d4ec06 100644 --- a/llvm/test/CodeGen/X86/fp128-cast.ll +++ b/llvm/test/CodeGen/X86/fp128-cast.ll @@ -415,16 +415,20 @@ define dso_local void @TestFPToSIF128_I128() nounwind { ; X86-LABEL: TestFPToSIF128_I128: ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %esi -; X86-NEXT: subl $36, %esp +; X86-NEXT: subl $56, %esp +; X86-NEXT: movl vf128, %eax +; X86-NEXT: movl vf128+4, %ecx +; X86-NEXT: movl vf128+8, %edx +; X86-NEXT: movl vf128+12, %esi +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl vf128+12 -; X86-NEXT: pushl vf128+8 -; X86-NEXT: pushl vf128+4 -; X86-NEXT: pushl vf128 -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll __fixtfti -; X86-NEXT: addl $28, %esp -; X86-NEXT: movl (%esp), %eax +; X86-NEXT: subl $4, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -432,7 +436,7 @@ define dso_local void @TestFPToSIF128_I128() nounwind { ; X86-NEXT: movl %edx, vi128+8 ; X86-NEXT: movl %ecx, vi128+4 ; X86-NEXT: movl %eax, vi128 -; X86-NEXT: addl $24, %esp +; X86-NEXT: addl $56, %esp ; X86-NEXT: popl %esi ; X86-NEXT: retl ; @@ -466,16 +470,20 @@ define dso_local void @TestFPToUIF128_U128() nounwind { ; X86-LABEL: TestFPToUIF128_U128: ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %esi -; X86-NEXT: subl $36, %esp +; X86-NEXT: subl $56, %esp +; X86-NEXT: movl vf128, %eax +; X86-NEXT: movl vf128+4, %ecx +; X86-NEXT: movl vf128+8, %edx +; X86-NEXT: movl vf128+12, %esi +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl vf128+12 -; X86-NEXT: pushl vf128+8 -; X86-NEXT: pushl vf128+4 -; X86-NEXT: pushl vf128 -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll __fixunstfti -; X86-NEXT: addl $28, %esp -; X86-NEXT: movl (%esp), %eax +; X86-NEXT: subl $4, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), 
%eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -483,7 +491,7 @@ define dso_local void @TestFPToUIF128_U128() nounwind { ; X86-NEXT: movl %edx, vu128+8 ; X86-NEXT: movl %ecx, vu128+4 ; X86-NEXT: movl %eax, vu128 -; X86-NEXT: addl $24, %esp +; X86-NEXT: addl $56, %esp ; X86-NEXT: popl %esi ; X86-NEXT: retl ; @@ -913,16 +921,20 @@ define dso_local void @TestSIToFPI128_F128() nounwind { ; X86-LABEL: TestSIToFPI128_F128: ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %esi -; X86-NEXT: subl $36, %esp +; X86-NEXT: subl $56, %esp +; X86-NEXT: movl vi128, %eax +; X86-NEXT: movl vi128+4, %ecx +; X86-NEXT: movl vi128+8, %edx +; X86-NEXT: movl vi128+12, %esi +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl vi128+12 -; X86-NEXT: pushl vi128+8 -; X86-NEXT: pushl vi128+4 -; X86-NEXT: pushl vi128 -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll __floattitf -; X86-NEXT: addl $28, %esp -; X86-NEXT: movl (%esp), %eax +; X86-NEXT: subl $4, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -930,7 +942,7 @@ define dso_local void @TestSIToFPI128_F128() nounwind { ; X86-NEXT: movl %edx, vf128+8 ; X86-NEXT: movl %ecx, vf128+4 ; X86-NEXT: movl %eax, vf128 -; X86-NEXT: addl $24, %esp +; X86-NEXT: addl $56, %esp ; X86-NEXT: popl %esi ; X86-NEXT: retl ; @@ -964,16 +976,20 @@ define dso_local void @TestUIToFPU128_F128() #2 { ; X86-LABEL: TestUIToFPU128_F128: ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %esi -; X86-NEXT: subl $36, %esp +; X86-NEXT: subl $56, %esp +; X86-NEXT: movl vu128, %eax +; X86-NEXT: movl vu128+4, %ecx +; X86-NEXT: movl vu128+8, %edx +; X86-NEXT: movl vu128+12, %esi +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl vu128+12 -; X86-NEXT: pushl vu128+8 -; X86-NEXT: pushl vu128+4 -; X86-NEXT: pushl vu128 -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll __floatuntitf -; X86-NEXT: addl $28, %esp -; X86-NEXT: movl (%esp), %eax +; X86-NEXT: subl $4, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -981,7 +997,7 @@ define dso_local void @TestUIToFPU128_F128() #2 { ; X86-NEXT: movl %edx, vf128+8 ; X86-NEXT: movl %ecx, vf128+4 ; X86-NEXT: movl %eax, vf128 -; X86-NEXT: addl $24, %esp +; X86-NEXT: addl $56, %esp ; X86-NEXT: popl %esi ; X86-NEXT: retl ; @@ -1134,33 +1150,30 @@ define dso_local i32 @TestBits128(fp128 %ld) nounwind { ; ; X86-LABEL: TestBits128: ; X86: # %bb.0: # %entry -; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $20, %esp +; X86-NEXT: subl $72, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: subl $12, %esp -; X86-NEXT: leal {{[0-9]+}}(%esp), %edx -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %esi -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl %eax -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %esi -; X86-NEXT: pushl %ecx 
-; X86-NEXT: pushl %eax -; X86-NEXT: pushl %edx +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll __multf3 -; X86-NEXT: addl $44, %esp +; X86-NEXT: subl $4, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: orl (%esp), %ecx +; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: sete %al -; X86-NEXT: addl $20, %esp +; X86-NEXT: addl $72, %esp ; X86-NEXT: popl %esi -; X86-NEXT: popl %edi ; X86-NEXT: retl ; ; X64-AVX-LABEL: TestBits128: @@ -1359,12 +1372,14 @@ define i1 @PR34866(i128 %x) nounwind { ; ; X86-LABEL: PR34866: ; X86: # %bb.0: +; X86-NEXT: subl $12, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax ; X86-NEXT: orl %ecx, %eax ; X86-NEXT: sete %al +; X86-NEXT: addl $12, %esp ; X86-NEXT: retl ; ; X64-AVX-LABEL: PR34866: @@ -1394,12 +1409,14 @@ define i1 @PR34866_commute(i128 %x) nounwind { ; ; X86-LABEL: PR34866_commute: ; X86: # %bb.0: +; X86-NEXT: subl $12, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax ; X86-NEXT: orl %ecx, %eax ; X86-NEXT: sete %al +; X86-NEXT: addl $12, %esp ; X86-NEXT: retl ; ; X64-AVX-LABEL: PR34866_commute: diff --git a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll index a7eea04..ad2d690 100644 --- a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll +++ b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll @@ -41,27 +41,40 @@ define fp128 @add(fp128 %x, fp128 %y) nounwind strictfp { ; ; X86-LABEL: add: ; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $24, %esp +; X86-NEXT: subl $76, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: subl $12, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll __addtf3 -; X86-NEXT: addl $44, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, (%esi) ; X86-NEXT: movl %esi, %eax -; X86-NEXT: addl $24, 
%esp +; X86-NEXT: addl $76, %esp ; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl $4 ; ; WIN-LABEL: add: @@ -81,24 +94,32 @@ define fp128 @add(fp128 %x, fp128 %y) nounwind strictfp { ; WIN-X86: # %bb.0: # %entry ; WIN-X86-NEXT: pushl %ebp ; WIN-X86-NEXT: movl %esp, %ebp +; WIN-X86-NEXT: pushl %ebx ; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $16, %esp +; WIN-X86-NEXT: subl $80, %esp ; WIN-X86-NEXT: movl 8(%ebp), %esi -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 40(%ebp) -; WIN-X86-NEXT: pushl 36(%ebp) -; WIN-X86-NEXT: pushl 32(%ebp) -; WIN-X86-NEXT: pushl 28(%ebp) -; WIN-X86-NEXT: pushl 24(%ebp) -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: movl 36(%ebp), %edi +; WIN-X86-NEXT: movl 40(%ebp), %ebx +; WIN-X86-NEXT: movl 44(%ebp), %edx +; WIN-X86-NEXT: movl 48(%ebp), %ecx +; WIN-X86-NEXT: movl 52(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 32(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 28(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 24(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll ___addtf3 -; WIN-X86-NEXT: addl $36, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi @@ -107,9 +128,10 @@ define fp128 @add(fp128 %x, fp128 %y) nounwind strictfp { ; WIN-X86-NEXT: movl %eax, (%esi) ; WIN-X86-NEXT: movl %ecx, 4(%esi) ; WIN-X86-NEXT: movl %esi, %eax -; WIN-X86-NEXT: leal -8(%ebp), %esp +; WIN-X86-NEXT: leal -12(%ebp), %esp ; WIN-X86-NEXT: popl %esi ; WIN-X86-NEXT: popl %edi +; WIN-X86-NEXT: popl %ebx ; WIN-X86-NEXT: popl %ebp ; WIN-X86-NEXT: retl entry: @@ -141,27 +163,40 @@ define fp128 @sub(fp128 %x, fp128 %y) nounwind strictfp { ; ; X86-LABEL: sub: ; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $24, %esp +; X86-NEXT: subl $76, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: subl $12, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; 
X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll __subtf3 -; X86-NEXT: addl $44, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, (%esi) ; X86-NEXT: movl %esi, %eax -; X86-NEXT: addl $24, %esp +; X86-NEXT: addl $76, %esp ; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl $4 ; ; WIN-LABEL: sub: @@ -181,24 +216,32 @@ define fp128 @sub(fp128 %x, fp128 %y) nounwind strictfp { ; WIN-X86: # %bb.0: # %entry ; WIN-X86-NEXT: pushl %ebp ; WIN-X86-NEXT: movl %esp, %ebp +; WIN-X86-NEXT: pushl %ebx ; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $16, %esp +; WIN-X86-NEXT: subl $80, %esp ; WIN-X86-NEXT: movl 8(%ebp), %esi -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 40(%ebp) -; WIN-X86-NEXT: pushl 36(%ebp) -; WIN-X86-NEXT: pushl 32(%ebp) -; WIN-X86-NEXT: pushl 28(%ebp) -; WIN-X86-NEXT: pushl 24(%ebp) -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: movl 36(%ebp), %edi +; WIN-X86-NEXT: movl 40(%ebp), %ebx +; WIN-X86-NEXT: movl 44(%ebp), %edx +; WIN-X86-NEXT: movl 48(%ebp), %ecx +; WIN-X86-NEXT: movl 52(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 32(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 28(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 24(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll ___subtf3 -; WIN-X86-NEXT: addl $36, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi @@ -207,9 +250,10 @@ define fp128 @sub(fp128 %x, fp128 %y) nounwind strictfp { ; WIN-X86-NEXT: movl %eax, (%esi) ; WIN-X86-NEXT: movl %ecx, 4(%esi) ; WIN-X86-NEXT: movl %esi, %eax -; WIN-X86-NEXT: leal -8(%ebp), %esp +; WIN-X86-NEXT: leal -12(%ebp), %esp ; WIN-X86-NEXT: popl %esi ; WIN-X86-NEXT: popl %edi +; WIN-X86-NEXT: popl %ebx ; WIN-X86-NEXT: popl %ebp ; WIN-X86-NEXT: retl entry: @@ -241,27 +285,40 @@ define fp128 @mul(fp128 %x, fp128 %y) nounwind strictfp { ; ; X86-LABEL: mul: ; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $24, %esp +; X86-NEXT: subl $76, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: subl $12, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll __multf3 -; X86-NEXT: addl $44, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, (%esi) ; X86-NEXT: movl %esi, %eax -; X86-NEXT: addl $24, %esp +; X86-NEXT: addl $76, %esp ; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl $4 ; ; WIN-LABEL: mul: @@ -281,24 +338,32 @@ define fp128 @mul(fp128 %x, fp128 %y) nounwind strictfp { ; WIN-X86: # %bb.0: # %entry ; WIN-X86-NEXT: pushl %ebp ; WIN-X86-NEXT: movl %esp, %ebp +; WIN-X86-NEXT: pushl %ebx ; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $16, %esp +; WIN-X86-NEXT: subl $80, %esp ; WIN-X86-NEXT: movl 8(%ebp), %esi -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 40(%ebp) -; WIN-X86-NEXT: pushl 36(%ebp) -; WIN-X86-NEXT: pushl 32(%ebp) -; WIN-X86-NEXT: pushl 28(%ebp) -; WIN-X86-NEXT: pushl 24(%ebp) -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: movl 36(%ebp), %edi +; WIN-X86-NEXT: movl 40(%ebp), %ebx +; WIN-X86-NEXT: movl 44(%ebp), %edx +; WIN-X86-NEXT: movl 48(%ebp), %ecx +; WIN-X86-NEXT: movl 52(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 32(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 28(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 24(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll ___multf3 -; WIN-X86-NEXT: addl $36, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi @@ -307,9 +372,10 @@ define fp128 @mul(fp128 %x, fp128 %y) nounwind strictfp { ; WIN-X86-NEXT: movl %eax, (%esi) ; WIN-X86-NEXT: movl %ecx, 4(%esi) ; WIN-X86-NEXT: movl %esi, %eax -; WIN-X86-NEXT: leal -8(%ebp), %esp +; WIN-X86-NEXT: leal -12(%ebp), %esp ; WIN-X86-NEXT: popl %esi ; WIN-X86-NEXT: popl %edi +; WIN-X86-NEXT: popl %ebx ; WIN-X86-NEXT: popl %ebp ; WIN-X86-NEXT: retl entry: @@ -341,27 +407,40 @@ define fp128 @div(fp128 %x, fp128 %y) nounwind strictfp { ; ; X86-LABEL: div: ; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $24, %esp +; X86-NEXT: subl $76, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: subl $12, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, 
{{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll __divtf3 -; X86-NEXT: addl $44, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, (%esi) ; X86-NEXT: movl %esi, %eax -; X86-NEXT: addl $24, %esp +; X86-NEXT: addl $76, %esp ; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl $4 ; ; WIN-LABEL: div: @@ -381,24 +460,32 @@ define fp128 @div(fp128 %x, fp128 %y) nounwind strictfp { ; WIN-X86: # %bb.0: # %entry ; WIN-X86-NEXT: pushl %ebp ; WIN-X86-NEXT: movl %esp, %ebp +; WIN-X86-NEXT: pushl %ebx ; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $16, %esp +; WIN-X86-NEXT: subl $80, %esp ; WIN-X86-NEXT: movl 8(%ebp), %esi -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 40(%ebp) -; WIN-X86-NEXT: pushl 36(%ebp) -; WIN-X86-NEXT: pushl 32(%ebp) -; WIN-X86-NEXT: pushl 28(%ebp) -; WIN-X86-NEXT: pushl 24(%ebp) -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: movl 36(%ebp), %edi +; WIN-X86-NEXT: movl 40(%ebp), %ebx +; WIN-X86-NEXT: movl 44(%ebp), %edx +; WIN-X86-NEXT: movl 48(%ebp), %ecx +; WIN-X86-NEXT: movl 52(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 32(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 28(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 24(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll ___divtf3 -; WIN-X86-NEXT: addl $36, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi @@ -407,9 +494,10 @@ define fp128 @div(fp128 %x, fp128 %y) nounwind strictfp { ; WIN-X86-NEXT: movl %eax, (%esi) ; WIN-X86-NEXT: movl %ecx, 4(%esi) ; WIN-X86-NEXT: movl %esi, %eax -; WIN-X86-NEXT: leal -8(%ebp), %esp +; WIN-X86-NEXT: leal -12(%ebp), %esp ; WIN-X86-NEXT: popl %esi ; WIN-X86-NEXT: popl %edi +; WIN-X86-NEXT: popl %ebx ; WIN-X86-NEXT: popl %ebp ; WIN-X86-NEXT: retl entry: @@ -434,31 +522,48 @@ define fp128 @fma(fp128 %x, fp128 %y, fp128 %z) nounwind strictfp { ; ; X86-LABEL: fma: ; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $24, %esp +; X86-NEXT: subl 
$92, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: subl $12, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll fmaf128 -; X86-NEXT: addl $60, %esp -; X86-NEXT: movaps (%esp), %xmm0 -; X86-NEXT: movaps %xmm0, (%esi) -; X86-NEXT: movl %esi, %eax -; X86-NEXT: addl $24, %esp +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: movaps %xmm0, (%ebp) +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: addl $92, %esp ; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl $4 ; ; WIN-LABEL: fma: @@ -481,28 +586,40 @@ define fp128 @fma(fp128 %x, fp128 %y, fp128 %z) nounwind strictfp { ; WIN-X86: # %bb.0: # %entry ; WIN-X86-NEXT: pushl %ebp ; WIN-X86-NEXT: movl %esp, %ebp +; WIN-X86-NEXT: pushl %ebx ; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $16, %esp +; WIN-X86-NEXT: subl $96, %esp ; WIN-X86-NEXT: movl 8(%ebp), %esi -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 56(%ebp) -; WIN-X86-NEXT: pushl 52(%ebp) -; WIN-X86-NEXT: pushl 48(%ebp) -; WIN-X86-NEXT: pushl 44(%ebp) -; WIN-X86-NEXT: pushl 40(%ebp) -; WIN-X86-NEXT: pushl 36(%ebp) -; WIN-X86-NEXT: pushl 32(%ebp) -; WIN-X86-NEXT: pushl 28(%ebp) -; WIN-X86-NEXT: pushl 24(%ebp) -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: movl 52(%ebp), %ebx +; WIN-X86-NEXT: movl 56(%ebp), %edi +; WIN-X86-NEXT: movl 60(%ebp), %edx +; WIN-X86-NEXT: movl 64(%ebp), %ecx +; WIN-X86-NEXT: movl 68(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 48(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 44(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 40(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) 
+; WIN-X86-NEXT: movl 36(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 32(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 28(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 24(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll _fmal -; WIN-X86-NEXT: addl $52, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi @@ -511,9 +628,10 @@ define fp128 @fma(fp128 %x, fp128 %y, fp128 %z) nounwind strictfp { ; WIN-X86-NEXT: movl %eax, (%esi) ; WIN-X86-NEXT: movl %ecx, 4(%esi) ; WIN-X86-NEXT: movl %esi, %eax -; WIN-X86-NEXT: leal -8(%ebp), %esp +; WIN-X86-NEXT: leal -12(%ebp), %esp ; WIN-X86-NEXT: popl %esi ; WIN-X86-NEXT: popl %edi +; WIN-X86-NEXT: popl %ebx ; WIN-X86-NEXT: popl %ebp ; WIN-X86-NEXT: retl entry: @@ -538,27 +656,40 @@ define fp128 @frem(fp128 %x, fp128 %y) nounwind strictfp { ; ; X86-LABEL: frem: ; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $24, %esp +; X86-NEXT: subl $76, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: subl $12, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll fmodf128 -; X86-NEXT: addl $44, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, (%esi) ; X86-NEXT: movl %esi, %eax -; X86-NEXT: addl $24, %esp +; X86-NEXT: addl $76, %esp ; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl $4 ; ; WIN-LABEL: frem: @@ -578,24 +709,32 @@ define fp128 @frem(fp128 %x, fp128 %y) nounwind strictfp { ; WIN-X86: # %bb.0: # %entry ; WIN-X86-NEXT: pushl %ebp ; WIN-X86-NEXT: movl %esp, %ebp +; WIN-X86-NEXT: pushl %ebx ; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $16, %esp +; WIN-X86-NEXT: subl $80, %esp ; WIN-X86-NEXT: movl 8(%ebp), %esi -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 40(%ebp) -; WIN-X86-NEXT: pushl 36(%ebp) -; WIN-X86-NEXT: pushl 32(%ebp) -; WIN-X86-NEXT: pushl 28(%ebp) -; WIN-X86-NEXT: pushl 24(%ebp) -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; 
WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: movl 36(%ebp), %edi +; WIN-X86-NEXT: movl 40(%ebp), %ebx +; WIN-X86-NEXT: movl 44(%ebp), %edx +; WIN-X86-NEXT: movl 48(%ebp), %ecx +; WIN-X86-NEXT: movl 52(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 32(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 28(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 24(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll _fmodl -; WIN-X86-NEXT: addl $36, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi @@ -604,9 +743,10 @@ define fp128 @frem(fp128 %x, fp128 %y) nounwind strictfp { ; WIN-X86-NEXT: movl %eax, (%esi) ; WIN-X86-NEXT: movl %ecx, 4(%esi) ; WIN-X86-NEXT: movl %esi, %eax -; WIN-X86-NEXT: leal -8(%ebp), %esp +; WIN-X86-NEXT: leal -12(%ebp), %esp ; WIN-X86-NEXT: popl %esi ; WIN-X86-NEXT: popl %edi +; WIN-X86-NEXT: popl %ebx ; WIN-X86-NEXT: popl %ebp ; WIN-X86-NEXT: retl entry: @@ -631,23 +771,28 @@ define fp128 @ceil(fp128 %x) nounwind strictfp { ; ; X86-LABEL: ceil: ; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $24, %esp +; X86-NEXT: subl $52, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: subl $12, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll ceilf128 -; X86-NEXT: addl $28, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, (%esi) ; X86-NEXT: movl %esi, %eax -; X86-NEXT: addl $24, %esp +; X86-NEXT: addl $52, %esp ; X86-NEXT: popl %esi +; X86-NEXT: popl %edi ; X86-NEXT: retl $4 ; ; WIN-LABEL: ceil: @@ -667,17 +812,20 @@ define fp128 @ceil(fp128 %x) nounwind strictfp { ; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $16, %esp +; WIN-X86-NEXT: subl $48, %esp ; WIN-X86-NEXT: movl 8(%ebp), %esi -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 24(%ebp) -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: movl 24(%ebp), %eax +; WIN-X86-NEXT: movl 28(%ebp), %ecx +; WIN-X86-NEXT: movl 32(%ebp), %edx +; WIN-X86-NEXT: movl 36(%ebp), %edi +; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll _ceill -; WIN-X86-NEXT: addl $20, %esp 
-; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi @@ -713,23 +861,28 @@ define fp128 @acos(fp128 %x) nounwind strictfp { ; ; X86-LABEL: acos: ; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $24, %esp +; X86-NEXT: subl $52, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: subl $12, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll acosf128 -; X86-NEXT: addl $28, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, (%esi) ; X86-NEXT: movl %esi, %eax -; X86-NEXT: addl $24, %esp +; X86-NEXT: addl $52, %esp ; X86-NEXT: popl %esi +; X86-NEXT: popl %edi ; X86-NEXT: retl $4 ; ; WIN-LABEL: acos: @@ -749,17 +902,20 @@ define fp128 @acos(fp128 %x) nounwind strictfp { ; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $16, %esp +; WIN-X86-NEXT: subl $48, %esp ; WIN-X86-NEXT: movl 8(%ebp), %esi -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 24(%ebp) -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: movl 24(%ebp), %eax +; WIN-X86-NEXT: movl 28(%ebp), %ecx +; WIN-X86-NEXT: movl 32(%ebp), %edx +; WIN-X86-NEXT: movl 36(%ebp), %edi +; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll _acosl -; WIN-X86-NEXT: addl $20, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi @@ -795,23 +951,28 @@ define fp128 @cos(fp128 %x) nounwind strictfp { ; ; X86-LABEL: cos: ; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $24, %esp +; X86-NEXT: subl $52, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: subl $12, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll cosf128 -; X86-NEXT: addl $28, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps 
{{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, (%esi) ; X86-NEXT: movl %esi, %eax -; X86-NEXT: addl $24, %esp +; X86-NEXT: addl $52, %esp ; X86-NEXT: popl %esi +; X86-NEXT: popl %edi ; X86-NEXT: retl $4 ; ; WIN-LABEL: cos: @@ -831,17 +992,20 @@ define fp128 @cos(fp128 %x) nounwind strictfp { ; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $16, %esp +; WIN-X86-NEXT: subl $48, %esp ; WIN-X86-NEXT: movl 8(%ebp), %esi -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 24(%ebp) -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: movl 24(%ebp), %eax +; WIN-X86-NEXT: movl 28(%ebp), %ecx +; WIN-X86-NEXT: movl 32(%ebp), %edx +; WIN-X86-NEXT: movl 36(%ebp), %edi +; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll _cosl -; WIN-X86-NEXT: addl $20, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi @@ -877,23 +1041,28 @@ define fp128 @cosh(fp128 %x) nounwind strictfp { ; ; X86-LABEL: cosh: ; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $24, %esp +; X86-NEXT: subl $52, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: subl $12, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll coshf128 -; X86-NEXT: addl $28, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, (%esi) ; X86-NEXT: movl %esi, %eax -; X86-NEXT: addl $24, %esp +; X86-NEXT: addl $52, %esp ; X86-NEXT: popl %esi +; X86-NEXT: popl %edi ; X86-NEXT: retl $4 ; ; WIN-LABEL: cosh: @@ -913,17 +1082,20 @@ define fp128 @cosh(fp128 %x) nounwind strictfp { ; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $16, %esp +; WIN-X86-NEXT: subl $48, %esp ; WIN-X86-NEXT: movl 8(%ebp), %esi -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 24(%ebp) -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: movl 24(%ebp), %eax +; WIN-X86-NEXT: movl 28(%ebp), %ecx +; WIN-X86-NEXT: movl 32(%ebp), %edx +; WIN-X86-NEXT: movl 36(%ebp), %edi +; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll _coshl -; WIN-X86-NEXT: addl $20, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl 
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -959,23 +1131,28 @@ define fp128 @exp(fp128 %x) nounwind strictfp {
 ;
 ; X86-LABEL: exp:
 ; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
 ; X86-NEXT: calll expf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
 ; X86-NEXT: movaps %xmm0, (%esi)
 ; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
 ; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
 ; X86-NEXT: retl $4
 ;
 ; WIN-LABEL: exp:
@@ -995,17 +1172,20 @@ define fp128 @exp(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT: pushl %edi
 ; WIN-X86-NEXT: pushl %esi
 ; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
 ; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
 ; WIN-X86-NEXT: calll _expl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1041,23 +1221,28 @@ define fp128 @exp2(fp128 %x) nounwind strictfp {
 ;
 ; X86-LABEL: exp2:
 ; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
 ; X86-NEXT: calll exp2f128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
 ; X86-NEXT: movaps %xmm0, (%esi)
 ; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
 ; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
 ; X86-NEXT: retl $4
 ;
 ; WIN-LABEL: exp2:
@@ -1077,17 +1262,20 @@ define fp128 @exp2(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT: pushl %edi
 ; WIN-X86-NEXT: pushl %esi
 ; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
 ; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
 ; WIN-X86-NEXT: calll _exp2l
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1123,23 +1311,28 @@ define fp128 @floor(fp128 %x) nounwind strictfp {
 ;
 ; X86-LABEL: floor:
 ; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
 ; X86-NEXT: calll floorf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
 ; X86-NEXT: movaps %xmm0, (%esi)
 ; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
 ; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
 ; X86-NEXT: retl $4
 ;
 ; WIN-LABEL: floor:
@@ -1159,17 +1352,20 @@ define fp128 @floor(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT: pushl %edi
 ; WIN-X86-NEXT: pushl %esi
 ; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
 ; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
 ; WIN-X86-NEXT: calll _floorl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1205,23 +1401,28 @@ define fp128 @log(fp128 %x) nounwind strictfp {
 ;
 ; X86-LABEL: log:
 ; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
 ; X86-NEXT: calll logf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
 ; X86-NEXT: movaps %xmm0, (%esi)
 ; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
 ; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
 ; X86-NEXT: retl $4
 ;
 ; WIN-LABEL: log:
@@ -1241,17 +1442,20 @@ define fp128 @log(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT: pushl %edi
 ; WIN-X86-NEXT: pushl %esi
 ; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
 ; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
 ; WIN-X86-NEXT: calll _logl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1287,23 +1491,28 @@ define fp128 @log10(fp128 %x) nounwind strictfp {
 ;
 ; X86-LABEL: log10:
 ; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
 ; X86-NEXT: calll log10f128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
 ; X86-NEXT: movaps %xmm0, (%esi)
 ; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
 ; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
 ; X86-NEXT: retl $4
 ;
 ; WIN-LABEL: log10:
@@ -1323,17 +1532,20 @@ define fp128 @log10(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT: pushl %edi
 ; WIN-X86-NEXT: pushl %esi
 ; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
 ; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
 ; WIN-X86-NEXT: calll _log10l
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1369,23 +1581,28 @@ define fp128 @log2(fp128 %x) nounwind strictfp {
 ;
 ; X86-LABEL: log2:
 ; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
 ; X86-NEXT: calll log2f128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
 ; X86-NEXT: movaps %xmm0, (%esi)
 ; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
 ; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
 ; X86-NEXT: retl $4
 ;
 ; WIN-LABEL: log2:
@@ -1405,17 +1622,20 @@ define fp128 @log2(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT: pushl %edi
 ; WIN-X86-NEXT: pushl %esi
 ; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
 ; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
 ; WIN-X86-NEXT: calll _log2l
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1451,27 +1671,40 @@ define fp128 @maxnum(fp128 %x, fp128 %y) nounwind strictfp {
 ;
 ; X86-LABEL: maxnum:
 ; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $76, %esp
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
 ; X86-NEXT: calll fmaxf128
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
 ; X86-NEXT: movaps %xmm0, (%esi)
 ; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $76, %esp
 ; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
 ; X86-NEXT: retl $4
 ;
 ; WIN-LABEL: maxnum:
@@ -1491,24 +1724,32 @@ define fp128 @maxnum(fp128 %x, fp128 %y) nounwind strictfp {
 ; WIN-X86: # %bb.0: # %entry
 ; WIN-X86-NEXT: pushl %ebp
 ; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
 ; WIN-X86-NEXT: pushl %edi
 ; WIN-X86-NEXT: pushl %esi
 ; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $80, %esp
 ; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 40(%ebp)
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl 40(%ebp), %ebx
+; WIN-X86-NEXT: movl 44(%ebp), %edx
+; WIN-X86-NEXT: movl 48(%ebp), %ecx
+; WIN-X86-NEXT: movl 52(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 32(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 28(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
 ; WIN-X86-NEXT: calll _fmaxl
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1517,9 +1758,10 @@ define fp128 @maxnum(fp128 %x, fp128 %y) nounwind strictfp {
 ; WIN-X86-NEXT: movl %eax, (%esi)
 ; WIN-X86-NEXT: movl %ecx, 4(%esi)
 ; WIN-X86-NEXT: movl %esi, %eax
-; WIN-X86-NEXT: leal -8(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
 ; WIN-X86-NEXT: popl %esi
 ; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
 ; WIN-X86-NEXT: popl %ebp
 ; WIN-X86-NEXT: retl
 entry:
@@ -1544,27 +1786,40 @@ define fp128 @minnum(fp128 %x, fp128 %y) nounwind strictfp {
 ;
 ; X86-LABEL: minnum:
 ; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $76, %esp
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
 ; X86-NEXT: calll fminf128
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
 ; X86-NEXT: movaps %xmm0, (%esi)
 ; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $76, %esp
 ; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
 ; X86-NEXT: retl $4
 ;
 ; WIN-LABEL: minnum:
@@ -1584,24 +1839,32 @@ define fp128 @minnum(fp128 %x, fp128 %y) nounwind strictfp {
 ; WIN-X86: # %bb.0: # %entry
 ; WIN-X86-NEXT: pushl %ebp
 ; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
 ; WIN-X86-NEXT: pushl %edi
 ; WIN-X86-NEXT: pushl %esi
 ; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $80, %esp
 ; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 40(%ebp)
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl 40(%ebp), %ebx
+; WIN-X86-NEXT: movl 44(%ebp), %edx
+; WIN-X86-NEXT: movl 48(%ebp), %ecx
+; WIN-X86-NEXT: movl 52(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 32(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 28(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
 ; WIN-X86-NEXT: calll _fminl
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1610,9 +1873,10 @@ define fp128 @minnum(fp128 %x, fp128 %y) nounwind strictfp {
 ; WIN-X86-NEXT: movl %eax, (%esi)
 ; WIN-X86-NEXT: movl %ecx, 4(%esi)
 ; WIN-X86-NEXT: movl %esi, %eax
-; WIN-X86-NEXT: leal -8(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
 ; WIN-X86-NEXT: popl %esi
 ; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
 ; WIN-X86-NEXT: popl %ebp
 ; WIN-X86-NEXT: retl
 entry:
@@ -1637,23 +1901,28 @@ define fp128 @nearbyint(fp128 %x) nounwind strictfp {
 ;
 ; X86-LABEL: nearbyint:
 ; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
 ; X86-NEXT: calll nearbyintf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
 ; X86-NEXT: movaps %xmm0, (%esi)
 ; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
 ; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
 ; X86-NEXT: retl $4
 ;
 ; WIN-LABEL: nearbyint:
@@ -1673,17 +1942,20 @@ define fp128 @nearbyint(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT: pushl %edi
 ; WIN-X86-NEXT: pushl %esi
 ; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
 ; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
 ; WIN-X86-NEXT: calll _nearbyintl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1719,27 +1991,40 @@ define fp128 @pow(fp128 %x, fp128 %y) nounwind strictfp {
 ;
 ; X86-LABEL: pow:
 ; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $76, %esp
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
 ; X86-NEXT: calll powf128
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
 ; X86-NEXT: movaps %xmm0, (%esi)
 ; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $76, %esp
 ; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
 ; X86-NEXT: retl $4
 ;
 ; WIN-LABEL: pow:
@@ -1759,24 +2044,32 @@ define fp128 @pow(fp128 %x, fp128 %y) nounwind strictfp {
 ; WIN-X86: # %bb.0: # %entry
 ; WIN-X86-NEXT: pushl %ebp
 ; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
 ; WIN-X86-NEXT: pushl %edi
 ; WIN-X86-NEXT: pushl %esi
 ; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $80, %esp
 ; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 40(%ebp)
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl 40(%ebp), %ebx
+; WIN-X86-NEXT: movl 44(%ebp), %edx
+; WIN-X86-NEXT: movl 48(%ebp), %ecx
+; WIN-X86-NEXT: movl 52(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 32(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 28(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
 ; WIN-X86-NEXT: calll _powl
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1785,9 +2078,10 @@ define fp128 @pow(fp128 %x, fp128 %y) nounwind strictfp {
 ; WIN-X86-NEXT: movl %eax, (%esi)
 ; WIN-X86-NEXT: movl %ecx, 4(%esi)
 ; WIN-X86-NEXT: movl %esi, %eax
-; WIN-X86-NEXT: leal -8(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
 ; WIN-X86-NEXT: popl %esi
 ; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
 ; WIN-X86-NEXT: popl %ebp
 ; WIN-X86-NEXT: retl
 entry:
@@ -1819,24 +2113,32 @@ define fp128 @powi(fp128 %x, i32 %y) nounwind strictfp {
 ;
 ; X86-LABEL: powi:
 ; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $64, %esp
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $8, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
 ; X86-NEXT: calll __powitf2
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
 ; X86-NEXT: movaps %xmm0, (%esi)
 ; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $64, %esp
 ; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
 ; X86-NEXT: retl $4
 ;
 ; WIN-LABEL: powi:
@@ -1853,21 +2155,26 @@ define fp128 @powi(fp128 %x, i32 %y) nounwind strictfp {
 ; WIN-X86: # %bb.0: # %entry
 ; WIN-X86-NEXT: pushl %ebp
 ; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
 ; WIN-X86-NEXT: pushl %edi
 ; WIN-X86-NEXT: pushl %esi
 ; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $80, %esp
 ; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl 40(%ebp), %ebx
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
 ; WIN-X86-NEXT: calll ___powitf2
-; WIN-X86-NEXT: addl $24, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1876,9 +2183,10 @@ define fp128 @powi(fp128 %x, i32 %y) nounwind strictfp {
 ; WIN-X86-NEXT: movl %eax, (%esi)
 ; WIN-X86-NEXT: movl %ecx, 4(%esi)
 ; WIN-X86-NEXT: movl %esi, %eax
-; WIN-X86-NEXT: leal -8(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
 ; WIN-X86-NEXT: popl %esi
 ; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
 ; WIN-X86-NEXT: popl %ebp
 ; WIN-X86-NEXT: retl
 entry:
@@ -1903,23 +2211,28 @@ define fp128 @rint(fp128 %x) nounwind strictfp {
 ;
 ; X86-LABEL: rint:
 ; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
 ; X86-NEXT: calll rintf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
 ; X86-NEXT: movaps %xmm0, (%esi)
 ; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
 ; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
 ; X86-NEXT: retl $4
 ;
 ; WIN-LABEL: rint:
@@ -1939,17 +2252,20 @@ define fp128 @rint(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT: pushl %edi
 ; WIN-X86-NEXT: pushl %esi
 ; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
 ; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
 ; WIN-X86-NEXT: calll _rintl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -1985,23 +2301,28 @@ define fp128 @round(fp128 %x) nounwind strictfp {
 ;
 ; X86-LABEL: round:
 ; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
 ; X86-NEXT: calll roundf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
 ; X86-NEXT: movaps %xmm0, (%esi)
 ; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
 ; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
 ; X86-NEXT: retl $4
 ;
 ; WIN-LABEL: round:
@@ -2021,17 +2342,20 @@ define fp128 @round(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT: pushl %edi
 ; WIN-X86-NEXT: pushl %esi
 ; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
 ; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
 ; WIN-X86-NEXT: calll _roundl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2067,23 +2391,28 @@ define fp128 @roundeven(fp128 %x) nounwind strictfp {
 ;
 ; X86-LABEL: roundeven:
 ; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
 ; X86-NEXT: calll roundevenf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
 ; X86-NEXT: movaps %xmm0, (%esi)
 ; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
 ; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
 ; X86-NEXT: retl $4
 ;
 ; WIN-LABEL: roundeven:
@@ -2103,17 +2432,20 @@ define fp128 @roundeven(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT: pushl %edi
 ; WIN-X86-NEXT: pushl %esi
 ; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
 ; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
 ; WIN-X86-NEXT: calll _roundevenl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2149,23 +2481,28 @@ define fp128 @asin(fp128 %x) nounwind strictfp {
 ;
 ; X86-LABEL: asin:
 ; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
 ; X86-NEXT: calll asinf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
 ; X86-NEXT: movaps %xmm0, (%esi)
 ; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
 ; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
 ; X86-NEXT: retl $4
 ;
 ; WIN-LABEL: asin:
@@ -2185,17 +2522,20 @@ define fp128 @asin(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT: pushl %edi
 ; WIN-X86-NEXT: pushl %esi
 ; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
 ; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
 ; WIN-X86-NEXT: calll _asinl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2231,23 +2571,28 @@ define fp128 @sin(fp128 %x) nounwind strictfp {
 ;
 ; X86-LABEL: sin:
 ; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
 ; X86-NEXT: calll sinf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
 ; X86-NEXT: movaps %xmm0, (%esi)
 ; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
 ; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
 ; X86-NEXT: retl $4
 ;
 ; WIN-LABEL: sin:
@@ -2267,17 +2612,20 @@ define fp128 @sin(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT: pushl %edi
 ; WIN-X86-NEXT: pushl %esi
 ; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
 ; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
 ; WIN-X86-NEXT: calll _sinl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2313,23 +2661,28 @@ define fp128 @sinh(fp128 %x) nounwind strictfp {
 ;
 ; X86-LABEL: sinh:
 ; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
 ; X86-NEXT: calll sinhf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
 ; X86-NEXT: movaps %xmm0, (%esi)
 ; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
 ; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
 ; X86-NEXT: retl $4
 ;
 ; WIN-LABEL: sinh:
@@ -2349,17 +2702,20 @@ define fp128 @sinh(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT: pushl %edi
 ; WIN-X86-NEXT: pushl %esi
 ; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
 ; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
 ; WIN-X86-NEXT: calll _sinhl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2395,23 +2751,28 @@ define fp128 @sqrt(fp128 %x) nounwind strictfp {
 ;
 ; X86-LABEL: sqrt:
 ; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
 ; X86-NEXT: calll sqrtf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
 ; X86-NEXT: movaps %xmm0, (%esi)
 ; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
 ; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
 ; X86-NEXT: retl $4
 ;
 ; WIN-LABEL: sqrt:
@@ -2431,17 +2792,20 @@ define fp128 @sqrt(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT: pushl %edi
 ; WIN-X86-NEXT: pushl %esi
 ; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
 ; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
 ; WIN-X86-NEXT: calll _sqrtl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2477,23 +2841,28 @@ define fp128 @atan(fp128 %x) nounwind strictfp {
 ;
 ; X86-LABEL: atan:
 ; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
 ; X86-NEXT: calll atanf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
 ; X86-NEXT: movaps %xmm0, (%esi)
 ; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
 ; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
 ; X86-NEXT: retl $4
 ;
 ; WIN-LABEL: atan:
@@ -2513,17 +2882,20 @@ define fp128 @atan(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT: pushl %edi
 ; WIN-X86-NEXT: pushl %esi
 ; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
 ; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
 ; WIN-X86-NEXT: calll _atanl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2559,27 +2931,40 @@ define fp128 @atan2(fp128 %x, fp128 %y) nounwind strictfp {
 ;
 ; X86-LABEL: atan2:
 ; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $76, %esp
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
 ; X86-NEXT: calll atan2f128
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
 ; X86-NEXT: movaps %xmm0, (%esi)
 ; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $76, %esp
 ; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
 ; X86-NEXT: retl $4
 ;
 ; WIN-LABEL: atan2:
@@ -2599,24 +2984,32 @@ define fp128 @atan2(fp128 %x, fp128 %y) nounwind strictfp {
 ; WIN-X86: # %bb.0: # %entry
 ; WIN-X86-NEXT: pushl %ebp
 ; WIN-X86-NEXT: movl %esp, %ebp
+; WIN-X86-NEXT: pushl %ebx
 ; WIN-X86-NEXT: pushl %edi
 ; WIN-X86-NEXT: pushl %esi
 ; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $80, %esp
 ; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 40(%ebp)
-; WIN-X86-NEXT: pushl 36(%ebp)
-; WIN-X86-NEXT: pushl 32(%ebp)
-; WIN-X86-NEXT: pushl 28(%ebp)
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl 40(%ebp), %ebx
+; WIN-X86-NEXT: movl 44(%ebp), %edx
+; WIN-X86-NEXT: movl 48(%ebp), %ecx
+; WIN-X86-NEXT: movl 52(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 32(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 28(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
 ; WIN-X86-NEXT: calll _atan2l
-; WIN-X86-NEXT: addl $36, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2625,9 +3018,10 @@ define fp128 @atan2(fp128 %x, fp128 %y) nounwind strictfp {
 ; WIN-X86-NEXT: movl %eax, (%esi)
 ; WIN-X86-NEXT: movl %ecx, 4(%esi)
 ; WIN-X86-NEXT: movl %esi, %eax
-; WIN-X86-NEXT: leal -8(%ebp), %esp
+; WIN-X86-NEXT: leal -12(%ebp), %esp
 ; WIN-X86-NEXT: popl %esi
 ; WIN-X86-NEXT: popl %edi
+; WIN-X86-NEXT: popl %ebx
 ; WIN-X86-NEXT: popl %ebp
 ; WIN-X86-NEXT: retl
 entry:
@@ -2652,23 +3046,28 @@ define fp128 @tan(fp128 %x) nounwind strictfp {
 ;
 ; X86-LABEL: tan:
 ; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
 ; X86-NEXT: calll tanf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
 ; X86-NEXT: movaps %xmm0, (%esi)
 ; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
 ; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
 ; X86-NEXT: retl $4
 ;
 ; WIN-LABEL: tan:
@@ -2688,17 +3087,20 @@ define fp128 @tan(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT: pushl %edi
 ; WIN-X86-NEXT: pushl %esi
 ; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
 ; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
 ; WIN-X86-NEXT: calll _tanl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2734,23 +3136,28 @@ define fp128 @tanh(fp128 %x) nounwind strictfp {
 ;
 ; X86-LABEL: tanh:
 ; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
 ; X86-NEXT: calll tanhf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
 ; X86-NEXT: movaps %xmm0, (%esi)
 ; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
 ; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
 ; X86-NEXT: retl $4
 ;
 ; WIN-LABEL: tanh:
@@ -2770,17 +3177,20 @@ define fp128 @tanh(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT: pushl %edi
 ; WIN-X86-NEXT: pushl %esi
 ; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
 ; WIN-X86-NEXT: movl 8(%ebp), %esi
-; WIN-X86-NEXT: movl %esp, %eax
-; WIN-X86-NEXT: pushl 24(%ebp)
-; WIN-X86-NEXT: pushl 20(%ebp)
-; WIN-X86-NEXT: pushl 16(%ebp)
-; WIN-X86-NEXT: pushl 12(%ebp)
-; WIN-X86-NEXT: pushl %eax
+; WIN-X86-NEXT: movl 24(%ebp), %eax
+; WIN-X86-NEXT: movl 28(%ebp), %ecx
+; WIN-X86-NEXT: movl 32(%ebp), %edx
+; WIN-X86-NEXT: movl 36(%ebp), %edi
+; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN-X86-NEXT: movl %eax, (%esp)
 ; WIN-X86-NEXT: calll _tanhl
-; WIN-X86-NEXT: addl $20, %esp
-; WIN-X86-NEXT: movl (%esp), %eax
+; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -2816,23 +3226,28 @@ define fp128 @trunc(fp128 %x) nounwind strictfp {
 ;
 ; X86-LABEL: trunc:
 ; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $52, %esp
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl %eax, (%esp)
 ; X86-NEXT: calll truncf128
-; X86-NEXT: addl $28, %esp
-; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
 ; X86-NEXT: movaps %xmm0, (%esi)
 ; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $52, %esp
 ; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
 ; X86-NEXT: retl $4
 ;
 ; WIN-LABEL: trunc:
@@ -2852,17 +3267,20 @@ define fp128 @trunc(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT: pushl %edi
 ; WIN-X86-NEXT: pushl %esi
 ; WIN-X86-NEXT: andl $-16, %esp
-; WIN-X86-NEXT: subl $16, %esp
+; WIN-X86-NEXT: subl $48, %esp
 ; WIN-X86-NEXT: movl 8(%ebp), %esi
%esi -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 24(%ebp) -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: movl 24(%ebp), %eax +; WIN-X86-NEXT: movl 28(%ebp), %ecx +; WIN-X86-NEXT: movl 32(%ebp), %edx +; WIN-X86-NEXT: movl 36(%ebp), %edi +; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll _truncl -; WIN-X86-NEXT: addl $20, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi @@ -2919,12 +3337,18 @@ define i32 @lrint(fp128 %x) nounwind strictfp { ; ; WIN-X86-LABEL: lrint: ; WIN-X86: # %bb.0: # %entry -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) +; WIN-X86-NEXT: pushl %ebp +; WIN-X86-NEXT: movl %esp, %ebp +; WIN-X86-NEXT: andl $-16, %esp +; WIN-X86-NEXT: subl $16, %esp +; WIN-X86-NEXT: pushl 20(%ebp) +; WIN-X86-NEXT: pushl 16(%ebp) +; WIN-X86-NEXT: pushl 12(%ebp) +; WIN-X86-NEXT: pushl 8(%ebp) ; WIN-X86-NEXT: calll _lrintl ; WIN-X86-NEXT: addl $16, %esp +; WIN-X86-NEXT: movl %ebp, %esp +; WIN-X86-NEXT: popl %ebp ; WIN-X86-NEXT: retl entry: %rint = call i32 @llvm.experimental.constrained.lrint.i32.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 @@ -2969,12 +3393,18 @@ define i64 @llrint(fp128 %x) nounwind strictfp { ; ; WIN-X86-LABEL: llrint: ; WIN-X86: # %bb.0: # %entry -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) +; WIN-X86-NEXT: pushl %ebp +; WIN-X86-NEXT: movl %esp, %ebp +; WIN-X86-NEXT: andl $-16, %esp +; WIN-X86-NEXT: subl $16, %esp +; WIN-X86-NEXT: pushl 20(%ebp) +; WIN-X86-NEXT: pushl 16(%ebp) +; WIN-X86-NEXT: pushl 12(%ebp) +; WIN-X86-NEXT: pushl 8(%ebp) ; WIN-X86-NEXT: calll _llrintl ; WIN-X86-NEXT: addl $16, %esp +; WIN-X86-NEXT: movl %ebp, %esp +; WIN-X86-NEXT: popl %ebp ; WIN-X86-NEXT: retl entry: %rint = call i64 @llvm.experimental.constrained.llrint.i64.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 @@ -3019,12 +3449,18 @@ define i32 @lround(fp128 %x) nounwind strictfp { ; ; WIN-X86-LABEL: lround: ; WIN-X86: # %bb.0: # %entry -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) +; WIN-X86-NEXT: pushl %ebp +; WIN-X86-NEXT: movl %esp, %ebp +; WIN-X86-NEXT: andl $-16, %esp +; WIN-X86-NEXT: subl $16, %esp +; WIN-X86-NEXT: pushl 20(%ebp) +; WIN-X86-NEXT: pushl 16(%ebp) +; WIN-X86-NEXT: pushl 12(%ebp) +; WIN-X86-NEXT: pushl 8(%ebp) ; WIN-X86-NEXT: calll _lroundl ; WIN-X86-NEXT: addl $16, %esp +; WIN-X86-NEXT: movl %ebp, %esp +; WIN-X86-NEXT: popl %ebp ; WIN-X86-NEXT: retl entry: %round = call i32 @llvm.experimental.constrained.lround.i32.f128(fp128 %x, metadata !"fpexcept.strict") #0 @@ -3069,12 +3505,18 @@ define i64 @llround(fp128 %x) nounwind strictfp { ; ; WIN-X86-LABEL: llround: ; WIN-X86: # %bb.0: # %entry -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl 
{{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) +; WIN-X86-NEXT: pushl %ebp +; WIN-X86-NEXT: movl %esp, %ebp +; WIN-X86-NEXT: andl $-16, %esp +; WIN-X86-NEXT: subl $16, %esp +; WIN-X86-NEXT: pushl 20(%ebp) +; WIN-X86-NEXT: pushl 16(%ebp) +; WIN-X86-NEXT: pushl 12(%ebp) +; WIN-X86-NEXT: pushl 8(%ebp) ; WIN-X86-NEXT: calll _llroundl ; WIN-X86-NEXT: addl $16, %esp +; WIN-X86-NEXT: movl %ebp, %esp +; WIN-X86-NEXT: popl %ebp ; WIN-X86-NEXT: retl entry: %round = call i64 @llvm.experimental.constrained.llround.i64.f128(fp128 %x, metadata !"fpexcept.strict") #0 @@ -3176,26 +3618,32 @@ define i64 @cmp(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 { ; ; WIN-X86-LABEL: cmp: ; WIN-X86: # %bb.0: -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) +; WIN-X86-NEXT: pushl %ebp +; WIN-X86-NEXT: movl %esp, %ebp +; WIN-X86-NEXT: andl $-16, %esp +; WIN-X86-NEXT: subl $16, %esp +; WIN-X86-NEXT: pushl 52(%ebp) +; WIN-X86-NEXT: pushl 48(%ebp) +; WIN-X86-NEXT: pushl 44(%ebp) +; WIN-X86-NEXT: pushl 40(%ebp) +; WIN-X86-NEXT: pushl 36(%ebp) +; WIN-X86-NEXT: pushl 32(%ebp) +; WIN-X86-NEXT: pushl 28(%ebp) +; WIN-X86-NEXT: pushl 24(%ebp) ; WIN-X86-NEXT: calll ___eqtf2 ; WIN-X86-NEXT: addl $32, %esp ; WIN-X86-NEXT: testl %eax, %eax ; WIN-X86-NEXT: je LBB37_1 ; WIN-X86-NEXT: # %bb.2: -; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %ecx +; WIN-X86-NEXT: leal 16(%ebp), %ecx ; WIN-X86-NEXT: jmp LBB37_3 ; WIN-X86-NEXT: LBB37_1: -; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %ecx +; WIN-X86-NEXT: leal 8(%ebp), %ecx ; WIN-X86-NEXT: LBB37_3: ; WIN-X86-NEXT: movl (%ecx), %eax ; WIN-X86-NEXT: movl 4(%ecx), %edx +; WIN-X86-NEXT: movl %ebp, %esp +; WIN-X86-NEXT: popl %ebp ; WIN-X86-NEXT: retl %cond = call i1 @llvm.experimental.constrained.fcmp.f128( fp128 %x, fp128 %y, @@ -3300,26 +3748,32 @@ define i64 @cmps(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 { ; ; WIN-X86-LABEL: cmps: ; WIN-X86: # %bb.0: -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) +; WIN-X86-NEXT: pushl %ebp +; WIN-X86-NEXT: movl %esp, %ebp +; WIN-X86-NEXT: andl $-16, %esp +; WIN-X86-NEXT: subl $16, %esp +; WIN-X86-NEXT: pushl 52(%ebp) +; WIN-X86-NEXT: pushl 48(%ebp) +; WIN-X86-NEXT: pushl 44(%ebp) +; WIN-X86-NEXT: pushl 40(%ebp) +; WIN-X86-NEXT: pushl 36(%ebp) +; WIN-X86-NEXT: pushl 32(%ebp) +; WIN-X86-NEXT: pushl 28(%ebp) +; WIN-X86-NEXT: pushl 24(%ebp) ; WIN-X86-NEXT: calll ___eqtf2 ; WIN-X86-NEXT: addl $32, %esp ; WIN-X86-NEXT: testl %eax, %eax ; WIN-X86-NEXT: je LBB38_1 ; WIN-X86-NEXT: # %bb.2: -; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %ecx +; WIN-X86-NEXT: leal 16(%ebp), %ecx ; WIN-X86-NEXT: jmp LBB38_3 ; WIN-X86-NEXT: LBB38_1: -; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %ecx +; WIN-X86-NEXT: leal 8(%ebp), %ecx ; WIN-X86-NEXT: LBB38_3: ; WIN-X86-NEXT: movl (%ecx), %eax ; WIN-X86-NEXT: movl 4(%ecx), %edx +; WIN-X86-NEXT: movl %ebp, %esp +; WIN-X86-NEXT: popl %ebp ; WIN-X86-NEXT: retl %cond = call i1 @llvm.experimental.constrained.fcmps.f128( fp128 %x, fp128 %y, @@ -3496,44 +3950,47 @@ define i64 @cmp_ueq_q(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 { ; 
WIN-X86-LABEL: cmp_ueq_q: ; WIN-X86: # %bb.0: ; WIN-X86-NEXT: pushl %ebp +; WIN-X86-NEXT: movl %esp, %ebp ; WIN-X86-NEXT: pushl %ebx ; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi -; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) +; WIN-X86-NEXT: andl $-16, %esp +; WIN-X86-NEXT: subl $16, %esp +; WIN-X86-NEXT: movl 32(%ebp), %edi +; WIN-X86-NEXT: movl 36(%ebp), %esi +; WIN-X86-NEXT: pushl 52(%ebp) +; WIN-X86-NEXT: pushl 48(%ebp) +; WIN-X86-NEXT: pushl 44(%ebp) +; WIN-X86-NEXT: pushl 40(%ebp) ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: pushl %edi -; WIN-X86-NEXT: pushl %ebp -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) +; WIN-X86-NEXT: pushl 28(%ebp) +; WIN-X86-NEXT: pushl 24(%ebp) ; WIN-X86-NEXT: calll ___eqtf2 ; WIN-X86-NEXT: addl $32, %esp ; WIN-X86-NEXT: testl %eax, %eax ; WIN-X86-NEXT: sete %bl -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) +; WIN-X86-NEXT: pushl 52(%ebp) +; WIN-X86-NEXT: pushl 48(%ebp) +; WIN-X86-NEXT: pushl 44(%ebp) +; WIN-X86-NEXT: pushl 40(%ebp) ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: pushl %edi -; WIN-X86-NEXT: pushl %ebp -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) +; WIN-X86-NEXT: pushl 28(%ebp) +; WIN-X86-NEXT: pushl 24(%ebp) ; WIN-X86-NEXT: calll ___unordtf2 ; WIN-X86-NEXT: addl $32, %esp ; WIN-X86-NEXT: orb %bl, %al ; WIN-X86-NEXT: jne LBB39_1 ; WIN-X86-NEXT: # %bb.2: -; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %ecx +; WIN-X86-NEXT: leal 16(%ebp), %ecx ; WIN-X86-NEXT: jmp LBB39_3 ; WIN-X86-NEXT: LBB39_1: -; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %ecx +; WIN-X86-NEXT: leal 8(%ebp), %ecx ; WIN-X86-NEXT: LBB39_3: ; WIN-X86-NEXT: movl (%ecx), %eax ; WIN-X86-NEXT: movl 4(%ecx), %edx +; WIN-X86-NEXT: leal -12(%ebp), %esp ; WIN-X86-NEXT: popl %esi ; WIN-X86-NEXT: popl %edi ; WIN-X86-NEXT: popl %ebx @@ -3716,32 +4173,34 @@ define i64 @cmp_one_q(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 { ; WIN-X86-LABEL: cmp_one_q: ; WIN-X86: # %bb.0: ; WIN-X86-NEXT: pushl %ebp +; WIN-X86-NEXT: movl %esp, %ebp ; WIN-X86-NEXT: pushl %ebx ; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi -; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) +; WIN-X86-NEXT: andl $-16, %esp +; WIN-X86-NEXT: subl $16, %esp +; WIN-X86-NEXT: movl 32(%ebp), %edi +; WIN-X86-NEXT: movl 36(%ebp), %esi +; WIN-X86-NEXT: pushl 52(%ebp) +; WIN-X86-NEXT: pushl 48(%ebp) +; WIN-X86-NEXT: pushl 44(%ebp) +; WIN-X86-NEXT: pushl 40(%ebp) ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: pushl %edi -; WIN-X86-NEXT: pushl %ebp -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) +; WIN-X86-NEXT: pushl 28(%ebp) +; WIN-X86-NEXT: pushl 24(%ebp) ; WIN-X86-NEXT: calll ___eqtf2 ; WIN-X86-NEXT: addl $32, %esp ; WIN-X86-NEXT: testl %eax, %eax ; WIN-X86-NEXT: setne %bl -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) +; WIN-X86-NEXT: pushl 52(%ebp) +; WIN-X86-NEXT: pushl 48(%ebp) +; WIN-X86-NEXT: pushl 44(%ebp) +; WIN-X86-NEXT: pushl 40(%ebp) ; WIN-X86-NEXT: 
pushl %esi ; WIN-X86-NEXT: pushl %edi -; WIN-X86-NEXT: pushl %ebp -; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp) +; WIN-X86-NEXT: pushl 28(%ebp) +; WIN-X86-NEXT: pushl 24(%ebp) ; WIN-X86-NEXT: calll ___unordtf2 ; WIN-X86-NEXT: addl $32, %esp ; WIN-X86-NEXT: testl %eax, %eax @@ -3749,13 +4208,14 @@ define i64 @cmp_one_q(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 { ; WIN-X86-NEXT: testb %bl, %al ; WIN-X86-NEXT: jne LBB40_1 ; WIN-X86-NEXT: # %bb.2: -; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %ecx +; WIN-X86-NEXT: leal 16(%ebp), %ecx ; WIN-X86-NEXT: jmp LBB40_3 ; WIN-X86-NEXT: LBB40_1: -; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %ecx +; WIN-X86-NEXT: leal 8(%ebp), %ecx ; WIN-X86-NEXT: LBB40_3: ; WIN-X86-NEXT: movl (%ecx), %eax ; WIN-X86-NEXT: movl 4(%ecx), %edx +; WIN-X86-NEXT: leal -12(%ebp), %esp ; WIN-X86-NEXT: popl %esi ; WIN-X86-NEXT: popl %edi ; WIN-X86-NEXT: popl %ebx diff --git a/llvm/test/CodeGen/X86/fp128-libcalls.ll b/llvm/test/CodeGen/X86/fp128-libcalls.ll index f727a79..4b0449f 100644 --- a/llvm/test/CodeGen/X86/fp128-libcalls.ll +++ b/llvm/test/CodeGen/X86/fp128-libcalls.ll @@ -42,22 +42,38 @@ define dso_local void @Test128Add(fp128 %d1, fp128 %d2) nounwind { ; ; X86-LABEL: Test128Add: ; X86: # %bb.0: # %entry -; X86-NEXT: subl $40, %esp +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $76, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll __addtf3 -; X86-NEXT: addl $44, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, vf128 -; X86-NEXT: addl $28, %esp +; X86-NEXT: addl $76, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; WIN-LABEL: Test128Add: @@ -78,22 +94,31 @@ define dso_local void @Test128Add(fp128 %d1, fp128 %d2) nounwind { ; WIN-X86: # %bb.0: # %entry ; WIN-X86-NEXT: pushl %ebp ; WIN-X86-NEXT: movl %esp, %ebp +; WIN-X86-NEXT: pushl %ebx +; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $32, %esp -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 36(%ebp) -; WIN-X86-NEXT: pushl 32(%ebp) -; WIN-X86-NEXT: pushl 28(%ebp) -; WIN-X86-NEXT: pushl 24(%ebp) -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl 8(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: subl $80, %esp +; WIN-X86-NEXT: movl 16(%ebp), %edx +; WIN-X86-NEXT: movl 20(%ebp), %esi +; 
WIN-X86-NEXT: movl 24(%ebp), %edi +; WIN-X86-NEXT: movl 28(%ebp), %ebx +; WIN-X86-NEXT: movl 32(%ebp), %ecx +; WIN-X86-NEXT: movl 36(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 12(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 8(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll ___addtf3 -; WIN-X86-NEXT: addl $36, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -101,8 +126,10 @@ define dso_local void @Test128Add(fp128 %d1, fp128 %d2) nounwind { ; WIN-X86-NEXT: movl %edx, _vf128+8 ; WIN-X86-NEXT: movl %ecx, _vf128+4 ; WIN-X86-NEXT: movl %eax, _vf128 -; WIN-X86-NEXT: leal -4(%ebp), %esp +; WIN-X86-NEXT: leal -12(%ebp), %esp ; WIN-X86-NEXT: popl %esi +; WIN-X86-NEXT: popl %edi +; WIN-X86-NEXT: popl %ebx ; WIN-X86-NEXT: popl %ebp ; WIN-X86-NEXT: retl entry: @@ -144,22 +171,38 @@ define dso_local void @Test128_1Add(fp128 %d1) nounwind { ; ; X86-LABEL: Test128_1Add: ; X86: # %bb.0: # %entry -; X86-NEXT: subl $40, %esp +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $76, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl vf128, %edi +; X86-NEXT: movl vf128+4, %ebx +; X86-NEXT: movl vf128+8, %ebp +; X86-NEXT: movl vf128+12, %eax +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl vf128+12 -; X86-NEXT: pushl vf128+8 -; X86-NEXT: pushl vf128+4 -; X86-NEXT: pushl vf128 -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll __addtf3 -; X86-NEXT: addl $44, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, vf128 -; X86-NEXT: addl $28, %esp +; X86-NEXT: addl $76, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; WIN-LABEL: Test128_1Add: @@ -180,22 +223,31 @@ define dso_local void @Test128_1Add(fp128 %d1) nounwind { ; WIN-X86: # %bb.0: # %entry ; WIN-X86-NEXT: pushl %ebp ; WIN-X86-NEXT: movl %esp, %ebp +; WIN-X86-NEXT: pushl %ebx +; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $32, %esp -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl 8(%ebp) -; WIN-X86-NEXT: pushl _vf128+12 -; WIN-X86-NEXT: pushl _vf128+8 -; WIN-X86-NEXT: pushl _vf128+4 -; 
WIN-X86-NEXT: pushl _vf128 -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: subl $80, %esp +; WIN-X86-NEXT: movl 16(%ebp), %esi +; WIN-X86-NEXT: movl 20(%ebp), %edi +; WIN-X86-NEXT: movl _vf128, %edx +; WIN-X86-NEXT: movl _vf128+4, %ebx +; WIN-X86-NEXT: movl _vf128+8, %ecx +; WIN-X86-NEXT: movl _vf128+12, %eax +; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 12(%ebp), %esi +; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 8(%ebp), %esi +; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll ___addtf3 -; WIN-X86-NEXT: addl $36, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -203,8 +255,10 @@ define dso_local void @Test128_1Add(fp128 %d1) nounwind { ; WIN-X86-NEXT: movl %edx, _vf128+12 ; WIN-X86-NEXT: movl %eax, _vf128 ; WIN-X86-NEXT: movl %ecx, _vf128+4 -; WIN-X86-NEXT: leal -4(%ebp), %esp +; WIN-X86-NEXT: leal -12(%ebp), %esp ; WIN-X86-NEXT: popl %esi +; WIN-X86-NEXT: popl %edi +; WIN-X86-NEXT: popl %ebx ; WIN-X86-NEXT: popl %ebp ; WIN-X86-NEXT: retl entry: @@ -241,22 +295,38 @@ define dso_local void @Test128Sub(fp128 %d1, fp128 %d2) nounwind { ; ; X86-LABEL: Test128Sub: ; X86: # %bb.0: # %entry -; X86-NEXT: subl $40, %esp +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $76, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll __subtf3 -; X86-NEXT: addl $44, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, vf128 -; X86-NEXT: addl $28, %esp +; X86-NEXT: addl $76, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; WIN-LABEL: Test128Sub: @@ -277,22 +347,31 @@ define dso_local void @Test128Sub(fp128 %d1, fp128 %d2) nounwind { ; WIN-X86: # %bb.0: # %entry ; WIN-X86-NEXT: pushl %ebp ; WIN-X86-NEXT: movl %esp, %ebp +; WIN-X86-NEXT: pushl %ebx +; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $32, %esp -; WIN-X86-NEXT: movl 
%esp, %eax -; WIN-X86-NEXT: pushl 36(%ebp) -; WIN-X86-NEXT: pushl 32(%ebp) -; WIN-X86-NEXT: pushl 28(%ebp) -; WIN-X86-NEXT: pushl 24(%ebp) -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl 8(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: subl $80, %esp +; WIN-X86-NEXT: movl 16(%ebp), %edx +; WIN-X86-NEXT: movl 20(%ebp), %esi +; WIN-X86-NEXT: movl 24(%ebp), %edi +; WIN-X86-NEXT: movl 28(%ebp), %ebx +; WIN-X86-NEXT: movl 32(%ebp), %ecx +; WIN-X86-NEXT: movl 36(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 12(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 8(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll ___subtf3 -; WIN-X86-NEXT: addl $36, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -300,8 +379,10 @@ define dso_local void @Test128Sub(fp128 %d1, fp128 %d2) nounwind { ; WIN-X86-NEXT: movl %edx, _vf128+8 ; WIN-X86-NEXT: movl %ecx, _vf128+4 ; WIN-X86-NEXT: movl %eax, _vf128 -; WIN-X86-NEXT: leal -4(%ebp), %esp +; WIN-X86-NEXT: leal -12(%ebp), %esp ; WIN-X86-NEXT: popl %esi +; WIN-X86-NEXT: popl %edi +; WIN-X86-NEXT: popl %ebx ; WIN-X86-NEXT: popl %ebp ; WIN-X86-NEXT: retl entry: @@ -343,22 +424,38 @@ define dso_local void @Test128_1Sub(fp128 %d1) nounwind { ; ; X86-LABEL: Test128_1Sub: ; X86: # %bb.0: # %entry -; X86-NEXT: subl $40, %esp +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $76, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl vf128, %edi +; X86-NEXT: movl vf128+4, %ebx +; X86-NEXT: movl vf128+8, %ebp +; X86-NEXT: movl vf128+12, %eax +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl vf128+12 -; X86-NEXT: pushl vf128+8 -; X86-NEXT: pushl vf128+4 -; X86-NEXT: pushl vf128 -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll __subtf3 -; X86-NEXT: addl $44, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, vf128 -; X86-NEXT: addl $28, %esp +; X86-NEXT: addl $76, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; WIN-LABEL: Test128_1Sub: @@ -379,22 +476,31 @@ define dso_local void @Test128_1Sub(fp128 %d1) nounwind { ; WIN-X86: # %bb.0: # %entry ; WIN-X86-NEXT: pushl %ebp ; WIN-X86-NEXT: movl %esp, %ebp +; 
WIN-X86-NEXT: pushl %ebx +; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $32, %esp -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl 8(%ebp) -; WIN-X86-NEXT: pushl _vf128+12 -; WIN-X86-NEXT: pushl _vf128+8 -; WIN-X86-NEXT: pushl _vf128+4 -; WIN-X86-NEXT: pushl _vf128 -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: subl $80, %esp +; WIN-X86-NEXT: movl 16(%ebp), %esi +; WIN-X86-NEXT: movl 20(%ebp), %edi +; WIN-X86-NEXT: movl _vf128, %edx +; WIN-X86-NEXT: movl _vf128+4, %ebx +; WIN-X86-NEXT: movl _vf128+8, %ecx +; WIN-X86-NEXT: movl _vf128+12, %eax +; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 12(%ebp), %esi +; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 8(%ebp), %esi +; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll ___subtf3 -; WIN-X86-NEXT: addl $36, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -402,8 +508,10 @@ define dso_local void @Test128_1Sub(fp128 %d1) nounwind { ; WIN-X86-NEXT: movl %edx, _vf128+12 ; WIN-X86-NEXT: movl %eax, _vf128 ; WIN-X86-NEXT: movl %ecx, _vf128+4 -; WIN-X86-NEXT: leal -4(%ebp), %esp +; WIN-X86-NEXT: leal -12(%ebp), %esp ; WIN-X86-NEXT: popl %esi +; WIN-X86-NEXT: popl %edi +; WIN-X86-NEXT: popl %ebx ; WIN-X86-NEXT: popl %ebp ; WIN-X86-NEXT: retl entry: @@ -440,22 +548,38 @@ define dso_local void @Test128Mul(fp128 %d1, fp128 %d2) nounwind { ; ; X86-LABEL: Test128Mul: ; X86: # %bb.0: # %entry -; X86-NEXT: subl $40, %esp +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $76, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll __multf3 -; X86-NEXT: addl $44, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, vf128 -; X86-NEXT: addl $28, %esp +; X86-NEXT: addl $76, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; 
X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; WIN-LABEL: Test128Mul: @@ -476,22 +600,31 @@ define dso_local void @Test128Mul(fp128 %d1, fp128 %d2) nounwind { ; WIN-X86: # %bb.0: # %entry ; WIN-X86-NEXT: pushl %ebp ; WIN-X86-NEXT: movl %esp, %ebp +; WIN-X86-NEXT: pushl %ebx +; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $32, %esp -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 36(%ebp) -; WIN-X86-NEXT: pushl 32(%ebp) -; WIN-X86-NEXT: pushl 28(%ebp) -; WIN-X86-NEXT: pushl 24(%ebp) -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl 8(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: subl $80, %esp +; WIN-X86-NEXT: movl 16(%ebp), %edx +; WIN-X86-NEXT: movl 20(%ebp), %esi +; WIN-X86-NEXT: movl 24(%ebp), %edi +; WIN-X86-NEXT: movl 28(%ebp), %ebx +; WIN-X86-NEXT: movl 32(%ebp), %ecx +; WIN-X86-NEXT: movl 36(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 12(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 8(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll ___multf3 -; WIN-X86-NEXT: addl $36, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -499,8 +632,10 @@ define dso_local void @Test128Mul(fp128 %d1, fp128 %d2) nounwind { ; WIN-X86-NEXT: movl %edx, _vf128+8 ; WIN-X86-NEXT: movl %ecx, _vf128+4 ; WIN-X86-NEXT: movl %eax, _vf128 -; WIN-X86-NEXT: leal -4(%ebp), %esp +; WIN-X86-NEXT: leal -12(%ebp), %esp ; WIN-X86-NEXT: popl %esi +; WIN-X86-NEXT: popl %edi +; WIN-X86-NEXT: popl %ebx ; WIN-X86-NEXT: popl %ebp ; WIN-X86-NEXT: retl entry: @@ -542,22 +677,38 @@ define dso_local void @Test128_1Mul(fp128 %d1) nounwind { ; ; X86-LABEL: Test128_1Mul: ; X86: # %bb.0: # %entry -; X86-NEXT: subl $40, %esp +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $76, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl vf128, %edi +; X86-NEXT: movl vf128+4, %ebx +; X86-NEXT: movl vf128+8, %ebp +; X86-NEXT: movl vf128+12, %eax +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl vf128+12 -; X86-NEXT: pushl vf128+8 -; X86-NEXT: pushl vf128+4 -; X86-NEXT: pushl vf128 -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll __multf3 -; X86-NEXT: addl $44, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), 
%xmm0 ; X86-NEXT: movaps %xmm0, vf128 -; X86-NEXT: addl $28, %esp +; X86-NEXT: addl $76, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; WIN-LABEL: Test128_1Mul: @@ -578,22 +729,31 @@ define dso_local void @Test128_1Mul(fp128 %d1) nounwind { ; WIN-X86: # %bb.0: # %entry ; WIN-X86-NEXT: pushl %ebp ; WIN-X86-NEXT: movl %esp, %ebp +; WIN-X86-NEXT: pushl %ebx +; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $32, %esp -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl 8(%ebp) -; WIN-X86-NEXT: pushl _vf128+12 -; WIN-X86-NEXT: pushl _vf128+8 -; WIN-X86-NEXT: pushl _vf128+4 -; WIN-X86-NEXT: pushl _vf128 -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: subl $80, %esp +; WIN-X86-NEXT: movl 16(%ebp), %esi +; WIN-X86-NEXT: movl 20(%ebp), %edi +; WIN-X86-NEXT: movl _vf128, %edx +; WIN-X86-NEXT: movl _vf128+4, %ebx +; WIN-X86-NEXT: movl _vf128+8, %ecx +; WIN-X86-NEXT: movl _vf128+12, %eax +; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 12(%ebp), %esi +; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 8(%ebp), %esi +; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll ___multf3 -; WIN-X86-NEXT: addl $36, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -601,8 +761,10 @@ define dso_local void @Test128_1Mul(fp128 %d1) nounwind { ; WIN-X86-NEXT: movl %edx, _vf128+12 ; WIN-X86-NEXT: movl %eax, _vf128 ; WIN-X86-NEXT: movl %ecx, _vf128+4 -; WIN-X86-NEXT: leal -4(%ebp), %esp +; WIN-X86-NEXT: leal -12(%ebp), %esp ; WIN-X86-NEXT: popl %esi +; WIN-X86-NEXT: popl %edi +; WIN-X86-NEXT: popl %ebx ; WIN-X86-NEXT: popl %ebp ; WIN-X86-NEXT: retl entry: @@ -639,22 +801,38 @@ define dso_local void @Test128Div(fp128 %d1, fp128 %d2) nounwind { ; ; X86-LABEL: Test128Div: ; X86: # %bb.0: # %entry -; X86-NEXT: subl $40, %esp +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $76, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: 
pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll __divtf3 -; X86-NEXT: addl $44, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, vf128 -; X86-NEXT: addl $28, %esp +; X86-NEXT: addl $76, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; WIN-LABEL: Test128Div: @@ -675,22 +853,31 @@ define dso_local void @Test128Div(fp128 %d1, fp128 %d2) nounwind { ; WIN-X86: # %bb.0: # %entry ; WIN-X86-NEXT: pushl %ebp ; WIN-X86-NEXT: movl %esp, %ebp +; WIN-X86-NEXT: pushl %ebx +; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $32, %esp -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 36(%ebp) -; WIN-X86-NEXT: pushl 32(%ebp) -; WIN-X86-NEXT: pushl 28(%ebp) -; WIN-X86-NEXT: pushl 24(%ebp) -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl 8(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: subl $80, %esp +; WIN-X86-NEXT: movl 16(%ebp), %edx +; WIN-X86-NEXT: movl 20(%ebp), %esi +; WIN-X86-NEXT: movl 24(%ebp), %edi +; WIN-X86-NEXT: movl 28(%ebp), %ebx +; WIN-X86-NEXT: movl 32(%ebp), %ecx +; WIN-X86-NEXT: movl 36(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 12(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 8(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll ___divtf3 -; WIN-X86-NEXT: addl $36, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -698,8 +885,10 @@ define dso_local void @Test128Div(fp128 %d1, fp128 %d2) nounwind { ; WIN-X86-NEXT: movl %edx, _vf128+8 ; WIN-X86-NEXT: movl %ecx, _vf128+4 ; WIN-X86-NEXT: movl %eax, _vf128 -; WIN-X86-NEXT: leal -4(%ebp), %esp +; WIN-X86-NEXT: leal -12(%ebp), %esp ; WIN-X86-NEXT: popl %esi +; WIN-X86-NEXT: popl %edi +; WIN-X86-NEXT: popl %ebx ; WIN-X86-NEXT: popl %ebp ; WIN-X86-NEXT: retl entry: @@ -741,22 +930,38 @@ define dso_local void @Test128_1Div(fp128 %d1) nounwind { ; ; X86-LABEL: Test128_1Div: ; X86: # %bb.0: # %entry -; X86-NEXT: subl $40, %esp +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $76, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl vf128, %edi +; X86-NEXT: movl vf128+4, %ebx +; X86-NEXT: movl vf128+8, %ebp +; X86-NEXT: movl vf128+12, %eax +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl 
{{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl vf128+12 -; X86-NEXT: pushl vf128+8 -; X86-NEXT: pushl vf128+4 -; X86-NEXT: pushl vf128 -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll __divtf3 -; X86-NEXT: addl $44, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, vf128 -; X86-NEXT: addl $28, %esp +; X86-NEXT: addl $76, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; WIN-LABEL: Test128_1Div: @@ -777,22 +982,31 @@ define dso_local void @Test128_1Div(fp128 %d1) nounwind { ; WIN-X86: # %bb.0: # %entry ; WIN-X86-NEXT: pushl %ebp ; WIN-X86-NEXT: movl %esp, %ebp +; WIN-X86-NEXT: pushl %ebx +; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $32, %esp -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl 8(%ebp) -; WIN-X86-NEXT: pushl _vf128+12 -; WIN-X86-NEXT: pushl _vf128+8 -; WIN-X86-NEXT: pushl _vf128+4 -; WIN-X86-NEXT: pushl _vf128 -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: subl $80, %esp +; WIN-X86-NEXT: movl 16(%ebp), %esi +; WIN-X86-NEXT: movl 20(%ebp), %edi +; WIN-X86-NEXT: movl _vf128, %edx +; WIN-X86-NEXT: movl _vf128+4, %ebx +; WIN-X86-NEXT: movl _vf128+8, %ecx +; WIN-X86-NEXT: movl _vf128+12, %eax +; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 12(%ebp), %esi +; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 8(%ebp), %esi +; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll ___divtf3 -; WIN-X86-NEXT: addl $36, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -800,8 +1014,10 @@ define dso_local void @Test128_1Div(fp128 %d1) nounwind { ; WIN-X86-NEXT: movl %edx, _vf128+12 ; WIN-X86-NEXT: movl %eax, _vf128 ; WIN-X86-NEXT: movl %ecx, _vf128+4 -; WIN-X86-NEXT: leal -4(%ebp), %esp +; WIN-X86-NEXT: leal -12(%ebp), %esp ; WIN-X86-NEXT: popl %esi +; WIN-X86-NEXT: popl %edi +; WIN-X86-NEXT: popl %ebx ; WIN-X86-NEXT: popl %ebp ; WIN-X86-NEXT: retl entry: @@ -830,22 +1046,38 @@ define dso_local void @Test128Rem(fp128 %d1, fp128 %d2) nounwind { ; ; X86-LABEL: Test128Rem: ; X86: # %bb.0: # %entry -; X86-NEXT: subl $40, %esp +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $76, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl 
%ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll fmodf128 -; X86-NEXT: addl $44, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, vf128 -; X86-NEXT: addl $28, %esp +; X86-NEXT: addl $76, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; WIN-LABEL: Test128Rem: @@ -866,22 +1098,31 @@ define dso_local void @Test128Rem(fp128 %d1, fp128 %d2) nounwind { ; WIN-X86: # %bb.0: # %entry ; WIN-X86-NEXT: pushl %ebp ; WIN-X86-NEXT: movl %esp, %ebp +; WIN-X86-NEXT: pushl %ebx +; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $32, %esp -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 36(%ebp) -; WIN-X86-NEXT: pushl 32(%ebp) -; WIN-X86-NEXT: pushl 28(%ebp) -; WIN-X86-NEXT: pushl 24(%ebp) -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl 8(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: subl $80, %esp +; WIN-X86-NEXT: movl 16(%ebp), %edx +; WIN-X86-NEXT: movl 20(%ebp), %esi +; WIN-X86-NEXT: movl 24(%ebp), %edi +; WIN-X86-NEXT: movl 28(%ebp), %ebx +; WIN-X86-NEXT: movl 32(%ebp), %ecx +; WIN-X86-NEXT: movl 36(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 12(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 8(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll _fmodl -; WIN-X86-NEXT: addl $36, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -889,8 +1130,10 @@ define dso_local void @Test128Rem(fp128 %d1, fp128 %d2) nounwind { ; WIN-X86-NEXT: movl %edx, _vf128+8 ; WIN-X86-NEXT: movl %ecx, _vf128+4 ; WIN-X86-NEXT: movl %eax, _vf128 -; WIN-X86-NEXT: leal -4(%ebp), %esp +; WIN-X86-NEXT: leal -12(%ebp), %esp ; WIN-X86-NEXT: popl %esi +; WIN-X86-NEXT: popl %edi +; WIN-X86-NEXT: popl %ebx ; WIN-X86-NEXT: popl %ebp ; WIN-X86-NEXT: retl entry: @@ -922,22 +1165,38 @@ define dso_local void @Test128_1Rem(fp128 %d1) nounwind { ; ; X86-LABEL: Test128_1Rem: ; X86: # %bb.0: # %entry -; X86-NEXT: subl $40, %esp +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $76, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl vf128, %edi +; X86-NEXT: movl vf128+4, %ebx +; X86-NEXT: movl vf128+8, %ebp +; X86-NEXT: movl vf128+12, %eax +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl 
%edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl vf128+12 -; X86-NEXT: pushl vf128+8 -; X86-NEXT: pushl vf128+4 -; X86-NEXT: pushl vf128 -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll fmodf128 -; X86-NEXT: addl $44, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, vf128 -; X86-NEXT: addl $28, %esp +; X86-NEXT: addl $76, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; WIN-LABEL: Test128_1Rem: @@ -958,22 +1217,31 @@ define dso_local void @Test128_1Rem(fp128 %d1) nounwind { ; WIN-X86: # %bb.0: # %entry ; WIN-X86-NEXT: pushl %ebp ; WIN-X86-NEXT: movl %esp, %ebp +; WIN-X86-NEXT: pushl %ebx +; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $32, %esp -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl 8(%ebp) -; WIN-X86-NEXT: pushl _vf128+12 -; WIN-X86-NEXT: pushl _vf128+8 -; WIN-X86-NEXT: pushl _vf128+4 -; WIN-X86-NEXT: pushl _vf128 -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: subl $80, %esp +; WIN-X86-NEXT: movl 16(%ebp), %esi +; WIN-X86-NEXT: movl 20(%ebp), %edi +; WIN-X86-NEXT: movl _vf128, %edx +; WIN-X86-NEXT: movl _vf128+4, %ebx +; WIN-X86-NEXT: movl _vf128+8, %ecx +; WIN-X86-NEXT: movl _vf128+12, %eax +; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 12(%ebp), %esi +; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 8(%ebp), %esi +; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll _fmodl -; WIN-X86-NEXT: addl $36, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -981,8 +1249,10 @@ define dso_local void @Test128_1Rem(fp128 %d1) nounwind { ; WIN-X86-NEXT: movl %edx, _vf128+12 ; WIN-X86-NEXT: movl %eax, _vf128 ; WIN-X86-NEXT: movl %ecx, _vf128+4 -; WIN-X86-NEXT: leal -4(%ebp), %esp +; WIN-X86-NEXT: leal -12(%ebp), %esp ; WIN-X86-NEXT: popl %esi +; WIN-X86-NEXT: popl %edi +; WIN-X86-NEXT: popl %ebx ; WIN-X86-NEXT: popl %ebp ; WIN-X86-NEXT: retl entry: @@ -1011,18 +1281,24 @@ define dso_local void @Test128Sqrt(fp128 %d1) nounwind { ; ; X86-LABEL: Test128Sqrt: ; X86: # %bb.0: # %entry -; X86-NEXT: subl $40, %esp +; X86-NEXT: pushl %esi +; X86-NEXT: subl $56, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, 
{{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll sqrtf128 -; X86-NEXT: addl $28, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, vf128 -; X86-NEXT: addl $28, %esp +; X86-NEXT: addl $56, %esp +; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; WIN-LABEL: Test128Sqrt: @@ -1042,16 +1318,19 @@ define dso_local void @Test128Sqrt(fp128 %d1) nounwind { ; WIN-X86-NEXT: movl %esp, %ebp ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $32, %esp -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl 8(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: subl $64, %esp +; WIN-X86-NEXT: movl 8(%ebp), %eax +; WIN-X86-NEXT: movl 12(%ebp), %ecx +; WIN-X86-NEXT: movl 16(%ebp), %edx +; WIN-X86-NEXT: movl 20(%ebp), %esi +; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll _sqrtl -; WIN-X86-NEXT: addl $20, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -1089,18 +1368,24 @@ define dso_local void @Test128Sin(fp128 %d1) nounwind { ; ; X86-LABEL: Test128Sin: ; X86: # %bb.0: # %entry -; X86-NEXT: subl $40, %esp +; X86-NEXT: pushl %esi +; X86-NEXT: subl $56, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll sinf128 -; X86-NEXT: addl $28, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, vf128 -; X86-NEXT: addl $28, %esp +; X86-NEXT: addl $56, %esp +; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; WIN-LABEL: Test128Sin: @@ -1120,16 +1405,19 @@ define dso_local void @Test128Sin(fp128 %d1) nounwind { ; WIN-X86-NEXT: movl %esp, %ebp ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $32, %esp -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl 8(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: subl $64, %esp +; WIN-X86-NEXT: movl 8(%ebp), %eax +; WIN-X86-NEXT: movl 12(%ebp), %ecx +; WIN-X86-NEXT: movl 16(%ebp), %edx +; WIN-X86-NEXT: movl 20(%ebp), %esi +; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, 
{{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll _sinl -; WIN-X86-NEXT: addl $20, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -1167,18 +1455,24 @@ define dso_local void @Test128Cos(fp128 %d1) nounwind { ; ; X86-LABEL: Test128Cos: ; X86: # %bb.0: # %entry -; X86-NEXT: subl $40, %esp +; X86-NEXT: pushl %esi +; X86-NEXT: subl $56, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll cosf128 -; X86-NEXT: addl $28, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, vf128 -; X86-NEXT: addl $28, %esp +; X86-NEXT: addl $56, %esp +; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; WIN-LABEL: Test128Cos: @@ -1198,16 +1492,19 @@ define dso_local void @Test128Cos(fp128 %d1) nounwind { ; WIN-X86-NEXT: movl %esp, %ebp ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $32, %esp -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl 8(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: subl $64, %esp +; WIN-X86-NEXT: movl 8(%ebp), %eax +; WIN-X86-NEXT: movl 12(%ebp), %ecx +; WIN-X86-NEXT: movl 16(%ebp), %edx +; WIN-X86-NEXT: movl 20(%ebp), %esi +; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll _cosl -; WIN-X86-NEXT: addl $20, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -1245,18 +1542,24 @@ define dso_local void @Test128Ceil(fp128 %d1) nounwind { ; ; X86-LABEL: Test128Ceil: ; X86: # %bb.0: # %entry -; X86-NEXT: subl $40, %esp +; X86-NEXT: pushl %esi +; X86-NEXT: subl $56, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll ceilf128 -; X86-NEXT: addl $28, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; 
X86-NEXT: movaps %xmm0, vf128 -; X86-NEXT: addl $28, %esp +; X86-NEXT: addl $56, %esp +; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; WIN-LABEL: Test128Ceil: @@ -1276,16 +1579,19 @@ define dso_local void @Test128Ceil(fp128 %d1) nounwind { ; WIN-X86-NEXT: movl %esp, %ebp ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $32, %esp -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl 8(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: subl $64, %esp +; WIN-X86-NEXT: movl 8(%ebp), %eax +; WIN-X86-NEXT: movl 12(%ebp), %ecx +; WIN-X86-NEXT: movl 16(%ebp), %edx +; WIN-X86-NEXT: movl 20(%ebp), %esi +; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll _ceill -; WIN-X86-NEXT: addl $20, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -1323,18 +1629,24 @@ define dso_local void @Test128Floor(fp128 %d1) nounwind { ; ; X86-LABEL: Test128Floor: ; X86: # %bb.0: # %entry -; X86-NEXT: subl $40, %esp +; X86-NEXT: pushl %esi +; X86-NEXT: subl $56, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll floorf128 -; X86-NEXT: addl $28, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, vf128 -; X86-NEXT: addl $28, %esp +; X86-NEXT: addl $56, %esp +; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; WIN-LABEL: Test128Floor: @@ -1354,16 +1666,19 @@ define dso_local void @Test128Floor(fp128 %d1) nounwind { ; WIN-X86-NEXT: movl %esp, %ebp ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $32, %esp -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl 8(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: subl $64, %esp +; WIN-X86-NEXT: movl 8(%ebp), %eax +; WIN-X86-NEXT: movl 12(%ebp), %ecx +; WIN-X86-NEXT: movl 16(%ebp), %edx +; WIN-X86-NEXT: movl 20(%ebp), %esi +; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll _floorl -; WIN-X86-NEXT: addl $20, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -1401,18 +1716,24 @@ define dso_local void @Test128Trunc(fp128 %d1) nounwind { ; ; 
X86-LABEL: Test128Trunc: ; X86: # %bb.0: # %entry -; X86-NEXT: subl $40, %esp +; X86-NEXT: pushl %esi +; X86-NEXT: subl $56, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll truncf128 -; X86-NEXT: addl $28, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, vf128 -; X86-NEXT: addl $28, %esp +; X86-NEXT: addl $56, %esp +; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; WIN-LABEL: Test128Trunc: @@ -1432,16 +1753,19 @@ define dso_local void @Test128Trunc(fp128 %d1) nounwind { ; WIN-X86-NEXT: movl %esp, %ebp ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $32, %esp -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl 8(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: subl $64, %esp +; WIN-X86-NEXT: movl 8(%ebp), %eax +; WIN-X86-NEXT: movl 12(%ebp), %ecx +; WIN-X86-NEXT: movl 16(%ebp), %edx +; WIN-X86-NEXT: movl 20(%ebp), %esi +; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll _truncl -; WIN-X86-NEXT: addl $20, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -1479,18 +1803,24 @@ define dso_local void @Test128Nearbyint(fp128 %d1) nounwind { ; ; X86-LABEL: Test128Nearbyint: ; X86: # %bb.0: # %entry -; X86-NEXT: subl $40, %esp +; X86-NEXT: pushl %esi +; X86-NEXT: subl $56, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll nearbyintf128 -; X86-NEXT: addl $28, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, vf128 -; X86-NEXT: addl $28, %esp +; X86-NEXT: addl $56, %esp +; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; WIN-LABEL: Test128Nearbyint: @@ -1510,16 +1840,19 @@ define dso_local void @Test128Nearbyint(fp128 %d1) nounwind { ; WIN-X86-NEXT: movl %esp, %ebp ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $32, %esp -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; 
WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl 8(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: subl $64, %esp +; WIN-X86-NEXT: movl 8(%ebp), %eax +; WIN-X86-NEXT: movl 12(%ebp), %ecx +; WIN-X86-NEXT: movl 16(%ebp), %edx +; WIN-X86-NEXT: movl 20(%ebp), %esi +; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll _nearbyintl -; WIN-X86-NEXT: addl $20, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -1557,18 +1890,24 @@ define dso_local void @Test128Rint(fp128 %d1) nounwind { ; ; X86-LABEL: Test128Rint: ; X86: # %bb.0: # %entry -; X86-NEXT: subl $40, %esp +; X86-NEXT: pushl %esi +; X86-NEXT: subl $56, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll rintf128 -; X86-NEXT: addl $28, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, vf128 -; X86-NEXT: addl $28, %esp +; X86-NEXT: addl $56, %esp +; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; WIN-LABEL: Test128Rint: @@ -1588,16 +1927,19 @@ define dso_local void @Test128Rint(fp128 %d1) nounwind { ; WIN-X86-NEXT: movl %esp, %ebp ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $32, %esp -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl 8(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: subl $64, %esp +; WIN-X86-NEXT: movl 8(%ebp), %eax +; WIN-X86-NEXT: movl 12(%ebp), %ecx +; WIN-X86-NEXT: movl 16(%ebp), %edx +; WIN-X86-NEXT: movl 20(%ebp), %esi +; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll _rintl -; WIN-X86-NEXT: addl $20, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -1635,18 +1977,24 @@ define dso_local void @Test128Round(fp128 %d1) nounwind { ; ; X86-LABEL: Test128Round: ; X86: # %bb.0: # %entry -; X86-NEXT: subl $40, %esp +; X86-NEXT: pushl %esi +; X86-NEXT: subl $56, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; 
X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll roundf128 -; X86-NEXT: addl $28, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, vf128 -; X86-NEXT: addl $28, %esp +; X86-NEXT: addl $56, %esp +; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; WIN-LABEL: Test128Round: @@ -1666,16 +2014,19 @@ define dso_local void @Test128Round(fp128 %d1) nounwind { ; WIN-X86-NEXT: movl %esp, %ebp ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $32, %esp -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl 8(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: subl $64, %esp +; WIN-X86-NEXT: movl 8(%ebp), %eax +; WIN-X86-NEXT: movl 12(%ebp), %ecx +; WIN-X86-NEXT: movl 16(%ebp), %edx +; WIN-X86-NEXT: movl 20(%ebp), %esi +; WIN-X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll _roundl -; WIN-X86-NEXT: addl $20, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -1705,31 +2056,48 @@ define fp128 @Test128FMA(fp128 %a, fp128 %b, fp128 %c) nounwind { ; ; X86-LABEL: Test128FMA: ; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $24, %esp +; X86-NEXT: subl $92, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: subl $12, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll 
fmaf128 -; X86-NEXT: addl $60, %esp -; X86-NEXT: movaps (%esp), %xmm0 -; X86-NEXT: movaps %xmm0, (%esi) -; X86-NEXT: movl %esi, %eax -; X86-NEXT: addl $24, %esp +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: movaps %xmm0, (%ebp) +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: addl $92, %esp ; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl $4 ; ; WIN-LABEL: Test128FMA: @@ -1752,28 +2120,40 @@ define fp128 @Test128FMA(fp128 %a, fp128 %b, fp128 %c) nounwind { ; WIN-X86: # %bb.0: # %entry ; WIN-X86-NEXT: pushl %ebp ; WIN-X86-NEXT: movl %esp, %ebp +; WIN-X86-NEXT: pushl %ebx ; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $16, %esp +; WIN-X86-NEXT: subl $96, %esp ; WIN-X86-NEXT: movl 8(%ebp), %esi -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 56(%ebp) -; WIN-X86-NEXT: pushl 52(%ebp) -; WIN-X86-NEXT: pushl 48(%ebp) -; WIN-X86-NEXT: pushl 44(%ebp) -; WIN-X86-NEXT: pushl 40(%ebp) -; WIN-X86-NEXT: pushl 36(%ebp) -; WIN-X86-NEXT: pushl 32(%ebp) -; WIN-X86-NEXT: pushl 28(%ebp) -; WIN-X86-NEXT: pushl 24(%ebp) -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: movl 52(%ebp), %ebx +; WIN-X86-NEXT: movl 56(%ebp), %edi +; WIN-X86-NEXT: movl 60(%ebp), %edx +; WIN-X86-NEXT: movl 64(%ebp), %ecx +; WIN-X86-NEXT: movl 68(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 48(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 44(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 40(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 36(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 32(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 28(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 24(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll _fmal -; WIN-X86-NEXT: addl $52, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi @@ -1782,9 +2162,10 @@ define fp128 @Test128FMA(fp128 %a, fp128 %b, fp128 %c) nounwind { ; WIN-X86-NEXT: movl %ecx, 4(%esi) ; WIN-X86-NEXT: movl %eax, (%esi) ; WIN-X86-NEXT: movl %esi, %eax -; WIN-X86-NEXT: leal -8(%ebp), %esp +; WIN-X86-NEXT: leal -12(%ebp), %esp ; WIN-X86-NEXT: popl %esi ; WIN-X86-NEXT: popl %edi +; WIN-X86-NEXT: popl %ebx ; WIN-X86-NEXT: popl %ebp ; WIN-X86-NEXT: retl entry: @@ -1804,23 +2185,28 @@ define fp128 @Test128Acos(fp128 %a) nounwind { ; ; X86-LABEL: Test128Acos: ; X86: # %bb.0: +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $24, %esp +; X86-NEXT: subl $52, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: subl $12, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, 
{{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll acosf128 -; X86-NEXT: addl $28, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, (%esi) ; X86-NEXT: movl %esi, %eax -; X86-NEXT: addl $24, %esp +; X86-NEXT: addl $52, %esp ; X86-NEXT: popl %esi +; X86-NEXT: popl %edi ; X86-NEXT: retl $4 ; ; WIN-LABEL: Test128Acos: @@ -1840,17 +2226,20 @@ define fp128 @Test128Acos(fp128 %a) nounwind { ; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $16, %esp +; WIN-X86-NEXT: subl $48, %esp ; WIN-X86-NEXT: movl 8(%ebp), %esi -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 24(%ebp) -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: movl 24(%ebp), %eax +; WIN-X86-NEXT: movl 28(%ebp), %ecx +; WIN-X86-NEXT: movl 32(%ebp), %edx +; WIN-X86-NEXT: movl 36(%ebp), %edi +; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll _acosl -; WIN-X86-NEXT: addl $20, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi @@ -1879,23 +2268,28 @@ define fp128 @Test128Asin(fp128 %a) nounwind { ; ; X86-LABEL: Test128Asin: ; X86: # %bb.0: +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $24, %esp +; X86-NEXT: subl $52, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: subl $12, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll asinf128 -; X86-NEXT: addl $28, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, (%esi) ; X86-NEXT: movl %esi, %eax -; X86-NEXT: addl $24, %esp +; X86-NEXT: addl $52, %esp ; X86-NEXT: popl %esi +; X86-NEXT: popl %edi ; X86-NEXT: retl $4 ; ; WIN-LABEL: Test128Asin: @@ -1915,17 +2309,20 @@ define fp128 @Test128Asin(fp128 %a) nounwind { ; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $16, %esp +; WIN-X86-NEXT: subl $48, %esp ; WIN-X86-NEXT: movl 8(%ebp), %esi -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 24(%ebp) -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: movl 24(%ebp), %eax +; 
WIN-X86-NEXT: movl 28(%ebp), %ecx +; WIN-X86-NEXT: movl 32(%ebp), %edx +; WIN-X86-NEXT: movl 36(%ebp), %edi +; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll _asinl -; WIN-X86-NEXT: addl $20, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi @@ -1954,23 +2351,28 @@ define fp128 @Test128Atan(fp128 %a) nounwind { ; ; X86-LABEL: Test128Atan: ; X86: # %bb.0: +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $24, %esp +; X86-NEXT: subl $52, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: subl $12, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll atanf128 -; X86-NEXT: addl $28, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, (%esi) ; X86-NEXT: movl %esi, %eax -; X86-NEXT: addl $24, %esp +; X86-NEXT: addl $52, %esp ; X86-NEXT: popl %esi +; X86-NEXT: popl %edi ; X86-NEXT: retl $4 ; ; WIN-LABEL: Test128Atan: @@ -1990,17 +2392,20 @@ define fp128 @Test128Atan(fp128 %a) nounwind { ; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $16, %esp +; WIN-X86-NEXT: subl $48, %esp ; WIN-X86-NEXT: movl 8(%ebp), %esi -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 24(%ebp) -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: movl 24(%ebp), %eax +; WIN-X86-NEXT: movl 28(%ebp), %ecx +; WIN-X86-NEXT: movl 32(%ebp), %edx +; WIN-X86-NEXT: movl 36(%ebp), %edi +; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll _atanl -; WIN-X86-NEXT: addl $20, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi @@ -2029,27 +2434,40 @@ define fp128 @Test128Atan2(fp128 %a, fp128 %b) nounwind { ; ; X86-LABEL: Test128Atan2: ; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $24, %esp +; X86-NEXT: subl $76, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: subl $12, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: 
movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll atan2f128 -; X86-NEXT: addl $44, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, (%esi) ; X86-NEXT: movl %esi, %eax -; X86-NEXT: addl $24, %esp +; X86-NEXT: addl $76, %esp ; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl $4 ; ; WIN-LABEL: Test128Atan2: @@ -2069,24 +2487,32 @@ define fp128 @Test128Atan2(fp128 %a, fp128 %b) nounwind { ; WIN-X86: # %bb.0: ; WIN-X86-NEXT: pushl %ebp ; WIN-X86-NEXT: movl %esp, %ebp +; WIN-X86-NEXT: pushl %ebx ; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $16, %esp +; WIN-X86-NEXT: subl $80, %esp ; WIN-X86-NEXT: movl 8(%ebp), %esi -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 40(%ebp) -; WIN-X86-NEXT: pushl 36(%ebp) -; WIN-X86-NEXT: pushl 32(%ebp) -; WIN-X86-NEXT: pushl 28(%ebp) -; WIN-X86-NEXT: pushl 24(%ebp) -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: movl 36(%ebp), %edi +; WIN-X86-NEXT: movl 40(%ebp), %ebx +; WIN-X86-NEXT: movl 44(%ebp), %edx +; WIN-X86-NEXT: movl 48(%ebp), %ecx +; WIN-X86-NEXT: movl 52(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 32(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 28(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl 24(%ebp), %eax +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll _atan2l -; WIN-X86-NEXT: addl $36, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi @@ -2095,9 +2521,10 @@ define fp128 @Test128Atan2(fp128 %a, fp128 %b) nounwind { ; WIN-X86-NEXT: movl %ecx, 4(%esi) ; WIN-X86-NEXT: movl %eax, (%esi) ; WIN-X86-NEXT: movl %esi, %eax -; WIN-X86-NEXT: leal -8(%ebp), %esp +; WIN-X86-NEXT: leal -12(%ebp), %esp ; WIN-X86-NEXT: popl %esi ; WIN-X86-NEXT: popl %edi +; WIN-X86-NEXT: popl %ebx ; WIN-X86-NEXT: popl %ebp ; WIN-X86-NEXT: retl %x = call fp128 @llvm.atan2.f128(fp128 %a, fp128 %b) @@ -2115,23 +2542,28 @@ define fp128 @Test128Cosh(fp128 %a) nounwind { ; ; X86-LABEL: Test128Cosh: ; X86: # %bb.0: +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: 
subl $24, %esp +; X86-NEXT: subl $52, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: subl $12, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll coshf128 -; X86-NEXT: addl $28, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, (%esi) ; X86-NEXT: movl %esi, %eax -; X86-NEXT: addl $24, %esp +; X86-NEXT: addl $52, %esp ; X86-NEXT: popl %esi +; X86-NEXT: popl %edi ; X86-NEXT: retl $4 ; ; WIN-LABEL: Test128Cosh: @@ -2151,17 +2583,20 @@ define fp128 @Test128Cosh(fp128 %a) nounwind { ; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $16, %esp +; WIN-X86-NEXT: subl $48, %esp ; WIN-X86-NEXT: movl 8(%ebp), %esi -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 24(%ebp) -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: movl 24(%ebp), %eax +; WIN-X86-NEXT: movl 28(%ebp), %ecx +; WIN-X86-NEXT: movl 32(%ebp), %edx +; WIN-X86-NEXT: movl 36(%ebp), %edi +; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll _coshl -; WIN-X86-NEXT: addl $20, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi @@ -2190,23 +2625,28 @@ define fp128 @Test128Sinh(fp128 %a) nounwind { ; ; X86-LABEL: Test128Sinh: ; X86: # %bb.0: +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $24, %esp +; X86-NEXT: subl $52, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: subl $12, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll sinhf128 -; X86-NEXT: addl $28, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, (%esi) ; X86-NEXT: movl %esi, %eax -; X86-NEXT: addl $24, %esp +; X86-NEXT: addl $52, %esp ; X86-NEXT: popl %esi +; X86-NEXT: popl %edi ; X86-NEXT: retl $4 ; ; WIN-LABEL: Test128Sinh: @@ -2226,17 +2666,20 @@ define fp128 @Test128Sinh(fp128 %a) nounwind { ; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp 
-; WIN-X86-NEXT: subl $16, %esp +; WIN-X86-NEXT: subl $48, %esp ; WIN-X86-NEXT: movl 8(%ebp), %esi -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 24(%ebp) -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: movl 24(%ebp), %eax +; WIN-X86-NEXT: movl 28(%ebp), %ecx +; WIN-X86-NEXT: movl 32(%ebp), %edx +; WIN-X86-NEXT: movl 36(%ebp), %edi +; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll _sinhl -; WIN-X86-NEXT: addl $20, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi @@ -2265,23 +2708,28 @@ define fp128 @Test128Tan(fp128 %a) nounwind { ; ; X86-LABEL: Test128Tan: ; X86: # %bb.0: +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $24, %esp +; X86-NEXT: subl $52, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: subl $12, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll tanf128 -; X86-NEXT: addl $28, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, (%esi) ; X86-NEXT: movl %esi, %eax -; X86-NEXT: addl $24, %esp +; X86-NEXT: addl $52, %esp ; X86-NEXT: popl %esi +; X86-NEXT: popl %edi ; X86-NEXT: retl $4 ; ; WIN-LABEL: Test128Tan: @@ -2301,17 +2749,20 @@ define fp128 @Test128Tan(fp128 %a) nounwind { ; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $16, %esp +; WIN-X86-NEXT: subl $48, %esp ; WIN-X86-NEXT: movl 8(%ebp), %esi -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 24(%ebp) -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: movl 24(%ebp), %eax +; WIN-X86-NEXT: movl 28(%ebp), %ecx +; WIN-X86-NEXT: movl 32(%ebp), %edx +; WIN-X86-NEXT: movl 36(%ebp), %edi +; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll _tanl -; WIN-X86-NEXT: addl $20, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi @@ -2340,23 +2791,28 @@ define fp128 @Test128Tanh(fp128 %a) nounwind { ; ; X86-LABEL: Test128Tanh: ; X86: # %bb.0: +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $24, %esp +; X86-NEXT: subl $52, %esp ; X86-NEXT: movl 
{{[0-9]+}}(%esp), %esi -; X86-NEXT: subl $12, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll tanhf128 -; X86-NEXT: addl $28, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps %xmm0, (%esi) ; X86-NEXT: movl %esi, %eax -; X86-NEXT: addl $24, %esp +; X86-NEXT: addl $52, %esp ; X86-NEXT: popl %esi +; X86-NEXT: popl %edi ; X86-NEXT: retl $4 ; ; WIN-LABEL: Test128Tanh: @@ -2376,17 +2832,20 @@ define fp128 @Test128Tanh(fp128 %a) nounwind { ; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $16, %esp +; WIN-X86-NEXT: subl $48, %esp ; WIN-X86-NEXT: movl 8(%ebp), %esi -; WIN-X86-NEXT: movl %esp, %eax -; WIN-X86-NEXT: pushl 24(%ebp) -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl %eax +; WIN-X86-NEXT: movl 24(%ebp), %eax +; WIN-X86-NEXT: movl 28(%ebp), %ecx +; WIN-X86-NEXT: movl 32(%ebp), %edx +; WIN-X86-NEXT: movl 36(%ebp), %edi +; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll _tanhl -; WIN-X86-NEXT: addl $20, %esp -; WIN-X86-NEXT: movl (%esp), %eax +; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %edi @@ -2425,27 +2884,34 @@ define { fp128, fp128 } @Test128Modf(fp128 %a) nounwind { ; ; X86-LABEL: Test128Modf: ; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $40, %esp +; X86-NEXT: subl $80, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: subl $8, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: leal {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx -; X86-NEXT: pushl %eax -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %ecx +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll modff128 -; X86-NEXT: addl $28, %esp -; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movaps {{[0-9]+}}(%esp), %xmm1 ; X86-NEXT: movaps %xmm1, 16(%esi) ; X86-NEXT: movaps %xmm0, (%esi) ; X86-NEXT: movl %esi, %eax -; X86-NEXT: addl $40, %esp +; X86-NEXT: addl $80, %esp ; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; 
X86-NEXT: popl %ebx ; X86-NEXT: retl $4 ; ; WIN-LABEL: Test128Modf: @@ -2468,18 +2934,21 @@ define { fp128, fp128 } @Test128Modf(fp128 %a) nounwind { ; WIN-X86-NEXT: pushl %edi ; WIN-X86-NEXT: pushl %esi ; WIN-X86-NEXT: andl $-16, %esp -; WIN-X86-NEXT: subl $64, %esp +; WIN-X86-NEXT: subl $112, %esp ; WIN-X86-NEXT: movl 8(%ebp), %esi +; WIN-X86-NEXT: movl 24(%ebp), %eax +; WIN-X86-NEXT: movl 28(%ebp), %ecx +; WIN-X86-NEXT: movl 32(%ebp), %edx +; WIN-X86-NEXT: movl 36(%ebp), %edi +; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %ebx +; WIN-X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; WIN-X86-NEXT: leal {{[0-9]+}}(%esp), %ecx -; WIN-X86-NEXT: pushl %eax -; WIN-X86-NEXT: pushl 24(%ebp) -; WIN-X86-NEXT: pushl 20(%ebp) -; WIN-X86-NEXT: pushl 16(%ebp) -; WIN-X86-NEXT: pushl 12(%ebp) -; WIN-X86-NEXT: pushl %ecx +; WIN-X86-NEXT: movl %eax, (%esp) ; WIN-X86-NEXT: calll _modfl -; WIN-X86-NEXT: addl $24, %esp ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; WIN-X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; WIN-X86-NEXT: movl {{[0-9]+}}(%esp), %eax diff --git a/llvm/test/CodeGen/X86/freeze-vector.ll b/llvm/test/CodeGen/X86/freeze-vector.ll index 0f66d42..953a5e7 100644 --- a/llvm/test/CodeGen/X86/freeze-vector.ll +++ b/llvm/test/CodeGen/X86/freeze-vector.ll @@ -171,15 +171,15 @@ define void @freeze_extractelement(ptr %origin0, ptr %origin1, ptr %dst) nounwin ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: vmovdqa (%ecx), %xmm0 -; X86-NEXT: vpand (%edx), %xmm0, %xmm0 +; X86-NEXT: vmovdqa (%edx), %xmm0 +; X86-NEXT: vpand (%ecx), %xmm0, %xmm0 ; X86-NEXT: vpextrb $6, %xmm0, (%eax) ; X86-NEXT: retl ; ; X64-LABEL: freeze_extractelement: ; X64: # %bb.0: -; X64-NEXT: vmovdqa (%rsi), %xmm0 -; X64-NEXT: vpand (%rdi), %xmm0, %xmm0 +; X64-NEXT: vmovdqa (%rdi), %xmm0 +; X64-NEXT: vpand (%rsi), %xmm0, %xmm0 ; X64-NEXT: vpextrb $6, %xmm0, (%rdx) ; X64-NEXT: retq %i0 = load <16 x i8>, ptr %origin0 @@ -198,8 +198,8 @@ define void @freeze_extractelement_escape(ptr %origin0, ptr %origin1, ptr %dst, ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: vmovdqa (%edx), %xmm0 -; X86-NEXT: vpand (%esi), %xmm0, %xmm0 +; X86-NEXT: vmovdqa (%esi), %xmm0 +; X86-NEXT: vpand (%edx), %xmm0, %xmm0 ; X86-NEXT: vmovdqa %xmm0, (%ecx) ; X86-NEXT: vpextrb $6, %xmm0, (%eax) ; X86-NEXT: popl %esi @@ -207,8 +207,8 @@ define void @freeze_extractelement_escape(ptr %origin0, ptr %origin1, ptr %dst, ; ; X64-LABEL: freeze_extractelement_escape: ; X64: # %bb.0: -; X64-NEXT: vmovdqa (%rsi), %xmm0 -; X64-NEXT: vpand (%rdi), %xmm0, %xmm0 +; X64-NEXT: vmovdqa (%rdi), %xmm0 +; X64-NEXT: vpand (%rsi), %xmm0, %xmm0 ; X64-NEXT: vmovdqa %xmm0, (%rcx) ; X64-NEXT: vpextrb $6, %xmm0, (%rdx) ; X64-NEXT: retq @@ -239,8 +239,8 @@ define void @freeze_extractelement_extra_use(ptr %origin0, ptr %origin1, i64 %id ; X86-NEXT: movl 32(%ebp), %edx ; X86-NEXT: movl 12(%ebp), %esi ; X86-NEXT: movl 8(%ebp), %edi -; X86-NEXT: vmovaps (%esi), %xmm0 -; X86-NEXT: vandps (%edi), %xmm0, %xmm0 +; X86-NEXT: vmovaps (%edi), %xmm0 +; X86-NEXT: vandps (%esi), %xmm0, %xmm0 ; X86-NEXT: vmovaps %xmm0, (%esp) ; X86-NEXT: movzbl (%esp,%ecx), %ecx ; X86-NEXT: cmpb (%esp,%eax), %cl @@ -255,8 +255,8 @@ 
define void @freeze_extractelement_extra_use(ptr %origin0, ptr %origin1, i64 %id ; X64: # %bb.0: ; X64-NEXT: andl $15, %ecx ; X64-NEXT: andl $15, %edx -; X64-NEXT: vmovaps (%rsi), %xmm0 -; X64-NEXT: vandps (%rdi), %xmm0, %xmm0 +; X64-NEXT: vmovaps (%rdi), %xmm0 +; X64-NEXT: vandps (%rsi), %xmm0, %xmm0 ; X64-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; X64-NEXT: movzbl -24(%rsp,%rdx), %eax ; X64-NEXT: cmpb -24(%rsp,%rcx), %al diff --git a/llvm/test/CodeGen/X86/fshl.ll b/llvm/test/CodeGen/X86/fshl.ll index e8c8ccf..ec1b8a3 100644 --- a/llvm/test/CodeGen/X86/fshl.ll +++ b/llvm/test/CodeGen/X86/fshl.ll @@ -264,53 +264,62 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind { ; X86-FAST-LABEL: var_shift_i128: ; X86-FAST: # %bb.0: ; X86-FAST-NEXT: pushl %ebp +; X86-FAST-NEXT: movl %esp, %ebp ; X86-FAST-NEXT: pushl %ebx ; X86-FAST-NEXT: pushl %edi ; X86-FAST-NEXT: pushl %esi -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-FAST-NEXT: andl $-16, %esp +; X86-FAST-NEXT: subl $16, %esp +; X86-FAST-NEXT: movl 24(%ebp), %edi +; X86-FAST-NEXT: movl 28(%ebp), %edx +; X86-FAST-NEXT: movl 48(%ebp), %esi +; X86-FAST-NEXT: movl 56(%ebp), %ecx ; X86-FAST-NEXT: testb $64, %cl +; X86-FAST-NEXT: movl 52(%ebp), %eax ; X86-FAST-NEXT: jne .LBB6_1 ; X86-FAST-NEXT: # %bb.2: -; X86-FAST-NEXT: movl %ebx, %ebp ; X86-FAST-NEXT: movl %esi, %ebx -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-FAST-NEXT: movl %edi, %eax -; X86-FAST-NEXT: movl %edx, %edi -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-FAST-NEXT: movl %edi, %esi +; X86-FAST-NEXT: movl 32(%ebp), %edi +; X86-FAST-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-FAST-NEXT: movl %edx, %eax +; X86-FAST-NEXT: movl 36(%ebp), %edx ; X86-FAST-NEXT: testb $32, %cl ; X86-FAST-NEXT: je .LBB6_5 ; X86-FAST-NEXT: .LBB6_4: -; X86-FAST-NEXT: movl %esi, %edx -; X86-FAST-NEXT: movl %edi, %esi -; X86-FAST-NEXT: movl %ebx, %edi -; X86-FAST-NEXT: movl %eax, %ebx +; X86-FAST-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-FAST-NEXT: movl %esi, %eax +; X86-FAST-NEXT: movl (%esp), %esi # 4-byte Reload ; X86-FAST-NEXT: jmp .LBB6_6 ; X86-FAST-NEXT: .LBB6_1: -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-FAST-NEXT: movl 44(%ebp), %ebx +; X86-FAST-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X86-FAST-NEXT: movl 40(%ebp), %ebx ; X86-FAST-NEXT: testb $32, %cl ; X86-FAST-NEXT: jne .LBB6_4 ; X86-FAST-NEXT: .LBB6_5: -; X86-FAST-NEXT: movl %eax, %ebp +; X86-FAST-NEXT: movl (%esp), %ebx # 4-byte Reload +; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-FAST-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-FAST-NEXT: .LBB6_6: -; X86-FAST-NEXT: movl %ebx, %eax -; X86-FAST-NEXT: shldl %cl, %ebp, %eax -; X86-FAST-NEXT: movl %edi, %ebp -; X86-FAST-NEXT: shldl %cl, %ebx, %ebp -; X86-FAST-NEXT: movl %esi, %ebx -; X86-FAST-NEXT: shldl %cl, %edi, %ebx +; X86-FAST-NEXT: movl %esi, %edi +; X86-FAST-NEXT: shldl %cl, %ebx, %edi +; X86-FAST-NEXT: movl %eax, %edx +; X86-FAST-NEXT: movl %eax, %ebx +; X86-FAST-NEXT: shldl %cl, %esi, %ebx +; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-FAST-NEXT: movl %eax, %esi +; X86-FAST-NEXT: shldl %cl, %edx, %esi ; X86-FAST-NEXT: # kill: def $cl killed $cl killed $ecx -; 
X86-FAST-NEXT: shldl %cl, %esi, %edx -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-FAST-NEXT: movl %edx, 12(%ecx) -; X86-FAST-NEXT: movl %ebx, 8(%ecx) -; X86-FAST-NEXT: movl %ebp, 4(%ecx) -; X86-FAST-NEXT: movl %eax, (%ecx) -; X86-FAST-NEXT: movl %ecx, %eax +; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-FAST-NEXT: shldl %cl, %eax, %edx +; X86-FAST-NEXT: movl 8(%ebp), %eax +; X86-FAST-NEXT: movl %edx, 12(%eax) +; X86-FAST-NEXT: movl %esi, 8(%eax) +; X86-FAST-NEXT: movl %ebx, 4(%eax) +; X86-FAST-NEXT: movl %edi, (%eax) +; X86-FAST-NEXT: leal -12(%ebp), %esp ; X86-FAST-NEXT: popl %esi ; X86-FAST-NEXT: popl %edi ; X86-FAST-NEXT: popl %ebx @@ -320,77 +329,91 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind { ; X86-SLOW-LABEL: var_shift_i128: ; X86-SLOW: # %bb.0: ; X86-SLOW-NEXT: pushl %ebp +; X86-SLOW-NEXT: movl %esp, %ebp ; X86-SLOW-NEXT: pushl %ebx ; X86-SLOW-NEXT: pushl %edi ; X86-SLOW-NEXT: pushl %esi -; X86-SLOW-NEXT: pushl %eax -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SLOW-NEXT: testb $64, %al +; X86-SLOW-NEXT: andl $-16, %esp +; X86-SLOW-NEXT: subl $32, %esp +; X86-SLOW-NEXT: movl 24(%ebp), %esi +; X86-SLOW-NEXT: movl 28(%ebp), %eax +; X86-SLOW-NEXT: movl 48(%ebp), %edx +; X86-SLOW-NEXT: movl 56(%ebp), %ecx +; X86-SLOW-NEXT: testb $64, %cl +; X86-SLOW-NEXT: movl 52(%ebp), %edi ; X86-SLOW-NEXT: jne .LBB6_1 ; X86-SLOW-NEXT: # %bb.2: -; X86-SLOW-NEXT: movl %edx, %ebp -; X86-SLOW-NEXT: movl %ebx, %edx -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SLOW-NEXT: movl %esi, %edx +; X86-SLOW-NEXT: movl 32(%ebp), %esi ; X86-SLOW-NEXT: movl %edi, %ecx -; X86-SLOW-NEXT: movl %esi, %edi -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-SLOW-NEXT: testb $32, %al -; X86-SLOW-NEXT: je .LBB6_5 -; X86-SLOW-NEXT: .LBB6_4: -; X86-SLOW-NEXT: movl %ebx, (%esp) # 4-byte Spill -; X86-SLOW-NEXT: movl %edi, %ebx -; X86-SLOW-NEXT: movl %edx, %edi -; X86-SLOW-NEXT: movl %ecx, %edx -; X86-SLOW-NEXT: jmp .LBB6_6 +; X86-SLOW-NEXT: movl %eax, %edi +; X86-SLOW-NEXT: movl 36(%ebp), %eax +; X86-SLOW-NEXT: jmp .LBB6_3 ; X86-SLOW-NEXT: .LBB6_1: -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SLOW-NEXT: testb $32, %al +; X86-SLOW-NEXT: movl 40(%ebp), %ecx +; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SLOW-NEXT: movl 44(%ebp), %ecx +; X86-SLOW-NEXT: .LBB6_3: +; X86-SLOW-NEXT: movl 56(%ebp), %ebx +; X86-SLOW-NEXT: testb $32, %bl ; X86-SLOW-NEXT: jne .LBB6_4 -; X86-SLOW-NEXT: .LBB6_5: -; X86-SLOW-NEXT: movl %ecx, %ebp -; X86-SLOW-NEXT: movl %esi, (%esp) # 4-byte Spill +; X86-SLOW-NEXT: # %bb.5: +; X86-SLOW-NEXT: movl %ecx, %ebx +; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SLOW-NEXT: jmp .LBB6_6 +; X86-SLOW-NEXT: .LBB6_4: +; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SLOW-NEXT: movl %ecx, %edx +; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X86-SLOW-NEXT: .LBB6_6: ; 
X86-SLOW-NEXT: movl %edx, %esi -; X86-SLOW-NEXT: movl %eax, %ecx -; X86-SLOW-NEXT: shll %cl, %esi -; X86-SLOW-NEXT: shrl %ebp -; X86-SLOW-NEXT: movb %al, %ch -; X86-SLOW-NEXT: notb %ch -; X86-SLOW-NEXT: movb %ch, %cl -; X86-SLOW-NEXT: shrl %cl, %ebp -; X86-SLOW-NEXT: orl %esi, %ebp -; X86-SLOW-NEXT: movl %edi, %esi -; X86-SLOW-NEXT: movb %al, %cl -; X86-SLOW-NEXT: shll %cl, %esi -; X86-SLOW-NEXT: shrl %edx -; X86-SLOW-NEXT: movb %ch, %cl -; X86-SLOW-NEXT: shrl %cl, %edx -; X86-SLOW-NEXT: orl %esi, %edx -; X86-SLOW-NEXT: movl %ebx, %esi -; X86-SLOW-NEXT: movb %al, %cl +; X86-SLOW-NEXT: movl 56(%ebp), %ecx ; X86-SLOW-NEXT: shll %cl, %esi +; X86-SLOW-NEXT: movl %ebx, %edi ; X86-SLOW-NEXT: shrl %edi -; X86-SLOW-NEXT: movb %ch, %cl +; X86-SLOW-NEXT: movl %ecx, %ebx +; X86-SLOW-NEXT: notb %bl +; X86-SLOW-NEXT: movl %ebx, %ecx +; X86-SLOW-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; X86-SLOW-NEXT: shrl %cl, %edi ; X86-SLOW-NEXT: orl %esi, %edi -; X86-SLOW-NEXT: movb %al, %cl -; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-SLOW-NEXT: movl %esi, %eax +; X86-SLOW-NEXT: movl 56(%ebp), %ecx +; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-SLOW-NEXT: shll %cl, %eax +; X86-SLOW-NEXT: shrl %edx +; X86-SLOW-NEXT: movl %ebx, %ecx +; X86-SLOW-NEXT: shrl %cl, %edx +; X86-SLOW-NEXT: orl %eax, %edx +; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-SLOW-NEXT: movl %ebx, %eax +; X86-SLOW-NEXT: movl 56(%ebp), %ecx +; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-SLOW-NEXT: shll %cl, %eax +; X86-SLOW-NEXT: shrl %esi +; X86-SLOW-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-SLOW-NEXT: shrl %cl, %esi +; X86-SLOW-NEXT: orl %eax, %esi +; X86-SLOW-NEXT: movl 56(%ebp), %ecx +; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-SLOW-NEXT: shll %cl, %eax ; X86-SLOW-NEXT: shrl %ebx -; X86-SLOW-NEXT: movb %ch, %cl +; X86-SLOW-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X86-SLOW-NEXT: shrl %cl, %ebx ; X86-SLOW-NEXT: orl %eax, %ebx -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SLOW-NEXT: movl 8(%ebp), %eax ; X86-SLOW-NEXT: movl %ebx, 12(%eax) -; X86-SLOW-NEXT: movl %edi, 8(%eax) +; X86-SLOW-NEXT: movl %esi, 8(%eax) ; X86-SLOW-NEXT: movl %edx, 4(%eax) -; X86-SLOW-NEXT: movl %ebp, (%eax) -; X86-SLOW-NEXT: addl $4, %esp +; X86-SLOW-NEXT: movl %edi, (%eax) +; X86-SLOW-NEXT: leal -12(%ebp), %esp ; X86-SLOW-NEXT: popl %esi ; X86-SLOW-NEXT: popl %edi ; X86-SLOW-NEXT: popl %ebx diff --git a/llvm/test/CodeGen/X86/fshr.ll b/llvm/test/CodeGen/X86/fshr.ll index 4340f8f..544ab7f 100644 --- a/llvm/test/CodeGen/X86/fshr.ll +++ b/llvm/test/CodeGen/X86/fshr.ll @@ -258,51 +258,53 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind { ; X86-FAST-LABEL: var_shift_i128: ; X86-FAST: # %bb.0: ; X86-FAST-NEXT: pushl %ebp +; X86-FAST-NEXT: movl %esp, %ebp ; X86-FAST-NEXT: pushl %ebx ; X86-FAST-NEXT: pushl %edi ; X86-FAST-NEXT: pushl %esi -; X86-FAST-NEXT: pushl %eax -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-FAST-NEXT: andl $-16, %esp +; X86-FAST-NEXT: subl $16, %esp +; X86-FAST-NEXT: movl 24(%ebp), %esi +; 
X86-FAST-NEXT: movl 28(%ebp), %eax +; X86-FAST-NEXT: movl 48(%ebp), %edx +; X86-FAST-NEXT: movl 56(%ebp), %ecx ; X86-FAST-NEXT: testb $64, %cl +; X86-FAST-NEXT: movl 52(%ebp), %ebx ; X86-FAST-NEXT: je .LBB6_1 ; X86-FAST-NEXT: # %bb.2: -; X86-FAST-NEXT: movl %edx, (%esp) # 4-byte Spill -; X86-FAST-NEXT: movl %edi, %edx -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-FAST-NEXT: movl %esi, %ebp -; X86-FAST-NEXT: movl %ebx, %esi -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-FAST-NEXT: movl %esi, %edx +; X86-FAST-NEXT: movl 32(%ebp), %esi +; X86-FAST-NEXT: movl %ebx, %edi +; X86-FAST-NEXT: movl %eax, %ebx +; X86-FAST-NEXT: movl 36(%ebp), %eax ; X86-FAST-NEXT: testb $32, %cl ; X86-FAST-NEXT: je .LBB6_4 ; X86-FAST-NEXT: jmp .LBB6_5 ; X86-FAST-NEXT: .LBB6_1: -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-FAST-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-FAST-NEXT: movl 40(%ebp), %edi +; X86-FAST-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-FAST-NEXT: movl 44(%ebp), %edi ; X86-FAST-NEXT: testb $32, %cl ; X86-FAST-NEXT: jne .LBB6_5 ; X86-FAST-NEXT: .LBB6_4: -; X86-FAST-NEXT: movl %edi, %ebx -; X86-FAST-NEXT: movl %esi, %edi -; X86-FAST-NEXT: movl %edx, %esi -; X86-FAST-NEXT: movl %ebp, %edx -; X86-FAST-NEXT: movl (%esp), %ebp # 4-byte Reload +; X86-FAST-NEXT: movl %esi, %eax +; X86-FAST-NEXT: movl %ebx, %esi +; X86-FAST-NEXT: movl %edx, %ebx +; X86-FAST-NEXT: movl %edi, %edx +; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X86-FAST-NEXT: .LBB6_5: -; X86-FAST-NEXT: shrdl %cl, %edx, %ebp -; X86-FAST-NEXT: shrdl %cl, %esi, %edx -; X86-FAST-NEXT: shrdl %cl, %edi, %esi +; X86-FAST-NEXT: shrdl %cl, %edx, %edi +; X86-FAST-NEXT: shrdl %cl, %ebx, %edx +; X86-FAST-NEXT: shrdl %cl, %esi, %ebx ; X86-FAST-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-FAST-NEXT: shrdl %cl, %ebx, %edi -; X86-FAST-NEXT: movl %edi, 12(%eax) -; X86-FAST-NEXT: movl %esi, 8(%eax) +; X86-FAST-NEXT: shrdl %cl, %eax, %esi +; X86-FAST-NEXT: movl 8(%ebp), %eax +; X86-FAST-NEXT: movl %esi, 12(%eax) +; X86-FAST-NEXT: movl %ebx, 8(%eax) ; X86-FAST-NEXT: movl %edx, 4(%eax) -; X86-FAST-NEXT: movl %ebp, (%eax) -; X86-FAST-NEXT: addl $4, %esp +; X86-FAST-NEXT: movl %edi, (%eax) +; X86-FAST-NEXT: leal -12(%ebp), %esp ; X86-FAST-NEXT: popl %esi ; X86-FAST-NEXT: popl %edi ; X86-FAST-NEXT: popl %ebx @@ -312,78 +314,88 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind { ; X86-SLOW-LABEL: var_shift_i128: ; X86-SLOW: # %bb.0: ; X86-SLOW-NEXT: pushl %ebp +; X86-SLOW-NEXT: movl %esp, %ebp ; X86-SLOW-NEXT: pushl %ebx ; X86-SLOW-NEXT: pushl %edi ; X86-SLOW-NEXT: pushl %esi -; X86-SLOW-NEXT: subl $8, %esp -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SLOW-NEXT: testb $64, %cl +; X86-SLOW-NEXT: andl $-16, %esp +; X86-SLOW-NEXT: subl $16, %esp +; X86-SLOW-NEXT: movl 24(%ebp), %edx +; X86-SLOW-NEXT: movl 28(%ebp), %esi +; X86-SLOW-NEXT: movl 48(%ebp), %ebx +; X86-SLOW-NEXT: movl 56(%ebp), %eax +; X86-SLOW-NEXT: testb $64, %al +; X86-SLOW-NEXT: movl 52(%ebp), %edi ; X86-SLOW-NEXT: je .LBB6_1 ; X86-SLOW-NEXT: # %bb.2: -; X86-SLOW-NEXT: movl %ebp, %eax -; X86-SLOW-NEXT: movl %ebx, %ebp -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-SLOW-NEXT: movl %edi, %edx +; X86-SLOW-NEXT: 
movl %ebx, (%esp) # 4-byte Spill +; X86-SLOW-NEXT: movl %edx, %ebx +; X86-SLOW-NEXT: movl 32(%ebp), %edx +; X86-SLOW-NEXT: movl %edi, %eax ; X86-SLOW-NEXT: movl %esi, %edi -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-SLOW-NEXT: testb $32, %cl -; X86-SLOW-NEXT: jne .LBB6_5 -; X86-SLOW-NEXT: .LBB6_4: -; X86-SLOW-NEXT: movl %ebx, %esi -; X86-SLOW-NEXT: movl %edi, (%esp) # 4-byte Spill -; X86-SLOW-NEXT: movl %ebp, %edi -; X86-SLOW-NEXT: movl %edx, %ebp -; X86-SLOW-NEXT: movl %eax, %edx -; X86-SLOW-NEXT: jmp .LBB6_6 +; X86-SLOW-NEXT: movl 36(%ebp), %esi +; X86-SLOW-NEXT: jmp .LBB6_3 ; X86-SLOW-NEXT: .LBB6_1: -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SLOW-NEXT: movl 40(%ebp), %eax +; X86-SLOW-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-SLOW-NEXT: movl 44(%ebp), %eax +; X86-SLOW-NEXT: .LBB6_3: +; X86-SLOW-NEXT: movl 56(%ebp), %ecx ; X86-SLOW-NEXT: testb $32, %cl ; X86-SLOW-NEXT: je .LBB6_4 -; X86-SLOW-NEXT: .LBB6_5: -; X86-SLOW-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X86-SLOW-NEXT: # %bb.5: +; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SLOW-NEXT: jmp .LBB6_6 +; X86-SLOW-NEXT: .LBB6_4: +; X86-SLOW-NEXT: movl %edx, %esi +; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SLOW-NEXT: movl %eax, %ebx +; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-SLOW-NEXT: .LBB6_6: -; X86-SLOW-NEXT: shrl %cl, %edx -; X86-SLOW-NEXT: movl %ecx, %ebx -; X86-SLOW-NEXT: notb %bl -; X86-SLOW-NEXT: leal (%ebp,%ebp), %eax -; X86-SLOW-NEXT: movl %ebx, %ecx -; X86-SLOW-NEXT: shll %cl, %eax -; X86-SLOW-NEXT: orl %edx, %eax -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SLOW-NEXT: shrl %cl, %eax +; X86-SLOW-NEXT: movl %eax, %edx +; X86-SLOW-NEXT: movl %ecx, %eax +; X86-SLOW-NEXT: notb %al +; X86-SLOW-NEXT: movl %ebx, %edi +; X86-SLOW-NEXT: addl %ebx, %ebx +; X86-SLOW-NEXT: movl %eax, %ecx +; X86-SLOW-NEXT: shll %cl, %ebx +; X86-SLOW-NEXT: orl %edx, %ebx +; X86-SLOW-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X86-SLOW-NEXT: movl 56(%ebp), %ecx ; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-SLOW-NEXT: shrl %cl, %ebp -; X86-SLOW-NEXT: leal (%edi,%edi), %edx -; X86-SLOW-NEXT: movl %ebx, %ecx +; X86-SLOW-NEXT: shrl %cl, %edi +; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-SLOW-NEXT: leal (%ebx,%ebx), %edx +; X86-SLOW-NEXT: movl %eax, %ecx ; X86-SLOW-NEXT: shll %cl, %edx -; X86-SLOW-NEXT: orl %ebp, %edx -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SLOW-NEXT: orl %edi, %edx +; X86-SLOW-NEXT: movl 56(%ebp), %ecx ; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-SLOW-NEXT: shrl %cl, %edi -; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-SLOW-NEXT: movl (%esp), %edi # 4-byte Reload -; X86-SLOW-NEXT: leal (%edi,%edi), %ebp -; X86-SLOW-NEXT: movl %ebx, %ecx -; X86-SLOW-NEXT: shll %cl, %ebp -; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SLOW-NEXT: shrl %cl, %ebx +; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-SLOW-NEXT: leal (%edi,%edi), %ebx +; X86-SLOW-NEXT: movl %eax, %ecx +; X86-SLOW-NEXT: shll %cl, %ebx +; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-SLOW-NEXT: 
movl 56(%ebp), %ecx ; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx ; X86-SLOW-NEXT: shrl %cl, %edi ; X86-SLOW-NEXT: addl %esi, %esi -; X86-SLOW-NEXT: movl %ebx, %ecx +; X86-SLOW-NEXT: movl %eax, %ecx ; X86-SLOW-NEXT: shll %cl, %esi ; X86-SLOW-NEXT: orl %edi, %esi -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SLOW-NEXT: movl 8(%ebp), %ecx ; X86-SLOW-NEXT: movl %esi, 12(%ecx) -; X86-SLOW-NEXT: movl %ebp, 8(%ecx) +; X86-SLOW-NEXT: movl %ebx, 8(%ecx) ; X86-SLOW-NEXT: movl %edx, 4(%ecx) +; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-SLOW-NEXT: movl %eax, (%ecx) ; X86-SLOW-NEXT: movl %ecx, %eax -; X86-SLOW-NEXT: addl $8, %esp +; X86-SLOW-NEXT: leal -12(%ebp), %esp ; X86-SLOW-NEXT: popl %esi ; X86-SLOW-NEXT: popl %edi ; X86-SLOW-NEXT: popl %ebx diff --git a/llvm/test/CodeGen/X86/function-align.ll b/llvm/test/CodeGen/X86/function-align.ll new file mode 100644 index 0000000..11d0e99 --- /dev/null +++ b/llvm/test/CodeGen/X86/function-align.ll @@ -0,0 +1,18 @@ +; RUN: llc -function-sections < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK: .section .text.f1 +; CHECK-NOT: .p2align +; CHECK: f1: +define void @f1() align 1 { + ret void +} + +; CHECK: .section .text.f2 +; CHECK-NEXT: .globl f2 +; CHECK-NEXT: .p2align 1 +define void @f2() align 2 { + ret void +} diff --git a/llvm/test/CodeGen/X86/funnel-shift.ll b/llvm/test/CodeGen/X86/funnel-shift.ll index a464d78..df97f49 100644 --- a/llvm/test/CodeGen/X86/funnel-shift.ll +++ b/llvm/test/CodeGen/X86/funnel-shift.ll @@ -74,43 +74,57 @@ define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind { ; X86-SSE2-LABEL: fshl_i128: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp ; X86-SSE2-NEXT: pushl %ebx ; X86-SSE2-NEXT: pushl %edi ; X86-SSE2-NEXT: pushl %esi -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: andl $-16, %esp +; X86-SSE2-NEXT: subl $16, %esp +; X86-SSE2-NEXT: movl 48(%ebp), %edi +; X86-SSE2-NEXT: movl 52(%ebp), %eax +; X86-SSE2-NEXT: movl 24(%ebp), %edx +; X86-SSE2-NEXT: movl 56(%ebp), %ecx ; X86-SSE2-NEXT: testb $64, %cl -; X86-SSE2-NEXT: movl %esi, %eax -; X86-SSE2-NEXT: cmovnel %ebx, %eax -; X86-SSE2-NEXT: movl %edx, %ebp -; X86-SSE2-NEXT: cmovnel %edi, %ebp -; X86-SSE2-NEXT: cmovnel {{[0-9]+}}(%esp), %edi -; X86-SSE2-NEXT: cmovnel {{[0-9]+}}(%esp), %ebx -; X86-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %edx -; X86-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %esi +; X86-SSE2-NEXT: movl %edx, %ecx +; X86-SSE2-NEXT: cmovnel %edi, %ecx +; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE2-NEXT: movl 28(%ebp), %esi +; X86-SSE2-NEXT: movl %esi, %ebx +; X86-SSE2-NEXT: cmovnel %eax, %ebx +; X86-SSE2-NEXT: cmovnel 44(%ebp), %eax +; X86-SSE2-NEXT: cmovnel 40(%ebp), %edi +; X86-SSE2-NEXT: cmovel 36(%ebp), %esi +; X86-SSE2-NEXT: cmovel 32(%ebp), %edx +; X86-SSE2-NEXT: movl 56(%ebp), %ecx ; X86-SSE2-NEXT: testb $32, %cl -; X86-SSE2-NEXT: cmovnel %esi, %edx -; X86-SSE2-NEXT: cmovnel %ebp, %esi -; X86-SSE2-NEXT: cmovnel %eax, %ebp -; X86-SSE2-NEXT: cmovel %edi, %ebx +; X86-SSE2-NEXT: cmovnel %edx, %esi +; X86-SSE2-NEXT: cmovnel %ebx, %edx +; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-SSE2-NEXT: cmovnel %ecx, %ebx ; X86-SSE2-NEXT: cmovel %eax, %edi -; X86-SSE2-NEXT: movl 
%edi, %eax -; X86-SSE2-NEXT: shldl %cl, %ebx, %eax -; X86-SSE2-NEXT: movl %ebp, %ebx -; X86-SSE2-NEXT: shldl %cl, %edi, %ebx -; X86-SSE2-NEXT: movl %esi, %edi -; X86-SSE2-NEXT: shldl %cl, %ebp, %edi +; X86-SSE2-NEXT: cmovel %ecx, %eax +; X86-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE2-NEXT: movl 56(%ebp), %ecx ; X86-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-SSE2-NEXT: shldl %cl, %esi, %edx -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE2-NEXT: movl %edx, 12(%ecx) -; X86-SSE2-NEXT: movl %edi, 8(%ecx) -; X86-SSE2-NEXT: movl %ebx, 4(%ecx) -; X86-SSE2-NEXT: movl %eax, (%ecx) -; X86-SSE2-NEXT: movl %ecx, %eax +; X86-SSE2-NEXT: shldl %cl, %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-SSE2-NEXT: movl %ebx, %edi +; X86-SSE2-NEXT: movl 56(%ebp), %ecx +; X86-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-SSE2-NEXT: shldl %cl, %eax, %edi +; X86-SSE2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE2-NEXT: movl %edx, %edi +; X86-SSE2-NEXT: movl 56(%ebp), %ecx +; X86-SSE2-NEXT: shldl %cl, %ebx, %edi +; X86-SSE2-NEXT: movl 8(%ebp), %eax +; X86-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-SSE2-NEXT: shldl %cl, %edx, %esi +; X86-SSE2-NEXT: movl %esi, 12(%eax) +; X86-SSE2-NEXT: movl %edi, 8(%eax) +; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-SSE2-NEXT: movl %ecx, 4(%eax) +; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-SSE2-NEXT: movl %ecx, (%eax) +; X86-SSE2-NEXT: leal -12(%ebp), %esp ; X86-SSE2-NEXT: popl %esi ; X86-SSE2-NEXT: popl %edi ; X86-SSE2-NEXT: popl %ebx diff --git a/llvm/test/CodeGen/X86/i128-add.ll b/llvm/test/CodeGen/X86/i128-add.ll index 2849e44..b4546c1 100644 --- a/llvm/test/CodeGen/X86/i128-add.ll +++ b/llvm/test/CodeGen/X86/i128-add.ll @@ -5,17 +5,20 @@ define i128 @add_i128(i128 %x, i128 %y) nounwind { ; X86-LABEL: add_i128: ; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: addl {{[0-9]+}}(%esp), %esi -; X86-NEXT: adcl {{[0-9]+}}(%esp), %edi -; X86-NEXT: adcl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx +; X86-NEXT: andl $-16, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 32(%ebp), %ecx +; X86-NEXT: movl 36(%ebp), %edx +; X86-NEXT: movl 24(%ebp), %esi +; X86-NEXT: movl 28(%ebp), %edi +; X86-NEXT: addl 40(%ebp), %esi +; X86-NEXT: adcl 44(%ebp), %edi +; X86-NEXT: adcl 48(%ebp), %ecx +; X86-NEXT: adcl 52(%ebp), %edx ; X86-NEXT: addl $1, %esi ; X86-NEXT: adcl $0, %edi ; X86-NEXT: adcl $0, %ecx @@ -24,8 +27,10 @@ define i128 @add_i128(i128 %x, i128 %y) nounwind { ; X86-NEXT: movl %esi, (%eax) ; X86-NEXT: movl %ecx, 8(%eax) ; X86-NEXT: movl %edx, 12(%eax) +; X86-NEXT: leal -8(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi +; X86-NEXT: popl %ebp ; X86-NEXT: retl $4 ; ; X64-LABEL: add_i128: diff --git a/llvm/test/CodeGen/X86/i128-fp128-abi.ll b/llvm/test/CodeGen/X86/i128-fp128-abi.ll index 4152dcf..2174d50 100644 --- a/llvm/test/CodeGen/X86/i128-fp128-abi.ll +++ b/llvm/test/CodeGen/X86/i128-fp128-abi.ll @@ -55,41 +55,47 @@ define void @store(PrimTy %x, ptr %p) nounwind { ; CHECK-X86: # %bb.0: ; CHECK-X86-NEXT: pushl %edi ; CHECK-X86-NEXT: pushl %esi -; CHECK-X86-NEXT: movl 12(%esp), %eax -; CHECK-X86-NEXT: movl 
16(%esp), %ecx -; CHECK-X86-NEXT: movl 20(%esp), %edx -; CHECK-X86-NEXT: movl 24(%esp), %esi -; CHECK-X86-NEXT: movl 28(%esp), %edi +; CHECK-X86-NEXT: pushl %eax +; CHECK-X86-NEXT: movl 16(%esp), %eax +; CHECK-X86-NEXT: movl 20(%esp), %ecx +; CHECK-X86-NEXT: movl 24(%esp), %edx +; CHECK-X86-NEXT: movl 28(%esp), %esi +; CHECK-X86-NEXT: movl 32(%esp), %edi ; CHECK-X86-NEXT: movl %esi, 12(%edi) ; CHECK-X86-NEXT: movl %edx, 8(%edi) ; CHECK-X86-NEXT: movl %ecx, 4(%edi) ; CHECK-X86-NEXT: movl %eax, (%edi) +; CHECK-X86-NEXT: addl $4, %esp ; CHECK-X86-NEXT: popl %esi ; CHECK-X86-NEXT: popl %edi ; CHECK-X86-NEXT: retl ; ; CHECK-MSVC32-LABEL: store: ; CHECK-MSVC32: # %bb.0: +; CHECK-MSVC32-NEXT: pushl %ebp +; CHECK-MSVC32-NEXT: movl %esp, %ebp ; CHECK-MSVC32-NEXT: pushl %edi ; CHECK-MSVC32-NEXT: pushl %esi -; CHECK-MSVC32-NEXT: movl 12(%esp), %eax -; CHECK-MSVC32-NEXT: movl 16(%esp), %ecx -; CHECK-MSVC32-NEXT: movl 20(%esp), %edx -; CHECK-MSVC32-NEXT: movl 24(%esp), %esi -; CHECK-MSVC32-NEXT: movl 28(%esp), %edi +; CHECK-MSVC32-NEXT: andl $-16, %esp +; CHECK-MSVC32-NEXT: movl 8(%ebp), %eax +; CHECK-MSVC32-NEXT: movl 12(%ebp), %ecx +; CHECK-MSVC32-NEXT: movl 16(%ebp), %edx +; CHECK-MSVC32-NEXT: movl 20(%ebp), %esi +; CHECK-MSVC32-NEXT: movl 24(%ebp), %edi ; CHECK-MSVC32-NEXT: movl %esi, 12(%edi) ; CHECK-MSVC32-NEXT: movl %edx, 8(%edi) ; CHECK-MSVC32-NEXT: movl %ecx, 4(%edi) ; CHECK-MSVC32-NEXT: movl %eax, (%edi) +; CHECK-MSVC32-NEXT: leal -8(%ebp), %esp ; CHECK-MSVC32-NEXT: popl %esi ; CHECK-MSVC32-NEXT: popl %edi +; CHECK-MSVC32-NEXT: popl %ebp ; CHECK-MSVC32-NEXT: retl store PrimTy %x, ptr %p ret void } ; Illustrate stack alignment -; FIXME(#77401): alignment on x86-32 is ABI-incorrect. define void @store_perturbed(i8 %_0, PrimTy %x, ptr %p) nounwind { ; CHECK-X64-F128-LABEL: store_perturbed: ; CHECK-X64-F128: # %bb.0: @@ -130,34 +136,41 @@ define void @store_perturbed(i8 %_0, PrimTy %x, ptr %p) nounwind { ; CHECK-X86: # %bb.0: ; CHECK-X86-NEXT: pushl %edi ; CHECK-X86-NEXT: pushl %esi -; CHECK-X86-NEXT: movl 16(%esp), %eax -; CHECK-X86-NEXT: movl 20(%esp), %ecx -; CHECK-X86-NEXT: movl 24(%esp), %edx -; CHECK-X86-NEXT: movl 28(%esp), %esi -; CHECK-X86-NEXT: movl 32(%esp), %edi +; CHECK-X86-NEXT: pushl %eax +; CHECK-X86-NEXT: movl 32(%esp), %eax +; CHECK-X86-NEXT: movl 36(%esp), %ecx +; CHECK-X86-NEXT: movl 40(%esp), %edx +; CHECK-X86-NEXT: movl 44(%esp), %esi +; CHECK-X86-NEXT: movl 48(%esp), %edi ; CHECK-X86-NEXT: movl %esi, 12(%edi) ; CHECK-X86-NEXT: movl %edx, 8(%edi) ; CHECK-X86-NEXT: movl %ecx, 4(%edi) ; CHECK-X86-NEXT: movl %eax, (%edi) +; CHECK-X86-NEXT: addl $4, %esp ; CHECK-X86-NEXT: popl %esi ; CHECK-X86-NEXT: popl %edi ; CHECK-X86-NEXT: retl ; ; CHECK-MSVC32-LABEL: store_perturbed: ; CHECK-MSVC32: # %bb.0: +; CHECK-MSVC32-NEXT: pushl %ebp +; CHECK-MSVC32-NEXT: movl %esp, %ebp ; CHECK-MSVC32-NEXT: pushl %edi ; CHECK-MSVC32-NEXT: pushl %esi -; CHECK-MSVC32-NEXT: movl 16(%esp), %eax -; CHECK-MSVC32-NEXT: movl 20(%esp), %ecx -; CHECK-MSVC32-NEXT: movl 24(%esp), %edx -; CHECK-MSVC32-NEXT: movl 28(%esp), %esi -; CHECK-MSVC32-NEXT: movl 32(%esp), %edi +; CHECK-MSVC32-NEXT: andl $-16, %esp +; CHECK-MSVC32-NEXT: movl 24(%ebp), %eax +; CHECK-MSVC32-NEXT: movl 28(%ebp), %ecx +; CHECK-MSVC32-NEXT: movl 32(%ebp), %edx +; CHECK-MSVC32-NEXT: movl 36(%ebp), %esi +; CHECK-MSVC32-NEXT: movl 40(%ebp), %edi ; CHECK-MSVC32-NEXT: movl %esi, 12(%edi) ; CHECK-MSVC32-NEXT: movl %edx, 8(%edi) ; CHECK-MSVC32-NEXT: movl %ecx, 4(%edi) ; CHECK-MSVC32-NEXT: movl %eax, (%edi) +; CHECK-MSVC32-NEXT: leal -8(%ebp), %esp ; 
CHECK-MSVC32-NEXT: popl %esi ; CHECK-MSVC32-NEXT: popl %edi +; CHECK-MSVC32-NEXT: popl %ebp ; CHECK-MSVC32-NEXT: retl store PrimTy %x, ptr %p ret void @@ -271,34 +284,41 @@ define PrimTy @first_arg(PrimTy %x) nounwind { ; CHECK-X86: # %bb.0: ; CHECK-X86-NEXT: pushl %edi ; CHECK-X86-NEXT: pushl %esi -; CHECK-X86-NEXT: movl 12(%esp), %eax -; CHECK-X86-NEXT: movl 16(%esp), %ecx -; CHECK-X86-NEXT: movl 20(%esp), %edx -; CHECK-X86-NEXT: movl 24(%esp), %esi -; CHECK-X86-NEXT: movl 28(%esp), %edi +; CHECK-X86-NEXT: pushl %eax +; CHECK-X86-NEXT: movl 16(%esp), %eax +; CHECK-X86-NEXT: movl 32(%esp), %ecx +; CHECK-X86-NEXT: movl 36(%esp), %edx +; CHECK-X86-NEXT: movl 40(%esp), %esi +; CHECK-X86-NEXT: movl 44(%esp), %edi ; CHECK-X86-NEXT: movl %edi, 12(%eax) ; CHECK-X86-NEXT: movl %esi, 8(%eax) ; CHECK-X86-NEXT: movl %edx, 4(%eax) ; CHECK-X86-NEXT: movl %ecx, (%eax) +; CHECK-X86-NEXT: addl $4, %esp ; CHECK-X86-NEXT: popl %esi ; CHECK-X86-NEXT: popl %edi ; CHECK-X86-NEXT: retl $4 ; ; CHECK-MSVC32-LABEL: first_arg: ; CHECK-MSVC32: # %bb.0: +; CHECK-MSVC32-NEXT: pushl %ebp +; CHECK-MSVC32-NEXT: movl %esp, %ebp ; CHECK-MSVC32-NEXT: pushl %edi ; CHECK-MSVC32-NEXT: pushl %esi -; CHECK-MSVC32-NEXT: movl 12(%esp), %eax -; CHECK-MSVC32-NEXT: movl 16(%esp), %ecx -; CHECK-MSVC32-NEXT: movl 20(%esp), %edx -; CHECK-MSVC32-NEXT: movl 24(%esp), %esi -; CHECK-MSVC32-NEXT: movl 28(%esp), %edi +; CHECK-MSVC32-NEXT: andl $-16, %esp +; CHECK-MSVC32-NEXT: movl 8(%ebp), %eax +; CHECK-MSVC32-NEXT: movl 24(%ebp), %ecx +; CHECK-MSVC32-NEXT: movl 28(%ebp), %edx +; CHECK-MSVC32-NEXT: movl 32(%ebp), %esi +; CHECK-MSVC32-NEXT: movl 36(%ebp), %edi ; CHECK-MSVC32-NEXT: movl %edi, 12(%eax) ; CHECK-MSVC32-NEXT: movl %esi, 8(%eax) ; CHECK-MSVC32-NEXT: movl %edx, 4(%eax) ; CHECK-MSVC32-NEXT: movl %ecx, (%eax) +; CHECK-MSVC32-NEXT: leal -8(%ebp), %esp ; CHECK-MSVC32-NEXT: popl %esi ; CHECK-MSVC32-NEXT: popl %edi +; CHECK-MSVC32-NEXT: popl %ebp ; CHECK-MSVC32-NEXT: retl ret PrimTy %x } @@ -344,34 +364,41 @@ define PrimTy @leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, PrimTy %x) nounw ; CHECK-X86: # %bb.0: ; CHECK-X86-NEXT: pushl %edi ; CHECK-X86-NEXT: pushl %esi -; CHECK-X86-NEXT: movl 12(%esp), %eax -; CHECK-X86-NEXT: movl 48(%esp), %ecx -; CHECK-X86-NEXT: movl 52(%esp), %edx -; CHECK-X86-NEXT: movl 56(%esp), %esi -; CHECK-X86-NEXT: movl 60(%esp), %edi +; CHECK-X86-NEXT: pushl %eax +; CHECK-X86-NEXT: movl 16(%esp), %eax +; CHECK-X86-NEXT: movl 64(%esp), %ecx +; CHECK-X86-NEXT: movl 68(%esp), %edx +; CHECK-X86-NEXT: movl 72(%esp), %esi +; CHECK-X86-NEXT: movl 76(%esp), %edi ; CHECK-X86-NEXT: movl %edi, 12(%eax) ; CHECK-X86-NEXT: movl %esi, 8(%eax) ; CHECK-X86-NEXT: movl %edx, 4(%eax) ; CHECK-X86-NEXT: movl %ecx, (%eax) +; CHECK-X86-NEXT: addl $4, %esp ; CHECK-X86-NEXT: popl %esi ; CHECK-X86-NEXT: popl %edi ; CHECK-X86-NEXT: retl $4 ; ; CHECK-MSVC32-LABEL: leading_args: ; CHECK-MSVC32: # %bb.0: +; CHECK-MSVC32-NEXT: pushl %ebp +; CHECK-MSVC32-NEXT: movl %esp, %ebp ; CHECK-MSVC32-NEXT: pushl %edi ; CHECK-MSVC32-NEXT: pushl %esi -; CHECK-MSVC32-NEXT: movl 12(%esp), %eax -; CHECK-MSVC32-NEXT: movl 48(%esp), %ecx -; CHECK-MSVC32-NEXT: movl 52(%esp), %edx -; CHECK-MSVC32-NEXT: movl 56(%esp), %esi -; CHECK-MSVC32-NEXT: movl 60(%esp), %edi +; CHECK-MSVC32-NEXT: andl $-16, %esp +; CHECK-MSVC32-NEXT: movl 8(%ebp), %eax +; CHECK-MSVC32-NEXT: movl 56(%ebp), %ecx +; CHECK-MSVC32-NEXT: movl 60(%ebp), %edx +; CHECK-MSVC32-NEXT: movl 64(%ebp), %esi +; CHECK-MSVC32-NEXT: movl 68(%ebp), %edi ; CHECK-MSVC32-NEXT: movl %edi, 12(%eax) ; 
CHECK-MSVC32-NEXT: movl %esi, 8(%eax) ; CHECK-MSVC32-NEXT: movl %edx, 4(%eax) ; CHECK-MSVC32-NEXT: movl %ecx, (%eax) +; CHECK-MSVC32-NEXT: leal -8(%ebp), %esp ; CHECK-MSVC32-NEXT: popl %esi ; CHECK-MSVC32-NEXT: popl %edi +; CHECK-MSVC32-NEXT: popl %ebp ; CHECK-MSVC32-NEXT: retl ret PrimTy %x } @@ -417,34 +444,41 @@ define PrimTy @many_leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, Pr ; CHECK-X86: # %bb.0: ; CHECK-X86-NEXT: pushl %edi ; CHECK-X86-NEXT: pushl %esi -; CHECK-X86-NEXT: movl 12(%esp), %eax -; CHECK-X86-NEXT: movl 72(%esp), %ecx -; CHECK-X86-NEXT: movl 76(%esp), %edx -; CHECK-X86-NEXT: movl 80(%esp), %esi -; CHECK-X86-NEXT: movl 84(%esp), %edi +; CHECK-X86-NEXT: pushl %eax +; CHECK-X86-NEXT: movl 16(%esp), %eax +; CHECK-X86-NEXT: movl 80(%esp), %ecx +; CHECK-X86-NEXT: movl 84(%esp), %edx +; CHECK-X86-NEXT: movl 88(%esp), %esi +; CHECK-X86-NEXT: movl 92(%esp), %edi ; CHECK-X86-NEXT: movl %edi, 12(%eax) ; CHECK-X86-NEXT: movl %esi, 8(%eax) ; CHECK-X86-NEXT: movl %edx, 4(%eax) ; CHECK-X86-NEXT: movl %ecx, (%eax) +; CHECK-X86-NEXT: addl $4, %esp ; CHECK-X86-NEXT: popl %esi ; CHECK-X86-NEXT: popl %edi ; CHECK-X86-NEXT: retl $4 ; ; CHECK-MSVC32-LABEL: many_leading_args: ; CHECK-MSVC32: # %bb.0: +; CHECK-MSVC32-NEXT: pushl %ebp +; CHECK-MSVC32-NEXT: movl %esp, %ebp ; CHECK-MSVC32-NEXT: pushl %edi ; CHECK-MSVC32-NEXT: pushl %esi -; CHECK-MSVC32-NEXT: movl 12(%esp), %eax -; CHECK-MSVC32-NEXT: movl 72(%esp), %ecx -; CHECK-MSVC32-NEXT: movl 76(%esp), %edx -; CHECK-MSVC32-NEXT: movl 80(%esp), %esi -; CHECK-MSVC32-NEXT: movl 84(%esp), %edi +; CHECK-MSVC32-NEXT: andl $-16, %esp +; CHECK-MSVC32-NEXT: movl 8(%ebp), %eax +; CHECK-MSVC32-NEXT: movl 72(%ebp), %ecx +; CHECK-MSVC32-NEXT: movl 76(%ebp), %edx +; CHECK-MSVC32-NEXT: movl 80(%ebp), %esi +; CHECK-MSVC32-NEXT: movl 84(%ebp), %edi ; CHECK-MSVC32-NEXT: movl %edi, 12(%eax) ; CHECK-MSVC32-NEXT: movl %esi, 8(%eax) ; CHECK-MSVC32-NEXT: movl %edx, 4(%eax) ; CHECK-MSVC32-NEXT: movl %ecx, (%eax) +; CHECK-MSVC32-NEXT: leal -8(%ebp), %esp ; CHECK-MSVC32-NEXT: popl %esi ; CHECK-MSVC32-NEXT: popl %edi +; CHECK-MSVC32-NEXT: popl %ebp ; CHECK-MSVC32-NEXT: retl ret PrimTy %x } @@ -488,34 +522,41 @@ define PrimTy @trailing_arg(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, PrimTy ; CHECK-X86: # %bb.0: ; CHECK-X86-NEXT: pushl %edi ; CHECK-X86-NEXT: pushl %esi -; CHECK-X86-NEXT: movl 12(%esp), %eax -; CHECK-X86-NEXT: movl 56(%esp), %ecx -; CHECK-X86-NEXT: movl 60(%esp), %edx -; CHECK-X86-NEXT: movl 64(%esp), %esi -; CHECK-X86-NEXT: movl 68(%esp), %edi +; CHECK-X86-NEXT: pushl %eax +; CHECK-X86-NEXT: movl 16(%esp), %eax +; CHECK-X86-NEXT: movl 64(%esp), %ecx +; CHECK-X86-NEXT: movl 68(%esp), %edx +; CHECK-X86-NEXT: movl 72(%esp), %esi +; CHECK-X86-NEXT: movl 76(%esp), %edi ; CHECK-X86-NEXT: movl %edi, 12(%eax) ; CHECK-X86-NEXT: movl %esi, 8(%eax) ; CHECK-X86-NEXT: movl %edx, 4(%eax) ; CHECK-X86-NEXT: movl %ecx, (%eax) +; CHECK-X86-NEXT: addl $4, %esp ; CHECK-X86-NEXT: popl %esi ; CHECK-X86-NEXT: popl %edi ; CHECK-X86-NEXT: retl $4 ; ; CHECK-MSVC32-LABEL: trailing_arg: ; CHECK-MSVC32: # %bb.0: +; CHECK-MSVC32-NEXT: pushl %ebp +; CHECK-MSVC32-NEXT: movl %esp, %ebp ; CHECK-MSVC32-NEXT: pushl %edi ; CHECK-MSVC32-NEXT: pushl %esi -; CHECK-MSVC32-NEXT: movl 12(%esp), %eax -; CHECK-MSVC32-NEXT: movl 56(%esp), %ecx -; CHECK-MSVC32-NEXT: movl 60(%esp), %edx -; CHECK-MSVC32-NEXT: movl 64(%esp), %esi -; CHECK-MSVC32-NEXT: movl 68(%esp), %edi +; CHECK-MSVC32-NEXT: andl $-16, %esp +; CHECK-MSVC32-NEXT: movl 8(%ebp), %eax +; CHECK-MSVC32-NEXT: movl 56(%ebp), %ecx 
+; CHECK-MSVC32-NEXT: movl 60(%ebp), %edx +; CHECK-MSVC32-NEXT: movl 64(%ebp), %esi +; CHECK-MSVC32-NEXT: movl 68(%ebp), %edi ; CHECK-MSVC32-NEXT: movl %edi, 12(%eax) ; CHECK-MSVC32-NEXT: movl %esi, 8(%eax) ; CHECK-MSVC32-NEXT: movl %edx, 4(%eax) ; CHECK-MSVC32-NEXT: movl %ecx, (%eax) +; CHECK-MSVC32-NEXT: leal -8(%ebp), %esp ; CHECK-MSVC32-NEXT: popl %esi ; CHECK-MSVC32-NEXT: popl %edi +; CHECK-MSVC32-NEXT: popl %ebp ; CHECK-MSVC32-NEXT: retl ret PrimTy %x } @@ -571,32 +612,43 @@ define void @call_first_arg(PrimTy %x) nounwind { ; ; CHECK-X86-LABEL: call_first_arg: ; CHECK-X86: # %bb.0: -; CHECK-X86-NEXT: subl $40, %esp -; CHECK-X86-NEXT: leal 12(%esp), %eax -; CHECK-X86-NEXT: pushl 56(%esp) -; CHECK-X86-NEXT: pushl 56(%esp) -; CHECK-X86-NEXT: pushl 56(%esp) -; CHECK-X86-NEXT: pushl 56(%esp) -; CHECK-X86-NEXT: pushl %eax +; CHECK-X86-NEXT: pushl %esi +; CHECK-X86-NEXT: subl $56, %esp +; CHECK-X86-NEXT: movl 64(%esp), %eax +; CHECK-X86-NEXT: movl 68(%esp), %ecx +; CHECK-X86-NEXT: movl 72(%esp), %edx +; CHECK-X86-NEXT: movl 76(%esp), %esi +; CHECK-X86-NEXT: movl %esi, 28(%esp) +; CHECK-X86-NEXT: movl %edx, 24(%esp) +; CHECK-X86-NEXT: movl %ecx, 20(%esp) +; CHECK-X86-NEXT: movl %eax, 16(%esp) +; CHECK-X86-NEXT: leal 32(%esp), %eax +; CHECK-X86-NEXT: movl %eax, (%esp) ; CHECK-X86-NEXT: calll first_arg@PLT -; CHECK-X86-NEXT: addl $56, %esp +; CHECK-X86-NEXT: addl $52, %esp +; CHECK-X86-NEXT: popl %esi ; CHECK-X86-NEXT: retl ; ; CHECK-MSVC32-LABEL: call_first_arg: ; CHECK-MSVC32: # %bb.0: ; CHECK-MSVC32-NEXT: pushl %ebp ; CHECK-MSVC32-NEXT: movl %esp, %ebp +; CHECK-MSVC32-NEXT: pushl %esi ; CHECK-MSVC32-NEXT: andl $-16, %esp -; CHECK-MSVC32-NEXT: subl $32, %esp -; CHECK-MSVC32-NEXT: movl %esp, %eax -; CHECK-MSVC32-NEXT: pushl 20(%ebp) -; CHECK-MSVC32-NEXT: pushl 16(%ebp) -; CHECK-MSVC32-NEXT: pushl 12(%ebp) -; CHECK-MSVC32-NEXT: pushl 8(%ebp) -; CHECK-MSVC32-NEXT: pushl %eax +; CHECK-MSVC32-NEXT: subl $64, %esp +; CHECK-MSVC32-NEXT: movl 8(%ebp), %eax +; CHECK-MSVC32-NEXT: movl 12(%ebp), %ecx +; CHECK-MSVC32-NEXT: movl 16(%ebp), %edx +; CHECK-MSVC32-NEXT: movl 20(%ebp), %esi +; CHECK-MSVC32-NEXT: movl %esi, 28(%esp) +; CHECK-MSVC32-NEXT: movl %edx, 24(%esp) +; CHECK-MSVC32-NEXT: movl %ecx, 20(%esp) +; CHECK-MSVC32-NEXT: movl %eax, 16(%esp) +; CHECK-MSVC32-NEXT: leal 32(%esp), %eax +; CHECK-MSVC32-NEXT: movl %eax, (%esp) ; CHECK-MSVC32-NEXT: calll _first_arg -; CHECK-MSVC32-NEXT: addl $20, %esp -; CHECK-MSVC32-NEXT: movl %ebp, %esp +; CHECK-MSVC32-NEXT: leal -4(%ebp), %esp +; CHECK-MSVC32-NEXT: popl %esi ; CHECK-MSVC32-NEXT: popl %ebp ; CHECK-MSVC32-NEXT: retl call PrimTy @first_arg(PrimTy %x) @@ -686,48 +738,59 @@ define void @call_leading_args(PrimTy %x) nounwind { ; ; CHECK-X86-LABEL: call_leading_args: ; CHECK-X86: # %bb.0: -; CHECK-X86-NEXT: subl $40, %esp -; CHECK-X86-NEXT: leal 12(%esp), %eax -; CHECK-X86-NEXT: pushl 56(%esp) -; CHECK-X86-NEXT: pushl 56(%esp) -; CHECK-X86-NEXT: pushl 56(%esp) -; CHECK-X86-NEXT: pushl 56(%esp) -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl %eax +; CHECK-X86-NEXT: pushl %esi +; CHECK-X86-NEXT: subl $88, %esp +; CHECK-X86-NEXT: movl 96(%esp), %eax +; CHECK-X86-NEXT: movl 100(%esp), %ecx +; CHECK-X86-NEXT: movl 104(%esp), %edx +; CHECK-X86-NEXT: movl 108(%esp), %esi +; CHECK-X86-NEXT: movl %esi, 60(%esp) +; CHECK-X86-NEXT: movl %edx, 56(%esp) +; CHECK-X86-NEXT: 
movl %ecx, 52(%esp) +; CHECK-X86-NEXT: movl %eax, 48(%esp) +; CHECK-X86-NEXT: leal 64(%esp), %eax +; CHECK-X86-NEXT: movl %eax, (%esp) +; CHECK-X86-NEXT: movl $0, 32(%esp) +; CHECK-X86-NEXT: movl $0, 28(%esp) +; CHECK-X86-NEXT: movl $0, 24(%esp) +; CHECK-X86-NEXT: movl $0, 20(%esp) +; CHECK-X86-NEXT: movl $0, 16(%esp) +; CHECK-X86-NEXT: movl $0, 12(%esp) +; CHECK-X86-NEXT: movl $0, 8(%esp) +; CHECK-X86-NEXT: movl $0, 4(%esp) ; CHECK-X86-NEXT: calll leading_args@PLT -; CHECK-X86-NEXT: addl $88, %esp +; CHECK-X86-NEXT: addl $84, %esp +; CHECK-X86-NEXT: popl %esi ; CHECK-X86-NEXT: retl ; ; CHECK-MSVC32-LABEL: call_leading_args: ; CHECK-MSVC32: # %bb.0: ; CHECK-MSVC32-NEXT: pushl %ebp ; CHECK-MSVC32-NEXT: movl %esp, %ebp +; CHECK-MSVC32-NEXT: pushl %esi ; CHECK-MSVC32-NEXT: andl $-16, %esp -; CHECK-MSVC32-NEXT: subl $32, %esp -; CHECK-MSVC32-NEXT: movl %esp, %eax -; CHECK-MSVC32-NEXT: pushl 20(%ebp) -; CHECK-MSVC32-NEXT: pushl 16(%ebp) -; CHECK-MSVC32-NEXT: pushl 12(%ebp) -; CHECK-MSVC32-NEXT: pushl 8(%ebp) -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl %eax +; CHECK-MSVC32-NEXT: subl $96, %esp +; CHECK-MSVC32-NEXT: movl 8(%ebp), %eax +; CHECK-MSVC32-NEXT: movl 12(%ebp), %ecx +; CHECK-MSVC32-NEXT: movl 16(%ebp), %edx +; CHECK-MSVC32-NEXT: movl 20(%ebp), %esi +; CHECK-MSVC32-NEXT: movl %esi, 60(%esp) +; CHECK-MSVC32-NEXT: movl %edx, 56(%esp) +; CHECK-MSVC32-NEXT: movl %ecx, 52(%esp) +; CHECK-MSVC32-NEXT: movl %eax, 48(%esp) +; CHECK-MSVC32-NEXT: leal 64(%esp), %eax +; CHECK-MSVC32-NEXT: movl %eax, (%esp) +; CHECK-MSVC32-NEXT: movl $0, 32(%esp) +; CHECK-MSVC32-NEXT: movl $0, 28(%esp) +; CHECK-MSVC32-NEXT: movl $0, 24(%esp) +; CHECK-MSVC32-NEXT: movl $0, 20(%esp) +; CHECK-MSVC32-NEXT: movl $0, 16(%esp) +; CHECK-MSVC32-NEXT: movl $0, 12(%esp) +; CHECK-MSVC32-NEXT: movl $0, 8(%esp) +; CHECK-MSVC32-NEXT: movl $0, 4(%esp) ; CHECK-MSVC32-NEXT: calll _leading_args -; CHECK-MSVC32-NEXT: addl $52, %esp -; CHECK-MSVC32-NEXT: movl %ebp, %esp +; CHECK-MSVC32-NEXT: leal -4(%ebp), %esp +; CHECK-MSVC32-NEXT: popl %esi ; CHECK-MSVC32-NEXT: popl %ebp ; CHECK-MSVC32-NEXT: retl call PrimTy @leading_args(i64 0, i64 0, i64 0, i64 0, PrimTy %x) @@ -836,56 +899,67 @@ define void @call_many_leading_args(PrimTy %x) nounwind { ; ; CHECK-X86-LABEL: call_many_leading_args: ; CHECK-X86: # %bb.0: -; CHECK-X86-NEXT: subl $40, %esp -; CHECK-X86-NEXT: leal 12(%esp), %eax -; CHECK-X86-NEXT: pushl 56(%esp) -; CHECK-X86-NEXT: pushl 56(%esp) -; CHECK-X86-NEXT: pushl 56(%esp) -; CHECK-X86-NEXT: pushl 56(%esp) -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl %eax +; CHECK-X86-NEXT: pushl %esi +; CHECK-X86-NEXT: subl $104, %esp +; CHECK-X86-NEXT: movl 112(%esp), %eax +; CHECK-X86-NEXT: movl 116(%esp), %ecx +; CHECK-X86-NEXT: movl 120(%esp), %edx +; CHECK-X86-NEXT: movl 124(%esp), %esi +; CHECK-X86-NEXT: movl %esi, 76(%esp) +; CHECK-X86-NEXT: movl %edx, 72(%esp) +; CHECK-X86-NEXT: movl %ecx, 68(%esp) +; CHECK-X86-NEXT: movl %eax, 64(%esp) +; CHECK-X86-NEXT: leal 80(%esp), %eax +; CHECK-X86-NEXT: movl 
%eax, (%esp) +; CHECK-X86-NEXT: movl $0, 60(%esp) +; CHECK-X86-NEXT: movl $0, 56(%esp) +; CHECK-X86-NEXT: movl $0, 52(%esp) +; CHECK-X86-NEXT: movl $0, 48(%esp) +; CHECK-X86-NEXT: movl $0, 32(%esp) +; CHECK-X86-NEXT: movl $0, 28(%esp) +; CHECK-X86-NEXT: movl $0, 24(%esp) +; CHECK-X86-NEXT: movl $0, 20(%esp) +; CHECK-X86-NEXT: movl $0, 16(%esp) +; CHECK-X86-NEXT: movl $0, 12(%esp) +; CHECK-X86-NEXT: movl $0, 8(%esp) +; CHECK-X86-NEXT: movl $0, 4(%esp) ; CHECK-X86-NEXT: calll many_leading_args@PLT -; CHECK-X86-NEXT: addl $104, %esp +; CHECK-X86-NEXT: addl $100, %esp +; CHECK-X86-NEXT: popl %esi ; CHECK-X86-NEXT: retl ; ; CHECK-MSVC32-LABEL: call_many_leading_args: ; CHECK-MSVC32: # %bb.0: ; CHECK-MSVC32-NEXT: pushl %ebp ; CHECK-MSVC32-NEXT: movl %esp, %ebp +; CHECK-MSVC32-NEXT: pushl %esi ; CHECK-MSVC32-NEXT: andl $-16, %esp -; CHECK-MSVC32-NEXT: subl $32, %esp -; CHECK-MSVC32-NEXT: movl %esp, %eax -; CHECK-MSVC32-NEXT: pushl 20(%ebp) -; CHECK-MSVC32-NEXT: pushl 16(%ebp) -; CHECK-MSVC32-NEXT: pushl 12(%ebp) -; CHECK-MSVC32-NEXT: pushl 8(%ebp) -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl %eax +; CHECK-MSVC32-NEXT: subl $112, %esp +; CHECK-MSVC32-NEXT: movl 8(%ebp), %eax +; CHECK-MSVC32-NEXT: movl 12(%ebp), %ecx +; CHECK-MSVC32-NEXT: movl 16(%ebp), %edx +; CHECK-MSVC32-NEXT: movl 20(%ebp), %esi +; CHECK-MSVC32-NEXT: movl %esi, 76(%esp) +; CHECK-MSVC32-NEXT: movl %edx, 72(%esp) +; CHECK-MSVC32-NEXT: movl %ecx, 68(%esp) +; CHECK-MSVC32-NEXT: movl %eax, 64(%esp) +; CHECK-MSVC32-NEXT: leal 80(%esp), %eax +; CHECK-MSVC32-NEXT: movl %eax, (%esp) +; CHECK-MSVC32-NEXT: movl $0, 60(%esp) +; CHECK-MSVC32-NEXT: movl $0, 56(%esp) +; CHECK-MSVC32-NEXT: movl $0, 52(%esp) +; CHECK-MSVC32-NEXT: movl $0, 48(%esp) +; CHECK-MSVC32-NEXT: movl $0, 32(%esp) +; CHECK-MSVC32-NEXT: movl $0, 28(%esp) +; CHECK-MSVC32-NEXT: movl $0, 24(%esp) +; CHECK-MSVC32-NEXT: movl $0, 20(%esp) +; CHECK-MSVC32-NEXT: movl $0, 16(%esp) +; CHECK-MSVC32-NEXT: movl $0, 12(%esp) +; CHECK-MSVC32-NEXT: movl $0, 8(%esp) +; CHECK-MSVC32-NEXT: movl $0, 4(%esp) ; CHECK-MSVC32-NEXT: calll _many_leading_args -; CHECK-MSVC32-NEXT: addl $68, %esp -; CHECK-MSVC32-NEXT: movl %ebp, %esp +; CHECK-MSVC32-NEXT: leal -4(%ebp), %esp +; CHECK-MSVC32-NEXT: popl %esi ; CHECK-MSVC32-NEXT: popl %ebp ; CHECK-MSVC32-NEXT: retl call PrimTy @many_leading_args(i64 0, i64 0, i64 0, i64 0, PrimTy Prim0, PrimTy %x) @@ -975,48 +1049,59 @@ define void @call_trailing_arg(PrimTy %x) nounwind { ; ; CHECK-X86-LABEL: call_trailing_arg: ; CHECK-X86: # %bb.0: -; CHECK-X86-NEXT: subl $40, %esp -; CHECK-X86-NEXT: leal 12(%esp), %eax -; CHECK-X86-NEXT: pushl 56(%esp) -; CHECK-X86-NEXT: pushl 56(%esp) -; CHECK-X86-NEXT: pushl 56(%esp) -; CHECK-X86-NEXT: pushl 56(%esp) -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl %eax +; CHECK-X86-NEXT: pushl %esi +; CHECK-X86-NEXT: subl $88, %esp +; CHECK-X86-NEXT: movl 96(%esp), %eax +; CHECK-X86-NEXT: movl 100(%esp), %ecx +; CHECK-X86-NEXT: movl 104(%esp), %edx +; CHECK-X86-NEXT: movl 108(%esp), %esi 
+; CHECK-X86-NEXT: movl %esi, 60(%esp) +; CHECK-X86-NEXT: movl %edx, 56(%esp) +; CHECK-X86-NEXT: movl %ecx, 52(%esp) +; CHECK-X86-NEXT: movl %eax, 48(%esp) +; CHECK-X86-NEXT: leal 64(%esp), %eax +; CHECK-X86-NEXT: movl %eax, (%esp) +; CHECK-X86-NEXT: movl $0, 32(%esp) +; CHECK-X86-NEXT: movl $0, 28(%esp) +; CHECK-X86-NEXT: movl $0, 24(%esp) +; CHECK-X86-NEXT: movl $0, 20(%esp) +; CHECK-X86-NEXT: movl $0, 16(%esp) +; CHECK-X86-NEXT: movl $0, 12(%esp) +; CHECK-X86-NEXT: movl $0, 8(%esp) +; CHECK-X86-NEXT: movl $0, 4(%esp) ; CHECK-X86-NEXT: calll trailing_arg@PLT -; CHECK-X86-NEXT: addl $88, %esp +; CHECK-X86-NEXT: addl $84, %esp +; CHECK-X86-NEXT: popl %esi ; CHECK-X86-NEXT: retl ; ; CHECK-MSVC32-LABEL: call_trailing_arg: ; CHECK-MSVC32: # %bb.0: ; CHECK-MSVC32-NEXT: pushl %ebp ; CHECK-MSVC32-NEXT: movl %esp, %ebp +; CHECK-MSVC32-NEXT: pushl %esi ; CHECK-MSVC32-NEXT: andl $-16, %esp -; CHECK-MSVC32-NEXT: subl $32, %esp -; CHECK-MSVC32-NEXT: movl %esp, %eax -; CHECK-MSVC32-NEXT: pushl 20(%ebp) -; CHECK-MSVC32-NEXT: pushl 16(%ebp) -; CHECK-MSVC32-NEXT: pushl 12(%ebp) -; CHECK-MSVC32-NEXT: pushl 8(%ebp) -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl %eax +; CHECK-MSVC32-NEXT: subl $96, %esp +; CHECK-MSVC32-NEXT: movl 8(%ebp), %eax +; CHECK-MSVC32-NEXT: movl 12(%ebp), %ecx +; CHECK-MSVC32-NEXT: movl 16(%ebp), %edx +; CHECK-MSVC32-NEXT: movl 20(%ebp), %esi +; CHECK-MSVC32-NEXT: movl %esi, 60(%esp) +; CHECK-MSVC32-NEXT: movl %edx, 56(%esp) +; CHECK-MSVC32-NEXT: movl %ecx, 52(%esp) +; CHECK-MSVC32-NEXT: movl %eax, 48(%esp) +; CHECK-MSVC32-NEXT: leal 64(%esp), %eax +; CHECK-MSVC32-NEXT: movl %eax, (%esp) +; CHECK-MSVC32-NEXT: movl $0, 32(%esp) +; CHECK-MSVC32-NEXT: movl $0, 28(%esp) +; CHECK-MSVC32-NEXT: movl $0, 24(%esp) +; CHECK-MSVC32-NEXT: movl $0, 20(%esp) +; CHECK-MSVC32-NEXT: movl $0, 16(%esp) +; CHECK-MSVC32-NEXT: movl $0, 12(%esp) +; CHECK-MSVC32-NEXT: movl $0, 8(%esp) +; CHECK-MSVC32-NEXT: movl $0, 4(%esp) ; CHECK-MSVC32-NEXT: calll _trailing_arg -; CHECK-MSVC32-NEXT: addl $52, %esp -; CHECK-MSVC32-NEXT: movl %ebp, %esp +; CHECK-MSVC32-NEXT: leal -4(%ebp), %esp +; CHECK-MSVC32-NEXT: popl %esi ; CHECK-MSVC32-NEXT: popl %ebp ; CHECK-MSVC32-NEXT: retl call PrimTy @trailing_arg(i64 0, i64 0, i64 0, i64 0, PrimTy %x) diff --git a/llvm/test/CodeGen/X86/i128-sdiv.ll b/llvm/test/CodeGen/X86/i128-sdiv.ll index 717f52f..7d57573 100644 --- a/llvm/test/CodeGen/X86/i128-sdiv.ll +++ b/llvm/test/CodeGen/X86/i128-sdiv.ll @@ -8,18 +8,21 @@ define i128 @test1(i128 %x) nounwind { ; X86-LABEL: test1: ; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andl $-16, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 36(%ebp), %ecx ; X86-NEXT: movl %ecx, %esi ; X86-NEXT: sarl $31, %esi ; X86-NEXT: movl %esi, %edx ; X86-NEXT: shrl $30, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl 24(%ebp), %edi ; X86-NEXT: addl %esi, %edi -; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi -; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx +; X86-NEXT: adcl 28(%ebp), %esi +; X86-NEXT: adcl 32(%ebp), %edx ; X86-NEXT: adcl $0, %ecx ; X86-NEXT: shrdl $2, %ecx, %edx ; X86-NEXT: movl %ecx, %esi @@ -29,8 +32,10 @@ define i128 @test1(i128 %x) nounwind { ; 
X86-NEXT: movl %ecx, 8(%eax) ; X86-NEXT: movl %esi, 4(%eax) ; X86-NEXT: movl %edx, (%eax) +; X86-NEXT: leal -8(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi +; X86-NEXT: popl %ebp ; X86-NEXT: retl $4 ; ; X64-LABEL: test1: @@ -52,38 +57,44 @@ define i128 @test1(i128 %x) nounwind { define i128 @test2(i128 %x) nounwind { ; X86-LABEL: test2: ; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %edx, %esi -; X86-NEXT: sarl $31, %esi -; X86-NEXT: movl %esi, %ecx +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 36(%ebp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: sarl $31, %edx +; X86-NEXT: movl %edx, %ecx ; X86-NEXT: shrl $30, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: addl %esi, %edi -; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi -; X86-NEXT: adcl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: adcl $0, %edx -; X86-NEXT: shrdl $2, %edx, %ecx -; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl 24(%ebp), %esi +; X86-NEXT: addl %edx, %esi +; X86-NEXT: adcl 28(%ebp), %edx +; X86-NEXT: adcl 32(%ebp), %ecx +; X86-NEXT: adcl $0, %eax +; X86-NEXT: shrdl $2, %eax, %ecx +; X86-NEXT: movl %eax, %esi ; X86-NEXT: sarl $31, %esi -; X86-NEXT: sarl $2, %edx -; X86-NEXT: xorl %edi, %edi +; X86-NEXT: sarl $2, %eax +; X86-NEXT: xorl %edx, %edx ; X86-NEXT: negl %ecx +; X86-NEXT: movl $0, %edi +; X86-NEXT: sbbl %eax, %edi ; X86-NEXT: movl $0, %ebx -; X86-NEXT: sbbl %edx, %ebx -; X86-NEXT: movl $0, %edx +; X86-NEXT: sbbl %esi, %ebx ; X86-NEXT: sbbl %esi, %edx -; X86-NEXT: sbbl %esi, %edi +; X86-NEXT: movl 8(%ebp), %eax ; X86-NEXT: movl %ecx, (%eax) -; X86-NEXT: movl %ebx, 4(%eax) -; X86-NEXT: movl %edx, 8(%eax) -; X86-NEXT: movl %edi, 12(%eax) +; X86-NEXT: movl %edi, 4(%eax) +; X86-NEXT: movl %ebx, 8(%eax) +; X86-NEXT: movl %edx, 12(%eax) +; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl $4 ; ; X64-LABEL: test2: diff --git a/llvm/test/CodeGen/X86/i128-udiv.ll b/llvm/test/CodeGen/X86/i128-udiv.ll index 3f890b7..9011832 100644 --- a/llvm/test/CodeGen/X86/i128-udiv.ll +++ b/llvm/test/CodeGen/X86/i128-udiv.ll @@ -8,15 +8,21 @@ define i128 @test1(i128 %x) nounwind { ; X86-LABEL: test1: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 32(%ebp), %ecx +; X86-NEXT: movl 36(%ebp), %edx ; X86-NEXT: shrdl $2, %edx, %ecx ; X86-NEXT: shrl $2, %edx ; X86-NEXT: movl %edx, 4(%eax) ; X86-NEXT: movl %ecx, (%eax) ; X86-NEXT: movl $0, 12(%eax) ; X86-NEXT: movl $0, 8(%eax) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp ; X86-NEXT: retl $4 ; ; X64-LABEL: test1: diff --git a/llvm/test/CodeGen/X86/iabs.ll b/llvm/test/CodeGen/X86/iabs.ll index 55c318e..bdceeef 100644 --- a/llvm/test/CodeGen/X86/iabs.ll +++ b/llvm/test/CodeGen/X86/iabs.ll @@ -123,31 +123,34 @@ define i64 @test_i64(i64 %a) nounwind { define i128 @test_i128(i128 %a) nounwind { ; X86-LABEL: test_i128: ; X86: # %bb.0: -; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; 
X86-NEXT: movl %ecx, %edx -; X86-NEXT: sarl $31, %edx -; X86-NEXT: xorl %edx, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: xorl %edx, %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: xorl %edx, %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: xorl %edx, %ebx -; X86-NEXT: subl %edx, %ebx -; X86-NEXT: sbbl %edx, %edi -; X86-NEXT: sbbl %edx, %esi -; X86-NEXT: sbbl %edx, %ecx -; X86-NEXT: movl %ebx, (%eax) -; X86-NEXT: movl %edi, 4(%eax) -; X86-NEXT: movl %esi, 8(%eax) +; X86-NEXT: andl $-16, %esp +; X86-NEXT: movl 36(%ebp), %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: sarl $31, %eax +; X86-NEXT: xorl %eax, %ecx +; X86-NEXT: movl 32(%ebp), %edx +; X86-NEXT: xorl %eax, %edx +; X86-NEXT: movl 28(%ebp), %esi +; X86-NEXT: xorl %eax, %esi +; X86-NEXT: movl 24(%ebp), %edi +; X86-NEXT: xorl %eax, %edi +; X86-NEXT: subl %eax, %edi +; X86-NEXT: sbbl %eax, %esi +; X86-NEXT: sbbl %eax, %edx +; X86-NEXT: sbbl %eax, %ecx +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl %edi, (%eax) +; X86-NEXT: movl %esi, 4(%eax) +; X86-NEXT: movl %edx, 8(%eax) ; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: leal -8(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi -; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl $4 ; ; X64-LABEL: test_i128: diff --git a/llvm/test/CodeGen/X86/icmp-shift-opt.ll b/llvm/test/CodeGen/X86/icmp-shift-opt.ll index c52b3ed..4a6c1d0 100644 --- a/llvm/test/CodeGen/X86/icmp-shift-opt.ll +++ b/llvm/test/CodeGen/X86/icmp-shift-opt.ll @@ -10,33 +10,39 @@ define i128 @opt_setcc_lt_power_of_2(i128 %a) nounwind { ; X86-LABEL: opt_setcc_lt_power_of_2: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 36(%ebp), %ecx +; X86-NEXT: movl 32(%ebp), %edx +; X86-NEXT: movl 28(%ebp), %edi +; X86-NEXT: movl 24(%ebp), %esi ; X86-NEXT: .p2align 4 ; X86-NEXT: .LBB0_1: # %loop ; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: addl $1, %edi -; X86-NEXT: adcl $0, %esi +; X86-NEXT: addl $1, %esi +; X86-NEXT: adcl $0, %edi ; X86-NEXT: adcl $0, %edx ; X86-NEXT: adcl $0, %ecx ; X86-NEXT: movl %edx, %ebx ; X86-NEXT: orl %ecx, %ebx -; X86-NEXT: movl %esi, %ebp -; X86-NEXT: orl %edx, %ebp -; X86-NEXT: orl %ecx, %ebp -; X86-NEXT: shrdl $28, %ebx, %ebp +; X86-NEXT: movl %esi, %eax +; X86-NEXT: movl %edi, %esi +; X86-NEXT: orl %edx, %esi +; X86-NEXT: orl %ecx, %esi +; X86-NEXT: shrdl $28, %ebx, %esi +; X86-NEXT: movl %eax, %esi ; X86-NEXT: jne .LBB0_1 ; X86-NEXT: # %bb.2: # %exit -; X86-NEXT: movl %edi, (%eax) -; X86-NEXT: movl %esi, 4(%eax) +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl %esi, (%eax) +; X86-NEXT: movl %edi, 4(%eax) ; X86-NEXT: movl %edx, 8(%eax) ; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -73,15 +79,21 @@ exit: define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind { ; X86-LABEL: opt_setcc_srl_eq_zero: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 
16(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: orl 20(%ebp), %ecx +; X86-NEXT: movl 8(%ebp), %edx ; X86-NEXT: orl %eax, %edx ; X86-NEXT: orl %ecx, %edx ; X86-NEXT: orl %eax, %ecx ; X86-NEXT: shldl $15, %edx, %ecx ; X86-NEXT: sete %al +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; X64-LABEL: opt_setcc_srl_eq_zero: @@ -98,15 +110,21 @@ define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind { define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind { ; X86-LABEL: opt_setcc_srl_ne_zero: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: orl 20(%ebp), %ecx +; X86-NEXT: movl 8(%ebp), %edx ; X86-NEXT: orl %eax, %edx ; X86-NEXT: orl %ecx, %edx ; X86-NEXT: orl %eax, %ecx ; X86-NEXT: shldl $15, %edx, %ecx ; X86-NEXT: setne %al +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; X64-LABEL: opt_setcc_srl_ne_zero: @@ -123,13 +141,19 @@ define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind { define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind { ; X86-LABEL: opt_setcc_shl_eq_zero: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx ; X86-NEXT: shll $17, %ecx -; X86-NEXT: orl {{[0-9]+}}(%esp), %eax -; X86-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-NEXT: orl 8(%ebp), %eax +; X86-NEXT: orl 12(%ebp), %eax ; X86-NEXT: orl %ecx, %eax ; X86-NEXT: sete %al +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; X64-LABEL: opt_setcc_shl_eq_zero: @@ -146,13 +170,19 @@ define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind { define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind { ; X86-LABEL: opt_setcc_shl_ne_zero: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx ; X86-NEXT: shll $17, %ecx -; X86-NEXT: orl {{[0-9]+}}(%esp), %eax -; X86-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-NEXT: orl 8(%ebp), %eax +; X86-NEXT: orl 12(%ebp), %eax ; X86-NEXT: orl %ecx, %eax ; X86-NEXT: setne %al +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; X64-LABEL: opt_setcc_shl_ne_zero: @@ -170,13 +200,17 @@ define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind { define i1 @opt_setcc_shl_eq_zero_multiple_shl_users(i128 %a) nounwind { ; X86-LABEL: opt_setcc_shl_eq_zero_multiple_shl_users: ; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl 16(%ebp), %edx +; X86-NEXT: movl 20(%ebp), %esi ; X86-NEXT: shldl $17, %edx, %esi ; X86-NEXT: shldl $17, %ecx, %edx ; X86-NEXT: shldl $17, %eax, %ecx @@ -194,9 +228,11 @@ define i1 
@opt_setcc_shl_eq_zero_multiple_shl_users(i128 %a) nounwind { ; X86-NEXT: calll use@PLT ; X86-NEXT: addl $16, %esp ; X86-NEXT: movl %ebx, %eax +; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; X64-LABEL: opt_setcc_shl_eq_zero_multiple_shl_users: diff --git a/llvm/test/CodeGen/X86/invalid-operand-bundle-call.ll b/llvm/test/CodeGen/X86/invalid-operand-bundle-call.ll index ac4963f..17065a4 100644 --- a/llvm/test/CodeGen/X86/invalid-operand-bundle-call.ll +++ b/llvm/test/CodeGen/X86/invalid-operand-bundle-call.ll @@ -1,10 +1,10 @@ ; RUN: not llc -mtriple=x86_64-unknown-linux-gnu < %s 2>&1 | FileCheck %s -; CHECK: LLVM ERROR: cannot lower calls with arbitrary operand bundles: foo +; CHECK: LLVM ERROR: cannot lower calls with arbitrary operand bundles: foo, bar, baz declare void @g() define void @f(i32 %arg) { - call void @g() [ "foo"(i32 %arg) ] + call void @g() [ "foo"(i32 %arg), "bar"(i32 %arg), "baz"(i32 %arg) ] ret void } diff --git a/llvm/test/CodeGen/X86/isel-fpclass.ll b/llvm/test/CodeGen/X86/isel-fpclass.ll new file mode 100644 index 0000000..960bbf5 --- /dev/null +++ b/llvm/test/CodeGen/X86/isel-fpclass.ll @@ -0,0 +1,526 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=i686-linux | FileCheck %s -check-prefixes=X86-SDAGISEL +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefixes=X64,X64-SDAGISEL +; RUN: llc < %s -mtriple=i686-linux -fast-isel -fast-isel-abort=1 | FileCheck %s -check-prefixes=X86-FASTISEL +; RUN: llc < %s -mtriple=x86_64-linux -fast-isel -fast-isel-abort=1 | FileCheck %s -check-prefixes=X64,X64-FASTISEL + +; FIXME: We can reuse/delete llvm/test/CodeGen/X86/is_fpclass.ll when all patches are included. 
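+; For reference when reading the per-test mask comments below (this summarizes
+; the LangRef encoding of the llvm.is.fpclass test mask): the i32 operand is a
+; bitwise OR of class bits 1 (snan), 2 (qnan), 4 (-inf), 8 (-normal),
+; 16 (-subnormal), 32 (-zero), 64 (+zero), 128 (+subnormal), 256 (+normal)
+; and 512 (+inf); e.g. 516 = 0x204 = 512|4 matches any infinity, and
+; 504 = 0x1f8 covers the six finite classes.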
+ +define i1 @isnone_f(float %x) { +; X86-SDAGISEL-LABEL: isnone_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: xorl %eax, %eax +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: isnone_f: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: isnone_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstp %st(0) +; X86-FASTISEL-NEXT: xorl %eax, %eax +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 0) + ret i1 %0 +} + +define i1 @isany_f(float %x) { +; X86-SDAGISEL-LABEL: isany_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movb $1, %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: isany_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movb $1, %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: isany_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstp %st(0) +; X86-FASTISEL-NEXT: movb $1, %al +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1023) + ret i1 %0 +} + +define i1 @issignaling_f(float %x) { +; X86-SDAGISEL-LABEL: issignaling_f: +; X86-SDAGISEL: # %bb.0: +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-SDAGISEL-NEXT: setl %cl +; X86-SDAGISEL-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X86-SDAGISEL-NEXT: setge %al +; X86-SDAGISEL-NEXT: andb %cl, %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: issignaling_f: +; X64: # %bb.0: +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-NEXT: setl %cl +; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X64-NEXT: setge %al +; X64-NEXT: andb %cl, %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: issignaling_f: +; X86-FASTISEL: # %bb.0: +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-FASTISEL-NEXT: andl (%esp), %eax +; X86-FASTISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-FASTISEL-NEXT: setl %cl +; X86-FASTISEL-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X86-FASTISEL-NEXT: setge %al +; X86-FASTISEL-NEXT: andb %cl, %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl + %a0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1) ; "snan" + ret i1 %a0 +} + + define i1 @isquiet_f(float %x) { +; X86-SDAGISEL-LABEL: isquiet_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-SDAGISEL-NEXT: setge %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: isquiet_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-NEXT: setge %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: isquiet_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-FASTISEL-NEXT: 
andl (%esp), %eax +; X86-FASTISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-FASTISEL-NEXT: setge %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl + entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 2) ; "qnan" + ret i1 %0 +} + +define i1 @not_isquiet_f(float %x) { +; X86-SDAGISEL-LABEL: not_isquiet_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-SDAGISEL-NEXT: setl %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: not_isquiet_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-NEXT: setl %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: not_isquiet_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-FASTISEL-NEXT: andl (%esp), %eax +; X86-FASTISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-FASTISEL-NEXT: setl %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1021) ; ~"qnan" + ret i1 %0 +} + +define i1 @isinf_f(float %x) { +; X86-SDAGISEL-LABEL: isinf_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: sete %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: isinf_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: isinf_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-FASTISEL-NEXT: andl (%esp), %eax +; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-FASTISEL-NEXT: sete %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 516) ; 0x204 = "inf" + ret i1 %0 +} + +define i1 @not_isinf_f(float %x) { +; X86-SDAGISEL-LABEL: not_isinf_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: setne %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: not_isinf_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setne %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: not_isinf_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: 
movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-FASTISEL-NEXT: andl (%esp), %eax +; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-FASTISEL-NEXT: setne %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 507) ; ~0x204 = "~inf" + ret i1 %0 +} + +define i1 @is_plus_inf_f(float %x) { +; X86-SDAGISEL-LABEL: is_plus_inf_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: sete %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: is_plus_inf_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: is_plus_inf_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: cmpl $2139095040, (%esp) # imm = 0x7F800000 +; X86-FASTISEL-NEXT: sete %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 512) ; 0x200 = "+inf" + ret i1 %0 +} + +define i1 @is_minus_inf_f(float %x) { +; X86-SDAGISEL-LABEL: is_minus_inf_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000 +; X86-SDAGISEL-NEXT: sete %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: is_minus_inf_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: is_minus_inf_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: cmpl $-8388608, (%esp) # imm = 0xFF800000 +; X86-FASTISEL-NEXT: sete %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 4) ; "-inf" + ret i1 %0 +} + +define i1 @not_is_minus_inf_f(float %x) { +; X86-SDAGISEL-LABEL: not_is_minus_inf_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000 +; X86-SDAGISEL-NEXT: setne %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: not_is_minus_inf_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 +; X64-NEXT: setne %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: not_is_minus_inf_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: cmpl $-8388608, (%esp) # imm = 0xFF800000 +; X86-FASTISEL-NEXT: setne %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1019) ; ~"-inf" + ret i1 %0 +} + +define i1 @isfinite_f(float %x) { +; X86-SDAGISEL-LABEL: isfinite_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; 
X86-SDAGISEL-NEXT: setl %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: isfinite_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setl %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: isfinite_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-FASTISEL-NEXT: andl (%esp), %eax +; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-FASTISEL-NEXT: setl %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; 0x1f8 = "finite" + ret i1 %0 +} + +define i1 @not_isfinite_f(float %x) { +; X86-SDAGISEL-LABEL: not_isfinite_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: setge %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: not_isfinite_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setge %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: not_isfinite_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-FASTISEL-NEXT: andl (%esp), %eax +; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-FASTISEL-NEXT: setge %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 519) ; ~0x1f8 = "~finite" + ret i1 %0 +} + +define i1 @is_plus_finite_f(float %x) { +; X86-SDAGISEL-LABEL: is_plus_finite_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: setb %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: is_plus_finite_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setb %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: is_plus_finite_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: cmpl $2139095040, (%esp) # imm = 0x7F800000 +; X86-FASTISEL-NEXT: setb %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 448) ; 0x1c0 = "+finite" + ret i1 %0 +} + +define i1 @isnone_d(double %x) nounwind { +; X86-SDAGISEL-LABEL: isnone_d: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: xorl %eax, %eax +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: isnone_d: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: isnone_d: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: fldl {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstp 
%st(0) +; X86-FASTISEL-NEXT: xorl %eax, %eax +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 0) + ret i1 %0 +} + +define i1 @isany_d(double %x) nounwind { +; X86-SDAGISEL-LABEL: isany_d: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movb $1, %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: isany_d: +; X64: # %bb.0: # %entry +; X64-NEXT: movb $1, %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: isany_d: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: fldl {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstp %st(0) +; X86-FASTISEL-NEXT: movb $1, %al +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 1023) + ret i1 %0 +} + +define i1 @isnone_f80(x86_fp80 %x) nounwind { +; X86-SDAGISEL-LABEL: isnone_f80: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: xorl %eax, %eax +; X86-SDAGISEL-NEXT: retl +; +; X64-SDAGISEL-LABEL: isnone_f80: +; X64-SDAGISEL: # %bb.0: # %entry +; X64-SDAGISEL-NEXT: xorl %eax, %eax +; X64-SDAGISEL-NEXT: retq +; +; X86-FASTISEL-LABEL: isnone_f80: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: fldt {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstp %st(0) +; X86-FASTISEL-NEXT: xorl %eax, %eax +; X86-FASTISEL-NEXT: retl +; +; X64-FASTISEL-LABEL: isnone_f80: +; X64-FASTISEL: # %bb.0: # %entry +; X64-FASTISEL-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-FASTISEL-NEXT: fstp %st(0) +; X64-FASTISEL-NEXT: xorl %eax, %eax +; X64-FASTISEL-NEXT: retq +entry: +%0 = tail call i1 @llvm.is.fpclass.f80(x86_fp80 %x, i32 0) +ret i1 %0 +} + +define i1 @isany_f80(x86_fp80 %x) nounwind { +; X86-SDAGISEL-LABEL: isany_f80: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movb $1, %al +; X86-SDAGISEL-NEXT: retl +; +; X64-SDAGISEL-LABEL: isany_f80: +; X64-SDAGISEL: # %bb.0: # %entry +; X64-SDAGISEL-NEXT: movb $1, %al +; X64-SDAGISEL-NEXT: retq +; +; X86-FASTISEL-LABEL: isany_f80: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: fldt {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstp %st(0) +; X86-FASTISEL-NEXT: movb $1, %al +; X86-FASTISEL-NEXT: retl +; +; X64-FASTISEL-LABEL: isany_f80: +; X64-FASTISEL: # %bb.0: # %entry +; X64-FASTISEL-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-FASTISEL-NEXT: fstp %st(0) +; X64-FASTISEL-NEXT: movb $1, %al +; X64-FASTISEL-NEXT: retq +entry: + %0 = tail call i1 @llvm.is.fpclass.f80(x86_fp80 %x, i32 1023) + ret i1 %0 +} diff --git a/llvm/test/CodeGen/X86/late-tail-dup-computed-goto.mir b/llvm/test/CodeGen/X86/late-tail-dup-computed-goto.mir new file mode 100644 index 0000000..e272e7e --- /dev/null +++ b/llvm/test/CodeGen/X86/late-tail-dup-computed-goto.mir @@ -0,0 +1,128 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=tailduplication -tail-dup-pred-size=1 -tail-dup-succ-size=1 %s -o - | FileCheck %s +# +# Check that only the computed gotos are duplicated aggressively. +# +--- | + @computed_goto.dispatch = constant [5 x ptr] [ptr null, ptr blockaddress(@computed_goto, %bb1), ptr blockaddress(@computed_goto, %bb2), ptr blockaddress(@computed_goto, %bb3), ptr blockaddress(@computed_goto, %bb4)] + declare i64 @f0() + declare i64 @f1() + declare i64 @f2() + declare i64 @f3() + declare i64 @f4() + declare i64 @f5() + define void @computed_goto() { + start: + ret void + bb1: + ret void + bb2: + ret void + bb3: + ret void + bb4: + ret void + } + define void @jump_table() { ret void } + define void @jump_table_pic() { ret void } +... 
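+# Per the intent stated above: the shared dispatch block ends in an indirect
+# JMP64m through @computed_goto.dispatch and has five predecessors and four
+# successors, exceeding both -tail-dup-pred-size=1 and -tail-dup-succ-size=1,
+# so the CHECK lines rely on the computed-goto exemption for the indirect
+# branch to be cloned into every predecessor.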
+--- +name: computed_goto +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: computed_goto + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f0, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rax + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64_nosp = COPY [[COPY]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64_nosp = COPY [[COPY1]] + ; CHECK-NEXT: JMP64m $noreg, 8, [[COPY2]], @computed_goto.dispatch, $noreg + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.bb1 (ir-block-address-taken %ir-block.bb1): + ; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f1, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr64 = COPY $rax + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64_nosp = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64_nosp = COPY [[COPY1]] + ; CHECK-NEXT: JMP64m $noreg, 8, [[COPY2]], @computed_goto.dispatch, $noreg + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.bb2 (ir-block-address-taken %ir-block.bb2): + ; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f2, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr64 = COPY $rax + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64_nosp = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64_nosp = COPY [[COPY1]] + ; CHECK-NEXT: JMP64m $noreg, 8, [[COPY2]], @computed_goto.dispatch, $noreg + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.bb3 (ir-block-address-taken %ir-block.bb3): + ; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f3, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr64 = COPY $rax + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64_nosp = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64_nosp = COPY [[COPY1]] + ; CHECK-NEXT: JMP64m $noreg, 8, [[COPY2]], @computed_goto.dispatch, $noreg + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.bb4 (ir-block-address-taken %ir-block.bb4): + ; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f4, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr64 = COPY $rax + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64_nosp = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64_nosp = COPY [[COPY1]] + ; CHECK-NEXT: JMP64m $noreg, 8, [[COPY2]], @computed_goto.dispatch, $noreg + bb.0: + successors: %bb.5(0x80000000) + + CALL64pcrel32 target-flags(x86-plt) @f0, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax + %0:gr64 = COPY $rax + %6:gr64_nosp = COPY %0 + JMP_1 %bb.5 + + bb.1.bb1 (ir-block-address-taken %ir-block.bb1): + successors: %bb.5(0x80000000) + + 
CALL64pcrel32 target-flags(x86-plt) @f1, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax + %1:gr64 = COPY $rax + %6:gr64_nosp = COPY %1 + JMP_1 %bb.5 + + bb.2.bb2 (ir-block-address-taken %ir-block.bb2): + successors: %bb.5(0x80000000) + + CALL64pcrel32 target-flags(x86-plt) @f2, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax + %2:gr64 = COPY $rax + %6:gr64_nosp = COPY %2 + JMP_1 %bb.5 + + bb.3.bb3 (ir-block-address-taken %ir-block.bb3): + successors: %bb.5(0x80000000) + + CALL64pcrel32 target-flags(x86-plt) @f3, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax + %3:gr64 = COPY $rax + %6:gr64_nosp = COPY %3 + JMP_1 %bb.5 + + bb.4.bb4 (ir-block-address-taken %ir-block.bb4): + successors: %bb.5(0x80000000) + + CALL64pcrel32 target-flags(x86-plt) @f4, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax + %4:gr64 = COPY $rax + %6:gr64_nosp = COPY %4 + + bb.5: + successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000) + + %5:gr64_nosp = COPY %6 + JMP64m $noreg, 8, %5, @computed_goto.dispatch, $noreg +... diff --git a/llvm/test/CodeGen/X86/load-combine.ll b/llvm/test/CodeGen/X86/load-combine.ll index b5f3e78..f21c075 100644 --- a/llvm/test/CodeGen/X86/load-combine.ll +++ b/llvm/test/CodeGen/X86/load-combine.ll @@ -800,13 +800,13 @@ define void @shift_i32_by_32(ptr %src1, ptr %src2, ptr %dst) { ; CHECK-LABEL: shift_i32_by_32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl $-1, 4(%eax) -; CHECK-NEXT: movl $-1, (%eax) +; CHECK-NEXT: movl $0, 4(%eax) +; CHECK-NEXT: movl $0, (%eax) ; CHECK-NEXT: retl ; ; CHECK64-LABEL: shift_i32_by_32: ; CHECK64: # %bb.0: # %entry -; CHECK64-NEXT: movq $-1, (%rdx) +; CHECK64-NEXT: movq $0, (%rdx) ; CHECK64-NEXT: retq entry: %load1 = load i8, ptr %src1, align 1 diff --git a/llvm/test/CodeGen/X86/mul128.ll b/llvm/test/CodeGen/X86/mul128.ll index fc1cc1f..e10e48f 100644 --- a/llvm/test/CodeGen/X86/mul128.ll +++ b/llvm/test/CodeGen/X86/mul128.ll @@ -18,85 +18,80 @@ define i128 @foo(i128 %t, i128 %u) { ; X86: # %bb.0: ; X86-NEXT: pushl %ebp ; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp ; X86-NEXT: pushl %ebx -; X86-NEXT: .cfi_def_cfa_offset 12 ; X86-NEXT: pushl %edi -; X86-NEXT: .cfi_def_cfa_offset 16 ; X86-NEXT: pushl %esi -; X86-NEXT: .cfi_def_cfa_offset 20 -; X86-NEXT: subl $8, %esp -; X86-NEXT: .cfi_def_cfa_offset 28 +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp ; X86-NEXT: .cfi_offset %esi, -20 ; X86-NEXT: .cfi_offset %edi, -16 ; X86-NEXT: .cfi_offset %ebx, -12 -; X86-NEXT: .cfi_offset %ebp, -8 -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: imull %ecx, %ebp -; X86-NEXT: movl %eax, %esi +; X86-NEXT: movl 32(%ebp), %ecx +; X86-NEXT: movl 40(%ebp), %edi +; X86-NEXT: movl 44(%ebp), %esi +; X86-NEXT: imull %ecx, %esi +; X86-NEXT: movl %edi, %eax ; X86-NEXT: mull %ecx -; X86-NEXT: movl %eax, %edi +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: movl %edx, %ebx -; X86-NEXT: addl %ebp, %ebx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull %esi, %eax +; X86-NEXT: addl %esi, %ebx +; X86-NEXT: movl 36(%ebp), %eax +; X86-NEXT: imull %edi, %eax ; X86-NEXT: addl %eax, %ebx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: 
movl %eax, %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: imull %ecx, %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: mull %ebp -; X86-NEXT: addl %esi, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: imull %ebp, %esi +; X86-NEXT: movl 48(%ebp), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: imull 28(%ebp), %ecx +; X86-NEXT: movl 24(%ebp), %edi +; X86-NEXT: mull %edi +; X86-NEXT: addl %ecx, %edx +; X86-NEXT: movl 52(%ebp), %esi +; X86-NEXT: imull %edi, %esi ; X86-NEXT: addl %edx, %esi -; X86-NEXT: addl %edi, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: addl (%esp), %eax # 4-byte Folded Reload +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: adcl %ebx, %esi -; X86-NEXT: movl %ebp, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: mull %edi +; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl 40(%ebp), %ecx +; X86-NEXT: mull %ecx ; X86-NEXT: movl %edx, %ebx -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: mull %edi +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 28(%ebp), %eax +; X86-NEXT: mull %ecx ; X86-NEXT: movl %edx, %edi ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: addl %ebx, %ecx ; X86-NEXT: adcl $0, %edi -; X86-NEXT: movl %ebp, %eax -; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: movl %edx, %ebp -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: addl %ecx, %ebx -; X86-NEXT: adcl %edi, %ebp +; X86-NEXT: movl 24(%ebp), %eax +; X86-NEXT: mull 44(%ebp) +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl %edi, %ebx ; X86-NEXT: setb %cl -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: addl %ebp, %eax +; X86-NEXT: movl 28(%ebp), %eax +; X86-NEXT: mull 44(%ebp) +; X86-NEXT: addl %ebx, %eax ; X86-NEXT: movzbl %cl, %ecx ; X86-NEXT: adcl %ecx, %edx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: addl (%esp), %eax # 4-byte Folded Reload ; X86-NEXT: adcl %esi, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ebx, 4(%ecx) -; X86-NEXT: movl (%esp), %esi # 4-byte Reload +; X86-NEXT: movl 8(%ebp), %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: movl %esi, 4(%ecx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-NEXT: movl %esi, (%ecx) ; X86-NEXT: movl %eax, 8(%ecx) ; X86-NEXT: movl %edx, 12(%ecx) ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: addl $8, %esp -; X86-NEXT: .cfi_def_cfa_offset 20 +; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 16 ; X86-NEXT: popl %edi -; X86-NEXT: .cfi_def_cfa_offset 12 ; X86-NEXT: popl %ebx -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: popl %ebp -; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: .cfi_def_cfa %esp, 4 ; X86-NEXT: retl $4 %k = mul i128 %t, %u ret i128 %k diff --git a/llvm/test/CodeGen/X86/neg-abs.ll b/llvm/test/CodeGen/X86/neg-abs.ll index 961205c..724b2dc 100644 --- a/llvm/test/CodeGen/X86/neg-abs.ll +++ b/llvm/test/CodeGen/X86/neg-abs.ll @@ -105,31 +105,35 @@ define i128 @neg_abs_i128(i128 %x) nounwind { ; X86-LABEL: neg_abs_i128: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %edx, %ecx +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 
36(%ebp), %eax +; X86-NEXT: movl %eax, %ecx ; X86-NEXT: sarl $31, %ecx +; X86-NEXT: xorl %ecx, %eax +; X86-NEXT: movl 32(%ebp), %edx ; X86-NEXT: xorl %ecx, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl 28(%ebp), %esi ; X86-NEXT: xorl %ecx, %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl 24(%ebp), %edi ; X86-NEXT: xorl %ecx, %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: xorl %ecx, %ebx -; X86-NEXT: movl %ecx, %ebp -; X86-NEXT: subl %ebx, %ebp ; X86-NEXT: movl %ecx, %ebx -; X86-NEXT: sbbl %edi, %ebx +; X86-NEXT: subl %edi, %ebx ; X86-NEXT: movl %ecx, %edi ; X86-NEXT: sbbl %esi, %edi -; X86-NEXT: sbbl %edx, %ecx -; X86-NEXT: movl %ebp, (%eax) -; X86-NEXT: movl %ebx, 4(%eax) -; X86-NEXT: movl %edi, 8(%eax) +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: sbbl %edx, %esi +; X86-NEXT: sbbl %eax, %ecx +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl %ebx, (%eax) +; X86-NEXT: movl %edi, 4(%eax) +; X86-NEXT: movl %esi, 8(%eax) ; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -259,37 +263,42 @@ define i64 @sub_abs_i64(i64 %x, i64 %y) nounwind { define i128 @sub_abs_i128(i128 %x, i128 %y) nounwind { ; X86-LABEL: sub_abs_i128: ; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl $-16, %esp +; X86-NEXT: movl 36(%ebp), %eax ; X86-NEXT: movl %eax, %edx ; X86-NEXT: sarl $31, %edx ; X86-NEXT: xorl %edx, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl 32(%ebp), %ecx ; X86-NEXT: xorl %edx, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl 28(%ebp), %esi ; X86-NEXT: xorl %edx, %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl 24(%ebp), %edi ; X86-NEXT: xorl %edx, %edi ; X86-NEXT: subl %edx, %edi ; X86-NEXT: sbbl %edx, %esi ; X86-NEXT: sbbl %edx, %ecx ; X86-NEXT: sbbl %edx, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl 40(%ebp), %edx ; X86-NEXT: subl %edi, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl 44(%ebp), %edi ; X86-NEXT: sbbl %esi, %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl 48(%ebp), %esi ; X86-NEXT: sbbl %ecx, %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl 52(%ebp), %ecx ; X86-NEXT: sbbl %eax, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 8(%ebp), %eax ; X86-NEXT: movl %edx, (%eax) ; X86-NEXT: movl %edi, 4(%eax) ; X86-NEXT: movl %esi, 8(%eax) ; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: leal -8(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi +; X86-NEXT: popl %ebp ; X86-NEXT: retl $4 ; ; X64-LABEL: sub_abs_i128: diff --git a/llvm/test/CodeGen/X86/noreturn-call-win64.ll b/llvm/test/CodeGen/X86/noreturn-call-win64.ll index 57aa022..13be1f13 100644 --- a/llvm/test/CodeGen/X86/noreturn-call-win64.ll +++ b/llvm/test/CodeGen/X86/noreturn-call-win64.ll @@ -111,3 +111,15 @@ declare dso_local void @"??1MakeCleanup@@QEAA@XZ"(ptr) ; CHECK: # %unreachable ; CHECK: int3 ; CHECK: .seh_handlerdata + + +define dso_local void @last_call_no_return() { + call void @abort1() + unreachable +} + +; CHECK-LABEL: last_call_no_return: +; CHECK: callq abort1 +; CHECK-NEXT: int3 +; CHECK-NEXT: .seh_endproc + diff --git a/llvm/test/CodeGen/X86/pcsections-atomics.ll b/llvm/test/CodeGen/X86/pcsections-atomics.ll index 672ebc1e..69ae1f1 100644 --- a/llvm/test/CodeGen/X86/pcsections-atomics.ll +++ b/llvm/test/CodeGen/X86/pcsections-atomics.ll @@ 
-9,6 +9,7 @@ ; RUN: llc -O1 -mattr=cx16 < %s | FileCheck %s --check-prefixes=O1 ; RUN: llc -O2 -mattr=cx16 < %s | FileCheck %s --check-prefixes=O2 ; RUN: llc -O3 -mattr=cx16 < %s | FileCheck %s --check-prefixes=O3 +; RUN: llc -O3 -mcpu=haswell -mattr=cx16 < %s | FileCheck %s --check-prefixes=HASWELL-O3 target triple = "x86_64-unknown-linux-gnu" @@ -50,6 +51,14 @@ define void @mixed_atomic_non_atomic(ptr %a) { ; O3-NEXT: movl $1, (%rdi) ; O3-NEXT: decl (%rdi) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: mixed_atomic_non_atomic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: incl (%rdi) +; HASWELL-O3-NEXT: .Lpcsection0: +; HASWELL-O3-NEXT: movl $1, (%rdi) +; HASWELL-O3-NEXT: decl (%rdi) +; HASWELL-O3-NEXT: retq entry: ; Accesses the same location atomically and non-atomically. %0 = load volatile i32, ptr %a, align 4 @@ -107,6 +116,17 @@ define i64 @mixed_complex_atomic_non_atomic(ptr %a, ptr %b) { ; O3-NEXT: movq %rdx, (%rsi) ; O3-NEXT: addq %rcx, %rax ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: mixed_complex_atomic_non_atomic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movl $1, %eax +; HASWELL-O3-NEXT: .Lpcsection1: +; HASWELL-O3-NEXT: lock xaddq %rax, (%rdi) +; HASWELL-O3-NEXT: movq (%rsi), %rcx +; HASWELL-O3-NEXT: leaq 1(%rcx), %rdx +; HASWELL-O3-NEXT: movq %rdx, (%rsi) +; HASWELL-O3-NEXT: addq %rcx, %rax +; HASWELL-O3-NEXT: retq entry: %0 = atomicrmw add ptr %a, i64 1 monotonic, align 8, !pcsections !0 %1 = load i64, ptr %b, align 8 @@ -148,6 +168,14 @@ define i8 @atomic8_load_unordered(ptr %a) { ; O3-NEXT: movzbl (%rdi), %eax ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_load_unordered: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection2: +; HASWELL-O3-NEXT: movzbl (%rdi), %eax +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = load atomic i8, ptr %a unordered, align 1, !pcsections !0 @@ -187,6 +215,14 @@ define i8 @atomic8_load_monotonic(ptr %a) { ; O3-NEXT: movzbl (%rdi), %eax ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_load_monotonic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection3: +; HASWELL-O3-NEXT: movzbl (%rdi), %eax +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = load atomic i8, ptr %a monotonic, align 1, !pcsections !0 @@ -226,6 +262,14 @@ define i8 @atomic8_load_acquire(ptr %a) { ; O3-NEXT: movzbl (%rdi), %eax ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_load_acquire: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection4: +; HASWELL-O3-NEXT: movzbl (%rdi), %eax +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = load atomic i8, ptr %a acquire, align 1, !pcsections !0 @@ -265,6 +309,14 @@ define i8 @atomic8_load_seq_cst(ptr %a) { ; O3-NEXT: movzbl (%rdi), %eax ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_load_seq_cst: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection5: +; HASWELL-O3-NEXT: movzbl (%rdi), %eax +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = load atomic i8, ptr %a seq_cst, align 1, !pcsections !0 @@ -304,6 +356,14 @@ define void @atomic8_store_unordered(ptr 
%a) { ; O3-NEXT: movb $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_store_unordered: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection6: +; HASWELL-O3-NEXT: movb $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 store atomic i8 42, ptr %a unordered, align 1, !pcsections !0 @@ -343,6 +403,14 @@ define void @atomic8_store_monotonic(ptr %a) { ; O3-NEXT: movb $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_store_monotonic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection7: +; HASWELL-O3-NEXT: movb $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 store atomic i8 42, ptr %a monotonic, align 1, !pcsections !0 @@ -382,6 +450,14 @@ define void @atomic8_store_release(ptr %a) { ; O3-NEXT: movb $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_store_release: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection8: +; HASWELL-O3-NEXT: movb $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 store atomic i8 42, ptr %a release, align 1, !pcsections !0 @@ -425,6 +501,15 @@ define void @atomic8_store_seq_cst(ptr %a) { ; O3-NEXT: xchgb %al, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_store_seq_cst: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: movb $42, %al +; HASWELL-O3-NEXT: .Lpcsection9: +; HASWELL-O3-NEXT: xchgb %al, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 store atomic i8 42, ptr %a seq_cst, align 1, !pcsections !0 @@ -468,6 +553,15 @@ define void @atomic8_xchg_monotonic(ptr %a) { ; O3-NEXT: xchgb %al, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_xchg_monotonic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: movb $42, %al +; HASWELL-O3-NEXT: .Lpcsection10: +; HASWELL-O3-NEXT: xchgb %al, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw xchg ptr %a, i8 42 monotonic, align 1, !pcsections !0 @@ -507,6 +601,14 @@ define void @atomic8_add_monotonic(ptr %a) { ; O3-NEXT: lock addb $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_add_monotonic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection11: +; HASWELL-O3-NEXT: lock addb $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw add ptr %a, i8 42 monotonic, align 1, !pcsections !0 @@ -546,6 +648,14 @@ define void @atomic8_sub_monotonic(ptr %a) { ; O3-NEXT: lock subb $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_sub_monotonic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection12: +; HASWELL-O3-NEXT: lock subb $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw sub ptr %a, i8 42 monotonic, align 
1, !pcsections !0 @@ -585,6 +695,14 @@ define void @atomic8_and_monotonic(ptr %a) { ; O3-NEXT: lock andb $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_and_monotonic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection13: +; HASWELL-O3-NEXT: lock andb $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw and ptr %a, i8 42 monotonic, align 1, !pcsections !0 @@ -624,6 +742,14 @@ define void @atomic8_or_monotonic(ptr %a) { ; O3-NEXT: lock orb $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_or_monotonic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection14: +; HASWELL-O3-NEXT: lock orb $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw or ptr %a, i8 42 monotonic, align 1, !pcsections !0 @@ -663,6 +789,14 @@ define void @atomic8_xor_monotonic(ptr %a) { ; O3-NEXT: lock xorb $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_xor_monotonic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection15: +; HASWELL-O3-NEXT: lock xorb $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw xor ptr %a, i8 42 monotonic, align 1, !pcsections !0 @@ -763,6 +897,27 @@ define void @atomic8_nand_monotonic(ptr %a) { ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_nand_monotonic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection16: +; HASWELL-O3-NEXT: movzbl (%rdi), %eax +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB16_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movl %eax, %ecx +; HASWELL-O3-NEXT: .Lpcsection17: +; HASWELL-O3-NEXT: notb %cl +; HASWELL-O3-NEXT: .Lpcsection18: +; HASWELL-O3-NEXT: orb $-43, %cl +; HASWELL-O3-NEXT: .Lpcsection19: +; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi) +; HASWELL-O3-NEXT: .Lpcsection20: +; HASWELL-O3-NEXT: jne .LBB16_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw nand ptr %a, i8 42 monotonic, align 1, !pcsections !0 @@ -806,6 +961,15 @@ define void @atomic8_xchg_acquire(ptr %a) { ; O3-NEXT: xchgb %al, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_xchg_acquire: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: movb $42, %al +; HASWELL-O3-NEXT: .Lpcsection21: +; HASWELL-O3-NEXT: xchgb %al, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw xchg ptr %a, i8 42 acquire, align 1, !pcsections !0 @@ -845,6 +1009,14 @@ define void @atomic8_add_acquire(ptr %a) { ; O3-NEXT: lock addb $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_add_acquire: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection22: +; HASWELL-O3-NEXT: lock addb $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load 
volatile i64, ptr @foo, align 8 %x = atomicrmw add ptr %a, i8 42 acquire, align 1, !pcsections !0 @@ -884,6 +1056,14 @@ define void @atomic8_sub_acquire(ptr %a) { ; O3-NEXT: lock subb $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_sub_acquire: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection23: +; HASWELL-O3-NEXT: lock subb $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw sub ptr %a, i8 42 acquire, align 1, !pcsections !0 @@ -923,6 +1103,14 @@ define void @atomic8_and_acquire(ptr %a) { ; O3-NEXT: lock andb $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_and_acquire: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection24: +; HASWELL-O3-NEXT: lock andb $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw and ptr %a, i8 42 acquire, align 1, !pcsections !0 @@ -962,6 +1150,14 @@ define void @atomic8_or_acquire(ptr %a) { ; O3-NEXT: lock orb $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_or_acquire: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection25: +; HASWELL-O3-NEXT: lock orb $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw or ptr %a, i8 42 acquire, align 1, !pcsections !0 @@ -1001,6 +1197,14 @@ define void @atomic8_xor_acquire(ptr %a) { ; O3-NEXT: lock xorb $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_xor_acquire: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection26: +; HASWELL-O3-NEXT: lock xorb $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw xor ptr %a, i8 42 acquire, align 1, !pcsections !0 @@ -1101,6 +1305,27 @@ define void @atomic8_nand_acquire(ptr %a) { ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_nand_acquire: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection27: +; HASWELL-O3-NEXT: movzbl (%rdi), %eax +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB23_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movl %eax, %ecx +; HASWELL-O3-NEXT: .Lpcsection28: +; HASWELL-O3-NEXT: notb %cl +; HASWELL-O3-NEXT: .Lpcsection29: +; HASWELL-O3-NEXT: orb $-43, %cl +; HASWELL-O3-NEXT: .Lpcsection30: +; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi) +; HASWELL-O3-NEXT: .Lpcsection31: +; HASWELL-O3-NEXT: jne .LBB23_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw nand ptr %a, i8 42 acquire, align 1, !pcsections !0 @@ -1144,6 +1369,15 @@ define void @atomic8_xchg_release(ptr %a) { ; O3-NEXT: xchgb %al, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_xchg_release: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: movb $42, %al +; HASWELL-O3-NEXT: .Lpcsection32: +; HASWELL-O3-NEXT: xchgb %al, (%rdi) +; 
HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw xchg ptr %a, i8 42 release, align 1, !pcsections !0 @@ -1183,6 +1417,14 @@ define void @atomic8_add_release(ptr %a) { ; O3-NEXT: lock addb $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_add_release: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection33: +; HASWELL-O3-NEXT: lock addb $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw add ptr %a, i8 42 release, align 1, !pcsections !0 @@ -1222,6 +1464,14 @@ define void @atomic8_sub_release(ptr %a) { ; O3-NEXT: lock subb $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_sub_release: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection34: +; HASWELL-O3-NEXT: lock subb $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw sub ptr %a, i8 42 release, align 1, !pcsections !0 @@ -1261,6 +1511,14 @@ define void @atomic8_and_release(ptr %a) { ; O3-NEXT: lock andb $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_and_release: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection35: +; HASWELL-O3-NEXT: lock andb $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw and ptr %a, i8 42 release, align 1, !pcsections !0 @@ -1300,6 +1558,14 @@ define void @atomic8_or_release(ptr %a) { ; O3-NEXT: lock orb $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_or_release: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection36: +; HASWELL-O3-NEXT: lock orb $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw or ptr %a, i8 42 release, align 1, !pcsections !0 @@ -1339,6 +1605,14 @@ define void @atomic8_xor_release(ptr %a) { ; O3-NEXT: lock xorb $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_xor_release: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection37: +; HASWELL-O3-NEXT: lock xorb $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw xor ptr %a, i8 42 release, align 1, !pcsections !0 @@ -1439,6 +1713,27 @@ define void @atomic8_nand_release(ptr %a) { ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_nand_release: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection38: +; HASWELL-O3-NEXT: movzbl (%rdi), %eax +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB30_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movl %eax, %ecx +; HASWELL-O3-NEXT: .Lpcsection39: +; HASWELL-O3-NEXT: notb %cl +; HASWELL-O3-NEXT: .Lpcsection40: +; HASWELL-O3-NEXT: orb $-43, %cl +; HASWELL-O3-NEXT: .Lpcsection41: +; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi) +; HASWELL-O3-NEXT: .Lpcsection42: +; HASWELL-O3-NEXT: jne .LBB30_1 +; 
HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw nand ptr %a, i8 42 release, align 1, !pcsections !0 @@ -1482,6 +1777,15 @@ define void @atomic8_xchg_acq_rel(ptr %a) { ; O3-NEXT: xchgb %al, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_xchg_acq_rel: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: movb $42, %al +; HASWELL-O3-NEXT: .Lpcsection43: +; HASWELL-O3-NEXT: xchgb %al, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw xchg ptr %a, i8 42 acq_rel, align 1, !pcsections !0 @@ -1521,6 +1825,14 @@ define void @atomic8_add_acq_rel(ptr %a) { ; O3-NEXT: lock addb $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_add_acq_rel: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection44: +; HASWELL-O3-NEXT: lock addb $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw add ptr %a, i8 42 acq_rel, align 1, !pcsections !0 @@ -1560,6 +1872,14 @@ define void @atomic8_sub_acq_rel(ptr %a) { ; O3-NEXT: lock subb $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_sub_acq_rel: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection45: +; HASWELL-O3-NEXT: lock subb $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw sub ptr %a, i8 42 acq_rel, align 1, !pcsections !0 @@ -1599,6 +1919,14 @@ define void @atomic8_and_acq_rel(ptr %a) { ; O3-NEXT: lock andb $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_and_acq_rel: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection46: +; HASWELL-O3-NEXT: lock andb $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw and ptr %a, i8 42 acq_rel, align 1, !pcsections !0 @@ -1638,6 +1966,14 @@ define void @atomic8_or_acq_rel(ptr %a) { ; O3-NEXT: lock orb $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_or_acq_rel: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection47: +; HASWELL-O3-NEXT: lock orb $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw or ptr %a, i8 42 acq_rel, align 1, !pcsections !0 @@ -1677,6 +2013,14 @@ define void @atomic8_xor_acq_rel(ptr %a) { ; O3-NEXT: lock xorb $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_xor_acq_rel: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection48: +; HASWELL-O3-NEXT: lock xorb $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw xor ptr %a, i8 42 acq_rel, align 1, !pcsections !0 @@ -1777,6 +2121,27 @@ define void @atomic8_nand_acq_rel(ptr %a) { ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_nand_acq_rel: +; HASWELL-O3: # %bb.0: # 
%entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection49: +; HASWELL-O3-NEXT: movzbl (%rdi), %eax +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB37_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movl %eax, %ecx +; HASWELL-O3-NEXT: .Lpcsection50: +; HASWELL-O3-NEXT: notb %cl +; HASWELL-O3-NEXT: .Lpcsection51: +; HASWELL-O3-NEXT: orb $-43, %cl +; HASWELL-O3-NEXT: .Lpcsection52: +; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi) +; HASWELL-O3-NEXT: .Lpcsection53: +; HASWELL-O3-NEXT: jne .LBB37_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw nand ptr %a, i8 42 acq_rel, align 1, !pcsections !0 @@ -1820,6 +2185,15 @@ define void @atomic8_xchg_seq_cst(ptr %a) { ; O3-NEXT: xchgb %al, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_xchg_seq_cst: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: movb $42, %al +; HASWELL-O3-NEXT: .Lpcsection54: +; HASWELL-O3-NEXT: xchgb %al, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw xchg ptr %a, i8 42 seq_cst, align 1, !pcsections !0 @@ -1859,6 +2233,14 @@ define void @atomic8_add_seq_cst(ptr %a) { ; O3-NEXT: lock addb $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_add_seq_cst: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection55: +; HASWELL-O3-NEXT: lock addb $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw add ptr %a, i8 42 seq_cst, align 1, !pcsections !0 @@ -1898,6 +2280,14 @@ define void @atomic8_sub_seq_cst(ptr %a) { ; O3-NEXT: lock subb $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_sub_seq_cst: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection56: +; HASWELL-O3-NEXT: lock subb $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw sub ptr %a, i8 42 seq_cst, align 1, !pcsections !0 @@ -1937,6 +2327,14 @@ define void @atomic8_and_seq_cst(ptr %a) { ; O3-NEXT: lock andb $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_and_seq_cst: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection57: +; HASWELL-O3-NEXT: lock andb $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw and ptr %a, i8 42 seq_cst, align 1, !pcsections !0 @@ -1976,6 +2374,14 @@ define void @atomic8_or_seq_cst(ptr %a) { ; O3-NEXT: lock orb $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_or_seq_cst: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection58: +; HASWELL-O3-NEXT: lock orb $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw or ptr %a, i8 42 seq_cst, align 1, !pcsections !0 @@ -2015,6 +2421,14 @@ define void @atomic8_xor_seq_cst(ptr %a) { ; O3-NEXT: lock xorb $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; 
HASWELL-O3-LABEL: atomic8_xor_seq_cst: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection59: +; HASWELL-O3-NEXT: lock xorb $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw xor ptr %a, i8 42 seq_cst, align 1, !pcsections !0 @@ -2115,6 +2529,27 @@ define void @atomic8_nand_seq_cst(ptr %a) { ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_nand_seq_cst: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection60: +; HASWELL-O3-NEXT: movzbl (%rdi), %eax +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB44_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movl %eax, %ecx +; HASWELL-O3-NEXT: .Lpcsection61: +; HASWELL-O3-NEXT: notb %cl +; HASWELL-O3-NEXT: .Lpcsection62: +; HASWELL-O3-NEXT: orb $-43, %cl +; HASWELL-O3-NEXT: .Lpcsection63: +; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi) +; HASWELL-O3-NEXT: .Lpcsection64: +; HASWELL-O3-NEXT: jne .LBB44_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw nand ptr %a, i8 42 seq_cst, align 1, !pcsections !0 @@ -2200,6 +2635,25 @@ define void @atomic8_cas_monotonic(ptr %a) { ; O3-NEXT: lock cmpxchgb %cl, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_cas_monotonic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: movb $1, %cl +; HASWELL-O3-NEXT: .Lpcsection65: +; HASWELL-O3-NEXT: movb $42, %al +; HASWELL-O3-NEXT: .Lpcsection66: +; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi) +; HASWELL-O3-NEXT: .Lpcsection67: +; HASWELL-O3-NEXT: movb $42, %al +; HASWELL-O3-NEXT: .Lpcsection68: +; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi) +; HASWELL-O3-NEXT: .Lpcsection69: +; HASWELL-O3-NEXT: movb $42, %al +; HASWELL-O3-NEXT: .Lpcsection70: +; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = cmpxchg ptr %a, i8 42, i8 1 monotonic monotonic, align 1, !pcsections !0 @@ -2287,6 +2741,25 @@ define void @atomic8_cas_acquire(ptr %a) { ; O3-NEXT: lock cmpxchgb %cl, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_cas_acquire: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: movb $1, %cl +; HASWELL-O3-NEXT: .Lpcsection71: +; HASWELL-O3-NEXT: movb $42, %al +; HASWELL-O3-NEXT: .Lpcsection72: +; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi) +; HASWELL-O3-NEXT: .Lpcsection73: +; HASWELL-O3-NEXT: movb $42, %al +; HASWELL-O3-NEXT: .Lpcsection74: +; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi) +; HASWELL-O3-NEXT: .Lpcsection75: +; HASWELL-O3-NEXT: movb $42, %al +; HASWELL-O3-NEXT: .Lpcsection76: +; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = cmpxchg ptr %a, i8 42, i8 1 acquire monotonic, align 1, !pcsections !0 @@ -2374,6 +2847,25 @@ define void @atomic8_cas_release(ptr %a) { ; O3-NEXT: lock cmpxchgb %cl, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_cas_release: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: movb 
$1, %cl +; HASWELL-O3-NEXT: .Lpcsection77: +; HASWELL-O3-NEXT: movb $42, %al +; HASWELL-O3-NEXT: .Lpcsection78: +; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi) +; HASWELL-O3-NEXT: .Lpcsection79: +; HASWELL-O3-NEXT: movb $42, %al +; HASWELL-O3-NEXT: .Lpcsection80: +; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi) +; HASWELL-O3-NEXT: .Lpcsection81: +; HASWELL-O3-NEXT: movb $42, %al +; HASWELL-O3-NEXT: .Lpcsection82: +; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = cmpxchg ptr %a, i8 42, i8 1 release monotonic, align 1, !pcsections !0 @@ -2461,6 +2953,25 @@ define void @atomic8_cas_acq_rel(ptr %a) { ; O3-NEXT: lock cmpxchgb %cl, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_cas_acq_rel: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: movb $1, %cl +; HASWELL-O3-NEXT: .Lpcsection83: +; HASWELL-O3-NEXT: movb $42, %al +; HASWELL-O3-NEXT: .Lpcsection84: +; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi) +; HASWELL-O3-NEXT: .Lpcsection85: +; HASWELL-O3-NEXT: movb $42, %al +; HASWELL-O3-NEXT: .Lpcsection86: +; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi) +; HASWELL-O3-NEXT: .Lpcsection87: +; HASWELL-O3-NEXT: movb $42, %al +; HASWELL-O3-NEXT: .Lpcsection88: +; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = cmpxchg ptr %a, i8 42, i8 1 acq_rel monotonic, align 1, !pcsections !0 @@ -2548,6 +3059,25 @@ define void @atomic8_cas_seq_cst(ptr %a) { ; O3-NEXT: lock cmpxchgb %cl, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic8_cas_seq_cst: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: movb $1, %cl +; HASWELL-O3-NEXT: .Lpcsection89: +; HASWELL-O3-NEXT: movb $42, %al +; HASWELL-O3-NEXT: .Lpcsection90: +; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi) +; HASWELL-O3-NEXT: .Lpcsection91: +; HASWELL-O3-NEXT: movb $42, %al +; HASWELL-O3-NEXT: .Lpcsection92: +; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi) +; HASWELL-O3-NEXT: .Lpcsection93: +; HASWELL-O3-NEXT: movb $42, %al +; HASWELL-O3-NEXT: .Lpcsection94: +; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = cmpxchg ptr %a, i8 42, i8 1 seq_cst monotonic, align 1, !pcsections !0 @@ -2589,6 +3119,14 @@ define i16 @atomic16_load_unordered(ptr %a) { ; O3-NEXT: movzwl (%rdi), %eax ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic16_load_unordered: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection95: +; HASWELL-O3-NEXT: movzwl (%rdi), %eax +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = load atomic i16, ptr %a unordered, align 2, !pcsections !0 @@ -2628,6 +3166,14 @@ define i16 @atomic16_load_monotonic(ptr %a) { ; O3-NEXT: movzwl (%rdi), %eax ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic16_load_monotonic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection96: +; HASWELL-O3-NEXT: movzwl (%rdi), %eax +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = load atomic i16, ptr %a monotonic, align 2, !pcsections !0 @@ -2667,6 
+3213,14 @@ define i16 @atomic16_load_acquire(ptr %a) { ; O3-NEXT: movzwl (%rdi), %eax ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic16_load_acquire: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection97: +; HASWELL-O3-NEXT: movzwl (%rdi), %eax +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = load atomic i16, ptr %a acquire, align 2, !pcsections !0 @@ -2706,6 +3260,14 @@ define i16 @atomic16_load_seq_cst(ptr %a) { ; O3-NEXT: movzwl (%rdi), %eax ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic16_load_seq_cst: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection98: +; HASWELL-O3-NEXT: movzwl (%rdi), %eax +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = load atomic i16, ptr %a seq_cst, align 2, !pcsections !0 @@ -2745,6 +3307,14 @@ define void @atomic16_store_unordered(ptr %a) { ; O3-NEXT: movw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic16_store_unordered: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection99: +; HASWELL-O3-NEXT: movw $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 store atomic i16 42, ptr %a unordered, align 2, !pcsections !0 @@ -2784,6 +3354,14 @@ define void @atomic16_store_monotonic(ptr %a) { ; O3-NEXT: movw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic16_store_monotonic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection100: +; HASWELL-O3-NEXT: movw $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 store atomic i16 42, ptr %a monotonic, align 2, !pcsections !0 @@ -2823,6 +3401,14 @@ define void @atomic16_store_release(ptr %a) { ; O3-NEXT: movw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic16_store_release: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection101: +; HASWELL-O3-NEXT: movw $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 store atomic i16 42, ptr %a release, align 2, !pcsections !0 @@ -2866,6 +3452,15 @@ define void @atomic16_store_seq_cst(ptr %a) { ; O3-NEXT: xchgw %ax, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic16_store_seq_cst: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: movw $42, %ax +; HASWELL-O3-NEXT: .Lpcsection102: +; HASWELL-O3-NEXT: xchgw %ax, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 store atomic i16 42, ptr %a seq_cst, align 2, !pcsections !0 @@ -2909,6 +3504,15 @@ define void @atomic16_xchg_monotonic(ptr %a) { ; O3-NEXT: xchgw %ax, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic16_xchg_monotonic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: movw $42, %ax +; HASWELL-O3-NEXT: .Lpcsection103: +; HASWELL-O3-NEXT: xchgw %ax, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr 
@foo, align 8 %x = atomicrmw xchg ptr %a, i16 42 monotonic, align 2, !pcsections !0 @@ -2948,6 +3552,14 @@ define void @atomic16_add_monotonic(ptr %a) { ; O3-NEXT: lock addw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic16_add_monotonic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection104: +; HASWELL-O3-NEXT: lock addw $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw add ptr %a, i16 42 monotonic, align 2, !pcsections !0 @@ -2987,6 +3599,14 @@ define void @atomic16_sub_monotonic(ptr %a) { ; O3-NEXT: lock subw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic16_sub_monotonic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection105: +; HASWELL-O3-NEXT: lock subw $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw sub ptr %a, i16 42 monotonic, align 2, !pcsections !0 @@ -3026,6 +3646,14 @@ define void @atomic16_and_monotonic(ptr %a) { ; O3-NEXT: lock andw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic16_and_monotonic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection106: +; HASWELL-O3-NEXT: lock andw $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw and ptr %a, i16 42 monotonic, align 2, !pcsections !0 @@ -3065,6 +3693,14 @@ define void @atomic16_or_monotonic(ptr %a) { ; O3-NEXT: lock orw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic16_or_monotonic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection107: +; HASWELL-O3-NEXT: lock orw $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw or ptr %a, i16 42 monotonic, align 2, !pcsections !0 @@ -3104,6 +3740,14 @@ define void @atomic16_xor_monotonic(ptr %a) { ; O3-NEXT: lock xorw $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic16_xor_monotonic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection108: +; HASWELL-O3-NEXT: lock xorw $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw xor ptr %a, i16 42 monotonic, align 2, !pcsections !0 @@ -3220,6 +3864,31 @@ define void @atomic16_nand_monotonic(ptr %a) { ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic16_nand_monotonic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection109: +; HASWELL-O3-NEXT: movzwl (%rdi), %eax +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB64_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movl %eax, %ecx +; HASWELL-O3-NEXT: .Lpcsection110: +; HASWELL-O3-NEXT: notl %ecx +; HASWELL-O3-NEXT: .Lpcsection111: +; HASWELL-O3-NEXT: orl $65493, %ecx # imm = 0xFFD5 +; HASWELL-O3-NEXT: .Lpcsection112: +; HASWELL-O3-NEXT: # kill: def $ax killed $ax killed $eax +; HASWELL-O3-NEXT: .Lpcsection113: +; HASWELL-O3-NEXT: lock cmpxchgw %cx, 
(%rdi)
+; HASWELL-O3-NEXT: .Lpcsection114:
+; HASWELL-O3-NEXT: # kill: def $ax killed $ax def $eax
+; HASWELL-O3-NEXT: .Lpcsection115:
+; HASWELL-O3-NEXT: jne .LBB64_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i16 42 monotonic, align 2, !pcsections !0
@@ -3263,6 +3932,15 @@ define void @atomic16_xchg_acquire(ptr %a) {
; O3-NEXT: xchgw %ax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_xchg_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection116:
+; HASWELL-O3-NEXT: xchgw %ax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i16 42 acquire, align 2, !pcsections !0
@@ -3302,6 +3980,14 @@ define void @atomic16_add_acquire(ptr %a) {
; O3-NEXT: lock addw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_add_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection117:
+; HASWELL-O3-NEXT: lock addw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i16 42 acquire, align 2, !pcsections !0
@@ -3341,6 +4027,14 @@ define void @atomic16_sub_acquire(ptr %a) {
; O3-NEXT: lock subw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_sub_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection118:
+; HASWELL-O3-NEXT: lock subw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i16 42 acquire, align 2, !pcsections !0
@@ -3380,6 +4074,14 @@ define void @atomic16_and_acquire(ptr %a) {
; O3-NEXT: lock andw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_and_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection119:
+; HASWELL-O3-NEXT: lock andw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i16 42 acquire, align 2, !pcsections !0
@@ -3419,6 +4121,14 @@ define void @atomic16_or_acquire(ptr %a) {
; O3-NEXT: lock orw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_or_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection120:
+; HASWELL-O3-NEXT: lock orw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i16 42 acquire, align 2, !pcsections !0
@@ -3458,6 +4168,14 @@ define void @atomic16_xor_acquire(ptr %a) {
; O3-NEXT: lock xorw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_xor_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection121:
+; HASWELL-O3-NEXT: lock xorw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i16 42 acquire, align 2, !pcsections !0
@@ -3574,6 +4292,31 @@ define void @atomic16_nand_acquire(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_nand_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection122:
+; HASWELL-O3-NEXT: movzwl (%rdi), %eax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB71_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection123:
+; HASWELL-O3-NEXT: notl %ecx
+; HASWELL-O3-NEXT: .Lpcsection124:
+; HASWELL-O3-NEXT: orl $65493, %ecx # imm = 0xFFD5
+; HASWELL-O3-NEXT: .Lpcsection125:
+; HASWELL-O3-NEXT: # kill: def $ax killed $ax killed $eax
+; HASWELL-O3-NEXT: .Lpcsection126:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection127:
+; HASWELL-O3-NEXT: # kill: def $ax killed $ax def $eax
+; HASWELL-O3-NEXT: .Lpcsection128:
+; HASWELL-O3-NEXT: jne .LBB71_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i16 42 acquire, align 2, !pcsections !0
@@ -3617,6 +4360,15 @@ define void @atomic16_xchg_release(ptr %a) {
; O3-NEXT: xchgw %ax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_xchg_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection129:
+; HASWELL-O3-NEXT: xchgw %ax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i16 42 release, align 2, !pcsections !0
@@ -3656,6 +4408,14 @@ define void @atomic16_add_release(ptr %a) {
; O3-NEXT: lock addw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_add_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection130:
+; HASWELL-O3-NEXT: lock addw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i16 42 release, align 2, !pcsections !0
@@ -3695,6 +4455,14 @@ define void @atomic16_sub_release(ptr %a) {
; O3-NEXT: lock subw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_sub_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection131:
+; HASWELL-O3-NEXT: lock subw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i16 42 release, align 2, !pcsections !0
@@ -3734,6 +4502,14 @@ define void @atomic16_and_release(ptr %a) {
; O3-NEXT: lock andw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_and_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection132:
+; HASWELL-O3-NEXT: lock andw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i16 42 release, align 2, !pcsections !0
@@ -3773,6 +4549,14 @@ define void @atomic16_or_release(ptr %a) {
; O3-NEXT: lock orw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_or_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection133:
+; HASWELL-O3-NEXT: lock orw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i16 42 release, align 2, !pcsections !0
@@ -3812,6 +4596,14 @@ define void @atomic16_xor_release(ptr %a) {
; O3-NEXT: lock xorw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_xor_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection134:
+; HASWELL-O3-NEXT: lock xorw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i16 42 release, align 2, !pcsections !0
@@ -3928,6 +4720,31 @@ define void @atomic16_nand_release(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_nand_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection135:
+; HASWELL-O3-NEXT: movzwl (%rdi), %eax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB78_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection136:
+; HASWELL-O3-NEXT: notl %ecx
+; HASWELL-O3-NEXT: .Lpcsection137:
+; HASWELL-O3-NEXT: orl $65493, %ecx # imm = 0xFFD5
+; HASWELL-O3-NEXT: .Lpcsection138:
+; HASWELL-O3-NEXT: # kill: def $ax killed $ax killed $eax
+; HASWELL-O3-NEXT: .Lpcsection139:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection140:
+; HASWELL-O3-NEXT: # kill: def $ax killed $ax def $eax
+; HASWELL-O3-NEXT: .Lpcsection141:
+; HASWELL-O3-NEXT: jne .LBB78_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i16 42 release, align 2, !pcsections !0
@@ -3971,6 +4788,15 @@ define void @atomic16_xchg_acq_rel(ptr %a) {
; O3-NEXT: xchgw %ax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_xchg_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection142:
+; HASWELL-O3-NEXT: xchgw %ax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i16 42 acq_rel, align 2, !pcsections !0
@@ -4010,6 +4836,14 @@ define void @atomic16_add_acq_rel(ptr %a) {
; O3-NEXT: lock addw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_add_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection143:
+; HASWELL-O3-NEXT: lock addw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i16 42 acq_rel, align 2, !pcsections !0
@@ -4049,6 +4883,14 @@ define void @atomic16_sub_acq_rel(ptr %a) {
; O3-NEXT: lock subw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_sub_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection144:
+; HASWELL-O3-NEXT: lock subw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i16 42 acq_rel, align 2, !pcsections !0
@@ -4088,6 +4930,14 @@ define void @atomic16_and_acq_rel(ptr %a) {
; O3-NEXT: lock andw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_and_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection145:
+; HASWELL-O3-NEXT: lock andw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i16 42 acq_rel, align 2, !pcsections !0
@@ -4127,6 +4977,14 @@ define void @atomic16_or_acq_rel(ptr %a) {
; O3-NEXT: lock orw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_or_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection146:
+; HASWELL-O3-NEXT: lock orw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i16 42 acq_rel, align 2, !pcsections !0
@@ -4166,6 +5024,14 @@ define void @atomic16_xor_acq_rel(ptr %a) {
; O3-NEXT: lock xorw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_xor_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection147:
+; HASWELL-O3-NEXT: lock xorw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i16 42 acq_rel, align 2, !pcsections !0
@@ -4282,6 +5148,31 @@ define void @atomic16_nand_acq_rel(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_nand_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection148:
+; HASWELL-O3-NEXT: movzwl (%rdi), %eax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB85_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection149:
+; HASWELL-O3-NEXT: notl %ecx
+; HASWELL-O3-NEXT: .Lpcsection150:
+; HASWELL-O3-NEXT: orl $65493, %ecx # imm = 0xFFD5
+; HASWELL-O3-NEXT: .Lpcsection151:
+; HASWELL-O3-NEXT: # kill: def $ax killed $ax killed $eax
+; HASWELL-O3-NEXT: .Lpcsection152:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection153:
+; HASWELL-O3-NEXT: # kill: def $ax killed $ax def $eax
+; HASWELL-O3-NEXT: .Lpcsection154:
+; HASWELL-O3-NEXT: jne .LBB85_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i16 42 acq_rel, align 2, !pcsections !0
@@ -4325,6 +5216,15 @@ define void @atomic16_xchg_seq_cst(ptr %a) {
; O3-NEXT: xchgw %ax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_xchg_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection155:
+; HASWELL-O3-NEXT: xchgw %ax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i16 42 seq_cst, align 2, !pcsections !0
@@ -4364,6 +5264,14 @@ define void @atomic16_add_seq_cst(ptr %a) {
; O3-NEXT: lock addw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_add_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection156:
+; HASWELL-O3-NEXT: lock addw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i16 42 seq_cst, align 2, !pcsections !0
@@ -4403,6 +5311,14 @@ define void @atomic16_sub_seq_cst(ptr %a) {
; O3-NEXT: lock subw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_sub_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection157:
+; HASWELL-O3-NEXT: lock subw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i16 42 seq_cst, align 2, !pcsections !0
@@ -4442,6 +5358,14 @@ define void @atomic16_and_seq_cst(ptr %a) {
; O3-NEXT: lock andw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_and_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection158:
+; HASWELL-O3-NEXT: lock andw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i16 42 seq_cst, align 2, !pcsections !0
@@ -4481,6 +5405,14 @@ define void @atomic16_or_seq_cst(ptr %a) {
; O3-NEXT: lock orw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_or_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection159:
+; HASWELL-O3-NEXT: lock orw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i16 42 seq_cst, align 2, !pcsections !0
@@ -4520,6 +5452,14 @@ define void @atomic16_xor_seq_cst(ptr %a) {
; O3-NEXT: lock xorw $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_xor_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection160:
+; HASWELL-O3-NEXT: lock xorw $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i16 42 seq_cst, align 2, !pcsections !0
@@ -4636,6 +5576,31 @@ define void @atomic16_nand_seq_cst(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_nand_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection161:
+; HASWELL-O3-NEXT: movzwl (%rdi), %eax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB92_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection162:
+; HASWELL-O3-NEXT: notl %ecx
+; HASWELL-O3-NEXT: .Lpcsection163:
+; HASWELL-O3-NEXT: orl $65493, %ecx # imm = 0xFFD5
+; HASWELL-O3-NEXT: .Lpcsection164:
+; HASWELL-O3-NEXT: # kill: def $ax killed $ax killed $eax
+; HASWELL-O3-NEXT: .Lpcsection165:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection166:
+; HASWELL-O3-NEXT: # kill: def $ax killed $ax def $eax
+; HASWELL-O3-NEXT: .Lpcsection167:
+; HASWELL-O3-NEXT: jne .LBB92_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i16 42 seq_cst, align 2, !pcsections !0
@@ -4712,6 +5677,22 @@ define void @atomic16_cas_monotonic(ptr %a) {
; O3-NEXT: lock cmpxchgw %cx, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_cas_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movw $1, %cx
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection168:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection169:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection170:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i16 42, i16 1 monotonic monotonic, align 2, !pcsections !0
@@ -4790,6 +5771,22 @@ define void @atomic16_cas_acquire(ptr %a) {
; O3-NEXT: lock cmpxchgw %cx, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_cas_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movw $1, %cx
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection171:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection172:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection173:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i16 42, i16 1 acquire monotonic, align 2, !pcsections !0
@@ -4868,6 +5865,22 @@ define void @atomic16_cas_release(ptr %a) {
; O3-NEXT: lock cmpxchgw %cx, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_cas_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movw $1, %cx
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection174:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection175:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection176:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i16 42, i16 1 release monotonic, align 2, !pcsections !0
@@ -4946,6 +5959,22 @@ define void @atomic16_cas_acq_rel(ptr %a) {
; O3-NEXT: lock cmpxchgw %cx, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_cas_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movw $1, %cx
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection177:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection178:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection179:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i16 42, i16 1 acq_rel monotonic, align 2, !pcsections !0
@@ -5024,6 +6053,22 @@ define void @atomic16_cas_seq_cst(ptr %a) {
; O3-NEXT: lock cmpxchgw %cx, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic16_cas_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movw $1, %cx
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection180:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection181:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: movw $42, %ax
+; HASWELL-O3-NEXT: .Lpcsection182:
+; HASWELL-O3-NEXT: lock cmpxchgw %cx, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i16 42, i16 1 seq_cst monotonic, align 2, !pcsections !0
@@ -5065,6 +6110,14 @@ define i32 @atomic32_load_unordered(ptr %a) {
; O3-NEXT: movl (%rdi), %eax
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_load_unordered:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection183:
+; HASWELL-O3-NEXT: movl (%rdi), %eax
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i32, ptr %a unordered, align 4, !pcsections !0
@@ -5104,6 +6157,14 @@ define i32 @atomic32_load_monotonic(ptr %a) {
; O3-NEXT: movl (%rdi), %eax
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_load_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection184:
+; HASWELL-O3-NEXT: movl (%rdi), %eax
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i32, ptr %a monotonic, align 4, !pcsections !0
@@ -5143,6 +6204,14 @@ define i32 @atomic32_load_acquire(ptr %a) {
; O3-NEXT: movl (%rdi), %eax
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_load_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection185:
+; HASWELL-O3-NEXT: movl (%rdi), %eax
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i32, ptr %a acquire, align 4, !pcsections !0
@@ -5182,6 +6251,14 @@ define i32 @atomic32_load_seq_cst(ptr %a) {
; O3-NEXT: movl (%rdi), %eax
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_load_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection186:
+; HASWELL-O3-NEXT: movl (%rdi), %eax
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i32, ptr %a seq_cst, align 4, !pcsections !0
@@ -5221,6 +6298,14 @@ define void @atomic32_store_unordered(ptr %a) {
; O3-NEXT: movl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_store_unordered:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection187:
+; HASWELL-O3-NEXT: movl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i32 42, ptr %a unordered, align 4, !pcsections !0
@@ -5260,6 +6345,14 @@ define void @atomic32_store_monotonic(ptr %a) {
; O3-NEXT: movl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_store_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection188:
+; HASWELL-O3-NEXT: movl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i32 42, ptr %a monotonic, align 4, !pcsections !0
@@ -5299,6 +6392,14 @@ define void @atomic32_store_release(ptr %a) {
; O3-NEXT: movl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_store_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection189:
+; HASWELL-O3-NEXT: movl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i32 42, ptr %a release, align 4, !pcsections !0
@@ -5342,6 +6443,15 @@ define void @atomic32_store_seq_cst(ptr %a) {
; O3-NEXT: xchgl %eax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_store_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection190:
+; HASWELL-O3-NEXT: xchgl %eax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i32 42, ptr %a seq_cst, align 4, !pcsections !0
@@ -5385,6 +6495,15 @@ define void @atomic32_xchg_monotonic(ptr %a) {
; O3-NEXT: xchgl %eax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_xchg_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection191:
+; HASWELL-O3-NEXT: xchgl %eax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i32 42 monotonic, align 4, !pcsections !0
@@ -5424,6 +6543,14 @@ define void @atomic32_add_monotonic(ptr %a) {
; O3-NEXT: lock addl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_add_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection192:
+; HASWELL-O3-NEXT: lock addl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i32 42 monotonic, align 4, !pcsections !0
@@ -5463,6 +6590,14 @@ define void @atomic32_sub_monotonic(ptr %a) {
; O3-NEXT: lock subl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_sub_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection193:
+; HASWELL-O3-NEXT: lock subl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i32 42 monotonic, align 4, !pcsections !0
@@ -5502,6 +6637,14 @@ define void @atomic32_and_monotonic(ptr %a) {
; O3-NEXT: lock andl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_and_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection194:
+; HASWELL-O3-NEXT: lock andl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i32 42 monotonic, align 4, !pcsections !0
@@ -5541,6 +6684,14 @@ define void @atomic32_or_monotonic(ptr %a) {
; O3-NEXT: lock orl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_or_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection195:
+; HASWELL-O3-NEXT: lock orl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i32 42 monotonic, align 4, !pcsections !0
@@ -5580,6 +6731,14 @@ define void @atomic32_xor_monotonic(ptr %a) {
; O3-NEXT: lock xorl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_xor_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection196:
+; HASWELL-O3-NEXT: lock xorl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i32 42 monotonic, align 4, !pcsections !0
@@ -5680,6 +6839,27 @@ define void @atomic32_nand_monotonic(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_nand_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection197:
+; HASWELL-O3-NEXT: movl (%rdi), %eax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB112_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection198:
+; HASWELL-O3-NEXT: notl %ecx
+; HASWELL-O3-NEXT: .Lpcsection199:
+; HASWELL-O3-NEXT: orl $-43, %ecx
+; HASWELL-O3-NEXT: .Lpcsection200:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection201:
+; HASWELL-O3-NEXT: jne .LBB112_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i32 42 monotonic, align 4, !pcsections !0
@@ -5723,6 +6903,15 @@ define void @atomic32_xchg_acquire(ptr %a) {
; O3-NEXT: xchgl %eax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_xchg_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection202:
+; HASWELL-O3-NEXT: xchgl %eax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i32 42 acquire, align 4, !pcsections !0
@@ -5762,6 +6951,14 @@ define void @atomic32_add_acquire(ptr %a) {
; O3-NEXT: lock addl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_add_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection203:
+; HASWELL-O3-NEXT: lock addl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i32 42 acquire, align 4, !pcsections !0
@@ -5801,6 +6998,14 @@ define void @atomic32_sub_acquire(ptr %a) {
; O3-NEXT: lock subl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_sub_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection204:
+; HASWELL-O3-NEXT: lock subl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i32 42 acquire, align 4, !pcsections !0
@@ -5840,6 +7045,14 @@ define void @atomic32_and_acquire(ptr %a) {
; O3-NEXT: lock andl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_and_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection205:
+; HASWELL-O3-NEXT: lock andl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i32 42 acquire, align 4, !pcsections !0
@@ -5879,6 +7092,14 @@ define void @atomic32_or_acquire(ptr %a) {
; O3-NEXT: lock orl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_or_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection206:
+; HASWELL-O3-NEXT: lock orl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i32 42 acquire, align 4, !pcsections !0
@@ -5918,6 +7139,14 @@ define void @atomic32_xor_acquire(ptr %a) {
; O3-NEXT: lock xorl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_xor_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection207:
+; HASWELL-O3-NEXT: lock xorl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i32 42 acquire, align 4, !pcsections !0
@@ -6018,6 +7247,27 @@ define void @atomic32_nand_acquire(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_nand_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection208:
+; HASWELL-O3-NEXT: movl (%rdi), %eax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB119_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection209:
+; HASWELL-O3-NEXT: notl %ecx
+; HASWELL-O3-NEXT: .Lpcsection210:
+; HASWELL-O3-NEXT: orl $-43, %ecx
+; HASWELL-O3-NEXT: .Lpcsection211:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection212:
+; HASWELL-O3-NEXT: jne .LBB119_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i32 42 acquire, align 4, !pcsections !0
@@ -6061,6 +7311,15 @@ define void @atomic32_xchg_release(ptr %a) {
; O3-NEXT: xchgl %eax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_xchg_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection213:
+; HASWELL-O3-NEXT: xchgl %eax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i32 42 release, align 4, !pcsections !0
@@ -6100,6 +7359,14 @@ define void @atomic32_add_release(ptr %a) {
; O3-NEXT: lock addl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_add_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection214:
+; HASWELL-O3-NEXT: lock addl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i32 42 release, align 4, !pcsections !0
@@ -6139,6 +7406,14 @@ define void @atomic32_sub_release(ptr %a) {
; O3-NEXT: lock subl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_sub_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection215:
+; HASWELL-O3-NEXT: lock subl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i32 42 release, align 4, !pcsections !0
@@ -6178,6 +7453,14 @@ define void @atomic32_and_release(ptr %a) {
; O3-NEXT: lock andl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_and_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection216:
+; HASWELL-O3-NEXT: lock andl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i32 42 release, align 4, !pcsections !0
@@ -6217,6 +7500,14 @@ define void @atomic32_or_release(ptr %a) {
; O3-NEXT: lock orl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_or_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection217:
+; HASWELL-O3-NEXT: lock orl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i32 42 release, align 4, !pcsections !0
@@ -6256,6 +7547,14 @@ define void @atomic32_xor_release(ptr %a) {
; O3-NEXT: lock xorl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_xor_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection218:
+; HASWELL-O3-NEXT: lock xorl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i32 42 release, align 4, !pcsections !0
@@ -6356,6 +7655,27 @@ define void @atomic32_nand_release(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_nand_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection219:
+; HASWELL-O3-NEXT: movl (%rdi), %eax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB126_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection220:
+; HASWELL-O3-NEXT: notl %ecx
+; HASWELL-O3-NEXT: .Lpcsection221:
+; HASWELL-O3-NEXT: orl $-43, %ecx
+; HASWELL-O3-NEXT: .Lpcsection222:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection223:
+; HASWELL-O3-NEXT: jne .LBB126_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i32 42 release, align 4, !pcsections !0
@@ -6399,6 +7719,15 @@ define void @atomic32_xchg_acq_rel(ptr %a) {
; O3-NEXT: xchgl %eax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_xchg_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection224:
+; HASWELL-O3-NEXT: xchgl %eax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i32 42 acq_rel, align 4, !pcsections !0
@@ -6438,6 +7767,14 @@ define void @atomic32_add_acq_rel(ptr %a) {
; O3-NEXT: lock addl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_add_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection225:
+; HASWELL-O3-NEXT: lock addl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i32 42 acq_rel, align 4, !pcsections !0
@@ -6477,6 +7814,14 @@ define void @atomic32_sub_acq_rel(ptr %a) {
; O3-NEXT: lock subl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_sub_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection226:
+; HASWELL-O3-NEXT: lock subl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i32 42 acq_rel, align 4, !pcsections !0
@@ -6516,6 +7861,14 @@ define void @atomic32_and_acq_rel(ptr %a) {
; O3-NEXT: lock andl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_and_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection227:
+; HASWELL-O3-NEXT: lock andl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i32 42 acq_rel, align 4, !pcsections !0
@@ -6555,6 +7908,14 @@ define void @atomic32_or_acq_rel(ptr %a) {
; O3-NEXT: lock orl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_or_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection228:
+; HASWELL-O3-NEXT: lock orl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i32 42 acq_rel, align 4, !pcsections !0
@@ -6594,6 +7955,14 @@ define void @atomic32_xor_acq_rel(ptr %a) {
; O3-NEXT: lock xorl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_xor_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection229:
+; HASWELL-O3-NEXT: lock xorl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i32 42 acq_rel, align 4, !pcsections !0
@@ -6694,6 +8063,27 @@ define void @atomic32_nand_acq_rel(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_nand_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection230:
+; HASWELL-O3-NEXT: movl (%rdi), %eax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB133_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection231:
+; HASWELL-O3-NEXT: notl %ecx
+; HASWELL-O3-NEXT: .Lpcsection232:
+; HASWELL-O3-NEXT: orl $-43, %ecx
+; HASWELL-O3-NEXT: .Lpcsection233:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection234:
+; HASWELL-O3-NEXT: jne .LBB133_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i32 42 acq_rel, align 4, !pcsections !0
@@ -6737,6 +8127,15 @@ define void @atomic32_xchg_seq_cst(ptr %a) {
; O3-NEXT: xchgl %eax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_xchg_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection235:
+; HASWELL-O3-NEXT: xchgl %eax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i32 42 seq_cst, align 4, !pcsections !0
@@ -6776,6 +8175,14 @@ define void @atomic32_add_seq_cst(ptr %a) {
; O3-NEXT: lock addl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_add_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection236:
+; HASWELL-O3-NEXT: lock addl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i32 42 seq_cst, align 4, !pcsections !0
@@ -6815,6 +8222,14 @@ define void @atomic32_sub_seq_cst(ptr %a) {
; O3-NEXT: lock subl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_sub_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection237:
+; HASWELL-O3-NEXT: lock subl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i32 42 seq_cst, align 4, !pcsections !0
@@ -6854,6 +8269,14 @@ define void @atomic32_and_seq_cst(ptr %a) {
; O3-NEXT: lock andl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_and_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection238:
+; HASWELL-O3-NEXT: lock andl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i32 42 seq_cst, align 4, !pcsections !0
@@ -6893,6 +8316,14 @@ define void @atomic32_or_seq_cst(ptr %a) {
; O3-NEXT: lock orl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_or_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection239:
+; HASWELL-O3-NEXT: lock orl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i32 42 seq_cst, align 4, !pcsections !0
@@ -6932,6 +8363,14 @@ define void @atomic32_xor_seq_cst(ptr %a) {
; O3-NEXT: lock xorl $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_xor_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection240:
+; HASWELL-O3-NEXT: lock xorl $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i32 42 seq_cst, align 4, !pcsections !0
@@ -7032,6 +8471,27 @@ define void @atomic32_nand_seq_cst(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_nand_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection241:
+; HASWELL-O3-NEXT: movl (%rdi), %eax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB140_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection242:
+; HASWELL-O3-NEXT: notl %ecx
+; HASWELL-O3-NEXT: .Lpcsection243:
+; HASWELL-O3-NEXT: orl $-43, %ecx
+; HASWELL-O3-NEXT: .Lpcsection244:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection245:
+; HASWELL-O3-NEXT: jne .LBB140_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i32 42 seq_cst, align 4, !pcsections !0
@@ -7117,6 +8577,25 @@ define void @atomic32_cas_monotonic(ptr %a) {
; O3-NEXT: lock cmpxchgl %ecx, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_cas_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $1, %ecx
+; HASWELL-O3-NEXT: .Lpcsection246:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection247:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection248:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection249:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection250:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection251:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i32 42, i32 1 monotonic monotonic, align 4, !pcsections !0
@@ -7204,6 +8683,25 @@ define void @atomic32_cas_acquire(ptr %a) {
; O3-NEXT: lock cmpxchgl %ecx, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_cas_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $1, %ecx
+; HASWELL-O3-NEXT: .Lpcsection252:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection253:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection254:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection255:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection256:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection257:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i32 42, i32 1 acquire monotonic, align 4, !pcsections !0
@@ -7291,6 +8789,25 @@ define void @atomic32_cas_release(ptr %a) {
; O3-NEXT: lock cmpxchgl %ecx, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_cas_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $1, %ecx
+; HASWELL-O3-NEXT: .Lpcsection258:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection259:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection260:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection261:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection262:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection263:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i32 42, i32 1 release monotonic, align 4, !pcsections !0
@@ -7378,6 +8895,25 @@ define void @atomic32_cas_acq_rel(ptr %a) {
; O3-NEXT: lock cmpxchgl %ecx, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_cas_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $1, %ecx
+; HASWELL-O3-NEXT: .Lpcsection264:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection265:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection266:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection267:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection268:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection269:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i32 42, i32 1 acq_rel monotonic, align 4, !pcsections !0
@@ -7465,6 +9001,25 @@ define void @atomic32_cas_seq_cst(ptr %a) {
; O3-NEXT: lock cmpxchgl %ecx, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic32_cas_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $1, %ecx
+; HASWELL-O3-NEXT: .Lpcsection270:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection271:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection272:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection273:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection274:
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection275:
+; HASWELL-O3-NEXT: lock cmpxchgl %ecx, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = cmpxchg ptr %a, i32 42, i32 1 seq_cst monotonic, align 4, !pcsections !0
@@ -7506,6 +9061,14 @@ define i64 @atomic64_load_unordered(ptr %a) {
; O3-NEXT: movq (%rdi), %rax
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_load_unordered:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection276:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i64, ptr %a unordered, align 8, !pcsections !0
@@ -7545,6 +9108,14 @@ define i64 @atomic64_load_monotonic(ptr %a) {
; O3-NEXT: movq (%rdi), %rax
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_load_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection277:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i64, ptr %a monotonic, align 8, !pcsections !0
@@ -7584,6 +9155,14 @@ define i64 @atomic64_load_acquire(ptr %a) {
; O3-NEXT: movq (%rdi), %rax
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_load_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection278:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i64, ptr %a acquire, align 8, !pcsections !0
@@ -7623,6 +9202,14 @@ define i64 @atomic64_load_seq_cst(ptr %a) {
; O3-NEXT: movq (%rdi), %rax
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_load_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection279:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i64, ptr %a seq_cst, align 8, !pcsections !0
@@ -7662,6 +9249,14 @@ define ptr @atomic64_load_seq_cst_ptr_ty(ptr %a) {
; O3-NEXT: movq (%rdi), %rax
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_load_seq_cst_ptr_ty:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection280:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic ptr, ptr %a seq_cst, align 8, !pcsections !0
@@ -7701,6 +9296,14 @@ define void @atomic64_store_unordered(ptr %a) {
; O3-NEXT: movq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_store_unordered:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection281:
+; HASWELL-O3-NEXT: movq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i64 42, ptr %a unordered, align 8, !pcsections !0
@@ -7740,6 +9343,14 @@ define void @atomic64_store_monotonic(ptr %a) {
; O3-NEXT: movq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_store_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection282:
+; HASWELL-O3-NEXT: movq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i64 42, ptr %a monotonic, align 8, !pcsections !0
@@ -7779,6 +9390,14 @@ define void @atomic64_store_release(ptr %a) {
; O3-NEXT: movq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_store_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection283:
+; HASWELL-O3-NEXT: movq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i64 42, ptr %a release, align 8, !pcsections !0
@@ -7822,6 +9441,15 @@ define void @atomic64_store_seq_cst(ptr %a) {
; O3-NEXT: xchgq %rax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_store_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection284:
+; HASWELL-O3-NEXT: xchgq %rax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i64 42, ptr %a seq_cst, align 8, !pcsections !0
@@ -7861,6 +9489,14 @@ define void @atomic64_store_seq_cst_ptr_ty(ptr %a, ptr %v) {
; O3-NEXT: xchgq %rsi, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_store_seq_cst_ptr_ty:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection285:
+; HASWELL-O3-NEXT: xchgq %rsi, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic ptr %v, ptr %a seq_cst, align 8, !pcsections !0
@@ -7904,6 +9540,15 @@ define void @atomic64_xchg_monotonic(ptr %a) {
; O3-NEXT: xchgq %rax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_xchg_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection286:
+; HASWELL-O3-NEXT: xchgq %rax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i64 42 monotonic, align 8, !pcsections !0
@@ -7943,6 +9588,14 @@ define void @atomic64_add_monotonic(ptr %a) {
; O3-NEXT: lock addq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_add_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection287:
+; HASWELL-O3-NEXT: lock addq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i64 42 monotonic, align 8, !pcsections !0
@@ -7982,6 +9635,14 @@ define void @atomic64_sub_monotonic(ptr %a) {
; O3-NEXT: lock subq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_sub_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection288:
+; HASWELL-O3-NEXT: lock subq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i64 42 monotonic, align 8, !pcsections !0
@@ -8021,6 +9682,14 @@ define void @atomic64_and_monotonic(ptr %a) {
; O3-NEXT: lock andq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_and_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection289:
+; HASWELL-O3-NEXT: lock andq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i64 42 monotonic, align 8, !pcsections !0
@@ -8060,6 +9729,14 @@ define void @atomic64_or_monotonic(ptr %a) {
; O3-NEXT: lock orq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_or_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection290:
+; HASWELL-O3-NEXT: lock orq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i64 42 monotonic, align 8, !pcsections !0
@@ -8099,6 +9776,14 @@ define void @atomic64_xor_monotonic(ptr %a) {
; O3-NEXT: lock xorq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_xor_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection291:
+; HASWELL-O3-NEXT: lock xorq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i64 42 monotonic, align 8, !pcsections !0
@@ -8202,6 +9887,27 @@ define void @atomic64_nand_monotonic(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_nand_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection292:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB162_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection293:
+; HASWELL-O3-NEXT: notl %ecx
+; HASWELL-O3-NEXT: .Lpcsection294:
+; HASWELL-O3-NEXT: orq $-43, %rcx
+; HASWELL-O3-NEXT: .Lpcsection295:
+; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection296:
+; HASWELL-O3-NEXT: jne .LBB162_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i64 42 monotonic, align 8, !pcsections !0
@@ -8245,6 +9951,15 @@ define void @atomic64_xchg_acquire(ptr %a) {
; O3-NEXT: xchgq %rax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_xchg_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection297:
+; HASWELL-O3-NEXT: xchgq %rax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i64 42 acquire, align 8, !pcsections !0
@@ -8284,6 +9999,14 @@ define void @atomic64_add_acquire(ptr %a) {
; O3-NEXT: lock addq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_add_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection298:
+; HASWELL-O3-NEXT: lock addq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i64 42 acquire, align 8, !pcsections !0
@@ -8323,6 +10046,14 @@ define void @atomic64_sub_acquire(ptr %a) {
; O3-NEXT: lock subq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_sub_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection299:
+; HASWELL-O3-NEXT: lock subq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i64 42 acquire, align 8, !pcsections !0
@@ -8362,6 +10093,14 @@ define void @atomic64_and_acquire(ptr %a) {
; O3-NEXT: lock andq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_and_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection300:
+; HASWELL-O3-NEXT: lock andq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i64 42 acquire, align 8, !pcsections !0
@@ -8401,6 +10140,14 @@ define void @atomic64_or_acquire(ptr %a) {
; O3-NEXT: lock orq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_or_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection301:
+; HASWELL-O3-NEXT: lock orq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i64 42 acquire, align 8, !pcsections !0
@@ -8440,6 +10187,14 @@ define void @atomic64_xor_acquire(ptr %a) {
; O3-NEXT: lock xorq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_xor_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection302:
+; HASWELL-O3-NEXT: lock xorq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i64 42 acquire, align 8, !pcsections !0
@@ -8543,6 +10298,27 @@ define void @atomic64_nand_acquire(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_nand_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection303:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB169_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection304:
+; HASWELL-O3-NEXT: notl %ecx
+; HASWELL-O3-NEXT: .Lpcsection305:
+; HASWELL-O3-NEXT: orq $-43, %rcx
+; HASWELL-O3-NEXT: .Lpcsection306:
+; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection307:
+; HASWELL-O3-NEXT: jne .LBB169_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i64 42 acquire, align 8, !pcsections !0
@@ -8586,6 +10362,15 @@ define void @atomic64_xchg_release(ptr %a) {
; O3-NEXT: xchgq %rax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_xchg_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection308:
+; HASWELL-O3-NEXT: xchgq %rax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i64 42 release, align 8, !pcsections !0
@@ -8625,6 +10410,14 @@ define void @atomic64_add_release(ptr %a) {
; O3-NEXT: lock addq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_add_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection309:
+; HASWELL-O3-NEXT: lock addq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i64 42 release, align 8, !pcsections !0
@@ -8664,6 +10457,14 @@ define void @atomic64_sub_release(ptr %a) {
; O3-NEXT: lock subq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_sub_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection310:
+; HASWELL-O3-NEXT: lock subq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i64 42 release, align 8, !pcsections !0
@@ -8703,6 +10504,14 @@ define void @atomic64_and_release(ptr %a) {
; O3-NEXT: lock andq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_and_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection311:
+; HASWELL-O3-NEXT: lock andq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i64 42 release, align 8, !pcsections !0
@@ -8742,6 +10551,14 @@ define void @atomic64_or_release(ptr %a) {
; O3-NEXT: lock orq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_or_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection312:
+; HASWELL-O3-NEXT: lock orq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i64 42 release, align 8, !pcsections !0
@@ -8781,6 +10598,14 @@ define void @atomic64_xor_release(ptr %a) {
; O3-NEXT: lock xorq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_xor_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection313:
+; HASWELL-O3-NEXT: lock xorq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i64 42 release, align 8, !pcsections !0
@@ -8884,6 +10709,27 @@ define void @atomic64_nand_release(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_nand_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection314:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB176_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection315:
+; HASWELL-O3-NEXT: notl %ecx
+; HASWELL-O3-NEXT: .Lpcsection316:
+; HASWELL-O3-NEXT: orq $-43, %rcx
+; HASWELL-O3-NEXT: .Lpcsection317:
+; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection318:
+; HASWELL-O3-NEXT: jne .LBB176_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i64 42 release, align 8, !pcsections !0
@@ -8927,6 +10773,15 @@ define void @atomic64_xchg_acq_rel(ptr %a) {
; O3-NEXT: xchgq %rax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_xchg_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection319:
+; HASWELL-O3-NEXT: xchgq %rax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i64 42 acq_rel, align 8, !pcsections !0
@@ -8966,6 +10821,14 @@ define void @atomic64_add_acq_rel(ptr %a) {
; O3-NEXT: lock addq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_add_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection320:
+; HASWELL-O3-NEXT: lock addq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i64 42 acq_rel, align 8, !pcsections !0
@@ -9005,6 +10868,14 @@ define void @atomic64_sub_acq_rel(ptr %a) {
; O3-NEXT: lock subq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_sub_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection321:
+; HASWELL-O3-NEXT: lock subq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i64 42 acq_rel, align 8, !pcsections !0
@@ -9044,6 +10915,14 @@ define void @atomic64_and_acq_rel(ptr %a) {
; O3-NEXT: lock andq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_and_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection322:
+; HASWELL-O3-NEXT: lock andq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i64 42 acq_rel, align 8, !pcsections !0
@@ -9083,6 +10962,14 @@ define void @atomic64_or_acq_rel(ptr %a) {
; O3-NEXT: lock orq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_or_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection323:
+; HASWELL-O3-NEXT: lock orq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i64 42 acq_rel, align 8, !pcsections !0
@@ -9122,6 +11009,14 @@ define void @atomic64_xor_acq_rel(ptr %a) {
; O3-NEXT: lock xorq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_xor_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection324:
+; HASWELL-O3-NEXT: lock xorq $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i64 42 acq_rel, align 8, !pcsections !0
@@ -9225,6 +11120,27 @@ define void @atomic64_nand_acq_rel(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_nand_acq_rel:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection325:
+; HASWELL-O3-NEXT: movq (%rdi), %rax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB183_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection326:
+; HASWELL-O3-NEXT: notl %ecx
+; HASWELL-O3-NEXT: .Lpcsection327:
+; HASWELL-O3-NEXT: orq $-43, %rcx
+; HASWELL-O3-NEXT: .Lpcsection328:
+; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection329:
+; HASWELL-O3-NEXT: jne .LBB183_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i64 42 acq_rel, align 8, !pcsections !0
@@ -9268,6 +11184,15 @@ define void @atomic64_xchg_seq_cst(ptr %a) {
; O3-NEXT: xchgq %rax, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_xchg_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movl $42, %eax
+; HASWELL-O3-NEXT: .Lpcsection330:
+; HASWELL-O3-NEXT: xchgq %rax, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i64 42 seq_cst, align 8, !pcsections !0
@@ -9307,6 +11232,14 @@ define void @atomic64_add_seq_cst(ptr %a) {
; O3-NEXT: lock addq $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic64_add_seq_cst:
+; HASWELL-O3: # %bb.0: #
%entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection331: +; HASWELL-O3-NEXT: lock addq $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw add ptr %a, i64 42 seq_cst, align 8, !pcsections !0 @@ -9346,6 +11279,14 @@ define void @atomic64_sub_seq_cst(ptr %a) { ; O3-NEXT: lock subq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic64_sub_seq_cst: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection332: +; HASWELL-O3-NEXT: lock subq $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw sub ptr %a, i64 42 seq_cst, align 8, !pcsections !0 @@ -9385,6 +11326,14 @@ define void @atomic64_and_seq_cst(ptr %a) { ; O3-NEXT: lock andq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic64_and_seq_cst: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection333: +; HASWELL-O3-NEXT: lock andq $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw and ptr %a, i64 42 seq_cst, align 8, !pcsections !0 @@ -9424,6 +11373,14 @@ define void @atomic64_or_seq_cst(ptr %a) { ; O3-NEXT: lock orq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic64_or_seq_cst: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection334: +; HASWELL-O3-NEXT: lock orq $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw or ptr %a, i64 42 seq_cst, align 8, !pcsections !0 @@ -9463,6 +11420,14 @@ define void @atomic64_xor_seq_cst(ptr %a) { ; O3-NEXT: lock xorq $42, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic64_xor_seq_cst: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection335: +; HASWELL-O3-NEXT: lock xorq $42, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw xor ptr %a, i64 42 seq_cst, align 8, !pcsections !0 @@ -9566,6 +11531,27 @@ define void @atomic64_nand_seq_cst(ptr %a) { ; O3-NEXT: # %bb.2: # %atomicrmw.end ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic64_nand_seq_cst: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection336: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB190_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movl %eax, %ecx +; HASWELL-O3-NEXT: .Lpcsection337: +; HASWELL-O3-NEXT: notl %ecx +; HASWELL-O3-NEXT: .Lpcsection338: +; HASWELL-O3-NEXT: orq $-43, %rcx +; HASWELL-O3-NEXT: .Lpcsection339: +; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi) +; HASWELL-O3-NEXT: .Lpcsection340: +; HASWELL-O3-NEXT: jne .LBB190_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw nand ptr %a, i64 42 seq_cst, align 8, !pcsections !0 @@ -9651,6 +11637,25 @@ define void @atomic64_cas_monotonic(ptr %a) { ; O3-NEXT: lock cmpxchgq %rcx, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; 
O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic64_cas_monotonic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: movl $1, %ecx +; HASWELL-O3-NEXT: .Lpcsection341: +; HASWELL-O3-NEXT: movl $42, %eax +; HASWELL-O3-NEXT: .Lpcsection342: +; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi) +; HASWELL-O3-NEXT: .Lpcsection343: +; HASWELL-O3-NEXT: movl $42, %eax +; HASWELL-O3-NEXT: .Lpcsection344: +; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi) +; HASWELL-O3-NEXT: .Lpcsection345: +; HASWELL-O3-NEXT: movl $42, %eax +; HASWELL-O3-NEXT: .Lpcsection346: +; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = cmpxchg ptr %a, i64 42, i64 1 monotonic monotonic, align 8, !pcsections !0 @@ -9738,6 +11743,25 @@ define void @atomic64_cas_acquire(ptr %a) { ; O3-NEXT: lock cmpxchgq %rcx, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic64_cas_acquire: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: movl $1, %ecx +; HASWELL-O3-NEXT: .Lpcsection347: +; HASWELL-O3-NEXT: movl $42, %eax +; HASWELL-O3-NEXT: .Lpcsection348: +; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi) +; HASWELL-O3-NEXT: .Lpcsection349: +; HASWELL-O3-NEXT: movl $42, %eax +; HASWELL-O3-NEXT: .Lpcsection350: +; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi) +; HASWELL-O3-NEXT: .Lpcsection351: +; HASWELL-O3-NEXT: movl $42, %eax +; HASWELL-O3-NEXT: .Lpcsection352: +; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = cmpxchg ptr %a, i64 42, i64 1 acquire monotonic, align 8, !pcsections !0 @@ -9825,6 +11849,25 @@ define void @atomic64_cas_release(ptr %a) { ; O3-NEXT: lock cmpxchgq %rcx, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic64_cas_release: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: movl $1, %ecx +; HASWELL-O3-NEXT: .Lpcsection353: +; HASWELL-O3-NEXT: movl $42, %eax +; HASWELL-O3-NEXT: .Lpcsection354: +; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi) +; HASWELL-O3-NEXT: .Lpcsection355: +; HASWELL-O3-NEXT: movl $42, %eax +; HASWELL-O3-NEXT: .Lpcsection356: +; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi) +; HASWELL-O3-NEXT: .Lpcsection357: +; HASWELL-O3-NEXT: movl $42, %eax +; HASWELL-O3-NEXT: .Lpcsection358: +; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = cmpxchg ptr %a, i64 42, i64 1 release monotonic, align 8, !pcsections !0 @@ -9912,6 +11955,25 @@ define void @atomic64_cas_acq_rel(ptr %a) { ; O3-NEXT: lock cmpxchgq %rcx, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic64_cas_acq_rel: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: movl $1, %ecx +; HASWELL-O3-NEXT: .Lpcsection359: +; HASWELL-O3-NEXT: movl $42, %eax +; HASWELL-O3-NEXT: .Lpcsection360: +; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi) +; HASWELL-O3-NEXT: .Lpcsection361: +; HASWELL-O3-NEXT: movl $42, %eax +; HASWELL-O3-NEXT: .Lpcsection362: +; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi) +; HASWELL-O3-NEXT: .Lpcsection363: +; HASWELL-O3-NEXT: movl $42, %eax +; HASWELL-O3-NEXT: .Lpcsection364: +; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: 
retq entry: load volatile i64, ptr @foo, align 8 %x = cmpxchg ptr %a, i64 42, i64 1 acq_rel monotonic, align 8, !pcsections !0 @@ -9999,6 +12061,25 @@ define void @atomic64_cas_seq_cst(ptr %a) { ; O3-NEXT: lock cmpxchgq %rcx, (%rdi) ; O3-NEXT: movq $3, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic64_cas_seq_cst: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: movl $1, %ecx +; HASWELL-O3-NEXT: .Lpcsection365: +; HASWELL-O3-NEXT: movl $42, %eax +; HASWELL-O3-NEXT: .Lpcsection366: +; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi) +; HASWELL-O3-NEXT: .Lpcsection367: +; HASWELL-O3-NEXT: movl $42, %eax +; HASWELL-O3-NEXT: .Lpcsection368: +; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi) +; HASWELL-O3-NEXT: .Lpcsection369: +; HASWELL-O3-NEXT: movl $42, %eax +; HASWELL-O3-NEXT: .Lpcsection370: +; HASWELL-O3-NEXT: lock cmpxchgq %rcx, (%rdi) +; HASWELL-O3-NEXT: movq $3, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = cmpxchg ptr %a, i64 42, i64 1 seq_cst monotonic, align 8, !pcsections !0 @@ -10044,6 +12125,15 @@ define void @atomic64_cas_seq_cst_ptr_ty(ptr %a, ptr %v1, ptr %v2) { ; O3-NEXT: lock cmpxchgq %rdx, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic64_cas_seq_cst_ptr_ty: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq %rsi, %rax +; HASWELL-O3-NEXT: movq foo(%rip), %rcx +; HASWELL-O3-NEXT: .Lpcsection371: +; HASWELL-O3-NEXT: lock cmpxchgq %rdx, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = cmpxchg ptr %a, ptr %v1, ptr %v2 seq_cst seq_cst, align 8, !pcsections !0 @@ -10102,6 +12192,18 @@ define i64 @atomic_use_cond(ptr %a) { ; O3-NEXT: .LBB197_2: # %else ; O3-NEXT: movl $2, %eax ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic_use_cond: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: .Lpcsection372: +; HASWELL-O3-NEXT: lock decq (%rdi) +; HASWELL-O3-NEXT: jne .LBB197_2 +; HASWELL-O3-NEXT: # %bb.1: # %then +; HASWELL-O3-NEXT: movl $1, %eax +; HASWELL-O3-NEXT: retq +; HASWELL-O3-NEXT: .LBB197_2: # %else +; HASWELL-O3-NEXT: movl $2, %eax +; HASWELL-O3-NEXT: retq entry: %x = atomicrmw sub ptr %a, i64 1 seq_cst, align 8, !pcsections !0 %y = icmp eq i64 %x, 1 @@ -10196,6 +12298,18 @@ define i128 @atomic128_load_unordered(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_load_unordered: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection373: +; HASWELL-O3-NEXT: vmovdqa (%rdi), %xmm0 +; HASWELL-O3-NEXT: .Lpcsection374: +; HASWELL-O3-NEXT: vmovq %xmm0, %rax +; HASWELL-O3-NEXT: .Lpcsection375: +; HASWELL-O3-NEXT: vpextrq $1, %xmm0, %rdx +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = load atomic i128, ptr %a unordered, align 16, !pcsections !0 @@ -10285,6 +12399,18 @@ define i128 @atomic128_load_monotonic(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_load_monotonic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection376: +; HASWELL-O3-NEXT: vmovdqa (%rdi), %xmm0 +; HASWELL-O3-NEXT: .Lpcsection377: +; HASWELL-O3-NEXT: vmovq %xmm0, %rax +; HASWELL-O3-NEXT: .Lpcsection378: +; HASWELL-O3-NEXT: vpextrq $1, %xmm0, %rdx +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile 
i64, ptr @foo, align 8 %x = load atomic i128, ptr %a monotonic, align 16, !pcsections !0 @@ -10374,6 +12500,18 @@ define i128 @atomic128_load_acquire(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_load_acquire: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection379: +; HASWELL-O3-NEXT: vmovdqa (%rdi), %xmm0 +; HASWELL-O3-NEXT: .Lpcsection380: +; HASWELL-O3-NEXT: vmovq %xmm0, %rax +; HASWELL-O3-NEXT: .Lpcsection381: +; HASWELL-O3-NEXT: vpextrq $1, %xmm0, %rdx +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = load atomic i128, ptr %a acquire, align 16, !pcsections !0 @@ -10463,6 +12601,18 @@ define i128 @atomic128_load_seq_cst(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_load_seq_cst: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection382: +; HASWELL-O3-NEXT: vmovdqa (%rdi), %xmm0 +; HASWELL-O3-NEXT: .Lpcsection383: +; HASWELL-O3-NEXT: vmovq %xmm0, %rax +; HASWELL-O3-NEXT: .Lpcsection384: +; HASWELL-O3-NEXT: vpextrq $1, %xmm0, %rdx +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = load atomic i128, ptr %a seq_cst, align 16, !pcsections !0 @@ -10502,6 +12652,14 @@ define ptr @atomic128_load_seq_cst_ptr_ty(ptr %a) { ; O3-NEXT: movq (%rdi), %rax ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_load_seq_cst_ptr_ty: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection385: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = load atomic ptr, ptr %a seq_cst, align 16, !pcsections !0 @@ -10629,6 +12787,16 @@ define void @atomic128_store_unordered(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_store_unordered: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection386: +; HASWELL-O3-NEXT: vmovss {{.*#+}} xmm0 = [42,0,0,0] +; HASWELL-O3-NEXT: .Lpcsection387: +; HASWELL-O3-NEXT: vmovaps %xmm0, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 store atomic i128 42, ptr %a unordered, align 16, !pcsections !0 @@ -10756,6 +12924,16 @@ define void @atomic128_store_monotonic(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_store_monotonic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection388: +; HASWELL-O3-NEXT: vmovss {{.*#+}} xmm0 = [42,0,0,0] +; HASWELL-O3-NEXT: .Lpcsection389: +; HASWELL-O3-NEXT: vmovaps %xmm0, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 store atomic i128 42, ptr %a monotonic, align 16, !pcsections !0 @@ -10883,6 +13061,16 @@ define void @atomic128_store_release(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_store_release: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection390: +; HASWELL-O3-NEXT: vmovss {{.*#+}} xmm0 = [42,0,0,0] +; HASWELL-O3-NEXT: 
.Lpcsection391: +; HASWELL-O3-NEXT: vmovaps %xmm0, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 store atomic i128 42, ptr %a release, align 16, !pcsections !0 @@ -11010,6 +13198,18 @@ define void @atomic128_store_seq_cst(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_store_seq_cst: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection392: +; HASWELL-O3-NEXT: vmovss {{.*#+}} xmm0 = [42,0,0,0] +; HASWELL-O3-NEXT: .Lpcsection393: +; HASWELL-O3-NEXT: vmovaps %xmm0, (%rdi) +; HASWELL-O3-NEXT: .Lpcsection394: +; HASWELL-O3-NEXT: lock orl $0, -{{[0-9]+}}(%rsp) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 store atomic i128 42, ptr %a seq_cst, align 16, !pcsections !0 @@ -11049,6 +13249,14 @@ define void @atomic128_store_seq_cst_ptr_ty(ptr %a, ptr %v) { ; O3-NEXT: xchgq %rsi, (%rdi) ; O3-NEXT: movq $1, foo(%rip) ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_store_seq_cst_ptr_ty: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection395: +; HASWELL-O3-NEXT: xchgq %rsi, (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 store atomic ptr %v, ptr %a seq_cst, align 16, !pcsections !0 @@ -11176,6 +13384,33 @@ define void @atomic128_xchg_monotonic(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_xchg_monotonic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection396: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection397: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .Lpcsection398: +; HASWELL-O3-NEXT: movl $42, %ebx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB208_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: .Lpcsection399: +; HASWELL-O3-NEXT: xorl %ecx, %ecx +; HASWELL-O3-NEXT: .Lpcsection400: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection401: +; HASWELL-O3-NEXT: jne .LBB208_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw xchg ptr %a, i128 42 monotonic, align 16, !pcsections !0 @@ -11309,6 +13544,35 @@ define void @atomic128_add_monotonic(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_add_monotonic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection402: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection403: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB209_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movq %rax, %rbx +; HASWELL-O3-NEXT: .Lpcsection404: +; HASWELL-O3-NEXT: addq $42, %rbx +; HASWELL-O3-NEXT: movq %rdx, %rcx +; HASWELL-O3-NEXT: 
.Lpcsection405: +; HASWELL-O3-NEXT: adcq $0, %rcx +; HASWELL-O3-NEXT: .Lpcsection406: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection407: +; HASWELL-O3-NEXT: jne .LBB209_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw add ptr %a, i128 42 monotonic, align 16, !pcsections !0 @@ -11442,6 +13706,35 @@ define void @atomic128_sub_monotonic(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_sub_monotonic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection408: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection409: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB210_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movq %rax, %rbx +; HASWELL-O3-NEXT: .Lpcsection410: +; HASWELL-O3-NEXT: addq $-42, %rbx +; HASWELL-O3-NEXT: movq %rdx, %rcx +; HASWELL-O3-NEXT: .Lpcsection411: +; HASWELL-O3-NEXT: adcq $-1, %rcx +; HASWELL-O3-NEXT: .Lpcsection412: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection413: +; HASWELL-O3-NEXT: jne .LBB210_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw sub ptr %a, i128 42 monotonic, align 16, !pcsections !0 @@ -11574,6 +13867,34 @@ define void @atomic128_and_monotonic(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_and_monotonic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection414: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection415: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB211_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movl %eax, %ebx +; HASWELL-O3-NEXT: .Lpcsection416: +; HASWELL-O3-NEXT: andl $42, %ebx +; HASWELL-O3-NEXT: .Lpcsection417: +; HASWELL-O3-NEXT: xorl %ecx, %ecx +; HASWELL-O3-NEXT: .Lpcsection418: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection419: +; HASWELL-O3-NEXT: jne .LBB211_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw and ptr %a, i128 42 monotonic, align 16, !pcsections !0 @@ -11699,6 +14020,33 @@ define void @atomic128_or_monotonic(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_or_monotonic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection420: +; HASWELL-O3-NEXT: 
movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection421: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB212_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movq %rax, %rbx +; HASWELL-O3-NEXT: .Lpcsection422: +; HASWELL-O3-NEXT: orq $42, %rbx +; HASWELL-O3-NEXT: movq %rdx, %rcx +; HASWELL-O3-NEXT: .Lpcsection423: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection424: +; HASWELL-O3-NEXT: jne .LBB212_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw or ptr %a, i128 42 monotonic, align 16, !pcsections !0 @@ -11824,6 +14172,33 @@ define void @atomic128_xor_monotonic(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_xor_monotonic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection425: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection426: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB213_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movq %rax, %rbx +; HASWELL-O3-NEXT: .Lpcsection427: +; HASWELL-O3-NEXT: xorq $42, %rbx +; HASWELL-O3-NEXT: movq %rdx, %rcx +; HASWELL-O3-NEXT: .Lpcsection428: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection429: +; HASWELL-O3-NEXT: jne .LBB213_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw xor ptr %a, i128 42 monotonic, align 16, !pcsections !0 @@ -11964,6 +14339,36 @@ define void @atomic128_nand_monotonic(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_nand_monotonic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection430: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection431: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .Lpcsection432: +; HASWELL-O3-NEXT: movq $-1, %rcx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB214_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movl %eax, %ebx +; HASWELL-O3-NEXT: .Lpcsection433: +; HASWELL-O3-NEXT: notl %ebx +; HASWELL-O3-NEXT: .Lpcsection434: +; HASWELL-O3-NEXT: orq $-43, %rbx +; HASWELL-O3-NEXT: .Lpcsection435: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection436: +; HASWELL-O3-NEXT: jne .LBB214_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw nand ptr %a, i128 42 monotonic, align 16, !pcsections !0 @@ -12091,6 +14496,33 @@ define void @atomic128_xchg_acquire(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: 
.cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_xchg_acquire: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection437: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection438: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .Lpcsection439: +; HASWELL-O3-NEXT: movl $42, %ebx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB215_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: .Lpcsection440: +; HASWELL-O3-NEXT: xorl %ecx, %ecx +; HASWELL-O3-NEXT: .Lpcsection441: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection442: +; HASWELL-O3-NEXT: jne .LBB215_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw xchg ptr %a, i128 42 acquire, align 16, !pcsections !0 @@ -12224,6 +14656,35 @@ define void @atomic128_add_acquire(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_add_acquire: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection443: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection444: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB216_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movq %rax, %rbx +; HASWELL-O3-NEXT: .Lpcsection445: +; HASWELL-O3-NEXT: addq $42, %rbx +; HASWELL-O3-NEXT: movq %rdx, %rcx +; HASWELL-O3-NEXT: .Lpcsection446: +; HASWELL-O3-NEXT: adcq $0, %rcx +; HASWELL-O3-NEXT: .Lpcsection447: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection448: +; HASWELL-O3-NEXT: jne .LBB216_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw add ptr %a, i128 42 acquire, align 16, !pcsections !0 @@ -12357,6 +14818,35 @@ define void @atomic128_sub_acquire(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_sub_acquire: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection449: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection450: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB217_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movq %rax, %rbx +; HASWELL-O3-NEXT: .Lpcsection451: +; HASWELL-O3-NEXT: addq $-42, %rbx +; HASWELL-O3-NEXT: movq %rdx, %rcx +; HASWELL-O3-NEXT: .Lpcsection452: +; HASWELL-O3-NEXT: adcq $-1, %rcx +; HASWELL-O3-NEXT: .Lpcsection453: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection454: +; HASWELL-O3-NEXT: jne .LBB217_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; 
HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw sub ptr %a, i128 42 acquire, align 16, !pcsections !0 @@ -12489,6 +14979,34 @@ define void @atomic128_and_acquire(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_and_acquire: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection455: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection456: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB218_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movl %eax, %ebx +; HASWELL-O3-NEXT: .Lpcsection457: +; HASWELL-O3-NEXT: andl $42, %ebx +; HASWELL-O3-NEXT: .Lpcsection458: +; HASWELL-O3-NEXT: xorl %ecx, %ecx +; HASWELL-O3-NEXT: .Lpcsection459: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection460: +; HASWELL-O3-NEXT: jne .LBB218_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw and ptr %a, i128 42 acquire, align 16, !pcsections !0 @@ -12614,6 +15132,33 @@ define void @atomic128_or_acquire(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_or_acquire: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection461: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection462: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB219_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movq %rax, %rbx +; HASWELL-O3-NEXT: .Lpcsection463: +; HASWELL-O3-NEXT: orq $42, %rbx +; HASWELL-O3-NEXT: movq %rdx, %rcx +; HASWELL-O3-NEXT: .Lpcsection464: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection465: +; HASWELL-O3-NEXT: jne .LBB219_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw or ptr %a, i128 42 acquire, align 16, !pcsections !0 @@ -12739,6 +15284,33 @@ define void @atomic128_xor_acquire(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_xor_acquire: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection466: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection467: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB220_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movq %rax, %rbx +; HASWELL-O3-NEXT: .Lpcsection468: +; HASWELL-O3-NEXT: xorq $42, %rbx +; 
HASWELL-O3-NEXT: movq %rdx, %rcx +; HASWELL-O3-NEXT: .Lpcsection469: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection470: +; HASWELL-O3-NEXT: jne .LBB220_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw xor ptr %a, i128 42 acquire, align 16, !pcsections !0 @@ -12879,6 +15451,36 @@ define void @atomic128_nand_acquire(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_nand_acquire: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection471: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection472: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .Lpcsection473: +; HASWELL-O3-NEXT: movq $-1, %rcx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB221_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movl %eax, %ebx +; HASWELL-O3-NEXT: .Lpcsection474: +; HASWELL-O3-NEXT: notl %ebx +; HASWELL-O3-NEXT: .Lpcsection475: +; HASWELL-O3-NEXT: orq $-43, %rbx +; HASWELL-O3-NEXT: .Lpcsection476: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection477: +; HASWELL-O3-NEXT: jne .LBB221_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw nand ptr %a, i128 42 acquire, align 16, !pcsections !0 @@ -13006,6 +15608,33 @@ define void @atomic128_xchg_release(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_xchg_release: +; HASWELL-O3: # %bb.0: +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection478: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection479: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .Lpcsection480: +; HASWELL-O3-NEXT: movl $42, %ebx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB222_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: .Lpcsection481: +; HASWELL-O3-NEXT: xorl %ecx, %ecx +; HASWELL-O3-NEXT: .Lpcsection482: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection483: +; HASWELL-O3-NEXT: jne .LBB222_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq load volatile i64, ptr @foo, align 8 %x = atomicrmw xchg ptr %a, i128 42 release, align 16, !pcsections !0 store volatile i64 1, ptr @foo, align 8 @@ -13138,6 +15767,35 @@ define void @atomic128_add_release(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_add_release: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection484: +; HASWELL-O3-NEXT: movq (%rdi), 
%rax +; HASWELL-O3-NEXT: .Lpcsection485: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB223_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movq %rax, %rbx +; HASWELL-O3-NEXT: .Lpcsection486: +; HASWELL-O3-NEXT: addq $42, %rbx +; HASWELL-O3-NEXT: movq %rdx, %rcx +; HASWELL-O3-NEXT: .Lpcsection487: +; HASWELL-O3-NEXT: adcq $0, %rcx +; HASWELL-O3-NEXT: .Lpcsection488: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection489: +; HASWELL-O3-NEXT: jne .LBB223_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw add ptr %a, i128 42 release, align 16, !pcsections !0 @@ -13271,6 +15929,35 @@ define void @atomic128_sub_release(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_sub_release: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection490: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection491: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB224_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movq %rax, %rbx +; HASWELL-O3-NEXT: .Lpcsection492: +; HASWELL-O3-NEXT: addq $-42, %rbx +; HASWELL-O3-NEXT: movq %rdx, %rcx +; HASWELL-O3-NEXT: .Lpcsection493: +; HASWELL-O3-NEXT: adcq $-1, %rcx +; HASWELL-O3-NEXT: .Lpcsection494: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection495: +; HASWELL-O3-NEXT: jne .LBB224_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw sub ptr %a, i128 42 release, align 16, !pcsections !0 @@ -13403,6 +16090,34 @@ define void @atomic128_and_release(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_and_release: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection496: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection497: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB225_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movl %eax, %ebx +; HASWELL-O3-NEXT: .Lpcsection498: +; HASWELL-O3-NEXT: andl $42, %ebx +; HASWELL-O3-NEXT: .Lpcsection499: +; HASWELL-O3-NEXT: xorl %ecx, %ecx +; HASWELL-O3-NEXT: .Lpcsection500: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection501: +; HASWELL-O3-NEXT: jne .LBB225_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw and ptr %a, i128 42 release, align 16, !pcsections !0 @@ -13528,6 +16243,33 @@ define void @atomic128_or_release(ptr %a) { ; 
O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_or_release: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection502: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection503: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB226_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movq %rax, %rbx +; HASWELL-O3-NEXT: .Lpcsection504: +; HASWELL-O3-NEXT: orq $42, %rbx +; HASWELL-O3-NEXT: movq %rdx, %rcx +; HASWELL-O3-NEXT: .Lpcsection505: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection506: +; HASWELL-O3-NEXT: jne .LBB226_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw or ptr %a, i128 42 release, align 16, !pcsections !0 @@ -13653,6 +16395,33 @@ define void @atomic128_xor_release(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_xor_release: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection507: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection508: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB227_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movq %rax, %rbx +; HASWELL-O3-NEXT: .Lpcsection509: +; HASWELL-O3-NEXT: xorq $42, %rbx +; HASWELL-O3-NEXT: movq %rdx, %rcx +; HASWELL-O3-NEXT: .Lpcsection510: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection511: +; HASWELL-O3-NEXT: jne .LBB227_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw xor ptr %a, i128 42 release, align 16, !pcsections !0 @@ -13793,6 +16562,36 @@ define void @atomic128_nand_release(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_nand_release: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection512: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection513: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .Lpcsection514: +; HASWELL-O3-NEXT: movq $-1, %rcx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB228_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movl %eax, %ebx +; HASWELL-O3-NEXT: .Lpcsection515: +; HASWELL-O3-NEXT: notl %ebx +; HASWELL-O3-NEXT: .Lpcsection516: +; HASWELL-O3-NEXT: orq $-43, %rbx +; HASWELL-O3-NEXT: .Lpcsection517: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection518: +; HASWELL-O3-NEXT: jne .LBB228_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq 
$1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw nand ptr %a, i128 42 release, align 16, !pcsections !0 @@ -13920,6 +16719,33 @@ define void @atomic128_xchg_acq_rel(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_xchg_acq_rel: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection519: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection520: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .Lpcsection521: +; HASWELL-O3-NEXT: movl $42, %ebx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB229_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: .Lpcsection522: +; HASWELL-O3-NEXT: xorl %ecx, %ecx +; HASWELL-O3-NEXT: .Lpcsection523: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection524: +; HASWELL-O3-NEXT: jne .LBB229_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw xchg ptr %a, i128 42 acq_rel, align 16, !pcsections !0 @@ -14053,6 +16879,35 @@ define void @atomic128_add_acq_rel(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_add_acq_rel: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection525: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection526: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB230_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movq %rax, %rbx +; HASWELL-O3-NEXT: .Lpcsection527: +; HASWELL-O3-NEXT: addq $42, %rbx +; HASWELL-O3-NEXT: movq %rdx, %rcx +; HASWELL-O3-NEXT: .Lpcsection528: +; HASWELL-O3-NEXT: adcq $0, %rcx +; HASWELL-O3-NEXT: .Lpcsection529: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection530: +; HASWELL-O3-NEXT: jne .LBB230_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw add ptr %a, i128 42 acq_rel, align 16, !pcsections !0 @@ -14186,6 +17041,35 @@ define void @atomic128_sub_acq_rel(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_sub_acq_rel: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection531: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection532: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB231_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movq %rax, %rbx +; HASWELL-O3-NEXT: .Lpcsection533: +; HASWELL-O3-NEXT: 
addq $-42, %rbx +; HASWELL-O3-NEXT: movq %rdx, %rcx +; HASWELL-O3-NEXT: .Lpcsection534: +; HASWELL-O3-NEXT: adcq $-1, %rcx +; HASWELL-O3-NEXT: .Lpcsection535: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection536: +; HASWELL-O3-NEXT: jne .LBB231_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw sub ptr %a, i128 42 acq_rel, align 16, !pcsections !0 @@ -14318,6 +17202,34 @@ define void @atomic128_and_acq_rel(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_and_acq_rel: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection537: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection538: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB232_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movl %eax, %ebx +; HASWELL-O3-NEXT: .Lpcsection539: +; HASWELL-O3-NEXT: andl $42, %ebx +; HASWELL-O3-NEXT: .Lpcsection540: +; HASWELL-O3-NEXT: xorl %ecx, %ecx +; HASWELL-O3-NEXT: .Lpcsection541: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection542: +; HASWELL-O3-NEXT: jne .LBB232_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw and ptr %a, i128 42 acq_rel, align 16, !pcsections !0 @@ -14443,6 +17355,33 @@ define void @atomic128_or_acq_rel(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_or_acq_rel: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection543: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection544: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB233_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movq %rax, %rbx +; HASWELL-O3-NEXT: .Lpcsection545: +; HASWELL-O3-NEXT: orq $42, %rbx +; HASWELL-O3-NEXT: movq %rdx, %rcx +; HASWELL-O3-NEXT: .Lpcsection546: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection547: +; HASWELL-O3-NEXT: jne .LBB233_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw or ptr %a, i128 42 acq_rel, align 16, !pcsections !0 @@ -14568,6 +17507,33 @@ define void @atomic128_xor_acq_rel(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_xor_acq_rel: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection548: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; 
HASWELL-O3-NEXT: .Lpcsection549: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB234_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movq %rax, %rbx +; HASWELL-O3-NEXT: .Lpcsection550: +; HASWELL-O3-NEXT: xorq $42, %rbx +; HASWELL-O3-NEXT: movq %rdx, %rcx +; HASWELL-O3-NEXT: .Lpcsection551: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection552: +; HASWELL-O3-NEXT: jne .LBB234_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw xor ptr %a, i128 42 acq_rel, align 16, !pcsections !0 @@ -14708,6 +17674,36 @@ define void @atomic128_nand_acq_rel(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_nand_acq_rel: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection553: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection554: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .Lpcsection555: +; HASWELL-O3-NEXT: movq $-1, %rcx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB235_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movl %eax, %ebx +; HASWELL-O3-NEXT: .Lpcsection556: +; HASWELL-O3-NEXT: notl %ebx +; HASWELL-O3-NEXT: .Lpcsection557: +; HASWELL-O3-NEXT: orq $-43, %rbx +; HASWELL-O3-NEXT: .Lpcsection558: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection559: +; HASWELL-O3-NEXT: jne .LBB235_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw nand ptr %a, i128 42 acq_rel, align 16, !pcsections !0 @@ -14835,6 +17831,33 @@ define void @atomic128_xchg_seq_cst(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_xchg_seq_cst: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection560: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection561: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .Lpcsection562: +; HASWELL-O3-NEXT: movl $42, %ebx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB236_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: .Lpcsection563: +; HASWELL-O3-NEXT: xorl %ecx, %ecx +; HASWELL-O3-NEXT: .Lpcsection564: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection565: +; HASWELL-O3-NEXT: jne .LBB236_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw xchg ptr %a, i128 42 seq_cst, align 16, !pcsections !0 @@ -14968,6 +17991,35 @@ define void @atomic128_add_seq_cst(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; 
HASWELL-O3-LABEL: atomic128_add_seq_cst: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection566: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection567: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB237_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movq %rax, %rbx +; HASWELL-O3-NEXT: .Lpcsection568: +; HASWELL-O3-NEXT: addq $42, %rbx +; HASWELL-O3-NEXT: movq %rdx, %rcx +; HASWELL-O3-NEXT: .Lpcsection569: +; HASWELL-O3-NEXT: adcq $0, %rcx +; HASWELL-O3-NEXT: .Lpcsection570: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection571: +; HASWELL-O3-NEXT: jne .LBB237_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw add ptr %a, i128 42 seq_cst, align 16, !pcsections !0 @@ -15101,6 +18153,35 @@ define void @atomic128_sub_seq_cst(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_sub_seq_cst: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection572: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection573: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB238_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movq %rax, %rbx +; HASWELL-O3-NEXT: .Lpcsection574: +; HASWELL-O3-NEXT: addq $-42, %rbx +; HASWELL-O3-NEXT: movq %rdx, %rcx +; HASWELL-O3-NEXT: .Lpcsection575: +; HASWELL-O3-NEXT: adcq $-1, %rcx +; HASWELL-O3-NEXT: .Lpcsection576: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection577: +; HASWELL-O3-NEXT: jne .LBB238_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw sub ptr %a, i128 42 seq_cst, align 16, !pcsections !0 @@ -15233,6 +18314,34 @@ define void @atomic128_and_seq_cst(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_and_seq_cst: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection578: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection579: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB239_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movl %eax, %ebx +; HASWELL-O3-NEXT: .Lpcsection580: +; HASWELL-O3-NEXT: andl $42, %ebx +; HASWELL-O3-NEXT: .Lpcsection581: +; HASWELL-O3-NEXT: xorl %ecx, %ecx +; HASWELL-O3-NEXT: .Lpcsection582: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection583: +; HASWELL-O3-NEXT: jne .LBB239_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: 
movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw and ptr %a, i128 42 seq_cst, align 16, !pcsections !0 @@ -15358,6 +18467,33 @@ define void @atomic128_or_seq_cst(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_or_seq_cst: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection584: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection585: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB240_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movq %rax, %rbx +; HASWELL-O3-NEXT: .Lpcsection586: +; HASWELL-O3-NEXT: orq $42, %rbx +; HASWELL-O3-NEXT: movq %rdx, %rcx +; HASWELL-O3-NEXT: .Lpcsection587: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection588: +; HASWELL-O3-NEXT: jne .LBB240_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw or ptr %a, i128 42 seq_cst, align 16, !pcsections !0 @@ -15483,6 +18619,33 @@ define void @atomic128_xor_seq_cst(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_xor_seq_cst: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection589: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection590: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB241_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movq %rax, %rbx +; HASWELL-O3-NEXT: .Lpcsection591: +; HASWELL-O3-NEXT: xorq $42, %rbx +; HASWELL-O3-NEXT: movq %rdx, %rcx +; HASWELL-O3-NEXT: .Lpcsection592: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection593: +; HASWELL-O3-NEXT: jne .LBB241_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw xor ptr %a, i128 42 seq_cst, align 16, !pcsections !0 @@ -15623,6 +18786,36 @@ define void @atomic128_nand_seq_cst(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_nand_seq_cst: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection594: +; HASWELL-O3-NEXT: movq (%rdi), %rax +; HASWELL-O3-NEXT: .Lpcsection595: +; HASWELL-O3-NEXT: movq 8(%rdi), %rdx +; HASWELL-O3-NEXT: .Lpcsection596: +; HASWELL-O3-NEXT: movq $-1, %rcx +; HASWELL-O3-NEXT: .p2align 4 +; HASWELL-O3-NEXT: .LBB242_1: # %atomicrmw.start +; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1 +; HASWELL-O3-NEXT: movl %eax, %ebx +; HASWELL-O3-NEXT: .Lpcsection597: +; HASWELL-O3-NEXT: 
notl %ebx +; HASWELL-O3-NEXT: .Lpcsection598: +; HASWELL-O3-NEXT: orq $-43, %rbx +; HASWELL-O3-NEXT: .Lpcsection599: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection600: +; HASWELL-O3-NEXT: jne .LBB242_1 +; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = atomicrmw nand ptr %a, i128 42 seq_cst, align 16, !pcsections !0 @@ -15781,6 +18974,43 @@ define void @atomic128_cas_monotonic(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_cas_monotonic: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection601: +; HASWELL-O3-NEXT: movl $42, %eax +; HASWELL-O3-NEXT: .Lpcsection602: +; HASWELL-O3-NEXT: movl $1, %ebx +; HASWELL-O3-NEXT: .Lpcsection603: +; HASWELL-O3-NEXT: xorl %edx, %edx +; HASWELL-O3-NEXT: .Lpcsection604: +; HASWELL-O3-NEXT: xorl %ecx, %ecx +; HASWELL-O3-NEXT: .Lpcsection605: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection606: +; HASWELL-O3-NEXT: movl $42, %eax +; HASWELL-O3-NEXT: .Lpcsection607: +; HASWELL-O3-NEXT: xorl %edx, %edx +; HASWELL-O3-NEXT: .Lpcsection608: +; HASWELL-O3-NEXT: xorl %ecx, %ecx +; HASWELL-O3-NEXT: .Lpcsection609: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection610: +; HASWELL-O3-NEXT: movl $42, %eax +; HASWELL-O3-NEXT: .Lpcsection611: +; HASWELL-O3-NEXT: xorl %edx, %edx +; HASWELL-O3-NEXT: .Lpcsection612: +; HASWELL-O3-NEXT: xorl %ecx, %ecx +; HASWELL-O3-NEXT: .Lpcsection613: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = cmpxchg ptr %a, i128 42, i128 1 monotonic monotonic, align 16, !pcsections !0 @@ -15941,6 +19171,43 @@ define void @atomic128_cas_acquire(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_cas_acquire: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection614: +; HASWELL-O3-NEXT: movl $42, %eax +; HASWELL-O3-NEXT: .Lpcsection615: +; HASWELL-O3-NEXT: movl $1, %ebx +; HASWELL-O3-NEXT: .Lpcsection616: +; HASWELL-O3-NEXT: xorl %edx, %edx +; HASWELL-O3-NEXT: .Lpcsection617: +; HASWELL-O3-NEXT: xorl %ecx, %ecx +; HASWELL-O3-NEXT: .Lpcsection618: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection619: +; HASWELL-O3-NEXT: movl $42, %eax +; HASWELL-O3-NEXT: .Lpcsection620: +; HASWELL-O3-NEXT: xorl %edx, %edx +; HASWELL-O3-NEXT: .Lpcsection621: +; HASWELL-O3-NEXT: xorl %ecx, %ecx +; HASWELL-O3-NEXT: .Lpcsection622: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection623: +; HASWELL-O3-NEXT: movl $42, %eax +; HASWELL-O3-NEXT: .Lpcsection624: +; HASWELL-O3-NEXT: xorl %edx, %edx +; HASWELL-O3-NEXT: .Lpcsection625: +; HASWELL-O3-NEXT: xorl %ecx, %ecx +; HASWELL-O3-NEXT: .Lpcsection626: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: 
.cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = cmpxchg ptr %a, i128 42, i128 1 acquire monotonic, align 16, !pcsections !0 @@ -16101,6 +19368,43 @@ define void @atomic128_cas_release(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_cas_release: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection627: +; HASWELL-O3-NEXT: movl $42, %eax +; HASWELL-O3-NEXT: .Lpcsection628: +; HASWELL-O3-NEXT: movl $1, %ebx +; HASWELL-O3-NEXT: .Lpcsection629: +; HASWELL-O3-NEXT: xorl %edx, %edx +; HASWELL-O3-NEXT: .Lpcsection630: +; HASWELL-O3-NEXT: xorl %ecx, %ecx +; HASWELL-O3-NEXT: .Lpcsection631: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection632: +; HASWELL-O3-NEXT: movl $42, %eax +; HASWELL-O3-NEXT: .Lpcsection633: +; HASWELL-O3-NEXT: xorl %edx, %edx +; HASWELL-O3-NEXT: .Lpcsection634: +; HASWELL-O3-NEXT: xorl %ecx, %ecx +; HASWELL-O3-NEXT: .Lpcsection635: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection636: +; HASWELL-O3-NEXT: movl $42, %eax +; HASWELL-O3-NEXT: .Lpcsection637: +; HASWELL-O3-NEXT: xorl %edx, %edx +; HASWELL-O3-NEXT: .Lpcsection638: +; HASWELL-O3-NEXT: xorl %ecx, %ecx +; HASWELL-O3-NEXT: .Lpcsection639: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = cmpxchg ptr %a, i128 42, i128 1 release monotonic, align 16, !pcsections !0 @@ -16261,6 +19565,43 @@ define void @atomic128_cas_acq_rel(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_cas_acq_rel: +; HASWELL-O3: # %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection640: +; HASWELL-O3-NEXT: movl $42, %eax +; HASWELL-O3-NEXT: .Lpcsection641: +; HASWELL-O3-NEXT: movl $1, %ebx +; HASWELL-O3-NEXT: .Lpcsection642: +; HASWELL-O3-NEXT: xorl %edx, %edx +; HASWELL-O3-NEXT: .Lpcsection643: +; HASWELL-O3-NEXT: xorl %ecx, %ecx +; HASWELL-O3-NEXT: .Lpcsection644: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection645: +; HASWELL-O3-NEXT: movl $42, %eax +; HASWELL-O3-NEXT: .Lpcsection646: +; HASWELL-O3-NEXT: xorl %edx, %edx +; HASWELL-O3-NEXT: .Lpcsection647: +; HASWELL-O3-NEXT: xorl %ecx, %ecx +; HASWELL-O3-NEXT: .Lpcsection648: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection649: +; HASWELL-O3-NEXT: movl $42, %eax +; HASWELL-O3-NEXT: .Lpcsection650: +; HASWELL-O3-NEXT: xorl %edx, %edx +; HASWELL-O3-NEXT: .Lpcsection651: +; HASWELL-O3-NEXT: xorl %ecx, %ecx +; HASWELL-O3-NEXT: .Lpcsection652: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: movq $1, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = cmpxchg ptr %a, i128 42, i128 1 acq_rel monotonic, align 16, !pcsections !0 @@ -16421,6 +19762,43 @@ define void @atomic128_cas_seq_cst(ptr %a) { ; O3-NEXT: popq %rbx ; O3-NEXT: .cfi_def_cfa_offset 8 ; O3-NEXT: retq +; +; HASWELL-O3-LABEL: atomic128_cas_seq_cst: +; HASWELL-O3: 
# %bb.0: # %entry +; HASWELL-O3-NEXT: pushq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 16 +; HASWELL-O3-NEXT: .cfi_offset %rbx, -16 +; HASWELL-O3-NEXT: movq foo(%rip), %rax +; HASWELL-O3-NEXT: .Lpcsection653: +; HASWELL-O3-NEXT: movl $42, %eax +; HASWELL-O3-NEXT: .Lpcsection654: +; HASWELL-O3-NEXT: movl $1, %ebx +; HASWELL-O3-NEXT: .Lpcsection655: +; HASWELL-O3-NEXT: xorl %edx, %edx +; HASWELL-O3-NEXT: .Lpcsection656: +; HASWELL-O3-NEXT: xorl %ecx, %ecx +; HASWELL-O3-NEXT: .Lpcsection657: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection658: +; HASWELL-O3-NEXT: movl $42, %eax +; HASWELL-O3-NEXT: .Lpcsection659: +; HASWELL-O3-NEXT: xorl %edx, %edx +; HASWELL-O3-NEXT: .Lpcsection660: +; HASWELL-O3-NEXT: xorl %ecx, %ecx +; HASWELL-O3-NEXT: .Lpcsection661: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: .Lpcsection662: +; HASWELL-O3-NEXT: movl $42, %eax +; HASWELL-O3-NEXT: .Lpcsection663: +; HASWELL-O3-NEXT: xorl %edx, %edx +; HASWELL-O3-NEXT: .Lpcsection664: +; HASWELL-O3-NEXT: xorl %ecx, %ecx +; HASWELL-O3-NEXT: .Lpcsection665: +; HASWELL-O3-NEXT: lock cmpxchg16b (%rdi) +; HASWELL-O3-NEXT: movq $3, foo(%rip) +; HASWELL-O3-NEXT: popq %rbx +; HASWELL-O3-NEXT: .cfi_def_cfa_offset 8 +; HASWELL-O3-NEXT: retq entry: load volatile i64, ptr @foo, align 8 %x = cmpxchg ptr %a, i128 42, i128 1 seq_cst monotonic, align 16, !pcsections !0 diff --git a/llvm/test/CodeGen/X86/peephole-copy.mir b/llvm/test/CodeGen/X86/peephole-copy.mir index e24abf84..f399398 100644 --- a/llvm/test/CodeGen/X86/peephole-copy.mir +++ b/llvm/test/CodeGen/X86/peephole-copy.mir @@ -22,14 +22,14 @@ body: | bb.0: ; CHECK-LABEL: name: c ; CHECK: [[MOV32ri:%[0-9]+]]:gr32_abcd = MOV32ri 512 - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 2359305 /* reguse:GR32 */, [[MOV32ri]], 1 /* reguse */, implicit-def early-clobber $df + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 3211273 /* reguse:GR32_ABCD */, [[MOV32ri]], 1 /* reguse */, implicit-def early-clobber $df ; CHECK-NEXT: [[MOV32ri1:%[0-9]+]]:gr32_abcd = MOV32ri 512 - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 2359305 /* reguse:GR32 */, [[MOV32ri1]], 1 /* reguse */, implicit-def early-clobber $df + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 3211273 /* reguse:GR32_ABCD */, [[MOV32ri1]], 1 /* reguse */, implicit-def early-clobber $df ; CHECK-NEXT: RET 0 %2 = MOV32ri 512 %0 = COPY %2 - INLINEASM &"", 1 /* sideeffect attdialect */, 2359305 /* reguse:GR32_ABCD */, %0:gr32_abcd, 1 /* clobber */, implicit-def early-clobber $df + INLINEASM &"", 1 /* sideeffect attdialect */, 3211273 /* reguse:GR32_ABCD */, %0:gr32_abcd, 1 /* clobber */, implicit-def early-clobber $df %1 = COPY %2 - INLINEASM &"", 1 /* sideeffect attdialect */, 2359305 /* reguse:GR32_ABCD */, %1:gr32_abcd, 1 /* clobber */, implicit-def early-clobber $df + INLINEASM &"", 1 /* sideeffect attdialect */, 3211273 /* reguse:GR32_ABCD */, %1:gr32_abcd, 1 /* clobber */, implicit-def early-clobber $df RET 0 ... 
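The peephole-copy.mir hunk above changes only the INLINEASM flag word (2359305 -> 3211273), which is why the printed comment flips from reguse:GR32 to reguse:GR32_ABCD. An inline-asm operand flag packs the operand kind in bits 0-2, the operand-register count in bits 3-15, and, when a concrete register class is required, a class field in the upper 16 bits. The sketch below is a hypothetical standalone decoder assuming that layout from llvm/include/llvm/IR/InlineAsm.h, not a call into LLVM's API; the /* reguse:... */ comments the MIR printer emits remain the authoritative decode.

    // Hypothetical decoder for INLINEASM operand flag words; the bit layout
    // is assumed from llvm/include/llvm/IR/InlineAsm.h and the raw value of
    // the upper field is printed as-is rather than mapped to a class name.
    #include <cstdint>
    #include <cstdio>

    static void decodeInlineAsmFlag(uint32_t Flag) {
      uint32_t Kind   = Flag & 7;             // 1 == reguse, 4 == clobber
      uint32_t NumOps = (Flag >> 3) & 0x1fff; // operand registers consumed
      uint32_t RCF    = Flag >> 16;           // register-class field (0 if absent)
      std::printf("kind=%u numops=%u rcfield=%u\n", Kind, NumOps, RCF);
    }

    int main() {
      decodeInlineAsmFlag(2359305); // kind=1 numops=1 rcfield=36 -> GR32
      decodeInlineAsmFlag(3211273); // kind=1 numops=1 rcfield=49 -> GR32_ABCD
      return 0;
    }

Both words keep kind 1 (reguse) with a single operand register; only the class field moves from 36 to 49, i.e. the updated check lines record the narrower GR32_ABCD requirement rather than plain GR32.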
diff --git a/llvm/test/CodeGen/X86/popcnt.ll b/llvm/test/CodeGen/X86/popcnt.ll index 35c7c0e..3004b8b 100644 --- a/llvm/test/CodeGen/X86/popcnt.ll +++ b/llvm/test/CodeGen/X86/popcnt.ll @@ -340,84 +340,87 @@ define i64 @cnt64(i64 %x) nounwind readnone { define i128 @cnt128(i128 %x) nounwind readnone { ; X86-NOSSE-LABEL: cnt128: ; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl %ebx +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: movl %esp, %ebp ; X86-NOSSE-NEXT: pushl %edi ; X86-NOSSE-NEXT: pushl %esi -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NOSSE-NEXT: movl %edi, %ebx -; X86-NOSSE-NEXT: shrl %ebx -; X86-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555 -; X86-NOSSE-NEXT: subl %ebx, %edi -; X86-NOSSE-NEXT: movl %edi, %ebx -; X86-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333 -; X86-NOSSE-NEXT: shrl $2, %edi -; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333 -; X86-NOSSE-NEXT: addl %ebx, %edi -; X86-NOSSE-NEXT: movl %edi, %ebx -; X86-NOSSE-NEXT: shrl $4, %ebx -; X86-NOSSE-NEXT: addl %edi, %ebx -; X86-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F -; X86-NOSSE-NEXT: imull $16843009, %ebx, %edi # imm = 0x1010101 -; X86-NOSSE-NEXT: shrl $24, %edi -; X86-NOSSE-NEXT: movl %esi, %ebx -; X86-NOSSE-NEXT: shrl %ebx -; X86-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555 -; X86-NOSSE-NEXT: subl %ebx, %esi -; X86-NOSSE-NEXT: movl %esi, %ebx -; X86-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333 +; X86-NOSSE-NEXT: andl $-16, %esp +; X86-NOSSE-NEXT: movl 24(%ebp), %eax +; X86-NOSSE-NEXT: movl 32(%ebp), %ecx +; X86-NOSSE-NEXT: movl 36(%ebp), %esi +; X86-NOSSE-NEXT: movl %esi, %edx +; X86-NOSSE-NEXT: shrl %edx +; X86-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555 +; X86-NOSSE-NEXT: subl %edx, %esi +; X86-NOSSE-NEXT: movl %esi, %edx +; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333 ; X86-NOSSE-NEXT: shrl $2, %esi ; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333 -; X86-NOSSE-NEXT: addl %ebx, %esi -; X86-NOSSE-NEXT: movl %esi, %ebx -; X86-NOSSE-NEXT: shrl $4, %ebx -; X86-NOSSE-NEXT: addl %esi, %ebx -; X86-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F -; X86-NOSSE-NEXT: imull $16843009, %ebx, %esi # imm = 0x1010101 -; X86-NOSSE-NEXT: shrl $24, %esi -; X86-NOSSE-NEXT: addl %edi, %esi -; X86-NOSSE-NEXT: movl %edx, %edi -; X86-NOSSE-NEXT: shrl %edi -; X86-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555 -; X86-NOSSE-NEXT: subl %edi, %edx -; X86-NOSSE-NEXT: movl %edx, %edi -; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333 -; X86-NOSSE-NEXT: shrl $2, %edx -; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333 -; X86-NOSSE-NEXT: addl %edi, %edx -; X86-NOSSE-NEXT: movl %edx, %edi -; X86-NOSSE-NEXT: shrl $4, %edi -; X86-NOSSE-NEXT: addl %edx, %edi -; X86-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F -; X86-NOSSE-NEXT: imull $16843009, %edi, %edx # imm = 0x1010101 -; X86-NOSSE-NEXT: shrl $24, %edx -; X86-NOSSE-NEXT: movl %ecx, %edi -; X86-NOSSE-NEXT: shrl %edi -; X86-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555 -; X86-NOSSE-NEXT: subl %edi, %ecx -; X86-NOSSE-NEXT: movl %ecx, %edi -; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333 +; X86-NOSSE-NEXT: addl %edx, %esi +; X86-NOSSE-NEXT: movl %esi, %edx +; X86-NOSSE-NEXT: shrl $4, %edx +; X86-NOSSE-NEXT: addl %esi, %edx +; X86-NOSSE-NEXT: movl %ecx, %esi +; 
X86-NOSSE-NEXT: shrl %esi +; X86-NOSSE-NEXT: andl $1431655765, %esi # imm = 0x55555555 +; X86-NOSSE-NEXT: subl %esi, %ecx +; X86-NOSSE-NEXT: movl %ecx, %esi +; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333 ; X86-NOSSE-NEXT: shrl $2, %ecx ; X86-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333 -; X86-NOSSE-NEXT: addl %edi, %ecx +; X86-NOSSE-NEXT: addl %esi, %ecx ; X86-NOSSE-NEXT: movl %ecx, %edi ; X86-NOSSE-NEXT: shrl $4, %edi ; X86-NOSSE-NEXT: addl %ecx, %edi +; X86-NOSSE-NEXT: movl 28(%ebp), %esi +; X86-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F +; X86-NOSSE-NEXT: imull $16843009, %edx, %edx # imm = 0x1010101 +; X86-NOSSE-NEXT: shrl $24, %edx ; X86-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F ; X86-NOSSE-NEXT: imull $16843009, %edi, %ecx # imm = 0x1010101 ; X86-NOSSE-NEXT: shrl $24, %ecx ; X86-NOSSE-NEXT: addl %edx, %ecx -; X86-NOSSE-NEXT: addl %esi, %ecx -; X86-NOSSE-NEXT: movl %ecx, (%eax) +; X86-NOSSE-NEXT: movl %esi, %edx +; X86-NOSSE-NEXT: shrl %edx +; X86-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555 +; X86-NOSSE-NEXT: subl %edx, %esi +; X86-NOSSE-NEXT: movl %esi, %edx +; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333 +; X86-NOSSE-NEXT: shrl $2, %esi +; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333 +; X86-NOSSE-NEXT: addl %edx, %esi +; X86-NOSSE-NEXT: movl %esi, %edx +; X86-NOSSE-NEXT: shrl $4, %edx +; X86-NOSSE-NEXT: addl %esi, %edx +; X86-NOSSE-NEXT: movl %eax, %esi +; X86-NOSSE-NEXT: shrl %esi +; X86-NOSSE-NEXT: andl $1431655765, %esi # imm = 0x55555555 +; X86-NOSSE-NEXT: subl %esi, %eax +; X86-NOSSE-NEXT: movl %eax, %esi +; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333 +; X86-NOSSE-NEXT: shrl $2, %eax +; X86-NOSSE-NEXT: andl $858993459, %eax # imm = 0x33333333 +; X86-NOSSE-NEXT: addl %esi, %eax +; X86-NOSSE-NEXT: movl %eax, %esi +; X86-NOSSE-NEXT: shrl $4, %esi +; X86-NOSSE-NEXT: addl %eax, %esi +; X86-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F +; X86-NOSSE-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101 +; X86-NOSSE-NEXT: shrl $24, %eax +; X86-NOSSE-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F +; X86-NOSSE-NEXT: imull $16843009, %esi, %edx # imm = 0x1010101 +; X86-NOSSE-NEXT: shrl $24, %edx +; X86-NOSSE-NEXT: addl %eax, %edx +; X86-NOSSE-NEXT: addl %ecx, %edx +; X86-NOSSE-NEXT: movl 8(%ebp), %eax +; X86-NOSSE-NEXT: movl %edx, (%eax) ; X86-NOSSE-NEXT: movl $0, 12(%eax) ; X86-NOSSE-NEXT: movl $0, 8(%eax) ; X86-NOSSE-NEXT: movl $0, 4(%eax) +; X86-NOSSE-NEXT: leal -8(%ebp), %esp ; X86-NOSSE-NEXT: popl %esi ; X86-NOSSE-NEXT: popl %edi -; X86-NOSSE-NEXT: popl %ebx +; X86-NOSSE-NEXT: popl %ebp ; X86-NOSSE-NEXT: retl $4 ; ; X64-BASE-LABEL: cnt128: @@ -462,20 +465,26 @@ define i128 @cnt128(i128 %x) nounwind readnone { ; ; X86-POPCNT-LABEL: cnt128: ; X86-POPCNT: # %bb.0: +; X86-POPCNT-NEXT: pushl %ebp +; X86-POPCNT-NEXT: movl %esp, %ebp ; X86-POPCNT-NEXT: pushl %esi -; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx -; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx +; X86-POPCNT-NEXT: andl $-16, %esp +; X86-POPCNT-NEXT: subl $16, %esp +; X86-POPCNT-NEXT: movl 8(%ebp), %eax +; X86-POPCNT-NEXT: popcntl 36(%ebp), %ecx +; X86-POPCNT-NEXT: popcntl 32(%ebp), %edx ; X86-POPCNT-NEXT: addl %ecx, %edx -; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx -; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi +; X86-POPCNT-NEXT: popcntl 28(%ebp), %ecx +; X86-POPCNT-NEXT: popcntl 24(%ebp), %esi ; X86-POPCNT-NEXT: addl %ecx, %esi ; X86-POPCNT-NEXT: addl %edx, %esi ; 
X86-POPCNT-NEXT: movl %esi, (%eax) ; X86-POPCNT-NEXT: movl $0, 12(%eax) ; X86-POPCNT-NEXT: movl $0, 8(%eax) ; X86-POPCNT-NEXT: movl $0, 4(%eax) +; X86-POPCNT-NEXT: leal -4(%ebp), %esp ; X86-POPCNT-NEXT: popl %esi +; X86-POPCNT-NEXT: popl %ebp ; X86-POPCNT-NEXT: retl $4 ; ; X64-POPCNT-LABEL: cnt128: @@ -522,7 +531,11 @@ define i128 @cnt128(i128 %x) nounwind readnone { ; ; X86-SSE2-LABEL: cnt128: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: andl $-16, %esp +; X86-SSE2-NEXT: subl $16, %esp +; X86-SSE2-NEXT: movl 8(%ebp), %eax ; X86-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero ; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 ; X86-SSE2-NEXT: psrlw $1, %xmm0 @@ -564,11 +577,17 @@ define i128 @cnt128(i128 %x) nounwind readnone { ; X86-SSE2-NEXT: movl $0, 12(%eax) ; X86-SSE2-NEXT: movl $0, 8(%eax) ; X86-SSE2-NEXT: movl $0, 4(%eax) +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp ; X86-SSE2-NEXT: retl $4 ; ; X86-SSSE3-LABEL: cnt128: ; X86-SSSE3: # %bb.0: -; X86-SSSE3-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSSE3-NEXT: pushl %ebp +; X86-SSSE3-NEXT: movl %esp, %ebp +; X86-SSSE3-NEXT: andl $-16, %esp +; X86-SSSE3-NEXT: subl $16, %esp +; X86-SSSE3-NEXT: movl 8(%ebp), %eax ; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; X86-SSSE3-NEXT: movq {{.*#+}} xmm2 = mem[0],zero ; X86-SSSE3-NEXT: movdqa %xmm2, %xmm3 @@ -600,6 +619,8 @@ define i128 @cnt128(i128 %x) nounwind readnone { ; X86-SSSE3-NEXT: movl $0, 12(%eax) ; X86-SSSE3-NEXT: movl $0, 8(%eax) ; X86-SSSE3-NEXT: movl $0, 4(%eax) +; X86-SSSE3-NEXT: movl %ebp, %esp +; X86-SSSE3-NEXT: popl %ebp ; X86-SSSE3-NEXT: retl $4 %cnt = tail call i128 @llvm.ctpop.i128(i128 %x) ret i128 %cnt @@ -928,87 +949,92 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize { ; X86-NOSSE-LABEL: cnt128_optsize: ; X86-NOSSE: # %bb.0: ; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: movl %esp, %ebp ; X86-NOSSE-NEXT: pushl %ebx ; X86-NOSSE-NEXT: pushl %edi ; X86-NOSSE-NEXT: pushl %esi -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NOSSE-NEXT: movl %ebx, %ecx -; X86-NOSSE-NEXT: shrl %ecx -; X86-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555 -; X86-NOSSE-NEXT: andl %edi, %ecx -; X86-NOSSE-NEXT: subl %ecx, %ebx +; X86-NOSSE-NEXT: andl $-16, %esp +; X86-NOSSE-NEXT: subl $16, %esp +; X86-NOSSE-NEXT: movl 32(%ebp), %edx +; X86-NOSSE-NEXT: movl 36(%ebp), %esi +; X86-NOSSE-NEXT: movl %esi, %eax +; X86-NOSSE-NEXT: shrl %eax +; X86-NOSSE-NEXT: movl $1431655765, %ecx # imm = 0x55555555 +; X86-NOSSE-NEXT: andl %ecx, %eax +; X86-NOSSE-NEXT: subl %eax, %esi ; X86-NOSSE-NEXT: movl $858993459, %ecx # imm = 0x33333333 -; X86-NOSSE-NEXT: movl %ebx, %ebp -; X86-NOSSE-NEXT: andl %ecx, %ebp +; X86-NOSSE-NEXT: movl %esi, %edi +; X86-NOSSE-NEXT: andl %ecx, %edi +; X86-NOSSE-NEXT: shrl $2, %esi +; X86-NOSSE-NEXT: andl %ecx, %esi +; X86-NOSSE-NEXT: addl %edi, %esi +; X86-NOSSE-NEXT: movl %esi, %edi +; X86-NOSSE-NEXT: shrl $4, %edi +; X86-NOSSE-NEXT: addl %esi, %edi +; X86-NOSSE-NEXT: movl %edx, %esi +; X86-NOSSE-NEXT: shrl %esi +; X86-NOSSE-NEXT: movl $1431655765, %eax # imm = 0x55555555 +; X86-NOSSE-NEXT: andl %eax, %esi +; X86-NOSSE-NEXT: subl %esi, %edx +; X86-NOSSE-NEXT: movl %edx, %esi +; X86-NOSSE-NEXT: andl %ecx, %esi +; X86-NOSSE-NEXT: shrl $2, %edx +; X86-NOSSE-NEXT: andl %ecx, %edx +; 
X86-NOSSE-NEXT: addl %esi, %edx +; X86-NOSSE-NEXT: movl %edx, %ebx +; X86-NOSSE-NEXT: shrl $4, %ebx +; X86-NOSSE-NEXT: addl %edx, %ebx +; X86-NOSSE-NEXT: movl $252645135, %edx # imm = 0xF0F0F0F +; X86-NOSSE-NEXT: andl %edx, %edi +; X86-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101 +; X86-NOSSE-NEXT: shrl $24, %edi +; X86-NOSSE-NEXT: andl %edx, %ebx +; X86-NOSSE-NEXT: imull $16843009, %ebx, %edx # imm = 0x1010101 +; X86-NOSSE-NEXT: shrl $24, %edx +; X86-NOSSE-NEXT: addl %edi, %edx +; X86-NOSSE-NEXT: movl 28(%ebp), %ebx +; X86-NOSSE-NEXT: movl %ebx, %edi +; X86-NOSSE-NEXT: shrl %edi +; X86-NOSSE-NEXT: andl %eax, %edi +; X86-NOSSE-NEXT: subl %edi, %ebx +; X86-NOSSE-NEXT: movl %ebx, %edi +; X86-NOSSE-NEXT: andl %ecx, %edi ; X86-NOSSE-NEXT: shrl $2, %ebx ; X86-NOSSE-NEXT: andl %ecx, %ebx -; X86-NOSSE-NEXT: addl %ebp, %ebx -; X86-NOSSE-NEXT: movl %ebx, %ebp -; X86-NOSSE-NEXT: shrl $4, %ebp -; X86-NOSSE-NEXT: addl %ebx, %ebp +; X86-NOSSE-NEXT: addl %edi, %ebx +; X86-NOSSE-NEXT: movl %ebx, %edi +; X86-NOSSE-NEXT: shrl $4, %edi +; X86-NOSSE-NEXT: addl %ebx, %edi +; X86-NOSSE-NEXT: movl 24(%ebp), %eax ; X86-NOSSE-NEXT: movl %eax, %ebx ; X86-NOSSE-NEXT: shrl %ebx -; X86-NOSSE-NEXT: andl %edi, %ebx +; X86-NOSSE-NEXT: movl $1431655765, %esi # imm = 0x55555555 +; X86-NOSSE-NEXT: andl %esi, %ebx ; X86-NOSSE-NEXT: subl %ebx, %eax ; X86-NOSSE-NEXT: movl %eax, %ebx ; X86-NOSSE-NEXT: andl %ecx, %ebx ; X86-NOSSE-NEXT: shrl $2, %eax ; X86-NOSSE-NEXT: andl %ecx, %eax ; X86-NOSSE-NEXT: addl %ebx, %eax -; X86-NOSSE-NEXT: movl %eax, %edi -; X86-NOSSE-NEXT: shrl $4, %edi -; X86-NOSSE-NEXT: addl %eax, %edi -; X86-NOSSE-NEXT: movl $252645135, %ebx # imm = 0xF0F0F0F -; X86-NOSSE-NEXT: andl %ebx, %ebp -; X86-NOSSE-NEXT: imull $16843009, %ebp, %eax # imm = 0x1010101 +; X86-NOSSE-NEXT: movl %eax, %ecx +; X86-NOSSE-NEXT: shrl $4, %ecx +; X86-NOSSE-NEXT: addl %eax, %ecx +; X86-NOSSE-NEXT: movl $252645135, %eax # imm = 0xF0F0F0F +; X86-NOSSE-NEXT: andl %eax, %edi +; X86-NOSSE-NEXT: andl %eax, %ecx +; X86-NOSSE-NEXT: imull $16843009, %edi, %eax # imm = 0x1010101 ; X86-NOSSE-NEXT: shrl $24, %eax -; X86-NOSSE-NEXT: andl %ebx, %edi -; X86-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101 -; X86-NOSSE-NEXT: shrl $24, %edi -; X86-NOSSE-NEXT: addl %eax, %edi -; X86-NOSSE-NEXT: movl %esi, %eax -; X86-NOSSE-NEXT: shrl %eax -; X86-NOSSE-NEXT: movl $1431655765, %ebp # imm = 0x55555555 -; X86-NOSSE-NEXT: andl %ebp, %eax -; X86-NOSSE-NEXT: subl %eax, %esi -; X86-NOSSE-NEXT: movl %esi, %eax -; X86-NOSSE-NEXT: andl %ecx, %eax -; X86-NOSSE-NEXT: shrl $2, %esi -; X86-NOSSE-NEXT: andl %ecx, %esi -; X86-NOSSE-NEXT: addl %eax, %esi -; X86-NOSSE-NEXT: movl %esi, %ebp -; X86-NOSSE-NEXT: shrl $4, %ebp -; X86-NOSSE-NEXT: addl %esi, %ebp -; X86-NOSSE-NEXT: movl %edx, %eax -; X86-NOSSE-NEXT: shrl %eax -; X86-NOSSE-NEXT: movl $1431655765, %esi # imm = 0x55555555 -; X86-NOSSE-NEXT: andl %esi, %eax -; X86-NOSSE-NEXT: subl %eax, %edx -; X86-NOSSE-NEXT: movl %edx, %eax -; X86-NOSSE-NEXT: andl %ecx, %eax -; X86-NOSSE-NEXT: shrl $2, %edx -; X86-NOSSE-NEXT: andl %ecx, %edx -; X86-NOSSE-NEXT: addl %eax, %edx -; X86-NOSSE-NEXT: movl %edx, %eax -; X86-NOSSE-NEXT: shrl $4, %eax -; X86-NOSSE-NEXT: addl %edx, %eax -; X86-NOSSE-NEXT: andl %ebx, %ebp -; X86-NOSSE-NEXT: andl %ebx, %eax -; X86-NOSSE-NEXT: imull $16843009, %ebp, %ecx # imm = 0x1010101 +; X86-NOSSE-NEXT: imull $16843009, %ecx, %ecx # imm = 0x1010101 ; X86-NOSSE-NEXT: shrl $24, %ecx -; X86-NOSSE-NEXT: imull $16843009, %eax, %edx # imm = 0x1010101 -; X86-NOSSE-NEXT: shrl $24, %edx -; 
X86-NOSSE-NEXT: addl %ecx, %edx -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: addl %edi, %edx -; X86-NOSSE-NEXT: xorl %ecx, %ecx -; X86-NOSSE-NEXT: movl %ecx, 12(%eax) -; X86-NOSSE-NEXT: movl %ecx, 8(%eax) -; X86-NOSSE-NEXT: movl %ecx, 4(%eax) -; X86-NOSSE-NEXT: movl %edx, (%eax) +; X86-NOSSE-NEXT: addl %eax, %ecx +; X86-NOSSE-NEXT: movl 8(%ebp), %eax +; X86-NOSSE-NEXT: addl %edx, %ecx +; X86-NOSSE-NEXT: xorl %edx, %edx +; X86-NOSSE-NEXT: movl %edx, 12(%eax) +; X86-NOSSE-NEXT: movl %edx, 8(%eax) +; X86-NOSSE-NEXT: movl %edx, 4(%eax) +; X86-NOSSE-NEXT: movl %ecx, (%eax) +; X86-NOSSE-NEXT: leal -12(%ebp), %esp ; X86-NOSSE-NEXT: popl %esi ; X86-NOSSE-NEXT: popl %edi ; X86-NOSSE-NEXT: popl %ebx @@ -1057,13 +1083,17 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize { ; ; X86-POPCNT-LABEL: cnt128_optsize: ; X86-POPCNT: # %bb.0: +; X86-POPCNT-NEXT: pushl %ebp +; X86-POPCNT-NEXT: movl %esp, %ebp ; X86-POPCNT-NEXT: pushl %esi -; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx -; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx +; X86-POPCNT-NEXT: andl $-16, %esp +; X86-POPCNT-NEXT: subl $16, %esp +; X86-POPCNT-NEXT: movl 8(%ebp), %eax +; X86-POPCNT-NEXT: popcntl 36(%ebp), %ecx +; X86-POPCNT-NEXT: popcntl 32(%ebp), %edx ; X86-POPCNT-NEXT: addl %ecx, %edx -; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx -; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi +; X86-POPCNT-NEXT: popcntl 28(%ebp), %ecx +; X86-POPCNT-NEXT: popcntl 24(%ebp), %esi ; X86-POPCNT-NEXT: addl %ecx, %esi ; X86-POPCNT-NEXT: addl %edx, %esi ; X86-POPCNT-NEXT: xorl %ecx, %ecx @@ -1071,7 +1101,9 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize { ; X86-POPCNT-NEXT: movl %ecx, 8(%eax) ; X86-POPCNT-NEXT: movl %ecx, 4(%eax) ; X86-POPCNT-NEXT: movl %esi, (%eax) +; X86-POPCNT-NEXT: leal -4(%ebp), %esp ; X86-POPCNT-NEXT: popl %esi +; X86-POPCNT-NEXT: popl %ebp ; X86-POPCNT-NEXT: retl $4 ; ; X64-POPCNT-LABEL: cnt128_optsize: @@ -1118,7 +1150,11 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize { ; ; X86-SSE2-LABEL: cnt128_optsize: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: andl $-16, %esp +; X86-SSE2-NEXT: subl $16, %esp +; X86-SSE2-NEXT: movl 8(%ebp), %eax ; X86-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero ; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 ; X86-SSE2-NEXT: psrlw $1, %xmm0 @@ -1161,11 +1197,17 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize { ; X86-SSE2-NEXT: movl %ecx, 8(%eax) ; X86-SSE2-NEXT: movl %ecx, 4(%eax) ; X86-SSE2-NEXT: movl %edx, (%eax) +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp ; X86-SSE2-NEXT: retl $4 ; ; X86-SSSE3-LABEL: cnt128_optsize: ; X86-SSSE3: # %bb.0: -; X86-SSSE3-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSSE3-NEXT: pushl %ebp +; X86-SSSE3-NEXT: movl %esp, %ebp +; X86-SSSE3-NEXT: andl $-16, %esp +; X86-SSSE3-NEXT: subl $16, %esp +; X86-SSSE3-NEXT: movl 8(%ebp), %eax ; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; X86-SSSE3-NEXT: movq {{.*#+}} xmm2 = mem[0],zero ; X86-SSSE3-NEXT: movdqa %xmm2, %xmm3 @@ -1198,6 +1240,8 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize { ; X86-SSSE3-NEXT: movl %ecx, 8(%eax) ; X86-SSSE3-NEXT: movl %ecx, 4(%eax) ; X86-SSSE3-NEXT: movl %edx, (%eax) +; X86-SSSE3-NEXT: movl %ebp, %esp +; X86-SSSE3-NEXT: popl %ebp ; X86-SSSE3-NEXT: retl $4 %cnt = tail call i128 @llvm.ctpop.i128(i128 
%x) ret i128 %cnt @@ -1415,85 +1459,88 @@ define i64 @cnt64_pgso(i64 %x) nounwind readnone !prof !14 { define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 { ; X86-NOSSE-LABEL: cnt128_pgso: ; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl %ebx +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: movl %esp, %ebp ; X86-NOSSE-NEXT: pushl %edi ; X86-NOSSE-NEXT: pushl %esi -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NOSSE-NEXT: movl %edi, %ebx -; X86-NOSSE-NEXT: shrl %ebx -; X86-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555 -; X86-NOSSE-NEXT: subl %ebx, %edi -; X86-NOSSE-NEXT: movl %edi, %ebx -; X86-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333 -; X86-NOSSE-NEXT: shrl $2, %edi -; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333 -; X86-NOSSE-NEXT: addl %ebx, %edi -; X86-NOSSE-NEXT: movl %edi, %ebx -; X86-NOSSE-NEXT: shrl $4, %ebx -; X86-NOSSE-NEXT: addl %edi, %ebx -; X86-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F -; X86-NOSSE-NEXT: imull $16843009, %ebx, %edi # imm = 0x1010101 -; X86-NOSSE-NEXT: shrl $24, %edi -; X86-NOSSE-NEXT: movl %esi, %ebx -; X86-NOSSE-NEXT: shrl %ebx -; X86-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555 -; X86-NOSSE-NEXT: subl %ebx, %esi -; X86-NOSSE-NEXT: movl %esi, %ebx -; X86-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333 +; X86-NOSSE-NEXT: andl $-16, %esp +; X86-NOSSE-NEXT: movl 24(%ebp), %eax +; X86-NOSSE-NEXT: movl 32(%ebp), %ecx +; X86-NOSSE-NEXT: movl 36(%ebp), %esi +; X86-NOSSE-NEXT: movl %esi, %edx +; X86-NOSSE-NEXT: shrl %edx +; X86-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555 +; X86-NOSSE-NEXT: subl %edx, %esi +; X86-NOSSE-NEXT: movl %esi, %edx +; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333 ; X86-NOSSE-NEXT: shrl $2, %esi ; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333 -; X86-NOSSE-NEXT: addl %ebx, %esi -; X86-NOSSE-NEXT: movl %esi, %ebx -; X86-NOSSE-NEXT: shrl $4, %ebx -; X86-NOSSE-NEXT: addl %esi, %ebx -; X86-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F -; X86-NOSSE-NEXT: imull $16843009, %ebx, %esi # imm = 0x1010101 -; X86-NOSSE-NEXT: shrl $24, %esi -; X86-NOSSE-NEXT: addl %edi, %esi -; X86-NOSSE-NEXT: movl %edx, %edi -; X86-NOSSE-NEXT: shrl %edi -; X86-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555 -; X86-NOSSE-NEXT: subl %edi, %edx -; X86-NOSSE-NEXT: movl %edx, %edi -; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333 -; X86-NOSSE-NEXT: shrl $2, %edx -; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333 -; X86-NOSSE-NEXT: addl %edi, %edx -; X86-NOSSE-NEXT: movl %edx, %edi -; X86-NOSSE-NEXT: shrl $4, %edi -; X86-NOSSE-NEXT: addl %edx, %edi -; X86-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F -; X86-NOSSE-NEXT: imull $16843009, %edi, %edx # imm = 0x1010101 -; X86-NOSSE-NEXT: shrl $24, %edx -; X86-NOSSE-NEXT: movl %ecx, %edi -; X86-NOSSE-NEXT: shrl %edi -; X86-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555 -; X86-NOSSE-NEXT: subl %edi, %ecx -; X86-NOSSE-NEXT: movl %ecx, %edi -; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333 +; X86-NOSSE-NEXT: addl %edx, %esi +; X86-NOSSE-NEXT: movl %esi, %edx +; X86-NOSSE-NEXT: shrl $4, %edx +; X86-NOSSE-NEXT: addl %esi, %edx +; X86-NOSSE-NEXT: movl %ecx, %esi +; X86-NOSSE-NEXT: shrl %esi +; X86-NOSSE-NEXT: andl $1431655765, %esi # imm = 0x55555555 +; X86-NOSSE-NEXT: subl %esi, %ecx +; X86-NOSSE-NEXT: 
movl %ecx, %esi +; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333 ; X86-NOSSE-NEXT: shrl $2, %ecx ; X86-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333 -; X86-NOSSE-NEXT: addl %edi, %ecx +; X86-NOSSE-NEXT: addl %esi, %ecx ; X86-NOSSE-NEXT: movl %ecx, %edi ; X86-NOSSE-NEXT: shrl $4, %edi ; X86-NOSSE-NEXT: addl %ecx, %edi +; X86-NOSSE-NEXT: movl 28(%ebp), %esi +; X86-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F +; X86-NOSSE-NEXT: imull $16843009, %edx, %edx # imm = 0x1010101 +; X86-NOSSE-NEXT: shrl $24, %edx ; X86-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F ; X86-NOSSE-NEXT: imull $16843009, %edi, %ecx # imm = 0x1010101 ; X86-NOSSE-NEXT: shrl $24, %ecx ; X86-NOSSE-NEXT: addl %edx, %ecx -; X86-NOSSE-NEXT: addl %esi, %ecx -; X86-NOSSE-NEXT: xorl %edx, %edx -; X86-NOSSE-NEXT: movl %edx, 12(%eax) -; X86-NOSSE-NEXT: movl %edx, 8(%eax) -; X86-NOSSE-NEXT: movl %edx, 4(%eax) -; X86-NOSSE-NEXT: movl %ecx, (%eax) +; X86-NOSSE-NEXT: movl %esi, %edx +; X86-NOSSE-NEXT: shrl %edx +; X86-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555 +; X86-NOSSE-NEXT: subl %edx, %esi +; X86-NOSSE-NEXT: movl %esi, %edx +; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333 +; X86-NOSSE-NEXT: shrl $2, %esi +; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333 +; X86-NOSSE-NEXT: addl %edx, %esi +; X86-NOSSE-NEXT: movl %esi, %edx +; X86-NOSSE-NEXT: shrl $4, %edx +; X86-NOSSE-NEXT: addl %esi, %edx +; X86-NOSSE-NEXT: movl %eax, %esi +; X86-NOSSE-NEXT: shrl %esi +; X86-NOSSE-NEXT: andl $1431655765, %esi # imm = 0x55555555 +; X86-NOSSE-NEXT: subl %esi, %eax +; X86-NOSSE-NEXT: movl %eax, %esi +; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333 +; X86-NOSSE-NEXT: shrl $2, %eax +; X86-NOSSE-NEXT: andl $858993459, %eax # imm = 0x33333333 +; X86-NOSSE-NEXT: addl %esi, %eax +; X86-NOSSE-NEXT: movl %eax, %esi +; X86-NOSSE-NEXT: shrl $4, %esi +; X86-NOSSE-NEXT: addl %eax, %esi +; X86-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F +; X86-NOSSE-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101 +; X86-NOSSE-NEXT: shrl $24, %eax +; X86-NOSSE-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F +; X86-NOSSE-NEXT: imull $16843009, %esi, %edx # imm = 0x1010101 +; X86-NOSSE-NEXT: shrl $24, %edx +; X86-NOSSE-NEXT: addl %eax, %edx +; X86-NOSSE-NEXT: movl 8(%ebp), %eax +; X86-NOSSE-NEXT: addl %ecx, %edx +; X86-NOSSE-NEXT: xorl %ecx, %ecx +; X86-NOSSE-NEXT: movl %ecx, 12(%eax) +; X86-NOSSE-NEXT: movl %ecx, 8(%eax) +; X86-NOSSE-NEXT: movl %ecx, 4(%eax) +; X86-NOSSE-NEXT: movl %edx, (%eax) +; X86-NOSSE-NEXT: leal -8(%ebp), %esp ; X86-NOSSE-NEXT: popl %esi ; X86-NOSSE-NEXT: popl %edi -; X86-NOSSE-NEXT: popl %ebx +; X86-NOSSE-NEXT: popl %ebp ; X86-NOSSE-NEXT: retl $4 ; ; X64-BASE-LABEL: cnt128_pgso: @@ -1538,13 +1585,17 @@ define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 { ; ; X86-POPCNT-LABEL: cnt128_pgso: ; X86-POPCNT: # %bb.0: +; X86-POPCNT-NEXT: pushl %ebp +; X86-POPCNT-NEXT: movl %esp, %ebp ; X86-POPCNT-NEXT: pushl %esi -; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx -; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx +; X86-POPCNT-NEXT: andl $-16, %esp +; X86-POPCNT-NEXT: subl $16, %esp +; X86-POPCNT-NEXT: movl 8(%ebp), %eax +; X86-POPCNT-NEXT: popcntl 36(%ebp), %ecx +; X86-POPCNT-NEXT: popcntl 32(%ebp), %edx ; X86-POPCNT-NEXT: addl %ecx, %edx -; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx -; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi +; X86-POPCNT-NEXT: popcntl 28(%ebp), %ecx +; X86-POPCNT-NEXT: popcntl 24(%ebp), %esi ; 
X86-POPCNT-NEXT: addl %ecx, %esi ; X86-POPCNT-NEXT: addl %edx, %esi ; X86-POPCNT-NEXT: xorl %ecx, %ecx @@ -1552,7 +1603,9 @@ define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 { ; X86-POPCNT-NEXT: movl %ecx, 8(%eax) ; X86-POPCNT-NEXT: movl %ecx, 4(%eax) ; X86-POPCNT-NEXT: movl %esi, (%eax) +; X86-POPCNT-NEXT: leal -4(%ebp), %esp ; X86-POPCNT-NEXT: popl %esi +; X86-POPCNT-NEXT: popl %ebp ; X86-POPCNT-NEXT: retl $4 ; ; X64-POPCNT-LABEL: cnt128_pgso: @@ -1599,7 +1652,11 @@ define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 { ; ; X86-SSE2-LABEL: cnt128_pgso: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: andl $-16, %esp +; X86-SSE2-NEXT: subl $16, %esp +; X86-SSE2-NEXT: movl 8(%ebp), %eax ; X86-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero ; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 ; X86-SSE2-NEXT: psrlw $1, %xmm0 @@ -1642,11 +1699,17 @@ define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 { ; X86-SSE2-NEXT: movl %ecx, 8(%eax) ; X86-SSE2-NEXT: movl %ecx, 4(%eax) ; X86-SSE2-NEXT: movl %edx, (%eax) +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp ; X86-SSE2-NEXT: retl $4 ; ; X86-SSSE3-LABEL: cnt128_pgso: ; X86-SSSE3: # %bb.0: -; X86-SSSE3-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSSE3-NEXT: pushl %ebp +; X86-SSSE3-NEXT: movl %esp, %ebp +; X86-SSSE3-NEXT: andl $-16, %esp +; X86-SSSE3-NEXT: subl $16, %esp +; X86-SSSE3-NEXT: movl 8(%ebp), %eax ; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; X86-SSSE3-NEXT: movq {{.*#+}} xmm2 = mem[0],zero ; X86-SSSE3-NEXT: movdqa %xmm2, %xmm3 @@ -1679,6 +1742,8 @@ define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 { ; X86-SSSE3-NEXT: movl %ecx, 8(%eax) ; X86-SSSE3-NEXT: movl %ecx, 4(%eax) ; X86-SSSE3-NEXT: movl %edx, (%eax) +; X86-SSSE3-NEXT: movl %ebp, %esp +; X86-SSSE3-NEXT: popl %ebp ; X86-SSSE3-NEXT: retl $4 %cnt = tail call i128 @llvm.ctpop.i128(i128 %x) ret i128 %cnt diff --git a/llvm/test/CodeGen/X86/pr149841.ll b/llvm/test/CodeGen/X86/pr149841.ll new file mode 100644 index 0000000..c17a617 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr149841.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.bar = type { [5 x ptr] } + +@global = external dso_local global %struct.bar + +define i1 @foo(ptr %arg, i1 %arg1) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %bb +; CHECK-NEXT: cmpq $global+1, %rdi +; CHECK-NEXT: setne %al +; CHECK-NEXT: andb %sil, %al +; CHECK-NEXT: retq +bb: + #dbg_value(ptr @global, !3, !DIExpression(), !5) + %icmp = icmp ne ptr %arg, getelementptr inbounds nuw (i8, ptr @global, i64 1) + %select = select i1 %arg1, i1 %icmp, i1 false + ret i1 %select +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2} + +!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug) +!1 = !DIFile(filename: "x.c", directory: "/proc/self/cwd") +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !1) +!4 = distinct !DISubprogram(name: "x", scope: null, file: !1, spFlags: DISPFlagDefinition, unit: !0) +!5 = !DILocation(line: 0, scope: !4) + diff --git a/llvm/test/CodeGen/X86/pr33960.ll 
b/llvm/test/CodeGen/X86/pr33960.ll index 44fe777..6ee270e 100644 --- a/llvm/test/CodeGen/X86/pr33960.ll +++ b/llvm/test/CodeGen/X86/pr33960.ll @@ -7,12 +7,10 @@ define void @PR33960() { ; X86-LABEL: PR33960: ; X86: # %bb.0: # %entry -; X86-NEXT: movl $-1, b ; X86-NEXT: retl ; ; X64-LABEL: PR33960: ; X64: # %bb.0: # %entry -; X64-NEXT: movl $-1, b(%rip) ; X64-NEXT: retq entry: %tmp = insertelement <4 x i32> <i32 undef, i32 -7, i32 -3, i32 undef>, i32 -2, i32 3 diff --git a/llvm/test/CodeGen/X86/pr46004.ll b/llvm/test/CodeGen/X86/pr46004.ll index f7c7da0..829d6df 100644 --- a/llvm/test/CodeGen/X86/pr46004.ll +++ b/llvm/test/CodeGen/X86/pr46004.ll @@ -6,7 +6,17 @@ define void @fuzz22357(i128 %a0) { ; X86-LABEL: fuzz22357: ; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp ; X86-NEXT: movb $0, (%eax) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 ; X86-NEXT: retl ; ; X64-LABEL: fuzz22357: @@ -24,6 +34,15 @@ define void @fuzz22357(i128 %a0) { define void @fuzz22723(i128 %a0) { ; X86-LABEL: fuzz22723: ; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: andl $-16, %esp +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 ; X86-NEXT: retl ; ; X64-LABEL: fuzz22723: diff --git a/llvm/test/CodeGen/X86/pr62286.ll b/llvm/test/CodeGen/X86/pr62286.ll index 2d1b7fc..9728e13 100644 --- a/llvm/test/CodeGen/X86/pr62286.ll +++ b/llvm/test/CodeGen/X86/pr62286.ll @@ -42,10 +42,10 @@ define i64 @PR62286(i32 %a) { ; AVX2-LABEL: PR62286: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovd %edi, %xmm0 -; AVX2-NEXT: vpaddd %xmm0, %xmm0, %xmm1 -; AVX2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3] -; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; AVX2-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3] +; AVX2-NEXT: vpaddd %xmm0, %xmm0, %xmm0 +; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/pseudo-probe-desc-check.ll b/llvm/test/CodeGen/X86/pseudo-probe-desc-check.ll new file mode 100644 index 0000000..841061c --- /dev/null +++ b/llvm/test/CodeGen/X86/pseudo-probe-desc-check.ll @@ -0,0 +1,47 @@ +; REQUIRES: asserts +; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s -o /dev/null 2>&1 | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-windows-msvc < %s -o /dev/null 2>&1 | FileCheck %s + +; CHECK: warning: Guid:8314849053352128226 Name:inlinee does not exist in pseudo probe desc +; CHECK: warning: Guid:6492337042787843907 Name:extract2 does not exist in pseudo probe desc + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" + +define void @extract1() !dbg !8 { +entry: + call void @llvm.pseudoprobe(i64 6028998432455395745, i64 1, i32 0, i64 -1), !dbg !11 + call void @llvm.pseudoprobe(i64 8314849053352128226, i64 1, i32 0, i64 -1), !dbg !12 + ret void, !dbg !16 +} + +define void @extract2() !dbg !17 { +entry: + 
call void @llvm.pseudoprobe(i64 6492337042787843907, i64 1, i32 0, i64 -1), !dbg !18 + ret void, !dbg !18 +} + +declare void @llvm.pseudoprobe(i64, i64, i32, i64) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5, !6} +!llvm.pseudo_probe_desc = !{!7} + +!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang", isOptimized: false, emissionKind: LineTablesOnly, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.c", directory: "/home/foo") +!2 = !{i32 7, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 7, !"uwtable", i32 2} +!6 = !{i32 7, !"frame-pointer", i32 2} +!7 = !{i64 6028998432455395745, i64 281479271677951, !"extract1"} +!8 = distinct !DISubprogram(name: "extract1", scope: !1, file: !1, line: 4, type: !9, scopeLine: 4, spFlags: DISPFlagDefinition, unit: !0) +!9 = !DISubroutineType(types: !10) +!10 = !{} +!11 = !DILocation(line: 5, column: 3, scope: !8) +!12 = !DILocation(line: 2, column: 1, scope: !13, inlinedAt: !14) +!13 = distinct !DISubprogram(name: "inlinee", scope: !1, file: !1, line: 1, type: !9, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0) +!14 = distinct !DILocation(line: 5, column: 3, scope: !15) +!15 = !DILexicalBlockFile(scope: !8, file: !1, discriminator: 455082007) +!16 = !DILocation(line: 6, column: 1, scope: !8) +!17 = distinct !DISubprogram(name: "extract2", scope: !1, file: !1, line: 8, type: !9, scopeLine: 8, spFlags: DISPFlagDefinition, unit: !0) +!18 = !DILocation(line: 9, column: 1, scope: !17) diff --git a/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll b/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll index 50a967e..ce9723b 100644 --- a/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll +++ b/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll @@ -762,11 +762,15 @@ define i32 @x_to_s32(x86_fp80 %a) nounwind { define i32 @t_to_u32(fp128 %a) nounwind { ; X86-AVX512-WIN-LABEL: t_to_u32: ; X86-AVX512-WIN: # %bb.0: -; X86-AVX512-WIN-NEXT: subl $16, %esp -; X86-AVX512-WIN-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0 +; X86-AVX512-WIN-NEXT: pushl %ebp +; X86-AVX512-WIN-NEXT: movl %esp, %ebp +; X86-AVX512-WIN-NEXT: andl $-16, %esp +; X86-AVX512-WIN-NEXT: subl $32, %esp +; X86-AVX512-WIN-NEXT: vmovups 8(%ebp), %xmm0 ; X86-AVX512-WIN-NEXT: vmovups %xmm0, (%esp) ; X86-AVX512-WIN-NEXT: calll ___fixunstfsi -; X86-AVX512-WIN-NEXT: addl $16, %esp +; X86-AVX512-WIN-NEXT: movl %ebp, %esp +; X86-AVX512-WIN-NEXT: popl %ebp ; X86-AVX512-WIN-NEXT: retl ; ; X86-AVX512-LIN-LABEL: t_to_u32: @@ -797,12 +801,18 @@ define i32 @t_to_u32(fp128 %a) nounwind { ; ; X86-SSE-WIN-LABEL: t_to_u32: ; X86-SSE-WIN: # %bb.0: -; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp) +; X86-SSE-WIN-NEXT: pushl %ebp +; X86-SSE-WIN-NEXT: movl %esp, %ebp +; X86-SSE-WIN-NEXT: andl $-16, %esp +; X86-SSE-WIN-NEXT: subl $16, %esp +; X86-SSE-WIN-NEXT: pushl 20(%ebp) +; X86-SSE-WIN-NEXT: pushl 16(%ebp) +; X86-SSE-WIN-NEXT: pushl 12(%ebp) +; X86-SSE-WIN-NEXT: pushl 8(%ebp) ; X86-SSE-WIN-NEXT: calll ___fixunstfsi ; X86-SSE-WIN-NEXT: addl $16, %esp +; X86-SSE-WIN-NEXT: movl %ebp, %esp +; X86-SSE-WIN-NEXT: popl %ebp ; X86-SSE-WIN-NEXT: retl ; ; X86-SSE-LIN-LABEL: t_to_u32: @@ -835,12 +845,18 @@ define i32 @t_to_u32(fp128 %a) nounwind { ; ; X87-WIN-LABEL: t_to_u32: ; X87-WIN: # %bb.0: -; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp) -; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp) -; X87-WIN-NEXT: pushl 
{{[0-9]+}}(%esp) -; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp) +; X87-WIN-NEXT: pushl %ebp +; X87-WIN-NEXT: movl %esp, %ebp +; X87-WIN-NEXT: andl $-16, %esp +; X87-WIN-NEXT: subl $16, %esp +; X87-WIN-NEXT: pushl 20(%ebp) +; X87-WIN-NEXT: pushl 16(%ebp) +; X87-WIN-NEXT: pushl 12(%ebp) +; X87-WIN-NEXT: pushl 8(%ebp) ; X87-WIN-NEXT: calll ___fixunstfsi ; X87-WIN-NEXT: addl $16, %esp +; X87-WIN-NEXT: movl %ebp, %esp +; X87-WIN-NEXT: popl %ebp ; X87-WIN-NEXT: retl ; ; X87-LIN-LABEL: t_to_u32: @@ -860,11 +876,15 @@ define i32 @t_to_u32(fp128 %a) nounwind { define i32 @t_to_s32(fp128 %a) nounwind { ; X86-AVX512-WIN-LABEL: t_to_s32: ; X86-AVX512-WIN: # %bb.0: -; X86-AVX512-WIN-NEXT: subl $16, %esp -; X86-AVX512-WIN-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0 +; X86-AVX512-WIN-NEXT: pushl %ebp +; X86-AVX512-WIN-NEXT: movl %esp, %ebp +; X86-AVX512-WIN-NEXT: andl $-16, %esp +; X86-AVX512-WIN-NEXT: subl $32, %esp +; X86-AVX512-WIN-NEXT: vmovups 8(%ebp), %xmm0 ; X86-AVX512-WIN-NEXT: vmovups %xmm0, (%esp) ; X86-AVX512-WIN-NEXT: calll ___fixtfsi -; X86-AVX512-WIN-NEXT: addl $16, %esp +; X86-AVX512-WIN-NEXT: movl %ebp, %esp +; X86-AVX512-WIN-NEXT: popl %ebp ; X86-AVX512-WIN-NEXT: retl ; ; X86-AVX512-LIN-LABEL: t_to_s32: @@ -895,12 +915,18 @@ define i32 @t_to_s32(fp128 %a) nounwind { ; ; X86-SSE-WIN-LABEL: t_to_s32: ; X86-SSE-WIN: # %bb.0: -; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp) +; X86-SSE-WIN-NEXT: pushl %ebp +; X86-SSE-WIN-NEXT: movl %esp, %ebp +; X86-SSE-WIN-NEXT: andl $-16, %esp +; X86-SSE-WIN-NEXT: subl $16, %esp +; X86-SSE-WIN-NEXT: pushl 20(%ebp) +; X86-SSE-WIN-NEXT: pushl 16(%ebp) +; X86-SSE-WIN-NEXT: pushl 12(%ebp) +; X86-SSE-WIN-NEXT: pushl 8(%ebp) ; X86-SSE-WIN-NEXT: calll ___fixtfsi ; X86-SSE-WIN-NEXT: addl $16, %esp +; X86-SSE-WIN-NEXT: movl %ebp, %esp +; X86-SSE-WIN-NEXT: popl %ebp ; X86-SSE-WIN-NEXT: retl ; ; X86-SSE-LIN-LABEL: t_to_s32: @@ -933,12 +959,18 @@ define i32 @t_to_s32(fp128 %a) nounwind { ; ; X87-WIN-LABEL: t_to_s32: ; X87-WIN: # %bb.0: -; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp) -; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp) -; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp) -; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp) +; X87-WIN-NEXT: pushl %ebp +; X87-WIN-NEXT: movl %esp, %ebp +; X87-WIN-NEXT: andl $-16, %esp +; X87-WIN-NEXT: subl $16, %esp +; X87-WIN-NEXT: pushl 20(%ebp) +; X87-WIN-NEXT: pushl 16(%ebp) +; X87-WIN-NEXT: pushl 12(%ebp) +; X87-WIN-NEXT: pushl 8(%ebp) ; X87-WIN-NEXT: calll ___fixtfsi ; X87-WIN-NEXT: addl $16, %esp +; X87-WIN-NEXT: movl %ebp, %esp +; X87-WIN-NEXT: popl %ebp ; X87-WIN-NEXT: retl ; ; X87-LIN-LABEL: t_to_s32: diff --git a/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll b/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll index f516db8..3287869 100644 --- a/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll +++ b/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll @@ -1417,11 +1417,15 @@ define i64 @x_to_s64(x86_fp80 %a) nounwind { define i64 @t_to_u64(fp128 %a) nounwind { ; X86-AVX512-WIN-LABEL: t_to_u64: ; X86-AVX512-WIN: # %bb.0: -; X86-AVX512-WIN-NEXT: subl $16, %esp -; X86-AVX512-WIN-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0 +; X86-AVX512-WIN-NEXT: pushl %ebp +; X86-AVX512-WIN-NEXT: movl %esp, %ebp +; X86-AVX512-WIN-NEXT: andl $-16, %esp +; X86-AVX512-WIN-NEXT: subl $32, %esp +; X86-AVX512-WIN-NEXT: vmovups 8(%ebp), %xmm0 ; X86-AVX512-WIN-NEXT: vmovups %xmm0, (%esp) ; X86-AVX512-WIN-NEXT: calll ___fixunstfdi -; X86-AVX512-WIN-NEXT: addl $16, %esp +; X86-AVX512-WIN-NEXT: movl %ebp, %esp +; 
X86-AVX512-WIN-NEXT: popl %ebp ; X86-AVX512-WIN-NEXT: retl ; ; X86-AVX512-LIN-LABEL: t_to_u64: @@ -1452,12 +1456,18 @@ define i64 @t_to_u64(fp128 %a) nounwind { ; ; X86-SSE-WIN-LABEL: t_to_u64: ; X86-SSE-WIN: # %bb.0: -; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp) +; X86-SSE-WIN-NEXT: pushl %ebp +; X86-SSE-WIN-NEXT: movl %esp, %ebp +; X86-SSE-WIN-NEXT: andl $-16, %esp +; X86-SSE-WIN-NEXT: subl $16, %esp +; X86-SSE-WIN-NEXT: pushl 20(%ebp) +; X86-SSE-WIN-NEXT: pushl 16(%ebp) +; X86-SSE-WIN-NEXT: pushl 12(%ebp) +; X86-SSE-WIN-NEXT: pushl 8(%ebp) ; X86-SSE-WIN-NEXT: calll ___fixunstfdi ; X86-SSE-WIN-NEXT: addl $16, %esp +; X86-SSE-WIN-NEXT: movl %ebp, %esp +; X86-SSE-WIN-NEXT: popl %ebp ; X86-SSE-WIN-NEXT: retl ; ; X86-SSE-LIN-LABEL: t_to_u64: @@ -1490,12 +1500,18 @@ define i64 @t_to_u64(fp128 %a) nounwind { ; ; X87-WIN-LABEL: t_to_u64: ; X87-WIN: # %bb.0: -; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp) -; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp) -; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp) -; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp) +; X87-WIN-NEXT: pushl %ebp +; X87-WIN-NEXT: movl %esp, %ebp +; X87-WIN-NEXT: andl $-16, %esp +; X87-WIN-NEXT: subl $16, %esp +; X87-WIN-NEXT: pushl 20(%ebp) +; X87-WIN-NEXT: pushl 16(%ebp) +; X87-WIN-NEXT: pushl 12(%ebp) +; X87-WIN-NEXT: pushl 8(%ebp) ; X87-WIN-NEXT: calll ___fixunstfdi ; X87-WIN-NEXT: addl $16, %esp +; X87-WIN-NEXT: movl %ebp, %esp +; X87-WIN-NEXT: popl %ebp ; X87-WIN-NEXT: retl ; ; X87-LIN-LABEL: t_to_u64: @@ -1515,11 +1531,15 @@ define i64 @t_to_u64(fp128 %a) nounwind { define i64 @t_to_s64(fp128 %a) nounwind { ; X86-AVX512-WIN-LABEL: t_to_s64: ; X86-AVX512-WIN: # %bb.0: -; X86-AVX512-WIN-NEXT: subl $16, %esp -; X86-AVX512-WIN-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0 +; X86-AVX512-WIN-NEXT: pushl %ebp +; X86-AVX512-WIN-NEXT: movl %esp, %ebp +; X86-AVX512-WIN-NEXT: andl $-16, %esp +; X86-AVX512-WIN-NEXT: subl $32, %esp +; X86-AVX512-WIN-NEXT: vmovups 8(%ebp), %xmm0 ; X86-AVX512-WIN-NEXT: vmovups %xmm0, (%esp) ; X86-AVX512-WIN-NEXT: calll ___fixtfdi -; X86-AVX512-WIN-NEXT: addl $16, %esp +; X86-AVX512-WIN-NEXT: movl %ebp, %esp +; X86-AVX512-WIN-NEXT: popl %ebp ; X86-AVX512-WIN-NEXT: retl ; ; X86-AVX512-LIN-LABEL: t_to_s64: @@ -1550,12 +1570,18 @@ define i64 @t_to_s64(fp128 %a) nounwind { ; ; X86-SSE-WIN-LABEL: t_to_s64: ; X86-SSE-WIN: # %bb.0: -; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp) -; X86-SSE-WIN-NEXT: pushl {{[0-9]+}}(%esp) +; X86-SSE-WIN-NEXT: pushl %ebp +; X86-SSE-WIN-NEXT: movl %esp, %ebp +; X86-SSE-WIN-NEXT: andl $-16, %esp +; X86-SSE-WIN-NEXT: subl $16, %esp +; X86-SSE-WIN-NEXT: pushl 20(%ebp) +; X86-SSE-WIN-NEXT: pushl 16(%ebp) +; X86-SSE-WIN-NEXT: pushl 12(%ebp) +; X86-SSE-WIN-NEXT: pushl 8(%ebp) ; X86-SSE-WIN-NEXT: calll ___fixtfdi ; X86-SSE-WIN-NEXT: addl $16, %esp +; X86-SSE-WIN-NEXT: movl %ebp, %esp +; X86-SSE-WIN-NEXT: popl %ebp ; X86-SSE-WIN-NEXT: retl ; ; X86-SSE-LIN-LABEL: t_to_s64: @@ -1588,12 +1614,18 @@ define i64 @t_to_s64(fp128 %a) nounwind { ; ; X87-WIN-LABEL: t_to_s64: ; X87-WIN: # %bb.0: -; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp) -; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp) -; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp) -; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp) +; X87-WIN-NEXT: pushl %ebp +; X87-WIN-NEXT: movl %esp, %ebp +; X87-WIN-NEXT: andl $-16, %esp +; X87-WIN-NEXT: subl $16, %esp +; X87-WIN-NEXT: pushl 20(%ebp) +; X87-WIN-NEXT: pushl 
16(%ebp) +; X87-WIN-NEXT: pushl 12(%ebp) +; X87-WIN-NEXT: pushl 8(%ebp) ; X87-WIN-NEXT: calll ___fixtfdi ; X87-WIN-NEXT: addl $16, %esp +; X87-WIN-NEXT: movl %ebp, %esp +; X87-WIN-NEXT: popl %ebp ; X87-WIN-NEXT: retl ; ; X87-LIN-LABEL: t_to_s64: diff --git a/llvm/test/CodeGen/X86/scmp.ll b/llvm/test/CodeGen/X86/scmp.ll index 87491362..8a28722 100644 --- a/llvm/test/CodeGen/X86/scmp.ll +++ b/llvm/test/CodeGen/X86/scmp.ll @@ -118,30 +118,33 @@ define i8 @scmp.8.128(i128 %x, i128 %y) nounwind { ; X86-LABEL: scmp.8.128: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %ebx, %ebp -; X86-NEXT: sbbl %edx, %ebp -; X86-NEXT: movl %ecx, %ebp -; X86-NEXT: sbbl %eax, %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %esi, %ecx -; X86-NEXT: sbbl %ebp, %ecx +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 24(%ebp), %ecx +; X86-NEXT: movl 28(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %edi +; X86-NEXT: cmpl %ecx, 8(%ebp) +; X86-NEXT: sbbl %eax, %edi +; X86-NEXT: movl 32(%ebp), %edi +; X86-NEXT: movl 16(%ebp), %ebx +; X86-NEXT: sbbl %edi, %ebx +; X86-NEXT: movl 36(%ebp), %ebx +; X86-NEXT: movl 20(%ebp), %edx +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: sbbl %ebx, %ecx ; X86-NEXT: setl %cl -; X86-NEXT: cmpl %edi, {{[0-9]+}}(%esp) -; X86-NEXT: sbbl %ebx, %edx -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: sbbl %esi, %ebp +; X86-NEXT: movl 24(%ebp), %esi +; X86-NEXT: cmpl 8(%ebp), %esi +; X86-NEXT: sbbl 12(%ebp), %eax +; X86-NEXT: sbbl 16(%ebp), %edi +; X86-NEXT: sbbl %edx, %ebx ; X86-NEXT: setl %al ; X86-NEXT: subb %cl, %al +; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx diff --git a/llvm/test/CodeGen/X86/sdiv_fix.ll b/llvm/test/CodeGen/X86/sdiv_fix.ll index 4925f8b..392bc83 100644 --- a/llvm/test/CodeGen/X86/sdiv_fix.ll +++ b/llvm/test/CodeGen/X86/sdiv_fix.ll @@ -307,69 +307,70 @@ define i64 @func5(i64 %x, i64 %y) nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: andl $-16, %esp -; X86-NEXT: subl $80, %esp +; X86-NEXT: subl $112, %esp ; X86-NEXT: movl 8(%ebp), %ecx -; X86-NEXT: movl 12(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %edi +; X86-NEXT: movl 16(%ebp), %eax ; X86-NEXT: movl 20(%ebp), %edx -; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: sarl $31, %ebx +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, %esi ; X86-NEXT: sarl $31, %esi -; X86-NEXT: movl %eax, %edi -; X86-NEXT: sarl $31, %edi -; X86-NEXT: movl %edi, %ebx -; X86-NEXT: shldl $31, %eax, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: shldl $31, %ecx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: shldl $31, %edi, %esi +; X86-NEXT: shldl $31, %ecx, %edi +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) ; X86-NEXT: shll $31, %ecx ; X86-NEXT: movl %ecx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: pushl %esi -; X86-NEXT: pushl %esi -; X86-NEXT: pushl %edx -; X86-NEXT: pushl 16(%ebp) -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %eax -; X86-NEXT: pushl %ecx -; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl %eax +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) ; X86-NEXT: calll __divti3 -; X86-NEXT: addl $32, %esp -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: subl $1, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl $0, %ebx -; X86-NEXT: testl %esi, %esi -; X86-NEXT: sets %al -; X86-NEXT: testl %edi, %edi -; X86-NEXT: sets %cl -; X86-NEXT: xorb %al, %cl -; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-NEXT: subl $4, %esp +; X86-NEXT: movl 20(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl %esi -; X86-NEXT: pushl %esi -; X86-NEXT: pushl 20(%ebp) -; X86-NEXT: pushl 16(%ebp) -; X86-NEXT: pushl %edi -; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: subl $1, %esi +; X86-NEXT: sbbl $0, %edi +; X86-NEXT: testl %ebx, %ebx +; X86-NEXT: sets %al +; X86-NEXT: testl %ecx, %ecx +; X86-NEXT: sets %bl +; X86-NEXT: xorb %al, %bl ; X86-NEXT: calll __modti3 -; X86-NEXT: addl $32, %esp +; X86-NEXT: subl $4, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: orl %eax, %ecx ; X86-NEXT: setne %al -; X86-NEXT: testb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT: movl %ebx, %edx +; X86-NEXT: testb %bl, %al +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: movl %esi, %eax +; X86-NEXT: movl %edi, %edx ; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi diff --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll index e7727a0..7df490f 100644 --- a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll +++ b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll @@ -370,67 +370,68 @@ define i64 @func5(i64 %x, i64 %y) nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: 
pushl %esi ; X86-NEXT: andl $-16, %esp -; X86-NEXT: subl $96, %esp -; X86-NEXT: movl 8(%ebp), %ecx -; X86-NEXT: movl 12(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %esi -; X86-NEXT: movl %esi, %ebx -; X86-NEXT: sarl $31, %ebx -; X86-NEXT: movl %eax, %edx -; X86-NEXT: sarl $31, %edx -; X86-NEXT: movl %edx, %edi -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: shldl $31, %eax, %edi -; X86-NEXT: shldl $31, %ecx, %eax +; X86-NEXT: subl $128, %esp +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: movl 12(%ebp), %edi +; X86-NEXT: movl 16(%ebp), %ecx +; X86-NEXT: movl 20(%ebp), %edx +; X86-NEXT: movl %edx, %eax +; X86-NEXT: sarl $31, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: shll $31, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %esi -; X86-NEXT: pushl 16(%ebp) -; X86-NEXT: pushl %edx -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %eax -; X86-NEXT: pushl %ecx +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, %ebx +; X86-NEXT: sarl $31, %ebx +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: shldl $31, %edi, %ebx +; X86-NEXT: shldl $31, %esi, %edi +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: shll $31, %esi +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) ; X86-NEXT: calll __divti3 -; X86-NEXT: addl $32, %esp +; X86-NEXT: subl $4, %esp +; X86-NEXT: movl 20(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: subl $1, %esi -; X86-NEXT: sbbl $0, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: subl $1, %edi ; X86-NEXT: sbbl $0, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl $0, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: testl %ebx, %ebx -; X86-NEXT: sets %al -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, %ebx +; X86-NEXT: sbbl $0, %ebx ; X86-NEXT: testl %ecx, %ecx -; X86-NEXT: sets %dl -; X86-NEXT: xorb %al, %dl -; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X86-NEXT: leal 
{{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl 20(%ebp) -; X86-NEXT: pushl 16(%ebp) -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl %edi -; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-NEXT: pushl %eax +; X86-NEXT: sets %al +; X86-NEXT: testl %edx, %edx +; X86-NEXT: sets %cl +; X86-NEXT: xorb %al, %cl +; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; X86-NEXT: calll __modti3 -; X86-NEXT: addl $32, %esp +; X86-NEXT: subl $4, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx @@ -438,41 +439,38 @@ define i64 @func5(i64 %x, i64 %y) nounwind { ; X86-NEXT: orl %eax, %ecx ; X86-NEXT: setne %al ; X86-NEXT: testb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: cmovel %esi, %ebx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT: cmpl $-1, %esi -; X86-NEXT: movl %eax, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: cmpl $-1, %edi ; X86-NEXT: sbbl $2147483647, %ecx # imm = 0x7FFFFFFF -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl %eax, %ecx ; X86-NEXT: sbbl $0, %ecx -; X86-NEXT: movl %edi, %ecx +; X86-NEXT: movl %ebx, %ecx ; X86-NEXT: sbbl $0, %ecx ; X86-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF -; X86-NEXT: cmovll %eax, %edx +; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X86-NEXT: movl $0, %ecx -; X86-NEXT: cmovgel %ecx, %edi -; X86-NEXT: movl %edi, %eax ; X86-NEXT: cmovgel %ecx, %ebx +; X86-NEXT: cmovgel %ecx, %eax ; X86-NEXT: movl $-1, %ecx -; X86-NEXT: cmovgel %ecx, %esi -; X86-NEXT: movl %esi, %edi -; X86-NEXT: negl %edi -; X86-NEXT: movl $-2147483648, %edi # imm = 0x80000000 -; X86-NEXT: sbbl %edx, %edi -; X86-NEXT: movl $-1, %edi -; X86-NEXT: sbbl %ebx, %edi -; X86-NEXT: sbbl %eax, %ecx +; X86-NEXT: cmovgel %ecx, %edi +; X86-NEXT: movl %edi, %esi +; X86-NEXT: negl %esi +; X86-NEXT: movl $-2147483648, %esi # imm = 0x80000000 +; X86-NEXT: sbbl %edx, %esi +; X86-NEXT: movl $-1, %esi +; X86-NEXT: sbbl %eax, %esi +; X86-NEXT: sbbl %ebx, %ecx ; X86-NEXT: movl $0, %eax -; X86-NEXT: cmovgel %eax, %esi +; X86-NEXT: cmovgel %eax, %edi ; X86-NEXT: movl $-2147483648, %eax # imm = 0x80000000 ; X86-NEXT: cmovgel %eax, %edx -; X86-NEXT: movl %esi, %eax +; X86-NEXT: movl %edi, %eax ; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi @@ -805,137 +803,155 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: andl $-16, %esp -; X86-NEXT: subl $208, %esp -; X86-NEXT: movl 36(%ebp), %esi +; X86-NEXT: subl $240, %esp +; X86-NEXT: movl 12(%ebp), %esi +; X86-NEXT: movl 20(%ebp), %edi ; X86-NEXT: movl 16(%ebp), %ebx -; X86-NEXT: movl 32(%ebp), 
%eax -; X86-NEXT: movl %eax, %edi -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: sarl $31, %edi -; X86-NEXT: movl %ebx, %edx -; X86-NEXT: sarl $31, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 32(%ebp), %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: sarl $31, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: sarl $31, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: leal (%ebx,%ebx), %eax ; X86-NEXT: shrl $31, %ebx ; X86-NEXT: shldl $31, %eax, %ebx -; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl %edx -; X86-NEXT: pushl %edx -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl $0 -; X86-NEXT: pushl %eax +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: calll __modti3 -; X86-NEXT: addl $32, %esp -; X86-NEXT: sarl $31, %esi -; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: subl $4, %esp +; X86-NEXT: movl 36(%ebp), %ecx ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: sarl $31, %eax -; X86-NEXT: leal (%ecx,%ecx), %edx -; X86-NEXT: shrl $31, %ecx -; X86-NEXT: shldl $31, %edx, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: leal {{[0-9]+}}(%esp), %edx -; X86-NEXT: pushl %esi -; X86-NEXT: pushl %esi -; X86-NEXT: pushl %esi -; X86-NEXT: pushl 36(%ebp) ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: pushl %eax -; X86-NEXT: pushl %eax -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl $0 -; X86-NEXT: pushl %edx +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, %eax +; X86-NEXT: sarl $31, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: leal (%edi,%edi), %eax +; X86-NEXT: shrl $31, %edi +; X86-NEXT: shldl $31, %eax, %edi +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: calll __divti3 -; X86-NEXT: addl $32, %esp +; X86-NEXT: subl $4, %esp +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 32(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %edi -; X86-NEXT: pushl 32(%ebp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl $0 -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, 
{{[0-9]+}}(%esp) ; X86-NEXT: calll __divti3 -; X86-NEXT: addl $32, %esp +; X86-NEXT: subl $4, %esp +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 36(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: pushl %esi -; X86-NEXT: pushl %esi -; X86-NEXT: pushl %esi -; X86-NEXT: pushl 36(%ebp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %edi -; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-NEXT: pushl $0 -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: calll __modti3 -; X86-NEXT: addl $32, %esp -; X86-NEXT: movl 28(%ebp), %edx -; X86-NEXT: movl %edx, %ebx -; X86-NEXT: sarl $31, %ebx -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: movl %ecx, %edi +; X86-NEXT: subl $4, %esp +; X86-NEXT: movl 28(%ebp), %eax +; X86-NEXT: movl %eax, %edi ; X86-NEXT: sarl $31, %edi -; X86-NEXT: leal (%ecx,%ecx), %eax -; X86-NEXT: shrl $31, %ecx -; X86-NEXT: shldl $31, %eax, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %esi, %ebx +; X86-NEXT: sarl $31, %ebx +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edx -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl $0 -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: leal (%esi,%esi), %eax +; X86-NEXT: shrl $31, %esi +; X86-NEXT: shldl $31, %eax, %esi +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: calll __modti3 -; X86-NEXT: addl $32, %esp -; X86-NEXT: movl 40(%ebp), %esi -; X86-NEXT: sarl $31, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: subl $4, %esp +; X86-NEXT: movl 40(%ebp), %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: sarl $31, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; X86-NEXT: movl 24(%ebp), %ecx ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: sarl $31, %eax -; X86-NEXT: leal (%ecx,%ecx), %edx +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: leal (%ecx,%ecx), %eax ; X86-NEXT: shrl $31, %ecx -; X86-NEXT: shldl $31, %edx, %ecx +; X86-NEXT: shldl $31, %eax, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: leal {{[0-9]+}}(%esp), %edx -; X86-NEXT: pushl %esi -; X86-NEXT: pushl %esi -; X86-NEXT: pushl %esi -; X86-NEXT: pushl 40(%ebp) -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) 
# 4-byte Spill -; X86-NEXT: pushl %eax -; X86-NEXT: pushl %eax -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl $0 -; X86-NEXT: pushl %edx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: calll __divti3 -; X86-NEXT: addl $32, %esp +; X86-NEXT: subl $4, %esp +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl 28(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl 28(%ebp) -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %edi -; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-NEXT: pushl $0 -; X86-NEXT: pushl %eax +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: calll __divti3 -; X86-NEXT: addl $32, %esp +; X86-NEXT: subl $4, %esp +; X86-NEXT: movl 40(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: subl $1, %eax @@ -949,18 +965,18 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl $0, %edx +; X86-NEXT: testl %edi, %edi +; X86-NEXT: sets {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X86-NEXT: testl %ebx, %ebx ; X86-NEXT: sets %bl -; X86-NEXT: testl %edi, %edi -; X86-NEXT: sets %bh -; X86-NEXT: xorb %bl, %bh +; X86-NEXT: xorb {{[-0-9]+}}(%e{{[sb]}}p), %bl # 1-byte Folded Reload ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: orl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: orl {{[0-9]+}}(%esp), %esi ; X86-NEXT: orl %edi, %esi -; X86-NEXT: setne %bl -; X86-NEXT: testb %bh, %bl +; X86-NEXT: setne %bh +; X86-NEXT: testb %bl, %bh ; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload @@ -1107,36 +1123,24 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: subl $1, %ebx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl $0, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl $0, %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl $0, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: 
movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl $0, %esi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: testl %ecx, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl $0, %edi +; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X86-NEXT: sets %al -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: testl %edx, %edx -; X86-NEXT: sets %ah -; X86-NEXT: xorb %al, %ah -; X86-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl 40(%ebp) -; X86-NEXT: pushl %edx -; X86-NEXT: pushl %edx -; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-NEXT: pushl $0 -; X86-NEXT: pushl %eax +; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: sets %cl +; X86-NEXT: xorb %al, %cl +; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; X86-NEXT: calll __modti3 -; X86-NEXT: addl $32, %esp +; X86-NEXT: subl $4, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx @@ -1144,38 +1148,38 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-NEXT: orl %eax, %ecx ; X86-NEXT: setne %al ; X86-NEXT: testb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X86-NEXT: cmpl $-1, %ebx -; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl %esi, %eax ; X86-NEXT: sbbl $0, %eax ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: sbbl $0, %eax -; X86-NEXT: movl %esi, %eax +; X86-NEXT: movl %edi, %eax ; X86-NEXT: sbbl $0, %eax ; X86-NEXT: movl $0, %eax -; X86-NEXT: cmovgel %eax, %esi -; X86-NEXT: cmovgel %eax, %ecx ; X86-NEXT: cmovgel %eax, %edi +; X86-NEXT: cmovgel %eax, %ecx +; X86-NEXT: cmovgel %eax, %esi ; X86-NEXT: movl $-1, %edx ; X86-NEXT: cmovgel %edx, %ebx ; X86-NEXT: movl %ebx, %eax ; X86-NEXT: negl %eax ; X86-NEXT: movl $-1, %eax -; X86-NEXT: sbbl %edi, %eax +; X86-NEXT: sbbl %esi, %eax ; X86-NEXT: movl $-1, %eax ; X86-NEXT: sbbl %ecx, %eax ; X86-NEXT: movl $-1, %eax -; X86-NEXT: sbbl %esi, %eax +; X86-NEXT: sbbl %edi, %eax ; X86-NEXT: movl $0, %eax ; X86-NEXT: cmovgel %eax, %ebx -; X86-NEXT: cmovgel %edx, %edi -; X86-NEXT: shldl $31, %ebx, %edi +; X86-NEXT: cmovgel %edx, %esi +; X86-NEXT: shldl $31, %ebx, %esi ; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl %edi, 12(%eax) +; X86-NEXT: movl %esi, 12(%eax) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: movl %ecx, 8(%eax) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload diff --git a/llvm/test/CodeGen/X86/seh-catch-all.ll b/llvm/test/CodeGen/X86/seh-catch-all.ll index 5250bb9..4e25aab 100644 --- a/llvm/test/CodeGen/X86/seh-catch-all.ll +++ b/llvm/test/CodeGen/X86/seh-catch-all.ll @@ -40,7 +40,7 @@ catchall: ; CHECK-NEXT: .long (.Llsda_end0-.Llsda_begin0)/16 ; CHECK-NEXT: .Llsda_begin0: ; CHECK-NEXT: .long 
.Ltmp{{[0-9]+}}@IMGREL -; CHECK-NEXT: .long .Ltmp{{[0-9]+}}@IMGREL+1 +; CHECK-NEXT: .long .Ltmp{{[0-9]+}}@IMGREL ; CHECK-NEXT: .long 1 ; CHECK-NEXT: .long .LBB0_2@IMGREL ; CHECK-NEXT: .Llsda_end0: diff --git a/llvm/test/CodeGen/X86/seh-catchpad.ll b/llvm/test/CodeGen/X86/seh-catchpad.ll index d958580..cb85f39 100644 --- a/llvm/test/CodeGen/X86/seh-catchpad.ll +++ b/llvm/test/CodeGen/X86/seh-catchpad.ll @@ -123,23 +123,23 @@ __except.ret: ; preds = %catch.dispatch.7 ; CHECK-NEXT: .long (.Llsda_end0-.Llsda_begin0)/16 ; CHECK-NEXT: .Llsda_begin0: ; CHECK-NEXT: .long .Ltmp0@IMGREL -; CHECK-NEXT: .long .Ltmp1@IMGREL+1 +; CHECK-NEXT: .long .Ltmp1@IMGREL ; CHECK-NEXT: .long 1 ; CHECK-NEXT: .long .LBB1_[[except1bb]]@IMGREL ; CHECK-NEXT: .long .Ltmp0@IMGREL -; CHECK-NEXT: .long .Ltmp1@IMGREL+1 +; CHECK-NEXT: .long .Ltmp1@IMGREL ; CHECK-NEXT: .long "?filt$0@0@main@@"@IMGREL ; CHECK-NEXT: .long .LBB1_[[except2bb]]@IMGREL ; CHECK-NEXT: .long .Ltmp2@IMGREL -; CHECK-NEXT: .long .Ltmp3@IMGREL+1 +; CHECK-NEXT: .long .Ltmp3@IMGREL ; CHECK-NEXT: .long "?dtor$[[finbb:[0-9]+]]@?0?main@4HA"@IMGREL ; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long .Ltmp2@IMGREL -; CHECK-NEXT: .long .Ltmp3@IMGREL+1 +; CHECK-NEXT: .long .Ltmp3@IMGREL ; CHECK-NEXT: .long "?filt$0@0@main@@"@IMGREL ; CHECK-NEXT: .long .LBB1_3@IMGREL ; CHECK-NEXT: .long .Ltmp6@IMGREL -; CHECK-NEXT: .long .Ltmp7@IMGREL+1 +; CHECK-NEXT: .long .Ltmp7@IMGREL ; CHECK-NEXT: .long "?filt$0@0@main@@"@IMGREL ; CHECK-NEXT: .long .LBB1_3@IMGREL ; CHECK-NEXT: .Llsda_end0: diff --git a/llvm/test/CodeGen/X86/seh-except-finally.ll b/llvm/test/CodeGen/X86/seh-except-finally.ll index 7f70655..539d776 100644 --- a/llvm/test/CodeGen/X86/seh-except-finally.ll +++ b/llvm/test/CodeGen/X86/seh-except-finally.ll @@ -83,15 +83,15 @@ __try.cont: ; preds = %__except, %invoke.c ; CHECK-NEXT: .long (.Llsda_end0-.Llsda_begin0)/16 ; CHECK-NEXT: .Llsda_begin0: ; CHECK-NEXT: .long .Ltmp0@IMGREL -; CHECK-NEXT: .long .Ltmp1@IMGREL+1 +; CHECK-NEXT: .long .Ltmp1@IMGREL ; CHECK-NEXT: .long "?dtor$2@?0?use_both@4HA"@IMGREL ; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long .Ltmp0@IMGREL -; CHECK-NEXT: .long .Ltmp1@IMGREL+1 +; CHECK-NEXT: .long .Ltmp1@IMGREL ; CHECK-NEXT: .long "?filt$0@0@use_both@@"@IMGREL ; CHECK-NEXT: .long .LBB0_{{[0-9]+}}@IMGREL ; CHECK-NEXT: .long .Ltmp4@IMGREL -; CHECK-NEXT: .long .Ltmp5@IMGREL+1 +; CHECK-NEXT: .long .Ltmp5@IMGREL ; CHECK-NEXT: .long "?filt$0@0@use_both@@"@IMGREL ; CHECK-NEXT: .long .LBB0_{{[0-9]+}}@IMGREL ; CHECK-NEXT: .Llsda_end0: diff --git a/llvm/test/CodeGen/X86/seh-finally.ll b/llvm/test/CodeGen/X86/seh-finally.ll index 41823df..6093e5e 100644 --- a/llvm/test/CodeGen/X86/seh-finally.ll +++ b/llvm/test/CodeGen/X86/seh-finally.ll @@ -30,7 +30,7 @@ lpad: ; preds = %entry ; X64-NEXT: .long (.Llsda_end0-.Llsda_begin0)/16 # Number of call sites ; X64-NEXT: .Llsda_begin0: ; X64-NEXT: .long .Ltmp0@IMGREL # LabelStart -; X64-NEXT: .long .Ltmp1@IMGREL+1 # LabelEnd +; X64-NEXT: .long .Ltmp1@IMGREL # LabelEnd ; X64-NEXT: .long "?dtor$2@?0?main@4HA"@IMGREL # FinallyFunclet ; X64-NEXT: .long 0 # Null ; X64-NEXT: .Llsda_end0: diff --git a/llvm/test/CodeGen/X86/seh-safe-div.ll b/llvm/test/CodeGen/X86/seh-safe-div.ll index 542d9f6..20169f8 100644 --- a/llvm/test/CodeGen/X86/seh-safe-div.ll +++ b/llvm/test/CodeGen/X86/seh-safe-div.ll @@ -60,6 +60,7 @@ __try.cont: ; CHECK: .Ltmp0: ; CHECK: leaq [[rloc:.*\(%rbp\)]], %rcx ; CHECK: callq try_body +; CHECK: nop ; CHECK-NEXT: .Ltmp1 ; CHECK: [[cont_bb:\.LBB0_[0-9]+]]: ; CHECK: movl [[rloc]], %eax @@ -82,11 +83,11 @@ __try.cont: ; 
CHECK-NEXT: .long (.Llsda_end0-.Llsda_begin0)/16 ; CHECK-NEXT: .Llsda_begin0: ; CHECK-NEXT: .long .Ltmp0@IMGREL -; CHECK-NEXT: .long .Ltmp1@IMGREL+1 +; CHECK-NEXT: .long .Ltmp1@IMGREL ; CHECK-NEXT: .long safe_div_filt0@IMGREL ; CHECK-NEXT: .long [[handler0]]@IMGREL ; CHECK-NEXT: .long .Ltmp0@IMGREL -; CHECK-NEXT: .long .Ltmp1@IMGREL+1 +; CHECK-NEXT: .long .Ltmp1@IMGREL ; CHECK-NEXT: .long safe_div_filt1@IMGREL ; CHECK-NEXT: .long [[handler1]]@IMGREL ; CHECK-NEXT: .Llsda_end0: diff --git a/llvm/test/CodeGen/X86/seh-unwind-inline-asm-codegen.ll b/llvm/test/CodeGen/X86/seh-unwind-inline-asm-codegen.ll index 2c576df..5a6aeb6 100644 --- a/llvm/test/CodeGen/X86/seh-unwind-inline-asm-codegen.ll +++ b/llvm/test/CodeGen/X86/seh-unwind-inline-asm-codegen.ll @@ -56,8 +56,8 @@ declare dso_local void @printf(ptr, ...) ; CHECK-NEXT:$ip2state$test: ; CHECK-NEXT: .long .Lfunc_begin0@IMGREL # IP ; CHECK-NEXT: .long -1 # ToState -; CHECK-NEXT: .long .Ltmp0@IMGREL+1 # IP +; CHECK-NEXT: .long .Ltmp0@IMGREL # IP ; CHECK-NEXT: .long 0 # ToState -; CHECK-NEXT: .long .Ltmp1@IMGREL+1 # IP +; CHECK-NEXT: .long .Ltmp1@IMGREL # IP ; CHECK-NEXT: .long -1 # ToState diff --git a/llvm/test/CodeGen/X86/select-optimize.ll b/llvm/test/CodeGen/X86/select-optimize.ll index d273d09..c7cf9cb 100644 --- a/llvm/test/CodeGen/X86/select-optimize.ll +++ b/llvm/test/CodeGen/X86/select-optimize.ll @@ -229,9 +229,10 @@ define i32 @expensive_val_operand4(ptr nocapture %a, i32 %b, i32 %y, i1 %cmp) { } ; Expensive cold value operand with unsafe-to-sink (due to lifetime-end marker) load (partial slice sinking). -define i32 @expensive_val_operand5(ptr nocapture %a, i32 %b, i32 %y, i1 %cmp) { +define i32 @expensive_val_operand5(i32 %b, i32 %y, i1 %cmp) { ; CHECK-LABEL: @expensive_val_operand5( -; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[A:%.*]], align 8 +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[A]], align 8 ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 2, ptr nonnull [[A]]) ; CHECK-NEXT: [[CMP_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] ; CHECK-NEXT: br i1 [[CMP_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]] @@ -242,6 +243,7 @@ define i32 @expensive_val_operand5(ptr nocapture %a, i32 %b, i32 %y, i1 %cmp) { ; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[X]], [[SELECT_TRUE_SINK]] ], [ [[Y:%.*]], [[TMP0:%.*]] ] ; CHECK-NEXT: ret i32 [[SEL]] ; + %a = alloca i32 %load = load i32, ptr %a, align 8 call void @llvm.lifetime.end.p0(i64 2, ptr nonnull %a) %x = add i32 %load, %b diff --git a/llvm/test/CodeGen/X86/setcc-non-simple-type.ll b/llvm/test/CodeGen/X86/setcc-non-simple-type.ll index d2b292f..2ac2be5 100644 --- a/llvm/test/CodeGen/X86/setcc-non-simple-type.ll +++ b/llvm/test/CodeGen/X86/setcc-non-simple-type.ll @@ -119,8 +119,8 @@ define void @failing(ptr %0, ptr %1) nounwind { ; CHECK-AVX2-NEXT: .LBB0_2: # %vector.body ; CHECK-AVX2-NEXT: # Parent Loop BB0_1 Depth=1 ; CHECK-AVX2-NEXT: # => This Inner Loop Header: Depth=2 -; CHECK-AVX2-NEXT: vmovdqu 1024(%rdx,%rsi), %ymm5 -; CHECK-AVX2-NEXT: vextracti128 $1, %ymm5, %xmm6 +; CHECK-AVX2-NEXT: vmovdqu 1024(%rdx,%rsi), %xmm5 +; CHECK-AVX2-NEXT: vmovdqu 1040(%rdx,%rsi), %xmm6 ; CHECK-AVX2-NEXT: vpextrq $1, %xmm5, %rdi ; CHECK-AVX2-NEXT: vpextrq $1, %xmm6, %r8 ; CHECK-AVX2-NEXT: vmovq %xmm5, %r9 diff --git a/llvm/test/CodeGen/X86/shift-combine.ll b/llvm/test/CodeGen/X86/shift-combine.ll index 76cb4e87..dfeef48 100644 --- a/llvm/test/CodeGen/X86/shift-combine.ll +++ b/llvm/test/CodeGen/X86/shift-combine.ll @@ -792,14 
+792,24 @@ define <4 x i32> @or_tree_with_mismatching_shifts_vec_i32(<4 x i32> %a, <4 x i32 define void @combineShiftOfShiftedLogic(i128 %a1, i32 %a2, ptr %p) { ; X86-LABEL: combineShiftOfShiftedLogic: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 24(%ebp), %eax +; X86-NEXT: movl 28(%ebp), %ecx ; X86-NEXT: movl %eax, 20(%ecx) ; X86-NEXT: movl $0, 16(%ecx) ; X86-NEXT: movl $0, 12(%ecx) ; X86-NEXT: movl $0, 8(%ecx) ; X86-NEXT: movl $0, 4(%ecx) ; X86-NEXT: movl $0, (%ecx) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 ; X86-NEXT: retl ; ; X64-LABEL: combineShiftOfShiftedLogic: diff --git a/llvm/test/CodeGen/X86/shift-i128.ll b/llvm/test/CodeGen/X86/shift-i128.ll index 767bd77..9323cd5 100644 --- a/llvm/test/CodeGen/X86/shift-i128.ll +++ b/llvm/test/CodeGen/X86/shift-i128.ll @@ -212,9 +212,18 @@ entry: } define void @test_lshr_i128_outofrange(i128 %x, ptr nocapture %r) nounwind { -; ALL-LABEL: test_lshr_i128_outofrange: -; ALL: # %bb.0: # %entry -; ALL-NEXT: ret{{[l|q]}} +; i686-LABEL: test_lshr_i128_outofrange: +; i686: # %bb.0: # %entry +; i686-NEXT: pushl %ebp +; i686-NEXT: movl %esp, %ebp +; i686-NEXT: andl $-16, %esp +; i686-NEXT: movl %ebp, %esp +; i686-NEXT: popl %ebp +; i686-NEXT: retl +; +; x86_64-LABEL: test_lshr_i128_outofrange: +; x86_64: # %bb.0: # %entry +; x86_64-NEXT: retq entry: %0 = lshr i128 %x, -1 store i128 %0, ptr %r, align 16 @@ -222,9 +231,18 @@ entry: } define void @test_ashr_i128_outofrange(i128 %x, ptr nocapture %r) nounwind { -; ALL-LABEL: test_ashr_i128_outofrange: -; ALL: # %bb.0: # %entry -; ALL-NEXT: ret{{[l|q]}} +; i686-LABEL: test_ashr_i128_outofrange: +; i686: # %bb.0: # %entry +; i686-NEXT: pushl %ebp +; i686-NEXT: movl %esp, %ebp +; i686-NEXT: andl $-16, %esp +; i686-NEXT: movl %ebp, %esp +; i686-NEXT: popl %ebp +; i686-NEXT: retl +; +; x86_64-LABEL: test_ashr_i128_outofrange: +; x86_64: # %bb.0: # %entry +; x86_64-NEXT: retq entry: %0 = ashr i128 %x, -1 store i128 %0, ptr %r, align 16 @@ -232,9 +250,18 @@ entry: } define void @test_shl_i128_outofrange(i128 %x, ptr nocapture %r) nounwind { -; ALL-LABEL: test_shl_i128_outofrange: -; ALL: # %bb.0: # %entry -; ALL-NEXT: ret{{[l|q]}} +; i686-LABEL: test_shl_i128_outofrange: +; i686: # %bb.0: # %entry +; i686-NEXT: pushl %ebp +; i686-NEXT: movl %esp, %ebp +; i686-NEXT: andl $-16, %esp +; i686-NEXT: movl %ebp, %esp +; i686-NEXT: popl %ebp +; i686-NEXT: retl +; +; x86_64-LABEL: test_shl_i128_outofrange: +; x86_64: # %bb.0: # %entry +; x86_64-NEXT: retq entry: %0 = shl i128 %x, -1 store i128 %0, ptr %r, align 16 @@ -874,26 +901,31 @@ define <2 x i256> @shl_zext_lshr_outofrange(<2 x i128> %a0) { define i128 @lshr_shl_mask(i128 %a0) { ; i686-LABEL: lshr_shl_mask: ; i686: # %bb.0: -; i686-NEXT: pushl %edi +; i686-NEXT: pushl %ebp ; i686-NEXT: .cfi_def_cfa_offset 8 +; i686-NEXT: .cfi_offset %ebp, -8 +; i686-NEXT: movl %esp, %ebp +; i686-NEXT: .cfi_def_cfa_register %ebp +; i686-NEXT: pushl %edi ; i686-NEXT: pushl %esi -; i686-NEXT: .cfi_def_cfa_offset 12 -; i686-NEXT: .cfi_offset %esi, -12 -; i686-NEXT: .cfi_offset %edi, -8 -; i686-NEXT: movl {{[0-9]+}}(%esp), %eax -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; 
i686-NEXT: andl $-16, %esp +; i686-NEXT: .cfi_offset %esi, -16 +; i686-NEXT: .cfi_offset %edi, -12 +; i686-NEXT: movl 8(%ebp), %eax +; i686-NEXT: movl 24(%ebp), %ecx +; i686-NEXT: movl 28(%ebp), %edx +; i686-NEXT: movl 32(%ebp), %esi ; i686-NEXT: movl $2147483647, %edi # imm = 0x7FFFFFFF -; i686-NEXT: andl {{[0-9]+}}(%esp), %edi +; i686-NEXT: andl 36(%ebp), %edi ; i686-NEXT: movl %edi, 12(%eax) ; i686-NEXT: movl %esi, 8(%eax) ; i686-NEXT: movl %edx, 4(%eax) ; i686-NEXT: movl %ecx, (%eax) +; i686-NEXT: leal -8(%ebp), %esp ; i686-NEXT: popl %esi -; i686-NEXT: .cfi_def_cfa_offset 8 ; i686-NEXT: popl %edi -; i686-NEXT: .cfi_def_cfa_offset 4 +; i686-NEXT: popl %ebp +; i686-NEXT: .cfi_def_cfa %esp, 4 ; i686-NEXT: retl $4 ; ; x86_64-LABEL: lshr_shl_mask: diff --git a/llvm/test/CodeGen/X86/smax.ll b/llvm/test/CodeGen/X86/smax.ll index 86891e9..509d444 100644 --- a/llvm/test/CodeGen/X86/smax.ll +++ b/llvm/test/CodeGen/X86/smax.ll @@ -151,31 +151,34 @@ define i128 @test_i128(i128 %a, i128 %b) nounwind { ; X86-LABEL: test_i128: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: cmpl %ebx, %edx -; X86-NEXT: movl %esi, %ebp -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl %edi, %ebp -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 40(%ebp), %ebx +; X86-NEXT: movl 44(%ebp), %edi +; X86-NEXT: cmpl 24(%ebp), %ebx +; X86-NEXT: movl %edi, %esi +; X86-NEXT: sbbl 28(%ebp), %esi +; X86-NEXT: movl 48(%ebp), %edx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: sbbl 32(%ebp), %esi +; X86-NEXT: movl 36(%ebp), %esi +; X86-NEXT: movl 52(%ebp), %ecx ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: sbbl %ebp, %eax -; X86-NEXT: cmovll %ebx, %edx -; X86-NEXT: cmovll {{[0-9]+}}(%esp), %esi -; X86-NEXT: cmovll {{[0-9]+}}(%esp), %edi -; X86-NEXT: cmovll %ebp, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: sbbl %esi, %eax +; X86-NEXT: cmovll 24(%ebp), %ebx +; X86-NEXT: cmovll 28(%ebp), %edi +; X86-NEXT: cmovll 32(%ebp), %edx +; X86-NEXT: cmovll %esi, %ecx +; X86-NEXT: movl 8(%ebp), %eax ; X86-NEXT: movl %ecx, 12(%eax) -; X86-NEXT: movl %edi, 8(%eax) -; X86-NEXT: movl %esi, 4(%eax) -; X86-NEXT: movl %edx, (%eax) +; X86-NEXT: movl %edx, 8(%eax) +; X86-NEXT: movl %edi, 4(%eax) +; X86-NEXT: movl %ebx, (%eax) +; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -717,29 +720,32 @@ define i128 @test_signbits_i128(i128 %a, i128 %b) nounwind { ; ; X86-LABEL: test_signbits_i128: ; X86: # %bb.0: -; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: shrdl $28, %edi, %ecx -; X86-NEXT: sarl $28, %edi +; X86-NEXT: andl $-16, %esp +; X86-NEXT: movl 32(%ebp), %esi +; X86-NEXT: movl 36(%ebp), %eax +; X86-NEXT: movl 48(%ebp), %ecx +; X86-NEXT: movl 52(%ebp), %edx +; X86-NEXT: shrdl $28, %edx, %ecx +; X86-NEXT: sarl $28, %edx ; X86-NEXT: cmpl %esi, %ecx -; X86-NEXT: movl %edi, %ebx -; X86-NEXT: sbbl 
%edx, %ebx +; X86-NEXT: movl %edx, %edi +; X86-NEXT: sbbl %eax, %edi ; X86-NEXT: cmovll %esi, %ecx -; X86-NEXT: cmovll %edx, %edi -; X86-NEXT: movl %edi, 4(%eax) -; X86-NEXT: sarl $31, %edi +; X86-NEXT: cmovll %eax, %edx +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl %edx, 4(%eax) ; X86-NEXT: movl %ecx, (%eax) -; X86-NEXT: movl %edi, 12(%eax) -; X86-NEXT: movl %edi, 8(%eax) +; X86-NEXT: sarl $31, %edx +; X86-NEXT: movl %edx, 12(%eax) +; X86-NEXT: movl %edx, 8(%eax) +; X86-NEXT: leal -8(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi -; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl $4 %ax = ashr i128 %a, 64 %bx = ashr i128 %b, 92 diff --git a/llvm/test/CodeGen/X86/smin.ll b/llvm/test/CodeGen/X86/smin.ll index 8907f6c..5e9fe27 100644 --- a/llvm/test/CodeGen/X86/smin.ll +++ b/llvm/test/CodeGen/X86/smin.ll @@ -151,32 +151,34 @@ define i128 @test_i128(i128 %a, i128 %b) nounwind { ; X86-LABEL: test_i128: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: cmpl %edx, %ebx -; X86-NEXT: sbbl %esi, %ebp -; X86-NEXT: movl %eax, %ebp -; X86-NEXT: sbbl %ecx, %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 40(%ebp), %ecx +; X86-NEXT: movl 44(%ebp), %edx +; X86-NEXT: movl 28(%ebp), %esi +; X86-NEXT: cmpl %ecx, 24(%ebp) +; X86-NEXT: sbbl %edx, %esi +; X86-NEXT: movl 48(%ebp), %esi +; X86-NEXT: movl 32(%ebp), %ebx +; X86-NEXT: sbbl %esi, %ebx +; X86-NEXT: movl 52(%ebp), %ebx +; X86-NEXT: movl 36(%ebp), %edi ; X86-NEXT: movl %edi, %eax -; X86-NEXT: sbbl %ebp, %eax -; X86-NEXT: cmovll %ebx, %edx -; X86-NEXT: cmovll {{[0-9]+}}(%esp), %esi -; X86-NEXT: cmovll {{[0-9]+}}(%esp), %ecx -; X86-NEXT: cmovll %edi, %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %ebp, 12(%eax) -; X86-NEXT: movl %ecx, 8(%eax) -; X86-NEXT: movl %esi, 4(%eax) -; X86-NEXT: movl %edx, (%eax) +; X86-NEXT: sbbl %ebx, %eax +; X86-NEXT: cmovll 24(%ebp), %ecx +; X86-NEXT: cmovll 28(%ebp), %edx +; X86-NEXT: cmovll 32(%ebp), %esi +; X86-NEXT: cmovll %edi, %ebx +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl %ebx, 12(%eax) +; X86-NEXT: movl %esi, 8(%eax) +; X86-NEXT: movl %edx, 4(%eax) +; X86-NEXT: movl %ecx, (%eax) +; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -718,29 +720,32 @@ define i128 @test_signbits_i128(i128 %a, i128 %b) nounwind { ; ; X86-LABEL: test_signbits_i128: ; X86: # %bb.0: -; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: shrdl $28, %edi, %ecx -; X86-NEXT: sarl $28, %edi +; X86-NEXT: andl $-16, %esp +; X86-NEXT: movl 32(%ebp), %esi +; X86-NEXT: movl 36(%ebp), %eax +; X86-NEXT: movl 48(%ebp), %ecx +; X86-NEXT: movl 52(%ebp), %edx +; X86-NEXT: shrdl $28, %edx, %ecx +; X86-NEXT: sarl $28, %edx ; X86-NEXT: cmpl %ecx, %esi -; X86-NEXT: movl %edx, %ebx -; X86-NEXT: sbbl %edi, %ebx +; X86-NEXT: movl %eax, 
%edi +; X86-NEXT: sbbl %edx, %edi ; X86-NEXT: cmovll %esi, %ecx -; X86-NEXT: cmovll %edx, %edi -; X86-NEXT: movl %edi, 4(%eax) -; X86-NEXT: sarl $31, %edi +; X86-NEXT: cmovll %eax, %edx +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl %edx, 4(%eax) ; X86-NEXT: movl %ecx, (%eax) -; X86-NEXT: movl %edi, 12(%eax) -; X86-NEXT: movl %edi, 8(%eax) +; X86-NEXT: sarl $31, %edx +; X86-NEXT: movl %edx, 12(%eax) +; X86-NEXT: movl %edx, 8(%eax) +; X86-NEXT: leal -8(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi -; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl $4 %ax = ashr i128 %a, 64 %bx = ashr i128 %b, 92 diff --git a/llvm/test/CodeGen/X86/stack-coloring-wineh.ll b/llvm/test/CodeGen/X86/stack-coloring-wineh.ll index e2de2ff..74fe07e 100644 --- a/llvm/test/CodeGen/X86/stack-coloring-wineh.ll +++ b/llvm/test/CodeGen/X86/stack-coloring-wineh.ll @@ -84,12 +84,12 @@ define void @pr66984(ptr %arg) personality ptr @__CxxFrameHandler3 { ; X86_64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X86_64-NEXT: .Ltmp0: ; X86_64-NEXT: callq throw +; X86_64-NEXT: nop ; X86_64-NEXT: .Ltmp1: ; X86_64-NEXT: # %bb.1: # %bb14 ; X86_64-NEXT: .LBB0_3: # Block address taken ; X86_64-NEXT: # %exit ; X86_64-NEXT: $ehgcr_0_3: -; X86_64-NEXT: nop ; X86_64-NEXT: .seh_startepilogue ; X86_64-NEXT: addq $64, %rsp ; X86_64-NEXT: popq %rbp diff --git a/llvm/test/CodeGen/X86/stack-protector.ll b/llvm/test/CodeGen/X86/stack-protector.ll index f4f3ae4..772e776 100644 --- a/llvm/test/CodeGen/X86/stack-protector.ll +++ b/llvm/test/CodeGen/X86/stack-protector.ll @@ -6,6 +6,7 @@ ; RUN: llc -mtriple=amd64-pc-openbsd < %s -o - | FileCheck --check-prefix=OPENBSD-AMD64 %s ; RUN: llc -mtriple=i386-pc-windows-msvc < %s -o - | FileCheck -check-prefix=MSVC-I386 %s ; RUN: llc -mtriple=x86_64-w64-mingw32 < %s -o - | FileCheck --check-prefix=MINGW-X64 %s +; RUN: llc -mtriple=x86_64-pc-cygwin < %s -o - | FileCheck --check-prefix=MINGW-X64 %s ; RUN: llc -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck --check-prefix=IGNORE_INTRIN %s %struct.foo = type { [16 x i8] } diff --git a/llvm/test/CodeGen/X86/swap.ll b/llvm/test/CodeGen/X86/swap.ll index e556900..3330403 100644 --- a/llvm/test/CodeGen/X86/swap.ll +++ b/llvm/test/CodeGen/X86/swap.ll @@ -47,12 +47,10 @@ define dso_local void @onealloc_noreadback(ptr nocapture %a, ptr nocapture %b) l entry: %alloc = alloca [16 x i8], i8 2, align 1 %part2 = getelementptr inbounds [16 x i8], ptr %alloc, i64 1, i64 0 - call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %alloc) - call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %part2) + call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %alloc) call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %alloc, ptr align 1 %a, i64 16, i1 false) tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %part2, ptr align 1 %b, i64 16, i1 false) - call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %alloc) - call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %part2) + call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %alloc) ret void } @@ -116,19 +114,16 @@ define dso_local void @onealloc_readback_1(ptr nocapture %a, ptr nocapture %b) l ; AA-LABEL: onealloc_readback_1: ; AA: # %bb.0: # %entry ; AA-NEXT: vmovups (%rsi), %xmm0 -; AA-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; AA-NEXT: vmovups %xmm0, (%rdi) ; AA-NEXT: retq entry: %alloc = alloca [16 x i8], i8 2, align 1 %part1 = getelementptr inbounds [16 x i8], ptr %alloc, i64 1, i64 0 - call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %part1) - call void 
@llvm.lifetime.start.p0(i64 16, ptr nonnull %alloc) + call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %alloc) call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %part1, ptr align 1 %a, i64 16, i1 false) call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %alloc, ptr align 1 %b, i64 16, i1 false) - call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %part1) tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %a, ptr align 1 %alloc, i64 16, i1 false) - call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %alloc) + call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %alloc) ret void } @@ -145,19 +140,16 @@ define dso_local void @onealloc_readback_2(ptr nocapture %a, ptr nocapture %b) l ; AA-LABEL: onealloc_readback_2: ; AA: # %bb.0: # %entry ; AA-NEXT: vmovups (%rsi), %xmm0 -; AA-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; AA-NEXT: vmovups %xmm0, (%rdi) ; AA-NEXT: retq entry: %alloc = alloca [16 x i8], i8 2, align 1 %part2 = getelementptr inbounds [16 x i8], ptr %alloc, i64 1, i64 0 - call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %alloc) - call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %part2) + call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %alloc) call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %alloc, ptr align 1 %a, i64 16, i1 false) call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %part2, ptr align 1 %b, i64 16, i1 false) - call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %alloc) tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %a, ptr align 1 %part2, i64 16, i1 false) - call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %part2) + call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %alloc) ret void } diff --git a/llvm/test/CodeGen/X86/taildup-heapallocsite.ll b/llvm/test/CodeGen/X86/taildup-heapallocsite.ll index 967e125..f3bef47 100644 --- a/llvm/test/CodeGen/X86/taildup-heapallocsite.ll +++ b/llvm/test/CodeGen/X86/taildup-heapallocsite.ll @@ -37,9 +37,11 @@ cond.end: ; preds = %entry, %cond.true ; CHECK: testq ; CHECK: je ; CHECK: callq alloc +; CHECK-NEXT: nop ; CHECK-NEXT: [[L1:.Ltmp[0-9]+]] ; CHECK: jmp f2 # TAILCALL ; CHECK: callq alloc +; CHECK-NEXT: nop ; CHECK-NEXT: [[L3:.Ltmp[0-9]+]] ; CHECK: jmp f2 # TAILCALL diff --git a/llvm/test/CodeGen/X86/ucmp.ll b/llvm/test/CodeGen/X86/ucmp.ll index 6a52acf..7f17299 100644 --- a/llvm/test/CodeGen/X86/ucmp.ll +++ b/llvm/test/CodeGen/X86/ucmp.ll @@ -107,29 +107,33 @@ define i8 @ucmp.8.128(i128 %x, i128 %y) nounwind { ; X86-LABEL: ucmp.8.128: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl %ebp, %eax -; X86-NEXT: sbbl %esi, %eax +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %edx +; X86-NEXT: movl 28(%ebp), %ecx +; X86-NEXT: cmpl %eax, 24(%ebp) ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: sbbl %edx, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl 16(%ebp), %ebx +; X86-NEXT: movl 32(%ebp), %eax +; X86-NEXT: sbbl %ebx, %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl 36(%ebp), %esi +; X86-NEXT: movl %esi, %eax ; X86-NEXT: sbbl %ecx, %eax ; X86-NEXT: setb %al -; X86-NEXT: cmpl %ebx, 
{{[0-9]+}}(%esp) -; X86-NEXT: sbbl %ebp, %esi -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx -; X86-NEXT: sbbl %edi, %ecx +; X86-NEXT: movl 8(%ebp), %edi +; X86-NEXT: cmpl 24(%ebp), %edi +; X86-NEXT: sbbl 28(%ebp), %edx +; X86-NEXT: sbbl 32(%ebp), %ebx +; X86-NEXT: sbbl %esi, %ecx ; X86-NEXT: sbbb $0, %al +; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx diff --git a/llvm/test/CodeGen/X86/udiv_fix.ll b/llvm/test/CodeGen/X86/udiv_fix.ll index 5b1e054..82dfeee 100644 --- a/llvm/test/CodeGen/X86/udiv_fix.ll +++ b/llvm/test/CodeGen/X86/udiv_fix.ll @@ -153,26 +153,28 @@ define i64 @func5(i64 %x, i64 %y) nounwind { ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: pushl %esi ; X86-NEXT: andl $-16, %esp -; X86-NEXT: subl $32, %esp +; X86-NEXT: subl $80, %esp ; X86-NEXT: movl 8(%ebp), %eax ; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl 16(%ebp), %edx +; X86-NEXT: movl 20(%ebp), %esi +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X86-NEXT: movl %ecx, %edx ; X86-NEXT: shrl %edx +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X86-NEXT: shldl $31, %eax, %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; X86-NEXT: shll $31, %eax -; X86-NEXT: movl %esp, %esi -; X86-NEXT: pushl $0 -; X86-NEXT: pushl $0 -; X86-NEXT: pushl 20(%ebp) -; X86-NEXT: pushl 16(%ebp) -; X86-NEXT: pushl $0 -; X86-NEXT: pushl %edx -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl %eax -; X86-NEXT: pushl %esi +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: calll __udivti3 -; X86-NEXT: addl $32, %esp -; X86-NEXT: movl (%esp), %eax +; X86-NEXT: subl $4, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: leal -4(%ebp), %esp ; X86-NEXT: popl %esi diff --git a/llvm/test/CodeGen/X86/udiv_fix_sat.ll b/llvm/test/CodeGen/X86/udiv_fix_sat.ll index 30a7f80..3da5973 100644 --- a/llvm/test/CodeGen/X86/udiv_fix_sat.ll +++ b/llvm/test/CodeGen/X86/udiv_fix_sat.ll @@ -194,32 +194,34 @@ define i64 @func5(i64 %x, i64 %y) nounwind { ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: pushl %esi ; X86-NEXT: andl $-16, %esp -; X86-NEXT: subl $32, %esp +; X86-NEXT: subl $80, %esp ; X86-NEXT: movl 8(%ebp), %eax ; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl 16(%ebp), %edx +; X86-NEXT: movl 20(%ebp), %esi +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X86-NEXT: movl %ecx, %edx ; X86-NEXT: shrl %edx +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X86-NEXT: shldl $31, %eax, %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; X86-NEXT: shll $31, %eax -; X86-NEXT: movl %esp, %esi -; X86-NEXT: pushl $0 -; X86-NEXT: pushl $0 -; X86-NEXT: pushl 20(%ebp) -; X86-NEXT: pushl 16(%ebp) -; X86-NEXT: pushl $0 -; X86-NEXT: pushl %edx -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl %eax -; X86-NEXT: pushl %esi +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: calll __udivti3 -; X86-NEXT: addl $32, %esp +; X86-NEXT: subl $4, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl $-1, %eax ; X86-NEXT: movl $-1, %edx ; X86-NEXT: jne .LBB4_2 ; X86-NEXT: # %bb.1: -; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; 
X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: .LBB4_2: ; X86-NEXT: leal -4(%ebp), %esp diff --git a/llvm/test/CodeGen/X86/umax.ll b/llvm/test/CodeGen/X86/umax.ll index f589d4a..7ef8599 100644 --- a/llvm/test/CodeGen/X86/umax.ll +++ b/llvm/test/CodeGen/X86/umax.ll @@ -232,31 +232,34 @@ define i128 @test_i128(i128 %a, i128 %b) nounwind { ; X86-LABEL: test_i128: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: cmpl %ebx, %edx -; X86-NEXT: movl %esi, %ebp -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl %edi, %ebp -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 40(%ebp), %ebx +; X86-NEXT: movl 44(%ebp), %edi +; X86-NEXT: cmpl 24(%ebp), %ebx +; X86-NEXT: movl %edi, %esi +; X86-NEXT: sbbl 28(%ebp), %esi +; X86-NEXT: movl 48(%ebp), %edx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: sbbl 32(%ebp), %esi +; X86-NEXT: movl 36(%ebp), %esi +; X86-NEXT: movl 52(%ebp), %ecx ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: sbbl %ebp, %eax -; X86-NEXT: cmovbl %ebx, %edx -; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %esi -; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %edi -; X86-NEXT: cmovbl %ebp, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: sbbl %esi, %eax +; X86-NEXT: cmovbl 24(%ebp), %ebx +; X86-NEXT: cmovbl 28(%ebp), %edi +; X86-NEXT: cmovbl 32(%ebp), %edx +; X86-NEXT: cmovbl %esi, %ecx +; X86-NEXT: movl 8(%ebp), %eax ; X86-NEXT: movl %ecx, 12(%eax) -; X86-NEXT: movl %edi, 8(%eax) -; X86-NEXT: movl %esi, 4(%eax) -; X86-NEXT: movl %edx, (%eax) +; X86-NEXT: movl %edx, 8(%eax) +; X86-NEXT: movl %edi, 4(%eax) +; X86-NEXT: movl %ebx, (%eax) +; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -282,37 +285,40 @@ define i128 @test_i128_1(i128 %a) nounwind { ; X86-LABEL: test_i128_1: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 32(%ebp), %edx +; X86-NEXT: movl 24(%ebp), %eax ; X86-NEXT: cmpl $1, %eax -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: adcl $0, %ebx -; X86-NEXT: testl %edx, %edx -; X86-NEXT: movl $1, %edi -; X86-NEXT: cmovnel %eax, %edi -; X86-NEXT: cmovel %ebx, %edi -; X86-NEXT: xorl %ebx, %ebx -; X86-NEXT: movl %ecx, %ebp -; X86-NEXT: negl %ebp -; X86-NEXT: movl $0, %ebp -; X86-NEXT: sbbl %esi, %ebp -; X86-NEXT: movl $1, %ebp -; X86-NEXT: cmovbl %eax, %ebp -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: cmovbl %edx, %ebx -; X86-NEXT: orl %esi, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %esi, 12(%eax) -; X86-NEXT: movl %ecx, 8(%eax) -; X86-NEXT: cmovel %edi, %ebp -; X86-NEXT: cmovel %edx, %ebx -; X86-NEXT: movl %ebx, 4(%eax) -; X86-NEXT: movl %ebp, (%eax) +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: adcl $0, %ecx +; X86-NEXT: cmpl $0, 28(%ebp) +; X86-NEXT: movl $1, %esi +; X86-NEXT: cmovnel %eax, %esi +; X86-NEXT: cmovel %ecx, %esi +; X86-NEXT: xorl %edi, %edi +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: negl %ecx 
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: sbbl %ecx, %ebx
+; X86-NEXT: movl $1, %ebx
+; X86-NEXT: cmovbl %eax, %ebx
+; X86-NEXT: cmovbl 28(%ebp), %edi
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: cmovel %esi, %ebx
+; X86-NEXT: cmovel 28(%ebp), %edi
+; X86-NEXT: movl %edi, 4(%eax)
+; X86-NEXT: movl %ebx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
 ; X86-NEXT: popl %ebx
@@ -1312,29 +1318,32 @@ define i128 @test_signbits_i128(i128 %a, i128 %b) nounwind {
 ;
 ; X86-LABEL: test_signbits_i128:
 ; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: shrdl $28, %edi, %ecx
-; X86-NEXT: sarl $28, %edi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 48(%ebp), %ecx
+; X86-NEXT: movl 52(%ebp), %edx
+; X86-NEXT: shrdl $28, %edx, %ecx
+; X86-NEXT: sarl $28, %edx
 ; X86-NEXT: cmpl %esi, %ecx
-; X86-NEXT: movl %edi, %ebx
-; X86-NEXT: sbbl %edx, %ebx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: sbbl %eax, %edi
 ; X86-NEXT: cmovbl %esi, %ecx
-; X86-NEXT: cmovbl %edx, %edi
-; X86-NEXT: movl %edi, 4(%eax)
-; X86-NEXT: sarl $31, %edi
+; X86-NEXT: cmovbl %eax, %edx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %edx, 4(%eax)
 ; X86-NEXT: movl %ecx, (%eax)
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %edi, 8(%eax)
+; X86-NEXT: sarl $31, %edx
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
 ; X86-NEXT: retl $4
 %ax = ashr i128 %a, 64
 %bx = ashr i128 %b, 92
diff --git a/llvm/test/CodeGen/X86/umin.ll b/llvm/test/CodeGen/X86/umin.ll
index 7a5cdbb..c927abf 100644
--- a/llvm/test/CodeGen/X86/umin.ll
+++ b/llvm/test/CodeGen/X86/umin.ll
@@ -147,32 +147,34 @@ define i128 @test_i128(i128 %a, i128 %b) nounwind {
 ; X86-LABEL: test_i128:
 ; X86: # %bb.0:
 ; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: pushl %ebx
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: cmpl %edx, %ebx
-; X86-NEXT: sbbl %esi, %ebp
-; X86-NEXT: movl %eax, %ebp
-; X86-NEXT: sbbl %ecx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 44(%ebp), %edx
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: cmpl %ecx, 24(%ebp)
+; X86-NEXT: sbbl %edx, %esi
+; X86-NEXT: movl 48(%ebp), %esi
+; X86-NEXT: movl 32(%ebp), %ebx
+; X86-NEXT: sbbl %esi, %ebx
+; X86-NEXT: movl 52(%ebp), %ebx
+; X86-NEXT: movl 36(%ebp), %edi
 ; X86-NEXT: movl %edi, %eax
-; X86-NEXT: sbbl %ebp, %eax
-; X86-NEXT: cmovbl %ebx, %edx
-; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmovbl %edi, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %ebp, 12(%eax)
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: cmovbl 24(%ebp), %ecx
+; X86-NEXT: cmovbl 28(%ebp), %edx
+; X86-NEXT: cmovbl 32(%ebp), %esi
+; X86-NEXT: cmovbl %edi, %ebx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
 ; X86-NEXT: popl %ebx
@@ -727,29 +729,32 @@ define i128 @test_signbits_i128(i128 %a, i128 %b) nounwind {
 ;
 ; X86-LABEL: test_signbits_i128:
 ; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: shrdl $28, %edi, %ecx
-; X86-NEXT: sarl $28, %edi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl 48(%ebp), %ecx
+; X86-NEXT: movl 52(%ebp), %edx
+; X86-NEXT: shrdl $28, %edx, %ecx
+; X86-NEXT: sarl $28, %edx
 ; X86-NEXT: cmpl %ecx, %esi
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: sbbl %edi, %ebx
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: sbbl %edx, %edi
 ; X86-NEXT: cmovbl %esi, %ecx
-; X86-NEXT: cmovbl %edx, %edi
-; X86-NEXT: movl %edi, 4(%eax)
-; X86-NEXT: sarl $31, %edi
+; X86-NEXT: cmovbl %eax, %edx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %edx, 4(%eax)
 ; X86-NEXT: movl %ecx, (%eax)
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %edi, 8(%eax)
+; X86-NEXT: sarl $31, %edx
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: leal -8(%ebp), %esp
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
 ; X86-NEXT: retl $4
 %ax = ashr i128 %a, 64
 %bx = ashr i128 %b, 92
diff --git a/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
index 4c31703..89afd1b 100644
--- a/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
@@ -38,8 +38,8 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
 ; X86-NEXT: .cfi_def_cfa_offset 16
 ; X86-NEXT: pushl %esi
 ; X86-NEXT: .cfi_def_cfa_offset 20
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: .cfi_def_cfa_offset 44
+; X86-NEXT: subl $28, %esp
+; X86-NEXT: .cfi_def_cfa_offset 48
 ; X86-NEXT: .cfi_offset %esi, -20
 ; X86-NEXT: .cfi_offset %edi, -16
 ; X86-NEXT: .cfi_offset %ebx, -12
@@ -147,7 +147,7 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
 ; X86-NEXT: andb $1, %al
 ; X86-NEXT: movb %al, 16(%ecx)
 ; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $28, %esp
 ; X86-NEXT: .cfi_def_cfa_offset 20
 ; X86-NEXT: popl %esi
 ; X86-NEXT: .cfi_def_cfa_offset 16
diff --git a/llvm/test/CodeGen/X86/vec_extract.ll b/llvm/test/CodeGen/X86/vec_extract.ll
index 087cd30..9bd38db 100644
--- a/llvm/test/CodeGen/X86/vec_extract.ll
+++ b/llvm/test/CodeGen/X86/vec_extract.ll
@@ -104,6 +104,72 @@ entry:
 }
 declare <2 x double> @foo()
 
+define i64 @pr150117(<31 x i8> %a0) nounwind {
+; X86-LABEL: pr150117:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: shll $8, %edx
+; X86-NEXT: orl %ebx, %edx
+; X86-NEXT: shll $8, %edi
+; X86-NEXT: orl %esi, %edi
+; X86-NEXT: shll $16, %ecx
+; X86-NEXT: orl %edi, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: shll $24, %esi
+; X86-NEXT: orl %ecx, %esi
+; X86-NEXT: movd %esi, %xmm0
+; X86-NEXT: pinsrw $2, %edx, %xmm0
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: shll $8, %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: pinsrw $3, %ecx, %xmm0
+; X86-NEXT: movd %xmm0, %eax
+; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-NEXT: movd %xmm0, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl
+;
+; X64-LABEL: pr150117:
+; X64: # %bb.0:
+; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
+; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
+; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
+; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
+; X64-NEXT: movl {{[0-9]+}}(%rsp), %r8d
+; X64-NEXT: shll $8, %r8d
+; X64-NEXT: orl %edi, %r8d
+; X64-NEXT: shll $8, %esi
+; X64-NEXT: orl %edx, %esi
+; X64-NEXT: shll $16, %ecx
+; X64-NEXT: orl %esi, %ecx
+; X64-NEXT: movl {{[0-9]+}}(%rsp), %edx
+; X64-NEXT: shll $24, %edx
+; X64-NEXT: orl %ecx, %edx
+; X64-NEXT: movd %edx, %xmm0
+; X64-NEXT: pinsrw $2, %r8d, %xmm0
+; X64-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; X64-NEXT: shll $8, %ecx
+; X64-NEXT: orl %eax, %ecx
+; X64-NEXT: pinsrw $3, %ecx, %xmm0
+; X64-NEXT: movq %xmm0, %rax
+; X64-NEXT: retq
+ %shuffle = shufflevector <31 x i8> %a0, <31 x i8> zeroinitializer, <32 x i32> <i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+ %bitcast = bitcast <32 x i8> %shuffle to <4 x i64>
+ %elt = extractelement <4 x i64> %bitcast, i64 0
+ ret i64 %elt
+}
+
 ; OSS-Fuzz #15662
 ; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=15662
 define <4 x i32> @ossfuzz15662(ptr %in) {
diff --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll
index 5dcf190..834dfd6 100644
--- a/llvm/test/CodeGen/X86/vector-bitreverse.ll
+++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll
@@ -8,7 +8,8 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefixes=ALL,AVX,AVX512,AVX512BW
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=ALL,XOP,XOPAVX1
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=ALL,XOP,XOPAVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3,+gfni | FileCheck %s --check-prefixes=ALL,GFNISSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+gfni | FileCheck %s --check-prefixes=ALL,GFNISSE,GFNISSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3,+gfni | FileCheck %s --check-prefixes=ALL,GFNISSE,GFNISSSE3
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+gfni | FileCheck %s --check-prefixes=ALL,GFNIAVX,GFNIAVX1
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+gfni | FileCheck %s --check-prefixes=ALL,GFNIAVX,GFNIAVX2
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+gfni | FileCheck %s --check-prefixes=ALL,GFNIAVX,GFNIAVX512,GFNIAVX512F
@@ -492,11 +493,20 @@ define <8 x i16> @test_bitreverse_v8i16(<8 x i16> %a) nounwind {
 ; XOP-NEXT: vpperm {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0, %xmm0
 ; XOP-NEXT: retq
 ;
-; GFNISSE-LABEL: test_bitreverse_v8i16:
-; GFNISSE: # %bb.0:
-; GFNISSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
-; GFNISSE-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; GFNISSE-NEXT: retq
+; GFNISSE2-LABEL: test_bitreverse_v8i16:
+; GFNISSE2: # %bb.0:
+; GFNISSE2-NEXT: movdqa %xmm0, %xmm1
+; GFNISSE2-NEXT: psrlw $8, %xmm1
+; GFNISSE2-NEXT: psllw $8, %xmm0
+; GFNISSE2-NEXT: por %xmm1, %xmm0
+; GFNISSE2-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; GFNISSE2-NEXT: retq
+;
+; GFNISSSE3-LABEL: test_bitreverse_v8i16:
+; GFNISSSE3: # %bb.0:
+; GFNISSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
+; GFNISSSE3-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; GFNISSSE3-NEXT: retq
 ;
 ; GFNIAVX-LABEL: test_bitreverse_v8i16:
 ; GFNIAVX: # %bb.0:
@@ -605,11 +615,25 @@ define <4 x i32> @test_bitreverse_v4i32(<4 x i32> %a) nounwind {
 ; XOP-NEXT: vpperm {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0, %xmm0
 ; XOP-NEXT: retq
 ;
-; GFNISSE-LABEL: test_bitreverse_v4i32:
-; GFNISSE: # %bb.0:
-; GFNISSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
-; GFNISSE-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; GFNISSE-NEXT: retq
+; GFNISSE2-LABEL: test_bitreverse_v4i32:
+; GFNISSE2: # %bb.0:
+; GFNISSE2-NEXT: pxor %xmm1, %xmm1
+; GFNISSE2-NEXT: movdqa %xmm0, %xmm2
+; GFNISSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
+; GFNISSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
+; GFNISSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
+; GFNISSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; GFNISSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; GFNISSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; GFNISSE2-NEXT: packuswb %xmm2, %xmm0
+; GFNISSE2-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; GFNISSE2-NEXT: retq
+;
+; GFNISSSE3-LABEL: test_bitreverse_v4i32:
+; GFNISSSE3: # %bb.0:
+; GFNISSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; GFNISSSE3-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; GFNISSSE3-NEXT: retq
 ;
 ; GFNIAVX-LABEL: test_bitreverse_v4i32:
 ; GFNIAVX: # %bb.0:
@@ -720,11 +744,27 @@ define <2 x i64> @test_bitreverse_v2i64(<2 x i64> %a) nounwind {
 ; XOP-NEXT: vpperm {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0, %xmm0
 ; XOP-NEXT: retq
 ;
-; GFNISSE-LABEL: test_bitreverse_v2i64:
-; GFNISSE: # %bb.0:
-; GFNISSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
-; GFNISSE-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; GFNISSE-NEXT: retq
+; GFNISSE2-LABEL: test_bitreverse_v2i64:
+; GFNISSE2: # %bb.0:
+; GFNISSE2-NEXT: pxor %xmm1, %xmm1
+; GFNISSE2-NEXT: movdqa %xmm0, %xmm2
+; GFNISSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
+; GFNISSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; GFNISSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
+; GFNISSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
+; GFNISSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; GFNISSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; GFNISSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; GFNISSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; GFNISSE2-NEXT: packuswb %xmm2, %xmm0
+; GFNISSE2-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; GFNISSE2-NEXT: retq
+;
+; GFNISSSE3-LABEL: test_bitreverse_v2i64:
+; GFNISSSE3: # %bb.0:
+; GFNISSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
+; GFNISSSE3-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; GFNISSSE3-NEXT: retq
 ;
 ; GFNIAVX-LABEL: test_bitreverse_v2i64:
 ; GFNIAVX: # %bb.0:
@@ -1042,15 +1082,30 @@ define <16 x i16> @test_bitreverse_v16i16(<16 x i16> %a) nounwind {
 ; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; XOPAVX2-NEXT: retq
 ;
-; GFNISSE-LABEL: test_bitreverse_v16i16:
-; GFNISSE: # %bb.0:
-; GFNISSE-NEXT: movdqa {{.*#+}} xmm2 = [1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
-; GFNISSE-NEXT: pshufb %xmm2, %xmm0
-; GFNISSE-NEXT: movdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
-; GFNISSE-NEXT: gf2p8affineqb $0, %xmm3, %xmm0
-; GFNISSE-NEXT: pshufb %xmm2, %xmm1
-; GFNISSE-NEXT: gf2p8affineqb $0, %xmm3, %xmm1
-; GFNISSE-NEXT: retq
+; GFNISSE2-LABEL: test_bitreverse_v16i16:
+; GFNISSE2: # %bb.0:
+; GFNISSE2-NEXT: movdqa %xmm0, %xmm2
+; GFNISSE2-NEXT: psrlw $8, %xmm2
+; GFNISSE2-NEXT: psllw $8, %xmm0
+; GFNISSE2-NEXT: por %xmm2, %xmm0
+; GFNISSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; GFNISSE2-NEXT: gf2p8affineqb $0, %xmm2, %xmm0
+; GFNISSE2-NEXT: movdqa %xmm1, %xmm3
+; GFNISSE2-NEXT: psrlw $8, %xmm3
+; GFNISSE2-NEXT: psllw $8, %xmm1
+; GFNISSE2-NEXT: por %xmm3, %xmm1
+; GFNISSE2-NEXT: gf2p8affineqb $0, %xmm2, %xmm1
+; GFNISSE2-NEXT: retq
+;
+; GFNISSSE3-LABEL: test_bitreverse_v16i16:
+; GFNISSSE3: # %bb.0:
+; GFNISSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
+; GFNISSSE3-NEXT: pshufb %xmm2, %xmm0
+; GFNISSSE3-NEXT: movdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; GFNISSSE3-NEXT: gf2p8affineqb $0, %xmm3, %xmm0
+; GFNISSSE3-NEXT: pshufb %xmm2, %xmm1
+; GFNISSSE3-NEXT: gf2p8affineqb $0, %xmm3, %xmm1
+; GFNISSSE3-NEXT: retq
 ;
 ; GFNIAVX1-LABEL: test_bitreverse_v16i16:
 ; GFNIAVX1: # %bb.0:
@@ -1241,15 +1296,39 @@ define <8 x i32> @test_bitreverse_v8i32(<8 x i32> %a) nounwind {
 ; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; XOPAVX2-NEXT: retq
 ;
-; GFNISSE-LABEL: test_bitreverse_v8i32:
-; GFNISSE: # %bb.0:
-; GFNISSE-NEXT: movdqa {{.*#+}} xmm2 = [3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
-; GFNISSE-NEXT: pshufb %xmm2, %xmm0
-; GFNISSE-NEXT: movdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
-; GFNISSE-NEXT: gf2p8affineqb $0, %xmm3, %xmm0
-; GFNISSE-NEXT: pshufb %xmm2, %xmm1
-; GFNISSE-NEXT: gf2p8affineqb $0, %xmm3, %xmm1
-; GFNISSE-NEXT: retq
+; GFNISSE2-LABEL: test_bitreverse_v8i32:
+; GFNISSE2: # %bb.0:
+; GFNISSE2-NEXT: pxor %xmm2, %xmm2
+; GFNISSE2-NEXT: movdqa %xmm0, %xmm3
+; GFNISSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
+; GFNISSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7]
+; GFNISSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,6,5,4]
+; GFNISSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; GFNISSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; GFNISSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; GFNISSE2-NEXT: packuswb %xmm3, %xmm0
+; GFNISSE2-NEXT: movdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; GFNISSE2-NEXT: gf2p8affineqb $0, %xmm3, %xmm0
+; GFNISSE2-NEXT: movdqa %xmm1, %xmm4
+; GFNISSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm2[8],xmm4[9],xmm2[9],xmm4[10],xmm2[10],xmm4[11],xmm2[11],xmm4[12],xmm2[12],xmm4[13],xmm2[13],xmm4[14],xmm2[14],xmm4[15],xmm2[15]
+; GFNISSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[3,2,1,0,4,5,6,7]
+; GFNISSE2-NEXT: pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,7,6,5,4]
+; GFNISSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; GFNISSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
+; GFNISSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
+; GFNISSE2-NEXT: packuswb %xmm4, %xmm1
+; GFNISSE2-NEXT: gf2p8affineqb $0, %xmm3, %xmm1
+; GFNISSE2-NEXT: retq
+;
+; GFNISSSE3-LABEL: test_bitreverse_v8i32:
+; GFNISSSE3: # %bb.0:
+; GFNISSSE3-NEXT: movdqa {{.*#+}} xmm2 = [3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; GFNISSSE3-NEXT: pshufb %xmm2, %xmm0
+; GFNISSSE3-NEXT: movdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; GFNISSSE3-NEXT: gf2p8affineqb $0, %xmm3, %xmm0
+; GFNISSSE3-NEXT: pshufb %xmm2, %xmm1
+; GFNISSSE3-NEXT: gf2p8affineqb $0, %xmm3, %xmm1
+; GFNISSSE3-NEXT: retq
 ;
 ; GFNIAVX1-LABEL: test_bitreverse_v8i32:
 ; GFNIAVX1: # %bb.0:
@@ -1444,15 +1523,43 @@ define <4 x i64> @test_bitreverse_v4i64(<4 x i64> %a) nounwind {
 ; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; XOPAVX2-NEXT: retq
 ;
-; GFNISSE-LABEL: test_bitreverse_v4i64:
-; GFNISSE: # %bb.0:
-; GFNISSE-NEXT: movdqa {{.*#+}} xmm2 = [7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
-; GFNISSE-NEXT: pshufb %xmm2, %xmm0
-; GFNISSE-NEXT: movdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
-; GFNISSE-NEXT: gf2p8affineqb $0, %xmm3, %xmm0
-; GFNISSE-NEXT: pshufb %xmm2, %xmm1
-; GFNISSE-NEXT: gf2p8affineqb $0, %xmm3, %xmm1
-; GFNISSE-NEXT: retq
+; GFNISSE2-LABEL: test_bitreverse_v4i64:
+; GFNISSE2: # %bb.0:
+; GFNISSE2-NEXT: pxor %xmm2, %xmm2
+; GFNISSE2-NEXT: movdqa %xmm0, %xmm3
+; GFNISSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
+; GFNISSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
+; GFNISSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7]
+; GFNISSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,6,5,4]
+; GFNISSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; GFNISSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; GFNISSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; GFNISSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; GFNISSE2-NEXT: packuswb %xmm3, %xmm0
+; GFNISSE2-NEXT: movdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; GFNISSE2-NEXT: gf2p8affineqb $0, %xmm3, %xmm0
+; GFNISSE2-NEXT: movdqa %xmm1, %xmm4
+; GFNISSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm2[8],xmm4[9],xmm2[9],xmm4[10],xmm2[10],xmm4[11],xmm2[11],xmm4[12],xmm2[12],xmm4[13],xmm2[13],xmm4[14],xmm2[14],xmm4[15],xmm2[15]
+; GFNISSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
+; GFNISSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[3,2,1,0,4,5,6,7]
+; GFNISSE2-NEXT: pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,7,6,5,4]
+; GFNISSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; GFNISSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; GFNISSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
+; GFNISSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
+; GFNISSE2-NEXT: packuswb %xmm4, %xmm1
+; GFNISSE2-NEXT: gf2p8affineqb $0, %xmm3, %xmm1
+; GFNISSE2-NEXT: retq
+;
+; GFNISSSE3-LABEL: test_bitreverse_v4i64:
+; GFNISSSE3: # %bb.0:
+; GFNISSSE3-NEXT: movdqa {{.*#+}} xmm2 = [7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
+; GFNISSSE3-NEXT: pshufb %xmm2, %xmm0
+; GFNISSSE3-NEXT: movdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; GFNISSSE3-NEXT: gf2p8affineqb $0, %xmm3, %xmm0
+; GFNISSSE3-NEXT: pshufb %xmm2, %xmm1
+; GFNISSSE3-NEXT: gf2p8affineqb $0, %xmm3, %xmm1
+; GFNISSSE3-NEXT: retq
 ;
 ; GFNIAVX1-LABEL: test_bitreverse_v4i64:
 ; GFNIAVX1: # %bb.0:
@@ -2035,19 +2142,44 @@ define <32 x i16> @test_bitreverse_v32i16(<32 x i16> %a) nounwind {
 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
 ; XOPAVX2-NEXT: retq
 ;
-; GFNISSE-LABEL: test_bitreverse_v32i16:
-; GFNISSE: # %bb.0:
-; GFNISSE-NEXT: movdqa {{.*#+}} xmm4 = [1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
-; GFNISSE-NEXT: pshufb %xmm4, %xmm0
-; GFNISSE-NEXT: movdqa {{.*#+}} xmm5 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
-; GFNISSE-NEXT: gf2p8affineqb $0, %xmm5, %xmm0
-; GFNISSE-NEXT: pshufb %xmm4, %xmm1
-; GFNISSE-NEXT: gf2p8affineqb $0, %xmm5, %xmm1
-; GFNISSE-NEXT: pshufb %xmm4, %xmm2
-; GFNISSE-NEXT: gf2p8affineqb $0, %xmm5, %xmm2
-; GFNISSE-NEXT: pshufb %xmm4, %xmm3
-; GFNISSE-NEXT: gf2p8affineqb $0, %xmm5, %xmm3
-; GFNISSE-NEXT: retq
+; GFNISSE2-LABEL: test_bitreverse_v32i16:
+; GFNISSE2: # %bb.0:
+; GFNISSE2-NEXT: movdqa %xmm0, %xmm4
+; GFNISSE2-NEXT: psrlw $8, %xmm4
+; GFNISSE2-NEXT: psllw $8, %xmm0
+; GFNISSE2-NEXT: por %xmm4, %xmm0
+; GFNISSE2-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; GFNISSE2-NEXT: gf2p8affineqb $0, %xmm4, %xmm0
+; GFNISSE2-NEXT: movdqa %xmm1, %xmm5
+; GFNISSE2-NEXT: psrlw $8, %xmm5
+; GFNISSE2-NEXT: psllw $8, %xmm1
+; GFNISSE2-NEXT: por %xmm5, %xmm1
+; GFNISSE2-NEXT: gf2p8affineqb $0, %xmm4, %xmm1
+; GFNISSE2-NEXT: movdqa %xmm2, %xmm5
+; GFNISSE2-NEXT: psrlw $8, %xmm5
+; GFNISSE2-NEXT: psllw $8, %xmm2
+; GFNISSE2-NEXT: por %xmm5, %xmm2
+; GFNISSE2-NEXT: gf2p8affineqb $0, %xmm4, %xmm2
+; GFNISSE2-NEXT: movdqa %xmm3, %xmm5
+; GFNISSE2-NEXT: psrlw $8, %xmm5
+; GFNISSE2-NEXT: psllw $8, %xmm3
+; GFNISSE2-NEXT: por %xmm5, %xmm3
+; GFNISSE2-NEXT: gf2p8affineqb $0, %xmm4, %xmm3
+; GFNISSE2-NEXT: retq
+;
+; GFNISSSE3-LABEL: test_bitreverse_v32i16:
+; GFNISSSE3: # %bb.0:
+; GFNISSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
+; GFNISSSE3-NEXT: pshufb %xmm4, %xmm0
+; GFNISSSE3-NEXT: movdqa {{.*#+}} xmm5 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; GFNISSSE3-NEXT: gf2p8affineqb $0, %xmm5, %xmm0
+; GFNISSSE3-NEXT: pshufb %xmm4, %xmm1
+; GFNISSSE3-NEXT: gf2p8affineqb $0, %xmm5, %xmm1
+; GFNISSSE3-NEXT: pshufb %xmm4, %xmm2
+; GFNISSSE3-NEXT: gf2p8affineqb $0, %xmm5, %xmm2
+; GFNISSSE3-NEXT: pshufb %xmm4, %xmm3
+; GFNISSSE3-NEXT: gf2p8affineqb $0, %xmm5, %xmm3
+; GFNISSSE3-NEXT: retq
 ;
 ; GFNIAVX1-LABEL: test_bitreverse_v32i16:
 ; GFNIAVX1: # %bb.0:
@@ -2393,19 +2525,61 @@ define <16 x i32> @test_bitreverse_v16i32(<16 x i32> %a) nounwind {
 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
 ; XOPAVX2-NEXT: retq
 ;
-; GFNISSE-LABEL: test_bitreverse_v16i32:
-; GFNISSE: # %bb.0:
-; GFNISSE-NEXT: movdqa {{.*#+}} xmm4 = [3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
-; GFNISSE-NEXT: pshufb %xmm4, %xmm0
-; GFNISSE-NEXT: movdqa {{.*#+}} xmm5 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
-; GFNISSE-NEXT: gf2p8affineqb $0, %xmm5, %xmm0
-; GFNISSE-NEXT: pshufb %xmm4, %xmm1
-; GFNISSE-NEXT: gf2p8affineqb $0, %xmm5, %xmm1
-; GFNISSE-NEXT: pshufb %xmm4, %xmm2
-; GFNISSE-NEXT: gf2p8affineqb $0, %xmm5, %xmm2
-; GFNISSE-NEXT: pshufb %xmm4, %xmm3
-; GFNISSE-NEXT: gf2p8affineqb $0, %xmm5, %xmm3
-; GFNISSE-NEXT: retq
+; GFNISSE2-LABEL: test_bitreverse_v16i32:
+; GFNISSE2: # %bb.0:
+; GFNISSE2-NEXT: pxor %xmm4, %xmm4
+; GFNISSE2-NEXT: movdqa %xmm0, %xmm5
+; GFNISSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm4[8],xmm5[9],xmm4[9],xmm5[10],xmm4[10],xmm5[11],xmm4[11],xmm5[12],xmm4[12],xmm5[13],xmm4[13],xmm5[14],xmm4[14],xmm5[15],xmm4[15]
+; GFNISSE2-NEXT: pshuflw {{.*#+}} xmm5 = xmm5[3,2,1,0,4,5,6,7]
+; GFNISSE2-NEXT: pshufhw {{.*#+}} xmm5 = xmm5[0,1,2,3,7,6,5,4]
+; GFNISSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; GFNISSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; GFNISSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; GFNISSE2-NEXT: packuswb %xmm5, %xmm0
+; GFNISSE2-NEXT: movdqa {{.*#+}} xmm5 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; GFNISSE2-NEXT: gf2p8affineqb $0, %xmm5, %xmm0
+; GFNISSE2-NEXT: movdqa %xmm1, %xmm6
+; GFNISSE2-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm4[8],xmm6[9],xmm4[9],xmm6[10],xmm4[10],xmm6[11],xmm4[11],xmm6[12],xmm4[12],xmm6[13],xmm4[13],xmm6[14],xmm4[14],xmm6[15],xmm4[15]
+; GFNISSE2-NEXT: pshuflw {{.*#+}} xmm6 = xmm6[3,2,1,0,4,5,6,7]
+; GFNISSE2-NEXT: pshufhw {{.*#+}} xmm6 = xmm6[0,1,2,3,7,6,5,4]
+; GFNISSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
+; GFNISSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
+; GFNISSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
+; GFNISSE2-NEXT: packuswb %xmm6, %xmm1
+; GFNISSE2-NEXT: gf2p8affineqb $0, %xmm5, %xmm1
+; GFNISSE2-NEXT: movdqa %xmm2, %xmm6
+; GFNISSE2-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm4[8],xmm6[9],xmm4[9],xmm6[10],xmm4[10],xmm6[11],xmm4[11],xmm6[12],xmm4[12],xmm6[13],xmm4[13],xmm6[14],xmm4[14],xmm6[15],xmm4[15]
+; GFNISSE2-NEXT: pshuflw {{.*#+}} xmm6 = xmm6[3,2,1,0,4,5,6,7]
+; GFNISSE2-NEXT: pshufhw {{.*#+}} xmm6 = xmm6[0,1,2,3,7,6,5,4]
+; GFNISSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
+; GFNISSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
+; GFNISSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
+; GFNISSE2-NEXT: packuswb %xmm6, %xmm2
+; GFNISSE2-NEXT: gf2p8affineqb $0, %xmm5, %xmm2
+; GFNISSE2-NEXT: movdqa %xmm3, %xmm6
+; GFNISSE2-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm4[8],xmm6[9],xmm4[9],xmm6[10],xmm4[10],xmm6[11],xmm4[11],xmm6[12],xmm4[12],xmm6[13],xmm4[13],xmm6[14],xmm4[14],xmm6[15],xmm4[15]
+; GFNISSE2-NEXT: pshuflw {{.*#+}} xmm6 = xmm6[3,2,1,0,4,5,6,7]
+; GFNISSE2-NEXT: pshufhw {{.*#+}} xmm6 = xmm6[0,1,2,3,7,6,5,4]
+; GFNISSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
+; GFNISSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7]
+; GFNISSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,6,5,4]
+; GFNISSE2-NEXT: packuswb %xmm6, %xmm3
+; GFNISSE2-NEXT: gf2p8affineqb $0, %xmm5, %xmm3
+; GFNISSE2-NEXT: retq
+;
+; GFNISSSE3-LABEL: test_bitreverse_v16i32:
+; GFNISSSE3: # %bb.0:
+; GFNISSSE3-NEXT: movdqa {{.*#+}} xmm4 = [3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; GFNISSSE3-NEXT: pshufb %xmm4, %xmm0
+; GFNISSSE3-NEXT: movdqa {{.*#+}} xmm5 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; GFNISSSE3-NEXT: gf2p8affineqb $0, %xmm5, %xmm0
+; GFNISSSE3-NEXT: pshufb %xmm4, %xmm1
+; GFNISSSE3-NEXT: gf2p8affineqb $0, %xmm5, %xmm1
+; GFNISSSE3-NEXT: pshufb %xmm4, %xmm2
+; GFNISSSE3-NEXT: gf2p8affineqb $0, %xmm5, %xmm2
+; GFNISSSE3-NEXT: pshufb %xmm4, %xmm3
+; GFNISSSE3-NEXT: gf2p8affineqb $0, %xmm5, %xmm3
+; GFNISSSE3-NEXT: retq
 ;
 ; GFNIAVX1-LABEL: test_bitreverse_v16i32:
 ; GFNIAVX1: # %bb.0:
@@ -2759,19 +2933,69 @@ define <8 x i64> @test_bitreverse_v8i64(<8 x i64> %a) nounwind {
 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
 ; XOPAVX2-NEXT: retq
 ;
-; GFNISSE-LABEL: test_bitreverse_v8i64:
-; GFNISSE: # %bb.0:
-; GFNISSE-NEXT: movdqa {{.*#+}} xmm4 = [7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
-; GFNISSE-NEXT: pshufb %xmm4, %xmm0
-; GFNISSE-NEXT: movdqa {{.*#+}} xmm5 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
-; GFNISSE-NEXT: gf2p8affineqb $0, %xmm5, %xmm0
-; GFNISSE-NEXT: pshufb %xmm4, %xmm1
-; GFNISSE-NEXT: gf2p8affineqb $0, %xmm5, %xmm1
-; GFNISSE-NEXT: pshufb %xmm4, %xmm2
-; GFNISSE-NEXT: gf2p8affineqb $0, %xmm5, %xmm2
-; GFNISSE-NEXT: pshufb %xmm4, %xmm3
-; GFNISSE-NEXT: gf2p8affineqb $0, %xmm5, %xmm3
-; GFNISSE-NEXT: retq
+; GFNISSE2-LABEL: test_bitreverse_v8i64:
+; GFNISSE2: # %bb.0:
+; GFNISSE2-NEXT: pxor %xmm4, %xmm4
+; GFNISSE2-NEXT: movdqa %xmm0, %xmm5
+; GFNISSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm4[8],xmm5[9],xmm4[9],xmm5[10],xmm4[10],xmm5[11],xmm4[11],xmm5[12],xmm4[12],xmm5[13],xmm4[13],xmm5[14],xmm4[14],xmm5[15],xmm4[15]
+; GFNISSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
+; GFNISSE2-NEXT: pshuflw {{.*#+}} xmm5 = xmm5[3,2,1,0,4,5,6,7]
+; GFNISSE2-NEXT: pshufhw {{.*#+}} xmm5 = xmm5[0,1,2,3,7,6,5,4]
+; GFNISSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; GFNISSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; GFNISSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; GFNISSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; GFNISSE2-NEXT: packuswb %xmm5, %xmm0
+; GFNISSE2-NEXT: movdqa {{.*#+}} xmm5 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; GFNISSE2-NEXT: gf2p8affineqb $0, %xmm5, %xmm0
+; GFNISSE2-NEXT: movdqa %xmm1, %xmm6
+; GFNISSE2-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm4[8],xmm6[9],xmm4[9],xmm6[10],xmm4[10],xmm6[11],xmm4[11],xmm6[12],xmm4[12],xmm6[13],xmm4[13],xmm6[14],xmm4[14],xmm6[15],xmm4[15]
+; GFNISSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[2,3,0,1]
+; GFNISSE2-NEXT: pshuflw {{.*#+}} xmm6 = xmm6[3,2,1,0,4,5,6,7]
+; GFNISSE2-NEXT: pshufhw {{.*#+}} xmm6 = xmm6[0,1,2,3,7,6,5,4]
+; GFNISSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
+; GFNISSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; GFNISSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
+; GFNISSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
+; GFNISSE2-NEXT: packuswb %xmm6, %xmm1
+; GFNISSE2-NEXT: gf2p8affineqb $0, %xmm5, %xmm1
+; GFNISSE2-NEXT: movdqa %xmm2, %xmm6
+; GFNISSE2-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm4[8],xmm6[9],xmm4[9],xmm6[10],xmm4[10],xmm6[11],xmm4[11],xmm6[12],xmm4[12],xmm6[13],xmm4[13],xmm6[14],xmm4[14],xmm6[15],xmm4[15]
+; GFNISSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[2,3,0,1]
+; GFNISSE2-NEXT: pshuflw {{.*#+}} xmm6 = xmm6[3,2,1,0,4,5,6,7]
+; GFNISSE2-NEXT: pshufhw {{.*#+}} xmm6 = xmm6[0,1,2,3,7,6,5,4]
+; GFNISSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
+; GFNISSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; GFNISSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
+; GFNISSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
+; GFNISSE2-NEXT: packuswb %xmm6, %xmm2
+; GFNISSE2-NEXT: gf2p8affineqb $0, %xmm5, %xmm2
+; GFNISSE2-NEXT: movdqa %xmm3, %xmm6
+; GFNISSE2-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm4[8],xmm6[9],xmm4[9],xmm6[10],xmm4[10],xmm6[11],xmm4[11],xmm6[12],xmm4[12],xmm6[13],xmm4[13],xmm6[14],xmm4[14],xmm6[15],xmm4[15]
+; GFNISSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[2,3,0,1]
+; GFNISSE2-NEXT: pshuflw {{.*#+}} xmm6 = xmm6[3,2,1,0,4,5,6,7]
+; GFNISSE2-NEXT: pshufhw {{.*#+}} xmm6 = xmm6[0,1,2,3,7,6,5,4]
+; GFNISSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
+; GFNISSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
+; GFNISSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7]
+; GFNISSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,6,5,4]
+; GFNISSE2-NEXT: packuswb %xmm6, %xmm3
+; GFNISSE2-NEXT: gf2p8affineqb $0, %xmm5, %xmm3
+; GFNISSE2-NEXT: retq
+;
+; GFNISSSE3-LABEL: test_bitreverse_v8i64:
+; GFNISSSE3: # %bb.0:
+; GFNISSSE3-NEXT: movdqa {{.*#+}} xmm4 = [7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
+; GFNISSSE3-NEXT: pshufb %xmm4, %xmm0
+; GFNISSSE3-NEXT: movdqa {{.*#+}} xmm5 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; GFNISSSE3-NEXT: gf2p8affineqb $0, %xmm5, %xmm0
+; GFNISSSE3-NEXT: pshufb %xmm4, %xmm1
+; GFNISSSE3-NEXT: gf2p8affineqb $0, %xmm5, %xmm1
+; GFNISSSE3-NEXT: pshufb %xmm4, %xmm2
+; GFNISSSE3-NEXT: gf2p8affineqb $0, %xmm5, %xmm2
+; GFNISSSE3-NEXT: pshufb %xmm4, %xmm3
+; GFNISSSE3-NEXT: gf2p8affineqb $0, %xmm5, %xmm3
+; GFNISSSE3-NEXT: retq
 ;
 ; GFNIAVX1-LABEL: test_bitreverse_v8i64:
 ; GFNIAVX1: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/wide-integer-cmp.ll b/llvm/test/CodeGen/X86/wide-integer-cmp.ll
index a15d633..12dccca 100644
--- a/llvm/test/CodeGen/X86/wide-integer-cmp.ll
+++ b/llvm/test/CodeGen/X86/wide-integer-cmp.ll
@@ -92,6 +92,8 @@ define i32 @test_wide(i128 %a, i128 %b) {
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: pushl %esi
 ; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: subl $8, %esp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: .cfi_offset %esi, -8
 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
@@ -101,15 +103,15 @@ define i32 @test_wide(i128 %a, i128 %b) {
 ; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %esi
 ; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
 ; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: jge .LBB4_2
+; CHECK-NEXT: jge .LBB4_3
 ; CHECK-NEXT: # %bb.1: # %bb1
 ; CHECK-NEXT: movl $1, %eax
-; CHECK-NEXT: popl %esi
-; CHECK-NEXT: .cfi_def_cfa_offset 4
-; CHECK-NEXT: retl
-; CHECK-NEXT: .LBB4_2: # %bb2
-; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: jmp .LBB4_2
+; CHECK-NEXT: .LBB4_3: # %bb2
 ; CHECK-NEXT: movl $2, %eax
+; CHECK-NEXT: .LBB4_2: # %bb1
+; CHECK-NEXT: addl $8, %esp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
 ; CHECK-NEXT: popl %esi
 ; CHECK-NEXT: .cfi_def_cfa_offset 4
 ; CHECK-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/win-catchpad-nested-cxx.ll b/llvm/test/CodeGen/X86/win-catchpad-nested-cxx.ll
index bfb9c43..0bf8370 100644
--- a/llvm/test/CodeGen/X86/win-catchpad-nested-cxx.ll
+++ b/llvm/test/CodeGen/X86/win-catchpad-nested-cxx.ll
@@ -103,15 +103,15 @@ handler2:
 ; X64: $ip2state$try_in_catch:
 ; X64-NEXT: .long .Lfunc_begin0@IMGREL
 ; X64-NEXT: .long -1
-; X64-NEXT: .long .Ltmp0@IMGREL+1
+; X64-NEXT: .long .Ltmp0@IMGREL
 ; X64-NEXT: .long 0
-; X64-NEXT: .long .Ltmp1@IMGREL+1
+; X64-NEXT: .long .Ltmp1@IMGREL
 ; X64-NEXT: .long -1
 ; X64-NEXT: .long "?catch$2@?0?try_in_catch@4HA"@IMGREL
 ; X64-NEXT: .long 1
-; X64-NEXT: .long .Ltmp2@IMGREL+1
+; X64-NEXT: .long .Ltmp2@IMGREL
 ; X64-NEXT: .long 2
-; X64-NEXT: .long .Ltmp3@IMGREL+1
+; X64-NEXT: .long .Ltmp3@IMGREL
 ; X64-NEXT: .long 1
 ; X64-NEXT: .long "?catch$4@?0?try_in_catch@4HA"@IMGREL
 ; X64-NEXT: .long 3
diff --git a/llvm/test/CodeGen/X86/win-catchpad.ll b/llvm/test/CodeGen/X86/win-catchpad.ll
index 2491946..62ea510 100644
--- a/llvm/test/CodeGen/X86/win-catchpad.ll
+++ b/llvm/test/CodeGen/X86/win-catchpad.ll
@@ -214,9 +214,9 @@ try.cont:
 ; X64: $ip2state$try_catch_catch:
 ; X64-NEXT: .long .Lfunc_begin0@IMGREL
 ; X64-NEXT: .long -1
-; X64-NEXT: .long .Ltmp0@IMGREL+1
+; X64-NEXT: .long .Ltmp0@IMGREL
 ; X64-NEXT: .long 0
-; X64-NEXT: .long .Ltmp1@IMGREL+1
+; X64-NEXT: .long .Ltmp1@IMGREL
 ; X64-NEXT: .long -1
 ; X64-NEXT: .long "?catch$[[catch1bb]]@?0?try_catch_catch@4HA"@IMGREL
 ; X64-NEXT: .long 1
@@ -357,9 +357,9 @@ try.cont:
 ; X64-LABEL: $ip2state$branch_to_normal_dest:
 ; X64-NEXT: .long .Lfunc_begin1@IMGREL
 ; X64-NEXT: .long -1
-; X64-NEXT: .long .Ltmp[[before_call]]@IMGREL+1
+; X64-NEXT: .long .Ltmp[[before_call]]@IMGREL
 ; X64-NEXT: .long 0
-; X64-NEXT: .long .Ltmp[[after_call]]@IMGREL+1
+; X64-NEXT: .long .Ltmp[[after_call]]@IMGREL
 ; X64-NEXT: .long -1
 ; X64-NEXT: .long "?catch$[[catchbb]]@?0?branch_to_normal_dest@4HA"@IMGREL
 ; X64-NEXT: .long 1
diff --git a/llvm/test/CodeGen/X86/win-cleanuppad.ll b/llvm/test/CodeGen/X86/win-cleanuppad.ll
index e3f7f5b..e9265a1 100644
--- a/llvm/test/CodeGen/X86/win-cleanuppad.ll
+++ b/llvm/test/CodeGen/X86/win-cleanuppad.ll
@@ -191,7 +191,7 @@ cleanup.outer: ; preds = %invoke.cont.1, %c
 ; X64-NEXT: .long 1
 ; X64-NEXT: .long .Ltmp6@IMGREL
 ; X64-NEXT: .long 0
-; X64-NEXT: .long .Ltmp7@IMGREL+1
+; X64-NEXT: .long .Ltmp7@IMGREL
 ; X64-NEXT: .long -1
 
 attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/llvm/test/CodeGen/X86/win32-eh-states.ll b/llvm/test/CodeGen/X86/win32-eh-states.ll
index 42ae5b0..e645199 100644
--- a/llvm/test/CodeGen/X86/win32-eh-states.ll
+++ b/llvm/test/CodeGen/X86/win32-eh-states.ll
@@ -86,11 +86,11 @@ catch.7:
 ; X64-LABEL: $ip2state$f:
 ; X64-NEXT: .long .Lfunc_begin0@IMGREL
 ; X64-NEXT: .long -1
-; X64-NEXT: .long .Ltmp{{.*}}@IMGREL+1
+; X64-NEXT: .long .Ltmp{{.*}}@IMGREL
 ; X64-NEXT: .long 0
-; X64-NEXT: .long .Ltmp{{.*}}@IMGREL+1
+; X64-NEXT: .long .Ltmp{{.*}}@IMGREL
 ; X64-NEXT: .long 1
-; X64-NEXT: .long .Ltmp{{.*}}@IMGREL+1
+; X64-NEXT: .long .Ltmp{{.*}}@IMGREL
 ; X64-NEXT: .long -1
 ; X64-NEXT: .long "?catch${{.*}}@?0?f@4HA"@IMGREL
 ; X64-NEXT: .long 2
@@ -189,15 +189,15 @@ unreachable: ; preds = %entry
 ; X64-LABEL: $ip2state$g:
 ; X64-NEXT: .long .Lfunc_begin1@IMGREL
 ; X64-NEXT: .long -1
-; X64-NEXT: .long .Ltmp{{.*}}@IMGREL+1
+; X64-NEXT: .long .Ltmp{{.*}}@IMGREL
 ; X64-NEXT: .long 1
-; X64-NEXT: .long .Ltmp{{.*}}@IMGREL+1
+; X64-NEXT: .long .Ltmp{{.*}}@IMGREL
 ; X64-NEXT: .long -1
 ; X64-NEXT: .long "?catch${{.*}}@?0?g@4HA"@IMGREL
 ; X64-NEXT: .long 2
-; X64-NEXT: .long .Ltmp{{.*}}@IMGREL+1
+; X64-NEXT: .long .Ltmp{{.*}}@IMGREL
 ; X64-NEXT: .long 3
-; X64-NEXT: .long .Ltmp{{.*}}@IMGREL+1
+; X64-NEXT: .long .Ltmp{{.*}}@IMGREL
 ; X64-NEXT: .long 2
diff --git a/llvm/test/CodeGen/X86/win32-int-runtime-libcalls.ll b/llvm/test/CodeGen/X86/win32-int-runtime-libcalls.ll
new file mode 100644
index 0000000..5ac90a0
--- /dev/null
+++ b/llvm/test/CodeGen/X86/win32-int-runtime-libcalls.ll
@@ -0,0 +1,113 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=i686-windows-msvc < %s | FileCheck -check-prefix=CHECK32 %s
+; RUN: llc -mtriple=x86_64-windows-msvc < %s | FileCheck -check-prefix=CHECK64 %s
+
+define i64 @test_sdiv_i64(i64 %a, i64 %b) {
+; CHECK32-LABEL: test_sdiv_i64:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: calll __alldiv
+; CHECK32-NEXT: retl
+;
+; CHECK64-LABEL: test_sdiv_i64:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: movq %rdx, %r8
+; CHECK64-NEXT: movq %rcx, %rax
+; CHECK64-NEXT: cqto
+; CHECK64-NEXT: idivq %r8
+; CHECK64-NEXT: retq
+ %ret = sdiv i64 %a, %b
+ ret i64 %ret
+}
+
+define i64 @test_srem_i64(i64 %a, i64 %b) {
+; CHECK32-LABEL: test_srem_i64:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: calll __allrem
+; CHECK32-NEXT: retl
+;
+; CHECK64-LABEL: test_srem_i64:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: movq %rdx, %r8
+; CHECK64-NEXT: movq %rcx, %rax
+; CHECK64-NEXT: cqto
+; CHECK64-NEXT: idivq %r8
+; CHECK64-NEXT: movq %rdx, %rax
+; CHECK64-NEXT: retq
+ %ret = srem i64 %a, %b
+ ret i64 %ret
+}
+
+define i64 @test_udiv_i64(i64 %a, i64 %b) {
+; CHECK32-LABEL: test_udiv_i64:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: calll __aulldiv
+; CHECK32-NEXT: retl
+;
+; CHECK64-LABEL: test_udiv_i64:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: movq %rdx, %r8
+; CHECK64-NEXT: movq %rcx, %rax
+; CHECK64-NEXT: xorl %edx, %edx
+; CHECK64-NEXT: divq %r8
+; CHECK64-NEXT: retq
+ %ret = udiv i64 %a, %b
+ ret i64 %ret
+}
+
+define i64 @test_urem_i64(i64 %a, i64 %b) {
+; CHECK32-LABEL: test_urem_i64:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: pushl {{[0-9]+}}(%esp)
+; CHECK32-NEXT: calll __aullrem
+; CHECK32-NEXT: retl
+;
+; CHECK64-LABEL: test_urem_i64:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: movq %rdx, %r8
+; CHECK64-NEXT: movq %rcx, %rax
+; CHECK64-NEXT: xorl %edx, %edx
+; CHECK64-NEXT: divq %r8
+; CHECK64-NEXT: movq %rdx, %rax
+; CHECK64-NEXT: retq
+ %ret = urem i64 %a, %b
+ ret i64 %ret
+}
+
+define i64 @test_mul_i64(i64 %a, i64 %b) {
+; CHECK32-LABEL: test_mul_i64:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: pushl %esi
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; CHECK32-NEXT: movl %ecx, %eax
+; CHECK32-NEXT: mull %esi
+; CHECK32-NEXT: imull {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT: addl %ecx, %edx
+; CHECK32-NEXT: imull {{[0-9]+}}(%esp), %esi
+; CHECK32-NEXT: addl %esi, %edx
+; CHECK32-NEXT: popl %esi
+; CHECK32-NEXT: retl
+;
+; CHECK64-LABEL: test_mul_i64:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: movq %rcx, %rax
+; CHECK64-NEXT: imulq %rdx, %rax
+; CHECK64-NEXT: retq
+ %ret = mul i64 %a, %b
+ ret i64 %ret
+}
diff --git a/llvm/test/CodeGen/X86/win32-ssp.ll b/llvm/test/CodeGen/X86/win32-ssp.ll
index 536a6d5..259f039 100644
--- a/llvm/test/CodeGen/X86/win32-ssp.ll
+++ b/llvm/test/CodeGen/X86/win32-ssp.ll
@@ -1,7 +1,9 @@
 ; RUN: llc -mtriple=x86_64-w64-mingw32 < %s -o - | FileCheck --check-prefix=MINGW %s
+; RUN: llc -mtriple=x86_64-pc-cygwin < %s -o - | FileCheck --check-prefix=MINGW %s
 ; RUN: llc -mtriple=x86_64-pc-windows-itanium < %s -o - | FileCheck --check-prefix=MSVC %s
 ; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s -o - | FileCheck --check-prefix=MSVC %s
 ; RUN: llc -mtriple=i686-w64-mingw32 < %s -o - | FileCheck --check-prefix=MINGW %s
+; RUN: llc -mtriple=i686-pc-cygwin < %s -o - | FileCheck --check-prefix=MINGW %s
 
 declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
 declare dso_local void @other(ptr)
diff --git a/llvm/test/CodeGen/X86/win64-seh-epilogue-statepoint.ll b/llvm/test/CodeGen/X86/win64-seh-epilogue-statepoint.ll
index bc5be7a..75f156f 100644
--- a/llvm/test/CodeGen/X86/win64-seh-epilogue-statepoint.ll
+++ b/llvm/test/CodeGen/X86/win64-seh-epilogue-statepoint.ll
@@ -8,8 +8,8 @@ define i32 @foobar() gc "statepoint-example" personality ptr @__gxx_personality_
 ; CHECK-NEXT: .seh_stackalloc 40
 ; CHECK-NEXT: .seh_endprologue
 ; CHECK-NEXT: callq bar
-; CHECK-NEXT: .Ltmp0:
 ; CHECK-NEXT: nop
+; CHECK-NEXT: .Ltmp0:
 ; CHECK-NEXT: .seh_startepilogue
 ; CHECK-NEXT: addq $40, %rsp
 ; CHECK-NEXT: .seh_endepilogue
diff --git a/llvm/test/CodeGen/X86/wineh-coreclr.ll b/llvm/test/CodeGen/X86/wineh-coreclr.ll
index baf5eaa..a3d0fde 100644
--- a/llvm/test/CodeGen/X86/wineh-coreclr.ll
+++ b/llvm/test/CodeGen/X86/wineh-coreclr.ll
@@ -38,6 +38,7 @@ entry:
 ; CHECK: [[test1_before_f1:.+]]:
 ; CHECK-NEXT: movl $1, %ecx
 ; CHECK-NEXT: callq f
+; CHECK-NEXT: nop
 ; CHECK-NEXT: [[test1_after_f1:.+]]:
 invoke void @f(i32 1)
 to label %inner_try unwind label %finally
@@ -46,6 +47,7 @@ inner_try:
 ; CHECK: [[test1_before_f2:.+]]:
 ; CHECK-NEXT: movl $2, %ecx
 ; CHECK-NEXT: callq f
+; CHECK-NEXT: nop
 ; CHECK-NEXT: [[test1_after_f2:.+]]:
 invoke void @f(i32 2)
 to label %finally.clone unwind label %exn.dispatch
@@ -69,6 +71,7 @@ catch1:
 ; CHECK: [[test1_before_f3:.+]]:
 ; CHECK-NEXT: movl $3, %ecx
 ; CHECK-NEXT: callq f
+; CHECK-NEXT: nop
 ; CHECK-NEXT: [[test1_after_f3:.+]]:
 invoke void @f(i32 3) [ "funclet"(token %catch.pad1) ]
 to label %catch1.ret unwind label %finally
@@ -92,6 +95,7 @@ catch2:
 ; CHECK: [[test1_before_f4:.+]]:
 ; CHECK-NEXT: movl $4, %ecx
 ; CHECK-NEXT: callq f
+; CHECK-NEXT: nop
 ; CHECK-NEXT: [[test1_after_f4:.+]]:
 invoke void @f(i32 4) [ "funclet"(token %catch.pad2) ]
 to label %try_in_catch unwind label %finally
@@ -100,6 +104,7 @@ try_in_catch:
 ; CHECK: [[test1_before_f5:.+]]:
 ; CHECK-NEXT: movl $5, %ecx
 ; CHECK-NEXT: callq f
+; CHECK-NEXT: nop
 ; CHECK-NEXT: [[test1_after_f5:.+]]:
 invoke void @f(i32 5) [ "funclet"(token %catch.pad2) ]
 to label %catch2.ret unwind label %fault
@@ -116,6 +121,7 @@ fault:
 ; CHECK: [[test1_before_f6:.+]]:
 ; CHECK-NEXT: movl $6, %ecx
 ; CHECK-NEXT: callq f
+; CHECK-NEXT: nop
 ; CHECK-NEXT: [[test1_after_f6:.+]]:
 invoke void @f(i32 6) [ "funclet"(token %fault.pad) ]
 to label %fault.ret unwind label %finally
@@ -312,6 +318,7 @@ unreachable:
 ; CHECK: [[test2_before_f1:.+]]:
 ; CHECK-NEXT: movl $1, %ecx
 ; CHECK-NEXT: callq f
+; CHECK-NEXT: nop
 ; CHECK-NEXT: [[test2_after_f1:.+]]:
 ; CHECK: .seh_proc [[test2_catch1:[^ ]+]]
 ; CHECK: .seh_proc [[test2_catch2:[^ ]+]]
@@ -320,6 +327,7 @@ unreachable:
 ; CHECK: [[test2_before_f2:.+]]:
 ; CHECK-NEXT: movl $2, %ecx
 ; CHECK-NEXT: callq f
+; CHECK-NEXT: nop
 ; CHECK-NEXT: [[test2_after_f2:.+]]:
 ; CHECK: int3
 ; CHECK: [[test2_end:.*func_end.*]]:
@@ -448,6 +456,7 @@ entry:
 ; CHECK: [[test3_before_f1:.+]]:
 ; CHECK-NEXT: movl $1, %ecx
 ; CHECK-NEXT: callq f
+; CHECK-NEXT: nop
 ; CHECK-NEXT: [[test3_after_f1:.+]]:
 invoke void @f(i32 1)
 to label %exit unwind label %fault1
@@ -474,6 +483,7 @@ fault4:
 ; CHECK: [[test3_before_f6:.+]]:
 ; CHECK-NEXT: movl $6, %ecx
 ; CHECK-NEXT: callq f
+; CHECK-NEXT: nop
 ; CHECK-NEXT: [[test3_after_f6:.+]]:
 invoke void @f(i32 6) ["funclet"(token %fault.pad4)]
 to label %fault4.cont unwind label %exn.dispatch1
@@ -482,6 +492,7 @@ fault4.cont:
 ; CHECK: [[test3_before_f7:.+]]:
 ; CHECK-NEXT: movl $7, %ecx
 ; CHECK-NEXT: callq f
+; CHECK-NEXT: nop
 ; CHECK-NEXT: [[test3_after_f7:.+]]:
 invoke void @f(i32 7) ["funclet"(token %fault.pad4)]
 to label %unreachable unwind label %fault5
@@ -512,6 +523,7 @@ unreachable:
 ; CHECK: [[test3_before_f4:.+]]:
 ; CHECK-NEXT: movl $4, %ecx
 ; CHECK-NEXT: callq f
+; CHECK-NEXT: nop
 ; CHECK-NEXT: [[test3_after_f4:.+]]:
 ; CHECK: int3
 ; CHECK: .seh_proc [[test3_fault2:[^ ]+]]
@@ -520,6 +532,7 @@ unreachable:
 ; CHECK: [[test3_before_f3:.+]]:
 ; CHECK-NEXT: movl $3, %ecx
 ; CHECK-NEXT: callq f
+; CHECK-NEXT: nop
 ; CHECK-NEXT: [[test3_after_f3:.+]]:
 ; CHECK: int3
 ; CHECK: .seh_proc [[test3_fault1:[^ ]+]]
@@ -528,6 +541,7 @@ unreachable:
 ; CHECK: [[test3_before_f2:.+]]:
 ; CHECK-NEXT: movl $2, %ecx
 ; CHECK-NEXT: callq f
+; CHECK-NEXT: nop
 ; CHECK-NEXT: [[test3_after_f2:.+]]:
 ; CHECK: int3
 ; CHECK: [[test3_end:.*func_end.*]]: