Diffstat (limited to 'llvm/test/CodeGen/X86')
27 files changed, 1563 insertions, 70 deletions
diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-memop-scalar-32.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-memop-scalar-32.mir index ba72c4f..bbb09c6 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-memop-scalar-32.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-memop-scalar-32.mir @@ -10,18 +10,18 @@ body: | bb.0: ; X32-LABEL: name: test_memop_s8tos32 ; X32: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF - ; X32: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p0) :: (load (s1)) - ; X32: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p0) :: (load (s8)) - ; X32: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p0) :: (load (s16)) - ; X32: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p0) :: (load (s32)) - ; X32: [[LOAD4:%[0-9]+]]:_(p0) = G_LOAD [[DEF]](p0) :: (load (p0)) - ; X32: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 - ; X32: [[AND:%[0-9]+]]:_(s8) = G_AND [[LOAD]], [[C]] - ; X32: G_STORE [[AND]](s8), [[DEF]](p0) :: (store (s8)) - ; X32: G_STORE [[LOAD1]](s8), [[DEF]](p0) :: (store (s8)) - ; X32: G_STORE [[LOAD2]](s16), [[DEF]](p0) :: (store (s16)) - ; X32: G_STORE [[LOAD3]](s32), [[DEF]](p0) :: (store (s32)) - ; X32: G_STORE [[LOAD4]](p0), [[DEF]](p0) :: (store (p0)) + ; X32-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p0) :: (load (s1)) + ; X32-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p0) :: (load (s8)) + ; X32-NEXT: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p0) :: (load (s16)) + ; X32-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p0) :: (load (s32)) + ; X32-NEXT: [[LOAD4:%[0-9]+]]:_(p0) = G_LOAD [[DEF]](p0) :: (load (p0)) + ; X32-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 + ; X32-NEXT: [[AND:%[0-9]+]]:_(s8) = G_AND [[LOAD]], [[C]] + ; X32-NEXT: G_STORE [[AND]](s8), [[DEF]](p0) :: (store (s8)) + ; X32-NEXT: G_STORE [[LOAD1]](s8), [[DEF]](p0) :: (store (s8)) + ; X32-NEXT: G_STORE [[LOAD2]](s16), [[DEF]](p0) :: (store (s16)) + ; X32-NEXT: G_STORE [[LOAD3]](s32), [[DEF]](p0) :: (store (s32)) + ; X32-NEXT: G_STORE [[LOAD4]](p0), [[DEF]](p0) :: (store (p0)) %0:_(p0) = IMPLICIT_DEF %9:_(s1) = G_LOAD %0 :: (load (s1)) %1:_(s8) = G_LOAD %0 :: (load (s8)) @@ -46,13 +46,13 @@ body: | ; X32-LABEL: name: test_memop_s64 ; X32: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF - ; X32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p0) :: (load (s32), align 8) - ; X32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; X32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32) - ; X32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) - ; X32: G_STORE [[LOAD]](s32), [[DEF]](p0) :: (store (s32), align 8) - ; X32: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32) - ; X32: G_STORE [[LOAD1]](s32), [[PTR_ADD1]](p0) :: (store (s32) into unknown-address + 4) + ; X32-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p0) :: (load (s32), align 8) + ; X32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; X32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[DEF]], [[C]](s32) + ; X32-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) + ; X32-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p0) :: (store (s32), align 8) + ; X32-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[DEF]], [[C]](s32) + ; X32-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD1]](p0) :: (store (s32) into unknown-address + 4) %0:_(p0) = IMPLICIT_DEF %1:_(s64) = G_LOAD %0 :: (load (s64)) diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-undef.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-undef.mir index 8711d84..b16fe3e 100644 --- 
a/llvm/test/CodeGen/X86/GlobalISel/legalize-undef.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-undef.mir @@ -21,6 +21,7 @@ body: | ; X64-NEXT: G_STORE [[DEF3]](s32), [[DEF]](p0) :: (store (s32)) ; X64-NEXT: [[DEF4:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; X64-NEXT: G_STORE [[DEF4]](s64), [[DEF]](p0) :: (store (s64)) + ; ; X32-LABEL: name: test_implicit_def ; X32: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF ; X32-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 @@ -35,7 +36,7 @@ body: | ; X32-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF4]](s64) ; X32-NEXT: G_STORE [[UV]](s32), [[DEF]](p0) :: (store (s32), align 8) ; X32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; X32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C1]](s32) + ; X32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[DEF]], [[C1]](s32) ; X32-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD]](p0) :: (store (s32) into unknown-address + 4) %5:_(p0) = G_IMPLICIT_DEF %0:_(s1) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-x87.ll b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-x87.ll index 99d458a..83c319b 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-x87.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-x87.ll @@ -164,12 +164,12 @@ define void @f5(ptr %a, ptr %b) { ; X86-NEXT: [[LOAD1:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (p0) from %fixed-stack.0) ; X86-NEXT: [[LOAD2:%[0-9]+]]:gpr(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.a, align 8) ; X86-NEXT: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 4 - ; X86-NEXT: [[PTR_ADD:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[LOAD]], [[C]](s32) + ; X86-NEXT: [[PTR_ADD:%[0-9]+]]:gpr(p0) = nuw inbounds G_PTR_ADD [[LOAD]], [[C]](s32) ; X86-NEXT: [[COPY:%[0-9]+]]:gpr(p0) = COPY [[PTR_ADD]](p0) ; X86-NEXT: [[LOAD3:%[0-9]+]]:gpr(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.a + 4, basealign 8) ; X86-NEXT: [[MV:%[0-9]+]]:gpr(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; X86-NEXT: [[LOAD4:%[0-9]+]]:gpr(s32) = G_LOAD [[LOAD1]](p0) :: (load (s32) from %ir.b, align 8) - ; X86-NEXT: [[PTR_ADD1:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[LOAD1]], [[C]](s32) + ; X86-NEXT: [[PTR_ADD1:%[0-9]+]]:gpr(p0) = nuw inbounds G_PTR_ADD [[LOAD1]], [[C]](s32) ; X86-NEXT: [[LOAD5:%[0-9]+]]:gpr(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from %ir.b + 4, basealign 8) ; X86-NEXT: [[MV1:%[0-9]+]]:gpr(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) ; X86-NEXT: [[COPY1:%[0-9]+]]:psr(s64) = COPY [[MV]](s64) diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll b/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll index 171ccb2..2f1f8bc 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll @@ -77,12 +77,12 @@ define { double, double } @test_return_d2(double %d.coerce0, double %d.coerce1) ; ALL-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.d ; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.1) ; ALL-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; ALL-NEXT: %5:_(p0) = nuw nusw G_PTR_ADD [[FRAME_INDEX1]], [[C1]](s64) - ; ALL-NEXT: G_STORE [[COPY1]](s64), %5(p0) :: (store (s64) into %ir.2) + ; ALL-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[FRAME_INDEX1]], [[C1]](s64) + ; ALL-NEXT: G_STORE [[COPY1]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.2) ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), 
[[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.3, align 8), (load (s8) from %ir.4, align 8) ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.5) - ; ALL-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64) - ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s64) from %ir.5 + 8) + ; ALL-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64) + ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (dereferenceable load (s64) from %ir.5 + 8) ; ALL-NEXT: $xmm0 = COPY [[LOAD]](s64) ; ALL-NEXT: $xmm1 = COPY [[LOAD1]](s64) ; ALL-NEXT: RET 0, implicit $xmm0, implicit $xmm1 @@ -170,14 +170,14 @@ define { i64, i32 } @test_return_i3(i64 %i.coerce0, i32 %i.coerce1) { ; ALL-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.3.tmp ; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX2]](p0) :: (store (s64) into %ir.0, align 4) ; ALL-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; ALL-NEXT: %7:_(p0) = nuw nusw G_PTR_ADD [[FRAME_INDEX2]], [[C1]](s64) - ; ALL-NEXT: G_STORE [[COPY1]](s32), %7(p0) :: (store (s32) into %ir.1) + ; ALL-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[FRAME_INDEX2]], [[C1]](s64) + ; ALL-NEXT: G_STORE [[COPY1]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.1) ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX1]](p0), [[FRAME_INDEX2]](p0), [[C]](s64), 0 :: (store (s8) into %ir.2, align 4), (load (s8) from %ir.3, align 4) ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.4, align 4), (load (s8) from %ir.5, align 4) ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX3]](p0), [[FRAME_INDEX]](p0), [[C]](s64), 0 :: (store (s8) into %ir.6, align 8), (load (s8) from %ir.7, align 4) ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX3]](p0) :: (dereferenceable load (s64) from %ir.tmp) - ; ALL-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s64) - ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s32) from %ir.tmp + 8, align 8) + ; ALL-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s64) + ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (dereferenceable load (s32) from %ir.tmp + 8, align 8) ; ALL-NEXT: $rax = COPY [[LOAD]](s64) ; ALL-NEXT: $edx = COPY [[LOAD1]](s32) ; ALL-NEXT: RET 0, implicit $rax, implicit $edx @@ -215,12 +215,12 @@ define { i64, i64 } @test_return_i4(i64 %i.coerce0, i64 %i.coerce1) { ; ALL-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.i ; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.1, align 4) ; ALL-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; ALL-NEXT: %5:_(p0) = nuw nusw G_PTR_ADD [[FRAME_INDEX1]], [[C1]](s64) - ; ALL-NEXT: G_STORE [[COPY1]](s64), %5(p0) :: (store (s64) into %ir.2, align 4) + ; ALL-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[FRAME_INDEX1]], [[C1]](s64) + ; ALL-NEXT: G_STORE [[COPY1]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.2, align 4) ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.3, align 4), (load (s8) from %ir.4, align 4) ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.5, align 4) - ; ALL-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64) - ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: 
(dereferenceable load (s64) from %ir.5 + 8, align 4) + ; ALL-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64) + ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (dereferenceable load (s64) from %ir.5 + 8, align 4) ; ALL-NEXT: $rax = COPY [[LOAD]](s64) ; ALL-NEXT: $rdx = COPY [[LOAD1]](s64) ; ALL-NEXT: RET 0, implicit $rax, implicit $rdx diff --git a/llvm/test/CodeGen/X86/apx/cf.ll b/llvm/test/CodeGen/X86/apx/cf.ll index 1e4ac3f..b111ae5 100644 --- a/llvm/test/CodeGen/X86/apx/cf.ll +++ b/llvm/test/CodeGen/X86/apx/cf.ll @@ -162,7 +162,7 @@ entry: define void @load_zext(i1 %cond, ptr %b, ptr %p) { ; CHECK-LABEL: load_zext: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andb $1, %dil +; CHECK-NEXT: testb $1, %dil ; CHECK-NEXT: cfcmovnew (%rsi), %ax ; CHECK-NEXT: movzwl %ax, %eax ; CHECK-NEXT: cfcmovnel %eax, (%rdx) @@ -180,7 +180,7 @@ entry: define void @load_sext(i1 %cond, ptr %b, ptr %p) { ; CHECK-LABEL: load_sext: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andb $1, %dil +; CHECK-NEXT: testb $1, %dil ; CHECK-NEXT: cfcmovnel (%rsi), %eax ; CHECK-NEXT: cltq ; CHECK-NEXT: cfcmovneq %rax, (%rdx) diff --git a/llvm/test/CodeGen/X86/calleetypeid-directcall-mismatched.ll b/llvm/test/CodeGen/X86/calleetypeid-directcall-mismatched.ll new file mode 100644 index 0000000..7881ea7 --- /dev/null +++ b/llvm/test/CodeGen/X86/calleetypeid-directcall-mismatched.ll @@ -0,0 +1,32 @@ +;; Tests that callee_type metadata attached to direct call sites are safely ignored. + +; RUN: llc --call-graph-section -mtriple x86_64-linux-gnu < %s -stop-after=finalize-isel -o - | FileCheck --match-full-lines %s + +;; Test that `calleeTypeIds` field is not present in `callSites` +; CHECK-LABEL: callSites: +; CHECK-NEXT: - { bb: {{[0-9]+}}, offset: {{[0-9]+}}, fwdArgRegs: [] } +; CHECK-NEXT: - { bb: {{[0-9]+}}, offset: {{[0-9]+}}, fwdArgRegs: [] } +; CHECK-NEXT: - { bb: {{[0-9]+}}, offset: {{[0-9]+}}, fwdArgRegs: [] } +define i32 @foo(i32 %x, i32 %y) !type !0 { +entry: + ;; Call instruction with accurate callee_type. + ;; callee_type should be dropped seemlessly. + %call = call i32 @fizz(i32 %x, i32 %y), !callee_type !1 + ;; Call instruction with mismatched callee_type. + ;; callee_type should be dropped seemlessly without errors. + %call1 = call i32 @fizz(i32 %x, i32 %y), !callee_type !3 + %add = add nsw i32 %call, %call1 + ;; Call instruction with mismatched callee_type. + ;; callee_type should be dropped seemlessly without errors. + %call2 = call i32 @fizz(i32 %add, i32 %y), !callee_type !3 + %sub = sub nsw i32 %add, %call2 + ret i32 %sub +} + +declare !type !2 i32 @fizz(i32, i32) + +!0 = !{i64 0, !"_ZTSFiiiiE.generalized"} +!1 = !{!2} +!2 = !{i64 0, !"_ZTSFiiiE.generalized"} +!3 = !{!4} +!4 = !{i64 0, !"_ZTSFicE.generalized"} diff --git a/llvm/test/CodeGen/X86/callsite-emit-calleetypeid-tailcall.ll b/llvm/test/CodeGen/X86/callsite-emit-calleetypeid-tailcall.ll new file mode 100644 index 0000000..8f6b7a6 --- /dev/null +++ b/llvm/test/CodeGen/X86/callsite-emit-calleetypeid-tailcall.ll @@ -0,0 +1,19 @@ +;; Tests that call site callee type ids can be extracted and set from +;; callee_type metadata for indirect tail calls. + +;; Verify the exact calleeTypeIds value to ensure it is not garbage but the value +;; computed as the type id from the callee_type metadata. 
+; RUN: llc --call-graph-section -mtriple=x86_64-unknown-linux < %s -stop-after=finalize-isel -o - | FileCheck --match-full-lines %s + +define i32 @check_tailcall(ptr %func, i8 %x) !type !0 { +entry: + ; CHECK: callSites: + ; CHECK-NEXT: - { bb: {{.*}}, offset: {{.*}}, fwdArgRegs: [], calleeTypeIds: + ; CHECK-NEXT: [ 3498816979441845844 ] } + %call = tail call i32 %func(i8 signext %x), !callee_type !1 + ret i32 %call +} + +!0 = !{i64 0, !"_ZTSFiPvcE.generalized"} +!1 = !{!2} +!2 = !{i64 0, !"_ZTSFicE.generalized"} diff --git a/llvm/test/CodeGen/X86/callsite-emit-calleetypeid.ll b/llvm/test/CodeGen/X86/callsite-emit-calleetypeid.ll new file mode 100644 index 0000000..e97a6ac --- /dev/null +++ b/llvm/test/CodeGen/X86/callsite-emit-calleetypeid.ll @@ -0,0 +1,20 @@ +;; Tests that call site callee type ids can be extracted and set from +;; callee_type metadata. + +;; Verify the exact calleeTypeIds value to ensure it is not garbage but the value +;; computed as the type id from the callee_type metadata. +; RUN: llc --call-graph-section -mtriple=x86_64-unknown-linux < %s -stop-after=finalize-isel -o - | FileCheck --match-full-lines %s + +; CHECK: name: main +; CHECK: callSites: +; CHECK-NEXT: - { bb: {{.*}}, offset: {{.*}}, fwdArgRegs: [], calleeTypeIds: +; CHECK-NEXT: [ 7854600665770582568 ] } +define i32 @main() { +entry: + %fn = load ptr, ptr null, align 8 + call void %fn(i8 0), !callee_type !0 + ret i32 0 +} + +!0 = !{!1} +!1 = !{i64 0, !"_ZTSFvcE.generalized"} diff --git a/llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll b/llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll new file mode 100644 index 0000000..ea7454f --- /dev/null +++ b/llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll @@ -0,0 +1,185 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=x86_64-grtev4-linux-gnu < %s | FileCheck %s + +%struct.wibble = type { %struct.wombat } +%struct.wombat = type { %struct.ham, [3 x i8] } +%struct.ham = type { %struct.zot } +%struct.zot = type { %struct.blam } +%struct.blam = type { %struct.ham.0 } +%struct.ham.0 = type { %struct.bar } +%struct.bar = type { %struct.bar.1 } +%struct.bar.1 = type { %struct.baz, i8 } +%struct.baz = type { %struct.snork } +%struct.snork = type <{ %struct.spam, i8, [3 x i8] }> +%struct.spam = type { %struct.snork.2, %struct.snork.2 } +%struct.snork.2 = type { i32 } +%struct.snork.3 = type { %struct.baz, i8, [3 x i8] } + +define void @foo(ptr %arg, ptr %arg1, i40 %arg2, ptr %arg3, i32 %arg4) #0 { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %bb +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %r13 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: subq $24, %rsp +; CHECK-NEXT: .cfi_offset %rbx, -56 +; CHECK-NEXT: .cfi_offset %r12, -48 +; CHECK-NEXT: .cfi_offset %r13, -40 +; CHECK-NEXT: .cfi_offset %r14, -32 +; CHECK-NEXT: .cfi_offset %r15, -24 +; CHECK-NEXT: movl %r8d, %r14d +; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movq %rsi, %r13 +; CHECK-NEXT: movq %rdi, %r15 +; CHECK-NEXT: incl %r14d +; CHECK-NEXT: xorl %ebx, %ebx +; CHECK-NEXT: # implicit-def: $r12 +; CHECK-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: jmp 
.LBB0_3 +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_1: # %bb17 +; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: movq %r15, %r13 +; CHECK-NEXT: xorl %r15d, %r15d +; CHECK-NEXT: testq %rbx, %rbx +; CHECK-NEXT: sete %r15b +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: callq _Znwm@PLT +; CHECK-NEXT: shll $4, %r15d +; CHECK-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload +; CHECK-NEXT: movq %r12, %rcx +; CHECK-NEXT: shrq $32, %rcx +; CHECK-NEXT: movb %cl, 12(%rax) +; CHECK-NEXT: movl %r12d, 8(%rax) +; CHECK-NEXT: movq %r15, %rbx +; CHECK-NEXT: movq %r13, %r15 +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload +; CHECK-NEXT: decl %r14d +; CHECK-NEXT: je .LBB0_8 +; CHECK-NEXT: .LBB0_3: # %bb7 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: callq widget@PLT +; CHECK-NEXT: cmpb $-5, (%r13) +; CHECK-NEXT: jae .LBB0_5 +; CHECK-NEXT: # %bb.4: # in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: movl %r12d, %r12d +; CHECK-NEXT: cmpq %r15, %rbx +; CHECK-NEXT: jbe .LBB0_1 +; CHECK-NEXT: jmp .LBB0_7 +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_5: # %bb12 +; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: movq 0, %rax +; CHECK-NEXT: movq 8, %rax +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload +; CHECK-NEXT: cmpq %r15, %rbx +; CHECK-NEXT: jbe .LBB0_1 +; CHECK-NEXT: .LBB0_7: # in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: xorl %ebx, %ebx +; CHECK-NEXT: decl %r14d +; CHECK-NEXT: jne .LBB0_3 +; CHECK-NEXT: .LBB0_8: # %bb21 +; CHECK-NEXT: cmpb $0, 12(%rax) +; CHECK-NEXT: jne .LBB0_10 +; CHECK-NEXT: # %bb.9: # %bb26 +; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: popq %r13 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB0_10: # %bb25 +; CHECK-NEXT: .cfi_def_cfa %rbp, 16 +; CHECK-NEXT: movq %r15, %rdi +; CHECK-NEXT: callq pluto@PLT +bb: + br label %bb7 + +bb5: ; preds = %bb17, %bb14 + %phi = phi ptr [ %call19, %bb17 ], [ null, %bb14 ] + %phi6 = phi ptr [ %getelementptr, %bb17 ], [ null, %bb14 ] + %add = add i32 %phi9, 1 + %icmp = icmp eq i32 %phi9, %arg4 + br i1 %icmp, label %bb21, label %bb7 + +bb7: ; preds = %bb5, %bb + %phi8 = phi ptr [ null, %bb ], [ %phi6, %bb5 ] + %phi9 = phi i32 [ 0, %bb ], [ %add, %bb5 ] + %phi10 = phi i40 [ poison, %bb ], [ %phi15, %bb5 ] + %call = call ptr @widget() + %load = load i8, ptr %arg1, align 8 + %icmp11 = icmp ult i8 %load, -5 + %and = and i40 %phi10, 4294967295 + br i1 %icmp11, label %bb14, label %bb12 + +bb12: ; preds = %bb7 + %load13 = load volatile { i64, i64 }, ptr null, align 4294967296 + br label %bb14 + +bb14: ; preds = %bb12, %bb7 + %phi15 = phi i40 [ %and, %bb7 ], [ %arg2, %bb12 ] + %icmp16 = icmp ugt ptr %phi8, %arg + br i1 %icmp16, label %bb5, label %bb17 + +bb17: ; preds = %bb14 + %icmp18 = icmp eq ptr %phi8, null + %zext = zext i1 %icmp18 to i64 + %call19 = call ptr @_Znwm(i64 0) + %getelementptr = getelementptr %struct.wibble, ptr %arg3, i64 %zext + %getelementptr20 = getelementptr i8, ptr %call19, i64 8 + store i40 %phi15, ptr %getelementptr20, align 4 + br label %bb5 + +bb21: ; preds = %bb5 + %getelementptr22 = getelementptr %struct.snork.3, ptr %phi, i64 0, i32 1 + %load23 = load i8, ptr %getelementptr22, align 4 + %icmp24 = icmp eq i8 %load23, 0 + br i1 %icmp24, label %bb26, label %bb25 + +bb25: ; preds = %bb21 + call void @pluto(ptr %arg) + unreachable + +bb26: ; preds = %bb21 + ret void +} 
+ +define void @eggs(ptr %arg, ptr %arg1) { +; CHECK-LABEL: eggs: +; CHECK: # %bb.0: # %bb +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movq %rsi, %rdi +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: xorl %r8d, %r8d +; CHECK-NEXT: callq foo@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +bb: + call void @foo(ptr %arg1, ptr %arg, i40 0, ptr null, i32 0) + ret void +} + +declare ptr @widget() + +declare void @pluto(ptr) + +declare ptr @_Znwm(i64) + +attributes #0 = { noinline "frame-pointer"="all" } diff --git a/llvm/test/CodeGen/X86/coalescer-implicit-def-regression-imp-operand-assert.mir b/llvm/test/CodeGen/X86/coalescer-implicit-def-regression-imp-operand-assert.mir index 8241a17..0bc208d 100644 --- a/llvm/test/CodeGen/X86/coalescer-implicit-def-regression-imp-operand-assert.mir +++ b/llvm/test/CodeGen/X86/coalescer-implicit-def-regression-imp-operand-assert.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 -# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=register-coalescer -o - %s | FileCheck %s +# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=register-coalescer -o - %s | FileCheck %s --match-full-lines --- name: rematerialize_subreg_to_reg_added_impdef_1 tracksRegLiveness: true @@ -9,7 +9,7 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x2aaaaaab), %bb.2(0x55555555) ; CHECK-NEXT: liveins: $edi ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags + ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]] ; CHECK-NEXT: JCC_1 %bb.2, 5, implicit killed undef $eflags ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: @@ -28,7 +28,7 @@ body: | ; CHECK-NEXT: JCC_1 %bb.5, 5, implicit killed undef $eflags ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: - ; CHECK-NEXT: dead $eax = MOV32r0 implicit-def dead $eflags, implicit-def $al + ; CHECK-NEXT: dead $eax = MOV32r0 implicit-def dead $eflags, implicit-def $al, implicit-def $al ; CHECK-NEXT: RET 0, killed undef $al ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: diff --git a/llvm/test/CodeGen/X86/coalescing-subreg-to-reg-requires-subrange-update.mir b/llvm/test/CodeGen/X86/coalescing-subreg-to-reg-requires-subrange-update.mir new file mode 100644 index 0000000..2e6395f --- /dev/null +++ b/llvm/test/CodeGen/X86/coalescing-subreg-to-reg-requires-subrange-update.mir @@ -0,0 +1,44 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 +# RUN: llc -mtriple=x86_64-- -run-pass=register-coalescer -enable-subreg-liveness -verify-coalescing -o - %s | FileCheck %s + +--- +name: requires_new_subrange_coalesce_subreg_to_reg +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: requires_new_subrange_coalesce_subreg_to_reg + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $eax + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef %a.sub_32bit:gr64_with_sub_8bit = COPY $eax + ; CHECK-NEXT: %b:gr32 = IMPLICIT_DEF + ; CHECK-NEXT: %c:gr64 = INSERT_SUBREG %a, %b, %subreg.sub_32bit + ; CHECK-NEXT: JCC_1 %bb.2, 4, implicit undef $eflags + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef %a.sub_32bit:gr64_with_sub_8bit = 
MOV32r0 implicit-def dead $eflags + ; CHECK-NEXT: %c.sub_32bit:gr64 = COPY %a + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: %c.sub_32bit:gr64 = SUBREG_TO_REG %a, %b, %subreg.sub_32bit + ; CHECK-NEXT: RET 0, implicit %c + bb.0: + liveins: $eax + %init_eax:gr32 = COPY $eax + %a:gr64 = SUBREG_TO_REG 0, %init_eax, %subreg.sub_32bit + %b:gr32 = IMPLICIT_DEF + %c:gr64 = INSERT_SUBREG %a, %b, %subreg.sub_32bit + JCC_1 %bb.2, 4, implicit undef $eflags + + bb.1: + %imm0:gr32 = MOV32r0 implicit-def dead $eflags + %a = SUBREG_TO_REG 0, %imm0, %subreg.sub_32bit + %c.sub_32bit = COPY %a + + bb.2: + %c.sub_32bit = SUBREG_TO_REG %a, %b, %subreg.sub_32bit + RET 0, implicit %c + +... diff --git a/llvm/test/CodeGen/X86/combine-add-ssat.ll b/llvm/test/CodeGen/X86/combine-add-ssat.ll index 3e21798..75adcdd 100644 --- a/llvm/test/CodeGen/X86/combine-add-ssat.ll +++ b/llvm/test/CodeGen/X86/combine-add-ssat.ll @@ -62,12 +62,12 @@ define <8 x i16> @combine_constfold_v8i16() { define <8 x i16> @combine_constfold_undef_v8i16() { ; SSE-LABEL: combine_constfold_undef_v8i16: ; SSE: # %bb.0: -; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65535,65535,65534,0,65280,32768,0] +; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65535,u,65534,0,65280,32768,0] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_constfold_undef_v8i16: ; AVX: # %bb.0: -; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65535,65535,65534,0,65280,32768,0] +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65535,u,65534,0,65280,32768,0] ; AVX-NEXT: retq %res = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> <i16 undef, i16 1, i16 undef, i16 65535, i16 -1, i16 -255, i16 -32760, i16 1>, <8 x i16> <i16 1, i16 undef, i16 undef, i16 65535, i16 1, i16 65535, i16 -10, i16 65535>) ret <8 x i16> %res diff --git a/llvm/test/CodeGen/X86/combine-add-usat.ll b/llvm/test/CodeGen/X86/combine-add-usat.ll index 13bc3b2..5b947dd 100644 --- a/llvm/test/CodeGen/X86/combine-add-usat.ll +++ b/llvm/test/CodeGen/X86/combine-add-usat.ll @@ -62,12 +62,13 @@ define <8 x i16> @combine_constfold_v8i16() { define <8 x i16> @combine_constfold_undef_v8i16() { ; SSE-LABEL: combine_constfold_undef_v8i16: ; SSE: # %bb.0: -; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65535,65535,65535,65535,65535,2,65535] +; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65535,u,65535,65535,65535,2,65535] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_constfold_undef_v8i16: ; AVX: # %bb.0: -; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65535,65535,65535,65535,65535,2,65535] +; AVX-NEXT: vmovddup {{.*#+}} xmm0 = [65535,65535,2,65535,65535,65535,2,65535] +; AVX-NEXT: # xmm0 = mem[0,0] ; AVX-NEXT: retq %res = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> <i16 undef, i16 1, i16 undef, i16 65535, i16 -1, i16 -255, i16 -65535, i16 1>, <8 x i16> <i16 1, i16 undef, i16 undef, i16 65535, i16 1, i16 65535, i16 1, i16 65535>) ret <8 x i16> %res diff --git a/llvm/test/CodeGen/X86/combine-sub-ssat.ll b/llvm/test/CodeGen/X86/combine-sub-ssat.ll index 979331f..0dab025 100644 --- a/llvm/test/CodeGen/X86/combine-sub-ssat.ll +++ b/llvm/test/CodeGen/X86/combine-sub-ssat.ll @@ -62,12 +62,12 @@ define <8 x i16> @combine_constfold_v8i16() { define <8 x i16> @combine_constfold_undef_v8i16() { ; SSE-LABEL: combine_constfold_undef_v8i16: ; SSE: # %bb.0: -; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,0,0,65534,65282,32786,2] +; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,u,0,65534,65282,32786,2] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_constfold_undef_v8i16: ; AVX: # %bb.0: -; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,0,0,65534,65282,32786,2] +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = 
[0,0,u,0,65534,65282,32786,2] ; AVX-NEXT: retq %res = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> <i16 undef, i16 1, i16 undef, i16 65535, i16 -1, i16 -255, i16 -32760, i16 1>, <8 x i16> <i16 1, i16 undef, i16 undef, i16 65535, i16 1, i16 65535, i16 -10, i16 65535>) ret <8 x i16> %res diff --git a/llvm/test/CodeGen/X86/combine-sub-usat.ll b/llvm/test/CodeGen/X86/combine-sub-usat.ll index b70e3fc..36e374b 100644 --- a/llvm/test/CodeGen/X86/combine-sub-usat.ll +++ b/llvm/test/CodeGen/X86/combine-sub-usat.ll @@ -73,17 +73,17 @@ define <8 x i16> @combine_constfold_v8i16() { define <8 x i16> @combine_constfold_undef_v8i16() { ; SSE-LABEL: combine_constfold_undef_v8i16: ; SSE: # %bb.0: -; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,0,0,65534,0,0,0] +; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,u,0,65534,0,0,0] ; SSE-NEXT: retq ; ; AVX1-LABEL: combine_constfold_undef_v8i16: ; AVX1: # %bb.0: -; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,0,0,65534,0,0,0] +; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,u,0,65534,0,0,0] ; AVX1-NEXT: retq ; ; AVX2-LABEL: combine_constfold_undef_v8i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,0,0,65534,0,0,0] +; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,u,0,65534,0,0,0] ; AVX2-NEXT: retq ; ; AVX512-LABEL: combine_constfold_undef_v8i16: diff --git a/llvm/test/CodeGen/X86/constant-pool-partition.ll b/llvm/test/CodeGen/X86/constant-pool-partition.ll index 515284f..e42b41b 100644 --- a/llvm/test/CodeGen/X86/constant-pool-partition.ll +++ b/llvm/test/CodeGen/X86/constant-pool-partition.ll @@ -24,11 +24,11 @@ target triple = "x86_64-grtev4-linux-gnu" ; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always ;; For function @cold_func -; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8 +; CHECK: .section .rodata.cst8.hot.,"aM",@progbits,8 ; CHECK-NEXT: .p2align ; CHECK-NEXT: .LCPI0_0: ; CHECK-NEXT: .quad 0x3fe5c28f5c28f5c3 # double 0.68000000000000005 -; CHECK-NEXT: .section .rodata.cst8.unlikely,"aM",@progbits,8 +; CHECK-NEXT: .section .rodata.cst8.unlikely.,"aM",@progbits,8 ; CHECK-NEXT: .p2align ; CHECK-NEXT: .LCPI0_1: ; CHECK-NEXT: .quad 0x3eb0000000000000 # double 9.5367431640625E-7 @@ -50,11 +50,11 @@ target triple = "x86_64-grtev4-linux-gnu" ; CHECK-NEXT: .long 0x3e000000 # float 0.125 ;; For function @hot_func -; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8 +; CHECK: .section .rodata.cst8.hot.,"aM",@progbits,8 ; CHECK-NEXT: .p2align ; CHECK-NEXT: .LCPI3_0: ; CHECK-NEXT: .quad 0x3fe5c28f5c28f5c3 # double 0.68000000000000005 -; CHECK-NEXT: .section .rodata.cst16.hot,"aM",@progbits,16 +; CHECK-NEXT: .section .rodata.cst16.hot.,"aM",@progbits,16 ; CHECK-NEXT: .p2align ; CHECK-NEXT: .LCPI3_1: ; CHECK-NEXT: .long 2147483648 # 0x80000000 diff --git a/llvm/test/CodeGen/X86/embed-bitcode.ll b/llvm/test/CodeGen/X86/embed-bitcode.ll index 0d66ba8..d4af954 100644 --- a/llvm/test/CodeGen/X86/embed-bitcode.ll +++ b/llvm/test/CodeGen/X86/embed-bitcode.ll @@ -1,10 +1,23 @@ ; RUN: llc -filetype=obj -mtriple=x86_64 %s -o %t ; RUN: llvm-readelf -S %t | FileCheck %s +; RUN: llc -filetype=obj -mtriple=x86_64-pc-windows-msvc %s -o %t +; RUN: llvm-readobj -S %t | FileCheck %s --check-prefix=COFF ; CHECK: .text PROGBITS 0000000000000000 [[#%x,OFF:]] 000000 00 AX 0 ; CHECK-NEXT: .llvmbc PROGBITS 0000000000000000 [[#%x,OFF:]] 000004 00 0 ; CHECK-NEXT: .llvmcmd PROGBITS 0000000000000000 [[#%x,OFF:]] 000005 00 0 +; COFF: Name: .llvmbc (2E 6C 6C 76 6D 62 63 00) +; COFF: Characteristics [ +; COFF-NEXT: IMAGE_SCN_ALIGN_1BYTES +; COFF-NEXT: IMAGE_SCN_MEM_DISCARDABLE +; COFF-NEXT: ] +; COFF: Name: 
.llvmcmd (2E 6C 6C 76 6D 63 6D 64) +; COFF: Characteristics [ +; COFF-NEXT: IMAGE_SCN_ALIGN_1BYTES +; COFF-NEXT: IMAGE_SCN_MEM_DISCARDABLE +; COFF-NEXT: ] + @llvm.embedded.module = private constant [4 x i8] c"BC\C0\DE", section ".llvmbc", align 1 @llvm.cmdline = private constant [5 x i8] c"-cc1\00", section ".llvmcmd", align 1 @llvm.compiler.used = appending global [2 x ptr] [ptr @llvm.embedded.module, ptr @llvm.cmdline], section "llvm.metadata" diff --git a/llvm/test/CodeGen/X86/isel-fpclass.ll b/llvm/test/CodeGen/X86/isel-fpclass.ll new file mode 100644 index 0000000..960bbf5 --- /dev/null +++ b/llvm/test/CodeGen/X86/isel-fpclass.ll @@ -0,0 +1,526 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=i686-linux | FileCheck %s -check-prefixes=X86-SDAGISEL +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefixes=X64,X64-SDAGISEL +; RUN: llc < %s -mtriple=i686-linux -fast-isel -fast-isel-abort=1 | FileCheck %s -check-prefixes=X86-FASTISEL +; RUN: llc < %s -mtriple=x86_64-linux -fast-isel -fast-isel-abort=1 | FileCheck %s -check-prefixes=X64,X64-FASTISEL + +; FIXME: We can reuse/delete llvm/test/CodeGen/X86/is_fpclass.ll when all patches are included. + +define i1 @isnone_f(float %x) { +; X86-SDAGISEL-LABEL: isnone_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: xorl %eax, %eax +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: isnone_f: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: isnone_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstp %st(0) +; X86-FASTISEL-NEXT: xorl %eax, %eax +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 0) + ret i1 %0 +} + +define i1 @isany_f(float %x) { +; X86-SDAGISEL-LABEL: isany_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movb $1, %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: isany_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movb $1, %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: isany_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstp %st(0) +; X86-FASTISEL-NEXT: movb $1, %al +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1023) + ret i1 %0 +} + +define i1 @issignaling_f(float %x) { +; X86-SDAGISEL-LABEL: issignaling_f: +; X86-SDAGISEL: # %bb.0: +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-SDAGISEL-NEXT: setl %cl +; X86-SDAGISEL-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X86-SDAGISEL-NEXT: setge %al +; X86-SDAGISEL-NEXT: andb %cl, %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: issignaling_f: +; X64: # %bb.0: +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-NEXT: setl %cl +; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X64-NEXT: setge %al +; X64-NEXT: andb %cl, %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: issignaling_f: +; X86-FASTISEL: # %bb.0: +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-FASTISEL-NEXT: andl (%esp), %eax +; X86-FASTISEL-NEXT: cmpl $2143289344, %eax # 
imm = 0x7FC00000 +; X86-FASTISEL-NEXT: setl %cl +; X86-FASTISEL-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X86-FASTISEL-NEXT: setge %al +; X86-FASTISEL-NEXT: andb %cl, %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl + %a0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1) ; "snan" + ret i1 %a0 +} + + define i1 @isquiet_f(float %x) { +; X86-SDAGISEL-LABEL: isquiet_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-SDAGISEL-NEXT: setge %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: isquiet_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-NEXT: setge %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: isquiet_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-FASTISEL-NEXT: andl (%esp), %eax +; X86-FASTISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-FASTISEL-NEXT: setge %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl + entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 2) ; "qnan" + ret i1 %0 +} + +define i1 @not_isquiet_f(float %x) { +; X86-SDAGISEL-LABEL: not_isquiet_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-SDAGISEL-NEXT: setl %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: not_isquiet_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-NEXT: setl %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: not_isquiet_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-FASTISEL-NEXT: andl (%esp), %eax +; X86-FASTISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-FASTISEL-NEXT: setl %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1021) ; ~"qnan" + ret i1 %0 +} + +define i1 @isinf_f(float %x) { +; X86-SDAGISEL-LABEL: isinf_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: sete %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: isinf_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: isinf_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; 
X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-FASTISEL-NEXT: andl (%esp), %eax +; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-FASTISEL-NEXT: sete %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 516) ; 0x204 = "inf" + ret i1 %0 +} + +define i1 @not_isinf_f(float %x) { +; X86-SDAGISEL-LABEL: not_isinf_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: setne %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: not_isinf_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setne %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: not_isinf_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-FASTISEL-NEXT: andl (%esp), %eax +; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-FASTISEL-NEXT: setne %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 507) ; ~0x204 = "~inf" + ret i1 %0 +} + +define i1 @is_plus_inf_f(float %x) { +; X86-SDAGISEL-LABEL: is_plus_inf_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: sete %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: is_plus_inf_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: is_plus_inf_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: cmpl $2139095040, (%esp) # imm = 0x7F800000 +; X86-FASTISEL-NEXT: sete %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 512) ; 0x200 = "+inf" + ret i1 %0 +} + +define i1 @is_minus_inf_f(float %x) { +; X86-SDAGISEL-LABEL: is_minus_inf_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000 +; X86-SDAGISEL-NEXT: sete %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: is_minus_inf_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: is_minus_inf_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: cmpl $-8388608, (%esp) # imm = 0xFF800000 +; X86-FASTISEL-NEXT: sete %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 4) ; "-inf" + ret 
i1 %0 +} + +define i1 @not_is_minus_inf_f(float %x) { +; X86-SDAGISEL-LABEL: not_is_minus_inf_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000 +; X86-SDAGISEL-NEXT: setne %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: not_is_minus_inf_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 +; X64-NEXT: setne %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: not_is_minus_inf_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: cmpl $-8388608, (%esp) # imm = 0xFF800000 +; X86-FASTISEL-NEXT: setne %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1019) ; ~"-inf" + ret i1 %0 +} + +define i1 @isfinite_f(float %x) { +; X86-SDAGISEL-LABEL: isfinite_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: setl %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: isfinite_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setl %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: isfinite_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-FASTISEL-NEXT: andl (%esp), %eax +; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-FASTISEL-NEXT: setl %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; 0x1f8 = "finite" + ret i1 %0 +} + +define i1 @not_isfinite_f(float %x) { +; X86-SDAGISEL-LABEL: not_isfinite_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: setge %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: not_isfinite_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setge %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: not_isfinite_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-FASTISEL-NEXT: andl (%esp), %eax +; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-FASTISEL-NEXT: setge %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 519) ; ~0x1f8 = "~finite" + ret i1 %0 +} + +define i1 @is_plus_finite_f(float %x) { +; X86-SDAGISEL-LABEL: is_plus_finite_f: +; X86-SDAGISEL: # %bb.0: # %entry +; 
X86-SDAGISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: setb %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: is_plus_finite_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setb %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: is_plus_finite_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: cmpl $2139095040, (%esp) # imm = 0x7F800000 +; X86-FASTISEL-NEXT: setb %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 448) ; 0x1c0 = "+finite" + ret i1 %0 +} + +define i1 @isnone_d(double %x) nounwind { +; X86-SDAGISEL-LABEL: isnone_d: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: xorl %eax, %eax +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: isnone_d: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: isnone_d: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: fldl {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstp %st(0) +; X86-FASTISEL-NEXT: xorl %eax, %eax +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 0) + ret i1 %0 +} + +define i1 @isany_d(double %x) nounwind { +; X86-SDAGISEL-LABEL: isany_d: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movb $1, %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: isany_d: +; X64: # %bb.0: # %entry +; X64-NEXT: movb $1, %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: isany_d: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: fldl {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstp %st(0) +; X86-FASTISEL-NEXT: movb $1, %al +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 1023) + ret i1 %0 +} + +define i1 @isnone_f80(x86_fp80 %x) nounwind { +; X86-SDAGISEL-LABEL: isnone_f80: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: xorl %eax, %eax +; X86-SDAGISEL-NEXT: retl +; +; X64-SDAGISEL-LABEL: isnone_f80: +; X64-SDAGISEL: # %bb.0: # %entry +; X64-SDAGISEL-NEXT: xorl %eax, %eax +; X64-SDAGISEL-NEXT: retq +; +; X86-FASTISEL-LABEL: isnone_f80: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: fldt {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstp %st(0) +; X86-FASTISEL-NEXT: xorl %eax, %eax +; X86-FASTISEL-NEXT: retl +; +; X64-FASTISEL-LABEL: isnone_f80: +; X64-FASTISEL: # %bb.0: # %entry +; X64-FASTISEL-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-FASTISEL-NEXT: fstp %st(0) +; X64-FASTISEL-NEXT: xorl %eax, %eax +; X64-FASTISEL-NEXT: retq +entry: +%0 = tail call i1 @llvm.is.fpclass.f80(x86_fp80 %x, i32 0) +ret i1 %0 +} + +define i1 @isany_f80(x86_fp80 %x) nounwind { +; X86-SDAGISEL-LABEL: isany_f80: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movb $1, %al +; X86-SDAGISEL-NEXT: retl +; +; X64-SDAGISEL-LABEL: isany_f80: +; X64-SDAGISEL: # %bb.0: # %entry +; X64-SDAGISEL-NEXT: movb $1, %al +; X64-SDAGISEL-NEXT: retq +; +; X86-FASTISEL-LABEL: isany_f80: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: fldt {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstp %st(0) +; X86-FASTISEL-NEXT: movb $1, %al +; X86-FASTISEL-NEXT: retl +; +; X64-FASTISEL-LABEL: isany_f80: +; X64-FASTISEL: # %bb.0: # %entry +; X64-FASTISEL-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-FASTISEL-NEXT: fstp %st(0) +; X64-FASTISEL-NEXT: movb $1, %al +; 
X64-FASTISEL-NEXT: retq +entry: + %0 = tail call i1 @llvm.is.fpclass.f80(x86_fp80 %x, i32 1023) + ret i1 %0 +} diff --git a/llvm/test/CodeGen/X86/late-tail-dup-computed-goto.mir b/llvm/test/CodeGen/X86/late-tail-dup-computed-goto.mir new file mode 100644 index 0000000..e272e7e --- /dev/null +++ b/llvm/test/CodeGen/X86/late-tail-dup-computed-goto.mir @@ -0,0 +1,128 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=tailduplication -tail-dup-pred-size=1 -tail-dup-succ-size=1 %s -o - | FileCheck %s +# +# Check that only the computed gotos are duplicated aggressively. +# +--- | + @computed_goto.dispatch = constant [5 x ptr] [ptr null, ptr blockaddress(@computed_goto, %bb1), ptr blockaddress(@computed_goto, %bb2), ptr blockaddress(@computed_goto, %bb3), ptr blockaddress(@computed_goto, %bb4)] + declare i64 @f0() + declare i64 @f1() + declare i64 @f2() + declare i64 @f3() + declare i64 @f4() + declare i64 @f5() + define void @computed_goto() { + start: + ret void + bb1: + ret void + bb2: + ret void + bb3: + ret void + bb4: + ret void + } + define void @jump_table() { ret void } + define void @jump_table_pic() { ret void } +... +--- +name: computed_goto +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: computed_goto + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f0, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rax + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64_nosp = COPY [[COPY]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64_nosp = COPY [[COPY1]] + ; CHECK-NEXT: JMP64m $noreg, 8, [[COPY2]], @computed_goto.dispatch, $noreg + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.bb1 (ir-block-address-taken %ir-block.bb1): + ; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f1, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr64 = COPY $rax + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64_nosp = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64_nosp = COPY [[COPY1]] + ; CHECK-NEXT: JMP64m $noreg, 8, [[COPY2]], @computed_goto.dispatch, $noreg + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.bb2 (ir-block-address-taken %ir-block.bb2): + ; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f2, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr64 = COPY $rax + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64_nosp = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64_nosp = COPY [[COPY1]] + ; CHECK-NEXT: JMP64m $noreg, 8, [[COPY2]], @computed_goto.dispatch, $noreg + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.bb3 (ir-block-address-taken %ir-block.bb3): + ; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f3, csr_64, implicit $rsp, implicit $ssp, 
implicit-def $rsp, implicit-def $ssp, implicit-def $rax + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr64 = COPY $rax + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64_nosp = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64_nosp = COPY [[COPY1]] + ; CHECK-NEXT: JMP64m $noreg, 8, [[COPY2]], @computed_goto.dispatch, $noreg + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.bb4 (ir-block-address-taken %ir-block.bb4): + ; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f4, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr64 = COPY $rax + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64_nosp = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64_nosp = COPY [[COPY1]] + ; CHECK-NEXT: JMP64m $noreg, 8, [[COPY2]], @computed_goto.dispatch, $noreg + bb.0: + successors: %bb.5(0x80000000) + + CALL64pcrel32 target-flags(x86-plt) @f0, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax + %0:gr64 = COPY $rax + %6:gr64_nosp = COPY %0 + JMP_1 %bb.5 + + bb.1.bb1 (ir-block-address-taken %ir-block.bb1): + successors: %bb.5(0x80000000) + + CALL64pcrel32 target-flags(x86-plt) @f1, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax + %1:gr64 = COPY $rax + %6:gr64_nosp = COPY %1 + JMP_1 %bb.5 + + bb.2.bb2 (ir-block-address-taken %ir-block.bb2): + successors: %bb.5(0x80000000) + + CALL64pcrel32 target-flags(x86-plt) @f2, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax + %2:gr64 = COPY $rax + %6:gr64_nosp = COPY %2 + JMP_1 %bb.5 + + bb.3.bb3 (ir-block-address-taken %ir-block.bb3): + successors: %bb.5(0x80000000) + + CALL64pcrel32 target-flags(x86-plt) @f3, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax + %3:gr64 = COPY $rax + %6:gr64_nosp = COPY %3 + JMP_1 %bb.5 + + bb.4.bb4 (ir-block-address-taken %ir-block.bb4): + successors: %bb.5(0x80000000) + + CALL64pcrel32 target-flags(x86-plt) @f4, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax + %4:gr64 = COPY $rax + %6:gr64_nosp = COPY %4 + + bb.5: + successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000) + + %5:gr64_nosp = COPY %6 + JMP64m $noreg, 8, %5, @computed_goto.dispatch, $noreg +... 
diff --git a/llvm/test/CodeGen/X86/load-combine.ll b/llvm/test/CodeGen/X86/load-combine.ll index b5f3e78..f21c075 100644 --- a/llvm/test/CodeGen/X86/load-combine.ll +++ b/llvm/test/CodeGen/X86/load-combine.ll @@ -800,13 +800,13 @@ define void @shift_i32_by_32(ptr %src1, ptr %src2, ptr %dst) { ; CHECK-LABEL: shift_i32_by_32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl $-1, 4(%eax) -; CHECK-NEXT: movl $-1, (%eax) +; CHECK-NEXT: movl $0, 4(%eax) +; CHECK-NEXT: movl $0, (%eax) ; CHECK-NEXT: retl ; ; CHECK64-LABEL: shift_i32_by_32: ; CHECK64: # %bb.0: # %entry -; CHECK64-NEXT: movq $-1, (%rdx) +; CHECK64-NEXT: movq $0, (%rdx) ; CHECK64-NEXT: retq entry: %load1 = load i8, ptr %src1, align 1 diff --git a/llvm/test/CodeGen/X86/pr33960.ll b/llvm/test/CodeGen/X86/pr33960.ll index 44fe777..6ee270e 100644 --- a/llvm/test/CodeGen/X86/pr33960.ll +++ b/llvm/test/CodeGen/X86/pr33960.ll @@ -7,12 +7,10 @@ define void @PR33960() { ; X86-LABEL: PR33960: ; X86: # %bb.0: # %entry -; X86-NEXT: movl $-1, b ; X86-NEXT: retl ; ; X64-LABEL: PR33960: ; X64: # %bb.0: # %entry -; X64-NEXT: movl $-1, b(%rip) ; X64-NEXT: retq entry: %tmp = insertelement <4 x i32> <i32 undef, i32 -7, i32 -3, i32 undef>, i32 -2, i32 3 diff --git a/llvm/test/CodeGen/X86/pr76416.ll b/llvm/test/CodeGen/X86/pr76416.ll new file mode 100644 index 0000000..68e9ef9 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr76416.ll @@ -0,0 +1,79 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; +; Reproducer from https://github.com/llvm/llvm-project/issues/76416 +; + +@load_p = external global ptr, align 8 +@load_data = external global i8, align 1 + +define dso_local void @pr76416() { +; CHECK-LABEL: pr76416: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl $0, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: cmpl $3, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: jg .LBB0_3 +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_2: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: incl -{{[0-9]+}}(%rsp) +; CHECK-NEXT: cmpl $3, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: jle .LBB0_2 +; CHECK-NEXT: .LBB0_3: # %for.end +; CHECK-NEXT: movl $0, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movq load_p@GOTPCREL(%rip), %rax +; CHECK-NEXT: movq load_data@GOTPCREL(%rip), %rcx +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_4: # %for.cond1 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: movq (%rax), %rdx +; CHECK-NEXT: movslq -{{[0-9]+}}(%rsp), %rsi +; CHECK-NEXT: movzbl (%rdx,%rsi), %edx +; CHECK-NEXT: movb %dl, (%rcx) +; CHECK-NEXT: leal 1(%rsi), %edx +; CHECK-NEXT: movl %edx, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: jmp .LBB0_4 +entry: + %alloca = alloca i32, align 4 + store i32 0, ptr %alloca, align 4 + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %load.from.alloca.0 = load i32, ptr %alloca, align 4 + %cmp = icmp slt i32 %load.from.alloca.0, 4 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + call void asm sideeffect "", "{ax},~{dirflag},~{fpsr},~{flags}"(i8 0) nounwind + %load.from.alloca.1 = load i32, ptr %alloca, align 4 + %inc = add nsw i32 %load.from.alloca.1, 1 + store i32 %inc, ptr %alloca, align 4 + br label %for.cond + +for.end: ; preds = 
%for.cond + store i32 0, ptr %alloca, align 4 + br label %for.cond1 + +for.cond1: ; preds = %for.cond1, %for.end + call void asm sideeffect "", "N{dx},~{dirflag},~{fpsr},~{flags}"(i32 poison) nounwind + %load.from.load_p = load ptr, ptr @load_p, align 8 + %regs = getelementptr inbounds { [4 x i8] }, ptr %load.from.load_p, i32 0, i32 0 + %load.from.alloca.2 = load i32, ptr %alloca, align 4 + %idxprom = sext i32 %load.from.alloca.2 to i64 + %arrayidx = getelementptr inbounds [4 x i8], ptr %regs, i64 0, i64 %idxprom + %load.with.gep.ptr = load i8, ptr %arrayidx, align 1 + store i8 %load.with.gep.ptr, ptr @load_data, align 1 + %load.from.alloca.3 = load i32, ptr %alloca, align 4 + %inc2 = add nsw i32 %load.from.alloca.3, 1 + store i32 %inc2, ptr %alloca, align 4 + br label %for.cond1 +} diff --git a/llvm/test/CodeGen/X86/stack-protector.ll b/llvm/test/CodeGen/X86/stack-protector.ll index f4f3ae4..772e776 100644 --- a/llvm/test/CodeGen/X86/stack-protector.ll +++ b/llvm/test/CodeGen/X86/stack-protector.ll @@ -6,6 +6,7 @@ ; RUN: llc -mtriple=amd64-pc-openbsd < %s -o - | FileCheck --check-prefix=OPENBSD-AMD64 %s ; RUN: llc -mtriple=i386-pc-windows-msvc < %s -o - | FileCheck -check-prefix=MSVC-I386 %s ; RUN: llc -mtriple=x86_64-w64-mingw32 < %s -o - | FileCheck --check-prefix=MINGW-X64 %s +; RUN: llc -mtriple=x86_64-pc-cygwin < %s -o - | FileCheck --check-prefix=MINGW-X64 %s ; RUN: llc -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck --check-prefix=IGNORE_INTRIN %s %struct.foo = type { [16 x i8] } diff --git a/llvm/test/CodeGen/X86/subreg-fail.mir b/llvm/test/CodeGen/X86/subreg-fail.mir index c8146f0..dc69071 100644 --- a/llvm/test/CodeGen/X86/subreg-fail.mir +++ b/llvm/test/CodeGen/X86/subreg-fail.mir @@ -14,8 +14,8 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: test1 - ; CHECK: undef [[MOV32rm:%[0-9]+]].sub_32bit:gr64_nosp = MOV32rm undef %1:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`) - ; CHECK-NEXT: undef [[MOV32rm1:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32rm undef %4:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`) + ; CHECK: undef [[MOV32rm:%[0-9]+]].sub_32bit:gr64_nosp = MOV32rm undef %1:gr64, 1, $noreg, 0, $noreg, implicit-def [[MOV32rm]] :: (volatile load (s32) from `ptr undef`) + ; CHECK-NEXT: undef [[MOV32rm1:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32rm undef %4:gr64, 1, $noreg, 0, $noreg, implicit-def [[MOV32rm1]] :: (volatile load (s32) from `ptr undef`) ; CHECK-NEXT: [[MOV32rm1:%[0-9]+]]:gr64_with_sub_8bit = SHL64ri [[MOV32rm1]], 32, implicit-def dead $eflags ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_with_sub_8bit = LEA64r [[MOV32rm1]], 1, [[MOV32rm]], 256, $noreg ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_with_sub_8bit = SHR64ri [[LEA64r]], 8, implicit-def dead $eflags diff --git a/llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir b/llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir new file mode 100644 index 0000000..e4fb812 --- /dev/null +++ b/llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir @@ -0,0 +1,451 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +# RUN: llc -mtriple=x86_64-- -run-pass=register-coalescer -o - %s | FileCheck %s --match-full-lines + +# We cannot lose the liveness of the high subregister of %1 when +# coalesced with %0, so introduce an implicit-def of the super +# register on the MOV. 
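+#
+# As a rough illustration (a hedged sketch of the pattern the first test below
+# exercises, not an additional check): before coalescing the input looks like
+#
+#   %0:gr32 = MOV32r0 implicit-def dead $eflags
+#   %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
+#
+# and after %0 is coalesced into %1 the 32-bit def must gain an implicit-def of
+# the 64-bit super register so the upper half of %1 stays defined:
+#
+#   undef %1.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %1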
+ +--- +name: coalesce_mov32r0_into_subreg_to_reg64 +frameInfo: + adjustsStack: true +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64 + ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]] + ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi + ; CHECK-NEXT: CALL64r [[MOV32r0_]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: RET 0 + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + %0:gr32 = MOV32r0 implicit-def dead $eflags + %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit + $rdi = COPY %1 + CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + RET 0 + +... + +--- +name: subreg_to_reg_folds_to_undef +frameInfo: + adjustsStack: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $rax + + ; CHECK-LABEL: name: subreg_to_reg_folds_to_undef + ; CHECK: liveins: $rax + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64_with_sub_8bit = COPY $rax + ; CHECK-NEXT: undef [[MOV32rr:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32rr [[COPY]].sub_32bit, implicit-def [[MOV32rr]] + ; CHECK-NEXT: RET 0, implicit [[MOV32rr]] + %0:gr64 = COPY killed $rax + %1:gr32 = COPY killed %0.sub_32bit + %2:gr32 = MOV32rr killed %1 + %3:gr64 = SUBREG_TO_REG 0, killed %2, %subreg.sub_32bit + %4:gr64 = COPY killed %3 + RET 0, implicit %4 + +... 
+ +--- +name: coalesce_mov32r0_subreg_def_into_subreg_to_reg64 +frameInfo: + adjustsStack: true +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: name: coalesce_mov32r0_subreg_def_into_subreg_to_reg64 + ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]] + ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi + ; CHECK-NEXT: CALL64r [[MOV32r0_]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: RET 0 + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + undef %0.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags + %1:gr64 = SUBREG_TO_REG 0, killed %0.sub_32bit, %subreg.sub_32bit + $rdi = COPY %1 + CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + RET 0 + +... + +--- +name: coalesce_mov32r0_into_subreg_def_with_super_def_to_reg64 +frameInfo: + adjustsStack: true +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_def_with_super_def_to_reg64 + ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]], implicit-def [[MOV32r0_]] + ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi, implicit-def $rdi + ; CHECK-NEXT: CALL64r [[MOV32r0_]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: RET 0 + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + undef %0.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %0 + %1:gr64 = SUBREG_TO_REG 0, killed %0.sub_32bit, %subreg.sub_32bit + $rdi = COPY %1 + CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + RET 0 + +... 
+ +--- +name: coalesce_mov32r0_into_subreg_to_reg64_already_defs_other_subreg +frameInfo: + adjustsStack: true +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_already_defs_other_subreg + ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def undef [[MOV32r0_]].sub_8bit, implicit-def [[MOV32r0_]] + ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, implicit [[MOV32r0_]] + ; CHECK-NEXT: CALL64r [[MOV32r0_]], csr_64, implicit $rsp, implicit $ssp, implicit undef $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: RET 0 + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + %0:gr32 = MOV32r0 implicit-def dead $eflags, implicit-def undef %0.sub_8bit + %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit + INLINEASM &"", 0, implicit %1 + CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit undef $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + RET 0 + +... + + +# Reduced realistic case which was asserting after introducing new implicit-defs +--- +name: coalesce_needs_implicit_defs +frameInfo: + adjustsStack: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: coalesce_needs_implicit_defs + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $rdi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rdi + ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]], implicit-def [[MOV32r0_]] + ; CHECK-NEXT: undef [[MOV32r0_1:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_1]], implicit-def [[MOV32r0_1]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[MOV32r0_2:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags + ; CHECK-NEXT: TEST64rr [[MOV32r0_1]], [[MOV32r0_1]], implicit-def $eflags + ; CHECK-NEXT: [[MOV32r0_2:%[0-9]+]].sub_8bit:gr64_with_sub_8bit = SETCCr 4, implicit killed $eflags + ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi + ; CHECK-NEXT: CALL64r [[MOV32r0_]], csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: [[MOV32r0_2:%[0-9]+]]:gr64_with_sub_8bit = SHL64ri [[MOV32r0_2]], 4, implicit-def dead $eflags + ; CHECK-NEXT: [[MOV32r0_2:%[0-9]+]]:gr64_with_sub_8bit = ADD64rr [[MOV32r0_2]], [[COPY]], implicit-def dead $eflags + ; CHECK-NEXT: [[MOV32r0_1:%[0-9]+]]:gr64_with_sub_8bit = COPY [[MOV32r0_2]] + ; CHECK-NEXT: JMP_1 %bb.1 + bb.0: + 
liveins: $rdi + + %0:gr64 = COPY killed $rdi + %1:gr32 = MOV32r0 implicit-def dead $eflags + %2:gr64 = SUBREG_TO_REG 0, %1, %subreg.sub_32bit + %3:gr64 = COPY killed %2 + + bb.1: + %4:gr64 = COPY killed %3 + %5:gr32 = MOV32r0 implicit-def dead $eflags + TEST64rr killed %4, %4, implicit-def $eflags + %6:gr8 = SETCCr 4, implicit killed $eflags + %7:gr32 = COPY killed %5 + %7.sub_8bit:gr32 = COPY killed %6 + %8:gr64 = SUBREG_TO_REG 0, killed %7, %subreg.sub_32bit + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + %9:gr64 = SUBREG_TO_REG 0, %1, %subreg.sub_32bit + $rdi = COPY %9 + CALL64r killed %9, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + %10:gr64 = COPY killed %8 + %10:gr64 = SHL64ri %10, 4, implicit-def dead $eflags + %11:gr64 = COPY killed %10 + %11:gr64 = ADD64rr %11, %0, implicit-def dead $eflags + %3:gr64 = COPY killed %11 + JMP_1 %bb.1 + +... + +# Make sure to add the 'undef' flag to the result register %2, +# because the top 32bits are not defined. +--- +name: coalesce_add_implicitdef_and_undef +frameInfo: + adjustsStack: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: coalesce_add_implicitdef_and_undef + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $eflags, $edx + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = COPY $edx + ; CHECK-NEXT: JMP_1 %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = ADD32ri [[COPY]].sub_32bit, -34, implicit-def $eflags, implicit-def [[COPY]] + ; CHECK-NEXT: FAKE_USE [[COPY]] + ; CHECK-NEXT: RET 0 + bb.0: + liveins: $eflags, $edx + %0:gr32 = COPY $edx + JMP_1 %bb.1 + + bb.1: + %1:gr32 = COPY %0 + %1:gr32 = ADD32ri %1, -34, implicit-def $eflags + %2:gr64_with_sub_8bit = SUBREG_TO_REG 0, killed %1, %subreg.sub_32bit + FAKE_USE %2 + RET 0 +... + +# We can't mark the destination register as 'undef' or add implicit-def +# because the top 24 bits of %0:gr32 are retained by the SUBREG_TO_REG. +# +# For example, if this were to result in: +# +# undef %2.sub_32bit:gr64_with_sub_8bit = COPY $edx +# %1:gr8 = SETCCr 4, implicit $eflags +# JMP_1 %bb.1 +# +# bb.1: +# undef %2.sub_8bit:gr64_with_sub_8bit = COPY %1, implicit-def %2 +# +# Then this says that the top 56 bits of %2 are undef. That's not correct +# because only the top 32 bits are undef. 
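+#
+# For contrast (a hedged sketch of what the test below expects, not an extra
+# check), the correct coalesced form keeps the partial def plain:
+#
+#   bb.1:
+#     %2.sub_8bit:gr64_with_sub_8bit = COPY %1
+#
+# Bits 8-31 of %2 still hold the value copied from $edx in bb.0, so neither an
+# 'undef' flag nor an implicit-def of %2 may be added here.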
+--- +name: coalesce_dont_add_implicitdef_or_undef +frameInfo: + adjustsStack: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: coalesce_dont_add_implicitdef_or_undef + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $eflags, $edx + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = COPY $edx + ; CHECK-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags + ; CHECK-NEXT: JMP_1 %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: [[COPY:%[0-9]+]].sub_8bit:gr64_with_sub_8bit = COPY [[SETCCr]] + ; CHECK-NEXT: FAKE_USE [[COPY]] + ; CHECK-NEXT: RET 0 + bb.0: + liveins: $eflags, $edx + %0:gr32 = COPY $edx + %1:gr8 = SETCCr 4, implicit killed $eflags + JMP_1 %bb.1 + + bb.1: + %0.sub_8bit:gr32 = COPY %1 + %2:gr64_with_sub_8bit = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit + FAKE_USE %2 + RET 0 +... + +--- +name: coalesce_mov32r0_into_subreg_to_reg64_physreg_def +frameInfo: + adjustsStack: true +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_physreg_def + ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi + ; CHECK-NEXT: CALL64r killed $rdi, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: RET 0 + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + %0:gr32 = MOV32r0 implicit-def dead $eflags + $rdi = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit + CALL64r killed $rdi, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + RET 0 + +... 
+ +--- +name: coalesce_mov32r0_into_subreg_to_reg64_physreg_use +frameInfo: + adjustsStack: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $eax + ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_physreg_use + ; CHECK: liveins: $eax + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: $eax = MOV32r0 implicit-def dead $eflags + ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, $eax, %subreg.sub_32bit + ; CHECK-NEXT: $rdi = COPY [[SUBREG_TO_REG]] + ; CHECK-NEXT: CALL64r [[SUBREG_TO_REG]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: RET 0 + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + $eax = MOV32r0 implicit-def dead $eflags + %1:gr64 = SUBREG_TO_REG 0, killed $eax, %subreg.sub_32bit + $rdi = COPY %1 + CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + RET 0 + +... + +# Coalesced instruction is a copy with other implicit operands +--- +name: coalesce_copy_into_subreg_to_reg64 +frameInfo: + adjustsStack: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $eax + ; CHECK-LABEL: name: coalesce_copy_into_subreg_to_reg64 + ; CHECK: liveins: $eax + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = COPY $eax, implicit-def dead $eflags, implicit-def [[COPY]] + ; CHECK-NEXT: $rdi = COPY [[COPY]] + ; CHECK-NEXT: CALL64r [[COPY]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: RET 0 + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + %0:gr32 = COPY $eax, implicit-def dead $eflags + %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit + $rdi = COPY %1 + CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + RET 0 + +... 
+ +--- +name: coalesce_mov32r0_into_subreg_to_reg64_multiple_redef_value +frameInfo: + adjustsStack: true +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_multiple_redef_value + ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags + ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, implicit-def undef [[MOV32r0_]].sub_32bit, implicit [[MOV32r0_]].sub_32bit, implicit-def [[MOV32r0_]] + ; CHECK-NEXT: $rdi = COPY [[MOV32r0_]] + ; CHECK-NEXT: CALL64r [[MOV32r0_]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: RET 0 + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + %0:gr32 = MOV32r0 implicit-def dead $eflags + INLINEASM &"", 0, implicit-def %0, implicit %0 + %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit + $rdi = COPY %1 + CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + RET 0 + +... + +--- +name: coalesce_mov32r0_into_subreg_to_reg64_def_is_block_liveout +frameInfo: + adjustsStack: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_def_is_block_liveout + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, implicit-def undef %1.sub_32bit, implicit-def %1 + ; CHECK-NEXT: JCC_1 %bb.1, 4, implicit undef $eflags + ; CHECK-NEXT: JMP_1 %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: $rdi = COPY %1 + ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: RET 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + bb.0: + INLINEASM &"", 0, implicit-def %0:gr32 + JCC_1 %bb.1, 4, implicit undef $eflags + JMP_1 %bb.2 + + bb.1: + %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit + $rdi = COPY %1 + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + RET 0 + + bb.2: + +... 
+ +--- +name: coalesce_mov32r0_into_subreg_to_reg64_def_is_phi_def +frameInfo: + adjustsStack: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_def_is_phi_def + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, implicit-def undef %1.sub_32bit, implicit-def %1 + ; CHECK-NEXT: JCC_1 %bb.1, 4, implicit undef $eflags + ; CHECK-NEXT: JMP_1 %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $rdi = COPY %1 + ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; CHECK-NEXT: JMP_1 %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + bb.0: + + INLINEASM &"", 0, implicit-def %0:gr32 + JCC_1 %bb.1, 4, implicit undef $eflags + JMP_1 %bb.2 + + bb.1: + %1:gr64 = SUBREG_TO_REG 0, %0, %subreg.sub_32bit + $rdi = COPY %1 + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + JMP_1 %bb.1 + + bb.2: + +... diff --git a/llvm/test/CodeGen/X86/swap.ll b/llvm/test/CodeGen/X86/swap.ll index 1dc454dd..3330403 100644 --- a/llvm/test/CodeGen/X86/swap.ll +++ b/llvm/test/CodeGen/X86/swap.ll @@ -113,21 +113,17 @@ define dso_local void @onealloc_readback_1(ptr nocapture %a, ptr nocapture %b) l ; ; AA-LABEL: onealloc_readback_1: ; AA: # %bb.0: # %entry -; AA-NEXT: vmovups (%rdi), %xmm0 -; AA-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; AA-NEXT: vmovups (%rsi), %xmm0 ; AA-NEXT: vmovups %xmm0, (%rdi) ; AA-NEXT: retq entry: %alloc = alloca [16 x i8], i8 2, align 1 %part1 = getelementptr inbounds [16 x i8], ptr %alloc, i64 1, i64 0 - call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %part1) - call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %alloc) + call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %alloc) call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %part1, ptr align 1 %a, i64 16, i1 false) call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %alloc, ptr align 1 %b, i64 16, i1 false) - call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %part1) tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %a, ptr align 1 %alloc, i64 16, i1 false) - call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %alloc) + call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %alloc) ret void } @@ -144,19 +140,16 @@ define dso_local void @onealloc_readback_2(ptr nocapture %a, ptr nocapture %b) l ; AA-LABEL: onealloc_readback_2: ; AA: # %bb.0: # %entry ; AA-NEXT: vmovups (%rsi), %xmm0 -; AA-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; AA-NEXT: vmovups %xmm0, (%rdi) ; AA-NEXT: retq entry: %alloc = alloca [16 x i8], i8 2, align 1 %part2 = getelementptr inbounds [16 x i8], ptr %alloc, i64 1, i64 0 - call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %alloc) - call void 
@llvm.lifetime.start.p0(i64 16, ptr nonnull %part2) + call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %alloc) call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %alloc, ptr align 1 %a, i64 16, i1 false) call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %part2, ptr align 1 %b, i64 16, i1 false) - call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %alloc) tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %a, ptr align 1 %part2, i64 16, i1 false) - call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %part2) + call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %alloc) ret void } diff --git a/llvm/test/CodeGen/X86/win32-ssp.ll b/llvm/test/CodeGen/X86/win32-ssp.ll index 536a6d5..259f039 100644 --- a/llvm/test/CodeGen/X86/win32-ssp.ll +++ b/llvm/test/CodeGen/X86/win32-ssp.ll @@ -1,7 +1,9 @@ ; RUN: llc -mtriple=x86_64-w64-mingw32 < %s -o - | FileCheck --check-prefix=MINGW %s +; RUN: llc -mtriple=x86_64-pc-cygwin < %s -o - | FileCheck --check-prefix=MINGW %s ; RUN: llc -mtriple=x86_64-pc-windows-itanium < %s -o - | FileCheck --check-prefix=MSVC %s ; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s -o - | FileCheck --check-prefix=MSVC %s ; RUN: llc -mtriple=i686-w64-mingw32 < %s -o - | FileCheck --check-prefix=MINGW %s +; RUN: llc -mtriple=i686-pc-cygwin < %s -o - | FileCheck --check-prefix=MINGW %s declare void @llvm.lifetime.start.p0(i64, ptr nocapture) declare dso_local void @other(ptr) |