aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/X86
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r--llvm/test/CodeGen/X86/GlobalISel/legalize-memop-scalar-32.mir38
-rw-r--r--llvm/test/CodeGen/X86/GlobalISel/legalize-undef.mir3
-rw-r--r--llvm/test/CodeGen/X86/GlobalISel/regbankselect-x87.ll4
-rw-r--r--llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll24
-rw-r--r--llvm/test/CodeGen/X86/apx/cf.ll39
-rw-r--r--llvm/test/CodeGen/X86/calleetypeid-directcall-mismatched.ll32
-rw-r--r--llvm/test/CodeGen/X86/callsite-emit-calleetypeid-tailcall.ll19
-rw-r--r--llvm/test/CodeGen/X86/callsite-emit-calleetypeid.ll20
-rw-r--r--llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll185
-rw-r--r--llvm/test/CodeGen/X86/coalescer-implicit-def-regression-imp-operand-assert.mir6
-rw-r--r--llvm/test/CodeGen/X86/coalescing-subreg-to-reg-requires-subrange-update.mir44
-rw-r--r--llvm/test/CodeGen/X86/combine-add-ssat.ll4
-rw-r--r--llvm/test/CodeGen/X86/combine-add-usat.ll5
-rw-r--r--llvm/test/CodeGen/X86/combine-sub-ssat.ll4
-rw-r--r--llvm/test/CodeGen/X86/combine-sub-usat.ll6
-rw-r--r--llvm/test/CodeGen/X86/constant-pool-partition.ll8
-rw-r--r--llvm/test/CodeGen/X86/embed-bitcode.ll13
-rw-r--r--llvm/test/CodeGen/X86/isel-fpclass.ll526
-rw-r--r--llvm/test/CodeGen/X86/late-tail-dup-computed-goto.mir128
-rw-r--r--llvm/test/CodeGen/X86/load-combine.ll6
-rw-r--r--llvm/test/CodeGen/X86/pr33960.ll2
-rw-r--r--llvm/test/CodeGen/X86/pr76416.ll79
-rw-r--r--llvm/test/CodeGen/X86/stack-protector.ll1
-rw-r--r--llvm/test/CodeGen/X86/subreg-fail.mir4
-rw-r--r--llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir451
-rw-r--r--llvm/test/CodeGen/X86/swap.ll15
-rw-r--r--llvm/test/CodeGen/X86/win32-ssp.ll2
27 files changed, 1598 insertions, 70 deletions
diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-memop-scalar-32.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-memop-scalar-32.mir
index ba72c4f..bbb09c6 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/legalize-memop-scalar-32.mir
+++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-memop-scalar-32.mir
@@ -10,18 +10,18 @@ body: |
bb.0:
; X32-LABEL: name: test_memop_s8tos32
; X32: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
- ; X32: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p0) :: (load (s1))
- ; X32: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p0) :: (load (s8))
- ; X32: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p0) :: (load (s16))
- ; X32: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p0) :: (load (s32))
- ; X32: [[LOAD4:%[0-9]+]]:_(p0) = G_LOAD [[DEF]](p0) :: (load (p0))
- ; X32: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
- ; X32: [[AND:%[0-9]+]]:_(s8) = G_AND [[LOAD]], [[C]]
- ; X32: G_STORE [[AND]](s8), [[DEF]](p0) :: (store (s8))
- ; X32: G_STORE [[LOAD1]](s8), [[DEF]](p0) :: (store (s8))
- ; X32: G_STORE [[LOAD2]](s16), [[DEF]](p0) :: (store (s16))
- ; X32: G_STORE [[LOAD3]](s32), [[DEF]](p0) :: (store (s32))
- ; X32: G_STORE [[LOAD4]](p0), [[DEF]](p0) :: (store (p0))
+ ; X32-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p0) :: (load (s1))
+ ; X32-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p0) :: (load (s8))
+ ; X32-NEXT: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p0) :: (load (s16))
+ ; X32-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p0) :: (load (s32))
+ ; X32-NEXT: [[LOAD4:%[0-9]+]]:_(p0) = G_LOAD [[DEF]](p0) :: (load (p0))
+ ; X32-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
+ ; X32-NEXT: [[AND:%[0-9]+]]:_(s8) = G_AND [[LOAD]], [[C]]
+ ; X32-NEXT: G_STORE [[AND]](s8), [[DEF]](p0) :: (store (s8))
+ ; X32-NEXT: G_STORE [[LOAD1]](s8), [[DEF]](p0) :: (store (s8))
+ ; X32-NEXT: G_STORE [[LOAD2]](s16), [[DEF]](p0) :: (store (s16))
+ ; X32-NEXT: G_STORE [[LOAD3]](s32), [[DEF]](p0) :: (store (s32))
+ ; X32-NEXT: G_STORE [[LOAD4]](p0), [[DEF]](p0) :: (store (p0))
%0:_(p0) = IMPLICIT_DEF
%9:_(s1) = G_LOAD %0 :: (load (s1))
%1:_(s8) = G_LOAD %0 :: (load (s8))
@@ -46,13 +46,13 @@ body: |
; X32-LABEL: name: test_memop_s64
; X32: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
- ; X32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p0) :: (load (s32), align 8)
- ; X32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; X32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32)
- ; X32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4)
- ; X32: G_STORE [[LOAD]](s32), [[DEF]](p0) :: (store (s32), align 8)
- ; X32: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32)
- ; X32: G_STORE [[LOAD1]](s32), [[PTR_ADD1]](p0) :: (store (s32) into unknown-address + 4)
+ ; X32-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p0) :: (load (s32), align 8)
+ ; X32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; X32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[DEF]], [[C]](s32)
+ ; X32-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4)
+ ; X32-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p0) :: (store (s32), align 8)
+ ; X32-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[DEF]], [[C]](s32)
+ ; X32-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD1]](p0) :: (store (s32) into unknown-address + 4)
%0:_(p0) = IMPLICIT_DEF
%1:_(s64) = G_LOAD %0 :: (load (s64))
diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-undef.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-undef.mir
index 8711d84..b16fe3e 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/legalize-undef.mir
+++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-undef.mir
@@ -21,6 +21,7 @@ body: |
; X64-NEXT: G_STORE [[DEF3]](s32), [[DEF]](p0) :: (store (s32))
; X64-NEXT: [[DEF4:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
; X64-NEXT: G_STORE [[DEF4]](s64), [[DEF]](p0) :: (store (s64))
+ ;
; X32-LABEL: name: test_implicit_def
; X32: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF
; X32-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
@@ -35,7 +36,7 @@ body: |
; X32-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF4]](s64)
; X32-NEXT: G_STORE [[UV]](s32), [[DEF]](p0) :: (store (s32), align 8)
; X32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; X32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C1]](s32)
+ ; X32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[DEF]], [[C1]](s32)
; X32-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD]](p0) :: (store (s32) into unknown-address + 4)
%5:_(p0) = G_IMPLICIT_DEF
%0:_(s1) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-x87.ll b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-x87.ll
index 99d458a..83c319b 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-x87.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-x87.ll
@@ -164,12 +164,12 @@ define void @f5(ptr %a, ptr %b) {
; X86-NEXT: [[LOAD1:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (p0) from %fixed-stack.0)
; X86-NEXT: [[LOAD2:%[0-9]+]]:gpr(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.a, align 8)
; X86-NEXT: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 4
- ; X86-NEXT: [[PTR_ADD:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[LOAD]], [[C]](s32)
+ ; X86-NEXT: [[PTR_ADD:%[0-9]+]]:gpr(p0) = nuw inbounds G_PTR_ADD [[LOAD]], [[C]](s32)
; X86-NEXT: [[COPY:%[0-9]+]]:gpr(p0) = COPY [[PTR_ADD]](p0)
; X86-NEXT: [[LOAD3:%[0-9]+]]:gpr(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.a + 4, basealign 8)
; X86-NEXT: [[MV:%[0-9]+]]:gpr(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
; X86-NEXT: [[LOAD4:%[0-9]+]]:gpr(s32) = G_LOAD [[LOAD1]](p0) :: (load (s32) from %ir.b, align 8)
- ; X86-NEXT: [[PTR_ADD1:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[LOAD1]], [[C]](s32)
+ ; X86-NEXT: [[PTR_ADD1:%[0-9]+]]:gpr(p0) = nuw inbounds G_PTR_ADD [[LOAD1]], [[C]](s32)
; X86-NEXT: [[LOAD5:%[0-9]+]]:gpr(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from %ir.b + 4, basealign 8)
; X86-NEXT: [[MV1:%[0-9]+]]:gpr(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
; X86-NEXT: [[COPY1:%[0-9]+]]:psr(s64) = COPY [[MV]](s64)
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll b/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll
index 171ccb2..2f1f8bc 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll
@@ -77,12 +77,12 @@ define { double, double } @test_return_d2(double %d.coerce0, double %d.coerce1)
; ALL-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.d
; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.1)
; ALL-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
- ; ALL-NEXT: %5:_(p0) = nuw nusw G_PTR_ADD [[FRAME_INDEX1]], [[C1]](s64)
- ; ALL-NEXT: G_STORE [[COPY1]](s64), %5(p0) :: (store (s64) into %ir.2)
+ ; ALL-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[FRAME_INDEX1]], [[C1]](s64)
+ ; ALL-NEXT: G_STORE [[COPY1]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.2)
; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.3, align 8), (load (s8) from %ir.4, align 8)
; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.5)
- ; ALL-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64)
- ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s64) from %ir.5 + 8)
+ ; ALL-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64)
+ ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (dereferenceable load (s64) from %ir.5 + 8)
; ALL-NEXT: $xmm0 = COPY [[LOAD]](s64)
; ALL-NEXT: $xmm1 = COPY [[LOAD1]](s64)
; ALL-NEXT: RET 0, implicit $xmm0, implicit $xmm1
@@ -170,14 +170,14 @@ define { i64, i32 } @test_return_i3(i64 %i.coerce0, i32 %i.coerce1) {
; ALL-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.3.tmp
; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX2]](p0) :: (store (s64) into %ir.0, align 4)
; ALL-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
- ; ALL-NEXT: %7:_(p0) = nuw nusw G_PTR_ADD [[FRAME_INDEX2]], [[C1]](s64)
- ; ALL-NEXT: G_STORE [[COPY1]](s32), %7(p0) :: (store (s32) into %ir.1)
+ ; ALL-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[FRAME_INDEX2]], [[C1]](s64)
+ ; ALL-NEXT: G_STORE [[COPY1]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.1)
; ALL-NEXT: G_MEMCPY [[FRAME_INDEX1]](p0), [[FRAME_INDEX2]](p0), [[C]](s64), 0 :: (store (s8) into %ir.2, align 4), (load (s8) from %ir.3, align 4)
; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.4, align 4), (load (s8) from %ir.5, align 4)
; ALL-NEXT: G_MEMCPY [[FRAME_INDEX3]](p0), [[FRAME_INDEX]](p0), [[C]](s64), 0 :: (store (s8) into %ir.6, align 8), (load (s8) from %ir.7, align 4)
; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX3]](p0) :: (dereferenceable load (s64) from %ir.tmp)
- ; ALL-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s64)
- ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s32) from %ir.tmp + 8, align 8)
+ ; ALL-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s64)
+ ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (dereferenceable load (s32) from %ir.tmp + 8, align 8)
; ALL-NEXT: $rax = COPY [[LOAD]](s64)
; ALL-NEXT: $edx = COPY [[LOAD1]](s32)
; ALL-NEXT: RET 0, implicit $rax, implicit $edx
@@ -215,12 +215,12 @@ define { i64, i64 } @test_return_i4(i64 %i.coerce0, i64 %i.coerce1) {
; ALL-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.i
; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.1, align 4)
; ALL-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
- ; ALL-NEXT: %5:_(p0) = nuw nusw G_PTR_ADD [[FRAME_INDEX1]], [[C1]](s64)
- ; ALL-NEXT: G_STORE [[COPY1]](s64), %5(p0) :: (store (s64) into %ir.2, align 4)
+ ; ALL-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[FRAME_INDEX1]], [[C1]](s64)
+ ; ALL-NEXT: G_STORE [[COPY1]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.2, align 4)
; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.3, align 4), (load (s8) from %ir.4, align 4)
; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.5, align 4)
- ; ALL-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64)
- ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s64) from %ir.5 + 8, align 4)
+ ; ALL-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64)
+ ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (dereferenceable load (s64) from %ir.5 + 8, align 4)
; ALL-NEXT: $rax = COPY [[LOAD]](s64)
; ALL-NEXT: $rdx = COPY [[LOAD1]](s64)
; ALL-NEXT: RET 0, implicit $rax, implicit $rdx
diff --git a/llvm/test/CodeGen/X86/apx/cf.ll b/llvm/test/CodeGen/X86/apx/cf.ll
index 1e4ac3f..e52ce6c 100644
--- a/llvm/test/CodeGen/X86/apx/cf.ll
+++ b/llvm/test/CodeGen/X86/apx/cf.ll
@@ -162,7 +162,7 @@ entry:
define void @load_zext(i1 %cond, ptr %b, ptr %p) {
; CHECK-LABEL: load_zext:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: andb $1, %dil
+; CHECK-NEXT: testb $1, %dil
; CHECK-NEXT: cfcmovnew (%rsi), %ax
; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: cfcmovnel %eax, (%rdx)
@@ -180,7 +180,7 @@ entry:
define void @load_sext(i1 %cond, ptr %b, ptr %p) {
; CHECK-LABEL: load_sext:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: andb $1, %dil
+; CHECK-NEXT: testb $1, %dil
; CHECK-NEXT: cfcmovnel (%rsi), %eax
; CHECK-NEXT: cltq
; CHECK-NEXT: cfcmovneq %rax, (%rdx)
@@ -194,3 +194,38 @@ entry:
call void @llvm.masked.store.v1i64.p0(<1 x i64> %3, ptr %p, i32 4, <1 x i1> %0)
ret void
}
+
+define void @sink_gep(ptr %p, i1 %cond) {
+; CHECK-LABEL: sink_gep:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb $1, %sil
+; CHECK-NEXT: cfcmovnel %eax, 112(%rdi)
+; CHECK-NEXT: cfcmovnel 112(%rdi), %eax
+; CHECK-NEXT: movl %eax, (%rdi)
+; CHECK-NEXT: retq
+entry:
+ %0 = getelementptr i8, ptr %p, i64 112
+ br label %next
+
+next:
+ %1 = bitcast i1 %cond to <1 x i1>
+ call void @llvm.masked.store.v1i32.p0(<1 x i32> zeroinitializer, ptr %0, i32 1, <1 x i1> %1)
+ %2 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr %0, i32 1, <1 x i1> %1, <1 x i32> zeroinitializer)
+ store <1 x i32> %2, ptr %p, align 4
+ ret void
+}
+
+define void @xor_cond(ptr %p, i1 %cond) {
+; CHECK-LABEL: xor_cond:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb $1, %sil
+; CHECK-NEXT: cfcmovel %eax, (%rdi)
+; CHECK-NEXT: retq
+entry:
+ %0 = xor i1 %cond, true
+ %1 = insertelement <1 x i1> zeroinitializer, i1 %0, i64 0
+ call void @llvm.masked.store.v1i32.p0(<1 x i32> zeroinitializer, ptr %p, i32 1, <1 x i1> %1)
+ ret void
+}
diff --git a/llvm/test/CodeGen/X86/calleetypeid-directcall-mismatched.ll b/llvm/test/CodeGen/X86/calleetypeid-directcall-mismatched.ll
new file mode 100644
index 0000000..7881ea7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/calleetypeid-directcall-mismatched.ll
@@ -0,0 +1,32 @@
+;; Tests that callee_type metadata attached to direct call sites are safely ignored.
+
+; RUN: llc --call-graph-section -mtriple x86_64-linux-gnu < %s -stop-after=finalize-isel -o - | FileCheck --match-full-lines %s
+
+;; Test that `calleeTypeIds` field is not present in `callSites`
+; CHECK-LABEL: callSites:
+; CHECK-NEXT: - { bb: {{[0-9]+}}, offset: {{[0-9]+}}, fwdArgRegs: [] }
+; CHECK-NEXT: - { bb: {{[0-9]+}}, offset: {{[0-9]+}}, fwdArgRegs: [] }
+; CHECK-NEXT: - { bb: {{[0-9]+}}, offset: {{[0-9]+}}, fwdArgRegs: [] }
+define i32 @foo(i32 %x, i32 %y) !type !0 {
+entry:
+ ;; Call instruction with accurate callee_type.
+ ;; callee_type should be dropped seemlessly.
+ %call = call i32 @fizz(i32 %x, i32 %y), !callee_type !1
+ ;; Call instruction with mismatched callee_type.
+ ;; callee_type should be dropped seemlessly without errors.
+ %call1 = call i32 @fizz(i32 %x, i32 %y), !callee_type !3
+ %add = add nsw i32 %call, %call1
+ ;; Call instruction with mismatched callee_type.
+ ;; callee_type should be dropped seemlessly without errors.
+ %call2 = call i32 @fizz(i32 %add, i32 %y), !callee_type !3
+ %sub = sub nsw i32 %add, %call2
+ ret i32 %sub
+}
+
+declare !type !2 i32 @fizz(i32, i32)
+
+!0 = !{i64 0, !"_ZTSFiiiiE.generalized"}
+!1 = !{!2}
+!2 = !{i64 0, !"_ZTSFiiiE.generalized"}
+!3 = !{!4}
+!4 = !{i64 0, !"_ZTSFicE.generalized"}
diff --git a/llvm/test/CodeGen/X86/callsite-emit-calleetypeid-tailcall.ll b/llvm/test/CodeGen/X86/callsite-emit-calleetypeid-tailcall.ll
new file mode 100644
index 0000000..8f6b7a6
--- /dev/null
+++ b/llvm/test/CodeGen/X86/callsite-emit-calleetypeid-tailcall.ll
@@ -0,0 +1,19 @@
+;; Tests that call site callee type ids can be extracted and set from
+;; callee_type metadata for indirect tail calls.
+
+;; Verify the exact calleeTypeIds value to ensure it is not garbage but the value
+;; computed as the type id from the callee_type metadata.
+; RUN: llc --call-graph-section -mtriple=x86_64-unknown-linux < %s -stop-after=finalize-isel -o - | FileCheck --match-full-lines %s
+
+define i32 @check_tailcall(ptr %func, i8 %x) !type !0 {
+entry:
+ ; CHECK: callSites:
+ ; CHECK-NEXT: - { bb: {{.*}}, offset: {{.*}}, fwdArgRegs: [], calleeTypeIds:
+ ; CHECK-NEXT: [ 3498816979441845844 ] }
+ %call = tail call i32 %func(i8 signext %x), !callee_type !1
+ ret i32 %call
+}
+
+!0 = !{i64 0, !"_ZTSFiPvcE.generalized"}
+!1 = !{!2}
+!2 = !{i64 0, !"_ZTSFicE.generalized"}
diff --git a/llvm/test/CodeGen/X86/callsite-emit-calleetypeid.ll b/llvm/test/CodeGen/X86/callsite-emit-calleetypeid.ll
new file mode 100644
index 0000000..e97a6ac
--- /dev/null
+++ b/llvm/test/CodeGen/X86/callsite-emit-calleetypeid.ll
@@ -0,0 +1,20 @@
+;; Tests that call site callee type ids can be extracted and set from
+;; callee_type metadata.
+
+;; Verify the exact calleeTypeIds value to ensure it is not garbage but the value
+;; computed as the type id from the callee_type metadata.
+; RUN: llc --call-graph-section -mtriple=x86_64-unknown-linux < %s -stop-after=finalize-isel -o - | FileCheck --match-full-lines %s
+
+; CHECK: name: main
+; CHECK: callSites:
+; CHECK-NEXT: - { bb: {{.*}}, offset: {{.*}}, fwdArgRegs: [], calleeTypeIds:
+; CHECK-NEXT: [ 7854600665770582568 ] }
+define i32 @main() {
+entry:
+ %fn = load ptr, ptr null, align 8
+ call void %fn(i8 0), !callee_type !0
+ ret i32 0
+}
+
+!0 = !{!1}
+!1 = !{i64 0, !"_ZTSFvcE.generalized"}
diff --git a/llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll b/llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll
new file mode 100644
index 0000000..ea7454f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll
@@ -0,0 +1,185 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=x86_64-grtev4-linux-gnu < %s | FileCheck %s
+
+%struct.wibble = type { %struct.wombat }
+%struct.wombat = type { %struct.ham, [3 x i8] }
+%struct.ham = type { %struct.zot }
+%struct.zot = type { %struct.blam }
+%struct.blam = type { %struct.ham.0 }
+%struct.ham.0 = type { %struct.bar }
+%struct.bar = type { %struct.bar.1 }
+%struct.bar.1 = type { %struct.baz, i8 }
+%struct.baz = type { %struct.snork }
+%struct.snork = type <{ %struct.spam, i8, [3 x i8] }>
+%struct.spam = type { %struct.snork.2, %struct.snork.2 }
+%struct.snork.2 = type { i32 }
+%struct.snork.3 = type { %struct.baz, i8, [3 x i8] }
+
+define void @foo(ptr %arg, ptr %arg1, i40 %arg2, ptr %arg3, i32 %arg4) #0 {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0: # %bb
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movq %rsp, %rbp
+; CHECK-NEXT: .cfi_def_cfa_register %rbp
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: pushq %r13
+; CHECK-NEXT: pushq %r12
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_offset %rbx, -56
+; CHECK-NEXT: .cfi_offset %r12, -48
+; CHECK-NEXT: .cfi_offset %r13, -40
+; CHECK-NEXT: .cfi_offset %r14, -32
+; CHECK-NEXT: .cfi_offset %r15, -24
+; CHECK-NEXT: movl %r8d, %r14d
+; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movq %rsi, %r13
+; CHECK-NEXT: movq %rdi, %r15
+; CHECK-NEXT: incl %r14d
+; CHECK-NEXT: xorl %ebx, %ebx
+; CHECK-NEXT: # implicit-def: $r12
+; CHECK-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: jmp .LBB0_3
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB0_1: # %bb17
+; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: movq %r15, %r13
+; CHECK-NEXT: xorl %r15d, %r15d
+; CHECK-NEXT: testq %rbx, %rbx
+; CHECK-NEXT: sete %r15b
+; CHECK-NEXT: xorl %edi, %edi
+; CHECK-NEXT: callq _Znwm@PLT
+; CHECK-NEXT: shll $4, %r15d
+; CHECK-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
+; CHECK-NEXT: movq %r12, %rcx
+; CHECK-NEXT: shrq $32, %rcx
+; CHECK-NEXT: movb %cl, 12(%rax)
+; CHECK-NEXT: movl %r12d, 8(%rax)
+; CHECK-NEXT: movq %r15, %rbx
+; CHECK-NEXT: movq %r13, %r15
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; CHECK-NEXT: decl %r14d
+; CHECK-NEXT: je .LBB0_8
+; CHECK-NEXT: .LBB0_3: # %bb7
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: callq widget@PLT
+; CHECK-NEXT: cmpb $-5, (%r13)
+; CHECK-NEXT: jae .LBB0_5
+; CHECK-NEXT: # %bb.4: # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: movl %r12d, %r12d
+; CHECK-NEXT: cmpq %r15, %rbx
+; CHECK-NEXT: jbe .LBB0_1
+; CHECK-NEXT: jmp .LBB0_7
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB0_5: # %bb12
+; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: movq 0, %rax
+; CHECK-NEXT: movq 8, %rax
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; CHECK-NEXT: cmpq %r15, %rbx
+; CHECK-NEXT: jbe .LBB0_1
+; CHECK-NEXT: .LBB0_7: # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: xorl %ebx, %ebx
+; CHECK-NEXT: decl %r14d
+; CHECK-NEXT: jne .LBB0_3
+; CHECK-NEXT: .LBB0_8: # %bb21
+; CHECK-NEXT: cmpb $0, 12(%rax)
+; CHECK-NEXT: jne .LBB0_10
+; CHECK-NEXT: # %bb.9: # %bb26
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: popq %r12
+; CHECK-NEXT: popq %r13
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB0_10: # %bb25
+; CHECK-NEXT: .cfi_def_cfa %rbp, 16
+; CHECK-NEXT: movq %r15, %rdi
+; CHECK-NEXT: callq pluto@PLT
+bb:
+ br label %bb7
+
+bb5: ; preds = %bb17, %bb14
+ %phi = phi ptr [ %call19, %bb17 ], [ null, %bb14 ]
+ %phi6 = phi ptr [ %getelementptr, %bb17 ], [ null, %bb14 ]
+ %add = add i32 %phi9, 1
+ %icmp = icmp eq i32 %phi9, %arg4
+ br i1 %icmp, label %bb21, label %bb7
+
+bb7: ; preds = %bb5, %bb
+ %phi8 = phi ptr [ null, %bb ], [ %phi6, %bb5 ]
+ %phi9 = phi i32 [ 0, %bb ], [ %add, %bb5 ]
+ %phi10 = phi i40 [ poison, %bb ], [ %phi15, %bb5 ]
+ %call = call ptr @widget()
+ %load = load i8, ptr %arg1, align 8
+ %icmp11 = icmp ult i8 %load, -5
+ %and = and i40 %phi10, 4294967295
+ br i1 %icmp11, label %bb14, label %bb12
+
+bb12: ; preds = %bb7
+ %load13 = load volatile { i64, i64 }, ptr null, align 4294967296
+ br label %bb14
+
+bb14: ; preds = %bb12, %bb7
+ %phi15 = phi i40 [ %and, %bb7 ], [ %arg2, %bb12 ]
+ %icmp16 = icmp ugt ptr %phi8, %arg
+ br i1 %icmp16, label %bb5, label %bb17
+
+bb17: ; preds = %bb14
+ %icmp18 = icmp eq ptr %phi8, null
+ %zext = zext i1 %icmp18 to i64
+ %call19 = call ptr @_Znwm(i64 0)
+ %getelementptr = getelementptr %struct.wibble, ptr %arg3, i64 %zext
+ %getelementptr20 = getelementptr i8, ptr %call19, i64 8
+ store i40 %phi15, ptr %getelementptr20, align 4
+ br label %bb5
+
+bb21: ; preds = %bb5
+ %getelementptr22 = getelementptr %struct.snork.3, ptr %phi, i64 0, i32 1
+ %load23 = load i8, ptr %getelementptr22, align 4
+ %icmp24 = icmp eq i8 %load23, 0
+ br i1 %icmp24, label %bb26, label %bb25
+
+bb25: ; preds = %bb21
+ call void @pluto(ptr %arg)
+ unreachable
+
+bb26: ; preds = %bb21
+ ret void
+}
+
+define void @eggs(ptr %arg, ptr %arg1) {
+; CHECK-LABEL: eggs:
+; CHECK: # %bb.0: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: movq %rsi, %rdi
+; CHECK-NEXT: movq %rax, %rsi
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: xorl %r8d, %r8d
+; CHECK-NEXT: callq foo@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+bb:
+ call void @foo(ptr %arg1, ptr %arg, i40 0, ptr null, i32 0)
+ ret void
+}
+
+declare ptr @widget()
+
+declare void @pluto(ptr)
+
+declare ptr @_Znwm(i64)
+
+attributes #0 = { noinline "frame-pointer"="all" }
diff --git a/llvm/test/CodeGen/X86/coalescer-implicit-def-regression-imp-operand-assert.mir b/llvm/test/CodeGen/X86/coalescer-implicit-def-regression-imp-operand-assert.mir
index 8241a17..0bc208d 100644
--- a/llvm/test/CodeGen/X86/coalescer-implicit-def-regression-imp-operand-assert.mir
+++ b/llvm/test/CodeGen/X86/coalescer-implicit-def-regression-imp-operand-assert.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
-# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=register-coalescer -o - %s | FileCheck %s
+# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=register-coalescer -o - %s | FileCheck %s --match-full-lines
---
name: rematerialize_subreg_to_reg_added_impdef_1
tracksRegLiveness: true
@@ -9,7 +9,7 @@ body: |
; CHECK-NEXT: successors: %bb.1(0x2aaaaaab), %bb.2(0x55555555)
; CHECK-NEXT: liveins: $edi
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags
+ ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]]
; CHECK-NEXT: JCC_1 %bb.2, 5, implicit killed undef $eflags
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
@@ -28,7 +28,7 @@ body: |
; CHECK-NEXT: JCC_1 %bb.5, 5, implicit killed undef $eflags
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
- ; CHECK-NEXT: dead $eax = MOV32r0 implicit-def dead $eflags, implicit-def $al
+ ; CHECK-NEXT: dead $eax = MOV32r0 implicit-def dead $eflags, implicit-def $al, implicit-def $al
; CHECK-NEXT: RET 0, killed undef $al
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
diff --git a/llvm/test/CodeGen/X86/coalescing-subreg-to-reg-requires-subrange-update.mir b/llvm/test/CodeGen/X86/coalescing-subreg-to-reg-requires-subrange-update.mir
new file mode 100644
index 0000000..2e6395f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/coalescing-subreg-to-reg-requires-subrange-update.mir
@@ -0,0 +1,44 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -mtriple=x86_64-- -run-pass=register-coalescer -enable-subreg-liveness -verify-coalescing -o - %s | FileCheck %s
+
+---
+name: requires_new_subrange_coalesce_subreg_to_reg
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: requires_new_subrange_coalesce_subreg_to_reg
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $eax
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef %a.sub_32bit:gr64_with_sub_8bit = COPY $eax
+ ; CHECK-NEXT: %b:gr32 = IMPLICIT_DEF
+ ; CHECK-NEXT: %c:gr64 = INSERT_SUBREG %a, %b, %subreg.sub_32bit
+ ; CHECK-NEXT: JCC_1 %bb.2, 4, implicit undef $eflags
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef %a.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags
+ ; CHECK-NEXT: %c.sub_32bit:gr64 = COPY %a
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: %c.sub_32bit:gr64 = SUBREG_TO_REG %a, %b, %subreg.sub_32bit
+ ; CHECK-NEXT: RET 0, implicit %c
+ bb.0:
+ liveins: $eax
+ %init_eax:gr32 = COPY $eax
+ %a:gr64 = SUBREG_TO_REG 0, %init_eax, %subreg.sub_32bit
+ %b:gr32 = IMPLICIT_DEF
+ %c:gr64 = INSERT_SUBREG %a, %b, %subreg.sub_32bit
+ JCC_1 %bb.2, 4, implicit undef $eflags
+
+ bb.1:
+ %imm0:gr32 = MOV32r0 implicit-def dead $eflags
+ %a = SUBREG_TO_REG 0, %imm0, %subreg.sub_32bit
+ %c.sub_32bit = COPY %a
+
+ bb.2:
+ %c.sub_32bit = SUBREG_TO_REG %a, %b, %subreg.sub_32bit
+ RET 0, implicit %c
+
+...
diff --git a/llvm/test/CodeGen/X86/combine-add-ssat.ll b/llvm/test/CodeGen/X86/combine-add-ssat.ll
index 3e21798..75adcdd 100644
--- a/llvm/test/CodeGen/X86/combine-add-ssat.ll
+++ b/llvm/test/CodeGen/X86/combine-add-ssat.ll
@@ -62,12 +62,12 @@ define <8 x i16> @combine_constfold_v8i16() {
define <8 x i16> @combine_constfold_undef_v8i16() {
; SSE-LABEL: combine_constfold_undef_v8i16:
; SSE: # %bb.0:
-; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65535,65535,65534,0,65280,32768,0]
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65535,u,65534,0,65280,32768,0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_constfold_undef_v8i16:
; AVX: # %bb.0:
-; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65535,65535,65534,0,65280,32768,0]
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65535,u,65534,0,65280,32768,0]
; AVX-NEXT: retq
%res = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> <i16 undef, i16 1, i16 undef, i16 65535, i16 -1, i16 -255, i16 -32760, i16 1>, <8 x i16> <i16 1, i16 undef, i16 undef, i16 65535, i16 1, i16 65535, i16 -10, i16 65535>)
ret <8 x i16> %res
diff --git a/llvm/test/CodeGen/X86/combine-add-usat.ll b/llvm/test/CodeGen/X86/combine-add-usat.ll
index 13bc3b2..5b947dd 100644
--- a/llvm/test/CodeGen/X86/combine-add-usat.ll
+++ b/llvm/test/CodeGen/X86/combine-add-usat.ll
@@ -62,12 +62,13 @@ define <8 x i16> @combine_constfold_v8i16() {
define <8 x i16> @combine_constfold_undef_v8i16() {
; SSE-LABEL: combine_constfold_undef_v8i16:
; SSE: # %bb.0:
-; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65535,65535,65535,65535,65535,2,65535]
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65535,u,65535,65535,65535,2,65535]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_constfold_undef_v8i16:
; AVX: # %bb.0:
-; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65535,65535,65535,65535,65535,2,65535]
+; AVX-NEXT: vmovddup {{.*#+}} xmm0 = [65535,65535,2,65535,65535,65535,2,65535]
+; AVX-NEXT: # xmm0 = mem[0,0]
; AVX-NEXT: retq
%res = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> <i16 undef, i16 1, i16 undef, i16 65535, i16 -1, i16 -255, i16 -65535, i16 1>, <8 x i16> <i16 1, i16 undef, i16 undef, i16 65535, i16 1, i16 65535, i16 1, i16 65535>)
ret <8 x i16> %res
diff --git a/llvm/test/CodeGen/X86/combine-sub-ssat.ll b/llvm/test/CodeGen/X86/combine-sub-ssat.ll
index 979331f..0dab025 100644
--- a/llvm/test/CodeGen/X86/combine-sub-ssat.ll
+++ b/llvm/test/CodeGen/X86/combine-sub-ssat.ll
@@ -62,12 +62,12 @@ define <8 x i16> @combine_constfold_v8i16() {
define <8 x i16> @combine_constfold_undef_v8i16() {
; SSE-LABEL: combine_constfold_undef_v8i16:
; SSE: # %bb.0:
-; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,0,0,65534,65282,32786,2]
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,u,0,65534,65282,32786,2]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_constfold_undef_v8i16:
; AVX: # %bb.0:
-; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,0,0,65534,65282,32786,2]
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,u,0,65534,65282,32786,2]
; AVX-NEXT: retq
%res = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> <i16 undef, i16 1, i16 undef, i16 65535, i16 -1, i16 -255, i16 -32760, i16 1>, <8 x i16> <i16 1, i16 undef, i16 undef, i16 65535, i16 1, i16 65535, i16 -10, i16 65535>)
ret <8 x i16> %res
diff --git a/llvm/test/CodeGen/X86/combine-sub-usat.ll b/llvm/test/CodeGen/X86/combine-sub-usat.ll
index b70e3fc..36e374b 100644
--- a/llvm/test/CodeGen/X86/combine-sub-usat.ll
+++ b/llvm/test/CodeGen/X86/combine-sub-usat.ll
@@ -73,17 +73,17 @@ define <8 x i16> @combine_constfold_v8i16() {
define <8 x i16> @combine_constfold_undef_v8i16() {
; SSE-LABEL: combine_constfold_undef_v8i16:
; SSE: # %bb.0:
-; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,0,0,65534,0,0,0]
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,u,0,65534,0,0,0]
; SSE-NEXT: retq
;
; AVX1-LABEL: combine_constfold_undef_v8i16:
; AVX1: # %bb.0:
-; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,0,0,65534,0,0,0]
+; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,u,0,65534,0,0,0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_constfold_undef_v8i16:
; AVX2: # %bb.0:
-; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,0,0,65534,0,0,0]
+; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,u,0,65534,0,0,0]
; AVX2-NEXT: retq
;
; AVX512-LABEL: combine_constfold_undef_v8i16:
diff --git a/llvm/test/CodeGen/X86/constant-pool-partition.ll b/llvm/test/CodeGen/X86/constant-pool-partition.ll
index 515284f..e42b41b 100644
--- a/llvm/test/CodeGen/X86/constant-pool-partition.ll
+++ b/llvm/test/CodeGen/X86/constant-pool-partition.ll
@@ -24,11 +24,11 @@ target triple = "x86_64-grtev4-linux-gnu"
; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always
;; For function @cold_func
-; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8
+; CHECK: .section .rodata.cst8.hot.,"aM",@progbits,8
; CHECK-NEXT: .p2align
; CHECK-NEXT: .LCPI0_0:
; CHECK-NEXT: .quad 0x3fe5c28f5c28f5c3 # double 0.68000000000000005
-; CHECK-NEXT: .section .rodata.cst8.unlikely,"aM",@progbits,8
+; CHECK-NEXT: .section .rodata.cst8.unlikely.,"aM",@progbits,8
; CHECK-NEXT: .p2align
; CHECK-NEXT: .LCPI0_1:
; CHECK-NEXT: .quad 0x3eb0000000000000 # double 9.5367431640625E-7
@@ -50,11 +50,11 @@ target triple = "x86_64-grtev4-linux-gnu"
; CHECK-NEXT: .long 0x3e000000 # float 0.125
;; For function @hot_func
-; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8
+; CHECK: .section .rodata.cst8.hot.,"aM",@progbits,8
; CHECK-NEXT: .p2align
; CHECK-NEXT: .LCPI3_0:
; CHECK-NEXT: .quad 0x3fe5c28f5c28f5c3 # double 0.68000000000000005
-; CHECK-NEXT: .section .rodata.cst16.hot,"aM",@progbits,16
+; CHECK-NEXT: .section .rodata.cst16.hot.,"aM",@progbits,16
; CHECK-NEXT: .p2align
; CHECK-NEXT: .LCPI3_1:
; CHECK-NEXT: .long 2147483648 # 0x80000000
diff --git a/llvm/test/CodeGen/X86/embed-bitcode.ll b/llvm/test/CodeGen/X86/embed-bitcode.ll
index 0d66ba8..d4af954 100644
--- a/llvm/test/CodeGen/X86/embed-bitcode.ll
+++ b/llvm/test/CodeGen/X86/embed-bitcode.ll
@@ -1,10 +1,23 @@
; RUN: llc -filetype=obj -mtriple=x86_64 %s -o %t
; RUN: llvm-readelf -S %t | FileCheck %s
+; RUN: llc -filetype=obj -mtriple=x86_64-pc-windows-msvc %s -o %t
+; RUN: llvm-readobj -S %t | FileCheck %s --check-prefix=COFF
; CHECK: .text PROGBITS 0000000000000000 [[#%x,OFF:]] 000000 00 AX 0
; CHECK-NEXT: .llvmbc PROGBITS 0000000000000000 [[#%x,OFF:]] 000004 00 0
; CHECK-NEXT: .llvmcmd PROGBITS 0000000000000000 [[#%x,OFF:]] 000005 00 0
+; COFF: Name: .llvmbc (2E 6C 6C 76 6D 62 63 00)
+; COFF: Characteristics [
+; COFF-NEXT: IMAGE_SCN_ALIGN_1BYTES
+; COFF-NEXT: IMAGE_SCN_MEM_DISCARDABLE
+; COFF-NEXT: ]
+; COFF: Name: .llvmcmd (2E 6C 6C 76 6D 63 6D 64)
+; COFF: Characteristics [
+; COFF-NEXT: IMAGE_SCN_ALIGN_1BYTES
+; COFF-NEXT: IMAGE_SCN_MEM_DISCARDABLE
+; COFF-NEXT: ]
+
@llvm.embedded.module = private constant [4 x i8] c"BC\C0\DE", section ".llvmbc", align 1
@llvm.cmdline = private constant [5 x i8] c"-cc1\00", section ".llvmcmd", align 1
@llvm.compiler.used = appending global [2 x ptr] [ptr @llvm.embedded.module, ptr @llvm.cmdline], section "llvm.metadata"
diff --git a/llvm/test/CodeGen/X86/isel-fpclass.ll b/llvm/test/CodeGen/X86/isel-fpclass.ll
new file mode 100644
index 0000000..960bbf5
--- /dev/null
+++ b/llvm/test/CodeGen/X86/isel-fpclass.ll
@@ -0,0 +1,526 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=i686-linux | FileCheck %s -check-prefixes=X86-SDAGISEL
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefixes=X64,X64-SDAGISEL
+; RUN: llc < %s -mtriple=i686-linux -fast-isel -fast-isel-abort=1 | FileCheck %s -check-prefixes=X86-FASTISEL
+; RUN: llc < %s -mtriple=x86_64-linux -fast-isel -fast-isel-abort=1 | FileCheck %s -check-prefixes=X64,X64-FASTISEL
+
+; FIXME: We can reuse/delete llvm/test/CodeGen/X86/is_fpclass.ll when all patches are included.
+
+define i1 @isnone_f(float %x) {
+; X86-SDAGISEL-LABEL: isnone_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: xorl %eax, %eax
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: isnone_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: isnone_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstp %st(0)
+; X86-FASTISEL-NEXT: xorl %eax, %eax
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 0)
+ ret i1 %0
+}
+
+define i1 @isany_f(float %x) {
+; X86-SDAGISEL-LABEL: isany_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movb $1, %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: isany_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movb $1, %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: isany_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstp %st(0)
+; X86-FASTISEL-NEXT: movb $1, %al
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1023)
+ ret i1 %0
+}
+
+define i1 @issignaling_f(float %x) {
+; X86-SDAGISEL-LABEL: issignaling_f:
+; X86-SDAGISEL: # %bb.0:
+; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-SDAGISEL-NEXT: setl %cl
+; X86-SDAGISEL-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
+; X86-SDAGISEL-NEXT: setge %al
+; X86-SDAGISEL-NEXT: andb %cl, %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: issignaling_f:
+; X64: # %bb.0:
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X64-NEXT: setl %cl
+; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
+; X64-NEXT: setge %al
+; X64-NEXT: andb %cl, %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: issignaling_f:
+; X86-FASTISEL: # %bb.0:
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-FASTISEL-NEXT: andl (%esp), %eax
+; X86-FASTISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-FASTISEL-NEXT: setl %cl
+; X86-FASTISEL-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
+; X86-FASTISEL-NEXT: setge %al
+; X86-FASTISEL-NEXT: andb %cl, %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+ %a0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1) ; "snan"
+ ret i1 %a0
+}
+
+ define i1 @isquiet_f(float %x) {
+; X86-SDAGISEL-LABEL: isquiet_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-SDAGISEL-NEXT: setge %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: isquiet_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X64-NEXT: setge %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: isquiet_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-FASTISEL-NEXT: andl (%esp), %eax
+; X86-FASTISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-FASTISEL-NEXT: setge %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+ entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 2) ; "qnan"
+ ret i1 %0
+}
+
+define i1 @not_isquiet_f(float %x) {
+; X86-SDAGISEL-LABEL: not_isquiet_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-SDAGISEL-NEXT: setl %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: not_isquiet_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X64-NEXT: setl %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: not_isquiet_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-FASTISEL-NEXT: andl (%esp), %eax
+; X86-FASTISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-FASTISEL-NEXT: setl %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1021) ; ~"qnan"
+ ret i1 %0
+}
+
+define i1 @isinf_f(float %x) {
+; X86-SDAGISEL-LABEL: isinf_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-SDAGISEL-NEXT: sete %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: isinf_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: isinf_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-FASTISEL-NEXT: andl (%esp), %eax
+; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-FASTISEL-NEXT: sete %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 516) ; 0x204 = "inf"
+ ret i1 %0
+}
+
+define i1 @not_isinf_f(float %x) {
+; X86-SDAGISEL-LABEL: not_isinf_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-SDAGISEL-NEXT: setne %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: not_isinf_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-NEXT: setne %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: not_isinf_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-FASTISEL-NEXT: andl (%esp), %eax
+; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-FASTISEL-NEXT: setne %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 507) ; ~0x204 = "~inf"
+ ret i1 %0
+}
+
+define i1 @is_plus_inf_f(float %x) {
+; X86-SDAGISEL-LABEL: is_plus_inf_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000
+; X86-SDAGISEL-NEXT: sete %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: is_plus_inf_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: is_plus_inf_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: cmpl $2139095040, (%esp) # imm = 0x7F800000
+; X86-FASTISEL-NEXT: sete %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 512) ; 0x200 = "+inf"
+ ret i1 %0
+}
+
+define i1 @is_minus_inf_f(float %x) {
+; X86-SDAGISEL-LABEL: is_minus_inf_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000
+; X86-SDAGISEL-NEXT: sete %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: is_minus_inf_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: is_minus_inf_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: cmpl $-8388608, (%esp) # imm = 0xFF800000
+; X86-FASTISEL-NEXT: sete %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 4) ; "-inf"
+ ret i1 %0
+}
+
+define i1 @not_is_minus_inf_f(float %x) {
+; X86-SDAGISEL-LABEL: not_is_minus_inf_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000
+; X86-SDAGISEL-NEXT: setne %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: not_is_minus_inf_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000
+; X64-NEXT: setne %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: not_is_minus_inf_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: cmpl $-8388608, (%esp) # imm = 0xFF800000
+; X86-FASTISEL-NEXT: setne %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1019) ; ~"-inf"
+ ret i1 %0
+}
+
+define i1 @isfinite_f(float %x) {
+; X86-SDAGISEL-LABEL: isfinite_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-SDAGISEL-NEXT: setl %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: isfinite_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-NEXT: setl %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: isfinite_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-FASTISEL-NEXT: andl (%esp), %eax
+; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-FASTISEL-NEXT: setl %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; 0x1f8 = "finite"
+ ret i1 %0
+}
+
+define i1 @not_isfinite_f(float %x) {
+; X86-SDAGISEL-LABEL: not_isfinite_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-SDAGISEL-NEXT: setge %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: not_isfinite_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-NEXT: setge %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: not_isfinite_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-FASTISEL-NEXT: andl (%esp), %eax
+; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-FASTISEL-NEXT: setge %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 519) ; ~0x1f8 = "~finite"
+ ret i1 %0
+}
+
+define i1 @is_plus_finite_f(float %x) {
+; X86-SDAGISEL-LABEL: is_plus_finite_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000
+; X86-SDAGISEL-NEXT: setb %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: is_plus_finite_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-NEXT: setb %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: is_plus_finite_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: cmpl $2139095040, (%esp) # imm = 0x7F800000
+; X86-FASTISEL-NEXT: setb %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 448) ; 0x1c0 = "+finite"
+ ret i1 %0
+}
+
+define i1 @isnone_d(double %x) nounwind {
+; X86-SDAGISEL-LABEL: isnone_d:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: xorl %eax, %eax
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: isnone_d:
+; X64: # %bb.0: # %entry
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: isnone_d:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstp %st(0)
+; X86-FASTISEL-NEXT: xorl %eax, %eax
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 0)
+ ret i1 %0
+}
+
+define i1 @isany_d(double %x) nounwind {
+; X86-SDAGISEL-LABEL: isany_d:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movb $1, %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: isany_d:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movb $1, %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: isany_d:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstp %st(0)
+; X86-FASTISEL-NEXT: movb $1, %al
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 1023)
+ ret i1 %0
+}
+
+define i1 @isnone_f80(x86_fp80 %x) nounwind {
+; X86-SDAGISEL-LABEL: isnone_f80:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: xorl %eax, %eax
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-SDAGISEL-LABEL: isnone_f80:
+; X64-SDAGISEL: # %bb.0: # %entry
+; X64-SDAGISEL-NEXT: xorl %eax, %eax
+; X64-SDAGISEL-NEXT: retq
+;
+; X86-FASTISEL-LABEL: isnone_f80:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstp %st(0)
+; X86-FASTISEL-NEXT: xorl %eax, %eax
+; X86-FASTISEL-NEXT: retl
+;
+; X64-FASTISEL-LABEL: isnone_f80:
+; X64-FASTISEL: # %bb.0: # %entry
+; X64-FASTISEL-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-FASTISEL-NEXT: fstp %st(0)
+; X64-FASTISEL-NEXT: xorl %eax, %eax
+; X64-FASTISEL-NEXT: retq
+entry:
+%0 = tail call i1 @llvm.is.fpclass.f80(x86_fp80 %x, i32 0)
+ret i1 %0
+}
+
+define i1 @isany_f80(x86_fp80 %x) nounwind {
+; X86-SDAGISEL-LABEL: isany_f80:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movb $1, %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-SDAGISEL-LABEL: isany_f80:
+; X64-SDAGISEL: # %bb.0: # %entry
+; X64-SDAGISEL-NEXT: movb $1, %al
+; X64-SDAGISEL-NEXT: retq
+;
+; X86-FASTISEL-LABEL: isany_f80:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstp %st(0)
+; X86-FASTISEL-NEXT: movb $1, %al
+; X86-FASTISEL-NEXT: retl
+;
+; X64-FASTISEL-LABEL: isany_f80:
+; X64-FASTISEL: # %bb.0: # %entry
+; X64-FASTISEL-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-FASTISEL-NEXT: fstp %st(0)
+; X64-FASTISEL-NEXT: movb $1, %al
+; X64-FASTISEL-NEXT: retq
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f80(x86_fp80 %x, i32 1023)
+ ret i1 %0
+}
diff --git a/llvm/test/CodeGen/X86/late-tail-dup-computed-goto.mir b/llvm/test/CodeGen/X86/late-tail-dup-computed-goto.mir
new file mode 100644
index 0000000..e272e7e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/late-tail-dup-computed-goto.mir
@@ -0,0 +1,128 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=tailduplication -tail-dup-pred-size=1 -tail-dup-succ-size=1 %s -o - | FileCheck %s
+#
+# Check that only the computed gotos are duplicated aggressively.
+#
+--- |
+ @computed_goto.dispatch = constant [5 x ptr] [ptr null, ptr blockaddress(@computed_goto, %bb1), ptr blockaddress(@computed_goto, %bb2), ptr blockaddress(@computed_goto, %bb3), ptr blockaddress(@computed_goto, %bb4)]
+ declare i64 @f0()
+ declare i64 @f1()
+ declare i64 @f2()
+ declare i64 @f3()
+ declare i64 @f4()
+ declare i64 @f5()
+ define void @computed_goto() {
+ start:
+ ret void
+ bb1:
+ ret void
+ bb2:
+ ret void
+ bb3:
+ ret void
+ bb4:
+ ret void
+ }
+ define void @jump_table() { ret void }
+ define void @jump_table_pic() { ret void }
+...
+---
+name: computed_goto
+alignment: 1
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: computed_goto
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f0, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rax
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64_nosp = COPY [[COPY]]
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64_nosp = COPY [[COPY1]]
+ ; CHECK-NEXT: JMP64m $noreg, 8, [[COPY2]], @computed_goto.dispatch, $noreg
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.bb1 (ir-block-address-taken %ir-block.bb1):
+ ; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f1, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr64 = COPY $rax
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64_nosp = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64_nosp = COPY [[COPY1]]
+ ; CHECK-NEXT: JMP64m $noreg, 8, [[COPY2]], @computed_goto.dispatch, $noreg
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.bb2 (ir-block-address-taken %ir-block.bb2):
+ ; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f2, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr64 = COPY $rax
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64_nosp = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64_nosp = COPY [[COPY1]]
+ ; CHECK-NEXT: JMP64m $noreg, 8, [[COPY2]], @computed_goto.dispatch, $noreg
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.bb3 (ir-block-address-taken %ir-block.bb3):
+ ; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f3, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr64 = COPY $rax
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64_nosp = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64_nosp = COPY [[COPY1]]
+ ; CHECK-NEXT: JMP64m $noreg, 8, [[COPY2]], @computed_goto.dispatch, $noreg
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4.bb4 (ir-block-address-taken %ir-block.bb4):
+ ; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f4, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr64 = COPY $rax
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64_nosp = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64_nosp = COPY [[COPY1]]
+ ; CHECK-NEXT: JMP64m $noreg, 8, [[COPY2]], @computed_goto.dispatch, $noreg
+ bb.0:
+ successors: %bb.5(0x80000000)
+
+ CALL64pcrel32 target-flags(x86-plt) @f0, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ %0:gr64 = COPY $rax
+ %6:gr64_nosp = COPY %0
+ JMP_1 %bb.5
+
+ bb.1.bb1 (ir-block-address-taken %ir-block.bb1):
+ successors: %bb.5(0x80000000)
+
+ CALL64pcrel32 target-flags(x86-plt) @f1, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ %1:gr64 = COPY $rax
+ %6:gr64_nosp = COPY %1
+ JMP_1 %bb.5
+
+ bb.2.bb2 (ir-block-address-taken %ir-block.bb2):
+ successors: %bb.5(0x80000000)
+
+ CALL64pcrel32 target-flags(x86-plt) @f2, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ %2:gr64 = COPY $rax
+ %6:gr64_nosp = COPY %2
+ JMP_1 %bb.5
+
+ bb.3.bb3 (ir-block-address-taken %ir-block.bb3):
+ successors: %bb.5(0x80000000)
+
+ CALL64pcrel32 target-flags(x86-plt) @f3, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ %3:gr64 = COPY $rax
+ %6:gr64_nosp = COPY %3
+ JMP_1 %bb.5
+
+ bb.4.bb4 (ir-block-address-taken %ir-block.bb4):
+ successors: %bb.5(0x80000000)
+
+ CALL64pcrel32 target-flags(x86-plt) @f4, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ %4:gr64 = COPY $rax
+ %6:gr64_nosp = COPY %4
+
+ bb.5:
+ successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000)
+
+ %5:gr64_nosp = COPY %6
+ JMP64m $noreg, 8, %5, @computed_goto.dispatch, $noreg
+...
diff --git a/llvm/test/CodeGen/X86/load-combine.ll b/llvm/test/CodeGen/X86/load-combine.ll
index b5f3e78..f21c075 100644
--- a/llvm/test/CodeGen/X86/load-combine.ll
+++ b/llvm/test/CodeGen/X86/load-combine.ll
@@ -800,13 +800,13 @@ define void @shift_i32_by_32(ptr %src1, ptr %src2, ptr %dst) {
; CHECK-LABEL: shift_i32_by_32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl $-1, 4(%eax)
-; CHECK-NEXT: movl $-1, (%eax)
+; CHECK-NEXT: movl $0, 4(%eax)
+; CHECK-NEXT: movl $0, (%eax)
; CHECK-NEXT: retl
;
; CHECK64-LABEL: shift_i32_by_32:
; CHECK64: # %bb.0: # %entry
-; CHECK64-NEXT: movq $-1, (%rdx)
+; CHECK64-NEXT: movq $0, (%rdx)
; CHECK64-NEXT: retq
entry:
%load1 = load i8, ptr %src1, align 1
diff --git a/llvm/test/CodeGen/X86/pr33960.ll b/llvm/test/CodeGen/X86/pr33960.ll
index 44fe777..6ee270e 100644
--- a/llvm/test/CodeGen/X86/pr33960.ll
+++ b/llvm/test/CodeGen/X86/pr33960.ll
@@ -7,12 +7,10 @@
define void @PR33960() {
; X86-LABEL: PR33960:
; X86: # %bb.0: # %entry
-; X86-NEXT: movl $-1, b
; X86-NEXT: retl
;
; X64-LABEL: PR33960:
; X64: # %bb.0: # %entry
-; X64-NEXT: movl $-1, b(%rip)
; X64-NEXT: retq
entry:
%tmp = insertelement <4 x i32> <i32 undef, i32 -7, i32 -3, i32 undef>, i32 -2, i32 3
diff --git a/llvm/test/CodeGen/X86/pr76416.ll b/llvm/test/CodeGen/X86/pr76416.ll
new file mode 100644
index 0000000..68e9ef9
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr76416.ll
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+;
+; Reproducer from https://github.com/llvm/llvm-project/issues/76416
+;
+
+@load_p = external global ptr, align 8
+@load_data = external global i8, align 1
+
+define dso_local void @pr76416() {
+; CHECK-LABEL: pr76416:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movl $0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: cmpl $3, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: jg .LBB0_3
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB0_2: # %for.body
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: incl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: cmpl $3, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: jle .LBB0_2
+; CHECK-NEXT: .LBB0_3: # %for.end
+; CHECK-NEXT: movl $0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq load_p@GOTPCREL(%rip), %rax
+; CHECK-NEXT: movq load_data@GOTPCREL(%rip), %rcx
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB0_4: # %for.cond1
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: movq (%rax), %rdx
+; CHECK-NEXT: movslq -{{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT: movzbl (%rdx,%rsi), %edx
+; CHECK-NEXT: movb %dl, (%rcx)
+; CHECK-NEXT: leal 1(%rsi), %edx
+; CHECK-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: jmp .LBB0_4
+entry:
+ %alloca = alloca i32, align 4
+ store i32 0, ptr %alloca, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %entry
+ %load.from.alloca.0 = load i32, ptr %alloca, align 4
+ %cmp = icmp slt i32 %load.from.alloca.0, 4
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ call void asm sideeffect "", "{ax},~{dirflag},~{fpsr},~{flags}"(i8 0) nounwind
+ %load.from.alloca.1 = load i32, ptr %alloca, align 4
+ %inc = add nsw i32 %load.from.alloca.1, 1
+ store i32 %inc, ptr %alloca, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ store i32 0, ptr %alloca, align 4
+ br label %for.cond1
+
+for.cond1: ; preds = %for.cond1, %for.end
+ call void asm sideeffect "", "N{dx},~{dirflag},~{fpsr},~{flags}"(i32 poison) nounwind
+ %load.from.load_p = load ptr, ptr @load_p, align 8
+ %regs = getelementptr inbounds { [4 x i8] }, ptr %load.from.load_p, i32 0, i32 0
+ %load.from.alloca.2 = load i32, ptr %alloca, align 4
+ %idxprom = sext i32 %load.from.alloca.2 to i64
+ %arrayidx = getelementptr inbounds [4 x i8], ptr %regs, i64 0, i64 %idxprom
+ %load.with.gep.ptr = load i8, ptr %arrayidx, align 1
+ store i8 %load.with.gep.ptr, ptr @load_data, align 1
+ %load.from.alloca.3 = load i32, ptr %alloca, align 4
+ %inc2 = add nsw i32 %load.from.alloca.3, 1
+ store i32 %inc2, ptr %alloca, align 4
+ br label %for.cond1
+}
diff --git a/llvm/test/CodeGen/X86/stack-protector.ll b/llvm/test/CodeGen/X86/stack-protector.ll
index f4f3ae4..772e776 100644
--- a/llvm/test/CodeGen/X86/stack-protector.ll
+++ b/llvm/test/CodeGen/X86/stack-protector.ll
@@ -6,6 +6,7 @@
; RUN: llc -mtriple=amd64-pc-openbsd < %s -o - | FileCheck --check-prefix=OPENBSD-AMD64 %s
; RUN: llc -mtriple=i386-pc-windows-msvc < %s -o - | FileCheck -check-prefix=MSVC-I386 %s
; RUN: llc -mtriple=x86_64-w64-mingw32 < %s -o - | FileCheck --check-prefix=MINGW-X64 %s
+; RUN: llc -mtriple=x86_64-pc-cygwin < %s -o - | FileCheck --check-prefix=MINGW-X64 %s
; RUN: llc -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck --check-prefix=IGNORE_INTRIN %s
%struct.foo = type { [16 x i8] }
diff --git a/llvm/test/CodeGen/X86/subreg-fail.mir b/llvm/test/CodeGen/X86/subreg-fail.mir
index c8146f0..dc69071 100644
--- a/llvm/test/CodeGen/X86/subreg-fail.mir
+++ b/llvm/test/CodeGen/X86/subreg-fail.mir
@@ -14,8 +14,8 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test1
- ; CHECK: undef [[MOV32rm:%[0-9]+]].sub_32bit:gr64_nosp = MOV32rm undef %1:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
- ; CHECK-NEXT: undef [[MOV32rm1:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32rm undef %4:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
+ ; CHECK: undef [[MOV32rm:%[0-9]+]].sub_32bit:gr64_nosp = MOV32rm undef %1:gr64, 1, $noreg, 0, $noreg, implicit-def [[MOV32rm]] :: (volatile load (s32) from `ptr undef`)
+ ; CHECK-NEXT: undef [[MOV32rm1:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32rm undef %4:gr64, 1, $noreg, 0, $noreg, implicit-def [[MOV32rm1]] :: (volatile load (s32) from `ptr undef`)
; CHECK-NEXT: [[MOV32rm1:%[0-9]+]]:gr64_with_sub_8bit = SHL64ri [[MOV32rm1]], 32, implicit-def dead $eflags
; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_with_sub_8bit = LEA64r [[MOV32rm1]], 1, [[MOV32rm]], 256, $noreg
; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_with_sub_8bit = SHR64ri [[LEA64r]], 8, implicit-def dead $eflags
diff --git a/llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir b/llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir
new file mode 100644
index 0000000..e4fb812
--- /dev/null
+++ b/llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir
@@ -0,0 +1,451 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -mtriple=x86_64-- -run-pass=register-coalescer -o - %s | FileCheck %s --match-full-lines
+
+# We cannot lose the liveness of the high subregister of %1 when
+# coalesced with %0, so introduce an implicit-def of the super
+# register on the MOV.
+
+---
+name: coalesce_mov32r0_into_subreg_to_reg64
+frameInfo:
+ adjustsStack: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64
+ ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]]
+ ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
+ ; CHECK-NEXT: CALL64r [[MOV32r0_]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: RET 0
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ %0:gr32 = MOV32r0 implicit-def dead $eflags
+ %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
+ $rdi = COPY %1
+ CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ RET 0
+
+...
+
+---
+name: subreg_to_reg_folds_to_undef
+frameInfo:
+ adjustsStack: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $rax
+
+ ; CHECK-LABEL: name: subreg_to_reg_folds_to_undef
+ ; CHECK: liveins: $rax
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64_with_sub_8bit = COPY $rax
+ ; CHECK-NEXT: undef [[MOV32rr:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32rr [[COPY]].sub_32bit, implicit-def [[MOV32rr]]
+ ; CHECK-NEXT: RET 0, implicit [[MOV32rr]]
+ %0:gr64 = COPY killed $rax
+ %1:gr32 = COPY killed %0.sub_32bit
+ %2:gr32 = MOV32rr killed %1
+ %3:gr64 = SUBREG_TO_REG 0, killed %2, %subreg.sub_32bit
+ %4:gr64 = COPY killed %3
+ RET 0, implicit %4
+
+...
+
+---
+name: coalesce_mov32r0_subreg_def_into_subreg_to_reg64
+frameInfo:
+ adjustsStack: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: coalesce_mov32r0_subreg_def_into_subreg_to_reg64
+ ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]]
+ ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
+ ; CHECK-NEXT: CALL64r [[MOV32r0_]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: RET 0
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ undef %0.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags
+ %1:gr64 = SUBREG_TO_REG 0, killed %0.sub_32bit, %subreg.sub_32bit
+ $rdi = COPY %1
+ CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ RET 0
+
+...
+
+---
+name: coalesce_mov32r0_into_subreg_def_with_super_def_to_reg64
+frameInfo:
+ adjustsStack: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_def_with_super_def_to_reg64
+ ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]], implicit-def [[MOV32r0_]]
+ ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi, implicit-def $rdi
+ ; CHECK-NEXT: CALL64r [[MOV32r0_]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: RET 0
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ undef %0.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %0
+ %1:gr64 = SUBREG_TO_REG 0, killed %0.sub_32bit, %subreg.sub_32bit
+ $rdi = COPY %1
+ CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ RET 0
+
+...
+
+---
+name: coalesce_mov32r0_into_subreg_to_reg64_already_defs_other_subreg
+frameInfo:
+ adjustsStack: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_already_defs_other_subreg
+ ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def undef [[MOV32r0_]].sub_8bit, implicit-def [[MOV32r0_]]
+ ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, implicit [[MOV32r0_]]
+ ; CHECK-NEXT: CALL64r [[MOV32r0_]], csr_64, implicit $rsp, implicit $ssp, implicit undef $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: RET 0
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ %0:gr32 = MOV32r0 implicit-def dead $eflags, implicit-def undef %0.sub_8bit
+ %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
+ INLINEASM &"", 0, implicit %1
+ CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit undef $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ RET 0
+
+...
+
+
+# Reduced realistic case which was asserting after introducing new implicit-defs
+---
+name: coalesce_needs_implicit_defs
+frameInfo:
+ adjustsStack: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: coalesce_needs_implicit_defs
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $rdi
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rdi
+ ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]], implicit-def [[MOV32r0_]]
+ ; CHECK-NEXT: undef [[MOV32r0_1:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_1]], implicit-def [[MOV32r0_1]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[MOV32r0_2:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags
+ ; CHECK-NEXT: TEST64rr [[MOV32r0_1]], [[MOV32r0_1]], implicit-def $eflags
+ ; CHECK-NEXT: [[MOV32r0_2:%[0-9]+]].sub_8bit:gr64_with_sub_8bit = SETCCr 4, implicit killed $eflags
+ ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
+ ; CHECK-NEXT: CALL64r [[MOV32r0_]], csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: [[MOV32r0_2:%[0-9]+]]:gr64_with_sub_8bit = SHL64ri [[MOV32r0_2]], 4, implicit-def dead $eflags
+ ; CHECK-NEXT: [[MOV32r0_2:%[0-9]+]]:gr64_with_sub_8bit = ADD64rr [[MOV32r0_2]], [[COPY]], implicit-def dead $eflags
+ ; CHECK-NEXT: [[MOV32r0_1:%[0-9]+]]:gr64_with_sub_8bit = COPY [[MOV32r0_2]]
+ ; CHECK-NEXT: JMP_1 %bb.1
+ bb.0:
+ liveins: $rdi
+
+ %0:gr64 = COPY killed $rdi
+ %1:gr32 = MOV32r0 implicit-def dead $eflags
+ %2:gr64 = SUBREG_TO_REG 0, %1, %subreg.sub_32bit
+ %3:gr64 = COPY killed %2
+
+ bb.1:
+ %4:gr64 = COPY killed %3
+ %5:gr32 = MOV32r0 implicit-def dead $eflags
+ TEST64rr killed %4, %4, implicit-def $eflags
+ %6:gr8 = SETCCr 4, implicit killed $eflags
+ %7:gr32 = COPY killed %5
+ %7.sub_8bit:gr32 = COPY killed %6
+ %8:gr64 = SUBREG_TO_REG 0, killed %7, %subreg.sub_32bit
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ %9:gr64 = SUBREG_TO_REG 0, %1, %subreg.sub_32bit
+ $rdi = COPY %9
+ CALL64r killed %9, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ %10:gr64 = COPY killed %8
+ %10:gr64 = SHL64ri %10, 4, implicit-def dead $eflags
+ %11:gr64 = COPY killed %10
+ %11:gr64 = ADD64rr %11, %0, implicit-def dead $eflags
+ %3:gr64 = COPY killed %11
+ JMP_1 %bb.1
+
+...
+
+# Make sure to add the 'undef' flag to the result register %2,
+# because the top 32bits are not defined.
+---
+name: coalesce_add_implicitdef_and_undef
+frameInfo:
+ adjustsStack: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: coalesce_add_implicitdef_and_undef
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $eflags, $edx
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = COPY $edx
+ ; CHECK-NEXT: JMP_1 %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = ADD32ri [[COPY]].sub_32bit, -34, implicit-def $eflags, implicit-def [[COPY]]
+ ; CHECK-NEXT: FAKE_USE [[COPY]]
+ ; CHECK-NEXT: RET 0
+ bb.0:
+ liveins: $eflags, $edx
+ %0:gr32 = COPY $edx
+ JMP_1 %bb.1
+
+ bb.1:
+ %1:gr32 = COPY %0
+ %1:gr32 = ADD32ri %1, -34, implicit-def $eflags
+ %2:gr64_with_sub_8bit = SUBREG_TO_REG 0, killed %1, %subreg.sub_32bit
+ FAKE_USE %2
+ RET 0
+...
+
+# We can't mark the destination register as 'undef' or add implicit-def
+# because the top 24 bits of %0:gr32 are retained by the SUBREG_TO_REG.
+#
+# For example, if this were to result in:
+#
+# undef %2.sub_32bit:gr64_with_sub_8bit = COPY $edx
+# %1:gr8 = SETCCr 4, implicit $eflags
+# JMP_1 %bb.1
+#
+# bb.1:
+# undef %2.sub_8bit:gr64_with_sub_8bit = COPY %1, implicit-def %2
+#
+# Then this says that the top 56 bits of %2 are undef. That's not correct
+# because only the top 32 bits are undef.
+---
+name: coalesce_dont_add_implicitdef_or_undef
+frameInfo:
+ adjustsStack: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: coalesce_dont_add_implicitdef_or_undef
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $eflags, $edx
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = COPY $edx
+ ; CHECK-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags
+ ; CHECK-NEXT: JMP_1 %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: [[COPY:%[0-9]+]].sub_8bit:gr64_with_sub_8bit = COPY [[SETCCr]]
+ ; CHECK-NEXT: FAKE_USE [[COPY]]
+ ; CHECK-NEXT: RET 0
+ bb.0:
+ liveins: $eflags, $edx
+ %0:gr32 = COPY $edx
+ %1:gr8 = SETCCr 4, implicit killed $eflags
+ JMP_1 %bb.1
+
+ bb.1:
+ %0.sub_8bit:gr32 = COPY %1
+ %2:gr64_with_sub_8bit = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
+ FAKE_USE %2
+ RET 0
+...
+
+---
+name: coalesce_mov32r0_into_subreg_to_reg64_physreg_def
+frameInfo:
+ adjustsStack: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_physreg_def
+ ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
+ ; CHECK-NEXT: CALL64r killed $rdi, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: RET 0
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ %0:gr32 = MOV32r0 implicit-def dead $eflags
+ $rdi = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
+ CALL64r killed $rdi, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ RET 0
+
+...
+
+---
+name: coalesce_mov32r0_into_subreg_to_reg64_physreg_use
+frameInfo:
+ adjustsStack: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $eax
+ ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_physreg_use
+ ; CHECK: liveins: $eax
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: $eax = MOV32r0 implicit-def dead $eflags
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, $eax, %subreg.sub_32bit
+ ; CHECK-NEXT: $rdi = COPY [[SUBREG_TO_REG]]
+ ; CHECK-NEXT: CALL64r [[SUBREG_TO_REG]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: RET 0
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ $eax = MOV32r0 implicit-def dead $eflags
+ %1:gr64 = SUBREG_TO_REG 0, killed $eax, %subreg.sub_32bit
+ $rdi = COPY %1
+ CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ RET 0
+
+...
+
+# Coalesced instruction is a copy with other implicit operands
+---
+name: coalesce_copy_into_subreg_to_reg64
+frameInfo:
+ adjustsStack: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $eax
+ ; CHECK-LABEL: name: coalesce_copy_into_subreg_to_reg64
+ ; CHECK: liveins: $eax
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = COPY $eax, implicit-def dead $eflags, implicit-def [[COPY]]
+ ; CHECK-NEXT: $rdi = COPY [[COPY]]
+ ; CHECK-NEXT: CALL64r [[COPY]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: RET 0
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ %0:gr32 = COPY $eax, implicit-def dead $eflags
+ %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
+ $rdi = COPY %1
+ CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ RET 0
+
+...
+
+---
+name: coalesce_mov32r0_into_subreg_to_reg64_multiple_redef_value
+frameInfo:
+ adjustsStack: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_multiple_redef_value
+ ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags
+ ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, implicit-def undef [[MOV32r0_]].sub_32bit, implicit [[MOV32r0_]].sub_32bit, implicit-def [[MOV32r0_]]
+ ; CHECK-NEXT: $rdi = COPY [[MOV32r0_]]
+ ; CHECK-NEXT: CALL64r [[MOV32r0_]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: RET 0
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ %0:gr32 = MOV32r0 implicit-def dead $eflags
+ INLINEASM &"", 0, implicit-def %0, implicit %0
+ %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
+ $rdi = COPY %1
+ CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ RET 0
+
+...
+
+---
+name: coalesce_mov32r0_into_subreg_to_reg64_def_is_block_liveout
+frameInfo:
+ adjustsStack: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_def_is_block_liveout
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, implicit-def undef %1.sub_32bit, implicit-def %1
+ ; CHECK-NEXT: JCC_1 %bb.1, 4, implicit undef $eflags
+ ; CHECK-NEXT: JMP_1 %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: $rdi = COPY %1
+ ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: RET 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ bb.0:
+ INLINEASM &"", 0, implicit-def %0:gr32
+ JCC_1 %bb.1, 4, implicit undef $eflags
+ JMP_1 %bb.2
+
+ bb.1:
+ %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
+ $rdi = COPY %1
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ RET 0
+
+ bb.2:
+
+...
+
+---
+name: coalesce_mov32r0_into_subreg_to_reg64_def_is_phi_def
+frameInfo:
+ adjustsStack: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_def_is_phi_def
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, implicit-def undef %1.sub_32bit, implicit-def %1
+ ; CHECK-NEXT: JCC_1 %bb.1, 4, implicit undef $eflags
+ ; CHECK-NEXT: JMP_1 %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $rdi = COPY %1
+ ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: JMP_1 %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ bb.0:
+
+ INLINEASM &"", 0, implicit-def %0:gr32
+ JCC_1 %bb.1, 4, implicit undef $eflags
+ JMP_1 %bb.2
+
+ bb.1:
+ %1:gr64 = SUBREG_TO_REG 0, %0, %subreg.sub_32bit
+ $rdi = COPY %1
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ JMP_1 %bb.1
+
+ bb.2:
+
+...
diff --git a/llvm/test/CodeGen/X86/swap.ll b/llvm/test/CodeGen/X86/swap.ll
index 1dc454dd..3330403 100644
--- a/llvm/test/CodeGen/X86/swap.ll
+++ b/llvm/test/CodeGen/X86/swap.ll
@@ -113,21 +113,17 @@ define dso_local void @onealloc_readback_1(ptr nocapture %a, ptr nocapture %b) l
;
; AA-LABEL: onealloc_readback_1:
; AA: # %bb.0: # %entry
-; AA-NEXT: vmovups (%rdi), %xmm0
-; AA-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AA-NEXT: vmovups (%rsi), %xmm0
; AA-NEXT: vmovups %xmm0, (%rdi)
; AA-NEXT: retq
entry:
%alloc = alloca [16 x i8], i8 2, align 1
%part1 = getelementptr inbounds [16 x i8], ptr %alloc, i64 1, i64 0
- call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %part1)
- call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %alloc)
+ call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %alloc)
call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %part1, ptr align 1 %a, i64 16, i1 false)
call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %alloc, ptr align 1 %b, i64 16, i1 false)
- call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %part1)
tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %a, ptr align 1 %alloc, i64 16, i1 false)
- call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %alloc)
+ call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %alloc)
ret void
}
@@ -144,19 +140,16 @@ define dso_local void @onealloc_readback_2(ptr nocapture %a, ptr nocapture %b) l
; AA-LABEL: onealloc_readback_2:
; AA: # %bb.0: # %entry
; AA-NEXT: vmovups (%rsi), %xmm0
-; AA-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AA-NEXT: vmovups %xmm0, (%rdi)
; AA-NEXT: retq
entry:
%alloc = alloca [16 x i8], i8 2, align 1
%part2 = getelementptr inbounds [16 x i8], ptr %alloc, i64 1, i64 0
- call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %alloc)
- call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %part2)
+ call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %alloc)
call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %alloc, ptr align 1 %a, i64 16, i1 false)
call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %part2, ptr align 1 %b, i64 16, i1 false)
- call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %alloc)
tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %a, ptr align 1 %part2, i64 16, i1 false)
- call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %part2)
+ call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %alloc)
ret void
}
diff --git a/llvm/test/CodeGen/X86/win32-ssp.ll b/llvm/test/CodeGen/X86/win32-ssp.ll
index 536a6d5..259f039 100644
--- a/llvm/test/CodeGen/X86/win32-ssp.ll
+++ b/llvm/test/CodeGen/X86/win32-ssp.ll
@@ -1,7 +1,9 @@
; RUN: llc -mtriple=x86_64-w64-mingw32 < %s -o - | FileCheck --check-prefix=MINGW %s
+; RUN: llc -mtriple=x86_64-pc-cygwin < %s -o - | FileCheck --check-prefix=MINGW %s
; RUN: llc -mtriple=x86_64-pc-windows-itanium < %s -o - | FileCheck --check-prefix=MSVC %s
; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s -o - | FileCheck --check-prefix=MSVC %s
; RUN: llc -mtriple=i686-w64-mingw32 < %s -o - | FileCheck --check-prefix=MINGW %s
+; RUN: llc -mtriple=i686-pc-cygwin < %s -o - | FileCheck --check-prefix=MINGW %s
declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
declare dso_local void @other(ptr)