Diffstat (limited to 'llvm/test')
-rw-r--r--  llvm/test/CodeGen/AArch64/popcount_vmask.ll                        | 315
-rw-r--r--  llvm/test/CodeGen/AArch64/vector-minmax.ll                         | 119
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll                      |  16
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir                     |  24
-rw-r--r--  llvm/test/CodeGen/X86/pr166744.ll                                  |  66
-rw-r--r--  llvm/test/Transforms/VectorCombine/AArch64/sve-interleave-splat.ll |  11
6 files changed, 551 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AArch64/popcount_vmask.ll b/llvm/test/CodeGen/AArch64/popcount_vmask.ll
new file mode 100644
index 0000000..e784ead
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/popcount_vmask.ll
@@ -0,0 +1,315 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define i32 @vmask_popcount_i32_v8i8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: vmask_popcount_i32_v8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.8b, v1.8b, v0.8b
+; CHECK-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-NEXT: saddlv s0, v0.8h
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+ %mask = icmp slt <8 x i8> %a, %b
+ %t1 = bitcast <8 x i1> %mask to i8
+ %t2 = call i8 @llvm.ctpop(i8 %t1)
+ %t3 = zext i8 %t2 to i32
+ ret i32 %t3
+}
+
+define i32 @vmask_popcount_i32_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vmask_popcount_i32_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.16b, v1.16b, v0.16b
+; CHECK-NEXT: sshll2 v1.8h, v0.16b, #0
+; CHECK-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-NEXT: saddl2 v2.4s, v0.8h, v1.8h
+; CHECK-NEXT: saddl v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: addv s0, v0.4s
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+ %mask = icmp slt <16 x i8> %a, %b
+ %t1 = bitcast <16 x i1> %mask to i16
+ %t2 = call i16 @llvm.ctpop(i16 %t1)
+ %t3 = zext i16 %t2 to i32
+ ret i32 %t3
+}
+
+define i32 @vmask_popcount_i32_v4i16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: vmask_popcount_i32_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.4h, v1.4h, v0.4h
+; CHECK-NEXT: saddlv s0, v0.4h
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+ %mask = icmp slt <4 x i16> %a, %b
+ %t1 = bitcast <4 x i1> %mask to i4
+ %t2 = call i4 @llvm.ctpop(i4 %t1)
+ %t3 = zext i4 %t2 to i32
+ ret i32 %t3
+}
+
+define i32 @vmask_popcount_i32_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vmask_popcount_i32_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: saddlv s0, v0.8h
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+ %mask = icmp slt <8 x i16> %a, %b
+ %t1 = bitcast <8 x i1> %mask to i8
+ %t2 = call i8 @llvm.ctpop(i8 %t1)
+ %t3 = zext i8 %t2 to i32
+ ret i32 %t3
+}
+
+define i32 @vmask_popcount_i32_v2i32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: vmask_popcount_i32_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.2s, v1.2s, v0.2s
+; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+ %mask = icmp slt <2 x i32> %a, %b
+ %t1 = bitcast <2 x i1> %mask to i2
+ %t2 = call i2 @llvm.ctpop(i2 %t1)
+ %t3 = zext i2 %t2 to i32
+ ret i32 %t3
+}
+
+define i32 @vmask_popcount_i32_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vmask_popcount_i32_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: addv s0, v0.4s
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+ %mask = icmp slt <4 x i32> %a, %b
+ %t1 = bitcast <4 x i1> %mask to i4
+ %t2 = call i4 @llvm.ctpop(i4 %t1)
+ %t3 = zext i4 %t2 to i32
+ ret i32 %t3
+}
+
+define i32 @vmask_popcount_i32_v1i64(<1 x i64> %a, <1 x i64> %b) {
+; CHECK-LABEL: vmask_popcount_i32_v1i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: fmov x8, d1
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: cset w0, lt
+; CHECK-NEXT: ret
+ %mask = icmp slt <1 x i64> %a, %b
+ %t1 = bitcast <1 x i1> %mask to i1
+ %t2 = call i1 @llvm.ctpop(i1 %t1)
+ %t3 = zext i1 %t2 to i32
+ ret i32 %t3
+}
+
+define i32 @vmask_popcount_i32_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: vmask_popcount_i32_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+ %mask = icmp slt <2 x i64> %a, %b
+ %t1 = bitcast <2 x i1> %mask to i2
+ %t2 = call i2 @llvm.ctpop(i2 %t1)
+ %t3 = zext i2 %t2 to i32
+ ret i32 %t3
+}
+
+define i64 @vmask_popcount_i64_v8i8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: vmask_popcount_i64_v8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.8b, v1.8b, v0.8b
+; CHECK-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-NEXT: saddlv s0, v0.8h
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+ %mask = icmp slt <8 x i8> %a, %b
+ %t1 = bitcast <8 x i1> %mask to i8
+ %t2 = call i8 @llvm.ctpop(i8 %t1)
+ %t3 = zext i8 %t2 to i64
+ ret i64 %t3
+}
+
+define i64 @vmask_popcount_i64_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vmask_popcount_i64_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.16b, v1.16b, v0.16b
+; CHECK-NEXT: sshll2 v1.8h, v0.16b, #0
+; CHECK-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-NEXT: saddl2 v2.4s, v0.8h, v1.8h
+; CHECK-NEXT: saddl v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: addv s0, v0.4s
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+ %mask = icmp slt <16 x i8> %a, %b
+ %t1 = bitcast <16 x i1> %mask to i16
+ %t2 = call i16 @llvm.ctpop(i16 %t1)
+ %t3 = zext i16 %t2 to i64
+ ret i64 %t3
+}
+
+define i64 @vmask_popcount_i64_v4i16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: vmask_popcount_i64_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.4h, v1.4h, v0.4h
+; CHECK-NEXT: saddlv s0, v0.4h
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+ %mask = icmp slt <4 x i16> %a, %b
+ %t1 = bitcast <4 x i1> %mask to i4
+ %t2 = call i4 @llvm.ctpop(i4 %t1)
+ %t3 = zext i4 %t2 to i64
+ ret i64 %t3
+}
+
+define i64 @vmask_popcount_i64_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vmask_popcount_i64_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: saddlv s0, v0.8h
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+ %mask = icmp slt <8 x i16> %a, %b
+ %t1 = bitcast <8 x i1> %mask to i8
+ %t2 = call i8 @llvm.ctpop(i8 %t1)
+ %t3 = zext i8 %t2 to i64
+ ret i64 %t3
+}
+
+define i64 @vmask_popcount_i64_v2i32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: vmask_popcount_i64_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.2s, v1.2s, v0.2s
+; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+ %mask = icmp slt <2 x i32> %a, %b
+ %t1 = bitcast <2 x i1> %mask to i2
+ %t2 = call i2 @llvm.ctpop(i2 %t1)
+ %t3 = zext i2 %t2 to i64
+ ret i64 %t3
+}
+
+define i64 @vmask_popcount_i64_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vmask_popcount_i64_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: addv s0, v0.4s
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+ %mask = icmp slt <4 x i32> %a, %b
+ %t1 = bitcast <4 x i1> %mask to i4
+ %t2 = call i4 @llvm.ctpop(i4 %t1)
+ %t3 = zext i4 %t2 to i64
+ ret i64 %t3
+}
+
+define i64 @vmask_popcount_i64_v1i64(<1 x i64> %a, <1 x i64> %b) {
+; CHECK-LABEL: vmask_popcount_i64_v1i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: fmov x8, d1
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: cset w0, lt
+; CHECK-NEXT: ret
+ %mask = icmp slt <1 x i64> %a, %b
+ %t1 = bitcast <1 x i1> %mask to i1
+ %t2 = call i1 @llvm.ctpop(i1 %t1)
+ %t3 = zext i1 %t2 to i64
+ ret i64 %t3
+}
+
+define i64 @vmask_popcount_i64_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: vmask_popcount_i64_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+ %mask = icmp slt <2 x i64> %a, %b
+ %t1 = bitcast <2 x i1> %mask to i2
+ %t2 = call i2 @llvm.ctpop(i2 %t1)
+ %t3 = zext i2 %t2 to i64
+ ret i64 %t3
+}
+
+define i32 @non_vmask_popcount_1(half %a) {
+; CHECK-LABEL: non_vmask_popcount_1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: and w8, w8, #0xffff
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: cnt v0.8b, v0.8b
+; CHECK-NEXT: addv b0, v0.8b
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+ %t1 = bitcast half %a to i16
+ %t2 = call i16 @llvm.ctpop(i16 %t1)
+ %t3 = zext i16 %t2 to i32
+ ret i32 %t3
+}
+
+define i32 @non_vmask_popcount_2(<8 x i16> %a) {
+; CHECK-LABEL: non_vmask_popcount_2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: xtn v0.8b, v0.8h
+; CHECK-NEXT: umov w8, v0.b[0]
+; CHECK-NEXT: umov w9, v0.b[1]
+; CHECK-NEXT: umov w10, v0.b[2]
+; CHECK-NEXT: and w8, w8, #0x3
+; CHECK-NEXT: bfi w8, w9, #2, #2
+; CHECK-NEXT: umov w9, v0.b[3]
+; CHECK-NEXT: bfi w8, w10, #4, #2
+; CHECK-NEXT: umov w10, v0.b[4]
+; CHECK-NEXT: bfi w8, w9, #6, #2
+; CHECK-NEXT: umov w9, v0.b[5]
+; CHECK-NEXT: bfi w8, w10, #8, #2
+; CHECK-NEXT: umov w10, v0.b[6]
+; CHECK-NEXT: bfi w8, w9, #10, #2
+; CHECK-NEXT: umov w9, v0.b[7]
+; CHECK-NEXT: bfi w8, w10, #12, #2
+; CHECK-NEXT: orr w8, w8, w9, lsl #14
+; CHECK-NEXT: and w8, w8, #0xffff
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: cnt v0.8b, v0.8b
+; CHECK-NEXT: addv b0, v0.8b
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ret
+ %mask = trunc <8 x i16> %a to <8 x i2>
+ %t1 = bitcast <8 x i2> %mask to i16
+ %t2 = call i16 @llvm.ctpop(i16 %t1)
+ %t3 = zext i16 %t2 to i32
+ ret i32 %t3
+}
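
Editor's note: these tests all exercise one pattern — ctpop of a bitcast compare mask counts the lanes for which the comparison holds. The expected AArch64 code avoids materializing the scalar mask where it can: cmgt writes all-ones (-1) into each true lane, saddlv/addv sums the lanes to -count, and neg recovers count. A minimal C model of the first test, assuming the pattern above (function name and loop form are illustrative, not from the test):

#include <stdint.h>

// Counts lanes where a[i] < b[i]; this equals ctpop of the <8 x i1>
// compare mask in vmask_popcount_i32_v8i8.
static int vmask_popcount_model_v8i8(const int8_t a[8], const int8_t b[8]) {
    int count = 0;
    for (int i = 0; i < 8; i++)
        count += (a[i] < b[i]);   // one mask bit per true lane
    return count;
}

The non_vmask_popcount tests cover the other direction: when the i16 did not come from an all-lanes compare, the backend has to fall back to a genuine scalar popcount (cnt + addv).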
diff --git a/llvm/test/CodeGen/AArch64/vector-minmax.ll b/llvm/test/CodeGen/AArch64/vector-minmax.ll
new file mode 100644
index 0000000..6696f94
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/vector-minmax.ll
@@ -0,0 +1,119 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64 -mattr=+neon,+sve | FileCheck %s --check-prefix=CHECK-SVE
+
+define <2 x i64> @smax_v2i64(<2 x i64> %a, <2 x i64> %b){
+; CHECK-LABEL: smax_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmgt v2.2d, v0.2d, v1.2d
+; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT: ret
+;
+; CHECK-SVE-LABEL: smax_v2i64:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SVE-NEXT: smax z0.d, p0/m, z0.d, z1.d
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-SVE-NEXT: ret
+entry:
+ %0 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %0
+}
+
+define <2 x i64> @smin_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: smin_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmgt v2.2d, v1.2d, v0.2d
+; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT: ret
+;
+; CHECK-SVE-LABEL: smin_v2i64:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SVE-NEXT: smin z0.d, p0/m, z0.d, z1.d
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-SVE-NEXT: ret
+entry:
+ %0 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %0
+}
+
+define <2 x i64> @umax_v2i64(<2 x i64> %a, <2 x i64> %b){
+; CHECK-LABEL: umax_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmhi v2.2d, v0.2d, v1.2d
+; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT: ret
+;
+; CHECK-SVE-LABEL: umax_v2i64:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SVE-NEXT: umax z0.d, p0/m, z0.d, z1.d
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-SVE-NEXT: ret
+entry:
+ %0 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %0
+}
+
+define <2 x i64> @umin_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: umin_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmhi v2.2d, v1.2d, v0.2d
+; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT: ret
+;
+; CHECK-SVE-LABEL: umin_v2i64:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SVE-NEXT: umin z0.d, p0/m, z0.d, z1.d
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-SVE-NEXT: ret
+entry:
+ %0 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %0
+}
+
+define <1 x i64> @smax_v1i64(<1 x i64> %a, <1 x i64> %b){
+; CHECK-LABEL: smax_v1i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmgt d2, d0, d1
+; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-NEXT: ret
+;
+; CHECK-SVE-LABEL: smax_v1i64:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: ptrue p0.d, vl1
+; CHECK-SVE-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-SVE-NEXT: // kill: def $d1 killed $d1 def $z1
+; CHECK-SVE-NEXT: smax z0.d, p0/m, z0.d, z1.d
+; CHECK-SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-SVE-NEXT: ret
+entry:
+  %0 = call <1 x i64> @llvm.smax.v1i64(<1 x i64> %a, <1 x i64> %b)
+ ret <1 x i64> %0
+}
+
+; This is legal for Neon, so this should use the Neon smax.
+define <4 x i32> @smax_v4i32(<4 x i32> %a, <4 x i32> %b){
+; CHECK-LABEL: smax_v4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+;
+; CHECK-SVE-LABEL: smax_v4i32:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: smax v0.4s, v0.4s, v1.4s
+; CHECK-SVE-NEXT: ret
+entry:
+ %0 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %0
+}
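
Editor's note: the point of the v2i64/v1i64 cases is that base Neon has no 64-bit-element smax/smin/umax/umin, so without SVE the operation is expanded to a compare plus bitwise select (cmgt/cmhi + bif), while +sve lowers it to the predicated SVE instruction on the fixed-length vector. A scalar sketch of the compare+select expansion, one 64-bit lane at a time (helper name is illustrative):

#include <stdint.h>

// Mirrors cmgt + bif on one lane: build an all-ones mask where a > b,
// then blend the two inputs through it.
static inline int64_t smax64_select(int64_t a, int64_t b) {
    uint64_t mask = a > b ? ~(uint64_t)0 : 0;
    return (int64_t)(((uint64_t)a & mask) | ((uint64_t)b & ~mask));
}

The v4i32 case stays on the single Neon smax instruction even with +sve, since it is already legal there.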
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
index 20034b6..b6e29cf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
@@ -863,3 +863,19 @@ entry:
i64 2)
ret <vscale x 1 x double> %2
}
+
+; The two vsetvlis will be coalesced so the add will be made dead and
+; removed. Make sure we shrink the live interval of %x.
+define void @non_li_addi(i64 %x, ptr %p) {
+; CHECK-LABEL: non_li_addi:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: ret
+entry:
+ %add = add i64 %x, 1
+ %0 = tail call i64 @llvm.riscv.vsetvli(i64 %add, i64 3, i64 0)
+ %1 = call <vscale x 8 x i8> @llvm.riscv.vle(<vscale x 8 x i8> poison, ptr %p, i64 %0)
+ %2 = tail call i64 @llvm.riscv.vsetvli(i64 1, i64 3, i64 0)
+ %3 = tail call { <vscale x 8 x i8>, i64 } @llvm.riscv.vleff(<vscale x 8 x i8> poison, ptr %p, i64 %2)
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
index fdd30c9..f9929c9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
@@ -104,6 +104,10 @@
ret void
}
+ define void @non_li_addi() {
+ ret void
+ }
+
declare <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i64>, i64) #1
declare <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64>, ptr nocapture, i64) #4
@@ -664,3 +668,23 @@ body: |
bb.2:
$x10 = COPY %vl
PseudoRET implicit killed $x10
+...
+---
+# The two vsetvlis will be coalesced so the ADDI will be made dead and removed.
+# Make sure we shrink the live interval of %0.
+name: non_li_addi
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10
+ ; CHECK-LABEL: name: non_li_addi
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: dead [[PseudoVSETIVLI:%[0-9]+]]:gprnox0 = PseudoVSETIVLI 1, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: PseudoRET
+ %0:gpr = COPY $x10
+ %1:gprnox0 = ADDI %0, 1
+ %2:gprnox0 = PseudoVSETVLI %1, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
+ %3:gprnox0 = PseudoVSETIVLI 1, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
+ PseudoRET
diff --git a/llvm/test/CodeGen/X86/pr166744.ll b/llvm/test/CodeGen/X86/pr166744.ll
new file mode 100644
index 0000000..21b25d8
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr166744.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s --check-prefixes=POSTRA
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=haswell | FileCheck %s --check-prefixes=NOPOSTRA
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=NOPOSTRA
+
+; Ensure reloads are after narrowed i512 -> i32 store
+define i1 @PR166744(ptr %v, i64 %idx, i1 zeroext %b) {
+; POSTRA-LABEL: PR166744:
+; POSTRA: # %bb.0:
+; POSTRA-NEXT: movl $1029, %eax # imm = 0x405
+; POSTRA-NEXT: shlxl %esi, %edx, %edx
+; POSTRA-NEXT: bextrl %eax, %esi, %eax
+; POSTRA-NEXT: movl (%rdi,%rax,4), %ecx
+; POSTRA-NEXT: btrl %esi, %ecx
+; POSTRA-NEXT: orl %ecx, %edx
+; POSTRA-NEXT: movl %edx, (%rdi,%rax,4)
+; POSTRA-NEXT: movq 16(%rdi), %rax
+; POSTRA-NEXT: movq (%rdi), %rcx
+; POSTRA-NEXT: movq 24(%rdi), %rdx
+; POSTRA-NEXT: movq 8(%rdi), %rsi
+; POSTRA-NEXT: orq 56(%rdi), %rdx
+; POSTRA-NEXT: orq 40(%rdi), %rsi
+; POSTRA-NEXT: orq 48(%rdi), %rax
+; POSTRA-NEXT: orq 32(%rdi), %rcx
+; POSTRA-NEXT: orq %rdx, %rsi
+; POSTRA-NEXT: orq %rax, %rcx
+; POSTRA-NEXT: orq %rsi, %rcx
+; POSTRA-NEXT: setne %al
+; POSTRA-NEXT: retq
+;
+; NOPOSTRA-LABEL: PR166744:
+; NOPOSTRA: # %bb.0:
+; NOPOSTRA-NEXT: movl %esi, %eax
+; NOPOSTRA-NEXT: shrl $3, %eax
+; NOPOSTRA-NEXT: andl $60, %eax
+; NOPOSTRA-NEXT: movl (%rdi,%rax), %ecx
+; NOPOSTRA-NEXT: btrl %esi, %ecx
+; NOPOSTRA-NEXT: shlxl %esi, %edx, %edx
+; NOPOSTRA-NEXT: orl %ecx, %edx
+; NOPOSTRA-NEXT: movl %edx, (%rdi,%rax)
+; NOPOSTRA-NEXT: movq 16(%rdi), %rax
+; NOPOSTRA-NEXT: movq (%rdi), %rcx
+; NOPOSTRA-NEXT: movq 8(%rdi), %rdx
+; NOPOSTRA-NEXT: movq 24(%rdi), %rsi
+; NOPOSTRA-NEXT: orq 56(%rdi), %rsi
+; NOPOSTRA-NEXT: orq 40(%rdi), %rdx
+; NOPOSTRA-NEXT: orq 48(%rdi), %rax
+; NOPOSTRA-NEXT: orq 32(%rdi), %rcx
+; NOPOSTRA-NEXT: orq %rsi, %rdx
+; NOPOSTRA-NEXT: orq %rax, %rcx
+; NOPOSTRA-NEXT: orq %rdx, %rcx
+; NOPOSTRA-NEXT: setne %al
+; NOPOSTRA-NEXT: retq
+ %rem = and i64 %idx, 511
+ %sh_prom = zext nneg i64 %rem to i512
+ %shl = shl nuw i512 1, %sh_prom
+ %not = xor i512 %shl, -1
+ %load = load i512, ptr %v, align 8
+ %and = and i512 %load, %not
+ %conv2 = zext i1 %b to i512
+ %shl4 = shl nuw i512 %conv2, %sh_prom
+ %or = or i512 %and, %shl4
+ store i512 %or, ptr %v, align 8
+ %cmp = icmp ne i512 %or, 0
+ ret i1 %cmp
+}
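
Editor's note: the IR treats *%v as a 512-bit bitset held in memory — clear bit (idx & 511), insert %b in its place, store the result, and report whether any bit of the updated value remains set. A hedged scalar model (function name and word layout are illustrative; the instruction names in the comments come from the CHECK lines):

#include <stdbool.h>
#include <stdint.h>

// Models PR166744 with the i512 stored as sixteen 32-bit words.
static bool pr166744_model(uint32_t v[16], uint64_t idx, bool b) {
    uint64_t bit  = idx & 511;                  // %rem = and i64 %idx, 511
    uint32_t mask = 1u << (bit % 32);
    v[bit / 32] = (v[bit / 32] & ~mask)         // clear the old bit (btrl)
                | ((uint32_t)b << (bit % 32));  // insert %b (shlxl + orl)
    uint32_t any = 0;
    for (int i = 0; i < 16; i++)                // icmp ne i512 %or, 0
        any |= v[i];
    return any != 0;
}

The codegen point is that the store is narrowed to the single affected 32-bit word, so the eight 64-bit reloads feeding the OR-reduction must be scheduled after that store — which is what both check prefixes verify.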
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/sve-interleave-splat.ll b/llvm/test/Transforms/VectorCombine/AArch64/sve-interleave-splat.ll
new file mode 100644
index 0000000..921bcf0
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/AArch64/sve-interleave-splat.ll
@@ -0,0 +1,11 @@
+; RUN: opt -passes=vector-combine %s -S -o - | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <vscale x 4 x i16> @interleave2_same_const_splat_nxv4i16() {
+; CHECK-LABEL: @interleave2_same_const_splat_nxv4i16(
+; CHECK: call <vscale x 4 x i16> @llvm.vector.interleave2
+; CHECK: ret <vscale x 4 x i16> %retval
+ %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3))
+ ret <vscale x 4 x i16> %retval
+}
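
Editor's note: for reference, llvm.vector.interleave2 alternates the lanes of its two operands, so interleaving two splats of the same constant yields a splat of that constant at twice the element count; the CHECK lines confirm VectorCombine leaves the call itself in place here. A minimal scalar model of the intrinsic's semantics (names are illustrative):

#include <stdint.h>

// Even result lanes come from the first operand, odd lanes from the
// second, as llvm.vector.interleave2 defines.
static void interleave2_i16(const int16_t *a, const int16_t *b,
                            int16_t *out, int n) {
    for (int i = 0; i < n; i++) {
        out[2 * i]     = a[i];
        out[2 * i + 1] = b[i];
    }
}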