Diffstat (limited to 'llvm/test')
 llvm/test/CodeGen/AArch64/popcount_vmask.ll                        | 315
 llvm/test/CodeGen/AArch64/vector-minmax.ll                         | 119
 llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll                      |  16
 llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir                     |  24
 llvm/test/CodeGen/X86/pr166744.ll                                  |  66
 llvm/test/Transforms/VectorCombine/AArch64/sve-interleave-splat.ll |  11
 6 files changed, 551 insertions(+), 0 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/popcount_vmask.ll b/llvm/test/CodeGen/AArch64/popcount_vmask.ll
new file mode 100644
index 0000000..e784ead
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/popcount_vmask.ll
@@ -0,0 +1,315 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define i32 @vmask_popcount_i32_v8i8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: vmask_popcount_i32_v8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.8b, v1.8b, v0.8b
+; CHECK-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-NEXT: saddlv s0, v0.8h
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+  %mask = icmp slt <8 x i8> %a, %b
+  %t1 = bitcast <8 x i1> %mask to i8
+  %t2 = call i8 @llvm.ctpop(i8 %t1)
+  %t3 = zext i8 %t2 to i32
+  ret i32 %t3
+}
+
+define i32 @vmask_popcount_i32_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vmask_popcount_i32_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.16b, v1.16b, v0.16b
+; CHECK-NEXT: sshll2 v1.8h, v0.16b, #0
+; CHECK-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-NEXT: saddl2 v2.4s, v0.8h, v1.8h
+; CHECK-NEXT: saddl v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: addv s0, v0.4s
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+  %mask = icmp slt <16 x i8> %a, %b
+  %t1 = bitcast <16 x i1> %mask to i16
+  %t2 = call i16 @llvm.ctpop(i16 %t1)
+  %t3 = zext i16 %t2 to i32
+  ret i32 %t3
+}
+
+define i32 @vmask_popcount_i32_v4i16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: vmask_popcount_i32_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.4h, v1.4h, v0.4h
+; CHECK-NEXT: saddlv s0, v0.4h
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+  %mask = icmp slt <4 x i16> %a, %b
+  %t1 = bitcast <4 x i1> %mask to i4
+  %t2 = call i4 @llvm.ctpop(i4 %t1)
+  %t3 = zext i4 %t2 to i32
+  ret i32 %t3
+}
+
+define i32 @vmask_popcount_i32_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vmask_popcount_i32_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: saddlv s0, v0.8h
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+  %mask = icmp slt <8 x i16> %a, %b
+  %t1 = bitcast <8 x i1> %mask to i8
+  %t2 = call i8 @llvm.ctpop(i8 %t1)
+  %t3 = zext i8 %t2 to i32
+  ret i32 %t3
+}
+
+define i32 @vmask_popcount_i32_v2i32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: vmask_popcount_i32_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.2s, v1.2s, v0.2s
+; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+  %mask = icmp slt <2 x i32> %a, %b
+  %t1 = bitcast <2 x i1> %mask to i2
+  %t2 = call i2 @llvm.ctpop(i2 %t1)
+  %t3 = zext i2 %t2 to i32
+  ret i32 %t3
+}
+
+define i32 @vmask_popcount_i32_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vmask_popcount_i32_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: addv s0, v0.4s
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+  %mask = icmp slt <4 x i32> %a, %b
+  %t1 = bitcast <4 x i1> %mask to i4
+  %t2 = call i4 @llvm.ctpop(i4 %t1)
+  %t3 = zext i4 %t2 to i32
+  ret i32 %t3
+}
+
+define i32 @vmask_popcount_i32_v1i64(<1 x i64> %a, <1 x i64> %b) {
+; CHECK-LABEL: vmask_popcount_i32_v1i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: fmov x8, d1
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: cset w0, lt
+; CHECK-NEXT: ret
+  %mask = icmp slt <1 x i64> %a, %b
+  %t1 = bitcast <1 x i1> %mask to i1
+  %t2 = call i1 @llvm.ctpop(i1 %t1)
+  %t3 = zext i1 %t2 to i32
+  ret i32 %t3
+}
+
+define i32 @vmask_popcount_i32_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: vmask_popcount_i32_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+  %mask = icmp slt <2 x i64> %a, %b
+  %t1 = bitcast <2 x i1> %mask to i2
+  %t2 = call i2 @llvm.ctpop(i2 %t1)
+  %t3 = zext i2 %t2 to i32
+  ret i32 %t3
+}
+
+define i64 @vmask_popcount_i64_v8i8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: vmask_popcount_i64_v8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.8b, v1.8b, v0.8b
+; CHECK-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-NEXT: saddlv s0, v0.8h
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+  %mask = icmp slt <8 x i8> %a, %b
+  %t1 = bitcast <8 x i1> %mask to i8
+  %t2 = call i8 @llvm.ctpop(i8 %t1)
+  %t3 = zext i8 %t2 to i64
+  ret i64 %t3
+}
+
+define i64 @vmask_popcount_i64_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vmask_popcount_i64_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.16b, v1.16b, v0.16b
+; CHECK-NEXT: sshll2 v1.8h, v0.16b, #0
+; CHECK-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-NEXT: saddl2 v2.4s, v0.8h, v1.8h
+; CHECK-NEXT: saddl v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: addv s0, v0.4s
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+  %mask = icmp slt <16 x i8> %a, %b
+  %t1 = bitcast <16 x i1> %mask to i16
+  %t2 = call i16 @llvm.ctpop(i16 %t1)
+  %t3 = zext i16 %t2 to i64
+  ret i64 %t3
+}
+
+define i64 @vmask_popcount_i64_v4i16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: vmask_popcount_i64_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.4h, v1.4h, v0.4h
+; CHECK-NEXT: saddlv s0, v0.4h
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+  %mask = icmp slt <4 x i16> %a, %b
+  %t1 = bitcast <4 x i1> %mask to i4
+  %t2 = call i4 @llvm.ctpop(i4 %t1)
+  %t3 = zext i4 %t2 to i64
+  ret i64 %t3
+}
+
+define i64 @vmask_popcount_i64_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vmask_popcount_i64_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: saddlv s0, v0.8h
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+  %mask = icmp slt <8 x i16> %a, %b
+  %t1 = bitcast <8 x i1> %mask to i8
+  %t2 = call i8 @llvm.ctpop(i8 %t1)
+  %t3 = zext i8 %t2 to i64
+  ret i64 %t3
+}
+
+define i64 @vmask_popcount_i64_v2i32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: vmask_popcount_i64_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.2s, v1.2s, v0.2s
+; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+  %mask = icmp slt <2 x i32> %a, %b
+  %t1 = bitcast <2 x i1> %mask to i2
+  %t2 = call i2 @llvm.ctpop(i2 %t1)
+  %t3 = zext i2 %t2 to i64
+  ret i64 %t3
+}
+
+define i64 @vmask_popcount_i64_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vmask_popcount_i64_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: addv s0, v0.4s
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+  %mask = icmp slt <4 x i32> %a, %b
+  %t1 = bitcast <4 x i1> %mask to i4
+  %t2 = call i4 @llvm.ctpop(i4 %t1)
+  %t3 = zext i4 %t2 to i64
+  ret i64 %t3
+}
+
+define i64 @vmask_popcount_i64_v1i64(<1 x i64> %a, <1 x i64> %b) {
+; CHECK-LABEL: vmask_popcount_i64_v1i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: fmov x8, d1
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: cset w0, lt
+; CHECK-NEXT: ret
+  %mask = icmp slt <1 x i64> %a, %b
+  %t1 = bitcast <1 x i1> %mask to i1
+  %t2 = call i1 @llvm.ctpop(i1 %t1)
+  %t3 = zext i1 %t2 to i64
+  ret i64 %t3
+}
+
+define i64 @vmask_popcount_i64_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: vmask_popcount_i64_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmgt v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: neg w0, w8
+; CHECK-NEXT: ret
+  %mask = icmp slt <2 x i64> %a, %b
+  %t1 = bitcast <2 x i1> %mask to i2
+  %t2 = call i2 @llvm.ctpop(i2 %t1)
+  %t3 = zext i2 %t2 to i64
+  ret i64 %t3
+}
+
+define i32 @non_vmask_popcount_1(half %a) {
+; CHECK-LABEL: non_vmask_popcount_1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: and w8, w8, #0xffff
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: cnt v0.8b, v0.8b
+; CHECK-NEXT: addv b0, v0.8b
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+  %t1 = bitcast half %a to i16
+  %t2 = call i16 @llvm.ctpop(i16 %t1)
+  %t3 = zext i16 %t2 to i32
+  ret i32 %t3
+}
+
+define i32 @non_vmask_popcount_2(<8 x i16> %a) {
+; CHECK-LABEL: non_vmask_popcount_2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: xtn v0.8b, v0.8h
+; CHECK-NEXT: umov w8, v0.b[0]
+; CHECK-NEXT: umov w9, v0.b[1]
+; CHECK-NEXT: umov w10, v0.b[2]
+; CHECK-NEXT: and w8, w8, #0x3
+; CHECK-NEXT: bfi w8, w9, #2, #2
+; CHECK-NEXT: umov w9, v0.b[3]
+; CHECK-NEXT: bfi w8, w10, #4, #2
+; CHECK-NEXT: umov w10, v0.b[4]
+; CHECK-NEXT: bfi w8, w9, #6, #2
+; CHECK-NEXT: umov w9, v0.b[5]
+; CHECK-NEXT: bfi w8, w10, #8, #2
+; CHECK-NEXT: umov w10, v0.b[6]
+; CHECK-NEXT: bfi w8, w9, #10, #2
+; CHECK-NEXT: umov w9, v0.b[7]
+; CHECK-NEXT: bfi w8, w10, #12, #2
+; CHECK-NEXT: orr w8, w8, w9, lsl #14
+; CHECK-NEXT: and w8, w8, #0xffff
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: cnt v0.8b, v0.8b
+; CHECK-NEXT: addv b0, v0.8b
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ret
+  %mask = trunc <8 x i16> %a to <8 x i2>
+  %t1 = bitcast <8 x i2> %mask to i16
+  %t2 = call i16 @llvm.ctpop(i16 %t1)
+  %t3 = zext i16 %t2 to i32
+  ret i32 %t3
+}
diff --git a/llvm/test/CodeGen/AArch64/vector-minmax.ll b/llvm/test/CodeGen/AArch64/vector-minmax.ll
new file mode 100644
index 0000000..6696f94
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/vector-minmax.ll
@@ -0,0 +1,119 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64 -mattr=+neon,+sve | FileCheck %s --check-prefix=CHECK-SVE
+
+define <2 x i64> @smax_v2i64(<2 x i64> %a, <2 x i64> %b){
+; CHECK-LABEL: smax_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmgt v2.2d, v0.2d, v1.2d
+; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT: ret
+;
+; CHECK-SVE-LABEL: smax_v2i64:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SVE-NEXT: smax z0.d, p0/m, z0.d, z1.d
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-SVE-NEXT: ret
+entry:
+  %0 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %0
+}
+
+define <2 x i64> @smin_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: smin_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmgt v2.2d, v1.2d, v0.2d
+; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT: ret
+;
+; CHECK-SVE-LABEL: smin_v2i64:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SVE-NEXT: smin z0.d, p0/m, z0.d, z1.d
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-SVE-NEXT: ret
+entry:
+  %0 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %0
+}
+
+define <2 x i64> @umax_v2i64(<2 x i64> %a, <2 x i64> %b){
+; CHECK-LABEL: umax_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmhi v2.2d, v0.2d, v1.2d
+; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT: ret
+;
+; CHECK-SVE-LABEL: umax_v2i64:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SVE-NEXT: umax z0.d, p0/m, z0.d, z1.d
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-SVE-NEXT: ret
+entry:
+  %0 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %0
+}
+
+define <2 x i64> @umin_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: umin_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmhi v2.2d, v1.2d, v0.2d
+; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT: ret
+;
+; CHECK-SVE-LABEL: umin_v2i64:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: ptrue p0.d, vl2
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SVE-NEXT: umin z0.d, p0/m, z0.d, z1.d
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-SVE-NEXT: ret
+entry:
+  %0 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %0
+}
+
+define <1 x i64> @smax_v1i64(<1 x i64> %a, <1 x i64> %b){
+; CHECK-LABEL: smax_v1i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmgt d2, d0, d1
+; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-NEXT: ret
+;
+; CHECK-SVE-LABEL: smax_v1i64:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: ptrue p0.d, vl1
+; CHECK-SVE-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-SVE-NEXT: // kill: def $d1 killed $d1 def $z1
+; CHECK-SVE-NEXT: smax z0.d, p0/m, z0.d, z1.d
+; CHECK-SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-SVE-NEXT: ret
+entry:
+  %0 = call <1 x i64> @llvm.smax.v2i64(<1 x i64> %a, <1 x i64> %b)
+  ret <1 x i64> %0
+}
+
+; This is legal for Neon, so this should use the Neon smax.
+define <4 x i32> @smax_v4i32(<4 x i32> %a, <4 x i32> %b){
+; CHECK-LABEL: smax_v4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+;
+; CHECK-SVE-LABEL: smax_v4i32:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: smax v0.4s, v0.4s, v1.4s
+; CHECK-SVE-NEXT: ret
+entry:
+  %0 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %0
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
index 20034b6..b6e29cf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
@@ -863,3 +863,19 @@ entry:
   i64 2)
   ret <vscale x 1 x double> %2
 }
+
+; The two vsetvlis will be coalesced so the add will be made dead and
+; removed. Make sure we shrink the live interval of %x.
+define void @non_li_addi(i64 %x, ptr %p) {
+; CHECK-LABEL: non_li_addi:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: ret
+entry:
+  %add = add i64 %x, 1
+  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %add, i64 3, i64 0)
+  %1 = call <vscale x 8 x i8> @llvm.riscv.vle(<vscale x 8 x i8> poison, ptr %p, i64 %0)
+  %2 = tail call i64 @llvm.riscv.vsetvli(i64 1, i64 3, i64 0)
+  %3 = tail call { <vscale x 8 x i8>, i64 } @llvm.riscv.vleff(<vscale x 8 x i8> poison, ptr %p, i64 %2)
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
index fdd30c9..f9929c9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
@@ -104,6 +104,10 @@
     ret void
   }
 
+  define void @non_li_addi() {
+    ret void
+  }
+
   declare <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i64>, i64) #1
 
   declare <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64>, ptr nocapture, i64) #4
@@ -664,3 +668,23 @@ body: |
   bb.2:
     $x10 = COPY %vl
     PseudoRET implicit killed $x10
+...
+---
+# The two vsetvlis will be coalesced so the ADDI will be made dead and removed.
+# Make sure we shrink the live interval of %0.
+name: non_li_addi
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x10
+    ; CHECK-LABEL: name: non_li_addi
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-NEXT: dead [[PseudoVSETIVLI:%[0-9]+]]:gprnox0 = PseudoVSETIVLI 1, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
+    ; CHECK-NEXT: PseudoRET
+    %0:gpr = COPY $x10
+    %1:gprnox0 = ADDI %0, 1
+    %2:gprnox0 = PseudoVSETVLI %1, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
+    %3:gprnox0 = PseudoVSETIVLI 1, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
+    PseudoRET
diff --git a/llvm/test/CodeGen/X86/pr166744.ll b/llvm/test/CodeGen/X86/pr166744.ll
new file mode 100644
index 0000000..21b25d8
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr166744.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s --check-prefixes=POSTRA
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=haswell | FileCheck %s --check-prefixes=NOPOSTRA
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=NOPOSTRA
+
+; Ensure reloads are after narrowed i512 -> i32 store
+define i1 @PR166744(ptr %v, i64 %idx, i1 zeroext %b) {
+; POSTRA-LABEL: PR166744:
+; POSTRA: # %bb.0:
+; POSTRA-NEXT: movl $1029, %eax # imm = 0x405
+; POSTRA-NEXT: shlxl %esi, %edx, %edx
+; POSTRA-NEXT: bextrl %eax, %esi, %eax
+; POSTRA-NEXT: movl (%rdi,%rax,4), %ecx
+; POSTRA-NEXT: btrl %esi, %ecx
+; POSTRA-NEXT: orl %ecx, %edx
+; POSTRA-NEXT: movl %edx, (%rdi,%rax,4)
+; POSTRA-NEXT: movq 16(%rdi), %rax
+; POSTRA-NEXT: movq (%rdi), %rcx
+; POSTRA-NEXT: movq 24(%rdi), %rdx
+; POSTRA-NEXT: movq 8(%rdi), %rsi
+; POSTRA-NEXT: orq 56(%rdi), %rdx
+; POSTRA-NEXT: orq 40(%rdi), %rsi
+; POSTRA-NEXT: orq 48(%rdi), %rax
+; POSTRA-NEXT: orq 32(%rdi), %rcx
+; POSTRA-NEXT: orq %rdx, %rsi
+; POSTRA-NEXT: orq %rax, %rcx
+; POSTRA-NEXT: orq %rsi, %rcx
+; POSTRA-NEXT: setne %al
+; POSTRA-NEXT: retq
+;
+; NOPOSTRA-LABEL: PR166744:
+; NOPOSTRA: # %bb.0:
+; NOPOSTRA-NEXT: movl %esi, %eax
+; NOPOSTRA-NEXT: shrl $3, %eax
+; NOPOSTRA-NEXT: andl $60, %eax
+; NOPOSTRA-NEXT: movl (%rdi,%rax), %ecx
+; NOPOSTRA-NEXT: btrl %esi, %ecx
+; NOPOSTRA-NEXT: shlxl %esi, %edx, %edx
+; NOPOSTRA-NEXT: orl %ecx, %edx
+; NOPOSTRA-NEXT: movl %edx, (%rdi,%rax)
+; NOPOSTRA-NEXT: movq 16(%rdi), %rax
+; NOPOSTRA-NEXT: movq (%rdi), %rcx
+; NOPOSTRA-NEXT: movq 8(%rdi), %rdx
+; NOPOSTRA-NEXT: movq 24(%rdi), %rsi
+; NOPOSTRA-NEXT: orq 56(%rdi), %rsi
+; NOPOSTRA-NEXT: orq 40(%rdi), %rdx
+; NOPOSTRA-NEXT: orq 48(%rdi), %rax
+; NOPOSTRA-NEXT: orq 32(%rdi), %rcx
+; NOPOSTRA-NEXT: orq %rsi, %rdx
+; NOPOSTRA-NEXT: orq %rax, %rcx
+; NOPOSTRA-NEXT: orq %rdx, %rcx
+; NOPOSTRA-NEXT: setne %al
+; NOPOSTRA-NEXT: retq
+  %rem = and i64 %idx, 511
+  %sh_prom = zext nneg i64 %rem to i512
+  %shl = shl nuw i512 1, %sh_prom
+  %not = xor i512 %shl, -1
+  %load = load i512, ptr %v, align 8
+  %and = and i512 %load, %not
+  %conv2 = zext i1 %b to i512
+  %shl4 = shl nuw i512 %conv2, %sh_prom
+  %or = or i512 %and, %shl4
+  store i512 %or, ptr %v, align 8
+  %cmp = icmp ne i512 %or, 0
+  ret i1 %cmp
+}
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/sve-interleave-splat.ll b/llvm/test/Transforms/VectorCombine/AArch64/sve-interleave-splat.ll
new file mode 100644
index 0000000..921bcf0
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/AArch64/sve-interleave-splat.ll
@@ -0,0 +1,11 @@
+; RUN: opt -passes=vector-combine %s -S -o - | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <vscale x 4 x i16> @interleave2_same_const_splat_nxv4i16() {
+;CHECK-LABEL: @interleave2_same_const_splat_nxv4i16(
+;CHECK: call <vscale x 4 x i16> @llvm.vector.interleave2
+;CHECK: ret <vscale x 4 x i16> %retval
+  %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3))
+  ret <vscale x 4 x i16> %retval
+}
