author     Philip Reames <preames@rivosinc.com>   2025-07-18 10:33:59 -0700
committer  Philip Reames <listmail@philipreames.com>   2025-07-18 10:39:11 -0700
commit     87c2adbb589d4cd0b6dfb374fce24d29c6bafac0
tree       6feeb04c2006db8b5b1c576c048b70c751a82f38
parent     7fd91bb6e89be39a130e04058a01d41ae5d600cb
[RISCV][IA] Precommit tests for deinterleaveN of masked.load
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll  | 161
1 file changed, 161 insertions(+), 0 deletions(-)
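These precommit tests pin down the codegen we currently get when the result of an llvm.masked.load is split apart with llvm.vector.deinterleaveN, presumably ahead of teaching the interleaved-access lowering to handle masked loads directly. A minimal sketch of the IR shape being exercised, distilled from the tests in the diff below (function and value names here are illustrative, not part of the commit):

    ; Sketch: a masked load whose mask is the interleave of one per-field mask,
    ; immediately deinterleaved back into its fields.
    define {<vscale x 8 x i8>, <vscale x 8 x i8>} @sketch_deinterleave2_of_masked_load(ptr %p, <vscale x 8 x i1> %m) {
      %wide.mask = call <vscale x 16 x i1> @llvm.vector.interleave2.nxv16i1(<vscale x 8 x i1> %m, <vscale x 8 x i1> %m)
      %wide = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr %p, i32 1, <vscale x 16 x i1> %wide.mask, <vscale x 16 x i8> poison)
      %fields = call {<vscale x 8 x i8>, <vscale x 8 x i8>} @llvm.vector.deinterleave2.nxv16i8(<vscale x 16 x i8> %wide)
      ret {<vscale x 8 x i8>, <vscale x 8 x i8>} %fields
    }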
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
index 9af92aa..578b67e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
@@ -538,3 +538,164 @@ define { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x
%res7 = insertvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %res6, <vscale x 8 x i8> %t7, 7
ret { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %res7
}
+
+define {<vscale x 16 x i8>, <vscale x 16 x i8>} @masked_load_factor2(ptr %p) {
+; CHECK-LABEL: masked_load_factor2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl4r.v v12, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vnsrl.wi v10, v12, 8
+; CHECK-NEXT: ret
+ %vec = call <vscale x 32 x i8> @llvm.masked.load(ptr %p, i32 4, <vscale x 32 x i1> splat (i1 true), <vscale x 32 x i8> poison)
+ %deinterleaved.results = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> %vec)
+ ret {<vscale x 16 x i8>, <vscale x 16 x i8>} %deinterleaved.results
+}
+
+define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @masked_load_factor4(ptr %p) {
+; CHECK-LABEL: masked_load_factor4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: vl4r.v v8, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs4r.v v8, (a0)
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: .cfi_def_cfa sp, 16
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: ret
+ %vec = call <vscale x 32 x i8> @llvm.masked.load(ptr %p, i32 4, <vscale x 32 x i1> splat (i1 true), <vscale x 32 x i8> poison)
+ %deinterleaved.results = call {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @llvm.vector.deinterleave4.nxv32i8(<vscale x 32 x i8> %vec)
+ ret {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} %deinterleaved.results
+}
+
+define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @masked_load_factor4_mask(ptr %p, <vscale x 8 x i1> %mask) {
+; CHECK-LABEL: masked_load_factor4_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: add a3, a1, a2
+; CHECK-NEXT: vmv.v.v v9, v8
+; CHECK-NEXT: srli a4, a2, 2
+; CHECK-NEXT: vmv.v.v v10, v8
+; CHECK-NEXT: srli a5, a2, 3
+; CHECK-NEXT: vmv.v.v v11, v8
+; CHECK-NEXT: vsseg4e8.v v8, (a1)
+; CHECK-NEXT: vl1r.v v8, (a1)
+; CHECK-NEXT: add a1, a4, a5
+; CHECK-NEXT: vl1r.v v9, (a3)
+; CHECK-NEXT: add a3, a3, a2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: vl1r.v v10, (a3)
+; CHECK-NEXT: vl1r.v v11, (a2)
+; CHECK-NEXT: vmsne.vi v9, v9, 0
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: vmsne.vi v8, v10, 0
+; CHECK-NEXT: vmsne.vi v10, v11, 0
+; CHECK-NEXT: vsetvli zero, a4, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vx v0, v9, a5
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vx v0, v8, a4
+; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v0, v10, a1
+; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs4r.v v8, (a0)
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: .cfi_def_cfa sp, 16
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: ret
+ %interleaved.mask = tail call <vscale x 32 x i1> @llvm.vector.interleave4.nxv32i1(<vscale x 8 x i1> %mask, <vscale x 8 x i1> %mask, <vscale x 8 x i1> %mask, <vscale x 8 x i1> %mask)
+ %vec = call <vscale x 32 x i8> @llvm.masked.load(ptr %p, i32 4, <vscale x 32 x i1> %interleaved.mask, <vscale x 32 x i8> poison)
+ %deinterleaved.results = call {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @llvm.vector.deinterleave4.nxv32i8(<vscale x 32 x i8> %vec)
+ ret {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} %deinterleaved.results
+}
+
+; Negative test - some of the deinterleaved elements might come from the
+; passthru rather than the load (see the note following the diff).
+define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @masked_load_factor4_passthru(ptr %p, <vscale x 8 x i1> %mask, <vscale x 32 x i8> %passthru) {
+; CHECK-LABEL: masked_load_factor4_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: vmerge.vim v12, v12, 1, v0
+; CHECK-NEXT: add a3, a1, a2
+; CHECK-NEXT: vmv.v.v v13, v12
+; CHECK-NEXT: srli a4, a2, 2
+; CHECK-NEXT: vmv.v.v v14, v12
+; CHECK-NEXT: srli a5, a2, 3
+; CHECK-NEXT: vmv.v.v v15, v12
+; CHECK-NEXT: vsseg4e8.v v12, (a1)
+; CHECK-NEXT: vl1r.v v12, (a1)
+; CHECK-NEXT: add a1, a4, a5
+; CHECK-NEXT: vl1r.v v13, (a3)
+; CHECK-NEXT: add a3, a3, a2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: vl1r.v v14, (a3)
+; CHECK-NEXT: vl1r.v v15, (a2)
+; CHECK-NEXT: vmsne.vi v13, v13, 0
+; CHECK-NEXT: vmsne.vi v0, v12, 0
+; CHECK-NEXT: vmsne.vi v12, v14, 0
+; CHECK-NEXT: vmsne.vi v14, v15, 0
+; CHECK-NEXT: vsetvli zero, a4, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vx v0, v13, a5
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vx v0, v12, a4
+; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v0, v14, a1
+; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, mu
+; CHECK-NEXT: vle8.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs4r.v v8, (a0)
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: .cfi_def_cfa sp, 16
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: ret
+ %interleaved.mask = tail call <vscale x 32 x i1> @llvm.vector.interleave4.nxv32i1(<vscale x 8 x i1> %mask, <vscale x 8 x i1> %mask, <vscale x 8 x i1> %mask, <vscale x 8 x i1> %mask)
+ %vec = call <vscale x 32 x i8> @llvm.masked.load(ptr %p, i32 4, <vscale x 32 x i1> %interleaved.mask, <vscale x 32 x i8> %passthru)
+ %deinterleaved.results = call {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @llvm.vector.deinterleave4.nxv32i8(<vscale x 32 x i8> %vec)
+ ret {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} %deinterleaved.results
+}
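A note on the negative test (@masked_load_factor4_passthru): llvm.masked.load returns the passthru element in every lane whose mask bit is clear, so after deinterleave4 each of the four result fields may mix loaded and passthru data. Ignoring the additional guarantee that masked-off lanes are never accessed, the loaded value behaves roughly like the sketch below, which is why the load/deinterleave pair cannot simply be rewritten as a segmented load when a non-poison passthru is present:

    ; Rough per-lane semantics only; a real masked.load also never touches
    ; memory in masked-off lanes, so this is not a drop-in replacement.
    %loaded = load <vscale x 32 x i8>, ptr %p
    %vec = select <vscale x 32 x i1> %interleaved.mask, <vscale x 32 x i8> %loaded, <vscale x 32 x i8> %passthru
    ; Any lane with a clear mask bit carries %passthru data, and deinterleave4
    ; then spreads those lanes across all four result fields.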