author | Philip Reames <preames@rivosinc.com> | 2025-07-18 10:33:59 -0700
committer | Philip Reames <listmail@philipreames.com> | 2025-07-18 10:39:11 -0700
commit | 87c2adbb589d4cd0b6dfb374fce24d29c6bafac0
tree | 6feeb04c2006db8b5b1c576c048b70c751a82f38
parent | 7fd91bb6e89be39a130e04058a01d41ae5d600cb
[RISCV][IA] Precommit tests for deinterleaveN of masked.load
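These tests record the current codegen for llvm.vector.deinterleaveN intrinsics fed by llvm.masked.load on scalable vectors: factor-2 and factor-4 cases with an all-true mask, a factor-4 case whose mask is built with llvm.vector.interleave4, and a negative case with a non-poison passthru. As a rough sketch of the pattern being targeted (mirroring the factor-2 test added below; the function and value names here are illustrative, not part of the committed diff):

    define {<vscale x 16 x i8>, <vscale x 16 x i8>} @deinterleave2_of_masked_load(ptr %p) {
      ; A wide masked load consumed only by a factor-2 deinterleave; a follow-up
      ; change can presumably match this shape and emit a segment load (vlseg2e8)
      ; instead of the vnsrl-based lowering checked below.
      %wide = call <vscale x 32 x i8> @llvm.masked.load(ptr %p, i32 4, <vscale x 32 x i1> splat (i1 true), <vscale x 32 x i8> poison)
      %parts = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> %wide)
      ret {<vscale x 16 x i8>, <vscale x 16 x i8>} %parts
    }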
-rw-r--r-- | llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll | 161
1 file changed, 161 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
index 9af92aa..578b67e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
@@ -538,3 +538,164 @@ define { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x
   %res7 = insertvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %res6, <vscale x 8 x i8> %t7, 7
   ret { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %res7
 }
+
+define {<vscale x 16 x i8>, <vscale x 16 x i8>} @masked_load_factor2(ptr %p) {
+; CHECK-LABEL: masked_load_factor2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl4r.v v12, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v10, v12, 8
+; CHECK-NEXT:    ret
+  %vec = call <vscale x 32 x i8> @llvm.masked.load(ptr %p, i32 4, <vscale x 32 x i1> splat (i1 true), <vscale x 32 x i8> poison)
+  %deinterleaved.results = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> %vec)
+  ret {<vscale x 16 x i8>, <vscale x 16 x i8>} %deinterleaved.results
+}
+
+define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @masked_loat_factor4(ptr %p) {
+; CHECK-LABEL: masked_loat_factor4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 2
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT:    vl4r.v v8, (a0)
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs4r.v v8, (a0)
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vlseg4e8.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 2
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    .cfi_def_cfa sp, 16
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    ret
+  %vec = call <vscale x 32 x i8> @llvm.masked.load(ptr %p, i32 4, <vscale x 32 x i1> splat (i1 true), <vscale x 32 x i8> poison)
+  %deinterleaved.results = call {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @llvm.vector.deinterleave4.nxv32i8(<vscale x 32 x i8> %vec)
+  ret {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} %deinterleaved.results
+}
+
+define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @masked_loat_factor4_mask(ptr %p, <vscale x 8 x i1> %mask) {
+; CHECK-LABEL: masked_loat_factor4_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT:    add a3, a1, a2
+; CHECK-NEXT:    vmv.v.v v9, v8
+; CHECK-NEXT:    srli a4, a2, 2
+; CHECK-NEXT:    vmv.v.v v10, v8
+; CHECK-NEXT:    srli a5, a2, 3
+; CHECK-NEXT:    vmv.v.v v11, v8
+; CHECK-NEXT:    vsseg4e8.v v8, (a1)
+; CHECK-NEXT:    vl1r.v v8, (a1)
+; CHECK-NEXT:    add a1, a4, a5
+; CHECK-NEXT:    vl1r.v v9, (a3)
+; CHECK-NEXT:    add a3, a3, a2
+; CHECK-NEXT:    add a2, a3, a2
+; CHECK-NEXT:    vl1r.v v10, (a3)
+; CHECK-NEXT:    vl1r.v v11, (a2)
+; CHECK-NEXT:    vmsne.vi v9, v9, 0
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-NEXT:    vmsne.vi v8, v10, 0
+; CHECK-NEXT:    vmsne.vi v10, v11, 0
+; CHECK-NEXT:    vsetvli zero, a4, e8, mf2, tu, ma
+; CHECK-NEXT:    vslideup.vx v0, v9, a5
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, tu, ma
+; CHECK-NEXT:    vslideup.vx v0, v8, a4
+; CHECK-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vslideup.vx v0, v10, a1
+; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 2
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vs4r.v v8, (a0)
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vlseg4e8.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    .cfi_def_cfa sp, 16
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    ret
+  %interleaved.mask = tail call <vscale x 32 x i1> @llvm.vector.interleave4.nxv32i1(<vscale x 8 x i1> %mask, <vscale x 8 x i1> %mask, <vscale x 8 x i1> %mask, <vscale x 8 x i1> %mask)
+  %vec = call <vscale x 32 x i8> @llvm.masked.load(ptr %p, i32 4, <vscale x 32 x i1> %interleaved.mask, <vscale x 32 x i8> poison)
+  %deinterleaved.results = call {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @llvm.vector.deinterleave4.nxv32i8(<vscale x 32 x i8> %vec)
+  ret {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} %deinterleaved.results
+}
+
+; Negative test - some of the deinterleaved elements might come from the
+; passthru not the load
+define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @masked_loat_factor4_passthru(ptr %p, <vscale x 8 x i1> %mask, <vscale x 32 x i8> %passthru) {
+; CHECK-LABEL: masked_loat_factor4_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v12, 0
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    vmerge.vim v12, v12, 1, v0
+; CHECK-NEXT:    add a3, a1, a2
+; CHECK-NEXT:    vmv.v.v v13, v12
+; CHECK-NEXT:    srli a4, a2, 2
+; CHECK-NEXT:    vmv.v.v v14, v12
+; CHECK-NEXT:    srli a5, a2, 3
+; CHECK-NEXT:    vmv.v.v v15, v12
+; CHECK-NEXT:    vsseg4e8.v v12, (a1)
+; CHECK-NEXT:    vl1r.v v12, (a1)
+; CHECK-NEXT:    add a1, a4, a5
+; CHECK-NEXT:    vl1r.v v13, (a3)
+; CHECK-NEXT:    add a3, a3, a2
+; CHECK-NEXT:    add a2, a3, a2
+; CHECK-NEXT:    vl1r.v v14, (a3)
+; CHECK-NEXT:    vl1r.v v15, (a2)
+; CHECK-NEXT:    vmsne.vi v13, v13, 0
+; CHECK-NEXT:    vmsne.vi v0, v12, 0
+; CHECK-NEXT:    vmsne.vi v12, v14, 0
+; CHECK-NEXT:    vmsne.vi v14, v15, 0
+; CHECK-NEXT:    vsetvli zero, a4, e8, mf2, tu, ma
+; CHECK-NEXT:    vslideup.vx v0, v13, a5
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, tu, ma
+; CHECK-NEXT:    vslideup.vx v0, v12, a4
+; CHECK-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vslideup.vx v0, v14, a1
+; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, mu
+; CHECK-NEXT:    vle8.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 2
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vs4r.v v8, (a0)
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vlseg4e8.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    .cfi_def_cfa sp, 16
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    ret
+  %interleaved.mask = tail call <vscale x 32 x i1> @llvm.vector.interleave4.nxv32i1(<vscale x 8 x i1> %mask, <vscale x 8 x i1> %mask, <vscale x 8 x i1> %mask, <vscale x 8 x i1> %mask)
+  %vec = call <vscale x 32 x i8> @llvm.masked.load(ptr %p, i32 4, <vscale x 32 x i1> %interleaved.mask, <vscale x 32 x i8> %passthru)
+  %deinterleaved.results = call {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @llvm.vector.deinterleave4.nxv32i8(<vscale x 32 x i8> %vec)
+  ret {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} %deinterleaved.results
+}
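The negative case above hinges on llvm.masked.load semantics: with a non-poison passthru, masked-off lanes take their value from the passthru, so the deinterleaved results may mix loaded lanes with passthru lanes. As a rough sketch of that equivalence (illustrative value names, not code from this commit):

    ; %vec = llvm.masked.load(%p, 4, %m, %passthru) behaves like:
    %loaded = call <vscale x 32 x i8> @llvm.masked.load(ptr %p, i32 4, <vscale x 32 x i1> %m, <vscale x 32 x i8> poison)
    %vec = select <vscale x 32 x i1> %m, <vscale x 32 x i8> %loaded, <vscale x 32 x i8> %passthru

A plain segment load such as vlseg4e8 only yields the loaded lanes, so folding the passthru case would presumably also require deinterleaving the passthru and selecting per field; the test records that the current lowering does not attempt this.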