; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,m -O2 | FileCheck -check-prefixes=CHECK,RV32 %s
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,m -O2 | FileCheck -check-prefixes=CHECK,RV64 %s

define {<vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor2_v2(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: load_factor2_v2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vlseg2e32.v v8, (a0)
; CHECK-NEXT:    ret
  %rvl = mul nuw i32 %evl, 2
  %wide.masked.load = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %rvl)
  %deinterleaved.results = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %wide.masked.load)
  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 0
  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 1
  %res0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> %t0, 0
  %res1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %res0, <vscale x 2 x i32> %t1, 1
  ret { <vscale x 2 x i32>, <vscale x 2 x i32> } %res1
}

define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor3_v2(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: load_factor3_v2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vlseg3e32.v v8, (a0)
; CHECK-NEXT:    ret
  %rvl = mul nuw i32 %evl, 3
  %wide.masked.load = call <vscale x 6 x i32> @llvm.vp.load(ptr %ptr, <vscale x 6 x i1> splat (i1 true), i32 %rvl)
  %deinterleaved.results = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave3(<vscale x 6 x i32> %wide.masked.load)
  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 0
  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 1
  %t2 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 2
  %res0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> %t0, 0
  %res1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res0, <vscale x 2 x i32> %t1, 1
  %res2 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res1, <vscale x 2 x i32> %t2, 2
  ret { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res2
}

; Only fields 0 and 2 of the factor-3 deinterleave are used.
define {<vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor3_partial(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: load_factor3_partial:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vlseg3e32.v v7, (a0)
; CHECK-NEXT:    vmv1r.v v8, v7
; CHECK-NEXT:    ret
  %rvl = mul nuw i32 %evl, 3
  %wide.masked.load = call <vscale x 6 x i32> @llvm.vp.load(ptr %ptr, <vscale x 6 x i1> splat (i1 true), i32 %rvl)
  %deinterleaved.results = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave3(<vscale x 6 x i32> %wide.masked.load)
  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 0
  %t2 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 2
  %res0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> %t0, 0
  %res1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %res0, <vscale x 2 x i32> %t2, 1
  ret { <vscale x 2 x i32>, <vscale x 2 x i32> } %res1
}

; InterleavedAccess should kick in even if the users of the deinterleave intrinsic are not extractvalue instructions.
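; Here the deinterleave results are consumed only through a phi, and the extractvalues
; operate on the merged value; a vlseg3 is still emitted on both paths.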
define {<vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor3_no_extract(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: load_factor3_no_extract:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 12
; CHECK-NEXT:    beq a1, a2, .LBB3_2
; CHECK-NEXT:  # %bb.1: # %bb0
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vlseg3e32.v v7, (a0)
; CHECK-NEXT:    j .LBB3_3
; CHECK-NEXT:  .LBB3_2: # %bb1
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vlseg3e32.v v7, (a0)
; CHECK-NEXT:  .LBB3_3: # %merge
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vmv1r.v v8, v7
; CHECK-NEXT:    ret
  %p = icmp ne i32 %evl, 12
  br i1 %p, label %bb0, label %bb1

bb0:
  %rvl.0 = mul nuw i32 %evl, 3
  %wide.load.0 = call <vscale x 6 x i32> @llvm.vp.load(ptr %ptr, <vscale x 6 x i1> splat (i1 true), i32 %rvl.0)
  %deinterleaved.results.0 = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave3(<vscale x 6 x i32> %wide.load.0)
  br label %merge

bb1:
  %wide.load.1 = call <vscale x 6 x i32> @llvm.vp.load(ptr %ptr, <vscale x 6 x i1> splat (i1 true), i32 12)
  %deinterleaved.results.1 = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave3(<vscale x 6 x i32> %wide.load.1)
  br label %merge

merge:
  %deinterleaved.results = phi { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [%deinterleaved.results.0, %bb0], [%deinterleaved.results.1, %bb1]
  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 0
  %t2 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 2
  %res0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> %t0, 0
  %res1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %res0, <vscale x 2 x i32> %t2, 1
  ret { <vscale x 2 x i32>, <vscale x 2 x i32> } %res1
}

define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor4_v2(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: load_factor4_v2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vlseg4e32.v v8, (a0)
; CHECK-NEXT:    ret
  %rvl = mul nuw i32 %evl, 4
  %wide.masked.load = call <vscale x 8 x i32> @llvm.vp.load.nxv8i32.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %rvl)
  %d = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave4.nxv8i32(<vscale x 8 x i32> %wide.masked.load)
  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %d, 0
  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %d, 1
  %t2 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %d, 2
  %t3 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %d, 3
  %res0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> %t0, 0
  %res1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res0, <vscale x 2 x i32> %t1, 1
  %res2 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res1, <vscale x 2 x i32> %t2, 2
  %res3 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res2, <vscale x 2 x i32> %t3, 3
  ret { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res3
}

define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor5_v2(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: load_factor5_v2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vlseg5e32.v v8, (a0)
; CHECK-NEXT:    ret
  %rvl = mul nuw i32 %evl, 5
  %wide.masked.load = call <vscale x 10 x i32> @llvm.vp.load(ptr %ptr, <vscale x 10 x i1> splat (i1 true), i32 %rvl)
  %deinterleaved.results = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave5(<vscale x 10 x i32> %wide.masked.load)
  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 0
  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 1
  %t2 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 2
  %t3 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 3
  %t4 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 4
  %res0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> %t0, 0
  %res1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res0, <vscale x 2 x i32> %t1, 1
  %res2 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res1, <vscale x 2 x i32> %t2, 2
  %res3 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res2, <vscale x 2 x i32> %t3, 3
  %res4 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res3, <vscale x 2 x i32> %t4, 4
  ret { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res4
}

define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor7_v2(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: load_factor7_v2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vlseg7e32.v v8, (a0)
; CHECK-NEXT:    ret
  %rvl = mul nuw i32 %evl, 7
  %wide.masked.load = call <vscale x 14 x i32> @llvm.vp.load(ptr %ptr, <vscale x 14 x i1> splat (i1 true), i32 %rvl)
  %deinterleaved.results = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave7(<vscale x 14 x i32> %wide.masked.load)
  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 0
  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 1
  %t2 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 2
  %t3 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 3
  %t4 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 4
  %t5 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 5
  %t6 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 6
  %res0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> %t0, 0
  %res1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res0, <vscale x 2 x i32> %t1, 1
  %res2 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res1, <vscale x 2 x i32> %t2, 2
  %res3 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res2, <vscale x 2 x i32> %t3, 3
  %res4 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res3, <vscale x 2 x i32> %t4, 4
  %res5 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res4, <vscale x 2 x i32> %t5, 5
  %res6 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res5, <vscale x 2 x i32> %t6, 6
  ret { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res6
}

define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor8_v2(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: load_factor8_v2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vlseg8e32.v v8, (a0)
; CHECK-NEXT:    ret
  %rvl = mul nuw i32 %evl, 8
  %wide.masked.load = call <vscale x 16 x i32> @llvm.vp.load.nxv16i32.p0(ptr %ptr, <vscale x 16 x i1> splat (i1 true), i32 %rvl)
  %d = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave8.nxv16i32(<vscale x 16 x i32> %wide.masked.load)
  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %d, 0
  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %d, 1
  %t2 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %d, 2
  %t3 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %d, 3
  %t4 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %d, 4
  %t5 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %d, 5
  %t6 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %d, 6
  %t7 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %d, 7
  %res0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> %t0, 0
  %res1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res0, <vscale x 2 x i32> %t1, 1
  %res2 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res1, <vscale x 2 x i32> %t2, 2
  %res3 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res2, <vscale x 2 x i32> %t3, 3
  %res4 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res3, <vscale x 2 x i32> %t4, 4
  %res5 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res4, <vscale x 2 x i32> %t5, 5
  %res6 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res5, <vscale x 2 x i32> %t6, 6
  %res7 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res6, <vscale x 2 x i32> %t7, 7
  ret { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res7
}

define void @store_factor2_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: store_factor2_v2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT:    vsseg2e32.v v8, (a0)
; CHECK-NEXT:    ret
  %rvl = mul nuw i32 %evl, 2
  %interleaved.vec = call <vscale x 2 x i32> @llvm.vector.interleave2.nxv2i32(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1)
  call void @llvm.vp.store.nxv2i32.p0(<vscale x 2 x i32> %interleaved.vec, ptr %ptr, <vscale x 2 x i1> splat (i1 true), i32 %rvl)
  ret void
}

define void @store_factor3_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <vscale x 1 x i32> %v2, ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: store_factor3_v2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT:    vsseg3e32.v v8, (a0)
; CHECK-NEXT:    ret
  %rvl = mul nuw i32 %evl, 3
  %interleaved.vec = call <vscale x 3 x i32> @llvm.vector.interleave3(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <vscale x 1 x i32> %v2)
  call void @llvm.vp.store(<vscale x 3 x i32> %interleaved.vec, ptr %ptr, <vscale x 3 x i1> splat (i1 true), i32 %rvl)
  ret void
}

; The wide EVL is 8 * %evl for a factor-4 store, so the segment-store EVL becomes 2 * %evl.
define void @store_factor4_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 zeroext %evl) {
; RV32-LABEL: store_factor4_v2:
; RV32:       # %bb.0:
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    srli a1, a1, 2
; RV32-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
; RV32-NEXT:    vmv1r.v v10, v8
; RV32-NEXT:    vmv1r.v v11, v9
; RV32-NEXT:    vsseg4e32.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: store_factor4_v2:
; RV64:       # %bb.0:
; RV64-NEXT:    slli a1, a1, 35
; RV64-NEXT:    srli a1, a1, 34
; RV64-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
; RV64-NEXT:    vmv1r.v v10, v8
; RV64-NEXT:    vmv1r.v v11, v9
; RV64-NEXT:    vsseg4e32.v v8, (a0)
; RV64-NEXT:    ret
  %rvl = mul nuw i32 %evl, 8
  %interleaved.vec = call <vscale x 4 x i32> @llvm.vector.interleave4.nxv4i32(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1)
  call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %interleaved.vec, ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %rvl)
  ret void
}

define void @store_factor5_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <vscale x 1 x i32> %v2, <vscale x 1 x i32> %v3, <vscale x 1 x i32> %v4, ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: store_factor5_v2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT:    vsseg5e32.v v8, (a0)
; CHECK-NEXT:    ret
  %rvl = mul nuw i32 %evl, 5
  %interleaved.vec = call <vscale x 5 x i32> @llvm.vector.interleave5(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <vscale x 1 x i32> %v2, <vscale x 1 x i32> %v3, <vscale x 1 x i32> %v4)
  call void @llvm.vp.store(<vscale x 5 x i32> %interleaved.vec, ptr %ptr, <vscale x 5 x i1> splat (i1 true), i32 %rvl)
  ret void
}

define void @store_factor7_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <vscale x 1 x i32> %v2, <vscale x 1 x i32> %v3, <vscale x 1 x i32> %v4, <vscale x 1 x i32> %v5, <vscale x 1 x i32> %v6, ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: store_factor7_v2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT:    vsseg7e32.v v8, (a0)
; CHECK-NEXT:    ret
  %rvl = mul nuw i32 %evl, 7
  %interleaved.vec = call <vscale x 7 x i32> @llvm.vector.interleave7(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <vscale x 1 x i32> %v2, <vscale x 1 x i32> %v3, <vscale x 1 x i32> %v4, <vscale x 1 x i32> %v5, <vscale x 1 x i32> %v6)
  call void @llvm.vp.store(<vscale x 7 x i32> %interleaved.vec, ptr %ptr, <vscale x 7 x i1> splat (i1 true), i32 %rvl)
  ret void
}

define void @store_factor8_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: store_factor8_v2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT:    vmv1r.v v10, v8
; CHECK-NEXT:    vmv1r.v v11, v9
; CHECK-NEXT:    vmv1r.v v12, v8
; CHECK-NEXT:    vmv1r.v v13, v9
; CHECK-NEXT:    vmv1r.v v14, v8
; CHECK-NEXT:    vmv1r.v v15, v9
; CHECK-NEXT:    vsseg8e32.v v8, (a0)
; CHECK-NEXT:    ret
  %rvl = mul nuw i32 %evl, 8
  %interleaved.vec = call <vscale x 8 x i32> @llvm.vector.interleave8.nxv8i32(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1)
  call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %interleaved.vec, ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %rvl)
  ret void
}

define {<vscale x 2 x i32>, <vscale x 2 x i32>} @masked_load_factor2_v2(<vscale x 2 x i1> %mask, ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: masked_load_factor2_v2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vlseg2e32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %rvl = mul nuw i32 %evl, 2
  %interleaved.mask = tail call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %mask, <vscale x 2 x i1> %mask)
  %wide.masked.load = tail call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> %interleaved.mask, i32 %rvl)
  %deinterleaved.results = tail call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv16i32(<vscale x 4 x i32> %wide.masked.load)
  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 0
  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 1
  %res0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> %t0, 0
  %res1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %res0, <vscale x 2 x i32> %t1, 1
  ret { <vscale x 2 x i32>, <vscale x 2 x i32> } %res1
}

define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @masked_load_factor4_v2(<vscale x 2 x i1> %mask, ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: masked_load_factor4_v2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vlseg4e32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %rvl = mul nuw i32 %evl, 4
  %interleaved.mask = call <vscale x 8 x i1> @llvm.vector.interleave4.nxv8i1(<vscale x 2 x i1> %mask, <vscale x 2 x i1> %mask, <vscale x 2 x i1> %mask, <vscale x 2 x i1> %mask)
  %wide.masked.load = call <vscale x 8 x i32> @llvm.vp.load.nxv8i32.p0(ptr %ptr, <vscale x 8 x i1> %interleaved.mask, i32 %rvl)
  %d = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave4.nxv8i32(<vscale x 8 x i32> %wide.masked.load)
  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %d, 0
  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %d, 1
  %t2 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %d, 2
  %t3 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %d, 3
  %res0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> %t0, 0
  %res1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res0, <vscale x 2 x i32> %t1, 1
  %res2 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res1, <vscale x 2 x i32> %t2, 2
  %res3 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res2, <vscale x 2 x i32> %t3, 3
  ret { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res3
}

define void @masked_store_factor2_v2(<vscale x 1 x i1> %mask, <vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: masked_store_factor2_v2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT:    vmv1r.v v9, v8
; CHECK-NEXT:    vsseg2e32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %rvl = mul nuw i32 %evl, 2
  %interleaved.mask = tail call <vscale x 2 x i1> @llvm.vector.interleave2.nxv2i1(<vscale x 1 x i1> %mask, <vscale x 1 x i1> %mask)
  %interleaved.vec = tail call <vscale x 2 x i32> @llvm.vector.interleave2.nxv2i32(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v0)
  tail call void @llvm.vp.store.nxv2i32.p0(<vscale x 2 x i32> %interleaved.vec, ptr %ptr, <vscale x 2 x i1> %interleaved.mask, i32 %rvl)
  ret void
}

define void @masked_load_store_factor2_v2_shared_mask(<vscale x 2 x i1> %mask, ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: masked_load_store_factor2_v2_shared_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vlseg2e32.v v8, (a0), v0.t
; CHECK-NEXT:    vsseg2e32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %rvl = mul nuw i32 %evl, 2
  %interleaved.mask = tail call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %mask, <vscale x 2 x i1> %mask)
  %wide.masked.load = tail call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> %interleaved.mask, i32 %rvl)
  %deinterleaved.results = tail call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv16i32(<vscale x 4 x i32> %wide.masked.load)
  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 0
  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 1
  %interleaved.vec = tail call <vscale x 4 x i32> @llvm.vector.interleave2.nxv4i32(<vscale x 2 x i32> %t0, <vscale x 2 x i32> %t1)
  tail call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %interleaved.vec, ptr %ptr, <vscale x 4 x i1> %interleaved.mask, i32 %rvl)
  ret void
}

; The extra scalar use of %wide.masked.load keeps the load from being converted; only the store becomes a segment store.
define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> %mask, ptr %ptr, i32 zeroext %evl) {
; RV32-LABEL: masked_load_store_factor2_v2_shared_mask_extract:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
; RV32-NEXT:    vmv1r.v v8, v0
; RV32-NEXT:    slli a2, a1, 1
; RV32-NEXT:    vmv.v.i v9, 0
; RV32-NEXT:    li a3, -1
; RV32-NEXT:    vmerge.vim v10, v9, 1, v0
; RV32-NEXT:    vwaddu.vv v11, v10, v10
; RV32-NEXT:    vwmaccu.vx v11, a3, v10
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
; RV32-NEXT:    vmv.v.i v10, 0
; RV32-NEXT:    srli a3, a3, 2
; RV32-NEXT:    vsetvli a4, zero, e8, mf4, ta, ma
; RV32-NEXT:    vmsne.vi v0, v11, 0
; RV32-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
; RV32-NEXT:    vslidedown.vx v11, v11, a3
; RV32-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
; RV32-NEXT:    vmerge.vim v10, v10, 1, v0
; RV32-NEXT:    vsetvli a4, zero, e8, mf4, ta, ma
; RV32-NEXT:    vmsne.vi v0, v11, 0
; RV32-NEXT:    vmerge.vim v9, v9, 1, v0
; RV32-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
; RV32-NEXT:    vslideup.vx v10, v9, a3
; RV32-NEXT:    vmsne.vi v0, v10, 0
; RV32-NEXT:    vle32.v v10, (a0), v0.t
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; RV32-NEXT:    vnsrl.wx v13, v10, a2
; RV32-NEXT:    vnsrl.wi v12, v10, 0
; RV32-NEXT:    vmv.x.s a1, v10
; RV32-NEXT:    vmv1r.v v0, v8
; RV32-NEXT:    vsseg2e32.v v12, (a0), v0.t
; RV32-NEXT:    mv a0, a1
; RV32-NEXT:    ret
;
; RV64-LABEL: masked_load_store_factor2_v2_shared_mask_extract:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
; RV64-NEXT:    vmv1r.v v8, v0
; RV64-NEXT:    vmv.v.i v9, 0
; RV64-NEXT:    li a2, -1
; RV64-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
; RV64-NEXT:    vmv.v.i v10, 0
; RV64-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
; RV64-NEXT:    vmerge.vim v11, v9, 1, v0
; RV64-NEXT:    vwaddu.vv v12, v11, v11
; RV64-NEXT:    vwmaccu.vx v12, a2, v11
; RV64-NEXT:    csrr a2, vlenb
; RV64-NEXT:    srli a2, a2, 2
; RV64-NEXT:    vmsne.vi v0, v12, 0
; RV64-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
; RV64-NEXT:    vslidedown.vx v11, v12, a2
; RV64-NEXT:    vmerge.vim v10, v10, 1, v0
; RV64-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
; RV64-NEXT:    vmsne.vi v0, v11, 0
; RV64-NEXT:    vmerge.vim v9, v9, 1, v0
; RV64-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
; RV64-NEXT:    vslideup.vx v10, v9, a2
; RV64-NEXT:    slli a2, a1, 33
; RV64-NEXT:    vmsne.vi v0, v10, 0
; RV64-NEXT:    srli a2, a2, 32
; RV64-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
; RV64-NEXT:    vle32.v v10, (a0), v0.t
; RV64-NEXT:    li a2, 32
; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; RV64-NEXT:    vnsrl.wx v13, v10, a2
; RV64-NEXT:    vnsrl.wi v12, v10, 0
; RV64-NEXT:    vmv.x.s a1, v10
; RV64-NEXT:    vmv1r.v v0, v8
; RV64-NEXT:    vsseg2e32.v v12, (a0), v0.t
; RV64-NEXT:    mv a0, a1
; RV64-NEXT:    ret
  %rvl = mul nuw i32 %evl, 2
  %interleaved.mask = tail call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %mask, <vscale x 2 x i1> %mask)
  %wide.masked.load = tail call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> %interleaved.mask, i32 %rvl)
  %deinterleaved.results = tail call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv16i32(<vscale x 4 x i32> %wide.masked.load)
  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 0
  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 1
  %r0 = extractelement <vscale x 4 x i32> %wide.masked.load, i32 0
  %interleaved.vec = tail call <vscale x 4 x i32> @llvm.vector.interleave2.nxv4i32(<vscale x 2 x i32> %t0, <vscale x 2 x i32> %t1)
  tail call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %interleaved.vec, ptr %ptr, <vscale x 4 x i1> %interleaved.mask, i32 %rvl)
  ret i32 %r0
}

define void @masked_store_factor4_v2(<vscale x 1 x i1> %mask, <vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: masked_store_factor4_v2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT:    vmv1r.v v10, v8
; CHECK-NEXT:    vmv1r.v v11, v9
; CHECK-NEXT:    vsseg4e32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %rvl = mul nuw i32 %evl, 4
  %interleaved.mask = call <vscale x 4 x i1> @llvm.vector.interleave4.nxv4i1(<vscale x 1 x i1> %mask, <vscale x 1 x i1> %mask, <vscale x 1 x i1> %mask, <vscale x 1 x i1> %mask)
  %interleaved.vec = call <vscale x 4 x i32> @llvm.vector.interleave4.nxv2i32(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1)
  call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %interleaved.vec, ptr %ptr, <vscale x 4 x i1> %interleaved.mask, i32 %rvl)
  ret void
}

define <vscale x 2 x i32> @load_factor2_oneactive(ptr %ptr, i32 zeroext %evl) {
; RV32-LABEL: load_factor2_oneactive:
; RV32:       # %bb.0:
; RV32-NEXT:    slli a1, a1, 2
; RV32-NEXT:    srli a1, a1, 1
; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; RV32-NEXT:    vlseg2e32.v v7, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: load_factor2_oneactive:
; RV64:       # %bb.0:
; RV64-NEXT:    slli a1, a1, 34
; RV64-NEXT:    srli a1, a1, 33
; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; RV64-NEXT:    vlseg2e32.v v7, (a0)
; RV64-NEXT:    ret
  %rvl = mul nuw i32 %evl, 4
  %wide.masked.load = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %rvl)
  %deinterleaved.results = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %wide.masked.load)
  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 1
  ret <vscale x 2 x i32> %t0
}

; A single active field is lowered to a strided load (field 3: offset 12, stride 20).
define <vscale x 2 x i32> @load_factor5_oneactive(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: load_factor5_oneactive:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, 12
; CHECK-NEXT:    li a2, 20
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vlse32.v v8, (a0), a2
; CHECK-NEXT:    ret
  %rvl = mul nuw i32 %evl, 5
  %wide.masked.load = call <vscale x 10 x i32> @llvm.vp.load(ptr %ptr, <vscale x 10 x i1> splat (i1 true), i32 %rvl)
  %deinterleaved.results = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave5(<vscale x 10 x i32> %wide.masked.load)
  %t3 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 3
  ret <vscale x 2 x i32> %t3
}

; Negative tests

define {<vscale x 2 x i32>, <vscale x 2 x i32>} @not_same_mask(<vscale x 2 x i1> %mask0, <vscale x 2 x i1> %mask1, ptr %ptr, i32 zeroext %evl) {
; RV32-LABEL: not_same_mask:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
; RV32-NEXT:    vmv1r.v v9, v0
; RV32-NEXT:    vmv1r.v v0, v8
; RV32-NEXT:    slli a1, a1, 1
; RV32-NEXT:    vmv.v.i v8, 0
; RV32-NEXT:    li a2, -1
; RV32-NEXT:    vmerge.vim v10, v8, 1, v0
; RV32-NEXT:    vmv1r.v v0, v9
; RV32-NEXT:    vmerge.vim v9, v8, 1, v0
; RV32-NEXT:    vwaddu.vv v11, v9, v10
; RV32-NEXT:    vwmaccu.vx v11, a2, v10
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
; RV32-NEXT:    vmv.v.i v9, 0
; RV32-NEXT:    srli a2, a2, 2
; RV32-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
; RV32-NEXT:    vmsne.vi v0, v11, 0
; RV32-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
; RV32-NEXT:    vslidedown.vx v10, v11, a2
; RV32-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
; RV32-NEXT:    vmerge.vim v9, v9, 1, v0
; RV32-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
; RV32-NEXT:    vmsne.vi v0, v10, 0
; RV32-NEXT:    vmerge.vim v8, v8, 1, v0
; RV32-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
; RV32-NEXT:    vslideup.vx v9, v8, a2
; RV32-NEXT:    vmsne.vi v0, v9, 0
; RV32-NEXT:    vle32.v v10, (a0), v0.t
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; RV32-NEXT:    vnsrl.wx v9, v10, a0
; RV32-NEXT:    vnsrl.wi v8, v10, 0
; RV32-NEXT:    ret
;
; RV64-LABEL: not_same_mask:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
; RV64-NEXT:    vmv1r.v v9, v0
; RV64-NEXT:    vmv1r.v v0, v8
; RV64-NEXT:    vmv.v.i v8, 0
; RV64-NEXT:    li a2, -1
; RV64-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
; RV64-NEXT:    vmv.v.i v10, 0
; RV64-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
; RV64-NEXT:    vmerge.vim v11, v8, 1, v0
; RV64-NEXT:    vmv1r.v v0, v9
; RV64-NEXT:    vmerge.vim v9, v8, 1, v0
; RV64-NEXT:    vwaddu.vv v12, v9, v11
; RV64-NEXT:    vwmaccu.vx v12, a2, v11
; RV64-NEXT:    csrr a2, vlenb
; RV64-NEXT:    srli a2, a2, 2
; RV64-NEXT:    vmsne.vi v0, v12, 0
; RV64-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
; RV64-NEXT:    vslidedown.vx v9, v12, a2
; RV64-NEXT:    vmerge.vim v10, v10, 1, v0
; RV64-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
; RV64-NEXT:    vmsne.vi v0, v9, 0
; RV64-NEXT:    slli a1, a1, 33
; RV64-NEXT:    vmerge.vim v8, v8, 1, v0
; RV64-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
; RV64-NEXT:    vslideup.vx v10, v8, a2
; RV64-NEXT:    vmsne.vi v0, v10, 0
; RV64-NEXT:    srli a1, a1, 32
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vle32.v v10, (a0), v0.t
; RV64-NEXT:    li a0, 32
; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; RV64-NEXT:    vnsrl.wx v9, v10, a0
; RV64-NEXT:    vnsrl.wi v8, v10, 0
; RV64-NEXT:    ret
  %rvl = mul nuw i32 %evl, 2
  %interleaved.mask = tail call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %mask0, <vscale x 2 x i1> %mask1)
  %wide.masked.load = tail call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> %interleaved.mask, i32 %rvl)
  %deinterleaved.results = tail call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv16i32(<vscale x 4 x i32> %wide.masked.load)
  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 0
  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 1
  %res0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> %t0, 0
  %res1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %res0, <vscale x 2 x i32> %t1, 1
  ret { <vscale x 2 x i32>, <vscale x 2 x i32> } %res1
}

; The EVL should be a multiple of the factor.
define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @invalid_evl(ptr %ptr, i32 zeroext %evl) nounwind {
; CHECK-LABEL: invalid_evl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 2
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    ori a1, a1, 1
; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs4r.v v8, (a0)
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT:    vlseg4e32.v v8, (a0)
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 2
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
  %rvl = or i32 %evl, 1
  %wide.masked.load = call <vscale x 8 x i32> @llvm.vp.load.nxv8i32.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %rvl)
  %d = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave4.nxv8i32(<vscale x 8 x i32> %wide.masked.load)
  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %d, 0
  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %d, 1
  %t2 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %d, 2
  %t3 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %d, 3
  %res0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> %t0, 0
  %res1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res0, <vscale x 2 x i32> %t1, 1
  %res2 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res1, <vscale x 2 x i32> %t2, 2
  %res3 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res2, <vscale x 2 x i32> %t3, 3
  ret { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res3
}