; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,m -O2 | FileCheck -check-prefixes=CHECK,RV32 %s
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,m -O2 | FileCheck -check-prefixes=CHECK,RV64 %s

; ------------------------------------------------------------------------------
; Loads
; ------------------------------------------------------------------------------

; FIXME: This should be widened to a vlseg2 of <4 x i32> with VL set to 3
define {<3 x i32>, <3 x i32>} @load_factor2_v3(ptr %ptr) {
; RV32-LABEL: load_factor2_v3:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT:    vle32.v v10, (a0)
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v10, 0
; RV32-NEXT:    vnsrl.wx v9, v10, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: load_factor2_v3:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT:    vle32.v v10, (a0)
; RV64-NEXT:    li a0, 32
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vnsrl.wx v9, v10, a0
; RV64-NEXT:    vnsrl.wi v8, v10, 0
; RV64-NEXT:    ret
  %interleaved.vec = load <6 x i32>, ptr %ptr
  %v0 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> poison, <3 x i32> <i32 0, i32 2, i32 4>
  %v1 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> poison, <3 x i32> <i32 1, i32 3, i32 5>
  %res0 = insertvalue {<3 x i32>, <3 x i32>} undef, <3 x i32> %v0, 0
  %res1 = insertvalue {<3 x i32>, <3 x i32>} %res0, <3 x i32> %v1, 1
  ret {<3 x i32>, <3 x i32>} %res1
}

define {<4 x i32>, <4 x i32>} @load_factor2(ptr %ptr) {
; CHECK-LABEL: load_factor2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vlseg2e32.v v8, (a0)
; CHECK-NEXT:    ret
  %interleaved.vec = load <8 x i32>, ptr %ptr
  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %res0 = insertvalue {<4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
  %res1 = insertvalue {<4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
  ret {<4 x i32>, <4 x i32>} %res1
}

define {<4 x i32>, <4 x i32>, <4 x i32>} @load_factor3(ptr %ptr) {
; CHECK-LABEL: load_factor3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vlseg3e32.v v8, (a0)
; CHECK-NEXT:    ret
  %interleaved.vec = load <12 x i32>, ptr %ptr
  %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
  %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
  %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
  %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
  %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
  %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2
  ret {<4 x i32>, <4 x i32>, <4 x i32>} %res2
}

define {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} @load_factor4(ptr %ptr) {
; CHECK-LABEL: load_factor4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vlseg4e32.v v8, (a0)
; CHECK-NEXT:    ret
  %interleaved.vec = load <16 x i32>, ptr %ptr
  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
  %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
  %v3 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
  %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
  %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
  %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2
  %res3 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res2, <4 x i32> %v3, 3
  ret {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res3
}

define {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} @load_factor5(ptr %ptr) {
; CHECK-LABEL: load_factor5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vlseg5e32.v v8, (a0)
; CHECK-NEXT:    ret
  %interleaved.vec = load <20 x i32>, ptr %ptr
  %v0 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 0, i32 5, i32 10, i32 15>
  %v1 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 1, i32 6, i32 11, i32 16>
  %v2 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 2, i32 7, i32 12, i32 17>
  %v3 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 3, i32 8, i32 13, i32 18>
  %v4 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 4, i32 9, i32 14, i32 19>
  %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
  %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
  %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2
  %res3 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res2, <4 x i32> %v3, 3
  %res4 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res3, <4 x i32> %v4, 4
  ret {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res4
}

define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @load_factor6(ptr %ptr) {
; CHECK-LABEL: load_factor6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vlseg6e16.v v8, (a0)
; CHECK-NEXT:    ret
  %interleaved.vec = load <12 x i16>, ptr %ptr
  %v0 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 0, i32 6>
  %v1 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 1, i32 7>
  %v2 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 2, i32 8>
  %v3 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 3, i32 9>
  %v4 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 4, i32 10>
  %v5 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 5, i32 11>
  %res0 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} undef, <2 x i16> %v0, 0
  %res1 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res0, <2 x i16> %v1, 1
  %res2 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res1, <2 x i16> %v2, 2
  %res3 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res2, <2 x i16> %v3, 3
  %res4 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res3, <2 x i16> %v4, 4
  %res5 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res4, <2 x i16> %v5, 5
  ret {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res5
}

define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @load_factor7(ptr %ptr) {
; CHECK-LABEL: load_factor7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vlseg7e16.v v8, (a0)
; CHECK-NEXT:    ret
  %interleaved.vec = load <14 x i16>, ptr %ptr
  %v0 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 0, i32 7>
  %v1 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 1, i32 8>
  %v2 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 2, i32 9>
  %v3 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 3, i32 10>
  %v4 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 4, i32 11>
  %v5 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 5, i32 12>
  %v6 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 6, i32 13>
  %res0 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} undef, <2 x i16> %v0, 0
  %res1 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res0, <2 x i16> %v1, 1
  %res2 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res1, <2 x i16> %v2, 2
  %res3 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res2, <2 x i16> %v3, 3
  %res4 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res3, <2 x i16> %v4, 4
  %res5 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res4, <2 x i16> %v5, 5
  %res6 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res5, <2 x i16> %v6, 6
  ret {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res6
}

define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @load_factor8(ptr %ptr) {
; CHECK-LABEL: load_factor8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vlseg8e16.v v8, (a0)
; CHECK-NEXT:    ret
  %interleaved.vec = load <16 x i16>, ptr %ptr
  %v0 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 0, i32 8>
  %v1 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 1, i32 9>
  %v2 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 2, i32 10>
  %v3 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 3, i32 11>
  %v4 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 4, i32 12>
  %v5 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 5, i32 13>
  %v6 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 6, i32 14>
  %v7 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 7, i32 15>
  %res0 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} undef, <2 x i16> %v0, 0
  %res1 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res0, <2 x i16> %v1, 1
  %res2 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res1, <2 x i16> %v2, 2
  %res3 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res2, <2 x i16> %v3, 3
  %res4 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res3, <2 x i16> %v4, 4
  %res5 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res4, <2 x i16> %v5, 5
  %res6 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res5, <2 x i16> %v6, 6
  %res7 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res6, <2 x i16> %v7, 7
  ret {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res7
}

define {<4 x i32>, <4 x i32>} @vpload_factor2(ptr %ptr) {
; CHECK-LABEL: vpload_factor2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vlseg2e32.v v8,
(a0) ; CHECK-NEXT: ret %interleaved.vec = tail call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 8) %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> %res0 = insertvalue {<4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0 %res1 = insertvalue {<4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1 ret {<4 x i32>, <4 x i32>} %res1 } define {<4 x i32>, <4 x i32>} @vpload_factor2_interleaved_mask_intrinsic(ptr %ptr, <4 x i1> %m) { ; CHECK-LABEL: vpload_factor2_interleaved_mask_intrinsic: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t ; CHECK-NEXT: ret %interleaved.mask = call <8 x i1> @llvm.vector.interleave2(<4 x i1> %m, <4 x i1> %m) %interleaved.vec = tail call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> %interleaved.mask, i32 8) %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> %res0 = insertvalue {<4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0 %res1 = insertvalue {<4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1 ret {<4 x i32>, <4 x i32>} %res1 } define {<4 x i32>, <4 x i32>} @vpload_factor2_interleaved_mask_shuffle(ptr %ptr, <4 x i1> %m) { ; CHECK-LABEL: vpload_factor2_interleaved_mask_shuffle: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t ; CHECK-NEXT: ret %interleaved.mask = shufflevector <4 x i1> %m, <4 x i1> poison, <8 x i32> %interleaved.vec = tail call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> %interleaved.mask, i32 8) %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> %res0 = insertvalue {<4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0 %res1 = insertvalue {<4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1 ret {<4 x i32>, <4 x i32>} %res1 } define {<4 x i32>, <4 x i32>} @vpload_factor2_interleaved_mask_shuffle2(ptr %ptr, <2 x i1> %m) { ; CHECK-LABEL: vpload_factor2_interleaved_mask_shuffle2: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: li a1, -1 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vwaddu.vv v9, v8, v8 ; CHECK-NEXT: vwmaccu.vx v9, a1, v8 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vmsne.vi v0, v9, 0 ; CHECK-NEXT: vle32.v v10, (a0), v0.t ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-NEXT: vnsrl.wx v9, v10, a0 ; CHECK-NEXT: ret %interleaved.mask = shufflevector <2 x i1> %m, <2 x i1> poison, <8 x i32> %interleaved.vec = tail call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> %interleaved.mask, i32 4) %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> %res0 = insertvalue {<4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0 %res1 = insertvalue {<4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1 ret {<4 x i32>, <4 x i32>} %res1 } define {<4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor3(ptr %ptr) { ; CHECK-LABEL: vpload_factor3: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vlseg3e32.v v8, (a0) ; CHECK-NEXT: ret %interleaved.vec = tail call <12 x i32> @llvm.vp.load.v12i32.p0(ptr %ptr, <12 x i1> splat (i1 true), 
i32 12) %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0 %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1 %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2 ret {<4 x i32>, <4 x i32>, <4 x i32>} %res2 } ; We only extract some of the fields. define {<4 x i32>, <4 x i32>} @vpload_factor3_partial(ptr %ptr) { ; CHECK-LABEL: vpload_factor3_partial: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vlseg3e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: ret %interleaved.vec = tail call <12 x i32> @llvm.vp.load.v12i32.p0(ptr %ptr, <12 x i1> splat (i1 true), i32 12) %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> %res0 = insertvalue {<4 x i32>, <4 x i32>} poison, <4 x i32> %v0, 0 %res1 = insertvalue {<4 x i32>, <4 x i32>} %res0, <4 x i32> %v2, 1 ret {<4 x i32>, <4 x i32>} %res1 } ; Load a larger vector but only deinterleave a subset of the elements. define {<4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor3_v16i32(ptr %ptr) { ; CHECK-LABEL: vpload_factor3_v16i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vlseg3e32.v v8, (a0) ; CHECK-NEXT: ret %interleaved.vec = tail call <16 x i32> @llvm.vp.load.v16i32.p0(ptr %ptr, <16 x i1> , i32 12) %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0 %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1 %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2 ret {<4 x i32>, <4 x i32>, <4 x i32>} %res2 } ; Make sure the mask is propagated. define {<4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor3_mask(ptr %ptr) { ; CHECK-LABEL: vpload_factor3_mask: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v0, 10 ; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t ; CHECK-NEXT: ret %interleaved.vec = tail call <12 x i32> @llvm.vp.load.v12i32.p0(ptr %ptr, <12 x i1> , i32 12) %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0 %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1 %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2 ret {<4 x i32>, <4 x i32>, <4 x i32>} %res2 } ; Poison/undef in the shuffle mask shouldn't affect anything. 
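; A poison/undef lane in the deinterleave shuffle masks is a don't-care value, so the
; factor-3 pattern should still be recognized and lowered to the same masked vlseg3e32.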
define {<4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor3_poison_shufflemask(ptr %ptr) { ; CHECK-LABEL: vpload_factor3_poison_shufflemask: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v0, 10 ; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t ; CHECK-NEXT: ret %interleaved.vec = tail call <12 x i32> @llvm.vp.load.v12i32.p0(ptr %ptr, <12 x i1> , i32 12) %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0 %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1 %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2 ret {<4 x i32>, <4 x i32>, <4 x i32>} %res2 } define {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor4(ptr %ptr) { ; CHECK-LABEL: vpload_factor4: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vlseg4e32.v v8, (a0) ; CHECK-NEXT: ret %interleaved.vec = tail call <16 x i32> @llvm.vp.load.v16i32.p0(ptr %ptr, <16 x i1> splat (i1 true), i32 16) %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> %v3 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0 %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1 %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2 %res3 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res2, <4 x i32> %v3, 3 ret {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res3 } ; TODO: Add more tests for vp.load/store + (de)interleave intrinsics with fixed vectors. 
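; The next test uses the llvm.vector.deinterleave4 intrinsic on a vp.load result instead
; of shufflevectors; it should likewise lower to a single vlseg4e32 with VL set to 2.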
define {<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>} @vpload_factor4_intrinsics(ptr %ptr) { ; CHECK-LABEL: vpload_factor4_intrinsics: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vlseg4e32.v v8, (a0) ; CHECK-NEXT: ret %wide.masked.load = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 8) %d = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.vector.deinterleave4.v8i32(<8 x i32> %wide.masked.load) %t0 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %d, 0 %t1 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %d, 1 %t2 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %d, 2 %t3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %d, 3 %res0 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } poison, <2 x i32> %t0, 0 %res1 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res0, <2 x i32> %t1, 1 %res2 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res1, <2 x i32> %t2, 2 %res3 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res2, <2 x i32> %t3, 3 ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res3 } define {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor5(ptr %ptr) { ; CHECK-LABEL: vpload_factor5: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vlseg5e32.v v8, (a0) ; CHECK-NEXT: ret %interleaved.vec = tail call <20 x i32> @llvm.vp.load.v20i32.p0(ptr %ptr, <20 x i1> splat (i1 true), i32 20) %v0 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> %v1 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> %v2 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> %v3 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> %v4 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0 %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1 %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2 %res3 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res2, <4 x i32> %v3, 3 %res4 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res3, <4 x i32> %v4, 4 ret {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res4 } define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @vpload_factor6(ptr %ptr) { ; CHECK-LABEL: vpload_factor6: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vlseg6e16.v v8, (a0) ; CHECK-NEXT: ret %interleaved.vec = tail call <12 x i16> @llvm.vp.load.v12i16.p0(ptr %ptr, <12 x i1> splat (i1 true), i32 12) %v0 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> %v1 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> %v2 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> %v3 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> %v4 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> %v5 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> %res0 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} undef, <2 x i16> %v0, 0 %res1 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res0, <2 x i16> %v1, 1 %res2 = 
insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res1, <2 x i16> %v2, 2 %res3 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res2, <2 x i16> %v3, 3 %res4 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res3, <2 x i16> %v4, 4 %res5 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res4, <2 x i16> %v5, 5 ret {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res5 } define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @vpload_factor7(ptr %ptr) { ; CHECK-LABEL: vpload_factor7: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vlseg7e16.v v8, (a0) ; CHECK-NEXT: ret %interleaved.vec = tail call <14 x i16> @llvm.vp.load.v14i16.p0(ptr %ptr, <14 x i1> splat (i1 true), i32 14) %v0 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> %v1 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> %v2 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> %v3 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> %v4 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> %v5 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> %v6 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> %res0 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} undef, <2 x i16> %v0, 0 %res1 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res0, <2 x i16> %v1, 1 %res2 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res1, <2 x i16> %v2, 2 %res3 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res2, <2 x i16> %v3, 3 %res4 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res3, <2 x i16> %v4, 4 %res5 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res4, <2 x i16> %v5, 5 %res6 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res5, <2 x i16> %v6, 6 ret {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res6 } define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @vpload_factor8(ptr %ptr) { ; CHECK-LABEL: vpload_factor8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vlseg8e16.v v8, (a0) ; CHECK-NEXT: ret %interleaved.vec = tail call <16 x i16> @llvm.vp.load.v16i16.p0(ptr %ptr, <16 x i1> splat (i1 true), i32 16) %v0 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> %v1 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> %v2 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> %v3 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> %v4 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> %v5 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> %v6 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> %v7 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> %res0 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} undef, <2 x i16> %v0, 0 %res1 = insertvalue {<2 x i16>, <2 
x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res0, <2 x i16> %v1, 1 %res2 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res1, <2 x i16> %v2, 2 %res3 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res2, <2 x i16> %v3, 3 %res4 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res3, <2 x i16> %v4, 4 %res5 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res4, <2 x i16> %v5, 5 %res6 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res5, <2 x i16> %v6, 6 %res7 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res6, <2 x i16> %v7, 7 ret {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res7 } ; LMUL * NF is > 8 here and so shouldn't be lowered to a vlseg define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_factor6_too_big(ptr %ptr) { ; RV32-LABEL: load_factor6_too_big: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: li a3, 100 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: sub sp, sp, a2 ; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xe4, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 100 * vlenb ; RV32-NEXT: addi a4, a1, 128 ; RV32-NEXT: addi a5, a1, 256 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: lui a3, 12 ; RV32-NEXT: lui a6, 12291 ; RV32-NEXT: lui a7, %hi(.LCPI23_0) ; RV32-NEXT: addi a7, a7, %lo(.LCPI23_0) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vle32.v v24, (a5) ; RV32-NEXT: vmv.s.x v0, a3 ; RV32-NEXT: vle32.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 6 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill ; RV32-NEXT: addi a6, a6, 3 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vslideup.vi v16, v24, 4 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a5, 76 ; RV32-NEXT: mul a1, a1, a5 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a5, 92 ; RV32-NEXT: mul a1, a1, a5 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill ; RV32-NEXT: vmv1r.v v30, v0 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: vslideup.vi v16, v8, 10, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a5, 72 ; RV32-NEXT: mul a1, a1, a5 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v16, (a1) # vscale x 32-byte Folded Spill ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vle32.v v8, (a4) ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a4, 84 ; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vle16.v v28, (a7) ; RV32-NEXT: vmv.s.x v0, a6 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 6 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload ; RV32-NEXT: csrr a1, 
vlenb ; RV32-NEXT: li a4, 84 ; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vmerge.vvm v16, v8, v16, v0 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vrgatherei16.vv v0, v16, v28 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a4, 52 ; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v0, (a1) # vscale x 64-byte Folded Spill ; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; RV32-NEXT: vslideup.vi v8, v24, 2 ; RV32-NEXT: vmv1r.v v0, v30 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a4, 92 ; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload ; RV32-NEXT: vslideup.vi v8, v16, 8, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a4, 60 ; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill ; RV32-NEXT: lui a7, 49164 ; RV32-NEXT: lui a1, %hi(.LCPI23_1) ; RV32-NEXT: addi a1, a1, %lo(.LCPI23_1) ; RV32-NEXT: lui t2, 3 ; RV32-NEXT: lui t1, 196656 ; RV32-NEXT: lui a4, %hi(.LCPI23_3) ; RV32-NEXT: addi a4, a4, %lo(.LCPI23_3) ; RV32-NEXT: lui t0, 786624 ; RV32-NEXT: li a5, 48 ; RV32-NEXT: lui a6, 768 ; RV32-NEXT: addi a7, a7, 12 ; RV32-NEXT: vmv.s.x v0, a7 ; RV32-NEXT: addi t2, t2, 3 ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: li t3, 84 ; RV32-NEXT: mul a7, a7, t3 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vl8r.v v16, (a7) # vscale x 64-byte Folded Reload ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: slli a7, a7, 6 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vl8r.v v8, (a7) # vscale x 64-byte Folded Reload ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vmerge.vvm v8, v16, v8, v0 ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: li t3, 36 ; RV32-NEXT: mul a7, a7, t3 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vs8r.v v8, (a7) # vscale x 64-byte Folded Spill ; RV32-NEXT: vmv.s.x v0, t2 ; RV32-NEXT: addi a7, t1, 48 ; RV32-NEXT: csrr t1, vlenb ; RV32-NEXT: li t2, 92 ; RV32-NEXT: mul t1, t1, t2 ; RV32-NEXT: add t1, sp, t1 ; RV32-NEXT: addi t1, t1, 16 ; RV32-NEXT: vl8r.v v24, (t1) # vscale x 64-byte Folded Reload ; RV32-NEXT: csrr t1, vlenb ; RV32-NEXT: li t2, 76 ; RV32-NEXT: mul t1, t1, t2 ; RV32-NEXT: add t1, sp, t1 ; RV32-NEXT: addi t1, t1, 16 ; RV32-NEXT: vl8r.v v8, (t1) # vscale x 64-byte Folded Reload ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vmerge.vvm v8, v24, v8, v0 ; RV32-NEXT: addi t1, sp, 16 ; RV32-NEXT: vs4r.v v8, (t1) # vscale x 32-byte Folded Spill ; RV32-NEXT: vmv.s.x v0, a7 ; RV32-NEXT: addi a3, a3, 12 ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: slli a7, a7, 6 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vl8r.v v24, (a7) # vscale x 64-byte Folded Reload ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vmerge.vvm v8, v16, v24, v0 ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: li t1, 20 ; RV32-NEXT: mul a7, a7, t1 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vs8r.v v8, (a7) # vscale x 64-byte Folded Spill ; RV32-NEXT: vmv8r.v v16, v24 ; RV32-NEXT: vmv.s.x v0, a3 ; RV32-NEXT: addi a3, t0, 192 ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: li t0, 92 ; RV32-NEXT: mul a7, a7, t0 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vl8r.v v24, 
(a7) # vscale x 64-byte Folded Reload ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: li t0, 76 ; RV32-NEXT: mul a7, a7, t0 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vl8r.v v8, (a7) # vscale x 64-byte Folded Reload ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vmerge.vvm v8, v24, v8, v0 ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: li t0, 48 ; RV32-NEXT: mul a7, a7, t0 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vs4r.v v8, (a7) # vscale x 32-byte Folded Spill ; RV32-NEXT: vmv.s.x v0, a3 ; RV32-NEXT: li a3, 192 ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: li t0, 84 ; RV32-NEXT: mul a7, a7, t0 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vl8r.v v8, (a7) # vscale x 64-byte Folded Reload ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: li t0, 28 ; RV32-NEXT: mul a7, a7, t0 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vs8r.v v8, (a7) # vscale x 64-byte Folded Spill ; RV32-NEXT: vmv.s.x v0, a5 ; RV32-NEXT: addi a5, a6, 768 ; RV32-NEXT: csrr a6, vlenb ; RV32-NEXT: li a7, 92 ; RV32-NEXT: mul a6, a6, a7 ; RV32-NEXT: add a6, sp, a6 ; RV32-NEXT: addi a6, a6, 16 ; RV32-NEXT: vl8r.v v24, (a6) # vscale x 64-byte Folded Reload ; RV32-NEXT: csrr a6, vlenb ; RV32-NEXT: li a7, 76 ; RV32-NEXT: mul a6, a6, a7 ; RV32-NEXT: add a6, sp, a6 ; RV32-NEXT: addi a6, a6, 16 ; RV32-NEXT: vl8r.v v8, (a6) # vscale x 64-byte Folded Reload ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vmerge.vvm v8, v24, v8, v0 ; RV32-NEXT: csrr a6, vlenb ; RV32-NEXT: li a7, 44 ; RV32-NEXT: mul a6, a6, a7 ; RV32-NEXT: add a6, sp, a6 ; RV32-NEXT: addi a6, a6, 16 ; RV32-NEXT: vs4r.v v8, (a6) # vscale x 32-byte Folded Spill ; RV32-NEXT: vmv.s.x v0, a5 ; RV32-NEXT: vle16.v v6, (a1) ; RV32-NEXT: vle16.v v2, (a4) ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a4, 84 ; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a4, 12 ; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill ; RV32-NEXT: vmv.s.x v0, a3 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a3, 36 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vrgatherei16.vv v24, v8, v6 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a3, 92 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a3, 76 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload ; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32-NEXT: vmerge.vvm v8, v24, v8, v0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a3, 92 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill ; RV32-NEXT: 
csrr a1, vlenb ; RV32-NEXT: li a3, 20 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vrgatherei16.vv v24, v8, v2 ; RV32-NEXT: lui a1, %hi(.LCPI23_2) ; RV32-NEXT: addi a1, a1, %lo(.LCPI23_2) ; RV32-NEXT: lui a3, 3073 ; RV32-NEXT: addi a3, a3, -1024 ; RV32-NEXT: vmv.s.x v0, a3 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV32-NEXT: vle16.v v3, (a1) ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a3, 84 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 84 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 72 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v28, (a1) # vscale x 32-byte Folded Reload ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 52 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload ; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma ; RV32-NEXT: vmv.v.v v28, v16 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 72 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v28, (a1) # vscale x 32-byte Folded Spill ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 60 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v16, (a1) # vscale x 32-byte Folded Reload ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload ; RV32-NEXT: vmv.v.v v16, v8 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 60 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v16, (a1) # vscale x 32-byte Folded Spill ; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vl4r.v v8, (a1) # vscale x 32-byte Folded Reload ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vrgatherei16.vv v28, v8, v3 ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma ; RV32-NEXT: vmv.v.v v28, v24 ; RV32-NEXT: lui a1, %hi(.LCPI23_4) ; RV32-NEXT: addi a1, a1, %lo(.LCPI23_4) ; RV32-NEXT: lui a2, %hi(.LCPI23_5) ; RV32-NEXT: addi a2, a2, %lo(.LCPI23_5) ; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; RV32-NEXT: vle16.v v24, (a2) ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV32-NEXT: vle16.v v8, (a1) ; RV32-NEXT: lui a1, %hi(.LCPI23_7) ; RV32-NEXT: addi a1, a1, %lo(.LCPI23_7) ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle16.v v10, (a1) ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 28 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v0, (a1) # vscale x 64-byte Folded Reload ; RV32-NEXT: vrgatherei16.vv v16, v0, v24 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 48 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v20, (a1) # vscale x 32-byte Folded Reload ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vrgatherei16.vv v24, v20, v8 ; RV32-NEXT: vsetivli zero, 10, 
e32, m4, tu, ma ; RV32-NEXT: vmv.v.v v24, v16 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 12 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v0, (a1) # vscale x 64-byte Folded Reload ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vrgatherei16.vv v16, v0, v10 ; RV32-NEXT: lui a1, %hi(.LCPI23_6) ; RV32-NEXT: addi a1, a1, %lo(.LCPI23_6) ; RV32-NEXT: lui a2, %hi(.LCPI23_8) ; RV32-NEXT: addi a2, a2, %lo(.LCPI23_8) ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV32-NEXT: vle16.v v4, (a1) ; RV32-NEXT: lui a1, %hi(.LCPI23_9) ; RV32-NEXT: addi a1, a1, %lo(.LCPI23_9) ; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; RV32-NEXT: vle16.v v6, (a1) ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vle16.v v5, (a2) ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 44 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v20, (a1) # vscale x 32-byte Folded Reload ; RV32-NEXT: vrgatherei16.vv v0, v20, v4 ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma ; RV32-NEXT: vmv.v.v v0, v16 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 84 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vrgatherei16.vv v16, v8, v6 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 92 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v12, (a1) # vscale x 32-byte Folded Reload ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vrgatherei16.vv v8, v12, v5 ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma ; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: addi a1, a0, 320 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: addi a1, a0, 256 ; RV32-NEXT: vse32.v v0, (a1) ; RV32-NEXT: addi a1, a0, 192 ; RV32-NEXT: vse32.v v24, (a1) ; RV32-NEXT: addi a1, a0, 128 ; RV32-NEXT: vse32.v v28, (a1) ; RV32-NEXT: addi a1, a0, 64 ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: li a3, 60 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl4r.v v8, (a2) # vscale x 32-byte Folded Reload ; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 72 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v8, (a1) # vscale x 32-byte Folded Reload ; RV32-NEXT: vse32.v v8, (a0) ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 100 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: .cfi_def_cfa sp, 16 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; ; RV64-LABEL: load_factor6_too_big: ; RV64: # %bb.0: ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: li a3, 93 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: sub sp, sp, a2 ; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xdd, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 93 * vlenb ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a1) ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: li a3, 53 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vs8r.v v8, (a2) # vscale x 64-byte Folded Spill ; RV64-NEXT: addi a2, a1, 128 ; RV64-NEXT: addi a3, a1, 256 ; RV64-NEXT: li a4, 128 ; RV64-NEXT: lui a1, 1 ; RV64-NEXT: vle64.v v8, (a3) ; 
RV64-NEXT: lui a3, %hi(.LCPI23_0) ; RV64-NEXT: addi a3, a3, %lo(.LCPI23_0) ; RV64-NEXT: vmv.s.x v0, a4 ; RV64-NEXT: csrr a4, vlenb ; RV64-NEXT: li a5, 61 ; RV64-NEXT: mul a4, a4, a5 ; RV64-NEXT: add a4, sp, a4 ; RV64-NEXT: addi a4, a4, 16 ; RV64-NEXT: vs1r.v v0, (a4) # vscale x 8-byte Folded Spill ; RV64-NEXT: addi a4, a1, 65 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vslideup.vi v24, v8, 2 ; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, ma ; RV64-NEXT: vslidedown.vi v16, v8, 8 ; RV64-NEXT: csrr a5, vlenb ; RV64-NEXT: li a6, 77 ; RV64-NEXT: mul a5, a5, a6 ; RV64-NEXT: add a5, sp, a5 ; RV64-NEXT: addi a5, a5, 16 ; RV64-NEXT: vs8r.v v16, (a5) # vscale x 64-byte Folded Spill ; RV64-NEXT: csrr a5, vlenb ; RV64-NEXT: li a6, 77 ; RV64-NEXT: mul a5, a5, a6 ; RV64-NEXT: add a5, sp, a5 ; RV64-NEXT: addi a5, a5, 16 ; RV64-NEXT: vl8r.v v16, (a5) # vscale x 64-byte Folded Reload ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: vslideup.vi v24, v16, 5, v0.t ; RV64-NEXT: csrr a5, vlenb ; RV64-NEXT: li a6, 73 ; RV64-NEXT: mul a5, a5, a6 ; RV64-NEXT: add a5, sp, a5 ; RV64-NEXT: addi a5, a5, 16 ; RV64-NEXT: vs4r.v v24, (a5) # vscale x 32-byte Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v24, (a2) ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: li a5, 85 ; RV64-NEXT: mul a2, a2, a5 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vs8r.v v24, (a2) # vscale x 64-byte Folded Spill ; RV64-NEXT: vle16.v v12, (a3) ; RV64-NEXT: vmv.s.x v0, a4 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: li a3, 85 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl8r.v v24, (a2) # vscale x 64-byte Folded Reload ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: li a3, 53 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload ; RV64-NEXT: vmerge.vvm v24, v24, v16, v0 ; RV64-NEXT: vrgatherei16.vv v0, v24, v12 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: li a3, 37 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vs8r.v v0, (a2) # vscale x 64-byte Folded Spill ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: vslideup.vi v12, v8, 1 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: li a3, 61 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl1r.v v7, (a2) # vscale x 8-byte Folded Reload ; RV64-NEXT: vmv1r.v v0, v7 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: li a3, 77 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl8r.v v24, (a2) # vscale x 64-byte Folded Reload ; RV64-NEXT: vslideup.vi v12, v24, 4, v0.t ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: li a3, 69 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vs4r.v v12, (a2) # vscale x 32-byte Folded Spill ; RV64-NEXT: lui a2, 2 ; RV64-NEXT: lui a3, 4 ; RV64-NEXT: li a4, 32 ; RV64-NEXT: addi a2, a2, 130 ; RV64-NEXT: vmv.s.x v0, a2 ; RV64-NEXT: addi a2, a3, 260 ; RV64-NEXT: vmv8r.v v24, v16 ; RV64-NEXT: csrr a3, vlenb ; RV64-NEXT: li a5, 85 ; RV64-NEXT: mul a3, a3, a5 ; RV64-NEXT: add a3, sp, a3 ; RV64-NEXT: addi a3, a3, 16 ; RV64-NEXT: vl8r.v v16, (a3) # vscale x 64-byte Folded Reload ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vmerge.vvm v16, v16, v24, v0 ; RV64-NEXT: csrr a3, vlenb ; RV64-NEXT: slli a3, a3, 3 ; RV64-NEXT: add a3, sp, a3 ; RV64-NEXT: addi a3, a3, 16 ; RV64-NEXT: vs8r.v 
v16, (a3) # vscale x 64-byte Folded Spill ; RV64-NEXT: vmv.s.x v0, a2 ; RV64-NEXT: vmv.s.x v2, a4 ; RV64-NEXT: vmv4r.v v12, v8 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: li a3, 85 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload ; RV64-NEXT: vmerge.vvm v16, v16, v24, v0 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: li a3, 29 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vs8r.v v16, (a2) # vscale x 64-byte Folded Spill ; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: li a3, 45 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vs8r.v v8, (a2) # vscale x 64-byte Folded Spill ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: vslideup.vi v12, v8, 5, v0.t ; RV64-NEXT: vmv1r.v v0, v7 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: li a3, 77 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl8r.v v24, (a2) # vscale x 64-byte Folded Reload ; RV64-NEXT: vrgather.vi v12, v24, 4, v0.t ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: slli a3, a2, 6 ; RV64-NEXT: add a2, a3, a2 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vs4r.v v12, (a2) # vscale x 32-byte Folded Spill ; RV64-NEXT: vslidedown.vi v12, v8, 1 ; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: vslideup.vi v12, v8, 4, v0.t ; RV64-NEXT: vmv1r.v v0, v7 ; RV64-NEXT: vrgather.vi v12, v24, 5, v0.t ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: li a3, 25 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vs4r.v v12, (a2) # vscale x 32-byte Folded Spill ; RV64-NEXT: lui a2, 8 ; RV64-NEXT: addi a2, a2, 520 ; RV64-NEXT: vmv.s.x v0, a2 ; RV64-NEXT: vslideup.vi v12, v24, 6 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: li a3, 85 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: li a3, 53 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl8r.v v24, (a2) # vscale x 64-byte Folded Reload ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vmerge.vvm v16, v16, v24, v0 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: slli a3, a2, 4 ; RV64-NEXT: add a2, a3, a2 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vs8r.v v16, (a2) # vscale x 64-byte Folded Spill ; RV64-NEXT: vmv1r.v v0, v7 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: li a3, 77 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: vslideup.vi v12, v16, 1, v0.t ; RV64-NEXT: lui a2, %hi(.LCPI23_1) ; RV64-NEXT: addi a2, a2, %lo(.LCPI23_1) ; RV64-NEXT: li a3, 192 ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; RV64-NEXT: vle16.v v6, (a2) ; RV64-NEXT: vmv.s.x v0, a3 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: slli a2, a2, 4 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vs1r.v v0, (a2) # vscale x 8-byte Folded Spill ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: li a3, 45 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vrgather.vi v28, v16, 2 ; RV64-NEXT: vmerge.vvm v16, v28, 
v12, v0 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: li a3, 61 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vs4r.v v16, (a2) # vscale x 32-byte Folded Spill ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: slli a2, a2, 3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vrgatherei16.vv v24, v16, v6 ; RV64-NEXT: addi a2, sp, 16 ; RV64-NEXT: vs8r.v v24, (a2) # vscale x 64-byte Folded Spill ; RV64-NEXT: lui a2, %hi(.LCPI23_2) ; RV64-NEXT: addi a2, a2, %lo(.LCPI23_2) ; RV64-NEXT: li a3, 1040 ; RV64-NEXT: vmv.s.x v0, a3 ; RV64-NEXT: addi a1, a1, -2016 ; RV64-NEXT: csrr a3, vlenb ; RV64-NEXT: li a4, 85 ; RV64-NEXT: mul a3, a3, a4 ; RV64-NEXT: add a3, sp, a3 ; RV64-NEXT: addi a3, a3, 16 ; RV64-NEXT: vl8r.v v24, (a3) # vscale x 64-byte Folded Reload ; RV64-NEXT: csrr a3, vlenb ; RV64-NEXT: li a4, 53 ; RV64-NEXT: mul a3, a3, a4 ; RV64-NEXT: add a3, sp, a3 ; RV64-NEXT: addi a3, a3, 16 ; RV64-NEXT: vl8r.v v16, (a3) # vscale x 64-byte Folded Reload ; RV64-NEXT: vmerge.vvm v8, v24, v16, v0 ; RV64-NEXT: csrr a3, vlenb ; RV64-NEXT: slli a3, a3, 3 ; RV64-NEXT: add a3, sp, a3 ; RV64-NEXT: addi a3, a3, 16 ; RV64-NEXT: vs8r.v v8, (a3) # vscale x 64-byte Folded Spill ; RV64-NEXT: vmv.s.x v0, a1 ; RV64-NEXT: vle16.v v6, (a2) ; RV64-NEXT: li a1, 64 ; RV64-NEXT: vmerge.vvm v8, v24, v16, v0 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: li a3, 85 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vs8r.v v8, (a2) # vscale x 64-byte Folded Spill ; RV64-NEXT: vmv.s.x v0, a1 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 29 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload ; RV64-NEXT: vrgatherei16.vv v24, v16, v6 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 77 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload ; RV64-NEXT: vmv4r.v v28, v8 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: vslideup.vi v28, v8, 5, v0.t ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 73 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v8, (a1) # vscale x 32-byte Folded Reload ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 37 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v0, (a1) # vscale x 64-byte Folded Reload ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma ; RV64-NEXT: vmv.v.v v8, v0 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 73 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 69 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v8, (a1) # vscale x 32-byte Folded Reload ; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload ; RV64-NEXT: vmv.v.v v8, v16 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 69 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill ; RV64-NEXT: lui a1, %hi(.LCPI23_3) ; RV64-NEXT: addi a1, a1, %lo(.LCPI23_3) ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; 
RV64-NEXT: vle16.v v20, (a1) ; RV64-NEXT: lui a1, %hi(.LCPI23_4) ; RV64-NEXT: addi a1, a1, %lo(.LCPI23_4) ; RV64-NEXT: vle16.v v8, (a1) ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 77 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs2r.v v8, (a1) # vscale x 16-byte Folded Spill ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a2, a1, 6 ; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v8, (a1) # vscale x 32-byte Folded Reload ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: vmv.v.v v8, v24 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a2, a1, 6 ; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a2, a1, 4 ; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vrgatherei16.vv v0, v8, v20 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 25 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v12, (a1) # vscale x 32-byte Folded Reload ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: vmv.v.v v12, v0 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 77 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl2r.v v8, (a1) # vscale x 16-byte Folded Reload ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vrgatherei16.vv v0, v16, v8 ; RV64-NEXT: lui a1, %hi(.LCPI23_5) ; RV64-NEXT: addi a1, a1, %lo(.LCPI23_5) ; RV64-NEXT: vle16.v v20, (a1) ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 61 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v8, (a1) # vscale x 32-byte Folded Reload ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: vmv.v.v v8, v0 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 61 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 45 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v0, (a1) # vscale x 64-byte Folded Reload ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vrgather.vi v8, v0, 3 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 4 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl1r.v v0, (a1) # vscale x 8-byte Folded Reload ; RV64-NEXT: vmerge.vvm v8, v8, v28, v0 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 85 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v0, (a1) # vscale x 64-byte Folded Reload ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vrgatherei16.vv v24, v0, v20 ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: vmv.v.v v8, v24 ; RV64-NEXT: addi a1, a0, 320 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: addi a1, a0, 256 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: li a3, 61 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; 
; RV64-NEXT: vl4r.v v8, (a2) # vscale x 32-byte Folded Reload
; RV64-NEXT: vse64.v v8, (a1)
; RV64-NEXT: addi a1, a0, 192
; RV64-NEXT: vse64.v v12, (a1)
; RV64-NEXT: addi a1, a0, 128
; RV64-NEXT: csrr a2, vlenb
; RV64-NEXT: slli a3, a2, 6
; RV64-NEXT: add a2, a3, a2
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vl4r.v v8, (a2) # vscale x 32-byte Folded Reload
; RV64-NEXT: vse64.v v8, (a1)
; RV64-NEXT: addi a1, a0, 64
; RV64-NEXT: csrr a2, vlenb
; RV64-NEXT: li a3, 69
; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vl4r.v v8, (a2) # vscale x 32-byte Folded Reload
; RV64-NEXT: vse64.v v8, (a1)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 73
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl4r.v v8, (a1) # vscale x 32-byte Folded Reload
; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: li a1, 93
; RV64-NEXT: mul a0, a0, a1
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: .cfi_def_cfa sp, 16
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
  %interleaved.vec = load <48 x i64>, ptr %ptr
  %v0 = shufflevector <48 x i64> %interleaved.vec, <48 x i64> poison, <8 x i32>
  %v1 = shufflevector <48 x i64> %interleaved.vec, <48 x i64> poison, <8 x i32>
  %v2 = shufflevector <48 x i64> %interleaved.vec, <48 x i64> poison, <8 x i32>
  %v3 = shufflevector <48 x i64> %interleaved.vec, <48 x i64> poison, <8 x i32>
  %v4 = shufflevector <48 x i64> %interleaved.vec, <48 x i64> poison, <8 x i32>
  %v5 = shufflevector <48 x i64> %interleaved.vec, <48 x i64> poison, <8 x i32>
  %res0 = insertvalue {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} undef, <8 x i64> %v0, 0
  %res1 = insertvalue {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} %res0, <8 x i64> %v1, 1
  %res2 = insertvalue {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} %res1, <8 x i64> %v2, 2
  %res3 = insertvalue {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} %res2, <8 x i64> %v3, 3
  %res4 = insertvalue {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} %res3, <8 x i64> %v4, 4
  %res5 = insertvalue {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} %res4, <8 x i64> %v5, 5
  ret {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} %res5
}

; ------------------------------------------------------------------------------
; Stores
; ------------------------------------------------------------------------------

define void @store_factor2(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: store_factor2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsseg2e32.v v8, (a0)
; CHECK-NEXT: ret
  %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32>
  store <8 x i32> %interleaved.vec, ptr %ptr
  ret void
}

define void @store_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
; CHECK-LABEL: store_factor3:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsseg3e32.v v8, (a0)
; CHECK-NEXT: ret
  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32>
  %s1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <8 x i32>
  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32>
  store <12 x i32> %interleaved.vec, ptr %ptr
  ret void
}

define void @store_factor4(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
; CHECK-LABEL: store_factor4:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsseg4e32.v v8, (a0)
; CHECK-NEXT: ret
  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32>
  %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32>
  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32>
  store <16 x i32> %interleaved.vec, ptr %ptr
  ret void
}

define void @store_factor5(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <4 x i32> %v4) {
; CHECK-LABEL: store_factor5:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsseg5e32.v v8, (a0)
; CHECK-NEXT: ret
  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32>
  %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32>
  %s2 = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32>
  %s3 = shufflevector <4 x i32> %v4, <4 x i32> poison, <16 x i32>
  %interleaved.vec = shufflevector <16 x i32> %s2, <16 x i32> %s3, <20 x i32>
  store <20 x i32> %interleaved.vec, ptr %ptr
  ret void
}

define void @store_factor6(ptr %ptr, <2 x i16> %v0, <2 x i16> %v1, <2 x i16> %v2, <2 x i16> %v3, <2 x i16> %v4, <2 x i16> %v5) {
; CHECK-LABEL: store_factor6:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vsseg6e16.v v8, (a0)
; CHECK-NEXT: ret
  %s0 = shufflevector <2 x i16> %v0, <2 x i16> %v1, <4 x i32>
  %s1 = shufflevector <2 x i16> %v2, <2 x i16> %v3, <4 x i32>
  %s2 = shufflevector <4 x i16> %s0, <4 x i16> %s1, <8 x i32>
  %s3 = shufflevector <2 x i16> %v4, <2 x i16> %v5, <8 x i32>
  %interleaved.vec = shufflevector <8 x i16> %s2, <8 x i16> %s3, <12 x i32>
  store <12 x i16> %interleaved.vec, ptr %ptr
  ret void
}

define void @store_factor7(ptr %ptr, <2 x i16> %v0, <2 x i16> %v1, <2 x i16> %v2, <2 x i16> %v3, <2 x i16> %v4, <2 x i16> %v5, <2 x i16> %v6) {
; CHECK-LABEL: store_factor7:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vsseg7e16.v v8, (a0)
; CHECK-NEXT: ret
  %s0 = shufflevector <2 x i16> %v0, <2 x i16> %v1, <4 x i32>
  %s1 = shufflevector <2 x i16> %v2, <2 x i16> %v3, <4 x i32>
  %s2 = shufflevector <2 x i16> %v4, <2 x i16> %v5, <4 x i32>
  %s3 = shufflevector <4 x i16> %s0, <4 x i16> %s1, <8 x i32>
  %s4 = shufflevector <2 x i16> %v6, <2 x i16> poison, <4 x i32>
  %s5 = shufflevector <4 x i16> %s2, <4 x i16> %s4, <8 x i32>
  %interleaved.vec = shufflevector <8 x i16> %s3, <8 x i16> %s5, <14 x i32>
  store <14 x i16> %interleaved.vec, ptr %ptr
  ret void
}

define void @store_factor8(ptr %ptr, <2 x i16> %v0, <2 x i16> %v1, <2 x i16> %v2, <2 x i16> %v3, <2 x i16> %v4, <2 x i16> %v5, <2 x i16> %v6, <2 x i16> %v7) {
; CHECK-LABEL: store_factor8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vsseg8e16.v v8, (a0)
; CHECK-NEXT: ret
  %s0 = shufflevector <2 x i16> %v0, <2 x i16> %v1, <4 x i32>
  %s1 = shufflevector <2 x i16> %v2, <2 x i16> %v3, <4 x i32>
  %s2 = shufflevector <2 x i16> %v4, <2 x i16> %v5, <4 x i32>
  %s3 = shufflevector <4 x i16> %s0, <4 x i16> %s1, <8 x i32>
  %s4 = shufflevector <2 x i16> %v6, <2 x i16> %v7, <4 x i32>
  %s5 = shufflevector <4 x i16> %s2, <4 x i16> %s4, <8 x i32>
  %interleaved.vec = shufflevector <8 x i16> %s3, <8 x i16> %s5, <16 x i32>
  store <16 x i16> %interleaved.vec, ptr %ptr
  ret void
}

define void @vpstore_factor2(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: vpstore_factor2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsseg2e32.v v8, (a0)
; CHECK-NEXT: ret
  %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32>
  tail call void @llvm.vp.store.v8i32.p0(<8 x i32> %interleaved.vec, ptr %ptr, <8 x i1> splat (i1 true), i32 8)
  ret void
}

define void @vpstore_factor2_interleaved_mask_intrinsic(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i1> %m) {
; CHECK-LABEL: vpstore_factor2_interleaved_mask_intrinsic:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsseg2e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
  %interleaved.mask = call <8 x i1> @llvm.vector.interleave2(<4 x i1> %m, <4 x i1> %m)
  %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32>
  tail call void @llvm.vp.store.v8i32.p0(<8 x i32> %interleaved.vec, ptr %ptr, <8 x i1> %interleaved.mask, i32 8)
  ret void
}

define void @vpstore_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
; CHECK-LABEL: vpstore_factor3:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsseg3e32.v v8, (a0)
; CHECK-NEXT: ret
  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32>
  %s1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <8 x i32>
  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32>
  tail call void @llvm.vp.store.v12i32.p0(<12 x i32> %interleaved.vec, ptr %ptr, <12 x i1> splat (i1 true), i32 12)
  ret void
}

define void @vpstore_factor3_mask(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
; CHECK-LABEL: vpstore_factor3_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v0, 5
; CHECK-NEXT: vsseg3e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32>
  %s1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <8 x i32>
  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32>
  tail call void @llvm.vp.store.v12i32.p0(<12 x i32> %interleaved.vec, ptr %ptr, <12 x i1> , i32 12)
  ret void
}

define void @vpstore_factor4(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
; CHECK-LABEL: vpstore_factor4:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsseg4e32.v v8, (a0)
; CHECK-NEXT: ret
  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32>
  %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32>
  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32>
  tail call void @llvm.vp.store.v16i32.p0(<16 x i32> %interleaved.vec, ptr %ptr, <16 x i1> splat (i1 true), i32 16)
  ret void
}

define void @vpstore_factor5(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <4 x i32> %v4) {
; CHECK-LABEL: vpstore_factor5:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsseg5e32.v v8, (a0)
; CHECK-NEXT: ret
  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32>
  %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32>
  %s2 = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32>
  %s3 = shufflevector <4 x i32> %v4, <4 x i32> poison, <16 x i32>
  %interleaved.vec = shufflevector <16 x i32> %s2, <16 x i32> %s3, <20 x i32>
  tail call void @llvm.vp.store.v20i32.p0(<20 x i32> %interleaved.vec, ptr %ptr, <20 x i1> splat (i1 true), i32 20)
  ret void
}

define void @vpstore_factor6(ptr %ptr, <2 x i16> %v0, <2 x i16> %v1, <2 x i16> %v2, <2 x i16> %v3, <2 x i16> %v4, <2 x i16> %v5) {
; CHECK-LABEL: vpstore_factor6:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vsseg6e16.v v8, (a0)
; CHECK-NEXT: ret
  %s0 = shufflevector <2 x i16> %v0, <2 x i16> %v1, <4 x i32>
  %s1 = shufflevector <2 x i16> %v2, <2 x i16> %v3, <4 x i32>
  %s2 = shufflevector <4 x i16> %s0, <4 x i16> %s1, <8 x i32>
  %s3 = shufflevector <2 x i16> %v4, <2 x i16> %v5, <8 x i32>
  %interleaved.vec = shufflevector <8 x i16> %s2, <8 x i16> %s3, <12 x i32>
  tail call void @llvm.vp.store.v12i16.p0(<12 x i16> %interleaved.vec, ptr %ptr, <12 x i1> splat (i1 true), i32 12)
  ret void
}

define void @vpstore_factor7(ptr %ptr, <2 x i16> %v0, <2 x i16> %v1, <2 x i16> %v2, <2 x i16> %v3, <2 x i16> %v4, <2 x i16> %v5, <2 x i16> %v6) {
; CHECK-LABEL: vpstore_factor7:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vsseg7e16.v v8, (a0)
; CHECK-NEXT: ret
  %s0 = shufflevector <2 x i16> %v0, <2 x i16> %v1, <4 x i32>
  %s1 = shufflevector <2 x i16> %v2, <2 x i16> %v3, <4 x i32>
  %s2 = shufflevector <2 x i16> %v4, <2 x i16> %v5, <4 x i32>
  %s3 = shufflevector <4 x i16> %s0, <4 x i16> %s1, <8 x i32>
  %s4 = shufflevector <2 x i16> %v6, <2 x i16> poison, <4 x i32>
  %s5 = shufflevector <4 x i16> %s2, <4 x i16> %s4, <8 x i32>
  %interleaved.vec = shufflevector <8 x i16> %s3, <8 x i16> %s5, <14 x i32>
  tail call void @llvm.vp.store.v14i16.p0(<14 x i16> %interleaved.vec, ptr %ptr, <14 x i1> splat (i1 true), i32 14)
  ret void
}

define void @vpstore_factor7_masked(ptr %ptr, <2 x i16> %v0, <2 x i16> %v1, <2 x i16> %v2, <2 x i16> %v3, <2 x i16> %v4, <2 x i16> %v5, <2 x i16> %v6, <2 x i1> %m) {
; CHECK-LABEL: vpstore_factor7_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vsseg7e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
  %interleaved.mask = shufflevector <2 x i1> %m, <2 x i1> poison, <14 x i32>
  %s0 = shufflevector <2 x i16> %v0, <2 x i16> %v1, <4 x i32>
  %s1 = shufflevector <2 x i16> %v2, <2 x i16> %v3, <4 x i32>
  %s2 = shufflevector <2 x i16> %v4, <2 x i16> %v5, <4 x i32>
  %s3 = shufflevector <4 x i16> %s0, <4 x i16> %s1, <8 x i32>
  %s4 = shufflevector <2 x i16> %v6, <2 x i16> poison, <4 x i32>
  %s5 = shufflevector <4 x i16> %s2, <4 x i16> %s4, <8 x i32>
  %interleaved.vec = shufflevector <8 x i16> %s3, <8 x i16> %s5, <14 x i32>
  tail call void @llvm.vp.store.v14i16.p0(<14 x i16> %interleaved.vec, ptr %ptr, <14 x i1> %interleaved.mask, i32 14)
  ret void
}

define void @vpstore_factor8(ptr %ptr, <2 x i16> %v0, <2 x i16> %v1, <2 x i16> %v2, <2 x i16> %v3, <2 x i16> %v4, <2 x i16> %v5, <2 x i16> %v6, <2 x i16> %v7) {
; CHECK-LABEL: vpstore_factor8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vsseg8e16.v v8, (a0)
; CHECK-NEXT: ret
  %s0 = shufflevector <2 x i16> %v0, <2 x i16> %v1, <4 x i32>
  %s1 = shufflevector <2 x i16> %v2, <2 x i16> %v3, <4 x i32>
  %s2 = shufflevector <2 x i16> %v4, <2 x i16> %v5, <4 x i32>
  %s3 = shufflevector <4 x i16> %s0, <4 x i16> %s1, <8 x i32>
  %s4 = shufflevector <2 x i16> %v6, <2 x i16> %v7, <4 x i32>
  %s5 = shufflevector <4 x i16> %s2, <4 x i16> %s4, <8 x i32>
  %interleaved.vec = shufflevector <8 x i16> %s3, <8 x i16> %s5, <16 x i32>
  tail call void @llvm.vp.store.v16i16.p0(<16 x i16> %interleaved.vec, ptr %ptr, <16 x i1> splat (i1 true), i32 16)
  ret void
}

define <4 x i32> @load_factor2_one_active(ptr %ptr) {
; CHECK-LABEL: load_factor2_one_active:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vlseg2e32.v v8, (a0)
; CHECK-NEXT: ret
  %interleaved.vec = load <8 x i32>, ptr %ptr
  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32>
  ret <4 x i32> %v0
}

define <4 x i32> @load_factor3_one_active(ptr %ptr) {
; CHECK-LABEL: load_factor3_one_active:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 12
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), a1
; CHECK-NEXT: ret
  %interleaved.vec = load <12 x i32>, ptr %ptr
  %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32>
  ret <4 x i32> %v0
}

define <4 x i32> @load_factor4_one_active(ptr %ptr) {
; CHECK-LABEL: load_factor4_one_active:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), a1
; CHECK-NEXT: ret
  %interleaved.vec = load <16 x i32>, ptr %ptr
  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32>
  ret <4 x i32> %v0
}

define <4 x i32> @load_factor5_one_active(ptr %ptr) {
; CHECK-LABEL: load_factor5_one_active:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 20
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), a1
; CHECK-NEXT: ret
  %interleaved.vec = load <20 x i32>, ptr %ptr
  %v0 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32>
  ret <4 x i32> %v0
}

define <2 x i16> @load_factor6_one_active(ptr %ptr) {
; CHECK-LABEL: load_factor6_one_active:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, a0, 10
; CHECK-NEXT: li a1, 12
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vlse16.v v8, (a0), a1
; CHECK-NEXT: ret
  %interleaved.vec = load <12 x i16>, ptr %ptr
  %v0 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32>
  ret <2 x i16> %v0
}

define <4 x i8> @load_factor7_one_active(ptr %ptr) vscale_range(8,1024) {
; CHECK-LABEL: load_factor7_one_active:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: li a1, 7
; CHECK-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
  %interleaved.vec = load <32 x i8>, ptr %ptr
  %v0 = shufflevector <32 x i8> %interleaved.vec, <32 x i8> poison, <4 x i32>
  ret <4 x i8> %v0
}

define <4 x i8> @load_factor8_one_active(ptr %ptr) vscale_range(8,1024) {
; CHECK-LABEL: load_factor8_one_active:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 8
; CHECK-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
  %interleaved.vec = load <32 x i8>, ptr %ptr
  %v0 = shufflevector <32 x i8> %interleaved.vec, <32 x i8> poison, <4 x i32>
  ret <4 x i8> %v0
}

define <4 x ptr> @load_factor3_one_active_ptr(ptr %ptr) {
; RV32-LABEL: load_factor3_one_active_ptr:
; RV32: # %bb.0:
; RV32-NEXT: li a1, 12
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vlse32.v v8, (a0), a1
; RV32-NEXT: ret
;
; RV64-LABEL: load_factor3_one_active_ptr:
; RV64: # %bb.0:
; RV64-NEXT: li a1, 24
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vlse64.v v8, (a0), a1
; RV64-NEXT: ret
  %interleaved.vec = load <12 x ptr>, ptr %ptr
  %v0 = shufflevector <12 x ptr> %interleaved.vec, <12 x ptr> poison, <4 x i32>
  ret <4 x ptr> %v0
}

define void @load_factor4_one_active_storeback(ptr %ptr) {
; CHECK-LABEL: load_factor4_one_active_storeback:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), a1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
  %interleaved.vec = load <16 x i32>, ptr %ptr
  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32>
  store <4 x i32> %v0, ptr %ptr
  ret void
}

; TODO: This should be a strided load
define void @load_factor4_one_active_storeback_full(ptr %ptr) {
; CHECK-LABEL: load_factor4_one_active_storeback_full:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; CHECK-NEXT: vslidedown.vi v12, v8, 4
; CHECK-NEXT: vmv1r.v v13, v8
; CHECK-NEXT: vmv1r.v v14, v12
; CHECK-NEXT: vsetivli zero, 4, e32, m4, ta, ma
; CHECK-NEXT: vslidedown.vi v16, v8, 8
; CHECK-NEXT: vmv1r.v v15, v16
; CHECK-NEXT: vslidedown.vi v16, v8, 12
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsseg4e32.v v13, (a0)
; CHECK-NEXT: ret
  %interleaved.vec = load <16 x i32>, ptr %ptr
  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <16 x i32>
  store <16 x i32> %v0, ptr %ptr
  ret void
}

define <4 x i32> @vp_load_factor3_one_active(ptr %ptr) {
; CHECK-LABEL: vp_load_factor3_one_active:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 12
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), a1
; CHECK-NEXT: ret
  %interleaved.vec = tail call <12 x i32> @llvm.vp.load.v12i32.p0(ptr %ptr, <12 x i1> splat (i1 true), i32 12)
  %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32>
  ret <4 x i32> %v0
}

define <4 x i32> @vp_load_factor5_one_active(ptr %ptr) {
; CHECK-LABEL: vp_load_factor5_one_active:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 20
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), a1
; CHECK-NEXT: ret
  %interleaved.vec = tail call <20 x i32> @llvm.vp.load.v20i32.p0(ptr %ptr, <20 x i1> splat (i1 true), i32 20)
  %v0 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32>
  ret <4 x i32> %v0
}

define void @store_factor4_one_active(ptr %ptr, <4 x i32> %v) {
; CHECK-LABEL: store_factor4_one_active:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsse32.v v8, (a0), a1
; CHECK-NEXT: ret
  %v0 = shufflevector <4 x i32> %v, <4 x i32> poison, <16 x i32>
  store <16 x i32> %v0, ptr %ptr
  ret void
}

define void @vpstore_factor4_one_active(ptr %ptr, <4 x i32> %v) {
; CHECK-LABEL: vpstore_factor4_one_active:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsse32.v v8, (a0), a1
; CHECK-NEXT: ret
  %v0 = shufflevector <4 x i32> %v, <4 x i32> poison, <16 x i32>
  tail call void @llvm.vp.store.v16i32.p0(<16 x i32> %v0, ptr %ptr, <16 x i1> splat (i1 true), i32 16)
  ret void
}

define void @store_factor4_one_active_idx1(ptr %ptr, <4 x i32> %v) {
; CHECK-LABEL: store_factor4_one_active_idx1:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, a0, 4
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsse32.v v8, (a0), a1
; CHECK-NEXT: ret
  %v0 = shufflevector <4 x i32> %v, <4 x i32> poison, <16 x i32>
  store <16 x i32> %v0, ptr %ptr
  ret void
}

define void @store_factor4_one_active_fullwidth(ptr %ptr, <16 x i32> %v) {
; CHECK-LABEL: store_factor4_one_active_fullwidth:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsse32.v v8, (a0), a1
; CHECK-NEXT: ret
  %v0 = shufflevector <16 x i32> %v, <16 x i32> poison, <16 x i32>
  store <16 x i32> %v0, ptr %ptr
  ret void
}

define void @store_factor4_one_active_slidedown(ptr %ptr, <4 x i32> %v) {
; CHECK-LABEL: store_factor4_one_active_slidedown:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v8, 1
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vsse32.v v8, (a0), a1
; CHECK-NEXT: ret
  %v0 = shufflevector <4 x i32> %v, <4 x i32> poison, <16 x i32>
  store <16 x i32> %v0, ptr %ptr
  ret void
}

define void @store_factor4_one_active_ptr(ptr %ptr, <4 x ptr> %v) {
; RV32-LABEL: store_factor4_one_active_ptr:
; RV32: # %bb.0:
; RV32-NEXT: li a1, 16
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vsse32.v v8, (a0), a1
; RV32-NEXT: ret
;
; RV64-LABEL: store_factor4_one_active_ptr:
; RV64: # %bb.0:
; RV64-NEXT: li a1, 32
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vsse64.v v8, (a0), a1
; RV64-NEXT: ret
  %v0 = shufflevector <4 x ptr> %v, <4 x ptr> poison, <16 x i32>
  store <16 x ptr> %v0, ptr %ptr
  ret void
}

; Negative tests

define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
; RV32-LABEL: invalid_vp_mask:
; RV32: # %bb.0:
; RV32-NEXT: li a1, 73
; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT: vmv.s.x v11, a1
; RV32-NEXT: lui a1, 1
; RV32-NEXT: vmv.v.i v10, 8
; RV32-NEXT: addi a1, a1, -43
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT: vmv.s.x v0, a1
; RV32-NEXT: li a1, 146
; RV32-NEXT: vsetivli zero, 12, e32, m4, ta, ma
; RV32-NEXT: vle32.v v12, (a0), v0.t
; RV32-NEXT: li a0, 36
; RV32-NEXT: vmv.s.x v20, a1
; RV32-NEXT: lui a1, %hi(.LCPI59_0)
; RV32-NEXT: addi a1, a1, %lo(.LCPI59_0)
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vle16.v v21, (a1)
; RV32-NEXT: vcompress.vm v8, v12, v11
; RV32-NEXT: vsetivli zero, 8, e32, m4, ta, ma
; RV32-NEXT: vslidedown.vi v16, v12, 8
; RV32-NEXT: vmv1r.v v0, v10
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vrgather.vi v8, v16, 1, v0.t
; RV32-NEXT: vcompress.vm v14, v12, v20
; RV32-NEXT: vrgather.vi v14, v16, 2, v0.t
; RV32-NEXT: vmv.s.x v0, a0
; RV32-NEXT: vmerge.vvm v12, v16, v12, v0
; RV32-NEXT: vrgatherei16.vv v10, v12, v21
; RV32-NEXT: vmv1r.v v9, v14
; RV32-NEXT: ret
;
; RV64-LABEL: invalid_vp_mask:
; RV64: # %bb.0:
; RV64-NEXT: li a1, 73
; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT: vmv.s.x v11, a1
; RV64-NEXT: li a1, 146
; RV64-NEXT: vmv.s.x v20, a1
; RV64-NEXT: lui a1, 1
; RV64-NEXT: vmv.v.i v10, 8
; RV64-NEXT: addi a1, a1, -43
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: vmv.s.x v0, a1
; RV64-NEXT: li a1, 36
; RV64-NEXT: vsetivli zero, 12, e32, m4, ta, ma
; RV64-NEXT: vle32.v v12, (a0), v0.t
; RV64-NEXT: li a0, 3
; RV64-NEXT: slli a0, a0, 32
; RV64-NEXT: addi a0, a0, 5
; RV64-NEXT: slli a0, a0, 16
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT: vcompress.vm v8, v12, v11
; RV64-NEXT: vsetivli zero, 8, e32, m4, ta, ma
; RV64-NEXT: vslidedown.vi v16, v12, 8
; RV64-NEXT: vmv1r.v v0, v10
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV64-NEXT: vrgather.vi v8, v16, 1, v0.t
; RV64-NEXT: vcompress.vm v14, v12, v20
; RV64-NEXT: vrgather.vi v14, v16, 2, v0.t
; RV64-NEXT: vmv.s.x v0, a1
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: vmerge.vvm v12, v16, v12, v0
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vmv.v.x v9, a0
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT: vrgatherei16.vv v10, v12, v9
; RV64-NEXT: vmv1r.v v9, v14
; RV64-NEXT: ret
  %interleaved.vec = tail call <12 x i32> @llvm.vp.load.v12i32.p0(ptr %ptr, <12 x i1> , i32 12)
  %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32>
  %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32>
  %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32>
  %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
  %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
  %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2
  ret {<4 x i32>, <4 x i32>, <4 x i32>} %res2
}

define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) {
; RV32-LABEL: invalid_vp_evl:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 10, e32, m4, ta, ma
; RV32-NEXT: vle32.v v12, (a0)
; RV32-NEXT: li a0, 73
; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; RV32-NEXT: vmv.v.i v0, 8
; RV32-NEXT: vmv.s.x v10, a0
; RV32-NEXT: li a0, 146
; RV32-NEXT: vmv.s.x v11, a0
; RV32-NEXT: lui a0, %hi(.LCPI60_0)
; RV32-NEXT: addi a0, a0, %lo(.LCPI60_0)
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vle16.v v20, (a0)
; RV32-NEXT: li a0, 36
; RV32-NEXT: vcompress.vm v8, v12, v10
; RV32-NEXT: vsetivli zero, 8, e32, m4, ta, ma
; RV32-NEXT: vslidedown.vi v16, v12, 8
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vrgather.vi v8, v16, 1, v0.t
; RV32-NEXT: vcompress.vm v14, v12, v11
; RV32-NEXT: vrgather.vi v14, v16, 2, v0.t
; RV32-NEXT: vmv.s.x v0, a0
; RV32-NEXT: vmerge.vvm v12, v16, v12, v0
; RV32-NEXT: vrgatherei16.vv v10, v12, v20
; RV32-NEXT: vmv1r.v v9, v14
; RV32-NEXT: ret
;
; RV64-LABEL: invalid_vp_evl:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 10, e32, m4, ta, ma
; RV64-NEXT: vle32.v v12, (a0)
; RV64-NEXT: li a0, 73
; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; RV64-NEXT: vmv.v.i v0, 8
; RV64-NEXT: vmv.s.x v10, a0
; RV64-NEXT: li a0, 146
; RV64-NEXT: vmv.s.x v11, a0
; RV64-NEXT: li a0, 36
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT: vcompress.vm v8, v12, v10
; RV64-NEXT: vsetivli zero, 8, e32, m4, ta, ma
; RV64-NEXT: vslidedown.vi v16, v12, 8
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV64-NEXT: vrgather.vi v8, v16, 1, v0.t
; RV64-NEXT: vcompress.vm v14, v12, v11
; RV64-NEXT: vrgather.vi v14, v16, 2, v0.t
; RV64-NEXT: vmv.s.x v0, a0
; RV64-NEXT: li a0, 3
; RV64-NEXT: slli a0, a0, 32
; RV64-NEXT: addi a0, a0, 5
; RV64-NEXT: slli a0, a0, 16
; RV64-NEXT: addi a0, a0, 2
; RV64-NEXT: vmerge.vvm v12, v16, v12, v0
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vmv.v.x v9, a0
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT: vrgatherei16.vv v10, v12, v9
; RV64-NEXT: vmv1r.v v9, v14
; RV64-NEXT: ret
  %interleaved.vec = tail call <12 x i32> @llvm.vp.load.v12i32.p0(ptr %ptr, <12 x i1> splat (i1 true), i32 10)
  %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32>
  %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32>
  %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32>
  %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
  %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
  %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2
  ret {<4 x i32>, <4 x i32>, <4 x i32>} %res2
}

define {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} @maskedload_factor5(ptr %ptr) {
; CHECK-LABEL: maskedload_factor5:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vlseg5e32.v v8, (a0)
; CHECK-NEXT: ret
  %interleaved.vec = tail call <20 x i32> @llvm.masked.load(ptr %ptr, i32 4, <20 x i1> splat (i1 true), <20 x i32> poison)
  %v0 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32>
  %v1 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32>
  %v2 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32>
  %v3 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32>
  %v4 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32>
  %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
  %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
  %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2
  %res3 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res2, <4 x i32> %v3, 3
  %res4 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res3, <4 x i32> %v4, 4
  ret {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res4
}

define void @maskedstore_factor2(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: maskedstore_factor2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsseg2e32.v v8, (a0)
; CHECK-NEXT: ret
  %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32>
  tail call void @llvm.masked.store(<8 x i32> %interleaved.vec, ptr %ptr, i32 4, <8 x i1> splat (i1 true))
  ret void
}