-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp                   41
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll      16
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll        112
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll    290
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll    74
5 files changed, 258 insertions, 275 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4caadef..8235b53 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3877,6 +3877,47 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
return convertFromScalableVector(VT, Vec, DAG, Subtarget);
}
+ // For m1 vectors, if we have non-undef values in both halves of our vector,
+ // split the vector into low and high halves, build them separately, then
+ // use a vselect to combine them. For long vectors, this cuts the critical
+ // path of the vslide1down sequence in half, and gives us an opportunity
+ // to special case each half independently. Note that we don't change the
+ // length of the sub-vectors here, so if both halves fall back to the generic
+ // vslide1down path, we should be able to fold the vselect into the final
+ // vslidedown (for the undef tail) for the first half w/ masking.
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumUndefElts =
+ count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
+ unsigned NumDefElts = NumElts - NumUndefElts;
+ if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
+ ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {
+ SmallVector<SDValue> SubVecAOps, SubVecBOps;
+ SmallVector<SDValue> MaskVals;
+ SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
+ SubVecAOps.reserve(NumElts);
+ SubVecBOps.reserve(NumElts);
+ for (unsigned i = 0; i < NumElts; i++) {
+ SDValue Elem = Op->getOperand(i);
+ if (i < NumElts / 2) {
+ SubVecAOps.push_back(Elem);
+ SubVecBOps.push_back(UndefElem);
+ } else {
+ SubVecAOps.push_back(UndefElem);
+ SubVecBOps.push_back(Elem);
+ }
+ bool SelectMaskVal = (i < NumElts / 2);
+ MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
+ }
+ assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
+ MaskVals.size() == NumElts);
+
+ SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
+ SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
+ SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
+ return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
+ }
+
// Cap the cost at a value linear to the number of elements in the vector.
// The default lowering is to use the stack. The vector store + scalar loads
// is linear in VL. However, at high lmuls vslide1down and vslidedown end up
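
Illustration of the new lowering path added above (a minimal sketch in pseudo-DAG notation, not compilable; the element names e0..e7 are hypothetical): a fully-defined 8-element build_vector is rebuilt as two half-populated build_vectors plus a constant i1 mask, and the resulting VSELECT is what later folds into the masked vslidedown.vi visible in the updated test checks below.

    BUILD_VECTOR e0, e1, e2, e3, e4, e5, e6, e7
      becomes
    SubVecA    = BUILD_VECTOR e0, e1, e2, e3, undef, undef, undef, undef
    SubVecB    = BUILD_VECTOR undef, undef, undef, undef, e4, e5, e6, e7
    SelectMask = BUILD_VECTOR 1, 1, 1, 1, 0, 0, 0, 0    ; i1 mask, true selects SubVecA
    Result     = VSELECT SelectMask, SubVecA, SubVecB
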
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index a2bd862..8e214e4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1399,15 +1399,17 @@ define <2 x double> @vid_step2_v2f64() {
define <8 x float> @buildvec_v8f32_zvl256(float %e0, float %e1, float %e2, float %e3, float %e4, float %e5, float %e6, float %e7) vscale_range(4, 128) {
; CHECK-LABEL: buildvec_v8f32_zvl256:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, mu
; CHECK-NEXT: vfmv.v.f v8, fa0
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
; CHECK-NEXT: vfslide1down.vf v8, v8, fa2
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa3
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa4
+; CHECK-NEXT: vfslide1down.vf v9, v8, fa3
+; CHECK-NEXT: vfmv.v.f v8, fa4
; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
+; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
+; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
; CHECK-NEXT: ret
%v0 = insertelement <8 x float> poison, float %e0, i64 0
%v1 = insertelement <8 x float> %v0, float %e1, i64 1
@@ -1448,15 +1450,17 @@ define <8 x double> @buildvec_v8f64_zvl256(double %e0, double %e1, double %e2, d
define <8 x double> @buildvec_v8f64_zvl512(double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e6, double %e7) vscale_range(8, 128) {
; CHECK-LABEL: buildvec_v8f64_zvl512:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, mu
; CHECK-NEXT: vfmv.v.f v8, fa0
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
; CHECK-NEXT: vfslide1down.vf v8, v8, fa2
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa3
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa4
+; CHECK-NEXT: vfslide1down.vf v9, v8, fa3
+; CHECK-NEXT: vfmv.v.f v8, fa4
; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
+; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
+; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
; CHECK-NEXT: ret
%v0 = insertelement <8 x double> poison, double %e0, i64 0
%v1 = insertelement <8 x double> %v0, double %e1, i64 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
index ed0b15c..85b8490 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
@@ -359,28 +359,28 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT: feq.d a0, fa3, fa3
; RV32-NEXT: fmax.d fa3, fa3, fa5
; RV32-NEXT: fmin.d fa3, fa3, fa4
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; RV32-NEXT: fld fa2, 40(sp)
; RV32-NEXT: fcvt.w.d a2, fa3, rtz
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: fld fa3, 32(sp)
; RV32-NEXT: neg a0, a0
; RV32-NEXT: and a0, a0, a2
-; RV32-NEXT: vslide1down.vx v8, v10, a0
-; RV32-NEXT: feq.d a0, fa3, fa3
-; RV32-NEXT: fmax.d fa3, fa3, fa5
+; RV32-NEXT: feq.d a2, fa2, fa2
+; RV32-NEXT: fmax.d fa3, fa2, fa5
; RV32-NEXT: fmin.d fa3, fa3, fa4
-; RV32-NEXT: fcvt.w.d a2, fa3, rtz
-; RV32-NEXT: fld fa3, 40(sp)
-; RV32-NEXT: neg a0, a0
-; RV32-NEXT: and a0, a0, a2
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: feq.d a0, fa3, fa3
+; RV32-NEXT: fcvt.w.d a3, fa3, rtz
+; RV32-NEXT: fld fa3, 32(sp)
+; RV32-NEXT: vslide1down.vx v8, v10, a0
+; RV32-NEXT: neg a0, a2
+; RV32-NEXT: and a0, a0, a3
+; RV32-NEXT: feq.d a2, fa3, fa3
+; RV32-NEXT: neg a2, a2
; RV32-NEXT: fmax.d fa3, fa3, fa5
; RV32-NEXT: fmin.d fa3, fa3, fa4
-; RV32-NEXT: fcvt.w.d a2, fa3, rtz
+; RV32-NEXT: fcvt.w.d a3, fa3, rtz
; RV32-NEXT: fld fa3, 48(sp)
-; RV32-NEXT: neg a0, a0
-; RV32-NEXT: and a0, a0, a2
-; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: and a2, a2, a3
+; RV32-NEXT: vmv.v.x v9, a2
+; RV32-NEXT: vslide1down.vx v9, v9, a0
; RV32-NEXT: feq.d a0, fa3, fa3
; RV32-NEXT: fmax.d fa3, fa3, fa5
; RV32-NEXT: fmin.d fa3, fa3, fa4
@@ -388,15 +388,17 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT: fld fa3, 56(sp)
; RV32-NEXT: neg a0, a0
; RV32-NEXT: and a0, a0, a2
-; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v9, v9, a0
; RV32-NEXT: feq.d a0, fa3, fa3
; RV32-NEXT: neg a0, a0
; RV32-NEXT: fmax.d fa5, fa3, fa5
; RV32-NEXT: fmin.d fa5, fa5, fa4
; RV32-NEXT: fcvt.w.d a2, fa5, rtz
; RV32-NEXT: and a0, a0, a2
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vse8.v v8, (a1)
+; RV32-NEXT: vmv.v.i v0, 15
+; RV32-NEXT: vslide1down.vx v9, v9, a0
+; RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; RV32-NEXT: vse8.v v9, (a1)
; RV32-NEXT: addi sp, s0, -128
; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
@@ -458,28 +460,28 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
; RV64-NEXT: feq.d a0, fa3, fa3
; RV64-NEXT: fmax.d fa3, fa3, fa5
; RV64-NEXT: fmin.d fa3, fa3, fa4
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; RV64-NEXT: fld fa2, 40(sp)
; RV64-NEXT: fcvt.l.d a2, fa3, rtz
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT: fld fa3, 32(sp)
; RV64-NEXT: neg a0, a0
; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: vslide1down.vx v8, v10, a0
-; RV64-NEXT: feq.d a0, fa3, fa3
-; RV64-NEXT: fmax.d fa3, fa3, fa5
+; RV64-NEXT: feq.d a2, fa2, fa2
+; RV64-NEXT: fmax.d fa3, fa2, fa5
; RV64-NEXT: fmin.d fa3, fa3, fa4
-; RV64-NEXT: fcvt.l.d a2, fa3, rtz
-; RV64-NEXT: fld fa3, 40(sp)
-; RV64-NEXT: neg a0, a0
-; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: vslide1down.vx v8, v8, a0
-; RV64-NEXT: feq.d a0, fa3, fa3
+; RV64-NEXT: fcvt.l.d a3, fa3, rtz
+; RV64-NEXT: fld fa3, 32(sp)
+; RV64-NEXT: vslide1down.vx v8, v10, a0
+; RV64-NEXT: neg a0, a2
+; RV64-NEXT: and a0, a0, a3
+; RV64-NEXT: feq.d a2, fa3, fa3
+; RV64-NEXT: negw a2, a2
; RV64-NEXT: fmax.d fa3, fa3, fa5
; RV64-NEXT: fmin.d fa3, fa3, fa4
-; RV64-NEXT: fcvt.l.d a2, fa3, rtz
+; RV64-NEXT: fcvt.l.d a3, fa3, rtz
; RV64-NEXT: fld fa3, 48(sp)
-; RV64-NEXT: neg a0, a0
-; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: and a2, a2, a3
+; RV64-NEXT: vmv.v.x v9, a2
+; RV64-NEXT: vslide1down.vx v9, v9, a0
; RV64-NEXT: feq.d a0, fa3, fa3
; RV64-NEXT: fmax.d fa3, fa3, fa5
; RV64-NEXT: fmin.d fa3, fa3, fa4
@@ -487,15 +489,17 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
; RV64-NEXT: fld fa3, 56(sp)
; RV64-NEXT: neg a0, a0
; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: vslide1down.vx v9, v9, a0
; RV64-NEXT: feq.d a0, fa3, fa3
; RV64-NEXT: neg a0, a0
; RV64-NEXT: fmax.d fa5, fa3, fa5
; RV64-NEXT: fmin.d fa5, fa5, fa4
; RV64-NEXT: fcvt.l.d a2, fa5, rtz
; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: vslide1down.vx v8, v8, a0
-; RV64-NEXT: vse8.v v8, (a1)
+; RV64-NEXT: vmv.v.i v0, 15
+; RV64-NEXT: vslide1down.vx v9, v9, a0
+; RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; RV64-NEXT: vse8.v v9, (a1)
; RV64-NEXT: addi sp, s0, -128
; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
@@ -553,11 +557,11 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT: vslidedown.vi v8, v8, 3
; RV32-NEXT: vfmv.f.s fa4, v8
; RV32-NEXT: fmax.d fa4, fa4, fa3
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: fld fa2, 32(sp)
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; RV32-NEXT: fld fa2, 40(sp)
; RV32-NEXT: fmin.d fa4, fa4, fa5
; RV32-NEXT: fcvt.wu.d a0, fa4, rtz
-; RV32-NEXT: fld fa4, 40(sp)
+; RV32-NEXT: fld fa4, 32(sp)
; RV32-NEXT: fmax.d fa2, fa2, fa3
; RV32-NEXT: fmin.d fa2, fa2, fa5
; RV32-NEXT: fcvt.wu.d a2, fa2, rtz
@@ -570,14 +574,16 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT: fmin.d fa4, fa4, fa5
; RV32-NEXT: fcvt.wu.d a0, fa4, rtz
; RV32-NEXT: fld fa4, 56(sp)
-; RV32-NEXT: vslide1down.vx v8, v8, a2
-; RV32-NEXT: vslide1down.vx v8, v8, a3
-; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vmv.v.x v9, a3
+; RV32-NEXT: vslide1down.vx v9, v9, a2
+; RV32-NEXT: vslide1down.vx v9, v9, a0
; RV32-NEXT: fmax.d fa4, fa4, fa3
; RV32-NEXT: fmin.d fa5, fa4, fa5
; RV32-NEXT: fcvt.wu.d a0, fa5, rtz
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vse8.v v8, (a1)
+; RV32-NEXT: vmv.v.i v0, 15
+; RV32-NEXT: vslide1down.vx v9, v9, a0
+; RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; RV32-NEXT: vse8.v v9, (a1)
; RV32-NEXT: addi sp, s0, -128
; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
@@ -627,11 +633,11 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV64-NEXT: vslidedown.vi v8, v8, 3
; RV64-NEXT: vfmv.f.s fa4, v8
; RV64-NEXT: fmax.d fa4, fa4, fa3
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT: fld fa2, 32(sp)
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; RV64-NEXT: fld fa2, 40(sp)
; RV64-NEXT: fmin.d fa4, fa4, fa5
; RV64-NEXT: fcvt.lu.d a0, fa4, rtz
-; RV64-NEXT: fld fa4, 40(sp)
+; RV64-NEXT: fld fa4, 32(sp)
; RV64-NEXT: fmax.d fa2, fa2, fa3
; RV64-NEXT: fmin.d fa2, fa2, fa5
; RV64-NEXT: fcvt.lu.d a2, fa2, rtz
@@ -644,14 +650,16 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV64-NEXT: fmin.d fa4, fa4, fa5
; RV64-NEXT: fcvt.lu.d a0, fa4, rtz
; RV64-NEXT: fld fa4, 56(sp)
-; RV64-NEXT: vslide1down.vx v8, v8, a2
-; RV64-NEXT: vslide1down.vx v8, v8, a3
-; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: vmv.v.x v9, a3
+; RV64-NEXT: vslide1down.vx v9, v9, a2
+; RV64-NEXT: vslide1down.vx v9, v9, a0
; RV64-NEXT: fmax.d fa4, fa4, fa3
; RV64-NEXT: fmin.d fa5, fa4, fa5
; RV64-NEXT: fcvt.lu.d a0, fa5, rtz
-; RV64-NEXT: vslide1down.vx v8, v8, a0
-; RV64-NEXT: vse8.v v8, (a1)
+; RV64-NEXT: vmv.v.i v0, 15
+; RV64-NEXT: vslide1down.vx v9, v9, a0
+; RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; RV64-NEXT: vse8.v v9, (a1)
; RV64-NEXT: addi sp, s0, -128
; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index e691e63..ed6c01a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -1181,89 +1181,46 @@ define <8 x i64> @v8xi64_exact_undef_prefix(i64 %a, i64 %b, i64 %c, i64 %d) vsca
define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) {
-; RV32-LABEL: buildvec_v16i8_loads_contigous:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset s0, -4
-; RV32-NEXT: lbu a1, 1(a0)
-; RV32-NEXT: lbu a2, 2(a0)
-; RV32-NEXT: lbu a3, 3(a0)
-; RV32-NEXT: lbu a4, 4(a0)
-; RV32-NEXT: lbu a5, 5(a0)
-; RV32-NEXT: lbu a6, 6(a0)
-; RV32-NEXT: lbu a7, 7(a0)
-; RV32-NEXT: lbu t0, 8(a0)
-; RV32-NEXT: lbu t1, 9(a0)
-; RV32-NEXT: lbu t2, 10(a0)
-; RV32-NEXT: lbu t3, 11(a0)
-; RV32-NEXT: lbu t4, 12(a0)
-; RV32-NEXT: lbu t5, 13(a0)
-; RV32-NEXT: lbu t6, 14(a0)
-; RV32-NEXT: lbu s0, 15(a0)
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT: vlse8.v v8, (a0), zero
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: vslide1down.vx v8, v8, a2
-; RV32-NEXT: vslide1down.vx v8, v8, a3
-; RV32-NEXT: vslide1down.vx v8, v8, a4
-; RV32-NEXT: vslide1down.vx v8, v8, a5
-; RV32-NEXT: vslide1down.vx v8, v8, a6
-; RV32-NEXT: vslide1down.vx v8, v8, a7
-; RV32-NEXT: vslide1down.vx v8, v8, t0
-; RV32-NEXT: vslide1down.vx v8, v8, t1
-; RV32-NEXT: vslide1down.vx v8, v8, t2
-; RV32-NEXT: vslide1down.vx v8, v8, t3
-; RV32-NEXT: vslide1down.vx v8, v8, t4
-; RV32-NEXT: vslide1down.vx v8, v8, t5
-; RV32-NEXT: vslide1down.vx v8, v8, t6
-; RV32-NEXT: vslide1down.vx v8, v8, s0
-; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
-; RV32-NEXT: ret
-;
-; RV64-LABEL: buildvec_v16i8_loads_contigous:
-; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
-; RV64-NEXT: sd s0, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: .cfi_offset s0, -8
-; RV64-NEXT: lbu a1, 1(a0)
-; RV64-NEXT: lbu a2, 2(a0)
-; RV64-NEXT: lbu a3, 3(a0)
-; RV64-NEXT: lbu a4, 4(a0)
-; RV64-NEXT: lbu a5, 5(a0)
-; RV64-NEXT: lbu a6, 6(a0)
-; RV64-NEXT: lbu a7, 7(a0)
-; RV64-NEXT: lbu t0, 8(a0)
-; RV64-NEXT: lbu t1, 9(a0)
-; RV64-NEXT: lbu t2, 10(a0)
-; RV64-NEXT: lbu t3, 11(a0)
-; RV64-NEXT: lbu t4, 12(a0)
-; RV64-NEXT: lbu t5, 13(a0)
-; RV64-NEXT: lbu t6, 14(a0)
-; RV64-NEXT: lbu s0, 15(a0)
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT: vlse8.v v8, (a0), zero
-; RV64-NEXT: vslide1down.vx v8, v8, a1
-; RV64-NEXT: vslide1down.vx v8, v8, a2
-; RV64-NEXT: vslide1down.vx v8, v8, a3
-; RV64-NEXT: vslide1down.vx v8, v8, a4
-; RV64-NEXT: vslide1down.vx v8, v8, a5
-; RV64-NEXT: vslide1down.vx v8, v8, a6
-; RV64-NEXT: vslide1down.vx v8, v8, a7
-; RV64-NEXT: vslide1down.vx v8, v8, t0
-; RV64-NEXT: vslide1down.vx v8, v8, t1
-; RV64-NEXT: vslide1down.vx v8, v8, t2
-; RV64-NEXT: vslide1down.vx v8, v8, t3
-; RV64-NEXT: vslide1down.vx v8, v8, t4
-; RV64-NEXT: vslide1down.vx v8, v8, t5
-; RV64-NEXT: vslide1down.vx v8, v8, t6
-; RV64-NEXT: vslide1down.vx v8, v8, s0
-; RV64-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
-; RV64-NEXT: ret
+; CHECK-LABEL: buildvec_v16i8_loads_contigous:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lbu a1, 1(a0)
+; CHECK-NEXT: lbu a2, 2(a0)
+; CHECK-NEXT: lbu a3, 3(a0)
+; CHECK-NEXT: lbu a4, 4(a0)
+; CHECK-NEXT: lbu a5, 5(a0)
+; CHECK-NEXT: lbu a6, 6(a0)
+; CHECK-NEXT: lbu a7, 7(a0)
+; CHECK-NEXT: lbu t0, 9(a0)
+; CHECK-NEXT: lbu t1, 10(a0)
+; CHECK-NEXT: lbu t2, 11(a0)
+; CHECK-NEXT: lbu t3, 12(a0)
+; CHECK-NEXT: lbu t4, 13(a0)
+; CHECK-NEXT: lbu t5, 14(a0)
+; CHECK-NEXT: lbu t6, 15(a0)
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vlse8.v v8, (a0), zero
+; CHECK-NEXT: addi a0, a0, 8
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
+; CHECK-NEXT: vslide1down.vx v8, v8, a2
+; CHECK-NEXT: vslide1down.vx v8, v8, a3
+; CHECK-NEXT: vslide1down.vx v8, v8, a4
+; CHECK-NEXT: vlse8.v v9, (a0), zero
+; CHECK-NEXT: vslide1down.vx v8, v8, a5
+; CHECK-NEXT: vslide1down.vx v8, v8, a6
+; CHECK-NEXT: vslide1down.vx v10, v8, a7
+; CHECK-NEXT: vslide1down.vx v8, v9, t0
+; CHECK-NEXT: vslide1down.vx v8, v8, t1
+; CHECK-NEXT: vslide1down.vx v8, v8, t2
+; CHECK-NEXT: vslide1down.vx v8, v8, t3
+; CHECK-NEXT: vslide1down.vx v8, v8, t4
+; CHECK-NEXT: vslide1down.vx v8, v8, t5
+; CHECK-NEXT: vslide1down.vx v8, v8, t6
+; CHECK-NEXT: li a0, 255
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
+; CHECK-NEXT: vslidedown.vi v8, v10, 8, v0.t
+; CHECK-NEXT: ret
%p2 = getelementptr i8, ptr %p, i32 1
%p3 = getelementptr i8, ptr %p, i32 2
%p4 = getelementptr i8, ptr %p, i32 3
@@ -1318,89 +1275,46 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) {
define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) {
-; RV32-LABEL: buildvec_v16i8_loads_gather:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset s0, -4
-; RV32-NEXT: lbu a1, 1(a0)
-; RV32-NEXT: lbu a2, 22(a0)
-; RV32-NEXT: lbu a3, 31(a0)
-; RV32-NEXT: lbu a4, 44(a0)
-; RV32-NEXT: lbu a5, 55(a0)
-; RV32-NEXT: lbu a6, 623(a0)
-; RV32-NEXT: lbu a7, 75(a0)
-; RV32-NEXT: lbu t0, 82(a0)
-; RV32-NEXT: lbu t1, 93(a0)
-; RV32-NEXT: lbu t2, 105(a0)
-; RV32-NEXT: lbu t3, 161(a0)
-; RV32-NEXT: lbu t4, 124(a0)
-; RV32-NEXT: lbu t5, 163(a0)
-; RV32-NEXT: lbu t6, 144(a0)
-; RV32-NEXT: lbu s0, 154(a0)
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT: vlse8.v v8, (a0), zero
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: vslide1down.vx v8, v8, a2
-; RV32-NEXT: vslide1down.vx v8, v8, a3
-; RV32-NEXT: vslide1down.vx v8, v8, a4
-; RV32-NEXT: vslide1down.vx v8, v8, a5
-; RV32-NEXT: vslide1down.vx v8, v8, a6
-; RV32-NEXT: vslide1down.vx v8, v8, a7
-; RV32-NEXT: vslide1down.vx v8, v8, t0
-; RV32-NEXT: vslide1down.vx v8, v8, t1
-; RV32-NEXT: vslide1down.vx v8, v8, t2
-; RV32-NEXT: vslide1down.vx v8, v8, t3
-; RV32-NEXT: vslide1down.vx v8, v8, t4
-; RV32-NEXT: vslide1down.vx v8, v8, t5
-; RV32-NEXT: vslide1down.vx v8, v8, t6
-; RV32-NEXT: vslide1down.vx v8, v8, s0
-; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
-; RV32-NEXT: ret
-;
-; RV64-LABEL: buildvec_v16i8_loads_gather:
-; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
-; RV64-NEXT: sd s0, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: .cfi_offset s0, -8
-; RV64-NEXT: lbu a1, 1(a0)
-; RV64-NEXT: lbu a2, 22(a0)
-; RV64-NEXT: lbu a3, 31(a0)
-; RV64-NEXT: lbu a4, 44(a0)
-; RV64-NEXT: lbu a5, 55(a0)
-; RV64-NEXT: lbu a6, 623(a0)
-; RV64-NEXT: lbu a7, 75(a0)
-; RV64-NEXT: lbu t0, 82(a0)
-; RV64-NEXT: lbu t1, 93(a0)
-; RV64-NEXT: lbu t2, 105(a0)
-; RV64-NEXT: lbu t3, 161(a0)
-; RV64-NEXT: lbu t4, 124(a0)
-; RV64-NEXT: lbu t5, 163(a0)
-; RV64-NEXT: lbu t6, 144(a0)
-; RV64-NEXT: lbu s0, 154(a0)
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT: vlse8.v v8, (a0), zero
-; RV64-NEXT: vslide1down.vx v8, v8, a1
-; RV64-NEXT: vslide1down.vx v8, v8, a2
-; RV64-NEXT: vslide1down.vx v8, v8, a3
-; RV64-NEXT: vslide1down.vx v8, v8, a4
-; RV64-NEXT: vslide1down.vx v8, v8, a5
-; RV64-NEXT: vslide1down.vx v8, v8, a6
-; RV64-NEXT: vslide1down.vx v8, v8, a7
-; RV64-NEXT: vslide1down.vx v8, v8, t0
-; RV64-NEXT: vslide1down.vx v8, v8, t1
-; RV64-NEXT: vslide1down.vx v8, v8, t2
-; RV64-NEXT: vslide1down.vx v8, v8, t3
-; RV64-NEXT: vslide1down.vx v8, v8, t4
-; RV64-NEXT: vslide1down.vx v8, v8, t5
-; RV64-NEXT: vslide1down.vx v8, v8, t6
-; RV64-NEXT: vslide1down.vx v8, v8, s0
-; RV64-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
-; RV64-NEXT: ret
+; CHECK-LABEL: buildvec_v16i8_loads_gather:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lbu a1, 1(a0)
+; CHECK-NEXT: lbu a2, 22(a0)
+; CHECK-NEXT: lbu a3, 31(a0)
+; CHECK-NEXT: lbu a4, 44(a0)
+; CHECK-NEXT: lbu a5, 55(a0)
+; CHECK-NEXT: lbu a6, 623(a0)
+; CHECK-NEXT: lbu a7, 75(a0)
+; CHECK-NEXT: lbu t0, 93(a0)
+; CHECK-NEXT: lbu t1, 105(a0)
+; CHECK-NEXT: lbu t2, 161(a0)
+; CHECK-NEXT: lbu t3, 124(a0)
+; CHECK-NEXT: lbu t4, 163(a0)
+; CHECK-NEXT: lbu t5, 144(a0)
+; CHECK-NEXT: lbu t6, 154(a0)
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vlse8.v v8, (a0), zero
+; CHECK-NEXT: addi a0, a0, 82
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
+; CHECK-NEXT: vslide1down.vx v8, v8, a2
+; CHECK-NEXT: vslide1down.vx v8, v8, a3
+; CHECK-NEXT: vslide1down.vx v8, v8, a4
+; CHECK-NEXT: vlse8.v v9, (a0), zero
+; CHECK-NEXT: vslide1down.vx v8, v8, a5
+; CHECK-NEXT: vslide1down.vx v8, v8, a6
+; CHECK-NEXT: vslide1down.vx v10, v8, a7
+; CHECK-NEXT: vslide1down.vx v8, v9, t0
+; CHECK-NEXT: vslide1down.vx v8, v8, t1
+; CHECK-NEXT: vslide1down.vx v8, v8, t2
+; CHECK-NEXT: vslide1down.vx v8, v8, t3
+; CHECK-NEXT: vslide1down.vx v8, v8, t4
+; CHECK-NEXT: vslide1down.vx v8, v8, t5
+; CHECK-NEXT: vslide1down.vx v8, v8, t6
+; CHECK-NEXT: li a0, 255
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
+; CHECK-NEXT: vslidedown.vi v8, v10, 8, v0.t
+; CHECK-NEXT: ret
%p2 = getelementptr i8, ptr %p, i32 1
%p3 = getelementptr i8, ptr %p, i32 22
%p4 = getelementptr i8, ptr %p, i32 31
@@ -1560,21 +1474,26 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) {
; CHECK-NEXT: lbu a3, 55(a0)
; CHECK-NEXT: lbu a4, 623(a0)
; CHECK-NEXT: lbu a5, 75(a0)
-; CHECK-NEXT: lbu a6, 82(a0)
-; CHECK-NEXT: lbu a7, 93(a0)
-; CHECK-NEXT: lbu t0, 105(a0)
-; CHECK-NEXT: lbu a0, 161(a0)
+; CHECK-NEXT: lbu a6, 93(a0)
+; CHECK-NEXT: lbu a7, 105(a0)
+; CHECK-NEXT: lbu t0, 161(a0)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vlse8.v v8, (a1), zero
+; CHECK-NEXT: addi a0, a0, 82
; CHECK-NEXT: vslide1down.vx v8, v8, a2
+; CHECK-NEXT: vlse8.v v9, (a0), zero
; CHECK-NEXT: vslide1down.vx v8, v8, a3
; CHECK-NEXT: vslide1down.vx v8, v8, a4
-; CHECK-NEXT: vslide1down.vx v8, v8, a5
-; CHECK-NEXT: vslide1down.vx v8, v8, a6
+; CHECK-NEXT: vslide1down.vx v10, v8, a5
+; CHECK-NEXT: vslide1down.vx v8, v9, a6
; CHECK-NEXT: vslide1down.vx v8, v8, a7
; CHECK-NEXT: vslide1down.vx v8, v8, t0
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
; CHECK-NEXT: vslidedown.vi v8, v8, 4
+; CHECK-NEXT: li a0, 255
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
+; CHECK-NEXT: vslidedown.vi v8, v10, 8, v0.t
; CHECK-NEXT: ret
%p4 = getelementptr i8, ptr %p, i32 31
%p5 = getelementptr i8, ptr %p, i32 44
@@ -1615,26 +1534,31 @@ define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) {
; CHECK-NEXT: lbu a2, 44(a0)
; CHECK-NEXT: lbu a3, 55(a0)
; CHECK-NEXT: lbu a4, 75(a0)
-; CHECK-NEXT: lbu a5, 82(a0)
-; CHECK-NEXT: lbu a6, 93(a0)
-; CHECK-NEXT: lbu a7, 124(a0)
-; CHECK-NEXT: lbu t0, 144(a0)
-; CHECK-NEXT: lbu t1, 154(a0)
+; CHECK-NEXT: lbu a5, 93(a0)
+; CHECK-NEXT: lbu a6, 124(a0)
+; CHECK-NEXT: lbu a7, 144(a0)
+; CHECK-NEXT: lbu t0, 154(a0)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vlse8.v v8, (a0), zero
+; CHECK-NEXT: addi a0, a0, 82
; CHECK-NEXT: vslide1down.vx v8, v8, a1
; CHECK-NEXT: vslidedown.vi v8, v8, 2
; CHECK-NEXT: vslide1down.vx v8, v8, a2
+; CHECK-NEXT: vlse8.v v9, (a0), zero
; CHECK-NEXT: vslide1down.vx v8, v8, a3
; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vslide1down.vx v8, v8, a4
-; CHECK-NEXT: vslide1down.vx v8, v8, a5
-; CHECK-NEXT: vslide1down.vx v8, v8, a6
+; CHECK-NEXT: vslide1down.vx v10, v8, a4
+; CHECK-NEXT: vslide1down.vx v8, v9, a5
; CHECK-NEXT: vslidedown.vi v8, v8, 2
-; CHECK-NEXT: vslide1down.vx v8, v8, a7
+; CHECK-NEXT: vslide1down.vx v8, v8, a6
; CHECK-NEXT: vslidedown.vi v8, v8, 1
+; CHECK-NEXT: vslide1down.vx v8, v8, a7
; CHECK-NEXT: vslide1down.vx v8, v8, t0
-; CHECK-NEXT: vslide1down.vx v8, v8, t1
+; CHECK-NEXT: li a0, 255
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
+; CHECK-NEXT: vslidedown.vi v8, v10, 8, v0.t
; CHECK-NEXT: ret
%p2 = getelementptr i8, ptr %p, i32 1
%p3 = getelementptr i8, ptr %p, i32 22
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
index dd0fc5a..c295fed 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
@@ -278,32 +278,34 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1(i1 %x, i1 %y) {
define <8 x i1> @buildvec_mask_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) {
; CHECK-LABEL: buildvec_mask_nonconst_v8i1_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
-; CHECK-NEXT: li a4, 1
-; CHECK-NEXT: vslide1down.vx v8, v8, a4
-; CHECK-NEXT: vslide1down.vx v8, v8, a1
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vslide1down.vx v9, v8, a0
+; CHECK-NEXT: li a0, 1
+; CHECK-NEXT: vslide1down.vx v9, v9, a0
+; CHECK-NEXT: vslide1down.vx v9, v9, a1
; CHECK-NEXT: vslide1down.vx v8, v8, a3
; CHECK-NEXT: vslide1down.vx v8, v8, zero
+; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vslide1down.vx v8, v8, a2
+; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
;
; ZVE32F-LABEL: buildvec_mask_nonconst_v8i1_2:
; ZVE32F: # %bb.0:
-; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; ZVE32F-NEXT: vmv.v.x v8, a0
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
-; ZVE32F-NEXT: li a4, 1
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a4
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT: vslide1down.vx v9, v8, a0
+; ZVE32F-NEXT: li a0, 1
+; ZVE32F-NEXT: vslide1down.vx v9, v9, a0
+; ZVE32F-NEXT: vslide1down.vx v9, v9, a1
; ZVE32F-NEXT: vslide1down.vx v8, v8, a3
; ZVE32F-NEXT: vslide1down.vx v8, v8, zero
+; ZVE32F-NEXT: vmv.v.i v0, 15
; ZVE32F-NEXT: vslide1down.vx v8, v8, a2
+; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
; ZVE32F-NEXT: vand.vi v8, v8, 1
; ZVE32F-NEXT: vmsne.vi v0, v8, 0
; ZVE32F-NEXT: ret
@@ -321,32 +323,34 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) {
define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) optsize {
; CHECK-LABEL: buildvec_mask_optsize_nonconst_v8i1_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
-; CHECK-NEXT: li a4, 1
-; CHECK-NEXT: vslide1down.vx v8, v8, a4
-; CHECK-NEXT: vslide1down.vx v8, v8, a1
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vslide1down.vx v9, v8, a0
+; CHECK-NEXT: li a0, 1
+; CHECK-NEXT: vslide1down.vx v9, v9, a0
+; CHECK-NEXT: vslide1down.vx v9, v9, a1
; CHECK-NEXT: vslide1down.vx v8, v8, a3
; CHECK-NEXT: vslide1down.vx v8, v8, zero
+; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vslide1down.vx v8, v8, a2
+; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
;
; ZVE32F-LABEL: buildvec_mask_optsize_nonconst_v8i1_2:
; ZVE32F: # %bb.0:
-; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; ZVE32F-NEXT: vmv.v.x v8, a0
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
-; ZVE32F-NEXT: li a4, 1
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a4
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT: vslide1down.vx v9, v8, a0
+; ZVE32F-NEXT: li a0, 1
+; ZVE32F-NEXT: vslide1down.vx v9, v9, a0
+; ZVE32F-NEXT: vslide1down.vx v9, v9, a1
; ZVE32F-NEXT: vslide1down.vx v8, v8, a3
; ZVE32F-NEXT: vslide1down.vx v8, v8, zero
+; ZVE32F-NEXT: vmv.v.i v0, 15
; ZVE32F-NEXT: vslide1down.vx v8, v8, a2
+; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
; ZVE32F-NEXT: vand.vi v8, v8, 1
; ZVE32F-NEXT: vmsne.vi v0, v8, 0
; ZVE32F-NEXT: ret
@@ -364,30 +368,32 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %
define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1(i1 %x, i1 %y) optsize {
; CHECK-LABEL: buildvec_mask_optsize_nonconst_v8i1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
-; CHECK-NEXT: vslide1down.vx v8, v8, a1
-; CHECK-NEXT: vslide1down.vx v8, v8, a1
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vslide1down.vx v9, v8, a0
+; CHECK-NEXT: vslide1down.vx v9, v9, a1
+; CHECK-NEXT: vslide1down.vx v9, v9, a1
; CHECK-NEXT: vslide1down.vx v8, v8, a1
; CHECK-NEXT: vslide1down.vx v8, v8, a1
+; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vslide1down.vx v8, v8, a1
+; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
;
; ZVE32F-LABEL: buildvec_mask_optsize_nonconst_v8i1:
; ZVE32F: # %bb.0:
-; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; ZVE32F-NEXT: vmv.v.x v8, a0
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT: vslide1down.vx v9, v8, a0
+; ZVE32F-NEXT: vslide1down.vx v9, v9, a1
+; ZVE32F-NEXT: vslide1down.vx v9, v9, a1
; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; ZVE32F-NEXT: vmv.v.i v0, 15
; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
; ZVE32F-NEXT: vand.vi v8, v8, 1
; ZVE32F-NEXT: vmsne.vi v0, v8, 0
; ZVE32F-NEXT: ret