author     Bjorn Pettersson <bjorn.a.pettersson@ericsson.com>  2025-06-17 11:16:32 +0200
committer  Bjorn Pettersson <bjorn.a.pettersson@ericsson.com>  2025-06-17 13:15:07 +0200
commit     ef9547df04cbf0eb7e756a790f90a64704b57975
tree       40e6d81a8207a07a773511b3d0b538cf155c85f3
parent     01f9dff61fb028f69493a44616014256dee5fb2a
[RISCV] Fold (add (srl x, n), (srl x, n)) into (srl x, n-1)
This patch adds a new fold that turns (add (srl x, n), (srl x, n)) into
(srl x, n-1) when bit n-1 of x is known to be zero. This could perhaps be
moved to the generic DAGCombiner in the future, but this patch adds it as a
RISCV-specific combine.

For RISCV it typically triggers for DAG nodes like the following, which may be
created by the legalizer:

  t1: i32 = srl RISCVISD::READ_VLENB:i32, Constant:i32<2>
  t2: i32 = add t1, t1

Got the idea when working on a solution for #141034, as it may avoid some
regressions otherwise caused by the fix being prepared for that issue.
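As a side note, here is a minimal IR sketch of the identity being exploited.
It is hypothetical and not taken from the patch or its tests; the function
name and the and-mask are made up for illustration. The and with -8 makes
bit n-1 (here bit 2) known zero, mirroring the READ_VLENB case where the low
bits of vlenb are known to be zero.

  define i64 @add_of_equal_srl(i64 %a) {
    ; Clearing the low three bits makes bit 2 of %x known zero, so
    ; (%x >> 3) + (%x >> 3) equals %x >> 2 and the add can be folded away.
    %x = and i64 %a, -8
    %s = lshr i64 %x, 3
    %r = add i64 %s, %s
    ret i64 %r
  }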
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp                  |  30
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll             |   4
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll              |  19
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll                   | 283
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll                      |  12
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll                  |   9
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll                 |   3
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll      |   2
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll     | 194
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll           |  18
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll       |   9
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll             | 817
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll                 |  12
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll  | 100
14 files changed, 705 insertions, 807 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7cfada6..046bc11 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -15240,12 +15240,42 @@ static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
N0.getOperand(0));
}
+// Try to turn (add (srl x, n), (srl x, n)) into (srl x, n-1).
+//
+// This combine could perhaps be moved to DAGCombiner. For RISCV this kind of
+// pattern seems to appear in situations where x is READ_VLENB, which matches
+// the condition that the low bits of x need to be zero.
+static SDValue combineAddSrl(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // Match (add (srl x, n), (srl x, n)).
+ if (N0 != N1 || N0.getOpcode() != ISD::SRL)
+ return SDValue();
+
+ // Need an srl with a constant shift amount of at least 1.
+ std::optional<uint64_t> ShAmt = DAG.getValidShiftAmount(N0);
+ if (!ShAmt || *ShAmt == 0)
+ return SDValue();
+
+ // Last bit shifted out by srl should be known zero.
+ if (!DAG.computeKnownBits(N0.getOperand(0)).Zero[*ShAmt - 1])
+ return SDValue();
+
+ SDValue NewAmt = DAG.getShiftAmountConstant(*ShAmt - 1, VT, DL);
+ return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), NewAmt);
+}
+
static SDValue performADDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const RISCVSubtarget &Subtarget) {
SelectionDAG &DAG = DCI.DAG;
if (SDValue V = combineAddOfBooleanXor(N, DAG))
return V;
+ if (SDValue V = combineAddSrl(N, DAG))
+ return V;
if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
return V;
if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
index 83637e4..0d288c0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
@@ -490,8 +490,6 @@ define <vscale x 6 x half> @extract_nxv6f16_nxv12f16_6(<vscale x 12 x half> %in)
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v13, v10, a0
; CHECK-NEXT: vslidedown.vx v12, v9, a0
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v12, v10, a0
; CHECK-NEXT: vmv2r.v v8, v12
; CHECK-NEXT: ret
@@ -545,8 +543,6 @@ define <vscale x 6 x bfloat> @extract_nxv6bf16_nxv12bf16_6(<vscale x 12 x bfloat
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v13, v10, a0
; CHECK-NEXT: vslidedown.vx v12, v9, a0
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v12, v10, a0
; CHECK-NEXT: vmv2r.v v8, v12
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
index ca9cec9..d643e8e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
@@ -246,8 +246,7 @@ define <vscale x 16 x i32> @insert_nxv16i32_nxv1i32_1(<vscale x 16 x i32> %vec,
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v16, a0
; CHECK-NEXT: ret
%v = call <vscale x 16 x i32> @llvm.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec, i64 1)
@@ -282,8 +281,8 @@ define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_1(<vscale x 16 x i8> %vec, <vsc
; CHECK-LABEL: insert_nxv16i8_nxv1i8_1:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a1, a0, 2
; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: add a1, a0, a0
; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, ma
; CHECK-NEXT: vslideup.vx v8, v10, a0
; CHECK-NEXT: ret
@@ -363,8 +362,7 @@ define <vscale x 32 x half> @insert_nxv32f16_nxv2f16_2(<vscale x 32 x half> %vec
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v16, a0
; CHECK-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vector.insert.nxv2f16.nxv32f16(<vscale x 32 x half> %vec, <vscale x 2 x half> %subvec, i64 2)
@@ -376,8 +374,7 @@ define <vscale x 32 x half> @insert_nxv32f16_nxv2f16_26(<vscale x 32 x half> %ve
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v14, v16, a0
; CHECK-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vector.insert.nxv2f16.nxv32f16(<vscale x 32 x half> %vec, <vscale x 2 x half> %subvec, i64 26)
@@ -422,8 +419,8 @@ define <vscale x 32 x i1> @insert_nxv32i1_nxv8i1_8(<vscale x 32 x i1> %v, <vscal
; CHECK-LABEL: insert_nxv32i1_nxv8i1_8:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a1, a0, 2
; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: add a1, a0, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vx v0, v8, a0
; CHECK-NEXT: ret
@@ -570,8 +567,7 @@ define <vscale x 32 x bfloat> @insert_nxv32bf16_nxv2bf16_2(<vscale x 32 x bfloat
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v16, a0
; CHECK-NEXT: ret
%v = call <vscale x 32 x bfloat> @llvm.vector.insert.nxv2bf16.nxv32bf16(<vscale x 32 x bfloat> %vec, <vscale x 2 x bfloat> %subvec, i64 2)
@@ -583,8 +579,7 @@ define <vscale x 32 x bfloat> @insert_nxv32bf16_nxv2bf16_26(<vscale x 32 x bfloa
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v14, v16, a0
; CHECK-NEXT: ret
%v = call <vscale x 32 x bfloat> @llvm.vector.insert.nxv2bf16.nxv32bf16(<vscale x 32 x bfloat> %vec, <vscale x 2 x bfloat> %subvec, i64 26)
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
index 28b27bb..9972df9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
@@ -1371,6 +1371,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: mv a3, a1
; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a3, a3, a1
+; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: add a1, a1, a3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
@@ -1378,9 +1380,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv8r.v v0, v16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: mv a3, a1
; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: add a3, a3, a1
+; CHECK-NEXT: mv a3, a1
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, a1, a3
; CHECK-NEXT: add a1, sp, a1
@@ -1406,6 +1407,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
; CHECK-NEXT: slli t0, t0, 1
; CHECK-NEXT: mv t1, t0
; CHECK-NEXT: slli t0, t0, 2
+; CHECK-NEXT: add t1, t1, t0
+; CHECK-NEXT: slli t0, t0, 1
; CHECK-NEXT: add t0, t0, t1
; CHECK-NEXT: add t0, sp, t0
; CHECK-NEXT: addi t0, t0, 16
@@ -1413,9 +1416,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
; CHECK-NEXT: vslidedown.vx v16, v8, a1
; CHECK-NEXT: vl8re16.v v8, (a0)
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: mv t0, a0
; CHECK-NEXT: slli a0, a0, 1
-; CHECK-NEXT: add t0, t0, a0
+; CHECK-NEXT: mv t0, a0
; CHECK-NEXT: slli a0, a0, 2
; CHECK-NEXT: add a0, a0, t0
; CHECK-NEXT: add a0, sp, a0
@@ -1445,10 +1447,6 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
; CHECK-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v5, v8, v16, v0.t
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
; CHECK-NEXT: vsetvli zero, a6, e16, m4, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
@@ -1457,85 +1455,95 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vmfeq.vv v6, v24, v16, v0.t
-; CHECK-NEXT: add a0, a3, a3
+; CHECK-NEXT: vmfeq.vv v7, v24, v16, v0.t
; CHECK-NEXT: bltu a2, a5, .LBB85_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a2, a5
; CHECK-NEXT: .LBB85_4:
-; CHECK-NEXT: sub a5, a2, a4
-; CHECK-NEXT: csrr a6, vlenb
-; CHECK-NEXT: slli a6, a6, 1
-; CHECK-NEXT: mv a7, a6
-; CHECK-NEXT: slli a6, a6, 2
-; CHECK-NEXT: add a6, a6, a7
-; CHECK-NEXT: add a6, sp, a6
-; CHECK-NEXT: addi a6, a6, 16
-; CHECK-NEXT: vl1r.v v7, (a6) # vscale x 8-byte Folded Reload
-; CHECK-NEXT: vsetvli a6, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v7, a3
-; CHECK-NEXT: sltu a6, a2, a5
-; CHECK-NEXT: addi a6, a6, -1
-; CHECK-NEXT: and a5, a6, a5
-; CHECK-NEXT: csrr a6, vlenb
-; CHECK-NEXT: mv a7, a6
-; CHECK-NEXT: slli a6, a6, 1
-; CHECK-NEXT: add a7, a7, a6
-; CHECK-NEXT: slli a6, a6, 3
-; CHECK-NEXT: add a6, a6, a7
-; CHECK-NEXT: add a6, sp, a6
-; CHECK-NEXT: addi a6, a6, 16
-; CHECK-NEXT: vl8r.v v24, (a6) # vscale x 64-byte Folded Reload
-; CHECK-NEXT: vsetvli zero, a5, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28
+; CHECK-NEXT: sub a0, a2, a4
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: mv a6, a5
; CHECK-NEXT: slli a5, a5, 1
-; CHECK-NEXT: add a6, a6, a5
+; CHECK-NEXT: mv a6, a5
; CHECK-NEXT: slli a5, a5, 2
+; CHECK-NEXT: add a6, a6, a5
+; CHECK-NEXT: slli a5, a5, 1
+; CHECK-NEXT: add a5, a5, a6
+; CHECK-NEXT: add a5, sp, a5
+; CHECK-NEXT: addi a5, a5, 16
+; CHECK-NEXT: vl1r.v v8, (a5) # vscale x 8-byte Folded Reload
+; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v8, a3
+; CHECK-NEXT: sltu a5, a2, a0
+; CHECK-NEXT: addi a5, a5, -1
+; CHECK-NEXT: and a0, a5, a0
+; CHECK-NEXT: csrr a5, vlenb
+; CHECK-NEXT: slli a5, a5, 1
+; CHECK-NEXT: mv a6, a5
+; CHECK-NEXT: slli a5, a5, 3
; CHECK-NEXT: add a5, a5, a6
; CHECK-NEXT: add a5, sp, a5
; CHECK-NEXT: addi a5, a5, 16
-; CHECK-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: vl8r.v v24, (a5) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: mv a5, a0
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, a0, a5
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vmfeq.vv v4, v16, v24, v0.t
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v6, v5, a3
+; CHECK-NEXT: vmfeq.vv v10, v16, v24, v0.t
+; CHECK-NEXT: vmv1r.v v9, v7
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v9, v5, a3
; CHECK-NEXT: bltu a2, a4, .LBB85_6
; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: mv a2, a4
; CHECK-NEXT: .LBB85_6:
-; CHECK-NEXT: vmv1r.v v0, v7
-; CHECK-NEXT: csrr a4, vlenb
-; CHECK-NEXT: mv a5, a4
-; CHECK-NEXT: slli a4, a4, 1
-; CHECK-NEXT: add a5, a5, a4
-; CHECK-NEXT: slli a4, a4, 3
-; CHECK-NEXT: add a4, a4, a5
-; CHECK-NEXT: add a4, sp, a4
-; CHECK-NEXT: addi a4, a4, 16
-; CHECK-NEXT: vl8r.v v24, (a4) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: mv a4, a0
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, a0, a4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: mv a4, a2
-; CHECK-NEXT: slli a2, a2, 1
-; CHECK-NEXT: add a4, a4, a2
-; CHECK-NEXT: slli a2, a2, 2
-; CHECK-NEXT: add a2, a2, a4
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, a0, a2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a2, a2, a0
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, a0, a2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v8, v16, v24, v0.t
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v4, a3
-; CHECK-NEXT: add a0, a1, a1
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v6, a1
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v10, a3
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v9, a1
; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: mv a1, a0
@@ -3546,8 +3554,7 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFH-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; ZVFH-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; ZVFH-NEXT: vmfeq.vv v16, v24, v8, v0.t
-; ZVFH-NEXT: add a0, a1, a1
-; ZVFH-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; ZVFH-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; ZVFH-NEXT: vslideup.vx v16, v6, a1
; ZVFH-NEXT: vmv.v.v v0, v16
; ZVFH-NEXT: csrr a0, vlenb
@@ -3576,6 +3583,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: slli a1, a1, 1
; ZVFHMIN-NEXT: mv a3, a1
; ZVFHMIN-NEXT: slli a1, a1, 2
+; ZVFHMIN-NEXT: add a3, a3, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
; ZVFHMIN-NEXT: add a1, a1, a3
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
@@ -3583,9 +3592,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; ZVFHMIN-NEXT: vmv8r.v v0, v16
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: mv a3, a1
; ZVFHMIN-NEXT: slli a1, a1, 1
-; ZVFHMIN-NEXT: add a3, a3, a1
+; ZVFHMIN-NEXT: mv a3, a1
; ZVFHMIN-NEXT: slli a1, a1, 3
; ZVFHMIN-NEXT: add a1, a1, a3
; ZVFHMIN-NEXT: add a1, sp, a1
@@ -3611,6 +3619,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: slli t0, t0, 1
; ZVFHMIN-NEXT: mv t1, t0
; ZVFHMIN-NEXT: slli t0, t0, 2
+; ZVFHMIN-NEXT: add t1, t1, t0
+; ZVFHMIN-NEXT: slli t0, t0, 1
; ZVFHMIN-NEXT: add t0, t0, t1
; ZVFHMIN-NEXT: add t0, sp, t0
; ZVFHMIN-NEXT: addi t0, t0, 16
@@ -3618,9 +3628,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: vslidedown.vx v16, v8, a1
; ZVFHMIN-NEXT: vl8re16.v v8, (a0)
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: mv t0, a0
; ZVFHMIN-NEXT: slli a0, a0, 1
-; ZVFHMIN-NEXT: add t0, t0, a0
+; ZVFHMIN-NEXT: mv t0, a0
; ZVFHMIN-NEXT: slli a0, a0, 2
; ZVFHMIN-NEXT: add a0, a0, t0
; ZVFHMIN-NEXT: add a0, sp, a0
@@ -3650,10 +3659,6 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v5, v8, v16, v0.t
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: add a0, sp, a0
-; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a6, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
; ZVFHMIN-NEXT: csrr a0, vlenb
@@ -3662,85 +3667,95 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v6, v24, v16, v0.t
-; ZVFHMIN-NEXT: add a0, a3, a3
+; ZVFHMIN-NEXT: vmfeq.vv v7, v24, v16, v0.t
; ZVFHMIN-NEXT: bltu a2, a5, .LBB171_4
; ZVFHMIN-NEXT: # %bb.3:
; ZVFHMIN-NEXT: mv a2, a5
; ZVFHMIN-NEXT: .LBB171_4:
-; ZVFHMIN-NEXT: sub a5, a2, a4
-; ZVFHMIN-NEXT: csrr a6, vlenb
-; ZVFHMIN-NEXT: slli a6, a6, 1
-; ZVFHMIN-NEXT: mv a7, a6
-; ZVFHMIN-NEXT: slli a6, a6, 2
-; ZVFHMIN-NEXT: add a6, a6, a7
-; ZVFHMIN-NEXT: add a6, sp, a6
-; ZVFHMIN-NEXT: addi a6, a6, 16
-; ZVFHMIN-NEXT: vl1r.v v7, (a6) # vscale x 8-byte Folded Reload
-; ZVFHMIN-NEXT: vsetvli a6, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT: vslidedown.vx v0, v7, a3
-; ZVFHMIN-NEXT: sltu a6, a2, a5
-; ZVFHMIN-NEXT: addi a6, a6, -1
-; ZVFHMIN-NEXT: and a5, a6, a5
-; ZVFHMIN-NEXT: csrr a6, vlenb
-; ZVFHMIN-NEXT: mv a7, a6
-; ZVFHMIN-NEXT: slli a6, a6, 1
-; ZVFHMIN-NEXT: add a7, a7, a6
-; ZVFHMIN-NEXT: slli a6, a6, 3
-; ZVFHMIN-NEXT: add a6, a6, a7
-; ZVFHMIN-NEXT: add a6, sp, a6
-; ZVFHMIN-NEXT: addi a6, a6, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a6) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT: vsetvli zero, a5, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28
+; ZVFHMIN-NEXT: sub a0, a2, a4
; ZVFHMIN-NEXT: csrr a5, vlenb
-; ZVFHMIN-NEXT: mv a6, a5
; ZVFHMIN-NEXT: slli a5, a5, 1
-; ZVFHMIN-NEXT: add a6, a6, a5
+; ZVFHMIN-NEXT: mv a6, a5
; ZVFHMIN-NEXT: slli a5, a5, 2
+; ZVFHMIN-NEXT: add a6, a6, a5
+; ZVFHMIN-NEXT: slli a5, a5, 1
+; ZVFHMIN-NEXT: add a5, a5, a6
+; ZVFHMIN-NEXT: add a5, sp, a5
+; ZVFHMIN-NEXT: addi a5, a5, 16
+; ZVFHMIN-NEXT: vl1r.v v8, (a5) # vscale x 8-byte Folded Reload
+; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma
+; ZVFHMIN-NEXT: vslidedown.vx v0, v8, a3
+; ZVFHMIN-NEXT: sltu a5, a2, a0
+; ZVFHMIN-NEXT: addi a5, a5, -1
+; ZVFHMIN-NEXT: and a0, a5, a0
+; ZVFHMIN-NEXT: csrr a5, vlenb
+; ZVFHMIN-NEXT: slli a5, a5, 1
+; ZVFHMIN-NEXT: mv a6, a5
+; ZVFHMIN-NEXT: slli a5, a5, 3
; ZVFHMIN-NEXT: add a5, a5, a6
; ZVFHMIN-NEXT: add a5, sp, a5
; ZVFHMIN-NEXT: addi a5, a5, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v24, (a5) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: mv a5, a0
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: add a0, a0, a5
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v4, v16, v24, v0.t
-; ZVFHMIN-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; ZVFHMIN-NEXT: vslideup.vx v6, v5, a3
+; ZVFHMIN-NEXT: vmfeq.vv v10, v16, v24, v0.t
+; ZVFHMIN-NEXT: vmv1r.v v9, v7
+; ZVFHMIN-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; ZVFHMIN-NEXT: vslideup.vx v9, v5, a3
; ZVFHMIN-NEXT: bltu a2, a4, .LBB171_6
; ZVFHMIN-NEXT: # %bb.5:
; ZVFHMIN-NEXT: mv a2, a4
; ZVFHMIN-NEXT: .LBB171_6:
-; ZVFHMIN-NEXT: vmv1r.v v0, v7
-; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: mv a5, a4
-; ZVFHMIN-NEXT: slli a4, a4, 1
-; ZVFHMIN-NEXT: add a5, a5, a4
-; ZVFHMIN-NEXT: slli a4, a4, 3
-; ZVFHMIN-NEXT: add a4, a4, a5
-; ZVFHMIN-NEXT: add a4, sp, a4
-; ZVFHMIN-NEXT: addi a4, a4, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a4) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: mv a4, a0
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, a0, a4
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
-; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: mv a4, a2
-; ZVFHMIN-NEXT: slli a2, a2, 1
-; ZVFHMIN-NEXT: add a4, a4, a2
-; ZVFHMIN-NEXT: slli a2, a2, 2
-; ZVFHMIN-NEXT: add a2, a2, a4
-; ZVFHMIN-NEXT: add a2, sp, a2
-; ZVFHMIN-NEXT: addi a2, a2, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: mv a2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: add a0, a0, a2
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: mv a2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: add a2, a2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: add a0, a0, a2
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v24, v0.t
-; ZVFHMIN-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; ZVFHMIN-NEXT: vslideup.vx v8, v4, a3
-; ZVFHMIN-NEXT: add a0, a1, a1
-; ZVFHMIN-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; ZVFHMIN-NEXT: vslideup.vx v8, v6, a1
+; ZVFHMIN-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; ZVFHMIN-NEXT: vslideup.vx v8, v10, a3
+; ZVFHMIN-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; ZVFHMIN-NEXT: vslideup.vx v8, v9, a1
; ZVFHMIN-NEXT: vmv.v.v v0, v8
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: mv a1, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll
index ae868fe..ff923ef 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll
@@ -4280,8 +4280,7 @@ define <vscale x 16 x i1> @fcmp_oeq_vf_nx16f64(<vscale x 16 x double> %va) {
; RV32-NEXT: vmfeq.vf v24, v16, fa5
; RV32-NEXT: vmfeq.vf v0, v8, fa5
; RV32-NEXT: srli a0, a0, 3
-; RV32-NEXT: add a1, a0, a0
-; RV32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV32-NEXT: vslideup.vx v0, v24, a0
; RV32-NEXT: ret
;
@@ -4293,8 +4292,7 @@ define <vscale x 16 x i1> @fcmp_oeq_vf_nx16f64(<vscale x 16 x double> %va) {
; RV64-NEXT: vmfeq.vf v24, v16, fa5
; RV64-NEXT: vmfeq.vf v0, v8, fa5
; RV64-NEXT: srli a0, a0, 3
-; RV64-NEXT: add a1, a0, a0
-; RV64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64-NEXT: vslideup.vx v0, v24, a0
; RV64-NEXT: ret
;
@@ -4306,8 +4304,7 @@ define <vscale x 16 x i1> @fcmp_oeq_vf_nx16f64(<vscale x 16 x double> %va) {
; ZVFHMIN32-NEXT: vmfeq.vf v24, v16, fa5
; ZVFHMIN32-NEXT: vmfeq.vf v0, v8, fa5
; ZVFHMIN32-NEXT: srli a0, a0, 3
-; ZVFHMIN32-NEXT: add a1, a0, a0
-; ZVFHMIN32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; ZVFHMIN32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; ZVFHMIN32-NEXT: vslideup.vx v0, v24, a0
; ZVFHMIN32-NEXT: ret
;
@@ -4319,8 +4316,7 @@ define <vscale x 16 x i1> @fcmp_oeq_vf_nx16f64(<vscale x 16 x double> %va) {
; ZVFHMIN64-NEXT: vmfeq.vf v24, v16, fa5
; ZVFHMIN64-NEXT: vmfeq.vf v0, v8, fa5
; ZVFHMIN64-NEXT: srli a0, a0, 3
-; ZVFHMIN64-NEXT: add a1, a0, a0
-; ZVFHMIN64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; ZVFHMIN64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; ZVFHMIN64-NEXT: vslideup.vx v0, v24, a0
; ZVFHMIN64-NEXT: ret
%vc = fcmp oeq <vscale x 16 x double> %va, zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
index ef560a7..13c63d9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
@@ -2246,8 +2246,7 @@ define <vscale x 32 x i1> @icmp_eq_vv_nxv32i32(<vscale x 32 x i32> %va, <vscale
; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT: vmseq.vv v16, v24, v8, v0.t
-; CHECK-NEXT: add a0, a1, a1
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v16, v6, a1
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: csrr a0, vlenb
@@ -2283,8 +2282,7 @@ define <vscale x 32 x i1> @icmp_eq_vx_nxv32i32(<vscale x 32 x i32> %va, i32 %b,
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t
-; CHECK-NEXT: add a0, a2, a2
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v16, v25, a2
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
@@ -2316,8 +2314,7 @@ define <vscale x 32 x i1> @icmp_eq_vx_swap_nxv32i32(<vscale x 32 x i32> %va, i32
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t
-; CHECK-NEXT: add a0, a2, a2
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v16, v25, a2
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll
index bd3c29b..a85b471 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll
@@ -3001,9 +3001,8 @@ define <vscale x 16 x i1> @icmp_eq_vi_nx16i64(<vscale x 16 x i64> %va) {
; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; CHECK-NEXT: vmseq.vi v24, v16, 0
; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: add a1, a0, a0
; CHECK-NEXT: vmseq.vi v0, v8, 0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT: vslideup.vx v0, v24, a0
; CHECK-NEXT: ret
%vc = icmp eq <vscale x 16 x i64> %va, zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll b/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll
index c9f9a79..790cd56 100644
--- a/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll
@@ -48,10 +48,10 @@ define internal void @SubRegLivenessUndefInPhi(i64 %cond) {
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT: vid.v v9
; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a1, a0, 2
; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: vadd.vi v10, v9, 1
; CHECK-NEXT: vadd.vi v11, v9, 3
-; CHECK-NEXT: add a1, a0, a0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v9, a0
; CHECK-NEXT: vslideup.vx v12, v10, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
index ca7f256..f7ed130 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
@@ -191,8 +191,7 @@ define {<2 x i32>, <2 x i32>, <2 x i32>} @vector_deinterleave3_v2i32_v6i32(<6 x
; CHECK-NEXT: vsetivli zero, 2, e32, m2, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v8, 4
; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v12, a0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vmv1r.v v9, v10
@@ -222,8 +221,7 @@ define {<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>} @vector_deinterleave4_v2i32_
; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v9, v8, 2
; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v12, v10, a0
; CHECK-NEXT: vslideup.vx v8, v9, a0
; CHECK-NEXT: addi a0, sp, 16
@@ -254,15 +252,13 @@ define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @vector_deinterle
; CHECK-NEXT: vslidedown.vi v14, v8, 2
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v8, 8
-; CHECK-NEXT: srli a1, a0, 3
-; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a2, a1, a1
-; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v13, v12, a1
-; CHECK-NEXT: vslideup.vx v8, v14, a1
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v13, a0
+; CHECK-NEXT: srli a1, a0, 2
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v13, v12, a0
+; CHECK-NEXT: vslideup.vx v8, v14, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v13, a1
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vmv1r.v v9, v10
; CHECK-NEXT: vs2r.v v8, (a0)
@@ -292,16 +288,14 @@ define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @vecto
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v8, 10
; CHECK-NEXT: vslidedown.vi v12, v8, 8
-; CHECK-NEXT: srli a1, a0, 3
-; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a2, a1, a1
-; CHECK-NEXT: add a3, a0, a0
-; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v15, v14, a1
-; CHECK-NEXT: vslideup.vx v8, v16, a1
-; CHECK-NEXT: vslideup.vx v12, v10, a1
-; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v15, a0
+; CHECK-NEXT: srli a1, a0, 2
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v15, v14, a0
+; CHECK-NEXT: vslideup.vx v8, v16, a0
+; CHECK-NEXT: vslideup.vx v12, v10, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v15, a1
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vmv1r.v v9, v12
; CHECK-NEXT: vs2r.v v8, (a0)
@@ -330,24 +324,22 @@ define {<2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>} @v
; CHECK-NEXT: vslidedown.vi v12, v8, 2
; CHECK-NEXT: vslidedown.vi v13, v8, 4
; CHECK-NEXT: vslidedown.vi v14, v8, 6
-; CHECK-NEXT: srli a1, a0, 3
-; CHECK-NEXT: srli a2, a0, 2
+; CHECK-NEXT: srli a1, a0, 2
+; CHECK-NEXT: srli a2, a0, 3
; CHECK-NEXT: srli a0, a0, 1
-; CHECK-NEXT: add a3, a1, a1
-; CHECK-NEXT: add a4, a2, a1
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vx v10, v9, a2
+; CHECK-NEXT: add a3, a1, a2
+; CHECK-NEXT: vslideup.vx v8, v12, a2
; CHECK-NEXT: vsetvli zero, a3, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vx v10, v9, a1
-; CHECK-NEXT: vslideup.vx v8, v12, a1
-; CHECK-NEXT: slli a3, a1, 1
-; CHECK-NEXT: vsetvli zero, a4, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vx v10, v11, a2
-; CHECK-NEXT: vslideup.vx v8, v13, a2
-; CHECK-NEXT: add a2, a0, a0
-; CHECK-NEXT: add a3, a3, a1
-; CHECK-NEXT: add a1, a3, a1
-; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v14, a3
-; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v10, v11, a1
+; CHECK-NEXT: vslideup.vx v8, v13, a1
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: add a2, a1, a2
+; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v14, a1
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v10, a0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs1r.v v8, (a0)
@@ -376,25 +368,23 @@ define {<2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2
; CHECK-NEXT: vslidedown.vi v13, v8, 2
; CHECK-NEXT: vslidedown.vi v14, v8, 4
; CHECK-NEXT: vslidedown.vi v15, v8, 6
-; CHECK-NEXT: srli a1, a0, 3
-; CHECK-NEXT: srli a2, a0, 2
+; CHECK-NEXT: srli a1, a0, 2
+; CHECK-NEXT: srli a2, a0, 3
; CHECK-NEXT: srli a0, a0, 1
-; CHECK-NEXT: add a3, a1, a1
-; CHECK-NEXT: add a4, a2, a1
-; CHECK-NEXT: slli a5, a1, 1
-; CHECK-NEXT: add a6, a0, a0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vx v10, v9, a2
+; CHECK-NEXT: add a3, a1, a2
+; CHECK-NEXT: slli a4, a2, 1
+; CHECK-NEXT: vslideup.vx v8, v13, a2
; CHECK-NEXT: vsetvli zero, a3, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vx v10, v9, a1
-; CHECK-NEXT: add a5, a5, a1
-; CHECK-NEXT: vslideup.vx v8, v13, a1
-; CHECK-NEXT: vsetvli zero, a4, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vx v10, v11, a2
-; CHECK-NEXT: add a1, a5, a1
-; CHECK-NEXT: vslideup.vx v8, v14, a2
-; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v10, v12, a5
-; CHECK-NEXT: vslideup.vx v8, v15, a5
-; CHECK-NEXT: vsetvli zero, a6, e8, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v10, v11, a1
+; CHECK-NEXT: add a4, a4, a2
+; CHECK-NEXT: vslideup.vx v8, v14, a1
+; CHECK-NEXT: add a2, a4, a2
+; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v10, v12, a4
+; CHECK-NEXT: vslideup.vx v8, v15, a4
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v10, a0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs1r.v v8, (a0)
@@ -555,8 +545,7 @@ define {<2 x float>, <2 x float>, <2 x float>} @vector_deinterleave3_v6f32_v2f32
; CHECK-NEXT: vsetivli zero, 2, e32, m2, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v8, 4
; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v12, a0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vmv1r.v v9, v10
@@ -590,8 +579,7 @@ define {<2 x float>, <2 x float>, <2 x float>, <2 x float>} @vector_deinterleave
; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v9, v8, 2
; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v12, v10, a0
; CHECK-NEXT: vslideup.vx v8, v9, a0
; CHECK-NEXT: addi a0, sp, 16
@@ -626,15 +614,13 @@ define {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @vector_dein
; CHECK-NEXT: vslidedown.vi v14, v8, 2
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v8, 8
-; CHECK-NEXT: srli a1, a0, 3
-; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a2, a1, a1
-; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v13, v12, a1
-; CHECK-NEXT: vslideup.vx v8, v14, a1
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v13, a0
+; CHECK-NEXT: srli a1, a0, 2
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v13, v12, a0
+; CHECK-NEXT: vslideup.vx v8, v14, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v13, a1
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vmv1r.v v9, v10
; CHECK-NEXT: vs2r.v v8, (a0)
@@ -668,16 +654,14 @@ define {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>}
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v8, 10
; CHECK-NEXT: vslidedown.vi v12, v8, 8
-; CHECK-NEXT: srli a1, a0, 3
-; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a2, a1, a1
-; CHECK-NEXT: add a3, a0, a0
-; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v15, v14, a1
-; CHECK-NEXT: vslideup.vx v8, v16, a1
-; CHECK-NEXT: vslideup.vx v12, v10, a1
-; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v15, a0
+; CHECK-NEXT: srli a1, a0, 2
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v15, v14, a0
+; CHECK-NEXT: vslideup.vx v8, v16, a0
+; CHECK-NEXT: vslideup.vx v12, v10, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v15, a1
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vmv1r.v v9, v12
; CHECK-NEXT: vs2r.v v8, (a0)
@@ -711,21 +695,18 @@ define {<1 x half>, <1 x half>, <1 x half>, <1 x half>, <1 x half>, <1 x half>,
; CHECK-NEXT: vmv1r.v v10, v8
; CHECK-NEXT: vslidedown.vi v13, v8, 5
; CHECK-NEXT: vslidedown.vi v14, v8, 6
-; CHECK-NEXT: srli a1, a0, 3
-; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a2, a1, a1
-; CHECK-NEXT: add a3, a0, a0
-; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v11, v9, a1
-; CHECK-NEXT: vslideup.vx v10, v12, a1
-; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v10, v11, a0
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: srli a1, a0, 2
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v11, v9, a0
+; CHECK-NEXT: vslideup.vx v10, v12, a0
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v10, v11, a1
; CHECK-NEXT: vslidedown.vi v11, v8, 4
-; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v11, v13, a1
-; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v11, v14, a0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v11, v13, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v11, v14, a1
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs2r.v v10, (a0)
; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
@@ -755,25 +736,22 @@ define {<1 x half>, <1 x half>, <1 x half>, <1 x half>, <1 x half>, <1 x half>,
; CHECK-NEXT: vslidedown.vi v10, v8, 7
; CHECK-NEXT: vslidedown.vi v11, v8, 6
; CHECK-NEXT: vslidedown.vi v12, v8, 5
-; CHECK-NEXT: srli a1, a0, 3
+; CHECK-NEXT: srli a1, a0, 2
+; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: vslidedown.vi v9, v8, 4
-; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a2, a1, a1
-; CHECK-NEXT: add a3, a0, a0
-; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v11, v10, a1
-; CHECK-NEXT: vslideup.vx v9, v12, a1
-; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v9, v11, a0
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v11, v10, a0
+; CHECK-NEXT: vslideup.vx v9, v12, a0
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v9, v11, a1
; CHECK-NEXT: vslidedown.vi v10, v8, 3
; CHECK-NEXT: vslidedown.vi v11, v8, 2
; CHECK-NEXT: vslidedown.vi v12, v8, 1
-; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v11, v10, a1
-; CHECK-NEXT: vslideup.vx v8, v12, a1
-; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v11, a0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v11, v10, a0
+; CHECK-NEXT: vslideup.vx v8, v12, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v11, a1
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs2r.v v8, (a0)
; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
index 6a08f5a..45ffd84 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
@@ -2712,16 +2712,10 @@ define {<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x half>, <vscale x
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v11, v9, a0
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v11, a0
-; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v11, v8, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v11, a0
-; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v11, v10, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v10, v11, a0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs4r.v v8, (a0)
@@ -2808,16 +2802,10 @@ define {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vs
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v11, v9, a0
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v11, a0
-; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v11, v8, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v11, a0
-; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v11, v10, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v10, v11, a0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs4r.v v8, (a0)
@@ -2904,16 +2892,10 @@ define {<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscal
; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v11, v9, a0
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v11, a0
-; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v11, v8, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v11, a0
-; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v11, v10, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v10, v11, a0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs4r.v v8, (a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
index 3751967..a5811e6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
@@ -14,18 +14,17 @@ define void @vector_interleave_store_nxv32i1_nxv16i1(<vscale x 16 x i1> %a, <vsc
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: li a1, -1
-; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: vmerge.vim v12, v10, 1, v0
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vmerge.vim v14, v10, 1, v0
-; CHECK-NEXT: srli a2, a2, 2
; CHECK-NEXT: vwaddu.vv v8, v14, v12
; CHECK-NEXT: vwmaccu.vx v8, a1, v12
+; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: vmsne.vi v12, v10, 0
; CHECK-NEXT: vmsne.vi v10, v8, 0
-; CHECK-NEXT: add a1, a2, a2
-; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v10, v12, a2
+; CHECK-NEXT: srli a1, a1, 2
+; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v10, v12, a1
; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
; CHECK-NEXT: vsm.v v10, (a0)
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
index e297e88..01cc5c5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
@@ -17,18 +17,17 @@ define <vscale x 32 x i1> @vector_interleave_nxv32i1_nxv16i1(<vscale x 16 x i1>
; V-NEXT: vmv1r.v v0, v8
; V-NEXT: vmv.v.i v10, 0
; V-NEXT: li a0, -1
-; V-NEXT: csrr a1, vlenb
; V-NEXT: vmerge.vim v12, v10, 1, v0
; V-NEXT: vmv1r.v v0, v9
; V-NEXT: vmerge.vim v14, v10, 1, v0
-; V-NEXT: srli a1, a1, 2
; V-NEXT: vwaddu.vv v8, v14, v12
; V-NEXT: vwmaccu.vx v8, a0, v12
+; V-NEXT: csrr a0, vlenb
; V-NEXT: vmsne.vi v12, v10, 0
; V-NEXT: vmsne.vi v0, v8, 0
-; V-NEXT: add a0, a1, a1
-; V-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; V-NEXT: vslideup.vx v0, v12, a1
+; V-NEXT: srli a0, a0, 2
+; V-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; V-NEXT: vslideup.vx v0, v12, a0
; V-NEXT: ret
;
; ZVBB-LABEL: vector_interleave_nxv32i1_nxv16i1:
@@ -38,17 +37,16 @@ define <vscale x 32 x i1> @vector_interleave_nxv32i1_nxv16i1(<vscale x 16 x i1>
; ZVBB-NEXT: vmv1r.v v0, v8
; ZVBB-NEXT: vmv.v.i v10, 0
; ZVBB-NEXT: li a0, 1
-; ZVBB-NEXT: csrr a1, vlenb
; ZVBB-NEXT: vmerge.vim v10, v10, 1, v0
-; ZVBB-NEXT: srli a1, a1, 2
; ZVBB-NEXT: vwsll.vi v12, v10, 8
; ZVBB-NEXT: vmv1r.v v0, v9
; ZVBB-NEXT: vwaddu.wx v12, v12, a0, v0.t
+; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: vmsne.vi v8, v14, 0
; ZVBB-NEXT: vmsne.vi v0, v12, 0
-; ZVBB-NEXT: add a0, a1, a1
-; ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; ZVBB-NEXT: vslideup.vx v0, v8, a1
+; ZVBB-NEXT: srli a0, a0, 2
+; ZVBB-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; ZVBB-NEXT: vslideup.vx v0, v8, a0
; ZVBB-NEXT: ret
;
; ZIP-LABEL: vector_interleave_nxv32i1_nxv16i1:
@@ -61,13 +59,12 @@ define <vscale x 32 x i1> @vector_interleave_nxv32i1_nxv16i1(<vscale x 16 x i1>
; ZIP-NEXT: vmerge.vim v12, v10, 1, v0
; ZIP-NEXT: vmv1r.v v0, v9
; ZIP-NEXT: vmerge.vim v8, v10, 1, v0
-; ZIP-NEXT: srli a0, a0, 2
; ZIP-NEXT: ri.vzip2b.vv v10, v8, v12
; ZIP-NEXT: ri.vzip2a.vv v14, v8, v12
; ZIP-NEXT: vmsne.vi v8, v10, 0
; ZIP-NEXT: vmsne.vi v0, v14, 0
-; ZIP-NEXT: add a1, a0, a0
-; ZIP-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; ZIP-NEXT: srli a0, a0, 2
+; ZIP-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; ZIP-NEXT: vslideup.vx v0, v8, a0
; ZIP-NEXT: ret
%res = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
@@ -508,19 +505,17 @@ define <vscale x 48 x i1> @vector_interleave_nxv48i1_nxv16i1(<vscale x 16 x i1>
; CHECK-NEXT: add a2, a3, a2
; CHECK-NEXT: vsseg3e8.v v14, (a0)
; CHECK-NEXT: vl2r.v v8, (a2)
-; CHECK-NEXT: srli a2, a1, 2
-; CHECK-NEXT: srli a1, a1, 1
+; CHECK-NEXT: srli a2, a1, 1
; CHECK-NEXT: vl2r.v v10, (a3)
; CHECK-NEXT: vl2r.v v12, (a0)
-; CHECK-NEXT: add a0, a2, a2
+; CHECK-NEXT: srli a1, a1, 2
; CHECK-NEXT: vmsne.vi v14, v8, 0
; CHECK-NEXT: vmsne.vi v8, v10, 0
; CHECK-NEXT: vmsne.vi v0, v12, 0
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v0, v8, a2
-; CHECK-NEXT: add a0, a1, a1
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v0, v14, a1
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v0, v8, a1
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v0, v14, a2
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 6
; CHECK-NEXT: mul a0, a0, a1
@@ -551,19 +546,17 @@ define <vscale x 48 x i1> @vector_interleave_nxv48i1_nxv16i1(<vscale x 16 x i1>
; ZVBB-NEXT: add a2, a3, a2
; ZVBB-NEXT: vsseg3e8.v v14, (a0)
; ZVBB-NEXT: vl2r.v v8, (a2)
-; ZVBB-NEXT: srli a2, a1, 2
-; ZVBB-NEXT: srli a1, a1, 1
+; ZVBB-NEXT: srli a2, a1, 1
; ZVBB-NEXT: vl2r.v v10, (a3)
; ZVBB-NEXT: vl2r.v v12, (a0)
-; ZVBB-NEXT: add a0, a2, a2
+; ZVBB-NEXT: srli a1, a1, 2
; ZVBB-NEXT: vmsne.vi v14, v8, 0
; ZVBB-NEXT: vmsne.vi v8, v10, 0
; ZVBB-NEXT: vmsne.vi v0, v12, 0
-; ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; ZVBB-NEXT: vslideup.vx v0, v8, a2
-; ZVBB-NEXT: add a0, a1, a1
-; ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; ZVBB-NEXT: vslideup.vx v0, v14, a1
+; ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; ZVBB-NEXT: vslideup.vx v0, v8, a1
+; ZVBB-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; ZVBB-NEXT: vslideup.vx v0, v14, a2
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: li a1, 6
; ZVBB-NEXT: mul a0, a0, a1
@@ -812,22 +805,20 @@ define <vscale x 64 x i1> @vector_interleave_nxv64i1_nxv16i1(<vscale x 16 x i1>
; CHECK-NEXT: add a2, a4, a2
; CHECK-NEXT: vsseg4e8.v v14, (a0)
; CHECK-NEXT: vl2r.v v8, (a2)
-; CHECK-NEXT: srli a2, a1, 2
-; CHECK-NEXT: srli a1, a1, 1
+; CHECK-NEXT: srli a2, a1, 1
+; CHECK-NEXT: srli a1, a1, 2
; CHECK-NEXT: vl2r.v v10, (a4)
-; CHECK-NEXT: add a4, a2, a2
; CHECK-NEXT: vl2r.v v12, (a3)
; CHECK-NEXT: vl2r.v v14, (a0)
; CHECK-NEXT: vmsne.vi v16, v8, 0
; CHECK-NEXT: vmsne.vi v8, v10, 0
; CHECK-NEXT: vmsne.vi v9, v12, 0
; CHECK-NEXT: vmsne.vi v0, v14, 0
-; CHECK-NEXT: vsetvli zero, a4, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v16, a2
-; CHECK-NEXT: vslideup.vx v0, v9, a2
-; CHECK-NEXT: add a0, a1, a1
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v0, v8, a1
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v16, a1
+; CHECK-NEXT: vslideup.vx v0, v9, a1
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v0, v8, a2
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
@@ -859,22 +850,20 @@ define <vscale x 64 x i1> @vector_interleave_nxv64i1_nxv16i1(<vscale x 16 x i1>
; ZVBB-NEXT: add a2, a4, a2
; ZVBB-NEXT: vsseg4e8.v v14, (a0)
; ZVBB-NEXT: vl2r.v v8, (a2)
-; ZVBB-NEXT: srli a2, a1, 2
-; ZVBB-NEXT: srli a1, a1, 1
+; ZVBB-NEXT: srli a2, a1, 1
+; ZVBB-NEXT: srli a1, a1, 2
; ZVBB-NEXT: vl2r.v v10, (a4)
-; ZVBB-NEXT: add a4, a2, a2
; ZVBB-NEXT: vl2r.v v12, (a3)
; ZVBB-NEXT: vl2r.v v14, (a0)
; ZVBB-NEXT: vmsne.vi v16, v8, 0
; ZVBB-NEXT: vmsne.vi v8, v10, 0
; ZVBB-NEXT: vmsne.vi v9, v12, 0
; ZVBB-NEXT: vmsne.vi v0, v14, 0
-; ZVBB-NEXT: vsetvli zero, a4, e8, mf2, ta, ma
-; ZVBB-NEXT: vslideup.vx v8, v16, a2
-; ZVBB-NEXT: vslideup.vx v0, v9, a2
-; ZVBB-NEXT: add a0, a1, a1
-; ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; ZVBB-NEXT: vslideup.vx v0, v8, a1
+; ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; ZVBB-NEXT: vslideup.vx v8, v16, a1
+; ZVBB-NEXT: vslideup.vx v0, v9, a1
+; ZVBB-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; ZVBB-NEXT: vslideup.vx v0, v8, a2
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a0, a0, 3
; ZVBB-NEXT: add sp, sp, a0
@@ -1114,7 +1103,7 @@ define <vscale x 80 x i1> @vector_interleave_nxv80i1_nxv16i1(<vscale x 16 x i1>
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v18, v12, 1, v0
; CHECK-NEXT: add a2, a4, a1
-; CHECK-NEXT: srli a3, a1, 2
+; CHECK-NEXT: srli a3, a1, 1
; CHECK-NEXT: vmv2r.v v20, v14
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vmerge.vim v16, v12, 1, v0
@@ -1144,11 +1133,9 @@ define <vscale x 80 x i1> @vector_interleave_nxv80i1_nxv16i1(<vscale x 16 x i1>
; CHECK-NEXT: add a5, a4, a1
; CHECK-NEXT: vl1r.v v16, (a5)
; CHECK-NEXT: add a5, a5, a1
-; CHECK-NEXT: srli a1, a1, 1
+; CHECK-NEXT: srli a1, a1, 2
; CHECK-NEXT: vl1r.v v11, (a2)
-; CHECK-NEXT: add a2, a3, a3
; CHECK-NEXT: vl1r.v v15, (a4)
-; CHECK-NEXT: add a4, a1, a1
; CHECK-NEXT: vl1r.v v13, (a0)
; CHECK-NEXT: vl1r.v v17, (a5)
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
@@ -1156,11 +1143,11 @@ define <vscale x 80 x i1> @vector_interleave_nxv80i1_nxv16i1(<vscale x 16 x i1>
; CHECK-NEXT: vmsne.vi v0, v10, 0
; CHECK-NEXT: vmsne.vi v8, v14, 0
; CHECK-NEXT: vmsne.vi v9, v12, 0
-; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v0, v18, a3
-; CHECK-NEXT: vslideup.vx v9, v8, a3
-; CHECK-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v0, v9, a1
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v0, v18, a1
+; CHECK-NEXT: vslideup.vx v9, v8, a1
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v0, v9, a3
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT: vmsne.vi v8, v16, 0
; CHECK-NEXT: csrr a0, vlenb
@@ -1190,7 +1177,7 @@ define <vscale x 80 x i1> @vector_interleave_nxv80i1_nxv16i1(<vscale x 16 x i1>
; ZVBB-NEXT: vmv1r.v v0, v8
; ZVBB-NEXT: vmerge.vim v18, v12, 1, v0
; ZVBB-NEXT: add a2, a4, a1
-; ZVBB-NEXT: srli a3, a1, 2
+; ZVBB-NEXT: srli a3, a1, 1
; ZVBB-NEXT: vmv2r.v v20, v14
; ZVBB-NEXT: vmv1r.v v0, v9
; ZVBB-NEXT: vmerge.vim v16, v12, 1, v0
@@ -1220,11 +1207,9 @@ define <vscale x 80 x i1> @vector_interleave_nxv80i1_nxv16i1(<vscale x 16 x i1>
; ZVBB-NEXT: add a5, a4, a1
; ZVBB-NEXT: vl1r.v v16, (a5)
; ZVBB-NEXT: add a5, a5, a1
-; ZVBB-NEXT: srli a1, a1, 1
+; ZVBB-NEXT: srli a1, a1, 2
; ZVBB-NEXT: vl1r.v v11, (a2)
-; ZVBB-NEXT: add a2, a3, a3
; ZVBB-NEXT: vl1r.v v15, (a4)
-; ZVBB-NEXT: add a4, a1, a1
; ZVBB-NEXT: vl1r.v v13, (a0)
; ZVBB-NEXT: vl1r.v v17, (a5)
; ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, ma
@@ -1232,11 +1217,11 @@ define <vscale x 80 x i1> @vector_interleave_nxv80i1_nxv16i1(<vscale x 16 x i1>
; ZVBB-NEXT: vmsne.vi v0, v10, 0
; ZVBB-NEXT: vmsne.vi v8, v14, 0
; ZVBB-NEXT: vmsne.vi v9, v12, 0
-; ZVBB-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
-; ZVBB-NEXT: vslideup.vx v0, v18, a3
-; ZVBB-NEXT: vslideup.vx v9, v8, a3
-; ZVBB-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; ZVBB-NEXT: vslideup.vx v0, v9, a1
+; ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; ZVBB-NEXT: vslideup.vx v0, v18, a1
+; ZVBB-NEXT: vslideup.vx v9, v8, a1
+; ZVBB-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; ZVBB-NEXT: vslideup.vx v0, v9, a3
; ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; ZVBB-NEXT: vmsne.vi v8, v16, 0
; ZVBB-NEXT: csrr a0, vlenb
@@ -2340,47 +2325,45 @@ define <vscale x 96 x i1> @vector_interleave_nxv96i1_nxv16i1(<vscale x 16 x i1>
; CHECK-NEXT: vmv1r.v v17, v9
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmerge.vim v24, v20, 1, v0
-; CHECK-NEXT: addi a5, sp, 16
+; CHECK-NEXT: addi a4, sp, 16
; CHECK-NEXT: vmv1r.v v18, v25
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: vmerge.vim v26, v20, 1, v0
-; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: vmv1r.v v19, v27
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vmerge.vim v10, v20, 1, v0
-; CHECK-NEXT: add a3, a0, a2
+; CHECK-NEXT: add a2, a0, a1
; CHECK-NEXT: vmv1r.v v20, v11
-; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
; CHECK-NEXT: vsseg6e8.v v15, (a0)
; CHECK-NEXT: vmv1r.v v15, v22
-; CHECK-NEXT: add a4, a5, a2
+; CHECK-NEXT: add a5, a4, a1
; CHECK-NEXT: vmv1r.v v16, v8
-; CHECK-NEXT: srli a1, a2, 2
+; CHECK-NEXT: srli a3, a1, 1
; CHECK-NEXT: vmv1r.v v17, v24
-; CHECK-NEXT: add a6, a4, a2
+; CHECK-NEXT: add a6, a5, a1
; CHECK-NEXT: vmv1r.v v18, v26
-; CHECK-NEXT: add a7, a3, a2
+; CHECK-NEXT: add a7, a2, a1
; CHECK-NEXT: vmv1r.v v19, v10
-; CHECK-NEXT: vsseg6e8.v v14, (a5)
+; CHECK-NEXT: vsseg6e8.v v14, (a4)
; CHECK-NEXT: vl1r.v v8, (a0)
-; CHECK-NEXT: add a0, a6, a2
+; CHECK-NEXT: add a0, a6, a1
; CHECK-NEXT: vl1r.v v10, (a6)
-; CHECK-NEXT: add a6, a7, a2
-; CHECK-NEXT: vl1r.v v12, (a5)
-; CHECK-NEXT: add a5, a0, a2
+; CHECK-NEXT: add a6, a7, a1
+; CHECK-NEXT: vl1r.v v12, (a4)
+; CHECK-NEXT: add a4, a0, a1
; CHECK-NEXT: vl1r.v v14, (a7)
-; CHECK-NEXT: add a7, a6, a2
-; CHECK-NEXT: vl1r.v v16, (a5)
-; CHECK-NEXT: add a5, a5, a2
+; CHECK-NEXT: add a7, a6, a1
+; CHECK-NEXT: vl1r.v v16, (a4)
+; CHECK-NEXT: add a4, a4, a1
; CHECK-NEXT: vl1r.v v18, (a7)
-; CHECK-NEXT: add a7, a7, a2
-; CHECK-NEXT: srli a2, a2, 1
-; CHECK-NEXT: vl1r.v v9, (a3)
-; CHECK-NEXT: add a3, a1, a1
-; CHECK-NEXT: vl1r.v v17, (a5)
-; CHECK-NEXT: add a5, a2, a2
+; CHECK-NEXT: add a7, a7, a1
+; CHECK-NEXT: srli a1, a1, 2
+; CHECK-NEXT: vl1r.v v9, (a2)
+; CHECK-NEXT: vl1r.v v17, (a4)
; CHECK-NEXT: vl1r.v v11, (a0)
-; CHECK-NEXT: vl1r.v v13, (a4)
+; CHECK-NEXT: vl1r.v v13, (a5)
; CHECK-NEXT: vl1r.v v19, (a7)
; CHECK-NEXT: vl1r.v v15, (a6)
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
@@ -2390,12 +2373,12 @@ define <vscale x 96 x i1> @vector_interleave_nxv96i1_nxv16i1(<vscale x 16 x i1>
; CHECK-NEXT: vmsne.vi v0, v12, 0
; CHECK-NEXT: vmsne.vi v10, v18, 0
; CHECK-NEXT: vmsne.vi v8, v14, 0
-; CHECK-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v9, v20, a1
; CHECK-NEXT: vslideup.vx v0, v16, a1
-; CHECK-NEXT: vsetvli zero, a5, e8, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v0, v9, a2
-; CHECK-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v0, v9, a3
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v8, v10, a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 12
@@ -2427,47 +2410,45 @@ define <vscale x 96 x i1> @vector_interleave_nxv96i1_nxv16i1(<vscale x 16 x i1>
; ZVBB-NEXT: vmv1r.v v17, v9
; ZVBB-NEXT: vmv1r.v v0, v10
; ZVBB-NEXT: vmerge.vim v24, v20, 1, v0
-; ZVBB-NEXT: addi a5, sp, 16
+; ZVBB-NEXT: addi a4, sp, 16
; ZVBB-NEXT: vmv1r.v v18, v25
; ZVBB-NEXT: vmv1r.v v0, v11
; ZVBB-NEXT: vmerge.vim v26, v20, 1, v0
-; ZVBB-NEXT: csrr a2, vlenb
+; ZVBB-NEXT: csrr a1, vlenb
; ZVBB-NEXT: vmv1r.v v19, v27
; ZVBB-NEXT: vmv1r.v v0, v12
; ZVBB-NEXT: vmerge.vim v10, v20, 1, v0
-; ZVBB-NEXT: add a3, a0, a2
+; ZVBB-NEXT: add a2, a0, a1
; ZVBB-NEXT: vmv1r.v v20, v11
-; ZVBB-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; ZVBB-NEXT: vsetvli a3, zero, e8, m1, ta, ma
; ZVBB-NEXT: vsseg6e8.v v15, (a0)
; ZVBB-NEXT: vmv1r.v v15, v22
-; ZVBB-NEXT: add a4, a5, a2
+; ZVBB-NEXT: add a5, a4, a1
; ZVBB-NEXT: vmv1r.v v16, v8
-; ZVBB-NEXT: srli a1, a2, 2
+; ZVBB-NEXT: srli a3, a1, 1
; ZVBB-NEXT: vmv1r.v v17, v24
-; ZVBB-NEXT: add a6, a4, a2
+; ZVBB-NEXT: add a6, a5, a1
; ZVBB-NEXT: vmv1r.v v18, v26
-; ZVBB-NEXT: add a7, a3, a2
+; ZVBB-NEXT: add a7, a2, a1
; ZVBB-NEXT: vmv1r.v v19, v10
-; ZVBB-NEXT: vsseg6e8.v v14, (a5)
+; ZVBB-NEXT: vsseg6e8.v v14, (a4)
; ZVBB-NEXT: vl1r.v v8, (a0)
-; ZVBB-NEXT: add a0, a6, a2
+; ZVBB-NEXT: add a0, a6, a1
; ZVBB-NEXT: vl1r.v v10, (a6)
-; ZVBB-NEXT: add a6, a7, a2
-; ZVBB-NEXT: vl1r.v v12, (a5)
-; ZVBB-NEXT: add a5, a0, a2
+; ZVBB-NEXT: add a6, a7, a1
+; ZVBB-NEXT: vl1r.v v12, (a4)
+; ZVBB-NEXT: add a4, a0, a1
; ZVBB-NEXT: vl1r.v v14, (a7)
-; ZVBB-NEXT: add a7, a6, a2
-; ZVBB-NEXT: vl1r.v v16, (a5)
-; ZVBB-NEXT: add a5, a5, a2
+; ZVBB-NEXT: add a7, a6, a1
+; ZVBB-NEXT: vl1r.v v16, (a4)
+; ZVBB-NEXT: add a4, a4, a1
; ZVBB-NEXT: vl1r.v v18, (a7)
-; ZVBB-NEXT: add a7, a7, a2
-; ZVBB-NEXT: srli a2, a2, 1
-; ZVBB-NEXT: vl1r.v v9, (a3)
-; ZVBB-NEXT: add a3, a1, a1
-; ZVBB-NEXT: vl1r.v v17, (a5)
-; ZVBB-NEXT: add a5, a2, a2
+; ZVBB-NEXT: add a7, a7, a1
+; ZVBB-NEXT: srli a1, a1, 2
+; ZVBB-NEXT: vl1r.v v9, (a2)
+; ZVBB-NEXT: vl1r.v v17, (a4)
; ZVBB-NEXT: vl1r.v v11, (a0)
-; ZVBB-NEXT: vl1r.v v13, (a4)
+; ZVBB-NEXT: vl1r.v v13, (a5)
; ZVBB-NEXT: vl1r.v v19, (a7)
; ZVBB-NEXT: vl1r.v v15, (a6)
; ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, ma
@@ -2477,12 +2458,12 @@ define <vscale x 96 x i1> @vector_interleave_nxv96i1_nxv16i1(<vscale x 16 x i1>
; ZVBB-NEXT: vmsne.vi v0, v12, 0
; ZVBB-NEXT: vmsne.vi v10, v18, 0
; ZVBB-NEXT: vmsne.vi v8, v14, 0
-; ZVBB-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v20, a1
; ZVBB-NEXT: vslideup.vx v0, v16, a1
-; ZVBB-NEXT: vsetvli zero, a5, e8, m1, ta, ma
-; ZVBB-NEXT: vslideup.vx v0, v9, a2
-; ZVBB-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; ZVBB-NEXT: vslideup.vx v0, v9, a3
+; ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v10, a1
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: li a1, 12
@@ -3676,23 +3657,21 @@ define <vscale x 112 x i1> @vector_interleave_nxv112i1_nxv16i1(<vscale x 16 x i1
; CHECK-NEXT: sub sp, sp, a0
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT: vmv.v.i v14, 0
-; CHECK-NEXT: addi a4, sp, 16
+; CHECK-NEXT: addi a3, sp, 16
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a1, a0, 3
; CHECK-NEXT: sub a0, a1, a0
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: vmerge.vim v16, v14, 1, v0
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v22, v14, 1, v0
-; CHECK-NEXT: add a3, a4, a2
-; CHECK-NEXT: srli a1, a2, 2
-; CHECK-NEXT: add a5, a0, a2
+; CHECK-NEXT: add a2, a3, a1
; CHECK-NEXT: vmv4r.v v24, v16
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vmerge.vim v18, v14, 1, v0
-; CHECK-NEXT: add a6, a3, a2
+; CHECK-NEXT: add a4, a2, a1
; CHECK-NEXT: vmv1r.v v25, v22
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmerge.vim v8, v14, 1, v0
@@ -3704,41 +3683,41 @@ define <vscale x 112 x i1> @vector_interleave_nxv112i1_nxv16i1(<vscale x 16 x i1
; CHECK-NEXT: vmerge.vim v10, v14, 1, v0
; CHECK-NEXT: vmv1r.v v28, v20
; CHECK-NEXT: vmv1r.v v18, v23
-; CHECK-NEXT: add a7, a6, a2
+; CHECK-NEXT: add a5, a4, a1
; CHECK-NEXT: vmv1r.v v29, v10
; CHECK-NEXT: vmv1r.v v20, v9
; CHECK-NEXT: vmv1r.v v0, v13
; CHECK-NEXT: vmerge.vim v30, v14, 1, v0
; CHECK-NEXT: vmv1r.v v22, v11
-; CHECK-NEXT: vsetvli t0, zero, e8, m1, ta, ma
-; CHECK-NEXT: vsseg7e8.v v24, (a4)
+; CHECK-NEXT: vsetvli a6, zero, e8, m1, ta, ma
+; CHECK-NEXT: vsseg7e8.v v24, (a3)
; CHECK-NEXT: vmv1r.v v23, v31
; CHECK-NEXT: vsseg7e8.v v17, (a0)
-; CHECK-NEXT: vl1r.v v8, (a6)
-; CHECK-NEXT: add a6, a7, a2
-; CHECK-NEXT: vl1r.v v10, (a4)
-; CHECK-NEXT: add a4, a6, a2
-; CHECK-NEXT: vl1r.v v12, (a6)
-; CHECK-NEXT: add a6, a4, a2
-; CHECK-NEXT: vl1r.v v14, (a6)
-; CHECK-NEXT: add a6, a5, a2
-; CHECK-NEXT: vl1r.v v16, (a5)
-; CHECK-NEXT: add a5, a6, a2
-; CHECK-NEXT: vl1r.v v18, (a5)
-; CHECK-NEXT: add a5, a5, a2
-; CHECK-NEXT: vl1r.v v9, (a7)
-; CHECK-NEXT: add a7, a5, a2
-; CHECK-NEXT: vl1r.v v20, (a7)
-; CHECK-NEXT: add a7, a7, a2
-; CHECK-NEXT: srli a2, a2, 1
-; CHECK-NEXT: vl1r.v v11, (a3)
-; CHECK-NEXT: add a3, a1, a1
-; CHECK-NEXT: vl1r.v v13, (a4)
-; CHECK-NEXT: add a4, a2, a2
+; CHECK-NEXT: vl1r.v v8, (a4)
+; CHECK-NEXT: add a4, a5, a1
+; CHECK-NEXT: vl1r.v v10, (a3)
+; CHECK-NEXT: add a6, a4, a1
+; CHECK-NEXT: vl1r.v v12, (a4)
+; CHECK-NEXT: add a3, a6, a1
+; CHECK-NEXT: vl1r.v v14, (a3)
+; CHECK-NEXT: srli a3, a1, 1
+; CHECK-NEXT: vl1r.v v9, (a5)
+; CHECK-NEXT: add a4, a0, a1
+; CHECK-NEXT: vl1r.v v16, (a4)
+; CHECK-NEXT: add a4, a4, a1
+; CHECK-NEXT: vl1r.v v11, (a2)
+; CHECK-NEXT: add a2, a4, a1
+; CHECK-NEXT: vl1r.v v18, (a2)
+; CHECK-NEXT: add a2, a2, a1
+; CHECK-NEXT: vl1r.v v13, (a6)
+; CHECK-NEXT: add a5, a2, a1
+; CHECK-NEXT: vl1r.v v20, (a5)
+; CHECK-NEXT: add a5, a5, a1
+; CHECK-NEXT: srli a1, a1, 2
; CHECK-NEXT: vl1r.v v15, (a0)
-; CHECK-NEXT: vl1r.v v19, (a5)
-; CHECK-NEXT: vl1r.v v17, (a6)
-; CHECK-NEXT: vl1r.v v21, (a7)
+; CHECK-NEXT: vl1r.v v19, (a2)
+; CHECK-NEXT: vl1r.v v17, (a4)
+; CHECK-NEXT: vl1r.v v21, (a5)
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT: vmsne.vi v22, v8, 0
; CHECK-NEXT: vmsne.vi v0, v10, 0
@@ -3747,13 +3726,13 @@ define <vscale x 112 x i1> @vector_interleave_nxv112i1_nxv16i1(<vscale x 16 x i1
; CHECK-NEXT: vmsne.vi v11, v18, 0
; CHECK-NEXT: vmsne.vi v8, v16, 0
; CHECK-NEXT: vmsne.vi v12, v20, 0
-; CHECK-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v0, v22, a1
; CHECK-NEXT: vslideup.vx v9, v10, a1
; CHECK-NEXT: vslideup.vx v8, v11, a1
-; CHECK-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v0, v9, a2
-; CHECK-NEXT: vslideup.vx v8, v12, a2
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v0, v9, a3
+; CHECK-NEXT: vslideup.vx v8, v12, a3
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 14
; CHECK-NEXT: mul a0, a0, a1
@@ -3770,23 +3749,21 @@ define <vscale x 112 x i1> @vector_interleave_nxv112i1_nxv16i1(<vscale x 16 x i1
; ZVBB-NEXT: sub sp, sp, a0
; ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; ZVBB-NEXT: vmv.v.i v14, 0
-; ZVBB-NEXT: addi a4, sp, 16
+; ZVBB-NEXT: addi a3, sp, 16
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a1, a0, 3
; ZVBB-NEXT: sub a0, a1, a0
; ZVBB-NEXT: add a0, sp, a0
; ZVBB-NEXT: addi a0, a0, 16
-; ZVBB-NEXT: csrr a2, vlenb
+; ZVBB-NEXT: csrr a1, vlenb
; ZVBB-NEXT: vmerge.vim v16, v14, 1, v0
; ZVBB-NEXT: vmv1r.v v0, v8
; ZVBB-NEXT: vmerge.vim v22, v14, 1, v0
-; ZVBB-NEXT: add a3, a4, a2
-; ZVBB-NEXT: srli a1, a2, 2
-; ZVBB-NEXT: add a5, a0, a2
+; ZVBB-NEXT: add a2, a3, a1
; ZVBB-NEXT: vmv4r.v v24, v16
; ZVBB-NEXT: vmv1r.v v0, v9
; ZVBB-NEXT: vmerge.vim v18, v14, 1, v0
-; ZVBB-NEXT: add a6, a3, a2
+; ZVBB-NEXT: add a4, a2, a1
; ZVBB-NEXT: vmv1r.v v25, v22
; ZVBB-NEXT: vmv1r.v v0, v10
; ZVBB-NEXT: vmerge.vim v8, v14, 1, v0
@@ -3798,41 +3775,41 @@ define <vscale x 112 x i1> @vector_interleave_nxv112i1_nxv16i1(<vscale x 16 x i1
; ZVBB-NEXT: vmerge.vim v10, v14, 1, v0
; ZVBB-NEXT: vmv1r.v v28, v20
; ZVBB-NEXT: vmv1r.v v18, v23
-; ZVBB-NEXT: add a7, a6, a2
+; ZVBB-NEXT: add a5, a4, a1
; ZVBB-NEXT: vmv1r.v v29, v10
; ZVBB-NEXT: vmv1r.v v20, v9
; ZVBB-NEXT: vmv1r.v v0, v13
; ZVBB-NEXT: vmerge.vim v30, v14, 1, v0
; ZVBB-NEXT: vmv1r.v v22, v11
-; ZVBB-NEXT: vsetvli t0, zero, e8, m1, ta, ma
-; ZVBB-NEXT: vsseg7e8.v v24, (a4)
+; ZVBB-NEXT: vsetvli a6, zero, e8, m1, ta, ma
+; ZVBB-NEXT: vsseg7e8.v v24, (a3)
; ZVBB-NEXT: vmv1r.v v23, v31
; ZVBB-NEXT: vsseg7e8.v v17, (a0)
-; ZVBB-NEXT: vl1r.v v8, (a6)
-; ZVBB-NEXT: add a6, a7, a2
-; ZVBB-NEXT: vl1r.v v10, (a4)
-; ZVBB-NEXT: add a4, a6, a2
-; ZVBB-NEXT: vl1r.v v12, (a6)
-; ZVBB-NEXT: add a6, a4, a2
-; ZVBB-NEXT: vl1r.v v14, (a6)
-; ZVBB-NEXT: add a6, a5, a2
-; ZVBB-NEXT: vl1r.v v16, (a5)
-; ZVBB-NEXT: add a5, a6, a2
-; ZVBB-NEXT: vl1r.v v18, (a5)
-; ZVBB-NEXT: add a5, a5, a2
-; ZVBB-NEXT: vl1r.v v9, (a7)
-; ZVBB-NEXT: add a7, a5, a2
-; ZVBB-NEXT: vl1r.v v20, (a7)
-; ZVBB-NEXT: add a7, a7, a2
-; ZVBB-NEXT: srli a2, a2, 1
-; ZVBB-NEXT: vl1r.v v11, (a3)
-; ZVBB-NEXT: add a3, a1, a1
-; ZVBB-NEXT: vl1r.v v13, (a4)
-; ZVBB-NEXT: add a4, a2, a2
+; ZVBB-NEXT: vl1r.v v8, (a4)
+; ZVBB-NEXT: add a4, a5, a1
+; ZVBB-NEXT: vl1r.v v10, (a3)
+; ZVBB-NEXT: add a6, a4, a1
+; ZVBB-NEXT: vl1r.v v12, (a4)
+; ZVBB-NEXT: add a3, a6, a1
+; ZVBB-NEXT: vl1r.v v14, (a3)
+; ZVBB-NEXT: srli a3, a1, 1
+; ZVBB-NEXT: vl1r.v v9, (a5)
+; ZVBB-NEXT: add a4, a0, a1
+; ZVBB-NEXT: vl1r.v v16, (a4)
+; ZVBB-NEXT: add a4, a4, a1
+; ZVBB-NEXT: vl1r.v v11, (a2)
+; ZVBB-NEXT: add a2, a4, a1
+; ZVBB-NEXT: vl1r.v v18, (a2)
+; ZVBB-NEXT: add a2, a2, a1
+; ZVBB-NEXT: vl1r.v v13, (a6)
+; ZVBB-NEXT: add a5, a2, a1
+; ZVBB-NEXT: vl1r.v v20, (a5)
+; ZVBB-NEXT: add a5, a5, a1
+; ZVBB-NEXT: srli a1, a1, 2
; ZVBB-NEXT: vl1r.v v15, (a0)
-; ZVBB-NEXT: vl1r.v v19, (a5)
-; ZVBB-NEXT: vl1r.v v17, (a6)
-; ZVBB-NEXT: vl1r.v v21, (a7)
+; ZVBB-NEXT: vl1r.v v19, (a2)
+; ZVBB-NEXT: vl1r.v v17, (a4)
+; ZVBB-NEXT: vl1r.v v21, (a5)
; ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; ZVBB-NEXT: vmsne.vi v22, v8, 0
; ZVBB-NEXT: vmsne.vi v0, v10, 0
@@ -3841,13 +3818,13 @@ define <vscale x 112 x i1> @vector_interleave_nxv112i1_nxv16i1(<vscale x 16 x i1
; ZVBB-NEXT: vmsne.vi v11, v18, 0
; ZVBB-NEXT: vmsne.vi v8, v16, 0
; ZVBB-NEXT: vmsne.vi v12, v20, 0
-; ZVBB-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; ZVBB-NEXT: vslideup.vx v0, v22, a1
; ZVBB-NEXT: vslideup.vx v9, v10, a1
; ZVBB-NEXT: vslideup.vx v8, v11, a1
-; ZVBB-NEXT: vsetvli zero, a4, e8, m1, ta, ma
-; ZVBB-NEXT: vslideup.vx v0, v9, a2
-; ZVBB-NEXT: vslideup.vx v8, v12, a2
+; ZVBB-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; ZVBB-NEXT: vslideup.vx v0, v9, a3
+; ZVBB-NEXT: vslideup.vx v8, v12, a3
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: li a1, 14
; ZVBB-NEXT: mul a0, a0, a1
@@ -5569,54 +5546,52 @@ define <vscale x 128 x i1> @vector_interleave_nxv128i1_nxv16i1(<vscale x 16 x i1
; CHECK-NEXT: add a6, a4, a0
; CHECK-NEXT: add a7, a5, a0
; CHECK-NEXT: add t0, a6, a0
-; CHECK-NEXT: add t1, a7, a0
-; CHECK-NEXT: add t2, t0, a0
; CHECK-NEXT: vmv1r.v v20, v9
-; CHECK-NEXT: add t3, t1, a0
+; CHECK-NEXT: add t1, a7, a0
; CHECK-NEXT: vmv1r.v v22, v11
; CHECK-NEXT: vsseg8e8.v v16, (a1)
-; CHECK-NEXT: vl1r.v v10, (t1)
-; CHECK-NEXT: add t1, t2, a0
-; CHECK-NEXT: vl1r.v v12, (a5)
-; CHECK-NEXT: add a5, t3, a0
+; CHECK-NEXT: vl1r.v v8, (a5)
+; CHECK-NEXT: add a5, t0, a0
+; CHECK-NEXT: vl1r.v v12, (t1)
+; CHECK-NEXT: add t1, t1, a0
; CHECK-NEXT: vl1r.v v14, (a2)
-; CHECK-NEXT: add a2, t1, a0
+; CHECK-NEXT: add a2, a5, a0
+; CHECK-NEXT: vl1r.v v10, (a5)
+; CHECK-NEXT: add a5, t1, a0
; CHECK-NEXT: vl1r.v v16, (a5)
; CHECK-NEXT: add a5, a5, a0
-; CHECK-NEXT: vl1r.v v8, (a2)
-; CHECK-NEXT: add a2, a2, a0
-; CHECK-NEXT: vl1r.v v18, (t2)
; CHECK-NEXT: vl1r.v v17, (a5)
-; CHECK-NEXT: vl1r.v v11, (t3)
-; CHECK-NEXT: vl1r.v v13, (a7)
+; CHECK-NEXT: add a5, a2, a0
+; CHECK-NEXT: vl1r.v v18, (a5)
+; CHECK-NEXT: add a5, a5, a0
+; CHECK-NEXT: vl1r.v v13, (t1)
+; CHECK-NEXT: vl1r.v v9, (a7)
; CHECK-NEXT: vl1r.v v15, (a3)
; CHECK-NEXT: vsetvli a3, zero, e8, m2, ta, ma
; CHECK-NEXT: vmsne.vi v20, v16, 0
-; CHECK-NEXT: vmsne.vi v16, v10, 0
-; CHECK-NEXT: vl1r.v v10, (a6)
-; CHECK-NEXT: vmsne.vi v17, v12, 0
+; CHECK-NEXT: vmsne.vi v16, v12, 0
+; CHECK-NEXT: vl1r.v v12, (a6)
+; CHECK-NEXT: vmsne.vi v17, v8, 0
; CHECK-NEXT: vmsne.vi v0, v14, 0
-; CHECK-NEXT: vl1r.v v12, (a1)
-; CHECK-NEXT: vl1r.v v9, (a2)
-; CHECK-NEXT: vl1r.v v19, (t1)
-; CHECK-NEXT: vl1r.v v11, (t0)
-; CHECK-NEXT: vl1r.v v13, (a4)
-; CHECK-NEXT: vmsne.vi v14, v8, 0
+; CHECK-NEXT: vl1r.v v14, (a1)
+; CHECK-NEXT: vl1r.v v19, (a5)
+; CHECK-NEXT: vl1r.v v11, (a2)
+; CHECK-NEXT: vl1r.v v13, (t0)
+; CHECK-NEXT: vl1r.v v15, (a4)
; CHECK-NEXT: vmsne.vi v9, v18, 0
-; CHECK-NEXT: vmsne.vi v15, v10, 0
-; CHECK-NEXT: vmsne.vi v8, v12, 0
+; CHECK-NEXT: vmsne.vi v18, v10, 0
+; CHECK-NEXT: vmsne.vi v10, v12, 0
+; CHECK-NEXT: vmsne.vi v8, v14, 0
; CHECK-NEXT: srli a1, a0, 2
-; CHECK-NEXT: add a2, a1, a1
-; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v16, v20, a1
; CHECK-NEXT: vslideup.vx v0, v17, a1
-; CHECK-NEXT: vslideup.vx v9, v14, a1
-; CHECK-NEXT: vslideup.vx v8, v15, a1
+; CHECK-NEXT: vslideup.vx v18, v9, a1
+; CHECK-NEXT: vslideup.vx v8, v10, a1
; CHECK-NEXT: srli a0, a0, 1
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vslideup.vx v0, v16, a0
-; CHECK-NEXT: vslideup.vx v8, v9, a0
+; CHECK-NEXT: vslideup.vx v8, v18, a0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
@@ -5670,54 +5645,52 @@ define <vscale x 128 x i1> @vector_interleave_nxv128i1_nxv16i1(<vscale x 16 x i1
; ZVBB-NEXT: add a6, a4, a0
; ZVBB-NEXT: add a7, a5, a0
; ZVBB-NEXT: add t0, a6, a0
-; ZVBB-NEXT: add t1, a7, a0
-; ZVBB-NEXT: add t2, t0, a0
; ZVBB-NEXT: vmv1r.v v20, v9
-; ZVBB-NEXT: add t3, t1, a0
+; ZVBB-NEXT: add t1, a7, a0
; ZVBB-NEXT: vmv1r.v v22, v11
; ZVBB-NEXT: vsseg8e8.v v16, (a1)
-; ZVBB-NEXT: vl1r.v v10, (t1)
-; ZVBB-NEXT: add t1, t2, a0
-; ZVBB-NEXT: vl1r.v v12, (a5)
-; ZVBB-NEXT: add a5, t3, a0
+; ZVBB-NEXT: vl1r.v v8, (a5)
+; ZVBB-NEXT: add a5, t0, a0
+; ZVBB-NEXT: vl1r.v v12, (t1)
+; ZVBB-NEXT: add t1, t1, a0
; ZVBB-NEXT: vl1r.v v14, (a2)
-; ZVBB-NEXT: add a2, t1, a0
+; ZVBB-NEXT: add a2, a5, a0
+; ZVBB-NEXT: vl1r.v v10, (a5)
+; ZVBB-NEXT: add a5, t1, a0
; ZVBB-NEXT: vl1r.v v16, (a5)
; ZVBB-NEXT: add a5, a5, a0
-; ZVBB-NEXT: vl1r.v v8, (a2)
-; ZVBB-NEXT: add a2, a2, a0
-; ZVBB-NEXT: vl1r.v v18, (t2)
; ZVBB-NEXT: vl1r.v v17, (a5)
-; ZVBB-NEXT: vl1r.v v11, (t3)
-; ZVBB-NEXT: vl1r.v v13, (a7)
+; ZVBB-NEXT: add a5, a2, a0
+; ZVBB-NEXT: vl1r.v v18, (a5)
+; ZVBB-NEXT: add a5, a5, a0
+; ZVBB-NEXT: vl1r.v v13, (t1)
+; ZVBB-NEXT: vl1r.v v9, (a7)
; ZVBB-NEXT: vl1r.v v15, (a3)
; ZVBB-NEXT: vsetvli a3, zero, e8, m2, ta, ma
; ZVBB-NEXT: vmsne.vi v20, v16, 0
-; ZVBB-NEXT: vmsne.vi v16, v10, 0
-; ZVBB-NEXT: vl1r.v v10, (a6)
-; ZVBB-NEXT: vmsne.vi v17, v12, 0
+; ZVBB-NEXT: vmsne.vi v16, v12, 0
+; ZVBB-NEXT: vl1r.v v12, (a6)
+; ZVBB-NEXT: vmsne.vi v17, v8, 0
; ZVBB-NEXT: vmsne.vi v0, v14, 0
-; ZVBB-NEXT: vl1r.v v12, (a1)
-; ZVBB-NEXT: vl1r.v v9, (a2)
-; ZVBB-NEXT: vl1r.v v19, (t1)
-; ZVBB-NEXT: vl1r.v v11, (t0)
-; ZVBB-NEXT: vl1r.v v13, (a4)
-; ZVBB-NEXT: vmsne.vi v14, v8, 0
+; ZVBB-NEXT: vl1r.v v14, (a1)
+; ZVBB-NEXT: vl1r.v v19, (a5)
+; ZVBB-NEXT: vl1r.v v11, (a2)
+; ZVBB-NEXT: vl1r.v v13, (t0)
+; ZVBB-NEXT: vl1r.v v15, (a4)
; ZVBB-NEXT: vmsne.vi v9, v18, 0
-; ZVBB-NEXT: vmsne.vi v15, v10, 0
-; ZVBB-NEXT: vmsne.vi v8, v12, 0
+; ZVBB-NEXT: vmsne.vi v18, v10, 0
+; ZVBB-NEXT: vmsne.vi v10, v12, 0
+; ZVBB-NEXT: vmsne.vi v8, v14, 0
; ZVBB-NEXT: srli a1, a0, 2
-; ZVBB-NEXT: add a2, a1, a1
-; ZVBB-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; ZVBB-NEXT: vslideup.vx v16, v20, a1
; ZVBB-NEXT: vslideup.vx v0, v17, a1
-; ZVBB-NEXT: vslideup.vx v9, v14, a1
-; ZVBB-NEXT: vslideup.vx v8, v15, a1
+; ZVBB-NEXT: vslideup.vx v18, v9, a1
+; ZVBB-NEXT: vslideup.vx v8, v10, a1
; ZVBB-NEXT: srli a0, a0, 1
-; ZVBB-NEXT: add a1, a0, a0
-; ZVBB-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; ZVBB-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v0, v16, a0
-; ZVBB-NEXT: vslideup.vx v8, v9, a0
+; ZVBB-NEXT: vslideup.vx v8, v18, a0
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a0, a0, 4
; ZVBB-NEXT: add sp, sp, a0
@@ -6294,14 +6267,12 @@ define <vscale x 4 x bfloat> @vector_interleave_nxv4bf16_nxv2bf16(<vscale x 2 x
; V-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; V-NEXT: vwaddu.vv v10, v8, v9
; V-NEXT: li a0, -1
-; V-NEXT: csrr a1, vlenb
; V-NEXT: vwmaccu.vx v10, a0, v9
-; V-NEXT: srli a1, a1, 2
-; V-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; V-NEXT: vslidedown.vx v8, v10, a1
-; V-NEXT: add a0, a1, a1
-; V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; V-NEXT: vslideup.vx v10, v8, a1
+; V-NEXT: csrr a0, vlenb
+; V-NEXT: srli a0, a0, 2
+; V-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; V-NEXT: vslidedown.vx v8, v10, a0
+; V-NEXT: vslideup.vx v10, v8, a0
; V-NEXT: vmv.v.v v8, v10
; V-NEXT: ret
;
@@ -6314,8 +6285,6 @@ define <vscale x 4 x bfloat> @vector_interleave_nxv4bf16_nxv2bf16(<vscale x 2 x
; ZVBB-NEXT: srli a0, a0, 2
; ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslidedown.vx v8, v10, a0
-; ZVBB-NEXT: add a1, a0, a0
-; ZVBB-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v10, v8, a0
; ZVBB-NEXT: vmv.v.v v8, v10
; ZVBB-NEXT: ret
@@ -6327,8 +6296,7 @@ define <vscale x 4 x bfloat> @vector_interleave_nxv4bf16_nxv2bf16(<vscale x 2 x
; ZIP-NEXT: ri.vzip2a.vv v10, v8, v9
; ZIP-NEXT: csrr a0, vlenb
; ZIP-NEXT: srli a0, a0, 2
-; ZIP-NEXT: add a1, a0, a0
-; ZIP-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; ZIP-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; ZIP-NEXT: vslideup.vx v10, v11, a0
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
@@ -6374,14 +6342,12 @@ define <vscale x 4 x half> @vector_interleave_nxv4f16_nxv2f16(<vscale x 2 x half
; V-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; V-NEXT: vwaddu.vv v10, v8, v9
; V-NEXT: li a0, -1
-; V-NEXT: csrr a1, vlenb
; V-NEXT: vwmaccu.vx v10, a0, v9
-; V-NEXT: srli a1, a1, 2
-; V-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; V-NEXT: vslidedown.vx v8, v10, a1
-; V-NEXT: add a0, a1, a1
-; V-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; V-NEXT: vslideup.vx v10, v8, a1
+; V-NEXT: csrr a0, vlenb
+; V-NEXT: srli a0, a0, 2
+; V-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; V-NEXT: vslidedown.vx v8, v10, a0
+; V-NEXT: vslideup.vx v10, v8, a0
; V-NEXT: vmv.v.v v8, v10
; V-NEXT: ret
;
@@ -6394,8 +6360,6 @@ define <vscale x 4 x half> @vector_interleave_nxv4f16_nxv2f16(<vscale x 2 x half
; ZVBB-NEXT: srli a0, a0, 2
; ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslidedown.vx v8, v10, a0
-; ZVBB-NEXT: add a1, a0, a0
-; ZVBB-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v10, v8, a0
; ZVBB-NEXT: vmv.v.v v8, v10
; ZVBB-NEXT: ret
@@ -6407,8 +6371,7 @@ define <vscale x 4 x half> @vector_interleave_nxv4f16_nxv2f16(<vscale x 2 x half
; ZIP-NEXT: ri.vzip2a.vv v10, v8, v9
; ZIP-NEXT: csrr a0, vlenb
; ZIP-NEXT: srli a0, a0, 2
-; ZIP-NEXT: add a1, a0, a0
-; ZIP-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; ZIP-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; ZIP-NEXT: vslideup.vx v10, v11, a0
; ZIP-NEXT: vmv.v.v v8, v10
; ZIP-NEXT: ret
@@ -6807,8 +6770,7 @@ define <vscale x 6 x half> @vector_interleave_nxv6f16_nxv2f16(<vscale x 2 x half
; CHECK-NEXT: vle16.v v9, (a3)
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: srli a1, a1, 2
-; CHECK-NEXT: add a0, a1, a1
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v9, a1
; CHECK-NEXT: add a2, a3, a2
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
@@ -6834,8 +6796,7 @@ define <vscale x 6 x half> @vector_interleave_nxv6f16_nxv2f16(<vscale x 2 x half
; ZVBB-NEXT: vle16.v v9, (a3)
; ZVBB-NEXT: vle16.v v8, (a0)
; ZVBB-NEXT: srli a1, a1, 2
-; ZVBB-NEXT: add a0, a1, a1
-; ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v9, a1
; ZVBB-NEXT: add a2, a3, a2
; ZVBB-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
@@ -6967,8 +6928,7 @@ define <vscale x 6 x bfloat> @vector_interleave_nxv6bf16_nxv2bf16(<vscale x 2 x
; CHECK-NEXT: vle16.v v9, (a3)
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: srli a1, a1, 2
-; CHECK-NEXT: add a0, a1, a1
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v9, a1
; CHECK-NEXT: add a2, a3, a2
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
@@ -6994,8 +6954,7 @@ define <vscale x 6 x bfloat> @vector_interleave_nxv6bf16_nxv2bf16(<vscale x 2 x
; ZVBB-NEXT: vle16.v v9, (a3)
; ZVBB-NEXT: vle16.v v8, (a0)
; ZVBB-NEXT: srli a1, a1, 2
-; ZVBB-NEXT: add a0, a1, a1
-; ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v9, a1
; ZVBB-NEXT: add a2, a3, a2
; ZVBB-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
@@ -7127,8 +7086,7 @@ define <vscale x 3 x float> @vector_interleave_nxv3f32_nxv1f32(<vscale x 1 x flo
; CHECK-NEXT: vle32.v v9, (a3)
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: srli a1, a1, 3
-; CHECK-NEXT: add a0, a1, a1
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v9, a1
; CHECK-NEXT: add a2, a3, a2
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
@@ -7154,8 +7112,7 @@ define <vscale x 3 x float> @vector_interleave_nxv3f32_nxv1f32(<vscale x 1 x flo
; ZVBB-NEXT: vle32.v v9, (a3)
; ZVBB-NEXT: vle32.v v8, (a0)
; ZVBB-NEXT: srli a1, a1, 3
-; ZVBB-NEXT: add a0, a1, a1
-; ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v9, a1
; ZVBB-NEXT: add a2, a3, a2
; ZVBB-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
@@ -7391,13 +7348,12 @@ define <vscale x 8 x half> @vector_interleave_nxv8f16_nxv2f16(<vscale x 2 x half
; CHECK-NEXT: vle16.v v9, (a4)
; CHECK-NEXT: vle16.v v8, (a2)
; CHECK-NEXT: srli a1, a1, 2
-; CHECK-NEXT: add a2, a1, a1
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v8, a1
-; CHECK-NEXT: vsetvli a4, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v10, (a3)
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v10, a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1
@@ -7422,13 +7378,12 @@ define <vscale x 8 x half> @vector_interleave_nxv8f16_nxv2f16(<vscale x 2 x half
; ZVBB-NEXT: vle16.v v9, (a4)
; ZVBB-NEXT: vle16.v v8, (a2)
; ZVBB-NEXT: srli a1, a1, 2
-; ZVBB-NEXT: add a2, a1, a1
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v8, a1
-; ZVBB-NEXT: vsetvli a4, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v10, (a3)
; ZVBB-NEXT: vle16.v v8, (a0)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v10, a1
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a0, a0, 1
@@ -7559,13 +7514,12 @@ define <vscale x 8 x bfloat> @vector_interleave_nxv8bf16_nxv2bf16(<vscale x 2 x
; CHECK-NEXT: vle16.v v9, (a4)
; CHECK-NEXT: vle16.v v8, (a2)
; CHECK-NEXT: srli a1, a1, 2
-; CHECK-NEXT: add a2, a1, a1
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v8, a1
-; CHECK-NEXT: vsetvli a4, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v10, (a3)
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v10, a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1
@@ -7590,13 +7544,12 @@ define <vscale x 8 x bfloat> @vector_interleave_nxv8bf16_nxv2bf16(<vscale x 2 x
; ZVBB-NEXT: vle16.v v9, (a4)
; ZVBB-NEXT: vle16.v v8, (a2)
; ZVBB-NEXT: srli a1, a1, 2
-; ZVBB-NEXT: add a2, a1, a1
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v8, a1
-; ZVBB-NEXT: vsetvli a4, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v10, (a3)
; ZVBB-NEXT: vle16.v v8, (a0)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v10, a1
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a0, a0, 1
@@ -7727,13 +7680,12 @@ define <vscale x 4 x float> @vector_interleave_nxv4f32_nxv1f32(<vscale x 1 x flo
; CHECK-NEXT: vle32.v v9, (a4)
; CHECK-NEXT: vle32.v v8, (a2)
; CHECK-NEXT: srli a1, a1, 3
-; CHECK-NEXT: add a2, a1, a1
-; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v8, a1
-; CHECK-NEXT: vsetvli a4, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v10, (a3)
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v10, a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1
@@ -7758,13 +7710,12 @@ define <vscale x 4 x float> @vector_interleave_nxv4f32_nxv1f32(<vscale x 1 x flo
; ZVBB-NEXT: vle32.v v9, (a4)
; ZVBB-NEXT: vle32.v v8, (a2)
; ZVBB-NEXT: srli a1, a1, 3
-; ZVBB-NEXT: add a2, a1, a1
-; ZVBB-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v8, a1
-; ZVBB-NEXT: vsetvli a4, zero, e32, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; ZVBB-NEXT: vle32.v v10, (a3)
; ZVBB-NEXT: vle32.v v8, (a0)
-; ZVBB-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v10, a1
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a0, a0, 1
@@ -7998,13 +7949,12 @@ define <vscale x 10 x half> @vector_interleave_nxv10f16_nxv2f16(<vscale x 2 x ha
; CHECK-NEXT: vle16.v v8, (a5)
; CHECK-NEXT: vle16.v v9, (a4)
; CHECK-NEXT: srli a1, a1, 2
-; CHECK-NEXT: add a4, a1, a1
; CHECK-NEXT: vle16.v v10, (a3)
-; CHECK-NEXT: vsetvli zero, a4, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v8, a1
; CHECK-NEXT: vsetvli a3, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a4, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v10, a1
; CHECK-NEXT: add a2, a5, a2
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
@@ -8034,13 +7984,12 @@ define <vscale x 10 x half> @vector_interleave_nxv10f16_nxv2f16(<vscale x 2 x ha
; ZVBB-NEXT: vle16.v v8, (a5)
; ZVBB-NEXT: vle16.v v9, (a4)
; ZVBB-NEXT: srli a1, a1, 2
-; ZVBB-NEXT: add a4, a1, a1
; ZVBB-NEXT: vle16.v v10, (a3)
-; ZVBB-NEXT: vsetvli zero, a4, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a3, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v8, a1
; ZVBB-NEXT: vsetvli a3, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v8, (a0)
-; ZVBB-NEXT: vsetvli zero, a4, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v10, a1
; ZVBB-NEXT: add a2, a5, a2
; ZVBB-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
@@ -8466,13 +8415,12 @@ define <vscale x 10 x bfloat> @vector_interleave_nxv10bf16_nxv2bf16(<vscale x 2
; CHECK-NEXT: vle16.v v8, (a5)
; CHECK-NEXT: vle16.v v9, (a4)
; CHECK-NEXT: srli a1, a1, 2
-; CHECK-NEXT: add a4, a1, a1
; CHECK-NEXT: vle16.v v10, (a3)
-; CHECK-NEXT: vsetvli zero, a4, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v8, a1
; CHECK-NEXT: vsetvli a3, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a4, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v10, a1
; CHECK-NEXT: add a2, a5, a2
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
@@ -8502,13 +8450,12 @@ define <vscale x 10 x bfloat> @vector_interleave_nxv10bf16_nxv2bf16(<vscale x 2
; ZVBB-NEXT: vle16.v v8, (a5)
; ZVBB-NEXT: vle16.v v9, (a4)
; ZVBB-NEXT: srli a1, a1, 2
-; ZVBB-NEXT: add a4, a1, a1
; ZVBB-NEXT: vle16.v v10, (a3)
-; ZVBB-NEXT: vsetvli zero, a4, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a3, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v8, a1
; ZVBB-NEXT: vsetvli a3, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v8, (a0)
-; ZVBB-NEXT: vsetvli zero, a4, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v10, a1
; ZVBB-NEXT: add a2, a5, a2
; ZVBB-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
@@ -8934,13 +8881,12 @@ define <vscale x 5 x float> @vector_interleave_nxv5f32_nxv1f32(<vscale x 1 x flo
; CHECK-NEXT: vle32.v v8, (a5)
; CHECK-NEXT: vle32.v v9, (a4)
; CHECK-NEXT: srli a1, a1, 3
-; CHECK-NEXT: add a4, a1, a1
; CHECK-NEXT: vle32.v v10, (a3)
-; CHECK-NEXT: vsetvli zero, a4, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v8, a1
; CHECK-NEXT: vsetvli a3, zero, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a4, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v10, a1
; CHECK-NEXT: add a2, a5, a2
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
@@ -8970,13 +8916,12 @@ define <vscale x 5 x float> @vector_interleave_nxv5f32_nxv1f32(<vscale x 1 x flo
; ZVBB-NEXT: vle32.v v8, (a5)
; ZVBB-NEXT: vle32.v v9, (a4)
; ZVBB-NEXT: srli a1, a1, 3
-; ZVBB-NEXT: add a4, a1, a1
; ZVBB-NEXT: vle32.v v10, (a3)
-; ZVBB-NEXT: vsetvli zero, a4, e32, m1, ta, ma
+; ZVBB-NEXT: vsetvli a3, zero, e32, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v8, a1
; ZVBB-NEXT: vsetvli a3, zero, e32, mf2, ta, ma
; ZVBB-NEXT: vle32.v v8, (a0)
-; ZVBB-NEXT: vsetvli zero, a4, e32, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v10, a1
; ZVBB-NEXT: add a2, a5, a2
; ZVBB-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
@@ -9796,18 +9741,17 @@ define <vscale x 12 x half> @vector_interleave_nxv12f16_nxv2f16(<vscale x 2 x ha
; CHECK-NEXT: vle16.v v10, (a6)
; CHECK-NEXT: vle16.v v8, (a2)
; CHECK-NEXT: srli a1, a1, 2
-; CHECK-NEXT: add a2, a1, a1
; CHECK-NEXT: vle16.v v11, (a5)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v10, v8, a1
-; CHECK-NEXT: vsetvli a5, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v9, (a4)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v11, a1
-; CHECK-NEXT: vsetvli a4, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v11, (a3)
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v11, a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a1, a0, 1
@@ -9836,18 +9780,17 @@ define <vscale x 12 x half> @vector_interleave_nxv12f16_nxv2f16(<vscale x 2 x ha
; ZVBB-NEXT: vle16.v v10, (a6)
; ZVBB-NEXT: vle16.v v8, (a2)
; ZVBB-NEXT: srli a1, a1, 2
-; ZVBB-NEXT: add a2, a1, a1
; ZVBB-NEXT: vle16.v v11, (a5)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v10, v8, a1
-; ZVBB-NEXT: vsetvli a5, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v9, (a4)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v11, a1
-; ZVBB-NEXT: vsetvli a4, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v11, (a3)
; ZVBB-NEXT: vle16.v v8, (a0)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v11, a1
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a1, a0, 1
@@ -10311,18 +10254,17 @@ define <vscale x 12 x bfloat> @vector_interleave_nxv12bf16_nxv2bf16(<vscale x 2
; CHECK-NEXT: vle16.v v10, (a6)
; CHECK-NEXT: vle16.v v8, (a2)
; CHECK-NEXT: srli a1, a1, 2
-; CHECK-NEXT: add a2, a1, a1
; CHECK-NEXT: vle16.v v11, (a5)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v10, v8, a1
-; CHECK-NEXT: vsetvli a5, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v9, (a4)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v11, a1
-; CHECK-NEXT: vsetvli a4, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v11, (a3)
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v11, a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a1, a0, 1
@@ -10351,18 +10293,17 @@ define <vscale x 12 x bfloat> @vector_interleave_nxv12bf16_nxv2bf16(<vscale x 2
; ZVBB-NEXT: vle16.v v10, (a6)
; ZVBB-NEXT: vle16.v v8, (a2)
; ZVBB-NEXT: srli a1, a1, 2
-; ZVBB-NEXT: add a2, a1, a1
; ZVBB-NEXT: vle16.v v11, (a5)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v10, v8, a1
-; ZVBB-NEXT: vsetvli a5, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v9, (a4)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v11, a1
-; ZVBB-NEXT: vsetvli a4, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v11, (a3)
; ZVBB-NEXT: vle16.v v8, (a0)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v11, a1
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a1, a0, 1
@@ -10826,18 +10767,17 @@ define <vscale x 6 x float> @vector_interleave_nxv6f32_nxv1f32(<vscale x 1 x flo
; CHECK-NEXT: vle32.v v10, (a6)
; CHECK-NEXT: vle32.v v8, (a2)
; CHECK-NEXT: srli a1, a1, 3
-; CHECK-NEXT: add a2, a1, a1
; CHECK-NEXT: vle32.v v11, (a5)
-; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v10, v8, a1
-; CHECK-NEXT: vsetvli a5, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v9, (a4)
-; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v11, a1
-; CHECK-NEXT: vsetvli a4, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v11, (a3)
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v11, a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a1, a0, 1
@@ -10866,18 +10806,17 @@ define <vscale x 6 x float> @vector_interleave_nxv6f32_nxv1f32(<vscale x 1 x flo
; ZVBB-NEXT: vle32.v v10, (a6)
; ZVBB-NEXT: vle32.v v8, (a2)
; ZVBB-NEXT: srli a1, a1, 3
-; ZVBB-NEXT: add a2, a1, a1
; ZVBB-NEXT: vle32.v v11, (a5)
-; ZVBB-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v10, v8, a1
-; ZVBB-NEXT: vsetvli a5, zero, e32, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; ZVBB-NEXT: vle32.v v9, (a4)
-; ZVBB-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v11, a1
-; ZVBB-NEXT: vsetvli a4, zero, e32, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; ZVBB-NEXT: vle32.v v11, (a3)
; ZVBB-NEXT: vle32.v v8, (a0)
-; ZVBB-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v11, a1
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a1, a0, 1
@@ -11761,7 +11700,6 @@ define <vscale x 14 x half> @vector_interleave_nxv14f16_nxv2f16(<vscale x 2 x ha
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a2, a1, 1
-; CHECK-NEXT: srli a1, a1, 2
; CHECK-NEXT: add a3, a0, a2
; CHECK-NEXT: add a4, a3, a2
; CHECK-NEXT: add a5, a4, a2
@@ -11771,20 +11709,20 @@ define <vscale x 14 x half> @vector_interleave_nxv14f16_nxv2f16(<vscale x 2 x ha
; CHECK-NEXT: add a7, a6, a2
; CHECK-NEXT: vle16.v v8, (a7)
; CHECK-NEXT: vle16.v v10, (a6)
-; CHECK-NEXT: add a6, a1, a1
+; CHECK-NEXT: srli a1, a1, 2
; CHECK-NEXT: add a2, a7, a2
; CHECK-NEXT: vle16.v v12, (a5)
-; CHECK-NEXT: vsetvli zero, a6, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a5, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v10, v8, a1
; CHECK-NEXT: vsetvli a5, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v11, (a2)
; CHECK-NEXT: vle16.v v9, (a4)
-; CHECK-NEXT: vsetvli zero, a6, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v12, a1
; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v12, (a3)
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a6, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v12, a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 2
@@ -11801,7 +11739,6 @@ define <vscale x 14 x half> @vector_interleave_nxv14f16_nxv2f16(<vscale x 2 x ha
; ZVBB-NEXT: addi a0, sp, 16
; ZVBB-NEXT: csrr a1, vlenb
; ZVBB-NEXT: srli a2, a1, 1
-; ZVBB-NEXT: srli a1, a1, 2
; ZVBB-NEXT: add a3, a0, a2
; ZVBB-NEXT: add a4, a3, a2
; ZVBB-NEXT: add a5, a4, a2
@@ -11811,20 +11748,20 @@ define <vscale x 14 x half> @vector_interleave_nxv14f16_nxv2f16(<vscale x 2 x ha
; ZVBB-NEXT: add a7, a6, a2
; ZVBB-NEXT: vle16.v v8, (a7)
; ZVBB-NEXT: vle16.v v10, (a6)
-; ZVBB-NEXT: add a6, a1, a1
+; ZVBB-NEXT: srli a1, a1, 2
; ZVBB-NEXT: add a2, a7, a2
; ZVBB-NEXT: vle16.v v12, (a5)
-; ZVBB-NEXT: vsetvli zero, a6, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a5, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v10, v8, a1
; ZVBB-NEXT: vsetvli a5, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v11, (a2)
; ZVBB-NEXT: vle16.v v9, (a4)
-; ZVBB-NEXT: vsetvli zero, a6, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v12, a1
; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v12, (a3)
; ZVBB-NEXT: vle16.v v8, (a0)
-; ZVBB-NEXT: vsetvli zero, a6, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v12, a1
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a0, a0, 2
@@ -12325,7 +12262,6 @@ define <vscale x 14 x bfloat> @vector_interleave_nxv14bf16_nxv2bf16(<vscale x 2
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a2, a1, 1
-; CHECK-NEXT: srli a1, a1, 2
; CHECK-NEXT: add a3, a0, a2
; CHECK-NEXT: add a4, a3, a2
; CHECK-NEXT: add a5, a4, a2
@@ -12335,20 +12271,20 @@ define <vscale x 14 x bfloat> @vector_interleave_nxv14bf16_nxv2bf16(<vscale x 2
; CHECK-NEXT: add a7, a6, a2
; CHECK-NEXT: vle16.v v8, (a7)
; CHECK-NEXT: vle16.v v10, (a6)
-; CHECK-NEXT: add a6, a1, a1
+; CHECK-NEXT: srli a1, a1, 2
; CHECK-NEXT: add a2, a7, a2
; CHECK-NEXT: vle16.v v12, (a5)
-; CHECK-NEXT: vsetvli zero, a6, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a5, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v10, v8, a1
; CHECK-NEXT: vsetvli a5, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v11, (a2)
; CHECK-NEXT: vle16.v v9, (a4)
-; CHECK-NEXT: vsetvli zero, a6, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v12, a1
; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v12, (a3)
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a6, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v12, a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 2
@@ -12365,7 +12301,6 @@ define <vscale x 14 x bfloat> @vector_interleave_nxv14bf16_nxv2bf16(<vscale x 2
; ZVBB-NEXT: addi a0, sp, 16
; ZVBB-NEXT: csrr a1, vlenb
; ZVBB-NEXT: srli a2, a1, 1
-; ZVBB-NEXT: srli a1, a1, 2
; ZVBB-NEXT: add a3, a0, a2
; ZVBB-NEXT: add a4, a3, a2
; ZVBB-NEXT: add a5, a4, a2
@@ -12375,20 +12310,20 @@ define <vscale x 14 x bfloat> @vector_interleave_nxv14bf16_nxv2bf16(<vscale x 2
; ZVBB-NEXT: add a7, a6, a2
; ZVBB-NEXT: vle16.v v8, (a7)
; ZVBB-NEXT: vle16.v v10, (a6)
-; ZVBB-NEXT: add a6, a1, a1
+; ZVBB-NEXT: srli a1, a1, 2
; ZVBB-NEXT: add a2, a7, a2
; ZVBB-NEXT: vle16.v v12, (a5)
-; ZVBB-NEXT: vsetvli zero, a6, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a5, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v10, v8, a1
; ZVBB-NEXT: vsetvli a5, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v11, (a2)
; ZVBB-NEXT: vle16.v v9, (a4)
-; ZVBB-NEXT: vsetvli zero, a6, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v12, a1
; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v12, (a3)
; ZVBB-NEXT: vle16.v v8, (a0)
-; ZVBB-NEXT: vsetvli zero, a6, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v12, a1
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a0, a0, 2
@@ -12889,7 +12824,6 @@ define <vscale x 7 x float> @vector_interleave_nxv7f32_nxv1f32(<vscale x 1 x flo
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a2, a1, 1
-; CHECK-NEXT: srli a1, a1, 3
; CHECK-NEXT: add a3, a0, a2
; CHECK-NEXT: add a4, a3, a2
; CHECK-NEXT: add a5, a4, a2
@@ -12899,20 +12833,20 @@ define <vscale x 7 x float> @vector_interleave_nxv7f32_nxv1f32(<vscale x 1 x flo
; CHECK-NEXT: add a7, a6, a2
; CHECK-NEXT: vle32.v v8, (a7)
; CHECK-NEXT: vle32.v v10, (a6)
-; CHECK-NEXT: add a6, a1, a1
+; CHECK-NEXT: srli a1, a1, 3
; CHECK-NEXT: add a2, a7, a2
; CHECK-NEXT: vle32.v v12, (a5)
-; CHECK-NEXT: vsetvli zero, a6, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v10, v8, a1
; CHECK-NEXT: vsetvli a5, zero, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v11, (a2)
; CHECK-NEXT: vle32.v v9, (a4)
-; CHECK-NEXT: vsetvli zero, a6, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v12, a1
; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v12, (a3)
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a6, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v12, a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 2
@@ -12929,7 +12863,6 @@ define <vscale x 7 x float> @vector_interleave_nxv7f32_nxv1f32(<vscale x 1 x flo
; ZVBB-NEXT: addi a0, sp, 16
; ZVBB-NEXT: csrr a1, vlenb
; ZVBB-NEXT: srli a2, a1, 1
-; ZVBB-NEXT: srli a1, a1, 3
; ZVBB-NEXT: add a3, a0, a2
; ZVBB-NEXT: add a4, a3, a2
; ZVBB-NEXT: add a5, a4, a2
@@ -12939,20 +12872,20 @@ define <vscale x 7 x float> @vector_interleave_nxv7f32_nxv1f32(<vscale x 1 x flo
; ZVBB-NEXT: add a7, a6, a2
; ZVBB-NEXT: vle32.v v8, (a7)
; ZVBB-NEXT: vle32.v v10, (a6)
-; ZVBB-NEXT: add a6, a1, a1
+; ZVBB-NEXT: srli a1, a1, 3
; ZVBB-NEXT: add a2, a7, a2
; ZVBB-NEXT: vle32.v v12, (a5)
-; ZVBB-NEXT: vsetvli zero, a6, e32, m1, ta, ma
+; ZVBB-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v10, v8, a1
; ZVBB-NEXT: vsetvli a5, zero, e32, mf2, ta, ma
; ZVBB-NEXT: vle32.v v11, (a2)
; ZVBB-NEXT: vle32.v v9, (a4)
-; ZVBB-NEXT: vsetvli zero, a6, e32, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v12, a1
; ZVBB-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; ZVBB-NEXT: vle32.v v12, (a3)
; ZVBB-NEXT: vle32.v v8, (a0)
-; ZVBB-NEXT: vsetvli zero, a6, e32, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v12, a1
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a0, a0, 2
@@ -13945,23 +13878,22 @@ define <vscale x 16 x half> @vector_interleave_nxv16f16_nxv2f16(<vscale x 2 x ha
; CHECK-NEXT: vle16.v v11, (t0)
; CHECK-NEXT: vle16.v v8, (a2)
; CHECK-NEXT: srli a1, a1, 2
-; CHECK-NEXT: add a2, a1, a1
; CHECK-NEXT: vle16.v v9, (a7)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v11, v8, a1
-; CHECK-NEXT: vsetvli a7, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v10, (a6)
; CHECK-NEXT: vle16.v v8, (a5)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v10, v9, a1
-; CHECK-NEXT: vsetvli a5, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v9, (a4)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v8, a1
-; CHECK-NEXT: vsetvli a4, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v12, (a3)
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v12, a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 2
@@ -13990,23 +13922,22 @@ define <vscale x 16 x half> @vector_interleave_nxv16f16_nxv2f16(<vscale x 2 x ha
; ZVBB-NEXT: vle16.v v11, (t0)
; ZVBB-NEXT: vle16.v v8, (a2)
; ZVBB-NEXT: srli a1, a1, 2
-; ZVBB-NEXT: add a2, a1, a1
; ZVBB-NEXT: vle16.v v9, (a7)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v11, v8, a1
-; ZVBB-NEXT: vsetvli a7, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v10, (a6)
; ZVBB-NEXT: vle16.v v8, (a5)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v10, v9, a1
-; ZVBB-NEXT: vsetvli a5, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v9, (a4)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v8, a1
-; ZVBB-NEXT: vsetvli a4, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v12, (a3)
; ZVBB-NEXT: vle16.v v8, (a0)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v12, a1
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a0, a0, 2
@@ -14243,23 +14174,22 @@ define <vscale x 16 x bfloat> @vector_interleave_nxv16bf16_nxv2bf16(<vscale x 2
; CHECK-NEXT: vle16.v v11, (t0)
; CHECK-NEXT: vle16.v v8, (a2)
; CHECK-NEXT: srli a1, a1, 2
-; CHECK-NEXT: add a2, a1, a1
; CHECK-NEXT: vle16.v v9, (a7)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v11, v8, a1
-; CHECK-NEXT: vsetvli a7, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v10, (a6)
; CHECK-NEXT: vle16.v v8, (a5)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v10, v9, a1
-; CHECK-NEXT: vsetvli a5, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v9, (a4)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v8, a1
-; CHECK-NEXT: vsetvli a4, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v12, (a3)
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v12, a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 2
@@ -14288,23 +14218,22 @@ define <vscale x 16 x bfloat> @vector_interleave_nxv16bf16_nxv2bf16(<vscale x 2
; ZVBB-NEXT: vle16.v v11, (t0)
; ZVBB-NEXT: vle16.v v8, (a2)
; ZVBB-NEXT: srli a1, a1, 2
-; ZVBB-NEXT: add a2, a1, a1
; ZVBB-NEXT: vle16.v v9, (a7)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v11, v8, a1
-; ZVBB-NEXT: vsetvli a7, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v10, (a6)
; ZVBB-NEXT: vle16.v v8, (a5)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v10, v9, a1
-; ZVBB-NEXT: vsetvli a5, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v9, (a4)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v8, a1
-; ZVBB-NEXT: vsetvli a4, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vle16.v v12, (a3)
; ZVBB-NEXT: vle16.v v8, (a0)
-; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v12, a1
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a0, a0, 2
@@ -14541,23 +14470,22 @@ define <vscale x 8 x float> @vector_interleave_nxv8f32_nxv1f32(<vscale x 1 x flo
; CHECK-NEXT: vle32.v v11, (t0)
; CHECK-NEXT: vle32.v v8, (a2)
; CHECK-NEXT: srli a1, a1, 3
-; CHECK-NEXT: add a2, a1, a1
; CHECK-NEXT: vle32.v v9, (a7)
-; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v11, v8, a1
-; CHECK-NEXT: vsetvli a7, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v10, (a6)
; CHECK-NEXT: vle32.v v8, (a5)
-; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v10, v9, a1
-; CHECK-NEXT: vsetvli a5, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v9, (a4)
-; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v9, v8, a1
-; CHECK-NEXT: vsetvli a4, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v12, (a3)
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v12, a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 2
@@ -14586,23 +14514,22 @@ define <vscale x 8 x float> @vector_interleave_nxv8f32_nxv1f32(<vscale x 1 x flo
; ZVBB-NEXT: vle32.v v11, (t0)
; ZVBB-NEXT: vle32.v v8, (a2)
; ZVBB-NEXT: srli a1, a1, 3
-; ZVBB-NEXT: add a2, a1, a1
; ZVBB-NEXT: vle32.v v9, (a7)
-; ZVBB-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v11, v8, a1
-; ZVBB-NEXT: vsetvli a7, zero, e32, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; ZVBB-NEXT: vle32.v v10, (a6)
; ZVBB-NEXT: vle32.v v8, (a5)
-; ZVBB-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v10, v9, a1
-; ZVBB-NEXT: vsetvli a5, zero, e32, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; ZVBB-NEXT: vle32.v v9, (a4)
-; ZVBB-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v9, v8, a1
-; ZVBB-NEXT: vsetvli a4, zero, e32, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
; ZVBB-NEXT: vle32.v v12, (a3)
; ZVBB-NEXT: vle32.v v8, (a0)
-; ZVBB-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v8, v12, a1
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a0, a0, 2
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll
index df7af4d8..111fa36 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll
@@ -634,12 +634,11 @@ define <vscale x 32 x i1> @vfptosi_nxv32bf16_nxv32i1(<vscale x 32 x bfloat> %va)
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v16
; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v24
-; CHECK-NEXT: add a1, a0, a0
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vand.vi v12, v12, 1
; CHECK-NEXT: vmsne.vi v16, v8, 0
; CHECK-NEXT: vmsne.vi v0, v12, 0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v0, v16, a0
; CHECK-NEXT: ret
%evec = fptosi <vscale x 32 x bfloat> %va to <vscale x 32 x i1>
@@ -656,12 +655,11 @@ define <vscale x 32 x i1> @vfptoui_nxv32bf16_nxv32i1(<vscale x 32 x bfloat> %va)
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v16
; CHECK-NEXT: vfncvt.rtz.xu.f.w v12, v24
-; CHECK-NEXT: add a1, a0, a0
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vand.vi v12, v12, 1
; CHECK-NEXT: vmsne.vi v16, v8, 0
; CHECK-NEXT: vmsne.vi v0, v12, 0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v0, v16, a0
; CHECK-NEXT: ret
%evec = fptoui <vscale x 32 x bfloat> %va to <vscale x 32 x i1>
@@ -1654,12 +1652,11 @@ define <vscale x 32 x i1> @vfptosi_nxv32f16_nxv32i1(<vscale x 32 x half> %va) {
; ZVFHMIN-NEXT: srli a0, a0, 2
; ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v8, v16
; ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v12, v24
-; ZVFHMIN-NEXT: add a1, a0, a0
; ZVFHMIN-NEXT: vand.vi v8, v8, 1
; ZVFHMIN-NEXT: vand.vi v12, v12, 1
; ZVFHMIN-NEXT: vmsne.vi v16, v8, 0
; ZVFHMIN-NEXT: vmsne.vi v0, v12, 0
-; ZVFHMIN-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslideup.vx v0, v16, a0
; ZVFHMIN-NEXT: ret
%evec = fptosi <vscale x 32 x half> %va to <vscale x 32 x i1>
@@ -1684,12 +1681,11 @@ define <vscale x 32 x i1> @vfptoui_nxv32f16_nxv32i1(<vscale x 32 x half> %va) {
; ZVFHMIN-NEXT: srli a0, a0, 2
; ZVFHMIN-NEXT: vfncvt.rtz.xu.f.w v8, v16
; ZVFHMIN-NEXT: vfncvt.rtz.xu.f.w v12, v24
-; ZVFHMIN-NEXT: add a1, a0, a0
; ZVFHMIN-NEXT: vand.vi v8, v8, 1
; ZVFHMIN-NEXT: vand.vi v12, v12, 1
; ZVFHMIN-NEXT: vmsne.vi v16, v8, 0
; ZVFHMIN-NEXT: vmsne.vi v0, v12, 0
-; ZVFHMIN-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslideup.vx v0, v16, a0
; ZVFHMIN-NEXT: ret
%evec = fptoui <vscale x 32 x half> %va to <vscale x 32 x i1>
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
index 142ee52..1868154 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
@@ -567,38 +567,37 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> %
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
; RV32-NEXT: vmv1r.v v8, v0
-; RV32-NEXT: slli a2, a1, 1
; RV32-NEXT: vmv.v.i v9, 0
-; RV32-NEXT: li a1, -1
+; RV32-NEXT: li a2, -1
; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
; RV32-NEXT: vmv.v.i v10, 0
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
+; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; RV32-NEXT: vmerge.vim v11, v9, 1, v0
-; RV32-NEXT: srli a3, a3, 2
; RV32-NEXT: vwaddu.vv v12, v11, v11
-; RV32-NEXT: vwmaccu.vx v12, a1, v11
+; RV32-NEXT: vwmaccu.vx v12, a2, v11
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: srli a2, a2, 2
; RV32-NEXT: vmsne.vi v0, v12, 0
-; RV32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; RV32-NEXT: vslidedown.vx v11, v12, a3
+; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; RV32-NEXT: vslidedown.vx v11, v12, a2
; RV32-NEXT: vmerge.vim v10, v10, 1, v0
-; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; RV32-NEXT: vmsne.vi v0, v11, 0
-; RV32-NEXT: add a1, a3, a3
+; RV32-NEXT: slli a3, a1, 1
; RV32-NEXT: vmerge.vim v9, v9, 1, v0
-; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; RV32-NEXT: vslideup.vx v10, v9, a3
-; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV32-NEXT: vslideup.vx v10, v9, a2
+; RV32-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
; RV32-NEXT: vmsne.vi v0, v10, 0
; RV32-NEXT: vle32.v v10, (a0), v0.t
; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; RV32-NEXT: vnsrl.wx v13, v10, a1
; RV32-NEXT: vmv.x.s a1, v10
; RV32-NEXT: vnsrl.wi v12, v10, 0
-; RV32-NEXT: srli a2, a2, 1
+; RV32-NEXT: srli a3, a3, 1
; RV32-NEXT: vmv1r.v v0, v8
-; RV32-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; RV32-NEXT: vsetvli zero, a3, e32, m1, ta, ma
; RV32-NEXT: vsseg2e32.v v12, (a0), v0.t
; RV32-NEXT: mv a0, a1
; RV32-NEXT: ret
@@ -611,26 +610,24 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> %
; RV64-NEXT: li a2, -1
; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
; RV64-NEXT: vmv.v.i v10, 0
-; RV64-NEXT: csrr a3, vlenb
-; RV64-NEXT: slli a4, a1, 33
-; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; RV64-NEXT: vmerge.vim v11, v9, 1, v0
-; RV64-NEXT: srli a3, a3, 2
; RV64-NEXT: vwaddu.vv v12, v11, v11
; RV64-NEXT: vwmaccu.vx v12, a2, v11
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: srli a2, a2, 2
; RV64-NEXT: vmsne.vi v0, v12, 0
-; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; RV64-NEXT: vslidedown.vx v11, v12, a3
+; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; RV64-NEXT: vslidedown.vx v11, v12, a2
; RV64-NEXT: vmerge.vim v10, v10, 1, v0
-; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; RV64-NEXT: vmsne.vi v0, v11, 0
-; RV64-NEXT: add a1, a3, a3
+; RV64-NEXT: slli a3, a1, 33
; RV64-NEXT: vmerge.vim v9, v9, 1, v0
-; RV64-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; RV64-NEXT: vslideup.vx v10, v9, a3
; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV64-NEXT: vslideup.vx v10, v9, a2
; RV64-NEXT: vmsne.vi v0, v10, 0
-; RV64-NEXT: srli a1, a4, 32
+; RV64-NEXT: srli a1, a3, 32
; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT: vle32.v v10, (a0), v0.t
; RV64-NEXT: li a1, 32
@@ -638,9 +635,9 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> %
; RV64-NEXT: vnsrl.wx v13, v10, a1
; RV64-NEXT: vmv.x.s a1, v10
; RV64-NEXT: vnsrl.wi v12, v10, 0
-; RV64-NEXT: srli a4, a4, 33
+; RV64-NEXT: srli a3, a3, 33
; RV64-NEXT: vmv1r.v v0, v8
-; RV64-NEXT: vsetvli zero, a4, e32, m1, ta, ma
+; RV64-NEXT: vsetvli zero, a3, e32, m1, ta, ma
; RV64-NEXT: vsseg2e32.v v12, (a0), v0.t
; RV64-NEXT: mv a0, a1
; RV64-NEXT: ret
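
In the hunks for this function, dropping the `add` also lets the explicit AVL disappear: the old `vsetvli zero, aN, e8, mf2, ta, ma` requested `vl = aN`, while the new `vsetvli aN, zero, e8, mf2, ta, ma` (rs1 = x0, rd != x0) requests VLMAX. That appears consistent because the removed add computed `vlenb >> 1`, which for SEW=8 and LMUL=1/2 is exactly VLMAX; the e32/m1 hunks below work out the same way with `vlenb >> 2`. The check below is a rough back-of-the-envelope sketch in C, with `vlmax` a made-up helper rather than anything from the patch, using the usual VLMAX = (VLEN/SEW) * LMUL relation.

```c
#include <assert.h>
#include <stdint.h>

/* Hypothetical helper: VLMAX = (VLEN / SEW) * LMUL, with LMUL expressed as
   the fraction lmul_num / lmul_den.                                        */
static uint64_t vlmax(uint64_t vlenb, unsigned sew_bits,
                      unsigned lmul_num, unsigned lmul_den) {
  return (vlenb * 8 / sew_bits) * lmul_num / lmul_den;
}

int main(void) {
  uint64_t vlenb = 16; /* example VLEN = 128 bits */
  /* e8, mf2: the old AVL was (vlenb >> 2) + (vlenb >> 2) == vlenb >> 1.    */
  assert(vlmax(vlenb, 8, 1, 2) == (vlenb >> 1));
  /* e32, m1: the old AVL was (vlenb >> 3) + (vlenb >> 3) == vlenb >> 2.    */
  assert(vlmax(vlenb, 32, 1, 1) == (vlenb >> 2));
  return 0;
}
```
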
@@ -807,10 +804,7 @@ define void @not_balanced_store_tree(<vscale x 1 x i32> %v0, <vscale x 2 x i32>
; RV32-NEXT: srli a3, a3, 3
; RV32-NEXT: vsetvli a4, zero, e32, m1, ta, ma
; RV32-NEXT: vslidedown.vx v8, v12, a3
-; RV32-NEXT: add a4, a3, a3
-; RV32-NEXT: vsetvli zero, a4, e32, m1, ta, ma
; RV32-NEXT: vslideup.vx v12, v8, a3
-; RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma
; RV32-NEXT: vwaddu.vv v16, v12, v9
; RV32-NEXT: vwmaccu.vx v16, a2, v9
; RV32-NEXT: vsetvli a3, zero, e32, m2, ta, ma
@@ -831,10 +825,7 @@ define void @not_balanced_store_tree(<vscale x 1 x i32> %v0, <vscale x 2 x i32>
; RV64-NEXT: srli a3, a3, 3
; RV64-NEXT: vsetvli a4, zero, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vx v8, v12, a3
-; RV64-NEXT: add a4, a3, a3
-; RV64-NEXT: vsetvli zero, a4, e32, m1, ta, ma
; RV64-NEXT: vslideup.vx v12, v8, a3
-; RV64-NEXT: vsetvli a3, zero, e32, m1, ta, ma
; RV64-NEXT: vwaddu.vv v16, v12, v9
; RV64-NEXT: vwmaccu.vx v16, a2, v9
; RV64-NEXT: vsetvli a3, zero, e32, m2, ta, ma
@@ -858,29 +849,28 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>} @not_same_mask(<vscale x 2 x i1>
; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
; RV32-NEXT: vmv1r.v v9, v0
; RV32-NEXT: vmv1r.v v0, v8
-; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: vmv.v.i v8, 0
; RV32-NEXT: li a2, -1
; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
; RV32-NEXT: vmv.v.i v10, 0
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
+; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; RV32-NEXT: vmerge.vim v11, v8, 1, v0
; RV32-NEXT: vmv1r.v v0, v9
; RV32-NEXT: vmerge.vim v9, v8, 1, v0
-; RV32-NEXT: srli a3, a3, 2
; RV32-NEXT: vwaddu.vv v12, v9, v11
; RV32-NEXT: vwmaccu.vx v12, a2, v11
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: srli a2, a2, 2
; RV32-NEXT: vmsne.vi v0, v12, 0
-; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
-; RV32-NEXT: vslidedown.vx v9, v12, a3
+; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; RV32-NEXT: vslidedown.vx v9, v12, a2
; RV32-NEXT: vmerge.vim v10, v10, 1, v0
-; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
+; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; RV32-NEXT: vmsne.vi v0, v9, 0
-; RV32-NEXT: add a2, a3, a3
+; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: vmerge.vim v8, v8, 1, v0
-; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
-; RV32-NEXT: vslideup.vx v10, v8, a3
+; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; RV32-NEXT: vslideup.vx v10, v8, a2
; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; RV32-NEXT: vmsne.vi v0, v10, 0
; RV32-NEXT: vle32.v v10, (a0), v0.t
@@ -899,26 +889,24 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>} @not_same_mask(<vscale x 2 x i1>
; RV64-NEXT: li a2, -1
; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
; RV64-NEXT: vmv.v.i v10, 0
-; RV64-NEXT: csrr a3, vlenb
-; RV64-NEXT: slli a1, a1, 33
-; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
+; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; RV64-NEXT: vmerge.vim v11, v8, 1, v0
; RV64-NEXT: vmv1r.v v0, v9
; RV64-NEXT: vmerge.vim v9, v8, 1, v0
-; RV64-NEXT: srli a3, a3, 2
; RV64-NEXT: vwaddu.vv v12, v9, v11
; RV64-NEXT: vwmaccu.vx v12, a2, v11
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: srli a2, a2, 2
; RV64-NEXT: vmsne.vi v0, v12, 0
-; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
-; RV64-NEXT: vslidedown.vx v9, v12, a3
+; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; RV64-NEXT: vslidedown.vx v9, v12, a2
; RV64-NEXT: vmerge.vim v10, v10, 1, v0
-; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
+; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; RV64-NEXT: vmsne.vi v0, v9, 0
-; RV64-NEXT: add a2, a3, a3
+; RV64-NEXT: slli a1, a1, 33
; RV64-NEXT: vmerge.vim v8, v8, 1, v0
-; RV64-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
-; RV64-NEXT: vslideup.vx v10, v8, a3
-; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; RV64-NEXT: vslideup.vx v10, v8, a2
; RV64-NEXT: vmsne.vi v0, v10, 0
; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma