-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp                  |  30
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll             |   4
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll              |  19
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll                   | 283
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll                      |  12
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll                  |   9
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll                 |   3
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll      |   2
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll     | 194
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll           |  18
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll       |   9
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll             | 817
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll                 |  12
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll  | 100

14 files changed, 705 insertions, 807 deletions
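The patch adds a RISC-V-specific DAG combine that rewrites (add (srl x, n), (srl x, n)) into (srl x, n-1) whenever the bit shifted out by the wider shift is known to be zero; per the comment in the patch, the pattern typically arises when x is READ_VLENB. The snippet below is only an illustrative, standalone brute-force check of the underlying arithmetic identity; it is not part of the patch and uses no LLVM APIs.

    // Illustrative check (not part of the patch):
    //   (x >> n) + (x >> n) == x >> (n - 1)  whenever bit n-1 of x is 0,
    // i.e. the wider right shift does not discard a set bit.
    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint64_t x = 0; x < (1u << 12); ++x) {
        for (unsigned n = 1; n < 12; ++n) {
          if ((x >> (n - 1)) & 1)
            continue; // precondition not met: bit n-1 of x is set
          assert((x >> n) + (x >> n) == (x >> (n - 1)));
        }
      }
    }

This is why the combine checks computeKnownBits before narrowing the shift, and why vlenb-derived values (whose low bits are zero) benefit: sequences such as "srli a0, a0, 3; add a1, a0, a0" in the old test output become a single "srli a1, a0, 2", as seen throughout the test updates below.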
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7cfada6..046bc11 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -15240,12 +15240,42 @@ static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
                      N0.getOperand(0));
 }
 
+// Try to turn (add (srl x, n), (srl x, n)) into (srl x, n-1).
+//
+// This combine could perhaps be moved to DAGCombiner. For RISCV this kind of
+// pattern seems to appear in situations where x is READ_VLENB, which matches
+// the condition that the lsb of x needs to be zero.
+static SDValue combineAddSrl(SDNode *N, SelectionDAG &DAG) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT VT = N->getValueType(0);
+  SDLoc DL(N);
+
+  // Match (add (srl x, n), (srl x, n)).
+  if (N0 != N1 || N0.getOpcode() != ISD::SRL)
+    return SDValue();
+
+  // Need an srl with a constant shift amount of at least 1.
+  std::optional<uint64_t> ShAmt = DAG.getValidShiftAmount(N0);
+  if (!ShAmt || *ShAmt == 0)
+    return SDValue();
+
+  // The last bit shifted out by the srl should be known zero.
+  if (!DAG.computeKnownBits(N0.getOperand(0)).Zero[*ShAmt - 1])
+    return SDValue();
+
+  SDValue NewAmt = DAG.getShiftAmountConstant(*ShAmt - 1, VT, DL);
+  return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), NewAmt);
+}
+
 static SDValue performADDCombine(SDNode *N,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const RISCVSubtarget &Subtarget) {
   SelectionDAG &DAG = DCI.DAG;
   if (SDValue V = combineAddOfBooleanXor(N, DAG))
     return V;
+  if (SDValue V = combineAddSrl(N, DAG))
+    return V;
   if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
     return V;
   if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
index 83637e4..0d288c0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
@@ -490,8 +490,6 @@ define <vscale x 6 x half> @extract_nxv6f16_nxv12f16_6(<vscale x 12 x half> %in)
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vslidedown.vx v13, v10, a0
 ; CHECK-NEXT:    vslidedown.vx v12, v9, a0
-; CHECK-NEXT:    add a1, a0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
 ; CHECK-NEXT:    vslideup.vx v12, v10, a0
 ; CHECK-NEXT:    vmv2r.v v8, v12
 ; CHECK-NEXT:    ret
@@ -545,8 +543,6 @@ define <vscale x 6 x bfloat> @extract_nxv6bf16_nxv12bf16_6(<vscale x 12 x bfloat
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vslidedown.vx v13, v10, a0
 ; CHECK-NEXT:    vslidedown.vx v12, v9, a0
-; CHECK-NEXT:    add a1, a0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
 ; CHECK-NEXT:    vslideup.vx v12, v10, a0
 ; CHECK-NEXT:    vmv2r.v v8, v12
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
index ca9cec9..d643e8e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
@@ -246,8 +246,7 @@ define <vscale x 16 x i32> @insert_nxv16i32_nxv1i32_1(<vscale x 16 x i32> %vec,
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    srli a0, a0, 3
-; CHECK-NEXT:    add a1, a0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vslideup.vx v8, v16, a0
 ; CHECK-NEXT:    ret
   %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec, i64 1)
@@ -282,8 
+281,8 @@ define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_1(<vscale x 16 x i8> %vec, <vsc ; CHECK-LABEL: insert_nxv16i8_nxv1i8_1: ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a1, a0, 2 ; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v10, a0 ; CHECK-NEXT: ret @@ -363,8 +362,7 @@ define <vscale x 32 x half> @insert_nxv32f16_nxv2f16_2(<vscale x 32 x half> %vec ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v16, a0 ; CHECK-NEXT: ret %v = call <vscale x 32 x half> @llvm.vector.insert.nxv2f16.nxv32f16(<vscale x 32 x half> %vec, <vscale x 2 x half> %subvec, i64 2) @@ -376,8 +374,7 @@ define <vscale x 32 x half> @insert_nxv32f16_nxv2f16_26(<vscale x 32 x half> %ve ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v14, v16, a0 ; CHECK-NEXT: ret %v = call <vscale x 32 x half> @llvm.vector.insert.nxv2f16.nxv32f16(<vscale x 32 x half> %vec, <vscale x 2 x half> %subvec, i64 26) @@ -422,8 +419,8 @@ define <vscale x 32 x i1> @insert_nxv32i1_nxv8i1_8(<vscale x 32 x i1> %v, <vscal ; CHECK-LABEL: insert_nxv32i1_nxv8i1_8: ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a1, a0, 2 ; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vx v0, v8, a0 ; CHECK-NEXT: ret @@ -570,8 +567,7 @@ define <vscale x 32 x bfloat> @insert_nxv32bf16_nxv2bf16_2(<vscale x 32 x bfloat ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v16, a0 ; CHECK-NEXT: ret %v = call <vscale x 32 x bfloat> @llvm.vector.insert.nxv2bf16.nxv32bf16(<vscale x 32 x bfloat> %vec, <vscale x 2 x bfloat> %subvec, i64 2) @@ -583,8 +579,7 @@ define <vscale x 32 x bfloat> @insert_nxv32bf16_nxv2bf16_26(<vscale x 32 x bfloa ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v14, v16, a0 ; CHECK-NEXT: ret %v = call <vscale x 32 x bfloat> @llvm.vector.insert.nxv2bf16.nxv32bf16(<vscale x 32 x bfloat> %vec, <vscale x 2 x bfloat> %subvec, i64 26) diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll index 28b27bb..9972df9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll @@ -1371,6 +1371,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: mv a3, a1 ; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a3, a3, a1 +; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: add a1, a1, a3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 @@ -1378,9 +1380,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv8r.v v0, v16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: mv a3, a1 ; CHECK-NEXT: slli 
a1, a1, 1 -; CHECK-NEXT: add a3, a3, a1 +; CHECK-NEXT: mv a3, a1 ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, a1, a3 ; CHECK-NEXT: add a1, sp, a1 @@ -1406,6 +1407,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs ; CHECK-NEXT: slli t0, t0, 1 ; CHECK-NEXT: mv t1, t0 ; CHECK-NEXT: slli t0, t0, 2 +; CHECK-NEXT: add t1, t1, t0 +; CHECK-NEXT: slli t0, t0, 1 ; CHECK-NEXT: add t0, t0, t1 ; CHECK-NEXT: add t0, sp, t0 ; CHECK-NEXT: addi t0, t0, 16 @@ -1413,9 +1416,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs ; CHECK-NEXT: vslidedown.vx v16, v8, a1 ; CHECK-NEXT: vl8re16.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: mv t0, a0 ; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: add t0, t0, a0 +; CHECK-NEXT: mv t0, a0 ; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: add a0, a0, t0 ; CHECK-NEXT: add a0, sp, a0 @@ -1445,10 +1447,6 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs ; CHECK-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v5, v8, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload ; CHECK-NEXT: vsetvli zero, a6, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 ; CHECK-NEXT: csrr a0, vlenb @@ -1457,85 +1455,95 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v6, v24, v16, v0.t -; CHECK-NEXT: add a0, a3, a3 +; CHECK-NEXT: vmfeq.vv v7, v24, v16, v0.t ; CHECK-NEXT: bltu a2, a5, .LBB85_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a2, a5 ; CHECK-NEXT: .LBB85_4: -; CHECK-NEXT: sub a5, a2, a4 -; CHECK-NEXT: csrr a6, vlenb -; CHECK-NEXT: slli a6, a6, 1 -; CHECK-NEXT: mv a7, a6 -; CHECK-NEXT: slli a6, a6, 2 -; CHECK-NEXT: add a6, a6, a7 -; CHECK-NEXT: add a6, sp, a6 -; CHECK-NEXT: addi a6, a6, 16 -; CHECK-NEXT: vl1r.v v7, (a6) # vscale x 8-byte Folded Reload -; CHECK-NEXT: vsetvli a6, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v7, a3 -; CHECK-NEXT: sltu a6, a2, a5 -; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a5, a6, a5 -; CHECK-NEXT: csrr a6, vlenb -; CHECK-NEXT: mv a7, a6 -; CHECK-NEXT: slli a6, a6, 1 -; CHECK-NEXT: add a7, a7, a6 -; CHECK-NEXT: slli a6, a6, 3 -; CHECK-NEXT: add a6, a6, a7 -; CHECK-NEXT: add a6, sp, a6 -; CHECK-NEXT: addi a6, a6, 16 -; CHECK-NEXT: vl8r.v v24, (a6) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vsetvli zero, a5, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28 +; CHECK-NEXT: sub a0, a2, a4 ; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: mv a6, a5 ; CHECK-NEXT: slli a5, a5, 1 -; CHECK-NEXT: add a6, a6, a5 +; CHECK-NEXT: mv a6, a5 ; CHECK-NEXT: slli a5, a5, 2 +; CHECK-NEXT: add a6, a6, a5 +; CHECK-NEXT: slli a5, a5, 1 +; CHECK-NEXT: add a5, a5, a6 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vl1r.v v8, (a5) # vscale x 8-byte Folded Reload +; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v8, a3 +; CHECK-NEXT: sltu a5, a2, a0 +; CHECK-NEXT: addi a5, a5, -1 +; CHECK-NEXT: and a0, a5, a0 +; 
CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: slli a5, a5, 1 +; CHECK-NEXT: mv a6, a5 +; CHECK-NEXT: slli a5, a5, 3 ; CHECK-NEXT: add a5, a5, a6 ; CHECK-NEXT: add a5, sp, a5 ; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vl8r.v v24, (a5) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: mv a5, a0 +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, a0, a5 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v4, v16, v24, v0.t -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v6, v5, a3 +; CHECK-NEXT: vmfeq.vv v10, v16, v24, v0.t +; CHECK-NEXT: vmv1r.v v9, v7 +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v9, v5, a3 ; CHECK-NEXT: bltu a2, a4, .LBB85_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: mv a2, a4 ; CHECK-NEXT: .LBB85_6: -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: mv a5, a4 -; CHECK-NEXT: slli a4, a4, 1 -; CHECK-NEXT: add a5, a5, a4 -; CHECK-NEXT: slli a4, a4, 3 -; CHECK-NEXT: add a4, a4, a5 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vl8r.v v24, (a4) # vscale x 64-byte Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: mv a4, a0 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, a0, a4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: mv a4, a2 -; CHECK-NEXT: slli a2, a2, 1 -; CHECK-NEXT: add a4, a4, a2 -; CHECK-NEXT: slli a2, a2, 2 -; CHECK-NEXT: add a2, a2, a4 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a2, a2, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v8, v16, v24, v0.t -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v8, v4, a3 -; CHECK-NEXT: add a0, a1, a1 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v6, a1 +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v8, v10, a3 +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: mv a1, a0 @@ -3546,8 +3554,7 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal ; ZVFH-NEXT: vl8r.v v24, (a0) # vscale x 64-byte 
Folded Reload ; ZVFH-NEXT: vsetvli zero, a2, e16, m8, ta, ma ; ZVFH-NEXT: vmfeq.vv v16, v24, v8, v0.t -; ZVFH-NEXT: add a0, a1, a1 -; ZVFH-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; ZVFH-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; ZVFH-NEXT: vslideup.vx v16, v6, a1 ; ZVFH-NEXT: vmv.v.v v0, v16 ; ZVFH-NEXT: csrr a0, vlenb @@ -3576,6 +3583,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal ; ZVFHMIN-NEXT: slli a1, a1, 1 ; ZVFHMIN-NEXT: mv a3, a1 ; ZVFHMIN-NEXT: slli a1, a1, 2 +; ZVFHMIN-NEXT: add a3, a3, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 ; ZVFHMIN-NEXT: add a1, a1, a3 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 @@ -3583,9 +3592,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal ; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; ZVFHMIN-NEXT: vmv8r.v v0, v16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: mv a3, a1 ; ZVFHMIN-NEXT: slli a1, a1, 1 -; ZVFHMIN-NEXT: add a3, a3, a1 +; ZVFHMIN-NEXT: mv a3, a1 ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: add a1, a1, a3 ; ZVFHMIN-NEXT: add a1, sp, a1 @@ -3611,6 +3619,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal ; ZVFHMIN-NEXT: slli t0, t0, 1 ; ZVFHMIN-NEXT: mv t1, t0 ; ZVFHMIN-NEXT: slli t0, t0, 2 +; ZVFHMIN-NEXT: add t1, t1, t0 +; ZVFHMIN-NEXT: slli t0, t0, 1 ; ZVFHMIN-NEXT: add t0, t0, t1 ; ZVFHMIN-NEXT: add t0, sp, t0 ; ZVFHMIN-NEXT: addi t0, t0, 16 @@ -3618,9 +3628,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal ; ZVFHMIN-NEXT: vslidedown.vx v16, v8, a1 ; ZVFHMIN-NEXT: vl8re16.v v8, (a0) ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: mv t0, a0 ; ZVFHMIN-NEXT: slli a0, a0, 1 -; ZVFHMIN-NEXT: add t0, t0, a0 +; ZVFHMIN-NEXT: mv t0, a0 ; ZVFHMIN-NEXT: slli a0, a0, 2 ; ZVFHMIN-NEXT: add a0, a0, t0 ; ZVFHMIN-NEXT: add a0, sp, a0 @@ -3650,10 +3659,6 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal ; ZVFHMIN-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v5, v8, v16, v0.t -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a6, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 ; ZVFHMIN-NEXT: csrr a0, vlenb @@ -3662,85 +3667,95 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v6, v24, v16, v0.t -; ZVFHMIN-NEXT: add a0, a3, a3 +; ZVFHMIN-NEXT: vmfeq.vv v7, v24, v16, v0.t ; ZVFHMIN-NEXT: bltu a2, a5, .LBB171_4 ; ZVFHMIN-NEXT: # %bb.3: ; ZVFHMIN-NEXT: mv a2, a5 ; ZVFHMIN-NEXT: .LBB171_4: -; ZVFHMIN-NEXT: sub a5, a2, a4 -; ZVFHMIN-NEXT: csrr a6, vlenb -; ZVFHMIN-NEXT: slli a6, a6, 1 -; ZVFHMIN-NEXT: mv a7, a6 -; ZVFHMIN-NEXT: slli a6, a6, 2 -; ZVFHMIN-NEXT: add a6, a6, a7 -; ZVFHMIN-NEXT: add a6, sp, a6 -; ZVFHMIN-NEXT: addi a6, a6, 16 -; ZVFHMIN-NEXT: vl1r.v v7, (a6) # vscale x 8-byte Folded Reload -; ZVFHMIN-NEXT: vsetvli a6, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v0, v7, a3 -; ZVFHMIN-NEXT: sltu a6, a2, a5 -; 
ZVFHMIN-NEXT: addi a6, a6, -1 -; ZVFHMIN-NEXT: and a5, a6, a5 -; ZVFHMIN-NEXT: csrr a6, vlenb -; ZVFHMIN-NEXT: mv a7, a6 -; ZVFHMIN-NEXT: slli a6, a6, 1 -; ZVFHMIN-NEXT: add a7, a7, a6 -; ZVFHMIN-NEXT: slli a6, a6, 3 -; ZVFHMIN-NEXT: add a6, a6, a7 -; ZVFHMIN-NEXT: add a6, sp, a6 -; ZVFHMIN-NEXT: addi a6, a6, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a6) # vscale x 64-byte Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a5, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28 +; ZVFHMIN-NEXT: sub a0, a2, a4 ; ZVFHMIN-NEXT: csrr a5, vlenb -; ZVFHMIN-NEXT: mv a6, a5 ; ZVFHMIN-NEXT: slli a5, a5, 1 -; ZVFHMIN-NEXT: add a6, a6, a5 +; ZVFHMIN-NEXT: mv a6, a5 ; ZVFHMIN-NEXT: slli a5, a5, 2 +; ZVFHMIN-NEXT: add a6, a6, a5 +; ZVFHMIN-NEXT: slli a5, a5, 1 +; ZVFHMIN-NEXT: add a5, a5, a6 +; ZVFHMIN-NEXT: add a5, sp, a5 +; ZVFHMIN-NEXT: addi a5, a5, 16 +; ZVFHMIN-NEXT: vl1r.v v8, (a5) # vscale x 8-byte Folded Reload +; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v8, a3 +; ZVFHMIN-NEXT: sltu a5, a2, a0 +; ZVFHMIN-NEXT: addi a5, a5, -1 +; ZVFHMIN-NEXT: and a0, a5, a0 +; ZVFHMIN-NEXT: csrr a5, vlenb +; ZVFHMIN-NEXT: slli a5, a5, 1 +; ZVFHMIN-NEXT: mv a6, a5 +; ZVFHMIN-NEXT: slli a5, a5, 3 ; ZVFHMIN-NEXT: add a5, a5, a6 ; ZVFHMIN-NEXT: add a5, sp, a5 ; ZVFHMIN-NEXT: addi a5, a5, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload +; ZVFHMIN-NEXT: vl8r.v v24, (a5) # vscale x 64-byte Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: mv a5, a0 +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add a0, a0, a5 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v4, v16, v24, v0.t -; ZVFHMIN-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslideup.vx v6, v5, a3 +; ZVFHMIN-NEXT: vmfeq.vv v10, v16, v24, v0.t +; ZVFHMIN-NEXT: vmv1r.v v9, v7 +; ZVFHMIN-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslideup.vx v9, v5, a3 ; ZVFHMIN-NEXT: bltu a2, a4, .LBB171_6 ; ZVFHMIN-NEXT: # %bb.5: ; ZVFHMIN-NEXT: mv a2, a4 ; ZVFHMIN-NEXT: .LBB171_6: -; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: mv a5, a4 -; ZVFHMIN-NEXT: slli a4, a4, 1 -; ZVFHMIN-NEXT: add a5, a5, a4 -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: add a4, a4, a5 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a4) # vscale x 64-byte Folded Reload +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: mv a4, a0 +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, a0, a4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: mv a4, a2 -; ZVFHMIN-NEXT: slli a2, a2, 1 -; ZVFHMIN-NEXT: add a4, a4, a2 -; ZVFHMIN-NEXT: slli a2, a2, 2 -; ZVFHMIN-NEXT: add a2, a2, a4 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: mv a2, a0 +; ZVFHMIN-NEXT: slli a0, a0, 2 +; 
ZVFHMIN-NEXT: add a0, a0, a2 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: mv a2, a0 +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add a2, a2, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a2 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v24, v0.t -; ZVFHMIN-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslideup.vx v8, v4, a3 -; ZVFHMIN-NEXT: add a0, a1, a1 -; ZVFHMIN-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; ZVFHMIN-NEXT: vslideup.vx v8, v6, a1 +; ZVFHMIN-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslideup.vx v8, v10, a3 +; ZVFHMIN-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; ZVFHMIN-NEXT: vslideup.vx v8, v9, a1 ; ZVFHMIN-NEXT: vmv.v.v v0, v8 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: mv a1, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll index ae868fe..ff923ef 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll @@ -4280,8 +4280,7 @@ define <vscale x 16 x i1> @fcmp_oeq_vf_nx16f64(<vscale x 16 x double> %va) { ; RV32-NEXT: vmfeq.vf v24, v16, fa5 ; RV32-NEXT: vmfeq.vf v0, v8, fa5 ; RV32-NEXT: srli a0, a0, 3 -; RV32-NEXT: add a1, a0, a0 -; RV32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; RV32-NEXT: vslideup.vx v0, v24, a0 ; RV32-NEXT: ret ; @@ -4293,8 +4292,7 @@ define <vscale x 16 x i1> @fcmp_oeq_vf_nx16f64(<vscale x 16 x double> %va) { ; RV64-NEXT: vmfeq.vf v24, v16, fa5 ; RV64-NEXT: vmfeq.vf v0, v8, fa5 ; RV64-NEXT: srli a0, a0, 3 -; RV64-NEXT: add a1, a0, a0 -; RV64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; RV64-NEXT: vslideup.vx v0, v24, a0 ; RV64-NEXT: ret ; @@ -4306,8 +4304,7 @@ define <vscale x 16 x i1> @fcmp_oeq_vf_nx16f64(<vscale x 16 x double> %va) { ; ZVFHMIN32-NEXT: vmfeq.vf v24, v16, fa5 ; ZVFHMIN32-NEXT: vmfeq.vf v0, v8, fa5 ; ZVFHMIN32-NEXT: srli a0, a0, 3 -; ZVFHMIN32-NEXT: add a1, a0, a0 -; ZVFHMIN32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; ZVFHMIN32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; ZVFHMIN32-NEXT: vslideup.vx v0, v24, a0 ; ZVFHMIN32-NEXT: ret ; @@ -4319,8 +4316,7 @@ define <vscale x 16 x i1> @fcmp_oeq_vf_nx16f64(<vscale x 16 x double> %va) { ; ZVFHMIN64-NEXT: vmfeq.vf v24, v16, fa5 ; ZVFHMIN64-NEXT: vmfeq.vf v0, v8, fa5 ; ZVFHMIN64-NEXT: srli a0, a0, 3 -; ZVFHMIN64-NEXT: add a1, a0, a0 -; ZVFHMIN64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; ZVFHMIN64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; ZVFHMIN64-NEXT: vslideup.vx v0, v24, a0 ; ZVFHMIN64-NEXT: ret %vc = fcmp oeq <vscale x 16 x double> %va, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll index ef560a7..13c63d9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll @@ -2246,8 +2246,7 @@ define <vscale x 32 x i1> @icmp_eq_vv_nxv32i32(<vscale x 32 x i32> %va, <vscale ; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vmseq.vv v16, v24, v8, v0.t -; CHECK-NEXT: add a0, a1, a1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, 
ta, ma +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vx v16, v6, a1 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: csrr a0, vlenb @@ -2283,8 +2282,7 @@ define <vscale x 32 x i1> @icmp_eq_vx_nxv32i32(<vscale x 32 x i32> %va, i32 %b, ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t -; CHECK-NEXT: add a0, a2, a2 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vx v16, v25, a2 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -2316,8 +2314,7 @@ define <vscale x 32 x i1> @icmp_eq_vx_swap_nxv32i32(<vscale x 32 x i32> %va, i32 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t -; CHECK-NEXT: add a0, a2, a2 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vx v16, v25, a2 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll index bd3c29b..a85b471 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll @@ -3001,9 +3001,8 @@ define <vscale x 16 x i1> @icmp_eq_vi_nx16i64(<vscale x 16 x i64> %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vmseq.vi v24, v16, 0 ; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: vmseq.vi v0, v8, 0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vslideup.vx v0, v24, a0 ; CHECK-NEXT: ret %vc = icmp eq <vscale x 16 x i64> %va, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll b/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll index c9f9a79..790cd56 100644 --- a/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll +++ b/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll @@ -48,10 +48,10 @@ define internal void @SubRegLivenessUndefInPhi(i64 %cond) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a1, a0, 2 ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: vadd.vi v10, v9, 1 ; CHECK-NEXT: vadd.vi v11, v9, 3 -; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: vslideup.vx v12, v10, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll index ca7f256..f7ed130 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll @@ -191,8 +191,7 @@ define {<2 x i32>, <2 x i32>, <2 x i32>} @vector_deinterleave3_v2i32_v6i32(<6 x ; CHECK-NEXT: vsetivli zero, 2, e32, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v8, 4 ; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v12, a0 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -222,8 +221,7 @@ define {<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>} @vector_deinterleave4_v2i32_ ; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v9, v8, 2 ; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma 
; CHECK-NEXT: vslideup.vx v12, v10, a0 ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: addi a0, sp, 16 @@ -254,15 +252,13 @@ define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @vector_deinterle ; CHECK-NEXT: vslidedown.vi v14, v8, 2 ; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v8, 8 -; CHECK-NEXT: srli a1, a0, 3 -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: add a2, a1, a1 -; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v13, v12, a1 -; CHECK-NEXT: vslideup.vx v8, v14, a1 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v13, a0 +; CHECK-NEXT: srli a1, a0, 2 +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v13, v12, a0 +; CHECK-NEXT: vslideup.vx v8, v14, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vx v8, v13, a1 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vmv1r.v v9, v10 ; CHECK-NEXT: vs2r.v v8, (a0) @@ -292,16 +288,14 @@ define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @vecto ; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v8, 10 ; CHECK-NEXT: vslidedown.vi v12, v8, 8 -; CHECK-NEXT: srli a1, a0, 3 -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: add a2, a1, a1 -; CHECK-NEXT: add a3, a0, a0 -; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v15, v14, a1 -; CHECK-NEXT: vslideup.vx v8, v16, a1 -; CHECK-NEXT: vslideup.vx v12, v10, a1 -; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v15, a0 +; CHECK-NEXT: srli a1, a0, 2 +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v15, v14, a0 +; CHECK-NEXT: vslideup.vx v8, v16, a0 +; CHECK-NEXT: vslideup.vx v12, v10, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vx v8, v15, a1 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vmv1r.v v9, v12 ; CHECK-NEXT: vs2r.v v8, (a0) @@ -330,24 +324,22 @@ define {<2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>} @v ; CHECK-NEXT: vslidedown.vi v12, v8, 2 ; CHECK-NEXT: vslidedown.vi v13, v8, 4 ; CHECK-NEXT: vslidedown.vi v14, v8, 6 -; CHECK-NEXT: srli a1, a0, 3 -; CHECK-NEXT: srli a2, a0, 2 +; CHECK-NEXT: srli a1, a0, 2 +; CHECK-NEXT: srli a2, a0, 3 ; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: add a3, a1, a1 -; CHECK-NEXT: add a4, a2, a1 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vx v10, v9, a2 +; CHECK-NEXT: add a3, a1, a2 +; CHECK-NEXT: vslideup.vx v8, v12, a2 ; CHECK-NEXT: vsetvli zero, a3, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vx v10, v9, a1 -; CHECK-NEXT: vslideup.vx v8, v12, a1 -; CHECK-NEXT: slli a3, a1, 1 -; CHECK-NEXT: vsetvli zero, a4, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vx v10, v11, a2 -; CHECK-NEXT: vslideup.vx v8, v13, a2 -; CHECK-NEXT: add a2, a0, a0 -; CHECK-NEXT: add a3, a3, a1 -; CHECK-NEXT: add a1, a3, a1 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v8, v14, a3 -; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, ma +; CHECK-NEXT: vslideup.vx v10, v11, a1 +; CHECK-NEXT: vslideup.vx v8, v13, a1 +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: add a2, a1, a2 +; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v8, v14, a1 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v10, a0 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs1r.v v8, (a0) @@ 
-376,25 +368,23 @@ define {<2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 ; CHECK-NEXT: vslidedown.vi v13, v8, 2 ; CHECK-NEXT: vslidedown.vi v14, v8, 4 ; CHECK-NEXT: vslidedown.vi v15, v8, 6 -; CHECK-NEXT: srli a1, a0, 3 -; CHECK-NEXT: srli a2, a0, 2 +; CHECK-NEXT: srli a1, a0, 2 +; CHECK-NEXT: srli a2, a0, 3 ; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: add a3, a1, a1 -; CHECK-NEXT: add a4, a2, a1 -; CHECK-NEXT: slli a5, a1, 1 -; CHECK-NEXT: add a6, a0, a0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vx v10, v9, a2 +; CHECK-NEXT: add a3, a1, a2 +; CHECK-NEXT: slli a4, a2, 1 +; CHECK-NEXT: vslideup.vx v8, v13, a2 ; CHECK-NEXT: vsetvli zero, a3, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vx v10, v9, a1 -; CHECK-NEXT: add a5, a5, a1 -; CHECK-NEXT: vslideup.vx v8, v13, a1 -; CHECK-NEXT: vsetvli zero, a4, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vx v10, v11, a2 -; CHECK-NEXT: add a1, a5, a1 -; CHECK-NEXT: vslideup.vx v8, v14, a2 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v10, v12, a5 -; CHECK-NEXT: vslideup.vx v8, v15, a5 -; CHECK-NEXT: vsetvli zero, a6, e8, m1, ta, ma +; CHECK-NEXT: vslideup.vx v10, v11, a1 +; CHECK-NEXT: add a4, a4, a2 +; CHECK-NEXT: vslideup.vx v8, v14, a1 +; CHECK-NEXT: add a2, a4, a2 +; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v10, v12, a4 +; CHECK-NEXT: vslideup.vx v8, v15, a4 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v10, a0 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs1r.v v8, (a0) @@ -555,8 +545,7 @@ define {<2 x float>, <2 x float>, <2 x float>} @vector_deinterleave3_v6f32_v2f32 ; CHECK-NEXT: vsetivli zero, 2, e32, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v8, 4 ; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v12, a0 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -590,8 +579,7 @@ define {<2 x float>, <2 x float>, <2 x float>, <2 x float>} @vector_deinterleave ; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v9, v8, 2 ; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v12, v10, a0 ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: addi a0, sp, 16 @@ -626,15 +614,13 @@ define {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @vector_dein ; CHECK-NEXT: vslidedown.vi v14, v8, 2 ; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v8, 8 -; CHECK-NEXT: srli a1, a0, 3 -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: add a2, a1, a1 -; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v13, v12, a1 -; CHECK-NEXT: vslideup.vx v8, v14, a1 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v13, a0 +; CHECK-NEXT: srli a1, a0, 2 +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v13, v12, a0 +; CHECK-NEXT: vslideup.vx v8, v14, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vx v8, v13, a1 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vmv1r.v v9, v10 ; CHECK-NEXT: vs2r.v v8, (a0) @@ -668,16 +654,14 @@ define {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} ; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma ; 
CHECK-NEXT: vslidedown.vi v10, v8, 10 ; CHECK-NEXT: vslidedown.vi v12, v8, 8 -; CHECK-NEXT: srli a1, a0, 3 -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: add a2, a1, a1 -; CHECK-NEXT: add a3, a0, a0 -; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v15, v14, a1 -; CHECK-NEXT: vslideup.vx v8, v16, a1 -; CHECK-NEXT: vslideup.vx v12, v10, a1 -; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v15, a0 +; CHECK-NEXT: srli a1, a0, 2 +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v15, v14, a0 +; CHECK-NEXT: vslideup.vx v8, v16, a0 +; CHECK-NEXT: vslideup.vx v12, v10, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vx v8, v15, a1 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vmv1r.v v9, v12 ; CHECK-NEXT: vs2r.v v8, (a0) @@ -711,21 +695,18 @@ define {<1 x half>, <1 x half>, <1 x half>, <1 x half>, <1 x half>, <1 x half>, ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vslidedown.vi v13, v8, 5 ; CHECK-NEXT: vslidedown.vi v14, v8, 6 -; CHECK-NEXT: srli a1, a0, 3 -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: add a2, a1, a1 -; CHECK-NEXT: add a3, a0, a0 -; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v11, v9, a1 -; CHECK-NEXT: vslideup.vx v10, v12, a1 -; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vx v10, v11, a0 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: srli a1, a0, 2 +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v11, v9, a0 +; CHECK-NEXT: vslideup.vx v10, v12, a0 +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vx v10, v11, a1 ; CHECK-NEXT: vslidedown.vi v11, v8, 4 -; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v11, v13, a1 -; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vx v11, v14, a0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v11, v13, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vx v11, v14, a1 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs2r.v v10, (a0) ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma @@ -755,25 +736,22 @@ define {<1 x half>, <1 x half>, <1 x half>, <1 x half>, <1 x half>, <1 x half>, ; CHECK-NEXT: vslidedown.vi v10, v8, 7 ; CHECK-NEXT: vslidedown.vi v11, v8, 6 ; CHECK-NEXT: vslidedown.vi v12, v8, 5 -; CHECK-NEXT: srli a1, a0, 3 +; CHECK-NEXT: srli a1, a0, 2 +; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: vslidedown.vi v9, v8, 4 -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: add a2, a1, a1 -; CHECK-NEXT: add a3, a0, a0 -; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v11, v10, a1 -; CHECK-NEXT: vslideup.vx v9, v12, a1 -; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vx v9, v11, a0 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v11, v10, a0 +; CHECK-NEXT: vslideup.vx v9, v12, a0 +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vx v9, v11, a1 ; CHECK-NEXT: vslidedown.vi v10, v8, 3 ; CHECK-NEXT: vslidedown.vi v11, v8, 2 ; CHECK-NEXT: vslidedown.vi v12, v8, 1 -; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v11, v10, a1 -; CHECK-NEXT: vslideup.vx v8, v12, a1 -; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v11, a0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; 
CHECK-NEXT: vslideup.vx v11, v10, a0 +; CHECK-NEXT: vslideup.vx v8, v12, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vx v8, v11, a1 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs2r.v v8, (a0) ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll index 6a08f5a..45ffd84 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll @@ -2712,16 +2712,10 @@ define {<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x half>, <vscale x ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vslidedown.vx v11, v9, a0 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v9, v11, a0 -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; CHECK-NEXT: vslidedown.vx v11, v8, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v11, a0 -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; CHECK-NEXT: vslidedown.vx v11, v10, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v10, v11, a0 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs4r.v v8, (a0) @@ -2808,16 +2802,10 @@ define {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vs ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vslidedown.vx v11, v9, a0 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v9, v11, a0 -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; CHECK-NEXT: vslidedown.vx v11, v8, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v11, a0 -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; CHECK-NEXT: vslidedown.vx v11, v10, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v10, v11, a0 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs4r.v v8, (a0) @@ -2904,16 +2892,10 @@ define {<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscal ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vslidedown.vx v11, v9, a0 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v9, v11, a0 -; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma ; CHECK-NEXT: vslidedown.vx v11, v8, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v11, a0 -; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma ; CHECK-NEXT: vslidedown.vx v11, v10, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v10, v11, a0 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs4r.v v8, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll index 3751967..a5811e6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll @@ -14,18 +14,17 @@ define void @vector_interleave_store_nxv32i1_nxv16i1(<vscale x 16 x i1> %a, <vsc ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: li a1, -1 -; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: vmerge.vim v12, v10, 1, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vmerge.vim v14, v10, 1, v0 -; CHECK-NEXT: srli a2, a2, 2 ; CHECK-NEXT: vwaddu.vv v8, v14, v12 ; CHECK-NEXT: vwmaccu.vx v8, a1, v12 +; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: vmsne.vi 
v12, v10, 0 ; CHECK-NEXT: vmsne.vi v10, v8, 0 -; CHECK-NEXT: add a1, a2, a2 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v10, v12, a2 +; CHECK-NEXT: srli a1, a1, 2 +; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v10, v12, a1 ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: vsm.v v10, (a0) ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll index e297e88..01cc5c5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll @@ -17,18 +17,17 @@ define <vscale x 32 x i1> @vector_interleave_nxv32i1_nxv16i1(<vscale x 16 x i1> ; V-NEXT: vmv1r.v v0, v8 ; V-NEXT: vmv.v.i v10, 0 ; V-NEXT: li a0, -1 -; V-NEXT: csrr a1, vlenb ; V-NEXT: vmerge.vim v12, v10, 1, v0 ; V-NEXT: vmv1r.v v0, v9 ; V-NEXT: vmerge.vim v14, v10, 1, v0 -; V-NEXT: srli a1, a1, 2 ; V-NEXT: vwaddu.vv v8, v14, v12 ; V-NEXT: vwmaccu.vx v8, a0, v12 +; V-NEXT: csrr a0, vlenb ; V-NEXT: vmsne.vi v12, v10, 0 ; V-NEXT: vmsne.vi v0, v8, 0 -; V-NEXT: add a0, a1, a1 -; V-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; V-NEXT: vslideup.vx v0, v12, a1 +; V-NEXT: srli a0, a0, 2 +; V-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; V-NEXT: vslideup.vx v0, v12, a0 ; V-NEXT: ret ; ; ZVBB-LABEL: vector_interleave_nxv32i1_nxv16i1: @@ -38,17 +37,16 @@ define <vscale x 32 x i1> @vector_interleave_nxv32i1_nxv16i1(<vscale x 16 x i1> ; ZVBB-NEXT: vmv1r.v v0, v8 ; ZVBB-NEXT: vmv.v.i v10, 0 ; ZVBB-NEXT: li a0, 1 -; ZVBB-NEXT: csrr a1, vlenb ; ZVBB-NEXT: vmerge.vim v10, v10, 1, v0 -; ZVBB-NEXT: srli a1, a1, 2 ; ZVBB-NEXT: vwsll.vi v12, v10, 8 ; ZVBB-NEXT: vmv1r.v v0, v9 ; ZVBB-NEXT: vwaddu.wx v12, v12, a0, v0.t +; ZVBB-NEXT: csrr a0, vlenb ; ZVBB-NEXT: vmsne.vi v8, v14, 0 ; ZVBB-NEXT: vmsne.vi v0, v12, 0 -; ZVBB-NEXT: add a0, a1, a1 -; ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; ZVBB-NEXT: vslideup.vx v0, v8, a1 +; ZVBB-NEXT: srli a0, a0, 2 +; ZVBB-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; ZVBB-NEXT: vslideup.vx v0, v8, a0 ; ZVBB-NEXT: ret ; ; ZIP-LABEL: vector_interleave_nxv32i1_nxv16i1: @@ -61,13 +59,12 @@ define <vscale x 32 x i1> @vector_interleave_nxv32i1_nxv16i1(<vscale x 16 x i1> ; ZIP-NEXT: vmerge.vim v12, v10, 1, v0 ; ZIP-NEXT: vmv1r.v v0, v9 ; ZIP-NEXT: vmerge.vim v8, v10, 1, v0 -; ZIP-NEXT: srli a0, a0, 2 ; ZIP-NEXT: ri.vzip2b.vv v10, v8, v12 ; ZIP-NEXT: ri.vzip2a.vv v14, v8, v12 ; ZIP-NEXT: vmsne.vi v8, v10, 0 ; ZIP-NEXT: vmsne.vi v0, v14, 0 -; ZIP-NEXT: add a1, a0, a0 -; ZIP-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; ZIP-NEXT: srli a0, a0, 2 +; ZIP-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; ZIP-NEXT: vslideup.vx v0, v8, a0 ; ZIP-NEXT: ret %res = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) @@ -508,19 +505,17 @@ define <vscale x 48 x i1> @vector_interleave_nxv48i1_nxv16i1(<vscale x 16 x i1> ; CHECK-NEXT: add a2, a3, a2 ; CHECK-NEXT: vsseg3e8.v v14, (a0) ; CHECK-NEXT: vl2r.v v8, (a2) -; CHECK-NEXT: srli a2, a1, 2 -; CHECK-NEXT: srli a1, a1, 1 +; CHECK-NEXT: srli a2, a1, 1 ; CHECK-NEXT: vl2r.v v10, (a3) ; CHECK-NEXT: vl2r.v v12, (a0) -; CHECK-NEXT: add a0, a2, a2 +; CHECK-NEXT: srli a1, a1, 2 ; CHECK-NEXT: vmsne.vi v14, v8, 0 ; CHECK-NEXT: vmsne.vi v8, v10, 0 ; CHECK-NEXT: vmsne.vi v0, v12, 0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v0, v8, a2 -; CHECK-NEXT: add a0, a1, a1 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vslideup.vx v0, v14, a1 +; 
CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v0, v8, a1 +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vslideup.vx v0, v14, a2 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 6 ; CHECK-NEXT: mul a0, a0, a1 @@ -551,19 +546,17 @@ define <vscale x 48 x i1> @vector_interleave_nxv48i1_nxv16i1(<vscale x 16 x i1> ; ZVBB-NEXT: add a2, a3, a2 ; ZVBB-NEXT: vsseg3e8.v v14, (a0) ; ZVBB-NEXT: vl2r.v v8, (a2) -; ZVBB-NEXT: srli a2, a1, 2 -; ZVBB-NEXT: srli a1, a1, 1 +; ZVBB-NEXT: srli a2, a1, 1 ; ZVBB-NEXT: vl2r.v v10, (a3) ; ZVBB-NEXT: vl2r.v v12, (a0) -; ZVBB-NEXT: add a0, a2, a2 +; ZVBB-NEXT: srli a1, a1, 2 ; ZVBB-NEXT: vmsne.vi v14, v8, 0 ; ZVBB-NEXT: vmsne.vi v8, v10, 0 ; ZVBB-NEXT: vmsne.vi v0, v12, 0 -; ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; ZVBB-NEXT: vslideup.vx v0, v8, a2 -; ZVBB-NEXT: add a0, a1, a1 -; ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; ZVBB-NEXT: vslideup.vx v0, v14, a1 +; ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; ZVBB-NEXT: vslideup.vx v0, v8, a1 +; ZVBB-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; ZVBB-NEXT: vslideup.vx v0, v14, a2 ; ZVBB-NEXT: csrr a0, vlenb ; ZVBB-NEXT: li a1, 6 ; ZVBB-NEXT: mul a0, a0, a1 @@ -812,22 +805,20 @@ define <vscale x 64 x i1> @vector_interleave_nxv64i1_nxv16i1(<vscale x 16 x i1> ; CHECK-NEXT: add a2, a4, a2 ; CHECK-NEXT: vsseg4e8.v v14, (a0) ; CHECK-NEXT: vl2r.v v8, (a2) -; CHECK-NEXT: srli a2, a1, 2 -; CHECK-NEXT: srli a1, a1, 1 +; CHECK-NEXT: srli a2, a1, 1 +; CHECK-NEXT: srli a1, a1, 2 ; CHECK-NEXT: vl2r.v v10, (a4) -; CHECK-NEXT: add a4, a2, a2 ; CHECK-NEXT: vl2r.v v12, (a3) ; CHECK-NEXT: vl2r.v v14, (a0) ; CHECK-NEXT: vmsne.vi v16, v8, 0 ; CHECK-NEXT: vmsne.vi v8, v10, 0 ; CHECK-NEXT: vmsne.vi v9, v12, 0 ; CHECK-NEXT: vmsne.vi v0, v14, 0 -; CHECK-NEXT: vsetvli zero, a4, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v8, v16, a2 -; CHECK-NEXT: vslideup.vx v0, v9, a2 -; CHECK-NEXT: add a0, a1, a1 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vslideup.vx v0, v8, a1 +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v8, v16, a1 +; CHECK-NEXT: vslideup.vx v0, v9, a1 +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vslideup.vx v0, v8, a2 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 @@ -859,22 +850,20 @@ define <vscale x 64 x i1> @vector_interleave_nxv64i1_nxv16i1(<vscale x 16 x i1> ; ZVBB-NEXT: add a2, a4, a2 ; ZVBB-NEXT: vsseg4e8.v v14, (a0) ; ZVBB-NEXT: vl2r.v v8, (a2) -; ZVBB-NEXT: srli a2, a1, 2 -; ZVBB-NEXT: srli a1, a1, 1 +; ZVBB-NEXT: srli a2, a1, 1 +; ZVBB-NEXT: srli a1, a1, 2 ; ZVBB-NEXT: vl2r.v v10, (a4) -; ZVBB-NEXT: add a4, a2, a2 ; ZVBB-NEXT: vl2r.v v12, (a3) ; ZVBB-NEXT: vl2r.v v14, (a0) ; ZVBB-NEXT: vmsne.vi v16, v8, 0 ; ZVBB-NEXT: vmsne.vi v8, v10, 0 ; ZVBB-NEXT: vmsne.vi v9, v12, 0 ; ZVBB-NEXT: vmsne.vi v0, v14, 0 -; ZVBB-NEXT: vsetvli zero, a4, e8, mf2, ta, ma -; ZVBB-NEXT: vslideup.vx v8, v16, a2 -; ZVBB-NEXT: vslideup.vx v0, v9, a2 -; ZVBB-NEXT: add a0, a1, a1 -; ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; ZVBB-NEXT: vslideup.vx v0, v8, a1 +; ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; ZVBB-NEXT: vslideup.vx v8, v16, a1 +; ZVBB-NEXT: vslideup.vx v0, v9, a1 +; ZVBB-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; ZVBB-NEXT: vslideup.vx v0, v8, a2 ; ZVBB-NEXT: csrr a0, vlenb ; ZVBB-NEXT: slli a0, a0, 3 ; ZVBB-NEXT: add sp, sp, a0 @@ -1114,7 +1103,7 @@ define <vscale x 80 x i1> @vector_interleave_nxv80i1_nxv16i1(<vscale x 16 x i1> ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim 
v18, v12, 1, v0 ; CHECK-NEXT: add a2, a4, a1 -; CHECK-NEXT: srli a3, a1, 2 +; CHECK-NEXT: srli a3, a1, 1 ; CHECK-NEXT: vmv2r.v v20, v14 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vmerge.vim v16, v12, 1, v0 @@ -1144,11 +1133,9 @@ define <vscale x 80 x i1> @vector_interleave_nxv80i1_nxv16i1(<vscale x 16 x i1> ; CHECK-NEXT: add a5, a4, a1 ; CHECK-NEXT: vl1r.v v16, (a5) ; CHECK-NEXT: add a5, a5, a1 -; CHECK-NEXT: srli a1, a1, 1 +; CHECK-NEXT: srli a1, a1, 2 ; CHECK-NEXT: vl1r.v v11, (a2) -; CHECK-NEXT: add a2, a3, a3 ; CHECK-NEXT: vl1r.v v15, (a4) -; CHECK-NEXT: add a4, a1, a1 ; CHECK-NEXT: vl1r.v v13, (a0) ; CHECK-NEXT: vl1r.v v17, (a5) ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma @@ -1156,11 +1143,11 @@ define <vscale x 80 x i1> @vector_interleave_nxv80i1_nxv16i1(<vscale x 16 x i1> ; CHECK-NEXT: vmsne.vi v0, v10, 0 ; CHECK-NEXT: vmsne.vi v8, v14, 0 ; CHECK-NEXT: vmsne.vi v9, v12, 0 -; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v0, v18, a3 -; CHECK-NEXT: vslideup.vx v9, v8, a3 -; CHECK-NEXT: vsetvli zero, a4, e8, m1, ta, ma -; CHECK-NEXT: vslideup.vx v0, v9, a1 +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v0, v18, a1 +; CHECK-NEXT: vslideup.vx v9, v8, a1 +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vslideup.vx v0, v9, a3 ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vmsne.vi v8, v16, 0 ; CHECK-NEXT: csrr a0, vlenb @@ -1190,7 +1177,7 @@ define <vscale x 80 x i1> @vector_interleave_nxv80i1_nxv16i1(<vscale x 16 x i1> ; ZVBB-NEXT: vmv1r.v v0, v8 ; ZVBB-NEXT: vmerge.vim v18, v12, 1, v0 ; ZVBB-NEXT: add a2, a4, a1 -; ZVBB-NEXT: srli a3, a1, 2 +; ZVBB-NEXT: srli a3, a1, 1 ; ZVBB-NEXT: vmv2r.v v20, v14 ; ZVBB-NEXT: vmv1r.v v0, v9 ; ZVBB-NEXT: vmerge.vim v16, v12, 1, v0 @@ -1220,11 +1207,9 @@ define <vscale x 80 x i1> @vector_interleave_nxv80i1_nxv16i1(<vscale x 16 x i1> ; ZVBB-NEXT: add a5, a4, a1 ; ZVBB-NEXT: vl1r.v v16, (a5) ; ZVBB-NEXT: add a5, a5, a1 -; ZVBB-NEXT: srli a1, a1, 1 +; ZVBB-NEXT: srli a1, a1, 2 ; ZVBB-NEXT: vl1r.v v11, (a2) -; ZVBB-NEXT: add a2, a3, a3 ; ZVBB-NEXT: vl1r.v v15, (a4) -; ZVBB-NEXT: add a4, a1, a1 ; ZVBB-NEXT: vl1r.v v13, (a0) ; ZVBB-NEXT: vl1r.v v17, (a5) ; ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, ma @@ -1232,11 +1217,11 @@ define <vscale x 80 x i1> @vector_interleave_nxv80i1_nxv16i1(<vscale x 16 x i1> ; ZVBB-NEXT: vmsne.vi v0, v10, 0 ; ZVBB-NEXT: vmsne.vi v8, v14, 0 ; ZVBB-NEXT: vmsne.vi v9, v12, 0 -; ZVBB-NEXT: vsetvli zero, a2, e8, mf2, ta, ma -; ZVBB-NEXT: vslideup.vx v0, v18, a3 -; ZVBB-NEXT: vslideup.vx v9, v8, a3 -; ZVBB-NEXT: vsetvli zero, a4, e8, m1, ta, ma -; ZVBB-NEXT: vslideup.vx v0, v9, a1 +; ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; ZVBB-NEXT: vslideup.vx v0, v18, a1 +; ZVBB-NEXT: vslideup.vx v9, v8, a1 +; ZVBB-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; ZVBB-NEXT: vslideup.vx v0, v9, a3 ; ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; ZVBB-NEXT: vmsne.vi v8, v16, 0 ; ZVBB-NEXT: csrr a0, vlenb @@ -2340,47 +2325,45 @@ define <vscale x 96 x i1> @vector_interleave_nxv96i1_nxv16i1(<vscale x 16 x i1> ; CHECK-NEXT: vmv1r.v v17, v9 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmerge.vim v24, v20, 1, v0 -; CHECK-NEXT: addi a5, sp, 16 +; CHECK-NEXT: addi a4, sp, 16 ; CHECK-NEXT: vmv1r.v v18, v25 ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmerge.vim v26, v20, 1, v0 -; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: vmv1r.v v19, v27 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vmerge.vim v10, v20, 1, v0 -; CHECK-NEXT: add a3, a0, a2 +; CHECK-NEXT: 
add a2, a0, a1 ; CHECK-NEXT: vmv1r.v v20, v11 -; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma ; CHECK-NEXT: vsseg6e8.v v15, (a0) ; CHECK-NEXT: vmv1r.v v15, v22 -; CHECK-NEXT: add a4, a5, a2 +; CHECK-NEXT: add a5, a4, a1 ; CHECK-NEXT: vmv1r.v v16, v8 -; CHECK-NEXT: srli a1, a2, 2 +; CHECK-NEXT: srli a3, a1, 1 ; CHECK-NEXT: vmv1r.v v17, v24 -; CHECK-NEXT: add a6, a4, a2 +; CHECK-NEXT: add a6, a5, a1 ; CHECK-NEXT: vmv1r.v v18, v26 -; CHECK-NEXT: add a7, a3, a2 +; CHECK-NEXT: add a7, a2, a1 ; CHECK-NEXT: vmv1r.v v19, v10 -; CHECK-NEXT: vsseg6e8.v v14, (a5) +; CHECK-NEXT: vsseg6e8.v v14, (a4) ; CHECK-NEXT: vl1r.v v8, (a0) -; CHECK-NEXT: add a0, a6, a2 +; CHECK-NEXT: add a0, a6, a1 ; CHECK-NEXT: vl1r.v v10, (a6) -; CHECK-NEXT: add a6, a7, a2 -; CHECK-NEXT: vl1r.v v12, (a5) -; CHECK-NEXT: add a5, a0, a2 +; CHECK-NEXT: add a6, a7, a1 +; CHECK-NEXT: vl1r.v v12, (a4) +; CHECK-NEXT: add a4, a0, a1 ; CHECK-NEXT: vl1r.v v14, (a7) -; CHECK-NEXT: add a7, a6, a2 -; CHECK-NEXT: vl1r.v v16, (a5) -; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a7, a6, a1 +; CHECK-NEXT: vl1r.v v16, (a4) +; CHECK-NEXT: add a4, a4, a1 ; CHECK-NEXT: vl1r.v v18, (a7) -; CHECK-NEXT: add a7, a7, a2 -; CHECK-NEXT: srli a2, a2, 1 -; CHECK-NEXT: vl1r.v v9, (a3) -; CHECK-NEXT: add a3, a1, a1 -; CHECK-NEXT: vl1r.v v17, (a5) -; CHECK-NEXT: add a5, a2, a2 +; CHECK-NEXT: add a7, a7, a1 +; CHECK-NEXT: srli a1, a1, 2 +; CHECK-NEXT: vl1r.v v9, (a2) +; CHECK-NEXT: vl1r.v v17, (a4) ; CHECK-NEXT: vl1r.v v11, (a0) -; CHECK-NEXT: vl1r.v v13, (a4) +; CHECK-NEXT: vl1r.v v13, (a5) ; CHECK-NEXT: vl1r.v v19, (a7) ; CHECK-NEXT: vl1r.v v15, (a6) ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma @@ -2390,12 +2373,12 @@ define <vscale x 96 x i1> @vector_interleave_nxv96i1_nxv16i1(<vscale x 16 x i1> ; CHECK-NEXT: vmsne.vi v0, v12, 0 ; CHECK-NEXT: vmsne.vi v10, v18, 0 ; CHECK-NEXT: vmsne.vi v8, v14, 0 -; CHECK-NEXT: vsetvli zero, a3, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vx v9, v20, a1 ; CHECK-NEXT: vslideup.vx v0, v16, a1 -; CHECK-NEXT: vsetvli zero, a5, e8, m1, ta, ma -; CHECK-NEXT: vslideup.vx v0, v9, a2 -; CHECK-NEXT: vsetvli zero, a3, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vslideup.vx v0, v9, a3 +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vx v8, v10, a1 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 12 @@ -2427,47 +2410,45 @@ define <vscale x 96 x i1> @vector_interleave_nxv96i1_nxv16i1(<vscale x 16 x i1> ; ZVBB-NEXT: vmv1r.v v17, v9 ; ZVBB-NEXT: vmv1r.v v0, v10 ; ZVBB-NEXT: vmerge.vim v24, v20, 1, v0 -; ZVBB-NEXT: addi a5, sp, 16 +; ZVBB-NEXT: addi a4, sp, 16 ; ZVBB-NEXT: vmv1r.v v18, v25 ; ZVBB-NEXT: vmv1r.v v0, v11 ; ZVBB-NEXT: vmerge.vim v26, v20, 1, v0 -; ZVBB-NEXT: csrr a2, vlenb +; ZVBB-NEXT: csrr a1, vlenb ; ZVBB-NEXT: vmv1r.v v19, v27 ; ZVBB-NEXT: vmv1r.v v0, v12 ; ZVBB-NEXT: vmerge.vim v10, v20, 1, v0 -; ZVBB-NEXT: add a3, a0, a2 +; ZVBB-NEXT: add a2, a0, a1 ; ZVBB-NEXT: vmv1r.v v20, v11 -; ZVBB-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; ZVBB-NEXT: vsetvli a3, zero, e8, m1, ta, ma ; ZVBB-NEXT: vsseg6e8.v v15, (a0) ; ZVBB-NEXT: vmv1r.v v15, v22 -; ZVBB-NEXT: add a4, a5, a2 +; ZVBB-NEXT: add a5, a4, a1 ; ZVBB-NEXT: vmv1r.v v16, v8 -; ZVBB-NEXT: srli a1, a2, 2 +; ZVBB-NEXT: srli a3, a1, 1 ; ZVBB-NEXT: vmv1r.v v17, v24 -; ZVBB-NEXT: add a6, a4, a2 +; ZVBB-NEXT: add a6, a5, a1 ; ZVBB-NEXT: vmv1r.v v18, v26 -; ZVBB-NEXT: add a7, a3, a2 +; ZVBB-NEXT: add a7, a2, a1 ; ZVBB-NEXT: vmv1r.v 
v19, v10 -; ZVBB-NEXT: vsseg6e8.v v14, (a5) +; ZVBB-NEXT: vsseg6e8.v v14, (a4) ; ZVBB-NEXT: vl1r.v v8, (a0) -; ZVBB-NEXT: add a0, a6, a2 +; ZVBB-NEXT: add a0, a6, a1 ; ZVBB-NEXT: vl1r.v v10, (a6) -; ZVBB-NEXT: add a6, a7, a2 -; ZVBB-NEXT: vl1r.v v12, (a5) -; ZVBB-NEXT: add a5, a0, a2 +; ZVBB-NEXT: add a6, a7, a1 +; ZVBB-NEXT: vl1r.v v12, (a4) +; ZVBB-NEXT: add a4, a0, a1 ; ZVBB-NEXT: vl1r.v v14, (a7) -; ZVBB-NEXT: add a7, a6, a2 -; ZVBB-NEXT: vl1r.v v16, (a5) -; ZVBB-NEXT: add a5, a5, a2 +; ZVBB-NEXT: add a7, a6, a1 +; ZVBB-NEXT: vl1r.v v16, (a4) +; ZVBB-NEXT: add a4, a4, a1 ; ZVBB-NEXT: vl1r.v v18, (a7) -; ZVBB-NEXT: add a7, a7, a2 -; ZVBB-NEXT: srli a2, a2, 1 -; ZVBB-NEXT: vl1r.v v9, (a3) -; ZVBB-NEXT: add a3, a1, a1 -; ZVBB-NEXT: vl1r.v v17, (a5) -; ZVBB-NEXT: add a5, a2, a2 +; ZVBB-NEXT: add a7, a7, a1 +; ZVBB-NEXT: srli a1, a1, 2 +; ZVBB-NEXT: vl1r.v v9, (a2) +; ZVBB-NEXT: vl1r.v v17, (a4) ; ZVBB-NEXT: vl1r.v v11, (a0) -; ZVBB-NEXT: vl1r.v v13, (a4) +; ZVBB-NEXT: vl1r.v v13, (a5) ; ZVBB-NEXT: vl1r.v v19, (a7) ; ZVBB-NEXT: vl1r.v v15, (a6) ; ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, ma @@ -2477,12 +2458,12 @@ define <vscale x 96 x i1> @vector_interleave_nxv96i1_nxv16i1(<vscale x 16 x i1> ; ZVBB-NEXT: vmsne.vi v0, v12, 0 ; ZVBB-NEXT: vmsne.vi v10, v18, 0 ; ZVBB-NEXT: vmsne.vi v8, v14, 0 -; ZVBB-NEXT: vsetvli zero, a3, e8, mf2, ta, ma +; ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; ZVBB-NEXT: vslideup.vx v9, v20, a1 ; ZVBB-NEXT: vslideup.vx v0, v16, a1 -; ZVBB-NEXT: vsetvli zero, a5, e8, m1, ta, ma -; ZVBB-NEXT: vslideup.vx v0, v9, a2 -; ZVBB-NEXT: vsetvli zero, a3, e8, mf2, ta, ma +; ZVBB-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; ZVBB-NEXT: vslideup.vx v0, v9, a3 +; ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; ZVBB-NEXT: vslideup.vx v8, v10, a1 ; ZVBB-NEXT: csrr a0, vlenb ; ZVBB-NEXT: li a1, 12 @@ -3676,23 +3657,21 @@ define <vscale x 112 x i1> @vector_interleave_nxv112i1_nxv16i1(<vscale x 16 x i1 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vmv.v.i v14, 0 -; CHECK-NEXT: addi a4, sp, 16 +; CHECK-NEXT: addi a3, sp, 16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a1, a0, 3 ; CHECK-NEXT: sub a0, a1, a0 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: vmerge.vim v16, v14, 1, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v22, v14, 1, v0 -; CHECK-NEXT: add a3, a4, a2 -; CHECK-NEXT: srli a1, a2, 2 -; CHECK-NEXT: add a5, a0, a2 +; CHECK-NEXT: add a2, a3, a1 ; CHECK-NEXT: vmv4r.v v24, v16 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vmerge.vim v18, v14, 1, v0 -; CHECK-NEXT: add a6, a3, a2 +; CHECK-NEXT: add a4, a2, a1 ; CHECK-NEXT: vmv1r.v v25, v22 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmerge.vim v8, v14, 1, v0 @@ -3704,41 +3683,41 @@ define <vscale x 112 x i1> @vector_interleave_nxv112i1_nxv16i1(<vscale x 16 x i1 ; CHECK-NEXT: vmerge.vim v10, v14, 1, v0 ; CHECK-NEXT: vmv1r.v v28, v20 ; CHECK-NEXT: vmv1r.v v18, v23 -; CHECK-NEXT: add a7, a6, a2 +; CHECK-NEXT: add a5, a4, a1 ; CHECK-NEXT: vmv1r.v v29, v10 ; CHECK-NEXT: vmv1r.v v20, v9 ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vmerge.vim v30, v14, 1, v0 ; CHECK-NEXT: vmv1r.v v22, v11 -; CHECK-NEXT: vsetvli t0, zero, e8, m1, ta, ma -; CHECK-NEXT: vsseg7e8.v v24, (a4) +; CHECK-NEXT: vsetvli a6, zero, e8, m1, ta, ma +; CHECK-NEXT: vsseg7e8.v v24, (a3) ; CHECK-NEXT: vmv1r.v v23, v31 ; CHECK-NEXT: vsseg7e8.v v17, (a0) -; CHECK-NEXT: vl1r.v v8, (a6) -; CHECK-NEXT: add a6, a7, a2 -; CHECK-NEXT: vl1r.v 
v10, (a4) -; CHECK-NEXT: add a4, a6, a2 -; CHECK-NEXT: vl1r.v v12, (a6) -; CHECK-NEXT: add a6, a4, a2 -; CHECK-NEXT: vl1r.v v14, (a6) -; CHECK-NEXT: add a6, a5, a2 -; CHECK-NEXT: vl1r.v v16, (a5) -; CHECK-NEXT: add a5, a6, a2 -; CHECK-NEXT: vl1r.v v18, (a5) -; CHECK-NEXT: add a5, a5, a2 -; CHECK-NEXT: vl1r.v v9, (a7) -; CHECK-NEXT: add a7, a5, a2 -; CHECK-NEXT: vl1r.v v20, (a7) -; CHECK-NEXT: add a7, a7, a2 -; CHECK-NEXT: srli a2, a2, 1 -; CHECK-NEXT: vl1r.v v11, (a3) -; CHECK-NEXT: add a3, a1, a1 -; CHECK-NEXT: vl1r.v v13, (a4) -; CHECK-NEXT: add a4, a2, a2 +; CHECK-NEXT: vl1r.v v8, (a4) +; CHECK-NEXT: add a4, a5, a1 +; CHECK-NEXT: vl1r.v v10, (a3) +; CHECK-NEXT: add a6, a4, a1 +; CHECK-NEXT: vl1r.v v12, (a4) +; CHECK-NEXT: add a3, a6, a1 +; CHECK-NEXT: vl1r.v v14, (a3) +; CHECK-NEXT: srli a3, a1, 1 +; CHECK-NEXT: vl1r.v v9, (a5) +; CHECK-NEXT: add a4, a0, a1 +; CHECK-NEXT: vl1r.v v16, (a4) +; CHECK-NEXT: add a4, a4, a1 +; CHECK-NEXT: vl1r.v v11, (a2) +; CHECK-NEXT: add a2, a4, a1 +; CHECK-NEXT: vl1r.v v18, (a2) +; CHECK-NEXT: add a2, a2, a1 +; CHECK-NEXT: vl1r.v v13, (a6) +; CHECK-NEXT: add a5, a2, a1 +; CHECK-NEXT: vl1r.v v20, (a5) +; CHECK-NEXT: add a5, a5, a1 +; CHECK-NEXT: srli a1, a1, 2 ; CHECK-NEXT: vl1r.v v15, (a0) -; CHECK-NEXT: vl1r.v v19, (a5) -; CHECK-NEXT: vl1r.v v17, (a6) -; CHECK-NEXT: vl1r.v v21, (a7) +; CHECK-NEXT: vl1r.v v19, (a2) +; CHECK-NEXT: vl1r.v v17, (a4) +; CHECK-NEXT: vl1r.v v21, (a5) ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vmsne.vi v22, v8, 0 ; CHECK-NEXT: vmsne.vi v0, v10, 0 @@ -3747,13 +3726,13 @@ define <vscale x 112 x i1> @vector_interleave_nxv112i1_nxv16i1(<vscale x 16 x i1 ; CHECK-NEXT: vmsne.vi v11, v18, 0 ; CHECK-NEXT: vmsne.vi v8, v16, 0 ; CHECK-NEXT: vmsne.vi v12, v20, 0 -; CHECK-NEXT: vsetvli zero, a3, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vx v0, v22, a1 ; CHECK-NEXT: vslideup.vx v9, v10, a1 ; CHECK-NEXT: vslideup.vx v8, v11, a1 -; CHECK-NEXT: vsetvli zero, a4, e8, m1, ta, ma -; CHECK-NEXT: vslideup.vx v0, v9, a2 -; CHECK-NEXT: vslideup.vx v8, v12, a2 +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vslideup.vx v0, v9, a3 +; CHECK-NEXT: vslideup.vx v8, v12, a3 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 14 ; CHECK-NEXT: mul a0, a0, a1 @@ -3770,23 +3749,21 @@ define <vscale x 112 x i1> @vector_interleave_nxv112i1_nxv16i1(<vscale x 16 x i1 ; ZVBB-NEXT: sub sp, sp, a0 ; ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; ZVBB-NEXT: vmv.v.i v14, 0 -; ZVBB-NEXT: addi a4, sp, 16 +; ZVBB-NEXT: addi a3, sp, 16 ; ZVBB-NEXT: csrr a0, vlenb ; ZVBB-NEXT: slli a1, a0, 3 ; ZVBB-NEXT: sub a0, a1, a0 ; ZVBB-NEXT: add a0, sp, a0 ; ZVBB-NEXT: addi a0, a0, 16 -; ZVBB-NEXT: csrr a2, vlenb +; ZVBB-NEXT: csrr a1, vlenb ; ZVBB-NEXT: vmerge.vim v16, v14, 1, v0 ; ZVBB-NEXT: vmv1r.v v0, v8 ; ZVBB-NEXT: vmerge.vim v22, v14, 1, v0 -; ZVBB-NEXT: add a3, a4, a2 -; ZVBB-NEXT: srli a1, a2, 2 -; ZVBB-NEXT: add a5, a0, a2 +; ZVBB-NEXT: add a2, a3, a1 ; ZVBB-NEXT: vmv4r.v v24, v16 ; ZVBB-NEXT: vmv1r.v v0, v9 ; ZVBB-NEXT: vmerge.vim v18, v14, 1, v0 -; ZVBB-NEXT: add a6, a3, a2 +; ZVBB-NEXT: add a4, a2, a1 ; ZVBB-NEXT: vmv1r.v v25, v22 ; ZVBB-NEXT: vmv1r.v v0, v10 ; ZVBB-NEXT: vmerge.vim v8, v14, 1, v0 @@ -3798,41 +3775,41 @@ define <vscale x 112 x i1> @vector_interleave_nxv112i1_nxv16i1(<vscale x 16 x i1 ; ZVBB-NEXT: vmerge.vim v10, v14, 1, v0 ; ZVBB-NEXT: vmv1r.v v28, v20 ; ZVBB-NEXT: vmv1r.v v18, v23 -; ZVBB-NEXT: add a7, a6, a2 +; ZVBB-NEXT: add a5, a4, a1 ; ZVBB-NEXT: vmv1r.v v29, v10 ; ZVBB-NEXT: 
vmv1r.v v20, v9 ; ZVBB-NEXT: vmv1r.v v0, v13 ; ZVBB-NEXT: vmerge.vim v30, v14, 1, v0 ; ZVBB-NEXT: vmv1r.v v22, v11 -; ZVBB-NEXT: vsetvli t0, zero, e8, m1, ta, ma -; ZVBB-NEXT: vsseg7e8.v v24, (a4) +; ZVBB-NEXT: vsetvli a6, zero, e8, m1, ta, ma +; ZVBB-NEXT: vsseg7e8.v v24, (a3) ; ZVBB-NEXT: vmv1r.v v23, v31 ; ZVBB-NEXT: vsseg7e8.v v17, (a0) -; ZVBB-NEXT: vl1r.v v8, (a6) -; ZVBB-NEXT: add a6, a7, a2 -; ZVBB-NEXT: vl1r.v v10, (a4) -; ZVBB-NEXT: add a4, a6, a2 -; ZVBB-NEXT: vl1r.v v12, (a6) -; ZVBB-NEXT: add a6, a4, a2 -; ZVBB-NEXT: vl1r.v v14, (a6) -; ZVBB-NEXT: add a6, a5, a2 -; ZVBB-NEXT: vl1r.v v16, (a5) -; ZVBB-NEXT: add a5, a6, a2 -; ZVBB-NEXT: vl1r.v v18, (a5) -; ZVBB-NEXT: add a5, a5, a2 -; ZVBB-NEXT: vl1r.v v9, (a7) -; ZVBB-NEXT: add a7, a5, a2 -; ZVBB-NEXT: vl1r.v v20, (a7) -; ZVBB-NEXT: add a7, a7, a2 -; ZVBB-NEXT: srli a2, a2, 1 -; ZVBB-NEXT: vl1r.v v11, (a3) -; ZVBB-NEXT: add a3, a1, a1 -; ZVBB-NEXT: vl1r.v v13, (a4) -; ZVBB-NEXT: add a4, a2, a2 +; ZVBB-NEXT: vl1r.v v8, (a4) +; ZVBB-NEXT: add a4, a5, a1 +; ZVBB-NEXT: vl1r.v v10, (a3) +; ZVBB-NEXT: add a6, a4, a1 +; ZVBB-NEXT: vl1r.v v12, (a4) +; ZVBB-NEXT: add a3, a6, a1 +; ZVBB-NEXT: vl1r.v v14, (a3) +; ZVBB-NEXT: srli a3, a1, 1 +; ZVBB-NEXT: vl1r.v v9, (a5) +; ZVBB-NEXT: add a4, a0, a1 +; ZVBB-NEXT: vl1r.v v16, (a4) +; ZVBB-NEXT: add a4, a4, a1 +; ZVBB-NEXT: vl1r.v v11, (a2) +; ZVBB-NEXT: add a2, a4, a1 +; ZVBB-NEXT: vl1r.v v18, (a2) +; ZVBB-NEXT: add a2, a2, a1 +; ZVBB-NEXT: vl1r.v v13, (a6) +; ZVBB-NEXT: add a5, a2, a1 +; ZVBB-NEXT: vl1r.v v20, (a5) +; ZVBB-NEXT: add a5, a5, a1 +; ZVBB-NEXT: srli a1, a1, 2 ; ZVBB-NEXT: vl1r.v v15, (a0) -; ZVBB-NEXT: vl1r.v v19, (a5) -; ZVBB-NEXT: vl1r.v v17, (a6) -; ZVBB-NEXT: vl1r.v v21, (a7) +; ZVBB-NEXT: vl1r.v v19, (a2) +; ZVBB-NEXT: vl1r.v v17, (a4) +; ZVBB-NEXT: vl1r.v v21, (a5) ; ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; ZVBB-NEXT: vmsne.vi v22, v8, 0 ; ZVBB-NEXT: vmsne.vi v0, v10, 0 @@ -3841,13 +3818,13 @@ define <vscale x 112 x i1> @vector_interleave_nxv112i1_nxv16i1(<vscale x 16 x i1 ; ZVBB-NEXT: vmsne.vi v11, v18, 0 ; ZVBB-NEXT: vmsne.vi v8, v16, 0 ; ZVBB-NEXT: vmsne.vi v12, v20, 0 -; ZVBB-NEXT: vsetvli zero, a3, e8, mf2, ta, ma +; ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; ZVBB-NEXT: vslideup.vx v0, v22, a1 ; ZVBB-NEXT: vslideup.vx v9, v10, a1 ; ZVBB-NEXT: vslideup.vx v8, v11, a1 -; ZVBB-NEXT: vsetvli zero, a4, e8, m1, ta, ma -; ZVBB-NEXT: vslideup.vx v0, v9, a2 -; ZVBB-NEXT: vslideup.vx v8, v12, a2 +; ZVBB-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; ZVBB-NEXT: vslideup.vx v0, v9, a3 +; ZVBB-NEXT: vslideup.vx v8, v12, a3 ; ZVBB-NEXT: csrr a0, vlenb ; ZVBB-NEXT: li a1, 14 ; ZVBB-NEXT: mul a0, a0, a1 @@ -5569,54 +5546,52 @@ define <vscale x 128 x i1> @vector_interleave_nxv128i1_nxv16i1(<vscale x 16 x i1 ; CHECK-NEXT: add a6, a4, a0 ; CHECK-NEXT: add a7, a5, a0 ; CHECK-NEXT: add t0, a6, a0 -; CHECK-NEXT: add t1, a7, a0 -; CHECK-NEXT: add t2, t0, a0 ; CHECK-NEXT: vmv1r.v v20, v9 -; CHECK-NEXT: add t3, t1, a0 +; CHECK-NEXT: add t1, a7, a0 ; CHECK-NEXT: vmv1r.v v22, v11 ; CHECK-NEXT: vsseg8e8.v v16, (a1) -; CHECK-NEXT: vl1r.v v10, (t1) -; CHECK-NEXT: add t1, t2, a0 -; CHECK-NEXT: vl1r.v v12, (a5) -; CHECK-NEXT: add a5, t3, a0 +; CHECK-NEXT: vl1r.v v8, (a5) +; CHECK-NEXT: add a5, t0, a0 +; CHECK-NEXT: vl1r.v v12, (t1) +; CHECK-NEXT: add t1, t1, a0 ; CHECK-NEXT: vl1r.v v14, (a2) -; CHECK-NEXT: add a2, t1, a0 +; CHECK-NEXT: add a2, a5, a0 +; CHECK-NEXT: vl1r.v v10, (a5) +; CHECK-NEXT: add a5, t1, a0 ; CHECK-NEXT: vl1r.v v16, (a5) ; CHECK-NEXT: add a5, a5, a0 -; CHECK-NEXT: vl1r.v v8, 
(a2) -; CHECK-NEXT: add a2, a2, a0 -; CHECK-NEXT: vl1r.v v18, (t2) ; CHECK-NEXT: vl1r.v v17, (a5) -; CHECK-NEXT: vl1r.v v11, (t3) -; CHECK-NEXT: vl1r.v v13, (a7) +; CHECK-NEXT: add a5, a2, a0 +; CHECK-NEXT: vl1r.v v18, (a5) +; CHECK-NEXT: add a5, a5, a0 +; CHECK-NEXT: vl1r.v v13, (t1) +; CHECK-NEXT: vl1r.v v9, (a7) ; CHECK-NEXT: vl1r.v v15, (a3) ; CHECK-NEXT: vsetvli a3, zero, e8, m2, ta, ma ; CHECK-NEXT: vmsne.vi v20, v16, 0 -; CHECK-NEXT: vmsne.vi v16, v10, 0 -; CHECK-NEXT: vl1r.v v10, (a6) -; CHECK-NEXT: vmsne.vi v17, v12, 0 +; CHECK-NEXT: vmsne.vi v16, v12, 0 +; CHECK-NEXT: vl1r.v v12, (a6) +; CHECK-NEXT: vmsne.vi v17, v8, 0 ; CHECK-NEXT: vmsne.vi v0, v14, 0 -; CHECK-NEXT: vl1r.v v12, (a1) -; CHECK-NEXT: vl1r.v v9, (a2) -; CHECK-NEXT: vl1r.v v19, (t1) -; CHECK-NEXT: vl1r.v v11, (t0) -; CHECK-NEXT: vl1r.v v13, (a4) -; CHECK-NEXT: vmsne.vi v14, v8, 0 +; CHECK-NEXT: vl1r.v v14, (a1) +; CHECK-NEXT: vl1r.v v19, (a5) +; CHECK-NEXT: vl1r.v v11, (a2) +; CHECK-NEXT: vl1r.v v13, (t0) +; CHECK-NEXT: vl1r.v v15, (a4) ; CHECK-NEXT: vmsne.vi v9, v18, 0 -; CHECK-NEXT: vmsne.vi v15, v10, 0 -; CHECK-NEXT: vmsne.vi v8, v12, 0 +; CHECK-NEXT: vmsne.vi v18, v10, 0 +; CHECK-NEXT: vmsne.vi v10, v12, 0 +; CHECK-NEXT: vmsne.vi v8, v14, 0 ; CHECK-NEXT: srli a1, a0, 2 -; CHECK-NEXT: add a2, a1, a1 -; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vx v16, v20, a1 ; CHECK-NEXT: vslideup.vx v0, v17, a1 -; CHECK-NEXT: vslideup.vx v9, v14, a1 -; CHECK-NEXT: vslideup.vx v8, v15, a1 +; CHECK-NEXT: vslideup.vx v18, v9, a1 +; CHECK-NEXT: vslideup.vx v8, v10, a1 ; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vslideup.vx v0, v16, a0 -; CHECK-NEXT: vslideup.vx v8, v9, a0 +; CHECK-NEXT: vslideup.vx v8, v18, a0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 @@ -5670,54 +5645,52 @@ define <vscale x 128 x i1> @vector_interleave_nxv128i1_nxv16i1(<vscale x 16 x i1 ; ZVBB-NEXT: add a6, a4, a0 ; ZVBB-NEXT: add a7, a5, a0 ; ZVBB-NEXT: add t0, a6, a0 -; ZVBB-NEXT: add t1, a7, a0 -; ZVBB-NEXT: add t2, t0, a0 ; ZVBB-NEXT: vmv1r.v v20, v9 -; ZVBB-NEXT: add t3, t1, a0 +; ZVBB-NEXT: add t1, a7, a0 ; ZVBB-NEXT: vmv1r.v v22, v11 ; ZVBB-NEXT: vsseg8e8.v v16, (a1) -; ZVBB-NEXT: vl1r.v v10, (t1) -; ZVBB-NEXT: add t1, t2, a0 -; ZVBB-NEXT: vl1r.v v12, (a5) -; ZVBB-NEXT: add a5, t3, a0 +; ZVBB-NEXT: vl1r.v v8, (a5) +; ZVBB-NEXT: add a5, t0, a0 +; ZVBB-NEXT: vl1r.v v12, (t1) +; ZVBB-NEXT: add t1, t1, a0 ; ZVBB-NEXT: vl1r.v v14, (a2) -; ZVBB-NEXT: add a2, t1, a0 +; ZVBB-NEXT: add a2, a5, a0 +; ZVBB-NEXT: vl1r.v v10, (a5) +; ZVBB-NEXT: add a5, t1, a0 ; ZVBB-NEXT: vl1r.v v16, (a5) ; ZVBB-NEXT: add a5, a5, a0 -; ZVBB-NEXT: vl1r.v v8, (a2) -; ZVBB-NEXT: add a2, a2, a0 -; ZVBB-NEXT: vl1r.v v18, (t2) ; ZVBB-NEXT: vl1r.v v17, (a5) -; ZVBB-NEXT: vl1r.v v11, (t3) -; ZVBB-NEXT: vl1r.v v13, (a7) +; ZVBB-NEXT: add a5, a2, a0 +; ZVBB-NEXT: vl1r.v v18, (a5) +; ZVBB-NEXT: add a5, a5, a0 +; ZVBB-NEXT: vl1r.v v13, (t1) +; ZVBB-NEXT: vl1r.v v9, (a7) ; ZVBB-NEXT: vl1r.v v15, (a3) ; ZVBB-NEXT: vsetvli a3, zero, e8, m2, ta, ma ; ZVBB-NEXT: vmsne.vi v20, v16, 0 -; ZVBB-NEXT: vmsne.vi v16, v10, 0 -; ZVBB-NEXT: vl1r.v v10, (a6) -; ZVBB-NEXT: vmsne.vi v17, v12, 0 +; ZVBB-NEXT: vmsne.vi v16, v12, 0 +; ZVBB-NEXT: vl1r.v v12, (a6) +; ZVBB-NEXT: vmsne.vi v17, v8, 0 ; ZVBB-NEXT: vmsne.vi v0, v14, 0 -; ZVBB-NEXT: vl1r.v v12, (a1) -; ZVBB-NEXT: vl1r.v 
v9, (a2) -; ZVBB-NEXT: vl1r.v v19, (t1) -; ZVBB-NEXT: vl1r.v v11, (t0) -; ZVBB-NEXT: vl1r.v v13, (a4) -; ZVBB-NEXT: vmsne.vi v14, v8, 0 +; ZVBB-NEXT: vl1r.v v14, (a1) +; ZVBB-NEXT: vl1r.v v19, (a5) +; ZVBB-NEXT: vl1r.v v11, (a2) +; ZVBB-NEXT: vl1r.v v13, (t0) +; ZVBB-NEXT: vl1r.v v15, (a4) ; ZVBB-NEXT: vmsne.vi v9, v18, 0 -; ZVBB-NEXT: vmsne.vi v15, v10, 0 -; ZVBB-NEXT: vmsne.vi v8, v12, 0 +; ZVBB-NEXT: vmsne.vi v18, v10, 0 +; ZVBB-NEXT: vmsne.vi v10, v12, 0 +; ZVBB-NEXT: vmsne.vi v8, v14, 0 ; ZVBB-NEXT: srli a1, a0, 2 -; ZVBB-NEXT: add a2, a1, a1 -; ZVBB-NEXT: vsetvli zero, a2, e8, mf2, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; ZVBB-NEXT: vslideup.vx v16, v20, a1 ; ZVBB-NEXT: vslideup.vx v0, v17, a1 -; ZVBB-NEXT: vslideup.vx v9, v14, a1 -; ZVBB-NEXT: vslideup.vx v8, v15, a1 +; ZVBB-NEXT: vslideup.vx v18, v9, a1 +; ZVBB-NEXT: vslideup.vx v8, v10, a1 ; ZVBB-NEXT: srli a0, a0, 1 -; ZVBB-NEXT: add a1, a0, a0 -; ZVBB-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; ZVBB-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v0, v16, a0 -; ZVBB-NEXT: vslideup.vx v8, v9, a0 +; ZVBB-NEXT: vslideup.vx v8, v18, a0 ; ZVBB-NEXT: csrr a0, vlenb ; ZVBB-NEXT: slli a0, a0, 4 ; ZVBB-NEXT: add sp, sp, a0 @@ -6294,14 +6267,12 @@ define <vscale x 4 x bfloat> @vector_interleave_nxv4bf16_nxv2bf16(<vscale x 2 x ; V-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; V-NEXT: vwaddu.vv v10, v8, v9 ; V-NEXT: li a0, -1 -; V-NEXT: csrr a1, vlenb ; V-NEXT: vwmaccu.vx v10, a0, v9 -; V-NEXT: srli a1, a1, 2 -; V-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; V-NEXT: vslidedown.vx v8, v10, a1 -; V-NEXT: add a0, a1, a1 -; V-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; V-NEXT: vslideup.vx v10, v8, a1 +; V-NEXT: csrr a0, vlenb +; V-NEXT: srli a0, a0, 2 +; V-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; V-NEXT: vslidedown.vx v8, v10, a0 +; V-NEXT: vslideup.vx v10, v8, a0 ; V-NEXT: vmv.v.v v8, v10 ; V-NEXT: ret ; @@ -6314,8 +6285,6 @@ define <vscale x 4 x bfloat> @vector_interleave_nxv4bf16_nxv2bf16(<vscale x 2 x ; ZVBB-NEXT: srli a0, a0, 2 ; ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; ZVBB-NEXT: vslidedown.vx v8, v10, a0 -; ZVBB-NEXT: add a1, a0, a0 -; ZVBB-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v10, v8, a0 ; ZVBB-NEXT: vmv.v.v v8, v10 ; ZVBB-NEXT: ret @@ -6327,8 +6296,7 @@ define <vscale x 4 x bfloat> @vector_interleave_nxv4bf16_nxv2bf16(<vscale x 2 x ; ZIP-NEXT: ri.vzip2a.vv v10, v8, v9 ; ZIP-NEXT: csrr a0, vlenb ; ZIP-NEXT: srli a0, a0, 2 -; ZIP-NEXT: add a1, a0, a0 -; ZIP-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; ZIP-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; ZIP-NEXT: vslideup.vx v10, v11, a0 ; ZIP-NEXT: vmv.v.v v8, v10 ; ZIP-NEXT: ret @@ -6374,14 +6342,12 @@ define <vscale x 4 x half> @vector_interleave_nxv4f16_nxv2f16(<vscale x 2 x half ; V-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; V-NEXT: vwaddu.vv v10, v8, v9 ; V-NEXT: li a0, -1 -; V-NEXT: csrr a1, vlenb ; V-NEXT: vwmaccu.vx v10, a0, v9 -; V-NEXT: srli a1, a1, 2 -; V-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; V-NEXT: vslidedown.vx v8, v10, a1 -; V-NEXT: add a0, a1, a1 -; V-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; V-NEXT: vslideup.vx v10, v8, a1 +; V-NEXT: csrr a0, vlenb +; V-NEXT: srli a0, a0, 2 +; V-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; V-NEXT: vslidedown.vx v8, v10, a0 +; V-NEXT: vslideup.vx v10, v8, a0 ; V-NEXT: vmv.v.v v8, v10 ; V-NEXT: ret ; @@ -6394,8 +6360,6 @@ define <vscale x 4 x half> @vector_interleave_nxv4f16_nxv2f16(<vscale x 2 x half ; ZVBB-NEXT: srli a0, a0, 2 ; ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; 
ZVBB-NEXT: vslidedown.vx v8, v10, a0 -; ZVBB-NEXT: add a1, a0, a0 -; ZVBB-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v10, v8, a0 ; ZVBB-NEXT: vmv.v.v v8, v10 ; ZVBB-NEXT: ret @@ -6407,8 +6371,7 @@ define <vscale x 4 x half> @vector_interleave_nxv4f16_nxv2f16(<vscale x 2 x half ; ZIP-NEXT: ri.vzip2a.vv v10, v8, v9 ; ZIP-NEXT: csrr a0, vlenb ; ZIP-NEXT: srli a0, a0, 2 -; ZIP-NEXT: add a1, a0, a0 -; ZIP-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; ZIP-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; ZIP-NEXT: vslideup.vx v10, v11, a0 ; ZIP-NEXT: vmv.v.v v8, v10 ; ZIP-NEXT: ret @@ -6807,8 +6770,7 @@ define <vscale x 6 x half> @vector_interleave_nxv6f16_nxv2f16(<vscale x 2 x half ; CHECK-NEXT: vle16.v v9, (a3) ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: srli a1, a1, 2 -; CHECK-NEXT: add a0, a1, a1 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: add a2, a3, a2 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma @@ -6834,8 +6796,7 @@ define <vscale x 6 x half> @vector_interleave_nxv6f16_nxv2f16(<vscale x 2 x half ; ZVBB-NEXT: vle16.v v9, (a3) ; ZVBB-NEXT: vle16.v v8, (a0) ; ZVBB-NEXT: srli a1, a1, 2 -; ZVBB-NEXT: add a0, a1, a1 -; ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v8, v9, a1 ; ZVBB-NEXT: add a2, a3, a2 ; ZVBB-NEXT: vsetvli a0, zero, e16, mf2, ta, ma @@ -6967,8 +6928,7 @@ define <vscale x 6 x bfloat> @vector_interleave_nxv6bf16_nxv2bf16(<vscale x 2 x ; CHECK-NEXT: vle16.v v9, (a3) ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: srli a1, a1, 2 -; CHECK-NEXT: add a0, a1, a1 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: add a2, a3, a2 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma @@ -6994,8 +6954,7 @@ define <vscale x 6 x bfloat> @vector_interleave_nxv6bf16_nxv2bf16(<vscale x 2 x ; ZVBB-NEXT: vle16.v v9, (a3) ; ZVBB-NEXT: vle16.v v8, (a0) ; ZVBB-NEXT: srli a1, a1, 2 -; ZVBB-NEXT: add a0, a1, a1 -; ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v8, v9, a1 ; ZVBB-NEXT: add a2, a3, a2 ; ZVBB-NEXT: vsetvli a0, zero, e16, mf2, ta, ma @@ -7127,8 +7086,7 @@ define <vscale x 3 x float> @vector_interleave_nxv3f32_nxv1f32(<vscale x 1 x flo ; CHECK-NEXT: vle32.v v9, (a3) ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: srli a1, a1, 3 -; CHECK-NEXT: add a0, a1, a1 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: add a2, a3, a2 ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma @@ -7154,8 +7112,7 @@ define <vscale x 3 x float> @vector_interleave_nxv3f32_nxv1f32(<vscale x 1 x flo ; ZVBB-NEXT: vle32.v v9, (a3) ; ZVBB-NEXT: vle32.v v8, (a0) ; ZVBB-NEXT: srli a1, a1, 3 -; ZVBB-NEXT: add a0, a1, a1 -; ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVBB-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v8, v9, a1 ; ZVBB-NEXT: add a2, a3, a2 ; ZVBB-NEXT: vsetvli a0, zero, e32, mf2, ta, ma @@ -7391,13 +7348,12 @@ define <vscale x 8 x half> @vector_interleave_nxv8f16_nxv2f16(<vscale x 2 x half ; CHECK-NEXT: vle16.v v9, (a4) ; CHECK-NEXT: vle16.v v8, (a2) ; CHECK-NEXT: srli a1, a1, 2 -; CHECK-NEXT: add a2, a1, a1 -; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v9, v8, a1 -; 
CHECK-NEXT: vsetvli a4, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v10, (a3) ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v10, a1 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1 @@ -7422,13 +7378,12 @@ define <vscale x 8 x half> @vector_interleave_nxv8f16_nxv2f16(<vscale x 2 x half ; ZVBB-NEXT: vle16.v v9, (a4) ; ZVBB-NEXT: vle16.v v8, (a2) ; ZVBB-NEXT: srli a1, a1, 2 -; ZVBB-NEXT: add a2, a1, a1 -; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v9, v8, a1 -; ZVBB-NEXT: vsetvli a4, zero, e16, mf2, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma ; ZVBB-NEXT: vle16.v v10, (a3) ; ZVBB-NEXT: vle16.v v8, (a0) -; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v8, v10, a1 ; ZVBB-NEXT: csrr a0, vlenb ; ZVBB-NEXT: slli a0, a0, 1 @@ -7559,13 +7514,12 @@ define <vscale x 8 x bfloat> @vector_interleave_nxv8bf16_nxv2bf16(<vscale x 2 x ; CHECK-NEXT: vle16.v v9, (a4) ; CHECK-NEXT: vle16.v v8, (a2) ; CHECK-NEXT: srli a1, a1, 2 -; CHECK-NEXT: add a2, a1, a1 -; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v9, v8, a1 -; CHECK-NEXT: vsetvli a4, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v10, (a3) ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v10, a1 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1 @@ -7590,13 +7544,12 @@ define <vscale x 8 x bfloat> @vector_interleave_nxv8bf16_nxv2bf16(<vscale x 2 x ; ZVBB-NEXT: vle16.v v9, (a4) ; ZVBB-NEXT: vle16.v v8, (a2) ; ZVBB-NEXT: srli a1, a1, 2 -; ZVBB-NEXT: add a2, a1, a1 -; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v9, v8, a1 -; ZVBB-NEXT: vsetvli a4, zero, e16, mf2, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma ; ZVBB-NEXT: vle16.v v10, (a3) ; ZVBB-NEXT: vle16.v v8, (a0) -; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v8, v10, a1 ; ZVBB-NEXT: csrr a0, vlenb ; ZVBB-NEXT: slli a0, a0, 1 @@ -7727,13 +7680,12 @@ define <vscale x 4 x float> @vector_interleave_nxv4f32_nxv1f32(<vscale x 1 x flo ; CHECK-NEXT: vle32.v v9, (a4) ; CHECK-NEXT: vle32.v v8, (a2) ; CHECK-NEXT: srli a1, a1, 3 -; CHECK-NEXT: add a2, a1, a1 -; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v9, v8, a1 -; CHECK-NEXT: vsetvli a4, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v10, (a3) ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v10, a1 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1 @@ -7758,13 +7710,12 @@ define <vscale x 4 x float> @vector_interleave_nxv4f32_nxv1f32(<vscale x 1 x flo ; ZVBB-NEXT: vle32.v v9, (a4) ; ZVBB-NEXT: vle32.v v8, (a2) ; ZVBB-NEXT: srli a1, a1, 3 -; ZVBB-NEXT: add a2, a1, a1 -; ZVBB-NEXT: vsetvli zero, a2, e32, m1, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e32, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v9, v8, a1 -; 
ZVBB-NEXT: vsetvli a4, zero, e32, mf2, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e32, mf2, ta, ma ; ZVBB-NEXT: vle32.v v10, (a3) ; ZVBB-NEXT: vle32.v v8, (a0) -; ZVBB-NEXT: vsetvli zero, a2, e32, m1, ta, ma +; ZVBB-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v8, v10, a1 ; ZVBB-NEXT: csrr a0, vlenb ; ZVBB-NEXT: slli a0, a0, 1 @@ -7998,13 +7949,12 @@ define <vscale x 10 x half> @vector_interleave_nxv10f16_nxv2f16(<vscale x 2 x ha ; CHECK-NEXT: vle16.v v8, (a5) ; CHECK-NEXT: vle16.v v9, (a4) ; CHECK-NEXT: srli a1, a1, 2 -; CHECK-NEXT: add a4, a1, a1 ; CHECK-NEXT: vle16.v v10, (a3) -; CHECK-NEXT: vsetvli zero, a4, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v9, v8, a1 ; CHECK-NEXT: vsetvli a3, zero, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a4, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v10, a1 ; CHECK-NEXT: add a2, a5, a2 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma @@ -8034,13 +7984,12 @@ define <vscale x 10 x half> @vector_interleave_nxv10f16_nxv2f16(<vscale x 2 x ha ; ZVBB-NEXT: vle16.v v8, (a5) ; ZVBB-NEXT: vle16.v v9, (a4) ; ZVBB-NEXT: srli a1, a1, 2 -; ZVBB-NEXT: add a4, a1, a1 ; ZVBB-NEXT: vle16.v v10, (a3) -; ZVBB-NEXT: vsetvli zero, a4, e16, m1, ta, ma +; ZVBB-NEXT: vsetvli a3, zero, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v9, v8, a1 ; ZVBB-NEXT: vsetvli a3, zero, e16, mf2, ta, ma ; ZVBB-NEXT: vle16.v v8, (a0) -; ZVBB-NEXT: vsetvli zero, a4, e16, m1, ta, ma +; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v8, v10, a1 ; ZVBB-NEXT: add a2, a5, a2 ; ZVBB-NEXT: vsetvli a0, zero, e16, mf2, ta, ma @@ -8466,13 +8415,12 @@ define <vscale x 10 x bfloat> @vector_interleave_nxv10bf16_nxv2bf16(<vscale x 2 ; CHECK-NEXT: vle16.v v8, (a5) ; CHECK-NEXT: vle16.v v9, (a4) ; CHECK-NEXT: srli a1, a1, 2 -; CHECK-NEXT: add a4, a1, a1 ; CHECK-NEXT: vle16.v v10, (a3) -; CHECK-NEXT: vsetvli zero, a4, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v9, v8, a1 ; CHECK-NEXT: vsetvli a3, zero, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a4, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v10, a1 ; CHECK-NEXT: add a2, a5, a2 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma @@ -8502,13 +8450,12 @@ define <vscale x 10 x bfloat> @vector_interleave_nxv10bf16_nxv2bf16(<vscale x 2 ; ZVBB-NEXT: vle16.v v8, (a5) ; ZVBB-NEXT: vle16.v v9, (a4) ; ZVBB-NEXT: srli a1, a1, 2 -; ZVBB-NEXT: add a4, a1, a1 ; ZVBB-NEXT: vle16.v v10, (a3) -; ZVBB-NEXT: vsetvli zero, a4, e16, m1, ta, ma +; ZVBB-NEXT: vsetvli a3, zero, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v9, v8, a1 ; ZVBB-NEXT: vsetvli a3, zero, e16, mf2, ta, ma ; ZVBB-NEXT: vle16.v v8, (a0) -; ZVBB-NEXT: vsetvli zero, a4, e16, m1, ta, ma +; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v8, v10, a1 ; ZVBB-NEXT: add a2, a5, a2 ; ZVBB-NEXT: vsetvli a0, zero, e16, mf2, ta, ma @@ -8934,13 +8881,12 @@ define <vscale x 5 x float> @vector_interleave_nxv5f32_nxv1f32(<vscale x 1 x flo ; CHECK-NEXT: vle32.v v8, (a5) ; CHECK-NEXT: vle32.v v9, (a4) ; CHECK-NEXT: srli a1, a1, 3 -; CHECK-NEXT: add a4, a1, a1 ; CHECK-NEXT: vle32.v v10, (a3) -; CHECK-NEXT: vsetvli zero, a4, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v9, v8, a1 ; CHECK-NEXT: vsetvli a3, zero, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: 
vsetvli zero, a4, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v10, a1 ; CHECK-NEXT: add a2, a5, a2 ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma @@ -8970,13 +8916,12 @@ define <vscale x 5 x float> @vector_interleave_nxv5f32_nxv1f32(<vscale x 1 x flo ; ZVBB-NEXT: vle32.v v8, (a5) ; ZVBB-NEXT: vle32.v v9, (a4) ; ZVBB-NEXT: srli a1, a1, 3 -; ZVBB-NEXT: add a4, a1, a1 ; ZVBB-NEXT: vle32.v v10, (a3) -; ZVBB-NEXT: vsetvli zero, a4, e32, m1, ta, ma +; ZVBB-NEXT: vsetvli a3, zero, e32, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v9, v8, a1 ; ZVBB-NEXT: vsetvli a3, zero, e32, mf2, ta, ma ; ZVBB-NEXT: vle32.v v8, (a0) -; ZVBB-NEXT: vsetvli zero, a4, e32, m1, ta, ma +; ZVBB-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v8, v10, a1 ; ZVBB-NEXT: add a2, a5, a2 ; ZVBB-NEXT: vsetvli a0, zero, e32, mf2, ta, ma @@ -9796,18 +9741,17 @@ define <vscale x 12 x half> @vector_interleave_nxv12f16_nxv2f16(<vscale x 2 x ha ; CHECK-NEXT: vle16.v v10, (a6) ; CHECK-NEXT: vle16.v v8, (a2) ; CHECK-NEXT: srli a1, a1, 2 -; CHECK-NEXT: add a2, a1, a1 ; CHECK-NEXT: vle16.v v11, (a5) -; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v10, v8, a1 -; CHECK-NEXT: vsetvli a5, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v9, (a4) -; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v9, v11, a1 -; CHECK-NEXT: vsetvli a4, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v11, (a3) ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v11, a1 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a1, a0, 1 @@ -9836,18 +9780,17 @@ define <vscale x 12 x half> @vector_interleave_nxv12f16_nxv2f16(<vscale x 2 x ha ; ZVBB-NEXT: vle16.v v10, (a6) ; ZVBB-NEXT: vle16.v v8, (a2) ; ZVBB-NEXT: srli a1, a1, 2 -; ZVBB-NEXT: add a2, a1, a1 ; ZVBB-NEXT: vle16.v v11, (a5) -; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v10, v8, a1 -; ZVBB-NEXT: vsetvli a5, zero, e16, mf2, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma ; ZVBB-NEXT: vle16.v v9, (a4) -; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v9, v11, a1 -; ZVBB-NEXT: vsetvli a4, zero, e16, mf2, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma ; ZVBB-NEXT: vle16.v v11, (a3) ; ZVBB-NEXT: vle16.v v8, (a0) -; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v8, v11, a1 ; ZVBB-NEXT: csrr a0, vlenb ; ZVBB-NEXT: slli a1, a0, 1 @@ -10311,18 +10254,17 @@ define <vscale x 12 x bfloat> @vector_interleave_nxv12bf16_nxv2bf16(<vscale x 2 ; CHECK-NEXT: vle16.v v10, (a6) ; CHECK-NEXT: vle16.v v8, (a2) ; CHECK-NEXT: srli a1, a1, 2 -; CHECK-NEXT: add a2, a1, a1 ; CHECK-NEXT: vle16.v v11, (a5) -; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v10, v8, a1 -; CHECK-NEXT: vsetvli a5, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v9, (a4) -; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v9, v11, a1 -; 
CHECK-NEXT: vsetvli a4, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v11, (a3) ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v11, a1 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a1, a0, 1 @@ -10351,18 +10293,17 @@ define <vscale x 12 x bfloat> @vector_interleave_nxv12bf16_nxv2bf16(<vscale x 2 ; ZVBB-NEXT: vle16.v v10, (a6) ; ZVBB-NEXT: vle16.v v8, (a2) ; ZVBB-NEXT: srli a1, a1, 2 -; ZVBB-NEXT: add a2, a1, a1 ; ZVBB-NEXT: vle16.v v11, (a5) -; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v10, v8, a1 -; ZVBB-NEXT: vsetvli a5, zero, e16, mf2, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma ; ZVBB-NEXT: vle16.v v9, (a4) -; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v9, v11, a1 -; ZVBB-NEXT: vsetvli a4, zero, e16, mf2, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma ; ZVBB-NEXT: vle16.v v11, (a3) ; ZVBB-NEXT: vle16.v v8, (a0) -; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v8, v11, a1 ; ZVBB-NEXT: csrr a0, vlenb ; ZVBB-NEXT: slli a1, a0, 1 @@ -10826,18 +10767,17 @@ define <vscale x 6 x float> @vector_interleave_nxv6f32_nxv1f32(<vscale x 1 x flo ; CHECK-NEXT: vle32.v v10, (a6) ; CHECK-NEXT: vle32.v v8, (a2) ; CHECK-NEXT: srli a1, a1, 3 -; CHECK-NEXT: add a2, a1, a1 ; CHECK-NEXT: vle32.v v11, (a5) -; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v10, v8, a1 -; CHECK-NEXT: vsetvli a5, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v9, (a4) -; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v9, v11, a1 -; CHECK-NEXT: vsetvli a4, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v11, (a3) ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v11, a1 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a1, a0, 1 @@ -10866,18 +10806,17 @@ define <vscale x 6 x float> @vector_interleave_nxv6f32_nxv1f32(<vscale x 1 x flo ; ZVBB-NEXT: vle32.v v10, (a6) ; ZVBB-NEXT: vle32.v v8, (a2) ; ZVBB-NEXT: srli a1, a1, 3 -; ZVBB-NEXT: add a2, a1, a1 ; ZVBB-NEXT: vle32.v v11, (a5) -; ZVBB-NEXT: vsetvli zero, a2, e32, m1, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e32, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v10, v8, a1 -; ZVBB-NEXT: vsetvli a5, zero, e32, mf2, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e32, mf2, ta, ma ; ZVBB-NEXT: vle32.v v9, (a4) -; ZVBB-NEXT: vsetvli zero, a2, e32, m1, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e32, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v9, v11, a1 -; ZVBB-NEXT: vsetvli a4, zero, e32, mf2, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e32, mf2, ta, ma ; ZVBB-NEXT: vle32.v v11, (a3) ; ZVBB-NEXT: vle32.v v8, (a0) -; ZVBB-NEXT: vsetvli zero, a2, e32, m1, ta, ma +; ZVBB-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v8, v11, a1 ; ZVBB-NEXT: csrr a0, vlenb ; ZVBB-NEXT: slli a1, a0, 1 @@ -11761,7 +11700,6 @@ define <vscale x 14 x half> @vector_interleave_nxv14f16_nxv2f16(<vscale x 2 x ha ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli 
a2, a1, 1 -; CHECK-NEXT: srli a1, a1, 2 ; CHECK-NEXT: add a3, a0, a2 ; CHECK-NEXT: add a4, a3, a2 ; CHECK-NEXT: add a5, a4, a2 @@ -11771,20 +11709,20 @@ define <vscale x 14 x half> @vector_interleave_nxv14f16_nxv2f16(<vscale x 2 x ha ; CHECK-NEXT: add a7, a6, a2 ; CHECK-NEXT: vle16.v v8, (a7) ; CHECK-NEXT: vle16.v v10, (a6) -; CHECK-NEXT: add a6, a1, a1 +; CHECK-NEXT: srli a1, a1, 2 ; CHECK-NEXT: add a2, a7, a2 ; CHECK-NEXT: vle16.v v12, (a5) -; CHECK-NEXT: vsetvli zero, a6, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a5, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v10, v8, a1 ; CHECK-NEXT: vsetvli a5, zero, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v11, (a2) ; CHECK-NEXT: vle16.v v9, (a4) -; CHECK-NEXT: vsetvli zero, a6, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v9, v12, a1 ; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v12, (a3) ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a6, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v12, a1 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 2 @@ -11801,7 +11739,6 @@ define <vscale x 14 x half> @vector_interleave_nxv14f16_nxv2f16(<vscale x 2 x ha ; ZVBB-NEXT: addi a0, sp, 16 ; ZVBB-NEXT: csrr a1, vlenb ; ZVBB-NEXT: srli a2, a1, 1 -; ZVBB-NEXT: srli a1, a1, 2 ; ZVBB-NEXT: add a3, a0, a2 ; ZVBB-NEXT: add a4, a3, a2 ; ZVBB-NEXT: add a5, a4, a2 @@ -11811,20 +11748,20 @@ define <vscale x 14 x half> @vector_interleave_nxv14f16_nxv2f16(<vscale x 2 x ha ; ZVBB-NEXT: add a7, a6, a2 ; ZVBB-NEXT: vle16.v v8, (a7) ; ZVBB-NEXT: vle16.v v10, (a6) -; ZVBB-NEXT: add a6, a1, a1 +; ZVBB-NEXT: srli a1, a1, 2 ; ZVBB-NEXT: add a2, a7, a2 ; ZVBB-NEXT: vle16.v v12, (a5) -; ZVBB-NEXT: vsetvli zero, a6, e16, m1, ta, ma +; ZVBB-NEXT: vsetvli a5, zero, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v10, v8, a1 ; ZVBB-NEXT: vsetvli a5, zero, e16, mf2, ta, ma ; ZVBB-NEXT: vle16.v v11, (a2) ; ZVBB-NEXT: vle16.v v9, (a4) -; ZVBB-NEXT: vsetvli zero, a6, e16, m1, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v9, v12, a1 ; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma ; ZVBB-NEXT: vle16.v v12, (a3) ; ZVBB-NEXT: vle16.v v8, (a0) -; ZVBB-NEXT: vsetvli zero, a6, e16, m1, ta, ma +; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v8, v12, a1 ; ZVBB-NEXT: csrr a0, vlenb ; ZVBB-NEXT: slli a0, a0, 2 @@ -12325,7 +12262,6 @@ define <vscale x 14 x bfloat> @vector_interleave_nxv14bf16_nxv2bf16(<vscale x 2 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 1 -; CHECK-NEXT: srli a1, a1, 2 ; CHECK-NEXT: add a3, a0, a2 ; CHECK-NEXT: add a4, a3, a2 ; CHECK-NEXT: add a5, a4, a2 @@ -12335,20 +12271,20 @@ define <vscale x 14 x bfloat> @vector_interleave_nxv14bf16_nxv2bf16(<vscale x 2 ; CHECK-NEXT: add a7, a6, a2 ; CHECK-NEXT: vle16.v v8, (a7) ; CHECK-NEXT: vle16.v v10, (a6) -; CHECK-NEXT: add a6, a1, a1 +; CHECK-NEXT: srli a1, a1, 2 ; CHECK-NEXT: add a2, a7, a2 ; CHECK-NEXT: vle16.v v12, (a5) -; CHECK-NEXT: vsetvli zero, a6, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a5, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v10, v8, a1 ; CHECK-NEXT: vsetvli a5, zero, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v11, (a2) ; CHECK-NEXT: vle16.v v9, (a4) -; CHECK-NEXT: vsetvli zero, a6, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v9, v12, a1 ; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v12, (a3) ; CHECK-NEXT: vle16.v v8, (a0) -; 
CHECK-NEXT: vsetvli zero, a6, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v12, a1 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 2 @@ -12365,7 +12301,6 @@ define <vscale x 14 x bfloat> @vector_interleave_nxv14bf16_nxv2bf16(<vscale x 2 ; ZVBB-NEXT: addi a0, sp, 16 ; ZVBB-NEXT: csrr a1, vlenb ; ZVBB-NEXT: srli a2, a1, 1 -; ZVBB-NEXT: srli a1, a1, 2 ; ZVBB-NEXT: add a3, a0, a2 ; ZVBB-NEXT: add a4, a3, a2 ; ZVBB-NEXT: add a5, a4, a2 @@ -12375,20 +12310,20 @@ define <vscale x 14 x bfloat> @vector_interleave_nxv14bf16_nxv2bf16(<vscale x 2 ; ZVBB-NEXT: add a7, a6, a2 ; ZVBB-NEXT: vle16.v v8, (a7) ; ZVBB-NEXT: vle16.v v10, (a6) -; ZVBB-NEXT: add a6, a1, a1 +; ZVBB-NEXT: srli a1, a1, 2 ; ZVBB-NEXT: add a2, a7, a2 ; ZVBB-NEXT: vle16.v v12, (a5) -; ZVBB-NEXT: vsetvli zero, a6, e16, m1, ta, ma +; ZVBB-NEXT: vsetvli a5, zero, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v10, v8, a1 ; ZVBB-NEXT: vsetvli a5, zero, e16, mf2, ta, ma ; ZVBB-NEXT: vle16.v v11, (a2) ; ZVBB-NEXT: vle16.v v9, (a4) -; ZVBB-NEXT: vsetvli zero, a6, e16, m1, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v9, v12, a1 ; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma ; ZVBB-NEXT: vle16.v v12, (a3) ; ZVBB-NEXT: vle16.v v8, (a0) -; ZVBB-NEXT: vsetvli zero, a6, e16, m1, ta, ma +; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v8, v12, a1 ; ZVBB-NEXT: csrr a0, vlenb ; ZVBB-NEXT: slli a0, a0, 2 @@ -12889,7 +12824,6 @@ define <vscale x 7 x float> @vector_interleave_nxv7f32_nxv1f32(<vscale x 1 x flo ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 1 -; CHECK-NEXT: srli a1, a1, 3 ; CHECK-NEXT: add a3, a0, a2 ; CHECK-NEXT: add a4, a3, a2 ; CHECK-NEXT: add a5, a4, a2 @@ -12899,20 +12833,20 @@ define <vscale x 7 x float> @vector_interleave_nxv7f32_nxv1f32(<vscale x 1 x flo ; CHECK-NEXT: add a7, a6, a2 ; CHECK-NEXT: vle32.v v8, (a7) ; CHECK-NEXT: vle32.v v10, (a6) -; CHECK-NEXT: add a6, a1, a1 +; CHECK-NEXT: srli a1, a1, 3 ; CHECK-NEXT: add a2, a7, a2 ; CHECK-NEXT: vle32.v v12, (a5) -; CHECK-NEXT: vsetvli zero, a6, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v10, v8, a1 ; CHECK-NEXT: vsetvli a5, zero, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v11, (a2) ; CHECK-NEXT: vle32.v v9, (a4) -; CHECK-NEXT: vsetvli zero, a6, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v9, v12, a1 ; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v12, (a3) ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a6, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v12, a1 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 2 @@ -12929,7 +12863,6 @@ define <vscale x 7 x float> @vector_interleave_nxv7f32_nxv1f32(<vscale x 1 x flo ; ZVBB-NEXT: addi a0, sp, 16 ; ZVBB-NEXT: csrr a1, vlenb ; ZVBB-NEXT: srli a2, a1, 1 -; ZVBB-NEXT: srli a1, a1, 3 ; ZVBB-NEXT: add a3, a0, a2 ; ZVBB-NEXT: add a4, a3, a2 ; ZVBB-NEXT: add a5, a4, a2 @@ -12939,20 +12872,20 @@ define <vscale x 7 x float> @vector_interleave_nxv7f32_nxv1f32(<vscale x 1 x flo ; ZVBB-NEXT: add a7, a6, a2 ; ZVBB-NEXT: vle32.v v8, (a7) ; ZVBB-NEXT: vle32.v v10, (a6) -; ZVBB-NEXT: add a6, a1, a1 +; ZVBB-NEXT: srli a1, a1, 3 ; ZVBB-NEXT: add a2, a7, a2 ; ZVBB-NEXT: vle32.v v12, (a5) -; ZVBB-NEXT: vsetvli zero, a6, e32, m1, ta, ma +; ZVBB-NEXT: vsetvli a5, zero, e32, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v10, v8, a1 ; 
ZVBB-NEXT: vsetvli a5, zero, e32, mf2, ta, ma ; ZVBB-NEXT: vle32.v v11, (a2) ; ZVBB-NEXT: vle32.v v9, (a4) -; ZVBB-NEXT: vsetvli zero, a6, e32, m1, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e32, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v9, v12, a1 ; ZVBB-NEXT: vsetvli a2, zero, e32, mf2, ta, ma ; ZVBB-NEXT: vle32.v v12, (a3) ; ZVBB-NEXT: vle32.v v8, (a0) -; ZVBB-NEXT: vsetvli zero, a6, e32, m1, ta, ma +; ZVBB-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v8, v12, a1 ; ZVBB-NEXT: csrr a0, vlenb ; ZVBB-NEXT: slli a0, a0, 2 @@ -13945,23 +13878,22 @@ define <vscale x 16 x half> @vector_interleave_nxv16f16_nxv2f16(<vscale x 2 x ha ; CHECK-NEXT: vle16.v v11, (t0) ; CHECK-NEXT: vle16.v v8, (a2) ; CHECK-NEXT: srli a1, a1, 2 -; CHECK-NEXT: add a2, a1, a1 ; CHECK-NEXT: vle16.v v9, (a7) -; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v11, v8, a1 -; CHECK-NEXT: vsetvli a7, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v10, (a6) ; CHECK-NEXT: vle16.v v8, (a5) -; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v10, v9, a1 -; CHECK-NEXT: vsetvli a5, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v9, (a4) -; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v9, v8, a1 -; CHECK-NEXT: vsetvli a4, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v12, (a3) ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v12, a1 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 2 @@ -13990,23 +13922,22 @@ define <vscale x 16 x half> @vector_interleave_nxv16f16_nxv2f16(<vscale x 2 x ha ; ZVBB-NEXT: vle16.v v11, (t0) ; ZVBB-NEXT: vle16.v v8, (a2) ; ZVBB-NEXT: srli a1, a1, 2 -; ZVBB-NEXT: add a2, a1, a1 ; ZVBB-NEXT: vle16.v v9, (a7) -; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v11, v8, a1 -; ZVBB-NEXT: vsetvli a7, zero, e16, mf2, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma ; ZVBB-NEXT: vle16.v v10, (a6) ; ZVBB-NEXT: vle16.v v8, (a5) -; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v10, v9, a1 -; ZVBB-NEXT: vsetvli a5, zero, e16, mf2, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma ; ZVBB-NEXT: vle16.v v9, (a4) -; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v9, v8, a1 -; ZVBB-NEXT: vsetvli a4, zero, e16, mf2, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma ; ZVBB-NEXT: vle16.v v12, (a3) ; ZVBB-NEXT: vle16.v v8, (a0) -; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v8, v12, a1 ; ZVBB-NEXT: csrr a0, vlenb ; ZVBB-NEXT: slli a0, a0, 2 @@ -14243,23 +14174,22 @@ define <vscale x 16 x bfloat> @vector_interleave_nxv16bf16_nxv2bf16(<vscale x 2 ; CHECK-NEXT: vle16.v v11, (t0) ; CHECK-NEXT: vle16.v v8, (a2) ; CHECK-NEXT: srli a1, a1, 2 -; CHECK-NEXT: add a2, a1, a1 ; CHECK-NEXT: vle16.v v9, (a7) -; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v11, v8, a1 -; CHECK-NEXT: 
vsetvli a7, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v10, (a6) ; CHECK-NEXT: vle16.v v8, (a5) -; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v10, v9, a1 -; CHECK-NEXT: vsetvli a5, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v9, (a4) -; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v9, v8, a1 -; CHECK-NEXT: vsetvli a4, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v12, (a3) ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v12, a1 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 2 @@ -14288,23 +14218,22 @@ define <vscale x 16 x bfloat> @vector_interleave_nxv16bf16_nxv2bf16(<vscale x 2 ; ZVBB-NEXT: vle16.v v11, (t0) ; ZVBB-NEXT: vle16.v v8, (a2) ; ZVBB-NEXT: srli a1, a1, 2 -; ZVBB-NEXT: add a2, a1, a1 ; ZVBB-NEXT: vle16.v v9, (a7) -; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v11, v8, a1 -; ZVBB-NEXT: vsetvli a7, zero, e16, mf2, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma ; ZVBB-NEXT: vle16.v v10, (a6) ; ZVBB-NEXT: vle16.v v8, (a5) -; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v10, v9, a1 -; ZVBB-NEXT: vsetvli a5, zero, e16, mf2, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma ; ZVBB-NEXT: vle16.v v9, (a4) -; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v9, v8, a1 -; ZVBB-NEXT: vsetvli a4, zero, e16, mf2, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma ; ZVBB-NEXT: vle16.v v12, (a3) ; ZVBB-NEXT: vle16.v v8, (a0) -; ZVBB-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v8, v12, a1 ; ZVBB-NEXT: csrr a0, vlenb ; ZVBB-NEXT: slli a0, a0, 2 @@ -14541,23 +14470,22 @@ define <vscale x 8 x float> @vector_interleave_nxv8f32_nxv1f32(<vscale x 1 x flo ; CHECK-NEXT: vle32.v v11, (t0) ; CHECK-NEXT: vle32.v v8, (a2) ; CHECK-NEXT: srli a1, a1, 3 -; CHECK-NEXT: add a2, a1, a1 ; CHECK-NEXT: vle32.v v9, (a7) -; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v11, v8, a1 -; CHECK-NEXT: vsetvli a7, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v10, (a6) ; CHECK-NEXT: vle32.v v8, (a5) -; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v10, v9, a1 -; CHECK-NEXT: vsetvli a5, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v9, (a4) -; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v9, v8, a1 -; CHECK-NEXT: vsetvli a4, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v12, (a3) ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v12, a1 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 2 @@ -14586,23 +14514,22 @@ define <vscale x 
8 x float> @vector_interleave_nxv8f32_nxv1f32(<vscale x 1 x flo ; ZVBB-NEXT: vle32.v v11, (t0) ; ZVBB-NEXT: vle32.v v8, (a2) ; ZVBB-NEXT: srli a1, a1, 3 -; ZVBB-NEXT: add a2, a1, a1 ; ZVBB-NEXT: vle32.v v9, (a7) -; ZVBB-NEXT: vsetvli zero, a2, e32, m1, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e32, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v11, v8, a1 -; ZVBB-NEXT: vsetvli a7, zero, e32, mf2, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e32, mf2, ta, ma ; ZVBB-NEXT: vle32.v v10, (a6) ; ZVBB-NEXT: vle32.v v8, (a5) -; ZVBB-NEXT: vsetvli zero, a2, e32, m1, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e32, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v10, v9, a1 -; ZVBB-NEXT: vsetvli a5, zero, e32, mf2, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e32, mf2, ta, ma ; ZVBB-NEXT: vle32.v v9, (a4) -; ZVBB-NEXT: vsetvli zero, a2, e32, m1, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e32, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v9, v8, a1 -; ZVBB-NEXT: vsetvli a4, zero, e32, mf2, ta, ma +; ZVBB-NEXT: vsetvli a2, zero, e32, mf2, ta, ma ; ZVBB-NEXT: vle32.v v12, (a3) ; ZVBB-NEXT: vle32.v v8, (a0) -; ZVBB-NEXT: vsetvli zero, a2, e32, m1, ta, ma +; ZVBB-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; ZVBB-NEXT: vslideup.vx v8, v12, a1 ; ZVBB-NEXT: csrr a0, vlenb ; ZVBB-NEXT: slli a0, a0, 2 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll index df7af4d8..111fa36 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll @@ -634,12 +634,11 @@ define <vscale x 32 x i1> @vfptosi_nxv32bf16_nxv32i1(<vscale x 32 x bfloat> %va) ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v16 ; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v24 -; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vand.vi v12, v12, 1 ; CHECK-NEXT: vmsne.vi v16, v8, 0 ; CHECK-NEXT: vmsne.vi v0, v12, 0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vx v0, v16, a0 ; CHECK-NEXT: ret %evec = fptosi <vscale x 32 x bfloat> %va to <vscale x 32 x i1> @@ -656,12 +655,11 @@ define <vscale x 32 x i1> @vfptoui_nxv32bf16_nxv32i1(<vscale x 32 x bfloat> %va) ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v16 ; CHECK-NEXT: vfncvt.rtz.xu.f.w v12, v24 -; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vand.vi v12, v12, 1 ; CHECK-NEXT: vmsne.vi v16, v8, 0 ; CHECK-NEXT: vmsne.vi v0, v12, 0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vx v0, v16, a0 ; CHECK-NEXT: ret %evec = fptoui <vscale x 32 x bfloat> %va to <vscale x 32 x i1> @@ -1654,12 +1652,11 @@ define <vscale x 32 x i1> @vfptosi_nxv32f16_nxv32i1(<vscale x 32 x half> %va) { ; ZVFHMIN-NEXT: srli a0, a0, 2 ; ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v8, v16 ; ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v12, v24 -; ZVFHMIN-NEXT: add a1, a0, a0 ; ZVFHMIN-NEXT: vand.vi v8, v8, 1 ; ZVFHMIN-NEXT: vand.vi v12, v12, 1 ; ZVFHMIN-NEXT: vmsne.vi v16, v8, 0 ; ZVFHMIN-NEXT: vmsne.vi v0, v12, 0 -; ZVFHMIN-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslideup.vx v0, v16, a0 ; ZVFHMIN-NEXT: ret %evec = fptosi <vscale x 32 x half> %va to <vscale x 32 x i1> @@ -1684,12 +1681,11 @@ define <vscale x 32 x i1> @vfptoui_nxv32f16_nxv32i1(<vscale x 32 x half> %va) { ; ZVFHMIN-NEXT: srli a0, a0, 2 ; ZVFHMIN-NEXT: vfncvt.rtz.xu.f.w v8, v16 ; ZVFHMIN-NEXT: vfncvt.rtz.xu.f.w v12, v24 -; ZVFHMIN-NEXT: add a1, a0, a0 ; ZVFHMIN-NEXT: 
vand.vi v8, v8, 1 ; ZVFHMIN-NEXT: vand.vi v12, v12, 1 ; ZVFHMIN-NEXT: vmsne.vi v16, v8, 0 ; ZVFHMIN-NEXT: vmsne.vi v0, v12, 0 -; ZVFHMIN-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslideup.vx v0, v16, a0 ; ZVFHMIN-NEXT: ret %evec = fptoui <vscale x 32 x half> %va to <vscale x 32 x i1> diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll index 142ee52..1868154 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll @@ -567,38 +567,37 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> % ; RV32: # %bb.0: ; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma ; RV32-NEXT: vmv1r.v v8, v0 -; RV32-NEXT: slli a2, a1, 1 ; RV32-NEXT: vmv.v.i v9, 0 -; RV32-NEXT: li a1, -1 +; RV32-NEXT: li a2, -1 ; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; RV32-NEXT: vmv.v.i v10, 0 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV32-NEXT: vmerge.vim v11, v9, 1, v0 -; RV32-NEXT: srli a3, a3, 2 ; RV32-NEXT: vwaddu.vv v12, v11, v11 -; RV32-NEXT: vwmaccu.vx v12, a1, v11 +; RV32-NEXT: vwmaccu.vx v12, a2, v11 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: srli a2, a2, 2 ; RV32-NEXT: vmsne.vi v0, v12, 0 -; RV32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; RV32-NEXT: vslidedown.vx v11, v12, a3 +; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; RV32-NEXT: vslidedown.vx v11, v12, a2 ; RV32-NEXT: vmerge.vim v10, v10, 1, v0 -; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV32-NEXT: vmsne.vi v0, v11, 0 -; RV32-NEXT: add a1, a3, a3 +; RV32-NEXT: slli a3, a1, 1 ; RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; RV32-NEXT: vslideup.vx v10, v9, a3 -; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma +; RV32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; RV32-NEXT: vslideup.vx v10, v9, a2 +; RV32-NEXT: vsetvli zero, a3, e8, mf2, ta, ma ; RV32-NEXT: vmsne.vi v0, v10, 0 ; RV32-NEXT: vle32.v v10, (a0), v0.t ; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma ; RV32-NEXT: vnsrl.wx v13, v10, a1 ; RV32-NEXT: vmv.x.s a1, v10 ; RV32-NEXT: vnsrl.wi v12, v10, 0 -; RV32-NEXT: srli a2, a2, 1 +; RV32-NEXT: srli a3, a3, 1 ; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vsetvli zero, a2, e32, m1, ta, ma +; RV32-NEXT: vsetvli zero, a3, e32, m1, ta, ma ; RV32-NEXT: vsseg2e32.v v12, (a0), v0.t ; RV32-NEXT: mv a0, a1 ; RV32-NEXT: ret @@ -611,26 +610,24 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> % ; RV64-NEXT: li a2, -1 ; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; RV64-NEXT: vmv.v.i v10, 0 -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a4, a1, 33 -; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV64-NEXT: vmerge.vim v11, v9, 1, v0 -; RV64-NEXT: srli a3, a3, 2 ; RV64-NEXT: vwaddu.vv v12, v11, v11 ; RV64-NEXT: vwmaccu.vx v12, a2, v11 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: srli a2, a2, 2 ; RV64-NEXT: vmsne.vi v0, v12, 0 -; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; RV64-NEXT: vslidedown.vx v11, v12, a3 +; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; RV64-NEXT: vslidedown.vx v11, v12, a2 ; RV64-NEXT: vmerge.vim v10, v10, 1, v0 -; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV64-NEXT: 
vsetvli a3, zero, e8, mf4, ta, ma ; RV64-NEXT: vmsne.vi v0, v11, 0 -; RV64-NEXT: add a1, a3, a3 +; RV64-NEXT: slli a3, a1, 33 ; RV64-NEXT: vmerge.vim v9, v9, 1, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; RV64-NEXT: vslideup.vx v10, v9, a3 ; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; RV64-NEXT: vslideup.vx v10, v9, a2 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: srli a1, a4, 32 +; RV64-NEXT: srli a1, a3, 32 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; RV64-NEXT: vle32.v v10, (a0), v0.t ; RV64-NEXT: li a1, 32 @@ -638,9 +635,9 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> % ; RV64-NEXT: vnsrl.wx v13, v10, a1 ; RV64-NEXT: vmv.x.s a1, v10 ; RV64-NEXT: vnsrl.wi v12, v10, 0 -; RV64-NEXT: srli a4, a4, 33 +; RV64-NEXT: srli a3, a3, 33 ; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vsetvli zero, a4, e32, m1, ta, ma +; RV64-NEXT: vsetvli zero, a3, e32, m1, ta, ma ; RV64-NEXT: vsseg2e32.v v12, (a0), v0.t ; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret @@ -807,10 +804,7 @@ define void @not_balanced_store_tree(<vscale x 1 x i32> %v0, <vscale x 2 x i32> ; RV32-NEXT: srli a3, a3, 3 ; RV32-NEXT: vsetvli a4, zero, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vx v8, v12, a3 -; RV32-NEXT: add a4, a3, a3 -; RV32-NEXT: vsetvli zero, a4, e32, m1, ta, ma ; RV32-NEXT: vslideup.vx v12, v8, a3 -; RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma ; RV32-NEXT: vwaddu.vv v16, v12, v9 ; RV32-NEXT: vwmaccu.vx v16, a2, v9 ; RV32-NEXT: vsetvli a3, zero, e32, m2, ta, ma @@ -831,10 +825,7 @@ define void @not_balanced_store_tree(<vscale x 1 x i32> %v0, <vscale x 2 x i32> ; RV64-NEXT: srli a3, a3, 3 ; RV64-NEXT: vsetvli a4, zero, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vx v8, v12, a3 -; RV64-NEXT: add a4, a3, a3 -; RV64-NEXT: vsetvli zero, a4, e32, m1, ta, ma ; RV64-NEXT: vslideup.vx v12, v8, a3 -; RV64-NEXT: vsetvli a3, zero, e32, m1, ta, ma ; RV64-NEXT: vwaddu.vv v16, v12, v9 ; RV64-NEXT: vwmaccu.vx v16, a2, v9 ; RV64-NEXT: vsetvli a3, zero, e32, m2, ta, ma @@ -858,29 +849,28 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>} @not_same_mask(<vscale x 2 x i1> ; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma ; RV32-NEXT: vmv1r.v v9, v0 ; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: slli a1, a1, 1 ; RV32-NEXT: vmv.v.i v8, 0 ; RV32-NEXT: li a2, -1 ; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; RV32-NEXT: vmv.v.i v10, 0 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV32-NEXT: vmerge.vim v11, v8, 1, v0 ; RV32-NEXT: vmv1r.v v0, v9 ; RV32-NEXT: vmerge.vim v9, v8, 1, v0 -; RV32-NEXT: srli a3, a3, 2 ; RV32-NEXT: vwaddu.vv v12, v9, v11 ; RV32-NEXT: vwmaccu.vx v12, a2, v11 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: srli a2, a2, 2 ; RV32-NEXT: vmsne.vi v0, v12, 0 -; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma -; RV32-NEXT: vslidedown.vx v9, v12, a3 +; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; RV32-NEXT: vslidedown.vx v9, v12, a2 ; RV32-NEXT: vmerge.vim v10, v10, 1, v0 -; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV32-NEXT: vmsne.vi v0, v9, 0 -; RV32-NEXT: add a2, a3, a3 +; RV32-NEXT: slli a1, a1, 1 ; RV32-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma -; RV32-NEXT: vslideup.vx v10, v8, a3 +; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; RV32-NEXT: vslideup.vx v10, v8, a2 ; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; RV32-NEXT: vmsne.vi v0, v10, 0 ; RV32-NEXT: vle32.v v10, (a0), v0.t @@ -899,26 +889,24 @@ define {<vscale x 2 x i32>, 
<vscale x 2 x i32>} @not_same_mask(<vscale x 2 x i1> ; RV64-NEXT: li a2, -1 ; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; RV64-NEXT: vmv.v.i v10, 0 -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a1, a1, 33 -; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV64-NEXT: vmerge.vim v11, v8, 1, v0 ; RV64-NEXT: vmv1r.v v0, v9 ; RV64-NEXT: vmerge.vim v9, v8, 1, v0 -; RV64-NEXT: srli a3, a3, 2 ; RV64-NEXT: vwaddu.vv v12, v9, v11 ; RV64-NEXT: vwmaccu.vx v12, a2, v11 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: srli a2, a2, 2 ; RV64-NEXT: vmsne.vi v0, v12, 0 -; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma -; RV64-NEXT: vslidedown.vx v9, v12, a3 +; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; RV64-NEXT: vslidedown.vx v9, v12, a2 ; RV64-NEXT: vmerge.vim v10, v10, 1, v0 -; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV64-NEXT: vmsne.vi v0, v9, 0 -; RV64-NEXT: add a2, a3, a3 +; RV64-NEXT: slli a1, a1, 33 ; RV64-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64-NEXT: vsetvli zero, a2, e8, mf2, ta, ma -; RV64-NEXT: vslideup.vx v10, v8, a3 -; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; RV64-NEXT: vslideup.vx v10, v8, a2 ; RV64-NEXT: vmsne.vi v0, v10, 0 ; RV64-NEXT: srli a1, a1, 32 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma |