36 files changed, 1107 insertions, 1166 deletions
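The source-level change shown here is small: RISCVInstrInfoVPseudos.td drops hasSideEffects from 1 to 0 on the vsetvli/vsetivli pseudos (they still list VL and VTYPE in Defs), and RISCVDeadRegisterDefinitions.cpp gains explicit opcode checks so PseudoVSETVLI and PseudoVSETIVLI remain candidates for rewriting a dead rd to x0 now that hasUnmodeledSideEffects() no longer covers them. The hunks after those two are regenerated test expectations. As a rough sketch of the updated filter, restated as a standalone predicate, the helper name keepAsDeadDefCandidate and the include set are illustrative only and not part of the patch:

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/MC/MCInstrDesc.h"
// Assumes the RISC-V backend's generated opcode enum (RISCV::PseudoVSETVLI,
// RISCV::PseudoVSETIVLI) is in scope, as it is inside the backend sources.

// An instruction stays under consideration for the dead-rd-to-x0 rewrite when
// it loads, stores, has unmodeled side effects, or is one of the vsetvli
// pseudos that this patch stops marking as side-effecting.
static bool keepAsDeadDefCandidate(const llvm::MachineInstr &MI) {
  const llvm::MCInstrDesc &Desc = MI.getDesc();
  return Desc.mayLoad() || Desc.mayStore() ||
         Desc.hasUnmodeledSideEffects() ||
         MI.getOpcode() == llvm::RISCV::PseudoVSETVLI ||
         MI.getOpcode() == llvm::RISCV::PseudoVSETIVLI;
}

The regenerated CHECK lines mostly reflect the extra freedom this gives the scheduler and register allocator: scalar loads are reordered around vsetvli/vsetivli, a few spill frames change size (for example 16 * vlenb becomes 24 * vlenb in calling-conv-fastcc.ll), and in the nearbyint tests fsflags is restored after the masked vfsgnj.vv instead of before it.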
diff --git a/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp b/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp index 5e6b789..7de48d8 100644 --- a/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp +++ b/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp @@ -72,7 +72,9 @@ bool RISCVDeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) { // are reserved for HINT instructions. const MCInstrDesc &Desc = MI.getDesc(); if (!Desc.mayLoad() && !Desc.mayStore() && - !Desc.hasUnmodeledSideEffects()) + !Desc.hasUnmodeledSideEffects() && + MI.getOpcode() != RISCV::PseudoVSETVLI && + MI.getOpcode() != RISCV::PseudoVSETIVLI) continue; // For PseudoVSETVLIX0, Rd = X0 has special meaning. if (MI.getOpcode() == RISCV::PseudoVSETVLIX0) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 4adc26f..317a6d7 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -6181,7 +6181,7 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 0, //===----------------------------------------------------------------------===// // Pseudos. -let hasSideEffects = 1, mayLoad = 0, mayStore = 0, Defs = [VL, VTYPE] in { +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Defs = [VL, VTYPE] in { // Due to rs1=X0 having special meaning, we need a GPRNoX0 register class for // the when we aren't using one of the special X0 encodings. Otherwise it could // be accidentally be made X0 by MachineIR optimizations. To satisfy the diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll index 187f758..0a7fa38 100644 --- a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll @@ -236,11 +236,12 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_param_nxv32i32_nxv32i32_nxv32i32 ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -248,29 +249,40 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_param_nxv32i32_nxv32i32_nxv32i32 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a3, a2, a1 -; CHECK-NEXT: vl8re32.v v8, (a3) -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: add a1, a0, a1 -; CHECK-NEXT: vl8re32.v v0, (a0) ; CHECK-NEXT: vl8re32.v v8, (a1) -; CHECK-NEXT: vl8re32.v v16, (a2) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vl8re32.v v0, (a0) ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vl8re32.v v8, (a3) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vl8re32.v v16, (a2) ; CHECK-NEXT: vadd.vv v0, v24, v0 ; CHECK-NEXT: csrr a0, 
vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vadd.vv v8, v24, v8 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vadd.vv v24, v24, v8 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vadd.vv v8, v8, v24 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vadd.vv v8, v24, v8 ; CHECK-NEXT: vadd.vv v24, v0, v16 ; CHECK-NEXT: vadd.vx v16, v8, a4 ; CHECK-NEXT: vadd.vx v8, v24, a4 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll index 647d315..fa62143 100644 --- a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll @@ -39,11 +39,11 @@ define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x ; RV32-NEXT: vs8r.v v8, (a0) ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: vs8r.v v16, (a0) +; RV32-NEXT: add a1, a0, a1 ; RV32-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v8, 0 ; RV32-NEXT: addi a0, sp, 128 +; RV32-NEXT: vs8r.v v16, (a1) ; RV32-NEXT: vmv.v.i v16, 0 ; RV32-NEXT: call callee_scalable_vector_split_indirect ; RV32-NEXT: addi sp, s0, -144 @@ -70,11 +70,11 @@ define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x ; RV64-NEXT: vs8r.v v8, (a0) ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vs8r.v v16, (a0) +; RV64-NEXT: add a1, a0, a1 ; RV64-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; RV64-NEXT: vmv.v.i v8, 0 ; RV64-NEXT: addi a0, sp, 128 +; RV64-NEXT: vs8r.v v16, (a1) ; RV64-NEXT: vmv.v.i v16, 0 ; RV64-NEXT: call callee_scalable_vector_split_indirect ; RV64-NEXT: addi sp, s0, -144 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll index 1b50214..9e9a8b8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll @@ -19,9 +19,9 @@ define <2 x half> @nearbyint_v2f16(<2 x half> %v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <2 x half> @llvm.experimental.constrained.nearbyint.v2f16(<2 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <2 x half> %r @@ -42,9 +42,9 @@ define <4 x half> @nearbyint_v4f16(<4 x half> %v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <4 x half> @llvm.experimental.constrained.nearbyint.v4f16(<4 x half> %v, metadata !"round.dynamic", 
metadata !"fpexcept.strict") ret <4 x half> %r @@ -65,9 +65,9 @@ define <8 x half> @nearbyint_v8f16(<8 x half> %v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <8 x half> @llvm.experimental.constrained.nearbyint.v8f16(<8 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <8 x half> %r @@ -88,9 +88,9 @@ define <16 x half> @nearbyint_v16f16(<16 x half> %v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <16 x half> @llvm.experimental.constrained.nearbyint.v16f16(<16 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <16 x half> %r @@ -112,9 +112,9 @@ define <32 x half> @nearbyint_v32f16(<32 x half> %v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <32 x half> @llvm.experimental.constrained.nearbyint.v32f16(<32 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <32 x half> %r @@ -135,9 +135,9 @@ define <2 x float> @nearbyint_v2f32(<2 x float> %v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <2 x float> @llvm.experimental.constrained.nearbyint.v2f32(<2 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <2 x float> %r @@ -158,9 +158,9 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <4 x float> %r @@ -181,9 +181,9 @@ define <8 x float> @nearbyint_v8f32(<8 x float> %v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <8 x float> @llvm.experimental.constrained.nearbyint.v8f32(<8 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <8 x float> %r @@ -204,9 +204,9 @@ define <16 x float> @nearbyint_v16f32(<16 x float> %v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <16 x float> @llvm.experimental.constrained.nearbyint.v16f32(<16 x float> %v, metadata 
!"round.dynamic", metadata !"fpexcept.strict") ret <16 x float> %r @@ -227,9 +227,9 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <2 x double> %r @@ -250,9 +250,9 @@ define <4 x double> @nearbyint_v4f64(<4 x double> %v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <4 x double> %r @@ -273,9 +273,9 @@ define <8 x double> @nearbyint_v8f64(<8 x double> %v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <8 x double> @llvm.experimental.constrained.nearbyint.v8f64(<8 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <8 x double> %r diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll index a8e4af2..6320b07 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll @@ -359,13 +359,13 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) { ; RV32-NEXT: feq.d a0, fa3, fa3 ; RV32-NEXT: fmax.d fa3, fa3, fa5 ; RV32-NEXT: fmin.d fa3, fa3, fa4 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; RV32-NEXT: fld fa2, 40(sp) ; RV32-NEXT: fcvt.w.d a2, fa3, rtz +; RV32-NEXT: fld fa3, 40(sp) ; RV32-NEXT: neg a0, a0 ; RV32-NEXT: and a0, a0, a2 -; RV32-NEXT: feq.d a2, fa2, fa2 -; RV32-NEXT: fmax.d fa3, fa2, fa5 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; RV32-NEXT: feq.d a2, fa3, fa3 +; RV32-NEXT: fmax.d fa3, fa3, fa5 ; RV32-NEXT: fmin.d fa3, fa3, fa4 ; RV32-NEXT: fcvt.w.d a3, fa3, rtz ; RV32-NEXT: fld fa3, 32(sp) @@ -460,13 +460,13 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) { ; RV64-NEXT: feq.d a0, fa3, fa3 ; RV64-NEXT: fmax.d fa3, fa3, fa5 ; RV64-NEXT: fmin.d fa3, fa3, fa4 -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; RV64-NEXT: fld fa2, 40(sp) ; RV64-NEXT: fcvt.l.d a2, fa3, rtz +; RV64-NEXT: fld fa3, 40(sp) ; RV64-NEXT: neg a0, a0 ; RV64-NEXT: and a0, a0, a2 -; RV64-NEXT: feq.d a2, fa2, fa2 -; RV64-NEXT: fmax.d fa3, fa2, fa5 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; RV64-NEXT: feq.d a2, fa3, fa3 +; RV64-NEXT: fmax.d fa3, fa3, fa5 ; RV64-NEXT: fmin.d fa3, fa3, fa4 ; RV64-NEXT: fcvt.l.d a3, fa3, rtz ; RV64-NEXT: fld fa3, 32(sp) @@ -557,7 +557,6 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) { ; RV32-NEXT: vslidedown.vi v8, v8, 3 ; RV32-NEXT: vfmv.f.s fa4, v8 ; RV32-NEXT: fmax.d fa4, fa4, fa3 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; RV32-NEXT: fld fa2, 40(sp) ; RV32-NEXT: fmin.d fa4, fa4, fa5 ; RV32-NEXT: fcvt.wu.d a0, fa4, rtz @@ -566,9 +565,10 @@ define void @fp2ui_v8f64_v8i8(ptr %x, 
ptr %y) { ; RV32-NEXT: fmin.d fa2, fa2, fa5 ; RV32-NEXT: fcvt.wu.d a2, fa2, rtz ; RV32-NEXT: fmax.d fa4, fa4, fa3 -; RV32-NEXT: fld fa2, 48(sp) ; RV32-NEXT: fmin.d fa4, fa4, fa5 +; RV32-NEXT: fld fa2, 48(sp) ; RV32-NEXT: fcvt.wu.d a3, fa4, rtz +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; RV32-NEXT: vslide1down.vx v8, v10, a0 ; RV32-NEXT: fmax.d fa4, fa2, fa3 ; RV32-NEXT: fmin.d fa4, fa4, fa5 @@ -633,7 +633,6 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) { ; RV64-NEXT: vslidedown.vi v8, v8, 3 ; RV64-NEXT: vfmv.f.s fa4, v8 ; RV64-NEXT: fmax.d fa4, fa4, fa3 -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; RV64-NEXT: fld fa2, 40(sp) ; RV64-NEXT: fmin.d fa4, fa4, fa5 ; RV64-NEXT: fcvt.lu.d a0, fa4, rtz @@ -642,9 +641,10 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) { ; RV64-NEXT: fmin.d fa2, fa2, fa5 ; RV64-NEXT: fcvt.lu.d a2, fa2, rtz ; RV64-NEXT: fmax.d fa4, fa4, fa3 -; RV64-NEXT: fld fa2, 48(sp) ; RV64-NEXT: fmin.d fa4, fa4, fa5 +; RV64-NEXT: fld fa2, 48(sp) ; RV64-NEXT: fcvt.lu.d a3, fa4, rtz +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; RV64-NEXT: vslide1down.vx v8, v10, a0 ; RV64-NEXT: fmax.d fa4, fa2, fa3 ; RV64-NEXT: fmin.d fa4, fa4, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll index 6ffa6ac..9c76b83 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll @@ -132,12 +132,12 @@ define <3 x float> @si2fp_v3i1_v3f32(<3 x i1> %x) { define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) { ; ZVFH32-LABEL: si2fp_v3i7_v3f32: ; ZVFH32: # %bb.0: -; ZVFH32-NEXT: lw a1, 4(a0) -; ZVFH32-NEXT: lw a2, 0(a0) -; ZVFH32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFH32-NEXT: lw a1, 0(a0) +; ZVFH32-NEXT: lw a2, 4(a0) ; ZVFH32-NEXT: lw a0, 8(a0) -; ZVFH32-NEXT: vmv.v.x v8, a2 -; ZVFH32-NEXT: vslide1down.vx v8, v8, a1 +; ZVFH32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFH32-NEXT: vmv.v.x v8, a1 +; ZVFH32-NEXT: vslide1down.vx v8, v8, a2 ; ZVFH32-NEXT: vslide1down.vx v8, v8, a0 ; ZVFH32-NEXT: vslidedown.vi v8, v8, 1 ; ZVFH32-NEXT: vadd.vv v8, v8, v8 @@ -149,12 +149,12 @@ define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) { ; ; ZVFH64-LABEL: si2fp_v3i7_v3f32: ; ZVFH64: # %bb.0: -; ZVFH64-NEXT: ld a1, 8(a0) -; ZVFH64-NEXT: ld a2, 0(a0) -; ZVFH64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFH64-NEXT: ld a1, 0(a0) +; ZVFH64-NEXT: ld a2, 8(a0) ; ZVFH64-NEXT: ld a0, 16(a0) -; ZVFH64-NEXT: vmv.v.x v8, a2 -; ZVFH64-NEXT: vslide1down.vx v8, v8, a1 +; ZVFH64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFH64-NEXT: vmv.v.x v8, a1 +; ZVFH64-NEXT: vslide1down.vx v8, v8, a2 ; ZVFH64-NEXT: vslide1down.vx v8, v8, a0 ; ZVFH64-NEXT: vslidedown.vi v8, v8, 1 ; ZVFH64-NEXT: vadd.vv v8, v8, v8 @@ -166,12 +166,12 @@ define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) { ; ; ZVFHMIN32-LABEL: si2fp_v3i7_v3f32: ; ZVFHMIN32: # %bb.0: -; ZVFHMIN32-NEXT: lw a1, 4(a0) -; ZVFHMIN32-NEXT: lw a2, 0(a0) -; ZVFHMIN32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFHMIN32-NEXT: lw a1, 0(a0) +; ZVFHMIN32-NEXT: lw a2, 4(a0) ; ZVFHMIN32-NEXT: lw a0, 8(a0) -; ZVFHMIN32-NEXT: vmv.v.x v8, a2 -; ZVFHMIN32-NEXT: vslide1down.vx v8, v8, a1 +; ZVFHMIN32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFHMIN32-NEXT: vmv.v.x v8, a1 +; ZVFHMIN32-NEXT: vslide1down.vx v8, v8, a2 ; ZVFHMIN32-NEXT: vslide1down.vx v8, v8, a0 ; ZVFHMIN32-NEXT: vslidedown.vi v8, v8, 1 ; ZVFHMIN32-NEXT: vadd.vv v8, v8, v8 @@ -183,12 +183,12 @@ define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) { ; ; ZVFHMIN64-LABEL: si2fp_v3i7_v3f32: ; 
ZVFHMIN64: # %bb.0: -; ZVFHMIN64-NEXT: ld a1, 8(a0) -; ZVFHMIN64-NEXT: ld a2, 0(a0) -; ZVFHMIN64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFHMIN64-NEXT: ld a1, 0(a0) +; ZVFHMIN64-NEXT: ld a2, 8(a0) ; ZVFHMIN64-NEXT: ld a0, 16(a0) -; ZVFHMIN64-NEXT: vmv.v.x v8, a2 -; ZVFHMIN64-NEXT: vslide1down.vx v8, v8, a1 +; ZVFHMIN64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFHMIN64-NEXT: vmv.v.x v8, a1 +; ZVFHMIN64-NEXT: vslide1down.vx v8, v8, a2 ; ZVFHMIN64-NEXT: vslide1down.vx v8, v8, a0 ; ZVFHMIN64-NEXT: vslidedown.vi v8, v8, 1 ; ZVFHMIN64-NEXT: vadd.vv v8, v8, v8 @@ -205,12 +205,12 @@ define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) { define <3 x float> @ui2fp_v3i7_v3f32(<3 x i7> %x) { ; ZVFH32-LABEL: ui2fp_v3i7_v3f32: ; ZVFH32: # %bb.0: -; ZVFH32-NEXT: lw a1, 4(a0) -; ZVFH32-NEXT: lw a2, 0(a0) -; ZVFH32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFH32-NEXT: lw a1, 0(a0) +; ZVFH32-NEXT: lw a2, 4(a0) ; ZVFH32-NEXT: lw a0, 8(a0) -; ZVFH32-NEXT: vmv.v.x v8, a2 -; ZVFH32-NEXT: vslide1down.vx v8, v8, a1 +; ZVFH32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFH32-NEXT: vmv.v.x v8, a1 +; ZVFH32-NEXT: vslide1down.vx v8, v8, a2 ; ZVFH32-NEXT: vslide1down.vx v8, v8, a0 ; ZVFH32-NEXT: vslidedown.vi v8, v8, 1 ; ZVFH32-NEXT: li a0, 127 @@ -222,12 +222,12 @@ define <3 x float> @ui2fp_v3i7_v3f32(<3 x i7> %x) { ; ; ZVFH64-LABEL: ui2fp_v3i7_v3f32: ; ZVFH64: # %bb.0: -; ZVFH64-NEXT: ld a1, 8(a0) -; ZVFH64-NEXT: ld a2, 0(a0) -; ZVFH64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFH64-NEXT: ld a1, 0(a0) +; ZVFH64-NEXT: ld a2, 8(a0) ; ZVFH64-NEXT: ld a0, 16(a0) -; ZVFH64-NEXT: vmv.v.x v8, a2 -; ZVFH64-NEXT: vslide1down.vx v8, v8, a1 +; ZVFH64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFH64-NEXT: vmv.v.x v8, a1 +; ZVFH64-NEXT: vslide1down.vx v8, v8, a2 ; ZVFH64-NEXT: vslide1down.vx v8, v8, a0 ; ZVFH64-NEXT: vslidedown.vi v8, v8, 1 ; ZVFH64-NEXT: li a0, 127 @@ -239,12 +239,12 @@ define <3 x float> @ui2fp_v3i7_v3f32(<3 x i7> %x) { ; ; ZVFHMIN32-LABEL: ui2fp_v3i7_v3f32: ; ZVFHMIN32: # %bb.0: -; ZVFHMIN32-NEXT: lw a1, 4(a0) -; ZVFHMIN32-NEXT: lw a2, 0(a0) -; ZVFHMIN32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFHMIN32-NEXT: lw a1, 0(a0) +; ZVFHMIN32-NEXT: lw a2, 4(a0) ; ZVFHMIN32-NEXT: lw a0, 8(a0) -; ZVFHMIN32-NEXT: vmv.v.x v8, a2 -; ZVFHMIN32-NEXT: vslide1down.vx v8, v8, a1 +; ZVFHMIN32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFHMIN32-NEXT: vmv.v.x v8, a1 +; ZVFHMIN32-NEXT: vslide1down.vx v8, v8, a2 ; ZVFHMIN32-NEXT: vslide1down.vx v8, v8, a0 ; ZVFHMIN32-NEXT: vslidedown.vi v8, v8, 1 ; ZVFHMIN32-NEXT: li a0, 127 @@ -256,12 +256,12 @@ define <3 x float> @ui2fp_v3i7_v3f32(<3 x i7> %x) { ; ; ZVFHMIN64-LABEL: ui2fp_v3i7_v3f32: ; ZVFHMIN64: # %bb.0: -; ZVFHMIN64-NEXT: ld a1, 8(a0) -; ZVFHMIN64-NEXT: ld a2, 0(a0) -; ZVFHMIN64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFHMIN64-NEXT: ld a1, 0(a0) +; ZVFHMIN64-NEXT: ld a2, 8(a0) ; ZVFHMIN64-NEXT: ld a0, 16(a0) -; ZVFHMIN64-NEXT: vmv.v.x v8, a2 -; ZVFHMIN64-NEXT: vslide1down.vx v8, v8, a1 +; ZVFHMIN64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFHMIN64-NEXT: vmv.v.x v8, a1 +; ZVFHMIN64-NEXT: vslide1down.vx v8, v8, a2 ; ZVFHMIN64-NEXT: vslide1down.vx v8, v8, a0 ; ZVFHMIN64-NEXT: vslidedown.vi v8, v8, 1 ; ZVFHMIN64-NEXT: li a0, 127 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll index 592ce6f..4f4f0a09 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -1183,38 +1183,38 @@ define <8 x i64> 
@v8xi64_exact_undef_prefix(i64 %a, i64 %b, i64 %c, i64 %d) vsca define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) { ; CHECK-LABEL: buildvec_v16i8_loads_contigous: ; CHECK: # %bb.0: -; CHECK-NEXT: lbu a1, 1(a0) -; CHECK-NEXT: lbu a2, 2(a0) -; CHECK-NEXT: lbu a3, 3(a0) -; CHECK-NEXT: lbu a4, 4(a0) -; CHECK-NEXT: lbu a5, 5(a0) -; CHECK-NEXT: lbu a6, 6(a0) -; CHECK-NEXT: lbu a7, 7(a0) -; CHECK-NEXT: lbu t0, 9(a0) -; CHECK-NEXT: lbu t1, 10(a0) -; CHECK-NEXT: lbu t2, 11(a0) -; CHECK-NEXT: lbu t3, 12(a0) -; CHECK-NEXT: lbu t4, 13(a0) -; CHECK-NEXT: lbu t5, 14(a0) -; CHECK-NEXT: lbu t6, 15(a0) +; CHECK-NEXT: addi a1, a0, 8 +; CHECK-NEXT: lbu a2, 1(a0) +; CHECK-NEXT: lbu a3, 2(a0) +; CHECK-NEXT: lbu a4, 3(a0) +; CHECK-NEXT: lbu a5, 4(a0) +; CHECK-NEXT: lbu a6, 5(a0) +; CHECK-NEXT: lbu a7, 6(a0) +; CHECK-NEXT: lbu t0, 7(a0) +; CHECK-NEXT: lbu t1, 9(a0) +; CHECK-NEXT: lbu t2, 10(a0) +; CHECK-NEXT: lbu t3, 11(a0) +; CHECK-NEXT: lbu t4, 12(a0) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vlse8.v v8, (a0), zero -; CHECK-NEXT: addi a0, a0, 8 -; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: lbu t5, 13(a0) +; CHECK-NEXT: lbu t6, 14(a0) +; CHECK-NEXT: lbu a0, 15(a0) ; CHECK-NEXT: vslide1down.vx v8, v8, a2 ; CHECK-NEXT: vslide1down.vx v8, v8, a3 ; CHECK-NEXT: vslide1down.vx v8, v8, a4 -; CHECK-NEXT: vlse8.v v9, (a0), zero ; CHECK-NEXT: vslide1down.vx v8, v8, a5 +; CHECK-NEXT: vlse8.v v9, (a1), zero ; CHECK-NEXT: vslide1down.vx v8, v8, a6 -; CHECK-NEXT: vslide1down.vx v10, v8, a7 -; CHECK-NEXT: vslide1down.vx v8, v9, t0 -; CHECK-NEXT: vslide1down.vx v8, v8, t1 +; CHECK-NEXT: vslide1down.vx v8, v8, a7 +; CHECK-NEXT: vslide1down.vx v10, v8, t0 +; CHECK-NEXT: vslide1down.vx v8, v9, t1 ; CHECK-NEXT: vslide1down.vx v8, v8, t2 ; CHECK-NEXT: vslide1down.vx v8, v8, t3 ; CHECK-NEXT: vslide1down.vx v8, v8, t4 ; CHECK-NEXT: vslide1down.vx v8, v8, t5 ; CHECK-NEXT: vslide1down.vx v8, v8, t6 +; CHECK-NEXT: vslide1down.vx v8, v8, a0 ; CHECK-NEXT: li a0, 255 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 @@ -1277,38 +1277,38 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) { define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) { ; CHECK-LABEL: buildvec_v16i8_loads_gather: ; CHECK: # %bb.0: -; CHECK-NEXT: lbu a1, 1(a0) -; CHECK-NEXT: lbu a2, 22(a0) -; CHECK-NEXT: lbu a3, 31(a0) -; CHECK-NEXT: lbu a4, 44(a0) -; CHECK-NEXT: lbu a5, 55(a0) -; CHECK-NEXT: lbu a6, 623(a0) -; CHECK-NEXT: lbu a7, 75(a0) -; CHECK-NEXT: lbu t0, 93(a0) -; CHECK-NEXT: lbu t1, 105(a0) -; CHECK-NEXT: lbu t2, 161(a0) -; CHECK-NEXT: lbu t3, 124(a0) -; CHECK-NEXT: lbu t4, 163(a0) -; CHECK-NEXT: lbu t5, 144(a0) -; CHECK-NEXT: lbu t6, 154(a0) +; CHECK-NEXT: addi a1, a0, 82 +; CHECK-NEXT: lbu a2, 1(a0) +; CHECK-NEXT: lbu a3, 22(a0) +; CHECK-NEXT: lbu a4, 31(a0) +; CHECK-NEXT: lbu a5, 44(a0) +; CHECK-NEXT: lbu a6, 55(a0) +; CHECK-NEXT: lbu a7, 623(a0) +; CHECK-NEXT: lbu t0, 75(a0) +; CHECK-NEXT: lbu t1, 93(a0) +; CHECK-NEXT: lbu t2, 105(a0) +; CHECK-NEXT: lbu t3, 161(a0) +; CHECK-NEXT: lbu t4, 124(a0) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vlse8.v v8, (a0), zero -; CHECK-NEXT: addi a0, a0, 82 -; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: lbu t5, 163(a0) +; CHECK-NEXT: lbu t6, 144(a0) +; CHECK-NEXT: lbu a0, 154(a0) ; CHECK-NEXT: vslide1down.vx v8, v8, a2 ; CHECK-NEXT: vslide1down.vx v8, v8, a3 ; CHECK-NEXT: vslide1down.vx v8, v8, a4 -; CHECK-NEXT: vlse8.v v9, (a0), zero ; CHECK-NEXT: vslide1down.vx v8, v8, a5 +; CHECK-NEXT: vlse8.v v9, (a1), zero ; 
CHECK-NEXT: vslide1down.vx v8, v8, a6 -; CHECK-NEXT: vslide1down.vx v10, v8, a7 -; CHECK-NEXT: vslide1down.vx v8, v9, t0 -; CHECK-NEXT: vslide1down.vx v8, v8, t1 +; CHECK-NEXT: vslide1down.vx v8, v8, a7 +; CHECK-NEXT: vslide1down.vx v10, v8, t0 +; CHECK-NEXT: vslide1down.vx v8, v9, t1 ; CHECK-NEXT: vslide1down.vx v8, v8, t2 ; CHECK-NEXT: vslide1down.vx v8, v8, t3 ; CHECK-NEXT: vslide1down.vx v8, v8, t4 ; CHECK-NEXT: vslide1down.vx v8, v8, t5 ; CHECK-NEXT: vslide1down.vx v8, v8, t6 +; CHECK-NEXT: vslide1down.vx v8, v8, a0 ; CHECK-NEXT: li a0, 255 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 @@ -1375,17 +1375,17 @@ define <16 x i8> @buildvec_v16i8_undef_low_half(ptr %p) { ; CHECK-NEXT: lbu a3, 105(a0) ; CHECK-NEXT: lbu a4, 161(a0) ; CHECK-NEXT: lbu a5, 124(a0) -; CHECK-NEXT: lbu a6, 163(a0) -; CHECK-NEXT: lbu a7, 144(a0) -; CHECK-NEXT: lbu a0, 154(a0) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vlse8.v v8, (a1), zero +; CHECK-NEXT: lbu a1, 163(a0) +; CHECK-NEXT: lbu a6, 144(a0) +; CHECK-NEXT: lbu a0, 154(a0) ; CHECK-NEXT: vslide1down.vx v8, v8, a2 ; CHECK-NEXT: vslide1down.vx v8, v8, a3 ; CHECK-NEXT: vslide1down.vx v8, v8, a4 ; CHECK-NEXT: vslide1down.vx v8, v8, a5 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a6 -; CHECK-NEXT: vslide1down.vx v8, v8, a7 ; CHECK-NEXT: vslide1down.vx v8, v8, a0 ; CHECK-NEXT: ret %p9 = getelementptr i8, ptr %p, i32 82 @@ -1424,18 +1424,18 @@ define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) { ; CHECK-NEXT: lbu a2, 22(a0) ; CHECK-NEXT: lbu a3, 31(a0) ; CHECK-NEXT: lbu a4, 44(a0) -; CHECK-NEXT: lbu a5, 55(a0) -; CHECK-NEXT: lbu a6, 623(a0) -; CHECK-NEXT: lbu a7, 75(a0) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vlse8.v v8, (a0), zero +; CHECK-NEXT: lbu a5, 55(a0) +; CHECK-NEXT: lbu a6, 623(a0) +; CHECK-NEXT: lbu a0, 75(a0) ; CHECK-NEXT: vslide1down.vx v8, v8, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a2 ; CHECK-NEXT: vslide1down.vx v8, v8, a3 ; CHECK-NEXT: vslide1down.vx v8, v8, a4 ; CHECK-NEXT: vslide1down.vx v8, v8, a5 ; CHECK-NEXT: vslide1down.vx v8, v8, a6 -; CHECK-NEXT: vslide1down.vx v8, v8, a7 +; CHECK-NEXT: vslide1down.vx v8, v8, a0 ; CHECK-NEXT: vslidedown.vi v8, v8, 8 ; CHECK-NEXT: ret %p2 = getelementptr i8, ptr %p, i32 1 @@ -1470,24 +1470,24 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) { ; CHECK-LABEL: buildvec_v16i8_undef_edges: ; CHECK: # %bb.0: ; CHECK-NEXT: addi a1, a0, 31 -; CHECK-NEXT: lbu a2, 44(a0) -; CHECK-NEXT: lbu a3, 55(a0) -; CHECK-NEXT: lbu a4, 623(a0) -; CHECK-NEXT: lbu a5, 75(a0) -; CHECK-NEXT: lbu a6, 93(a0) -; CHECK-NEXT: lbu a7, 105(a0) -; CHECK-NEXT: lbu t0, 161(a0) +; CHECK-NEXT: addi a2, a0, 82 +; CHECK-NEXT: lbu a3, 44(a0) +; CHECK-NEXT: lbu a4, 55(a0) +; CHECK-NEXT: lbu a5, 623(a0) +; CHECK-NEXT: lbu a6, 75(a0) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vlse8.v v8, (a1), zero -; CHECK-NEXT: addi a0, a0, 82 -; CHECK-NEXT: vslide1down.vx v8, v8, a2 -; CHECK-NEXT: vlse8.v v9, (a0), zero +; CHECK-NEXT: lbu a1, 93(a0) +; CHECK-NEXT: lbu a7, 105(a0) +; CHECK-NEXT: lbu a0, 161(a0) ; CHECK-NEXT: vslide1down.vx v8, v8, a3 +; CHECK-NEXT: vlse8.v v9, (a2), zero ; CHECK-NEXT: vslide1down.vx v8, v8, a4 -; CHECK-NEXT: vslide1down.vx v10, v8, a5 -; CHECK-NEXT: vslide1down.vx v8, v9, a6 +; CHECK-NEXT: vslide1down.vx v8, v8, a5 +; CHECK-NEXT: vslide1down.vx v10, v8, a6 +; CHECK-NEXT: vslide1down.vx v8, v9, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a7 -; CHECK-NEXT: vslide1down.vx v8, v8, t0 +; CHECK-NEXT: 
vslide1down.vx v8, v8, a0 ; CHECK-NEXT: vslidedown.vi v8, v8, 4 ; CHECK-NEXT: li a0, 255 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma @@ -1530,30 +1530,30 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) { define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) { ; CHECK-LABEL: buildvec_v16i8_loads_undef_scattered: ; CHECK: # %bb.0: -; CHECK-NEXT: lbu a1, 1(a0) -; CHECK-NEXT: lbu a2, 44(a0) -; CHECK-NEXT: lbu a3, 55(a0) -; CHECK-NEXT: lbu a4, 75(a0) -; CHECK-NEXT: lbu a5, 93(a0) -; CHECK-NEXT: lbu a6, 124(a0) -; CHECK-NEXT: lbu a7, 144(a0) -; CHECK-NEXT: lbu t0, 154(a0) +; CHECK-NEXT: addi a1, a0, 82 +; CHECK-NEXT: lbu a2, 1(a0) +; CHECK-NEXT: lbu a3, 44(a0) +; CHECK-NEXT: lbu a4, 55(a0) +; CHECK-NEXT: lbu a5, 75(a0) +; CHECK-NEXT: lbu a6, 93(a0) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vlse8.v v8, (a0), zero -; CHECK-NEXT: addi a0, a0, 82 -; CHECK-NEXT: vslide1down.vx v8, v8, a1 -; CHECK-NEXT: vslidedown.vi v8, v8, 2 +; CHECK-NEXT: lbu a7, 124(a0) +; CHECK-NEXT: lbu t0, 144(a0) +; CHECK-NEXT: lbu a0, 154(a0) ; CHECK-NEXT: vslide1down.vx v8, v8, a2 -; CHECK-NEXT: vlse8.v v9, (a0), zero +; CHECK-NEXT: vslidedown.vi v8, v8, 2 ; CHECK-NEXT: vslide1down.vx v8, v8, a3 +; CHECK-NEXT: vlse8.v v9, (a1), zero +; CHECK-NEXT: vslide1down.vx v8, v8, a4 ; CHECK-NEXT: vslidedown.vi v8, v8, 1 -; CHECK-NEXT: vslide1down.vx v10, v8, a4 -; CHECK-NEXT: vslide1down.vx v8, v9, a5 +; CHECK-NEXT: vslide1down.vx v10, v8, a5 +; CHECK-NEXT: vslide1down.vx v8, v9, a6 ; CHECK-NEXT: vslidedown.vi v8, v8, 2 -; CHECK-NEXT: vslide1down.vx v8, v8, a6 -; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: vslide1down.vx v8, v8, a7 +; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: vslide1down.vx v8, v8, t0 +; CHECK-NEXT: vslide1down.vx v8, v8, a0 ; CHECK-NEXT: li a0, 255 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll index 8acc70f..eb95d86 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll @@ -7,25 +7,23 @@ define <4 x i1> @load_large_vector(ptr %p) { ; ZVE32X-LABEL: load_large_vector: ; ZVE32X: # %bb.0: -; ZVE32X-NEXT: ld a1, 80(a0) -; ZVE32X-NEXT: ld a2, 72(a0) -; ZVE32X-NEXT: ld a3, 56(a0) -; ZVE32X-NEXT: ld a4, 32(a0) -; ZVE32X-NEXT: ld a5, 24(a0) -; ZVE32X-NEXT: ld a6, 48(a0) -; ZVE32X-NEXT: ld a7, 8(a0) -; ZVE32X-NEXT: ld a0, 0(a0) -; ZVE32X-NEXT: xor a4, a5, a4 -; ZVE32X-NEXT: snez a4, a4 +; ZVE32X-NEXT: ld a1, 56(a0) +; ZVE32X-NEXT: ld a2, 32(a0) +; ZVE32X-NEXT: ld a3, 24(a0) +; ZVE32X-NEXT: ld a4, 48(a0) +; ZVE32X-NEXT: ld a5, 8(a0) +; ZVE32X-NEXT: ld a6, 0(a0) +; ZVE32X-NEXT: xor a2, a3, a2 +; ZVE32X-NEXT: snez a2, a2 ; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; ZVE32X-NEXT: vmv.s.x v8, a4 +; ZVE32X-NEXT: vmv.s.x v8, a2 ; ZVE32X-NEXT: vand.vi v8, v8, 1 ; ZVE32X-NEXT: vmsne.vi v0, v8, 0 ; ZVE32X-NEXT: vmv.s.x v8, zero ; ZVE32X-NEXT: vmerge.vim v9, v8, 1, v0 -; ZVE32X-NEXT: xor a0, a0, a7 -; ZVE32X-NEXT: snez a0, a0 -; ZVE32X-NEXT: vmv.s.x v10, a0 +; ZVE32X-NEXT: xor a2, a6, a5 +; ZVE32X-NEXT: snez a2, a2 +; ZVE32X-NEXT: vmv.s.x v10, a2 ; ZVE32X-NEXT: vand.vi v10, v10, 1 ; ZVE32X-NEXT: vmsne.vi v0, v10, 0 ; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma @@ -35,21 +33,23 @@ define <4 x i1> @load_large_vector(ptr %p) { ; ZVE32X-NEXT: vslideup.vi v11, v9, 1 ; ZVE32X-NEXT: 
vsetivli zero, 4, e8, mf4, ta, ma ; ZVE32X-NEXT: vmsne.vi v0, v11, 0 +; ZVE32X-NEXT: ld a2, 80(a0) ; ZVE32X-NEXT: vmerge.vim v9, v10, 1, v0 -; ZVE32X-NEXT: xor a0, a6, a3 -; ZVE32X-NEXT: snez a0, a0 -; ZVE32X-NEXT: vmv.s.x v11, a0 +; ZVE32X-NEXT: xor a1, a4, a1 +; ZVE32X-NEXT: snez a1, a1 +; ZVE32X-NEXT: vmv.s.x v11, a1 ; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; ZVE32X-NEXT: vand.vi v11, v11, 1 ; ZVE32X-NEXT: vmsne.vi v0, v11, 0 +; ZVE32X-NEXT: ld a0, 72(a0) ; ZVE32X-NEXT: vmerge.vim v11, v8, 1, v0 ; ZVE32X-NEXT: vsetivli zero, 3, e8, mf4, tu, ma ; ZVE32X-NEXT: vslideup.vi v9, v11, 2 ; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; ZVE32X-NEXT: vmsne.vi v0, v9, 0 ; ZVE32X-NEXT: vmerge.vim v9, v10, 1, v0 -; ZVE32X-NEXT: xor a1, a2, a1 -; ZVE32X-NEXT: snez a0, a1 +; ZVE32X-NEXT: xor a0, a0, a2 +; ZVE32X-NEXT: snez a0, a0 ; ZVE32X-NEXT: vmv.s.x v10, a0 ; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; ZVE32X-NEXT: vand.vi v10, v10, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll index 35baa680..e2075e0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll @@ -812,14 +812,14 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) { ; RV32-NEXT: vslide1down.vx v10, v10, a0 ; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 3 -; RV32-NEXT: vfmv.f.s fa5, v8 -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: fld fa4, 32(sp) +; RV32-NEXT: fld fa5, 32(sp) +; RV32-NEXT: vfmv.f.s fa4, v8 ; RV32-NEXT: fld fa3, 40(sp) -; RV32-NEXT: fcvt.w.d a0, fa5 +; RV32-NEXT: fcvt.w.d a0, fa4 +; RV32-NEXT: fcvt.w.d a1, fa5 ; RV32-NEXT: fld fa5, 48(sp) -; RV32-NEXT: fcvt.w.d a1, fa4 ; RV32-NEXT: fcvt.w.d a2, fa3 +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vslide1down.vx v8, v10, a0 ; RV32-NEXT: fcvt.w.d a0, fa5 ; RV32-NEXT: fld fa5, 56(sp) @@ -865,14 +865,14 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) { ; RV64-i32-NEXT: vslide1down.vx v10, v10, a0 ; RV64-i32-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV64-i32-NEXT: vslidedown.vi v8, v8, 3 -; RV64-i32-NEXT: vfmv.f.s fa5, v8 -; RV64-i32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64-i32-NEXT: fld fa4, 32(sp) +; RV64-i32-NEXT: fld fa5, 32(sp) +; RV64-i32-NEXT: vfmv.f.s fa4, v8 ; RV64-i32-NEXT: fld fa3, 40(sp) -; RV64-i32-NEXT: fcvt.l.d a0, fa5 +; RV64-i32-NEXT: fcvt.l.d a0, fa4 +; RV64-i32-NEXT: fcvt.l.d a1, fa5 ; RV64-i32-NEXT: fld fa5, 48(sp) -; RV64-i32-NEXT: fcvt.l.d a1, fa4 ; RV64-i32-NEXT: fcvt.l.d a2, fa3 +; RV64-i32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64-i32-NEXT: vslide1down.vx v8, v10, a0 ; RV64-i32-NEXT: fcvt.l.d a0, fa5 ; RV64-i32-NEXT: fld fa5, 56(sp) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index f42f32e..08cad29 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -519,17 +519,17 @@ define <4 x i8> @mgather_truemask_v4i8(<4 x ptr> %ptrs, <4 x i8> %passthru) { ; RV64ZVE32F-LABEL: mgather_truemask_v4i8: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 8(a0) -; RV64ZVE32F-NEXT: ld a2, 16(a0) +; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: ld a3, 24(a0) -; RV64ZVE32F-NEXT: ld a0, 0(a0) +; RV64ZVE32F-NEXT: ld a0, 16(a0) ; RV64ZVE32F-NEXT: lbu a1, 0(a1) -; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: lbu a3, 0(a3) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, 
ta, ma -; RV64ZVE32F-NEXT: vlse8.v v8, (a0), zero +; RV64ZVE32F-NEXT: vlse8.v v8, (a2), zero +; RV64ZVE32F-NEXT: lbu a0, 0(a0) +; RV64ZVE32F-NEXT: lbu a2, 0(a3) ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 ; RV64ZVE32F-NEXT: ret %v = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 1), <4 x i8> %passthru) ret <4 x i8> %v @@ -1208,17 +1208,17 @@ define <4 x i16> @mgather_truemask_v4i16(<4 x ptr> %ptrs, <4 x i16> %passthru) { ; RV64ZVE32F-LABEL: mgather_truemask_v4i16: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 8(a0) -; RV64ZVE32F-NEXT: ld a2, 16(a0) +; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: ld a3, 24(a0) -; RV64ZVE32F-NEXT: ld a0, 0(a0) +; RV64ZVE32F-NEXT: ld a0, 16(a0) ; RV64ZVE32F-NEXT: lh a1, 0(a1) -; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: lh a3, 0(a3) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero +; RV64ZVE32F-NEXT: vlse16.v v8, (a2), zero +; RV64ZVE32F-NEXT: lh a0, 0(a0) +; RV64ZVE32F-NEXT: lh a2, 0(a3) ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 ; RV64ZVE32F-NEXT: ret %v = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1), <4 x i16> %passthru) ret <4 x i16> %v @@ -2257,17 +2257,17 @@ define <4 x i32> @mgather_truemask_v4i32(<4 x ptr> %ptrs, <4 x i32> %passthru) { ; RV64ZVE32F-LABEL: mgather_truemask_v4i32: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 8(a0) -; RV64ZVE32F-NEXT: ld a2, 16(a0) +; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: ld a3, 24(a0) -; RV64ZVE32F-NEXT: ld a0, 0(a0) +; RV64ZVE32F-NEXT: ld a0, 16(a0) ; RV64ZVE32F-NEXT: lw a1, 0(a1) -; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: lw a3, 0(a3) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero +; RV64ZVE32F-NEXT: vlse32.v v8, (a2), zero +; RV64ZVE32F-NEXT: lw a0, 0(a0) +; RV64ZVE32F-NEXT: lw a2, 0(a3) ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 ; RV64ZVE32F-NEXT: ret %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1), <4 x i32> %passthru) ret <4 x i32> %v @@ -6589,16 +6589,16 @@ define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m, ; RV32ZVE32F-NEXT: lw a4, 56(a2) ; RV32ZVE32F-NEXT: lw a5, 48(a2) ; RV32ZVE32F-NEXT: lw a6, 40(a2) -; RV32ZVE32F-NEXT: lw a7, 32(a2) -; RV32ZVE32F-NEXT: lw t0, 24(a2) -; RV32ZVE32F-NEXT: lw t1, 16(a2) -; RV32ZVE32F-NEXT: lw t2, 8(a2) +; RV32ZVE32F-NEXT: lw a7, 8(a2) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vlse32.v v8, (a2), zero -; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t2 -; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t1 -; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0 +; RV32ZVE32F-NEXT: lw t0, 16(a2) +; RV32ZVE32F-NEXT: lw t1, 24(a2) +; RV32ZVE32F-NEXT: lw a2, 32(a2) ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t1 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5 ; RV32ZVE32F-NEXT: vslide1down.vx v8, 
v8, a4 @@ -7017,14 +7017,14 @@ define <4 x half> @mgather_truemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru) ; RV64ZVE32F-LABEL: mgather_truemask_v4f16: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 8(a0) -; RV64ZVE32F-NEXT: ld a2, 16(a0) +; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: ld a3, 24(a0) -; RV64ZVE32F-NEXT: ld a0, 0(a0) +; RV64ZVE32F-NEXT: ld a0, 16(a0) ; RV64ZVE32F-NEXT: flh fa5, 0(a1) -; RV64ZVE32F-NEXT: flh fa4, 0(a2) -; RV64ZVE32F-NEXT: flh fa3, 0(a3) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero +; RV64ZVE32F-NEXT: vlse16.v v8, (a2), zero +; RV64ZVE32F-NEXT: flh fa4, 0(a0) +; RV64ZVE32F-NEXT: flh fa3, 0(a3) ; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa5 ; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa4 ; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa3 @@ -7940,14 +7940,14 @@ define <4 x float> @mgather_truemask_v4f32(<4 x ptr> %ptrs, <4 x float> %passthr ; RV64ZVE32F-LABEL: mgather_truemask_v4f32: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 8(a0) -; RV64ZVE32F-NEXT: ld a2, 16(a0) +; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: ld a3, 24(a0) -; RV64ZVE32F-NEXT: ld a0, 0(a0) +; RV64ZVE32F-NEXT: ld a0, 16(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a1) -; RV64ZVE32F-NEXT: flw fa4, 0(a2) -; RV64ZVE32F-NEXT: flw fa3, 0(a3) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero +; RV64ZVE32F-NEXT: vlse32.v v8, (a2), zero +; RV64ZVE32F-NEXT: flw fa4, 0(a0) +; RV64ZVE32F-NEXT: flw fa3, 0(a3) ; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa5 ; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa4 ; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa3 @@ -11632,16 +11632,16 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1> ; RV32ZVE32F-NEXT: lw a3, 56(a2) ; RV32ZVE32F-NEXT: lw a4, 48(a2) ; RV32ZVE32F-NEXT: lw a5, 40(a2) -; RV32ZVE32F-NEXT: lw a6, 32(a2) -; RV32ZVE32F-NEXT: lw a7, 24(a2) -; RV32ZVE32F-NEXT: lw t0, 16(a2) -; RV32ZVE32F-NEXT: lw t1, 8(a2) +; RV32ZVE32F-NEXT: lw a6, 8(a2) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vlse32.v v8, (a2), zero -; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t1 -; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0 -; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; RV32ZVE32F-NEXT: lw a7, 16(a2) +; RV32ZVE32F-NEXT: lw t0, 24(a2) +; RV32ZVE32F-NEXT: lw a2, 32(a2) ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a3 @@ -12881,22 +12881,22 @@ define <8 x i16> @mgather_strided_2xSEW(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_strided_2xSEW: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: lh a1, 2(a0) -; RV64ZVE32F-NEXT: lh a2, 8(a0) -; RV64ZVE32F-NEXT: lh a3, 10(a0) -; RV64ZVE32F-NEXT: lh a4, 18(a0) -; RV64ZVE32F-NEXT: lh a5, 24(a0) -; RV64ZVE32F-NEXT: lh a6, 26(a0) +; RV64ZVE32F-NEXT: addi a1, a0, 16 +; RV64ZVE32F-NEXT: lh a2, 2(a0) +; RV64ZVE32F-NEXT: lh a3, 8(a0) +; RV64ZVE32F-NEXT: lh a4, 10(a0) +; RV64ZVE32F-NEXT: lh a5, 18(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero -; RV64ZVE32F-NEXT: addi a0, a0, 16 -; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: lh a6, 24(a0) +; RV64ZVE32F-NEXT: lh a0, 26(a0) +; RV64ZVE32F-NEXT: vlse16.v v9, (a1), zero ; 
RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret @@ -12925,23 +12925,23 @@ define <8 x i16> @mgather_strided_2xSEW_with_offset(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_strided_2xSEW_with_offset: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: addi a1, a0, 4 -; RV64ZVE32F-NEXT: lh a2, 6(a0) -; RV64ZVE32F-NEXT: lh a3, 12(a0) -; RV64ZVE32F-NEXT: lh a4, 14(a0) -; RV64ZVE32F-NEXT: lh a5, 22(a0) -; RV64ZVE32F-NEXT: lh a6, 28(a0) -; RV64ZVE32F-NEXT: lh a7, 30(a0) +; RV64ZVE32F-NEXT: addi a1, a0, 20 +; RV64ZVE32F-NEXT: addi a2, a0, 4 +; RV64ZVE32F-NEXT: lh a3, 6(a0) +; RV64ZVE32F-NEXT: lh a4, 12(a0) +; RV64ZVE32F-NEXT: lh a5, 14(a0) +; RV64ZVE32F-NEXT: lh a6, 22(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vlse16.v v8, (a1), zero -; RV64ZVE32F-NEXT: addi a0, a0, 20 -; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vlse16.v v8, (a2), zero +; RV64ZVE32F-NEXT: lh a2, 28(a0) +; RV64ZVE32F-NEXT: lh a0, 30(a0) +; RV64ZVE32F-NEXT: vlse16.v v9, (a1), zero ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 -; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a6 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret @@ -12970,23 +12970,23 @@ define <8 x i16> @mgather_reverse_unit_strided_2xSEW(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_reverse_unit_strided_2xSEW: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: addi a1, a0, 28 -; RV64ZVE32F-NEXT: lh a2, 30(a0) -; RV64ZVE32F-NEXT: lh a3, 24(a0) -; RV64ZVE32F-NEXT: lh a4, 26(a0) -; RV64ZVE32F-NEXT: lh a5, 22(a0) -; RV64ZVE32F-NEXT: lh a6, 16(a0) -; RV64ZVE32F-NEXT: lh a7, 18(a0) +; RV64ZVE32F-NEXT: addi a1, a0, 20 +; RV64ZVE32F-NEXT: addi a2, a0, 28 +; RV64ZVE32F-NEXT: lh a3, 30(a0) +; RV64ZVE32F-NEXT: lh a4, 24(a0) +; RV64ZVE32F-NEXT: lh a5, 26(a0) +; RV64ZVE32F-NEXT: lh a6, 22(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vlse16.v v8, (a1), zero -; RV64ZVE32F-NEXT: addi a0, a0, 20 -; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vlse16.v v8, (a2), zero +; RV64ZVE32F-NEXT: lh a2, 16(a0) +; RV64ZVE32F-NEXT: lh a0, 18(a0) +; RV64ZVE32F-NEXT: vlse16.v v9, (a1), zero ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 -; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a6 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; 
RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret @@ -13015,23 +13015,23 @@ define <8 x i16> @mgather_reverse_strided_2xSEW(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_reverse_strided_2xSEW: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: addi a1, a0, 28 -; RV64ZVE32F-NEXT: lh a2, 30(a0) -; RV64ZVE32F-NEXT: lh a3, 20(a0) -; RV64ZVE32F-NEXT: lh a4, 22(a0) -; RV64ZVE32F-NEXT: lh a5, 14(a0) -; RV64ZVE32F-NEXT: lh a6, 4(a0) -; RV64ZVE32F-NEXT: lh a7, 6(a0) +; RV64ZVE32F-NEXT: addi a1, a0, 12 +; RV64ZVE32F-NEXT: addi a2, a0, 28 +; RV64ZVE32F-NEXT: lh a3, 30(a0) +; RV64ZVE32F-NEXT: lh a4, 20(a0) +; RV64ZVE32F-NEXT: lh a5, 22(a0) +; RV64ZVE32F-NEXT: lh a6, 14(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vlse16.v v8, (a1), zero -; RV64ZVE32F-NEXT: addi a0, a0, 12 -; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vlse16.v v8, (a2), zero +; RV64ZVE32F-NEXT: lh a2, 4(a0) +; RV64ZVE32F-NEXT: lh a0, 6(a0) +; RV64ZVE32F-NEXT: vlse16.v v9, (a1), zero ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 -; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a6 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret @@ -13059,22 +13059,22 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_gather_2xSEW: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: lh a1, 2(a0) -; RV64ZVE32F-NEXT: lh a2, 16(a0) -; RV64ZVE32F-NEXT: lh a3, 18(a0) -; RV64ZVE32F-NEXT: lh a4, 10(a0) -; RV64ZVE32F-NEXT: lh a5, 4(a0) -; RV64ZVE32F-NEXT: lh a6, 6(a0) +; RV64ZVE32F-NEXT: addi a1, a0, 8 +; RV64ZVE32F-NEXT: lh a2, 2(a0) +; RV64ZVE32F-NEXT: lh a3, 16(a0) +; RV64ZVE32F-NEXT: lh a4, 18(a0) +; RV64ZVE32F-NEXT: lh a5, 10(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero -; RV64ZVE32F-NEXT: addi a0, a0, 8 -; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: lh a6, 4(a0) +; RV64ZVE32F-NEXT: lh a0, 6(a0) +; RV64ZVE32F-NEXT: vlse16.v v9, (a1), zero ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret @@ -13105,22 +13105,22 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_gather_2xSEW_unaligned: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: lh a1, 2(a0) -; RV64ZVE32F-NEXT: lh a2, 18(a0) -; RV64ZVE32F-NEXT: lh a3, 20(a0) -; RV64ZVE32F-NEXT: lh a4, 10(a0) -; RV64ZVE32F-NEXT: lh a5, 4(a0) -; RV64ZVE32F-NEXT: lh a6, 6(a0) +; RV64ZVE32F-NEXT: addi a1, a0, 8 +; RV64ZVE32F-NEXT: lh a2, 2(a0) +; RV64ZVE32F-NEXT: lh a3, 18(a0) +; RV64ZVE32F-NEXT: lh a4, 20(a0) +; RV64ZVE32F-NEXT: lh 
a5, 10(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero -; RV64ZVE32F-NEXT: addi a0, a0, 8 -; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: lh a6, 4(a0) +; RV64ZVE32F-NEXT: lh a0, 6(a0) +; RV64ZVE32F-NEXT: vlse16.v v9, (a1), zero ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret @@ -13152,22 +13152,22 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned2(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_gather_2xSEW_unaligned2: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: addi a1, a0, 2 -; RV64ZVE32F-NEXT: lh a2, 4(a0) -; RV64ZVE32F-NEXT: lh a3, 18(a0) -; RV64ZVE32F-NEXT: lh a4, 20(a0) -; RV64ZVE32F-NEXT: lh a5, 10(a0) -; RV64ZVE32F-NEXT: lh a6, 6(a0) +; RV64ZVE32F-NEXT: addi a1, a0, 8 +; RV64ZVE32F-NEXT: addi a2, a0, 2 +; RV64ZVE32F-NEXT: lh a3, 4(a0) +; RV64ZVE32F-NEXT: lh a4, 18(a0) +; RV64ZVE32F-NEXT: lh a5, 20(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vlse16.v v8, (a1), zero -; RV64ZVE32F-NEXT: addi a0, a0, 8 -; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vlse16.v v8, (a2), zero +; RV64ZVE32F-NEXT: lh a2, 10(a0) +; RV64ZVE32F-NEXT: lh a0, 6(a0) +; RV64ZVE32F-NEXT: vlse16.v v9, (a1), zero ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 -; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret @@ -13202,22 +13202,22 @@ define <8 x i16> @mgather_gather_4xSEW(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_gather_4xSEW: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: lh a1, 2(a0) -; RV64ZVE32F-NEXT: lh a2, 4(a0) -; RV64ZVE32F-NEXT: lh a3, 6(a0) -; RV64ZVE32F-NEXT: lh a4, 18(a0) -; RV64ZVE32F-NEXT: lh a5, 20(a0) -; RV64ZVE32F-NEXT: lh a6, 22(a0) +; RV64ZVE32F-NEXT: addi a1, a0, 16 +; RV64ZVE32F-NEXT: lh a2, 2(a0) +; RV64ZVE32F-NEXT: lh a3, 4(a0) +; RV64ZVE32F-NEXT: lh a4, 6(a0) +; RV64ZVE32F-NEXT: lh a5, 18(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero -; RV64ZVE32F-NEXT: addi a0, a0, 16 -; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: lh a6, 20(a0) +; RV64ZVE32F-NEXT: lh a0, 22(a0) +; RV64ZVE32F-NEXT: vlse16.v v9, (a1), zero ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx 
v8, v8, a6 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret @@ -13249,22 +13249,22 @@ define <8 x i16> @mgather_gather_4xSEW_partial_align(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_gather_4xSEW_partial_align: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: lh a1, 2(a0) -; RV64ZVE32F-NEXT: lh a2, 4(a0) -; RV64ZVE32F-NEXT: lh a3, 6(a0) -; RV64ZVE32F-NEXT: lh a4, 18(a0) -; RV64ZVE32F-NEXT: lh a5, 20(a0) -; RV64ZVE32F-NEXT: lh a6, 22(a0) +; RV64ZVE32F-NEXT: addi a1, a0, 16 +; RV64ZVE32F-NEXT: lh a2, 2(a0) +; RV64ZVE32F-NEXT: lh a3, 4(a0) +; RV64ZVE32F-NEXT: lh a4, 6(a0) +; RV64ZVE32F-NEXT: lh a5, 18(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero -; RV64ZVE32F-NEXT: addi a0, a0, 16 -; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: lh a6, 20(a0) +; RV64ZVE32F-NEXT: lh a0, 22(a0) +; RV64ZVE32F-NEXT: vlse16.v v9, (a1), zero ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret @@ -13305,22 +13305,22 @@ define <8 x i16> @mgather_shuffle_rotate(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_shuffle_rotate: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: lh a1, 10(a0) -; RV64ZVE32F-NEXT: lh a2, 12(a0) -; RV64ZVE32F-NEXT: lh a3, 14(a0) -; RV64ZVE32F-NEXT: lh a4, 2(a0) -; RV64ZVE32F-NEXT: lh a5, 4(a0) -; RV64ZVE32F-NEXT: lh a6, 6(a0) +; RV64ZVE32F-NEXT: addi a1, a0, 8 +; RV64ZVE32F-NEXT: lh a2, 10(a0) +; RV64ZVE32F-NEXT: lh a3, 12(a0) +; RV64ZVE32F-NEXT: lh a4, 14(a0) +; RV64ZVE32F-NEXT: lh a5, 2(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero -; RV64ZVE32F-NEXT: addi a0, a0, 8 -; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4 +; RV64ZVE32F-NEXT: lh a6, 4(a0) +; RV64ZVE32F-NEXT: lh a0, 6(a0) +; RV64ZVE32F-NEXT: vlse16.v v9, (a1), zero ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 -; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a2 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a4 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; RV64ZVE32F-NEXT: ret @@ -13352,22 +13352,22 @@ define <8 x i16> @mgather_shuffle_vrgather(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_shuffle_vrgather: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: lh a1, 4(a0) -; RV64ZVE32F-NEXT: lh a2, 6(a0) -; RV64ZVE32F-NEXT: lh a3, 2(a0) -; RV64ZVE32F-NEXT: lh a4, 10(a0) -; RV64ZVE32F-NEXT: lh a5, 12(a0) -; RV64ZVE32F-NEXT: lh a6, 14(a0) +; RV64ZVE32F-NEXT: addi a1, a0, 8 +; RV64ZVE32F-NEXT: lh a2, 4(a0) +; RV64ZVE32F-NEXT: lh a3, 6(a0) +; RV64ZVE32F-NEXT: lh a4, 2(a0) +; RV64ZVE32F-NEXT: lh a5, 10(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero -; RV64ZVE32F-NEXT: addi a0, a0, 8 -; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero -; RV64ZVE32F-NEXT: 
vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: lh a6, 12(a0) +; RV64ZVE32F-NEXT: lh a0, 14(a0) +; RV64ZVE32F-NEXT: vlse16.v v9, (a1), zero ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret @@ -13853,12 +13853,12 @@ define <4 x i32> @masked_gather_widen_sew_negative_stride(ptr %base) { ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: addi a1, a0, 136 ; RV64ZVE32F-NEXT: lw a2, 140(a0) -; RV64ZVE32F-NEXT: lw a3, 0(a0) -; RV64ZVE32F-NEXT: lw a0, 4(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vlse32.v v8, (a1), zero +; RV64ZVE32F-NEXT: lw a1, 0(a0) +; RV64ZVE32F-NEXT: lw a0, 4(a0) ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr i32, ptr %base, <4 x i64> <i64 34, i64 35, i64 0, i64 1> diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll index aa815e1..42e5243 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -5598,17 +5598,16 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i64: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: addi sp, sp, -48 -; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 48 -; RV32ZVE32F-NEXT: sw s0, 44(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s1, 40(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s2, 36(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s3, 32(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s4, 28(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s5, 24(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s6, 20(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s7, 16(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s8, 12(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: addi sp, sp, -32 +; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 32 +; RV32ZVE32F-NEXT: sw s0, 28(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s1, 24(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s2, 20(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s3, 16(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s4, 12(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s5, 8(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s6, 4(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s7, 0(sp) # 4-byte Folded Spill ; RV32ZVE32F-NEXT: .cfi_offset s0, -4 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12 @@ -5617,7 +5616,6 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; RV32ZVE32F-NEXT: .cfi_offset s5, -24 ; RV32ZVE32F-NEXT: .cfi_offset s6, -28 ; RV32ZVE32F-NEXT: .cfi_offset s7, -32 -; RV32ZVE32F-NEXT: .cfi_offset s8, -36 ; RV32ZVE32F-NEXT: lw a3, 60(a0) ; RV32ZVE32F-NEXT: lw a4, 56(a0) ; RV32ZVE32F-NEXT: lw a5, 52(a0) @@ -5635,16 +5633,16 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; RV32ZVE32F-NEXT: lw s2, 56(a2) ; 
RV32ZVE32F-NEXT: lw s3, 48(a2)
; RV32ZVE32F-NEXT: lw s4, 40(a2)
-; RV32ZVE32F-NEXT: lw s5, 32(a2)
-; RV32ZVE32F-NEXT: lw s6, 24(a2)
-; RV32ZVE32F-NEXT: lw s7, 16(a2)
-; RV32ZVE32F-NEXT: lw s8, 8(a2)
+; RV32ZVE32F-NEXT: lw s5, 8(a2)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vlse32.v v8, (a2), zero
-; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s8
-; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s7
-; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s6
+; RV32ZVE32F-NEXT: lw s6, 16(a2)
+; RV32ZVE32F-NEXT: lw s7, 24(a2)
+; RV32ZVE32F-NEXT: lw a2, 32(a2)
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s5
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s6
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s7
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s4
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s3
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s2
@@ -5682,16 +5680,15 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: .LBB51_9: # %else14
-; RV32ZVE32F-NEXT: lw s0, 44(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s1, 40(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s2, 36(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s3, 32(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s4, 28(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s5, 24(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s6, 20(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s7, 16(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: lw s8, 12(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: addi sp, sp, 48
+; RV32ZVE32F-NEXT: lw s0, 28(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: lw s1, 24(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: lw s2, 20(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: lw s3, 16(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: lw s4, 12(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: lw s5, 8(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: lw s6, 4(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: lw s7, 0(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: addi sp, sp, 32
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB51_10: # %cond.store
; RV32ZVE32F-NEXT: lw a2, 4(a0)
@@ -10227,16 +10224,16 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idx
; RV32ZVE32F-NEXT: lw a2, 56(a1)
; RV32ZVE32F-NEXT: lw a3, 48(a1)
; RV32ZVE32F-NEXT: lw a4, 40(a1)
-; RV32ZVE32F-NEXT: lw a5, 32(a1)
-; RV32ZVE32F-NEXT: lw a6, 24(a1)
-; RV32ZVE32F-NEXT: lw a7, 16(a1)
-; RV32ZVE32F-NEXT: lw t0, 8(a1)
+; RV32ZVE32F-NEXT: lw a5, 8(a1)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vlse32.v v8, (a1), zero
-; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0
-; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7
-; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6
+; RV32ZVE32F-NEXT: lw a6, 16(a1)
+; RV32ZVE32F-NEXT: lw a7, 24(a1)
+; RV32ZVE32F-NEXT: lw a1, 32(a1)
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a1
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a3
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll
index 19f3d3c..7be015e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll
@@ 
-19,9 +19,9 @@ define <2 x half> @vp_nearbyint_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext % ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <2 x half> @llvm.vp.nearbyint.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) ret <2 x half> %v @@ -38,9 +38,9 @@ define <2 x half> @vp_nearbyint_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <2 x half> @llvm.vp.nearbyint.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x half> %v @@ -61,9 +61,9 @@ define <4 x half> @vp_nearbyint_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext % ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <4 x half> @llvm.vp.nearbyint.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) ret <4 x half> %v @@ -80,9 +80,9 @@ define <4 x half> @vp_nearbyint_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <4 x half> @llvm.vp.nearbyint.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x half> %v @@ -103,9 +103,9 @@ define <8 x half> @vp_nearbyint_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext % ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <8 x half> @llvm.vp.nearbyint.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) ret <8 x half> %v @@ -122,9 +122,9 @@ define <8 x half> @vp_nearbyint_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <8 x half> @llvm.vp.nearbyint.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x half> %v @@ -147,9 +147,9 @@ define <16 x half> @vp_nearbyint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroe ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <16 x half> @llvm.vp.nearbyint.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) ret <16 x half> %v @@ -166,9 +166,9 @@ define <16 x half> @vp_nearbyint_v16f16_unmasked(<16 x half> %va, i32 zeroext %e ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v 
v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <16 x half> @llvm.vp.nearbyint.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x half> %v @@ -189,9 +189,9 @@ define <2 x float> @vp_nearbyint_v2f32(<2 x float> %va, <2 x i1> %m, i32 zeroext ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <2 x float> @llvm.vp.nearbyint.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl) ret <2 x float> %v @@ -208,9 +208,9 @@ define <2 x float> @vp_nearbyint_v2f32_unmasked(<2 x float> %va, i32 zeroext %ev ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <2 x float> @llvm.vp.nearbyint.v2f32(<2 x float> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x float> %v @@ -231,9 +231,9 @@ define <4 x float> @vp_nearbyint_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroext ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <4 x float> @llvm.vp.nearbyint.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl) ret <4 x float> %v @@ -250,9 +250,9 @@ define <4 x float> @vp_nearbyint_v4f32_unmasked(<4 x float> %va, i32 zeroext %ev ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <4 x float> @llvm.vp.nearbyint.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x float> %v @@ -275,9 +275,9 @@ define <8 x float> @vp_nearbyint_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <8 x float> @llvm.vp.nearbyint.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl) ret <8 x float> %v @@ -294,9 +294,9 @@ define <8 x float> @vp_nearbyint_v8f32_unmasked(<8 x float> %va, i32 zeroext %ev ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <8 x float> @llvm.vp.nearbyint.v8f32(<8 x float> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x float> %v @@ -319,9 +319,9 @@ define <16 x float> @vp_nearbyint_v16f32(<16 x float> %va, <16 x i1> %m, i32 zer ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, 
mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <16 x float> @llvm.vp.nearbyint.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl) ret <16 x float> %v @@ -338,9 +338,9 @@ define <16 x float> @vp_nearbyint_v16f32_unmasked(<16 x float> %va, i32 zeroext ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <16 x float> @llvm.vp.nearbyint.v16f32(<16 x float> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x float> %v @@ -361,9 +361,9 @@ define <2 x double> @vp_nearbyint_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <2 x double> @llvm.vp.nearbyint.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) ret <2 x double> %v @@ -380,9 +380,9 @@ define <2 x double> @vp_nearbyint_v2f64_unmasked(<2 x double> %va, i32 zeroext % ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <2 x double> @llvm.vp.nearbyint.v2f64(<2 x double> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v @@ -405,9 +405,9 @@ define <4 x double> @vp_nearbyint_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroe ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) ret <4 x double> %v @@ -424,9 +424,9 @@ define <4 x double> @vp_nearbyint_v4f64_unmasked(<4 x double> %va, i32 zeroext % ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v @@ -449,9 +449,9 @@ define <8 x double> @vp_nearbyint_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroe ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) ret <8 x double> %v @@ -468,9 +468,9 @@ define <8 x double> @vp_nearbyint_v8f64_unmasked(<8 x double> %va, i32 zeroext % ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <8 x double> @llvm.vp.nearbyint.v8f64(<8 
x double> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x double> %v @@ -493,9 +493,9 @@ define <15 x double> @vp_nearbyint_v15f64(<15 x double> %va, <15 x i1> %m, i32 z ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <15 x double> @llvm.vp.nearbyint.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) ret <15 x double> %v @@ -512,9 +512,9 @@ define <15 x double> @vp_nearbyint_v15f64_unmasked(<15 x double> %va, i32 zeroex ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <15 x double> @llvm.vp.nearbyint.v15f64(<15 x double> %va, <15 x i1> splat (i1 true), i32 %evl) ret <15 x double> %v @@ -537,9 +537,9 @@ define <16 x double> @vp_nearbyint_v16f64(<16 x double> %va, <16 x i1> %m, i32 z ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) ret <16 x double> %v @@ -556,9 +556,9 @@ define <16 x double> @vp_nearbyint_v16f64_unmasked(<16 x double> %va, i32 zeroex ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x double> %v @@ -617,9 +617,9 @@ define <32 x double> @vp_nearbyint_v32f64(<32 x double> %va, <32 x i1> %m, i32 z ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vmv.v.v v16, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 @@ -660,9 +660,9 @@ define <32 x double> @vp_nearbyint_v32f64_unmasked(<32 x double> %va, i32 zeroex ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.nearbyint.v32f64(<32 x double> %va, <32 x i1> splat (i1 true), i32 %evl) ret <32 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll index 7dcd4c4..ed2ed2a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll @@ -5,8 +5,8 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; RV32-LABEL: vselect_vv_v6i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: lbu a2, 0(a2) +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vle32.v v8, (a1) ; RV32-NEXT: slli 
a1, a2, 30 ; RV32-NEXT: srli a1, a1, 31 @@ -35,8 +35,8 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; ; RV64-LABEL: vselect_vv_v6i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: lbu a2, 0(a2) +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vle32.v v8, (a1) ; RV64-NEXT: slli a1, a2, 62 ; RV64-NEXT: srli a1, a1, 63 @@ -73,8 +73,8 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) { define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) { ; RV32-LABEL: vselect_vx_v6i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: lbu a2, 0(a2) +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vle32.v v8, (a1) ; RV32-NEXT: slli a1, a2, 30 ; RV32-NEXT: srli a1, a1, 31 @@ -104,8 +104,8 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) { ; ; RV64-LABEL: vselect_vx_v6i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: lbu a2, 0(a2) +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vle32.v v8, (a1) ; RV64-NEXT: slli a1, a2, 62 ; RV64-NEXT: srli a1, a1, 63 @@ -144,8 +144,8 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) { define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) { ; RV32-LABEL: vselect_vi_v6i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: lbu a1, 0(a1) +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: slli a0, a1, 30 ; RV32-NEXT: srli a0, a0, 31 @@ -175,8 +175,8 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) { ; ; RV64-LABEL: vselect_vi_v6i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: lbu a1, 0(a1) +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: slli a0, a1, 62 ; RV64-NEXT: srli a0, a0, 63 @@ -214,8 +214,8 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) { define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; RV32-LABEL: vselect_vv_v6f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: lbu a2, 0(a2) +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vle32.v v8, (a1) ; RV32-NEXT: slli a1, a2, 30 ; RV32-NEXT: srli a1, a1, 31 @@ -244,8 +244,8 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; ; RV64-LABEL: vselect_vv_v6f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: lbu a2, 0(a2) +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vle32.v v8, (a1) ; RV64-NEXT: slli a1, a2, 62 ; RV64-NEXT: srli a1, a1, 63 @@ -282,8 +282,8 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) { define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) { ; RV32-LABEL: vselect_vx_v6f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: lbu a1, 0(a1) +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: slli a0, a1, 30 ; RV32-NEXT: srli a0, a0, 31 @@ -313,8 +313,8 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) { ; ; RV64-LABEL: vselect_vx_v6f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: lbu a1, 0(a1) +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: slli a0, a1, 62 ; RV64-NEXT: srli a0, a0, 63 @@ -353,8 +353,8 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) { define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) { ; RV32-LABEL: 
vselect_vfpzero_v6f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: lbu a1, 0(a1) +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: slli a0, a1, 30 ; RV32-NEXT: srli a0, a0, 31 @@ -384,8 +384,8 @@ define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) { ; ; RV64-LABEL: vselect_vfpzero_v6f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: lbu a1, 0(a1) +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: slli a0, a1, 62 ; RV64-NEXT: srli a0, a0, 63 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll index a4a5917..b1726be 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll @@ -768,8 +768,8 @@ define <4 x i32> @vwadd_vx_v4i32_i32(ptr %x, ptr %y) { define <2 x i64> @vwadd_vx_v2i64_i8(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwadd_vx_v2i64_i8: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: lb a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: vmv.v.x v8, a1 ; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma @@ -796,8 +796,8 @@ define <2 x i64> @vwadd_vx_v2i64_i8(ptr %x, ptr %y) nounwind { define <2 x i64> @vwadd_vx_v2i64_i16(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwadd_vx_v2i64_i16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: lh a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: vmv.v.x v8, a1 ; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma @@ -824,8 +824,8 @@ define <2 x i64> @vwadd_vx_v2i64_i16(ptr %x, ptr %y) nounwind { define <2 x i64> @vwadd_vx_v2i64_i32(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwadd_vx_v2i64_i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: vmv.v.x v8, a1 ; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma @@ -853,9 +853,9 @@ define <2 x i64> @vwadd_vx_v2i64_i64(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwadd_vx_v2i64_i64: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: lw a2, 4(a1) ; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: sw a2, 12(sp) ; RV32-NEXT: sw a1, 8(sp) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll index bc0bf5d..f6d9695 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll @@ -769,8 +769,8 @@ define <2 x i64> @vwaddu_vx_v2i64_i8(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwaddu_vx_v2i64_i8: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: lbu a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) @@ -801,8 +801,8 @@ define <2 x i64> @vwaddu_vx_v2i64_i16(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwaddu_vx_v2i64_i16: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: lhu a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) @@ 
-833,8 +833,8 @@ define <2 x i64> @vwaddu_vx_v2i64_i32(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwaddu_vx_v2i64_i32: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) @@ -865,9 +865,9 @@ define <2 x i64> @vwaddu_vx_v2i64_i64(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwaddu_vx_v2i64_i64: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: lw a2, 4(a1) ; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: sw a2, 12(sp) ; RV32-NEXT: sw a1, 8(sp) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll index 2abd34f..c87584a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll @@ -883,9 +883,9 @@ define <2 x i64> @vwmul_vx_v2i64_i64(ptr %x, ptr %y) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: lw a2, 4(a1) ; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: sw a2, 12(sp) ; RV32-NEXT: sw a1, 8(sp) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll index 921037d..a569845 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll @@ -793,8 +793,8 @@ define <2 x i64> @vwmulsu_vx_v2i64_i8(ptr %x, ptr %y) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: lbu a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) @@ -827,8 +827,8 @@ define <2 x i64> @vwmulsu_vx_v2i64_i16(ptr %x, ptr %y) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: lhu a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) @@ -861,8 +861,8 @@ define <2 x i64> @vwmulsu_vx_v2i64_i32(ptr %x, ptr %y) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll index 154093d..2782a5f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll @@ -769,8 +769,8 @@ define <4 x i32> @vwsub_vx_v4i32_i32(ptr %x, ptr %y) { define <2 x i64> @vwsub_vx_v2i64_i8(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwsub_vx_v2i64_i8: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: lb a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: vmv.v.x v8, a1 ; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma @@ -798,8 +798,8 @@ define <2 x i64> @vwsub_vx_v2i64_i8(ptr %x, ptr %y) nounwind 
{ define <2 x i64> @vwsub_vx_v2i64_i16(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwsub_vx_v2i64_i16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: lh a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: vmv.v.x v8, a1 ; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma @@ -827,8 +827,8 @@ define <2 x i64> @vwsub_vx_v2i64_i16(ptr %x, ptr %y) nounwind { define <2 x i64> @vwsub_vx_v2i64_i32(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwsub_vx_v2i64_i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: vmv.v.x v8, a1 ; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma @@ -856,9 +856,9 @@ define <2 x i64> @vwsub_vx_v2i64_i64(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwsub_vx_v2i64_i64: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: lw a2, 4(a1) ; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: sw a2, 12(sp) ; RV32-NEXT: sw a1, 8(sp) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll index a084b53..ccbc26c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll @@ -770,8 +770,8 @@ define <2 x i64> @vwsubu_vx_v2i64_i8(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwsubu_vx_v2i64_i8: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: lbu a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) @@ -803,8 +803,8 @@ define <2 x i64> @vwsubu_vx_v2i64_i16(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwsubu_vx_v2i64_i16: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: lhu a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) @@ -836,8 +836,8 @@ define <2 x i64> @vwsubu_vx_v2i64_i32(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwsubu_vx_v2i64_i32: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: sw a1, 8(sp) @@ -868,9 +868,9 @@ define <2 x i64> @vwsubu_vx_v2i64_i64(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwsubu_vx_v2i64_i64: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: lw a2, 4(a1) ; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) ; RV32-NEXT: sw a2, 12(sp) ; RV32-NEXT: sw a1, 8(sp) diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll index b78b866..02cfd3d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll @@ -1012,77 +1012,99 @@ define <vscale x 16 x double> @vfmax_vv_nxv16f64(<vscale x 16 x double> %va, <vs ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 42 +; CHECK-NEXT: li a3, 36 ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 
0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x24, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 36 * vlenb ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a3, a1, 3 ; CHECK-NEXT: add a3, a0, a3 ; CHECK-NEXT: vl8re64.v v24, (a3) ; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a3, a3, 4 +; CHECK-NEXT: li a4, 27 +; CHECK-NEXT: mul a3, a3, a4 ; CHECK-NEXT: add a3, sp, a3 ; CHECK-NEXT: addi a3, a3, 16 ; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: srli a3, a1, 3 ; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma ; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: slli a4, a4, 5 +; CHECK-NEXT: li a5, 18 +; CHECK-NEXT: mul a4, a4, a5 ; CHECK-NEXT: add a4, sp, a4 ; CHECK-NEXT: addi a4, a4, 16 ; CHECK-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill -; CHECK-NEXT: vslidedown.vx v24, v0, a3 +; CHECK-NEXT: vslidedown.vx v7, v0, a3 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: sub a3, a2, a1 ; CHECK-NEXT: sltu a4, a2, a3 ; CHECK-NEXT: addi a4, a4, -1 ; CHECK-NEXT: and a3, a4, a3 -; CHECK-NEXT: vl8re64.v v0, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a4, a0, 5 -; CHECK-NEXT: add a0, a4, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v7, v24 -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmfeq.vv v26, v16, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v26 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: li a4, 27 +; CHECK-NEXT: mul a3, a3, a4 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v24, v16, v24, v0 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: li a4, 10 +; CHECK-NEXT: mul a3, a3, a4 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: li a4, 27 +; CHECK-NEXT: mul a3, a3, a4 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v16, v24, v24, v0.t +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v16, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vl8re64.v v0, (a0) ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: li a3, 19 +; CHECK-NEXT: mul a0, a0, a3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv8r.v v16, v8 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmfeq.vv v17, v24, v24, v0.t +; 
CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a0, a0, a3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v17 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: li a3, 10 +; CHECK-NEXT: mul a0, a0, a3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmax.vv v8, v8, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: li a3, 10 +; CHECK-NEXT: mul a0, a0, a3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill @@ -1092,49 +1114,63 @@ define <vscale x 16 x double> @vfmax_vv_nxv16f64(<vscale x 16 x double> %va, <vs ; CHECK-NEXT: .LBB28_2: ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: li a1, 18 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 5 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: li a1, 27 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 27 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 27 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi 
a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 42 +; CHECK-NEXT: li a1, 36 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 @@ -1162,19 +1198,19 @@ define <vscale x 16 x double> @vfmax_vv_nxv16f64_unmasked(<vscale x 16 x double> ; CHECK-NEXT: slli a3, a1, 3 ; CHECK-NEXT: add a3, a0, a3 ; CHECK-NEXT: vl8re64.v v24, (a3) +; CHECK-NEXT: sub a3, a2, a1 +; CHECK-NEXT: sltu a4, a2, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v7, v24, v24 ; CHECK-NEXT: vl8re64.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: sub a0, a2, a1 -; CHECK-NEXT: sltu a3, a2, a0 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a0, a3, a0 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 ; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll index 69c7615..72a47ca 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll @@ -1012,77 +1012,99 @@ define <vscale x 16 x double> @vfmin_vv_nxv16f64(<vscale x 16 x double> %va, <vs ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 42 +; CHECK-NEXT: li a3, 36 ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x24, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 36 * vlenb ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a3, a1, 3 ; CHECK-NEXT: add a3, a0, a3 ; CHECK-NEXT: vl8re64.v v24, (a3) ; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a3, a3, 4 +; CHECK-NEXT: li a4, 27 +; CHECK-NEXT: mul a3, a3, a4 ; CHECK-NEXT: add a3, sp, a3 ; CHECK-NEXT: addi a3, a3, 16 ; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: srli a3, a1, 3 ; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma ; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: slli a4, a4, 5 +; CHECK-NEXT: li a5, 18 +; CHECK-NEXT: mul a4, a4, a5 ; CHECK-NEXT: add a4, sp, a4 ; CHECK-NEXT: addi a4, a4, 16 ; CHECK-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill -; CHECK-NEXT: vslidedown.vx v24, v0, a3 +; CHECK-NEXT: vslidedown.vx v7, v0, a3 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: sub a3, a2, a1 ; CHECK-NEXT: sltu a4, a2, a3 ; CHECK-NEXT: addi a4, a4, -1 ; CHECK-NEXT: and a3, a4, a3 -; CHECK-NEXT: vl8re64.v v0, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a4, a0, 5 -; CHECK-NEXT: add a0, a4, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v 
v0, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v7, v24 -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmfeq.vv v26, v16, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v26 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: li a4, 27 +; CHECK-NEXT: mul a3, a3, a4 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v24, v16, v24, v0 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: li a4, 10 +; CHECK-NEXT: mul a3, a3, a4 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: li a4, 27 +; CHECK-NEXT: mul a3, a3, a4 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v16, v24, v24, v0.t +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v16, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vl8re64.v v0, (a0) ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: li a3, 19 +; CHECK-NEXT: mul a0, a0, a3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv8r.v v16, v8 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmfeq.vv v17, v24, v24, v0.t +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a0, a0, a3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v17 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: li a3, 10 +; CHECK-NEXT: mul a0, a0, a3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmin.vv v8, v8, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: li a3, 10 +; CHECK-NEXT: mul a0, a0, a3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill @@ -1092,49 +1114,63 @@ define <vscale x 16 x double> @vfmin_vv_nxv16f64(<vscale x 16 x double> %va, <vs ; CHECK-NEXT: .LBB28_2: ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, 
a0, 16 -; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: li a1, 18 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 5 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: li a1, 27 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 27 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 27 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 42 +; CHECK-NEXT: li a1, 36 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 @@ -1162,19 +1198,19 @@ define <vscale x 16 x double> @vfmin_vv_nxv16f64_unmasked(<vscale x 16 x double> ; CHECK-NEXT: slli a3, a1, 3 ; CHECK-NEXT: add a3, a0, a3 ; CHECK-NEXT: vl8re64.v v24, (a3) +; CHECK-NEXT: sub a3, a2, a1 +; CHECK-NEXT: sltu a4, a2, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v7, v24, v24 ; CHECK-NEXT: vl8re64.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: sub a0, a2, a1 -; CHECK-NEXT: sltu a3, a2, a0 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a0, a3, a0 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 ; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 ; 
CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll index f90237b..f88a9b3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll @@ -19,9 +19,9 @@ define <vscale x 1 x half> @nearbyint_nxv1f16(<vscale x 1 x half> %v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <vscale x 1 x half> @llvm.experimental.constrained.nearbyint.nxv1f16(<vscale x 1 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <vscale x 1 x half> %r @@ -42,9 +42,9 @@ define <vscale x 2 x half> @nearbyint_nxv2f16(<vscale x 2 x half> %v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <vscale x 2 x half> @llvm.experimental.constrained.nearbyint.nxv2f16(<vscale x 2 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <vscale x 2 x half> %r @@ -65,9 +65,9 @@ define <vscale x 4 x half> @nearbyint_nxv4f16(<vscale x 4 x half> %v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <vscale x 4 x half> @llvm.experimental.constrained.nearbyint.nxv4f16(<vscale x 4 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <vscale x 4 x half> %r @@ -88,9 +88,9 @@ define <vscale x 8 x half> @nearbyint_nxv8f16(<vscale x 8 x half> %v) strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <vscale x 8 x half> @llvm.experimental.constrained.nearbyint.nxv8f16(<vscale x 8 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <vscale x 8 x half> %r @@ -111,9 +111,9 @@ define <vscale x 16 x half> @nearbyint_nxv16f16(<vscale x 16 x half> %v) strictf ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <vscale x 16 x half> @llvm.experimental.constrained.nearbyint.nxv16f16(<vscale x 16 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <vscale x 16 x half> %r @@ -134,9 +134,9 @@ define <vscale x 32 x half> @nearbyint_nxv32f16(<vscale x 32 x half> %v) strictf ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <vscale x 32 x half> 
@llvm.experimental.constrained.nearbyint.nxv32f16(<vscale x 32 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <vscale x 32 x half> %r @@ -157,9 +157,9 @@ define <vscale x 1 x float> @nearbyint_nxv1f32(<vscale x 1 x float> %v) strictfp ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <vscale x 1 x float> @llvm.experimental.constrained.nearbyint.nxv1f32(<vscale x 1 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <vscale x 1 x float> %r @@ -180,9 +180,9 @@ define <vscale x 2 x float> @nearbyint_nxv2f32(<vscale x 2 x float> %v) strictfp ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <vscale x 2 x float> @llvm.experimental.constrained.nearbyint.nxv2f32(<vscale x 2 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <vscale x 2 x float> %r @@ -203,9 +203,9 @@ define <vscale x 4 x float> @nearbyint_nxv4f32(<vscale x 4 x float> %v) strictfp ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <vscale x 4 x float> @llvm.experimental.constrained.nearbyint.nxv4f32(<vscale x 4 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <vscale x 4 x float> %r @@ -226,9 +226,9 @@ define <vscale x 8 x float> @nearbyint_nxv8f32(<vscale x 8 x float> %v) strictfp ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <vscale x 8 x float> @llvm.experimental.constrained.nearbyint.nxv8f32(<vscale x 8 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <vscale x 8 x float> %r @@ -249,9 +249,9 @@ define <vscale x 16 x float> @nearbyint_nxv16f32(<vscale x 16 x float> %v) stric ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <vscale x 16 x float> @llvm.experimental.constrained.nearbyint.nxv16f32(<vscale x 16 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <vscale x 16 x float> %r @@ -272,9 +272,9 @@ define <vscale x 1 x double> @nearbyint_nxv1f64(<vscale x 1 x double> %v) strict ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <vscale x 1 x double> @llvm.experimental.constrained.nearbyint.nxv1f64(<vscale x 1 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <vscale x 1 x double> %r @@ -295,9 +295,9 @@ define <vscale x 2 x double> 
@nearbyint_nxv2f64(<vscale x 2 x double> %v) strict ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <vscale x 2 x double> @llvm.experimental.constrained.nearbyint.nxv2f64(<vscale x 2 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <vscale x 2 x double> %r @@ -318,9 +318,9 @@ define <vscale x 4 x double> @nearbyint_nxv4f64(<vscale x 4 x double> %v) strict ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <vscale x 4 x double> @llvm.experimental.constrained.nearbyint.nxv4f64(<vscale x 4 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <vscale x 4 x double> %r @@ -341,9 +341,9 @@ define <vscale x 8 x double> @nearbyint_nxv8f64(<vscale x 8 x double> %v) strict ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <vscale x 8 x double> @llvm.experimental.constrained.nearbyint.nxv8f64(<vscale x 8 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <vscale x 8 x double> %r diff --git a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll index 9aa356b..9e14852 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll @@ -15,9 +15,9 @@ define <vscale x 1 x half> @nearbyint_nxv1f16(<vscale x 1 x half> %x) { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call <vscale x 1 x half> @llvm.nearbyint.nxv1f16(<vscale x 1 x half> %x) ret <vscale x 1 x half> %a @@ -35,9 +35,9 @@ define <vscale x 2 x half> @nearbyint_nxv2f16(<vscale x 2 x half> %x) { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call <vscale x 2 x half> @llvm.nearbyint.nxv2f16(<vscale x 2 x half> %x) ret <vscale x 2 x half> %a @@ -55,9 +55,9 @@ define <vscale x 4 x half> @nearbyint_nxv4f16(<vscale x 4 x half> %x) { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call <vscale x 4 x half> @llvm.nearbyint.nxv4f16(<vscale x 4 x half> %x) ret <vscale x 4 x half> %a @@ -75,9 +75,9 @@ define <vscale x 8 x half> @nearbyint_nxv8f16(<vscale x 8 x half> %x) { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: 
vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call <vscale x 8 x half> @llvm.nearbyint.nxv8f16(<vscale x 8 x half> %x) ret <vscale x 8 x half> %a @@ -95,9 +95,9 @@ define <vscale x 16 x half> @nearbyint_nxv16f16(<vscale x 16 x half> %x) { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call <vscale x 16 x half> @llvm.nearbyint.nxv16f16(<vscale x 16 x half> %x) ret <vscale x 16 x half> %a @@ -115,9 +115,9 @@ define <vscale x 32 x half> @nearbyint_nxv32f16(<vscale x 32 x half> %x) { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call <vscale x 32 x half> @llvm.nearbyint.nxv32f16(<vscale x 32 x half> %x) ret <vscale x 32 x half> %a @@ -135,9 +135,9 @@ define <vscale x 1 x float> @nearbyint_nxv1f32(<vscale x 1 x float> %x) { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call <vscale x 1 x float> @llvm.nearbyint.nxv1f32(<vscale x 1 x float> %x) ret <vscale x 1 x float> %a @@ -155,9 +155,9 @@ define <vscale x 2 x float> @nearbyint_nxv2f32(<vscale x 2 x float> %x) { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call <vscale x 2 x float> @llvm.nearbyint.nxv2f32(<vscale x 2 x float> %x) ret <vscale x 2 x float> %a @@ -175,9 +175,9 @@ define <vscale x 4 x float> @nearbyint_nxv4f32(<vscale x 4 x float> %x) { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call <vscale x 4 x float> @llvm.nearbyint.nxv4f32(<vscale x 4 x float> %x) ret <vscale x 4 x float> %a @@ -195,9 +195,9 @@ define <vscale x 8 x float> @nearbyint_nxv8f32(<vscale x 8 x float> %x) { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call <vscale x 8 x float> @llvm.nearbyint.nxv8f32(<vscale x 8 x float> %x) ret <vscale x 8 x float> %a @@ -215,9 +215,9 @@ define <vscale x 16 x float> @nearbyint_nxv16f32(<vscale x 16 x float> %x) { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call <vscale x 16 x float> @llvm.nearbyint.nxv16f32(<vscale x 16 x float> %x) ret <vscale x 16 x float> %a @@ -235,9 +235,9 @@ define <vscale x 1 x double> @nearbyint_nxv1f64(<vscale x 1 x 
double> %x) { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call <vscale x 1 x double> @llvm.nearbyint.nxv1f64(<vscale x 1 x double> %x) ret <vscale x 1 x double> %a @@ -255,9 +255,9 @@ define <vscale x 2 x double> @nearbyint_nxv2f64(<vscale x 2 x double> %x) { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double> %x) ret <vscale x 2 x double> %a @@ -275,9 +275,9 @@ define <vscale x 4 x double> @nearbyint_nxv4f64(<vscale x 4 x double> %x) { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call <vscale x 4 x double> @llvm.nearbyint.nxv4f64(<vscale x 4 x double> %x) ret <vscale x 4 x double> %a @@ -295,9 +295,9 @@ define <vscale x 8 x double> @nearbyint_nxv8f64(<vscale x 8 x double> %x) { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call <vscale x 8 x double> @llvm.nearbyint.nxv8f64(<vscale x 8 x double> %x) ret <vscale x 8 x double> %a diff --git a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll index 277cd7d..249f765 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll @@ -960,108 +960,87 @@ define <vscale x 16 x i64> @fshr_v16i64(<vscale x 16 x i64> %a, <vscale x 16 x i ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 56 +; CHECK-NEXT: li a3, 40 ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a3, vlenb ; CHECK-NEXT: slli a1, a3, 3 ; CHECK-NEXT: add a5, a0, a1 +; CHECK-NEXT: srli a6, a3, 3 +; CHECK-NEXT: vsetvli a7, zero, e8, mf4, ta, ma +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: vl8re64.v v16, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: 
sub a1, a4, a3 +; CHECK-NEXT: sltu a7, a4, a1 +; CHECK-NEXT: addi a7, a7, -1 +; CHECK-NEXT: and a7, a7, a1 +; CHECK-NEXT: li a1, 63 ; CHECK-NEXT: vl8re64.v v8, (a5) ; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: slli a5, a5, 3 +; CHECK-NEXT: slli a5, a5, 5 ; CHECK-NEXT: add a5, sp, a5 ; CHECK-NEXT: addi a5, a5, 16 ; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: srli a5, a3, 3 -; CHECK-NEXT: vsetvli a6, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v0, a5 -; CHECK-NEXT: add a5, a2, a1 -; CHECK-NEXT: sub a1, a4, a3 -; CHECK-NEXT: sltu a6, a4, a1 -; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a6, a6, a1 -; CHECK-NEXT: li a1, 63 -; CHECK-NEXT: vl8re64.v v8, (a5) +; CHECK-NEXT: vslidedown.vx v0, v0, a6 +; CHECK-NEXT: vsetvli zero, a7, e64, m8, ta, ma +; CHECK-NEXT: vand.vx v8, v16, a1, v0.t ; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a7, 40 -; CHECK-NEXT: mul a5, a5, a7 +; CHECK-NEXT: slli a5, a5, 5 ; CHECK-NEXT: add a5, sp, a5 ; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload +; CHECK-NEXT: vsrl.vv v8, v16, v8, v0.t +; CHECK-NEXT: addi a5, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re64.v v16, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re64.v v16, (a2) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 48 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 40 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vand.vx v8, v8, a1, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vsrl.vv v16, v16, v8, v0.t +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: slli a5, a5, 3 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vx v16, v8, a1, v0.t +; CHECK-NEXT: vl8re64.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vl8re64.v v8, (a2) ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 40 -; CHECK-NEXT: mul a0, a0, a2 +; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: vand.vx v16, v8, a1, v0.t +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 24 -; CHECK-NEXT: mul a0, a0, a2 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; 
CHECK-NEXT: vsll.vv v8, v8, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vor.vv v8, v8, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 40 -; CHECK-NEXT: mul a0, a0, a2 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill @@ -1072,66 +1051,50 @@ define <vscale x 16 x i64> @fshr_v16i64(<vscale x 16 x i64> %a, <vscale x 16 x i ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 48 -; CHECK-NEXT: mul a0, a0, a2 +; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vand.vx v8, v8, a1, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 24 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 24 -; CHECK-NEXT: mul a0, a0, a2 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vsrl.vv v8, v8, v16, v0.t +; CHECK-NEXT: vsrl.vv v8, v16, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 24 -; CHECK-NEXT: mul a0, a0, a2 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 48 -; CHECK-NEXT: mul a0, a0, a2 +; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vnot.v v16, v8, v0.t ; CHECK-NEXT: vand.vx v16, v16, a1, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vor.vv v8, v8, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 40 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 56 +; CHECK-NEXT: li a1, 40 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 @@ -1147,109 +1110,89 @@ define <vscale x 16 x i64> @fshl_v16i64(<vscale x 16 x i64> %a, <vscale x 16 x i ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 56 +; CHECK-NEXT: li a3, 40 ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 
0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a1, a3, 3 -; CHECK-NEXT: add a5, a0, a1 -; CHECK-NEXT: vl8re64.v v8, (a5) -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: slli a5, a5, 4 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: srli a5, a3, 3 +; CHECK-NEXT: slli a5, a3, 3 +; CHECK-NEXT: srli a1, a3, 3 ; CHECK-NEXT: vsetvli a6, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v0, a5 -; CHECK-NEXT: add a5, a2, a1 +; CHECK-NEXT: vslidedown.vx v0, v0, a1 +; CHECK-NEXT: add a1, a2, a5 +; CHECK-NEXT: vl8re64.v v8, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: sub a1, a4, a3 ; CHECK-NEXT: sltu a6, a4, a1 ; CHECK-NEXT: addi a6, a6, -1 ; CHECK-NEXT: and a6, a6, a1 ; CHECK-NEXT: li a1, 63 -; CHECK-NEXT: vl8re64.v v8, (a5) -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a7, 40 -; CHECK-NEXT: mul a5, a5, a7 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re64.v v16, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a5, 24 -; CHECK-NEXT: mul a0, a0, a5 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re64.v v16, (a2) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 48 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma +; CHECK-NEXT: vand.vx v8, v8, a1, v0.t +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: slli a6, a6, 5 +; CHECK-NEXT: add a6, sp, a6 +; CHECK-NEXT: addi a6, a6, 16 +; CHECK-NEXT: vl8r.v v16, (a6) # Unknown-size Folded Reload +; CHECK-NEXT: vsll.vv v8, v16, v8, v0.t +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: slli a6, a6, 3 +; CHECK-NEXT: add a6, sp, a6 +; CHECK-NEXT: addi a6, a6, 16 +; CHECK-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill +; CHECK-NEXT: add a5, a0, a5 +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: slli a6, a6, 4 +; CHECK-NEXT: add a6, sp, a6 +; CHECK-NEXT: addi a6, a6, 16 +; CHECK-NEXT: vl8r.v v8, (a6) # Unknown-size Folded Reload +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vl8re64.v v16, (a5) +; CHECK-NEXT: vand.vx v8, v8, a1, v0.t +; CHECK-NEXT: addi a5, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: vl8re64.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 40 -; CHECK-NEXT: mul a0, a0, a2 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vand.vx v8, 
v8, a1, v0.t -; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vl8re64.v v8, (a2) ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vsrl.vi v16, v16, 1, v0.t ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vsll.vv v16, v16, v8, v0.t +; CHECK-NEXT: vsrl.vv v16, v16, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 40 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: vand.vx v16, v8, a1, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vsrl.vi v8, v8, 1, v0.t -; CHECK-NEXT: vsrl.vv v8, v8, v16, v0.t +; CHECK-NEXT: vor.vv v8, v8, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vor.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 40 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a4, a3, .LBB47_2 ; CHECK-NEXT: # %bb.1: @@ -1258,63 +1201,56 @@ define <vscale x 16 x i64> @fshl_v16i64(<vscale x 16 x i64> %a, <vscale x 16 x i ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 48 -; CHECK-NEXT: mul a0, a0, a2 +; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vand.vx v8, v8, a1, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: mul a0, a0, a2 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsll.vv v8, v8, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: mul a0, a0, a2 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vsll.vv v16, v16, v8, v0.t +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 48 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 ; 
CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vnot.v v16, v8, v0.t -; CHECK-NEXT: vand.vx v16, v16, a1, v0.t +; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: vand.vx v16, v8, a1, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsrl.vi v8, v8, 1, v0.t ; CHECK-NEXT: vsrl.vv v8, v8, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vor.vv v8, v16, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 40 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 56 +; CHECK-NEXT: li a1, 40 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll index e12f1cf..e260ae5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll @@ -1216,35 +1216,20 @@ define void @mgather_nxv16i64(<vscale x 8 x ptr> %ptrs0, <vscale x 8 x ptr> %ptr ; ; RV64-LABEL: mgather_nxv16i64: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 3 -; RV64-NEXT: sub sp, sp, a3 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; RV64-NEXT: vl8re64.v v24, (a0) -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV64-NEXT: vmv8r.v v16, v8 -; RV64-NEXT: vl8re64.v v8, (a1) ; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; RV64-NEXT: vluxei64.v v24, (zero), v16, v0.t +; RV64-NEXT: vluxei64.v v24, (zero), v8, v0.t +; RV64-NEXT: vl8re64.v v8, (a1) ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: srli a1, a0, 3 ; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vx v0, v0, a1 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vluxei64.v v8, (zero), v16, v0.t ; RV64-NEXT: slli a0, a0, 3 ; RV64-NEXT: add a0, a2, a0 ; RV64-NEXT: vs8r.v v8, (a0) ; RV64-NEXT: vs8r.v v24, (a2) -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %p0 = call <vscale x 16 x ptr> @llvm.vector.insert.nxv8p0.nxv16p0(<vscale x 16 x ptr> undef, <vscale x 8 x ptr> %ptrs0, i64 0) %p1 = call <vscale x 16 x ptr> @llvm.vector.insert.nxv8p0.nxv16p0(<vscale x 16 x ptr> %p0, <vscale x 8 x ptr> %ptrs1, i64 8) diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll index 0e09f59..fc8fdf4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll @@ -1714,8 +1714,8 @@ define void @mscatter_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double ; RV64-NEXT: vl8re64.v v24, (a0) ; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV64-NEXT: 
vl8re64.v v16, (a1) ; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV64-NEXT: vl8re64.v v16, (a1) ; RV64-NEXT: vsoxei64.v v8, (zero), v24, v0.t ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: srli a0, a0, 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll index 8bc2334..ebe8981 100644 --- a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll @@ -23,9 +23,9 @@ define <vscale x 1 x half> @vp_nearbyint_nxv1f16(<vscale x 1 x half> %va, <vscal ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv1f16: @@ -42,11 +42,11 @@ define <vscale x 1 x half> @vp_nearbyint_nxv1f16(<vscale x 1 x half> %va, <vscal ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t -; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: ret %v = call <vscale x 1 x half> @llvm.vp.nearbyint.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl) ret <vscale x 1 x half> %v @@ -63,9 +63,9 @@ define <vscale x 1 x half> @vp_nearbyint_nxv1f16_unmasked(<vscale x 1 x half> %v ; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv1f16_unmasked: @@ -80,11 +80,11 @@ define <vscale x 1 x half> @vp_nearbyint_nxv1f16_unmasked(<vscale x 1 x half> %v ; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t -; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: ret %v = call <vscale x 1 x half> @llvm.vp.nearbyint.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) ret <vscale x 1 x half> %v @@ -105,9 +105,9 @@ define <vscale x 2 x half> @vp_nearbyint_nxv2f16(<vscale x 2 x half> %va, <vscal ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv2f16: @@ -124,11 +124,11 @@ define <vscale x 2 x half> @vp_nearbyint_nxv2f16(<vscale x 2 x half> %va, <vscal ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t -; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 
+; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: ret %v = call <vscale x 2 x half> @llvm.vp.nearbyint.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl) ret <vscale x 2 x half> %v @@ -145,9 +145,9 @@ define <vscale x 2 x half> @vp_nearbyint_nxv2f16_unmasked(<vscale x 2 x half> %v ; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv2f16_unmasked: @@ -162,11 +162,11 @@ define <vscale x 2 x half> @vp_nearbyint_nxv2f16_unmasked(<vscale x 2 x half> %v ; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t -; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: ret %v = call <vscale x 2 x half> @llvm.vp.nearbyint.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) ret <vscale x 2 x half> %v @@ -187,9 +187,9 @@ define <vscale x 4 x half> @vp_nearbyint_nxv4f16(<vscale x 4 x half> %va, <vscal ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv4f16: @@ -208,11 +208,11 @@ define <vscale x 4 x half> @vp_nearbyint_nxv4f16(<vscale x 4 x half> %va, <vscal ; ZVFHMIN-NEXT: vmv1r.v v0, v9 ; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t -; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: ret %v = call <vscale x 4 x half> @llvm.vp.nearbyint.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl) ret <vscale x 4 x half> %v @@ -229,9 +229,9 @@ define <vscale x 4 x half> @vp_nearbyint_nxv4f16_unmasked(<vscale x 4 x half> %v ; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv4f16_unmasked: @@ -246,11 +246,11 @@ define <vscale x 4 x half> @vp_nearbyint_nxv4f16_unmasked(<vscale x 4 x half> %v ; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t -; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: ret %v = call <vscale x 4 x half> @llvm.vp.nearbyint.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) ret <vscale x 4 x half> %v @@ -273,9 +273,9 @@ define <vscale x 8 x half> @vp_nearbyint_nxv8f16(<vscale x 8 x half> %va, 
<vscal ; ZVFH-NEXT: vmv1r.v v0, v10 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv8f16: @@ -294,11 +294,11 @@ define <vscale x 8 x half> @vp_nearbyint_nxv8f16(<vscale x 8 x half> %va, <vscal ; ZVFHMIN-NEXT: vmv1r.v v0, v10 ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t -; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x half> @llvm.vp.nearbyint.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x half> %v @@ -315,9 +315,9 @@ define <vscale x 8 x half> @vp_nearbyint_nxv8f16_unmasked(<vscale x 8 x half> %v ; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv8f16_unmasked: @@ -332,11 +332,11 @@ define <vscale x 8 x half> @vp_nearbyint_nxv8f16_unmasked(<vscale x 8 x half> %v ; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t -; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x half> @llvm.vp.nearbyint.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) ret <vscale x 8 x half> %v @@ -359,9 +359,9 @@ define <vscale x 16 x half> @vp_nearbyint_nxv16f16(<vscale x 16 x half> %va, <vs ; ZVFH-NEXT: vmv1r.v v0, v12 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv16f16: @@ -380,11 +380,11 @@ define <vscale x 16 x half> @vp_nearbyint_nxv16f16(<vscale x 16 x half> %va, <vs ; ZVFHMIN-NEXT: vmv1r.v v0, v12 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t -; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: ret %v = call <vscale x 16 x half> @llvm.vp.nearbyint.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl) ret <vscale x 16 x half> %v @@ -401,9 +401,9 @@ define <vscale x 16 x half> @vp_nearbyint_nxv16f16_unmasked(<vscale x 16 x half> ; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: 
vp_nearbyint_nxv16f16_unmasked: @@ -418,11 +418,11 @@ define <vscale x 16 x half> @vp_nearbyint_nxv16f16_unmasked(<vscale x 16 x half> ; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t -; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: ret %v = call <vscale x 16 x half> @llvm.vp.nearbyint.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl) ret <vscale x 16 x half> %v @@ -445,9 +445,9 @@ define <vscale x 32 x half> @vp_nearbyint_nxv32f16(<vscale x 32 x half> %va, <vs ; ZVFH-NEXT: vmv1r.v v0, v16 ; ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv32f16: @@ -458,7 +458,7 @@ define <vscale x 32 x half> @vp_nearbyint_nxv32f16(<vscale x 32 x half> %va, <vs ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: vmv1r.v v7, v0 +; ZVFHMIN-NEXT: vmv1r.v v16, v0 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -488,29 +488,30 @@ define <vscale x 32 x half> @vp_nearbyint_nxv32f16(<vscale x 32 x half> %va, <vs ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 +; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v24 ; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB10_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vfabs.v v16, v24, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v7, v16, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v16, v8, fa5, v0.t ; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v24, v0.t -; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t -; ZVFHMIN-NEXT: fsflags a0 +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vfsgnj.vv v24, v16, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24 +; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v24 +; ZVFHMIN-NEXT: fsflags a0 +; ZVFHMIN-NEXT: vmv8r.v v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add sp, sp, a0 @@ -531,9 +532,9 @@ define <vscale x 32 x half> @vp_nearbyint_nxv32f16_unmasked(<vscale x 32 x half> ; ZVFH-NEXT: frflags a0 ; 
ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv32f16_unmasked: @@ -589,11 +590,11 @@ define <vscale x 32 x half> @vp_nearbyint_nxv32f16_unmasked(<vscale x 32 x half> ; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t -; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add sp, sp, a0 @@ -618,9 +619,9 @@ define <vscale x 1 x float> @vp_nearbyint_nxv1f32(<vscale x 1 x float> %va, <vsc ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <vscale x 1 x float> @llvm.vp.nearbyint.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 %evl) ret <vscale x 1 x float> %v @@ -637,9 +638,9 @@ define <vscale x 1 x float> @vp_nearbyint_nxv1f32_unmasked(<vscale x 1 x float> ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <vscale x 1 x float> @llvm.vp.nearbyint.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) ret <vscale x 1 x float> %v @@ -660,9 +661,9 @@ define <vscale x 2 x float> @vp_nearbyint_nxv2f32(<vscale x 2 x float> %va, <vsc ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <vscale x 2 x float> @llvm.vp.nearbyint.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl) ret <vscale x 2 x float> %v @@ -679,9 +680,9 @@ define <vscale x 2 x float> @vp_nearbyint_nxv2f32_unmasked(<vscale x 2 x float> ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <vscale x 2 x float> @llvm.vp.nearbyint.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) ret <vscale x 2 x float> %v @@ -704,9 +705,9 @@ define <vscale x 4 x float> @vp_nearbyint_nxv4f32(<vscale x 4 x float> %va, <vsc ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <vscale x 4 x float> @llvm.vp.nearbyint.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl) ret <vscale x 4 x float> %v @@ -723,9 +724,9 @@ define <vscale x 
4 x float> @vp_nearbyint_nxv4f32_unmasked(<vscale x 4 x float> ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <vscale x 4 x float> @llvm.vp.nearbyint.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) ret <vscale x 4 x float> %v @@ -748,9 +749,9 @@ define <vscale x 8 x float> @vp_nearbyint_nxv8f32(<vscale x 8 x float> %va, <vsc ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <vscale x 8 x float> @llvm.vp.nearbyint.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x float> %v @@ -767,9 +768,9 @@ define <vscale x 8 x float> @vp_nearbyint_nxv8f32_unmasked(<vscale x 8 x float> ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <vscale x 8 x float> @llvm.vp.nearbyint.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) ret <vscale x 8 x float> %v @@ -792,9 +793,9 @@ define <vscale x 16 x float> @vp_nearbyint_nxv16f32(<vscale x 16 x float> %va, < ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <vscale x 16 x float> @llvm.vp.nearbyint.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl) ret <vscale x 16 x float> %v @@ -811,9 +812,9 @@ define <vscale x 16 x float> @vp_nearbyint_nxv16f32_unmasked(<vscale x 16 x floa ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <vscale x 16 x float> @llvm.vp.nearbyint.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl) ret <vscale x 16 x float> %v @@ -834,9 +835,9 @@ define <vscale x 1 x double> @vp_nearbyint_nxv1f64(<vscale x 1 x double> %va, <v ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <vscale x 1 x double> @llvm.vp.nearbyint.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl) ret <vscale x 1 x double> %v @@ -853,9 +854,9 @@ define <vscale x 1 x double> @vp_nearbyint_nxv1f64_unmasked(<vscale x 1 x double ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <vscale x 1 x double> @llvm.vp.nearbyint.nxv1f64(<vscale 
x 1 x double> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) ret <vscale x 1 x double> %v @@ -878,9 +879,9 @@ define <vscale x 2 x double> @vp_nearbyint_nxv2f64(<vscale x 2 x double> %va, <v ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <vscale x 2 x double> @llvm.vp.nearbyint.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl) ret <vscale x 2 x double> %v @@ -897,9 +898,9 @@ define <vscale x 2 x double> @vp_nearbyint_nxv2f64_unmasked(<vscale x 2 x double ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <vscale x 2 x double> @llvm.vp.nearbyint.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) ret <vscale x 2 x double> %v @@ -922,9 +923,9 @@ define <vscale x 4 x double> @vp_nearbyint_nxv4f64(<vscale x 4 x double> %va, <v ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <vscale x 4 x double> @llvm.vp.nearbyint.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl) ret <vscale x 4 x double> %v @@ -941,9 +942,9 @@ define <vscale x 4 x double> @vp_nearbyint_nxv4f64_unmasked(<vscale x 4 x double ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <vscale x 4 x double> @llvm.vp.nearbyint.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) ret <vscale x 4 x double> %v @@ -966,9 +967,9 @@ define <vscale x 7 x double> @vp_nearbyint_nxv7f64(<vscale x 7 x double> %va, <v ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <vscale x 7 x double> @llvm.vp.nearbyint.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl) ret <vscale x 7 x double> %v @@ -985,9 +986,9 @@ define <vscale x 7 x double> @vp_nearbyint_nxv7f64_unmasked(<vscale x 7 x double ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <vscale x 7 x double> @llvm.vp.nearbyint.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> splat (i1 true), i32 %evl) ret <vscale x 7 x double> %v @@ -1010,9 +1011,9 @@ define <vscale x 8 x double> @vp_nearbyint_nxv8f64(<vscale x 8 x double> %va, <v ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: 
vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <vscale x 8 x double> @llvm.vp.nearbyint.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x double> %v @@ -1029,9 +1030,9 @@ define <vscale x 8 x double> @vp_nearbyint_nxv8f64_unmasked(<vscale x 8 x double ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <vscale x 8 x double> @llvm.vp.nearbyint.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) ret <vscale x 8 x double> %v @@ -1046,16 +1047,15 @@ define <vscale x 16 x double> @vp_nearbyint_nxv16f64(<vscale x 16 x double> %va, ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: vmv8r.v v24, v16 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v25, v0, a2 +; CHECK-NEXT: vslidedown.vx v6, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 @@ -1063,60 +1063,41 @@ define <vscale x 16 x double> @vp_nearbyint_nxv16f64(<vscale x 16 x double> %va, ; CHECK-NEXT: lui a3, %hi(.LCPI32_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3) ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vfabs.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v6, v16, fa5, v0.t ; CHECK-NEXT: frflags a2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB32_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB32_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v 
v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -1153,9 +1134,9 @@ define <vscale x 16 x double> @vp_nearbyint_nxv16f64_unmasked(<vscale x 16 x dou ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <vscale x 16 x double> @llvm.vp.nearbyint.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl) ret <vscale x 16 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll index 897bfde..cc96739 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll @@ -2203,17 +2203,17 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal ; ZVFH-NEXT: add a1, sp, a1 ; ZVFH-NEXT: addi a1, a1, 16 ; ZVFH-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; ZVFH-NEXT: csrr a1, vlenb -; ZVFH-NEXT: vsetvli a3, zero, e8, m1, ta, ma -; ZVFH-NEXT: slli a3, a1, 3 -; ZVFH-NEXT: add a3, a0, a3 -; ZVFH-NEXT: vl8re16.v v8, (a3) -; ZVFH-NEXT: slli a3, a1, 2 +; ZVFH-NEXT: csrr a3, vlenb +; ZVFH-NEXT: srli a1, a3, 1 +; ZVFH-NEXT: slli a4, a3, 3 +; ZVFH-NEXT: add a4, a0, a4 +; ZVFH-NEXT: vl8re16.v v8, (a4) +; ZVFH-NEXT: slli a3, a3, 2 ; ZVFH-NEXT: sub a4, a2, a3 ; ZVFH-NEXT: sltu a5, a2, a4 ; ZVFH-NEXT: addi a5, a5, -1 ; ZVFH-NEXT: and a4, a5, a4 -; ZVFH-NEXT: srli a1, a1, 1 +; ZVFH-NEXT: vsetvli a5, zero, e8, m1, ta, ma ; ZVFH-NEXT: vl8re16.v v0, (a0) ; ZVFH-NEXT: addi a0, sp, 16 ; ZVFH-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill @@ -2249,152 +2249,133 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a3, 34 -; ZVFHMIN-NEXT: mul a1, a1, a3 +; ZVFHMIN-NEXT: slli a1, a1, 5 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb -; ZVFHMIN-NEXT: vmv8r.v v24, v16 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; 
ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a3, 18 -; ZVFHMIN-NEXT: mul a1, a1, a3 +; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; ZVFHMIN-NEXT: slli a1, a3, 3 -; ZVFHMIN-NEXT: add a1, a0, a1 -; ZVFHMIN-NEXT: vl8re16.v v16, (a1) +; ZVFHMIN-NEXT: srli a1, a3, 1 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m1, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v17, v0 +; ZVFHMIN-NEXT: vslidedown.vx v18, v0, a1 +; ZVFHMIN-NEXT: slli a4, a3, 3 +; ZVFHMIN-NEXT: add a4, a0, a4 +; ZVFHMIN-NEXT: vl8re16.v v0, (a4) ; ZVFHMIN-NEXT: slli a5, a3, 2 -; ZVFHMIN-NEXT: sub a1, a2, a5 -; ZVFHMIN-NEXT: sltu a4, a2, a1 -; ZVFHMIN-NEXT: addi a4, a4, -1 -; ZVFHMIN-NEXT: and a6, a4, a1 +; ZVFHMIN-NEXT: sub a4, a2, a5 +; ZVFHMIN-NEXT: sltu a6, a2, a4 +; ZVFHMIN-NEXT: addi a6, a6, -1 +; ZVFHMIN-NEXT: and a6, a6, a4 ; ZVFHMIN-NEXT: slli a4, a3, 1 -; ZVFHMIN-NEXT: sub a1, a6, a4 -; ZVFHMIN-NEXT: sltu a7, a6, a1 -; ZVFHMIN-NEXT: addi a7, a7, -1 -; ZVFHMIN-NEXT: and a7, a7, a1 -; ZVFHMIN-NEXT: srli a1, a3, 1 -; ZVFHMIN-NEXT: csrr t0, vlenb -; ZVFHMIN-NEXT: add t0, sp, t0 -; ZVFHMIN-NEXT: addi t0, t0, 16 -; ZVFHMIN-NEXT: vs1r.v v0, (t0) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vslidedown.vx v7, v0, a1 +; ZVFHMIN-NEXT: sub a7, a6, a4 +; ZVFHMIN-NEXT: sltu t0, a6, a7 +; ZVFHMIN-NEXT: addi t0, t0, -1 +; ZVFHMIN-NEXT: and a7, t0, a7 ; ZVFHMIN-NEXT: srli a3, a3, 2 +; ZVFHMIN-NEXT: vsetvli t0, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v16, v18, a3 +; ZVFHMIN-NEXT: vsetvli t0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vl8re16.v v8, (a0) ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li t0, 26 +; ZVFHMIN-NEXT: li t0, 24 ; ZVFHMIN-NEXT: mul a0, a0, t0 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vslidedown.vx v0, v7, a3 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 1 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 -; ZVFHMIN-NEXT: vmv4r.v v16, v24 +; ZVFHMIN-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li t0, 10 -; ZVFHMIN-NEXT: mul a0, a0, t0 +; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28 +; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4 ; ZVFHMIN-NEXT: vsetvli zero, a7, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v26, v16, v8, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vmfeq.vv v20, v8, v24, v0.t ; ZVFHMIN-NEXT: bltu a6, a4, .LBB85_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a6, a4 ; ZVFHMIN-NEXT: .LBB85_2: ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 1 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: 
vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a7, 10 -; ZVFHMIN-NEXT: mul a0, a0, a7 +; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 ; ZVFHMIN-NEXT: vsetvli zero, a6, e32, m8, ta, ma -; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmfeq.vv v6, v16, v8, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v18 +; ZVFHMIN-NEXT: vmfeq.vv v6, v24, v8, v0.t ; ZVFHMIN-NEXT: add a0, a3, a3 ; ZVFHMIN-NEXT: bltu a2, a5, .LBB85_4 ; ZVFHMIN-NEXT: # %bb.3: ; ZVFHMIN-NEXT: mv a2, a5 ; ZVFHMIN-NEXT: .LBB85_4: ; ZVFHMIN-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslideup.vx v6, v26, a3 +; ZVFHMIN-NEXT: vslideup.vx v6, v20, a3 ; ZVFHMIN-NEXT: sub a5, a2, a4 ; ZVFHMIN-NEXT: sltu a6, a2, a5 ; ZVFHMIN-NEXT: addi a6, a6, -1 ; ZVFHMIN-NEXT: and a5, a6, a5 ; ZVFHMIN-NEXT: vsetvli a6, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: csrr a6, vlenb -; ZVFHMIN-NEXT: add a6, sp, a6 -; ZVFHMIN-NEXT: addi a6, a6, 16 -; ZVFHMIN-NEXT: vl1r.v v8, (a6) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmv1r.v v7, v8 -; ZVFHMIN-NEXT: vslidedown.vx v0, v8, a3 +; ZVFHMIN-NEXT: vmv1r.v v7, v17 +; ZVFHMIN-NEXT: vslidedown.vx v0, v17, a3 ; ZVFHMIN-NEXT: vsetvli a6, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a6, vlenb -; ZVFHMIN-NEXT: li a7, 18 -; ZVFHMIN-NEXT: mul a6, a6, a7 +; ZVFHMIN-NEXT: slli a6, a6, 3 ; ZVFHMIN-NEXT: add a6, sp, a6 ; ZVFHMIN-NEXT: addi a6, a6, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a6) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28 +; ZVFHMIN-NEXT: vl8r.v v16, (a6) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 ; ZVFHMIN-NEXT: csrr a6, vlenb -; ZVFHMIN-NEXT: li a7, 10 -; ZVFHMIN-NEXT: mul a6, a6, a7 +; ZVFHMIN-NEXT: slli a6, a6, 4 ; ZVFHMIN-NEXT: add a6, sp, a6 ; ZVFHMIN-NEXT: addi a6, a6, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a6, vlenb -; ZVFHMIN-NEXT: li a7, 26 +; ZVFHMIN-NEXT: li a7, 24 ; ZVFHMIN-NEXT: mul a6, a6, a7 ; ZVFHMIN-NEXT: add a6, sp, a6 ; ZVFHMIN-NEXT: addi a6, a6, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a6) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 +; ZVFHMIN-NEXT: vl8r.v v24, (a6) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28 ; ZVFHMIN-NEXT: vsetvli zero, a5, e32, m8, ta, ma ; ZVFHMIN-NEXT: csrr a5, vlenb -; ZVFHMIN-NEXT: li a6, 10 -; ZVFHMIN-NEXT: mul a5, a5, a6 +; ZVFHMIN-NEXT: slli a5, a5, 4 ; ZVFHMIN-NEXT: add a5, sp, a5 ; ZVFHMIN-NEXT: addi a5, a5, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmfeq.vv v5, v16, v8, v0.t +; ZVFHMIN-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vmfeq.vv v5, v24, v8, v0.t ; ZVFHMIN-NEXT: bltu a2, a4, .LBB85_6 ; ZVFHMIN-NEXT: # %bb.5: ; ZVFHMIN-NEXT: mv a2, a4 ; ZVFHMIN-NEXT: .LBB85_6: ; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 ; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: li a5, 26 +; ZVFHMIN-NEXT: li a5, 24 ; ZVFHMIN-NEXT: mul a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a2, e32, m8, ta, 
ma ; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v24, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslideup.vx v8, v5, a3 ; ZVFHMIN-NEXT: add a0, a1, a1 @@ -2402,8 +2383,7 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal ; ZVFHMIN-NEXT: vslideup.vx v8, v6, a1 ; ZVFHMIN-NEXT: vmv.v.v v0, v8 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 34 -; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: slli a0, a0, 5 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret @@ -3494,109 +3474,93 @@ define <vscale x 32 x i1> @fcmp_oeq_vv_nxv32f64(<vscale x 32 x double> %va, <vsc ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 48 +; CHECK-NEXT: li a3, 24 ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 40 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a3, vlenb ; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul t2, a3, a1 -; CHECK-NEXT: slli t1, a3, 3 +; CHECK-NEXT: slli a7, a3, 3 ; CHECK-NEXT: srli a4, a3, 2 ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v7, v0, a4 ; CHECK-NEXT: srli a1, a3, 3 -; CHECK-NEXT: vsetvli a5, zero, e8, mf4, ta, ma -; CHECK-NEXT: add a5, a2, t1 -; CHECK-NEXT: vl8re64.v v8, (a5) ; CHECK-NEXT: slli t0, a3, 4 +; CHECK-NEXT: add a5, a2, a7 +; CHECK-NEXT: vl8re64.v v16, (a5) +; CHECK-NEXT: vsetvli a5, zero, e8, mf4, ta, ma ; CHECK-NEXT: slli a5, a3, 1 ; CHECK-NEXT: vslidedown.vx v0, v0, a1 -; CHECK-NEXT: mv a7, a6 +; CHECK-NEXT: mv t1, a6 ; CHECK-NEXT: bltu a6, a5, .LBB171_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a7, a5 +; CHECK-NEXT: mv t1, a5 ; CHECK-NEXT: .LBB171_2: ; CHECK-NEXT: add t2, a2, t2 -; CHECK-NEXT: add t1, a0, t1 +; CHECK-NEXT: add a7, a0, a7 ; CHECK-NEXT: add t0, a2, t0 -; CHECK-NEXT: vl8re64.v v16, (a2) -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: sub a2, a7, a3 -; CHECK-NEXT: sltu t3, a7, a2 +; CHECK-NEXT: vl8re64.v v8, (a2) +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: sub a2, t1, a3 +; CHECK-NEXT: sltu t3, t1, a2 ; CHECK-NEXT: addi t3, t3, -1 ; CHECK-NEXT: and a2, t3, a2 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 5 +; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v6, v16, v8, v0.t -; CHECK-NEXT: bltu a7, a3, .LBB171_4 +; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v6, v8, v16, v0.t 
+; CHECK-NEXT: bltu t1, a3, .LBB171_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a7, a3 +; CHECK-NEXT: mv t1, a3 ; CHECK-NEXT: .LBB171_4: -; CHECK-NEXT: vl8re64.v v8, (t2) +; CHECK-NEXT: vl8re64.v v16, (t2) ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 5 +; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re64.v v8, (t1) +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v18, v7, a1 +; CHECK-NEXT: vsetvli zero, t1, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li t1, 24 -; CHECK-NEXT: mul a2, a2, t1 +; CHECK-NEXT: slli a2, a2, 4 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v18, v7, a1 -; CHECK-NEXT: vl8re64.v v8, (t0) +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v17, v24, v8, v0.t +; CHECK-NEXT: vl8re64.v v8, (a7) ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: slli a2, a2, 4 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re64.v v8, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a7, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 40 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v17, v24, v8, v0.t -; CHECK-NEXT: add a0, a1, a1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma -; CHECK-NEXT: sub a0, a6, a5 -; CHECK-NEXT: sltu a2, a6, a0 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a0, a2, a0 +; CHECK-NEXT: vl8re64.v v8, (t0) +; CHECK-NEXT: add a2, a1, a1 +; CHECK-NEXT: vsetvli zero, a2, e8, mf2, tu, ma +; CHECK-NEXT: sub a2, a6, a5 +; CHECK-NEXT: sltu a5, a6, a2 +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: addi a0, a5, -1 +; CHECK-NEXT: and a0, a0, a2 ; CHECK-NEXT: vslideup.vx v17, v6, a1 ; CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: bltu a0, a3, .LBB171_6 @@ -3605,13 +3569,6 @@ define <vscale x 32 x i1> @fcmp_oeq_vv_nxv32f64(<vscale x 32 x double> %va, <vsc ; CHECK-NEXT: .LBB171_6: ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vmfeq.vv v16, v24, v8, v0.t ; CHECK-NEXT: add a2, a4, a1 ; CHECK-NEXT: vsetvli zero, a2, e8, mf2, tu, ma @@ -3623,13 +3580,12 @@ define <vscale x 32 x i1> @fcmp_oeq_vv_nxv32f64(<vscale x 32 x double> %va, <vsc ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v18 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: slli a0, a0, 3 ; 
CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 24 -; CHECK-NEXT: mul a0, a0, a2 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -3641,7 +3597,7 @@ define <vscale x 32 x i1> @fcmp_oeq_vv_nxv32f64(<vscale x 32 x double> %va, <vsc ; CHECK-NEXT: vslideup.vx v17, v16, a0 ; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 48 +; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll index 7fd77c0..85f5ffd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll @@ -1092,7 +1092,7 @@ define <vscale x 128 x i1> @icmp_eq_vv_nxv128i8(<vscale x 128 x i8> %va, <vscale ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 @@ -1102,23 +1102,23 @@ define <vscale x 128 x i1> @icmp_eq_vv_nxv128i8(<vscale x 128 x i8> %va, <vscale ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a4, a0, a1 ; CHECK-NEXT: vl8r.v v8, (a4) -; CHECK-NEXT: vl8r.v v0, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: sub a0, a3, a1 +; CHECK-NEXT: vsetvli a4, zero, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a2) -; CHECK-NEXT: sltu a2, a3, a0 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a0, a2, a0 -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vmseq.vv v7, v16, v8, v0.t +; CHECK-NEXT: sub a2, a3, a1 +; CHECK-NEXT: sltu a4, a3, a2 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: vl8r.v v24, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: and a2, a4, a2 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; CHECK-NEXT: vmseq.vv v6, v16, v8, v0.t ; CHECK-NEXT: bltu a3, a1, .LBB96_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a3, a1 ; CHECK-NEXT: .LBB96_2: ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 @@ -1128,7 +1128,7 @@ define <vscale x 128 x i1> @icmp_eq_vv_nxv128i8(<vscale x 128 x i8> %va, <vscale ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmseq.vv v16, v8, v24, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmv1r.v v8, v7 +; CHECK-NEXT: vmv1r.v v8, v6 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 @@ -2248,17 +2248,17 @@ define <vscale x 32 x i1> @icmp_eq_vv_nxv32i32(<vscale x 32 x i32> %va, <vscale ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma -; CHECK-NEXT: slli a3, a1, 3 -; CHECK-NEXT: add a3, a0, a3 -; CHECK-NEXT: vl8re32.v v8, (a3) -; CHECK-NEXT: slli a3, a1, 1 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: srli a1, a3, 2 +; CHECK-NEXT: slli a4, a3, 3 +; 
CHECK-NEXT: add a4, a0, a4 +; CHECK-NEXT: vl8re32.v v8, (a4) +; CHECK-NEXT: slli a3, a3, 1 ; CHECK-NEXT: sub a4, a2, a3 ; CHECK-NEXT: sltu a5, a2, a4 ; CHECK-NEXT: addi a5, a5, -1 ; CHECK-NEXT: and a4, a5, a4 -; CHECK-NEXT: srli a1, a1, 2 +; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma ; CHECK-NEXT: vl8re32.v v0, (a0) ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill diff --git a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll index a41c262..0f47236 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll @@ -1125,22 +1125,22 @@ define <vscale x 16 x double> @vfma_vv_nxv16f64(<vscale x 16 x double> %va, <vsc ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; CHECK-NEXT: slli a3, a1, 3 -; CHECK-NEXT: add a5, a2, a3 +; CHECK-NEXT: srli a3, a1, 3 +; CHECK-NEXT: slli a5, a1, 3 +; CHECK-NEXT: add a6, a2, a5 +; CHECK-NEXT: vl8re64.v v8, (a6) +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: slli a6, a6, 4 +; CHECK-NEXT: add a6, sp, a6 +; CHECK-NEXT: addi a6, a6, 16 +; CHECK-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill +; CHECK-NEXT: sub a6, a4, a1 +; CHECK-NEXT: sltu a7, a4, a6 +; CHECK-NEXT: addi a7, a7, -1 +; CHECK-NEXT: and a6, a7, a6 +; CHECK-NEXT: vsetvli a7, zero, e8, mf4, ta, ma +; CHECK-NEXT: add a5, a0, a5 ; CHECK-NEXT: vl8re64.v v8, (a5) -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: slli a5, a5, 4 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: sub a5, a4, a1 -; CHECK-NEXT: sltu a6, a4, a5 -; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a5, a6, a5 -; CHECK-NEXT: srli a6, a1, 3 -; CHECK-NEXT: add a3, a0, a3 -; CHECK-NEXT: vl8re64.v v8, (a3) ; CHECK-NEXT: vl8re64.v v16, (a2) ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 @@ -1150,8 +1150,8 @@ define <vscale x 16 x double> @vfma_vv_nxv16f64(<vscale x 16 x double> %va, <vsc ; CHECK-NEXT: vl8re64.v v16, (a0) ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vslidedown.vx v0, v0, a6 -; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a3 +; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a2, 24 ; CHECK-NEXT: mul a0, a0, a2 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll index 292f277..bacf9ba 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll @@ -1125,22 +1125,22 @@ define <vscale x 16 x double> @vfma_vv_nxv16f64(<vscale x 16 x double> %va, <vsc ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; CHECK-NEXT: slli a3, a1, 3 -; CHECK-NEXT: add a5, a2, a3 +; CHECK-NEXT: srli a3, a1, 3 +; CHECK-NEXT: slli a5, a1, 3 +; CHECK-NEXT: add a6, a2, a5 +; CHECK-NEXT: vl8re64.v v8, (a6) +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: slli a6, a6, 4 +; CHECK-NEXT: add a6, sp, a6 +; CHECK-NEXT: addi a6, a6, 16 +; CHECK-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill +; CHECK-NEXT: sub a6, a4, a1 +; CHECK-NEXT: sltu a7, a4, a6 +; CHECK-NEXT: addi a7, a7, -1 +; CHECK-NEXT: and a6, a7, a6 +; CHECK-NEXT: vsetvli a7, zero, e8, mf4, ta, ma +; CHECK-NEXT: add a5, a0, a5 ; CHECK-NEXT: vl8re64.v 
v8, (a5) -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: slli a5, a5, 4 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: sub a5, a4, a1 -; CHECK-NEXT: sltu a6, a4, a5 -; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a5, a6, a5 -; CHECK-NEXT: srli a6, a1, 3 -; CHECK-NEXT: add a3, a0, a3 -; CHECK-NEXT: vl8re64.v v8, (a3) ; CHECK-NEXT: vl8re64.v v16, (a2) ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 @@ -1150,8 +1150,8 @@ define <vscale x 16 x double> @vfma_vv_nxv16f64(<vscale x 16 x double> %va, <vsc ; CHECK-NEXT: vl8re64.v v16, (a0) ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vslidedown.vx v0, v0, a6 -; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a3 +; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a2, 24 ; CHECK-NEXT: mul a0, a0, a2 diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll index 76efdda..26f7c56 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll @@ -369,14 +369,14 @@ define <vscale x 128 x i8> @vpmerge_vv_nxv128i8(<vscale x 128 x i8> %va, <vscale ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a4, a0, a1 ; CHECK-NEXT: vl8r.v v16, (a4) -; CHECK-NEXT: vl8r.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: sub a0, a3, a1 +; CHECK-NEXT: vsetvli a4, zero, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a2) -; CHECK-NEXT: sltu a2, a3, a0 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a0, a2, a0 -; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma +; CHECK-NEXT: sub a2, a3, a1 +; CHECK-NEXT: sltu a4, a3, a2 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: vl8r.v v8, (a0) +; CHECK-NEXT: and a2, a4, a2 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, tu, ma ; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 ; CHECK-NEXT: bltu a3, a1, .LBB28_2 ; CHECK-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll index 0a5e501..312378d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll @@ -373,10 +373,10 @@ define <vscale x 32 x i32> @select_nxv32i32(<vscale x 32 x i1> %a, <vscale x 32 ; CHECK-NEXT: addi a5, a5, -1 ; CHECK-NEXT: and a4, a5, a4 ; CHECK-NEXT: srli a3, a3, 2 +; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma ; CHECK-NEXT: vl8re32.v v0, (a0) ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v24, a3 ; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma ; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 @@ -430,10 +430,10 @@ define <vscale x 32 x i32> @select_evl_nxv32i32(<vscale x 32 x i1> %a, <vscale x ; CHECK-NEXT: addi a4, a4, -1 ; CHECK-NEXT: and a3, a4, a3 ; CHECK-NEXT: srli a4, a1, 2 +; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma ; CHECK-NEXT: vl8re32.v v0, (a0) ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v24, a4 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 @@ -716,10 +716,10 @@ define <vscale x 16 x double> @select_nxv16f64(<vscale x 16 x i1> %a, <vscale x ; CHECK-NEXT: addi a4, a4, -1 ; CHECK-NEXT: and a3, a4, a3 ; CHECK-NEXT: srli a4, a1, 3 +; CHECK-NEXT: 
vsetvli a5, zero, e8, mf4, ta, ma
 ; CHECK-NEXT: vl8re64.v v0, (a0)
 ; CHECK-NEXT: addi a0, sp, 16
 ; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
 ; CHECK-NEXT: vslidedown.vx v0, v24, a4
 ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
 ; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
index 25aa3a7..9d5ff00 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
@@ -242,7 +242,6 @@ define <vscale x 1 x double> @test6(i64 %avl, i8 zeroext %cond, <vscale x 1 x do
 ; CHECK-NEXT: andi a1, a1, 2
 ; CHECK-NEXT: beqz a1, .LBB5_4
 ; CHECK-NEXT: .LBB5_2: # %if.then4
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0)
 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_0)
 ; CHECK-NEXT: vlse64.v v9, (a0), zero
@@ -631,22 +630,22 @@ declare <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(<vscale x 2 x i32
 define void @vlmax(i64 %N, ptr %c, ptr %a, ptr %b) {
 ; CHECK-LABEL: vlmax:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a6, zero, e64, m1, ta, ma
 ; CHECK-NEXT: blez a0, .LBB12_3
 ; CHECK-NEXT: # %bb.1: # %for.body.preheader
-; CHECK-NEXT: li a5, 0
-; CHECK-NEXT: slli a4, a6, 3
+; CHECK-NEXT: li a4, 0
+; CHECK-NEXT: vsetvli a6, zero, e64, m1, ta, ma
+; CHECK-NEXT: slli a5, a6, 3
 ; CHECK-NEXT: .LBB12_2: # %for.body
 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: vle64.v v8, (a2)
 ; CHECK-NEXT: vle64.v v9, (a3)
 ; CHECK-NEXT: vfadd.vv v8, v8, v9
 ; CHECK-NEXT: vse64.v v8, (a1)
-; CHECK-NEXT: add a5, a5, a6
-; CHECK-NEXT: add a1, a1, a4
-; CHECK-NEXT: add a3, a3, a4
-; CHECK-NEXT: add a2, a2, a4
-; CHECK-NEXT: blt a5, a0, .LBB12_2
+; CHECK-NEXT: add a4, a4, a6
+; CHECK-NEXT: add a1, a1, a5
+; CHECK-NEXT: add a3, a3, a5
+; CHECK-NEXT: add a2, a2, a5
+; CHECK-NEXT: blt a4, a0, .LBB12_2
 ; CHECK-NEXT: .LBB12_3: # %for.end
 ; CHECK-NEXT: ret
 entry:
@@ -678,18 +677,18 @@ for.end: ; preds = %for.body, %entry
 define void @vector_init_vlmax(i64 %N, ptr %c) {
 ; CHECK-LABEL: vector_init_vlmax:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma
 ; CHECK-NEXT: blez a0, .LBB13_3
 ; CHECK-NEXT: # %bb.1: # %for.body.preheader
-; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: slli a4, a2, 3
+; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: vsetvli a3, zero, e64, m1, ta, ma
+; CHECK-NEXT: slli a4, a3, 3
 ; CHECK-NEXT: vmv.v.i v8, 0
 ; CHECK-NEXT: .LBB13_2: # %for.body
 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: vse64.v v8, (a1)
-; CHECK-NEXT: add a3, a3, a2
+; CHECK-NEXT: add a2, a2, a3
 ; CHECK-NEXT: add a1, a1, a4
-; CHECK-NEXT: blt a3, a0, .LBB13_2
+; CHECK-NEXT: blt a2, a0, .LBB13_2
 ; CHECK-NEXT: .LBB13_3: # %for.end
 ; CHECK-NEXT: ret
 entry:
@@ -714,20 +713,20 @@ for.end: ; preds = %for.body, %entry
 define void @vector_init_vsetvli_N(i64 %N, ptr %c) {
 ; CHECK-LABEL: vector_init_vsetvli_N:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a2, a0, e64, m1, ta, ma
 ; CHECK-NEXT: blez a0, .LBB14_3
 ; CHECK-NEXT: # %bb.1: # %for.body.preheader
-; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: slli a4, a2, 3
+; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: vsetvli a3, a0, e64, m1, ta, ma
+; CHECK-NEXT: slli a4, a3, 3
 ; CHECK-NEXT: vsetvli a5, zero, e64, m1, ta, ma
 ; CHECK-NEXT: vmv.v.i v8, 0
 ; CHECK-NEXT: .LBB14_2: # %for.body
 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a3, e64, m1, ta, ma
 ; CHECK-NEXT: vse64.v v8, (a1)
-; CHECK-NEXT: add a3, a3, a2
+; CHECK-NEXT: add a2, a2, a3
 ; CHECK-NEXT: add a1, a1, a4
-; CHECK-NEXT: blt a3, a0, .LBB14_2
+; CHECK-NEXT: blt a2, a0, .LBB14_2
 ; CHECK-NEXT: .LBB14_3: # %for.end
 ; CHECK-NEXT: ret
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-regression.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-regression.ll
index f658a2c..c3b19b5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-regression.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-regression.ll
@@ -11,9 +11,10 @@ define i32 @illegal_preserve_vl(<vscale x 2 x i32> %a, <vscale x 4 x i64> %x, pt
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma
 ; CHECK-NEXT: vadd.vv v12, v12, v12
-; CHECK-NEXT: vs4r.v v12, (a0)
 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: vmv.x.s a1, v8
+; CHECK-NEXT: vs4r.v v12, (a0)
+; CHECK-NEXT: mv a0, a1
 ; CHECK-NEXT: ret
 %index = add <vscale x 4 x i64> %x, %x
 store <vscale x 4 x i64> %index, ptr %y |