Diffstat (limited to 'llvm')
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 262
-rw-r--r--  llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 9
-rw-r--r--  llvm/test/CodeGen/RISCV/O0-pipeline.ll | 6
-rw-r--r--  llvm/test/CodeGen/RISCV/O3-pipeline.ll | 2
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/combine-vmv.ll | 2
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll | 2
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll | 8
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll | 2
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll | 2
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll | 2
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll | 2
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll | 2
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll | 2
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll | 4
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll | 12
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll | 4
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll | 10
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll | 4
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll | 10
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll | 8
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll | 44
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll | 2
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll | 4
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll | 8
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll | 2
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll | 8
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll | 2
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir | 136
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll | 12
29 files changed, 312 insertions(+), 261 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 7a8ff84..1c81542 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -47,6 +47,18 @@ static cl::opt<bool> DisableInsertVSETVLPHIOpt(
namespace {
+/// Given a virtual register \p Reg, return the corresponding VNInfo for it.
+/// This should never return nullptr.
+static VNInfo *getVNInfoFromReg(Register Reg, const MachineInstr &MI,
+ const LiveIntervals *LIS) {
+ assert(Reg.isVirtual());
+ auto &LI = LIS->getInterval(Reg);
+ SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI);
+ VNInfo *VNI = LI.getVNInfoBefore(SI);
+ assert(VNI);
+ return VNI;
+}
+
static unsigned getVLOpNum(const MachineInstr &MI) {
return RISCVII::getVLOpNum(MI.getDesc());
}
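
The getVNInfoFromReg helper added above resolves an AVL register use to the value (VNInfo) that is live immediately before the querying instruction. A minimal sketch contrasting the two lookup flavors this patch relies on: getVNInfoBefore for the reaching definition of a use, and getVNInfoAt on the register slot for a definition made by the instruction itself. Names are illustrative; LIS is assumed to be a valid LiveIntervals analysis.

    // Sketch only: the two VNInfo queries used throughout this patch.
    static void queryValuesAt(Register Reg, const MachineInstr &MI,
                              const LiveIntervals *LIS) {
      const LiveInterval &LI = LIS->getInterval(Reg);
      SlotIndex Idx = LIS->getSlotIndexes()->getInstructionIndex(MI);
      // Value live on entry to MI: the reaching definition of a *use* of Reg.
      const VNInfo *UseVal = LI.getVNInfoBefore(Idx);
      // Value MI itself creates (if MI defines Reg): register defs become
      // live at the instruction's register slot, not before it.
      const VNInfo *DefVal = LI.getVNInfoAt(Idx.getRegSlot());
      (void)UseVal; (void)DefVal;
    }

The fault-first-load handling in transferAfter below uses the second form, since it wants the vl value the load defines rather than the one flowing into it.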
@@ -426,7 +438,8 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
struct AVLDef {
- const MachineInstr *DefMI;
+ // Every AVLDef should have a VNInfo.
+ const VNInfo *ValNo;
Register DefReg;
};
union {
@@ -465,9 +478,9 @@ public:
void setUnknown() { State = Unknown; }
bool isUnknown() const { return State == Unknown; }
- void setAVLRegDef(const MachineInstr *DefMI, Register AVLReg) {
- assert(DefMI && AVLReg.isVirtual());
- AVLRegDef.DefMI = DefMI;
+ void setAVLRegDef(const VNInfo *VNInfo, Register AVLReg) {
+ assert(VNInfo && AVLReg.isVirtual());
+ AVLRegDef.ValNo = VNInfo;
AVLRegDef.DefReg = AVLReg;
State = AVLIsReg;
}
@@ -493,9 +506,18 @@ public:
assert(hasAVLImm());
return AVLImm;
}
- const MachineInstr &getAVLDefMI() const {
- assert(hasAVLReg() && AVLRegDef.DefMI);
- return *AVLRegDef.DefMI;
+ const VNInfo *getAVLVNInfo() const {
+ assert(hasAVLReg());
+ return AVLRegDef.ValNo;
+ }
+ // Most AVLIsReg infos will have a single defining MachineInstr, unless it was
+ // a PHI node. In that case getAVLVNInfo()->def will point to the block
+ // boundary slot.
+ const MachineInstr *getAVLDefMI(const LiveIntervals *LIS) const {
+ assert(hasAVLReg());
+ auto *MI = LIS->getInstructionFromIndex(getAVLVNInfo()->def);
+ assert(!(getAVLVNInfo()->isPHIDef() && MI));
+ return MI;
}
void setAVL(VSETVLIInfo Info) {
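
Because a VNInfo defined by a PHI lives at a block boundary slot rather than at a real instruction, getAVLDefMI can now legitimately return nullptr, and every caller guards the pointer. A sketch of the PHI case, with illustrative names and assuming an AVLIsReg Info:

    // Sketch: a PHI-defined AVL has no single defining MachineInstr.
    const VNInfo *VNI = Info.getAVLVNInfo();
    if (VNI->isPHIDef()) {
      // VNI->def is the index at the start of the merging block, so
      // LIS->getInstructionFromIndex(VNI->def) yields nullptr here.
      const MachineBasicBlock *MergeBB = LIS->getMBBFromIndex(VNI->def);
      (void)MergeBB; // needVSETVLIPHI below reasons about this block.
    }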
@@ -503,7 +525,7 @@ public:
if (Info.isUnknown())
setUnknown();
else if (Info.hasAVLReg())
- setAVLRegDef(&Info.getAVLDefMI(), Info.getAVLReg());
+ setAVLRegDef(Info.getAVLVNInfo(), Info.getAVLReg());
else if (Info.hasAVLVLMAX())
setAVLVLMAX();
else if (Info.hasAVLIgnored())
@@ -519,11 +541,13 @@ public:
bool getTailAgnostic() const { return TailAgnostic; }
bool getMaskAgnostic() const { return MaskAgnostic; }
- bool hasNonZeroAVL() const {
+ bool hasNonZeroAVL(const LiveIntervals *LIS) const {
if (hasAVLImm())
return getAVLImm() > 0;
- if (hasAVLReg())
- return isNonZeroLoadImmediate(getAVLDefMI());
+ if (hasAVLReg()) {
+ if (auto *DefMI = getAVLDefMI(LIS))
+ return isNonZeroLoadImmediate(*DefMI);
+ }
if (hasAVLVLMAX())
return true;
if (hasAVLIgnored())
@@ -531,16 +555,17 @@ public:
return false;
}
- bool hasEquallyZeroAVL(const VSETVLIInfo &Other) const {
+ bool hasEquallyZeroAVL(const VSETVLIInfo &Other,
+ const LiveIntervals *LIS) const {
if (hasSameAVL(Other))
return true;
- return (hasNonZeroAVL() && Other.hasNonZeroAVL());
+ return (hasNonZeroAVL(LIS) && Other.hasNonZeroAVL(LIS));
}
bool hasSameAVL(const VSETVLIInfo &Other) const {
if (hasAVLReg() && Other.hasAVLReg())
- return AVLRegDef.DefMI == Other.AVLRegDef.DefMI &&
- AVLRegDef.DefReg == Other.AVLRegDef.DefReg;
+ return getAVLVNInfo()->id == Other.getAVLVNInfo()->id &&
+ getAVLReg() == Other.getAVLReg();
if (hasAVLImm() && Other.hasAVLImm())
return getAVLImm() == Other.getAVLImm();
@@ -620,7 +645,7 @@ public:
// Require are compatible with the previous vsetvli instruction represented
// by this. MI is the instruction whose requirements we're considering.
bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
- const MachineRegisterInfo &MRI) const {
+ const LiveIntervals *LIS) const {
assert(isValid() && Require.isValid() &&
"Can't compare invalid VSETVLIInfos");
assert(!Require.SEWLMULRatioOnly &&
@@ -636,7 +661,7 @@ public:
if (Used.VLAny && !(hasSameAVL(Require) && hasSameVLMAX(Require)))
return false;
- if (Used.VLZeroness && !hasEquallyZeroAVL(Require))
+ if (Used.VLZeroness && !hasEquallyZeroAVL(Require, LIS))
return false;
return hasCompatibleVTYPE(Used, Require);
@@ -765,6 +790,7 @@ class RISCVInsertVSETVLI : public MachineFunctionPass {
const RISCVSubtarget *ST;
const TargetInstrInfo *TII;
MachineRegisterInfo *MRI;
+ LiveIntervals *LIS;
std::vector<BlockData> BlockInfo;
std::queue<const MachineBasicBlock *> WorkList;
@@ -777,6 +803,14 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addPreserved<LiveDebugVariables>();
+ AU.addPreserved<LiveStacks>();
+
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -848,7 +882,7 @@ INITIALIZE_PASS(RISCVCoalesceVSETVLI, "riscv-coalesce-vsetvli",
// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI,
- const MachineRegisterInfo &MRI) {
+ const LiveIntervals *LIS) {
VSETVLIInfo NewInfo;
if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
NewInfo.setAVLImm(MI.getOperand(1).getImm());
@@ -861,7 +895,7 @@ static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI,
if (AVLReg == RISCV::X0)
NewInfo.setAVLVLMAX();
else
- NewInfo.setAVLRegDef(MRI.getUniqueVRegDef(AVLReg), AVLReg);
+ NewInfo.setAVLRegDef(getVNInfoFromReg(AVLReg, MI, LIS), AVLReg);
}
NewInfo.setVTYPE(MI.getOperand(2).getImm());
@@ -880,7 +914,7 @@ static unsigned computeVLMAX(unsigned VLEN, unsigned SEW,
static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
const RISCVSubtarget &ST,
- const MachineRegisterInfo *MRI) {
+ const LiveIntervals *LIS) {
VSETVLIInfo InstrInfo;
bool TailAgnostic = true;
@@ -933,7 +967,7 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
else
InstrInfo.setAVLImm(Imm);
} else {
- InstrInfo.setAVLRegDef(MRI->getUniqueVRegDef(VLOp.getReg()),
+ InstrInfo.setAVLRegDef(getVNInfoFromReg(VLOp.getReg(), MI, LIS),
VLOp.getReg());
}
} else {
@@ -955,9 +989,9 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
// register AVLs to avoid extending live ranges without being sure we can
// kill the original source reg entirely.
if (InstrInfo.hasAVLReg()) {
- const MachineInstr &DefMI = InstrInfo.getAVLDefMI();
- if (isVectorConfigInstr(DefMI)) {
- VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(DefMI, *MRI);
+ if (const MachineInstr *DefMI = InstrInfo.getAVLDefMI(LIS);
+ DefMI && isVectorConfigInstr(*DefMI)) {
+ VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI, LIS);
if (DefInstrInfo.hasSameVLMAX(InstrInfo) &&
(DefInstrInfo.hasAVLImm() || DefInstrInfo.hasAVLVLMAX()))
InstrInfo.setAVL(DefInstrInfo);
@@ -983,11 +1017,12 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
// Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
// VLMAX.
if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
- BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
- .addReg(RISCV::X0, RegState::Define | RegState::Dead)
- .addReg(RISCV::X0, RegState::Kill)
- .addImm(Info.encodeVTYPE())
- .addReg(RISCV::VL, RegState::Implicit);
+ auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
+ .addReg(RISCV::X0, RegState::Define | RegState::Dead)
+ .addReg(RISCV::X0, RegState::Kill)
+ .addImm(Info.encodeVTYPE())
+ .addReg(RISCV::VL, RegState::Implicit);
+ LIS->InsertMachineInstrInMaps(*MI);
return;
}
@@ -995,15 +1030,16 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
// it has the same VLMAX we want and the last VL/VTYPE we observed is the
// same, we can use the X0, X0 form.
if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg()) {
- const MachineInstr &DefMI = Info.getAVLDefMI();
- if (isVectorConfigInstr(DefMI)) {
- VSETVLIInfo DefInfo = getInfoForVSETVLI(DefMI, *MRI);
+ if (const MachineInstr *DefMI = Info.getAVLDefMI(LIS);
+ DefMI && isVectorConfigInstr(*DefMI)) {
+ VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI, LIS);
if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
- BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
- .addReg(RISCV::X0, RegState::Define | RegState::Dead)
- .addReg(RISCV::X0, RegState::Kill)
- .addImm(Info.encodeVTYPE())
- .addReg(RISCV::VL, RegState::Implicit);
+ auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
+ .addReg(RISCV::X0, RegState::Define | RegState::Dead)
+ .addReg(RISCV::X0, RegState::Kill)
+ .addImm(Info.encodeVTYPE())
+ .addReg(RISCV::VL, RegState::Implicit);
+ LIS->InsertMachineInstrInMaps(*MI);
return;
}
}
@@ -1011,10 +1047,11 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
}
if (Info.hasAVLImm()) {
- BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
- .addReg(RISCV::X0, RegState::Define | RegState::Dead)
- .addImm(Info.getAVLImm())
- .addImm(Info.encodeVTYPE());
+ auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
+ .addReg(RISCV::X0, RegState::Define | RegState::Dead)
+ .addImm(Info.getAVLImm())
+ .addImm(Info.encodeVTYPE());
+ LIS->InsertMachineInstrInMaps(*MI);
return;
}
@@ -1023,36 +1060,46 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
// the previous vl to become invalid.
if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
Info.hasSameVLMAX(PrevInfo)) {
- BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
- .addReg(RISCV::X0, RegState::Define | RegState::Dead)
- .addReg(RISCV::X0, RegState::Kill)
- .addImm(Info.encodeVTYPE())
- .addReg(RISCV::VL, RegState::Implicit);
+ auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
+ .addReg(RISCV::X0, RegState::Define | RegState::Dead)
+ .addReg(RISCV::X0, RegState::Kill)
+ .addImm(Info.encodeVTYPE())
+ .addReg(RISCV::VL, RegState::Implicit);
+ LIS->InsertMachineInstrInMaps(*MI);
return;
}
// Otherwise use an AVL of 1 to avoid depending on previous vl.
- BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
- .addReg(RISCV::X0, RegState::Define | RegState::Dead)
- .addImm(1)
- .addImm(Info.encodeVTYPE());
+ auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
+ .addReg(RISCV::X0, RegState::Define | RegState::Dead)
+ .addImm(1)
+ .addImm(Info.encodeVTYPE());
+ LIS->InsertMachineInstrInMaps(*MI);
return;
}
if (Info.hasAVLVLMAX()) {
Register DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
- BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
- .addReg(DestReg, RegState::Define | RegState::Dead)
- .addReg(RISCV::X0, RegState::Kill)
- .addImm(Info.encodeVTYPE());
+ auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
+ .addReg(DestReg, RegState::Define | RegState::Dead)
+ .addReg(RISCV::X0, RegState::Kill)
+ .addImm(Info.encodeVTYPE());
+ LIS->InsertMachineInstrInMaps(*MI);
+ LIS->createAndComputeVirtRegInterval(DestReg);
return;
}
Register AVLReg = Info.getAVLReg();
MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);
- BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLI))
- .addReg(RISCV::X0, RegState::Define | RegState::Dead)
- .addReg(AVLReg)
- .addImm(Info.encodeVTYPE());
+ auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLI))
+ .addReg(RISCV::X0, RegState::Define | RegState::Dead)
+ .addReg(AVLReg)
+ .addImm(Info.encodeVTYPE());
+ LIS->InsertMachineInstrInMaps(*MI);
+ // Normally the AVL's live range will already extend past the inserted vsetvli
+ // because the pseudos below will already use the AVL. But this isn't always
+ // the case, e.g. PseudoVMV_X_S doesn't have an AVL operand.
+ LIS->getInterval(AVLReg).extendInBlock(
+ LIS->getMBBStartIdx(&MBB), LIS->getInstructionIndex(*MI).getRegSlot());
}
static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
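
Every BuildMI in insertVSETVLI is now paired with bookkeeping that keeps LiveIntervals consistent: give the new instruction a slot index, compute an interval for any freshly defined virtual register, and extend the interval of any virtual register it reads. A sketch of that recipe with illustrative names (Opcode, DestReg, SrcReg):

    // Sketch: keep LiveIntervals valid when materializing an instruction,
    // as done for each BuildMI above.
    auto MIB = BuildMI(MBB, InsertPt, DL, TII->get(Opcode))
                   .addReg(DestReg, RegState::Define)
                   .addReg(SrcReg);
    SlotIndex Idx = LIS->InsertMachineInstrInMaps(*MIB);
    // Freshly created virtual def: build its interval from scratch.
    LIS->createAndComputeVirtRegInterval(DestReg);
    // Pre-existing virtual register read here: extend its live range up to
    // the new use in case nothing after the insertion point kept it alive.
    LIS->getInterval(SrcReg).extendInBlock(LIS->getMBBStartIdx(&MBB),
                                           Idx.getRegSlot());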
@@ -1065,7 +1112,7 @@ static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
const VSETVLIInfo &Require,
const VSETVLIInfo &CurInfo) const {
- assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, *ST, MRI));
+ assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, *ST, LIS));
if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
return true;
@@ -1106,7 +1153,7 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
Used.TailPolicy = false;
}
- if (CurInfo.isCompatible(Used, Require, *MRI))
+ if (CurInfo.isCompatible(Used, Require, LIS))
return false;
// We didn't find a compatible value. If our AVL is a virtual register,
@@ -1114,9 +1161,9 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
// and the last VL/VTYPE we observed is the same, we don't need a
// VSETVLI here.
if (Require.hasAVLReg() && CurInfo.hasCompatibleVTYPE(Used, Require)) {
- const MachineInstr &DefMI = Require.getAVLDefMI();
- if (isVectorConfigInstr(DefMI)) {
- VSETVLIInfo DefInfo = getInfoForVSETVLI(DefMI, *MRI);
+ if (const MachineInstr *DefMI = Require.getAVLDefMI(LIS);
+ DefMI && isVectorConfigInstr(*DefMI)) {
+ VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI, LIS);
if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
return false;
}
@@ -1152,7 +1199,7 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
if (!RISCVII::hasSEWOp(TSFlags))
return;
- const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, *ST, MRI);
+ const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, *ST, LIS);
assert(NewInfo.isValid() && !NewInfo.isUnknown());
if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
return;
@@ -1171,7 +1218,7 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
// variant, so we avoid the transform to prevent extending live range of an
// avl register operand.
// TODO: We can probably relax this for immediates.
- bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo) &&
+ bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo, LIS) &&
IncomingInfo.hasSameVLMAX(PrevInfo);
if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
Info.setAVL(IncomingInfo);
@@ -1202,14 +1249,17 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
const MachineInstr &MI) const {
if (isVectorConfigInstr(MI)) {
- Info = getInfoForVSETVLI(MI, *MRI);
+ Info = getInfoForVSETVLI(MI, LIS);
return;
}
if (RISCV::isFaultFirstLoad(MI)) {
// Update AVL to vl-output of the fault first load.
- Info.setAVLRegDef(MRI->getUniqueVRegDef(MI.getOperand(1).getReg()),
- MI.getOperand(1).getReg());
+ assert(MI.getOperand(1).getReg().isVirtual());
+ auto &LI = LIS->getInterval(MI.getOperand(1).getReg());
+ SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
+ VNInfo *VNI = LI.getVNInfoAt(SI);
+ Info.setAVLRegDef(VNI, MI.getOperand(1).getReg());
return;
}
@@ -1293,7 +1343,7 @@ void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
}
// If we weren't able to prove a vsetvli was directly unneeded, it might still
-// be unneeded if the AVL is a phi node where all incoming values are VL
+// be unneeded if the AVL was a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
const MachineBasicBlock &MBB) const {
@@ -1303,26 +1353,27 @@ bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
if (!Require.hasAVLReg())
return true;
- // We need the AVL to be produce by a PHI node in this basic block.
- const MachineInstr *PHI = &Require.getAVLDefMI();
- if (PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
+ // We need the AVL to have been produced by a PHI node in this basic block.
+ const VNInfo *Valno = Require.getAVLVNInfo();
+ if (!Valno->isPHIDef() || LIS->getMBBFromIndex(Valno->def) != &MBB)
return true;
- for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
- PHIOp += 2) {
- Register InReg = PHI->getOperand(PHIOp).getReg();
- MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
+ const LiveRange &LR = LIS->getInterval(Require.getAVLReg());
+
+ for (auto *PBB : MBB.predecessors()) {
const VSETVLIInfo &PBBExit = BlockInfo[PBB->getNumber()].Exit;
// We need the PHI input to be the output of a VSET(I)VLI.
- MachineInstr *DefMI = MRI->getUniqueVRegDef(InReg);
- assert(DefMI);
- if (!isVectorConfigInstr(*DefMI))
+ const VNInfo *Value = LR.getVNInfoBefore(LIS->getMBBEndIdx(PBB));
+ if (!Value)
+ return true;
+ MachineInstr *DefMI = LIS->getInstructionFromIndex(Value->def);
+ if (!DefMI || !isVectorConfigInstr(*DefMI))
return true;
// We found a VSET(I)VLI; make sure it matches the output of the
// predecessor block.
- VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI, *MRI);
+ VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI, LIS);
if (DefInfo != PBBExit)
return true;
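
With no PHI instructions left (the pass now runs after PHI elimination, or after register coalescing at O1 and above), the value a register carries out of each predecessor has to be recovered from its live range instead of from PHI operands. A sketch of that per-predecessor query, assuming LIS and a virtual register Reg:

    // Sketch: recover the incoming value of Reg from predecessor PBB
    // without a PHI instruction.
    const LiveRange &LR = LIS->getInterval(Reg);
    const VNInfo *Incoming = LR.getVNInfoBefore(LIS->getMBBEndIdx(PBB));
    if (Incoming) {
      // A value defined by a real instruction in PBB maps back to it; a
      // value that is itself PHI-defined yields a null MachineInstr.
      if (MachineInstr *DefMI = LIS->getInstructionFromIndex(Incoming->def))
        (void)DefMI; // inspect the defining instruction
    }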
@@ -1377,19 +1428,28 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
if (VLOp.isReg()) {
Register Reg = VLOp.getReg();
- MachineInstr *VLOpDef = MRI->getUniqueVRegDef(Reg);
- assert(VLOpDef);
+ LiveInterval &LI = LIS->getInterval(Reg);
// Erase the AVL operand from the instruction.
VLOp.setReg(RISCV::NoRegister);
VLOp.setIsKill(false);
+ SmallVector<MachineInstr *> DeadMIs;
+ LIS->shrinkToUses(&LI, &DeadMIs);
+ // We might have separate components that need splitting due to
+ // needVSETVLIPHI causing us to skip inserting a new VL def.
+ SmallVector<LiveInterval *> SplitLIs;
+ LIS->splitSeparateComponents(LI, SplitLIs);
// If the AVL was an immediate > 31, then it would have been emitted
// as an ADDI. However, the ADDI might not have been used in the
// vsetvli, or a vsetvli might not have been emitted, so it may be
// dead now.
- if (TII->isAddImmediate(*VLOpDef, Reg) && MRI->use_nodbg_empty(Reg))
- VLOpDef->eraseFromParent();
+ for (MachineInstr *DeadMI : DeadMIs) {
+ if (!TII->isAddImmediate(*DeadMI, Reg))
+ continue;
+ LIS->RemoveMachineInstrFromMaps(*DeadMI);
+ DeadMI->eraseFromParent();
+ }
}
MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
/*isImp*/ true));
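
Clearing the AVL operand can leave the old interval covering uses that no longer exist, and needVSETVLIPHI can leave it split into disconnected components, hence the shrink-and-split cleanup above. The general pattern, sketched below; note the patch itself only erases dead instructions that isAddImmediate identifies, since those are the ADDIs it may have materialized:

    // Sketch: tidy a LiveInterval after deleting one of its uses.
    LiveInterval &LI = LIS->getInterval(Reg);
    SmallVector<MachineInstr *> DeadMIs;
    LIS->shrinkToUses(&LI, &DeadMIs);           // collects now-dead defs

    SmallVector<LiveInterval *> SplitLIs;
    LIS->splitSeparateComponents(LI, SplitLIs); // one interval per component

    for (MachineInstr *DeadMI : DeadMIs) {
      LIS->RemoveMachineInstrFromMaps(*DeadMI); // keep slot indexes in sync
      DeadMI->eraseFromParent();
    }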
@@ -1458,14 +1518,14 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
// we need to prove the value is available at the point we're going
// to insert the vsetvli at.
if (AvailableInfo.hasAVLReg()) {
- const MachineInstr *AVLDefMI = &AvailableInfo.getAVLDefMI();
+ SlotIndex SI = AvailableInfo.getAVLVNInfo()->def;
// This is an inline dominance check which covers the case of
// UnavailablePred being the preheader of a loop.
- if (AVLDefMI->getParent() != UnavailablePred)
+ if (LIS->getMBBFromIndex(SI) != UnavailablePred)
+ return;
+ if (!UnavailablePred->terminators().empty() &&
+ SI >= LIS->getInstructionIndex(*UnavailablePred->getFirstTerminator()))
return;
- for (auto &TermMI : UnavailablePred->terminators())
- if (&TermMI == AVLDefMI)
- return;
}
// If the AVL isn't used in its predecessors then bail, since we have no AVL
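
The rewritten dominance check above works because, within a single basic block, slot indexes follow program order, so "defined before the terminators" reduces to an index comparison. A sketch of the idiom, assuming DefMI and UseMI sit in the same block:

    // Sketch: within one basic block, SlotIndex order is program order,
    // so "DefMI executes before UseMI" is a plain comparison.
    SlotIndex DefIdx = LIS->getInstructionIndex(DefMI);
    SlotIndex UseIdx = LIS->getInstructionIndex(UseMI);
    bool DefBeforeUse = DefIdx < UseIdx; // only meaningful in the same block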
@@ -1526,7 +1586,8 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
static bool canMutatePriorConfig(const MachineInstr &PrevMI,
const MachineInstr &MI,
const DemandedFields &Used,
- const MachineRegisterInfo &MRI) {
+ const MachineRegisterInfo &MRI,
+ const LiveIntervals *LIS) {
// If the VL values aren't equal, return false if either a) the former is
// demanded, or b) we can't rewrite the former to be the latter for
// implementation reasons.
@@ -1537,8 +1598,8 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
if (Used.VLZeroness) {
if (isVLPreservingConfig(PrevMI))
return false;
- if (!getInfoForVSETVLI(PrevMI, MRI)
- .hasEquallyZeroAVL(getInfoForVSETVLI(MI, MRI)))
+ if (!getInfoForVSETVLI(PrevMI, LIS)
+ .hasEquallyZeroAVL(getInfoForVSETVLI(MI, LIS), LIS))
return false;
}
@@ -1588,7 +1649,7 @@ bool RISCVCoalesceVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) {
continue;
}
- if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
+ if (canMutatePriorConfig(MI, *NextMI, Used, *MRI, LIS)) {
if (!isVLPreservingConfig(*NextMI)) {
Register DefReg = NextMI->getOperand(0).getReg();
@@ -1661,9 +1722,17 @@ void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
if (RISCV::isFaultFirstLoad(MI)) {
Register VLOutput = MI.getOperand(1).getReg();
assert(VLOutput.isVirtual());
- if (!MRI->use_nodbg_empty(VLOutput))
- BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
- VLOutput);
+ if (!MI.getOperand(1).isDead()) {
+ auto ReadVLMI = BuildMI(MBB, I, MI.getDebugLoc(),
+ TII->get(RISCV::PseudoReadVL), VLOutput);
+ // Move the LiveInterval's definition down to PseudoReadVL.
+ SlotIndex NewDefSI =
+ LIS->InsertMachineInstrInMaps(*ReadVLMI).getRegSlot();
+ LiveInterval &DefLI = LIS->getInterval(VLOutput);
+ VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
+ DefLI.removeSegment(DefLI.beginIndex(), NewDefSI);
+ DefVNI->def = NewDefSI;
+ }
// We don't use the vl output of the VLEFF/VLSEGFF anymore.
MI.getOperand(1).setReg(RISCV::X0);
}
@@ -1680,6 +1749,7 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
TII = ST->getInstrInfo();
MRI = &MF.getRegInfo();
+ LIS = &getAnalysis<LiveIntervals>();
assert(BlockInfo.empty() && "Expect empty block infos");
BlockInfo.resize(MF.getNumBlockIDs());
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 7b2dcad..5d598a2 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -541,9 +541,16 @@ void RISCVPassConfig::addPreRegAlloc() {
addPass(createRISCVPreRAExpandPseudoPass());
if (TM->getOptLevel() != CodeGenOptLevel::None)
addPass(createRISCVMergeBaseOffsetOptPass());
+
addPass(createRISCVInsertReadWriteCSRPass());
addPass(createRISCVInsertWriteVXRMPass());
- addPass(createRISCVInsertVSETVLIPass());
+
+ // Run RISCVInsertVSETVLI after PHI elimination. On O1 and above do it after
+ // register coalescing so needVSETVLIPHI doesn't need to look through COPYs.
+ if (TM->getOptLevel() == CodeGenOptLevel::None)
+ insertPass(&PHIEliminationID, createRISCVInsertVSETVLIPass());
+ else
+ insertPass(&RegisterCoalescerID, createRISCVInsertVSETVLIPass());
}
void RISCVPassConfig::addFastRegAlloc() {
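
insertPass anchors a pass immediately after the pass named by the given ID rather than at the point where addPreRegAlloc itself runs; that is what places the vsetvli insertion after PHI elimination at -O0 and after register coalescing otherwise, as the pipeline test updates below confirm. A sketch of the idiom with a hypothetical pass (MyPass names are placeholders; the two anchor IDs are the real ones used above):

    // Sketch: inside a TargetPassConfig subclass, schedule a machine pass
    // relative to an existing pipeline pass.
    if (TM->getOptLevel() == CodeGenOptLevel::None)
      insertPass(&PHIEliminationID, createMyPass());
    else
      insertPass(&RegisterCoalescerID, createMyPass());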
diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
index c4a7f95..3aaa5dc 100644
--- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
@@ -42,12 +42,14 @@
; CHECK-NEXT: RISC-V Pre-RA pseudo instruction expansion pass
; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass
; CHECK-NEXT: RISC-V Insert Write VXRM Pass
-; CHECK-NEXT: RISC-V Insert VSETVLI pass
; CHECK-NEXT: Init Undef Pass
; CHECK-NEXT: Eliminate PHI nodes for register allocation
+; CHECK-NEXT: MachineDominator Tree Construction
+; CHECK-NEXT: Slot index numbering
+; CHECK-NEXT: Live Interval Analysis
+; CHECK-NEXT: RISC-V Insert VSETVLI pass
; CHECK-NEXT: Two-Address instruction pass
; CHECK-NEXT: Fast Register Allocator
-; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Slot index numbering
; CHECK-NEXT: Live Interval Analysis
; CHECK-NEXT: RISC-V Coalesce VSETVLI pass
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index 4a71d32..52634b2 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -117,7 +117,6 @@
; CHECK-NEXT: RISC-V Merge Base Offset
; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass
; CHECK-NEXT: RISC-V Insert Write VXRM Pass
-; CHECK-NEXT: RISC-V Insert VSETVLI pass
; CHECK-NEXT: Detect Dead Lanes
; CHECK-NEXT: Init Undef Pass
; CHECK-NEXT: Process Implicit Definitions
@@ -129,6 +128,7 @@
; CHECK-NEXT: Slot index numbering
; CHECK-NEXT: Live Interval Analysis
; CHECK-NEXT: Register Coalescer
+; CHECK-NEXT: RISC-V Insert VSETVLI pass
; CHECK-NEXT: Rename Disconnected Subregister Components
; CHECK-NEXT: Machine Instruction Scheduler
; CHECK-NEXT: Machine Block Frequency Analysis
diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-vmv.ll b/llvm/test/CodeGen/RISCV/rvv/combine-vmv.ll
index 682ad57..61acf1a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/combine-vmv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/combine-vmv.ll
@@ -36,8 +36,8 @@ define <vscale x 4 x i32> @vadd_undef(<vscale x 4 x i32> %a, <vscale x 4 x i32>
define <vscale x 4 x i32> @vadd_same_passthru(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; CHECK-LABEL: vadd_same_passthru:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
; CHECK-NEXT: vadd.vv v14, v10, v12
; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma
; CHECK-NEXT: vmv.v.v v8, v14
diff --git a/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll b/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll
index ff35043..c6b8420 100644
--- a/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll
@@ -149,8 +149,8 @@ define void @constant_zero_stride(ptr %s, ptr %d) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vslideup.vi v9, v8, 2
; CHECK-NEXT: vse8.v v9, (a1)
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll
index dc4d288..2b4b8e9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll
@@ -141,9 +141,9 @@ define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
; SINK-NEXT: andi a4, a3, 1024
; SINK-NEXT: xori a3, a4, 1024
; SINK-NEXT: slli a5, a5, 1
-; SINK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; SINK-NEXT: mv a6, a0
; SINK-NEXT: mv a7, a3
+; SINK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; SINK-NEXT: .LBB1_3: # %vector.body
; SINK-NEXT: # =>This Inner Loop Header: Depth=1
; SINK-NEXT: vl2re32.v v8, (a6)
@@ -183,9 +183,9 @@ define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
; DEFAULT-NEXT: andi a4, a3, 1024
; DEFAULT-NEXT: xori a3, a4, 1024
; DEFAULT-NEXT: slli a5, a5, 1
-; DEFAULT-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; DEFAULT-NEXT: mv a6, a0
; DEFAULT-NEXT: mv a7, a3
+; DEFAULT-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; DEFAULT-NEXT: .LBB1_3: # %vector.body
; DEFAULT-NEXT: # =>This Inner Loop Header: Depth=1
; DEFAULT-NEXT: vl2re32.v v8, (a6)
@@ -459,9 +459,9 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
; SINK-NEXT: addi a3, a2, -1
; SINK-NEXT: andi a4, a3, 1024
; SINK-NEXT: xori a3, a4, 1024
-; SINK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; SINK-NEXT: mv a5, a0
; SINK-NEXT: mv a6, a3
+; SINK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
; SINK-NEXT: .LBB4_3: # %vector.body
; SINK-NEXT: # =>This Inner Loop Header: Depth=1
; SINK-NEXT: vl1re32.v v8, (a5)
@@ -500,9 +500,9 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
; DEFAULT-NEXT: addi a3, a2, -1
; DEFAULT-NEXT: andi a4, a3, 1024
; DEFAULT-NEXT: xori a3, a4, 1024
-; DEFAULT-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; DEFAULT-NEXT: mv a5, a0
; DEFAULT-NEXT: mv a6, a3
+; DEFAULT-NEXT: vsetvli a7, zero, e32, m1, ta, ma
; DEFAULT-NEXT: .LBB4_3: # %vector.body
; DEFAULT-NEXT: # =>This Inner Loop Header: Depth=1
; DEFAULT-NEXT: vl1re32.v v8, (a5)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
index 03e99ba..6358699 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -1155,8 +1155,8 @@ define void @mulhu_v8i16(ptr %x) {
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vmv.v.i v9, 0
; CHECK-NEXT: lui a1, 1048568
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, tu, ma
; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, tu, ma
; CHECK-NEXT: vmv.s.x v10, a1
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v11, 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 539a840..f42f32e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -12092,8 +12092,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64V-NEXT: vsext.vf8 v16, v8
-; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; RV64V-NEXT: vmv1r.v v12, v10
+; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; RV64V-NEXT: vluxei64.v v12, (a0), v16, v0.t
; RV64V-NEXT: vsetivli zero, 16, e8, m2, ta, ma
; RV64V-NEXT: vslidedown.vi v10, v10, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll
index 175a3ee..d1fb30c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll
@@ -369,8 +369,8 @@ define <4 x i8> @vslide1up_4xi8_neg_incorrect_insert3(<4 x i8> %v, i8 %b) {
define <2 x i8> @vslide1up_4xi8_neg_length_changing(<4 x i8> %v, i8 %b) {
; CHECK-LABEL: vslide1up_4xi8_neg_length_changing:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e8, m1, tu, ma
; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vsetivli zero, 4, e8, m1, tu, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vslideup.vi v9, v8, 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll
index f0fcc48..0e6b03b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll
@@ -168,8 +168,8 @@ define void @strided_constant_0(ptr %x, ptr %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vi v9, v8, 4
; CHECK-NEXT: vse16.v v9, (a1)
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
index c38406b..64ad86d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
@@ -62,8 +62,8 @@ define void @gather_masked(ptr noalias nocapture %A, ptr noalias nocapture reado
; CHECK-NEXT: li a4, 5
; CHECK-NEXT: .LBB1_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, mu
; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, mu
; CHECK-NEXT: vlse8.v v9, (a1), a4, v0.t
; CHECK-NEXT: vle8.v v10, (a0)
; CHECK-NEXT: vadd.vv v9, v10, v9
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll
index 4f16ce2..9fa8ab3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll
@@ -394,7 +394,6 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
; CHECK-NEXT: # %bb.11:
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: .LBB16_12:
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT: csrr a4, vlenb
; CHECK-NEXT: li a5, 24
; CHECK-NEXT: mul a4, a4, a5
@@ -402,6 +401,7 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
; CHECK-NEXT: addi a4, a4, 16
; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
; CHECK-NEXT: vmv4r.v v24, v8
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT: csrr a4, vlenb
; CHECK-NEXT: li a5, 56
; CHECK-NEXT: mul a4, a4, a5
diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll b/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll
index 79b1e14..c8bed2d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll
@@ -15,8 +15,8 @@ define i32 @test(i32 %size, ptr %add.ptr, i64 %const) {
; RV32-NEXT: .LBB0_1: # %for.body
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: vmv.s.x v9, zero
-; RV32-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
; RV32-NEXT: vmv1r.v v10, v8
+; RV32-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
; RV32-NEXT: vslideup.vx v10, v9, a2
; RV32-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
; RV32-NEXT: vmv.s.x v10, a0
@@ -40,8 +40,8 @@ define i32 @test(i32 %size, ptr %add.ptr, i64 %const) {
; RV64-NEXT: .LBB0_1: # %for.body
; RV64-NEXT: # =>This Inner Loop Header: Depth=1
; RV64-NEXT: vmv.s.x v9, zero
-; RV64-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
; RV64-NEXT: vmv1r.v v10, v8
+; RV64-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
; RV64-NEXT: vslideup.vx v10, v9, a2
; RV64-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
; RV64-NEXT: vmv.s.x v10, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
index a6b2d31..bb28ff5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
@@ -479,11 +479,11 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
; CHECK-V-NEXT: addi a0, sp, 16
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 1
-; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: add a0, sp, a0
; CHECK-V-NEXT: addi a0, a0, 16
; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-V-NEXT: vslideup.vi v10, v8, 2
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-V-NEXT: vnclip.wi v8, v10, 0
@@ -640,11 +640,11 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-V-NEXT: addi a0, sp, 16
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 1
-; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: add a0, sp, a0
; CHECK-V-NEXT: addi a0, a0, 16
; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-V-NEXT: vslideup.vi v10, v8, 2
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
@@ -811,11 +811,11 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
; CHECK-V-NEXT: addi a0, sp, 16
; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
-; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: add a0, sp, a0
; CHECK-V-NEXT: addi a0, a0, 16
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-V-NEXT: vslideup.vi v8, v10, 2
; CHECK-V-NEXT: li a0, -1
; CHECK-V-NEXT: srli a0, a0, 32
@@ -3850,11 +3850,11 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
; CHECK-V-NEXT: addi a0, sp, 16
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 1
-; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: add a0, sp, a0
; CHECK-V-NEXT: addi a0, a0, 16
; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-V-NEXT: vslideup.vi v10, v8, 2
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-V-NEXT: vnclip.wi v8, v10, 0
@@ -4009,11 +4009,11 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
; CHECK-V-NEXT: addi a0, sp, 16
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 1
-; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: add a0, sp, a0
; CHECK-V-NEXT: addi a0, a0, 16
; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-V-NEXT: vslideup.vi v10, v8, 2
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
@@ -4179,11 +4179,11 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
; CHECK-V-NEXT: addi a0, sp, 16
; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
-; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: add a0, sp, a0
; CHECK-V-NEXT: addi a0, a0, 16
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-V-NEXT: vslideup.vi v8, v10, 2
; CHECK-V-NEXT: li a0, -1
; CHECK-V-NEXT: srli a0, a0, 32
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
index 129fbcf..e73415a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
@@ -21,8 +21,8 @@ define <vscale x 1 x double> @foo(<vscale x 1 x double> %a, <vscale x 1 x double
; SPILL-O0-NEXT: add a1, sp, a1
; SPILL-O0-NEXT: addi a1, a1, 16
; SPILL-O0-NEXT: vs1r.v v9, (a1) # Unknown-size Folded Spill
-; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; SPILL-O0-NEXT: # implicit-def: $v8
+; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; SPILL-O0-NEXT: vfadd.vv v8, v9, v10
; SPILL-O0-NEXT: addi a0, sp, 16
; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
@@ -37,8 +37,8 @@ define <vscale x 1 x double> @foo(<vscale x 1 x double> %a, <vscale x 1 x double
; SPILL-O0-NEXT: vl1r.v v9, (a1) # Unknown-size Folded Reload
; SPILL-O0-NEXT: # kill: def $x11 killed $x10
; SPILL-O0-NEXT: lw a0, 8(sp) # 4-byte Folded Reload
-; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; SPILL-O0-NEXT: # implicit-def: $v8
+; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; SPILL-O0-NEXT: vfadd.vv v8, v9, v10
; SPILL-O0-NEXT: csrr a0, vlenb
; SPILL-O0-NEXT: slli a0, a0, 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll
index e7913fc..9f4718d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll
@@ -13,8 +13,8 @@ define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(ptr %base, i32 %vl) nounwind {
; SPILL-O0-NEXT: csrr a2, vlenb
; SPILL-O0-NEXT: slli a2, a2, 1
; SPILL-O0-NEXT: sub sp, sp, a2
-; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; SPILL-O0-NEXT: # implicit-def: $v8_v9
+; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, tu, ma
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O0-NEXT: vmv1r.v v8, v9
; SPILL-O0-NEXT: addi a0, sp, 16
@@ -90,8 +90,8 @@ define <vscale x 2 x i32> @spill_zvlsseg_nxv2i32(ptr %base, i32 %vl) nounwind {
; SPILL-O0-NEXT: csrr a2, vlenb
; SPILL-O0-NEXT: slli a2, a2, 1
; SPILL-O0-NEXT: sub sp, sp, a2
-; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; SPILL-O0-NEXT: # implicit-def: $v8_v9
+; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, tu, ma
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O0-NEXT: vmv1r.v v8, v9
; SPILL-O0-NEXT: addi a0, sp, 16
@@ -167,8 +167,8 @@ define <vscale x 4 x i32> @spill_zvlsseg_nxv4i32(ptr %base, i32 %vl) nounwind {
; SPILL-O0-NEXT: csrr a2, vlenb
; SPILL-O0-NEXT: slli a2, a2, 1
; SPILL-O0-NEXT: sub sp, sp, a2
-; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; SPILL-O0-NEXT: # implicit-def: $v8m2_v10m2
+; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, tu, ma
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O0-NEXT: vmv2r.v v8, v10
; SPILL-O0-NEXT: addi a0, sp, 16
@@ -247,8 +247,8 @@ define <vscale x 8 x i32> @spill_zvlsseg_nxv8i32(ptr %base, i32 %vl) nounwind {
; SPILL-O0-NEXT: csrr a2, vlenb
; SPILL-O0-NEXT: slli a2, a2, 2
; SPILL-O0-NEXT: sub sp, sp, a2
-; SPILL-O0-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; SPILL-O0-NEXT: # implicit-def: $v8m4_v12m4
+; SPILL-O0-NEXT: vsetvli zero, a1, e32, m4, tu, ma
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O0-NEXT: vmv4r.v v8, v12
; SPILL-O0-NEXT: addi a0, sp, 16
@@ -327,8 +327,8 @@ define <vscale x 4 x i32> @spill_zvlsseg3_nxv4i32(ptr %base, i32 %vl) nounwind {
; SPILL-O0-NEXT: csrr a2, vlenb
; SPILL-O0-NEXT: slli a2, a2, 1
; SPILL-O0-NEXT: sub sp, sp, a2
-; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; SPILL-O0-NEXT: # implicit-def: $v8m2_v10m2_v12m2
+; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, tu, ma
; SPILL-O0-NEXT: vlseg3e32.v v8, (a0)
; SPILL-O0-NEXT: vmv2r.v v8, v10
; SPILL-O0-NEXT: addi a0, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
index 34eb58e..483f689 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
@@ -24,8 +24,8 @@ define <vscale x 1 x double> @foo(<vscale x 1 x double> %a, <vscale x 1 x double
; SPILL-O0-NEXT: add a1, sp, a1
; SPILL-O0-NEXT: addi a1, a1, 32
; SPILL-O0-NEXT: vs1r.v v9, (a1) # Unknown-size Folded Spill
-; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; SPILL-O0-NEXT: # implicit-def: $v8
+; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; SPILL-O0-NEXT: vfadd.vv v8, v9, v10
; SPILL-O0-NEXT: addi a0, sp, 32
; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
@@ -40,8 +40,8 @@ define <vscale x 1 x double> @foo(<vscale x 1 x double> %a, <vscale x 1 x double
; SPILL-O0-NEXT: vl1r.v v9, (a1) # Unknown-size Folded Reload
; SPILL-O0-NEXT: # kill: def $x11 killed $x10
; SPILL-O0-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
-; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; SPILL-O0-NEXT: # implicit-def: $v8
+; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; SPILL-O0-NEXT: vfadd.vv v8, v9, v10
; SPILL-O0-NEXT: csrr a0, vlenb
; SPILL-O0-NEXT: slli a0, a0, 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll
index dd575b3..4ea9fab 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll
@@ -13,8 +13,8 @@ define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(ptr %base, i64 %vl) nounwind {
; SPILL-O0-NEXT: csrr a2, vlenb
; SPILL-O0-NEXT: slli a2, a2, 1
; SPILL-O0-NEXT: sub sp, sp, a2
-; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; SPILL-O0-NEXT: # implicit-def: $v8_v9
+; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, tu, ma
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O0-NEXT: vmv1r.v v8, v9
; SPILL-O0-NEXT: addi a0, sp, 16
@@ -90,8 +90,8 @@ define <vscale x 2 x i32> @spill_zvlsseg_nxv2i32(ptr %base, i64 %vl) nounwind {
; SPILL-O0-NEXT: csrr a2, vlenb
; SPILL-O0-NEXT: slli a2, a2, 1
; SPILL-O0-NEXT: sub sp, sp, a2
-; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; SPILL-O0-NEXT: # implicit-def: $v8_v9
+; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, tu, ma
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O0-NEXT: vmv1r.v v8, v9
; SPILL-O0-NEXT: addi a0, sp, 16
@@ -167,8 +167,8 @@ define <vscale x 4 x i32> @spill_zvlsseg_nxv4i32(ptr %base, i64 %vl) nounwind {
; SPILL-O0-NEXT: csrr a2, vlenb
; SPILL-O0-NEXT: slli a2, a2, 1
; SPILL-O0-NEXT: sub sp, sp, a2
-; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; SPILL-O0-NEXT: # implicit-def: $v8m2_v10m2
+; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, tu, ma
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O0-NEXT: vmv2r.v v8, v10
; SPILL-O0-NEXT: addi a0, sp, 16
@@ -247,8 +247,8 @@ define <vscale x 8 x i32> @spill_zvlsseg_nxv8i32(ptr %base, i64 %vl) nounwind {
; SPILL-O0-NEXT: csrr a2, vlenb
; SPILL-O0-NEXT: slli a2, a2, 2
; SPILL-O0-NEXT: sub sp, sp, a2
-; SPILL-O0-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; SPILL-O0-NEXT: # implicit-def: $v8m4_v12m4
+; SPILL-O0-NEXT: vsetvli zero, a1, e32, m4, tu, ma
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O0-NEXT: vmv4r.v v8, v12
; SPILL-O0-NEXT: addi a0, sp, 16
@@ -327,8 +327,8 @@ define <vscale x 4 x i32> @spill_zvlsseg3_nxv4i32(ptr %base, i64 %vl) nounwind {
; SPILL-O0-NEXT: csrr a2, vlenb
; SPILL-O0-NEXT: slli a2, a2, 1
; SPILL-O0-NEXT: sub sp, sp, a2
-; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; SPILL-O0-NEXT: # implicit-def: $v8m2_v10m2_v12m2
+; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, tu, ma
; SPILL-O0-NEXT: vlseg3e32.v v8, (a0)
; SPILL-O0-NEXT: vmv2r.v v8, v10
; SPILL-O0-NEXT: addi a0, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
index 1a3a1a6..743016a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -893,10 +893,10 @@ define void @test_dag_loop() {
; CHECK-LABEL: test_dag_loop:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma
-; CHECK-NEXT: vmclr.m v0
; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmclr.m v0
+; CHECK-NEXT: vmv.v.i v12, 0
; CHECK-NEXT: vsetivli zero, 0, e8, m4, tu, mu
-; CHECK-NEXT: vmv4r.v v12, v8
; CHECK-NEXT: vssubu.vx v12, v8, zero, v0.t
; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma
; CHECK-NEXT: vmseq.vv v0, v12, v8
@@ -942,8 +942,8 @@ declare <vscale x 2 x i32> @llvm.riscv.vredsum.nxv2i32.nxv2i32(
define <vscale x 2 x i32> @vredsum(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %vl) {
; CHECK-LABEL: vredsum:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vredsum.vs v11, v9, v10
; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
; CHECK-NEXT: vmerge.vvm v8, v8, v11, v0
@@ -967,8 +967,8 @@ define <vscale x 2 x float> @vfredusum(<vscale x 2 x float> %passthru, <vscale x
; CHECK-LABEL: vfredusum:
; CHECK: # %bb.0:
; CHECK-NEXT: fsrmi a1, 0
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vfredusum.vs v11, v9, v10
; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
; CHECK-NEXT: vmerge.vvm v8, v8, v11, v0
diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
index 6186723..8a297db 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
@@ -255,9 +255,9 @@ define void @sink_splat_mul_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
-; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: mv a6, a0
; CHECK-NEXT: mv a7, a3
+; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB7_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
@@ -346,9 +346,9 @@ define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
-; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: mv a6, a0
; CHECK-NEXT: mv a7, a3
+; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB8_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
@@ -437,9 +437,9 @@ define void @sink_splat_sub_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
-; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: mv a6, a0
; CHECK-NEXT: mv a7, a3
+; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB9_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
@@ -528,9 +528,9 @@ define void @sink_splat_rsub_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
-; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: mv a6, a0
; CHECK-NEXT: mv a7, a3
+; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB10_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
@@ -619,9 +619,9 @@ define void @sink_splat_and_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
-; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: mv a6, a0
; CHECK-NEXT: mv a7, a3
+; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB11_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
@@ -710,9 +710,9 @@ define void @sink_splat_or_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
-; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: mv a6, a0
; CHECK-NEXT: mv a7, a3
+; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB12_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
@@ -801,9 +801,9 @@ define void @sink_splat_xor_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
-; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: mv a6, a0
; CHECK-NEXT: mv a7, a3
+; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB13_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
@@ -994,9 +994,9 @@ define void @sink_splat_shl_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
-; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: mv a6, a0
; CHECK-NEXT: mv a7, a3
+; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB17_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
@@ -1085,9 +1085,9 @@ define void @sink_splat_lshr_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
-; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: mv a6, a0
; CHECK-NEXT: mv a7, a3
+; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB18_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
@@ -1176,9 +1176,9 @@ define void @sink_splat_ashr_scalable(ptr nocapture %a) {
; CHECK-NEXT: andi a3, a1, 1024
; CHECK-NEXT: xori a1, a3, 1024
; CHECK-NEXT: slli a4, a4, 1
-; CHECK-NEXT: vsetvli a5, zero, e32, m2, ta, ma
; CHECK-NEXT: mv a5, a0
; CHECK-NEXT: mv a6, a1
+; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB19_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a5)
@@ -1468,9 +1468,9 @@ define void @sink_splat_fmul_scalable(ptr nocapture %a, float %x) {
; CHECK-NEXT: addi a3, a2, -1
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
-; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; CHECK-NEXT: mv a5, a0
; CHECK-NEXT: mv a6, a3
+; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB26_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v8, (a5)
@@ -1558,9 +1558,9 @@ define void @sink_splat_fdiv_scalable(ptr nocapture %a, float %x) {
; CHECK-NEXT: addi a3, a2, -1
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
-; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; CHECK-NEXT: mv a5, a0
; CHECK-NEXT: mv a6, a3
+; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB27_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v8, (a5)
@@ -1648,9 +1648,9 @@ define void @sink_splat_frdiv_scalable(ptr nocapture %a, float %x) {
; CHECK-NEXT: addi a3, a2, -1
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
-; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; CHECK-NEXT: mv a5, a0
; CHECK-NEXT: mv a6, a3
+; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB28_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v8, (a5)
@@ -1738,9 +1738,9 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
; CHECK-NEXT: addi a3, a2, -1
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
-; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; CHECK-NEXT: mv a5, a0
; CHECK-NEXT: mv a6, a3
+; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB29_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v8, (a5)
@@ -1828,9 +1828,9 @@ define void @sink_splat_fsub_scalable(ptr nocapture %a, float %x) {
; CHECK-NEXT: addi a3, a2, -1
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
-; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; CHECK-NEXT: mv a5, a0
; CHECK-NEXT: mv a6, a3
+; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB30_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v8, (a5)
@@ -1918,9 +1918,9 @@ define void @sink_splat_frsub_scalable(ptr nocapture %a, float %x) {
; CHECK-NEXT: addi a3, a2, -1
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
-; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; CHECK-NEXT: mv a5, a0
; CHECK-NEXT: mv a6, a3
+; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB31_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v8, (a5)
@@ -2084,10 +2084,10 @@ define void @sink_splat_fma_scalable(ptr noalias nocapture %a, ptr noalias nocap
; CHECK-NEXT: addi a4, a3, -1
; CHECK-NEXT: andi a5, a4, 1024
; CHECK-NEXT: xori a4, a5, 1024
-; CHECK-NEXT: vsetvli a6, zero, e32, m1, ta, ma
; CHECK-NEXT: mv a6, a0
; CHECK-NEXT: mv a7, a1
; CHECK-NEXT: mv t0, a4
+; CHECK-NEXT: vsetvli t1, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB34_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v8, (a6)
@@ -2184,10 +2184,10 @@ define void @sink_splat_fma_commute_scalable(ptr noalias nocapture %a, ptr noali
; CHECK-NEXT: addi a4, a3, -1
; CHECK-NEXT: andi a5, a4, 1024
; CHECK-NEXT: xori a4, a5, 1024
-; CHECK-NEXT: vsetvli a6, zero, e32, m1, ta, ma
; CHECK-NEXT: mv a6, a0
; CHECK-NEXT: mv a7, a1
; CHECK-NEXT: mv t0, a4
+; CHECK-NEXT: vsetvli t1, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB35_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v8, (a6)
@@ -2498,9 +2498,9 @@ define void @sink_splat_udiv_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
-; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: mv a6, a0
; CHECK-NEXT: mv a7, a3
+; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB42_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
@@ -2589,9 +2589,9 @@ define void @sink_splat_sdiv_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
-; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: mv a6, a0
; CHECK-NEXT: mv a7, a3
+; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB43_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
@@ -2680,9 +2680,9 @@ define void @sink_splat_urem_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
-; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: mv a6, a0
; CHECK-NEXT: mv a7, a3
+; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB44_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
@@ -2771,9 +2771,9 @@ define void @sink_splat_srem_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
-; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: mv a6, a0
; CHECK-NEXT: mv a7, a3
+; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB45_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
diff --git a/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll b/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll
index f41a3ec..48c3059 100644
--- a/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll
@@ -161,8 +161,8 @@ declare <vscale x 8 x i8> @llvm.riscv.vrgatherei16.vv.nxv8i8.i64(<vscale x 8 x i
define void @repeat_shuffle(<2 x double> %v, ptr noalias %q) {
; CHECK-LABEL: repeat_shuffle:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vmv2r.v v10, v8
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vslideup.vi v10, v8, 2
; CHECK-NEXT: vse64.v v10, (a0)
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll b/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll
index 25e3468..439301f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll
@@ -711,8 +711,8 @@ define <vscale x 1 x i64> @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64(<vscale
; RV32: # %bb.0: # %entry
; RV32-NEXT: vsetvli a2, a2, e64, m1, ta, ma
; RV32-NEXT: slli a2, a2, 1
-; RV32-NEXT: vsetvli zero, a2, e32, m1, tu, ma
; RV32-NEXT: vmv1r.v v10, v8
+; RV32-NEXT: vsetvli zero, a2, e32, m1, tu, ma
; RV32-NEXT: vslide1down.vx v10, v9, a0
; RV32-NEXT: vslide1down.vx v8, v10, a1
; RV32-NEXT: ret
@@ -743,8 +743,8 @@ define <vscale x 1 x i64> @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64(<vscale x
; RV32: # %bb.0: # %entry
; RV32-NEXT: vsetvli a2, a2, e64, m1, ta, ma
; RV32-NEXT: slli a2, a2, 1
-; RV32-NEXT: vsetvli zero, a2, e32, m1, tu, ma
; RV32-NEXT: vmv1r.v v10, v8
+; RV32-NEXT: vsetvli zero, a2, e32, m1, tu, ma
; RV32-NEXT: vslide1up.vx v10, v9, a1
; RV32-NEXT: vslide1up.vx v8, v10, a0
; RV32-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll b/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll
index 6435c1c..79bd60d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll
@@ -120,8 +120,8 @@ entry:
define <vscale x 1 x i8> @vadd_vv_passthru(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, i32 %2) nounwind {
; CHECK-LABEL: vadd_vv_passthru:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma
; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma
; CHECK-NEXT: vadd.vv v10, v8, v9
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vadd.vv v9, v8, v8
@@ -152,8 +152,8 @@ entry:
define <vscale x 1 x i8> @vadd_vv_passthru_negative(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, i32 %2) nounwind {
; CHECK-LABEL: vadd_vv_passthru_negative:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma
; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma
; CHECK-NEXT: vadd.vv v10, v8, v9
; CHECK-NEXT: vadd.vv v9, v8, v10
; CHECK-NEXT: vadd.vv v8, v8, v9
@@ -183,8 +183,8 @@ entry:
define <vscale x 1 x i8> @vadd_vv_mask(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, i32 %2, <vscale x 1 x i1> %m) nounwind {
; CHECK-LABEL: vadd_vv_mask:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
; CHECK-NEXT: vadd.vv v10, v8, v9, v0.t
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vadd.vv v9, v8, v8, v0.t
@@ -218,8 +218,8 @@ entry:
define <vscale x 1 x i8> @vadd_vv_mask_negative(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, i32 %2, <vscale x 1 x i1> %m, <vscale x 1 x i1> %m2) nounwind {
; CHECK-LABEL: vadd_vv_mask_negative:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
; CHECK-NEXT: vadd.vv v11, v8, v9, v0.t
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vadd.vv v9, v8, v11, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll
index fab76ac..78f3792 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll
@@ -85,8 +85,8 @@ define <vscale x 1 x float> @vfmacc_vv_nxv1f32_tu(<vscale x 1 x half> %a, <vscal
define <vscale x 1 x float> @vfmacc_vv_nxv1f32_masked__tu(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32_masked__tu:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vmv1r.v v11, v10
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfwmacc.vv v11, v8, v9, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e32, mf2, tu, ma
; ZVFH-NEXT: vmerge.vvm v10, v10, v11, v0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll b/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll
index 0c0a3dc..462d499 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll
@@ -16,14 +16,14 @@ define internal void @foo(<vscale x 1 x i16> %v15, <vscale x 1 x i16> %0, <vscal
; NOSUBREG-NEXT: vmv.v.i v14, 0
; NOSUBREG-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; NOSUBREG-NEXT: vmv.v.i v9, 0
+; NOSUBREG-NEXT: vmv.v.i v8, 0
; NOSUBREG-NEXT: vsetivli zero, 4, e8, m1, tu, ma
-; NOSUBREG-NEXT: vmv1r.v v8, v9
; NOSUBREG-NEXT: vrgatherei16.vv v8, v9, v14
; NOSUBREG-NEXT: .LBB0_1: # %loopIR3.i.i
; NOSUBREG-NEXT: # =>This Inner Loop Header: Depth=1
; NOSUBREG-NEXT: vl1r.v v9, (zero)
-; NOSUBREG-NEXT: vsetivli zero, 4, e8, m1, tu, ma
; NOSUBREG-NEXT: vmv1r.v v13, v12
+; NOSUBREG-NEXT: vsetivli zero, 4, e8, m1, tu, ma
; NOSUBREG-NEXT: vrgatherei16.vv v13, v9, v10
; NOSUBREG-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; NOSUBREG-NEXT: vand.vv v9, v8, v13
@@ -36,14 +36,14 @@ define internal void @foo(<vscale x 1 x i16> %v15, <vscale x 1 x i16> %0, <vscal
; SUBREG-NEXT: vmv.v.i v14, 0
; SUBREG-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; SUBREG-NEXT: vmv.v.i v9, 0
+; SUBREG-NEXT: vmv.v.i v8, 0
; SUBREG-NEXT: vsetivli zero, 4, e8, m1, tu, ma
-; SUBREG-NEXT: vmv1r.v v8, v9
; SUBREG-NEXT: vrgatherei16.vv v8, v9, v14
; SUBREG-NEXT: .LBB0_1: # %loopIR3.i.i
; SUBREG-NEXT: # =>This Inner Loop Header: Depth=1
; SUBREG-NEXT: vl1r.v v9, (zero)
-; SUBREG-NEXT: vsetivli zero, 4, e8, m1, tu, ma
; SUBREG-NEXT: vmv1r.v v13, v12
+; SUBREG-NEXT: vsetivli zero, 4, e8, m1, tu, ma
; SUBREG-NEXT: vrgatherei16.vv v13, v9, v10
; SUBREG-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; SUBREG-NEXT: vand.vv v9, v8, v13
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
index 088d121..25aa3a7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
@@ -91,13 +91,11 @@ define <vscale x 1 x double> @test3(i64 %avl, i8 zeroext %cond, <vscale x 1 x do
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfadd.vv v9, v8, v9
; CHECK-NEXT: vfmul.vv v8, v9, v8
-; CHECK-NEXT: # implicit-def: $x10
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB2_2: # %if.else
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfsub.vv v9, v8, v9
; CHECK-NEXT: vfmul.vv v8, v9, v8
-; CHECK-NEXT: # implicit-def: $x10
; CHECK-NEXT: ret
entry:
%tobool = icmp eq i8 %cond, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
index ef83440..295d4c5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
@@ -1,6 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc %s -o - -mtriple=riscv64 -mattr=v \
-# RUN: -run-pass=riscv-insert-vsetvli | FileCheck %s
+# RUN: -run-pass=phi-node-elimination,register-coalescer,riscv-insert-vsetvli | FileCheck %s
--- |
source_filename = "vsetvli-insert.ll"
@@ -191,8 +191,7 @@ body: |
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 undef $noreg, [[COPY2]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY $x0
- ; CHECK-NEXT: BEQ [[COPY3]], [[COPY4]], %bb.2
+ ; CHECK-NEXT: BEQ [[COPY3]], $x0, %bb.2
; CHECK-NEXT: PseudoBR %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.if.then:
@@ -204,11 +203,10 @@ body: |
; CHECK-NEXT: bb.2.if.else:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PseudoVSUB_VV_M1_:%[0-9]+]]:vr = PseudoVSUB_VV_M1 undef $noreg, [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVSUB_VV_M1 undef $noreg, [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.if.end:
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:vr = PHI [[PseudoVADD_VV_M1_]], %bb.1, [[PseudoVSUB_VV_M1_]], %bb.2
- ; CHECK-NEXT: $v8 = COPY [[PHI]]
+ ; CHECK-NEXT: $v8 = COPY [[PseudoVADD_VV_M1_]]
; CHECK-NEXT: PseudoRET implicit $v8
bb.0.entry:
successors: %bb.2(0x30000000), %bb.1(0x50000000)
@@ -270,26 +268,24 @@ body: |
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 215 /* e32, mf2, ta, ma */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: [[PseudoVLE32_V_MF2_:%[0-9]+]]:vr = PseudoVLE32_V_MF2 undef $noreg, [[COPY2]], $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY $x0
- ; CHECK-NEXT: BEQ [[COPY3]], [[COPY4]], %bb.2
+ ; CHECK-NEXT: BEQ [[COPY3]], $x0, %bb.2
; CHECK-NEXT: PseudoBR %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.if.then:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype, implicit $vl
- ; CHECK-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 undef $noreg, [[PseudoVLE32_V_MF2_]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: early-clobber %9:vr = PseudoVZEXT_VF2_M1 undef $noreg, [[PseudoVLE32_V_MF2_]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: PseudoBR %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.if.else:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype, implicit $vl
- ; CHECK-NEXT: early-clobber %2:vr = PseudoVSEXT_VF2_M1 undef $noreg, [[PseudoVLE32_V_MF2_]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: early-clobber %9:vr = PseudoVSEXT_VF2_M1 undef $noreg, [[PseudoVLE32_V_MF2_]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.if.end:
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:vr = PHI %1, %bb.1, %2, %bb.2
- ; CHECK-NEXT: PseudoVSE64_V_M1 [[PHI]], [[COPY1]], $noreg, 6 /* e64 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: PseudoVSE64_V_M1 %9, [[COPY1]], $noreg, 6 /* e64 */, implicit $vl, implicit $vtype
; CHECK-NEXT: PseudoRET
bb.0.entry:
successors: %bb.2(0x30000000), %bb.1(0x50000000)
@@ -349,8 +345,7 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v9
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v8
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x10
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY $x0
- ; CHECK-NEXT: BEQ [[COPY3]], [[COPY4]], %bb.2
+ ; CHECK-NEXT: BEQ [[COPY3]], $x0, %bb.2
; CHECK-NEXT: PseudoBR %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.if.then:
@@ -364,11 +359,10 @@ body: |
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: [[PseudoVSUB_VV_M1_:%[0-9]+]]:vr = PseudoVSUB_VV_M1 undef $noreg, [[COPY1]], [[COPY1]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVSUB_VV_M1 undef $noreg, [[COPY1]], [[COPY1]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.if.end:
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:vr = PHI [[PseudoVADD_VV_M1_]], %bb.1, [[PseudoVSUB_VV_M1_]], %bb.2
- ; CHECK-NEXT: [[PseudoVMV_X_S:%[0-9]+]]:gpr = PseudoVMV_X_S [[PHI]], 6 /* e64 */, implicit $vtype
+ ; CHECK-NEXT: [[PseudoVMV_X_S:%[0-9]+]]:gpr = PseudoVMV_X_S [[PseudoVADD_VV_M1_]], 6 /* e64 */, implicit $vtype
; CHECK-NEXT: $x10 = COPY [[PseudoVMV_X_S]]
; CHECK-NEXT: PseudoRET implicit $x10
bb.0.entry:
@@ -429,9 +423,8 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v9
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v8
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x10
- ; CHECK-NEXT: [[PseudoVSETVLI:%[0-9]+]]:gprnox0 = PseudoVSETVLI [[COPY]], 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY $x0
- ; CHECK-NEXT: BEQ [[COPY3]], [[COPY4]], %bb.2
+ ; CHECK-NEXT: dead [[PseudoVSETVLI:%[0-9]+]]:gprnox0 = PseudoVSETVLI [[COPY]], 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: BEQ [[COPY3]], $x0, %bb.2
; CHECK-NEXT: PseudoBR %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.if.then:
@@ -443,11 +436,10 @@ body: |
; CHECK-NEXT: bb.2.if.else:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PseudoVSUB_VV_M1_:%[0-9]+]]:vr = PseudoVSUB_VV_M1 undef $noreg, [[COPY2]], [[COPY1]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVSUB_VV_M1 undef $noreg, [[COPY2]], [[COPY1]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.if.end:
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:vr = PHI [[PseudoVADD_VV_M1_]], %bb.1, [[PseudoVSUB_VV_M1_]], %bb.2
- ; CHECK-NEXT: $v8 = COPY [[PHI]]
+ ; CHECK-NEXT: $v8 = COPY [[PseudoVADD_VV_M1_]]
; CHECK-NEXT: PseudoRET implicit $v8
bb.0.entry:
successors: %bb.2(0x30000000), %bb.1(0x50000000)
@@ -499,7 +491,6 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x11
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x10
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr = IMPLICIT_DEF
; CHECK-NEXT: dead [[PseudoVSETVLIX0_:%[0-9]+]]:gpr = PseudoVSETVLIX0 killed $x0, 223 /* e64, mf2, ta, ma */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: [[PseudoVID_V_MF2_:%[0-9]+]]:vr = PseudoVID_V_MF2 undef $noreg, -1, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: dead [[PseudoVSETVLIX0_1:%[0-9]+]]:gpr = PseudoVSETVLIX0 killed $x0, 215 /* e32, mf2, ta, ma */, implicit-def $vl, implicit-def $vtype
@@ -508,25 +499,24 @@ body: |
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PseudoVMSEQ_VI_MF2_:%[0-9]+]]:vmv0 = PseudoVMSEQ_VI_MF2 killed [[PseudoVID_V_MF2_]], 0, -1, 5 /* e32 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[PseudoVMSEQ_VI_MF2_:%[0-9]+]]:vmv0 = PseudoVMSEQ_VI_MF2 [[PseudoVID_V_MF2_]], 0, -1, 5 /* e32 */, implicit $vl, implicit $vtype
; CHECK-NEXT: $v0 = COPY [[PseudoVMSEQ_VI_MF2_]]
; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 23 /* e32, mf2, tu, mu */, implicit-def $vl, implicit-def $vtype, implicit $vl
- ; CHECK-NEXT: [[PseudoVLE32_V_MF2_MASK:%[0-9]+]]:vrnov0 = PseudoVLE32_V_MF2_MASK [[PseudoVMV_V_I_MF2_]], killed [[COPY]], $v0, -1, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[PseudoVLE32_V_MF2_MASK:%[0-9]+]]:vrnov0 = PseudoVLE32_V_MF2_MASK [[PseudoVMV_V_I_MF2_]], [[COPY]], $v0, -1, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 197 /* e8, mf8, ta, ma */, implicit-def $vl, implicit-def $vtype, implicit $vl
; CHECK-NEXT: [[PseudoVCPOP_M_B1_:%[0-9]+]]:gpr = PseudoVCPOP_M_B1 [[PseudoVMSEQ_VI_MF2_]], -1, 0 /* e8 */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0
- ; CHECK-NEXT: BEQ killed [[PseudoVCPOP_M_B1_]], [[COPY2]], %bb.3
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr = IMPLICIT_DEF
+ ; CHECK-NEXT: BEQ [[PseudoVCPOP_M_B1_]], $x0, %bb.3
; CHECK-NEXT: PseudoBR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[LWU:%[0-9]+]]:gpr = LWU [[COPY1]], 0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr = LWU [[COPY1]], 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr = PHI [[DEF]], %bb.1, [[LWU]], %bb.2
; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 215 /* e32, mf2, ta, ma */, implicit-def $vl, implicit-def $vtype, implicit $vl
- ; CHECK-NEXT: [[PseudoVADD_VX_MF2_:%[0-9]+]]:vr = nsw PseudoVADD_VX_MF2 undef $noreg, [[PseudoVLE32_V_MF2_MASK]], [[PHI]], -1, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[PseudoVADD_VX_MF2_:%[0-9]+]]:vr = nsw PseudoVADD_VX_MF2 undef $noreg, [[PseudoVLE32_V_MF2_MASK]], [[DEF]], -1, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: $v0 = COPY [[PseudoVADD_VX_MF2_]]
; CHECK-NEXT: PseudoRET implicit $v0
bb.0:
@@ -593,13 +583,12 @@ body: |
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr = PHI [[COPY2]], %bb.0, %10, %bb.1
- ; CHECK-NEXT: [[PseudoVADD_VX_M1_:%[0-9]+]]:vr = PseudoVADD_VX_M1 undef $noreg, [[PseudoVID_V_M1_]], [[PHI]], -1, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: [[MUL:%[0-9]+]]:gpr = MUL [[PHI]], [[SRLI]]
+ ; CHECK-NEXT: [[PseudoVADD_VX_M1_:%[0-9]+]]:vr = PseudoVADD_VX_M1 undef $noreg, [[PseudoVID_V_M1_]], [[COPY2]], -1, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[MUL:%[0-9]+]]:gpr = MUL [[COPY2]], [[SRLI]]
; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[COPY]], [[MUL]]
- ; CHECK-NEXT: PseudoVSE32_V_MF2 killed [[PseudoVADD_VX_M1_]], killed [[ADD]], -1, 5 /* e32 */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI [[PHI]], 1
- ; CHECK-NEXT: BLTU [[ADDI]], [[COPY1]], %bb.1
+ ; CHECK-NEXT: PseudoVSE32_V_MF2 [[PseudoVADD_VX_M1_]], [[ADD]], -1, 5 /* e32 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = ADDI [[COPY2]], 1
+ ; CHECK-NEXT: BLTU [[COPY2]], [[COPY1]], %bb.1
; CHECK-NEXT: PseudoBR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
@@ -661,17 +650,16 @@ body: |
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr = PHI [[COPY2]], %bb.0, %10, %bb.2
- ; CHECK-NEXT: [[PseudoVADD_VX_M1_:%[0-9]+]]:vr = PseudoVADD_VX_M1 undef $noreg, [[PseudoVID_V_M1_]], [[PHI]], -1, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: [[MUL:%[0-9]+]]:gpr = MUL [[PHI]], [[SRLI]]
+ ; CHECK-NEXT: [[PseudoVADD_VX_M1_:%[0-9]+]]:vr = PseudoVADD_VX_M1 undef $noreg, [[PseudoVID_V_M1_]], [[COPY2]], -1, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[MUL:%[0-9]+]]:gpr = MUL [[COPY2]], [[SRLI]]
; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[COPY]], [[MUL]]
- ; CHECK-NEXT: PseudoVSE32_V_MF2 killed [[PseudoVADD_VX_M1_]], killed [[ADD]], -1, 5 /* e32 */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI [[PHI]], 1
+ ; CHECK-NEXT: PseudoVSE32_V_MF2 [[PseudoVADD_VX_M1_]], [[ADD]], -1, 5 /* e32 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = ADDI [[COPY2]], 1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: BLTU [[ADDI]], [[COPY1]], %bb.1
+ ; CHECK-NEXT: BLTU [[COPY2]], [[COPY1]], %bb.1
; CHECK-NEXT: PseudoBR %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
@@ -750,31 +738,24 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 4, 208 /* e32, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: [[PseudoVMV_V_I_M1_:%[0-9]+]]:vr = PseudoVMV_V_I_M1 undef $noreg, 0, 4, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vr = COPY [[PseudoVMV_V_I_M1_]]
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vr = COPY [[COPY2]]
; CHECK-NEXT: [[LUI:%[0-9]+]]:gpr = LUI 1
- ; CHECK-NEXT: [[ADDIW:%[0-9]+]]:gpr = ADDIW killed [[LUI]], -2048
+ ; CHECK-NEXT: [[ADDIW:%[0-9]+]]:gpr = ADDIW [[LUI]], -2048
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.vector.body:
; CHECK-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr = PHI [[COPY1]], %bb.0, %5, %bb.1
- ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gpr = PHI [[ADDIW]], %bb.0, %4, %bb.1
- ; CHECK-NEXT: [[PHI2:%[0-9]+]]:vr = PHI [[COPY3]], %bb.0, %16, %bb.1
- ; CHECK-NEXT: [[PseudoVLE32_V_M1_:%[0-9]+]]:vr = PseudoVLE32_V_M1 undef $noreg, [[PHI]], 4, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype :: (load (s128) from %ir.lsr.iv12, align 4)
- ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 undef $noreg, killed [[PseudoVLE32_V_M1_]], [[PHI2]], 4, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = nsw ADDI [[PHI1]], -4
- ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI [[PHI]], 16
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY $x0
- ; CHECK-NEXT: BNE [[ADDI]], [[COPY4]], %bb.1
+ ; CHECK-NEXT: [[PseudoVLE32_V_M1_:%[0-9]+]]:vr = PseudoVLE32_V_M1 undef $noreg, [[COPY1]], 4, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype :: (load (s128) from %ir.lsr.iv12, align 4)
+ ; CHECK-NEXT: [[PseudoVMV_V_I_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 undef $noreg, [[PseudoVLE32_V_M1_]], [[PseudoVMV_V_I_M1_]], 4, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[ADDIW:%[0-9]+]]:gpr = nsw ADDI [[ADDIW]], -4
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = ADDI [[COPY1]], 16
+ ; CHECK-NEXT: BNE [[ADDIW]], $x0, %bb.1
; CHECK-NEXT: PseudoBR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.middle.block:
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr = COPY $x0
- ; CHECK-NEXT: [[PseudoVMV_S_X:%[0-9]+]]:vr = PseudoVMV_S_X undef $noreg, [[COPY5]], 1, 5 /* e32 */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: [[PseudoVREDSUM_VS_M1_E8_:%[0-9]+]]:vr = PseudoVREDSUM_VS_M1_E8 undef $noreg, [[PseudoVADD_VV_M1_]], killed [[PseudoVMV_S_X]], 4, 5 /* e32 */, 1 /* ta, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[PseudoVMV_S_X:%[0-9]+]]:vr = PseudoVMV_S_X undef $noreg, $x0, 1, 5 /* e32 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[PseudoVREDSUM_VS_M1_E8_:%[0-9]+]]:vr = PseudoVREDSUM_VS_M1_E8 undef $noreg, [[PseudoVMV_V_I_M1_]], [[PseudoVMV_S_X]], 4, 5 /* e32 */, 1 /* ta, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 1, 208 /* e32, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: PseudoVSE32_V_M1 killed [[PseudoVREDSUM_VS_M1_E8_]], [[COPY]], 1, 5 /* e32 */, implicit $vl, implicit $vtype :: (store (s32) into %ir.res)
+ ; CHECK-NEXT: PseudoVSE32_V_M1 [[PseudoVREDSUM_VS_M1_E8_]], [[COPY]], 1, 5 /* e32 */, implicit $vl, implicit $vtype :: (store (s32) into %ir.res)
; CHECK-NEXT: PseudoRET
bb.0.entry:
liveins: $x10, $x12
@@ -824,7 +805,7 @@ body: |
; CHECK-NEXT: %idxs:vr = COPY $v0
; CHECK-NEXT: %t1:vr = COPY $v1
; CHECK-NEXT: %t3:vr = COPY $v2
- ; CHECK-NEXT: %t4:vr = COPY $v3
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vrnov0 = COPY $v3
; CHECK-NEXT: %t5:vrnov0 = COPY $v1
; CHECK-NEXT: dead [[PseudoVSETVLIX0_:%[0-9]+]]:gpr = PseudoVSETVLIX0 killed $x0, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: %t6:vr = PseudoVMSEQ_VI_M1 %t1, 0, -1, 6 /* e64 */, implicit $vl, implicit $vtype
@@ -834,8 +815,7 @@ body: |
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %mask:vr = PseudoVMANDN_MM_MF8 %t6, %t3, -1, 0 /* e8 */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: %t2:gpr = COPY $x0
- ; CHECK-NEXT: BEQ %a, %t2, %bb.3
+ ; CHECK-NEXT: BEQ %a, $x0, %bb.3
; CHECK-NEXT: PseudoBR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
@@ -843,15 +823,13 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $v0 = COPY %mask
; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 69 /* e8, mf8, ta, mu */, implicit-def $vl, implicit-def $vtype, implicit $vl
- ; CHECK-NEXT: early-clobber %t0:vrnov0 = PseudoVLUXEI64_V_M1_MF8_MASK %t5, killed %inaddr, %idxs, $v0, -1, 3 /* e8 */, 1 /* ta, mu */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: %ldval:vr = COPY %t0
+ ; CHECK-NEXT: early-clobber [[COPY]]:vrnov0 = PseudoVLUXEI64_V_M1_MF8_MASK %t5, %inaddr, %idxs, $v0, -1, 3 /* e8 */, 1 /* ta, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: PseudoBR %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
- ; CHECK-NEXT: %stval:vr = PHI %t4, %bb.1, %ldval, %bb.2
; CHECK-NEXT: $v0 = COPY %mask
; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 197 /* e8, mf8, ta, ma */, implicit-def $vl, implicit-def $vtype, implicit $vl
- ; CHECK-NEXT: PseudoVSOXEI64_V_M1_MF8_MASK killed %stval, killed %b, %idxs, $v0, -1, 3 /* e8 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: PseudoVSOXEI64_V_M1_MF8_MASK [[COPY]], %b, %idxs, $v0, -1, 3 /* e8 */, implicit $vl, implicit $vtype
; CHECK-NEXT: PseudoRET
bb.0:
successors: %bb.1
@@ -902,12 +880,12 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %dst:gpr = COPY $x10
; CHECK-NEXT: %src:gpr = COPY $x11
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x12
+ ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:gpr = COPY $x12
; CHECK-NEXT: %tc:gpr = COPY $x13
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x14
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x15
; CHECK-NEXT: %vlenb:gpr = PseudoReadVLENB
- ; CHECK-NEXT: %inc:gpr = SRLI killed %vlenb, 3
+ ; CHECK-NEXT: %inc:gpr = SRLI %vlenb, 3
; CHECK-NEXT: dead [[PseudoVSETVLIX0_:%[0-9]+]]:gpr = PseudoVSETVLIX0 killed $x0, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: [[PseudoVID_V_M1_:%[0-9]+]]:vr = PseudoVID_V_M1 undef $noreg, -1, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x0
@@ -916,31 +894,29 @@ body: |
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr = PHI [[COPY3]], %bb.0, %11, %bb.3
- ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[COPY2]], [[PHI]]
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[COPY2]], [[COPY3]]
; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype, implicit $vl
- ; CHECK-NEXT: [[PseudoVADD_VX_M1_:%[0-9]+]]:vr = PseudoVADD_VX_M1 undef $noreg, [[PseudoVID_V_M1_]], killed [[ADD]], -1, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[PseudoVADD_VX_M1_:%[0-9]+]]:vr = PseudoVADD_VX_M1 undef $noreg, [[PseudoVID_V_M1_]], [[ADD]], -1, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: [[PseudoVMSLTU_VX_M1_:%[0-9]+]]:vr = PseudoVMSLTU_VX_M1 [[PseudoVADD_VX_M1_]], [[COPY1]], -1, 6 /* e64 */, implicit $vl, implicit $vtype
; CHECK-NEXT: [[PseudoVCPOP_M_B1_:%[0-9]+]]:gpr = PseudoVCPOP_M_B1 [[PseudoVMSLTU_VX_M1_]], -1, 0 /* e8 */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY $x0
- ; CHECK-NEXT: BEQ killed [[PseudoVCPOP_M_B1_]], [[COPY4]], %bb.3
+ ; CHECK-NEXT: BEQ [[PseudoVCPOP_M_B1_]], $x0, %bb.3
; CHECK-NEXT: PseudoBR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[ADD1:%[0-9]+]]:gpr = ADD %src, [[PHI]]
- ; CHECK-NEXT: [[PseudoVLE8_V_MF8_:%[0-9]+]]:vrnov0 = PseudoVLE8_V_MF8 undef $noreg, killed [[ADD1]], -1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[ADD1:%[0-9]+]]:gpr = ADD %src, [[COPY3]]
+ ; CHECK-NEXT: [[PseudoVLE8_V_MF8_:%[0-9]+]]:vrnov0 = PseudoVLE8_V_MF8 undef $noreg, [[ADD1]], -1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 197 /* e8, mf8, ta, ma */, implicit-def $vl, implicit-def $vtype, implicit $vl
; CHECK-NEXT: [[PseudoVADD_VI_MF8_:%[0-9]+]]:vrnov0 = PseudoVADD_VI_MF8 undef $noreg, [[PseudoVLE8_V_MF8_]], 4, -1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: [[ADD2:%[0-9]+]]:gpr = ADD %dst, [[PHI]]
- ; CHECK-NEXT: PseudoVSE8_V_MF8 killed [[PseudoVADD_VI_MF8_]], killed [[ADD2]], -1, 3 /* e8 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[ADD2:%[0-9]+]]:gpr = ADD %dst, [[COPY3]]
+ ; CHECK-NEXT: PseudoVSE8_V_MF8 [[PseudoVADD_VI_MF8_]], [[ADD2]], -1, 3 /* e8 */, implicit $vl, implicit $vtype
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.4(0x04000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[ADD3:%[0-9]+]]:gpr = ADD [[PHI]], %inc
- ; CHECK-NEXT: BLTU [[ADD3]], %tc, %bb.1
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = ADD [[COPY3]], %inc
+ ; CHECK-NEXT: BLTU [[COPY3]], %tc, %bb.1
; CHECK-NEXT: PseudoBR %bb.4
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
@@ -1006,7 +982,7 @@ body: |
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 1, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: %x:gpr = PseudoVMV_X_S undef $noreg, 6 /* e64 */, implicit $vtype
+ ; CHECK-NEXT: dead %x:gpr = PseudoVMV_X_S undef $noreg, 6 /* e64 */, implicit $vtype
; CHECK-NEXT: PseudoBR %bb.1
bb.0:
PseudoBR %bb.1
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
index 29ce7c5..5b09aae 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
@@ -352,15 +352,13 @@ entry:
define <vscale x 1 x double> @test18(<vscale x 1 x double> %a, double %b) nounwind {
; CHECK-LABEL: test18:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 6, e64, m1, tu, ma
-; CHECK-NEXT: vmv1r.v v9, v8
-; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT: vfadd.vv v8, v8, v8
+; CHECK-NEXT: vsetivli zero, 6, e64, m1, ta, ma
+; CHECK-NEXT: vfadd.vv v9, v8, v8
; CHECK-NEXT: vsetvli zero, zero, e64, m1, tu, ma
; CHECK-NEXT: vfmv.s.f v8, fa0
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vfadd.vv v8, v9, v8
+; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: ret
entry:
%x = tail call i64 @llvm.riscv.vsetvli(i64 6, i64 3, i64 0)
@@ -380,8 +378,8 @@ entry:
define <vscale x 1 x double> @test19(<vscale x 1 x double> %a, double %b) nounwind {
; CHECK-LABEL: test19:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 2, e64, m1, tu, ma
; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vsetivli zero, 2, e64, m1, tu, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfadd.vv v8, v9, v8