Diffstat (limited to 'llvm/lib/Target/RISCV')
-rw-r--r--  llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp    73
-rw-r--r--  llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h        9
-rw-r--r--  llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp       19
-rw-r--r--  llvm/lib/Target/RISCV/RISCVCallingConv.td                  14
-rw-r--r--  llvm/lib/Target/RISCV/RISCVFrameLowering.cpp               53
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp                89
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h                  15
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp                32
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.h                   6
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrFormats.td                  1
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoP.td                   55
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoV.td                    4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td                 4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp          224
-rw-r--r--  llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp                  96
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp            12
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSubtarget.cpp                    6
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSubtarget.h                      6
-rw-r--r--  llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp         28
-rw-r--r--  llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h            4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp                  9
-rw-r--r--  llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp               9
22 files changed, 429 insertions, 339 deletions
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index f76f8b3..2c37c3b 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -302,6 +302,28 @@ void RISCVAsmBackend::relaxInstruction(MCInst &Inst,
Inst = std::move(Res);
}
+// Check if an R_RISCV_ALIGN relocation is needed for an alignment directive.
+// If conditions are met, compute the padding size and create a fixup encoding
+// the padding size in the addend.
+bool RISCVAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) {
+ // Use default handling unless linker relaxation is enabled and the alignment
+ // is larger than the nop size.
+ const MCSubtargetInfo *STI = F.getSubtargetInfo();
+ if (!STI->hasFeature(RISCV::FeatureRelax))
+ return false;
+ unsigned MinNopLen = STI->hasFeature(RISCV::FeatureStdExtZca) ? 2 : 4;
+ if (F.getAlignment() <= MinNopLen)
+ return false;
+
+ Size = F.getAlignment().value() - MinNopLen;
+ auto *Expr = MCConstantExpr::create(Size, getContext());
+ MCFixup Fixup =
+ MCFixup::create(0, Expr, FirstLiteralRelocationKind + ELF::R_RISCV_ALIGN);
+ F.setVarFixups({Fixup});
+ F.getParent()->setLinkerRelaxable();
+ return true;
+}
+
bool RISCVAsmBackend::relaxDwarfLineAddr(MCFragment &F,
bool &WasRelaxed) const {
MCContext &C = getContext();
@@ -637,7 +659,7 @@ bool RISCVAsmBackend::isPCRelFixupResolved(const MCSymbol *SymA,
// Otherwise, check if the offset between the symbol and fragment is fully
// resolved, unaffected by linker-relaxable fragments (e.g. instructions or
- // offset-affected MCAlignFragment). Complements the generic
+ // offset-affected FT_Align fragments). Complements the generic
// isSymbolRefDifferenceFullyResolvedImpl.
if (!PCRelTemp)
PCRelTemp = getContext().createTempSymbol();
@@ -887,55 +909,6 @@ void RISCVAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
}
}
-// Linker relaxation may change code size. We have to insert Nops
-// for .align directive when linker relaxation enabled. So then Linker
-// could satisfy alignment by removing Nops.
-// The function return the total Nops Size we need to insert.
-bool RISCVAsmBackend::shouldInsertExtraNopBytesForCodeAlign(
- const MCAlignFragment &AF, unsigned &Size) {
- // Calculate Nops Size only when linker relaxation enabled.
- const MCSubtargetInfo *STI = AF.getSubtargetInfo();
- if (!STI->hasFeature(RISCV::FeatureRelax))
- return false;
-
- unsigned MinNopLen = STI->hasFeature(RISCV::FeatureStdExtZca) ? 2 : 4;
-
- if (AF.getAlignment() <= MinNopLen) {
- return false;
- } else {
- Size = AF.getAlignment().value() - MinNopLen;
- return true;
- }
-}
-
-// We need to insert R_RISCV_ALIGN relocation type to indicate the
-// position of Nops and the total bytes of the Nops have been inserted
-// when linker relaxation enabled.
-// The function insert fixup_riscv_align fixup which eventually will
-// transfer to R_RISCV_ALIGN relocation type.
-bool RISCVAsmBackend::shouldInsertFixupForCodeAlign(MCAssembler &Asm,
- MCAlignFragment &AF) {
- // Insert the fixup only when linker relaxation enabled.
- const MCSubtargetInfo *STI = AF.getSubtargetInfo();
- if (!STI->hasFeature(RISCV::FeatureRelax))
- return false;
-
- // Calculate total Nops we need to insert. If there are none to insert
- // then simply return.
- unsigned Count;
- if (!shouldInsertExtraNopBytesForCodeAlign(AF, Count) || (Count == 0))
- return false;
-
- MCContext &Ctx = getContext();
- const MCExpr *Dummy = MCConstantExpr::create(0, Ctx);
- MCFixup Fixup = MCFixup::create(0, Dummy, ELF::R_RISCV_ALIGN);
-
- uint64_t FixedValue = 0;
- MCValue NopBytes = MCValue::get(Count);
- Asm.getWriter().recordRelocation(AF, Fixup, NopBytes, FixedValue);
- return true;
-}
-
std::unique_ptr<MCObjectTargetWriter>
RISCVAsmBackend::createObjectTargetWriter() const {
return createRISCVELFObjectWriter(OSABI, Is64Bit);
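For readers unfamiliar with R_RISCV_ALIGN: the new relaxAlign hook encodes the worst-case padding for an alignment directive in the relocation addend, so the linker can later trim nops it no longer needs. A minimal standalone sketch of that size computation (hypothetical helper names, not the LLVM API):

```cpp
#include <cstdio>
#include <optional>

// Illustrative sketch of relaxAlign's size computation. Returns the
// R_RISCV_ALIGN addend (maximum padding bytes), or nullopt when the
// assembler's default nop fill suffices.
std::optional<unsigned> alignPadding(unsigned alignment, bool relax, bool hasZca) {
  if (!relax)
    return std::nullopt;               // no linker relaxation: default handling
  unsigned minNopLen = hasZca ? 2 : 4; // c.nop is 2 bytes, nop is 4
  if (alignment <= minNopLen)
    return std::nullopt;               // a single nop already satisfies it
  return alignment - minNopLen;        // worst-case padding the linker may trim
}

int main() {
  // .align to 16 bytes with relaxation and Zca: addend = 16 - 2 = 14.
  if (auto sz = alignPadding(16, /*relax=*/true, /*hasZca=*/true))
    std::printf("R_RISCV_ALIGN addend = %u\n", *sz);
}
```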
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
index 8c10fbe..d97d632 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
@@ -38,14 +38,6 @@ public:
const MCTargetOptions &Options);
~RISCVAsmBackend() override = default;
- // Return Size with extra Nop Bytes for alignment directive in code section.
- bool shouldInsertExtraNopBytesForCodeAlign(const MCAlignFragment &AF,
- unsigned &Size) override;
-
- // Insert target specific fixup type for alignment directive in code section.
- bool shouldInsertFixupForCodeAlign(MCAssembler &Asm,
- MCAlignFragment &AF) override;
-
std::optional<bool> evaluateFixup(const MCFragment &, MCFixup &, MCValue &,
uint64_t &) override;
bool addReloc(const MCFragment &, const MCFixup &, const MCValue &,
@@ -73,6 +65,7 @@ public:
void relaxInstruction(MCInst &Inst,
const MCSubtargetInfo &STI) const override;
+ bool relaxAlign(MCFragment &F, unsigned &Size) override;
bool relaxDwarfLineAddr(MCFragment &F, bool &WasRelaxed) const override;
bool relaxDwarfCFA(MCFragment &F, bool &WasRelaxed) const override;
std::pair<bool, bool> relaxLEB128(MCFragment &LF,
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
index aeda5ac..5abb546 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
@@ -52,15 +52,6 @@ namespace RISCV {
#include "RISCVGenSearchableTables.inc"
} // namespace RISCV
-// Report an error but don't ask the user to report a bug.
-// TODO: Remove these wrappers.
-[[noreturn]] static void reportError(const char *Reason) {
- reportFatalUsageError(Reason);
-}
-[[noreturn]] static void reportError(Error Err) {
- reportFatalUsageError(std::move(Err));
-}
-
namespace RISCVABI {
ABI computeTargetABI(const Triple &TT, const FeatureBitset &FeatureBits,
StringRef ABIName) {
@@ -97,7 +88,7 @@ ABI computeTargetABI(const Triple &TT, const FeatureBitset &FeatureBits,
if ((TargetABI == RISCVABI::ABI::ABI_ILP32E ||
(TargetABI == ABI_Unknown && IsRVE && !IsRV64)) &&
FeatureBits[RISCV::FeatureStdExtD])
- reportError("ILP32E cannot be used with the D ISA extension");
+ reportFatalUsageError("ILP32E cannot be used with the D ISA extension");
if (TargetABI != ABI_Unknown)
return TargetABI;
@@ -105,7 +96,7 @@ ABI computeTargetABI(const Triple &TT, const FeatureBitset &FeatureBits,
// If no explicit ABI is given, try to compute the default ABI.
auto ISAInfo = RISCVFeatures::parseFeatureBits(IsRV64, FeatureBits);
if (!ISAInfo)
- reportError(ISAInfo.takeError());
+ reportFatalUsageError(ISAInfo.takeError());
return getTargetABI((*ISAInfo)->computeDefaultABI());
}
@@ -137,12 +128,12 @@ namespace RISCVFeatures {
void validate(const Triple &TT, const FeatureBitset &FeatureBits) {
if (TT.isArch64Bit() && !FeatureBits[RISCV::Feature64Bit])
- reportError("RV64 target requires an RV64 CPU");
+ reportFatalUsageError("RV64 target requires an RV64 CPU");
if (!TT.isArch64Bit() && !FeatureBits[RISCV::Feature32Bit])
- reportError("RV32 target requires an RV32 CPU");
+ reportFatalUsageError("RV32 target requires an RV32 CPU");
if (FeatureBits[RISCV::Feature32Bit] &&
FeatureBits[RISCV::Feature64Bit])
- reportError("RV32 and RV64 can't be combined");
+ reportFatalUsageError("RV32 and RV64 can't be combined");
}
llvm::Expected<std::unique_ptr<RISCVISAInfo>>
diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.td b/llvm/lib/Target/RISCV/RISCVCallingConv.td
index cbf039e..4c303a9 100644
--- a/llvm/lib/Target/RISCV/RISCVCallingConv.td
+++ b/llvm/lib/Target/RISCV/RISCVCallingConv.td
@@ -56,19 +56,21 @@ def CSR_XLEN_F32_Interrupt: CalleeSavedRegs<(add CSR_Interrupt,
def CSR_XLEN_F64_Interrupt: CalleeSavedRegs<(add CSR_Interrupt,
(sequence "F%u_D", 0, 31))>;
+defvar VREGS = (add (sequence "V%u", 0, 31),
+ (sequence "V%uM2", 0, 31, 2),
+ (sequence "V%uM4", 0, 31, 4),
+ (sequence "V%uM8", 0, 31, 8));
+
// Same as CSR_Interrupt, but including all vector registers.
-def CSR_XLEN_V_Interrupt: CalleeSavedRegs<(add CSR_Interrupt,
- (sequence "V%u", 0, 31))>;
+def CSR_XLEN_V_Interrupt: CalleeSavedRegs<(add CSR_Interrupt, VREGS)>;
// Same as CSR_Interrupt, but including all 32-bit FP registers and all vector
// registers.
-def CSR_XLEN_F32_V_Interrupt: CalleeSavedRegs<(add CSR_XLEN_F32_Interrupt,
- (sequence "V%u", 0, 31))>;
+def CSR_XLEN_F32_V_Interrupt: CalleeSavedRegs<(add CSR_XLEN_F32_Interrupt, VREGS)>;
// Same as CSR_Interrupt, but including all 64-bit FP registers and all vector
// registers.
-def CSR_XLEN_F64_V_Interrupt: CalleeSavedRegs<(add CSR_XLEN_F64_Interrupt,
- (sequence "V%u", 0, 31))>;
+def CSR_XLEN_F64_V_Interrupt: CalleeSavedRegs<(add CSR_XLEN_F64_Interrupt, VREGS)>;
// Same as CSR_Interrupt, but excluding X16-X31.
def CSR_Interrupt_RVE : CalleeSavedRegs<(sub CSR_Interrupt,
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 23b4554..b1ab76a 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -1544,10 +1544,53 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
return Offset;
}
+static MCRegister getRVVBaseRegister(const RISCVRegisterInfo &TRI,
+ const Register &Reg) {
+ MCRegister BaseReg = TRI.getSubReg(Reg, RISCV::sub_vrm1_0);
+ // If it's not a grouped vector register, it has no subregisters, so the
+ // base register is just the register itself.
+ if (BaseReg == RISCV::NoRegister)
+ BaseReg = Reg;
+ return BaseReg;
+}
+
void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF,
BitVector &SavedRegs,
RegScavenger *RS) const {
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
+
+ // In TargetFrameLowering::determineCalleeSaves, a vector register is marked
+ // as saved if any of its subregisters is clobbered, which is not correct for
+ // vector register groups. We only want a vector register to be marked as
+ // saved if all of its subregisters are clobbered.
+ // For example:
+ // Original behavior: If v24 is marked, v24m2, v24m4, v24m8 are also marked.
+ // Correct behavior: v24m2 is marked only if v24 and v25 are marked.
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
+ const RISCVRegisterInfo &TRI = *STI.getRegisterInfo();
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned CSReg = CSRegs[i];
+ // Only vector registers need special care.
+ if (!RISCV::VRRegClass.contains(getRVVBaseRegister(TRI, CSReg)))
+ continue;
+
+ SavedRegs.reset(CSReg);
+
+ auto SubRegs = TRI.subregs(CSReg);
+ // Set the register and all its subregisters.
+ if (!MRI.def_empty(CSReg) || MRI.getUsedPhysRegsMask().test(CSReg)) {
+ SavedRegs.set(CSReg);
+ llvm::for_each(SubRegs, [&](unsigned Reg) { return SavedRegs.set(Reg); });
+ }
+
+ // Combine to super register if all of its subregisters are marked.
+ if (!SubRegs.empty() && llvm::all_of(SubRegs, [&](unsigned Reg) {
+ return SavedRegs.test(Reg);
+ }))
+ SavedRegs.set(CSReg);
+ }
+
// Unconditionally spill RA and FP only if the function uses a frame
// pointer.
if (hasFP(MF)) {
@@ -2137,16 +2180,6 @@ static unsigned getCalleeSavedRVVNumRegs(const Register &BaseReg) {
: 8;
}
-static MCRegister getRVVBaseRegister(const RISCVRegisterInfo &TRI,
- const Register &Reg) {
- MCRegister BaseReg = TRI.getSubReg(Reg, RISCV::sub_vrm1_0);
- // If it's not a grouped vector register, it doesn't have subregister, so
- // the base register is just itself.
- if (BaseReg == RISCV::NoRegister)
- BaseReg = Reg;
- return BaseReg;
-}
-
void RISCVFrameLowering::emitCalleeSavedRVVPrologCFI(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, bool HasFP) const {
MachineFunction *MF = MBB.getParent();
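The marking rule above can be hard to follow in BitVector form. A toy model with plain arrays (all names invented for illustration) shows why a register group is saved only when every member VR is clobbered:

```cpp
#include <array>
#include <cstdio>

// Toy model of the corrected callee-saved marking for RVV register groups:
// a group register (e.g. v24m2 = {v24, v25}) is saved only if *all* of its
// VR subregisters are clobbered, not if any one of them is.
int main() {
  std::array<bool, 32> clobbered{};   // per-VR clobber state
  clobbered[24] = true;               // v24 clobbered, v25 untouched

  auto groupSaved = [&](unsigned base, unsigned lmul) {
    for (unsigned i = 0; i < lmul; ++i)
      if (!clobbered[base + i])
        return false;                 // one untouched subreg keeps the group out
    return true;
  };

  std::printf("v24m2 saved: %d\n", groupSaved(24, 2)); // 0: v25 not clobbered
  clobbered[25] = true;
  std::printf("v24m2 saved: %d\n", groupSaved(24, 2)); // 1: both clobbered
}
```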
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index cfec46d2..34910b7 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3032,6 +3032,63 @@ bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
return true;
}
+/// Return true if this is a load/store that we have a RegRegScale instruction for.
+static bool isRegRegScaleLoadOrStore(SDNode *User, SDValue Add,
+ const RISCVSubtarget &Subtarget) {
+ if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE)
+ return false;
+ EVT VT = cast<MemSDNode>(User)->getMemoryVT();
+ if (!(VT.isScalarInteger() &&
+ (Subtarget.hasVendorXTHeadMemIdx() || Subtarget.hasVendorXqcisls())) &&
+ !((VT == MVT::f32 || VT == MVT::f64) &&
+ Subtarget.hasVendorXTHeadFMemIdx()))
+ return false;
+ // Don't allow stores of the value. It must be used as the address.
+ if (User->getOpcode() == ISD::STORE &&
+ cast<StoreSDNode>(User)->getValue() == Add)
+ return false;
+
+ return true;
+}
+
+/// Is it profitable to fold this Add into a RegRegScale load/store? If \p
+/// Shift is non-null, then we have matched a shl+add. We allow reassociating
+/// (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2)) if there is a
+/// single addi and we don't have a SHXADD instruction we could use.
+/// FIXME: May still need to check how many and what kind of users the SHL has.
+static bool isWorthFoldingIntoRegRegScale(const RISCVSubtarget &Subtarget,
+ SDValue Add,
+ SDValue Shift = SDValue()) {
+ bool FoundADDI = false;
+ for (auto *User : Add->users()) {
+ if (isRegRegScaleLoadOrStore(User, Add, Subtarget))
+ continue;
+
+ // Allow a single ADDI that is used by loads/stores if we matched a shift.
+ if (!Shift || FoundADDI || User->getOpcode() != ISD::ADD ||
+ !isa<ConstantSDNode>(User->getOperand(1)) ||
+ !isInt<12>(cast<ConstantSDNode>(User->getOperand(1))->getSExtValue()))
+ return false;
+
+ FoundADDI = true;
+
+ // If we have a SHXADD instruction, prefer that over reassociating an ADDI.
+ assert(Shift.getOpcode() == ISD::SHL);
+ unsigned ShiftAmt = Shift.getConstantOperandVal(1);
+ if ((ShiftAmt <= 3 &&
+ (Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa())) ||
+ (ShiftAmt >= 4 && ShiftAmt <= 7 && Subtarget.hasVendorXqciac()))
+ return false;
+
+ // All users of the ADDI should be load/store.
+ for (auto *ADDIUser : User->users())
+ if (!isRegRegScaleLoadOrStore(ADDIUser, SDValue(User, 0), Subtarget))
+ return false;
+ }
+
+ return true;
+}
+
bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
unsigned MaxShiftAmount,
SDValue &Base, SDValue &Index,
@@ -3062,7 +3119,8 @@ bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
if (LHS.getOpcode() == ISD::ADD &&
!isa<ConstantSDNode>(LHS.getOperand(1)) &&
isInt<12>(C1->getSExtValue())) {
- if (SelectShl(LHS.getOperand(1), Index, Scale)) {
+ if (SelectShl(LHS.getOperand(1), Index, Scale) &&
+ isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(1))) {
SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
SDLoc(Addr), VT);
Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
@@ -3072,7 +3130,8 @@ bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
}
// Add is commutative so we need to check both operands.
- if (SelectShl(LHS.getOperand(0), Index, Scale)) {
+ if (SelectShl(LHS.getOperand(0), Index, Scale) &&
+ isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(0))) {
SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
SDLoc(Addr), VT);
Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
@@ -3090,22 +3149,48 @@ bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
// Try to match a shift on the RHS.
if (SelectShl(RHS, Index, Scale)) {
+ if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, RHS))
+ return false;
Base = LHS;
return true;
}
// Try to match a shift on the LHS.
if (SelectShl(LHS, Index, Scale)) {
+ if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, LHS))
+ return false;
Base = RHS;
return true;
}
+ if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr))
+ return false;
+
Base = LHS;
Index = RHS;
Scale = CurDAG->getTargetConstant(0, SDLoc(Addr), VT);
return true;
}
+bool RISCVDAGToDAGISel::SelectAddrRegZextRegScale(SDValue Addr,
+ unsigned MaxShiftAmount,
+ unsigned Bits, SDValue &Base,
+ SDValue &Index,
+ SDValue &Scale) {
+ if (!SelectAddrRegRegScale(Addr, MaxShiftAmount, Base, Index, Scale))
+ return false;
+
+ if (Index.getOpcode() == ISD::AND) {
+ auto *C = dyn_cast<ConstantSDNode>(Index.getOperand(1));
+ if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
+ Index = Index.getOperand(0);
+ return true;
+ }
+ }
+
+ return false;
+}
+
bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base,
SDValue &Offset) {
if (Addr.getOpcode() != ISD::ADD)
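The profitability logic in isWorthFoldingIntoRegRegScale boils down to: fold only when every user of the add is a reg+reg-scaled memory access, tolerating one reassociable ADDI when no shadd-style instruction already covers the shift amount. A toy model of that decision (names, and the Zba shift range 1..3, are illustrative assumptions; the real check also walks the ADDI's own users):

```cpp
#include <cstdio>
#include <vector>

// Toy profitability check: folding an add into a reg+reg-scaled access only
// pays off when every user of the add is itself such a load/store; shifts a
// shXadd-style instruction can handle are left for that pattern instead.
enum class User { ScaledLoadStore, AddImm, Other };

bool worthFolding(const std::vector<User> &users, unsigned shiftAmt, bool hasZba) {
  bool foundAddImm = false;
  for (User u : users) {
    if (u == User::ScaledLoadStore)
      continue;
    if (u != User::AddImm || foundAddImm)
      return false;                       // non-memory user blocks the fold
    foundAddImm = true;
    if (hasZba && shiftAmt >= 1 && shiftAmt <= 3)
      return false;                       // prefer shXadd over reassociating
  }
  return true;
}

int main() {
  std::printf("%d\n", worthFolding({User::ScaledLoadStore}, 2, true)); // 1
  std::printf("%d\n", worthFolding({User::AddImm}, 2, true));          // 0
}
```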
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index 72e2f96..ee3a86e 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -59,19 +59,14 @@ public:
return SelectAddrRegRegScale(Addr, MaxShift, Base, Index, Scale);
}
+ bool SelectAddrRegZextRegScale(SDValue Addr, unsigned MaxShiftAmount,
+ unsigned Bits, SDValue &Base, SDValue &Index,
+ SDValue &Scale);
+
template <unsigned MaxShift, unsigned Bits>
bool SelectAddrRegZextRegScale(SDValue Addr, SDValue &Base, SDValue &Index,
SDValue &Scale) {
- if (SelectAddrRegRegScale(Addr, MaxShift, Base, Index, Scale)) {
- if (Index.getOpcode() == ISD::AND) {
- auto *C = dyn_cast<ConstantSDNode>(Index.getOperand(1));
- if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
- Index = Index.getOperand(0);
- return true;
- }
- }
- }
- return false;
+ return SelectAddrRegZextRegScale(Addr, MaxShift, Bits, Base, Index, Scale);
}
bool SelectAddrRegReg(SDValue Addr, SDValue &Base, SDValue &Offset);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4845a9c..3918dd2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2319,6 +2319,10 @@ bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
if (getLegalZfaFPImm(Imm, VT) >= 0)
return true;
+ // Some constants can be produced by fli+fneg.
+ if (Imm.isNegative() && getLegalZfaFPImm(-Imm, VT) >= 0)
+ return true;
+
// Cannot create a 64 bit floating-point immediate value for rv32.
if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
// td can handle +0.0 or -0.0 already.
@@ -7936,7 +7940,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
BasePtr, MachinePointerInfo(Load->getAddressSpace()), Align(8));
OutChains.push_back(LoadVal.getValue(1));
Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Ret, LoadVal,
- DAG.getVectorIdxConstant(i, DL));
+ DAG.getTargetConstant(i, DL, MVT::i32));
BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
}
return DAG.getMergeValues(
@@ -8015,9 +8019,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
// Extract subregisters in a vector tuple and store them individually.
for (unsigned i = 0; i < NF; ++i) {
- auto Extract = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL,
- MVT::getScalableVectorVT(MVT::i8, NumElts),
- StoredVal, DAG.getVectorIdxConstant(i, DL));
+ auto Extract =
+ DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL,
+ MVT::getScalableVectorVT(MVT::i8, NumElts), StoredVal,
+ DAG.getTargetConstant(i, DL, MVT::i32));
Ret = DAG.getStore(Chain, DL, Extract, BasePtr,
MachinePointerInfo(Store->getAddressSpace()),
Store->getBaseAlign(),
@@ -10934,9 +10939,9 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Load->getMemoryVT(), Load->getMemOperand());
SmallVector<SDValue, 9> Results;
for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
- SDValue SubVec =
- DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT,
- Result.getValue(0), DAG.getVectorIdxConstant(RetIdx, DL));
+ SDValue SubVec = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT,
+ Result.getValue(0),
+ DAG.getTargetConstant(RetIdx, DL, MVT::i32));
Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget));
}
Results.push_back(Result.getValue(1));
@@ -11023,7 +11028,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
convertToScalableVector(
ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget),
- DAG.getVectorIdxConstant(i, DL));
+ DAG.getTargetConstant(i, DL, MVT::i32));
SDValue Ops[] = {
FixedIntrinsic->getChain(),
@@ -12027,7 +12032,7 @@ SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
for (unsigned i = 0U; i < Factor; ++i)
Res[i] = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, VecVT, Load,
- DAG.getVectorIdxConstant(i, DL));
+ DAG.getTargetConstant(i, DL, MVT::i32));
return DAG.getMergeValues(Res, DL);
}
@@ -12124,8 +12129,9 @@ SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
SDValue StoredVal = DAG.getUNDEF(VecTupTy);
for (unsigned i = 0; i < Factor; i++)
- StoredVal = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
- Op.getOperand(i), DAG.getConstant(i, DL, XLenVT));
+ StoredVal =
+ DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
+ Op.getOperand(i), DAG.getTargetConstant(i, DL, MVT::i32));
SDValue Ops[] = {DAG.getEntryNode(),
DAG.getTargetConstant(IntrIds[Factor - 2], DL, XLenVT),
@@ -20690,7 +20696,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
SDValue Result = DAG.getUNDEF(VT);
for (unsigned i = 0; i < NF; ++i)
Result = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Result, Splat,
- DAG.getVectorIdxConstant(i, DL));
+ DAG.getTargetConstant(i, DL, MVT::i32));
return Result;
}
// If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
@@ -24014,7 +24020,7 @@ bool RISCVTargetLowering::splitValueIntoRegisterParts(
#endif
Val = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, PartVT, DAG.getUNDEF(PartVT),
- Val, DAG.getVectorIdxConstant(0, DL));
+ Val, DAG.getTargetConstant(0, DL, MVT::i32));
Parts[0] = Val;
return true;
}
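The switch from getVectorIdxConstant to DAG.getTargetConstant(i, DL, MVT::i32) establishes an invariant on TUPLE_INSERT/TUPLE_EXTRACT indices that the new verifyTargetNode cases (later in this patch) assert. A conceptual sketch of that invariant, using a simplified struct rather than SelectionDAG types:

```cpp
#include <cassert>
#include <cstdint>

// Conceptual sketch of the invariant this change establishes: tuple
// insert/extract indices are i32 *target* constants (opaque to generic
// combines), matching the verifyTargetNode assertions added alongside.
struct Operand {
  bool isTargetConstant;
  unsigned bitWidth;
  uint64_t value;
};

void verifyTupleIndex(const Operand &idx) {
  assert(idx.isTargetConstant && idx.bitWidth == 32 &&
         "Expected index to be an i32 target constant!");
}

int main() {
  verifyTupleIndex({/*isTargetConstant=*/true, /*bitWidth=*/32, /*value=*/1});
}
```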
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index e0a8c07..f0447e0 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -434,7 +434,8 @@ public:
ArrayRef<unsigned> Indices,
unsigned Factor) const override;
- bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
+ bool lowerInterleavedStore(Instruction *Store, Value *Mask,
+ ShuffleVectorInst *SVI,
unsigned Factor) const override;
bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask,
@@ -444,9 +445,6 @@ public:
Instruction *Store, Value *Mask,
ArrayRef<Value *> InterleaveValues) const override;
- bool lowerInterleavedVPStore(VPIntrinsic *Store, Value *Mask,
- ArrayRef<Value *> InterleaveOps) const override;
-
bool supportKCFIBundles() const override { return true; }
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index e23001a..d9c6101 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -174,6 +174,7 @@ class EltDeps<bit vl, bit mask> {
def EltDepsNone : EltDeps<vl=0, mask=0>;
def EltDepsVL : EltDeps<vl=1, mask=0>;
+def EltDepsMask : EltDeps<vl=0, mask=1>;
def EltDepsVLMask : EltDeps<vl=1, mask=1>;
class EEW <bits<2> val> {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index aef410f..7f1077a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -44,45 +44,62 @@ def simm10_unsigned : RISCVOp {
//===----------------------------------------------------------------------===//
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class RVPUnaryImm10<bits<7> funct7, string opcodestr,
- DAGOperand TyImm10 = simm10>
- : RVInstIBase<0b010, OPC_OP_IMM_32, (outs GPR:$rd), (ins TyImm10:$imm10),
- opcodestr, "$rd, $imm10"> {
+class PLI_i<bits<7> funct7, string opcodestr>
+ : RVInst<(outs GPR:$rd), (ins simm10:$imm10), opcodestr, "$rd, $imm10", [],
+ InstFormatOther> {
bits<10> imm10;
+ bits<5> rd;
let Inst{31-25} = funct7;
let Inst{24-16} = imm10{8-0};
let Inst{15} = imm10{9};
+ let Inst{14-12} = 0b010;
+ let Inst{11-7} = rd;
+ let Inst{6-0} = OPC_OP_IMM_32.Value;
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class RVPUnaryImm8<bits<8> funct8, string opcodestr>
- : RVInstIBase<0b010, OPC_OP_IMM_32, (outs GPR:$rd), (ins uimm8:$uimm8),
- opcodestr, "$rd, $uimm8"> {
+class PLUI_i<bits<7> funct7, string opcodestr>
+ : RVInst<(outs GPR:$rd), (ins simm10_unsigned:$imm10), opcodestr,
+ "$rd, $imm10", [], InstFormatOther> {
+ bits<10> imm10;
+ bits<5> rd;
+
+ let Inst{31-25} = funct7;
+ let Inst{24} = imm10{0};
+ let Inst{23-15} = imm10{9-1};
+ let Inst{14-12} = 0b010;
+ let Inst{11-7} = rd;
+ let Inst{6-0} = OPC_OP_IMM_32.Value;
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class PLI_B_i<bits<8> funct8, string opcodestr>
+ : RVInst<(outs GPR:$rd), (ins uimm8:$uimm8), opcodestr, "$rd, $uimm8", [],
+ InstFormatOther> {
bits<8> uimm8;
+ bits<5> rd;
let Inst{31-24} = funct8;
let Inst{23-16} = uimm8;
let Inst{15} = 0b0;
+ let Inst{14-12} = 0b010;
+ let Inst{11-7} = rd;
+ let Inst{6-0} = OPC_OP_IMM_32.Value;
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class RVPUnary<bits<3> f, string opcodestr, dag operands, string argstr>
: RVInstIBase<0b010, OPC_OP_IMM_32, (outs GPR:$rd), operands, opcodestr, argstr> {
- bits<5> imm;
- bits<5> rs1;
-
let Inst{31} = 0b1;
let Inst{30-28} = f;
let Inst{27} = 0b0;
- let Inst{19-15} = rs1;
}
class RVPUnaryImm5<bits<3> f, string opcodestr>
: RVPUnary<f, opcodestr, (ins GPR:$rs1, uimm5:$uimm5), "$rd, $rs1, $uimm5"> {
bits<5> uimm5;
- let imm = uimm5;
let Inst{26-25} = 0b01;
let Inst{24-20} = uimm5;
}
@@ -145,11 +162,11 @@ def PSSLAI_W : RVPUnaryImm5<0b101, "psslai.w">;
} // Predicates = [HasStdExtP, IsRV64]
let Predicates = [HasStdExtP] in
-def PLI_H : RVPUnaryImm10<0b1011000, "pli.h">;
+def PLI_H : PLI_i<0b1011000, "pli.h">;
let Predicates = [HasStdExtP, IsRV64] in
-def PLI_W : RVPUnaryImm10<0b1011001, "pli.w">;
+def PLI_W : PLI_i<0b1011001, "pli.w">;
let Predicates = [HasStdExtP] in
-def PLI_B : RVPUnaryImm8<0b10110100, "pli.b">;
+def PLI_B : PLI_B_i<0b10110100, "pli.b">;
let Predicates = [HasStdExtP] in {
def PSEXT_H_B : RVPUnaryWUF<0b00, 0b00100, "psext.h.b">;
@@ -157,11 +174,11 @@ def PSABS_H : RVPUnaryWUF<0b00, 0b00111, "psabs.h">;
def PSABS_B : RVPUnaryWUF<0b10, 0b00111, "psabs.b">;
} // Predicates = [HasStdExtP]
let Predicates = [HasStdExtP, IsRV64] in {
-def PSEXT_W_B : RVPUnaryWUF<0b01, 0b00100, "psext.w.b">;
-def PSEXT_W_H : RVPUnaryWUF<0b01, 0b00101, "psext.w.h">;
+def PSEXT_W_B : RVPUnaryWUF<0b01, 0b00100, "psext.w.b">;
+def PSEXT_W_H : RVPUnaryWUF<0b01, 0b00101, "psext.w.h">;
} // Predicates = [HasStdExtP, IsRV64]
let Predicates = [HasStdExtP] in
-def PLUI_H : RVPUnaryImm10<0b1111000, "plui.h", simm10_unsigned>;
+def PLUI_H : PLUI_i<0b1111000, "plui.h">;
let Predicates = [HasStdExtP, IsRV64] in
-def PLUI_W : RVPUnaryImm10<0b1111001, "plui.w", simm10_unsigned>;
+def PLUI_W : PLUI_i<0b1111001, "plui.w">;
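The new PLI_i class spells out the full 32-bit layout instead of inheriting it from RVInstIBase. As a cross-check, here is a standalone encoder following the bit ranges in the diff (the OP-IMM-32 opcode value 0b0011011 is taken from the RISC-V opcode map; the helper name is invented):

```cpp
#include <cstdint>
#include <cstdio>

// Illustrative encoder for the PLI_i layout shown above (pli.h/pli.w):
// [31:25]=funct7, [24:16]=imm10[8:0], [15]=imm10[9], [14:12]=0b010,
// [11:7]=rd, [6:0]=OP-IMM-32 opcode.
uint32_t encodePLI(uint8_t funct7, int16_t imm10, uint8_t rd) {
  uint32_t imm = static_cast<uint32_t>(imm10) & 0x3FF; // 10-bit immediate
  return (static_cast<uint32_t>(funct7) << 25) |
         ((imm & 0x1FF) << 16) |    // imm10[8:0]
         (((imm >> 9) & 1) << 15) | // imm10[9]
         (0b010u << 12) |           // funct3
         ((rd & 0x1Fu) << 7) |
         0b0011011u;                // OPC_OP_IMM_32
}

int main() {
  // pli.h a0, 5: funct7 = 0b1011000 (from the PLI_H def), rd = x10.
  std::printf("0x%08x\n", encodePLI(0b1011000, 5, 10));
}
```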
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
index 5d13a87..33c7138 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
@@ -1642,7 +1642,7 @@ def VFIRST_M : RVInstV<0b010000, 0b10001, OPMVV, (outs GPR:$vd),
def : MnemonicAlias<"vpopc.m", "vcpop.m">;
-let Constraints = "@earlyclobber $vd", RVVConstraint = Iota, ElementsDependOn = EltDepsVLMask in {
+let Constraints = "@earlyclobber $vd", RVVConstraint = Iota, ElementsDependOn = EltDepsMask in {
let DestEEW = EEW1 in {
// vmsbf.m set-before-first mask bit
@@ -1655,7 +1655,7 @@ defm VMSOF_M : VMSFS_MV_V<"vmsof.m", 0b010100, 0b00010>;
// Vector Iota Instruction
defm VIOTA_M : VIOTA_MV_V<"viota.m", 0b010100, 0b10000>;
-} // Constraints = "@earlyclobber $vd", RVVConstraint = Iota, ElementsDependOn = EltDepsVLMask
+} // Constraints = "@earlyclobber $vd", RVVConstraint = Iota, ElementsDependOn = EltDepsMask
// Vector Element Index Instruction
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
index c7cb6e2..f391300 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
@@ -1377,9 +1377,9 @@ let Predicates = [HasVendorXqciac, IsRV32] in {
def : Pat<(i32 (add GPRNoX0:$rd, (mul GPRNoX0:$rs1, simm12:$imm12))),
(QC_MULIADD GPRNoX0:$rd, GPRNoX0:$rs1, simm12:$imm12)>;
def : Pat<(i32 (add_like_non_imm12 (shl GPRNoX0:$rs1, uimm5gt3:$imm), GPRNoX0:$rs2)),
- (QC_SHLADD GPRNoX0:$rs2, GPRNoX0:$rs1, uimm5gt3:$imm)>;
+ (QC_SHLADD GPRNoX0:$rs1, GPRNoX0:$rs2, uimm5gt3:$imm)>;
def : Pat<(i32 (riscv_shl_add GPRNoX0:$rs1, uimm5gt3:$imm, GPRNoX0:$rs2)),
- (QC_SHLADD GPRNoX0:$rs2, GPRNoX0:$rs1, uimm5gt3:$imm)>;
+ (QC_SHLADD GPRNoX0:$rs1, GPRNoX0:$rs2, uimm5gt3:$imm)>;
} // Predicates = [HasVendorXqciac, IsRV32]
/// Simple arithmetic operations
diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
index dd68a55..0565fcd 100644
--- a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
@@ -131,25 +131,56 @@ static bool getMemOperands(unsigned Factor, VectorType *VTy, Type *XLenTy,
: Constant::getAllOnesValue(XLenTy);
return true;
}
- auto *VPLdSt = cast<VPIntrinsic>(I);
- assert((VPLdSt->getIntrinsicID() == Intrinsic::vp_load ||
- VPLdSt->getIntrinsicID() == Intrinsic::vp_store) &&
- "Unexpected intrinsic");
- Ptr = VPLdSt->getMemoryPointerParam();
- Alignment = VPLdSt->getPointerAlignment().value_or(
- DL.getABITypeAlign(VTy->getElementType()));
-
- assert(Mask && "vp.load and vp.store needs a mask!");
-
- Value *WideEVL = VPLdSt->getVectorLengthParam();
- // Conservatively check if EVL is a multiple of factor, otherwise some
- // (trailing) elements might be lost after the transformation.
- if (!isMultipleOfN(WideEVL, I->getDataLayout(), Factor))
- return false;
- auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
- VL = Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
- return true;
+ auto *II = cast<IntrinsicInst>(I);
+ switch (II->getIntrinsicID()) {
+ default:
+ llvm_unreachable("Unsupported intrinsic type");
+ case Intrinsic::vp_load:
+ case Intrinsic::vp_store: {
+ auto *VPLdSt = cast<VPIntrinsic>(I);
+ Ptr = VPLdSt->getMemoryPointerParam();
+ Alignment = VPLdSt->getPointerAlignment().value_or(
+ DL.getABITypeAlign(VTy->getElementType()));
+
+ assert(Mask && "vp.load and vp.store needs a mask!");
+
+ Value *WideEVL = VPLdSt->getVectorLengthParam();
+ // Conservatively check if EVL is a multiple of factor, otherwise some
+ // (trailing) elements might be lost after the transformation.
+ if (!isMultipleOfN(WideEVL, I->getDataLayout(), Factor))
+ return false;
+
+ auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
+ VL = Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
+ return true;
+ }
+ case Intrinsic::masked_load: {
+ Ptr = II->getOperand(0);
+ Alignment = cast<ConstantInt>(II->getArgOperand(1))->getAlignValue();
+
+ if (!isa<UndefValue>(II->getOperand(3)))
+ return false;
+
+ assert(Mask && "masked.load needs a mask!");
+
+ VL = isa<FixedVectorType>(VTy)
+ ? Builder.CreateElementCount(XLenTy, VTy->getElementCount())
+ : Constant::getAllOnesValue(XLenTy);
+ return true;
+ }
+ case Intrinsic::masked_store: {
+ Ptr = II->getOperand(1);
+ Alignment = cast<ConstantInt>(II->getArgOperand(2))->getAlignValue();
+
+ assert(Mask && "masked.store needs a mask!");
+
+ VL = isa<FixedVectorType>(VTy)
+ ? Builder.CreateElementCount(XLenTy, VTy->getElementCount())
+ : Constant::getAllOnesValue(XLenTy);
+ return true;
+ }
+ }
}
/// Lower an interleaved load into a vlsegN intrinsic.
@@ -173,7 +204,7 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
const DataLayout &DL = Load->getDataLayout();
auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
- auto *XLenTy = Type::getIntNTy(Load->getContext(), Subtarget.getXLen());
+ auto *XLenTy = Builder.getIntNTy(Subtarget.getXLen());
Value *Ptr, *VL;
Align Alignment;
@@ -201,6 +232,7 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_load,
{VTy, BasePtr->getType(), Stride->getType()},
{BasePtr, Stride, Mask, VL});
+ Alignment = commonAlignment(Alignment, Indices[0] * ScalarSizeInBytes);
CI->addParamAttr(0,
Attribute::getWithAlignment(CI->getContext(), Alignment));
Shuffles[0]->replaceAllUsesWith(CI);
@@ -234,22 +266,28 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
///
/// Note that the new shufflevectors will be removed and we'll only generate one
/// vsseg3 instruction in CodeGen.
-bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
+bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store,
+ Value *LaneMask,
ShuffleVectorInst *SVI,
unsigned Factor) const {
- IRBuilder<> Builder(SI);
- const DataLayout &DL = SI->getDataLayout();
+ IRBuilder<> Builder(Store);
+ const DataLayout &DL = Store->getDataLayout();
auto Mask = SVI->getShuffleMask();
auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
// Given SVI : <n*factor x ty>, then VTy : <n x ty>
auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
ShuffleVTy->getNumElements() / Factor);
- if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
- SI->getPointerAddressSpace(), DL))
+ auto *XLenTy = Builder.getIntNTy(Subtarget.getXLen());
+
+ Value *Ptr, *VL;
+ Align Alignment;
+ if (!getMemOperands(Factor, VTy, XLenTy, Store, Ptr, LaneMask, VL, Alignment))
return false;
- auto *PtrTy = SI->getPointerOperandType();
- auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
+ Type *PtrTy = Ptr->getType();
+ unsigned AS = PtrTy->getPointerAddressSpace();
+ if (!isLegalInterleavedAccessType(VTy, Factor, Alignment, AS, DL))
+ return false;
unsigned Index;
// If the segment store only has one active lane (i.e. the interleave is
@@ -260,26 +298,27 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
unsigned ScalarSizeInBytes =
DL.getTypeStoreSize(ShuffleVTy->getElementType());
Value *Data = SVI->getOperand(0);
- auto *DataVTy = cast<FixedVectorType>(Data->getType());
+ Data = Builder.CreateExtractVector(VTy, Data, uint64_t(0));
Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
Value *Offset = ConstantInt::get(XLenTy, Index * ScalarSizeInBytes);
- Value *BasePtr = Builder.CreatePtrAdd(SI->getPointerOperand(), Offset);
- Value *Mask = Builder.getAllOnesMask(DataVTy->getElementCount());
- Value *VL = Builder.CreateElementCount(Builder.getInt32Ty(),
- VTy->getElementCount());
-
- CallInst *CI = Builder.CreateIntrinsic(
- Intrinsic::experimental_vp_strided_store,
- {Data->getType(), BasePtr->getType(), Stride->getType()},
- {Data, BasePtr, Stride, Mask, VL});
- CI->addParamAttr(
- 1, Attribute::getWithAlignment(CI->getContext(), SI->getAlign()));
+ Value *BasePtr = Builder.CreatePtrAdd(Ptr, Offset);
+ // Note: Same VL as above, but i32 not xlen due to signature of
+ // vp.strided.store
+ VL = Builder.CreateElementCount(Builder.getInt32Ty(),
+ VTy->getElementCount());
+ CallInst *CI =
+ Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_store,
+ {VTy, BasePtr->getType(), Stride->getType()},
+ {Data, BasePtr, Stride, LaneMask, VL});
+ Alignment = commonAlignment(Alignment, Index * ScalarSizeInBytes);
+ CI->addParamAttr(1,
+ Attribute::getWithAlignment(CI->getContext(), Alignment));
return true;
}
Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
- SI->getModule(), FixedVssegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy});
+ Store->getModule(), FixedVssegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy});
SmallVector<Value *, 10> Ops;
SmallVector<int, 16> NewShuffleMask;
@@ -295,13 +334,7 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
NewShuffleMask.clear();
}
- // This VL should be OK (should be executable in one vsseg instruction,
- // potentially under larger LMULs) because we checked that the fixed vector
- // type fits in isLegalInterleavedAccessType
- Value *VL = Builder.CreateElementCount(XLenTy, VTy->getElementCount());
- Value *StoreMask = Builder.getAllOnesMask(VTy->getElementCount());
- Ops.append({SI->getPointerOperand(), StoreMask, VL});
-
+ Ops.append({Ptr, LaneMask, VL});
Builder.CreateCall(VssegNFunc, Ops);
return true;
@@ -318,7 +351,7 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
VectorType *ResVTy = getDeinterleavedVectorType(DI);
const DataLayout &DL = Load->getDataLayout();
- auto *XLenTy = Type::getIntNTy(Load->getContext(), Subtarget.getXLen());
+ auto *XLenTy = Builder.getIntNTy(Subtarget.getXLen());
Value *Ptr, *VL;
Align Alignment;
@@ -339,8 +372,7 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
unsigned NumElts = ResVTy->getElementCount().getKnownMinValue();
Type *VecTupTy = TargetExtType::get(
Load->getContext(), "riscv.vector.tuple",
- ScalableVectorType::get(Type::getInt8Ty(Load->getContext()),
- NumElts * SEW / 8),
+ ScalableVectorType::get(Builder.getInt8Ty(), NumElts * SEW / 8),
Factor);
Function *VlsegNFunc = Intrinsic::getOrInsertDeclaration(
Load->getModule(), ScalableVlsegIntrIds[Factor - 2],
@@ -381,7 +413,7 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
auto *InVTy = cast<VectorType>(InterleaveValues[0]->getType());
const DataLayout &DL = Store->getDataLayout();
- Type *XLenTy = Type::getIntNTy(Store->getContext(), Subtarget.getXLen());
+ Type *XLenTy = Builder.getIntNTy(Subtarget.getXLen());
Value *Ptr, *VL;
Align Alignment;
@@ -405,9 +437,7 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
unsigned NumElts = InVTy->getElementCount().getKnownMinValue();
Type *VecTupTy = TargetExtType::get(
Store->getContext(), "riscv.vector.tuple",
- ScalableVectorType::get(Type::getInt8Ty(Store->getContext()),
- NumElts * SEW / 8),
- Factor);
+ ScalableVectorType::get(Builder.getInt8Ty(), NumElts * SEW / 8), Factor);
Value *StoredVal = PoisonValue::get(VecTupTy);
for (unsigned i = 0; i < Factor; ++i)
@@ -424,91 +454,3 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
Builder.CreateCall(VssegNFunc, Operands);
return true;
}
-
-/// Lower an interleaved vp.store into a vssegN intrinsic.
-///
-/// E.g. Lower an interleaved vp.store (Factor = 2):
-///
-/// %is = tail call <vscale x 64 x i8>
-/// @llvm.vector.interleave2.nxv64i8(
-/// <vscale x 32 x i8> %load0,
-/// <vscale x 32 x i8> %load1
-/// %wide.rvl = shl nuw nsw i32 %rvl, 1
-/// tail call void @llvm.vp.store.nxv64i8.p0(
-/// <vscale x 64 x i8> %is, ptr %ptr,
-/// %mask,
-/// i32 %wide.rvl)
-///
-/// Into:
-/// call void @llvm.riscv.vsseg2.mask.nxv32i8.i64(
-/// <vscale x 32 x i8> %load1,
-/// <vscale x 32 x i8> %load2, ptr %ptr,
-/// %mask,
-/// i64 %rvl)
-bool RISCVTargetLowering::lowerInterleavedVPStore(
- VPIntrinsic *Store, Value *Mask,
- ArrayRef<Value *> InterleaveOperands) const {
- assert(Mask && "Expect a valid mask");
- assert(Store->getIntrinsicID() == Intrinsic::vp_store &&
- "Unexpected intrinsic");
-
- const unsigned Factor = InterleaveOperands.size();
-
- auto *VTy = dyn_cast<VectorType>(InterleaveOperands[0]->getType());
- if (!VTy)
- return false;
-
- const DataLayout &DL = Store->getDataLayout();
- Align Alignment = Store->getParamAlign(1).value_or(
- DL.getABITypeAlign(VTy->getElementType()));
- if (!isLegalInterleavedAccessType(
- VTy, Factor, Alignment,
- Store->getArgOperand(1)->getType()->getPointerAddressSpace(), DL))
- return false;
-
- IRBuilder<> Builder(Store);
- Value *WideEVL = Store->getArgOperand(3);
- // Conservatively check if EVL is a multiple of factor, otherwise some
- // (trailing) elements might be lost after the transformation.
- if (!isMultipleOfN(WideEVL, Store->getDataLayout(), Factor))
- return false;
-
- auto *PtrTy = Store->getArgOperand(1)->getType();
- auto *XLenTy = Type::getIntNTy(Store->getContext(), Subtarget.getXLen());
- auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
- Value *EVL =
- Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
-
- if (isa<FixedVectorType>(VTy)) {
- SmallVector<Value *, 8> Operands(InterleaveOperands);
- Operands.append({Store->getArgOperand(1), Mask, EVL});
- Builder.CreateIntrinsic(FixedVssegIntrIds[Factor - 2],
- {VTy, PtrTy, XLenTy}, Operands);
- return true;
- }
-
- unsigned SEW = DL.getTypeSizeInBits(VTy->getElementType());
- unsigned NumElts = VTy->getElementCount().getKnownMinValue();
- Type *VecTupTy = TargetExtType::get(
- Store->getContext(), "riscv.vector.tuple",
- ScalableVectorType::get(Type::getInt8Ty(Store->getContext()),
- NumElts * SEW / 8),
- Factor);
-
- Function *VecInsertFunc = Intrinsic::getOrInsertDeclaration(
- Store->getModule(), Intrinsic::riscv_tuple_insert, {VecTupTy, VTy});
- Value *StoredVal = PoisonValue::get(VecTupTy);
- for (unsigned i = 0; i < Factor; ++i)
- StoredVal = Builder.CreateCall(
- VecInsertFunc, {StoredVal, InterleaveOperands[i], Builder.getInt32(i)});
-
- Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
- Store->getModule(), ScalableVssegIntrIds[Factor - 2],
- {VecTupTy, PtrTy, Mask->getType(), EVL->getType()});
-
- Value *Operands[] = {StoredVal, Store->getArgOperand(1), Mask, EVL,
- ConstantInt::get(XLenTy, Log2_64(SEW))};
-
- Builder.CreateCall(VssegNFunc, Operands);
- return true;
-}
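getMemOperands now chooses VL per intrinsic: vp.load/vp.store divide the wide EVL by the interleave factor (rejecting non-multiples), while the new masked.load/masked.store cases use the segment element count for fixed vectors and VLMAX for scalable ones. A toy model of that selection (all names invented; VLMAX modeled as all-ones):

```cpp
#include <cstdio>
#include <optional>

// Toy model of the VL selection sketched above: a wide vp EVL must divide
// evenly by the factor or the transform is rejected; masked.load/store use
// the fixed element count, or VLMAX for scalable types.
std::optional<unsigned> segmentVL(bool fixedVector, unsigned numElts,
                                  std::optional<unsigned> wideEVL,
                                  unsigned factor) {
  if (wideEVL) {                      // vp.load / vp.store path
    if (*wideEVL % factor != 0)
      return std::nullopt;            // trailing elements would be lost
    return *wideEVL / factor;
  }
  return fixedVector ? numElts : ~0u; // masked path: count or VLMAX
}

int main() {
  std::printf("%u\n", segmentVL(true, 8, std::nullopt, 4).value()); // 8
  std::printf("%u\n", segmentVL(false, 0, 12, 4).value());          // 3
}
```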
diff --git a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
index 28d6403..3b19c34 100644
--- a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
+++ b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
@@ -48,6 +48,8 @@ using namespace llvm;
STATISTIC(NumRemovedSExtW, "Number of removed sign-extensions");
STATISTIC(NumTransformedToWInstrs,
"Number of instructions transformed to W-ops");
+STATISTIC(NumTransformedToNonWInstrs,
+ "Number of instructions transformed to non-W-ops");
static cl::opt<bool> DisableSExtWRemoval("riscv-disable-sextw-removal",
cl::desc("Disable removal of sext.w"),
@@ -67,10 +69,9 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
bool removeSExtWInstrs(MachineFunction &MF, const RISCVInstrInfo &TII,
const RISCVSubtarget &ST, MachineRegisterInfo &MRI);
- bool stripWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII,
- const RISCVSubtarget &ST, MachineRegisterInfo &MRI);
- bool appendWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII,
- const RISCVSubtarget &ST, MachineRegisterInfo &MRI);
+ bool canonicalizeWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII,
+ const RISCVSubtarget &ST,
+ MachineRegisterInfo &MRI);
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -721,45 +722,39 @@ bool RISCVOptWInstrs::removeSExtWInstrs(MachineFunction &MF,
return MadeChange;
}
-bool RISCVOptWInstrs::stripWSuffixes(MachineFunction &MF,
- const RISCVInstrInfo &TII,
- const RISCVSubtarget &ST,
- MachineRegisterInfo &MRI) {
+// Strips or adds W suffixes on eligible instructions depending on the
+// subtarget's preference.
+bool RISCVOptWInstrs::canonicalizeWSuffixes(MachineFunction &MF,
+ const RISCVInstrInfo &TII,
+ const RISCVSubtarget &ST,
+ MachineRegisterInfo &MRI) {
+ bool ShouldStripW = !(DisableStripWSuffix || ST.preferWInst());
+ bool ShouldPreferW = ST.preferWInst();
bool MadeChange = false;
- for (MachineBasicBlock &MBB : MF) {
- for (MachineInstr &MI : MBB) {
- unsigned Opc;
- switch (MI.getOpcode()) {
- default:
- continue;
- case RISCV::ADDW: Opc = RISCV::ADD; break;
- case RISCV::ADDIW: Opc = RISCV::ADDI; break;
- case RISCV::MULW: Opc = RISCV::MUL; break;
- case RISCV::SLLIW: Opc = RISCV::SLLI; break;
- }
- if (hasAllWUsers(MI, ST, MRI)) {
- MI.setDesc(TII.get(Opc));
- MadeChange = true;
- }
- }
- }
-
- return MadeChange;
-}
-
-bool RISCVOptWInstrs::appendWSuffixes(MachineFunction &MF,
- const RISCVInstrInfo &TII,
- const RISCVSubtarget &ST,
- MachineRegisterInfo &MRI) {
- bool MadeChange = false;
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
- unsigned WOpc;
- // TODO: Add more?
- switch (MI.getOpcode()) {
+ std::optional<unsigned> WOpc;
+ std::optional<unsigned> NonWOpc;
+ unsigned OrigOpc = MI.getOpcode();
+ switch (OrigOpc) {
default:
continue;
+ case RISCV::ADDW:
+ NonWOpc = RISCV::ADD;
+ break;
+ case RISCV::ADDIW:
+ NonWOpc = RISCV::ADDI;
+ break;
+ case RISCV::MULW:
+ NonWOpc = RISCV::MUL;
+ break;
+ case RISCV::SLLIW:
+ NonWOpc = RISCV::SLLI;
+ break;
+ case RISCV::SUBW:
+ NonWOpc = RISCV::SUB;
+ break;
case RISCV::ADD:
WOpc = RISCV::ADDW;
break;
@@ -773,7 +768,7 @@ bool RISCVOptWInstrs::appendWSuffixes(MachineFunction &MF,
WOpc = RISCV::MULW;
break;
case RISCV::SLLI:
- // SLLIW reads the lowest 5 bits, while SLLI reads lowest 6 bits
+ // SLLIW reads the lowest 5 bits, while SLLI reads lowest 6 bits.
if (MI.getOperand(2).getImm() >= 32)
continue;
WOpc = RISCV::SLLIW;
@@ -784,19 +779,30 @@ bool RISCVOptWInstrs::appendWSuffixes(MachineFunction &MF,
break;
}
- if (hasAllWUsers(MI, ST, MRI)) {
+ if (ShouldStripW && NonWOpc.has_value() && hasAllWUsers(MI, ST, MRI)) {
+ LLVM_DEBUG(dbgs() << "Replacing " << MI);
+ MI.setDesc(TII.get(NonWOpc.value()));
+ LLVM_DEBUG(dbgs() << " with " << MI);
+ ++NumTransformedToNonWInstrs;
+ MadeChange = true;
+ continue;
+ }
+ // LWU is always converted to LW when possible as 1) LW is compressible
+ // and 2) it helps minimise differences vs RV32.
+ if ((ShouldPreferW || OrigOpc == RISCV::LWU) && WOpc.has_value() &&
+ hasAllWUsers(MI, ST, MRI)) {
LLVM_DEBUG(dbgs() << "Replacing " << MI);
- MI.setDesc(TII.get(WOpc));
+ MI.setDesc(TII.get(WOpc.value()));
MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
MI.clearFlag(MachineInstr::MIFlag::IsExact);
LLVM_DEBUG(dbgs() << " with " << MI);
++NumTransformedToWInstrs;
MadeChange = true;
+ continue;
}
}
}
-
return MadeChange;
}
@@ -813,12 +819,6 @@ bool RISCVOptWInstrs::runOnMachineFunction(MachineFunction &MF) {
bool MadeChange = false;
MadeChange |= removeSExtWInstrs(MF, TII, ST, MRI);
-
- if (!(DisableStripWSuffix || ST.preferWInst()))
- MadeChange |= stripWSuffixes(MF, TII, ST, MRI);
-
- if (ST.preferWInst())
- MadeChange |= appendWSuffixes(MF, TII, ST, MRI);
-
+ MadeChange |= canonicalizeWSuffixes(MF, TII, ST, MRI);
return MadeChange;
}
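The two former passes over the function are now a single canonicalization: strip the W suffix when the subtarget doesn't prefer W forms and every user is W-safe, otherwise add it, with LWU always becoming LW. A string-based toy model of the decision (illustrative only, not MIR; the real pass also consults an explicit opcode table and hasAllWUsers):

```cpp
#include <cstdio>
#include <string>

// Toy model of the merged canonicalizeWSuffixes decision.
std::string canonicalize(const std::string &opc, bool allWUsers, bool preferW) {
  bool isW = opc.size() > 1 && opc.back() == 'W';
  if (!preferW && isW && allWUsers)
    return opc.substr(0, opc.size() - 1);   // ADDW -> ADD, SLLIW -> SLLI, ...
  if ((preferW || opc == "LWU") && !isW && allWUsers)
    return opc == "LWU" ? "LW" : opc + "W"; // ADD -> ADDW; LWU -> LW always
  return opc;
}

int main() {
  std::printf("%s\n", canonicalize("ADDW", true, false).c_str()); // ADD
  std::printf("%s\n", canonicalize("LWU", true, false).c_str());  // LW
}
```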
diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
index 668e596..6ecddad 100644
--- a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
@@ -24,6 +24,18 @@ void RISCVSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
switch (N->getOpcode()) {
default:
return SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N);
+ case RISCVISD::TUPLE_EXTRACT:
+ assert(N->getNumOperands() == 2 && "Expected two operands!");
+ assert(N->getOperand(1).getOpcode() == ISD::TargetConstant &&
+ N->getOperand(1).getValueType() == MVT::i32 &&
+ "Expected index to be an i32 target constant!");
+ break;
+ case RISCVISD::TUPLE_INSERT:
+ assert(N->getNumOperands() == 3 && "Expected three operands!");
+ assert(N->getOperand(2).getOpcode() == ISD::TargetConstant &&
+ N->getOperand(2).getValueType() == MVT::i32 &&
+ "Expected index to be an i32 target constant!");
+ break;
case RISCVISD::VQDOT_VL:
case RISCVISD::VQDOTU_VL:
case RISCVISD::VQDOTSU_VL: {
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index c754de4..e35ffaf 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -216,7 +216,7 @@ unsigned RISCVSubtarget::getMinimumJumpTableEntries() const {
}
void RISCVSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
- unsigned NumRegionInstrs) const {
+ const SchedRegion &Region) const {
// Do bidirectional scheduling since it provides a more balanced scheduling
// leading to better performance. This will increase compile time.
Policy.OnlyTopDown = false;
@@ -231,8 +231,8 @@ void RISCVSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
Policy.ShouldTrackPressure = true;
}
-void RISCVSubtarget::overridePostRASchedPolicy(MachineSchedPolicy &Policy,
- unsigned NumRegionInstrs) const {
+void RISCVSubtarget::overridePostRASchedPolicy(
+ MachineSchedPolicy &Policy, const SchedRegion &Region) const {
MISched::Direction PostRASchedDirection = getPostRASchedDirection();
if (PostRASchedDirection == MISched::TopDown) {
Policy.OnlyTopDown = true;
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 4f560cc..fd57e02 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -395,11 +395,11 @@ public:
}
void overrideSchedPolicy(MachineSchedPolicy &Policy,
- unsigned NumRegionInstrs) const override;
+ const SchedRegion &Region) const override;
void overridePostRASchedPolicy(MachineSchedPolicy &Policy,
- unsigned NumRegionInstrs) const override;
+ const SchedRegion &Region) const override;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 56ead92..fd634b5 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1489,6 +1489,34 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
cast<VectorType>(ICA.getArgTypes()[0]), {}, CostKind,
0, cast<VectorType>(ICA.getReturnType()));
}
+ case Intrinsic::fptoui_sat:
+ case Intrinsic::fptosi_sat: {
+ InstructionCost Cost = 0;
+ bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
+ Type *SrcTy = ICA.getArgTypes()[0];
+
+ auto SrcLT = getTypeLegalizationCost(SrcTy);
+ auto DstLT = getTypeLegalizationCost(RetTy);
+ if (!SrcTy->isVectorTy())
+ break;
+
+ if (!SrcLT.first.isValid() || !DstLT.first.isValid())
+ return InstructionCost::getInvalid();
+
+ Cost +=
+ getCastInstrCost(IsSigned ? Instruction::FPToSI : Instruction::FPToUI,
+ RetTy, SrcTy, TTI::CastContextHint::None, CostKind);
+
+ // Handle NaN.
+ // vmfne v0, v8, v8 # If v8[i] is NaN set v0[i] to 1.
+ // vmerge.vim v8, v8, 0, v0 # Convert NaN to 0.
+ Type *CondTy = RetTy->getWithNewBitWidth(1);
+ Cost += getCmpSelInstrCost(BinaryOperator::FCmp, SrcTy, CondTy,
+ CmpInst::FCMP_UNO, CostKind);
+ Cost += getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
+ CmpInst::FCMP_UNO, CostKind);
+ return Cost;
+ }
}
if (ST->hasVInstructions() && RetTy->isVectorTy()) {
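The added cost model charges the base conversion plus the NaN fixup described by the comment's lowering (vmfne to find NaN lanes, vmerge.vim to zero them). A sketch of that accounting with placeholder numbers, not real RISC-V throughput figures:

```cpp
#include <cstdio>

// Rough shape of the new fptosi.sat/fptoui.sat vector cost.
struct Costs { unsigned convert, fcmp, select; };

unsigned fpToIntSatCost(const Costs &c) {
  return c.convert   // fptosi/fptoui on the legalized type
       + c.fcmp      // vmfne v0, v8, v8   -> mark NaN lanes
       + c.select;   // vmerge.vim v8, v8, 0, v0 -> NaN becomes 0
}

int main() { std::printf("%u\n", fpToIntSatCost({2, 1, 1})); }
```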
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 12bf8c1..d62d99c 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -116,8 +116,8 @@ public:
}
TailFoldingStyle
getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const override {
- return ST->hasVInstructions() ? TailFoldingStyle::Data
- : TailFoldingStyle::DataWithoutLaneMask;
+ return ST->hasVInstructions() ? TailFoldingStyle::DataWithEVL
+ : TailFoldingStyle::None;
}
std::optional<unsigned> getMaxVScale() const override;
std::optional<unsigned> getVScaleForTuning() const override;
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index e656e8b..b53d919 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -33,6 +33,7 @@ namespace {
class RISCVVLOptimizer : public MachineFunctionPass {
const MachineRegisterInfo *MRI;
const MachineDominatorTree *MDT;
+ const TargetInstrInfo *TII;
public:
static char ID;
@@ -1291,7 +1292,8 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
return false;
}
- assert(!RISCVII::elementsDependOnVL(RISCV::getRVVMCOpcode(MI.getOpcode())) &&
+ assert(!RISCVII::elementsDependOnVL(
+ TII->get(RISCV::getRVVMCOpcode(MI.getOpcode())).TSFlags) &&
"Instruction shouldn't be supported if elements depend on VL");
assert(MI.getOperand(0).isReg() &&
@@ -1484,7 +1486,6 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const {
}
bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) {
- assert(DemandedVLs.size() == 0);
if (skipFunction(MF.getFunction()))
return false;
@@ -1495,6 +1496,10 @@ bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (!ST.hasVInstructions())
return false;
+ TII = ST.getInstrInfo();
+
+ assert(DemandedVLs.empty());
+
// For each instruction that defines a vector, compute what VL its
// downstream users demand.
for (MachineBasicBlock *MBB : post_order(&MF)) {
diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index 84ef539..c1cc19b 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -434,6 +434,15 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) {
if (!isKnownSameDefs(TrueMask.getReg(), MIMask.getReg()))
return false;
+ // Masked off lanes past TrueVL will come from False, and converting to vmv
+ // will lose these lanes unless MIVL <= TrueVL.
+ // TODO: We could relax this for False == Passthru and True policy == TU
+ const MachineOperand &MIVL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc()));
+ const MachineOperand &TrueVL =
+ True->getOperand(RISCVII::getVLOpNum(True->getDesc()));
+ if (!RISCV::isVLKnownLE(MIVL, TrueVL))
+ return false;
+
// True's passthru needs to be equivalent to False
Register TruePassthruReg = True->getOperand(1).getReg();
Register FalseReg = MI.getOperand(2).getReg();
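The guard relies on a known-less-or-equal VL comparison; a toy version (with -1 standing in for VLMAX, names invented) shows the cases it must distinguish:

```cpp
#include <cstdio>

// Toy model of the new guard in convertSameMaskVMergeToVMv: folding a
// vmerge into vmv.v.v is only safe when the merge's VL is provably <= the
// true operand's VL, since lanes past TrueVL would otherwise still come
// from the false operand.
bool vlKnownLE(long mivl, long truevl) {
  if (truevl == -1) return true;   // anything is <= VLMAX
  if (mivl == -1) return false;    // VLMAX is not provably <= a constant
  return mivl <= truevl;
}

int main() {
  std::printf("%d\n", vlKnownLE(4, 8)); // 1: safe to convert
  std::printf("%d\n", vlKnownLE(8, 4)); // 0: would clobber lanes 4..7
}
```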