aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/RISCV
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/RISCV')
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp73
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h9
-rw-r--r--llvm/lib/Target/RISCV/RISCVCallingConv.td14
-rw-r--r--llvm/lib/Target/RISCV/RISCVFrameLowering.cpp175
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp19
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h15
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrFormats.td1
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoP.td37
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoV.td4
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td4
-rw-r--r--llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp169
-rw-r--r--llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp96
-rw-r--r--llvm/lib/Target/RISCV/RISCVSubtarget.cpp6
-rw-r--r--llvm/lib/Target/RISCV/RISCVSubtarget.h6
-rw-r--r--llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp9
-rw-r--r--llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp9
16 files changed, 357 insertions, 289 deletions
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index f76f8b3..2c37c3b 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -302,6 +302,28 @@ void RISCVAsmBackend::relaxInstruction(MCInst &Inst,
Inst = std::move(Res);
}
+// Check if an R_RISCV_ALIGN relocation is needed for an alignment directive.
+// If conditions are met, compute the padding size and create a fixup encoding
+// the padding size in the addend.
+bool RISCVAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) {
+ // Use default handling unless linker relaxation is enabled and the alignment
+ // is larger than the nop size.
+ const MCSubtargetInfo *STI = F.getSubtargetInfo();
+ if (!STI->hasFeature(RISCV::FeatureRelax))
+ return false;
+ unsigned MinNopLen = STI->hasFeature(RISCV::FeatureStdExtZca) ? 2 : 4;
+ if (F.getAlignment() <= MinNopLen)
+ return false;
+
+ Size = F.getAlignment().value() - MinNopLen;
+ auto *Expr = MCConstantExpr::create(Size, getContext());
+ MCFixup Fixup =
+ MCFixup::create(0, Expr, FirstLiteralRelocationKind + ELF::R_RISCV_ALIGN);
+ F.setVarFixups({Fixup});
+ F.getParent()->setLinkerRelaxable();
+ return true;
+}
+
bool RISCVAsmBackend::relaxDwarfLineAddr(MCFragment &F,
bool &WasRelaxed) const {
MCContext &C = getContext();
@@ -637,7 +659,7 @@ bool RISCVAsmBackend::isPCRelFixupResolved(const MCSymbol *SymA,
// Otherwise, check if the offset between the symbol and fragment is fully
// resolved, unaffected by linker-relaxable fragments (e.g. instructions or
- // offset-affected MCAlignFragment). Complements the generic
+ // offset-affected FT_Align fragments). Complements the generic
// isSymbolRefDifferenceFullyResolvedImpl.
if (!PCRelTemp)
PCRelTemp = getContext().createTempSymbol();
@@ -887,55 +909,6 @@ void RISCVAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
}
}
-// Linker relaxation may change code size. We have to insert Nops
-// for .align directive when linker relaxation enabled. So then Linker
-// could satisfy alignment by removing Nops.
-// The function return the total Nops Size we need to insert.
-bool RISCVAsmBackend::shouldInsertExtraNopBytesForCodeAlign(
- const MCAlignFragment &AF, unsigned &Size) {
- // Calculate Nops Size only when linker relaxation enabled.
- const MCSubtargetInfo *STI = AF.getSubtargetInfo();
- if (!STI->hasFeature(RISCV::FeatureRelax))
- return false;
-
- unsigned MinNopLen = STI->hasFeature(RISCV::FeatureStdExtZca) ? 2 : 4;
-
- if (AF.getAlignment() <= MinNopLen) {
- return false;
- } else {
- Size = AF.getAlignment().value() - MinNopLen;
- return true;
- }
-}
-
-// We need to insert R_RISCV_ALIGN relocation type to indicate the
-// position of Nops and the total bytes of the Nops have been inserted
-// when linker relaxation enabled.
-// The function insert fixup_riscv_align fixup which eventually will
-// transfer to R_RISCV_ALIGN relocation type.
-bool RISCVAsmBackend::shouldInsertFixupForCodeAlign(MCAssembler &Asm,
- MCAlignFragment &AF) {
- // Insert the fixup only when linker relaxation enabled.
- const MCSubtargetInfo *STI = AF.getSubtargetInfo();
- if (!STI->hasFeature(RISCV::FeatureRelax))
- return false;
-
- // Calculate total Nops we need to insert. If there are none to insert
- // then simply return.
- unsigned Count;
- if (!shouldInsertExtraNopBytesForCodeAlign(AF, Count) || (Count == 0))
- return false;
-
- MCContext &Ctx = getContext();
- const MCExpr *Dummy = MCConstantExpr::create(0, Ctx);
- MCFixup Fixup = MCFixup::create(0, Dummy, ELF::R_RISCV_ALIGN);
-
- uint64_t FixedValue = 0;
- MCValue NopBytes = MCValue::get(Count);
- Asm.getWriter().recordRelocation(AF, Fixup, NopBytes, FixedValue);
- return true;
-}
-
std::unique_ptr<MCObjectTargetWriter>
RISCVAsmBackend::createObjectTargetWriter() const {
return createRISCVELFObjectWriter(OSABI, Is64Bit);
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
index 8c10fbe..d97d632 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
@@ -38,14 +38,6 @@ public:
const MCTargetOptions &Options);
~RISCVAsmBackend() override = default;
- // Return Size with extra Nop Bytes for alignment directive in code section.
- bool shouldInsertExtraNopBytesForCodeAlign(const MCAlignFragment &AF,
- unsigned &Size) override;
-
- // Insert target specific fixup type for alignment directive in code section.
- bool shouldInsertFixupForCodeAlign(MCAssembler &Asm,
- MCAlignFragment &AF) override;
-
std::optional<bool> evaluateFixup(const MCFragment &, MCFixup &, MCValue &,
uint64_t &) override;
bool addReloc(const MCFragment &, const MCFixup &, const MCValue &,
@@ -73,6 +65,7 @@ public:
void relaxInstruction(MCInst &Inst,
const MCSubtargetInfo &STI) const override;
+ bool relaxAlign(MCFragment &F, unsigned &Size) override;
bool relaxDwarfLineAddr(MCFragment &F, bool &WasRelaxed) const override;
bool relaxDwarfCFA(MCFragment &F, bool &WasRelaxed) const override;
std::pair<bool, bool> relaxLEB128(MCFragment &LF,
diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.td b/llvm/lib/Target/RISCV/RISCVCallingConv.td
index cbf039e..4c303a9 100644
--- a/llvm/lib/Target/RISCV/RISCVCallingConv.td
+++ b/llvm/lib/Target/RISCV/RISCVCallingConv.td
@@ -56,19 +56,21 @@ def CSR_XLEN_F32_Interrupt: CalleeSavedRegs<(add CSR_Interrupt,
def CSR_XLEN_F64_Interrupt: CalleeSavedRegs<(add CSR_Interrupt,
(sequence "F%u_D", 0, 31))>;
+defvar VREGS = (add (sequence "V%u", 0, 31),
+ (sequence "V%uM2", 0, 31, 2),
+ (sequence "V%uM4", 0, 31, 4),
+ (sequence "V%uM8", 0, 31, 8));
+
// Same as CSR_Interrupt, but including all vector registers.
-def CSR_XLEN_V_Interrupt: CalleeSavedRegs<(add CSR_Interrupt,
- (sequence "V%u", 0, 31))>;
+def CSR_XLEN_V_Interrupt: CalleeSavedRegs<(add CSR_Interrupt, VREGS)>;
// Same as CSR_Interrupt, but including all 32-bit FP registers and all vector
// registers.
-def CSR_XLEN_F32_V_Interrupt: CalleeSavedRegs<(add CSR_XLEN_F32_Interrupt,
- (sequence "V%u", 0, 31))>;
+def CSR_XLEN_F32_V_Interrupt: CalleeSavedRegs<(add CSR_XLEN_F32_Interrupt, VREGS)>;
// Same as CSR_Interrupt, but including all 64-bit FP registers and all vector
// registers.
-def CSR_XLEN_F64_V_Interrupt: CalleeSavedRegs<(add CSR_XLEN_F64_Interrupt,
- (sequence "V%u", 0, 31))>;
+def CSR_XLEN_F64_V_Interrupt: CalleeSavedRegs<(add CSR_XLEN_F64_Interrupt, VREGS)>;
// Same as CSR_Interrupt, but excluding X16-X31.
def CSR_Interrupt_RVE : CalleeSavedRegs<(sub CSR_Interrupt,
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 6c8e3da..b1ab76a 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -95,6 +95,11 @@ static const std::pair<MCPhysReg, int8_t> FixedCSRFIQCIInterruptMap[] = {
/* -21, -22, -23, -24 are reserved */
};
+/// Returns true if DWARF CFI instructions ("frame moves") should be emitted.
+static bool needsDwarfCFI(const MachineFunction &MF) {
+ return MF.needsFrameMoves();
+}
+
// For now we use x3, a.k.a gp, as pointer to shadow call stack.
// User should not use x3 in their asm.
static void emitSCSPrologue(MachineFunction &MF, MachineBasicBlock &MBB,
@@ -141,6 +146,9 @@ static void emitSCSPrologue(MachineFunction &MF, MachineBasicBlock &MBB,
.addImm(-SlotSize)
.setMIFlag(MachineInstr::FrameSetup);
+ if (!needsDwarfCFI(MF))
+ return;
+
// Emit a CFI instruction that causes SlotSize to be subtracted from the value
// of the shadow stack pointer when unwinding past this frame.
char DwarfSCSReg = TRI->getDwarfRegNum(SCSPReg, /*IsEH*/ true);
@@ -199,8 +207,10 @@ static void emitSCSEpilogue(MachineFunction &MF, MachineBasicBlock &MBB,
.addReg(SCSPReg)
.addImm(-SlotSize)
.setMIFlag(MachineInstr::FrameDestroy);
- // Restore the SCS pointer
- CFIInstBuilder(MBB, MI, MachineInstr::FrameDestroy).buildRestore(SCSPReg);
+ if (needsDwarfCFI(MF)) {
+ // Restore the SCS pointer
+ CFIInstBuilder(MBB, MI, MachineInstr::FrameDestroy).buildRestore(SCSPReg);
+ }
}
// Insert instruction to swap mscratchsw with sp
@@ -935,6 +945,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
MBBI = std::prev(MBBI, getRVVCalleeSavedInfo(MF, CSI).size() +
getUnmanagedCSI(MF, CSI).size());
CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
+ bool NeedsDwarfCFI = needsDwarfCFI(MF);
// If libcalls are used to spill and restore callee-saved registers, the frame
// has two sections; the opaque section managed by the libcalls, and the
@@ -962,10 +973,12 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
alignTo((STI.getXLen() / 8) * LibCallRegs, getStackAlign());
RVFI->setLibCallStackSize(LibCallFrameSize);
- CFIBuilder.buildDefCFAOffset(LibCallFrameSize);
- for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
- CFIBuilder.buildOffset(CS.getReg(),
- MFI.getObjectOffset(CS.getFrameIdx()));
+ if (NeedsDwarfCFI) {
+ CFIBuilder.buildDefCFAOffset(LibCallFrameSize);
+ for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
+ CFIBuilder.buildOffset(CS.getReg(),
+ MFI.getObjectOffset(CS.getFrameIdx()));
+ }
}
// FIXME (note copied from Lanai): This appears to be overallocating. Needs
@@ -996,14 +1009,17 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
// could only be the next instruction.
++PossiblePush;
- // Insert the CFI metadata before where we think the `(QC.)CM.PUSH(FP)`
- // could be. The PUSH will also get its own CFI metadata for its own
- // modifications, which should come after the PUSH.
- CFIInstBuilder PushCFIBuilder(MBB, PossiblePush, MachineInstr::FrameSetup);
- PushCFIBuilder.buildDefCFAOffset(QCIInterruptPushAmount);
- for (const CalleeSavedInfo &CS : getQCISavedInfo(MF, CSI))
- PushCFIBuilder.buildOffset(CS.getReg(),
- MFI.getObjectOffset(CS.getFrameIdx()));
+ if (NeedsDwarfCFI) {
+ // Insert the CFI metadata before where we think the `(QC.)CM.PUSH(FP)`
+ // could be. The PUSH will also get its own CFI metadata for its own
+ // modifications, which should come after the PUSH.
+ CFIInstBuilder PushCFIBuilder(MBB, PossiblePush,
+ MachineInstr::FrameSetup);
+ PushCFIBuilder.buildDefCFAOffset(QCIInterruptPushAmount);
+ for (const CalleeSavedInfo &CS : getQCISavedInfo(MF, CSI))
+ PushCFIBuilder.buildOffset(CS.getReg(),
+ MFI.getObjectOffset(CS.getFrameIdx()));
+ }
}
if (RVFI->isPushable(MF) && PossiblePush != MBB.end() &&
@@ -1017,10 +1033,12 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
PossiblePush->getOperand(1).setImm(StackAdj);
StackSize -= StackAdj;
- CFIBuilder.buildDefCFAOffset(RealStackSize - StackSize);
- for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
- CFIBuilder.buildOffset(CS.getReg(),
- MFI.getObjectOffset(CS.getFrameIdx()));
+ if (NeedsDwarfCFI) {
+ CFIBuilder.buildDefCFAOffset(RealStackSize - StackSize);
+ for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
+ CFIBuilder.buildOffset(CS.getReg(),
+ MFI.getObjectOffset(CS.getFrameIdx()));
+ }
}
// Allocate space on the stack if necessary.
@@ -1031,7 +1049,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
bool DynAllocation =
MF.getInfo<RISCVMachineFunctionInfo>()->hasDynamicAllocation();
if (StackSize != 0)
- allocateStack(MBB, MBBI, MF, StackSize, RealStackSize, /*EmitCFI=*/true,
+ allocateStack(MBB, MBBI, MF, StackSize, RealStackSize, NeedsDwarfCFI,
NeedProbe, ProbeSize, DynAllocation,
MachineInstr::FrameSetup);
@@ -1049,8 +1067,10 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
// Iterate over list of callee-saved registers and emit .cfi_offset
// directives.
- for (const CalleeSavedInfo &CS : getUnmanagedCSI(MF, CSI))
- CFIBuilder.buildOffset(CS.getReg(), MFI.getObjectOffset(CS.getFrameIdx()));
+ if (NeedsDwarfCFI)
+ for (const CalleeSavedInfo &CS : getUnmanagedCSI(MF, CSI))
+ CFIBuilder.buildOffset(CS.getReg(),
+ MFI.getObjectOffset(CS.getFrameIdx()));
// Generate new FP.
if (hasFP(MF)) {
@@ -1069,7 +1089,8 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
MachineInstr::FrameSetup, getStackAlign());
}
- CFIBuilder.buildDefCFA(FPReg, RVFI->getVarArgsSaveSize());
+ if (NeedsDwarfCFI)
+ CFIBuilder.buildDefCFA(FPReg, RVFI->getVarArgsSaveSize());
}
uint64_t SecondSPAdjustAmount = 0;
@@ -1080,15 +1101,16 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
"SecondSPAdjustAmount should be greater than zero");
allocateStack(MBB, MBBI, MF, SecondSPAdjustAmount,
- getStackSizeWithRVVPadding(MF), !hasFP(MF), NeedProbe,
- ProbeSize, DynAllocation, MachineInstr::FrameSetup);
+ getStackSizeWithRVVPadding(MF), NeedsDwarfCFI && !hasFP(MF),
+ NeedProbe, ProbeSize, DynAllocation,
+ MachineInstr::FrameSetup);
}
if (RVVStackSize) {
if (NeedProbe) {
allocateAndProbeStackForRVV(MF, MBB, MBBI, DL, RVVStackSize,
- MachineInstr::FrameSetup, !hasFP(MF),
- DynAllocation);
+ MachineInstr::FrameSetup,
+ NeedsDwarfCFI && !hasFP(MF), DynAllocation);
} else {
// We must keep the stack pointer aligned through any intermediate
// updates.
@@ -1097,14 +1119,15 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
MachineInstr::FrameSetup, getStackAlign());
}
- if (!hasFP(MF)) {
+ if (NeedsDwarfCFI && !hasFP(MF)) {
// Emit .cfi_def_cfa_expression "sp + StackSize + RVVStackSize * vlenb".
CFIBuilder.insertCFIInst(createDefCFAExpression(
*RI, SPReg, getStackSizeWithRVVPadding(MF), RVVStackSize / 8));
}
std::advance(MBBI, getRVVCalleeSavedInfo(MF, CSI).size());
- emitCalleeSavedRVVPrologCFI(MBB, MBBI, hasFP(MF));
+ if (NeedsDwarfCFI)
+ emitCalleeSavedRVVPrologCFI(MBB, MBBI, hasFP(MF));
}
if (hasFP(MF)) {
@@ -1171,8 +1194,9 @@ void RISCVFrameLowering::deallocateStack(MachineFunction &MF,
MachineInstr::FrameDestroy, getStackAlign());
StackSize = 0;
- CFIInstBuilder(MBB, MBBI, MachineInstr::FrameDestroy)
- .buildDefCFAOffset(CFAOffset);
+ if (needsDwarfCFI(MF))
+ CFIInstBuilder(MBB, MBBI, MachineInstr::FrameDestroy)
+ .buildDefCFAOffset(CFAOffset);
}
void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
@@ -1212,6 +1236,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
std::next(MBBI, getRVVCalleeSavedInfo(MF, CSI).size());
CFIInstBuilder CFIBuilder(MBB, FirstScalarCSRRestoreInsn,
MachineInstr::FrameDestroy);
+ bool NeedsDwarfCFI = needsDwarfCFI(MF);
uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
uint64_t RealStackSize = FirstSPAdjustAmount ? FirstSPAdjustAmount
@@ -1232,10 +1257,11 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
StackOffset::getScalable(RVVStackSize),
MachineInstr::FrameDestroy, getStackAlign());
- if (!hasFP(MF))
- CFIBuilder.buildDefCFA(SPReg, RealStackSize);
-
- emitCalleeSavedRVVEpilogCFI(MBB, FirstScalarCSRRestoreInsn);
+ if (NeedsDwarfCFI) {
+ if (!hasFP(MF))
+ CFIBuilder.buildDefCFA(SPReg, RealStackSize);
+ emitCalleeSavedRVVEpilogCFI(MBB, FirstScalarCSRRestoreInsn);
+ }
}
if (FirstSPAdjustAmount) {
@@ -1251,7 +1277,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
StackOffset::getFixed(SecondSPAdjustAmount),
MachineInstr::FrameDestroy, getStackAlign());
- if (!hasFP(MF))
+ if (NeedsDwarfCFI && !hasFP(MF))
CFIBuilder.buildDefCFAOffset(FirstSPAdjustAmount);
}
@@ -1272,7 +1298,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
getStackAlign());
}
- if (hasFP(MF))
+ if (NeedsDwarfCFI && hasFP(MF))
CFIBuilder.buildDefCFA(SPReg, RealStackSize);
// Skip to after the restores of scalar callee-saved registers
@@ -1295,8 +1321,9 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
}
// Recover callee-saved registers.
- for (const CalleeSavedInfo &CS : getUnmanagedCSI(MF, CSI))
- CFIBuilder.buildRestore(CS.getReg());
+ if (NeedsDwarfCFI)
+ for (const CalleeSavedInfo &CS : getUnmanagedCSI(MF, CSI))
+ CFIBuilder.buildRestore(CS.getReg());
if (RVFI->isPushable(MF) && MBBI != MBB.end() && isPop(MBBI->getOpcode())) {
// Use available stack adjustment in pop instruction to deallocate stack
@@ -1315,15 +1342,17 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
auto NextI = next_nodbg(MBBI, MBB.end());
if (NextI == MBB.end() || NextI->getOpcode() != RISCV::PseudoRET) {
++MBBI;
- CFIBuilder.setInsertPoint(MBBI);
+ if (NeedsDwarfCFI) {
+ CFIBuilder.setInsertPoint(MBBI);
- for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
- CFIBuilder.buildRestore(CS.getReg());
+ for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
+ CFIBuilder.buildRestore(CS.getReg());
- // Update CFA Offset. If this is a QCI interrupt function, there will be a
- // leftover offset which is deallocated by `QC.C.MILEAVERET`, otherwise
- // getQCIInterruptStackSize() will be 0.
- CFIBuilder.buildDefCFAOffset(RVFI->getQCIInterruptStackSize());
+ // Update CFA Offset. If this is a QCI interrupt function, there will
+ // be a leftover offset which is deallocated by `QC.C.MILEAVERET`,
+ // otherwise getQCIInterruptStackSize() will be 0.
+ CFIBuilder.buildDefCFAOffset(RVFI->getQCIInterruptStackSize());
+ }
}
}
@@ -1515,10 +1544,53 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
return Offset;
}
+static MCRegister getRVVBaseRegister(const RISCVRegisterInfo &TRI,
+ const Register &Reg) {
+ MCRegister BaseReg = TRI.getSubReg(Reg, RISCV::sub_vrm1_0);
+ // If it's not a grouped vector register, it doesn't have subregister, so
+ // the base register is just itself.
+ if (BaseReg == RISCV::NoRegister)
+ BaseReg = Reg;
+ return BaseReg;
+}
+
void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF,
BitVector &SavedRegs,
RegScavenger *RS) const {
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
+
+ // In TargetFrameLowering::determineCalleeSaves, any vector register is marked
+ // as saved if any of its subregister is clobbered, this is not correct in
+ // vector registers. We only want the vector register to be marked as saved
+ // if all of its subregisters are clobbered.
+ // For example:
+ // Original behavior: If v24 is marked, v24m2, v24m4, v24m8 are also marked.
+ // Correct behavior: v24m2 is marked only if v24 and v25 are marked.
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
+ const RISCVRegisterInfo &TRI = *STI.getRegisterInfo();
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned CSReg = CSRegs[i];
+ // Only vector registers need special care.
+ if (!RISCV::VRRegClass.contains(getRVVBaseRegister(TRI, CSReg)))
+ continue;
+
+ SavedRegs.reset(CSReg);
+
+ auto SubRegs = TRI.subregs(CSReg);
+ // Set the register and all its subregisters.
+ if (!MRI.def_empty(CSReg) || MRI.getUsedPhysRegsMask().test(CSReg)) {
+ SavedRegs.set(CSReg);
+ llvm::for_each(SubRegs, [&](unsigned Reg) { return SavedRegs.set(Reg); });
+ }
+
+ // Combine to super register if all of its subregisters are marked.
+ if (!SubRegs.empty() && llvm::all_of(SubRegs, [&](unsigned Reg) {
+ return SavedRegs.test(Reg);
+ }))
+ SavedRegs.set(CSReg);
+ }
+
// Unconditionally spill RA and FP only if the function uses a frame
// pointer.
if (hasFP(MF)) {
@@ -1812,7 +1884,8 @@ MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr(
// allocateStack.
bool DynAllocation =
MF.getInfo<RISCVMachineFunctionInfo>()->hasDynamicAllocation();
- allocateStack(MBB, MI, MF, -Amount, -Amount, !hasFP(MF),
+ allocateStack(MBB, MI, MF, -Amount, -Amount,
+ needsDwarfCFI(MF) && !hasFP(MF),
/*NeedProbe=*/true, ProbeSize, DynAllocation,
MachineInstr::NoFlags);
} else {
@@ -2107,16 +2180,6 @@ static unsigned getCalleeSavedRVVNumRegs(const Register &BaseReg) {
: 8;
}
-static MCRegister getRVVBaseRegister(const RISCVRegisterInfo &TRI,
- const Register &Reg) {
- MCRegister BaseReg = TRI.getSubReg(Reg, RISCV::sub_vrm1_0);
- // If it's not a grouped vector register, it doesn't have subregister, so
- // the base register is just itself.
- if (BaseReg == RISCV::NoRegister)
- BaseReg = Reg;
- return BaseReg;
-}
-
void RISCVFrameLowering::emitCalleeSavedRVVPrologCFI(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, bool HasFP) const {
MachineFunction *MF = MBB.getParent();
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index cfec46d2..a541c2f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3106,6 +3106,25 @@ bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
return true;
}
+bool RISCVDAGToDAGISel::SelectAddrRegZextRegScale(SDValue Addr,
+ unsigned MaxShiftAmount,
+ unsigned Bits, SDValue &Base,
+ SDValue &Index,
+ SDValue &Scale) {
+ if (!SelectAddrRegRegScale(Addr, MaxShiftAmount, Base, Index, Scale))
+ return false;
+
+ if (Index.getOpcode() == ISD::AND) {
+ auto *C = dyn_cast<ConstantSDNode>(Index.getOperand(1));
+ if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
+ Index = Index.getOperand(0);
+ return true;
+ }
+ }
+
+ return false;
+}
+
bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base,
SDValue &Offset) {
if (Addr.getOpcode() != ISD::ADD)
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index 72e2f96..ee3a86e 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -59,19 +59,14 @@ public:
return SelectAddrRegRegScale(Addr, MaxShift, Base, Index, Scale);
}
+ bool SelectAddrRegZextRegScale(SDValue Addr, unsigned MaxShiftAmount,
+ unsigned Bits, SDValue &Base, SDValue &Index,
+ SDValue &Scale);
+
template <unsigned MaxShift, unsigned Bits>
bool SelectAddrRegZextRegScale(SDValue Addr, SDValue &Base, SDValue &Index,
SDValue &Scale) {
- if (SelectAddrRegRegScale(Addr, MaxShift, Base, Index, Scale)) {
- if (Index.getOpcode() == ISD::AND) {
- auto *C = dyn_cast<ConstantSDNode>(Index.getOperand(1));
- if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
- Index = Index.getOperand(0);
- return true;
- }
- }
- }
- return false;
+ return SelectAddrRegZextRegScale(Addr, MaxShift, Bits, Base, Index, Scale);
}
bool SelectAddrRegReg(SDValue Addr, SDValue &Base, SDValue &Offset);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index e23001a..d9c6101 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -174,6 +174,7 @@ class EltDeps<bit vl, bit mask> {
def EltDepsNone : EltDeps<vl=0, mask=0>;
def EltDepsVL : EltDeps<vl=1, mask=0>;
+def EltDepsMask : EltDeps<vl=0, mask=1>;
def EltDepsVLMask : EltDeps<vl=1, mask=1>;
class EEW <bits<2> val> {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index aef410f..17067220 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -44,45 +44,48 @@ def simm10_unsigned : RISCVOp {
//===----------------------------------------------------------------------===//
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class RVPUnaryImm10<bits<7> funct7, string opcodestr,
- DAGOperand TyImm10 = simm10>
- : RVInstIBase<0b010, OPC_OP_IMM_32, (outs GPR:$rd), (ins TyImm10:$imm10),
- opcodestr, "$rd, $imm10"> {
+class RVPLoadImm10<bits<7> funct7, string opcodestr,
+ DAGOperand TyImm10 = simm10>
+ : RVInst<(outs GPR:$rd), (ins TyImm10:$imm10), opcodestr, "$rd, $imm10", [],
+ InstFormatOther> {
bits<10> imm10;
+ bits<5> rd;
let Inst{31-25} = funct7;
let Inst{24-16} = imm10{8-0};
let Inst{15} = imm10{9};
+ let Inst{14-12} = 0b010;
+ let Inst{11-7} = rd;
+ let Inst{6-0} = OPC_OP_IMM_32.Value;
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class RVPUnaryImm8<bits<8> funct8, string opcodestr>
- : RVInstIBase<0b010, OPC_OP_IMM_32, (outs GPR:$rd), (ins uimm8:$uimm8),
- opcodestr, "$rd, $uimm8"> {
+class RVPLoadImm8<bits<8> funct8, string opcodestr>
+ : RVInst<(outs GPR:$rd), (ins uimm8:$uimm8), opcodestr, "$rd, $uimm8", [],
+ InstFormatOther> {
bits<8> uimm8;
+ bits<5> rd;
let Inst{31-24} = funct8;
let Inst{23-16} = uimm8;
let Inst{15} = 0b0;
+ let Inst{14-12} = 0b010;
+ let Inst{11-7} = rd;
+ let Inst{6-0} = OPC_OP_IMM_32.Value;
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class RVPUnary<bits<3> f, string opcodestr, dag operands, string argstr>
: RVInstIBase<0b010, OPC_OP_IMM_32, (outs GPR:$rd), operands, opcodestr, argstr> {
- bits<5> imm;
- bits<5> rs1;
-
let Inst{31} = 0b1;
let Inst{30-28} = f;
let Inst{27} = 0b0;
- let Inst{19-15} = rs1;
}
class RVPUnaryImm5<bits<3> f, string opcodestr>
: RVPUnary<f, opcodestr, (ins GPR:$rs1, uimm5:$uimm5), "$rd, $rs1, $uimm5"> {
bits<5> uimm5;
- let imm = uimm5;
let Inst{26-25} = 0b01;
let Inst{24-20} = uimm5;
}
@@ -145,11 +148,11 @@ def PSSLAI_W : RVPUnaryImm5<0b101, "psslai.w">;
} // Predicates = [HasStdExtP, IsRV64]
let Predicates = [HasStdExtP] in
-def PLI_H : RVPUnaryImm10<0b1011000, "pli.h">;
+def PLI_H : RVPLoadImm10<0b1011000, "pli.h">;
let Predicates = [HasStdExtP, IsRV64] in
-def PLI_W : RVPUnaryImm10<0b1011001, "pli.w">;
+def PLI_W : RVPLoadImm10<0b1011001, "pli.w">;
let Predicates = [HasStdExtP] in
-def PLI_B : RVPUnaryImm8<0b10110100, "pli.b">;
+def PLI_B : RVPLoadImm8<0b10110100, "pli.b">;
let Predicates = [HasStdExtP] in {
def PSEXT_H_B : RVPUnaryWUF<0b00, 0b00100, "psext.h.b">;
@@ -162,6 +165,6 @@ def PSEXT_W_H : RVPUnaryWUF<0b01, 0b00101, "psext.w.h">;
} // Predicates = [HasStdExtP, IsRV64]
let Predicates = [HasStdExtP] in
-def PLUI_H : RVPUnaryImm10<0b1111000, "plui.h", simm10_unsigned>;
+def PLUI_H : RVPLoadImm10<0b1111000, "plui.h", simm10_unsigned>;
let Predicates = [HasStdExtP, IsRV64] in
-def PLUI_W : RVPUnaryImm10<0b1111001, "plui.w", simm10_unsigned>;
+def PLUI_W : RVPLoadImm10<0b1111001, "plui.w", simm10_unsigned>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
index 5d13a87..33c7138 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
@@ -1642,7 +1642,7 @@ def VFIRST_M : RVInstV<0b010000, 0b10001, OPMVV, (outs GPR:$vd),
def : MnemonicAlias<"vpopc.m", "vcpop.m">;
-let Constraints = "@earlyclobber $vd", RVVConstraint = Iota, ElementsDependOn = EltDepsVLMask in {
+let Constraints = "@earlyclobber $vd", RVVConstraint = Iota, ElementsDependOn = EltDepsMask in {
let DestEEW = EEW1 in {
// vmsbf.m set-before-first mask bit
@@ -1655,7 +1655,7 @@ defm VMSOF_M : VMSFS_MV_V<"vmsof.m", 0b010100, 0b00010>;
// Vector Iota Instruction
defm VIOTA_M : VIOTA_MV_V<"viota.m", 0b010100, 0b10000>;
-} // Constraints = "@earlyclobber $vd", RVVConstraint = Iota, ElementsDependOn = EltDepsVLMask
+} // Constraints = "@earlyclobber $vd", RVVConstraint = Iota, ElementsDependOn = EltDepsMask
// Vector Element Index Instruction
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
index c7cb6e2..f391300 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
@@ -1377,9 +1377,9 @@ let Predicates = [HasVendorXqciac, IsRV32] in {
def : Pat<(i32 (add GPRNoX0:$rd, (mul GPRNoX0:$rs1, simm12:$imm12))),
(QC_MULIADD GPRNoX0:$rd, GPRNoX0:$rs1, simm12:$imm12)>;
def : Pat<(i32 (add_like_non_imm12 (shl GPRNoX0:$rs1, uimm5gt3:$imm), GPRNoX0:$rs2)),
- (QC_SHLADD GPRNoX0:$rs2, GPRNoX0:$rs1, uimm5gt3:$imm)>;
+ (QC_SHLADD GPRNoX0:$rs1, GPRNoX0:$rs2, uimm5gt3:$imm)>;
def : Pat<(i32 (riscv_shl_add GPRNoX0:$rs1, uimm5gt3:$imm, GPRNoX0:$rs2)),
- (QC_SHLADD GPRNoX0:$rs2, GPRNoX0:$rs1, uimm5gt3:$imm)>;
+ (QC_SHLADD GPRNoX0:$rs1, GPRNoX0:$rs2, uimm5gt3:$imm)>;
} // Predicates = [HasVendorXqciac, IsRV32]
/// Simple arithmetic operations
diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
index 38cc0ce..25817b6 100644
--- a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
@@ -102,6 +102,87 @@ static bool isMultipleOfN(const Value *V, const DataLayout &DL, unsigned N) {
return false;
}
+/// Do the common operand retrieval and validition required by the
+/// routines below.
+static bool getMemOperands(unsigned Factor, VectorType *VTy, Type *XLenTy,
+ Instruction *I, Value *&Ptr, Value *&Mask,
+ Value *&VL, Align &Alignment) {
+
+ IRBuilder<> Builder(I);
+ const DataLayout &DL = I->getDataLayout();
+ ElementCount EC = VTy->getElementCount();
+ if (auto *LI = dyn_cast<LoadInst>(I)) {
+ assert(LI->isSimple());
+ Ptr = LI->getPointerOperand();
+ Alignment = LI->getAlign();
+ assert(!Mask && "Unexpected mask on a load");
+ Mask = Builder.getAllOnesMask(EC);
+ VL = isa<FixedVectorType>(VTy) ? Builder.CreateElementCount(XLenTy, EC)
+ : Constant::getAllOnesValue(XLenTy);
+ return true;
+ }
+ if (auto *SI = dyn_cast<StoreInst>(I)) {
+ assert(SI->isSimple());
+ Ptr = SI->getPointerOperand();
+ Alignment = SI->getAlign();
+ assert(!Mask && "Unexpected mask on a store");
+ Mask = Builder.getAllOnesMask(EC);
+ VL = isa<FixedVectorType>(VTy) ? Builder.CreateElementCount(XLenTy, EC)
+ : Constant::getAllOnesValue(XLenTy);
+ return true;
+ }
+
+ auto *II = cast<IntrinsicInst>(I);
+ switch (II->getIntrinsicID()) {
+ default:
+ llvm_unreachable("Unsupported intrinsic type");
+ case Intrinsic::vp_load:
+ case Intrinsic::vp_store: {
+ auto *VPLdSt = cast<VPIntrinsic>(I);
+ Ptr = VPLdSt->getMemoryPointerParam();
+ Alignment = VPLdSt->getPointerAlignment().value_or(
+ DL.getABITypeAlign(VTy->getElementType()));
+
+ assert(Mask && "vp.load and vp.store needs a mask!");
+
+ Value *WideEVL = VPLdSt->getVectorLengthParam();
+ // Conservatively check if EVL is a multiple of factor, otherwise some
+ // (trailing) elements might be lost after the transformation.
+ if (!isMultipleOfN(WideEVL, I->getDataLayout(), Factor))
+ return false;
+
+ auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
+ VL = Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
+ return true;
+ }
+ case Intrinsic::masked_load: {
+ Ptr = II->getOperand(0);
+ Alignment = cast<ConstantInt>(II->getArgOperand(1))->getAlignValue();
+
+ if (!isa<UndefValue>(II->getOperand(3)))
+ return false;
+
+ assert(Mask && "masked.load needs a mask!");
+
+ VL = isa<FixedVectorType>(VTy)
+ ? Builder.CreateElementCount(XLenTy, VTy->getElementCount())
+ : Constant::getAllOnesValue(XLenTy);
+ return true;
+ }
+ case Intrinsic::masked_store: {
+ Ptr = II->getOperand(1);
+ Alignment = cast<ConstantInt>(II->getArgOperand(2))->getAlignValue();
+
+ assert(Mask && "masked.store needs a mask!");
+
+ VL = isa<FixedVectorType>(VTy)
+ ? Builder.CreateElementCount(XLenTy, VTy->getElementCount())
+ : Constant::getAllOnesValue(XLenTy);
+ return true;
+ }
+ }
+}
+
/// Lower an interleaved load into a vlsegN intrinsic.
///
/// E.g. Lower an interleaved load (Factor = 2):
@@ -127,32 +208,8 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
Value *Ptr, *VL;
Align Alignment;
- if (auto *LI = dyn_cast<LoadInst>(Load)) {
- assert(LI->isSimple());
- Ptr = LI->getPointerOperand();
- Alignment = LI->getAlign();
- assert(!Mask && "Unexpected mask on a load\n");
- Mask = Builder.getAllOnesMask(VTy->getElementCount());
- VL = Builder.CreateElementCount(XLenTy, VTy->getElementCount());
- } else {
- auto *VPLoad = cast<VPIntrinsic>(Load);
- assert(VPLoad->getIntrinsicID() == Intrinsic::vp_load &&
- "Unexpected intrinsic");
- Ptr = VPLoad->getMemoryPointerParam();
- Alignment = VPLoad->getPointerAlignment().value_or(
- DL.getABITypeAlign(VTy->getElementType()));
-
- assert(Mask && "vp.load needs a mask!");
-
- Value *WideEVL = VPLoad->getVectorLengthParam();
- // Conservatively check if EVL is a multiple of factor, otherwise some
- // (trailing) elements might be lost after the transformation.
- if (!isMultipleOfN(WideEVL, DL, Factor))
- return false;
-
- auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
- VL = Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
- }
+ if (!getMemOperands(Factor, VTy, XLenTy, Load, Ptr, Mask, VL, Alignment))
+ return false;
Type *PtrTy = Ptr->getType();
unsigned AS = PtrTy->getPointerAddressSpace();
@@ -296,34 +353,8 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
Value *Ptr, *VL;
Align Alignment;
- if (auto *LI = dyn_cast<LoadInst>(Load)) {
- assert(LI->isSimple());
- Ptr = LI->getPointerOperand();
- Alignment = LI->getAlign();
- assert(!Mask && "Unexpected mask on a load\n");
- Mask = Builder.getAllOnesMask(ResVTy->getElementCount());
- VL = isa<FixedVectorType>(ResVTy)
- ? Builder.CreateElementCount(XLenTy, ResVTy->getElementCount())
- : Constant::getAllOnesValue(XLenTy);
- } else {
- auto *VPLoad = cast<VPIntrinsic>(Load);
- assert(VPLoad->getIntrinsicID() == Intrinsic::vp_load &&
- "Unexpected intrinsic");
- Ptr = VPLoad->getMemoryPointerParam();
- Alignment = VPLoad->getPointerAlignment().value_or(
- DL.getABITypeAlign(ResVTy->getElementType()));
-
- assert(Mask && "vp.load needs a mask!");
-
- Value *WideEVL = VPLoad->getVectorLengthParam();
- // Conservatively check if EVL is a multiple of factor, otherwise some
- // (trailing) elements might be lost after the transformation.
- if (!isMultipleOfN(WideEVL, Load->getDataLayout(), Factor))
- return false;
-
- auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
- VL = Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
- }
+ if (!getMemOperands(Factor, ResVTy, XLenTy, Load, Ptr, Mask, VL, Alignment))
+ return false;
Type *PtrTy = Ptr->getType();
unsigned AS = PtrTy->getPointerAddressSpace();
@@ -385,34 +416,8 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
Value *Ptr, *VL;
Align Alignment;
- if (auto *SI = dyn_cast<StoreInst>(Store)) {
- assert(SI->isSimple());
- Ptr = SI->getPointerOperand();
- Alignment = SI->getAlign();
- assert(!Mask && "Unexpected mask on a store");
- Mask = Builder.getAllOnesMask(InVTy->getElementCount());
- VL = isa<FixedVectorType>(InVTy)
- ? Builder.CreateElementCount(XLenTy, InVTy->getElementCount())
- : Constant::getAllOnesValue(XLenTy);
- } else {
- auto *VPStore = cast<VPIntrinsic>(Store);
- assert(VPStore->getIntrinsicID() == Intrinsic::vp_store &&
- "Unexpected intrinsic");
- Ptr = VPStore->getMemoryPointerParam();
- Alignment = VPStore->getPointerAlignment().value_or(
- DL.getABITypeAlign(InVTy->getElementType()));
-
- assert(Mask && "vp.store needs a mask!");
-
- Value *WideEVL = VPStore->getVectorLengthParam();
- // Conservatively check if EVL is a multiple of factor, otherwise some
- // (trailing) elements might be lost after the transformation.
- if (!isMultipleOfN(WideEVL, DL, Factor))
- return false;
-
- auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
- VL = Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
- }
+ if (!getMemOperands(Factor, InVTy, XLenTy, Store, Ptr, Mask, VL, Alignment))
+ return false;
Type *PtrTy = Ptr->getType();
unsigned AS = Ptr->getType()->getPointerAddressSpace();
if (!isLegalInterleavedAccessType(InVTy, Factor, Alignment, AS, DL))
diff --git a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
index 28d6403..3b19c34 100644
--- a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
+++ b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
@@ -48,6 +48,8 @@ using namespace llvm;
STATISTIC(NumRemovedSExtW, "Number of removed sign-extensions");
STATISTIC(NumTransformedToWInstrs,
"Number of instructions transformed to W-ops");
+STATISTIC(NumTransformedToNonWInstrs,
+ "Number of instructions transformed to non-W-ops");
static cl::opt<bool> DisableSExtWRemoval("riscv-disable-sextw-removal",
cl::desc("Disable removal of sext.w"),
@@ -67,10 +69,9 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
bool removeSExtWInstrs(MachineFunction &MF, const RISCVInstrInfo &TII,
const RISCVSubtarget &ST, MachineRegisterInfo &MRI);
- bool stripWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII,
- const RISCVSubtarget &ST, MachineRegisterInfo &MRI);
- bool appendWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII,
- const RISCVSubtarget &ST, MachineRegisterInfo &MRI);
+ bool canonicalizeWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII,
+ const RISCVSubtarget &ST,
+ MachineRegisterInfo &MRI);
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -721,45 +722,39 @@ bool RISCVOptWInstrs::removeSExtWInstrs(MachineFunction &MF,
return MadeChange;
}
-bool RISCVOptWInstrs::stripWSuffixes(MachineFunction &MF,
- const RISCVInstrInfo &TII,
- const RISCVSubtarget &ST,
- MachineRegisterInfo &MRI) {
+// Strips or adds W suffixes to eligible instructions depending on the
+// subtarget preferences.
+bool RISCVOptWInstrs::canonicalizeWSuffixes(MachineFunction &MF,
+ const RISCVInstrInfo &TII,
+ const RISCVSubtarget &ST,
+ MachineRegisterInfo &MRI) {
+ bool ShouldStripW = !(DisableStripWSuffix || ST.preferWInst());
+ bool ShouldPreferW = ST.preferWInst();
bool MadeChange = false;
- for (MachineBasicBlock &MBB : MF) {
- for (MachineInstr &MI : MBB) {
- unsigned Opc;
- switch (MI.getOpcode()) {
- default:
- continue;
- case RISCV::ADDW: Opc = RISCV::ADD; break;
- case RISCV::ADDIW: Opc = RISCV::ADDI; break;
- case RISCV::MULW: Opc = RISCV::MUL; break;
- case RISCV::SLLIW: Opc = RISCV::SLLI; break;
- }
- if (hasAllWUsers(MI, ST, MRI)) {
- MI.setDesc(TII.get(Opc));
- MadeChange = true;
- }
- }
- }
-
- return MadeChange;
-}
-
-bool RISCVOptWInstrs::appendWSuffixes(MachineFunction &MF,
- const RISCVInstrInfo &TII,
- const RISCVSubtarget &ST,
- MachineRegisterInfo &MRI) {
- bool MadeChange = false;
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
- unsigned WOpc;
- // TODO: Add more?
- switch (MI.getOpcode()) {
+ std::optional<unsigned> WOpc;
+ std::optional<unsigned> NonWOpc;
+ unsigned OrigOpc = MI.getOpcode();
+ switch (OrigOpc) {
default:
continue;
+ case RISCV::ADDW:
+ NonWOpc = RISCV::ADD;
+ break;
+ case RISCV::ADDIW:
+ NonWOpc = RISCV::ADDI;
+ break;
+ case RISCV::MULW:
+ NonWOpc = RISCV::MUL;
+ break;
+ case RISCV::SLLIW:
+ NonWOpc = RISCV::SLLI;
+ break;
+ case RISCV::SUBW:
+ NonWOpc = RISCV::SUB;
+ break;
case RISCV::ADD:
WOpc = RISCV::ADDW;
break;
@@ -773,7 +768,7 @@ bool RISCVOptWInstrs::appendWSuffixes(MachineFunction &MF,
WOpc = RISCV::MULW;
break;
case RISCV::SLLI:
- // SLLIW reads the lowest 5 bits, while SLLI reads lowest 6 bits
+ // SLLIW reads the lowest 5 bits, while SLLI reads lowest 6 bits.
if (MI.getOperand(2).getImm() >= 32)
continue;
WOpc = RISCV::SLLIW;
@@ -784,19 +779,30 @@ bool RISCVOptWInstrs::appendWSuffixes(MachineFunction &MF,
break;
}
- if (hasAllWUsers(MI, ST, MRI)) {
+ if (ShouldStripW && NonWOpc.has_value() && hasAllWUsers(MI, ST, MRI)) {
+ LLVM_DEBUG(dbgs() << "Replacing " << MI);
+ MI.setDesc(TII.get(NonWOpc.value()));
+ LLVM_DEBUG(dbgs() << " with " << MI);
+ ++NumTransformedToNonWInstrs;
+ MadeChange = true;
+ continue;
+ }
+ // LWU is always converted to LW when possible as 1) LW is compressible
+ // and 2) it helps minimise differences vs RV32.
+ if ((ShouldPreferW || OrigOpc == RISCV::LWU) && WOpc.has_value() &&
+ hasAllWUsers(MI, ST, MRI)) {
LLVM_DEBUG(dbgs() << "Replacing " << MI);
- MI.setDesc(TII.get(WOpc));
+ MI.setDesc(TII.get(WOpc.value()));
MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
MI.clearFlag(MachineInstr::MIFlag::IsExact);
LLVM_DEBUG(dbgs() << " with " << MI);
++NumTransformedToWInstrs;
MadeChange = true;
+ continue;
}
}
}
-
return MadeChange;
}
@@ -813,12 +819,6 @@ bool RISCVOptWInstrs::runOnMachineFunction(MachineFunction &MF) {
bool MadeChange = false;
MadeChange |= removeSExtWInstrs(MF, TII, ST, MRI);
-
- if (!(DisableStripWSuffix || ST.preferWInst()))
- MadeChange |= stripWSuffixes(MF, TII, ST, MRI);
-
- if (ST.preferWInst())
- MadeChange |= appendWSuffixes(MF, TII, ST, MRI);
-
+ MadeChange |= canonicalizeWSuffixes(MF, TII, ST, MRI);
return MadeChange;
}
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index c754de4..e35ffaf 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -216,7 +216,7 @@ unsigned RISCVSubtarget::getMinimumJumpTableEntries() const {
}
void RISCVSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
- unsigned NumRegionInstrs) const {
+ const SchedRegion &Region) const {
// Do bidirectional scheduling since it provides a more balanced scheduling
// leading to better performance. This will increase compile time.
Policy.OnlyTopDown = false;
@@ -231,8 +231,8 @@ void RISCVSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
Policy.ShouldTrackPressure = true;
}
-void RISCVSubtarget::overridePostRASchedPolicy(MachineSchedPolicy &Policy,
- unsigned NumRegionInstrs) const {
+void RISCVSubtarget::overridePostRASchedPolicy(
+ MachineSchedPolicy &Policy, const SchedRegion &Region) const {
MISched::Direction PostRASchedDirection = getPostRASchedDirection();
if (PostRASchedDirection == MISched::TopDown) {
Policy.OnlyTopDown = true;
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 4f560cc..fd57e02 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -395,11 +395,11 @@ public:
}
void overrideSchedPolicy(MachineSchedPolicy &Policy,
- unsigned NumRegionInstrs) const override;
+ const SchedRegion &Region) const override;
void overridePostRASchedPolicy(MachineSchedPolicy &Policy,
- unsigned NumRegionInstrs) const override;
+ const SchedRegion &Region) const override;
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index e656e8b..b53d919 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -33,6 +33,7 @@ namespace {
class RISCVVLOptimizer : public MachineFunctionPass {
const MachineRegisterInfo *MRI;
const MachineDominatorTree *MDT;
+ const TargetInstrInfo *TII;
public:
static char ID;
@@ -1291,7 +1292,8 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
return false;
}
- assert(!RISCVII::elementsDependOnVL(RISCV::getRVVMCOpcode(MI.getOpcode())) &&
+ assert(!RISCVII::elementsDependOnVL(
+ TII->get(RISCV::getRVVMCOpcode(MI.getOpcode())).TSFlags) &&
"Instruction shouldn't be supported if elements depend on VL");
assert(MI.getOperand(0).isReg() &&
@@ -1484,7 +1486,6 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const {
}
bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) {
- assert(DemandedVLs.size() == 0);
if (skipFunction(MF.getFunction()))
return false;
@@ -1495,6 +1496,10 @@ bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (!ST.hasVInstructions())
return false;
+ TII = ST.getInstrInfo();
+
+ assert(DemandedVLs.empty());
+
// For each instruction that defines a vector, compute what VL its
// downstream users demand.
for (MachineBasicBlock *MBB : post_order(&MF)) {
diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index 84ef539..c1cc19b 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -434,6 +434,15 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) {
if (!isKnownSameDefs(TrueMask.getReg(), MIMask.getReg()))
return false;
+ // Masked off lanes past TrueVL will come from False, and converting to vmv
+ // will lose these lanes unless MIVL <= TrueVL.
+ // TODO: We could relax this for False == Passthru and True policy == TU
+ const MachineOperand &MIVL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc()));
+ const MachineOperand &TrueVL =
+ True->getOperand(RISCVII::getVLOpNum(True->getDesc()));
+ if (!RISCV::isVLKnownLE(MIVL, TrueVL))
+ return false;
+
// True's passthru needs to be equivalent to False
Register TruePassthruReg = True->getOperand(1).getReg();
Register FalseReg = MI.getOperand(2).getReg();