Diffstat (limited to 'llvm/lib/Target/RISCV')
-rw-r--r--  llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp  58
-rw-r--r--  llvm/lib/Target/RISCV/CMakeLists.txt  1
-rw-r--r--  llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp  44
-rw-r--r--  llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp  7
-rw-r--r--  llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h  14
-rw-r--r--  llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h  1
-rw-r--r--  llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp  2
-rw-r--r--  llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp  29
-rw-r--r--  llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h  2
-rw-r--r--  llvm/lib/Target/RISCV/RISCV.h  15
-rw-r--r--  llvm/lib/Target/RISCV/RISCV.td  2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp  88
-rw-r--r--  llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp  3
-rw-r--r--  llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp  9
-rw-r--r--  llvm/lib/Target/RISCV/RISCVFeatures.td  172
-rw-r--r--  llvm/lib/Target/RISCV/RISCVFrameLowering.cpp  53
-rw-r--r--  llvm/lib/Target/RISCV/RISCVFrameLowering.h  12
-rw-r--r--  llvm/lib/Target/RISCV/RISCVGISel.td  6
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp  103
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp  730
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.h  24
-rw-r--r--  llvm/lib/Target/RISCV/RISCVIndirectBranchTracking.cpp  34
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp  123
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.cpp  146
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.h  9
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.td  9
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoD.td  2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoF.td  2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoP.td  246
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td  38
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoV.td  6
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td  30
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td  8
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td  8
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td  4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td  9
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td  2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td  24
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td  37
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZb.td  46
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td  2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td  17
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td  18
-rw-r--r--  llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp  230
-rw-r--r--  llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp  38
-rw-r--r--  llvm/lib/Target/RISCV/RISCVPassRegistry.def  20
-rw-r--r--  llvm/lib/Target/RISCV/RISCVProcessors.td  11
-rw-r--r--  llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp  69
-rw-r--r--  llvm/lib/Target/RISCV/RISCVRegisterInfo.h  10
-rw-r--r--  llvm/lib/Target/RISCV/RISCVRegisterInfo.td  13
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedSiFive7.td  12
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td  459
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td  720
-rw-r--r--  llvm/lib/Target/RISCV/RISCVScheduleV.td  8
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp  19
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSubtarget.cpp  14
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSubtarget.h  18
-rw-r--r--  llvm/lib/Target/RISCV/RISCVTargetMachine.cpp  20
-rw-r--r--  llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp  177
-rw-r--r--  llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h  33
-rw-r--r--  llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp  95
-rw-r--r--  llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp  86
-rw-r--r--  llvm/lib/Target/RISCV/RISCVZilsdOptimizer.cpp  527
63 files changed, 3884 insertions, 890 deletions
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index edde7ac..9bb3724 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -222,7 +222,6 @@ class RISCVAsmParser : public MCTargetAsmParser {
ParseStatus parseRegReg(OperandVector &Operands);
ParseStatus parseXSfmmVType(OperandVector &Operands);
- ParseStatus parseRetval(OperandVector &Operands);
ParseStatus parseZcmpStackAdj(OperandVector &Operands,
bool ExpectNegative = false);
ParseStatus parseZcmpNegStackAdj(OperandVector &Operands) {
@@ -352,7 +351,7 @@ struct RISCVOperand final : public MCParsedAsmOperand {
} Kind;
struct RegOp {
- MCRegister RegNum;
+ MCRegister Reg;
bool IsGPRAsFPR;
};
@@ -461,20 +460,18 @@ public:
bool isReg() const override { return Kind == KindTy::Register; }
bool isExpr() const { return Kind == KindTy::Expression; }
bool isV0Reg() const {
- return Kind == KindTy::Register && Reg.RegNum == RISCV::V0;
+ return Kind == KindTy::Register && Reg.Reg == RISCV::V0;
}
bool isAnyReg() const {
return Kind == KindTy::Register &&
- (RISCVMCRegisterClasses[RISCV::GPRRegClassID].contains(Reg.RegNum) ||
- RISCVMCRegisterClasses[RISCV::FPR64RegClassID].contains(Reg.RegNum) ||
- RISCVMCRegisterClasses[RISCV::VRRegClassID].contains(Reg.RegNum));
+ (RISCVMCRegisterClasses[RISCV::GPRRegClassID].contains(Reg.Reg) ||
+ RISCVMCRegisterClasses[RISCV::FPR64RegClassID].contains(Reg.Reg) ||
+ RISCVMCRegisterClasses[RISCV::VRRegClassID].contains(Reg.Reg));
}
bool isAnyRegC() const {
return Kind == KindTy::Register &&
- (RISCVMCRegisterClasses[RISCV::GPRCRegClassID].contains(
- Reg.RegNum) ||
- RISCVMCRegisterClasses[RISCV::FPR64CRegClassID].contains(
- Reg.RegNum));
+ (RISCVMCRegisterClasses[RISCV::GPRCRegClassID].contains(Reg.Reg) ||
+ RISCVMCRegisterClasses[RISCV::FPR64CRegClassID].contains(Reg.Reg));
}
bool isImm() const override { return isExpr(); }
bool isMem() const override { return false; }
@@ -488,35 +485,33 @@ public:
bool isGPR() const {
return Kind == KindTy::Register &&
- RISCVMCRegisterClasses[RISCV::GPRRegClassID].contains(Reg.RegNum);
+ RISCVMCRegisterClasses[RISCV::GPRRegClassID].contains(Reg.Reg);
}
bool isGPRPair() const {
return Kind == KindTy::Register &&
- RISCVMCRegisterClasses[RISCV::GPRPairRegClassID].contains(
- Reg.RegNum);
+ RISCVMCRegisterClasses[RISCV::GPRPairRegClassID].contains(Reg.Reg);
}
bool isGPRPairC() const {
return Kind == KindTy::Register &&
- RISCVMCRegisterClasses[RISCV::GPRPairCRegClassID].contains(
- Reg.RegNum);
+ RISCVMCRegisterClasses[RISCV::GPRPairCRegClassID].contains(Reg.Reg);
}
bool isGPRPairNoX0() const {
return Kind == KindTy::Register &&
RISCVMCRegisterClasses[RISCV::GPRPairNoX0RegClassID].contains(
- Reg.RegNum);
+ Reg.Reg);
}
bool isGPRF16() const {
return Kind == KindTy::Register &&
- RISCVMCRegisterClasses[RISCV::GPRF16RegClassID].contains(Reg.RegNum);
+ RISCVMCRegisterClasses[RISCV::GPRF16RegClassID].contains(Reg.Reg);
}
bool isGPRF32() const {
return Kind == KindTy::Register &&
- RISCVMCRegisterClasses[RISCV::GPRF32RegClassID].contains(Reg.RegNum);
+ RISCVMCRegisterClasses[RISCV::GPRF32RegClassID].contains(Reg.Reg);
}
bool isGPRAsFPR() const { return isGPR() && Reg.IsGPRAsFPR; }
@@ -991,7 +986,7 @@ public:
MCRegister getReg() const override {
assert(Kind == KindTy::Register && "Invalid type access!");
- return Reg.RegNum;
+ return Reg.Reg;
}
StringRef getSysReg() const {
@@ -1047,7 +1042,7 @@ public:
OS << "<fpimm: " << FPImm.Val << ">";
break;
case KindTy::Register:
- OS << "<reg: " << RegName(Reg.RegNum) << " (" << Reg.RegNum
+ OS << "<reg: " << RegName(Reg.Reg) << " (" << Reg.Reg.id()
<< (Reg.IsGPRAsFPR ? ") GPRasFPR>" : ")>");
break;
case KindTy::Token:
@@ -1099,7 +1094,7 @@ public:
static std::unique_ptr<RISCVOperand>
createReg(MCRegister Reg, SMLoc S, SMLoc E, bool IsGPRAsFPR = false) {
auto Op = std::make_unique<RISCVOperand>(KindTy::Register);
- Op->Reg.RegNum = Reg;
+ Op->Reg.Reg = Reg;
Op->Reg.IsGPRAsFPR = IsGPRAsFPR;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -1335,28 +1330,28 @@ unsigned RISCVAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
bool IsRegVR = RISCVMCRegisterClasses[RISCV::VRRegClassID].contains(Reg);
if (IsRegFPR64 && Kind == MCK_FPR128) {
- Op.Reg.RegNum = convertFPR64ToFPR128(Reg);
+ Op.Reg.Reg = convertFPR64ToFPR128(Reg);
return Match_Success;
}
// As the parser couldn't differentiate an FPR32 from an FPR64, coerce the
// register from FPR64 to FPR32 or FPR64C to FPR32C if necessary.
if ((IsRegFPR64 && Kind == MCK_FPR32) ||
(IsRegFPR64C && Kind == MCK_FPR32C)) {
- Op.Reg.RegNum = convertFPR64ToFPR32(Reg);
+ Op.Reg.Reg = convertFPR64ToFPR32(Reg);
return Match_Success;
}
// As the parser couldn't differentiate an FPR16 from an FPR64, coerce the
// register from FPR64 to FPR16 if necessary.
if (IsRegFPR64 && Kind == MCK_FPR16) {
- Op.Reg.RegNum = convertFPR64ToFPR16(Reg);
+ Op.Reg.Reg = convertFPR64ToFPR16(Reg);
return Match_Success;
}
if (Kind == MCK_GPRAsFPR16 && Op.isGPRAsFPR()) {
- Op.Reg.RegNum = Reg - RISCV::X0 + RISCV::X0_H;
+ Op.Reg.Reg = Reg - RISCV::X0 + RISCV::X0_H;
return Match_Success;
}
if (Kind == MCK_GPRAsFPR32 && Op.isGPRAsFPR()) {
- Op.Reg.RegNum = Reg - RISCV::X0 + RISCV::X0_W;
+ Op.Reg.Reg = Reg - RISCV::X0 + RISCV::X0_W;
return Match_Success;
}
@@ -1372,8 +1367,8 @@ unsigned RISCVAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
// As the parser couldn't differentiate an VRM2/VRM4/VRM8 from an VR, coerce
// the register from VR to VRM2/VRM4/VRM8 if necessary.
if (IsRegVR && (Kind == MCK_VRM2 || Kind == MCK_VRM4 || Kind == MCK_VRM8)) {
- Op.Reg.RegNum = convertVRToVRMx(*getContext().getRegisterInfo(), Reg, Kind);
- if (!Op.Reg.RegNum)
+ Op.Reg.Reg = convertVRToVRMx(*getContext().getRegisterInfo(), Reg, Kind);
+ if (!Op.Reg.Reg)
return Match_InvalidOperand;
return Match_Success;
}
@@ -1659,10 +1654,6 @@ bool RISCVAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return generateImmOutOfRangeError(
Operands, ErrorInfo, -1, (1 << 5) - 1,
"immediate must be non-zero in the range");
- case Match_InvalidXSfmmVType: {
- SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
- return generateXSfmmVTypeError(ErrorLoc);
- }
case Match_InvalidVTypeI: {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
return generateVTypeError(ErrorLoc);
@@ -4091,6 +4082,9 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
return false;
}
+ case RISCV::PseudoCV_ELW:
+ emitLoadStoreSymbol(Inst, RISCV::CV_ELW, IDLoc, Out, /*HasTmpReg=*/false);
+ return false;
}
emitToStreamer(Out, Inst);
diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index e9088a4..f8cf71e 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -72,6 +72,7 @@ add_llvm_target(RISCVCodeGen
RISCVVLOptimizer.cpp
RISCVVMV0Elimination.cpp
RISCVZacasABIFix.cpp
+ RISCVZilsdOptimizer.cpp
GISel/RISCVCallLowering.cpp
GISel/RISCVInstructionSelector.cpp
GISel/RISCVLegalizerInfo.cpp
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
index 3d5a55c..4f2e633 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
@@ -99,6 +99,7 @@ private:
LLT *IndexVT = nullptr) const;
bool selectIntrinsicWithSideEffects(MachineInstr &I,
MachineIRBuilder &MIB) const;
+ bool selectExtractSubvector(MachineInstr &MI, MachineIRBuilder &MIB) const;
ComplexRendererFns selectShiftMask(MachineOperand &Root,
unsigned ShiftWidth) const;
@@ -967,6 +968,45 @@ bool RISCVInstructionSelector::selectIntrinsicWithSideEffects(
}
}
+bool RISCVInstructionSelector::selectExtractSubvector(
+ MachineInstr &MI, MachineIRBuilder &MIB) const {
+ assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_SUBVECTOR);
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+
+ LLT DstTy = MRI->getType(DstReg);
+ LLT SrcTy = MRI->getType(SrcReg);
+
+ unsigned Idx = static_cast<unsigned>(MI.getOperand(2).getImm());
+
+ MVT DstMVT = getMVTForLLT(DstTy);
+ MVT SrcMVT = getMVTForLLT(SrcTy);
+
+ unsigned SubRegIdx;
+ std::tie(SubRegIdx, Idx) =
+ RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
+ SrcMVT, DstMVT, Idx, &TRI);
+
+ if (Idx != 0)
+ return false;
+
+ unsigned DstRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(DstMVT);
+ const TargetRegisterClass *DstRC = TRI.getRegClass(DstRegClassID);
+ if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
+ return false;
+
+ unsigned SrcRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(SrcMVT);
+ const TargetRegisterClass *SrcRC = TRI.getRegClass(SrcRegClassID);
+ if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
+ return false;
+
+ MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(SrcReg, 0, SubRegIdx);
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool RISCVInstructionSelector::select(MachineInstr &MI) {
MachineIRBuilder MIB(MI);
@@ -1239,6 +1279,8 @@ bool RISCVInstructionSelector::select(MachineInstr &MI) {
}
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
return selectIntrinsicWithSideEffects(MI, MIB);
+ case TargetOpcode::G_EXTRACT_SUBVECTOR:
+ return selectExtractSubvector(MI, MIB);
default:
return false;
}
@@ -1569,7 +1611,7 @@ bool RISCVInstructionSelector::selectAddr(MachineInstr &MI,
switch (TM.getCodeModel()) {
default: {
- reportGISelFailure(*MF, *TPC, *MORE, getName(),
+ reportGISelFailure(*MF, *MORE, getName(),
"Unsupported code model for lowering", MI);
return false;
}
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index b1794b7..2cc594a 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -238,7 +238,10 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
.clampScalar(0, sXLen, sXLen)
.scalarSameSizeAs(1, 0);
} else {
- CTPOPActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower();
+ CTPOPActions.widenScalarToNextPow2(0, /*Min*/ 8)
+ .clampScalar(0, s8, sXLen)
+ .scalarSameSizeAs(1, 0)
+ .lower();
}
getActionDefinitionsBuilder(G_CONSTANT)
@@ -1208,7 +1211,7 @@ bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
// to place the desired subvector starting at element 0.
const LLT XLenTy(STI.getXLenVT());
auto SlidedownAmt = MIB.buildVScale(XLenTy, RemIdx);
- auto [Mask, VL] = buildDefaultVLOps(LitTy, MIB, MRI);
+ auto [Mask, VL] = buildDefaultVLOps(InterLitTy, MIB, MRI);
uint64_t Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
auto Slidedown = MIB.buildInstr(
RISCV::G_VSLIDEDOWN_VL, {InterLitTy},
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index 5b8cfb2..dbf5cfe 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -393,7 +393,6 @@ enum OperandType : unsigned {
OPERAND_UIMM14_LSB00,
OPERAND_UIMM16,
OPERAND_UIMM16_NONZERO,
- OPERAND_UIMM20,
OPERAND_UIMMLOG2XLEN,
OPERAND_UIMMLOG2XLEN_NONZERO,
OPERAND_UIMM32,
@@ -412,13 +411,11 @@ enum OperandType : unsigned {
OPERAND_SIMM10_LSB0000_NONZERO,
OPERAND_SIMM10_UNSIGNED,
OPERAND_SIMM11,
- OPERAND_SIMM12,
OPERAND_SIMM12_LSB00000,
OPERAND_SIMM16,
OPERAND_SIMM16_NONZERO,
OPERAND_SIMM20_LI,
OPERAND_SIMM26,
- OPERAND_BARE_SIMM32,
OPERAND_CLUI_IMM,
OPERAND_VTYPEI10,
OPERAND_VTYPEI11,
@@ -447,6 +444,15 @@ enum OperandType : unsigned {
// Vtype operand for XSfmm extension.
OPERAND_XSFMM_VTYPE,
OPERAND_LAST_RISCV_IMM = OPERAND_XSFMM_VTYPE,
+
+ OPERAND_UIMM20_LUI,
+ OPERAND_UIMM20_AUIPC,
+
+ // Simm12 or constant pool, global, basicblock, etc.
+ OPERAND_SIMM12_LO,
+
+ OPERAND_BARE_SIMM32,
+
// Operand is either a register or uimm5, this is used by V extension pseudo
// instructions to represent a value that be passed as AVL to either vsetvli
// or vsetivli.
@@ -700,7 +706,7 @@ enum RLISTENCODE {
inline unsigned encodeRegList(MCRegister EndReg, bool IsRVE = false) {
assert((!IsRVE || EndReg <= RISCV::X9) && "Invalid Rlist for RV32E");
- switch (EndReg) {
+ switch (EndReg.id()) {
case RISCV::X1:
return RLISTENCODE::RA;
case RISCV::X8:
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
index 98c8738..a2b75e4 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
@@ -11,7 +11,6 @@
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCFixup.h"
-#include <utility>
#undef RISCV
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index 5934c91..fd460e4 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -725,7 +725,7 @@ unsigned RISCVMCCodeEmitter::getVMaskReg(const MCInst &MI, unsigned OpNo,
MCOperand MO = MI.getOperand(OpNo);
assert(MO.isReg() && "Expected a register.");
- switch (MO.getReg()) {
+ switch (MO.getReg().id()) {
default:
llvm_unreachable("Invalid mask register.");
case RISCV::V0:
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
index 26f434b..cedaa86 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
@@ -79,6 +79,32 @@ static void generateInstSeqImpl(int64_t Val, const MCSubtargetInfo &STI,
}
}
+ if (STI.hasFeature(RISCV::FeatureStdExtP)) {
+ // Check if the immediate is packed i8 or i10
+ int32_t Bit63To32 = Val >> 32;
+ int32_t Bit31To0 = Val;
+ int16_t Bit31To16 = Bit31To0 >> 16;
+ int16_t Bit15To0 = Bit31To0;
+ int8_t Bit15To8 = Bit15To0 >> 8;
+ int8_t Bit7To0 = Bit15To0;
+ if (Bit63To32 == Bit31To0) {
+ if (IsRV64 && isInt<10>(Bit63To32)) {
+ Res.emplace_back(RISCV::PLI_W, Bit63To32);
+ return;
+ }
+ if (Bit31To16 == Bit15To0) {
+ if (isInt<10>(Bit31To16)) {
+ Res.emplace_back(RISCV::PLI_H, Bit31To16);
+ return;
+ }
+ if (Bit15To8 == Bit7To0) {
+ Res.emplace_back(RISCV::PLI_B, Bit15To8);
+ return;
+ }
+ }
+ }
+ }
+
if (isInt<32>(Val)) {
// Depending on the active bits in the immediate Value v, the following
// instruction sequences are emitted:
@@ -562,6 +588,9 @@ OpndKind Inst::getOpndKind() const {
case RISCV::LUI:
case RISCV::QC_LI:
case RISCV::QC_E_LI:
+ case RISCV::PLI_B:
+ case RISCV::PLI_H:
+ case RISCV::PLI_W:
return RISCVMatInt::Imm;
case RISCV::ADD_UW:
return RISCVMatInt::RegX0;
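
Note (not part of the patch): the hunk above teaches the constant materializer to recognize lane-replicated immediates for the P extension. A minimal standalone sketch of that check, with a hypothetical helper name and the signed 10-bit/8-bit lane ranges assumed from the hunk:

#include <cassert>
#include <cstdint>

// True when Val is a lane-replicated constant that a single PLI_W (RV64 only,
// signed 10-bit word lanes), PLI_H (signed 10-bit halfword lanes) or PLI_B
// (equal byte lanes) could materialize, mirroring the new generateInstSeqImpl
// logic.
bool isReplicatedPackedImm(int64_t Val, bool IsRV64) {
  int32_t Hi32 = int32_t(Val >> 32);
  int32_t Lo32 = int32_t(Val);
  if (Hi32 != Lo32)
    return false;                              // both 32-bit words must match
  if (IsRV64 && Hi32 >= -512 && Hi32 <= 511)
    return true;                               // PLI_W candidate
  int16_t Hi16 = int16_t(Lo32 >> 16);
  int16_t Lo16 = int16_t(Lo32);
  if (Hi16 != Lo16)
    return false;                              // both halfwords must match
  if (Hi16 >= -512 && Hi16 <= 511)
    return true;                               // PLI_H candidate
  return int8_t(Lo16 >> 8) == int8_t(Lo16);    // PLI_B: equal bytes
}

int main() {
  assert(isReplicatedPackedImm(0x0034003400340034LL, /*IsRV64=*/true));   // PLI_H 52
  assert(!isReplicatedPackedImm(0x0034003400350034LL, /*IsRV64=*/true));  // lanes differ
  return 0;
}
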
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h
index a82cd65..5df8edb 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h
@@ -21,7 +21,7 @@ namespace RISCVMatInt {
enum OpndKind {
RegImm, // ADDI/ADDIW/XORI/SLLI/SRLI/SLLI_UW/RORI/BSETI/BCLRI/TH_SRRI
- Imm, // LUI/QC_LI/QC_E_LI
+ Imm, // LUI/QC_LI/QC_E_LI/PLI_B/PLI_H/PLI_W
RegReg, // SH1ADD/SH2ADD/SH3ADD/PACK
RegX0, // ADD_UW
};
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index 51e8e85..048db20 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -26,8 +26,16 @@ class RISCVRegisterBankInfo;
class RISCVSubtarget;
class RISCVTargetMachine;
-FunctionPass *createRISCVCodeGenPreparePass();
-void initializeRISCVCodeGenPreparePass(PassRegistry &);
+class RISCVCodeGenPreparePass : public PassInfoMixin<RISCVCodeGenPreparePass> {
+private:
+ const RISCVTargetMachine *TM;
+
+public:
+ RISCVCodeGenPreparePass(const RISCVTargetMachine *TM) : TM(TM) {}
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+};
+FunctionPass *createRISCVCodeGenPrepareLegacyPass();
+void initializeRISCVCodeGenPrepareLegacyPassPass(PassRegistry &);
FunctionPass *createRISCVDeadRegisterDefinitionsPass();
void initializeRISCVDeadRegisterDefinitionsPass(PassRegistry &);
@@ -94,6 +102,9 @@ void initializeRISCVPushPopOptPass(PassRegistry &);
FunctionPass *createRISCVLoadStoreOptPass();
void initializeRISCVLoadStoreOptPass(PassRegistry &);
+FunctionPass *createRISCVPreAllocZilsdOptPass();
+void initializeRISCVPreAllocZilsdOptPass(PassRegistry &);
+
FunctionPass *createRISCVZacasABIFixPass();
void initializeRISCVZacasABIFixPass(PassRegistry &);
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index b24d863..f6f82fd 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -96,6 +96,8 @@ def RISCVAsmWriter : AsmWriter {
int PassSubtarget = 1;
}
+defm : RemapAllTargetPseudoPointerOperands<GPR>;
+
def RISCV : Target {
let InstructionSet = RISCVInstrInfo;
let AssemblyParsers = [RISCVAsmParser];
diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
index ce34959..1ee4c66 100644
--- a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
@@ -33,20 +33,33 @@ using namespace llvm;
#define PASS_NAME "RISC-V CodeGenPrepare"
namespace {
-
-class RISCVCodeGenPrepare : public FunctionPass,
- public InstVisitor<RISCVCodeGenPrepare, bool> {
+class RISCVCodeGenPrepare : public InstVisitor<RISCVCodeGenPrepare, bool> {
+ Function &F;
const DataLayout *DL;
const DominatorTree *DT;
const RISCVSubtarget *ST;
public:
+ RISCVCodeGenPrepare(Function &F, const DominatorTree *DT,
+ const RISCVSubtarget *ST)
+ : F(F), DL(&F.getDataLayout()), DT(DT), ST(ST) {}
+ bool run();
+ bool visitInstruction(Instruction &I) { return false; }
+ bool visitAnd(BinaryOperator &BO);
+ bool visitIntrinsicInst(IntrinsicInst &I);
+ bool expandVPStrideLoad(IntrinsicInst &I);
+ bool widenVPMerge(IntrinsicInst &I);
+};
+} // namespace
+
+namespace {
+class RISCVCodeGenPrepareLegacyPass : public FunctionPass {
+public:
static char ID;
- RISCVCodeGenPrepare() : FunctionPass(ID) {}
+ RISCVCodeGenPrepareLegacyPass() : FunctionPass(ID) {}
bool runOnFunction(Function &F) override;
-
StringRef getPassName() const override { return PASS_NAME; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -54,15 +67,8 @@ public:
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetPassConfig>();
}
-
- bool visitInstruction(Instruction &I) { return false; }
- bool visitAnd(BinaryOperator &BO);
- bool visitIntrinsicInst(IntrinsicInst &I);
- bool expandVPStrideLoad(IntrinsicInst &I);
- bool widenVPMerge(IntrinsicInst &I);
};
-
-} // end anonymous namespace
+} // namespace
// Try to optimize (i64 (and (zext/sext (i32 X), C1))) if C1 has bit 31 set,
// but bits 63:32 are zero. If we know that bit 31 of X is 0, we can fill
@@ -265,25 +271,17 @@ bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) {
IRBuilder<> Builder(&II);
Type *STy = VTy->getElementType();
Value *Val = Builder.CreateLoad(STy, BasePtr);
- Value *Res = Builder.CreateIntrinsic(Intrinsic::experimental_vp_splat, {VTy},
- {Val, II.getOperand(2), VL});
+ Value *Res = Builder.CreateIntrinsic(
+ Intrinsic::vp_merge, VTy,
+ {II.getOperand(2), Builder.CreateVectorSplat(VTy->getElementCount(), Val),
+ PoisonValue::get(VTy), VL});
II.replaceAllUsesWith(Res);
II.eraseFromParent();
return true;
}
-bool RISCVCodeGenPrepare::runOnFunction(Function &F) {
- if (skipFunction(F))
- return false;
-
- auto &TPC = getAnalysis<TargetPassConfig>();
- auto &TM = TPC.getTM<RISCVTargetMachine>();
- ST = &TM.getSubtarget<RISCVSubtarget>(F);
-
- DL = &F.getDataLayout();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-
+bool RISCVCodeGenPrepare::run() {
bool MadeChange = false;
for (auto &BB : F)
for (Instruction &I : llvm::make_early_inc_range(BB))
@@ -292,12 +290,40 @@ bool RISCVCodeGenPrepare::runOnFunction(Function &F) {
return MadeChange;
}
-INITIALIZE_PASS_BEGIN(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false)
+bool RISCVCodeGenPrepareLegacyPass::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+
+ auto &TPC = getAnalysis<TargetPassConfig>();
+ auto &TM = TPC.getTM<RISCVTargetMachine>();
+ auto ST = &TM.getSubtarget<RISCVSubtarget>(F);
+ auto DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ RISCVCodeGenPrepare RVCGP(F, DT, ST);
+ return RVCGP.run();
+}
+
+INITIALIZE_PASS_BEGIN(RISCVCodeGenPrepareLegacyPass, DEBUG_TYPE, PASS_NAME,
+ false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
-INITIALIZE_PASS_END(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false)
+INITIALIZE_PASS_END(RISCVCodeGenPrepareLegacyPass, DEBUG_TYPE, PASS_NAME, false,
+ false)
-char RISCVCodeGenPrepare::ID = 0;
+char RISCVCodeGenPrepareLegacyPass::ID = 0;
+
+FunctionPass *llvm::createRISCVCodeGenPrepareLegacyPass() {
+ return new RISCVCodeGenPrepareLegacyPass();
+}
-FunctionPass *llvm::createRISCVCodeGenPreparePass() {
- return new RISCVCodeGenPrepare();
+PreservedAnalyses RISCVCodeGenPreparePass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ DominatorTree *DT = &FAM.getResult<DominatorTreeAnalysis>(F);
+ auto ST = &TM->getSubtarget<RISCVSubtarget>(F);
+ bool Changed = RISCVCodeGenPrepare(F, DT, ST).run();
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA = PreservedAnalyses::none();
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
}
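
Note (not part of the patch): the "(i64 (and (zext/sext (i32 X), C1)))" comment in this file describes filling bits 63:32 of C1 with ones when bit 31 of X is known zero, which keeps the AND result unchanged while turning C1 into a sign-extended 32-bit constant that is cheaper to materialize. A tiny standalone check of that reasoning, under assumed example values:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0x7abc1234;                     // bit 31 known clear
  uint64_t C1 = 0x00000000ffff0000;            // bit 31 set, bits 63:32 zero
  uint64_t SextX = uint64_t(int64_t(int32_t(X)));  // bits 63:32 are zero
  uint64_t WidenedC1 = C1 | 0xffffffff00000000ULL; // now a sign-extended 32-bit value
  assert((SextX & C1) == (SextX & WidenedC1));     // same result, cheaper mask
  return 0;
}
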
diff --git a/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp b/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
index 51180f5..5d3d9b5 100644
--- a/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
+++ b/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
@@ -59,7 +59,6 @@ bool RISCVDeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) {
return false;
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
LLVM_DEBUG(dbgs() << "***** RISCVDeadRegisterDefinitions *****\n");
@@ -89,7 +88,7 @@ bool RISCVDeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << " Dead def operand #" << I << " in:\n ";
MI.print(dbgs()));
Register X0Reg;
- const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI);
+ const TargetRegisterClass *RC = TII->getRegClass(Desc, I);
if (RC && RC->contains(RISCV::X0)) {
X0Reg = RISCV::X0;
} else if (RC && RC->contains(RISCV::X0_W)) {
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index b0453fc..60e0afd 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -132,6 +132,9 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
case RISCV::PseudoCCMIN:
case RISCV::PseudoCCMINU:
case RISCV::PseudoCCMUL:
+ case RISCV::PseudoCCLUI:
+ case RISCV::PseudoCCQC_LI:
+ case RISCV::PseudoCCQC_E_LI:
case RISCV::PseudoCCADDW:
case RISCV::PseudoCCSUBW:
case RISCV::PseudoCCSLL:
@@ -239,6 +242,9 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
case RISCV::PseudoCCMAXU: NewOpc = RISCV::MAXU; break;
case RISCV::PseudoCCMINU: NewOpc = RISCV::MINU; break;
case RISCV::PseudoCCMUL: NewOpc = RISCV::MUL; break;
+ case RISCV::PseudoCCLUI: NewOpc = RISCV::LUI; break;
+ case RISCV::PseudoCCQC_LI: NewOpc = RISCV::QC_LI; break;
+ case RISCV::PseudoCCQC_E_LI: NewOpc = RISCV::QC_E_LI; break;
case RISCV::PseudoCCADDI: NewOpc = RISCV::ADDI; break;
case RISCV::PseudoCCSLLI: NewOpc = RISCV::SLLI; break;
case RISCV::PseudoCCSRLI: NewOpc = RISCV::SRLI; break;
@@ -268,6 +274,9 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
.add(MI.getOperand(5))
.add(MI.getOperand(6))
.add(MI.getOperand(7));
+ } else if (NewOpc == RISCV::LUI || NewOpc == RISCV::QC_LI ||
+ NewOpc == RISCV::QC_E_LI) {
+ BuildMI(TrueBB, DL, TII->get(NewOpc), DestReg).add(MI.getOperand(5));
} else {
BuildMI(TrueBB, DL, TII->get(NewOpc), DestReg)
.add(MI.getOperand(5))
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 0b964c4..1a5bb83 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -194,6 +194,10 @@ def HasStdExtZilsd : Predicate<"Subtarget->hasStdExtZilsd()">,
AssemblerPredicate<(all_of FeatureStdExtZilsd),
"'Zilsd' (Load/Store pair instructions)">;
+def FeatureZilsd4ByteAlign
+ : SubtargetFeature<"zilsd-4byte-align", "AllowZilsd4ByteAlign", "true",
+ "Allow 4-byte alignment for Zilsd LD/SD instructions">;
+
// Multiply Extensions
def FeatureStdExtZmmul
@@ -1102,38 +1106,18 @@ def HasStdExtSmctrOrSsctr : Predicate<"Subtarget->hasStdExtSmctrOrSsctr()">,
// Packed SIMD Extensions
def FeatureStdExtP
- : RISCVExperimentalExtension<0, 15,
+ : RISCVExperimentalExtension<0, 18,
"'Base P' (Packed SIMD)">;
def HasStdExtP : Predicate<"Subtarget->hasStdExtP()">,
AssemblerPredicate<(all_of FeatureStdExtP),
"'Base P' (Packed SIMD)">;
-def HasStdExtZbaOrP
- : Predicate<"Subtarget->hasStdExtZba() || Subtarget->hasStdExtP()">,
- AssemblerPredicate<(any_of FeatureStdExtZba, FeatureStdExtP),
- "'Zba' (Address Generation Instructions) or "
- "'Base P' (Packed-SIMD)">;
-
-def HasStdExtZbbOrP
- : Predicate<"Subtarget->hasStdExtZbb() || Subtarget->hasStdExtP()">,
- AssemblerPredicate<(any_of FeatureStdExtZbb, FeatureStdExtP),
- "'Zbb' (Basic Bit-Manipulation) or "
- "'Base P' (Packed-SIMD)">;
-
def HasStdExtZbkbOrP
: Predicate<"Subtarget->hasStdExtZbkb() || Subtarget->hasStdExtP()">,
AssemblerPredicate<(any_of FeatureStdExtZbkb, FeatureStdExtP),
"'Zbkb' (Bitmanip instructions for Cryptography) or "
"'Base P' (Packed-SIMD)">;
-def HasStdExtZbbOrZbkbOrP
- : Predicate<"Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb() || "
- "Subtarget->hasStdExtP()">,
- AssemblerPredicate<(any_of FeatureStdExtZbb, FeatureStdExtZbkb, FeatureStdExtP),
- "'Zbb' (Basic Bit-Manipulation) or "
- "'Zbkb' (Bitmanip instructions for Cryptography) or "
- "'Base P' (Packed-SIMD)">;
-
//===----------------------------------------------------------------------===//
// Vendor extensions
//===----------------------------------------------------------------------===//
@@ -1787,6 +1771,45 @@ def FeatureUnalignedVectorMem
"true", "Has reasonably performant unaligned vector "
"loads and stores">;
+// Assume that lock-free native-width atomics are available, even if the target
+// and operating system combination would not usually provide them. The user
+// is responsible for providing any necessary __sync implementations. Code
+// built with this feature is not ABI-compatible with code built without this
+// feature, if atomic variables are exposed across the ABI boundary.
+def FeatureForcedAtomics : SubtargetFeature<
+ "forced-atomics", "HasForcedAtomics", "true",
+ "Assume that lock-free native-width atomics are available">;
+def HasAtomicLdSt
+ : Predicate<"Subtarget->hasStdExtZalrsc() || Subtarget->hasForcedAtomics()">;
+
+// The RISC-V Unprivileged Architecture - ISA Volume 1 (Version: 20250508)
+// [https://docs.riscv.org/reference/isa/_attachments/riscv-unprivileged.pdf]
+// in section 13.3. Eventual Success of Store-Conditional Instructions, defines
+// _constrained_ LR/SC loops:
+// The dynamic code executed between the LR and SC instructions can only
+// contain instructions from the base ''I'' instruction set, excluding loads,
+// stores, backward jumps, taken backward branches, JALR, FENCE, and SYSTEM
+// instructions. Compressed forms of the aforementioned ''I'' instructions in
+// the Zca and Zcb extensions are also permitted.
+// LR/SC loops that do not adhere to the above are _unconstrained_ LR/SC loops,
+// and success is implementation specific. For implementations which know that
+// non-base instructions (such as the ''B'' extension) will not violate any
+// forward progress guarantees, using these instructions to reduce the LR/SC
+// sequence length is desirable.
+def FeaturePermissiveZalrsc
+ : SubtargetFeature<
+ "permissive-zalrsc", "HasPermissiveZalrsc", "true",
+ "Implementation permits non-base instructions between LR/SC pairs">;
+
+def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals",
+ "AllowTaggedGlobals",
+ "true", "Use an instruction sequence for taking the address of a global "
+ "that allows a memory tag in the upper address bits">;
+
+//===----------------------------------------------------------------------===//
+// Tuning features
+//===----------------------------------------------------------------------===//
+
def TuneNLogNVRGather
: SubtargetFeature<"log-vrgather", "RISCVVRGatherCostModel", "NLog2N",
"Has vrgather.vv with LMUL*log2(LMUL) latency">;
@@ -1846,23 +1869,44 @@ def TuneNoDefaultUnroll
: SubtargetFeature<"no-default-unroll", "EnableDefaultUnroll", "false",
"Disable default unroll preference.">;
-// SiFive 7 is able to fuse integer ALU operations with a preceding branch
-// instruction.
-def TuneShortForwardBranchOpt
- : SubtargetFeature<"short-forward-branch-opt", "HasShortForwardBranchOpt",
- "true", "Enable short forward branch optimization">;
-def HasShortForwardBranchOpt : Predicate<"Subtarget->hasShortForwardBranchOpt()">;
-def NoShortForwardBranchOpt : Predicate<"!Subtarget->hasShortForwardBranchOpt()">;
+// Many Microarchitectures are able to fuse a branch over a single instruction
+// with the branched-over instruction. We call this fusion "short forward
+// branches".
+//
+// We can do this for a variety of instruction groups, depending on the
+// microarch. We broadly group these by their scheduler class:
+// - IALU: RVI Integer instructions, plus ANDN/ORN/XNOR (Zbb/Zbkb)
+// - IMinMax: Zbb MIN(U)/MAX(U)
+// - IMul: MUL
+//
+// We make the simplifying assumption that any microarches that implement
+// any "short forward branches" can do the IALU fusions, and can opt into
+// the other fusions they implement.
+//
+// The important Pseudo used by all these instructions requires the IALU
+// short forward branches.
+//
+// Vendor-specific short-forward-branch opts may be added under IALU, as
+// the vendor-specific instructions should only be enabled for vendor
+// cores.
+def TuneShortForwardBranchIALU
+ : SubtargetFeature<"short-forward-branch-ialu", "HasShortForwardBranchIALU",
+ "true", "Enable short forward branch optimization for RVI base instructions">;
+def HasShortForwardBranchIALU : Predicate<"Subtarget->hasShortForwardBranchIALU()">;
+def NoShortForwardBranch : Predicate<"!Subtarget->hasShortForwardBranchIALU()">;
def TuneShortForwardBranchIMinMax
- : SubtargetFeature<"short-forward-branch-i-minmax", "HasShortForwardBranchIMinMax",
- "true", "Enable short forward branch optimization for min,max instructions in Zbb",
- [TuneShortForwardBranchOpt]>;
+ : SubtargetFeature<"short-forward-branch-iminmax", "HasShortForwardBranchIMinMax",
+ "true", "Enable short forward branch optimization for MIN,MAX instructions in Zbb",
+ [TuneShortForwardBranchIALU]>;
+def HasShortForwardBranchIMinMax : Predicate<"Subtarget->hasShortForwardBranchIMinMax()">;
def TuneShortForwardBranchIMul
- : SubtargetFeature<"short-forward-branch-i-mul", "HasShortForwardBranchIMul",
- "true", "Enable short forward branch optimization for mul instruction",
- [TuneShortForwardBranchOpt]>;
+ : SubtargetFeature<"short-forward-branch-imul", "HasShortForwardBranchIMul",
+ "true", "Enable short forward branch optimization for MUL instruction",
+ [TuneShortForwardBranchIALU]>;
+def HasShortForwardBranchIMul : Predicate<"Subtarget->hasShortForwardBranchIMul()">;
+
// Some subtargets require a S2V transfer buffer to move scalars into vectors.
// FIXME: Forming .vx/.vf/.wx/.wf can reduce register pressure.
@@ -1886,19 +1930,6 @@ def TuneHasSingleElementVecFP64
"Certain vector FP64 operations produce a single result "
"element per cycle">;
-def TuneMIPSP8700
- : SubtargetFeature<"mips-p8700", "RISCVProcFamily", "MIPSP8700",
- "MIPS p8700 processor">;
-
-def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7",
- "SiFive 7-Series processors">;
-
-def TuneVentanaVeyron : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "VentanaVeyron",
- "Ventana Veyron-Series processors">;
-
-def TuneAndes45 : SubtargetFeature<"andes45", "RISCVProcFamily", "Andes45",
- "Andes 45-Series processors">;
-
def TuneVXRMPipelineFlush : SubtargetFeature<"vxrm-pipeline-flush", "HasVXRMPipelineFlush",
"true", "VXRM writes causes pipeline flush">;
@@ -1908,37 +1939,20 @@ def TunePreferVsetvliOverReadVLENB
"true",
"Prefer vsetvli over read vlenb CSR to calculate VLEN">;
-// Assume that lock-free native-width atomics are available, even if the target
-// and operating system combination would not usually provide them. The user
-// is responsible for providing any necessary __sync implementations. Code
-// built with this feature is not ABI-compatible with code built without this
-// feature, if atomic variables are exposed across the ABI boundary.
-def FeatureForcedAtomics : SubtargetFeature<
- "forced-atomics", "HasForcedAtomics", "true",
- "Assume that lock-free native-width atomics are available">;
-def HasAtomicLdSt
- : Predicate<"Subtarget->hasStdExtZalrsc() || Subtarget->hasForcedAtomics()">;
+//===----------------------------------------------------------------------===//
+// CPU Families (alphabetized by vendor).
+//===----------------------------------------------------------------------===//
-// The RISC-V Unprivileged Architecture - ISA Volume 1 (Version: 20250508)
-// [https://docs.riscv.org/reference/isa/_attachments/riscv-unprivileged.pdf]
-// in section 13.3. Eventual Success of Store-Conditional Instructions, defines
-// _constrained_ LR/SC loops:
-// The dynamic code executed between the LR and SC instructions can only
-// contain instructions from the base ''I'' instruction set, excluding loads,
-// stores, backward jumps, taken backward branches, JALR, FENCE, and SYSTEM
-// instructions. Compressed forms of the aforementioned ''I'' instructions in
-// the Zca and Zcb extensions are also permitted.
-// LR/SC loops that do not adhere to the above are _unconstrained_ LR/SC loops,
-// and success is implementation specific. For implementations which know that
-// non-base instructions (such as the ''B'' extension) will not violate any
-// forward progress guarantees, using these instructions to reduce the LR/SC
-// sequence length is desirable.
-def FeaturePermissiveZalrsc
- : SubtargetFeature<
- "permissive-zalrsc", "HasPermissiveZalrsc", "true",
- "Implementation permits non-base instructions between LR/SC pairs">;
+def TuneAndes45 : SubtargetFeature<"andes45", "RISCVProcFamily", "Andes45",
+ "Andes 45-Series processors">;
+
+def TuneMIPSP8700
+ : SubtargetFeature<"mips-p8700", "RISCVProcFamily", "MIPSP8700",
+ "MIPS p8700 processor">;
+
+def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7",
+ "SiFive 7-Series processors">;
+
+def TuneVentanaVeyron : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "VentanaVeyron",
+ "Ventana Veyron-Series processors">;
-def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals",
- "AllowTaggedGlobals",
- "true", "Use an instruction sequence for taking the address of a global "
- "that allows a memory tag in the upper address bits">;
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index f881c4c..668bb84 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -291,12 +291,12 @@ static void emitSiFiveCLICPreemptibleSaves(MachineFunction &MF,
// which affects other passes.
TII->storeRegToStackSlot(MBB, MBBI, RISCV::X8, /* IsKill=*/true,
RVFI->getInterruptCSRFrameIndex(0),
- &RISCV::GPRRegClass, STI.getRegisterInfo(),
- Register(), MachineInstr::FrameSetup);
+ &RISCV::GPRRegClass, Register(),
+ MachineInstr::FrameSetup);
TII->storeRegToStackSlot(MBB, MBBI, RISCV::X9, /* IsKill=*/true,
RVFI->getInterruptCSRFrameIndex(1),
- &RISCV::GPRRegClass, STI.getRegisterInfo(),
- Register(), MachineInstr::FrameSetup);
+ &RISCV::GPRRegClass, Register(),
+ MachineInstr::FrameSetup);
// Put `mcause` into X8 (s0), and `mepc` into X9 (s1). If either of these are
// used in the function, then they will appear in `getUnmanagedCSI` and will
@@ -357,14 +357,12 @@ static void emitSiFiveCLICPreemptibleRestores(MachineFunction &MF,
// X8 and X9 need to be restored to their values on function entry, which we
// saved onto the stack in `emitSiFiveCLICPreemptibleSaves`.
- TII->loadRegFromStackSlot(MBB, MBBI, RISCV::X9,
- RVFI->getInterruptCSRFrameIndex(1),
- &RISCV::GPRRegClass, STI.getRegisterInfo(),
- Register(), MachineInstr::FrameSetup);
- TII->loadRegFromStackSlot(MBB, MBBI, RISCV::X8,
- RVFI->getInterruptCSRFrameIndex(0),
- &RISCV::GPRRegClass, STI.getRegisterInfo(),
- Register(), MachineInstr::FrameSetup);
+ TII->loadRegFromStackSlot(
+ MBB, MBBI, RISCV::X9, RVFI->getInterruptCSRFrameIndex(1),
+ &RISCV::GPRRegClass, Register(), MachineInstr::FrameSetup);
+ TII->loadRegFromStackSlot(
+ MBB, MBBI, RISCV::X8, RVFI->getInterruptCSRFrameIndex(0),
+ &RISCV::GPRRegClass, Register(), MachineInstr::FrameSetup);
}
// Get the ID of the libcall used for spilling and restoring callee saved
@@ -1994,17 +1992,17 @@ RISCVFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF) const {
bool RISCVFrameLowering::assignCalleeSavedSpillSlots(
MachineFunction &MF, const TargetRegisterInfo *TRI,
- std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex,
- unsigned &MaxCSFrameIndex) const {
+ std::vector<CalleeSavedInfo> &CSI) const {
auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
// Preemptible Interrupts have two additional Callee-save Frame Indexes,
// not tracked by `CSI`.
if (RVFI->isSiFivePreemptibleInterrupt(MF)) {
for (int I = 0; I < 2; ++I) {
int FI = RVFI->getInterruptCSRFrameIndex(I);
- MinCSFrameIndex = std::min<unsigned>(MinCSFrameIndex, FI);
- MaxCSFrameIndex = std::max<unsigned>(MaxCSFrameIndex, FI);
+ MFI.setIsCalleeSavedObjectIndex(FI, true);
}
}
@@ -2030,9 +2028,6 @@ bool RISCVFrameLowering::assignCalleeSavedSpillSlots(
}
}
- MachineFrameInfo &MFI = MF.getFrameInfo();
- const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
-
for (auto &CS : CSI) {
MCRegister Reg = CS.getReg();
const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
@@ -2082,10 +2077,7 @@ bool RISCVFrameLowering::assignCalleeSavedSpillSlots(
// min.
Alignment = std::min(Alignment, getStackAlign());
int FrameIdx = MFI.CreateStackObject(Size, Alignment, true);
- if ((unsigned)FrameIdx < MinCSFrameIndex)
- MinCSFrameIndex = FrameIdx;
- if ((unsigned)FrameIdx > MaxCSFrameIndex)
- MaxCSFrameIndex = FrameIdx;
+ MFI.setIsCalleeSavedObjectIndex(FrameIdx, true);
CS.setFrameIdx(FrameIdx);
if (RISCVRegisterInfo::isRVVRegClass(RC))
MFI.setStackID(FrameIdx, TargetStackID::ScalableVector);
@@ -2177,7 +2169,7 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters(
MCRegister Reg = CS.getReg();
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg),
- CS.getFrameIdx(), RC, TRI, Register(),
+ CS.getFrameIdx(), RC, Register(),
MachineInstr::FrameSetup);
}
};
@@ -2267,8 +2259,8 @@ bool RISCVFrameLowering::restoreCalleeSavedRegisters(
for (auto &CS : CSInfo) {
MCRegister Reg = CS.getReg();
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI,
- Register(), MachineInstr::FrameDestroy);
+ TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, Register(),
+ MachineInstr::FrameDestroy);
assert(MI != MBB.begin() &&
"loadRegFromStackSlot didn't insert any code!");
}
@@ -2509,3 +2501,12 @@ void RISCVFrameLowering::inlineStackProbe(MachineFunction &MF,
}
}
}
+
+int RISCVFrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
+ return 0;
+}
+
+Register
+RISCVFrameLowering::getInitialCFARegister(const MachineFunction &MF) const {
+ return RISCV::X2;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
index 6af63a4..84e48db 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -23,6 +23,9 @@ class RISCVFrameLowering : public TargetFrameLowering {
public:
explicit RISCVFrameLowering(const RISCVSubtarget &STI);
+ int getInitialCFAOffset(const MachineFunction &MF) const override;
+ Register getInitialCFARegister(const MachineFunction &MF) const override;
+
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
@@ -44,11 +47,10 @@ public:
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const override;
- bool assignCalleeSavedSpillSlots(MachineFunction &MF,
- const TargetRegisterInfo *TRI,
- std::vector<CalleeSavedInfo> &CSI,
- unsigned &MinCSFrameIndex,
- unsigned &MaxCSFrameIndex) const override;
+ bool
+ assignCalleeSavedSpillSlots(MachineFunction &MF,
+ const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const override;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
ArrayRef<CalleeSavedInfo> CSI,
diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td
index eba35ef..67d2cac 100644
--- a/llvm/lib/Target/RISCV/RISCVGISel.td
+++ b/llvm/lib/Target/RISCV/RISCVGISel.td
@@ -17,14 +17,14 @@ include "RISCV.td"
include "RISCVCombine.td"
def simm12Plus1 : ImmLeaf<XLenVT, [{
- return (isInt<12>(Imm) && Imm != -2048) || Imm == 2048;}]>;
+ return Imm >= -2047 && Imm <= 2048;}]>;
def simm12Plus1i32 : ImmLeaf<i32, [{
- return (isInt<12>(Imm) && Imm != -2048) || Imm == 2048;}]>;
+ return Imm >= -2047 && Imm <= 2048;}]>;
// FIXME: This doesn't check that the G_CONSTANT we're deriving the immediate
// from is only used once
def simm12Minus1Nonzero : ImmLeaf<XLenVT, [{
- return (Imm >= -2049 && Imm < 0) || (Imm > 0 && Imm <= 2046);}]>;
+ return Imm >= -2049 && Imm <= 2046 && Imm != 0;}]>;
def simm12Minus1NonzeroNonNeg1 : ImmLeaf<XLenVT, [{
return (Imm >= -2049 && Imm < -1) || (Imm > 0 && Imm <= 2046);}]>;
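
Note (not part of the patch): a quick standalone check that the simplified simm12Plus1 range test above accepts exactly the same immediates as the old "(isInt<12>(Imm) && Imm != -2048) || Imm == 2048" form (isInt<12> covers -2048..2047):

#include <cassert>
#include <cstdint>

int main() {
  for (int64_t Imm = -4096; Imm <= 4096; ++Imm) {
    bool OldForm = (Imm >= -2048 && Imm <= 2047 && Imm != -2048) || Imm == 2048;
    bool NewForm = Imm >= -2047 && Imm <= 2048;
    assert(OldForm == NewForm);
  }
  return 0;
}
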
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 9078335..8bfdbef 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -51,6 +51,8 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() {
SDValue Result;
switch (N->getOpcode()) {
case ISD::SPLAT_VECTOR: {
+ if (Subtarget->enablePExtCodeGen())
+ break;
// Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
// SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
MVT VT = N->getSimpleValueType(0);
@@ -991,6 +993,18 @@ static unsigned getSegInstNF(unsigned Intrinsic) {
}
}
+static bool isApplicableToPLI(int Val) {
+ // Check if the immediate is packed i8 or i10
+ int16_t Bit31To16 = Val >> 16;
+ int16_t Bit15To0 = Val;
+ int8_t Bit15To8 = Bit15To0 >> 8;
+ int8_t Bit7To0 = Val;
+ if (Bit31To16 != Bit15To0)
+ return false;
+
+ return isInt<10>(Bit31To16) || Bit15To8 == Bit7To0;
+}
+
void RISCVDAGToDAGISel::Select(SDNode *Node) {
// If we have a custom node, we have already selected.
if (Node->isMachineOpcode()) {
@@ -1034,6 +1048,14 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
Imm = SignExtend64<32>(Imm);
+ if (Subtarget->enablePExtCodeGen() && isApplicableToPLI(Imm) &&
+ hasAllWUsers(Node)) {
+ // If it's 4 packed 8-bit integers or 2 packed signed 16-bit integers, we
+ // can simply copy lower 32 bits to higher 32 bits to make it able to
+ // rematerialize to PLI_B or PLI_H
+ Imm = ((uint64_t)Imm << 32) | (Imm & 0xFFFFFFFF);
+ }
+
ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
return;
}
@@ -1478,8 +1500,16 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (tryUnsignedBitfieldInsertInZero(Node, DL, VT, X, Msb, Lsb))
return;
- // (srli (slli c2+c3), c3)
if (OneUseOrZExtW && !IsCANDI) {
+ // (packh x0, X)
+ if (Subtarget->hasStdExtZbkb() && C1 == 0xff00 && C2 == 8) {
+ SDNode *PACKH = CurDAG->getMachineNode(
+ RISCV::PACKH, DL, VT,
+ CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT()), X);
+ ReplaceNode(Node, PACKH);
+ return;
+ }
+ // (srli (slli c2+c3), c3)
SDNode *SLLI = CurDAG->getMachineNode(
RISCV::SLLI, DL, VT, X,
CurDAG->getTargetConstant(C2 + Leading, DL, VT));
@@ -1845,6 +1875,43 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
CurDAG->RemoveDeadNode(Node);
return;
}
+ case RISCVISD::PPACK_DH: {
+ assert(Subtarget->enablePExtCodeGen() && Subtarget->isRV32());
+
+ SDValue Val0 = Node->getOperand(0);
+ SDValue Val1 = Node->getOperand(1);
+ SDValue Val2 = Node->getOperand(2);
+ SDValue Val3 = Node->getOperand(3);
+
+ SDValue Ops[] = {
+ CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32), Val0,
+ CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32), Val2,
+ CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
+ SDValue RegPair0 =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
+ MVT::Untyped, Ops),
+ 0);
+ SDValue Ops1[] = {
+ CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32), Val1,
+ CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32), Val3,
+ CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
+ SDValue RegPair1 =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
+ MVT::Untyped, Ops1),
+ 0);
+
+ MachineSDNode *PackDH = CurDAG->getMachineNode(
+ RISCV::PPAIRE_DB, DL, MVT::Untyped, {RegPair0, RegPair1});
+
+ SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
+ MVT::i32, SDValue(PackDH, 0));
+ SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL,
+ MVT::i32, SDValue(PackDH, 0));
+ ReplaceUses(SDValue(Node, 0), Lo);
+ ReplaceUses(SDValue(Node, 1), Hi);
+ CurDAG->RemoveDeadNode(Node);
+ return;
+ }
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntNo = Node->getConstantOperandVal(0);
switch (IntNo) {
@@ -2654,8 +2721,34 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
CurDAG->RemoveDeadNode(Node);
return;
}
+ if (Subtarget->enablePExtCodeGen()) {
+ bool Is32BitCast =
+ (VT == MVT::i32 && (SrcVT == MVT::v4i8 || SrcVT == MVT::v2i16)) ||
+ (SrcVT == MVT::i32 && (VT == MVT::v4i8 || VT == MVT::v2i16));
+ bool Is64BitCast =
+ (VT == MVT::i64 && (SrcVT == MVT::v8i8 || SrcVT == MVT::v4i16 ||
+ SrcVT == MVT::v2i32)) ||
+ (SrcVT == MVT::i64 &&
+ (VT == MVT::v8i8 || VT == MVT::v4i16 || VT == MVT::v2i32));
+ if (Is32BitCast || Is64BitCast) {
+ ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
+ CurDAG->RemoveDeadNode(Node);
+ return;
+ }
+ }
break;
}
+ case ISD::SCALAR_TO_VECTOR:
+ if (Subtarget->enablePExtCodeGen()) {
+ MVT SrcVT = Node->getOperand(0).getSimpleValueType();
+ if ((VT == MVT::v2i32 && SrcVT == MVT::i64) ||
+ (VT == MVT::v4i8 && SrcVT == MVT::i32)) {
+ ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
+ CurDAG->RemoveDeadNode(Node);
+ return;
+ }
+ }
+ break;
case ISD::INSERT_SUBVECTOR:
case RISCVISD::TUPLE_INSERT: {
SDValue V = Node->getOperand(0);
@@ -4230,14 +4323,14 @@ bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
return selectVSplatImmHelper(
N, SplatVal, *CurDAG, *Subtarget,
- [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; },
+ [](int64_t Imm) { return Imm >= -15 && Imm <= 16; },
/*Decrement=*/true);
}
bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NoDec(SDValue N, SDValue &SplatVal) {
return selectVSplatImmHelper(
N, SplatVal, *CurDAG, *Subtarget,
- [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; },
+ [](int64_t Imm) { return Imm >= -15 && Imm <= 16; },
/*Decrement=*/false);
}
@@ -4245,9 +4338,7 @@ bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
SDValue &SplatVal) {
return selectVSplatImmHelper(
N, SplatVal, *CurDAG, *Subtarget,
- [](int64_t Imm) {
- return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
- },
+ [](int64_t Imm) { return Imm != 0 && Imm >= -15 && Imm <= 16; },
/*Decrement=*/true);
}
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a3ccbd8..7cbb9c0 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -88,13 +88,15 @@ static cl::opt<bool>
cl::init(true));
// TODO: Support more ops
-static const unsigned ZvfbfaVPOps[] = {ISD::VP_FNEG, ISD::VP_FABS,
- ISD::VP_FCOPYSIGN};
-static const unsigned ZvfbfaOps[] = {ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN};
+static const unsigned ZvfbfaVPOps[] = {
+ ISD::VP_FNEG, ISD::VP_FABS, ISD::VP_FCOPYSIGN};
+static const unsigned ZvfbfaOps[] = {
+ ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::SPLAT_VECTOR,
+ ISD::FADD, ISD::FSUB, ISD::FMUL};
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
const RISCVSubtarget &STI)
- : TargetLowering(TM), Subtarget(STI) {
+ : TargetLowering(TM, STI), Subtarget(STI) {
RISCVABI::ABI ABI = Subtarget.getTargetABI();
assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
@@ -284,6 +286,18 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
}
+ // fixed vector is stored in GPRs for P extension packed operations
+ if (Subtarget.enablePExtCodeGen()) {
+ if (Subtarget.is64Bit()) {
+ addRegisterClass(MVT::v2i32, &RISCV::GPRRegClass);
+ addRegisterClass(MVT::v4i16, &RISCV::GPRRegClass);
+ addRegisterClass(MVT::v8i8, &RISCV::GPRRegClass);
+ } else {
+ addRegisterClass(MVT::v2i16, &RISCV::GPRRegClass);
+ addRegisterClass(MVT::v4i8, &RISCV::GPRRegClass);
+ }
+ }
+
// Compute derived properties from the register classes.
computeRegisterProperties(STI.getRegisterInfo());
@@ -323,9 +337,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
- if (!Subtarget.hasStdExtZbb() && !Subtarget.hasStdExtP() &&
- !Subtarget.hasVendorXTHeadBb() && !Subtarget.hasVendorXqcibm() &&
- !Subtarget.hasVendorXAndesPerf() &&
+ if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
+ !Subtarget.hasVendorXqcibm() && !Subtarget.hasVendorXAndesPerf() &&
!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
@@ -398,7 +411,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
}
- if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtP() ||
+ if (Subtarget.hasStdExtZbb() ||
(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT,
Legal);
@@ -409,9 +422,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
} else {
setOperationAction(ISD::CTTZ, XLenVT, Expand);
- // If have a CLZW, but not CTZW, custom promote i32.
- if (Subtarget.hasStdExtP() && Subtarget.is64Bit())
- setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
}
if (!Subtarget.hasCPOPLike()) {
@@ -440,7 +450,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ABS, XLenVT, Legal);
if (Subtarget.is64Bit())
setOperationAction(ISD::ABS, MVT::i32, Custom);
- } else if (Subtarget.hasShortForwardBranchOpt()) {
+ } else if (Subtarget.hasShortForwardBranchIALU()) {
// We can use PseudoCCSUB to implement ABS.
setOperationAction(ISD::ABS, XLenVT, Legal);
} else if (Subtarget.is64Bit()) {
@@ -492,6 +502,35 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
ISD::FROUNDEVEN, ISD::FCANONICALIZE};
+ if (Subtarget.enablePExtCodeGen()) {
+ setTargetDAGCombine(ISD::TRUNCATE);
+ setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
+ setTruncStoreAction(MVT::v4i16, MVT::v4i8, Expand);
+ SmallVector<MVT, 2> VTs;
+ if (Subtarget.is64Bit()) {
+ VTs.append({MVT::v2i32, MVT::v4i16, MVT::v8i8});
+ setTruncStoreAction(MVT::v2i64, MVT::v2i32, Expand);
+ setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
+ setTruncStoreAction(MVT::v8i16, MVT::v8i8, Expand);
+ setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
+ setTruncStoreAction(MVT::v4i16, MVT::v4i8, Expand);
+ } else {
+ VTs.append({MVT::v2i16, MVT::v4i8});
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i8, Custom);
+ }
+ setOperationAction(ISD::UADDSAT, VTs, Legal);
+ setOperationAction(ISD::SADDSAT, VTs, Legal);
+ setOperationAction(ISD::USUBSAT, VTs, Legal);
+ setOperationAction(ISD::SSUBSAT, VTs, Legal);
+ setOperationAction(ISD::SSHLSAT, VTs, Legal);
+ setOperationAction({ISD::AVGFLOORS, ISD::AVGFLOORU}, VTs, Legal);
+ setOperationAction({ISD::ABDS, ISD::ABDU}, VTs, Legal);
+ setOperationAction(ISD::SPLAT_VECTOR, VTs, Legal);
+ setOperationAction(ISD::SHL, VTs, Custom);
+ setOperationAction(ISD::BITCAST, VTs, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VTs, Custom);
+ }
+
if (Subtarget.hasStdExtZfbfmin()) {
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
setOperationAction(ISD::ConstantFP, MVT::bf16, Expand);
@@ -756,8 +795,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
- ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
- ISD::EXPERIMENTAL_VP_SPLAT};
+ ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF};
static const unsigned FloatingPointVPOps[] = {
ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
@@ -772,7 +810,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
ISD::VP_LLRINT, ISD::VP_REDUCE_FMINIMUM,
- ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
+ ISD::VP_REDUCE_FMAXIMUM};
static const unsigned IntegerVecReduceOps[] = {
ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
@@ -866,7 +904,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
- setOperationAction(ISD::EXPERIMENTAL_VP_SPLAT, VT, Custom);
setOperationPromotedToType(
ISD::VECTOR_SPLICE, VT,
@@ -1049,6 +1086,36 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VECREDUCE_FMINIMUM,
ISD::VECREDUCE_FMAXIMUM};
+ // TODO: Make more of these ops legal.
+ static const unsigned ZvfbfaPromoteOps[] = {ISD::FMINNUM,
+ ISD::FMAXNUM,
+ ISD::FMINIMUMNUM,
+ ISD::FMAXIMUMNUM,
+ ISD::FDIV,
+ ISD::FMA,
+ ISD::FSQRT,
+ ISD::FCEIL,
+ ISD::FTRUNC,
+ ISD::FFLOOR,
+ ISD::FROUND,
+ ISD::FROUNDEVEN,
+ ISD::FRINT,
+ ISD::FNEARBYINT,
+ ISD::IS_FPCLASS,
+ ISD::SETCC,
+ ISD::FMAXIMUM,
+ ISD::FMINIMUM,
+ ISD::STRICT_FADD,
+ ISD::STRICT_FSUB,
+ ISD::STRICT_FMUL,
+ ISD::STRICT_FDIV,
+ ISD::STRICT_FSQRT,
+ ISD::STRICT_FMA,
+ ISD::VECREDUCE_FMIN,
+ ISD::VECREDUCE_FMAX,
+ ISD::VECREDUCE_FMINIMUM,
+ ISD::VECREDUCE_FMAXIMUM};
+
// TODO: support more vp ops.
static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
ISD::VP_FADD,
@@ -1181,12 +1248,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
MVT EltVT = VT.getVectorElementType();
if (isTypeLegal(EltVT))
- setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
- ISD::EXTRACT_VECTOR_ELT},
+ setOperationAction({ISD::SPLAT_VECTOR, ISD::EXTRACT_VECTOR_ELT},
VT, Custom);
else
- setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
- EltVT, Custom);
+ setOperationAction(ISD::SPLAT_VECTOR, EltVT, Custom);
setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
@@ -1226,26 +1291,19 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
- setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::CONCAT_VECTORS,
- ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
- ISD::VECTOR_DEINTERLEAVE, ISD::VECTOR_INTERLEAVE,
- ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE,
- ISD::VECTOR_COMPRESS},
+ setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
+ ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
+ ISD::EXTRACT_SUBVECTOR, ISD::VECTOR_DEINTERLEAVE,
+ ISD::VECTOR_INTERLEAVE, ISD::VECTOR_REVERSE,
+ ISD::VECTOR_SPLICE, ISD::VECTOR_COMPRESS},
VT, Custom);
setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
setOperationAction(ISD::FCOPYSIGN, VT, Legal);
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
setOperationAction(ZvfbfaVPOps, VT, Custom);
- MVT EltVT = VT.getVectorElementType();
- if (isTypeLegal(EltVT))
- setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
- ISD::EXTRACT_VECTOR_ELT},
- VT, Custom);
- else
- setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
- EltVT, Custom);
setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
@@ -1259,11 +1317,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
if (getLMUL(VT) == RISCVVType::LMUL_8) {
- setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
+ setOperationAction(ZvfbfaPromoteOps, VT, Custom);
setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
} else {
MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
- setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
+ setOperationPromotedToType(ZvfbfaPromoteOps, VT, F32VecVT);
setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
}
};
@@ -1580,8 +1638,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// TODO: could split the f16 vector into two vectors and do promotion.
if (!isTypeLegal(F32VecVT))
continue;
- setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
- // TODO: Promote VP ops to fp32.
+
+ if (Subtarget.hasStdExtZvfbfa())
+ setOperationPromotedToType(ZvfbfaPromoteOps, VT, F32VecVT);
+ else
+ setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
+ setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
continue;
}
@@ -1776,6 +1838,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
MaxLoadsPerMemcmp = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/false);
}
+TargetLoweringBase::LegalizeTypeAction
+RISCVTargetLowering::getPreferredVectorAction(MVT VT) const {
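+ // With P-extension codegen on RV64, v2i16 and v4i8 are only half a GPR
+ // wide; widen them to the GPR-sized v4i16/v8i8 types.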
+ if (Subtarget.is64Bit() && Subtarget.enablePExtCodeGen())
+ if (VT == MVT::v2i16 || VT == MVT::v4i8)
+ return TypeWidenVector;
+
+ return TargetLoweringBase::getPreferredVectorAction(VT);
+}
+
EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
LLVMContext &Context,
EVT VT) const {
@@ -1824,7 +1895,7 @@ bool RISCVTargetLowering::shouldExpandCttzElements(EVT VT) const {
}
bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
- const CallInst &I,
+ const CallBase &I,
MachineFunction &MF,
unsigned Intrinsic) const {
auto &DL = I.getDataLayout();
@@ -2493,7 +2564,7 @@ bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
// TODO: For sizes which aren't multiples of VLEN sizes, this may not be
// a cheap extract. However, this case is important in practice for
// shuffled extracts of longer vectors. How resolve?
- return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
+ return (ResElts * 2) == SrcElts && Index == ResElts;
}
MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
@@ -2505,9 +2576,7 @@ MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
!Subtarget.hasStdExtZfhminOrZhinxmin())
return MVT::f32;
- MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
-
- return PartVT;
+ return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}
unsigned
@@ -2533,15 +2602,6 @@ unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}
-unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv(
- LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
- unsigned &NumIntermediates, MVT &RegisterVT) const {
- unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(
- Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
-
- return NumRegs;
-}
-
// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly by branches
// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
@@ -4392,6 +4452,33 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
SDLoc DL(Op);
+ if (Subtarget.isRV32() && Subtarget.enablePExtCodeGen()) {
+ if (VT != MVT::v4i8)
+ return SDValue();
+
+ // <4 x i8> BUILD_VECTOR a, b, c, d -> PACK(PPACK.DH pair(a, b), pair(c, d))
+ SDValue Val0 =
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i8, Op->getOperand(0));
+ SDValue Val1 =
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i8, Op->getOperand(1));
+ SDValue Val2 =
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i8, Op->getOperand(2));
+ SDValue Val3 =
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i8, Op->getOperand(3));
+ SDValue PackDH =
+ DAG.getNode(RISCVISD::PPACK_DH, DL, {MVT::v2i16, MVT::v2i16},
+ {Val0, Val1, Val2, Val3});
+
+ return DAG.getNode(
+ ISD::BITCAST, DL, MVT::v4i8,
+ SDValue(
+ DAG.getMachineNode(
+ RISCV::PACK, DL, MVT::i32,
+ {DAG.getNode(ISD::BITCAST, DL, MVT::i32, PackDH.getValue(0)),
+ DAG.getNode(ISD::BITCAST, DL, MVT::i32, PackDH.getValue(1))}),
+ 0));
+ }
+
// Proper support for f16 requires Zvfh. bf16 always requires special
// handling. We need to cast the scalar to integer and create an integer
// build_vector.
@@ -4793,7 +4880,7 @@ static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
if (VT.isFloatingPoint()) {
if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
- EltVT == MVT::bf16) {
+ (EltVT == MVT::bf16 && !Subtarget.hasVInstructionsBF16())) {
if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
(EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar);
@@ -6773,6 +6860,99 @@ SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
Store->getMemOperand()->getFlags());
}
+// While RVV has alignment restrictions, we should always be able to load as a
+// legal equivalently-sized byte-typed vector instead. This method is
+// responsible for re-expressing an ISD::VP_LOAD via a correctly-aligned type.
+// If the load is already correctly aligned, it returns SDValue().
+SDValue RISCVTargetLowering::expandUnalignedVPLoad(SDValue Op,
+ SelectionDAG &DAG) const {
+ auto *Load = cast<VPLoadSDNode>(Op);
+ assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
+
+ if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+ Load->getMemoryVT(),
+ *Load->getMemOperand()))
+ return SDValue();
+
+ SDValue Mask = Load->getMask();
+
+ // FIXME: Handle masked loads somehow.
+ if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
+ return SDValue();
+
+ SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
+ unsigned EltSizeBits = VT.getScalarSizeInBits();
+ assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
+ "Unexpected unaligned RVV load type");
+ MVT NewVT =
+ MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
+ assert(NewVT.isValid() &&
+ "Expecting equally-sized RVV vector types to be legal");
+
+ SDValue VL = Load->getVectorLength();
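+ // The byte-typed load covers EltSizeBits / 8 bytes per original element,
+ // so scale VL by that factor.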
+ VL = DAG.getNode(ISD::MUL, DL, VL.getValueType(), VL,
+ DAG.getConstant((EltSizeBits / 8), DL, VL.getValueType()));
+
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, NewVT.getVectorElementCount());
+ SDValue L = DAG.getLoadVP(NewVT, DL, Load->getChain(), Load->getBasePtr(),
+ DAG.getAllOnesConstant(DL, MaskVT), VL,
+ Load->getPointerInfo(), Load->getBaseAlign(),
+ Load->getMemOperand()->getFlags(), AAMDNodes());
+ return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
+}
+
+// While RVV has alignment restrictions, we should always be able to store as a
+// legal equivalently-sized byte-typed vector instead. This method is
+// responsible for re-expressing an ISD::VP_STORE via a correctly-aligned type.
+// It returns SDValue() if the store is already correctly aligned.
+SDValue RISCVTargetLowering::expandUnalignedVPStore(SDValue Op,
+ SelectionDAG &DAG) const {
+ auto *Store = cast<VPStoreSDNode>(Op);
+ assert(Store && Store->getValue().getValueType().isVector() &&
+ "Expected vector store");
+
+ if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+ Store->getMemoryVT(),
+ *Store->getMemOperand()))
+ return SDValue();
+
+ SDValue Mask = Store->getMask();
+
+ // FIXME: Handle masked stores somehow.
+ if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
+ return SDValue();
+
+ SDLoc DL(Op);
+ SDValue StoredVal = Store->getValue();
+ MVT VT = StoredVal.getSimpleValueType();
+ unsigned EltSizeBits = VT.getScalarSizeInBits();
+ assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
+ "Unexpected unaligned RVV store type");
+ MVT NewVT =
+ MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
+ assert(NewVT.isValid() &&
+ "Expecting equally-sized RVV vector types to be legal");
+
+ SDValue VL = Store->getVectorLength();
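+ // Scale VL by the number of bytes per original element, as for the load.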
+ VL = DAG.getNode(ISD::MUL, DL, VL.getValueType(), VL,
+ DAG.getConstant((EltSizeBits / 8), DL, VL.getValueType()));
+
+ StoredVal = DAG.getBitcast(NewVT, StoredVal);
+
+ LocationSize Size = LocationSize::precise(NewVT.getStoreSize());
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ Store->getPointerInfo(), Store->getMemOperand()->getFlags(), Size,
+ Store->getBaseAlign());
+
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, NewVT.getVectorElementCount());
+ return DAG.getStoreVP(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
+ DAG.getUNDEF(Store->getBasePtr().getValueType()),
+ DAG.getAllOnesConstant(DL, MaskVT), VL, NewVT, MMO,
+ ISD::UNINDEXED);
+}
+
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
@@ -7546,6 +7726,19 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}
+ if (Subtarget.enablePExtCodeGen()) {
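+ // Packed P-extension vectors and same-width scalars share GPRs, so these
+ // bitcasts are no-ops; returning Op leaves the node as legal.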
+ bool Is32BitCast =
+ (VT == MVT::i32 && (Op0VT == MVT::v4i8 || Op0VT == MVT::v2i16)) ||
+ (Op0VT == MVT::i32 && (VT == MVT::v4i8 || VT == MVT::v2i16));
+ bool Is64BitCast =
+ (VT == MVT::i64 && (Op0VT == MVT::v8i8 || Op0VT == MVT::v4i16 ||
+ Op0VT == MVT::v2i32)) ||
+ (Op0VT == MVT::i64 &&
+ (VT == MVT::v8i8 || VT == MVT::v4i16 || VT == MVT::v2i32));
+ if (Is32BitCast || Is64BitCast)
+ return Op;
+ }
+
// Consider other scalar<->scalar casts as legal if the types are legal.
// Otherwise expand them.
if (!VT.isVector() && !Op0VT.isVector()) {
@@ -8218,6 +8411,17 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
auto *Store = cast<StoreSDNode>(Op);
SDValue StoredVal = Store->getValue();
EVT VT = StoredVal.getValueType();
+ if (Subtarget.enablePExtCodeGen()) {
+ if (VT == MVT::v2i16 || VT == MVT::v4i8) {
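+ // Store the packed vector as the equivalent i32 scalar.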
+ SDLoc DL(Op);
+ SDValue Cast = DAG.getBitcast(MVT::i32, StoredVal);
+ SDValue NewStore =
+ DAG.getStore(Store->getChain(), DL, Cast, Store->getBasePtr(),
+ Store->getPointerInfo(), Store->getBaseAlign(),
+ Store->getMemOperand()->getFlags());
+ return NewStore;
+ }
+ }
if (VT == MVT::f64) {
assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
!Subtarget.is64Bit() && "Unexpected custom legalisation");
@@ -8245,7 +8449,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
if (Store->isTruncatingStore())
return SDValue();
- if (!Subtarget.enableUnalignedScalarMem() && Store->getAlign() < 8)
+ if (Store->getAlign() < Subtarget.getZilsdAlign())
return SDValue();
SDLoc DL(Op);
@@ -8304,13 +8508,19 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerFixedLengthVectorStoreToRVV(Op, DAG);
return Op;
}
- case ISD::MLOAD:
case ISD::VP_LOAD:
+ if (SDValue V = expandUnalignedVPLoad(Op, DAG))
+ return V;
+ [[fallthrough]];
+ case ISD::MLOAD:
return lowerMaskedLoad(Op, DAG);
case ISD::VP_LOAD_FF:
return lowerLoadFF(Op, DAG);
- case ISD::MSTORE:
case ISD::VP_STORE:
+ if (SDValue V = expandUnalignedVPStore(Op, DAG))
+ return V;
+ [[fallthrough]];
+ case ISD::MSTORE:
return lowerMaskedStore(Op, DAG);
case ISD::VECTOR_COMPRESS:
return lowerVectorCompress(Op, DAG);
@@ -8398,6 +8608,18 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::VSELECT:
return lowerToScalableOp(Op, DAG);
case ISD::SHL:
+ if (Subtarget.enablePExtCodeGen() &&
+ Op.getSimpleValueType().isFixedLengthVector()) {
+ // We have patterns for scalar/immediate shift amount, so no lowering
+ // needed.
+ if (Op.getOperand(1)->getOpcode() == ISD::SPLAT_VECTOR)
+ return Op;
+
+ // The P extension has no vector-vector shift instructions, so unroll to
+ // scalar shifts and pack the results back together.
+ return DAG.UnrollVectorOp(Op.getNode());
+ }
+ [[fallthrough]];
case ISD::SRA:
case ISD::SRL:
if (Op.getSimpleValueType().isFixedLengthVector())
@@ -8654,8 +8876,6 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerVPSpliceExperimental(Op, DAG);
case ISD::EXPERIMENTAL_VP_REVERSE:
return lowerVPReverseExperimental(Op, DAG);
- case ISD::EXPERIMENTAL_VP_SPLAT:
- return lowerVPSplatExperimental(Op, DAG);
case ISD::CLEAR_CACHE: {
assert(getTargetMachine().getTargetTriple().isOSLinux() &&
"llvm.clear_cache only needs custom lower on Linux targets");
@@ -9315,7 +9535,7 @@ static SDValue lowerSelectToBinOp(SDNode *N, SelectionDAG &DAG,
static SDValue
foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
- if (Subtarget.hasShortForwardBranchOpt())
+ if (Subtarget.hasShortForwardBranchIALU())
return SDValue();
unsigned SelOpNo = 0;
@@ -9390,6 +9610,50 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
if (SDValue V = lowerSelectToBinOp(Op.getNode(), DAG, Subtarget))
return V;
+ // With Zfinx, floating-point values already live in GPRs, so we can use a
+ // Zicond select for them even though CondV has an integer type.
+ bool FPinGPR = Subtarget.hasStdExtZfinx();
+
+ // We can only handle FP values that fit in a single GPR, i.e. without
+ // splitting into hi/lo parts.
+ bool FitsInGPR = TypeSize::isKnownLE(VT.getSizeInBits(),
+ Subtarget.getXLenVT().getSizeInBits());
+
+ bool UseZicondForFPSel = Subtarget.hasStdExtZicond() && FPinGPR &&
+ VT.isFloatingPoint() && FitsInGPR;
+
+ if (UseZicondForFPSel) {
+ auto CastToInt = [&](SDValue V) -> SDValue {
+ // Treat +0.0 as int 0 to enable single 'czero' instruction generation.
+ if (isNullFPConstant(V))
+ return DAG.getConstant(0, DL, XLenVT);
+
+ if (VT == MVT::f16)
+ return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, V);
+
+ if (VT == MVT::f32 && Subtarget.is64Bit())
+ return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, V);
+
+ return DAG.getBitcast(XLenVT, V);
+ };
+
+ SDValue TrueVInt = CastToInt(TrueV);
+ SDValue FalseVInt = CastToInt(FalseV);
+
+ // Emit integer SELECT (lowers to Zicond)
+ SDValue ResultInt =
+ DAG.getNode(ISD::SELECT, DL, XLenVT, CondV, TrueVInt, FalseVInt);
+
+ // Convert the result back to the original floating-point VT.
+ if (VT == MVT::f32 && Subtarget.is64Bit())
+ return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, VT, ResultInt);
+
+ if (VT == MVT::f16)
+ return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, ResultInt);
+
+ return DAG.getBitcast(VT, ResultInt);
+ }
+
// When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
// nodes to implement the SELECT. Performing the lowering here allows for
// greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
@@ -10254,7 +10518,7 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
}
if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
- ValVT == MVT::bf16) {
+ (ValVT == MVT::bf16 && !Subtarget.hasVInstructionsBF16())) {
// If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
MVT IntVT = VecVT.changeTypeToInteger();
SDValue IntInsert = DAG.getNode(
@@ -10491,7 +10755,7 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
}
if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
- EltVT == MVT::bf16) {
+ (EltVT == MVT::bf16 && !Subtarget.hasVInstructionsBF16())) {
// If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
MVT IntVT = VecVT.changeTypeToInteger();
SDValue IntVec = DAG.getBitcast(IntVT, Vec);
@@ -10500,6 +10764,17 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
return DAG.getNode(RISCVISD::FMV_H_X, DL, EltVT, IntExtract);
}
+ if (Subtarget.enablePExtCodeGen() && VecVT.isFixedLengthVector()) {
+ if (VecVT != MVT::v4i16 && VecVT != MVT::v2i16 && VecVT != MVT::v8i8 &&
+ VecVT != MVT::v4i8 && VecVT != MVT::v2i32)
+ return SDValue();
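+ // The whole vector lives in a single GPR; extract the element by shifting
+ // right by Idx * ElemWidth bits.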
+ SDValue Extracted = DAG.getBitcast(XLenVT, Vec);
+ unsigned ElemWidth = VecVT.getVectorElementType().getSizeInBits();
+ SDValue Shamt = DAG.getNode(ISD::MUL, DL, XLenVT, Idx,
+ DAG.getConstant(ElemWidth, DL, XLenVT));
+ return DAG.getNode(ISD::SRL, DL, XLenVT, Extracted, Shamt);
+ }
+
// If this is a fixed vector, we need to convert it to a scalable vector.
MVT ContainerVT = VecVT;
if (VecVT.isFixedLengthVector()) {
@@ -12534,10 +12809,7 @@ SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
SmallVector<SDValue, 8> Loads(Factor);
- SDValue Increment =
- DAG.getVScale(DL, PtrVT,
- APInt(PtrVT.getFixedSizeInBits(),
- VecVT.getStoreSize().getKnownMinValue()));
+ SDValue Increment = DAG.getTypeSize(DL, PtrVT, VecVT.getStoreSize());
for (unsigned i = 0; i != Factor; ++i) {
if (i != 0)
StackPtr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, Increment);
@@ -13823,47 +14095,6 @@ RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
return convertFromScalableVector(VT, Result, DAG, Subtarget);
}
-SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
- SelectionDAG &DAG) const {
- SDLoc DL(Op);
- SDValue Val = Op.getOperand(0);
- SDValue Mask = Op.getOperand(1);
- SDValue VL = Op.getOperand(2);
- MVT VT = Op.getSimpleValueType();
-
- MVT ContainerVT = VT;
- if (VT.isFixedLengthVector()) {
- ContainerVT = getContainerForFixedLengthVector(VT);
- MVT MaskVT = getMaskTypeFor(ContainerVT);
- Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
- }
-
- SDValue Result;
- if (VT.getScalarType() == MVT::i1) {
- if (auto *C = dyn_cast<ConstantSDNode>(Val)) {
- Result =
- DAG.getNode(C->isZero() ? RISCVISD::VMCLR_VL : RISCVISD::VMSET_VL, DL,
- ContainerVT, VL);
- } else {
- MVT WidenVT = ContainerVT.changeVectorElementType(MVT::i8);
- SDValue LHS =
- DAG.getNode(RISCVISD::VMV_V_X_VL, DL, WidenVT, DAG.getUNDEF(WidenVT),
- DAG.getZExtOrTrunc(Val, DL, Subtarget.getXLenVT()), VL);
- SDValue RHS = DAG.getConstant(0, DL, WidenVT);
- Result = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
- {LHS, RHS, DAG.getCondCode(ISD::SETNE),
- DAG.getUNDEF(ContainerVT), Mask, VL});
- }
- } else {
- Result =
- lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
- }
-
- if (!VT.isFixedLengthVector())
- return Result;
- return convertFromScalableVector(VT, Result, DAG, Subtarget);
-}
-
SDValue
RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
SelectionDAG &DAG) const {
@@ -13935,9 +14166,8 @@ RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
// Slide off any elements from past EVL that were reversed into the low
// elements.
- unsigned MinElts = GatherVT.getVectorMinNumElements();
SDValue VLMax =
- DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
+ DAG.getElementCount(DL, XLenVT, GatherVT.getVectorElementCount());
SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
@@ -14627,7 +14857,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
"Unexpected custom legalisation");
- if (!Subtarget.enableUnalignedScalarMem() && Ld->getAlign() < 8)
+ if (Ld->getAlign() < Subtarget.getZilsdAlign())
return;
SDLoc DL(N);
@@ -14752,24 +14982,6 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
bool IsCTZ =
N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
- // Without Zbb, lower as 32 - clzw(~X & (X-1))
- if (IsCTZ && !Subtarget.hasStdExtZbb()) {
- assert(Subtarget.hasStdExtP());
-
- NewOp0 = DAG.getFreeze(NewOp0);
- SDValue Not = DAG.getNOT(DL, NewOp0, MVT::i64);
- SDValue Minus1 = DAG.getNode(ISD::SUB, DL, MVT::i64, NewOp0,
- DAG.getConstant(1, DL, MVT::i64));
- SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Not, Minus1);
- SDValue CLZW = DAG.getNode(RISCVISD::CLZW, DL, MVT::i64, And);
- SDValue Sub = DAG.getNode(ISD::SUB, DL, MVT::i64,
- DAG.getConstant(32, DL, MVT::i64), CLZW);
- SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Sub,
- DAG.getValueType(MVT::i32));
- Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
- return;
- }
-
unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
@@ -14997,6 +15209,21 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
break;
}
+ case RISCVISD::PASUB:
+ case RISCVISD::PASUBU: {
+ MVT VT = N->getSimpleValueType(0);
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ assert(VT == MVT::v2i16 || VT == MVT::v4i8);
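+ // v2i16/v4i8 are widened on RV64; concatenate with undef and re-emit the
+ // node on the widened type.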
+ MVT NewVT = MVT::v4i16;
+ if (VT == MVT::v4i8)
+ NewVT = MVT::v8i8;
+ SDValue Undef = DAG.getUNDEF(VT);
+ Op0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, NewVT, {Op0, Undef});
+ Op1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, NewVT, {Op1, Undef});
+ Results.push_back(DAG.getNode(N->getOpcode(), DL, NewVT, {Op0, Op1}));
+ return;
+ }
case ISD::EXTRACT_VECTOR_ELT: {
// Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
// type is illegal (currently only vXi64 RV32).
@@ -16104,11 +16331,84 @@ static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
}
+// Handle P extension averaging subtraction pattern:
+// (vXiY (trunc (srl (sub ([s|z]ext vXiY:$a), ([s|z]ext vXiY:$b)), 1)))
+// -> PASUB/PASUBU
+static SDValue combinePExtTruncate(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ if (N0.getOpcode() != ISD::SRL)
+ return SDValue();
+
+ MVT VecVT = VT.getSimpleVT();
+ if (VecVT != MVT::v4i16 && VecVT != MVT::v2i16 && VecVT != MVT::v8i8 &&
+ VecVT != MVT::v4i8 && VecVT != MVT::v2i32)
+ return SDValue();
+
+ // Check if shift amount is 1
+ SDValue ShAmt = N0.getOperand(1);
+ if (ShAmt.getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+
+ BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(ShAmt.getNode());
+ if (!BV)
+ return SDValue();
+ SDValue Splat = BV->getSplatValue();
+ if (!Splat)
+ return SDValue();
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Splat);
+ if (!C)
+ return SDValue();
+ if (C->getZExtValue() != 1)
+ return SDValue();
+
+ // Check for SUB operation
+ SDValue Sub = N0.getOperand(0);
+ if (Sub.getOpcode() != ISD::SUB)
+ return SDValue();
+
+ SDValue LHS = Sub.getOperand(0);
+ SDValue RHS = Sub.getOperand(1);
+
+ // Check if both operands are sign/zero extends from the target
+ // type
+ bool IsSignExt = LHS.getOpcode() == ISD::SIGN_EXTEND &&
+ RHS.getOpcode() == ISD::SIGN_EXTEND;
+ bool IsZeroExt = LHS.getOpcode() == ISD::ZERO_EXTEND &&
+ RHS.getOpcode() == ISD::ZERO_EXTEND;
+
+ if (!IsSignExt && !IsZeroExt)
+ return SDValue();
+
+ SDValue A = LHS.getOperand(0);
+ SDValue B = RHS.getOperand(0);
+
+ // Check if the extends are from our target vector type
+ if (A.getValueType() != VT || B.getValueType() != VT)
+ return SDValue();
+
+ // Determine the instruction based on type and signedness
+ unsigned Opc;
+ if (IsSignExt)
+ Opc = RISCVISD::PASUB;
+ else if (IsZeroExt)
+ Opc = RISCVISD::PASUBU;
+ else
+ return SDValue();
+
+ // Create the PASUB/PASUBU node.
+ return DAG.getNode(Opc, SDLoc(N), VT, {A, B});
+}
+
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ if (VT.isFixedLengthVector() && Subtarget.enablePExtCodeGen())
+ return combinePExtTruncate(N, DAG, Subtarget);
+
// Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
// extending X. This is safe since we only need the LSB after the shift and
// shift amounts larger than 31 would produce poison. If we wait until
@@ -16591,22 +16891,33 @@ static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG,
static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG,
uint64_t MulAmt) {
uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
+ SDValue X = N->getOperand(0);
ISD::NodeType Op;
uint64_t ShiftAmt1;
- if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
- Op = ISD::SUB;
- ShiftAmt1 = MulAmt + MulAmtLowBit;
- } else if (isPowerOf2_64(MulAmt - MulAmtLowBit)) {
+ bool CanSub = isPowerOf2_64(MulAmt + MulAmtLowBit);
+ auto PreferSub = [X, MulAmtLowBit]() {
+ // For MulAmt == 3 << M, both (X << (M + 2)) - (X << M) and
+ // (X << (M + 1)) + (X << M) are valid expansions.
+ // Prefer SUB if we can get (X << (M + 2)) for free, because X is an
+ // exact right shift (Y >> (M + 2)).
+ uint64_t ShAmt = Log2_64(MulAmtLowBit) + 2;
+ using namespace SDPatternMatch;
+ return sd_match(X, m_ExactSr(m_Value(), m_SpecificInt(ShAmt)));
+ };
+ if (isPowerOf2_64(MulAmt - MulAmtLowBit) && !(CanSub && PreferSub())) {
Op = ISD::ADD;
ShiftAmt1 = MulAmt - MulAmtLowBit;
+ } else if (CanSub) {
+ Op = ISD::SUB;
+ ShiftAmt1 = MulAmt + MulAmtLowBit;
} else {
return SDValue();
}
EVT VT = N->getValueType(0);
SDLoc DL(N);
- SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, X,
DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
- SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, X,
DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
return DAG.getNode(Op, DL, VT, Shift1, Shift2);
}
@@ -16616,10 +16927,13 @@ static SDValue getShlAddShlAdd(SDNode *N, SelectionDAG &DAG, unsigned ShX,
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue X = N->getOperand(0);
- // Put the shift first if we can fold a zext into the shift forming a slli.uw.
+ // Put the shift first if we can fold:
+ // a. a zext into the shift forming a slli.uw
+ // b. an exact shift right forming one shorter shift or no shift at all
using namespace SDPatternMatch;
if (Shift != 0 &&
- sd_match(X, m_And(m_Value(), m_SpecificInt(UINT64_C(0xffffffff))))) {
+ sd_match(X, m_AnyOf(m_And(m_Value(), m_SpecificInt(UINT64_C(0xffffffff))),
+ m_ExactSr(m_Value(), m_ConstInt())))) {
X = DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(Shift, DL, VT));
Shift = 0;
}
@@ -16660,12 +16974,23 @@ static SDValue expandMulToShlAddShlAdd(SDNode *N, SelectionDAG &DAG,
break;
}
- // 2/4/8 * 3/5/9 + 1 -> (shXadd (shYadd X, X), X)
int ShX;
if (int ShY = isShifted359(MulAmt - 1, ShX)) {
assert(ShX != 0 && "MulAmt=4,6,10 handled before");
+ // 2/4/8 * 3/5/9 + 1 -> (shXadd (shYadd X, X), X)
if (ShX <= 3)
return getShlAddShlAdd(N, DAG, ShX, ShY, /*AddX=*/true, Shift);
+ // 2^N * 3/5/9 + 1 -> (add (shYadd (shl X, N), (shl X, N)), X)
+ if (Shift == 0) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SDValue X = N->getOperand(0);
+ SDValue Shl =
+ DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShX, DL, VT));
+ SDValue ShlAdd = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
+ DAG.getTargetConstant(ShY, DL, VT), Shl);
+ return DAG.getNode(ISD::ADD, DL, VT, ShlAdd, X);
+ }
}
return SDValue();
}
@@ -16726,7 +17051,7 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
DAG.getTargetConstant(Shift, DL, VT), Shift1);
}
- // TODO: 2^(C1>3) * 3,5,9 +/- 1
+ // TODO: 2^(C1>3) * 3/5/9 - 1
// 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
@@ -18076,8 +18401,7 @@ static SDValue combineOp_VLToVWOp_VL(SDNode *N,
}
}
for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
- DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
- DCI.AddToWorklist(OldNewValues.second.getNode());
+ DCI.CombineTo(OldNewValues.first.getNode(), OldNewValues.second);
}
return InputRootReplacement;
}
@@ -20534,7 +20858,8 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
// Undo this and sink the fneg so we match more fmsub/fnmadd patterns.
if (sd_match(N, m_FMul(m_Value(X), m_OneUse(m_FNeg(m_Value(Y))))))
return DAG.getNode(ISD::FNEG, DL, VT,
- DAG.getNode(ISD::FMUL, DL, VT, X, Y));
+ DAG.getNode(ISD::FMUL, DL, VT, X, Y, N->getFlags()),
+ N->getFlags());
// fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
SDValue N0 = N->getOperand(0);
@@ -20655,7 +20980,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
// (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
// (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
- if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
+ if (!Subtarget.hasShortForwardBranchIALU() && isa<ConstantSDNode>(TrueV) &&
isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
(CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
if (CCVal == ISD::CondCode::SETGE)
@@ -21514,6 +21839,49 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return N->getOperand(0);
break;
}
+ case RISCVISD::VMERGE_VL: {
+ // vmerge_vl allones, x, y, passthru, vl -> vmv_v_v passthru, x, vl
+ SDValue Mask = N->getOperand(0);
+ SDValue True = N->getOperand(1);
+ SDValue Passthru = N->getOperand(3);
+ SDValue VL = N->getOperand(4);
+
+ // Fixed vectors are wrapped in scalable containers, unwrap them.
+ using namespace SDPatternMatch;
+ SDValue SubVec;
+ if (sd_match(Mask, m_InsertSubvector(m_Undef(), m_Value(SubVec), m_Zero())))
+ Mask = SubVec;
+
+ if (!isOneOrOneSplat(Mask))
+ break;
+
+ return DAG.getNode(RISCVISD::VMV_V_V_VL, SDLoc(N), N->getSimpleValueType(0),
+ Passthru, True, VL);
+ }
+ case RISCVISD::VMV_V_V_VL: {
+ // vmv_v_v passthru, splat(x), vl -> vmv_v_x passthru, x, vl
+ SDValue Passthru = N->getOperand(0);
+ SDValue Src = N->getOperand(1);
+ SDValue VL = N->getOperand(2);
+
+ // Fixed vectors are wrapped in scalable containers, unwrap them.
+ using namespace SDPatternMatch;
+ SDValue SubVec;
+ if (sd_match(Src, m_InsertSubvector(m_Undef(), m_Value(SubVec), m_Zero())))
+ Src = SubVec;
+
+ SDValue SplatVal = DAG.getSplatValue(Src);
+ if (!SplatVal)
+ break;
+ MVT VT = N->getSimpleValueType(0);
+ return lowerScalarSplat(Passthru, SplatVal, VL, VT, SDLoc(N), DAG,
+ Subtarget);
+ }
+ case RISCVISD::VSLIDEDOWN_VL:
+ case RISCVISD::VSLIDEUP_VL:
+ if (N->getOperand(1)->isUndef())
+ return N->getOperand(0);
+ break;
case RISCVISD::VSLIDE1UP_VL:
case RISCVISD::VFSLIDE1UP_VL: {
using namespace SDPatternMatch;
@@ -22203,8 +22571,7 @@ static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
MachineFunction &MF = *BB->getParent();
DebugLoc DL = MI.getDebugLoc();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
+ const RISCVInstrInfo &TII = *MF.getSubtarget<RISCVSubtarget>().getInstrInfo();
Register LoReg = MI.getOperand(0).getReg();
Register HiReg = MI.getOperand(1).getReg();
Register SrcReg = MI.getOperand(2).getReg();
@@ -22213,7 +22580,7 @@ static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
- RI, Register());
+ Register());
MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
MachineMemOperand *MMOLo =
MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
@@ -22239,8 +22606,7 @@ static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
MachineFunction &MF = *BB->getParent();
DebugLoc DL = MI.getDebugLoc();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
+ const RISCVInstrInfo &TII = *MF.getSubtarget<RISCVSubtarget>().getInstrInfo();
Register DstReg = MI.getOperand(0).getReg();
Register LoReg = MI.getOperand(1).getReg();
Register HiReg = MI.getOperand(2).getReg();
@@ -22263,7 +22629,7 @@ static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
.addFrameIndex(FI)
.addImm(4)
.addMemOperand(MMOHi);
- TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
+ TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, Register());
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
@@ -23957,14 +24323,15 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
break;
}
} else if (Constraint == "vr") {
+ // Check VM first so that mask types will use that instead of VR.
for (const auto *RC :
- {&RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
- &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN3M1RegClass,
- &RISCV::VRN4M1RegClass, &RISCV::VRN5M1RegClass,
- &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass,
- &RISCV::VRN8M1RegClass, &RISCV::VRN2M2RegClass,
- &RISCV::VRN3M2RegClass, &RISCV::VRN4M2RegClass,
- &RISCV::VRN2M4RegClass}) {
+ {&RISCV::VMRegClass, &RISCV::VRRegClass, &RISCV::VRM2RegClass,
+ &RISCV::VRM4RegClass, &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass,
+ &RISCV::VRN3M1RegClass, &RISCV::VRN4M1RegClass,
+ &RISCV::VRN5M1RegClass, &RISCV::VRN6M1RegClass,
+ &RISCV::VRN7M1RegClass, &RISCV::VRN8M1RegClass,
+ &RISCV::VRN2M2RegClass, &RISCV::VRN3M2RegClass,
+ &RISCV::VRN4M2RegClass, &RISCV::VRN2M4RegClass}) {
if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
return std::make_pair(0U, RC);
@@ -23975,15 +24342,16 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
}
}
} else if (Constraint == "vd") {
+ // Check VMNoV0 first so that mask types will use that instead of VRNoV0.
for (const auto *RC :
- {&RISCV::VRNoV0RegClass, &RISCV::VRM2NoV0RegClass,
- &RISCV::VRM4NoV0RegClass, &RISCV::VRM8NoV0RegClass,
- &RISCV::VRN2M1NoV0RegClass, &RISCV::VRN3M1NoV0RegClass,
- &RISCV::VRN4M1NoV0RegClass, &RISCV::VRN5M1NoV0RegClass,
- &RISCV::VRN6M1NoV0RegClass, &RISCV::VRN7M1NoV0RegClass,
- &RISCV::VRN8M1NoV0RegClass, &RISCV::VRN2M2NoV0RegClass,
- &RISCV::VRN3M2NoV0RegClass, &RISCV::VRN4M2NoV0RegClass,
- &RISCV::VRN2M4NoV0RegClass}) {
+ {&RISCV::VMNoV0RegClass, &RISCV::VRNoV0RegClass,
+ &RISCV::VRM2NoV0RegClass, &RISCV::VRM4NoV0RegClass,
+ &RISCV::VRM8NoV0RegClass, &RISCV::VRN2M1NoV0RegClass,
+ &RISCV::VRN3M1NoV0RegClass, &RISCV::VRN4M1NoV0RegClass,
+ &RISCV::VRN5M1NoV0RegClass, &RISCV::VRN6M1NoV0RegClass,
+ &RISCV::VRN7M1NoV0RegClass, &RISCV::VRN8M1NoV0RegClass,
+ &RISCV::VRN2M2NoV0RegClass, &RISCV::VRN3M2NoV0RegClass,
+ &RISCV::VRN4M2NoV0RegClass, &RISCV::VRN2M4NoV0RegClass}) {
if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
return std::make_pair(0U, RC);
@@ -25043,6 +25411,22 @@ bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
return true;
}
+bool RISCVTargetLowering::isLegalFirstFaultLoad(EVT DataType,
+ Align Alignment) const {
+ if (!Subtarget.hasVInstructions())
+ return false;
+
+ EVT ScalarType = DataType.getScalarType();
+ if (!isLegalElementTypeForRVV(ScalarType))
+ return false;
+
+ if (!Subtarget.enableUnalignedVectorMem() &&
+ Alignment < ScalarType.getStoreSize())
+ return false;
+
+ return true;
+}
+
MachineInstr *
RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
MachineBasicBlock::instr_iterator &MBBI,
@@ -25191,8 +25575,10 @@ bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
if (auto *II = dyn_cast<IntrinsicInst>(&Inst)) {
// Mark RVV intrinsic as supported.
if (RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(II->getIntrinsicID())) {
- // GISel doesn't support tuple types yet.
- if (Inst.getType()->isRISCVVectorTupleTy())
+ // GISel doesn't support tuple types yet. It also doesn't support returning
+ // a struct containing a scalable vector, as vleff does.
+ if (Inst.getType()->isRISCVVectorTupleTy() ||
+ Inst.getType()->isStructTy())
return true;
for (unsigned i = 0; i < II->arg_size(); ++i)
@@ -25201,6 +25587,8 @@ bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
return false;
}
+ if (II->getIntrinsicID() == Intrinsic::vector_extract)
+ return false;
}
if (Inst.getType()->isScalableTy())
@@ -25228,7 +25616,7 @@ RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
return SDValue(N, 0); // Lower SDIV as SDIV
// Only perform this transform if short forward branch opt is supported.
- if (!Subtarget.hasShortForwardBranchOpt())
+ if (!Subtarget.hasShortForwardBranchIALU())
return SDValue();
EVT VT = N->getValueType(0);
if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
@@ -25434,3 +25822,17 @@ bool RISCVTargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const {
return VT.getSizeInBits() <= Subtarget.getXLen();
}
+
+bool RISCVTargetLowering::isReassocProfitable(SelectionDAG &DAG, SDValue N0,
+ SDValue N1) const {
+ if (!N0.hasOneUse())
+ return false;
+
+ // Avoid reassociating expressions that can be lowered to vector
+ // multiply accumulate (i.e. add (mul x, y), z)
+ if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::MUL &&
+ (N0.getValueType().isVector() && Subtarget.hasVInstructions()))
+ return false;
+
+ return true;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index dd62a9c..a7db946 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -35,7 +35,7 @@ public:
const RISCVSubtarget &getSubtarget() const { return Subtarget; }
- bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
+ bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I,
MachineFunction &MF,
unsigned Intrinsic) const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
@@ -71,6 +71,9 @@ public:
bool preferScalarizeSplat(SDNode *N) const override;
+ /// Customize the preferred legalization strategy for certain types.
+ LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
+
bool softPromoteHalfType() const override { return true; }
/// Return the register type for a given MVT, ensuring vectors are treated
@@ -89,12 +92,6 @@ public:
CallingConv::ID CC,
EVT VT) const override;
- unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context,
- CallingConv::ID CC, EVT VT,
- EVT &IntermediateVT,
- unsigned &NumIntermediates,
- MVT &RegisterVT) const override;
-
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT,
unsigned SelectOpcode, SDValue X,
SDValue Y) const override;
@@ -426,6 +423,10 @@ public:
/// alignment is legal.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const;
+ /// Return true if a fault-only-first load of the given result type and
+ /// alignment is legal.
+ bool isLegalFirstFaultLoad(EVT DataType, Align Alignment) const;
+
unsigned getMaxSupportedInterleaveFactor() const override { return 8; }
bool fallBackToDAGISel(const Instruction &Inst) const override;
@@ -467,6 +468,11 @@ public:
bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
+ /// Control the following reassociation of operands: (op (op x, c1), y) -> (op
+ /// (op x, y), c1) where N0 is (op x, c1) and N1 is y.
+ bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
+ SDValue N1) const override;
+
/// Match a mask which "spreads" the leading elements of a vector evenly
/// across the result. Factor is the spread amount, and Index is the
/// offset applied.
@@ -545,7 +551,6 @@ private:
SDValue lowerVPExtMaskOp(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPSetCCMaskOp(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPMergeMask(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerVPSplatExperimental(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPSpliceExperimental(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPReverseExperimental(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG) const;
@@ -573,6 +578,9 @@ private:
SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const;
+ SDValue expandUnalignedVPLoad(SDValue Op, SelectionDAG &DAG) const;
+ SDValue expandUnalignedVPStore(SDValue Op, SelectionDAG &DAG) const;
+
SDValue lowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerPARTIAL_REDUCE_MLA(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/RISCV/RISCVIndirectBranchTracking.cpp b/llvm/lib/Target/RISCV/RISCVIndirectBranchTracking.cpp
index 9664ab3..0fc139a 100644
--- a/llvm/lib/Target/RISCV/RISCVIndirectBranchTracking.cpp
+++ b/llvm/lib/Target/RISCV/RISCVIndirectBranchTracking.cpp
@@ -16,11 +16,12 @@
#include "RISCVInstrInfo.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#define DEBUG_TYPE "riscv-indrect-branch-tracking"
+#define DEBUG_TYPE "riscv-indirect-branch-tracking"
#define PASS_NAME "RISC-V Indirect Branch Tracking"
using namespace llvm;
@@ -54,13 +55,25 @@ FunctionPass *llvm::createRISCVIndirectBranchTrackingPass() {
return new RISCVIndirectBranchTracking();
}
-static void emitLpad(MachineBasicBlock &MBB, const RISCVInstrInfo *TII,
- uint32_t Label) {
- auto I = MBB.begin();
+static void
+emitLpad(MachineBasicBlock &MBB, const RISCVInstrInfo *TII, uint32_t Label,
+ MachineBasicBlock::iterator I = MachineBasicBlock::iterator{}) {
+ if (!I.isValid())
+ I = MBB.begin();
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(RISCV::AUIPC), RISCV::X0)
.addImm(Label);
}
+static bool isCallReturnTwice(const MachineOperand &MOp) {
+ if (!MOp.isGlobal())
+ return false;
+ auto *CalleeFn = dyn_cast<Function>(MOp.getGlobal());
+ if (!CalleeFn)
+ return false;
+ AttributeList Attrs = CalleeFn->getAttributes();
+ return Attrs.hasFnAttr(Attribute::ReturnsTwice);
+}
+
bool RISCVIndirectBranchTracking::runOnMachineFunction(MachineFunction &MF) {
const auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
@@ -100,5 +113,18 @@ bool RISCVIndirectBranchTracking::runOnMachineFunction(MachineFunction &MF) {
}
}
+ // Check for calls to functions with the ReturnsTwice attribute and insert
+ // an LPAD after such calls.
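+ // The second return (e.g. longjmp back to setjmp) arrives via an indirect
+ // jump, so the return address needs a landing pad as well.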
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
+ if (I->isCall() && I->getNumOperands() > 0 &&
+ isCallReturnTwice(I->getOperand(0))) {
+ auto NextI = std::next(I);
+ emitLpad(MBB, TII, FixedLabel, NextI);
+ Changed = true;
+ }
+ }
+ }
+
return Changed;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index bf9de0a..b1ba870 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -519,13 +519,13 @@ class VSETVLIInfo {
unsigned AVLImm;
};
- enum : uint8_t {
+ enum class AVLState : uint8_t {
Uninitialized,
AVLIsReg,
AVLIsImm,
AVLIsVLMAX,
Unknown, // AVL and VTYPE are fully unknown
- } State = Uninitialized;
+ } State = AVLState::Uninitialized;
// Fields from VTYPE.
RISCVVType::VLMUL VLMul = RISCVVType::LMUL_1;
@@ -539,7 +539,7 @@ class VSETVLIInfo {
public:
VSETVLIInfo()
: AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
- SEWLMULRatioOnly(false) {}
+ SEWLMULRatioOnly(false), AltFmt(false), TWiden(0) {}
static VSETVLIInfo getUnknown() {
VSETVLIInfo Info;
@@ -547,27 +547,27 @@ public:
return Info;
}
- bool isValid() const { return State != Uninitialized; }
- void setUnknown() { State = Unknown; }
- bool isUnknown() const { return State == Unknown; }
+ bool isValid() const { return State != AVLState::Uninitialized; }
+ void setUnknown() { State = AVLState::Unknown; }
+ bool isUnknown() const { return State == AVLState::Unknown; }
void setAVLRegDef(const VNInfo *VNInfo, Register AVLReg) {
assert(AVLReg.isVirtual());
AVLRegDef.ValNo = VNInfo;
AVLRegDef.DefReg = AVLReg;
- State = AVLIsReg;
+ State = AVLState::AVLIsReg;
}
void setAVLImm(unsigned Imm) {
AVLImm = Imm;
- State = AVLIsImm;
+ State = AVLState::AVLIsImm;
}
- void setAVLVLMAX() { State = AVLIsVLMAX; }
+ void setAVLVLMAX() { State = AVLState::AVLIsVLMAX; }
- bool hasAVLImm() const { return State == AVLIsImm; }
- bool hasAVLReg() const { return State == AVLIsReg; }
- bool hasAVLVLMAX() const { return State == AVLIsVLMAX; }
+ bool hasAVLImm() const { return State == AVLState::AVLIsImm; }
+ bool hasAVLReg() const { return State == AVLState::AVLIsReg; }
+ bool hasAVLVLMAX() const { return State == AVLState::AVLIsVLMAX; }
Register getAVLReg() const {
assert(hasAVLReg() && AVLRegDef.DefReg.isVirtual());
return AVLRegDef.DefReg;
@@ -607,12 +607,36 @@ public:
}
}
- unsigned getSEW() const { return SEW; }
- RISCVVType::VLMUL getVLMUL() const { return VLMul; }
- bool getTailAgnostic() const { return TailAgnostic; }
- bool getMaskAgnostic() const { return MaskAgnostic; }
- bool getAltFmt() const { return AltFmt; }
- unsigned getTWiden() const { return TWiden; }
+ unsigned getSEW() const {
+ assert(isValid() && !isUnknown() &&
+ "Can't use VTYPE for uninitialized or unknown");
+ return SEW;
+ }
+ RISCVVType::VLMUL getVLMUL() const {
+ assert(isValid() && !isUnknown() &&
+ "Can't use VTYPE for uninitialized or unknown");
+ return VLMul;
+ }
+ bool getTailAgnostic() const {
+ assert(isValid() && !isUnknown() &&
+ "Can't use VTYPE for uninitialized or unknown");
+ return TailAgnostic;
+ }
+ bool getMaskAgnostic() const {
+ assert(isValid() && !isUnknown() &&
+ "Can't use VTYPE for uninitialized or unknown");
+ return MaskAgnostic;
+ }
+ bool getAltFmt() const {
+ assert(isValid() && !isUnknown() &&
+ "Can't use VTYPE for uninitialized or unknown");
+ return AltFmt;
+ }
+ unsigned getTWiden() const {
+ assert(isValid() && !isUnknown() &&
+ "Can't use VTYPE for uninitialized or unknown");
+ return TWiden;
+ }
bool hasNonZeroAVL(const LiveIntervals *LIS) const {
if (hasAVLImm())
@@ -837,35 +861,44 @@ public:
/// Implement operator<<.
/// @{
void print(raw_ostream &OS) const {
- OS << "{";
- if (!isValid())
+ OS << '{';
+ switch (State) {
+ case AVLState::Uninitialized:
OS << "Uninitialized";
- if (isUnknown())
+ break;
+ case AVLState::Unknown:
OS << "unknown";
- if (hasAVLReg())
+ break;
+ case AVLState::AVLIsReg:
OS << "AVLReg=" << llvm::printReg(getAVLReg());
- if (hasAVLImm())
+ break;
+ case AVLState::AVLIsImm:
OS << "AVLImm=" << (unsigned)AVLImm;
- if (hasAVLVLMAX())
+ break;
+ case AVLState::AVLIsVLMAX:
OS << "AVLVLMAX";
- OS << ", ";
+ break;
+ }
+ if (isValid() && !isUnknown()) {
+ OS << ", ";
+
+ unsigned LMul;
+ bool Fractional;
+ std::tie(LMul, Fractional) = decodeVLMUL(VLMul);
+
+ OS << "VLMul=m";
+ if (Fractional)
+ OS << 'f';
+ OS << LMul << ", "
+ << "SEW=e" << (unsigned)SEW << ", "
+ << "TailAgnostic=" << (bool)TailAgnostic << ", "
+ << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
+ << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << ", "
+ << "TWiden=" << (unsigned)TWiden << ", "
+ << "AltFmt=" << (bool)AltFmt;
+ }
- unsigned LMul;
- bool Fractional;
- std::tie(LMul, Fractional) = decodeVLMUL(VLMul);
-
- OS << "VLMul=";
- if (Fractional)
- OS << "mf";
- else
- OS << "m";
- OS << LMul << ", "
- << "SEW=e" << (unsigned)SEW << ", "
- << "TailAgnostic=" << (bool)TailAgnostic << ", "
- << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
- << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << ", "
- << "TWiden=" << (unsigned)TWiden << ", "
- << "AltFmt=" << (bool)AltFmt << "}";
+ OS << '}';
}
#endif
};
@@ -1755,6 +1788,14 @@ bool RISCVInsertVSETVLI::canMutatePriorConfig(
if (!VNI || !PrevVNI || VNI != PrevVNI)
return false;
}
+
+ // If we define VL and need to move the definition up, check we can extend
+ // the live interval upwards from MI to PrevMI.
+ Register VL = MI.getOperand(0).getReg();
+ if (VL.isVirtual() && LIS &&
+ LIS->getInterval(VL).overlaps(LIS->getInstructionIndex(PrevMI),
+ LIS->getInstructionIndex(MI)))
+ return false;
}
assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm());
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index b05956b..a3bacfb 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -82,8 +82,9 @@ namespace llvm::RISCV {
} // end namespace llvm::RISCV
RISCVInstrInfo::RISCVInstrInfo(const RISCVSubtarget &STI)
- : RISCVGenInstrInfo(STI, RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP),
- STI(STI) {}
+ : RISCVGenInstrInfo(STI, RegInfo, RISCV::ADJCALLSTACKDOWN,
+ RISCV::ADJCALLSTACKUP),
+ RegInfo(STI.getHwMode()), STI(STI) {}
#define GET_INSTRINFO_HELPERS
#include "RISCVGenInstrInfo.inc"
@@ -530,6 +531,15 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
if (RISCV::GPRPairRegClass.contains(DstReg, SrcReg)) {
+ if (STI.isRV32() && STI.hasStdExtZdinx()) {
+ // On RV32 with Zdinx, FMV.D (an alias of FSGNJ.D) moves one register pair
+ // to another in a single instruction.
+ BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_D_IN32X), DstReg)
+ .addReg(SrcReg, getRenamableRegState(RenamableSrc))
+ .addReg(SrcReg, KillFlag | getRenamableRegState(RenamableSrc));
+ return;
+ }
+
MCRegister EvenReg = TRI->getSubReg(SrcReg, RISCV::sub_gpr_even);
MCRegister OddReg = TRI->getSubReg(SrcReg, RISCV::sub_gpr_odd);
// We need to correct the odd register of X0_Pair.
@@ -638,7 +648,6 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
Register SrcReg, bool IsKill, int FI,
const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI,
Register VReg,
MachineInstr::MIFlag Flags) const {
MachineFunction *MF = MBB.getParent();
@@ -646,8 +655,8 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
unsigned Opcode;
if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
- Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
- RISCV::SW : RISCV::SD;
+ Opcode = RegInfo.getRegSizeInBits(RISCV::GPRRegClass) == 32 ? RISCV::SW
+ : RISCV::SD;
} else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::SH_INX;
} else if (RISCV::GPRF32RegClass.hasSubClassEq(RC)) {
@@ -704,7 +713,7 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
.addFrameIndex(FI)
.addMemOperand(MMO)
.setMIFlag(Flags);
- NumVRegSpilled += TRI->getRegSizeInBits(*RC) / RISCV::RVVBitsPerBlock;
+ NumVRegSpilled += RegInfo.getRegSizeInBits(*RC) / RISCV::RVVBitsPerBlock;
} else {
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
@@ -719,10 +728,12 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
}
}
-void RISCVInstrInfo::loadRegFromStackSlot(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DstReg,
- int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
- Register VReg, MachineInstr::MIFlag Flags) const {
+void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ Register DstReg, int FI,
+ const TargetRegisterClass *RC,
+ Register VReg,
+ MachineInstr::MIFlag Flags) const {
MachineFunction *MF = MBB.getParent();
MachineFrameInfo &MFI = MF->getFrameInfo();
DebugLoc DL =
@@ -730,8 +741,8 @@ void RISCVInstrInfo::loadRegFromStackSlot(
unsigned Opcode;
if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
- Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
- RISCV::LW : RISCV::LD;
+ Opcode = RegInfo.getRegSizeInBits(RISCV::GPRRegClass) == 32 ? RISCV::LW
+ : RISCV::LD;
} else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::LH_INX;
} else if (RISCV::GPRF32RegClass.hasSubClassEq(RC)) {
@@ -787,7 +798,7 @@ void RISCVInstrInfo::loadRegFromStackSlot(
.addFrameIndex(FI)
.addMemOperand(MMO)
.setMIFlag(Flags);
- NumVRegReloaded += TRI->getRegSizeInBits(*RC) / RISCV::RVVBitsPerBlock;
+ NumVRegReloaded += RegInfo.getRegSizeInBits(*RC) / RISCV::RVVBitsPerBlock;
} else {
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
@@ -1361,8 +1372,11 @@ void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
.addMBB(&DestBB, RISCVII::MO_CALL);
RS->enterBasicBlockEnd(MBB);
+ const TargetRegisterClass *RC = &RISCV::GPRRegClass;
+ if (STI.hasStdExtZicfilp())
+ RC = &RISCV::GPRX7RegClass;
Register TmpGPR =
- RS->scavengeRegisterBackwards(RISCV::GPRRegClass, MI.getIterator(),
+ RS->scavengeRegisterBackwards(*RC, MI.getIterator(),
/*RestoreAfter=*/false, /*SpAdj=*/0,
/*AllowSpill=*/false);
if (TmpGPR.isValid())
@@ -1372,20 +1386,23 @@ void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
// Pick s11(or s1 for rve) because it doesn't make a difference.
TmpGPR = STI.hasStdExtE() ? RISCV::X9 : RISCV::X27;
+ // Force t2 if Zicfilp is on
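+ // t2 (x7) is the Zicfilp software-guarded jump register, so an indirect
+ // branch through it does not require a landing pad at the target.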
+ if (STI.hasStdExtZicfilp())
+ TmpGPR = RISCV::X7;
int FrameIndex = RVFI->getBranchRelaxationScratchFrameIndex();
if (FrameIndex == -1)
report_fatal_error("underestimated function size");
storeRegToStackSlot(MBB, MI, TmpGPR, /*IsKill=*/true, FrameIndex,
- &RISCV::GPRRegClass, TRI, Register());
+ &RISCV::GPRRegClass, Register());
TRI->eliminateFrameIndex(std::prev(MI.getIterator()),
/*SpAdj=*/0, /*FIOperandNum=*/1);
MI.getOperand(1).setMBB(&RestoreBB);
loadRegFromStackSlot(RestoreBB, RestoreBB.end(), TmpGPR, FrameIndex,
- &RISCV::GPRRegClass, TRI, Register());
+ &RISCV::GPRRegClass, Register());
TRI->eliminateFrameIndex(RestoreBB.back(),
/*SpAdj=*/0, /*FIOperandNum=*/1);
}
@@ -1705,6 +1722,9 @@ unsigned getPredicatedOpcode(unsigned Opcode) {
case RISCV::MIN: return RISCV::PseudoCCMIN;
case RISCV::MINU: return RISCV::PseudoCCMINU;
case RISCV::MUL: return RISCV::PseudoCCMUL;
+ case RISCV::LUI: return RISCV::PseudoCCLUI;
+ case RISCV::QC_LI: return RISCV::PseudoCCQC_LI;
+ case RISCV::QC_E_LI: return RISCV::PseudoCCQC_E_LI;
case RISCV::ADDI: return RISCV::PseudoCCADDI;
case RISCV::SLLI: return RISCV::PseudoCCSLLI;
@@ -1807,7 +1827,7 @@ bool RISCVInstrInfo::analyzeSelect(const MachineInstr &MI,
Cond.push_back(MI.getOperand(2));
Cond.push_back(MI.getOperand(3));
// We can only fold when we support short forward branch opt.
- Optimizable = STI.hasShortForwardBranchOpt();
+ Optimizable = STI.hasShortForwardBranchIALU();
return false;
}
@@ -1817,7 +1837,7 @@ RISCVInstrInfo::optimizeSelect(MachineInstr &MI,
bool PreferFalse) const {
assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
"Unknown select instruction");
- if (!STI.hasShortForwardBranchOpt())
+ if (!STI.hasShortForwardBranchIALU())
return nullptr;
MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
@@ -2836,15 +2856,16 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
MCInstrDesc const &Desc = MI.getDesc();
for (const auto &[Index, Operand] : enumerate(Desc.operands())) {
+ const MachineOperand &MO = MI.getOperand(Index);
unsigned OpType = Operand.OperandType;
- if (OpType >= RISCVOp::OPERAND_FIRST_RISCV_IMM &&
- OpType <= RISCVOp::OPERAND_LAST_RISCV_IMM) {
- const MachineOperand &MO = MI.getOperand(Index);
- if (MO.isReg()) {
- ErrInfo = "Expected a non-register operand.";
- return false;
- }
- if (MO.isImm()) {
+ switch (OpType) {
+ default:
+ if (OpType >= RISCVOp::OPERAND_FIRST_RISCV_IMM &&
+ OpType <= RISCVOp::OPERAND_LAST_RISCV_IMM) {
+ if (!MO.isImm()) {
+ ErrInfo = "Expected an immediate operand.";
+ return false;
+ }
int64_t Imm = MO.getImm();
bool Ok;
switch (OpType) {
@@ -2872,7 +2893,6 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
CASE_OPERAND_UIMM(10)
CASE_OPERAND_UIMM(12)
CASE_OPERAND_UIMM(16)
- CASE_OPERAND_UIMM(20)
CASE_OPERAND_UIMM(32)
CASE_OPERAND_UIMM(48)
CASE_OPERAND_UIMM(64)
@@ -2890,7 +2910,7 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
Ok = isUInt<5>(Imm) && (Imm > 3);
break;
case RISCVOp::OPERAND_UIMM5_PLUS1:
- Ok = (isUInt<5>(Imm) && (Imm != 0)) || (Imm == 32);
+ Ok = Imm >= 1 && Imm <= 32;
break;
case RISCVOp::OPERAND_UIMM6_LSB0:
Ok = isShiftedUInt<5, 1>(Imm);
@@ -2913,6 +2933,9 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
case RISCVOp::OPERAND_UIMM9_LSB000:
Ok = isShiftedUInt<6, 3>(Imm);
break;
+ case RISCVOp::OPERAND_SIMM8_UNSIGNED:
+ Ok = isInt<8>(Imm);
+ break;
case RISCVOp::OPERAND_SIMM10_LSB0000_NONZERO:
Ok = isShiftedInt<6, 4>(Imm) && (Imm != 0);
break;
@@ -2934,12 +2957,12 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
// clang-format off
CASE_OPERAND_SIMM(5)
CASE_OPERAND_SIMM(6)
+ CASE_OPERAND_SIMM(10)
CASE_OPERAND_SIMM(11)
- CASE_OPERAND_SIMM(12)
CASE_OPERAND_SIMM(26)
// clang-format on
case RISCVOp::OPERAND_SIMM5_PLUS1:
- Ok = (isInt<5>(Imm) && Imm != -16) || Imm == 16;
+ Ok = Imm >= -15 && Imm <= 16;
break;
case RISCVOp::OPERAND_SIMM5_NONZERO:
Ok = isInt<5>(Imm) && (Imm != 0);
@@ -2962,9 +2985,6 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
case RISCVOp::OPERAND_SIMM20_LI:
Ok = isInt<20>(Imm);
break;
- case RISCVOp::OPERAND_BARE_SIMM32:
- Ok = isInt<32>(Imm);
- break;
case RISCVOp::OPERAND_UIMMLOG2XLEN:
Ok = STI.is64Bit() ? isUInt<6>(Imm) : isUInt<5>(Imm);
break;
@@ -2973,8 +2993,7 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
Ok = Ok && Imm != 0;
break;
case RISCVOp::OPERAND_CLUI_IMM:
- Ok = (isUInt<5>(Imm) && Imm != 0) ||
- (Imm >= 0xfffe0 && Imm <= 0xfffff);
+ Ok = (isUInt<5>(Imm) && Imm != 0) || (Imm >= 0xfffe0 && Imm <= 0xfffff);
break;
case RISCVOp::OPERAND_RVKRNUM:
Ok = Imm >= 0 && Imm <= 10;
@@ -3007,8 +3026,8 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
Ok = Imm >= 0 && Imm < RISCVCC::COND_INVALID;
break;
case RISCVOp::OPERAND_VEC_POLICY:
- Ok = (Imm &
- (RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC)) == Imm;
+ Ok = (Imm & (RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC)) ==
+ Imm;
break;
case RISCVOp::OPERAND_SEW:
Ok = (isUInt<5>(Imm) && RISCVVType::isValidSEW(1 << Imm));
@@ -3032,6 +3051,57 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
return false;
}
}
+ break;
+ case RISCVOp::OPERAND_SIMM12_LO:
+ // TODO: We could be stricter about what non-register operands are
+ // allowed.
+ if (MO.isReg()) {
+ ErrInfo = "Expected a non-register operand.";
+ return false;
+ }
+ if (MO.isImm() && !isInt<12>(MO.getImm())) {
+ ErrInfo = "Invalid immediate";
+ return false;
+ }
+ break;
+ case RISCVOp::OPERAND_UIMM20_LUI:
+ case RISCVOp::OPERAND_UIMM20_AUIPC:
+ // TODO: We could be stricter about what non-register operands are
+ // allowed.
+ if (MO.isReg()) {
+ ErrInfo = "Expected a non-register operand.";
+ return false;
+ }
+ if (MO.isImm() && !isUInt<20>(MO.getImm())) {
+ ErrInfo = "Invalid immediate";
+ return false;
+ }
+ break;
+ case RISCVOp::OPERAND_BARE_SIMM32:
+ // TODO: We could be stricter about what non-register operands are
+ // allowed.
+ if (MO.isReg()) {
+ ErrInfo = "Expected a non-register operand.";
+ return false;
+ }
+ if (MO.isImm() && !isInt<32>(MO.getImm())) {
+ ErrInfo = "Invalid immediate";
+ return false;
+ }
+ break;
+ case RISCVOp::OPERAND_AVL:
+ if (MO.isImm()) {
+ int64_t Imm = MO.getImm();
+ // VLMAX is represented as -1.
+ if (!isUInt<5>(Imm) && Imm != -1) {
+ ErrInfo = "Invalid immediate";
+ return false;
+ }
+ } else if (!MO.isReg()) {
+ ErrInfo = "Expected a register or immediate operand.";
+ return false;
+ }
+ break;
}
}
@@ -3045,7 +3115,7 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
if (Op.isReg() && Op.getReg().isValid()) {
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
auto *RC = MRI.getRegClass(Op.getReg());
- if (!RISCV::GPRRegClass.hasSubClassEq(RC)) {
+ if (!RISCV::GPRNoX0RegClass.hasSubClassEq(RC)) {
ErrInfo = "Invalid register class for VL operand";
return false;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index c5eddb9..0ffe015 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -79,10 +79,13 @@ enum RISCVMachineCombinerPattern : unsigned {
};
class RISCVInstrInfo : public RISCVGenInstrInfo {
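+  // Own a RISCVRegisterInfo instance so the spill/reload helpers can query
+  // register sizes without taking a TargetRegisterInfo parameter.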
+ const RISCVRegisterInfo RegInfo;
public:
explicit RISCVInstrInfo(const RISCVSubtarget &STI);
+ const RISCVRegisterInfo &getRegisterInfo() const { return RegInfo; }
+
MCInst getNop() const override;
Register isLoadFromStackSlot(const MachineInstr &MI,
@@ -113,13 +116,13 @@ public:
void storeRegToStackSlot(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
bool IsKill, int FrameIndex, const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI, Register VReg,
+      Register VReg,
MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
void loadRegFromStackSlot(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DstReg,
- int FrameIndex, const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI, Register VReg,
+ int FrameIndex, const TargetRegisterClass *RC, Register VReg,
MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
using TargetInstrInfo::foldMemoryOperandImpl;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 9cb53fb..9a4eb12 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -349,6 +349,7 @@ def simm12_lo : RISCVSImmLeafOp<12> {
return isInt<12>(Imm);
return MCOp.isBareSymbolRef();
}];
+ let OperandType = "OPERAND_SIMM12_LO";
}
// A 12-bit signed immediate which cannot fit in 6-bit signed immediate,
@@ -394,9 +395,11 @@ class UImm20OperandMaybeSym : RISCVUImmOp<20> {
def uimm20_lui : UImm20OperandMaybeSym {
let ParserMatchClass = UImmAsmOperand<20, "LUI">;
+ let OperandType = "OPERAND_UIMM20_LUI";
}
def uimm20_auipc : UImm20OperandMaybeSym {
let ParserMatchClass = UImmAsmOperand<20, "AUIPC">;
+ let OperandType = "OPERAND_UIMM20_AUIPC";
}
def uimm20 : RISCVUImmOp<20>;
@@ -507,7 +510,7 @@ def ixlenimm_li_restricted : Operand<XLenVT> {
// A 12-bit signed immediate plus one where the imm range will be -2047~2048.
def simm12_plus1 : ImmLeaf<XLenVT,
- [{return (isInt<12>(Imm) && Imm != -2048) || Imm == 2048;}]>;
+ [{return Imm >= -2047 && Imm <= 2048;}]>;
// A 6-bit constant greater than 32.
def uimm6gt32 : ImmLeaf<XLenVT, [{
@@ -768,7 +771,7 @@ def BGE : BranchCC_rri<0b101, "bge">;
def BLTU : BranchCC_rri<0b110, "bltu">;
def BGEU : BranchCC_rri<0b111, "bgeu">;
-let IsSignExtendingOpW = 1, canFoldAsLoad = 1 in {
+let IsSignExtendingOpW = 1, canFoldAsLoad = 1, isReMaterializable = 1 in {
def LB : Load_ri<0b000, "lb">, Sched<[WriteLDB, ReadMemBase]>;
def LH : Load_ri<0b001, "lh">, Sched<[WriteLDH, ReadMemBase]>;
def LW : Load_ri<0b010, "lw">, Sched<[WriteLDW, ReadMemBase]>;
@@ -889,7 +892,7 @@ def CSRRCI : CSR_ii<0b111, "csrrci">;
/// RV64I instructions
let Predicates = [IsRV64] in {
-let canFoldAsLoad = 1 in {
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
def LWU : Load_ri<0b110, "lwu">, Sched<[WriteLDW, ReadMemBase]>;
def LD : Load_ri<0b011, "ld">, Sched<[WriteLDD, ReadMemBase]>;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
index 4ffe3e6..deacd41 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -71,7 +71,7 @@ defvar DExtsRV64 = [DExt, ZdinxExt];
//===----------------------------------------------------------------------===//
let Predicates = [HasStdExtD] in {
-let canFoldAsLoad = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
def FLD : FPLoad_r<0b011, "fld", FPR64, WriteFLD64>;
// Operands for stores are in the order srcreg, base, offset rather than
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index b30f8ec..bd19100 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -330,7 +330,7 @@ class PseudoFROUND<DAGOperand Ty, ValueType vt, ValueType intvt = XLenVT>
//===----------------------------------------------------------------------===//
let Predicates = [HasStdExtF] in {
-let canFoldAsLoad = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
def FLW : FPLoad_r<0b010, "flw", FPR32, WriteFLD32>;
// Operands for stores are in the order srcreg, base, offset rather than
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 4cbbba3..bba9f96 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -18,15 +18,22 @@
// Operand and SDNode transformation definitions.
//===----------------------------------------------------------------------===//
-def simm10 : RISCVSImmOp<10>;
+def simm10 : RISCVSImmOp<10>, ImmLeaf<XLenVT, "return isInt<10>(Imm);">;
def SImm8UnsignedAsmOperand : SImmAsmOperand<8, "Unsigned"> {
let RenderMethod = "addSImm8UnsignedOperands";
}
+// (<2 x i16>, <2 x i16>) PPACK_DH (<4 x i8>, <4 x i8>, <4 x i8>, <4 x i8>)
+def SDT_RISCVPPackDH
+ : SDTypeProfile<2, 4, [SDTCisVT<0, v2i16>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, v4i8>, SDTCisSameAs<0, 3>,
+ SDTCisSameAs<0, 4>, SDTCisSameAs<0, 5>]>;
+def riscv_ppack_dh : RVSDNode<"PPACK_DH", SDT_RISCVPPackDH>;
+
// A 8-bit signed immediate allowing range [-128, 255]
// but represented as [-128, 127].
-def simm8_unsigned : RISCVOp {
+def simm8_unsigned : RISCVOp, ImmLeaf<XLenVT, "return isInt<8>(Imm);"> {
let ParserMatchClass = SImm8UnsignedAsmOperand;
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeSImmOperand<8>";
@@ -625,8 +632,8 @@ let Predicates = [HasStdExtP] in {
def PSUB_H : RVPBinary_rr<0b1000, 0b00, 0b000, "psub.h">;
def PSUB_B : RVPBinary_rr<0b1000, 0b10, 0b000, "psub.b">;
- def PDIF_H : RVPBinary_rr<0b1001, 0b00, 0b000, "pdif.h">;
- def PDIF_B : RVPBinary_rr<0b1001, 0b10, 0b000, "pdif.b">;
+ def PABD_H : RVPBinary_rr<0b1001, 0b00, 0b000, "pabd.h">;
+ def PABD_B : RVPBinary_rr<0b1001, 0b10, 0b000, "pabd.b">;
def PSSUB_H : RVPBinary_rr<0b1010, 0b00, 0b000, "pssub.h">;
def PSSUB_B : RVPBinary_rr<0b1010, 0b10, 0b000, "pssub.b">;
@@ -634,8 +641,8 @@ let Predicates = [HasStdExtP] in {
def PASUB_H : RVPBinary_rr<0b1011, 0b00, 0b000, "pasub.h">;
def PASUB_B : RVPBinary_rr<0b1011, 0b10, 0b000, "pasub.b">;
- def PDIFU_H : RVPBinary_rr<0b1101, 0b00, 0b000, "pdifu.h">;
- def PDIFU_B : RVPBinary_rr<0b1101, 0b10, 0b000, "pdifu.b">;
+ def PABDU_H : RVPBinary_rr<0b1101, 0b00, 0b000, "pabdu.h">;
+ def PABDU_B : RVPBinary_rr<0b1101, 0b10, 0b000, "pabdu.b">;
def PSSUBU_H : RVPBinary_rr<0b1110, 0b00, 0b000, "pssubu.h">;
def PSSUBU_B : RVPBinary_rr<0b1110, 0b10, 0b000, "pssubu.b">;
@@ -693,9 +700,9 @@ let Predicates = [HasStdExtP] in {
def SRX : RVPTernary_rrr<0b0101, 0b11, 0b001, "srx">;
def PMULU_H_B01 : RVPBinary_rr<0b0110, 0b00, 0b001, "pmulu.h.b01">;
- def PDIFSUMU_B : RVPBinary_rr<0b0110, 0b10, 0b001, "pdifsumu.b">;
+ def PABDSUMU_B : RVPBinary_rr<0b0110, 0b10, 0b001, "pabdsumu.b">;
- def PDIFSUMAU_B : RVPTernary_rrr<0b0111, 0b10, 0b001, "pdifsumau.b">;
+ def PABDSUMAU_B : RVPTernary_rrr<0b0111, 0b10, 0b001, "pabdsumau.b">;
} // Predicates = [HasStdExtP]
let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in {
def MUL_H01 : RVPBinary_rr<0b0010, 0b01, 0b001, "mul.h01">;
@@ -825,32 +832,25 @@ let Predicates = [HasStdExtP, IsRV64] in {
// Note the spec has a 3-bit f field in bits 30:28 with 0 in bit 27.
// Here we include the 0 in the f field to reduce number of tablegen classes.
let Predicates = [HasStdExtP] in {
- def PPACK_H : RVPBinary_rr<0b0000, 0b00, 0b100, "ppack.h">;
+ def PPAIRE_B : RVPBinary_rr<0b0000, 0b00, 0b100, "ppaire.b">;
- def PPACKBT_H : RVPBinary_rr<0b0010, 0b00, 0b100, "ppackbt.h">;
+ def PPAIREO_B : RVPBinary_rr<0b0010, 0b00, 0b100, "ppaireo.b">;
+ def PPAIREO_H : RVPBinary_rr<0b0010, 0b01, 0b100, "ppaireo.h">;
- def PPACKTB_H : RVPBinary_rr<0b0100, 0b00, 0b100, "ppacktb.h">;
+ def PPAIROE_B : RVPBinary_rr<0b0100, 0b00, 0b100, "ppairoe.b">;
+ def PPAIROE_H : RVPBinary_rr<0b0100, 0b01, 0b100, "ppairoe.h">;
- def PPACKT_H : RVPBinary_rr<0b0110, 0b00, 0b100, "ppackt.h">;
+ def PPAIRO_B : RVPBinary_rr<0b0110, 0b00, 0b100, "ppairo.b">;
+ def PPAIRO_H : RVPBinary_rr<0b0110, 0b01, 0b100, "ppairo.h">;
} // Predicates = [HasStdExtP]
-let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in {
- def PACKBT_RV32 : RVPBinary_rr<0b0010, 0b01, 0b100, "packbt">;
-
- def PACKTB_RV32 : RVPBinary_rr<0b0100, 0b01, 0b100, "packtb">;
-
- def PACKT_RV32 : RVPBinary_rr<0b0110, 0b01, 0b100, "packt">;
-} // Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only"
let Predicates = [HasStdExtP, IsRV64] in {
- def PPACK_W : RVPBinary_rr<0b0000, 0b01, 0b100, "ppack.w">;
+ def PPAIRE_H : RVPBinary_rr<0b0000, 0b01, 0b100, "ppaire.h">;
- def PPACKBT_W : RVPBinary_rr<0b0010, 0b01, 0b100, "ppackbt.w">;
- def PACKBT_RV64 : RVPBinary_rr<0b0010, 0b11, 0b100, "packbt">;
+ def PPAIREO_W : RVPBinary_rr<0b0010, 0b11, 0b100, "ppaireo.w">;
- def PPACKTB_W : RVPBinary_rr<0b0100, 0b01, 0b100, "ppacktb.w">;
- def PACKTB_RV64 : RVPBinary_rr<0b0100, 0b11, 0b100, "packtb">;
+ def PPAIROE_W : RVPBinary_rr<0b0100, 0b11, 0b100, "ppairoe.w">;
- def PPACKT_W : RVPBinary_rr<0b0110, 0b01, 0b100, "ppackt.w">;
- def PACKT_RV64 : RVPBinary_rr<0b0110, 0b11, 0b100, "packt">;
+ def PPAIRO_W : RVPBinary_rr<0b0110, 0b11, 0b100, "ppairo.w">;
} // Predicates = [HasStdExtP, IsRV64]
let Predicates = [HasStdExtP] in {
@@ -1378,8 +1378,8 @@ let Predicates = [HasStdExtP, IsRV32] in {
def PSUB_DB : RVPPairBinary_rr<0b1000, 0b10, "psub.db">;
def SUBD : RVPPairBinary_rr<0b1000, 0b11, "subd">;
- def PDIF_DH : RVPPairBinary_rr<0b1001, 0b00, "pdif.dh">;
- def PDIF_DB : RVPPairBinary_rr<0b1001, 0b10, "pdif.db">;
+ def PABD_DH : RVPPairBinary_rr<0b1001, 0b00, "pabd.dh">;
+ def PABD_DB : RVPPairBinary_rr<0b1001, 0b10, "pabd.db">;
def PSSUB_DH : RVPPairBinary_rr<0b1010, 0b00, "pssub.dh">;
def PSSUB_DW : RVPPairBinary_rr<0b1010, 0b01, "pssub.dw">;
@@ -1389,8 +1389,8 @@ let Predicates = [HasStdExtP, IsRV32] in {
def PASUB_DW : RVPPairBinary_rr<0b1011, 0b01, "pasub.dw">;
def PASUB_DB : RVPPairBinary_rr<0b1011, 0b10, "pasub.db">;
- def PDIFU_DH : RVPPairBinary_rr<0b1101, 0b00, "pdifu.dh">;
- def PDIFU_DB : RVPPairBinary_rr<0b1101, 0b10, "pdifu.db">;
+ def PABDU_DH : RVPPairBinary_rr<0b1101, 0b00, "pabdu.dh">;
+ def PABDU_DB : RVPPairBinary_rr<0b1101, 0b10, "pabdu.db">;
def PSSUBU_DH : RVPPairBinary_rr<0b1110, 0b00, "pssubu.dh">;
def PSSUBU_DW : RVPPairBinary_rr<0b1110, 0b01, "pssubu.dw">;
@@ -1406,17 +1406,17 @@ let Predicates = [HasStdExtP, IsRV32] in {
def PSSH1SADD_DH : RVPPairBinaryShift_rr<0b011, 0b00, "pssh1sadd.dh">;
def PSSH1SADD_DW : RVPPairBinaryShift_rr<0b011, 0b01, "pssh1sadd.dw">;
- def PPACK_DH : RVPPairBinaryPack_rr<0b000, 0b00, "ppack.dh">;
- def PPACK_DW : RVPPairBinaryPack_rr<0b000, 0b01, "ppack.dw">;
+ def PPAIRE_DB : RVPPairBinaryPack_rr<0b000, 0b00, "ppaire.db">;
+ def PPAIRE_DH : RVPPairBinaryPack_rr<0b000, 0b01, "ppaire.dh">;
- def PPACKBT_DH : RVPPairBinaryPack_rr<0b001, 0b00, "ppackbt.dh">;
- def PPACKBT_DW : RVPPairBinaryPack_rr<0b001, 0b01, "ppackbt.dw">;
+ def PPAIREO_DB : RVPPairBinaryPack_rr<0b001, 0b00, "ppaireo.db">;
+ def PPAIREO_DH : RVPPairBinaryPack_rr<0b001, 0b01, "ppaireo.dh">;
- def PPACKTB_DH : RVPPairBinaryPack_rr<0b010, 0b00, "ppacktb.dh">;
- def PPACKTB_DW : RVPPairBinaryPack_rr<0b010, 0b01, "ppacktb.dw">;
+ def PPAIROE_DB : RVPPairBinaryPack_rr<0b010, 0b00, "ppairoe.db">;
+ def PPAIROE_DH : RVPPairBinaryPack_rr<0b010, 0b01, "ppairoe.dh">;
- def PPACKT_DH : RVPPairBinaryPack_rr<0b011, 0b00, "ppackt.dh">;
- def PPACKT_DW : RVPPairBinaryPack_rr<0b011, 0b01, "ppackt.dw">;
+ def PPAIRO_DB : RVPPairBinaryPack_rr<0b011, 0b00, "ppairo.db">;
+ def PPAIRO_DH : RVPPairBinaryPack_rr<0b011, 0b01, "ppairo.dh">;
def PAS_DHX : RVPPairBinaryExchanged_rr<0b0000, 0b00, "pas.dhx">;
def PSA_DHX : RVPPairBinaryExchanged_rr<0b0000, 0b10, "psa.dhx">;
@@ -1461,10 +1461,170 @@ let Predicates = [HasStdExtP, IsRV32] in {
// Codegen patterns
//===----------------------------------------------------------------------===//
-def riscv_absw : RVSDNode<"ABSW", SDTIntUnaryOp>;
+def riscv_absw : RVSDNode<"ABSW", SDT_RISCVIntUnaryOpW>;
-let Predicates = [HasStdExtP] in
-def : PatGpr<abs, ABS>;
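+// PASUB/PASUBU: element-wise halving ("averaging") subtract, i.e. (rs1 - rs2)
+// shifted right by one, arithmetically for PASUB and logically for PASUBU.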
+def SDT_RISCVPASUB : SDTypeProfile<1, 2, [SDTCisVec<0>,
+ SDTCisInt<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>]>;
+def riscv_pasub : RVSDNode<"PASUB", SDT_RISCVPASUB>;
+def riscv_pasubu : RVSDNode<"PASUBU", SDT_RISCVPASUB>;
-let Predicates = [HasStdExtP, IsRV64] in
-def : PatGpr<riscv_absw, ABSW>;
+let Predicates = [HasStdExtP] in {
+ def : PatGpr<abs, ABS>;
+
+ // Basic 8-bit arithmetic patterns
+ def: Pat<(XLenVecI8VT (add GPR:$rs1, GPR:$rs2)), (PADD_B GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI8VT (sub GPR:$rs1, GPR:$rs2)), (PSUB_B GPR:$rs1, GPR:$rs2)>;
+
+ // Basic 16-bit arithmetic patterns
+ def: Pat<(XLenVecI16VT (add GPR:$rs1, GPR:$rs2)), (PADD_H GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (sub GPR:$rs1, GPR:$rs2)), (PSUB_H GPR:$rs1, GPR:$rs2)>;
+
+ // 8-bit saturating add/sub patterns
+ def: Pat<(XLenVecI8VT (saddsat GPR:$rs1, GPR:$rs2)), (PSADD_B GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI8VT (uaddsat GPR:$rs1, GPR:$rs2)), (PSADDU_B GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI8VT (ssubsat GPR:$rs1, GPR:$rs2)), (PSSUB_B GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI8VT (usubsat GPR:$rs1, GPR:$rs2)), (PSSUBU_B GPR:$rs1, GPR:$rs2)>;
+
+ // 16-bit saturating add/sub patterns
+ def: Pat<(XLenVecI16VT (saddsat GPR:$rs1, GPR:$rs2)), (PSADD_H GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (uaddsat GPR:$rs1, GPR:$rs2)), (PSADDU_H GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (ssubsat GPR:$rs1, GPR:$rs2)), (PSSUB_H GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (usubsat GPR:$rs1, GPR:$rs2)), (PSSUBU_H GPR:$rs1, GPR:$rs2)>;
+
+ // 8-bit averaging patterns
+ def: Pat<(XLenVecI8VT (avgfloors GPR:$rs1, GPR:$rs2)), (PAADD_B GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI8VT (avgflooru GPR:$rs1, GPR:$rs2)), (PAADDU_B GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI8VT (riscv_pasub GPR:$rs1, GPR:$rs2)), (PASUB_B GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI8VT (riscv_pasubu GPR:$rs1, GPR:$rs2)), (PASUBU_B GPR:$rs1, GPR:$rs2)>;
+
+ // 16-bit averaging patterns
+ def: Pat<(XLenVecI16VT (avgfloors GPR:$rs1, GPR:$rs2)), (PAADD_H GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (avgflooru GPR:$rs1, GPR:$rs2)), (PAADDU_H GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (riscv_pasub GPR:$rs1, GPR:$rs2)), (PASUB_H GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (riscv_pasubu GPR:$rs1, GPR:$rs2)), (PASUBU_H GPR:$rs1, GPR:$rs2)>;
+
+ // 8-bit absolute difference patterns
+ def: Pat<(XLenVecI8VT (abds GPR:$rs1, GPR:$rs2)), (PABD_B GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI8VT (abdu GPR:$rs1, GPR:$rs2)), (PABDU_B GPR:$rs1, GPR:$rs2)>;
+
+ // 16-bit absolute difference patterns
+ def: Pat<(XLenVecI16VT (abds GPR:$rs1, GPR:$rs2)), (PABD_H GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (abdu GPR:$rs1, GPR:$rs2)), (PABDU_H GPR:$rs1, GPR:$rs2)>;
+
+ // 8-bit logical shift left patterns
+ def: Pat<(XLenVecI8VT (shl GPR:$rs1, (XLenVecI8VT (splat_vector uimm3:$shamt)))),
+ (PSLLI_B GPR:$rs1, uimm3:$shamt)>;
+
+ // 16-bit logical shift left patterns
+ def: Pat<(XLenVecI16VT (shl GPR:$rs1, (XLenVecI16VT (splat_vector uimm4:$shamt)))),
+ (PSLLI_H GPR:$rs1, uimm4:$shamt)>;
+
+ // 16-bit signed saturation shift left patterns
+ def: Pat<(XLenVecI16VT (sshlsat GPR:$rs1, (XLenVecI16VT (splat_vector uimm4:$shamt)))),
+ (PSSLAI_H GPR:$rs1, uimm4:$shamt)>;
+
+ // 8-bit logical shift left
+ def: Pat<(XLenVecI8VT (shl GPR:$rs1,
+ (XLenVecI8VT (splat_vector (XLenVT GPR:$rs2))))),
+ (PSLL_BS GPR:$rs1, GPR:$rs2)>;
+ // 16-bit logical shift left
+ def: Pat<(XLenVecI16VT (shl GPR:$rs1,
+ (XLenVecI16VT (splat_vector (XLenVT GPR:$rs2))))),
+ (PSLL_HS GPR:$rs1, GPR:$rs2)>;
+
+ // 8-bit PLI SD node pattern
+ def: Pat<(XLenVecI8VT (splat_vector simm8_unsigned:$imm8)), (PLI_B simm8_unsigned:$imm8)>;
+ // 16-bit PLI SD node pattern
+ def: Pat<(XLenVecI16VT (splat_vector simm10:$imm10)), (PLI_H simm10:$imm10)>;
+
+  // Splat patterns: broadcast the scalar into every lane by adding it to the
+  // zero register with the scalar-broadcast add forms.
+ def: Pat<(XLenVecI8VT (splat_vector (XLenVT GPR:$rs2))), (PADD_BS (XLenVT X0), GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (splat_vector (XLenVT GPR:$rs2))), (PADD_HS (XLenVT X0), GPR:$rs2)>;
+} // Predicates = [HasStdExtP]
+
+let Predicates = [HasStdExtP, IsRV32] in {
+ // Load/Store patterns
+ def : StPat<store, SW, GPR, v4i8>;
+ def : StPat<store, SW, GPR, v2i16>;
+ def : LdPat<load, LW, v4i8>;
+ def : LdPat<load, LW, v2i16>;
+
+ // Build vector patterns
+ def : Pat<(v2i16 (build_vector (XLenVT GPR:$a), (XLenVT GPR:$b))),
+ (PACK GPR:$a, GPR:$b)>;
+} // Predicates = [HasStdExtP, IsRV32]
+
+let Predicates = [HasStdExtP, IsRV64] in {
+ def : PatGpr<riscv_absw, ABSW>;
+
+ // 32-bit PLI SD node pattern
+ def: Pat<(v2i32 (splat_vector simm10:$imm10)), (PLI_W simm10:$imm10)>;
+
+ // Basic 32-bit arithmetic patterns
+ def: Pat<(v2i32 (add GPR:$rs1, GPR:$rs2)), (PADD_W GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i32 (sub GPR:$rs1, GPR:$rs2)), (PSUB_W GPR:$rs1, GPR:$rs2)>;
+
+ // 32-bit saturating add/sub patterns
+ def: Pat<(v2i32 (saddsat GPR:$rs1, GPR:$rs2)), (PSADD_W GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i32 (uaddsat GPR:$rs1, GPR:$rs2)), (PSADDU_W GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i32 (ssubsat GPR:$rs1, GPR:$rs2)), (PSSUB_W GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i32 (usubsat GPR:$rs1, GPR:$rs2)), (PSSUBU_W GPR:$rs1, GPR:$rs2)>;
+
+ // 32-bit averaging patterns
+ def: Pat<(v2i32 (avgfloors GPR:$rs1, GPR:$rs2)), (PAADD_W GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i32 (avgflooru GPR:$rs1, GPR:$rs2)), (PAADDU_W GPR:$rs1, GPR:$rs2)>;
+
+ // 32-bit averaging-sub patterns
+ def: Pat<(v2i32 (riscv_pasub GPR:$rs1, GPR:$rs2)), (PASUB_W GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i32 (riscv_pasubu GPR:$rs1, GPR:$rs2)), (PASUBU_W GPR:$rs1, GPR:$rs2)>;
+
+ // 32-bit logical shift left
+ def: Pat<(v2i32 (shl GPR:$rs1, (v2i32 (splat_vector (XLenVT GPR:$rs2))))),
+ (PSLL_WS GPR:$rs1, GPR:$rs2)>;
+
+ // splat pattern
+ def: Pat<(v2i32 (splat_vector (XLenVT GPR:$rs2))), (PADD_WS (XLenVT X0), GPR:$rs2)>;
+
+ // 32-bit logical shift left patterns
+ def: Pat<(v2i32 (shl GPR:$rs1, (v2i32 (splat_vector uimm5:$shamt)))),
+ (PSLLI_W GPR:$rs1, uimm5:$shamt)>;
+
+ // 32-bit signed saturation shift left patterns
+ def: Pat<(v2i32 (sshlsat GPR:$rs1, (v2i32 (splat_vector uimm5:$shamt)))),
+ (PSSLAI_W GPR:$rs1, uimm5:$shamt)>;
+
+ // Load/Store patterns
+ def : StPat<store, SD, GPR, v8i8>;
+ def : StPat<store, SD, GPR, v4i16>;
+ def : StPat<store, SD, GPR, v2i32>;
+ def : LdPat<load, LD, v8i8>;
+ def : LdPat<load, LD, v4i16>;
+ def : LdPat<load, LD, v2i32>;
+
+ // Build vector patterns
+ def : Pat<(v8i8 (build_vector (XLenVT GPR:$a), (XLenVT GPR:$b),
+ (XLenVT GPR:$c), (XLenVT GPR:$d),
+ (XLenVT undef), (XLenVT undef),
+ (XLenVT undef), (XLenVT undef))),
+ (PPAIRE_H (PPAIRE_B GPR:$a, GPR:$b), (PPAIRE_B GPR:$c, GPR:$d))>;
+
+ def : Pat<(v8i8 (build_vector (XLenVT GPR:$a), (XLenVT GPR:$b),
+ (XLenVT GPR:$c), (XLenVT GPR:$d),
+ (XLenVT GPR:$e), (XLenVT GPR:$f),
+ (XLenVT GPR:$g), (XLenVT GPR:$h))),
+ (PACK (PPAIRE_H (PPAIRE_B GPR:$a, GPR:$b), (PPAIRE_B GPR:$c, GPR:$d)),
+ (PPAIRE_H (PPAIRE_B GPR:$e, GPR:$f), (PPAIRE_B GPR:$g, GPR:$h)))>;
+
+ def : Pat<(v4i16 (build_vector (XLenVT GPR:$a), (XLenVT GPR:$b),
+ (XLenVT undef), (XLenVT undef))),
+ (PPAIRE_H GPR:$a, GPR:$b)>;
+
+ def : Pat<(v4i16 (build_vector (XLenVT GPR:$a), (XLenVT GPR:$b),
+ (XLenVT GPR:$c), (XLenVT GPR:$d))),
+ (PACK (PPAIRE_H GPR:$a, GPR:$b), (PPAIRE_H GPR:$c, GPR:$d))>;
+
+ def : Pat<(v2i32 (build_vector (XLenVT GPR:$a), (XLenVT GPR:$b))),
+ (PACK GPR:$a, GPR:$b)>;
+} // Predicates = [HasStdExtP, IsRV64]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
index 494b1c9..6563cc2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
@@ -10,7 +10,7 @@
//
//===----------------------------------------------------------------------===//
-let Predicates = [HasShortForwardBranchOpt], isSelect = 1,
+let Predicates = [HasShortForwardBranchIALU], isSelect = 1,
Constraints = "$dst = $falsev", isCommutable = 1, Size = 8 in {
// This instruction moves $truev to $dst when the condition is true. It will
// be expanded to control flow in RISCVExpandPseudoInsts.
@@ -28,7 +28,7 @@ def PseudoCCMOVGPR : Pseudo<(outs GPR:$dst),
// This should always expand to a branch+c.mv so the size is 6 or 4 if the
// branch is compressible.
-let Predicates = [HasConditionalMoveFusion, NoShortForwardBranchOpt],
+let Predicates = [HasConditionalMoveFusion, NoShortForwardBranch],
Constraints = "$dst = $falsev", isCommutable = 1, Size = 6 in {
// This instruction moves $truev to $dst when the condition is true. It will
// be expanded to control flow in RISCVExpandPseudoInsts.
@@ -69,6 +69,17 @@ class SFBALU_ri
let Constraints = "$dst = $falsev";
}
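+
+// Conditional LUI for the short forward branch (SFB) expansion: $dst receives
+// the LUI immediate when the condition ($lhs $cc $rhs) holds, else $falsev.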
+class SFBLUI
+ : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev,
+ uimm20_lui:$imm), []> {
+ let hasSideEffects = 0;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let Size = 8;
+ let Constraints = "$dst = $falsev";
+}
+
class SFBShift_ri
: Pseudo<(outs GPR:$dst),
(ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1,
@@ -97,7 +108,7 @@ class SFBShiftW_ri
// is true. Returns $falsev otherwise. Selected by optimizeSelect.
// TODO: Can we use DefaultOperands on the regular binop to accomplish this more
// like how ARM does predication?
-let Predicates = [HasShortForwardBranchOpt] in {
+let Predicates = [HasShortForwardBranchIALU] in {
def PseudoCCADD : SFBALU_rr;
def PseudoCCSUB : SFBALU_rr;
def PseudoCCSLL : SFBALU_rr;
@@ -106,17 +117,14 @@ def PseudoCCSRA : SFBALU_rr;
def PseudoCCAND : SFBALU_rr;
def PseudoCCOR : SFBALU_rr;
def PseudoCCXOR : SFBALU_rr;
-def PseudoCCMAX : SFBALU_rr;
-def PseudoCCMIN : SFBALU_rr;
-def PseudoCCMAXU : SFBALU_rr;
-def PseudoCCMINU : SFBALU_rr;
-def PseudoCCMUL : SFBALU_rr;
def PseudoCCADDI : SFBALU_ri;
def PseudoCCANDI : SFBALU_ri;
def PseudoCCORI : SFBALU_ri;
def PseudoCCXORI : SFBALU_ri;
+def PseudoCCLUI : SFBLUI;
+
def PseudoCCSLLI : SFBShift_ri;
def PseudoCCSRLI : SFBShift_ri;
def PseudoCCSRAI : SFBShift_ri;
@@ -140,11 +148,21 @@ def PseudoCCORN : SFBALU_rr;
def PseudoCCXNOR : SFBALU_rr;
}
-let Predicates = [HasShortForwardBranchOpt] in
+let Predicates = [HasShortForwardBranchIALU] in
def : Pat<(XLenVT (abs GPR:$rs1)),
(PseudoCCSUB (XLenVT GPR:$rs1), (XLenVT X0), /* COND_LT */ 2,
(XLenVT GPR:$rs1), (XLenVT X0), (XLenVT GPR:$rs1))>;
-let Predicates = [HasShortForwardBranchOpt, IsRV64] in
+let Predicates = [HasShortForwardBranchIALU, IsRV64] in
def : Pat<(sext_inreg (abs 33signbits_node:$rs1), i32),
(PseudoCCSUBW (i64 GPR:$rs1), (i64 X0), /* COND_LT */ 2,
(i64 GPR:$rs1), (i64 X0), (i64 GPR:$rs1))>;
+
+let Predicates = [HasShortForwardBranchIMinMax] in {
+def PseudoCCMAX : SFBALU_rr;
+def PseudoCCMIN : SFBALU_rr;
+def PseudoCCMAXU : SFBALU_rr;
+def PseudoCCMINU : SFBALU_rr;
+}
+
+let Predicates = [HasShortForwardBranchIMul] in
+def PseudoCCMUL : SFBALU_rr;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
index f46455a..594a75a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
@@ -79,19 +79,19 @@ def simm5 : RISCVSImmLeafOp<5> {
}
def simm5_plus1 : RISCVOp, ImmLeaf<XLenVT,
- [{return (isInt<5>(Imm) && Imm != -16) || Imm == 16;}]> {
+ [{return Imm >= -15 && Imm <= 16;}]> {
let ParserMatchClass = SImmAsmOperand<5, "Plus1">;
let OperandType = "OPERAND_SIMM5_PLUS1";
let MCOperandPredicate = [{
int64_t Imm;
if (MCOp.evaluateAsConstantImm(Imm))
- return (isInt<5>(Imm) && Imm != -16) || Imm == 16;
+ return Imm >= -15 && Imm <= 16;
return MCOp.isBareSymbolRef();
}];
}
def simm5_plus1_nonzero : ImmLeaf<XLenVT,
- [{return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);}]>;
+ [{return Imm != 0 && Imm >= -15 && Imm <= 16;}]>;
//===----------------------------------------------------------------------===//
// Scheduling definitions.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index eb3c9b0..e36204c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -2982,21 +2982,21 @@ multiclass VPseudoVFWALU_WV_WF_RM {
multiclass VPseudoVMRG_VM_XM_IM {
foreach m = MxList in {
defvar mx = m.MX;
- def "_VVM" # "_" # m.MX:
- VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
- m.vrclass, m.vrclass, m>,
- SchedBinary<"WriteVIMergeV", "ReadVIMergeV", "ReadVIMergeV", mx,
- forcePassthruRead=true>;
- def "_VXM" # "_" # m.MX:
- VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
- m.vrclass, GPR, m>,
- SchedBinary<"WriteVIMergeX", "ReadVIMergeV", "ReadVIMergeX", mx,
- forcePassthruRead=true>;
- def "_VIM" # "_" # m.MX:
- VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
- m.vrclass, simm5, m>,
- SchedUnary<"WriteVIMergeI", "ReadVIMergeV", mx,
- forcePassthruRead=true>;
+ def "_VVM"#"_"#m.MX : VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
+ GetVRegNoV0<m.vrclass>.R,
+ GetVRegNoV0<m.vrclass>.R, m>,
+ SchedBinary<"WriteVIMergeV", "ReadVIMergeV", "ReadVIMergeV", mx,
+ forcePassthruRead = true>;
+ def "_VXM"#"_"#m.MX
+ : VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
+ GetVRegNoV0<m.vrclass>.R, GPR, m>,
+ SchedBinary<"WriteVIMergeX", "ReadVIMergeV", "ReadVIMergeX", mx,
+ forcePassthruRead = true>;
+ def "_VIM"#"_"#m.MX
+ : VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
+ GetVRegNoV0<m.vrclass>.R, simm5, m>,
+ SchedUnary<"WriteVIMergeI", "ReadVIMergeV", mx,
+ forcePassthruRead = true>;
}
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index 139ff92..a67112b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -215,8 +215,8 @@ multiclass VPatBinaryFPSDNode_VV_VF<SDPatternOperator vop, string instruction_na
}
multiclass VPatBinaryFPSDNode_VV_VF_RM<SDPatternOperator vop, string instruction_name,
- bit isSEWAware = 0> {
- foreach vti = AllFloatVectors in {
+ bit isSEWAware = 0, bit isBF16 = 0> {
+ foreach vti = !if(isBF16, AllBF16Vectors, AllFloatVectors) in {
let Predicates = GetVTypePredicates<vti>.Predicates in {
def : VPatBinarySDNode_VV_RM<vop, instruction_name,
vti.Vector, vti.Vector, vti.Log2SEW,
@@ -246,8 +246,8 @@ multiclass VPatBinaryFPSDNode_R_VF<SDPatternOperator vop, string instruction_nam
}
multiclass VPatBinaryFPSDNode_R_VF_RM<SDPatternOperator vop, string instruction_name,
- bit isSEWAware = 0> {
- foreach fvti = AllFloatVectors in
+ bit isSEWAware = 0, bit isBF16 = 0> {
+ foreach fvti = !if(isBF16, AllBF16Vectors, AllFloatVectors) in
let Predicates = GetVTypePredicates<fvti>.Predicates in
def : Pat<(fvti.Vector (vop (fvti.Vector (SplatFPOp fvti.Scalar:$rs2)),
(fvti.Vector fvti.RegClass:$rs1))),
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index cf904ea..38edab5 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -1058,8 +1058,8 @@ multiclass VPatBinaryFPVL_VV_VF<SDPatternOperator vop, string instruction_name,
}
multiclass VPatBinaryFPVL_VV_VF_RM<SDPatternOperator vop, string instruction_name,
- bit isSEWAware = 0> {
- foreach vti = AllFloatVectors in {
+ bit isSEWAware = 0, bit isBF16 = 0> {
+ foreach vti = !if(isBF16, AllBF16Vectors, AllFloatVectors) in {
let Predicates = GetVTypePredicates<vti>.Predicates in {
def : VPatBinaryVL_V_RM<vop, instruction_name, "VV",
vti.Vector, vti.Vector, vti.Vector, vti.Mask,
@@ -1093,8 +1093,8 @@ multiclass VPatBinaryFPVL_R_VF<SDPatternOperator vop, string instruction_name,
}
multiclass VPatBinaryFPVL_R_VF_RM<SDPatternOperator vop, string instruction_name,
- bit isSEWAware = 0> {
- foreach fvti = AllFloatVectors in {
+ bit isSEWAware = 0, bit isBF16 = 0> {
+ foreach fvti = !if(isBF16, AllBF16Vectors, AllFloatVectors) in {
let Predicates = GetVTypePredicates<fvti>.Predicates in
def : Pat<(fvti.Vector (vop (SplatFPOp fvti.ScalarRegClass:$rs2),
fvti.RegClass:$rs1,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td
index b683e89..80aded3 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td
@@ -838,7 +838,6 @@ def : Pat<(fpextend (bf16 FPR16:$rs)),
(NDS_FCVT_S_BF16 (bf16 FPR16:$rs))>;
def : Pat<(bf16 (fpround FPR32:$rs)),
(NDS_FCVT_BF16_S FPR32:$rs)>;
-} // Predicates = [HasVendorXAndesBFHCvt]
let isCodeGenOnly = 1 in {
def NDS_FMV_BF16_X : FPUnaryOp_r<0b1111000, 0b00000, 0b000, FPR16, GPR, "fmv.w.x">,
@@ -847,7 +846,6 @@ def NDS_FMV_X_BF16 : FPUnaryOp_r<0b1110000, 0b00000, 0b000, GPR, FPR16, "fmv.x.w
Sched<[WriteFMovF32ToI32, ReadFMovF32ToI32]>;
}
-let Predicates = [HasVendorXAndesBFHCvt] in {
def : Pat<(riscv_nds_fmv_bf16_x GPR:$src), (NDS_FMV_BF16_X GPR:$src)>;
def : Pat<(riscv_nds_fmv_x_anyextbf16 (bf16 FPR16:$src)),
(NDS_FMV_X_BF16 (bf16 FPR16:$src))>;
@@ -914,7 +912,7 @@ defm : VPatTernaryVD4DOT_VV<"int_riscv_nds_vd4dotsu", "PseudoNDS_VD4DOTSU",
// Pseudo-instructions for SFB (Short Forward Branch)
//===----------------------------------------------------------------------===//
-let Predicates = [HasShortForwardBranchOpt], hasSideEffects = 0,
+let Predicates = [HasShortForwardBranchIALU], hasSideEffects = 0,
mayLoad = 0, mayStore = 0, Size = 8, Constraints = "$dst = $falsev" in {
def PseudoCCNDS_BFOS : Pseudo<(outs GPR:$dst),
(ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
index aa8f1a1..7abc616 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
@@ -633,8 +633,9 @@ let Predicates = [HasVendorXCVmem, IsRV32] in {
def CV_SW_rr : CVStore_rr<0b011, 0b0010110, "cv.sw">;
}
-let Predicates = [HasVendorXCVelw, IsRV32], hasSideEffects = 0,
+let Predicates = [HasVendorXCVelw, IsRV32], hasSideEffects = 1,
mayLoad = 1, mayStore = 0 in {
+ def PseudoCV_ELW : PseudoLoad<"cv.elw">;
// Event load
def CV_ELW : CVLoad_ri<0b011, "cv.elw">;
}
@@ -706,6 +707,12 @@ let Predicates = [HasVendorXCVmem, IsRV32], AddedComplexity = 1 in {
def : CVStrrPat<store, CV_SW_rr>;
}
+let Predicates = [HasVendorXCVelw, IsRV32] in {
+ def : Pat<(int_riscv_cv_elw_elw (XLenVT GPR:$rs1)), (PseudoCV_ELW GPR:$rs1)>;
+ def : Pat<(int_riscv_cv_elw_elw (AddrRegImm (XLenVT GPR:$rs1), simm12_lo:$imm12)),
+ (CV_ELW GPR:$rs1, simm12_lo:$imm12)>;
+}
+
multiclass PatCoreVBitManip<Intrinsic intr> {
def : PatGprGpr<intr, !cast<RVInst>("CV_" # NAME # "R")>;
def : Pat<(intr GPR:$rs1, cv_uimm10:$imm),
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
index f7b4914..c07ed85 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
@@ -935,7 +935,7 @@ let Predicates = [HasVendorXSfcease] in {
let rd = 0b00000;
let rs1 = 0b00000;
let rs2 = 0b00101;
-}
+ }
}
let Predicates = [HasVendorXSfvfbfexp16e] in {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td
index d77a44a..445e513 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td
@@ -13,7 +13,6 @@
def XSfmmVTypeAsmOperand : AsmOperandClass {
let Name = "XSfmmVType";
let ParserMethod = "parseXSfmmVType";
- let DiagnosticType = "InvalidXSfmmVType";
let RenderMethod = "addVTypeIOperands";
}
@@ -279,7 +278,7 @@ let Uses = [FRM], mayRaiseFPException = true in {
} // DecoderNamespace = "XSfvector"
class VPseudoSF_VTileLoad
- : RISCVVPseudo<(outs), (ins GPR:$rs2, GPR:$rs1, AVL:$atn, ixlenimm:$sew,
+ : RISCVVPseudo<(outs), (ins GPR:$rs2, GPR:$rs1, GPRNoX0:$atn, ixlenimm:$sew,
ixlenimm:$twiden)> {
let mayLoad = 1;
let mayStore = 0;
@@ -290,7 +289,7 @@ class VPseudoSF_VTileLoad
}
class VPseudoSF_VTileStore
- : RISCVVPseudo<(outs), (ins GPR:$rs2, GPR:$rs1, AVL:$atn, ixlenimm:$sew,
+ : RISCVVPseudo<(outs), (ins GPR:$rs2, GPR:$rs1, GPRNoX0:$atn, ixlenimm:$sew,
ixlenimm:$twiden)> {
let mayLoad = 0;
let mayStore = 1;
@@ -301,7 +300,7 @@ class VPseudoSF_VTileStore
}
class VPseudoSF_VTileMove_V_T
- : RISCVVPseudo<(outs VRM8:$vd), (ins GPR:$rs1, AVL:$atn, ixlenimm:$sew,
+ : RISCVVPseudo<(outs VRM8:$vd), (ins GPR:$rs1, GPRNoX0:$atn, ixlenimm:$sew,
ixlenimm:$twiden)> {
let mayLoad = 0;
let mayStore = 0;
@@ -312,7 +311,7 @@ class VPseudoSF_VTileMove_V_T
}
class VPseudoSF_VTileMove_T_V
- : RISCVVPseudo<(outs), (ins GPR:$rs1, VRM8:$vs2, AVL:$atn, ixlenimm:$sew,
+ : RISCVVPseudo<(outs), (ins GPR:$rs1, VRM8:$vs2, GPRNoX0:$atn, ixlenimm:$sew,
ixlenimm:$twiden)> {
let mayLoad = 0;
let mayStore = 0;
@@ -324,8 +323,9 @@ class VPseudoSF_VTileMove_T_V
class VPseudoSF_MatMul<RegisterClass mtd_class>
: RISCVVPseudo<(outs),
- (ins mtd_class:$rd, VRM8:$vs2, VRM8:$vs1, AVL:$atm, AVL:$atn,
- AVL:$atk, ixlenimm:$sew, ixlenimm:$twiden)> {
+ (ins mtd_class:$rd, VRM8:$vs2, VRM8:$vs1, GPRNoX0:$atm,
+ GPRNoX0:$atn, GPRNoX0:$atk, ixlenimm:$sew,
+ ixlenimm:$twiden)> {
let mayLoad = 0;
let mayStore = 0;
let HasTmOp = 1;
@@ -339,7 +339,7 @@ class VPseudoSF_MatMul<RegisterClass mtd_class>
class VPseudoSF_MatMul_FRM<RegisterClass mtd_class>
: RISCVVPseudo<(outs),
(ins mtd_class:$rd, VRM8:$vs2, VRM8:$vs1, ixlenimm:$frm,
- AVL:$atm, AVL:$atn, AVL:$atk, ixlenimm:$sew,
+ GPRNoX0:$atm, GPRNoX0:$atn, GPRNoX0:$atk, ixlenimm:$sew,
ixlenimm:$twiden), []> {
let mayLoad = 0;
let mayStore = 0;
@@ -414,7 +414,7 @@ let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in {
let HasVLOp = 1, HasTmOp = 1, HasTWidenOp = 1, HasSEWOp = 1 in
def PseudoSF_VTZERO_T
: RISCVVPseudo<(outs),
- (ins TR:$rd, AVL:$atm, AVL:$atn, ixlenimm:$sew,
+ (ins TR:$rd, GPRNoX0:$atm, GPRNoX0:$atn, ixlenimm:$sew,
ixlenimm:$twiden)>;
def PseudoSF_VTDISCARD : RISCVVPseudo<(outs), (ins), []>;
}
@@ -425,7 +425,7 @@ class VPatXSfmmTileStore<string intrinsic_name,
Pat<(!cast<Intrinsic>(intrinsic_name)
(XLenVT GPR:$rs2),
(XLenVT GPR:$rs1),
- (XLenVT AVL:$tn)),
+ (XLenVT GPRNoX0:$tn)),
(!cast<Instruction>(inst_name)
(XLenVT GPR:$rs2),
(XLenVT GPR:$rs1),
@@ -438,7 +438,7 @@ class VPatXSfmmTileMove_T_V<string intrinsic_name,
Pat<(!cast<Intrinsic>(intrinsic_name)
(XLenVT GPR:$rs1),
(reg_type VRM8:$vs2),
- (XLenVT AVL:$atn)),
+ (XLenVT GPRNoX0:$atn)),
(!cast<Instruction>(inst_name)
(XLenVT GPR:$rs1),
(reg_type VRM8:$vs2),
@@ -450,7 +450,7 @@ class VPatXSfmmTileMove_V_T<string intrinsic_name,
int log2sew> :
Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
(XLenVT GPR:$rs1),
- (XLenVT AVL:$atn))),
+ (XLenVT GPRNoX0:$atn))),
(!cast<Instruction>(inst_name)
(XLenVT GPR:$rs1),
GPR:$atn, log2sew, 1)>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
index 8376da5..748494f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
@@ -62,7 +62,7 @@ def UImm5Plus1AsmOperand : AsmOperandClass {
}
def uimm5_plus1 : RISCVOp, ImmLeaf<XLenVT,
- [{return (isUInt<5>(Imm) && (Imm != 0)) || (Imm == 32);}]> {
+ [{return Imm >= 1 && Imm <= 32;}]> {
let ParserMatchClass = UImm5Plus1AsmOperand;
let EncoderMethod = "getImmOpValueMinus1";
let DecoderMethod = "decodeUImmPlus1Operand<5>";
@@ -71,12 +71,12 @@ def uimm5_plus1 : RISCVOp, ImmLeaf<XLenVT,
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
return false;
- return (isUInt<5>(Imm) && (Imm != 0)) || (Imm == 32);
+ return Imm >= 1 && Imm <= 32;
}];
}
def uimm5ge6_plus1 : RISCVOp<XLenVT>, ImmLeaf<XLenVT,
- [{return (Imm >= 6) && (isUInt<5>(Imm) || (Imm == 32));}]> {
+ [{return Imm >= 6 && Imm <= 32;}]> {
let ParserMatchClass = UImmAsmOperand<5, "GE6Plus1">;
let EncoderMethod = "getImmOpValueMinus1";
let DecoderMethod = "decodeUImmPlus1OperandGE<5,6>";
@@ -85,7 +85,7 @@ def uimm5ge6_plus1 : RISCVOp<XLenVT>, ImmLeaf<XLenVT,
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
return false;
- return (Imm >= 6) && (isUInt<5>(Imm) || (Imm == 32));
+ return Imm >= 6 && Imm <= 32;
}];
}
@@ -817,6 +817,28 @@ class QCIRVInst48EJ<bits<2> func2, string opcodestr>
let Inst{6-0} = 0b0011111;
}
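+
+// Conditional forms of qc.li / qc.e.li for the short forward branch expansion:
+// $dst receives the immediate when the condition holds, else $falsev.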
+class SFBQC_LI
+ : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev,
+ simm20_li:$imm), []> {
+ let hasSideEffects = 0;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let Size = 8;
+ let Constraints = "$dst = $falsev";
+}
+
+class SFBQC_E_LI
+ : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev,
+ bare_simm32:$imm), []> {
+ let hasSideEffects = 0;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let Size = 10;
+ let Constraints = "$dst = $falsev";
+}
+
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
@@ -1308,6 +1330,11 @@ def PseudoQC_E_SH : PseudoStore<"qc.e.sh">;
def PseudoQC_E_SW : PseudoStore<"qc.e.sw">;
} // Predicates = [HasVendorXqcilo, IsRV32]
+let Predicates = [HasShortForwardBranchIALU] in {
+def PseudoCCQC_LI : SFBQC_LI;
+def PseudoCCQC_E_LI : SFBQC_E_LI;
+}
+
//===----------------------------------------------------------------------===//
// Code Gen Patterns
//===----------------------------------------------------------------------===//
@@ -1544,7 +1571,7 @@ def: Pat<(i32 (ctlz (not (i32 GPR:$rs1)))), (QC_CLO GPR:$rs1)>;
let Predicates = [HasVendorXqciint, IsRV32] in
def : Pat<(riscv_mileaveret_glue), (QC_C_MILEAVERET)>;
-let Predicates = [HasVendorXqcicm, NoShortForwardBranchOpt, IsRV32] in {
+let Predicates = [HasVendorXqcicm, NoShortForwardBranch, IsRV32] in {
def : QCIMVCCPat<SETEQ, QC_MVEQ>;
def : QCIMVCCPat<SETNE, QC_MVNE>;
def : QCIMVCCPat<SETLT, QC_MVLT>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 5429c2a..3730f55 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -276,10 +276,9 @@ def XNOR : ALU_rr<0b0100000, 0b100, "xnor", Commutable=1>,
Sched<[WriteIALU, ReadIALU, ReadIALU]>;
} // Predicates = [HasStdExtZbbOrZbkb]
-let Predicates = [HasStdExtZbaOrP] in
+let Predicates = [HasStdExtZba] in {
def SH1ADD : ALU_rr<0b0010000, 0b010, "sh1add">,
Sched<[WriteSHXADD, ReadSHXADD, ReadSHXADD]>;
-let Predicates = [HasStdExtZba] in {
def SH2ADD : ALU_rr<0b0010000, 0b100, "sh2add">,
Sched<[WriteSHXADD, ReadSHXADD, ReadSHXADD]>;
def SH3ADD : ALU_rr<0b0010000, 0b110, "sh3add">,
@@ -351,32 +350,30 @@ def XPERM8 : ALU_rr<0b0010100, 0b100, "xperm8">,
Sched<[WriteXPERM, ReadXPERM, ReadXPERM]>;
} // Predicates = [HasStdExtZbkx]
-let Predicates = [HasStdExtZbbOrP], IsSignExtendingOpW = 1 in
+let Predicates = [HasStdExtZbb], IsSignExtendingOpW = 1 in {
def CLZ : Unary_r<0b011000000000, 0b001, "clz">,
Sched<[WriteCLZ, ReadCLZ]>;
-let Predicates = [HasStdExtZbb], IsSignExtendingOpW = 1 in {
def CTZ : Unary_r<0b011000000001, 0b001, "ctz">,
Sched<[WriteCTZ, ReadCTZ]>;
def CPOP : Unary_r<0b011000000010, 0b001, "cpop">,
Sched<[WriteCPOP, ReadCPOP]>;
} // Predicates = [HasStdExtZbb]
-let Predicates = [HasStdExtZbbOrP, IsRV64], IsSignExtendingOpW = 1 in
+let Predicates = [HasStdExtZbb, IsRV64], IsSignExtendingOpW = 1 in {
def CLZW : UnaryW_r<0b011000000000, 0b001, "clzw">,
Sched<[WriteCLZ32, ReadCLZ32]>;
-let Predicates = [HasStdExtZbb, IsRV64], IsSignExtendingOpW = 1 in {
def CTZW : UnaryW_r<0b011000000001, 0b001, "ctzw">,
Sched<[WriteCTZ32, ReadCTZ32]>;
def CPOPW : UnaryW_r<0b011000000010, 0b001, "cpopw">,
Sched<[WriteCPOP32, ReadCPOP32]>;
} // Predicates = [HasStdExtZbb, IsRV64]
-let Predicates = [HasStdExtZbbOrP], IsSignExtendingOpW = 1 in {
+let Predicates = [HasStdExtZbb], IsSignExtendingOpW = 1 in {
def SEXT_B : Unary_r<0b011000000100, 0b001, "sext.b">,
Sched<[WriteIALU, ReadIALU]>;
def SEXT_H : Unary_r<0b011000000101, 0b001, "sext.h">,
Sched<[WriteIALU, ReadIALU]>;
-} // Predicates = [HasStdExtZbbOrP]
+} // Predicates = [HasStdExtZbb]
let Predicates = [HasStdExtZbc] in {
def CLMULR : ALU_rr<0b0000101, 0b010, "clmulr", Commutable=1>,
@@ -390,7 +387,7 @@ def CLMULH : ALU_rr<0b0000101, 0b011, "clmulh", Commutable=1>,
Sched<[WriteCLMUL, ReadCLMUL, ReadCLMUL]>;
} // Predicates = [HasStdExtZbcOrZbkc]
-let Predicates = [HasStdExtZbbOrP] in {
+let Predicates = [HasStdExtZbb] in {
def MIN : ALU_rr<0b0000101, 0b100, "min", Commutable=1>,
Sched<[WriteIMinMax, ReadIMinMax, ReadIMinMax]>;
def MINU : ALU_rr<0b0000101, 0b101, "minu", Commutable=1>,
@@ -399,7 +396,7 @@ def MAX : ALU_rr<0b0000101, 0b110, "max", Commutable=1>,
Sched<[WriteIMinMax, ReadIMinMax, ReadIMinMax]>;
def MAXU : ALU_rr<0b0000101, 0b111, "maxu", Commutable=1>,
Sched<[WriteIMinMax, ReadIMinMax, ReadIMinMax]>;
-} // Predicates = [HasStdExtZbbOrP]
+} // Predicates = [HasStdExtZbb]
let Predicates = [HasStdExtZbkbOrP] in
def PACK : ALU_rr<0b0000100, 0b100, "pack">,
@@ -424,15 +421,15 @@ def ZEXT_H_RV64 : RVBUnaryR<0b0000100, 0b100, OPC_OP_32, "zext.h">,
Sched<[WriteIALU, ReadIALU]>;
} // Predicates = [HasStdExtZbb, IsRV64]
-let Predicates = [HasStdExtZbbOrZbkbOrP, IsRV32] in {
+let Predicates = [HasStdExtZbbOrZbkb, IsRV32] in {
def REV8_RV32 : Unary_r<0b011010011000, 0b101, "rev8">,
Sched<[WriteREV8, ReadREV8]>;
-} // Predicates = [HasStdExtZbbOrZbkbOrP, IsRV32]
+} // Predicates = [HasStdExtZbbOrZbkb, IsRV32]
-let Predicates = [HasStdExtZbbOrZbkbOrP, IsRV64] in {
+let Predicates = [HasStdExtZbbOrZbkb, IsRV64] in {
def REV8_RV64 : Unary_r<0b011010111000, 0b101, "rev8">,
Sched<[WriteREV8, ReadREV8]>;
-} // Predicates = [HasStdExtZbbOrZbkbOrP, IsRV64]
+} // Predicates = [HasStdExtZbbOrZbkb, IsRV64]
let Predicates = [HasStdExtZbb] in {
def ORC_B : Unary_r<0b001010000111, 0b101, "orc.b">,
@@ -599,20 +596,14 @@ def : PatGpr<riscv_zip, ZIP_RV32, i32>;
def : PatGpr<riscv_unzip, UNZIP_RV32, i32>;
} // Predicates = [HasStdExtZbkb, IsRV32]
-let Predicates = [HasStdExtZbbOrP] in {
-def : PatGpr<ctlz, CLZ>;
-}
-
let Predicates = [HasStdExtZbb] in {
+def : PatGpr<ctlz, CLZ>;
def : PatGpr<cttz, CTZ>;
def : PatGpr<ctpop, CPOP>;
} // Predicates = [HasStdExtZbb]
-let Predicates = [HasStdExtZbbOrP, IsRV64] in {
-def : PatGpr<riscv_clzw, CLZW>;
-}
-
let Predicates = [HasStdExtZbb, IsRV64] in {
+def : PatGpr<riscv_clzw, CLZW>;
def : PatGpr<riscv_ctzw, CTZW>;
def : Pat<(i64 (ctpop (i64 (zexti32 (i64 GPR:$rs1))))), (CPOPW GPR:$rs1)>;
@@ -620,22 +611,22 @@ def : Pat<(i64 (riscv_negw_max GPR:$rs1)),
(MAX GPR:$rs1, (XLenVT (SUBW (XLenVT X0), GPR:$rs1)))>;
} // Predicates = [HasStdExtZbb, IsRV64]
-let Predicates = [HasStdExtZbbOrP] in {
+let Predicates = [HasStdExtZbb] in {
def : Pat<(XLenVT (sext_inreg GPR:$rs1, i8)), (SEXT_B GPR:$rs1)>;
def : Pat<(XLenVT (sext_inreg GPR:$rs1, i16)), (SEXT_H GPR:$rs1)>;
} // Predicates = [HasStdExtZbb]
-let Predicates = [HasStdExtZbbOrP] in {
+let Predicates = [HasStdExtZbb] in {
def : PatGprGpr<smin, MIN>;
def : PatGprGpr<smax, MAX>;
def : PatGprGpr<umin, MINU>;
def : PatGprGpr<umax, MAXU>;
} // Predicates = [HasStdExtZbb]
-let Predicates = [HasStdExtZbbOrZbkbOrP, IsRV32] in
+let Predicates = [HasStdExtZbbOrZbkb, IsRV32] in
def : PatGpr<bswap, REV8_RV32, i32>;
-let Predicates = [HasStdExtZbbOrZbkbOrP, IsRV64] in
+let Predicates = [HasStdExtZbbOrZbkb, IsRV64] in
def : PatGpr<bswap, REV8_RV64, i64>;
let Predicates = [HasStdExtZbkb] in {
@@ -652,6 +643,9 @@ def : Pat<(and (or (shl GPR:$rs2, (XLenVT 8)),
def : Pat<(binop_allhusers<or> (shl GPR:$rs2, (XLenVT 8)),
zexti8:$rs1),
(PACKH zexti8:$rs1, GPR:$rs2)>;
+
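+// packh with rs1 = x0 computes (rs2 & 0xFF) << 8: the low byte of rs2 goes to
+// bits 15:8 and all other bits are zeroed.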
+def : Pat<(shl (and GPR:$rs2, 0xFF), (XLenVT 8)),
+ (PACKH (XLenVT X0), GPR:$rs2)>;
} // Predicates = [HasStdExtZbkb]
let Predicates = [HasStdExtZbkb, IsRV32] in {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
index 1c6a5af..c172d17 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
@@ -90,7 +90,7 @@ defvar ZfhminDExts = [ZfhminDExt, ZhinxminZdinxExt, ZhinxminZdinx32Ext];
//===----------------------------------------------------------------------===//
let Predicates = [HasHalfFPLoadStoreMove] in {
-let canFoldAsLoad = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
def FLH : FPLoad_r<0b001, "flh", FPR16, WriteFLD16>;
// Operands for stores are in the order srcreg, base, offset rather than
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td
index a3203f2..4fc859f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td
@@ -47,6 +47,23 @@ let Predicates = [HasStdExtZilsd, IsRV32] in {
def PseudoLD_RV32 : PseudoLoad<"ld", GPRPairRV32>;
def PseudoSD_RV32 : PseudoStore<"sd", GPRPairRV32>;
+// Pseudo load/store used by the Zilsd load/store optimization; the two data
+// registers stay as separate operands so the access can be split back into
+// LW/SW after register allocation if they do not form a valid register pair.
+def PseudoLD_RV32_OPT :
+ Pseudo<(outs GPR:$rd1, GPR:$rd2),
+ (ins GPR:$rs1, simm12_lo:$imm12), [], "", ""> {
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+ let mayStore = 0;
+}
+
+def PseudoSD_RV32_OPT :
+ Pseudo<(outs),
+ (ins GPR:$rs1, GPR:$rs2, GPR:$rs3, simm12_lo:$imm12), [], "", ""> {
+ let hasSideEffects = 0;
+ let mayLoad = 0;
+ let mayStore = 1;
+}
+
def : InstAlias<"ld $rd, (${rs1})", (LD_RV32 GPRPairRV32:$rd, GPR:$rs1, 0), 0>;
def : InstAlias<"sd $rs2, (${rs1})", (SD_RV32 GPRPairRV32:$rs2, GPR:$rs1, 0), 0>;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
index ffb2ac0..e24e4a3 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
@@ -783,4 +783,22 @@ let Predicates = [HasStdExtZvfbfa] in {
TAIL_AGNOSTIC)>;
}
}
+
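+  // Reuse the shared FP binary pattern multiclasses (via the new isBF16
+  // parameter) to select the _ALT pseudos for BF16 vector fadd/fsub/fmul and
+  // reverse fsub.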
+ defm : VPatBinaryFPSDNode_VV_VF_RM<any_fadd, "PseudoVFADD_ALT",
+ isSEWAware=1, isBF16=1>;
+ defm : VPatBinaryFPSDNode_VV_VF_RM<any_fsub, "PseudoVFSUB_ALT",
+ isSEWAware=1, isBF16=1>;
+ defm : VPatBinaryFPSDNode_VV_VF_RM<any_fmul, "PseudoVFMUL_ALT",
+ isSEWAware=1, isBF16=1>;
+ defm : VPatBinaryFPSDNode_R_VF_RM<any_fsub, "PseudoVFRSUB_ALT",
+ isSEWAware=1, isBF16=1>;
+
+ defm : VPatBinaryFPVL_VV_VF_RM<any_riscv_fadd_vl, "PseudoVFADD_ALT",
+ isSEWAware=1, isBF16=1>;
+ defm : VPatBinaryFPVL_VV_VF_RM<any_riscv_fsub_vl, "PseudoVFSUB_ALT",
+ isSEWAware=1, isBF16=1>;
+ defm : VPatBinaryFPVL_VV_VF_RM<any_riscv_fmul_vl, "PseudoVFMUL_ALT",
+ isSEWAware=1, isBF16=1>;
+ defm : VPatBinaryFPVL_R_VF_RM<any_riscv_fsub_vl, "PseudoVFRSUB_ALT",
+ isSEWAware=1, isBF16=1>;
} // Predicates = [HasStdExtZvfbfa]
diff --git a/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
index 115a96e..a22ab6b 100644
--- a/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
@@ -11,6 +11,9 @@
// paired instruction, leveraging hardware support for paired memory accesses.
// Much of the pairing logic is adapted from the AArch64LoadStoreOpt pass.
//
+// Post-allocation Zilsd decomposition: Fixes invalid LD/SD instructions if
+// register allocation didn't provide suitable consecutive registers.
+//
// NOTE: The AArch64LoadStoreOpt pass performs additional optimizations such as
// merging zero store instructions, promoting loads that read directly from a
// preceding store, and merging base register updates with load/store
@@ -23,6 +26,7 @@
#include "RISCV.h"
#include "RISCVTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/TargetRegistry.h"
@@ -38,6 +42,8 @@ using namespace llvm;
// pairs.
static cl::opt<unsigned> LdStLimit("riscv-load-store-scan-limit", cl::init(128),
cl::Hidden);
+STATISTIC(NumLD2LW, "Number of LD instructions split back to LW");
+STATISTIC(NumSD2SW, "Number of SD instructions split back to SW");
namespace {
@@ -75,6 +81,13 @@ struct RISCVLoadStoreOpt : public MachineFunctionPass {
mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired, bool MergeForward);
+ // Post reg-alloc zilsd part
+ bool fixInvalidRegPairOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI);
+ bool isValidZilsdRegPair(Register First, Register Second);
+ void splitLdSdIntoTwo(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, bool IsLoad);
+
private:
AliasAnalysis *AA;
MachineRegisterInfo *MRI;
@@ -92,8 +105,6 @@ bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
if (skipFunction(Fn.getFunction()))
return false;
const RISCVSubtarget &Subtarget = Fn.getSubtarget<RISCVSubtarget>();
- if (!Subtarget.useMIPSLoadStorePairs())
- return false;
bool MadeChange = false;
TII = Subtarget.getInstrInfo();
@@ -103,18 +114,34 @@ bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
ModifiedRegUnits.init(*TRI);
UsedRegUnits.init(*TRI);
- for (MachineBasicBlock &MBB : Fn) {
- LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
+ if (Subtarget.useMIPSLoadStorePairs()) {
+ for (MachineBasicBlock &MBB : Fn) {
+ LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
+
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MBBI != E;) {
+ if (TII->isPairableLdStInstOpc(MBBI->getOpcode()) &&
+ tryToPairLdStInst(MBBI))
+ MadeChange = true;
+ else
+ ++MBBI;
+ }
+ }
+ }
- for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
- MBBI != E;) {
- if (TII->isPairableLdStInstOpc(MBBI->getOpcode()) &&
- tryToPairLdStInst(MBBI))
- MadeChange = true;
- else
- ++MBBI;
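+  // On RV32 with Zilsd, fix up paired LD/SD pseudos whose allocated registers
+  // do not form a legal even/odd pair.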
+ if (!Subtarget.is64Bit() && Subtarget.hasStdExtZilsd()) {
+ for (auto &MBB : Fn) {
+ for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E;) {
+ if (fixInvalidRegPairOp(MBB, MBBI)) {
+ MadeChange = true;
+ // Iterator was updated by fixInvalidRegPairOp
+ } else {
+ ++MBBI;
+ }
+ }
}
}
+
return MadeChange;
}
@@ -395,6 +422,187 @@ RISCVLoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
return NextI;
}
+//===----------------------------------------------------------------------===//
+// Post reg-alloc Zilsd fix-up implementation
+//===----------------------------------------------------------------------===//
+
+bool RISCVLoadStoreOpt::isValidZilsdRegPair(Register First, Register Second) {
+ // Special case: First register can not be zero unless both registers are
+ // zeros.
+ // Spec says: LD instructions with destination x0 are processed as any other
+ // load, but the result is discarded entirely and x1 is not written. If x0 is
+ // used as the source of SD, the entire 64-bit operand is zero and register x1
+ // is not accessed.
+ if (First == RISCV::X0)
+ return Second == RISCV::X0;
+
+ // Check if registers form a valid even/odd pair for Zilsd
+ unsigned FirstNum = TRI->getEncodingValue(First);
+ unsigned SecondNum = TRI->getEncodingValue(Second);
+
+ // Must be consecutive and first must be even
+ return (FirstNum % 2 == 0) && (SecondNum == FirstNum + 1);
+}
+
+void RISCVLoadStoreOpt::splitLdSdIntoTwo(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ bool IsLoad) {
+ MachineInstr *MI = &*MBBI;
+ DebugLoc DL = MI->getDebugLoc();
+
+ const MachineOperand &FirstOp = MI->getOperand(0);
+ const MachineOperand &SecondOp = MI->getOperand(1);
+ const MachineOperand &BaseOp = MI->getOperand(2);
+ Register FirstReg = FirstOp.getReg();
+ Register SecondReg = SecondOp.getReg();
+ Register BaseReg = BaseOp.getReg();
+
+ // Handle both immediate and symbolic operands for offset
+ const MachineOperand &OffsetOp = MI->getOperand(3);
+ int BaseOffset;
+ if (OffsetOp.isImm())
+ BaseOffset = OffsetOp.getImm();
+ else
+ // For symbolic operands, extract the embedded offset
+ BaseOffset = OffsetOp.getOffset();
+
+ unsigned Opc = IsLoad ? RISCV::LW : RISCV::SW;
+ MachineInstrBuilder MIB1, MIB2;
+
+ // Create two separate instructions
+ if (IsLoad) {
+ // It's possible that the first register is the same as the base register; when
+ // we split, the result is incorrect because the base register is overwritten, e.g.
+ // X10, X13 = PseudoLD_RV32_OPT killed X10, 0
+ // =>
+ // X10 = LW X10, 0
+ // X13 = LW killed X10, 4
+ // we can just switch the order to resolve that:
+ // X13 = LW X10, 4
+ // X10 = LW killed X10, 0
+ if (FirstReg == BaseReg) {
+ MIB2 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
+ .addReg(SecondReg,
+ RegState::Define | getDeadRegState(SecondOp.isDead()))
+ .addReg(BaseReg);
+ MIB1 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
+ .addReg(FirstReg,
+ RegState::Define | getDeadRegState(FirstOp.isDead()))
+ .addReg(BaseReg, getKillRegState(BaseOp.isKill()));
+
+ } else {
+ MIB1 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
+ .addReg(FirstReg,
+ RegState::Define | getDeadRegState(FirstOp.isDead()))
+ .addReg(BaseReg);
+
+ MIB2 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
+ .addReg(SecondReg,
+ RegState::Define | getDeadRegState(SecondOp.isDead()))
+ .addReg(BaseReg, getKillRegState(BaseOp.isKill()));
+ }
+
+ ++NumLD2LW;
+ LLVM_DEBUG(dbgs() << "Split LD back to two LW instructions\n");
+ } else {
+ assert(FirstReg != SecondReg &&
+ "First and second registers of an SD pair must not be the same register");
+ MIB1 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
+ .addReg(FirstReg, getKillRegState(FirstOp.isKill()))
+ .addReg(BaseReg);
+
+ MIB2 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
+ .addReg(SecondReg, getKillRegState(SecondOp.isKill()))
+ .addReg(BaseReg, getKillRegState(BaseOp.isKill()));
+
+ ++NumSD2SW;
+ LLVM_DEBUG(dbgs() << "Split SD back to two SW instructions\n");
+ }
+
+ // Add offset operands - preserve symbolic references
+ MIB1.add(OffsetOp);
+ if (OffsetOp.isImm())
+ MIB2.addImm(BaseOffset + 4);
+ else if (OffsetOp.isGlobal())
+ MIB2.addGlobalAddress(OffsetOp.getGlobal(), BaseOffset + 4,
+ OffsetOp.getTargetFlags());
+ else if (OffsetOp.isCPI())
+ MIB2.addConstantPoolIndex(OffsetOp.getIndex(), BaseOffset + 4,
+ OffsetOp.getTargetFlags());
+ else if (OffsetOp.isBlockAddress())
+ MIB2.addBlockAddress(OffsetOp.getBlockAddress(), BaseOffset + 4,
+ OffsetOp.getTargetFlags());
+
+ // Copy memory operands if the original instruction had them
+ // FIXME: This is overly conservative; the new instruction accesses 4 bytes,
+ // not 8.
+ MIB1.cloneMemRefs(*MI);
+ MIB2.cloneMemRefs(*MI);
+
+ // Remove the original paired instruction and update iterator
+ MBBI = MBB.erase(MBBI);
+}
+
+bool RISCVLoadStoreOpt::fixInvalidRegPairOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI) {
+ MachineInstr *MI = &*MBBI;
+ unsigned Opcode = MI->getOpcode();
+
+ // Check if this is a Zilsd pseudo that needs fixing
+ if (Opcode != RISCV::PseudoLD_RV32_OPT && Opcode != RISCV::PseudoSD_RV32_OPT)
+ return false;
+
+ bool IsLoad = Opcode == RISCV::PseudoLD_RV32_OPT;
+
+ const MachineOperand &FirstOp = MI->getOperand(0);
+ const MachineOperand &SecondOp = MI->getOperand(1);
+ Register FirstReg = FirstOp.getReg();
+ Register SecondReg = SecondOp.getReg();
+
+ if (!isValidZilsdRegPair(FirstReg, SecondReg)) {
+ // Need to split back into two instructions
+ splitLdSdIntoTwo(MBB, MBBI, IsLoad);
+ return true;
+ }
+
+ // Registers are valid, convert to real LD/SD instruction
+ const MachineOperand &BaseOp = MI->getOperand(2);
+ Register BaseReg = BaseOp.getReg();
+ DebugLoc DL = MI->getDebugLoc();
+ // Handle both immediate and symbolic operands for offset
+ const MachineOperand &OffsetOp = MI->getOperand(3);
+
+ unsigned RealOpc = IsLoad ? RISCV::LD_RV32 : RISCV::SD_RV32;
+
+ // Create register pair from the two individual registers
+ unsigned RegPair = TRI->getMatchingSuperReg(FirstReg, RISCV::sub_gpr_even,
+ &RISCV::GPRPairRegClass);
+ // Create the real LD/SD instruction with register pair
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(RealOpc));
+
+ if (IsLoad) {
+ // For LD, the register pair is the destination
+ MIB.addReg(RegPair, RegState::Define | getDeadRegState(FirstOp.isDead() &&
+ SecondOp.isDead()));
+ } else {
+ // For SD, the register pair is the source
+ MIB.addReg(RegPair, getKillRegState(FirstOp.isKill() && SecondOp.isKill()));
+ }
+
+ MIB.addReg(BaseReg, getKillRegState(BaseOp.isKill()))
+ .add(OffsetOp)
+ .cloneMemRefs(*MI);
+
+ LLVM_DEBUG(dbgs() << "Converted pseudo to real instruction: " << *MIB
+ << "\n");
+
+ // Remove the pseudo instruction and update iterator
+ MBBI = MBB.erase(MBBI);
+
+ return true;
+}
+
// Returns an instance of the Load / Store Optimization pass.
FunctionPass *llvm::createRISCVLoadStoreOptPass() {
return new RISCVLoadStoreOpt();
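
For readers unfamiliar with the pairing rule that fixInvalidRegPairOp enforces, here is a minimal standalone sketch of the even/odd check, written against raw GPR encodings (x0 == 0) rather than LLVM's Register/TargetRegisterInfo types; it is illustrative only and not part of the patch.

#include <cassert>

// Sketch of the rule in isValidZilsdRegPair: the first register must be even
// and the second must be the next consecutive register; x0 is only allowed as
// the first register when the whole pair is x0/x0.
static bool isValidZilsdPair(unsigned FirstEnc, unsigned SecondEnc) {
  if (FirstEnc == 0)
    return SecondEnc == 0;
  return (FirstEnc % 2 == 0) && (SecondEnc == FirstEnc + 1);
}

int main() {
  assert(isValidZilsdPair(10, 11));  // x10/x11: LD/SD stays paired
  assert(!isValidZilsdPair(11, 12)); // odd first register: split back to LW/SW
  assert(!isValidZilsdPair(10, 12)); // not consecutive: split back to LW/SW
  assert(isValidZilsdPair(0, 0));    // x0/x0 is the only pair allowed with x0 first
  return 0;
}
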
diff --git a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
index 87f0c8f..f3adac8 100644
--- a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
@@ -83,13 +83,14 @@ INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, DEBUG_TYPE,
// 3) The offset value in the Global Address or Constant Pool is 0.
bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
MachineInstr *&Lo) {
- if (Hi.getOpcode() != RISCV::LUI && Hi.getOpcode() != RISCV::AUIPC &&
- Hi.getOpcode() != RISCV::PseudoMovAddr)
+ auto HiOpc = Hi.getOpcode();
+ if (HiOpc != RISCV::LUI && HiOpc != RISCV::AUIPC &&
+ HiOpc != RISCV::PseudoMovAddr)
return false;
const MachineOperand &HiOp1 = Hi.getOperand(1);
unsigned ExpectedFlags =
- Hi.getOpcode() == RISCV::AUIPC ? RISCVII::MO_PCREL_HI : RISCVII::MO_HI;
+ HiOpc == RISCV::AUIPC ? RISCVII::MO_PCREL_HI : RISCVII::MO_HI;
if (HiOp1.getTargetFlags() != ExpectedFlags)
return false;
@@ -97,7 +98,7 @@ bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
HiOp1.getOffset() != 0)
return false;
- if (Hi.getOpcode() == RISCV::PseudoMovAddr) {
+ if (HiOpc == RISCV::PseudoMovAddr) {
// Most of the code should handle it correctly without modification by
// setting Lo and Hi both point to PseudoMovAddr
Lo = &Hi;
@@ -112,13 +113,13 @@ bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
}
const MachineOperand &LoOp2 = Lo->getOperand(2);
- if (Hi.getOpcode() == RISCV::LUI || Hi.getOpcode() == RISCV::PseudoMovAddr) {
+ if (HiOpc == RISCV::LUI || HiOpc == RISCV::PseudoMovAddr) {
if (LoOp2.getTargetFlags() != RISCVII::MO_LO ||
!(LoOp2.isGlobal() || LoOp2.isCPI() || LoOp2.isBlockAddress()) ||
LoOp2.getOffset() != 0)
return false;
} else {
- assert(Hi.getOpcode() == RISCV::AUIPC);
+ assert(HiOpc == RISCV::AUIPC);
if (LoOp2.getTargetFlags() != RISCVII::MO_PCREL_LO ||
LoOp2.getType() != MachineOperand::MO_MCSymbol)
return false;
@@ -148,7 +149,8 @@ bool RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &Hi, MachineInstr &Lo,
// If Hi is an AUIPC, don't fold the offset if it is outside the bounds of
// the global object. The object may be within 2GB of the PC, but addresses
// outside of the object might not be.
- if (Hi.getOpcode() == RISCV::AUIPC && Hi.getOperand(1).isGlobal()) {
+ auto HiOpc = Hi.getOpcode();
+ if (HiOpc == RISCV::AUIPC && Hi.getOperand(1).isGlobal()) {
const GlobalValue *GV = Hi.getOperand(1).getGlobal();
Type *Ty = GV->getValueType();
if (!Ty->isSized() || Offset < 0 ||
@@ -158,12 +160,13 @@ bool RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &Hi, MachineInstr &Lo,
// Put the offset back in Hi and the Lo
Hi.getOperand(1).setOffset(Offset);
- if (Hi.getOpcode() != RISCV::AUIPC)
+ if (HiOpc != RISCV::AUIPC)
Lo.getOperand(2).setOffset(Offset);
// Delete the tail instruction.
- MRI->constrainRegClass(Lo.getOperand(0).getReg(),
- MRI->getRegClass(Tail.getOperand(0).getReg()));
- MRI->replaceRegWith(Tail.getOperand(0).getReg(), Lo.getOperand(0).getReg());
+ Register LoOp0Reg = Lo.getOperand(0).getReg();
+ Register TailOp0Reg = Tail.getOperand(0).getReg();
+ MRI->constrainRegClass(LoOp0Reg, MRI->getRegClass(TailOp0Reg));
+ MRI->replaceRegWith(TailOp0Reg, LoOp0Reg);
Tail.eraseFromParent();
LLVM_DEBUG(dbgs() << " Merged offset " << Offset << " into base.\n"
<< " " << Hi << " " << Lo;);
@@ -204,8 +207,8 @@ bool RISCVMergeBaseOffsetOpt::foldLargeOffset(MachineInstr &Hi,
return false;
// This can point to an ADDI(W) or a LUI:
MachineInstr &OffsetTail = *MRI->getVRegDef(Reg);
- if (OffsetTail.getOpcode() == RISCV::ADDI ||
- OffsetTail.getOpcode() == RISCV::ADDIW) {
+ auto OffsetTailOpc = OffsetTail.getOpcode();
+ if (OffsetTailOpc == RISCV::ADDI || OffsetTailOpc == RISCV::ADDIW) {
// The offset value has non zero bits in both %hi and %lo parts.
// Detect an ADDI that feeds from a LUI instruction.
MachineOperand &AddiImmOp = OffsetTail.getOperand(2);
@@ -232,7 +235,7 @@ bool RISCVMergeBaseOffsetOpt::foldLargeOffset(MachineInstr &Hi,
int64_t Offset = SignExtend64<32>(LuiImmOp.getImm() << 12);
Offset += OffLo;
// RV32 ignores the upper 32 bits. ADDIW sign extends the result.
- if (!ST->is64Bit() || OffsetTail.getOpcode() == RISCV::ADDIW)
+ if (!ST->is64Bit() || OffsetTailOpc == RISCV::ADDIW)
Offset = SignExtend64<32>(Offset);
// We can only fold simm32 offsets.
if (!isInt<32>(Offset))
@@ -244,7 +247,7 @@ bool RISCVMergeBaseOffsetOpt::foldLargeOffset(MachineInstr &Hi,
OffsetTail.eraseFromParent();
OffsetLui.eraseFromParent();
return true;
- } else if (OffsetTail.getOpcode() == RISCV::LUI) {
+ } else if (OffsetTailOpc == RISCV::LUI) {
// The offset value has all zero bits in the lower 12 bits. Only LUI
// exists.
LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail);
@@ -503,14 +506,15 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
Hi.getOperand(1).setOffset(NewOffset);
MachineOperand &ImmOp = Lo.getOperand(2);
+ auto HiOpc = Hi.getOpcode();
// Expand PseudoMovAddr into LUI
- if (Hi.getOpcode() == RISCV::PseudoMovAddr) {
+ if (HiOpc == RISCV::PseudoMovAddr) {
auto *TII = ST->getInstrInfo();
Hi.setDesc(TII->get(RISCV::LUI));
Hi.removeOperand(2);
}
- if (Hi.getOpcode() != RISCV::AUIPC)
+ if (HiOpc != RISCV::AUIPC)
ImmOp.setOffset(NewOffset);
// Update the immediate in the load/store instructions to add the offset.
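
The foldLargeOffset path touched above recombines a constant that was materialized as LUI plus ADDI(W). A rough sketch of that arithmetic follows, using plain integers instead of MachineOperands; the helper name is made up for illustration.

#include <cassert>
#include <cstdint>

static int64_t recombineHiLo(int64_t LuiImm, int64_t AddiImm, bool Rv32OrAddiw) {
  // SignExtend64<32>(LuiImm << 12): the LUI immediate fills bits 12..31.
  int64_t Offset = int64_t(int32_t(uint32_t(LuiImm) << 12));
  Offset += AddiImm;
  // RV32 ignores the upper 32 bits and ADDIW sign-extends the result.
  if (Rv32OrAddiw)
    Offset = int64_t(int32_t(Offset));
  return Offset;
}

int main() {
  // 0x12345678 = LUI 0x12345 + ADDI 0x678 (low 12 bits already non-negative).
  assert(recombineHiLo(0x12345, 0x678, false) == 0x12345678);
  // 0x12345FFF = LUI 0x12346 + ADDI -1 (low 12 bits sign-extend to -1).
  assert(recombineHiLo(0x12346, -1, false) == 0x12345FFF);
  return 0;
}
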
diff --git a/llvm/lib/Target/RISCV/RISCVPassRegistry.def b/llvm/lib/Target/RISCV/RISCVPassRegistry.def
new file mode 100644
index 0000000..29ccf2c
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVPassRegistry.def
@@ -0,0 +1,20 @@
+//===- RISCVPassRegistry.def - Registry of RISC-V passes --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is used as the registry of passes that are part of the RISC-V
+// backend.
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+#ifndef FUNCTION_PASS
+#define FUNCTION_PASS(NAME, CREATE_PASS)
+#endif
+FUNCTION_PASS("riscv-codegenprepare", RISCVCodeGenPreparePass(this))
+#undef FUNCTION_PASS
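
As a rough sketch of how a .def registry like this is typically consumed: the includer defines FUNCTION_PASS to whatever it needs, includes the file, and the macro expands once per entry. The consumer function below is hypothetical (in-tree the include happens in the target machine's pass-builder hooks), and the entry is simply repeated inline here instead of being pulled in via #include.

#include <cstdio>

static void listRISCVFunctionPasses() {
#define FUNCTION_PASS(NAME, CREATE_PASS) std::printf("function pass: %s\n", NAME);
  // In-tree this line would be: #include "RISCVPassRegistry.def"
  FUNCTION_PASS("riscv-codegenprepare", RISCVCodeGenPreparePass(TM))
#undef FUNCTION_PASS
}

int main() {
  listRISCVFunctionPasses();
  return 0;
}
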
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index e86431f..5becfd2 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -141,7 +141,7 @@ def ROCKET : RISCVTuneProcessorModel<"rocket",
RocketModel>;
defvar SiFive7TuneFeatures = [TuneSiFive7, TuneNoDefaultUnroll,
- TuneShortForwardBranchOpt,
+ TuneShortForwardBranchIALU,
TunePostRAScheduler];
def SIFIVE_7 : RISCVTuneProcessorModel<"sifive-7-series",
SiFive7Model, SiFive7TuneFeatures>;
@@ -633,6 +633,13 @@ def TENSTORRENT_ASCALON_D8 : RISCVProcessorModel<"tt-ascalon-d8",
FeatureUnalignedVectorMem]),
[TuneNoDefaultUnroll,
TuneNLogNVRGather,
+ TuneOptimizedNF2SegmentLoadStore,
+ TuneOptimizedNF3SegmentLoadStore,
+ TuneOptimizedNF4SegmentLoadStore,
+ TuneOptimizedNF5SegmentLoadStore,
+ TuneOptimizedNF6SegmentLoadStore,
+ TuneOptimizedNF7SegmentLoadStore,
+ TuneOptimizedNF8SegmentLoadStore,
TuneOptimizedZeroStrideLoad,
TunePostRAScheduler]>;
@@ -798,7 +805,7 @@ def ANDES_AX25 : RISCVProcessorModel<"andes-ax25",
defvar Andes45TuneFeatures = [TuneAndes45,
TuneNoDefaultUnroll,
- TuneShortForwardBranchOpt,
+ TuneShortForwardBranchIALU,
TunePostRAScheduler];
def ANDES_45 : RISCVTuneProcessorModel<"andes-45-series",
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 84bb294..d802d19 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -864,6 +864,46 @@ bool RISCVRegisterInfo::getRegAllocationHints(
const MachineRegisterInfo *MRI = &MF.getRegInfo();
auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
+ // Handle RegPairEven/RegPairOdd hints for Zilsd register pairs
+ std::pair<unsigned, Register> Hint = MRI->getRegAllocationHint(VirtReg);
+ unsigned HintType = Hint.first;
+ Register Partner = Hint.second;
+
+ MCRegister TargetReg;
+ if (HintType == RISCVRI::RegPairEven || HintType == RISCVRI::RegPairOdd) {
+ // Check if we want the even or odd register of a consecutive pair
+ bool WantOdd = (HintType == RISCVRI::RegPairOdd);
+
+ // First priority: Check if partner is already allocated
+ if (Partner.isVirtual() && VRM && VRM->hasPhys(Partner)) {
+ MCRegister PartnerPhys = VRM->getPhys(Partner);
+ // Calculate the exact register we need for consecutive pairing
+ TargetReg = PartnerPhys.id() + (WantOdd ? 1 : -1);
+
+ // Verify it's valid and available
+ if (RISCV::GPRRegClass.contains(TargetReg) &&
+ is_contained(Order, TargetReg))
+ Hints.push_back(TargetReg.id());
+ }
+
+ // Second priority: Try to find consecutive register pairs in the allocation
+ // order
+ for (MCPhysReg PhysReg : Order) {
+ // Don't add the hint if we already added it above.
+ if (TargetReg == PhysReg)
+ continue;
+
+ unsigned RegNum = getEncodingValue(PhysReg);
+ // Check if this register matches the even/odd requirement
+ bool IsOdd = (RegNum % 2 != 0);
+
+ // Don't provide hints that are paired to a reserved register.
+ MCRegister Paired = PhysReg + (IsOdd ? -1 : 1);
+ if (WantOdd == IsOdd && !MRI->isReserved(Paired))
+ Hints.push_back(PhysReg);
+ }
+ }
+
bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
VirtReg, Order, Hints, MF, VRM, Matrix);
@@ -1005,6 +1045,35 @@ bool RISCVRegisterInfo::getRegAllocationHints(
return BaseImplRetVal;
}
+void RISCVRegisterInfo::updateRegAllocHint(Register Reg, Register NewReg,
+ MachineFunction &MF) const {
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ std::pair<unsigned, Register> Hint = MRI->getRegAllocationHint(Reg);
+
+ // Handle RegPairEven/RegPairOdd hints for Zilsd register pairs
+ if ((Hint.first == RISCVRI::RegPairOdd ||
+ Hint.first == RISCVRI::RegPairEven) &&
+ Hint.second.isVirtual()) {
+ // If 'Reg' is one half of an even/odd register pair and it has now been
+ // changed (e.g. coalesced) into a different register, the other half's pair
+ // allocation hint must be updated to reflect the new relationship.
+ Register Partner = Hint.second;
+ std::pair<unsigned, Register> PartnerHint =
+ MRI->getRegAllocationHint(Partner);
+
+ // Make sure partner still points to us
+ if (PartnerHint.second == Reg) {
+ // Update partner to point to NewReg instead of Reg
+ MRI->setRegAllocationHint(Partner, PartnerHint.first, NewReg);
+
+ // If NewReg is virtual, set up the reciprocal hint
+ // NewReg takes over Reg's role, so it gets the SAME hint type as Reg
+ if (NewReg.isVirtual())
+ MRI->setRegAllocationHint(NewReg, Hint.first, Partner);
+ }
+ }
+}
+
Register
RISCVRegisterInfo::findVRegWithEncoding(const TargetRegisterClass &RegClass,
uint16_t Encoding) const {
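
A small illustration of the neighbour arithmetic behind the RegPairEven/RegPairOdd hints above, using raw x-register encodings instead of MCRegister; the range check is only a stand-in for the GPR class and allocation-order checks done in the real hook.

#include <cassert>

// If the partner virtual register already has a physical register, the hinted
// register is its immediate neighbour: one above for the odd half of the pair,
// one below for the even half.
static int pairedGPRHint(unsigned PartnerEnc, bool WantOdd) {
  int Candidate = int(PartnerEnc) + (WantOdd ? 1 : -1);
  if (Candidate < 0 || Candidate > 31) // stand-in for the GPR class check
    return -1;
  return Candidate;
}

int main() {
  assert(pairedGPRHint(10, /*WantOdd=*/true) == 11);  // partner got x10 -> hint x11
  assert(pairedGPRHint(11, /*WantOdd=*/false) == 10); // partner got x11 -> hint x10
  assert(pairedGPRHint(0, /*WantOdd=*/false) == -1);  // nothing below x0 to hint
  return 0;
}
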
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
index 67726db..f29f85e 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
@@ -37,6 +37,13 @@ enum : uint8_t {
NFShiftMask = 0b111 << NFShift,
};
+/// Register allocation hints for Zilsd register pairs
+enum {
+ // Used for Zilsd LD/SD register pairs
+ RegPairOdd = 1,
+ RegPairEven = 2,
+};
+
/// \returns the IsVRegClass for the register class.
static inline bool isVRegClass(uint8_t TSFlags) {
return (TSFlags & IsVRegClassShiftMask) >> IsVRegClassShift;
@@ -143,6 +150,9 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
const MachineFunction &MF, const VirtRegMap *VRM,
const LiveRegMatrix *Matrix) const override;
+ void updateRegAllocHint(Register Reg, Register NewReg,
+ MachineFunction &MF) const override;
+
Register findVRegWithEncoding(const TargetRegisterClass &RegClass,
uint16_t Encoding) const;
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index 6605a5c..f354793 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -222,6 +222,14 @@ def XLenFVT : ValueTypeByHwMode<[RV64],
[f64]>;
def XLenPairFVT : ValueTypeByHwMode<[RV32],
[f64]>;
+
+// P extension
+def XLenVecI8VT : ValueTypeByHwMode<[RV32, RV64],
+ [v4i8, v8i8]>;
+def XLenVecI16VT : ValueTypeByHwMode<[RV32, RV64],
+ [v2i16, v4i16]>;
+def XLenVecI32VT : ValueTypeByHwMode<[RV64],
+ [v2i32]>;
def XLenRI : RegInfoByHwMode<
[RV32, RV64],
[RegInfo<32,32,32>, RegInfo<64,64,64>]>;
@@ -238,7 +246,9 @@ class RISCVRegisterClass<list<ValueType> regTypes, int align, dag regList>
}
class GPRRegisterClass<dag regList>
- : RISCVRegisterClass<[XLenVT, XLenFVT], 32, regList> {
+ : RISCVRegisterClass<[XLenVT, XLenFVT,
+ // P extension packed vector types:
+ XLenVecI8VT, XLenVecI16VT, XLenVecI32VT], 32, regList> {
let RegInfos = XLenRI;
}
@@ -803,6 +813,7 @@ def VMV0 : VReg<VMaskVTs, (add V0), 1>;
// The register class is added for inline assembly for vector mask types.
def VM : VReg<VMaskVTs, (add VR), 1>;
+def VMNoV0 : VReg<VMaskVTs, (sub VR, V0), 1>;
defvar VTupM1N2VTs = [riscv_nxv8i8x2, riscv_nxv4i8x2, riscv_nxv2i8x2, riscv_nxv1i8x2];
defvar VTupM1N3VTs = [riscv_nxv8i8x3, riscv_nxv4i8x3, riscv_nxv2i8x3, riscv_nxv1i8x3];
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 36a2f46..f8a7013 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -562,7 +562,7 @@ multiclass SiFive7WriteResBase<int VLEN,
// resource, we do not need to use LMULSEWXXX constructors. However, we do
// use the SEW from the name to determine the number of Cycles.
- foreach mx = SchedMxList in {
+ foreach mx = SchedMxListEEW8 in {
defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 8, VLEN>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
@@ -582,10 +582,8 @@ multiclass SiFive7WriteResBase<int VLEN,
defm : LMULWriteResMX<"WriteVSTOX8", [VCQ, VS], mx, IsWorstCase>;
}
}
- // TODO: The MxLists need to be filtered by EEW. We only need to support
- // LMUL >= SEW_min/ELEN. Here, the smallest EEW prevents us from having MF8
- // since LMUL >= 16/64.
- foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
+
+ foreach mx = SchedMxListEEW16 in {
defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 16, VLEN>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
@@ -605,7 +603,7 @@ multiclass SiFive7WriteResBase<int VLEN,
defm : LMULWriteResMX<"WriteVSTOX16", [VCQ, VS], mx, IsWorstCase>;
}
}
- foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
+ foreach mx = SchedMxListEEW32 in {
defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 32, VLEN>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
@@ -625,7 +623,7 @@ multiclass SiFive7WriteResBase<int VLEN,
defm : LMULWriteResMX<"WriteVSTOX32", [VCQ, VS], mx, IsWorstCase>;
}
}
- foreach mx = ["M1", "M2", "M4", "M8"] in {
+ foreach mx = SchedMxListEEW64 in {
defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 64, VLEN>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
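
The switch to the SchedMxListEEW* lists above encodes the constraint from the removed TODO: an element width EEW is only reachable at LMUL >= EEW/ELEN. A quick numeric check of that rule follows, with LMUL expressed in eighths (MF8 = 1, ..., M1 = 8) and ELEN assumed to be 64; this is just arithmetic for illustration, not LLVM code.

#include <cassert>

// Minimum legal LMUL (in eighths) for a given element width, per LMUL >= EEW/ELEN.
static int minLMULEighths(int EEW, int ELEN) {
  int Eighths = (8 * EEW) / ELEN;
  return Eighths < 1 ? 1 : Eighths;
}

int main() {
  assert(minLMULEighths(8, 64) == 1);  // EEW=8  list starts at MF8
  assert(minLMULEighths(16, 64) == 2); // EEW=16 list starts at MF4
  assert(minLMULEighths(32, 64) == 4); // EEW=32 list starts at MF2
  assert(minLMULEighths(64, 64) == 8); // EEW=64 list starts at M1
  return 0;
}
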
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
index 41071b2..1cbb6db 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
@@ -104,6 +104,11 @@ class Get461018Latency<string mx> {
int c = GetLMULValue<[/*MF8=*/4, /*MF4=*/4, /*MF2=*/4, /*M1=*/4, /*M2=*/6, /*M4=*/10, /*M8=*/18], mx>.c;
}
+// Used for: FP FMA operations, complex FP ops
+class Get6678Latency<string mx> {
+ int c = GetLMULValue<[/*MF8=*/6, /*MF4=*/6, /*MF2=*/6, /*M1=*/6, /*M2=*/6, /*M4=*/7, /*M8=*/8], mx>.c;
+}
+
//===----------------------------------------------------------------------===//
class SMX60IsWorstCaseMX<string mx, list<string> MxList> {
@@ -120,6 +125,33 @@ class SMX60IsWorstCaseMXSEW<string mx, int sew, list<string> MxList, bit isF = 0
defvar SMX60VLEN = 256;
defvar SMX60DLEN = !div(SMX60VLEN, 2);
+class SMX60GetLMulCycles<string mx> {
+ int c = !cond(
+ !eq(mx, "M1") : 1,
+ !eq(mx, "M2") : 2,
+ !eq(mx, "M4") : 4,
+ !eq(mx, "M8") : 8,
+ !eq(mx, "MF2") : 1,
+ !eq(mx, "MF4") : 1,
+ !eq(mx, "MF8") : 1
+ );
+}
+
+class SMX60GetVLMAX<string mx, int sew> {
+ defvar LMUL = SMX60GetLMulCycles<mx>.c;
+ int val = !cond(
+ !eq(mx, "MF2") : !div(!div(SMX60VLEN, 2), sew),
+ !eq(mx, "MF4") : !div(!div(SMX60VLEN, 4), sew),
+ !eq(mx, "MF8") : !div(!div(SMX60VLEN, 8), sew),
+ true: !div(!mul(SMX60VLEN, LMUL), sew)
+ );
+}
+
+// Latency for segmented loads and stores is calculated as vl * nf.
+class SMX60SegmentedLdStCycles<string mx, int sew, int nf> {
+ int c = !mul(SMX60GetVLMAX<mx, sew>.val, nf);
+}
+
def SpacemitX60Model : SchedMachineModel {
let IssueWidth = 2; // dual-issue
let MicroOpBufferSize = 0; // in-order
@@ -362,23 +394,43 @@ foreach mx = SchedMxList in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;
// Unit-stride loads and stores
- defm "" : LMULWriteResMX<"WriteVLDE", [SMX60_VLS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLDFF", [SMX60_VLS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSTE", [SMX60_VLS], mx, IsWorstCase>;
+ defvar VLDELatAndOcc = ConstValueUntilLMULThenDoubleBase<"M2", 3, 4, mx>.c;
+ let Latency = VLDELatAndOcc, ReleaseAtCycles = [VLDELatAndOcc] in {
+ defm "" : LMULWriteResMX<"WriteVLDE", [SMX60_VLS], mx, IsWorstCase>;
+ }
+ defvar VSTELatAndOcc = GetLMULValue<[2, 2, 2, 3, 4, 8, 19], mx>.c;
+ let Latency = VSTELatAndOcc, ReleaseAtCycles = [VSTELatAndOcc] in {
+ defm "" : LMULWriteResMX<"WriteVSTE", [SMX60_VLS], mx, IsWorstCase>;
+ }
+ defvar VLDFFLatAndOcc = GetLMULValue<[4, 4, 4, 5, 7, 11, 19], mx>.c;
+ let Latency = VLDFFLatAndOcc, ReleaseAtCycles = [VLDFFLatAndOcc] in {
+ defm "" : LMULWriteResMX<"WriteVLDFF", [SMX60_VLS], mx, IsWorstCase>;
+ }
// Mask loads and stores
- defm "" : LMULWriteResMX<"WriteVLDM", [SMX60_VLS], mx, IsWorstCase=!eq(mx, "M1")>;
- defm "" : LMULWriteResMX<"WriteVSTM", [SMX60_VLS], mx, IsWorstCase=!eq(mx, "M1")>;
+ let ReleaseAtCycles = [2] in {
+ defm "" : LMULWriteResMX<"WriteVLDM", [SMX60_VLS], mx, IsWorstCase>;
+ }
+ let Latency = 2, ReleaseAtCycles = [2] in {
+ defm "" : LMULWriteResMX<"WriteVSTM", [SMX60_VLS], mx, IsWorstCase>;
+ }
// Strided and indexed loads and stores
foreach eew = [8, 16, 32, 64] in {
- defm "" : LMULWriteResMX<"WriteVLDS" # eew, [SMX60_VLS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLDUX" # eew, [SMX60_VLS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLDOX" # eew, [SMX60_VLS], mx, IsWorstCase>;
+ defvar StridedLdStLatAndOcc = SMX60GetVLMAX<mx, eew>.val;
+ let Latency = StridedLdStLatAndOcc, ReleaseAtCycles = [StridedLdStLatAndOcc] in {
+ defm "" : LMULWriteResMX<"WriteVLDS" # eew, [SMX60_VLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTS" # eew, [SMX60_VLS], mx, IsWorstCase>;
+ }
+
+ defvar IndexedLdStLatAndOcc = !div(SMX60GetVLMAX<mx, eew>.val, 2);
+ let Latency = IndexedLdStLatAndOcc, ReleaseAtCycles = [IndexedLdStLatAndOcc] in {
+ defm "" : LMULWriteResMX<"WriteVLDUX" # eew, [SMX60_VLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDOX" # eew, [SMX60_VLS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSTS" # eew, [SMX60_VLS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSTUX" # eew, [SMX60_VLS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSTOX" # eew, [SMX60_VLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTUX" # eew, [SMX60_VLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTOX" # eew, [SMX60_VLS], mx, IsWorstCase>;
+ }
}
}
@@ -388,51 +440,67 @@ foreach mx = SchedMxList in {
foreach eew = [8, 16, 32, 64] in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;
- // Unit-stride segmented
- defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
-
- // Strided/indexed segmented
- defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
-
- // Indexed segmented
- defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
+ defvar SegmentedLdStLatAndOcc = SMX60SegmentedLdStCycles<mx, eew, nf>.c;
+ let Latency = SegmentedLdStLatAndOcc, ReleaseAtCycles = [SegmentedLdStLatAndOcc] in {
+ // Unit-stride segmented
+ defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
+
+ // Strided/indexed segmented
+ defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
+
+ // Indexed segmented
+ defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
+ }
}
}
}
// Whole register move/load/store
foreach LMul = [1, 2, 4, 8] in {
- def : WriteRes<!cast<SchedWrite>("WriteVLD" # LMul # "R"), [SMX60_VLS]>;
- def : WriteRes<!cast<SchedWrite>("WriteVST" # LMul # "R"), [SMX60_VLS]>;
+ defvar WholeRegLdStLatAndOcc = !if(!eq(LMul, 1), 3, !mul(LMul, 2));
+ let Latency = WholeRegLdStLatAndOcc, ReleaseAtCycles = [WholeRegLdStLatAndOcc] in {
+ def : WriteRes<!cast<SchedWrite>("WriteVLD" # LMul # "R"), [SMX60_VLS]>;
+ def : WriteRes<!cast<SchedWrite>("WriteVST" # LMul # "R"), [SMX60_VLS]>;
+ }
- def : WriteRes<!cast<SchedWrite>("WriteVMov" # LMul # "V"), [SMX60_VIEU]>;
+ defvar VMovLatAndOcc = !if(!eq(LMul, 1), 4, !mul(LMul, 2));
+ let Latency = VMovLatAndOcc, ReleaseAtCycles = [VMovLatAndOcc] in {
+ def : WriteRes<!cast<SchedWrite>("WriteVMov" # LMul # "V"), [SMX60_VIEU]>;
+ }
}
// 11. Vector Integer Arithmetic Instructions
foreach mx = SchedMxList in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = Get4458Latency<mx>.c, ReleaseAtCycles = [4] in {
+ let Latency = Get4458Latency<mx>.c, ReleaseAtCycles = [ConstOneUntilM1ThenDouble<mx>.c] in {
defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SMX60_VIEU], mx, IsWorstCase>;
}
+ // Latency of vadd, vsub, vrsub: 4/4/5/8
+ // ReleaseAtCycles of vadd, vsub, vrsub: 1/2/4/8
+ // Latency of vand, vor, vxor: 4/4/8/16
+ // ReleaseAtCycles of vand, vor, vxor: 2/4/8/16
+ // They are grouped together, so we use the worst case: 4/4/8/16 and 2/4/8/16
+ // TODO: use InstRW to override individual instructions' scheduling data
defvar VIALULat = ConstValueUntilLMULThenDouble<"M2", 4, mx>.c;
- let Latency = VIALULat, ReleaseAtCycles = [4] in {
- // Pattern of vadd, vsub, vrsub: 4/4/5/8
- // Pattern of vand, vor, vxor: 4/4/8/16
- // They are grouped together, so we used the worst case 4/4/8/16
- // TODO: use InstRW to override individual instructions' scheduling data
+ defvar VIALUOcc = ConstOneUntilMF2ThenDouble<mx>.c;
+ let Latency = VIALULat, ReleaseAtCycles = [VIALUOcc] in {
defm "" : LMULWriteResMX<"WriteVIALUV", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVIALUX", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVIALUI", [SMX60_VIEU], mx, IsWorstCase>;
+ }
+ defvar VILogicalLat = ConstValueUntilLMULThenDouble<"M2", 4, mx>.c;
+ defvar VILogicalOcc = ConstValueUntilLMULThenDouble<"MF2", 1, mx>.c;
+ let Latency = VILogicalLat, ReleaseAtCycles = [VILogicalOcc] in {
defm "" : LMULWriteResMX<"WriteVExtV", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVIMergeV", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVIMergeX", [SMX60_VIEU], mx, IsWorstCase>;
@@ -449,7 +517,9 @@ foreach mx = SchedMxList in {
defm "" : LMULWriteResMX<"WriteVICALUI", [SMX60_VIEU], mx, IsWorstCase>;
}
- let Latency = Get461018Latency<mx>.c, ReleaseAtCycles = [4] in {
+ // Slightly increase Occ when LMUL == M8
+ defvar VICmpCarryOcc = GetLMULValue<[1, 1, 1, 2, 4, 8, 18], mx>.c;
+ let Latency = Get461018Latency<mx>.c, ReleaseAtCycles = [VICmpCarryOcc] in {
defm "" : LMULWriteResMX<"WriteVICALUMV", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVICALUMX", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVICALUMI", [SMX60_VIEU], mx, IsWorstCase>;
@@ -458,10 +528,14 @@ foreach mx = SchedMxList in {
defm "" : LMULWriteResMX<"WriteVICmpI", [SMX60_VIEU], mx, IsWorstCase>;
}
- // Pattern of vmacc, vmadd, vmul, vmulh, etc.: e8/e16 = 4/4/5/8, e32 = 5,5,5,8,
+ // Latency of vmacc, vmadd, vmul, vmulh, etc.: e8/e16 = 4/4/5/8, e32 = 5,5,5,8,
// e64 = 7,8,16,32. We use the worst-case until we can split the SEW.
// TODO: change WriteVIMulV, etc to be defined with LMULSEWSchedWrites
- let Latency = ConstValueUntilLMULThenDoubleBase<"M2", 7, 8, mx>.c, ReleaseAtCycles = [7] in {
+ defvar VIMulLat = ConstValueUntilLMULThenDoubleBase<"M2", 7, 8, mx>.c;
+ // ReleaseAtCycles for vnmsac/vnmsub is 1/1/1/1/2/5 but we use the worst case
+ // here since they are grouped together with vmacc/vmadd/vmul/vmulh.
+ defvar VIMulOcc = ConstOneUntilM1ThenDouble<mx>.c;
+ let Latency = VIMulLat, ReleaseAtCycles = [VIMulOcc] in {
defm "" : LMULWriteResMX<"WriteVIMulV", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVIMulX", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVIMulAddV", [SMX60_VIEU], mx, IsWorstCase>;
@@ -475,7 +549,8 @@ foreach mx = SchedMxList in {
foreach mx = SchedMxListW in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxListW>.c;
- let Latency = Get4588Latency<mx>.c, ReleaseAtCycles = [4] in {
+ defvar VIWideningOcc = ConstOneUntilMF2ThenDouble<mx>.c;
+ let Latency = Get4588Latency<mx>.c, ReleaseAtCycles = [VIWideningOcc] in {
defm "" : LMULWriteResMX<"WriteVIWALUV", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVIWALUX", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVIWALUI", [SMX60_VIEU], mx, IsWorstCase>;
@@ -497,8 +572,9 @@ foreach mx = SchedMxList in {
foreach sew = SchedSEWSet<mx>.val in {
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
- defvar VIDivLat = ConstValueUntilLMULThenDouble<"MF2", 12, mx>.c;
- let Latency = VIDivLat, ReleaseAtCycles = [12] in {
+ // Not pipelined
+ defvar VIDivLatAndOcc = ConstValueUntilLMULThenDouble<"MF2", 12, mx>.c;
+ let Latency = VIDivLatAndOcc, ReleaseAtCycles = [VIDivLatAndOcc] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SMX60_VIEU], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SMX60_VIEU], mx, sew, IsWorstCase>;
}
@@ -510,7 +586,8 @@ foreach mx = SchedMxListW in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxListW>.c;
defvar VNarrowingLat = ConstValueUntilLMULThenDouble<"M1", 4, mx>.c;
- let Latency = VNarrowingLat, ReleaseAtCycles = [4] in {
+ defvar VNarrowingOcc = ConstValueUntilLMULThenDouble<"MF4", 1, mx>.c;
+ let Latency = VNarrowingLat, ReleaseAtCycles = [VNarrowingOcc] in {
defm "" : LMULWriteResMX<"WriteVNShiftV", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVNShiftX", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVNShiftI", [SMX60_VIEU], mx, IsWorstCase>;
@@ -558,39 +635,71 @@ foreach mx = SchedMxListF in {
foreach sew = SchedSEWSet<mx, isF=1>.val in {
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFALUV", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFALUF", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFMulV", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFMulF", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [SMX60_VFP], mx, sew, IsWorstCase>;
- }
-}
+ defvar VFALULat = Get4458Latency<mx>.c;
+ defvar VFALUOcc = ConstOneUntilM1ThenDouble<mx>.c;
+ let Latency = VFALULat, ReleaseAtCycles = [VFALUOcc] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFALUV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFALUF", [SMX60_VFP], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
-foreach mx = SchedMxListF in {
- foreach sew = SchedSEWSet<mx, isF=1>.val in {
- defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
+ // Slightly increased latency for sew == 64
+ defvar VFMulVLat = !if(!eq(sew, 64), ConstValueUntilLMULThenDoubleBase<"M8", 5, 8, mx>.c,
+ Get4458Latency<mx>.c);
+ let Latency = VFMulVLat, ReleaseAtCycles = [ConstOneUntilM1ThenDouble<mx>.c] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMulV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
+ // VFMulF has the same latency as VFMulV, but slightly lower ReleaseAtCycles
+ let Latency = VFMulVLat, ReleaseAtCycles = [ConstOneUntilM1ThenDouble<mx>.c] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMulF", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
- defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [SMX60_VFP], mx, sew, IsWorstCase>;
+ defvar VFSgnjLat = ConstValueUntilLMULThenDouble<"M2", 4, mx>.c;
+ defvar VFSgnjOcc = ConstOneUntilMF2ThenDouble<mx>.c;
+ let Latency = VFSgnjLat, ReleaseAtCycles = [VFSgnjOcc] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [SMX60_VFP], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
- defm "" : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ // The following covers vfmacc, vfmsac, and their vfn* variants in the same group, but the
+ // ReleaseAtCycles takes one extra cycle for the vfn* variants.
+ // TODO: Should we split them?
+ // TODO: for some reason the following cond is not working and always uses ConstValueUntilLMULThenDoubleBase<"M4", 5, 8, mx>.c
+ defvar VFMulAddLatency = !if(!eq(sew, 64),
+ Get6678Latency<mx>.c,
+ ConstValueUntilLMULThenDoubleBase<"M8", 5, 8, mx>.c
+ );
+ let Latency = VFMulAddLatency, ReleaseAtCycles = [ConstOneUntilM1ThenDouble<mx>.c] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
}
}
foreach mx = SchedMxList in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;
- defm "" : LMULWriteResMX<"WriteVFCmpV", [SMX60_VFP], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFCmpF", [SMX60_VFP], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFClassV", [SMX60_VFP], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMergeV", [SMX60_VFP], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMovV", [SMX60_VFP], mx, IsWorstCase>;
+ // Slightly increased ReleaseAtCycles for M8: 18
+ defvar VFCmpOcc = !if(!eq(mx, "M8"),
+ !add(ConstOneUntilMF2ThenDouble<mx>.c, 2),
+ ConstOneUntilMF2ThenDouble<mx>.c
+ );
+ let Latency = Get461018Latency<mx>.c, ReleaseAtCycles = [VFCmpOcc] in {
+ defm "" : LMULWriteResMX<"WriteVFCmpV", [SMX60_VFP], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFCmpF", [SMX60_VFP], mx, IsWorstCase>;
+ }
- defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SMX60_VFP], mx, IsWorstCase>;
+ defvar VFClassLat = ConstValueUntilLMULThenDouble<"M2", 4, mx>.c;
+ defvar VFClassOcc = ConstOneUntilMF2ThenDouble<mx>.c;
+ let Latency = VFClassLat, ReleaseAtCycles = [VFClassOcc] in {
+ defm "" : LMULWriteResMX<"WriteVFClassV", [SMX60_VFP], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFMergeV", [SMX60_VFP], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFMovV", [SMX60_VFP], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SMX60_VFP], mx, IsWorstCase>;
+ }
}
// Widening
@@ -598,27 +707,73 @@ foreach mx = SchedMxListW in {
foreach sew = SchedSEWSet<mx, isF=0, isWidening=1>.val in {
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListW>.c;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ defvar VFWCvtILat = ConstValueUntilLMULThenDouble<"M1", 4, mx>.c;
+ defvar VFWCvtIOcc = ConstOneUntilMF4ThenDouble<mx>.c;
+ let Latency = VFWCvtILat, ReleaseAtCycles = [VFWCvtIOcc] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
}
}
foreach mx = SchedMxListFW in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxListFW>.c;
- defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SMX60_VFP], mx, IsWorstCase>;
+ defvar VFWCvtFToIVLat = ConstValueUntilLMULThenDouble<"M1", 4, mx>.c;
+ defvar VFWCvtFToIVOcc = ConstOneUntilMF4ThenDouble<mx>.c;
+ let Latency = VFWCvtFToIVLat, ReleaseAtCycles = [VFWCvtFToIVOcc] in {
+ defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SMX60_VFP], mx, IsWorstCase>;
+ }
}
foreach mx = SchedMxListFW in {
foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUV", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUF", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulV", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulF", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ defvar VFWCvtFToFVLat = ConstValueUntilLMULThenDouble<"M1", 4, mx>.c;
+ defvar VFWCvtFToFVOcc = ConstOneUntilMF4ThenDouble<mx>.c;
+ let Latency = VFWCvtFToFVLat, ReleaseAtCycles = [VFWCvtFToFVOcc] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
+
+ // Latency for vfwsub/vfwadd.vv, vfwsub/vfwadd.vf: 4/4/4/5/8
+ // ReleaseAtCycles for vfwsub/vfwadd.vv, vfwsub/vfwadd.vf: 1/1/2/4/8
+ // Latency for vfwsub/vfwadd.wv, vfwsub/vfwadd.wf: 5/5/5/9/17
+ // ReleaseAtCycles for vfwsub/vfwadd.wv, vfwsub/vfwadd.wf: 1/2/4/8/17
+ // We use the worst-case
+ defvar VFWALULat = !add(ConstValueUntilLMULThenDouble<"M1", 4, mx>.c, 1); // 5/5/9/17
+ defvar VFWALUOcc = !if(!eq(mx, "M4"),
+ !add(ConstOneUntilMF4ThenDouble<mx>.c, 1), // 2/4/8/17
+ ConstOneUntilMF4ThenDouble<mx>.c
+ );
+ // TODO: Split .wf/.wv variants into separate scheduling classes
+ let Latency = VFWALULat, ReleaseAtCycles = [VFWALUOcc] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUF", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
+
+ let Latency = Get4588Latency<mx>.c, ReleaseAtCycles = [ConstOneUntilMF2ThenDouble<mx>.c] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulF", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
+
+ // Slightly increased ReleaseAtCycles for SEW == 32
+ defvar VFWMullOcc = !if(!eq(sew, 32),
+ GetLMULValue<[1, 1, 1, 3, 5, 9, 18], mx>.c,
+ ConstOneUntilMF2ThenDouble<mx>.c
+ );
+ defvar VFWMulVLat = ConstValueUntilLMULThenDoubleBase<"M8", 5, 8, mx>.c;
+ let Latency = VFWMulVLat, ReleaseAtCycles = [VFWMullOcc] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
+
+ // Latency for vfwmacc, vfwnmacc, etc: e16 = 5/5/5/8; e32 = 6/6/7/8
+ defvar VFWMulAddVLat = !if(!eq(sew, 16),
+ ConstValueUntilLMULThenDoubleBase<"M4", 5, 8, mx>.c,
+ Get6678Latency<mx>.c
+ );
+ let Latency = VFWMulAddVLat, ReleaseAtCycles = [ConstOneUntilMF2ThenDouble<mx>.c] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
}
}
@@ -626,15 +781,23 @@ foreach mx = SchedMxListFW in {
foreach mx = SchedMxListW in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxListW>.c;
- defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [SMX60_VFP], mx, IsWorstCase>;
+ defvar VFNCvtFToIVLat = ConstValueUntilLMULThenDouble<"M1", 4, mx>.c;
+ defvar VFNCvtFToIVOcc = ConstOneUntilMF4ThenDouble<mx>.c;
+ let Latency = VFNCvtFToIVLat, ReleaseAtCycles = [VFNCvtFToIVOcc] in {
+ defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [SMX60_VFP], mx, IsWorstCase>;
+ }
}
foreach mx = SchedMxListFW in {
foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
-
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [SMX60_VFP], mx, sew, IsWorstCase>;
+
+ defvar VFNCvtToFVLat = ConstValueUntilLMULThenDouble<"M1", 4, mx>.c;
+ defvar VFNCvtToFVOcc = ConstOneUntilMF4ThenDouble<mx>.c;
+ let Latency = VFNCvtToFVLat, ReleaseAtCycles = [VFNCvtToFVOcc] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
}
}
@@ -643,9 +806,35 @@ foreach mx = SchedMxListF in {
foreach sew = SchedSEWSet<mx, 1>.val in {
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFDivV", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFDivF", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ // Compute ReleaseAtCycles based on SEW
+ // Latency for vfdiv.vf: e16/e32 = 12/24/48/96; e64 = 18/36/72/144
+ // Latency for vfrdiv.vf: e16/e32 = 12/24/48/96; e64 = 40/80/160/320
+ // We use the worst case, so vfdiv.vf is penalized at e64
+ // TODO: split vfdiv.vf and vfrdiv.vf into separate scheduling classes
+ defvar VFDivFFactor = !if(!eq(sew, 64), 40, 12);
+ defvar VFDivFLatAndOcc = !mul(ConstOneUntilM1ThenDouble<mx>.c, VFDivFFactor);
+ let Latency = VFDivFLatAndOcc, ReleaseAtCycles = [VFDivFLatAndOcc] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFDivF", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
+
+ defvar VFDivVFactor = !if(!eq(sew, 16), 12, 40);
+ defvar VFDivVLatAndOcc = !mul(ConstOneUntilM1ThenDouble<mx>.c, VFDivVFactor);
+ let Latency = VFDivVLatAndOcc, ReleaseAtCycles = [VFDivVLatAndOcc] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFDivV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
+ }
+}
+
+// Pattern for vfsqrt.v: e16 = 18/36/72/144; e32 = 38/76/152/304; e64 = 40/80/160/320
+foreach mx = SchedMxListF in {
+ foreach sew = SchedSEWSet<mx, 1>.val in {
+ defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
+
+ defvar VFSqrtVFactor = !if(!eq(sew, 16), 12, 40);
+ defvar VFSqrtVLatAndOcc = !mul(ConstOneUntilM1ThenDouble<mx>.c, VFSqrtVFactor);
+ let Latency = VFSqrtVLatAndOcc, ReleaseAtCycles = [VFSqrtVLatAndOcc] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
}
}
@@ -740,49 +929,103 @@ foreach mx = SchedMxListFWRed in {
foreach mx = SchedMxList in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;
- defm "" : LMULWriteResMX<"WriteVMALUV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVMPopV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVMFFSV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVMSFSV", [SMX60_VIEU], mx, IsWorstCase>;
+ let Latency = 4 in {
+ defm "" : LMULWriteResMX<"WriteVMALUV", [SMX60_VIEU], mx, IsWorstCase>;
+ }
+ let Latency = 4, ReleaseAtCycles = [ConstValueUntilLMULThenDouble<"M2", 1, mx>.c] in {
+ defm "" : LMULWriteResMX<"WriteVMSFSV", [SMX60_VIEU], mx, IsWorstCase>;
+ }
+
+ let Latency = 6, ReleaseAtCycles = [2] in {
+ defm "" : LMULWriteResMX<"WriteVMPopV", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVMFFSV", [SMX60_VIEU], mx, IsWorstCase>;
+ }
- defm "" : LMULWriteResMX<"WriteVIotaV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIdxV", [SMX60_VIEU], mx, IsWorstCase>;
+ defvar VIotaLat = ConstValueUntilLMULThenDouble<"M2", 4, mx>.c;
+ defvar VIotaOcc = ConstOneUntilMF2ThenDouble<mx>.c;
+ let Latency = VIotaLat, ReleaseAtCycles = [VIotaOcc] in {
+ defm "" : LMULWriteResMX<"WriteVIotaV", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIdxV", [SMX60_VIEU], mx, IsWorstCase>;
+ }
}
// 16. Vector Permutation Instructions
+// Slide
foreach mx = SchedMxList in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;
- defm "" : LMULWriteResMX<"WriteVSlideI", [SMX60_VIEU], mx, IsWorstCase>;
+ // Latency for slide up: 4/4/8/16, ReleaseAtCycles is 2/4/8/16
+ defvar VSlideUpLat = ConstValueUntilLMULThenDouble<"M2", 4, mx>.c;
+ defvar VSlideUpOcc = ConstOneUntilMF2ThenDouble<mx>.c;
+ let Latency = VSlideUpLat, ReleaseAtCycles = [VSlideUpOcc] in {
+ defm "" : LMULWriteResMX<"WriteVSlideUpX", [SMX60_VIEU], mx, IsWorstCase>;
+ }
- defm "" : LMULWriteResMX<"WriteVISlide1X", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFSlide1F", [SMX60_VFP], mx, IsWorstCase>;
+ // Latency for slide down: 4/5/9/17, ReleaseAtCycles is 3/5/9/17
+ defvar VSlideDownLat = GetLMULValue<[4, 4, 4, 4, 5, 9, 17], mx>.c;
+ defvar VSlideDownOcc = GetLMULValue<[1, 1, 1, 3, 5, 9, 17], mx>.c;
+ let Latency = VSlideDownLat, ReleaseAtCycles = [VSlideDownOcc] in {
+ defm "" : LMULWriteResMX<"WriteVSlideDownX", [SMX60_VIEU], mx, IsWorstCase>;
+ }
+ // The following classes group slide-up and slide-down together, so we use
+ // the worst case (slide down) for all of them.
+ let Latency = VSlideDownLat, ReleaseAtCycles = [VSlideDownOcc] in {
+ defm "" : LMULWriteResMX<"WriteVSlideI", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVISlide1X", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSlideUpX", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSlideDownX", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFSlide1F", [SMX60_VFP], mx, IsWorstCase>;
+ }
}
-def : WriteRes<WriteVMovXS, [SMX60_VIEU]>;
-def : WriteRes<WriteVMovSX, [SMX60_VIEU]>;
-
-def : WriteRes<WriteVMovFS, [SMX60_VIEU]>;
-def : WriteRes<WriteVMovSF, [SMX60_VIEU]>;
+// ReleaseAtCycles is 2/2/2/2/2/3/6, but we can't set it based on MX for now
+// TODO: Split this into separate WriteRes for each MX
+let Latency = 6, ReleaseAtCycles = [6] in {
+ def : WriteRes<WriteVMovXS, [SMX60_VIEU]>;
+}
-// Gather and Compress
-foreach mx = SchedMxList in {
- foreach sew = SchedSEWSet<mx>.val in {
- defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
- defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SMX60_VIEU], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherEI16VV", [SMX60_VIEU], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [SMX60_VIEU], mx, sew, IsWorstCase>;
- }
+// ReleaseAtCycles is 1/1/1/1/1/2/4, but we can't set it based on MX for now
+// TODO: Split this into separate WriteRes for each MX
+let Latency = 4, ReleaseAtCycles = [4] in {
+ def : WriteRes<WriteVMovSX, [SMX60_VIEU]>;
+ def : WriteRes<WriteVMovFS, [SMX60_VIEU]>;
+ def : WriteRes<WriteVMovSF, [SMX60_VIEU]>;
}
+// Integer LMUL Gather and Compress
foreach mx = SchedMxList in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;
- defm "" : LMULWriteResMX<"WriteVRGatherVX", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVRGatherVI", [SMX60_VIEU], mx, IsWorstCase>;
+ defvar VRGatherLat = ConstValueUntilLMULThenDouble<"M2", 4, mx>.c;
+ let Latency = VRGatherLat, ReleaseAtCycles = [ConstOneUntilMF2ThenDouble<mx>.c] in {
+ defm "" : LMULWriteResMX<"WriteVRGatherVX", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVRGatherVI", [SMX60_VIEU], mx, IsWorstCase>;
+ }
+
+ foreach sew = SchedSEWSet<mx>.val in {
+ defvar IsWorstCaseSEW = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
+
+ defvar VRGatherVVLat = GetLMULValue<[4, 4, 4, 4, 16, 64, 256], mx>.c;
+ defvar VRGatherVVOcc = GetLMULValue<[1, 1, 1, 4, 16, 64, 256], mx>.c;
+ let Latency = VRGatherVVLat, ReleaseAtCycles = [VRGatherVVOcc] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SMX60_VIEU], mx, sew, IsWorstCaseSEW>;
+ }
+ // For sew == 8, latency is double that of the other cases, except for the fractional LMULs (const 4 cycles) and M8
+ defvar VRGatherEI16Lat = !if(!eq(sew, 8),
+ GetLMULValue<[4, 4, 4, 8, 32, 128, 256], mx>.c,
+ GetLMULValue<[4, 4, 4, 4, 16, 64, 256], mx>.c);
+ defvar VRGatherEI16Occ = !if(!eq(sew, 8),
+ GetLMULValue<[1, 1, 2, 8, 32, 128, 256], mx>.c,
+ GetLMULValue<[1, 1, 1, 4, 16, 64, 256], mx>.c);
+ let Latency = VRGatherEI16Lat, ReleaseAtCycles = [VRGatherEI16Occ] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherEI16VV", [SMX60_VIEU], mx, sew, IsWorstCaseSEW>;
+ }
+
+ defvar VCompressVLat = GetLMULValue<[4, 4, 4, 4, 10, 36, 136], mx>.c;
+ defvar VCompressVOcc = GetLMULValue<[1, 1, 1, 3, 10, 36, 136], mx>.c;
+ let Latency = VCompressVLat, ReleaseAtCycles = [VCompressVOcc] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [SMX60_VIEU], mx, sew, IsWorstCaseSEW>;
+ }
+ }
}
// Others
@@ -790,6 +1033,10 @@ def : WriteRes<WriteCSR, [SMX60_IEU]>;
def : WriteRes<WriteNop, [SMX60_IEU]>;
def : WriteRes<WriteRdVLENB, [SMX60_IEUA]>;
+// Give COPY instructions an execution resource.
+// FIXME: This could be better modeled by looking at the regclasses of the operands.
+def : InstRW<[WriteIALU], (instrs COPY)>;
+
//===----------------------------------------------------------------------===//
// Bypass and advance
def : ReadAdvance<ReadJmp, 0>;
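
A quick numeric check of the SMX60GetVLMAX and SMX60SegmentedLdStCycles formulas introduced above: VLMAX = (VLEN * LMUL) / SEW with VLEN = 256, and the segmented load/store cycle count is VLMAX * NF. LMUL is passed in eighths (MF2 = 4, M1 = 8, M8 = 64) purely for this sketch; none of this is LLVM API.

#include <cassert>

static int smx60VLMax(int LMulEighths, int SEW, int VLEN = 256) {
  return (VLEN * LMulEighths) / (8 * SEW);
}

static int smx60SegmentedCycles(int LMulEighths, int SEW, int NF) {
  return smx60VLMax(LMulEighths, SEW) * NF;
}

int main() {
  assert(smx60VLMax(8, 8) == 32);                // M1,  e8:  256*1/8  = 32 elements
  assert(smx60VLMax(4, 32) == 4);                // MF2, e32: 256/2/32 = 4 elements
  assert(smx60SegmentedCycles(8, 8, 4) == 128);  // vlseg4e8 at M1: 32 * 4 cycles
  assert(smx60SegmentedCycles(64, 64, 2) == 64); // vlseg2e64 at M8: 32 * 2 cycles
  return 0;
}
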
diff --git a/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td b/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td
index da89e15..08ee180 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td
@@ -8,19 +8,106 @@
//===----------------------------------------------------------------------===//
+class AscalonIsWorstCaseMX<string mx, list<string> MxList> {
+ defvar LLMUL = LargestLMUL<MxList>.r;
+ bit c = !eq(mx, LLMUL);
+}
+
+class AscalonIsWorstCaseMXSEW<string mx, int sew, list<string> MxList,
+ bit isF = 0> {
+ defvar LLMUL = LargestLMUL<MxList>.r;
+ defvar SSEW = SmallestSEW<mx, isF>.r;
+ bit c = !and(!eq(mx, LLMUL), !eq(sew, SSEW));
+}
+
+/// Cycle counts that scale with LMUL, with LMUL=1 having the same latency as
+/// the fractional LMULs.
+class AscalonGetCyclesLMUL<string mx, int base> {
+ int c = !cond(
+ !eq(mx, "M1") : base,
+ !eq(mx, "M2") : !mul(base, 2),
+ !eq(mx, "M4") : !mul(base, 4),
+ !eq(mx, "M8") : !mul(base, 8),
+ !eq(mx, "MF2") : base,
+ !eq(mx, "MF4") : base,
+ !eq(mx, "MF8") : base
+ );
+}
+
+/// Linear LMUL scaling starting from smallest fractional LMUL
+class AscalonGetCyclesLMULFractional<string mx, int base> {
+ int c = !cond(
+ !eq(mx, "MF8") : base,
+ !eq(mx, "MF4") : !mul(base, 2),
+ !eq(mx, "MF2") : !mul(base, 4),
+ !eq(mx, "M1") : !mul(base, 8),
+ !eq(mx, "M2") : !mul(base, 16),
+ !eq(mx, "M4") : !mul(base, 32),
+ !eq(mx, "M8") : !mul(base, 64)
+ );
+}
+
+class AscalonGetCyclesDefault<string mx> {
+ int c = AscalonGetCyclesLMUL<mx, 1>.c;
+}
+
+class AscalonGetCyclesNarrowing<string mx> {
+ int c = !cond(
+ !eq(mx, "M1") : 4,
+ !eq(mx, "M2") : 8,
+ !eq(mx, "M4") : 16,
+ !eq(mx, "MF2") : 2,
+ !eq(mx, "MF4") : 1,
+ !eq(mx, "MF8") : 1
+ );
+}
+
+
+class AscalonGetCyclesDivOrSqrt<string mx, int sew> {
+ int c = !cond(
+ !eq(sew, 8) : AscalonGetCyclesLMUL<mx, 7>.c,
+ !eq(sew, 16) : AscalonGetCyclesLMUL<mx, 6>.c,
+ !eq(sew, 32) : AscalonGetCyclesLMUL<mx, 5>.c,
+ !eq(sew, 64) : AscalonGetCyclesLMUL<mx, 8>.c
+ );
+}
+
+class AscalonGetCyclesVRGatherVV<string mx> {
+ int c = !cond(
+ !eq(mx, "M1") : 2,
+ !eq(mx, "M2") : 4,
+ !eq(mx, "M4") : 12,
+ !eq(mx, "M8") : 48,
+ !eq(mx, "MF2") : 2,
+ !eq(mx, "MF4") : 2,
+ !eq(mx, "MF8") : 2
+ );
+}
+
+class AscalonGetCyclesStridedSegmented<string mx, int sew> {
+ int c = !cond(
+ !eq(sew, 8) : AscalonGetCyclesLMULFractional<mx, 4>.c,
+ !eq(sew, 16) : AscalonGetCyclesLMULFractional<mx, 2>.c,
+ !eq(sew, 32) : AscalonGetCyclesLMULFractional<mx, 1>.c,
+ !eq(sew, 64) : AscalonGetCyclesLMULFractional<mx, 1>.c
+ );
+}
+
+//===----------------------------------------------------------------------===//
+
def TTAscalonD8Model : SchedMachineModel {
let IssueWidth = 8; // 8-way decode and dispatch
let MicroOpBufferSize = 256; // 256 micro-op re-order buffer
let LoadLatency = 4; // Optimistic load latency
let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch
- let CompleteModel = 0;
+ let CompleteModel = false;
// TODO: supported, but haven't added scheduling info yet.
let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx,
HasStdExtZcmt, HasStdExtZknd, HasStdExtZkne,
HasStdExtZknh, HasStdExtZksed, HasStdExtZksh,
- HasStdExtZkr, HasVInstructions, HasVInstructionsI64];
+ HasStdExtZkr];
}
let SchedModel = TTAscalonD8Model in {
@@ -34,11 +121,17 @@ let BufferSize = 16 in {
def AscalonFXB : ProcResource<1>; // ALU, INT -> FP/VEC
def AscalonFXC : ProcResource<2>; // ALU, BR
def AscalonFXD : ProcResource<2>; // ALU
- def AscalonFP : ProcResource<2>;
- // TODO: two vector units with vector scheduling model.
+ def AscalonFX : ProcResGroup<[AscalonFXA, AscalonFXB, AscalonFXC, AscalonFXD]>;
+ // FP
+ def AscalonFPA : ProcResource<1>; // Pipe A also handles FP/VEC -> INT
+ def AscalonFPB : ProcResource<1>;
+ def AscalonFP : ProcResGroup<[AscalonFPA, AscalonFPB]>;
+ // Vector
+ def AscalonVA : ProcResource<1>;
+ def AscalonVB : ProcResource<1>;
+ def AscalonV : ProcResGroup<[AscalonVA, AscalonVB]>;
}
-def AscalonFX : ProcResGroup<[AscalonFXA, AscalonFXB, AscalonFXC, AscalonFXD]>;
//===----------------------------------------------------------------------===//
@@ -317,9 +410,624 @@ def : ReadAdvance<ReadSingleBit, 0>;
def : ReadAdvance<ReadSingleBitImm, 0>;
//===----------------------------------------------------------------------===//
+// Vector
+def : WriteRes<WriteRdVLENB, [AscalonFXA]>;
+
+// Configuration-Setting Instructions
+def : WriteRes<WriteVSETVLI, [AscalonV]>;
+def : WriteRes<WriteVSETIVLI, [AscalonV]>;
+let Latency = 2 in {
+ def : WriteRes<WriteVSETVL, [AscalonV]>;
+}
+
+// Vector Loads and Stores
+foreach mx = SchedMxList in {
+ defvar Cycles = AscalonGetCyclesDefault<mx>.c;
+ defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = Cycles in {
+ defm "" : LMULWriteResMX<"WriteVLDE", [AscalonLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDFF", [AscalonLS], mx, IsWorstCase>;
+ }
+ defm "" : LMULWriteResMX<"WriteVSTE", [AscalonLS], mx, IsWorstCase>;
+}
+
+foreach mx = SchedMxList in {
+ defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
+ defm "" : LMULWriteResMX<"WriteVLDM", [AscalonLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTM", [AscalonLS], mx, IsWorstCase>;
+}
+
+foreach mx = SchedMxListEEW8 in {
+ defvar Cycles = AscalonGetCyclesLMUL<mx, 2>.c;
+ defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = Cycles in {
+ defm "" : LMULWriteResMX<"WriteVLDS8", [AscalonLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDUX8", [AscalonLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDOX8", [AscalonLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTS8", [AscalonLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTUX8", [AscalonLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTOX8", [AscalonLS], mx, IsWorstCase>;
+ }
+}
+foreach mx = SchedMxListEEW16 in {
+ defvar Cycles = AscalonGetCyclesLMUL<mx, 2>.c;
+ defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = Cycles in {
+ defm "" : LMULWriteResMX<"WriteVLDS16", [AscalonLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDUX16", [AscalonLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDOX16", [AscalonLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTS16", [AscalonLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTUX16", [AscalonLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTOX16", [AscalonLS], mx, IsWorstCase>;
+ }
+}
+foreach mx = SchedMxListEEW32 in {
+ defvar Cycles = AscalonGetCyclesLMUL<mx, 2>.c;
+ defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = Cycles in {
+ defm "" : LMULWriteResMX<"WriteVLDS32", [AscalonLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDUX32", [AscalonLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDOX32", [AscalonLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTS32", [AscalonLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTUX32", [AscalonLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTOX32", [AscalonLS], mx, IsWorstCase>;
+ }
+}
+foreach mx = SchedMxListEEW64 in {
+ defvar Cycles = AscalonGetCyclesLMUL<mx, 2>.c;
+ defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = Cycles in {
+ defm "" : LMULWriteResMX<"WriteVLDS64", [AscalonLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDUX64", [AscalonLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDOX64", [AscalonLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTS64", [AscalonLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTUX64", [AscalonLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTOX64", [AscalonLS], mx, IsWorstCase>;
+ }
+}
+
+// VLD*R is not LMUL aware
+def : WriteRes<WriteVLD1R, [AscalonLS]>;
+def : WriteRes<WriteVLD2R, [AscalonLS]>;
+def : WriteRes<WriteVLD4R, [AscalonLS]>;
+def : WriteRes<WriteVLD8R, [AscalonLS]>;
+// VST*R is not LMUL aware
+def : WriteRes<WriteVST1R, [AscalonLS]>;
+def : WriteRes<WriteVST2R, [AscalonLS]>;
+def : WriteRes<WriteVST4R, [AscalonLS]>;
+def : WriteRes<WriteVST8R, [AscalonLS]>;
+
+// Segmented Loads and Stores
+foreach mx = SchedMxList in {
+ foreach eew = [8, 16, 32, 64] in {
+ foreach nf=2-8 in {
+ defvar Cycles = AscalonGetCyclesDefault<mx>.c;
+ defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = Cycles in {
+ defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" # eew, [AscalonLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" # eew, [AscalonLS], mx, IsWorstCase>;
+ }
+ let Latency = 1, AcquireAtCycles = [1], ReleaseAtCycles = [!add(1, Cycles)] in
+ defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" # eew, [AscalonLS], mx, IsWorstCase>;
+ }
+ }
+}
+foreach mx = SchedMxList in {
+ foreach nf=2-8 in {
+ foreach eew = [8, 16, 32, 64] in {
+ defvar Cycles = AscalonGetCyclesStridedSegmented<mx, eew>.c;
+ defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = Cycles in {
+ defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" # eew, [AscalonLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" # eew, [AscalonLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" # eew, [AscalonLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" # eew, [AscalonLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" # eew, [AscalonLS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" # eew, [AscalonLS], mx, IsWorstCase>;
+ }
+ }
+ }
+}
+
+// Vector Fixed-Point Arithmetic Instructions
+foreach mx = SchedMxList in {
+ defvar Cycles = AscalonGetCyclesDefault<mx>.c;
+ defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVSALUV", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSALUX", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSALUI", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVAALUV", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVAALUX", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSMulV", [AscalonFXA, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSMulX", [AscalonFXA, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSShiftV", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSShiftX", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSShiftI", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ }
+}
+// Narrowing
+foreach mx = SchedMxListW in {
+ defvar Cycles = AscalonGetCyclesNarrowing<mx>.c;
+ defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxListW>.c;
+ let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVNClipV", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVNClipX", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVNClipI", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ }
+}
+
+// Configuration-Setting Instructions
+def : ReadAdvance<ReadVSETVLI, 1>;
+def : ReadAdvance<ReadVSETVL, 1>;
+
+// Vector Loads and Stores
+def : ReadAdvance<ReadVLDX, 0>;
+def : ReadAdvance<ReadVSTX, 0>;
+defm "" : LMULReadAdvance<"ReadVSTEV", 0>;
+defm "" : LMULReadAdvance<"ReadVSTM", 0>;
+def : ReadAdvance<ReadVLDSX, 0>;
+def : ReadAdvance<ReadVSTSX, 0>;
+defm "" : LMULReadAdvance<"ReadVSTS8V", 0>;
+defm "" : LMULReadAdvance<"ReadVSTS16V", 0>;
+defm "" : LMULReadAdvance<"ReadVSTS32V", 0>;
+defm "" : LMULReadAdvance<"ReadVSTS64V", 0>;
+defm "" : LMULReadAdvance<"ReadVLDUXV", 0>;
+defm "" : LMULReadAdvance<"ReadVLDOXV", 0>;
+defm "" : LMULReadAdvance<"ReadVSTUX8", 0>;
+defm "" : LMULReadAdvance<"ReadVSTUX16", 0>;
+defm "" : LMULReadAdvance<"ReadVSTUX32", 0>;
+defm "" : LMULReadAdvance<"ReadVSTUX64", 0>;
+defm "" : LMULReadAdvance<"ReadVSTUXV", 0>;
+defm "" : LMULReadAdvance<"ReadVSTUX8V", 0>;
+defm "" : LMULReadAdvance<"ReadVSTUX16V", 0>;
+defm "" : LMULReadAdvance<"ReadVSTUX32V", 0>;
+defm "" : LMULReadAdvance<"ReadVSTUX64V", 0>;
+defm "" : LMULReadAdvance<"ReadVSTOX8", 0>;
+defm "" : LMULReadAdvance<"ReadVSTOX16", 0>;
+defm "" : LMULReadAdvance<"ReadVSTOX32", 0>;
+defm "" : LMULReadAdvance<"ReadVSTOX64", 0>;
+defm "" : LMULReadAdvance<"ReadVSTOXV", 0>;
+defm "" : LMULReadAdvance<"ReadVSTOX8V", 0>;
+defm "" : LMULReadAdvance<"ReadVSTOX16V", 0>;
+defm "" : LMULReadAdvance<"ReadVSTOX32V", 0>;
+defm "" : LMULReadAdvance<"ReadVSTOX64V", 0>;
+// LMUL Aware
+def : ReadAdvance<ReadVST1R, 0>;
+def : ReadAdvance<ReadVST2R, 0>;
+def : ReadAdvance<ReadVST4R, 0>;
+def : ReadAdvance<ReadVST8R, 0>;
+
+// Vector Integer Arithmetic Instructions
+foreach mx = SchedMxList in {
+ defvar Cycles = AscalonGetCyclesDefault<mx>.c;
+ defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVIALUV", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIALUX", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIALUI", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUV", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUX", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUI", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUMV", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUMX", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUMI", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVShiftV", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVShiftX", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVShiftI", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMinMaxV", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMinMaxX", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMulV", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMulX", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMulAddV", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMulAddX", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMergeV", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMergeX", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMergeI", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMovV", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMovX", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMovI", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICmpV", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICmpX", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICmpI", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ }
+}
+foreach mx = SchedMxList in {
+ defvar Cycles = AscalonGetCyclesDefault<mx>.c;
+ defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVExtV", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ }
+}
+foreach mx = SchedMxList in {
+ foreach sew = SchedSEWSet<mx>.val in {
+ defvar Cycles = AscalonGetCyclesDivOrSqrt<mx, sew>.c;
+ defvar IsWorstCase = AscalonIsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
+ let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [AscalonFX, AscalonV], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [AscalonFX, AscalonV], mx, sew, IsWorstCase>;
+ }
+ }
+}
+
+// Widening
+foreach mx = SchedMxListW in {
+ defvar Cycles = AscalonGetCyclesDefault<mx>.c;
+ defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxListW>.c;
+ let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVIWALUV", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIWALUX", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIWALUI", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIWMulV", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIWMulX", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIWMulAddV", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIWMulAddX", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ }
+}
+// Narrowing
+foreach mx = SchedMxListW in {
+ defvar Cycles = AscalonGetCyclesNarrowing<mx>.c;
+ defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxListW>.c;
+ let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVNShiftV", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVNShiftX", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVNShiftI", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ }
+}
+
+// Vector Floating-Point Instructions
+foreach mx = SchedMxListF in {
+ foreach sew = SchedSEWSet<mx, isF=1>.val in {
+ defvar Cycles = AscalonGetCyclesDefault<mx>.c;
+ defvar IsWorstCase = AscalonIsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
+ let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFALUV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFALUF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMulV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMulF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
+ }
+ }
+}
+foreach mx = SchedMxList in {
+ defvar Cycles = AscalonGetCyclesDefault<mx>.c;
+ defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [AscalonFPA, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFClassV", [AscalonFP, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFMergeV", [AscalonFP, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFMovV", [AscalonFP, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFCmpV", [AscalonFP, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFCmpF", [AscalonFP, AscalonV], mx, IsWorstCase>;
+ }
+}
+foreach mx = SchedMxListF in {
+ foreach sew = SchedSEWSet<mx, isF=1>.val in {
+ defvar Cycles = AscalonGetCyclesDivOrSqrt<mx, sew>.c;
+ defvar IsWorstCase = AscalonIsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
+ let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFDivV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFDivF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
+ }
+ }
+}
+
+// Widening
+foreach mx = SchedMxListW in {
+ foreach sew = SchedSEWSet<mx, isF=0, isWidening=1>.val in {
+ defvar Cycles = AscalonGetCyclesDefault<mx>.c;
+ defvar IsWorstCase = AscalonIsWorstCaseMXSEW<mx, sew, SchedMxListW>.c;
+ let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [AscalonFXB, AscalonV], mx, sew, IsWorstCase>;
+ }
+}
+foreach mx = SchedMxListFW in {
+ foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
+ defvar Cycles = AscalonGetCyclesDefault<mx>.c;
+ defvar IsWorstCase = AscalonIsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
+ let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [AscalonFP, AscalonV], mx, sew, IsWorstCase>;
+ }
+ }
+ defvar Cycles = AscalonGetCyclesDefault<mx>.c;
+ defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxListFW>.c;
+ let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in
+ defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [AscalonFPA, AscalonV], mx, IsWorstCase>;
+}
+// Narrowing
+foreach mx = SchedMxListW in {
+ defvar Cycles = AscalonGetCyclesNarrowing<mx>.c;
+ defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxListW>.c;
+ let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [AscalonFPA, AscalonV], mx, IsWorstCase>;
+ }
+}
+foreach mx = SchedMxListFW in {
+ foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
+ defvar Cycles = AscalonGetCyclesNarrowing<mx>.c;
+ defvar IsWorstCase = AscalonIsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
+ let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [AscalonFXB, AscalonV], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [AscalonFXB, AscalonV], mx, sew, IsWorstCase>;
+ }
+ }
+}
+
+// Vector Reduction Instructions
+foreach mx = SchedMxList in {
+ foreach sew = SchedSEWSet<mx>.val in {
+ defvar Cycles = AscalonGetCyclesDefault<mx>.c;
+ defvar IsWorstCase = AscalonIsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
+ let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [AscalonFX, AscalonV],
+ mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [AscalonFX, AscalonV],
+ mx, sew, IsWorstCase>;
+ }
+ }
+}
+
+foreach mx = SchedMxListWRed in {
+ foreach sew = SchedSEWSet<mx, 0, 1>.val in {
+ defvar Cycles = AscalonGetCyclesDefault<mx>.c;
+ defvar IsWorstCase = AscalonIsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
+ let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [AscalonFX, AscalonV],
+ mx, sew, IsWorstCase>;
+ }
+}
+
+foreach mx = SchedMxListF in {
+ foreach sew = SchedSEWSet<mx, 1>.val in {
+ defvar RedCycles = AscalonGetCyclesDefault<mx>.c;
+ defvar IsWorstCase = AscalonIsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
+ let Latency = RedCycles, ReleaseAtCycles = [1, RedCycles] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [AscalonFX, AscalonV],
+ mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [AscalonFX, AscalonV],
+ mx, sew, IsWorstCase>;
+ }
+ defvar OrdRedCycles = AscalonGetCyclesLMUL<mx, 18>.c;
+ let Latency = OrdRedCycles, ReleaseAtCycles = [1, OrdRedCycles] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [AscalonFX, AscalonV],
+ mx, sew, IsWorstCase>;
+ }
+}
+
+foreach mx = SchedMxListFWRed in {
+ foreach sew = SchedSEWSet<mx, 1, 1>.val in {
+ defvar RedCycles = AscalonGetCyclesDefault<mx>.c;
+ defvar IsWorstCase = AscalonIsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c;
+ let Latency = RedCycles, ReleaseAtCycles = [1, RedCycles] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [AscalonFX, AscalonV],
+ mx, sew, IsWorstCase>;
+ defvar OrdRedCycles = AscalonGetCyclesLMUL<mx, 18>.c;
+ let Latency = OrdRedCycles, ReleaseAtCycles = [1, OrdRedCycles] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [AscalonFX, AscalonV],
+ mx, sew, IsWorstCase>;
+ }
+}
+
+// Vector Mask Instructions
+foreach mx = SchedMxList in {
+ defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
+ defm "" : LMULWriteResMX<"WriteVMALUV", [AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVMSFSV", [AscalonV], mx, IsWorstCase>;
+ let Latency = 2, ReleaseAtCycles = [1, 2] in {
+ defm "" : LMULWriteResMX<"WriteVMPopV", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVMFFSV", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ }
+}
+foreach mx = SchedMxList in {
+ defvar Cycles = AscalonGetCyclesDefault<mx>.c;
+ defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = Cycles, ReleaseAtCycles = [1, Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVIotaV", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIdxV", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ }
+}
+
+// Vector Permutation Instructions
+let Latency = 2, ReleaseAtCycles = [1, 2] in {
+ def : WriteRes<WriteVMovSX, [AscalonFX, AscalonV]>;
+ def : WriteRes<WriteVMovXS, [AscalonFX, AscalonV]>;
+ def : WriteRes<WriteVMovSF, [AscalonFX, AscalonV]>;
+ def : WriteRes<WriteVMovFS, [AscalonFX, AscalonV]>;
+}
+foreach mx = SchedMxList in {
+ defvar Cycles = AscalonGetCyclesDefault<mx>.c;
+ defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = !mul(Cycles, 2), ReleaseAtCycles = [Cycles, !mul(Cycles, 2)] in {
+ defm "" : LMULWriteResMX<"WriteVRGatherVX", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVRGatherVI", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ }
+}
+
+foreach mx = SchedMxList in {
+ foreach sew = SchedSEWSet<mx>.val in {
+ defvar Cycles = AscalonGetCyclesVRGatherVV<mx>.c;
+ defvar IsWorstCase = AscalonIsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
+ let Latency = !add(Cycles, 3), ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [AscalonFX, AscalonV], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherEI16VV", [AscalonFX, AscalonV], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [AscalonFX, AscalonV], mx, sew, IsWorstCase>;
+ }
+ }
+}
+
+foreach mx = SchedMxList in {
+ defvar Cycles = AscalonGetCyclesDefault<mx>.c;
+ defvar IsWorstCase = AscalonIsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVSlideUpX", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSlideDownX", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSlideI", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVISlide1X", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFSlide1F", [AscalonFX, AscalonV], mx, IsWorstCase>;
+ }
+}
+
+// Whole vector register move, vmv<N>.v, not LMUL aware
+let Latency = 1, ReleaseAtCycles = [1] in
+ def : WriteRes<WriteVMov1V, [AscalonV]>;
+let Latency = 2, ReleaseAtCycles = [2] in
+ def : WriteRes<WriteVMov2V, [AscalonV]>;
+let Latency = 4, ReleaseAtCycles = [4] in
+ def : WriteRes<WriteVMov4V, [AscalonV]>;
+let Latency = 8, ReleaseAtCycles = [8] in
+ def : WriteRes<WriteVMov8V, [AscalonV]>;
+
+// Vector Integer Arithmetic Instructions
+defm : LMULReadAdvance<"ReadVIALUV", 0>;
+defm : LMULReadAdvance<"ReadVIALUX", 0>;
+defm : LMULReadAdvanceW<"ReadVIWALUV", 0>;
+defm : LMULReadAdvanceW<"ReadVIWALUX", 0>;
+defm : LMULReadAdvance<"ReadVExtV", 0>;
+defm : LMULReadAdvance<"ReadVICALUV", 0>;
+defm : LMULReadAdvance<"ReadVICALUX", 0>;
+defm : LMULReadAdvance<"ReadVShiftV", 0>;
+defm : LMULReadAdvance<"ReadVShiftX", 0>;
+defm : LMULReadAdvanceW<"ReadVNShiftV", 0>;
+defm : LMULReadAdvanceW<"ReadVNShiftX", 0>;
+defm : LMULReadAdvance<"ReadVICmpV", 0>;
+defm : LMULReadAdvance<"ReadVICmpX", 0>;
+defm : LMULReadAdvance<"ReadVIMinMaxV", 0>;
+defm : LMULReadAdvance<"ReadVIMinMaxX", 0>;
+defm : LMULReadAdvance<"ReadVIMulV", 0>;
+defm : LMULReadAdvance<"ReadVIMulX", 0>;
+defm : LMULSEWReadAdvance<"ReadVIDivV", 0>;
+defm : LMULSEWReadAdvance<"ReadVIDivX", 0>;
+defm : LMULReadAdvanceW<"ReadVIWMulV", 0>;
+defm : LMULReadAdvanceW<"ReadVIWMulX", 0>;
+defm : LMULReadAdvance<"ReadVIMulAddV", 0>;
+defm : LMULReadAdvance<"ReadVIMulAddX", 0>;
+defm : LMULReadAdvanceW<"ReadVIWMulAddV", 0>;
+defm : LMULReadAdvanceW<"ReadVIWMulAddX", 0>;
+defm : LMULReadAdvance<"ReadVIMergeV", 0>;
+defm : LMULReadAdvance<"ReadVIMergeX", 0>;
+defm : LMULReadAdvance<"ReadVIMovV", 0>;
+defm : LMULReadAdvance<"ReadVIMovX", 0>;
+
+// Vector Fixed-Point Arithmetic Instructions
+defm "" : LMULReadAdvance<"ReadVSALUV", 0>;
+defm "" : LMULReadAdvance<"ReadVSALUX", 0>;
+defm "" : LMULReadAdvance<"ReadVAALUV", 0>;
+defm "" : LMULReadAdvance<"ReadVAALUX", 0>;
+defm "" : LMULReadAdvance<"ReadVSMulV", 0>;
+defm "" : LMULReadAdvance<"ReadVSMulX", 0>;
+defm "" : LMULReadAdvance<"ReadVSShiftV", 0>;
+defm "" : LMULReadAdvance<"ReadVSShiftX", 0>;
+defm "" : LMULReadAdvanceW<"ReadVNClipV", 0>;
+defm "" : LMULReadAdvanceW<"ReadVNClipX", 0>;
+
+// Vector Floating-Point Instructions
+defm "" : LMULSEWReadAdvanceF<"ReadVFALUV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFALUF", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMulV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMulF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFDivV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFDivF", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddF", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFRecpV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjF", 0>;
+defm "" : LMULReadAdvance<"ReadVFCmpV", 0>;
+defm "" : LMULReadAdvance<"ReadVFCmpF", 0>;
+defm "" : LMULReadAdvance<"ReadVFClassV", 0>;
+defm "" : LMULReadAdvance<"ReadVFMergeV", 0>;
+defm "" : LMULReadAdvance<"ReadVFMergeF", 0>;
+defm "" : LMULReadAdvance<"ReadVFMovF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFCvtIToFV", 0>;
+defm "" : LMULReadAdvance<"ReadVFCvtFToIV", 0>;
+defm "" : LMULSEWReadAdvanceW<"ReadVFWCvtIToFV", 0>;
+defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToIV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWCvtFToFV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtIToFV", 0>;
+defm "" : LMULReadAdvanceW<"ReadVFNCvtFToIV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtFToFV", 0>;
+
+// Vector Reduction Instructions
+def : ReadAdvance<ReadVIRedV, 0>;
+def : ReadAdvance<ReadVIRedV0, 0>;
+def : ReadAdvance<ReadVIWRedV, 0>;
+def : ReadAdvance<ReadVIWRedV0, 0>;
+def : ReadAdvance<ReadVFRedV, 0>;
+def : ReadAdvance<ReadVFRedV0, 0>;
+def : ReadAdvance<ReadVFRedOV, 0>;
+def : ReadAdvance<ReadVFRedOV0, 0>;
+def : ReadAdvance<ReadVFWRedV, 0>;
+def : ReadAdvance<ReadVFWRedV0, 0>;
+def : ReadAdvance<ReadVFWRedOV, 0>;
+def : ReadAdvance<ReadVFWRedOV0, 0>;
+
+// Vector Mask Instructions
+defm "" : LMULReadAdvance<"ReadVMALUV", 0>;
+defm "" : LMULReadAdvance<"ReadVMPopV", 0>;
+defm "" : LMULReadAdvance<"ReadVMFFSV", 0>;
+defm "" : LMULReadAdvance<"ReadVMSFSV", 0>;
+defm "" : LMULReadAdvance<"ReadVIotaV", 0>;
+
+// Vector Permutation Instructions
+def : ReadAdvance<ReadVMovXS, 0>;
+def : ReadAdvance<ReadVMovSX_V, 0>;
+def : ReadAdvance<ReadVMovSX_X, 0>;
+def : ReadAdvance<ReadVMovFS, 0>;
+def : ReadAdvance<ReadVMovSF_V, 0>;
+def : ReadAdvance<ReadVMovSF_F, 0>;
+defm "" : LMULReadAdvance<"ReadVISlideV", 0>;
+defm "" : LMULReadAdvance<"ReadVISlideX", 0>;
+defm "" : LMULReadAdvance<"ReadVFSlideV", 0>;
+defm "" : LMULReadAdvance<"ReadVFSlideF", 0>;
+defm "" : LMULSEWReadAdvance<"ReadVRGatherVV_data", 0>;
+defm "" : LMULSEWReadAdvance<"ReadVRGatherVV_index", 0>;
+defm "" : LMULSEWReadAdvance<"ReadVRGatherEI16VV_data", 0>;
+defm "" : LMULSEWReadAdvance<"ReadVRGatherEI16VV_index", 0>;
+defm "" : LMULReadAdvance<"ReadVRGatherVX_data", 0>;
+defm "" : LMULReadAdvance<"ReadVRGatherVX_index", 0>;
+defm "" : LMULReadAdvance<"ReadVRGatherVI_data", 0>;
+defm "" : LMULSEWReadAdvance<"ReadVCompressV", 0>;
+// LMUL Aware
+def : ReadAdvance<ReadVMov1V, 0>;
+def : ReadAdvance<ReadVMov2V, 0>;
+def : ReadAdvance<ReadVMov4V, 0>;
+def : ReadAdvance<ReadVMov8V, 0>;
+
+// Others
+def : ReadAdvance<ReadVMask, 0>;
+def : ReadAdvance<ReadVPassthru_WorstCase, 0>;
+foreach mx = SchedMxList in {
+ def : ReadAdvance<!cast<SchedRead>("ReadVPassthru_" # mx), 0>;
+ foreach sew = SchedSEWSet<mx>.val in
+ def : ReadAdvance<!cast<SchedRead>("ReadVPassthru_" # mx # "_E" # sew), 0>;
+}
+
+//===----------------------------------------------------------------------===//
// Unsupported extensions
defm : UnsupportedSchedQ;
-defm : UnsupportedSchedV;
defm : UnsupportedSchedZabha;
defm : UnsupportedSchedZbc;
defm : UnsupportedSchedZbkb;
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td
index d11b446..601308b 100644
--- a/llvm/lib/Target/RISCV/RISCVScheduleV.td
+++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td
@@ -19,6 +19,14 @@ defvar SchedMxListFW = !listremove(SchedMxList, ["M8", "MF8"]);
defvar SchedMxListF = !listremove(SchedMxList, ["MF8"]);
// Used for widening floating-point Reduction as it doesn't contain MF8.
defvar SchedMxListFWRed = SchedMxListF;
+// Used for indexed and strided loads of 8-bit lanes, same as the full MX list
+defvar SchedMxListEEW8 = SchedMxList;
+// Used for indexed and strided loads of 16-bit lanes
+defvar SchedMxListEEW16 = SchedMxListF;
+// Used for indexed and strided loads of 32-bit lanes
+defvar SchedMxListEEW32 = !listremove(SchedMxListEEW16, ["MF4"]);
+// Used for indexed and strided loads of 64-bit lanes
+defvar SchedMxListEEW64 = !listremove(SchedMxListEEW32, ["MF2"]);
class SchedSEWSet<string mx, bit isF = 0, bit isWidening = 0> {
assert !or(!not(isF), !ne(mx, "MF8")), "LMUL shouldn't be MF8 for floating-point";
diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
index 041dd07..8b66aa1 100644
--- a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp
@@ -22,27 +22,22 @@ RISCVSelectionDAGInfo::~RISCVSelectionDAGInfo() = default;
void RISCVSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
const SDNode *N) const {
+ SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N);
+
#ifndef NDEBUG
+ // Some additional checks not yet implemented by verifyTargetNode.
switch (N->getOpcode()) {
- default:
- return SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N);
case RISCVISD::TUPLE_EXTRACT:
- assert(N->getNumOperands() == 2 && "Expected three operands!");
assert(N->getOperand(1).getOpcode() == ISD::TargetConstant &&
- N->getOperand(1).getValueType() == MVT::i32 &&
- "Expected index to be an i32 target constant!");
+ "Expected index to be a target constant!");
break;
case RISCVISD::TUPLE_INSERT:
- assert(N->getNumOperands() == 3 && "Expected three operands!");
assert(N->getOperand(2).getOpcode() == ISD::TargetConstant &&
- N->getOperand(2).getValueType() == MVT::i32 &&
- "Expected index to be an i32 target constant!");
+ "Expected index to be a target constant!");
break;
case RISCVISD::VQDOT_VL:
case RISCVISD::VQDOTU_VL:
case RISCVISD::VQDOTSU_VL: {
- assert(N->getNumValues() == 1 && "Expected one result!");
- assert(N->getNumOperands() == 5 && "Expected five operands!");
EVT VT = N->getValueType(0);
assert(VT.isScalableVector() && VT.getVectorElementType() == MVT::i32 &&
"Expected result to be an i32 scalable vector");
@@ -52,13 +47,9 @@ void RISCVSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
"Expected result and first 3 operands to have the same type!");
EVT MaskVT = N->getOperand(3).getValueType();
assert(MaskVT.isScalableVector() &&
- MaskVT.getVectorElementType() == MVT::i1 &&
MaskVT.getVectorElementCount() == VT.getVectorElementCount() &&
"Expected mask VT to be an i1 scalable vector with same number of "
"elements as the result");
- assert((N->getOperand(4).getValueType() == MVT::i32 ||
- N->getOperand(4).getValueType() == MVT::i64) &&
- "Expect VL operand to be i32 or i64");
break;
}
}
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index 715ac4c..f86265a 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -69,6 +69,12 @@ static cl::opt<bool> UseMIPSCCMovInsn("use-riscv-mips-ccmov",
cl::desc("Use 'mips.ccmov' instruction"),
cl::init(true), cl::Hidden);
+static cl::opt<bool> EnablePExtCodeGen(
+ "enable-p-ext-codegen",
+ cl::desc("Turn on P Extension codegen(This is a temporary switch where "
+ "only partial codegen is currently supported)"),
+ cl::init(false), cl::Hidden);
+
void RISCVSubtarget::anchor() {}
RISCVSubtarget &
@@ -82,6 +88,8 @@ RISCVSubtarget::initializeSubtargetDependencies(const Triple &TT, StringRef CPU,
if (TuneCPU.empty())
TuneCPU = CPU;
+ if (TuneCPU == "generic")
+ TuneCPU = Is64Bit ? "generic-rv64" : "generic-rv32";
TuneInfo = RISCVTuneInfoTable::getRISCVTuneInfo(TuneCPU);
// If there is no TuneInfo for this CPU, we fall back to generic.
@@ -104,7 +112,7 @@ RISCVSubtarget::RISCVSubtarget(const Triple &TT, StringRef CPU,
RVVVectorBitsMin(RVVVectorBitsMin), RVVVectorBitsMax(RVVVectorBitsMax),
FrameLowering(
initializeSubtargetDependencies(TT, CPU, TuneCPU, FS, ABIName)),
- InstrInfo(*this), RegInfo(getHwMode()), TLInfo(TM, *this) {
+ InstrInfo(*this), TLInfo(TM, *this) {
TSInfo = std::make_unique<RISCVSelectionDAGInfo>();
}
@@ -145,6 +153,10 @@ bool RISCVSubtarget::useConstantPoolForLargeInts() const {
return !RISCVDisableUsingConstantPoolForLargeInts;
}
+bool RISCVSubtarget::enablePExtCodeGen() const {
+ return HasStdExtP && EnablePExtCodeGen;
+}
+
unsigned RISCVSubtarget::getMaxBuildIntsCost() const {
// Loading integer from constant pool needs two instructions (the reason why
// the minimum cost is 2): an address calculation instruction and a load
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 4b4fc8f..ae6ca97 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -112,7 +112,6 @@ private:
RISCVFrameLowering FrameLowering;
RISCVInstrInfo InstrInfo;
- RISCVRegisterInfo RegInfo;
RISCVTargetLowering TLInfo;
/// Initializes using the passed in CPU and feature strings so that we can
@@ -140,7 +139,7 @@ public:
}
const RISCVInstrInfo *getInstrInfo() const override { return &InstrInfo; }
const RISCVRegisterInfo *getRegisterInfo() const override {
- return &RegInfo;
+ return &InstrInfo.getRegisterInfo();
}
const RISCVTargetLowering *getTargetLowering() const override {
return &TLInfo;
@@ -187,7 +186,7 @@ public:
}
bool hasCLZLike() const {
- return HasStdExtZbb || HasStdExtP || HasVendorXTHeadBb ||
+ return HasStdExtZbb || HasVendorXTHeadBb ||
(HasVendorXCVbitmanip && !IsRV64);
}
bool hasCTZLike() const {
@@ -197,7 +196,7 @@ public:
return HasStdExtZbb || (HasVendorXCVbitmanip && !IsRV64);
}
bool hasREV8Like() const {
- return HasStdExtZbb || HasStdExtZbkb || HasStdExtP || HasVendorXTHeadBb;
+ return HasStdExtZbb || HasStdExtZbkb || HasVendorXTHeadBb;
}
bool hasBEXTILike() const { return HasStdExtZbs || HasVendorXTHeadBs; }
@@ -209,7 +208,7 @@ public:
bool hasConditionalMoveFusion() const {
// Do we support fusing a branch+mv or branch+c.mv as a conditional move.
return (hasConditionalCompressedMoveFusion() && hasStdExtZca()) ||
- hasShortForwardBranchOpt();
+ hasShortForwardBranchIALU();
}
bool hasShlAdd(int64_t ShAmt) const {
@@ -238,6 +237,13 @@ public:
return 0;
}
+
+ Align getZilsdAlign() const {
+ return Align(enableUnalignedScalarMem() ? 1
+ : allowZilsd4ByteAlign() ? 4
+ : 8);
+ }
+
unsigned getELen() const {
assert(hasVInstructions() && "Expected V extension");
return hasVInstructionsI64() ? 64 : 32;
@@ -322,6 +328,8 @@ public:
}
}
+ bool enablePExtCodeGen() const;
+
// Returns VLEN divided by DLEN. Where DLEN is the datapath width of the
// vector hardware implementation which may be less than VLEN.
unsigned getDLenFactor() const {
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 16ef67d..52dc385 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -103,6 +103,11 @@ static cl::opt<bool>
cl::desc("Enable Machine Pipeliner for RISC-V"),
cl::init(false), cl::Hidden);
+static cl::opt<bool> EnableCFIInstrInserter(
+ "riscv-enable-cfi-instr-inserter",
+ cl::desc("Enable CFI Instruction Inserter for RISC-V"), cl::init(false),
+ cl::Hidden);
+
extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
@@ -118,7 +123,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
initializeRISCVLateBranchOptPass(*PR);
initializeRISCVMakeCompressibleOptPass(*PR);
initializeRISCVGatherScatterLoweringPass(*PR);
- initializeRISCVCodeGenPreparePass(*PR);
+ initializeRISCVCodeGenPrepareLegacyPassPass(*PR);
initializeRISCVPostRAExpandPseudoPass(*PR);
initializeRISCVMergeBaseOffsetOptPass(*PR);
initializeRISCVOptWInstrsPass(*PR);
@@ -136,6 +141,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
initializeRISCVPushPopOptPass(*PR);
initializeRISCVIndirectBranchTrackingPass(*PR);
initializeRISCVLoadStoreOptPass(*PR);
+ initializeRISCVPreAllocZilsdOptPass(*PR);
initializeRISCVExpandAtomicPseudoPass(*PR);
initializeRISCVRedundantCopyEliminationPass(*PR);
initializeRISCVAsmPrinterPass(*PR);
@@ -169,7 +175,7 @@ RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT,
if (TT.isOSFuchsia() && !TT.isArch64Bit())
report_fatal_error("Fuchsia is only supported for 64-bit");
- setCFIFixup(true);
+ setCFIFixup(!EnableCFIInstrInserter);
}
const RISCVSubtarget *
@@ -456,7 +462,7 @@ void RISCVPassConfig::addIRPasses() {
addPass(createRISCVGatherScatterLoweringPass());
addPass(createInterleavedAccessPass());
- addPass(createRISCVCodeGenPreparePass());
+ addPass(createRISCVCodeGenPrepareLegacyPass());
}
TargetPassConfig::addIRPasses();
@@ -578,6 +584,9 @@ void RISCVPassConfig::addPreEmitPass2() {
addPass(createUnpackMachineBundles([&](const MachineFunction &MF) {
return MF.getFunction().getParent()->getModuleFlag("kcfi");
}));
+
+ if (EnableCFIInstrInserter)
+ addPass(createCFIInstrInserter());
}
void RISCVPassConfig::addMachineSSAOptimization() {
@@ -596,6 +605,8 @@ void RISCVPassConfig::addPreRegAlloc() {
if (TM->getOptLevel() != CodeGenOptLevel::None) {
addPass(createRISCVMergeBaseOffsetOptPass());
addPass(createRISCVVLOptimizerPass());
+ // Add Zilsd pre-allocation load/store optimization
+ addPass(createRISCVPreAllocZilsdOptPass());
}
addPass(createRISCVInsertReadWriteCSRPass());
@@ -628,6 +639,9 @@ bool RISCVPassConfig::addILPOpts() {
}
void RISCVTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
+#define GET_PASS_REGISTRY "RISCVPassRegistry.def"
+#include "llvm/Passes/TargetPassRegistry.inc"
+
PB.registerLateLoopOptimizationsEPCallback([=](LoopPassManager &LPM,
OptimizationLevel Level) {
if (Level != OptimizationLevel::O0)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 3d8eb40..bb469e9 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -969,6 +969,13 @@ InstructionCost RISCVTTIImpl::getScalarizationOverhead(
if (isa<ScalableVectorType>(Ty))
return InstructionCost::getInvalid();
+ // TODO: Add a proper cost model for P extension fixed vectors (e.g., v4i16).
+ // For now, skip all fixed-vector cost analysis when P extension codegen is
+ // enabled, to avoid crashes in getMinRVVVectorSizeInBits().
+ if (ST->enablePExtCodeGen() && isa<FixedVectorType>(Ty)) {
+ return 1; // Treat as a single instruction cost for now.
+ }
+
// A build_vector (which is m1 sized or smaller) can be done in no
// worse than one vslide1down.vx per element in the type. We could
// in theory do an explode_vector in the inverse manner, but our
@@ -1001,13 +1008,52 @@ InstructionCost RISCVTTIImpl::getScalarizationOverhead(
}
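+// Dispatch memory-intrinsic cost queries to the matching specialized hook;
+// anything not handled here falls back to the base implementation.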
InstructionCost
-RISCVTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
- unsigned AddressSpace,
+RISCVTTIImpl::getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA,
+ TTI::TargetCostKind CostKind) const {
+ Type *DataTy = MICA.getDataType();
+ Align Alignment = MICA.getAlignment();
+ switch (MICA.getID()) {
+ case Intrinsic::vp_load_ff: {
+ EVT DataTypeVT = TLI->getValueType(DL, DataTy);
+ if (!TLI->isLegalFirstFaultLoad(DataTypeVT, Alignment))
+ return BaseT::getMemIntrinsicInstrCost(MICA, CostKind);
+
+ unsigned AS = MICA.getAddressSpace();
+ return getMemoryOpCost(Instruction::Load, DataTy, Alignment, AS, CostKind,
+ {TTI::OK_AnyValue, TTI::OP_None}, nullptr);
+ }
+ case Intrinsic::experimental_vp_strided_load:
+ case Intrinsic::experimental_vp_strided_store:
+ return getStridedMemoryOpCost(MICA, CostKind);
+ case Intrinsic::masked_compressstore:
+ case Intrinsic::masked_expandload:
+ return getExpandCompressMemoryOpCost(MICA, CostKind);
+ case Intrinsic::vp_scatter:
+ case Intrinsic::vp_gather:
+ case Intrinsic::masked_scatter:
+ case Intrinsic::masked_gather:
+ return getGatherScatterOpCost(MICA, CostKind);
+ case Intrinsic::vp_load:
+ case Intrinsic::vp_store:
+ case Intrinsic::masked_load:
+ case Intrinsic::masked_store:
+ return getMaskedMemoryOpCost(MICA, CostKind);
+ }
+ return BaseT::getMemIntrinsicInstrCost(MICA, CostKind);
+}
+
+InstructionCost
+RISCVTTIImpl::getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
TTI::TargetCostKind CostKind) const {
+ unsigned Opcode = MICA.getID() == Intrinsic::masked_load ? Instruction::Load
+ : Instruction::Store;
+ Type *Src = MICA.getDataType();
+ Align Alignment = MICA.getAlignment();
+ unsigned AddressSpace = MICA.getAddressSpace();
+
if (!isLegalMaskedLoadStore(Src, Alignment) ||
CostKind != TTI::TCK_RecipThroughput)
- return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
- CostKind);
+ return BaseT::getMemIntrinsicInstrCost(MICA, CostKind);
return getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
}
@@ -1109,19 +1155,24 @@ InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost(
return MemCost + ShuffleCost;
}
-InstructionCost RISCVTTIImpl::getGatherScatterOpCost(
- unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
- Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
+InstructionCost
+RISCVTTIImpl::getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA,
+ TTI::TargetCostKind CostKind) const {
+
+ bool IsLoad = MICA.getID() == Intrinsic::masked_gather ||
+ MICA.getID() == Intrinsic::vp_gather;
+ unsigned Opcode = IsLoad ? Instruction::Load : Instruction::Store;
+ Type *DataTy = MICA.getDataType();
+ Align Alignment = MICA.getAlignment();
+ const Instruction *I = MICA.getInst();
if (CostKind != TTI::TCK_RecipThroughput)
- return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
- Alignment, CostKind, I);
+ return BaseT::getMemIntrinsicInstrCost(MICA, CostKind);
if ((Opcode == Instruction::Load &&
!isLegalMaskedGather(DataTy, Align(Alignment))) ||
(Opcode == Instruction::Store &&
!isLegalMaskedScatter(DataTy, Align(Alignment))))
- return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
- Alignment, CostKind, I);
+ return BaseT::getMemIntrinsicInstrCost(MICA, CostKind);
// Cost is proportional to the number of memory operations implied. For
// scalable vectors, we use an estimate on that number since we don't
@@ -1135,15 +1186,20 @@ InstructionCost RISCVTTIImpl::getGatherScatterOpCost(
}
InstructionCost RISCVTTIImpl::getExpandCompressMemoryOpCost(
- unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
- TTI::TargetCostKind CostKind, const Instruction *I) const {
+ const MemIntrinsicCostAttributes &MICA,
+ TTI::TargetCostKind CostKind) const {
+ unsigned Opcode = MICA.getID() == Intrinsic::masked_expandload
+ ? Instruction::Load
+ : Instruction::Store;
+ Type *DataTy = MICA.getDataType();
+ bool VariableMask = MICA.getVariableMask();
+ Align Alignment = MICA.getAlignment();
bool IsLegal = (Opcode == Instruction::Store &&
isLegalMaskedCompressStore(DataTy, Alignment)) ||
(Opcode == Instruction::Load &&
isLegalMaskedExpandLoad(DataTy, Alignment));
if (!IsLegal || CostKind != TTI::TCK_RecipThroughput)
- return BaseT::getExpandCompressMemoryOpCost(Opcode, DataTy, VariableMask,
- Alignment, CostKind, I);
+ return BaseT::getMemIntrinsicInstrCost(MICA, CostKind);
// Example compressstore sequence:
// vsetivli zero, 8, e32, m2, ta, ma (ignored)
// vcompress.vm v10, v8, v0
@@ -1172,14 +1228,20 @@ InstructionCost RISCVTTIImpl::getExpandCompressMemoryOpCost(
LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
}
-InstructionCost RISCVTTIImpl::getStridedMemoryOpCost(
- unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
- Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
- if (((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
- !isLegalStridedLoadStore(DataTy, Alignment)) ||
- (Opcode != Instruction::Load && Opcode != Instruction::Store))
- return BaseT::getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
- Alignment, CostKind, I);
+InstructionCost
+RISCVTTIImpl::getStridedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
+ TTI::TargetCostKind CostKind) const {
+
+ unsigned Opcode = MICA.getID() == Intrinsic::experimental_vp_strided_load
+ ? Instruction::Load
+ : Instruction::Store;
+
+ Type *DataTy = MICA.getDataType();
+ Align Alignment = MICA.getAlignment();
+ const Instruction *I = MICA.getInst();
+
+ if (!isLegalStridedLoadStore(DataTy, Alignment))
+ return BaseT::getMemIntrinsicInstrCost(MICA, CostKind);
if (CostKind == TTI::TCK_CodeSize)
return TTI::TCC_Basic;
@@ -1497,6 +1559,23 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
}
break;
}
+ case Intrinsic::fshl:
+ case Intrinsic::fshr: {
+ if (ICA.getArgs().empty())
+ break;
+
+ // Funnel-shifts are ROTL/ROTR when the first and second operand are equal.
+ // When Zbb/Zbkb is enabled we can use a single ROL(W)/ROR(I)(W)
+ // instruction.
+ if ((ST->hasStdExtZbb() || ST->hasStdExtZbkb()) && RetTy->isIntegerTy() &&
+ ICA.getArgs()[0] == ICA.getArgs()[1] &&
+ (RetTy->getIntegerBitWidth() == 32 ||
+ RetTy->getIntegerBitWidth() == 64) &&
+ RetTy->getIntegerBitWidth() <= ST->getXLen()) {
+ return 1;
+ }
+ break;
+ }
case Intrinsic::get_active_lane_mask: {
if (ST->hasVInstructions()) {
Type *ExpRetTy = VectorType::get(
@@ -1543,16 +1622,6 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return Cost;
}
- case Intrinsic::experimental_vp_splat: {
- auto LT = getTypeLegalizationCost(RetTy);
- // TODO: Lower i1 experimental_vp_splat
- if (!ST->hasVInstructions() || LT.second.getScalarType() == MVT::i1)
- return InstructionCost::getInvalid();
- return LT.first * getRISCVInstructionCost(LT.second.isFloatingPoint()
- ? RISCV::VFMV_V_F
- : RISCV::VMV_V_X,
- LT.second, CostKind);
- }
case Intrinsic::experimental_vp_splice: {
// To support type-based queries from the vectorizer, set the index to 0.
// Note that the index only changes the cost from vslide.vx to vslide.vi and in
@@ -1625,6 +1694,14 @@ InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
if (!IsVectorType)
return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
+ // TODO: Add a proper cost model for P extension fixed vectors (e.g., v4i16).
+ // For now, skip all fixed-vector cost analysis when P extension codegen is
+ // enabled, to avoid crashes in getMinRVVVectorSizeInBits().
+ if (ST->enablePExtCodeGen() &&
+ (isa<FixedVectorType>(Dst) || isa<FixedVectorType>(Src))) {
+ return 1; // Treat as a single instruction cost for now.
+ }
+
// FIXME: Need to compute legalizing cost for illegal types. The current
// code handles only legal types and those which can be trivially
// promoted to legal.
@@ -2323,6 +2400,13 @@ InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
const Value *Op1) const {
assert(Val->isVectorTy() && "This must be a vector type");
+ // TODO: Add a proper cost model for P extension fixed vectors (e.g., v4i16).
+ // For now, skip all fixed-vector cost analysis when P extension codegen is
+ // enabled, to avoid crashes in getMinRVVVectorSizeInBits().
+ if (ST->enablePExtCodeGen() && isa<FixedVectorType>(Val)) {
+ return 1; // Treat as a single instruction cost for now.
+ }
+
if (Opcode != Instruction::ExtractElement &&
Opcode != Instruction::InsertElement)
return BaseT::getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
@@ -2708,7 +2792,10 @@ void RISCVTTIImpl::getUnrollingPreferences(
// Both auto-vectorized loops and the scalar remainder have the
// isvectorized attribute, so differentiate between them by the presence
// of vector instructions.
- if (IsVectorized && I.getType()->isVectorTy())
+ if (IsVectorized && (I.getType()->isVectorTy() ||
+ llvm::any_of(I.operand_values(), [](Value *V) {
+ return V->getType()->isVectorTy();
+ })))
return;
if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
@@ -3322,11 +3409,8 @@ bool RISCVTTIImpl::isProfitableToSinkOperands(
if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
continue;
- // We are looking for a splat/vp.splat that can be sunk.
- bool IsVPSplat = match(Op, m_Intrinsic<Intrinsic::experimental_vp_splat>(
- m_Value(), m_Value(), m_Value()));
- if (!IsVPSplat &&
- !match(Op, m_Shuffle(m_InsertElt(m_Value(), m_Value(), m_ZeroInt()),
+ // We are looking for a splat that can be sunk.
+ if (!match(Op, m_Shuffle(m_InsertElt(m_Value(), m_Value(), m_ZeroInt()),
m_Value(), m_ZeroMask())))
continue;
@@ -3343,16 +3427,11 @@ bool RISCVTTIImpl::isProfitableToSinkOperands(
}
// Sink any fpexts since they might be used in a widening fp pattern.
- if (IsVPSplat) {
- if (isa<FPExtInst>(Op->getOperand(0)))
- Ops.push_back(&Op->getOperandUse(0));
- } else {
- Use *InsertEltUse = &Op->getOperandUse(0);
- auto *InsertElt = cast<InsertElementInst>(InsertEltUse);
- if (isa<FPExtInst>(InsertElt->getOperand(1)))
- Ops.push_back(&InsertElt->getOperandUse(1));
- Ops.push_back(InsertEltUse);
- }
+ Use *InsertEltUse = &Op->getOperandUse(0);
+ auto *InsertElt = cast<InsertElementInst>(InsertEltUse);
+ if (isa<FPExtInst>(InsertElt->getOperand(1)))
+ Ops.push_back(&InsertElt->getOperandUse(1));
+ Ops.push_back(InsertEltUse);
Ops.push_back(&OpIdx.value());
}
return true;
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 6886e896..e6b75d7 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -144,9 +144,11 @@ public:
bool shouldConsiderVectorizationRegPressure() const override { return true; }
InstructionCost
- getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
- unsigned AddressSpace,
- TTI::TargetCostKind CostKind) const override;
+ getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA,
+ TTI::TargetCostKind CostKind) const override;
+
+ InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
+ TTI::TargetCostKind CostKind) const;
InstructionCost
getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
@@ -191,22 +193,15 @@ public:
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond = false, bool UseMaskForGaps = false) const override;
- InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
- const Value *Ptr, bool VariableMask,
- Align Alignment,
- TTI::TargetCostKind CostKind,
- const Instruction *I) const override;
+ InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA,
+ TTI::TargetCostKind CostKind) const;
InstructionCost
- getExpandCompressMemoryOpCost(unsigned Opcode, Type *Src, bool VariableMask,
- Align Alignment, TTI::TargetCostKind CostKind,
- const Instruction *I = nullptr) const override;
+ getExpandCompressMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
+ TTI::TargetCostKind CostKind) const;
- InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
- const Value *Ptr, bool VariableMask,
- Align Alignment,
- TTI::TargetCostKind CostKind,
- const Instruction *I) const override;
+ InstructionCost getStridedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
+ TTI::TargetCostKind CostKind) const;
InstructionCost
getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const override;
@@ -286,11 +281,13 @@ public:
}
bool isLegalMaskedLoad(Type *DataType, Align Alignment,
- unsigned /*AddressSpace*/) const override {
+ unsigned /*AddressSpace*/,
+ TTI::MaskKind /*MaskKind*/) const override {
return isLegalMaskedLoadStore(DataType, Alignment);
}
bool isLegalMaskedStore(Type *DataType, Align Alignment,
- unsigned /*AddressSpace*/) const override {
+ unsigned /*AddressSpace*/,
+ TTI::MaskKind /*MaskKind*/) const override {
return isLegalMaskedLoadStore(DataType, Alignment);
}
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 0a8838c..638bf12 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -62,7 +62,7 @@ struct DemandedVL {
};
class RISCVVLOptimizer : public MachineFunctionPass {
- const MachineRegisterInfo *MRI;
+ MachineRegisterInfo *MRI;
const MachineDominatorTree *MDT;
const TargetInstrInfo *TII;
@@ -85,7 +85,7 @@ private:
DemandedVL getMinimumVLForUser(const MachineOperand &UserOp) const;
/// Returns true if the users of \p MI have compatible EEWs and SEWs.
bool checkUsers(const MachineInstr &MI) const;
- bool tryReduceVL(MachineInstr &MI) const;
+ bool tryReduceVL(MachineInstr &MI, MachineOperand VL) const;
bool isCandidate(const MachineInstr &MI) const;
void transfer(const MachineInstr &MI);
@@ -1392,6 +1392,42 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
return true;
}
+/// Given a vslidedown.vx like:
+///
+/// %slideamt = ADDI %x, -1
+/// %v = PseudoVSLIDEDOWN_VX %passthru, %src, %slideamt, avl=1
+///
+/// %v will only read the first %slideamt + 1 lanes of %src, which equals %x.
+/// This is a common case when lowering extractelement.
+///
+/// Note that if %x is 0, %slideamt will be all ones. In this case %src will be
+/// completely slid down and none of its lanes will be read (since %slideamt is
+/// greater than the largest VLMAX of 65536) so we can demand any minimum VL.
+static std::optional<DemandedVL>
+getMinimumVLForVSLIDEDOWN_VX(const MachineOperand &UserOp,
+ const MachineRegisterInfo *MRI) {
+ const MachineInstr &MI = *UserOp.getParent();
+ if (RISCV::getRVVMCOpcode(MI.getOpcode()) != RISCV::VSLIDEDOWN_VX)
+ return std::nullopt;
+ // We're looking at what lanes are used from the src operand.
+ if (UserOp.getOperandNo() != 2)
+ return std::nullopt;
+ // For now, the AVL must be 1.
+ const MachineOperand &AVL = MI.getOperand(4);
+ if (!AVL.isImm() || AVL.getImm() != 1)
+ return std::nullopt;
+ // The slide amount must be %x - 1.
+ const MachineOperand &SlideAmt = MI.getOperand(3);
+ if (!SlideAmt.getReg().isVirtual())
+ return std::nullopt;
+ MachineInstr *SlideAmtDef = MRI->getUniqueVRegDef(SlideAmt.getReg());
+ if (SlideAmtDef->getOpcode() != RISCV::ADDI ||
+ SlideAmtDef->getOperand(2).getImm() != -AVL.getImm() ||
+ !SlideAmtDef->getOperand(1).getReg().isVirtual())
+ return std::nullopt;
+ return SlideAmtDef->getOperand(1);
+}
+
DemandedVL
RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const {
const MachineInstr &UserMI = *UserOp.getParent();
@@ -1406,6 +1442,9 @@ RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const {
return DemandedVL::vlmax();
}
+ if (auto VL = getMinimumVLForVSLIDEDOWN_VX(UserOp, MRI))
+ return *VL;
+
if (RISCVII::readsPastVL(
TII->get(RISCV::getRVVMCOpcode(UserMI.getOpcode())).TSFlags)) {
LLVM_DEBUG(dbgs() << " Abort because used by unsafe instruction\n");
@@ -1568,7 +1607,8 @@ bool RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
return true;
}
-bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const {
+bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI,
+ MachineOperand CommonVL) const {
LLVM_DEBUG(dbgs() << "Trying to reduce VL for " << MI);
unsigned VLOpNum = RISCVII::getVLOpNum(MI.getDesc());
@@ -1581,49 +1621,47 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const {
return false;
}
- auto *CommonVL = &DemandedVLs.at(&MI).VL;
-
- assert((CommonVL->isImm() || CommonVL->getReg().isVirtual()) &&
+ assert((CommonVL.isImm() || CommonVL.getReg().isVirtual()) &&
"Expected VL to be an Imm or virtual Reg");
// If the VL is defined by a vleff that doesn't dominate MI, try using the
// vleff's AVL. It will be greater than or equal to the output VL.
- if (CommonVL->isReg()) {
- const MachineInstr *VLMI = MRI->getVRegDef(CommonVL->getReg());
+ if (CommonVL.isReg()) {
+ const MachineInstr *VLMI = MRI->getVRegDef(CommonVL.getReg());
if (RISCVInstrInfo::isFaultOnlyFirstLoad(*VLMI) &&
!MDT->dominates(VLMI, &MI))
- CommonVL = &VLMI->getOperand(RISCVII::getVLOpNum(VLMI->getDesc()));
+ CommonVL = VLMI->getOperand(RISCVII::getVLOpNum(VLMI->getDesc()));
}
- if (!RISCV::isVLKnownLE(*CommonVL, VLOp)) {
+ if (!RISCV::isVLKnownLE(CommonVL, VLOp)) {
LLVM_DEBUG(dbgs() << " Abort due to CommonVL not <= VLOp.\n");
return false;
}
- if (CommonVL->isIdenticalTo(VLOp)) {
+ if (CommonVL.isIdenticalTo(VLOp)) {
LLVM_DEBUG(
dbgs() << " Abort due to CommonVL == VLOp, no point in reducing.\n");
return false;
}
- if (CommonVL->isImm()) {
+ if (CommonVL.isImm()) {
LLVM_DEBUG(dbgs() << " Reduce VL from " << VLOp << " to "
- << CommonVL->getImm() << " for " << MI << "\n");
- VLOp.ChangeToImmediate(CommonVL->getImm());
+ << CommonVL.getImm() << " for " << MI << "\n");
+ VLOp.ChangeToImmediate(CommonVL.getImm());
return true;
}
- const MachineInstr *VLMI = MRI->getVRegDef(CommonVL->getReg());
+ const MachineInstr *VLMI = MRI->getVRegDef(CommonVL.getReg());
if (!MDT->dominates(VLMI, &MI)) {
LLVM_DEBUG(dbgs() << " Abort due to VL not dominating.\n");
return false;
}
- LLVM_DEBUG(
- dbgs() << " Reduce VL from " << VLOp << " to "
- << printReg(CommonVL->getReg(), MRI->getTargetRegisterInfo())
- << " for " << MI << "\n");
+ LLVM_DEBUG(dbgs() << " Reduce VL from " << VLOp << " to "
+ << printReg(CommonVL.getReg(), MRI->getTargetRegisterInfo())
+ << " for " << MI << "\n");
// All our checks passed. We can reduce VL.
- VLOp.ChangeToRegister(CommonVL->getReg(), false);
+ VLOp.ChangeToRegister(CommonVL.getReg(), false);
+ MRI->constrainRegClass(CommonVL.getReg(), &RISCV::GPRNoX0RegClass);
return true;
}
@@ -1678,18 +1716,13 @@ bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) {
// Then go through and see if we can reduce the VL of any instructions to
// only what's demanded.
bool MadeChange = false;
- for (MachineBasicBlock &MBB : MF) {
- // Avoid unreachable blocks as they have degenerate dominance
- if (!MDT->isReachableFromEntry(&MBB))
+ for (auto &[MI, VL] : DemandedVLs) {
+ assert(MDT->isReachableFromEntry(MI->getParent()));
+ if (!isCandidate(*MI))
continue;
-
- for (auto &MI : reverse(MBB)) {
- if (!isCandidate(MI))
- continue;
- if (!tryReduceVL(MI))
- continue;
- MadeChange = true;
- }
+ if (!tryReduceVL(*const_cast<MachineInstr *>(MI), VL.VL))
+ continue;
+ MadeChange = true;
}
DemandedVLs.clear();
diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index fdf9a4f..a5385be 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -73,7 +73,7 @@ private:
bool isAllOnesMask(const MachineInstr *MaskDef) const;
std::optional<unsigned> getConstant(const MachineOperand &VL) const;
bool ensureDominates(const MachineOperand &Use, MachineInstr &Src) const;
- bool isKnownSameDefs(Register A, Register B) const;
+ Register lookThruCopies(Register Reg, bool OneUseOnly = false) const;
};
} // namespace
@@ -387,23 +387,21 @@ bool RISCVVectorPeephole::convertAllOnesVMergeToVMv(MachineInstr &MI) const {
return true;
}
-bool RISCVVectorPeephole::isKnownSameDefs(Register A, Register B) const {
- if (A.isPhysical() || B.isPhysical())
- return false;
-
- auto LookThruVirtRegCopies = [this](Register Reg) {
- while (MachineInstr *Def = MRI->getUniqueVRegDef(Reg)) {
- if (!Def->isFullCopy())
- break;
- Register Src = Def->getOperand(1).getReg();
- if (!Src.isVirtual())
- break;
- Reg = Src;
- }
- return Reg;
- };
-
- return LookThruVirtRegCopies(A) == LookThruVirtRegCopies(B);
+// If \p Reg is defined by a chain of full COPYs of virtual registers,
+// traverses the chain and returns the root non-COPY source; otherwise returns
+// \p Reg. If \p OneUseOnly is set, the traversal stops at the first register
+// with more than one non-debug use.
+Register RISCVVectorPeephole::lookThruCopies(Register Reg,
+ bool OneUseOnly) const {
+ while (MachineInstr *Def = MRI->getUniqueVRegDef(Reg)) {
+ if (!Def->isFullCopy())
+ break;
+ Register Src = Def->getOperand(1).getReg();
+ if (!Src.isVirtual())
+ break;
+ if (OneUseOnly && !MRI->hasOneNonDBGUse(Reg))
+ break;
+ Reg = Src;
+ }
+ return Reg;
}
/// If a PseudoVMERGE_VVM's true operand is a masked pseudo and both have the
@@ -428,10 +426,11 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) {
if (!TrueMaskedInfo || !hasSameEEW(MI, *True))
return false;
- const MachineOperand &TrueMask =
- True->getOperand(TrueMaskedInfo->MaskOpIdx + True->getNumExplicitDefs());
- const MachineOperand &MIMask = MI.getOperand(4);
- if (!isKnownSameDefs(TrueMask.getReg(), MIMask.getReg()))
+ Register TrueMaskReg = lookThruCopies(
+ True->getOperand(TrueMaskedInfo->MaskOpIdx + True->getNumExplicitDefs())
+ .getReg());
+ Register MIMaskReg = lookThruCopies(MI.getOperand(4).getReg());
+ if (!TrueMaskReg.isVirtual() || TrueMaskReg != MIMaskReg)
return false;
// Masked off lanes past TrueVL will come from False, and converting to vmv
@@ -455,7 +454,7 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) {
True->getOperand(1).setReg(MI.getOperand(2).getReg());
// If True is masked then its passthru needs to be in VRNoV0.
MRI->constrainRegClass(True->getOperand(1).getReg(),
- TII->getRegClass(True->getDesc(), 1, TRI));
+ TII->getRegClass(True->getDesc(), 1));
}
MI.setDesc(TII->get(NewOpc));
@@ -652,11 +651,23 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
if (!hasSameEEW(MI, *Src))
return false;
+ std::optional<std::pair<unsigned, unsigned>> NeedsCommute;
+
// Src needs to have the same passthru as VMV_V_V
MachineOperand &SrcPassthru = Src->getOperand(Src->getNumExplicitDefs());
if (SrcPassthru.getReg().isValid() &&
- SrcPassthru.getReg() != Passthru.getReg())
- return false;
+ SrcPassthru.getReg() != Passthru.getReg()) {
+ // If Src's passthru != Passthru, check if it uses Passthru in another
+ // operand and try to commute it.
+ int OtherIdx = Src->findRegisterUseOperandIdx(Passthru.getReg(), TRI);
+ if (OtherIdx == -1)
+ return false;
+ unsigned OpIdx1 = OtherIdx;
+ unsigned OpIdx2 = Src->getNumExplicitDefs();
+ if (!TII->findCommutedOpIndices(*Src, OpIdx1, OpIdx2))
+ return false;
+ NeedsCommute = {OpIdx1, OpIdx2};
+ }
// Src VL will have already been reduced if legal (see tryToReduceVL),
// so we don't need to handle a smaller source VL here. However, the
@@ -669,13 +680,20 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
if (!ensureDominates(Passthru, *Src))
return false;
+ if (NeedsCommute) {
+ auto [OpIdx1, OpIdx2] = *NeedsCommute;
+ [[maybe_unused]] bool Commuted =
+ TII->commuteInstruction(*Src, /*NewMI=*/false, OpIdx1, OpIdx2);
+ assert(Commuted && "Failed to commute Src?");
+ }
+
if (SrcPassthru.getReg() != Passthru.getReg()) {
SrcPassthru.setReg(Passthru.getReg());
// If Src is masked then its passthru needs to be in VRNoV0.
if (Passthru.getReg().isValid())
MRI->constrainRegClass(
Passthru.getReg(),
- TII->getRegClass(Src->getDesc(), SrcPassthru.getOperandNo(), TRI));
+ TII->getRegClass(Src->getDesc(), SrcPassthru.getOperandNo()));
}
if (RISCVII::hasVecPolicyOp(Src->getDesc().TSFlags)) {
@@ -717,9 +735,10 @@ bool RISCVVectorPeephole::foldVMergeToMask(MachineInstr &MI) const {
if (RISCV::getRVVMCOpcode(MI.getOpcode()) != RISCV::VMERGE_VVM)
return false;
- Register PassthruReg = MI.getOperand(1).getReg();
- Register FalseReg = MI.getOperand(2).getReg();
- Register TrueReg = MI.getOperand(3).getReg();
+ Register PassthruReg = lookThruCopies(MI.getOperand(1).getReg());
+ Register FalseReg = lookThruCopies(MI.getOperand(2).getReg());
+ Register TrueReg =
+ lookThruCopies(MI.getOperand(3).getReg(), /*OneUseOnly=*/true);
if (!TrueReg.isVirtual() || !MRI->hasOneUse(TrueReg))
return false;
MachineInstr &True = *MRI->getUniqueVRegDef(TrueReg);
@@ -740,16 +759,17 @@ bool RISCVVectorPeephole::foldVMergeToMask(MachineInstr &MI) const {
// We require that either passthru and false are the same, or that passthru
// is undefined.
- if (PassthruReg && !isKnownSameDefs(PassthruReg, FalseReg))
+ if (PassthruReg && !(PassthruReg.isVirtual() && PassthruReg == FalseReg))
return false;
std::optional<std::pair<unsigned, unsigned>> NeedsCommute;
// If True has a passthru operand then it needs to be the same as vmerge's
// False, since False will be used for the result's passthru operand.
- Register TruePassthru = True.getOperand(True.getNumExplicitDefs()).getReg();
+ Register TruePassthru =
+ lookThruCopies(True.getOperand(True.getNumExplicitDefs()).getReg());
if (RISCVII::isFirstDefTiedToFirstUse(True.getDesc()) && TruePassthru &&
- !isKnownSameDefs(TruePassthru, FalseReg)) {
+ !(TruePassthru.isVirtual() && TruePassthru == FalseReg)) {
// If True's passthru != False, check if it uses False in another operand
// and try to commute it.
int OtherIdx = True.findRegisterUseOperandIdx(FalseReg, TRI);
@@ -837,6 +857,8 @@ bool RISCVVectorPeephole::foldVMergeToMask(MachineInstr &MI) const {
MRI->constrainRegClass(
MO.getReg(), True.getRegClassConstraint(MO.getOperandNo(), TII, TRI));
}
+ // Clear kill flags on FalseReg since it has a new use now.
+ MRI->clearKillFlags(FalseReg);
MI.eraseFromParent();
return true;
diff --git a/llvm/lib/Target/RISCV/RISCVZilsdOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVZilsdOptimizer.cpp
new file mode 100644
index 0000000..3b47903
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVZilsdOptimizer.cpp
@@ -0,0 +1,527 @@
+//===-- RISCVZilsdOptimizer.cpp - RISC-V Zilsd Load/Store Optimizer ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that performs load/store optimizations for the
+// RISC-V Zilsd extension. It combines pairs of 32-bit load/store instructions
+// into single 64-bit LD/SD instructions when possible.
+//
+// The pass runs in two phases:
+// 1. Pre-allocation: Reschedules loads/stores to bring consecutive memory
+// accesses closer together and forms LD/SD pairs with register hints.
+// 2. Post-allocation: Fixes invalid LD/SD instructions if register allocation
+// didn't provide suitable consecutive registers.
+//
+// Note: the second phase is integrated into RISCVLoadStoreOptimizer
+//
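+// For illustration only (register names and offsets are hypothetical), the
+// pre-allocation phase rewrites a pair of adjacent word accesses such as
+//
+//   lw a0, 0(a2)
+//   lw a1, 4(a2)
+//
+// into a single paired pseudo that is later expanded to a Zilsd LD:
+//
+//   PseudoLD_RV32_OPT a0, a1, a2, 0
+//
+// provided the alignment and register constraints checked below are met.
+//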
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVInstrInfo.h"
+#include "RISCVRegisterInfo.h"
+#include "RISCVSubtarget.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include <algorithm>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-zilsd-opt"
+
+STATISTIC(NumLDFormed, "Number of LD instructions formed");
+STATISTIC(NumSDFormed, "Number of SD instructions formed");
+
+static cl::opt<bool>
+ DisableZilsdOpt("disable-riscv-zilsd-opt", cl::Hidden, cl::init(false),
+ cl::desc("Disable Zilsd load/store optimization"));
+
+static cl::opt<unsigned> MaxRescheduleDistance(
+ "riscv-zilsd-max-reschedule-distance", cl::Hidden, cl::init(10),
+ cl::desc("Maximum distance for rescheduling load/store instructions"));
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+// Pre-allocation Zilsd optimization pass
+//===----------------------------------------------------------------------===//
+class RISCVPreAllocZilsdOpt : public MachineFunctionPass {
+public:
+ static char ID;
+
+ RISCVPreAllocZilsdOpt() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override {
+ return "RISC-V pre-allocation Zilsd load/store optimization";
+ }
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().setIsSSA();
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<MachineDominatorTreeWrapperPass>();
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ enum class MemoryOffsetKind {
+ Imm = 0,
+ Global = 1,
+ CPI = 2,
+ BlockAddr = 3,
+ Unknown = 4,
+ };
+ using MemOffset = std::pair<MemoryOffsetKind, int>;
+ using BaseRegInfo = std::pair<unsigned, MemoryOffsetKind>;
+
+private:
+ bool isMemoryOp(const MachineInstr &MI);
+ bool rescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
+ bool canFormLdSdPair(MachineInstr *MI0, MachineInstr *MI1);
+ bool rescheduleOps(MachineBasicBlock *MBB,
+ SmallVectorImpl<MachineInstr *> &MIs, BaseRegInfo Base,
+ bool IsLoad,
+ DenseMap<MachineInstr *, unsigned> &MI2LocMap);
+ bool isSafeToMove(MachineInstr *MI, MachineInstr *Target, bool MoveForward);
+ MemOffset getMemoryOpOffset(const MachineInstr &MI);
+
+ const RISCVSubtarget *STI;
+ const RISCVInstrInfo *TII;
+ const RISCVRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ AliasAnalysis *AA;
+ MachineDominatorTree *DT;
+ Align RequiredAlign;
+};
+
+} // end anonymous namespace
+
+char RISCVPreAllocZilsdOpt::ID = 0;
+
+INITIALIZE_PASS_BEGIN(RISCVPreAllocZilsdOpt, "riscv-prera-zilsd-opt",
+ "RISC-V pre-allocation Zilsd optimization", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
+INITIALIZE_PASS_END(RISCVPreAllocZilsdOpt, "riscv-prera-zilsd-opt",
+ "RISC-V pre-allocation Zilsd optimization", false, false)
+
+//===----------------------------------------------------------------------===//
+// Pre-allocation pass implementation
+//===----------------------------------------------------------------------===//
+
+bool RISCVPreAllocZilsdOpt::runOnMachineFunction(MachineFunction &MF) {
+
+ if (DisableZilsdOpt || skipFunction(MF.getFunction()))
+ return false;
+
+ STI = &MF.getSubtarget<RISCVSubtarget>();
+
+ // Only run on RV32 with Zilsd extension
+ if (STI->is64Bit() || !STI->hasStdExtZilsd())
+ return false;
+
+ TII = STI->getInstrInfo();
+ TRI = STI->getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
+
+ // Determine the required alignment: 8 bytes by default, 4 bytes with the
+ // tune feature, or any alignment if unaligned scalar memory is enabled.
+ RequiredAlign = STI->getZilsdAlign();
+ bool Modified = false;
+ for (auto &MBB : MF) {
+ Modified |= rescheduleLoadStoreInstrs(&MBB);
+ }
+
+ return Modified;
+}
+
+RISCVPreAllocZilsdOpt::MemOffset
+RISCVPreAllocZilsdOpt::getMemoryOpOffset(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case RISCV::LW:
+ case RISCV::SW: {
+ // For LW/SW, the offset is in operand 2
+ const MachineOperand &OffsetOp = MI.getOperand(2);
+
+ // Handle immediate offset
+ if (OffsetOp.isImm())
+ return std::make_pair(MemoryOffsetKind::Imm, OffsetOp.getImm());
+
+ // Handle symbolic operands with MO_LO flag (from MergeBaseOffset)
+ if (OffsetOp.getTargetFlags() & RISCVII::MO_LO) {
+ if (OffsetOp.isGlobal())
+ return std::make_pair(MemoryOffsetKind::Global, OffsetOp.getOffset());
+ if (OffsetOp.isCPI())
+ return std::make_pair(MemoryOffsetKind::CPI, OffsetOp.getOffset());
+ if (OffsetOp.isBlockAddress())
+ return std::make_pair(MemoryOffsetKind::BlockAddr,
+ OffsetOp.getOffset());
+ }
+
+ break;
+ }
+ default:
+ break;
+ }
+
+ return std::make_pair(MemoryOffsetKind::Unknown, 0);
+}
+
+bool RISCVPreAllocZilsdOpt::canFormLdSdPair(MachineInstr *MI0,
+ MachineInstr *MI1) {
+ if (!MI0->hasOneMemOperand() || !MI1->hasOneMemOperand())
+ return false;
+
+ // Get offsets and check they are consecutive
+ int Offset0 = getMemoryOpOffset(*MI0).second;
+ int Offset1 = getMemoryOpOffset(*MI1).second;
+
+ // Offsets must be 4 bytes apart
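+ // For example (hypothetical registers): lw a0, 0(a1) and lw a2, 4(a1) can
+ // pair, whereas offsets 0 and 8 cannot.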
+ if (Offset1 - Offset0 != 4)
+ return false;
+
+ // We need to guarantee that the alignment of (base + offset) is legal.
+ const MachineMemOperand *MMO = *MI0->memoperands_begin();
+ if (MMO->getAlign() < RequiredAlign)
+ return false;
+
+ // Check that the two destination/source registers are different for
+ // load/store respectively.
+ Register FirstReg = MI0->getOperand(0).getReg();
+ Register SecondReg = MI1->getOperand(0).getReg();
+ if (FirstReg == SecondReg)
+ return false;
+
+ return true;
+}
+
+bool RISCVPreAllocZilsdOpt::isSafeToMove(MachineInstr *MI, MachineInstr *Target,
+ bool MoveForward) {
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock::iterator Start = MI->getIterator();
+ MachineBasicBlock::iterator End = Target->getIterator();
+
+ if (!MoveForward)
+ std::swap(Start, End);
+
+ // Increment Start to skip the current instruction
+ if (Start != MBB->end())
+ ++Start;
+
+ Register DefReg = MI->getOperand(0).getReg();
+ Register BaseReg = MI->getOperand(1).getReg();
+
+ unsigned ScanCount = 0;
+ for (auto It = Start; It != End; ++It, ++ScanCount) {
+ // Don't move across calls or terminators
+ if (It->isCall() || It->isTerminator()) {
+ LLVM_DEBUG(dbgs() << "Cannot move across call/terminator: " << *It);
+ return false;
+ }
+
+ // Don't move across instructions with unmodeled side effects (e.g. memory
+ // barriers).
+ if (It->hasUnmodeledSideEffects()) {
+ LLVM_DEBUG(dbgs() << "Cannot move across instruction with side effects: "
+ << *It);
+ return false;
+ }
+
+ // Check if the base register is modified
+ if (It->modifiesRegister(BaseReg, TRI)) {
+ LLVM_DEBUG(dbgs() << "Base register " << BaseReg
+ << " modified by: " << *It);
+ return false;
+ }
+
+ // For loads, check if the loaded value is used
+ if (MI->mayLoad() &&
+ (It->readsRegister(DefReg, TRI) || It->modifiesRegister(DefReg, TRI))) {
+ LLVM_DEBUG(dbgs() << "Destination register " << DefReg
+ << " used by: " << *It);
+ return false;
+ }
+
+ // For stores, check if the stored register is modified
+ if (MI->mayStore() && It->modifiesRegister(DefReg, TRI)) {
+ LLVM_DEBUG(dbgs() << "Source register " << DefReg
+ << " modified by: " << *It);
+ return false;
+ }
+
+ // Check for memory operation interference
+ if (It->mayLoadOrStore() && It->mayAlias(AA, *MI, /*UseTBAA*/ false)) {
+ LLVM_DEBUG(dbgs() << "Memory operation interference detected\n");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool RISCVPreAllocZilsdOpt::rescheduleOps(
+ MachineBasicBlock *MBB, SmallVectorImpl<MachineInstr *> &MIs,
+ BaseRegInfo Base, bool IsLoad,
+ DenseMap<MachineInstr *, unsigned> &MI2LocMap) {
+ // Sort by offset. At this point the base register and MemoryOffsetKind are
+ // guaranteed to be the same, so we only need to sort by offset value.
+ llvm::sort(MIs.begin(), MIs.end(), [this](MachineInstr *A, MachineInstr *B) {
+ return getMemoryOpOffset(*A).second < getMemoryOpOffset(*B).second;
+ });
+
+ bool Modified = false;
+
+ // Try to pair consecutive operations
+ for (size_t i = 0; i + 1 < MIs.size(); i++) {
+ MachineInstr *MI0 = MIs[i];
+ MachineInstr *MI1 = MIs[i + 1];
+
+ Register FirstReg = MI0->getOperand(0).getReg();
+ Register SecondReg = MI1->getOperand(0).getReg();
+ Register BaseReg = MI0->getOperand(1).getReg();
+ const MachineOperand &OffsetOp = MI0->getOperand(2);
+
+ // At this point, MI0 and MI1 are:
+ // 1. both either LW or SW,
+ // 2. guaranteed to have the same memory offset kind,
+ // 3. guaranteed to have the same base register, and
+ // 4. already sorted by offset value,
+ // so we don't have to check these in canFormLdSdPair.
+ if (!canFormLdSdPair(MI0, MI1))
+ continue;
+
+ // Use MI2LocMap to determine which instruction appears later in program
+ // order
+ bool MI1IsLater = MI2LocMap[MI1] > MI2LocMap[MI0];
+
+ // For loads: move later instruction up (backwards) to earlier instruction
+ // For stores: move earlier instruction down (forwards) to later instruction
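+ // For example (hypothetical registers), given {lw a0, 0(s0); ...;
+ // lw a1, 4(s0)}, the later lw is hoisted up next to the earlier one; for the
+ // analogous sw pair, the earlier sw is sunk down next to the later one.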
+ MachineInstr *MoveInstr, *TargetInstr;
+ if (IsLoad) {
+ // For loads: move the later instruction to the earlier one
+ MoveInstr = MI1IsLater ? MI1 : MI0;
+ TargetInstr = MI1IsLater ? MI0 : MI1;
+ } else {
+ // For stores: move the earlier instruction to the later one
+ MoveInstr = MI1IsLater ? MI0 : MI1;
+ TargetInstr = MI1IsLater ? MI1 : MI0;
+ }
+
+ unsigned Distance = MI1IsLater ? MI2LocMap[MI1] - MI2LocMap[MI0]
+ : MI2LocMap[MI0] - MI2LocMap[MI1];
+ if (!isSafeToMove(MoveInstr, TargetInstr, !IsLoad) ||
+ Distance > MaxRescheduleDistance)
+ continue;
+
+ // Move the instruction to the target position
+ MachineBasicBlock::iterator InsertPos = TargetInstr->getIterator();
+ ++InsertPos;
+
+ // If we need to move an instruction, do it now
+ if (MoveInstr != TargetInstr)
+ MBB->splice(InsertPos, MBB, MoveInstr->getIterator());
+
+ // Create the paired instruction
+ MachineInstrBuilder MIB;
+ DebugLoc DL = MI0->getDebugLoc();
+
+ if (IsLoad) {
+ MIB = BuildMI(*MBB, InsertPos, DL, TII->get(RISCV::PseudoLD_RV32_OPT))
+ .addReg(FirstReg, RegState::Define)
+ .addReg(SecondReg, RegState::Define)
+ .addReg(BaseReg)
+ .add(OffsetOp);
+ ++NumLDFormed;
+ LLVM_DEBUG(dbgs() << "Formed LD: " << *MIB << "\n");
+ } else {
+ MIB = BuildMI(*MBB, InsertPos, DL, TII->get(RISCV::PseudoSD_RV32_OPT))
+ .addReg(FirstReg)
+ .addReg(SecondReg)
+ .addReg(BaseReg)
+ .add(OffsetOp);
+ ++NumSDFormed;
+ LLVM_DEBUG(dbgs() << "Formed SD: " << *MIB << "\n");
+ }
+
+ // Copy memory operands
+ MIB.cloneMergedMemRefs({MI0, MI1});
+
+ // Add register allocation hints for consecutive registers
+ // RISC-V Zilsd requires even/odd register pairs
+ // Only set hints for virtual registers (physical registers already have a
+ // fixed encoding).
+ if (FirstReg.isVirtual() && SecondReg.isVirtual()) {
+ // For virtual registers, we can't determine even/odd yet, but we can hint
+ // that they should be allocated as a consecutive pair
+ MRI->setRegAllocationHint(FirstReg, RISCVRI::RegPairEven, SecondReg);
+ MRI->setRegAllocationHint(SecondReg, RISCVRI::RegPairOdd, FirstReg);
+ }
+
+ // Remove the original instructions
+ MI0->eraseFromParent();
+ MI1->eraseFromParent();
+
+ Modified = true;
+
+ // Skip the next instruction since we've already processed it
+ i++;
+ }
+
+ return Modified;
+}
+
+bool RISCVPreAllocZilsdOpt::isMemoryOp(const MachineInstr &MI) {
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode != RISCV::LW && Opcode != RISCV::SW)
+ return false;
+
+ if (!MI.getOperand(1).isReg())
+ return false;
+
+ // When no memory operands are present, conservatively assume unaligned,
+ // volatile, unfoldable.
+ if (!MI.hasOneMemOperand())
+ return false;
+
+ const MachineMemOperand *MMO = *MI.memoperands_begin();
+
+ if (MMO->isVolatile() || MMO->isAtomic())
+ return false;
+
+ // sw <undef> could probably be eliminated entirely, but for now we just want
+ // to avoid making a mess of it.
+ if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
+ return false;
+
+ // Likewise don't mess with references to undefined addresses.
+ if (MI.getOperand(1).isUndef())
+ return false;
+
+ return true;
+}
+
+bool RISCVPreAllocZilsdOpt::rescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
+ bool Modified = false;
+
+ // Process the basic block in windows delimited by calls, terminators,
+ // or instructions with duplicate base+offset pairs
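+ // (e.g., a lw before a call is never considered for pairing with a lw after
+ // it).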
+ MachineBasicBlock::iterator MBBI = MBB->begin();
+ MachineBasicBlock::iterator E = MBB->end();
+
+ while (MBBI != E) {
+ // Map from instruction to its location in the current window
+ DenseMap<MachineInstr *, unsigned> MI2LocMap;
+
+ // Map from base register to list of load/store instructions
+ using Base2InstMap = DenseMap<BaseRegInfo, SmallVector<MachineInstr *, 4>>;
+ using BaseVec = SmallVector<BaseRegInfo, 4>;
+ Base2InstMap Base2LdsMap;
+ Base2InstMap Base2StsMap;
+ BaseVec LdBases;
+ BaseVec StBases;
+
+ unsigned Loc = 0;
+
+ // Build the current window of instructions
+ for (; MBBI != E; ++MBBI) {
+ MachineInstr &MI = *MBBI;
+
+ // Stop at barriers (calls and terminators)
+ if (MI.isCall() || MI.isTerminator()) {
+ // Move past the barrier for next iteration
+ ++MBBI;
+ break;
+ }
+
+ // Track instruction location in window
+ if (!MI.isDebugInstr())
+ MI2LocMap[&MI] = ++Loc;
+
+ MemOffset Offset = getMemoryOpOffset(MI);
+ // Skip instructions that aren't memory operations or whose memory offset
+ // kind is unknown.
+ if (!isMemoryOp(MI) || Offset.first == MemoryOffsetKind::Unknown)
+ continue;
+
+ bool IsLd = (MI.getOpcode() == RISCV::LW);
+ Register Base = MI.getOperand(1).getReg();
+ bool StopHere = false;
+
+ // Lambda to find or add base register entries
+ auto FindBases = [&](Base2InstMap &Base2Ops, BaseVec &Bases) {
+ auto [BI, Inserted] = Base2Ops.try_emplace({Base.id(), Offset.first});
+ if (Inserted) {
+ // First time seeing this base register
+ BI->second.push_back(&MI);
+ Bases.push_back({Base.id(), Offset.first});
+ return;
+ }
+ // Check if we've seen this exact base+offset before
+ if (any_of(BI->second, [&](const MachineInstr *PrevMI) {
+ return Offset == getMemoryOpOffset(*PrevMI);
+ })) {
+ // Found duplicate base+offset - stop here to process current window
+ StopHere = true;
+ } else {
+ BI->second.push_back(&MI);
+ }
+ };
+
+ if (IsLd)
+ FindBases(Base2LdsMap, LdBases);
+ else
+ FindBases(Base2StsMap, StBases);
+
+ if (StopHere) {
+ // Found a duplicate (a base+offset combination that was seen earlier).
+ // Backtrack to process the current window.
+ --Loc;
+ break;
+ }
+ }
+
+ // Process the current window - reschedule loads
+ for (auto Base : LdBases) {
+ SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
+ if (Lds.size() > 1) {
+ Modified |= rescheduleOps(MBB, Lds, Base, true, MI2LocMap);
+ }
+ }
+
+ // Process the current window - reschedule stores
+ for (auto Base : StBases) {
+ SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
+ if (Sts.size() > 1) {
+ Modified |= rescheduleOps(MBB, Sts, Base, false, MI2LocMap);
+ }
+ }
+ }
+
+ return Modified;
+}
+
+//===----------------------------------------------------------------------===//
+// Pass creation functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createRISCVPreAllocZilsdOptPass() {
+ return new RISCVPreAllocZilsdOpt();
+}