Diffstat (limited to 'llvm/lib/Target/RISCV')
-rw-r--r--  llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp     |  57
-rw-r--r--  llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp |  56
-rw-r--r--  llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp      |   4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVFeatures.td                |  26
-rw-r--r--  llvm/lib/Target/RISCV/RISCVFrameLowering.cpp          | 109
-rw-r--r--  llvm/lib/Target/RISCV/RISCVFrameLowering.h            |   3
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp           | 510
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.h             |  61
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp          |  24
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.cpp              |  85
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td       | 346
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td    |  24
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td    |  30
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td            |  48
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td          |  10
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td            |   2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp             |  89
-rw-r--r--  llvm/lib/Target/RISCV/RISCVProcessors.td              |  33
-rw-r--r--  llvm/lib/Target/RISCV/RISCVRegisterInfo.td            |   6
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedRocket.td             |   1
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedSiFive7.td            | 108
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td         |   1
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td         |  81
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td      |   1
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td     |   1
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedule.td                |   1
-rw-r--r--  llvm/lib/Target/RISCV/RISCVScheduleV.td               |  84
-rw-r--r--  llvm/lib/Target/RISCV/RISCVScheduleXSf.td             |  59
-rw-r--r--  llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp    |  17
-rw-r--r--  llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h      |   4
30 files changed, 1341 insertions, 540 deletions
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
index 45e19cd..c18892a 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
@@ -34,14 +34,15 @@ private:
// Whether this is assigning args for a return.
bool IsRet;
- // true if assignArg has been called for a mask argument, false otherwise.
- bool AssignedFirstMaskArg = false;
+ RVVArgDispatcher &RVVDispatcher;
public:
RISCVOutgoingValueAssigner(
- RISCVTargetLowering::RISCVCCAssignFn *RISCVAssignFn_, bool IsRet)
+ RISCVTargetLowering::RISCVCCAssignFn *RISCVAssignFn_, bool IsRet,
+ RVVArgDispatcher &RVVDispatcher)
: CallLowering::OutgoingValueAssigner(nullptr),
- RISCVAssignFn(RISCVAssignFn_), IsRet(IsRet) {}
+ RISCVAssignFn(RISCVAssignFn_), IsRet(IsRet),
+ RVVDispatcher(RVVDispatcher) {}
bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
@@ -51,16 +52,9 @@ public:
const DataLayout &DL = MF.getDataLayout();
const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
- std::optional<unsigned> FirstMaskArgument;
- if (Subtarget.hasVInstructions() && !AssignedFirstMaskArg &&
- ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1) {
- FirstMaskArgument = ValNo;
- AssignedFirstMaskArg = true;
- }
-
if (RISCVAssignFn(DL, Subtarget.getTargetABI(), ValNo, ValVT, LocVT,
LocInfo, Flags, State, Info.IsFixed, IsRet, Info.Ty,
- *Subtarget.getTargetLowering(), FirstMaskArgument))
+ *Subtarget.getTargetLowering(), RVVDispatcher))
return true;
StackSize = State.getStackSize();
@@ -181,14 +175,15 @@ private:
// Whether this is assigning args from a return.
bool IsRet;
- // true if assignArg has been called for a mask argument, false otherwise.
- bool AssignedFirstMaskArg = false;
+ RVVArgDispatcher &RVVDispatcher;
public:
RISCVIncomingValueAssigner(
- RISCVTargetLowering::RISCVCCAssignFn *RISCVAssignFn_, bool IsRet)
+ RISCVTargetLowering::RISCVCCAssignFn *RISCVAssignFn_, bool IsRet,
+ RVVArgDispatcher &RVVDispatcher)
: CallLowering::IncomingValueAssigner(nullptr),
- RISCVAssignFn(RISCVAssignFn_), IsRet(IsRet) {}
+ RISCVAssignFn(RISCVAssignFn_), IsRet(IsRet),
+ RVVDispatcher(RVVDispatcher) {}
bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
@@ -201,16 +196,9 @@ public:
if (LocVT.isScalableVector())
MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
- std::optional<unsigned> FirstMaskArgument;
- if (Subtarget.hasVInstructions() && !AssignedFirstMaskArg &&
- ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1) {
- FirstMaskArgument = ValNo;
- AssignedFirstMaskArg = true;
- }
-
if (RISCVAssignFn(DL, Subtarget.getTargetABI(), ValNo, ValVT, LocVT,
LocInfo, Flags, State, /*IsFixed=*/true, IsRet, Info.Ty,
- *Subtarget.getTargetLowering(), FirstMaskArgument))
+ *Subtarget.getTargetLowering(), RVVDispatcher))
return true;
StackSize = State.getStackSize();
@@ -420,9 +408,11 @@ bool RISCVCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
SmallVector<ArgInfo, 4> SplitRetInfos;
splitToValueTypes(OrigRetInfo, SplitRetInfos, DL, CC);
+ RVVArgDispatcher Dispatcher{&MF, getTLI<RISCVTargetLowering>(),
+ ArrayRef(F.getReturnType())};
RISCVOutgoingValueAssigner Assigner(
CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV,
- /*IsRet=*/true);
+ /*IsRet=*/true, Dispatcher);
RISCVOutgoingValueHandler Handler(MIRBuilder, MF.getRegInfo(), Ret);
return determineAndHandleAssignments(Handler, Assigner, SplitRetInfos,
MIRBuilder, CC, F.isVarArg());
@@ -531,6 +521,7 @@ bool RISCVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
CallingConv::ID CC = F.getCallingConv();
SmallVector<ArgInfo, 32> SplitArgInfos;
+ SmallVector<Type *, 4> TypeList;
unsigned Index = 0;
for (auto &Arg : F.args()) {
// Construct the ArgInfo object from destination register and argument type.
@@ -542,12 +533,16 @@ bool RISCVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
// correspondingly and appended to SplitArgInfos.
splitToValueTypes(AInfo, SplitArgInfos, DL, CC);
+ TypeList.push_back(Arg.getType());
+
++Index;
}
+ RVVArgDispatcher Dispatcher{&MF, getTLI<RISCVTargetLowering>(),
+ ArrayRef(TypeList)};
RISCVIncomingValueAssigner Assigner(
CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV,
- /*IsRet=*/false);
+ /*IsRet=*/false, Dispatcher);
RISCVFormalArgHandler Handler(MIRBuilder, MF.getRegInfo());
SmallVector<CCValAssign, 16> ArgLocs;
@@ -585,11 +580,13 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
SmallVector<ArgInfo, 32> SplitArgInfos;
SmallVector<ISD::OutputArg, 8> Outs;
+ SmallVector<Type *, 4> TypeList;
for (auto &AInfo : Info.OrigArgs) {
// Handle any required unmerging of split value types from a given VReg into
// physical registers. ArgInfo objects are constructed correspondingly and
// appended to SplitArgInfos.
splitToValueTypes(AInfo, SplitArgInfos, DL, CC);
+ TypeList.push_back(AInfo.Ty);
}
// TODO: Support tail calls.
@@ -607,9 +604,11 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
Call.addRegMask(TRI->getCallPreservedMask(MF, Info.CallConv));
+ RVVArgDispatcher ArgDispatcher{&MF, getTLI<RISCVTargetLowering>(),
+ ArrayRef(TypeList)};
RISCVOutgoingValueAssigner ArgAssigner(
CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV,
- /*IsRet=*/false);
+ /*IsRet=*/false, ArgDispatcher);
RISCVOutgoingValueHandler ArgHandler(MIRBuilder, MF.getRegInfo(), Call);
if (!determineAndHandleAssignments(ArgHandler, ArgAssigner, SplitArgInfos,
MIRBuilder, CC, Info.IsVarArg))
@@ -637,9 +636,11 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
SmallVector<ArgInfo, 4> SplitRetInfos;
splitToValueTypes(Info.OrigRet, SplitRetInfos, DL, CC);
+ RVVArgDispatcher RetDispatcher{&MF, getTLI<RISCVTargetLowering>(),
+ ArrayRef(F.getReturnType())};
RISCVIncomingValueAssigner RetAssigner(
CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV,
- /*IsRet=*/true);
+ /*IsRet=*/true, RetDispatcher);
RISCVCallReturnHandler RetHandler(MIRBuilder, MF.getRegInfo(), Call);
if (!determineAndHandleAssignments(RetHandler, RetAssigner, SplitRetInfos,
MIRBuilder, CC, Info.IsVarArg))
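All four lowering paths above follow the same new pattern: collect the IR types involved, build an RVVArgDispatcher over them, and hand it to the assigner, which then consumes one precomputed register per vector value. A minimal sketch of that consumption, not a verbatim excerpt:

    // Sketch: the dispatcher pre-assigns every RVV argument; the assigner
    // then just pops registers in order inside assignArg().
    RVVArgDispatcher Dispatcher{&MF, TLI, ArrayRef(TypeList)};
    MCPhysReg Reg = Dispatcher.getNextPhysReg();
    if (Reg)   // e.g. RISCV::V8 upward, or RISCV::V0 for the first mask arg
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    // a zero register means no contiguous group was left: pass indirectly

This replaces the old per-assigner AssignedFirstMaskArg flag with state computed once per call site.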
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
index 86e4434..c1fde73 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
@@ -110,6 +110,8 @@ RISCVRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
LLT Ty) const {
switch (RC.getID()) {
default:
+ if (RISCVRI::isVRegClass(RC.TSFlags))
+ return getRegBank(RISCV::VRBRegBankID);
llvm_unreachable("Register class not supported");
case RISCV::GPRRegClassID:
case RISCV::GPRF16RegClassID:
@@ -131,20 +133,6 @@ RISCVRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
case RISCV::FPR64CRegClassID:
case RISCV::FPR32CRegClassID:
return getRegBank(RISCV::FPRBRegBankID);
- case RISCV::VMRegClassID:
- case RISCV::VRRegClassID:
- case RISCV::VRNoV0RegClassID:
- case RISCV::VRM2RegClassID:
- case RISCV::VRM2NoV0RegClassID:
- case RISCV::VRM4RegClassID:
- case RISCV::VRM4NoV0RegClassID:
- case RISCV::VMV0RegClassID:
- case RISCV::VRM2_with_sub_vrm1_0_in_VMV0RegClassID:
- case RISCV::VRM4_with_sub_vrm1_0_in_VMV0RegClassID:
- case RISCV::VRM8RegClassID:
- case RISCV::VRM8NoV0RegClassID:
- case RISCV::VRM8_with_sub_vrm1_0_in_VMV0RegClassID:
- return getRegBank(RISCV::VRBRegBankID);
}
}
@@ -154,46 +142,6 @@ static const RegisterBankInfo::ValueMapping *getFPValueMapping(unsigned Size) {
return &RISCV::ValueMappings[Idx];
}
-/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
-/// having only floating-point operands.
-/// FIXME: this is copied from target AArch64. Needs some code refactor here to
-/// put this function in GlobalISel/Utils.cpp.
-static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
- switch (Opc) {
- case TargetOpcode::G_FADD:
- case TargetOpcode::G_FSUB:
- case TargetOpcode::G_FMUL:
- case TargetOpcode::G_FMA:
- case TargetOpcode::G_FDIV:
- case TargetOpcode::G_FCONSTANT:
- case TargetOpcode::G_FPEXT:
- case TargetOpcode::G_FPTRUNC:
- case TargetOpcode::G_FCEIL:
- case TargetOpcode::G_FFLOOR:
- case TargetOpcode::G_FNEARBYINT:
- case TargetOpcode::G_FNEG:
- case TargetOpcode::G_FCOPYSIGN:
- case TargetOpcode::G_FCOS:
- case TargetOpcode::G_FSIN:
- case TargetOpcode::G_FLOG10:
- case TargetOpcode::G_FLOG:
- case TargetOpcode::G_FLOG2:
- case TargetOpcode::G_FSQRT:
- case TargetOpcode::G_FABS:
- case TargetOpcode::G_FEXP:
- case TargetOpcode::G_FRINT:
- case TargetOpcode::G_INTRINSIC_TRUNC:
- case TargetOpcode::G_INTRINSIC_ROUND:
- case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
- case TargetOpcode::G_FMAXNUM:
- case TargetOpcode::G_FMINNUM:
- case TargetOpcode::G_FMAXIMUM:
- case TargetOpcode::G_FMINIMUM:
- return true;
- }
- return false;
-}
-
// TODO: Make this more like AArch64?
bool RISCVRegisterBankInfo::hasFPConstraints(
const MachineInstr &MI, const MachineRegisterInfo &MRI,
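The long list of vector register class IDs is replaced by a single TSFlags query, so any vector register class, present or future, routes to the vector register bank. A sketch of the predicate, assuming the IsVRegClass flag that RISCVRegisterInfo.td attaches to RVV register classes occupies the low TSFlags bit:

    // Sketch only; the real bit layout is defined in RISCVRegisterInfo.td.
    namespace RISCVRI {
    inline bool isVRegClass(uint64_t TSFlags) { return TSFlags & 1; }
    } // namespace RISCVRI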
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 173995f..d93709a 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -326,8 +326,8 @@ bool RISCVExpandPseudo::expandRV32ZdinxStore(MachineBasicBlock &MBB,
.setMemRefs(MMOLo);
if (MBBI->getOperand(2).isGlobal() || MBBI->getOperand(2).isCPI()) {
- // FIXME: Zdinx RV32 can not work on unaligned memory.
- assert(!STI->hasFastUnalignedAccess());
+ // FIXME: Zdinx RV32 can not work on unaligned scalar memory.
+ assert(!STI->enableUnalignedScalarMem());
assert(MBBI->getOperand(2).getOffset() % 8 == 0);
MBBI->getOperand(2).setOffset(MBBI->getOperand(2).getOffset() + 4);
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 794455a..f830ead 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -208,6 +208,13 @@ def HasStdExtAOrZalrsc
"'A' (Atomic Instructions) or "
"'Zalrsc' (Load-Reserved/Store-Conditional)">;
+def FeatureStdExtZama16b
+ : SubtargetFeature<"zama16b", "HasStdExtZama16b", "true",
+ "'Zama16b' (Atomic 16-byte misaligned loads, stores and AMOs)">;
+def HasStdExtZama16b : Predicate<"Subtarget->hasStdExtZama16b()">,
+ AssemblerPredicate<(all_of FeatureStdExtZama16b),
+ "'Zama16b' (Atomic 16-byte misaligned loads, stores and AMOs)">;
+
def FeatureStdExtZawrs : SubtargetFeature<"zawrs", "HasStdExtZawrs", "true",
"'Zawrs' (Wait on Reservation Set)">;
def HasStdExtZawrs : Predicate<"Subtarget->hasStdExtZawrs()">,
@@ -1183,10 +1190,15 @@ def FeatureTrailingSeqCstFence : SubtargetFeature<"seq-cst-trailing-fence",
"true",
"Enable trailing fence for seq-cst store.">;
-def FeatureFastUnalignedAccess
- : SubtargetFeature<"fast-unaligned-access", "HasFastUnalignedAccess",
- "true", "Has reasonably performant unaligned "
- "loads and stores (both scalar and vector)">;
+def FeatureUnalignedScalarMem
+ : SubtargetFeature<"unaligned-scalar-mem", "EnableUnalignedScalarMem",
+ "true", "Has reasonably performant unaligned scalar "
+ "loads and stores">;
+
+def FeatureUnalignedVectorMem
+ : SubtargetFeature<"unaligned-vector-mem", "EnableUnalignedVectorMem",
+ "true", "Has reasonably performant unaligned vector "
+ "loads and stores">;
def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
"UsePostRAScheduler", "true", "Schedule again after register allocation">;
@@ -1226,9 +1238,9 @@ def TuneNoSinkSplatOperands
"false", "Disable sink splat operands to enable .vx, .vf,"
".wx, and .wf instructions">;
-def TuneNoStripWSuffix
- : SubtargetFeature<"no-strip-w-suffix", "EnableStripWSuffix", "false",
- "Disable strip W suffix">;
+def TunePreferWInst
+ : SubtargetFeature<"prefer-w-inst", "PreferWInst", "true",
+ "Prefer instructions with W suffix">;
def TuneConditionalCompressedMoveFusion
: SubtargetFeature<"conditional-cmv-fusion", "HasConditionalCompressedMoveFusion",
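This file makes two tuning changes: the single fast-unaligned-access feature is split into independent scalar and vector controls, and the no-strip-w-suffix tuning is inverted into prefer-w-inst. Using the -mattr spelling of the feature strings defined above, the unaligned-access split looks like this on the command line:

    llc -mtriple=riscv64 -mattr=+fast-unaligned-access ...                       # before
    llc -mtriple=riscv64 -mattr=+unaligned-scalar-mem,+unaligned-vector-mem ...  # after

A core that is only fast for misaligned scalar accesses can now enable just the first of the two new features.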
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 71672ed..cb41577 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -435,6 +435,33 @@ void RISCVFrameLowering::adjustStackForRVV(MachineFunction &MF,
Flag, getStackAlign());
}
+static void appendScalableVectorExpression(const TargetRegisterInfo &TRI,
+ SmallVectorImpl<char> &Expr,
+ int FixedOffset, int ScalableOffset,
+ llvm::raw_string_ostream &Comment) {
+ unsigned DwarfVLenB = TRI.getDwarfRegNum(RISCV::VLENB, true);
+ uint8_t Buffer[16];
+ if (FixedOffset) {
+ Expr.push_back(dwarf::DW_OP_consts);
+ Expr.append(Buffer, Buffer + encodeSLEB128(FixedOffset, Buffer));
+ Expr.push_back((uint8_t)dwarf::DW_OP_plus);
+ Comment << (FixedOffset < 0 ? " - " : " + ") << std::abs(FixedOffset);
+ }
+
+ Expr.push_back((uint8_t)dwarf::DW_OP_consts);
+ Expr.append(Buffer, Buffer + encodeSLEB128(ScalableOffset, Buffer));
+
+ Expr.push_back((uint8_t)dwarf::DW_OP_bregx);
+ Expr.append(Buffer, Buffer + encodeULEB128(DwarfVLenB, Buffer));
+ Expr.push_back(0);
+
+ Expr.push_back((uint8_t)dwarf::DW_OP_mul);
+ Expr.push_back((uint8_t)dwarf::DW_OP_plus);
+
+ Comment << (ScalableOffset < 0 ? " - " : " + ") << std::abs(ScalableOffset)
+ << " * vlenb";
+}
+
static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI,
Register Reg,
uint64_t FixedOffset,
@@ -452,30 +479,38 @@ static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI,
else
Comment << printReg(Reg, &TRI);
- uint8_t buffer[16];
- if (FixedOffset) {
- Expr.push_back(dwarf::DW_OP_consts);
- Expr.append(buffer, buffer + encodeSLEB128(FixedOffset, buffer));
- Expr.push_back((uint8_t)dwarf::DW_OP_plus);
- Comment << " + " << FixedOffset;
- }
+ appendScalableVectorExpression(TRI, Expr, FixedOffset, ScalableOffset,
+ Comment);
- Expr.push_back((uint8_t)dwarf::DW_OP_consts);
- Expr.append(buffer, buffer + encodeSLEB128(ScalableOffset, buffer));
+ SmallString<64> DefCfaExpr;
+ uint8_t Buffer[16];
+ DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
+ DefCfaExpr.append(Buffer, Buffer + encodeULEB128(Expr.size(), Buffer));
+ DefCfaExpr.append(Expr.str());
- unsigned DwarfVlenb = TRI.getDwarfRegNum(RISCV::VLENB, true);
- Expr.push_back((uint8_t)dwarf::DW_OP_bregx);
- Expr.append(buffer, buffer + encodeULEB128(DwarfVlenb, buffer));
- Expr.push_back(0);
+ return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
+ Comment.str());
+}
- Expr.push_back((uint8_t)dwarf::DW_OP_mul);
- Expr.push_back((uint8_t)dwarf::DW_OP_plus);
+static MCCFIInstruction createDefCFAOffset(const TargetRegisterInfo &TRI,
+ Register Reg, uint64_t FixedOffset,
+ uint64_t ScalableOffset) {
+ assert(ScalableOffset != 0 && "Did not need to adjust CFA for RVV");
+ SmallString<64> Expr;
+ std::string CommentBuffer;
+ llvm::raw_string_ostream Comment(CommentBuffer);
+ Comment << printReg(Reg, &TRI) << " @ cfa";
- Comment << " + " << ScalableOffset << " * vlenb";
+ // Build up the expression (FixedOffset + ScalableOffset * VLENB).
+ appendScalableVectorExpression(TRI, Expr, FixedOffset, ScalableOffset,
+ Comment);
SmallString<64> DefCfaExpr;
- DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
- DefCfaExpr.append(buffer, buffer + encodeULEB128(Expr.size(), buffer));
+ uint8_t Buffer[16];
+ unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
+ DefCfaExpr.push_back(dwarf::DW_CFA_expression);
+ DefCfaExpr.append(Buffer, Buffer + encodeULEB128(DwarfReg, Buffer));
+ DefCfaExpr.append(Buffer, Buffer + encodeULEB128(Expr.size(), Buffer));
DefCfaExpr.append(Expr.str());
return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
@@ -671,6 +706,9 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
.addCFIIndex(CFIIndex)
.setMIFlag(MachineInstr::FrameSetup);
}
+
+ std::advance(MBBI, getRVVCalleeSavedInfo(MF, CSI).size());
+ emitCalleeSavedRVVPrologCFI(MBB, MBBI, hasFP(MF));
}
if (hasFP(MF)) {
@@ -1492,6 +1530,41 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters(
return true;
}
+void RISCVFrameLowering::emitCalleeSavedRVVPrologCFI(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, bool HasFP) const {
+ MachineFunction *MF = MBB.getParent();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
+ RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+ DebugLoc DL = MBB.findDebugLoc(MI);
+
+ const auto &RVVCSI = getRVVCalleeSavedInfo(*MF, MFI.getCalleeSavedInfo());
+ if (RVVCSI.empty())
+ return;
+
+ uint64_t FixedSize = getStackSizeWithRVVPadding(*MF);
+ if (!HasFP) {
+ uint64_t ScalarLocalVarSize =
+ MFI.getStackSize() - RVFI->getCalleeSavedStackSize() -
+ RVFI->getRVPushStackSize() - RVFI->getVarArgsSaveSize() +
+ RVFI->getRVVPadding();
+ FixedSize -= ScalarLocalVarSize;
+ }
+
+ for (auto &CS : RVVCSI) {
+ // Insert the spill to the stack frame.
+ int FI = CS.getFrameIdx();
+ if (FI >= 0 && MFI.getStackID(FI) == TargetStackID::ScalableVector) {
+ unsigned CFIIndex = MF->addFrameInst(
+ createDefCFAOffset(*STI.getRegisterInfo(), CS.getReg(), -FixedSize,
+ MFI.getObjectOffset(FI) / 8));
+ BuildMI(MBB, MI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ }
+}
+
bool RISCVFrameLowering::restoreCalleeSavedRegisters(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
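Both CFI helpers now share appendScalableVectorExpression, which encodes the unwind-time computation FixedOffset + ScalableOffset * VLENB. A sketch of the DWARF opcode stream it appends, assuming a nonzero fixed part:

    // DW_OP_consts FixedOffset    ; push the signed fixed byte offset
    // DW_OP_plus                  ; add it to the base on the expression stack
    // DW_OP_consts ScalableOffset ; push the signed multiple of vlenb
    // DW_OP_bregx  vlenb, 0       ; push the runtime value of vlenb
    // DW_OP_mul                   ; ScalableOffset * VLENB
    // DW_OP_plus                  ; base + FixedOffset + ScalableOffset * VLENB

createDefCFAExpression wraps this in DW_CFA_def_cfa_expression, while the new createDefCFAOffset wraps it in DW_CFA_expression for a specific callee-saved vector register.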
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
index 210f8c1..28ab4af 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -88,6 +88,9 @@ private:
void adjustStackForRVV(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
int64_t Amount, MachineInstr::MIFlag Flag) const;
+ void emitCalleeSavedRVVPrologCFI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ bool HasFP) const;
std::pair<int64_t, Align>
assignRVVStackObjectOffsets(MachineFunction &MF) const;
};
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5a57200..b0deb1d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -1484,6 +1485,11 @@ bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
return VF > MaxVF || !isPowerOf2_32(VF);
}
+bool RISCVTargetLowering::shouldExpandCttzElements(EVT VT) const {
+ return !Subtarget.hasVInstructions() ||
+ VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
+}
+
bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
MachineFunction &MF,
@@ -1918,7 +1924,7 @@ bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
// replace. If we don't support unaligned scalar mem, prefer the constant
// pool.
// TODO: Can the caller pass down the alignment?
- if (!Subtarget.hasFastUnalignedAccess())
+ if (!Subtarget.enableUnalignedScalarMem())
return true;
// Prefer to keep the load if it would require many instructions.
@@ -8718,6 +8724,29 @@ static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
}
+static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ SDValue Op0 = N->getOperand(1);
+ MVT OpVT = Op0.getSimpleValueType();
+ MVT ContainerVT = OpVT;
+ if (OpVT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
+ Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
+ }
+ MVT XLenVT = Subtarget.getXLenVT();
+ SDLoc DL(N);
+ auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
+ SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
+ if (isOneConstant(N->getOperand(2)))
+ return Res;
+
+ // Convert -1 to VL.
+ SDValue Setcc =
+ DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
+ VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
+ return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
+}
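lowerCttzElts maps the intrinsic onto vfirst.m (RISCVISD::VFIRST_VL), which yields the index of the first set mask element, or -1 when the mask is all zeros; the trailing setcc/select only converts that -1 into the element count. A scalar model of the logic, as an illustration only:

    // Scalar model of the lowering above (sketch, not the DAG code).
    long cttz_elts(const bool *Mask, long NumElts, bool ZeroIsPoison) {
      long Res = -1;                     // what VFIRST_VL computes
      for (long I = 0; I < NumElts; ++I)
        if (Mask[I]) { Res = I; break; }
      if (ZeroIsPoison)                  // operand 2 is 1: no fixup needed
        return Res;
      return Res < 0 ? NumElts : Res;    // setcc + select: turn -1 into VL
    }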
+
static inline void promoteVCIXScalar(const SDValue &Op,
SmallVectorImpl<SDValue> &Operands,
SelectionDAG &DAG) {
@@ -8913,6 +8942,8 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
case Intrinsic::experimental_get_vector_length:
return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
+ case Intrinsic::experimental_cttz_elts:
+ return lowerCttzElts(Op.getNode(), DAG, Subtarget);
case Intrinsic::riscv_vmv_x_s: {
SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
@@ -10403,14 +10434,10 @@ RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
MachineMemOperand *MMO = Load->getMemOperand();
- MachineFunction &MF = DAG.getMachineFunction();
- MMO = MF.getMachineMemOperand(
- MMO, MMO->getPointerInfo(),
- MMO->getMemoryType().isValid()
- ? LLT::scalable_vector(1, MMO->getMemoryType().getSizeInBits())
- : MMO->getMemoryType());
SDValue NewLoad =
- DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(), MMO);
+ DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
+ MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
+ MMO->getAAInfo(), MMO->getRanges());
SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
}
@@ -10470,14 +10497,9 @@ RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
MachineMemOperand *MMO = Store->getMemOperand();
- MachineFunction &MF = DAG.getMachineFunction();
- MMO = MF.getMachineMemOperand(
- MMO, MMO->getPointerInfo(),
- MMO->getMemoryType().isValid()
- ? LLT::scalable_vector(1, MMO->getMemoryType().getSizeInBits())
- : MMO->getMemoryType());
return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
- MMO);
+ MMO->getPointerInfo(), MMO->getBaseAlign(),
+ MMO->getFlags(), MMO->getAAInfo());
}
SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
@@ -12336,6 +12358,12 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
return;
}
+ case Intrinsic::experimental_cttz_elts: {
+ SDValue Res = lowerCttzElts(N, DAG, Subtarget);
+ Results.push_back(
+ DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
+ return;
+ }
case Intrinsic::riscv_orc_b:
case Intrinsic::riscv_brev8:
case Intrinsic::riscv_sha256sig0:
@@ -13363,11 +13391,100 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
}
-static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG) {
+// Try to expand a scalar multiply to a faster sequence.
+static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const RISCVSubtarget &Subtarget) {
+
EVT VT = N->getValueType(0);
- if (!VT.isVector())
+
+ // LI + MUL is usually smaller than the alternative sequence.
+ if (DAG.getMachineFunction().getFunction().hasMinSize())
return SDValue();
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ if (VT != Subtarget.getXLenVT())
+ return SDValue();
+
+ if (!Subtarget.hasStdExtZba() && !Subtarget.hasVendorXTHeadBa())
+ return SDValue();
+
+ ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!CNode)
+ return SDValue();
+ uint64_t MulAmt = CNode->getZExtValue();
+
+ // 3/5/9 * 2^N -> shXadd (sll X, C), (sll X, C)
+ // Matched in tablegen, avoid perturbing patterns.
+ for (uint64_t Divisor : {3, 5, 9})
+ if (MulAmt % Divisor == 0 && isPowerOf2_64(MulAmt / Divisor))
+ return SDValue();
+
+  // If this is a power of 2 plus 2/4/8, we can use a shift followed by a
+  // single shXadd. First check that this is a sum of two powers of 2, since
+  // that case is easy. Then use the trailing zero count to find the smaller
+  // power of 2.
+ if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
+ unsigned ScaleShift = llvm::countr_zero(MulAmt);
+ if (ScaleShift >= 1 && ScaleShift < 4) {
+ unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
+ SDLoc DL(N);
+ SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(ShiftAmt, DL, VT));
+ SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(ScaleShift, DL, VT));
+ return DAG.getNode(ISD::ADD, DL, VT, Shift1, Shift2);
+ }
+ }
+
+ // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
+ // Matched in tablegen, avoid perturbing patterns.
+ switch (MulAmt) {
+ case 11:
+ case 13:
+ case 19:
+ case 21:
+ case 25:
+ case 27:
+ case 29:
+ case 37:
+ case 41:
+ case 45:
+ case 73:
+ case 91:
+ return SDValue();
+ default:
+ break;
+ }
+
+ // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
+ if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
+ unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
+ if (ScaleShift >= 1 && ScaleShift < 4) {
+ unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
+ SDLoc DL(N);
+ SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(ShiftAmt, DL, VT));
+ SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(ScaleShift, DL, VT));
+ return DAG.getNode(
+ ISD::ADD, DL, VT, Shift1,
+ DAG.getNode(ISD::ADD, DL, VT, Shift2, N->getOperand(0)));
+ }
+ }
+
+ return SDValue();
+}
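Two worked examples of the expansion above, assuming Zba so the inner adds fold to shXadd during instruction selection:

    // x * 34 = x*32 + x*2: emitted as add((x << 5), (x << 1)), which
    // instruction selection can fold to: slli t0, x, 5 ; sh1add t1, x, t0
    // x * 69 = x*64 + x*4 + x: emitted as add((x << 6), add((x << 2), x)):
    //   slli t0, x, 6 ; sh2add t1, x, x ; add t2, t0, t1

Amounts such as 12 (3 * 4) or 40 (5 * 8) are deliberately left untouched here because the existing tablegen patterns already produce shXadd plus a shift for them.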
+
+
+static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const RISCVSubtarget &Subtarget) {
+ EVT VT = N->getValueType(0);
+ if (!VT.isVector())
+ return expandMul(N, DAG, DCI, Subtarget);
+
SDLoc DL(N);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -15720,7 +15837,7 @@ static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
if (WiderElementSize > ST.getELen()/8)
return false;
- if (!ST.hasFastUnalignedAccess() && BaseAlign < WiderElementSize)
+ if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
return false;
for (unsigned i = 0; i < Index->getNumOperands(); i++) {
@@ -15913,7 +16030,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::MUL:
if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
return V;
- return performMULCombine(N, DAG);
+ return performMULCombine(N, DAG, DCI, Subtarget);
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
@@ -17642,8 +17759,7 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
MachineBasicBlock *BB,
- unsigned CVTXOpc,
- unsigned CVTFOpc) {
+ unsigned CVTXOpc) {
DebugLoc DL = MI.getDebugLoc();
const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
@@ -17674,6 +17790,85 @@ static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
/*IsImp*/ true));
// Emit a VFCVT_F_X
+ RISCVII::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
+ unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
+ // There is no E8 variant for VFCVT_F_X.
+ assert(Log2SEW >= 4);
+  // Since MI (VFROUND) isn't SEW-specific, we cannot use a macro to make
+  // handling the different (LMUL, SEW) pairs easier, because we need to pull
+  // the SEW immediate from MI, and that information is not available during
+  // macro expansion.
+ unsigned CVTFOpc;
+ if (Log2SEW == 4) {
+ switch (LMul) {
+ case RISCVII::LMUL_1:
+ CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M1_E16_MASK;
+ break;
+ case RISCVII::LMUL_2:
+ CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M2_E16_MASK;
+ break;
+ case RISCVII::LMUL_4:
+ CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M4_E16_MASK;
+ break;
+ case RISCVII::LMUL_8:
+ CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M8_E16_MASK;
+ break;
+ case RISCVII::LMUL_F2:
+ CVTFOpc = RISCV::PseudoVFCVT_F_X_V_MF2_E16_MASK;
+ break;
+ case RISCVII::LMUL_F4:
+ CVTFOpc = RISCV::PseudoVFCVT_F_X_V_MF4_E16_MASK;
+ break;
+ case RISCVII::LMUL_F8:
+ case RISCVII::LMUL_RESERVED:
+ llvm_unreachable("Unexpected LMUL and SEW combination value for MI.");
+ }
+ } else if (Log2SEW == 5) {
+ switch (LMul) {
+ case RISCVII::LMUL_1:
+ CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M1_E32_MASK;
+ break;
+ case RISCVII::LMUL_2:
+ CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M2_E32_MASK;
+ break;
+ case RISCVII::LMUL_4:
+ CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M4_E32_MASK;
+ break;
+ case RISCVII::LMUL_8:
+ CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M8_E32_MASK;
+ break;
+ case RISCVII::LMUL_F2:
+ CVTFOpc = RISCV::PseudoVFCVT_F_X_V_MF2_E32_MASK;
+ break;
+ case RISCVII::LMUL_F4:
+ case RISCVII::LMUL_F8:
+ case RISCVII::LMUL_RESERVED:
+ llvm_unreachable("Unexpected LMUL and SEW combination value for MI.");
+ }
+ } else if (Log2SEW == 6) {
+ switch (LMul) {
+ case RISCVII::LMUL_1:
+ CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M1_E64_MASK;
+ break;
+ case RISCVII::LMUL_2:
+ CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M2_E64_MASK;
+ break;
+ case RISCVII::LMUL_4:
+ CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M4_E64_MASK;
+ break;
+ case RISCVII::LMUL_8:
+ CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M8_E64_MASK;
+ break;
+ case RISCVII::LMUL_F2:
+ case RISCVII::LMUL_F4:
+ case RISCVII::LMUL_F8:
+ case RISCVII::LMUL_RESERVED:
+ llvm_unreachable("Unexpected LMUL and SEW combination value for MI.");
+ }
+ } else {
+ llvm_unreachable("Unexpected LMUL and SEW combination value for MI.");
+ }
+
BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
.add(MI.getOperand(0))
.add(MI.getOperand(1))
@@ -17883,23 +18078,17 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
Subtarget);
case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
- return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK,
- RISCV::PseudoVFCVT_F_X_V_M1_MASK);
+ return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
- return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK,
- RISCV::PseudoVFCVT_F_X_V_M2_MASK);
+ return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
- return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK,
- RISCV::PseudoVFCVT_F_X_V_M4_MASK);
+ return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
- return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK,
- RISCV::PseudoVFCVT_F_X_V_M8_MASK);
+ return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
- return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK,
- RISCV::PseudoVFCVT_F_X_V_MF2_MASK);
+ return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
- return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK,
- RISCV::PseudoVFCVT_F_X_V_MF4_MASK);
+ return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
case RISCV::PseudoFROUND_H:
case RISCV::PseudoFROUND_H_INX:
case RISCV::PseudoFROUND_S:
@@ -18078,33 +18267,12 @@ static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
return false;
}
-static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo,
- std::optional<unsigned> FirstMaskArgument,
- CCState &State, const RISCVTargetLowering &TLI) {
- const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
- if (RC == &RISCV::VRRegClass) {
- // Assign the first mask argument to V0.
- // This is an interim calling convention and it may be changed in the
- // future.
- if (FirstMaskArgument && ValNo == *FirstMaskArgument)
- return State.AllocateReg(RISCV::V0);
- return State.AllocateReg(ArgVRs);
- }
- if (RC == &RISCV::VRM2RegClass)
- return State.AllocateReg(ArgVRM2s);
- if (RC == &RISCV::VRM4RegClass)
- return State.AllocateReg(ArgVRM4s);
- if (RC == &RISCV::VRM8RegClass)
- return State.AllocateReg(ArgVRM8s);
- llvm_unreachable("Unhandled register class for ValueType");
-}
-
// Implements the RISC-V calling convention. Returns true upon failure.
bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
- std::optional<unsigned> FirstMaskArgument) {
+ RVVArgDispatcher &RVVDispatcher) {
unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
assert(XLen == 32 || XLen == 64);
MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
@@ -18273,7 +18441,7 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
else if (ValVT == MVT::f64 && !UseGPRForF64)
Reg = State.AllocateReg(ArgFPR64s);
else if (ValVT.isVector()) {
- Reg = allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI);
+ Reg = RVVDispatcher.getNextPhysReg();
if (!Reg) {
// For return values, the vector must be passed fully via registers or
// via the stack.
@@ -18359,9 +18527,15 @@ void RISCVTargetLowering::analyzeInputArgs(
unsigned NumArgs = Ins.size();
FunctionType *FType = MF.getFunction().getFunctionType();
- std::optional<unsigned> FirstMaskArgument;
- if (Subtarget.hasVInstructions())
- FirstMaskArgument = preAssignMask(Ins);
+ RVVArgDispatcher Dispatcher;
+ if (IsRet) {
+ Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(Ins)};
+ } else {
+ SmallVector<Type *, 4> TypeList;
+ for (const Argument &Arg : MF.getFunction().args())
+ TypeList.push_back(Arg.getType());
+ Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(TypeList)};
+ }
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ArgVT = Ins[i].VT;
@@ -18376,7 +18550,7 @@ void RISCVTargetLowering::analyzeInputArgs(
RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
- FirstMaskArgument)) {
+ Dispatcher)) {
LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
<< ArgVT << '\n');
llvm_unreachable(nullptr);
@@ -18390,9 +18564,13 @@ void RISCVTargetLowering::analyzeOutputArgs(
CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
unsigned NumArgs = Outs.size();
- std::optional<unsigned> FirstMaskArgument;
- if (Subtarget.hasVInstructions())
- FirstMaskArgument = preAssignMask(Outs);
+ SmallVector<Type *, 4> TypeList;
+ if (IsRet)
+ TypeList.push_back(MF.getFunction().getReturnType());
+ else if (CLI)
+ for (const TargetLowering::ArgListEntry &Arg : CLI->getArgs())
+ TypeList.push_back(Arg.Ty);
+ RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(TypeList)};
for (unsigned i = 0; i != NumArgs; i++) {
MVT ArgVT = Outs[i].VT;
@@ -18402,7 +18580,7 @@ void RISCVTargetLowering::analyzeOutputArgs(
RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
- FirstMaskArgument)) {
+ Dispatcher)) {
LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
<< ArgVT << "\n");
llvm_unreachable(nullptr);
@@ -18583,7 +18761,7 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
ISD::ArgFlagsTy ArgFlags, CCState &State,
bool IsFixed, bool IsRet, Type *OrigTy,
const RISCVTargetLowering &TLI,
- std::optional<unsigned> FirstMaskArgument) {
+ RVVArgDispatcher &RVVDispatcher) {
if (LocVT == MVT::i32 || LocVT == MVT::i64) {
if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
@@ -18661,13 +18839,14 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
}
if (LocVT.isVector()) {
- if (unsigned Reg =
- allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI)) {
+ MCPhysReg AllocatedVReg = RVVDispatcher.getNextPhysReg();
+ if (AllocatedVReg) {
// Fixed-length vectors are located in the corresponding scalable-vector
// container types.
if (ValVT.isFixedLengthVector())
LocVT = TLI.getContainerForFixedLengthVector(LocVT);
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ State.addLoc(
+ CCValAssign::getReg(ValNo, ValVT, AllocatedVReg, LocVT, LocInfo));
} else {
// Try and pass the address via a "fast" GPR.
if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
@@ -19295,17 +19474,15 @@ bool RISCVTargetLowering::CanLowerReturn(
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
- std::optional<unsigned> FirstMaskArgument;
- if (Subtarget.hasVInstructions())
- FirstMaskArgument = preAssignMask(Outs);
+ RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(Outs)};
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
MVT VT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
- ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
- *this, FirstMaskArgument))
+ ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
+ nullptr, *this, Dispatcher))
return false;
}
return true;
@@ -20486,8 +20663,8 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
unsigned *Fast) const {
if (!VT.isVector()) {
if (Fast)
- *Fast = Subtarget.hasFastUnalignedAccess();
- return Subtarget.hasFastUnalignedAccess();
+ *Fast = Subtarget.enableUnalignedScalarMem();
+ return Subtarget.enableUnalignedScalarMem();
}
// All vector implementations must support element alignment
@@ -20503,8 +20680,8 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
// misaligned accesses. TODO: Work through the codegen implications of
// allowing such accesses to be formed, and considered fast.
if (Fast)
- *Fast = Subtarget.hasFastUnalignedAccess();
- return Subtarget.hasFastUnalignedAccess();
+ *Fast = Subtarget.enableUnalignedVectorMem();
+ return Subtarget.enableUnalignedVectorMem();
}
@@ -20539,7 +20716,7 @@ EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op,
// Do we have sufficient alignment for our preferred VT? If not, revert
// to largest size allowed by our alignment criteria.
- if (PreferredVT != MVT::i8 && !Subtarget.hasFastUnalignedAccess()) {
+ if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
Align RequiredAlign(PreferredVT.getStoreSize());
if (Op.isFixedDstAlign())
RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
@@ -20731,7 +20908,7 @@ bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
if (!isLegalElementTypeForRVV(ScalarType))
return false;
- if (!Subtarget.hasFastUnalignedAccess() &&
+ if (!Subtarget.enableUnalignedVectorMem() &&
Alignment < ScalarType.getStoreSize())
return false;
@@ -21102,6 +21279,181 @@ unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
return Subtarget.getMinimumJumpTableEntries();
}
+// Handle a single argument, such as a return value.
+template <typename Arg>
+void RVVArgDispatcher::constructArgInfos(ArrayRef<Arg> ArgList) {
+  // This lambda determines whether the list of argument types consists of
+  // homogeneous scalable vector types.
+ auto isHomogeneousScalableVectorType = [](ArrayRef<Arg> ArgList) {
+ // First, extract the first element in the argument type.
+ auto It = ArgList.begin();
+ MVT FirstArgRegType = It->VT;
+
+    // Bail out if the list is empty or the first type needs to be split.
+ if (It == ArgList.end() || It->Flags.isSplit())
+ return false;
+
+ ++It;
+
+    // Bail out if this argument contains only one element or is not a
+    // scalable vector type.
+ if (It == ArgList.end() || !FirstArgRegType.isScalableVector())
+ return false;
+
+ // Second, check if the following elements in this argument type are all the
+ // same.
+ for (; It != ArgList.end(); ++It)
+ if (It->Flags.isSplit() || It->VT != FirstArgRegType)
+ return false;
+
+ return true;
+ };
+
+ if (isHomogeneousScalableVectorType(ArgList)) {
+ // Handle as tuple type
+ RVVArgInfos.push_back({(unsigned)ArgList.size(), ArgList[0].VT, false});
+ } else {
+ // Handle as normal vector type
+ bool FirstVMaskAssigned = false;
+ for (const auto &OutArg : ArgList) {
+ MVT RegisterVT = OutArg.VT;
+
+ // Skip non-RVV register type
+ if (!RegisterVT.isVector())
+ continue;
+
+ if (RegisterVT.isFixedLengthVector())
+ RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
+
+ if (!FirstVMaskAssigned && RegisterVT.getVectorElementType() == MVT::i1) {
+ RVVArgInfos.push_back({1, RegisterVT, true});
+ FirstVMaskAssigned = true;
+ continue;
+ }
+
+ RVVArgInfos.push_back({1, RegisterVT, false});
+ }
+ }
+}
+
+// Handle multiple args.
+template <>
+void RVVArgDispatcher::constructArgInfos<Type *>(ArrayRef<Type *> TypeList) {
+ const DataLayout &DL = MF->getDataLayout();
+ const Function &F = MF->getFunction();
+ LLVMContext &Context = F.getContext();
+
+ bool FirstVMaskAssigned = false;
+ for (Type *Ty : TypeList) {
+ StructType *STy = dyn_cast<StructType>(Ty);
+ if (STy && STy->containsHomogeneousScalableVectorTypes()) {
+ Type *ElemTy = STy->getTypeAtIndex(0U);
+ EVT VT = TLI->getValueType(DL, ElemTy);
+ MVT RegisterVT =
+ TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
+ unsigned NumRegs =
+ TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
+
+ RVVArgInfos.push_back(
+ {NumRegs * STy->getNumElements(), RegisterVT, false});
+ } else {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(*TLI, DL, Ty, ValueVTs);
+
+ for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
+ ++Value) {
+ EVT VT = ValueVTs[Value];
+ MVT RegisterVT =
+ TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
+ unsigned NumRegs =
+ TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
+
+ // Skip non-RVV register type
+ if (!RegisterVT.isVector())
+ continue;
+
+ if (RegisterVT.isFixedLengthVector())
+ RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
+
+ if (!FirstVMaskAssigned &&
+ RegisterVT.getVectorElementType() == MVT::i1) {
+ RVVArgInfos.push_back({1, RegisterVT, true});
+ FirstVMaskAssigned = true;
+ --NumRegs;
+ }
+
+ RVVArgInfos.insert(RVVArgInfos.end(), NumRegs, {1, RegisterVT, false});
+ }
+ }
+ }
+}
+
+void RVVArgDispatcher::allocatePhysReg(unsigned NF, unsigned LMul,
+ unsigned StartReg) {
+ assert((StartReg % LMul) == 0 &&
+ "Start register number should be multiple of lmul");
+ const MCPhysReg *VRArrays;
+ switch (LMul) {
+ default:
+ report_fatal_error("Invalid lmul");
+ case 1:
+ VRArrays = ArgVRs;
+ break;
+ case 2:
+ VRArrays = ArgVRM2s;
+ break;
+ case 4:
+ VRArrays = ArgVRM4s;
+ break;
+ case 8:
+ VRArrays = ArgVRM8s;
+ break;
+ }
+
+ for (unsigned i = 0; i < NF; ++i)
+ if (StartReg)
+ AllocatedPhysRegs.push_back(VRArrays[(StartReg - 8) / LMul + i]);
+ else
+ AllocatedPhysRegs.push_back(MCPhysReg());
+}
+
+/// This function determines whether each RVV argument is passed by register.
+/// If an argument can be assigned to a VR, give it a specific register;
+/// otherwise, assign it 0, which is an invalid MCPhysReg.
+void RVVArgDispatcher::compute() {
+ uint32_t AssignedMap = 0;
+ auto allocate = [&](const RVVArgInfo &ArgInfo) {
+ // Allocate first vector mask argument to V0.
+ if (ArgInfo.FirstVMask) {
+ AllocatedPhysRegs.push_back(RISCV::V0);
+ return;
+ }
+
+ unsigned RegsNeeded = divideCeil(
+ ArgInfo.VT.getSizeInBits().getKnownMinValue(), RISCV::RVVBitsPerBlock);
+ unsigned TotalRegsNeeded = ArgInfo.NF * RegsNeeded;
+ for (unsigned StartReg = 0; StartReg + TotalRegsNeeded <= NumArgVRs;
+ StartReg += RegsNeeded) {
+ uint32_t Map = ((1 << TotalRegsNeeded) - 1) << StartReg;
+ if ((AssignedMap & Map) == 0) {
+ allocatePhysReg(ArgInfo.NF, RegsNeeded, StartReg + 8);
+ AssignedMap |= Map;
+ return;
+ }
+ }
+
+ allocatePhysReg(ArgInfo.NF, RegsNeeded, 0);
+ };
+
+ for (unsigned i = 0; i < RVVArgInfos.size(); ++i)
+ allocate(RVVArgInfos[i]);
+}
+
+MCPhysReg RVVArgDispatcher::getNextPhysReg() {
+ assert(CurIdx < AllocatedPhysRegs.size() && "Index out of range");
+ return AllocatedPhysRegs[CurIdx++];
+}
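compute() assigns every RVV argument up front by scanning a 16-bit occupancy map of v8-v23 in LMUL-sized steps. A standalone sketch of that scan, followed by a worked allocation:

    // Sketch of the occupancy scan (illustration only). Returns the first
    // argument VR of a free, LMUL-aligned group, or 0 for "pass indirectly".
    unsigned pickStartReg(uint32_t &AssignedMap, unsigned RegsNeeded,
                          unsigned NF) {
      unsigned Total = NF * RegsNeeded;      // registers for the whole tuple
      for (unsigned Start = 0; Start + Total <= 16; Start += RegsNeeded) {
        uint32_t Map = ((1u << Total) - 1) << Start;
        if ((AssignedMap & Map) == 0) {      // every register in the group is free
          AssignedMap |= Map;
          return Start + 8;                  // bit 0 of the map is v8
        }
      }
      return 0;
    }

    // Worked example: f(vbool1_t m, vint32m4_t a, vint32m1_t b, vint32m2x2_t t)
    // allocates m -> v0 (first mask), a -> v8m4, b -> v12, t -> v14m2 + v16m2.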
+
namespace llvm::RISCVVIntrinsicsTable {
#define GET_RISCVVIntrinsicsTable_IMPL
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index ace5b3f..b10da3d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -24,6 +24,7 @@ namespace llvm {
class InstructionCost;
class RISCVSubtarget;
struct RISCVRegisterInfo;
+class RVVArgDispatcher;
namespace RISCVISD {
// clang-format off
@@ -875,7 +876,7 @@ public:
ISD::ArgFlagsTy ArgFlags, CCState &State,
bool IsFixed, bool IsRet, Type *OrigTy,
const RISCVTargetLowering &TLI,
- std::optional<unsigned> FirstMaskArgument);
+ RVVArgDispatcher &RVVDispatcher);
private:
void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
@@ -986,6 +987,8 @@ private:
bool shouldExpandGetVectorLength(EVT TripCountVT, unsigned VF,
bool IsScalable) const override;
+ bool shouldExpandCttzElements(EVT VT) const override;
+
/// RVV code generation for fixed length vectors does not lower all
/// BUILD_VECTORs. This makes BUILD_VECTOR legalisation a source of stores to
/// merge. However, merging them creates a BUILD_VECTOR that is just as
@@ -1015,19 +1018,71 @@ private:
unsigned getMinimumJumpTableEntries() const override;
};
+/// As per the spec, the rules for passing vector arguments are as follows:
+///
+/// 1. For the first vector mask argument, use v0 to pass it.
+/// 2. For vector data arguments or the remaining vector mask arguments,
+/// starting from the v8 register, if a vector register group between v8-v23
+/// that has not been allocated can be found and the first register number is
+/// a multiple of LMUL, then allocate this vector register group to the
+/// argument and mark those registers as allocated. Otherwise, the argument is
+/// passed by reference and is replaced in the argument list with the address.
+/// 3. For tuple vector data arguments, starting from the v8 register, if
+/// NFIELDS consecutive vector register groups between v8-v23 that have not
+/// been allocated can be found and the first register number is a multiple of
+/// LMUL, then allocate these vector register groups to the argument and mark
+/// those registers as allocated. Otherwise, the argument is passed by
+/// reference and is replaced in the argument list with the address.
+class RVVArgDispatcher {
+public:
+ static constexpr unsigned NumArgVRs = 16;
+
+ struct RVVArgInfo {
+ unsigned NF;
+ MVT VT;
+ bool FirstVMask = false;
+ };
+
+ template <typename Arg>
+ RVVArgDispatcher(const MachineFunction *MF, const RISCVTargetLowering *TLI,
+ ArrayRef<Arg> ArgList)
+ : MF(MF), TLI(TLI) {
+ constructArgInfos(ArgList);
+ compute();
+ }
+
+ RVVArgDispatcher() = default;
+
+ MCPhysReg getNextPhysReg();
+
+private:
+ SmallVector<RVVArgInfo, 4> RVVArgInfos;
+ SmallVector<MCPhysReg, 4> AllocatedPhysRegs;
+
+ const MachineFunction *MF = nullptr;
+ const RISCVTargetLowering *TLI = nullptr;
+
+ unsigned CurIdx = 0;
+
+ template <typename Arg> void constructArgInfos(ArrayRef<Arg> Ret);
+ void compute();
+ void allocatePhysReg(unsigned NF = 1, unsigned LMul = 1,
+ unsigned StartReg = 0);
+};
+
namespace RISCV {
bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
- std::optional<unsigned> FirstMaskArgument);
+ RVVArgDispatcher &RVVDispatcher);
bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
- std::optional<unsigned> FirstMaskArgument);
+ RVVArgDispatcher &RVVDispatcher);
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index a14f9a2..aab91ad 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -468,6 +468,7 @@ public:
bool isUnknown() const { return State == Unknown; }
void setAVLReg(Register Reg) {
+ assert(Reg.isVirtual() || Reg == RISCV::X0 || Reg == RISCV::NoRegister);
AVLReg = Reg;
State = AVLIsReg;
}
@@ -1514,17 +1515,12 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
// If the AVL is a register, we need to make sure MI's AVL dominates PrevMI.
// For now just check that PrevMI uses the same virtual register.
- if (AVL.isReg() && AVL.getReg() != RISCV::X0) {
- if (AVL.getReg().isPhysical())
- return false;
- if (!PrevAVL.isReg() || PrevAVL.getReg() != AVL.getReg())
- return false;
- }
+ if (AVL.isReg() && AVL.getReg() != RISCV::X0 &&
+ (!PrevAVL.isReg() || PrevAVL.getReg() != AVL.getReg()))
+ return false;
}
- if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm())
- return false;
-
+ assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm());
auto PriorVType = PrevMI.getOperand(2).getImm();
auto VType = MI.getOperand(2).getImm();
return areCompatibleVTYPEs(PriorVType, VType, Used);
@@ -1545,9 +1541,9 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
continue;
}
- Register VRegDef = MI.getOperand(0).getReg();
- if (VRegDef != RISCV::X0 &&
- !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
+ Register RegDef = MI.getOperand(0).getReg();
+ assert(RegDef == RISCV::X0 || RegDef.isVirtual());
+ if (RegDef != RISCV::X0 && !MRI->use_nodbg_empty(RegDef))
Used.demandVL();
if (NextMI) {
@@ -1555,7 +1551,9 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
ToDelete.push_back(&MI);
// Leave NextMI unchanged
continue;
- } else if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
+ }
+
+ if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
if (!isVLPreservingConfig(*NextMI)) {
MI.getOperand(0).setReg(NextMI->getOperand(0).getReg());
MI.getOperand(0).setIsDead(false);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 508f607..8331fc0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -361,15 +361,12 @@ void RISCVInstrInfo::copyPhysRegVector(
return {RISCVII::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
};
- auto FindRegWithEncoding = [&TRI](const TargetRegisterClass &RegClass,
- uint16_t Encoding) {
- ArrayRef<MCPhysReg> Regs = RegClass.getRegisters();
- const auto *FoundReg = llvm::find_if(Regs, [&](MCPhysReg Reg) {
- return TRI->getEncodingValue(Reg) == Encoding;
- });
- // We should be always able to find one valid register.
- assert(FoundReg != Regs.end());
- return *FoundReg;
+ auto FindRegWithEncoding = [TRI](const TargetRegisterClass &RegClass,
+ uint16_t Encoding) {
+ MCRegister Reg = RISCV::V0 + Encoding;
+ if (&RegClass == &RISCV::VRRegClass)
+ return Reg;
+ return TRI->getMatchingSuperReg(Reg, RISCV::sub_vrm1_0, &RegClass);
};
while (I != NumRegs) {
// For non-segment copying, we only do this once as the registers are always
@@ -1986,7 +1983,7 @@ genShXAddAddShift(MachineInstr &Root, unsigned AddOpIdx,
MRI.getUniqueVRegDef(AddMI->getOperand(AddOpIdx).getReg());
unsigned InnerShiftAmt = ShiftMI->getOperand(2).getImm();
- assert(InnerShiftAmt > OuterShiftAmt && "Unexpected shift amount");
+ assert(InnerShiftAmt >= OuterShiftAmt && "Unexpected shift amount");
unsigned InnerOpc;
switch (InnerShiftAmt - OuterShiftAmt) {
@@ -2719,6 +2716,50 @@ std::string RISCVInstrInfo::createMIROperandComment(
}
// clang-format off
+#define CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL) \
+ RISCV::Pseudo##OP##_##LMUL
+
+#define CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL) \
+ RISCV::Pseudo##OP##_##LMUL##_MASK
+
+#define CASE_RVV_OPCODE_LMUL(OP, LMUL) \
+ CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL): \
+ case CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL)
+
+#define CASE_RVV_OPCODE_UNMASK_WIDEN(OP) \
+ CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF8): \
+ case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF4): \
+ case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF2): \
+ case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M1): \
+ case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M2): \
+ case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M4)
+
+#define CASE_RVV_OPCODE_UNMASK(OP) \
+ CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \
+ case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M8)
+
+#define CASE_RVV_OPCODE_MASK_WIDEN(OP) \
+ CASE_RVV_OPCODE_MASK_LMUL(OP, MF8): \
+ case CASE_RVV_OPCODE_MASK_LMUL(OP, MF4): \
+ case CASE_RVV_OPCODE_MASK_LMUL(OP, MF2): \
+ case CASE_RVV_OPCODE_MASK_LMUL(OP, M1): \
+ case CASE_RVV_OPCODE_MASK_LMUL(OP, M2): \
+ case CASE_RVV_OPCODE_MASK_LMUL(OP, M4)
+
+#define CASE_RVV_OPCODE_MASK(OP) \
+ CASE_RVV_OPCODE_MASK_WIDEN(OP): \
+ case CASE_RVV_OPCODE_MASK_LMUL(OP, M8)
+
+#define CASE_RVV_OPCODE_WIDEN(OP) \
+ CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \
+ case CASE_RVV_OPCODE_MASK_WIDEN(OP)
+
+#define CASE_RVV_OPCODE(OP) \
+ CASE_RVV_OPCODE_UNMASK(OP): \
+ case CASE_RVV_OPCODE_MASK(OP)
+// clang-format on
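For reference, a sketch of what one of these macros unfolds to in the switch below: CASE_RVV_OPCODE(VADD_VV) names the unmasked and masked pseudo for every LMUL, i.e.

    case RISCV::PseudoVADD_VV_MF8: case RISCV::PseudoVADD_VV_MF8_MASK:
    case RISCV::PseudoVADD_VV_MF4: case RISCV::PseudoVADD_VV_MF4_MASK:
    /* ... MF2, M1, M2, M4 ... */
    case RISCV::PseudoVADD_VV_M8:  case RISCV::PseudoVADD_VV_M8_MASK: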
+
+// clang-format off
#define CASE_VMA_OPCODE_COMMON(OP, TYPE, LMUL) \
RISCV::PseudoV##OP##_##TYPE##_##LMUL
@@ -2798,6 +2839,28 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
case RISCV::PseudoCCMOVGPR:
// Operands 4 and 5 are commutable.
return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 4, 5);
+ case CASE_RVV_OPCODE(VADD_VV):
+ case CASE_RVV_OPCODE(VAND_VV):
+ case CASE_RVV_OPCODE(VOR_VV):
+ case CASE_RVV_OPCODE(VXOR_VV):
+ case CASE_RVV_OPCODE_MASK(VMSEQ_VV):
+ case CASE_RVV_OPCODE_MASK(VMSNE_VV):
+ case CASE_RVV_OPCODE(VMIN_VV):
+ case CASE_RVV_OPCODE(VMINU_VV):
+ case CASE_RVV_OPCODE(VMAX_VV):
+ case CASE_RVV_OPCODE(VMAXU_VV):
+ case CASE_RVV_OPCODE(VMUL_VV):
+ case CASE_RVV_OPCODE(VMULH_VV):
+ case CASE_RVV_OPCODE(VMULHU_VV):
+ case CASE_RVV_OPCODE_WIDEN(VWADD_VV):
+ case CASE_RVV_OPCODE_WIDEN(VWADDU_VV):
+ case CASE_RVV_OPCODE_WIDEN(VWMUL_VV):
+ case CASE_RVV_OPCODE_WIDEN(VWMULU_VV):
+ case CASE_RVV_OPCODE_WIDEN(VWMACC_VV):
+ case CASE_RVV_OPCODE_WIDEN(VWMACCU_VV):
+ case CASE_RVV_OPCODE_UNMASK(VADC_VVM):
+ // Operands 2 and 3 are commutable.
+ return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
case CASE_VFMA_SPLATS(FMADD):
case CASE_VFMA_SPLATS(FMSUB):
case CASE_VFMA_SPLATS(FMACC):
@@ -2950,7 +3013,7 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW)
#define CASE_VFMA_CHANGE_OPCODE_VV(OLDOP, NEWOP) \
- CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E16) \
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VV, E16) \
CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E32) \
CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VV, E64)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 0dc466f..cd5caa4 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -143,22 +143,24 @@ class PseudoToVInst<string PseudoInst> {
// This class describes information associated to the LMUL.
class LMULInfo<int lmul, int oct, VReg regclass, VReg wregclass,
- VReg f2regclass, VReg f4regclass, VReg f8regclass, string mx> {
+ VReg f2regclass, VReg f4regclass, VReg f8regclass, string mx,
+ VReg moutregclass = VMM1> {
bits<3> value = lmul; // This is encoded as the vlmul field of vtype.
VReg vrclass = regclass;
VReg wvrclass = wregclass;
VReg f8vrclass = f8regclass;
VReg f4vrclass = f4regclass;
VReg f2vrclass = f2regclass;
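+  // Register class for mask results at this LMUL; the VMM* classes contain
+  // only registers whose encoding is a multiple of the group size.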
+ VReg moutclass = moutregclass;
string MX = mx;
int octuple = oct;
}
// Associate LMUL with tablegen records of register classes.
def V_M1 : LMULInfo<0b000, 8, VR, VRM2, VR, VR, VR, "M1">;
-def V_M2 : LMULInfo<0b001, 16, VRM2, VRM4, VR, VR, VR, "M2">;
-def V_M4 : LMULInfo<0b010, 32, VRM4, VRM8, VRM2, VR, VR, "M4">;
-def V_M8 : LMULInfo<0b011, 64, VRM8,/*NoVReg*/VR, VRM4, VRM2, VR, "M8">;
+def V_M2 : LMULInfo<0b001, 16, VRM2, VRM4, VR, VR, VR, "M2", VMM2>;
+def V_M4 : LMULInfo<0b010, 32, VRM4, VRM8, VRM2, VR, VR, "M4", VMM4>;
+def V_M8 : LMULInfo<0b011, 64, VRM8,/*NoVReg*/VR, VRM4, VRM2, VR, "M8", VMM8>;
def V_MF8 : LMULInfo<0b101, 1, VR, VR,/*NoVReg*/VR,/*NoVReg*/VR,/*NoVReg*/VR, "MF8">;
def V_MF4 : LMULInfo<0b110, 2, VR, VR, VR,/*NoVReg*/VR,/*NoVReg*/VR, "MF4">;
@@ -2127,8 +2129,9 @@ multiclass VPseudoBinary<VReg RetClass,
LMULInfo MInfo,
string Constraint = "",
int sew = 0,
- int TargetConstraintType = 1> {
- let VLMul = MInfo.value, SEW=sew in {
+ int TargetConstraintType = 1,
+ bit Commutable = 0> {
+ let VLMul = MInfo.value, SEW=sew, isCommutable = Commutable in {
defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX);
def suffix : VPseudoBinaryNoMaskTU<RetClass, Op1Class, Op2Class,
Constraint, TargetConstraintType>;
@@ -2167,8 +2170,9 @@ multiclass VPseudoBinaryM<VReg RetClass,
DAGOperand Op2Class,
LMULInfo MInfo,
string Constraint = "",
- int TargetConstraintType = 1> {
- let VLMul = MInfo.value in {
+ int TargetConstraintType = 1,
+ bit Commutable = 0> {
+ let VLMul = MInfo.value, isCommutable = Commutable in {
def "_" # MInfo.MX : VPseudoBinaryMOutNoMask<RetClass, Op1Class, Op2Class,
Constraint, TargetConstraintType>;
let ForceTailAgnostic = true in
@@ -2226,8 +2230,8 @@ multiclass VPseudoTiedBinaryRoundingMode<VReg RetClass,
}
-multiclass VPseudoBinaryV_VV<LMULInfo m, string Constraint = "", int sew = 0> {
- defm _VV : VPseudoBinary<m.vrclass, m.vrclass, m.vrclass, m, Constraint, sew>;
+multiclass VPseudoBinaryV_VV<LMULInfo m, string Constraint = "", int sew = 0, bit Commutable = 0> {
+ defm _VV : VPseudoBinary<m.vrclass, m.vrclass, m.vrclass, m, Constraint, sew, Commutable=Commutable>;
}
multiclass VPseudoBinaryV_VV_RM<LMULInfo m, string Constraint = ""> {
@@ -2331,9 +2335,10 @@ multiclass VPseudoVALU_MM<bit Commutable = 0> {
// * If the destination EEW is greater than the source EEW, the source EMUL is
//   at least 1, and the overlap is in the highest-numbered part of the
//   destination register group, it is legal. Otherwise, it is illegal.
-multiclass VPseudoBinaryW_VV<LMULInfo m> {
+multiclass VPseudoBinaryW_VV<LMULInfo m, bit Commutable = 0> {
defm _VV : VPseudoBinary<m.wvrclass, m.vrclass, m.vrclass, m,
- "@earlyclobber $rd", TargetConstraintType=3>;
+ "@earlyclobber $rd", TargetConstraintType=3,
+ Commutable=Commutable>;
}
multiclass VPseudoBinaryW_VV_RM<LMULInfo m, int sew = 0> {
@@ -2453,7 +2458,9 @@ multiclass VPseudoBinaryV_VM<LMULInfo m, bit CarryOut = 0, bit CarryIn = 1,
m.vrclass, m.vrclass, m, CarryIn, Constraint, TargetConstraintType>;
}
-multiclass VPseudoTiedBinaryV_VM<LMULInfo m, int TargetConstraintType = 1> {
+multiclass VPseudoTiedBinaryV_VM<LMULInfo m, int TargetConstraintType = 1,
+ bit Commutable = 0> {
+ let isCommutable = Commutable in
def "_VVM" # "_" # m.MX:
VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
m.vrclass, m.vrclass, m, 1, "",
@@ -2667,26 +2674,24 @@ multiclass PseudoVEXT_VF8 {
// lowest-numbered part of the source register group".
// With LMUL<=1 the source and dest occupy a single register so any overlap
// is in the lowest-numbered part.
-multiclass VPseudoBinaryM_VV<LMULInfo m, int TargetConstraintType = 1> {
- defm _VV : VPseudoBinaryM<VR, m.vrclass, m.vrclass, m,
- !if(!ge(m.octuple, 16), "@earlyclobber $rd", ""), TargetConstraintType>;
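+// Restricting the mask result to m.moutclass means any overlap with a source
+// register group falls on its lowest-numbered register, which the spec
+// permits, so the @earlyclobber constraint is no longer needed.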
+multiclass VPseudoBinaryM_VV<LMULInfo m, int TargetConstraintType = 1,
+ bit Commutable = 0> {
+ defm _VV : VPseudoBinaryM<m.moutclass, m.vrclass, m.vrclass, m, "",
+ TargetConstraintType, Commutable=Commutable>;
}
multiclass VPseudoBinaryM_VX<LMULInfo m, int TargetConstraintType = 1> {
defm "_VX" :
- VPseudoBinaryM<VR, m.vrclass, GPR, m,
- !if(!ge(m.octuple, 16), "@earlyclobber $rd", ""), TargetConstraintType>;
+ VPseudoBinaryM<m.moutclass, m.vrclass, GPR, m, "", TargetConstraintType>;
}
multiclass VPseudoBinaryM_VF<LMULInfo m, FPR_Info f, int TargetConstraintType = 1> {
defm "_V" # f.FX :
- VPseudoBinaryM<VR, m.vrclass, f.fprclass, m,
- !if(!ge(m.octuple, 16), "@earlyclobber $rd", ""), TargetConstraintType>;
+ VPseudoBinaryM<m.moutclass, m.vrclass, f.fprclass, m, "", TargetConstraintType>;
}
multiclass VPseudoBinaryM_VI<LMULInfo m, int TargetConstraintType = 1> {
- defm _VI : VPseudoBinaryM<VR, m.vrclass, simm5, m,
- !if(!ge(m.octuple, 16), "@earlyclobber $rd", ""), TargetConstraintType>;
+ defm _VI : VPseudoBinaryM<m.moutclass, m.vrclass, simm5, m, "", TargetConstraintType>;
}
multiclass VPseudoVGTR_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
@@ -2751,10 +2756,11 @@ multiclass VPseudoVSSHT_VV_VX_VI_RM<Operand ImmType = simm5, string Constraint =
}
}
-multiclass VPseudoVALU_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
+multiclass VPseudoVALU_VV_VX_VI<Operand ImmType = simm5, string Constraint = "",
+ bit Commutable = 0> {
foreach m = MxList in {
defvar mx = m.MX;
- defm "" : VPseudoBinaryV_VV<m, Constraint>,
+ defm "" : VPseudoBinaryV_VV<m, Constraint, Commutable=Commutable>,
SchedBinary<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", mx,
forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VX<m, Constraint>,
@@ -2804,17 +2810,17 @@ multiclass VPseudoVAALU_VV_VX_RM {
multiclass VPseudoVMINMAX_VV_VX {
foreach m = MxList in {
defvar mx = m.MX;
- defm "" : VPseudoBinaryV_VV<m>,
+ defm "" : VPseudoBinaryV_VV<m, Commutable=1>,
SchedBinary<"WriteVIMinMaxV", "ReadVIMinMaxV", "ReadVIMinMaxV", mx>;
defm "" : VPseudoBinaryV_VX<m>,
SchedBinary<"WriteVIMinMaxX", "ReadVIMinMaxV", "ReadVIMinMaxX", mx>;
}
}
-multiclass VPseudoVMUL_VV_VX {
+multiclass VPseudoVMUL_VV_VX<bit Commutable = 0> {
foreach m = MxList in {
defvar mx = m.MX;
- defm "" : VPseudoBinaryV_VV<m>,
+ defm "" : VPseudoBinaryV_VV<m, Commutable=Commutable>,
SchedBinary<"WriteVIMulV", "ReadVIMulV", "ReadVIMulV", mx>;
defm "" : VPseudoBinaryV_VX<m>,
SchedBinary<"WriteVIMulX", "ReadVIMulV", "ReadVIMulX", mx>;
@@ -2894,32 +2900,34 @@ multiclass VPseudoVALU_VV_VX {
multiclass VPseudoVSGNJ_VV_VF {
foreach m = MxListF in {
- defm "" : VPseudoBinaryFV_VV<m>,
+ foreach e = SchedSEWSet<m.MX, isF=1>.val in
+ defm "" : VPseudoBinaryFV_VV<m, sew=e>,
SchedBinary<"WriteVFSgnjV", "ReadVFSgnjV", "ReadVFSgnjV", m.MX,
- forceMergeOpRead=true>;
+ e, forceMergeOpRead=true>;
}
foreach f = FPList in {
foreach m = f.MxList in {
- defm "" : VPseudoBinaryV_VF<m, f>,
+ defm "" : VPseudoBinaryV_VF<m, f, sew=f.SEW>,
SchedBinary<"WriteVFSgnjF", "ReadVFSgnjV", "ReadVFSgnjF", m.MX,
- forceMergeOpRead=true>;
+ f.SEW, forceMergeOpRead=true>;
}
}
}
multiclass VPseudoVMAX_VV_VF {
foreach m = MxListF in {
- defm "" : VPseudoBinaryFV_VV<m>,
- SchedBinary<"WriteVFMinMaxV", "ReadVFMinMaxV", "ReadVFMinMaxV", m.MX,
- forceMergeOpRead=true>;
+ foreach e = SchedSEWSet<m.MX, isF=1>.val in
+ defm "" : VPseudoBinaryFV_VV<m, sew=e>,
+ SchedBinary<"WriteVFMinMaxV", "ReadVFMinMaxV", "ReadVFMinMaxV",
+ m.MX, e, forceMergeOpRead=true>;
}
foreach f = FPList in {
foreach m = f.MxList in {
- defm "" : VPseudoBinaryV_VF<m, f>,
- SchedBinary<"WriteVFMinMaxF", "ReadVFMinMaxV", "ReadVFMinMaxF", m.MX,
- forceMergeOpRead=true>;
+ defm "" : VPseudoBinaryV_VF<m, f, sew=f.SEW>,
+ SchedBinary<"WriteVFMinMaxF", "ReadVFMinMaxV", "ReadVFMinMaxF",
+ m.MX, f.SEW, forceMergeOpRead=true>;
}
}
}
@@ -2962,10 +2970,10 @@ multiclass VPseudoVALU_VX_VI<Operand ImmType = simm5> {
}
}
-multiclass VPseudoVWALU_VV_VX {
+multiclass VPseudoVWALU_VV_VX<bit Commutable = 0> {
foreach m = MxListW in {
defvar mx = m.MX;
- defm "" : VPseudoBinaryW_VV<m>,
+ defm "" : VPseudoBinaryW_VV<m, Commutable=Commutable>,
SchedBinary<"WriteVIWALUV", "ReadVIWALUV", "ReadVIWALUV", mx,
forceMergeOpRead=true>;
defm "" : VPseudoBinaryW_VX<m>,
@@ -2974,10 +2982,10 @@ multiclass VPseudoVWALU_VV_VX {
}
}
-multiclass VPseudoVWMUL_VV_VX {
+multiclass VPseudoVWMUL_VV_VX<bit Commutable = 0> {
foreach m = MxListW in {
defvar mx = m.MX;
- defm "" : VPseudoBinaryW_VV<m>,
+ defm "" : VPseudoBinaryW_VV<m, Commutable=Commutable>,
SchedBinary<"WriteVIWMulV", "ReadVIWMulV", "ReadVIWMulV", mx,
forceMergeOpRead=true>;
defm "" : VPseudoBinaryW_VX<m>,
@@ -3072,7 +3080,7 @@ multiclass VPseudoVMRG_VM_XM_IM {
multiclass VPseudoVCALU_VM_XM_IM {
foreach m = MxList in {
defvar mx = m.MX;
- defm "" : VPseudoTiedBinaryV_VM<m>,
+ defm "" : VPseudoTiedBinaryV_VM<m, Commutable=1>,
SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx,
forceMergeOpRead=true>;
defm "" : VPseudoTiedBinaryV_XM<m>,
@@ -3285,10 +3293,10 @@ multiclass VPseudoTernaryV_VF_AAXA_RM<LMULInfo m, FPR_Info f,
sew, Commutable=1>;
}
-multiclass VPseudoTernaryW_VV<LMULInfo m> {
+multiclass VPseudoTernaryW_VV<LMULInfo m, bit Commutable = 0> {
defvar constraint = "@earlyclobber $rd";
defm _VV : VPseudoTernaryWithPolicy<m.wvrclass, m.vrclass, m.vrclass, m,
- constraint, /*Commutable*/ 0, TargetConstraintType=3>;
+ constraint, Commutable=Commutable, TargetConstraintType=3>;
}
multiclass VPseudoTernaryW_VV_RM<LMULInfo m, int sew = 0> {
@@ -3378,10 +3386,10 @@ multiclass VPseudoVSLD_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
}
}
-multiclass VPseudoVWMAC_VV_VX {
+multiclass VPseudoVWMAC_VV_VX<bit Commutable = 0> {
foreach m = MxListW in {
defvar mx = m.MX;
- defm "" : VPseudoTernaryW_VV<m>,
+ defm "" : VPseudoTernaryW_VV<m, Commutable=Commutable>,
SchedTernary<"WriteVIWMulAddV", "ReadVIWMulAddV", "ReadVIWMulAddV",
"ReadVIWMulAddV", mx>;
defm "" : VPseudoTernaryW_VX<m>,
@@ -3434,10 +3442,10 @@ multiclass VPseudoVWMAC_VV_VF_BF_RM {
}
}
-multiclass VPseudoVCMPM_VV_VX_VI {
+multiclass VPseudoVCMPM_VV_VX_VI<bit Commutable = 0> {
foreach m = MxList in {
defvar mx = m.MX;
- defm "" : VPseudoBinaryM_VV<m, TargetConstraintType=2>,
+ defm "" : VPseudoBinaryM_VV<m, TargetConstraintType=2, Commutable=Commutable>,
SchedBinary<"WriteVICmpV", "ReadVICmpV", "ReadVICmpV", mx>;
defm "" : VPseudoBinaryM_VX<m, TargetConstraintType=2>,
SchedBinary<"WriteVICmpX", "ReadVICmpV", "ReadVICmpX", mx>;
@@ -3580,12 +3588,14 @@ multiclass VPseudoConversion<VReg RetClass,
VReg Op1Class,
LMULInfo MInfo,
string Constraint = "",
+ int sew = 0,
int TargetConstraintType = 1> {
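+  // A nonzero sew requests per-SEW pseudos: the name gains an _E<sew> suffix
+  // so scheduling information can vary with the element width.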
+ defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX);
let VLMul = MInfo.value in {
- def "_" # MInfo.MX : VPseudoUnaryNoMask<RetClass, Op1Class, Constraint, TargetConstraintType>;
- def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMask<RetClass, Op1Class,
- Constraint, TargetConstraintType>,
- RISCVMaskedPseudo<MaskIdx=2>;
+ def suffix : VPseudoUnaryNoMask<RetClass, Op1Class, Constraint, TargetConstraintType>;
+ def suffix # "_MASK" : VPseudoUnaryMask<RetClass, Op1Class,
+ Constraint, TargetConstraintType>,
+ RISCVMaskedPseudo<MaskIdx=2>;
}
}
@@ -3593,12 +3603,15 @@ multiclass VPseudoConversionRoundingMode<VReg RetClass,
VReg Op1Class,
LMULInfo MInfo,
string Constraint = "",
+ int sew = 0,
int TargetConstraintType = 1> {
let VLMul = MInfo.value in {
- def "_" # MInfo.MX : VPseudoUnaryNoMaskRoundingMode<RetClass, Op1Class, Constraint, TargetConstraintType>;
- def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMaskRoundingMode<RetClass, Op1Class,
- Constraint, TargetConstraintType>,
- RISCVMaskedPseudo<MaskIdx=2>;
+ defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX);
+ def suffix : VPseudoUnaryNoMaskRoundingMode<RetClass, Op1Class, Constraint, TargetConstraintType>;
+ def suffix # "_MASK" : VPseudoUnaryMaskRoundingMode<RetClass, Op1Class,
+ Constraint,
+ TargetConstraintType>,
+ RISCVMaskedPseudo<MaskIdx=2>;
}
}
@@ -3607,13 +3620,15 @@ multiclass VPseudoConversionRM<VReg RetClass,
VReg Op1Class,
LMULInfo MInfo,
string Constraint = "",
+ int sew = 0,
int TargetConstraintType = 1> {
let VLMul = MInfo.value in {
- def "_" # MInfo.MX : VPseudoUnaryNoMask_FRM<RetClass, Op1Class,
- Constraint, TargetConstraintType>;
- def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMask_FRM<RetClass, Op1Class,
- Constraint, TargetConstraintType>,
- RISCVMaskedPseudo<MaskIdx=2>;
+ defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX);
+ def suffix : VPseudoUnaryNoMask_FRM<RetClass, Op1Class,
+ Constraint, TargetConstraintType>;
+ def suffix # "_MASK" : VPseudoUnaryMask_FRM<RetClass, Op1Class,
+ Constraint, TargetConstraintType>,
+ RISCVMaskedPseudo<MaskIdx=2>;
}
}
@@ -3660,17 +3675,19 @@ multiclass VPseudoVFROUND_NOEXCEPT_V {
multiclass VPseudoVCVTF_V_RM {
foreach m = MxListF in {
- defm _V : VPseudoConversionRoundingMode<m.vrclass, m.vrclass, m>,
- SchedUnary<"WriteVFCvtIToFV", "ReadVFCvtIToFV", m.MX,
- forceMergeOpRead=true>;
+ foreach e = SchedSEWSet<m.MX, isF=1>.val in
+ defm _V : VPseudoConversionRoundingMode<m.vrclass, m.vrclass, m, sew=e>,
+ SchedUnary<"WriteVFCvtIToFV", "ReadVFCvtIToFV", m.MX, e,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVCVTF_RM_V {
foreach m = MxListF in {
- defm _V : VPseudoConversionRM<m.vrclass, m.vrclass, m>,
- SchedUnary<"WriteVFCvtIToFV", "ReadVFCvtIToFV", m.MX,
- forceMergeOpRead=true>;
+ foreach e = SchedSEWSet<m.MX, isF=1>.val in
+ defm _V : VPseudoConversionRM<m.vrclass, m.vrclass, m, sew=e>,
+ SchedUnary<"WriteVFCvtIToFV", "ReadVFCvtIToFV", m.MX, e,
+ forceMergeOpRead=true>;
}
}
@@ -3704,18 +3721,22 @@ multiclass VPseudoVWCVTI_RM_V {
multiclass VPseudoVWCVTF_V {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListW in {
- defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, TargetConstraintType=3>,
- SchedUnary<"WriteVFWCvtIToFV", "ReadVFWCvtIToFV", m.MX,
- forceMergeOpRead=true>;
+ foreach e = SchedSEWSet<m.MX, isF=0, isWidening=1>.val in
+ defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, sew=e,
+ TargetConstraintType=3>,
+ SchedUnary<"WriteVFWCvtIToFV", "ReadVFWCvtIToFV", m.MX, e,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVWCVTD_V {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListFW in {
- defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, TargetConstraintType=3>,
- SchedUnary<"WriteVFWCvtFToFV", "ReadVFWCvtFToFV", m.MX,
- forceMergeOpRead=true>;
+ foreach e = SchedSEWSet<m.MX, isF=1, isWidening=1>.val in
+ defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, sew=e,
+ TargetConstraintType=3>,
+ SchedUnary<"WriteVFWCvtFToFV", "ReadVFWCvtFToFV", m.MX, e,
+ forceMergeOpRead=true>;
}
}
@@ -3749,36 +3770,45 @@ multiclass VPseudoVNCVTI_RM_W {
multiclass VPseudoVNCVTF_W_RM {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListFW in {
- defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m, constraint, TargetConstraintType=2>,
- SchedUnary<"WriteVFNCvtIToFV", "ReadVFNCvtIToFV", m.MX,
- forceMergeOpRead=true>;
+ foreach e = SchedSEWSet<m.MX, isF=1, isWidening=1>.val in
+ defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m,
+ constraint, sew=e,
+ TargetConstraintType=2>,
+ SchedUnary<"WriteVFNCvtIToFV", "ReadVFNCvtIToFV", m.MX, e,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVNCVTF_RM_W {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListFW in {
- defm _W : VPseudoConversionRM<m.vrclass, m.wvrclass, m, constraint>,
- SchedUnary<"WriteVFNCvtIToFV", "ReadVFNCvtIToFV", m.MX,
- forceMergeOpRead=true>;
+ foreach e = SchedSEWSet<m.MX, isF=1, isWidening=1>.val in
+ defm _W : VPseudoConversionRM<m.vrclass, m.wvrclass, m, constraint, sew=e>,
+ SchedUnary<"WriteVFNCvtIToFV", "ReadVFNCvtIToFV", m.MX, e,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVNCVTD_W {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListFW in {
- defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint, TargetConstraintType=2>,
- SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX,
- forceMergeOpRead=true>;
+ foreach e = SchedSEWSet<m.MX, isF=1, isWidening=1>.val in
+ defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint, sew=e,
+ TargetConstraintType=2>,
+ SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX, e,
+ forceMergeOpRead=true>;
}
}
multiclass VPseudoVNCVTD_W_RM {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListFW in {
- defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m, constraint, TargetConstraintType=2>,
- SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX,
- forceMergeOpRead=true>;
+ foreach e = SchedSEWSet<m.MX, isF=1, isWidening=1>.val in
+ defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m,
+ constraint, sew=e,
+ TargetConstraintType=2>,
+ SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX, e,
+ forceMergeOpRead=true>;
}
}
@@ -4889,14 +4919,17 @@ multiclass VPatConversionTA<string intrinsic,
ValueType result_type,
ValueType op1_type,
ValueType mask_type,
- int sew,
+ int log2sew,
LMULInfo vlmul,
VReg result_reg_class,
- VReg op1_reg_class> {
+ VReg op1_reg_class,
+ bit isSEWAware = 0> {
def : VPatUnaryNoMask<intrinsic, inst, kind, result_type, op1_type,
- sew, vlmul, result_reg_class, op1_reg_class>;
+ log2sew, vlmul, result_reg_class, op1_reg_class,
+ isSEWAware>;
def : VPatUnaryMask<intrinsic, inst, kind, result_type, op1_type,
- mask_type, sew, vlmul, result_reg_class, op1_reg_class>;
+ mask_type, log2sew, vlmul, result_reg_class, op1_reg_class,
+ isSEWAware>;
}
multiclass VPatConversionTARoundingMode<string intrinsic,
@@ -4905,14 +4938,17 @@ multiclass VPatConversionTARoundingMode<string intrinsic,
ValueType result_type,
ValueType op1_type,
ValueType mask_type,
- int sew,
+ int log2sew,
LMULInfo vlmul,
VReg result_reg_class,
- VReg op1_reg_class> {
+ VReg op1_reg_class,
+ bit isSEWAware = 0> {
def : VPatUnaryNoMaskRoundingMode<intrinsic, inst, kind, result_type, op1_type,
- sew, vlmul, result_reg_class, op1_reg_class>;
+ log2sew, vlmul, result_reg_class,
+ op1_reg_class, isSEWAware>;
def : VPatUnaryMaskRoundingMode<intrinsic, inst, kind, result_type, op1_type,
- mask_type, sew, vlmul, result_reg_class, op1_reg_class>;
+ mask_type, log2sew, vlmul, result_reg_class,
+ op1_reg_class, isSEWAware>;
}
multiclass VPatBinaryV_VV<string intrinsic, string instruction,
@@ -5905,15 +5941,16 @@ multiclass VPatConversionVI_VF_RM<string intrinsic,
}
}
-multiclass VPatConversionVF_VI_RM<string intrinsic,
- string instruction> {
+multiclass VPatConversionVF_VI_RM<string intrinsic, string instruction,
+ bit isSEWAware = 0> {
foreach fvti = AllFloatVectors in {
defvar ivti = GetIntVTypeInfo<fvti>.Vti;
let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
GetVTypePredicates<ivti>.Predicates) in
defm : VPatConversionTARoundingMode<intrinsic, instruction, "V",
fvti.Vector, ivti.Vector, fvti.Mask, ivti.Log2SEW,
- ivti.LMul, fvti.RegClass, ivti.RegClass>;
+ ivti.LMul, fvti.RegClass, ivti.RegClass,
+ isSEWAware>;
}
}
@@ -5941,7 +5978,8 @@ multiclass VPatConversionWI_VF_RM<string intrinsic, string instruction> {
}
}
-multiclass VPatConversionWF_VI<string intrinsic, string instruction> {
+multiclass VPatConversionWF_VI<string intrinsic, string instruction,
+ bit isSEWAware = 0> {
foreach vtiToWti = AllWidenableIntToFloatVectors in {
defvar vti = vtiToWti.Vti;
defvar fwti = vtiToWti.Wti;
@@ -5949,11 +5987,12 @@ multiclass VPatConversionWF_VI<string intrinsic, string instruction> {
GetVTypePredicates<fwti>.Predicates) in
defm : VPatConversionTA<intrinsic, instruction, "V",
fwti.Vector, vti.Vector, fwti.Mask, vti.Log2SEW,
- vti.LMul, fwti.RegClass, vti.RegClass>;
+ vti.LMul, fwti.RegClass, vti.RegClass, isSEWAware>;
}
}
-multiclass VPatConversionWF_VF<string intrinsic, string instruction> {
+multiclass VPatConversionWF_VF<string intrinsic, string instruction,
+ bit isSEWAware = 0> {
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
@@ -5963,11 +6002,12 @@ multiclass VPatConversionWF_VF<string intrinsic, string instruction> {
GetVTypePredicates<fwti>.Predicates)) in
defm : VPatConversionTA<intrinsic, instruction, "V",
fwti.Vector, fvti.Vector, fwti.Mask, fvti.Log2SEW,
- fvti.LMul, fwti.RegClass, fvti.RegClass>;
+ fvti.LMul, fwti.RegClass, fvti.RegClass, isSEWAware>;
}
}
-multiclass VPatConversionWF_VF_BF <string intrinsic, string instruction> {
+multiclass VPatConversionWF_VF_BF<string intrinsic, string instruction,
+                                  bit isSEWAware = 0> {
foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in
{
defvar fvti = fvtiToFWti.Vti;
@@ -5976,7 +6016,7 @@ multiclass VPatConversionWF_VF_BF <string intrinsic, string instruction> {
GetVTypePredicates<fwti>.Predicates) in
defm : VPatConversionTA<intrinsic, instruction, "V",
fwti.Vector, fvti.Vector, fwti.Mask, fvti.Log2SEW,
- fvti.LMul, fwti.RegClass, fvti.RegClass>;
+ fvti.LMul, fwti.RegClass, fvti.RegClass, isSEWAware>;
}
}
@@ -6004,7 +6044,8 @@ multiclass VPatConversionVI_WF_RM <string intrinsic, string instruction> {
}
}
-multiclass VPatConversionVF_WI_RM <string intrinsic, string instruction> {
+multiclass VPatConversionVF_WI_RM<string intrinsic, string instruction,
+                                  bit isSEWAware = 0> {
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti;
@@ -6012,11 +6053,13 @@ multiclass VPatConversionVF_WI_RM <string intrinsic, string instruction> {
GetVTypePredicates<iwti>.Predicates) in
defm : VPatConversionTARoundingMode<intrinsic, instruction, "W",
fvti.Vector, iwti.Vector, fvti.Mask, fvti.Log2SEW,
- fvti.LMul, fvti.RegClass, iwti.RegClass>;
+ fvti.LMul, fvti.RegClass, iwti.RegClass,
+ isSEWAware>;
}
}
-multiclass VPatConversionVF_WF <string intrinsic, string instruction> {
+multiclass VPatConversionVF_WF<string intrinsic, string instruction,
+ bit isSEWAware = 0> {
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
@@ -6024,12 +6067,13 @@ multiclass VPatConversionVF_WF <string intrinsic, string instruction> {
GetVTypePredicates<fwti>.Predicates) in
defm : VPatConversionTA<intrinsic, instruction, "W",
fvti.Vector, fwti.Vector, fvti.Mask, fvti.Log2SEW,
- fvti.LMul, fvti.RegClass, fwti.RegClass>;
+ fvti.LMul, fvti.RegClass, fwti.RegClass, isSEWAware>;
}
}
-multiclass VPatConversionVF_WF_RM <string intrinsic, string instruction,
- list<VTypeInfoToWide> wlist = AllWidenableFloatVectors> {
+multiclass VPatConversionVF_WF_RM<string intrinsic, string instruction,
+ list<VTypeInfoToWide> wlist = AllWidenableFloatVectors,
+ bit isSEWAware = 0> {
foreach fvtiToFWti = wlist in {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
@@ -6037,11 +6081,13 @@ multiclass VPatConversionVF_WF_RM <string intrinsic, string instruction,
GetVTypePredicates<fwti>.Predicates) in
defm : VPatConversionTARoundingMode<intrinsic, instruction, "W",
fvti.Vector, fwti.Vector, fvti.Mask, fvti.Log2SEW,
- fvti.LMul, fvti.RegClass, fwti.RegClass>;
+ fvti.LMul, fvti.RegClass, fwti.RegClass,
+ isSEWAware>;
}
}
-multiclass VPatConversionVF_WF_BF_RM <string intrinsic, string instruction> {
+multiclass VPatConversionVF_WF_BF_RM<string intrinsic, string instruction,
+                                     bit isSEWAware = 0> {
foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
@@ -6049,7 +6095,8 @@ multiclass VPatConversionVF_WF_BF_RM <string intrinsic, string instruction> {
GetVTypePredicates<fwti>.Predicates) in
defm : VPatConversionTARoundingMode<intrinsic, instruction, "W",
fvti.Vector, fwti.Vector, fvti.Mask, fvti.Log2SEW,
- fvti.LMul, fvti.RegClass, fwti.RegClass>;
+ fvti.LMul, fvti.RegClass, fwti.RegClass,
+ isSEWAware>;
}
}
@@ -6207,7 +6254,7 @@ defm PseudoVLSEG : VPseudoUSSegLoadFF;
//===----------------------------------------------------------------------===//
// 11.1. Vector Single-Width Integer Add and Subtract
//===----------------------------------------------------------------------===//
-defm PseudoVADD : VPseudoVALU_VV_VX_VI;
+defm PseudoVADD : VPseudoVALU_VV_VX_VI<Commutable=1>;
defm PseudoVSUB : VPseudoVALU_VV_VX;
defm PseudoVRSUB : VPseudoVALU_VX_VI;
@@ -6272,9 +6319,9 @@ foreach vti = AllIntegerVectors in {
//===----------------------------------------------------------------------===//
// 11.2. Vector Widening Integer Add/Subtract
//===----------------------------------------------------------------------===//
-defm PseudoVWADDU : VPseudoVWALU_VV_VX;
+defm PseudoVWADDU : VPseudoVWALU_VV_VX<Commutable=1>;
defm PseudoVWSUBU : VPseudoVWALU_VV_VX;
-defm PseudoVWADD : VPseudoVWALU_VV_VX;
+defm PseudoVWADD : VPseudoVWALU_VV_VX<Commutable=1>;
defm PseudoVWSUB : VPseudoVWALU_VV_VX;
defm PseudoVWADDU : VPseudoVWALU_WV_WX;
defm PseudoVWSUBU : VPseudoVWALU_WV_WX;
@@ -6305,9 +6352,9 @@ defm PseudoVMSBC : VPseudoVCALUM_V_X<"@earlyclobber $rd">;
//===----------------------------------------------------------------------===//
// 11.5. Vector Bitwise Logical Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVAND : VPseudoVALU_VV_VX_VI;
-defm PseudoVOR : VPseudoVALU_VV_VX_VI;
-defm PseudoVXOR : VPseudoVALU_VV_VX_VI;
+defm PseudoVAND : VPseudoVALU_VV_VX_VI<Commutable=1>;
+defm PseudoVOR : VPseudoVALU_VV_VX_VI<Commutable=1>;
+defm PseudoVXOR : VPseudoVALU_VV_VX_VI<Commutable=1>;
//===----------------------------------------------------------------------===//
// 11.6. Vector Single-Width Bit Shift Instructions
@@ -6325,8 +6372,8 @@ defm PseudoVNSRA : VPseudoVNSHT_WV_WX_WI;
//===----------------------------------------------------------------------===//
// 11.8. Vector Integer Comparison Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVMSEQ : VPseudoVCMPM_VV_VX_VI;
-defm PseudoVMSNE : VPseudoVCMPM_VV_VX_VI;
+defm PseudoVMSEQ : VPseudoVCMPM_VV_VX_VI<Commutable=1>;
+defm PseudoVMSNE : VPseudoVCMPM_VV_VX_VI<Commutable=1>;
defm PseudoVMSLTU : VPseudoVCMPM_VV_VX;
defm PseudoVMSLT : VPseudoVCMPM_VV_VX;
defm PseudoVMSLEU : VPseudoVCMPM_VV_VX_VI;
@@ -6345,9 +6392,9 @@ defm PseudoVMAX : VPseudoVMINMAX_VV_VX;
//===----------------------------------------------------------------------===//
// 11.10. Vector Single-Width Integer Multiply Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVMUL : VPseudoVMUL_VV_VX;
-defm PseudoVMULH : VPseudoVMUL_VV_VX;
-defm PseudoVMULHU : VPseudoVMUL_VV_VX;
+defm PseudoVMUL : VPseudoVMUL_VV_VX<Commutable=1>;
+defm PseudoVMULH : VPseudoVMUL_VV_VX<Commutable=1>;
+defm PseudoVMULHU : VPseudoVMUL_VV_VX<Commutable=1>;
defm PseudoVMULHSU : VPseudoVMUL_VV_VX;
//===----------------------------------------------------------------------===//
@@ -6361,8 +6408,8 @@ defm PseudoVREM : VPseudoVDIV_VV_VX;
//===----------------------------------------------------------------------===//
// 11.12. Vector Widening Integer Multiply Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVWMUL : VPseudoVWMUL_VV_VX;
-defm PseudoVWMULU : VPseudoVWMUL_VV_VX;
+defm PseudoVWMUL : VPseudoVWMUL_VV_VX<Commutable=1>;
+defm PseudoVWMULU : VPseudoVWMUL_VV_VX<Commutable=1>;
defm PseudoVWMULSU : VPseudoVWMUL_VV_VX;
//===----------------------------------------------------------------------===//
@@ -6376,8 +6423,8 @@ defm PseudoVNMSUB : VPseudoVMAC_VV_VX_AAXA;
//===----------------------------------------------------------------------===//
// 11.14. Vector Widening Integer Multiply-Add Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVWMACCU : VPseudoVWMAC_VV_VX;
-defm PseudoVWMACC : VPseudoVWMAC_VV_VX;
+defm PseudoVWMACCU : VPseudoVWMAC_VV_VX<Commutable=1>;
+defm PseudoVWMACC : VPseudoVWMAC_VV_VX<Commutable=1>;
defm PseudoVWMACCSU : VPseudoVWMAC_VV_VX;
defm PseudoVWMACCUS : VPseudoVWMAC_VX;
@@ -7197,15 +7244,20 @@ defm : VPatUnaryV_V_RM<"int_riscv_vfrec7", "PseudoVFREC7", AllFloatVectors, isSE
//===----------------------------------------------------------------------===//
// 13.11. Vector Floating-Point Min/Max Instructions
//===----------------------------------------------------------------------===//
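+// isSEWAware=1 selects the _E<sew> pseudo variants, whose scheduling classes
+// depend on the element width.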
-defm : VPatBinaryV_VV_VX<"int_riscv_vfmin", "PseudoVFMIN", AllFloatVectors>;
-defm : VPatBinaryV_VV_VX<"int_riscv_vfmax", "PseudoVFMAX", AllFloatVectors>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vfmin", "PseudoVFMIN", AllFloatVectors,
+ isSEWAware=1>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vfmax", "PseudoVFMAX", AllFloatVectors,
+ isSEWAware=1>;
//===----------------------------------------------------------------------===//
// 13.12. Vector Floating-Point Sign-Injection Instructions
//===----------------------------------------------------------------------===//
-defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnj", "PseudoVFSGNJ", AllFloatVectors>;
-defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnjn", "PseudoVFSGNJN", AllFloatVectors>;
-defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnjx", "PseudoVFSGNJX", AllFloatVectors>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnj", "PseudoVFSGNJ", AllFloatVectors,
+ isSEWAware=1>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnjn", "PseudoVFSGNJN", AllFloatVectors,
+ isSEWAware=1>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnjx", "PseudoVFSGNJX", AllFloatVectors,
+ isSEWAware=1>;
//===----------------------------------------------------------------------===//
// 13.13. Vector Floating-Point Compare Instructions
@@ -7269,8 +7321,10 @@ defm : VPatConversionVI_VF_RM<"int_riscv_vfcvt_x_f_v", "PseudoVFCVT_X_F">;
defm : VPatConversionVI_VF_RM<"int_riscv_vfcvt_xu_f_v", "PseudoVFCVT_XU_F">;
defm : VPatConversionVI_VF<"int_riscv_vfcvt_rtz_xu_f_v", "PseudoVFCVT_RTZ_XU_F">;
defm : VPatConversionVI_VF<"int_riscv_vfcvt_rtz_x_f_v", "PseudoVFCVT_RTZ_X_F">;
-defm : VPatConversionVF_VI_RM<"int_riscv_vfcvt_f_x_v", "PseudoVFCVT_F_X">;
-defm : VPatConversionVF_VI_RM<"int_riscv_vfcvt_f_xu_v", "PseudoVFCVT_F_XU">;
+defm : VPatConversionVF_VI_RM<"int_riscv_vfcvt_f_x_v", "PseudoVFCVT_F_X",
+ isSEWAware=1>;
+defm : VPatConversionVF_VI_RM<"int_riscv_vfcvt_f_xu_v", "PseudoVFCVT_F_XU",
+ isSEWAware=1>;
//===----------------------------------------------------------------------===//
// 13.18. Widening Floating-Point/Integer Type-Convert Instructions
@@ -7279,11 +7333,14 @@ defm : VPatConversionWI_VF_RM<"int_riscv_vfwcvt_xu_f_v", "PseudoVFWCVT_XU_F">;
defm : VPatConversionWI_VF_RM<"int_riscv_vfwcvt_x_f_v", "PseudoVFWCVT_X_F">;
defm : VPatConversionWI_VF<"int_riscv_vfwcvt_rtz_xu_f_v", "PseudoVFWCVT_RTZ_XU_F">;
defm : VPatConversionWI_VF<"int_riscv_vfwcvt_rtz_x_f_v", "PseudoVFWCVT_RTZ_X_F">;
-defm : VPatConversionWF_VI<"int_riscv_vfwcvt_f_xu_v", "PseudoVFWCVT_F_XU">;
-defm : VPatConversionWF_VI<"int_riscv_vfwcvt_f_x_v", "PseudoVFWCVT_F_X">;
-defm : VPatConversionWF_VF<"int_riscv_vfwcvt_f_f_v", "PseudoVFWCVT_F_F">;
+defm : VPatConversionWF_VI<"int_riscv_vfwcvt_f_xu_v", "PseudoVFWCVT_F_XU",
+ isSEWAware=1>;
+defm : VPatConversionWF_VI<"int_riscv_vfwcvt_f_x_v", "PseudoVFWCVT_F_X",
+ isSEWAware=1>;
+defm : VPatConversionWF_VF<"int_riscv_vfwcvt_f_f_v", "PseudoVFWCVT_F_F",
+ isSEWAware=1>;
defm : VPatConversionWF_VF_BF<"int_riscv_vfwcvtbf16_f_f_v",
- "PseudoVFWCVTBF16_F_F">;
+ "PseudoVFWCVTBF16_F_F", isSEWAware=1>;
//===----------------------------------------------------------------------===//
// 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions
@@ -7292,21 +7349,24 @@ defm : VPatConversionVI_WF_RM<"int_riscv_vfncvt_xu_f_w", "PseudoVFNCVT_XU_F">;
defm : VPatConversionVI_WF_RM<"int_riscv_vfncvt_x_f_w", "PseudoVFNCVT_X_F">;
defm : VPatConversionVI_WF<"int_riscv_vfncvt_rtz_xu_f_w", "PseudoVFNCVT_RTZ_XU_F">;
defm : VPatConversionVI_WF<"int_riscv_vfncvt_rtz_x_f_w", "PseudoVFNCVT_RTZ_X_F">;
-defm : VPatConversionVF_WI_RM <"int_riscv_vfncvt_f_xu_w", "PseudoVFNCVT_F_XU">;
-defm : VPatConversionVF_WI_RM <"int_riscv_vfncvt_f_x_w", "PseudoVFNCVT_F_X">;
+defm : VPatConversionVF_WI_RM<"int_riscv_vfncvt_f_xu_w", "PseudoVFNCVT_F_XU",
+ isSEWAware=1>;
+defm : VPatConversionVF_WI_RM<"int_riscv_vfncvt_f_x_w", "PseudoVFNCVT_F_X",
+ isSEWAware=1>;
defvar WidenableFloatVectorsExceptF16 = !filter(fvtiToFWti, AllWidenableFloatVectors,
!ne(fvtiToFWti.Vti.Scalar, f16));
defm : VPatConversionVF_WF_RM<"int_riscv_vfncvt_f_f_w", "PseudoVFNCVT_F_F",
- WidenableFloatVectorsExceptF16>;
+ WidenableFloatVectorsExceptF16, isSEWAware=1>;
// Define vfncvt.f.f.w for f16 when Zvfhmin is enabled.
defvar F16WidenableFloatVectors = !filter(fvtiToFWti, AllWidenableFloatVectors,
!eq(fvtiToFWti.Vti.Scalar, f16));
let Predicates = [HasVInstructionsF16Minimal] in
defm : VPatConversionVF_WF_RM<"int_riscv_vfncvt_f_f_w", "PseudoVFNCVT_F_F",
- F16WidenableFloatVectors>;
+ F16WidenableFloatVectors, isSEWAware=1>;
defm : VPatConversionVF_WF_BF_RM<"int_riscv_vfncvtbf16_f_f_w",
- "PseudoVFNCVTBF16_F_F">;
-defm : VPatConversionVF_WF<"int_riscv_vfncvt_rod_f_f_w", "PseudoVFNCVT_ROD_F_F">;
+ "PseudoVFNCVTBF16_F_F", isSEWAware=1>;
+defm : VPatConversionVF_WF<"int_riscv_vfncvt_rod_f_f_w", "PseudoVFNCVT_ROD_F_F",
+ isSEWAware=1>;
//===----------------------------------------------------------------------===//
// 14. Vector Reduction Operations
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index b6cd6dc..3397d55 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -410,7 +410,7 @@ multiclass VPatConvertI2FPSDNode_V_RM<SDPatternOperator vop,
let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
GetVTypePredicates<ivti>.Predicates) in
def : Pat<(fvti.Vector (vop (ivti.Vector ivti.RegClass:$rs1))),
- (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX)
+ (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_E"#fvti.SEW)
(fvti.Vector (IMPLICIT_DEF)),
ivti.RegClass:$rs1,
// Value to indicate no rounding mode change in
@@ -441,7 +441,7 @@ multiclass VPatWConvertI2FPSDNode_V<SDPatternOperator vop,
let Predicates = !listconcat(GetVTypePredicates<ivti>.Predicates,
GetVTypePredicates<fwti>.Predicates) in
def : Pat<(fwti.Vector (vop (ivti.Vector ivti.RegClass:$rs1))),
- (!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX)
+ (!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX#"_E"#ivti.SEW)
(fwti.Vector (IMPLICIT_DEF)),
ivti.RegClass:$rs1,
ivti.AVL, ivti.Log2SEW, TA_MA)>;
@@ -470,7 +470,7 @@ multiclass VPatNConvertI2FPSDNode_W_RM<SDPatternOperator vop,
let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
GetVTypePredicates<iwti>.Predicates) in
def : Pat<(fvti.Vector (vop (iwti.Vector iwti.RegClass:$rs1))),
- (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX)
+ (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_E"#fvti.SEW)
(fvti.Vector (IMPLICIT_DEF)),
iwti.RegClass:$rs1,
// Value to indicate no rounding mode change in
@@ -1339,42 +1339,42 @@ foreach vti = AllFloatVectors in {
// 13.12. Vector Floating-Point Sign-Injection Instructions
def : Pat<(fabs (vti.Vector vti.RegClass:$rs)),
- (!cast<Instruction>("PseudoVFSGNJX_VV_"# vti.LMul.MX)
+ (!cast<Instruction>("PseudoVFSGNJX_VV_"# vti.LMul.MX#"_E"#vti.SEW)
(vti.Vector (IMPLICIT_DEF)),
vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TA_MA)>;
// Handle fneg with VFSGNJN using the same input for both operands.
def : Pat<(fneg (vti.Vector vti.RegClass:$rs)),
- (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX)
+ (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX#"_E"#vti.SEW)
(vti.Vector (IMPLICIT_DEF)),
vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
(vti.Vector vti.RegClass:$rs2))),
- (!cast<Instruction>("PseudoVFSGNJ_VV_"# vti.LMul.MX)
+ (!cast<Instruction>("PseudoVFSGNJ_VV_"# vti.LMul.MX#"_E"#vti.SEW)
(vti.Vector (IMPLICIT_DEF)),
vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
(vti.Vector (SplatFPOp vti.ScalarRegClass:$rs2)))),
- (!cast<Instruction>("PseudoVFSGNJ_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ (!cast<Instruction>("PseudoVFSGNJ_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW)
(vti.Vector (IMPLICIT_DEF)),
vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
(vti.Vector (fneg vti.RegClass:$rs2)))),
- (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX)
+ (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX#"_E"#vti.SEW)
(vti.Vector (IMPLICIT_DEF)),
vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
(vti.Vector (fneg (SplatFPOp vti.ScalarRegClass:$rs2))))),
- (!cast<Instruction>("PseudoVFSGNJN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ (!cast<Instruction>("PseudoVFSGNJN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW)
(vti.Vector (IMPLICIT_DEF)),
vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
}
}
// 13.11. Vector Floating-Point MIN/MAX Instructions
-defm : VPatBinaryFPSDNode_VV_VF<fminnum, "PseudoVFMIN">;
-defm : VPatBinaryFPSDNode_VV_VF<fmaxnum, "PseudoVFMAX">;
+defm : VPatBinaryFPSDNode_VV_VF<fminnum, "PseudoVFMIN", isSEWAware=1>;
+defm : VPatBinaryFPSDNode_VV_VF<fmaxnum, "PseudoVFMAX", isSEWAware=1>;
// 13.13. Vector Floating-Point Compare Instructions
defm : VPatFPSetCCSDNode_VV_VF_FV<SETEQ, "PseudoVMFEQ", "PseudoVMFEQ">;
@@ -1445,7 +1445,7 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
!listconcat(GetVTypePredicates<fvti>.Predicates,
GetVTypePredicates<fwti>.Predicates)) in
def : Pat<(fvti.Vector (fpround (fwti.Vector fwti.RegClass:$rs1))),
- (!cast<Instruction>("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX)
+ (!cast<Instruction>("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX#"_E"#fvti.SEW)
(fvti.Vector (IMPLICIT_DEF)),
fwti.RegClass:$rs1,
// Value to indicate no rounding mode change in
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 6fde30a..42fee1a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -1229,7 +1229,7 @@ multiclass VPatConvertI2FPVL_V_RM<SDPatternOperator vop, string instruction_name
def : Pat<(fvti.Vector (vop (ivti.Vector ivti.RegClass:$rs1),
(ivti.Mask VMV0:$vm),
VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_MASK")
+ (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK")
(fvti.Vector (IMPLICIT_DEF)), ivti.RegClass:$rs1,
(ivti.Mask VMV0:$vm),
// Value to indicate no rounding mode change in
@@ -1247,7 +1247,7 @@ multiclass VPatConvertI2FP_RM_VL_V<SDNode vop, string instruction_name> {
def : Pat<(fvti.Vector (vop (ivti.Vector ivti.RegClass:$rs1),
(ivti.Mask VMV0:$vm), (XLenVT timm:$frm),
VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_MASK")
+ (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK")
(fvti.Vector (IMPLICIT_DEF)), ivti.RegClass:$rs1,
(ivti.Mask VMV0:$vm), timm:$frm, GPR:$vl, fvti.Log2SEW, TA_MA)>;
}
@@ -1315,7 +1315,7 @@ multiclass VPatWConvertI2FPVL_V<SDPatternOperator vop,
def : Pat<(fwti.Vector (vop (ivti.Vector ivti.RegClass:$rs1),
(ivti.Mask VMV0:$vm),
VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX#"_MASK")
+ (!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX#"_E"#ivti.SEW#"_MASK")
(fwti.Vector (IMPLICIT_DEF)), ivti.RegClass:$rs1,
(ivti.Mask VMV0:$vm),
GPR:$vl, ivti.Log2SEW, TA_MA)>;
@@ -1389,7 +1389,7 @@ multiclass VPatNConvertI2FPVL_W_RM<SDPatternOperator vop,
def : Pat<(fvti.Vector (vop (iwti.Vector iwti.RegClass:$rs1),
(iwti.Mask VMV0:$vm),
VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_MASK")
+ (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK")
(fvti.Vector (IMPLICIT_DEF)), iwti.RegClass:$rs1,
(iwti.Mask VMV0:$vm),
// Value to indicate no rounding mode change in
@@ -1408,7 +1408,7 @@ multiclass VPatNConvertI2FP_RM_VL_W<SDNode vop, string instruction_name> {
def : Pat<(fvti.Vector (vop (iwti.Vector iwti.RegClass:$rs1),
(iwti.Mask VMV0:$vm), (XLenVT timm:$frm),
VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_MASK")
+ (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK")
(fvti.Vector (IMPLICIT_DEF)), iwti.RegClass:$rs1,
(iwti.Mask VMV0:$vm), timm:$frm, GPR:$vl, fvti.Log2SEW, TA_MA)>;
}
@@ -2468,8 +2468,8 @@ defm : VPatWidenFPMulAccVL_VV_VF_RM<riscv_vfwmsub_vl, "PseudoVFWMSAC">;
defm : VPatWidenFPMulAccVL_VV_VF_RM<riscv_vfwnmsub_vl, "PseudoVFWNMSAC">;
// 13.11. Vector Floating-Point MIN/MAX Instructions
-defm : VPatBinaryFPVL_VV_VF<riscv_vfmin_vl, "PseudoVFMIN">;
-defm : VPatBinaryFPVL_VV_VF<riscv_vfmax_vl, "PseudoVFMAX">;
+defm : VPatBinaryFPVL_VV_VF<riscv_vfmin_vl, "PseudoVFMIN", isSEWAware=1>;
+defm : VPatBinaryFPVL_VV_VF<riscv_vfmax_vl, "PseudoVFMAX", isSEWAware=1>;
// 13.13. Vector Floating-Point Compare Instructions
defm : VPatFPSetCCVL_VV_VF_FV<any_riscv_fsetcc_vl, SETEQ,
@@ -2505,14 +2505,14 @@ foreach vti = AllFloatVectors in {
// 13.12. Vector Floating-Point Sign-Injection Instructions
def : Pat<(riscv_fabs_vl (vti.Vector vti.RegClass:$rs), (vti.Mask VMV0:$vm),
VLOpFrag),
- (!cast<Instruction>("PseudoVFSGNJX_VV_"# vti.LMul.MX #"_MASK")
+ (!cast<Instruction>("PseudoVFSGNJX_VV_"# vti.LMul.MX #"_E"#vti.SEW#"_MASK")
(vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs,
vti.RegClass:$rs, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
TA_MA)>;
// Handle fneg with VFSGNJN using the same input for both operands.
def : Pat<(riscv_fneg_vl (vti.Vector vti.RegClass:$rs), (vti.Mask VMV0:$vm),
VLOpFrag),
- (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX #"_MASK")
+                  (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX #"_E"#vti.SEW#"_MASK")
(vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs,
vti.RegClass:$rs, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
TA_MA)>;
@@ -2522,7 +2522,7 @@ foreach vti = AllFloatVectors in {
vti.RegClass:$merge,
(vti.Mask VMV0:$vm),
VLOpFrag),
- (!cast<Instruction>("PseudoVFSGNJ_VV_"# vti.LMul.MX#"_MASK")
+ (!cast<Instruction>("PseudoVFSGNJ_VV_"# vti.LMul.MX#"_E"#vti.SEW#"_MASK")
vti.RegClass:$merge, vti.RegClass:$rs1,
vti.RegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
TAIL_AGNOSTIC)>;
@@ -2534,7 +2534,7 @@ foreach vti = AllFloatVectors in {
srcvalue,
(vti.Mask true_mask),
VLOpFrag),
- (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX)
+ (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX#"_E"#vti.SEW)
(vti.Vector (IMPLICIT_DEF)),
vti.RegClass:$rs1, vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW, TA_MA)>;
@@ -2543,7 +2543,7 @@ foreach vti = AllFloatVectors in {
vti.RegClass:$merge,
(vti.Mask VMV0:$vm),
VLOpFrag),
- (!cast<Instruction>("PseudoVFSGNJ_V"#vti.ScalarSuffix#"_"# vti.LMul.MX#"_MASK")
+ (!cast<Instruction>("PseudoVFSGNJ_V"#vti.ScalarSuffix#"_"# vti.LMul.MX#"_E"#vti.SEW#"_MASK")
vti.RegClass:$merge, vti.RegClass:$rs1,
vti.ScalarRegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
TAIL_AGNOSTIC)>;
@@ -2672,7 +2672,7 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
(fvti.Vector fvti.RegClass:$rs1),
(fvti.Mask VMV0:$vm),
VLOpFrag)),
- (!cast<Instruction>("PseudoVFWCVT_F_F_V_"#fvti.LMul.MX#"_MASK")
+ (!cast<Instruction>("PseudoVFWCVT_F_F_V_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK")
(fwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1,
(fvti.Mask VMV0:$vm),
GPR:$vl, fvti.Log2SEW, TA_MA)>;
@@ -2703,7 +2703,7 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
def : Pat<(fvti.Vector (any_riscv_fpround_vl
(fwti.Vector fwti.RegClass:$rs1),
(fwti.Mask VMV0:$vm), VLOpFrag)),
- (!cast<Instruction>("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX#"_MASK")
+ (!cast<Instruction>("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK")
(fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1,
(fwti.Mask VMV0:$vm),
// Value to indicate no rounding mode change in
@@ -2716,7 +2716,7 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
def : Pat<(fvti.Vector (any_riscv_fncvt_rod_vl
(fwti.Vector fwti.RegClass:$rs1),
(fwti.Mask VMV0:$vm), VLOpFrag)),
- (!cast<Instruction>("PseudoVFNCVT_ROD_F_F_W_"#fvti.LMul.MX#"_MASK")
+ (!cast<Instruction>("PseudoVFNCVT_ROD_F_F_W_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK")
(fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1,
(fwti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TA_MA)>;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
index 9a6818c..71aa1f1 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
@@ -307,10 +307,16 @@ multiclass VPseudoVC_X<LMULInfo m, DAGOperand RS1Class,
Operand OpClass = payload2> {
let VLMul = m.value in {
let Defs = [VCIX_STATE], Uses = [VCIX_STATE] in {
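+      // Resolve the per-LMUL SchedWrite by name so the VCIX pseudos
+      // participate in the scheduling model.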
- def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_X<OpClass, RS1Class>;
- def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_X<OpClass, m.vrclass, RS1Class>;
+ def "PseudoVC_" # NAME # "_SE_" # m.MX
+ : VPseudoVC_X<OpClass, RS1Class>,
+ Sched<[!cast<SchedWrite>("WriteVC_" # NAME # "_" # m.MX)]>;
+ def "PseudoVC_V_" # NAME # "_SE_" # m.MX
+ : VPseudoVC_V_X<OpClass, m.vrclass, RS1Class>,
+ Sched<[!cast<SchedWrite>("WriteVC_V_" # NAME # "_" # m.MX)]>;
}
- def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_X<OpClass, m.vrclass, RS1Class>;
+ def "PseudoVC_V_" # NAME # "_" # m.MX
+ : VPseudoVC_V_X<OpClass, m.vrclass, RS1Class>,
+ Sched<[!cast<SchedWrite>("WriteVC_V_" # NAME # "_" # m.MX)]>;
}
}
@@ -318,10 +324,16 @@ multiclass VPseudoVC_XV<LMULInfo m, DAGOperand RS1Class,
Operand OpClass = payload2> {
let VLMul = m.value in {
let Defs = [VCIX_STATE], Uses = [VCIX_STATE] in {
- def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_XV<OpClass, m.vrclass, RS1Class>;
- def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_XV<OpClass, m.vrclass, m.vrclass, RS1Class>;
+ def "PseudoVC_" # NAME # "_SE_" # m.MX
+ : VPseudoVC_XV<OpClass, m.vrclass, RS1Class>,
+ Sched<[!cast<SchedWrite>("WriteVC_" # NAME # "_" # m.MX)]>;
+ def "PseudoVC_V_" # NAME # "_SE_" # m.MX
+ : VPseudoVC_V_XV<OpClass, m.vrclass, m.vrclass, RS1Class>,
+ Sched<[!cast<SchedWrite>("WriteVC_V_" # NAME # "_" # m.MX)]>;
}
- def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_XV<OpClass, m.vrclass, m.vrclass, RS1Class>;
+ def "PseudoVC_V_" # NAME # "_" # m.MX
+ : VPseudoVC_V_XV<OpClass, m.vrclass, m.vrclass, RS1Class>,
+ Sched<[!cast<SchedWrite>("WriteVC_V_" # NAME # "_" # m.MX)]>;
}
}
@@ -329,10 +341,16 @@ multiclass VPseudoVC_XVV<LMULInfo m, DAGOperand RS1Class,
Operand OpClass = payload2> {
let VLMul = m.value in {
let Defs = [VCIX_STATE], Uses = [VCIX_STATE] in {
- def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>;
- def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>;
+ def "PseudoVC_" # NAME # "_SE_" # m.MX
+ : VPseudoVC_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>,
+ Sched<[!cast<SchedWrite>("WriteVC_" # NAME # "_" # m.MX)]>;
+ def "PseudoVC_V_" # NAME # "_SE_" # m.MX
+ : VPseudoVC_V_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>,
+ Sched<[!cast<SchedWrite>("WriteVC_V_" # NAME # "_" # m.MX)]>;
}
- def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>;
+ def "PseudoVC_V_" # NAME # "_" # m.MX
+ : VPseudoVC_V_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>,
+ Sched<[!cast<SchedWrite>("WriteVC_V_" # NAME # "_" # m.MX)]>;
}
}
@@ -340,11 +358,17 @@ multiclass VPseudoVC_XVW<LMULInfo m, DAGOperand RS1Class,
Operand OpClass = payload2> {
let VLMul = m.value in {
let Defs = [VCIX_STATE], Uses = [VCIX_STATE] in
- def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class>;
+ def "PseudoVC_" # NAME # "_SE_" # m.MX
+ : VPseudoVC_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class>,
+ Sched<[!cast<SchedWrite>("WriteVC_" # NAME # "_" # m.MX)]>;
let Constraints = "@earlyclobber $rd, $rd = $rs3" in {
let Defs = [VCIX_STATE], Uses = [VCIX_STATE] in
- def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class>;
- def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class>;
+ def "PseudoVC_V_" # NAME # "_SE_" # m.MX
+ : VPseudoVC_V_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class>,
+ Sched<[!cast<SchedWrite>("WriteVC_V_" # NAME # "_" # m.MX)]>;
+ def "PseudoVC_V_" # NAME # "_" # m.MX
+ : VPseudoVC_V_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class>,
+ Sched<[!cast<SchedWrite>("WriteVC_V_" # NAME # "_" # m.MX)]>;
}
}
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td
index dd13a07..32e7f96 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td
@@ -20,13 +20,7 @@ class CMOPInst<bits<3> imm3, string opcodestr>
let Inst{12-11} = 0;
}
-// CMOP1, CMOP5 is used by Zicfiss.
-let Predicates = [HasStdExtZcmop, NoHasStdExtZicfiss] in {
- def CMOP1 : CMOPInst<0, "cmop.1">, Sched<[]>;
- def CMOP5 : CMOPInst<2, "cmop.5">, Sched<[]>;
-}
-
-foreach n = [3, 7, 9, 11, 13, 15] in {
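+// The ratified Zcmop spec spells these mnemonics c.mop.n, for odd n in
+// [1, 15].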
+foreach n = [1, 3, 5, 7, 9, 11, 13, 15] in {
let Predicates = [HasStdExtZcmop] in
- def CMOP # n : CMOPInst<!srl(n, 1), "cmop." # n>, Sched<[]>;
+ def C_MOP # n : CMOPInst<!srl(n, 1), "c.mop." # n>, Sched<[]>;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
index 2d72e98..16f7279 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
@@ -152,7 +152,7 @@ let Predicates = [HasStdExtZvknhaOrZvknhb], RVVConstraint = Sha2Constraint in {
def VSHA2MS_VV : PALUVVNoVmTernary<0b101101, OPMVV, "vsha2ms.vv">;
} // Predicates = [HasStdExtZvknhaOrZvknhb]
-let Predicates = [HasStdExtZvkned]in {
+let Predicates = [HasStdExtZvkned] in {
defm VAESDF : VAES_MV_V_S<0b101000, 0b101001, 0b00001, OPMVV, "vaesdf">;
defm VAESDM : VAES_MV_V_S<0b101000, 0b101001, 0b00000, OPMVV, "vaesdm">;
defm VAESEF : VAES_MV_V_S<0b101000, 0b101001, 0b00011, OPMVV, "vaesef">;
diff --git a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
index 39d420c..ead91c5 100644
--- a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
+++ b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
@@ -12,15 +12,24 @@
// extended bits aren't consumed or because the input was already sign extended
// by an earlier instruction.
//
-// Then it removes the -w suffix from opw instructions whenever all users are
-// dependent only on the lower word of the result of the instruction.
-// The cases handled are:
-// * addw because c.add has a larger register encoding than c.addw.
-// * addiw because it helps reduce test differences between RV32 and RV64
-// w/o being a pessimization.
-// * mulw because c.mulw doesn't exist but c.mul does (w/ zcb)
-// * slliw because c.slliw doesn't exist and c.slli does
+// Then:
+// 1. Unless explicitly disabled or the target prefers W-suffixed instructions,
+// it removes the -w suffix from opw instructions whenever all users are
+// dependent only on the lower word of the result of the instruction.
+// The cases handled are:
+// * addw because c.add has a larger register encoding than c.addw.
+// * addiw because it helps reduce test differences between RV32 and RV64
+// w/o being a pessimization.
+// * mulw because c.mulw doesn't exist but c.mul does (w/ zcb)
+// * slliw because c.slliw doesn't exist and c.slli does
//
+// 2. Or, if explicitly enabled or the target prefers W-suffixed instructions,
+// it adds the W suffix to the instruction whenever all users are dependent
+// only on the lower word of the result of the instruction.
+// The cases handled are:
+// * add/addi/sub/mul.
+// * slli with imm < 32.
+// * ld/lwu.
//===---------------------------------------------------------------------===//
#include "RISCV.h"
@@ -60,6 +69,8 @@ public:
const RISCVSubtarget &ST, MachineRegisterInfo &MRI);
bool stripWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII,
const RISCVSubtarget &ST, MachineRegisterInfo &MRI);
+ bool appendWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII,
+ const RISCVSubtarget &ST, MachineRegisterInfo &MRI);
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -672,9 +683,6 @@ bool RISCVOptWInstrs::stripWSuffixes(MachineFunction &MF,
const RISCVInstrInfo &TII,
const RISCVSubtarget &ST,
MachineRegisterInfo &MRI) {
- if (DisableStripWSuffix || !ST.enableStripWSuffix())
- return false;
-
bool MadeChange = false;
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
@@ -698,6 +706,58 @@ bool RISCVOptWInstrs::stripWSuffixes(MachineFunction &MF,
return MadeChange;
}
+bool RISCVOptWInstrs::appendWSuffixes(MachineFunction &MF,
+ const RISCVInstrInfo &TII,
+ const RISCVSubtarget &ST,
+ MachineRegisterInfo &MRI) {
+ bool MadeChange = false;
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
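+      // Map each candidate 64-bit opcode to its W-suffixed (or 32-bit load)
+      // equivalent.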
+ unsigned WOpc;
+ // TODO: Add more?
+ switch (MI.getOpcode()) {
+ default:
+ continue;
+ case RISCV::ADD:
+ WOpc = RISCV::ADDW;
+ break;
+ case RISCV::ADDI:
+ WOpc = RISCV::ADDIW;
+ break;
+ case RISCV::SUB:
+ WOpc = RISCV::SUBW;
+ break;
+ case RISCV::MUL:
+ WOpc = RISCV::MULW;
+ break;
+ case RISCV::SLLI:
+        // SLLIW reads only the lowest 5 bits of the shift amount, while SLLI
+        // reads the lowest 6 bits.
+ if (MI.getOperand(2).getImm() >= 32)
+ continue;
+ WOpc = RISCV::SLLIW;
+ break;
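+      // LD and LWU can be shrunk to LW when only the low 32 bits of the
+      // loaded value are used.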
+ case RISCV::LD:
+ case RISCV::LWU:
+ WOpc = RISCV::LW;
+ break;
+ }
+
+ if (hasAllWUsers(MI, ST, MRI)) {
+ LLVM_DEBUG(dbgs() << "Replacing " << MI);
+ MI.setDesc(TII.get(WOpc));
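+        // Wrap/exactness flags were proven for the 64-bit opcode and may not
+        // hold for the W form, so clear them conservatively.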
+ MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
+ MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
+ MI.clearFlag(MachineInstr::MIFlag::IsExact);
+ LLVM_DEBUG(dbgs() << " with " << MI);
+ ++NumTransformedToWInstrs;
+ MadeChange = true;
+ }
+ }
+ }
+
+ return MadeChange;
+}
+
bool RISCVOptWInstrs::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@@ -711,7 +771,12 @@ bool RISCVOptWInstrs::runOnMachineFunction(MachineFunction &MF) {
bool MadeChange = false;
MadeChange |= removeSExtWInstrs(MF, TII, ST, MRI);
- MadeChange |= stripWSuffixes(MF, TII, ST, MRI);
+
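+  // Stripping and appending W suffixes are mutually exclusive: targets that
+  // prefer W instructions get suffixes appended instead of stripped.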
+ if (!(DisableStripWSuffix || ST.preferWInst()))
+ MadeChange |= stripWSuffixes(MF, TII, ST, MRI);
+
+ if (ST.preferWInst())
+ MadeChange |= appendWSuffixes(MF, TII, ST, MRI);
return MadeChange;
}
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index fd6d607..f9a557e 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -56,11 +56,13 @@ class RISCVTuneProcessorModel<string n,
def GENERIC_RV32 : RISCVProcessorModel<"generic-rv32",
NoSchedModel,
- [Feature32Bit]>,
+ [Feature32Bit,
+ FeatureStdExtI]>,
GenericTuneInfo;
def GENERIC_RV64 : RISCVProcessorModel<"generic-rv64",
NoSchedModel,
- [Feature64Bit]>,
+ [Feature64Bit,
+ FeatureStdExtI]>,
GenericTuneInfo;
// Support generic for compatibility with other targets. The triple will be used
// to change to the appropriate rv32/rv64 version.
@@ -69,11 +71,13 @@ def : ProcessorModel<"generic", NoSchedModel, []>, GenericTuneInfo;
def ROCKET_RV32 : RISCVProcessorModel<"rocket-rv32",
RocketModel,
[Feature32Bit,
+ FeatureStdExtI,
FeatureStdExtZifencei,
FeatureStdExtZicsr]>;
def ROCKET_RV64 : RISCVProcessorModel<"rocket-rv64",
RocketModel,
[Feature64Bit,
+ FeatureStdExtI,
FeatureStdExtZifencei,
FeatureStdExtZicsr]>;
def ROCKET : RISCVTuneProcessorModel<"rocket",
@@ -86,6 +90,7 @@ def SIFIVE_7 : RISCVTuneProcessorModel<"sifive-7-series",
def SIFIVE_E20 : RISCVProcessorModel<"sifive-e20",
RocketModel,
[Feature32Bit,
+ FeatureStdExtI,
FeatureStdExtZicsr,
FeatureStdExtZifencei,
FeatureStdExtM,
@@ -94,6 +99,7 @@ def SIFIVE_E20 : RISCVProcessorModel<"sifive-e20",
def SIFIVE_E21 : RISCVProcessorModel<"sifive-e21",
RocketModel,
[Feature32Bit,
+ FeatureStdExtI,
FeatureStdExtZicsr,
FeatureStdExtZifencei,
FeatureStdExtM,
@@ -103,6 +109,7 @@ def SIFIVE_E21 : RISCVProcessorModel<"sifive-e21",
def SIFIVE_E24 : RISCVProcessorModel<"sifive-e24",
RocketModel,
[Feature32Bit,
+ FeatureStdExtI,
FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtA,
@@ -112,6 +119,7 @@ def SIFIVE_E24 : RISCVProcessorModel<"sifive-e24",
def SIFIVE_E31 : RISCVProcessorModel<"sifive-e31",
RocketModel,
[Feature32Bit,
+ FeatureStdExtI,
FeatureStdExtZifencei,
FeatureStdExtZicsr,
FeatureStdExtM,
@@ -121,6 +129,7 @@ def SIFIVE_E31 : RISCVProcessorModel<"sifive-e31",
def SIFIVE_E34 : RISCVProcessorModel<"sifive-e34",
RocketModel,
[Feature32Bit,
+ FeatureStdExtI,
FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtA,
@@ -130,6 +139,7 @@ def SIFIVE_E34 : RISCVProcessorModel<"sifive-e34",
def SIFIVE_E76 : RISCVProcessorModel<"sifive-e76",
SiFive7Model,
[Feature32Bit,
+ FeatureStdExtI,
FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtA,
@@ -140,6 +150,7 @@ def SIFIVE_E76 : RISCVProcessorModel<"sifive-e76",
def SIFIVE_S21 : RISCVProcessorModel<"sifive-s21",
RocketModel,
[Feature64Bit,
+ FeatureStdExtI,
FeatureStdExtZicsr,
FeatureStdExtZifencei,
FeatureStdExtM,
@@ -149,6 +160,7 @@ def SIFIVE_S21 : RISCVProcessorModel<"sifive-s21",
def SIFIVE_S51 : RISCVProcessorModel<"sifive-s51",
RocketModel,
[Feature64Bit,
+ FeatureStdExtI,
FeatureStdExtZicsr,
FeatureStdExtZifencei,
FeatureStdExtM,
@@ -158,6 +170,7 @@ def SIFIVE_S51 : RISCVProcessorModel<"sifive-s51",
def SIFIVE_S54 : RISCVProcessorModel<"sifive-s54",
RocketModel,
[Feature64Bit,
+ FeatureStdExtI,
FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtA,
@@ -168,6 +181,7 @@ def SIFIVE_S54 : RISCVProcessorModel<"sifive-s54",
def SIFIVE_S76 : RISCVProcessorModel<"sifive-s76",
SiFive7Model,
[Feature64Bit,
+ FeatureStdExtI,
FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtA,
@@ -180,6 +194,7 @@ def SIFIVE_S76 : RISCVProcessorModel<"sifive-s76",
def SIFIVE_U54 : RISCVProcessorModel<"sifive-u54",
RocketModel,
[Feature64Bit,
+ FeatureStdExtI,
FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtA,
@@ -190,6 +205,7 @@ def SIFIVE_U54 : RISCVProcessorModel<"sifive-u54",
def SIFIVE_U74 : RISCVProcessorModel<"sifive-u74",
SiFive7Model,
[Feature64Bit,
+ FeatureStdExtI,
FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtA,
@@ -200,6 +216,7 @@ def SIFIVE_U74 : RISCVProcessorModel<"sifive-u74",
def SIFIVE_X280 : RISCVProcessorModel<"sifive-x280", SiFive7Model,
[Feature64Bit,
+ FeatureStdExtI,
FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtA,
@@ -217,6 +234,7 @@ def SIFIVE_X280 : RISCVProcessorModel<"sifive-x280", SiFive7Model,
def SIFIVE_P450 : RISCVProcessorModel<"sifive-p450", SiFiveP400Model,
[Feature64Bit,
+ FeatureStdExtI,
FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtA,
@@ -239,7 +257,8 @@ def SIFIVE_P450 : RISCVProcessorModel<"sifive-p450", SiFiveP400Model,
FeatureStdExtZbb,
FeatureStdExtZbs,
FeatureStdExtZfhmin,
- FeatureFastUnalignedAccess],
+ FeatureUnalignedScalarMem,
+ FeatureUnalignedVectorMem],
[TuneNoDefaultUnroll,
TuneConditionalCompressedMoveFusion,
TuneLUIADDIFusion,
@@ -247,6 +266,7 @@ def SIFIVE_P450 : RISCVProcessorModel<"sifive-p450", SiFiveP400Model,
def SIFIVE_P670 : RISCVProcessorModel<"sifive-p670", SiFiveP600Model,
[Feature64Bit,
+ FeatureStdExtI,
FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtA,
@@ -276,7 +296,8 @@ def SIFIVE_P670 : RISCVProcessorModel<"sifive-p670", SiFiveP600Model,
FeatureStdExtZvkng,
FeatureStdExtZvksc,
FeatureStdExtZvksg,
- FeatureFastUnalignedAccess],
+ FeatureUnalignedScalarMem,
+ FeatureUnalignedVectorMem],
[TuneNoDefaultUnroll,
TuneConditionalCompressedMoveFusion,
TuneLUIADDIFusion,
@@ -286,6 +307,7 @@ def SIFIVE_P670 : RISCVProcessorModel<"sifive-p670", SiFiveP600Model,
def SYNTACORE_SCR1_BASE : RISCVProcessorModel<"syntacore-scr1-base",
SyntacoreSCR1Model,
[Feature32Bit,
+ FeatureStdExtI,
FeatureStdExtZicsr,
FeatureStdExtZifencei,
FeatureStdExtC],
@@ -294,6 +316,7 @@ def SYNTACORE_SCR1_BASE : RISCVProcessorModel<"syntacore-scr1-base",
def SYNTACORE_SCR1_MAX : RISCVProcessorModel<"syntacore-scr1-max",
SyntacoreSCR1Model,
[Feature32Bit,
+ FeatureStdExtI,
FeatureStdExtZicsr,
FeatureStdExtZifencei,
FeatureStdExtM,
@@ -303,6 +326,7 @@ def SYNTACORE_SCR1_MAX : RISCVProcessorModel<"syntacore-scr1-max",
def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1",
NoSchedModel,
[Feature64Bit,
+ FeatureStdExtI,
FeatureStdExtZifencei,
FeatureStdExtZicsr,
FeatureStdExtZicntr,
@@ -332,6 +356,7 @@ def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1",
def XIANGSHAN_NANHU : RISCVProcessorModel<"xiangshan-nanhu",
XiangShanNanHuModel,
[Feature64Bit,
+ FeatureStdExtI,
FeatureStdExtZicsr,
FeatureStdExtZifencei,
FeatureStdExtM,
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index 316daf2..1a0533c 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -533,6 +533,12 @@ def VR : VReg<!listconcat(VM1VTs, VMaskVTs),
(add (sequence "V%u", 8, 31),
(sequence "V%u", 7, 0)), 1>;
+// V0 is likely to be used as a mask, so we move it to the front of the
+// allocation order; RVV masked instructions can encode their mask operand
+// only in V0.
+def VMM1 : VReg<VMaskVTs, (add (sequence "V%u", 0, 31)), 1>;
+def VMM2 : VReg<VMaskVTs, (add (sequence "V%u", 0, 31, 2)), 1>;
+def VMM4 : VReg<VMaskVTs, (add (sequence "V%u", 0, 31, 4)), 1>;
+def VMM8 : VReg<VMaskVTs, (add (sequence "V%u", 0, 31, 8)), 1>;
+
def VRNoV0 : VReg<!listconcat(VM1VTs, VMaskVTs), (sub VR, V0), 1>;
def VRM2 : VReg<VM2VTs, (add (sequence "V%uM2", 8, 31, 2),
diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
index e74c7aa..65494e7 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
@@ -261,4 +261,5 @@ defm : UnsupportedSchedZbkx;
defm : UnsupportedSchedZfa;
defm : UnsupportedSchedZfh;
defm : UnsupportedSchedSFB;
+defm : UnsupportedSchedXsfvcp;
}
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 2a13cb4..a532066 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -744,6 +744,13 @@ foreach mx = SchedMxListF in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ }
+ let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
}
}
}
@@ -751,14 +758,9 @@ foreach mx = SchedMxList in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
- defm "" : LMULWriteResMX<"WriteVFCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
- defm "" : LMULWriteResMX<"WriteVFSgnjV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFSgnjF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMinMaxV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMinMaxF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFClassV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFMergeV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFMovV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
@@ -784,10 +786,11 @@ foreach mx = SchedMxListF in {
// Widening
foreach mx = SchedMxListW in {
- defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
- defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
- let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
- defm "" : LMULWriteResMX<"WriteVFWCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ foreach sew = SchedSEWSet<mx, isF=0, isWidening=1>.val in {
+ defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListW>.c;
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
}
}
foreach mx = SchedMxListFW in {
@@ -801,16 +804,13 @@ foreach mx = SchedMxListFW in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
}
}
-}
-foreach mx = SchedMxListFW in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
- let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
- defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFWCvtFToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
- }
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
+ defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
// Narrowing
foreach mx = SchedMxListW in {
@@ -821,11 +821,13 @@ foreach mx = SchedMxListW in {
}
}
foreach mx = SchedMxListFW in {
- defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
- defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
- let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
- defm "" : LMULWriteResMX<"WriteVFNCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFNCvtFToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
+ defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ }
}
}
@@ -960,6 +962,54 @@ let Latency = 3 in
def : InstRW<[WriteIALU], (instrs COPY)>;
+// VCIX
+//
+// In principle we don't know the latency of any VCIX instruction. But rather
+// than taking the default of 1, which can lead to issues [1], we assume that
+// they have a fairly high latency.
+//
+// [1] https://github.com/llvm/llvm-project/issues/83391
+foreach mx = SchedMxList in {
+ defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = !mul(Cycles, 10),
+ AcquireAtCycles = [0, 1],
+ ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULWriteResMX<"WriteVC_V_I", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_V_X", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_V_IV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_V_VV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_V_XV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_V_IVV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_V_IVW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_V_VVV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_V_VVW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_V_XVV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_V_XVW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ foreach f = ["FPR16", "FPR32", "FPR64"] in {
+ defm "" : LMULWriteResMX<"WriteVC_V_" # f # "V", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_V_" # f # "VV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_V_" # f # "VW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ }
+ defm "" : LMULWriteResMX<"WriteVC_I", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_X", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_IV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_VV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_XV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_IVV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_IVW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_VVV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_VVW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_XVV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_XVW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ foreach f = ["FPR16", "FPR32", "FPR64"] in {
+ defm "" : LMULWriteResMX<"WriteVC_" # f # "V", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_" # f # "VV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVC_" # f # "VW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+ }
+ }
+}
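
To make the heuristic concrete, here is a small C++ sketch with assumed
per-LMUL cycle counts (hypothetical stand-ins for SiFive7GetCyclesDefault,
not values read from the model): the assigned VCIX latency is simply ten
times the LMUL cycle count.

#include <cstdio>

int main() {
  // Hypothetical occupancy cycles per LMUL; illustrative only.
  struct { const char *MX; int Cycles; } Rows[] = {
      {"M1", 2}, {"M2", 4}, {"M4", 8}, {"M8", 16}};
  for (const auto &R : Rows) // Latency = !mul(Cycles, 10)
    std::printf("%s: assumed VCIX latency = %d\n", R.MX, 10 * R.Cycles);
  return 0;
}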
+
//===----------------------------------------------------------------------===//
// Bypass and advance
@@ -1169,24 +1219,24 @@ defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFRecpV", 0>;
-defm "" : LMULReadAdvance<"ReadVFMinMaxV", 0>;
-defm "" : LMULReadAdvance<"ReadVFMinMaxF", 0>;
-defm "" : LMULReadAdvance<"ReadVFSgnjV", 0>;
-defm "" : LMULReadAdvance<"ReadVFSgnjF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjF", 0>;
defm "" : LMULReadAdvance<"ReadVFCmpV", 0>;
defm "" : LMULReadAdvance<"ReadVFCmpF", 0>;
defm "" : LMULReadAdvance<"ReadVFClassV", 0>;
defm "" : LMULReadAdvance<"ReadVFMergeV", 0>;
defm "" : LMULReadAdvance<"ReadVFMergeF", 0>;
defm "" : LMULReadAdvance<"ReadVFMovF", 0>;
-defm "" : LMULReadAdvance<"ReadVFCvtIToFV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFCvtIToFV", 0>;
defm "" : LMULReadAdvance<"ReadVFCvtFToIV", 0>;
-defm "" : LMULReadAdvanceW<"ReadVFWCvtIToFV", 0>;
+defm "" : LMULSEWReadAdvanceW<"ReadVFWCvtIToFV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToIV", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToFV", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFNCvtIToFV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWCvtFToFV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtIToFV", 0>;
defm "" : LMULReadAdvanceW<"ReadVFNCvtFToIV", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFNCvtFToFV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtFToFV", 0>;
// 15. Vector Reduction Operations
def : ReadAdvance<ReadVIRedV, 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
index 8ec2e4f..fccdd7e 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
@@ -366,4 +366,5 @@ defm : UnsupportedSchedZbkx;
defm : UnsupportedSchedSFB;
defm : UnsupportedSchedZfa;
defm : UnsupportedSchedV;
+defm : UnsupportedSchedXsfvcp;
}
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
index 80090a0..6e4fb19 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
@@ -495,45 +495,37 @@ foreach mx = SchedMxListF in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFALUF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
-
- }
- }
-}
-
-foreach mx = SchedMxListF in {
- foreach sew = SchedSEWSet<mx, isF=1>.val in {
- defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
- defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
- let Latency = 6, ReleaseAtCycles = [LMulLat] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
}
+ let Latency = 2, ReleaseAtCycles = [LMulLat] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ let Latency = 3, ReleaseAtCycles = [LMulLat] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
}
}
foreach mx = SchedMxListF in {
foreach sew = SchedSEWSet<mx, isF=1>.val in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
- defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
- let Latency = 2, ReleaseAtCycles = [LMulLat] in
- defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxList, isF=1>.c;
+ let Latency = 1, ReleaseAtCycles = [LMulLat] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ }
}
}
foreach mx = SchedMxList in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = 3, ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULWriteResMX<"WriteVFCvtIToFV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- }
+ let Latency = 3, ReleaseAtCycles = [LMulLat] in
+ defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFiveP600VectorArith], mx, IsWorstCase>;
let Latency = 2, ReleaseAtCycles = [LMulLat] in {
defm "" : LMULWriteResMX<"WriteVFCmpV", [SiFiveP600VectorArith], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFCmpF", [SiFiveP600VectorArith], mx, IsWorstCase>;
}
let Latency = 1, ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULWriteResMX<"WriteVFSgnjV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFSgnjF", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMinMaxV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMinMaxF", [SiFiveP600VectorArith], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFClassV", [SiFiveP600VectorArith], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFMergeV", [SiFiveP600VectorArith], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFMovV", [SiFiveP600VectorArith], mx, IsWorstCase>;
@@ -542,19 +534,18 @@ foreach mx = SchedMxList in {
// Widening
foreach mx = SchedMxListW in {
- defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
- defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxListW>.c;
- let Latency = 3, ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULWriteResMX<"WriteVFWCvtIToFV", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ foreach sew = SchedSEWSet<mx, isF=0, isWidening=1>.val in {
+ defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
+ defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListW>.c;
+ let Latency = 3, ReleaseAtCycles = [LMulLat] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
}
}
foreach mx = SchedMxListFW in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxListFW>.c;
- let Latency = 6, ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFWCvtFToFV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- }
+ let Latency = 6, ReleaseAtCycles = [LMulLat] in
+ defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFiveP600VectorArith], mx, IsWorstCase>;
}
foreach mx = SchedMxListFW in {
foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
@@ -567,6 +558,7 @@ foreach mx = SchedMxListFW in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
}
}
}
@@ -579,11 +571,13 @@ foreach mx = SchedMxListW in {
}
}
foreach mx = SchedMxListFW in {
- defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
- defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxListFW>.c;
- let Latency = 3, ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULWriteResMX<"WriteVFNCvtIToFV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFNCvtFToFV", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
+ defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
+ defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
+ let Latency = 3, ReleaseAtCycles = [LMulLat] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ }
}
}
@@ -968,22 +962,22 @@ defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;
defm "" : LMULSEWReadAdvance<"ReadVFRecpV", 0>;
defm "" : LMULReadAdvance<"ReadVFCmpV", 0>;
defm "" : LMULReadAdvance<"ReadVFCmpF", 0>;
-defm "" : LMULReadAdvance<"ReadVFMinMaxV", 0>;
-defm "" : LMULReadAdvance<"ReadVFMinMaxF", 0>;
-defm "" : LMULReadAdvance<"ReadVFSgnjV", 0>;
-defm "" : LMULReadAdvance<"ReadVFSgnjF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjF", 0>;
defm "" : LMULReadAdvance<"ReadVFClassV", 0>;
defm "" : LMULReadAdvance<"ReadVFMergeV", 0>;
defm "" : LMULReadAdvance<"ReadVFMergeF", 0>;
defm "" : LMULReadAdvance<"ReadVFMovF", 0>;
-defm "" : LMULReadAdvance<"ReadVFCvtIToFV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFCvtIToFV", 0>;
defm "" : LMULReadAdvance<"ReadVFCvtFToIV", 0>;
-defm "" : LMULReadAdvanceW<"ReadVFWCvtIToFV", 0>;
+defm "" : LMULSEWReadAdvanceW<"ReadVFWCvtIToFV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToIV", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToFV", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFNCvtIToFV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWCvtFToFV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtIToFV", 0>;
defm "" : LMULReadAdvanceW<"ReadVFNCvtFToIV", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFNCvtFToFV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtFToFV", 0>;
// 15. Vector Reduction Operations
def : ReadAdvance<ReadVIRedV, 0>;
@@ -1046,4 +1040,5 @@ defm : UnsupportedSchedZbkb;
defm : UnsupportedSchedZbkx;
defm : UnsupportedSchedSFB;
defm : UnsupportedSchedZfa;
+defm : UnsupportedSchedXsfvcp;
}
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td b/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td
index 9625d17..0885e32 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td
@@ -212,4 +212,5 @@ defm : UnsupportedSchedZbkb;
defm : UnsupportedSchedZbkx;
defm : UnsupportedSchedZfa;
defm : UnsupportedSchedZfh;
+defm : UnsupportedSchedXsfvcp;
}
diff --git a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
index 4fc7b03..e0f1fab 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
@@ -311,4 +311,5 @@ defm : UnsupportedSchedZfa;
defm : UnsupportedSchedZfh;
defm : UnsupportedSchedSFB;
defm : UnsupportedSchedZabha;
+defm : UnsupportedSchedXsfvcp;
}
diff --git a/llvm/lib/Target/RISCV/RISCVSchedule.td b/llvm/lib/Target/RISCV/RISCVSchedule.td
index 1d19624..0086557 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedule.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedule.td
@@ -296,3 +296,4 @@ def : ReadAdvance<ReadAtomicHD, 0>;
// Include the scheduler resources for other instruction extensions.
include "RISCVScheduleZb.td"
include "RISCVScheduleV.td"
+include "RISCVScheduleXSf.td"
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td
index 6070482..5993884 100644
--- a/llvm/lib/Target/RISCV/RISCVScheduleV.td
+++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td
@@ -253,6 +253,18 @@ multiclass LMULReadAdvanceFW<string name, int val, list<SchedWrite> writes = []>
: LMULReadAdvanceImpl<name, val, writes>;
class LMULSchedWriteListFW<list<string> names> : LMULSchedWriteListImpl<names, SchedMxListFW>;
+multiclass LMULSEWSchedWritesW<string name>
+ : LMULSEWSchedWritesImpl<name, SchedMxListW, isF = 0, isWidening = 1>;
+multiclass LMULSEWSchedReadsW<string name>
+ : LMULSEWSchedReadsImpl<name, SchedMxListW, isF = 0, isWidening = 1>;
+multiclass LMULSEWWriteResW<string name, list<ProcResourceKind> resources>
+ : LMULSEWWriteResImpl<name, resources, SchedMxListW, isF = 0,
+ isWidening = 1>;
+multiclass
+ LMULSEWReadAdvanceW<string name, int val, list<SchedWrite> writes = []>
+ : LMULSEWReadAdvanceImpl<name, val, writes, SchedMxListW, isF = 0,
+ isWidening = 1>;
+
multiclass LMULSEWSchedWritesFW<string name>
: LMULSEWSchedWritesImpl<name, SchedMxListFW, isF = 1, isWidening = 1>;
multiclass LMULSEWSchedReadsFW<string name>
@@ -434,11 +446,11 @@ defm "" : LMULSEWSchedWritesF<"WriteVFSqrtV">;
// 13.10. Vector Floating-Point Reciprocal Estimate Instruction
defm "" : LMULSEWSchedWritesF<"WriteVFRecpV">;
// 13.11. Vector Floating-Point MIN/MAX Instructions
-defm "" : LMULSchedWrites<"WriteVFMinMaxV">;
-defm "" : LMULSchedWrites<"WriteVFMinMaxF">;
+defm "" : LMULSEWSchedWritesF<"WriteVFMinMaxV">;
+defm "" : LMULSEWSchedWritesF<"WriteVFMinMaxF">;
// 13.12. Vector Floating-Point Sign-Injection Instructions
-defm "" : LMULSchedWrites<"WriteVFSgnjV">;
-defm "" : LMULSchedWrites<"WriteVFSgnjF">;
+defm "" : LMULSEWSchedWritesF<"WriteVFSgnjV">;
+defm "" : LMULSEWSchedWritesF<"WriteVFSgnjF">;
// 13.13. Vector Floating-Point Compare Instructions
defm "" : LMULSchedWrites<"WriteVFCmpV">;
defm "" : LMULSchedWrites<"WriteVFCmpF">;
@@ -449,16 +461,16 @@ defm "" : LMULSchedWrites<"WriteVFMergeV">;
// 13.16. Vector Floating-Point Move Instruction
defm "" : LMULSchedWrites<"WriteVFMovV">;
// 13.17. Single-Width Floating-Point/Integer Type-Convert Instructions
-defm "" : LMULSchedWrites<"WriteVFCvtIToFV">;
+defm "" : LMULSEWSchedWritesF<"WriteVFCvtIToFV">;
defm "" : LMULSchedWrites<"WriteVFCvtFToIV">;
// 13.18. Widening Floating-Point/Integer Type-Convert Instructions
-defm "" : LMULSchedWritesW<"WriteVFWCvtIToFV">;
+defm "" : LMULSEWSchedWritesW<"WriteVFWCvtIToFV">;
defm "" : LMULSchedWritesFW<"WriteVFWCvtFToIV">;
-defm "" : LMULSchedWritesFW<"WriteVFWCvtFToFV">;
+defm "" : LMULSEWSchedWritesFW<"WriteVFWCvtFToFV">;
// 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions
-defm "" : LMULSchedWritesFW<"WriteVFNCvtIToFV">;
+defm "" : LMULSEWSchedWritesFW<"WriteVFNCvtIToFV">;
defm "" : LMULSchedWritesW<"WriteVFNCvtFToIV">;
-defm "" : LMULSchedWritesFW<"WriteVFNCvtFToFV">;
+defm "" : LMULSEWSchedWritesFW<"WriteVFNCvtFToFV">;
// 14. Vector Reduction Operations
// The latency of reduction is determined by the size of the read resource.
@@ -659,11 +671,11 @@ defm "" : LMULSEWSchedReadsF<"ReadVFSqrtV">;
// 13.10. Vector Floating-Point Reciprocal Estimate Instruction
defm "" : LMULSEWSchedReadsF<"ReadVFRecpV">;
// 13.11. Vector Floating-Point MIN/MAX Instructions
-defm "" : LMULSchedReads<"ReadVFMinMaxV">;
-defm "" : LMULSchedReads<"ReadVFMinMaxF">;
+defm "" : LMULSEWSchedReadsF<"ReadVFMinMaxV">;
+defm "" : LMULSEWSchedReadsF<"ReadVFMinMaxF">;
// 13.12. Vector Floating-Point Sign-Injection Instructions
-defm "" : LMULSchedReads<"ReadVFSgnjV">;
-defm "" : LMULSchedReads<"ReadVFSgnjF">;
+defm "" : LMULSEWSchedReadsF<"ReadVFSgnjV">;
+defm "" : LMULSEWSchedReadsF<"ReadVFSgnjF">;
// 13.13. Vector Floating-Point Compare Instructions
defm "" : LMULSchedReads<"ReadVFCmpV">;
defm "" : LMULSchedReads<"ReadVFCmpF">;
@@ -675,16 +687,16 @@ defm "" : LMULSchedReads<"ReadVFMergeF">;
// 13.16. Vector Floating-Point Move Instruction
defm "" : LMULSchedReads<"ReadVFMovF">;
// 13.17. Single-Width Floating-Point/Integer Type-Convert Instructions
-defm "" : LMULSchedReads<"ReadVFCvtIToFV">;
+defm "" : LMULSEWSchedReadsF<"ReadVFCvtIToFV">;
defm "" : LMULSchedReads<"ReadVFCvtFToIV">;
// 13.18. Widening Floating-Point/Integer Type-Convert Instructions
-defm "" : LMULSchedReadsW<"ReadVFWCvtIToFV">;
+defm "" : LMULSEWSchedReadsW<"ReadVFWCvtIToFV">;
defm "" : LMULSchedReadsFW<"ReadVFWCvtFToIV">;
-defm "" : LMULSchedReadsFW<"ReadVFWCvtFToFV">;
+defm "" : LMULSEWSchedReadsFW<"ReadVFWCvtFToFV">;
// 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions
-defm "" : LMULSchedReadsFW<"ReadVFNCvtIToFV">;
+defm "" : LMULSEWSchedReadsFW<"ReadVFNCvtIToFV">;
defm "" : LMULSchedReadsW<"ReadVFNCvtFToIV">;
-defm "" : LMULSchedReadsFW<"ReadVFNCvtFToFV">;
+defm "" : LMULSEWSchedReadsFW<"ReadVFNCvtFToFV">;
// 14. Vector Reduction Operations
// 14.1. Vector Single-Width Integer Reduction Instructions
@@ -896,23 +908,23 @@ defm "" : LMULSEWWriteResFW<"WriteVFWMulAddV", []>;
defm "" : LMULSEWWriteResFW<"WriteVFWMulAddF", []>;
defm "" : LMULSEWWriteResF<"WriteVFSqrtV", []>;
defm "" : LMULSEWWriteResF<"WriteVFRecpV", []>;
-defm "" : LMULWriteRes<"WriteVFMinMaxV", []>;
-defm "" : LMULWriteRes<"WriteVFMinMaxF", []>;
-defm "" : LMULWriteRes<"WriteVFSgnjV", []>;
-defm "" : LMULWriteRes<"WriteVFSgnjF", []>;
+defm "" : LMULSEWWriteResF<"WriteVFMinMaxV", []>;
+defm "" : LMULSEWWriteResF<"WriteVFMinMaxF", []>;
+defm "" : LMULSEWWriteResF<"WriteVFSgnjV", []>;
+defm "" : LMULSEWWriteResF<"WriteVFSgnjF", []>;
defm "" : LMULWriteRes<"WriteVFCmpV", []>;
defm "" : LMULWriteRes<"WriteVFCmpF", []>;
defm "" : LMULWriteRes<"WriteVFClassV", []>;
defm "" : LMULWriteRes<"WriteVFMergeV", []>;
defm "" : LMULWriteRes<"WriteVFMovV", []>;
-defm "" : LMULWriteRes<"WriteVFCvtIToFV", []>;
+defm "" : LMULSEWWriteResF<"WriteVFCvtIToFV", []>;
defm "" : LMULWriteRes<"WriteVFCvtFToIV", []>;
-defm "" : LMULWriteResW<"WriteVFWCvtIToFV", []>;
+defm "" : LMULSEWWriteResW<"WriteVFWCvtIToFV", []>;
defm "" : LMULWriteResFW<"WriteVFWCvtFToIV", []>;
-defm "" : LMULWriteResFW<"WriteVFWCvtFToFV", []>;
-defm "" : LMULWriteResFW<"WriteVFNCvtIToFV", []>;
+defm "" : LMULSEWWriteResFW<"WriteVFWCvtFToFV", []>;
+defm "" : LMULSEWWriteResFW<"WriteVFNCvtIToFV", []>;
defm "" : LMULWriteResW<"WriteVFNCvtFToIV", []>;
-defm "" : LMULWriteResFW<"WriteVFNCvtFToFV", []>;
+defm "" : LMULSEWWriteResFW<"WriteVFNCvtFToFV", []>;
// 14. Vector Reduction Operations
defm "" : LMULSEWWriteRes<"WriteVIRedV_From", []>;
@@ -1052,24 +1064,24 @@ defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFRecpV", 0>;
-defm "" : LMULReadAdvance<"ReadVFMinMaxV", 0>;
-defm "" : LMULReadAdvance<"ReadVFMinMaxF", 0>;
-defm "" : LMULReadAdvance<"ReadVFSgnjV", 0>;
-defm "" : LMULReadAdvance<"ReadVFSgnjF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjF", 0>;
defm "" : LMULReadAdvance<"ReadVFCmpV", 0>;
defm "" : LMULReadAdvance<"ReadVFCmpF", 0>;
defm "" : LMULReadAdvance<"ReadVFClassV", 0>;
defm "" : LMULReadAdvance<"ReadVFMergeV", 0>;
defm "" : LMULReadAdvance<"ReadVFMergeF", 0>;
defm "" : LMULReadAdvance<"ReadVFMovF", 0>;
-defm "" : LMULReadAdvance<"ReadVFCvtIToFV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFCvtIToFV", 0>;
defm "" : LMULReadAdvance<"ReadVFCvtFToIV", 0>;
-defm "" : LMULReadAdvanceW<"ReadVFWCvtIToFV", 0>;
+defm "" : LMULSEWReadAdvanceW<"ReadVFWCvtIToFV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToIV", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToFV", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFNCvtIToFV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWCvtFToFV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"SEWReadVFNCvtIToFV", 0>;
defm "" : LMULReadAdvanceW<"ReadVFNCvtFToIV", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFNCvtFToFV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtFToFV", 0>;
// 14. Vector Reduction Operations
def : ReadAdvance<ReadVIRedV, 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleXSf.td b/llvm/lib/Target/RISCV/RISCVScheduleXSf.td
new file mode 100644
index 0000000..58d5084
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVScheduleXSf.td
@@ -0,0 +1,59 @@
+//===-- RISCVScheduleXSf.td - Scheduling Definitions XSf ---*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the scheduling information for SiFive extensions.
+//
+//===----------------------------------------------------------------------===//
+
+multiclass LMULSchedWritesVCIX<string id> {
+defm "" : LMULSchedWrites<"WriteVC_" # id>;
+defm "" : LMULSchedWrites<"WriteVC_V_" # id>;
+}
+
+defm "" : LMULSchedWritesVCIX<"I">;
+defm "" : LMULSchedWritesVCIX<"X">;
+defm "" : LMULSchedWritesVCIX<"IV">;
+defm "" : LMULSchedWritesVCIX<"VV">;
+defm "" : LMULSchedWritesVCIX<"XV">;
+defm "" : LMULSchedWritesVCIX<"IVV">;
+defm "" : LMULSchedWritesVCIX<"IVW">;
+defm "" : LMULSchedWritesVCIX<"VVV">;
+defm "" : LMULSchedWritesVCIX<"VVW">;
+defm "" : LMULSchedWritesVCIX<"XVV">;
+defm "" : LMULSchedWritesVCIX<"XVW">;
+foreach f = ["FPR16", "FPR32", "FPR64"] in {
+ defm "" : LMULSchedWritesVCIX<f # "V">;
+ defm "" : LMULSchedWritesVCIX<f # "VV">;
+ defm "" : LMULSchedWritesVCIX<f # "VW">;
+}
+
+multiclass LMULWriteResVCIX<string id, list<ProcResourceKind> resources> {
+defm : LMULWriteRes<"WriteVC_" # id, resources>;
+defm : LMULWriteRes<"WriteVC_V_" # id, resources>;
+}
+
+multiclass UnsupportedSchedXsfvcp {
+let Unsupported = true in {
+defm : LMULWriteResVCIX<"I", []>;
+defm : LMULWriteResVCIX<"X", []>;
+defm : LMULWriteResVCIX<"IV", []>;
+defm : LMULWriteResVCIX<"VV", []>;
+defm : LMULWriteResVCIX<"XV", []>;
+defm : LMULWriteResVCIX<"IVV", []>;
+defm : LMULWriteResVCIX<"IVW", []>;
+defm : LMULWriteResVCIX<"VVV", []>;
+defm : LMULWriteResVCIX<"VVW", []>;
+defm : LMULWriteResVCIX<"XVV", []>;
+defm : LMULWriteResVCIX<"XVW", []>;
+foreach f = ["FPR16", "FPR32", "FPR64"] in {
+ defm : LMULWriteResVCIX<f # "V", []>;
+ defm : LMULWriteResVCIX<f # "VV", []>;
+ defm : LMULWriteResVCIX<f # "VW", []>;
+}
+}
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index bc9756c..56f5bd8 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1335,8 +1335,8 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
I);
+ std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
if (Opcode == Instruction::Select && ValTy->isVectorTy()) {
- std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
if (CondTy->isVectorTy()) {
if (ValTy->getScalarSizeInBits() == 1) {
// vmandn.mm v8, v8, v9
@@ -1375,14 +1375,15 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
LT.second, CostKind);
}
- if ((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
- ValTy->isVectorTy()) {
- std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
-
- // Support natively.
- if (CmpInst::isIntPredicate(VecPred))
- return LT.first * 1;
+ if ((Opcode == Instruction::ICmp) && ValTy->isVectorTy() &&
+ CmpInst::isIntPredicate(VecPred)) {
+    // Use VMSLT_VV to represent VMSEQ, VMSNE, VMSLTU, VMSLEU, VMSLT and
+    // VMSLE, since they are all assumed to incur the same cost.
+ return LT.first *
+ getRISCVInstructionCost(RISCV::VMSLT_VV, LT.second, CostKind);
+ }
+ if ((Opcode == Instruction::FCmp) && ValTy->isVectorTy()) {
// If we do not support the input floating point vector type, use the base
// one which will calculate as:
// ScalarizeCost + Num * Cost for fixed vector,
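
A hedged sketch of the new ICmp branch, with made-up numbers: if
legalization splits the compared vector into LT.first parts and VMSLT_VV is
assumed to cost one unit per part, the returned cost is their product.

#include <cassert>

int main() {
  int LTFirst = 2;   // hypothetical: a fixed vector legalized into 2 parts
  int VMSLTCost = 1; // assumed stand-in for getRISCVInstructionCost(VMSLT_VV)
  int Cost = LTFirst * VMSLTCost; // mirrors LT.first * getRISCVInstructionCost
  assert(Cost == 2);
  return 0;
}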
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index e0c0e65..2f9281a 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -228,7 +228,7 @@ public:
return false;
EVT ElemType = DataTypeVT.getScalarType();
- if (!ST->hasFastUnalignedAccess() && Alignment < ElemType.getStoreSize())
+ if (!ST->enableUnalignedVectorMem() && Alignment < ElemType.getStoreSize())
return false;
return TLI->isLegalElementTypeForRVV(ElemType);
@@ -253,7 +253,7 @@ public:
return false;
EVT ElemType = DataTypeVT.getScalarType();
- if (!ST->hasFastUnalignedAccess() && Alignment < ElemType.getStoreSize())
+ if (!ST->enableUnalignedVectorMem() && Alignment < ElemType.getStoreSize())
return false;
return TLI->isLegalElementTypeForRVV(ElemType);
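
A minimal sketch of the gate these two hunks share, with plain values
standing in for the subtarget and EVT queries (all names hypothetical):

#include <cstdint>

// Reject an RVV element access when unaligned vector memory is not enabled
// and the known alignment is below the element's store size; the real code
// additionally requires TLI->isLegalElementTypeForRVV(ElemType).
bool isElementAccessLegal(bool EnableUnalignedVectorMem, uint64_t AlignBytes,
                          uint64_t ElemStoreSizeBytes) {
  if (!EnableUnalignedVectorMem && AlignBytes < ElemStoreSizeBytes)
    return false;
  return true;
}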