Diffstat (limited to 'llvm/lib/Target/RISCV')
30 files changed, 1341 insertions, 540 deletions
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp index 45e19cd..c18892a 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp @@ -34,14 +34,15 @@ private: // Whether this is assigning args for a return. bool IsRet; - // true if assignArg has been called for a mask argument, false otherwise. - bool AssignedFirstMaskArg = false; + RVVArgDispatcher &RVVDispatcher; public: RISCVOutgoingValueAssigner( - RISCVTargetLowering::RISCVCCAssignFn *RISCVAssignFn_, bool IsRet) + RISCVTargetLowering::RISCVCCAssignFn *RISCVAssignFn_, bool IsRet, + RVVArgDispatcher &RVVDispatcher) : CallLowering::OutgoingValueAssigner(nullptr), - RISCVAssignFn(RISCVAssignFn_), IsRet(IsRet) {} + RISCVAssignFn(RISCVAssignFn_), IsRet(IsRet), + RVVDispatcher(RVVDispatcher) {} bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, @@ -51,16 +52,9 @@ public: const DataLayout &DL = MF.getDataLayout(); const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>(); - std::optional<unsigned> FirstMaskArgument; - if (Subtarget.hasVInstructions() && !AssignedFirstMaskArg && - ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1) { - FirstMaskArgument = ValNo; - AssignedFirstMaskArg = true; - } - if (RISCVAssignFn(DL, Subtarget.getTargetABI(), ValNo, ValVT, LocVT, LocInfo, Flags, State, Info.IsFixed, IsRet, Info.Ty, - *Subtarget.getTargetLowering(), FirstMaskArgument)) + *Subtarget.getTargetLowering(), RVVDispatcher)) return true; StackSize = State.getStackSize(); @@ -181,14 +175,15 @@ private: // Whether this is assigning args from a return. bool IsRet; - // true if assignArg has been called for a mask argument, false otherwise. - bool AssignedFirstMaskArg = false; + RVVArgDispatcher &RVVDispatcher; public: RISCVIncomingValueAssigner( - RISCVTargetLowering::RISCVCCAssignFn *RISCVAssignFn_, bool IsRet) + RISCVTargetLowering::RISCVCCAssignFn *RISCVAssignFn_, bool IsRet, + RVVArgDispatcher &RVVDispatcher) : CallLowering::IncomingValueAssigner(nullptr), - RISCVAssignFn(RISCVAssignFn_), IsRet(IsRet) {} + RISCVAssignFn(RISCVAssignFn_), IsRet(IsRet), + RVVDispatcher(RVVDispatcher) {} bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, @@ -201,16 +196,9 @@ public: if (LocVT.isScalableVector()) MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall(); - std::optional<unsigned> FirstMaskArgument; - if (Subtarget.hasVInstructions() && !AssignedFirstMaskArg && - ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1) { - FirstMaskArgument = ValNo; - AssignedFirstMaskArg = true; - } - if (RISCVAssignFn(DL, Subtarget.getTargetABI(), ValNo, ValVT, LocVT, LocInfo, Flags, State, /*IsFixed=*/true, IsRet, Info.Ty, - *Subtarget.getTargetLowering(), FirstMaskArgument)) + *Subtarget.getTargetLowering(), RVVDispatcher)) return true; StackSize = State.getStackSize(); @@ -420,9 +408,11 @@ bool RISCVCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder, SmallVector<ArgInfo, 4> SplitRetInfos; splitToValueTypes(OrigRetInfo, SplitRetInfos, DL, CC); + RVVArgDispatcher Dispatcher{&MF, getTLI<RISCVTargetLowering>(), + ArrayRef(F.getReturnType())}; RISCVOutgoingValueAssigner Assigner( CC == CallingConv::Fast ? 
RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV, - /*IsRet=*/true); + /*IsRet=*/true, Dispatcher); RISCVOutgoingValueHandler Handler(MIRBuilder, MF.getRegInfo(), Ret); return determineAndHandleAssignments(Handler, Assigner, SplitRetInfos, MIRBuilder, CC, F.isVarArg()); @@ -531,6 +521,7 @@ bool RISCVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, CallingConv::ID CC = F.getCallingConv(); SmallVector<ArgInfo, 32> SplitArgInfos; + SmallVector<Type *, 4> TypeList; unsigned Index = 0; for (auto &Arg : F.args()) { // Construct the ArgInfo object from destination register and argument type. @@ -542,12 +533,16 @@ bool RISCVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, // correspondingly and appended to SplitArgInfos. splitToValueTypes(AInfo, SplitArgInfos, DL, CC); + TypeList.push_back(Arg.getType()); + ++Index; } + RVVArgDispatcher Dispatcher{&MF, getTLI<RISCVTargetLowering>(), + ArrayRef(TypeList)}; RISCVIncomingValueAssigner Assigner( CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV, - /*IsRet=*/false); + /*IsRet=*/false, Dispatcher); RISCVFormalArgHandler Handler(MIRBuilder, MF.getRegInfo()); SmallVector<CCValAssign, 16> ArgLocs; @@ -585,11 +580,13 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, SmallVector<ArgInfo, 32> SplitArgInfos; SmallVector<ISD::OutputArg, 8> Outs; + SmallVector<Type *, 4> TypeList; for (auto &AInfo : Info.OrigArgs) { // Handle any required unmerging of split value types from a given VReg into // physical registers. ArgInfo objects are constructed correspondingly and // appended to SplitArgInfos. splitToValueTypes(AInfo, SplitArgInfos, DL, CC); + TypeList.push_back(AInfo.Ty); } // TODO: Support tail calls. @@ -607,9 +604,11 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); Call.addRegMask(TRI->getCallPreservedMask(MF, Info.CallConv)); + RVVArgDispatcher ArgDispatcher{&MF, getTLI<RISCVTargetLowering>(), + ArrayRef(TypeList)}; RISCVOutgoingValueAssigner ArgAssigner( CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV, - /*IsRet=*/false); + /*IsRet=*/false, ArgDispatcher); RISCVOutgoingValueHandler ArgHandler(MIRBuilder, MF.getRegInfo(), Call); if (!determineAndHandleAssignments(ArgHandler, ArgAssigner, SplitArgInfos, MIRBuilder, CC, Info.IsVarArg)) @@ -637,9 +636,11 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, SmallVector<ArgInfo, 4> SplitRetInfos; splitToValueTypes(Info.OrigRet, SplitRetInfos, DL, CC); + RVVArgDispatcher RetDispatcher{&MF, getTLI<RISCVTargetLowering>(), + ArrayRef(F.getReturnType())}; RISCVIncomingValueAssigner RetAssigner( CC == CallingConv::Fast ? 
RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV, - /*IsRet=*/true); + /*IsRet=*/true, RetDispatcher); RISCVCallReturnHandler RetHandler(MIRBuilder, MF.getRegInfo(), Call); if (!determineAndHandleAssignments(RetHandler, RetAssigner, SplitRetInfos, MIRBuilder, CC, Info.IsVarArg)) diff --git a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp index 86e4434..c1fde73 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp @@ -110,6 +110,8 @@ RISCVRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC, LLT Ty) const { switch (RC.getID()) { default: + if (RISCVRI::isVRegClass(RC.TSFlags)) + return getRegBank(RISCV::VRBRegBankID); llvm_unreachable("Register class not supported"); case RISCV::GPRRegClassID: case RISCV::GPRF16RegClassID: @@ -131,20 +133,6 @@ RISCVRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC, case RISCV::FPR64CRegClassID: case RISCV::FPR32CRegClassID: return getRegBank(RISCV::FPRBRegBankID); - case RISCV::VMRegClassID: - case RISCV::VRRegClassID: - case RISCV::VRNoV0RegClassID: - case RISCV::VRM2RegClassID: - case RISCV::VRM2NoV0RegClassID: - case RISCV::VRM4RegClassID: - case RISCV::VRM4NoV0RegClassID: - case RISCV::VMV0RegClassID: - case RISCV::VRM2_with_sub_vrm1_0_in_VMV0RegClassID: - case RISCV::VRM4_with_sub_vrm1_0_in_VMV0RegClassID: - case RISCV::VRM8RegClassID: - case RISCV::VRM8NoV0RegClassID: - case RISCV::VRM8_with_sub_vrm1_0_in_VMV0RegClassID: - return getRegBank(RISCV::VRBRegBankID); } } @@ -154,46 +142,6 @@ static const RegisterBankInfo::ValueMapping *getFPValueMapping(unsigned Size) { return &RISCV::ValueMappings[Idx]; } -/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode, -/// having only floating-point operands. -/// FIXME: this is copied from target AArch64. Needs some code refactor here to -/// put this function in GlobalISel/Utils.cpp. -static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) { - switch (Opc) { - case TargetOpcode::G_FADD: - case TargetOpcode::G_FSUB: - case TargetOpcode::G_FMUL: - case TargetOpcode::G_FMA: - case TargetOpcode::G_FDIV: - case TargetOpcode::G_FCONSTANT: - case TargetOpcode::G_FPEXT: - case TargetOpcode::G_FPTRUNC: - case TargetOpcode::G_FCEIL: - case TargetOpcode::G_FFLOOR: - case TargetOpcode::G_FNEARBYINT: - case TargetOpcode::G_FNEG: - case TargetOpcode::G_FCOPYSIGN: - case TargetOpcode::G_FCOS: - case TargetOpcode::G_FSIN: - case TargetOpcode::G_FLOG10: - case TargetOpcode::G_FLOG: - case TargetOpcode::G_FLOG2: - case TargetOpcode::G_FSQRT: - case TargetOpcode::G_FABS: - case TargetOpcode::G_FEXP: - case TargetOpcode::G_FRINT: - case TargetOpcode::G_INTRINSIC_TRUNC: - case TargetOpcode::G_INTRINSIC_ROUND: - case TargetOpcode::G_INTRINSIC_ROUNDEVEN: - case TargetOpcode::G_FMAXNUM: - case TargetOpcode::G_FMINNUM: - case TargetOpcode::G_FMAXIMUM: - case TargetOpcode::G_FMINIMUM: - return true; - } - return false; -} - // TODO: Make this more like AArch64? 
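// The TSFlags-based default in getRegBankFromRegClass above means any class
// tagged as an RVV register class lands in the VRB bank, presumably including
// classes the removed explicit list never mentioned (e.g. the VRN*M* segment
// register classes), assuming those carry the same vector-class flag.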
bool RISCVRegisterBankInfo::hasFPConstraints( const MachineInstr &MI, const MachineRegisterInfo &MRI, diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp index 173995f..d93709a 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -326,8 +326,8 @@ bool RISCVExpandPseudo::expandRV32ZdinxStore(MachineBasicBlock &MBB, .setMemRefs(MMOLo); if (MBBI->getOperand(2).isGlobal() || MBBI->getOperand(2).isCPI()) { - // FIXME: Zdinx RV32 can not work on unaligned memory. - assert(!STI->hasFastUnalignedAccess()); + // FIXME: Zdinx RV32 can not work on unaligned scalar memory. + assert(!STI->enableUnalignedScalarMem()); assert(MBBI->getOperand(2).getOffset() % 8 == 0); MBBI->getOperand(2).setOffset(MBBI->getOperand(2).getOffset() + 4); diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 794455a..f830ead 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -208,6 +208,13 @@ def HasStdExtAOrZalrsc "'A' (Atomic Instructions) or " "'Zalrsc' (Load-Reserved/Store-Conditional)">; +def FeatureStdExtZama16b + : SubtargetFeature<"zama16b", "HasStdExtZama16b", "true", + "'Zama16b' (Atomic 16-byte misaligned loads, stores and AMOs)">; +def HasStdExtZama16b : Predicate<"Subtarget->hasStdExtZama16b()">, + AssemblerPredicate<(all_of FeatureStdExtZama16b), + "'Zama16b' (Atomic 16-byte misaligned loads, stores and AMOs)">; + def FeatureStdExtZawrs : SubtargetFeature<"zawrs", "HasStdExtZawrs", "true", "'Zawrs' (Wait on Reservation Set)">; def HasStdExtZawrs : Predicate<"Subtarget->hasStdExtZawrs()">, @@ -1183,10 +1190,15 @@ def FeatureTrailingSeqCstFence : SubtargetFeature<"seq-cst-trailing-fence", "true", "Enable trailing fence for seq-cst store.">; -def FeatureFastUnalignedAccess - : SubtargetFeature<"fast-unaligned-access", "HasFastUnalignedAccess", - "true", "Has reasonably performant unaligned " - "loads and stores (both scalar and vector)">; +def FeatureUnalignedScalarMem + : SubtargetFeature<"unaligned-scalar-mem", "EnableUnalignedScalarMem", + "true", "Has reasonably performant unaligned scalar " + "loads and stores">; + +def FeatureUnalignedVectorMem + : SubtargetFeature<"unaligned-vector-mem", "EnableUnalignedVectorMem", + "true", "Has reasonably performant unaligned vector " + "loads and stores">; def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler", "UsePostRAScheduler", "true", "Schedule again after register allocation">; @@ -1226,9 +1238,9 @@ def TuneNoSinkSplatOperands "false", "Disable sink splat operands to enable .vx, .vf," ".wx, and .wf instructions">; -def TuneNoStripWSuffix - : SubtargetFeature<"no-strip-w-suffix", "EnableStripWSuffix", "false", - "Disable strip W suffix">; +def TunePreferWInst + : SubtargetFeature<"prefer-w-inst", "PreferWInst", "true", + "Prefer instructions with W suffix">; def TuneConditionalCompressedMoveFusion : SubtargetFeature<"conditional-cmv-fusion", "HasConditionalCompressedMoveFusion", diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 71672ed..cb41577 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -435,6 +435,33 @@ void RISCVFrameLowering::adjustStackForRVV(MachineFunction &MF, Flag, getStackAlign()); } +static void appendScalableVectorExpression(const TargetRegisterInfo &TRI, + SmallVectorImpl<char> &Expr, + int 
FixedOffset, int ScalableOffset, + llvm::raw_string_ostream &Comment) { + unsigned DwarfVLenB = TRI.getDwarfRegNum(RISCV::VLENB, true); + uint8_t Buffer[16]; + if (FixedOffset) { + Expr.push_back(dwarf::DW_OP_consts); + Expr.append(Buffer, Buffer + encodeSLEB128(FixedOffset, Buffer)); + Expr.push_back((uint8_t)dwarf::DW_OP_plus); + Comment << (FixedOffset < 0 ? " - " : " + ") << std::abs(FixedOffset); + } + + Expr.push_back((uint8_t)dwarf::DW_OP_consts); + Expr.append(Buffer, Buffer + encodeSLEB128(ScalableOffset, Buffer)); + + Expr.push_back((uint8_t)dwarf::DW_OP_bregx); + Expr.append(Buffer, Buffer + encodeULEB128(DwarfVLenB, Buffer)); + Expr.push_back(0); + + Expr.push_back((uint8_t)dwarf::DW_OP_mul); + Expr.push_back((uint8_t)dwarf::DW_OP_plus); + + Comment << (ScalableOffset < 0 ? " - " : " + ") << std::abs(ScalableOffset) + << " * vlenb"; +} + static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI, Register Reg, uint64_t FixedOffset, @@ -452,30 +479,38 @@ static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI, else Comment << printReg(Reg, &TRI); - uint8_t buffer[16]; - if (FixedOffset) { - Expr.push_back(dwarf::DW_OP_consts); - Expr.append(buffer, buffer + encodeSLEB128(FixedOffset, buffer)); - Expr.push_back((uint8_t)dwarf::DW_OP_plus); - Comment << " + " << FixedOffset; - } + appendScalableVectorExpression(TRI, Expr, FixedOffset, ScalableOffset, + Comment); - Expr.push_back((uint8_t)dwarf::DW_OP_consts); - Expr.append(buffer, buffer + encodeSLEB128(ScalableOffset, buffer)); + SmallString<64> DefCfaExpr; + uint8_t Buffer[16]; + DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression); + DefCfaExpr.append(Buffer, Buffer + encodeULEB128(Expr.size(), Buffer)); + DefCfaExpr.append(Expr.str()); - unsigned DwarfVlenb = TRI.getDwarfRegNum(RISCV::VLENB, true); - Expr.push_back((uint8_t)dwarf::DW_OP_bregx); - Expr.append(buffer, buffer + encodeULEB128(DwarfVlenb, buffer)); - Expr.push_back(0); + return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(), + Comment.str()); +} - Expr.push_back((uint8_t)dwarf::DW_OP_mul); - Expr.push_back((uint8_t)dwarf::DW_OP_plus); +static MCCFIInstruction createDefCFAOffset(const TargetRegisterInfo &TRI, + Register Reg, uint64_t FixedOffset, + uint64_t ScalableOffset) { + assert(ScalableOffset != 0 && "Did not need to adjust CFA for RVV"); + SmallString<64> Expr; + std::string CommentBuffer; + llvm::raw_string_ostream Comment(CommentBuffer); + Comment << printReg(Reg, &TRI) << " @ cfa"; - Comment << " + " << ScalableOffset << " * vlenb"; + // Build up the expression (FixedOffset + ScalableOffset * VLENB). 
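// As a worked example of the expression layout: FixedOffset = -16 and
// ScalableOffset = -2 emit the op stream
//   DW_OP_consts -16, DW_OP_plus,
//   DW_OP_consts -2, DW_OP_bregx <dwarf reg of VLENB> 0, DW_OP_mul, DW_OP_plus
// which evaluates to <base> - 16 - 2 * vlenb, with " - 16 - 2 * vlenb"
// appended to the human-readable comment.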
+ appendScalableVectorExpression(TRI, Expr, FixedOffset, ScalableOffset, + Comment); SmallString<64> DefCfaExpr; - DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression); - DefCfaExpr.append(buffer, buffer + encodeULEB128(Expr.size(), buffer)); + uint8_t Buffer[16]; + unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true); + DefCfaExpr.push_back(dwarf::DW_CFA_expression); + DefCfaExpr.append(Buffer, Buffer + encodeULEB128(DwarfReg, Buffer)); + DefCfaExpr.append(Buffer, Buffer + encodeULEB128(Expr.size(), Buffer)); DefCfaExpr.append(Expr.str()); return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(), @@ -671,6 +706,9 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, .addCFIIndex(CFIIndex) .setMIFlag(MachineInstr::FrameSetup); } + + std::advance(MBBI, getRVVCalleeSavedInfo(MF, CSI).size()); + emitCalleeSavedRVVPrologCFI(MBB, MBBI, hasFP(MF)); } if (hasFP(MF)) { @@ -1492,6 +1530,41 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters( return true; } +void RISCVFrameLowering::emitCalleeSavedRVVPrologCFI( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, bool HasFP) const { + MachineFunction *MF = MBB.getParent(); + const MachineFrameInfo &MFI = MF->getFrameInfo(); + RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>(); + const TargetInstrInfo &TII = *STI.getInstrInfo(); + DebugLoc DL = MBB.findDebugLoc(MI); + + const auto &RVVCSI = getRVVCalleeSavedInfo(*MF, MFI.getCalleeSavedInfo()); + if (RVVCSI.empty()) + return; + + uint64_t FixedSize = getStackSizeWithRVVPadding(*MF); + if (!HasFP) { + uint64_t ScalarLocalVarSize = + MFI.getStackSize() - RVFI->getCalleeSavedStackSize() - + RVFI->getRVPushStackSize() - RVFI->getVarArgsSaveSize() + + RVFI->getRVVPadding(); + FixedSize -= ScalarLocalVarSize; + } + + for (auto &CS : RVVCSI) { + // Insert the spill to the stack frame. 
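// Note: the actual spills were emitted earlier in spillCalleeSavedRegisters;
// this loop only records unwind info. Each DW_CFA_expression locates a saved
// vector register at cfa - FixedSize + (ObjectOffset / 8) * vlenb, where the
// division by 8 turns the minimum-VLEN byte offset of the scalable slot into
// a vlenb multiple (assuming RVVBitsPerBlock == 64, i.e. 8 bytes per vlenb).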
+ int FI = CS.getFrameIdx(); + if (FI >= 0 && MFI.getStackID(FI) == TargetStackID::ScalableVector) { + unsigned CFIIndex = MF->addFrameInst( + createDefCFAOffset(*STI.getRegisterInfo(), CS.getReg(), -FixedSize, + MFI.getObjectOffset(FI) / 8)); + BuildMI(MBB, MI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlag(MachineInstr::FrameSetup); + } + } +} + bool RISCVFrameLowering::restoreCalleeSavedRegisters( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h index 210f8c1..28ab4af 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h @@ -88,6 +88,9 @@ private: void adjustStackForRVV(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int64_t Amount, MachineInstr::MIFlag Flag) const; + void emitCalleeSavedRVVPrologCFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + bool HasFP) const; std::pair<int64_t, Align> assignRVVStackObjectOffsets(MachineFunction &MF) const; }; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 5a57200..b0deb1d 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/VectorUtils.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -1484,6 +1485,11 @@ bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT, return VF > MaxVF || !isPowerOf2_32(VF); } +bool RISCVTargetLowering::shouldExpandCttzElements(EVT VT) const { + return !Subtarget.hasVInstructions() || + VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT); +} + bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, @@ -1918,7 +1924,7 @@ bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, // replace. If we don't support unaligned scalar mem, prefer the constant // pool. // TODO: Can the caller pass down the alignment? - if (!Subtarget.hasFastUnalignedAccess()) + if (!Subtarget.enableUnalignedScalarMem()) return true; // Prefer to keep the load if it would require many instructions. @@ -8718,6 +8724,29 @@ static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res); } +static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + SDValue Op0 = N->getOperand(1); + MVT OpVT = Op0.getSimpleValueType(); + MVT ContainerVT = OpVT; + if (OpVT.isFixedLengthVector()) { + ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget); + Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget); + } + MVT XLenVT = Subtarget.getXLenVT(); + SDLoc DL(N); + auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget); + SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL); + if (isOneConstant(N->getOperand(2))) + return Res; + + // Convert -1 to VL. 
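// vfirst.m yields the index of the first set mask bit, or -1 if the mask is
// all zeros, whereas cttz.elts must return the element count in that case.
// In scalar terms the code below computes (a sketch, not the emitted nodes):
//   Res = vfirst(Op0);          // -1 when no bit is set
//   return Res < 0 ? EVL : Res; // map "not found" to the vector length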
+ SDValue Setcc = + DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT); + VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount()); + return DAG.getSelect(DL, XLenVT, Setcc, VL, Res); +} + static inline void promoteVCIXScalar(const SDValue &Op, SmallVectorImpl<SDValue> &Operands, SelectionDAG &DAG) { @@ -8913,6 +8942,8 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, } case Intrinsic::experimental_get_vector_length: return lowerGetVectorLength(Op.getNode(), DAG, Subtarget); + case Intrinsic::experimental_cttz_elts: + return lowerCttzElts(Op.getNode(), DAG, Subtarget); case Intrinsic::riscv_vmv_x_s: { SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1)); return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res); @@ -10403,14 +10434,10 @@ RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op, if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() && getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) { MachineMemOperand *MMO = Load->getMemOperand(); - MachineFunction &MF = DAG.getMachineFunction(); - MMO = MF.getMachineMemOperand( - MMO, MMO->getPointerInfo(), - MMO->getMemoryType().isValid() - ? LLT::scalable_vector(1, MMO->getMemoryType().getSizeInBits()) - : MMO->getMemoryType()); SDValue NewLoad = - DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(), MMO); + DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(), + MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(), + MMO->getAAInfo(), MMO->getRanges()); SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget); return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL); } @@ -10470,14 +10497,9 @@ RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op, if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() && getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) { MachineMemOperand *MMO = Store->getMemOperand(); - MachineFunction &MF = DAG.getMachineFunction(); - MMO = MF.getMachineMemOperand( - MMO, MMO->getPointerInfo(), - MMO->getMemoryType().isValid() - ? LLT::scalable_vector(1, MMO->getMemoryType().getSizeInBits()) - : MMO->getMemoryType()); return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(), - MMO); + MMO->getPointerInfo(), MMO->getBaseAlign(), + MMO->getFlags(), MMO->getAAInfo()); } SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, @@ -12336,6 +12358,12 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); return; } + case Intrinsic::experimental_cttz_elts: { + SDValue Res = lowerCttzElts(N, DAG, Subtarget); + Results.push_back( + DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res)); + return; + } case Intrinsic::riscv_orc_b: case Intrinsic::riscv_brev8: case Intrinsic::riscv_sha256sig0: @@ -13363,11 +13391,100 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget); } -static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG) { +// Try to expand a scalar multiply to a faster sequence. +static SDValue expandMul(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const RISCVSubtarget &Subtarget) { + EVT VT = N->getValueType(0); - if (!VT.isVector()) + + // LI + MUL is usually smaller than the alternative sequence. 
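// Worked examples of the expansions implemented below (Zba shNadd spellings
// shown for illustration):
//   X * 18 = 16X + 2X      ->  add (shl X, 4), (shl X, 1)  ; folds to sh1add
//   X * 35 = 32X + (2X+X)  ->  add (shl X, 5), (sh1add X, X)
// Amounts of the form 3/5/9 * 2^N and the shXadd(shYadd X, X) constants
// (11, 13, 19, 21, ...) are deliberately skipped so the existing tablegen
// patterns keep matching them.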
+ if (DAG.getMachineFunction().getFunction().hasMinSize()) return SDValue(); + if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) + return SDValue(); + + if (VT != Subtarget.getXLenVT()) + return SDValue(); + + if (!Subtarget.hasStdExtZba() && !Subtarget.hasVendorXTHeadBa()) + return SDValue(); + + ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (!CNode) + return SDValue(); + uint64_t MulAmt = CNode->getZExtValue(); + + // 3/5/9 * 2^N -> shXadd (sll X, C), (sll X, C) + // Matched in tablegen, avoid perturbing patterns. + for (uint64_t Divisor : {3, 5, 9}) + if (MulAmt % Divisor == 0 && isPowerOf2_64(MulAmt / Divisor)) + return SDValue(); + + // If this is a power 2 + 2/4/8, we can use a shift followed by a single + // shXadd. First check if this a sum of two power of 2s because that's + // easy. Then count how many zeros are up to the first bit. + if (isPowerOf2_64(MulAmt & (MulAmt - 1))) { + unsigned ScaleShift = llvm::countr_zero(MulAmt); + if (ScaleShift >= 1 && ScaleShift < 4) { + unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1))); + SDLoc DL(N); + SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(ShiftAmt, DL, VT)); + SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(ScaleShift, DL, VT)); + return DAG.getNode(ISD::ADD, DL, VT, Shift1, Shift2); + } + } + + // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x) + // Matched in tablegen, avoid perturbing patterns. + switch (MulAmt) { + case 11: + case 13: + case 19: + case 21: + case 25: + case 27: + case 29: + case 37: + case 41: + case 45: + case 73: + case 91: + return SDValue(); + default: + break; + } + + // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X)) + if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) { + unsigned ScaleShift = llvm::countr_zero(MulAmt - 1); + if (ScaleShift >= 1 && ScaleShift < 4) { + unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2))); + SDLoc DL(N); + SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(ShiftAmt, DL, VT)); + SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(ScaleShift, DL, VT)); + return DAG.getNode( + ISD::ADD, DL, VT, Shift1, + DAG.getNode(ISD::ADD, DL, VT, Shift2, N->getOperand(0))); + } + } + + return SDValue(); +} + + +static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const RISCVSubtarget &Subtarget) { + EVT VT = N->getValueType(0); + if (!VT.isVector()) + return expandMul(N, DAG, DCI, Subtarget); + SDLoc DL(N); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -15720,7 +15837,7 @@ static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, if (WiderElementSize > ST.getELen()/8) return false; - if (!ST.hasFastUnalignedAccess() && BaseAlign < WiderElementSize) + if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize) return false; for (unsigned i = 0; i < Index->getNumOperands(); i++) { @@ -15913,7 +16030,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, case ISD::MUL: if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) return V; - return performMULCombine(N, DAG); + return performMULCombine(N, DAG, DCI, Subtarget); case ISD::SDIV: case ISD::UDIV: case ISD::SREM: @@ -17642,8 +17759,7 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, - unsigned CVTXOpc, - unsigned CVTFOpc) { + unsigned 
CVTXOpc) { DebugLoc DL = MI.getDebugLoc(); const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); @@ -17674,6 +17790,85 @@ static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, /*IsImp*/ true)); // Emit a VFCVT_F_X + RISCVII::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags); + unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm(); + // There is no E8 variant for VFCVT_F_X. + assert(Log2SEW >= 4); + // Since MI (VFROUND) isn't SEW specific, we cannot use a macro to make + // handling of different (LMUL, SEW) pairs easier because we need to pull the + // SEW immediate from MI, and that information is not avaliable during macro + // expansion. + unsigned CVTFOpc; + if (Log2SEW == 4) { + switch (LMul) { + case RISCVII::LMUL_1: + CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M1_E16_MASK; + break; + case RISCVII::LMUL_2: + CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M2_E16_MASK; + break; + case RISCVII::LMUL_4: + CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M4_E16_MASK; + break; + case RISCVII::LMUL_8: + CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M8_E16_MASK; + break; + case RISCVII::LMUL_F2: + CVTFOpc = RISCV::PseudoVFCVT_F_X_V_MF2_E16_MASK; + break; + case RISCVII::LMUL_F4: + CVTFOpc = RISCV::PseudoVFCVT_F_X_V_MF4_E16_MASK; + break; + case RISCVII::LMUL_F8: + case RISCVII::LMUL_RESERVED: + llvm_unreachable("Unexpected LMUL and SEW combination value for MI."); + } + } else if (Log2SEW == 5) { + switch (LMul) { + case RISCVII::LMUL_1: + CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M1_E32_MASK; + break; + case RISCVII::LMUL_2: + CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M2_E32_MASK; + break; + case RISCVII::LMUL_4: + CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M4_E32_MASK; + break; + case RISCVII::LMUL_8: + CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M8_E32_MASK; + break; + case RISCVII::LMUL_F2: + CVTFOpc = RISCV::PseudoVFCVT_F_X_V_MF2_E32_MASK; + break; + case RISCVII::LMUL_F4: + case RISCVII::LMUL_F8: + case RISCVII::LMUL_RESERVED: + llvm_unreachable("Unexpected LMUL and SEW combination value for MI."); + } + } else if (Log2SEW == 6) { + switch (LMul) { + case RISCVII::LMUL_1: + CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M1_E64_MASK; + break; + case RISCVII::LMUL_2: + CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M2_E64_MASK; + break; + case RISCVII::LMUL_4: + CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M4_E64_MASK; + break; + case RISCVII::LMUL_8: + CVTFOpc = RISCV::PseudoVFCVT_F_X_V_M8_E64_MASK; + break; + case RISCVII::LMUL_F2: + case RISCVII::LMUL_F4: + case RISCVII::LMUL_F8: + case RISCVII::LMUL_RESERVED: + llvm_unreachable("Unexpected LMUL and SEW combination value for MI."); + } + } else { + llvm_unreachable("Unexpected LMUL and SEW combination value for MI."); + } + BuildMI(*BB, MI, DL, TII.get(CVTFOpc)) .add(MI.getOperand(0)) .add(MI.getOperand(1)) @@ -17883,23 +18078,17 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, Subtarget); case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK: - return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK, - RISCV::PseudoVFCVT_F_X_V_M1_MASK); + return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK); case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK: - return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK, - RISCV::PseudoVFCVT_F_X_V_M2_MASK); + return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK); case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK: - return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK, - RISCV::PseudoVFCVT_F_X_V_M4_MASK); + return emitVFROUND_NOEXCEPT_MASK(MI, BB, 
RISCV::PseudoVFCVT_X_F_V_M4_MASK); case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK: - return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK, - RISCV::PseudoVFCVT_F_X_V_M8_MASK); + return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK); case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK: - return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK, - RISCV::PseudoVFCVT_F_X_V_MF2_MASK); + return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK); case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK: - return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK, - RISCV::PseudoVFCVT_F_X_V_MF4_MASK); + return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK); case RISCV::PseudoFROUND_H: case RISCV::PseudoFROUND_H_INX: case RISCV::PseudoFROUND_S: @@ -18078,33 +18267,12 @@ static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, return false; } -static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo, - std::optional<unsigned> FirstMaskArgument, - CCState &State, const RISCVTargetLowering &TLI) { - const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT); - if (RC == &RISCV::VRRegClass) { - // Assign the first mask argument to V0. - // This is an interim calling convention and it may be changed in the - // future. - if (FirstMaskArgument && ValNo == *FirstMaskArgument) - return State.AllocateReg(RISCV::V0); - return State.AllocateReg(ArgVRs); - } - if (RC == &RISCV::VRM2RegClass) - return State.AllocateReg(ArgVRM2s); - if (RC == &RISCV::VRM4RegClass) - return State.AllocateReg(ArgVRM4s); - if (RC == &RISCV::VRM8RegClass) - return State.AllocateReg(ArgVRM8s); - llvm_unreachable("Unhandled register class for ValueType"); -} - // Implements the RISC-V calling convention. Returns true upon failure. bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, - std::optional<unsigned> FirstMaskArgument) { + RVVArgDispatcher &RVVDispatcher) { unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); assert(XLen == 32 || XLen == 64); MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64; @@ -18273,7 +18441,7 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, else if (ValVT == MVT::f64 && !UseGPRForF64) Reg = State.AllocateReg(ArgFPR64s); else if (ValVT.isVector()) { - Reg = allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI); + Reg = RVVDispatcher.getNextPhysReg(); if (!Reg) { // For return values, the vector must be passed fully via registers or // via the stack. 
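// Usage sketch for the dispatcher hand-off (values illustrative): the
// register sequence is precomputed in compute(), and CC_RISCV/CC_RISCV_FastCC
// simply consume it in argument order:
//   RVVArgDispatcher D{&MF, TLI, ArrayRef(TypeList)}; // TLI: const RISCVTargetLowering *
//   MCPhysReg R0 = D.getNextPhysReg(); // e.g. RISCV::V8 for an LMUL1 data arg
//   MCPhysReg R1 = D.getNextPhysReg(); // RISCV::V0 for the first mask arg;
//                                      // 0 means "no VR left, go indirect"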
@@ -18359,9 +18527,15 @@ void RISCVTargetLowering::analyzeInputArgs( unsigned NumArgs = Ins.size(); FunctionType *FType = MF.getFunction().getFunctionType(); - std::optional<unsigned> FirstMaskArgument; - if (Subtarget.hasVInstructions()) - FirstMaskArgument = preAssignMask(Ins); + RVVArgDispatcher Dispatcher; + if (IsRet) { + Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(Ins)}; + } else { + SmallVector<Type *, 4> TypeList; + for (const Argument &Arg : MF.getFunction().args()) + TypeList.push_back(Arg.getType()); + Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(TypeList)}; + } for (unsigned i = 0; i != NumArgs; ++i) { MVT ArgVT = Ins[i].VT; @@ -18376,7 +18550,7 @@ void RISCVTargetLowering::analyzeInputArgs( RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this, - FirstMaskArgument)) { + Dispatcher)) { LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT << '\n'); llvm_unreachable(nullptr); @@ -18390,9 +18564,13 @@ void RISCVTargetLowering::analyzeOutputArgs( CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const { unsigned NumArgs = Outs.size(); - std::optional<unsigned> FirstMaskArgument; - if (Subtarget.hasVInstructions()) - FirstMaskArgument = preAssignMask(Outs); + SmallVector<Type *, 4> TypeList; + if (IsRet) + TypeList.push_back(MF.getFunction().getReturnType()); + else if (CLI) + for (const TargetLowering::ArgListEntry &Arg : CLI->getArgs()) + TypeList.push_back(Arg.Ty); + RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(TypeList)}; for (unsigned i = 0; i != NumArgs; i++) { MVT ArgVT = Outs[i].VT; @@ -18402,7 +18580,7 @@ void RISCVTargetLowering::analyzeOutputArgs( RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this, - FirstMaskArgument)) { + Dispatcher)) { LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT << "\n"); llvm_unreachable(nullptr); @@ -18583,7 +18761,7 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, - std::optional<unsigned> FirstMaskArgument) { + RVVArgDispatcher &RVVDispatcher) { if (LocVT == MVT::i32 || LocVT == MVT::i64) { if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); @@ -18661,13 +18839,14 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, } if (LocVT.isVector()) { - if (unsigned Reg = - allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI)) { + MCPhysReg AllocatedVReg = RVVDispatcher.getNextPhysReg(); + if (AllocatedVReg) { // Fixed-length vectors are located in the corresponding scalable-vector // container types. if (ValVT.isFixedLengthVector()) LocVT = TLI.getContainerForFixedLengthVector(LocVT); - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + State.addLoc( + CCValAssign::getReg(ValNo, ValVT, AllocatedVReg, LocVT, LocInfo)); } else { // Try and pass the address via a "fast" GPR. 
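// When no vector register group is free, fastcc passes the vector indirectly:
// the caller materializes a stack copy and only its address travels in a GPR
// (or on the stack). Conceptually (IR-level sketch, names hypothetical):
//   %slot = alloca <vscale x 4 x i32>   ; caller-side temporary
//   store <vscale x 4 x i32> %v, ptr %slot
//   call void @callee(ptr %slot)        ; callee loads through the pointer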
if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) { @@ -19295,17 +19474,15 @@ bool RISCVTargetLowering::CanLowerReturn( SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); - std::optional<unsigned> FirstMaskArgument; - if (Subtarget.hasVInstructions()) - FirstMaskArgument = preAssignMask(Outs); + RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(Outs)}; for (unsigned i = 0, e = Outs.size(); i != e; ++i) { MVT VT = Outs[i].VT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full, - ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr, - *this, FirstMaskArgument)) + ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, + nullptr, *this, Dispatcher)) return false; } return true; @@ -20486,8 +20663,8 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses( unsigned *Fast) const { if (!VT.isVector()) { if (Fast) - *Fast = Subtarget.hasFastUnalignedAccess(); - return Subtarget.hasFastUnalignedAccess(); + *Fast = Subtarget.enableUnalignedScalarMem(); + return Subtarget.enableUnalignedScalarMem(); } // All vector implementations must support element alignment @@ -20503,8 +20680,8 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses( // misaligned accesses. TODO: Work through the codegen implications of // allowing such accesses to be formed, and considered fast. if (Fast) - *Fast = Subtarget.hasFastUnalignedAccess(); - return Subtarget.hasFastUnalignedAccess(); + *Fast = Subtarget.enableUnalignedVectorMem(); + return Subtarget.enableUnalignedVectorMem(); } @@ -20539,7 +20716,7 @@ EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op, // Do we have sufficient alignment for our preferred VT? If not, revert // to largest size allowed by our alignment criteria. - if (PreferredVT != MVT::i8 && !Subtarget.hasFastUnalignedAccess()) { + if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) { Align RequiredAlign(PreferredVT.getStoreSize()); if (Op.isFixedDstAlign()) RequiredAlign = std::min(RequiredAlign, Op.getDstAlign()); @@ -20731,7 +20908,7 @@ bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType, if (!isLegalElementTypeForRVV(ScalarType)) return false; - if (!Subtarget.hasFastUnalignedAccess() && + if (!Subtarget.enableUnalignedVectorMem() && Alignment < ScalarType.getStoreSize()) return false; @@ -21102,6 +21279,181 @@ unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const { return Subtarget.getMinimumJumpTableEntries(); } +// Handle single arg such as return value. +template <typename Arg> +void RVVArgDispatcher::constructArgInfos(ArrayRef<Arg> ArgList) { + // This lambda determines whether an array of types are constructed by + // homogeneous vector types. + auto isHomogeneousScalableVectorType = [](ArrayRef<Arg> ArgList) { + // First, extract the first element in the argument type. + auto It = ArgList.begin(); + MVT FirstArgRegType = It->VT; + + // Return if there is no return or the type needs split. + if (It == ArgList.end() || It->Flags.isSplit()) + return false; + + ++It; + + // Return if this argument type contains only 1 element, or it's not a + // vector type. + if (It == ArgList.end() || !FirstArgRegType.isScalableVector()) + return false; + + // Second, check if the following elements in this argument type are all the + // same. 
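// For instance, the partial return values of a segment type such as
// {nxv2i32, nxv2i32, nxv2i32} (illustrative) all arrive here with identical
// VTs and no split flags, so the whole list is folded into a single NF=3
// tuple entry below instead of three independent allocations.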
+ for (; It != ArgList.end(); ++It) + if (It->Flags.isSplit() || It->VT != FirstArgRegType) + return false; + + return true; + }; + + if (isHomogeneousScalableVectorType(ArgList)) { + // Handle as tuple type + RVVArgInfos.push_back({(unsigned)ArgList.size(), ArgList[0].VT, false}); + } else { + // Handle as normal vector type + bool FirstVMaskAssigned = false; + for (const auto &OutArg : ArgList) { + MVT RegisterVT = OutArg.VT; + + // Skip non-RVV register type + if (!RegisterVT.isVector()) + continue; + + if (RegisterVT.isFixedLengthVector()) + RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT); + + if (!FirstVMaskAssigned && RegisterVT.getVectorElementType() == MVT::i1) { + RVVArgInfos.push_back({1, RegisterVT, true}); + FirstVMaskAssigned = true; + continue; + } + + RVVArgInfos.push_back({1, RegisterVT, false}); + } + } +} + +// Handle multiple args. +template <> +void RVVArgDispatcher::constructArgInfos<Type *>(ArrayRef<Type *> TypeList) { + const DataLayout &DL = MF->getDataLayout(); + const Function &F = MF->getFunction(); + LLVMContext &Context = F.getContext(); + + bool FirstVMaskAssigned = false; + for (Type *Ty : TypeList) { + StructType *STy = dyn_cast<StructType>(Ty); + if (STy && STy->containsHomogeneousScalableVectorTypes()) { + Type *ElemTy = STy->getTypeAtIndex(0U); + EVT VT = TLI->getValueType(DL, ElemTy); + MVT RegisterVT = + TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT); + unsigned NumRegs = + TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT); + + RVVArgInfos.push_back( + {NumRegs * STy->getNumElements(), RegisterVT, false}); + } else { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(*TLI, DL, Ty, ValueVTs); + + for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; + ++Value) { + EVT VT = ValueVTs[Value]; + MVT RegisterVT = + TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT); + unsigned NumRegs = + TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT); + + // Skip non-RVV register type + if (!RegisterVT.isVector()) + continue; + + if (RegisterVT.isFixedLengthVector()) + RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT); + + if (!FirstVMaskAssigned && + RegisterVT.getVectorElementType() == MVT::i1) { + RVVArgInfos.push_back({1, RegisterVT, true}); + FirstVMaskAssigned = true; + --NumRegs; + } + + RVVArgInfos.insert(RVVArgInfos.end(), NumRegs, {1, RegisterVT, false}); + } + } + } +} + +void RVVArgDispatcher::allocatePhysReg(unsigned NF, unsigned LMul, + unsigned StartReg) { + assert((StartReg % LMul) == 0 && + "Start register number should be multiple of lmul"); + const MCPhysReg *VRArrays; + switch (LMul) { + default: + report_fatal_error("Invalid lmul"); + case 1: + VRArrays = ArgVRs; + break; + case 2: + VRArrays = ArgVRM2s; + break; + case 4: + VRArrays = ArgVRM4s; + break; + case 8: + VRArrays = ArgVRM8s; + break; + } + + for (unsigned i = 0; i < NF; ++i) + if (StartReg) + AllocatedPhysRegs.push_back(VRArrays[(StartReg - 8) / LMul + i]); + else + AllocatedPhysRegs.push_back(MCPhysReg()); +} + +/// This function determines if each RVV argument is passed by register, if the +/// argument can be assigned to a VR, then give it a specific register. +/// Otherwise, assign the argument to 0 which is a invalid MCPhysReg. +void RVVArgDispatcher::compute() { + uint32_t AssignedMap = 0; + auto allocate = [&](const RVVArgInfo &ArgInfo) { + // Allocate first vector mask argument to V0. 
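// Example walk for {LMUL4 data, mask, LMUL4 data}, with bits 0..15 of
// AssignedMap tracking v8..v23:
//   arg0: 4 regs -> Map = 0b1111 << 0 is free -> v8m4 (v8-v11)
//   arg1: first mask -> V0, AssignedMap untouched
//   arg2: 4 regs -> 0b1111 << 0 taken, 0b1111 << 4 free -> v12m4 (v12-v15)
// If no aligned free group exists, physreg 0 is recorded and the callers
// fall back to passing the argument indirectly.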
+ if (ArgInfo.FirstVMask) { + AllocatedPhysRegs.push_back(RISCV::V0); + return; + } + + unsigned RegsNeeded = divideCeil( + ArgInfo.VT.getSizeInBits().getKnownMinValue(), RISCV::RVVBitsPerBlock); + unsigned TotalRegsNeeded = ArgInfo.NF * RegsNeeded; + for (unsigned StartReg = 0; StartReg + TotalRegsNeeded <= NumArgVRs; + StartReg += RegsNeeded) { + uint32_t Map = ((1 << TotalRegsNeeded) - 1) << StartReg; + if ((AssignedMap & Map) == 0) { + allocatePhysReg(ArgInfo.NF, RegsNeeded, StartReg + 8); + AssignedMap |= Map; + return; + } + } + + allocatePhysReg(ArgInfo.NF, RegsNeeded, 0); + }; + + for (unsigned i = 0; i < RVVArgInfos.size(); ++i) + allocate(RVVArgInfos[i]); +} + +MCPhysReg RVVArgDispatcher::getNextPhysReg() { + assert(CurIdx < AllocatedPhysRegs.size() && "Index out of range"); + return AllocatedPhysRegs[CurIdx++]; +} + namespace llvm::RISCVVIntrinsicsTable { #define GET_RISCVVIntrinsicsTable_IMPL diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index ace5b3f..b10da3d 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -24,6 +24,7 @@ namespace llvm { class InstructionCost; class RISCVSubtarget; struct RISCVRegisterInfo; +class RVVArgDispatcher; namespace RISCVISD { // clang-format off @@ -875,7 +876,7 @@ public: ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, - std::optional<unsigned> FirstMaskArgument); + RVVArgDispatcher &RVVDispatcher); private: void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo, @@ -986,6 +987,8 @@ private: bool shouldExpandGetVectorLength(EVT TripCountVT, unsigned VF, bool IsScalable) const override; + bool shouldExpandCttzElements(EVT VT) const override; + /// RVV code generation for fixed length vectors does not lower all /// BUILD_VECTORs. This makes BUILD_VECTOR legalisation a source of stores to /// merge. However, merging them creates a BUILD_VECTOR that is just as @@ -1015,19 +1018,71 @@ private: unsigned getMinimumJumpTableEntries() const override; }; +/// As per the spec, the rules for passing vector arguments are as follows: +/// +/// 1. For the first vector mask argument, use v0 to pass it. +/// 2. For vector data arguments or rest vector mask arguments, starting from +/// the v8 register, if a vector register group between v8-v23 that has not been +/// allocated can be found and the first register number is a multiple of LMUL, +/// then allocate this vector register group to the argument and mark these +/// registers as allocated. Otherwise, pass it by reference and are replaced in +/// the argument list with the address. +/// 3. For tuple vector data arguments, starting from the v8 register, if +/// NFIELDS consecutive vector register groups between v8-v23 that have not been +/// allocated can be found and the first register number is a multiple of LMUL, +/// then allocate these vector register groups to the argument and mark these +/// registers as allocated. Otherwise, pass it by reference and are replaced in +/// the argument list with the address. 
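/// For example, for
///   void f(<vscale x 8 x i32> %a, <vscale x 2 x i1> %m, <vscale x 8 x i32> %b)
/// %a occupies v8-v11 (LMUL=4), %m goes to v0 as the first mask argument, and
/// %b occupies v12-v15.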
+class RVVArgDispatcher { +public: + static constexpr unsigned NumArgVRs = 16; + + struct RVVArgInfo { + unsigned NF; + MVT VT; + bool FirstVMask = false; + }; + + template <typename Arg> + RVVArgDispatcher(const MachineFunction *MF, const RISCVTargetLowering *TLI, + ArrayRef<Arg> ArgList) + : MF(MF), TLI(TLI) { + constructArgInfos(ArgList); + compute(); + } + + RVVArgDispatcher() = default; + + MCPhysReg getNextPhysReg(); + +private: + SmallVector<RVVArgInfo, 4> RVVArgInfos; + SmallVector<MCPhysReg, 4> AllocatedPhysRegs; + + const MachineFunction *MF = nullptr; + const RISCVTargetLowering *TLI = nullptr; + + unsigned CurIdx = 0; + + template <typename Arg> void constructArgInfos(ArrayRef<Arg> Ret); + void compute(); + void allocatePhysReg(unsigned NF = 1, unsigned LMul = 1, + unsigned StartReg = 0); +}; + namespace RISCV { bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, - std::optional<unsigned> FirstMaskArgument); + RVVArgDispatcher &RVVDispatcher); bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, - std::optional<unsigned> FirstMaskArgument); + RVVArgDispatcher &RVVDispatcher); bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index a14f9a2..aab91ad 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -468,6 +468,7 @@ public: bool isUnknown() const { return State == Unknown; } void setAVLReg(Register Reg) { + assert(Reg.isVirtual() || Reg == RISCV::X0 || Reg == RISCV::NoRegister); AVLReg = Reg; State = AVLIsReg; } @@ -1514,17 +1515,12 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI, // If the AVL is a register, we need to make sure MI's AVL dominates PrevMI. // For now just check that PrevMI uses the same virtual register. 
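// Schematic MIR for the case this enables (syntax abbreviated): the second
// vsetvli below can be folded into the first because both use the same
// virtual AVL register and their demanded VTYPE fields agree:
//   dead %vl  = PseudoVSETVLI %avl, <e32, m1, ta, ma>
//   ...no instruction that reads or changes VL/VTYPE...
//   %vl2 = PseudoVSETVLI %avl, <e32, m1, ta, ma>   ; now removable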
- if (AVL.isReg() && AVL.getReg() != RISCV::X0) { - if (AVL.getReg().isPhysical()) - return false; - if (!PrevAVL.isReg() || PrevAVL.getReg() != AVL.getReg()) - return false; - } + if (AVL.isReg() && AVL.getReg() != RISCV::X0 && + (!PrevAVL.isReg() || PrevAVL.getReg() != AVL.getReg())) + return false; } - if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm()) - return false; - + assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm()); auto PriorVType = PrevMI.getOperand(2).getImm(); auto VType = MI.getOperand(2).getImm(); return areCompatibleVTYPEs(PriorVType, VType, Used); @@ -1545,9 +1541,9 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) { continue; } - Register VRegDef = MI.getOperand(0).getReg(); - if (VRegDef != RISCV::X0 && - !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef))) + Register RegDef = MI.getOperand(0).getReg(); + assert(RegDef == RISCV::X0 || RegDef.isVirtual()); + if (RegDef != RISCV::X0 && !MRI->use_nodbg_empty(RegDef)) Used.demandVL(); if (NextMI) { @@ -1555,7 +1551,9 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) { ToDelete.push_back(&MI); // Leave NextMI unchanged continue; - } else if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) { + } + + if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) { if (!isVLPreservingConfig(*NextMI)) { MI.getOperand(0).setReg(NextMI->getOperand(0).getReg()); MI.getOperand(0).setIsDead(false); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 508f607..8331fc0 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -361,15 +361,12 @@ void RISCVInstrInfo::copyPhysRegVector( return {RISCVII::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V, RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1}; }; - auto FindRegWithEncoding = [&TRI](const TargetRegisterClass &RegClass, - uint16_t Encoding) { - ArrayRef<MCPhysReg> Regs = RegClass.getRegisters(); - const auto *FoundReg = llvm::find_if(Regs, [&](MCPhysReg Reg) { - return TRI->getEncodingValue(Reg) == Encoding; - }); - // We should be always able to find one valid register. 
- assert(FoundReg != Regs.end()); - return *FoundReg; + auto FindRegWithEncoding = [TRI](const TargetRegisterClass &RegClass, + uint16_t Encoding) { + MCRegister Reg = RISCV::V0 + Encoding; + if (&RegClass == &RISCV::VRRegClass) + return Reg; + return TRI->getMatchingSuperReg(Reg, RISCV::sub_vrm1_0, &RegClass); }; while (I != NumRegs) { // For non-segment copying, we only do this once as the registers are always @@ -1986,7 +1983,7 @@ genShXAddAddShift(MachineInstr &Root, unsigned AddOpIdx, MRI.getUniqueVRegDef(AddMI->getOperand(AddOpIdx).getReg()); unsigned InnerShiftAmt = ShiftMI->getOperand(2).getImm(); - assert(InnerShiftAmt > OuterShiftAmt && "Unexpected shift amount"); + assert(InnerShiftAmt >= OuterShiftAmt && "Unexpected shift amount"); unsigned InnerOpc; switch (InnerShiftAmt - OuterShiftAmt) { @@ -2719,6 +2716,50 @@ std::string RISCVInstrInfo::createMIROperandComment( } // clang-format off +#define CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL) \ + RISCV::Pseudo##OP##_##LMUL + +#define CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL) \ + RISCV::Pseudo##OP##_##LMUL##_MASK + +#define CASE_RVV_OPCODE_LMUL(OP, LMUL) \ + CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL): \ + case CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL) + +#define CASE_RVV_OPCODE_UNMASK_WIDEN(OP) \ + CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF8): \ + case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF4): \ + case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF2): \ + case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M1): \ + case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M2): \ + case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M4) + +#define CASE_RVV_OPCODE_UNMASK(OP) \ + CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \ + case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M8) + +#define CASE_RVV_OPCODE_MASK_WIDEN(OP) \ + CASE_RVV_OPCODE_MASK_LMUL(OP, MF8): \ + case CASE_RVV_OPCODE_MASK_LMUL(OP, MF4): \ + case CASE_RVV_OPCODE_MASK_LMUL(OP, MF2): \ + case CASE_RVV_OPCODE_MASK_LMUL(OP, M1): \ + case CASE_RVV_OPCODE_MASK_LMUL(OP, M2): \ + case CASE_RVV_OPCODE_MASK_LMUL(OP, M4) + +#define CASE_RVV_OPCODE_MASK(OP) \ + CASE_RVV_OPCODE_MASK_WIDEN(OP): \ + case CASE_RVV_OPCODE_MASK_LMUL(OP, M8) + +#define CASE_RVV_OPCODE_WIDEN(OP) \ + CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \ + case CASE_RVV_OPCODE_MASK_WIDEN(OP) + +#define CASE_RVV_OPCODE(OP) \ + CASE_RVV_OPCODE_UNMASK(OP): \ + case CASE_RVV_OPCODE_MASK(OP) +// clang-format on + +// clang-format off #define CASE_VMA_OPCODE_COMMON(OP, TYPE, LMUL) \ RISCV::PseudoV##OP##_##TYPE##_##LMUL @@ -2798,6 +2839,28 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI, case RISCV::PseudoCCMOVGPR: // Operands 4 and 5 are commutable. return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 4, 5); + case CASE_RVV_OPCODE(VADD_VV): + case CASE_RVV_OPCODE(VAND_VV): + case CASE_RVV_OPCODE(VOR_VV): + case CASE_RVV_OPCODE(VXOR_VV): + case CASE_RVV_OPCODE_MASK(VMSEQ_VV): + case CASE_RVV_OPCODE_MASK(VMSNE_VV): + case CASE_RVV_OPCODE(VMIN_VV): + case CASE_RVV_OPCODE(VMINU_VV): + case CASE_RVV_OPCODE(VMAX_VV): + case CASE_RVV_OPCODE(VMAXU_VV): + case CASE_RVV_OPCODE(VMUL_VV): + case CASE_RVV_OPCODE(VMULH_VV): + case CASE_RVV_OPCODE(VMULHU_VV): + case CASE_RVV_OPCODE_WIDEN(VWADD_VV): + case CASE_RVV_OPCODE_WIDEN(VWADDU_VV): + case CASE_RVV_OPCODE_WIDEN(VWMUL_VV): + case CASE_RVV_OPCODE_WIDEN(VWMULU_VV): + case CASE_RVV_OPCODE_WIDEN(VWMACC_VV): + case CASE_RVV_OPCODE_WIDEN(VWMACCU_VV): + case CASE_RVV_OPCODE_UNMASK(VADC_VVM): + // Operands 2 and 3 are commutable. 
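// (Operand 0 is the destination and operand 1 the merge/passthru operand, so
// the two vector sources of these pseudos sit at indices 2 and 3. Swapping
// them is sound because the listed ops are commutative: add/and/or/xor,
// min/max, mul/mulh(u), the widening adds/muls/maccs, the eq/ne compares,
// and vadc, whose carry-in operand is unaffected by the swap.)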
+ return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3); case CASE_VFMA_SPLATS(FMADD): case CASE_VFMA_SPLATS(FMSUB): case CASE_VFMA_SPLATS(FMACC): @@ -2950,7 +3013,7 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI, CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW) #define CASE_VFMA_CHANGE_OPCODE_VV(OLDOP, NEWOP) \ - CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E16) \ + CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VV, E16) \ CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E32) \ CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VV, E64) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 0dc466f..cd5caa4 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -143,22 +143,24 @@ class PseudoToVInst<string PseudoInst> { // This class describes information associated to the LMUL. class LMULInfo<int lmul, int oct, VReg regclass, VReg wregclass, - VReg f2regclass, VReg f4regclass, VReg f8regclass, string mx> { + VReg f2regclass, VReg f4regclass, VReg f8regclass, string mx, + VReg moutregclass = VMM1> { bits<3> value = lmul; // This is encoded as the vlmul field of vtype. VReg vrclass = regclass; VReg wvrclass = wregclass; VReg f8vrclass = f8regclass; VReg f4vrclass = f4regclass; VReg f2vrclass = f2regclass; + VReg moutclass = moutregclass; string MX = mx; int octuple = oct; } // Associate LMUL with tablegen records of register classes. def V_M1 : LMULInfo<0b000, 8, VR, VRM2, VR, VR, VR, "M1">; -def V_M2 : LMULInfo<0b001, 16, VRM2, VRM4, VR, VR, VR, "M2">; -def V_M4 : LMULInfo<0b010, 32, VRM4, VRM8, VRM2, VR, VR, "M4">; -def V_M8 : LMULInfo<0b011, 64, VRM8,/*NoVReg*/VR, VRM4, VRM2, VR, "M8">; +def V_M2 : LMULInfo<0b001, 16, VRM2, VRM4, VR, VR, VR, "M2", VMM2>; +def V_M4 : LMULInfo<0b010, 32, VRM4, VRM8, VRM2, VR, VR, "M4", VMM4>; +def V_M8 : LMULInfo<0b011, 64, VRM8,/*NoVReg*/VR, VRM4, VRM2, VR, "M8", VMM8>; def V_MF8 : LMULInfo<0b101, 1, VR, VR,/*NoVReg*/VR,/*NoVReg*/VR,/*NoVReg*/VR, "MF8">; def V_MF4 : LMULInfo<0b110, 2, VR, VR, VR,/*NoVReg*/VR,/*NoVReg*/VR, "MF4">; @@ -2127,8 +2129,9 @@ multiclass VPseudoBinary<VReg RetClass, LMULInfo MInfo, string Constraint = "", int sew = 0, - int TargetConstraintType = 1> { - let VLMul = MInfo.value, SEW=sew in { + int TargetConstraintType = 1, + bit Commutable = 0> { + let VLMul = MInfo.value, SEW=sew, isCommutable = Commutable in { defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX); def suffix : VPseudoBinaryNoMaskTU<RetClass, Op1Class, Op2Class, Constraint, TargetConstraintType>; @@ -2167,8 +2170,9 @@ multiclass VPseudoBinaryM<VReg RetClass, DAGOperand Op2Class, LMULInfo MInfo, string Constraint = "", - int TargetConstraintType = 1> { - let VLMul = MInfo.value in { + int TargetConstraintType = 1, + bit Commutable = 0> { + let VLMul = MInfo.value, isCommutable = Commutable in { def "_" # MInfo.MX : VPseudoBinaryMOutNoMask<RetClass, Op1Class, Op2Class, Constraint, TargetConstraintType>; let ForceTailAgnostic = true in @@ -2226,8 +2230,8 @@ multiclass VPseudoTiedBinaryRoundingMode<VReg RetClass, } -multiclass VPseudoBinaryV_VV<LMULInfo m, string Constraint = "", int sew = 0> { - defm _VV : VPseudoBinary<m.vrclass, m.vrclass, m.vrclass, m, Constraint, sew>; +multiclass VPseudoBinaryV_VV<LMULInfo m, string Constraint = "", int sew = 0, bit Commutable = 0> { + defm _VV : VPseudoBinary<m.vrclass, m.vrclass, m.vrclass, m, Constraint, sew, Commutable=Commutable>; } multiclass 
VPseudoBinaryV_VV_RM<LMULInfo m, string Constraint = ""> { @@ -2331,9 +2335,10 @@ multiclass VPseudoVALU_MM<bit Commutable = 0> { // * The destination EEW is greater than the source EEW, the source EMUL is // at least 1, and the overlap is in the highest-numbered part of the // destination register group is legal. Otherwise, it is illegal. -multiclass VPseudoBinaryW_VV<LMULInfo m> { +multiclass VPseudoBinaryW_VV<LMULInfo m, bit Commutable = 0> { defm _VV : VPseudoBinary<m.wvrclass, m.vrclass, m.vrclass, m, - "@earlyclobber $rd", TargetConstraintType=3>; + "@earlyclobber $rd", TargetConstraintType=3, + Commutable=Commutable>; } multiclass VPseudoBinaryW_VV_RM<LMULInfo m, int sew = 0> { @@ -2453,7 +2458,9 @@ multiclass VPseudoBinaryV_VM<LMULInfo m, bit CarryOut = 0, bit CarryIn = 1, m.vrclass, m.vrclass, m, CarryIn, Constraint, TargetConstraintType>; } -multiclass VPseudoTiedBinaryV_VM<LMULInfo m, int TargetConstraintType = 1> { +multiclass VPseudoTiedBinaryV_VM<LMULInfo m, int TargetConstraintType = 1, + bit Commutable = 0> { + let isCommutable = Commutable in def "_VVM" # "_" # m.MX: VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R, m.vrclass, m.vrclass, m, 1, "", @@ -2667,26 +2674,24 @@ multiclass PseudoVEXT_VF8 { // lowest-numbered part of the source register group". // With LMUL<=1 the source and dest occupy a single register so any overlap // is in the lowest-numbered part. -multiclass VPseudoBinaryM_VV<LMULInfo m, int TargetConstraintType = 1> { - defm _VV : VPseudoBinaryM<VR, m.vrclass, m.vrclass, m, - !if(!ge(m.octuple, 16), "@earlyclobber $rd", ""), TargetConstraintType>; +multiclass VPseudoBinaryM_VV<LMULInfo m, int TargetConstraintType = 1, + bit Commutable = 0> { + defm _VV : VPseudoBinaryM<m.moutclass, m.vrclass, m.vrclass, m, "", + TargetConstraintType, Commutable=Commutable>; } multiclass VPseudoBinaryM_VX<LMULInfo m, int TargetConstraintType = 1> { defm "_VX" : - VPseudoBinaryM<VR, m.vrclass, GPR, m, - !if(!ge(m.octuple, 16), "@earlyclobber $rd", ""), TargetConstraintType>; + VPseudoBinaryM<m.moutclass, m.vrclass, GPR, m, "", TargetConstraintType>; } multiclass VPseudoBinaryM_VF<LMULInfo m, FPR_Info f, int TargetConstraintType = 1> { defm "_V" # f.FX : - VPseudoBinaryM<VR, m.vrclass, f.fprclass, m, - !if(!ge(m.octuple, 16), "@earlyclobber $rd", ""), TargetConstraintType>; + VPseudoBinaryM<m.moutclass, m.vrclass, f.fprclass, m, "", TargetConstraintType>; } multiclass VPseudoBinaryM_VI<LMULInfo m, int TargetConstraintType = 1> { - defm _VI : VPseudoBinaryM<VR, m.vrclass, simm5, m, - !if(!ge(m.octuple, 16), "@earlyclobber $rd", ""), TargetConstraintType>; + defm _VI : VPseudoBinaryM<m.moutclass, m.vrclass, simm5, m, "", TargetConstraintType>; } multiclass VPseudoVGTR_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> { @@ -2751,10 +2756,11 @@ multiclass VPseudoVSSHT_VV_VX_VI_RM<Operand ImmType = simm5, string Constraint = } } -multiclass VPseudoVALU_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> { +multiclass VPseudoVALU_VV_VX_VI<Operand ImmType = simm5, string Constraint = "", + bit Commutable = 0> { foreach m = MxList in { defvar mx = m.MX; - defm "" : VPseudoBinaryV_VV<m, Constraint>, + defm "" : VPseudoBinaryV_VV<m, Constraint, Commutable=Commutable>, SchedBinary<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", mx, forceMergeOpRead=true>; defm "" : VPseudoBinaryV_VX<m, Constraint>, @@ -2804,17 +2810,17 @@ multiclass VPseudoVAALU_VV_VX_RM { multiclass VPseudoVMINMAX_VV_VX { foreach m = MxList in { defvar mx = m.MX; - defm "" : VPseudoBinaryV_VV<m>, + defm "" : 
VPseudoBinaryV_VV<m, Commutable=1>, SchedBinary<"WriteVIMinMaxV", "ReadVIMinMaxV", "ReadVIMinMaxV", mx>; defm "" : VPseudoBinaryV_VX<m>, SchedBinary<"WriteVIMinMaxX", "ReadVIMinMaxV", "ReadVIMinMaxX", mx>; } } -multiclass VPseudoVMUL_VV_VX { +multiclass VPseudoVMUL_VV_VX<bit Commutable = 0> { foreach m = MxList in { defvar mx = m.MX; - defm "" : VPseudoBinaryV_VV<m>, + defm "" : VPseudoBinaryV_VV<m, Commutable=Commutable>, SchedBinary<"WriteVIMulV", "ReadVIMulV", "ReadVIMulV", mx>; defm "" : VPseudoBinaryV_VX<m>, SchedBinary<"WriteVIMulX", "ReadVIMulV", "ReadVIMulX", mx>; @@ -2894,32 +2900,34 @@ multiclass VPseudoVALU_VV_VX { multiclass VPseudoVSGNJ_VV_VF { foreach m = MxListF in { - defm "" : VPseudoBinaryFV_VV<m>, + foreach e = SchedSEWSet<m.MX, isF=1>.val in + defm "" : VPseudoBinaryFV_VV<m, sew=e>, SchedBinary<"WriteVFSgnjV", "ReadVFSgnjV", "ReadVFSgnjV", m.MX, - forceMergeOpRead=true>; + e, forceMergeOpRead=true>; } foreach f = FPList in { foreach m = f.MxList in { - defm "" : VPseudoBinaryV_VF<m, f>, + defm "" : VPseudoBinaryV_VF<m, f, sew=f.SEW>, SchedBinary<"WriteVFSgnjF", "ReadVFSgnjV", "ReadVFSgnjF", m.MX, - forceMergeOpRead=true>; + f.SEW, forceMergeOpRead=true>; } } } multiclass VPseudoVMAX_VV_VF { foreach m = MxListF in { - defm "" : VPseudoBinaryFV_VV<m>, - SchedBinary<"WriteVFMinMaxV", "ReadVFMinMaxV", "ReadVFMinMaxV", m.MX, - forceMergeOpRead=true>; + foreach e = SchedSEWSet<m.MX, isF=1>.val in + defm "" : VPseudoBinaryFV_VV<m, sew=e>, + SchedBinary<"WriteVFMinMaxV", "ReadVFMinMaxV", "ReadVFMinMaxV", + m.MX, e, forceMergeOpRead=true>; } foreach f = FPList in { foreach m = f.MxList in { - defm "" : VPseudoBinaryV_VF<m, f>, - SchedBinary<"WriteVFMinMaxF", "ReadVFMinMaxV", "ReadVFMinMaxF", m.MX, - forceMergeOpRead=true>; + defm "" : VPseudoBinaryV_VF<m, f, sew=f.SEW>, + SchedBinary<"WriteVFMinMaxF", "ReadVFMinMaxV", "ReadVFMinMaxF", + m.MX, f.SEW, forceMergeOpRead=true>; } } } @@ -2962,10 +2970,10 @@ multiclass VPseudoVALU_VX_VI<Operand ImmType = simm5> { } } -multiclass VPseudoVWALU_VV_VX { +multiclass VPseudoVWALU_VV_VX<bit Commutable = 0> { foreach m = MxListW in { defvar mx = m.MX; - defm "" : VPseudoBinaryW_VV<m>, + defm "" : VPseudoBinaryW_VV<m, Commutable=Commutable>, SchedBinary<"WriteVIWALUV", "ReadVIWALUV", "ReadVIWALUV", mx, forceMergeOpRead=true>; defm "" : VPseudoBinaryW_VX<m>, @@ -2974,10 +2982,10 @@ multiclass VPseudoVWALU_VV_VX { } } -multiclass VPseudoVWMUL_VV_VX { +multiclass VPseudoVWMUL_VV_VX<bit Commutable = 0> { foreach m = MxListW in { defvar mx = m.MX; - defm "" : VPseudoBinaryW_VV<m>, + defm "" : VPseudoBinaryW_VV<m, Commutable=Commutable>, SchedBinary<"WriteVIWMulV", "ReadVIWMulV", "ReadVIWMulV", mx, forceMergeOpRead=true>; defm "" : VPseudoBinaryW_VX<m>, @@ -3072,7 +3080,7 @@ multiclass VPseudoVMRG_VM_XM_IM { multiclass VPseudoVCALU_VM_XM_IM { foreach m = MxList in { defvar mx = m.MX; - defm "" : VPseudoTiedBinaryV_VM<m>, + defm "" : VPseudoTiedBinaryV_VM<m, Commutable=1>, SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx, forceMergeOpRead=true>; defm "" : VPseudoTiedBinaryV_XM<m>, @@ -3285,10 +3293,10 @@ multiclass VPseudoTernaryV_VF_AAXA_RM<LMULInfo m, FPR_Info f, sew, Commutable=1>; } -multiclass VPseudoTernaryW_VV<LMULInfo m> { +multiclass VPseudoTernaryW_VV<LMULInfo m, bit Commutable = 0> { defvar constraint = "@earlyclobber $rd"; defm _VV : VPseudoTernaryWithPolicy<m.wvrclass, m.vrclass, m.vrclass, m, - constraint, /*Commutable*/ 0, TargetConstraintType=3>; + constraint, Commutable=Commutable, TargetConstraintType=3>; } multiclass 
VPseudoTernaryW_VV_RM<LMULInfo m, int sew = 0> { @@ -3378,10 +3386,10 @@ multiclass VPseudoVSLD_VX_VI<Operand ImmType = simm5, string Constraint = ""> { } } -multiclass VPseudoVWMAC_VV_VX { +multiclass VPseudoVWMAC_VV_VX<bit Commutable = 0> { foreach m = MxListW in { defvar mx = m.MX; - defm "" : VPseudoTernaryW_VV<m>, + defm "" : VPseudoTernaryW_VV<m, Commutable=Commutable>, SchedTernary<"WriteVIWMulAddV", "ReadVIWMulAddV", "ReadVIWMulAddV", "ReadVIWMulAddV", mx>; defm "" : VPseudoTernaryW_VX<m>, @@ -3434,10 +3442,10 @@ multiclass VPseudoVWMAC_VV_VF_BF_RM { } } -multiclass VPseudoVCMPM_VV_VX_VI { +multiclass VPseudoVCMPM_VV_VX_VI<bit Commutable = 0> { foreach m = MxList in { defvar mx = m.MX; - defm "" : VPseudoBinaryM_VV<m, TargetConstraintType=2>, + defm "" : VPseudoBinaryM_VV<m, TargetConstraintType=2, Commutable=Commutable>, SchedBinary<"WriteVICmpV", "ReadVICmpV", "ReadVICmpV", mx>; defm "" : VPseudoBinaryM_VX<m, TargetConstraintType=2>, SchedBinary<"WriteVICmpX", "ReadVICmpV", "ReadVICmpX", mx>; @@ -3580,12 +3588,14 @@ multiclass VPseudoConversion<VReg RetClass, VReg Op1Class, LMULInfo MInfo, string Constraint = "", + int sew = 0, int TargetConstraintType = 1> { + defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX); let VLMul = MInfo.value in { - def "_" # MInfo.MX : VPseudoUnaryNoMask<RetClass, Op1Class, Constraint, TargetConstraintType>; - def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMask<RetClass, Op1Class, - Constraint, TargetConstraintType>, - RISCVMaskedPseudo<MaskIdx=2>; + def suffix : VPseudoUnaryNoMask<RetClass, Op1Class, Constraint, TargetConstraintType>; + def suffix # "_MASK" : VPseudoUnaryMask<RetClass, Op1Class, + Constraint, TargetConstraintType>, + RISCVMaskedPseudo<MaskIdx=2>; } } @@ -3593,12 +3603,15 @@ multiclass VPseudoConversionRoundingMode<VReg RetClass, VReg Op1Class, LMULInfo MInfo, string Constraint = "", + int sew = 0, int TargetConstraintType = 1> { let VLMul = MInfo.value in { - def "_" # MInfo.MX : VPseudoUnaryNoMaskRoundingMode<RetClass, Op1Class, Constraint, TargetConstraintType>; - def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMaskRoundingMode<RetClass, Op1Class, - Constraint, TargetConstraintType>, - RISCVMaskedPseudo<MaskIdx=2>; + defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX); + def suffix : VPseudoUnaryNoMaskRoundingMode<RetClass, Op1Class, Constraint, TargetConstraintType>; + def suffix # "_MASK" : VPseudoUnaryMaskRoundingMode<RetClass, Op1Class, + Constraint, + TargetConstraintType>, + RISCVMaskedPseudo<MaskIdx=2>; } } @@ -3607,13 +3620,15 @@ multiclass VPseudoConversionRM<VReg RetClass, VReg Op1Class, LMULInfo MInfo, string Constraint = "", + int sew = 0, int TargetConstraintType = 1> { let VLMul = MInfo.value in { - def "_" # MInfo.MX : VPseudoUnaryNoMask_FRM<RetClass, Op1Class, - Constraint, TargetConstraintType>; - def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMask_FRM<RetClass, Op1Class, - Constraint, TargetConstraintType>, - RISCVMaskedPseudo<MaskIdx=2>; + defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX); + def suffix : VPseudoUnaryNoMask_FRM<RetClass, Op1Class, + Constraint, TargetConstraintType>; + def suffix # "_MASK" : VPseudoUnaryMask_FRM<RetClass, Op1Class, + Constraint, TargetConstraintType>, + RISCVMaskedPseudo<MaskIdx=2>; } } @@ -3660,17 +3675,19 @@ multiclass VPseudoVFROUND_NOEXCEPT_V { multiclass VPseudoVCVTF_V_RM { foreach m = MxListF in { - defm _V : VPseudoConversionRoundingMode<m.vrclass, m.vrclass, m>, - SchedUnary<"WriteVFCvtIToFV", "ReadVFCvtIToFV", m.MX, - 
forceMergeOpRead=true>; + foreach e = SchedSEWSet<m.MX, isF=1>.val in + defm _V : VPseudoConversionRoundingMode<m.vrclass, m.vrclass, m, sew=e>, + SchedUnary<"WriteVFCvtIToFV", "ReadVFCvtIToFV", m.MX, e, + forceMergeOpRead=true>; } } multiclass VPseudoVCVTF_RM_V { foreach m = MxListF in { - defm _V : VPseudoConversionRM<m.vrclass, m.vrclass, m>, - SchedUnary<"WriteVFCvtIToFV", "ReadVFCvtIToFV", m.MX, - forceMergeOpRead=true>; + foreach e = SchedSEWSet<m.MX, isF=1>.val in + defm _V : VPseudoConversionRM<m.vrclass, m.vrclass, m, sew=e>, + SchedUnary<"WriteVFCvtIToFV", "ReadVFCvtIToFV", m.MX, e, + forceMergeOpRead=true>; } } @@ -3704,18 +3721,22 @@ multiclass VPseudoVWCVTI_RM_V { multiclass VPseudoVWCVTF_V { defvar constraint = "@earlyclobber $rd"; foreach m = MxListW in { - defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, TargetConstraintType=3>, - SchedUnary<"WriteVFWCvtIToFV", "ReadVFWCvtIToFV", m.MX, - forceMergeOpRead=true>; + foreach e = SchedSEWSet<m.MX, isF=0, isWidening=1>.val in + defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, sew=e, + TargetConstraintType=3>, + SchedUnary<"WriteVFWCvtIToFV", "ReadVFWCvtIToFV", m.MX, e, + forceMergeOpRead=true>; } } multiclass VPseudoVWCVTD_V { defvar constraint = "@earlyclobber $rd"; foreach m = MxListFW in { - defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, TargetConstraintType=3>, - SchedUnary<"WriteVFWCvtFToFV", "ReadVFWCvtFToFV", m.MX, - forceMergeOpRead=true>; + foreach e = SchedSEWSet<m.MX, isF=1, isWidening=1>.val in + defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, sew=e, + TargetConstraintType=3>, + SchedUnary<"WriteVFWCvtFToFV", "ReadVFWCvtFToFV", m.MX, e, + forceMergeOpRead=true>; } } @@ -3749,36 +3770,45 @@ multiclass VPseudoVNCVTI_RM_W { multiclass VPseudoVNCVTF_W_RM { defvar constraint = "@earlyclobber $rd"; foreach m = MxListFW in { - defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m, constraint, TargetConstraintType=2>, - SchedUnary<"WriteVFNCvtIToFV", "ReadVFNCvtIToFV", m.MX, - forceMergeOpRead=true>; + foreach e = SchedSEWSet<m.MX, isF=1, isWidening=1>.val in + defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m, + constraint, sew=e, + TargetConstraintType=2>, + SchedUnary<"WriteVFNCvtIToFV", "ReadVFNCvtIToFV", m.MX, e, + forceMergeOpRead=true>; } } multiclass VPseudoVNCVTF_RM_W { defvar constraint = "@earlyclobber $rd"; foreach m = MxListFW in { - defm _W : VPseudoConversionRM<m.vrclass, m.wvrclass, m, constraint>, - SchedUnary<"WriteVFNCvtIToFV", "ReadVFNCvtIToFV", m.MX, - forceMergeOpRead=true>; + foreach e = SchedSEWSet<m.MX, isF=1, isWidening=1>.val in + defm _W : VPseudoConversionRM<m.vrclass, m.wvrclass, m, constraint, sew=e>, + SchedUnary<"WriteVFNCvtIToFV", "ReadVFNCvtIToFV", m.MX, e, + forceMergeOpRead=true>; } } multiclass VPseudoVNCVTD_W { defvar constraint = "@earlyclobber $rd"; foreach m = MxListFW in { - defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint, TargetConstraintType=2>, - SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX, - forceMergeOpRead=true>; + foreach e = SchedSEWSet<m.MX, isF=1, isWidening=1>.val in + defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint, sew=e, + TargetConstraintType=2>, + SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX, e, + forceMergeOpRead=true>; } } multiclass VPseudoVNCVTD_W_RM { defvar constraint = "@earlyclobber $rd"; foreach m = MxListFW in { - defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m, constraint, 
TargetConstraintType=2>, - SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX, - forceMergeOpRead=true>; + foreach e = SchedSEWSet<m.MX, isF=1, isWidening=1>.val in + defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m, + constraint, sew=e, + TargetConstraintType=2>, + SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX, e, + forceMergeOpRead=true>; } } @@ -4889,14 +4919,17 @@ multiclass VPatConversionTA<string intrinsic, ValueType result_type, ValueType op1_type, ValueType mask_type, - int sew, + int log2sew, LMULInfo vlmul, VReg result_reg_class, - VReg op1_reg_class> { + VReg op1_reg_class, + bit isSEWAware = 0> { def : VPatUnaryNoMask<intrinsic, inst, kind, result_type, op1_type, - sew, vlmul, result_reg_class, op1_reg_class>; + log2sew, vlmul, result_reg_class, op1_reg_class, + isSEWAware>; def : VPatUnaryMask<intrinsic, inst, kind, result_type, op1_type, - mask_type, sew, vlmul, result_reg_class, op1_reg_class>; + mask_type, log2sew, vlmul, result_reg_class, op1_reg_class, + isSEWAware>; } multiclass VPatConversionTARoundingMode<string intrinsic, @@ -4905,14 +4938,17 @@ multiclass VPatConversionTARoundingMode<string intrinsic, ValueType result_type, ValueType op1_type, ValueType mask_type, - int sew, + int log2sew, LMULInfo vlmul, VReg result_reg_class, - VReg op1_reg_class> { + VReg op1_reg_class, + bit isSEWAware = 0> { def : VPatUnaryNoMaskRoundingMode<intrinsic, inst, kind, result_type, op1_type, - sew, vlmul, result_reg_class, op1_reg_class>; + log2sew, vlmul, result_reg_class, + op1_reg_class, isSEWAware>; def : VPatUnaryMaskRoundingMode<intrinsic, inst, kind, result_type, op1_type, - mask_type, sew, vlmul, result_reg_class, op1_reg_class>; + mask_type, log2sew, vlmul, result_reg_class, + op1_reg_class, isSEWAware>; } multiclass VPatBinaryV_VV<string intrinsic, string instruction, @@ -5905,15 +5941,16 @@ multiclass VPatConversionVI_VF_RM<string intrinsic, } } -multiclass VPatConversionVF_VI_RM<string intrinsic, - string instruction> { +multiclass VPatConversionVF_VI_RM<string intrinsic, string instruction, + bit isSEWAware = 0> { foreach fvti = AllFloatVectors in { defvar ivti = GetIntVTypeInfo<fvti>.Vti; let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates, GetVTypePredicates<ivti>.Predicates) in defm : VPatConversionTARoundingMode<intrinsic, instruction, "V", fvti.Vector, ivti.Vector, fvti.Mask, ivti.Log2SEW, - ivti.LMul, fvti.RegClass, ivti.RegClass>; + ivti.LMul, fvti.RegClass, ivti.RegClass, + isSEWAware>; } } @@ -5941,7 +5978,8 @@ multiclass VPatConversionWI_VF_RM<string intrinsic, string instruction> { } } -multiclass VPatConversionWF_VI<string intrinsic, string instruction> { +multiclass VPatConversionWF_VI<string intrinsic, string instruction, + bit isSEWAware = 0> { foreach vtiToWti = AllWidenableIntToFloatVectors in { defvar vti = vtiToWti.Vti; defvar fwti = vtiToWti.Wti; @@ -5949,11 +5987,12 @@ multiclass VPatConversionWF_VI<string intrinsic, string instruction> { GetVTypePredicates<fwti>.Predicates) in defm : VPatConversionTA<intrinsic, instruction, "V", fwti.Vector, vti.Vector, fwti.Mask, vti.Log2SEW, - vti.LMul, fwti.RegClass, vti.RegClass>; + vti.LMul, fwti.RegClass, vti.RegClass, isSEWAware>; } } -multiclass VPatConversionWF_VF<string intrinsic, string instruction> { +multiclass VPatConversionWF_VF<string intrinsic, string instruction, + bit isSEWAware = 0> { foreach fvtiToFWti = AllWidenableFloatVectors in { defvar fvti = fvtiToFWti.Vti; defvar fwti = fvtiToFWti.Wti; @@ -5963,11 +6002,12 @@ multiclass VPatConversionWF_VF<string 
intrinsic, string instruction> { GetVTypePredicates<fwti>.Predicates)) in defm : VPatConversionTA<intrinsic, instruction, "V", fwti.Vector, fvti.Vector, fwti.Mask, fvti.Log2SEW, - fvti.LMul, fwti.RegClass, fvti.RegClass>; + fvti.LMul, fwti.RegClass, fvti.RegClass, isSEWAware>; } } -multiclass VPatConversionWF_VF_BF <string intrinsic, string instruction> { +multiclass VPatConversionWF_VF_BF <string intrinsic, string instruction, + bit isSEWAware = 0> { foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in { defvar fvti = fvtiToFWti.Vti; @@ -5976,7 +6016,7 @@ multiclass VPatConversionWF_VF_BF <string intrinsic, string instruction> { GetVTypePredicates<fwti>.Predicates) in defm : VPatConversionTA<intrinsic, instruction, "V", fwti.Vector, fvti.Vector, fwti.Mask, fvti.Log2SEW, - fvti.LMul, fwti.RegClass, fvti.RegClass>; + fvti.LMul, fwti.RegClass, fvti.RegClass, isSEWAware>; } } @@ -6004,7 +6044,8 @@ multiclass VPatConversionVI_WF_RM <string intrinsic, string instruction> { } } -multiclass VPatConversionVF_WI_RM <string intrinsic, string instruction> { +multiclass VPatConversionVF_WI_RM <string intrinsic, string instruction, + bit isSEWAware = 0> { foreach fvtiToFWti = AllWidenableFloatVectors in { defvar fvti = fvtiToFWti.Vti; defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti; @@ -6012,11 +6053,13 @@ multiclass VPatConversionVF_WI_RM <string intrinsic, string instruction> { GetVTypePredicates<iwti>.Predicates) in defm : VPatConversionTARoundingMode<intrinsic, instruction, "W", fvti.Vector, iwti.Vector, fvti.Mask, fvti.Log2SEW, - fvti.LMul, fvti.RegClass, iwti.RegClass>; + fvti.LMul, fvti.RegClass, iwti.RegClass, + isSEWAware>; } } -multiclass VPatConversionVF_WF <string intrinsic, string instruction> { +multiclass VPatConversionVF_WF<string intrinsic, string instruction, + bit isSEWAware = 0> { foreach fvtiToFWti = AllWidenableFloatVectors in { defvar fvti = fvtiToFWti.Vti; defvar fwti = fvtiToFWti.Wti; @@ -6024,12 +6067,13 @@ multiclass VPatConversionVF_WF <string intrinsic, string instruction> { GetVTypePredicates<fwti>.Predicates) in defm : VPatConversionTA<intrinsic, instruction, "W", fvti.Vector, fwti.Vector, fvti.Mask, fvti.Log2SEW, - fvti.LMul, fvti.RegClass, fwti.RegClass>; + fvti.LMul, fvti.RegClass, fwti.RegClass, isSEWAware>; } } -multiclass VPatConversionVF_WF_RM <string intrinsic, string instruction, - list<VTypeInfoToWide> wlist = AllWidenableFloatVectors> { +multiclass VPatConversionVF_WF_RM<string intrinsic, string instruction, + list<VTypeInfoToWide> wlist = AllWidenableFloatVectors, + bit isSEWAware = 0> { foreach fvtiToFWti = wlist in { defvar fvti = fvtiToFWti.Vti; defvar fwti = fvtiToFWti.Wti; @@ -6037,11 +6081,13 @@ multiclass VPatConversionVF_WF_RM <string intrinsic, string instruction, GetVTypePredicates<fwti>.Predicates) in defm : VPatConversionTARoundingMode<intrinsic, instruction, "W", fvti.Vector, fwti.Vector, fvti.Mask, fvti.Log2SEW, - fvti.LMul, fvti.RegClass, fwti.RegClass>; + fvti.LMul, fvti.RegClass, fwti.RegClass, + isSEWAware>; } } -multiclass VPatConversionVF_WF_BF_RM <string intrinsic, string instruction> { +multiclass VPatConversionVF_WF_BF_RM <string intrinsic, string instruction, + bit isSEWAware = 0> { foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in { defvar fvti = fvtiToFWti.Vti; defvar fwti = fvtiToFWti.Wti; @@ -6049,7 +6095,8 @@ multiclass VPatConversionVF_WF_BF_RM <string intrinsic, string instruction> { GetVTypePredicates<fwti>.Predicates) in defm : VPatConversionTARoundingMode<intrinsic, instruction, "W", fvti.Vector, fwti.Vector, 
fvti.Mask, fvti.Log2SEW, - fvti.LMul, fvti.RegClass, fwti.RegClass>; + fvti.LMul, fvti.RegClass, fwti.RegClass, + isSEWAware>; } } @@ -6207,7 +6254,7 @@ defm PseudoVLSEG : VPseudoUSSegLoadFF; //===----------------------------------------------------------------------===// // 11.1. Vector Single-Width Integer Add and Subtract //===----------------------------------------------------------------------===// -defm PseudoVADD : VPseudoVALU_VV_VX_VI; +defm PseudoVADD : VPseudoVALU_VV_VX_VI<Commutable=1>; defm PseudoVSUB : VPseudoVALU_VV_VX; defm PseudoVRSUB : VPseudoVALU_VX_VI; @@ -6272,9 +6319,9 @@ foreach vti = AllIntegerVectors in { //===----------------------------------------------------------------------===// // 11.2. Vector Widening Integer Add/Subtract //===----------------------------------------------------------------------===// -defm PseudoVWADDU : VPseudoVWALU_VV_VX; +defm PseudoVWADDU : VPseudoVWALU_VV_VX<Commutable=1>; defm PseudoVWSUBU : VPseudoVWALU_VV_VX; -defm PseudoVWADD : VPseudoVWALU_VV_VX; +defm PseudoVWADD : VPseudoVWALU_VV_VX<Commutable=1>; defm PseudoVWSUB : VPseudoVWALU_VV_VX; defm PseudoVWADDU : VPseudoVWALU_WV_WX; defm PseudoVWSUBU : VPseudoVWALU_WV_WX; @@ -6305,9 +6352,9 @@ defm PseudoVMSBC : VPseudoVCALUM_V_X<"@earlyclobber $rd">; //===----------------------------------------------------------------------===// // 11.5. Vector Bitwise Logical Instructions //===----------------------------------------------------------------------===// -defm PseudoVAND : VPseudoVALU_VV_VX_VI; -defm PseudoVOR : VPseudoVALU_VV_VX_VI; -defm PseudoVXOR : VPseudoVALU_VV_VX_VI; +defm PseudoVAND : VPseudoVALU_VV_VX_VI<Commutable=1>; +defm PseudoVOR : VPseudoVALU_VV_VX_VI<Commutable=1>; +defm PseudoVXOR : VPseudoVALU_VV_VX_VI<Commutable=1>; //===----------------------------------------------------------------------===// // 11.6. Vector Single-Width Bit Shift Instructions @@ -6325,8 +6372,8 @@ defm PseudoVNSRA : VPseudoVNSHT_WV_WX_WI; //===----------------------------------------------------------------------===// // 11.8. Vector Integer Comparison Instructions //===----------------------------------------------------------------------===// -defm PseudoVMSEQ : VPseudoVCMPM_VV_VX_VI; -defm PseudoVMSNE : VPseudoVCMPM_VV_VX_VI; +defm PseudoVMSEQ : VPseudoVCMPM_VV_VX_VI<Commutable=1>; +defm PseudoVMSNE : VPseudoVCMPM_VV_VX_VI<Commutable=1>; defm PseudoVMSLTU : VPseudoVCMPM_VV_VX; defm PseudoVMSLT : VPseudoVCMPM_VV_VX; defm PseudoVMSLEU : VPseudoVCMPM_VV_VX_VI; @@ -6345,9 +6392,9 @@ defm PseudoVMAX : VPseudoVMINMAX_VV_VX; //===----------------------------------------------------------------------===// // 11.10. Vector Single-Width Integer Multiply Instructions //===----------------------------------------------------------------------===// -defm PseudoVMUL : VPseudoVMUL_VV_VX; -defm PseudoVMULH : VPseudoVMUL_VV_VX; -defm PseudoVMULHU : VPseudoVMUL_VV_VX; +defm PseudoVMUL : VPseudoVMUL_VV_VX<Commutable=1>; +defm PseudoVMULH : VPseudoVMUL_VV_VX<Commutable=1>; +defm PseudoVMULHU : VPseudoVMUL_VV_VX<Commutable=1>; defm PseudoVMULHSU : VPseudoVMUL_VV_VX; //===----------------------------------------------------------------------===// @@ -6361,8 +6408,8 @@ defm PseudoVREM : VPseudoVDIV_VV_VX; //===----------------------------------------------------------------------===// // 11.12. 
Vector Widening Integer Multiply Instructions //===----------------------------------------------------------------------===// -defm PseudoVWMUL : VPseudoVWMUL_VV_VX; -defm PseudoVWMULU : VPseudoVWMUL_VV_VX; +defm PseudoVWMUL : VPseudoVWMUL_VV_VX<Commutable=1>; +defm PseudoVWMULU : VPseudoVWMUL_VV_VX<Commutable=1>; defm PseudoVWMULSU : VPseudoVWMUL_VV_VX; //===----------------------------------------------------------------------===// @@ -6376,8 +6423,8 @@ defm PseudoVNMSUB : VPseudoVMAC_VV_VX_AAXA; //===----------------------------------------------------------------------===// // 11.14. Vector Widening Integer Multiply-Add Instructions //===----------------------------------------------------------------------===// -defm PseudoVWMACCU : VPseudoVWMAC_VV_VX; -defm PseudoVWMACC : VPseudoVWMAC_VV_VX; +defm PseudoVWMACCU : VPseudoVWMAC_VV_VX<Commutable=1>; +defm PseudoVWMACC : VPseudoVWMAC_VV_VX<Commutable=1>; defm PseudoVWMACCSU : VPseudoVWMAC_VV_VX; defm PseudoVWMACCUS : VPseudoVWMAC_VX; @@ -7197,15 +7244,20 @@ defm : VPatUnaryV_V_RM<"int_riscv_vfrec7", "PseudoVFREC7", AllFloatVectors, isSE //===----------------------------------------------------------------------===// // 13.11. Vector Floating-Point Min/Max Instructions //===----------------------------------------------------------------------===// -defm : VPatBinaryV_VV_VX<"int_riscv_vfmin", "PseudoVFMIN", AllFloatVectors>; -defm : VPatBinaryV_VV_VX<"int_riscv_vfmax", "PseudoVFMAX", AllFloatVectors>; +defm : VPatBinaryV_VV_VX<"int_riscv_vfmin", "PseudoVFMIN", AllFloatVectors, + isSEWAware=1>; +defm : VPatBinaryV_VV_VX<"int_riscv_vfmax", "PseudoVFMAX", AllFloatVectors, + isSEWAware=1>; //===----------------------------------------------------------------------===// // 13.12. Vector Floating-Point Sign-Injection Instructions //===----------------------------------------------------------------------===// -defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnj", "PseudoVFSGNJ", AllFloatVectors>; -defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnjn", "PseudoVFSGNJN", AllFloatVectors>; -defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnjx", "PseudoVFSGNJX", AllFloatVectors>; +defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnj", "PseudoVFSGNJ", AllFloatVectors, + isSEWAware=1>; +defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnjn", "PseudoVFSGNJN", AllFloatVectors, + isSEWAware=1>; +defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnjx", "PseudoVFSGNJX", AllFloatVectors, + isSEWAware=1>; //===----------------------------------------------------------------------===// // 13.13. Vector Floating-Point Compare Instructions @@ -7269,8 +7321,10 @@ defm : VPatConversionVI_VF_RM<"int_riscv_vfcvt_x_f_v", "PseudoVFCVT_X_F">; defm : VPatConversionVI_VF_RM<"int_riscv_vfcvt_xu_f_v", "PseudoVFCVT_XU_F">; defm : VPatConversionVI_VF<"int_riscv_vfcvt_rtz_xu_f_v", "PseudoVFCVT_RTZ_XU_F">; defm : VPatConversionVI_VF<"int_riscv_vfcvt_rtz_x_f_v", "PseudoVFCVT_RTZ_X_F">; -defm : VPatConversionVF_VI_RM<"int_riscv_vfcvt_f_x_v", "PseudoVFCVT_F_X">; -defm : VPatConversionVF_VI_RM<"int_riscv_vfcvt_f_xu_v", "PseudoVFCVT_F_XU">; +defm : VPatConversionVF_VI_RM<"int_riscv_vfcvt_f_x_v", "PseudoVFCVT_F_X", + isSEWAware=1>; +defm : VPatConversionVF_VI_RM<"int_riscv_vfcvt_f_xu_v", "PseudoVFCVT_F_XU", + isSEWAware=1>; //===----------------------------------------------------------------------===// // 13.18. 
Widening Floating-Point/Integer Type-Convert Instructions @@ -7279,11 +7333,14 @@ defm : VPatConversionWI_VF_RM<"int_riscv_vfwcvt_xu_f_v", "PseudoVFWCVT_XU_F">; defm : VPatConversionWI_VF_RM<"int_riscv_vfwcvt_x_f_v", "PseudoVFWCVT_X_F">; defm : VPatConversionWI_VF<"int_riscv_vfwcvt_rtz_xu_f_v", "PseudoVFWCVT_RTZ_XU_F">; defm : VPatConversionWI_VF<"int_riscv_vfwcvt_rtz_x_f_v", "PseudoVFWCVT_RTZ_X_F">; -defm : VPatConversionWF_VI<"int_riscv_vfwcvt_f_xu_v", "PseudoVFWCVT_F_XU">; -defm : VPatConversionWF_VI<"int_riscv_vfwcvt_f_x_v", "PseudoVFWCVT_F_X">; -defm : VPatConversionWF_VF<"int_riscv_vfwcvt_f_f_v", "PseudoVFWCVT_F_F">; +defm : VPatConversionWF_VI<"int_riscv_vfwcvt_f_xu_v", "PseudoVFWCVT_F_XU", + isSEWAware=1>; +defm : VPatConversionWF_VI<"int_riscv_vfwcvt_f_x_v", "PseudoVFWCVT_F_X", + isSEWAware=1>; +defm : VPatConversionWF_VF<"int_riscv_vfwcvt_f_f_v", "PseudoVFWCVT_F_F", + isSEWAware=1>; defm : VPatConversionWF_VF_BF<"int_riscv_vfwcvtbf16_f_f_v", - "PseudoVFWCVTBF16_F_F">; + "PseudoVFWCVTBF16_F_F", isSEWAware=1>; //===----------------------------------------------------------------------===// // 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions @@ -7292,21 +7349,24 @@ defm : VPatConversionVI_WF_RM<"int_riscv_vfncvt_xu_f_w", "PseudoVFNCVT_XU_F">; defm : VPatConversionVI_WF_RM<"int_riscv_vfncvt_x_f_w", "PseudoVFNCVT_X_F">; defm : VPatConversionVI_WF<"int_riscv_vfncvt_rtz_xu_f_w", "PseudoVFNCVT_RTZ_XU_F">; defm : VPatConversionVI_WF<"int_riscv_vfncvt_rtz_x_f_w", "PseudoVFNCVT_RTZ_X_F">; -defm : VPatConversionVF_WI_RM <"int_riscv_vfncvt_f_xu_w", "PseudoVFNCVT_F_XU">; -defm : VPatConversionVF_WI_RM <"int_riscv_vfncvt_f_x_w", "PseudoVFNCVT_F_X">; +defm : VPatConversionVF_WI_RM<"int_riscv_vfncvt_f_xu_w", "PseudoVFNCVT_F_XU", + isSEWAware=1>; +defm : VPatConversionVF_WI_RM<"int_riscv_vfncvt_f_x_w", "PseudoVFNCVT_F_X", + isSEWAware=1>; defvar WidenableFloatVectorsExceptF16 = !filter(fvtiToFWti, AllWidenableFloatVectors, !ne(fvtiToFWti.Vti.Scalar, f16)); defm : VPatConversionVF_WF_RM<"int_riscv_vfncvt_f_f_w", "PseudoVFNCVT_F_F", - WidenableFloatVectorsExceptF16>; + WidenableFloatVectorsExceptF16, isSEWAware=1>; // Define vfncvt.f.f.w for f16 when Zvfhmin is enable. defvar F16WidenableFloatVectors = !filter(fvtiToFWti, AllWidenableFloatVectors, !eq(fvtiToFWti.Vti.Scalar, f16)); let Predicates = [HasVInstructionsF16Minimal] in defm : VPatConversionVF_WF_RM<"int_riscv_vfncvt_f_f_w", "PseudoVFNCVT_F_F", - F16WidenableFloatVectors>; + F16WidenableFloatVectors, isSEWAware=1>; defm : VPatConversionVF_WF_BF_RM<"int_riscv_vfncvtbf16_f_f_w", - "PseudoVFNCVTBF16_F_F">; -defm : VPatConversionVF_WF<"int_riscv_vfncvt_rod_f_f_w", "PseudoVFNCVT_ROD_F_F">; + "PseudoVFNCVTBF16_F_F", isSEWAware=1>; +defm : VPatConversionVF_WF<"int_riscv_vfncvt_rod_f_f_w", "PseudoVFNCVT_ROD_F_F", + isSEWAware=1>; //===----------------------------------------------------------------------===// // 14. 
Vector Reduction Operations diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td index b6cd6dc..3397d55 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -410,7 +410,7 @@ multiclass VPatConvertI2FPSDNode_V_RM<SDPatternOperator vop, let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates, GetVTypePredicates<ivti>.Predicates) in def : Pat<(fvti.Vector (vop (ivti.Vector ivti.RegClass:$rs1))), - (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX) + (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_E"#fvti.SEW) (fvti.Vector (IMPLICIT_DEF)), ivti.RegClass:$rs1, // Value to indicate no rounding mode change in @@ -441,7 +441,7 @@ multiclass VPatWConvertI2FPSDNode_V<SDPatternOperator vop, let Predicates = !listconcat(GetVTypePredicates<ivti>.Predicates, GetVTypePredicates<fwti>.Predicates) in def : Pat<(fwti.Vector (vop (ivti.Vector ivti.RegClass:$rs1))), - (!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX) + (!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX#"_E"#ivti.SEW) (fwti.Vector (IMPLICIT_DEF)), ivti.RegClass:$rs1, ivti.AVL, ivti.Log2SEW, TA_MA)>; @@ -470,7 +470,7 @@ multiclass VPatNConvertI2FPSDNode_W_RM<SDPatternOperator vop, let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates, GetVTypePredicates<iwti>.Predicates) in def : Pat<(fvti.Vector (vop (iwti.Vector iwti.RegClass:$rs1))), - (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX) + (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_E"#fvti.SEW) (fvti.Vector (IMPLICIT_DEF)), iwti.RegClass:$rs1, // Value to indicate no rounding mode change in @@ -1339,42 +1339,42 @@ foreach vti = AllFloatVectors in { // 13.12. Vector Floating-Point Sign-Injection Instructions def : Pat<(fabs (vti.Vector vti.RegClass:$rs)), - (!cast<Instruction>("PseudoVFSGNJX_VV_"# vti.LMul.MX) + (!cast<Instruction>("PseudoVFSGNJX_VV_"# vti.LMul.MX#"_E"#vti.SEW) (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TA_MA)>; // Handle fneg with VFSGNJN using the same input for both operands. 
def : Pat<(fneg (vti.Vector vti.RegClass:$rs)), - (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX) + (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX#"_E"#vti.SEW) (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TA_MA)>; def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1), (vti.Vector vti.RegClass:$rs2))), - (!cast<Instruction>("PseudoVFSGNJ_VV_"# vti.LMul.MX) + (!cast<Instruction>("PseudoVFSGNJ_VV_"# vti.LMul.MX#"_E"#vti.SEW) (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>; def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1), (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs2)))), - (!cast<Instruction>("PseudoVFSGNJ_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) + (!cast<Instruction>("PseudoVFSGNJ_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW) (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>; def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1), (vti.Vector (fneg vti.RegClass:$rs2)))), - (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX) + (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX#"_E"#vti.SEW) (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>; def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1), (vti.Vector (fneg (SplatFPOp vti.ScalarRegClass:$rs2))))), - (!cast<Instruction>("PseudoVFSGNJN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) + (!cast<Instruction>("PseudoVFSGNJN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW) (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>; } } // 13.11. Vector Floating-Point MIN/MAX Instructions -defm : VPatBinaryFPSDNode_VV_VF<fminnum, "PseudoVFMIN">; -defm : VPatBinaryFPSDNode_VV_VF<fmaxnum, "PseudoVFMAX">; +defm : VPatBinaryFPSDNode_VV_VF<fminnum, "PseudoVFMIN", isSEWAware=1>; +defm : VPatBinaryFPSDNode_VV_VF<fmaxnum, "PseudoVFMAX", isSEWAware=1>; // 13.13. 
Vector Floating-Point Compare Instructions defm : VPatFPSetCCSDNode_VV_VF_FV<SETEQ, "PseudoVMFEQ", "PseudoVMFEQ">; @@ -1445,7 +1445,7 @@ foreach fvtiToFWti = AllWidenableFloatVectors in { !listconcat(GetVTypePredicates<fvti>.Predicates, GetVTypePredicates<fwti>.Predicates)) in def : Pat<(fvti.Vector (fpround (fwti.Vector fwti.RegClass:$rs1))), - (!cast<Instruction>("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX) + (!cast<Instruction>("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX#"_E"#fvti.SEW) (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1, // Value to indicate no rounding mode change in diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 6fde30a..42fee1a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -1229,7 +1229,7 @@ multiclass VPatConvertI2FPVL_V_RM<SDPatternOperator vop, string instruction_name def : Pat<(fvti.Vector (vop (ivti.Vector ivti.RegClass:$rs1), (ivti.Mask VMV0:$vm), VLOpFrag)), - (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_MASK") + (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") (fvti.Vector (IMPLICIT_DEF)), ivti.RegClass:$rs1, (ivti.Mask VMV0:$vm), // Value to indicate no rounding mode change in @@ -1247,7 +1247,7 @@ multiclass VPatConvertI2FP_RM_VL_V<SDNode vop, string instruction_name> { def : Pat<(fvti.Vector (vop (ivti.Vector ivti.RegClass:$rs1), (ivti.Mask VMV0:$vm), (XLenVT timm:$frm), VLOpFrag)), - (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_MASK") + (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") (fvti.Vector (IMPLICIT_DEF)), ivti.RegClass:$rs1, (ivti.Mask VMV0:$vm), timm:$frm, GPR:$vl, fvti.Log2SEW, TA_MA)>; } @@ -1315,7 +1315,7 @@ multiclass VPatWConvertI2FPVL_V<SDPatternOperator vop, def : Pat<(fwti.Vector (vop (ivti.Vector ivti.RegClass:$rs1), (ivti.Mask VMV0:$vm), VLOpFrag)), - (!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX#"_MASK") + (!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX#"_E"#ivti.SEW#"_MASK") (fwti.Vector (IMPLICIT_DEF)), ivti.RegClass:$rs1, (ivti.Mask VMV0:$vm), GPR:$vl, ivti.Log2SEW, TA_MA)>; @@ -1389,7 +1389,7 @@ multiclass VPatNConvertI2FPVL_W_RM<SDPatternOperator vop, def : Pat<(fvti.Vector (vop (iwti.Vector iwti.RegClass:$rs1), (iwti.Mask VMV0:$vm), VLOpFrag)), - (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_MASK") + (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") (fvti.Vector (IMPLICIT_DEF)), iwti.RegClass:$rs1, (iwti.Mask VMV0:$vm), // Value to indicate no rounding mode change in @@ -1408,7 +1408,7 @@ multiclass VPatNConvertI2FP_RM_VL_W<SDNode vop, string instruction_name> { def : Pat<(fvti.Vector (vop (iwti.Vector iwti.RegClass:$rs1), (iwti.Mask VMV0:$vm), (XLenVT timm:$frm), VLOpFrag)), - (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_MASK") + (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") (fvti.Vector (IMPLICIT_DEF)), iwti.RegClass:$rs1, (iwti.Mask VMV0:$vm), timm:$frm, GPR:$vl, fvti.Log2SEW, TA_MA)>; } @@ -2468,8 +2468,8 @@ defm : VPatWidenFPMulAccVL_VV_VF_RM<riscv_vfwmsub_vl, "PseudoVFWMSAC">; defm : VPatWidenFPMulAccVL_VV_VF_RM<riscv_vfwnmsub_vl, "PseudoVFWNMSAC">; // 13.11. 
Vector Floating-Point MIN/MAX Instructions -defm : VPatBinaryFPVL_VV_VF<riscv_vfmin_vl, "PseudoVFMIN">; -defm : VPatBinaryFPVL_VV_VF<riscv_vfmax_vl, "PseudoVFMAX">; +defm : VPatBinaryFPVL_VV_VF<riscv_vfmin_vl, "PseudoVFMIN", isSEWAware=1>; +defm : VPatBinaryFPVL_VV_VF<riscv_vfmax_vl, "PseudoVFMAX", isSEWAware=1>; // 13.13. Vector Floating-Point Compare Instructions defm : VPatFPSetCCVL_VV_VF_FV<any_riscv_fsetcc_vl, SETEQ, @@ -2505,14 +2505,14 @@ foreach vti = AllFloatVectors in { // 13.12. Vector Floating-Point Sign-Injection Instructions def : Pat<(riscv_fabs_vl (vti.Vector vti.RegClass:$rs), (vti.Mask VMV0:$vm), VLOpFrag), - (!cast<Instruction>("PseudoVFSGNJX_VV_"# vti.LMul.MX #"_MASK") + (!cast<Instruction>("PseudoVFSGNJX_VV_"# vti.LMul.MX #"_E"#vti.SEW#"_MASK") (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs, vti.RegClass:$rs, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TA_MA)>; // Handle fneg with VFSGNJN using the same input for both operands. def : Pat<(riscv_fneg_vl (vti.Vector vti.RegClass:$rs), (vti.Mask VMV0:$vm), VLOpFrag), - (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX #"_MASK") + (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX#"_E"#vti.SEW #"_MASK") (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs, vti.RegClass:$rs, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TA_MA)>; @@ -2522,7 +2522,7 @@ foreach vti = AllFloatVectors in { vti.RegClass:$merge, (vti.Mask VMV0:$vm), VLOpFrag), - (!cast<Instruction>("PseudoVFSGNJ_VV_"# vti.LMul.MX#"_MASK") + (!cast<Instruction>("PseudoVFSGNJ_VV_"# vti.LMul.MX#"_E"#vti.SEW#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; @@ -2534,7 +2534,7 @@ foreach vti = AllFloatVectors in { srcvalue, (vti.Mask true_mask), VLOpFrag), - (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX) + (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX#"_E"#vti.SEW) (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW, TA_MA)>; @@ -2543,7 +2543,7 @@ foreach vti = AllFloatVectors in { vti.RegClass:$merge, (vti.Mask VMV0:$vm), VLOpFrag), - (!cast<Instruction>("PseudoVFSGNJ_V"#vti.ScalarSuffix#"_"# vti.LMul.MX#"_MASK") + (!cast<Instruction>("PseudoVFSGNJ_V"#vti.ScalarSuffix#"_"# vti.LMul.MX#"_E"#vti.SEW#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; @@ -2672,7 +2672,7 @@ foreach fvtiToFWti = AllWidenableFloatVectors in { (fvti.Vector fvti.RegClass:$rs1), (fvti.Mask VMV0:$vm), VLOpFrag)), - (!cast<Instruction>("PseudoVFWCVT_F_F_V_"#fvti.LMul.MX#"_MASK") + (!cast<Instruction>("PseudoVFWCVT_F_F_V_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") (fwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1, (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TA_MA)>; @@ -2703,7 +2703,7 @@ foreach fvtiToFWti = AllWidenableFloatVectors in { def : Pat<(fvti.Vector (any_riscv_fpround_vl (fwti.Vector fwti.RegClass:$rs1), (fwti.Mask VMV0:$vm), VLOpFrag)), - (!cast<Instruction>("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX#"_MASK") + (!cast<Instruction>("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1, (fwti.Mask VMV0:$vm), // Value to indicate no rounding mode change in @@ -2716,7 +2716,7 @@ foreach fvtiToFWti = AllWidenableFloatVectors in { def : Pat<(fvti.Vector (any_riscv_fncvt_rod_vl (fwti.Vector fwti.RegClass:$rs1), (fwti.Mask VMV0:$vm), VLOpFrag)), - (!cast<Instruction>("PseudoVFNCVT_ROD_F_F_W_"#fvti.LMul.MX#"_MASK") + 
(!cast<Instruction>("PseudoVFNCVT_ROD_F_F_W_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1, (fwti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TA_MA)>; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td index 9a6818c..71aa1f1 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td @@ -307,10 +307,16 @@ multiclass VPseudoVC_X<LMULInfo m, DAGOperand RS1Class, Operand OpClass = payload2> { let VLMul = m.value in { let Defs = [VCIX_STATE], Uses = [VCIX_STATE] in { - def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_X<OpClass, RS1Class>; - def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_X<OpClass, m.vrclass, RS1Class>; + def "PseudoVC_" # NAME # "_SE_" # m.MX + : VPseudoVC_X<OpClass, RS1Class>, + Sched<[!cast<SchedWrite>("WriteVC_" # NAME # "_" # m.MX)]>; + def "PseudoVC_V_" # NAME # "_SE_" # m.MX + : VPseudoVC_V_X<OpClass, m.vrclass, RS1Class>, + Sched<[!cast<SchedWrite>("WriteVC_V_" # NAME # "_" # m.MX)]>; } - def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_X<OpClass, m.vrclass, RS1Class>; + def "PseudoVC_V_" # NAME # "_" # m.MX + : VPseudoVC_V_X<OpClass, m.vrclass, RS1Class>, + Sched<[!cast<SchedWrite>("WriteVC_V_" # NAME # "_" # m.MX)]>; } } @@ -318,10 +324,16 @@ multiclass VPseudoVC_XV<LMULInfo m, DAGOperand RS1Class, Operand OpClass = payload2> { let VLMul = m.value in { let Defs = [VCIX_STATE], Uses = [VCIX_STATE] in { - def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_XV<OpClass, m.vrclass, RS1Class>; - def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_XV<OpClass, m.vrclass, m.vrclass, RS1Class>; + def "PseudoVC_" # NAME # "_SE_" # m.MX + : VPseudoVC_XV<OpClass, m.vrclass, RS1Class>, + Sched<[!cast<SchedWrite>("WriteVC_" # NAME # "_" # m.MX)]>; + def "PseudoVC_V_" # NAME # "_SE_" # m.MX + : VPseudoVC_V_XV<OpClass, m.vrclass, m.vrclass, RS1Class>, + Sched<[!cast<SchedWrite>("WriteVC_V_" # NAME # "_" # m.MX)]>; } - def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_XV<OpClass, m.vrclass, m.vrclass, RS1Class>; + def "PseudoVC_V_" # NAME # "_" # m.MX + : VPseudoVC_V_XV<OpClass, m.vrclass, m.vrclass, RS1Class>, + Sched<[!cast<SchedWrite>("WriteVC_V_" # NAME # "_" # m.MX)]>; } } @@ -329,10 +341,16 @@ multiclass VPseudoVC_XVV<LMULInfo m, DAGOperand RS1Class, Operand OpClass = payload2> { let VLMul = m.value in { let Defs = [VCIX_STATE], Uses = [VCIX_STATE] in { - def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>; - def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>; + def "PseudoVC_" # NAME # "_SE_" # m.MX + : VPseudoVC_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>, + Sched<[!cast<SchedWrite>("WriteVC_" # NAME # "_" # m.MX)]>; + def "PseudoVC_V_" # NAME # "_SE_" # m.MX + : VPseudoVC_V_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>, + Sched<[!cast<SchedWrite>("WriteVC_V_" # NAME # "_" # m.MX)]>; } - def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>; + def "PseudoVC_V_" # NAME # "_" # m.MX + : VPseudoVC_V_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>, + Sched<[!cast<SchedWrite>("WriteVC_V_" # NAME # "_" # m.MX)]>; } } @@ -340,11 +358,17 @@ multiclass VPseudoVC_XVW<LMULInfo m, DAGOperand RS1Class, Operand OpClass = payload2> { let VLMul = m.value in { let Defs = [VCIX_STATE], Uses = [VCIX_STATE] in - def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class>; + def 
"PseudoVC_" # NAME # "_SE_" # m.MX + : VPseudoVC_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class>, + Sched<[!cast<SchedWrite>("WriteVC_" # NAME # "_" # m.MX)]>; let Constraints = "@earlyclobber $rd, $rd = $rs3" in { let Defs = [VCIX_STATE], Uses = [VCIX_STATE] in - def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class>; - def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class>; + def "PseudoVC_V_" # NAME # "_SE_" # m.MX + : VPseudoVC_V_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class>, + Sched<[!cast<SchedWrite>("WriteVC_V_" # NAME # "_" # m.MX)]>; + def "PseudoVC_V_" # NAME # "_" # m.MX + : VPseudoVC_V_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class>, + Sched<[!cast<SchedWrite>("WriteVC_V_" # NAME # "_" # m.MX)]>; } } } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td index dd13a07..32e7f96 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td @@ -20,13 +20,7 @@ class CMOPInst<bits<3> imm3, string opcodestr> let Inst{12-11} = 0; } -// CMOP1, CMOP5 is used by Zicfiss. -let Predicates = [HasStdExtZcmop, NoHasStdExtZicfiss] in { - def CMOP1 : CMOPInst<0, "cmop.1">, Sched<[]>; - def CMOP5 : CMOPInst<2, "cmop.5">, Sched<[]>; -} - -foreach n = [3, 7, 9, 11, 13, 15] in { +foreach n = [1, 3, 5, 7, 9, 11, 13, 15] in { let Predicates = [HasStdExtZcmop] in - def CMOP # n : CMOPInst<!srl(n, 1), "cmop." # n>, Sched<[]>; + def C_MOP # n : CMOPInst<!srl(n, 1), "c.mop." # n>, Sched<[]>; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td index 2d72e98..16f7279 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td @@ -152,7 +152,7 @@ let Predicates = [HasStdExtZvknhaOrZvknhb], RVVConstraint = Sha2Constraint in { def VSHA2MS_VV : PALUVVNoVmTernary<0b101101, OPMVV, "vsha2ms.vv">; } // Predicates = [HasStdExtZvknhaOrZvknhb] -let Predicates = [HasStdExtZvkned]in { +let Predicates = [HasStdExtZvkned] in { defm VAESDF : VAES_MV_V_S<0b101000, 0b101001, 0b00001, OPMVV, "vaesdf">; defm VAESDM : VAES_MV_V_S<0b101000, 0b101001, 0b00000, OPMVV, "vaesdm">; defm VAESEF : VAES_MV_V_S<0b101000, 0b101001, 0b00011, OPMVV, "vaesef">; diff --git a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp index 39d420c..ead91c5 100644 --- a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp +++ b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp @@ -12,15 +12,24 @@ // extended bits aren't consumed or because the input was already sign extended // by an earlier instruction. // -// Then it removes the -w suffix from opw instructions whenever all users are -// dependent only on the lower word of the result of the instruction. -// The cases handled are: -// * addw because c.add has a larger register encoding than c.addw. -// * addiw because it helps reduce test differences between RV32 and RV64 -// w/o being a pessimization. -// * mulw because c.mulw doesn't exist but c.mul does (w/ zcb) -// * slliw because c.slliw doesn't exist and c.slli does +// Then: +// 1. Unless explicit disabled or the target prefers instructions with W suffix, +// it removes the -w suffix from opw instructions whenever all users are +// dependent only on the lower word of the result of the instruction. +// The cases handled are: +// * addw because c.add has a larger register encoding than c.addw. 
+// * addiw because it helps reduce test differences between RV32 and RV64 +// w/o being a pessimization. +// * mulw because c.mulw doesn't exist but c.mul does (w/ zcb) +// * slliw because c.slliw doesn't exist and c.slli does // +// 2. Or if explicit enabled or the target prefers instructions with W suffix, +// it adds the W suffix to the instruction whenever all users are dependent +// only on the lower word of the result of the instruction. +// The cases handled are: +// * add/addi/sub/mul. +// * slli with imm < 32. +// * ld/lwu. //===---------------------------------------------------------------------===// #include "RISCV.h" @@ -60,6 +69,8 @@ public: const RISCVSubtarget &ST, MachineRegisterInfo &MRI); bool stripWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII, const RISCVSubtarget &ST, MachineRegisterInfo &MRI); + bool appendWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII, + const RISCVSubtarget &ST, MachineRegisterInfo &MRI); void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); @@ -672,9 +683,6 @@ bool RISCVOptWInstrs::stripWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII, const RISCVSubtarget &ST, MachineRegisterInfo &MRI) { - if (DisableStripWSuffix || !ST.enableStripWSuffix()) - return false; - bool MadeChange = false; for (MachineBasicBlock &MBB : MF) { for (MachineInstr &MI : MBB) { @@ -698,6 +706,58 @@ bool RISCVOptWInstrs::stripWSuffixes(MachineFunction &MF, return MadeChange; } +bool RISCVOptWInstrs::appendWSuffixes(MachineFunction &MF, + const RISCVInstrInfo &TII, + const RISCVSubtarget &ST, + MachineRegisterInfo &MRI) { + bool MadeChange = false; + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + unsigned WOpc; + // TODO: Add more? + switch (MI.getOpcode()) { + default: + continue; + case RISCV::ADD: + WOpc = RISCV::ADDW; + break; + case RISCV::ADDI: + WOpc = RISCV::ADDIW; + break; + case RISCV::SUB: + WOpc = RISCV::SUBW; + break; + case RISCV::MUL: + WOpc = RISCV::MULW; + break; + case RISCV::SLLI: + // SLLIW reads the lowest 5 bits, while SLLI reads lowest 6 bits + if (MI.getOperand(2).getImm() >= 32) + continue; + WOpc = RISCV::SLLIW; + break; + case RISCV::LD: + case RISCV::LWU: + WOpc = RISCV::LW; + break; + } + + if (hasAllWUsers(MI, ST, MRI)) { + LLVM_DEBUG(dbgs() << "Replacing " << MI); + MI.setDesc(TII.get(WOpc)); + MI.clearFlag(MachineInstr::MIFlag::NoSWrap); + MI.clearFlag(MachineInstr::MIFlag::NoUWrap); + MI.clearFlag(MachineInstr::MIFlag::IsExact); + LLVM_DEBUG(dbgs() << " with " << MI); + ++NumTransformedToWInstrs; + MadeChange = true; + } + } + } + + return MadeChange; +} + bool RISCVOptWInstrs::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -711,7 +771,12 @@ bool RISCVOptWInstrs::runOnMachineFunction(MachineFunction &MF) { bool MadeChange = false; MadeChange |= removeSExtWInstrs(MF, TII, ST, MRI); - MadeChange |= stripWSuffixes(MF, TII, ST, MRI); + + if (!(DisableStripWSuffix || ST.preferWInst())) + MadeChange |= stripWSuffixes(MF, TII, ST, MRI); + + if (ST.preferWInst()) + MadeChange |= appendWSuffixes(MF, TII, ST, MRI); return MadeChange; } diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td index fd6d607..f9a557e 100644 --- a/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -56,11 +56,13 @@ class RISCVTuneProcessorModel<string n, def GENERIC_RV32 : RISCVProcessorModel<"generic-rv32", NoSchedModel, - [Feature32Bit]>, + [Feature32Bit, + 
FeatureStdExtI]>, GenericTuneInfo; def GENERIC_RV64 : RISCVProcessorModel<"generic-rv64", NoSchedModel, - [Feature64Bit]>, + [Feature64Bit, + FeatureStdExtI]>, GenericTuneInfo; // Support generic for compatibility with other targets. The triple will be used // to change to the appropriate rv32/rv64 version. @@ -69,11 +71,13 @@ def : ProcessorModel<"generic", NoSchedModel, []>, GenericTuneInfo; def ROCKET_RV32 : RISCVProcessorModel<"rocket-rv32", RocketModel, [Feature32Bit, + FeatureStdExtI, FeatureStdExtZifencei, FeatureStdExtZicsr]>; def ROCKET_RV64 : RISCVProcessorModel<"rocket-rv64", RocketModel, [Feature64Bit, + FeatureStdExtI, FeatureStdExtZifencei, FeatureStdExtZicsr]>; def ROCKET : RISCVTuneProcessorModel<"rocket", @@ -86,6 +90,7 @@ def SIFIVE_7 : RISCVTuneProcessorModel<"sifive-7-series", def SIFIVE_E20 : RISCVProcessorModel<"sifive-e20", RocketModel, [Feature32Bit, + FeatureStdExtI, FeatureStdExtZicsr, FeatureStdExtZifencei, FeatureStdExtM, @@ -94,6 +99,7 @@ def SIFIVE_E20 : RISCVProcessorModel<"sifive-e20", def SIFIVE_E21 : RISCVProcessorModel<"sifive-e21", RocketModel, [Feature32Bit, + FeatureStdExtI, FeatureStdExtZicsr, FeatureStdExtZifencei, FeatureStdExtM, @@ -103,6 +109,7 @@ def SIFIVE_E21 : RISCVProcessorModel<"sifive-e21", def SIFIVE_E24 : RISCVProcessorModel<"sifive-e24", RocketModel, [Feature32Bit, + FeatureStdExtI, FeatureStdExtZifencei, FeatureStdExtM, FeatureStdExtA, @@ -112,6 +119,7 @@ def SIFIVE_E24 : RISCVProcessorModel<"sifive-e24", def SIFIVE_E31 : RISCVProcessorModel<"sifive-e31", RocketModel, [Feature32Bit, + FeatureStdExtI, FeatureStdExtZifencei, FeatureStdExtZicsr, FeatureStdExtM, @@ -121,6 +129,7 @@ def SIFIVE_E31 : RISCVProcessorModel<"sifive-e31", def SIFIVE_E34 : RISCVProcessorModel<"sifive-e34", RocketModel, [Feature32Bit, + FeatureStdExtI, FeatureStdExtZifencei, FeatureStdExtM, FeatureStdExtA, @@ -130,6 +139,7 @@ def SIFIVE_E34 : RISCVProcessorModel<"sifive-e34", def SIFIVE_E76 : RISCVProcessorModel<"sifive-e76", SiFive7Model, [Feature32Bit, + FeatureStdExtI, FeatureStdExtZifencei, FeatureStdExtM, FeatureStdExtA, @@ -140,6 +150,7 @@ def SIFIVE_E76 : RISCVProcessorModel<"sifive-e76", def SIFIVE_S21 : RISCVProcessorModel<"sifive-s21", RocketModel, [Feature64Bit, + FeatureStdExtI, FeatureStdExtZicsr, FeatureStdExtZifencei, FeatureStdExtM, @@ -149,6 +160,7 @@ def SIFIVE_S21 : RISCVProcessorModel<"sifive-s21", def SIFIVE_S51 : RISCVProcessorModel<"sifive-s51", RocketModel, [Feature64Bit, + FeatureStdExtI, FeatureStdExtZicsr, FeatureStdExtZifencei, FeatureStdExtM, @@ -158,6 +170,7 @@ def SIFIVE_S51 : RISCVProcessorModel<"sifive-s51", def SIFIVE_S54 : RISCVProcessorModel<"sifive-s54", RocketModel, [Feature64Bit, + FeatureStdExtI, FeatureStdExtZifencei, FeatureStdExtM, FeatureStdExtA, @@ -168,6 +181,7 @@ def SIFIVE_S54 : RISCVProcessorModel<"sifive-s54", def SIFIVE_S76 : RISCVProcessorModel<"sifive-s76", SiFive7Model, [Feature64Bit, + FeatureStdExtI, FeatureStdExtZifencei, FeatureStdExtM, FeatureStdExtA, @@ -180,6 +194,7 @@ def SIFIVE_S76 : RISCVProcessorModel<"sifive-s76", def SIFIVE_U54 : RISCVProcessorModel<"sifive-u54", RocketModel, [Feature64Bit, + FeatureStdExtI, FeatureStdExtZifencei, FeatureStdExtM, FeatureStdExtA, @@ -190,6 +205,7 @@ def SIFIVE_U54 : RISCVProcessorModel<"sifive-u54", def SIFIVE_U74 : RISCVProcessorModel<"sifive-u74", SiFive7Model, [Feature64Bit, + FeatureStdExtI, FeatureStdExtZifencei, FeatureStdExtM, FeatureStdExtA, @@ -200,6 +216,7 @@ def SIFIVE_U74 : RISCVProcessorModel<"sifive-u74", def SIFIVE_X280 : 
RISCVProcessorModel<"sifive-x280", SiFive7Model, [Feature64Bit, + FeatureStdExtI, FeatureStdExtZifencei, FeatureStdExtM, FeatureStdExtA, @@ -217,6 +234,7 @@ def SIFIVE_X280 : RISCVProcessorModel<"sifive-x280", SiFive7Model, def SIFIVE_P450 : RISCVProcessorModel<"sifive-p450", SiFiveP400Model, [Feature64Bit, + FeatureStdExtI, FeatureStdExtZifencei, FeatureStdExtM, FeatureStdExtA, @@ -239,7 +257,8 @@ def SIFIVE_P450 : RISCVProcessorModel<"sifive-p450", SiFiveP400Model, FeatureStdExtZbb, FeatureStdExtZbs, FeatureStdExtZfhmin, - FeatureFastUnalignedAccess], + FeatureUnalignedScalarMem, + FeatureUnalignedVectorMem], [TuneNoDefaultUnroll, TuneConditionalCompressedMoveFusion, TuneLUIADDIFusion, @@ -247,6 +266,7 @@ def SIFIVE_P450 : RISCVProcessorModel<"sifive-p450", SiFiveP400Model, def SIFIVE_P670 : RISCVProcessorModel<"sifive-p670", SiFiveP600Model, [Feature64Bit, + FeatureStdExtI, FeatureStdExtZifencei, FeatureStdExtM, FeatureStdExtA, @@ -276,7 +296,8 @@ def SIFIVE_P670 : RISCVProcessorModel<"sifive-p670", SiFiveP600Model, FeatureStdExtZvkng, FeatureStdExtZvksc, FeatureStdExtZvksg, - FeatureFastUnalignedAccess], + FeatureUnalignedScalarMem, + FeatureUnalignedVectorMem], [TuneNoDefaultUnroll, TuneConditionalCompressedMoveFusion, TuneLUIADDIFusion, @@ -286,6 +307,7 @@ def SIFIVE_P670 : RISCVProcessorModel<"sifive-p670", SiFiveP600Model, def SYNTACORE_SCR1_BASE : RISCVProcessorModel<"syntacore-scr1-base", SyntacoreSCR1Model, [Feature32Bit, + FeatureStdExtI, FeatureStdExtZicsr, FeatureStdExtZifencei, FeatureStdExtC], @@ -294,6 +316,7 @@ def SYNTACORE_SCR1_BASE : RISCVProcessorModel<"syntacore-scr1-base", def SYNTACORE_SCR1_MAX : RISCVProcessorModel<"syntacore-scr1-max", SyntacoreSCR1Model, [Feature32Bit, + FeatureStdExtI, FeatureStdExtZicsr, FeatureStdExtZifencei, FeatureStdExtM, @@ -303,6 +326,7 @@ def SYNTACORE_SCR1_MAX : RISCVProcessorModel<"syntacore-scr1-max", def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1", NoSchedModel, [Feature64Bit, + FeatureStdExtI, FeatureStdExtZifencei, FeatureStdExtZicsr, FeatureStdExtZicntr, @@ -332,6 +356,7 @@ def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1", def XIANGSHAN_NANHU : RISCVProcessorModel<"xiangshan-nanhu", XiangShanNanHuModel, [Feature64Bit, + FeatureStdExtI, FeatureStdExtZicsr, FeatureStdExtZifencei, FeatureStdExtM, diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td index 316daf2..1a0533c 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -533,6 +533,12 @@ def VR : VReg<!listconcat(VM1VTs, VMaskVTs), (add (sequence "V%u", 8, 31), (sequence "V%u", 7, 0)), 1>; +// V0 is likely to be used as mask, so we move it in front of allocation order. 
+def VMM1 : VReg<VMaskVTs, (add (sequence "V%u", 0, 31)), 1>; +def VMM2 : VReg<VMaskVTs, (add (sequence "V%u", 0, 31, 2)), 1>; +def VMM4 : VReg<VMaskVTs, (add (sequence "V%u", 0, 31, 4)), 1>; +def VMM8 : VReg<VMaskVTs, (add (sequence "V%u", 0, 31, 8)), 1>; + def VRNoV0 : VReg<!listconcat(VM1VTs, VMaskVTs), (sub VR, V0), 1>; def VRM2 : VReg<VM2VTs, (add (sequence "V%uM2", 8, 31, 2), diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td index e74c7aa..65494e7 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td +++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td @@ -261,4 +261,5 @@ defm : UnsupportedSchedZbkx; defm : UnsupportedSchedZfa; defm : UnsupportedSchedZfh; defm : UnsupportedSchedSFB; +defm : UnsupportedSchedXsfvcp; } diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td index 2a13cb4..a532066 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -744,6 +744,13 @@ foreach mx = SchedMxListF in { defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>; defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>; defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>; + } + let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>; } } } @@ -751,14 +758,9 @@ foreach mx = SchedMxList in { defvar Cycles = SiFive7GetCyclesDefault<mx>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { - defm "" : LMULWriteResMX<"WriteVFCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; } let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { - defm "" : LMULWriteResMX<"WriteVFSgnjV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFSgnjF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFMinMaxV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFMinMaxF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVFClassV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVFMergeV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVFMovV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; @@ -784,10 +786,11 @@ foreach mx = SchedMxListF in { // Widening foreach mx = SchedMxListW in { - defvar Cycles = SiFive7GetCyclesDefault<mx>.c; - defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c; - let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { - defm "" : LMULWriteResMX<"WriteVFWCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + foreach sew = SchedSEWSet<mx, isF=0, isWidening=1>.val in { + defvar Cycles = SiFive7GetCyclesDefault<mx>.c; + 
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListW>.c; + let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in + defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>; } } foreach mx = SchedMxListFW in { @@ -801,16 +804,13 @@ foreach mx = SchedMxListFW in { defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>; defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>; defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>; } } -} -foreach mx = SchedMxListFW in { defvar Cycles = SiFive7GetCyclesDefault<mx>.c; defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c; - let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { - defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFWCvtFToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; - } + let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in + defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; } // Narrowing foreach mx = SchedMxListW in { @@ -821,11 +821,13 @@ foreach mx = SchedMxListW in { } } foreach mx = SchedMxListFW in { - defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c; - defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c; - let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { - defm "" : LMULWriteResMX<"WriteVFNCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFNCvtFToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in { + defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c; + defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c; + let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>; + } } } @@ -960,6 +962,54 @@ let Latency = 3 in def : InstRW<[WriteIALU], (instrs COPY)>; +// VCIX +// +// In principle we don't know the latency of any VCIX instructions. But instead +// of taking the default of 1, which can lead to issues [1], we assume that they +// have a fairly high latency. 
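+// (Illustration, not from the patch:) at an LMUL where
+// SiFive7GetCyclesDefault<mx>.c evaluates to 4, the classes below get
+// Latency = !mul(4, 10) = 40 cycles, while ReleaseAtCycles still holds the
+// SiFive7VA pipe for only !add(1, 4) = 5 cycles.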
+// +// [1] https://github.com/llvm/llvm-project/issues/83391 +foreach mx = SchedMxList in { + defvar Cycles = SiFive7GetCyclesDefault<mx>.c; + defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; + let Latency = !mul(Cycles, 10), + AcquireAtCycles = [0, 1], + ReleaseAtCycles = [1, !add(1, Cycles)] in { + defm "" : LMULWriteResMX<"WriteVC_V_I", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVC_V_X", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVC_V_IV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVC_V_VV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVC_V_XV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVC_V_IVV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVC_V_IVW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVC_V_VVV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVC_V_VVW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVC_V_XVV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVC_V_XVW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + foreach f = ["FPR16", "FPR32", "FPR64"] in { + defm "" : LMULWriteResMX<"WriteVC_V_" # f # "V", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVC_V_" # f # "VV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVC_V_" # f # "VW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + } + defm "" : LMULWriteResMX<"WriteVC_I", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVC_X", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVC_IV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVC_VV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVC_XV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVC_IVV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVC_IVW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVC_VVV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVC_VVW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVC_XVV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVC_XVW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + foreach f = ["FPR16", "FPR32", "FPR64"] in { + defm "" : LMULWriteResMX<"WriteVC_" # f # "V", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVC_" # f # "VV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVC_" # f # "VW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + } + } +} + //===----------------------------------------------------------------------===// // Bypass and advance @@ -1169,24 +1219,24 @@ defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddV", 0>; defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddF", 0>; defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>; defm "" : LMULSEWReadAdvanceF<"ReadVFRecpV", 0>; -defm "" : LMULReadAdvance<"ReadVFMinMaxV", 0>; -defm "" : LMULReadAdvance<"ReadVFMinMaxF", 0>; -defm "" : LMULReadAdvance<"ReadVFSgnjV", 0>; -defm "" : LMULReadAdvance<"ReadVFSgnjF", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxV", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxF", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjV", 0>; +defm "" : 
LMULSEWReadAdvanceF<"ReadVFSgnjF", 0>; defm "" : LMULReadAdvance<"ReadVFCmpV", 0>; defm "" : LMULReadAdvance<"ReadVFCmpF", 0>; defm "" : LMULReadAdvance<"ReadVFClassV", 0>; defm "" : LMULReadAdvance<"ReadVFMergeV", 0>; defm "" : LMULReadAdvance<"ReadVFMergeF", 0>; defm "" : LMULReadAdvance<"ReadVFMovF", 0>; -defm "" : LMULReadAdvance<"ReadVFCvtIToFV", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFCvtIToFV", 0>; defm "" : LMULReadAdvance<"ReadVFCvtFToIV", 0>; -defm "" : LMULReadAdvanceW<"ReadVFWCvtIToFV", 0>; +defm "" : LMULSEWReadAdvanceW<"ReadVFWCvtIToFV", 0>; defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToIV", 0>; -defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToFV", 0>; -defm "" : LMULReadAdvanceFW<"ReadVFNCvtIToFV", 0>; +defm "" : LMULSEWReadAdvanceFW<"ReadVFWCvtFToFV", 0>; +defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtIToFV", 0>; defm "" : LMULReadAdvanceW<"ReadVFNCvtFToIV", 0>; -defm "" : LMULReadAdvanceFW<"ReadVFNCvtFToFV", 0>; +defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtFToFV", 0>; // 15. Vector Reduction Operations def : ReadAdvance<ReadVIRedV, 0>; diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td index 8ec2e4f..fccdd7e 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td @@ -366,4 +366,5 @@ defm : UnsupportedSchedZbkx; defm : UnsupportedSchedSFB; defm : UnsupportedSchedZfa; defm : UnsupportedSchedV; +defm : UnsupportedSchedXsfvcp; } diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td index 80090a0..6e4fb19 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td @@ -495,45 +495,37 @@ foreach mx = SchedMxListF in { defm "" : LMULSEWWriteResMXSEW<"WriteVFALUF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; defm "" : LMULSEWWriteResMXSEW<"WriteVFMulV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; defm "" : LMULSEWWriteResMXSEW<"WriteVFMulF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; - - } - } -} - -foreach mx = SchedMxListF in { - foreach sew = SchedSEWSet<mx, isF=1>.val in { - defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c; - defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c; - let Latency = 6, ReleaseAtCycles = [LMulLat] in { defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; } + let Latency = 2, ReleaseAtCycles = [LMulLat] in + defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; + let Latency = 3, ReleaseAtCycles = [LMulLat] in + defm "" : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; } } foreach mx = SchedMxListF in { foreach sew = SchedSEWSet<mx, isF=1>.val in { defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c; - defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c; - let Latency = 2, ReleaseAtCycles = [LMulLat] in - defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; + defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxList, isF=1>.c; + let Latency = 1, ReleaseAtCycles = [LMulLat] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjV", 
[SiFiveP600VectorArith], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; + } } } foreach mx = SchedMxList in { defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c; defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c; - let Latency = 3, ReleaseAtCycles = [LMulLat] in { - defm "" : LMULWriteResMX<"WriteVFCvtIToFV", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFiveP600VectorArith], mx, IsWorstCase>; - } + let Latency = 3, ReleaseAtCycles = [LMulLat] in + defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFiveP600VectorArith], mx, IsWorstCase>; let Latency = 2, ReleaseAtCycles = [LMulLat] in { defm "" : LMULWriteResMX<"WriteVFCmpV", [SiFiveP600VectorArith], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVFCmpF", [SiFiveP600VectorArith], mx, IsWorstCase>; } let Latency = 1, ReleaseAtCycles = [LMulLat] in { - defm "" : LMULWriteResMX<"WriteVFSgnjV", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFSgnjF", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFMinMaxV", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFMinMaxF", [SiFiveP600VectorArith], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVFClassV", [SiFiveP600VectorArith], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVFMergeV", [SiFiveP600VectorArith], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVFMovV", [SiFiveP600VectorArith], mx, IsWorstCase>; @@ -542,19 +534,18 @@ foreach mx = SchedMxList in { // Widening foreach mx = SchedMxListW in { - defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c; - defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxListW>.c; - let Latency = 3, ReleaseAtCycles = [LMulLat] in { - defm "" : LMULWriteResMX<"WriteVFWCvtIToFV", [SiFiveP600VectorArith], mx, IsWorstCase>; + foreach sew = SchedSEWSet<mx, isF=0, isWidening=1>.val in { + defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c; + defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListW>.c; + let Latency = 3, ReleaseAtCycles = [LMulLat] in + defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; } } foreach mx = SchedMxListFW in { defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c; defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxListFW>.c; - let Latency = 6, ReleaseAtCycles = [LMulLat] in { - defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFWCvtFToFV", [SiFiveP600VectorArith], mx, IsWorstCase>; - } + let Latency = 6, ReleaseAtCycles = [LMulLat] in + defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFiveP600VectorArith], mx, IsWorstCase>; } foreach mx = SchedMxListFW in { foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in { @@ -567,6 +558,7 @@ foreach mx = SchedMxListFW in { defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; } } } @@ -579,11 +571,13 @@ foreach mx = SchedMxListW in { } } foreach mx = SchedMxListFW in { - defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c; - defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxListFW>.c; - let Latency = 3, ReleaseAtCycles = 
[LMulLat] in { - defm "" : LMULWriteResMX<"WriteVFNCvtIToFV", [SiFiveP600VectorArith], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFNCvtFToFV", [SiFiveP600VectorArith], mx, IsWorstCase>; + foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in { + defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c; + defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c; + let Latency = 3, ReleaseAtCycles = [LMulLat] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>; + } } } @@ -968,22 +962,22 @@ defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>; defm "" : LMULSEWReadAdvance<"ReadVFRecpV", 0>; defm "" : LMULReadAdvance<"ReadVFCmpV", 0>; defm "" : LMULReadAdvance<"ReadVFCmpF", 0>; -defm "" : LMULReadAdvance<"ReadVFMinMaxV", 0>; -defm "" : LMULReadAdvance<"ReadVFMinMaxF", 0>; -defm "" : LMULReadAdvance<"ReadVFSgnjV", 0>; -defm "" : LMULReadAdvance<"ReadVFSgnjF", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxV", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxF", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjV", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjF", 0>; defm "" : LMULReadAdvance<"ReadVFClassV", 0>; defm "" : LMULReadAdvance<"ReadVFMergeV", 0>; defm "" : LMULReadAdvance<"ReadVFMergeF", 0>; defm "" : LMULReadAdvance<"ReadVFMovF", 0>; -defm "" : LMULReadAdvance<"ReadVFCvtIToFV", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFCvtIToFV", 0>; defm "" : LMULReadAdvance<"ReadVFCvtFToIV", 0>; -defm "" : LMULReadAdvanceW<"ReadVFWCvtIToFV", 0>; +defm "" : LMULSEWReadAdvanceW<"ReadVFWCvtIToFV", 0>; defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToIV", 0>; -defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToFV", 0>; -defm "" : LMULReadAdvanceFW<"ReadVFNCvtIToFV", 0>; +defm "" : LMULSEWReadAdvanceFW<"ReadVFWCvtFToFV", 0>; +defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtIToFV", 0>; defm "" : LMULReadAdvanceW<"ReadVFNCvtFToIV", 0>; -defm "" : LMULReadAdvanceFW<"ReadVFNCvtFToFV", 0>; +defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtFToFV", 0>; // 15. Vector Reduction Operations def : ReadAdvance<ReadVIRedV, 0>; @@ -1046,4 +1040,5 @@ defm : UnsupportedSchedZbkb; defm : UnsupportedSchedZbkx; defm : UnsupportedSchedSFB; defm : UnsupportedSchedZfa; +defm : UnsupportedSchedXsfvcp; } diff --git a/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td b/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td index 9625d17..0885e32 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td @@ -212,4 +212,5 @@ defm : UnsupportedSchedZbkb; defm : UnsupportedSchedZbkx; defm : UnsupportedSchedZfa; defm : UnsupportedSchedZfh; +defm : UnsupportedSchedXsfvcp; } diff --git a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td index 4fc7b03..e0f1fab 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td +++ b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td @@ -311,4 +311,5 @@ defm : UnsupportedSchedZfa; defm : UnsupportedSchedZfh; defm : UnsupportedSchedSFB; defm : UnsupportedSchedZabha; +defm : UnsupportedSchedXsfvcp; } diff --git a/llvm/lib/Target/RISCV/RISCVSchedule.td b/llvm/lib/Target/RISCV/RISCVSchedule.td index 1d19624..0086557 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedule.td +++ b/llvm/lib/Target/RISCV/RISCVSchedule.td @@ -296,3 +296,4 @@ def : ReadAdvance<ReadAtomicHD, 0>; // Include the scheduler resources for other instruction extensions. 
include "RISCVScheduleZb.td" include "RISCVScheduleV.td" +include "RISCVScheduleXSf.td" diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td index 6070482..5993884 100644 --- a/llvm/lib/Target/RISCV/RISCVScheduleV.td +++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td @@ -253,6 +253,18 @@ multiclass LMULReadAdvanceFW<string name, int val, list<SchedWrite> writes = []> : LMULReadAdvanceImpl<name, val, writes>; class LMULSchedWriteListFW<list<string> names> : LMULSchedWriteListImpl<names, SchedMxListFW>; +multiclass LMULSEWSchedWritesW<string name> + : LMULSEWSchedWritesImpl<name, SchedMxListW, isF = 0, isWidening = 1>; +multiclass LMULSEWSchedReadsW<string name> + : LMULSEWSchedReadsImpl<name, SchedMxListW, isF = 0, isWidening = 1>; +multiclass LMULSEWWriteResW<string name, list<ProcResourceKind> resources> + : LMULSEWWriteResImpl<name, resources, SchedMxListW, isF = 0, + isWidening = 1>; +multiclass + LMULSEWReadAdvanceW<string name, int val, list<SchedWrite> writes = []> + : LMULSEWReadAdvanceImpl<name, val, writes, SchedMxListW, isF = 0, + isWidening = 1>; + multiclass LMULSEWSchedWritesFW<string name> : LMULSEWSchedWritesImpl<name, SchedMxListFW, isF = 1, isWidening = 1>; multiclass LMULSEWSchedReadsFW<string name> @@ -434,11 +446,11 @@ defm "" : LMULSEWSchedWritesF<"WriteVFSqrtV">; // 13.10. Vector Floating-Point Reciprocal Estimate Instruction defm "" : LMULSEWSchedWritesF<"WriteVFRecpV">; // 13.11. Vector Floating-Point MIN/MAX Instructions -defm "" : LMULSchedWrites<"WriteVFMinMaxV">; -defm "" : LMULSchedWrites<"WriteVFMinMaxF">; +defm "" : LMULSEWSchedWritesF<"WriteVFMinMaxV">; +defm "" : LMULSEWSchedWritesF<"WriteVFMinMaxF">; // 13.12. Vector Floating-Point Sign-Injection Instructions -defm "" : LMULSchedWrites<"WriteVFSgnjV">; -defm "" : LMULSchedWrites<"WriteVFSgnjF">; +defm "" : LMULSEWSchedWritesF<"WriteVFSgnjV">; +defm "" : LMULSEWSchedWritesF<"WriteVFSgnjF">; // 13.13. Vector Floating-Point Compare Instructions defm "" : LMULSchedWrites<"WriteVFCmpV">; defm "" : LMULSchedWrites<"WriteVFCmpF">; @@ -449,16 +461,16 @@ defm "" : LMULSchedWrites<"WriteVFMergeV">; // 13.16. Vector Floating-Point Move Instruction defm "" : LMULSchedWrites<"WriteVFMovV">; // 13.17. Single-Width Floating-Point/Integer Type-Convert Instructions -defm "" : LMULSchedWrites<"WriteVFCvtIToFV">; +defm "" : LMULSEWSchedWritesF<"WriteVFCvtIToFV">; defm "" : LMULSchedWrites<"WriteVFCvtFToIV">; // 13.18. Widening Floating-Point/Integer Type-Convert Instructions -defm "" : LMULSchedWritesW<"WriteVFWCvtIToFV">; +defm "" : LMULSEWSchedWritesW<"WriteVFWCvtIToFV">; defm "" : LMULSchedWritesFW<"WriteVFWCvtFToIV">; -defm "" : LMULSchedWritesFW<"WriteVFWCvtFToFV">; +defm "" : LMULSEWSchedWritesFW<"WriteVFWCvtFToFV">; // 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions -defm "" : LMULSchedWritesFW<"WriteVFNCvtIToFV">; +defm "" : LMULSEWSchedWritesFW<"WriteVFNCvtIToFV">; defm "" : LMULSchedWritesW<"WriteVFNCvtFToIV">; -defm "" : LMULSchedWritesFW<"WriteVFNCvtFToFV">; +defm "" : LMULSEWSchedWritesFW<"WriteVFNCvtFToFV">; // 14. Vector Reduction Operations // The latency of reduction is determined by the size of the read resource. @@ -659,11 +671,11 @@ defm "" : LMULSEWSchedReadsF<"ReadVFSqrtV">; // 13.10. Vector Floating-Point Reciprocal Estimate Instruction defm "" : LMULSEWSchedReadsF<"ReadVFRecpV">; // 13.11. 
Vector Floating-Point MIN/MAX Instructions -defm "" : LMULSchedReads<"ReadVFMinMaxV">; -defm "" : LMULSchedReads<"ReadVFMinMaxF">; +defm "" : LMULSEWSchedReadsF<"ReadVFMinMaxV">; +defm "" : LMULSEWSchedReadsF<"ReadVFMinMaxF">; // 13.12. Vector Floating-Point Sign-Injection Instructions -defm "" : LMULSchedReads<"ReadVFSgnjV">; -defm "" : LMULSchedReads<"ReadVFSgnjF">; +defm "" : LMULSEWSchedReadsF<"ReadVFSgnjV">; +defm "" : LMULSEWSchedReadsF<"ReadVFSgnjF">; // 13.13. Vector Floating-Point Compare Instructions defm "" : LMULSchedReads<"ReadVFCmpV">; defm "" : LMULSchedReads<"ReadVFCmpF">; @@ -675,16 +687,16 @@ defm "" : LMULSchedReads<"ReadVFMergeF">; // 13.16. Vector Floating-Point Move Instruction defm "" : LMULSchedReads<"ReadVFMovF">; // 13.17. Single-Width Floating-Point/Integer Type-Convert Instructions -defm "" : LMULSchedReads<"ReadVFCvtIToFV">; +defm "" : LMULSEWSchedReadsF<"ReadVFCvtIToFV">; defm "" : LMULSchedReads<"ReadVFCvtFToIV">; // 13.18. Widening Floating-Point/Integer Type-Convert Instructions -defm "" : LMULSchedReadsW<"ReadVFWCvtIToFV">; +defm "" : LMULSEWSchedReadsW<"ReadVFWCvtIToFV">; defm "" : LMULSchedReadsFW<"ReadVFWCvtFToIV">; -defm "" : LMULSchedReadsFW<"ReadVFWCvtFToFV">; +defm "" : LMULSEWSchedReadsFW<"ReadVFWCvtFToFV">; // 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions -defm "" : LMULSchedReadsFW<"ReadVFNCvtIToFV">; +defm "" : LMULSEWSchedReadsFW<"ReadVFNCvtIToFV">; defm "" : LMULSchedReadsW<"ReadVFNCvtFToIV">; -defm "" : LMULSchedReadsFW<"ReadVFNCvtFToFV">; +defm "" : LMULSEWSchedReadsFW<"ReadVFNCvtFToFV">; // 14. Vector Reduction Operations // 14.1. Vector Single-Width Integer Reduction Instructions @@ -896,23 +908,23 @@ defm "" : LMULSEWWriteResFW<"WriteVFWMulAddV", []>; defm "" : LMULSEWWriteResFW<"WriteVFWMulAddF", []>; defm "" : LMULSEWWriteResF<"WriteVFSqrtV", []>; defm "" : LMULSEWWriteResF<"WriteVFRecpV", []>; -defm "" : LMULWriteRes<"WriteVFMinMaxV", []>; -defm "" : LMULWriteRes<"WriteVFMinMaxF", []>; -defm "" : LMULWriteRes<"WriteVFSgnjV", []>; -defm "" : LMULWriteRes<"WriteVFSgnjF", []>; +defm "" : LMULSEWWriteResF<"WriteVFMinMaxV", []>; +defm "" : LMULSEWWriteResF<"WriteVFMinMaxF", []>; +defm "" : LMULSEWWriteResF<"WriteVFSgnjV", []>; +defm "" : LMULSEWWriteResF<"WriteVFSgnjF", []>; defm "" : LMULWriteRes<"WriteVFCmpV", []>; defm "" : LMULWriteRes<"WriteVFCmpF", []>; defm "" : LMULWriteRes<"WriteVFClassV", []>; defm "" : LMULWriteRes<"WriteVFMergeV", []>; defm "" : LMULWriteRes<"WriteVFMovV", []>; -defm "" : LMULWriteRes<"WriteVFCvtIToFV", []>; +defm "" : LMULSEWWriteResF<"WriteVFCvtIToFV", []>; defm "" : LMULWriteRes<"WriteVFCvtFToIV", []>; -defm "" : LMULWriteResW<"WriteVFWCvtIToFV", []>; +defm "" : LMULSEWWriteResW<"WriteVFWCvtIToFV", []>; defm "" : LMULWriteResFW<"WriteVFWCvtFToIV", []>; -defm "" : LMULWriteResFW<"WriteVFWCvtFToFV", []>; -defm "" : LMULWriteResFW<"WriteVFNCvtIToFV", []>; +defm "" : LMULSEWWriteResFW<"WriteVFWCvtFToFV", []>; +defm "" : LMULSEWWriteResFW<"WriteVFNCvtIToFV", []>; defm "" : LMULWriteResW<"WriteVFNCvtFToIV", []>; -defm "" : LMULWriteResFW<"WriteVFNCvtFToFV", []>; +defm "" : LMULSEWWriteResFW<"WriteVFNCvtFToFV", []>; // 14. 
Vector Reduction Operations defm "" : LMULSEWWriteRes<"WriteVIRedV_From", []>; @@ -1052,24 +1064,24 @@ defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddV", 0>; defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddF", 0>; defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>; defm "" : LMULSEWReadAdvanceF<"ReadVFRecpV", 0>; -defm "" : LMULReadAdvance<"ReadVFMinMaxV", 0>; -defm "" : LMULReadAdvance<"ReadVFMinMaxF", 0>; -defm "" : LMULReadAdvance<"ReadVFSgnjV", 0>; -defm "" : LMULReadAdvance<"ReadVFSgnjF", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxV", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxF", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjV", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjF", 0>; defm "" : LMULReadAdvance<"ReadVFCmpV", 0>; defm "" : LMULReadAdvance<"ReadVFCmpF", 0>; defm "" : LMULReadAdvance<"ReadVFClassV", 0>; defm "" : LMULReadAdvance<"ReadVFMergeV", 0>; defm "" : LMULReadAdvance<"ReadVFMergeF", 0>; defm "" : LMULReadAdvance<"ReadVFMovF", 0>; -defm "" : LMULReadAdvance<"ReadVFCvtIToFV", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFCvtIToFV", 0>; defm "" : LMULReadAdvance<"ReadVFCvtFToIV", 0>; -defm "" : LMULReadAdvanceW<"ReadVFWCvtIToFV", 0>; +defm "" : LMULSEWReadAdvanceW<"ReadVFWCvtIToFV", 0>; defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToIV", 0>; -defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToFV", 0>; -defm "" : LMULReadAdvanceFW<"ReadVFNCvtIToFV", 0>; +defm "" : LMULSEWReadAdvanceFW<"ReadVFWCvtFToFV", 0>; +defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtIToFV", 0>; defm "" : LMULReadAdvanceW<"ReadVFNCvtFToIV", 0>; -defm "" : LMULReadAdvanceFW<"ReadVFNCvtFToFV", 0>; +defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtFToFV", 0>; // 14. Vector Reduction Operations def : ReadAdvance<ReadVIRedV, 0>; diff --git a/llvm/lib/Target/RISCV/RISCVScheduleXSf.td b/llvm/lib/Target/RISCV/RISCVScheduleXSf.td new file mode 100644 index 0000000..58d5084 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVScheduleXSf.td @@ -0,0 +1,59 @@ +//===-- RISCVScheduleXSf.td - Scheduling Definitions XSf ---*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes the scheduling information for SiFive extensions. 
+// +//===----------------------------------------------------------------------===// + +multiclass LMULSchedWritesVCIX<string id>{ +defm "" : LMULSchedWrites<"WriteVC_" # id>; +defm "" : LMULSchedWrites<"WriteVC_V_" # id>; +} + +defm "" : LMULSchedWritesVCIX<"I">; +defm "" : LMULSchedWritesVCIX<"X">; +defm "" : LMULSchedWritesVCIX<"IV">; +defm "" : LMULSchedWritesVCIX<"VV">; +defm "" : LMULSchedWritesVCIX<"XV">; +defm "" : LMULSchedWritesVCIX<"IVV">; +defm "" : LMULSchedWritesVCIX<"IVW">; +defm "" : LMULSchedWritesVCIX<"VVV">; +defm "" : LMULSchedWritesVCIX<"VVW">; +defm "" : LMULSchedWritesVCIX<"XVV">; +defm "" : LMULSchedWritesVCIX<"XVW">; +foreach f = ["FPR16", "FPR32", "FPR64"] in { + defm "" : LMULSchedWritesVCIX<f # "V">; + defm "" : LMULSchedWritesVCIX<f # "VV">; + defm "" : LMULSchedWritesVCIX<f # "VW">; +} + +multiclass LMULWriteResVCIX<string id, list<ProcResourceKind> resources>{ +defm : LMULWriteRes<"WriteVC_" # id, resources>; +defm : LMULWriteRes<"WriteVC_V_" # id, resources>; +} + +multiclass UnsupportedSchedXsfvcp { +let Unsupported = true in { +defm : LMULWriteResVCIX<"I", []>; +defm : LMULWriteResVCIX<"X", []>; +defm : LMULWriteResVCIX<"IV", []>; +defm : LMULWriteResVCIX<"VV", []>; +defm : LMULWriteResVCIX<"XV", []>; +defm : LMULWriteResVCIX<"IVV", []>; +defm : LMULWriteResVCIX<"IVW", []>; +defm : LMULWriteResVCIX<"VVV", []>; +defm : LMULWriteResVCIX<"VVW", []>; +defm : LMULWriteResVCIX<"XVV", []>; +defm : LMULWriteResVCIX<"XVW", []>; +foreach f = ["FPR16", "FPR32", "FPR64"] in { + defm : LMULWriteResVCIX<f # "V", []>; + defm : LMULWriteResVCIX<f # "VV", []>; + defm : LMULWriteResVCIX<f # "VW", []>; +} +} +} diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index bc9756c..56f5bd8 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1335,8 +1335,8 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); + std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy); if (Opcode == Instruction::Select && ValTy->isVectorTy()) { - std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy); if (CondTy->isVectorTy()) { if (ValTy->getScalarSizeInBits() == 1) { // vmandn.mm v8, v8, v9 @@ -1375,14 +1375,15 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, LT.second, CostKind); } - if ((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) && - ValTy->isVectorTy()) { - std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy); - - // Support natively. 
- if (CmpInst::isIntPredicate(VecPred)) - return LT.first * 1; + if ((Opcode == Instruction::ICmp) && ValTy->isVectorTy() && + CmpInst::isIntPredicate(VecPred)) { + // Use VMSLT_VV to represent VMSEQ, VMSNE, VMSLTU, VMSLEU, VMSLT, VMSLE + // provided they incur the same cost across all implementations. + return LT.first * + getRISCVInstructionCost(RISCV::VMSLT_VV, LT.second, CostKind); + } + if ((Opcode == Instruction::FCmp) && ValTy->isVectorTy()) { // If we do not support the input floating point vector type, use the base // one which will calculate as: // ScalarizeCost + Num * Cost for fixed vector, diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index e0c0e65..2f9281a 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -228,7 +228,7 @@ public: return false; EVT ElemType = DataTypeVT.getScalarType(); - if (!ST->hasFastUnalignedAccess() && Alignment < ElemType.getStoreSize()) + if (!ST->enableUnalignedVectorMem() && Alignment < ElemType.getStoreSize()) return false; return TLI->isLegalElementTypeForRVV(ElemType); @@ -253,7 +253,7 @@ public: return false; EVT ElemType = DataTypeVT.getScalarType(); - if (!ST->hasFastUnalignedAccess() && Alignment < ElemType.getStoreSize()) + if (!ST->enableUnalignedVectorMem() && Alignment < ElemType.getStoreSize()) return false; return TLI->isLegalElementTypeForRVV(ElemType);
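As a usage sketch of the new UnsupportedSchedXsfvcp multiclass: a scheduler model that does not implement the SiFive VCIX instructions opts out exactly as the Rocket, SCR1, P400, P600, and NanHu models above do. The model name and its parameters below are hypothetical, for illustration only; the pattern mirrors the in-tree changes:

def HypotheticalNoVCIXModel : SchedMachineModel {
  let IssueWidth = 1;
  let MicroOpBufferSize = 0;
  let CompleteModel = 0;
}

let SchedModel = HypotheticalNoVCIXModel in {
// Mark every WriteVC_* and WriteVC_V_* class as unsupported so TableGen does
// not demand WriteRes entries for VCIX instructions in this model.
defm : UnsupportedSchedXsfvcp;
}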