Diffstat (limited to 'llvm/lib')
40 files changed, 494 insertions, 243 deletions
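Two of the writer changes below (BitcodeWriter.cpp and AsmWriter.cpp) use the same idiom: a hidden cl::opt whose value, when the flag is given explicitly, overrides the boolean the caller passed in, detected via getNumOccurrences(). A minimal sketch of the pattern, with hypothetical names:

    #include "llvm/Support/CommandLine.h"

    static llvm::cl::opt<bool> OverrideFlag(
        "override-flag", llvm::cl::Hidden, llvm::cl::init(true),
        llvm::cl::desc("Hypothetical flag that overrides an API parameter."));

    static bool effectiveValue(bool CallerValue) {
      // getNumOccurrences() is nonzero only if the option appeared on the
      // command line, so an explicit flag wins and the caller's value is
      // used otherwise.
      return OverrideFlag.getNumOccurrences() ? OverrideFlag : CallerValue;
    }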
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 0ca55a26..54e916e 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -118,6 +118,10 @@ static cl::opt<bool> #endif cl::desc("")); +static cl::opt<bool> PreserveBitcodeUseListOrder( + "preserve-bc-uselistorder", cl::Hidden, cl::init(true), + cl::desc("Preserve use-list order when writing LLVM bitcode.")); + namespace llvm { extern FunctionSummary::ForceSummaryHotnessType ForceSummaryEdgesCold; } @@ -217,7 +221,10 @@ public: bool ShouldPreserveUseListOrder, const ModuleSummaryIndex *Index) : BitcodeWriterBase(Stream, StrtabBuilder), M(M), - VE(M, ShouldPreserveUseListOrder), Index(Index) { + VE(M, PreserveBitcodeUseListOrder.getNumOccurrences() + ? PreserveBitcodeUseListOrder + : ShouldPreserveUseListOrder), + Index(Index) { // Assign ValueIds to any callee values in the index that came from // indirect call profiles and were recorded as a GUID not a Value* // (which would have been assigned an ID by the ValueEnumerator). diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index aa078f3..e40fb76 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -704,9 +704,17 @@ void DwarfUnit::addType(DIE &Entity, const DIType *Ty, addDIEEntry(Entity, Attribute, DIEEntry(*getOrCreateTypeDIE(Ty))); } +// FIXME: change callsites to use the new DW_LNAME_ language codes. llvm::dwarf::SourceLanguage DwarfUnit::getSourceLanguage() const { - return static_cast<llvm::dwarf::SourceLanguage>( - getLanguage().getUnversionedName()); + const auto &Lang = getLanguage(); + + if (!Lang.hasVersionedName()) + return static_cast<llvm::dwarf::SourceLanguage>(Lang.getName()); + + return llvm::dwarf::toDW_LANG( + static_cast<llvm::dwarf::SourceLanguageName>(Lang.getName()), + Lang.getVersion()) + .value_or(llvm::dwarf::DW_LANG_hi_user); } std::string DwarfUnit::getParentContextString(const DIScope *Context) const { diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index d5153b7..cdcb29d9 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -1209,7 +1209,7 @@ MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB, MIE = MBB.instr_begin(); MII != MIE; --MII) { const MachineInstr &MI = *std::prev(MII); - if (MI.isDebugInstr() || MI.isPseudoProbe()) + if (MI.isDebugOrPseudoInstr()) continue; RegisterOperands RegOpers; RegOpers.collect(MI, *TRI, *MRI, false, false); diff --git a/llvm/lib/CodeGen/RegisterPressure.cpp b/llvm/lib/CodeGen/RegisterPressure.cpp index 5f37890..7d4674b 100644 --- a/llvm/lib/CodeGen/RegisterPressure.cpp +++ b/llvm/lib/CodeGen/RegisterPressure.cpp @@ -858,7 +858,7 @@ void RegPressureTracker::recedeSkipDebugValues() { void RegPressureTracker::recede(SmallVectorImpl<VRegMaskOrUnit> *LiveUses) { recedeSkipDebugValues(); - if (CurrPos->isDebugInstr() || CurrPos->isPseudoProbe()) { + if (CurrPos->isDebugOrPseudoInstr()) { // It's possible to only have debug_value and pseudo probe instructions and // hit the start of the block. 
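The MachineSink and RegisterPressure hunks replace the hand-written pair of checks with MachineInstr::isDebugOrPseudoInstr(). As far as I know the helper simply folds the two predicates the old code spelled out; roughly:

    // Sketch of the equivalence assumed by these two hunks:
    static bool shouldSkip(const llvm::MachineInstr &MI) {
      // isDebugOrPseudoInstr() ~= isDebugInstr() || isPseudoProbe()
      return MI.isDebugOrPseudoInstr();
    }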
assert(CurrPos == MBB->begin()); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b47274b..b23b190 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10628,7 +10628,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // folding this will increase the total number of instructions. if (N0.getOpcode() == ISD::SRL && (N0.getOperand(1) == N1 || N0.hasOneUse()) && - TLI.shouldFoldConstantShiftPairToMask(N, Level)) { + TLI.shouldFoldConstantShiftPairToMask(N)) { if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount, /*AllowUndefs*/ false, /*AllowTypeMismatch*/ true)) { @@ -11207,7 +11207,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) or // (and (srl x, (sub c2, c1), MASK) if ((N0.getOperand(1) == N1 || N0->hasOneUse()) && - TLI.shouldFoldConstantShiftPairToMask(N, Level)) { + TLI.shouldFoldConstantShiftPairToMask(N)) { auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS, ConstantSDNode *RHS) { const APInt &LHSC = LHS->getAPIntValue(); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index b5f8a61..437d0f4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -3313,7 +3313,6 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { case ISD::FP_ROUND: R = SoftPromoteHalfRes_FP_ROUND(N); break; // Unary FP Operations - case ISD::FABS: case ISD::FACOS: case ISD::FASIN: case ISD::FATAN: @@ -3329,7 +3328,6 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { case ISD::FLOG2: case ISD::FLOG10: case ISD::FNEARBYINT: - case ISD::FNEG: case ISD::FREEZE: case ISD::FRINT: case ISD::FROUND: @@ -3341,6 +3339,12 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { case ISD::FTAN: case ISD::FTANH: case ISD::FCANONICALIZE: R = SoftPromoteHalfRes_UnaryOp(N); break; + case ISD::FABS: + R = SoftPromoteHalfRes_FABS(N); + break; + case ISD::FNEG: + R = SoftPromoteHalfRes_FNEG(N); + break; case ISD::AssertNoFPClass: R = SoftPromoteHalfRes_AssertNoFPClass(N); break; @@ -3670,6 +3674,24 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_UnaryOp(SDNode *N) { return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res); } +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FABS(SDNode *N) { + SDValue Op = GetSoftPromotedHalf(N->getOperand(0)); + SDLoc dl(N); + + // Clear the sign bit. + return DAG.getNode(ISD::AND, dl, MVT::i16, Op, + DAG.getConstant(0x7fff, dl, MVT::i16)); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FNEG(SDNode *N) { + SDValue Op = GetSoftPromotedHalf(N->getOperand(0)); + SDLoc dl(N); + + // Invert the sign bit. 
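Both new SoftPromoteHalf handlers work on the raw i16 bit pattern of the half value, so FABS and FNEG reduce to integer operations on the IEEE-754 sign bit (bit 15), as the AND/XOR nodes above and below show. A standalone sketch of the same trick:

    #include <cstdint>

    // fabs on a raw half bit pattern: clear the sign bit.
    uint16_t fabsF16Bits(uint16_t H) { return H & 0x7fff; }

    // fneg on a raw half bit pattern: flip the sign bit.
    uint16_t fnegF16Bits(uint16_t H) { return H ^ 0x8000; }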
+ return DAG.getNode(ISD::XOR, dl, MVT::i16, Op, + DAG.getConstant(0x8000, dl, MVT::i16)); +} + SDValue DAGTypeLegalizer::SoftPromoteHalfRes_AssertNoFPClass(SDNode *N) { return GetSoftPromotedHalf(N->getOperand(0)); } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index d580ce0..603dc34 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -832,6 +832,8 @@ private: SDValue SoftPromoteHalfRes_SELECT(SDNode *N); SDValue SoftPromoteHalfRes_SELECT_CC(SDNode *N); SDValue SoftPromoteHalfRes_UnaryOp(SDNode *N); + SDValue SoftPromoteHalfRes_FABS(SDNode *N); + SDValue SoftPromoteHalfRes_FNEG(SDNode *N); SDValue SoftPromoteHalfRes_AssertNoFPClass(SDNode *N); SDValue SoftPromoteHalfRes_XINT_TO_FP(SDNode *N); SDValue SoftPromoteHalfRes_UNDEF(SDNode *N); diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 0bc877d..2430d98 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -102,6 +102,10 @@ static cl::opt<bool> PrintProfData( "print-prof-data", cl::Hidden, cl::desc("Pretty print perf data (branch weights, etc) when dumping")); +static cl::opt<bool> PreserveAssemblyUseListOrder( + "preserve-ll-uselistorder", cl::Hidden, cl::init(false), + cl::desc("Preserve use-list order when writing LLVM assembly.")); + // Make virtual table appear in this compilation unit. AssemblyAnnotationWriter::~AssemblyAnnotationWriter() = default; @@ -2939,7 +2943,10 @@ AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, bool IsForDebug, bool ShouldPreserveUseListOrder) : Out(o), TheModule(M), Machine(Mac), TypePrinter(M), AnnotationWriter(AAW), IsForDebug(IsForDebug), - ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) { + ShouldPreserveUseListOrder( + PreserveAssemblyUseListOrder.getNumOccurrences() + ? PreserveAssemblyUseListOrder + : ShouldPreserveUseListOrder) { if (!TheModule) return; for (const GlobalObject &GO : TheModule->global_objects()) @@ -2950,7 +2957,8 @@ AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, const ModuleSummaryIndex *Index, bool IsForDebug) : Out(o), TheIndex(Index), Machine(Mac), TypePrinter(/*Module=*/nullptr), - IsForDebug(IsForDebug), ShouldPreserveUseListOrder(false) {} + IsForDebug(IsForDebug), + ShouldPreserveUseListOrder(PreserveAssemblyUseListOrder) {} void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) { if (!Operand) { diff --git a/llvm/lib/IR/ConstantFPRange.cpp b/llvm/lib/IR/ConstantFPRange.cpp index 2477e22..070e833 100644 --- a/llvm/lib/IR/ConstantFPRange.cpp +++ b/llvm/lib/IR/ConstantFPRange.cpp @@ -326,6 +326,8 @@ std::optional<bool> ConstantFPRange::getSignBit() const { } bool ConstantFPRange::operator==(const ConstantFPRange &CR) const { + assert(&getSemantics() == &CR.getSemantics() && + "Should only use the same semantics"); if (MayBeSNaN != CR.MayBeSNaN || MayBeQNaN != CR.MayBeQNaN) return false; return Lower.bitwiseIsEqual(CR.Lower) && Upper.bitwiseIsEqual(CR.Upper); @@ -425,3 +427,20 @@ ConstantFPRange ConstantFPRange::getWithoutInf() const { return ConstantFPRange(std::move(NewLower), std::move(NewUpper), MayBeQNaN, MayBeSNaN); } + +ConstantFPRange ConstantFPRange::cast(const fltSemantics &DstSem, + APFloat::roundingMode RM) const { + bool LosesInfo; + APFloat NewLower = Lower; + APFloat NewUpper = Upper; + // For conservative, return full range if conversion is invalid. 
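The new ConstantFPRange::cast is conservative on two fronts: an endpoint whose conversion is invalid (or that lands on NaN) widens the result to the full target range, and a possible SNaN becomes a possible QNaN because conversion may quiet it. A hypothetical caller, assuming the usual APFloat semantics accessors:

    #include "llvm/IR/ConstantFPRange.h"

    using llvm::APFloat;
    using llvm::ConstantFPRange;

    // Narrow an f64 range to f32 with round-to-nearest-even; endpoints that
    // do not convert cleanly yield the full f32 range instead.
    ConstantFPRange narrowToF32(const ConstantFPRange &R64) {
      return R64.cast(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven);
    }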
+ if (NewLower.convert(DstSem, RM, &LosesInfo) == APFloat::opInvalidOp || + NewLower.isNaN()) + return getFull(DstSem); + if (NewUpper.convert(DstSem, RM, &LosesInfo) == APFloat::opInvalidOp || + NewUpper.isNaN()) + return getFull(DstSem); + return ConstantFPRange(std::move(NewLower), std::move(NewUpper), + /*MayBeQNaNVal=*/MayBeQNaN || MayBeSNaN, + /*MayBeSNaNVal=*/false); +} diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 7294f3e..fbce3b0 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -18640,7 +18640,7 @@ bool AArch64TargetLowering::isDesirableToCommuteXorWithShift( } bool AArch64TargetLowering::shouldFoldConstantShiftPairToMask( - const SDNode *N, CombineLevel Level) const { + const SDNode *N) const { assert(((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index e472e7d..00956fd 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -300,8 +300,7 @@ public: bool isDesirableToCommuteXorWithShift(const SDNode *N) const override; /// Return true if it is profitable to fold a pair of shifts into a mask. - bool shouldFoldConstantShiftPairToMask(const SDNode *N, - CombineLevel Level) const override; + bool shouldFoldConstantShiftPairToMask(const SDNode *N) const override; /// Return true if it is profitable to fold a pair of shifts into a mask. bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override { diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 83c7def..67ea2dd 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -13816,7 +13816,7 @@ bool ARMTargetLowering::isDesirableToCommuteXorWithShift( } bool ARMTargetLowering::shouldFoldConstantShiftPairToMask( - const SDNode *N, CombineLevel Level) const { + const SDNode *N) const { assert(((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && @@ -13826,7 +13826,8 @@ bool ARMTargetLowering::shouldFoldConstantShiftPairToMask( if (!Subtarget->isThumb1Only()) return true; - if (Level == BeforeLegalizeTypes) + EVT VT = N->getValueType(0); + if (VT.getScalarSizeInBits() > 32) return true; return false; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 26ff54c..70aa001 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -772,8 +772,7 @@ class VectorType; bool isDesirableToCommuteXorWithShift(const SDNode *N) const override; - bool shouldFoldConstantShiftPairToMask(const SDNode *N, - CombineLevel Level) const override; + bool shouldFoldConstantShiftPairToMask(const SDNode *N) const override; /// Return true if it is profitable to fold a pair of shifts into a mask. 
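For context, the fold this hook gates rewrites a constant shift pair into a single AND with a mask; the ARM override above now keys on the type's scalar size rather than on the combine level. A scalar reference for the equal-amount case (helper name invented):

    #include <cstdint>

    // ((X >> C) << C) keeps the high bits and zeroes the low C bits, which
    // is exactly an AND with a mask (assumes C < 32).
    uint32_t shiftPairAsMask(uint32_t X, unsigned C) {
      return X & (0xFFFFFFFFu << C); // same value as ((X >> C) << C)
    }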
bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override { diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index b05de49..7f1ff45 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -1306,7 +1306,7 @@ bool MipsTargetLowering::hasBitTest(SDValue X, SDValue Y) const { } bool MipsTargetLowering::shouldFoldConstantShiftPairToMask( - const SDNode *N, CombineLevel Level) const { + const SDNode *N) const { assert(((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h index c65c76c..25a0bf9 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/llvm/lib/Target/Mips/MipsISelLowering.h @@ -290,8 +290,7 @@ class TargetRegisterClass; bool isCheapToSpeculateCttz(Type *Ty) const override; bool isCheapToSpeculateCtlz(Type *Ty) const override; bool hasBitTest(SDValue X, SDValue Y) const override; - bool shouldFoldConstantShiftPairToMask(const SDNode *N, - CombineLevel Level) const override; + bool shouldFoldConstantShiftPairToMask(const SDNode *N) const override; /// Return the register type for a given MVT, ensuring vectors are treated /// as a series of gpr sized integers. diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index f692180..944a1e2 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -585,6 +585,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // We cannot sextinreg(i1). Expand to shifts. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + // Custom handling for PowerPC ucmp instruction + setOperationAction(ISD::UCMP, MVT::i32, Custom); + setOperationAction(ISD::UCMP, MVT::i64, isPPC64 ? Custom : Expand); + // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support // SjLj exception handling but a light-weight setjmp/longjmp replacement to // support continuation, user-level threading, and etc.. As a result, no @@ -12618,6 +12622,33 @@ SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const { return DAG.getMergeValues({Sub, OverflowTrunc}, dl); } +// Lower unsigned 3-way compare producing -1/0/1. +SDValue PPCTargetLowering::LowerUCMP(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + SDValue A = DAG.getFreeze(Op.getOperand(0)); + SDValue B = DAG.getFreeze(Op.getOperand(1)); + EVT OpVT = A.getValueType(); // operand type + EVT ResVT = Op.getValueType(); // result type + + // First compute diff = A - B (will become subf). + SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, A, B); + + // Generate B - A using SUBC to capture carry. + SDVTList VTs = DAG.getVTList(OpVT, MVT::i32); + SDValue SubC = DAG.getNode(PPCISD::SUBC, DL, VTs, B, A); + SDValue CA0 = SubC.getValue(1); + + // t2 = A - B + CA0 using SUBE. + SDValue SubE1 = DAG.getNode(PPCISD::SUBE, DL, VTs, A, B, CA0); + SDValue CA1 = SubE1.getValue(1); + + // res = diff - t2 + CA1 using SUBE (produces desired -1/0/1). + SDValue ResPair = DAG.getNode(PPCISD::SUBE, DL, VTs, Diff, SubE1, CA1); + + // Extract the first result and truncate to result type if needed + return DAG.getSExtOrTrunc(ResPair.getValue(0), DL, ResVT); +} + /// LowerOperation - Provide custom lowering hooks for some operations. 
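Reference semantics for ISD::UCMP, which the SUBC/SUBE sequence above computes branch-free: the result is -1, 0, or 1 according to an unsigned comparison of the operands. A scalar sketch:

    #include <cstdint>

    // Unsigned three-way compare; the PPC lowering produces the same
    // -1/0/1 result via subtract-with-borrow instead of two setccs.
    int64_t ucmp64(uint64_t A, uint64_t B) {
      return (int64_t)(A > B) - (int64_t)(A < B);
    }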
/// SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { @@ -12722,6 +12753,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::UADDO_CARRY: case ISD::USUBO_CARRY: return LowerADDSUBO_CARRY(Op, DAG); + case ISD::UCMP: + return LowerUCMP(Op, DAG); } } diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 6694305..59f3387 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1318,6 +1318,7 @@ namespace llvm { SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) const; SDValue LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUCMP(SDValue Op, SelectionDAG &DAG) const; SDValue lowerToLibCall(const char *LibCallName, SDValue Op, SelectionDAG &DAG) const; SDValue lowerLibCallBasedOnType(const char *LibCallFloatName, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 7a14929..66717b9 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1367,9 +1367,8 @@ def : InstAlias<".insn_s $opcode, $funct3, $rs2, (${rs1})", class PatGpr<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT> : Pat<(vt (OpNode (vt GPR:$rs1))), (Inst GPR:$rs1)>; -class PatGprGpr<SDPatternOperator OpNode, RVInst Inst, ValueType vt1 = XLenVT, - ValueType vt2 = XLenVT> - : Pat<(vt1 (OpNode (vt1 GPR:$rs1), (vt2 GPR:$rs2))), (Inst GPR:$rs1, GPR:$rs2)>; +class PatGprGpr<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT> + : Pat<(vt (OpNode (vt GPR:$rs1), (vt GPR:$rs2))), (Inst GPR:$rs1, GPR:$rs2)>; class PatGprImm<SDPatternOperator OpNode, RVInst Inst, ImmLeaf ImmType, ValueType vt = XLenVT> @@ -1653,17 +1652,18 @@ def riscv_selectcc_frag : PatFrag<(ops node:$lhs, node:$rhs, node:$cc, node:$falsev), [{}], IntCCtoRISCVCC>; -multiclass SelectCC_GPR_rrirr<DAGOperand valty, ValueType vt> { +multiclass SelectCC_GPR_rrirr<DAGOperand valty, ValueType vt, + ValueType cmpvt = XLenVT> { let usesCustomInserter = 1 in def _Using_CC_GPR : Pseudo<(outs valty:$dst), (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, valty:$truev, valty:$falsev), [(set valty:$dst, - (riscv_selectcc_frag:$cc (XLenVT GPR:$lhs), GPR:$rhs, cond, + (riscv_selectcc_frag:$cc (cmpvt GPR:$lhs), GPR:$rhs, cond, (vt valty:$truev), valty:$falsev))]>; // Explicitly select 0 in the condition to X0. The register coalescer doesn't // always do it. 
- def : Pat<(riscv_selectcc_frag:$cc (XLenVT GPR:$lhs), 0, cond, (vt valty:$truev), + def : Pat<(riscv_selectcc_frag:$cc (cmpvt GPR:$lhs), 0, cond, (vt valty:$truev), valty:$falsev), (!cast<Instruction>(NAME#"_Using_CC_GPR") GPR:$lhs, (XLenVT X0), (IntCCtoRISCVCC $cc), valty:$truev, valty:$falsev)>; @@ -1972,8 +1972,9 @@ def PseudoZEXT_W : Pseudo<(outs GPR:$rd), (ins GPR:$rs), [], "zext.w", "$rd, $rs /// Loads -class LdPat<PatFrag LoadOp, RVInst Inst, ValueType vt = XLenVT> - : Pat<(vt (LoadOp (AddrRegImm (XLenVT GPRMem:$rs1), simm12_lo:$imm12))), +class LdPat<PatFrag LoadOp, RVInst Inst, ValueType vt = XLenVT, + ValueType PtrVT = XLenVT> + : Pat<(vt (LoadOp (AddrRegImm (PtrVT GPRMem:$rs1), simm12_lo:$imm12))), (Inst GPRMem:$rs1, simm12_lo:$imm12)>; def : LdPat<sextloadi8, LB>; @@ -1987,8 +1988,8 @@ def : LdPat<zextloadi16, LHU>; /// Stores class StPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy, - ValueType vt> - : Pat<(StoreOp (vt StTy:$rs2), (AddrRegImm (XLenVT GPRMem:$rs1), + ValueType vt, ValueType PtrVT = XLenVT> + : Pat<(StoreOp (vt StTy:$rs2), (AddrRegImm (PtrVT GPRMem:$rs1), simm12_lo:$imm12)), (Inst StTy:$rs2, GPRMem:$rs1, simm12_lo:$imm12)>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td index b9510ef..afac37d 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -59,9 +59,9 @@ def FPR64IN32X : RegisterOperand<GPRPair> { def DExt : ExtInfo<"", "", [HasStdExtD], f64, FPR64, FPR32, FPR64, ?>; def ZdinxExt : ExtInfo<"_INX", "Zfinx", [HasStdExtZdinx, IsRV64], - f64, FPR64INX, FPR32INX, FPR64INX, ?>; + f64, FPR64INX, FPR32INX, FPR64INX, ?, i64>; def Zdinx32Ext : ExtInfo<"_IN32X", "ZdinxRV32Only", [HasStdExtZdinx, IsRV32], - f64, FPR64IN32X, FPR32INX, FPR64IN32X, ?>; + f64, FPR64IN32X, FPR32INX, FPR64IN32X, ?, i32>; defvar DExts = [DExt, ZdinxExt, Zdinx32Ext]; defvar DExtsRV64 = [DExt, ZdinxExt]; @@ -261,8 +261,10 @@ let Predicates = [HasStdExtZdinx, IsRV32] in { /// Float conversion operations // f64 -> f32, f32 -> f64 -def : Pat<(any_fpround FPR64IN32X:$rs1), (FCVT_S_D_IN32X FPR64IN32X:$rs1, FRM_DYN)>; -def : Pat<(any_fpextend FPR32INX:$rs1), (FCVT_D_S_IN32X FPR32INX:$rs1, FRM_RNE)>; +def : Pat<(any_fpround FPR64IN32X:$rs1), + (FCVT_S_D_IN32X FPR64IN32X:$rs1, (i32 FRM_DYN))>; +def : Pat<(any_fpextend FPR32INX:$rs1), + (FCVT_D_S_IN32X FPR32INX:$rs1, (i32 FRM_RNE))>; } // Predicates = [HasStdExtZdinx, IsRV32] // [u]int<->double conversion patterns must be gated on IsRV32 or IsRV64, so @@ -321,7 +323,7 @@ def : Pat<(any_fsqrt FPR64INX:$rs1), (FSQRT_D_INX FPR64INX:$rs1, FRM_DYN)>; def : Pat<(fneg FPR64INX:$rs1), (FSGNJN_D_INX $rs1, $rs1)>; def : Pat<(fabs FPR64INX:$rs1), (FSGNJX_D_INX $rs1, $rs1)>; -def : Pat<(riscv_fclass FPR64INX:$rs1), (FCLASS_D_INX $rs1)>; +def : Pat<(i64 (riscv_fclass FPR64INX:$rs1)), (FCLASS_D_INX $rs1)>; def : PatFprFpr<fcopysign, FSGNJ_D_INX, FPR64INX, f64>; def : PatFprFpr<riscv_fsgnjx, FSGNJX_D_INX, FPR64INX, f64>; @@ -354,41 +356,46 @@ def : Pat<(fneg (any_fma_nsz FPR64INX:$rs1, FPR64INX:$rs2, FPR64INX:$rs3)), } // Predicates = [HasStdExtZdinx, IsRV64] let Predicates = [HasStdExtZdinx, IsRV32] in { -def : Pat<(any_fsqrt FPR64IN32X:$rs1), (FSQRT_D_IN32X FPR64IN32X:$rs1, FRM_DYN)>; +def : Pat<(any_fsqrt FPR64IN32X:$rs1), + (FSQRT_D_IN32X FPR64IN32X:$rs1, (i32 FRM_DYN))>; def : Pat<(fneg FPR64IN32X:$rs1), (FSGNJN_D_IN32X $rs1, $rs1)>; def : Pat<(fabs FPR64IN32X:$rs1), (FSGNJX_D_IN32X $rs1, $rs1)>; -def : Pat<(riscv_fclass FPR64IN32X:$rs1), (FCLASS_D_IN32X 
$rs1)>; +def : Pat<(i32 (riscv_fclass FPR64IN32X:$rs1)), (FCLASS_D_IN32X $rs1)>; def : PatFprFpr<fcopysign, FSGNJ_D_IN32X, FPR64IN32X, f64>; def : PatFprFpr<riscv_fsgnjx, FSGNJX_D_IN32X, FPR64IN32X, f64>; def : Pat<(fcopysign FPR64IN32X:$rs1, (fneg FPR64IN32X:$rs2)), (FSGNJN_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2)>; def : Pat<(fcopysign FPR64IN32X:$rs1, FPR32INX:$rs2), - (FSGNJ_D_IN32X $rs1, (FCVT_D_S_IN32X $rs2, FRM_RNE))>; + (FSGNJ_D_IN32X $rs1, (FCVT_D_S_IN32X $rs2, (i32 FRM_RNE)))>; def : Pat<(fcopysign FPR32INX:$rs1, FPR64IN32X:$rs2), - (FSGNJ_S_INX $rs1, (FCVT_S_D_IN32X $rs2, FRM_DYN))>; + (FSGNJ_S_INX $rs1, (FCVT_S_D_IN32X $rs2, (i32 FRM_DYN)))>; // fmadd: rs1 * rs2 + rs3 def : Pat<(any_fma FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3), - (FMADD_D_IN32X $rs1, $rs2, $rs3, FRM_DYN)>; + (FMADD_D_IN32X $rs1, $rs2, $rs3, (i32 FRM_DYN))>; // fmsub: rs1 * rs2 - rs3 def : Pat<(any_fma FPR64IN32X:$rs1, FPR64IN32X:$rs2, (fneg FPR64IN32X:$rs3)), - (FMSUB_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3, FRM_DYN)>; + (FMSUB_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3, + (i32 FRM_DYN))>; // fnmsub: -rs1 * rs2 + rs3 def : Pat<(any_fma (fneg FPR64IN32X:$rs1), FPR64IN32X:$rs2, FPR64IN32X:$rs3), - (FNMSUB_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3, FRM_DYN)>; + (FNMSUB_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3, + (i32 FRM_DYN))>; // fnmadd: -rs1 * rs2 - rs3 def : Pat<(any_fma (fneg FPR64IN32X:$rs1), FPR64IN32X:$rs2, (fneg FPR64IN32X:$rs3)), - (FNMADD_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3, FRM_DYN)>; + (FNMADD_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3, + (i32 FRM_DYN))>; // fnmadd: -(rs1 * rs2 + rs3) (the nsz flag on the FMA) def : Pat<(fneg (any_fma_nsz FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3)), - (FNMADD_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3, FRM_DYN)>; + (FNMADD_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3, + (i32 FRM_DYN))>; } // Predicates = [HasStdExtZdinx, IsRV32] // The ratified 20191213 ISA spec defines fmin and fmax in a way that matches @@ -441,42 +448,42 @@ def : PatSetCC<FPR64, any_fsetccs, SETOLE, FLE_D, f64>; let Predicates = [HasStdExtZdinx, IsRV64] in { // Match signaling FEQ_D -def : Pat<(XLenVT (strict_fsetccs (f64 FPR64INX:$rs1), FPR64INX:$rs2, SETEQ)), +def : Pat<(XLenVT (strict_fsetccs FPR64INX:$rs1, FPR64INX:$rs2, SETEQ)), (AND (XLenVT (FLE_D_INX $rs1, $rs2)), (XLenVT (FLE_D_INX $rs2, $rs1)))>; -def : Pat<(XLenVT (strict_fsetccs (f64 FPR64INX:$rs1), FPR64INX:$rs2, SETOEQ)), +def : Pat<(XLenVT (strict_fsetccs FPR64INX:$rs1, FPR64INX:$rs2, SETOEQ)), (AND (XLenVT (FLE_D_INX $rs1, $rs2)), (XLenVT (FLE_D_INX $rs2, $rs1)))>; // If both operands are the same, use a single FLE. 
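The FEQ patterns above and below rely on a standard trick: a signaling ordered-equal can be assembled from two FLE comparisons, since a <= b && b <= a holds exactly when a == b for non-NaN inputs, and FLE (unlike the quiet FEQ) signals on NaN operands. Scalar sketch:

    // Signaling ordered-equal from two ordered <= comparisons; in C++ the
    // relational operators raise the invalid exception on NaN, matching FLE.
    bool feqSignaling(double A, double B) {
      return A <= B && B <= A; // true iff A == B (false if either is NaN)
    }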
-def : Pat<(XLenVT (strict_fsetccs (f64 FPR64INX:$rs1), FPR64INX:$rs1, SETEQ)), +def : Pat<(XLenVT (strict_fsetccs FPR64INX:$rs1, FPR64INX:$rs1, SETEQ)), (FLE_D_INX $rs1, $rs1)>; -def : Pat<(XLenVT (strict_fsetccs (f64 FPR64INX:$rs1), FPR64INX:$rs1, SETOEQ)), +def : Pat<(XLenVT (strict_fsetccs FPR64INX:$rs1, FPR64INX:$rs1, SETOEQ)), (FLE_D_INX $rs1, $rs1)>; -def : PatSetCC<FPR64INX, any_fsetccs, SETLT, FLT_D_INX, f64>; -def : PatSetCC<FPR64INX, any_fsetccs, SETOLT, FLT_D_INX, f64>; -def : PatSetCC<FPR64INX, any_fsetccs, SETLE, FLE_D_INX, f64>; -def : PatSetCC<FPR64INX, any_fsetccs, SETOLE, FLE_D_INX, f64>; +def : PatSetCC<FPR64INX, any_fsetccs, SETLT, FLT_D_INX, f64, i64>; +def : PatSetCC<FPR64INX, any_fsetccs, SETOLT, FLT_D_INX, f64, i64>; +def : PatSetCC<FPR64INX, any_fsetccs, SETLE, FLE_D_INX, f64, i64>; +def : PatSetCC<FPR64INX, any_fsetccs, SETOLE, FLE_D_INX, f64, i64>; } // Predicates = [HasStdExtZdinx, IsRV64] let Predicates = [HasStdExtZdinx, IsRV32] in { // Match signaling FEQ_D -def : Pat<(XLenVT (strict_fsetccs (f64 FPR64IN32X:$rs1), FPR64IN32X:$rs2, SETEQ)), +def : Pat<(i32 (strict_fsetccs FPR64IN32X:$rs1, FPR64IN32X:$rs2, SETEQ)), (AND (XLenVT (FLE_D_IN32X $rs1, $rs2)), (XLenVT (FLE_D_IN32X $rs2, $rs1)))>; -def : Pat<(XLenVT (strict_fsetccs (f64 FPR64IN32X:$rs1), FPR64IN32X:$rs2, SETOEQ)), +def : Pat<(i32 (strict_fsetccs FPR64IN32X:$rs1, FPR64IN32X:$rs2, SETOEQ)), (AND (XLenVT (FLE_D_IN32X $rs1, $rs2)), (XLenVT (FLE_D_IN32X $rs2, $rs1)))>; // If both operands are the same, use a single FLE. -def : Pat<(XLenVT (strict_fsetccs (f64 FPR64IN32X:$rs1), FPR64IN32X:$rs1, SETEQ)), +def : Pat<(i32 (strict_fsetccs FPR64IN32X:$rs1, FPR64IN32X:$rs1, SETEQ)), (FLE_D_IN32X $rs1, $rs1)>; -def : Pat<(XLenVT (strict_fsetccs (f64 FPR64IN32X:$rs1), FPR64IN32X:$rs1, SETOEQ)), +def : Pat<(i32 (strict_fsetccs FPR64IN32X:$rs1, FPR64IN32X:$rs1, SETOEQ)), (FLE_D_IN32X $rs1, $rs1)>; -def : PatSetCC<FPR64IN32X, any_fsetccs, SETLT, FLT_D_IN32X, f64>; -def : PatSetCC<FPR64IN32X, any_fsetccs, SETOLT, FLT_D_IN32X, f64>; -def : PatSetCC<FPR64IN32X, any_fsetccs, SETLE, FLE_D_IN32X, f64>; -def : PatSetCC<FPR64IN32X, any_fsetccs, SETOLE, FLE_D_IN32X, f64>; +def : PatSetCC<FPR64IN32X, any_fsetccs, SETLT, FLT_D_IN32X, f64, i32>; +def : PatSetCC<FPR64IN32X, any_fsetccs, SETOLT, FLT_D_IN32X, f64, i32>; +def : PatSetCC<FPR64IN32X, any_fsetccs, SETLE, FLE_D_IN32X, f64, i32>; +def : PatSetCC<FPR64IN32X, any_fsetccs, SETOLE, FLE_D_IN32X, f64, i32>; } // Predicates = [HasStdExtZdinx, IsRV32] let Predicates = [HasStdExtD] in { @@ -511,7 +518,7 @@ def SplitF64Pseudo } // Predicates = [HasStdExtD, NoStdExtZfa, IsRV32] let Predicates = [HasStdExtZdinx, IsRV64] in { -defm Select_FPR64INX : SelectCC_GPR_rrirr<FPR64INX, f64>; +defm Select_FPR64INX : SelectCC_GPR_rrirr<FPR64INX, f64, i64>; def PseudoFROUND_D_INX : PseudoFROUND<FPR64INX, f64>; @@ -523,9 +530,9 @@ def : StPat<store, SD, GPR, f64>; } // Predicates = [HasStdExtZdinx, IsRV64] let Predicates = [HasStdExtZdinx, IsRV32] in { -defm Select_FPR64IN32X : SelectCC_GPR_rrirr<FPR64IN32X, f64>; +defm Select_FPR64IN32X : SelectCC_GPR_rrirr<FPR64IN32X, f64, i32>; -def PseudoFROUND_D_IN32X : PseudoFROUND<FPR64IN32X, f64>; +def PseudoFROUND_D_IN32X : PseudoFROUND<FPR64IN32X, f64, i32>; /// Loads let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 1 in @@ -537,8 +544,8 @@ def PseudoRV32ZdinxSD : Pseudo<(outs), (ins GPRPair:$rs2, GPRNoX0:$rs1, simm12_l } // Predicates = [HasStdExtZdinx, IsRV32] let Predicates = [HasStdExtZdinx, HasStdExtZilsd, IsRV32] in { 
-def : LdPat<load, LD_RV32, f64>; -def : StPat<store, SD_RV32, GPRPair, f64>; +def : LdPat<load, LD_RV32, f64, i32>; +def : StPat<store, SD_RV32, GPRPair, f64, i32>; } let Predicates = [HasStdExtD, IsRV32] in { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td index fde030e..6571d99 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td @@ -131,7 +131,7 @@ def FPR32INX : RegisterOperand<GPRF32> { // The DAGOperand can be unset if the predicates are not enough to define it. class ExtInfo<string suffix, string space, list<Predicate> predicates, ValueType primaryvt, DAGOperand primaryty, DAGOperand f32ty, - DAGOperand f64ty, DAGOperand f16ty> { + DAGOperand f64ty, DAGOperand f16ty, ValueType intvt = XLenVT> { list<Predicate> Predicates = predicates; string Suffix = suffix; string Space = space; @@ -140,6 +140,7 @@ class ExtInfo<string suffix, string space, list<Predicate> predicates, DAGOperand F32Ty = f32ty; DAGOperand F64Ty = f64ty; ValueType PrimaryVT = primaryvt; + ValueType IntVT = intvt; } def FExt : ExtInfo<"", "", [HasStdExtF], f32, FPR32, FPR32, ?, ?>; @@ -314,9 +315,9 @@ multiclass FPCmp_rr_m<bits<7> funct7, bits<3> funct3, string opcodestr, def Ext.Suffix : FPCmp_rr<funct7, funct3, opcodestr, Ext.PrimaryTy, Commutable>; } -class PseudoFROUND<DAGOperand Ty, ValueType vt> +class PseudoFROUND<DAGOperand Ty, ValueType vt, ValueType intvt = XLenVT> : Pseudo<(outs Ty:$rd), (ins Ty:$rs1, Ty:$rs2, ixlenimm:$rm), - [(set Ty:$rd, (vt (riscv_fround Ty:$rs1, Ty:$rs2, timm:$rm)))]> { + [(set Ty:$rd, (vt (riscv_fround Ty:$rs1, Ty:$rs2, (intvt timm:$rm))))]> { let hasSideEffects = 0; let mayLoad = 0; let mayStore = 0; @@ -529,13 +530,14 @@ def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>; /// Generic pattern classes class PatSetCC<DAGOperand Ty, SDPatternOperator OpNode, CondCode Cond, - RVInstCommon Inst, ValueType vt> - : Pat<(XLenVT (OpNode (vt Ty:$rs1), Ty:$rs2, Cond)), (Inst $rs1, $rs2)>; + RVInstCommon Inst, ValueType vt, ValueType intvt = XLenVT> + : Pat<(intvt (OpNode (vt Ty:$rs1), Ty:$rs2, Cond)), (Inst $rs1, $rs2)>; multiclass PatSetCC_m<SDPatternOperator OpNode, CondCode Cond, RVInstCommon Inst, ExtInfo Ext> { let Predicates = Ext.Predicates in def Ext.Suffix : PatSetCC<Ext.PrimaryTy, OpNode, Cond, - !cast<RVInstCommon>(Inst#Ext.Suffix), Ext.PrimaryVT>; + !cast<RVInstCommon>(Inst#Ext.Suffix), + Ext.PrimaryVT, Ext.IntVT>; } class PatFprFpr<SDPatternOperator OpNode, RVInstR Inst, @@ -549,14 +551,15 @@ multiclass PatFprFpr_m<SDPatternOperator OpNode, RVInstR Inst, } class PatFprFprDynFrm<SDPatternOperator OpNode, RVInstRFrm Inst, - DAGOperand RegTy, ValueType vt> - : Pat<(OpNode (vt RegTy:$rs1), (vt RegTy:$rs2)), (Inst $rs1, $rs2, FRM_DYN)>; + DAGOperand RegTy, ValueType vt, ValueType intvt> + : Pat<(OpNode (vt RegTy:$rs1), (vt RegTy:$rs2)), + (Inst $rs1, $rs2,(intvt FRM_DYN))>; multiclass PatFprFprDynFrm_m<SDPatternOperator OpNode, RVInstRFrm Inst, ExtInfo Ext> { let Predicates = Ext.Predicates in def Ext.Suffix : PatFprFprDynFrm<OpNode, !cast<RVInstRFrm>(Inst#Ext.Suffix), - Ext.PrimaryTy, Ext.PrimaryVT>; + Ext.PrimaryTy, Ext.PrimaryVT, Ext.IntVT>; } /// Float conversion operations diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td index d8f5d3e..aa8f1a1 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td @@ -669,19 +669,19 @@ let Predicates = [HasVendorXCValu, IsRV32] in { 
// Patterns for load & store operations //===----------------------------------------------------------------------===// class CVLdrrPat<PatFrag LoadOp, RVInst Inst> - : Pat<(XLenVT (LoadOp CVrr:$regreg)), + : Pat<(i32 (LoadOp CVrr:$regreg)), (Inst CVrr:$regreg)>; class CVStriPat<PatFrag StoreOp, RVInst Inst> - : Pat<(StoreOp (XLenVT GPR:$rs2), GPR:$rs1, simm12_lo:$imm12), + : Pat<(StoreOp (i32 GPR:$rs2), GPR:$rs1, simm12_lo:$imm12), (Inst GPR:$rs2, GPR:$rs1, simm12_lo:$imm12)>; class CVStrriPat<PatFrag StoreOp, RVInst Inst> - : Pat<(StoreOp (XLenVT GPR:$rs2), GPR:$rs1, GPR:$rs3), + : Pat<(StoreOp (i32 GPR:$rs2), GPR:$rs1, GPR:$rs3), (Inst GPR:$rs2, GPR:$rs1, GPR:$rs3)>; class CVStrrPat<PatFrag StoreOp, RVInst Inst> - : Pat<(StoreOp (XLenVT GPR:$rs2), CVrr:$regreg), + : Pat<(StoreOp (i32 GPR:$rs2), CVrr:$regreg), (Inst GPR:$rs2, CVrr:$regreg)>; let Predicates = [HasVendorXCVmem, IsRV32], AddedComplexity = 1 in { @@ -725,17 +725,17 @@ let Predicates = [HasVendorXCVbitmanip, IsRV32] in { (CV_INSERT GPR:$rd, GPR:$rs1, (CV_HI5 cv_uimm10:$imm), (CV_LO5 cv_uimm10:$imm))>; - def : PatGpr<cttz, CV_FF1>; - def : PatGpr<ctlz, CV_FL1>; + def : PatGpr<cttz, CV_FF1, i32>; + def : PatGpr<ctlz, CV_FL1, i32>; def : PatGpr<int_riscv_cv_bitmanip_clb, CV_CLB>; - def : PatGpr<ctpop, CV_CNT>; + def : PatGpr<ctpop, CV_CNT, i32>; - def : PatGprGpr<rotr, CV_ROR>; + def : PatGprGpr<rotr, CV_ROR, i32>; def : Pat<(int_riscv_cv_bitmanip_bitrev GPR:$rs1, cv_tuimm5:$pts, cv_tuimm2:$radix), (CV_BITREV GPR:$rs1, cv_tuimm2:$radix, cv_tuimm5:$pts)>; - def : Pat<(bitreverse (XLenVT GPR:$rs)), (CV_BITREV GPR:$rs, 0, 0)>; + def : Pat<(bitreverse (i32 GPR:$rs)), (CV_BITREV GPR:$rs, 0, 0)>; } class PatCoreVAluGpr<string intr, string asm> : @@ -760,18 +760,18 @@ multiclass PatCoreVAluGprGprImm<Intrinsic intr> { } let Predicates = [HasVendorXCValu, IsRV32], AddedComplexity = 1 in { - def : PatGpr<abs, CV_ABS>; - def : PatGprGpr<setle, CV_SLE>; - def : PatGprGpr<setule, CV_SLEU>; - def : PatGprGpr<smin, CV_MIN>; - def : PatGprGpr<umin, CV_MINU>; - def : PatGprGpr<smax, CV_MAX>; - def : PatGprGpr<umax, CV_MAXU>; - - def : Pat<(sext_inreg (XLenVT GPR:$rs1), i16), (CV_EXTHS GPR:$rs1)>; - def : Pat<(sext_inreg (XLenVT GPR:$rs1), i8), (CV_EXTBS GPR:$rs1)>; - def : Pat<(and (XLenVT GPR:$rs1), 0xffff), (CV_EXTHZ GPR:$rs1)>; - def : Pat<(and (XLenVT GPR:$rs1), 0xff), (CV_EXTBZ GPR:$rs1)>; + def : PatGpr<abs, CV_ABS, i32>; + def : PatGprGpr<setle, CV_SLE, i32>; + def : PatGprGpr<setule, CV_SLEU, i32>; + def : PatGprGpr<smin, CV_MIN, i32>; + def : PatGprGpr<umin, CV_MINU, i32>; + def : PatGprGpr<smax, CV_MAX, i32>; + def : PatGprGpr<umax, CV_MAXU, i32>; + + def : Pat<(sext_inreg (i32 GPR:$rs1), i16), (CV_EXTHS GPR:$rs1)>; + def : Pat<(sext_inreg (i32 GPR:$rs1), i8), (CV_EXTBS GPR:$rs1)>; + def : Pat<(and (i32 GPR:$rs1), 0xffff), (CV_EXTHZ GPR:$rs1)>; + def : Pat<(and (i32 GPR:$rs1), 0xff), (CV_EXTBZ GPR:$rs1)>; defm CLIP : PatCoreVAluGprImm<int_riscv_cv_alu_clip>; defm CLIPU : PatCoreVAluGprImm<int_riscv_cv_alu_clipu>; @@ -790,9 +790,9 @@ let Predicates = [HasVendorXCValu, IsRV32], AddedComplexity = 1 in { //===----------------------------------------------------------------------===// let Predicates = [HasVendorXCVbi, IsRV32], AddedComplexity = 2 in { - def : Pat<(riscv_brcc GPR:$rs1, simm5:$imm5, SETEQ, bb:$imm12), + def : Pat<(riscv_brcc (i32 GPR:$rs1), simm5:$imm5, SETEQ, bb:$imm12), (CV_BEQIMM GPR:$rs1, simm5:$imm5, bare_simm13_lsb0_bb:$imm12)>; - def : Pat<(riscv_brcc GPR:$rs1, simm5:$imm5, SETNE, bb:$imm12), + def : Pat<(riscv_brcc 
(i32 GPR:$rs1), simm5:$imm5, SETNE, bb:$imm12), (CV_BNEIMM GPR:$rs1, simm5:$imm5, bare_simm13_lsb0_bb:$imm12)>; defm CC_SImm5_CV : SelectCC_GPR_riirr<GPR, simm5>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td index 5e1d07a..4537bfe 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td @@ -1648,10 +1648,10 @@ def : Pat<(qc_setwmi (i32 GPR:$rs3), GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb0 } // Predicates = [HasVendorXqcilsm, IsRV32] let Predicates = [HasVendorXqcili, IsRV32] in { -def: Pat<(qc_e_li tglobaladdr:$A), (QC_E_LI bare_simm32:$A)>; -def: Pat<(qc_e_li tblockaddress:$A), (QC_E_LI bare_simm32:$A)>; -def: Pat<(qc_e_li tjumptable:$A), (QC_E_LI bare_simm32:$A)>; -def: Pat<(qc_e_li tconstpool:$A), (QC_E_LI bare_simm32:$A)>; +def: Pat<(i32 (qc_e_li tglobaladdr:$A)), (QC_E_LI bare_simm32:$A)>; +def: Pat<(i32 (qc_e_li tblockaddress:$A)), (QC_E_LI bare_simm32:$A)>; +def: Pat<(i32 (qc_e_li tjumptable:$A)), (QC_E_LI bare_simm32:$A)>; +def: Pat<(i32 (qc_e_li tconstpool:$A)), (QC_E_LI bare_simm32:$A)>; } // Predicates = [HasVendorXqcili, IsRV32] //===----------------------------------------------------------------------===/i diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td index 014da99..c31713e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td @@ -69,16 +69,16 @@ def ZhinxminExt : ExtInfo<"_INX", "Zfinx", f16, FPR16INX, FPR32INX, ?, FPR16INX>; def ZhinxZdinxExt : ExtInfo<"_INX", "Zfinx", [HasStdExtZhinx, HasStdExtZdinx, IsRV64], - ?, ?, FPR32INX, FPR64INX, FPR16INX>; + ?, ?, FPR32INX, FPR64INX, FPR16INX, i64>; def ZhinxminZdinxExt : ExtInfo<"_INX", "Zfinx", [HasStdExtZhinxmin, HasStdExtZdinx, IsRV64], - ?, ?, FPR32INX, FPR64INX, FPR16INX>; + ?, ?, FPR32INX, FPR64INX, FPR16INX, i64>; def ZhinxZdinx32Ext : ExtInfo<"_IN32X", "ZdinxGPRPairRV32", [HasStdExtZhinx, HasStdExtZdinx, IsRV32], - ?, ?, FPR32INX, FPR64IN32X, FPR16INX>; + ?, ?, FPR32INX, FPR64IN32X, FPR16INX, i32>; def ZhinxminZdinx32Ext : ExtInfo<"_IN32X", "ZdinxGPRPairRV32", [HasStdExtZhinxmin, HasStdExtZdinx, IsRV32], - ?, ?, FPR32INX, FPR64IN32X, FPR16INX>; + ?, ?, FPR32INX, FPR64IN32X, FPR16INX, i32>; defvar ZfhExts = [ZfhExt, ZhinxExt]; defvar ZfhminExts = [ZfhminExt, ZhinxminExt]; @@ -607,13 +607,16 @@ def : Pat<(fcopysign FPR64:$rs1, (f16 FPR16:$rs2)), (FSGNJ_D $rs1, (FCVT_D_H $rs let Predicates = [HasStdExtZhinxmin, HasStdExtZdinx, IsRV32] in { /// Float conversion operations // f64 -> f16, f16 -> f64 -def : Pat<(any_fpround FPR64IN32X:$rs1), (FCVT_H_D_IN32X FPR64IN32X:$rs1, FRM_DYN)>; -def : Pat<(any_fpextend FPR16INX:$rs1), (FCVT_D_H_IN32X FPR16INX:$rs1, FRM_RNE)>; +def : Pat<(any_fpround FPR64IN32X:$rs1), + (FCVT_H_D_IN32X FPR64IN32X:$rs1, (i32 FRM_DYN))>; +def : Pat<(any_fpextend FPR16INX:$rs1), + (FCVT_D_H_IN32X FPR16INX:$rs1, (i32 FRM_RNE))>; /// Float arithmetic operations def : Pat<(fcopysign FPR16INX:$rs1, FPR64IN32X:$rs2), - (FSGNJ_H_INX $rs1, (FCVT_H_D_IN32X $rs2, 0b111))>; -def : Pat<(fcopysign FPR64IN32X:$rs1, FPR16INX:$rs2), (FSGNJ_D_IN32X $rs1, (FCVT_D_H_IN32X $rs2, FRM_RNE))>; + (FSGNJ_H_INX $rs1, (FCVT_H_D_IN32X $rs2, (i32 FRM_DYN)))>; +def : Pat<(fcopysign FPR64IN32X:$rs1, FPR16INX:$rs2), + (FSGNJ_D_IN32X $rs1, (FCVT_D_H_IN32X $rs2, (i32 FRM_RNE)))>; } // Predicates = [HasStdExtZhinxmin, HasStdExtZdinx, IsRV32] let Predicates = [HasStdExtZhinxmin, HasStdExtZdinx, IsRV64] in { diff --git 
a/llvm/lib/Target/SPIRV/CMakeLists.txt b/llvm/lib/Target/SPIRV/CMakeLists.txt index 46afe03..eab7b21 100644 --- a/llvm/lib/Target/SPIRV/CMakeLists.txt +++ b/llvm/lib/Target/SPIRV/CMakeLists.txt @@ -36,6 +36,7 @@ add_llvm_target(SPIRVCodeGen SPIRVMetadata.cpp SPIRVModuleAnalysis.cpp SPIRVStructurizer.cpp + SPIRVCombinerHelper.cpp SPIRVPreLegalizer.cpp SPIRVPreLegalizerCombiner.cpp SPIRVPostLegalizer.cpp diff --git a/llvm/lib/Target/SPIRV/SPIRVCombine.td b/llvm/lib/Target/SPIRV/SPIRVCombine.td index 6f726e0..fde56c4 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCombine.td +++ b/llvm/lib/Target/SPIRV/SPIRVCombine.td @@ -11,8 +11,8 @@ include "llvm/Target/GlobalISel/Combine.td" def vector_length_sub_to_distance_lowering : GICombineRule < (defs root:$root), (match (wip_match_opcode G_INTRINSIC):$root, - [{ return matchLengthToDistance(*${root}, MRI); }]), - (apply [{ applySPIRVDistance(*${root}, MRI, B); }]) + [{ return Helper.matchLengthToDistance(*${root}); }]), + (apply [{ Helper.applySPIRVDistance(*${root}); }]) >; def SPIRVPreLegalizerCombiner diff --git a/llvm/lib/Target/SPIRV/SPIRVCombinerHelper.cpp b/llvm/lib/Target/SPIRV/SPIRVCombinerHelper.cpp new file mode 100644 index 0000000..267794c --- /dev/null +++ b/llvm/lib/Target/SPIRV/SPIRVCombinerHelper.cpp @@ -0,0 +1,60 @@ +//===-- SPIRVCombinerHelper.cpp -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SPIRVCombinerHelper.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" +#include "llvm/IR/IntrinsicsSPIRV.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; +using namespace MIPatternMatch; + +SPIRVCombinerHelper::SPIRVCombinerHelper( + GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize, + GISelValueTracking *VT, MachineDominatorTree *MDT, const LegalizerInfo *LI, + const SPIRVSubtarget &STI) + : CombinerHelper(Observer, B, IsPreLegalize, VT, MDT, LI), STI(STI) {} + +/// This match is part of a combine that +/// rewrites length(X - Y) to distance(X, Y) +/// (f32 (g_intrinsic length +/// (g_fsub (vXf32 X) (vXf32 Y)))) +/// -> +/// (f32 (g_intrinsic distance +/// (vXf32 X) (vXf32 Y))) +/// +bool SPIRVCombinerHelper::matchLengthToDistance(MachineInstr &MI) const { + if (MI.getOpcode() != TargetOpcode::G_INTRINSIC || + cast<GIntrinsic>(MI).getIntrinsicID() != Intrinsic::spv_length) + return false; + + // First operand of MI is `G_INTRINSIC` so start at operand 2. + Register SubReg = MI.getOperand(2).getReg(); + MachineInstr *SubInstr = MRI.getVRegDef(SubReg); + if (SubInstr->getOpcode() != TargetOpcode::G_FSUB) + return false; + + return true; +} + +void SPIRVCombinerHelper::applySPIRVDistance(MachineInstr &MI) const { + // Extract the operands for X and Y from the match criteria. 
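The rewrite applied here is the one documented on matchLengthToDistance: length(X - Y) becomes distance(X, Y). A scalar reference for the equivalence, using fixed 3-element vectors for illustration (names invented):

    #include <array>
    #include <cmath>

    static float length3(const std::array<float, 3> &V) {
      return std::sqrt(V[0] * V[0] + V[1] * V[1] + V[2] * V[2]);
    }

    // distance(X, Y) is by definition length(X - Y); the combine just emits
    // the fused intrinsic instead of the explicit subtract.
    static float distance3(const std::array<float, 3> &X,
                           const std::array<float, 3> &Y) {
      return length3({X[0] - Y[0], X[1] - Y[1], X[2] - Y[2]});
    }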
+ Register SubDestReg = MI.getOperand(2).getReg(); + MachineInstr *SubInstr = MRI.getVRegDef(SubDestReg); + Register SubOperand1 = SubInstr->getOperand(1).getReg(); + Register SubOperand2 = SubInstr->getOperand(2).getReg(); + Register ResultReg = MI.getOperand(0).getReg(); + + Builder.setInstrAndDebugLoc(MI); + Builder.buildIntrinsic(Intrinsic::spv_distance, ResultReg) + .addUse(SubOperand1) + .addUse(SubOperand2); + + MI.eraseFromParent(); +} diff --git a/llvm/lib/Target/SPIRV/SPIRVCombinerHelper.h b/llvm/lib/Target/SPIRV/SPIRVCombinerHelper.h new file mode 100644 index 0000000..0b39d34 --- /dev/null +++ b/llvm/lib/Target/SPIRV/SPIRVCombinerHelper.h @@ -0,0 +1,38 @@ +//===-- SPIRVCombinerHelper.h -----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// This contains common combine transformations that may be used in a combine +/// pass. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVCOMBINERHELPER_H +#define LLVM_LIB_TARGET_SPIRV_SPIRVCOMBINERHELPER_H + +#include "SPIRVSubtarget.h" +#include "llvm/CodeGen/GlobalISel/CombinerHelper.h" + +namespace llvm { +class SPIRVCombinerHelper : public CombinerHelper { +protected: + const SPIRVSubtarget &STI; + +public: + using CombinerHelper::CombinerHelper; + SPIRVCombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, + bool IsPreLegalize, GISelValueTracking *VT, + MachineDominatorTree *MDT, const LegalizerInfo *LI, + const SPIRVSubtarget &STI); + + bool matchLengthToDistance(MachineInstr &MI) const; + void applySPIRVDistance(MachineInstr &MI) const; +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_SPIRV_SPIRVCOMBINERHELPER_H diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 989950f..a466ab2 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -316,6 +316,9 @@ private: bool selectImageWriteIntrinsic(MachineInstr &I) const; bool selectResourceGetPointer(Register &ResVReg, const SPIRVType *ResType, MachineInstr &I) const; + bool selectResourceNonUniformIndex(Register &ResVReg, + const SPIRVType *ResType, + MachineInstr &I) const; bool selectModf(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const; bool selectUpdateCounter(Register &ResVReg, const SPIRVType *ResType, @@ -347,7 +350,7 @@ private: SPIRV::StorageClass::StorageClass SC, uint32_t Set, uint32_t Binding, uint32_t ArraySize, Register IndexReg, - bool IsNonUniform, StringRef Name, + StringRef Name, MachineIRBuilder MIRBuilder) const; SPIRVType *widenTypeToVec4(const SPIRVType *Type, MachineInstr &I) const; bool extractSubvector(Register &ResVReg, const SPIRVType *ResType, @@ -364,6 +367,7 @@ private: MachineInstr &I) const; bool loadHandleBeforePosition(Register &HandleReg, const SPIRVType *ResType, GIntrinsic &HandleDef, MachineInstr &Pos) const; + void decorateUsesAsNonUniform(Register &NonUniformReg) const; }; bool sampledTypeIsSignedInteger(const llvm::Type *HandleType) { @@ -3465,6 +3469,9 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, case Intrinsic::spv_discard: { return selectDiscard(ResVReg, ResType, I); } + case 
Intrinsic::spv_resource_nonuniformindex: { + return selectResourceNonUniformIndex(ResVReg, ResType, I); + } default: { std::string DiagMsg; raw_string_ostream OS(DiagMsg); @@ -3504,7 +3511,6 @@ bool SPIRVInstructionSelector::selectCounterHandleFromBinding( uint32_t Binding = getIConstVal(Intr.getOperand(3).getReg(), MRI); uint32_t ArraySize = getIConstVal(MainHandleDef->getOperand(4).getReg(), MRI); Register IndexReg = MainHandleDef->getOperand(5).getReg(); - const bool IsNonUniform = false; std::string CounterName = getStringValueFromReg(MainHandleDef->getOperand(6).getReg(), *MRI) + ".counter"; @@ -3513,7 +3519,7 @@ bool SPIRVInstructionSelector::selectCounterHandleFromBinding( MachineIRBuilder MIRBuilder(I); Register CounterVarReg = buildPointerToResource( GR.getPointeeType(ResType), GR.getPointerStorageClass(ResType), Set, - Binding, ArraySize, IndexReg, IsNonUniform, CounterName, MIRBuilder); + Binding, ArraySize, IndexReg, CounterName, MIRBuilder); return BuildCOPY(ResVReg, CounterVarReg, I); } @@ -3713,6 +3719,55 @@ bool SPIRVInstructionSelector::selectResourceGetPointer( .constrainAllUses(TII, TRI, RBI); } +bool SPIRVInstructionSelector::selectResourceNonUniformIndex( + Register &ResVReg, const SPIRVType *ResType, MachineInstr &I) const { + Register ObjReg = I.getOperand(2).getReg(); + if (!BuildCOPY(ResVReg, ObjReg, I)) + return false; + + buildOpDecorate(ResVReg, I, TII, SPIRV::Decoration::NonUniformEXT, {}); + // Check for the registers that use the index marked as non-uniform + // and recursively mark them as non-uniform. + // Per the spec, it's necessary that the final argument used for + // load/store/sample/atomic must be decorated, so we need to propagate the + // decoration through access chains and copies. + // https://docs.vulkan.org/samples/latest/samples/extensions/descriptor_indexing/README.html#_when_to_use_non_uniform_indexing_qualifier + decorateUsesAsNonUniform(ResVReg); + return true; +} + +void SPIRVInstructionSelector::decorateUsesAsNonUniform( + Register &NonUniformReg) const { + llvm::SmallVector<Register> WorkList = {NonUniformReg}; + while (WorkList.size() > 0) { + Register CurrentReg = WorkList.back(); + WorkList.pop_back(); + + bool IsDecorated = false; + for (MachineInstr &Use : MRI->use_instructions(CurrentReg)) { + if (Use.getOpcode() == SPIRV::OpDecorate && + Use.getOperand(1).getImm() == SPIRV::Decoration::NonUniformEXT) { + IsDecorated = true; + continue; + } + // Check if the instruction has the result register and add it to the + // worklist. 
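The loop below is a plain def-use worklist walk: pop a register, scan its users, note whether a NonUniformEXT decoration already exists, and queue each user's result register. The abstract shape, as a sketch (Node, UsersOf, and TryMark stand in for Register, MRI->use_instructions(), and the decoration check):

    #include <vector>

    template <typename Node, typename UsersFn, typename MarkFn>
    void propagate(Node Root, UsersFn UsersOf, MarkFn TryMark) {
      std::vector<Node> WorkList{Root};
      while (!WorkList.empty()) {
        Node N = WorkList.back();
        WorkList.pop_back();
        if (!TryMark(N)) // already marked: do not re-walk its users
          continue;
        for (Node User : UsersOf(N))
          WorkList.push_back(User);
      }
    }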
+ if (Use.getOperand(0).isReg() && Use.getOperand(0).isDef()) { + Register ResultReg = Use.getOperand(0).getReg(); + if (ResultReg == CurrentReg) + continue; + WorkList.push_back(ResultReg); + } + } + + if (!IsDecorated) { + buildOpDecorate(CurrentReg, *MRI->getVRegDef(CurrentReg), TII, + SPIRV::Decoration::NonUniformEXT, {}); + } + } + return; +} + bool SPIRVInstructionSelector::extractSubvector( Register &ResVReg, const SPIRVType *ResType, Register &ReadReg, MachineInstr &InsertionPoint) const { @@ -3784,7 +3839,7 @@ bool SPIRVInstructionSelector::selectImageWriteIntrinsic( Register SPIRVInstructionSelector::buildPointerToResource( const SPIRVType *SpirvResType, SPIRV::StorageClass::StorageClass SC, uint32_t Set, uint32_t Binding, uint32_t ArraySize, Register IndexReg, - bool IsNonUniform, StringRef Name, MachineIRBuilder MIRBuilder) const { + StringRef Name, MachineIRBuilder MIRBuilder) const { const Type *ResType = GR.getTypeForSPIRVType(SpirvResType); if (ArraySize == 1) { SPIRVType *PtrType = @@ -3803,14 +3858,7 @@ Register SPIRVInstructionSelector::buildPointerToResource( SPIRVType *ResPointerType = GR.getOrCreateSPIRVPointerType(ResType, MIRBuilder, SC); - Register AcReg = MRI->createVirtualRegister(GR.getRegClass(ResPointerType)); - if (IsNonUniform) { - // It is unclear which value needs to be marked an non-uniform, so both - // the index and the access changed are decorated as non-uniform. - buildOpDecorate(IndexReg, MIRBuilder, SPIRV::Decoration::NonUniformEXT, {}); - buildOpDecorate(AcReg, MIRBuilder, SPIRV::Decoration::NonUniformEXT, {}); - } MIRBuilder.buildInstr(SPIRV::OpAccessChain) .addDef(AcReg) @@ -4560,9 +4608,6 @@ bool SPIRVInstructionSelector::loadHandleBeforePosition( uint32_t Binding = foldImm(HandleDef.getOperand(3), MRI); uint32_t ArraySize = foldImm(HandleDef.getOperand(4), MRI); Register IndexReg = HandleDef.getOperand(5).getReg(); - // FIXME: The IsNonUniform flag needs to be set based on resource analysis. - // https://github.com/llvm/llvm-project/issues/155701 - bool IsNonUniform = false; std::string Name = getStringValueFromReg(HandleDef.getOperand(6).getReg(), *MRI); @@ -4576,13 +4621,8 @@ bool SPIRVInstructionSelector::loadHandleBeforePosition( SC = GR.getPointerStorageClass(ResType); } - Register VarReg = - buildPointerToResource(VarType, SC, Set, Binding, ArraySize, IndexReg, - IsNonUniform, Name, MIRBuilder); - - if (IsNonUniform) - buildOpDecorate(HandleReg, HandleDef, TII, SPIRV::Decoration::NonUniformEXT, - {}); + Register VarReg = buildPointerToResource(VarType, SC, Set, Binding, ArraySize, + IndexReg, Name, MIRBuilder); // The handle for the buffer is the pointer to the resource. For an image, the // handle is the image object. So images get an extra load. diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizerCombiner.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizerCombiner.cpp index 8356751..48f4047 100644 --- a/llvm/lib/Target/SPIRV/SPIRVPreLegalizerCombiner.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizerCombiner.cpp @@ -1,4 +1,3 @@ - //===-- SPIRVPreLegalizerCombiner.cpp - combine legalization ----*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
@@ -13,24 +12,17 @@ //===----------------------------------------------------------------------===// #include "SPIRV.h" -#include "SPIRVTargetMachine.h" +#include "SPIRVCombinerHelper.h" #include "llvm/CodeGen/GlobalISel/CSEInfo.h" #include "llvm/CodeGen/GlobalISel/Combiner.h" -#include "llvm/CodeGen/GlobalISel/CombinerHelper.h" #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/GISelValueTracking.h" -#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" -#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/IR/IntrinsicsSPIRV.h" #define GET_GICOMBINER_DEPS #include "SPIRVGenPreLegalizeGICombiner.inc" @@ -47,72 +39,9 @@ namespace { #include "SPIRVGenPreLegalizeGICombiner.inc" #undef GET_GICOMBINER_TYPES -/// This match is part of a combine that -/// rewrites length(X - Y) to distance(X, Y) -/// (f32 (g_intrinsic length -/// (g_fsub (vXf32 X) (vXf32 Y)))) -/// -> -/// (f32 (g_intrinsic distance -/// (vXf32 X) (vXf32 Y))) -/// -bool matchLengthToDistance(MachineInstr &MI, MachineRegisterInfo &MRI) { - if (MI.getOpcode() != TargetOpcode::G_INTRINSIC || - cast<GIntrinsic>(MI).getIntrinsicID() != Intrinsic::spv_length) - return false; - - // First operand of MI is `G_INTRINSIC` so start at operand 2. - Register SubReg = MI.getOperand(2).getReg(); - MachineInstr *SubInstr = MRI.getVRegDef(SubReg); - if (!SubInstr || SubInstr->getOpcode() != TargetOpcode::G_FSUB) - return false; - - return true; -} -void applySPIRVDistance(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &B) { - - // Extract the operands for X and Y from the match criteria. - Register SubDestReg = MI.getOperand(2).getReg(); - MachineInstr *SubInstr = MRI.getVRegDef(SubDestReg); - Register SubOperand1 = SubInstr->getOperand(1).getReg(); - Register SubOperand2 = SubInstr->getOperand(2).getReg(); - - // Remove the original `spv_length` instruction. - - Register ResultReg = MI.getOperand(0).getReg(); - DebugLoc DL = MI.getDebugLoc(); - MachineBasicBlock &MBB = *MI.getParent(); - MachineBasicBlock::iterator InsertPt = MI.getIterator(); - - // Build the `spv_distance` intrinsic. - MachineInstrBuilder NewInstr = - BuildMI(MBB, InsertPt, DL, B.getTII().get(TargetOpcode::G_INTRINSIC)); - NewInstr - .addDef(ResultReg) // Result register - .addIntrinsicID(Intrinsic::spv_distance) // Intrinsic ID - .addUse(SubOperand1) // Operand X - .addUse(SubOperand2); // Operand Y - - SPIRVGlobalRegistry *GR = - MI.getMF()->getSubtarget<SPIRVSubtarget>().getSPIRVGlobalRegistry(); - auto RemoveAllUses = [&](Register Reg) { - SmallVector<MachineInstr *, 4> UsesToErase( - llvm::make_pointer_range(MRI.use_instructions(Reg))); - - // calling eraseFromParent to early invalidates the iterator. 
- for (auto *MIToErase : UsesToErase) { - GR->invalidateMachineInstr(MIToErase); - MIToErase->eraseFromParent(); - } - }; - RemoveAllUses(SubDestReg); // remove all uses of FSUB Result - GR->invalidateMachineInstr(SubInstr); - SubInstr->eraseFromParent(); // remove FSUB instruction -} - class SPIRVPreLegalizerCombinerImpl : public Combiner { protected: - const CombinerHelper Helper; + const SPIRVCombinerHelper Helper; const SPIRVPreLegalizerCombinerImplRuleConfig &RuleConfig; const SPIRVSubtarget &STI; @@ -147,7 +76,7 @@ SPIRVPreLegalizerCombinerImpl::SPIRVPreLegalizerCombinerImpl( const SPIRVSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI) : Combiner(MF, CInfo, TPC, &VT, CSEInfo), - Helper(Observer, B, /*IsPreLegalize*/ true, &VT, MDT, LI), + Helper(Observer, B, /*IsPreLegalize*/ true, &VT, MDT, LI, STI), RuleConfig(RuleConfig), STI(STI), #define GET_GICOMBINER_CONSTRUCTOR_INITS #include "SPIRVGenPreLegalizeGICombiner.inc" diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1cfcb1f..eea84a2 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3633,7 +3633,7 @@ bool X86TargetLowering::preferScalarizeSplat(SDNode *N) const { } bool X86TargetLowering::shouldFoldConstantShiftPairToMask( - const SDNode *N, CombineLevel Level) const { + const SDNode *N) const { assert(((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && @@ -3648,7 +3648,7 @@ bool X86TargetLowering::shouldFoldConstantShiftPairToMask( // the fold for non-splats yet. return N->getOperand(1) == N->getOperand(0).getOperand(1); } - return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N, Level); + return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N); } bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const { diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index b55556a..e28b9c1 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1244,8 +1244,7 @@ namespace llvm { getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs) const override; - bool shouldFoldConstantShiftPairToMask(const SDNode *N, - CombineLevel Level) const override; + bool shouldFoldConstantShiftPairToMask(const SDNode *N) const override; bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override; diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp index ac3626d..f021094 100644 --- a/llvm/lib/TargetParser/Triple.cpp +++ b/llvm/lib/TargetParser/Triple.cpp @@ -375,6 +375,8 @@ StringRef Triple::getEnvironmentTypeName(EnvironmentType Kind) { case MuslSF: return "muslsf"; case MuslX32: return "muslx32"; + case MuslWALI: + return "muslwali"; case Simulator: return "simulator"; case Pixel: return "pixel"; case Vertex: return "vertex"; @@ -767,6 +769,7 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) { .StartsWith("muslf32", Triple::MuslF32) .StartsWith("muslsf", Triple::MuslSF) .StartsWith("muslx32", Triple::MuslX32) + .StartsWith("muslwali", Triple::MuslWALI) .StartsWith("musl", Triple::Musl) .StartsWith("msvc", Triple::MSVC) .StartsWith("itanium", Triple::Itanium) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 8c8fc69..6b67b48 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ 
@@ -544,8 +544,18 @@ Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
     Value *NewSel = Builder.CreateSelect(SI.getCondition(), Swapped ? C : OOp,
                                          Swapped ? OOp : C, "", &SI);
-    if (isa<FPMathOperator>(&SI))
-      cast<Instruction>(NewSel)->setFastMathFlags(FMF);
+    if (isa<FPMathOperator>(&SI)) {
+      FastMathFlags NewSelFMF = FMF;
+      // We cannot propagate ninf from the original select, because OOp may be
+      // inf and the flag only guarantees that FalseVal (op OOp) is never
+      // infinity.
+      // Examples: -inf + +inf = NaN, -inf - -inf = NaN, 0 * inf = NaN
+      // However, if the original select has both ninf and nnan, we can
+      // safely propagate ninf.
+      NewSelFMF.setNoInfs(TVI->hasNoInfs() ||
+                          (NewSelFMF.noInfs() && NewSelFMF.noNaNs()));
+      cast<Instruction>(NewSel)->setFastMathFlags(NewSelFMF);
+    }
     NewSel->takeName(TVI);
     BinaryOperator *BO =
         BinaryOperator::Create(TVI->getOpcode(), FalseVal, NewSel);
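The IEEE-754 counterexamples cited in the new comment are easy to confirm outside the compiler; a throwaway check (plain C++, assuming the host's double is IEEE-754, which is all this snippet relies on):

#include <cstdio>
#include <limits>

int main() {
  const double Inf = std::numeric_limits<double>::infinity();
  // Each expression evaluates to NaN: an infinite operand can poison the
  // result even when the final select value is guaranteed finite, which is
  // why ninf alone must not be propagated to the new select.
  std::printf("%f\n", -Inf + Inf);  // -inf + +inf
  std::printf("%f\n", -Inf - -Inf); // -inf - -inf
  std::printf("%f\n", 0.0 * Inf);   // 0 * inf
  return 0;
}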
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 45d3d49..b9d332b 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2961,6 +2961,7 @@ public:
         isa<FixedVectorType>(NewAI.getAllocatedType())
             ? cast<FixedVectorType>(NewAI.getAllocatedType())->getElementType()
             : Type::getInt8Ty(NewAI.getContext());
+    unsigned AllocatedEltTySize = DL.getTypeSizeInBits(AllocatedEltTy);
 
     // Helper to check if a type is
     // 1. A fixed vector type
@@ -2991,10 +2992,17 @@ public:
         // Do not handle the case if
         // 1. The store does not meet the conditions in the helper function
         // 2. The store is volatile
+        // 3. The total store size is not a multiple of the allocated element
+        //    type size
         if (!IsTypeValidForTreeStructuredMerge(
                 SI->getValueOperand()->getType()) ||
             SI->isVolatile())
           return std::nullopt;
+        auto *VecTy = cast<FixedVectorType>(SI->getValueOperand()->getType());
+        unsigned NumElts = VecTy->getNumElements();
+        unsigned EltSize = DL.getTypeSizeInBits(VecTy->getElementType());
+        if (NumElts * EltSize % AllocatedEltTySize != 0)
+          return std::nullopt;
         StoreInfos.emplace_back(SI, S.beginOffset(), S.endOffset(),
                                 SI->getValueOperand());
       } else {
diff --git a/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/llvm/lib/Transforms/Utils/InstructionNamer.cpp
index 3ae570c..4f1ff7b 100644
--- a/llvm/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/llvm/lib/Transforms/Utils/InstructionNamer.cpp
@@ -20,9 +20,8 @@
 using namespace llvm;
 
-namespace {
-void nameInstructions(Function &F) {
-  for (auto &Arg : F.args()) {
+static void nameInstructions(Function &F) {
+  for (Argument &Arg : F.args()) {
     if (!Arg.hasName())
       Arg.setName("arg");
   }
 
@@ -38,8 +37,6 @@ void nameInstructions(Function &F) {
     }
   }
 
-} // namespace
-
 PreservedAnalyses InstructionNamerPass::run(Function &F,
                                             FunctionAnalysisManager &FAM) {
   nameInstructions(F);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e62d57e..50136a8 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9348,13 +9348,12 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
   VPBasicBlock *Header = VectorLoop->getEntryBasicBlock();
   Header->setName("vec.epilog.vector.body");
 
-  // Ensure that the start values for all header phi recipes are updated before
-  // vectorizing the epilogue loop.
   VPCanonicalIVPHIRecipe *IV = Plan.getCanonicalIV();
-  // When vectorizing the epilogue loop, the canonical induction start
-  // value needs to be changed from zero to the value after the main
-  // vector loop. Find the resume value created during execution of the main
-  // VPlan. It must be the first phi in the loop preheader.
+  // When vectorizing the epilogue loop, the canonical induction needs to be
+  // adjusted by the value after the main vector loop. Find the resume value
+  // created during execution of the main VPlan. It must be the first phi in the
+  // loop preheader. Use the value to increment the canonical IV, and update all
+  // users in the loop region to use the adjusted value.
   // FIXME: Improve modeling for canonical IV start values in the epilogue
   // loop.
   using namespace llvm::PatternMatch;
@@ -9389,10 +9388,16 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
          }) &&
          "the canonical IV should only be used by its increment or "
          "ScalarIVSteps when resetting the start value");
-  IV->setOperand(0, VPV);
+  VPBuilder Builder(Header, Header->getFirstNonPhi());
+  VPInstruction *Add = Builder.createNaryOp(Instruction::Add, {IV, VPV});
+  IV->replaceAllUsesWith(Add);
+  Add->setOperand(0, IV);
 
   DenseMap<Value *, Value *> ToFrozen;
   SmallVector<Instruction *> InstsToMove;
+  // Ensure that the start values for all header phi recipes are updated before
+  // vectorizing the epilogue loop. Skip the canonical IV, which has been
+  // handled above.
   for (VPRecipeBase &R : drop_begin(Header->phis())) {
     Value *ResumeV = nullptr;
     // TODO: Move setting of resume values to prepareToExecute.
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cfa8d27..2388375 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2245,6 +2245,26 @@ public:
                      Align Alignment, const int64_t Diff, Value *Ptr0,
                      Value *PtrN, StridedPtrInfo &SPtrInfo) const;
 
+  /// Return true if an array of scalar loads can be replaced with a strided
+  /// load (with run-time stride).
+  /// \param PointerOps list of pointer arguments of loads.
+  /// \param ScalarTy type of loads.
+  /// \param CommonAlignment common alignment of loads as computed by
+  /// `computeCommonAlignment<LoadInst>`.
+  /// \param SortedIndices is a list of indices computed by this function such
+  /// that the sequence `PointerOps[SortedIndices[0]],
+  /// PointerOps[SortedIndices[1]], ..., PointerOps[SortedIndices[n]]` is
+  /// ordered by the coefficient of the stride. For example, if PointerOps is
+  /// `%base + %stride, %base, %base + 2 * %stride` the `SortedIndices` will be
+  /// `[1, 0, 2]`. We follow the convention that if `SortedIndices` has to be
+  /// `0, 1, 2, 3, ...` we return an empty vector for `SortedIndices`.
+  /// \param SPtrInfo If the function returns `true`, it also sets all the fields
+  /// of `SPtrInfo` necessary to generate the strided load later.
+  bool analyzeRtStrideCandidate(ArrayRef<Value *> PointerOps, Type *ScalarTy,
+                                Align CommonAlignment,
+                                SmallVectorImpl<unsigned> &SortedIndices,
+                                StridedPtrInfo &SPtrInfo) const;
+
   /// Checks if the given array of loads can be represented as a vectorized,
   /// scatter or just simple gather.
   /// \param VL list of loads.
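The SortedIndices convention documented above can be pictured with a small standalone helper (a hypothetical STL-only sketch; the real implementation derives the order from SCEVs inside calculateRtStride):

#include <algorithm>
#include <numeric>
#include <vector>

// Given the stride coefficient of each pointer (e.g. {1, 0, 2} for
// {%base + %stride, %base, %base + 2 * %stride}), return the permutation
// ordering the pointers by coefficient -- here {1, 0, 2} -- or an empty
// vector when the input is already ordered, matching the doc comment.
std::vector<unsigned> sortedIndices(const std::vector<int> &Coeff) {
  std::vector<unsigned> Idx(Coeff.size());
  std::iota(Idx.begin(), Idx.end(), 0u);
  std::stable_sort(Idx.begin(), Idx.end(),
                   [&](unsigned A, unsigned B) { return Coeff[A] < Coeff[B]; });
  // A sorted permutation of 0..n-1 must be the identity, which the
  // convention encodes as an empty result.
  if (std::is_sorted(Idx.begin(), Idx.end()))
    return {};
  return Idx;
}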
@@ -6875,6 +6895,24 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
   return false;
 }
 
+bool BoUpSLP::analyzeRtStrideCandidate(ArrayRef<Value *> PointerOps,
+                                       Type *ScalarTy, Align CommonAlignment,
+                                       SmallVectorImpl<unsigned> &SortedIndices,
+                                       StridedPtrInfo &SPtrInfo) const {
+  const unsigned Sz = PointerOps.size();
+  FixedVectorType *StridedLoadTy = getWidenedType(ScalarTy, Sz);
+  if (Sz <= MinProfitableStridedLoads || !TTI->isTypeLegal(StridedLoadTy) ||
+      !TTI->isLegalStridedLoadStore(StridedLoadTy, CommonAlignment))
+    return false;
+  if (const SCEV *Stride =
+          calculateRtStride(PointerOps, ScalarTy, *DL, *SE, SortedIndices)) {
+    SPtrInfo.Ty = getWidenedType(ScalarTy, PointerOps.size());
+    SPtrInfo.StrideSCEV = Stride;
+    return true;
+  }
+  return false;
+}
+
 BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
     ArrayRef<Value *> VL, const Value *VL0, SmallVectorImpl<unsigned> &Order,
     SmallVectorImpl<Value *> &PointerOps, StridedPtrInfo &SPtrInfo,
@@ -6915,15 +6953,9 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
   auto *VecTy = getWidenedType(ScalarTy, Sz);
   Align CommonAlignment = computeCommonAlignment<LoadInst>(VL);
   if (!IsSorted) {
-    if (Sz > MinProfitableStridedLoads && TTI->isTypeLegal(VecTy)) {
-      if (const SCEV *Stride =
-              calculateRtStride(PointerOps, ScalarTy, *DL, *SE, Order);
-          Stride && TTI->isLegalStridedLoadStore(VecTy, CommonAlignment)) {
-        SPtrInfo.Ty = getWidenedType(ScalarTy, PointerOps.size());
-        SPtrInfo.StrideSCEV = Stride;
-        return LoadsState::StridedVectorize;
-      }
-    }
+    if (analyzeRtStrideCandidate(PointerOps, ScalarTy, CommonAlignment, Order,
+                                 SPtrInfo))
+      return LoadsState::StridedVectorize;
 
     if (!TTI->isLegalMaskedGather(VecTy, CommonAlignment) ||
         TTI->forceScalarizeMaskedGather(VecTy, CommonAlignment))
@@ -10632,7 +10664,9 @@ class InstructionsCompatibilityAnalysis {
   void findAndSetMainInstruction(ArrayRef<Value *> VL, const BoUpSLP &R) {
     BasicBlock *Parent = nullptr;
     // Checks if the instruction has supported opcode.
-    auto IsSupportedInstruction = [&](Instruction *I) {
+    auto IsSupportedInstruction = [&](Instruction *I, bool AnyUndef) {
+      if (AnyUndef && (I->isIntDivRem() || I->isFPDivRem() || isa<CallInst>(I)))
+        return false;
       return I && isSupportedOpcode(I->getOpcode()) &&
              (!doesNotNeedToBeScheduled(I) || !R.isVectorized(I));
     };
@@ -10640,10 +10674,13 @@
     // will be unable to schedule anyway.
     SmallDenseSet<Value *, 8> Operands;
     SmallMapVector<unsigned, SmallVector<Instruction *>, 4> Candidates;
+    bool AnyUndef = false;
     for (Value *V : VL) {
       auto *I = dyn_cast<Instruction>(V);
-      if (!I)
+      if (!I) {
+        AnyUndef |= isa<UndefValue>(V);
         continue;
+      }
       if (!DT.isReachableFromEntry(I->getParent()))
         continue;
       if (Candidates.empty()) {
@@ -10678,7 +10715,7 @@
         if (P.second.size() < BestOpcodeNum)
           continue;
         for (Instruction *I : P.second) {
-          if (IsSupportedInstruction(I) && !Operands.contains(I)) {
+          if (IsSupportedInstruction(I, AnyUndef) && !Operands.contains(I)) {
             MainOp = I;
             BestOpcodeNum = P.second.size();
             break;
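A note on the new AnyUndef guard above (this is an editorial reading of the change; the patch itself states no rationale). When some lanes of VL are undef rather than instructions, widening fills those gap lanes with undef/poison operands, which is presumably why division/remainder and calls are no longer eligible as the main opcode in that case:

// Illustration (hypothetical bundle, not from the patch):
//   VL = { %a = udiv i32 %x, %y,  undef,  %b = udiv i32 %p, %q }
// Choosing udiv as the main opcode would materialize the undef lane as a
// vector division with a poison divisor lane, and integer division by
// poison is immediate undefined behavior; a call in that position could
// trap or have side effects. Most other opcodes merely yield poison in the
// gap lane, so they remain acceptable.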
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 2555ebe..1fea068 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1777,6 +1777,9 @@ InstructionCost VPCostContext::getScalarizationOverhead(
   if (VF.isScalar())
     return 0;
 
+  assert(!VF.isScalable() &&
+         "Scalarization overhead not supported for scalable vectors");
+
   InstructionCost ScalarizationCost = 0;
   // Compute the cost of scalarizing the result if needed.
   if (!ResultTy->isVoidTy()) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 600ff8a..8e916772 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3174,6 +3174,9 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
   // transform, avoid computing their cost multiple times for now.
   Ctx.SkipCostComputation.insert(UI);
 
+  if (VF.isScalable() && !isSingleScalar())
+    return InstructionCost::getInvalid();
+
   switch (UI->getOpcode()) {
   case Instruction::GetElementPtr:
     // We mark this instruction as zero-cost because the cost of GEPs in
@@ -3221,9 +3224,6 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
       return ScalarCallCost;
     }
 
-    if (VF.isScalable())
-      return InstructionCost::getInvalid();
-
     return ScalarCallCost * VF.getFixedValue() +
            Ctx.getScalarizationOverhead(ResultTy, ArgOps, VF);
   }
@@ -3274,9 +3274,6 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
   }
   case Instruction::Load:
   case Instruction::Store: {
-    if (VF.isScalable() && !isSingleScalar())
-      return InstructionCost::getInvalid();
-
     // TODO: See getMemInstScalarizationCost for how to handle replicating and
     // predicated cases.
     const VPRegionBlock *ParentRegion = getParent()->getParent();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index c8a2d84..7563cd7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1234,6 +1234,18 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
   if (!Plan->isUnrolled())
     return;
 
+  // Hoist an invariant increment Y of a phi X, by having X start at Y.
+  if (match(Def, m_c_Add(m_VPValue(X), m_VPValue(Y))) && Y->isLiveIn() &&
+      isa<VPPhi>(X)) {
+    auto *Phi = cast<VPPhi>(X);
+    if (Phi->getOperand(1) != Def && match(Phi->getOperand(0), m_ZeroInt()) &&
+        Phi->getNumUsers() == 1 && (*Phi->user_begin() == &R)) {
+      Phi->setOperand(0, Y);
+      Def->replaceAllUsesWith(Phi);
+      return;
+    }
+  }
+
   // VPVectorPointer for part 0 can be replaced by their start pointer.
   if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(&R)) {
    if (VecPtr->isFirstPart()) {
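Read together with the LoopVectorize change earlier in this patch, the new hoist in simplifyRecipe is what folds the epilogue's `iv + resume` adjustment back into the canonical IV phi. A before/after sketch (VPlan notation approximated and names invented; only the shapes come from the code above):

// Before simplification, as built by preparePlanForEpilogueVectorLoop:
//   iv  = phi [ 0, preheader ], [ iv.next, loop ]  ; only user: adj
//   adj = add iv, resume        ; resume is a live-in from the main loop;
//                               ; former users of iv now use adj
//
// After the hoist (phi starts at the invariant increment):
//   iv  = phi [ resume, preheader ], [ iv.next, loop ]
//                               ; users of adj use iv directly; adj is dead
//
// Net effect: the epilogue's canonical IV begins at the iteration count the
// main vector loop already executed, as with the old setOperand(0, VPV)
// approach, but expressed through an ordinary add that the simplifier folds.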