Diffstat (limited to 'llvm/lib')
121 files changed, 1927 insertions, 839 deletions
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index b5b4cd9..00c3dbb 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -5419,20 +5419,15 @@ static Type *isSimpleCastedPHI(const SCEV *Op, const SCEVUnknown *SymbolicPHI, if (SourceBits != NewBits) return nullptr; - const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(Op); - const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(Op); - if (!SExt && !ZExt) - return nullptr; - const SCEVTruncateExpr *Trunc = - SExt ? dyn_cast<SCEVTruncateExpr>(SExt->getOperand()) - : dyn_cast<SCEVTruncateExpr>(ZExt->getOperand()); - if (!Trunc) - return nullptr; - const SCEV *X = Trunc->getOperand(); - if (X != SymbolicPHI) - return nullptr; - Signed = SExt != nullptr; - return Trunc->getType(); + if (match(Op, m_scev_SExt(m_scev_Trunc(m_scev_Specific(SymbolicPHI))))) { + Signed = true; + return cast<SCEVCastExpr>(Op)->getOperand()->getType(); + } + if (match(Op, m_scev_ZExt(m_scev_Trunc(m_scev_Specific(SymbolicPHI))))) { + Signed = false; + return cast<SCEVCastExpr>(Op)->getOperand()->getType(); + } + return nullptr; } static const Loop *isIntegerLoopHeaderPHI(const PHINode *PN, LoopInfo &LI) { @@ -15428,20 +15423,18 @@ bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS, // Try to match 'zext (trunc A to iB) to iY', which is used // for URem with constant power-of-2 second operands. Make sure the size of // the operand A matches the size of the whole expressions. - if (const auto *ZExt = dyn_cast<SCEVZeroExtendExpr>(Expr)) - if (const auto *Trunc = dyn_cast<SCEVTruncateExpr>(ZExt->getOperand(0))) { - LHS = Trunc->getOperand(); - // Bail out if the type of the LHS is larger than the type of the - // expression for now. - if (getTypeSizeInBits(LHS->getType()) > - getTypeSizeInBits(Expr->getType())) - return false; - if (LHS->getType() != Expr->getType()) - LHS = getZeroExtendExpr(LHS, Expr->getType()); - RHS = getConstant(APInt(getTypeSizeInBits(Expr->getType()), 1) - << getTypeSizeInBits(Trunc->getType())); - return true; - } + if (match(Expr, m_scev_ZExt(m_scev_Trunc(m_SCEV(LHS))))) { + Type *TruncTy = cast<SCEVZeroExtendExpr>(Expr)->getOperand()->getType(); + // Bail out if the type of the LHS is larger than the type of the + // expression for now. + if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(Expr->getType())) + return false; + if (LHS->getType() != Expr->getType()) + LHS = getZeroExtendExpr(LHS, Expr->getType()); + RHS = getConstant(APInt(getTypeSizeInBits(Expr->getType()), 1) + << getTypeSizeInBits(TruncTy)); + return true; + } const auto *Add = dyn_cast<SCEVAddExpr>(Expr); if (Add == nullptr || Add->getNumOperands() != 2) return false; diff --git a/llvm/lib/Analysis/StaticDataProfileInfo.cpp b/llvm/lib/Analysis/StaticDataProfileInfo.cpp index b036b2d..1f751ee 100644 --- a/llvm/lib/Analysis/StaticDataProfileInfo.cpp +++ b/llvm/lib/Analysis/StaticDataProfileInfo.cpp @@ -6,6 +6,46 @@ #include "llvm/ProfileData/InstrProf.h" using namespace llvm; + +namespace llvm { +namespace memprof { +// Returns true iff the global variable has custom section either by +// __attribute__((section("name"))) +// (https://clang.llvm.org/docs/AttributeReference.html#section-declspec-allocate) +// or #pragma clang section directives +// (https://clang.llvm.org/docs/LanguageExtensions.html#specifying-section-names-for-global-objects-pragma-clang-section). 
+static bool hasExplicitSectionName(const GlobalVariable &GVar) { + if (GVar.hasSection()) + return true; + + auto Attrs = GVar.getAttributes(); + if (Attrs.hasAttribute("bss-section") || Attrs.hasAttribute("data-section") || + Attrs.hasAttribute("relro-section") || + Attrs.hasAttribute("rodata-section")) + return true; + return false; +} + +AnnotationKind getAnnotationKind(const GlobalVariable &GV) { + if (GV.isDeclarationForLinker()) + return AnnotationKind::DeclForLinker; + // Skip 'llvm.'-prefixed global variables conservatively because they are + // often handled specially, + StringRef Name = GV.getName(); + if (Name.starts_with("llvm.")) + return AnnotationKind::ReservedName; + // Respect user-specified custom data sections. + if (hasExplicitSectionName(GV)) + return AnnotationKind::ExplicitSection; + return AnnotationKind::AnnotationOK; +} + +bool IsAnnotationOK(const GlobalVariable &GV) { + return getAnnotationKind(GV) == AnnotationKind::AnnotationOK; +} +} // namespace memprof +} // namespace llvm + void StaticDataProfileInfo::addConstantProfileCount( const Constant *C, std::optional<uint64_t> Count) { if (!Count) { diff --git a/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp index 6356d71..873ac8f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp @@ -20,7 +20,7 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" -namespace llvm { +using namespace llvm; AIXException::AIXException(AsmPrinter *A) : EHStreamer(A) {} @@ -90,5 +90,3 @@ void AIXException::endFunction(const MachineFunction *MF) { emitExceptionInfoTable(LSDALabel, PerSym); } - -} // End of namespace llvm diff --git a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp index 260ce8f..93ae548 100644 --- a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp +++ b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp @@ -85,8 +85,7 @@ template <> struct llvm::DenseMapInfo<VariableID> { using VarLocInsertPt = PointerUnion<const Instruction *, const DbgRecord *>; -namespace std { -template <> struct hash<VarLocInsertPt> { +template <> struct std::hash<VarLocInsertPt> { using argument_type = VarLocInsertPt; using result_type = std::size_t; @@ -94,7 +93,6 @@ template <> struct hash<VarLocInsertPt> { return std::hash<void *>()(Arg.getOpaqueValue()); } }; -} // namespace std /// Helper class to build FunctionVarLocs, since that class isn't easy to /// modify. 
TODO: There's not a great deal of value in the split, it could be diff --git a/llvm/lib/CodeGen/BasicBlockPathCloning.cpp b/llvm/lib/CodeGen/BasicBlockPathCloning.cpp index fd7df6b..47b7a88 100644 --- a/llvm/lib/CodeGen/BasicBlockPathCloning.cpp +++ b/llvm/lib/CodeGen/BasicBlockPathCloning.cpp @@ -207,9 +207,7 @@ bool ApplyCloning(MachineFunction &MF, } return AnyPathsCloned; } -} // end anonymous namespace -namespace llvm { class BasicBlockPathCloning : public MachineFunctionPass { public: static char ID; @@ -229,7 +227,7 @@ public: bool runOnMachineFunction(MachineFunction &MF) override; }; -} // namespace llvm +} // namespace char BasicBlockPathCloning::ID = 0; INITIALIZE_PASS_BEGIN( diff --git a/llvm/lib/CodeGen/BreakFalseDeps.cpp b/llvm/lib/CodeGen/BreakFalseDeps.cpp index 28e6728..1846880 100644 --- a/llvm/lib/CodeGen/BreakFalseDeps.cpp +++ b/llvm/lib/CodeGen/BreakFalseDeps.cpp @@ -31,7 +31,7 @@ using namespace llvm; -namespace llvm { +namespace { class BreakFalseDeps : public MachineFunctionPass { private: @@ -95,7 +95,7 @@ private: void processUndefReads(MachineBasicBlock *); }; -} // namespace llvm +} // namespace #define DEBUG_TYPE "break-false-deps" diff --git a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp index 6c2a5a7..87ada87 100644 --- a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp +++ b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp @@ -126,8 +126,7 @@ hash_code hash_value(const ComplexValue &Arg) { } // end namespace typedef SmallVector<struct ComplexValue, 2> ComplexValues; -namespace llvm { -template <> struct DenseMapInfo<ComplexValue> { +template <> struct llvm::DenseMapInfo<ComplexValue> { static inline ComplexValue getEmptyKey() { return {DenseMapInfo<Value *>::getEmptyKey(), DenseMapInfo<Value *>::getEmptyKey()}; @@ -144,7 +143,6 @@ template <> struct DenseMapInfo<ComplexValue> { return LHS.Real == RHS.Real && LHS.Imag == RHS.Imag; } }; -} // end namespace llvm namespace { template <typename T, typename IterT> diff --git a/llvm/lib/CodeGen/EdgeBundles.cpp b/llvm/lib/CodeGen/EdgeBundles.cpp index f4335396..50dd66f 100644 --- a/llvm/lib/CodeGen/EdgeBundles.cpp +++ b/llvm/lib/CodeGen/EdgeBundles.cpp @@ -81,13 +81,10 @@ void EdgeBundles::init() { } } -namespace llvm { - /// Specialize WriteGraph, the standard implementation won't work. -template<> -raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G, - bool ShortNames, - const Twine &Title) { +template <> +raw_ostream &llvm::WriteGraph<>(raw_ostream &O, const EdgeBundles &G, + bool ShortNames, const Twine &Title) { const MachineFunction *MF = G.getMachineFunction(); O << "digraph {\n"; @@ -107,8 +104,6 @@ raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G, return O; } -} // end namespace llvm - /// view - Visualize the annotated bipartite CFG with Graphviz. void EdgeBundles::view() const { ViewGraph(*this, "EdgeBundles"); diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp index 9cc6c6a..04c7008 100644 --- a/llvm/lib/CodeGen/ExpandFp.cpp +++ b/llvm/lib/CodeGen/ExpandFp.cpp @@ -82,7 +82,7 @@ public: } static FRemExpander create(IRBuilder<> &B, Type *Ty) { - assert(canExpandType(Ty)); + assert(canExpandType(Ty) && "Expected supported floating point type"); // The type to use for the computation of the remainder. This may be // wider than the input/result type which affects the ... 
@@ -356,8 +356,9 @@ Value *FRemExpander::buildFRem(Value *X, Value *Y, static bool expandFRem(BinaryOperator &I, std::optional<SimplifyQuery> &SQ) { LLVM_DEBUG(dbgs() << "Expanding instruction: " << I << '\n'); - Type *ReturnTy = I.getType(); - assert(FRemExpander::canExpandType(ReturnTy->getScalarType())); + Type *Ty = I.getType(); + assert(FRemExpander::canExpandType(Ty) && + "Expected supported floating point type"); FastMathFlags FMF = I.getFastMathFlags(); // TODO Make use of those flags for optimization? @@ -368,32 +369,10 @@ static bool expandFRem(BinaryOperator &I, std::optional<SimplifyQuery> &SQ) { B.setFastMathFlags(FMF); B.SetCurrentDebugLocation(I.getDebugLoc()); - Type *ElemTy = ReturnTy->getScalarType(); - const FRemExpander Expander = FRemExpander::create(B, ElemTy); - - Value *Ret; - if (ReturnTy->isFloatingPointTy()) - Ret = FMF.approxFunc() - ? Expander.buildApproxFRem(I.getOperand(0), I.getOperand(1)) - : Expander.buildFRem(I.getOperand(0), I.getOperand(1), SQ); - else { - auto *VecTy = cast<FixedVectorType>(ReturnTy); - - // This could use SplitBlockAndInsertForEachLane but the interface - // is a bit awkward for a constant number of elements and it will - // boil down to the same code. - // TODO Expand the FRem instruction only once and reuse the code. - Value *Nums = I.getOperand(0); - Value *Denums = I.getOperand(1); - Ret = PoisonValue::get(I.getType()); - for (int I = 0, E = VecTy->getNumElements(); I != E; ++I) { - Value *Num = B.CreateExtractElement(Nums, I); - Value *Denum = B.CreateExtractElement(Denums, I); - Value *Rem = FMF.approxFunc() ? Expander.buildApproxFRem(Num, Denum) - : Expander.buildFRem(Num, Denum, SQ); - Ret = B.CreateInsertElement(Ret, Rem, I); - } - } + const FRemExpander Expander = FRemExpander::create(B, Ty); + Value *Ret = FMF.approxFunc() + ? 
Expander.buildApproxFRem(I.getOperand(0), I.getOperand(1)) + : Expander.buildFRem(I.getOperand(0), I.getOperand(1), SQ); I.replaceAllUsesWith(Ret); Ret->takeName(&I); @@ -939,7 +918,8 @@ static void expandIToFP(Instruction *IToFP) { IToFP->eraseFromParent(); } -static void scalarize(Instruction *I, SmallVectorImpl<Instruction *> &Replace) { +static void scalarize(Instruction *I, + SmallVectorImpl<Instruction *> &Worklist) { VectorType *VTy = cast<FixedVectorType>(I->getType()); IRBuilder<> Builder(I); @@ -948,12 +928,25 @@ static void scalarize(Instruction *I, SmallVectorImpl<Instruction *> &Replace) { Value *Result = PoisonValue::get(VTy); for (unsigned Idx = 0; Idx < NumElements; ++Idx) { Value *Ext = Builder.CreateExtractElement(I->getOperand(0), Idx); - Value *Cast = Builder.CreateCast(cast<CastInst>(I)->getOpcode(), Ext, - I->getType()->getScalarType()); - Result = Builder.CreateInsertElement(Result, Cast, Idx); - if (isa<Instruction>(Cast)) - Replace.push_back(cast<Instruction>(Cast)); + + Value *NewOp = nullptr; + if (auto *BinOp = dyn_cast<BinaryOperator>(I)) + NewOp = Builder.CreateBinOp( + BinOp->getOpcode(), Ext, + Builder.CreateExtractElement(I->getOperand(1), Idx)); + else if (auto *CastI = dyn_cast<CastInst>(I)) + NewOp = Builder.CreateCast(CastI->getOpcode(), Ext, + I->getType()->getScalarType()); + else + llvm_unreachable("Unsupported instruction type"); + + Result = Builder.CreateInsertElement(Result, NewOp, Idx); + if (auto *ScalarizedI = dyn_cast<Instruction>(NewOp)) { + ScalarizedI->copyIRFlags(I, true); + Worklist.push_back(ScalarizedI); + } } + I->replaceAllUsesWith(Result); I->dropAllReferences(); I->eraseFromParent(); @@ -989,10 +982,17 @@ static bool targetSupportsFrem(const TargetLowering &TLI, Type *Ty) { return TLI.getLibcallName(fremToLibcall(Ty->getScalarType())); } +static void addToWorklist(Instruction &I, + SmallVector<Instruction *, 4> &Worklist) { + if (I.getOperand(0)->getType()->isVectorTy()) + scalarize(&I, Worklist); + else + Worklist.push_back(&I); +} + static bool runImpl(Function &F, const TargetLowering &TLI, AssumptionCache *AC) { - SmallVector<Instruction *, 4> Replace; - SmallVector<Instruction *, 4> ReplaceVector; + SmallVector<Instruction *, 4> Worklist; bool Modified = false; unsigned MaxLegalFpConvertBitWidth = @@ -1003,55 +1003,39 @@ static bool runImpl(Function &F, const TargetLowering &TLI, if (MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS) return false; - for (auto &I : instructions(F)) { - switch (I.getOpcode()) { - case Instruction::FRem: { - Type *Ty = I.getType(); - // TODO: This pass doesn't handle scalable vectors. - if (Ty->isScalableTy()) - continue; - - if (targetSupportsFrem(TLI, Ty) || - !FRemExpander::canExpandType(Ty->getScalarType())) - continue; - - Replace.push_back(&I); - Modified = true; + for (auto It = inst_begin(&F), End = inst_end(F); It != End;) { + Instruction &I = *It++; + Type *Ty = I.getType(); + // TODO: This pass doesn't handle scalable vectors. + if (Ty->isScalableTy()) + continue; + switch (I.getOpcode()) { + case Instruction::FRem: + if (!targetSupportsFrem(TLI, Ty) && + FRemExpander::canExpandType(Ty->getScalarType())) { + addToWorklist(I, Worklist); + Modified = true; + } break; - } case Instruction::FPToUI: case Instruction::FPToSI: { - // TODO: This pass doesn't handle scalable vectors. 
- if (I.getOperand(0)->getType()->isScalableTy()) - continue; - - auto *IntTy = cast<IntegerType>(I.getType()->getScalarType()); + auto *IntTy = cast<IntegerType>(Ty->getScalarType()); if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth) continue; - if (I.getOperand(0)->getType()->isVectorTy()) - ReplaceVector.push_back(&I); - else - Replace.push_back(&I); + addToWorklist(I, Worklist); Modified = true; break; } case Instruction::UIToFP: case Instruction::SIToFP: { - // TODO: This pass doesn't handle scalable vectors. - if (I.getOperand(0)->getType()->isScalableTy()) - continue; - auto *IntTy = cast<IntegerType>(I.getOperand(0)->getType()->getScalarType()); if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth) continue; - if (I.getOperand(0)->getType()->isVectorTy()) - ReplaceVector.push_back(&I); - else - Replace.push_back(&I); + addToWorklist(I, Worklist); Modified = true; break; } @@ -1060,16 +1044,8 @@ static bool runImpl(Function &F, const TargetLowering &TLI, } } - while (!ReplaceVector.empty()) { - Instruction *I = ReplaceVector.pop_back_val(); - scalarize(I, Replace); - } - - if (Replace.empty()) - return false; - - while (!Replace.empty()) { - Instruction *I = Replace.pop_back_val(); + while (!Worklist.empty()) { + Instruction *I = Worklist.pop_back_val(); if (I->getOpcode() == Instruction::FRem) { auto SQ = [&]() -> std::optional<SimplifyQuery> { if (AC) { diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp index 90c60d4..3812823 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp @@ -1975,6 +1975,44 @@ unsigned GISelValueTracking::computeNumSignBits(Register R, break; } + case TargetOpcode::G_SUB: { + Register Src2 = MI.getOperand(2).getReg(); + unsigned Src2NumSignBits = + computeNumSignBits(Src2, DemandedElts, Depth + 1); + if (Src2NumSignBits == 1) + return 1; // Early out. + + // Handle NEG. + Register Src1 = MI.getOperand(1).getReg(); + KnownBits Known1 = getKnownBits(Src1, DemandedElts, Depth); + if (Known1.isZero()) { + KnownBits Known2 = getKnownBits(Src2, DemandedElts, Depth); + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set. + if ((Known2.Zero | 1).isAllOnes()) + return TyBits; + + // If the input is known to be positive (the sign bit is known clear), + // the output of the NEG has, at worst, the same number of sign bits as + // the input. + if (Known2.isNonNegative()) { + FirstAnswer = Src2NumSignBits; + break; + } + + // Otherwise, we treat this like a SUB. + } + + unsigned Src1NumSignBits = + computeNumSignBits(Src1, DemandedElts, Depth + 1); + if (Src1NumSignBits == 1) + return 1; // Early Out. + + // Sub can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. 
+ FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits) - 1; + break; + } case TargetOpcode::G_FCMP: case TargetOpcode::G_ICMP: { bool IsFP = Opcode == TargetOpcode::G_FCMP; diff --git a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp index 47640c4a..81ab317 100644 --- a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp +++ b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp @@ -587,16 +587,12 @@ public: } // namespace char GlobalMergeFuncPassWrapper::ID = 0; -INITIALIZE_PASS_BEGIN(GlobalMergeFuncPassWrapper, "global-merge-func", - "Global merge function pass", false, false) -INITIALIZE_PASS_END(GlobalMergeFuncPassWrapper, "global-merge-func", - "Global merge function pass", false, false) +INITIALIZE_PASS(GlobalMergeFuncPassWrapper, "global-merge-func", + "Global merge function pass", false, false) -namespace llvm { -ModulePass *createGlobalMergeFuncPass() { +ModulePass *llvm::createGlobalMergeFuncPass() { return new GlobalMergeFuncPassWrapper(); } -} // namespace llvm GlobalMergeFuncPassWrapper::GlobalMergeFuncPassWrapper() : ModulePass(ID) { initializeGlobalMergeFuncPassWrapperPass( diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp index 3485a27..0e38017 100644 --- a/llvm/lib/CodeGen/LiveIntervals.cpp +++ b/llvm/lib/CodeGen/LiveIntervals.cpp @@ -101,15 +101,11 @@ static cl::opt<bool> EnablePrecomputePhysRegs( static bool EnablePrecomputePhysRegs = false; #endif // NDEBUG -namespace llvm { - -cl::opt<bool> UseSegmentSetForPhysRegs( +cl::opt<bool> llvm::UseSegmentSetForPhysRegs( "use-segment-set-for-physregs", cl::Hidden, cl::init(true), cl::desc( "Use segment set for the computation of the live ranges of physregs.")); -} // end namespace llvm - void LiveIntervalsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addPreserved<LiveVariablesWrapperPass>(); diff --git a/llvm/lib/CodeGen/MIR2Vec.cpp b/llvm/lib/CodeGen/MIR2Vec.cpp index e859765..5c78d98 100644 --- a/llvm/lib/CodeGen/MIR2Vec.cpp +++ b/llvm/lib/CodeGen/MIR2Vec.cpp @@ -29,20 +29,17 @@ using namespace mir2vec; STATISTIC(MIRVocabMissCounter, "Number of lookups to MIR entities not present in the vocabulary"); -namespace llvm { -namespace mir2vec { -cl::OptionCategory MIR2VecCategory("MIR2Vec Options"); +cl::OptionCategory llvm::mir2vec::MIR2VecCategory("MIR2Vec Options"); // FIXME: Use a default vocab when not specified static cl::opt<std::string> VocabFile("mir2vec-vocab-path", cl::Optional, cl::desc("Path to the vocabulary file for MIR2Vec"), cl::init(""), cl::cat(MIR2VecCategory)); -cl::opt<float> OpcWeight("mir2vec-opc-weight", cl::Optional, cl::init(1.0), - cl::desc("Weight for machine opcode embeddings"), - cl::cat(MIR2VecCategory)); -} // namespace mir2vec -} // namespace llvm +cl::opt<float> + llvm::mir2vec::OpcWeight("mir2vec-opc-weight", cl::Optional, cl::init(1.0), + cl::desc("Weight for machine opcode embeddings"), + cl::cat(MIR2VecCategory)); //===----------------------------------------------------------------------===// // Vocabulary Implementation diff --git a/llvm/lib/CodeGen/MIRFSDiscriminator.cpp b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp index f5146f5..d988a2a 100644 --- a/llvm/lib/CodeGen/MIRFSDiscriminator.cpp +++ b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp @@ -40,7 +40,7 @@ cl::opt<bool> ImprovedFSDiscriminator( "improved-fs-discriminator", cl::Hidden, cl::init(false), cl::desc("New FS discriminators encoding (incompatible with the original " "encoding)")); -} +} // namespace llvm char MIRAddFSDiscriminators::ID = 0; diff 
--git a/llvm/lib/CodeGen/MIRNamerPass.cpp b/llvm/lib/CodeGen/MIRNamerPass.cpp index bc65700..cbf8867 100644 --- a/llvm/lib/CodeGen/MIRNamerPass.cpp +++ b/llvm/lib/CodeGen/MIRNamerPass.cpp @@ -23,10 +23,6 @@ using namespace llvm; -namespace llvm { -extern char &MIRNamerID; -} // namespace llvm - #define DEBUG_TYPE "mir-namer" namespace { @@ -53,10 +49,9 @@ public: VRegRenamer Renamer(MF.getRegInfo()); - unsigned BBIndex = 0; ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); - for (auto &MBB : RPOT) - Changed |= Renamer.renameVRegs(MBB, BBIndex++); + for (const auto &[BBIndex, MBB] : enumerate(RPOT)) + Changed |= Renamer.renameVRegs(MBB, BBIndex); return Changed; } @@ -66,10 +61,4 @@ public: char MIRNamer::ID; -char &llvm::MIRNamerID = MIRNamer::ID; - -INITIALIZE_PASS_BEGIN(MIRNamer, "mir-namer", "Rename Register Operands", false, - false) - -INITIALIZE_PASS_END(MIRNamer, "mir-namer", "Rename Register Operands", false, - false) +INITIALIZE_PASS(MIRNamer, "mir-namer", "Rename Register Operands", false, false) diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp index bf8a6cd..96428cd 100644 --- a/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/llvm/lib/CodeGen/MIRPrinter.cpp @@ -107,10 +107,8 @@ struct MFPrintState { } // end anonymous namespace -namespace llvm::yaml { - /// This struct serializes the LLVM IR module. -template <> struct BlockScalarTraits<Module> { +template <> struct yaml::BlockScalarTraits<Module> { static void output(const Module &Mod, void *Ctxt, raw_ostream &OS) { Mod.print(OS, nullptr); } @@ -121,8 +119,6 @@ template <> struct BlockScalarTraits<Module> { } }; -} // end namespace llvm::yaml - static void printRegMIR(Register Reg, yaml::StringValue &Dest, const TargetRegisterInfo *TRI) { raw_string_ostream OS(Dest.Value); diff --git a/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp index b2731b69..a72c2c4 100644 --- a/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp +++ b/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp @@ -97,7 +97,9 @@ static const bool EnableDevelopmentFeatures = false; /// this happens only in development mode. It's a no-op otherwise. 
namespace llvm { extern cl::opt<unsigned> EvictInterferenceCutoff; +} // namespace llvm +namespace { class RegAllocScoring : public MachineFunctionPass { public: static char ID; @@ -124,11 +126,12 @@ public: /// Performs this pass bool runOnMachineFunction(MachineFunction &) override; }; +} // namespace char RegAllocScoring::ID = 0; -FunctionPass *createRegAllocScoringPass() { return new RegAllocScoring(); } - -} // namespace llvm +FunctionPass *llvm::createRegAllocScoringPass() { + return new RegAllocScoring(); +} INITIALIZE_PASS(RegAllocScoring, "regallocscoringpass", "Register Allocation Scoring Pass", false, false) diff --git a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp index e7fa082..26eb10f 100644 --- a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -29,7 +29,6 @@ using namespace llvm; #define DEBUG_TYPE "machine-block-freq" -namespace llvm { static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG( "view-machine-block-freq-propagation-dags", cl::Hidden, cl::desc("Pop up a window to show a dag displaying how machine block " @@ -44,6 +43,7 @@ static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG( clEnumValN(GVDT_Count, "count", "display a graph using the real " "profile count if available."))); +namespace llvm { // Similar option above, but used to control BFI display only after MBP pass cl::opt<GVDAGType> ViewBlockLayoutWithBFI( "view-block-layout-with-bfi", cl::Hidden, @@ -69,15 +69,15 @@ extern cl::opt<std::string> ViewBlockFreqFuncName; // Defined in Analysis/BlockFrequencyInfo.cpp: -view-hot-freq-perc= extern cl::opt<unsigned> ViewHotFreqPercent; -static cl::opt<bool> PrintMachineBlockFreq( - "print-machine-bfi", cl::init(false), cl::Hidden, - cl::desc("Print the machine block frequency info.")); - // Command line option to specify the name of the function for block frequency // dump. Defined in Analysis/BlockFrequencyInfo.cpp. 
extern cl::opt<std::string> PrintBFIFuncName; } // namespace llvm +static cl::opt<bool> + PrintMachineBlockFreq("print-machine-bfi", cl::init(false), cl::Hidden, + cl::desc("Print the machine block frequency info.")); + static GVDAGType getGVDT() { if (ViewBlockLayoutWithBFI != GVDT_None) return ViewBlockLayoutWithBFI; @@ -85,9 +85,7 @@ static GVDAGType getGVDT() { return ViewMachineBlockFreqPropagationDAG; } -namespace llvm { - -template <> struct GraphTraits<MachineBlockFrequencyInfo *> { +template <> struct llvm::GraphTraits<MachineBlockFrequencyInfo *> { using NodeRef = const MachineBasicBlock *; using ChildIteratorType = MachineBasicBlock::const_succ_iterator; using nodes_iterator = pointer_iterator<MachineFunction::const_iterator>; @@ -116,7 +114,7 @@ using MBFIDOTGraphTraitsBase = MachineBranchProbabilityInfo>; template <> -struct DOTGraphTraits<MachineBlockFrequencyInfo *> +struct llvm::DOTGraphTraits<MachineBlockFrequencyInfo *> : public MBFIDOTGraphTraitsBase { const MachineFunction *CurFunc = nullptr; DenseMap<const MachineBasicBlock *, int> LayoutOrderMap; @@ -159,8 +157,6 @@ struct DOTGraphTraits<MachineBlockFrequencyInfo *> } }; -} // end namespace llvm - AnalysisKey MachineBlockFrequencyAnalysis::Key; MachineBlockFrequencyAnalysis::Result diff --git a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp index 2e92dd8..7ca4582 100644 --- a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -18,13 +18,8 @@ using namespace llvm; -INITIALIZE_PASS_BEGIN(MachineBranchProbabilityInfoWrapperPass, - "machine-branch-prob", - "Machine Branch Probability Analysis", false, true) -INITIALIZE_PASS_END(MachineBranchProbabilityInfoWrapperPass, - "machine-branch-prob", - "Machine Branch Probability Analysis", false, true) - +INITIALIZE_PASS(MachineBranchProbabilityInfoWrapperPass, "machine-branch-prob", + "Machine Branch Probability Analysis", false, true) namespace llvm { cl::opt<unsigned> StaticLikelyProb("static-likely-prob", diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index 224231c..bfa5ab2 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -719,43 +719,41 @@ MachineFunction::CallSiteInfo::CallSiteInfo(const CallBase &CB) { } } -namespace llvm { +template <> +struct llvm::DOTGraphTraits<const MachineFunction *> + : public DefaultDOTGraphTraits { + DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} - template<> - struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits { - DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} + static std::string getGraphName(const MachineFunction *F) { + return ("CFG for '" + F->getName() + "' function").str(); + } - static std::string getGraphName(const MachineFunction *F) { - return ("CFG for '" + F->getName() + "' function").str(); + std::string getNodeLabel(const MachineBasicBlock *Node, + const MachineFunction *Graph) { + std::string OutStr; + { + raw_string_ostream OSS(OutStr); + + if (isSimple()) { + OSS << printMBBReference(*Node); + if (const BasicBlock *BB = Node->getBasicBlock()) + OSS << ": " << BB->getName(); + } else + Node->print(OSS); } - std::string getNodeLabel(const MachineBasicBlock *Node, - const MachineFunction *Graph) { - std::string OutStr; - { - raw_string_ostream OSS(OutStr); - - if (isSimple()) { - OSS << printMBBReference(*Node); - if (const BasicBlock *BB = 
Node->getBasicBlock()) - OSS << ": " << BB->getName(); - } else - Node->print(OSS); - } - - if (OutStr[0] == '\n') OutStr.erase(OutStr.begin()); - - // Process string output to make it nicer... - for (unsigned i = 0; i != OutStr.length(); ++i) - if (OutStr[i] == '\n') { // Left justify - OutStr[i] = '\\'; - OutStr.insert(OutStr.begin()+i+1, 'l'); - } - return OutStr; - } - }; + if (OutStr[0] == '\n') + OutStr.erase(OutStr.begin()); -} // end namespace llvm + // Process string output to make it nicer... + for (unsigned i = 0; i != OutStr.length(); ++i) + if (OutStr[i] == '\n') { // Left justify + OutStr[i] = '\\'; + OutStr.insert(OutStr.begin() + i + 1, 'l'); + } + return OutStr; + } +}; void MachineFunction::viewCFG() const { diff --git a/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp index 0f88a7b..5111322 100644 --- a/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp +++ b/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp @@ -60,13 +60,11 @@ char &llvm::MachineFunctionPrinterPassID = MachineFunctionPrinterPass::ID; INITIALIZE_PASS(MachineFunctionPrinterPass, "machineinstr-printer", "Machine Function Printer", false, false) -namespace llvm { /// Returns a newly-created MachineFunction Printer pass. The /// default banner is empty. /// -MachineFunctionPass *createMachineFunctionPrinterPass(raw_ostream &OS, - const std::string &Banner){ +MachineFunctionPass * +llvm::createMachineFunctionPrinterPass(raw_ostream &OS, + const std::string &Banner) { return new MachineFunctionPrinterPass(OS, Banner); } - -} diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp index fdae3b4..9feb974 100644 --- a/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/llvm/lib/CodeGen/MachineOutliner.cpp @@ -593,15 +593,12 @@ struct MachineOutliner : public ModulePass { char MachineOutliner::ID = 0; -namespace llvm { -ModulePass *createMachineOutlinerPass(RunOutliner RunOutlinerMode) { +ModulePass *llvm::createMachineOutlinerPass(RunOutliner RunOutlinerMode) { MachineOutliner *OL = new MachineOutliner(); OL->RunOutlinerMode = RunOutlinerMode; return OL; } -} // namespace llvm - INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false, false) diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index 89ed4da..a717d9e 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -201,16 +201,15 @@ static cl::opt<unsigned> SwpMaxNumStores( cl::desc("Maximum number of stores allwed in the target loop."), cl::Hidden, cl::init(200)); -namespace llvm { - // A command line option to enable the CopyToPhi DAG mutation. -cl::opt<bool> SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden, - cl::init(true), - cl::desc("Enable CopyToPhi DAG Mutation")); +cl::opt<bool> + llvm::SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden, + cl::init(true), + cl::desc("Enable CopyToPhi DAG Mutation")); /// A command line argument to force pipeliner to use specified issue /// width. 
-cl::opt<int> SwpForceIssueWidth( +cl::opt<int> llvm::SwpForceIssueWidth( "pipeliner-force-issue-width", cl::desc("Force pipeliner to use specified issue width."), cl::Hidden, cl::init(-1)); @@ -226,8 +225,6 @@ static cl::opt<WindowSchedulingFlag> WindowSchedulingOption( clEnumValN(WindowSchedulingFlag::WS_Force, "force", "Use window algorithm instead of SMS algorithm."))); -} // end namespace llvm - unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5; char MachinePipeliner::ID = 0; #ifndef NDEBUG diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index 299bcc4..3ed1045 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -176,9 +176,7 @@ STATISTIC(NumNodeOrderPostRA, STATISTIC(NumFirstValidPostRA, "Number of scheduling units chosen for FirstValid heuristic post-RA"); -namespace llvm { - -cl::opt<MISched::Direction> PreRADirection( +cl::opt<MISched::Direction> llvm::PreRADirection( "misched-prera-direction", cl::Hidden, cl::desc("Pre reg-alloc list scheduling direction"), cl::init(MISched::Unspecified), @@ -206,33 +204,31 @@ static cl::opt<bool> DumpCriticalPathLength("misched-dcpl", cl::Hidden, cl::desc("Print critical path length to stdout")); -cl::opt<bool> VerifyScheduling( +cl::opt<bool> llvm::VerifyScheduling( "verify-misched", cl::Hidden, cl::desc("Verify machine instrs before and after machine scheduling")); #ifndef NDEBUG -cl::opt<bool> ViewMISchedDAGs( +cl::opt<bool> llvm::ViewMISchedDAGs( "view-misched-dags", cl::Hidden, cl::desc("Pop up a window to show MISched dags after they are processed")); -cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden, - cl::desc("Print schedule DAGs")); -cl::opt<bool> MISchedDumpReservedCycles( +cl::opt<bool> llvm::PrintDAGs("misched-print-dags", cl::Hidden, + cl::desc("Print schedule DAGs")); +static cl::opt<bool> MISchedDumpReservedCycles( "misched-dump-reserved-cycles", cl::Hidden, cl::init(false), cl::desc("Dump resource usage at schedule boundary.")); -cl::opt<bool> MischedDetailResourceBooking( +static cl::opt<bool> MischedDetailResourceBooking( "misched-detail-resource-booking", cl::Hidden, cl::init(false), cl::desc("Show details of invoking getNextResoufceCycle.")); #else -const bool ViewMISchedDAGs = false; -const bool PrintDAGs = false; -const bool MischedDetailResourceBooking = false; +const bool llvm::ViewMISchedDAGs = false; +const bool llvm::PrintDAGs = false; +static const bool MischedDetailResourceBooking = false; #ifdef LLVM_ENABLE_DUMP -const bool MISchedDumpReservedCycles = false; +static const bool MISchedDumpReservedCycles = false; #endif // LLVM_ENABLE_DUMP #endif // NDEBUG -} // end namespace llvm - #ifndef NDEBUG /// In some situations a few uninteresting nodes depend on nearly all other /// nodes in the graph, provide a cutoff to hide them. @@ -2053,28 +2049,24 @@ public: } // end anonymous namespace -namespace llvm { - std::unique_ptr<ScheduleDAGMutation> -createLoadClusterDAGMutation(const TargetInstrInfo *TII, - const TargetRegisterInfo *TRI, - bool ReorderWhileClustering) { +llvm::createLoadClusterDAGMutation(const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI, + bool ReorderWhileClustering) { return EnableMemOpCluster ? 
std::make_unique<LoadClusterMutation>( TII, TRI, ReorderWhileClustering) : nullptr; } std::unique_ptr<ScheduleDAGMutation> -createStoreClusterDAGMutation(const TargetInstrInfo *TII, - const TargetRegisterInfo *TRI, - bool ReorderWhileClustering) { +llvm::createStoreClusterDAGMutation(const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI, + bool ReorderWhileClustering) { return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>( TII, TRI, ReorderWhileClustering) : nullptr; } -} // end namespace llvm - // Sorting all the loads/stores first, then for each load/store, checking the // following load/store one by one, until reach the first non-dependent one and // call target hook to see if they can cluster. @@ -2304,16 +2296,12 @@ protected: } // end anonymous namespace -namespace llvm { - std::unique_ptr<ScheduleDAGMutation> -createCopyConstrainDAGMutation(const TargetInstrInfo *TII, - const TargetRegisterInfo *TRI) { +llvm::createCopyConstrainDAGMutation(const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI) { return std::make_unique<CopyConstrain>(TII, TRI); } -} // end namespace llvm - /// constrainLocalCopy handles two possibilities: /// 1) Local src: /// I0: = dst @@ -3445,14 +3433,13 @@ void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) { } #endif -namespace llvm { /// Return true if this heuristic determines order. /// TODO: Consider refactor return type of these functions as integer or enum, /// as we may need to differentiate whether TryCand is better than Cand. -bool tryLess(int TryVal, int CandVal, - GenericSchedulerBase::SchedCandidate &TryCand, - GenericSchedulerBase::SchedCandidate &Cand, - GenericSchedulerBase::CandReason Reason) { +bool llvm::tryLess(int TryVal, int CandVal, + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason) { if (TryVal < CandVal) { TryCand.Reason = Reason; return true; @@ -3465,10 +3452,10 @@ bool tryLess(int TryVal, int CandVal, return false; } -bool tryGreater(int TryVal, int CandVal, - GenericSchedulerBase::SchedCandidate &TryCand, - GenericSchedulerBase::SchedCandidate &Cand, - GenericSchedulerBase::CandReason Reason) { +bool llvm::tryGreater(int TryVal, int CandVal, + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason) { if (TryVal > CandVal) { TryCand.Reason = Reason; return true; @@ -3481,9 +3468,9 @@ bool tryGreater(int TryVal, int CandVal, return false; } -bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, - GenericSchedulerBase::SchedCandidate &Cand, - SchedBoundary &Zone) { +bool llvm::tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + SchedBoundary &Zone) { if (Zone.isTop()) { // Prefer the candidate with the lesser depth, but only if one of them has // depth greater than the total latency scheduled so far, otherwise either @@ -3513,7 +3500,6 @@ bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, } return false; } -} // end namespace llvm static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop, bool IsPostRA = false) { @@ -3798,14 +3784,12 @@ void GenericScheduler::registerRoots() { } } -namespace llvm { -bool tryPressure(const PressureChange &TryP, - const PressureChange &CandP, - GenericSchedulerBase::SchedCandidate &TryCand, - GenericSchedulerBase::SchedCandidate &Cand, - GenericSchedulerBase::CandReason Reason, - const TargetRegisterInfo 
*TRI, - const MachineFunction &MF) { +bool llvm::tryPressure(const PressureChange &TryP, const PressureChange &CandP, + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason, + const TargetRegisterInfo *TRI, + const MachineFunction &MF) { // If one candidate decreases and the other increases, go with it. // Invalid candidates have UnitInc==0. if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand, @@ -3838,7 +3822,7 @@ bool tryPressure(const PressureChange &TryP, return tryGreater(TryRank, CandRank, TryCand, Cand, Reason); } -unsigned getWeakLeft(const SUnit *SU, bool isTop) { +unsigned llvm::getWeakLeft(const SUnit *SU, bool isTop) { return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft; } @@ -3849,7 +3833,7 @@ unsigned getWeakLeft(const SUnit *SU, bool isTop) { /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled /// with the operation that produces or consumes the physreg. We'll do this when /// regalloc has support for parallel copies. -int biasPhysReg(const SUnit *SU, bool isTop) { +int llvm::biasPhysReg(const SUnit *SU, bool isTop) { const MachineInstr *MI = SU->getInstr(); if (MI->isCopy()) { @@ -3884,7 +3868,6 @@ int biasPhysReg(const SUnit *SU, bool isTop) { return 0; } -} // end namespace llvm void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, @@ -4812,13 +4795,13 @@ static MachineSchedRegistry ShufflerRegistry( //===----------------------------------------------------------------------===// #ifndef NDEBUG -namespace llvm { -template<> struct GraphTraits< - ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {}; +template <> +struct llvm::GraphTraits<ScheduleDAGMI *> : public GraphTraits<ScheduleDAG *> { +}; -template<> -struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits { +template <> +struct llvm::DOTGraphTraits<ScheduleDAGMI *> : public DefaultDOTGraphTraits { DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} static std::string getGraphName(const ScheduleDAG *G) { @@ -4878,7 +4861,6 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits { } }; -} // end namespace llvm #endif // NDEBUG /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG diff --git a/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/llvm/lib/CodeGen/MachineTraceMetrics.cpp index c2d4aa0..9ac3f741 100644 --- a/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -485,10 +485,7 @@ struct LoopBounds { // Specialize po_iterator_storage in order to prune the post-order traversal so // it is limited to the current loop and doesn't traverse the loop back edges. -namespace llvm { - -template<> -class po_iterator_storage<LoopBounds, true> { +template <> class llvm::po_iterator_storage<LoopBounds, true> { LoopBounds &LB; public: @@ -519,8 +516,6 @@ public: } }; -} // end namespace llvm - /// Compute the trace through MBB. 
void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { LLVM_DEBUG(dbgs() << "Computing " << getName() << " trace through " diff --git a/llvm/lib/CodeGen/NonRelocatableStringpool.cpp b/llvm/lib/CodeGen/NonRelocatableStringpool.cpp index 087ac62..59c587c 100644 --- a/llvm/lib/CodeGen/NonRelocatableStringpool.cpp +++ b/llvm/lib/CodeGen/NonRelocatableStringpool.cpp @@ -9,7 +9,7 @@ #include "llvm/CodeGen/NonRelocatableStringpool.h" #include "llvm/ADT/STLExtras.h" -namespace llvm { +using namespace llvm; DwarfStringPoolEntryRef NonRelocatableStringpool::getEntry(StringRef S) { auto I = Strings.try_emplace(S); @@ -43,5 +43,3 @@ NonRelocatableStringpool::getEntriesForEmission() const { }); return Result; } - -} // namespace llvm diff --git a/llvm/lib/CodeGen/SafeStack.cpp b/llvm/lib/CodeGen/SafeStack.cpp index 6f373a5..e9ffa85 100644 --- a/llvm/lib/CodeGen/SafeStack.cpp +++ b/llvm/lib/CodeGen/SafeStack.cpp @@ -76,8 +76,6 @@ using namespace llvm::safestack; #define DEBUG_TYPE "safe-stack" -namespace llvm { - STATISTIC(NumFunctions, "Total number of functions"); STATISTIC(NumUnsafeStackFunctions, "Number of functions with unsafe stack"); STATISTIC(NumUnsafeStackRestorePointsFunctions, @@ -89,8 +87,6 @@ STATISTIC(NumUnsafeDynamicAllocas, "Number of unsafe dynamic allocas"); STATISTIC(NumUnsafeByValArguments, "Number of unsafe byval arguments"); STATISTIC(NumUnsafeStackRestorePoints, "Number of setjmps and landingpads"); -} // namespace llvm - /// Use __safestack_pointer_address even if the platform has a faster way of /// access safe stack pointer. static cl::opt<bool> diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index eae2e8c..3268c26 100644 --- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -1551,14 +1551,10 @@ LLVM_DUMP_METHOD void ILPValue::dump() const { dbgs() << *this << '\n'; } -namespace llvm { - LLVM_ATTRIBUTE_UNUSED -raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) { +raw_ostream &llvm::operator<<(raw_ostream &OS, const ILPValue &Val) { Val.print(OS); return OS; } -} // end namespace llvm - #endif diff --git a/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp index e7b1494..c80eade 100644 --- a/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -16,57 +16,51 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -namespace llvm { - template<> - struct DOTGraphTraits<ScheduleDAG*> : public DefaultDOTGraphTraits { +template <> +struct llvm::DOTGraphTraits<ScheduleDAG *> : public DefaultDOTGraphTraits { - DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} - static std::string getGraphName(const ScheduleDAG *G) { - return std::string(G->MF.getName()); - } + static std::string getGraphName(const ScheduleDAG *G) { + return std::string(G->MF.getName()); + } - static bool renderGraphFromBottomUp() { - return true; - } + static bool renderGraphFromBottomUp() { return true; } - static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) { - return (Node->NumPreds > 10 || Node->NumSuccs > 10); - } + static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) { + return (Node->NumPreds > 10 || Node->NumSuccs > 10); + } - static std::string getNodeIdentifierLabel(const SUnit *Node, - const ScheduleDAG *Graph) { - std::string R; - raw_string_ostream OS(R); - OS << static_cast<const void 
*>(Node); - return R; - } + static std::string getNodeIdentifierLabel(const SUnit *Node, + const ScheduleDAG *Graph) { + std::string R; + raw_string_ostream OS(R); + OS << static_cast<const void *>(Node); + return R; + } - /// If you want to override the dot attributes printed for a particular - /// edge, override this method. - static std::string getEdgeAttributes(const SUnit *Node, - SUnitIterator EI, - const ScheduleDAG *Graph) { - if (EI.isArtificialDep()) - return "color=cyan,style=dashed"; - if (EI.isCtrlDep()) - return "color=blue,style=dashed"; - return ""; - } + /// If you want to override the dot attributes printed for a particular + /// edge, override this method. + static std::string getEdgeAttributes(const SUnit *Node, SUnitIterator EI, + const ScheduleDAG *Graph) { + if (EI.isArtificialDep()) + return "color=cyan,style=dashed"; + if (EI.isCtrlDep()) + return "color=blue,style=dashed"; + return ""; + } + std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *Graph); + static std::string getNodeAttributes(const SUnit *N, + const ScheduleDAG *Graph) { + return "shape=Mrecord"; + } - std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *Graph); - static std::string getNodeAttributes(const SUnit *N, - const ScheduleDAG *Graph) { - return "shape=Mrecord"; - } - - static void addCustomGraphFeatures(ScheduleDAG *G, - GraphWriter<ScheduleDAG*> &GW) { - return G->addCustomGraphFeatures(GW); - } - }; -} + static void addCustomGraphFeatures(ScheduleDAG *G, + GraphWriter<ScheduleDAG *> &GW) { + return G->addCustomGraphFeatures(GW); + } +}; std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU, const ScheduleDAG *G) { diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b1accdd..e153842 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -509,6 +509,7 @@ namespace { SDValue visitFMUL(SDNode *N); template <class MatchContextClass> SDValue visitFMA(SDNode *N); SDValue visitFMAD(SDNode *N); + SDValue visitFMULADD(SDNode *N); SDValue visitFDIV(SDNode *N); SDValue visitFREM(SDNode *N); SDValue visitFSQRT(SDNode *N); @@ -1991,6 +1992,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FMUL: return visitFMUL(N); case ISD::FMA: return visitFMA<EmptyMatchContext>(N); case ISD::FMAD: return visitFMAD(N); + case ISD::FMULADD: return visitFMULADD(N); case ISD::FDIV: return visitFDIV(N); case ISD::FREM: return visitFREM(N); case ISD::FSQRT: return visitFSQRT(N); @@ -18444,6 +18446,21 @@ SDValue DAGCombiner::visitFMAD(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFMULADD(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + EVT VT = N->getValueType(0); + SDLoc DL(N); + + // Constant fold FMULADD. + if (SDValue C = + DAG.FoldConstantArithmetic(ISD::FMULADD, DL, VT, {N0, N1, N2})) + return C; + + return SDValue(); +} + // Combine multiple FDIVs with the same divisor into multiple FMULs by the // reciprocal. 
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 08af74c..4512c5c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5786,6 +5786,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, case ISD::FCOPYSIGN: case ISD::FMA: case ISD::FMAD: + case ISD::FMULADD: case ISD::FP_EXTEND: case ISD::FP_TO_SINT_SAT: case ISD::FP_TO_UINT_SAT: @@ -5904,6 +5905,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, case ISD::FCOSH: case ISD::FTANH: case ISD::FMA: + case ISD::FMULADD: case ISD::FMAD: { if (SNaN) return true; @@ -7231,7 +7233,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, } // Handle fma/fmad special cases. - if (Opcode == ISD::FMA || Opcode == ISD::FMAD) { + if (Opcode == ISD::FMA || Opcode == ISD::FMAD || Opcode == ISD::FMULADD) { assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); assert(Ops[0].getValueType() == VT && Ops[1].getValueType() == VT && Ops[2].getValueType() == VT && "FMA types must match!"); @@ -7242,7 +7244,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, APFloat V1 = C1->getValueAPF(); const APFloat &V2 = C2->getValueAPF(); const APFloat &V3 = C3->getValueAPF(); - if (Opcode == ISD::FMAD) { + if (Opcode == ISD::FMAD || Opcode == ISD::FMULADD) { V1.multiply(V2, APFloat::rmNearestTiesToEven); V1.add(V3, APFloat::rmNearestTiesToEven); } else @@ -11844,25 +11846,38 @@ SDValue SelectionDAG::getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, /// getNodeIfExists - Get the specified node if it's already available, or /// else return NULL. 
SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, - ArrayRef<SDValue> Ops) { + ArrayRef<SDValue> Ops, + bool AllowCommute) { SDNodeFlags Flags; if (Inserter) Flags = Inserter->getFlags(); - return getNodeIfExists(Opcode, VTList, Ops, Flags); + return getNodeIfExists(Opcode, VTList, Ops, Flags, AllowCommute); } SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef<SDValue> Ops, - const SDNodeFlags Flags) { - if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) { + const SDNodeFlags Flags, + bool AllowCommute) { + if (VTList.VTs[VTList.NumVTs - 1] == MVT::Glue) + return nullptr; + + auto Lookup = [&](ArrayRef<SDValue> LookupOps) -> SDNode * { FoldingSetNodeID ID; - AddNodeIDNode(ID, Opcode, VTList, Ops); + AddNodeIDNode(ID, Opcode, VTList, LookupOps); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, SDLoc(), IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) { E->intersectFlagsWith(Flags); return E; } - } + return nullptr; + }; + + if (SDNode *Existing = Lookup(Ops)) + return Existing; + + if (AllowCommute && TLI->isCommutativeBinOp(Opcode)) + return Lookup({Ops[1], Ops[0]}); + return nullptr; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index c21890a..0f2b518 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6996,6 +6996,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)), Flags)); + } else if (TLI.isOperationLegalOrCustom(ISD::FMULADD, VT)) { + // TODO: Support splitting the vector. + setValue(&I, DAG.getNode(ISD::FMULADD, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)), Flags)); } else { // TODO: Intrinsic calls should have fast-math-flags. 
SDValue Mul = DAG.getNode( diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index fcfbfe6..39cbfad 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -310,6 +310,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FMA: return "fma"; case ISD::STRICT_FMA: return "strict_fma"; case ISD::FMAD: return "fmad"; + case ISD::FMULADD: return "fmuladd"; case ISD::FREM: return "frem"; case ISD::STRICT_FREM: return "strict_frem"; case ISD::FCOPYSIGN: return "fcopysign"; diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index cc503d3..920dff9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -7676,6 +7676,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, break; } case ISD::FMA: + case ISD::FMULADD: case ISD::FMAD: { if (!Flags.hasNoSignedZeros()) break; diff --git a/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp b/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp index 64e5cd5..95a9c3f 100644 --- a/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp +++ b/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp @@ -306,10 +306,7 @@ char &llvm::StackFrameLayoutAnalysisPassID = StackFrameLayoutAnalysisLegacy::ID; INITIALIZE_PASS(StackFrameLayoutAnalysisLegacy, "stack-frame-layout", "Stack Frame Layout", false, false) -namespace llvm { /// Returns a newly-created StackFrameLayout pass. -MachineFunctionPass *createStackFrameLayoutAnalysisPass() { +MachineFunctionPass *llvm::createStackFrameLayoutAnalysisPass() { return new StackFrameLayoutAnalysisLegacy(); } - -} // namespace llvm diff --git a/llvm/lib/CodeGen/StaticDataAnnotator.cpp b/llvm/lib/CodeGen/StaticDataAnnotator.cpp index 53a9ab4..eac20120 100644 --- a/llvm/lib/CodeGen/StaticDataAnnotator.cpp +++ b/llvm/lib/CodeGen/StaticDataAnnotator.cpp @@ -75,22 +75,11 @@ bool StaticDataAnnotator::runOnModule(Module &M) { bool Changed = false; for (auto &GV : M.globals()) { - if (GV.isDeclarationForLinker()) + if (!llvm::memprof::IsAnnotationOK(GV)) continue; - // The implementation below assumes prior passes don't set section prefixes, - // and specifically do 'assign' rather than 'update'. So report error if a - // section prefix is already set. - if (auto maybeSectionPrefix = GV.getSectionPrefix(); - maybeSectionPrefix && !maybeSectionPrefix->empty()) - llvm::report_fatal_error("Global variable " + GV.getName() + - " already has a section prefix " + - *maybeSectionPrefix); - StringRef SectionPrefix = SDPI->getConstantSectionPrefix(&GV, PSI); - if (SectionPrefix.empty()) - continue; - + // setSectionPrefix returns true if the section prefix is updated. Changed |= GV.setSectionPrefix(SectionPrefix); } diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp index e22dc25..1593a40 100644 --- a/llvm/lib/CodeGen/StaticDataSplitter.cpp +++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp @@ -130,10 +130,8 @@ StaticDataSplitter::getConstant(const MachineOperand &Op, if (Op.isGlobal()) { // Find global variables with local linkage. const GlobalVariable *GV = getLocalLinkageGlobalVariable(Op.getGlobal()); - // Skip 'llvm.'-prefixed global variables conservatively because they are - // often handled specially, and skip those not in static data - // sections. 
- if (!GV || GV->getName().starts_with("llvm.") || + // Skip those not eligible for annotation or not in static data sections. + if (!GV || !llvm::memprof::IsAnnotationOK(*GV) || !inStaticDataSection(*GV, TM)) return nullptr; return GV; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index c23281a..060b1dd 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -815,7 +815,8 @@ void TargetLoweringBase::initActions() { ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN, ISD::FCOSH, ISD::FSINH, - ISD::FTANH, ISD::FATAN2}, + ISD::FTANH, ISD::FATAN2, + ISD::FMULADD}, VT, Expand); // Overflow operations default to expand diff --git a/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/llvm/lib/CodeGen/TargetRegisterInfo.cpp index c9e4618..971f822 100644 --- a/llvm/lib/CodeGen/TargetRegisterInfo.cpp +++ b/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -102,10 +102,8 @@ bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet, return true; } -namespace llvm { - -Printable printReg(Register Reg, const TargetRegisterInfo *TRI, - unsigned SubIdx, const MachineRegisterInfo *MRI) { +Printable llvm::printReg(Register Reg, const TargetRegisterInfo *TRI, + unsigned SubIdx, const MachineRegisterInfo *MRI) { return Printable([Reg, TRI, SubIdx, MRI](raw_ostream &OS) { if (!Reg) OS << "$noreg"; @@ -135,7 +133,7 @@ Printable printReg(Register Reg, const TargetRegisterInfo *TRI, }); } -Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) { +Printable llvm::printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) { return Printable([Unit, TRI](raw_ostream &OS) { // Generic printout when TRI is missing. if (!TRI) { @@ -158,7 +156,7 @@ Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) { }); } -Printable printVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) { +Printable llvm::printVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) { return Printable([Unit, TRI](raw_ostream &OS) { if (Register::isVirtualRegister(Unit)) { OS << '%' << Register(Unit).virtRegIndex(); @@ -168,8 +166,9 @@ Printable printVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) { }); } -Printable printRegClassOrBank(Register Reg, const MachineRegisterInfo &RegInfo, - const TargetRegisterInfo *TRI) { +Printable llvm::printRegClassOrBank(Register Reg, + const MachineRegisterInfo &RegInfo, + const TargetRegisterInfo *TRI) { return Printable([Reg, &RegInfo, TRI](raw_ostream &OS) { if (RegInfo.getRegClassOrNull(Reg)) OS << StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower(); @@ -183,8 +182,6 @@ Printable printRegClassOrBank(Register Reg, const MachineRegisterInfo &RegInfo, }); } -} // end namespace llvm - /// getAllocatableClass - Return the maximal subclass of the given register /// class that is alloctable, or NULL. 
const TargetRegisterClass * diff --git a/llvm/lib/ExecutionEngine/Orc/MapperJITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/MapperJITLinkMemoryManager.cpp index 33734b8..bb8d2cb 100644 --- a/llvm/lib/ExecutionEngine/Orc/MapperJITLinkMemoryManager.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MapperJITLinkMemoryManager.cpp @@ -90,7 +90,7 @@ void MapperJITLinkMemoryManager::allocate(const JITLinkDylib *JD, LinkGraph &G, auto TotalSize = Seg.ContentSize + Seg.ZeroFillSize; Seg.Addr = NextSegAddr; - Seg.WorkingMem = Mapper->prepare(NextSegAddr, TotalSize); + Seg.WorkingMem = Mapper->prepare(G, NextSegAddr, TotalSize); NextSegAddr += alignTo(TotalSize, Mapper->getPageSize()); diff --git a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp index ea3b22a..7b327af 100644 --- a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp @@ -58,7 +58,8 @@ void InProcessMemoryMapper::reserve(size_t NumBytes, ExecutorAddrRange(ExecutorAddr::fromPtr(MB.base()), MB.allocatedSize())); } -char *InProcessMemoryMapper::prepare(ExecutorAddr Addr, size_t ContentSize) { +char *InProcessMemoryMapper::prepare(jitlink::LinkGraph &G, ExecutorAddr Addr, + size_t ContentSize) { return Addr.toPtr<char *>(); } @@ -324,7 +325,8 @@ void SharedMemoryMapper::reserve(size_t NumBytes, #endif } -char *SharedMemoryMapper::prepare(ExecutorAddr Addr, size_t ContentSize) { +char *SharedMemoryMapper::prepare(jitlink::LinkGraph &G, ExecutorAddr Addr, + size_t ContentSize) { auto R = Reservations.upper_bound(Addr); assert(R != Reservations.begin() && "Attempt to prepare unreserved range"); R--; diff --git a/llvm/lib/IR/ConstantFPRange.cpp b/llvm/lib/IR/ConstantFPRange.cpp index 51d2e21..5b87686 100644 --- a/llvm/lib/IR/ConstantFPRange.cpp +++ b/llvm/lib/IR/ConstantFPRange.cpp @@ -8,6 +8,7 @@ #include "llvm/IR/ConstantFPRange.h" #include "llvm/ADT/APFloat.h" +#include "llvm/ADT/FloatingPointMode.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <cassert> @@ -506,3 +507,168 @@ ConstantFPRange ConstantFPRange::sub(const ConstantFPRange &Other) const { // fsub X, Y = fadd X, (fneg Y) return add(Other.negate()); } + +void ConstantFPRange::flushDenormals(DenormalMode::DenormalModeKind Mode) { + if (Mode == DenormalMode::IEEE) + return; + FPClassTest Class = classify(); + if (!(Class & fcSubnormal)) + return; + + auto &Sem = getSemantics(); + // PreserveSign: PosSubnormal -> PosZero, NegSubnormal -> NegZero + // PositiveZero: PosSubnormal -> PosZero, NegSubnormal -> PosZero + // Dynamic: PosSubnormal -> PosZero, NegSubnormal -> NegZero/PosZero + bool ZeroLowerNegative = + Mode != DenormalMode::PositiveZero && (Class & fcNegSubnormal); + bool ZeroUpperNegative = + Mode == DenormalMode::PreserveSign && !(Class & fcPosSubnormal); + assert((ZeroLowerNegative || !ZeroUpperNegative) && + "ZeroLower is greater than ZeroUpper."); + Lower = minnum(Lower, APFloat::getZero(Sem, ZeroLowerNegative)); + Upper = maxnum(Upper, APFloat::getZero(Sem, ZeroUpperNegative)); +} + +/// Represent a contiguous range of values sharing the same sign. +struct SameSignRange { + bool HasZero; + bool HasNonZero; + bool HasInf; + // The lower and upper bounds of the range (inclusive). + // The sign is dropped and infinities are excluded. 
+ std::optional<std::pair<APFloat, APFloat>> FinitePart; + + explicit SameSignRange(const APFloat &Lower, const APFloat &Upper) + : HasZero(Lower.isZero()), HasNonZero(!Upper.isZero()), + HasInf(Upper.isInfinity()) { + assert(!Lower.isNegative() && !Upper.isNegative() && + "The sign should be dropped."); + assert(strictCompare(Lower, Upper) != APFloat::cmpGreaterThan && + "Empty set."); + if (!Lower.isInfinity()) + FinitePart = {Lower, + HasInf ? APFloat::getLargest(Lower.getSemantics()) : Upper}; + } +}; + +/// Split the range into positive and negative components. +static void splitPosNeg(const APFloat &Lower, const APFloat &Upper, + std::optional<SameSignRange> &NegPart, + std::optional<SameSignRange> &PosPart) { + assert(strictCompare(Lower, Upper) != APFloat::cmpGreaterThan && + "Non-NaN part is empty."); + if (Lower.isNegative() == Upper.isNegative()) { + if (Lower.isNegative()) + NegPart = SameSignRange{abs(Upper), abs(Lower)}; + else + PosPart = SameSignRange{Lower, Upper}; + return; + } + auto &Sem = Lower.getSemantics(); + NegPart = SameSignRange{APFloat::getZero(Sem), abs(Lower)}; + PosPart = SameSignRange{APFloat::getZero(Sem), Upper}; +} + +ConstantFPRange ConstantFPRange::mul(const ConstantFPRange &Other) const { + auto &Sem = getSemantics(); + bool ResMayBeQNaN = ((MayBeQNaN || MayBeSNaN) && !Other.isEmptySet()) || + ((Other.MayBeQNaN || Other.MayBeSNaN) && !isEmptySet()); + if (isNaNOnly() || Other.isNaNOnly()) + return getNaNOnly(Sem, /*MayBeQNaN=*/ResMayBeQNaN, + /*MayBeSNaN=*/false); + std::optional<SameSignRange> LHSNeg, LHSPos, RHSNeg, RHSPos; + splitPosNeg(Lower, Upper, LHSNeg, LHSPos); + splitPosNeg(Other.Lower, Other.Upper, RHSNeg, RHSPos); + APFloat ResLower = APFloat::getInf(Sem, /*Negative=*/false); + APFloat ResUpper = APFloat::getInf(Sem, /*Negative=*/true); + auto Update = [&](std::optional<SameSignRange> &LHS, + std::optional<SameSignRange> &RHS, bool Negative) { + if (!LHS || !RHS) + return; + // 0 * inf = QNaN + ResMayBeQNaN |= LHS->HasZero && RHS->HasInf; + ResMayBeQNaN |= RHS->HasZero && LHS->HasInf; + // NonZero * inf = inf + if ((LHS->HasInf && RHS->HasNonZero) || (RHS->HasInf && LHS->HasNonZero)) + (Negative ? 
ResLower : ResUpper) = APFloat::getInf(Sem, Negative); + // Finite * Finite + if (LHS->FinitePart && RHS->FinitePart) { + APFloat NewLower = LHS->FinitePart->first * RHS->FinitePart->first; + APFloat NewUpper = LHS->FinitePart->second * RHS->FinitePart->second; + if (Negative) { + ResLower = minnum(ResLower, -NewUpper); + ResUpper = maxnum(ResUpper, -NewLower); + } else { + ResLower = minnum(ResLower, NewLower); + ResUpper = maxnum(ResUpper, NewUpper); + } + } + }; + Update(LHSNeg, RHSNeg, /*Negative=*/false); + Update(LHSNeg, RHSPos, /*Negative=*/true); + Update(LHSPos, RHSNeg, /*Negative=*/true); + Update(LHSPos, RHSPos, /*Negative=*/false); + return ConstantFPRange(ResLower, ResUpper, ResMayBeQNaN, /*MayBeSNaN=*/false); +} + +ConstantFPRange ConstantFPRange::div(const ConstantFPRange &Other) const { + auto &Sem = getSemantics(); + bool ResMayBeQNaN = ((MayBeQNaN || MayBeSNaN) && !Other.isEmptySet()) || + ((Other.MayBeQNaN || Other.MayBeSNaN) && !isEmptySet()); + if (isNaNOnly() || Other.isNaNOnly()) + return getNaNOnly(Sem, /*MayBeQNaN=*/ResMayBeQNaN, + /*MayBeSNaN=*/false); + std::optional<SameSignRange> LHSNeg, LHSPos, RHSNeg, RHSPos; + splitPosNeg(Lower, Upper, LHSNeg, LHSPos); + splitPosNeg(Other.Lower, Other.Upper, RHSNeg, RHSPos); + APFloat ResLower = APFloat::getInf(Sem, /*Negative=*/false); + APFloat ResUpper = APFloat::getInf(Sem, /*Negative=*/true); + auto Update = [&](std::optional<SameSignRange> &LHS, + std::optional<SameSignRange> &RHS, bool Negative) { + if (!LHS || !RHS) + return; + // inf / inf = QNaN 0 / 0 = QNaN + ResMayBeQNaN |= LHS->HasInf && RHS->HasInf; + ResMayBeQNaN |= LHS->HasZero && RHS->HasZero; + // It is not straightforward to infer HasNonZeroFinite = HasFinite && + // HasNonZero. By definitions we have: + // HasFinite = HasNonZeroFinite || HasZero + // HasNonZero = HasNonZeroFinite || HasInf + // Since the range is contiguous, if both HasFinite and HasNonZero are true, + // HasNonZeroFinite must be true. + bool LHSHasNonZeroFinite = LHS->FinitePart && LHS->HasNonZero; + bool RHSHasNonZeroFinite = RHS->FinitePart && RHS->HasNonZero; + // inf / Finite = inf FiniteNonZero / 0 = inf + if ((LHS->HasInf && RHS->FinitePart) || + (LHSHasNonZeroFinite && RHS->HasZero)) + (Negative ? ResLower : ResUpper) = APFloat::getInf(Sem, Negative); + // Finite / inf = 0 + if (LHS->FinitePart && RHS->HasInf) { + APFloat Zero = APFloat::getZero(Sem, /*Negative=*/Negative); + ResLower = minnum(ResLower, Zero); + ResUpper = maxnum(ResUpper, Zero); + } + // Finite / FiniteNonZero + if (LHS->FinitePart && RHSHasNonZeroFinite) { + assert(!RHS->FinitePart->second.isZero() && + "Divisor should be non-zero."); + APFloat NewLower = LHS->FinitePart->first / RHS->FinitePart->second; + APFloat NewUpper = LHS->FinitePart->second / + (RHS->FinitePart->first.isZero() + ? 
APFloat::getSmallest(Sem, /*Negative=*/false) + : RHS->FinitePart->first); + if (Negative) { + ResLower = minnum(ResLower, -NewUpper); + ResUpper = maxnum(ResUpper, -NewLower); + } else { + ResLower = minnum(ResLower, NewLower); + ResUpper = maxnum(ResUpper, NewUpper); + } + } + }; + Update(LHSNeg, RHSNeg, /*Negative=*/false); + Update(LHSNeg, RHSPos, /*Negative=*/true); + Update(LHSPos, RHSNeg, /*Negative=*/true); + Update(LHSPos, RHSPos, /*Negative=*/false); + return ConstantFPRange(ResLower, ResUpper, ResMayBeQNaN, /*MayBeSNaN=*/false); +} diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp index 2c2950c..cbce8bd 100644 --- a/llvm/lib/IR/Constants.cpp +++ b/llvm/lib/IR/Constants.cpp @@ -667,8 +667,11 @@ Constant::PossibleRelocationsTy Constant::getRelocationInfo() const { if (CE->getOpcode() == Instruction::Sub) { ConstantExpr *LHS = dyn_cast<ConstantExpr>(CE->getOperand(0)); ConstantExpr *RHS = dyn_cast<ConstantExpr>(CE->getOperand(1)); - if (LHS && RHS && LHS->getOpcode() == Instruction::PtrToInt && - RHS->getOpcode() == Instruction::PtrToInt) { + if (LHS && RHS && + (LHS->getOpcode() == Instruction::PtrToInt || + LHS->getOpcode() == Instruction::PtrToAddr) && + (RHS->getOpcode() == Instruction::PtrToInt || + RHS->getOpcode() == Instruction::PtrToAddr)) { Constant *LHSOp0 = LHS->getOperand(0); Constant *RHSOp0 = RHS->getOperand(0); diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp index 3f1cc1e..27d8294 100644 --- a/llvm/lib/IR/Core.cpp +++ b/llvm/lib/IR/Core.cpp @@ -4098,15 +4098,8 @@ LLVMValueRef LLVMBuildGlobalStringPtr(LLVMBuilderRef B, const char *Str, return wrap(unwrap(B)->CreateGlobalString(Str, Name)); } -LLVMBool LLVMGetVolatile(LLVMValueRef MemAccessInst) { - Value *P = unwrap(MemAccessInst); - if (LoadInst *LI = dyn_cast<LoadInst>(P)) - return LI->isVolatile(); - if (StoreInst *SI = dyn_cast<StoreInst>(P)) - return SI->isVolatile(); - if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(P)) - return AI->isVolatile(); - return cast<AtomicCmpXchgInst>(P)->isVolatile(); +LLVMBool LLVMGetVolatile(LLVMValueRef Inst) { + return cast<Instruction>(unwrap(Inst))->isVolatile(); } void LLVMSetVolatile(LLVMValueRef MemAccessInst, LLVMBool isVolatile) { diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp index 614c3a9..15c0198 100644 --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -25,6 +25,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/NoFolder.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/ProfDataUtils.h" #include "llvm/IR/Statepoint.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" @@ -1002,6 +1003,18 @@ CallInst *IRBuilderBase::CreateConstrainedFPCall( return C; } +Value *IRBuilderBase::CreateSelectWithUnknownProfile(Value *C, Value *True, + Value *False, + StringRef PassName, + const Twine &Name) { + Value *Ret = CreateSelectFMF(C, True, False, {}, Name); + if (auto *SI = dyn_cast<SelectInst>(Ret)) { + setExplicitlyUnknownBranchWeightsIfProfiled( + *SI, *SI->getParent()->getParent(), PassName); + } + return Ret; +} + Value *IRBuilderBase::CreateSelect(Value *C, Value *True, Value *False, const Twine &Name, Instruction *MDFrom) { return CreateSelectFMF(C, True, False, {}, Name, MDFrom); diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index 88e7c44..9060a89 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -2965,8 +2965,7 @@ unsigned CastInst::isEliminableCastPair(Instruction::CastOps firstOp, // zext, sext -> zext, because sext can't sign extend after zext 
return Instruction::ZExt; case 11: { - // inttoptr, ptrtoint/ptrtoaddr -> bitcast if SrcSize<=PtrSize/AddrSize - // and SrcSize==DstSize + // inttoptr, ptrtoint/ptrtoaddr -> integer cast if (!DL) return 0; unsigned MidSize = secondOp == Instruction::PtrToAddr @@ -2974,10 +2973,15 @@ unsigned CastInst::isEliminableCastPair(Instruction::CastOps firstOp, : DL->getPointerTypeSizeInBits(MidTy); unsigned SrcSize = SrcTy->getScalarSizeInBits(); unsigned DstSize = DstTy->getScalarSizeInBits(); - // TODO: Could also produce zext or trunc here. - if (SrcSize <= MidSize && SrcSize == DstSize) - return Instruction::BitCast; - return 0; + // If the middle size is smaller than both source and destination, + // an additional masking operation would be required. + if (MidSize < SrcSize && MidSize < DstSize) + return 0; + if (DstSize < SrcSize) + return Instruction::Trunc; + if (DstSize > SrcSize) + return Instruction::ZExt; + return Instruction::BitCast; } case 12: // addrspacecast, addrspacecast -> bitcast, if SrcAS == DstAS diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index c9ff86b..c79a950 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -893,7 +893,7 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) { if (GV.hasInitializer()) { const Constant *Init = GV.getInitializer(); const ConstantArray *InitArray = dyn_cast<ConstantArray>(Init); - Check(InitArray, "wrong initalizer for intrinsic global variable", + Check(InitArray, "wrong initializer for intrinsic global variable", Init); for (Value *Op : InitArray->operands()) { Value *V = Op->stripPointerCasts(); diff --git a/llvm/lib/Support/DebugCounter.cpp b/llvm/lib/Support/DebugCounter.cpp index 6b65720..5ab1def 100644 --- a/llvm/lib/Support/DebugCounter.cpp +++ b/llvm/lib/Support/DebugCounter.cpp @@ -136,6 +136,13 @@ struct DebugCounterOwner : DebugCounter { cl::location(this->ShouldPrintCounter), cl::init(false), cl::desc("Print out debug counter info after all counters accumulated")}; + cl::opt<bool, true> PrintDebugCounterQueries{ + "print-debug-counter-queries", + cl::Hidden, + cl::Optional, + cl::location(this->ShouldPrintCounterQueries), + cl::init(false), + cl::desc("Print out each query of an enabled debug counter")}; cl::opt<bool, true> BreakOnLastCount{ "debug-counter-break-on-last", cl::Hidden, @@ -221,31 +228,40 @@ void DebugCounter::print(raw_ostream &OS) const { } } +bool DebugCounter::handleCounterIncrement(CounterInfo &Info) { + int64_t CurrCount = Info.Count++; + uint64_t CurrIdx = Info.CurrChunkIdx; + + if (Info.Chunks.empty()) + return true; + if (CurrIdx >= Info.Chunks.size()) + return false; + + bool Res = Info.Chunks[CurrIdx].contains(CurrCount); + if (BreakOnLast && CurrIdx == (Info.Chunks.size() - 1) && + CurrCount == Info.Chunks[CurrIdx].End) { + LLVM_BUILTIN_DEBUGTRAP; + } + if (CurrCount > Info.Chunks[CurrIdx].End) { + Info.CurrChunkIdx++; + + /// Handle consecutive blocks. 
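+      /// For illustration: with a chunk list of [2,3] followed by [4,5], the
+      /// query for count 4 computes Res = false against chunk [2,3], sees
+      /// 4 > 3 and advances CurrChunkIdx, and because 4 equals the next
+      /// chunk's Begin it returns true here, so back-to-back chunks behave
+      /// like one contiguous range.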
+ if (Info.CurrChunkIdx < Info.Chunks.size() && + CurrCount == Info.Chunks[Info.CurrChunkIdx].Begin) + return true; + } + return Res; +} + bool DebugCounter::shouldExecuteImpl(unsigned CounterName) { auto &Us = instance(); auto Result = Us.Counters.find(CounterName); if (Result != Us.Counters.end()) { auto &CounterInfo = Result->second; - int64_t CurrCount = CounterInfo.Count++; - uint64_t CurrIdx = CounterInfo.CurrChunkIdx; - - if (CounterInfo.Chunks.empty()) - return true; - if (CurrIdx >= CounterInfo.Chunks.size()) - return false; - - bool Res = CounterInfo.Chunks[CurrIdx].contains(CurrCount); - if (Us.BreakOnLast && CurrIdx == (CounterInfo.Chunks.size() - 1) && - CurrCount == CounterInfo.Chunks[CurrIdx].End) { - LLVM_BUILTIN_DEBUGTRAP; - } - if (CurrCount > CounterInfo.Chunks[CurrIdx].End) { - CounterInfo.CurrChunkIdx++; - - /// Handle consecutive blocks. - if (CounterInfo.CurrChunkIdx < CounterInfo.Chunks.size() && - CurrCount == CounterInfo.Chunks[CounterInfo.CurrChunkIdx].Begin) - return true; + bool Res = Us.handleCounterIncrement(CounterInfo); + if (Us.ShouldPrintCounterQueries && CounterInfo.IsSet) { + dbgs() << "DebugCounter " << Us.RegisteredCounters[CounterName] << "=" + << (CounterInfo.Count - 1) << (Res ? " execute" : " skip") << "\n"; } return Res; } diff --git a/llvm/lib/Support/VirtualOutputBackends.cpp b/llvm/lib/Support/VirtualOutputBackends.cpp index d6d7b87..de59b8a 100644 --- a/llvm/lib/Support/VirtualOutputBackends.cpp +++ b/llvm/lib/Support/VirtualOutputBackends.cpp @@ -498,7 +498,7 @@ Error OnDiskOutputFile::keep() { // Someone else owns the lock on this file, wait. switch (Lock.waitForUnlockFor(std::chrono::seconds(256))) { case WaitForUnlockResult::Success: - LLVM_FALLTHROUGH; + [[fallthrough]]; case WaitForUnlockResult::OwnerDied: { continue; // try again to get the lock. } diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp index 2ea3a24..afce803 100644 --- a/llvm/lib/TableGen/Record.cpp +++ b/llvm/lib/TableGen/Record.cpp @@ -1363,9 +1363,12 @@ const Init *BinOpInit::Fold(const Record *CurRec) const { } case LISTSPLAT: { const auto *Value = dyn_cast<TypedInit>(LHS); - const auto *Size = dyn_cast<IntInit>(RHS); - if (Value && Size) { - SmallVector<const Init *, 8> Args(Size->getValue(), Value); + const auto *Count = dyn_cast<IntInit>(RHS); + if (Value && Count) { + if (Count->getValue() < 0) + PrintFatalError(Twine("!listsplat count ") + Count->getAsString() + + " is negative"); + SmallVector<const Init *, 8> Args(Count->getValue(), Value); return ListInit::get(Args, Value->getType()); } break; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 6965116..9926a4d 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -26196,9 +26196,10 @@ static SDValue performFlagSettingCombine(SDNode *N, return DCI.CombineTo(N, Res, SDValue(N, 1)); } - // Combine identical generic nodes into this node, re-using the result. + // Combine equivalent generic nodes into this node, re-using the result. 
if (SDNode *Generic = DCI.DAG.getNodeIfExists( - GenericOpcode, DCI.DAG.getVTList(VT), {LHS, RHS})) + GenericOpcode, DCI.DAG.getVTList(VT), {LHS, RHS}, + /*AllowCommute=*/true)) DCI.CombineTo(Generic, SDValue(N, 0)); return SDValue(); diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp index f110558..7e03b97 100644 --- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp +++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp @@ -1360,14 +1360,24 @@ void AArch64EpilogueEmitter::emitEpilogue() { } bool CombineSPBump = shouldCombineCSRLocalStackBump(NumBytes); - // Assume we can't combine the last pop with the sp restore. - bool CombineAfterCSRBump = false; + + unsigned ProloguePopSize = PrologueSaveSize; if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) { + // With CalleeSavesAboveFrameRecord ProloguePopSize is the amount of stack + // that needs to be popped until we reach the start of the SVE save area. + // The "FixedObject" stack occurs after the SVE area and must be popped + // later. + ProloguePopSize -= FixedObject; AfterCSRPopSize += FixedObject; - } else if (!CombineSPBump && PrologueSaveSize != 0) { + } + + // Assume we can't combine the last pop with the sp restore. + if (!CombineSPBump && ProloguePopSize != 0) { MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator()); while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION || - AArch64InstrInfo::isSEHInstruction(*Pop)) + AArch64InstrInfo::isSEHInstruction(*Pop) || + (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord && + isPartOfSVECalleeSaves(Pop))) Pop = std::prev(Pop); // Converting the last ldp to a post-index ldp is valid only if the last // ldp's offset is 0. @@ -1377,18 +1387,27 @@ void AArch64EpilogueEmitter::emitEpilogue() { // may clobber), convert it to a post-index ldp. if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) { convertCalleeSaveRestoreToSPPrePostIncDec( - Pop, DL, PrologueSaveSize, EmitCFI, MachineInstr::FrameDestroy, - PrologueSaveSize); + Pop, DL, ProloguePopSize, EmitCFI, MachineInstr::FrameDestroy, + ProloguePopSize); + } else if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) { + MachineBasicBlock::iterator AfterLastPop = std::next(Pop); + if (AArch64InstrInfo::isSEHInstruction(*AfterLastPop)) + ++AfterLastPop; + // If not, and CalleeSavesAboveFrameRecord is enabled, deallocate + // callee-save non-SVE registers to move the stack pointer to the start of + // the SVE area. + emitFrameOffset(MBB, AfterLastPop, DL, AArch64::SP, AArch64::SP, + StackOffset::getFixed(ProloguePopSize), TII, + MachineInstr::FrameDestroy, false, NeedsWinCFI, + &HasWinCFI); } else { - // If not, make sure to emit an add after the last ldp. + // Otherwise, make sure to emit an add after the last ldp. // We're doing this by transferring the size to be restored from the // adjustment *before* the CSR pops to the adjustment *after* the CSR // pops. - AfterCSRPopSize += PrologueSaveSize; - CombineAfterCSRBump = true; + AfterCSRPopSize += ProloguePopSize; } } - // Move past the restores of the callee-saved registers. // If we plan on combining the sp bump of the local stack size and the callee // save stack size, we might need to adjust the CSR save and restore offsets. @@ -1419,6 +1438,17 @@ void AArch64EpilogueEmitter::emitEpilogue() { --SEHEpilogueStartI; } + // Determine the ranges of SVE callee-saves. 
This is done before emitting any + // code at the end of the epilogue (for Swift async), which can get in the way + // of finding SVE callee-saves with CalleeSavesAboveFrameRecord. + auto [PPR, ZPR] = getSVEStackFrameSizes(); + auto [PPRRange, ZPRRange] = partitionSVECS( + MBB, + SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord + ? MBB.getFirstTerminator() + : FirstGPRRestoreI, + PPR.CalleeSavesSize, ZPR.CalleeSavesSize, /*IsEpilogue=*/true); + if (HasFP && AFI->hasSwiftAsyncContext()) emitSwiftAsyncContextFramePointer(EpilogueEndI, DL); @@ -1441,14 +1471,6 @@ void AArch64EpilogueEmitter::emitEpilogue() { NumBytes -= PrologueSaveSize; assert(NumBytes >= 0 && "Negative stack allocation size!?"); - auto [PPR, ZPR] = getSVEStackFrameSizes(); - auto [PPRRange, ZPRRange] = partitionSVECS( - MBB, - SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord - ? MBB.getFirstTerminator() - : FirstGPRRestoreI, - PPR.CalleeSavesSize, ZPR.CalleeSavesSize, /*IsEpilogue=*/true); - StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize; StackOffset SVEStackSize = SVECalleeSavesSize + PPR.LocalsSize + ZPR.LocalsSize; @@ -1467,16 +1489,6 @@ void AArch64EpilogueEmitter::emitEpilogue() { NeedsWinCFI, &HasWinCFI); } - // Deallocate callee-save non-SVE registers. - emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(AFI->getCalleeSavedStackSize()), TII, - MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); - - // Deallocate fixed objects. - emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(FixedObject), TII, - MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); - // Deallocate callee-save SVE registers. emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP, SVECalleeSavesSize, TII, MachineInstr::FrameDestroy, false, @@ -1619,7 +1631,7 @@ void AArch64EpilogueEmitter::emitEpilogue() { MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP, StackOffset::getFixed(AfterCSRPopSize), TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI, EmitCFI, - StackOffset::getFixed(CombineAfterCSRBump ? PrologueSaveSize : 0)); + StackOffset::getFixed(AfterCSRPopSize - ArgumentStackToRestore)); } } diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index 12ddf47..53b00e8 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -273,7 +273,7 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) { EpilogueVectorizationMinVF = 8; MaxInterleaveFactor = 4; ScatterOverhead = 13; - LLVM_FALLTHROUGH; + [[fallthrough]]; case NeoverseN2: case NeoverseN3: PrefFunctionAlignment = Align(16); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp index dbe74b1..5700468 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp @@ -2394,15 +2394,19 @@ bool SchedGroup::canAddMI(const MachineInstr &MI) const { else if (((SGMask & SchedGroupMask::ALU) != SchedGroupMask::NONE) && (TII->isVALU(MI) || TII->isMFMAorWMMA(MI) || TII->isSALU(MI) || TII->isTRANS(MI))) - Result = true; + Result = !MI.mayLoadOrStore(); else if (((SGMask & SchedGroupMask::VALU) != SchedGroupMask::NONE) && - TII->isVALU(MI) && !TII->isMFMAorWMMA(MI) && !TII->isTRANS(MI)) - Result = true; + TII->isVALU(MI) && !TII->isMFMAorWMMA(MI) && !TII->isTRANS(MI)) { + // Some memory instructions may be marked as VALU (e.g. BUFFER_LOAD_*_LDS). 
+ // For our purposes, these shall not be classified as VALU as this results + // in unexpected behavior. + Result = !MI.mayLoadOrStore(); + } else if (((SGMask & SchedGroupMask::SALU) != SchedGroupMask::NONE) && TII->isSALU(MI)) - Result = true; + Result = !MI.mayLoadOrStore(); else if (((SGMask & SchedGroupMask::MFMA) != SchedGroupMask::NONE) && TII->isMFMAorWMMA(MI)) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index a44af5f..1b559a6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2833,8 +2833,8 @@ SDValue AMDGPUTargetLowering::LowerFLOGCommon(SDValue Op, R = getMad(DAG, DL, VT, YH, CH, Mad1); } - const bool IsFiniteOnly = (Flags.hasNoNaNs() || Options.NoNaNsFPMath) && - (Flags.hasNoInfs() || Options.NoInfsFPMath); + const bool IsFiniteOnly = + (Flags.hasNoNaNs() || Options.NoNaNsFPMath) && Flags.hasNoInfs(); // TODO: Check if known finite from source value. if (!IsFiniteOnly) { @@ -3161,9 +3161,8 @@ SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const { DAG.getSetCC(SL, SetCCVT, X, UnderflowCheckConst, ISD::SETOLT); R = DAG.getNode(ISD::SELECT, SL, VT, Underflow, Zero, R); - const auto &Options = getTargetMachine().Options; - if (!Flags.hasNoInfs() && !Options.NoInfsFPMath) { + if (!Flags.hasNoInfs()) { SDValue OverflowCheckConst = DAG.getConstantFP(IsExp10 ? 0x1.344136p+5f : 0x1.62e430p+6f, SL, VT); SDValue Overflow = diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index ee466ca..596a895 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -3575,7 +3575,7 @@ bool AMDGPULegalizerInfo::legalizeFlogCommon(MachineInstr &MI, const bool IsFiniteOnly = (MI.getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath) && - (MI.getFlag(MachineInstr::FmNoInfs) || TM.Options.NoInfsFPMath); + MI.getFlag(MachineInstr::FmNoInfs); if (!IsFiniteOnly) { // Expand isfinite(x) => fabs(x) < inf @@ -3864,9 +3864,7 @@ bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI, R = B.buildSelect(Ty, Underflow, Zero, R); - const auto &Options = MF.getTarget().Options; - - if (!(Flags & MachineInstr::FmNoInfs) && !Options.NoInfsFPMath) { + if (!(Flags & MachineInstr::FmNoInfs)) { auto OverflowCheckConst = B.buildFConstant(Ty, IsExp10 ? 
0x1.344136p+5f : 0x1.62e430p+6f); diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp index 71494be..4e11c4f 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp @@ -14,6 +14,7 @@ #include "GCNRegPressure.h" #include "AMDGPU.h" #include "SIMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/RegisterPressure.h" using namespace llvm; @@ -459,10 +460,14 @@ LaneBitmask llvm::getLiveLaneMask(const LiveInterval &LI, SlotIndex SI, GCNRPTracker::LiveRegSet llvm::getLiveRegs(SlotIndex SI, const LiveIntervals &LIS, - const MachineRegisterInfo &MRI) { + const MachineRegisterInfo &MRI, + GCNRegPressure::RegKind RegKind) { GCNRPTracker::LiveRegSet LiveRegs; for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { auto Reg = Register::index2VirtReg(I); + if (RegKind != GCNRegPressure::TOTAL_KINDS && + GCNRegPressure::getRegKind(Reg, MRI) != RegKind) + continue; if (!LIS.hasInterval(Reg)) continue; auto LiveMask = getLiveLaneMask(Reg, SI, LIS, MRI); @@ -986,3 +991,128 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) { #undef PFX } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void llvm::dumpMaxRegPressure(MachineFunction &MF, + GCNRegPressure::RegKind Kind, + LiveIntervals &LIS, + const MachineLoopInfo *MLI) { + + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); + auto &OS = dbgs(); + const char *RegName = GCNRegPressure::getName(Kind); + + unsigned MaxNumRegs = 0; + const MachineInstr *MaxPressureMI = nullptr; + GCNUpwardRPTracker RPT(LIS); + for (const MachineBasicBlock &MBB : MF) { + RPT.reset(MRI, LIS.getSlotIndexes()->getMBBEndIdx(&MBB).getPrevSlot()); + for (const MachineInstr &MI : reverse(MBB)) { + RPT.recede(MI); + unsigned NumRegs = RPT.getMaxPressure().getNumRegs(Kind); + if (NumRegs > MaxNumRegs) { + MaxNumRegs = NumRegs; + MaxPressureMI = &MI; + } + } + } + + SlotIndex MISlot = LIS.getInstructionIndex(*MaxPressureMI); + + // Max pressure can occur at either the early-clobber or register slot. + // Choose the maximum liveset between both slots. This is ugly but this is + // diagnostic code. + SlotIndex ECSlot = MISlot.getRegSlot(true); + SlotIndex RSlot = MISlot.getRegSlot(false); + GCNRPTracker::LiveRegSet ECLiveSet = getLiveRegs(ECSlot, LIS, MRI, Kind); + GCNRPTracker::LiveRegSet RLiveSet = getLiveRegs(RSlot, LIS, MRI, Kind); + unsigned ECNumRegs = getRegPressure(MRI, ECLiveSet).getNumRegs(Kind); + unsigned RNumRegs = getRegPressure(MRI, RLiveSet).getNumRegs(Kind); + GCNRPTracker::LiveRegSet *LiveSet = + ECNumRegs > RNumRegs ? &ECLiveSet : &RLiveSet; + SlotIndex MaxPressureSlot = ECNumRegs > RNumRegs ? ECSlot : RSlot; + assert(getRegPressure(MRI, *LiveSet).getNumRegs(Kind) == MaxNumRegs); + + // Split live registers into single-def and multi-def sets. 
+ GCNRegPressure SDefPressure, MDefPressure; + SmallVector<Register, 16> SDefRegs, MDefRegs; + for (auto [Reg, LaneMask] : *LiveSet) { + assert(GCNRegPressure::getRegKind(Reg, MRI) == Kind); + LiveInterval &LI = LIS.getInterval(Reg); + if (LI.getNumValNums() == 1 || + (LI.hasSubRanges() && + llvm::all_of(LI.subranges(), [](const LiveInterval::SubRange &SR) { + return SR.getNumValNums() == 1; + }))) { + SDefPressure.inc(Reg, LaneBitmask::getNone(), LaneMask, MRI); + SDefRegs.push_back(Reg); + } else { + MDefPressure.inc(Reg, LaneBitmask::getNone(), LaneMask, MRI); + MDefRegs.push_back(Reg); + } + } + unsigned SDefNumRegs = SDefPressure.getNumRegs(Kind); + unsigned MDefNumRegs = MDefPressure.getNumRegs(Kind); + assert(SDefNumRegs + MDefNumRegs == MaxNumRegs); + + auto printLoc = [&](const MachineBasicBlock *MBB, SlotIndex SI) { + return Printable([&, MBB, SI](raw_ostream &OS) { + OS << SI << ':' << printMBBReference(*MBB); + if (MLI) + if (const MachineLoop *ML = MLI->getLoopFor(MBB)) + OS << " (LoopHdr " << printMBBReference(*ML->getHeader()) + << ", Depth " << ML->getLoopDepth() << ")"; + }); + }; + + auto PrintRegInfo = [&](Register Reg, LaneBitmask LiveMask) { + GCNRegPressure RegPressure; + RegPressure.inc(Reg, LaneBitmask::getNone(), LiveMask, MRI); + OS << " " << printReg(Reg, TRI) << ':' + << TRI->getRegClassName(MRI.getRegClass(Reg)) << ", LiveMask " + << PrintLaneMask(LiveMask) << " (" << RegPressure.getNumRegs(Kind) << ' ' + << RegName << "s)\n"; + + // Use std::map to sort def/uses by SlotIndex. + std::map<SlotIndex, const MachineInstr *> Instrs; + for (const MachineInstr &MI : MRI.reg_nodbg_instructions(Reg)) { + Instrs[LIS.getInstructionIndex(MI).getRegSlot()] = &MI; + } + + for (const auto &[SI, MI] : Instrs) { + OS << " "; + if (MI->definesRegister(Reg, TRI)) + OS << "def "; + if (MI->readsRegister(Reg, TRI)) + OS << "use "; + OS << printLoc(MI->getParent(), SI) << ": " << *MI; + } + }; + + OS << "\n*** Register pressure info (" << RegName << "s) for " << MF.getName() + << " ***\n"; + OS << "Max pressure is " << MaxNumRegs << ' ' << RegName << "s at " + << printLoc(MaxPressureMI->getParent(), MaxPressureSlot) << ": " + << *MaxPressureMI; + + OS << "\nLive registers with single definition (" << SDefNumRegs << ' ' + << RegName << "s):\n"; + + // Sort SDefRegs by number of uses (smallest first) + llvm::sort(SDefRegs, [&](Register A, Register B) { + return std::distance(MRI.use_nodbg_begin(A), MRI.use_nodbg_end()) < + std::distance(MRI.use_nodbg_begin(B), MRI.use_nodbg_end()); + }); + + for (const Register Reg : SDefRegs) { + PrintRegInfo(Reg, LiveSet->lookup(Reg)); + } + + OS << "\nLive registers with multiple definitions (" << MDefNumRegs << ' ' + << RegName << "s):\n"; + for (const Register Reg : MDefRegs) { + PrintRegInfo(Reg, LiveSet->lookup(Reg)); + } +} +#endif diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h index 898d1ff..979a8b0 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h @@ -31,6 +31,12 @@ class SlotIndex; struct GCNRegPressure { enum RegKind { SGPR, VGPR, AGPR, AVGPR, TOTAL_KINDS }; + static constexpr const char *getName(RegKind Kind) { + const char *Names[] = {"SGPR", "VGPR", "AGPR", "AVGPR"}; + assert(Kind < TOTAL_KINDS); + return Names[Kind]; + } + GCNRegPressure() { clear(); } @@ -41,6 +47,11 @@ struct GCNRegPressure { void clear() { std::fill(&Value[0], &Value[ValueArraySize], 0); } + unsigned getNumRegs(RegKind Kind) const { + assert(Kind < TOTAL_KINDS); + return 
Value[Kind]; + } + /// \returns the SGPR32 pressure unsigned getSGPRNum() const { return Value[SGPR]; } /// \returns the aggregated ArchVGPR32, AccVGPR32, and Pseudo AVGPR pressure @@ -138,6 +149,12 @@ struct GCNRegPressure { void dump() const; + static RegKind getRegKind(unsigned Reg, const MachineRegisterInfo &MRI) { + const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); + const SIRegisterInfo *STI = static_cast<const SIRegisterInfo *>(TRI); + return (RegKind)getRegKind(MRI.getRegClass(Reg), STI); + } + private: static constexpr unsigned ValueArraySize = TOTAL_KINDS * 2; @@ -294,8 +311,10 @@ public: } }; -GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS, - const MachineRegisterInfo &MRI); +GCNRPTracker::LiveRegSet +getLiveRegs(SlotIndex SI, const LiveIntervals &LIS, + const MachineRegisterInfo &MRI, + GCNRegPressure::RegKind RegKind = GCNRegPressure::TOTAL_KINDS); //////////////////////////////////////////////////////////////////////////////// // GCNUpwardRPTracker @@ -428,9 +447,6 @@ LaneBitmask getLiveLaneMask(const LiveInterval &LI, SlotIndex SI, const MachineRegisterInfo &MRI, LaneBitmask LaneMaskFilter = LaneBitmask::getAll()); -GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS, - const MachineRegisterInfo &MRI); - /// creates a map MachineInstr -> LiveRegSet /// R - range of iterators on instructions /// After - upon entry or exit of every instruction @@ -524,6 +540,11 @@ public: } }; +LLVM_ABI void dumpMaxRegPressure(MachineFunction &MF, + GCNRegPressure::RegKind Kind, + LiveIntervals &LIS, + const MachineLoopInfo *MLI); + } // end namespace llvm #endif // LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index bdc0810..58482ea 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -69,6 +69,21 @@ static cl::opt<bool> GCNTrackers( cl::desc("Use the AMDGPU specific RPTrackers during scheduling"), cl::init(false)); +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +#define DUMP_MAX_REG_PRESSURE +static cl::opt<bool> PrintMaxRPRegUsageBeforeScheduler( + "amdgpu-print-max-reg-pressure-regusage-before-scheduler", cl::Hidden, + cl::desc("Print a list of live registers along with their def/uses at the " + "point of maximum register pressure before scheduling."), + cl::init(false)); + +static cl::opt<bool> PrintMaxRPRegUsageAfterScheduler( + "amdgpu-print-max-reg-pressure-regusage-after-scheduler", cl::Hidden, + cl::desc("Print a list of live registers along with their def/uses at the " + "point of maximum register pressure after scheduling."), + cl::init(false)); +#endif + const unsigned ScheduleMetrics::ScaleFactor = 100; GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C) @@ -960,6 +975,14 @@ void GCNScheduleDAGMILive::runSchedStages() { RegionLiveOuts.buildLiveRegMap(); } +#ifdef DUMP_MAX_REG_PRESSURE + if (PrintMaxRPRegUsageBeforeScheduler) { + dumpMaxRegPressure(MF, GCNRegPressure::VGPR, *LIS, MLI); + dumpMaxRegPressure(MF, GCNRegPressure::SGPR, *LIS, MLI); + LIS->dump(); + } +#endif + GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl); while (S.advanceStage()) { auto Stage = createSchedStage(S.getCurrentStage()); @@ -995,6 +1018,14 @@ void GCNScheduleDAGMILive::runSchedStages() { Stage->finalizeGCNSchedStage(); } + +#ifdef DUMP_MAX_REG_PRESSURE + if (PrintMaxRPRegUsageAfterScheduler) { + dumpMaxRegPressure(MF, GCNRegPressure::VGPR, *LIS, MLI); + 
dumpMaxRegPressure(MF, GCNRegPressure::SGPR, *LIS, MLI); + LIS->dump(); + } +#endif } #ifndef NDEBUG diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index 64e34db..5f6d742 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -260,8 +260,12 @@ class NSAHelper { } class MIMGNSAHelper<int num_addrs, - list<RegisterClass> addr_types=!listsplat(VGPR_32, num_addrs)> - : NSAHelper<> { + list<RegisterOperand> addr_types_in=[]> + : NSAHelper<> { + list<RegisterOperand> addr_types = + !if(!empty(addr_types_in), !listsplat(VGPROp_32, num_addrs), + addr_types_in); + list<string> AddrAsmNames = !foreach(i, !range(num_addrs), "vaddr" # i); let AddrIns = !dag(ins, addr_types, AddrAsmNames); let AddrAsm = "[$" # !interleave(AddrAsmNames, ", $") # "]"; @@ -358,7 +362,7 @@ class MIMG_gfx11<int op, dag outs, string dns = ""> // Base class for all NSA MIMG instructions. // Note that 1-dword addresses always use non-NSA variants. class MIMG_nsa_gfx11<int op, dag outs, int num_addrs, string dns="", - list<RegisterClass> addr_types=[], + list<RegisterOperand> addr_types=[], RegisterOperand LastAddrRC = VGPROp_32> : MIMG<outs, dns>, MIMGe_gfx11<op> { let SubtargetPredicate = isGFX11Only; @@ -378,7 +382,7 @@ class MIMG_nsa_gfx11<int op, dag outs, int num_addrs, string dns="", } class VIMAGE_gfx12<int op, dag outs, int num_addrs, string dns="", - list<RegisterClass> addr_types=[]> + list<RegisterOperand> addr_types=[]> : VIMAGE<outs, dns>, VIMAGEe<op> { let SubtargetPredicate = isGFX12Plus; let AssemblerPredicate = isGFX12Plus; @@ -1521,12 +1525,12 @@ class MIMG_IntersectRay_Helper<bit Is64, bit IsA16, bit isDual, bit isBVH8> { int VAddrDwords = !srl(Size, 5); int GFX11PlusNSAAddrs = !if(IsA16, 4, 5); - RegisterClass node_ptr_type = !if(Is64, VReg_64, VGPR_32); - list<RegisterClass> GFX11PlusAddrTypes = - !cond(isBVH8 : [node_ptr_type, VReg_64, VReg_96, VReg_96, VGPR_32], - isDual : [node_ptr_type, VReg_64, VReg_96, VReg_96, VReg_64], - IsA16 : [node_ptr_type, VGPR_32, VReg_96, VReg_96], - true : [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]); + RegisterOperand node_ptr_type = !if(Is64, VGPROp_64, VGPROp_32); + list<RegisterOperand> GFX11PlusAddrTypes = + !cond(isBVH8 : [node_ptr_type, VGPROp_64, VGPROp_96, VGPROp_96, VGPROp_32], + isDual : [node_ptr_type, VGPROp_64, VGPROp_96, VGPROp_96, VGPROp_64], + IsA16 : [node_ptr_type, VGPROp_32, VGPROp_96, VGPROp_96], + true : [node_ptr_type, VGPROp_32, VGPROp_96, VGPROp_96, VGPROp_96]); } class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterOperand AddrRC> @@ -1552,7 +1556,7 @@ class MIMG_IntersectRay_gfx11<mimgopc op, string opcode, RegisterOperand AddrRC> } class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs, - list<RegisterClass> addr_types> + list<RegisterOperand> addr_types> : MIMG_nsa_gfx11<op.GFX11, (outs VReg_128:$vdata), num_addrs, "GFX11", addr_types> { let InOperandList = !con(nsah.AddrIns, (ins SReg_128_XNULL:$srsrc, A16:$a16)); @@ -1561,7 +1565,7 @@ class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs, class VIMAGE_IntersectRay_gfx12<mimgopc op, string opcode, int num_addrs, bit isDual, bit isBVH8, - list<RegisterClass> addr_types> + list<RegisterOperand> addr_types> : VIMAGE_gfx12<op.GFX12, !if(!or(isDual, isBVH8), (outs VReg_320:$vdata, VReg_96:$ray_origin_out, VReg_96:$ray_dir_out), diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 
5e27b37..6dcbced 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -1019,7 +1019,7 @@ void WaitcntBrackets::updateByEvent(WaitEventType E, MachineInstr &Inst) { // SMEM and VMEM operations. So there will never be // outstanding address translations for both SMEM and // VMEM at the same time. - setScoreLB(T, CurrScore - 1); + setScoreLB(T, getScoreUB(T) - 1); PendingEvents &= ~(1 << OtherEvent); } for (const MachineOperand &Op : Inst.all_uses()) diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index eac9fd4..27e5ee9c 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -3726,6 +3726,23 @@ def : GCNPat < } // End foreach Ty = ... } // End AddedComplexity = 1 +let True16Predicate = UseRealTrue16Insts in { +def : GCNPat< + (i32 (DivergentBinFrag<or> + (i32 (zext i16:$src_lo)), + (i32 (bitconvert (v2i16 (build_vector (i16 0), (i16 VGPR_16:$src_hi))))) + )), + (REG_SEQUENCE VGPR_32, $src_lo, lo16, $src_hi, hi16) +>; +def : GCNPat< + (i32 (DivergentBinFrag<or> + (i32 (bitconvert (v2i16 (build_vector (i16 0), (i16 VGPR_16:$src_hi))))), + (i32 (zext i16:$src_lo)) + )), + (REG_SEQUENCE VGPR_32, $src_lo, lo16, $src_hi, hi16) +>; +} + let True16Predicate = UseRealTrue16Insts in def : GCNPat < (v2i16 (DivergentBinFrag<build_vector> (i16 undef), (i16 (trunc i32:$src1)))), diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index be1c883..ebd2e7e 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -2356,7 +2356,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::mask)); - LLVM_FALLTHROUGH; + [[fallthrough]]; } case AMDGPU::SI_SPILL_V1024_SAVE: case AMDGPU::SI_SPILL_V512_SAVE: @@ -2446,7 +2446,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::mask)); - LLVM_FALLTHROUGH; + [[fallthrough]]; } case AMDGPU::SI_SPILL_V16_RESTORE: case AMDGPU::SI_SPILL_V32_RESTORE: diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index bef4868..7e7ee75 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -280,6 +280,10 @@ static unsigned getTcgen05LdOpcode(unsigned IID, bool enablePack) { } void NVPTXDAGToDAGISel::SelectTcgen05Ld(SDNode *N, bool hasOffset) { + if (!Subtarget->hasTcgen05InstSupport()) + report_fatal_error( + "tcgen05.ld is not supported on this architecture variant"); + SDLoc DL(N); unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); @@ -2136,6 +2140,10 @@ static unsigned getTcgen05StOpcode(unsigned IID, bool enableUnpack) { } void NVPTXDAGToDAGISel::SelectTcgen05St(SDNode *N, bool hasOffset) { + if (!Subtarget->hasTcgen05InstSupport()) + report_fatal_error( + "tcgen05.st is not supported on this architecture variant"); + SDLoc DL(N); unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index a1fb665..272c21f 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ 
b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -233,7 +233,7 @@ getVectorLoweringShape(EVT VectorEVT, const NVPTXSubtarget &STI, // target supports 256-bit loads/stores if (!CanLowerTo256Bit) return std::nullopt; - LLVM_FALLTHROUGH; + [[fallthrough]]; case MVT::v2i8: case MVT::v2i64: case MVT::v2f64: @@ -248,7 +248,7 @@ getVectorLoweringShape(EVT VectorEVT, const NVPTXSubtarget &STI, // global and the target supports 256-bit loads/stores. if (!CanLowerTo256Bit) return std::nullopt; - LLVM_FALLTHROUGH; + [[fallthrough]]; case MVT::v2i16: // <1 x i16x2> case MVT::v2f16: // <1 x f16x2> case MVT::v2bf16: // <1 x bf16x2> @@ -270,7 +270,7 @@ getVectorLoweringShape(EVT VectorEVT, const NVPTXSubtarget &STI, // target supports 256-bit loads/stores if (!CanLowerTo256Bit) return std::nullopt; - LLVM_FALLTHROUGH; + [[fallthrough]]; case MVT::v2f32: // <1 x f32x2> case MVT::v4f32: // <2 x f32x2> case MVT::v2i32: // <1 x i32x2> @@ -6749,7 +6749,7 @@ NVPTXTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { case AtomicRMWInst::BinOp::Xchg: if (BitWidth == 128) return AtomicExpansionKind::None; - LLVM_FALLTHROUGH; + [[fallthrough]]; case AtomicRMWInst::BinOp::And: case AtomicRMWInst::BinOp::Or: case AtomicRMWInst::BinOp::Xor: diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 6c14cf0..dfde0cc 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -101,6 +101,22 @@ def PrmtMode : Operand<i32> { // NVPTX Instruction Predicate Definitions //===----------------------------------------------------------------------===// +// Checks PTX version and family-specific and architecture-specific SM versions. +// For example, sm_100{f/a} and any future variants in the same family will match +// for any PTX version greater than or equal to `PTXVersion`. +class PTXWithFamilySMs<int PTXVersion, list<int> SMVersions> : + Predicate<"Subtarget->hasPTXWithFamilySMs(" # PTXVersion # ", {" # + !interleave(SMVersions, ", ") # "})">; + +// Checks PTX version and architecture-specific SM versions. +// For example, sm_100{a} will match for any PTX version +// greater than or equal to `PTXVersion`. +class PTXWithAccelSMs<int PTXVersion, list<int> SMVersions> : + Predicate<"Subtarget->hasPTXWithAccelSMs(" # PTXVersion # ", {" # + !interleave(SMVersions, ", ") # "})">; + +// Helper predicate to call a subtarget method. 
+class callSubtarget<string SubtargetMethod> : Predicate<"Subtarget->" # SubtargetMethod # "()">; def hasAtomAddF64 : Predicate<"Subtarget->hasAtomAddF64()">; def hasAtomScope : Predicate<"Subtarget->hasAtomScope()">; diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index a8b854f..22cf3a7 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -5103,8 +5103,8 @@ let Predicates = [hasSM<90>, hasPTX<78>] in { def EXIT : NullaryInst<"exit", int_nvvm_exit>; // Tcgen05 intrinsics -let isConvergent = true, Predicates = [hasTcgen05Instructions] in { - +let isConvergent = true in { +let Predicates = [callSubtarget<"hasTcgen05InstSupport">] in { multiclass TCGEN05_ALLOC_INTR<string AS, string num, Intrinsic Intr> { def "" : BasicNVPTXInst<(outs), (ins ADDR:$dst, B32:$ncols), @@ -5156,15 +5156,6 @@ defm TCGEN05_COMMIT_CG2 : TCGEN05_COMMIT_INTR<"", "2">; defm TCGEN05_COMMIT_S64_CG1 : TCGEN05_COMMIT_INTR<"shared", "1">; defm TCGEN05_COMMIT_S64_CG2 : TCGEN05_COMMIT_INTR<"shared", "2">; -multiclass TCGEN05_SHIFT_INTR<string num, Intrinsic Intr> { - def "" : BasicNVPTXInst<(outs), - (ins ADDR:$tmem_addr), - "tcgen05.shift.cta_group::" # num # ".down", - [(Intr addr:$tmem_addr)]>; -} -defm TCGEN05_SHIFT_CG1: TCGEN05_SHIFT_INTR<"1", int_nvvm_tcgen05_shift_down_cg1>; -defm TCGEN05_SHIFT_CG2: TCGEN05_SHIFT_INTR<"2", int_nvvm_tcgen05_shift_down_cg2>; - multiclass TCGEN05_CP_INTR<string shape, string src_fmt, string mc = ""> { defvar dst_fmt = !if(!eq(src_fmt, ""), "", ".b8x16"); defvar fmt_asm = StrJoin<".", [dst_fmt, src_fmt]>.ret; @@ -5195,9 +5186,22 @@ foreach src_fmt = ["", "b6x16_p32", "b4x16_p64"] in { defm TCGEN05_CP_64x128_2 # src_fmt : TCGEN05_CP_INTR<"64x128b", src_fmt, "warpx2::01_23">; defm TCGEN05_CP_32x128 # src_fmt : TCGEN05_CP_INTR<"32x128b", src_fmt, "warpx4">; } +} // Predicates + +let Predicates = [callSubtarget<"hasTcgen05ShiftSupport">] in { +multiclass TCGEN05_SHIFT_INTR<string num, Intrinsic Intr> { + def "" : BasicNVPTXInst<(outs), + (ins ADDR:$tmem_addr), + "tcgen05.shift.cta_group::" # num # ".down", + [(Intr addr:$tmem_addr)]>; +} +defm TCGEN05_SHIFT_CG1: TCGEN05_SHIFT_INTR<"1", int_nvvm_tcgen05_shift_down_cg1>; +defm TCGEN05_SHIFT_CG2: TCGEN05_SHIFT_INTR<"2", int_nvvm_tcgen05_shift_down_cg2>; +} // Predicates + } // isConvergent -let hasSideEffects = 1, Predicates = [hasTcgen05Instructions] in { +let hasSideEffects = 1, Predicates = [callSubtarget<"hasTcgen05InstSupport">] in { def tcgen05_fence_before_thread_sync: NullaryInst< "tcgen05.fence::before_thread_sync", int_nvvm_tcgen05_fence_before_thread_sync>; @@ -5231,8 +5235,7 @@ class TCGEN05_LDST_REGINFO<int Veclen> { // class TCGEN05_LD_INST<string Shape, int Num, bit Pack> : - NVPTXInst<(outs), (ins), "?", []>, - Requires<[hasTcgen05Instructions]> { + NVPTXInst<(outs), (ins), "?", []> { TCGEN05_LDST_REGINFO Info = TCGEN05_LDST_REGINFO< NVVM_TCGEN05_LDST_ACCESS_SIZE<Shape, Num>.veclen>; @@ -5256,8 +5259,7 @@ class TCGEN05_LD_INST<string Shape, int Num, bit Pack> : // class TCGEN05_ST_INST<string Shape, int Num, bit Unpack> : - NVPTXInst<(outs), (ins), "?", []>, - Requires<[hasTcgen05Instructions]> { + NVPTXInst<(outs), (ins), "?", []> { TCGEN05_LDST_REGINFO Info = TCGEN05_LDST_REGINFO< NVVM_TCGEN05_LDST_ACCESS_SIZE<Shape, Num>.veclen>; diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp index c548967..989be50 100644 --- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ 
b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -72,6 +72,40 @@ const SelectionDAGTargetInfo *NVPTXSubtarget::getSelectionDAGInfo() const { return TSInfo.get(); } +bool NVPTXSubtarget::hasPTXWithFamilySMs(unsigned PTXVersion, + ArrayRef<unsigned> SMVersions) const { + unsigned PTXVer = getPTXVersion(); + if (!hasFamilySpecificFeatures() || PTXVer < PTXVersion) + return false; + + unsigned SMVer = getSmVersion(); + return llvm::any_of(SMVersions, [&](unsigned SM) { + // sm_101 is a different family, never group it with sm_10x. + if (SMVer == 101 || SM == 101) + return SMVer == SM && + // PTX 9.0 and later renamed sm_101 to sm_110, so sm_101 is not + // supported. + !(PTXVer >= 90 && SMVer == 101); + + return getSmFamilyVersion() == SM / 10 && SMVer >= SM; + }); +} + +bool NVPTXSubtarget::hasPTXWithAccelSMs(unsigned PTXVersion, + ArrayRef<unsigned> SMVersions) const { + unsigned PTXVer = getPTXVersion(); + if (!hasArchAccelFeatures() || PTXVer < PTXVersion) + return false; + + unsigned SMVer = getSmVersion(); + return llvm::any_of(SMVersions, [&](unsigned SM) { + return SMVer == SM && + // PTX 9.0 and later renamed sm_101 to sm_110, so sm_101 is not + // supported. + !(PTXVer >= 90 && SMVer == 101); + }); +} + bool NVPTXSubtarget::allowFP16Math() const { return hasFP16Math() && NoF16Math == false; } diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h index e81c56b..194dbdc 100644 --- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h @@ -73,6 +73,18 @@ public: const SelectionDAGTargetInfo *getSelectionDAGInfo() const override; + // Checks PTX version and family-specific and architecture-specific SM + // versions. For example, sm_100{f/a} and any future variants in the same + // family will match for any PTX version greater than or equal to + // `PTXVersion`. + bool hasPTXWithFamilySMs(unsigned PTXVersion, + ArrayRef<unsigned> SMVersions) const; + // Checks PTX version and architecture-specific SM versions. + // For example, sm_100{a} will match for any PTX version greater than or equal + // to `PTXVersion`. + bool hasPTXWithAccelSMs(unsigned PTXVersion, + ArrayRef<unsigned> SMVersions) const; + bool has256BitVectorLoadStore(unsigned AS) const { return SmVersion >= 100 && PTXVersion >= 88 && AS == NVPTXAS::ADDRESS_SPACE_GLOBAL; @@ -127,6 +139,27 @@ public: return HasTcgen05 && PTXVersion >= MinPTXVersion; } + // Checks following instructions support: + // - tcgen05.ld/st + // - tcgen05.alloc/dealloc/relinquish + // - tcgen05.cp + // - tcgen05.fence/wait + // - tcgen05.commit + bool hasTcgen05InstSupport() const { + // sm_101 renamed to sm_110 in PTX 9.0 + return hasPTXWithFamilySMs(90, {100, 110}) || + hasPTXWithFamilySMs(88, {100, 101}) || + hasPTXWithAccelSMs(86, {100, 101}); + } + + // Checks tcgen05.shift instruction support. 
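  // For illustration, reading the check below: shift support requires an
  // arch-accelerated ("a") variant, e.g. sm_100a with PTX >= 8.6 or sm_103a
  // with PTX >= 8.8, whereas a family-specific target such as sm_100f can
  // satisfy hasTcgen05InstSupport() above but not hasTcgen05ShiftSupport().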
+ bool hasTcgen05ShiftSupport() const { + // sm_101 renamed to sm_110 in PTX 9.0 + return hasPTXWithAccelSMs(90, {100, 110, 103}) || + hasPTXWithAccelSMs(88, {100, 101, 103}) || + hasPTXWithAccelSMs(86, {100, 101}); + } + bool hasTcgen05MMAScaleInputDImm() const { return FullSmVersion == 1003 && PTXVersion >= 86; } @@ -158,6 +191,7 @@ public: bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; } unsigned int getFullSmVersion() const { return FullSmVersion; } unsigned int getSmVersion() const { return getFullSmVersion() / 10; } + unsigned int getSmFamilyVersion() const { return getFullSmVersion() / 100; } // GPUs with "a" suffix have architecture-accelerated features that are // supported on the specified architecture only, hence such targets do not // follow the onion layer model. hasArchAccelFeatures() allows distinguishing diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 21dbb7c..8851a0f 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -1659,6 +1659,10 @@ bool RISCVAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return generateImmOutOfRangeError( Operands, ErrorInfo, -1, (1 << 5) - 1, "immediate must be non-zero in the range"); + case Match_InvalidXSfmmVType: { + SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc(); + return generateXSfmmVTypeError(ErrorLoc); + } case Match_InvalidVTypeI: { SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc(); return generateVTypeError(ErrorLoc); @@ -1688,7 +1692,7 @@ bool RISCVAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, (1 << 25) - 1); // HACK: See comment before `BareSymbolQC_E_LI` in RISCVInstrInfoXqci.td. case Match_InvalidBareSymbolQC_E_LI: - LLVM_FALLTHROUGH; + [[fallthrough]]; // END HACK case Match_InvalidBareSImm32: return generateImmOutOfRangeError(Operands, ErrorInfo, diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index 70b7c43..e75dfe3 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -142,6 +142,22 @@ enum { ReadsPastVLShift = DestEEWShift + 2, ReadsPastVLMask = 1ULL << ReadsPastVLShift, + + // 0 -> Don't care about altfmt bit in VTYPE. + // 1 -> Is not altfmt. + // 2 -> Is altfmt(BF16). + AltFmtTypeShift = ReadsPastVLShift + 1, + AltFmtTypeMask = 3ULL << AltFmtTypeShift, + + // XSfmmbase + HasTWidenOpShift = AltFmtTypeShift + 2, + HasTWidenOpMask = 1ULL << HasTWidenOpShift, + + HasTMOpShift = HasTWidenOpShift + 1, + HasTMOpMask = 1ULL << HasTMOpShift, + + HasTKOpShift = HasTMOpShift + 1, + HasTKOpMask = 1ULL << HasTKOpShift, }; // Helper functions to read TSFlags. 
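For illustration only, a hypothetical round-trip through the new TSFlags fields using the RISCVII helpers declared in this header; the function name is invented for the sketch and is not part of the patch:

#include "MCTargetDesc/RISCVBaseInfo.h"
#include <cassert>

// Hypothetical sketch: build a TSFlags value that marks an instruction as
// AltFmt (BF16) and as carrying an XSfmm twiden operand, then read it back.
static void exampleTSFlagsRoundTrip() {
  uint64_t TSFlags = 0;
  TSFlags |= uint64_t(2) << llvm::RISCVII::AltFmtTypeShift; // 2 -> AltFmt
  TSFlags |= llvm::RISCVII::HasTWidenOpMask;
  assert(llvm::RISCVII::getAltFmtType(TSFlags) ==
         llvm::RISCVII::AltFmtType::AltFmt);
  assert(llvm::RISCVII::hasTWidenOp(TSFlags) &&
         !llvm::RISCVII::hasTMOp(TSFlags));
}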
@@ -183,6 +199,11 @@ static inline bool hasRoundModeOp(uint64_t TSFlags) { return TSFlags & HasRoundModeOpMask; } +enum class AltFmtType { DontCare, NotAltFmt, AltFmt }; +static inline AltFmtType getAltFmtType(uint64_t TSFlags) { + return static_cast<AltFmtType>((TSFlags & AltFmtTypeMask) >> AltFmtTypeShift); +} + /// \returns true if this instruction uses vxrm static inline bool usesVXRM(uint64_t TSFlags) { return TSFlags & UsesVXRMMask; } @@ -204,11 +225,47 @@ static inline bool readsPastVL(uint64_t TSFlags) { return TSFlags & ReadsPastVLMask; } +// XSfmmbase +static inline bool hasTWidenOp(uint64_t TSFlags) { + return TSFlags & HasTWidenOpMask; +} + +static inline bool hasTMOp(uint64_t TSFlags) { return TSFlags & HasTMOpMask; } + +static inline bool hasTKOp(uint64_t TSFlags) { return TSFlags & HasTKOpMask; } + +static inline unsigned getTNOpNum(const MCInstrDesc &Desc) { + const uint64_t TSFlags = Desc.TSFlags; + assert(hasTWidenOp(TSFlags) && hasVLOp(TSFlags)); + unsigned Offset = 3; + if (hasTKOp(TSFlags)) + Offset = 4; + return Desc.getNumOperands() - Offset; +} + +static inline unsigned getTMOpNum(const MCInstrDesc &Desc) { + const uint64_t TSFlags = Desc.TSFlags; + assert(hasTWidenOp(TSFlags) && hasTMOp(TSFlags)); + if (hasTKOp(TSFlags)) + return Desc.getNumOperands() - 5; + // vtzero.t + return Desc.getNumOperands() - 4; +} + +static inline unsigned getTKOpNum(const MCInstrDesc &Desc) { + [[maybe_unused]] const uint64_t TSFlags = Desc.TSFlags; + assert(hasTWidenOp(TSFlags) && hasTKOp(TSFlags)); + return Desc.getNumOperands() - 3; +} + static inline unsigned getVLOpNum(const MCInstrDesc &Desc) { const uint64_t TSFlags = Desc.TSFlags; // This method is only called if we expect to have a VL operand, and all // instructions with VL also have SEW. assert(hasSEWOp(TSFlags) && hasVLOp(TSFlags)); + // In Xsfmmbase, TN is an alias for VL, so here we use the same TSFlags bit. + if (hasTWidenOp(TSFlags)) + return getTNOpNum(Desc); unsigned Offset = 2; if (hasVecPolicyOp(TSFlags)) Offset = 3; @@ -226,7 +283,7 @@ static inline unsigned getSEWOpNum(const MCInstrDesc &Desc) { const uint64_t TSFlags = Desc.TSFlags; assert(hasSEWOp(TSFlags)); unsigned Offset = 1; - if (hasVecPolicyOp(TSFlags)) + if (hasVecPolicyOp(TSFlags) || hasTWidenOp(TSFlags)) Offset = 2; return Desc.getNumOperands() - Offset; } @@ -243,6 +300,9 @@ static inline int getFRMOpNum(const MCInstrDesc &Desc) { if (!hasRoundModeOp(TSFlags) || usesVXRM(TSFlags)) return -1; + if (hasTWidenOp(TSFlags) && hasTMOp(TSFlags)) + return getTMOpNum(Desc) - 1; + // The operand order // -------------------------------------- // | n-1 (if any) | n-2 | n-3 | n-4 | @@ -385,7 +445,9 @@ enum OperandType : unsigned { OPERAND_SEW_MASK, // Vector rounding mode for VXRM or FRM. OPERAND_VEC_RM, - OPERAND_LAST_RISCV_IMM = OPERAND_VEC_RM, + // Vtype operand for XSfmm extension. + OPERAND_XSFMM_VTYPE, + OPERAND_LAST_RISCV_IMM = OPERAND_XSFMM_VTYPE, // Operand is either a register or uimm5, this is used by V extension pseudo // instructions to represent a value that be passed as AVL to either vsetvli // or vsetivli. diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index cf8d120..9ed3b97 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -168,10 +168,13 @@ struct DemandedFields { // If this is true, we demand that VTYPE is set to some legal state, i.e. that // vill is unset. 
bool VILL = false; + bool UseTWiden = false; + bool UseAltFmt = false; // Return true if any part of VTYPE was used bool usedVTYPE() const { - return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL; + return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL || + UseTWiden || UseAltFmt; } // Return true if any property of VL was used @@ -187,6 +190,8 @@ struct DemandedFields { TailPolicy = true; MaskPolicy = true; VILL = true; + UseTWiden = true; + UseAltFmt = true; } // Mark all VL properties as demanded @@ -212,6 +217,8 @@ struct DemandedFields { TailPolicy |= B.TailPolicy; MaskPolicy |= B.MaskPolicy; VILL |= B.VILL; + UseAltFmt |= B.UseAltFmt; + UseTWiden |= B.UseTWiden; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -258,7 +265,9 @@ struct DemandedFields { OS << "SEWLMULRatio=" << SEWLMULRatio << ", "; OS << "TailPolicy=" << TailPolicy << ", "; OS << "MaskPolicy=" << MaskPolicy << ", "; - OS << "VILL=" << VILL; + OS << "VILL=" << VILL << ", "; + OS << "UseAltFmt=" << UseAltFmt << ", "; + OS << "UseTWiden=" << UseTWiden; OS << "}"; } #endif @@ -328,6 +337,15 @@ static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType, if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) != RISCVVType::isMaskAgnostic(NewVType)) return false; + if (Used.UseTWiden && (RISCVVType::hasXSfmmWiden(CurVType) != + RISCVVType::hasXSfmmWiden(NewVType) || + (RISCVVType::hasXSfmmWiden(CurVType) && + RISCVVType::getXSfmmWiden(CurVType) != + RISCVVType::getXSfmmWiden(NewVType)))) + return false; + if (Used.UseAltFmt && + RISCVVType::isAltFmt(CurVType) != RISCVVType::isAltFmt(NewVType)) + return false; return true; } @@ -479,6 +497,11 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) { Res.TailPolicy = false; } + Res.UseAltFmt = RISCVII::getAltFmtType(MI.getDesc().TSFlags) != + RISCVII::AltFmtType::DontCare; + Res.UseTWiden = RISCVII::hasTWidenOp(MI.getDesc().TSFlags) || + RISCVInstrInfo::isXSfmmVectorConfigInstr(MI); + return Res; } @@ -510,6 +533,8 @@ class VSETVLIInfo { uint8_t TailAgnostic : 1; uint8_t MaskAgnostic : 1; uint8_t SEWLMULRatioOnly : 1; + uint8_t AltFmt : 1; + uint8_t TWiden : 3; public: VSETVLIInfo() @@ -586,6 +611,8 @@ public: RISCVVType::VLMUL getVLMUL() const { return VLMul; } bool getTailAgnostic() const { return TailAgnostic; } bool getMaskAgnostic() const { return MaskAgnostic; } + bool getAltFmt() const { return AltFmt; } + unsigned getTWiden() const { return TWiden; } bool hasNonZeroAVL(const LiveIntervals *LIS) const { if (hasAVLImm()) @@ -647,21 +674,31 @@ public: SEW = RISCVVType::getSEW(VType); TailAgnostic = RISCVVType::isTailAgnostic(VType); MaskAgnostic = RISCVVType::isMaskAgnostic(VType); + AltFmt = RISCVVType::isAltFmt(VType); + TWiden = + RISCVVType::hasXSfmmWiden(VType) ? 
RISCVVType::getXSfmmWiden(VType) : 0; } - void setVTYPE(RISCVVType::VLMUL L, unsigned S, bool TA, bool MA) { + void setVTYPE(RISCVVType::VLMUL L, unsigned S, bool TA, bool MA, bool Altfmt, + unsigned W) { assert(isValid() && !isUnknown() && "Can't set VTYPE for uninitialized or unknown"); VLMul = L; SEW = S; TailAgnostic = TA; MaskAgnostic = MA; + AltFmt = Altfmt; + TWiden = W; } + void setAltFmt(bool AF) { AltFmt = AF; } + void setVLMul(RISCVVType::VLMUL VLMul) { this->VLMul = VLMul; } unsigned encodeVTYPE() const { assert(isValid() && !isUnknown() && !SEWLMULRatioOnly && "Can't encode VTYPE for uninitialized or unknown"); + if (TWiden != 0) + return RISCVVType::encodeXSfmmVType(SEW, TWiden, AltFmt); return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic); } @@ -674,9 +711,9 @@ public: "Can't compare VTYPE in unknown state"); assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly && "Can't compare when only LMUL/SEW ratio is valid."); - return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) == + return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic, AltFmt, TWiden) == std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic, - Other.MaskAgnostic); + Other.MaskAgnostic, Other.AltFmt, Other.TWiden); } unsigned getSEWLMULRatio() const { @@ -825,7 +862,9 @@ public: << "SEW=e" << (unsigned)SEW << ", " << "TailAgnostic=" << (bool)TailAgnostic << ", " << "MaskAgnostic=" << (bool)MaskAgnostic << ", " - << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}"; + << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << ", " + << "TWiden=" << (unsigned)TWiden << ", " + << "AltFmt=" << (bool)AltFmt << "}"; } #endif }; @@ -853,6 +892,11 @@ struct BlockData { BlockData() = default; }; +enum TKTMMode { + VSETTK = 0, + VSETTM = 1, +}; + class RISCVInsertVSETVLI : public MachineFunctionPass { const RISCVSubtarget *ST; const TargetInstrInfo *TII; @@ -908,6 +952,7 @@ private: VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) const; VSETVLIInfo computeInfoForInstr(const MachineInstr &MI) const; void forwardVSETVLIAVL(VSETVLIInfo &Info) const; + bool insertVSETMTK(MachineBasicBlock &MBB, TKTMMode Mode) const; }; } // end anonymous namespace @@ -945,6 +990,18 @@ RISCVInsertVSETVLI::getInfoForVSETVLI(const MachineInstr &MI) const { VSETVLIInfo NewInfo; if (MI.getOpcode() == RISCV::PseudoVSETIVLI) { NewInfo.setAVLImm(MI.getOperand(1).getImm()); + } else if (RISCVInstrInfo::isXSfmmVectorConfigTNInstr(MI)) { + assert(MI.getOpcode() == RISCV::PseudoSF_VSETTNT || + MI.getOpcode() == RISCV::PseudoSF_VSETTNTX0); + switch (MI.getOpcode()) { + case RISCV::PseudoSF_VSETTNTX0: + NewInfo.setAVLVLMAX(); + break; + case RISCV::PseudoSF_VSETTNT: + Register ATNReg = MI.getOperand(1).getReg(); + NewInfo.setAVLRegDef(getVNInfoFromReg(ATNReg, MI, LIS), ATNReg); + break; + } } else { assert(MI.getOpcode() == RISCV::PseudoVSETVLI || MI.getOpcode() == RISCV::PseudoVSETVLIX0); @@ -1005,11 +1062,34 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const { RISCVVType::VLMUL VLMul = RISCVII::getLMul(TSFlags); + bool AltFmt = RISCVII::getAltFmtType(TSFlags) == RISCVII::AltFmtType::AltFmt; + InstrInfo.setAltFmt(AltFmt); + unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm(); // A Log2SEW of 0 is an operation on mask registers only. unsigned SEW = Log2SEW ? 
1 << Log2SEW : 8; assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW"); + if (RISCVII::hasTWidenOp(TSFlags)) { + const MachineOperand &TWidenOp = + MI.getOperand(MI.getNumExplicitOperands() - 1); + unsigned TWiden = TWidenOp.getImm(); + + InstrInfo.setAVLVLMAX(); + if (RISCVII::hasVLOp(TSFlags)) { + const MachineOperand &TNOp = + MI.getOperand(RISCVII::getTNOpNum(MI.getDesc())); + + if (TNOp.getReg().isVirtual()) + InstrInfo.setAVLRegDef(getVNInfoFromReg(TNOp.getReg(), MI, LIS), + TNOp.getReg()); + } + + InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic, AltFmt, TWiden); + + return InstrInfo; + } + if (RISCVII::hasVLOp(TSFlags)) { const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI)); if (VLOp.isImm()) { @@ -1045,7 +1125,9 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const { assert(SEW == EEW && "Initial SEW doesn't match expected EEW"); } #endif - InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic); + // TODO: Propagate the twiden from previous vtype for potential reuse. + InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic, AltFmt, + /*TWiden*/ 0); forwardVSETVLIAVL(InstrInfo); @@ -1053,10 +1135,33 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const { } void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, - MachineBasicBlock::iterator InsertPt, DebugLoc DL, - const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) { - + MachineBasicBlock::iterator InsertPt, + DebugLoc DL, const VSETVLIInfo &Info, + const VSETVLIInfo &PrevInfo) { ++NumInsertedVSETVL; + + if (Info.getTWiden()) { + if (Info.hasAVLVLMAX()) { + Register DestReg = MRI->createVirtualRegister(&RISCV::GPRNoX0RegClass); + auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoSF_VSETTNTX0)) + .addReg(DestReg, RegState::Define | RegState::Dead) + .addReg(RISCV::X0, RegState::Kill) + .addImm(Info.encodeVTYPE()); + if (LIS) { + LIS->InsertMachineInstrInMaps(*MI); + LIS->createAndComputeVirtRegInterval(DestReg); + } + } else { + auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoSF_VSETTNT)) + .addReg(RISCV::X0, RegState::Define | RegState::Dead) + .addReg(Info.getAVLReg()) + .addImm(Info.encodeVTYPE()); + if (LIS) + LIS->InsertMachineInstrInMaps(*MI); + } + return; + } + if (PrevInfo.isValid() && !PrevInfo.isUnknown()) { // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same // VLMAX. @@ -1198,7 +1303,8 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, // be coalesced into another vsetvli since we won't demand any fields. VSETVLIInfo NewInfo; // Need a new VSETVLIInfo to clear SEWLMULRatioOnly NewInfo.setAVLImm(1); - NewInfo.setVTYPE(RISCVVType::LMUL_1, /*sew*/ 8, /*ta*/ true, /*ma*/ true); + NewInfo.setVTYPE(RISCVVType::LMUL_1, /*sew*/ 8, /*ta*/ true, /*ma*/ true, + /*AltFmt*/ false, /*W*/ 0); Info = NewInfo; return; } @@ -1240,7 +1346,9 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() || IncomingInfo.getTailAgnostic(), (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() || - IncomingInfo.getMaskAgnostic()); + IncomingInfo.getMaskAgnostic(), + (Demanded.UseAltFmt ? IncomingInfo : Info).getAltFmt(), + Demanded.UseTWiden ? IncomingInfo.getTWiden() : 0); // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep // the AVL. 
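The operand arithmetic used above (getTNOpNum, getVLOpNum and friends) is easier to see with a concrete layout. Below is a small stand-alone sketch, with OpLayout standing in for MCInstrDesc; the offsets are copied from the RISCVBaseInfo.h helpers earlier in this diff and the worked example follows the VPseudoSF_MatMul operand order defined later in it. It is an illustration, not part of the patch.

#include <cassert>

// Sketch: XSfmm operand positions are counted back from the end of the
// operand list, because the trailing operands are fixed (… sew, twiden).
struct OpLayout {
  unsigned NumOperands;
  bool HasTKOp;
};

unsigned getTKOpNum(const OpLayout &D) { return D.NumOperands - 3; }
unsigned getTNOpNum(const OpLayout &D) { return D.NumOperands - (D.HasTKOp ? 4 : 3); }
unsigned getTMOpNum(const OpLayout &D) { return D.NumOperands - (D.HasTKOp ? 5 : 4); }
// Offset 2 because a twiden (or policy) operand sits after the SEW operand.
unsigned getSEWOpNum(const OpLayout &D) { return D.NumOperands - 2; }

int main() {
  // Worked example: an XSfmm mat-mul pseudo with operands
  //   rd, vs2, vs1, atm, atn, atk, sew, twiden   (indices 0..7)
  OpLayout MatMul{8, /*HasTKOp=*/true};
  assert(getTMOpNum(MatMul) == 3);  // atm
  assert(getTNOpNum(MatMul) == 4);  // atn, also used as the VL operand
  assert(getTKOpNum(MatMul) == 5);  // atk
  assert(getSEWOpNum(MatMul) == 6); // sew; twiden is the last operand (7)
  return 0;
}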
@@ -1293,7 +1401,8 @@ bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB, if (RISCVInstrInfo::isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags) || - isVectorCopy(ST->getRegisterInfo(), MI)) + isVectorCopy(ST->getRegisterInfo(), MI) || + RISCVInstrInfo::isXSfmmVectorConfigInstr(MI)) HadVectorOp = true; transferAfter(Info, MI); @@ -1675,6 +1784,12 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const { }; for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) { + // TODO: Support XSfmm. + if (RISCVII::hasTWidenOp(MI.getDesc().TSFlags) || + RISCVInstrInfo::isXSfmmVectorConfigInstr(MI)) { + NextMI = nullptr; + continue; + } if (!RISCVInstrInfo::isVectorConfigInstr(MI)) { Used.doUnion(getDemanded(MI, ST)); @@ -1788,6 +1903,65 @@ void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) { } } +bool RISCVInsertVSETVLI::insertVSETMTK(MachineBasicBlock &MBB, + TKTMMode Mode) const { + + bool Changed = false; + for (auto &MI : MBB) { + uint64_t TSFlags = MI.getDesc().TSFlags; + if (RISCVInstrInfo::isXSfmmVectorConfigTMTKInstr(MI) || + !RISCVII::hasSEWOp(TSFlags) || !RISCVII::hasTWidenOp(TSFlags)) + continue; + + VSETVLIInfo CurrInfo = computeInfoForInstr(MI); + + if (Mode == VSETTK && !RISCVII::hasTKOp(TSFlags)) + continue; + + if (Mode == VSETTM && !RISCVII::hasTMOp(TSFlags)) + continue; + + unsigned OpNum = 0; + unsigned Opcode = 0; + switch (Mode) { + case VSETTK: + OpNum = RISCVII::getTKOpNum(MI.getDesc()); + Opcode = RISCV::PseudoSF_VSETTK; + break; + case VSETTM: + OpNum = RISCVII::getTMOpNum(MI.getDesc()); + Opcode = RISCV::PseudoSF_VSETTM; + break; + } + + assert(OpNum && Opcode && "Invalid OpNum or Opcode"); + + MachineOperand &Op = MI.getOperand(OpNum); + + auto TmpMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(Opcode)) + .addReg(RISCV::X0, RegState::Define | RegState::Dead) + .addReg(Op.getReg()) + .addImm(Log2_32(CurrInfo.getSEW())) + .addImm(Log2_32(CurrInfo.getTWiden()) + 1); + + Changed = true; + Register Reg = Op.getReg(); + Op.setReg(Register()); + Op.setIsKill(false); + if (LIS) { + LIS->InsertMachineInstrInMaps(*TmpMI); + LiveInterval &LI = LIS->getInterval(Reg); + + // Erase the AVL operand from the instruction. + LIS->shrinkToUses(&LI); + // TODO: Enable this once needVSETVLIPHI is supported. + // SmallVector<LiveInterval *> SplitLIs; + // LIS->splitSeparateComponents(LI, SplitLIs); + } + } + return Changed; +} + bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) { // Skip if the vector extension is not enabled. ST = &MF.getSubtarget<RISCVSubtarget>(); @@ -1865,6 +2039,11 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock &MBB : MF) insertReadVL(MBB); + for (MachineBasicBlock &MBB : MF) { + insertVSETMTK(MBB, VSETTM); + insertVSETMTK(MBB, VSETTK); + } + BlockInfo.clear(); return HaveVectorOp; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td index 2afd77a..5b06303 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td +++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td @@ -267,6 +267,22 @@ class RVInstCommon<dag outs, dag ins, string opcodestr, string argstr, // operands' VLs. bit ReadsPastVL = 0; let TSFlags{26} = ReadsPastVL; + + // 0 -> Don't care about altfmt bit in VTYPE. + // 1 -> Is not altfmt. + // 2 -> Is altfmt(BF16). 
+ bits<2> AltFmtType = 0; + let TSFlags{28-27} = AltFmtType; + + // XSfmmbase + bit HasTWidenOp = 0; + let TSFlags{29} = HasTWidenOp; + + bit HasTmOp = 0; + let TSFlags{30} = HasTmOp; + + bit HasTkOp = 0; + let TSFlags{31} = HasTkOp; } class RVInst<dag outs, dag ins, string opcodestr, string argstr, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 96e1078..ddb53a2 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -3005,6 +3005,9 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI, else Ok = RISCVFPRndMode::isValidRoundingMode(Imm); break; + case RISCVOp::OPERAND_XSFMM_VTYPE: + Ok = RISCVVType::isValidXSfmmVType(Imm); + break; } if (!Ok) { ErrInfo = "Invalid immediate"; @@ -3670,6 +3673,11 @@ std::string RISCVInstrInfo::createMIROperandComment( RISCVVType::printVType(Imm, OS); break; } + case RISCVOp::OPERAND_XSFMM_VTYPE: { + unsigned Imm = Op.getImm(); + RISCVVType::printXSfmmVType(Imm, OS); + break; + } case RISCVOp::OPERAND_SEW: case RISCVOp::OPERAND_SEW_MASK: { unsigned Log2SEW = Op.getImm(); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 298d35a..c1b23af 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -128,6 +128,9 @@ defvar TAIL_AGNOSTIC = 1; defvar TU_MU = 0; defvar TA_MU = 1; defvar TA_MA = 3; +defvar DONT_CARE_ALTFMT = 0; +defvar IS_NOT_ALTFMT = 1; +defvar IS_ALTFMT = 2; //===----------------------------------------------------------------------===// // Utilities. @@ -159,7 +162,8 @@ class PseudoToVInst<string PseudoInst> { ["_M4", ""], ["_M8", ""], ["_SE", ""], - ["_RM", ""] + ["_RM", ""], + ["_ALT", ""] ]; string VInst = !foldl(PseudoInst, AffixSubsts, Acc, AffixSubst, !subst(AffixSubst[0], AffixSubst[1], Acc)); @@ -6396,7 +6400,7 @@ let Defs = [VXSAT] in { // 13. Vector Floating-Point Instructions //===----------------------------------------------------------------------===// -let Predicates = [HasVInstructionsAnyF] in { +let Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT in { //===----------------------------------------------------------------------===// // 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions //===----------------------------------------------------------------------===// @@ -6565,7 +6569,7 @@ defm PseudoVFNCVT_F_F : VPseudoVNCVTD_W_RM; defm PseudoVFNCVT_ROD_F_F : VPseudoVNCVTD_W; } // mayRaiseFPException = true -} // Predicates = [HasVInstructionsAnyF] +} // Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT //===----------------------------------------------------------------------===// // 14. Vector Reduction Operations @@ -6593,7 +6597,7 @@ defm PseudoVWREDSUM : VPseudoVWRED_VS; } } // Predicates = [HasVInstructions] -let Predicates = [HasVInstructionsAnyF] in { +let Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT in { //===----------------------------------------------------------------------===// // 14.3. 
Vector Single-Width Floating-Point Reduction Instructions //===----------------------------------------------------------------------===// @@ -6612,7 +6616,7 @@ defm PseudoVFWREDUSUM : VPseudoVFWRED_VS_RM; defm PseudoVFWREDOSUM : VPseudoVFWREDO_VS_RM; } -} // Predicates = [HasVInstructionsAnyF] +} // Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT //===----------------------------------------------------------------------===// // 15. Vector Mask Instructions @@ -6703,7 +6707,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { // 16.2. Floating-Point Scalar Move Instructions //===----------------------------------------------------------------------===// -let Predicates = [HasVInstructionsAnyF] in { +let Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT in { let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { foreach f = FPList in { let HasSEWOp = 1, BaseInstr = VFMV_F_S in @@ -6718,7 +6722,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { Sched<[WriteVMovSF, ReadVMovSF_V, ReadVMovSF_F]>; } } -} // Predicates = [HasVInstructionsAnyF] +} // Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT //===----------------------------------------------------------------------===// // 16.3. Vector Slide Instructions @@ -6730,10 +6734,10 @@ let Predicates = [HasVInstructions] in { defm PseudoVSLIDE1DOWN : VPseudoVSLD1_VX; } // Predicates = [HasVInstructions] -let Predicates = [HasVInstructionsAnyF] in { +let Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT in { defm PseudoVFSLIDE1UP : VPseudoVSLD1_VF<"@earlyclobber $rd">; defm PseudoVFSLIDE1DOWN : VPseudoVSLD1_VF; -} // Predicates = [HasVInstructionsAnyF] +} // Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT //===----------------------------------------------------------------------===// // 16.4. 
Vector Register Gather Instructions diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td index 557d873..6a4119a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td @@ -438,8 +438,10 @@ let Predicates = [HasVendorXSfvcp] in { } foreach f = FPList in { foreach m = f.MxList in { - defm f.FX # "V" : VPseudoVC_XV<m, f.fprclass, payload1>; - defm f.FX # "VV" : VPseudoVC_XVV<m, f.fprclass, payload1>; + let AltFmtType = IS_NOT_ALTFMT in { + defm f.FX # "V" : VPseudoVC_XV<m, f.fprclass, payload1>; + defm f.FX # "VV" : VPseudoVC_XVV<m, f.fprclass, payload1>; + } } } foreach m = MxListW in { @@ -449,7 +451,8 @@ let Predicates = [HasVendorXSfvcp] in { } foreach f = FPListW in { foreach m = f.MxList in - defm f.FX # "VW" : VPseudoVC_XVW<m, f.fprclass, payload1>; + let AltFmtType = IS_NOT_ALTFMT in + defm f.FX # "VW" : VPseudoVC_XVW<m, f.fprclass, payload1>; } } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td index a5ee701..5ad22e6b 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td @@ -225,7 +225,7 @@ let Predicates = [HasVendorXSfmmbase] in { def SF_VSETTM : SFInstSetSingle<(outs GPR:$rd), (ins GPR:$rs1), 0b00001, "sf.vsettm", "$rd, $rs1">; def SF_VSETTK : SFInstSetSingle<(outs GPR:$rd), (ins GPR:$rs1), 0b00010, - "sf.vsettk", "$rd, $rs1">; + "sf.vsettk", "$rd, $rs1">; def SF_VTDISCARD : SFInstVtDiscard<"sf.vtdiscard">; def SF_VTMV_V_T : SFInstTileMoveOp<0b010000, (outs VR:$vd), (ins GPR:$rs1), @@ -277,3 +277,144 @@ let Uses = [FRM], mayRaiseFPException = true in { } // Predicates = [HasVendorXSfmm32a8f] } // DecoderNamespace = "XSfvector" + +class VPseudoSF_VTileLoad + : RISCVVPseudo<(outs), (ins GPR:$rs2, GPR:$rs1, AVL:$atn, ixlenimm:$sew, + ixlenimm:$twiden)> { + let mayLoad = 1; + let mayStore = 0; + let HasVLOp = 1; // Tn + let HasSEWOp = 1; + let HasTWidenOp = 1; + let hasSideEffects = 1; +} + +class VPseudoSF_VTileStore + : RISCVVPseudo<(outs), (ins GPR:$rs2, GPR:$rs1, AVL:$atn, ixlenimm:$sew, + ixlenimm:$twiden)> { + let mayLoad = 0; + let mayStore = 1; + let HasVLOp = 1; // Tn + let HasSEWOp = 1; + let HasTWidenOp = 1; + let hasSideEffects = 1; +} + +class VPseudoSF_VTileMove_V_T + : RISCVVPseudo<(outs VRM8:$vd), (ins GPR:$rs1, AVL:$atn, ixlenimm:$sew, + ixlenimm:$twiden)> { + let mayLoad = 0; + let mayStore = 0; + let HasVLOp = 1; // Tn + let HasSEWOp = 1; + let HasTWidenOp = 1; + let hasSideEffects = 1; +} + +class VPseudoSF_VTileMove_T_V + : RISCVVPseudo<(outs), (ins GPR:$rs1, VRM8:$vs2, AVL:$atn, ixlenimm:$sew, + ixlenimm:$twiden)> { + let mayLoad = 0; + let mayStore = 0; + let HasVLOp = 1; // Tn + let HasSEWOp = 1; + let HasTWidenOp = 1; + let hasSideEffects = 1; +} + +class VPseudoSF_MatMul<RegisterClass mtd_class> + : RISCVVPseudo<(outs), + (ins mtd_class:$rd, VRM8:$vs2, VRM8:$vs1, AVL:$atm, AVL:$atn, + AVL:$atk, ixlenimm:$sew, ixlenimm:$twiden)> { + let mayLoad = 0; + let mayStore = 0; + let HasTmOp = 1; + let HasVLOp = 1; // Tn + let HasTkOp = 1; + let HasSEWOp = 1; + let HasTWidenOp = 1; + let hasSideEffects = 1; +} + +class VPseudoSF_MatMul_FRM<RegisterClass mtd_class> + : RISCVVPseudo<(outs), + (ins mtd_class:$rd, VRM8:$vs2, VRM8:$vs1, ixlenimm:$frm, + AVL:$atm, AVL:$atn, AVL:$atk, ixlenimm:$sew, + ixlenimm:$twiden), []> { + let mayLoad = 0; + let mayStore = 0; + let HasTmOp = 1; + let HasVLOp = 1; // Tn + let HasTkOp = 1; + let HasSEWOp = 1; + let HasRoundModeOp = 1; 
+ let hasPostISelHook = 1; + let HasTWidenOp = 1; + let hasSideEffects = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { +let Defs = [VL, VTYPE] in { + def PseudoSF_VSETTNT + : Pseudo<(outs GPR:$rd), + (ins GPRNoX0:$rs1, XSfmmVTypeOp:$vtypei), []>, + PseudoInstExpansion<(VSETVLI GPR:$rd, GPR:$rs1, VTypeIOp11:$vtypei)>, + Sched<[WriteVSETVLI, ReadVSETVLI]>; + def PseudoSF_VSETTNTX0 + : Pseudo<(outs GPRNoX0:$rd), + (ins GPRX0:$rs1, XSfmmVTypeOp:$vtypei), []>, + PseudoInstExpansion<(VSETVLI GPR:$rd, GPR:$rs1, VTypeIOp11:$vtypei)>, + Sched<[WriteVSETVLI, ReadVSETVLI]>; + def PseudoSF_VSETTNTX0X0 + : Pseudo<(outs GPRX0:$rd), + (ins GPRX0:$rs1, XSfmmVTypeOp:$vtypei), []>, + PseudoInstExpansion<(VSETVLI GPR:$rd, GPR:$rs1, VTypeIOp11:$vtypei)>, + Sched<[WriteVSETVLI, ReadVSETVLI]>; +} + +let Defs = [VTYPE], Uses = [VTYPE], HasTWidenOp = 1, HasSEWOp = 1 in { + def PseudoSF_VSETTM + : Pseudo<(outs GPR:$rd), + (ins GPR:$rs1, ixlenimm:$log2sew, ixlenimm:$twiden), []>, + PseudoInstExpansion<(SF_VSETTM GPR:$rd, GPR:$rs1)>, + Sched<[WriteVSETVLI, ReadVSETVLI]>; + def PseudoSF_VSETTK + : Pseudo<(outs GPR:$rd), + (ins GPR:$rs1, ixlenimm:$logwsew, ixlenimm:$twiden), []>, + PseudoInstExpansion<(SF_VSETTK GPR:$rd, GPR:$rs1)>, + Sched<[WriteVSETVLI, ReadVSETVLI]>; +} +} + +foreach eew = [8, 16, 32, 64] in { + def PseudoSF_VLTE # eew : VPseudoSF_VTileLoad; + def PseudoSF_VSTE # eew : VPseudoSF_VTileStore; +} + +def PseudoSF_VTMV_T_V : VPseudoSF_VTileMove_T_V; +def PseudoSF_VTMV_V_T : VPseudoSF_VTileMove_V_T; + +foreach a = I8Encodes in + foreach b = I8Encodes in + def PseudoSF_MM_ # !toupper(a.Name) # _ # !toupper(b.Name) + : VPseudoSF_MatMul<TRM4>; + +let AltFmtType = IS_NOT_ALTFMT in + def PseudoSF_MM_F_F : VPseudoSF_MatMul_FRM<TRM2>; +let AltFmtType = IS_ALTFMT in + def PseudoSF_MM_F_F_ALT : VPseudoSF_MatMul_FRM<TRM2>; + +foreach e1 = [5, 4] in + foreach e2 = [5, 4] in + def PseudoSF_MM_E # e1 # M # !sub(7, e1) # _E # e2 # M # !sub(7, e2) + : VPseudoSF_MatMul_FRM<TRM4>; + +let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in { + let HasVLOp = 1, HasTmOp = 1, HasTWidenOp = 1, HasSEWOp = 1 in + def PseudoSF_VTZERO_T + : RISCVVPseudo<(outs), + (ins TR:$rd, AVL:$atm, AVL:$atn, ixlenimm:$sew, + ixlenimm:$twiden)>; + def PseudoSF_VTDISCARD : RISCVVPseudo<(outs), (ins), []>; +} diff --git a/llvm/lib/Target/RISCV/RISCVInstrPredicates.td b/llvm/lib/Target/RISCV/RISCVInstrPredicates.td index 3658817..dcae977 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrPredicates.td +++ b/llvm/lib/Target/RISCV/RISCVInstrPredicates.td @@ -78,7 +78,41 @@ def isVectorConfigInstr PseudoVSETVLI, PseudoVSETVLIX0, PseudoVSETVLIX0X0, - PseudoVSETIVLI + PseudoVSETIVLI, + PseudoSF_VSETTNT, + PseudoSF_VSETTNTX0, + PseudoSF_VSETTNTX0X0 + ]>>>; + +// Returns true if this is a PseudoSF_VSETTNT* instructions. +def isXSfmmVectorConfigTNInstr + : TIIPredicate<"isXSfmmVectorConfigTNInstr", + MCReturnStatement< + CheckOpcode<[ + PseudoSF_VSETTNT, + PseudoSF_VSETTNTX0, + PseudoSF_VSETTNTX0X0 + ]>>>; + +// Returns true if this is PseudoSF_VSETTM or PseudoSF_VSETTK. +def isXSfmmVectorConfigTMTKInstr + : TIIPredicate<"isXSfmmVectorConfigTMTKInstr", + MCReturnStatement< + CheckOpcode<[ + PseudoSF_VSETTM, + PseudoSF_VSETTK + ]>>>; + +// Returns true if this is a XSfmm vector configuration instruction. 
+def isXSfmmVectorConfigInstr + : TIIPredicate<"isXSfmmVectorConfigInstr", + MCReturnStatement< + CheckOpcode<[ + PseudoSF_VSETTNT, + PseudoSF_VSETTNTX0, + PseudoSF_VSETTNTX0X0, + PseudoSF_VSETTM, + PseudoSF_VSETTK ]>>>; // Return true if this is 'vsetvli x0, x0, vtype' which preserves diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index 40b6416..e9f43b9 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -178,6 +178,10 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const { // Shadow stack pointer. markSuperRegs(Reserved, RISCV::SSP); + // XSfmmbase + for (MCPhysReg Reg = RISCV::T0; Reg <= RISCV::T15; Reg++) + markSuperRegs(Reserved, Reg); + assert(checkAllSuperRegsMarked(Reserved)); return Reserved; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 6472334..47c24fc 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -317,6 +317,15 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Custom); } + if (Subtarget->hasFP16()) { + setOperationAction(ISD::FMA, MVT::v8f16, Legal); + } + + if (Subtarget->hasRelaxedSIMD()) { + setOperationAction(ISD::FMULADD, MVT::v4f32, Legal); + setOperationAction(ISD::FMULADD, MVT::v2f64, Legal); + } + // Partial MLA reductions. for (auto Op : {ISD::PARTIAL_REDUCE_SMLA, ISD::PARTIAL_REDUCE_UMLA}) { setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v16i8, Legal); @@ -1120,6 +1129,18 @@ WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const { return TargetLoweringBase::getPreferredVectorAction(VT); } +bool WebAssemblyTargetLowering::isFMAFasterThanFMulAndFAdd( + const MachineFunction &MF, EVT VT) const { + if (!Subtarget->hasFP16() || !VT.isVector()) + return false; + + EVT ScalarVT = VT.getScalarType(); + if (!ScalarVT.isSimple()) + return false; + + return ScalarVT.getSimpleVT().SimpleTy == MVT::f16; +} + bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts( SDValue Op, const TargetLoweringOpt &TLO) const { // ISel process runs DAGCombiner after legalization; this step is called diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h index b33a853..472ec67 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -81,6 +81,8 @@ private: TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override; + bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, + EVT VT) const override; SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const override; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 49af78b..0f6e1ca 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -1213,6 +1213,27 @@ defm EXTMUL_LOW_U : defm EXTMUL_HIGH_U : SIMDExtBinary<I64x2, extmul_high_u, "extmul_high_i32x4_u", 0xdf>; +// Pattern for i32x4.dot_i16x8_s +def : Pat< + (v4i32 (add + (wasm_shuffle + (v4i32 (extmul_low_s v8i16:$lhs, v8i16:$rhs)), + (v4i32 (extmul_high_s v8i16:$lhs, v8i16:$rhs)), + (i32 0), (i32 1), (i32 2), (i32 3), + (i32 8), (i32 9), (i32 10), (i32 11), + (i32 16), 
(i32 17), (i32 18), (i32 19), + (i32 24), (i32 25), (i32 26), (i32 27)), + (wasm_shuffle + (v4i32 (extmul_low_s v8i16:$lhs, v8i16:$rhs)), + (v4i32 (extmul_high_s v8i16:$lhs, v8i16:$rhs)), + (i32 4), (i32 5), (i32 6), (i32 7), + (i32 12), (i32 13), (i32 14), (i32 15), + (i32 20), (i32 21), (i32 22), (i32 23), + (i32 28), (i32 29), (i32 30), (i32 31))) + ), + (v4i32 (DOT v8i16:$lhs, v8i16:$rhs)) +>; + //===----------------------------------------------------------------------===// // Floating-point unary arithmetic //===----------------------------------------------------------------------===// @@ -1626,7 +1647,8 @@ defm "" : RelaxedConvert<I32x4, F64x2, int_wasm_relaxed_trunc_unsigned_zero, // Relaxed (Negative) Multiply-Add (madd/nmadd) //===----------------------------------------------------------------------===// -multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, list<Predicate> reqs> { +multiclass RELAXED_SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, + list<Predicate> reqs> { defm MADD_#vec : SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins), [(set (vec.vt V128:$dst), (int_wasm_relaxed_madd @@ -1640,16 +1662,46 @@ multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, list<Predicate> vec.prefix#".relaxed_nmadd\t$dst, $a, $b, $c", vec.prefix#".relaxed_nmadd", simdopS, reqs>; - def : Pat<(fadd_contract (vec.vt V128:$a), (fmul_contract (vec.vt V128:$b), (vec.vt V128:$c))), - (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>; + def : Pat<(fadd_contract (fmul_contract (vec.vt V128:$a), (vec.vt V128:$b)), (vec.vt V128:$c)), + (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>; + def : Pat<(fmuladd (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)), + (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>; - def : Pat<(fsub_contract (vec.vt V128:$a), (fmul_contract (vec.vt V128:$b), (vec.vt V128:$c))), - (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>; + def : Pat<(fsub_contract (vec.vt V128:$c), (fmul_contract (vec.vt V128:$a), (vec.vt V128:$b))), + (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>; + def : Pat<(fmuladd (fneg (vec.vt V128:$a)), (vec.vt V128:$b), (vec.vt V128:$c)), + (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>; } -defm "" : SIMDMADD<F32x4, 0x105, 0x106, [HasRelaxedSIMD]>; -defm "" : SIMDMADD<F64x2, 0x107, 0x108, [HasRelaxedSIMD]>; -defm "" : SIMDMADD<F16x8, 0x14e, 0x14f, [HasFP16]>; +defm "" : RELAXED_SIMDMADD<F32x4, 0x105, 0x106, [HasRelaxedSIMD]>; +defm "" : RELAXED_SIMDMADD<F64x2, 0x107, 0x108, [HasRelaxedSIMD]>; + +//===----------------------------------------------------------------------===// +// FP16 (Negative) Multiply-Add (madd/nmadd) +//===----------------------------------------------------------------------===// + +multiclass HALF_PRECISION_SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, + list<Predicate> reqs> { + defm MADD_#vec : + SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins), + [(set (vec.vt V128:$dst), (fma + (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))], + vec.prefix#".madd\t$dst, $a, $b, $c", + vec.prefix#".madd", simdopA, reqs>; + defm NMADD_#vec : + SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins), + [(set (vec.vt V128:$dst), (fma + (fneg (vec.vt V128:$a)), (vec.vt V128:$b), (vec.vt V128:$c)))], + vec.prefix#".nmadd\t$dst, $a, $b, $c", + 
vec.prefix#".nmadd", simdopS, reqs>; +} +defm "" : HALF_PRECISION_SIMDMADD<F16x8, 0x14e, 0x14f, [HasFP16]>; + +// TODO: I think separate intrinsics should be introduced for these FP16 operations. +def : Pat<(v8f16 (int_wasm_relaxed_madd (v8f16 V128:$a), (v8f16 V128:$b), (v8f16 V128:$c))), + (MADD_F16x8 V128:$a, V128:$b, V128:$c)>; +def : Pat<(v8f16 (int_wasm_relaxed_nmadd (v8f16 V128:$a), (v8f16 V128:$b), (v8f16 V128:$c))), + (NMADD_F16x8 V128:$a, V128:$b, V128:$c)>; //===----------------------------------------------------------------------===// // Laneselect diff --git a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp index 080a9c0..4e73070 100644 --- a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp +++ b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp @@ -84,11 +84,11 @@ bool Xtensa::checkRegister(MCRegister RegNo, const FeatureBitset &FeatureBits, case Xtensa::CCOMPARE0: if (FeatureBits[Xtensa::FeatureTimers1]) return true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case Xtensa::CCOMPARE1: if (FeatureBits[Xtensa::FeatureTimers2]) return true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case Xtensa::CCOMPARE2: if (FeatureBits[Xtensa::FeatureTimers3]) return true; @@ -107,37 +107,37 @@ bool Xtensa::checkRegister(MCRegister RegNo, const FeatureBitset &FeatureBits, case Xtensa::EXCSAVE1: case Xtensa::EXCVADDR: return FeatureBits[Xtensa::FeatureException]; - LLVM_FALLTHROUGH; + [[fallthrough]]; case Xtensa::EPC2: case Xtensa::EPS2: case Xtensa::EXCSAVE2: if (FeatureBits[Xtensa::FeatureHighPriInterrupts]) return true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case Xtensa::EPC3: case Xtensa::EPS3: case Xtensa::EXCSAVE3: if (FeatureBits[Xtensa::FeatureHighPriInterruptsLevel3]) return true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case Xtensa::EPC4: case Xtensa::EPS4: case Xtensa::EXCSAVE4: if (FeatureBits[Xtensa::FeatureHighPriInterruptsLevel4]) return true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case Xtensa::EPC5: case Xtensa::EPS5: case Xtensa::EXCSAVE5: if (FeatureBits[Xtensa::FeatureHighPriInterruptsLevel5]) return true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case Xtensa::EPC6: case Xtensa::EPS6: case Xtensa::EXCSAVE6: if (FeatureBits[Xtensa::FeatureHighPriInterruptsLevel6]) return true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case Xtensa::EPC7: case Xtensa::EPS7: case Xtensa::EXCSAVE7: diff --git a/llvm/lib/TargetParser/RISCVTargetParser.cpp b/llvm/lib/TargetParser/RISCVTargetParser.cpp index acf8e4c..5ea63a9 100644 --- a/llvm/lib/TargetParser/RISCVTargetParser.cpp +++ b/llvm/lib/TargetParser/RISCVTargetParser.cpp @@ -228,6 +228,10 @@ void printVType(unsigned VType, raw_ostream &OS) { OS << ", mu"; } +void printXSfmmVType(unsigned VType, raw_ostream &OS) { + OS << "e" << getSEW(VType) << ", w" << getXSfmmWiden(VType); +} + unsigned getSEWLMULRatio(unsigned SEW, VLMUL VLMul) { unsigned LMul; bool Fractional; diff --git a/llvm/lib/TargetParser/TargetDataLayout.cpp b/llvm/lib/TargetParser/TargetDataLayout.cpp index cea246e..950bb2b 100644 --- a/llvm/lib/TargetParser/TargetDataLayout.cpp +++ b/llvm/lib/TargetParser/TargetDataLayout.cpp @@ -258,7 +258,7 @@ static std::string computePowerDataLayout(const Triple &T) { static std::string computeAMDDataLayout(const Triple &TT) { if (TT.getArch() == Triple::r600) { // 32-bit pointers. 
- return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" + return "e-m:e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"; } @@ -268,7 +268,7 @@ static std::string computeAMDDataLayout(const Triple &TT) { // (address space 7), and 128-bit non-integral buffer resourcees (address // space 8) which cannot be non-trivilally accessed by LLVM memory operations // like getelementptr. - return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" + return "e-m:e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" "-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-" "v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-" "v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"; diff --git a/llvm/lib/Transforms/Coroutines/CoroCloner.h b/llvm/lib/Transforms/Coroutines/CoroCloner.h index 26ec4f3..e05fe28 100644 --- a/llvm/lib/Transforms/Coroutines/CoroCloner.h +++ b/llvm/lib/Transforms/Coroutines/CoroCloner.h @@ -1,3 +1,4 @@ +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -19,9 +20,7 @@ #include "llvm/Transforms/Coroutines/CoroInstr.h" #include "llvm/Transforms/Utils/ValueMapper.h" -namespace llvm { - -namespace coro { +namespace llvm::coro { enum class CloneKind { /// The shared resume function for a switch lowering. @@ -149,8 +148,6 @@ public: } }; -} // end namespace coro - -} // end namespace llvm +} // end namespace llvm::coro #endif // LLVM_LIB_TRANSFORMS_COROUTINES_COROCLONER_H diff --git a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp index 471b9eb..cdb5852 100644 --- a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp @@ -38,7 +38,7 @@ public: AnyResumeFnPtrTy(PointerType::getUnqual(Context)) {} void lowerEarlyIntrinsics(Function &F); }; -} +} // namespace // Replace a direct call to coro.resume or coro.destroy with an indirect call to // an address returned by coro.subfn.addr intrinsic. This is done so that diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h index 52f4ffe..cc47a55 100644 --- a/llvm/lib/Transforms/Coroutines/CoroInternal.h +++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h @@ -16,11 +16,7 @@ #include "llvm/Transforms/Coroutines/CoroInstr.h" #include "llvm/Transforms/Coroutines/CoroShape.h" -namespace llvm { - -class CallGraph; - -namespace coro { +namespace llvm::coro { bool isSuspendBlock(BasicBlock *BB); bool declaresAnyIntrinsic(const Module &M); @@ -61,7 +57,6 @@ void normalizeCoroutine(Function &F, coro::Shape &Shape, CallInst *createMustTailCall(DebugLoc Loc, Function *MustTailCallFn, TargetTransformInfo &TTI, ArrayRef<Value *> Arguments, IRBuilder<> &); -} // End namespace coro. 
-} // End namespace llvm +} // End namespace llvm::coro #endif diff --git a/llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp b/llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp index 6aaabca..f2444da 100644 --- a/llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp +++ b/llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp @@ -137,8 +137,7 @@ struct RematGraph { } // namespace -namespace llvm { -template <> struct GraphTraits<RematGraph *> { +template <> struct llvm::GraphTraits<RematGraph *> { using NodeRef = RematGraph::RematNode *; using ChildIteratorType = RematGraph::RematNode **; @@ -149,8 +148,6 @@ template <> struct GraphTraits<RematGraph *> { static ChildIteratorType child_end(NodeRef N) { return N->Operands.end(); } }; -} // end namespace llvm - // For each instruction identified as materializable across the suspend point, // and its associated DAG of other rematerializable instructions, // recreate the DAG of instructions after the suspend point. diff --git a/llvm/lib/Transforms/Coroutines/SpillUtils.cpp b/llvm/lib/Transforms/Coroutines/SpillUtils.cpp index e474c07..81fe0c9 100644 --- a/llvm/lib/Transforms/Coroutines/SpillUtils.cpp +++ b/llvm/lib/Transforms/Coroutines/SpillUtils.cpp @@ -16,11 +16,8 @@ #include "llvm/IR/InstIterator.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -namespace llvm { - -namespace coro { - -namespace { +using namespace llvm; +using namespace llvm::coro; typedef SmallPtrSet<BasicBlock *, 8> VisitedBlocksSet; @@ -71,7 +68,7 @@ static bool isLocalAlloca(CoroAllocaAllocInst *AI) { /// This happens during the all-instructions iteration, so it must not /// delete the call. static Instruction * -lowerNonLocalAlloca(CoroAllocaAllocInst *AI, const coro::Shape &Shape, +lowerNonLocalAlloca(CoroAllocaAllocInst *AI, const Shape &Shape, SmallVectorImpl<Instruction *> &DeadInsts) { IRBuilder<> Builder(AI); auto Alloc = Shape.emitAlloc(Builder, AI->getSize(), nullptr); @@ -450,10 +447,8 @@ static void collectFrameAlloca(AllocaInst *AI, const coro::Shape &Shape, Visitor.getMayWriteBeforeCoroBegin()); } -} // namespace - -void collectSpillsFromArgs(SpillInfo &Spills, Function &F, - const SuspendCrossingInfo &Checker) { +void coro::collectSpillsFromArgs(SpillInfo &Spills, Function &F, + const SuspendCrossingInfo &Checker) { // Collect the spills for arguments and other not-materializable values. for (Argument &A : F.args()) for (User *U : A.users()) @@ -461,7 +456,7 @@ void collectSpillsFromArgs(SpillInfo &Spills, Function &F, Spills[&A].push_back(cast<Instruction>(U)); } -void collectSpillsAndAllocasFromInsts( +void coro::collectSpillsAndAllocasFromInsts( SpillInfo &Spills, SmallVector<AllocaInfo, 8> &Allocas, SmallVector<Instruction *, 4> &DeadInstructions, SmallVector<CoroAllocaAllocInst *, 4> &LocalAllocas, Function &F, @@ -516,8 +511,8 @@ void collectSpillsAndAllocasFromInsts( } } -void collectSpillsFromDbgInfo(SpillInfo &Spills, Function &F, - const SuspendCrossingInfo &Checker) { +void coro::collectSpillsFromDbgInfo(SpillInfo &Spills, Function &F, + const SuspendCrossingInfo &Checker) { // We don't want the layout of coroutine frame to be affected // by debug information. So we only choose to salvage dbg.values for // whose value is already in the frame. @@ -535,10 +530,9 @@ void collectSpillsFromDbgInfo(SpillInfo &Spills, Function &F, /// Async and Retcon{Once} conventions assume that all spill uses can be sunk /// after the coro.begin intrinsic. 
-void sinkSpillUsesAfterCoroBegin(const DominatorTree &Dom, - CoroBeginInst *CoroBegin, - coro::SpillInfo &Spills, - SmallVectorImpl<coro::AllocaInfo> &Allocas) { +void coro::sinkSpillUsesAfterCoroBegin( + const DominatorTree &Dom, CoroBeginInst *CoroBegin, coro::SpillInfo &Spills, + SmallVectorImpl<coro::AllocaInfo> &Allocas) { SmallSetVector<Instruction *, 32> ToMove; SmallVector<Instruction *, 32> Worklist; @@ -582,8 +576,9 @@ void sinkSpillUsesAfterCoroBegin(const DominatorTree &Dom, Inst->moveBefore(InsertPt->getIterator()); } -BasicBlock::iterator getSpillInsertionPt(const coro::Shape &Shape, Value *Def, - const DominatorTree &DT) { +BasicBlock::iterator coro::getSpillInsertionPt(const coro::Shape &Shape, + Value *Def, + const DominatorTree &DT) { BasicBlock::iterator InsertPt; if (auto *Arg = dyn_cast<Argument>(Def)) { // For arguments, we will place the store instruction right after @@ -625,7 +620,3 @@ BasicBlock::iterator getSpillInsertionPt(const coro::Shape &Shape, Value *Def, return InsertPt; } - -} // End namespace coro. - -} // End namespace llvm. diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 7071876..943c223 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -471,7 +471,6 @@ private: Value *simplifyNonNullOperand(Value *V, bool HasDereferenceable, unsigned Depth = 0); -public: /// Create `select C, S1, S2`. Use only when the profile cannot be calculated /// from existing profile metadata: if the Function has profiles, this will /// set the profile of this select to "unknown". @@ -484,6 +483,7 @@ public: return Sel; } +public: /// Create and insert the idiom we use to indicate a block is unreachable /// without having to rewrite the CFG from within InstCombine. void CreateNonTerminatorUnreachable(Instruction *InsertAt) { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 63e24a0..a330bb7 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -110,8 +110,8 @@ static Value *simplifyShiftSelectingPackedElement(Instruction *I, ShrAmt->getName() + ".z"); // There is no existing !prof metadata we can derive the !prof metadata for // this select. - Value *Select = IC.createSelectInstWithUnknownProfile(ShrAmtZ, Lower, Upper); - IC.Builder.Insert(Select); + Value *Select = IC.Builder.CreateSelectWithUnknownProfile(ShrAmtZ, Lower, + Upper, DEBUG_TYPE); Select->takeName(I); return Select; } diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 82ac903..3f11cae 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1690,6 +1690,11 @@ Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign( // 2) (fp_binop ({s|u}itofp x), FpC) // -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC))) Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) { + // Don't perform the fold on vectors, as the integer operation may be much + // more expensive than the float operation in that case. 
+ if (BO.getType()->isVectorTy()) + return nullptr; + std::array<Value *, 2> IntOps = {nullptr, nullptr}; Constant *Op1FpC = nullptr; // Check for: diff --git a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp index c86092b..a6ec6c1 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/MemoryProfileInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/StaticDataProfileInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" @@ -194,6 +195,30 @@ static bool isAllocationWithHotColdVariant(const Function *Callee, } } +static void HandleUnsupportedAnnotationKinds(GlobalVariable &GVar, + AnnotationKind Kind) { + assert(Kind != llvm::memprof::AnnotationKind::AnnotationOK && + "Should not handle AnnotationOK here"); + SmallString<32> Reason; + switch (Kind) { + case llvm::memprof::AnnotationKind::ExplicitSection: + ++NumOfMemProfExplicitSectionGlobalVars; + Reason.append("explicit section name"); + break; + case llvm::memprof::AnnotationKind::DeclForLinker: + Reason.append("linker declaration"); + break; + case llvm::memprof::AnnotationKind::ReservedName: + Reason.append("name starts with `llvm.`"); + break; + default: + llvm_unreachable("Unexpected annotation kind"); + } + LLVM_DEBUG(dbgs() << "Skip annotation for " << GVar.getName() << " due to " + << Reason << ".\n"); + return; +} + struct AllocMatchInfo { uint64_t TotalSize = 0; AllocationType AllocType = AllocationType::None; @@ -775,29 +800,13 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { return PreservedAnalyses::none(); } -// Returns true iff the global variable has custom section either by -// __attribute__((section("name"))) -// (https://clang.llvm.org/docs/AttributeReference.html#section-declspec-allocate) -// or #pragma clang section directives -// (https://clang.llvm.org/docs/LanguageExtensions.html#specifying-section-names-for-global-objects-pragma-clang-section). -static bool hasExplicitSectionName(const GlobalVariable &GVar) { - if (GVar.hasSection()) - return true; - - auto Attrs = GVar.getAttributes(); - if (Attrs.hasAttribute("bss-section") || Attrs.hasAttribute("data-section") || - Attrs.hasAttribute("relro-section") || - Attrs.hasAttribute("rodata-section")) - return true; - return false; -} - bool MemProfUsePass::annotateGlobalVariables( Module &M, const memprof::DataAccessProfData *DataAccessProf) { if (!AnnotateStaticDataSectionPrefix || M.globals().empty()) return false; if (!DataAccessProf) { + M.addModuleFlag(Module::Warning, "EnableDataAccessProf", 0U); M.getContext().diagnose(DiagnosticInfoPGOProfile( MemoryProfileFileName.data(), StringRef("Data access profiles not found in memprof. 
Ignore " @@ -805,6 +814,7 @@ bool MemProfUsePass::annotateGlobalVariables( DS_Warning)); return false; } + M.addModuleFlag(Module::Warning, "EnableDataAccessProf", 1U); bool Changed = false; // Iterate all global variables in the module and annotate them based on @@ -815,13 +825,9 @@ bool MemProfUsePass::annotateGlobalVariables( for (GlobalVariable &GVar : M.globals()) { assert(!GVar.getSectionPrefix().has_value() && "GVar shouldn't have section prefix yet"); - if (GVar.isDeclarationForLinker()) - continue; - - if (hasExplicitSectionName(GVar)) { - ++NumOfMemProfExplicitSectionGlobalVars; - LLVM_DEBUG(dbgs() << "Global variable " << GVar.getName() - << " has explicit section name. Skip annotating.\n"); + auto Kind = llvm::memprof::getAnnotationKind(GVar); + if (Kind != llvm::memprof::AnnotationKind::AnnotationOK) { + HandleUnsupportedAnnotationKinds(GVar, Kind); continue; } @@ -831,7 +837,6 @@ bool MemProfUsePass::annotateGlobalVariables( // TODO: Track string content hash in the profiles and compute it inside the // compiler to categeorize the hotness string literals. if (Name.starts_with(".str")) { - LLVM_DEBUG(dbgs() << "Skip annotating string literal " << Name << "\n"); continue; } diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 66a2c76..09db464 100644 --- a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -2626,7 +2626,7 @@ void ObjCARCOpt::OptimizeAutoreleasePools(Function &F) { case ARCInstKind::Call: if (!MayAutorelease(cast<CallBase>(Inst))) break; - LLVM_FALLTHROUGH; + [[fallthrough]]; case ARCInstKind::Autorelease: case ARCInstKind::AutoreleaseRV: case ARCInstKind::FusedRetainAutorelease: diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp index 56e0569..7cae94eb 100644 --- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp +++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp @@ -1295,6 +1295,24 @@ public: return commonAlignment(InitialAlign, ElementSizeInBits / 8); } + IntegerType *getIndexType(Value *Ptr) const { + return cast<IntegerType>(DL.getIndexType(Ptr->getType())); + } + + Value *getIndex(Value *Ptr, uint64_t V) const { + return ConstantInt::get(getIndexType(Ptr), V); + } + + Value *castToIndexType(Value *Ptr, Value *V, IRBuilder<> &Builder) const { + assert(isa<IntegerType>(V->getType()) && + "Attempted to cast non-integral type to integer index"); + // In case the data layout's index type differs in width from the type of + // the value we're given, truncate or zero extend to the appropriate width. + // We zero extend here as indices are unsigned. + return Builder.CreateZExtOrTrunc(V, getIndexType(Ptr), + V->getName() + ".cast"); + } + /// Load a matrix with \p Shape starting at \p Ptr and using \p Stride between /// vectors. 
MatrixTy loadMatrix(Type *Ty, Value *Ptr, MaybeAlign MAlign, Value *Stride, @@ -1304,6 +1322,7 @@ public: Type *VecTy = FixedVectorType::get(EltTy, Shape.getStride()); Value *EltPtr = Ptr; MatrixTy Result; + Stride = castToIndexType(Ptr, Stride, Builder); for (unsigned I = 0, E = Shape.getNumVectors(); I < E; ++I) { Value *GEP = computeVectorAddr( EltPtr, Builder.getIntN(Stride->getType()->getScalarSizeInBits(), I), @@ -1325,14 +1344,14 @@ public: ShapeInfo ResultShape, Type *EltTy, IRBuilder<> &Builder) { Value *Offset = Builder.CreateAdd( - Builder.CreateMul(J, Builder.getInt64(MatrixShape.getStride())), I); + Builder.CreateMul(J, getIndex(MatrixPtr, MatrixShape.getStride())), I); Value *TileStart = Builder.CreateGEP(EltTy, MatrixPtr, Offset); auto *TileTy = FixedVectorType::get(EltTy, ResultShape.NumRows * ResultShape.NumColumns); return loadMatrix(TileTy, TileStart, Align, - Builder.getInt64(MatrixShape.getStride()), IsVolatile, + getIndex(MatrixPtr, MatrixShape.getStride()), IsVolatile, ResultShape, Builder); } @@ -1363,14 +1382,15 @@ public: MaybeAlign MAlign, bool IsVolatile, ShapeInfo MatrixShape, Value *I, Value *J, Type *EltTy, IRBuilder<> &Builder) { Value *Offset = Builder.CreateAdd( - Builder.CreateMul(J, Builder.getInt64(MatrixShape.getStride())), I); + Builder.CreateMul(J, getIndex(MatrixPtr, MatrixShape.getStride())), I); Value *TileStart = Builder.CreateGEP(EltTy, MatrixPtr, Offset); auto *TileTy = FixedVectorType::get(EltTy, StoreVal.getNumRows() * StoreVal.getNumColumns()); storeMatrix(TileTy, StoreVal, TileStart, MAlign, - Builder.getInt64(MatrixShape.getStride()), IsVolatile, Builder); + getIndex(MatrixPtr, MatrixShape.getStride()), IsVolatile, + Builder); } /// Store matrix \p StoreVal starting at \p Ptr and using \p Stride between @@ -1380,6 +1400,7 @@ public: IRBuilder<> &Builder) { auto *VType = cast<FixedVectorType>(Ty); Value *EltPtr = Ptr; + Stride = castToIndexType(Ptr, Stride, Builder); for (auto Vec : enumerate(StoreVal.vectors())) { Value *GEP = computeVectorAddr( EltPtr, @@ -2011,18 +2032,17 @@ public: const unsigned TileM = std::min(M - K, unsigned(TileSize)); MatrixTy A = loadMatrix(APtr, LoadOp0->getAlign(), LoadOp0->isVolatile(), - LShape, Builder.getInt64(I), Builder.getInt64(K), + LShape, getIndex(APtr, I), getIndex(APtr, K), {TileR, TileM}, EltType, Builder); MatrixTy B = loadMatrix(BPtr, LoadOp1->getAlign(), LoadOp1->isVolatile(), - RShape, Builder.getInt64(K), Builder.getInt64(J), + RShape, getIndex(BPtr, K), getIndex(BPtr, J), {TileM, TileC}, EltType, Builder); emitMatrixMultiply(Res, A, B, Builder, true, false, getFastMathFlags(MatMul)); } storeMatrix(Res, CPtr, Store->getAlign(), Store->isVolatile(), {R, M}, - Builder.getInt64(I), Builder.getInt64(J), EltType, - Builder); + getIndex(CPtr, I), getIndex(CPtr, J), EltType, Builder); } } @@ -2254,15 +2274,14 @@ public: /// Lower load instructions. 
MatrixTy VisitLoad(LoadInst *Inst, const ShapeInfo &SI, Value *Ptr, IRBuilder<> &Builder) { - return LowerLoad(Inst, Ptr, Inst->getAlign(), - Builder.getInt64(SI.getStride()), Inst->isVolatile(), SI, - Builder); + return LowerLoad(Inst, Ptr, Inst->getAlign(), getIndex(Ptr, SI.getStride()), + Inst->isVolatile(), SI, Builder); } MatrixTy VisitStore(StoreInst *Inst, const ShapeInfo &SI, Value *StoredVal, Value *Ptr, IRBuilder<> &Builder) { return LowerStore(Inst, StoredVal, Ptr, Inst->getAlign(), - Builder.getInt64(SI.getStride()), Inst->isVolatile(), SI, + getIndex(Ptr, SI.getStride()), Inst->isVolatile(), SI, Builder); } diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp index b187208..3ce569f 100644 --- a/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -44,7 +44,7 @@ using namespace llvm; STATISTIC(RemappedAtomMax, "Highest global NextAtomGroup (after mapping)"); void llvm::mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap) { - auto CurGroup = DL->getAtomGroup(); + uint64_t CurGroup = DL->getAtomGroup(); if (!CurGroup) return; @@ -62,21 +62,20 @@ void llvm::mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap) { RemappedAtomMax = std::max<uint64_t>(NewGroup, RemappedAtomMax); } -namespace { -void collectDebugInfoFromInstructions(const Function &F, - DebugInfoFinder &DIFinder) { +static void collectDebugInfoFromInstructions(const Function &F, + DebugInfoFinder &DIFinder) { const Module *M = F.getParent(); - if (M) { - // Inspect instructions to process e.g. DILexicalBlocks of inlined functions - for (const auto &I : instructions(F)) - DIFinder.processInstruction(*M, I); - } + if (!M) + return; + // Inspect instructions to process e.g. DILexicalBlocks of inlined functions + for (const Instruction &I : instructions(F)) + DIFinder.processInstruction(*M, I); } // Create a predicate that matches the metadata that should be identity mapped // during function cloning. -MetadataPredicate createIdentityMDPredicate(const Function &F, - CloneFunctionChangeType Changes) { +static MetadataPredicate +createIdentityMDPredicate(const Function &F, CloneFunctionChangeType Changes) { if (Changes >= CloneFunctionChangeType::DifferentModule) return [](const Metadata *MD) { return false; }; @@ -107,7 +106,6 @@ MetadataPredicate createIdentityMDPredicate(const Function &F, return false; }; } -} // namespace /// See comments in Cloning.h. BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, @@ -213,10 +211,9 @@ void llvm::CloneFunctionMetadataInto(Function &NewFunc, const Function &OldFunc, const MetadataPredicate *IdentityMD) { SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; OldFunc.getAllMetadata(MDs); - for (auto MD : MDs) { - NewFunc.addMetadata(MD.first, - *MapMetadata(MD.second, VMap, RemapFlag, TypeMapper, - Materializer, IdentityMD)); + for (const auto &[Kind, MD] : MDs) { + NewFunc.addMetadata(Kind, *MapMetadata(MD, VMap, RemapFlag, TypeMapper, + Materializer, IdentityMD)); } } @@ -235,7 +232,6 @@ void llvm::CloneFunctionBodyInto(Function &NewFunc, const Function &OldFunc, // appropriate. Note that we save BE this way in order to handle cloning of // recursive functions into themselves. for (const BasicBlock &BB : OldFunc) { - // Create a new basic block and copy instructions into it! 
BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, &NewFunc, CodeInfo); @@ -321,7 +317,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, // Cloning is always a Module level operation, since Metadata needs to be // cloned. - const auto RemapFlag = RF_None; + const RemapFlags RemapFlag = RF_None; CloneFunctionMetadataInto(*NewFunc, *OldFunc, VMap, RemapFlag, TypeMapper, Materializer, &IdentityMD); @@ -346,8 +342,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, // visiting the metadata attached to global values, which would allow this // code to be deleted. Alternatively, perhaps give responsibility for this // update to CloneFunctionInto's callers. - auto *NewModule = NewFunc->getParent(); - auto *NMD = NewModule->getOrInsertNamedMetadata("llvm.dbg.cu"); + Module *NewModule = NewFunc->getParent(); + NamedMDNode *NMD = NewModule->getOrInsertNamedMetadata("llvm.dbg.cu"); // Avoid multiple insertions of the same DICompileUnit to NMD. SmallPtrSet<const void *, 8> Visited(llvm::from_range, NMD->operands()); @@ -355,7 +351,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, // the function (e.g. as instructions' scope). DebugInfoFinder DIFinder; collectDebugInfoFromInstructions(*OldFunc, DIFinder); - for (auto *Unit : DIFinder.compile_units()) { + for (DICompileUnit *Unit : DIFinder.compile_units()) { MDNode *MappedUnit = MapMetadata(Unit, VMap, RF_None, TypeMapper, Materializer); if (Visited.insert(MappedUnit).second) @@ -821,17 +817,16 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, --PredCount[Pred]; // Figure out how many entries to remove from each PHI. - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - ++PredCount[PN->getIncomingBlock(i)]; + for (BasicBlock *Pred : PN->blocks()) + ++PredCount[Pred]; // At this point, the excess predecessor entries are positive in the // map. Loop over all of the PHIs and remove excess predecessor // entries. BasicBlock::iterator I = NewBB->begin(); for (; (PN = dyn_cast<PHINode>(I)); ++I) { - for (const auto &PCI : PredCount) { - BasicBlock *Pred = PCI.first; - for (unsigned NumToRemove = PCI.second; NumToRemove; --NumToRemove) + for (const auto &[Pred, Count] : PredCount) { + for (unsigned _ : llvm::seq<unsigned>(Count)) PN->removeIncomingValue(Pred, false); } } @@ -866,8 +861,8 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // As phi-nodes have been now remapped, allow incremental simplification of // newly-cloned instructions. const DataLayout &DL = NewFunc->getDataLayout(); - for (const auto &BB : *OldFunc) { - for (const auto &I : BB) { + for (const BasicBlock &BB : *OldFunc) { + for (const Instruction &I : BB) { auto *NewI = dyn_cast_or_null<Instruction>(VMap.lookup(&I)); if (!NewI) continue; @@ -997,8 +992,8 @@ void llvm::CloneAndPruneFunctionInto( void llvm::remapInstructionsInBlocks(ArrayRef<BasicBlock *> Blocks, ValueToValueMapTy &VMap) { // Rewrite the code to refer to itself. 
- for (auto *BB : Blocks) { - for (auto &Inst : *BB) { + for (BasicBlock *BB : Blocks) { + for (Instruction &Inst : *BB) { RemapDbgRecordRange(Inst.getModule(), Inst.getDbgRecordRange(), VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); RemapInstruction(&Inst, VMap, @@ -1151,9 +1146,9 @@ void llvm::cloneNoAliasScopes(ArrayRef<MDNode *> NoAliasDeclScopes, StringRef Ext, LLVMContext &Context) { MDBuilder MDB(Context); - for (auto *ScopeList : NoAliasDeclScopes) { - for (const auto &MDOperand : ScopeList->operands()) { - if (MDNode *MD = dyn_cast<MDNode>(MDOperand)) { + for (MDNode *ScopeList : NoAliasDeclScopes) { + for (const MDOperand &MDOp : ScopeList->operands()) { + if (MDNode *MD = dyn_cast<MDNode>(MDOp)) { AliasScopeNode SNANode(MD); std::string Name; @@ -1177,7 +1172,7 @@ void llvm::adaptNoAliasScopes(Instruction *I, auto CloneScopeList = [&](const MDNode *ScopeList) -> MDNode * { bool NeedsReplacement = false; SmallVector<Metadata *, 8> NewScopeList; - for (const auto &MDOp : ScopeList->operands()) { + for (const MDOperand &MDOp : ScopeList->operands()) { if (MDNode *MD = dyn_cast<MDNode>(MDOp)) { if (auto *NewMD = ClonedScopes.lookup(MD)) { NewScopeList.push_back(NewMD); @@ -1193,12 +1188,12 @@ void llvm::adaptNoAliasScopes(Instruction *I, }; if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(I)) - if (auto *NewScopeList = CloneScopeList(Decl->getScopeList())) + if (MDNode *NewScopeList = CloneScopeList(Decl->getScopeList())) Decl->setScopeList(NewScopeList); auto replaceWhenNeeded = [&](unsigned MD_ID) { if (const MDNode *CSNoAlias = I->getMetadata(MD_ID)) - if (auto *NewScopeList = CloneScopeList(CSNoAlias)) + if (MDNode *NewScopeList = CloneScopeList(CSNoAlias)) I->setMetadata(MD_ID, NewScopeList); }; replaceWhenNeeded(LLVMContext::MD_noalias); diff --git a/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp b/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp index d7bf791..fb39fdd 100644 --- a/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp +++ b/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp @@ -11,11 +11,11 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/SSAUpdaterBulk.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/IteratedDominanceFrontier.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/Use.h" #include "llvm/IR/Value.h" @@ -112,7 +112,7 @@ struct BBValueInfo { void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT, SmallVectorImpl<PHINode *> *InsertedPHIs) { DenseMap<BasicBlock *, BBValueInfo> BBInfos; - for (auto &R : Rewrites) { + for (RewriteInfo &R : Rewrites) { BBInfos.clear(); // Compute locations for new phi-nodes. @@ -145,7 +145,7 @@ void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT, BBInfos[BB].LiveOutValue = V; // We've computed IDF, now insert new phi-nodes there. - for (auto *FrontierBB : IDFBlocks) { + for (BasicBlock *FrontierBB : IDFBlocks) { IRBuilder<> B(FrontierBB, FrontierBB->begin()); PHINode *PN = B.CreatePHI(R.Ty, 0, R.Name); BBInfos[FrontierBB].LiveInValue = PN; @@ -156,7 +156,7 @@ void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT, // IsLiveOut indicates whether we are computing live-out values (true) or // live-in values (false). 
auto ComputeValue = [&](BasicBlock *BB, bool IsLiveOut) -> Value * { - auto *BBInfo = &BBInfos[BB]; + BBValueInfo *BBInfo = &BBInfos[BB]; if (IsLiveOut && BBInfo->LiveOutValue) return BBInfo->LiveOutValue; @@ -187,7 +187,7 @@ void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT, if (!V) V = UndefValue::get(R.Ty); - for (auto *BBInfo : Stack) + for (BBValueInfo *BBInfo : Stack) // Loop above can insert new entries into the BBInfos map: assume the // map shouldn't grow due to [1] and BBInfo references are valid. BBInfo->LiveInValue = V; @@ -196,7 +196,7 @@ void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT, }; // Fill in arguments of the inserted PHIs. - for (auto *BB : IDFBlocks) { + for (BasicBlock *BB : IDFBlocks) { auto *PHI = cast<PHINode>(&BB->front()); for (BasicBlock *Pred : PredCache.get(BB)) PHI->addIncoming(ComputeValue(Pred, /*IsLiveOut=*/true), Pred); @@ -222,3 +222,96 @@ void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT, } } } + +// Perform a single pass of simplification over the worklist of PHIs. +// This should be called after RewriteAllUses() because simplifying PHIs +// immediately after creation would require updating all references to those +// PHIs in the BBValueInfo structures, which would necessitate additional +// reference tracking overhead. +static void simplifyPass(MutableArrayRef<PHINode *> Worklist, + const DataLayout &DL) { + for (PHINode *&PHI : Worklist) { + if (Value *Simplified = simplifyInstruction(PHI, DL)) { + PHI->replaceAllUsesWith(Simplified); + PHI->eraseFromParent(); + PHI = nullptr; // Mark as removed. + } + } +} + +#ifndef NDEBUG // Should this be under EXPENSIVE_CHECKS? +// New PHI nodes should not reference one another but they may reference +// themselves or existing PHI nodes, and existing PHI nodes may reference new +// PHI nodes. +static bool +PHIAreRefEachOther(const iterator_range<BasicBlock::phi_iterator> NewPHIs) { + SmallPtrSet<PHINode *, 8> NewPHISet; + for (PHINode &PN : NewPHIs) + NewPHISet.insert(&PN); + for (PHINode &PHI : NewPHIs) { + for (Value *V : PHI.incoming_values()) { + PHINode *IncPHI = dyn_cast<PHINode>(V); + if (IncPHI && IncPHI != &PHI && NewPHISet.contains(IncPHI)) + return true; + } + } + return false; +} +#endif + +static bool replaceIfIdentical(PHINode &PHI, PHINode &ReplPHI) { + if (!PHI.isIdenticalToWhenDefined(&ReplPHI)) + return false; + PHI.replaceAllUsesWith(&ReplPHI); + PHI.eraseFromParent(); + return true; +} + +bool EliminateNewDuplicatePHINodes(BasicBlock *BB, + BasicBlock::phi_iterator FirstExistingPN) { + assert(!PHIAreRefEachOther(make_range(BB->phis().begin(), FirstExistingPN))); + + // Deduplicate new PHIs first to reduce the number of comparisons on the + // following new -> existing pass. + bool Changed = false; + for (auto I = BB->phis().begin(); I != FirstExistingPN; ++I) { + for (auto J = std::next(I); J != FirstExistingPN;) { + Changed |= replaceIfIdentical(*J++, *I); + } + } + + // Iterate over existing PHIs and replace identical new PHIs. + for (PHINode &ExistingPHI : make_range(FirstExistingPN, BB->phis().end())) { + auto I = BB->phis().begin(); + assert(I != FirstExistingPN); // Should be at least one new PHI. 
+ do { + Changed |= replaceIfIdentical(*I++, ExistingPHI); + } while (I != FirstExistingPN); + if (BB->phis().begin() == FirstExistingPN) + return Changed; + } + return Changed; +} + +static void deduplicatePass(ArrayRef<PHINode *> Worklist) { + SmallDenseMap<BasicBlock *, unsigned> BBs; + for (PHINode *PHI : Worklist) { + if (PHI) + ++BBs[PHI->getParent()]; + } + + for (auto [BB, NumNewPHIs] : BBs) { + auto FirstExistingPN = std::next(BB->phis().begin(), NumNewPHIs); + EliminateNewDuplicatePHINodes(BB, FirstExistingPN); + } +} + +void SSAUpdaterBulk::RewriteAndOptimizeAllUses(DominatorTree &DT) { + SmallVector<PHINode *, 4> PHIs; + RewriteAllUses(&DT, &PHIs); + if (PHIs.empty()) + return; + + simplifyPass(PHIs, PHIs.front()->getParent()->getDataLayout()); + deduplicatePass(PHIs); +} diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index a6f4bec..88af2cf 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -10659,7 +10659,8 @@ class InstructionsCompatibilityAnalysis { static bool isSupportedOpcode(const unsigned Opcode) { return Opcode == Instruction::Add || Opcode == Instruction::LShr || Opcode == Instruction::Shl || Opcode == Instruction::SDiv || - Opcode == Instruction::UDiv; + Opcode == Instruction::UDiv || Opcode == Instruction::And || + Opcode == Instruction::Or || Opcode == Instruction::Xor; } /// Identifies the best candidate value, which represents main opcode @@ -10984,6 +10985,9 @@ public: case Instruction::Shl: case Instruction::SDiv: case Instruction::UDiv: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: VectorCost = TTI.getArithmeticInstrCost(MainOpcode, VecTy, Kind); break; default: @@ -19456,7 +19460,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } assert(getNumElements(Cond->getType()) == TrueNumElements && "Cannot vectorize Instruction::Select"); - Value *V = Builder.CreateSelect(Cond, True, False); + Value *V = + Builder.CreateSelectWithUnknownProfile(Cond, True, False, DEBUG_TYPE); V = FinalShuffle(V, E); E->VectorizedValue = V; @@ -23576,18 +23581,19 @@ class HorizontalReduction { switch (Kind) { case RecurKind::Or: { if (UseSelect && OpTy == CmpInst::makeCmpResultType(OpTy)) - return Builder.CreateSelect( + return Builder.CreateSelectWithUnknownProfile( LHS, ConstantInt::getAllOnesValue(CmpInst::makeCmpResultType(OpTy)), - RHS, Name); + RHS, DEBUG_TYPE, Name); unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind); return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS, Name); } case RecurKind::And: { if (UseSelect && OpTy == CmpInst::makeCmpResultType(OpTy)) - return Builder.CreateSelect( + return Builder.CreateSelectWithUnknownProfile( LHS, RHS, - ConstantInt::getNullValue(CmpInst::makeCmpResultType(OpTy)), Name); + ConstantInt::getNullValue(CmpInst::makeCmpResultType(OpTy)), + DEBUG_TYPE, Name); unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind); return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS, Name); @@ -23608,7 +23614,8 @@ class HorizontalReduction { if (UseSelect) { CmpInst::Predicate Pred = llvm::getMinMaxReductionPredicate(Kind); Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS, Name); - return Builder.CreateSelect(Cmp, LHS, RHS, Name); + return Builder.CreateSelectWithUnknownProfile(Cmp, LHS, RHS, DEBUG_TYPE, + Name); } [[fallthrough]]; case RecurKind::FMax: diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 
1fea068..0101942 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -635,9 +635,9 @@ static bool hasConditionalTerminator(const VPBasicBlock *VPBB) { const VPRecipeBase *R = &VPBB->back(); bool IsSwitch = isa<VPInstruction>(R) && cast<VPInstruction>(R)->getOpcode() == Instruction::Switch; - bool IsCondBranch = isa<VPBranchOnMaskRecipe>(R) || - match(R, m_BranchOnCond(m_VPValue())) || - match(R, m_BranchOnCount(m_VPValue(), m_VPValue())); + bool IsCondBranch = + isa<VPBranchOnMaskRecipe>(R) || + match(R, m_CombineOr(m_BranchOnCond(), m_BranchOnCount())); (void)IsCondBranch; (void)IsSwitch; if (VPBB->getNumSuccessors() == 2 || diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index fb696be..8ca3bed 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1064,6 +1064,7 @@ public: ResumeForEpilogue, /// Returns the value for vscale. VScale, + OpsEnd = VScale, }; /// Returns true if this VPInstruction generates scalar values for all lanes. diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 81deba2..c0147ce 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -433,8 +433,7 @@ static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB, // We are about to replace the branch to exit the region. Remove the original // BranchOnCond, if there is any. DebugLoc LatchDL = DL; - if (!LatchVPBB->empty() && - match(&LatchVPBB->back(), m_BranchOnCond(m_VPValue()))) { + if (!LatchVPBB->empty() && match(&LatchVPBB->back(), m_BranchOnCond())) { LatchDL = LatchVPBB->getTerminator()->getDebugLoc(); LatchVPBB->getTerminator()->eraseFromParent(); } @@ -480,8 +479,7 @@ static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB) { static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL, PredicatedScalarEvolution &PSE, Loop *TheLoop) { - VPDominatorTree VPDT; - VPDT.recalculate(Plan); + VPDominatorTree VPDT(Plan); auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor()); canonicalHeaderAndLatch(HeaderVPBB, VPDT); @@ -623,8 +621,7 @@ void VPlanTransforms::addMiddleCheck(VPlan &Plan, } void VPlanTransforms::createLoopRegions(VPlan &Plan) { - VPDominatorTree VPDT; - VPDT.recalculate(Plan); + VPDominatorTree VPDT(Plan); for (VPBlockBase *HeaderVPB : vp_post_order_shallow(Plan.getEntry())) if (canonicalHeaderAndLatch(HeaderVPB, VPDT)) createLoopRegion(Plan, HeaderVPB); @@ -875,8 +872,7 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) { Plan.getVectorLoopRegion()->getEntryBasicBlock())) { auto *VPBB = cast<VPBasicBlock>(VPB); for (auto &R : *VPBB) { - if (R.mayWriteToMemory() && - !match(&R, m_BranchOnCount(m_VPValue(), m_VPValue()))) + if (R.mayWriteToMemory() && !match(&R, m_BranchOnCount())) return false; } } diff --git a/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h b/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h index 577432f..44506f5a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h +++ b/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h @@ -39,7 +39,6 @@ class VPDominatorTree : public DominatorTreeBase<VPBlockBase, false> { using Base = DominatorTreeBase<VPBlockBase, false>; public: - VPDominatorTree() = default; explicit VPDominatorTree(VPlan &Plan) { recalculate(Plan); } /// Returns true if \p A properly dominates \p B. 
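The VPDominatorTree hunks above remove the default constructor, so a dominator tree over a VPlan is now built in one step at every call site. A minimal sketch of that call-site pattern, using only names that appear in the hunks above (nothing here is new API beyond what the patch shows):

    // Before: default-construct, then recalculate over the plan.
    //   VPDominatorTree VPDT;
    //   VPDT.recalculate(Plan);
    //
    // After: the remaining explicit constructor runs recalculate(Plan) itself.
    VPDominatorTree VPDT(Plan);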
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index 555efea..b42b049 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -344,6 +344,10 @@ m_Freeze(const Op0_t &Op0) { return m_VPInstruction<Instruction::Freeze>(Op0); } +inline VPInstruction_match<VPInstruction::BranchOnCond> m_BranchOnCond() { + return m_VPInstruction<VPInstruction::BranchOnCond>(); +} + template <typename Op0_t> inline VPInstruction_match<VPInstruction::BranchOnCond, Op0_t> m_BranchOnCond(const Op0_t &Op0) { @@ -374,6 +378,10 @@ m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) { return m_VPInstruction<VPInstruction::ActiveLaneMask>(Op0, Op1, Op2); } +inline VPInstruction_match<VPInstruction::BranchOnCount> m_BranchOnCount() { + return m_VPInstruction<VPInstruction::BranchOnCount>(); +} + template <typename Op0_t, typename Op1_t> inline VPInstruction_match<VPInstruction::BranchOnCount, Op0_t, Op1_t> m_BranchOnCount(const Op0_t &Op0, const Op1_t &Op1) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 8e916772..2368d18 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1154,7 +1154,7 @@ InstructionCost VPInstruction::computeCost(ElementCount VF, case VPInstruction::ExtractPenultimateElement: if (VF == ElementCount::getScalable(1)) return InstructionCost::getInvalid(); - LLVM_FALLTHROUGH; + [[fallthrough]]; default: // TODO: Compute cost other VPInstructions once the legacy cost model has // been retired. @@ -2855,7 +2855,7 @@ InstructionCost VPExpressionRecipe::computeCost(ElementCount VF, case ExpressionTypes::ExtNegatedMulAccReduction: assert(Opcode == Instruction::Add && "Unexpected opcode"); Opcode = Instruction::Sub; - LLVM_FALLTHROUGH; + [[fallthrough]]; case ExpressionTypes::ExtMulAccReduction: { return Ctx.TTI.getMulAccReductionCost( cast<VPWidenCastRecipe>(ExpressionRecipes.front())->getOpcode() == diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 9bb8820..40b7e8d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1658,7 +1658,7 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF, auto *Term = &ExitingVPBB->back(); VPValue *Cond; ScalarEvolution &SE = *PSE.getSE(); - if (match(Term, m_BranchOnCount(m_VPValue(), m_VPValue())) || + if (match(Term, m_BranchOnCount()) || match(Term, m_BranchOnCond(m_Not(m_ActiveLaneMask( m_VPValue(), m_VPValue(), m_VPValue()))))) { // Try to simplify the branch condition if TC <= VF * UF when the latch @@ -1909,8 +1909,7 @@ static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR, bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &LoopBuilder) { - VPDominatorTree VPDT; - VPDT.recalculate(Plan); + VPDominatorTree VPDT(Plan); SmallVector<VPFirstOrderRecurrencePHIRecipe *> RecurrencePhis; for (VPRecipeBase &R : @@ -1992,6 +1991,13 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> { .Case<VPWidenIntrinsicRecipe>([](auto *I) { return std::make_pair(true, I->getVectorIntrinsicID()); }) + .Case<VPVectorPointerRecipe>([](auto *I) { + // For recipes that do not directly map to LLVM IR instructions, + // assign opcodes after the last VPInstruction opcode (which is also + // 
after the last IR Instruction opcode), based on the VPDefID. + return std::make_pair(false, + VPInstruction::OpsEnd + 1 + I->getVPDefID()); + }) .Default([](auto *) { return std::nullopt; }); } @@ -2015,11 +2021,8 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> { static bool canHandle(const VPSingleDefRecipe *Def) { // We can extend the list of handled recipes in the future, // provided we account for the data embedded in them while checking for - // equality or hashing. We assign VPVectorEndPointerRecipe the GEP opcode, - // as it is essentially a GEP with different semantics. - auto C = isa<VPVectorPointerRecipe>(Def) - ? std::make_pair(false, Instruction::GetElementPtr) - : getOpcodeOrIntrinsicID(Def); + // equality or hashing. + auto C = getOpcodeOrIntrinsicID(Def); // The issue with (Insert|Extract)Value is that the index of the // insert/extract is not a proper operand in LLVM IR, and hence also not in @@ -2058,6 +2061,8 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> { vputils::isSingleScalar(L) != vputils::isSingleScalar(R) || !equal(L->operands(), R->operands())) return false; + assert(getOpcodeOrIntrinsicID(L) && getOpcodeOrIntrinsicID(R) && + "must have valid opcode info for both recipes"); if (auto *LFlags = dyn_cast<VPRecipeWithIRFlags>(L)) if (LFlags->hasPredicate() && LFlags->getPredicate() != @@ -3021,8 +3026,7 @@ void VPlanTransforms::createInterleaveGroups( // Interleave memory: for each Interleave Group we marked earlier as relevant // for this VPlan, replace the Recipes widening its memory instructions with a // single VPInterleaveRecipe at its insertion point. - VPDominatorTree VPDT; - VPDT.recalculate(Plan); + VPDominatorTree VPDT(Plan); for (const auto *IG : InterleaveGroups) { auto *Start = cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IG->getMember(0))); @@ -3398,9 +3402,8 @@ void VPlanTransforms::handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB, VPBuilder Builder(LatchVPBB->getTerminator()); VPBlockBase *TrueSucc = EarlyExitingVPBB->getSuccessors()[0]; - assert( - match(EarlyExitingVPBB->getTerminator(), m_BranchOnCond(m_VPValue())) && - "Terminator must be be BranchOnCond"); + assert(match(EarlyExitingVPBB->getTerminator(), m_BranchOnCond()) && + "Terminator must be be BranchOnCond"); VPValue *CondOfEarlyExitingVPBB = EarlyExitingVPBB->getTerminator()->getOperand(0); auto *CondToEarlyExit = TrueSucc == EarlyExitVPBB @@ -3662,8 +3665,7 @@ void VPlanTransforms::materializeBroadcasts(VPlan &Plan) { return; #ifndef NDEBUG - VPDominatorTree VPDT; - VPDT.recalculate(Plan); + VPDominatorTree VPDT(Plan); #endif SmallVector<VPValue *> VPValues; @@ -4009,8 +4011,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF, unsigned VFMinVal = VF.getKnownMinValue(); SmallVector<VPInterleaveRecipe *> StoreGroups; for (auto &R : *VectorLoop->getEntryBasicBlock()) { - if (isa<VPCanonicalIVPHIRecipe>(&R) || - match(&R, m_BranchOnCount(m_VPValue(), m_VPValue()))) + if (isa<VPCanonicalIVPHIRecipe>(&R) || match(&R, m_BranchOnCount())) continue; if (isa<VPDerivedIVRecipe, VPScalarIVStepsRecipe>(&R) && diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index 5e7f19f..1c4adfc 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -259,8 +259,7 @@ void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R, /// Handle non-header-phi recipes. 
void UnrollState::unrollRecipeByUF(VPRecipeBase &R) { - if (match(&R, m_BranchOnCond(m_VPValue())) || - match(&R, m_BranchOnCount(m_VPValue(), m_VPValue()))) + if (match(&R, m_CombineOr(m_BranchOnCond(), m_BranchOnCount()))) return; if (auto *VPI = dyn_cast<VPInstruction>(&R)) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 013ea2e..5262af6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -24,6 +24,7 @@ #define DEBUG_TYPE "loop-vectorize" using namespace llvm; +using namespace VPlanPatternMatch; namespace { class VPlanVerifier { @@ -198,7 +199,6 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const { } // EVLIVIncrement is only used by EVLIV & BranchOnCount. // Having more than two users is unexpected. - using namespace llvm::VPlanPatternMatch; if (I->getOpcode() != VPInstruction::Broadcast && I->getNumUsers() != 1 && (I->getNumUsers() != 2 || @@ -479,8 +479,7 @@ bool VPlanVerifier::verify(const VPlan &Plan) { } auto *LastInst = dyn_cast<VPInstruction>(std::prev(Exiting->end())); - if (!LastInst || (LastInst->getOpcode() != VPInstruction::BranchOnCount && - LastInst->getOpcode() != VPInstruction::BranchOnCond)) { + if (!match(LastInst, m_CombineOr(m_BranchOnCond(), m_BranchOnCount()))) { errs() << "VPlan vector loop exit must end with BranchOnCount or " "BranchOnCond VPInstruction\n"; return false; @@ -490,8 +489,7 @@ bool VPlanVerifier::verify(const VPlan &Plan) { } bool llvm::verifyVPlanIsValid(const VPlan &Plan, bool VerifyLate) { - VPDominatorTree VPDT; - VPDT.recalculate(const_cast<VPlan &>(Plan)); + VPDominatorTree VPDT(const_cast<VPlan &>(Plan)); VPTypeAnalysis TypeInfo(Plan); VPlanVerifier Verifier(VPDT, TypeInfo, VerifyLate); return Verifier.verify(Plan); diff --git a/llvm/lib/XRay/BlockIndexer.cpp b/llvm/lib/XRay/BlockIndexer.cpp index f4ba0eb..d0c6853 100644 --- a/llvm/lib/XRay/BlockIndexer.cpp +++ b/llvm/lib/XRay/BlockIndexer.cpp @@ -12,8 +12,8 @@ //===----------------------------------------------------------------------===// #include "llvm/XRay/BlockIndexer.h" -namespace llvm { -namespace xray { +using namespace llvm; +using namespace llvm::xray; Error BlockIndexer::visit(BufferExtents &) { return Error::success(); } @@ -89,6 +89,3 @@ Error BlockIndexer::flush() { CurrentBlock.WallclockTime = nullptr; return Error::success(); } - -} // namespace xray -} // namespace llvm diff --git a/llvm/lib/XRay/BlockPrinter.cpp b/llvm/lib/XRay/BlockPrinter.cpp index 63a60c3..d85be5b 100644 --- a/llvm/lib/XRay/BlockPrinter.cpp +++ b/llvm/lib/XRay/BlockPrinter.cpp @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// #include "llvm/XRay/BlockPrinter.h" -namespace llvm { -namespace xray { +using namespace llvm; +using namespace llvm::xray; Error BlockPrinter::visit(BufferExtents &R) { OS << "\n[New Block]\n"; @@ -108,6 +108,3 @@ Error BlockPrinter::visit(EndBufferRecord &R) { auto E = RP.visit(R); return E; } - -} // namespace xray -} // namespace llvm diff --git a/llvm/lib/XRay/BlockVerifier.cpp b/llvm/lib/XRay/BlockVerifier.cpp index 99f255e..e39f6b6 100644 --- a/llvm/lib/XRay/BlockVerifier.cpp +++ b/llvm/lib/XRay/BlockVerifier.cpp @@ -10,19 +10,18 @@ #include <bitset> -namespace llvm { -namespace xray { -namespace { +using namespace llvm; +using namespace llvm::xray; -constexpr unsigned long long mask(BlockVerifier::State S) { +static constexpr unsigned long long mask(BlockVerifier::State S) { 
return 1uLL << static_cast<std::size_t>(S); } -constexpr std::size_t number(BlockVerifier::State S) { +static constexpr std::size_t number(BlockVerifier::State S) { return static_cast<std::size_t>(S); } -StringRef recordToString(BlockVerifier::State R) { +static StringRef recordToString(BlockVerifier::State R) { switch (R) { case BlockVerifier::State::BufferExtents: return "BufferExtents"; @@ -53,6 +52,8 @@ StringRef recordToString(BlockVerifier::State R) { llvm_unreachable("Unkown state!"); } +namespace { + struct Transition { BlockVerifier::State From; std::bitset<number(BlockVerifier::State::StateMax)> ToStates; @@ -133,7 +134,7 @@ Error BlockVerifier::transition(State To) { CurrentRecord = To; return Error::success(); -} // namespace xray +} Error BlockVerifier::visit(BufferExtents &) { return transition(State::BufferExtents); @@ -201,6 +202,3 @@ Error BlockVerifier::verify() { } void BlockVerifier::reset() { CurrentRecord = State::Unknown; } - -} // namespace xray -} // namespace llvm diff --git a/llvm/lib/XRay/FDRRecordProducer.cpp b/llvm/lib/XRay/FDRRecordProducer.cpp index 479b710..0f4eed1 100644 --- a/llvm/lib/XRay/FDRRecordProducer.cpp +++ b/llvm/lib/XRay/FDRRecordProducer.cpp @@ -10,8 +10,8 @@ #include <cstdint> -namespace llvm { -namespace xray { +using namespace llvm; +using namespace llvm::xray; namespace { @@ -31,8 +31,9 @@ enum MetadataRecordKinds : uint8_t { // This is an end marker, used to identify the upper bound for this enum. EnumEndMarker, }; +} // namespace -Expected<std::unique_ptr<Record>> +static Expected<std::unique_ptr<Record>> metadataRecordType(const XRayFileHeader &Header, uint8_t T) { if (T >= static_cast<uint8_t>(MetadataRecordKinds::EnumEndMarker)) @@ -72,12 +73,10 @@ metadataRecordType(const XRayFileHeader &Header, uint8_t T) { llvm_unreachable("Unhandled MetadataRecordKinds enum value"); } -constexpr bool isMetadataIntroducer(uint8_t FirstByte) { +static constexpr bool isMetadataIntroducer(uint8_t FirstByte) { return FirstByte & 0x01u; } -} // namespace - Expected<std::unique_ptr<Record>> FileBasedRecordProducer::findNextBufferExtent() { // We seek one byte at a time until we find a suitable buffer extents metadata @@ -193,6 +192,3 @@ Expected<std::unique_ptr<Record>> FileBasedRecordProducer::produce() { assert(R != nullptr); return std::move(R); } - -} // namespace xray -} // namespace llvm diff --git a/llvm/lib/XRay/FDRRecords.cpp b/llvm/lib/XRay/FDRRecords.cpp index ff315d3..a18f733 100644 --- a/llvm/lib/XRay/FDRRecords.cpp +++ b/llvm/lib/XRay/FDRRecords.cpp @@ -12,8 +12,8 @@ //===----------------------------------------------------------------------===// #include "llvm/XRay/FDRRecords.h" -namespace llvm { -namespace xray { +using namespace llvm; +using namespace llvm::xray; Error BufferExtents::apply(RecordVisitor &V) { return V.visit(*this); } Error WallclockRecord::apply(RecordVisitor &V) { return V.visit(*this); } @@ -61,6 +61,3 @@ StringRef Record::kindToString(RecordKind K) { } return "Unknown"; } - -} // namespace xray -} // namespace llvm diff --git a/llvm/lib/XRay/FDRTraceExpander.cpp b/llvm/lib/XRay/FDRTraceExpander.cpp index b68e997..991e6e5 100644 --- a/llvm/lib/XRay/FDRTraceExpander.cpp +++ b/llvm/lib/XRay/FDRTraceExpander.cpp @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// #include "llvm/XRay/FDRTraceExpander.h" -namespace llvm { -namespace xray { +using namespace llvm; +using namespace llvm::xray; void TraceExpander::resetCurrentRecord() { if (BuildingRecord) @@ -126,6 +126,3 @@ Error 
TraceExpander::flush() { resetCurrentRecord(); return Error::success(); } - -} // namespace xray -} // namespace llvm diff --git a/llvm/lib/XRay/FDRTraceWriter.cpp b/llvm/lib/XRay/FDRTraceWriter.cpp index fb59125..3e320a6 100644 --- a/llvm/lib/XRay/FDRTraceWriter.cpp +++ b/llvm/lib/XRay/FDRTraceWriter.cpp @@ -12,8 +12,8 @@ #include "llvm/XRay/FDRTraceWriter.h" #include <tuple> -namespace llvm { -namespace xray { +using namespace llvm; +using namespace llvm::xray; namespace { @@ -37,9 +37,10 @@ template <size_t Index> struct IndexedWriter { return 0; } }; +} // namespace template <uint8_t Kind, class... Values> -Error writeMetadata(support::endian::Writer &OS, Values &&... Ds) { +static Error writeMetadata(support::endian::Writer &OS, Values &&...Ds) { // The first bit in the first byte of metadata records is always set to 1, so // we ensure this is the case when we write out the first byte of the record. uint8_t FirstByte = (static_cast<uint8_t>(Kind) << 1) | uint8_t{0x01u}; @@ -54,8 +55,6 @@ Error writeMetadata(support::endian::Writer &OS, Values &&... Ds) { return Error::success(); } -} // namespace - FDRTraceWriter::FDRTraceWriter(raw_ostream &O, const XRayFileHeader &H) : OS(O, llvm::endianness::native) { // We need to re-construct a header, by writing the fields we care about for @@ -146,6 +145,3 @@ Error FDRTraceWriter::visit(FunctionRecord &R) { OS.write(R.delta()); return Error::success(); } - -} // namespace xray -} // namespace llvm diff --git a/llvm/lib/XRay/FileHeaderReader.cpp b/llvm/lib/XRay/FileHeaderReader.cpp index 6b6daf9..681cef7 100644 --- a/llvm/lib/XRay/FileHeaderReader.cpp +++ b/llvm/lib/XRay/FileHeaderReader.cpp @@ -7,12 +7,13 @@ //===----------------------------------------------------------------------===// #include "llvm/XRay/FileHeaderReader.h" -namespace llvm { -namespace xray { +using namespace llvm; +using namespace llvm::xray; // Populates the FileHeader reference by reading the first 32 bytes of the file. -Expected<XRayFileHeader> readBinaryFormatHeader(DataExtractor &HeaderExtractor, - uint64_t &OffsetPtr) { +Expected<XRayFileHeader> +xray::readBinaryFormatHeader(DataExtractor &HeaderExtractor, + uint64_t &OffsetPtr) { // FIXME: Maybe deduce whether the data is little or big-endian using some // magic bytes in the beginning of the file? 
@@ -68,6 +69,3 @@ Expected<XRayFileHeader> readBinaryFormatHeader(DataExtractor &HeaderExtractor, OffsetPtr += 16; return std::move(FileHeader); } - -} // namespace xray -} // namespace llvm diff --git a/llvm/lib/XRay/LogBuilderConsumer.cpp b/llvm/lib/XRay/LogBuilderConsumer.cpp index ffb49f9..f0fc336 100644 --- a/llvm/lib/XRay/LogBuilderConsumer.cpp +++ b/llvm/lib/XRay/LogBuilderConsumer.cpp @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// #include "llvm/XRay/FDRRecordConsumer.h" -namespace llvm { -namespace xray { +using namespace llvm; +using namespace llvm::xray; Error LogBuilderConsumer::consume(std::unique_ptr<Record> R) { if (!R) @@ -32,6 +32,3 @@ Error PipelineConsumer::consume(std::unique_ptr<Record> R) { Result = joinErrors(std::move(Result), R->apply(*V)); return Result; } - -} // namespace xray -} // namespace llvm diff --git a/llvm/lib/XRay/Profile.cpp b/llvm/lib/XRay/Profile.cpp index 1b340e5..ecb767b 100644 --- a/llvm/lib/XRay/Profile.cpp +++ b/llvm/lib/XRay/Profile.cpp @@ -18,8 +18,8 @@ #include "llvm/XRay/Trace.h" #include <memory> -namespace llvm { -namespace xray { +using namespace llvm; +using namespace llvm::xray; Profile::Profile(const Profile &O) { // We need to re-create all the tries from the original (O), into the current @@ -46,6 +46,7 @@ struct BlockHeader { uint32_t Number; uint64_t Thread; }; +} // namespace static Expected<BlockHeader> readBlockHeader(DataExtractor &Extractor, uint64_t &Offset) { @@ -115,8 +116,6 @@ static Expected<Profile::Data> readData(DataExtractor &Extractor, return D; } -} // namespace - Error Profile::addBlock(Block &&B) { if (B.PathData.empty()) return make_error<StringError>( @@ -189,7 +188,7 @@ Profile::PathID Profile::internPath(ArrayRef<FuncID> P) { return Node->ID; } -Profile mergeProfilesByThread(const Profile &L, const Profile &R) { +Profile xray::mergeProfilesByThread(const Profile &L, const Profile &R) { Profile Merged; using PathDataMap = DenseMap<Profile::PathID, Profile::Data>; using PathDataMapPtr = std::unique_ptr<PathDataMap>; @@ -228,7 +227,7 @@ Profile mergeProfilesByThread(const Profile &L, const Profile &R) { return Merged; } -Profile mergeProfilesByStack(const Profile &L, const Profile &R) { +Profile xray::mergeProfilesByStack(const Profile &L, const Profile &R) { Profile Merged; using PathDataMap = DenseMap<Profile::PathID, Profile::Data>; PathDataMap PathData; @@ -258,7 +257,7 @@ Profile mergeProfilesByStack(const Profile &L, const Profile &R) { return Merged; } -Expected<Profile> loadProfile(StringRef Filename) { +Expected<Profile> xray::loadProfile(StringRef Filename) { Expected<sys::fs::file_t> FdOrErr = sys::fs::openNativeFileForRead(Filename); if (!FdOrErr) return FdOrErr.takeError(); @@ -322,7 +321,7 @@ struct StackEntry { } // namespace -Expected<Profile> profileFromTrace(const Trace &T) { +Expected<Profile> xray::profileFromTrace(const Trace &T) { Profile P; // The implementation of the algorithm re-creates the execution of @@ -397,6 +396,3 @@ Expected<Profile> profileFromTrace(const Trace &T) { return P; } - -} // namespace xray -} // namespace llvm diff --git a/llvm/lib/XRay/RecordInitializer.cpp b/llvm/lib/XRay/RecordInitializer.cpp index 68ab3db..83d5f14 100644 --- a/llvm/lib/XRay/RecordInitializer.cpp +++ b/llvm/lib/XRay/RecordInitializer.cpp @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// #include "llvm/XRay/FDRRecords.h" -namespace llvm { -namespace xray { +using namespace llvm; +using namespace 
llvm::xray; Error RecordInitializer::visit(BufferExtents &R) { if (!E.isValidOffsetForDataOfSize(OffsetPtr, sizeof(uint64_t))) @@ -426,6 +426,3 @@ Error RecordInitializer::visit(FunctionRecord &R) { assert(FunctionRecord::kFunctionRecordSize == (OffsetPtr - BeginOffset)); return Error::success(); } - -} // namespace xray -} // namespace llvm diff --git a/llvm/lib/XRay/RecordPrinter.cpp b/llvm/lib/XRay/RecordPrinter.cpp index 32d4210..b9b7a16 100644 --- a/llvm/lib/XRay/RecordPrinter.cpp +++ b/llvm/lib/XRay/RecordPrinter.cpp @@ -9,8 +9,8 @@ #include "llvm/Support/FormatVariadic.h" -namespace llvm { -namespace xray { +using namespace llvm; +using namespace llvm::xray; Error RecordPrinter::visit(BufferExtents &R) { OS << formatv("<Buffer: size = {0} bytes>", R.size()) << Delim; @@ -103,6 +103,3 @@ Error RecordPrinter::visit(FunctionRecord &R) { OS << Delim; return Error::success(); } - -} // namespace xray -} // namespace llvm diff --git a/llvm/lib/XRay/Trace.cpp b/llvm/lib/XRay/Trace.cpp index 74515b1..14a3f01 100644 --- a/llvm/lib/XRay/Trace.cpp +++ b/llvm/lib/XRay/Trace.cpp @@ -29,11 +29,9 @@ using namespace llvm; using namespace llvm::xray; using llvm::yaml::Input; -namespace { - -Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian, - XRayFileHeader &FileHeader, - std::vector<XRayRecord> &Records) { +static Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian, + XRayFileHeader &FileHeader, + std::vector<XRayRecord> &Records) { if (Data.size() < 32) return make_error<StringError>( "Not enough bytes for an XRay log.", @@ -265,8 +263,9 @@ Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian, /// what FunctionRecord instances use, and we no longer need to include the CPU /// id in the CustomEventRecord. /// -Error loadFDRLog(StringRef Data, bool IsLittleEndian, - XRayFileHeader &FileHeader, std::vector<XRayRecord> &Records) { +static Error loadFDRLog(StringRef Data, bool IsLittleEndian, + XRayFileHeader &FileHeader, + std::vector<XRayRecord> &Records) { if (Data.size() < 32) return createStringError(std::make_error_code(std::errc::invalid_argument), @@ -348,8 +347,8 @@ Error loadFDRLog(StringRef Data, bool IsLittleEndian, return Error::success(); } -Error loadYAMLLog(StringRef Data, XRayFileHeader &FileHeader, - std::vector<XRayRecord> &Records) { +static Error loadYAMLLog(StringRef Data, XRayFileHeader &FileHeader, + std::vector<XRayRecord> &Records) { YAMLXRayTrace Trace; Input In(Data); In >> Trace; @@ -376,7 +375,6 @@ Error loadYAMLLog(StringRef Data, XRayFileHeader &FileHeader, }); return Error::success(); } -} // namespace Expected<Trace> llvm::xray::loadTraceFile(StringRef Filename, bool Sort) { Expected<sys::fs::file_t> FdOrErr = sys::fs::openNativeFileForRead(Filename); |
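The llvm/lib/XRay changes above all apply the same mechanical refactor: drop the reopened namespace llvm { namespace xray { ... } } wrappers, mark helpers that are not declared in a header as static, and define the header-declared functions with a qualified name under file-scope using-directives. A minimal sketch of the pattern, with hypothetical names (doThing and helper are illustrative and not part of the patch):

    // Hypothetical header: declares the public entry point.
    //   namespace llvm { namespace xray { Error doThing(); } }
    //
    // .cpp before: reopened namespaces, file-local helpers in an anonymous
    // namespace.
    //   namespace llvm {
    //   namespace xray {
    //   namespace { Error helper() { return Error::success(); } }
    //   Error doThing() { return helper(); }
    //   } // namespace xray
    //   } // namespace llvm
    //
    // .cpp after: using-directives, static for file-local helpers, and a
    // qualified name for the definition of the header-declared function.
    #include "llvm/Support/Error.h"

    using namespace llvm;
    using namespace llvm::xray;

    static Error helper() { return Error::success(); }

    Error xray::doThing() { return helper(); }

This follows the LLVM coding-standard preference for static functions over anonymous namespaces for file-local code; the observable behavior of the translation units is unchanged.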