Diffstat (limited to 'llvm/lib')
164 files changed, 2946 insertions, 1224 deletions
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index 8d20b0e..805b682 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -1180,32 +1180,41 @@ bool DependenceInfo::isKnownLessThan(const SCEV *S, const SCEV *Size) const {
   S = SE->getTruncateOrZeroExtend(S, MaxType);
   Size = SE->getTruncateOrZeroExtend(Size, MaxType);
 
-  // Special check for addrecs using BE taken count
-  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S))
-    if (AddRec->isAffine() && AddRec->hasNoSignedWrap()) {
-      const SCEV *BECount = SE->getBackedgeTakenCount(AddRec->getLoop());
-      const SCEV *Start = AddRec->getStart();
-      const SCEV *Step = AddRec->getStepRecurrence(*SE);
-      const SCEV *End = AddRec->evaluateAtIteration(BECount, *SE);
-      const SCEV *Diff0 = SE->getMinusSCEV(Start, Size);
-      const SCEV *Diff1 = SE->getMinusSCEV(End, Size);
-
-      // If the value of Step is non-negative and the AddRec is non-wrap, it
-      // reaches its maximum at the last iteration. So it's enouth to check
-      // whether End - Size is negative.
-      if (SE->isKnownNonNegative(Step) && SE->isKnownNegative(Diff1))
-        return true;
+  auto CheckAddRecBECount = [&]() {
+    const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S);
+    if (!AddRec || !AddRec->isAffine() || !AddRec->hasNoSignedWrap())
+      return false;
+    const SCEV *BECount = collectUpperBound(AddRec->getLoop(), MaxType);
+    // If the BTC cannot be computed, check the base case for S.
+    if (!BECount || isa<SCEVCouldNotCompute>(BECount))
+      return false;
+    const SCEV *Start = AddRec->getStart();
+    const SCEV *Step = AddRec->getStepRecurrence(*SE);
+    const SCEV *End = AddRec->evaluateAtIteration(BECount, *SE);
+    const SCEV *Diff0 = SE->getMinusSCEV(Start, Size);
+    const SCEV *Diff1 = SE->getMinusSCEV(End, Size);
+
+    // If the value of Step is non-negative and the AddRec is non-wrap, it
+    // reaches its maximum at the last iteration. So it's enough to check
+    // whether End - Size is negative.
+    if (SE->isKnownNonNegative(Step) && SE->isKnownNegative(Diff1))
+      return true;
 
-      // If the value of Step is non-positive and the AddRec is non-wrap, the
-      // initial value is its maximum.
-      if (SE->isKnownNonPositive(Step) && SE->isKnownNegative(Diff0))
-        return true;
+    // If the value of Step is non-positive and the AddRec is non-wrap, the
+    // initial value is its maximum.
+    if (SE->isKnownNonPositive(Step) && SE->isKnownNegative(Diff0))
+      return true;
 
-      // Even if we don't know the sign of Step, either Start or End must be
-      // the maximum value of the AddRec since it is non-wrap.
-      if (SE->isKnownNegative(Diff0) && SE->isKnownNegative(Diff1))
-        return true;
-    }
+    // Even if we don't know the sign of Step, either Start or End must be
+    // the maximum value of the AddRec since it is non-wrap.
+    if (SE->isKnownNegative(Diff0) && SE->isKnownNegative(Diff1))
+      return true;
+
+    return false;
+  };
+
+  if (CheckAddRecBECount())
+    return true;
 
   // Check using normal isKnownNegative
   const SCEV *LimitedBound = SE->getMinusSCEV(S, Size);
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index b8c540c..9f8ac6e 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -849,17 +849,12 @@ RecurrenceDescriptor::isMinMaxPattern(Instruction *I, RecurKind Kind,
 ///   %sum.2 = select %cmp, %add, %sum.1
 RecurrenceDescriptor::InstDesc
 RecurrenceDescriptor::isConditionalRdxPattern(Instruction *I) {
-  SelectInst *SI = dyn_cast<SelectInst>(I);
-  if (!SI)
-    return InstDesc(false, I);
-
-  CmpInst *CI = dyn_cast<CmpInst>(SI->getCondition());
+  Value *TrueVal, *FalseVal;
   // Only handle single use cases for now.
-  if (!CI || !CI->hasOneUse())
+  if (!match(I,
+             m_Select(m_OneUse(m_Cmp()), m_Value(TrueVal), m_Value(FalseVal))))
     return InstDesc(false, I);
 
-  Value *TrueVal = SI->getTrueValue();
-  Value *FalseVal = SI->getFalseValue();
   // Handle only when either of operands of select instruction is a PHI
   // node for now.
   if ((isa<PHINode>(TrueVal) && isa<PHINode>(FalseVal)) ||
@@ -886,7 +881,7 @@ RecurrenceDescriptor::isConditionalRdxPattern(Instruction *I) {
   if (!IPhi || IPhi != FalseVal)
     return InstDesc(false, I);
 
-  return InstDesc(true, SI);
+  return InstDesc(true, I);
 }
 
 RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 4e38626..e08ef60 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -6644,7 +6644,7 @@ Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType,
            "Invalid mask width");
     // If index-width (mask size) is less than pointer-size then mask is
     // 1-extended.
-    if (match(Op1, m_PtrToInt(m_Specific(Op0))))
+    if (match(Op1, m_PtrToIntOrAddr(m_Specific(Op0))))
      return Op0;
 
     // NOTE: We may have attributes associated with the return value of the
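The IVDescriptors hunk above folds three chained dyn_cast checks into a single PatternMatch query. A minimal sketch of that matcher shape, assuming LLVM's PatternMatch.h (the helper name isSelectOfOneUseCmp is invented for illustration):

// Recognize `select (one-use cmp), TV, FV` in one match() call.
#include "llvm/IR/Instruction.h"
#include "llvm/IR/PatternMatch.h"

using namespace llvm;
using namespace llvm::PatternMatch;

static bool isSelectOfOneUseCmp(Instruction *I, Value *&TV, Value *&FV) {
  // m_OneUse rejects compares with more than one user, mirroring the
  // hasOneUse() check in the removed code; TV/FV bind the select arms.
  return match(I, m_Select(m_OneUse(m_Cmp()), m_Value(TV), m_Value(FV)));
}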
diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp
index ab37338..0b2e3fc 100644
--- a/llvm/lib/Analysis/MemorySSA.cpp
+++ b/llvm/lib/Analysis/MemorySSA.cpp
@@ -393,7 +393,7 @@ static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysisType &AA,
 /// \param AA The AliasAnalysis we used for our search.
 /// \param AllowImpreciseClobber Always false, unless we do relaxed verify.
-LLVM_ATTRIBUTE_UNUSED static void
+[[maybe_unused]] static void
 checkClobberSanity(const MemoryAccess *Start, MemoryAccess *ClobberAt,
                    const MemoryLocation &StartLoc, const MemorySSA &MSSA,
                    const UpwardsMemoryQuery &Query, BatchAAResults &AA,
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index b5b4cd9..a64b93d 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -1774,7 +1774,7 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
   {
     const SCEV *LHS;
     const SCEV *RHS;
-    if (matchURem(Op, LHS, RHS))
+    if (match(Op, m_scev_URem(m_SCEV(LHS), m_SCEV(RHS), *this)))
       return getURemExpr(getZeroExtendExpr(LHS, Ty, Depth + 1),
                          getZeroExtendExpr(RHS, Ty, Depth + 1));
   }
@@ -2699,17 +2699,12 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
   }
 
   // Canonicalize (-1 * urem X, Y) + X --> (Y * X/Y)
-  if (Ops.size() == 2) {
-    const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[0]);
-    if (Mul && Mul->getNumOperands() == 2 &&
-        Mul->getOperand(0)->isAllOnesValue()) {
-      const SCEV *X;
-      const SCEV *Y;
-      if (matchURem(Mul->getOperand(1), X, Y) && X == Ops[1]) {
-        return getMulExpr(Y, getUDivExpr(X, Y));
-      }
-    }
-  }
+  const SCEV *Y;
+  if (Ops.size() == 2 &&
+      match(Ops[0],
+            m_scev_Mul(m_scev_AllOnes(),
+                       m_scev_URem(m_scev_Specific(Ops[1]), m_SCEV(Y), *this))))
+    return getMulExpr(Y, getUDivExpr(Ops[1], Y));
 
   // Skip past any other cast SCEVs.
   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
@@ -5419,20 +5414,15 @@ static Type *isSimpleCastedPHI(const SCEV *Op, const SCEVUnknown *SymbolicPHI,
   if (SourceBits != NewBits)
     return nullptr;
 
-  const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(Op);
-  const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(Op);
-  if (!SExt && !ZExt)
-    return nullptr;
-  const SCEVTruncateExpr *Trunc =
-      SExt ? dyn_cast<SCEVTruncateExpr>(SExt->getOperand())
-           : dyn_cast<SCEVTruncateExpr>(ZExt->getOperand());
-  if (!Trunc)
-    return nullptr;
-  const SCEV *X = Trunc->getOperand();
-  if (X != SymbolicPHI)
-    return nullptr;
-  Signed = SExt != nullptr;
-  return Trunc->getType();
+  if (match(Op, m_scev_SExt(m_scev_Trunc(m_scev_Specific(SymbolicPHI))))) {
+    Signed = true;
+    return cast<SCEVCastExpr>(Op)->getOperand()->getType();
+  }
+  if (match(Op, m_scev_ZExt(m_scev_Trunc(m_scev_Specific(SymbolicPHI))))) {
+    Signed = false;
+    return cast<SCEVCastExpr>(Op)->getOperand()->getType();
+  }
+  return nullptr;
 }
 
 static const Loop *isIntegerLoopHeaderPHI(const PHINode *PN, LoopInfo &LI) {
@@ -6427,8 +6417,18 @@ APInt ScalarEvolution::getConstantMultipleImpl(const SCEV *S,
   case scSequentialUMinExpr:
     return GetGCDMultiple(cast<SCEVNAryExpr>(S));
   case scUnknown: {
-    // ask ValueTracking for known bits
+    // Ask ValueTracking for known bits. A SCEVUnknown only becomes available
+    // at the point its underlying IR instruction has been defined. If CtxI
+    // was not provided, use:
+    // * the first instruction in the entry block if it is an argument
+    // * the instruction itself otherwise.
     const SCEVUnknown *U = cast<SCEVUnknown>(S);
+    if (!CtxI) {
+      if (isa<Argument>(U->getValue()))
+        CtxI = &*F.getEntryBlock().begin();
+      else if (auto *I = dyn_cast<Instruction>(U->getValue()))
+        CtxI = I;
+    }
     unsigned Known =
         computeKnownBits(U->getValue(), getDataLayout(), &AC, CtxI, &DT)
             .countMinTrailingZeros();
@@ -15415,67 +15415,6 @@ void PredicatedScalarEvolution::print(raw_ostream &OS, unsigned Depth) const {
   }
 }
 
-// Match the mathematical pattern A - (A / B) * B, where A and B can be
-// arbitrary expressions. Also match zext (trunc A to iB) to iY, which is used
-// for URem with constant power-of-2 second operands.
-// It's not always easy, as A and B can be folded (imagine A is X / 2, and B is
-// 4, A / B becomes X / 8).
-bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS,
-                                const SCEV *&RHS) {
-  if (Expr->getType()->isPointerTy())
-    return false;
-
-  // Try to match 'zext (trunc A to iB) to iY', which is used
-  // for URem with constant power-of-2 second operands. Make sure the size of
-  // the operand A matches the size of the whole expressions.
-  if (const auto *ZExt = dyn_cast<SCEVZeroExtendExpr>(Expr))
-    if (const auto *Trunc = dyn_cast<SCEVTruncateExpr>(ZExt->getOperand(0))) {
-      LHS = Trunc->getOperand();
-      // Bail out if the type of the LHS is larger than the type of the
-      // expression for now.
-      if (getTypeSizeInBits(LHS->getType()) >
-          getTypeSizeInBits(Expr->getType()))
-        return false;
-      if (LHS->getType() != Expr->getType())
-        LHS = getZeroExtendExpr(LHS, Expr->getType());
-      RHS = getConstant(APInt(getTypeSizeInBits(Expr->getType()), 1)
-                        << getTypeSizeInBits(Trunc->getType()));
-      return true;
-    }
-  const auto *Add = dyn_cast<SCEVAddExpr>(Expr);
-  if (Add == nullptr || Add->getNumOperands() != 2)
-    return false;
-
-  const SCEV *A = Add->getOperand(1);
-  const auto *Mul = dyn_cast<SCEVMulExpr>(Add->getOperand(0));
-
-  if (Mul == nullptr)
-    return false;
-
-  const auto MatchURemWithDivisor = [&](const SCEV *B) {
-    // (SomeExpr + (-(SomeExpr / B) * B)).
-    if (Expr == getURemExpr(A, B)) {
-      LHS = A;
-      RHS = B;
-      return true;
-    }
-    return false;
-  };
-
-  // (SomeExpr + (-1 * (SomeExpr / B) * B)).
-  if (Mul->getNumOperands() == 3 && isa<SCEVConstant>(Mul->getOperand(0)))
-    return MatchURemWithDivisor(Mul->getOperand(1)) ||
-           MatchURemWithDivisor(Mul->getOperand(2));
-
-  // (SomeExpr + ((-SomeExpr / B) * B)) or (SomeExpr + ((SomeExpr / B) * -B)).
-  if (Mul->getNumOperands() == 2)
-    return MatchURemWithDivisor(Mul->getOperand(1)) ||
-           MatchURemWithDivisor(Mul->getOperand(0)) ||
-           MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(1))) ||
-           MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(0)));
-  return false;
-}
-
 ScalarEvolution::LoopGuards
 ScalarEvolution::LoopGuards::collect(const Loop *L, ScalarEvolution &SE) {
   BasicBlock *Header = L->getHeader();
@@ -15696,20 +15635,18 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
     if (Predicate == CmpInst::ICMP_EQ && match(RHS, m_scev_Zero())) {
       // If LHS is A % B, i.e. A % B == 0, rewrite A to (A /u B) * B to
       // explicitly express that.
-      const SCEV *URemLHS = nullptr;
+      const SCEVUnknown *URemLHS = nullptr;
       const SCEV *URemRHS = nullptr;
-      if (SE.matchURem(LHS, URemLHS, URemRHS)) {
-        if (const SCEVUnknown *LHSUnknown = dyn_cast<SCEVUnknown>(URemLHS)) {
-          auto I = RewriteMap.find(LHSUnknown);
-          const SCEV *RewrittenLHS =
-              I != RewriteMap.end() ? I->second : LHSUnknown;
-          RewrittenLHS = ApplyDivisibiltyOnMinMaxExpr(RewrittenLHS, URemRHS);
-          const auto *Multiple =
-              SE.getMulExpr(SE.getUDivExpr(RewrittenLHS, URemRHS), URemRHS);
-          RewriteMap[LHSUnknown] = Multiple;
-          ExprsToRewrite.push_back(LHSUnknown);
-          return;
-        }
+      if (match(LHS,
+                m_scev_URem(m_SCEVUnknown(URemLHS), m_SCEV(URemRHS), SE))) {
+        auto I = RewriteMap.find(URemLHS);
+        const SCEV *RewrittenLHS = I != RewriteMap.end() ? I->second : URemLHS;
+        RewrittenLHS = ApplyDivisibiltyOnMinMaxExpr(RewrittenLHS, URemRHS);
+        const auto *Multiple =
+            SE.getMulExpr(SE.getUDivExpr(RewrittenLHS, URemRHS), URemRHS);
+        RewriteMap[URemLHS] = Multiple;
+        ExprsToRewrite.push_back(URemLHS);
+        return;
       }
     }
 
@@ -15834,6 +15771,21 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
         const SCEV *OneAlignedUp = GetNextSCEVDividesByDivisor(One, DividesBy);
         To = SE.getUMaxExpr(FromRewritten, OneAlignedUp);
+      } else {
+        if (LHS->getType()->isPointerTy()) {
+          LHS = SE.getLosslessPtrToIntExpr(LHS);
+          RHS = SE.getLosslessPtrToIntExpr(RHS);
+          if (isa<SCEVCouldNotCompute>(LHS) || isa<SCEVCouldNotCompute>(RHS))
+            break;
+        }
+        auto AddSubRewrite = [&](const SCEV *A, const SCEV *B) {
+          const SCEV *Sub = SE.getMinusSCEV(A, B);
+          AddRewrite(Sub, Sub,
+                     SE.getUMaxExpr(Sub, SE.getOne(From->getType())));
+        };
+        AddSubRewrite(LHS, RHS);
+        AddSubRewrite(RHS, LHS);
+        continue;
       }
       break;
     default:
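The ScalarEvolution changes above rely on the two arithmetic identities that the removed matchURem encoded structurally: A - (A /u B) * B == A urem B, and zext(trunc A to iB) == A urem 2^B. A standalone spot-check in plain C++ (no LLVM dependency):

#include <cassert>
#include <cstdint>

int main() {
  for (uint64_t X = 0; X < 512; ++X) {
    for (uint64_t Y = 1; Y < 512; ++Y) {
      // The pattern behind the (-1 * urem X, Y) + X --> (Y * X/Y) fold.
      assert(X - (X / Y) * Y == X % Y);
    }
    // zext(trunc X to i8) == X urem 2^8, the power-of-2 special case.
    assert(uint64_t(uint8_t(X)) == X % 256);
  }
}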
diff --git a/llvm/lib/Analysis/StaticDataProfileInfo.cpp b/llvm/lib/Analysis/StaticDataProfileInfo.cpp
index b036b2d..1f751ee 100644
--- a/llvm/lib/Analysis/StaticDataProfileInfo.cpp
+++ b/llvm/lib/Analysis/StaticDataProfileInfo.cpp
@@ -6,6 +6,46 @@
 #include "llvm/ProfileData/InstrProf.h"
 
 using namespace llvm;
+
+namespace llvm {
+namespace memprof {
+// Returns true iff the global variable has a custom section, either via
+// __attribute__((section("name")))
+// (https://clang.llvm.org/docs/AttributeReference.html#section-declspec-allocate)
+// or #pragma clang section directives
+// (https://clang.llvm.org/docs/LanguageExtensions.html#specifying-section-names-for-global-objects-pragma-clang-section).
+static bool hasExplicitSectionName(const GlobalVariable &GVar) {
+  if (GVar.hasSection())
+    return true;
+
+  auto Attrs = GVar.getAttributes();
+  if (Attrs.hasAttribute("bss-section") || Attrs.hasAttribute("data-section") ||
+      Attrs.hasAttribute("relro-section") ||
+      Attrs.hasAttribute("rodata-section"))
+    return true;
+  return false;
+}
+
+AnnotationKind getAnnotationKind(const GlobalVariable &GV) {
+  if (GV.isDeclarationForLinker())
+    return AnnotationKind::DeclForLinker;
+  // Skip 'llvm.'-prefixed global variables conservatively because they are
+  // often handled specially.
+  StringRef Name = GV.getName();
+  if (Name.starts_with("llvm."))
+    return AnnotationKind::ReservedName;
+  // Respect user-specified custom data sections.
+  if (hasExplicitSectionName(GV))
+    return AnnotationKind::ExplicitSection;
+  return AnnotationKind::AnnotationOK;
+}
+
+bool IsAnnotationOK(const GlobalVariable &GV) {
+  return getAnnotationKind(GV) == AnnotationKind::AnnotationOK;
+}
+} // namespace memprof
+} // namespace llvm
+
 void StaticDataProfileInfo::addConstantProfileCount(
     const Constant *C, std::optional<uint64_t> Count) {
   if (!Count) {
diff --git a/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
index 6356d71..873ac8f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
@@ -20,7 +20,7 @@
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
 
-namespace llvm {
+using namespace llvm;
 
 AIXException::AIXException(AsmPrinter *A) : EHStreamer(A) {}
 
@@ -90,5 +90,3 @@ void AIXException::endFunction(const MachineFunction *MF) {
 
   emitExceptionInfoTable(LSDALabel, PerSym);
 }
-
-} // End of namespace llvm
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 219bbc9..05fffe9 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1437,7 +1437,8 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges,
           BrProbEnabled,
           MF.hasBBSections() && NumMBBSectionRanges > 1,
           static_cast<bool>(BBAddrMapSkipEmitBBEntries),
-          HasCalls};
+          HasCalls,
+          false};
 }
 
 void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
diff --git a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
index 260ce8f..93ae548 100644
--- a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
+++ b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
@@ -85,8 +85,7 @@ template <> struct llvm::DenseMapInfo<VariableID> {
 
 using VarLocInsertPt = PointerUnion<const Instruction *, const DbgRecord *>;
 
-namespace std {
-template <> struct hash<VarLocInsertPt> {
+template <> struct std::hash<VarLocInsertPt> {
   using argument_type = VarLocInsertPt;
   using result_type = std::size_t;
 
@@ -94,7 +93,6 @@ template <> struct hash<VarLocInsertPt> {
     return std::hash<void *>()(Arg.getOpaqueValue());
   }
 };
-} // namespace std
 
 /// Helper class to build FunctionVarLocs, since that class isn't easy to
 /// modify. TODO: There's not a great deal of value in the split, it could be
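The AssignmentTrackingAnalysis hunk uses the C++17 rule that an explicit specialization may be defined outside its enclosing namespace with a qualified name, so the namespace std wrapper can go. A minimal illustration with an invented key type:

#include <cstddef>
#include <functional>

struct Key { void *Ptr; };

// Qualified-name specialization: no `namespace std { ... }` block needed.
template <> struct std::hash<Key> {
  std::size_t operator()(const Key &K) const noexcept {
    return std::hash<void *>()(K.Ptr);
  }
};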
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 4931403..53f1cfe2 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -770,7 +770,7 @@ struct PartwordMaskValues {
   Value *Inv_Mask = nullptr;
 };
 
-LLVM_ATTRIBUTE_UNUSED
+[[maybe_unused]]
 raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
   auto PrintObj = [&O](auto *V) {
     if (V)
diff --git a/llvm/lib/CodeGen/BasicBlockPathCloning.cpp b/llvm/lib/CodeGen/BasicBlockPathCloning.cpp
index fd7df6b..47b7a88 100644
--- a/llvm/lib/CodeGen/BasicBlockPathCloning.cpp
+++ b/llvm/lib/CodeGen/BasicBlockPathCloning.cpp
@@ -207,9 +207,7 @@ bool ApplyCloning(MachineFunction &MF,
   }
   return AnyPathsCloned;
 }
-} // end anonymous namespace
 
-namespace llvm {
 class BasicBlockPathCloning : public MachineFunctionPass {
 public:
   static char ID;
@@ -229,7 +227,7 @@ public:
   bool runOnMachineFunction(MachineFunction &MF) override;
 };
-} // namespace llvm
+} // namespace
 
 char BasicBlockPathCloning::ID = 0;
 INITIALIZE_PASS_BEGIN(
diff --git a/llvm/lib/CodeGen/BreakFalseDeps.cpp b/llvm/lib/CodeGen/BreakFalseDeps.cpp
index 28e6728..1846880 100644
--- a/llvm/lib/CodeGen/BreakFalseDeps.cpp
+++ b/llvm/lib/CodeGen/BreakFalseDeps.cpp
@@ -31,7 +31,7 @@
 
 using namespace llvm;
 
-namespace llvm {
+namespace {
 
 class BreakFalseDeps : public MachineFunctionPass {
 private:
@@ -95,7 +95,7 @@ private:
   void processUndefReads(MachineBasicBlock *);
 };
 
-} // namespace llvm
+} // namespace
 
 #define DEBUG_TYPE "break-false-deps"
diff --git a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
index 6c2a5a7..87ada87 100644
--- a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
+++ b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
@@ -126,8 +126,7 @@ hash_code hash_value(const ComplexValue &Arg) {
 } // end namespace
 
 typedef SmallVector<struct ComplexValue, 2> ComplexValues;
 
-namespace llvm {
-template <> struct DenseMapInfo<ComplexValue> {
+template <> struct llvm::DenseMapInfo<ComplexValue> {
   static inline ComplexValue getEmptyKey() {
     return {DenseMapInfo<Value *>::getEmptyKey(),
             DenseMapInfo<Value *>::getEmptyKey()};
@@ -144,7 +143,6 @@ template <> struct DenseMapInfo<ComplexValue> {
     return LHS.Real == RHS.Real && LHS.Imag == RHS.Imag;
   }
 };
-} // end namespace llvm
 
 namespace {
 template <typename T, typename IterT>
diff --git a/llvm/lib/CodeGen/EdgeBundles.cpp b/llvm/lib/CodeGen/EdgeBundles.cpp
index f4335396..50dd66f 100644
--- a/llvm/lib/CodeGen/EdgeBundles.cpp
+++ b/llvm/lib/CodeGen/EdgeBundles.cpp
@@ -81,13 +81,10 @@ void EdgeBundles::init() {
   }
 }
 
-namespace llvm {
-
 /// Specialize WriteGraph, the standard implementation won't work.
-template<>
-raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G,
-                          bool ShortNames,
-                          const Twine &Title) {
+template <>
+raw_ostream &llvm::WriteGraph<>(raw_ostream &O, const EdgeBundles &G,
+                                bool ShortNames, const Twine &Title) {
   const MachineFunction *MF = G.getMachineFunction();
 
   O << "digraph {\n";
@@ -107,8 +104,6 @@ raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G,
   return O;
 }
 
-} // end namespace llvm
-
 /// view - Visualize the annotated bipartite CFG with Graphviz.
 void EdgeBundles::view() const {
   ViewGraph(*this, "EdgeBundles");
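Several hunks in this commit (MemorySSA, AtomicExpandPass, GISelValueTracking, ScheduleDAGInstrs) replace LLVM_ATTRIBUTE_UNUSED with the standard C++17 attribute. A tiny standalone illustration of the idiom:

// [[maybe_unused]] portably suppresses unused-entity warnings; the entity
// may legitimately be referenced only in some configurations (e.g. only
// from assertion or LLVM_DEBUG code).
[[maybe_unused]] static int DebugCounter = 0;

[[maybe_unused]] static void dumpState(int V) { (void)V; }

int main() {
#ifndef NDEBUG
  dumpState(DebugCounter); // Only referenced in debug builds.
#endif
}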
diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp
index 9cc6c6a..04c7008 100644
--- a/llvm/lib/CodeGen/ExpandFp.cpp
+++ b/llvm/lib/CodeGen/ExpandFp.cpp
@@ -82,7 +82,7 @@ public:
   }
 
   static FRemExpander create(IRBuilder<> &B, Type *Ty) {
-    assert(canExpandType(Ty));
+    assert(canExpandType(Ty) && "Expected supported floating point type");
 
     // The type to use for the computation of the remainder. This may be
     // wider than the input/result type which affects the ...
@@ -356,8 +356,9 @@ Value *FRemExpander::buildFRem(Value *X, Value *Y,
 static bool expandFRem(BinaryOperator &I, std::optional<SimplifyQuery> &SQ) {
   LLVM_DEBUG(dbgs() << "Expanding instruction: " << I << '\n');
 
-  Type *ReturnTy = I.getType();
-  assert(FRemExpander::canExpandType(ReturnTy->getScalarType()));
+  Type *Ty = I.getType();
+  assert(FRemExpander::canExpandType(Ty) &&
+         "Expected supported floating point type");
 
   FastMathFlags FMF = I.getFastMathFlags();
   // TODO Make use of those flags for optimization?
@@ -368,32 +369,10 @@ static bool expandFRem(BinaryOperator &I, std::optional<SimplifyQuery> &SQ) {
   B.setFastMathFlags(FMF);
   B.SetCurrentDebugLocation(I.getDebugLoc());
 
-  Type *ElemTy = ReturnTy->getScalarType();
-  const FRemExpander Expander = FRemExpander::create(B, ElemTy);
-
-  Value *Ret;
-  if (ReturnTy->isFloatingPointTy())
-    Ret = FMF.approxFunc()
-              ? Expander.buildApproxFRem(I.getOperand(0), I.getOperand(1))
-              : Expander.buildFRem(I.getOperand(0), I.getOperand(1), SQ);
-  else {
-    auto *VecTy = cast<FixedVectorType>(ReturnTy);
-
-    // This could use SplitBlockAndInsertForEachLane but the interface
-    // is a bit awkward for a constant number of elements and it will
-    // boil down to the same code.
-    // TODO Expand the FRem instruction only once and reuse the code.
-    Value *Nums = I.getOperand(0);
-    Value *Denums = I.getOperand(1);
-    Ret = PoisonValue::get(I.getType());
-    for (int I = 0, E = VecTy->getNumElements(); I != E; ++I) {
-      Value *Num = B.CreateExtractElement(Nums, I);
-      Value *Denum = B.CreateExtractElement(Denums, I);
-      Value *Rem = FMF.approxFunc() ? Expander.buildApproxFRem(Num, Denum)
-                                    : Expander.buildFRem(Num, Denum, SQ);
-      Ret = B.CreateInsertElement(Ret, Rem, I);
-    }
-  }
+  const FRemExpander Expander = FRemExpander::create(B, Ty);
+  Value *Ret = FMF.approxFunc()
+                   ? Expander.buildApproxFRem(I.getOperand(0), I.getOperand(1))
+                   : Expander.buildFRem(I.getOperand(0), I.getOperand(1), SQ);
 
   I.replaceAllUsesWith(Ret);
   Ret->takeName(&I);
@@ -939,7 +918,8 @@ static void expandIToFP(Instruction *IToFP) {
   IToFP->eraseFromParent();
 }
 
-static void scalarize(Instruction *I, SmallVectorImpl<Instruction *> &Replace) {
+static void scalarize(Instruction *I,
+                      SmallVectorImpl<Instruction *> &Worklist) {
   VectorType *VTy = cast<FixedVectorType>(I->getType());
   IRBuilder<> Builder(I);
 
@@ -948,12 +928,25 @@ static void scalarize(Instruction *I, SmallVectorImpl<Instruction *> &Replace) {
   Value *Result = PoisonValue::get(VTy);
   for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
     Value *Ext = Builder.CreateExtractElement(I->getOperand(0), Idx);
-    Value *Cast = Builder.CreateCast(cast<CastInst>(I)->getOpcode(), Ext,
-                                     I->getType()->getScalarType());
-    Result = Builder.CreateInsertElement(Result, Cast, Idx);
-    if (isa<Instruction>(Cast))
-      Replace.push_back(cast<Instruction>(Cast));
+
+    Value *NewOp = nullptr;
+    if (auto *BinOp = dyn_cast<BinaryOperator>(I))
+      NewOp = Builder.CreateBinOp(
+          BinOp->getOpcode(), Ext,
+          Builder.CreateExtractElement(I->getOperand(1), Idx));
+    else if (auto *CastI = dyn_cast<CastInst>(I))
+      NewOp = Builder.CreateCast(CastI->getOpcode(), Ext,
+                                 I->getType()->getScalarType());
+    else
+      llvm_unreachable("Unsupported instruction type");
+
+    Result = Builder.CreateInsertElement(Result, NewOp, Idx);
+    if (auto *ScalarizedI = dyn_cast<Instruction>(NewOp)) {
+      ScalarizedI->copyIRFlags(I, true);
+      Worklist.push_back(ScalarizedI);
+    }
   }
+
   I->replaceAllUsesWith(Result);
   I->dropAllReferences();
   I->eraseFromParent();
@@ -989,10 +982,17 @@ static bool targetSupportsFrem(const TargetLowering &TLI, Type *Ty) {
   return TLI.getLibcallName(fremToLibcall(Ty->getScalarType()));
 }
 
+static void addToWorklist(Instruction &I,
+                          SmallVector<Instruction *, 4> &Worklist) {
+  if (I.getOperand(0)->getType()->isVectorTy())
+    scalarize(&I, Worklist);
+  else
+    Worklist.push_back(&I);
+}
+
 static bool runImpl(Function &F, const TargetLowering &TLI,
                     AssumptionCache *AC) {
-  SmallVector<Instruction *, 4> Replace;
-  SmallVector<Instruction *, 4> ReplaceVector;
+  SmallVector<Instruction *, 4> Worklist;
   bool Modified = false;
 
   unsigned MaxLegalFpConvertBitWidth =
@@ -1003,55 +1003,39 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
   if (MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS)
     return false;
 
-  for (auto &I : instructions(F)) {
-    switch (I.getOpcode()) {
-    case Instruction::FRem: {
-      Type *Ty = I.getType();
-      // TODO: This pass doesn't handle scalable vectors.
-      if (Ty->isScalableTy())
-        continue;
-
-      if (targetSupportsFrem(TLI, Ty) ||
-          !FRemExpander::canExpandType(Ty->getScalarType()))
-        continue;
-
-      Replace.push_back(&I);
-      Modified = true;
+  for (auto It = inst_begin(&F), End = inst_end(F); It != End;) {
+    Instruction &I = *It++;
+    Type *Ty = I.getType();
+    // TODO: This pass doesn't handle scalable vectors.
+    if (Ty->isScalableTy())
+      continue;
+    switch (I.getOpcode()) {
+    case Instruction::FRem:
+      if (!targetSupportsFrem(TLI, Ty) &&
+          FRemExpander::canExpandType(Ty->getScalarType())) {
+        addToWorklist(I, Worklist);
+        Modified = true;
+      }
       break;
-    }
     case Instruction::FPToUI:
     case Instruction::FPToSI: {
-      // TODO: This pass doesn't handle scalable vectors.
-      if (I.getOperand(0)->getType()->isScalableTy())
-        continue;
-
-      auto *IntTy = cast<IntegerType>(I.getType()->getScalarType());
+      auto *IntTy = cast<IntegerType>(Ty->getScalarType());
       if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
         continue;
 
-      if (I.getOperand(0)->getType()->isVectorTy())
-        ReplaceVector.push_back(&I);
-      else
-        Replace.push_back(&I);
+      addToWorklist(I, Worklist);
       Modified = true;
      break;
    }
     case Instruction::UIToFP:
     case Instruction::SIToFP: {
-      // TODO: This pass doesn't handle scalable vectors.
-      if (I.getOperand(0)->getType()->isScalableTy())
-        continue;
-
       auto *IntTy =
           cast<IntegerType>(I.getOperand(0)->getType()->getScalarType());
       if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
         continue;
 
-      if (I.getOperand(0)->getType()->isVectorTy())
-        ReplaceVector.push_back(&I);
-      else
-        Replace.push_back(&I);
+      addToWorklist(I, Worklist);
       Modified = true;
       break;
     }
@@ -1060,16 +1044,8 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
     }
   }
 
-  while (!ReplaceVector.empty()) {
-    Instruction *I = ReplaceVector.pop_back_val();
-    scalarize(I, Replace);
-  }
-
-  if (Replace.empty())
-    return false;
-
-  while (!Replace.empty()) {
-    Instruction *I = Replace.pop_back_val();
+  while (!Worklist.empty()) {
+    Instruction *I = Worklist.pop_back_val();
     if (I->getOpcode() == Instruction::FRem) {
       auto SQ = [&]() -> std::optional<SimplifyQuery> {
         if (AC) {
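The reworked scalarize() above follows the usual extract/apply/insert recipe for lowering a vector operation lane by lane. A simplified sketch for the binary-operator case, assuming LLVM's IRBuilder (no flag propagation, fixed-width vectors only; the helper name is invented):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Hypothetical helper: scalarize a vector BinaryOperator lane by lane.
static Value *scalarizeBinOp(BinaryOperator *BO) {
  auto *VTy = cast<FixedVectorType>(BO->getType());
  IRBuilder<> B(BO);
  Value *Result = PoisonValue::get(VTy);
  for (unsigned Lane = 0; Lane < VTy->getNumElements(); ++Lane) {
    Value *L = B.CreateExtractElement(BO->getOperand(0), Lane);
    Value *R = B.CreateExtractElement(BO->getOperand(1), Lane);
    Result = B.CreateInsertElement(
        Result, B.CreateBinOp(BO->getOpcode(), L, R), Lane);
  }
  return Result; // Caller RAUWs BO with Result and erases BO.
}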
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 90c60d4..04d9309 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -112,7 +112,7 @@ APInt GISelValueTracking::getKnownOnes(Register R) {
   return getKnownBits(R).One;
 }
 
-LLVM_ATTRIBUTE_UNUSED static void
+[[maybe_unused]] static void
 dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) {
   dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth
          << "] Computed for: " << MI << "[" << Depth << "] Known: 0x"
@@ -1975,6 +1975,81 @@ unsigned GISelValueTracking::computeNumSignBits(Register R,
     break;
   }
+  case TargetOpcode::G_SUB: {
+    Register Src2 = MI.getOperand(2).getReg();
+    unsigned Src2NumSignBits =
+        computeNumSignBits(Src2, DemandedElts, Depth + 1);
+    if (Src2NumSignBits == 1)
+      return 1; // Early out.
+
+    // Handle NEG.
+    Register Src1 = MI.getOperand(1).getReg();
+    KnownBits Known1 = getKnownBits(Src1, DemandedElts, Depth);
+    if (Known1.isZero()) {
+      KnownBits Known2 = getKnownBits(Src2, DemandedElts, Depth);
+      // If the input is known to be 0 or 1, the output is 0/-1, which is all
+      // sign bits set.
+      if ((Known2.Zero | 1).isAllOnes())
+        return TyBits;
+
+      // If the input is known to be positive (the sign bit is known clear),
+      // the output of the NEG has, at worst, the same number of sign bits as
+      // the input.
+      if (Known2.isNonNegative()) {
+        FirstAnswer = Src2NumSignBits;
+        break;
+      }
+
+      // Otherwise, we treat this like a SUB.
+    }
+
+    unsigned Src1NumSignBits =
+        computeNumSignBits(Src1, DemandedElts, Depth + 1);
+    if (Src1NumSignBits == 1)
+      return 1; // Early Out.
+
+    // Sub can have at most one carry bit. Thus we know that the output
+    // is, at worst, one more bit than the inputs.
+    FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits) - 1;
+    break;
+  }
+  case TargetOpcode::G_ADD: {
+    Register Src2 = MI.getOperand(2).getReg();
+    unsigned Src2NumSignBits =
+        computeNumSignBits(Src2, DemandedElts, Depth + 1);
+    if (Src2NumSignBits <= 2)
+      return 1; // Early out.
+
+    Register Src1 = MI.getOperand(1).getReg();
+    unsigned Src1NumSignBits =
+        computeNumSignBits(Src1, DemandedElts, Depth + 1);
+    if (Src1NumSignBits == 1)
+      return 1; // Early Out.
+
+    // Special case decrementing a value (ADD X, -1):
+    KnownBits Known2 = getKnownBits(Src2, DemandedElts, Depth);
+    if (Known2.isAllOnes()) {
+      KnownBits Known1 = getKnownBits(Src1, DemandedElts, Depth);
+      // If the input is known to be 0 or 1, the output is 0/-1, which is all
+      // sign bits set.
+      if ((Known1.Zero | 1).isAllOnes())
+        return TyBits;
+
+      // If we are subtracting one from a positive number, there is no carry
+      // out of the result.
+      if (Known1.isNonNegative()) {
+        FirstAnswer = Src1NumSignBits;
+        break;
+      }
+
+      // Otherwise, we treat this like an ADD.
+    }
+
+    // Add can have at most one carry bit. Thus we know that the output
+    // is, at worst, one more bit than the inputs.
+    FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits) - 1;
+    break;
+  }
   case TargetOpcode::G_FCMP:
   case TargetOpcode::G_ICMP: {
     bool IsFP = Opcode == TargetOpcode::G_FCMP;
diff --git a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
index 47640c4a..81ab317 100644
--- a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
+++ b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
@@ -587,16 +587,12 @@ public:
 } // namespace
 
 char GlobalMergeFuncPassWrapper::ID = 0;
-INITIALIZE_PASS_BEGIN(GlobalMergeFuncPassWrapper, "global-merge-func",
-                      "Global merge function pass", false, false)
-INITIALIZE_PASS_END(GlobalMergeFuncPassWrapper, "global-merge-func",
-                    "Global merge function pass", false, false)
+INITIALIZE_PASS(GlobalMergeFuncPassWrapper, "global-merge-func",
+                "Global merge function pass", false, false)
 
-namespace llvm {
-ModulePass *createGlobalMergeFuncPass() {
+ModulePass *llvm::createGlobalMergeFuncPass() {
   return new GlobalMergeFuncPassWrapper();
 }
-} // namespace llvm
 
 GlobalMergeFuncPassWrapper::GlobalMergeFuncPassWrapper() : ModulePass(ID) {
   initializeGlobalMergeFuncPassWrapperPass(
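The new G_SUB/G_ADD cases rest on the bound that an addition or subtraction can destroy at most one known sign bit (one carry or borrow), so the result keeps at least min(sign bits of the operands) - 1, never dropping below 1. A brute-force verification over all 8-bit pairs, in plain C++:

#include <algorithm>
#include <cassert>
#include <cstdint>

// Number of redundant sign bits of an 8-bit value (always >= 1).
static int numSignBits(int8_t V) {
  int N = 1;
  while (N < 8 && ((V >> (7 - N)) & 1) == ((V >> 7) & 1))
    ++N;
  return N;
}

int main() {
  for (int A = -128; A < 128; ++A)
    for (int B = -128; B < 128; ++B) {
      int Bound = std::max(
          std::min(numSignBits(int8_t(A)), numSignBits(int8_t(B))) - 1, 1);
      assert(numSignBits(int8_t(A + B)) >= Bound);
      assert(numSignBits(int8_t(A - B)) >= Bound);
    }
}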
diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp
index 3485a27..d2f2c3e 100644
--- a/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -101,15 +101,11 @@ static cl::opt<bool> EnablePrecomputePhysRegs(
 static bool EnablePrecomputePhysRegs = false;
 #endif // NDEBUG
 
-namespace llvm {
-
-cl::opt<bool> UseSegmentSetForPhysRegs(
+cl::opt<bool> llvm::UseSegmentSetForPhysRegs(
     "use-segment-set-for-physregs", cl::Hidden, cl::init(true),
     cl::desc(
         "Use segment set for the computation of the live ranges of physregs."));
 
-} // end namespace llvm
-
 void LiveIntervalsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesCFG();
   AU.addPreserved<LiveVariablesWrapperPass>();
@@ -665,7 +661,10 @@ void LiveIntervals::extendToIndices(LiveRange &LR,
 void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill,
                                SmallVectorImpl<SlotIndex> *EndPoints) {
   LiveQueryResult LRQ = LR.Query(Kill);
-  VNInfo *VNI = LRQ.valueOutOrDead();
+  // LR may have liveness reachable from an early-clobber slot, which may be
+  // only live-in instead of live-out of the instruction.
+  // For example, for LR = [1r, 3r) and Kill = 3e, we have to prune [3e, 3r).
+  VNInfo *VNI = LRQ.valueOutOrDead() ? LRQ.valueOutOrDead() : LRQ.valueIn();
   if (!VNI)
     return;
diff --git a/llvm/lib/CodeGen/MIR2Vec.cpp b/llvm/lib/CodeGen/MIR2Vec.cpp
index e859765..5c78d98 100644
--- a/llvm/lib/CodeGen/MIR2Vec.cpp
+++ b/llvm/lib/CodeGen/MIR2Vec.cpp
@@ -29,20 +29,17 @@ using namespace mir2vec;
 STATISTIC(MIRVocabMissCounter,
           "Number of lookups to MIR entities not present in the vocabulary");
 
-namespace llvm {
-namespace mir2vec {
-cl::OptionCategory MIR2VecCategory("MIR2Vec Options");
+cl::OptionCategory llvm::mir2vec::MIR2VecCategory("MIR2Vec Options");
 
 // FIXME: Use a default vocab when not specified
 static cl::opt<std::string>
     VocabFile("mir2vec-vocab-path", cl::Optional,
              cl::desc("Path to the vocabulary file for MIR2Vec"), cl::init(""),
              cl::cat(MIR2VecCategory));
-cl::opt<float> OpcWeight("mir2vec-opc-weight", cl::Optional, cl::init(1.0),
-                         cl::desc("Weight for machine opcode embeddings"),
-                         cl::cat(MIR2VecCategory));
-} // namespace mir2vec
-} // namespace llvm
+cl::opt<float>
+    llvm::mir2vec::OpcWeight("mir2vec-opc-weight", cl::Optional, cl::init(1.0),
+                             cl::desc("Weight for machine opcode embeddings"),
+                             cl::cat(MIR2VecCategory));
 
 //===----------------------------------------------------------------------===//
 // Vocabulary Implementation
diff --git a/llvm/lib/CodeGen/MIRFSDiscriminator.cpp b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
index f5146f5..d988a2a 100644
--- a/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
+++ b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
@@ -40,7 +40,7 @@ cl::opt<bool> ImprovedFSDiscriminator(
     "improved-fs-discriminator", cl::Hidden, cl::init(false),
     cl::desc("New FS discriminators encoding (incompatible with the original "
              "encoding)"));
-}
+} // namespace llvm
 
 char MIRAddFSDiscriminators::ID = 0;
diff --git a/llvm/lib/CodeGen/MIRNamerPass.cpp b/llvm/lib/CodeGen/MIRNamerPass.cpp
index bc65700..cbf8867 100644
--- a/llvm/lib/CodeGen/MIRNamerPass.cpp
+++ b/llvm/lib/CodeGen/MIRNamerPass.cpp
@@ -23,10 +23,6 @@
 
 using namespace llvm;
 
-namespace llvm {
-extern char &MIRNamerID;
-} // namespace llvm
-
 #define DEBUG_TYPE "mir-namer"
 
 namespace {
@@ -53,10 +49,9 @@ public:
 
     VRegRenamer Renamer(MF.getRegInfo());
 
-    unsigned BBIndex = 0;
     ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
-    for (auto &MBB : RPOT)
-      Changed |= Renamer.renameVRegs(MBB, BBIndex++);
+    for (const auto &[BBIndex, MBB] : enumerate(RPOT))
+      Changed |= Renamer.renameVRegs(MBB, BBIndex);
 
     return Changed;
   }
@@ -66,10 +61,4 @@ public:
 
 char MIRNamer::ID;
 
-char &llvm::MIRNamerID = MIRNamer::ID;
-
-INITIALIZE_PASS_BEGIN(MIRNamer, "mir-namer", "Rename Register Operands", false,
-                      false)
-
-INITIALIZE_PASS_END(MIRNamer, "mir-namer", "Rename Register Operands", false,
-                    false)
+INITIALIZE_PASS(MIRNamer, "mir-namer", "Rename Register Operands", false, false)
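The MIRNamer hunk swaps a manually maintained counter for llvm::enumerate. A standalone sketch of the idiom, assuming LLVM's STLExtras.h:

#include "llvm/ADT/STLExtras.h"
#include <vector>

// Iterate with an index without a separately incremented counter.
void renameAll(std::vector<int> &Blocks) {
  for (const auto &[Index, Block] : llvm::enumerate(Blocks)) {
    // Index is the 0-based position; Block references the element.
    (void)Index;
    (void)Block;
  }
}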
diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp
index bf8a6cd..1d54d72 100644
--- a/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -107,10 +107,8 @@ struct MFPrintState {
 
 } // end anonymous namespace
 
-namespace llvm::yaml {
-
 /// This struct serializes the LLVM IR module.
-template <> struct BlockScalarTraits<Module> {
+template <> struct yaml::BlockScalarTraits<Module> {
   static void output(const Module &Mod, void *Ctxt, raw_ostream &OS) {
     Mod.print(OS, nullptr);
   }
@@ -121,8 +119,6 @@ template <> struct BlockScalarTraits<Module> {
   }
 };
 
-} // end namespace llvm::yaml
-
 static void printRegMIR(Register Reg, yaml::StringValue &Dest,
                         const TargetRegisterInfo *TRI) {
   raw_string_ostream OS(Dest.Value);
@@ -866,48 +862,46 @@ static void printMI(raw_ostream &OS, MFPrintState &State,
 
   OS << TII->getName(MI.getOpcode());
 
-  LS = ListSeparator();
+  // Print a space after the opcode if any additional tokens are printed.
+  LS = ListSeparator(", ", " ");
 
-  if (I < E) {
-    OS << ' ';
-    for (; I < E; ++I) {
-      OS << LS;
-      printMIOperand(OS, State, MI, I, TRI, TII, ShouldPrintRegisterTies,
-                     PrintedTypes, MRI, /*PrintDef=*/true);
-    }
+  for (; I < E; ++I) {
+    OS << LS;
+    printMIOperand(OS, State, MI, I, TRI, TII, ShouldPrintRegisterTies,
+                   PrintedTypes, MRI, /*PrintDef=*/true);
   }
 
   // Print any optional symbols attached to this instruction as-if they were
   // operands.
   if (MCSymbol *PreInstrSymbol = MI.getPreInstrSymbol()) {
-    OS << LS << " pre-instr-symbol ";
+    OS << LS << "pre-instr-symbol ";
     MachineOperand::printSymbol(OS, *PreInstrSymbol);
   }
   if (MCSymbol *PostInstrSymbol = MI.getPostInstrSymbol()) {
-    OS << LS << " post-instr-symbol ";
+    OS << LS << "post-instr-symbol ";
     MachineOperand::printSymbol(OS, *PostInstrSymbol);
   }
   if (MDNode *HeapAllocMarker = MI.getHeapAllocMarker()) {
-    OS << LS << " heap-alloc-marker ";
+    OS << LS << "heap-alloc-marker ";
     HeapAllocMarker->printAsOperand(OS, State.MST);
   }
   if (MDNode *PCSections = MI.getPCSections()) {
-    OS << LS << " pcsections ";
+    OS << LS << "pcsections ";
     PCSections->printAsOperand(OS, State.MST);
   }
   if (MDNode *MMRA = MI.getMMRAMetadata()) {
-    OS << LS << " mmra ";
+    OS << LS << "mmra ";
     MMRA->printAsOperand(OS, State.MST);
   }
   if (uint32_t CFIType = MI.getCFIType())
-    OS << LS << " cfi-type " << CFIType;
+    OS << LS << "cfi-type " << CFIType;
 
   if (auto Num = MI.peekDebugInstrNum())
-    OS << LS << " debug-instr-number " << Num;
+    OS << LS << "debug-instr-number " << Num;
 
   if (PrintLocations) {
     if (const DebugLoc &DL = MI.getDebugLoc()) {
-      OS << LS << " debug-location ";
+      OS << LS << "debug-location ";
       DL->printAsOperand(OS, State.MST);
     }
   }
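The printMI change depends on a separator object that emits a prefix (here a single space) before the first item and the separator before each later one, which is what lets the if (I < E) special case go away. A hand-rolled standalone equivalent of that behavior (LLVM's own ListSeparator lives in ADT/StringExtras.h; the two-argument form used above is assumed from context):

#include <iostream>
#include <string>

// First use prints the prefix; every later use prints the separator.
class Separator {
  std::string Sep, Prefix;
  bool First = true;

public:
  Separator(std::string S, std::string P)
      : Sep(std::move(S)), Prefix(std::move(P)) {}
  friend std::ostream &operator<<(std::ostream &OS, Separator &LS) {
    OS << (LS.First ? LS.Prefix : LS.Sep);
    LS.First = false;
    return OS;
  }
};

int main() {
  Separator LS(", ", " ");
  std::cout << "OPCODE";
  for (const char *Op : {"a", "b", "c"})
    std::cout << LS << Op; // Prints "OPCODE a, b, c".
}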
diff --git a/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp
index b2731b69..a72c2c4 100644
--- a/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp
+++ b/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp
@@ -97,7 +97,9 @@ static const bool EnableDevelopmentFeatures = false;
 /// this happens only in development mode. It's a no-op otherwise.
 namespace llvm {
 extern cl::opt<unsigned> EvictInterferenceCutoff;
+} // namespace llvm
 
+namespace {
 class RegAllocScoring : public MachineFunctionPass {
 public:
   static char ID;
@@ -124,11 +126,12 @@ public:
   /// Performs this pass
   bool runOnMachineFunction(MachineFunction &) override;
 };
+} // namespace
 
 char RegAllocScoring::ID = 0;
-FunctionPass *createRegAllocScoringPass() { return new RegAllocScoring(); }
-
-} // namespace llvm
+FunctionPass *llvm::createRegAllocScoringPass() {
+  return new RegAllocScoring();
+}
 
 INITIALIZE_PASS(RegAllocScoring, "regallocscoringpass",
                 "Register Allocation Scoring Pass", false, false)
diff --git a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index e7fa082..26eb10f 100644
--- a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -29,7 +29,6 @@ using namespace llvm;
 
 #define DEBUG_TYPE "machine-block-freq"
 
-namespace llvm {
 static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG(
     "view-machine-block-freq-propagation-dags", cl::Hidden,
     cl::desc("Pop up a window to show a dag displaying how machine block "
@@ -44,6 +43,7 @@ static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG(
                clEnumValN(GVDT_Count, "count",
                           "display a graph using the real "
                           "profile count if available.")));
 
+namespace llvm {
 // Similar option above, but used to control BFI display only after MBP pass
 cl::opt<GVDAGType> ViewBlockLayoutWithBFI(
     "view-block-layout-with-bfi", cl::Hidden,
@@ -69,15 +69,15 @@ extern cl::opt<std::string> ViewBlockFreqFuncName;
 // Defined in Analysis/BlockFrequencyInfo.cpp: -view-hot-freq-perc=
 extern cl::opt<unsigned> ViewHotFreqPercent;
 
-static cl::opt<bool> PrintMachineBlockFreq(
-    "print-machine-bfi", cl::init(false), cl::Hidden,
-    cl::desc("Print the machine block frequency info."));
-
 // Command line option to specify the name of the function for block frequency
 // dump. Defined in Analysis/BlockFrequencyInfo.cpp.
 extern cl::opt<std::string> PrintBFIFuncName;
 } // namespace llvm
 
+static cl::opt<bool>
+    PrintMachineBlockFreq("print-machine-bfi", cl::init(false), cl::Hidden,
+                          cl::desc("Print the machine block frequency info."));
+
 static GVDAGType getGVDT() {
   if (ViewBlockLayoutWithBFI != GVDT_None)
     return ViewBlockLayoutWithBFI;
@@ -85,9 +85,7 @@ static GVDAGType getGVDT() {
   return ViewMachineBlockFreqPropagationDAG;
 }
 
-namespace llvm {
-
-template <> struct GraphTraits<MachineBlockFrequencyInfo *> {
+template <> struct llvm::GraphTraits<MachineBlockFrequencyInfo *> {
   using NodeRef = const MachineBasicBlock *;
   using ChildIteratorType = MachineBasicBlock::const_succ_iterator;
   using nodes_iterator = pointer_iterator<MachineFunction::const_iterator>;
@@ -116,7 +114,7 @@ using MBFIDOTGraphTraitsBase =
                               MachineBranchProbabilityInfo>;
 
 template <>
-struct DOTGraphTraits<MachineBlockFrequencyInfo *>
+struct llvm::DOTGraphTraits<MachineBlockFrequencyInfo *>
     : public MBFIDOTGraphTraitsBase {
   const MachineFunction *CurFunc = nullptr;
   DenseMap<const MachineBasicBlock *, int> LayoutOrderMap;
@@ -159,8 +157,6 @@ struct DOTGraphTraits<MachineBlockFrequencyInfo *>
   }
 };
 
-} // end namespace llvm
-
 AnalysisKey MachineBlockFrequencyAnalysis::Key;
 
 MachineBlockFrequencyAnalysis::Result
diff --git a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index 2e92dd8..7ca4582 100644
--- a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -18,13 +18,8 @@
 
 using namespace llvm;
 
-INITIALIZE_PASS_BEGIN(MachineBranchProbabilityInfoWrapperPass,
-                      "machine-branch-prob",
-                      "Machine Branch Probability Analysis", false, true)
-INITIALIZE_PASS_END(MachineBranchProbabilityInfoWrapperPass,
-                    "machine-branch-prob",
-                    "Machine Branch Probability Analysis", false, true)
-
+INITIALIZE_PASS(MachineBranchProbabilityInfoWrapperPass, "machine-branch-prob",
+                "Machine Branch Probability Analysis", false, true)
 
 namespace llvm {
 cl::opt<unsigned>
     StaticLikelyProb("static-likely-prob",
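The GraphTraits/DOTGraphTraits hunks use the same qualified-name specialization trick as the std::hash change earlier: a template that lives in another namespace can be specialized without reopening that namespace. A generic standalone illustration (names invented):

namespace gfx {
template <typename G> struct GraphTraits; // primary template elsewhere
}

struct MyGraph {};

// Qualified-name specialization, no `namespace gfx { ... }` block needed.
template <> struct gfx::GraphTraits<MyGraph> {
  using NodeRef = const MyGraph *;
};

int main() {}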
diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
index 224231c..bfa5ab2 100644
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@@ -719,43 +719,41 @@ MachineFunction::CallSiteInfo::CallSiteInfo(const CallBase &CB) {
   }
 }
 
-namespace llvm {
+template <>
+struct llvm::DOTGraphTraits<const MachineFunction *>
+    : public DefaultDOTGraphTraits {
+  DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
 
-  template<>
-  struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits {
-    DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
+  static std::string getGraphName(const MachineFunction *F) {
+    return ("CFG for '" + F->getName() + "' function").str();
+  }
 
-    static std::string getGraphName(const MachineFunction *F) {
-      return ("CFG for '" + F->getName() + "' function").str();
+  std::string getNodeLabel(const MachineBasicBlock *Node,
+                           const MachineFunction *Graph) {
+    std::string OutStr;
+    {
+      raw_string_ostream OSS(OutStr);
+
+      if (isSimple()) {
+        OSS << printMBBReference(*Node);
+        if (const BasicBlock *BB = Node->getBasicBlock())
+          OSS << ": " << BB->getName();
+      } else
+        Node->print(OSS);
     }
 
-    std::string getNodeLabel(const MachineBasicBlock *Node,
-                             const MachineFunction *Graph) {
-      std::string OutStr;
-      {
-        raw_string_ostream OSS(OutStr);
-
-        if (isSimple()) {
-          OSS << printMBBReference(*Node);
-          if (const BasicBlock *BB = Node->getBasicBlock())
-            OSS << ": " << BB->getName();
-        } else
-          Node->print(OSS);
-      }
-
-      if (OutStr[0] == '\n') OutStr.erase(OutStr.begin());
-
-      // Process string output to make it nicer...
-      for (unsigned i = 0; i != OutStr.length(); ++i)
-        if (OutStr[i] == '\n') { // Left justify
-          OutStr[i] = '\\';
-          OutStr.insert(OutStr.begin()+i+1, 'l');
-        }
-      return OutStr;
-    }
-  };
+    if (OutStr[0] == '\n')
+      OutStr.erase(OutStr.begin());
 
-} // end namespace llvm
+    // Process string output to make it nicer...
+    for (unsigned i = 0; i != OutStr.length(); ++i)
+      if (OutStr[i] == '\n') { // Left justify
+        OutStr[i] = '\\';
+        OutStr.insert(OutStr.begin() + i + 1, 'l');
+      }
+    return OutStr;
+  }
+};
 
 void MachineFunction::viewCFG() const {
diff --git a/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
index 0f88a7b..5111322 100644
--- a/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -60,13 +60,11 @@ char &llvm::MachineFunctionPrinterPassID = MachineFunctionPrinterPass::ID;
 INITIALIZE_PASS(MachineFunctionPrinterPass, "machineinstr-printer",
                 "Machine Function Printer", false, false)
 
-namespace llvm {
 /// Returns a newly-created MachineFunction Printer pass. The
 /// default banner is empty.
 ///
-MachineFunctionPass *createMachineFunctionPrinterPass(raw_ostream &OS,
-                                                      const std::string &Banner){
+MachineFunctionPass *
+llvm::createMachineFunctionPrinterPass(raw_ostream &OS,
+                                       const std::string &Banner) {
   return new MachineFunctionPrinterPass(OS, Banner);
 }
-
-}
diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index fdae3b4..9feb974 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -593,15 +593,12 @@ struct MachineOutliner : public ModulePass {
 
 char MachineOutliner::ID = 0;
 
-namespace llvm {
-ModulePass *createMachineOutlinerPass(RunOutliner RunOutlinerMode) {
+ModulePass *llvm::createMachineOutlinerPass(RunOutliner RunOutlinerMode) {
   MachineOutliner *OL = new MachineOutliner();
   OL->RunOutlinerMode = RunOutlinerMode;
   return OL;
 }
 
-} // namespace llvm
-
 INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner",
                 false, false)
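A recurring cleanup in this commit (MachineOutliner, GlobalMergeFunctions, LiveIntervals, and others) defines entities declared inside namespace llvm, usually in a header, with qualified names instead of wrapping the .cpp definitions in a namespace block. A standalone sketch of the pattern (names invented):

namespace proj {
class Pass {};
Pass *createMyPass(); // declaration, as it would appear in a header
extern int Budget;
}

// Definitions at file scope, qualified; no namespace reopening.
proj::Pass *proj::createMyPass() { return new Pass(); }
int proj::Budget = 42;

int main() { delete proj::createMyPass(); }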
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 89ed4da..a717d9e 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -201,16 +201,15 @@ static cl::opt<unsigned> SwpMaxNumStores(
     cl::desc("Maximum number of stores allowed in the target loop."),
     cl::Hidden, cl::init(200));
 
-namespace llvm {
-
 // A command line option to enable the CopyToPhi DAG mutation.
-cl::opt<bool> SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
-                                 cl::init(true),
-                                 cl::desc("Enable CopyToPhi DAG Mutation"));
+cl::opt<bool>
+    llvm::SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
+                             cl::init(true),
+                             cl::desc("Enable CopyToPhi DAG Mutation"));
 
 /// A command line argument to force pipeliner to use specified issue
 /// width.
-cl::opt<int> SwpForceIssueWidth(
+cl::opt<int> llvm::SwpForceIssueWidth(
     "pipeliner-force-issue-width",
     cl::desc("Force pipeliner to use specified issue width."), cl::Hidden,
     cl::init(-1));
@@ -226,8 +225,6 @@ static cl::opt<WindowSchedulingFlag> WindowSchedulingOption(
                clEnumValN(WindowSchedulingFlag::WS_Force, "force",
                           "Use window algorithm instead of SMS algorithm.")));
 
-} // end namespace llvm
-
 unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
 char MachinePipeliner::ID = 0;
 #ifndef NDEBUG
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 299bcc4..3ed1045 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -176,9 +176,7 @@ STATISTIC(NumNodeOrderPostRA,
 STATISTIC(NumFirstValidPostRA,
           "Number of scheduling units chosen for FirstValid heuristic post-RA");
 
-namespace llvm {
-
-cl::opt<MISched::Direction> PreRADirection(
+cl::opt<MISched::Direction> llvm::PreRADirection(
     "misched-prera-direction", cl::Hidden,
     cl::desc("Pre reg-alloc list scheduling direction"),
     cl::init(MISched::Unspecified),
@@ -206,33 +204,31 @@ static cl::opt<bool> DumpCriticalPathLength("misched-dcpl", cl::Hidden,
     cl::desc("Print critical path length to stdout"));
 
-cl::opt<bool> VerifyScheduling(
+cl::opt<bool> llvm::VerifyScheduling(
     "verify-misched", cl::Hidden,
     cl::desc("Verify machine instrs before and after machine scheduling"));
 
 #ifndef NDEBUG
-cl::opt<bool> ViewMISchedDAGs(
+cl::opt<bool> llvm::ViewMISchedDAGs(
     "view-misched-dags", cl::Hidden,
     cl::desc("Pop up a window to show MISched dags after they are processed"));
-cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
-                        cl::desc("Print schedule DAGs"));
-cl::opt<bool> MISchedDumpReservedCycles(
+cl::opt<bool> llvm::PrintDAGs("misched-print-dags", cl::Hidden,
+                              cl::desc("Print schedule DAGs"));
+static cl::opt<bool> MISchedDumpReservedCycles(
     "misched-dump-reserved-cycles", cl::Hidden, cl::init(false),
     cl::desc("Dump resource usage at schedule boundary."));
-cl::opt<bool> MischedDetailResourceBooking(
+static cl::opt<bool> MischedDetailResourceBooking(
     "misched-detail-resource-booking", cl::Hidden, cl::init(false),
     cl::desc("Show details of invoking getNextResourceCycle."));
 #else
-const bool ViewMISchedDAGs = false;
-const bool PrintDAGs = false;
-const bool MischedDetailResourceBooking = false;
+const bool llvm::ViewMISchedDAGs = false;
+const bool llvm::PrintDAGs = false;
+static const bool MischedDetailResourceBooking = false;
 #ifdef LLVM_ENABLE_DUMP
-const bool MISchedDumpReservedCycles = false;
+static const bool MISchedDumpReservedCycles = false;
 #endif // LLVM_ENABLE_DUMP
 #endif // NDEBUG
 
-} // end namespace llvm
-
 #ifndef NDEBUG
 /// In some situations a few uninteresting nodes depend on nearly all other
 /// nodes in the graph, provide a cutoff to hide them.
@@ -2053,28 +2049,24 @@ public:
 
 } // end anonymous namespace
 
-namespace llvm {
-
 std::unique_ptr<ScheduleDAGMutation>
-createLoadClusterDAGMutation(const TargetInstrInfo *TII,
-                             const TargetRegisterInfo *TRI,
-                             bool ReorderWhileClustering) {
+llvm::createLoadClusterDAGMutation(const TargetInstrInfo *TII,
+                                   const TargetRegisterInfo *TRI,
+                                   bool ReorderWhileClustering) {
   return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(
                                   TII, TRI, ReorderWhileClustering)
                             : nullptr;
 }
 
 std::unique_ptr<ScheduleDAGMutation>
-createStoreClusterDAGMutation(const TargetInstrInfo *TII,
-                              const TargetRegisterInfo *TRI,
-                              bool ReorderWhileClustering) {
+llvm::createStoreClusterDAGMutation(const TargetInstrInfo *TII,
+                                    const TargetRegisterInfo *TRI,
+                                    bool ReorderWhileClustering) {
   return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(
                                   TII, TRI, ReorderWhileClustering)
                             : nullptr;
 }
 
-} // end namespace llvm
-
 // Sorting all the loads/stores first, then for each load/store, checking the
 // following load/store one by one, until reach the first non-dependent one and
 // call target hook to see if they can cluster.
@@ -2304,16 +2296,12 @@ protected:
 
 } // end anonymous namespace
 
-namespace llvm {
-
 std::unique_ptr<ScheduleDAGMutation>
-createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
-                               const TargetRegisterInfo *TRI) {
+llvm::createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
+                                     const TargetRegisterInfo *TRI) {
   return std::make_unique<CopyConstrain>(TII, TRI);
 }
 
-} // end namespace llvm
-
 /// constrainLocalCopy handles two possibilities:
 /// 1) Local src:
 ///    I0: = dst
@@ -3445,14 +3433,13 @@ void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
 }
 #endif
 
-namespace llvm {
 /// Return true if this heuristic determines order.
 /// TODO: Consider refactor return type of these functions as integer or enum,
 /// as we may need to differentiate whether TryCand is better than Cand.
-bool tryLess(int TryVal, int CandVal,
-             GenericSchedulerBase::SchedCandidate &TryCand,
-             GenericSchedulerBase::SchedCandidate &Cand,
-             GenericSchedulerBase::CandReason Reason) {
+bool llvm::tryLess(int TryVal, int CandVal,
+                   GenericSchedulerBase::SchedCandidate &TryCand,
+                   GenericSchedulerBase::SchedCandidate &Cand,
+                   GenericSchedulerBase::CandReason Reason) {
   if (TryVal < CandVal) {
     TryCand.Reason = Reason;
     return true;
@@ -3465,10 +3452,10 @@ bool tryLess(int TryVal, int CandVal,
   return false;
 }
 
-bool tryGreater(int TryVal, int CandVal,
-                GenericSchedulerBase::SchedCandidate &TryCand,
-                GenericSchedulerBase::SchedCandidate &Cand,
-                GenericSchedulerBase::CandReason Reason) {
+bool llvm::tryGreater(int TryVal, int CandVal,
+                      GenericSchedulerBase::SchedCandidate &TryCand,
+                      GenericSchedulerBase::SchedCandidate &Cand,
+                      GenericSchedulerBase::CandReason Reason) {
   if (TryVal > CandVal) {
     TryCand.Reason = Reason;
     return true;
@@ -3481,9 +3468,9 @@ bool tryGreater(int TryVal, int CandVal,
   return false;
 }
 
-bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
-                GenericSchedulerBase::SchedCandidate &Cand,
-                SchedBoundary &Zone) {
+bool llvm::tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
+                      GenericSchedulerBase::SchedCandidate &Cand,
+                      SchedBoundary &Zone) {
   if (Zone.isTop()) {
     // Prefer the candidate with the lesser depth, but only if one of them has
    // depth greater than the total latency scheduled so far, otherwise either
@@ -3513,7 +3500,6 @@ bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
   }
   return false;
 }
-} // end namespace llvm
 
 static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop,
                       bool IsPostRA = false) {
@@ -3798,14 +3784,12 @@ void GenericScheduler::registerRoots() {
   }
 }
 
-namespace llvm {
-bool tryPressure(const PressureChange &TryP,
-                 const PressureChange &CandP,
-                 GenericSchedulerBase::SchedCandidate &TryCand,
-                 GenericSchedulerBase::SchedCandidate &Cand,
-                 GenericSchedulerBase::CandReason Reason,
-                 const TargetRegisterInfo *TRI,
-                 const MachineFunction &MF) {
+bool llvm::tryPressure(const PressureChange &TryP, const PressureChange &CandP,
+                       GenericSchedulerBase::SchedCandidate &TryCand,
+                       GenericSchedulerBase::SchedCandidate &Cand,
+                       GenericSchedulerBase::CandReason Reason,
+                       const TargetRegisterInfo *TRI,
+                       const MachineFunction &MF) {
   // If one candidate decreases and the other increases, go with it.
   // Invalid candidates have UnitInc==0.
   if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
@@ -3838,7 +3822,7 @@ bool tryPressure(const PressureChange &TryP,
   return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
 }
 
-unsigned getWeakLeft(const SUnit *SU, bool isTop) {
+unsigned llvm::getWeakLeft(const SUnit *SU, bool isTop) {
   return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
 }
 
@@ -3849,7 +3833,7 @@ unsigned getWeakLeft(const SUnit *SU, bool isTop) {
 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
 /// with the operation that produces or consumes the physreg. We'll do this when
 /// regalloc has support for parallel copies.
-int biasPhysReg(const SUnit *SU, bool isTop) {
+int llvm::biasPhysReg(const SUnit *SU, bool isTop) {
   const MachineInstr *MI = SU->getInstr();
 
   if (MI->isCopy()) {
@@ -3884,7 +3868,6 @@ int biasPhysReg(const SUnit *SU, bool isTop) {
 
   return 0;
 }
-} // end namespace llvm
 
 void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                      bool AtTop,
@@ -4812,13 +4795,13 @@ static MachineSchedRegistry ShufflerRegistry(
 //===----------------------------------------------------------------------===//
 
 #ifndef NDEBUG
-namespace llvm {
 
-template<> struct GraphTraits<
-  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
+template <>
+struct llvm::GraphTraits<ScheduleDAGMI *> : public GraphTraits<ScheduleDAG *> {
+};
 
-template<>
-struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
+template <>
+struct llvm::DOTGraphTraits<ScheduleDAGMI *> : public DefaultDOTGraphTraits {
   DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
 
   static std::string getGraphName(const ScheduleDAG *G) {
@@ -4878,7 +4861,6 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
   }
 };
 
-} // end namespace llvm
 #endif // NDEBUG
 
 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
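The tryLess/tryGreater helpers given qualified names above implement a tie-breaking protocol: return true when a criterion decides the order (recording the reason on the winner), false to fall through to the next heuristic. A simplified standalone sketch of tryLess (types reduced to the bare pattern):

#include <cassert>

enum Reason { NoCand, PhysReg, RegExcess, Latency }; // priority order

struct Candidate {
  Reason R = NoCand;
};

static bool tryLess(int TryVal, int CandVal, Candidate &TryCand,
                    Candidate &Cand, Reason Why) {
  if (TryVal < CandVal) {
    TryCand.R = Why; // TryCand wins on this criterion.
    return true;
  }
  if (TryVal > CandVal) {
    if (Cand.R > Why)
      Cand.R = Why; // Cand wins; keep the strongest deciding reason.
    return true;
  }
  return false; // Tie: defer to the next heuristic.
}

int main() {
  Candidate A, B;
  assert(tryLess(1, 2, A, B, Latency) && A.R == Latency);
  assert(!tryLess(3, 3, A, B, Latency)); // tie falls through
}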
void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { LLVM_DEBUG(dbgs() << "Computing " << getName() << " trace through " diff --git a/llvm/lib/CodeGen/NonRelocatableStringpool.cpp b/llvm/lib/CodeGen/NonRelocatableStringpool.cpp index 087ac62..59c587c 100644 --- a/llvm/lib/CodeGen/NonRelocatableStringpool.cpp +++ b/llvm/lib/CodeGen/NonRelocatableStringpool.cpp @@ -9,7 +9,7 @@ #include "llvm/CodeGen/NonRelocatableStringpool.h" #include "llvm/ADT/STLExtras.h" -namespace llvm { +using namespace llvm; DwarfStringPoolEntryRef NonRelocatableStringpool::getEntry(StringRef S) { auto I = Strings.try_emplace(S); @@ -43,5 +43,3 @@ NonRelocatableStringpool::getEntriesForEmission() const { }); return Result; } - -} // namespace llvm diff --git a/llvm/lib/CodeGen/SafeStack.cpp b/llvm/lib/CodeGen/SafeStack.cpp index 6f373a5..e9ffa85 100644 --- a/llvm/lib/CodeGen/SafeStack.cpp +++ b/llvm/lib/CodeGen/SafeStack.cpp @@ -76,8 +76,6 @@ using namespace llvm::safestack; #define DEBUG_TYPE "safe-stack" -namespace llvm { - STATISTIC(NumFunctions, "Total number of functions"); STATISTIC(NumUnsafeStackFunctions, "Number of functions with unsafe stack"); STATISTIC(NumUnsafeStackRestorePointsFunctions, @@ -89,8 +87,6 @@ STATISTIC(NumUnsafeDynamicAllocas, "Number of unsafe dynamic allocas"); STATISTIC(NumUnsafeByValArguments, "Number of unsafe byval arguments"); STATISTIC(NumUnsafeStackRestorePoints, "Number of setjmps and landingpads"); -} // namespace llvm - /// Use __safestack_pointer_address even if the platform has a faster way of /// access safe stack pointer. static cl::opt<bool> diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index eae2e8c..9662511 100644 --- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -1551,14 +1551,10 @@ LLVM_DUMP_METHOD void ILPValue::dump() const { dbgs() << *this << '\n'; } -namespace llvm { - -LLVM_ATTRIBUTE_UNUSED -raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) { +[[maybe_unused]] +raw_ostream &llvm::operator<<(raw_ostream &OS, const ILPValue &Val) { Val.print(OS); return OS; } -} // end namespace llvm - #endif diff --git a/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp index e7b1494..c80eade 100644 --- a/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -16,57 +16,51 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -namespace llvm { - template<> - struct DOTGraphTraits<ScheduleDAG*> : public DefaultDOTGraphTraits { +template <> +struct llvm::DOTGraphTraits<ScheduleDAG *> : public DefaultDOTGraphTraits { - DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} - static std::string getGraphName(const ScheduleDAG *G) { - return std::string(G->MF.getName()); - } + static std::string getGraphName(const ScheduleDAG *G) { + return std::string(G->MF.getName()); + } - static bool renderGraphFromBottomUp() { - return true; - } + static bool renderGraphFromBottomUp() { return true; } - static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) { - return (Node->NumPreds > 10 || Node->NumSuccs > 10); - } + static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) { + return (Node->NumPreds > 10 || Node->NumSuccs > 10); + } - static std::string getNodeIdentifierLabel(const SUnit *Node, - const ScheduleDAG *Graph) { - std::string R; - raw_string_ostream OS(R); - OS << 
static_cast<const void *>(Node); - return R; - } + static std::string getNodeIdentifierLabel(const SUnit *Node, + const ScheduleDAG *Graph) { + std::string R; + raw_string_ostream OS(R); + OS << static_cast<const void *>(Node); + return R; + } - /// If you want to override the dot attributes printed for a particular - /// edge, override this method. - static std::string getEdgeAttributes(const SUnit *Node, - SUnitIterator EI, - const ScheduleDAG *Graph) { - if (EI.isArtificialDep()) - return "color=cyan,style=dashed"; - if (EI.isCtrlDep()) - return "color=blue,style=dashed"; - return ""; - } + /// If you want to override the dot attributes printed for a particular + /// edge, override this method. + static std::string getEdgeAttributes(const SUnit *Node, SUnitIterator EI, + const ScheduleDAG *Graph) { + if (EI.isArtificialDep()) + return "color=cyan,style=dashed"; + if (EI.isCtrlDep()) + return "color=blue,style=dashed"; + return ""; + } + std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *Graph); + static std::string getNodeAttributes(const SUnit *N, + const ScheduleDAG *Graph) { + return "shape=Mrecord"; + } - std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *Graph); - static std::string getNodeAttributes(const SUnit *N, - const ScheduleDAG *Graph) { - return "shape=Mrecord"; - } - - static void addCustomGraphFeatures(ScheduleDAG *G, - GraphWriter<ScheduleDAG*> &GW) { - return G->addCustomGraphFeatures(GW); - } - }; -} + static void addCustomGraphFeatures(ScheduleDAG *G, + GraphWriter<ScheduleDAG *> &GW) { + return G->addCustomGraphFeatures(GW); + } +}; std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU, const ScheduleDAG *G) { diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b1accdd..787a81a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -509,6 +509,7 @@ namespace { SDValue visitFMUL(SDNode *N); template <class MatchContextClass> SDValue visitFMA(SDNode *N); SDValue visitFMAD(SDNode *N); + SDValue visitFMULADD(SDNode *N); SDValue visitFDIV(SDNode *N); SDValue visitFREM(SDNode *N); SDValue visitFSQRT(SDNode *N); @@ -1991,6 +1992,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FMUL: return visitFMUL(N); case ISD::FMA: return visitFMA<EmptyMatchContext>(N); case ISD::FMAD: return visitFMAD(N); + case ISD::FMULADD: return visitFMULADD(N); case ISD::FDIV: return visitFDIV(N); case ISD::FREM: return visitFREM(N); case ISD::FSQRT: return visitFSQRT(N); @@ -5042,7 +5044,6 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) { unsigned Opc = N->getOpcode(); bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc); - ConstantSDNode *N1C = isConstOrConstSplat(N1); // X / undef -> undef // X % undef -> undef @@ -5074,7 +5075,7 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) { // division-by-zero or remainder-by-zero, so assume the divisor is 1. // TODO: Similarly, if we're zero-extending a boolean divisor, then assume // it's a 1. - if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1)) + if (isOneOrOneSplat(N1) || (VT.getScalarType() == MVT::i1)) return IsDiv ? 
N0 : DAG.getConstant(0, DL, VT); return SDValue(); @@ -18444,6 +18445,21 @@ SDValue DAGCombiner::visitFMAD(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFMULADD(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + EVT VT = N->getValueType(0); + SDLoc DL(N); + + // Constant fold FMULADD. + if (SDValue C = + DAG.FoldConstantArithmetic(ISD::FMULADD, DL, VT, {N0, N1, N2})) + return C; + + return SDValue(); +} + // Combine multiple FDIVs with the same divisor into multiple FMULs by the // reciprocal. // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 08af74c..90edaf3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5063,8 +5063,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, break; case ISD::ADD: case ISD::ADDC: - // Add can have at most one carry bit. Thus we know that the output - // is, at worst, one more bit than the inputs. + // TODO: Move Operand 1 check before Operand 0 check Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); if (Tmp == 1) return 1; // Early out. @@ -5088,6 +5087,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); if (Tmp2 == 1) return 1; // Early out. + + // Add can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. return std::min(Tmp, Tmp2) - 1; case ISD::SUB: Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); @@ -5786,6 +5788,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, case ISD::FCOPYSIGN: case ISD::FMA: case ISD::FMAD: + case ISD::FMULADD: case ISD::FP_EXTEND: case ISD::FP_TO_SINT_SAT: case ISD::FP_TO_UINT_SAT: @@ -5904,6 +5907,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, case ISD::FCOSH: case ISD::FTANH: case ISD::FMA: + case ISD::FMULADD: case ISD::FMAD: { if (SNaN) return true; @@ -6401,8 +6405,9 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT, if (VT.isScalableVector()) return SDValue(); - // A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be - // simplified to one big BUILD_VECTOR. + // A CONCAT_VECTOR of scalar sources, such as UNDEF, BUILD_VECTOR and + // single-element INSERT_VECTOR_ELT operands can be simplified to one big + // BUILD_VECTOR. // FIXME: Add support for SCALAR_TO_VECTOR as well. EVT SVT = VT.getScalarType(); SmallVector<SDValue, 16> Elts; @@ -6412,6 +6417,10 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT, Elts.append(OpVT.getVectorNumElements(), DAG.getUNDEF(SVT)); else if (Op.getOpcode() == ISD::BUILD_VECTOR) Elts.append(Op->op_begin(), Op->op_end()); + else if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && + OpVT.getVectorNumElements() == 1 && + isNullConstant(Op.getOperand(2))) + Elts.push_back(Op.getOperand(1)); else return SDValue(); } @@ -7231,7 +7240,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, } // Handle fma/fmad special cases. 
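
The new visitFMULADD hook above only constant-folds the node, and the FoldConstantArithmetic hunk below folds FMULADD the same way as FMAD: the multiply and the add are rounded separately, unlike FMA's single fused rounding. A standalone C++ illustration of that difference; the operand values are chosen specifically so the two results diverge:

    #include <cmath>
    #include <cstdio>

    int main() {
      // (2^27 + 1)^2 = 2^54 + 2^28 + 1; the trailing 1 does not survive
      // rounding to double, because ulp(2^54) == 4.
      double A = 0x1p27 + 1.0;
      double B = 0x1p27 + 1.0;
      double C = -(0x1p54 + 0x1p28);

      double TwoStep = A * B + C;       // FMAD/FMULADD folding: two roundings
      double Fused = std::fma(A, B, C); // FMA folding: one fused rounding

      std::printf("two-step = %g, fused = %g\n", TwoStep, Fused); // 0 vs 1
      return 0;
    }
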
- if (Opcode == ISD::FMA || Opcode == ISD::FMAD) { + if (Opcode == ISD::FMA || Opcode == ISD::FMAD || Opcode == ISD::FMULADD) { assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); assert(Ops[0].getValueType() == VT && Ops[1].getValueType() == VT && Ops[2].getValueType() == VT && "FMA types must match!"); @@ -7242,7 +7251,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, APFloat V1 = C1->getValueAPF(); const APFloat &V2 = C2->getValueAPF(); const APFloat &V3 = C3->getValueAPF(); - if (Opcode == ISD::FMAD) { + if (Opcode == ISD::FMAD || Opcode == ISD::FMULADD) { V1.multiply(V2, APFloat::rmNearestTiesToEven); V1.add(V3, APFloat::rmNearestTiesToEven); } else @@ -8781,7 +8790,7 @@ static SDValue getMemcpyLoadsAndStores( if (Value.getNode()) { Store = DAG.getStore( Chain, dl, Value, - DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl), + DAG.getObjectPtrOffset(dl, Dst, TypeSize::getFixed(DstOff)), DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo); OutChains.push_back(Store); } @@ -8797,7 +8806,7 @@ static SDValue getMemcpyLoadsAndStores( assert(NVT.bitsGE(VT)); bool isDereferenceable = - SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); + SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); MachineMemOperand::Flags SrcMMOFlags = MMOFlags; if (isDereferenceable) SrcMMOFlags |= MachineMemOperand::MODereferenceable; @@ -8806,14 +8815,14 @@ static SDValue getMemcpyLoadsAndStores( Value = DAG.getExtLoad( ISD::EXTLOAD, dl, NVT, Chain, - DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl), + DAG.getObjectPtrOffset(dl, Src, TypeSize::getFixed(SrcOff)), SrcPtrInfo.getWithOffset(SrcOff), VT, commonAlignment(*SrcAlign, SrcOff), SrcMMOFlags, NewAAInfo); OutLoadChains.push_back(Value.getValue(1)); Store = DAG.getTruncStore( Chain, dl, Value, - DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl), + DAG.getObjectPtrOffset(dl, Dst, TypeSize::getFixed(DstOff)), DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags, NewAAInfo); OutStoreChains.push_back(Store); } @@ -8943,14 +8952,14 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, SDValue Value; bool isDereferenceable = - SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); + SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); MachineMemOperand::Flags SrcMMOFlags = MMOFlags; if (isDereferenceable) SrcMMOFlags |= MachineMemOperand::MODereferenceable; Value = DAG.getLoad( VT, dl, Chain, - DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl), + DAG.getObjectPtrOffset(dl, Src, TypeSize::getFixed(SrcOff)), SrcPtrInfo.getWithOffset(SrcOff), *SrcAlign, SrcMMOFlags, NewAAInfo); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); @@ -8965,7 +8974,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, Store = DAG.getStore( Chain, dl, LoadValues[i], - DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl), + DAG.getObjectPtrOffset(dl, Dst, TypeSize::getFixed(DstOff)), DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo); OutChains.push_back(Store); DstOff += VTSize; @@ -9097,7 +9106,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, assert(Value.getValueType() == VT && "Value with wrong type."); SDValue Store = DAG.getStore( Chain, dl, Value, - DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl), + DAG.getObjectPtrOffset(dl, Dst, TypeSize::getFixed(DstOff)), 
DstPtrInfo.getWithOffset(DstOff), Alignment, isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone, NewAAInfo); @@ -11844,25 +11853,38 @@ SDValue SelectionDAG::getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, /// getNodeIfExists - Get the specified node if it's already available, or /// else return NULL. SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, - ArrayRef<SDValue> Ops) { + ArrayRef<SDValue> Ops, + bool AllowCommute) { SDNodeFlags Flags; if (Inserter) Flags = Inserter->getFlags(); - return getNodeIfExists(Opcode, VTList, Ops, Flags); + return getNodeIfExists(Opcode, VTList, Ops, Flags, AllowCommute); } SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef<SDValue> Ops, - const SDNodeFlags Flags) { - if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) { + const SDNodeFlags Flags, + bool AllowCommute) { + if (VTList.VTs[VTList.NumVTs - 1] == MVT::Glue) + return nullptr; + + auto Lookup = [&](ArrayRef<SDValue> LookupOps) -> SDNode * { FoldingSetNodeID ID; - AddNodeIDNode(ID, Opcode, VTList, Ops); + AddNodeIDNode(ID, Opcode, VTList, LookupOps); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, SDLoc(), IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) { E->intersectFlagsWith(Flags); return E; } - } + return nullptr; + }; + + if (SDNode *Existing = Lookup(Ops)) + return Existing; + + if (AllowCommute && TLI->isCommutativeBinOp(Opcode)) + return Lookup({Ops[1], Ops[0]}); + return nullptr; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index c21890a..0f2b518 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6996,6 +6996,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)), Flags)); + } else if (TLI.isOperationLegalOrCustom(ISD::FMULADD, VT)) { + // TODO: Support splitting the vector. + setValue(&I, DAG.getNode(ISD::FMULADD, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)), Flags)); } else { // TODO: Intrinsic calls should have fast-math-flags. 
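
The SelectionDAGBuilder hunk above gives llvm.fmuladd a middle lowering tier: fuse to FMA when profitable, otherwise keep a single ISD::FMULADD node if the target accepts one, and only then fall back to the separate multiply and add built below. A condensed sketch of that dispatch; the two boolean parameters are stand-ins for the TLI queries (isFMAFasterThanFMulAndFAdd and isOperationLegalOrCustom), not the real API:

    #include <cstdio>

    enum class FMulAddLowering { FusedFMA, KeepFMulAdd, MulThenAdd };

    FMulAddLowering chooseLowering(bool FMAProfitable, bool FMulAddLegal) {
      if (FMAProfitable)
        return FMulAddLowering::FusedFMA;    // single ISD::FMA node
      if (FMulAddLegal)
        return FMulAddLowering::KeepFMulAdd; // new path: one ISD::FMULADD node
      return FMulAddLowering::MulThenAdd;    // fallback: ISD::FMUL + ISD::FADD
    }

    int main() {
      std::printf("%d\n", static_cast<int>(chooseLowering(false, true))); // 1
      return 0;
    }
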
SDValue Mul = DAG.getNode( diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index fcfbfe6..39cbfad 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -310,6 +310,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FMA: return "fma"; case ISD::STRICT_FMA: return "strict_fma"; case ISD::FMAD: return "fmad"; + case ISD::FMULADD: return "fmuladd"; case ISD::FREM: return "frem"; case ISD::STRICT_FREM: return "strict_frem"; case ISD::FCOPYSIGN: return "fcopysign"; diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index cc503d3..920dff9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -7676,6 +7676,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, break; } case ISD::FMA: + case ISD::FMULADD: case ISD::FMAD: { if (!Flags.hasNoSignedZeros()) break; diff --git a/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp b/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp index 64e5cd5..95a9c3f 100644 --- a/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp +++ b/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp @@ -306,10 +306,7 @@ char &llvm::StackFrameLayoutAnalysisPassID = StackFrameLayoutAnalysisLegacy::ID; INITIALIZE_PASS(StackFrameLayoutAnalysisLegacy, "stack-frame-layout", "Stack Frame Layout", false, false) -namespace llvm { /// Returns a newly-created StackFrameLayout pass. -MachineFunctionPass *createStackFrameLayoutAnalysisPass() { +MachineFunctionPass *llvm::createStackFrameLayoutAnalysisPass() { return new StackFrameLayoutAnalysisLegacy(); } - -} // namespace llvm diff --git a/llvm/lib/CodeGen/StaticDataAnnotator.cpp b/llvm/lib/CodeGen/StaticDataAnnotator.cpp index 53a9ab4..eac20120 100644 --- a/llvm/lib/CodeGen/StaticDataAnnotator.cpp +++ b/llvm/lib/CodeGen/StaticDataAnnotator.cpp @@ -75,22 +75,11 @@ bool StaticDataAnnotator::runOnModule(Module &M) { bool Changed = false; for (auto &GV : M.globals()) { - if (GV.isDeclarationForLinker()) + if (!llvm::memprof::IsAnnotationOK(GV)) continue; - // The implementation below assumes prior passes don't set section prefixes, - // and specifically do 'assign' rather than 'update'. So report error if a - // section prefix is already set. - if (auto maybeSectionPrefix = GV.getSectionPrefix(); - maybeSectionPrefix && !maybeSectionPrefix->empty()) - llvm::report_fatal_error("Global variable " + GV.getName() + - " already has a section prefix " + - *maybeSectionPrefix); - StringRef SectionPrefix = SDPI->getConstantSectionPrefix(&GV, PSI); - if (SectionPrefix.empty()) - continue; - + // setSectionPrefix returns true if the section prefix is updated. Changed |= GV.setSectionPrefix(SectionPrefix); } diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp index e22dc25..1593a40 100644 --- a/llvm/lib/CodeGen/StaticDataSplitter.cpp +++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp @@ -130,10 +130,8 @@ StaticDataSplitter::getConstant(const MachineOperand &Op, if (Op.isGlobal()) { // Find global variables with local linkage. const GlobalVariable *GV = getLocalLinkageGlobalVariable(Op.getGlobal()); - // Skip 'llvm.'-prefixed global variables conservatively because they are - // often handled specially, and skip those not in static data - // sections. 
- if (!GV || GV->getName().starts_with("llvm.") || + // Skip those not eligible for annotation or not in static data sections. + if (!GV || !llvm::memprof::IsAnnotationOK(*GV) || !inStaticDataSection(*GV, TM)) return nullptr; return GV; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index c23281a..060b1dd 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -815,7 +815,8 @@ void TargetLoweringBase::initActions() { ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN, ISD::FCOSH, ISD::FSINH, - ISD::FTANH, ISD::FATAN2}, + ISD::FTANH, ISD::FATAN2, + ISD::FMULADD}, VT, Expand); // Overflow operations default to expand diff --git a/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/llvm/lib/CodeGen/TargetRegisterInfo.cpp index c9e4618..971f822 100644 --- a/llvm/lib/CodeGen/TargetRegisterInfo.cpp +++ b/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -102,10 +102,8 @@ bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet, return true; } -namespace llvm { - -Printable printReg(Register Reg, const TargetRegisterInfo *TRI, - unsigned SubIdx, const MachineRegisterInfo *MRI) { +Printable llvm::printReg(Register Reg, const TargetRegisterInfo *TRI, + unsigned SubIdx, const MachineRegisterInfo *MRI) { return Printable([Reg, TRI, SubIdx, MRI](raw_ostream &OS) { if (!Reg) OS << "$noreg"; @@ -135,7 +133,7 @@ Printable printReg(Register Reg, const TargetRegisterInfo *TRI, }); } -Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) { +Printable llvm::printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) { return Printable([Unit, TRI](raw_ostream &OS) { // Generic printout when TRI is missing. if (!TRI) { @@ -158,7 +156,7 @@ Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) { }); } -Printable printVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) { +Printable llvm::printVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) { return Printable([Unit, TRI](raw_ostream &OS) { if (Register::isVirtualRegister(Unit)) { OS << '%' << Register(Unit).virtRegIndex(); @@ -168,8 +166,9 @@ Printable printVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) { }); } -Printable printRegClassOrBank(Register Reg, const MachineRegisterInfo &RegInfo, - const TargetRegisterInfo *TRI) { +Printable llvm::printRegClassOrBank(Register Reg, + const MachineRegisterInfo &RegInfo, + const TargetRegisterInfo *TRI) { return Printable([Reg, &RegInfo, TRI](raw_ostream &OS) { if (RegInfo.getRegClassOrNull(Reg)) OS << StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower(); @@ -183,8 +182,6 @@ Printable printRegClassOrBank(Register Reg, const MachineRegisterInfo &RegInfo, }); } -} // end namespace llvm - /// getAllocatableClass - Return the maximal subclass of the given register /// class that is allocatable, or NULL. const TargetRegisterClass * diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp index 7a0256f..38e9ac5 100644 --- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp +++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp @@ -629,6 +629,10 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) { size_t NumBefore = Gsym.getNumFunctionInfos(); auto getDie = [&](DWARFUnit &DwarfUnit) -> DWARFDie { DWARFDie ReturnDie = DwarfUnit.getUnitDIE(false); + // Apple uses DW_AT_GNU_dwo_id for things other than split DWARF.
+ if (IsMachO) + return ReturnDie; + if (DwarfUnit.getDWOId()) { DWARFUnit *DWOCU = DwarfUnit.getNonSkeletonUnitDIE(false).getDwarfUnit(); if (!DWOCU->isDWOUnit()) diff --git a/llvm/lib/ExecutionEngine/Orc/MapperJITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/MapperJITLinkMemoryManager.cpp index 33734b8..bb8d2cb 100644 --- a/llvm/lib/ExecutionEngine/Orc/MapperJITLinkMemoryManager.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MapperJITLinkMemoryManager.cpp @@ -90,7 +90,7 @@ void MapperJITLinkMemoryManager::allocate(const JITLinkDylib *JD, LinkGraph &G, auto TotalSize = Seg.ContentSize + Seg.ZeroFillSize; Seg.Addr = NextSegAddr; - Seg.WorkingMem = Mapper->prepare(NextSegAddr, TotalSize); + Seg.WorkingMem = Mapper->prepare(G, NextSegAddr, TotalSize); NextSegAddr += alignTo(TotalSize, Mapper->getPageSize()); diff --git a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp index ea3b22a..7b327af 100644 --- a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp @@ -58,7 +58,8 @@ void InProcessMemoryMapper::reserve(size_t NumBytes, ExecutorAddrRange(ExecutorAddr::fromPtr(MB.base()), MB.allocatedSize())); } -char *InProcessMemoryMapper::prepare(ExecutorAddr Addr, size_t ContentSize) { +char *InProcessMemoryMapper::prepare(jitlink::LinkGraph &G, ExecutorAddr Addr, + size_t ContentSize) { return Addr.toPtr<char *>(); } @@ -324,7 +325,8 @@ void SharedMemoryMapper::reserve(size_t NumBytes, #endif } -char *SharedMemoryMapper::prepare(ExecutorAddr Addr, size_t ContentSize) { +char *SharedMemoryMapper::prepare(jitlink::LinkGraph &G, ExecutorAddr Addr, + size_t ContentSize) { auto R = Reservations.upper_bound(Addr); assert(R != Reservations.begin() && "Attempt to prepare unreserved range"); R--; diff --git a/llvm/lib/IR/ConstantFPRange.cpp b/llvm/lib/IR/ConstantFPRange.cpp index 51d2e21..5b87686 100644 --- a/llvm/lib/IR/ConstantFPRange.cpp +++ b/llvm/lib/IR/ConstantFPRange.cpp @@ -8,6 +8,7 @@ #include "llvm/IR/ConstantFPRange.h" #include "llvm/ADT/APFloat.h" +#include "llvm/ADT/FloatingPointMode.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <cassert> @@ -506,3 +507,168 @@ ConstantFPRange ConstantFPRange::sub(const ConstantFPRange &Other) const { // fsub X, Y = fadd X, (fneg Y) return add(Other.negate()); } + +void ConstantFPRange::flushDenormals(DenormalMode::DenormalModeKind Mode) { + if (Mode == DenormalMode::IEEE) + return; + FPClassTest Class = classify(); + if (!(Class & fcSubnormal)) + return; + + auto &Sem = getSemantics(); + // PreserveSign: PosSubnormal -> PosZero, NegSubnormal -> NegZero + // PositiveZero: PosSubnormal -> PosZero, NegSubnormal -> PosZero + // Dynamic: PosSubnormal -> PosZero, NegSubnormal -> NegZero/PosZero + bool ZeroLowerNegative = + Mode != DenormalMode::PositiveZero && (Class & fcNegSubnormal); + bool ZeroUpperNegative = + Mode == DenormalMode::PreserveSign && !(Class & fcPosSubnormal); + assert((ZeroLowerNegative || !ZeroUpperNegative) && + "ZeroLower is greater than ZeroUpper."); + Lower = minnum(Lower, APFloat::getZero(Sem, ZeroLowerNegative)); + Upper = maxnum(Upper, APFloat::getZero(Sem, ZeroUpperNegative)); +} + +/// Represent a contiguous range of values sharing the same sign. +struct SameSignRange { + bool HasZero; + bool HasNonZero; + bool HasInf; + // The lower and upper bounds of the range (inclusive). + // The sign is dropped and infinities are excluded. 
+ std::optional<std::pair<APFloat, APFloat>> FinitePart; + + explicit SameSignRange(const APFloat &Lower, const APFloat &Upper) + : HasZero(Lower.isZero()), HasNonZero(!Upper.isZero()), + HasInf(Upper.isInfinity()) { + assert(!Lower.isNegative() && !Upper.isNegative() && + "The sign should be dropped."); + assert(strictCompare(Lower, Upper) != APFloat::cmpGreaterThan && + "Empty set."); + if (!Lower.isInfinity()) + FinitePart = {Lower, + HasInf ? APFloat::getLargest(Lower.getSemantics()) : Upper}; + } +}; + +/// Split the range into positive and negative components. +static void splitPosNeg(const APFloat &Lower, const APFloat &Upper, + std::optional<SameSignRange> &NegPart, + std::optional<SameSignRange> &PosPart) { + assert(strictCompare(Lower, Upper) != APFloat::cmpGreaterThan && + "Non-NaN part is empty."); + if (Lower.isNegative() == Upper.isNegative()) { + if (Lower.isNegative()) + NegPart = SameSignRange{abs(Upper), abs(Lower)}; + else + PosPart = SameSignRange{Lower, Upper}; + return; + } + auto &Sem = Lower.getSemantics(); + NegPart = SameSignRange{APFloat::getZero(Sem), abs(Lower)}; + PosPart = SameSignRange{APFloat::getZero(Sem), Upper}; +} + +ConstantFPRange ConstantFPRange::mul(const ConstantFPRange &Other) const { + auto &Sem = getSemantics(); + bool ResMayBeQNaN = ((MayBeQNaN || MayBeSNaN) && !Other.isEmptySet()) || + ((Other.MayBeQNaN || Other.MayBeSNaN) && !isEmptySet()); + if (isNaNOnly() || Other.isNaNOnly()) + return getNaNOnly(Sem, /*MayBeQNaN=*/ResMayBeQNaN, + /*MayBeSNaN=*/false); + std::optional<SameSignRange> LHSNeg, LHSPos, RHSNeg, RHSPos; + splitPosNeg(Lower, Upper, LHSNeg, LHSPos); + splitPosNeg(Other.Lower, Other.Upper, RHSNeg, RHSPos); + APFloat ResLower = APFloat::getInf(Sem, /*Negative=*/false); + APFloat ResUpper = APFloat::getInf(Sem, /*Negative=*/true); + auto Update = [&](std::optional<SameSignRange> &LHS, + std::optional<SameSignRange> &RHS, bool Negative) { + if (!LHS || !RHS) + return; + // 0 * inf = QNaN + ResMayBeQNaN |= LHS->HasZero && RHS->HasInf; + ResMayBeQNaN |= RHS->HasZero && LHS->HasInf; + // NonZero * inf = inf + if ((LHS->HasInf && RHS->HasNonZero) || (RHS->HasInf && LHS->HasNonZero)) + (Negative ? 
ResLower : ResUpper) = APFloat::getInf(Sem, Negative); + // Finite * Finite + if (LHS->FinitePart && RHS->FinitePart) { + APFloat NewLower = LHS->FinitePart->first * RHS->FinitePart->first; + APFloat NewUpper = LHS->FinitePart->second * RHS->FinitePart->second; + if (Negative) { + ResLower = minnum(ResLower, -NewUpper); + ResUpper = maxnum(ResUpper, -NewLower); + } else { + ResLower = minnum(ResLower, NewLower); + ResUpper = maxnum(ResUpper, NewUpper); + } + } + }; + Update(LHSNeg, RHSNeg, /*Negative=*/false); + Update(LHSNeg, RHSPos, /*Negative=*/true); + Update(LHSPos, RHSNeg, /*Negative=*/true); + Update(LHSPos, RHSPos, /*Negative=*/false); + return ConstantFPRange(ResLower, ResUpper, ResMayBeQNaN, /*MayBeSNaN=*/false); +} + +ConstantFPRange ConstantFPRange::div(const ConstantFPRange &Other) const { + auto &Sem = getSemantics(); + bool ResMayBeQNaN = ((MayBeQNaN || MayBeSNaN) && !Other.isEmptySet()) || + ((Other.MayBeQNaN || Other.MayBeSNaN) && !isEmptySet()); + if (isNaNOnly() || Other.isNaNOnly()) + return getNaNOnly(Sem, /*MayBeQNaN=*/ResMayBeQNaN, + /*MayBeSNaN=*/false); + std::optional<SameSignRange> LHSNeg, LHSPos, RHSNeg, RHSPos; + splitPosNeg(Lower, Upper, LHSNeg, LHSPos); + splitPosNeg(Other.Lower, Other.Upper, RHSNeg, RHSPos); + APFloat ResLower = APFloat::getInf(Sem, /*Negative=*/false); + APFloat ResUpper = APFloat::getInf(Sem, /*Negative=*/true); + auto Update = [&](std::optional<SameSignRange> &LHS, + std::optional<SameSignRange> &RHS, bool Negative) { + if (!LHS || !RHS) + return; + // inf / inf = QNaN 0 / 0 = QNaN + ResMayBeQNaN |= LHS->HasInf && RHS->HasInf; + ResMayBeQNaN |= LHS->HasZero && RHS->HasZero; + // It is not straightforward to infer HasNonZeroFinite = HasFinite && + // HasNonZero. By definitions we have: + // HasFinite = HasNonZeroFinite || HasZero + // HasNonZero = HasNonZeroFinite || HasInf + // Since the range is contiguous, if both HasFinite and HasNonZero are true, + // HasNonZeroFinite must be true. + bool LHSHasNonZeroFinite = LHS->FinitePart && LHS->HasNonZero; + bool RHSHasNonZeroFinite = RHS->FinitePart && RHS->HasNonZero; + // inf / Finite = inf FiniteNonZero / 0 = inf + if ((LHS->HasInf && RHS->FinitePart) || + (LHSHasNonZeroFinite && RHS->HasZero)) + (Negative ? ResLower : ResUpper) = APFloat::getInf(Sem, Negative); + // Finite / inf = 0 + if (LHS->FinitePart && RHS->HasInf) { + APFloat Zero = APFloat::getZero(Sem, /*Negative=*/Negative); + ResLower = minnum(ResLower, Zero); + ResUpper = maxnum(ResUpper, Zero); + } + // Finite / FiniteNonZero + if (LHS->FinitePart && RHSHasNonZeroFinite) { + assert(!RHS->FinitePart->second.isZero() && + "Divisor should be non-zero."); + APFloat NewLower = LHS->FinitePart->first / RHS->FinitePart->second; + APFloat NewUpper = LHS->FinitePart->second / + (RHS->FinitePart->first.isZero() + ? 
APFloat::getSmallest(Sem, /*Negative=*/false) + : RHS->FinitePart->first); + if (Negative) { + ResLower = minnum(ResLower, -NewUpper); + ResUpper = maxnum(ResUpper, -NewLower); + } else { + ResLower = minnum(ResLower, NewLower); + ResUpper = maxnum(ResUpper, NewUpper); + } + } + }; + Update(LHSNeg, RHSNeg, /*Negative=*/false); + Update(LHSNeg, RHSPos, /*Negative=*/true); + Update(LHSPos, RHSNeg, /*Negative=*/true); + Update(LHSPos, RHSPos, /*Negative=*/false); + return ConstantFPRange(ResLower, ResUpper, ResMayBeQNaN, /*MayBeSNaN=*/false); +} diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index 3842b1a..6a9ef2e 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -741,7 +741,8 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, assert(!CI2->isZero() && "And zero handled above"); if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) { // If and'ing the address of a global with a constant, fold it. - if (CE1->getOpcode() == Instruction::PtrToInt && + if ((CE1->getOpcode() == Instruction::PtrToInt || + CE1->getOpcode() == Instruction::PtrToAddr) && isa<GlobalValue>(CE1->getOperand(0))) { GlobalValue *GV = cast<GlobalValue>(CE1->getOperand(0)); diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp index 2c2950c..cbce8bd 100644 --- a/llvm/lib/IR/Constants.cpp +++ b/llvm/lib/IR/Constants.cpp @@ -667,8 +667,11 @@ Constant::PossibleRelocationsTy Constant::getRelocationInfo() const { if (CE->getOpcode() == Instruction::Sub) { ConstantExpr *LHS = dyn_cast<ConstantExpr>(CE->getOperand(0)); ConstantExpr *RHS = dyn_cast<ConstantExpr>(CE->getOperand(1)); - if (LHS && RHS && LHS->getOpcode() == Instruction::PtrToInt && - RHS->getOpcode() == Instruction::PtrToInt) { + if (LHS && RHS && + (LHS->getOpcode() == Instruction::PtrToInt || + LHS->getOpcode() == Instruction::PtrToAddr) && + (RHS->getOpcode() == Instruction::PtrToInt || + RHS->getOpcode() == Instruction::PtrToAddr)) { Constant *LHSOp0 = LHS->getOperand(0); Constant *RHSOp0 = RHS->getOperand(0); diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp index 3f1cc1e..27d8294 100644 --- a/llvm/lib/IR/Core.cpp +++ b/llvm/lib/IR/Core.cpp @@ -4098,15 +4098,8 @@ LLVMValueRef LLVMBuildGlobalStringPtr(LLVMBuilderRef B, const char *Str, return wrap(unwrap(B)->CreateGlobalString(Str, Name)); } -LLVMBool LLVMGetVolatile(LLVMValueRef MemAccessInst) { - Value *P = unwrap(MemAccessInst); - if (LoadInst *LI = dyn_cast<LoadInst>(P)) - return LI->isVolatile(); - if (StoreInst *SI = dyn_cast<StoreInst>(P)) - return SI->isVolatile(); - if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(P)) - return AI->isVolatile(); - return cast<AtomicCmpXchgInst>(P)->isVolatile(); +LLVMBool LLVMGetVolatile(LLVMValueRef Inst) { + return cast<Instruction>(unwrap(Inst))->isVolatile(); } void LLVMSetVolatile(LLVMValueRef MemAccessInst, LLVMBool isVolatile) { diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp index 614c3a9..15c0198 100644 --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -25,6 +25,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/NoFolder.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/ProfDataUtils.h" #include "llvm/IR/Statepoint.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" @@ -1002,6 +1003,18 @@ CallInst *IRBuilderBase::CreateConstrainedFPCall( return C; } +Value *IRBuilderBase::CreateSelectWithUnknownProfile(Value *C, Value *True, + Value *False, + StringRef PassName, + const Twine &Name) { + Value *Ret = CreateSelectFMF(C, True, 
False, {}, Name); + if (auto *SI = dyn_cast<SelectInst>(Ret)) { + setExplicitlyUnknownBranchWeightsIfProfiled( + *SI, *SI->getParent()->getParent(), PassName); + } + return Ret; +} + Value *IRBuilderBase::CreateSelect(Value *C, Value *True, Value *False, const Twine &Name, Instruction *MDFrom) { return CreateSelectFMF(C, True, False, {}, Name, MDFrom); diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index 88e7c44..9060a89 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -2965,8 +2965,7 @@ unsigned CastInst::isEliminableCastPair(Instruction::CastOps firstOp, // zext, sext -> zext, because sext can't sign extend after zext return Instruction::ZExt; case 11: { - // inttoptr, ptrtoint/ptrtoaddr -> bitcast if SrcSize<=PtrSize/AddrSize - // and SrcSize==DstSize + // inttoptr, ptrtoint/ptrtoaddr -> integer cast if (!DL) return 0; unsigned MidSize = secondOp == Instruction::PtrToAddr @@ -2974,10 +2973,15 @@ unsigned CastInst::isEliminableCastPair(Instruction::CastOps firstOp, : DL->getPointerTypeSizeInBits(MidTy); unsigned SrcSize = SrcTy->getScalarSizeInBits(); unsigned DstSize = DstTy->getScalarSizeInBits(); - // TODO: Could also produce zext or trunc here. - if (SrcSize <= MidSize && SrcSize == DstSize) - return Instruction::BitCast; - return 0; + // If the middle size is smaller than both source and destination, + // an additional masking operation would be required. + if (MidSize < SrcSize && MidSize < DstSize) + return 0; + if (DstSize < SrcSize) + return Instruction::Trunc; + if (DstSize > SrcSize) + return Instruction::ZExt; + return Instruction::BitCast; } case 12: // addrspacecast, addrspacecast -> bitcast, if SrcAS == DstAS diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index c9ff86b..c79a950 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -893,7 +893,7 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) { if (GV.hasInitializer()) { const Constant *Init = GV.getInitializer(); const ConstantArray *InitArray = dyn_cast<ConstantArray>(Init); - Check(InitArray, "wrong initalizer for intrinsic global variable", + Check(InitArray, "wrong initializer for intrinsic global variable", Init); for (Value *Op : InitArray->operands()) { Value *V = Op->stripPointerCasts(); diff --git a/llvm/lib/ObjCopy/ConfigManager.cpp b/llvm/lib/ObjCopy/ConfigManager.cpp index eef8a21..6b7b4f1 100644 --- a/llvm/lib/ObjCopy/ConfigManager.cpp +++ b/llvm/lib/ObjCopy/ConfigManager.cpp @@ -122,14 +122,14 @@ ConfigManager::getDXContainerConfig() const { if (!Common.AddGnuDebugLink.empty() || !Common.SplitDWO.empty() || !Common.AllocSectionsPrefix.empty() || Common.DiscardMode != DiscardType::None || !Common.AddSection.empty() || - !Common.DumpSection.empty() || !Common.KeepSection.empty() || - !Common.SectionsToRename.empty() || !Common.SetSectionAlignment.empty() || - !Common.SetSectionFlags.empty() || !Common.SetSectionType.empty() || - Common.ExtractDWO || Common.OnlyKeepDebug || Common.StripAllGNU || - Common.StripDWO || Common.StripDebug || Common.StripNonAlloc || - Common.StripSections || Common.StripUnneeded || - Common.DecompressDebugSections || Common.GapFill != 0 || - Common.PadTo != 0 || Common.ChangeSectionLMAValAll != 0 || + !Common.KeepSection.empty() || !Common.SectionsToRename.empty() || + !Common.SetSectionAlignment.empty() || !Common.SetSectionFlags.empty() || + !Common.SetSectionType.empty() || Common.ExtractDWO || + Common.OnlyKeepDebug || Common.StripAllGNU || Common.StripDWO || + 
Common.StripDebug || Common.StripNonAlloc || Common.StripSections || + Common.StripUnneeded || Common.DecompressDebugSections || + Common.GapFill != 0 || Common.PadTo != 0 || + Common.ChangeSectionLMAValAll != 0 || !Common.ChangeSectionAddress.empty()) { return createStringError(llvm::errc::invalid_argument, "option is not supported for DXContainer"); diff --git a/llvm/lib/ObjCopy/DXContainer/DXContainerObjcopy.cpp b/llvm/lib/ObjCopy/DXContainer/DXContainerObjcopy.cpp index d7f3c0d..95ab3d9 100644 --- a/llvm/lib/ObjCopy/DXContainer/DXContainerObjcopy.cpp +++ b/llvm/lib/ObjCopy/DXContainer/DXContainerObjcopy.cpp @@ -9,8 +9,10 @@ #include "llvm/ObjCopy/DXContainer/DXContainerObjcopy.h" #include "DXContainerReader.h" #include "DXContainerWriter.h" +#include "llvm/BinaryFormat/DXContainer.h" #include "llvm/ObjCopy/CommonConfig.h" #include "llvm/ObjCopy/DXContainer/DXContainerConfig.h" +#include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/raw_ostream.h" namespace llvm { @@ -42,7 +44,47 @@ static Error extractPartAsObject(StringRef PartName, StringRef OutFilename, "part '%s' not found", PartName.str().c_str()); } +static Error dumpPartToFile(StringRef PartName, StringRef Filename, + StringRef InputFilename, Object &Obj) { + auto PartIter = llvm::find_if( + Obj.Parts, [&PartName](const Part &P) { return P.Name == PartName; }); + if (PartIter == Obj.Parts.end()) + return createFileError(Filename, + std::make_error_code(std::errc::invalid_argument), + "part '%s' not found", PartName.str().c_str()); + ArrayRef<uint8_t> Contents = PartIter->Data; + // The DXContainer format is a bit odd because the part-specific headers are + // contained inside the part data itself. For parts that contain LLVM bitcode, + // when we dump the part we want to skip the part-specific header so that we + // get a valid .bc file that we can inspect. All the data contained inside the + // program header is pulled out of the bitcode, so the header can be + // reconstructed if needed from the bitcode itself. More comprehensive + // documentation on the DXContainer format can be found at + // https://llvm.org/docs/DirectX/DXContainer.html. + + if (PartName == "DXIL" || PartName == "STAT") + Contents = Contents.drop_front(sizeof(llvm::dxbc::ProgramHeader)); + if (Contents.empty()) + return createFileError(Filename, object_error::parse_failed, + "part '%s' is empty", PartName.str().c_str()); + Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr = + FileOutputBuffer::create(Filename, Contents.size()); + if (!BufferOrErr) + return createFileError(Filename, BufferOrErr.takeError()); + std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr); + llvm::copy(Contents, Buf->getBufferStart()); + if (Error E = Buf->commit()) + return createFileError(Filename, std::move(E)); + return Error::success(); +} + static Error handleArgs(const CommonConfig &Config, Object &Obj) { + for (StringRef Flag : Config.DumpSection) { + auto [SecName, FileName] = Flag.split("="); + if (Error E = dumpPartToFile(SecName, FileName, Config.InputFilename, Obj)) + return E; + } + // Extract all sections before any modifications.
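
A standalone model of the --dump-section handling just added: each flag is split on '=' into a part name and an output file, and for DXIL/STAT parts the in-part program header is dropped so the dump starts at the bitcode. The header size below is an assumed placeholder for illustration; the real code uses sizeof(llvm::dxbc::ProgramHeader).

    #include <cstdio>
    #include <string>
    #include <utility>

    static std::pair<std::string, std::string> splitFlag(const std::string &Flag) {
      std::size_t Eq = Flag.find('=');
      return {Flag.substr(0, Eq), Flag.substr(Eq + 1)};
    }

    int main() {
      auto [Part, File] = splitFlag("DXIL=out.bc");
      std::size_t HeaderSize = 24; // assumed stand-in for sizeof(ProgramHeader)
      std::size_t Skip = (Part == "DXIL" || Part == "STAT") ? HeaderSize : 0;
      std::printf("dump part %s to %s, skipping %zu header bytes\n",
                  Part.c_str(), File.c_str(), Skip);
      return 0;
    }
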
for (StringRef Flag : Config.ExtractSection) { StringRef SectionName; diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp index 53699ce0..f256e7b 100644 --- a/llvm/lib/Object/ELF.cpp +++ b/llvm/lib/Object/ELF.cpp @@ -837,7 +837,7 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF, Version = Data.getU8(Cur); if (!Cur) break; - if (Version < 2 || Version > 3) + if (Version < 2 || Version > 4) return createError("unsupported SHT_LLVM_BB_ADDR_MAP version: " + Twine(static_cast<int>(Version))); Feature = Data.getU8(Cur); // Feature byte @@ -852,6 +852,11 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF, "callsite offsets feature is enabled: version = " + Twine(static_cast<int>(Version)) + " feature = " + Twine(static_cast<int>(Feature))); + if (FeatEnable.BBHash && Version < 4) + return createError("version should be >= 4 for SHT_LLVM_BB_ADDR_MAP when " + "basic block hash feature is enabled: version = " + + Twine(static_cast<int>(Version)) + + " feature = " + Twine(static_cast<int>(Feature))); uint32_t NumBlocksInBBRange = 0; uint32_t NumBBRanges = 1; typename ELFFile<ELFT>::uintX_t RangeBaseAddress = 0; @@ -907,6 +912,7 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF, uint32_t Size = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr) + LastCallsiteEndOffset; uint32_t MD = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr); + uint64_t Hash = FeatEnable.BBHash ? Data.getU64(Cur) : 0; Expected<BBAddrMap::BBEntry::Metadata> MetadataOrErr = BBAddrMap::BBEntry::Metadata::decode(MD); if (!MetadataOrErr) { @@ -914,7 +920,7 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF, break; } BBEntries.push_back({ID, Offset + PrevBBEndOffset, Size, - *MetadataOrErr, CallsiteEndOffsets}); + *MetadataOrErr, CallsiteEndOffsets, Hash}); PrevBBEndOffset += Offset + Size; } TotalNumBlocks += BBEntries.size(); diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp index faeeab3..8b75fbe 100644 --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -1465,7 +1465,7 @@ void ELFState<ELFT>::writeSectionContent( for (const auto &[Idx, E] : llvm::enumerate(*Section.Entries)) { // Write version and feature values. if (Section.Type == llvm::ELF::SHT_LLVM_BB_ADDR_MAP) { - if (E.Version > 3) + if (E.Version > 4) WithColor::warning() << "unsupported SHT_LLVM_BB_ADDR_MAP version: " << static_cast<int>(E.Version) << "; encoding using the most recent version"; @@ -1526,6 +1526,12 @@ void ELFState<ELFT>::writeSectionContent( } SHeader.sh_size += CBA.writeULEB128(BBE.Size); SHeader.sh_size += CBA.writeULEB128(BBE.Metadata); + if (FeatureOrErr->BBHash || BBE.Hash.has_value()) { + uint64_t Hash = + BBE.Hash.has_value() ? 
BBE.Hash.value() : llvm::yaml::Hex64(0); + CBA.write<uint64_t>(Hash, ELFT::Endianness); + SHeader.sh_size += 8; + } } } if (!PGOAnalyses) diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index d9cce1e..421d6603 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -1887,6 +1887,7 @@ void MappingTraits<ELFYAML::BBAddrMapEntry::BBEntry>::mapping( IO.mapRequired("Size", E.Size); IO.mapRequired("Metadata", E.Metadata); IO.mapOptional("CallsiteEndOffsets", E.CallsiteEndOffsets); + IO.mapOptional("Hash", E.Hash); } void MappingTraits<ELFYAML::PGOAnalysisMapEntry>::mapping( diff --git a/llvm/lib/Support/DebugCounter.cpp b/llvm/lib/Support/DebugCounter.cpp index 6b65720..5ab1def 100644 --- a/llvm/lib/Support/DebugCounter.cpp +++ b/llvm/lib/Support/DebugCounter.cpp @@ -136,6 +136,13 @@ struct DebugCounterOwner : DebugCounter { cl::location(this->ShouldPrintCounter), cl::init(false), cl::desc("Print out debug counter info after all counters accumulated")}; + cl::opt<bool, true> PrintDebugCounterQueries{ + "print-debug-counter-queries", + cl::Hidden, + cl::Optional, + cl::location(this->ShouldPrintCounterQueries), + cl::init(false), + cl::desc("Print out each query of an enabled debug counter")}; cl::opt<bool, true> BreakOnLastCount{ "debug-counter-break-on-last", cl::Hidden, @@ -221,31 +228,40 @@ void DebugCounter::print(raw_ostream &OS) const { } } +bool DebugCounter::handleCounterIncrement(CounterInfo &Info) { + int64_t CurrCount = Info.Count++; + uint64_t CurrIdx = Info.CurrChunkIdx; + + if (Info.Chunks.empty()) + return true; + if (CurrIdx >= Info.Chunks.size()) + return false; + + bool Res = Info.Chunks[CurrIdx].contains(CurrCount); + if (BreakOnLast && CurrIdx == (Info.Chunks.size() - 1) && + CurrCount == Info.Chunks[CurrIdx].End) { + LLVM_BUILTIN_DEBUGTRAP; + } + if (CurrCount > Info.Chunks[CurrIdx].End) { + Info.CurrChunkIdx++; + + /// Handle consecutive blocks. + if (Info.CurrChunkIdx < Info.Chunks.size() && + CurrCount == Info.Chunks[Info.CurrChunkIdx].Begin) + return true; + } + return Res; +} + bool DebugCounter::shouldExecuteImpl(unsigned CounterName) { auto &Us = instance(); auto Result = Us.Counters.find(CounterName); if (Result != Us.Counters.end()) { auto &CounterInfo = Result->second; - int64_t CurrCount = CounterInfo.Count++; - uint64_t CurrIdx = CounterInfo.CurrChunkIdx; - - if (CounterInfo.Chunks.empty()) - return true; - if (CurrIdx >= CounterInfo.Chunks.size()) - return false; - - bool Res = CounterInfo.Chunks[CurrIdx].contains(CurrCount); - if (Us.BreakOnLast && CurrIdx == (CounterInfo.Chunks.size() - 1) && - CurrCount == CounterInfo.Chunks[CurrIdx].End) { - LLVM_BUILTIN_DEBUGTRAP; - } - if (CurrCount > CounterInfo.Chunks[CurrIdx].End) { - CounterInfo.CurrChunkIdx++; - - /// Handle consecutive blocks. - if (CounterInfo.CurrChunkIdx < CounterInfo.Chunks.size() && - CurrCount == CounterInfo.Chunks[CounterInfo.CurrChunkIdx].Begin) - return true; + bool Res = Us.handleCounterIncrement(CounterInfo); + if (Us.ShouldPrintCounterQueries && CounterInfo.IsSet) { + dbgs() << "DebugCounter " << Us.RegisteredCounters[CounterName] << "=" + << (CounterInfo.Count - 1) << (Res ? 
" execute" : " skip") << "\n"; } return Res; } diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index 80fd485..549c418 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -55,12 +55,20 @@ Error SpecialCaseList::RegexMatcher::insert(StringRef Pattern, return Error::success(); } +void SpecialCaseList::RegexMatcher::preprocess(bool BySize) { + if (BySize) { + llvm::stable_sort(RegExes, [](const Reg &A, const Reg &B) { + return A.Name.size() < B.Name.size(); + }); + } +} + void SpecialCaseList::RegexMatcher::match( StringRef Query, llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const { for (const auto &R : reverse(RegExes)) if (R.Rg.match(Query)) - Cb(R.Name, R.LineNo); + return Cb(R.Name, R.LineNo); } Error SpecialCaseList::GlobMatcher::insert(StringRef Pattern, @@ -75,12 +83,20 @@ Error SpecialCaseList::GlobMatcher::insert(StringRef Pattern, return Error::success(); } +void SpecialCaseList::GlobMatcher::preprocess(bool BySize) { + if (BySize) { + llvm::stable_sort(Globs, [](const Glob &A, const Glob &B) { + return A.Name.size() < B.Name.size(); + }); + } +} + void SpecialCaseList::GlobMatcher::match( StringRef Query, llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const { for (const auto &G : reverse(Globs)) if (G.Pattern.match(Query)) - Cb(G.Name, G.LineNo); + return Cb(G.Name, G.LineNo); } SpecialCaseList::Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash) @@ -91,6 +107,14 @@ SpecialCaseList::Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash) M.emplace<RegexMatcher>(); } +Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber) { + return std::visit([&](auto &V) { return V.insert(Pattern, LineNumber); }, M); +} + +LLVM_ABI void SpecialCaseList::Matcher::preprocess(bool BySize) { + return std::visit([&](auto &V) { return V.preprocess(BySize); }, M); +} + void SpecialCaseList::Matcher::match( StringRef Query, llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const { @@ -99,10 +123,6 @@ void SpecialCaseList::Matcher::match( return std::visit([&](auto &V) { return V.match(Query, Cb); }, M); } -Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber) { - return std::visit([&](auto &V) { return V.insert(Pattern, LineNumber); }, M); -} - // TODO: Refactor this to return Expected<...> std::unique_ptr<SpecialCaseList> SpecialCaseList::create(const std::vector<std::string> &Paths, @@ -141,7 +161,7 @@ bool SpecialCaseList::createInternal(const std::vector<std::string> &Paths, return false; } std::string ParseError; - if (!parse(i, FileOrErr.get().get(), ParseError)) { + if (!parse(i, FileOrErr.get().get(), ParseError, /*OrderBySize=*/false)) { Error = (Twine("error parsing file '") + Path + "': " + ParseError).str(); return false; } @@ -149,9 +169,9 @@ bool SpecialCaseList::createInternal(const std::vector<std::string> &Paths, return true; } -bool SpecialCaseList::createInternal(const MemoryBuffer *MB, - std::string &Error) { - if (!parse(0, MB, Error)) +bool SpecialCaseList::createInternal(const MemoryBuffer *MB, std::string &Error, + bool OrderBySize) { + if (!parse(0, MB, Error, OrderBySize)) return false; return true; } @@ -174,7 +194,7 @@ SpecialCaseList::addSection(StringRef SectionStr, unsigned FileNo, } bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB, - std::string &Error) { + std::string &Error, bool OrderBySize) { unsigned long long Version = 2; StringRef Header = MB->getBuffer(); @@ -246,6 +266,10 
@@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB, return false; } } + + for (Section &S : Sections) + S.preprocess(OrderBySize); + return true; } @@ -283,6 +307,13 @@ SpecialCaseList::Section::findMatcher(StringRef Prefix, return &II->second; } +LLVM_ABI void SpecialCaseList::Section::preprocess(bool OrderBySize) { + SectionMatcher.preprocess(false); + for (auto &[K1, E] : Entries) + for (auto &[K2, M] : E) + M.preprocess(OrderBySize); +} + unsigned SpecialCaseList::Section::getLastMatch(StringRef Prefix, StringRef Query, StringRef Category) const { diff --git a/llvm/lib/Support/TextEncoding.cpp b/llvm/lib/Support/TextEncoding.cpp index 804ff07..41f5187 100644 --- a/llvm/lib/Support/TextEncoding.cpp +++ b/llvm/lib/Support/TextEncoding.cpp @@ -54,9 +54,9 @@ static std::optional<TextEncoding> getKnownEncoding(StringRef Name) { return std::nullopt; } -LLVM_ATTRIBUTE_UNUSED static void -HandleOverflow(size_t &Capacity, char *&Output, size_t &OutputLength, - SmallVectorImpl<char> &Result) { +[[maybe_unused]] static void HandleOverflow(size_t &Capacity, char *&Output, + size_t &OutputLength, + SmallVectorImpl<char> &Result) { // No space left in output buffer. Double the size of the underlying // memory in the SmallVectorImpl, adjust pointer and length and continue // the conversion. diff --git a/llvm/lib/Support/UnicodeNameToCodepoint.cpp b/llvm/lib/Support/UnicodeNameToCodepoint.cpp index 8d66348..6f8e091 100644 --- a/llvm/lib/Support/UnicodeNameToCodepoint.cpp +++ b/llvm/lib/Support/UnicodeNameToCodepoint.cpp @@ -476,7 +476,7 @@ nearestMatchesForCodepointName(StringRef Pattern, std::size_t MaxMatchesCount) { std::min(NormalizedName.size(), UnicodeNameToCodepointLargestNameSize) + 1; - LLVM_ATTRIBUTE_UNUSED static std::size_t Rows = + [[maybe_unused]] static std::size_t Rows = UnicodeNameToCodepointLargestNameSize + 1; std::vector<char> Distances( diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp index 2ea3a24..afce803 100644 --- a/llvm/lib/TableGen/Record.cpp +++ b/llvm/lib/TableGen/Record.cpp @@ -1363,9 +1363,12 @@ const Init *BinOpInit::Fold(const Record *CurRec) const { } case LISTSPLAT: { const auto *Value = dyn_cast<TypedInit>(LHS); - const auto *Size = dyn_cast<IntInit>(RHS); - if (Value && Size) { - SmallVector<const Init *, 8> Args(Size->getValue(), Value); + const auto *Count = dyn_cast<IntInit>(RHS); + if (Value && Count) { + if (Count->getValue() < 0) + PrintFatalError(Twine("!listsplat count ") + Count->getAsString() + + " is negative"); + SmallVector<const Init *, 8> Args(Count->getValue(), Value); return ListInit::get(Args, Value->getType()); } break; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 6965116..9926a4d 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -26196,9 +26196,10 @@ static SDValue performFlagSettingCombine(SDNode *N, return DCI.CombineTo(N, Res, SDValue(N, 1)); } - // Combine identical generic nodes into this node, re-using the result. + // Combine equivalent generic nodes into this node, re-using the result. 
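
The lookup below now passes AllowCommute=true, using the getNodeIfExists extension from the SelectionDAG.cpp hunk earlier: if the exact operand order is not in the CSE map and the opcode is commutative, the swapped order is tried before giving up. A small model of that two-probe lookup, with node identity reduced to an opcode and two integer operands:

    #include <cstdio>
    #include <map>
    #include <tuple>

    using NodeKey = std::tuple<int /*Opcode*/, int /*Op0*/, int /*Op1*/>;
    static std::map<NodeKey, const char *> CSEMap = {{{/*ADD*/ 1, 10, 20}, "n0"}};

    const char *getNodeIfExists(int Opc, int Op0, int Op1, bool AllowCommute,
                                bool IsCommutative) {
      if (auto It = CSEMap.find({Opc, Op0, Op1}); It != CSEMap.end())
        return It->second;
      if (AllowCommute && IsCommutative)
        if (auto It = CSEMap.find({Opc, Op1, Op0}); It != CSEMap.end())
          return It->second;
      return nullptr;
    }

    int main() {
      // add(20, 10) still finds the node registered as add(10, 20).
      const char *N = getNodeIfExists(1, 20, 10, /*AllowCommute=*/true,
                                      /*IsCommutative=*/true);
      std::printf("%s\n", N ? N : "none"); // prints n0
      return 0;
    }
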
if (SDNode *Generic = DCI.DAG.getNodeIfExists( - GenericOpcode, DCI.DAG.getVTList(VT), {LHS, RHS})) + GenericOpcode, DCI.DAG.getVTList(VT), {LHS, RHS}, + /*AllowCommute=*/true)) DCI.CombineTo(Generic, SDValue(N, 0)); return SDValue(); diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp index f110558..7e03b97 100644 --- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp +++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp @@ -1360,14 +1360,24 @@ void AArch64EpilogueEmitter::emitEpilogue() { } bool CombineSPBump = shouldCombineCSRLocalStackBump(NumBytes); - // Assume we can't combine the last pop with the sp restore. - bool CombineAfterCSRBump = false; + + unsigned ProloguePopSize = PrologueSaveSize; if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) { + // With CalleeSavesAboveFrameRecord ProloguePopSize is the amount of stack + // that needs to be popped until we reach the start of the SVE save area. + // The "FixedObject" stack occurs after the SVE area and must be popped + // later. + ProloguePopSize -= FixedObject; AfterCSRPopSize += FixedObject; - } else if (!CombineSPBump && PrologueSaveSize != 0) { + } + + // Assume we can't combine the last pop with the sp restore. + if (!CombineSPBump && ProloguePopSize != 0) { MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator()); while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION || - AArch64InstrInfo::isSEHInstruction(*Pop)) + AArch64InstrInfo::isSEHInstruction(*Pop) || + (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord && + isPartOfSVECalleeSaves(Pop))) Pop = std::prev(Pop); // Converting the last ldp to a post-index ldp is valid only if the last // ldp's offset is 0. @@ -1377,18 +1387,27 @@ void AArch64EpilogueEmitter::emitEpilogue() { // may clobber), convert it to a post-index ldp. if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) { convertCalleeSaveRestoreToSPPrePostIncDec( - Pop, DL, PrologueSaveSize, EmitCFI, MachineInstr::FrameDestroy, - PrologueSaveSize); + Pop, DL, ProloguePopSize, EmitCFI, MachineInstr::FrameDestroy, + ProloguePopSize); + } else if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) { + MachineBasicBlock::iterator AfterLastPop = std::next(Pop); + if (AArch64InstrInfo::isSEHInstruction(*AfterLastPop)) + ++AfterLastPop; + // If not, and CalleeSavesAboveFrameRecord is enabled, deallocate + // callee-save non-SVE registers to move the stack pointer to the start of + // the SVE area. + emitFrameOffset(MBB, AfterLastPop, DL, AArch64::SP, AArch64::SP, + StackOffset::getFixed(ProloguePopSize), TII, + MachineInstr::FrameDestroy, false, NeedsWinCFI, + &HasWinCFI); } else { - // If not, make sure to emit an add after the last ldp. + // Otherwise, make sure to emit an add after the last ldp. // We're doing this by transferring the size to be restored from the // adjustment *before* the CSR pops to the adjustment *after* the CSR // pops. - AfterCSRPopSize += PrologueSaveSize; - CombineAfterCSRBump = true; + AfterCSRPopSize += ProloguePopSize; } } - // Move past the restores of the callee-saved registers. // If we plan on combining the sp bump of the local stack size and the callee // save stack size, we might need to adjust the CSR save and restore offsets. @@ -1419,6 +1438,17 @@ void AArch64EpilogueEmitter::emitEpilogue() { --SEHEpilogueStartI; } + // Determine the ranges of SVE callee-saves. 
This is done before emitting any + // code at the end of the epilogue (for Swift async), which can get in the way + // of finding SVE callee-saves with CalleeSavesAboveFrameRecord. + auto [PPR, ZPR] = getSVEStackFrameSizes(); + auto [PPRRange, ZPRRange] = partitionSVECS( + MBB, + SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord + ? MBB.getFirstTerminator() + : FirstGPRRestoreI, + PPR.CalleeSavesSize, ZPR.CalleeSavesSize, /*IsEpilogue=*/true); + if (HasFP && AFI->hasSwiftAsyncContext()) emitSwiftAsyncContextFramePointer(EpilogueEndI, DL); @@ -1441,14 +1471,6 @@ void AArch64EpilogueEmitter::emitEpilogue() { NumBytes -= PrologueSaveSize; assert(NumBytes >= 0 && "Negative stack allocation size!?"); - auto [PPR, ZPR] = getSVEStackFrameSizes(); - auto [PPRRange, ZPRRange] = partitionSVECS( - MBB, - SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord - ? MBB.getFirstTerminator() - : FirstGPRRestoreI, - PPR.CalleeSavesSize, ZPR.CalleeSavesSize, /*IsEpilogue=*/true); - StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize; StackOffset SVEStackSize = SVECalleeSavesSize + PPR.LocalsSize + ZPR.LocalsSize; @@ -1467,16 +1489,6 @@ void AArch64EpilogueEmitter::emitEpilogue() { NeedsWinCFI, &HasWinCFI); } - // Deallocate callee-save non-SVE registers. - emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(AFI->getCalleeSavedStackSize()), TII, - MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); - - // Deallocate fixed objects. - emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(FixedObject), TII, - MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); - // Deallocate callee-save SVE registers. emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP, SVECalleeSavesSize, TII, MachineInstr::FrameDestroy, false, @@ -1619,7 +1631,7 @@ void AArch64EpilogueEmitter::emitEpilogue() { MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP, StackOffset::getFixed(AfterCSRPopSize), TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI, EmitCFI, - StackOffset::getFixed(CombineAfterCSRBump ? 
PrologueSaveSize : 0)); + StackOffset::getFixed(AfterCSRPopSize - ArgumentStackToRestore)); } } diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 2c3870c..636d4f8a 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -8217,6 +8217,8 @@ bool AArch64AsmParser::parseDataExpr(const MCExpr *&Res) { Spec = AArch64::S_GOTPCREL; else if (Identifier == "plt") Spec = AArch64::S_PLT; + else if (Identifier == "funcinit") + Spec = AArch64::S_FUNCINIT; } if (Spec == AArch64::S_None) return Error(Loc, "invalid relocation specifier"); diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp index a388216..892b8da 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -232,6 +232,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(const MCFixup &Fixup, } if (RefKind == AArch64::S_AUTH || RefKind == AArch64::S_AUTHADDR) return ELF::R_AARCH64_AUTH_ABS64; + if (RefKind == AArch64::S_FUNCINIT) + return ELF::R_AARCH64_FUNCINIT64; return ELF::R_AARCH64_ABS64; } case AArch64::fixup_aarch64_add_imm12: diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp index 2b5cf34..bc090c6 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -40,6 +40,7 @@ const MCAsmInfo::AtSpecifier ELFAtSpecifiers[] = { {AArch64::S_GOT, "GOT"}, {AArch64::S_GOTPCREL, "GOTPCREL"}, {AArch64::S_PLT, "PLT"}, + {AArch64::S_FUNCINIT, "FUNCINIT"}, }; const MCAsmInfo::AtSpecifier MachOAtSpecifiers[] = { diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h index 0dfa61b..f2acff5 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h @@ -164,6 +164,7 @@ enum { // ELF relocation specifiers in data directives: S_PLT = 0x400, S_GOTPCREL, + S_FUNCINIT, // Mach-O @ relocation specifiers: S_MACHO_GOT, diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp index dd6fa16..d71f728 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp +++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp @@ -130,6 +130,12 @@ SMECallAttrs::SMECallAttrs(const CallBase &CB, const AArch64TargetLowering *TLI) if (auto *CalledFunction = CB.getCalledFunction()) CalledFn = SMEAttrs(*CalledFunction, TLI); + // An `invoke` of an agnostic ZA function may not return normally (it may + // resume in an exception block). In this case, it acts like a private ZA + // callee and may require a ZA save to be set up before it is called. + if (isa<InvokeInst>(CB)) + CalledFn.set(SMEAttrs::ZA_State_Agnostic, /*Enable=*/false); + // FIXME: We probably should not allow SME attributes on direct calls but // clang duplicates streaming mode attributes at each callsite. 
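// (Hedged illustration of the invoke handling above, using a hypothetical
// callee name: for
//   invoke void @agnostic_za_fn() to label %cont unwind label %lpad
// the unwind edge bypasses the callee's normal return, so clearing the
// agnostic-ZA attribute makes the caller conservatively set up a ZA save,
// just as it would for a private-ZA callee.)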
assert((IsIndirect || diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp index dbe74b1..5700468 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp @@ -2394,15 +2394,19 @@ bool SchedGroup::canAddMI(const MachineInstr &MI) const { else if (((SGMask & SchedGroupMask::ALU) != SchedGroupMask::NONE) && (TII->isVALU(MI) || TII->isMFMAorWMMA(MI) || TII->isSALU(MI) || TII->isTRANS(MI))) - Result = true; + Result = !MI.mayLoadOrStore(); else if (((SGMask & SchedGroupMask::VALU) != SchedGroupMask::NONE) && - TII->isVALU(MI) && !TII->isMFMAorWMMA(MI) && !TII->isTRANS(MI)) - Result = true; + TII->isVALU(MI) && !TII->isMFMAorWMMA(MI) && !TII->isTRANS(MI)) { + // Some memory instructions may be marked as VALU (e.g. BUFFER_LOAD_*_LDS). + // For our purposes, these shall not be classified as VALU as this results + // in unexpected behavior. + Result = !MI.mayLoadOrStore(); + } else if (((SGMask & SchedGroupMask::SALU) != SchedGroupMask::NONE) && TII->isSALU(MI)) - Result = true; + Result = !MI.mayLoadOrStore(); else if (((SGMask & SchedGroupMask::MFMA) != SchedGroupMask::NONE) && TII->isMFMAorWMMA(MI)) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index a44af5f..1b559a6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2833,8 +2833,8 @@ SDValue AMDGPUTargetLowering::LowerFLOGCommon(SDValue Op, R = getMad(DAG, DL, VT, YH, CH, Mad1); } - const bool IsFiniteOnly = (Flags.hasNoNaNs() || Options.NoNaNsFPMath) && - (Flags.hasNoInfs() || Options.NoInfsFPMath); + const bool IsFiniteOnly = + (Flags.hasNoNaNs() || Options.NoNaNsFPMath) && Flags.hasNoInfs(); // TODO: Check if known finite from source value. if (!IsFiniteOnly) { @@ -3161,9 +3161,8 @@ SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const { DAG.getSetCC(SL, SetCCVT, X, UnderflowCheckConst, ISD::SETOLT); R = DAG.getNode(ISD::SELECT, SL, VT, Underflow, Zero, R); - const auto &Options = getTargetMachine().Options; - if (!Flags.hasNoInfs() && !Options.NoInfsFPMath) { + if (!Flags.hasNoInfs()) { SDValue OverflowCheckConst = DAG.getConstantFP(IsExp10 ? 0x1.344136p+5f : 0x1.62e430p+6f, SL, VT); SDValue Overflow = diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index ee466ca..596a895 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -3575,7 +3575,7 @@ bool AMDGPULegalizerInfo::legalizeFlogCommon(MachineInstr &MI, const bool IsFiniteOnly = (MI.getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath) && - (MI.getFlag(MachineInstr::FmNoInfs) || TM.Options.NoInfsFPMath); + MI.getFlag(MachineInstr::FmNoInfs); if (!IsFiniteOnly) { // Expand isfinite(x) => fabs(x) < inf @@ -3864,9 +3864,7 @@ bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI, R = B.buildSelect(Ty, Underflow, Zero, R); - const auto &Options = MF.getTarget().Options; - - if (!(Flags & MachineInstr::FmNoInfs) && !Options.NoInfsFPMath) { + if (!(Flags & MachineInstr::FmNoInfs)) { auto OverflowCheckConst = B.buildFConstant(Ty, IsExp10 ? 
0x1.344136p+5f : 0x1.62e430p+6f); diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp index 71494be..4e11c4f 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp @@ -14,6 +14,7 @@ #include "GCNRegPressure.h" #include "AMDGPU.h" #include "SIMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/RegisterPressure.h" using namespace llvm; @@ -459,10 +460,14 @@ LaneBitmask llvm::getLiveLaneMask(const LiveInterval &LI, SlotIndex SI, GCNRPTracker::LiveRegSet llvm::getLiveRegs(SlotIndex SI, const LiveIntervals &LIS, - const MachineRegisterInfo &MRI) { + const MachineRegisterInfo &MRI, + GCNRegPressure::RegKind RegKind) { GCNRPTracker::LiveRegSet LiveRegs; for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { auto Reg = Register::index2VirtReg(I); + if (RegKind != GCNRegPressure::TOTAL_KINDS && + GCNRegPressure::getRegKind(Reg, MRI) != RegKind) + continue; if (!LIS.hasInterval(Reg)) continue; auto LiveMask = getLiveLaneMask(Reg, SI, LIS, MRI); @@ -986,3 +991,128 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) { #undef PFX } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void llvm::dumpMaxRegPressure(MachineFunction &MF, + GCNRegPressure::RegKind Kind, + LiveIntervals &LIS, + const MachineLoopInfo *MLI) { + + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); + auto &OS = dbgs(); + const char *RegName = GCNRegPressure::getName(Kind); + + unsigned MaxNumRegs = 0; + const MachineInstr *MaxPressureMI = nullptr; + GCNUpwardRPTracker RPT(LIS); + for (const MachineBasicBlock &MBB : MF) { + RPT.reset(MRI, LIS.getSlotIndexes()->getMBBEndIdx(&MBB).getPrevSlot()); + for (const MachineInstr &MI : reverse(MBB)) { + RPT.recede(MI); + unsigned NumRegs = RPT.getMaxPressure().getNumRegs(Kind); + if (NumRegs > MaxNumRegs) { + MaxNumRegs = NumRegs; + MaxPressureMI = &MI; + } + } + } + + SlotIndex MISlot = LIS.getInstructionIndex(*MaxPressureMI); + + // Max pressure can occur at either the early-clobber or register slot. + // Choose the maximum liveset between both slots. This is ugly but this is + // diagnostic code. + SlotIndex ECSlot = MISlot.getRegSlot(true); + SlotIndex RSlot = MISlot.getRegSlot(false); + GCNRPTracker::LiveRegSet ECLiveSet = getLiveRegs(ECSlot, LIS, MRI, Kind); + GCNRPTracker::LiveRegSet RLiveSet = getLiveRegs(RSlot, LIS, MRI, Kind); + unsigned ECNumRegs = getRegPressure(MRI, ECLiveSet).getNumRegs(Kind); + unsigned RNumRegs = getRegPressure(MRI, RLiveSet).getNumRegs(Kind); + GCNRPTracker::LiveRegSet *LiveSet = + ECNumRegs > RNumRegs ? &ECLiveSet : &RLiveSet; + SlotIndex MaxPressureSlot = ECNumRegs > RNumRegs ? ECSlot : RSlot; + assert(getRegPressure(MRI, *LiveSet).getNumRegs(Kind) == MaxNumRegs); + + // Split live registers into single-def and multi-def sets. 
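// (Classification note for the loop below: a register counts as single-def
// when its live interval, or every subrange when subranges exist, carries
// exactly one value number; such registers are usually the easiest candidates
// to sink or rematerialize when chasing the pressure peak.)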
+ GCNRegPressure SDefPressure, MDefPressure; + SmallVector<Register, 16> SDefRegs, MDefRegs; + for (auto [Reg, LaneMask] : *LiveSet) { + assert(GCNRegPressure::getRegKind(Reg, MRI) == Kind); + LiveInterval &LI = LIS.getInterval(Reg); + if (LI.getNumValNums() == 1 || + (LI.hasSubRanges() && + llvm::all_of(LI.subranges(), [](const LiveInterval::SubRange &SR) { + return SR.getNumValNums() == 1; + }))) { + SDefPressure.inc(Reg, LaneBitmask::getNone(), LaneMask, MRI); + SDefRegs.push_back(Reg); + } else { + MDefPressure.inc(Reg, LaneBitmask::getNone(), LaneMask, MRI); + MDefRegs.push_back(Reg); + } + } + unsigned SDefNumRegs = SDefPressure.getNumRegs(Kind); + unsigned MDefNumRegs = MDefPressure.getNumRegs(Kind); + assert(SDefNumRegs + MDefNumRegs == MaxNumRegs); + + auto printLoc = [&](const MachineBasicBlock *MBB, SlotIndex SI) { + return Printable([&, MBB, SI](raw_ostream &OS) { + OS << SI << ':' << printMBBReference(*MBB); + if (MLI) + if (const MachineLoop *ML = MLI->getLoopFor(MBB)) + OS << " (LoopHdr " << printMBBReference(*ML->getHeader()) + << ", Depth " << ML->getLoopDepth() << ")"; + }); + }; + + auto PrintRegInfo = [&](Register Reg, LaneBitmask LiveMask) { + GCNRegPressure RegPressure; + RegPressure.inc(Reg, LaneBitmask::getNone(), LiveMask, MRI); + OS << " " << printReg(Reg, TRI) << ':' + << TRI->getRegClassName(MRI.getRegClass(Reg)) << ", LiveMask " + << PrintLaneMask(LiveMask) << " (" << RegPressure.getNumRegs(Kind) << ' ' + << RegName << "s)\n"; + + // Use std::map to sort def/uses by SlotIndex. + std::map<SlotIndex, const MachineInstr *> Instrs; + for (const MachineInstr &MI : MRI.reg_nodbg_instructions(Reg)) { + Instrs[LIS.getInstructionIndex(MI).getRegSlot()] = &MI; + } + + for (const auto &[SI, MI] : Instrs) { + OS << " "; + if (MI->definesRegister(Reg, TRI)) + OS << "def "; + if (MI->readsRegister(Reg, TRI)) + OS << "use "; + OS << printLoc(MI->getParent(), SI) << ": " << *MI; + } + }; + + OS << "\n*** Register pressure info (" << RegName << "s) for " << MF.getName() + << " ***\n"; + OS << "Max pressure is " << MaxNumRegs << ' ' << RegName << "s at " + << printLoc(MaxPressureMI->getParent(), MaxPressureSlot) << ": " + << *MaxPressureMI; + + OS << "\nLive registers with single definition (" << SDefNumRegs << ' ' + << RegName << "s):\n"; + + // Sort SDefRegs by number of uses (smallest first) + llvm::sort(SDefRegs, [&](Register A, Register B) { + return std::distance(MRI.use_nodbg_begin(A), MRI.use_nodbg_end()) < + std::distance(MRI.use_nodbg_begin(B), MRI.use_nodbg_end()); + }); + + for (const Register Reg : SDefRegs) { + PrintRegInfo(Reg, LiveSet->lookup(Reg)); + } + + OS << "\nLive registers with multiple definitions (" << MDefNumRegs << ' ' + << RegName << "s):\n"; + for (const Register Reg : MDefRegs) { + PrintRegInfo(Reg, LiveSet->lookup(Reg)); + } +} +#endif diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h index 898d1ff..979a8b0 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h @@ -31,6 +31,12 @@ class SlotIndex; struct GCNRegPressure { enum RegKind { SGPR, VGPR, AGPR, AVGPR, TOTAL_KINDS }; + static constexpr const char *getName(RegKind Kind) { + const char *Names[] = {"SGPR", "VGPR", "AGPR", "AVGPR"}; + assert(Kind < TOTAL_KINDS); + return Names[Kind]; + } + GCNRegPressure() { clear(); } @@ -41,6 +47,11 @@ struct GCNRegPressure { void clear() { std::fill(&Value[0], &Value[ValueArraySize], 0); } + unsigned getNumRegs(RegKind Kind) const { + assert(Kind < TOTAL_KINDS); + return 
Value[Kind]; + } + /// \returns the SGPR32 pressure unsigned getSGPRNum() const { return Value[SGPR]; } /// \returns the aggregated ArchVGPR32, AccVGPR32, and Pseudo AVGPR pressure @@ -138,6 +149,12 @@ struct GCNRegPressure { void dump() const; + static RegKind getRegKind(unsigned Reg, const MachineRegisterInfo &MRI) { + const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); + const SIRegisterInfo *STI = static_cast<const SIRegisterInfo *>(TRI); + return (RegKind)getRegKind(MRI.getRegClass(Reg), STI); + } + private: static constexpr unsigned ValueArraySize = TOTAL_KINDS * 2; @@ -294,8 +311,10 @@ public: } }; -GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS, - const MachineRegisterInfo &MRI); +GCNRPTracker::LiveRegSet +getLiveRegs(SlotIndex SI, const LiveIntervals &LIS, + const MachineRegisterInfo &MRI, + GCNRegPressure::RegKind RegKind = GCNRegPressure::TOTAL_KINDS); //////////////////////////////////////////////////////////////////////////////// // GCNUpwardRPTracker @@ -428,9 +447,6 @@ LaneBitmask getLiveLaneMask(const LiveInterval &LI, SlotIndex SI, const MachineRegisterInfo &MRI, LaneBitmask LaneMaskFilter = LaneBitmask::getAll()); -GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS, - const MachineRegisterInfo &MRI); - /// creates a map MachineInstr -> LiveRegSet /// R - range of iterators on instructions /// After - upon entry or exit of every instruction @@ -524,6 +540,11 @@ public: } }; +LLVM_ABI void dumpMaxRegPressure(MachineFunction &MF, + GCNRegPressure::RegKind Kind, + LiveIntervals &LIS, + const MachineLoopInfo *MLI); + } // end namespace llvm #endif // LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index bdc0810..58482ea 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -69,6 +69,21 @@ static cl::opt<bool> GCNTrackers( cl::desc("Use the AMDGPU specific RPTrackers during scheduling"), cl::init(false)); +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +#define DUMP_MAX_REG_PRESSURE +static cl::opt<bool> PrintMaxRPRegUsageBeforeScheduler( + "amdgpu-print-max-reg-pressure-regusage-before-scheduler", cl::Hidden, + cl::desc("Print a list of live registers along with their def/uses at the " + "point of maximum register pressure before scheduling."), + cl::init(false)); + +static cl::opt<bool> PrintMaxRPRegUsageAfterScheduler( + "amdgpu-print-max-reg-pressure-regusage-after-scheduler", cl::Hidden, + cl::desc("Print a list of live registers along with their def/uses at the " + "point of maximum register pressure after scheduling."), + cl::init(false)); +#endif + const unsigned ScheduleMetrics::ScaleFactor = 100; GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C) @@ -960,6 +975,14 @@ void GCNScheduleDAGMILive::runSchedStages() { RegionLiveOuts.buildLiveRegMap(); } +#ifdef DUMP_MAX_REG_PRESSURE + if (PrintMaxRPRegUsageBeforeScheduler) { + dumpMaxRegPressure(MF, GCNRegPressure::VGPR, *LIS, MLI); + dumpMaxRegPressure(MF, GCNRegPressure::SGPR, *LIS, MLI); + LIS->dump(); + } +#endif + GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl); while (S.advanceStage()) { auto Stage = createSchedStage(S.getCurrentStage()); @@ -995,6 +1018,14 @@ void GCNScheduleDAGMILive::runSchedStages() { Stage->finalizeGCNSchedStage(); } + +#ifdef DUMP_MAX_REG_PRESSURE + if (PrintMaxRPRegUsageAfterScheduler) { + dumpMaxRegPressure(MF, GCNRegPressure::VGPR, *LIS, MLI); + 
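// (Usage note: the report is emitted once per register kind; a hypothetical
// extra call such as dumpMaxRegPressure(MF, GCNRegPressure::AGPR, *LIS, MLI)
// would print AGPR peaks in the same format. Not part of this patch.)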
dumpMaxRegPressure(MF, GCNRegPressure::SGPR, *LIS, MLI); + LIS->dump(); + } +#endif } #ifndef NDEBUG diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index 64e34db..5f6d742 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -260,8 +260,12 @@ class NSAHelper { } class MIMGNSAHelper<int num_addrs, - list<RegisterClass> addr_types=!listsplat(VGPR_32, num_addrs)> - : NSAHelper<> { + list<RegisterOperand> addr_types_in=[]> + : NSAHelper<> { + list<RegisterOperand> addr_types = + !if(!empty(addr_types_in), !listsplat(VGPROp_32, num_addrs), + addr_types_in); + list<string> AddrAsmNames = !foreach(i, !range(num_addrs), "vaddr" # i); let AddrIns = !dag(ins, addr_types, AddrAsmNames); let AddrAsm = "[$" # !interleave(AddrAsmNames, ", $") # "]"; @@ -358,7 +362,7 @@ class MIMG_gfx11<int op, dag outs, string dns = ""> // Base class for all NSA MIMG instructions. // Note that 1-dword addresses always use non-NSA variants. class MIMG_nsa_gfx11<int op, dag outs, int num_addrs, string dns="", - list<RegisterClass> addr_types=[], + list<RegisterOperand> addr_types=[], RegisterOperand LastAddrRC = VGPROp_32> : MIMG<outs, dns>, MIMGe_gfx11<op> { let SubtargetPredicate = isGFX11Only; @@ -378,7 +382,7 @@ class MIMG_nsa_gfx11<int op, dag outs, int num_addrs, string dns="", } class VIMAGE_gfx12<int op, dag outs, int num_addrs, string dns="", - list<RegisterClass> addr_types=[]> + list<RegisterOperand> addr_types=[]> : VIMAGE<outs, dns>, VIMAGEe<op> { let SubtargetPredicate = isGFX12Plus; let AssemblerPredicate = isGFX12Plus; @@ -1521,12 +1525,12 @@ class MIMG_IntersectRay_Helper<bit Is64, bit IsA16, bit isDual, bit isBVH8> { int VAddrDwords = !srl(Size, 5); int GFX11PlusNSAAddrs = !if(IsA16, 4, 5); - RegisterClass node_ptr_type = !if(Is64, VReg_64, VGPR_32); - list<RegisterClass> GFX11PlusAddrTypes = - !cond(isBVH8 : [node_ptr_type, VReg_64, VReg_96, VReg_96, VGPR_32], - isDual : [node_ptr_type, VReg_64, VReg_96, VReg_96, VReg_64], - IsA16 : [node_ptr_type, VGPR_32, VReg_96, VReg_96], - true : [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]); + RegisterOperand node_ptr_type = !if(Is64, VGPROp_64, VGPROp_32); + list<RegisterOperand> GFX11PlusAddrTypes = + !cond(isBVH8 : [node_ptr_type, VGPROp_64, VGPROp_96, VGPROp_96, VGPROp_32], + isDual : [node_ptr_type, VGPROp_64, VGPROp_96, VGPROp_96, VGPROp_64], + IsA16 : [node_ptr_type, VGPROp_32, VGPROp_96, VGPROp_96], + true : [node_ptr_type, VGPROp_32, VGPROp_96, VGPROp_96, VGPROp_96]); } class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterOperand AddrRC> @@ -1552,7 +1556,7 @@ class MIMG_IntersectRay_gfx11<mimgopc op, string opcode, RegisterOperand AddrRC> } class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs, - list<RegisterClass> addr_types> + list<RegisterOperand> addr_types> : MIMG_nsa_gfx11<op.GFX11, (outs VReg_128:$vdata), num_addrs, "GFX11", addr_types> { let InOperandList = !con(nsah.AddrIns, (ins SReg_128_XNULL:$srsrc, A16:$a16)); @@ -1561,7 +1565,7 @@ class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs, class VIMAGE_IntersectRay_gfx12<mimgopc op, string opcode, int num_addrs, bit isDual, bit isBVH8, - list<RegisterClass> addr_types> + list<RegisterOperand> addr_types> : VIMAGE_gfx12<op.GFX12, !if(!or(isDual, isBVH8), (outs VReg_320:$vdata, VReg_96:$ray_origin_out, VReg_96:$ray_dir_out), diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 
80e985d..a2841c11 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -18168,7 +18168,7 @@ Align SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { return CacheLineAlign; } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] static bool isCopyFromRegOfInlineAsm(const SDNode *N) { assert(N->getOpcode() == ISD::CopyFromReg); do { diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 5e27b37..6dcbced 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -1019,7 +1019,7 @@ void WaitcntBrackets::updateByEvent(WaitEventType E, MachineInstr &Inst) { // SMEM and VMEM operations. So there will never be // outstanding address translations for both SMEM and // VMEM at the same time. - setScoreLB(T, CurrScore - 1); + setScoreLB(T, getScoreUB(T) - 1); PendingEvents &= ~(1 << OtherEvent); } for (const MachineOperand &Op : Inst.all_uses()) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index ec5c5bb3..d516330 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -865,22 +865,16 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - if (DestReg == AMDGPU::VCC_LO) { - if (AMDGPU::SReg_32RegClass.contains(SrcReg)) { - BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), AMDGPU::VCC_LO) - .addReg(SrcReg, getKillRegState(KillSrc)); - } else { + if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) { + if (DestReg == AMDGPU::VCC_LO) { // FIXME: Hack until VReg_1 removed. assert(AMDGPU::VGPR_32RegClass.contains(SrcReg)); BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32)) - .addImm(0) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addImm(0) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; } - return; - } - - if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) { reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); return; } @@ -898,22 +892,16 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - if (DestReg == AMDGPU::VCC) { - if (AMDGPU::SReg_64RegClass.contains(SrcReg)) { - BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC) - .addReg(SrcReg, getKillRegState(KillSrc)); - } else { + if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) { + if (DestReg == AMDGPU::VCC) { // FIXME: Hack until VReg_1 removed. assert(AMDGPU::VGPR_32RegClass.contains(SrcReg)); BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32)) - .addImm(0) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addImm(0) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; } - return; - } - - if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) { reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); return; } diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index eac9fd4..27e5ee9c 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -3726,6 +3726,23 @@ def : GCNPat < } // End foreach Ty = ... 
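// (Note on the True16 patterns added below: they match an i32 assembled as
// or(zext($src_lo), bitconvert(build_vector(0, $src_hi))), in either operand
// order, and rewrite it into a single REG_SEQUENCE over the 16-bit halves
// rather than a shift-and-or sequence.)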
} // End AddedComplexity = 1 +let True16Predicate = UseRealTrue16Insts in { +def : GCNPat< + (i32 (DivergentBinFrag<or> + (i32 (zext i16:$src_lo)), + (i32 (bitconvert (v2i16 (build_vector (i16 0), (i16 VGPR_16:$src_hi))))) + )), + (REG_SEQUENCE VGPR_32, $src_lo, lo16, $src_hi, hi16) +>; +def : GCNPat< + (i32 (DivergentBinFrag<or> + (i32 (bitconvert (v2i16 (build_vector (i16 0), (i16 VGPR_16:$src_hi))))), + (i32 (zext i16:$src_lo)) + )), + (REG_SEQUENCE VGPR_32, $src_lo, lo16, $src_hi, hi16) +>; +} + let True16Predicate = UseRealTrue16Insts in def : GCNPat < (v2i16 (DivergentBinFrag<build_vector> (i16 undef), (i16 (trunc i32:$src1)))), diff --git a/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp b/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp index 39e651d..8945ec3 100644 --- a/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp +++ b/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp @@ -166,7 +166,7 @@ static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, uint64_t RegNo, } // TODO -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] static DecodeStatus DecodesFPR128RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const MCDisassembler *Decoder) { diff --git a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp index ca81d30..8ace2d2 100644 --- a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp +++ b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/MD5.h" #include "llvm/TargetParser/Triple.h" #include "llvm/Transforms/Utils/ModuleUtils.h" +#include <cstdint> #include <optional> using namespace llvm; @@ -193,7 +194,12 @@ void DXContainerGlobals::addResourcesForPSV(Module &M, PSVRuntimeInfo &PSV) { dxbc::PSV::v2::ResourceBindInfo BindInfo; BindInfo.Type = Type; BindInfo.LowerBound = Binding.LowerBound; - BindInfo.UpperBound = Binding.LowerBound + Binding.Size - 1; + assert((Binding.Size == UINT32_MAX || + (uint64_t)Binding.LowerBound + Binding.Size - 1 <= UINT32_MAX) && + "Resource range is too large"); + BindInfo.UpperBound = (Binding.Size == UINT32_MAX) + ? UINT32_MAX + : Binding.LowerBound + Binding.Size - 1; BindInfo.Space = Binding.Space; BindInfo.Kind = static_cast<dxbc::PSV::ResourceKind>(Kind); BindInfo.Flags = Flags; diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index 974f653..5f180d6 100644 --- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -667,11 +667,10 @@ static DecodeStatus DecodeHvxWRRegisterClass(MCInst &Inst, unsigned RegNo, return DecodeRegisterClass(Inst, RegNo, HvxWRDecoderTable); } -LLVM_ATTRIBUTE_UNUSED // Suppress warning temporarily. - static DecodeStatus - DecodeHvxVQRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - const MCDisassembler *Decoder) { +[[maybe_unused]] // Suppress warning temporarily.
+static DecodeStatus DecodeHvxVQRRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const MCDisassembler *Decoder) { static const MCPhysReg HvxVQRDecoderTable[] = { Hexagon::VQ0, Hexagon::VQ1, Hexagon::VQ2, Hexagon::VQ3, Hexagon::VQ4, Hexagon::VQ5, Hexagon::VQ6, Hexagon::VQ7}; diff --git a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp index 5dde47a..a3296e0 100644 --- a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp +++ b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp @@ -419,8 +419,8 @@ namespace { using HCE = HexagonConstExtenders; - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const OffsetRange &OR) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const OffsetRange &OR) { if (OR.Min > OR.Max) OS << '!'; OS << '[' << OR.Min << ',' << OR.Max << "]a" << unsigned(OR.Align) @@ -435,8 +435,8 @@ namespace { const HexagonRegisterInfo &HRI; }; - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &P) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const PrintRegister &P) { if (P.Rs.Reg != 0) OS << printReg(P.Rs.Reg, &P.HRI, P.Rs.Sub); else @@ -451,8 +451,8 @@ namespace { const HexagonRegisterInfo &HRI; }; - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const PrintExpr &P) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const PrintExpr &P) { OS << "## " << (P.Ex.Neg ? "- " : "+ "); if (P.Ex.Rs.Reg != 0) OS << printReg(P.Ex.Rs.Reg, &P.HRI, P.Ex.Rs.Sub); @@ -469,15 +469,15 @@ namespace { const HexagonRegisterInfo &HRI; }; - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const PrintInit &P) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const PrintInit &P) { OS << '[' << P.ExtI.first << ", " << PrintExpr(P.ExtI.second, P.HRI) << ']'; return OS; } - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtDesc &ED) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const HCE::ExtDesc &ED) { assert(ED.OpNum != -1u); const MachineBasicBlock &MBB = *ED.getOp().getParent()->getParent(); const MachineFunction &MF = *MBB.getParent(); @@ -493,8 +493,8 @@ namespace { return OS; } - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtRoot &ER) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const HCE::ExtRoot &ER) { switch (ER.Kind) { case MachineOperand::MO_Immediate: OS << "imm:" << ER.V.ImmVal; @@ -527,8 +527,8 @@ namespace { return OS; } - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtValue &EV) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const HCE::ExtValue &EV) { OS << HCE::ExtRoot(EV) << " off:" << EV.Offset; return OS; } @@ -540,8 +540,8 @@ namespace { const HexagonRegisterInfo &HRI; }; - LLVM_ATTRIBUTE_UNUSED - raw_ostream &operator<< (raw_ostream &OS, const PrintIMap &P) { + [[maybe_unused]] + raw_ostream &operator<<(raw_ostream &OS, const PrintIMap &P) { OS << "{\n"; for (const std::pair<const HCE::ExtenderInit, HCE::IndexList> &Q : P.IMap) { OS << " " << PrintInit(Q.first, P.HRI) << " -> {"; diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp index 4d96cfa..c7a4f68 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp @@ -789,7 +789,7 @@ struct ShuffleMask { } }; -LLVM_ATTRIBUTE_UNUSED 
+[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const ShuffleMask &SM) { SM.print(OS); return OS; diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp index 87d052b..e4c0a16 100644 --- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp @@ -364,7 +364,7 @@ private: const HexagonVectorCombine &HVC; }; -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) { OS << "Inst: " << AI.Inst << " " << *AI.Inst << '\n'; OS << "Addr: " << *AI.Addr << '\n'; @@ -375,7 +375,7 @@ raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) { return OS; } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) { OS << "IsLoad:" << (MG.IsLoad ? "yes" : "no"); OS << ", IsHvx:" << (MG.IsHvx ? "yes" : "no") << '\n'; @@ -394,7 +394,7 @@ raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) { return OS; } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan::Block &B) { OS << " @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] "; @@ -408,7 +408,7 @@ raw_ostream &operator<<(raw_ostream &OS, return OS; } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan &BS) { OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n'; for (const AlignVectors::ByteSpan::Block &B : BS) diff --git a/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp b/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp index fa8ae60..2ff5843 100644 --- a/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp @@ -111,7 +111,7 @@ namespace { friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D); }; - LLVM_ATTRIBUTE_UNUSED + [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const DepChain &D) { const ChainOfDependences &CD = D.Chain; int ChainSize = CD.size(); @@ -144,7 +144,7 @@ namespace { bool isDefined() { return Inst2Replace != nullptr; } }; - LLVM_ATTRIBUTE_UNUSED + [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const ReuseValue &RU) { OS << "** ReuseValue ***\n"; OS << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n"; diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h index ca98269..e3094b4 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -275,7 +275,7 @@ namespace HexagonII { INST_ICLASS_ALU32_3 = 0xf0000000 }; - LLVM_ATTRIBUTE_UNUSED + [[maybe_unused]] static unsigned getMemAccessSizeInBytes(MemAccessSize S) { switch (S) { case ByteAccess: return 1; diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index bef4868..7e7ee75 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -280,6 +280,10 @@ static unsigned getTcgen05LdOpcode(unsigned IID, bool enablePack) { } void NVPTXDAGToDAGISel::SelectTcgen05Ld(SDNode *N, bool hasOffset) { + if (!Subtarget->hasTcgen05InstSupport()) + report_fatal_error( + "tcgen05.ld is not supported on this architecture variant"); + SDLoc DL(N); unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); @@ 
-2136,6 +2140,10 @@ static unsigned getTcgen05StOpcode(unsigned IID, bool enableUnpack) { } void NVPTXDAGToDAGISel::SelectTcgen05St(SDNode *N, bool hasOffset) { + if (!Subtarget->hasTcgen05InstSupport()) + report_fatal_error( + "tcgen05.st is not supported on this architecture variant"); + SDLoc DL(N); unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 6c14cf0..dfde0cc 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -101,6 +101,22 @@ def PrmtMode : Operand<i32> { // NVPTX Instruction Predicate Definitions //===----------------------------------------------------------------------===// +// Checks PTX version and family-specific and architecture-specific SM versions. +// For example, sm_100{f/a} and any future variants in the same family will match +// for any PTX version greater than or equal to `PTXVersion`. +class PTXWithFamilySMs<int PTXVersion, list<int> SMVersions> : + Predicate<"Subtarget->hasPTXWithFamilySMs(" # PTXVersion # ", {" # + !interleave(SMVersions, ", ") # "})">; + +// Checks PTX version and architecture-specific SM versions. +// For example, sm_100{a} will match for any PTX version +// greater than or equal to `PTXVersion`. +class PTXWithAccelSMs<int PTXVersion, list<int> SMVersions> : + Predicate<"Subtarget->hasPTXWithAccelSMs(" # PTXVersion # ", {" # + !interleave(SMVersions, ", ") # "})">; + +// Helper predicate to call a subtarget method. +class callSubtarget<string SubtargetMethod> : Predicate<"Subtarget->" # SubtargetMethod # "()">; def hasAtomAddF64 : Predicate<"Subtarget->hasAtomAddF64()">; def hasAtomScope : Predicate<"Subtarget->hasAtomScope()">; diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index a8b854f..22cf3a7 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -5103,8 +5103,8 @@ let Predicates = [hasSM<90>, hasPTX<78>] in { def EXIT : NullaryInst<"exit", int_nvvm_exit>; // Tcgen05 intrinsics -let isConvergent = true, Predicates = [hasTcgen05Instructions] in { - +let isConvergent = true in { +let Predicates = [callSubtarget<"hasTcgen05InstSupport">] in { multiclass TCGEN05_ALLOC_INTR<string AS, string num, Intrinsic Intr> { def "" : BasicNVPTXInst<(outs), (ins ADDR:$dst, B32:$ncols), @@ -5156,15 +5156,6 @@ defm TCGEN05_COMMIT_CG2 : TCGEN05_COMMIT_INTR<"", "2">; defm TCGEN05_COMMIT_S64_CG1 : TCGEN05_COMMIT_INTR<"shared", "1">; defm TCGEN05_COMMIT_S64_CG2 : TCGEN05_COMMIT_INTR<"shared", "2">; -multiclass TCGEN05_SHIFT_INTR<string num, Intrinsic Intr> { - def "" : BasicNVPTXInst<(outs), - (ins ADDR:$tmem_addr), - "tcgen05.shift.cta_group::" # num # ".down", - [(Intr addr:$tmem_addr)]>; -} -defm TCGEN05_SHIFT_CG1: TCGEN05_SHIFT_INTR<"1", int_nvvm_tcgen05_shift_down_cg1>; -defm TCGEN05_SHIFT_CG2: TCGEN05_SHIFT_INTR<"2", int_nvvm_tcgen05_shift_down_cg2>; - multiclass TCGEN05_CP_INTR<string shape, string src_fmt, string mc = ""> { defvar dst_fmt = !if(!eq(src_fmt, ""), "", ".b8x16"); defvar fmt_asm = StrJoin<".", [dst_fmt, src_fmt]>.ret; @@ -5195,9 +5186,22 @@ foreach src_fmt = ["", "b6x16_p32", "b4x16_p64"] in { defm TCGEN05_CP_64x128_2 # src_fmt : TCGEN05_CP_INTR<"64x128b", src_fmt, "warpx2::01_23">; defm TCGEN05_CP_32x128 # src_fmt : TCGEN05_CP_INTR<"32x128b", src_fmt, "warpx4">; } +} // Predicates + +let Predicates = [callSubtarget<"hasTcgen05ShiftSupport">] in { +multiclass 
TCGEN05_SHIFT_INTR<string num, Intrinsic Intr> { + def "" : BasicNVPTXInst<(outs), + (ins ADDR:$tmem_addr), + "tcgen05.shift.cta_group::" # num # ".down", + [(Intr addr:$tmem_addr)]>; +} +defm TCGEN05_SHIFT_CG1: TCGEN05_SHIFT_INTR<"1", int_nvvm_tcgen05_shift_down_cg1>; +defm TCGEN05_SHIFT_CG2: TCGEN05_SHIFT_INTR<"2", int_nvvm_tcgen05_shift_down_cg2>; +} // Predicates + } // isConvergent -let hasSideEffects = 1, Predicates = [hasTcgen05Instructions] in { +let hasSideEffects = 1, Predicates = [callSubtarget<"hasTcgen05InstSupport">] in { def tcgen05_fence_before_thread_sync: NullaryInst< "tcgen05.fence::before_thread_sync", int_nvvm_tcgen05_fence_before_thread_sync>; @@ -5231,8 +5235,7 @@ class TCGEN05_LDST_REGINFO<int Veclen> { // class TCGEN05_LD_INST<string Shape, int Num, bit Pack> : - NVPTXInst<(outs), (ins), "?", []>, - Requires<[hasTcgen05Instructions]> { + NVPTXInst<(outs), (ins), "?", []> { TCGEN05_LDST_REGINFO Info = TCGEN05_LDST_REGINFO< NVVM_TCGEN05_LDST_ACCESS_SIZE<Shape, Num>.veclen>; @@ -5256,8 +5259,7 @@ class TCGEN05_LD_INST<string Shape, int Num, bit Pack> : // class TCGEN05_ST_INST<string Shape, int Num, bit Unpack> : - NVPTXInst<(outs), (ins), "?", []>, - Requires<[hasTcgen05Instructions]> { + NVPTXInst<(outs), (ins), "?", []> { TCGEN05_LDST_REGINFO Info = TCGEN05_LDST_REGINFO< NVVM_TCGEN05_LDST_ACCESS_SIZE<Shape, Num>.veclen>; diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp index c548967..989be50 100644 --- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -72,6 +72,40 @@ const SelectionDAGTargetInfo *NVPTXSubtarget::getSelectionDAGInfo() const { return TSInfo.get(); } +bool NVPTXSubtarget::hasPTXWithFamilySMs(unsigned PTXVersion, + ArrayRef<unsigned> SMVersions) const { + unsigned PTXVer = getPTXVersion(); + if (!hasFamilySpecificFeatures() || PTXVer < PTXVersion) + return false; + + unsigned SMVer = getSmVersion(); + return llvm::any_of(SMVersions, [&](unsigned SM) { + // sm_101 is a different family, never group it with sm_10x. + if (SMVer == 101 || SM == 101) + return SMVer == SM && + // PTX 9.0 and later renamed sm_101 to sm_110, so sm_101 is not + // supported. + !(PTXVer >= 90 && SMVer == 101); + + return getSmFamilyVersion() == SM / 10 && SMVer >= SM; + }); +} + +bool NVPTXSubtarget::hasPTXWithAccelSMs(unsigned PTXVersion, + ArrayRef<unsigned> SMVersions) const { + unsigned PTXVer = getPTXVersion(); + if (!hasArchAccelFeatures() || PTXVer < PTXVersion) + return false; + + unsigned SMVer = getSmVersion(); + return llvm::any_of(SMVersions, [&](unsigned SM) { + return SMVer == SM && + // PTX 9.0 and later renamed sm_101 to sm_110, so sm_101 is not + // supported. + !(PTXVer >= 90 && SMVer == 101); + }); +} + bool NVPTXSubtarget::allowFP16Math() const { return hasFP16Math() && NoF16Math == false; } diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h index e81c56b..194dbdc 100644 --- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h @@ -73,6 +73,18 @@ public: const SelectionDAGTargetInfo *getSelectionDAGInfo() const override; + // Checks PTX version and family-specific and architecture-specific SM + // versions. For example, sm_100{f/a} and any future variants in the same + // family will match for any PTX version greater than or equal to + // `PTXVersion`. 
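  // Illustrative example, with assumed SM values following the rule above: on
  // an sm_103f subtarget with PTX >= 8.8, hasPTXWithFamilySMs(88, {100, 101})
  // holds because sm_103 belongs to the sm_10x family and 103 >= 100, whereas
  // an sm_101f subtarget matches only through the explicit 101 entry.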
+ bool hasPTXWithFamilySMs(unsigned PTXVersion, + ArrayRef<unsigned> SMVersions) const; + // Checks PTX version and architecture-specific SM versions. + // For example, sm_100{a} will match for any PTX version greater than or equal + // to `PTXVersion`. + bool hasPTXWithAccelSMs(unsigned PTXVersion, + ArrayRef<unsigned> SMVersions) const; + bool has256BitVectorLoadStore(unsigned AS) const { return SmVersion >= 100 && PTXVersion >= 88 && AS == NVPTXAS::ADDRESS_SPACE_GLOBAL; @@ -127,6 +139,27 @@ public: return HasTcgen05 && PTXVersion >= MinPTXVersion; } + // Checks following instructions support: + // - tcgen05.ld/st + // - tcgen05.alloc/dealloc/relinquish + // - tcgen05.cp + // - tcgen05.fence/wait + // - tcgen05.commit + bool hasTcgen05InstSupport() const { + // sm_101 renamed to sm_110 in PTX 9.0 + return hasPTXWithFamilySMs(90, {100, 110}) || + hasPTXWithFamilySMs(88, {100, 101}) || + hasPTXWithAccelSMs(86, {100, 101}); + } + + // Checks tcgen05.shift instruction support. + bool hasTcgen05ShiftSupport() const { + // sm_101 renamed to sm_110 in PTX 9.0 + return hasPTXWithAccelSMs(90, {100, 110, 103}) || + hasPTXWithAccelSMs(88, {100, 101, 103}) || + hasPTXWithAccelSMs(86, {100, 101}); + } + bool hasTcgen05MMAScaleInputDImm() const { return FullSmVersion == 1003 && PTXVersion >= 86; } @@ -158,6 +191,7 @@ public: bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; } unsigned int getFullSmVersion() const { return FullSmVersion; } unsigned int getSmVersion() const { return getFullSmVersion() / 10; } + unsigned int getSmFamilyVersion() const { return getFullSmVersion() / 100; } // GPUs with "a" suffix have architecture-accelerated features that are // supported on the specified architecture only, hence such targets do not // follow the onion layer model. hasArchAccelFeatures() allows distinguishing diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 4b54231..8851a0f 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -1659,6 +1659,10 @@ bool RISCVAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return generateImmOutOfRangeError( Operands, ErrorInfo, -1, (1 << 5) - 1, "immediate must be non-zero in the range"); + case Match_InvalidXSfmmVType: { + SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc(); + return generateXSfmmVTypeError(ErrorLoc); + } case Match_InvalidVTypeI: { SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc(); return generateVTypeError(ErrorLoc); diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index 70b7c43..e75dfe3 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -142,6 +142,22 @@ enum { ReadsPastVLShift = DestEEWShift + 2, ReadsPastVLMask = 1ULL << ReadsPastVLShift, + + // 0 -> Don't care about altfmt bit in VTYPE. + // 1 -> Is not altfmt. + // 2 -> Is altfmt(BF16). + AltFmtTypeShift = ReadsPastVLShift + 1, + AltFmtTypeMask = 3ULL << AltFmtTypeShift, + + // XSfmmbase + HasTWidenOpShift = AltFmtTypeShift + 2, + HasTWidenOpMask = 1ULL << HasTWidenOpShift, + + HasTMOpShift = HasTWidenOpShift + 1, + HasTMOpMask = 1ULL << HasTMOpShift, + + HasTKOpShift = HasTMOpShift + 1, + HasTKOpMask = 1ULL << HasTKOpShift, }; // Helper functions to read TSFlags. 
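// (Bit-layout sketch of the fields added above, assuming the shifts compose
// as written: AltFmtType takes the two bits after ReadsPastVL, followed by the
// one-bit HasTWidenOp, HasTMOp, and HasTKOp flags. A field is decoded as e.g.
//   (TSFlags & AltFmtTypeMask) >> AltFmtTypeShift
// which is exactly what getAltFmtType below returns.)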
@@ -183,6 +199,11 @@ static inline bool hasRoundModeOp(uint64_t TSFlags) { return TSFlags & HasRoundModeOpMask; } +enum class AltFmtType { DontCare, NotAltFmt, AltFmt }; +static inline AltFmtType getAltFmtType(uint64_t TSFlags) { + return static_cast<AltFmtType>((TSFlags & AltFmtTypeMask) >> AltFmtTypeShift); +} + /// \returns true if this instruction uses vxrm static inline bool usesVXRM(uint64_t TSFlags) { return TSFlags & UsesVXRMMask; } @@ -204,11 +225,47 @@ static inline bool readsPastVL(uint64_t TSFlags) { return TSFlags & ReadsPastVLMask; } +// XSfmmbase +static inline bool hasTWidenOp(uint64_t TSFlags) { + return TSFlags & HasTWidenOpMask; +} + +static inline bool hasTMOp(uint64_t TSFlags) { return TSFlags & HasTMOpMask; } + +static inline bool hasTKOp(uint64_t TSFlags) { return TSFlags & HasTKOpMask; } + +static inline unsigned getTNOpNum(const MCInstrDesc &Desc) { + const uint64_t TSFlags = Desc.TSFlags; + assert(hasTWidenOp(TSFlags) && hasVLOp(TSFlags)); + unsigned Offset = 3; + if (hasTKOp(TSFlags)) + Offset = 4; + return Desc.getNumOperands() - Offset; +} + +static inline unsigned getTMOpNum(const MCInstrDesc &Desc) { + const uint64_t TSFlags = Desc.TSFlags; + assert(hasTWidenOp(TSFlags) && hasTMOp(TSFlags)); + if (hasTKOp(TSFlags)) + return Desc.getNumOperands() - 5; + // vtzero.t + return Desc.getNumOperands() - 4; +} + +static inline unsigned getTKOpNum(const MCInstrDesc &Desc) { + [[maybe_unused]] const uint64_t TSFlags = Desc.TSFlags; + assert(hasTWidenOp(TSFlags) && hasTKOp(TSFlags)); + return Desc.getNumOperands() - 3; +} + static inline unsigned getVLOpNum(const MCInstrDesc &Desc) { const uint64_t TSFlags = Desc.TSFlags; // This method is only called if we expect to have a VL operand, and all // instructions with VL also have SEW. assert(hasSEWOp(TSFlags) && hasVLOp(TSFlags)); + // In XSfmmbase, TN is an alias for VL, so here we use the same TSFlags bit. + if (hasTWidenOp(TSFlags)) + return getTNOpNum(Desc); unsigned Offset = 2; if (hasVecPolicyOp(TSFlags)) Offset = 3; @@ -226,7 +283,7 @@ static inline unsigned getSEWOpNum(const MCInstrDesc &Desc) { const uint64_t TSFlags = Desc.TSFlags; assert(hasSEWOp(TSFlags)); unsigned Offset = 1; - if (hasVecPolicyOp(TSFlags)) + if (hasVecPolicyOp(TSFlags) || hasTWidenOp(TSFlags)) Offset = 2; return Desc.getNumOperands() - Offset; } @@ -243,6 +300,9 @@ static inline int getFRMOpNum(const MCInstrDesc &Desc) { if (!hasRoundModeOp(TSFlags) || usesVXRM(TSFlags)) return -1; + if (hasTWidenOp(TSFlags) && hasTMOp(TSFlags)) + return getTMOpNum(Desc) - 1; + // The operand order // -------------------------------------- // | n-1 (if any) | n-2 | n-3 | n-4 | @@ -385,7 +445,9 @@ enum OperandType : unsigned { OPERAND_SEW_MASK, // Vector rounding mode for VXRM or FRM. OPERAND_VEC_RM, - OPERAND_LAST_RISCV_IMM = OPERAND_VEC_RM, + // Vtype operand for XSfmm extension. + OPERAND_XSFMM_VTYPE, + OPERAND_LAST_RISCV_IMM = OPERAND_XSFMM_VTYPE, // Operand is either a register or uimm5; this is used by V extension pseudo // instructions to represent a value that can be passed as AVL to either vsetvli // or vsetivli.
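A sketch of the trailing operand layout that the getT*OpNum helpers above assume for XSfmm pseudos, reconstructed from their offsets (operand names are illustrative, not from the patch):
//   with a TK operand:     ... | tm  | tn  | tk  | log2sew | twiden
//                                N-5   N-4   N-3     N-2       N-1
//   without TK (vtzero.t): ... | tm  | tn  | log2sew | twiden
//                                N-4   N-3     N-2       N-1
// Hence getTKOpNum is N-3, getTNOpNum is N-4 (N-3 without TK), getTMOpNum is
// N-5 (N-4 without TK), and with twiden present getSEWOpNum is N-2 while TN
// doubles as the VL operand; an FRM operand, when present, sits immediately
// before tm (getTMOpNum - 1).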
diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp index 66ca436..de433e4 100644 --- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp +++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp @@ -1100,6 +1100,12 @@ static bool lowerRISCVVMachineInstrToMCInst(const MachineInstr *MI, --NumOps; if (RISCVII::hasRoundModeOp(TSFlags)) --NumOps; + if (RISCVII::hasTWidenOp(TSFlags)) + --NumOps; + if (RISCVII::hasTMOp(TSFlags)) + --NumOps; + if (RISCVII::hasTKOp(TSFlags)) + --NumOps; bool hasVLOutput = RISCVInstrInfo::isFaultOnlyFirstLoad(*MI); for (unsigned OpNo = 0; OpNo != NumOps; ++OpNo) { diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 437022f..9a6afa1 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -516,6 +516,44 @@ void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) { CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp)); } +void RISCVDAGToDAGISel::selectXSfmmVSET(SDNode *Node) { + if (!Subtarget->hasVendorXSfmmbase()) + return; + + assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode"); + + SDLoc DL(Node); + MVT XLenVT = Subtarget->getXLenVT(); + + unsigned IntNo = Node->getConstantOperandVal(0); + + assert((IntNo == Intrinsic::riscv_sf_vsettnt || + IntNo == Intrinsic::riscv_sf_vsettm || + IntNo == Intrinsic::riscv_sf_vsettk) && + "Unexpected XSfmm vset intrinsic"); + + unsigned SEW = RISCVVType::decodeVSEW(Node->getConstantOperandVal(2)); + unsigned Widen = RISCVVType::decodeTWiden(Node->getConstantOperandVal(3)); + unsigned PseudoOpCode = + IntNo == Intrinsic::riscv_sf_vsettnt ? RISCV::PseudoSF_VSETTNT + : IntNo == Intrinsic::riscv_sf_vsettm ? RISCV::PseudoSF_VSETTM + : RISCV::PseudoSF_VSETTK; + + if (IntNo == Intrinsic::riscv_sf_vsettnt) { + unsigned VTypeI = RISCVVType::encodeXSfmmVType(SEW, Widen, 0); + SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT); + + ReplaceNode(Node, CurDAG->getMachineNode(PseudoOpCode, DL, XLenVT, + Node->getOperand(1), VTypeIOp)); + } else { + SDValue Log2SEW = CurDAG->getTargetConstant(Log2_32(SEW), DL, XLenVT); + SDValue TWiden = CurDAG->getTargetConstant(Widen, DL, XLenVT); + ReplaceNode(Node, + CurDAG->getMachineNode(PseudoOpCode, DL, XLenVT, + Node->getOperand(1), Log2SEW, TWiden)); + } +} + bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) { MVT VT = Node->getSimpleValueType(0); unsigned Opcode = Node->getOpcode(); @@ -847,6 +885,11 @@ bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) { return true; } +static Register getTileReg(uint64_t TileNum) { + assert(TileNum <= 15 && "Invalid tile number"); + return RISCV::T0 + TileNum; +} + void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) { if (!Subtarget->hasVInstructions()) return; @@ -2035,6 +2078,10 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { case Intrinsic::riscv_vsetvli: case Intrinsic::riscv_vsetvlimax: return selectVSETVLI(Node); + case Intrinsic::riscv_sf_vsettnt: + case Intrinsic::riscv_sf_vsettm: + case Intrinsic::riscv_sf_vsettk: + return selectXSfmmVSET(Node); } break; } @@ -2458,6 +2505,142 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { case Intrinsic::riscv_sf_vc_i_se: selectSF_VC_X_SE(Node); return; + case Intrinsic::riscv_sf_vlte8: + case Intrinsic::riscv_sf_vlte16: + case Intrinsic::riscv_sf_vlte32: + case Intrinsic::riscv_sf_vlte64: { + unsigned Log2SEW; + unsigned PseudoInst; + switch (IntNo) { + case Intrinsic::riscv_sf_vlte8: + PseudoInst = RISCV::PseudoSF_VLTE8; + 
Log2SEW = 3; + break; + case Intrinsic::riscv_sf_vlte16: + PseudoInst = RISCV::PseudoSF_VLTE16; + Log2SEW = 4; + break; + case Intrinsic::riscv_sf_vlte32: + PseudoInst = RISCV::PseudoSF_VLTE32; + Log2SEW = 5; + break; + case Intrinsic::riscv_sf_vlte64: + PseudoInst = RISCV::PseudoSF_VLTE64; + Log2SEW = 6; + break; + } + + SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); + SDValue TWidenOp = CurDAG->getTargetConstant(1, DL, XLenVT); + SDValue Operands[] = {Node->getOperand(2), + Node->getOperand(3), + Node->getOperand(4), + SEWOp, + TWidenOp, + Node->getOperand(0)}; + + MachineSDNode *TileLoad = + CurDAG->getMachineNode(PseudoInst, DL, Node->getVTList(), Operands); + if (auto *MemOp = dyn_cast<MemSDNode>(Node)) + CurDAG->setNodeMemRefs(TileLoad, {MemOp->getMemOperand()}); + + ReplaceNode(Node, TileLoad); + return; + } + case Intrinsic::riscv_sf_mm_s_s: + case Intrinsic::riscv_sf_mm_s_u: + case Intrinsic::riscv_sf_mm_u_s: + case Intrinsic::riscv_sf_mm_u_u: + case Intrinsic::riscv_sf_mm_e5m2_e5m2: + case Intrinsic::riscv_sf_mm_e5m2_e4m3: + case Intrinsic::riscv_sf_mm_e4m3_e5m2: + case Intrinsic::riscv_sf_mm_e4m3_e4m3: + case Intrinsic::riscv_sf_mm_f_f: { + bool HasFRM = false; + unsigned PseudoInst; + switch (IntNo) { + case Intrinsic::riscv_sf_mm_s_s: + PseudoInst = RISCV::PseudoSF_MM_S_S; + break; + case Intrinsic::riscv_sf_mm_s_u: + PseudoInst = RISCV::PseudoSF_MM_S_U; + break; + case Intrinsic::riscv_sf_mm_u_s: + PseudoInst = RISCV::PseudoSF_MM_U_S; + break; + case Intrinsic::riscv_sf_mm_u_u: + PseudoInst = RISCV::PseudoSF_MM_U_U; + break; + case Intrinsic::riscv_sf_mm_e5m2_e5m2: + PseudoInst = RISCV::PseudoSF_MM_E5M2_E5M2; + HasFRM = true; + break; + case Intrinsic::riscv_sf_mm_e5m2_e4m3: + PseudoInst = RISCV::PseudoSF_MM_E5M2_E4M3; + HasFRM = true; + break; + case Intrinsic::riscv_sf_mm_e4m3_e5m2: + PseudoInst = RISCV::PseudoSF_MM_E4M3_E5M2; + HasFRM = true; + break; + case Intrinsic::riscv_sf_mm_e4m3_e4m3: + PseudoInst = RISCV::PseudoSF_MM_E4M3_E4M3; + HasFRM = true; + break; + case Intrinsic::riscv_sf_mm_f_f: + if (Node->getOperand(3).getValueType().getScalarType() == MVT::bf16) + PseudoInst = RISCV::PseudoSF_MM_F_F_ALT; + else + PseudoInst = RISCV::PseudoSF_MM_F_F; + HasFRM = true; + break; + } + uint64_t TileNum = Node->getConstantOperandVal(2); + SDValue Op1 = Node->getOperand(3); + SDValue Op2 = Node->getOperand(4); + MVT VT = Op1->getSimpleValueType(0); + unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); + SDValue TmOp = Node->getOperand(5); + SDValue TnOp = Node->getOperand(6); + SDValue TkOp = Node->getOperand(7); + SDValue TWidenOp = Node->getOperand(8); + SDValue Chain = Node->getOperand(0); + + // sf.mm.f.f with sew=32, twiden=2 is invalid + if (IntNo == Intrinsic::riscv_sf_mm_f_f && Log2SEW == 5 && + TWidenOp->getAsZExtVal() == 2) + reportFatalUsageError("sf.mm.f.f doesn't support (sew=32, twiden=2)"); + + SmallVector<SDValue, 10> Operands( + {CurDAG->getRegister(getTileReg(TileNum), XLenVT), Op1, Op2}); + if (HasFRM) + Operands.push_back( + CurDAG->getTargetConstant(RISCVFPRndMode::DYN, DL, XLenVT)); + Operands.append({TmOp, TnOp, TkOp, + CurDAG->getTargetConstant(Log2SEW, DL, XLenVT), TWidenOp, + Chain}); + + auto *NewNode = + CurDAG->getMachineNode(PseudoInst, DL, Node->getVTList(), Operands); + + ReplaceNode(Node, NewNode); + return; + } + case Intrinsic::riscv_sf_vtzero_t: { + uint64_t TileNum = Node->getConstantOperandVal(2); + SDValue Tm = Node->getOperand(3); + SDValue Tn = Node->getOperand(4); + SDValue Log2SEW = Node->getOperand(5); + SDValue 
TWiden = Node->getOperand(6); + SDValue Chain = Node->getOperand(0); + auto *NewNode = CurDAG->getMachineNode( + RISCV::PseudoSF_VTZERO_T, DL, Node->getVTList(), + {CurDAG->getRegister(getTileReg(TileNum), XLenVT), Tm, Tn, Log2SEW, + TWiden, Chain}); + + ReplaceNode(Node, NewNode); + return; + } } break; } @@ -3353,14 +3536,20 @@ bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal, 0); return true; } - // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the - // LHS is equal to the RHS and non-zero otherwise. + // If the RHS is [-2047,2048], we can use addi/addiw with -RHS to produce 0 + // if the LHS is equal to the RHS and non-zero otherwise. if (isInt<12>(CVal) || CVal == 2048) { - Val = SDValue( - CurDAG->getMachineNode( - RISCV::ADDI, DL, N->getValueType(0), LHS, - CurDAG->getSignedTargetConstant(-CVal, DL, N->getValueType(0))), - 0); + unsigned Opc = RISCV::ADDI; + if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG && + cast<VTSDNode>(LHS.getOperand(1))->getVT() == MVT::i32) { + Opc = RISCV::ADDIW; + LHS = LHS.getOperand(0); + } + + Val = SDValue(CurDAG->getMachineNode(Opc, DL, N->getValueType(0), LHS, + CurDAG->getSignedTargetConstant( + -CVal, DL, N->getValueType(0))), + 0); return true; } if (isPowerOf2_64(CVal) && Subtarget->hasStdExtZbs()) { diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h index f03b44c..19ee103 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -165,6 +165,7 @@ public: void selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsOrdered); void selectVSETVLI(SDNode *Node); + void selectXSfmmVSET(SDNode *Node); void selectSF_VC_X_SE(SDNode *Node); diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index cf8d120..1b7cb9b 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -168,10 +168,13 @@ struct DemandedFields { // If this is true, we demand that VTYPE is set to some legal state, i.e. that // vill is unset. 
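// (The two demand bits added below extend this scheme to the XSfmm vtype
// fields: TWiden demands the twiden encoding and AltFmt the altfmt bit,
// mirroring the new compatibility checks added to areCompatibleVTYPEs below.)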
bool VILL = false; + bool TWiden = false; + bool AltFmt = false; // Return true if any part of VTYPE was used bool usedVTYPE() const { - return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL; + return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL || + TWiden || AltFmt; } // Return true if any property of VL was used @@ -187,6 +190,8 @@ struct DemandedFields { TailPolicy = true; MaskPolicy = true; VILL = true; + TWiden = true; + AltFmt = true; } // Mark all VL properties as demanded @@ -212,6 +217,8 @@ struct DemandedFields { TailPolicy |= B.TailPolicy; MaskPolicy |= B.MaskPolicy; VILL |= B.VILL; + AltFmt |= B.AltFmt; + TWiden |= B.TWiden; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -258,7 +265,9 @@ struct DemandedFields { OS << "SEWLMULRatio=" << SEWLMULRatio << ", "; OS << "TailPolicy=" << TailPolicy << ", "; OS << "MaskPolicy=" << MaskPolicy << ", "; - OS << "VILL=" << VILL; + OS << "VILL=" << VILL << ", "; + OS << "AltFmt=" << AltFmt << ", "; + OS << "TWiden=" << TWiden; OS << "}"; } #endif @@ -328,6 +337,15 @@ static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType, if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) != RISCVVType::isMaskAgnostic(NewVType)) return false; + if (Used.TWiden && (RISCVVType::hasXSfmmWiden(CurVType) != + RISCVVType::hasXSfmmWiden(NewVType) || + (RISCVVType::hasXSfmmWiden(CurVType) && + RISCVVType::getXSfmmWiden(CurVType) != + RISCVVType::getXSfmmWiden(NewVType)))) + return false; + if (Used.AltFmt && + RISCVVType::isAltFmt(CurVType) != RISCVVType::isAltFmt(NewVType)) + return false; return true; } @@ -479,6 +497,11 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) { Res.TailPolicy = false; } + Res.AltFmt = RISCVII::getAltFmtType(MI.getDesc().TSFlags) != + RISCVII::AltFmtType::DontCare; + Res.TWiden = RISCVII::hasTWidenOp(MI.getDesc().TSFlags) || + RISCVInstrInfo::isXSfmmVectorConfigInstr(MI); + return Res; } @@ -510,6 +533,8 @@ class VSETVLIInfo { uint8_t TailAgnostic : 1; uint8_t MaskAgnostic : 1; uint8_t SEWLMULRatioOnly : 1; + uint8_t AltFmt : 1; + uint8_t TWiden : 3; public: VSETVLIInfo() @@ -586,6 +611,8 @@ public: RISCVVType::VLMUL getVLMUL() const { return VLMul; } bool getTailAgnostic() const { return TailAgnostic; } bool getMaskAgnostic() const { return MaskAgnostic; } + bool getAltFmt() const { return AltFmt; } + unsigned getTWiden() const { return TWiden; } bool hasNonZeroAVL(const LiveIntervals *LIS) const { if (hasAVLImm()) @@ -647,21 +674,31 @@ public: SEW = RISCVVType::getSEW(VType); TailAgnostic = RISCVVType::isTailAgnostic(VType); MaskAgnostic = RISCVVType::isMaskAgnostic(VType); + AltFmt = RISCVVType::isAltFmt(VType); + TWiden = + RISCVVType::hasXSfmmWiden(VType) ? 
RISCVVType::getXSfmmWiden(VType) : 0; } - void setVTYPE(RISCVVType::VLMUL L, unsigned S, bool TA, bool MA) { + void setVTYPE(RISCVVType::VLMUL L, unsigned S, bool TA, bool MA, bool Altfmt, + unsigned W) { assert(isValid() && !isUnknown() && "Can't set VTYPE for uninitialized or unknown"); VLMul = L; SEW = S; TailAgnostic = TA; MaskAgnostic = MA; + AltFmt = Altfmt; + TWiden = W; } + void setAltFmt(bool AF) { AltFmt = AF; } + void setVLMul(RISCVVType::VLMUL VLMul) { this->VLMul = VLMul; } unsigned encodeVTYPE() const { assert(isValid() && !isUnknown() && !SEWLMULRatioOnly && "Can't encode VTYPE for uninitialized or unknown"); + if (TWiden != 0) + return RISCVVType::encodeXSfmmVType(SEW, TWiden, AltFmt); return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic); } @@ -674,9 +711,9 @@ public: "Can't compare VTYPE in unknown state"); assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly && "Can't compare when only LMUL/SEW ratio is valid."); - return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) == + return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic, AltFmt, TWiden) == std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic, - Other.MaskAgnostic); + Other.MaskAgnostic, Other.AltFmt, Other.TWiden); } unsigned getSEWLMULRatio() const { @@ -825,7 +862,9 @@ public: << "SEW=e" << (unsigned)SEW << ", " << "TailAgnostic=" << (bool)TailAgnostic << ", " << "MaskAgnostic=" << (bool)MaskAgnostic << ", " - << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}"; + << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << ", " + << "TWiden=" << (unsigned)TWiden << ", " + << "AltFmt=" << (bool)AltFmt << "}"; } #endif }; @@ -853,6 +892,11 @@ struct BlockData { BlockData() = default; }; +enum TKTMMode { + VSETTK = 0, + VSETTM = 1, +}; + class RISCVInsertVSETVLI : public MachineFunctionPass { const RISCVSubtarget *ST; const TargetInstrInfo *TII; @@ -908,6 +952,7 @@ private: VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) const; VSETVLIInfo computeInfoForInstr(const MachineInstr &MI) const; void forwardVSETVLIAVL(VSETVLIInfo &Info) const; + bool insertVSETMTK(MachineBasicBlock &MBB, TKTMMode Mode) const; }; } // end anonymous namespace @@ -945,6 +990,18 @@ RISCVInsertVSETVLI::getInfoForVSETVLI(const MachineInstr &MI) const { VSETVLIInfo NewInfo; if (MI.getOpcode() == RISCV::PseudoVSETIVLI) { NewInfo.setAVLImm(MI.getOperand(1).getImm()); + } else if (RISCVInstrInfo::isXSfmmVectorConfigTNInstr(MI)) { + assert(MI.getOpcode() == RISCV::PseudoSF_VSETTNT || + MI.getOpcode() == RISCV::PseudoSF_VSETTNTX0); + switch (MI.getOpcode()) { + case RISCV::PseudoSF_VSETTNTX0: + NewInfo.setAVLVLMAX(); + break; + case RISCV::PseudoSF_VSETTNT: + Register ATNReg = MI.getOperand(1).getReg(); + NewInfo.setAVLRegDef(getVNInfoFromReg(ATNReg, MI, LIS), ATNReg); + break; + } } else { assert(MI.getOpcode() == RISCV::PseudoVSETVLI || MI.getOpcode() == RISCV::PseudoVSETVLIX0); @@ -1005,11 +1062,34 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const { RISCVVType::VLMUL VLMul = RISCVII::getLMul(TSFlags); + bool AltFmt = RISCVII::getAltFmtType(TSFlags) == RISCVII::AltFmtType::AltFmt; + InstrInfo.setAltFmt(AltFmt); + unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm(); // A Log2SEW of 0 is an operation on mask registers only. unsigned SEW = Log2SEW ? 
1 << Log2SEW : 8; assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW"); + if (RISCVII::hasTWidenOp(TSFlags)) { + const MachineOperand &TWidenOp = + MI.getOperand(MI.getNumExplicitOperands() - 1); + unsigned TWiden = TWidenOp.getImm(); + + InstrInfo.setAVLVLMAX(); + if (RISCVII::hasVLOp(TSFlags)) { + const MachineOperand &TNOp = + MI.getOperand(RISCVII::getTNOpNum(MI.getDesc())); + + if (TNOp.getReg().isVirtual()) + InstrInfo.setAVLRegDef(getVNInfoFromReg(TNOp.getReg(), MI, LIS), + TNOp.getReg()); + } + + InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic, AltFmt, TWiden); + + return InstrInfo; + } + if (RISCVII::hasVLOp(TSFlags)) { const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI)); if (VLOp.isImm()) { @@ -1045,7 +1125,9 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const { assert(SEW == EEW && "Initial SEW doesn't match expected EEW"); } #endif - InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic); + // TODO: Propagate the twiden from previous vtype for potential reuse. + InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic, AltFmt, + /*TWiden*/ 0); forwardVSETVLIAVL(InstrInfo); @@ -1053,10 +1135,33 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const { } void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, - MachineBasicBlock::iterator InsertPt, DebugLoc DL, - const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) { - + MachineBasicBlock::iterator InsertPt, + DebugLoc DL, const VSETVLIInfo &Info, + const VSETVLIInfo &PrevInfo) { ++NumInsertedVSETVL; + + if (Info.getTWiden()) { + if (Info.hasAVLVLMAX()) { + Register DestReg = MRI->createVirtualRegister(&RISCV::GPRNoX0RegClass); + auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoSF_VSETTNTX0)) + .addReg(DestReg, RegState::Define | RegState::Dead) + .addReg(RISCV::X0, RegState::Kill) + .addImm(Info.encodeVTYPE()); + if (LIS) { + LIS->InsertMachineInstrInMaps(*MI); + LIS->createAndComputeVirtRegInterval(DestReg); + } + } else { + auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoSF_VSETTNT)) + .addReg(RISCV::X0, RegState::Define | RegState::Dead) + .addReg(Info.getAVLReg()) + .addImm(Info.encodeVTYPE()); + if (LIS) + LIS->InsertMachineInstrInMaps(*MI); + } + return; + } + if (PrevInfo.isValid() && !PrevInfo.isUnknown()) { // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same // VLMAX. @@ -1198,7 +1303,8 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, // be coalesced into another vsetvli since we won't demand any fields. VSETVLIInfo NewInfo; // Need a new VSETVLIInfo to clear SEWLMULRatioOnly NewInfo.setAVLImm(1); - NewInfo.setVTYPE(RISCVVType::LMUL_1, /*sew*/ 8, /*ta*/ true, /*ma*/ true); + NewInfo.setVTYPE(RISCVVType::LMUL_1, /*sew*/ 8, /*ta*/ true, /*ma*/ true, + /*AltFmt*/ false, /*W*/ 0); Info = NewInfo; return; } @@ -1240,7 +1346,9 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() || IncomingInfo.getTailAgnostic(), (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() || - IncomingInfo.getMaskAgnostic()); + IncomingInfo.getMaskAgnostic(), + (Demanded.AltFmt ? IncomingInfo : Info).getAltFmt(), + Demanded.TWiden ? IncomingInfo.getTWiden() : 0); // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep // the AVL. 
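For readers tracking the new vtype state: a non-zero TWiden is what flips VSETVLIInfo::encodeVTYPE() over to the XSfmm encoding, which is also why areCompatibleVTYPEs() above must compare TWiden and AltFmt explicitly. A minimal sketch of that dispatch, using only accessors visible in this patch (the free function encodeEitherVType is illustrative, not part of the change; the actual bit layout stays inside the RISCVVType helpers):

// Sketch: choose the VTYPE encoding the way VSETVLIInfo::encodeVTYPE does.
// TWiden == 0 means "not an XSfmm operation".
unsigned encodeEitherVType(const VSETVLIInfo &Info) {
  if (Info.getTWiden() != 0) // XSfmm state: SEW, widen factor, altfmt.
    return RISCVVType::encodeXSfmmVType(Info.getSEW(), Info.getTWiden(),
                                        Info.getAltFmt());
  // Classic vector config: LMUL, SEW and the tail/mask policies.
  return RISCVVType::encodeVTYPE(Info.getVLMUL(), Info.getSEW(),
                                 Info.getTailAgnostic(),
                                 Info.getMaskAgnostic());
}

Two states that agree on every classic field can still differ once the XSfmm bits are demanded, hence the extra fields in the equality and compatibility checks above.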
@@ -1293,7 +1401,8 @@ bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB, if (RISCVInstrInfo::isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags) || - isVectorCopy(ST->getRegisterInfo(), MI)) + isVectorCopy(ST->getRegisterInfo(), MI) || + RISCVInstrInfo::isXSfmmVectorConfigInstr(MI)) HadVectorOp = true; transferAfter(Info, MI); @@ -1675,6 +1784,12 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const { }; for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) { + // TODO: Support XSfmm. + if (RISCVII::hasTWidenOp(MI.getDesc().TSFlags) || + RISCVInstrInfo::isXSfmmVectorConfigInstr(MI)) { + NextMI = nullptr; + continue; + } if (!RISCVInstrInfo::isVectorConfigInstr(MI)) { Used.doUnion(getDemanded(MI, ST)); @@ -1788,6 +1903,65 @@ void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) { } } +bool RISCVInsertVSETVLI::insertVSETMTK(MachineBasicBlock &MBB, + TKTMMode Mode) const { + + bool Changed = false; + for (auto &MI : MBB) { + uint64_t TSFlags = MI.getDesc().TSFlags; + if (RISCVInstrInfo::isXSfmmVectorConfigTMTKInstr(MI) || + !RISCVII::hasSEWOp(TSFlags) || !RISCVII::hasTWidenOp(TSFlags)) + continue; + + VSETVLIInfo CurrInfo = computeInfoForInstr(MI); + + if (Mode == VSETTK && !RISCVII::hasTKOp(TSFlags)) + continue; + + if (Mode == VSETTM && !RISCVII::hasTMOp(TSFlags)) + continue; + + unsigned OpNum = 0; + unsigned Opcode = 0; + switch (Mode) { + case VSETTK: + OpNum = RISCVII::getTKOpNum(MI.getDesc()); + Opcode = RISCV::PseudoSF_VSETTK; + break; + case VSETTM: + OpNum = RISCVII::getTMOpNum(MI.getDesc()); + Opcode = RISCV::PseudoSF_VSETTM; + break; + } + + assert(OpNum && Opcode && "Invalid OpNum or Opcode"); + + MachineOperand &Op = MI.getOperand(OpNum); + + auto TmpMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(Opcode)) + .addReg(RISCV::X0, RegState::Define | RegState::Dead) + .addReg(Op.getReg()) + .addImm(Log2_32(CurrInfo.getSEW())) + .addImm(Log2_32(CurrInfo.getTWiden()) + 1); + + Changed = true; + Register Reg = Op.getReg(); + Op.setReg(Register()); + Op.setIsKill(false); + if (LIS) { + LIS->InsertMachineInstrInMaps(*TmpMI); + LiveInterval &LI = LIS->getInterval(Reg); + + // Erase the AVL operand from the instruction. + LIS->shrinkToUses(&LI); + // TODO: Enable this once needVSETVLIPHI is supported. + // SmallVector<LiveInterval *> SplitLIs; + // LIS->splitSeparateComponents(LI, SplitLIs); + } + } + return Changed; +} + bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) { // Skip if the vector extension is not enabled. 
ST = &MF.getSubtarget<RISCVSubtarget>(); @@ -1865,6 +2039,11 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock &MBB : MF) insertReadVL(MBB); + for (MachineBasicBlock &MBB : MF) { + insertVSETMTK(MBB, VSETTM); + insertVSETMTK(MBB, VSETTK); + } + BlockInfo.clear(); return HaveVectorOp; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td index 2afd77a..fee1d15 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td +++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td @@ -177,7 +177,7 @@ def EltDepsVL : EltDeps<vl=1, mask=0>; def EltDepsMask : EltDeps<vl=0, mask=1>; def EltDepsVLMask : EltDeps<vl=1, mask=1>; -class EEW <bits<2> val> { +class EEW<bits<2> val> { bits<2> Value = val; } def EEW1 : EEW<0>; @@ -185,6 +185,13 @@ def EEWSEWx1 : EEW<1>; def EEWSEWx2 : EEW<2>; def EEWSEWx4 : EEW<3>; +class AltFmtType<bits<2> val> { + bits<2> Value = val; +} +def DONT_CARE_ALTFMT : AltFmtType<0>; +def IS_NOT_ALTFMT : AltFmtType<1>; +def IS_ALTFMT : AltFmtType<2>; + class RVInstCommon<dag outs, dag ins, string opcodestr, string argstr, list<dag> pattern, InstFormat format> : Instruction { let Namespace = "RISCV"; @@ -267,6 +274,22 @@ class RVInstCommon<dag outs, dag ins, string opcodestr, string argstr, // operands' VLs. bit ReadsPastVL = 0; let TSFlags{26} = ReadsPastVL; + + // 0 -> Don't care about altfmt bit in VTYPE. + // 1 -> Is not altfmt. + // 2 -> Is altfmt(BF16). + AltFmtType AltFmtType = DONT_CARE_ALTFMT; + let TSFlags{28-27} = AltFmtType.Value; + + // XSfmmbase + bit HasTWidenOp = 0; + let TSFlags{29} = HasTWidenOp; + + bit HasTmOp = 0; + let TSFlags{30} = HasTmOp; + + bit HasTkOp = 0; + let TSFlags{31} = HasTkOp; } class RVInst<dag outs, dag ins, string opcodestr, string argstr, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 96e1078..ddb53a2 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -3005,6 +3005,9 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI, else Ok = RISCVFPRndMode::isValidRoundingMode(Imm); break; + case RISCVOp::OPERAND_XSFMM_VTYPE: + Ok = RISCVVType::isValidXSfmmVType(Imm); + break; } if (!Ok) { ErrInfo = "Invalid immediate"; @@ -3670,6 +3673,11 @@ std::string RISCVInstrInfo::createMIROperandComment( RISCVVType::printVType(Imm, OS); break; } + case RISCVOp::OPERAND_XSFMM_VTYPE: { + unsigned Imm = Op.getImm(); + RISCVVType::printXSfmmVType(Imm, OS); + break; + } case RISCVOp::OPERAND_SEW: case RISCVOp::OPERAND_SEW_MASK: { unsigned Log2SEW = Op.getImm(); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 298d35a..65865ce 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -159,7 +159,8 @@ class PseudoToVInst<string PseudoInst> { ["_M4", ""], ["_M8", ""], ["_SE", ""], - ["_RM", ""] + ["_RM", ""], + ["_ALT", ""] ]; string VInst = !foldl(PseudoInst, AffixSubsts, Acc, AffixSubst, !subst(AffixSubst[0], AffixSubst[1], Acc)); @@ -6396,7 +6397,7 @@ let Defs = [VXSAT] in { // 13. Vector Floating-Point Instructions //===----------------------------------------------------------------------===// -let Predicates = [HasVInstructionsAnyF] in { +let Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT in { //===----------------------------------------------------------------------===// // 13.2. 
Vector Single-Width Floating-Point Add/Subtract Instructions //===----------------------------------------------------------------------===// @@ -6565,7 +6566,7 @@ defm PseudoVFNCVT_F_F : VPseudoVNCVTD_W_RM; defm PseudoVFNCVT_ROD_F_F : VPseudoVNCVTD_W; } // mayRaiseFPException = true -} // Predicates = [HasVInstructionsAnyF] +} // Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT //===----------------------------------------------------------------------===// // 14. Vector Reduction Operations @@ -6593,7 +6594,7 @@ defm PseudoVWREDSUM : VPseudoVWRED_VS; } } // Predicates = [HasVInstructions] -let Predicates = [HasVInstructionsAnyF] in { +let Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT in { //===----------------------------------------------------------------------===// // 14.3. Vector Single-Width Floating-Point Reduction Instructions //===----------------------------------------------------------------------===// @@ -6612,7 +6613,7 @@ defm PseudoVFWREDUSUM : VPseudoVFWRED_VS_RM; defm PseudoVFWREDOSUM : VPseudoVFWREDO_VS_RM; } -} // Predicates = [HasVInstructionsAnyF] +} // Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT //===----------------------------------------------------------------------===// // 15. Vector Mask Instructions @@ -6703,7 +6704,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { // 16.2. Floating-Point Scalar Move Instructions //===----------------------------------------------------------------------===// -let Predicates = [HasVInstructionsAnyF] in { +let Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT in { let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { foreach f = FPList in { let HasSEWOp = 1, BaseInstr = VFMV_F_S in @@ -6718,7 +6719,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { Sched<[WriteVMovSF, ReadVMovSF_V, ReadVMovSF_F]>; } } -} // Predicates = [HasVInstructionsAnyF] +} // Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT //===----------------------------------------------------------------------===// // 16.3. Vector Slide Instructions @@ -6730,10 +6731,10 @@ let Predicates = [HasVInstructions] in { defm PseudoVSLIDE1DOWN : VPseudoVSLD1_VX; } // Predicates = [HasVInstructions] -let Predicates = [HasVInstructionsAnyF] in { +let Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT in { defm PseudoVFSLIDE1UP : VPseudoVSLD1_VF<"@earlyclobber $rd">; defm PseudoVFSLIDE1DOWN : VPseudoVSLD1_VF; -} // Predicates = [HasVInstructionsAnyF] +} // Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT //===----------------------------------------------------------------------===// // 16.4. 
Vector Register Gather Instructions diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td index 557d873..6a4119a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td @@ -438,8 +438,10 @@ let Predicates = [HasVendorXSfvcp] in { } foreach f = FPList in { foreach m = f.MxList in { - defm f.FX # "V" : VPseudoVC_XV<m, f.fprclass, payload1>; - defm f.FX # "VV" : VPseudoVC_XVV<m, f.fprclass, payload1>; + let AltFmtType = IS_NOT_ALTFMT in { + defm f.FX # "V" : VPseudoVC_XV<m, f.fprclass, payload1>; + defm f.FX # "VV" : VPseudoVC_XVV<m, f.fprclass, payload1>; + } } } foreach m = MxListW in { @@ -449,7 +451,8 @@ let Predicates = [HasVendorXSfvcp] in { } foreach f = FPListW in { foreach m = f.MxList in - defm f.FX # "VW" : VPseudoVC_XVW<m, f.fprclass, payload1>; + let AltFmtType = IS_NOT_ALTFMT in + defm f.FX # "VW" : VPseudoVC_XVW<m, f.fprclass, payload1>; } } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td index a5ee701..d77a44a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td @@ -225,7 +225,7 @@ let Predicates = [HasVendorXSfmmbase] in { def SF_VSETTM : SFInstSetSingle<(outs GPR:$rd), (ins GPR:$rs1), 0b00001, "sf.vsettm", "$rd, $rs1">; def SF_VSETTK : SFInstSetSingle<(outs GPR:$rd), (ins GPR:$rs1), 0b00010, - "sf.vsettk", "$rd, $rs1">; + "sf.vsettk", "$rd, $rs1">; def SF_VTDISCARD : SFInstVtDiscard<"sf.vtdiscard">; def SF_VTMV_V_T : SFInstTileMoveOp<0b010000, (outs VR:$vd), (ins GPR:$rs1), @@ -277,3 +277,195 @@ let Uses = [FRM], mayRaiseFPException = true in { } // Predicates = [HasVendorXSfmm32a8f] } // DecoderNamespace = "XSfvector" + +class VPseudoSF_VTileLoad + : RISCVVPseudo<(outs), (ins GPR:$rs2, GPR:$rs1, AVL:$atn, ixlenimm:$sew, + ixlenimm:$twiden)> { + let mayLoad = 1; + let mayStore = 0; + let HasVLOp = 1; // Tn + let HasSEWOp = 1; + let HasTWidenOp = 1; + let hasSideEffects = 1; +} + +class VPseudoSF_VTileStore + : RISCVVPseudo<(outs), (ins GPR:$rs2, GPR:$rs1, AVL:$atn, ixlenimm:$sew, + ixlenimm:$twiden)> { + let mayLoad = 0; + let mayStore = 1; + let HasVLOp = 1; // Tn + let HasSEWOp = 1; + let HasTWidenOp = 1; + let hasSideEffects = 1; +} + +class VPseudoSF_VTileMove_V_T + : RISCVVPseudo<(outs VRM8:$vd), (ins GPR:$rs1, AVL:$atn, ixlenimm:$sew, + ixlenimm:$twiden)> { + let mayLoad = 0; + let mayStore = 0; + let HasVLOp = 1; // Tn + let HasSEWOp = 1; + let HasTWidenOp = 1; + let hasSideEffects = 1; +} + +class VPseudoSF_VTileMove_T_V + : RISCVVPseudo<(outs), (ins GPR:$rs1, VRM8:$vs2, AVL:$atn, ixlenimm:$sew, + ixlenimm:$twiden)> { + let mayLoad = 0; + let mayStore = 0; + let HasVLOp = 1; // Tn + let HasSEWOp = 1; + let HasTWidenOp = 1; + let hasSideEffects = 1; +} + +class VPseudoSF_MatMul<RegisterClass mtd_class> + : RISCVVPseudo<(outs), + (ins mtd_class:$rd, VRM8:$vs2, VRM8:$vs1, AVL:$atm, AVL:$atn, + AVL:$atk, ixlenimm:$sew, ixlenimm:$twiden)> { + let mayLoad = 0; + let mayStore = 0; + let HasTmOp = 1; + let HasVLOp = 1; // Tn + let HasTkOp = 1; + let HasSEWOp = 1; + let HasTWidenOp = 1; + let hasSideEffects = 1; +} + +class VPseudoSF_MatMul_FRM<RegisterClass mtd_class> + : RISCVVPseudo<(outs), + (ins mtd_class:$rd, VRM8:$vs2, VRM8:$vs1, ixlenimm:$frm, + AVL:$atm, AVL:$atn, AVL:$atk, ixlenimm:$sew, + ixlenimm:$twiden), []> { + let mayLoad = 0; + let mayStore = 0; + let HasTmOp = 1; + let HasVLOp = 1; // Tn + let HasTkOp = 1; + let HasSEWOp = 1; + let HasRoundModeOp = 1; + 
let hasPostISelHook = 1; + let HasTWidenOp = 1; + let hasSideEffects = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { +let Defs = [VL, VTYPE] in { + def PseudoSF_VSETTNT + : Pseudo<(outs GPR:$rd), + (ins GPRNoX0:$rs1, XSfmmVTypeOp:$vtypei), []>, + PseudoInstExpansion<(VSETVLI GPR:$rd, GPR:$rs1, VTypeIOp11:$vtypei)>, + Sched<[WriteVSETVLI, ReadVSETVLI]>; + def PseudoSF_VSETTNTX0 + : Pseudo<(outs GPRNoX0:$rd), + (ins GPRX0:$rs1, XSfmmVTypeOp:$vtypei), []>, + PseudoInstExpansion<(VSETVLI GPR:$rd, GPR:$rs1, VTypeIOp11:$vtypei)>, + Sched<[WriteVSETVLI, ReadVSETVLI]>; + def PseudoSF_VSETTNTX0X0 + : Pseudo<(outs GPRX0:$rd), + (ins GPRX0:$rs1, XSfmmVTypeOp:$vtypei), []>, + PseudoInstExpansion<(VSETVLI GPR:$rd, GPR:$rs1, VTypeIOp11:$vtypei)>, + Sched<[WriteVSETVLI, ReadVSETVLI]>; +} + +let Defs = [VTYPE], Uses = [VTYPE], HasTWidenOp = 1, HasSEWOp = 1 in { + def PseudoSF_VSETTM + : Pseudo<(outs GPR:$rd), + (ins GPR:$rs1, ixlenimm:$log2sew, ixlenimm:$twiden), []>, + PseudoInstExpansion<(SF_VSETTM GPR:$rd, GPR:$rs1)>, + Sched<[WriteVSETVLI, ReadVSETVLI]>; + def PseudoSF_VSETTK + : Pseudo<(outs GPR:$rd), + (ins GPR:$rs1, ixlenimm:$log2sew, ixlenimm:$twiden), []>, + PseudoInstExpansion<(SF_VSETTK GPR:$rd, GPR:$rs1)>, + Sched<[WriteVSETVLI, ReadVSETVLI]>; +} +} + +foreach eew = [8, 16, 32, 64] in { + def PseudoSF_VLTE # eew : VPseudoSF_VTileLoad; + def PseudoSF_VSTE # eew : VPseudoSF_VTileStore; +} + +def PseudoSF_VTMV_T_V : VPseudoSF_VTileMove_T_V; +def PseudoSF_VTMV_V_T : VPseudoSF_VTileMove_V_T; + +foreach a = I8Encodes in + foreach b = I8Encodes in + def PseudoSF_MM_ # !toupper(a.Name) # _ # !toupper(b.Name) + : VPseudoSF_MatMul<TRM4>; + +let AltFmtType = IS_NOT_ALTFMT in + def PseudoSF_MM_F_F : VPseudoSF_MatMul_FRM<TRM2>; +let AltFmtType = IS_ALTFMT in + def PseudoSF_MM_F_F_ALT : VPseudoSF_MatMul_FRM<TRM2>; + +foreach e1 = [5, 4] in + foreach e2 = [5, 4] in + def PseudoSF_MM_E # e1 # M # !sub(7, e1) # _E # e2 # M # !sub(7, e2) + : VPseudoSF_MatMul_FRM<TRM4>; + +let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in { + let HasVLOp = 1, HasTmOp = 1, HasTWidenOp = 1, HasSEWOp = 1 in + def PseudoSF_VTZERO_T + : RISCVVPseudo<(outs), + (ins TR:$rd, AVL:$atm, AVL:$atn, ixlenimm:$sew, + ixlenimm:$twiden)>; + def PseudoSF_VTDISCARD : RISCVVPseudo<(outs), (ins), []>; +} + +class VPatXSfmmTileStore<string intrinsic_name, + string inst_name, + int log2sew> : + Pat<(!cast<Intrinsic>(intrinsic_name) + (XLenVT GPR:$rs2), + (XLenVT GPR:$rs1), + (XLenVT AVL:$tn)), + (!cast<Instruction>(inst_name) + (XLenVT GPR:$rs2), + (XLenVT GPR:$rs1), + GPR:$tn, log2sew, 1)>; + +class VPatXSfmmTileMove_T_V<string intrinsic_name, + string inst_name, + ValueType reg_type, + int log2sew> : + Pat<(!cast<Intrinsic>(intrinsic_name) + (XLenVT GPR:$rs1), + (reg_type VRM8:$vs2), + (XLenVT AVL:$atn)), + (!cast<Instruction>(inst_name) + (XLenVT GPR:$rs1), + (reg_type VRM8:$vs2), + GPR:$atn, log2sew, 1)>; + +class VPatXSfmmTileMove_V_T<string intrinsic_name, + string inst_name, + ValueType result_type, + int log2sew> : + Pat<(result_type (!cast<Intrinsic>(intrinsic_name) + (XLenVT GPR:$rs1), + (XLenVT AVL:$atn))), + (!cast<Instruction>(inst_name) + (XLenVT GPR:$rs1), + GPR:$atn, log2sew, 1)>; + +class VPatXSfmmVTDiscard<string intrinsic_name, + string inst_name> : + Pat<(!cast<Intrinsic>(intrinsic_name)), + (!cast<Instruction>(inst_name))>; + +foreach eew = [8, 16, 32, 64] in + def : VPatXSfmmTileStore<"int_riscv_sf_vste" # eew, "PseudoSF_VSTE" # eew,
!logtwo(eew)>; + +foreach vti = [VI8M8, VI16M8, VI32M8, VI64M8, VF16M8, VF32M8, VF64M8, VBF16M8] in { + def : VPatXSfmmTileMove_T_V<"int_riscv_sf_vtmv_t_v", "PseudoSF_VTMV_T_V", vti.Vector, vti.Log2SEW>; + def : VPatXSfmmTileMove_V_T<"int_riscv_sf_vtmv_v_t", "PseudoSF_VTMV_V_T", vti.Vector, vti.Log2SEW>; +} + +def : VPatXSfmmVTDiscard<"int_riscv_sf_vtdiscard", "PseudoSF_VTDISCARD">; diff --git a/llvm/lib/Target/RISCV/RISCVInstrPredicates.td b/llvm/lib/Target/RISCV/RISCVInstrPredicates.td index 3658817..dcae977 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrPredicates.td +++ b/llvm/lib/Target/RISCV/RISCVInstrPredicates.td @@ -78,7 +78,41 @@ def isVectorConfigInstr PseudoVSETVLI, PseudoVSETVLIX0, PseudoVSETVLIX0X0, - PseudoVSETIVLI + PseudoVSETIVLI, + PseudoSF_VSETTNT, + PseudoSF_VSETTNTX0, + PseudoSF_VSETTNTX0X0 + ]>>>; + +// Returns true if this is a PseudoSF_VSETTNT* instruction. +def isXSfmmVectorConfigTNInstr + : TIIPredicate<"isXSfmmVectorConfigTNInstr", + MCReturnStatement< + CheckOpcode<[ + PseudoSF_VSETTNT, + PseudoSF_VSETTNTX0, + PseudoSF_VSETTNTX0X0 + ]>>>; + +// Returns true if this is PseudoSF_VSETTM or PseudoSF_VSETTK. +def isXSfmmVectorConfigTMTKInstr + : TIIPredicate<"isXSfmmVectorConfigTMTKInstr", + MCReturnStatement< + CheckOpcode<[ + PseudoSF_VSETTM, + PseudoSF_VSETTK + ]>>>; + +// Returns true if this is an XSfmm vector configuration instruction. +def isXSfmmVectorConfigInstr + : TIIPredicate<"isXSfmmVectorConfigInstr", + MCReturnStatement< + CheckOpcode<[ + PseudoSF_VSETTNT, + PseudoSF_VSETTNTX0, + PseudoSF_VSETTNTX0X0, + PseudoSF_VSETTM, + PseudoSF_VSETTK ]>>>; // Return true if this is 'vsetvli x0, x0, vtype' which preserves diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index 40b6416..e9f43b9 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -178,6 +178,10 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const { // Shadow stack pointer. markSuperRegs(Reserved, RISCV::SSP); + // XSfmmbase + for (MCPhysReg Reg = RISCV::T0; Reg <= RISCV::T15; Reg++) + markSuperRegs(Reserved, Reg); + assert(checkAllSuperRegsMarked(Reserved)); return Reserved; } diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index 96ad5c6..0a8838c 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -156,13 +156,13 @@ FunctionPass *llvm::createRISCVVLOptimizerPass() { return new RISCVVLOptimizer(); } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] static raw_ostream &operator<<(raw_ostream &OS, const OperandInfo &OI) { OI.print(OS); return OS; } -LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] static raw_ostream &operator<<(raw_ostream &OS, const std::optional<OperandInfo> &OI) { if (OI) diff --git a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp index b765fec..56a6168 100644 --- a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp @@ -139,8 +139,8 @@ void SPIRVAsmPrinter::emitEndOfAsmFile(Module &M) { // anymore. void SPIRVAsmPrinter::cleanUp(Module &M) { // Verifier disallows uses of intrinsic global variables.
- for (StringRef GVName : {"llvm.global_ctors", "llvm.global_dtors", - "llvm.used", "llvm.compiler.used"}) { + for (StringRef GVName : + {"llvm.global_ctors", "llvm.global_dtors", "llvm.used"}) { if (GlobalVariable *GV = M.getNamedGlobal(GVName)) GV->setName(""); } diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp index e16c8f0..c6c6182 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp @@ -17,6 +17,7 @@ #include "SPIRVTargetMachine.h" #include "SPIRVUtils.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringSet.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/InstVisitor.h" @@ -2028,9 +2029,13 @@ Instruction *SPIRVEmitIntrinsics::visitUnreachableInst(UnreachableInst &I) { void SPIRVEmitIntrinsics::processGlobalValue(GlobalVariable &GV, IRBuilder<> &B) { - // Skip special artifical variable llvm.global.annotations. - if (GV.getName() == "llvm.global.annotations") + // Skip special artificial variables. + static const StringSet<> ArtificialGlobals{"llvm.global.annotations", + "llvm.compiler.used"}; + + if (ArtificialGlobals.contains(GV.getName())) return; + Constant *Init = nullptr; if (hasInitializer(&GV)) { // Deduce element type and store results in Global Registry. diff --git a/llvm/lib/Target/SystemZ/SystemZ.h b/llvm/lib/Target/SystemZ/SystemZ.h index a0cf881..5a06ea3 100644 --- a/llvm/lib/Target/SystemZ/SystemZ.h +++ b/llvm/lib/Target/SystemZ/SystemZ.h @@ -24,6 +24,7 @@ class SystemZTargetMachine; namespace SystemZ { // Condition-code mask values. +const unsigned CCMASK_NONE = 0; const unsigned CCMASK_0 = 1 << 3; const unsigned CCMASK_1 = 1 << 2; const unsigned CCMASK_2 = 1 << 1; diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 3b7d11a..de28faf 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -15,6 +15,7 @@ #include "SystemZConstantPoolValue.h" #include "SystemZMachineFunctionInfo.h" #include "SystemZTargetMachine.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -24,6 +25,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsS390.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" @@ -1514,6 +1516,9 @@ SystemZTargetLowering::getConstraintType(StringRef Constraint) const { default: break; } + } else if (Constraint.size() == 5 && Constraint.starts_with("{")) { + if (StringRef("{@cc}").compare(Constraint) == 0) + return C_Other; } return TargetLowering::getConstraintType(Constraint); } @@ -1707,6 +1712,10 @@ SystemZTargetLowering::getRegForInlineAsmConstraint( return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass, SystemZMC::VR128Regs, 32); } + if (Constraint[1] == '@') { + if (StringRef("{@cc}").compare(Constraint) == 0) + return std::make_pair(0u, &SystemZ::GR32BitRegClass); + } } return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } @@ -1737,6 +1746,38 @@ Register SystemZTargetLowering::getExceptionSelectorRegister( return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D; } +// Convert condition code in CCReg to an i32 value. 
+static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) { + SDLoc DL(CCReg); + SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg); + return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, + DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); +} + +// Lower @cc targets via setcc. +SDValue SystemZTargetLowering::LowerAsmOutputForConstraint( + SDValue &Chain, SDValue &Glue, const SDLoc &DL, + const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const { + if (StringRef("{@cc}").compare(OpInfo.ConstraintCode) != 0) + return SDValue(); + + // Check that return type is valid. + if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() || + OpInfo.ConstraintVT.getSizeInBits() < 8) + report_fatal_error("Glue output operand is of invalid type"); + + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + MRI.addLiveIn(SystemZ::CC); + + if (Glue.getNode()) { + Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32, Glue); + Chain = Glue.getValue(1); + } else + Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32); + return getCCResult(DAG, Glue); +} + void SystemZTargetLowering::LowerAsmOperandForConstraint( SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops, SelectionDAG &DAG) const { @@ -5300,14 +5341,6 @@ SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, Node->getMemoryVT(), Node->getMemOperand()); } -// Convert condition code in CCReg to an i32 value. -static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) { - SDLoc DL(CCReg); - SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg); - return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, - DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); -} - SDValue SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const { @@ -8723,95 +8756,247 @@ SDValue SystemZTargetLowering::combineSETCC( return SDValue(); } -static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { +static std::pair<SDValue, int> findCCUse(const SDValue &Val) { + switch (Val.getOpcode()) { + default: + return std::make_pair(SDValue(), SystemZ::CCMASK_NONE); + case SystemZISD::IPM: + if (Val.getOperand(0).getOpcode() == SystemZISD::CLC || + Val.getOperand(0).getOpcode() == SystemZISD::STRCMP) + return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ICMP); + return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ANY); + case SystemZISD::SELECT_CCMASK: { + SDValue Op4CCReg = Val.getOperand(4); + if (Op4CCReg.getOpcode() == SystemZISD::ICMP || + Op4CCReg.getOpcode() == SystemZISD::TM) { + auto [OpCC, OpCCValid] = findCCUse(Op4CCReg.getOperand(0)); + if (OpCC != SDValue()) + return std::make_pair(OpCC, OpCCValid); + } + auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2)); + if (!CCValid) + return std::make_pair(SDValue(), SystemZ::CCMASK_NONE); + int CCValidVal = CCValid->getZExtValue(); + return std::make_pair(Op4CCReg, CCValidVal); + } + case ISD::ADD: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + auto [Op0CC, Op0CCValid] = findCCUse(Val.getOperand(0)); + if (Op0CC != SDValue()) + return std::make_pair(Op0CC, Op0CCValid); + return findCCUse(Val.getOperand(1)); + } +} + +static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask, + SelectionDAG &DAG); + +SmallVector<SDValue, 4> static simplifyAssumingCCVal(SDValue &Val, SDValue &CC, + SelectionDAG &DAG) { + SDLoc DL(Val); + auto Opcode = Val.getOpcode(); + switch (Opcode) { + default: + return {}; + case ISD::Constant: + return {Val, 
Val, Val, Val}; + case SystemZISD::IPM: { + SDValue IPMOp0 = Val.getOperand(0); + if (IPMOp0 != CC) + return {}; + SmallVector<SDValue, 4> ShiftedCCVals; + for (auto CC : {0, 1, 2, 3}) + ShiftedCCVals.emplace_back( + DAG.getConstant((CC << SystemZ::IPM_CC), DL, MVT::i32)); + return ShiftedCCVals; + } + case SystemZISD::SELECT_CCMASK: { + SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1); + auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2)); + auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3)); + if (!CCValid || !CCMask) + return {}; + + int CCValidVal = CCValid->getZExtValue(); + int CCMaskVal = CCMask->getZExtValue(); + const auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CC, DAG); + const auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CC, DAG); + if (TrueSDVals.empty() || FalseSDVals.empty()) + return {}; + SDValue Op4CCReg = Val.getOperand(4); + if (Op4CCReg != CC) + combineCCMask(Op4CCReg, CCValidVal, CCMaskVal, DAG); + if (Op4CCReg != CC) + return {}; + SmallVector<SDValue, 4> MergedSDVals; + for (auto &CCVal : {0, 1, 2, 3}) + MergedSDVals.emplace_back(((CCMaskVal & (1 << (3 - CCVal))) != 0) + ? TrueSDVals[CCVal] + : FalseSDVals[CCVal]); + return MergedSDVals; + } + case ISD::ADD: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SRA: + // Avoid introducing CC spills (because ADD/AND/OR/XOR/SRA + // would clobber CC). + if (!Val.hasOneUse()) + return {}; + [[fallthrough]]; + case ISD::SHL: + case ISD::SRL: + SDValue Op0 = Val.getOperand(0), Op1 = Val.getOperand(1); + const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, CC, DAG); + const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, CC, DAG); + if (Op0SDVals.empty() || Op1SDVals.empty()) + return {}; + SmallVector<SDValue, 4> BinaryOpSDVals; + for (auto CCVal : {0, 1, 2, 3}) + BinaryOpSDVals.emplace_back(DAG.getNode( + Opcode, DL, Val.getValueType(), Op0SDVals[CCVal], Op1SDVals[CCVal])); + return BinaryOpSDVals; + } +} + +static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask, + SelectionDAG &DAG) { // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code // set by the CCReg instruction using the CCValid / CCMask masks, - // If the CCReg instruction is itself a ICMP testing the condition + // If the CCReg instruction is itself a ICMP / TM testing the condition // code set by some other instruction, see whether we can directly // use that condition code. - - // Verify that we have an ICMP against some constant. - if (CCValid != SystemZ::CCMASK_ICMP) - return false; - auto *ICmp = CCReg.getNode(); - if (ICmp->getOpcode() != SystemZISD::ICMP) - return false; - auto *CompareLHS = ICmp->getOperand(0).getNode(); - auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1)); - if (!CompareRHS) + auto *CCNode = CCReg.getNode(); + if (!CCNode) return false; - // Optimize the case where CompareLHS is a SELECT_CCMASK. - if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) { - // Verify that we have an appropriate mask for a EQ or NE comparison. - bool Invert = false; - if (CCMask == SystemZ::CCMASK_CMP_NE) - Invert = !Invert; - else if (CCMask != SystemZ::CCMASK_CMP_EQ) + if (CCNode->getOpcode() == SystemZISD::TM) { + if (CCValid != SystemZ::CCMASK_TM) return false; - - // Verify that the ICMP compares against one of select values. 
- auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0)); - if (!TrueVal) - return false; - auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1)); - if (!FalseVal) + auto emulateTMCCMask = [](const SDValue &Op0Val, const SDValue &Op1Val) { + auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode()); + auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode()); + if (!Op0Node || !Op1Node) + return -1; + auto Op0APVal = Op0Node->getAPIntValue(); + auto Op1APVal = Op1Node->getAPIntValue(); + auto Result = Op0APVal & Op1APVal; + bool AllOnes = Result == Op1APVal; + bool AllZeros = Result == 0; + bool IsLeftMostBitSet = Result[Op1APVal.getActiveBits()] != 0; + return AllZeros ? 0 : AllOnes ? 3 : IsLeftMostBitSet ? 2 : 1; + }; + SDValue Op0 = CCNode->getOperand(0); + SDValue Op1 = CCNode->getOperand(1); + auto [Op0CC, Op0CCValid] = findCCUse(Op0); + if (Op0CC == SDValue()) return false; - if (CompareRHS->getAPIntValue() == FalseVal->getAPIntValue()) - Invert = !Invert; - else if (CompareRHS->getAPIntValue() != TrueVal->getAPIntValue()) + const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, Op0CC, DAG); + const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, Op0CC, DAG); + if (Op0SDVals.empty() || Op1SDVals.empty()) return false; - - // Compute the effective CC mask for the new branch or select. - auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2)); - auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3)); - if (!NewCCValid || !NewCCMask) - return false; - CCValid = NewCCValid->getZExtValue(); - CCMask = NewCCMask->getZExtValue(); - if (Invert) - CCMask ^= CCValid; - - // Return the updated CCReg link. - CCReg = CompareLHS->getOperand(4); + int NewCCMask = 0; + for (auto CC : {0, 1, 2, 3}) { + auto CCVal = emulateTMCCMask(Op0SDVals[CC], Op1SDVals[CC]); + if (CCVal < 0) + return false; + NewCCMask <<= 1; + NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0; + } + NewCCMask &= Op0CCValid; + CCReg = Op0CC; + CCMask = NewCCMask; + CCValid = Op0CCValid; return true; } + if (CCNode->getOpcode() != SystemZISD::ICMP || + CCValid != SystemZ::CCMASK_ICMP) + return false; - // Optimize the case where CompareRHS is (SRA (SHL (IPM))). - if (CompareLHS->getOpcode() == ISD::SRA) { - auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1)); - if (!SRACount || SRACount->getZExtValue() != 30) - return false; - auto *SHL = CompareLHS->getOperand(0).getNode(); - if (SHL->getOpcode() != ISD::SHL) - return false; - auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1)); - if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC) - return false; - auto *IPM = SHL->getOperand(0).getNode(); - if (IPM->getOpcode() != SystemZISD::IPM) - return false; - - // Avoid introducing CC spills (because SRA would clobber CC). - if (!CompareLHS->hasOneUse()) - return false; - // Verify that the ICMP compares against zero. - if (CompareRHS->getZExtValue() != 0) + SDValue CmpOp0 = CCNode->getOperand(0); + SDValue CmpOp1 = CCNode->getOperand(1); + SDValue CmpOp2 = CCNode->getOperand(2); + auto [Op0CC, Op0CCValid] = findCCUse(CmpOp0); + if (Op0CC != SDValue()) { + const auto &&Op0SDVals = simplifyAssumingCCVal(CmpOp0, Op0CC, DAG); + const auto &&Op1SDVals = simplifyAssumingCCVal(CmpOp1, Op0CC, DAG); + if (Op0SDVals.empty() || Op1SDVals.empty()) return false; - // Compute the effective CC mask for the new branch or select. - CCMask = SystemZ::reverseCCMask(CCMask); - - // Return the updated CCReg link. 
- CCReg = IPM->getOperand(0); + auto *CmpType = dyn_cast<ConstantSDNode>(CmpOp2); + auto CmpTypeVal = CmpType->getZExtValue(); + const auto compareCCSigned = [&CmpTypeVal](const SDValue &Op0Val, + const SDValue &Op1Val) { + auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode()); + auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode()); + if (!Op0Node || !Op1Node) + return -1; + auto Op0APVal = Op0Node->getAPIntValue(); + auto Op1APVal = Op1Node->getAPIntValue(); + if (CmpTypeVal == SystemZICMP::SignedOnly) + return Op0APVal == Op1APVal ? 0 : Op0APVal.slt(Op1APVal) ? 1 : 2; + return Op0APVal == Op1APVal ? 0 : Op0APVal.ult(Op1APVal) ? 1 : 2; + }; + int NewCCMask = 0; + for (auto CC : {0, 1, 2, 3}) { + auto CCVal = compareCCSigned(Op0SDVals[CC], Op1SDVals[CC]); + if (CCVal < 0) + return false; + NewCCMask <<= 1; + NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0; + } + NewCCMask &= Op0CCValid; + CCMask = NewCCMask; + CCReg = Op0CC; + CCValid = Op0CCValid; return true; } return false; } -SDValue SystemZTargetLowering::combineBR_CCMASK( - SDNode *N, DAGCombinerInfo &DCI) const { +// Cost model for merging conditionals into one branch versus splitting them +// into multiple branches. +TargetLoweringBase::CondMergingParams +SystemZTargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc, + const Value *Lhs, + const Value *Rhs) const { + const auto isFlagOutOpCC = [](const Value *V) { + using namespace llvm::PatternMatch; + const Value *RHSVal; + const APInt *RHSC; + if (const auto *I = dyn_cast<Instruction>(V)) { + // PatternMatch.h provides concise tree-based pattern matching of LLVM IR. + if (match(I->getOperand(0), m_And(m_Value(RHSVal), m_APInt(RHSC))) || + match(I, m_Cmp(m_Value(RHSVal), m_APInt(RHSC)))) { + if (const auto *CB = dyn_cast<CallBase>(RHSVal)) { + if (CB->isInlineAsm()) { + const InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand()); + return IA && + IA->getConstraintString().find("{@cc}") != std::string::npos; + } + } + } + } + return false; + }; + // Pattern: (ICmp %asm) or (ICmp (And %asm)). + // The longest dependency chain (ICmp, And) has cost 2, so CostThreshold or + // BaseCost must be set >= 2. Conditionals are merged when the instruction + // cost is <= CostThreshold and split otherwise. + if (isFlagOutOpCC(Lhs) && isFlagOutOpCC(Rhs)) + return {3, 0, -1}; + // Default. + return {-1, -1, -1}; +} + +SDValue SystemZTargetLowering::combineBR_CCMASK(SDNode *N, + DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
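For context on the "{@cc}" handling above (getConstraintType, getRegForInlineAsmConstraint, LowerAsmOutputForConstraint, getJumpConditionMergingParams): this is SystemZ's inline-asm flag-output constraint. A hypothetical source-level sketch of the IR shape isFlagOutOpCC() matches, an icmp, possibly through an and, against the asm's CC output; the "=@cc" spelling and the sample instruction are assumptions for illustration, not taken from this patch:

// Sketch: read the 2-bit condition code (0..3) straight out of inline asm.
// The new lowering materializes it with IPM + SRL rather than a flag spill.
extern void on_equal();
int x = 1, y = 2, cc;
asm("cr %1,%2" : "=@cc"(cc) : "d"(x), "d"(y)); // compare; CC is output %0
if (cc == 0) // equality test feeds BR_CCMASK, which combineCCMask can fold
  on_equal();

When both operands of a branch condition are such CC tests, getJumpConditionMergingParams returns {3, 0, -1} so the two tests are merged into a single CC-mask branch instead of being split.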
@@ -8824,8 +9009,7 @@ SDValue SystemZTargetLowering::combineBR_CCMASK( int CCMaskVal = CCMask->getZExtValue(); SDValue Chain = N->getOperand(0); SDValue CCReg = N->getOperand(4); - - if (combineCCMask(CCReg, CCValidVal, CCMaskVal)) + if (combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG)) return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0), Chain, DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32), @@ -8848,16 +9032,80 @@ SDValue SystemZTargetLowering::combineSELECT_CCMASK( int CCMaskVal = CCMask->getZExtValue(); SDValue CCReg = N->getOperand(4); - if (combineCCMask(CCReg, CCValidVal, CCMaskVal)) - return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), - N->getOperand(0), N->getOperand(1), - DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32), - DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), - CCReg); + bool IsCombinedCCReg = combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG); + + // Populate an SDVals vector with the value chosen for each condition code + // CCVal of the given Val, which may itself be another nested select_ccmask + // on the same CC. + const auto constructCCSDValsFromSELECT = [&CCReg](SDValue &Val) { + if (Val.getOpcode() == SystemZISD::SELECT_CCMASK) { + SmallVector<SDValue, 4> Res; + if (Val.getOperand(4) != CCReg) + return SmallVector<SDValue, 4>{}; + SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1); + auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3)); + if (!CCMask) + return SmallVector<SDValue, 4>{}; + + int CCMaskVal = CCMask->getZExtValue(); + for (auto &CC : {0, 1, 2, 3}) + Res.emplace_back(((CCMaskVal & (1 << (3 - CC))) != 0) ? TrueVal + : FalseVal); + return Res; + } + return SmallVector<SDValue, 4>{Val, Val, Val, Val}; + }; + // Attempt to optimize TrueVal/FalseVal of the outermost select_ccmask, + // either with the CCReg found by combineCCMask or with the original CCReg. + SDValue TrueVal = N->getOperand(0); + SDValue FalseVal = N->getOperand(1); + auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CCReg, DAG); + auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CCReg, DAG); + // TrueSDVals/FalseSDVals might be empty in case of non-constant + // TrueVal/FalseVal for select_ccmask, which cannot be optimized further. + if (TrueSDVals.empty()) + TrueSDVals = constructCCSDValsFromSELECT(TrueVal); + if (FalseSDVals.empty()) + FalseSDVals = constructCCSDValsFromSELECT(FalseVal); + if (!TrueSDVals.empty() && !FalseSDVals.empty()) { + SmallSet<SDValue, 4> MergedSDValsSet; + // Ignore CC values outside CCValid. + for (auto CC : {0, 1, 2, 3}) { + if ((CCValidVal & ((1 << (3 - CC)))) != 0) + MergedSDValsSet.insert(((CCMaskVal & (1 << (3 - CC))) != 0) + ? TrueSDVals[CC] + : FalseSDVals[CC]); + } + if (MergedSDValsSet.size() == 1) + return *MergedSDValsSet.begin(); + if (MergedSDValsSet.size() == 2) { + auto BeginIt = MergedSDValsSet.begin(); + SDValue NewTrueVal = *BeginIt, NewFalseVal = *std::next(BeginIt); + if (NewTrueVal == FalseVal || NewFalseVal == TrueVal) + std::swap(NewTrueVal, NewFalseVal); + int NewCCMask = 0; + for (auto CC : {0, 1, 2, 3}) { + NewCCMask <<= 1; + NewCCMask |= ((CCMaskVal & (1 << (3 - CC))) != 0) + ?
(TrueSDVals[CC] == NewTrueVal) + : (FalseSDVals[CC] == NewTrueVal); + } + CCMaskVal = NewCCMask; + CCMaskVal &= CCValidVal; + TrueVal = NewTrueVal; + FalseVal = NewFalseVal; + IsCombinedCCReg = true; + } + } + + if (IsCombinedCCReg) + return DAG.getNode( + SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), TrueVal, + FalseVal, DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32), + DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), CCReg); + return SDValue(); } - SDValue SystemZTargetLowering::combineGET_CCMASK( SDNode *N, DAGCombinerInfo &DCI) const { diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index f8706b7..d5b7603 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -533,6 +533,18 @@ public: } const char *getTargetNodeName(unsigned Opcode) const override; + + // This function currently returns the cost of the srl/ipm/cc sequence used + // for merging. + CondMergingParams + getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs, + const Value *Rhs) const override; + + // Handle lowering of flag assembly outputs. + SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, + const SDLoc &DL, + const AsmOperandInfo &Constraint, + SelectionDAG &DAG) const override; + std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 6472334..47c24fc 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -317,6 +317,15 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Custom); } + if (Subtarget->hasFP16()) { + setOperationAction(ISD::FMA, MVT::v8f16, Legal); + } + + if (Subtarget->hasRelaxedSIMD()) { + setOperationAction(ISD::FMULADD, MVT::v4f32, Legal); + setOperationAction(ISD::FMULADD, MVT::v2f64, Legal); + } + // Partial MLA reductions.
for (auto Op : {ISD::PARTIAL_REDUCE_SMLA, ISD::PARTIAL_REDUCE_UMLA}) { setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v16i8, Legal); @@ -1120,6 +1129,18 @@ WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const { return TargetLoweringBase::getPreferredVectorAction(VT); } +bool WebAssemblyTargetLowering::isFMAFasterThanFMulAndFAdd( + const MachineFunction &MF, EVT VT) const { + if (!Subtarget->hasFP16() || !VT.isVector()) + return false; + + EVT ScalarVT = VT.getScalarType(); + if (!ScalarVT.isSimple()) + return false; + + return ScalarVT.getSimpleVT().SimpleTy == MVT::f16; +} + bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts( SDValue Op, const TargetLoweringOpt &TLO) const { // ISel process runs DAGCombiner after legalization; this step is called diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h index b33a853..472ec67 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -81,6 +81,8 @@ private: TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override; + bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, + EVT VT) const override; SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const override; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 49af78b..0f6e1ca 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -1213,6 +1213,27 @@ defm EXTMUL_LOW_U : defm EXTMUL_HIGH_U : SIMDExtBinary<I64x2, extmul_high_u, "extmul_high_i32x4_u", 0xdf>; +// Pattern for i32x4.dot_i16x8_s +def : Pat< + (v4i32 (add + (wasm_shuffle + (v4i32 (extmul_low_s v8i16:$lhs, v8i16:$rhs)), + (v4i32 (extmul_high_s v8i16:$lhs, v8i16:$rhs)), + (i32 0), (i32 1), (i32 2), (i32 3), + (i32 8), (i32 9), (i32 10), (i32 11), + (i32 16), (i32 17), (i32 18), (i32 19), + (i32 24), (i32 25), (i32 26), (i32 27)), + (wasm_shuffle + (v4i32 (extmul_low_s v8i16:$lhs, v8i16:$rhs)), + (v4i32 (extmul_high_s v8i16:$lhs, v8i16:$rhs)), + (i32 4), (i32 5), (i32 6), (i32 7), + (i32 12), (i32 13), (i32 14), (i32 15), + (i32 20), (i32 21), (i32 22), (i32 23), + (i32 28), (i32 29), (i32 30), (i32 31))) + ), + (v4i32 (DOT v8i16:$lhs, v8i16:$rhs)) +>; + //===----------------------------------------------------------------------===// // Floating-point unary arithmetic //===----------------------------------------------------------------------===// @@ -1626,7 +1647,8 @@ defm "" : RelaxedConvert<I32x4, F64x2, int_wasm_relaxed_trunc_unsigned_zero, // Relaxed (Negative) Multiply-Add (madd/nmadd) //===----------------------------------------------------------------------===// -multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, list<Predicate> reqs> { +multiclass RELAXED_SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, + list<Predicate> reqs> { defm MADD_#vec : SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins), [(set (vec.vt V128:$dst), (int_wasm_relaxed_madd @@ -1640,16 +1662,46 @@ multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, list<Predicate> vec.prefix#".relaxed_nmadd\t$dst, $a, $b, $c", vec.prefix#".relaxed_nmadd", simdopS, reqs>; - def : Pat<(fadd_contract (vec.vt V128:$a), (fmul_contract (vec.vt V128:$b), (vec.vt V128:$c))), - (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>; 
+ def : Pat<(fadd_contract (fmul_contract (vec.vt V128:$a), (vec.vt V128:$b)), (vec.vt V128:$c)), + (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>; + def : Pat<(fmuladd (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)), + (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>; - def : Pat<(fsub_contract (vec.vt V128:$a), (fmul_contract (vec.vt V128:$b), (vec.vt V128:$c))), - (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>; + def : Pat<(fsub_contract (vec.vt V128:$c), (fmul_contract (vec.vt V128:$a), (vec.vt V128:$b))), + (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>; + def : Pat<(fmuladd (fneg (vec.vt V128:$a)), (vec.vt V128:$b), (vec.vt V128:$c)), + (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>; } -defm "" : SIMDMADD<F32x4, 0x105, 0x106, [HasRelaxedSIMD]>; -defm "" : SIMDMADD<F64x2, 0x107, 0x108, [HasRelaxedSIMD]>; -defm "" : SIMDMADD<F16x8, 0x14e, 0x14f, [HasFP16]>; +defm "" : RELAXED_SIMDMADD<F32x4, 0x105, 0x106, [HasRelaxedSIMD]>; +defm "" : RELAXED_SIMDMADD<F64x2, 0x107, 0x108, [HasRelaxedSIMD]>; + +//===----------------------------------------------------------------------===// +// FP16 (Negative) Multiply-Add (madd/nmadd) +//===----------------------------------------------------------------------===// + +multiclass HALF_PRECISION_SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, + list<Predicate> reqs> { + defm MADD_#vec : + SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins), + [(set (vec.vt V128:$dst), (fma + (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))], + vec.prefix#".madd\t$dst, $a, $b, $c", + vec.prefix#".madd", simdopA, reqs>; + defm NMADD_#vec : + SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins), + [(set (vec.vt V128:$dst), (fma + (fneg (vec.vt V128:$a)), (vec.vt V128:$b), (vec.vt V128:$c)))], + vec.prefix#".nmadd\t$dst, $a, $b, $c", + vec.prefix#".nmadd", simdopS, reqs>; +} +defm "" : HALF_PRECISION_SIMDMADD<F16x8, 0x14e, 0x14f, [HasFP16]>; + +// TODO: I think separate intrinsics should be introduced for these FP16 operations. 
+def : Pat<(v8f16 (int_wasm_relaxed_madd (v8f16 V128:$a), (v8f16 V128:$b), (v8f16 V128:$c))), + (MADD_F16x8 V128:$a, V128:$b, V128:$c)>; +def : Pat<(v8f16 (int_wasm_relaxed_nmadd (v8f16 V128:$a), (v8f16 V128:$b), (v8f16 V128:$c))), + (NMADD_F16x8 V128:$a, V128:$b, V128:$c)>; //===----------------------------------------------------------------------===// // Laneselect diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp index 28fa2cd..b81641f 100644 --- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp +++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp @@ -414,6 +414,8 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, getActionDefinitionsBuilder(G_SEXT_INREG).lower(); + getActionDefinitionsBuilder(G_IS_FPCLASS).lower(); + // fp constants getActionDefinitionsBuilder(G_FCONSTANT) .legalFor({s32, s64}) diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 3af8b3e..2bf016a 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -1335,9 +1335,8 @@ def ProcessorFeatures { !listconcat(ARLFeatures, ARLSAdditionalFeatures); // Pantherlake - list<SubtargetFeature> PTLAdditionalFeatures = [FeaturePREFETCHI]; list<SubtargetFeature> PTLFeatures = - !listremove(!listconcat(ARLSFeatures, PTLAdditionalFeatures), [FeatureWIDEKL]); + !listremove(ARLSFeatures, [FeatureWIDEKL]); // Clearwaterforest diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index eea84a2..c32b1a6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3624,6 +3624,16 @@ X86TargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc, match(Lhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value())) && match(Rhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value()))) BaseCost += 1; + + // For OR conditions with EQ comparisons, prefer splitting into branches + // (unless CCMP is available). OR+EQ cannot be optimized via bitwise ops, + // unlike OR+NE which becomes (P|Q)!=0. Similarly, don't split signed + // comparisons (SLT, SGT) that can be optimized. + if (BaseCost >= 0 && !Subtarget.hasCCMP() && Opc == Instruction::Or && + match(Lhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value())) && + match(Rhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value()))) + return {-1, -1, -1}; + return {BaseCost, BrMergingLikelyBias.getValue(), BrMergingUnlikelyBias.getValue()}; } @@ -3787,7 +3797,7 @@ static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) { /// Return true if every element in Mask, is an in-place blend/select mask or is /// undef. -LLVM_ATTRIBUTE_UNUSED static bool isBlendOrUndef(ArrayRef<int> Mask) { +[[maybe_unused]] static bool isBlendOrUndef(ArrayRef<int> Mask) { unsigned NumElts = Mask.size(); for (auto [I, M] : enumerate(Mask)) if (!isUndefOrEqual(M, I) && !isUndefOrEqual(M, I + NumElts)) @@ -8096,7 +8106,7 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, const SDLoc &dl, return DstVec; } -LLVM_ATTRIBUTE_UNUSED static bool isHorizOp(unsigned Opcode) { +[[maybe_unused]] static bool isHorizOp(unsigned Opcode) { switch (Opcode) { case X86ISD::PACKSS: case X86ISD::PACKUS: @@ -20813,7 +20823,7 @@ SDValue X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, // for DAG type consistency we have to match the FP operand type. 
APFloat Thresh(APFloat::IEEEsingle(), APInt(32, 0x5f000000)); - LLVM_ATTRIBUTE_UNUSED APFloat::opStatus Status = APFloat::opOK; + [[maybe_unused]] APFloat::opStatus Status = APFloat::opOK; bool LosesInfo = false; if (TheVT == MVT::f64) // The rounding mode is irrelevant as the conversion should be exact. @@ -22856,7 +22866,7 @@ static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, // be generated by the memcmp expansion pass with oversized integer compares // (see PR33325). bool IsOrXorXorTreeCCZero = isNullConstant(Y) && isOrXorXorTree(X); - if (isNullConstant(Y) && !IsOrXorXorTreeCCZero) + if (isNullConstant(Y) && OpSize == 128 && !IsOrXorXorTreeCCZero) return SDValue(); // Don't perform this combine if constructing the vector will be expensive. diff --git a/llvm/lib/TargetParser/RISCVTargetParser.cpp b/llvm/lib/TargetParser/RISCVTargetParser.cpp index acf8e4c..5ea63a9 100644 --- a/llvm/lib/TargetParser/RISCVTargetParser.cpp +++ b/llvm/lib/TargetParser/RISCVTargetParser.cpp @@ -228,6 +228,10 @@ void printVType(unsigned VType, raw_ostream &OS) { OS << ", mu"; } +void printXSfmmVType(unsigned VType, raw_ostream &OS) { + OS << "e" << getSEW(VType) << ", w" << getXSfmmWiden(VType); +} + unsigned getSEWLMULRatio(unsigned SEW, VLMUL VLMul) { unsigned LMul; bool Fractional; diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp index edca7c1..1932a3a 100644 --- a/llvm/lib/TargetParser/X86TargetParser.cpp +++ b/llvm/lib/TargetParser/X86TargetParser.cpp @@ -175,7 +175,7 @@ constexpr FeatureBitset FeaturesArrowlakeS = FeaturesArrowlake | FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 | FeatureSM4; constexpr FeatureBitset FeaturesPantherlake = - (FeaturesArrowlakeS ^ FeatureWIDEKL) | FeaturePREFETCHI; + (FeaturesArrowlakeS ^ FeatureWIDEKL); constexpr FeatureBitset FeaturesClearwaterforest = (FeaturesSierraforest ^ FeatureWIDEKL) | FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 | FeatureSM4 | FeaturePREFETCHI | FeatureUSERMSR; diff --git a/llvm/lib/Transforms/Coroutines/CoroCloner.h b/llvm/lib/Transforms/Coroutines/CoroCloner.h index 26ec4f3..e05fe28 100644 --- a/llvm/lib/Transforms/Coroutines/CoroCloner.h +++ b/llvm/lib/Transforms/Coroutines/CoroCloner.h @@ -1,3 +1,4 @@ +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -19,9 +20,7 @@ #include "llvm/Transforms/Coroutines/CoroInstr.h" #include "llvm/Transforms/Utils/ValueMapper.h" -namespace llvm { - -namespace coro { +namespace llvm::coro { enum class CloneKind { /// The shared resume function for a switch lowering. @@ -149,8 +148,6 @@ public: } }; -} // end namespace coro - -} // end namespace llvm +} // end namespace llvm::coro #endif // LLVM_LIB_TRANSFORMS_COROUTINES_COROCLONER_H diff --git a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp index 471b9eb..cdb5852 100644 --- a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp @@ -38,7 +38,7 @@ public: AnyResumeFnPtrTy(PointerType::getUnqual(Context)) {} void lowerEarlyIntrinsics(Function &F); }; -} +} // namespace // Replace a direct call to coro.resume or coro.destroy with an indirect call to // an address returned by coro.subfn.addr intrinsic. 
This is done so that diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h index 52f4ffe..cc47a55 100644 --- a/llvm/lib/Transforms/Coroutines/CoroInternal.h +++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h @@ -16,11 +16,7 @@ #include "llvm/Transforms/Coroutines/CoroInstr.h" #include "llvm/Transforms/Coroutines/CoroShape.h" -namespace llvm { - -class CallGraph; - -namespace coro { +namespace llvm::coro { bool isSuspendBlock(BasicBlock *BB); bool declaresAnyIntrinsic(const Module &M); @@ -61,7 +57,6 @@ void normalizeCoroutine(Function &F, coro::Shape &Shape, CallInst *createMustTailCall(DebugLoc Loc, Function *MustTailCallFn, TargetTransformInfo &TTI, ArrayRef<Value *> Arguments, IRBuilder<> &); -} // End namespace coro. -} // End namespace llvm +} // End namespace llvm::coro #endif diff --git a/llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp b/llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp index 6aaabca..f2444da 100644 --- a/llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp +++ b/llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp @@ -137,8 +137,7 @@ struct RematGraph { } // namespace -namespace llvm { -template <> struct GraphTraits<RematGraph *> { +template <> struct llvm::GraphTraits<RematGraph *> { using NodeRef = RematGraph::RematNode *; using ChildIteratorType = RematGraph::RematNode **; @@ -149,8 +148,6 @@ template <> struct GraphTraits<RematGraph *> { static ChildIteratorType child_end(NodeRef N) { return N->Operands.end(); } }; -} // end namespace llvm - // For each instruction identified as materializable across the suspend point, // and its associated DAG of other rematerializable instructions, // recreate the DAG of instructions after the suspend point. diff --git a/llvm/lib/Transforms/Coroutines/SpillUtils.cpp b/llvm/lib/Transforms/Coroutines/SpillUtils.cpp index e474c07..81fe0c9 100644 --- a/llvm/lib/Transforms/Coroutines/SpillUtils.cpp +++ b/llvm/lib/Transforms/Coroutines/SpillUtils.cpp @@ -16,11 +16,8 @@ #include "llvm/IR/InstIterator.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -namespace llvm { - -namespace coro { - -namespace { +using namespace llvm; +using namespace llvm::coro; typedef SmallPtrSet<BasicBlock *, 8> VisitedBlocksSet; @@ -71,7 +68,7 @@ static bool isLocalAlloca(CoroAllocaAllocInst *AI) { /// This happens during the all-instructions iteration, so it must not /// delete the call. static Instruction * -lowerNonLocalAlloca(CoroAllocaAllocInst *AI, const coro::Shape &Shape, +lowerNonLocalAlloca(CoroAllocaAllocInst *AI, const Shape &Shape, SmallVectorImpl<Instruction *> &DeadInsts) { IRBuilder<> Builder(AI); auto Alloc = Shape.emitAlloc(Builder, AI->getSize(), nullptr); @@ -450,10 +447,8 @@ static void collectFrameAlloca(AllocaInst *AI, const coro::Shape &Shape, Visitor.getMayWriteBeforeCoroBegin()); } -} // namespace - -void collectSpillsFromArgs(SpillInfo &Spills, Function &F, - const SuspendCrossingInfo &Checker) { +void coro::collectSpillsFromArgs(SpillInfo &Spills, Function &F, + const SuspendCrossingInfo &Checker) { // Collect the spills for arguments and other not-materializable values. 
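The SpillUtils cleanup above trades reopened namespace blocks for using-directives plus qualified definition names such as coro::collectSpillsFromArgs. A standalone sketch of the idiom, with invented names:

    // util.h
    namespace proj::util {
    bool isReady(); // declared inside the namespace
    }

    // util.cpp
    using namespace proj::util;

    // A qualified-id definition must match an existing declaration, so any
    // signature drift becomes a hard compile error instead of silently
    // introducing a second function in a reopened namespace.
    bool proj::util::isReady() { return true; }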
for (Argument &A : F.args()) for (User *U : A.users()) @@ -461,7 +456,7 @@ void collectSpillsFromArgs(SpillInfo &Spills, Function &F, Spills[&A].push_back(cast<Instruction>(U)); } -void collectSpillsAndAllocasFromInsts( +void coro::collectSpillsAndAllocasFromInsts( SpillInfo &Spills, SmallVector<AllocaInfo, 8> &Allocas, SmallVector<Instruction *, 4> &DeadInstructions, SmallVector<CoroAllocaAllocInst *, 4> &LocalAllocas, Function &F, @@ -516,8 +511,8 @@ void collectSpillsAndAllocasFromInsts( } } -void collectSpillsFromDbgInfo(SpillInfo &Spills, Function &F, - const SuspendCrossingInfo &Checker) { +void coro::collectSpillsFromDbgInfo(SpillInfo &Spills, Function &F, + const SuspendCrossingInfo &Checker) { // We don't want the layout of coroutine frame to be affected // by debug information. So we only choose to salvage dbg.values for // whose value is already in the frame. @@ -535,10 +530,9 @@ void collectSpillsFromDbgInfo(SpillInfo &Spills, Function &F, /// Async and Retcon{Once} conventions assume that all spill uses can be sunk /// after the coro.begin intrinsic. -void sinkSpillUsesAfterCoroBegin(const DominatorTree &Dom, - CoroBeginInst *CoroBegin, - coro::SpillInfo &Spills, - SmallVectorImpl<coro::AllocaInfo> &Allocas) { +void coro::sinkSpillUsesAfterCoroBegin( + const DominatorTree &Dom, CoroBeginInst *CoroBegin, coro::SpillInfo &Spills, + SmallVectorImpl<coro::AllocaInfo> &Allocas) { SmallSetVector<Instruction *, 32> ToMove; SmallVector<Instruction *, 32> Worklist; @@ -582,8 +576,9 @@ void sinkSpillUsesAfterCoroBegin(const DominatorTree &Dom, Inst->moveBefore(InsertPt->getIterator()); } -BasicBlock::iterator getSpillInsertionPt(const coro::Shape &Shape, Value *Def, - const DominatorTree &DT) { +BasicBlock::iterator coro::getSpillInsertionPt(const coro::Shape &Shape, + Value *Def, + const DominatorTree &DT) { BasicBlock::iterator InsertPt; if (auto *Arg = dyn_cast<Argument>(Def)) { // For arguments, we will place the store instruction right after @@ -625,7 +620,3 @@ BasicBlock::iterator getSpillInsertionPt(const coro::Shape &Shape, Value *Def, return InsertPt; } - -} // End namespace coro. - -} // End namespace llvm. diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 7071876..943c223 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -471,7 +471,6 @@ private: Value *simplifyNonNullOperand(Value *V, bool HasDereferenceable, unsigned Depth = 0); -public: /// Create `select C, S1, S2`. Use only when the profile cannot be calculated /// from existing profile metadata: if the Function has profiles, this will /// set the profile of this select to "unknown". @@ -484,6 +483,7 @@ public: return Sel; } +public: /// Create and insert the idiom we use to indicate a block is unreachable /// without having to rewrite the CFG from within InstCombine. 
void CreateNonTerminatorUnreachable(Instruction *InsertAt) { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 6b67b48..09cb225 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -2979,10 +2979,14 @@ Instruction *InstCombinerImpl::foldAndOrOfSelectUsingImpliedCond(Value *Op, "Op must be either i1 or vector of i1."); if (SI.getCondition()->getType() != Op->getType()) return nullptr; - if (Value *V = simplifyNestedSelectsUsingImpliedCond(SI, Op, IsAnd, DL)) - return SelectInst::Create(Op, - IsAnd ? V : ConstantInt::getTrue(Op->getType()), - IsAnd ? ConstantInt::getFalse(Op->getType()) : V); + if (Value *V = simplifyNestedSelectsUsingImpliedCond(SI, Op, IsAnd, DL)) { + Instruction *MDFrom = nullptr; + if (!ProfcheckDisableMetadataFixes) + MDFrom = &SI; + return SelectInst::Create( + Op, IsAnd ? V : ConstantInt::getTrue(Op->getType()), + IsAnd ? ConstantInt::getFalse(Op->getType()) : V, "", nullptr, MDFrom); + } return nullptr; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 63e24a0..a330bb7 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -110,8 +110,8 @@ static Value *simplifyShiftSelectingPackedElement(Instruction *I, ShrAmt->getName() + ".z"); // There is no existing !prof metadata we can derive the !prof metadata for // this select. - Value *Select = IC.createSelectInstWithUnknownProfile(ShrAmtZ, Lower, Upper); - IC.Builder.Insert(Select); + Value *Select = IC.Builder.CreateSelectWithUnknownProfile(ShrAmtZ, Lower, + Upper, DEBUG_TYPE); Select->takeName(I); return Select; } diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 82ac903..3f11cae 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1690,6 +1690,11 @@ Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign( // 2) (fp_binop ({s|u}itofp x), FpC) // -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC))) Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) { + // Don't perform the fold on vectors, as the integer operation may be much + // more expensive than the float operation in that case. 
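For reference, a sketch (not code from the patch) of the scalar shape this fold produces; the float operation becomes an integer one of the same width, which only pays off when the integer op is cheap:

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // fadd (sitofp %x), K  ==>  sitofp (add %x, K'), with K' = fptosi K.
    Value *foldedScalarForm(IRBuilder<> &B, Value *X, ConstantInt *KPrime) {
      Value *Sum = B.CreateAdd(X, KPrime);
      return B.CreateSIToFP(Sum, B.getFloatTy());
    }

For vectors, the integer binop can cost far more than the float op it replaces, hence the early return below.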
+ if (BO.getType()->isVectorTy()) + return nullptr; + std::array<Value *, 2> IntOps = {nullptr, nullptr}; Constant *Op1FpC = nullptr; // Check for: diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 860f8f7..511bca4 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -3337,7 +3337,7 @@ PHINode *FunctionStackPoisoner::createPHI(IRBuilder<> &IRB, Value *Cond, Value *ValueIfTrue, Instruction *ThenTerm, Value *ValueIfFalse) { - PHINode *PHI = IRB.CreatePHI(IntptrTy, 2); + PHINode *PHI = IRB.CreatePHI(ValueIfTrue->getType(), 2); BasicBlock *CondBlock = cast<Instruction>(Cond)->getParent(); PHI->addIncoming(ValueIfFalse, CondBlock); BasicBlock *ThenBlock = ThenTerm->getParent(); @@ -3360,7 +3360,7 @@ Value *FunctionStackPoisoner::createAllocaForLayout( assert((ClRealignStack & (ClRealignStack - 1)) == 0); uint64_t FrameAlignment = std::max(L.FrameAlignment, uint64_t(ClRealignStack)); Alloca->setAlignment(Align(FrameAlignment)); - return IRB.CreatePointerCast(Alloca, IntptrTy); + return Alloca; } void FunctionStackPoisoner::createDynamicAllocasInitStorage() { @@ -3572,10 +3572,12 @@ void FunctionStackPoisoner::processStaticAllocas() { DoDynamicAlloca &= !HasInlineAsm && !HasReturnsTwiceCall; DoStackMalloc &= !HasInlineAsm && !HasReturnsTwiceCall; + Type *PtrTy = F.getDataLayout().getAllocaPtrType(F.getContext()); Value *StaticAlloca = DoDynamicAlloca ? nullptr : createAllocaForLayout(IRB, L, false); - Value *FakeStack; + Value *FakeStackPtr; + Value *FakeStackInt; Value *LocalStackBase; Value *LocalStackBaseAlloca; uint8_t DIExprFlags = DIExpression::ApplyOffset; @@ -3603,20 +3605,21 @@ void FunctionStackPoisoner::processStaticAllocas() { RTCI.createRuntimeCall(IRBIf, AsanStackMallocFunc[StackMallocIdx], ConstantInt::get(IntptrTy, LocalStackSize)); IRB.SetInsertPoint(InsBefore); - FakeStack = createPHI(IRB, UseAfterReturnIsEnabled, FakeStackValue, Term, - ConstantInt::get(IntptrTy, 0)); + FakeStackInt = createPHI(IRB, UseAfterReturnIsEnabled, FakeStackValue, + Term, ConstantInt::get(IntptrTy, 0)); } else { // assert(ASan.UseAfterReturn == AsanDetectStackUseAfterReturnMode:Always) // void *FakeStack = __asan_stack_malloc_N(LocalStackSize); // void *LocalStackBase = (FakeStack) ? FakeStack : // alloca(LocalStackSize); StackMallocIdx = StackMallocSizeClass(LocalStackSize); - FakeStack = + FakeStackInt = RTCI.createRuntimeCall(IRB, AsanStackMallocFunc[StackMallocIdx], ConstantInt::get(IntptrTy, LocalStackSize)); } + FakeStackPtr = IRB.CreateIntToPtr(FakeStackInt, PtrTy); Value *NoFakeStack = - IRB.CreateICmpEQ(FakeStack, Constant::getNullValue(IntptrTy)); + IRB.CreateICmpEQ(FakeStackInt, Constant::getNullValue(IntptrTy)); Instruction *Term = SplitBlockAndInsertIfThen(NoFakeStack, InsBefore, false); IRBuilder<> IRBIf(Term); @@ -3624,67 +3627,53 @@ void FunctionStackPoisoner::processStaticAllocas() { DoDynamicAlloca ? 
createAllocaForLayout(IRBIf, L, true) : StaticAlloca; IRB.SetInsertPoint(InsBefore); - LocalStackBase = createPHI(IRB, NoFakeStack, AllocaValue, Term, FakeStack); + LocalStackBase = + createPHI(IRB, NoFakeStack, AllocaValue, Term, FakeStackPtr); IRB.CreateStore(LocalStackBase, LocalStackBaseAlloca); DIExprFlags |= DIExpression::DerefBefore; } else { // void *FakeStack = nullptr; // void *LocalStackBase = alloca(LocalStackSize); - FakeStack = ConstantInt::get(IntptrTy, 0); + FakeStackInt = Constant::getNullValue(IntptrTy); + FakeStackPtr = Constant::getNullValue(PtrTy); LocalStackBase = DoDynamicAlloca ? createAllocaForLayout(IRB, L, true) : StaticAlloca; LocalStackBaseAlloca = LocalStackBase; } - // It shouldn't matter whether we pass an `alloca` or a `ptrtoint` as the - // dbg.declare address opereand, but passing a `ptrtoint` seems to confuse - // later passes and can result in dropped variable coverage in debug info. - Value *LocalStackBaseAllocaPtr = - isa<PtrToIntInst>(LocalStackBaseAlloca) - ? cast<PtrToIntInst>(LocalStackBaseAlloca)->getPointerOperand() - : LocalStackBaseAlloca; - assert(isa<AllocaInst>(LocalStackBaseAllocaPtr) && - "Variable descriptions relative to ASan stack base will be dropped"); - // Replace Alloca instructions with base+offset. SmallVector<Value *> NewAllocaPtrs; for (const auto &Desc : SVD) { AllocaInst *AI = Desc.AI; - replaceDbgDeclare(AI, LocalStackBaseAllocaPtr, DIB, DIExprFlags, - Desc.Offset); - Value *NewAllocaPtr = IRB.CreateIntToPtr( - IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Desc.Offset)), - AI->getType()); + replaceDbgDeclare(AI, LocalStackBaseAlloca, DIB, DIExprFlags, Desc.Offset); + Value *NewAllocaPtr = IRB.CreatePtrAdd( + LocalStackBase, ConstantInt::get(IntptrTy, Desc.Offset)); AI->replaceAllUsesWith(NewAllocaPtr); NewAllocaPtrs.push_back(NewAllocaPtr); } // The left-most redzone has enough space for at least 4 pointers. // Write the Magic value to redzone[0]. - Value *BasePlus0 = IRB.CreateIntToPtr(LocalStackBase, IntptrPtrTy); IRB.CreateStore(ConstantInt::get(IntptrTy, kCurrentStackFrameMagic), - BasePlus0); + LocalStackBase); // Write the frame description constant to redzone[1]. - Value *BasePlus1 = IRB.CreateIntToPtr( - IRB.CreateAdd(LocalStackBase, - ConstantInt::get(IntptrTy, ASan.LongSize / 8)), - IntptrPtrTy); + Value *BasePlus1 = IRB.CreatePtrAdd( + LocalStackBase, ConstantInt::get(IntptrTy, ASan.LongSize / 8)); GlobalVariable *StackDescriptionGlobal = createPrivateGlobalForString(*F.getParent(), DescriptionString, /*AllowMerging*/ true, genName("stack")); Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy); IRB.CreateStore(Description, BasePlus1); // Write the PC to redzone[2]. - Value *BasePlus2 = IRB.CreateIntToPtr( - IRB.CreateAdd(LocalStackBase, - ConstantInt::get(IntptrTy, 2 * ASan.LongSize / 8)), - IntptrPtrTy); + Value *BasePlus2 = IRB.CreatePtrAdd( + LocalStackBase, ConstantInt::get(IntptrTy, 2 * ASan.LongSize / 8)); IRB.CreateStore(IRB.CreatePointerCast(&F, IntptrTy), BasePlus2); const auto &ShadowAfterScope = GetShadowBytesAfterScope(SVD, L); // Poison the stack red zones at the entry. - Value *ShadowBase = ASan.memToShadow(LocalStackBase, IRB); + Value *ShadowBase = + ASan.memToShadow(IRB.CreatePtrToInt(LocalStackBase, IntptrTy), IRB); // As mask we must use most poisoned case: red zones and after scope. // As bytes we can use either the same or just red zones only. 
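A rewrite that recurs throughout this ASan hunk: address arithmetic moves from integer adds plus inttoptr to pointer-typed byte offsets. A side-by-side sketch with invented names:

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Old shape: the base address is carried as an integer.
    Value *offsetViaInt(IRBuilder<> &B, Value *BaseInt, uint64_t Off,
                        Type *IntptrTy, Type *PtrTy) {
      Value *Sum = B.CreateAdd(BaseInt, ConstantInt::get(IntptrTy, Off));
      return B.CreateIntToPtr(Sum, PtrTy);
    }

    // New shape: the base stays a pointer; CreatePtrAdd emits an i8 GEP,
    // which preserves pointer provenance for later analyses.
    Value *offsetViaPtr(IRBuilder<> &B, Value *BasePtr, uint64_t Off,
                        Type *IntptrTy) {
      return B.CreatePtrAdd(BasePtr, ConstantInt::get(IntptrTy, Off));
    }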
copyToShadow(ShadowAfterScope, ShadowAfterScope, IRB, ShadowBase); @@ -3723,7 +3712,7 @@ void FunctionStackPoisoner::processStaticAllocas() { IRBuilder<> IRBRet(Ret); // Mark the current frame as retired. IRBRet.CreateStore(ConstantInt::get(IntptrTy, kRetiredStackFrameMagic), - BasePlus0); + LocalStackBase); if (DoStackMalloc) { assert(StackMallocIdx >= 0); // if FakeStack != 0 // LocalStackBase == FakeStack @@ -3737,7 +3726,7 @@ void FunctionStackPoisoner::processStaticAllocas() { // else // <This is not a fake stack; unpoison the redzones> Value *Cmp = - IRBRet.CreateICmpNE(FakeStack, Constant::getNullValue(IntptrTy)); + IRBRet.CreateICmpNE(FakeStackInt, Constant::getNullValue(IntptrTy)); Instruction *ThenTerm, *ElseTerm; SplitBlockAndInsertIfThenElse(Cmp, Ret, &ThenTerm, &ElseTerm); @@ -3748,11 +3737,10 @@ void FunctionStackPoisoner::processStaticAllocas() { kAsanStackUseAfterReturnMagic); copyToShadow(ShadowAfterReturn, ShadowAfterReturn, IRBPoison, ShadowBase); - Value *SavedFlagPtrPtr = IRBPoison.CreateAdd( - FakeStack, + Value *SavedFlagPtrPtr = IRBPoison.CreatePtrAdd( + FakeStackPtr, ConstantInt::get(IntptrTy, ClassSize - ASan.LongSize / 8)); - Value *SavedFlagPtr = IRBPoison.CreateLoad( - IntptrTy, IRBPoison.CreateIntToPtr(SavedFlagPtrPtr, IntptrPtrTy)); + Value *SavedFlagPtr = IRBPoison.CreateLoad(IntptrTy, SavedFlagPtrPtr); IRBPoison.CreateStore( Constant::getNullValue(IRBPoison.getInt8Ty()), IRBPoison.CreateIntToPtr(SavedFlagPtr, IRBPoison.getPtrTy())); @@ -3760,7 +3748,7 @@ void FunctionStackPoisoner::processStaticAllocas() { // For larger frames call __asan_stack_free_*. RTCI.createRuntimeCall( IRBPoison, AsanStackFreeFunc[StackMallocIdx], - {FakeStack, ConstantInt::get(IntptrTy, LocalStackSize)}); + {FakeStackInt, ConstantInt::get(IntptrTy, LocalStackSize)}); } IRBuilder<> IRBElse(ElseTerm); diff --git a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp index c86092b..a6ec6c1 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/MemoryProfileInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/StaticDataProfileInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" @@ -194,6 +195,30 @@ static bool isAllocationWithHotColdVariant(const Function *Callee, } } +static void HandleUnsupportedAnnotationKinds(GlobalVariable &GVar, + AnnotationKind Kind) { + assert(Kind != llvm::memprof::AnnotationKind::AnnotationOK && + "Should not handle AnnotationOK here"); + SmallString<32> Reason; + switch (Kind) { + case llvm::memprof::AnnotationKind::ExplicitSection: + ++NumOfMemProfExplicitSectionGlobalVars; + Reason.append("explicit section name"); + break; + case llvm::memprof::AnnotationKind::DeclForLinker: + Reason.append("linker declaration"); + break; + case llvm::memprof::AnnotationKind::ReservedName: + Reason.append("name starts with `llvm.`"); + break; + default: + llvm_unreachable("Unexpected annotation kind"); + } + LLVM_DEBUG(dbgs() << "Skip annotation for " << GVar.getName() << " due to " + << Reason << ".\n"); + return; +} + struct AllocMatchInfo { uint64_t TotalSize = 0; AllocationType AllocType = AllocationType::None; @@ -775,29 +800,13 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { return PreservedAnalyses::none(); } -// Returns true iff the global variable 
has custom section either by -// __attribute__((section("name"))) -// (https://clang.llvm.org/docs/AttributeReference.html#section-declspec-allocate) -// or #pragma clang section directives -// (https://clang.llvm.org/docs/LanguageExtensions.html#specifying-section-names-for-global-objects-pragma-clang-section). -static bool hasExplicitSectionName(const GlobalVariable &GVar) { - if (GVar.hasSection()) - return true; - - auto Attrs = GVar.getAttributes(); - if (Attrs.hasAttribute("bss-section") || Attrs.hasAttribute("data-section") || - Attrs.hasAttribute("relro-section") || - Attrs.hasAttribute("rodata-section")) - return true; - return false; -} - bool MemProfUsePass::annotateGlobalVariables( Module &M, const memprof::DataAccessProfData *DataAccessProf) { if (!AnnotateStaticDataSectionPrefix || M.globals().empty()) return false; if (!DataAccessProf) { + M.addModuleFlag(Module::Warning, "EnableDataAccessProf", 0U); M.getContext().diagnose(DiagnosticInfoPGOProfile( MemoryProfileFileName.data(), StringRef("Data access profiles not found in memprof. Ignore " DS_Warning)); return false; } + M.addModuleFlag(Module::Warning, "EnableDataAccessProf", 1U); bool Changed = false; // Iterate all global variables in the module and annotate them based on @@ -815,13 +825,9 @@ for (GlobalVariable &GVar : M.globals()) { assert(!GVar.getSectionPrefix().has_value() && "GVar shouldn't have section prefix yet"); - if (GVar.isDeclarationForLinker()) - continue; - - if (hasExplicitSectionName(GVar)) { - ++NumOfMemProfExplicitSectionGlobalVars; - LLVM_DEBUG(dbgs() << "Global variable " << GVar.getName() - << " has explicit section name. Skip annotating.\n"); + auto Kind = llvm::memprof::getAnnotationKind(GVar); + if (Kind != llvm::memprof::AnnotationKind::AnnotationOK) { + HandleUnsupportedAnnotationKinds(GVar, Kind); continue; } @@ -831,7 +837,6 @@ bool MemProfUsePass::annotateGlobalVariables( // TODO: Track string content hash in the profiles and compute it inside the // compiler to categorize the hotness of string literals.
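The "EnableDataAccessProf" module flag recorded above gives later passes a cheap way to check whether data-access profiles were available. A consumer-side sketch (assumed usage, not part of the patch):

    #include "llvm/IR/Metadata.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    bool hasDataAccessProf(const Module &M) {
      // getModuleFlag returns the metadata recorded by addModuleFlag, or
      // null when the flag was never set.
      auto *CI = mdconst::extract_or_null<ConstantInt>(
          M.getModuleFlag("EnableDataAccessProf"));
      return CI && CI->getZExtValue() == 1;
    }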
if (Name.starts_with(".str")) { - LLVM_DEBUG(dbgs() << "Skip annotating string literal " << Name << "\n"); continue; } diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp index ff5f390..66e45ec 100644 --- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp @@ -266,8 +266,7 @@ void DFAJumpThreading::unfold(DomTreeUpdater *DTU, LoopInfo *LI, if (!ProfcheckDisableMetadataFixes) BI->setMetadata(LLVMContext::MD_prof, SI->getMetadata(LLVMContext::MD_prof)); - DTU->applyUpdates({{DominatorTree::Insert, StartBlock, EndBlock}, - {DominatorTree::Insert, StartBlock, NewBlock}}); + DTU->applyUpdates({{DominatorTree::Insert, StartBlock, NewBlock}}); } else { BasicBlock *EndBlock = SIUse->getParent(); BasicBlock *NewBlockT = BasicBlock::Create( @@ -1479,10 +1478,13 @@ bool DFAJumpThreading::run(Function &F) { DTU->flush(); #ifdef EXPENSIVE_CHECKS - assert(DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full)); verifyFunction(F, &dbgs()); #endif + if (MadeChanges && VerifyDomInfo) + assert(DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full) && + "Failed to maintain validity of domtree!"); + return MadeChanges; } diff --git a/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp b/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp index 213d0f3..1335665 100644 --- a/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp +++ b/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp @@ -39,10 +39,11 @@ public: private: AliasAnalysis *AA; }; +} // namespace /// iterativelyFlattenCFG - Call FlattenCFG on all the blocks in the function, /// iterating until no more changes are made. -bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) { +static bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) { bool Changed = false; bool LocalChange = true; @@ -67,7 +68,6 @@ bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) { } return Changed; } -} // namespace char FlattenCFGLegacyPass::ID = 0; diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp index 56e0569..7cae94eb 100644 --- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp +++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp @@ -1295,6 +1295,24 @@ public: return commonAlignment(InitialAlign, ElementSizeInBits / 8); } + IntegerType *getIndexType(Value *Ptr) const { + return cast<IntegerType>(DL.getIndexType(Ptr->getType())); + } + + Value *getIndex(Value *Ptr, uint64_t V) const { + return ConstantInt::get(getIndexType(Ptr), V); + } + + Value *castToIndexType(Value *Ptr, Value *V, IRBuilder<> &Builder) const { + assert(isa<IntegerType>(V->getType()) && + "Attempted to cast non-integral type to integer index"); + // In case the data layout's index type differs in width from the type of + // the value we're given, truncate or zero extend to the appropriate width. + // We zero extend here as indices are unsigned. + return Builder.CreateZExtOrTrunc(V, getIndexType(Ptr), + V->getName() + ".cast"); + } + /// Load a matrix with \p Shape starting at \p Ptr and using \p Stride between /// vectors. 
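The new getIndex/castToIndexType helpers encode that GEP indices should use the pointer's index width from the data layout, which can be narrower than i64 on some targets. The core of the helper in isolation:

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Coerce a stride to the index width of Ptr's address space. Indices
    // are treated as unsigned, hence zext rather than sext on widening.
    Value *toIndexWidth(IRBuilder<> &B, const DataLayout &DL, Value *Ptr,
                        Value *Stride) {
      Type *IdxTy = DL.getIndexType(Ptr->getType());
      return B.CreateZExtOrTrunc(Stride, IdxTy);
    }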
MatrixTy loadMatrix(Type *Ty, Value *Ptr, MaybeAlign MAlign, Value *Stride, @@ -1304,6 +1322,7 @@ public: Type *VecTy = FixedVectorType::get(EltTy, Shape.getStride()); Value *EltPtr = Ptr; MatrixTy Result; + Stride = castToIndexType(Ptr, Stride, Builder); for (unsigned I = 0, E = Shape.getNumVectors(); I < E; ++I) { Value *GEP = computeVectorAddr( EltPtr, Builder.getIntN(Stride->getType()->getScalarSizeInBits(), I), @@ -1325,14 +1344,14 @@ public: ShapeInfo ResultShape, Type *EltTy, IRBuilder<> &Builder) { Value *Offset = Builder.CreateAdd( - Builder.CreateMul(J, Builder.getInt64(MatrixShape.getStride())), I); + Builder.CreateMul(J, getIndex(MatrixPtr, MatrixShape.getStride())), I); Value *TileStart = Builder.CreateGEP(EltTy, MatrixPtr, Offset); auto *TileTy = FixedVectorType::get(EltTy, ResultShape.NumRows * ResultShape.NumColumns); return loadMatrix(TileTy, TileStart, Align, - Builder.getInt64(MatrixShape.getStride()), IsVolatile, + getIndex(MatrixPtr, MatrixShape.getStride()), IsVolatile, ResultShape, Builder); } @@ -1363,14 +1382,15 @@ public: MaybeAlign MAlign, bool IsVolatile, ShapeInfo MatrixShape, Value *I, Value *J, Type *EltTy, IRBuilder<> &Builder) { Value *Offset = Builder.CreateAdd( - Builder.CreateMul(J, Builder.getInt64(MatrixShape.getStride())), I); + Builder.CreateMul(J, getIndex(MatrixPtr, MatrixShape.getStride())), I); Value *TileStart = Builder.CreateGEP(EltTy, MatrixPtr, Offset); auto *TileTy = FixedVectorType::get(EltTy, StoreVal.getNumRows() * StoreVal.getNumColumns()); storeMatrix(TileTy, StoreVal, TileStart, MAlign, - Builder.getInt64(MatrixShape.getStride()), IsVolatile, Builder); + getIndex(MatrixPtr, MatrixShape.getStride()), IsVolatile, + Builder); } /// Store matrix \p StoreVal starting at \p Ptr and using \p Stride between @@ -1380,6 +1400,7 @@ public: IRBuilder<> &Builder) { auto *VType = cast<FixedVectorType>(Ty); Value *EltPtr = Ptr; + Stride = castToIndexType(Ptr, Stride, Builder); for (auto Vec : enumerate(StoreVal.vectors())) { Value *GEP = computeVectorAddr( EltPtr, @@ -2011,18 +2032,17 @@ public: const unsigned TileM = std::min(M - K, unsigned(TileSize)); MatrixTy A = loadMatrix(APtr, LoadOp0->getAlign(), LoadOp0->isVolatile(), - LShape, Builder.getInt64(I), Builder.getInt64(K), + LShape, getIndex(APtr, I), getIndex(APtr, K), {TileR, TileM}, EltType, Builder); MatrixTy B = loadMatrix(BPtr, LoadOp1->getAlign(), LoadOp1->isVolatile(), - RShape, Builder.getInt64(K), Builder.getInt64(J), + RShape, getIndex(BPtr, K), getIndex(BPtr, J), {TileM, TileC}, EltType, Builder); emitMatrixMultiply(Res, A, B, Builder, true, false, getFastMathFlags(MatMul)); } storeMatrix(Res, CPtr, Store->getAlign(), Store->isVolatile(), {R, M}, - Builder.getInt64(I), Builder.getInt64(J), EltType, - Builder); + getIndex(CPtr, I), getIndex(CPtr, J), EltType, Builder); } } @@ -2254,15 +2274,14 @@ public: /// Lower load instructions. 
MatrixTy VisitLoad(LoadInst *Inst, const ShapeInfo &SI, Value *Ptr, IRBuilder<> &Builder) { - return LowerLoad(Inst, Ptr, Inst->getAlign(), - Builder.getInt64(SI.getStride()), Inst->isVolatile(), SI, - Builder); + return LowerLoad(Inst, Ptr, Inst->getAlign(), getIndex(Ptr, SI.getStride()), + Inst->isVolatile(), SI, Builder); } MatrixTy VisitStore(StoreInst *Inst, const ShapeInfo &SI, Value *StoredVal, Value *Ptr, IRBuilder<> &Builder) { return LowerStore(Inst, StoredVal, Ptr, Inst->getAlign(), - Builder.getInt64(SI.getStride()), Inst->isVolatile(), SI, + getIndex(Ptr, SI.getStride()), Inst->isVolatile(), SI, Builder); } diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index b9d332b..d2f09e9 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -118,9 +118,13 @@ STATISTIC( STATISTIC(NumDeleted, "Number of instructions deleted"); STATISTIC(NumVectorized, "Number of vectorized aggregates"); +namespace llvm { /// Disable running mem2reg during SROA in order to test or debug SROA. static cl::opt<bool> SROASkipMem2Reg("sroa-skip-mem2reg", cl::init(false), cl::Hidden); +extern cl::opt<bool> ProfcheckDisableMetadataFixes; +} // namespace llvm + namespace { class AllocaSliceRewriter; @@ -1777,7 +1781,8 @@ static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI, } Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL, - LI.getName() + ".sroa.speculated"); + LI.getName() + ".sroa.speculated", + ProfcheckDisableMetadataFixes ? nullptr : &SI); LLVM_DEBUG(dbgs() << " speculated to: " << *V << "\n"); LI.replaceAllUsesWith(V); @@ -4360,10 +4365,13 @@ private: }; Value *Cond, *True, *False; + Instruction *MDFrom = nullptr; if (auto *SI = dyn_cast<SelectInst>(Sel)) { Cond = SI->getCondition(); True = SI->getTrueValue(); False = SI->getFalseValue(); + if (!ProfcheckDisableMetadataFixes) + MDFrom = SI; } else { Cond = Sel->getOperand(0); True = ConstantInt::get(Sel->getType(), 1); @@ -4383,8 +4391,12 @@ private: IRB.CreateGEP(Ty, FalseOps[0], ArrayRef(FalseOps).drop_front(), False->getName() + ".sroa.gep", NW); - Value *NSel = - IRB.CreateSelect(Cond, NTrue, NFalse, Sel->getName() + ".sroa.sel"); + Value *NSel = MDFrom + ? IRB.CreateSelect(Cond, NTrue, NFalse, + Sel->getName() + ".sroa.sel", MDFrom) + : IRB.CreateSelectWithUnknownProfile( + Cond, NTrue, NFalse, DEBUG_TYPE, + Sel->getName() + ".sroa.sel"); Visited.erase(&GEPI); GEPI.replaceAllUsesWith(NSel); GEPI.eraseFromParent(); diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index 2ee91a9..0f3978f 100644 --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -47,6 +47,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Transforms/Utils/SSAUpdaterBulk.h" #include <cassert> #include <utility> @@ -321,7 +322,7 @@ class StructurizeCFG { void collectInfos(); - void insertConditions(bool Loops); + void insertConditions(bool Loops, SSAUpdaterBulk &PhiInserter); void simplifyConditions(); @@ -671,10 +672,9 @@ void StructurizeCFG::collectInfos() { } /// Insert the missing branch conditions -void StructurizeCFG::insertConditions(bool Loops) { +void StructurizeCFG::insertConditions(bool Loops, SSAUpdaterBulk &PhiInserter) { BranchVector &Conds = Loops ? LoopConds : Conditions; Value *Default = Loops ? 
BoolTrue : BoolFalse; - SSAUpdater PhiInserter; for (BranchInst *Term : Conds) { assert(Term->isConditional()); @@ -683,8 +683,9 @@ void StructurizeCFG::insertConditions(bool Loops) { BasicBlock *SuccTrue = Term->getSuccessor(0); BasicBlock *SuccFalse = Term->getSuccessor(1); - PhiInserter.Initialize(Boolean, ""); - PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default); + unsigned Variable = PhiInserter.AddVariable("", Boolean); + PhiInserter.AddAvailableValue(Variable, Loops ? SuccFalse : Parent, + Default); BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue]; @@ -697,7 +698,7 @@ void StructurizeCFG::insertConditions(bool Loops) { ParentInfo = PI; break; } - PhiInserter.AddAvailableValue(BB, PI.Pred); + PhiInserter.AddAvailableValue(Variable, BB, PI.Pred); Dominator.addAndRememberBlock(BB); } @@ -706,9 +707,9 @@ void StructurizeCFG::insertConditions(bool Loops) { CondBranchWeights::setMetadata(*Term, ParentInfo.Weights); } else { if (!Dominator.resultIsRememberedBlock()) - PhiInserter.AddAvailableValue(Dominator.result(), Default); + PhiInserter.AddAvailableValue(Variable, Dominator.result(), Default); - Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent)); + PhiInserter.AddUse(Variable, &Term->getOperandUse(0)); } } } @@ -1414,8 +1415,12 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT, orderNodes(); collectInfos(); createFlow(); - insertConditions(false); - insertConditions(true); + + SSAUpdaterBulk PhiInserter; + insertConditions(false, PhiInserter); + insertConditions(true, PhiInserter); + PhiInserter.RewriteAndOptimizeAllUses(*DT); + setPhiValues(); simplifyHoistedPhis(); simplifyConditions(); diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp index b187208..32924e7 100644 --- a/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -44,7 +44,7 @@ using namespace llvm; STATISTIC(RemappedAtomMax, "Highest global NextAtomGroup (after mapping)"); void llvm::mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap) { - auto CurGroup = DL->getAtomGroup(); + uint64_t CurGroup = DL->getAtomGroup(); if (!CurGroup) return; @@ -62,21 +62,20 @@ void llvm::mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap) { RemappedAtomMax = std::max<uint64_t>(NewGroup, RemappedAtomMax); } -namespace { -void collectDebugInfoFromInstructions(const Function &F, - DebugInfoFinder &DIFinder) { +static void collectDebugInfoFromInstructions(const Function &F, + DebugInfoFinder &DIFinder) { const Module *M = F.getParent(); - if (M) { - // Inspect instructions to process e.g. DILexicalBlocks of inlined functions - for (const auto &I : instructions(F)) - DIFinder.processInstruction(*M, I); - } + if (!M) + return; + // Inspect instructions to process e.g. DILexicalBlocks of inlined functions + for (const Instruction &I : instructions(F)) + DIFinder.processInstruction(*M, I); } // Create a predicate that matches the metadata that should be identity mapped // during function cloning. -MetadataPredicate createIdentityMDPredicate(const Function &F, - CloneFunctionChangeType Changes) { +static MetadataPredicate +createIdentityMDPredicate(const Function &F, CloneFunctionChangeType Changes) { if (Changes >= CloneFunctionChangeType::DifferentModule) return [](const Metadata *MD) { return false; }; @@ -107,7 +106,6 @@ MetadataPredicate createIdentityMDPredicate(const Function &F, return false; }; } -} // namespace /// See comments in Cloning.h. 
BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, @@ -213,10 +211,9 @@ void llvm::CloneFunctionMetadataInto(Function &NewFunc, const Function &OldFunc, const MetadataPredicate *IdentityMD) { SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; OldFunc.getAllMetadata(MDs); - for (auto MD : MDs) { - NewFunc.addMetadata(MD.first, - *MapMetadata(MD.second, VMap, RemapFlag, TypeMapper, - Materializer, IdentityMD)); + for (const auto &[Kind, MD] : MDs) { + NewFunc.addMetadata(Kind, *MapMetadata(MD, VMap, RemapFlag, TypeMapper, + Materializer, IdentityMD)); } } @@ -235,7 +232,6 @@ void llvm::CloneFunctionBodyInto(Function &NewFunc, const Function &OldFunc, // appropriate. Note that we save BE this way in order to handle cloning of // recursive functions into themselves. for (const BasicBlock &BB : OldFunc) { - // Create a new basic block and copy instructions into it! BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, &NewFunc, CodeInfo); @@ -321,7 +317,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, // Cloning is always a Module level operation, since Metadata needs to be // cloned. - const auto RemapFlag = RF_None; + const RemapFlags RemapFlag = RF_None; CloneFunctionMetadataInto(*NewFunc, *OldFunc, VMap, RemapFlag, TypeMapper, Materializer, &IdentityMD); @@ -346,8 +342,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, // visiting the metadata attached to global values, which would allow this // code to be deleted. Alternatively, perhaps give responsibility for this // update to CloneFunctionInto's callers. - auto *NewModule = NewFunc->getParent(); - auto *NMD = NewModule->getOrInsertNamedMetadata("llvm.dbg.cu"); + Module *NewModule = NewFunc->getParent(); + NamedMDNode *NMD = NewModule->getOrInsertNamedMetadata("llvm.dbg.cu"); // Avoid multiple insertions of the same DICompileUnit to NMD. SmallPtrSet<const void *, 8> Visited(llvm::from_range, NMD->operands()); @@ -355,7 +351,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, // the function (e.g. as instructions' scope). DebugInfoFinder DIFinder; collectDebugInfoFromInstructions(*OldFunc, DIFinder); - for (auto *Unit : DIFinder.compile_units()) { + for (DICompileUnit *Unit : DIFinder.compile_units()) { MDNode *MappedUnit = MapMetadata(Unit, VMap, RF_None, TypeMapper, Materializer); if (Visited.insert(MappedUnit).second) @@ -821,17 +817,16 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, --PredCount[Pred]; // Figure out how many entries to remove from each PHI. - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - ++PredCount[PN->getIncomingBlock(i)]; + for (BasicBlock *Pred : PN->blocks()) + ++PredCount[Pred]; // At this point, the excess predecessor entries are positive in the // map. Loop over all of the PHIs and remove excess predecessor // entries. BasicBlock::iterator I = NewBB->begin(); for (; (PN = dyn_cast<PHINode>(I)); ++I) { - for (const auto &PCI : PredCount) { - BasicBlock *Pred = PCI.first; - for (unsigned NumToRemove = PCI.second; NumToRemove; --NumToRemove) + for (const auto &[Pred, Count] : PredCount) { + for ([[maybe_unused]] unsigned _ : llvm::seq<unsigned>(Count)) PN->removeIncomingValue(Pred, false); } } @@ -866,8 +861,8 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // As phi-nodes have been now remapped, allow incremental simplification of // newly-cloned instructions. 
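A small idiom from the PHI-pruning rewrite above: llvm::seq<T>(N) iterates the half-open range [0, N), turning the manual count-down into a self-documenting repeat-N-times loop. Sketch:

    #include "llvm/ADT/Sequence.h"
    #include "llvm/Support/raw_ostream.h"

    void repeatDemo() {
      for (unsigned I : llvm::seq<unsigned>(3))
        llvm::outs() << I << ' '; // prints: 0 1 2
    }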
const DataLayout &DL = NewFunc->getDataLayout(); - for (const auto &BB : *OldFunc) { - for (const auto &I : BB) { + for (const BasicBlock &BB : *OldFunc) { + for (const Instruction &I : BB) { auto *NewI = dyn_cast_or_null<Instruction>(VMap.lookup(&I)); if (!NewI) continue; @@ -997,8 +992,8 @@ void llvm::CloneAndPruneFunctionInto( void llvm::remapInstructionsInBlocks(ArrayRef<BasicBlock *> Blocks, ValueToValueMapTy &VMap) { // Rewrite the code to refer to itself. - for (auto *BB : Blocks) { - for (auto &Inst : *BB) { + for (BasicBlock *BB : Blocks) { + for (Instruction &Inst : *BB) { RemapDbgRecordRange(Inst.getModule(), Inst.getDbgRecordRange(), VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); RemapInstruction(&Inst, VMap, @@ -1151,9 +1146,9 @@ void llvm::cloneNoAliasScopes(ArrayRef<MDNode *> NoAliasDeclScopes, StringRef Ext, LLVMContext &Context) { MDBuilder MDB(Context); - for (auto *ScopeList : NoAliasDeclScopes) { - for (const auto &MDOperand : ScopeList->operands()) { - if (MDNode *MD = dyn_cast<MDNode>(MDOperand)) { + for (MDNode *ScopeList : NoAliasDeclScopes) { + for (const MDOperand &MDOp : ScopeList->operands()) { + if (MDNode *MD = dyn_cast<MDNode>(MDOp)) { AliasScopeNode SNANode(MD); std::string Name; @@ -1177,7 +1172,7 @@ void llvm::adaptNoAliasScopes(Instruction *I, auto CloneScopeList = [&](const MDNode *ScopeList) -> MDNode * { bool NeedsReplacement = false; SmallVector<Metadata *, 8> NewScopeList; - for (const auto &MDOp : ScopeList->operands()) { + for (const MDOperand &MDOp : ScopeList->operands()) { if (MDNode *MD = dyn_cast<MDNode>(MDOp)) { if (auto *NewMD = ClonedScopes.lookup(MD)) { NewScopeList.push_back(NewMD); @@ -1193,12 +1188,12 @@ void llvm::adaptNoAliasScopes(Instruction *I, }; if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(I)) - if (auto *NewScopeList = CloneScopeList(Decl->getScopeList())) + if (MDNode *NewScopeList = CloneScopeList(Decl->getScopeList())) Decl->setScopeList(NewScopeList); auto replaceWhenNeeded = [&](unsigned MD_ID) { if (const MDNode *CSNoAlias = I->getMetadata(MD_ID)) - if (auto *NewScopeList = CloneScopeList(CSNoAlias)) + if (MDNode *NewScopeList = CloneScopeList(CSNoAlias)) I->setMetadata(MD_ID, NewScopeList); }; replaceWhenNeeded(LLVMContext::MD_noalias); diff --git a/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp b/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp index d7bf791..fb39fdd 100644 --- a/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp +++ b/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp @@ -11,11 +11,11 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/SSAUpdaterBulk.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/IteratedDominanceFrontier.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/Use.h" #include "llvm/IR/Value.h" @@ -112,7 +112,7 @@ struct BBValueInfo { void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT, SmallVectorImpl<PHINode *> *InsertedPHIs) { DenseMap<BasicBlock *, BBValueInfo> BBInfos; - for (auto &R : Rewrites) { + for (RewriteInfo &R : Rewrites) { BBInfos.clear(); // Compute locations for new phi-nodes. @@ -145,7 +145,7 @@ void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT, BBInfos[BB].LiveOutValue = V; // We've computed IDF, now insert new phi-nodes there. 
- for (auto *FrontierBB : IDFBlocks) { + for (BasicBlock *FrontierBB : IDFBlocks) { IRBuilder<> B(FrontierBB, FrontierBB->begin()); PHINode *PN = B.CreatePHI(R.Ty, 0, R.Name); BBInfos[FrontierBB].LiveInValue = PN; @@ -156,7 +156,7 @@ void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT, // IsLiveOut indicates whether we are computing live-out values (true) or // live-in values (false). auto ComputeValue = [&](BasicBlock *BB, bool IsLiveOut) -> Value * { - auto *BBInfo = &BBInfos[BB]; + BBValueInfo *BBInfo = &BBInfos[BB]; if (IsLiveOut && BBInfo->LiveOutValue) return BBInfo->LiveOutValue; @@ -187,7 +187,7 @@ void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT, if (!V) V = UndefValue::get(R.Ty); - for (auto *BBInfo : Stack) + for (BBValueInfo *BBInfo : Stack) // Loop above can insert new entries into the BBInfos map: assume the // map shouldn't grow due to [1] and BBInfo references are valid. BBInfo->LiveInValue = V; @@ -196,7 +196,7 @@ void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT, }; // Fill in arguments of the inserted PHIs. - for (auto *BB : IDFBlocks) { + for (BasicBlock *BB : IDFBlocks) { auto *PHI = cast<PHINode>(&BB->front()); for (BasicBlock *Pred : PredCache.get(BB)) PHI->addIncoming(ComputeValue(Pred, /*IsLiveOut=*/true), Pred); @@ -222,3 +222,96 @@ void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT, } } } + +// Perform a single pass of simplification over the worklist of PHIs. +// This should be called after RewriteAllUses() because simplifying PHIs +// immediately after creation would require updating all references to those +// PHIs in the BBValueInfo structures, which would necessitate additional +// reference tracking overhead. +static void simplifyPass(MutableArrayRef<PHINode *> Worklist, + const DataLayout &DL) { + for (PHINode *&PHI : Worklist) { + if (Value *Simplified = simplifyInstruction(PHI, DL)) { + PHI->replaceAllUsesWith(Simplified); + PHI->eraseFromParent(); + PHI = nullptr; // Mark as removed. + } + } +} + +#ifndef NDEBUG // Should this be under EXPENSIVE_CHECKS? +// New PHI nodes should not reference one another but they may reference +// themselves or existing PHI nodes, and existing PHI nodes may reference new +// PHI nodes. +static bool +PHIAreRefEachOther(const iterator_range<BasicBlock::phi_iterator> NewPHIs) { + SmallPtrSet<PHINode *, 8> NewPHISet; + for (PHINode &PN : NewPHIs) + NewPHISet.insert(&PN); + for (PHINode &PHI : NewPHIs) { + for (Value *V : PHI.incoming_values()) { + PHINode *IncPHI = dyn_cast<PHINode>(V); + if (IncPHI && IncPHI != &PHI && NewPHISet.contains(IncPHI)) + return true; + } + } + return false; +} +#endif + +static bool replaceIfIdentical(PHINode &PHI, PHINode &ReplPHI) { + if (!PHI.isIdenticalToWhenDefined(&ReplPHI)) + return false; + PHI.replaceAllUsesWith(&ReplPHI); + PHI.eraseFromParent(); + return true; +} + +bool EliminateNewDuplicatePHINodes(BasicBlock *BB, + BasicBlock::phi_iterator FirstExistingPN) { + assert(!PHIAreRefEachOther(make_range(BB->phis().begin(), FirstExistingPN))); + + // Deduplicate new PHIs first to reduce the number of comparisons on the + // following new -> existing pass. + bool Changed = false; + for (auto I = BB->phis().begin(); I != FirstExistingPN; ++I) { + for (auto J = std::next(I); J != FirstExistingPN;) { + Changed |= replaceIfIdentical(*J++, *I); + } + } + + // Iterate over existing PHIs and replace identical new PHIs. 
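Putting this hunk together with the StructurizeCFG change earlier in the patch, a client drives the bulk updater roughly as follows (sketch, invented names):

    #include "llvm/IR/Dominators.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
    using namespace llvm;

    void rewriteCondition(DominatorTree &DT, Type *BoolTy, BasicBlock *DefBB,
                          Value *DefVal, BranchInst *Term) {
      SSAUpdaterBulk Updater;
      unsigned Var = Updater.AddVariable("cond", BoolTy); // one slot per value
      Updater.AddAvailableValue(Var, DefBB, DefVal);      // where it is defined
      Updater.AddUse(Var, &Term->getOperandUse(0));       // use to be rewritten
      // Batch-inserts PHIs, then runs the simplify and deduplicate passes
      // defined in this hunk.
      Updater.RewriteAndOptimizeAllUses(DT);
    }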
+ for (PHINode &ExistingPHI : make_range(FirstExistingPN, BB->phis().end())) { + auto I = BB->phis().begin(); + assert(I != FirstExistingPN); // Should be at least one new PHI. + do { + Changed |= replaceIfIdentical(*I++, ExistingPHI); + } while (I != FirstExistingPN); + if (BB->phis().begin() == FirstExistingPN) + return Changed; + } + return Changed; +} + +static void deduplicatePass(ArrayRef<PHINode *> Worklist) { + SmallDenseMap<BasicBlock *, unsigned> BBs; + for (PHINode *PHI : Worklist) { + if (PHI) + ++BBs[PHI->getParent()]; + } + + for (auto [BB, NumNewPHIs] : BBs) { + auto FirstExistingPN = std::next(BB->phis().begin(), NumNewPHIs); + EliminateNewDuplicatePHINodes(BB, FirstExistingPN); + } +} + +void SSAUpdaterBulk::RewriteAndOptimizeAllUses(DominatorTree &DT) { + SmallVector<PHINode *, 4> PHIs; + RewriteAllUses(&DT, &PHIs); + if (PHIs.empty()) + return; + + simplifyPass(PHIs, PHIs.front()->getParent()->getDataLayout()); + deduplicatePass(PHIs); +} diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index 45cee1e..9035e58 100644 --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -526,7 +526,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { // Recognize the canonical representation of an unsimplifed urem. const SCEV *URemLHS = nullptr; const SCEV *URemRHS = nullptr; - if (SE.matchURem(S, URemLHS, URemRHS)) { + if (match(S, m_scev_URem(m_SCEV(URemLHS), m_SCEV(URemRHS), SE))) { Value *LHS = expand(URemLHS); Value *RHS = expand(URemRHS); return InsertBinop(Instruction::URem, LHS, RHS, SCEV::FlagAnyWrap, diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index a6f4bec..88af2cf 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -10659,7 +10659,8 @@ class InstructionsCompatibilityAnalysis { static bool isSupportedOpcode(const unsigned Opcode) { return Opcode == Instruction::Add || Opcode == Instruction::LShr || Opcode == Instruction::Shl || Opcode == Instruction::SDiv || - Opcode == Instruction::UDiv; + Opcode == Instruction::UDiv || Opcode == Instruction::And || + Opcode == Instruction::Or || Opcode == Instruction::Xor; } /// Identifies the best candidate value, which represents main opcode @@ -10984,6 +10985,9 @@ public: case Instruction::Shl: case Instruction::SDiv: case Instruction::UDiv: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: VectorCost = TTI.getArithmeticInstrCost(MainOpcode, VecTy, Kind); break; default: @@ -19456,7 +19460,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } assert(getNumElements(Cond->getType()) == TrueNumElements && "Cannot vectorize Instruction::Select"); - Value *V = Builder.CreateSelect(Cond, True, False); + Value *V = + Builder.CreateSelectWithUnknownProfile(Cond, True, False, DEBUG_TYPE); V = FinalShuffle(V, E); E->VectorizedValue = V; @@ -23576,18 +23581,19 @@ class HorizontalReduction { switch (Kind) { case RecurKind::Or: { if (UseSelect && OpTy == CmpInst::makeCmpResultType(OpTy)) - return Builder.CreateSelect( + return Builder.CreateSelectWithUnknownProfile( LHS, ConstantInt::getAllOnesValue(CmpInst::makeCmpResultType(OpTy)), - RHS, Name); + RHS, DEBUG_TYPE, Name); unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind); return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS, Name); } case RecurKind::And: { if (UseSelect 
&& OpTy == CmpInst::makeCmpResultType(OpTy)) - return Builder.CreateSelect( + return Builder.CreateSelectWithUnknownProfile( LHS, RHS, - ConstantInt::getNullValue(CmpInst::makeCmpResultType(OpTy)), Name); + ConstantInt::getNullValue(CmpInst::makeCmpResultType(OpTy)), + DEBUG_TYPE, Name); unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind); return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS, Name); @@ -23608,7 +23614,8 @@ class HorizontalReduction { if (UseSelect) { CmpInst::Predicate Pred = llvm::getMinMaxReductionPredicate(Kind); Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS, Name); - return Builder.CreateSelect(Cmp, LHS, RHS, Name); + return Builder.CreateSelectWithUnknownProfile(Cmp, LHS, RHS, DEBUG_TYPE, + Name); } [[fallthrough]]; case RecurKind::FMax: diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 1fea068..0101942 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -635,9 +635,9 @@ static bool hasConditionalTerminator(const VPBasicBlock *VPBB) { const VPRecipeBase *R = &VPBB->back(); bool IsSwitch = isa<VPInstruction>(R) && cast<VPInstruction>(R)->getOpcode() == Instruction::Switch; - bool IsCondBranch = isa<VPBranchOnMaskRecipe>(R) || - match(R, m_BranchOnCond(m_VPValue())) || - match(R, m_BranchOnCount(m_VPValue(), m_VPValue())); + bool IsCondBranch = + isa<VPBranchOnMaskRecipe>(R) || + match(R, m_CombineOr(m_BranchOnCond(), m_BranchOnCount())); (void)IsCondBranch; (void)IsSwitch; if (VPBB->getNumSuccessors() == 2 || diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index fb696be..23f5623 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -24,12 +24,9 @@ #ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H -#include "VPlanAnalysis.h" #include "VPlanValue.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" #include "llvm/ADT/ilist.h" @@ -41,10 +38,11 @@ #include "llvm/IR/Operator.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/InstructionCost.h" -#include <algorithm> #include <cassert> #include <cstddef> +#include <functional> #include <string> +#include <utility> namespace llvm { @@ -346,13 +344,6 @@ public: /// Return the cost of the block. virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx) = 0; - /// Return true if it is legal to hoist instructions into this block. - bool isLegalToHoistInto() { - // There are currently no constraints that prevent an instruction to be - // hoisted into a VPBlockBase. - return true; - } - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void printAsOperand(raw_ostream &OS, bool PrintType = false) const { OS << getName(); @@ -1064,6 +1055,7 @@ public: ResumeForEpilogue, /// Returns the value for vscale. VScale, + OpsEnd = VScale, }; /// Returns true if this VPInstruction generates scalar values for all lanes. diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 81deba2..c0147ce 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -433,8 +433,7 @@ static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB, // We are about to replace the branch to exit the region. 
Remove the original // BranchOnCond, if there is any. DebugLoc LatchDL = DL; - if (!LatchVPBB->empty() && - match(&LatchVPBB->back(), m_BranchOnCond(m_VPValue()))) { + if (!LatchVPBB->empty() && match(&LatchVPBB->back(), m_BranchOnCond())) { LatchDL = LatchVPBB->getTerminator()->getDebugLoc(); LatchVPBB->getTerminator()->eraseFromParent(); } @@ -480,8 +479,7 @@ static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB) { static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL, PredicatedScalarEvolution &PSE, Loop *TheLoop) { - VPDominatorTree VPDT; - VPDT.recalculate(Plan); + VPDominatorTree VPDT(Plan); auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor()); canonicalHeaderAndLatch(HeaderVPBB, VPDT); @@ -623,8 +621,7 @@ void VPlanTransforms::addMiddleCheck(VPlan &Plan, } void VPlanTransforms::createLoopRegions(VPlan &Plan) { - VPDominatorTree VPDT; - VPDT.recalculate(Plan); + VPDominatorTree VPDT(Plan); for (VPBlockBase *HeaderVPB : vp_post_order_shallow(Plan.getEntry())) if (canonicalHeaderAndLatch(HeaderVPB, VPDT)) createLoopRegion(Plan, HeaderVPB); @@ -875,8 +872,7 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) { Plan.getVectorLoopRegion()->getEntryBasicBlock())) { auto *VPBB = cast<VPBasicBlock>(VPB); for (auto &R : *VPBB) { - if (R.mayWriteToMemory() && - !match(&R, m_BranchOnCount(m_VPValue(), m_VPValue()))) + if (R.mayWriteToMemory() && !match(&R, m_BranchOnCount())) return false; } } diff --git a/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h b/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h index 577432f..44506f5a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h +++ b/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h @@ -39,7 +39,6 @@ class VPDominatorTree : public DominatorTreeBase<VPBlockBase, false> { using Base = DominatorTreeBase<VPBlockBase, false>; public: - VPDominatorTree() = default; explicit VPDominatorTree(VPlan &Plan) { recalculate(Plan); } /// Returns true if \p A properly dominates \p B. 
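Deleting the default constructor completes a migration visible throughout the patch: each two-step VPDominatorTree-then-recalculate sequence becomes a single construction, so the tree can no longer be observed in an uncomputed state. Usage sketch (LLVM-internal header):

    #include "VPlanDominatorTree.h"
    using namespace llvm;

    bool dominatesIn(VPlan &Plan, VPBlockBase *A, VPBlockBase *B) {
      VPDominatorTree VPDT(Plan); // recalculates eagerly at construction
      return VPDT.dominates(A, B);
    }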
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index 555efea..b42b049 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -344,6 +344,10 @@ m_Freeze(const Op0_t &Op0) { return m_VPInstruction<Instruction::Freeze>(Op0); } +inline VPInstruction_match<VPInstruction::BranchOnCond> m_BranchOnCond() { + return m_VPInstruction<VPInstruction::BranchOnCond>(); +} + template <typename Op0_t> inline VPInstruction_match<VPInstruction::BranchOnCond, Op0_t> m_BranchOnCond(const Op0_t &Op0) { @@ -374,6 +378,10 @@ m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) { return m_VPInstruction<VPInstruction::ActiveLaneMask>(Op0, Op1, Op2); } +inline VPInstruction_match<VPInstruction::BranchOnCount> m_BranchOnCount() { + return m_VPInstruction<VPInstruction::BranchOnCount>(); +} + template <typename Op0_t, typename Op1_t> inline VPInstruction_match<VPInstruction::BranchOnCount, Op0_t, Op1_t> m_BranchOnCount(const Op0_t &Op0, const Op1_t &Op1) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 9bb8820..40b7e8d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1658,7 +1658,7 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF, auto *Term = &ExitingVPBB->back(); VPValue *Cond; ScalarEvolution &SE = *PSE.getSE(); - if (match(Term, m_BranchOnCount(m_VPValue(), m_VPValue())) || + if (match(Term, m_BranchOnCount()) || match(Term, m_BranchOnCond(m_Not(m_ActiveLaneMask( m_VPValue(), m_VPValue(), m_VPValue()))))) { // Try to simplify the branch condition if TC <= VF * UF when the latch @@ -1909,8 +1909,7 @@ static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR, bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &LoopBuilder) { - VPDominatorTree VPDT; - VPDT.recalculate(Plan); + VPDominatorTree VPDT(Plan); SmallVector<VPFirstOrderRecurrencePHIRecipe *> RecurrencePhis; for (VPRecipeBase &R : @@ -1992,6 +1991,13 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> { .Case<VPWidenIntrinsicRecipe>([](auto *I) { return std::make_pair(true, I->getVectorIntrinsicID()); }) + .Case<VPVectorPointerRecipe>([](auto *I) { + // For recipes that do not directly map to LLVM IR instructions, + // assign opcodes after the last VPInstruction opcode (which is also + // after the last IR Instruction opcode), based on the VPDefID. + return std::make_pair(false, + VPInstruction::OpsEnd + 1 + I->getVPDefID()); + }) .Default([](auto *) { return std::nullopt; }); } @@ -2015,11 +2021,8 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> { static bool canHandle(const VPSingleDefRecipe *Def) { // We can extend the list of handled recipes in the future, // provided we account for the data embedded in them while checking for - // equality or hashing. We assign VPVectorEndPointerRecipe the GEP opcode, - // as it is essentially a GEP with different semantics. - auto C = isa<VPVectorPointerRecipe>(Def) - ? std::make_pair(false, Instruction::GetElementPtr) - : getOpcodeOrIntrinsicID(Def); + // equality or hashing. 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 9bb8820..40b7e8d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1658,7 +1658,7 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
   auto *Term = &ExitingVPBB->back();
   VPValue *Cond;
   ScalarEvolution &SE = *PSE.getSE();
-  if (match(Term, m_BranchOnCount(m_VPValue(), m_VPValue())) ||
+  if (match(Term, m_BranchOnCount()) ||
       match(Term, m_BranchOnCond(m_Not(m_ActiveLaneMask(
                       m_VPValue(), m_VPValue(), m_VPValue()))))) {
     // Try to simplify the branch condition if TC <= VF * UF when the latch
@@ -1909,8 +1909,7 @@ static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR,
 
 bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
                                                   VPBuilder &LoopBuilder) {
-  VPDominatorTree VPDT;
-  VPDT.recalculate(Plan);
+  VPDominatorTree VPDT(Plan);
 
   SmallVector<VPFirstOrderRecurrencePHIRecipe *> RecurrencePhis;
   for (VPRecipeBase &R :
@@ -1992,6 +1991,13 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
         .Case<VPWidenIntrinsicRecipe>([](auto *I) {
           return std::make_pair(true, I->getVectorIntrinsicID());
         })
+        .Case<VPVectorPointerRecipe>([](auto *I) {
+          // For recipes that do not directly map to LLVM IR instructions,
+          // assign opcodes after the last VPInstruction opcode (which is also
+          // after the last IR Instruction opcode), based on the VPDefID.
+          return std::make_pair(false,
+                                VPInstruction::OpsEnd + 1 + I->getVPDefID());
+        })
         .Default([](auto *) { return std::nullopt; });
   }
 
@@ -2015,11 +2021,8 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
   static bool canHandle(const VPSingleDefRecipe *Def) {
     // We can extend the list of handled recipes in the future,
     // provided we account for the data embedded in them while checking for
-    // equality or hashing. We assign VPVectorEndPointerRecipe the GEP opcode,
-    // as it is essentially a GEP with different semantics.
-    auto C = isa<VPVectorPointerRecipe>(Def)
-                 ? std::make_pair(false, Instruction::GetElementPtr)
-                 : getOpcodeOrIntrinsicID(Def);
+    // equality or hashing.
+    auto C = getOpcodeOrIntrinsicID(Def);
 
     // The issue with (Insert|Extract)Value is that the index of the
     // insert/extract is not a proper operand in LLVM IR, and hence also not in
@@ -2058,6 +2061,8 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
         vputils::isSingleScalar(L) != vputils::isSingleScalar(R) ||
         !equal(L->operands(), R->operands()))
       return false;
+    assert(getOpcodeOrIntrinsicID(L) && getOpcodeOrIntrinsicID(R) &&
+           "must have valid opcode info for both recipes");
     if (auto *LFlags = dyn_cast<VPRecipeWithIRFlags>(L))
       if (LFlags->hasPredicate() &&
           LFlags->getPredicate() !=
@@ -3021,8 +3026,7 @@ void VPlanTransforms::createInterleaveGroups(
   // Interleave memory: for each Interleave Group we marked earlier as relevant
   // for this VPlan, replace the Recipes widening its memory instructions with a
   // single VPInterleaveRecipe at its insertion point.
-  VPDominatorTree VPDT;
-  VPDT.recalculate(Plan);
+  VPDominatorTree VPDT(Plan);
   for (const auto *IG : InterleaveGroups) {
     auto *Start =
         cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IG->getMember(0)));
@@ -3398,9 +3402,8 @@ void VPlanTransforms::handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB,
 
   VPBuilder Builder(LatchVPBB->getTerminator());
   VPBlockBase *TrueSucc = EarlyExitingVPBB->getSuccessors()[0];
-  assert(
-      match(EarlyExitingVPBB->getTerminator(), m_BranchOnCond(m_VPValue())) &&
-      "Terminator must be be BranchOnCond");
+  assert(match(EarlyExitingVPBB->getTerminator(), m_BranchOnCond()) &&
+         "Terminator must be be BranchOnCond");
   VPValue *CondOfEarlyExitingVPBB =
       EarlyExitingVPBB->getTerminator()->getOperand(0);
   auto *CondToEarlyExit = TrueSucc == EarlyExitVPBB
@@ -3662,8 +3665,7 @@ void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
     return;
 
 #ifndef NDEBUG
-  VPDominatorTree VPDT;
-  VPDT.recalculate(Plan);
+  VPDominatorTree VPDT(Plan);
 #endif
 
   SmallVector<VPValue *> VPValues;
@@ -4009,8 +4011,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
   unsigned VFMinVal = VF.getKnownMinValue();
   SmallVector<VPInterleaveRecipe *> StoreGroups;
   for (auto &R : *VectorLoop->getEntryBasicBlock()) {
-    if (isa<VPCanonicalIVPHIRecipe>(&R) ||
-        match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())))
+    if (isa<VPCanonicalIVPHIRecipe>(&R) || match(&R, m_BranchOnCount()))
       continue;
 
     if (isa<VPDerivedIVRecipe, VPScalarIVStepsRecipe>(&R) &&
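
The new VPVectorPointerRecipe case keys the CSE table with a synthetic opcode placed past VPInstruction::OpsEnd, so recipes without a genuine LLVM IR opcode can never collide with real opcodes. A standalone sketch of that keying idea; the constants standing in for OpsEnd and the VPDefID are made up for illustration.

    #include <cassert>
    #include <utility>

    // Assumed stand-in: OpsEnd marks the last genuine opcode value.
    constexpr unsigned OpsEnd = 1000;

    // Real instructions hash under their own opcode...
    std::pair<bool, unsigned> keyForOpcode(unsigned Opcode) {
      assert(Opcode <= OpsEnd && "real opcodes never exceed OpsEnd");
      return {false, Opcode};
    }

    // ...while opcode-less recipes get OpsEnd + 1 + ID (the role VPDefID
    // plays in the patch), a disjoint range, so equal keys imply recipes of
    // the same kind.
    std::pair<bool, unsigned> keyForRecipeKind(unsigned DefID) {
      return {false, OpsEnd + 1 + DefID};
    }

    int main() {
      assert(keyForOpcode(42) != keyForRecipeKind(0)); // ranges never overlap
      return 0;
    }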
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 5e7f19f..1c4adfc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -259,8 +259,7 @@ void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
 
 /// Handle non-header-phi recipes.
 void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
-  if (match(&R, m_BranchOnCond(m_VPValue())) ||
-      match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())))
+  if (match(&R, m_CombineOr(m_BranchOnCond(), m_BranchOnCount())))
     return;
 
   if (auto *VPI = dyn_cast<VPInstruction>(&R)) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 013ea2e..5262af6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -24,6 +24,7 @@
 #define DEBUG_TYPE "loop-vectorize"
 
 using namespace llvm;
+using namespace VPlanPatternMatch;
 
 namespace {
 class VPlanVerifier {
@@ -198,7 +199,6 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
         }
         // EVLIVIncrement is only used by EVLIV & BranchOnCount.
         // Having more than two users is unexpected.
-        using namespace llvm::VPlanPatternMatch;
         if (I->getOpcode() != VPInstruction::Broadcast &&
             I->getNumUsers() != 1 &&
             (I->getNumUsers() != 2 ||
@@ -479,8 +479,7 @@ bool VPlanVerifier::verify(const VPlan &Plan) {
       }
 
       auto *LastInst = dyn_cast<VPInstruction>(std::prev(Exiting->end()));
-      if (!LastInst || (LastInst->getOpcode() != VPInstruction::BranchOnCount &&
-                        LastInst->getOpcode() != VPInstruction::BranchOnCond)) {
+      if (!match(LastInst, m_CombineOr(m_BranchOnCond(), m_BranchOnCount()))) {
         errs() << "VPlan vector loop exit must end with BranchOnCount or "
                   "BranchOnCond VPInstruction\n";
         return false;
@@ -490,8 +489,7 @@ bool VPlanVerifier::verify(const VPlan &Plan) {
 }
 
 bool llvm::verifyVPlanIsValid(const VPlan &Plan, bool VerifyLate) {
-  VPDominatorTree VPDT;
-  VPDT.recalculate(const_cast<VPlan &>(Plan));
+  VPDominatorTree VPDT(const_cast<VPlan &>(Plan));
   VPTypeAnalysis TypeInfo(Plan);
   VPlanVerifier Verifier(VPDT, TypeInfo, VerifyLate);
   return Verifier.verify(Plan);
diff --git a/llvm/lib/XRay/BlockIndexer.cpp b/llvm/lib/XRay/BlockIndexer.cpp
index f4ba0eb..d0c6853 100644
--- a/llvm/lib/XRay/BlockIndexer.cpp
+++ b/llvm/lib/XRay/BlockIndexer.cpp
@@ -12,8 +12,8 @@
 //===----------------------------------------------------------------------===//
 #include "llvm/XRay/BlockIndexer.h"
 
-namespace llvm {
-namespace xray {
+using namespace llvm;
+using namespace llvm::xray;
 
 Error BlockIndexer::visit(BufferExtents &) { return Error::success(); }
 
@@ -89,6 +89,3 @@ Error BlockIndexer::flush() {
   CurrentBlock.WallclockTime = nullptr;
   return Error::success();
 }
-
-} // namespace xray
-} // namespace llvm
diff --git a/llvm/lib/XRay/BlockPrinter.cpp b/llvm/lib/XRay/BlockPrinter.cpp
index 63a60c3..d85be5b 100644
--- a/llvm/lib/XRay/BlockPrinter.cpp
+++ b/llvm/lib/XRay/BlockPrinter.cpp
@@ -7,8 +7,8 @@
 //===----------------------------------------------------------------------===//
 #include "llvm/XRay/BlockPrinter.h"
 
-namespace llvm {
-namespace xray {
+using namespace llvm;
+using namespace llvm::xray;
 
 Error BlockPrinter::visit(BufferExtents &R) {
   OS << "\n[New Block]\n";
@@ -108,6 +108,3 @@ Error BlockPrinter::visit(EndBufferRecord &R) {
   auto E = RP.visit(R);
   return E;
 }
-
-} // namespace xray
-} // namespace llvm
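
The XRay files above drop their namespace llvm { namespace xray { ... } } wrappers in favor of using-directives. For member-function definitions this is behavior-preserving: a using-directive only affects name lookup, while the definition still attaches to the class it names. A standalone sketch of the pattern, with demo names:

    #include <cstdio>

    // Stands in for a class declared in a header inside a namespace.
    namespace demo {
    struct BlockIndexer {
      int visit(int Record);
    };
    } // namespace demo

    using namespace demo;

    // Looks unqualified, but BlockIndexer is found via the using-directive,
    // so this still defines demo::BlockIndexer::visit.
    int BlockIndexer::visit(int Record) { return Record + 1; }

    int main() {
      demo::BlockIndexer BI;
      std::printf("%d\n", BI.visit(41)); // prints 42
      return 0;
    }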
diff --git a/llvm/lib/XRay/BlockVerifier.cpp b/llvm/lib/XRay/BlockVerifier.cpp
index 99f255e..e39f6b6 100644
--- a/llvm/lib/XRay/BlockVerifier.cpp
+++ b/llvm/lib/XRay/BlockVerifier.cpp
@@ -10,19 +10,18 @@
 
 #include <bitset>
 
-namespace llvm {
-namespace xray {
-namespace {
+using namespace llvm;
+using namespace llvm::xray;
 
-constexpr unsigned long long mask(BlockVerifier::State S) {
+static constexpr unsigned long long mask(BlockVerifier::State S) {
   return 1uLL << static_cast<std::size_t>(S);
 }
 
-constexpr std::size_t number(BlockVerifier::State S) {
+static constexpr std::size_t number(BlockVerifier::State S) {
   return static_cast<std::size_t>(S);
 }
 
-StringRef recordToString(BlockVerifier::State R) {
+static StringRef recordToString(BlockVerifier::State R) {
   switch (R) {
   case BlockVerifier::State::BufferExtents:
     return "BufferExtents";
@@ -53,6 +52,8 @@ StringRef recordToString(BlockVerifier::State R) {
   llvm_unreachable("Unkown state!");
 }
 
+namespace {
+
 struct Transition {
   BlockVerifier::State From;
   std::bitset<number(BlockVerifier::State::StateMax)> ToStates;
@@ -133,7 +134,7 @@ Error BlockVerifier::transition(State To) {
 
   CurrentRecord = To;
   return Error::success();
-} // namespace xray
+}
 
 Error BlockVerifier::visit(BufferExtents &) {
   return transition(State::BufferExtents);
@@ -201,6 +202,3 @@ Error BlockVerifier::verify() {
 }
 
 void BlockVerifier::reset() { CurrentRecord = State::Unknown; }
-
-} // namespace xray
-} // namespace llvm
diff --git a/llvm/lib/XRay/FDRRecordProducer.cpp b/llvm/lib/XRay/FDRRecordProducer.cpp
index 479b710..0f4eed1 100644
--- a/llvm/lib/XRay/FDRRecordProducer.cpp
+++ b/llvm/lib/XRay/FDRRecordProducer.cpp
@@ -10,8 +10,8 @@
 
 #include <cstdint>
 
-namespace llvm {
-namespace xray {
+using namespace llvm;
+using namespace llvm::xray;
 
 namespace {
 
@@ -31,8 +31,9 @@ enum MetadataRecordKinds : uint8_t {
   // This is an end marker, used to identify the upper bound for this enum.
   EnumEndMarker,
 };
+} // namespace
 
-Expected<std::unique_ptr<Record>>
+static Expected<std::unique_ptr<Record>>
 metadataRecordType(const XRayFileHeader &Header, uint8_t T) {
 
   if (T >= static_cast<uint8_t>(MetadataRecordKinds::EnumEndMarker))
@@ -72,12 +73,10 @@ metadataRecordType(const XRayFileHeader &Header, uint8_t T) {
   llvm_unreachable("Unhandled MetadataRecordKinds enum value");
 }
 
-constexpr bool isMetadataIntroducer(uint8_t FirstByte) {
+static constexpr bool isMetadataIntroducer(uint8_t FirstByte) {
   return FirstByte & 0x01u;
 }
 
-} // namespace
-
 Expected<std::unique_ptr<Record>>
 FileBasedRecordProducer::findNextBufferExtent() {
   // We seek one byte at a time until we find a suitable buffer extents metadata
@@ -193,6 +192,3 @@ Expected<std::unique_ptr<Record>> FileBasedRecordProducer::produce() {
   assert(R != nullptr);
   return std::move(R);
 }
-
-} // namespace xray
-} // namespace llvm
diff --git a/llvm/lib/XRay/FDRRecords.cpp b/llvm/lib/XRay/FDRRecords.cpp
index ff315d3..a18f733 100644
--- a/llvm/lib/XRay/FDRRecords.cpp
+++ b/llvm/lib/XRay/FDRRecords.cpp
@@ -12,8 +12,8 @@
 //===----------------------------------------------------------------------===//
 #include "llvm/XRay/FDRRecords.h"
 
-namespace llvm {
-namespace xray {
+using namespace llvm;
+using namespace llvm::xray;
 
 Error BufferExtents::apply(RecordVisitor &V) { return V.visit(*this); }
 Error WallclockRecord::apply(RecordVisitor &V) { return V.visit(*this); }
@@ -61,6 +61,3 @@ StringRef Record::kindToString(RecordKind K) {
   }
   return "Unknown";
 }
-
-} // namespace xray
-} // namespace llvm
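
FDRRecordProducer.cpp also narrows the anonymous namespace to the enum that needs it and marks the free helpers static instead. Both spellings give internal linkage; static on the function itself is the LLVM coding-standard preference for file-local functions. A standalone sketch of the equivalence, with demo helper names (the predicate body mirrors isMetadataIntroducer from the diff):

    // Internal linkage via an anonymous namespace...
    namespace {
    constexpr bool isMetadataOld(unsigned char FirstByte) {
      return FirstByte & 0x01u;
    }
    } // namespace

    // ...and the equivalent via `static`, as the patch now spells it.
    static constexpr bool isMetadataNew(unsigned char FirstByte) {
      return FirstByte & 0x01u;
    }

    int main() { return isMetadataOld(3) == isMetadataNew(3) ? 0 : 1; }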
diff --git a/llvm/lib/XRay/FDRTraceExpander.cpp b/llvm/lib/XRay/FDRTraceExpander.cpp
index b68e997..991e6e5 100644
--- a/llvm/lib/XRay/FDRTraceExpander.cpp
+++ b/llvm/lib/XRay/FDRTraceExpander.cpp
@@ -7,8 +7,8 @@
 //===----------------------------------------------------------------------===//
 #include "llvm/XRay/FDRTraceExpander.h"
 
-namespace llvm {
-namespace xray {
+using namespace llvm;
+using namespace llvm::xray;
 
 void TraceExpander::resetCurrentRecord() {
   if (BuildingRecord)
@@ -126,6 +126,3 @@ Error TraceExpander::flush() {
   resetCurrentRecord();
   return Error::success();
 }
-
-} // namespace xray
-} // namespace llvm
diff --git a/llvm/lib/XRay/FDRTraceWriter.cpp b/llvm/lib/XRay/FDRTraceWriter.cpp
index fb59125..3e320a6 100644
--- a/llvm/lib/XRay/FDRTraceWriter.cpp
+++ b/llvm/lib/XRay/FDRTraceWriter.cpp
@@ -12,8 +12,8 @@
 #include "llvm/XRay/FDRTraceWriter.h"
 #include <tuple>
 
-namespace llvm {
-namespace xray {
+using namespace llvm;
+using namespace llvm::xray;
 
 namespace {
 
@@ -37,9 +37,10 @@ template <size_t Index> struct IndexedWriter {
     return 0;
   }
 };
+} // namespace
 
 template <uint8_t Kind, class... Values>
-Error writeMetadata(support::endian::Writer &OS, Values &&... Ds) {
+static Error writeMetadata(support::endian::Writer &OS, Values &&...Ds) {
   // The first bit in the first byte of metadata records is always set to 1, so
   // we ensure this is the case when we write out the first byte of the record.
   uint8_t FirstByte = (static_cast<uint8_t>(Kind) << 1) | uint8_t{0x01u};
@@ -54,8 +55,6 @@ Error writeMetadata(support::endian::Writer &OS, Values &&... Ds) {
   return Error::success();
 }
 
-} // namespace
-
 FDRTraceWriter::FDRTraceWriter(raw_ostream &O, const XRayFileHeader &H)
     : OS(O, llvm::endianness::native) {
   // We need to re-construct a header, by writing the fields we care about for
@@ -146,6 +145,3 @@ Error FDRTraceWriter::visit(FunctionRecord &R) {
   OS.write(R.delta());
   return Error::success();
 }
-
-} // namespace xray
-} // namespace llvm
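
writeMetadata keeps its body but becomes a file-local static function template outside the anonymous namespace. A standalone sketch of its shape: the record kind is a template parameter folded into a tagged first byte whose low bit marks a metadata record (as the diff's comment describes), followed by the forwarded field values; the byte-vector writer is a demo stand-in for support::endian::Writer.

    #include <cstdint>
    #include <vector>

    template <uint8_t Kind, class... Values>
    static void writeMetadataSketch(std::vector<uint8_t> &Out, Values &&...Ds) {
      // Low bit set: identifies the byte as a metadata-record introducer.
      Out.push_back(static_cast<uint8_t>((Kind << 1) | 0x01u));
      (Out.push_back(static_cast<uint8_t>(Ds)), ...); // fold over the pack
    }

    int main() {
      std::vector<uint8_t> Buf;
      writeMetadataSketch<4>(Buf, 1, 2, 3); // kind 4, three one-byte fields
      return Buf.size() == 4 ? 0 : 1;
    }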
diff --git a/llvm/lib/XRay/FileHeaderReader.cpp b/llvm/lib/XRay/FileHeaderReader.cpp
index 6b6daf9..681cef7 100644
--- a/llvm/lib/XRay/FileHeaderReader.cpp
+++ b/llvm/lib/XRay/FileHeaderReader.cpp
@@ -7,12 +7,13 @@
 //===----------------------------------------------------------------------===//
 #include "llvm/XRay/FileHeaderReader.h"
 
-namespace llvm {
-namespace xray {
+using namespace llvm;
+using namespace llvm::xray;
 
 // Populates the FileHeader reference by reading the first 32 bytes of the file.
-Expected<XRayFileHeader> readBinaryFormatHeader(DataExtractor &HeaderExtractor,
-                                                uint64_t &OffsetPtr) {
+Expected<XRayFileHeader>
+xray::readBinaryFormatHeader(DataExtractor &HeaderExtractor,
+                             uint64_t &OffsetPtr) {
   // FIXME: Maybe deduce whether the data is little or big-endian using some
   // magic bytes in the beginning of the file?
 
@@ -68,6 +69,3 @@ Expected<XRayFileHeader> readBinaryFormatHeader(DataExtractor &HeaderExtractor,
   OffsetPtr += 16;
   return std::move(FileHeader);
 }
-
-} // namespace xray
-} // namespace llvm
diff --git a/llvm/lib/XRay/LogBuilderConsumer.cpp b/llvm/lib/XRay/LogBuilderConsumer.cpp
index ffb49f9..f0fc336 100644
--- a/llvm/lib/XRay/LogBuilderConsumer.cpp
+++ b/llvm/lib/XRay/LogBuilderConsumer.cpp
@@ -7,8 +7,8 @@
 //===----------------------------------------------------------------------===//
 #include "llvm/XRay/FDRRecordConsumer.h"
 
-namespace llvm {
-namespace xray {
+using namespace llvm;
+using namespace llvm::xray;
 
 Error LogBuilderConsumer::consume(std::unique_ptr<Record> R) {
   if (!R)
@@ -32,6 +32,3 @@ Error PipelineConsumer::consume(std::unique_ptr<Record> R) {
     Result = joinErrors(std::move(Result), R->apply(*V));
   return Result;
 }
-
-} // namespace xray
-} // namespace llvm
diff --git a/llvm/lib/XRay/Profile.cpp b/llvm/lib/XRay/Profile.cpp
index 1b340e5..ecb767b 100644
--- a/llvm/lib/XRay/Profile.cpp
+++ b/llvm/lib/XRay/Profile.cpp
@@ -18,8 +18,8 @@
 #include "llvm/XRay/Trace.h"
 #include <memory>
 
-namespace llvm {
-namespace xray {
+using namespace llvm;
+using namespace llvm::xray;
 
 Profile::Profile(const Profile &O) {
   // We need to re-create all the tries from the original (O), into the current
@@ -46,6 +46,7 @@ struct BlockHeader {
   uint32_t Number;
   uint64_t Thread;
 };
+} // namespace
 
 static Expected<BlockHeader> readBlockHeader(DataExtractor &Extractor,
                                              uint64_t &Offset) {
@@ -115,8 +116,6 @@ static Expected<Profile::Data> readData(DataExtractor &Extractor,
   return D;
 }
 
-} // namespace
-
 Error Profile::addBlock(Block &&B) {
   if (B.PathData.empty())
     return make_error<StringError>(
@@ -189,7 +188,7 @@ Profile::PathID Profile::internPath(ArrayRef<FuncID> P) {
   return Node->ID;
 }
 
-Profile mergeProfilesByThread(const Profile &L, const Profile &R) {
+Profile xray::mergeProfilesByThread(const Profile &L, const Profile &R) {
   Profile Merged;
   using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
   using PathDataMapPtr = std::unique_ptr<PathDataMap>;
@@ -228,7 +227,7 @@ Profile mergeProfilesByThread(const Profile &L, const Profile &R) {
   return Merged;
 }
 
-Profile mergeProfilesByStack(const Profile &L, const Profile &R) {
+Profile xray::mergeProfilesByStack(const Profile &L, const Profile &R) {
   Profile Merged;
   using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
   PathDataMap PathData;
@@ -258,7 +257,7 @@ Profile mergeProfilesByStack(const Profile &L, const Profile &R) {
   return Merged;
 }
 
-Expected<Profile> loadProfile(StringRef Filename) {
+Expected<Profile> xray::loadProfile(StringRef Filename) {
   Expected<sys::fs::file_t> FdOrErr = sys::fs::openNativeFileForRead(Filename);
   if (!FdOrErr)
     return FdOrErr.takeError();
@@ -322,7 +321,7 @@ struct StackEntry {
 
 } // namespace
 
-Expected<Profile> profileFromTrace(const Trace &T) {
+Expected<Profile> xray::profileFromTrace(const Trace &T) {
   Profile P;
 
   // The implementation of the algorithm re-creates the execution of
@@ -397,6 +396,3 @@ Expected<Profile> profileFromTrace(const Trace &T) {
 
   return P;
 }
-
-} // namespace xray
-} // namespace llvm
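
Note the free-function definitions above become xray::mergeProfilesByThread, xray::loadProfile, and so on. Once the enclosing namespace block is gone, the qualifier is what ties each definition to its declaration in the header; for free functions, unlike the member definitions earlier, a using-directive does not help, and an unqualified definition would silently create a new global function. A standalone sketch with demo names:

    namespace demo {
    int mergeProfiles(int L, int R); // stands in for a header declaration
    } // namespace demo

    using namespace demo;

    // The qualifier attaches this definition to the declaration above. An
    // unqualified `int mergeProfiles(int, int)` here would instead declare
    // a brand-new function in the global namespace.
    int demo::mergeProfiles(int L, int R) { return L + R; }

    int main() { return demo::mergeProfiles(20, 22) == 42 ? 0 : 1; }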
diff --git a/llvm/lib/XRay/RecordInitializer.cpp b/llvm/lib/XRay/RecordInitializer.cpp
index 68ab3db..83d5f14 100644
--- a/llvm/lib/XRay/RecordInitializer.cpp
+++ b/llvm/lib/XRay/RecordInitializer.cpp
@@ -7,8 +7,8 @@
 //===----------------------------------------------------------------------===//
 #include "llvm/XRay/FDRRecords.h"
 
-namespace llvm {
-namespace xray {
+using namespace llvm;
+using namespace llvm::xray;
 
 Error RecordInitializer::visit(BufferExtents &R) {
   if (!E.isValidOffsetForDataOfSize(OffsetPtr, sizeof(uint64_t)))
@@ -426,6 +426,3 @@ Error RecordInitializer::visit(FunctionRecord &R) {
   assert(FunctionRecord::kFunctionRecordSize == (OffsetPtr - BeginOffset));
   return Error::success();
 }
-
-} // namespace xray
-} // namespace llvm
diff --git a/llvm/lib/XRay/RecordPrinter.cpp b/llvm/lib/XRay/RecordPrinter.cpp
index 32d4210..b9b7a16 100644
--- a/llvm/lib/XRay/RecordPrinter.cpp
+++ b/llvm/lib/XRay/RecordPrinter.cpp
@@ -9,8 +9,8 @@
 
 #include "llvm/Support/FormatVariadic.h"
 
-namespace llvm {
-namespace xray {
+using namespace llvm;
+using namespace llvm::xray;
 
 Error RecordPrinter::visit(BufferExtents &R) {
   OS << formatv("<Buffer: size = {0} bytes>", R.size()) << Delim;
@@ -103,6 +103,3 @@ Error RecordPrinter::visit(FunctionRecord &R) {
   OS << Delim;
   return Error::success();
 }
-
-} // namespace xray
-} // namespace llvm
diff --git a/llvm/lib/XRay/Trace.cpp b/llvm/lib/XRay/Trace.cpp
index 74515b1..14a3f01 100644
--- a/llvm/lib/XRay/Trace.cpp
+++ b/llvm/lib/XRay/Trace.cpp
@@ -29,11 +29,9 @@ using namespace llvm;
 using namespace llvm::xray;
 using llvm::yaml::Input;
 
-namespace {
-
-Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian,
-                         XRayFileHeader &FileHeader,
-                         std::vector<XRayRecord> &Records) {
+static Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian,
+                                XRayFileHeader &FileHeader,
+                                std::vector<XRayRecord> &Records) {
   if (Data.size() < 32)
     return make_error<StringError>(
         "Not enough bytes for an XRay log.",
@@ -265,8 +263,9 @@ Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian,
 /// what FunctionRecord instances use, and we no longer need to include the CPU
 /// id in the CustomEventRecord.
 ///
-Error loadFDRLog(StringRef Data, bool IsLittleEndian,
-                 XRayFileHeader &FileHeader, std::vector<XRayRecord> &Records) {
+static Error loadFDRLog(StringRef Data, bool IsLittleEndian,
+                        XRayFileHeader &FileHeader,
+                        std::vector<XRayRecord> &Records) {
 
   if (Data.size() < 32)
     return createStringError(std::make_error_code(std::errc::invalid_argument),
@@ -348,8 +347,8 @@ Error loadFDRLog(StringRef Data, bool IsLittleEndian,
   return Error::success();
 }
 
-Error loadYAMLLog(StringRef Data, XRayFileHeader &FileHeader,
-                  std::vector<XRayRecord> &Records) {
+static Error loadYAMLLog(StringRef Data, XRayFileHeader &FileHeader,
+                         std::vector<XRayRecord> &Records) {
   YAMLXRayTrace Trace;
   Input In(Data);
   In >> Trace;
@@ -376,7 +375,6 @@ Error loadYAMLLog(StringRef Data, XRayFileHeader &FileHeader,
   });
   return Error::success();
 }
-} // namespace
 
 Expected<Trace> llvm::xray::loadTraceFile(StringRef Filename, bool Sort) {
   Expected<sys::fs::file_t> FdOrErr = sys::fs::openNativeFileForRead(Filename);