Diffstat (limited to 'llvm/lib')
68 files changed, 1374 insertions, 487 deletions
diff --git a/llvm/lib/Analysis/HashRecognize.cpp b/llvm/lib/Analysis/HashRecognize.cpp index 5d7ee1f..4529123 100644 --- a/llvm/lib/Analysis/HashRecognize.cpp +++ b/llvm/lib/Analysis/HashRecognize.cpp @@ -97,7 +97,7 @@ static bool containsUnreachable(const Loop &L, } } } - return std::distance(Latch->begin(), Latch->end()) != Visited.size(); + return Latch->size() != Visited.size(); } /// A structure that can hold either a Simple Recurrence or a Conditional diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 47dccde..7adb25d 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -233,19 +233,25 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap( const SCEV *DerefBytesSCEV = SE.getConstant(WiderTy, DerefBytes); // Check if we have a suitable dereferencable assumption we can use. - if (!StartPtrV->canBeFreed()) { - Instruction *CtxI = &*L->getHeader()->getFirstNonPHIIt(); - if (BasicBlock *LoopPred = L->getLoopPredecessor()) { - if (isa<BranchInst>(LoopPred->getTerminator())) - CtxI = LoopPred->getTerminator(); - } - - RetainedKnowledge DerefRK = getKnowledgeValidInContext( - StartPtrV, {Attribute::Dereferenceable}, *AC, CtxI, DT); - if (DerefRK) { - DerefBytesSCEV = - SE.getUMaxExpr(DerefBytesSCEV, SE.getSCEV(DerefRK.IRArgValue)); - } + Instruction *CtxI = &*L->getHeader()->getFirstNonPHIIt(); + if (BasicBlock *LoopPred = L->getLoopPredecessor()) { + if (isa<BranchInst>(LoopPred->getTerminator())) + CtxI = LoopPred->getTerminator(); + } + RetainedKnowledge DerefRK; + getKnowledgeForValue(StartPtrV, {Attribute::Dereferenceable}, *AC, + [&](RetainedKnowledge RK, Instruction *Assume, auto) { + if (!isValidAssumeForContext(Assume, CtxI, DT)) + return false; + if (StartPtrV->canBeFreed() && + !willNotFreeBetween(Assume, CtxI)) + return false; + DerefRK = std::max(DerefRK, RK); + return true; + }); + if (DerefRK) { + DerefBytesSCEV = + SE.getUMaxExpr(DerefBytesSCEV, SE.getSCEV(DerefRK.IRArgValue)); } if (DerefBytesSCEV->isZero()) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 09a8fbe..1eda7a7 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -89,6 +89,9 @@ using namespace llvm::PatternMatch; static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses", cl::Hidden, cl::init(20)); +/// Maximum number of instructions to check between assume and context +/// instruction. +static constexpr unsigned MaxInstrsToCheckForFree = 16; /// Returns the bitwidth of the given scalar or pointer type. For vector types, /// returns the element type's bitwidth. @@ -561,6 +564,29 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv, return false; } +bool llvm::willNotFreeBetween(const Instruction *Assume, + const Instruction *CtxI) { + if (CtxI->getParent() != Assume->getParent() || !Assume->comesBefore(CtxI)) + return false; + // Make sure the current function cannot arrange for another thread to free on + // its behalf. + if (!CtxI->getFunction()->hasNoSync()) + return false; + + // Check if there are any calls between the assume and CtxI that may + // free memory. + for (const auto &[Idx, I] : + enumerate(make_range(Assume->getIterator(), CtxI->getIterator()))) { + // Limit number of instructions to walk. 
+ if (Idx > MaxInstrsToCheckForFree) + return false; + if (const auto *CB = dyn_cast<CallBase>(&I)) + if (!CB->hasFnAttr(Attribute::NoFree)) + return false; + } + return true; +} + // TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but // we still have enough information about `RHS` to conclude non-zero. For // example Pred=EQ, RHS=isKnownNonZero. cmpExcludesZero is called in loops diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index 1703b27..bc0bb34 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -618,12 +618,15 @@ bool DwarfExpression::addExpression( case dwarf::DW_OP_dup: case dwarf::DW_OP_push_object_address: case dwarf::DW_OP_over: + case dwarf::DW_OP_rot: case dwarf::DW_OP_eq: case dwarf::DW_OP_ne: case dwarf::DW_OP_gt: case dwarf::DW_OP_ge: case dwarf::DW_OP_lt: case dwarf::DW_OP_le: + case dwarf::DW_OP_neg: + case dwarf::DW_OP_abs: emitOp(OpNum); break; case dwarf::DW_OP_deref: diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp index abb3f3e..ae284f3 100644 --- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -83,8 +83,6 @@ constrainRegClass(MachineRegisterInfo &MRI, Register Reg, const TargetRegisterClass *MachineRegisterInfo::constrainRegClass( Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs) { - if (Reg.isPhysical()) - return nullptr; return ::constrainRegClass(*this, Reg, getRegClass(Reg), RC, MinNumRegs); } diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 558c5a0..309f1be 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6046,7 +6046,7 @@ static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, return N02; } - if (MaxC == 0 && MinCPlus1.isPowerOf2()) { + if (MaxC == 0 && MinC != 0 && MinCPlus1.isPowerOf2()) { BW = MinCPlus1.exactLogBase2(); Unsigned = true; return N02; diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 11bc64c..bb10cf6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -160,7 +160,7 @@ void InstrEmitter::EmitCopyFromReg(SDValue Op, bool IsClone, Register SrcReg, // If all uses are reading from the src physical register and copying the // register is either impossible or very expensive, then don't create a copy. - if (MatchReg && SrcRC->getCopyCost() < 0) { + if (MatchReg && SrcRC->expensiveOrImpossibleToCopy()) { VRBase = SrcReg; } else { // Create the reg, emit the copy. 
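The ValueTracking and LoopAccessAnalysis hunks above work together: instead of a single getKnowledgeValidInContext query that bails out whenever the start pointer can be freed, the loop-access code now scans all dereferenceable assumptions and keeps those that are valid at the context instruction, using the new llvm::willNotFreeBetween helper to show that nothing between the assume and the context can free the pointer. Below is a condensed sketch of that caller pattern; findDerefAssumption, Ptr, and Best are illustrative names, not part of the patch, and it assumes willNotFreeBetween is declared in llvm/Analysis/ValueTracking.h (header changes fall outside this llvm/lib-only diff).

#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Dominators.h"
#include <algorithm>

using namespace llvm;

// Sketch: return the strongest dereferenceable assumption on Ptr that is
// usable at CtxI, mirroring the LoopAccessAnalysis change above.
static RetainedKnowledge findDerefAssumption(const Value *Ptr,
                                             const Instruction *CtxI,
                                             AssumptionCache &AC,
                                             const DominatorTree *DT) {
  RetainedKnowledge Best;
  getKnowledgeForValue(Ptr, {Attribute::Dereferenceable}, AC,
                       [&](RetainedKnowledge RK, Instruction *Assume, auto) {
                         // The assume must be valid at the context point.
                         if (!isValidAssumeForContext(Assume, CtxI, DT))
                           return false;
                         // If the pointer may be freed, additionally require
                         // that no call between the assume and CtxI frees it
                         // (the new, instruction-count-bounded helper).
                         if (Ptr->canBeFreed() &&
                             !willNotFreeBetween(Assume, CtxI))
                           return false;
                         Best = std::max(Best, RK);
                         return true;
                       });
  return Best;
}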
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 31e7855..4f4fb9c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -111,15 +111,11 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, const TargetRegisterInfo *TRI, const TargetInstrInfo *TII, - const TargetLowering &TLI, MCRegister &PhysReg, int &Cost) { if (Op != 2 || User->getOpcode() != ISD::CopyToReg) return; Register Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); - if (TLI.checkForPhysRegDependency(Def, User, Op, TRI, TII, PhysReg, Cost)) - return; - if (Reg.isVirtual()) return; @@ -136,7 +132,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, if (PhysReg) { const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, Def->getSimpleValueType(ResNo)); - Cost = RC->getCopyCost(); + Cost = RC->expensiveOrImpossibleToCopy() ? -1 : RC->getCopyCost(); } } @@ -490,8 +486,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { MCRegister PhysReg; int Cost = 1; // Determine if this is a physical register dependency. - const TargetLowering &TLI = DAG->getTargetLoweringInfo(); - CheckForPhysRegDependency(OpN, N, i, TRI, TII, TLI, PhysReg, Cost); + CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost); assert((!PhysReg || !isChain) && "Chain dependence via physreg data?"); // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler // emits a copy from the physical register to a virtual register unless diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 95f53fe..6ea2e27 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -12698,6 +12698,45 @@ unsigned SelectionDAG::AssignTopologicalOrder() { return DAGSize; } +void SelectionDAG::getTopologicallyOrderedNodes( + SmallVectorImpl<const SDNode *> &SortedNodes) const { + SortedNodes.clear(); + // Node -> remaining number of outstanding operands. + DenseMap<const SDNode *, unsigned> RemainingOperands; + + // Put nodes without any operands into SortedNodes first. + for (const SDNode &N : allnodes()) { + checkForCycles(&N, this); + unsigned NumOperands = N.getNumOperands(); + if (NumOperands == 0) + SortedNodes.push_back(&N); + else + // Record their total number of outstanding operands. + RemainingOperands[&N] = NumOperands; + } + + // A node is pushed into SortedNodes when all of its operands (predecessors in + // the graph) are also in SortedNodes. + for (unsigned i = 0U; i < SortedNodes.size(); ++i) { + const SDNode *N = SortedNodes[i]; + for (const SDNode *U : N->users()) { + unsigned &NumRemOperands = RemainingOperands[U]; + assert(NumRemOperands && "Invalid number of remaining operands"); + --NumRemOperands; + if (!NumRemOperands) + SortedNodes.push_back(U); + } + } + + assert(SortedNodes.size() == AllNodes.size() && "Node count mismatch"); + assert(SortedNodes.front()->getOpcode() == ISD::EntryToken && + "First node in topological sort is not the entry token"); + assert(SortedNodes.front()->getNumOperands() == 0 && + "First node in topological sort has operands"); + assert(SortedNodes.back()->use_empty() && + "Last node in topologic sort has users"); +} + /// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the /// value is produced by SD. 
void SelectionDAG::AddDbgValue(SDDbgValue *DB, bool isParameter) { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 4b2a00c..fcfbfe6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -1061,13 +1061,24 @@ static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { N->dump(G); } -LLVM_DUMP_METHOD void SelectionDAG::dump() const { +LLVM_DUMP_METHOD void SelectionDAG::dump(bool Sorted) const { dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:\n"; - for (const SDNode &N : allnodes()) { + auto dumpEachNode = [this](const SDNode &N) { if (!N.hasOneUse() && &N != getRoot().getNode() && (!shouldPrintInline(N, this) || N.use_empty())) DumpNodes(&N, 2, this); + }; + + if (Sorted) { + SmallVector<const SDNode *> SortedNodes; + SortedNodes.reserve(AllNodes.size()); + getTopologicallyOrderedNodes(SortedNodes); + for (const SDNode *N : SortedNodes) + dumpEachNode(*N); + } else { + for (const SDNode &N : allnodes()) + dumpEachNode(N); } if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index e61558c..c35f29d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -144,6 +144,11 @@ UseMBPI("use-mbpi", cl::init(true), cl::Hidden); #ifndef NDEBUG +static cl::opt<bool> + DumpSortedDAG("dump-sorted-dags", cl::Hidden, + cl::desc("Print DAGs with sorted nodes in debug dump"), + cl::init(false)); + static cl::opt<std::string> FilterDAGBasicBlockName("filter-view-dags", cl::Hidden, cl::desc("Only display the basic block whose name " @@ -932,7 +937,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { ISEL_DUMP(dbgs() << "\nInitial selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; - CurDAG->dump()); + CurDAG->dump(DumpSortedDAG)); #if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS if (TTI->hasBranchDivergence()) @@ -952,7 +957,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { ISEL_DUMP(dbgs() << "\nOptimized lowered selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; - CurDAG->dump()); + CurDAG->dump(DumpSortedDAG)); #if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS if (TTI->hasBranchDivergence()) @@ -974,7 +979,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { ISEL_DUMP(dbgs() << "\nType-legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; - CurDAG->dump()); + CurDAG->dump(DumpSortedDAG)); #if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS if (TTI->hasBranchDivergence()) @@ -998,7 +1003,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { ISEL_DUMP(dbgs() << "\nOptimized type-legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; - CurDAG->dump()); + CurDAG->dump(DumpSortedDAG)); #if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS if (TTI->hasBranchDivergence()) @@ -1016,7 +1021,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { ISEL_DUMP(dbgs() << "\nVector-legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; - CurDAG->dump()); + CurDAG->dump(DumpSortedDAG)); #if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS if (TTI->hasBranchDivergence()) @@ -1032,7 +1037,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { ISEL_DUMP(dbgs() << 
"\nVector/type-legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; - CurDAG->dump()); + CurDAG->dump(DumpSortedDAG)); #if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS if (TTI->hasBranchDivergence()) @@ -1052,7 +1057,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { ISEL_DUMP(dbgs() << "\nOptimized vector-legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; - CurDAG->dump()); + CurDAG->dump(DumpSortedDAG)); #if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS if (TTI->hasBranchDivergence()) @@ -1072,7 +1077,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { ISEL_DUMP(dbgs() << "\nLegalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; - CurDAG->dump()); + CurDAG->dump(DumpSortedDAG)); #if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS if (TTI->hasBranchDivergence()) @@ -1092,7 +1097,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { ISEL_DUMP(dbgs() << "\nOptimized legalized selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; - CurDAG->dump()); + CurDAG->dump(DumpSortedDAG)); #if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS if (TTI->hasBranchDivergence()) @@ -1116,7 +1121,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { ISEL_DUMP(dbgs() << "\nSelected selection DAG: " << printMBBReference(*FuncInfo->MBB) << " '" << BlockName << "'\n"; - CurDAG->dump()); + CurDAG->dump(DumpSortedDAG)); if (ViewSchedDAGs && MatchFilterBB) CurDAG->viewGraph("scheduler input for " + BlockName); diff --git a/llvm/lib/IR/Assumptions.cpp b/llvm/lib/IR/Assumptions.cpp index f8bbcb3..3397f0e 100644 --- a/llvm/lib/IR/Assumptions.cpp +++ b/llvm/lib/IR/Assumptions.cpp @@ -20,9 +20,8 @@ using namespace llvm; -namespace { -bool hasAssumption(const Attribute &A, - const KnownAssumptionString &AssumptionStr) { +static bool hasAssumption(const Attribute &A, + const KnownAssumptionString &AssumptionStr) { if (!A.isValid()) return false; assert(A.isStringAttribute() && "Expected a string attribute!"); @@ -33,7 +32,7 @@ bool hasAssumption(const Attribute &A, return llvm::is_contained(Strings, AssumptionStr); } -DenseSet<StringRef> getAssumptions(const Attribute &A) { +static DenseSet<StringRef> getAssumptions(const Attribute &A) { if (!A.isValid()) return DenseSet<StringRef>(); assert(A.isStringAttribute() && "Expected a string attribute!"); @@ -47,8 +46,8 @@ DenseSet<StringRef> getAssumptions(const Attribute &A) { } template <typename AttrSite> -bool addAssumptionsImpl(AttrSite &Site, - const DenseSet<StringRef> &Assumptions) { +static bool addAssumptionsImpl(AttrSite &Site, + const DenseSet<StringRef> &Assumptions) { if (Assumptions.empty()) return false; @@ -64,7 +63,6 @@ bool addAssumptionsImpl(AttrSite &Site, return true; } -} // namespace bool llvm::hasAssumption(const Function &F, const KnownAssumptionString &AssumptionStr) { diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp index 1ededb9e7..77d044b 100644 --- a/llvm/lib/IR/DebugInfoMetadata.cpp +++ b/llvm/lib/IR/DebugInfoMetadata.cpp @@ -1768,6 +1768,7 @@ bool DIExpression::isValid() const { case dwarf::DW_OP_bregx: case dwarf::DW_OP_push_object_address: case dwarf::DW_OP_over: + case dwarf::DW_OP_rot: case dwarf::DW_OP_consts: case dwarf::DW_OP_eq: case dwarf::DW_OP_ne: @@ -1775,6 +1776,8 @@ bool DIExpression::isValid() const { case dwarf::DW_OP_ge: case dwarf::DW_OP_lt: case dwarf::DW_OP_le: + case dwarf::DW_OP_neg: + case dwarf::DW_OP_abs: break; } } diff 
--git a/llvm/lib/IR/DiagnosticHandler.cpp b/llvm/lib/IR/DiagnosticHandler.cpp index 683eade..eb2fe3b 100644 --- a/llvm/lib/IR/DiagnosticHandler.cpp +++ b/llvm/lib/IR/DiagnosticHandler.cpp @@ -36,6 +36,7 @@ struct PassRemarksOpt { } } }; +} // namespace static PassRemarksOpt PassRemarksPassedOptLoc; static PassRemarksOpt PassRemarksMissedOptLoc; @@ -66,7 +67,6 @@ static cl::opt<PassRemarksOpt, true, cl::parser<std::string>> "Enable optimization analysis remarks from passes whose name match " "the given regular expression"), cl::Hidden, cl::location(PassRemarksAnalysisOptLoc), cl::ValueRequired); -} bool DiagnosticHandler::isAnalysisRemarkEnabled(StringRef PassName) const { return (PassRemarksAnalysisOptLoc.Pattern && diff --git a/llvm/lib/IR/ModuleSummaryIndex.cpp b/llvm/lib/IR/ModuleSummaryIndex.cpp index d9024b0..dc55b63 100644 --- a/llvm/lib/IR/ModuleSummaryIndex.cpp +++ b/llvm/lib/IR/ModuleSummaryIndex.cpp @@ -409,7 +409,7 @@ struct Edge { GlobalValue::GUID Src; GlobalValue::GUID Dst; }; -} +} // namespace void Attributes::add(const Twine &Name, const Twine &Value, const Twine &Comment) { diff --git a/llvm/lib/IR/PassInstrumentation.cpp b/llvm/lib/IR/PassInstrumentation.cpp index 70bbe8f..52aad8f 100644 --- a/llvm/lib/IR/PassInstrumentation.cpp +++ b/llvm/lib/IR/PassInstrumentation.cpp @@ -15,7 +15,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/IR/PassManager.h" -namespace llvm { +using namespace llvm; template struct LLVM_EXPORT_TEMPLATE Any::TypeId<const Module *>; template struct LLVM_EXPORT_TEMPLATE Any::TypeId<const Function *>; @@ -42,7 +42,8 @@ PassInstrumentationCallbacks::getPassNameForClassName(StringRef ClassName) { AnalysisKey PassInstrumentationAnalysis::Key; -bool isSpecialPass(StringRef PassID, const std::vector<StringRef> &Specials) { +bool llvm::isSpecialPass(StringRef PassID, + const std::vector<StringRef> &Specials) { size_t Pos = PassID.find('<'); StringRef Prefix = PassID; if (Pos != StringRef::npos) @@ -50,5 +51,3 @@ bool isSpecialPass(StringRef PassID, const std::vector<StringRef> &Specials) { return any_of(Specials, [Prefix](StringRef S) { return Prefix.ends_with(S); }); } - -} // namespace llvm diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp index edeca97..fc2be51 100644 --- a/llvm/lib/IR/ProfDataUtils.cpp +++ b/llvm/lib/IR/ProfDataUtils.cpp @@ -24,8 +24,6 @@ using namespace llvm; -namespace { - // MD_prof nodes have the following layout // // In general: @@ -41,14 +39,15 @@ namespace { // correctly, and can change the behavior in the future if the layout changes // the minimum number of operands for MD_prof nodes with branch weights -constexpr unsigned MinBWOps = 3; +static constexpr unsigned MinBWOps = 3; // the minimum number of operands for MD_prof nodes with value profiles -constexpr unsigned MinVPOps = 5; +static constexpr unsigned MinVPOps = 5; // We may want to add support for other MD_prof types, so provide an abstraction // for checking the metadata type. -bool isTargetMD(const MDNode *ProfData, const char *Name, unsigned MinOps) { +static bool isTargetMD(const MDNode *ProfData, const char *Name, + unsigned MinOps) { // TODO: This routine may be simplified if MD_prof used an enum instead of a // string to differentiate the types of MD_prof nodes. 
if (!ProfData || !Name || MinOps < 2) @@ -101,14 +100,11 @@ static SmallVector<uint32_t> fitWeights(ArrayRef<uint64_t> Weights) { return Ret; } -} // namespace - -namespace llvm { -cl::opt<bool> ElideAllZeroBranchWeights("elide-all-zero-branch-weights", +static cl::opt<bool> ElideAllZeroBranchWeights("elide-all-zero-branch-weights", #if defined(LLVM_ENABLE_PROFCHECK) - cl::init(false) + cl::init(false) #else - cl::init(true) + cl::init(true) #endif ); const char *MDProfLabels::BranchWeights = "branch_weights"; @@ -118,21 +114,21 @@ const char *MDProfLabels::FunctionEntryCount = "function_entry_count"; const char *MDProfLabels::SyntheticFunctionEntryCount = "synthetic_function_entry_count"; const char *MDProfLabels::UnknownBranchWeightsMarker = "unknown"; -const char *LLVMLoopEstimatedTripCount = "llvm.loop.estimated_trip_count"; +const char *llvm::LLVMLoopEstimatedTripCount = "llvm.loop.estimated_trip_count"; -bool hasProfMD(const Instruction &I) { +bool llvm::hasProfMD(const Instruction &I) { return I.hasMetadata(LLVMContext::MD_prof); } -bool isBranchWeightMD(const MDNode *ProfileData) { +bool llvm::isBranchWeightMD(const MDNode *ProfileData) { return isTargetMD(ProfileData, MDProfLabels::BranchWeights, MinBWOps); } -bool isValueProfileMD(const MDNode *ProfileData) { +bool llvm::isValueProfileMD(const MDNode *ProfileData) { return isTargetMD(ProfileData, MDProfLabels::ValueProfile, MinVPOps); } -bool hasBranchWeightMD(const Instruction &I) { +bool llvm::hasBranchWeightMD(const Instruction &I) { auto *ProfileData = I.getMetadata(LLVMContext::MD_prof); return isBranchWeightMD(ProfileData); } @@ -147,16 +143,16 @@ static bool hasCountTypeMD(const Instruction &I) { return isa<CallBase>(I) && !isBranchWeightMD(ProfileData); } -bool hasValidBranchWeightMD(const Instruction &I) { +bool llvm::hasValidBranchWeightMD(const Instruction &I) { return getValidBranchWeightMDNode(I); } -bool hasBranchWeightOrigin(const Instruction &I) { +bool llvm::hasBranchWeightOrigin(const Instruction &I) { auto *ProfileData = I.getMetadata(LLVMContext::MD_prof); return hasBranchWeightOrigin(ProfileData); } -bool hasBranchWeightOrigin(const MDNode *ProfileData) { +bool llvm::hasBranchWeightOrigin(const MDNode *ProfileData) { if (!isBranchWeightMD(ProfileData)) return false; auto *ProfDataName = dyn_cast<MDString>(ProfileData->getOperand(1)); @@ -168,54 +164,54 @@ bool hasBranchWeightOrigin(const MDNode *ProfileData) { return ProfDataName != nullptr; } -unsigned getBranchWeightOffset(const MDNode *ProfileData) { +unsigned llvm::getBranchWeightOffset(const MDNode *ProfileData) { return hasBranchWeightOrigin(ProfileData) ? 
2 : 1; } -unsigned getNumBranchWeights(const MDNode &ProfileData) { +unsigned llvm::getNumBranchWeights(const MDNode &ProfileData) { return ProfileData.getNumOperands() - getBranchWeightOffset(&ProfileData); } -MDNode *getBranchWeightMDNode(const Instruction &I) { +MDNode *llvm::getBranchWeightMDNode(const Instruction &I) { auto *ProfileData = I.getMetadata(LLVMContext::MD_prof); if (!isBranchWeightMD(ProfileData)) return nullptr; return ProfileData; } -MDNode *getValidBranchWeightMDNode(const Instruction &I) { +MDNode *llvm::getValidBranchWeightMDNode(const Instruction &I) { auto *ProfileData = getBranchWeightMDNode(I); if (ProfileData && getNumBranchWeights(*ProfileData) == I.getNumSuccessors()) return ProfileData; return nullptr; } -void extractFromBranchWeightMD32(const MDNode *ProfileData, - SmallVectorImpl<uint32_t> &Weights) { +void llvm::extractFromBranchWeightMD32(const MDNode *ProfileData, + SmallVectorImpl<uint32_t> &Weights) { extractFromBranchWeightMD(ProfileData, Weights); } -void extractFromBranchWeightMD64(const MDNode *ProfileData, - SmallVectorImpl<uint64_t> &Weights) { +void llvm::extractFromBranchWeightMD64(const MDNode *ProfileData, + SmallVectorImpl<uint64_t> &Weights) { extractFromBranchWeightMD(ProfileData, Weights); } -bool extractBranchWeights(const MDNode *ProfileData, - SmallVectorImpl<uint32_t> &Weights) { +bool llvm::extractBranchWeights(const MDNode *ProfileData, + SmallVectorImpl<uint32_t> &Weights) { if (!isBranchWeightMD(ProfileData)) return false; extractFromBranchWeightMD(ProfileData, Weights); return true; } -bool extractBranchWeights(const Instruction &I, - SmallVectorImpl<uint32_t> &Weights) { +bool llvm::extractBranchWeights(const Instruction &I, + SmallVectorImpl<uint32_t> &Weights) { auto *ProfileData = I.getMetadata(LLVMContext::MD_prof); return extractBranchWeights(ProfileData, Weights); } -bool extractBranchWeights(const Instruction &I, uint64_t &TrueVal, - uint64_t &FalseVal) { +bool llvm::extractBranchWeights(const Instruction &I, uint64_t &TrueVal, + uint64_t &FalseVal) { assert((I.getOpcode() == Instruction::Br || I.getOpcode() == Instruction::Select) && "Looking for branch weights on something besides branch, select, or " @@ -234,7 +230,8 @@ bool extractBranchWeights(const Instruction &I, uint64_t &TrueVal, return true; } -bool extractProfTotalWeight(const MDNode *ProfileData, uint64_t &TotalVal) { +bool llvm::extractProfTotalWeight(const MDNode *ProfileData, + uint64_t &TotalVal) { TotalVal = 0; if (!ProfileData) return false; @@ -262,11 +259,12 @@ bool extractProfTotalWeight(const MDNode *ProfileData, uint64_t &TotalVal) { return false; } -bool extractProfTotalWeight(const Instruction &I, uint64_t &TotalVal) { +bool llvm::extractProfTotalWeight(const Instruction &I, uint64_t &TotalVal) { return extractProfTotalWeight(I.getMetadata(LLVMContext::MD_prof), TotalVal); } -void setExplicitlyUnknownBranchWeights(Instruction &I, StringRef PassName) { +void llvm::setExplicitlyUnknownBranchWeights(Instruction &I, + StringRef PassName) { MDBuilder MDB(I.getContext()); I.setMetadata( LLVMContext::MD_prof, @@ -275,14 +273,16 @@ void setExplicitlyUnknownBranchWeights(Instruction &I, StringRef PassName) { MDB.createString(PassName)})); } -void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, Function &F, - StringRef PassName) { +void llvm::setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, + Function &F, + StringRef PassName) { if (std::optional<Function::ProfileCount> EC = F.getEntryCount(); EC && EC->getCount() > 0) 
setExplicitlyUnknownBranchWeights(I, PassName); } -void setExplicitlyUnknownFunctionEntryCount(Function &F, StringRef PassName) { +void llvm::setExplicitlyUnknownFunctionEntryCount(Function &F, + StringRef PassName) { MDBuilder MDB(F.getContext()); F.setMetadata( LLVMContext::MD_prof, @@ -291,21 +291,21 @@ void setExplicitlyUnknownFunctionEntryCount(Function &F, StringRef PassName) { MDB.createString(PassName)})); } -bool isExplicitlyUnknownProfileMetadata(const MDNode &MD) { +bool llvm::isExplicitlyUnknownProfileMetadata(const MDNode &MD) { if (MD.getNumOperands() != 2) return false; return MD.getOperand(0).equalsStr(MDProfLabels::UnknownBranchWeightsMarker); } -bool hasExplicitlyUnknownBranchWeights(const Instruction &I) { +bool llvm::hasExplicitlyUnknownBranchWeights(const Instruction &I) { auto *MD = I.getMetadata(LLVMContext::MD_prof); if (!MD) return false; return isExplicitlyUnknownProfileMetadata(*MD); } -void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights, - bool IsExpected, bool ElideAllZero) { +void llvm::setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights, + bool IsExpected, bool ElideAllZero) { if ((ElideAllZeroBranchWeights && ElideAllZero) && llvm::all_of(Weights, [](uint32_t V) { return V == 0; })) { I.setMetadata(LLVMContext::MD_prof, nullptr); @@ -317,13 +317,14 @@ void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights, I.setMetadata(LLVMContext::MD_prof, BranchWeights); } -void setFittedBranchWeights(Instruction &I, ArrayRef<uint64_t> Weights, - bool IsExpected, bool ElideAllZero) { +void llvm::setFittedBranchWeights(Instruction &I, ArrayRef<uint64_t> Weights, + bool IsExpected, bool ElideAllZero) { setBranchWeights(I, fitWeights(Weights), IsExpected, ElideAllZero); } -SmallVector<uint32_t> downscaleWeights(ArrayRef<uint64_t> Weights, - std::optional<uint64_t> KnownMaxCount) { +SmallVector<uint32_t> +llvm::downscaleWeights(ArrayRef<uint64_t> Weights, + std::optional<uint64_t> KnownMaxCount) { uint64_t MaxCount = KnownMaxCount.has_value() ? 
KnownMaxCount.value() : *llvm::max_element(Weights); assert(MaxCount > 0 && "Bad max count"); @@ -334,7 +335,7 @@ SmallVector<uint32_t> downscaleWeights(ArrayRef<uint64_t> Weights, return DownscaledWeights; } -void scaleProfData(Instruction &I, uint64_t S, uint64_t T) { +void llvm::scaleProfData(Instruction &I, uint64_t S, uint64_t T) { assert(T != 0 && "Caller should guarantee"); auto *ProfileData = I.getMetadata(LLVMContext::MD_prof); if (ProfileData == nullptr) @@ -387,5 +388,3 @@ void scaleProfData(Instruction &I, uint64_t S, uint64_t T) { } I.setMetadata(LLVMContext::MD_prof, MDNode::get(C, Vals)); } - -} // namespace llvm diff --git a/llvm/lib/IR/SafepointIRVerifier.cpp b/llvm/lib/IR/SafepointIRVerifier.cpp index e54894c..e35b5b3 100644 --- a/llvm/lib/IR/SafepointIRVerifier.cpp +++ b/llvm/lib/IR/SafepointIRVerifier.cpp @@ -196,7 +196,6 @@ protected: static void Verify(const Function &F, const DominatorTree &DT, const CFGDeadness &CD); -namespace llvm { PreservedAnalyses SafepointIRVerifierPass::run(Function &F, FunctionAnalysisManager &AM) { const auto &DT = AM.getResult<DominatorTreeAnalysis>(F); @@ -205,7 +204,6 @@ PreservedAnalyses SafepointIRVerifierPass::run(Function &F, Verify(F, DT, CD); return PreservedAnalyses::all(); } -} // namespace llvm namespace { diff --git a/llvm/lib/IR/VFABIDemangler.cpp b/llvm/lib/IR/VFABIDemangler.cpp index 2de05a5..4fcf436 100644 --- a/llvm/lib/IR/VFABIDemangler.cpp +++ b/llvm/lib/IR/VFABIDemangler.cpp @@ -20,15 +20,16 @@ using namespace llvm; #define DEBUG_TYPE "vfabi-demangler" -namespace { /// Utilities for the Vector Function ABI name parser. +namespace { /// Return types for the parser functions. enum class ParseRet { OK, // Found. None, // Not found. Error // Syntax error. }; +} // namespace /// Extracts the `<isa>` information from the mangled string, and /// sets the `ISA` accordingly. If successful, the <isa> token is removed @@ -372,7 +373,6 @@ getScalableECFromSignature(const FunctionType *Signature, const VFISAKind ISA, return std::nullopt; } -} // namespace // Format of the ABI name: // _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)] diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp index a347609..b775cbb 100644 --- a/llvm/lib/IR/Value.cpp +++ b/llvm/lib/IR/Value.cpp @@ -622,6 +622,7 @@ enum PointerStripKind { PSK_InBoundsConstantIndices, PSK_InBounds }; +} // end anonymous namespace template <PointerStripKind StripKind> static void NoopCallback(const Value *) {} @@ -696,7 +697,6 @@ static const Value *stripPointerCastsAndOffsets( return V; } -} // end anonymous namespace const Value *Value::stripPointerCasts() const { return stripPointerCastsAndOffsets<PSK_ZeroIndices>(this); diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index 7da972f..42b21b5 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -207,6 +207,7 @@ add_llvm_component_library(LLVMSupport InstructionCost.cpp IntEqClasses.cpp IntervalMap.cpp + Jobserver.cpp JSON.cpp KnownBits.cpp KnownFPClass.cpp diff --git a/llvm/lib/Support/Jobserver.cpp b/llvm/lib/Support/Jobserver.cpp new file mode 100644 index 0000000..9f726eb --- /dev/null +++ b/llvm/lib/Support/Jobserver.cpp @@ -0,0 +1,259 @@ +//===- llvm/Support/Jobserver.cpp - Jobserver Client Implementation -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Jobserver.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/raw_ostream.h" + +#include <atomic> +#include <memory> +#include <mutex> +#include <new> + +#define DEBUG_TYPE "jobserver" + +using namespace llvm; + +namespace { +struct FdPair { + int Read = -1; + int Write = -1; + bool isValid() const { return Read >= 0 && Write >= 0; } +}; + +struct JobserverConfig { + enum Mode { + None, + PosixFifo, + PosixPipe, + Win32Semaphore, + }; + Mode TheMode = None; + std::string Path; + FdPair PipeFDs; +}; + +/// A helper function that checks if `Input` starts with `Prefix`. +/// If it does, it removes the prefix from `Input`, assigns the remainder to +/// `Value`, and returns true. Otherwise, it returns false. +bool getPrefixedValue(StringRef Input, StringRef Prefix, StringRef &Value) { + if (Input.consume_front(Prefix)) { + Value = Input; + return true; + } + return false; +} + +/// A helper function to parse a string in the format "R,W" where R and W are +/// non-negative integers representing file descriptors. It populates the +/// `ReadFD` and `WriteFD` output parameters. Returns true on success. +static std::optional<FdPair> getFileDescriptorPair(StringRef Input) { + FdPair FDs; + if (Input.consumeInteger(10, FDs.Read)) + return std::nullopt; + if (!Input.consume_front(",")) + return std::nullopt; + if (Input.consumeInteger(10, FDs.Write)) + return std::nullopt; + if (!Input.empty() || !FDs.isValid()) + return std::nullopt; + return FDs; +} + +/// Parses the `MAKEFLAGS` environment variable string to find jobserver +/// arguments. It splits the string into space-separated arguments and searches +/// for `--jobserver-auth` or `--jobserver-fds`. Based on the value of these +/// arguments, it determines the jobserver mode (Pipe, FIFO, or Semaphore) and +/// connection details (file descriptors or path). +Expected<JobserverConfig> parseNativeMakeFlags(StringRef MakeFlags) { + JobserverConfig Config; + if (MakeFlags.empty()) + return Config; + + // Split the MAKEFLAGS string into arguments. + SmallVector<StringRef, 8> Args; + SplitString(MakeFlags, Args); + + // If '-n' (dry-run) is present as a legacy flag (not starting with '-'), + // disable the jobserver. + if (!Args.empty() && !Args[0].starts_with("-") && Args[0].contains('n')) + return Config; + + // Iterate through arguments to find jobserver flags. + // Note that make may pass multiple --jobserver-auth flags; the last one wins. + for (StringRef Arg : Args) { + StringRef Value; + if (getPrefixedValue(Arg, "--jobserver-auth=", Value)) { + // Try to parse as a file descriptor pair first. + if (auto FDPair = getFileDescriptorPair(Value)) { + Config.TheMode = JobserverConfig::PosixPipe; + Config.PipeFDs = *FDPair; + } else { + StringRef FifoPath; + // If not FDs, try to parse as a named pipe (fifo). + if (getPrefixedValue(Value, "fifo:", FifoPath)) { + Config.TheMode = JobserverConfig::PosixFifo; + Config.Path = FifoPath.str(); + } else { + // Otherwise, assume it's a Windows semaphore. 
+ Config.TheMode = JobserverConfig::Win32Semaphore; + Config.Path = Value.str(); + } + } + } else if (getPrefixedValue(Arg, "--jobserver-fds=", Value)) { + // This is an alternative, older syntax for the pipe-based server. + if (auto FDPair = getFileDescriptorPair(Value)) { + Config.TheMode = JobserverConfig::PosixPipe; + Config.PipeFDs = *FDPair; + } else { + return createStringError(inconvertibleErrorCode(), + "Invalid file descriptor pair in MAKEFLAGS"); + } + } + } + +// Perform platform-specific validation. +#ifdef _WIN32 + if (Config.TheMode == JobserverConfig::PosixFifo || + Config.TheMode == JobserverConfig::PosixPipe) + return createStringError( + inconvertibleErrorCode(), + "FIFO/Pipe-based jobserver is not supported on Windows"); +#else + if (Config.TheMode == JobserverConfig::Win32Semaphore) + return createStringError( + inconvertibleErrorCode(), + "Semaphore-based jobserver is not supported on this platform"); +#endif + return Config; +} + +std::once_flag GJobserverOnceFlag; +JobserverClient *GJobserver = nullptr; + +} // namespace + +namespace llvm { +class JobserverClientImpl : public JobserverClient { + bool IsInitialized = false; + std::atomic<bool> HasImplicitSlot{true}; + unsigned NumJobs = 0; + +public: + JobserverClientImpl(const JobserverConfig &Config); + ~JobserverClientImpl() override; + + JobSlot tryAcquire() override; + void release(JobSlot Slot) override; + unsigned getNumJobs() const override { return NumJobs; } + + bool isValid() const { return IsInitialized; } + +private: +#if defined(LLVM_ON_UNIX) + int ReadFD = -1; + int WriteFD = -1; + std::string FifoPath; +#elif defined(_WIN32) + void *Semaphore = nullptr; +#endif +}; +} // namespace llvm + +// Include the platform-specific parts of the class. +#if defined(LLVM_ON_UNIX) +#include "Unix/Jobserver.inc" +#elif defined(_WIN32) +#include "Windows/Jobserver.inc" +#else +// Dummy implementation for unsupported platforms. +JobserverClientImpl::JobserverClientImpl(const JobserverConfig &Config) {} +JobserverClientImpl::~JobserverClientImpl() = default; +JobSlot JobserverClientImpl::tryAcquire() { return JobSlot(); } +void JobserverClientImpl::release(JobSlot Slot) {} +#endif + +namespace llvm { +JobserverClient::~JobserverClient() = default; + +uint8_t JobSlot::getExplicitValue() const { + assert(isExplicit() && "Cannot get value of implicit or invalid slot"); + return static_cast<uint8_t>(Value); +} + +/// This is the main entry point for acquiring a jobserver client. It uses a +/// std::call_once to ensure the singleton `GJobserver` instance is created +/// safely in a multi-threaded environment. On first call, it reads the +/// `MAKEFLAGS` environment variable, parses it, and attempts to construct and +/// initialize a `JobserverClientImpl`. If successful, the global instance is +/// stored in `GJobserver`. Subsequent calls will return the existing instance. 
+JobserverClient *JobserverClient::getInstance() { + std::call_once(GJobserverOnceFlag, []() { + LLVM_DEBUG( + dbgs() + << "JobserverClient::getInstance() called for the first time.\n"); + const char *MakeFlagsEnv = getenv("MAKEFLAGS"); + if (!MakeFlagsEnv) { + errs() << "Warning: failed to create jobserver client due to MAKEFLAGS " + "environment variable not found\n"; + return; + } + + LLVM_DEBUG(dbgs() << "Found MAKEFLAGS = \"" << MakeFlagsEnv << "\"\n"); + + auto ConfigOrErr = parseNativeMakeFlags(MakeFlagsEnv); + if (Error Err = ConfigOrErr.takeError()) { + errs() << "Warning: failed to create jobserver client due to invalid " + "MAKEFLAGS environment variable: " + << toString(std::move(Err)) << "\n"; + return; + } + + JobserverConfig Config = *ConfigOrErr; + if (Config.TheMode == JobserverConfig::None) { + errs() << "Warning: failed to create jobserver client due to jobserver " + "mode missing in MAKEFLAGS environment variable\n"; + return; + } + + if (Config.TheMode == JobserverConfig::PosixPipe) { +#if defined(LLVM_ON_UNIX) + if (!areFdsValid(Config.PipeFDs.Read, Config.PipeFDs.Write)) { + errs() << "Warning: failed to create jobserver client due to invalid " + "Pipe FDs in MAKEFLAGS environment variable\n"; + return; + } +#endif + } + + auto Client = std::make_unique<JobserverClientImpl>(Config); + if (Client->isValid()) { + LLVM_DEBUG(dbgs() << "Jobserver client created successfully!\n"); + GJobserver = Client.release(); + } else + errs() << "Warning: jobserver client initialization failed.\n"; + }); + return GJobserver; +} + +/// For testing purposes only. This function resets the singleton instance by +/// destroying the existing client and re-initializing the `std::once_flag`. +/// This allows tests to simulate the first-time initialization of the +/// jobserver client multiple times. +void JobserverClient::resetForTesting() { + delete GJobserver; + GJobserver = nullptr; + // Re-construct the std::once_flag in place to reset the singleton state. + new (&GJobserverOnceFlag) std::once_flag(); +} +} // namespace llvm diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp index 3ac6fc7..8e0c724 100644 --- a/llvm/lib/Support/Parallel.cpp +++ b/llvm/lib/Support/Parallel.cpp @@ -7,12 +7,17 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/Parallel.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/Config/llvm-config.h" +#include "llvm/Support/ExponentialBackoff.h" +#include "llvm/Support/Jobserver.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Threading.h" #include <atomic> #include <future> +#include <memory> +#include <mutex> #include <thread> #include <vector> @@ -49,6 +54,9 @@ public: class ThreadPoolExecutor : public Executor { public: explicit ThreadPoolExecutor(ThreadPoolStrategy S) { + if (S.UseJobserver) + TheJobserver = JobserverClient::getInstance(); + ThreadCount = S.compute_thread_count(); // Spawn all but one of the threads in another thread as spawning threads // can take a while. @@ -69,6 +77,10 @@ public: }); } + // To make sure the thread pool executor can only be created with a parallel + // strategy. + ThreadPoolExecutor() = delete; + void stop() { { std::lock_guard<std::mutex> Lock(Mutex); @@ -111,15 +123,62 @@ private: void work(ThreadPoolStrategy S, unsigned ThreadID) { threadIndex = ThreadID; S.apply_thread_strategy(ThreadID); + // Note on jobserver deadlock avoidance: + // GNU Make grants each invoked process one implicit job slot. 
Our + // JobserverClient models this by returning an implicit JobSlot on the + // first successful tryAcquire() in a process. This guarantees forward + // progress without requiring a dedicated "always-on" thread here. + + static thread_local std::unique_ptr<ExponentialBackoff> Backoff; + while (true) { - std::unique_lock<std::mutex> Lock(Mutex); - Cond.wait(Lock, [&] { return Stop || !WorkStack.empty(); }); - if (Stop) - break; - auto Task = std::move(WorkStack.back()); - WorkStack.pop_back(); - Lock.unlock(); - Task(); + if (TheJobserver) { + // Jobserver-mode scheduling: + // - Acquire one job slot (with exponential backoff to avoid busy-wait). + // - While holding the slot, drain and run tasks from the local queue. + // - Release the slot when the queue is empty or when shutting down. + // Rationale: Holding a slot amortizes acquire/release overhead over + // multiple tasks and avoids requeue/yield churn, while still enforcing + // the jobserver’s global concurrency limit. With K available slots, + // up to K workers run tasks in parallel; within each worker tasks run + // sequentially until the local queue is empty. + ExponentialBackoff Backoff(std::chrono::hours(24)); + JobSlot Slot; + do { + if (Stop) + return; + Slot = TheJobserver->tryAcquire(); + if (Slot.isValid()) + break; + } while (Backoff.waitForNextAttempt()); + + auto SlotReleaser = llvm::make_scope_exit( + [&] { TheJobserver->release(std::move(Slot)); }); + + while (true) { + std::function<void()> Task; + { + std::unique_lock<std::mutex> Lock(Mutex); + Cond.wait(Lock, [&] { return Stop || !WorkStack.empty(); }); + if (Stop && WorkStack.empty()) + return; + if (WorkStack.empty()) + break; + Task = std::move(WorkStack.back()); + WorkStack.pop_back(); + } + Task(); + } + } else { + std::unique_lock<std::mutex> Lock(Mutex); + Cond.wait(Lock, [&] { return Stop || !WorkStack.empty(); }); + if (Stop) + break; + auto Task = std::move(WorkStack.back()); + WorkStack.pop_back(); + Lock.unlock(); + Task(); + } } } @@ -130,9 +189,20 @@ private: std::promise<void> ThreadsCreated; std::vector<std::thread> Threads; unsigned ThreadCount; + + JobserverClient *TheJobserver = nullptr; }; -Executor *Executor::getDefaultExecutor() { +// A global raw pointer to the executor. Lifetime is managed by the +// objects created within createExecutor(). +static Executor *TheExec = nullptr; +static std::once_flag Flag; + +// This function will be called exactly once to create the executor. +// It contains the necessary platform-specific logic. Since functions +// called by std::call_once cannot return value, we have to set the +// executor as a global variable. +void createExecutor() { #ifdef _WIN32 // The ManagedStatic enables the ThreadPoolExecutor to be stopped via // llvm_shutdown() which allows a "clean" fast exit, e.g. via _exit(). This @@ -156,16 +226,22 @@ Executor *Executor::getDefaultExecutor() { ThreadPoolExecutor::Deleter> ManagedExec; static std::unique_ptr<ThreadPoolExecutor> Exec(&(*ManagedExec)); - return Exec.get(); + TheExec = Exec.get(); #else // ManagedStatic is not desired on other platforms. When `Exec` is destroyed // by llvm_shutdown(), worker threads will clean up and invoke TLS // destructors. This can lead to race conditions if other threads attempt to // access TLS objects that have already been destroyed. static ThreadPoolExecutor Exec(strategy); - return &Exec; + TheExec = &Exec; #endif } + +Executor *Executor::getDefaultExecutor() { + // Use std::call_once to lazily and safely initialize the executor. 
+ std::call_once(Flag, createExecutor); + return TheExec; +} } // namespace } // namespace detail diff --git a/llvm/lib/Support/ThreadPool.cpp b/llvm/lib/Support/ThreadPool.cpp index c304f0f..6960268 100644 --- a/llvm/lib/Support/ThreadPool.cpp +++ b/llvm/lib/Support/ThreadPool.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// // +// // This file implements a crude C++11 based thread pool. // //===----------------------------------------------------------------------===// @@ -14,6 +15,8 @@ #include "llvm/Config/llvm-config.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/Support/ExponentialBackoff.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Threading.h" #include "llvm/Support/raw_ostream.h" @@ -33,7 +36,10 @@ ThreadPoolInterface::~ThreadPoolInterface() = default; #if LLVM_ENABLE_THREADS StdThreadPool::StdThreadPool(ThreadPoolStrategy S) - : Strategy(S), MaxThreadCount(S.compute_thread_count()) {} + : Strategy(S), MaxThreadCount(S.compute_thread_count()) { + if (Strategy.UseJobserver) + TheJobserver = JobserverClient::getInstance(); +} void StdThreadPool::grow(int requested) { llvm::sys::ScopedWriter LockGuard(ThreadsLock); @@ -45,7 +51,15 @@ void StdThreadPool::grow(int requested) { Threads.emplace_back([this, ThreadID] { set_thread_name(formatv("llvm-worker-{0}", ThreadID)); Strategy.apply_thread_strategy(ThreadID); - processTasks(nullptr); + // Note on jobserver deadlock avoidance: + // GNU Make grants each invoked process one implicit job slot. + // JobserverClient::tryAcquire() returns that implicit slot on the first + // successful call in a process, ensuring forward progress without a + // dedicated "always-on" thread. + if (TheJobserver) + processTasksWithJobserver(); + else + processTasks(nullptr); }); } } @@ -133,6 +147,96 @@ void StdThreadPool::processTasks(ThreadPoolTaskGroup *WaitingForGroup) { } } +/// Main loop for worker threads when using a jobserver. +/// This function uses a two-level queue; it first acquires a job slot from the +/// external jobserver, then retrieves a task from the internal queue. +/// This allows the thread pool to cooperate with build systems like `make -j`. +void StdThreadPool::processTasksWithJobserver() { + while (true) { + // Acquire a job slot from the external jobserver. + // This polls for a slot and yields the thread to avoid a high-CPU wait. + JobSlot Slot; + // The timeout for the backoff can be very long, as the shutdown + // is checked on each iteration. The sleep duration is capped by MaxWait + // in ExponentialBackoff, so shutdown latency is not a problem. + ExponentialBackoff Backoff(std::chrono::hours(24)); + bool AcquiredToken = false; + do { + // Return if the thread pool is shutting down. + { + std::unique_lock<std::mutex> LockGuard(QueueLock); + if (!EnableFlag) + return; + } + + Slot = TheJobserver->tryAcquire(); + if (Slot.isValid()) { + AcquiredToken = true; + break; + } + } while (Backoff.waitForNextAttempt()); + + if (!AcquiredToken) { + // This is practically unreachable with a 24h timeout and indicates a + // deeper problem if hit. + report_fatal_error("Timed out waiting for jobserver token."); + } + + // `make_scope_exit` guarantees the job slot is released, even if the + // task throws or we exit early. This prevents deadlocking the build. + auto SlotReleaser = + make_scope_exit([&] { TheJobserver->release(std::move(Slot)); }); + + // While we hold a job slot, process tasks from the internal queue. 
+ while (true) { + std::function<void()> Task; + ThreadPoolTaskGroup *GroupOfTask = nullptr; + + { + std::unique_lock<std::mutex> LockGuard(QueueLock); + + // Wait until a task is available or the pool is shutting down. + QueueCondition.wait(LockGuard, + [&] { return !EnableFlag || !Tasks.empty(); }); + + // If shutting down and the queue is empty, the thread can terminate. + if (!EnableFlag && Tasks.empty()) + return; + + // If the queue is empty, we're done processing tasks for now. + // Break the inner loop to release the job slot. + if (Tasks.empty()) + break; + + // A task is available. Mark it as active before releasing the lock + // to prevent race conditions with `wait()`. + ++ActiveThreads; + Task = std::move(Tasks.front().first); + GroupOfTask = Tasks.front().second; + if (GroupOfTask != nullptr) + ++ActiveGroups[GroupOfTask]; + Tasks.pop_front(); + } // The queue lock is released. + + // Run the task. The job slot remains acquired during execution. + Task(); + + // The task has finished. Update the active count and notify any waiters. + { + std::lock_guard<std::mutex> LockGuard(QueueLock); + --ActiveThreads; + if (GroupOfTask != nullptr) { + auto A = ActiveGroups.find(GroupOfTask); + if (--(A->second) == 0) + ActiveGroups.erase(A); + } + // If all tasks are complete, notify any waiting threads. + if (workCompletedUnlocked(nullptr)) + CompletionCondition.notify_all(); + } + } + } +} bool StdThreadPool::workCompletedUnlocked(ThreadPoolTaskGroup *Group) const { if (Group == nullptr) return !ActiveThreads && Tasks.empty(); diff --git a/llvm/lib/Support/Threading.cpp b/llvm/lib/Support/Threading.cpp index 693de0e..9da357a 100644 --- a/llvm/lib/Support/Threading.cpp +++ b/llvm/lib/Support/Threading.cpp @@ -14,6 +14,7 @@ #include "llvm/Support/Threading.h" #include "llvm/Config/config.h" #include "llvm/Config/llvm-config.h" +#include "llvm/Support/Jobserver.h" #include <cassert> #include <optional> @@ -51,6 +52,10 @@ int llvm::get_physical_cores() { return -1; } static int computeHostNumHardwareThreads(); unsigned llvm::ThreadPoolStrategy::compute_thread_count() const { + if (UseJobserver) + if (auto JS = JobserverClient::getInstance()) + return JS->getNumJobs(); + int MaxThreadCount = UseHyperThreads ? computeHostNumHardwareThreads() : get_physical_cores(); if (MaxThreadCount <= 0) diff --git a/llvm/lib/Support/Unix/Jobserver.inc b/llvm/lib/Support/Unix/Jobserver.inc new file mode 100644 index 0000000..53bf7f2 --- /dev/null +++ b/llvm/lib/Support/Unix/Jobserver.inc @@ -0,0 +1,195 @@ +//===- llvm/Support/Unix/Jobserver.inc - Unix Jobserver Impl ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the UNIX-specific parts of the JobserverClient class. +// +//===----------------------------------------------------------------------===// + +#include <atomic> +#include <cassert> +#include <cerrno> +#include <fcntl.h> +#include <string.h> +#include <sys/stat.h> +#include <unistd.h> + +namespace { +/// Returns true if the given file descriptor is a FIFO (named pipe). +bool isFifo(int FD) { + struct stat StatBuf; + if (::fstat(FD, &StatBuf) != 0) + return false; + return S_ISFIFO(StatBuf.st_mode); +} + +/// Returns true if the given file descriptors are valid. 
+bool areFdsValid(int ReadFD, int WriteFD) { + if (ReadFD == -1 || WriteFD == -1) + return false; + // Check if the file descriptors are actually valid by checking their flags. + return ::fcntl(ReadFD, F_GETFD) != -1 && ::fcntl(WriteFD, F_GETFD) != -1; +} +} // namespace + +/// The constructor sets up the client based on the provided configuration. +/// For pipe-based jobservers, it duplicates the inherited file descriptors, +/// sets them to close-on-exec, and makes the read descriptor non-blocking. +/// For FIFO-based jobservers, it opens the named pipe. After setup, it drains +/// all available tokens from the jobserver to determine the total number of +/// available jobs (`NumJobs`), then immediately releases them back. +JobserverClientImpl::JobserverClientImpl(const JobserverConfig &Config) { + switch (Config.TheMode) { + case JobserverConfig::PosixPipe: { + // Duplicate the read and write file descriptors. + int NewReadFD = ::dup(Config.PipeFDs.Read); + if (NewReadFD < 0) + return; + int NewWriteFD = ::dup(Config.PipeFDs.Write); + if (NewWriteFD < 0) { + ::close(NewReadFD); + return; + } + // Set the new descriptors to be closed automatically on exec(). + if (::fcntl(NewReadFD, F_SETFD, FD_CLOEXEC) == -1 || + ::fcntl(NewWriteFD, F_SETFD, FD_CLOEXEC) == -1) { + ::close(NewReadFD); + ::close(NewWriteFD); + return; + } + // Set the read descriptor to non-blocking. + int flags = ::fcntl(NewReadFD, F_GETFL, 0); + if (flags == -1 || ::fcntl(NewReadFD, F_SETFL, flags | O_NONBLOCK) == -1) { + ::close(NewReadFD); + ::close(NewWriteFD); + return; + } + ReadFD = NewReadFD; + WriteFD = NewWriteFD; + break; + } + case JobserverConfig::PosixFifo: + // Open the FIFO for reading. It must be non-blocking and close-on-exec. + ReadFD = ::open(Config.Path.c_str(), O_RDONLY | O_NONBLOCK | O_CLOEXEC); + if (ReadFD < 0 || !isFifo(ReadFD)) { + if (ReadFD >= 0) + ::close(ReadFD); + ReadFD = -1; + return; + } + FifoPath = Config.Path; + // The write FD is opened on-demand in release(). + WriteFD = -1; + break; + default: + return; + } + + IsInitialized = true; + // Determine the total number of jobs by acquiring all available slots and + // then immediately releasing them. + SmallVector<JobSlot, 8> Slots; + while (true) { + auto S = tryAcquire(); + if (!S.isValid()) + break; + Slots.push_back(std::move(S)); + } + NumJobs = Slots.size(); + assert(NumJobs >= 1 && "Invalid number of jobs"); + for (auto &S : Slots) + release(std::move(S)); +} + +/// The destructor closes any open file descriptors. +JobserverClientImpl::~JobserverClientImpl() { + if (ReadFD >= 0) + ::close(ReadFD); + if (WriteFD >= 0) + ::close(WriteFD); +} + +/// Tries to acquire a job slot. The first call to this function will always +/// successfully acquire the single "implicit" slot that is granted to every +/// process started by `make`. Subsequent calls attempt to read a one-byte +/// token from the jobserver's read pipe. A successful read grants one +/// explicit job slot. The read is non-blocking; if no token is available, +/// it fails and returns an invalid JobSlot. +JobSlot JobserverClientImpl::tryAcquire() { + if (!IsInitialized) + return JobSlot(); + + // The first acquisition is always for the implicit slot. + if (HasImplicitSlot.exchange(false, std::memory_order_acquire)) { + LLVM_DEBUG(dbgs() << "Acquired implicit job slot.\n"); + return JobSlot::createImplicit(); + } + + char Token; + ssize_t Ret; + LLVM_DEBUG(dbgs() << "Attempting to read token from FD " << ReadFD << ".\n"); + // Loop to retry on EINTR (interrupted system call). 
+ do { + Ret = ::read(ReadFD, &Token, 1); + } while (Ret < 0 && errno == EINTR); + + if (Ret == 1) { + LLVM_DEBUG(dbgs() << "Acquired explicit token '" << Token << "'.\n"); + return JobSlot::createExplicit(static_cast<uint8_t>(Token)); + } + + LLVM_DEBUG(dbgs() << "Failed to acquire job slot, read returned " << Ret + << ".\n"); + return JobSlot(); +} + +/// Releases a job slot back to the pool. If the slot is implicit, it simply +/// resets a flag. If the slot is explicit, it writes the character token +/// associated with the slot back into the jobserver's write pipe. For FIFO +/// jobservers, this may require opening the FIFO for writing if it hasn't +/// been already. +void JobserverClientImpl::release(JobSlot Slot) { + if (!Slot.isValid()) + return; + + // Releasing the implicit slot just makes it available for the next acquire. + if (Slot.isImplicit()) { + LLVM_DEBUG(dbgs() << "Released implicit job slot.\n"); + [[maybe_unused]] bool was_already_released = + HasImplicitSlot.exchange(true, std::memory_order_release); + assert(!was_already_released && "Implicit slot released twice"); + return; + } + + uint8_t Token = Slot.getExplicitValue(); + LLVM_DEBUG(dbgs() << "Releasing explicit token '" << (char)Token << "' to FD " + << WriteFD << ".\n"); + + // For FIFO-based jobservers, the write FD might not be open yet. + // Open it on the first release. + if (WriteFD < 0) { + LLVM_DEBUG(dbgs() << "WriteFD is invalid, opening FIFO: " << FifoPath + << "\n"); + WriteFD = ::open(FifoPath.c_str(), O_WRONLY | O_CLOEXEC); + if (WriteFD < 0) { + LLVM_DEBUG(dbgs() << "Failed to open FIFO for writing.\n"); + return; + } + LLVM_DEBUG(dbgs() << "Opened FIFO as new WriteFD: " << WriteFD << "\n"); + } + + ssize_t Written; + // Loop to retry on EINTR (interrupted system call). + do { + Written = ::write(WriteFD, &Token, 1); + } while (Written < 0 && errno == EINTR); + + if (Written <= 0) { + LLVM_DEBUG(dbgs() << "Failed to write token to pipe, write returned " + << Written << "\n"); + } +} diff --git a/llvm/lib/Support/Windows/Jobserver.inc b/llvm/lib/Support/Windows/Jobserver.inc new file mode 100644 index 0000000..79028ee --- /dev/null +++ b/llvm/lib/Support/Windows/Jobserver.inc @@ -0,0 +1,79 @@ +//==- llvm/Support/Windows/Jobserver.inc - Windows Jobserver Impl -*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the Windows-specific parts of the JobserverClient class. +// On Windows, the jobserver is implemented using a named semaphore. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Windows/WindowsSupport.h" +#include <atomic> +#include <cassert> + +namespace llvm { +/// The constructor for the Windows jobserver client. It attempts to open a +/// handle to an existing named semaphore, the name of which is provided by +/// GNU make in the --jobserver-auth argument. If the semaphore is opened +/// successfully, the client is marked as initialized. +JobserverClientImpl::JobserverClientImpl(const JobserverConfig &Config) { + Semaphore = (void *)::OpenSemaphoreA(SEMAPHORE_MODIFY_STATE | SYNCHRONIZE, + FALSE, Config.Path.c_str()); + if (Semaphore != nullptr) + IsInitialized = true; +} + +/// The destructor closes the handle to the semaphore, releasing the resource. 
+JobserverClientImpl::~JobserverClientImpl() { + if (Semaphore != nullptr) + ::CloseHandle((HANDLE)Semaphore); +} + +/// Tries to acquire a job slot. The first call always returns the implicit +/// slot. Subsequent calls use a non-blocking wait on the semaphore +/// (`WaitForSingleObject` with a timeout of 0). If the wait succeeds, the +/// semaphore's count is decremented, and an explicit job slot is acquired. +/// If the wait times out, it means no slots are available, and an invalid +/// slot is returned. +JobSlot JobserverClientImpl::tryAcquire() { + if (!IsInitialized) + return JobSlot(); + + // First, grant the implicit slot. + if (HasImplicitSlot.exchange(false, std::memory_order_acquire)) { + return JobSlot::createImplicit(); + } + + // Try to acquire a slot from the semaphore without blocking. + if (::WaitForSingleObject((HANDLE)Semaphore, 0) == WAIT_OBJECT_0) { + // The explicit token value is arbitrary on Windows, as the semaphore + // count is the real resource. + return JobSlot::createExplicit(1); + } + + return JobSlot(); // Invalid slot +} + +/// Releases a job slot back to the pool. If the slot is implicit, it simply +/// resets a flag. For an explicit slot, it increments the semaphore's count +/// by one using `ReleaseSemaphore`, making the slot available to other +/// processes. +void JobserverClientImpl::release(JobSlot Slot) { + if (!IsInitialized || !Slot.isValid()) + return; + + if (Slot.isImplicit()) { + [[maybe_unused]] bool was_already_released = + HasImplicitSlot.exchange(true, std::memory_order_release); + assert(!was_already_released && "Implicit slot released twice"); + return; + } + + // Release the slot by incrementing the semaphore count. + (void)::ReleaseSemaphore((HANDLE)Semaphore, 1, NULL); +} +} // namespace llvm diff --git a/llvm/lib/TableGen/Error.cpp b/llvm/lib/TableGen/Error.cpp index de0c4c9..3ba2c6c 100644 --- a/llvm/lib/TableGen/Error.cpp +++ b/llvm/lib/TableGen/Error.cpp @@ -19,10 +19,10 @@ #include "llvm/TableGen/Record.h" #include <cstdlib> -namespace llvm { +using namespace llvm; -SourceMgr SrcMgr; -unsigned ErrorsPrinted = 0; +SourceMgr llvm::SrcMgr; +unsigned llvm::ErrorsPrinted = 0; static void PrintMessage(ArrayRef<SMLoc> Locs, SourceMgr::DiagKind Kind, const Twine &Msg) { @@ -49,118 +49,118 @@ static void PrintMessage(ArrayRef<SMLoc> Locs, SourceMgr::DiagKind Kind, // Functions to print notes. -void PrintNote(const Twine &Msg) { - WithColor::note() << Msg << "\n"; -} +void llvm::PrintNote(const Twine &Msg) { WithColor::note() << Msg << "\n"; } -void PrintNote(function_ref<void(raw_ostream &OS)> PrintMsg) { +void llvm::PrintNote(function_ref<void(raw_ostream &OS)> PrintMsg) { PrintMsg(WithColor::note()); } -void PrintNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg) { +void llvm::PrintNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg) { PrintMessage(NoteLoc, SourceMgr::DK_Note, Msg); } // Functions to print fatal notes. -void PrintFatalNote(const Twine &Msg) { +void llvm::PrintFatalNote(const Twine &Msg) { PrintNote(Msg); fatal_exit(); } -void PrintFatalNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg) { +void llvm::PrintFatalNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg) { PrintNote(NoteLoc, Msg); fatal_exit(); } // This method takes a Record and uses the source location // stored in it. -void PrintFatalNote(const Record *Rec, const Twine &Msg) { +void llvm::PrintFatalNote(const Record *Rec, const Twine &Msg) { PrintNote(Rec->getLoc(), Msg); fatal_exit(); } // This method takes a RecordVal and uses the source location // stored in it. 
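// Editorial sketch, not part of the patch: how a consumer might drive the
// jobserver client implemented in the Unix and Windows Jobserver.inc files
// above. tryAcquire() and release() are the methods defined by this change;
// the JobserverClient reference and the RunTask callback are assumed
// placeholders for whatever the caller actually has, and error handling is
// elided.
void runWithJobSlot(JobserverClient &Client,
                    llvm::function_ref<void()> RunTask) {
  JobSlot Slot = Client.tryAcquire();
  if (!Slot.isValid())
    return; // No implicit slot and no token in the pipe/semaphore right now.
  RunTask();
  // Hand the slot back so other jobserver clients can use it; explicit tokens
  // are written back to the FIFO (POSIX) or returned to the semaphore (Windows).
  Client.release(std::move(Slot));
}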
-void PrintFatalNote(const RecordVal *RecVal, const Twine &Msg) { +void llvm::PrintFatalNote(const RecordVal *RecVal, const Twine &Msg) { PrintNote(RecVal->getLoc(), Msg); fatal_exit(); } // Functions to print warnings. -void PrintWarning(const Twine &Msg) { WithColor::warning() << Msg << "\n"; } +void llvm::PrintWarning(const Twine &Msg) { + WithColor::warning() << Msg << "\n"; +} -void PrintWarning(ArrayRef<SMLoc> WarningLoc, const Twine &Msg) { +void llvm::PrintWarning(ArrayRef<SMLoc> WarningLoc, const Twine &Msg) { PrintMessage(WarningLoc, SourceMgr::DK_Warning, Msg); } -void PrintWarning(const char *Loc, const Twine &Msg) { +void llvm::PrintWarning(const char *Loc, const Twine &Msg) { SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), SourceMgr::DK_Warning, Msg); } // Functions to print errors. -void PrintError(const Twine &Msg) { WithColor::error() << Msg << "\n"; } +void llvm::PrintError(const Twine &Msg) { WithColor::error() << Msg << "\n"; } -void PrintError(function_ref<void(raw_ostream &OS)> PrintMsg) { +void llvm::PrintError(function_ref<void(raw_ostream &OS)> PrintMsg) { PrintMsg(WithColor::error()); } -void PrintError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) { +void llvm::PrintError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) { PrintMessage(ErrorLoc, SourceMgr::DK_Error, Msg); } -void PrintError(const char *Loc, const Twine &Msg) { +void llvm::PrintError(const char *Loc, const Twine &Msg) { SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), SourceMgr::DK_Error, Msg); } // This method takes a Record and uses the source location // stored in it. -void PrintError(const Record *Rec, const Twine &Msg) { +void llvm::PrintError(const Record *Rec, const Twine &Msg) { PrintMessage(Rec->getLoc(), SourceMgr::DK_Error, Msg); } // This method takes a RecordVal and uses the source location // stored in it. -void PrintError(const RecordVal *RecVal, const Twine &Msg) { +void llvm::PrintError(const RecordVal *RecVal, const Twine &Msg) { PrintMessage(RecVal->getLoc(), SourceMgr::DK_Error, Msg); } // Functions to print fatal errors. -void PrintFatalError(const Twine &Msg) { +void llvm::PrintFatalError(const Twine &Msg) { PrintError(Msg); fatal_exit(); } -void PrintFatalError(function_ref<void(raw_ostream &OS)> PrintMsg) { +void llvm::PrintFatalError(function_ref<void(raw_ostream &OS)> PrintMsg) { PrintError(PrintMsg); fatal_exit(); } -void PrintFatalError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) { +void llvm::PrintFatalError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) { PrintError(ErrorLoc, Msg); fatal_exit(); } // This method takes a Record and uses the source location // stored in it. -void PrintFatalError(const Record *Rec, const Twine &Msg) { +void llvm::PrintFatalError(const Record *Rec, const Twine &Msg) { PrintError(Rec->getLoc(), Msg); fatal_exit(); } // This method takes a RecordVal and uses the source location // stored in it. -void PrintFatalError(const RecordVal *RecVal, const Twine &Msg) { +void llvm::PrintFatalError(const RecordVal *RecVal, const Twine &Msg) { PrintError(RecVal->getLoc(), Msg); fatal_exit(); } // Check an assertion: Obtain the condition value and be sure it is true. // If not, print a nonfatal error along with the message. 
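// Editorial aside, not part of the patch: the idiom the Error.cpp hunks above
// switch to. The header keeps the declarations inside namespace llvm; the .cpp
// defines them with a qualified name instead of reopening the namespace, so a
// definition whose signature drifts from its declaration becomes a compile
// error rather than a silent new overload. Minimal sketch (includes elided):
namespace llvm {
void PrintNote(const Twine &Msg); // header-style declaration
} // namespace llvm

void llvm::PrintNote(const Twine &Msg) { // qualified definition, new style
  WithColor::note() << Msg << "\n";
}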
-bool CheckAssert(SMLoc Loc, const Init *Condition, const Init *Message) { +bool llvm::CheckAssert(SMLoc Loc, const Init *Condition, const Init *Message) { auto *CondValue = dyn_cast_or_null<IntInit>(Condition->convertInitializerTo( IntRecTy::get(Condition->getRecordKeeper()))); if (!CondValue) { @@ -178,11 +178,9 @@ bool CheckAssert(SMLoc Loc, const Init *Condition, const Init *Message) { } // Dump a message to stderr. -void dumpMessage(SMLoc Loc, const Init *Message) { +void llvm::dumpMessage(SMLoc Loc, const Init *Message) { if (auto *MessageInit = dyn_cast<StringInit>(Message)) PrintNote(Loc, MessageInit->getValue()); else PrintError(Loc, "dump value is not of type string"); } - -} // end namespace llvm diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp index f545706..42043f7 100644 --- a/llvm/lib/TableGen/Main.cpp +++ b/llvm/lib/TableGen/Main.cpp @@ -64,14 +64,12 @@ WriteIfChanged("write-if-changed", cl::desc("Only write output if it changed")); static cl::opt<bool> TimePhases("time-phases", cl::desc("Time phases of parser and backend")); -namespace llvm { -cl::opt<bool> EmitLongStrLiterals( +cl::opt<bool> llvm::EmitLongStrLiterals( "long-string-literals", cl::desc("when emitting large string tables, prefer string literals over " "comma-separated char literals. This can be a readability and " "compile-time performance win, but upsets some compilers"), cl::Hidden, cl::init(true)); -} // end namespace llvm static cl::opt<bool> NoWarnOnUnusedTemplateArgs( "no-warn-on-unused-template-args", diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp index 051a896..2ea3a24 100644 --- a/llvm/lib/TableGen/Record.cpp +++ b/llvm/lib/TableGen/Record.cpp @@ -46,8 +46,7 @@ using namespace llvm; // Context //===----------------------------------------------------------------------===// -namespace llvm { -namespace detail { +namespace llvm::detail { /// This class represents the internal implementation of the RecordKeeper. /// It contains all of the contextual static state of the Record classes. It is /// kept out-of-line to simplify dependencies, and also make it easier for @@ -100,8 +99,7 @@ struct RecordKeeperImpl { void dumpAllocationStats(raw_ostream &OS) const; }; -} // namespace detail -} // namespace llvm +} // namespace llvm::detail void detail::RecordKeeperImpl::dumpAllocationStats(raw_ostream &OS) const { // Dump memory allocation related stats. diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp index f928ded..3d31d8e 100644 --- a/llvm/lib/TableGen/TGParser.cpp +++ b/llvm/lib/TableGen/TGParser.cpp @@ -31,8 +31,6 @@ using namespace llvm; // Support Code for the Semantic Actions. 
//===----------------------------------------------------------------------===// -namespace llvm { - RecordsEntry::RecordsEntry(std::unique_ptr<Record> Rec) : Rec(std::move(Rec)) {} RecordsEntry::RecordsEntry(std::unique_ptr<ForeachLoop> Loop) : Loop(std::move(Loop)) {} @@ -41,6 +39,7 @@ RecordsEntry::RecordsEntry(std::unique_ptr<Record::AssertionInfo> Assertion) RecordsEntry::RecordsEntry(std::unique_ptr<Record::DumpInfo> Dump) : Dump(std::move(Dump)) {} +namespace llvm { struct SubClassReference { SMRange RefRange; const Record *Rec = nullptr; @@ -61,6 +60,7 @@ struct SubMultiClassReference { bool isInvalid() const { return MC == nullptr; } void dump() const; }; +} // end namespace llvm #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void SubMultiClassReference::dump() const { @@ -74,8 +74,6 @@ LLVM_DUMP_METHOD void SubMultiClassReference::dump() const { } #endif -} // end namespace llvm - static bool checkBitsConcrete(Record &R, const RecordVal &RV) { const auto *BV = cast<BitsInit>(RV.getValue()); for (unsigned i = 0, e = BV->getNumBits(); i != e; ++i) { diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 70d5ad7d..dc8e7c8 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -16461,7 +16461,7 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0), - DAG.getConstant(Cnt, DL, MVT::i32)); + DAG.getTargetConstant(Cnt, DL, MVT::i32)); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL, MVT::i32), @@ -16491,7 +16491,8 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, unsigned Opc = (Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR; return DAG.getNode(Opc, DL, VT, Op.getOperand(0), - DAG.getConstant(Cnt, DL, MVT::i32), Op->getFlags()); + DAG.getTargetConstant(Cnt, DL, MVT::i32), + Op->getFlags()); } // Right shift register. Note, there is not a shift right register @@ -19973,7 +19974,7 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG, SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ResTy, DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), - Op->getOperand(0), DAG.getConstant(C, DL, MVT::i32)); + Op->getOperand(0), DAG.getTargetConstant(C, DL, MVT::i32)); // We can handle smaller integers by generating an extra trunc. 
if (IntBits < FloatBits) FixConv = DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), FixConv); @@ -20696,7 +20697,7 @@ static SDValue performConcatVectorsCombine(SDNode *N, N100 = DAG.getNode(AArch64ISD::NVCAST, DL, VT, N100); SDValue Uzp = DAG.getNode(AArch64ISD::UZP2, DL, VT, N000, N100); SDValue NewShiftConstant = - DAG.getConstant(N001ConstVal - NScalarSize, DL, MVT::i32); + DAG.getTargetConstant(N001ConstVal - NScalarSize, DL, MVT::i32); return DAG.getNode(AArch64ISD::VLSHR, DL, VT, Uzp, NewShiftConstant); } @@ -22373,14 +22374,14 @@ static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) { if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) { Op = DAG.getNode(Opcode, DL, VT, Op, - DAG.getSignedConstant(-ShiftAmount, DL, MVT::i32)); + DAG.getSignedConstant(-ShiftAmount, DL, MVT::i32, true)); if (N->getValueType(0) == MVT::i64) Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Op, DAG.getConstant(0, DL, MVT::i64)); return Op; } else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) { Op = DAG.getNode(Opcode, DL, VT, Op, - DAG.getConstant(ShiftAmount, DL, MVT::i32)); + DAG.getTargetConstant(ShiftAmount, DL, MVT::i32)); if (N->getValueType(0) == MVT::i64) Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Op, DAG.getConstant(0, DL, MVT::i64)); @@ -23198,7 +23199,7 @@ static SDValue performZExtUZPCombine(SDNode *N, SelectionDAG &DAG) { Op.getOperand(ExtOffset == 0 ? 0 : 1)); if (Shift != 0) BC = DAG.getNode(AArch64ISD::VLSHR, DL, VT, BC, - DAG.getConstant(Shift, DL, MVT::i32)); + DAG.getTargetConstant(Shift, DL, MVT::i32)); return DAG.getNode(ISD::AND, DL, VT, BC, DAG.getConstant(Mask, DL, VT)); } diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 6ef0a95..09ce713 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -812,49 +812,49 @@ def fixedpoint_recip_f16_i64 : fixedpoint_recip_i64<f16>; def fixedpoint_recip_f32_i64 : fixedpoint_recip_i64<f32>; def fixedpoint_recip_f64_i64 : fixedpoint_recip_i64<f64>; -def vecshiftR8 : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftR8 : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9); }]> { let EncoderMethod = "getVecShiftR8OpValue"; let DecoderMethod = "DecodeVecShiftR8Imm"; let ParserMatchClass = Imm1_8Operand; } -def vecshiftR16 : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftR16 : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17); }]> { let EncoderMethod = "getVecShiftR16OpValue"; let DecoderMethod = "DecodeVecShiftR16Imm"; let ParserMatchClass = Imm1_16Operand; } -def vecshiftR16Narrow : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftR16Narrow : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9); }]> { let EncoderMethod = "getVecShiftR16OpValue"; let DecoderMethod = "DecodeVecShiftR16ImmNarrow"; let ParserMatchClass = Imm1_8Operand; } -def vecshiftR32 : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftR32 : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33); }]> { let EncoderMethod = "getVecShiftR32OpValue"; let DecoderMethod = "DecodeVecShiftR32Imm"; let ParserMatchClass = Imm1_32Operand; } -def vecshiftR32Narrow : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftR32Narrow : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17); }]> { let EncoderMethod = "getVecShiftR32OpValue"; let DecoderMethod = 
"DecodeVecShiftR32ImmNarrow"; let ParserMatchClass = Imm1_16Operand; } -def vecshiftR64 : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftR64 : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 65); }]> { let EncoderMethod = "getVecShiftR64OpValue"; let DecoderMethod = "DecodeVecShiftR64Imm"; let ParserMatchClass = Imm1_64Operand; } -def vecshiftR64Narrow : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftR64Narrow : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33); }]> { let EncoderMethod = "getVecShiftR64OpValue"; @@ -862,37 +862,6 @@ def vecshiftR64Narrow : Operand<i32>, ImmLeaf<i32, [{ let ParserMatchClass = Imm1_32Operand; } -// Same as vecshiftR#N, but use TargetConstant (TimmLeaf) instead of Constant -// (ImmLeaf) -def tvecshiftR8 : Operand<i32>, TImmLeaf<i32, [{ - return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9); -}]> { - let EncoderMethod = "getVecShiftR8OpValue"; - let DecoderMethod = "DecodeVecShiftR8Imm"; - let ParserMatchClass = Imm1_8Operand; -} -def tvecshiftR16 : Operand<i32>, TImmLeaf<i32, [{ - return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17); -}]> { - let EncoderMethod = "getVecShiftR16OpValue"; - let DecoderMethod = "DecodeVecShiftR16Imm"; - let ParserMatchClass = Imm1_16Operand; -} -def tvecshiftR32 : Operand<i32>, TImmLeaf<i32, [{ - return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33); -}]> { - let EncoderMethod = "getVecShiftR32OpValue"; - let DecoderMethod = "DecodeVecShiftR32Imm"; - let ParserMatchClass = Imm1_32Operand; -} -def tvecshiftR64 : Operand<i32>, TImmLeaf<i32, [{ - return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 65); -}]> { - let EncoderMethod = "getVecShiftR64OpValue"; - let DecoderMethod = "DecodeVecShiftR64Imm"; - let ParserMatchClass = Imm1_64Operand; -} - def Imm0_0Operand : AsmImmRange<0, 0>; def Imm0_1Operand : AsmImmRange<0, 1>; def Imm1_1Operand : AsmImmRange<1, 1>; @@ -904,28 +873,28 @@ def Imm0_15Operand : AsmImmRange<0, 15>; def Imm0_31Operand : AsmImmRange<0, 31>; def Imm0_63Operand : AsmImmRange<0, 63>; -def vecshiftL8 : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftL8 : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) < 8); }]> { let EncoderMethod = "getVecShiftL8OpValue"; let DecoderMethod = "DecodeVecShiftL8Imm"; let ParserMatchClass = Imm0_7Operand; } -def vecshiftL16 : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftL16 : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) < 16); }]> { let EncoderMethod = "getVecShiftL16OpValue"; let DecoderMethod = "DecodeVecShiftL16Imm"; let ParserMatchClass = Imm0_15Operand; } -def vecshiftL32 : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftL32 : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) < 32); }]> { let EncoderMethod = "getVecShiftL32OpValue"; let DecoderMethod = "DecodeVecShiftL32Imm"; let ParserMatchClass = Imm0_31Operand; } -def vecshiftL64 : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftL64 : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) < 64); }]> { let EncoderMethod = "getVecShiftL64OpValue"; @@ -933,36 +902,6 @@ def vecshiftL64 : Operand<i32>, ImmLeaf<i32, [{ let ParserMatchClass = Imm0_63Operand; } -// Same as vecshiftL#N, but use TargetConstant (TimmLeaf) instead of Constant -// (ImmLeaf) -def tvecshiftL8 : Operand<i32>, TImmLeaf<i32, [{ - return (((uint32_t)Imm) < 8); -}]> { - let EncoderMethod = "getVecShiftL8OpValue"; - let DecoderMethod = "DecodeVecShiftL8Imm"; - let ParserMatchClass = Imm0_7Operand; -} -def tvecshiftL16 : Operand<i32>, TImmLeaf<i32, [{ - return (((uint32_t)Imm) < 16); -}]> { - 
let EncoderMethod = "getVecShiftL16OpValue"; - let DecoderMethod = "DecodeVecShiftL16Imm"; - let ParserMatchClass = Imm0_15Operand; -} -def tvecshiftL32 : Operand<i32>, TImmLeaf<i32, [{ - return (((uint32_t)Imm) < 32); -}]> { - let EncoderMethod = "getVecShiftL32OpValue"; - let DecoderMethod = "DecodeVecShiftL32Imm"; - let ParserMatchClass = Imm0_31Operand; -} -def tvecshiftL64 : Operand<i32>, TImmLeaf<i32, [{ - return (((uint32_t)Imm) < 64); -}]> { - let EncoderMethod = "getVecShiftL64OpValue"; - let DecoderMethod = "DecodeVecShiftL64Imm"; - let ParserMatchClass = Imm0_63Operand; -} // Crazy immediate formats used by 32-bit and 64-bit logical immediate // instructions for splatting repeating bit patterns across the immediate. @@ -10232,7 +10171,7 @@ multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm, def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, V64, V64, vecshiftR16, asm, ".4h", ".4h", - [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (i32 imm:$imm)))]> { + [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (i32 vecshiftR16:$imm)))]> { bits<4> imm; let Inst{19-16} = imm; } @@ -10240,15 +10179,16 @@ multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm, def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, V128, V128, vecshiftR16, asm, ".8h", ".8h", - [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (i32 imm:$imm)))]> { + [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (i32 vecshiftR16:$imm)))]> { bits<4> imm; let Inst{19-16} = imm; } } // Predicates = [HasNEON, HasFullFP16] + def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, V64, V64, vecshiftR32, asm, ".2s", ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (i32 imm:$imm)))]> { + [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (i32 vecshiftR32:$imm)))]> { bits<5> imm; let Inst{20-16} = imm; } @@ -10256,7 +10196,7 @@ multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm, def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, V128, V128, vecshiftR32, asm, ".4s", ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (i32 imm:$imm)))]> { + [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (i32 vecshiftR32:$imm)))]> { bits<5> imm; let Inst{20-16} = imm; } @@ -10264,7 +10204,7 @@ multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm, def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, V128, V128, vecshiftR64, asm, ".2d", ".2d", - [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (i32 imm:$imm)))]> { + [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (i32 vecshiftR64:$imm)))]> { bits<6> imm; let Inst{21-16} = imm; } @@ -10276,7 +10216,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm, def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, V64, V64, vecshiftR16, asm, ".4h", ".4h", - [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (i32 imm:$imm)))]> { + [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (i32 vecshiftR16:$imm)))]> { bits<4> imm; let Inst{19-16} = imm; } @@ -10284,7 +10224,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm, def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, V128, V128, vecshiftR16, asm, ".8h", ".8h", - [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (i32 imm:$imm)))]> { + [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (i32 vecshiftR16:$imm)))]> { bits<4> imm; let Inst{19-16} = imm; } @@ -10293,7 +10233,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm, def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, 
V64, V64, vecshiftR32, asm, ".2s", ".2s", - [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (i32 imm:$imm)))]> { + [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (i32 vecshiftR32:$imm)))]> { bits<5> imm; let Inst{20-16} = imm; } @@ -10301,7 +10241,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm, def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, V128, V128, vecshiftR32, asm, ".4s", ".4s", - [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (i32 imm:$imm)))]> { + [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (i32 vecshiftR32:$imm)))]> { bits<5> imm; let Inst{20-16} = imm; } @@ -10309,7 +10249,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm, def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, V128, V128, vecshiftR64, asm, ".2d", ".2d", - [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (i32 imm:$imm)))]> { + [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (i32 vecshiftR64:$imm)))]> { bits<6> imm; let Inst{21-16} = imm; } diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 36c9cb6..bc6b931 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1010,6 +1010,36 @@ let Predicates = [HasSVE_or_SME] in { defm SEL_ZPZZ : sve_int_sel_vvv<"sel", vselect>; defm SPLICE_ZPZ : sve_int_perm_splice<"splice", AArch64splice>; + + // mul x (splat -1) -> neg x + def : Pat<(nxv16i8 (AArch64mul_m1 nxv16i1:$Op1, nxv16i8:$Op2, (nxv16i8 (splat_vector (i32 -1))))), + (NEG_ZPmZ_B $Op2, $Op1, $Op2)>; + def : Pat<(nxv8i16 (AArch64mul_m1 nxv8i1:$Op1, nxv8i16:$Op2, (nxv8i16 (splat_vector (i32 -1))))), + (NEG_ZPmZ_H $Op2, $Op1, $Op2)>; + def : Pat<(nxv4i32 (AArch64mul_m1 nxv4i1:$Op1, nxv4i32:$Op2, (nxv4i32 (splat_vector (i32 -1))))), + (NEG_ZPmZ_S $Op2, $Op1, $Op2)>; + def : Pat<(nxv2i64 (AArch64mul_m1 nxv2i1:$Op1, nxv2i64:$Op2, (nxv2i64 (splat_vector (i64 -1))))), + (NEG_ZPmZ_D $Op2, $Op1, $Op2)>; + + let AddedComplexity = 5 in { + def : Pat<(nxv16i8 (AArch64mul_p nxv16i1:$Op1, nxv16i8:$Op2, (nxv16i8 (splat_vector (i32 -1))))), + (NEG_ZPmZ_B_UNDEF $Op2, $Op1, $Op2)>; + def : Pat<(nxv8i16 (AArch64mul_p nxv8i1:$Op1, nxv8i16:$Op2, (nxv8i16 (splat_vector (i32 -1))))), + (NEG_ZPmZ_H_UNDEF $Op2, $Op1, $Op2)>; + def : Pat<(nxv4i32 (AArch64mul_p nxv4i1:$Op1, nxv4i32:$Op2, (nxv4i32 (splat_vector (i32 -1))))), + (NEG_ZPmZ_S_UNDEF $Op2, $Op1, $Op2)>; + def : Pat<(nxv2i64 (AArch64mul_p nxv2i1:$Op1, nxv2i64:$Op2, (nxv2i64 (splat_vector (i64 -1))))), + (NEG_ZPmZ_D_UNDEF $Op2, $Op1, $Op2)>; + } + + def : Pat<(nxv16i8 (AArch64mul_m1 nxv16i1:$Op1, (nxv16i8 (splat_vector (i32 -1))), nxv16i8:$Op2)), + (NEG_ZPmZ_B (DUP_ZI_B -1, 0), $Op1, $Op2)>; + def : Pat<(nxv8i16 (AArch64mul_m1 nxv8i1:$Op1, (nxv8i16 (splat_vector (i32 -1))), nxv8i16:$Op2)), + (NEG_ZPmZ_H (DUP_ZI_H -1, 0), $Op1, $Op2)>; + def : Pat<(nxv4i32 (AArch64mul_m1 nxv4i1:$Op1, (nxv4i32 (splat_vector (i32 -1))), nxv4i32:$Op2)), + (NEG_ZPmZ_S (DUP_ZI_S -1, 0), $Op1, $Op2)>; + def : Pat<(nxv2i64 (AArch64mul_m1 nxv2i1:$Op1, (nxv2i64 (splat_vector (i64 -1))), nxv2i64:$Op2)), + (NEG_ZPmZ_D (DUP_ZI_D -1, 0), $Op1, $Op2)>; } // End HasSVE_or_SME // COMPACT - word and doubleword diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 96cc3f3..3e55b76 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -2957,9 
+2957,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering(); // Need special instructions for atomics that affect ordering. - if (Order != AtomicOrdering::NotAtomic && - Order != AtomicOrdering::Unordered && - Order != AtomicOrdering::Monotonic) { + if (isStrongerThanMonotonic(Order)) { assert(!isa<GZExtLoad>(LdSt)); assert(MemSizeInBytes <= 8 && "128-bit atomics should already be custom-legalized"); diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp index 6025f1c..63313da 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp @@ -556,8 +556,7 @@ void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI, unsigned NewOpc = Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR; MachineIRBuilder MIB(MI); - auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm); - MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef}); + MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1)}).addImm(Imm); MI.eraseFromParent(); } diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 539470d..be44b8f 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -4967,7 +4967,7 @@ multiclass sme2_movaz_array_to_vec_vg4_multi<string mnemonic> { //===----------------------------------------------------------------------===// // SME2 multi-vec saturating shift right narrow class sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u> - : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, tvecshiftR16:$imm4), + : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, vecshiftR16:$imm4), mnemonic, "\t$Zd, $Zn, $imm4", "", []>, Sched<[]> { bits<4> imm4; @@ -4985,7 +4985,7 @@ class sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u> multiclass sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u, SDPatternOperator intrinsic> { def _H : sme2_sat_shift_vector_vg2<mnemonic, op, u>; - def : SME2_Sat_Shift_VG2_Pat<NAME # _H, intrinsic, nxv8i16, nxv4i32, tvecshiftR16>; + def : SME2_Sat_Shift_VG2_Pat<NAME # _H, intrinsic, nxv8i16, nxv4i32, vecshiftR16>; } class sme2_sat_shift_vector_vg4<bits<2> sz, bits<3> op, ZPRRegOp zpr_ty, @@ -5008,20 +5008,20 @@ class sme2_sat_shift_vector_vg4<bits<2> sz, bits<3> op, ZPRRegOp zpr_ty, } multiclass sme2_sat_shift_vector_vg4<string mnemonic, bits<3> op, SDPatternOperator intrinsic> { - def _B : sme2_sat_shift_vector_vg4<{0,1}, op, ZPR8, ZZZZ_s_mul_r, tvecshiftR32, + def _B : sme2_sat_shift_vector_vg4<{0,1}, op, ZPR8, ZZZZ_s_mul_r, vecshiftR32, mnemonic>{ bits<5> imm; let Inst{20-16} = imm; } - def _H : sme2_sat_shift_vector_vg4<{1,?}, op, ZPR16, ZZZZ_d_mul_r, tvecshiftR64, + def _H : sme2_sat_shift_vector_vg4<{1,?}, op, ZPR16, ZZZZ_d_mul_r, vecshiftR64, mnemonic> { bits<6> imm; let Inst{22} = imm{5}; let Inst{20-16} = imm{4-0}; } - def : SME2_Sat_Shift_VG4_Pat<NAME # _B, intrinsic, nxv16i8, nxv4i32, tvecshiftR32>; - def : SME2_Sat_Shift_VG4_Pat<NAME # _H, intrinsic, nxv8i16, nxv2i64, tvecshiftR64>; + def : SME2_Sat_Shift_VG4_Pat<NAME # _B, intrinsic, nxv16i8, nxv4i32, vecshiftR32>; + def : SME2_Sat_Shift_VG4_Pat<NAME # _H, intrinsic, nxv8i16, nxv2i64, vecshiftR64>; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td 
b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 9a23c35..3cdd505 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -4436,9 +4436,9 @@ multiclass sve2_bitwise_shift_left_long<bits<2> opc, string asm, ZPR64, ZPR32, vecshiftL32> { let Inst{20-19} = imm{4-3}; } - def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv16i8, i32, tvecshiftL8, !cast<Instruction>(NAME # _H)>; - def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv8i16, i32, tvecshiftL16, !cast<Instruction>(NAME # _S)>; - def : SVE_2_Op_Imm_Pat<nxv2i64, op, nxv4i32, i32, tvecshiftL32, !cast<Instruction>(NAME # _D)>; + def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv16i8, i32, vecshiftL8, !cast<Instruction>(NAME # _H)>; + def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv8i16, i32, vecshiftL16, !cast<Instruction>(NAME # _S)>; + def : SVE_2_Op_Imm_Pat<nxv2i64, op, nxv4i32, i32, vecshiftL32, !cast<Instruction>(NAME # _D)>; } //===----------------------------------------------------------------------===// @@ -4481,10 +4481,10 @@ multiclass sve2_int_bin_shift_imm_left<bit opc, string asm, let Inst{20-19} = imm{4-3}; } - def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftL8, !cast<Instruction>(NAME # _B)>; - def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftL16, !cast<Instruction>(NAME # _H)>; - def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftL32, !cast<Instruction>(NAME # _S)>; - def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftL64, !cast<Instruction>(NAME # _D)>; + def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, vecshiftL8, !cast<Instruction>(NAME # _B)>; + def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, vecshiftL16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, vecshiftL32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, vecshiftL64, !cast<Instruction>(NAME # _D)>; } multiclass sve2_int_bin_shift_imm_right<bit opc, string asm, @@ -4501,10 +4501,10 @@ multiclass sve2_int_bin_shift_imm_right<bit opc, string asm, let Inst{20-19} = imm{4-3}; } - def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>; - def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>; - def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>; - def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>; + def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>; + def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>; } class sve2_int_bin_accum_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm, @@ -4546,10 +4546,10 @@ multiclass sve2_int_bin_accum_shift_imm_right<bits<2> opc, string asm, let Inst{20-19} = imm{4-3}; } - def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>; - def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>; - def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>; - def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, 
nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>; + def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>; + def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>; def : SVE_Shift_Add_All_Active_Pat<nxv16i8, shift_op, nxv16i1, nxv16i8, nxv16i8, i32, !cast<Instruction>(NAME # _B)>; def : SVE_Shift_Add_All_Active_Pat<nxv8i16, shift_op, nxv8i1, nxv8i16, nxv8i16, i32, !cast<Instruction>(NAME # _H)>; @@ -4676,18 +4676,18 @@ class sve2_int_bin_shift_imm_narrow_bottom<bits<3> tsz8_64, bits<3> opc, multiclass sve2_int_bin_shift_imm_right_narrow_bottom<bits<3> opc, string asm, SDPatternOperator op> { def _B : sve2_int_bin_shift_imm_narrow_bottom<{0,0,1}, opc, asm, ZPR8, ZPR16, - tvecshiftR8>; + vecshiftR8>; def _H : sve2_int_bin_shift_imm_narrow_bottom<{0,1,?}, opc, asm, ZPR16, ZPR32, - tvecshiftR16> { + vecshiftR16> { let Inst{19} = imm{3}; } def _S : sve2_int_bin_shift_imm_narrow_bottom<{1,?,?}, opc, asm, ZPR32, ZPR64, - tvecshiftR32> { + vecshiftR32> { let Inst{20-19} = imm{4-3}; } - def : SVE_2_Op_Imm_Pat<nxv16i8, op, nxv8i16, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>; - def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv4i32, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>; - def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv2i64, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>; + def : SVE_2_Op_Imm_Pat<nxv16i8, op, nxv8i16, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>; + def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv4i32, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>; + def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv2i64, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>; } class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc, @@ -4717,18 +4717,18 @@ class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc, multiclass sve2_int_bin_shift_imm_right_narrow_top<bits<3> opc, string asm, SDPatternOperator op> { def _B : sve2_int_bin_shift_imm_narrow_top<{0,0,1}, opc, asm, ZPR8, ZPR16, - tvecshiftR8>; + vecshiftR8>; def _H : sve2_int_bin_shift_imm_narrow_top<{0,1,?}, opc, asm, ZPR16, ZPR32, - tvecshiftR16> { + vecshiftR16> { let Inst{19} = imm{3}; } def _S : sve2_int_bin_shift_imm_narrow_top<{1,?,?}, opc, asm, ZPR32, ZPR64, - tvecshiftR32> { + vecshiftR32> { let Inst{20-19} = imm{4-3}; } - def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv8i16, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>; - def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv4i32, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>; - def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv2i64, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv8i16, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>; + def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv4i32, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv2i64, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>; } class sve2_int_addsub_narrow_high_bottom<bits<2> sz, bits<2> opc, string asm, @@ -5461,10 +5461,10 @@ multiclass sve2_int_rotate_right_imm<string asm, SDPatternOperator op> { let Inst{20-19} = imm{4-3}; } - def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>; - def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, 
tvecshiftR16, !cast<Instruction>(NAME # _H)>; - def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>; - def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>; + def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>; + def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>; } //===----------------------------------------------------------------------===// @@ -6443,10 +6443,10 @@ multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm, string Ps, let Inst{9-8} = imm{4-3}; } - def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, tvecshiftL8, !cast<Instruction>(NAME # _B)>; - def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, tvecshiftL16, !cast<Instruction>(NAME # _H)>; - def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, tvecshiftL32, !cast<Instruction>(NAME # _S)>; - def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftL64, !cast<Instruction>(NAME # _D)>; + def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, vecshiftL8, !cast<Instruction>(NAME # _B)>; + def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, vecshiftL16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, vecshiftL32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, vecshiftL64, !cast<Instruction>(NAME # _D)>; } // As above but shift amount takes the form of a "vector immediate". 
@@ -6460,15 +6460,15 @@ multiclass sve_int_bin_pred_shift_imm_left_dup<bits<4> opc, string asm, } multiclass sve_int_bin_pred_shift_imm_left_zeroing_bhsd<SDPatternOperator op> { - def _B_ZERO : PredTwoOpImmPseudo<NAME # _B, ZPR8, tvecshiftL8, FalseLanesZero>; - def _H_ZERO : PredTwoOpImmPseudo<NAME # _H, ZPR16, tvecshiftL16, FalseLanesZero>; - def _S_ZERO : PredTwoOpImmPseudo<NAME # _S, ZPR32, tvecshiftL32, FalseLanesZero>; - def _D_ZERO : PredTwoOpImmPseudo<NAME # _D, ZPR64, tvecshiftL64, FalseLanesZero>; + def _B_ZERO : PredTwoOpImmPseudo<NAME # _B, ZPR8, vecshiftL8, FalseLanesZero>; + def _H_ZERO : PredTwoOpImmPseudo<NAME # _H, ZPR16, vecshiftL16, FalseLanesZero>; + def _S_ZERO : PredTwoOpImmPseudo<NAME # _S, ZPR32, vecshiftL32, FalseLanesZero>; + def _D_ZERO : PredTwoOpImmPseudo<NAME # _D, ZPR64, vecshiftL64, FalseLanesZero>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, tvecshiftL8, !cast<Pseudo>(NAME # _B_ZERO)>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, tvecshiftL16, !cast<Pseudo>(NAME # _H_ZERO)>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, tvecshiftL32, !cast<Pseudo>(NAME # _S_ZERO)>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, tvecshiftL64, !cast<Pseudo>(NAME # _D_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, vecshiftL8, !cast<Pseudo>(NAME # _B_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, vecshiftL16, !cast<Pseudo>(NAME # _H_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, vecshiftL32, !cast<Pseudo>(NAME # _S_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, vecshiftL64, !cast<Pseudo>(NAME # _D_ZERO)>; } multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm, string Ps, @@ -6489,10 +6489,10 @@ multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm, string Ps, let Inst{9-8} = imm{4-3}; } - def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>; - def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>; - def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>; - def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>; + def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>; + def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>; } // As above but shift amount takes the form of a "vector immediate". 
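// Editorial aside, not part of the patch: why the duplicate tvecshiftR*/
// tvecshiftL* operands could be deleted and the plain vecshift* operands
// re-based on TImmLeaf. ImmLeaf operands match ISD::Constant nodes, while
// TImmLeaf operands match ISD::TargetConstant, a value that must remain an
// immediate operand of the selected instruction. The C++ lowering code in
// AArch64ISelLowering.cpp was switched accordingly earlier in this patch,
// roughly as in this sketch (DAG, DL and Cnt stand in for the values at the
// real call sites):
//   SDValue Plain  = DAG.getConstant(Cnt, DL, MVT::i32);       // ImmLeaf match
//   SDValue Pinned = DAG.getTargetConstant(Cnt, DL, MVT::i32); // TImmLeaf match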
@@ -6511,10 +6511,10 @@ multiclass sve_int_bin_pred_shift_imm_right_zeroing_bhsd<SDPatternOperator op = def _S_ZERO : PredTwoOpImmPseudo<NAME # _S, ZPR32, vecshiftR32, FalseLanesZero>; def _D_ZERO : PredTwoOpImmPseudo<NAME # _D, ZPR64, vecshiftR64, FalseLanesZero>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, tvecshiftR8, !cast<Pseudo>(NAME # _B_ZERO)>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, tvecshiftR16, !cast<Pseudo>(NAME # _H_ZERO)>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, tvecshiftR32, !cast<Pseudo>(NAME # _S_ZERO)>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, tvecshiftR64, !cast<Pseudo>(NAME # _D_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, vecshiftR8, !cast<Pseudo>(NAME # _B_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, vecshiftR16, !cast<Pseudo>(NAME # _H_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, vecshiftR32, !cast<Pseudo>(NAME # _S_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, vecshiftR64, !cast<Pseudo>(NAME # _D_ZERO)>; } class sve_int_bin_pred_shift<bits<2> sz8_64, bit wide, bits<3> opc, @@ -10031,7 +10031,7 @@ multiclass sve2p1_multi_vec_extract_narrow<string mnemonic, bits<2> opc, SDPatte // SVE2 multi-vec shift narrow class sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, bits<2> tsz> - : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, tvecshiftR16:$imm4), + : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, vecshiftR16:$imm4), mnemonic, "\t$Zd, $Zn, $imm4", "", []>, Sched<[]> { bits<5> Zd; @@ -10055,7 +10055,7 @@ class sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, bits<2> tsz> multiclass sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, SDPatternOperator intrinsic> { def NAME : sve2p1_multi_vec_shift_narrow<mnemonic, opc, 0b01>; - def : SVE2p1_Sat_Shift_VG2_Pat<NAME, intrinsic, nxv8i16, nxv4i32, tvecshiftR16>; + def : SVE2p1_Sat_Shift_VG2_Pat<NAME, intrinsic, nxv8i16, nxv4i32, vecshiftR16>; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 7003a40..9446144 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -2126,6 +2126,8 @@ def FeatureISAVersion12_50 : FeatureSet< FeatureLdsBarrierArriveAtomic, FeatureSetPrioIncWgInst, Feature45BitNumRecordsBufferResource, + FeatureSupportsXNACK, + FeatureXNACK, ]>; def FeatureISAVersion12_51 : FeatureSet< diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 2ba3156..9dd64e0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -131,10 +131,8 @@ static bool isDSAddress(const Constant *C) { return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS; } -/// Returns true if the function requires the implicit argument be passed -/// regardless of the function contents. -static bool funcRequiresHostcallPtr(const Function &F) { - // Sanitizers require the hostcall buffer passed in the implicit arguments. +/// Returns true if sanitizer attributes are present on a function. 
+static bool hasSanitizerAttributes(const Function &F) { return F.hasFnAttribute(Attribute::SanitizeAddress) || F.hasFnAttribute(Attribute::SanitizeThread) || F.hasFnAttribute(Attribute::SanitizeMemory) || @@ -469,15 +467,21 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { // If the function requires the implicit arg pointer due to sanitizers, // assume it's needed even if explicitly marked as not requiring it. - const bool NeedsHostcall = funcRequiresHostcallPtr(*F); - if (NeedsHostcall) { + // Flat scratch initialization is needed because `asan_malloc_impl` + // calls introduced later in pipeline will have flat scratch accesses. + // FIXME: FLAT_SCRATCH_INIT will not be required here if device-libs + // implementation for `asan_malloc_impl` is updated. + const bool HasSanitizerAttrs = hasSanitizerAttributes(*F); + if (HasSanitizerAttrs) { removeAssumedBits(IMPLICIT_ARG_PTR); removeAssumedBits(HOSTCALL_PTR); + removeAssumedBits(FLAT_SCRATCH_INIT); } for (auto Attr : ImplicitAttrs) { - if (NeedsHostcall && - (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR)) + if (HasSanitizerAttrs && + (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR || + Attr.first == FLAT_SCRATCH_INIT)) continue; if (F->hasFnAttribute(Attr.second)) diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 2d5ae29..2120bf8 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -2303,7 +2303,10 @@ Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1( KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n'; if (!hasArchitectedFlatScratch()) KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n'; - KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n'; + bool ReservedXnackMask = STI.hasFeature(AMDGPU::FeatureXNACK); + assert(!ReservedXnackMask || STI.hasFeature(AMDGPU::FeatureSupportsXNACK)); + KdStream << Indent << ".amdhsa_reserve_xnack_mask " << ReservedXnackMask + << '\n'; KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n"; CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY); diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index fed3778..90c828b 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -722,7 +722,8 @@ bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const { return false; } - if (!MRI->constrainRegClass(New->getReg(), ConstrainRC)) { + if (New->getReg().isVirtual() && + !MRI->constrainRegClass(New->getReg(), ConstrainRC)) { LLVM_DEBUG(dbgs() << "Cannot constrain " << printReg(New->getReg(), TRI) << TRI->getRegClassName(ConstrainRC) << '\n'); return false; @@ -931,7 +932,9 @@ static MachineOperand *lookUpCopyChain(const SIInstrInfo &TII, for (MachineInstr *SubDef = MRI.getVRegDef(SrcReg); SubDef && TII.isFoldableCopy(*SubDef); SubDef = MRI.getVRegDef(Sub->getReg())) { - MachineOperand &SrcOp = SubDef->getOperand(1); + unsigned SrcIdx = TII.getFoldableCopySrcIdx(*SubDef); + MachineOperand &SrcOp = SubDef->getOperand(SrcIdx); + if (SrcOp.isImm()) return &SrcOp; if (!SrcOp.isReg() || SrcOp.getReg().isPhysical()) diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index e4b3528..0189e7b 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -306,7 
+306,8 @@ class PrologEpilogSGPRSpillBuilder { buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL, TmpVGPR, FI, FrameReg, DwordOff); - MRI.constrainRegClass(SubReg, &AMDGPU::SReg_32_XM0RegClass); + assert(SubReg.isPhysical()); + BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg) .addReg(TmpVGPR, RegState::Kill); DwordOff += 4; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index f7265c5..e233457 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -18860,31 +18860,6 @@ SITargetLowering::getTargetMMOFlags(const Instruction &I) const { return Flags; } -bool SITargetLowering::checkForPhysRegDependency( - SDNode *Def, SDNode *User, unsigned Op, const TargetRegisterInfo *TRI, - const TargetInstrInfo *TII, MCRegister &PhysReg, int &Cost) const { - if (User->getOpcode() != ISD::CopyToReg) - return false; - if (!Def->isMachineOpcode()) - return false; - MachineSDNode *MDef = dyn_cast<MachineSDNode>(Def); - if (!MDef) - return false; - - unsigned ResNo = User->getOperand(Op).getResNo(); - if (User->getOperand(Op)->getValueType(ResNo) != MVT::i1) - return false; - const MCInstrDesc &II = TII->get(MDef->getMachineOpcode()); - if (II.isCompare() && II.hasImplicitDefOfPhysReg(AMDGPU::SCC)) { - PhysReg = AMDGPU::SCC; - const TargetRegisterClass *RC = - TRI->getMinimalPhysRegClass(PhysReg, Def->getSimpleValueType(ResNo)); - Cost = RC->getCopyCost(); - return true; - } - return false; -} - void SITargetLowering::emitExpandAtomicAddrSpacePredicate( Instruction *AI) const { // Given: atomicrmw fadd ptr %addr, float %val ordering diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index a474dab..74e58f4 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -561,11 +561,6 @@ public: bool denormalsEnabledForType(const SelectionDAG &DAG, EVT VT) const; bool denormalsEnabledForType(LLT Ty, const MachineFunction &MF) const; - bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, - const TargetRegisterInfo *TRI, - const TargetInstrInfo *TII, - MCRegister &PhysReg, int &Cost) const override; - bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool SNaN = false, unsigned Depth = 0) const override; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 56435a5..46757cf 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2112,8 +2112,6 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case AMDGPU::SI_RESTORE_S32_FROM_VGPR: MI.setDesc(get(AMDGPU::V_READLANE_B32)); - MI.getMF()->getRegInfo().constrainRegClass(MI.getOperand(0).getReg(), - &AMDGPU::SReg_32_XM0RegClass); break; case AMDGPU::AV_MOV_B32_IMM_PSEUDO: { Register Dst = MI.getOperand(0).getReg(); @@ -3435,6 +3433,32 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) { } } +unsigned SIInstrInfo::getFoldableCopySrcIdx(const MachineInstr &MI) { + switch (MI.getOpcode()) { + case AMDGPU::V_MOV_B16_t16_e32: + case AMDGPU::V_MOV_B16_t16_e64: + return 2; + case AMDGPU::V_MOV_B32_e32: + case AMDGPU::V_MOV_B32_e64: + case AMDGPU::V_MOV_B64_PSEUDO: + case AMDGPU::V_MOV_B64_e32: + case AMDGPU::V_MOV_B64_e64: + case AMDGPU::S_MOV_B32: + case AMDGPU::S_MOV_B64: + case AMDGPU::S_MOV_B64_IMM_PSEUDO: + case AMDGPU::COPY: + case AMDGPU::WWM_COPY: + case 
AMDGPU::V_ACCVGPR_WRITE_B32_e64: + case AMDGPU::V_ACCVGPR_READ_B32_e64: + case AMDGPU::V_ACCVGPR_MOV_B32: + case AMDGPU::AV_MOV_B32_IMM_PSEUDO: + case AMDGPU::AV_MOV_B64_IMM_PSEUDO: + return 1; + default: + llvm_unreachable("MI is not a foldable copy"); + } +} + static constexpr AMDGPU::OpName ModifierOpNames[] = { AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp, @@ -8117,21 +8141,14 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist, // hope for the best. if (Inst.isCopy() && DstReg.isPhysical() && RI.isVGPR(MRI, Inst.getOperand(1).getReg())) { - // TODO: Only works for 32 bit registers. - if (MRI.constrainRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass)) { - BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(), - get(AMDGPU::V_READFIRSTLANE_B32), DstReg) - .add(Inst.getOperand(1)); - } else { - Register NewDst = - MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); - BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(), - get(AMDGPU::V_READFIRSTLANE_B32), NewDst) - .add(Inst.getOperand(1)); - BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(), get(AMDGPU::COPY), - DstReg) - .addReg(NewDst); - } + Register NewDst = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(), + get(AMDGPU::V_READFIRSTLANE_B32), NewDst) + .add(Inst.getOperand(1)); + BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(), get(AMDGPU::COPY), + DstReg) + .addReg(NewDst); + Inst.eraseFromParent(); return; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index a21089f..cc59acf 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -417,6 +417,7 @@ public: const MachineInstr &MIb) const override; static bool isFoldableCopy(const MachineInstr &MI); + static unsigned getFoldableCopySrcIdx(const MachineInstr &MI); void removeModOperands(MachineInstr &MI) const; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 205237f..3c2dd42 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -2222,8 +2222,6 @@ bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI, // Don't need to write VGPR out. } - MachineRegisterInfo &MRI = MI->getMF()->getRegInfo(); - // Restore clobbered registers in the specified restore block. MI = RestoreMBB.end(); SB.setMI(&RestoreMBB, MI); @@ -2238,7 +2236,8 @@ bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI, SB.NumSubRegs == 1 ? 
SB.SuperReg : Register(getSubReg(SB.SuperReg, SB.SplitParts[i])); - MRI.constrainRegClass(SubReg, &AMDGPU::SReg_32_XM0RegClass); + + assert(SubReg.isPhysical()); bool LastSubReg = (i + 1 == e); auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32), SubReg) @@ -3059,8 +3058,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, if (IsSALU && LiveSCC) { Register NewDest; if (IsCopy) { - MF->getRegInfo().constrainRegClass(ResultReg, - &AMDGPU::SReg_32_XM0RegClass); + assert(ResultReg.isPhysical()); NewDest = ResultReg; } else { NewDest = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass, @@ -3190,8 +3188,6 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, Register NewDest; if (IsCopy) { - MF->getRegInfo().constrainRegClass(ResultReg, - &AMDGPU::SReg_32_XM0RegClass); NewDest = ResultReg; } else { NewDest = RS->scavengeRegisterBackwards( diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index 5630580..f98e312 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -367,19 +367,6 @@ def SCC_CLASS : SIRegisterClass<"AMDGPU", [i1], 1, (add SCC)> { let BaseClassOrder = 10000; } -def M0_CLASS : SIRegisterClass<"AMDGPU", [i32], 32, (add M0)> { - let CopyCost = 1; - let isAllocatable = 0; - let HasSGPR = 1; -} - -def M0_CLASS_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16, (add M0_LO16)> { - let CopyCost = 1; - let Size = 16; - let isAllocatable = 0; - let HasSGPR = 1; -} - // TODO: Do we need to set DwarfRegAlias on register tuples? def SGPR_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16, @@ -797,7 +784,7 @@ def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16, TMA_LO_LO16, TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO_LO16, SRC_SHARED_LIMIT_LO_LO16, SRC_PRIVATE_BASE_LO_LO16, SRC_PRIVATE_LIMIT_LO_LO16, SRC_POPS_EXITING_WAVE_ID_LO16, SRC_VCCZ_LO16, SRC_EXECZ_LO16, SRC_SCC_LO16, - EXEC_LO_LO16, EXEC_HI_LO16, M0_CLASS_LO16, SRC_FLAT_SCRATCH_BASE_LO_LO16, + EXEC_LO_LO16, EXEC_HI_LO16, M0_LO16, SRC_FLAT_SCRATCH_BASE_LO_LO16, SRC_FLAT_SCRATCH_BASE_HI_LO16)> { let Size = 16; let isAllocatable = 0; @@ -805,7 +792,7 @@ def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16, } def SReg_32_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32, - (add SReg_32_XM0_XEXEC, M0_CLASS)> { + (add SReg_32_XM0_XEXEC, M0)> { let AllocationPriority = 0; } @@ -830,7 +817,7 @@ def APERTURE_Class : SIRegisterClass<"AMDGPU", Reg64Types.types, 32, // Register class for all scalar registers (SGPRs + Special Registers) def SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32, - (add SReg_32_XM0, M0_CLASS)> { + (add SReg_32_XM0, M0)> { let AllocationPriority = 0; let HasSGPR = 1; let BaseClassOrder = 32; diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index d0dfa47..a94e131 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -359,6 +359,8 @@ HexagonTargetLowering::initializeHVXLowering() { setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand); setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand); setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand); + setCondCodeAction(ISD::SETUO, MVT::v64f16, Expand); + setCondCodeAction(ISD::SETO, MVT::v64f16, Expand); setCondCodeAction(ISD::SETNE, MVT::v32f32, Expand); 
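// Editorial note, not part of the patch: SETO and SETUO, newly marked Expand
// for the v64f16 and v32f32 HVX types in the surrounding hunks, are the
// ordered/unordered predicates. Per lane they reduce to NaN self-checks, which
// an expansion can synthesize from ordinary comparisons:
//   SETO(a, b)  <=>  (a == a) && (b == b)   // neither operand is NaN
//   SETUO(a, b) <=>  (a != a) || (b != b)   // at least one operand is NaN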
setCondCodeAction(ISD::SETLE, MVT::v32f32, Expand); @@ -372,6 +374,8 @@ HexagonTargetLowering::initializeHVXLowering() { setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand); setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand); setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand); + setCondCodeAction(ISD::SETUO, MVT::v32f32, Expand); + setCondCodeAction(ISD::SETO, MVT::v32f32, Expand); // Boolean vectors. @@ -449,6 +453,7 @@ HexagonTargetLowering::initializeHVXLowering() { // Include cases which are not hander earlier setOperationAction(ISD::UINT_TO_FP, MVT::v32i1, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v64i1, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::v32i1, Custom); setTargetDAGCombine({ISD::CONCAT_VECTORS, ISD::TRUNCATE, ISD::VSELECT}); } @@ -2337,7 +2342,7 @@ HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const { return ExpandHvxFpToInt(Op, DAG); } -// For vector type v32i1 uint_to_fp to v32f32: +// For vector type v32i1 uint_to_fp/sint_to_fp to v32f32: // R1 = #1, R2 holds the v32i1 param // V1 = vsplat(R1) // V2 = vsplat(R2) @@ -2464,7 +2469,7 @@ HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const { MVT IntTy = ty(Op.getOperand(0)).getVectorElementType(); MVT FpTy = ResTy.getVectorElementType(); - if (Op.getOpcode() == ISD::UINT_TO_FP) { + if (Op.getOpcode() == ISD::UINT_TO_FP || Op.getOpcode() == ISD::SINT_TO_FP) { if (ResTy == MVT::v32f32 && ty(Op.getOperand(0)) == MVT::v32i1) return LowerHvxPred32ToFp(Op, DAG); if (ResTy == MVT::v64f16 && ty(Op.getOperand(0)) == MVT::v64i1) diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 4cfbfca..7ddf996 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -2860,8 +2860,7 @@ static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node, EVT ResTy, unsigned first) { unsigned NumElts = ResTy.getVectorNumElements(); - assert(first >= 0 && - first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements()); + assert(first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements()); SmallVector<SDValue, 16> Ops(Node->op_begin() + first, Node->op_begin() + first + NumElts); diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index 395d2c4..662d3f6 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -629,7 +629,7 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FTAN, G_FPOW, G_FLOG, G_FLOG2, G_FLOG10, G_FEXP, G_FEXP2, G_FEXP10, G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH, G_FSINH, - G_FTANH}) + G_FTANH, G_FMODF}) .libcallFor({s32, s64}) .libcallFor(ST.is64Bit(), {s128}); getActionDefinitionsBuilder({G_FPOWI, G_FLDEXP}) diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 7d4535a..b37b740 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -1560,7 +1560,7 @@ static MCRegister getRVVBaseRegister(const RISCVRegisterInfo &TRI, MCRegister BaseReg = TRI.getSubReg(Reg, RISCV::sub_vrm1_0); // If it's not a grouped vector register, it doesn't have subregister, so // the base register is just itself. 
- if (BaseReg == RISCV::NoRegister) + if (!BaseReg.isValid()) BaseReg = Reg; return BaseReg; } diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td index cf6f83a..7f5d0af 100644 --- a/llvm/lib/Target/RISCV/RISCVGISel.td +++ b/llvm/lib/Target/RISCV/RISCVGISel.td @@ -126,13 +126,6 @@ let Predicates = [HasAtomicLdSt, IsRV64] in { // RV64 i32 patterns not used by SelectionDAG //===----------------------------------------------------------------------===// -def uimm5i32 : ImmLeaf<i32, [{return isUInt<5>(Imm);}]>; - -def zext_is_sext : PatFrag<(ops node:$src), (zext node:$src), [{ - KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0), 0); - return Known.isNonNegative(); -}]>; - let Predicates = [IsRV64] in { def : LdPat<extloadi8, LBU, i32>; // Prefer unsigned due to no c.lb in Zcb. def : LdPat<extloadi16, LH, i32>; @@ -140,15 +133,10 @@ def : LdPat<extloadi16, LH, i32>; def : StPat<truncstorei8, SB, GPR, i32>; def : StPat<truncstorei16, SH, GPR, i32>; -def : Pat<(anyext (i32 GPR:$src)), (COPY GPR:$src)>; def : Pat<(sext (i32 GPR:$src)), (ADDIW GPR:$src, 0)>; -def : Pat<(i32 (trunc GPR:$src)), (COPY GPR:$src)>; def : Pat<(sext_inreg (i64 (add GPR:$rs1, simm12_lo:$imm)), i32), (ADDIW GPR:$rs1, simm12_lo:$imm)>; - -// Use sext if the sign bit of the input is 0. -def : Pat<(zext_is_sext (i32 GPR:$src)), (ADDIW GPR:$src, 0)>; } let Predicates = [IsRV64, NoStdExtZba] in diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 50649cf..dcce2d2 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -533,7 +533,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI, ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP, ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2, - ISD::FLOG10, ISD::FLDEXP, ISD::FFREXP}, + ISD::FLOG10, ISD::FLDEXP, ISD::FFREXP, ISD::FMODF}, MVT::f16, Promote); // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index 6a6ead2..cf8d120 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -128,7 +128,7 @@ static bool hasUndefinedPassthru(const MachineInstr &MI) { // All undefined passthrus should be $noreg: see // RISCVDAGToDAGISel::doPeepholeNoRegPassThru const MachineOperand &UseMO = MI.getOperand(UseOpIdx); - return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef(); + return !UseMO.getReg().isValid() || UseMO.isUndef(); } /// Return true if \p MI is a copy that will be lowered to one or more vmvNr.vs. @@ -1454,7 +1454,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { Register Reg = VLOp.getReg(); // Erase the AVL operand from the instruction. 
- VLOp.setReg(RISCV::NoRegister); + VLOp.setReg(Register()); VLOp.setIsKill(false); if (LIS) { LiveInterval &LI = LIS->getInterval(Reg); @@ -1663,7 +1663,7 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const { if (!MO.isReg() || !MO.getReg().isVirtual()) return; Register OldVLReg = MO.getReg(); - MO.setReg(RISCV::NoRegister); + MO.setReg(Register()); if (LIS) LIS->shrinkToUses(&LIS->getInterval(OldVLReg)); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 1e6b04f8..7db4832 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1364,7 +1364,7 @@ void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, RS->scavengeRegisterBackwards(RISCV::GPRRegClass, MI.getIterator(), /*RestoreAfter=*/false, /*SpAdj=*/0, /*AllowSpill=*/false); - if (TmpGPR != RISCV::NoRegister) + if (TmpGPR.isValid()) RS->setRegUsed(TmpGPR); else { // The case when there is no scavenged register needs special handling. @@ -3021,7 +3021,7 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI, ErrInfo = "Invalid operand type for VL operand"; return false; } - if (Op.isReg() && Op.getReg() != RISCV::NoRegister) { + if (Op.isReg() && Op.getReg().isValid()) { const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); auto *RC = MRI.getRegClass(Op.getReg()); if (!RISCV::GPRRegClass.hasSubClassEq(RC)) { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td index 1674c95..1dd7332 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td @@ -26,7 +26,7 @@ class LAQ_r<bit aq, bit rl, bits<3> funct3, string opcodestr> let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in class SRL_r<bit aq, bit rl, bits<3> funct3, string opcodestr> : RVInstRAtomic<0b00111, aq, rl, funct3, OPC_AMO, - (outs ), (ins GPRMemZeroOffset:$rs1, GPR:$rs2), + (outs), (ins GPR:$rs2, GPRMemZeroOffset:$rs1), opcodestr, "$rs2, $rs1"> { let rd = 0; } @@ -71,7 +71,7 @@ class PatLAQ<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT> // while atomic_store has data, addr class PatSRL<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT> : Pat<(OpNode (vt GPR:$rs2), (XLenVT GPRMemZeroOffset:$rs1)), - (Inst GPRMemZeroOffset:$rs1, GPR:$rs2)>; + (Inst GPR:$rs2, GPRMemZeroOffset:$rs1)>; let Predicates = [HasStdExtZalasr] in { diff --git a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp index f8d33ae..54569b1 100644 --- a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp +++ b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp @@ -259,7 +259,7 @@ static RegImmPair getRegImmPairPreventingCompression(const MachineInstr &MI) { if (isCompressibleLoad(MI) || isCompressibleStore(MI)) { const MachineOperand &MOImm = MI.getOperand(2); if (!MOImm.isImm()) - return RegImmPair(RISCV::NoRegister, 0); + return RegImmPair(Register(), 0); int64_t Offset = MOImm.getImm(); int64_t NewBaseAdjust = getBaseAdjustForCompression(Offset, Opcode); @@ -292,7 +292,7 @@ static RegImmPair getRegImmPairPreventingCompression(const MachineInstr &MI) { } } } - return RegImmPair(RISCV::NoRegister, 0); + return RegImmPair(Register(), 0); } // Check all uses after FirstMI of the given register, keeping a vector of diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp index ffba284..fdf9a4f 100644 --- 
a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp +++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp @@ -382,7 +382,7 @@ bool RISCVVectorPeephole::convertAllOnesVMergeToVMv(MachineInstr &MI) const { // vmv.v.v doesn't have a mask operand, so we may be able to inflate the // register class for the destination and passthru operands e.g. VRNoV0 -> VR MRI->recomputeRegClass(MI.getOperand(0).getReg()); - if (MI.getOperand(1).getReg() != RISCV::NoRegister) + if (MI.getOperand(1).getReg().isValid()) MRI->recomputeRegClass(MI.getOperand(1).getReg()); return true; } @@ -448,7 +448,7 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) { Register FalseReg = MI.getOperand(2).getReg(); if (TruePassthruReg != FalseReg) { // If True's passthru is undef see if we can change it to False - if (TruePassthruReg != RISCV::NoRegister || + if (TruePassthruReg.isValid() || !MRI->hasOneUse(MI.getOperand(3).getReg()) || !ensureDominates(MI.getOperand(2), *True)) return false; @@ -467,7 +467,7 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) { // vmv.v.v doesn't have a mask operand, so we may be able to inflate the // register class for the destination and passthru operands e.g. VRNoV0 -> VR MRI->recomputeRegClass(MI.getOperand(0).getReg()); - if (MI.getOperand(1).getReg() != RISCV::NoRegister) + if (MI.getOperand(1).getReg().isValid()) MRI->recomputeRegClass(MI.getOperand(1).getReg()); return true; } @@ -517,7 +517,7 @@ bool RISCVVectorPeephole::convertToUnmasked(MachineInstr &MI) const { if (RISCVII::isFirstDefTiedToFirstUse(MaskedMCID)) { unsigned PassthruOpIdx = MI.getNumExplicitDefs(); if (HasPassthru) { - if (MI.getOperand(PassthruOpIdx).getReg() != RISCV::NoRegister) + if (MI.getOperand(PassthruOpIdx).getReg()) MRI->recomputeRegClass(MI.getOperand(PassthruOpIdx).getReg()); } else MI.removeOperand(PassthruOpIdx); @@ -576,7 +576,7 @@ static bool dominates(MachineBasicBlock::const_iterator A, bool RISCVVectorPeephole::ensureDominates(const MachineOperand &MO, MachineInstr &Src) const { assert(MO.getParent()->getParent() == Src.getParent()); - if (!MO.isReg() || MO.getReg() == RISCV::NoRegister) + if (!MO.isReg() || !MO.getReg().isValid()) return true; MachineInstr *Def = MRI->getVRegDef(MO.getReg()); @@ -593,7 +593,7 @@ bool RISCVVectorPeephole::ensureDominates(const MachineOperand &MO, bool RISCVVectorPeephole::foldUndefPassthruVMV_V_V(MachineInstr &MI) { if (RISCV::getRVVMCOpcode(MI.getOpcode()) != RISCV::VMV_V_V) return false; - if (MI.getOperand(1).getReg() != RISCV::NoRegister) + if (MI.getOperand(1).getReg().isValid()) return false; // If the input was a pseudo with a policy operand, we can give it a tail @@ -654,7 +654,7 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) { // Src needs to have the same passthru as VMV_V_V MachineOperand &SrcPassthru = Src->getOperand(Src->getNumExplicitDefs()); - if (SrcPassthru.getReg() != RISCV::NoRegister && + if (SrcPassthru.getReg().isValid() && SrcPassthru.getReg() != Passthru.getReg()) return false; @@ -672,7 +672,7 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) { if (SrcPassthru.getReg() != Passthru.getReg()) { SrcPassthru.setReg(Passthru.getReg()); // If Src is masked then its passthru needs to be in VRNoV0. 
- if (Passthru.getReg() != RISCV::NoRegister) + if (Passthru.getReg().isValid()) MRI->constrainRegClass( Passthru.getReg(), TII->getRegClass(Src->getDesc(), SrcPassthru.getOperandNo(), TRI)); diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp index 7505507..e8c849e 100644 --- a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp @@ -188,8 +188,31 @@ class SPIRVLegalizePointerCast : public FunctionPass { FixedVectorType *SrcType = cast<FixedVectorType>(Src->getType()); FixedVectorType *DstType = cast<FixedVectorType>(GR->findDeducedElementType(Dst)); - assert(DstType->getNumElements() >= SrcType->getNumElements()); + auto dstNumElements = DstType->getNumElements(); + auto srcNumElements = SrcType->getNumElements(); + + // if the element type differs, it is a bitcast. + if (DstType->getElementType() != SrcType->getElementType()) { + // Support bitcast between vectors of different sizes only if + // the total bitwidth is the same. + [[maybe_unused]] auto dstBitWidth = + DstType->getElementType()->getScalarSizeInBits() * dstNumElements; + [[maybe_unused]] auto srcBitWidth = + SrcType->getElementType()->getScalarSizeInBits() * srcNumElements; + assert(dstBitWidth == srcBitWidth && + "Unsupported bitcast between vectors of different sizes."); + + Src = + B.CreateIntrinsic(Intrinsic::spv_bitcast, {DstType, SrcType}, {Src}); + buildAssignType(B, DstType, Src); + SrcType = DstType; + + StoreInst *SI = B.CreateStore(Src, Dst); + SI->setAlignment(Alignment); + return SI; + } + assert(DstType->getNumElements() >= SrcType->getNumElements()); LoadInst *LI = B.CreateLoad(DstType, Dst); LI->setAlignment(Alignment); Value *OldValues = LI; diff --git a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp index 6c19049..024030d 100644 --- a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp +++ b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp @@ -206,8 +206,8 @@ Filler::findDelayInstr(MachineBasicBlock &MBB, if (!done) --I; - // skip debug instruction - if (I->isDebugInstr()) + // Skip meta instructions. + if (I->isMetaInstruction()) continue; if (I->hasUnmodeledSideEffects() || I->isInlineAsm() || I->isPosition() || diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 3802506..931a10b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -13783,10 +13783,12 @@ static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask, // so prevents folding a load into this instruction or making a copy. const int UnpackLoMask[] = {0, 0, 1, 1}; const int UnpackHiMask[] = {2, 2, 3, 3}; - if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2)) - Mask = UnpackLoMask; - else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2)) - Mask = UnpackHiMask; + if (!isSingleElementRepeatedMask(Mask)) { + if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2)) + Mask = UnpackLoMask; + else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2)) + Mask = UnpackHiMask; + } return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1, getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); @@ -58135,6 +58137,14 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, if (SDValue V = combineToHorizontalAddSub(N, DAG, Subtarget)) return V; + // Prefer VSHLI to reduce uses, X86FixupInstTunings may revert this depending + // on the scheduler model. 
Limit multiple users to AVX+ targets to prevent + // introducing extra register moves. + if (Op0 == Op1 && supportedVectorShiftWithImm(VT, Subtarget, ISD::SHL)) + if (Subtarget.hasAVX() || N->isOnlyUserOf(Op0.getNode())) + return getTargetVShiftByConstNode(X86ISD::VSHLI, DL, VT.getSimpleVT(), + Op0, 1, DAG); + // Canonicalize hidden LEA pattern: // Fold (add (sub (shl x, c), y), z) -> (sub (add (shl x, c), z), y) // iff c < 4 diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index ddb95a4..faeab95 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -29,6 +29,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/MemoryProfileInfo.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" @@ -40,6 +41,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/InterleavedRange.h" +#include "llvm/Support/SHA1.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/CallPromotionUtils.h" @@ -60,6 +62,9 @@ STATISTIC(FunctionClonesThinBackend, "Number of function clones created during ThinLTO backend"); STATISTIC(FunctionsClonedThinBackend, "Number of functions that had clones created during ThinLTO backend"); +STATISTIC( + FunctionCloneDuplicatesThinBackend, + "Number of function clone duplicates detected during ThinLTO backend"); STATISTIC(AllocTypeNotCold, "Number of not cold static allocations (possibly " "cloned) during whole program analysis"); STATISTIC(AllocTypeCold, "Number of cold static allocations (possibly cloned) " @@ -5186,19 +5191,127 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { return Changed; } +// Compute a SHA1 hash of the callsite and alloc version information of clone I +// in the summary, to use in detection of duplicate clones. +uint64_t ComputeHash(const FunctionSummary *FS, unsigned I) { + SHA1 Hasher; + // Update hash with any callsites that call non-default (non-zero) callee + // versions. + for (auto &SN : FS->callsites()) { + // In theory all callsites and allocs in this function should have the same + // number of clone entries, but handle any discrepancies gracefully below + // for NDEBUG builds. + assert( + SN.Clones.size() > I && + "Callsite summary has fewer entries than other summaries in function"); + if (SN.Clones.size() <= I || !SN.Clones[I]) + continue; + uint8_t Data[sizeof(SN.Clones[I])]; + support::endian::write32le(Data, SN.Clones[I]); + Hasher.update(Data); + } + // Update hash with any allocs that have non-default (non-None) hints. + for (auto &AN : FS->allocs()) { + // In theory all callsites and allocs in this function should have the same + // number of clone entries, but handle any discrepancies gracefully below + // for NDEBUG builds. 
+ assert(AN.Versions.size() > I && + "Alloc summary has fewer entries than other summaries in function"); + if (AN.Versions.size() <= I || + (AllocationType)AN.Versions[I] == AllocationType::None) + continue; + Hasher.update(ArrayRef<uint8_t>(&AN.Versions[I], 1)); + } + return support::endian::read64le(Hasher.result().data()); +} + static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones( Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>> - &FuncToAliasMap) { + &FuncToAliasMap, + FunctionSummary *FS) { + auto TakeDeclNameAndReplace = [](GlobalValue *DeclGV, GlobalValue *NewGV) { + // We might have created this when adjusting callsite in another + // function. It should be a declaration. + assert(DeclGV->isDeclaration()); + NewGV->takeName(DeclGV); + DeclGV->replaceAllUsesWith(NewGV); + DeclGV->eraseFromParent(); + }; + + // Handle aliases to this function, and create analogous alias clones to the + // provided clone of this function. + auto CloneFuncAliases = [&](Function *NewF, unsigned I) { + if (!FuncToAliasMap.count(&F)) + return; + for (auto *A : FuncToAliasMap[&F]) { + std::string AliasName = getMemProfFuncName(A->getName(), I); + auto *PrevA = M.getNamedAlias(AliasName); + auto *NewA = GlobalAlias::create(A->getValueType(), + A->getType()->getPointerAddressSpace(), + A->getLinkage(), AliasName, NewF); + NewA->copyAttributesFrom(A); + if (PrevA) + TakeDeclNameAndReplace(PrevA, NewA); + } + }; + // The first "clone" is the original copy, we should only call this if we // needed to create new clones. assert(NumClones > 1); SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps; VMaps.reserve(NumClones - 1); FunctionsClonedThinBackend++; + + // Map of hash of callsite/alloc versions to the instantiated function clone + // (possibly the original) implementing those calls. Used to avoid + // instantiating duplicate function clones. + // FIXME: Ideally the thin link would not generate such duplicate clones to + // start with, but right now it happens due to phase ordering in the function + // assignment and the new clones it may produce. We simply make each + // duplicate an alias to the matching instantiated clone recorded in the map + // (except for available_externally which are made declarations as they would + // be aliases in the prevailing module, and available_externally aliases are + // not well supported right now). + DenseMap<uint64_t, Function *> HashToFunc; + + // Save the hash of the original function version. + HashToFunc[ComputeHash(FS, 0)] = &F; + for (unsigned I = 1; I < NumClones; I++) { VMaps.emplace_back(std::make_unique<ValueToValueMapTy>()); + std::string Name = getMemProfFuncName(F.getName(), I); + auto Hash = ComputeHash(FS, I); + // If this clone would duplicate a previously seen clone, don't generate the + // duplicate clone body, just make an alias to satisfy any (potentially + // cross-module) references. + if (HashToFunc.contains(Hash)) { + FunctionCloneDuplicatesThinBackend++; + auto *Func = HashToFunc[Hash]; + if (Func->hasAvailableExternallyLinkage()) { + // Skip these as EliminateAvailableExternallyPass does not handle + // available_externally aliases correctly and we end up with an + // available_externally alias to a declaration. Just create a + // declaration for now as we know we will have a definition in another + // module.
+ auto Decl = M.getOrInsertFunction(Name, Func->getFunctionType()); + ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F) + << "created clone decl " << ore::NV("Decl", Decl.getCallee())); + continue; + } + auto *PrevF = M.getFunction(Name); + auto *Alias = GlobalAlias::create(Name, Func); + if (PrevF) + TakeDeclNameAndReplace(PrevF, Alias); + ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F) + << "created clone alias " << ore::NV("Alias", Alias)); + + // Now handle aliases to this function, and clone those as well. + CloneFuncAliases(Func, I); + continue; + } auto *NewF = CloneFunction(&F, *VMaps.back()); + HashToFunc[Hash] = NewF; FunctionClonesThinBackend++; // Strip memprof and callsite metadata from clone as they are no longer // needed. @@ -5208,40 +5321,17 @@ static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones( Inst.setMetadata(LLVMContext::MD_callsite, nullptr); } } - std::string Name = getMemProfFuncName(F.getName(), I); auto *PrevF = M.getFunction(Name); - if (PrevF) { - // We might have created this when adjusting callsite in another - // function. It should be a declaration. - assert(PrevF->isDeclaration()); - NewF->takeName(PrevF); - PrevF->replaceAllUsesWith(NewF); - PrevF->eraseFromParent(); - } else + if (PrevF) + TakeDeclNameAndReplace(PrevF, NewF); + else NewF->setName(Name); updateSubprogramLinkageName(NewF, Name); ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F) << "created clone " << ore::NV("NewFunction", NewF)); // Now handle aliases to this function, and clone those as well. - if (!FuncToAliasMap.count(&F)) - continue; - for (auto *A : FuncToAliasMap[&F]) { - std::string Name = getMemProfFuncName(A->getName(), I); - auto *PrevA = M.getNamedAlias(Name); - auto *NewA = GlobalAlias::create(A->getValueType(), - A->getType()->getPointerAddressSpace(), - A->getLinkage(), Name, NewF); - NewA->copyAttributesFrom(A); - if (PrevA) { - // We might have created this when adjusting callsite in another - // function. It should be a declaration. - assert(PrevA->isDeclaration()); - NewA->takeName(PrevA); - PrevA->replaceAllUsesWith(NewA); - PrevA->eraseFromParent(); - } - } + CloneFuncAliases(NewF, I); } return VMaps; } @@ -5401,7 +5491,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps; bool ClonesCreated = false; unsigned NumClonesCreated = 0; - auto CloneFuncIfNeeded = [&](unsigned NumClones) { + auto CloneFuncIfNeeded = [&](unsigned NumClones, FunctionSummary *FS) { // We should at least have version 0 which is the original copy. assert(NumClones > 0); // If only one copy needed use original. @@ -5415,7 +5505,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { assert(NumClonesCreated == NumClones); return; } - VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap); + VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap, FS); // The first "clone" is the original copy, which doesn't have a VMap. assert(VMaps.size() == NumClones - 1); Changed = true; @@ -5424,9 +5514,9 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { }; auto CloneCallsite = [&](const CallsiteInfo &StackNode, CallBase *CB, - Function *CalledFunction) { + Function *CalledFunction, FunctionSummary *FS) { // Perform cloning if not yet done. 
- CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size()); + CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size(), FS); assert(!isMemProfClone(*CalledFunction)); @@ -5448,6 +5538,10 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { // below. auto CalleeOrigName = CalledFunction->getName(); for (unsigned J = 0; J < StackNode.Clones.size(); J++) { + // If the VMap is empty, this clone was a duplicate of another and was + // created as an alias or a declaration. + if (J > 0 && VMaps[J - 1]->empty()) + continue; // Do nothing if this version calls the original version of its // callee. if (!StackNode.Clones[J]) @@ -5591,7 +5685,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { #endif // Perform cloning if not yet done. - CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size()); + CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size(), FS); OrigAllocsThinBackend++; AllocVersionsThinBackend += AllocNode.Versions.size(); @@ -5624,6 +5718,10 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { // Update the allocation types per the summary info. for (unsigned J = 0; J < AllocNode.Versions.size(); J++) { + // If the VMap is empty, this clone was a duplicate of another and + // was created as an alias or a declaration. + if (J > 0 && VMaps[J - 1]->empty()) + continue; // Ignore any that didn't get an assigned allocation type. if (AllocNode.Versions[J] == (uint8_t)AllocationType::None) continue; @@ -5670,7 +5768,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { // we don't need to do ICP, but might need to clone this // function as it is the target of other cloned calls. if (NumClones) - CloneFuncIfNeeded(NumClones); + CloneFuncIfNeeded(NumClones, FS); } else { @@ -5690,7 +5788,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { } #endif - CloneCallsite(StackNode, CB, CalledFunction); + CloneCallsite(StackNode, CB, CalledFunction, FS); } } else if (CB->isTailCall() && CalledFunction) { // Locate the synthesized callsite info for the callee VI, if any was @@ -5700,7 +5798,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { if (CalleeVI && MapTailCallCalleeVIToCallsite.count(CalleeVI)) { auto Callsite = MapTailCallCalleeVIToCallsite.find(CalleeVI); assert(Callsite != MapTailCallCalleeVIToCallsite.end()); - CloneCallsite(Callsite->second, CB, CalledFunction); + CloneCallsite(Callsite->second, CB, CalledFunction, FS); } } } @@ -5846,6 +5944,10 @@ void MemProfContextDisambiguation::performICP( // check. CallBase *CBClone = CB; for (unsigned J = 0; J < NumClones; J++) { + // If the VMap is empty, this clone was a duplicate of another and was + // created as an alias or a declaration. + if (J > 0 && VMaps[J - 1]->empty()) + continue; // Copy 0 is the original function. if (J > 0) CBClone = cast<CallBase>((*VMaps[J - 1])[CB]); @@ -5891,6 +5993,10 @@ void MemProfContextDisambiguation::performICP( // TotalCount and the number promoted. CallBase *CBClone = CB; for (unsigned J = 0; J < NumClones; J++) { + // If the VMap is empty, this clone was a duplicate of another and was + // created as an alias or a declaration. + if (J > 0 && VMaps[J - 1]->empty()) + continue; // Copy 0 is the original function. 
if (J > 0) CBClone = cast<CallBase>((*VMaps[J - 1])[CB]); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index cf6d0ec..e1e24a9 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -318,18 +318,18 @@ Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) { // * Single constant active lane -> store // * Narrow width by halves excluding zero/undef lanes Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) { + Value *StorePtr = II.getArgOperand(1); + Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue(); auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3)); if (!ConstMask) return nullptr; // If the mask is all zeros, this instruction does nothing. - if (ConstMask->isNullValue()) + if (maskIsAllZeroOrUndef(ConstMask)) return eraseInstFromFunction(II); // If the mask is all ones, this is a plain vector store of the 1st argument. - if (ConstMask->isAllOnesValue()) { - Value *StorePtr = II.getArgOperand(1); - Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue(); + if (maskIsAllOneOrUndef(ConstMask)) { StoreInst *S = new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment); S->copyMetadata(II); @@ -389,7 +389,7 @@ Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) { return nullptr; // If the mask is all zeros, a scatter does nothing. - if (ConstMask->isNullValue()) + if (maskIsAllZeroOrUndef(ConstMask)) return eraseInstFromFunction(II); // Vector splat address -> scalar store diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 87000a1..3df448d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -50,6 +50,9 @@ using namespace llvm; using namespace PatternMatch; +namespace llvm { +extern cl::opt<bool> ProfcheckDisableMetadataFixes; +} /// Replace a select operand based on an equality comparison with the identity /// constant of a binop. @@ -4492,8 +4495,21 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { auto FoldSelectWithAndOrCond = [&](bool IsAnd, Value *A, Value *B) -> Instruction * { if (Value *V = simplifySelectInst(B, TrueVal, FalseVal, - SQ.getWithInstruction(&SI))) - return SelectInst::Create(A, IsAnd ? V : TrueVal, IsAnd ? FalseVal : V); + SQ.getWithInstruction(&SI))) { + Value *NewTrueVal = IsAnd ? V : TrueVal; + Value *NewFalseVal = IsAnd ? FalseVal : V; + + // If the True and False values don't change, then preserve the branch + // metadata of the original select as the net effect of this change is to + // simplify the conditional. + Instruction *MDFrom = nullptr; + if (NewTrueVal == TrueVal && NewFalseVal == FalseVal && + !ProfcheckDisableMetadataFixes) { + MDFrom = &SI; + } + return SelectInst::Create(A, NewTrueVal, NewFalseVal, "", nullptr, + MDFrom); + } // Is (select B, T, F) a SPF? if (CondVal->hasOneUse() && SelType->isIntOrIntVectorTy()) { diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp index 9d4fb79..d6b7633 100644 --- a/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -1646,10 +1646,6 @@ NewGVN::performSymbolicPredicateInfoEvaluation(BitCastInst *I) const { // Evaluate read only and pure calls, and create an expression result.
NewGVN::ExprResult NewGVN::performSymbolicCallEvaluation(Instruction *I) const { auto *CI = cast<CallInst>(I); - if (auto *II = dyn_cast<IntrinsicInst>(I)) { - if (auto *ReturnedValue = II->getReturnedArgOperand()) - return ExprResult::some(createVariableOrConstant(ReturnedValue)); - } // FIXME: Currently the calls which may access the thread id may // be considered as not accessing the memory. But this is diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 43d61f2..a88cffc 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3298,10 +3298,11 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, UI->getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo); Type *PtrTy = isSingleScalar() ? ScalarPtrTy : toVectorTy(ScalarPtrTy, VF); - + bool UsedByLoadStoreAddress = isUsedByLoadStoreAddress(this); InstructionCost ScalarCost = ScalarMemOpCost + Ctx.TTI.getAddressComputationCost( - PtrTy, &Ctx.SE, nullptr, Ctx.CostKind); + PtrTy, UsedByLoadStoreAddress ? nullptr : &Ctx.SE, + nullptr, Ctx.CostKind); if (isSingleScalar()) return ScalarCost; @@ -3312,7 +3313,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, // vectorized addressing or the loaded value is used as part of an address // of another load or store. bool PreferVectorizedAddressing = Ctx.TTI.prefersVectorizedAddressing(); - if (PreferVectorizedAddressing || !isUsedByLoadStoreAddress(this)) { + if (PreferVectorizedAddressing || !UsedByLoadStoreAddress) { bool EfficientVectorLoadStore = Ctx.TTI.supportsEfficientVectorElementLoadStore(); if (!(IsLoad && !PreferVectorizedAddressing) && |