Diffstat (limited to 'llvm/lib')
57 files changed, 455 insertions, 324 deletions
diff --git a/llvm/lib/Analysis/HashRecognize.cpp b/llvm/lib/Analysis/HashRecognize.cpp index 5d7ee1f..4529123 100644 --- a/llvm/lib/Analysis/HashRecognize.cpp +++ b/llvm/lib/Analysis/HashRecognize.cpp @@ -97,7 +97,7 @@ static bool containsUnreachable(const Loop &L, } } } - return std::distance(Latch->begin(), Latch->end()) != Visited.size(); + return Latch->size() != Visited.size(); } /// A structure that can hold either a Simple Recurrence or a Conditional diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 47dccde..7adb25d 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -233,19 +233,25 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap( const SCEV *DerefBytesSCEV = SE.getConstant(WiderTy, DerefBytes); // Check if we have a suitable dereferencable assumption we can use. - if (!StartPtrV->canBeFreed()) { - Instruction *CtxI = &*L->getHeader()->getFirstNonPHIIt(); - if (BasicBlock *LoopPred = L->getLoopPredecessor()) { - if (isa<BranchInst>(LoopPred->getTerminator())) - CtxI = LoopPred->getTerminator(); - } - - RetainedKnowledge DerefRK = getKnowledgeValidInContext( - StartPtrV, {Attribute::Dereferenceable}, *AC, CtxI, DT); - if (DerefRK) { - DerefBytesSCEV = - SE.getUMaxExpr(DerefBytesSCEV, SE.getSCEV(DerefRK.IRArgValue)); - } + Instruction *CtxI = &*L->getHeader()->getFirstNonPHIIt(); + if (BasicBlock *LoopPred = L->getLoopPredecessor()) { + if (isa<BranchInst>(LoopPred->getTerminator())) + CtxI = LoopPred->getTerminator(); + } + RetainedKnowledge DerefRK; + getKnowledgeForValue(StartPtrV, {Attribute::Dereferenceable}, *AC, + [&](RetainedKnowledge RK, Instruction *Assume, auto) { + if (!isValidAssumeForContext(Assume, CtxI, DT)) + return false; + if (StartPtrV->canBeFreed() && + !willNotFreeBetween(Assume, CtxI)) + return false; + DerefRK = std::max(DerefRK, RK); + return true; + }); + if (DerefRK) { + DerefBytesSCEV = + SE.getUMaxExpr(DerefBytesSCEV, SE.getSCEV(DerefRK.IRArgValue)); } if (DerefBytesSCEV->isZero()) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 09a8fbe..1eda7a7 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -89,6 +89,9 @@ using namespace llvm::PatternMatch; static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses", cl::Hidden, cl::init(20)); +/// Maximum number of instructions to check between assume and context +/// instruction. +static constexpr unsigned MaxInstrsToCheckForFree = 16; /// Returns the bitwidth of the given scalar or pointer type. For vector types, /// returns the element type's bitwidth. @@ -561,6 +564,29 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv, return false; } +bool llvm::willNotFreeBetween(const Instruction *Assume, + const Instruction *CtxI) { + if (CtxI->getParent() != Assume->getParent() || !Assume->comesBefore(CtxI)) + return false; + // Make sure the current function cannot arrange for another thread to free on + // its behalf. + if (!CtxI->getFunction()->hasNoSync()) + return false; + + // Check if there are any calls between the assume and CtxI that may + // free memory. + for (const auto &[Idx, I] : + enumerate(make_range(Assume->getIterator(), CtxI->getIterator()))) { + // Limit number of instructions to walk. 
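
Together with the LoopAccessAnalysis rework above, the helper added here lets a dereferenceable assumption be used even for a pointer that canBeFreed(), provided the enclosing function is nosync and every call between the assume and the context instruction is nofree. A minimal sketch of the calling pattern, mirroring the LoopAccessAnalysis hunk (names as in that hunk):

    RetainedKnowledge DerefRK;
    getKnowledgeForValue(StartPtrV, {Attribute::Dereferenceable}, *AC,
                         [&](RetainedKnowledge RK, Instruction *Assume, auto) {
                           if (!isValidAssumeForContext(Assume, CtxI, DT))
                             return false;
                           if (StartPtrV->canBeFreed() &&
                               !willNotFreeBetween(Assume, CtxI))
                             return false;
                           DerefRK = std::max(DerefRK, RK); // keep widest deref
                           return true;
                         });
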
+ if (Idx > MaxInstrsToCheckForFree) + return false; + if (const auto *CB = dyn_cast<CallBase>(&I)) + if (!CB->hasFnAttr(Attribute::NoFree)) + return false; + } + return true; +} + // TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but // we still have enough information about `RHS` to conclude non-zero. For // example Pred=EQ, RHS=isKnownNonZero. cmpExcludesZero is called in loops diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index 1703b27..bc0bb34 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -618,12 +618,15 @@ bool DwarfExpression::addExpression( case dwarf::DW_OP_dup: case dwarf::DW_OP_push_object_address: case dwarf::DW_OP_over: + case dwarf::DW_OP_rot: case dwarf::DW_OP_eq: case dwarf::DW_OP_ne: case dwarf::DW_OP_gt: case dwarf::DW_OP_ge: case dwarf::DW_OP_lt: case dwarf::DW_OP_le: + case dwarf::DW_OP_neg: + case dwarf::DW_OP_abs: emitOp(OpNum); break; case dwarf::DW_OP_deref: diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp index abb3f3e..ae284f3 100644 --- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -83,8 +83,6 @@ constrainRegClass(MachineRegisterInfo &MRI, Register Reg, const TargetRegisterClass *MachineRegisterInfo::constrainRegClass( Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs) { - if (Reg.isPhysical()) - return nullptr; return ::constrainRegClass(*this, Reg, getRegClass(Reg), RC, MinNumRegs); } diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 11bc64c..bb10cf6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -160,7 +160,7 @@ void InstrEmitter::EmitCopyFromReg(SDValue Op, bool IsClone, Register SrcReg, // If all uses are reading from the src physical register and copying the // register is either impossible or very expensive, then don't create a copy. - if (MatchReg && SrcRC->getCopyCost() < 0) { + if (MatchReg && SrcRC->expensiveOrImpossibleToCopy()) { VRBase = SrcReg; } else { // Create the reg, emit the copy. diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 31e7855..4f4fb9c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -111,15 +111,11 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, const TargetRegisterInfo *TRI, const TargetInstrInfo *TII, - const TargetLowering &TLI, MCRegister &PhysReg, int &Cost) { if (Op != 2 || User->getOpcode() != ISD::CopyToReg) return; Register Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); - if (TLI.checkForPhysRegDependency(Def, User, Op, TRI, TII, PhysReg, Cost)) - return; - if (Reg.isVirtual()) return; @@ -136,7 +132,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, if (PhysReg) { const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, Def->getSimpleValueType(ResNo)); - Cost = RC->getCopyCost(); + Cost = RC->expensiveOrImpossibleToCopy() ? -1 : RC->getCopyCost(); } } @@ -490,8 +486,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { MCRegister PhysReg; int Cost = 1; // Determine if this is a physical register dependency. 
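
This hunk and the InstrEmitter change above both key off TargetRegisterClass::expensiveOrImpossibleToCopy(). A sketch of its assumed meaning (the real definition lives in TargetRegisterInfo.h; a negative CopyCost has long been the marker for such classes, which is also why the scheduler code here maps the predicate back to Cost = -1):

    // Assumed equivalence, not the actual implementation:
    bool expensiveOrImpossibleToCopy(const TargetRegisterClass &RC) {
      return RC.getCopyCost() < 0;
    }
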
- const TargetLowering &TLI = DAG->getTargetLoweringInfo(); - CheckForPhysRegDependency(OpN, N, i, TRI, TII, TLI, PhysReg, Cost); + CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost); assert((!PhysReg || !isChain) && "Chain dependence via physreg data?"); // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler // emits a copy from the physical register to a virtual register unless diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp index 23b72da..6e316f1 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp @@ -280,6 +280,9 @@ std::vector<Block *> LinkGraph::splitBlockImpl(std::vector<Block *> Blocks, void LinkGraph::dump(raw_ostream &OS) { DenseMap<Block *, std::vector<Symbol *>> BlockSymbols; + OS << "LinkGraph \"" << getName() + << "\" (triple = " << getTargetTriple().str() << ")\n"; + // Map from blocks to the symbols pointing at them. for (auto *Sym : defined_symbols()) BlockSymbols[&Sym->getBlock()].push_back(Sym); diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp index 584b9f0..17050b0 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp @@ -21,23 +21,21 @@ JITLinkerBase::~JITLinkerBase() = default; void JITLinkerBase::linkPhase1(std::unique_ptr<JITLinkerBase> Self) { - LLVM_DEBUG({ - dbgs() << "Starting link phase 1 for graph " << G->getName() << "\n"; - }); + LLVM_DEBUG(dbgs() << "Starting link phase 1\n"); // Prune and optimize the graph. if (auto Err = runPasses(Passes.PrePrunePasses)) return Ctx->notifyFailed(std::move(Err)); LLVM_DEBUG({ - dbgs() << "Link graph \"" << G->getName() << "\" pre-pruning:\n"; + dbgs() << "Link graph pre-pruning:\n"; G->dump(dbgs()); }); prune(*G); LLVM_DEBUG({ - dbgs() << "Link graph \"" << G->getName() << "\" post-pruning:\n"; + dbgs() << "Link graph post-pruning:\n"; G->dump(dbgs()); }); @@ -67,14 +65,15 @@ void JITLinkerBase::linkPhase1(std::unique_ptr<JITLinkerBase> Self) { void JITLinkerBase::linkPhase2(std::unique_ptr<JITLinkerBase> Self, AllocResult AR) { + LLVM_DEBUG(dbgs() << "Starting link phase 2\n"); + if (AR) Alloc = std::move(*AR); else return Ctx->notifyFailed(AR.takeError()); LLVM_DEBUG({ - dbgs() << "Link graph \"" << G->getName() - << "\" before post-allocation passes:\n"; + dbgs() << "Link graph before post-allocation passes:\n"; G->dump(dbgs()); }); @@ -131,9 +130,7 @@ void JITLinkerBase::linkPhase2(std::unique_ptr<JITLinkerBase> Self, void JITLinkerBase::linkPhase3(std::unique_ptr<JITLinkerBase> Self, Expected<AsyncLookupResult> LR) { - LLVM_DEBUG({ - dbgs() << "Starting link phase 3 for graph " << G->getName() << "\n"; - }); + LLVM_DEBUG(dbgs() << "Starting link phase 3\n"); // If the lookup failed, bail out. 
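
Since LinkGraph::dump() now prints the graph's identity itself (see the JITLink.cpp hunk above), the shortened LLVM_DEBUG phase messages in this file lose no information: every dumped graph now starts with a header line of the form

    LinkGraph "main.o" (triple = x86_64-unknown-linux-gnu)

where the graph name and triple shown are illustrative.
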
if (!LR) @@ -143,8 +140,7 @@ void JITLinkerBase::linkPhase3(std::unique_ptr<JITLinkerBase> Self, applyLookupResult(*LR); LLVM_DEBUG({ - dbgs() << "Link graph \"" << G->getName() - << "\" before pre-fixup passes:\n"; + dbgs() << "Link graph before pre-fixup passes:\n"; G->dump(dbgs()); }); @@ -152,7 +148,7 @@ void JITLinkerBase::linkPhase3(std::unique_ptr<JITLinkerBase> Self, return abandonAllocAndBailOut(std::move(Self), std::move(Err)); LLVM_DEBUG({ - dbgs() << "Link graph \"" << G->getName() << "\" before copy-and-fixup:\n"; + dbgs() << "Link graph before copy-and-fixup:\n"; G->dump(dbgs()); }); @@ -161,7 +157,7 @@ void JITLinkerBase::linkPhase3(std::unique_ptr<JITLinkerBase> Self, return abandonAllocAndBailOut(std::move(Self), std::move(Err)); LLVM_DEBUG({ - dbgs() << "Link graph \"" << G->getName() << "\" after copy-and-fixup:\n"; + dbgs() << "Link graph after copy-and-fixup:\n"; G->dump(dbgs()); }); @@ -186,16 +182,14 @@ void JITLinkerBase::linkPhase3(std::unique_ptr<JITLinkerBase> Self, void JITLinkerBase::linkPhase4(std::unique_ptr<JITLinkerBase> Self, FinalizeResult FR) { - LLVM_DEBUG({ - dbgs() << "Starting link phase 4 for graph " << G->getName() << "\n"; - }); + LLVM_DEBUG(dbgs() << "Starting link phase 4\n"); if (!FR) return Ctx->notifyFailed(FR.takeError()); Ctx->notifyFinalized(std::move(*FR)); - LLVM_DEBUG({ dbgs() << "Link of graph " << G->getName() << " complete\n"; }); + LLVM_DEBUG({ dbgs() << "Link complete\n"; }); } Error JITLinkerBase::runPasses(LinkGraphPassList &Passes) { diff --git a/llvm/lib/IR/Assumptions.cpp b/llvm/lib/IR/Assumptions.cpp index f8bbcb3..3397f0e 100644 --- a/llvm/lib/IR/Assumptions.cpp +++ b/llvm/lib/IR/Assumptions.cpp @@ -20,9 +20,8 @@ using namespace llvm; -namespace { -bool hasAssumption(const Attribute &A, - const KnownAssumptionString &AssumptionStr) { +static bool hasAssumption(const Attribute &A, + const KnownAssumptionString &AssumptionStr) { if (!A.isValid()) return false; assert(A.isStringAttribute() && "Expected a string attribute!"); @@ -33,7 +32,7 @@ bool hasAssumption(const Attribute &A, return llvm::is_contained(Strings, AssumptionStr); } -DenseSet<StringRef> getAssumptions(const Attribute &A) { +static DenseSet<StringRef> getAssumptions(const Attribute &A) { if (!A.isValid()) return DenseSet<StringRef>(); assert(A.isStringAttribute() && "Expected a string attribute!"); @@ -47,8 +46,8 @@ DenseSet<StringRef> getAssumptions(const Attribute &A) { } template <typename AttrSite> -bool addAssumptionsImpl(AttrSite &Site, - const DenseSet<StringRef> &Assumptions) { +static bool addAssumptionsImpl(AttrSite &Site, + const DenseSet<StringRef> &Assumptions) { if (Assumptions.empty()) return false; @@ -64,7 +63,6 @@ bool addAssumptionsImpl(AttrSite &Site, return true; } -} // namespace bool llvm::hasAssumption(const Function &F, const KnownAssumptionString &AssumptionStr) { diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp index 1ededb9e7..77d044b 100644 --- a/llvm/lib/IR/DebugInfoMetadata.cpp +++ b/llvm/lib/IR/DebugInfoMetadata.cpp @@ -1768,6 +1768,7 @@ bool DIExpression::isValid() const { case dwarf::DW_OP_bregx: case dwarf::DW_OP_push_object_address: case dwarf::DW_OP_over: + case dwarf::DW_OP_rot: case dwarf::DW_OP_consts: case dwarf::DW_OP_eq: case dwarf::DW_OP_ne: @@ -1775,6 +1776,8 @@ bool DIExpression::isValid() const { case dwarf::DW_OP_ge: case dwarf::DW_OP_lt: case dwarf::DW_OP_le: + case dwarf::DW_OP_neg: + case dwarf::DW_OP_abs: break; } } diff --git a/llvm/lib/IR/DiagnosticHandler.cpp 
b/llvm/lib/IR/DiagnosticHandler.cpp index 683eade..eb2fe3b 100644 --- a/llvm/lib/IR/DiagnosticHandler.cpp +++ b/llvm/lib/IR/DiagnosticHandler.cpp @@ -36,6 +36,7 @@ struct PassRemarksOpt { } } }; +} // namespace static PassRemarksOpt PassRemarksPassedOptLoc; static PassRemarksOpt PassRemarksMissedOptLoc; @@ -66,7 +67,6 @@ static cl::opt<PassRemarksOpt, true, cl::parser<std::string>> "Enable optimization analysis remarks from passes whose name match " "the given regular expression"), cl::Hidden, cl::location(PassRemarksAnalysisOptLoc), cl::ValueRequired); -} bool DiagnosticHandler::isAnalysisRemarkEnabled(StringRef PassName) const { return (PassRemarksAnalysisOptLoc.Pattern && diff --git a/llvm/lib/IR/ModuleSummaryIndex.cpp b/llvm/lib/IR/ModuleSummaryIndex.cpp index d9024b0..dc55b63 100644 --- a/llvm/lib/IR/ModuleSummaryIndex.cpp +++ b/llvm/lib/IR/ModuleSummaryIndex.cpp @@ -409,7 +409,7 @@ struct Edge { GlobalValue::GUID Src; GlobalValue::GUID Dst; }; -} +} // namespace void Attributes::add(const Twine &Name, const Twine &Value, const Twine &Comment) { diff --git a/llvm/lib/IR/PassInstrumentation.cpp b/llvm/lib/IR/PassInstrumentation.cpp index 70bbe8f..52aad8f 100644 --- a/llvm/lib/IR/PassInstrumentation.cpp +++ b/llvm/lib/IR/PassInstrumentation.cpp @@ -15,7 +15,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/IR/PassManager.h" -namespace llvm { +using namespace llvm; template struct LLVM_EXPORT_TEMPLATE Any::TypeId<const Module *>; template struct LLVM_EXPORT_TEMPLATE Any::TypeId<const Function *>; @@ -42,7 +42,8 @@ PassInstrumentationCallbacks::getPassNameForClassName(StringRef ClassName) { AnalysisKey PassInstrumentationAnalysis::Key; -bool isSpecialPass(StringRef PassID, const std::vector<StringRef> &Specials) { +bool llvm::isSpecialPass(StringRef PassID, + const std::vector<StringRef> &Specials) { size_t Pos = PassID.find('<'); StringRef Prefix = PassID; if (Pos != StringRef::npos) @@ -50,5 +51,3 @@ bool isSpecialPass(StringRef PassID, const std::vector<StringRef> &Specials) { return any_of(Specials, [Prefix](StringRef S) { return Prefix.ends_with(S); }); } - -} // namespace llvm diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp index edeca97..fc2be51 100644 --- a/llvm/lib/IR/ProfDataUtils.cpp +++ b/llvm/lib/IR/ProfDataUtils.cpp @@ -24,8 +24,6 @@ using namespace llvm; -namespace { - // MD_prof nodes have the following layout // // In general: @@ -41,14 +39,15 @@ namespace { // correctly, and can change the behavior in the future if the layout changes // the minimum number of operands for MD_prof nodes with branch weights -constexpr unsigned MinBWOps = 3; +static constexpr unsigned MinBWOps = 3; // the minimum number of operands for MD_prof nodes with value profiles -constexpr unsigned MinVPOps = 5; +static constexpr unsigned MinVPOps = 5; // We may want to add support for other MD_prof types, so provide an abstraction // for checking the metadata type. -bool isTargetMD(const MDNode *ProfData, const char *Name, unsigned MinOps) { +static bool isTargetMD(const MDNode *ProfData, const char *Name, + unsigned MinOps) { // TODO: This routine may be simplified if MD_prof used an enum instead of a // string to differentiate the types of MD_prof nodes. 
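
For reference, the layout that MinBWOps guards is visible in the smallest well-formed branch-weights node: the !"branch_weights" tag plus one weight per successor, i.e. three operands for a two-successor branch. A sketch with illustrative weights (MDBuilder is from llvm/IR/MDBuilder.h):

    // Produces !{!"branch_weights", i32 20, i32 12}; with a synthesized
    // "expected" marker the weights shift right by one operand, which is
    // what getBranchWeightOffset() below accounts for.
    MDBuilder MDB(Br.getContext());
    Br.setMetadata(LLVMContext::MD_prof,
                   MDB.createBranchWeights(/*TrueWeight=*/20,
                                           /*FalseWeight=*/12));
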
if (!ProfData || !Name || MinOps < 2) @@ -101,14 +100,11 @@ static SmallVector<uint32_t> fitWeights(ArrayRef<uint64_t> Weights) { return Ret; } -} // namespace - -namespace llvm { -cl::opt<bool> ElideAllZeroBranchWeights("elide-all-zero-branch-weights", +static cl::opt<bool> ElideAllZeroBranchWeights("elide-all-zero-branch-weights", #if defined(LLVM_ENABLE_PROFCHECK) - cl::init(false) + cl::init(false) #else - cl::init(true) + cl::init(true) #endif ); const char *MDProfLabels::BranchWeights = "branch_weights"; @@ -118,21 +114,21 @@ const char *MDProfLabels::FunctionEntryCount = "function_entry_count"; const char *MDProfLabels::SyntheticFunctionEntryCount = "synthetic_function_entry_count"; const char *MDProfLabels::UnknownBranchWeightsMarker = "unknown"; -const char *LLVMLoopEstimatedTripCount = "llvm.loop.estimated_trip_count"; +const char *llvm::LLVMLoopEstimatedTripCount = "llvm.loop.estimated_trip_count"; -bool hasProfMD(const Instruction &I) { +bool llvm::hasProfMD(const Instruction &I) { return I.hasMetadata(LLVMContext::MD_prof); } -bool isBranchWeightMD(const MDNode *ProfileData) { +bool llvm::isBranchWeightMD(const MDNode *ProfileData) { return isTargetMD(ProfileData, MDProfLabels::BranchWeights, MinBWOps); } -bool isValueProfileMD(const MDNode *ProfileData) { +bool llvm::isValueProfileMD(const MDNode *ProfileData) { return isTargetMD(ProfileData, MDProfLabels::ValueProfile, MinVPOps); } -bool hasBranchWeightMD(const Instruction &I) { +bool llvm::hasBranchWeightMD(const Instruction &I) { auto *ProfileData = I.getMetadata(LLVMContext::MD_prof); return isBranchWeightMD(ProfileData); } @@ -147,16 +143,16 @@ static bool hasCountTypeMD(const Instruction &I) { return isa<CallBase>(I) && !isBranchWeightMD(ProfileData); } -bool hasValidBranchWeightMD(const Instruction &I) { +bool llvm::hasValidBranchWeightMD(const Instruction &I) { return getValidBranchWeightMDNode(I); } -bool hasBranchWeightOrigin(const Instruction &I) { +bool llvm::hasBranchWeightOrigin(const Instruction &I) { auto *ProfileData = I.getMetadata(LLVMContext::MD_prof); return hasBranchWeightOrigin(ProfileData); } -bool hasBranchWeightOrigin(const MDNode *ProfileData) { +bool llvm::hasBranchWeightOrigin(const MDNode *ProfileData) { if (!isBranchWeightMD(ProfileData)) return false; auto *ProfDataName = dyn_cast<MDString>(ProfileData->getOperand(1)); @@ -168,54 +164,54 @@ bool hasBranchWeightOrigin(const MDNode *ProfileData) { return ProfDataName != nullptr; } -unsigned getBranchWeightOffset(const MDNode *ProfileData) { +unsigned llvm::getBranchWeightOffset(const MDNode *ProfileData) { return hasBranchWeightOrigin(ProfileData) ? 
2 : 1; } -unsigned getNumBranchWeights(const MDNode &ProfileData) { +unsigned llvm::getNumBranchWeights(const MDNode &ProfileData) { return ProfileData.getNumOperands() - getBranchWeightOffset(&ProfileData); } -MDNode *getBranchWeightMDNode(const Instruction &I) { +MDNode *llvm::getBranchWeightMDNode(const Instruction &I) { auto *ProfileData = I.getMetadata(LLVMContext::MD_prof); if (!isBranchWeightMD(ProfileData)) return nullptr; return ProfileData; } -MDNode *getValidBranchWeightMDNode(const Instruction &I) { +MDNode *llvm::getValidBranchWeightMDNode(const Instruction &I) { auto *ProfileData = getBranchWeightMDNode(I); if (ProfileData && getNumBranchWeights(*ProfileData) == I.getNumSuccessors()) return ProfileData; return nullptr; } -void extractFromBranchWeightMD32(const MDNode *ProfileData, - SmallVectorImpl<uint32_t> &Weights) { +void llvm::extractFromBranchWeightMD32(const MDNode *ProfileData, + SmallVectorImpl<uint32_t> &Weights) { extractFromBranchWeightMD(ProfileData, Weights); } -void extractFromBranchWeightMD64(const MDNode *ProfileData, - SmallVectorImpl<uint64_t> &Weights) { +void llvm::extractFromBranchWeightMD64(const MDNode *ProfileData, + SmallVectorImpl<uint64_t> &Weights) { extractFromBranchWeightMD(ProfileData, Weights); } -bool extractBranchWeights(const MDNode *ProfileData, - SmallVectorImpl<uint32_t> &Weights) { +bool llvm::extractBranchWeights(const MDNode *ProfileData, + SmallVectorImpl<uint32_t> &Weights) { if (!isBranchWeightMD(ProfileData)) return false; extractFromBranchWeightMD(ProfileData, Weights); return true; } -bool extractBranchWeights(const Instruction &I, - SmallVectorImpl<uint32_t> &Weights) { +bool llvm::extractBranchWeights(const Instruction &I, + SmallVectorImpl<uint32_t> &Weights) { auto *ProfileData = I.getMetadata(LLVMContext::MD_prof); return extractBranchWeights(ProfileData, Weights); } -bool extractBranchWeights(const Instruction &I, uint64_t &TrueVal, - uint64_t &FalseVal) { +bool llvm::extractBranchWeights(const Instruction &I, uint64_t &TrueVal, + uint64_t &FalseVal) { assert((I.getOpcode() == Instruction::Br || I.getOpcode() == Instruction::Select) && "Looking for branch weights on something besides branch, select, or " @@ -234,7 +230,8 @@ bool extractBranchWeights(const Instruction &I, uint64_t &TrueVal, return true; } -bool extractProfTotalWeight(const MDNode *ProfileData, uint64_t &TotalVal) { +bool llvm::extractProfTotalWeight(const MDNode *ProfileData, + uint64_t &TotalVal) { TotalVal = 0; if (!ProfileData) return false; @@ -262,11 +259,12 @@ bool extractProfTotalWeight(const MDNode *ProfileData, uint64_t &TotalVal) { return false; } -bool extractProfTotalWeight(const Instruction &I, uint64_t &TotalVal) { +bool llvm::extractProfTotalWeight(const Instruction &I, uint64_t &TotalVal) { return extractProfTotalWeight(I.getMetadata(LLVMContext::MD_prof), TotalVal); } -void setExplicitlyUnknownBranchWeights(Instruction &I, StringRef PassName) { +void llvm::setExplicitlyUnknownBranchWeights(Instruction &I, + StringRef PassName) { MDBuilder MDB(I.getContext()); I.setMetadata( LLVMContext::MD_prof, @@ -275,14 +273,16 @@ void setExplicitlyUnknownBranchWeights(Instruction &I, StringRef PassName) { MDB.createString(PassName)})); } -void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, Function &F, - StringRef PassName) { +void llvm::setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, + Function &F, + StringRef PassName) { if (std::optional<Function::ProfileCount> EC = F.getEntryCount(); EC && EC->getCount() > 0) 
setExplicitlyUnknownBranchWeights(I, PassName); } -void setExplicitlyUnknownFunctionEntryCount(Function &F, StringRef PassName) { +void llvm::setExplicitlyUnknownFunctionEntryCount(Function &F, + StringRef PassName) { MDBuilder MDB(F.getContext()); F.setMetadata( LLVMContext::MD_prof, @@ -291,21 +291,21 @@ void setExplicitlyUnknownFunctionEntryCount(Function &F, StringRef PassName) { MDB.createString(PassName)})); } -bool isExplicitlyUnknownProfileMetadata(const MDNode &MD) { +bool llvm::isExplicitlyUnknownProfileMetadata(const MDNode &MD) { if (MD.getNumOperands() != 2) return false; return MD.getOperand(0).equalsStr(MDProfLabels::UnknownBranchWeightsMarker); } -bool hasExplicitlyUnknownBranchWeights(const Instruction &I) { +bool llvm::hasExplicitlyUnknownBranchWeights(const Instruction &I) { auto *MD = I.getMetadata(LLVMContext::MD_prof); if (!MD) return false; return isExplicitlyUnknownProfileMetadata(*MD); } -void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights, - bool IsExpected, bool ElideAllZero) { +void llvm::setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights, + bool IsExpected, bool ElideAllZero) { if ((ElideAllZeroBranchWeights && ElideAllZero) && llvm::all_of(Weights, [](uint32_t V) { return V == 0; })) { I.setMetadata(LLVMContext::MD_prof, nullptr); @@ -317,13 +317,14 @@ void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights, I.setMetadata(LLVMContext::MD_prof, BranchWeights); } -void setFittedBranchWeights(Instruction &I, ArrayRef<uint64_t> Weights, - bool IsExpected, bool ElideAllZero) { +void llvm::setFittedBranchWeights(Instruction &I, ArrayRef<uint64_t> Weights, + bool IsExpected, bool ElideAllZero) { setBranchWeights(I, fitWeights(Weights), IsExpected, ElideAllZero); } -SmallVector<uint32_t> downscaleWeights(ArrayRef<uint64_t> Weights, - std::optional<uint64_t> KnownMaxCount) { +SmallVector<uint32_t> +llvm::downscaleWeights(ArrayRef<uint64_t> Weights, + std::optional<uint64_t> KnownMaxCount) { uint64_t MaxCount = KnownMaxCount.has_value() ? 
KnownMaxCount.value() : *llvm::max_element(Weights); assert(MaxCount > 0 && "Bad max count"); @@ -334,7 +335,7 @@ SmallVector<uint32_t> downscaleWeights(ArrayRef<uint64_t> Weights, return DownscaledWeights; } -void scaleProfData(Instruction &I, uint64_t S, uint64_t T) { +void llvm::scaleProfData(Instruction &I, uint64_t S, uint64_t T) { assert(T != 0 && "Caller should guarantee"); auto *ProfileData = I.getMetadata(LLVMContext::MD_prof); if (ProfileData == nullptr) @@ -387,5 +388,3 @@ void scaleProfData(Instruction &I, uint64_t S, uint64_t T) { } I.setMetadata(LLVMContext::MD_prof, MDNode::get(C, Vals)); } - -} // namespace llvm diff --git a/llvm/lib/IR/SafepointIRVerifier.cpp b/llvm/lib/IR/SafepointIRVerifier.cpp index e54894c..e35b5b3 100644 --- a/llvm/lib/IR/SafepointIRVerifier.cpp +++ b/llvm/lib/IR/SafepointIRVerifier.cpp @@ -196,7 +196,6 @@ protected: static void Verify(const Function &F, const DominatorTree &DT, const CFGDeadness &CD); -namespace llvm { PreservedAnalyses SafepointIRVerifierPass::run(Function &F, FunctionAnalysisManager &AM) { const auto &DT = AM.getResult<DominatorTreeAnalysis>(F); @@ -205,7 +204,6 @@ PreservedAnalyses SafepointIRVerifierPass::run(Function &F, Verify(F, DT, CD); return PreservedAnalyses::all(); } -} // namespace llvm namespace { diff --git a/llvm/lib/IR/VFABIDemangler.cpp b/llvm/lib/IR/VFABIDemangler.cpp index 2de05a5..4fcf436 100644 --- a/llvm/lib/IR/VFABIDemangler.cpp +++ b/llvm/lib/IR/VFABIDemangler.cpp @@ -20,15 +20,16 @@ using namespace llvm; #define DEBUG_TYPE "vfabi-demangler" -namespace { /// Utilities for the Vector Function ABI name parser. +namespace { /// Return types for the parser functions. enum class ParseRet { OK, // Found. None, // Not found. Error // Syntax error. }; +} // namespace /// Extracts the `<isa>` information from the mangled string, and /// sets the `ISA` accordingly. 
If successful, the <isa> token is removed @@ -372,7 +373,6 @@ getScalableECFromSignature(const FunctionType *Signature, const VFISAKind ISA, return std::nullopt; } -} // namespace // Format of the ABI name: // _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)] diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp index a347609..b775cbb 100644 --- a/llvm/lib/IR/Value.cpp +++ b/llvm/lib/IR/Value.cpp @@ -622,6 +622,7 @@ enum PointerStripKind { PSK_InBoundsConstantIndices, PSK_InBounds }; +} // end anonymous namespace template <PointerStripKind StripKind> static void NoopCallback(const Value *) {} @@ -696,7 +697,6 @@ static const Value *stripPointerCastsAndOffsets( return V; } -} // end anonymous namespace const Value *Value::stripPointerCasts() const { return stripPointerCastsAndOffsets<PSK_ZeroIndices>(this); diff --git a/llvm/lib/Object/BuildID.cpp b/llvm/lib/Object/BuildID.cpp index 89d6bc3..d1ee597 100644 --- a/llvm/lib/Object/BuildID.cpp +++ b/llvm/lib/Object/BuildID.cpp @@ -24,6 +24,24 @@ using namespace llvm::object; namespace { template <typename ELFT> BuildIDRef getBuildID(const ELFFile<ELFT> &Obj) { + auto findBuildID = [&Obj](const auto &ShdrOrPhdr, + uint64_t Alignment) -> std::optional<BuildIDRef> { + Error Err = Error::success(); + for (auto N : Obj.notes(ShdrOrPhdr, Err)) + if (N.getType() == ELF::NT_GNU_BUILD_ID && + N.getName() == ELF::ELF_NOTE_GNU) + return N.getDesc(Alignment); + consumeError(std::move(Err)); + return std::nullopt; + }; + + auto Sections = cantFail(Obj.sections()); + for (const auto &S : Sections) { + if (S.sh_type != ELF::SHT_NOTE) + continue; + if (std::optional<BuildIDRef> ShdrRes = findBuildID(S, S.sh_addralign)) + return ShdrRes.value(); + } auto PhdrsOrErr = Obj.program_headers(); if (!PhdrsOrErr) { consumeError(PhdrsOrErr.takeError()); @@ -32,12 +50,8 @@ template <typename ELFT> BuildIDRef getBuildID(const ELFFile<ELFT> &Obj) { for (const auto &P : *PhdrsOrErr) { if (P.p_type != ELF::PT_NOTE) continue; - Error Err = Error::success(); - for (auto N : Obj.notes(P, Err)) - if (N.getType() == ELF::NT_GNU_BUILD_ID && - N.getName() == ELF::ELF_NOTE_GNU) - return N.getDesc(P.p_align); - consumeError(std::move(Err)); + if (std::optional<BuildIDRef> PhdrRes = findBuildID(P, P.p_align)) + return PhdrRes.value(); } return {}; } diff --git a/llvm/lib/TableGen/Error.cpp b/llvm/lib/TableGen/Error.cpp index de0c4c9..3ba2c6c 100644 --- a/llvm/lib/TableGen/Error.cpp +++ b/llvm/lib/TableGen/Error.cpp @@ -19,10 +19,10 @@ #include "llvm/TableGen/Record.h" #include <cstdlib> -namespace llvm { +using namespace llvm; -SourceMgr SrcMgr; -unsigned ErrorsPrinted = 0; +SourceMgr llvm::SrcMgr; +unsigned llvm::ErrorsPrinted = 0; static void PrintMessage(ArrayRef<SMLoc> Locs, SourceMgr::DiagKind Kind, const Twine &Msg) { @@ -49,118 +49,118 @@ static void PrintMessage(ArrayRef<SMLoc> Locs, SourceMgr::DiagKind Kind, // Functions to print notes. -void PrintNote(const Twine &Msg) { - WithColor::note() << Msg << "\n"; -} +void llvm::PrintNote(const Twine &Msg) { WithColor::note() << Msg << "\n"; } -void PrintNote(function_ref<void(raw_ostream &OS)> PrintMsg) { +void llvm::PrintNote(function_ref<void(raw_ostream &OS)> PrintMsg) { PrintMsg(WithColor::note()); } -void PrintNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg) { +void llvm::PrintNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg) { PrintMessage(NoteLoc, SourceMgr::DK_Note, Msg); } // Functions to print fatal notes. 
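
A worked example for the VFABI name format parsed earlier, using the well-known mangling _ZGVnN2v_sin: 'n' selects the AdvSIMD <isa>, 'N' means unmasked, '2' is the <vlen>, 'v' declares a single plain-vector parameter, and "sin" is the <scalarname>, so the demangler resolves a two-lane vector variant of sin.
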
-void PrintFatalNote(const Twine &Msg) { +void llvm::PrintFatalNote(const Twine &Msg) { PrintNote(Msg); fatal_exit(); } -void PrintFatalNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg) { +void llvm::PrintFatalNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg) { PrintNote(NoteLoc, Msg); fatal_exit(); } // This method takes a Record and uses the source location // stored in it. -void PrintFatalNote(const Record *Rec, const Twine &Msg) { +void llvm::PrintFatalNote(const Record *Rec, const Twine &Msg) { PrintNote(Rec->getLoc(), Msg); fatal_exit(); } // This method takes a RecordVal and uses the source location // stored in it. -void PrintFatalNote(const RecordVal *RecVal, const Twine &Msg) { +void llvm::PrintFatalNote(const RecordVal *RecVal, const Twine &Msg) { PrintNote(RecVal->getLoc(), Msg); fatal_exit(); } // Functions to print warnings. -void PrintWarning(const Twine &Msg) { WithColor::warning() << Msg << "\n"; } +void llvm::PrintWarning(const Twine &Msg) { + WithColor::warning() << Msg << "\n"; +} -void PrintWarning(ArrayRef<SMLoc> WarningLoc, const Twine &Msg) { +void llvm::PrintWarning(ArrayRef<SMLoc> WarningLoc, const Twine &Msg) { PrintMessage(WarningLoc, SourceMgr::DK_Warning, Msg); } -void PrintWarning(const char *Loc, const Twine &Msg) { +void llvm::PrintWarning(const char *Loc, const Twine &Msg) { SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), SourceMgr::DK_Warning, Msg); } // Functions to print errors. -void PrintError(const Twine &Msg) { WithColor::error() << Msg << "\n"; } +void llvm::PrintError(const Twine &Msg) { WithColor::error() << Msg << "\n"; } -void PrintError(function_ref<void(raw_ostream &OS)> PrintMsg) { +void llvm::PrintError(function_ref<void(raw_ostream &OS)> PrintMsg) { PrintMsg(WithColor::error()); } -void PrintError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) { +void llvm::PrintError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) { PrintMessage(ErrorLoc, SourceMgr::DK_Error, Msg); } -void PrintError(const char *Loc, const Twine &Msg) { +void llvm::PrintError(const char *Loc, const Twine &Msg) { SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), SourceMgr::DK_Error, Msg); } // This method takes a Record and uses the source location // stored in it. -void PrintError(const Record *Rec, const Twine &Msg) { +void llvm::PrintError(const Record *Rec, const Twine &Msg) { PrintMessage(Rec->getLoc(), SourceMgr::DK_Error, Msg); } // This method takes a RecordVal and uses the source location // stored in it. -void PrintError(const RecordVal *RecVal, const Twine &Msg) { +void llvm::PrintError(const RecordVal *RecVal, const Twine &Msg) { PrintMessage(RecVal->getLoc(), SourceMgr::DK_Error, Msg); } // Functions to print fatal errors. -void PrintFatalError(const Twine &Msg) { +void llvm::PrintFatalError(const Twine &Msg) { PrintError(Msg); fatal_exit(); } -void PrintFatalError(function_ref<void(raw_ostream &OS)> PrintMsg) { +void llvm::PrintFatalError(function_ref<void(raw_ostream &OS)> PrintMsg) { PrintError(PrintMsg); fatal_exit(); } -void PrintFatalError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) { +void llvm::PrintFatalError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) { PrintError(ErrorLoc, Msg); fatal_exit(); } // This method takes a Record and uses the source location // stored in it. -void PrintFatalError(const Record *Rec, const Twine &Msg) { +void llvm::PrintFatalError(const Record *Rec, const Twine &Msg) { PrintError(Rec->getLoc(), Msg); fatal_exit(); } // This method takes a RecordVal and uses the source location // stored in it. 
-void PrintFatalError(const RecordVal *RecVal, const Twine &Msg) { +void llvm::PrintFatalError(const RecordVal *RecVal, const Twine &Msg) { PrintError(RecVal->getLoc(), Msg); fatal_exit(); } // Check an assertion: Obtain the condition value and be sure it is true. // If not, print a nonfatal error along with the message. -bool CheckAssert(SMLoc Loc, const Init *Condition, const Init *Message) { +bool llvm::CheckAssert(SMLoc Loc, const Init *Condition, const Init *Message) { auto *CondValue = dyn_cast_or_null<IntInit>(Condition->convertInitializerTo( IntRecTy::get(Condition->getRecordKeeper()))); if (!CondValue) { @@ -178,11 +178,9 @@ bool CheckAssert(SMLoc Loc, const Init *Condition, const Init *Message) { } // Dump a message to stderr. -void dumpMessage(SMLoc Loc, const Init *Message) { +void llvm::dumpMessage(SMLoc Loc, const Init *Message) { if (auto *MessageInit = dyn_cast<StringInit>(Message)) PrintNote(Loc, MessageInit->getValue()); else PrintError(Loc, "dump value is not of type string"); } - -} // end namespace llvm diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp index f545706..42043f7 100644 --- a/llvm/lib/TableGen/Main.cpp +++ b/llvm/lib/TableGen/Main.cpp @@ -64,14 +64,12 @@ WriteIfChanged("write-if-changed", cl::desc("Only write output if it changed")); static cl::opt<bool> TimePhases("time-phases", cl::desc("Time phases of parser and backend")); -namespace llvm { -cl::opt<bool> EmitLongStrLiterals( +cl::opt<bool> llvm::EmitLongStrLiterals( "long-string-literals", cl::desc("when emitting large string tables, prefer string literals over " "comma-separated char literals. This can be a readability and " "compile-time performance win, but upsets some compilers"), cl::Hidden, cl::init(true)); -} // end namespace llvm static cl::opt<bool> NoWarnOnUnusedTemplateArgs( "no-warn-on-unused-template-args", diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp index 051a896..2ea3a24 100644 --- a/llvm/lib/TableGen/Record.cpp +++ b/llvm/lib/TableGen/Record.cpp @@ -46,8 +46,7 @@ using namespace llvm; // Context //===----------------------------------------------------------------------===// -namespace llvm { -namespace detail { +namespace llvm::detail { /// This class represents the internal implementation of the RecordKeeper. /// It contains all of the contextual static state of the Record classes. It is /// kept out-of-line to simplify dependencies, and also make it easier for @@ -100,8 +99,7 @@ struct RecordKeeperImpl { void dumpAllocationStats(raw_ostream &OS) const; }; -} // namespace detail -} // namespace llvm +} // namespace llvm::detail void detail::RecordKeeperImpl::dumpAllocationStats(raw_ostream &OS) const { // Dump memory allocation related stats. diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp index f928ded..3d31d8e 100644 --- a/llvm/lib/TableGen/TGParser.cpp +++ b/llvm/lib/TableGen/TGParser.cpp @@ -31,8 +31,6 @@ using namespace llvm; // Support Code for the Semantic Actions. 
//===----------------------------------------------------------------------===// -namespace llvm { - RecordsEntry::RecordsEntry(std::unique_ptr<Record> Rec) : Rec(std::move(Rec)) {} RecordsEntry::RecordsEntry(std::unique_ptr<ForeachLoop> Loop) : Loop(std::move(Loop)) {} @@ -41,6 +39,7 @@ RecordsEntry::RecordsEntry(std::unique_ptr<Record::AssertionInfo> Assertion) RecordsEntry::RecordsEntry(std::unique_ptr<Record::DumpInfo> Dump) : Dump(std::move(Dump)) {} +namespace llvm { struct SubClassReference { SMRange RefRange; const Record *Rec = nullptr; @@ -61,6 +60,7 @@ struct SubMultiClassReference { bool isInvalid() const { return MC == nullptr; } void dump() const; }; +} // end namespace llvm #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void SubMultiClassReference::dump() const { @@ -74,8 +74,6 @@ LLVM_DUMP_METHOD void SubMultiClassReference::dump() const { } #endif -} // end namespace llvm - static bool checkBitsConcrete(Record &R, const RecordVal &RV) { const auto *BV = cast<BitsInit>(RV.getValue()); for (unsigned i = 0, e = BV->getNumBits(); i != e; ++i) { diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 8d6eb91..4357264d 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -282,7 +282,7 @@ static cl::opt<bool> OrderFrameObjects("aarch64-order-frame-objects", static cl::opt<bool> SplitSVEObjects("aarch64-split-sve-objects", cl::desc("Split allocation of ZPR & PPR objects"), - cl::init(false), cl::Hidden); + cl::init(true), cl::Hidden); cl::opt<bool> EnableHomogeneousPrologEpilog( "homogeneous-prolog-epilog", cl::Hidden, diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 36c9cb6..bc6b931 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1010,6 +1010,36 @@ let Predicates = [HasSVE_or_SME] in { defm SEL_ZPZZ : sve_int_sel_vvv<"sel", vselect>; defm SPLICE_ZPZ : sve_int_perm_splice<"splice", AArch64splice>; + + // mul x (splat -1) -> neg x + def : Pat<(nxv16i8 (AArch64mul_m1 nxv16i1:$Op1, nxv16i8:$Op2, (nxv16i8 (splat_vector (i32 -1))))), + (NEG_ZPmZ_B $Op2, $Op1, $Op2)>; + def : Pat<(nxv8i16 (AArch64mul_m1 nxv8i1:$Op1, nxv8i16:$Op2, (nxv8i16 (splat_vector (i32 -1))))), + (NEG_ZPmZ_H $Op2, $Op1, $Op2)>; + def : Pat<(nxv4i32 (AArch64mul_m1 nxv4i1:$Op1, nxv4i32:$Op2, (nxv4i32 (splat_vector (i32 -1))))), + (NEG_ZPmZ_S $Op2, $Op1, $Op2)>; + def : Pat<(nxv2i64 (AArch64mul_m1 nxv2i1:$Op1, nxv2i64:$Op2, (nxv2i64 (splat_vector (i64 -1))))), + (NEG_ZPmZ_D $Op2, $Op1, $Op2)>; + + let AddedComplexity = 5 in { + def : Pat<(nxv16i8 (AArch64mul_p nxv16i1:$Op1, nxv16i8:$Op2, (nxv16i8 (splat_vector (i32 -1))))), + (NEG_ZPmZ_B_UNDEF $Op2, $Op1, $Op2)>; + def : Pat<(nxv8i16 (AArch64mul_p nxv8i1:$Op1, nxv8i16:$Op2, (nxv8i16 (splat_vector (i32 -1))))), + (NEG_ZPmZ_H_UNDEF $Op2, $Op1, $Op2)>; + def : Pat<(nxv4i32 (AArch64mul_p nxv4i1:$Op1, nxv4i32:$Op2, (nxv4i32 (splat_vector (i32 -1))))), + (NEG_ZPmZ_S_UNDEF $Op2, $Op1, $Op2)>; + def : Pat<(nxv2i64 (AArch64mul_p nxv2i1:$Op1, nxv2i64:$Op2, (nxv2i64 (splat_vector (i64 -1))))), + (NEG_ZPmZ_D_UNDEF $Op2, $Op1, $Op2)>; + } + + def : Pat<(nxv16i8 (AArch64mul_m1 nxv16i1:$Op1, (nxv16i8 (splat_vector (i32 -1))), nxv16i8:$Op2)), + (NEG_ZPmZ_B (DUP_ZI_B -1, 0), $Op1, $Op2)>; + def : Pat<(nxv8i16 (AArch64mul_m1 nxv8i1:$Op1, (nxv8i16 (splat_vector (i32 -1))), nxv8i16:$Op2)), + (NEG_ZPmZ_H 
(DUP_ZI_H -1, 0), $Op1, $Op2)>; + def : Pat<(nxv4i32 (AArch64mul_m1 nxv4i1:$Op1, (nxv4i32 (splat_vector (i32 -1))), nxv4i32:$Op2)), + (NEG_ZPmZ_S (DUP_ZI_S -1, 0), $Op1, $Op2)>; + def : Pat<(nxv2i64 (AArch64mul_m1 nxv2i1:$Op1, (nxv2i64 (splat_vector (i64 -1))), nxv2i64:$Op2)), + (NEG_ZPmZ_D (DUP_ZI_D -1, 0), $Op1, $Op2)>; } // End HasSVE_or_SME // COMPACT - word and doubleword diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 8c4b4f6..50a8754 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -5632,75 +5632,94 @@ InstructionCost AArch64TTIImpl::getPartialReductionCost( TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp, TTI::TargetCostKind CostKind) const { InstructionCost Invalid = InstructionCost::getInvalid(); - InstructionCost Cost(TTI::TCC_Basic); if (CostKind != TTI::TCK_RecipThroughput) return Invalid; - // Sub opcodes currently only occur in chained cases. - // Independent partial reduction subtractions are still costed as an add + if (VF.isFixed() && !ST->isSVEorStreamingSVEAvailable() && + (!ST->isNeonAvailable() || !ST->hasDotProd())) + return Invalid; + if ((Opcode != Instruction::Add && Opcode != Instruction::Sub) || OpAExtend == TTI::PR_None) return Invalid; + assert((BinOp || (OpBExtend == TTI::PR_None && !InputTypeB)) && + (!BinOp || (OpBExtend != TTI::PR_None && InputTypeB)) && + "Unexpected values for OpBExtend or InputTypeB"); + // We only support multiply binary operations for now, and for muls we // require the types being extended to be the same. - // NOTE: For muls AArch64 supports lowering mixed extensions to a usdot but - // only if the i8mm or sve/streaming features are available. - if (BinOp && (*BinOp != Instruction::Mul || InputTypeA != InputTypeB || - OpBExtend == TTI::PR_None || - (OpAExtend != OpBExtend && !ST->hasMatMulInt8() && - !ST->isSVEorStreamingSVEAvailable()))) + if (BinOp && (*BinOp != Instruction::Mul || InputTypeA != InputTypeB)) return Invalid; - assert((BinOp || (OpBExtend == TTI::PR_None && !InputTypeB)) && - "Unexpected values for OpBExtend or InputTypeB"); - EVT InputEVT = EVT::getEVT(InputTypeA); - EVT AccumEVT = EVT::getEVT(AccumType); + bool IsUSDot = OpBExtend != TTI::PR_None && OpAExtend != OpBExtend; + if (IsUSDot && !ST->hasMatMulInt8()) + return Invalid; + + unsigned Ratio = + AccumType->getScalarSizeInBits() / InputTypeA->getScalarSizeInBits(); + if (VF.getKnownMinValue() <= Ratio) + return Invalid; + + VectorType *InputVectorType = VectorType::get(InputTypeA, VF); + VectorType *AccumVectorType = + VectorType::get(AccumType, VF.divideCoefficientBy(Ratio)); + // We don't yet support all kinds of legalization. + auto TA = TLI->getTypeAction(AccumVectorType->getContext(), + EVT::getEVT(AccumVectorType)); + switch (TA) { + default: + return Invalid; + case TargetLowering::TypeLegal: + case TargetLowering::TypePromoteInteger: + case TargetLowering::TypeSplitVector: + break; + } + + // Check what kind of type-legalisation happens. 
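
To make the new VF guard concrete, a worked example with illustrative types: for i8 inputs accumulating into i32, Ratio = 32 / 8 = 4. A scalable VF of 16 gives a <vscale x 4 x i32> accumulator and passes the check, while VF = 4 would scale the accumulator down to <vscale x 1 x i32>, which cannot be lowered, hence the early return of Invalid when VF.getKnownMinValue() <= Ratio.
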
+ std::pair<InstructionCost, MVT> AccumLT = + getTypeLegalizationCost(AccumVectorType); + std::pair<InstructionCost, MVT> InputLT = + getTypeLegalizationCost(InputVectorType); - unsigned VFMinValue = VF.getKnownMinValue(); + InstructionCost Cost = InputLT.first * TTI::TCC_Basic; - if (VF.isScalable()) { - if (!ST->isSVEorStreamingSVEAvailable()) - return Invalid; + // Prefer using full types by costing half-full input types as more expensive. + if (TypeSize::isKnownLT(InputVectorType->getPrimitiveSizeInBits(), + TypeSize::getScalable(128))) + // FIXME: This can be removed after the cost of the extends are folded into + // the dot-product expression in VPlan, after landing: + // https://github.com/llvm/llvm-project/pull/147302 + Cost *= 2; - // Don't accept a partial reduction if the scaled accumulator is vscale x 1, - // since we can't lower that type. - unsigned Scale = - AccumEVT.getScalarSizeInBits() / InputEVT.getScalarSizeInBits(); - if (VFMinValue == Scale) - return Invalid; + if (ST->isSVEorStreamingSVEAvailable() && !IsUSDot) { + // i16 -> i64 is natively supported for udot/sdot + if (AccumLT.second.getScalarType() == MVT::i64 && + InputLT.second.getScalarType() == MVT::i16) + return Cost; + // i8 -> i64 is supported with an extra level of extends + if (AccumLT.second.getScalarType() == MVT::i64 && + InputLT.second.getScalarType() == MVT::i8) + // FIXME: This cost should probably be a little higher, e.g. Cost + 2 + // because it requires two extra extends on the inputs. But if we'd change + // that now, a regular reduction would be cheaper because the costs of + // the extends in the IR are still counted. This can be fixed + // after https://github.com/llvm/llvm-project/pull/147302 has landed. + return Cost; } - if (VF.isFixed() && - (!ST->isNeonAvailable() || !ST->hasDotProd() || AccumEVT == MVT::i64)) - return Invalid; - if (InputEVT == MVT::i8) { - switch (VFMinValue) { - default: - return Invalid; - case 8: - if (AccumEVT == MVT::i32) - Cost *= 2; - else if (AccumEVT != MVT::i64) - return Invalid; - break; - case 16: - if (AccumEVT == MVT::i64) - Cost *= 2; - else if (AccumEVT != MVT::i32) - return Invalid; - break; - } - } else if (InputEVT == MVT::i16) { - // FIXME: Allow i32 accumulator but increase cost, as we would extend - // it to i64. - if (VFMinValue != 8 || AccumEVT != MVT::i64) - return Invalid; - } else - return Invalid; + // i8 -> i32 is natively supported for udot/sdot/usdot, both for NEON and SVE. + if (ST->isSVEorStreamingSVEAvailable() || + (AccumLT.second.isFixedLengthVector() && ST->isNeonAvailable() && + ST->hasDotProd())) { + if (AccumLT.second.getScalarType() == MVT::i32 && + InputLT.second.getScalarType() == MVT::i8) + return Cost; + } - return Cost; + // Add additional cost for the extends that would need to be inserted. 
+ return Cost + 4; } InstructionCost diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 7003a40..9446144 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -2126,6 +2126,8 @@ def FeatureISAVersion12_50 : FeatureSet< FeatureLdsBarrierArriveAtomic, FeatureSetPrioIncWgInst, Feature45BitNumRecordsBufferResource, + FeatureSupportsXNACK, + FeatureXNACK, ]>; def FeatureISAVersion12_51 : FeatureSet< diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 2ba3156..9dd64e0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -131,10 +131,8 @@ static bool isDSAddress(const Constant *C) { return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS; } -/// Returns true if the function requires the implicit argument be passed -/// regardless of the function contents. -static bool funcRequiresHostcallPtr(const Function &F) { - // Sanitizers require the hostcall buffer passed in the implicit arguments. +/// Returns true if sanitizer attributes are present on a function. +static bool hasSanitizerAttributes(const Function &F) { return F.hasFnAttribute(Attribute::SanitizeAddress) || F.hasFnAttribute(Attribute::SanitizeThread) || F.hasFnAttribute(Attribute::SanitizeMemory) || @@ -469,15 +467,21 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { // If the function requires the implicit arg pointer due to sanitizers, // assume it's needed even if explicitly marked as not requiring it. - const bool NeedsHostcall = funcRequiresHostcallPtr(*F); - if (NeedsHostcall) { + // Flat scratch initialization is needed because `asan_malloc_impl` + // calls introduced later in pipeline will have flat scratch accesses. + // FIXME: FLAT_SCRATCH_INIT will not be required here if device-libs + // implementation for `asan_malloc_impl` is updated. + const bool HasSanitizerAttrs = hasSanitizerAttributes(*F); + if (HasSanitizerAttrs) { removeAssumedBits(IMPLICIT_ARG_PTR); removeAssumedBits(HOSTCALL_PTR); + removeAssumedBits(FLAT_SCRATCH_INIT); } for (auto Attr : ImplicitAttrs) { - if (NeedsHostcall && - (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR)) + if (HasSanitizerAttrs && + (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR || + Attr.first == FLAT_SCRATCH_INIT)) continue; if (F->hasFnAttribute(Attr.second)) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp index 6efa78e..a4ef524 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp @@ -608,8 +608,6 @@ public: ? LDSToKernelsThatNeedToAccessItIndirectly[HybridModuleRoot] : EmptySet; - const size_t HybridModuleRootKernelsSize = HybridModuleRootKernels.size(); - for (auto &K : LDSToKernelsThatNeedToAccessItIndirectly) { // Each iteration of this loop assigns exactly one global variable to // exactly one of the implementation strategies. 
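
The simplification in the next hunk rests on a small identity: for two DenseSets A and B, A == B holds exactly when A.size() == B.size() and A is a subset of B, and DenseSet's operator== already performs the size test before probing membership. A sketch, assuming the pass's DenseSet<Function *> kernel sets (set_is_subset is from llvm/ADT/SetOperations.h):

    // Equivalent to A == B, so the explicit size snapshot is unnecessary.
    static bool sameKernelSet(const DenseSet<Function *> &A,
                              const DenseSet<Function *> &B) {
      return A.size() == B.size() && set_is_subset(A, B);
    }
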
@@ -649,8 +647,7 @@ public: ModuleScopeVariables.insert(GV); } else if (K.second.size() == 1) { KernelAccessVariables.insert(GV); - } else if (K.second.size() == HybridModuleRootKernelsSize && - set_is_subset(K.second, HybridModuleRootKernels)) { + } else if (K.second == HybridModuleRootKernels) { ModuleScopeVariables.insert(GV); } else { TableLookupVariables.insert(GV); diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 2d5ae29..2120bf8 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -2303,7 +2303,10 @@ Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1( KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n'; if (!hasArchitectedFlatScratch()) KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n'; - KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n'; + bool ReservedXnackMask = STI.hasFeature(AMDGPU::FeatureXNACK); + assert(!ReservedXnackMask || STI.hasFeature(AMDGPU::FeatureSupportsXNACK)); + KdStream << Indent << ".amdhsa_reserve_xnack_mask " << ReservedXnackMask + << '\n'; KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n"; CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY); diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index fed3778..82789bc 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -722,7 +722,8 @@ bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const { return false; } - if (!MRI->constrainRegClass(New->getReg(), ConstrainRC)) { + if (New->getReg().isVirtual() && + !MRI->constrainRegClass(New->getReg(), ConstrainRC)) { LLVM_DEBUG(dbgs() << "Cannot constrain " << printReg(New->getReg(), TRI) << TRI->getRegClassName(ConstrainRC) << '\n'); return false; diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index e4b3528..0189e7b 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -306,7 +306,8 @@ class PrologEpilogSGPRSpillBuilder { buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL, TmpVGPR, FI, FrameReg, DwordOff); - MRI.constrainRegClass(SubReg, &AMDGPU::SReg_32_XM0RegClass); + assert(SubReg.isPhysical()); + BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg) .addReg(TmpVGPR, RegState::Kill); DwordOff += 4; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index f7265c5..e233457 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -18860,31 +18860,6 @@ SITargetLowering::getTargetMMOFlags(const Instruction &I) const { return Flags; } -bool SITargetLowering::checkForPhysRegDependency( - SDNode *Def, SDNode *User, unsigned Op, const TargetRegisterInfo *TRI, - const TargetInstrInfo *TII, MCRegister &PhysReg, int &Cost) const { - if (User->getOpcode() != ISD::CopyToReg) - return false; - if (!Def->isMachineOpcode()) - return false; - MachineSDNode *MDef = dyn_cast<MachineSDNode>(Def); - if (!MDef) - return false; - - unsigned ResNo = User->getOperand(Op).getResNo(); - if (User->getOperand(Op)->getValueType(ResNo) != MVT::i1) - return false; - const MCInstrDesc &II = TII->get(MDef->getMachineOpcode()); - if (II.isCompare() && II.hasImplicitDefOfPhysReg(AMDGPU::SCC)) { - PhysReg = AMDGPU::SCC; - const 
TargetRegisterClass *RC =
-          TRI->getMinimalPhysRegClass(PhysReg, Def->getSimpleValueType(ResNo));
-      Cost = RC->getCopyCost();
-      return true;
-    }
-  return false;
-}
-
 void SITargetLowering::emitExpandAtomicAddrSpacePredicate(
     Instruction *AI) const {
   // Given: atomicrmw fadd ptr %addr, float %val ordering
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index a474dab..74e58f4 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -561,11 +561,6 @@ public:
   bool denormalsEnabledForType(const SelectionDAG &DAG, EVT VT) const;
   bool denormalsEnabledForType(LLT Ty, const MachineFunction &MF) const;
 
-  bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
-                                 const TargetRegisterInfo *TRI,
-                                 const TargetInstrInfo *TII,
-                                 MCRegister &PhysReg, int &Cost) const override;
-
   bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts,
                                     const SelectionDAG &DAG, bool SNaN = false,
                                     unsigned Depth = 0) const override;
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 76bfce8..5e27b37 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1013,6 +1013,15 @@ void WaitcntBrackets::updateByEvent(WaitEventType E, MachineInstr &Inst) {
       }
     }
   } else if (T == X_CNT) {
+    WaitEventType OtherEvent = E == SMEM_GROUP ? VMEM_GROUP : SMEM_GROUP;
+    if (PendingEvents & (1 << OtherEvent)) {
+      // Hardware inserts an implicit xcnt between interleaved
+      // SMEM and VMEM operations. So there will never be
+      // outstanding address translations for both SMEM and
+      // VMEM at the same time.
+      setScoreLB(T, CurrScore - 1);
+      PendingEvents &= ~(1 << OtherEvent);
+    }
     for (const MachineOperand &Op : Inst.all_uses())
       setScoreByOperand(&Inst, Op, T, CurrScore);
   } else /* LGKM_CNT || EXP_CNT || VS_CNT || NUM_INST_CNTS */ {
@@ -2220,6 +2229,8 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
   // Now look at the instruction opcode. If it is a memory access
   // instruction, update the upper-bound of the appropriate counter's
   // bracket and the destination operand scores.
+  // For architectures with X_CNT, mark the source address operands
+  // with the appropriate counter values.
   // TODO: Use the (TSFlags & SIInstrFlags::DS_CNT) property everywhere.
 
   bool IsVMEMAccess = false;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 56435a5..cda8069 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2112,8 +2112,6 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
   case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
     MI.setDesc(get(AMDGPU::V_READLANE_B32));
-    MI.getMF()->getRegInfo().constrainRegClass(MI.getOperand(0).getReg(),
-                                               &AMDGPU::SReg_32_XM0RegClass);
     break;
   case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
     Register Dst = MI.getOperand(0).getReg();
@@ -8117,21 +8115,14 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
   // hope for the best.
   if (Inst.isCopy() && DstReg.isPhysical() &&
       RI.isVGPR(MRI, Inst.getOperand(1).getReg())) {
-    // TODO: Only works for 32 bit registers.
-    if (MRI.constrainRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass)) {
-      BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
-              get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
-          .add(Inst.getOperand(1));
-    } else {
-      Register NewDst =
-          MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
-      BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
-              get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
-          .add(Inst.getOperand(1));
-      BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(), get(AMDGPU::COPY),
-              DstReg)
-          .addReg(NewDst);
-    }
+    Register NewDst = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+    BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
+            get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
+        .add(Inst.getOperand(1));
+    BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(), get(AMDGPU::COPY),
+            DstReg)
+        .addReg(NewDst);
+
     Inst.eraseFromParent();
     return;
   }
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 205237f..3c2dd42 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2222,8 +2222,6 @@ bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI,
     // Don't need to write VGPR out.
   }
 
-  MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
-
   // Restore clobbered registers in the specified restore block.
   MI = RestoreMBB.end();
   SB.setMI(&RestoreMBB, MI);
@@ -2238,7 +2236,8 @@ bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI,
         SB.NumSubRegs == 1
             ? SB.SuperReg
             : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
-    MRI.constrainRegClass(SubReg, &AMDGPU::SReg_32_XM0RegClass);
+
+    assert(SubReg.isPhysical());
     bool LastSubReg = (i + 1 == e);
     auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
                        SubReg)
@@ -3059,8 +3058,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
       if (IsSALU && LiveSCC) {
         Register NewDest;
         if (IsCopy) {
-          MF->getRegInfo().constrainRegClass(ResultReg,
-                                             &AMDGPU::SReg_32_XM0RegClass);
+          assert(ResultReg.isPhysical());
           NewDest = ResultReg;
         } else {
           NewDest = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
@@ -3190,8 +3188,6 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
         Register NewDest;
         if (IsCopy) {
-          MF->getRegInfo().constrainRegClass(ResultReg,
-                                             &AMDGPU::SReg_32_XM0RegClass);
           NewDest = ResultReg;
         } else {
           NewDest = RS->scavengeRegisterBackwards(
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 8f1dd62..5630580 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1163,6 +1163,22 @@ def VS_64_Lo256 : SIRegisterClass<"AMDGPU", VReg_64.RegTypes, 32,
   let HasSGPR = 1;
   let Size = 64;
 }
+
+def VS_128 : SIRegisterClass<"AMDGPU", VReg_128.RegTypes, 32,
+                             (add VReg_128, SReg_128)> {
+  let isAllocatable = 0;
+  let HasVGPR = 1;
+  let HasSGPR = 1;
+  let Size = 128;
+}
+
+def VS_128_Align2 : SIRegisterClass<"AMDGPU", VReg_128.RegTypes, 32,
+                                    (add VReg_128_Align2, SReg_128)> {
+  let isAllocatable = 0;
+  let HasVGPR = 1;
+  let HasSGPR = 1;
+  let Size = 128;
+}
 } // End GeneratePressureSet = 0
 
 // Define a register tuple class, along with one requiring an even
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index fa130a1..26ff54c 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -775,6 +775,16 @@ class VectorType;
     bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                            CombineLevel Level) const override;
 
+    /// Return true if it is profitable to fold a pair of shifts into a mask.
+    bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override {
+      EVT VT = Y.getValueType();
+
+      if (VT.isVector())
+        return false;
+
+      return VT.getScalarSizeInBits() <= 32;
+    }
+
     bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT,
                                               unsigned SelectOpcode, SDValue X,
                                               SDValue Y) const override;
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index d0dfa47..a94e131 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -359,6 +359,8 @@ HexagonTargetLowering::initializeHVXLowering() {
     setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand);
     setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand);
     setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand);
+    setCondCodeAction(ISD::SETUO, MVT::v64f16, Expand);
+    setCondCodeAction(ISD::SETO, MVT::v64f16, Expand);
 
     setCondCodeAction(ISD::SETNE, MVT::v32f32, Expand);
     setCondCodeAction(ISD::SETLE, MVT::v32f32, Expand);
@@ -372,6 +374,8 @@ HexagonTargetLowering::initializeHVXLowering() {
     setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand);
     setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand);
     setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand);
+    setCondCodeAction(ISD::SETUO, MVT::v32f32, Expand);
+    setCondCodeAction(ISD::SETO, MVT::v32f32, Expand);
 
     // Boolean vectors.
@@ -449,6 +453,7 @@ HexagonTargetLowering::initializeHVXLowering() {
     // Include cases which are not hander earlier
     setOperationAction(ISD::UINT_TO_FP, MVT::v32i1, Custom);
     setOperationAction(ISD::UINT_TO_FP, MVT::v64i1, Custom);
+    setOperationAction(ISD::SINT_TO_FP, MVT::v32i1, Custom);
 
     setTargetDAGCombine({ISD::CONCAT_VECTORS, ISD::TRUNCATE, ISD::VSELECT});
   }
@@ -2337,7 +2342,7 @@ HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
   return ExpandHvxFpToInt(Op, DAG);
 }
 
-// For vector type v32i1 uint_to_fp to v32f32:
+// For vector type v32i1 uint_to_fp/sint_to_fp to v32f32:
 // R1 = #1, R2 holds the v32i1 param
 // V1 = vsplat(R1)
 // V2 = vsplat(R2)
@@ -2464,7 +2469,7 @@ HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
   MVT IntTy = ty(Op.getOperand(0)).getVectorElementType();
   MVT FpTy = ResTy.getVectorElementType();
 
-  if (Op.getOpcode() == ISD::UINT_TO_FP) {
+  if (Op.getOpcode() == ISD::UINT_TO_FP || Op.getOpcode() == ISD::SINT_TO_FP) {
     if (ResTy == MVT::v32f32 && ty(Op.getOperand(0)) == MVT::v32i1)
       return LowerHvxPred32ToFp(Op, DAG);
     if (ResTy == MVT::v64f16 && ty(Op.getOperand(0)) == MVT::v64i1)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 4cfbfca..7ddf996 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2860,8 +2860,7 @@ static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node, EVT ResTy,
                                             unsigned first) {
   unsigned NumElts = ResTy.getVectorNumElements();
 
-  assert(first >= 0 &&
-         first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());
+  assert(first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());
 
   SmallVector<SDValue, 16> Ops(Node->op_begin() + first,
                                Node->op_begin() + first + NumElts);
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index 395d2c4..662d3f6 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -629,7 +629,7 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
   getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FTAN, G_FPOW, G_FLOG, G_FLOG2,
                               G_FLOG10, G_FEXP, G_FEXP2, G_FEXP10, G_FACOS,
                               G_FASIN, G_FATAN, G_FATAN2, G_FCOSH, G_FSINH,
-                              G_FTANH})
+                              G_FTANH, G_FMODF})
       .libcallFor({s32, s64})
       .libcallFor(ST.is64Bit(), {s128});
   getActionDefinitionsBuilder({G_FPOWI, G_FLDEXP})
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 7d4535a..b37b740 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -1560,7 +1560,7 @@ static MCRegister getRVVBaseRegister(const RISCVRegisterInfo &TRI,
   MCRegister BaseReg = TRI.getSubReg(Reg, RISCV::sub_vrm1_0);
   // If it's not a grouped vector register, it doesn't have subregister, so
   // the base register is just itself.
-  if (BaseReg == RISCV::NoRegister)
+  if (!BaseReg.isValid())
     BaseReg = Reg;
   return BaseReg;
 }
diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td
index af1ceb6..cf6f83a 100644
--- a/llvm/lib/Target/RISCV/RISCVGISel.td
+++ b/llvm/lib/Target/RISCV/RISCVGISel.td
@@ -110,16 +110,16 @@ def : StPat<truncstorei8, SB, GPR, i16>;
 
 let Predicates = [HasAtomicLdSt] in {
   // Prefer unsigned due to no c.lb in Zcb.
-  def : LdPat<atomic_load_aext_8, LBU, i16>;
-  def : LdPat<atomic_load_nonext_16, LH, i16>;
+  def : LdPat<relaxed_load<atomic_load_aext_8>, LBU, i16>;
+  def : LdPat<relaxed_load<atomic_load_nonext_16>, LH, i16>;
 
-  def : StPat<atomic_store_8, SB, GPR, i16>;
-  def : StPat<atomic_store_16, SH, GPR, i16>;
+  def : StPat<relaxed_store<atomic_store_8>, SB, GPR, i16>;
+  def : StPat<relaxed_store<atomic_store_16>, SH, GPR, i16>;
 }
 
 let Predicates = [HasAtomicLdSt, IsRV64] in {
   // Load pattern is in RISCVInstrInfoA.td and shared with RV32.
-  def : StPat<atomic_store_32, SW, GPR, i32>;
+  def : StPat<relaxed_store<atomic_store_32>, SW, GPR, i32>;
}
 
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 50649cf..dcce2d2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -533,7 +533,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI, ISD::FCOS,
                         ISD::FSIN, ISD::FSINCOS, ISD::FEXP, ISD::FEXP2,
                         ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
-                        ISD::FLOG10, ISD::FLDEXP, ISD::FFREXP},
+                        ISD::FLOG10, ISD::FLDEXP, ISD::FFREXP, ISD::FMODF},
                        MVT::f16, Promote);
 
     // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 6a6ead2..cf8d120 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -128,7 +128,7 @@ static bool hasUndefinedPassthru(const MachineInstr &MI) {
   // All undefined passthrus should be $noreg: see
   // RISCVDAGToDAGISel::doPeepholeNoRegPassThru
   const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
-  return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef();
+  return !UseMO.getReg().isValid() || UseMO.isUndef();
 }
 
 /// Return true if \p MI is a copy that will be lowered to one or more vmvNr.vs.
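[Editorial aside: a recurring cleanup in the RISC-V hunks above and below swaps explicit comparisons against RISCV::NoRegister for Register::isValid(). Both spell the same test, because register number 0 is the shared "no register" sentinel and a default-constructed llvm::Register holds exactly that value. A minimal stand-alone sketch of the idiom, using a toy Register class that mirrors the LLVM one rather than including the real header:

#include <cassert>

// Toy stand-in for llvm::Register: register number 0 means "no register",
// so a default-constructed value is invalid by definition.
class Register {
  unsigned Reg = 0;

public:
  constexpr Register() = default;
  constexpr Register(unsigned R) : Reg(R) {}
  constexpr bool isValid() const { return Reg != 0; }
};

int main() {
  Register None;    // what VLOp.setReg(Register()) assigns
  Register Tmp(42); // any real register number is non-zero
  assert(!None.isValid()); // replaces Reg == RISCV::NoRegister
  assert(Tmp.isValid());   // replaces Reg != RISCV::NoRegister
  return 0;
}

The isValid() spelling also drops the target-specific enum, which is why the patch can write VLOp.setReg(Register()) instead of VLOp.setReg(RISCV::NoRegister).]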
@@ -1454,7 +1454,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
       Register Reg = VLOp.getReg();
 
       // Erase the AVL operand from the instruction.
-      VLOp.setReg(RISCV::NoRegister);
+      VLOp.setReg(Register());
       VLOp.setIsKill(false);
       if (LIS) {
         LiveInterval &LI = LIS->getInterval(Reg);
@@ -1663,7 +1663,7 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
     if (!MO.isReg() || !MO.getReg().isVirtual())
       return;
     Register OldVLReg = MO.getReg();
-    MO.setReg(RISCV::NoRegister);
+    MO.setReg(Register());
     if (LIS)
       LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 1e6b04f8..7db4832 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1364,7 +1364,7 @@ void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
       RS->scavengeRegisterBackwards(RISCV::GPRRegClass, MI.getIterator(),
                                     /*RestoreAfter=*/false, /*SpAdj=*/0,
                                     /*AllowSpill=*/false);
-  if (TmpGPR != RISCV::NoRegister)
+  if (TmpGPR.isValid())
     RS->setRegUsed(TmpGPR);
   else {
     // The case when there is no scavenged register needs special handling.
@@ -3021,7 +3021,7 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
       ErrInfo = "Invalid operand type for VL operand";
       return false;
     }
-    if (Op.isReg() && Op.getReg() != RISCV::NoRegister) {
+    if (Op.isReg() && Op.getReg().isValid()) {
      const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
      auto *RC = MRI.getRegClass(Op.getReg());
      if (!RISCV::GPRRegClass.hasSubClassEq(RC)) {
diff --git a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
index f8d33ae..54569b1 100644
--- a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
@@ -259,7 +259,7 @@ static RegImmPair getRegImmPairPreventingCompression(const MachineInstr &MI) {
   if (isCompressibleLoad(MI) || isCompressibleStore(MI)) {
     const MachineOperand &MOImm = MI.getOperand(2);
     if (!MOImm.isImm())
-      return RegImmPair(RISCV::NoRegister, 0);
+      return RegImmPair(Register(), 0);
 
     int64_t Offset = MOImm.getImm();
     int64_t NewBaseAdjust = getBaseAdjustForCompression(Offset, Opcode);
@@ -292,7 +292,7 @@ static RegImmPair getRegImmPairPreventingCompression(const MachineInstr &MI) {
       }
     }
   }
-  return RegImmPair(RISCV::NoRegister, 0);
+  return RegImmPair(Register(), 0);
 }
 
 // Check all uses after FirstMI of the given register, keeping a vector of
diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index ffba284..fdf9a4f 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -382,7 +382,7 @@ bool RISCVVectorPeephole::convertAllOnesVMergeToVMv(MachineInstr &MI) const {
   // vmv.v.v doesn't have a mask operand, so we may be able to inflate the
   // register class for the destination and passthru operands e.g. VRNoV0 -> VR
   MRI->recomputeRegClass(MI.getOperand(0).getReg());
-  if (MI.getOperand(1).getReg() != RISCV::NoRegister)
+  if (MI.getOperand(1).getReg().isValid())
     MRI->recomputeRegClass(MI.getOperand(1).getReg());
   return true;
 }
@@ -448,7 +448,7 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) {
   Register FalseReg = MI.getOperand(2).getReg();
   if (TruePassthruReg != FalseReg) {
     // If True's passthru is undef see if we can change it to False
-    if (TruePassthruReg != RISCV::NoRegister ||
+    if (TruePassthruReg.isValid() ||
        !MRI->hasOneUse(MI.getOperand(3).getReg()) ||
        !ensureDominates(MI.getOperand(2), *True))
      return false;
@@ -467,7 +467,7 @@
   // vmv.v.v doesn't have a mask operand, so we may be able to inflate the
   // register class for the destination and passthru operands e.g. VRNoV0 -> VR
   MRI->recomputeRegClass(MI.getOperand(0).getReg());
-  if (MI.getOperand(1).getReg() != RISCV::NoRegister)
+  if (MI.getOperand(1).getReg().isValid())
     MRI->recomputeRegClass(MI.getOperand(1).getReg());
   return true;
 }
@@ -517,7 +517,7 @@ bool RISCVVectorPeephole::convertToUnmasked(MachineInstr &MI) const {
   if (RISCVII::isFirstDefTiedToFirstUse(MaskedMCID)) {
     unsigned PassthruOpIdx = MI.getNumExplicitDefs();
     if (HasPassthru) {
-      if (MI.getOperand(PassthruOpIdx).getReg() != RISCV::NoRegister)
+      if (MI.getOperand(PassthruOpIdx).getReg())
         MRI->recomputeRegClass(MI.getOperand(PassthruOpIdx).getReg());
     } else
       MI.removeOperand(PassthruOpIdx);
@@ -576,7 +576,7 @@ static bool dominates(MachineBasicBlock::const_iterator A,
 bool RISCVVectorPeephole::ensureDominates(const MachineOperand &MO,
                                           MachineInstr &Src) const {
   assert(MO.getParent()->getParent() == Src.getParent());
-  if (!MO.isReg() || MO.getReg() == RISCV::NoRegister)
+  if (!MO.isReg() || !MO.getReg().isValid())
     return true;
 
   MachineInstr *Def = MRI->getVRegDef(MO.getReg());
@@ -593,7 +593,7 @@ bool RISCVVectorPeephole::ensureDominates(const MachineOperand &MO,
 bool RISCVVectorPeephole::foldUndefPassthruVMV_V_V(MachineInstr &MI) {
   if (RISCV::getRVVMCOpcode(MI.getOpcode()) != RISCV::VMV_V_V)
     return false;
-  if (MI.getOperand(1).getReg() != RISCV::NoRegister)
+  if (MI.getOperand(1).getReg().isValid())
     return false;
 
   // If the input was a pseudo with a policy operand, we can give it a tail
@@ -654,7 +654,7 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
   // Src needs to have the same passthru as VMV_V_V
   MachineOperand &SrcPassthru = Src->getOperand(Src->getNumExplicitDefs());
-  if (SrcPassthru.getReg() != RISCV::NoRegister &&
+  if (SrcPassthru.getReg().isValid() &&
      SrcPassthru.getReg() != Passthru.getReg())
    return false;
@@ -672,7 +672,7 @@
   if (SrcPassthru.getReg() != Passthru.getReg()) {
     SrcPassthru.setReg(Passthru.getReg());
     // If Src is masked then its passthru needs to be in VRNoV0.
-    if (Passthru.getReg() != RISCV::NoRegister)
+    if (Passthru.getReg().isValid())
       MRI->constrainRegClass(
           Passthru.getReg(),
           TII->getRegClass(Src->getDesc(), SrcPassthru.getOperandNo(), TRI));
diff --git a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
index 6c19049..024030d 100644
--- a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -206,8 +206,8 @@ Filler::findDelayInstr(MachineBasicBlock &MBB,
     if (!done)
       --I;
 
-    // skip debug instruction
-    if (I->isDebugInstr())
+    // Skip meta instructions.
+    if (I->isMetaInstruction())
      continue;
 
    if (I->hasUnmodeledSideEffects() || I->isInlineAsm() || I->isPosition() ||
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index 143c4c4..e7709ef 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -149,6 +149,10 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
         });
   }
 
+  getActionDefinitionsBuilder({G_UMIN, G_UMAX, G_SMIN, G_SMAX})
+      .widenScalarToNextPow2(0, /*Min=*/32)
+      .lower();
+
   // integer addition/subtraction
   getActionDefinitionsBuilder({G_ADD, G_SUB})
       .legalFor({s8, s16, s32})
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index cda5568..931a10b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -13783,10 +13783,12 @@ static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
   // so prevents folding a load into this instruction or making a copy.
   const int UnpackLoMask[] = {0, 0, 1, 1};
   const int UnpackHiMask[] = {2, 2, 3, 3};
-  if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2))
-    Mask = UnpackLoMask;
-  else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2))
-    Mask = UnpackHiMask;
+  if (!isSingleElementRepeatedMask(Mask)) {
+    if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2))
+      Mask = UnpackLoMask;
+    else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2))
+      Mask = UnpackHiMask;
+  }
 
   return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
                      getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
@@ -45457,7 +45459,8 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
                                   const SDLoc &DL,
                                   const X86Subtarget &Subtarget) {
   EVT SrcVT = Src.getValueType();
-  if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
+  if (Subtarget.useSoftFloat() || !SrcVT.isSimple() ||
+      SrcVT.getScalarType() != MVT::i1)
     return SDValue();
 
   // Recognize the IR pattern for the movmsk intrinsic under SSE1 before type
@@ -58134,6 +58137,14 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
   if (SDValue V = combineToHorizontalAddSub(N, DAG, Subtarget))
     return V;
 
+  // Prefer VSHLI to reduce uses, X86FixupInstTunings may revert this depending
+  // on the scheduler model. Limit multiple users to AVX+ targets to prevent
+  // introducing extra register moves.
+  if (Op0 == Op1 && supportedVectorShiftWithImm(VT, Subtarget, ISD::SHL))
+    if (Subtarget.hasAVX() || N->isOnlyUserOf(Op0.getNode()))
+      return getTargetVShiftByConstNode(X86ISD::VSHLI, DL, VT.getSimpleVT(),
+                                        Op0, 1, DAG);
+
   // Canonicalize hidden LEA pattern:
   // Fold (add (sub (shl x, c), y), z) -> (sub (add (shl x, c), z), y)
   // iff c < 4
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index cf6d0ec..e1e24a9 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -318,18 +318,18 @@ Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
 // * Single constant active lane -> store
 // * Narrow width by halfs excluding zero/undef lanes
 Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
+  Value *StorePtr = II.getArgOperand(1);
+  Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
   auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
   if (!ConstMask)
     return nullptr;
 
   // If the mask is all zeros, this instruction does nothing.
-  if (ConstMask->isNullValue())
+  if (maskIsAllZeroOrUndef(ConstMask))
     return eraseInstFromFunction(II);
 
   // If the mask is all ones, this is a plain vector store of the 1st argument.
-  if (ConstMask->isAllOnesValue()) {
-    Value *StorePtr = II.getArgOperand(1);
-    Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
+  if (maskIsAllOneOrUndef(ConstMask)) {
     StoreInst *S =
         new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
     S->copyMetadata(II);
@@ -389,7 +389,7 @@ Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
     return nullptr;
 
   // If the mask is all zeros, a scatter does nothing.
-  if (ConstMask->isNullValue())
+  if (maskIsAllZeroOrUndef(ConstMask))
     return eraseInstFromFunction(II);
 
   // Vector splat address -> scalar store
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 26e17cc..b9b5b58 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -2287,6 +2287,35 @@ bool GVNPass::processLoad(LoadInst *L) {
   return true;
 }
 
+// Attempt to process masked loads which have loaded from
+// masked stores with the same mask
+bool GVNPass::processMaskedLoad(IntrinsicInst *I) {
+  if (!MD)
+    return false;
+  MemDepResult Dep = MD->getDependency(I);
+  Instruction *DepInst = Dep.getInst();
+  if (!DepInst || !Dep.isLocal() || !Dep.isDef())
+    return false;
+
+  Value *Mask = I->getOperand(2);
+  Value *Passthrough = I->getOperand(3);
+  Value *StoreVal;
+  if (!match(DepInst, m_MaskedStore(m_Value(StoreVal), m_Value(), m_Value(),
+                                    m_Specific(Mask))) ||
+      StoreVal->getType() != I->getType())
+    return false;
+
+  // Remove the load but generate a select for the passthrough
+  Value *OpToForward = llvm::SelectInst::Create(Mask, StoreVal, Passthrough, "",
+                                                I->getIterator());
+
+  ICF->removeUsersOf(I);
+  I->replaceAllUsesWith(OpToForward);
+  salvageAndRemoveInstruction(I);
+  ++NumGVNLoad;
+  return true;
+}
+
 /// Return a pair the first field showing the value number of \p Exp and the
 /// second field showing whether it is a value number newly created.
 std::pair<uint32_t, bool>
@@ -2734,6 +2763,10 @@ bool GVNPass::processInstruction(Instruction *I) {
     return false;
   }
 
+  if (match(I, m_Intrinsic<Intrinsic::masked_load>()) &&
+      processMaskedLoad(cast<IntrinsicInst>(I)))
+    return true;
+
   // For conditional branches, we can perform simple conditional propagation on
   // the condition value itself.
   if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 7750687..cb6bfb2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8694,7 +8694,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
   Plan->addVF(VF);
 
   if (!VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
-          Plan,
+          *Plan,
          [this](PHINode *P) {
            return Legal->getIntOrFpInductionDescriptor(P);
          },
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index f76777b..ca63bf3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -45,13 +45,13 @@ static cl::opt<bool> EnableWideActiveLaneMask(
     cl::desc("Enable use of wide get active lane mask instructions"));
 
 bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
-    VPlanPtr &Plan,
+    VPlan &Plan,
     function_ref<const InductionDescriptor *(PHINode *)>
        GetIntOrFpInductionDescriptor,
     const TargetLibraryInfo &TLI) {
 
   ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
-      Plan->getVectorLoopRegion());
+      Plan.getVectorLoopRegion());
   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
     // Skip blocks outside region
     if (!VPBB->getParent())
@@ -77,11 +77,11 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
           for (VPValue *Op : PhiR->operands())
             NewRecipe->addOperand(Op);
         } else {
-          VPValue *Start = Plan->getOrAddLiveIn(II->getStartValue());
+          VPValue *Start = Plan.getOrAddLiveIn(II->getStartValue());
           VPValue *Step =
-              vputils::getOrCreateVPValueForSCEVExpr(*Plan, II->getStep());
+              vputils::getOrCreateVPValueForSCEVExpr(Plan, II->getStep());
           NewRecipe = new VPWidenIntOrFpInductionRecipe(
-              Phi, Start, Step, &Plan->getVF(), *II, Ingredient.getDebugLoc());
+              Phi, Start, Step, &Plan.getVF(), *II, Ingredient.getDebugLoc());
         }
       } else {
         assert(isa<VPInstruction>(&Ingredient) &&
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 4c65cb7..2f00e51 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -138,7 +138,7 @@ struct VPlanTransforms {
   /// widen recipes. Returns false if any VPInstructions could not be converted
   /// to a wide recipe if needed.
   LLVM_ABI_FOR_TEST static bool tryToConvertVPInstructionsToVPRecipes(
-      VPlanPtr &Plan,
+      VPlan &Plan,
       function_ref<const InductionDescriptor *(PHINode *)>
          GetIntOrFpInductionDescriptor,
      const TargetLibraryInfo &TLI);
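[Editorial aside: the GVN change above is the one that most repays a worked example. processMaskedLoad forwards a masked store to a masked load with the identical mask, and the replacement select encodes the lane semantics: active lanes read the stored vector, inactive lanes keep the load's passthrough operand. A lane-by-lane model of that identity in plain C++ (hypothetical helper name, not an LLVM API):

#include <array>
#include <cstddef>

// Scalar model of the select GVN emits for a forwarded masked load:
// lanes the masked store actually wrote come from the stored vector,
// all other lanes come from the load's passthrough operand.
template <typename T, std::size_t N>
std::array<T, N> forwardMaskedLoad(const std::array<bool, N> &Mask,
                                   const std::array<T, N> &StoreVal,
                                   const std::array<T, N> &Passthrough) {
  std::array<T, N> Result{};
  for (std::size_t I = 0; I != N; ++I)
    Result[I] = Mask[I] ? StoreVal[I] : Passthrough[I];
  return Result;
}

With Mask = {true, false}, StoreVal = {1, 2} and Passthrough = {9, 9}, the forwarded result is {1, 9}: lane 1 must come from the passthrough because the store never wrote that lane, which is why the transform needs the select rather than forwarding StoreVal alone.]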