diff options
Diffstat (limited to 'llvm/lib/Transforms')
20 files changed, 324 insertions, 365 deletions
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp index 9b9e2ba..9150b58 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp @@ -459,7 +459,7 @@ void TruncInstCombine::ReduceExpressionGraph(Type *SclTy) { Value *Op0 = I->getOperand(0); Value *LHS = getReducedOperand(I->getOperand(1), SclTy); Value *RHS = getReducedOperand(I->getOperand(2), SclTy); - Res = Builder.CreateSelect(Op0, LHS, RHS); + Res = Builder.CreateSelect(Op0, LHS, RHS, "", I); break; } case Instruction::PHI: { diff --git a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp index 9115946..f166fef 100644 --- a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp @@ -24,6 +24,9 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Transforms/Utils/CallGraphUpdater.h" #include "llvm/Transforms/Utils/Cloning.h" @@ -33,6 +36,11 @@ using namespace llvm; #define DEBUG_TYPE "coro-annotation-elide" +static cl::opt<float> CoroElideBranchRatio( + "coro-elide-branch-ratio", cl::init(0.55), cl::Hidden, + cl::desc("Minimum BranchProbability to consider a elide a coroutine.")); +extern cl::opt<unsigned> MinBlockCounterExecution; + static Instruction *getFirstNonAllocaInTheEntryBlock(Function *F) { for (Instruction &I : F->getEntryBlock()) if (!isa<AllocaInst>(&I)) @@ -145,6 +153,30 @@ PreservedAnalyses CoroAnnotationElidePass::run(LazyCallGraph::SCC &C, bool IsCallerPresplitCoroutine = Caller->isPresplitCoroutine(); bool HasAttr = CB->hasFnAttr(llvm::Attribute::CoroElideSafe); if (IsCallerPresplitCoroutine && HasAttr) { + BranchProbability MinBranchProbability( + static_cast<int>(CoroElideBranchRatio * MinBlockCounterExecution), + MinBlockCounterExecution); + + auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*Caller); + + auto Prob = BranchProbability::getBranchProbability( + BFI.getBlockFreq(CB->getParent()).getFrequency(), + BFI.getEntryFreq().getFrequency()); + + if (Prob < MinBranchProbability) { + ORE.emit([&]() { + return OptimizationRemarkMissed( + DEBUG_TYPE, "CoroAnnotationElideUnlikely", Caller) + << "'" << ore::NV("callee", Callee->getName()) + << "' not elided in '" + << ore::NV("caller", Caller->getName()) + << "' because of low probability: " + << ore::NV("probability", Prob) << " (threshold: " + << ore::NV("threshold", MinBranchProbability) << ")"; + }); + continue; + } + auto *CallerN = CG.lookup(*Caller); auto *CallerC = CallerN ? CG.lookupSCC(*CallerN) : nullptr; // If CallerC is nullptr, it means LazyCallGraph hasn't visited Caller @@ -156,7 +188,7 @@ PreservedAnalyses CoroAnnotationElidePass::run(LazyCallGraph::SCC &C, return OptimizationRemark(DEBUG_TYPE, "CoroAnnotationElide", Caller) << "'" << ore::NV("callee", Callee->getName()) << "' elided in '" << ore::NV("caller", Caller->getName()) - << "'"; + << "' (probability: " << ore::NV("probability", Prob) << ")"; }); FAM.invalidate(*Caller, PreservedAnalyses::none()); diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index cfdfd94..5066a99 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -1033,19 +1033,17 @@ private: }; } // namespace -namespace llvm { template <> -struct DenseMapInfo<typename CallsiteContextGraph< +struct llvm::DenseMapInfo<typename CallsiteContextGraph< ModuleCallsiteContextGraph, Function, Instruction *>::CallInfo> : public DenseMapInfo<std::pair<Instruction *, unsigned>> {}; template <> -struct DenseMapInfo<typename CallsiteContextGraph< +struct llvm::DenseMapInfo<typename CallsiteContextGraph< IndexCallsiteContextGraph, FunctionSummary, IndexCall>::CallInfo> : public DenseMapInfo<std::pair<IndexCall, unsigned>> {}; template <> -struct DenseMapInfo<IndexCall> +struct llvm::DenseMapInfo<IndexCall> : public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {}; -} // end namespace llvm namespace { diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp index 2583249..1a00d17 100644 --- a/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -109,7 +109,7 @@ static cl::opt<float> MinRegionSizeRatio( "outline candidate and original function")); // Used to tune the minimum number of execution counts needed in the predecessor // block to the cold edge. ie. confidence interval. -static cl::opt<unsigned> +cl::opt<unsigned> MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden, cl::desc("Minimum block executions to consider " "its BranchProbabilityInfo valid")); diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index ac41fdd..2d5cb82 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -372,9 +372,7 @@ struct VTableSlot { } // end anonymous namespace -namespace llvm { - -template <> struct DenseMapInfo<VTableSlot> { +template <> struct llvm::DenseMapInfo<VTableSlot> { static VTableSlot getEmptyKey() { return {DenseMapInfo<Metadata *>::getEmptyKey(), DenseMapInfo<uint64_t>::getEmptyKey()}; @@ -393,7 +391,7 @@ template <> struct DenseMapInfo<VTableSlot> { } }; -template <> struct DenseMapInfo<VTableSlotSummary> { +template <> struct llvm::DenseMapInfo<VTableSlotSummary> { static VTableSlotSummary getEmptyKey() { return {DenseMapInfo<StringRef>::getEmptyKey(), DenseMapInfo<uint64_t>::getEmptyKey()}; @@ -412,8 +410,6 @@ template <> struct DenseMapInfo<VTableSlotSummary> { } }; -} // end namespace llvm - // Returns true if the function must be unreachable based on ValueInfo. // // In particular, identifies a function as unreachable in the following diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 9b272c4..3ddf182 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -28,6 +28,10 @@ using namespace PatternMatch; #define DEBUG_TYPE "instcombine" +namespace llvm { +extern cl::opt<bool> ProfcheckDisableMetadataFixes; +} + /// This is the complement of getICmpCode, which turns an opcode and two /// operands into either a constant true or false, or a brand new ICmp /// instruction. The sign is passed in to determine which kind of predicate to @@ -1272,7 +1276,8 @@ Value *InstCombinerImpl::foldEqOfParts(Value *Cmp0, Value *Cmp1, bool IsAnd) { static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1, bool IsAnd, bool IsLogical, InstCombiner::BuilderTy &Builder, - const SimplifyQuery &Q) { + const SimplifyQuery &Q, + Instruction &I) { // Match an equality compare with a non-poison constant as Cmp0. // Also, give up if the compare can be constant-folded to avoid looping. CmpPredicate Pred0; @@ -1306,9 +1311,12 @@ static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1, return nullptr; SubstituteCmp = Builder.CreateICmp(Pred1, Y, C); } - if (IsLogical) - return IsAnd ? Builder.CreateLogicalAnd(Cmp0, SubstituteCmp) - : Builder.CreateLogicalOr(Cmp0, SubstituteCmp); + if (IsLogical) { + Instruction *MDFrom = + ProfcheckDisableMetadataFixes && isa<SelectInst>(I) ? nullptr : &I; + return IsAnd ? Builder.CreateLogicalAnd(Cmp0, SubstituteCmp, "", MDFrom) + : Builder.CreateLogicalOr(Cmp0, SubstituteCmp, "", MDFrom); + } return Builder.CreateBinOp(IsAnd ? Instruction::And : Instruction::Or, Cmp0, SubstituteCmp); } @@ -3396,13 +3404,13 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, /*IsLogical*/ false, Builder)) return V; - if (Value *V = - foldAndOrOfICmpsWithConstEq(LHS, RHS, IsAnd, IsLogical, Builder, Q)) + if (Value *V = foldAndOrOfICmpsWithConstEq(LHS, RHS, IsAnd, IsLogical, + Builder, Q, I)) return V; // We can convert this case to bitwise and, because both operands are used // on the LHS, and as such poison from both will propagate. - if (Value *V = foldAndOrOfICmpsWithConstEq(RHS, LHS, IsAnd, - /*IsLogical=*/false, Builder, Q)) { + if (Value *V = foldAndOrOfICmpsWithConstEq( + RHS, LHS, IsAnd, /*IsLogical=*/false, Builder, Q, I)) { // If RHS is still used, we should drop samesign flag. if (IsLogical && RHS->hasSameSign() && !RHS->use_empty()) { RHS->setSameSign(false); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 56194fe..4c9b10a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -2202,6 +2202,11 @@ Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) { return commonCastTransforms(CI); } +Instruction *InstCombinerImpl::visitPtrToAddr(PtrToAddrInst &CI) { + // FIXME: Implement variants of ptrtoint folds. + return commonCastTransforms(CI); +} + /// This input value (which is known to have vector type) is being zero extended /// or truncated to the specified vector type. Since the zext/trunc is done /// using an integer type, we have a (bitcast(cast(bitcast))) pattern, diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index e01c145..218aaf9 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -143,6 +143,7 @@ public: Instruction *visitUIToFP(CastInst &CI); Instruction *visitSIToFP(CastInst &CI); Instruction *visitPtrToInt(PtrToIntInst &CI); + Instruction *visitPtrToAddr(PtrToAddrInst &CI); Instruction *visitIntToPtr(IntToPtrInst &CI); Instruction *visitBitCast(BitCastInst &CI); Instruction *visitAddrSpaceCast(AddrSpaceCastInst &CI); diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp index 5c747bb..9815644 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -1069,27 +1069,22 @@ struct LoweredPHIRecord { }; } // namespace -namespace llvm { - template<> - struct DenseMapInfo<LoweredPHIRecord> { - static inline LoweredPHIRecord getEmptyKey() { - return LoweredPHIRecord(nullptr, 0); - } - static inline LoweredPHIRecord getTombstoneKey() { - return LoweredPHIRecord(nullptr, 1); - } - static unsigned getHashValue(const LoweredPHIRecord &Val) { - return DenseMapInfo<PHINode*>::getHashValue(Val.PN) ^ (Val.Shift>>3) ^ - (Val.Width>>3); - } - static bool isEqual(const LoweredPHIRecord &LHS, - const LoweredPHIRecord &RHS) { - return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift && - LHS.Width == RHS.Width; - } - }; -} // namespace llvm - +template <> struct llvm::DenseMapInfo<LoweredPHIRecord> { + static inline LoweredPHIRecord getEmptyKey() { + return LoweredPHIRecord(nullptr, 0); + } + static inline LoweredPHIRecord getTombstoneKey() { + return LoweredPHIRecord(nullptr, 1); + } + static unsigned getHashValue(const LoweredPHIRecord &Val) { + return DenseMapInfo<PHINode *>::getHashValue(Val.PN) ^ (Val.Shift >> 3) ^ + (Val.Width >> 3); + } + static bool isEqual(const LoweredPHIRecord &LHS, + const LoweredPHIRecord &RHS) { + return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift && LHS.Width == RHS.Width; + } +}; /// This is an integer PHI and we know that it has an illegal type: see if it is /// only used by trunc or trunc(lshr) operations. If so, we split the PHI into diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 3704ad7..860f8f7 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -600,9 +600,7 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, !IsRISCV64 && !IsLoongArch64 && !(Mapping.Offset & (Mapping.Offset - 1)) && Mapping.Offset != kDynamicShadowSentinel; - bool IsAndroidWithIfuncSupport = - IsAndroid && !TargetTriple.isAndroidVersionLT(21); - Mapping.InGlobal = ClWithIfunc && IsAndroidWithIfuncSupport && IsArmOrThumb; + Mapping.InGlobal = ClWithIfunc && IsAndroid && IsArmOrThumb; return Mapping; } diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp index 3f7003d..e5935f4 100644 --- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp @@ -148,19 +148,16 @@ public: class DFAJumpThreading { public: - DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI, + DFAJumpThreading(AssumptionCache *AC, DomTreeUpdater *DTU, LoopInfo *LI, TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE) - : AC(AC), DT(DT), LI(LI), TTI(TTI), ORE(ORE) {} + : AC(AC), DTU(DTU), LI(LI), TTI(TTI), ORE(ORE) {} bool run(Function &F); bool LoopInfoBroken; private: void - unfoldSelectInstrs(DominatorTree *DT, - const SmallVector<SelectInstToUnfold, 4> &SelectInsts) { - // TODO: Have everything use a single lazy DTU - DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); + unfoldSelectInstrs(const SmallVector<SelectInstToUnfold, 4> &SelectInsts) { SmallVector<SelectInstToUnfold, 4> Stack(SelectInsts); while (!Stack.empty()) { @@ -168,7 +165,7 @@ private: std::vector<SelectInstToUnfold> NewSIsToUnfold; std::vector<BasicBlock *> NewBBs; - unfold(&DTU, LI, SIToUnfold, &NewSIsToUnfold, &NewBBs); + unfold(DTU, LI, SIToUnfold, &NewSIsToUnfold, &NewBBs); // Put newly discovered select instructions into the work list. llvm::append_range(Stack, NewSIsToUnfold); @@ -181,7 +178,7 @@ private: std::vector<BasicBlock *> *NewBBs); AssumptionCache *AC; - DominatorTree *DT; + DomTreeUpdater *DTU; LoopInfo *LI; TargetTransformInfo *TTI; OptimizationRemarkEmitter *ORE; @@ -389,6 +386,22 @@ inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) { return OS; } +/// Helper to get the successor corresponding to a particular case value for +/// a switch statement. +static BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, + const APInt &NextState) { + BasicBlock *NextCase = nullptr; + for (auto Case : Switch->cases()) { + if (Case.getCaseValue()->getValue() == NextState) { + NextCase = Case.getCaseSuccessor(); + break; + } + } + if (!NextCase) + NextCase = Switch->getDefaultDest(); + return NextCase; +} + namespace { /// ThreadingPath is a path in the control flow of a loop that can be threaded /// by cloning necessary basic blocks and replacing conditional branches with @@ -401,6 +414,10 @@ struct ThreadingPath { ExitVal = V->getValue(); IsExitValSet = true; } + void setExitValue(const APInt &V) { + ExitVal = V; + IsExitValSet = true; + } bool isExitValueSet() const { return IsExitValSet; } /// Determinator is the basic block that determines the next state of the DFA. @@ -583,44 +600,8 @@ struct AllSwitchPaths { BasicBlock *getSwitchBlock() { return SwitchBlock; } void run() { - StateDefMap StateDef = getStateDefMap(); - if (StateDef.empty()) { - ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", - Switch) - << "Switch instruction is not predictable."; - }); - return; - } - - auto *SwitchPhi = cast<PHINode>(Switch->getOperand(0)); - auto *SwitchPhiDefBB = SwitchPhi->getParent(); - VisitedBlocks VB; - // Get paths from the determinator BBs to SwitchPhiDefBB - std::vector<ThreadingPath> PathsToPhiDef = - getPathsFromStateDefMap(StateDef, SwitchPhi, VB, MaxNumPaths); - if (SwitchPhiDefBB == SwitchBlock || PathsToPhiDef.empty()) { - TPaths = std::move(PathsToPhiDef); - return; - } - - assert(MaxNumPaths >= PathsToPhiDef.size() && !PathsToPhiDef.empty()); - auto PathsLimit = MaxNumPaths / PathsToPhiDef.size(); - // Find and append paths from SwitchPhiDefBB to SwitchBlock. - PathsType PathsToSwitchBB = - paths(SwitchPhiDefBB, SwitchBlock, VB, /* PathDepth = */ 1, PathsLimit); - if (PathsToSwitchBB.empty()) - return; - - std::vector<ThreadingPath> TempList; - for (const ThreadingPath &Path : PathsToPhiDef) { - for (const PathType &PathToSw : PathsToSwitchBB) { - ThreadingPath PathCopy(Path); - PathCopy.appendExcludingFirst(PathToSw); - TempList.push_back(PathCopy); - } - } - TPaths = std::move(TempList); + findTPaths(); + unifyTPaths(); } private: @@ -812,6 +793,69 @@ private: return Res; } + // Find all threadable paths. + void findTPaths() { + StateDefMap StateDef = getStateDefMap(); + if (StateDef.empty()) { + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", + Switch) + << "Switch instruction is not predictable."; + }); + return; + } + + auto *SwitchPhi = cast<PHINode>(Switch->getOperand(0)); + auto *SwitchPhiDefBB = SwitchPhi->getParent(); + VisitedBlocks VB; + // Get paths from the determinator BBs to SwitchPhiDefBB + std::vector<ThreadingPath> PathsToPhiDef = + getPathsFromStateDefMap(StateDef, SwitchPhi, VB, MaxNumPaths); + if (SwitchPhiDefBB == SwitchBlock || PathsToPhiDef.empty()) { + TPaths = std::move(PathsToPhiDef); + return; + } + + assert(MaxNumPaths >= PathsToPhiDef.size() && !PathsToPhiDef.empty()); + auto PathsLimit = MaxNumPaths / PathsToPhiDef.size(); + // Find and append paths from SwitchPhiDefBB to SwitchBlock. + PathsType PathsToSwitchBB = + paths(SwitchPhiDefBB, SwitchBlock, VB, /* PathDepth = */ 1, PathsLimit); + if (PathsToSwitchBB.empty()) + return; + + std::vector<ThreadingPath> TempList; + for (const ThreadingPath &Path : PathsToPhiDef) { + for (const PathType &PathToSw : PathsToSwitchBB) { + ThreadingPath PathCopy(Path); + PathCopy.appendExcludingFirst(PathToSw); + TempList.push_back(PathCopy); + } + } + TPaths = std::move(TempList); + } + + // Two states are equivalent if they have the same switch destination. + // Unify the states in different threading path if the states are equivalent. + void unifyTPaths() { + llvm::SmallDenseMap<BasicBlock *, APInt> DestToState; + for (ThreadingPath &Path : TPaths) { + APInt NextState = Path.getExitValue(); + BasicBlock *Dest = getNextCaseSuccessor(Switch, NextState); + auto StateIt = DestToState.find(Dest); + if (StateIt == DestToState.end()) { + DestToState.insert({Dest, NextState}); + continue; + } + + if (NextState != StateIt->second) { + LLVM_DEBUG(dbgs() << "Next state in " << Path << " is equivalent to " + << StateIt->second << "\n"); + Path.setExitValue(StateIt->second); + } + } + } + unsigned NumVisited = 0; SwitchInst *Switch; BasicBlock *SwitchBlock; @@ -822,11 +866,11 @@ private: }; struct TransformDFA { - TransformDFA(AllSwitchPaths *SwitchPaths, DominatorTree *DT, + TransformDFA(AllSwitchPaths *SwitchPaths, DomTreeUpdater *DTU, AssumptionCache *AC, TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, SmallPtrSet<const Value *, 32> EphValues) - : SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE), + : SwitchPaths(SwitchPaths), DTU(DTU), AC(AC), TTI(TTI), ORE(ORE), EphValues(EphValues) {} bool run() { @@ -1002,19 +1046,16 @@ private: SmallPtrSet<BasicBlock *, 16> BlocksToClean; BlocksToClean.insert_range(successors(SwitchBlock)); - { - DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy); - for (const ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { - createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU); - NumPaths++; - } - - // After all paths are cloned, now update the last successor of the cloned - // path so it skips over the switch statement - for (const ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) - updateLastSuccessor(TPath, DuplicateMap, &DTU); + for (const ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { + createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, DTU); + NumPaths++; } + // After all paths are cloned, now update the last successor of the cloned + // path so it skips over the switch statement + for (const ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) + updateLastSuccessor(TPath, DuplicateMap, DTU); + // For each instruction that was cloned and used outside, update its uses updateSSA(NewDefs); @@ -1118,7 +1159,7 @@ private: } // SSAUpdater handles phi placement and renaming uses with the appropriate // value. - SSAUpdate.RewriteAllUses(DT); + SSAUpdate.RewriteAllUses(&DTU->getDomTree()); } /// Clones a basic block, and adds it to the CFG. @@ -1335,28 +1376,13 @@ private: return It != ClonedBBs.end() ? (*It).BB : nullptr; } - /// Helper to get the successor corresponding to a particular case value for - /// a switch statement. - BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, const APInt &NextState) { - BasicBlock *NextCase = nullptr; - for (auto Case : Switch->cases()) { - if (Case.getCaseValue()->getValue() == NextState) { - NextCase = Case.getCaseSuccessor(); - break; - } - } - if (!NextCase) - NextCase = Switch->getDefaultDest(); - return NextCase; - } - /// Returns true if IncomingBB is a predecessor of BB. bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) { return llvm::is_contained(predecessors(BB), IncomingBB); } AllSwitchPaths *SwitchPaths; - DominatorTree *DT; + DomTreeUpdater *DTU; AssumptionCache *AC; TargetTransformInfo *TTI; OptimizationRemarkEmitter *ORE; @@ -1399,7 +1425,7 @@ bool DFAJumpThreading::run(Function &F) { << "candidate for jump threading\n"); LLVM_DEBUG(SI->dump()); - unfoldSelectInstrs(DT, Switch.getSelectInsts()); + unfoldSelectInstrs(Switch.getSelectInsts()); if (!Switch.getSelectInsts().empty()) MadeChanges = true; @@ -1421,7 +1447,7 @@ bool DFAJumpThreading::run(Function &F) { } #ifdef NDEBUG - LI->verify(*DT); + LI->verify(DTU->getDomTree()); #endif SmallPtrSet<const Value *, 32> EphValues; @@ -1429,13 +1455,15 @@ bool DFAJumpThreading::run(Function &F) { CodeMetrics::collectEphemeralValues(&F, AC, EphValues); for (AllSwitchPaths SwitchPaths : ThreadableLoops) { - TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues); + TransformDFA Transform(&SwitchPaths, DTU, AC, TTI, ORE, EphValues); if (Transform.run()) MadeChanges = LoopInfoBroken = true; } + DTU->flush(); + #ifdef EXPENSIVE_CHECKS - assert(DT->verify(DominatorTree::VerificationLevel::Full)); + assert(DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full)); verifyFunction(F, &dbgs()); #endif @@ -1450,7 +1478,9 @@ PreservedAnalyses DFAJumpThreadingPass::run(Function &F, LoopInfo &LI = AM.getResult<LoopAnalysis>(F); TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F); OptimizationRemarkEmitter ORE(&F); - DFAJumpThreading ThreadImpl(&AC, &DT, &LI, &TTI, &ORE); + + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); + DFAJumpThreading ThreadImpl(&AC, &DTU, &LI, &TTI, &ORE); if (!ThreadImpl.run(F)) return PreservedAnalyses::all(); diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp index 0f8cc6c..2afa7b7 100644 --- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -108,7 +108,7 @@ struct SimpleValue { // of instruction handled below (UnaryOperator, etc.). if (CallInst *CI = dyn_cast<CallInst>(Inst)) { if (Function *F = CI->getCalledFunction()) { - switch ((Intrinsic::ID)F->getIntrinsicID()) { + switch (F->getIntrinsicID()) { case Intrinsic::experimental_constrained_fadd: case Intrinsic::experimental_constrained_fsub: case Intrinsic::experimental_constrained_fmul: @@ -154,9 +154,7 @@ struct SimpleValue { } // end anonymous namespace -namespace llvm { - -template <> struct DenseMapInfo<SimpleValue> { +template <> struct llvm::DenseMapInfo<SimpleValue> { static inline SimpleValue getEmptyKey() { return DenseMapInfo<Instruction *>::getEmptyKey(); } @@ -169,8 +167,6 @@ template <> struct DenseMapInfo<SimpleValue> { static bool isEqual(SimpleValue LHS, SimpleValue RHS); }; -} // end namespace llvm - /// Match a 'select' including an optional 'not's of the condition. static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A, Value *&B, @@ -509,9 +505,7 @@ struct CallValue { } // end anonymous namespace -namespace llvm { - -template <> struct DenseMapInfo<CallValue> { +template <> struct llvm::DenseMapInfo<CallValue> { static inline CallValue getEmptyKey() { return DenseMapInfo<Instruction *>::getEmptyKey(); } @@ -524,8 +518,6 @@ template <> struct DenseMapInfo<CallValue> { static bool isEqual(CallValue LHS, CallValue RHS); }; -} // end namespace llvm - unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) { Instruction *Inst = Val.Inst; @@ -580,9 +572,7 @@ struct GEPValue { } // namespace -namespace llvm { - -template <> struct DenseMapInfo<GEPValue> { +template <> struct llvm::DenseMapInfo<GEPValue> { static inline GEPValue getEmptyKey() { return DenseMapInfo<Instruction *>::getEmptyKey(); } @@ -595,8 +585,6 @@ template <> struct DenseMapInfo<GEPValue> { static bool isEqual(const GEPValue &LHS, const GEPValue &RHS); }; -} // end namespace llvm - unsigned DenseMapInfo<GEPValue>::getHashValue(const GEPValue &Val) { auto *GEP = cast<GetElementPtrInst>(Val.Inst); if (Val.ConstantOffset.has_value()) diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index 638952a..3a8ade8 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -170,9 +170,7 @@ struct llvm::GVNPass::Expression { } }; -namespace llvm { - -template <> struct DenseMapInfo<GVNPass::Expression> { +template <> struct llvm::DenseMapInfo<GVNPass::Expression> { static inline GVNPass::Expression getEmptyKey() { return ~0U; } static inline GVNPass::Expression getTombstoneKey() { return ~1U; } @@ -188,8 +186,6 @@ template <> struct DenseMapInfo<GVNPass::Expression> { } }; -} // end namespace llvm - /// Represents a particular available value that we know how to materialize. /// Materialization of an AvailableValue never fails. An AvailableValue is /// implicitly associated with a rematerialization point which is the @@ -2084,13 +2080,6 @@ bool GVNPass::processNonLocalLoad(LoadInst *Load) { return Changed; } -static bool hasUsersIn(Value *V, BasicBlock *BB) { - return any_of(V->users(), [BB](User *U) { - auto *I = dyn_cast<Instruction>(U); - return I && I->getParent() == BB; - }); -} - bool GVNPass::processAssumeIntrinsic(AssumeInst *IntrinsicI) { Value *V = IntrinsicI->getArgOperand(0); @@ -2149,85 +2138,7 @@ bool GVNPass::processAssumeIntrinsic(AssumeInst *IntrinsicI) { } Constant *True = ConstantInt::getTrue(V->getContext()); - bool Changed = false; - - for (BasicBlock *Successor : successors(IntrinsicI->getParent())) { - BasicBlockEdge Edge(IntrinsicI->getParent(), Successor); - - // This property is only true in dominated successors, propagateEquality - // will check dominance for us. - Changed |= propagateEquality(V, True, Edge, false); - } - - // We can replace assume value with true, which covers cases like this: - // call void @llvm.assume(i1 %cmp) - // br i1 %cmp, label %bb1, label %bb2 ; will change %cmp to true - ReplaceOperandsWithMap[V] = True; - - // Similarly, after assume(!NotV) we know that NotV == false. - Value *NotV; - if (match(V, m_Not(m_Value(NotV)))) - ReplaceOperandsWithMap[NotV] = ConstantInt::getFalse(V->getContext()); - - // If we find an equality fact, canonicalize all dominated uses in this block - // to one of the two values. We heuristically choice the "oldest" of the - // two where age is determined by value number. (Note that propagateEquality - // above handles the cross block case.) - // - // Key case to cover are: - // 1) - // %cmp = fcmp oeq float 3.000000e+00, %0 ; const on lhs could happen - // call void @llvm.assume(i1 %cmp) - // ret float %0 ; will change it to ret float 3.000000e+00 - // 2) - // %load = load float, float* %addr - // %cmp = fcmp oeq float %load, %0 - // call void @llvm.assume(i1 %cmp) - // ret float %load ; will change it to ret float %0 - if (auto *CmpI = dyn_cast<CmpInst>(V)) { - if (CmpI->isEquivalence()) { - Value *CmpLHS = CmpI->getOperand(0); - Value *CmpRHS = CmpI->getOperand(1); - // Heuristically pick the better replacement -- the choice of heuristic - // isn't terribly important here, but the fact we canonicalize on some - // replacement is for exposing other simplifications. - // TODO: pull this out as a helper function and reuse w/ existing - // (slightly different) logic. - if (isa<Constant>(CmpLHS) && !isa<Constant>(CmpRHS)) - std::swap(CmpLHS, CmpRHS); - if (!isa<Instruction>(CmpLHS) && isa<Instruction>(CmpRHS)) - std::swap(CmpLHS, CmpRHS); - if ((isa<Argument>(CmpLHS) && isa<Argument>(CmpRHS)) || - (isa<Instruction>(CmpLHS) && isa<Instruction>(CmpRHS))) { - // Move the 'oldest' value to the right-hand side, using the value - // number as a proxy for age. - uint32_t LVN = VN.lookupOrAdd(CmpLHS); - uint32_t RVN = VN.lookupOrAdd(CmpRHS); - if (LVN < RVN) - std::swap(CmpLHS, CmpRHS); - } - - // Handle degenerate case where we either haven't pruned a dead path or a - // removed a trivial assume yet. - if (isa<Constant>(CmpLHS) && isa<Constant>(CmpRHS)) - return Changed; - - LLVM_DEBUG(dbgs() << "Replacing dominated uses of " - << *CmpLHS << " with " - << *CmpRHS << " in block " - << IntrinsicI->getParent()->getName() << "\n"); - - // Setup the replacement map - this handles uses within the same block. - if (hasUsersIn(CmpLHS, IntrinsicI->getParent())) - ReplaceOperandsWithMap[CmpLHS] = CmpRHS; - - // NOTE: The non-block local cases are handled by the call to - // propagateEquality above; this block is just about handling the block - // local cases. TODO: There's a bunch of logic in propagateEqualiy which - // isn't duplicated for the block local case, can we share it somehow? - } - } - return Changed; + return propagateEquality(V, True, IntrinsicI); } static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) { @@ -2526,39 +2437,28 @@ void GVNPass::assignBlockRPONumber(Function &F) { InvalidBlockRPONumbers = false; } -bool GVNPass::replaceOperandsForInBlockEquality(Instruction *Instr) const { - bool Changed = false; - for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) { - Use &Operand = Instr->getOperandUse(OpNum); - auto It = ReplaceOperandsWithMap.find(Operand.get()); - if (It != ReplaceOperandsWithMap.end()) { - const DataLayout &DL = Instr->getDataLayout(); - if (!canReplacePointersInUseIfEqual(Operand, It->second, DL)) - continue; - - LLVM_DEBUG(dbgs() << "GVN replacing: " << *Operand << " with " - << *It->second << " in instruction " << *Instr << '\n'); - Instr->setOperand(OpNum, It->second); - Changed = true; - } - } - return Changed; -} - -/// The given values are known to be equal in every block +/// The given values are known to be equal in every use /// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with /// 'RHS' everywhere in the scope. Returns whether a change was made. -/// If DominatesByEdge is false, then it means that we will propagate the RHS -/// value starting from the end of Root.Start. -bool GVNPass::propagateEquality(Value *LHS, Value *RHS, - const BasicBlockEdge &Root, - bool DominatesByEdge) { +/// The Root may either be a basic block edge (for conditions) or an +/// instruction (for assumes). +bool GVNPass::propagateEquality( + Value *LHS, Value *RHS, + const std::variant<BasicBlockEdge, Instruction *> &Root) { SmallVector<std::pair<Value*, Value*>, 4> Worklist; Worklist.push_back(std::make_pair(LHS, RHS)); bool Changed = false; - // For speed, compute a conservative fast approximation to - // DT->dominates(Root, Root.getEnd()); - const bool RootDominatesEnd = isOnlyReachableViaThisEdge(Root, DT); + SmallVector<const BasicBlock *> DominatedBlocks; + if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(&Root)) { + // For speed, compute a conservative fast approximation to + // DT->dominates(Root, Root.getEnd()); + if (isOnlyReachableViaThisEdge(*Edge, DT)) + DominatedBlocks.push_back(Edge->getEnd()); + } else { + Instruction *I = std::get<Instruction *>(Root); + for (const auto *Node : DT->getNode(I->getParent())->children()) + DominatedBlocks.push_back(Node->getBlock()); + } while (!Worklist.empty()) { std::pair<Value*, Value*> Item = Worklist.pop_back_val(); @@ -2606,9 +2506,9 @@ bool GVNPass::propagateEquality(Value *LHS, Value *RHS, // using the leader table is about compiling faster, not optimizing better). // The leader table only tracks basic blocks, not edges. Only add to if we // have the simple case where the edge dominates the end. - if (RootDominatesEnd && !isa<Instruction>(RHS) && - canReplacePointersIfEqual(LHS, RHS, DL)) - LeaderTable.insert(LVN, RHS, Root.getEnd()); + if (!isa<Instruction>(RHS) && canReplacePointersIfEqual(LHS, RHS, DL)) + for (const BasicBlock *BB : DominatedBlocks) + LeaderTable.insert(LVN, RHS, BB); // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As // LHS always has at least one use that is not dominated by Root, this will @@ -2618,12 +2518,14 @@ bool GVNPass::propagateEquality(Value *LHS, Value *RHS, auto CanReplacePointersCallBack = [&DL](const Use &U, const Value *To) { return canReplacePointersInUseIfEqual(U, To, DL); }; - unsigned NumReplacements = - DominatesByEdge - ? replaceDominatedUsesWithIf(LHS, RHS, *DT, Root, - CanReplacePointersCallBack) - : replaceDominatedUsesWithIf(LHS, RHS, *DT, Root.getStart(), - CanReplacePointersCallBack); + unsigned NumReplacements; + if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(&Root)) + NumReplacements = replaceDominatedUsesWithIf( + LHS, RHS, *DT, *Edge, CanReplacePointersCallBack); + else + NumReplacements = replaceDominatedUsesWithIf( + LHS, RHS, *DT, std::get<Instruction *>(Root), + CanReplacePointersCallBack); if (NumReplacements > 0) { Changed = true; @@ -2682,26 +2584,45 @@ bool GVNPass::propagateEquality(Value *LHS, Value *RHS, // If the number we were assigned was brand new then there is no point in // looking for an instruction realizing it: there cannot be one! if (Num < NextNum) { - Value *NotCmp = findLeader(Root.getEnd(), Num); - if (NotCmp && isa<Instruction>(NotCmp)) { - unsigned NumReplacements = - DominatesByEdge - ? replaceDominatedUsesWith(NotCmp, NotVal, *DT, Root) - : replaceDominatedUsesWith(NotCmp, NotVal, *DT, - Root.getStart()); - Changed |= NumReplacements > 0; - NumGVNEqProp += NumReplacements; - // Cached information for anything that uses NotCmp will be invalid. - if (MD) - MD->invalidateCachedPointerInfo(NotCmp); + for (const auto &Entry : LeaderTable.getLeaders(Num)) { + // Only look at leaders that either dominate the start of the edge, + // or are dominated by the end. This check is not necessary for + // correctness, it only discards cases for which the following + // use replacement will not work anyway. + if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(&Root)) { + if (!DT->dominates(Entry.BB, Edge->getStart()) && + !DT->dominates(Edge->getEnd(), Entry.BB)) + continue; + } else { + auto *InstBB = std::get<Instruction *>(Root)->getParent(); + if (!DT->dominates(Entry.BB, InstBB) && + !DT->dominates(InstBB, Entry.BB)) + continue; + } + + Value *NotCmp = Entry.Val; + if (NotCmp && isa<Instruction>(NotCmp)) { + unsigned NumReplacements; + if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(&Root)) + NumReplacements = + replaceDominatedUsesWith(NotCmp, NotVal, *DT, *Edge); + else + NumReplacements = replaceDominatedUsesWith( + NotCmp, NotVal, *DT, std::get<Instruction *>(Root)); + Changed |= NumReplacements > 0; + NumGVNEqProp += NumReplacements; + // Cached information for anything that uses NotCmp will be invalid. + if (MD) + MD->invalidateCachedPointerInfo(NotCmp); + } } } // Ensure that any instruction in scope that gets the "A < B" value number // is replaced with false. // The leader table only tracks basic blocks, not edges. Only add to if we // have the simple case where the edge dominates the end. - if (RootDominatesEnd) - LeaderTable.insert(Num, NotVal, Root.getEnd()); + for (const BasicBlock *BB : DominatedBlocks) + LeaderTable.insert(Num, NotVal, BB); continue; } @@ -2789,11 +2710,11 @@ bool GVNPass::processInstruction(Instruction *I) { Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext()); BasicBlockEdge TrueE(Parent, TrueSucc); - Changed |= propagateEquality(BranchCond, TrueVal, TrueE, true); + Changed |= propagateEquality(BranchCond, TrueVal, TrueE); Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext()); BasicBlockEdge FalseE(Parent, FalseSucc); - Changed |= propagateEquality(BranchCond, FalseVal, FalseE, true); + Changed |= propagateEquality(BranchCond, FalseVal, FalseE); return Changed; } @@ -2814,7 +2735,7 @@ bool GVNPass::processInstruction(Instruction *I) { // If there is only a single edge, propagate the case value into it. if (SwitchEdges.lookup(Dst) == 1) { BasicBlockEdge E(Parent, Dst); - Changed |= propagateEquality(SwitchCond, Case.getCaseValue(), E, true); + Changed |= propagateEquality(SwitchCond, Case.getCaseValue(), E); } } return Changed; @@ -2942,8 +2863,6 @@ bool GVNPass::processBlock(BasicBlock *BB) { if (DeadBlocks.count(BB)) return false; - // Clearing map before every BB because it can be used only for single BB. - ReplaceOperandsWithMap.clear(); bool ChangedFunction = false; // Since we may not have visited the input blocks of the phis, we can't @@ -2955,11 +2874,8 @@ bool GVNPass::processBlock(BasicBlock *BB) { for (PHINode *PN : PHINodesToRemove) { removeInstruction(PN); } - for (Instruction &Inst : make_early_inc_range(*BB)) { - if (!ReplaceOperandsWithMap.empty()) - ChangedFunction |= replaceOperandsForInBlockEquality(&Inst); + for (Instruction &Inst : make_early_inc_range(*BB)) ChangedFunction |= processInstruction(&Inst); - } return ChangedFunction; } diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp index 3c1a8ba..80aa98d 100644 --- a/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -434,10 +434,6 @@ private: int StoreCount = 0; }; -} // end anonymous namespace - -namespace llvm { - struct ExactEqualsExpression { const Expression &E; @@ -449,8 +445,9 @@ struct ExactEqualsExpression { return E.exactlyEquals(Other); } }; +} // end anonymous namespace -template <> struct DenseMapInfo<const Expression *> { +template <> struct llvm::DenseMapInfo<const Expression *> { static const Expression *getEmptyKey() { auto Val = static_cast<uintptr_t>(-1); Val <<= PointerLikeTypeTraits<const Expression *>::NumLowBitsAvailable; @@ -493,8 +490,6 @@ template <> struct DenseMapInfo<const Expression *> { } }; -} // end namespace llvm - namespace { class NewGVN { diff --git a/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp b/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp index 2190dcd..a87822c 100644 --- a/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp +++ b/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp @@ -84,10 +84,6 @@ public: bool run(); }; -} // anonymous namespace - -namespace llvm { - struct FrozenIndPHIInfo { // A freeze instruction that uses an induction phi FreezeInst *FI = nullptr; @@ -103,7 +99,9 @@ struct FrozenIndPHIInfo { bool operator==(const FrozenIndPHIInfo &Other) { return FI == Other.FI; } }; -template <> struct DenseMapInfo<FrozenIndPHIInfo> { +} // namespace + +template <> struct llvm::DenseMapInfo<FrozenIndPHIInfo> { static inline FrozenIndPHIInfo getEmptyKey() { return FrozenIndPHIInfo(DenseMapInfo<PHINode *>::getEmptyKey(), DenseMapInfo<BinaryOperator *>::getEmptyKey()); @@ -124,8 +122,6 @@ template <> struct DenseMapInfo<FrozenIndPHIInfo> { }; }; -} // end namespace llvm - // Given U = (value, user), replace value with freeze(value), and let // SCEV forget user. The inserted freeze is placed in the preheader. void CanonicalizeFreezeInLoopsImpl::InsertFreezeAndForgetFromSCEV(Use &U) { diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index b6ca52e..46f2903 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -3246,6 +3246,13 @@ unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, return ::replaceDominatedUsesWith(From, To, Dominates); } +unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, + DominatorTree &DT, + const Instruction *I) { + auto Dominates = [&](const Use &U) { return DT.dominates(I, U); }; + return ::replaceDominatedUsesWith(From, To, Dominates); +} + unsigned llvm::replaceDominatedUsesWithIf( Value *From, Value *To, DominatorTree &DT, const BasicBlockEdge &Root, function_ref<bool(const Use &U, const Value *To)> ShouldReplace) { @@ -3264,6 +3271,15 @@ unsigned llvm::replaceDominatedUsesWithIf( return ::replaceDominatedUsesWith(From, To, DominatesAndShouldReplace); } +unsigned llvm::replaceDominatedUsesWithIf( + Value *From, Value *To, DominatorTree &DT, const Instruction *I, + function_ref<bool(const Use &U, const Value *To)> ShouldReplace) { + auto DominatesAndShouldReplace = [&](const Use &U) { + return DT.dominates(I, U) && ShouldReplace(U, To); + }; + return ::replaceDominatedUsesWith(From, To, DominatesAndShouldReplace); +} + bool llvm::callsGCLeafFunction(const CallBase *Call, const TargetLibraryInfo &TLI) { // Check if the function is specifically marked as a gc leaf function. diff --git a/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/llvm/lib/Transforms/Utils/LowerInvoke.cpp index ff2ab3c..cecb662 100644 --- a/llvm/lib/Transforms/Utils/LowerInvoke.cpp +++ b/llvm/lib/Transforms/Utils/LowerInvoke.cpp @@ -27,15 +27,15 @@ using namespace llvm; STATISTIC(NumInvokes, "Number of invokes replaced"); namespace { - class LowerInvokeLegacyPass : public FunctionPass { - public: - static char ID; // Pass identification, replacement for typeid - explicit LowerInvokeLegacyPass() : FunctionPass(ID) { - initializeLowerInvokeLegacyPassPass(*PassRegistry::getPassRegistry()); - } - bool runOnFunction(Function &F) override; - }; -} +class LowerInvokeLegacyPass : public FunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + explicit LowerInvokeLegacyPass() : FunctionPass(ID) { + initializeLowerInvokeLegacyPassPass(*PassRegistry::getPassRegistry()); + } + bool runOnFunction(Function &F) override; +}; +} // namespace char LowerInvokeLegacyPass::ID = 0; INITIALIZE_PASS(LowerInvokeLegacyPass, "lowerinvoke", @@ -78,11 +78,12 @@ bool LowerInvokeLegacyPass::runOnFunction(Function &F) { return runImpl(F); } -namespace llvm { -char &LowerInvokePassID = LowerInvokeLegacyPass::ID; +char &llvm::LowerInvokePassID = LowerInvokeLegacyPass::ID; // Public Interface To the LowerInvoke pass. -FunctionPass *createLowerInvokePass() { return new LowerInvokeLegacyPass(); } +FunctionPass *llvm::createLowerInvokePass() { + return new LowerInvokeLegacyPass(); +} PreservedAnalyses LowerInvokePass::run(Function &F, FunctionAnalysisManager &AM) { @@ -92,4 +93,3 @@ PreservedAnalyses LowerInvokePass::run(Function &F, return PreservedAnalyses::none(); } -} diff --git a/llvm/lib/Transforms/Utils/MisExpect.cpp b/llvm/lib/Transforms/Utils/MisExpect.cpp index ca7e09d..1585e9e 100644 --- a/llvm/lib/Transforms/Utils/MisExpect.cpp +++ b/llvm/lib/Transforms/Utils/MisExpect.cpp @@ -48,8 +48,6 @@ using namespace llvm; using namespace misexpect; -namespace llvm { - // Command line option to enable/disable the warning when profile data suggests // a mismatch with the use of the llvm.expect intrinsic static cl::opt<bool> PGOWarnMisExpect( @@ -63,22 +61,18 @@ static cl::opt<uint32_t> MisExpectTolerance( cl::desc("Prevents emitting diagnostics when profile counts are " "within N% of the threshold..")); -} // namespace llvm - -namespace { - -bool isMisExpectDiagEnabled(LLVMContext &Ctx) { +static bool isMisExpectDiagEnabled(const LLVMContext &Ctx) { return PGOWarnMisExpect || Ctx.getMisExpectWarningRequested(); } -uint32_t getMisExpectTolerance(LLVMContext &Ctx) { +static uint32_t getMisExpectTolerance(const LLVMContext &Ctx) { return std::max(static_cast<uint32_t>(MisExpectTolerance), Ctx.getDiagnosticsMisExpectTolerance()); } -Instruction *getInstCondition(Instruction *I) { +static const Instruction *getInstCondition(const Instruction *I) { assert(I != nullptr && "MisExpect target Instruction cannot be nullptr"); - Instruction *Ret = nullptr; + const Instruction *Ret = nullptr; if (auto *B = dyn_cast<BranchInst>(I)) { Ret = dyn_cast<Instruction>(B->getCondition()); } @@ -97,8 +91,8 @@ Instruction *getInstCondition(Instruction *I) { return Ret ? Ret : I; } -void emitMisexpectDiagnostic(Instruction *I, LLVMContext &Ctx, - uint64_t ProfCount, uint64_t TotalCount) { +static void emitMisexpectDiagnostic(const Instruction *I, LLVMContext &Ctx, + uint64_t ProfCount, uint64_t TotalCount) { double PercentageCorrect = (double)ProfCount / TotalCount; auto PerString = formatv("{0:P} ({1} / {2})", PercentageCorrect, ProfCount, TotalCount); @@ -106,20 +100,16 @@ void emitMisexpectDiagnostic(Instruction *I, LLVMContext &Ctx, "Potential performance regression from use of the llvm.expect intrinsic: " "Annotation was correct on {0} of profiled executions.", PerString); - Instruction *Cond = getInstCondition(I); + const Instruction *Cond = getInstCondition(I); if (isMisExpectDiagEnabled(Ctx)) Ctx.diagnose(DiagnosticInfoMisExpect(Cond, Twine(PerString))); OptimizationRemarkEmitter ORE(I->getParent()->getParent()); ORE.emit(OptimizationRemark(DEBUG_TYPE, "misexpect", Cond) << RemStr.str()); } -} // namespace - -namespace llvm { -namespace misexpect { - -void verifyMisExpect(Instruction &I, ArrayRef<uint32_t> RealWeights, - ArrayRef<uint32_t> ExpectedWeights) { +void misexpect::verifyMisExpect(const Instruction &I, + ArrayRef<uint32_t> RealWeights, + ArrayRef<uint32_t> ExpectedWeights) { // To determine if we emit a diagnostic, we need to compare the branch weights // from the profile to those added by the llvm.expect intrinsic. // So first, we extract the "likely" and "unlikely" weights from @@ -128,15 +118,13 @@ void verifyMisExpect(Instruction &I, ArrayRef<uint32_t> RealWeights, uint64_t LikelyBranchWeight = 0, UnlikelyBranchWeight = std::numeric_limits<uint32_t>::max(); size_t MaxIndex = 0; - for (size_t Idx = 0, End = ExpectedWeights.size(); Idx < End; Idx++) { - uint32_t V = ExpectedWeights[Idx]; + for (const auto &[Idx, V] : enumerate(ExpectedWeights)) { if (LikelyBranchWeight < V) { LikelyBranchWeight = V; MaxIndex = Idx; } - if (UnlikelyBranchWeight > V) { + if (UnlikelyBranchWeight > V) UnlikelyBranchWeight = V; - } } const uint64_t ProfiledWeight = RealWeights[MaxIndex]; @@ -161,7 +149,7 @@ void verifyMisExpect(Instruction &I, ArrayRef<uint32_t> RealWeights, uint64_t ScaledThreshold = LikelyProbablilty.scale(RealWeightsTotal); // clamp tolerance range to [0, 100) - auto Tolerance = getMisExpectTolerance(I.getContext()); + uint32_t Tolerance = getMisExpectTolerance(I.getContext()); Tolerance = std::clamp(Tolerance, 0u, 99u); // Allow users to relax checking by N% i.e., if they use a 5% tolerance, @@ -175,8 +163,8 @@ void verifyMisExpect(Instruction &I, ArrayRef<uint32_t> RealWeights, RealWeightsTotal); } -void checkBackendInstrumentation(Instruction &I, - const ArrayRef<uint32_t> RealWeights) { +void misexpect::checkBackendInstrumentation(const Instruction &I, + ArrayRef<uint32_t> RealWeights) { // Backend checking assumes any existing weight comes from an `llvm.expect` // intrinsic. However, SampleProfiling + ThinLTO add branch weights multiple // times, leading to an invalid assumption in our checking. Backend checks @@ -190,24 +178,19 @@ void checkBackendInstrumentation(Instruction &I, verifyMisExpect(I, RealWeights, ExpectedWeights); } -void checkFrontendInstrumentation(Instruction &I, - const ArrayRef<uint32_t> ExpectedWeights) { +void misexpect::checkFrontendInstrumentation( + const Instruction &I, ArrayRef<uint32_t> ExpectedWeights) { SmallVector<uint32_t> RealWeights; if (!extractBranchWeights(I, RealWeights)) return; verifyMisExpect(I, RealWeights, ExpectedWeights); } -void checkExpectAnnotations(Instruction &I, - const ArrayRef<uint32_t> ExistingWeights, - bool IsFrontend) { - if (IsFrontend) { +void misexpect::checkExpectAnnotations(const Instruction &I, + ArrayRef<uint32_t> ExistingWeights, + bool IsFrontend) { + if (IsFrontend) checkFrontendInstrumentation(I, ExistingWeights); - } else { + else checkBackendInstrumentation(I, ExistingWeights); - } } - -} // namespace misexpect -} // namespace llvm -#undef DEBUG_TYPE diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 155fcc5..d831c27 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -5959,7 +5959,11 @@ bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI, unsigned PreviousEdges = OtherCases->size(); if (OtherDest == SI->getDefaultDest()) ++PreviousEdges; - for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I) + unsigned E = PreviousEdges - 1; + // Remove all incoming values from OtherDest if OtherDest is unreachable. + if (NewBI->isUnconditional()) + ++E; + for (unsigned I = 0; I != E; ++I) cast<PHINode>(BBI)->removeIncomingValue(SI->getParent()); } @@ -7736,8 +7740,7 @@ struct SwitchSuccWrapper { DenseMap<PHINode *, SmallDenseMap<BasicBlock *, Value *, 8>> *PhiPredIVs; }; -namespace llvm { -template <> struct DenseMapInfo<const SwitchSuccWrapper *> { +template <> struct llvm::DenseMapInfo<const SwitchSuccWrapper *> { static const SwitchSuccWrapper *getEmptyKey() { return static_cast<SwitchSuccWrapper *>( DenseMapInfo<void *>::getEmptyKey()); @@ -7805,7 +7808,6 @@ template <> struct DenseMapInfo<const SwitchSuccWrapper *> { return true; } }; -} // namespace llvm bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU) { diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 3f16b03..e62d57e 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5696,7 +5696,7 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) { Instruction *I = Worklist.pop_back_val(); for (auto &Op : I->operands()) if (auto *InstOp = dyn_cast<Instruction>(Op)) - if ((InstOp->getParent() == I->getParent()) && !isa<PHINode>(InstOp) && + if (TheLoop->contains(InstOp) && !isa<PHINode>(InstOp) && AddrDefs.insert(InstOp).second) Worklist.push_back(InstOp); } |