diff options
Diffstat (limited to 'llvm/lib/Transforms/Scalar')
-rw-r--r-- | llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp | 224 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 20 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/GVN.cpp | 221 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/NewGVN.cpp | 9 |
4 files changed, 208 insertions, 266 deletions
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp index e448230..ff5f390 100644 --- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp @@ -61,6 +61,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/DomTreeUpdater.h" @@ -147,19 +148,16 @@ public: class DFAJumpThreading { public: - DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI, + DFAJumpThreading(AssumptionCache *AC, DomTreeUpdater *DTU, LoopInfo *LI, TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE) - : AC(AC), DT(DT), LI(LI), TTI(TTI), ORE(ORE) {} + : AC(AC), DTU(DTU), LI(LI), TTI(TTI), ORE(ORE) {} bool run(Function &F); bool LoopInfoBroken; private: void - unfoldSelectInstrs(DominatorTree *DT, - const SmallVector<SelectInstToUnfold, 4> &SelectInsts) { - // TODO: Have everything use a single lazy DTU - DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); + unfoldSelectInstrs(const SmallVector<SelectInstToUnfold, 4> &SelectInsts) { SmallVector<SelectInstToUnfold, 4> Stack(SelectInsts); while (!Stack.empty()) { @@ -167,7 +165,7 @@ private: std::vector<SelectInstToUnfold> NewSIsToUnfold; std::vector<BasicBlock *> NewBBs; - unfold(&DTU, LI, SIToUnfold, &NewSIsToUnfold, &NewBBs); + unfold(DTU, LI, SIToUnfold, &NewSIsToUnfold, &NewBBs); // Put newly discovered select instructions into the work list. llvm::append_range(Stack, NewSIsToUnfold); @@ -180,7 +178,7 @@ private: std::vector<BasicBlock *> *NewBBs); AssumptionCache *AC; - DominatorTree *DT; + DomTreeUpdater *DTU; LoopInfo *LI; TargetTransformInfo *TTI; OptimizationRemarkEmitter *ORE; @@ -382,16 +380,9 @@ typedef DenseMap<BasicBlock *, CloneList> DuplicateBlockMap; typedef MapVector<Instruction *, std::vector<Instruction *>> DefMap; inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) { - OS << "< "; - for (const BasicBlock *BB : Path) { - std::string BBName; - if (BB->hasName()) - raw_string_ostream(BBName) << BB->getName(); - else - raw_string_ostream(BBName) << BB; - OS << BBName << " "; - } - OS << ">"; + auto BBNames = llvm::map_range( + Path, [](const BasicBlock *BB) { return BB->getNameOrAsOperand(); }); + OS << "< " << llvm::join(BBNames, ", ") << " >"; return OS; } @@ -407,6 +398,10 @@ struct ThreadingPath { ExitVal = V->getValue(); IsExitValSet = true; } + void setExitValue(const APInt &V) { + ExitVal = V; + IsExitValSet = true; + } bool isExitValueSet() const { return IsExitValSet; } /// Determinator is the basic block that determines the next state of the DFA. @@ -423,7 +418,7 @@ struct ThreadingPath { } void print(raw_ostream &OS) const { - OS << Path << " [ " << ExitVal << ", " << DBB->getName() << " ]"; + OS << Path << " [ " << ExitVal << ", " << DBB->getNameOrAsOperand() << " ]"; } private: @@ -589,44 +584,8 @@ struct AllSwitchPaths { BasicBlock *getSwitchBlock() { return SwitchBlock; } void run() { - StateDefMap StateDef = getStateDefMap(); - if (StateDef.empty()) { - ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", - Switch) - << "Switch instruction is not predictable."; - }); - return; - } - - auto *SwitchPhi = cast<PHINode>(Switch->getOperand(0)); - auto *SwitchPhiDefBB = SwitchPhi->getParent(); - VisitedBlocks VB; - // Get paths from the determinator BBs to SwitchPhiDefBB - std::vector<ThreadingPath> PathsToPhiDef = - getPathsFromStateDefMap(StateDef, SwitchPhi, VB, MaxNumPaths); - if (SwitchPhiDefBB == SwitchBlock || PathsToPhiDef.empty()) { - TPaths = std::move(PathsToPhiDef); - return; - } - - assert(MaxNumPaths >= PathsToPhiDef.size() && !PathsToPhiDef.empty()); - auto PathsLimit = MaxNumPaths / PathsToPhiDef.size(); - // Find and append paths from SwitchPhiDefBB to SwitchBlock. - PathsType PathsToSwitchBB = - paths(SwitchPhiDefBB, SwitchBlock, VB, /* PathDepth = */ 1, PathsLimit); - if (PathsToSwitchBB.empty()) - return; - - std::vector<ThreadingPath> TempList; - for (const ThreadingPath &Path : PathsToPhiDef) { - for (const PathType &PathToSw : PathsToSwitchBB) { - ThreadingPath PathCopy(Path); - PathCopy.appendExcludingFirst(PathToSw); - TempList.push_back(PathCopy); - } - } - TPaths = std::move(TempList); + findTPaths(); + unifyTPaths(); } private: @@ -818,21 +777,98 @@ private: return Res; } + // Find all threadable paths. + void findTPaths() { + StateDefMap StateDef = getStateDefMap(); + if (StateDef.empty()) { + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", + Switch) + << "Switch instruction is not predictable."; + }); + return; + } + + auto *SwitchPhi = cast<PHINode>(Switch->getOperand(0)); + auto *SwitchPhiDefBB = SwitchPhi->getParent(); + VisitedBlocks VB; + // Get paths from the determinator BBs to SwitchPhiDefBB + std::vector<ThreadingPath> PathsToPhiDef = + getPathsFromStateDefMap(StateDef, SwitchPhi, VB, MaxNumPaths); + if (SwitchPhiDefBB == SwitchBlock || PathsToPhiDef.empty()) { + TPaths = std::move(PathsToPhiDef); + return; + } + + assert(MaxNumPaths >= PathsToPhiDef.size() && !PathsToPhiDef.empty()); + auto PathsLimit = MaxNumPaths / PathsToPhiDef.size(); + // Find and append paths from SwitchPhiDefBB to SwitchBlock. + PathsType PathsToSwitchBB = + paths(SwitchPhiDefBB, SwitchBlock, VB, /* PathDepth = */ 1, PathsLimit); + if (PathsToSwitchBB.empty()) + return; + + std::vector<ThreadingPath> TempList; + for (const ThreadingPath &Path : PathsToPhiDef) { + for (const PathType &PathToSw : PathsToSwitchBB) { + ThreadingPath PathCopy(Path); + PathCopy.appendExcludingFirst(PathToSw); + TempList.push_back(PathCopy); + } + } + TPaths = std::move(TempList); + } + + /// Fast helper to get the successor corresponding to a particular case value + /// for a switch statement. + BasicBlock *getNextCaseSuccessor(const APInt &NextState) { + // Precompute the value => successor mapping + if (CaseValToDest.empty()) { + for (auto Case : Switch->cases()) { + APInt CaseVal = Case.getCaseValue()->getValue(); + CaseValToDest[CaseVal] = Case.getCaseSuccessor(); + } + } + + auto SuccIt = CaseValToDest.find(NextState); + return SuccIt == CaseValToDest.end() ? Switch->getDefaultDest() + : SuccIt->second; + } + + // Two states are equivalent if they have the same switch destination. + // Unify the states in different threading path if the states are equivalent. + void unifyTPaths() { + SmallDenseMap<BasicBlock *, APInt> DestToState; + for (ThreadingPath &Path : TPaths) { + APInt NextState = Path.getExitValue(); + BasicBlock *Dest = getNextCaseSuccessor(NextState); + auto [StateIt, Inserted] = DestToState.try_emplace(Dest, NextState); + if (Inserted) + continue; + if (NextState != StateIt->second) { + LLVM_DEBUG(dbgs() << "Next state in " << Path << " is equivalent to " + << StateIt->second << "\n"); + Path.setExitValue(StateIt->second); + } + } + } + unsigned NumVisited = 0; SwitchInst *Switch; BasicBlock *SwitchBlock; OptimizationRemarkEmitter *ORE; std::vector<ThreadingPath> TPaths; + DenseMap<APInt, BasicBlock *> CaseValToDest; LoopInfo *LI; Loop *SwitchOuterLoop; }; struct TransformDFA { - TransformDFA(AllSwitchPaths *SwitchPaths, DominatorTree *DT, + TransformDFA(AllSwitchPaths *SwitchPaths, DomTreeUpdater *DTU, AssumptionCache *AC, TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, SmallPtrSet<const Value *, 32> EphValues) - : SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE), + : SwitchPaths(SwitchPaths), DTU(DTU), AC(AC), TTI(TTI), ORE(ORE), EphValues(EphValues) {} bool run() { @@ -1008,19 +1044,16 @@ private: SmallPtrSet<BasicBlock *, 16> BlocksToClean; BlocksToClean.insert_range(successors(SwitchBlock)); - { - DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy); - for (const ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { - createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU); - NumPaths++; - } - - // After all paths are cloned, now update the last successor of the cloned - // path so it skips over the switch statement - for (const ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) - updateLastSuccessor(TPath, DuplicateMap, &DTU); + for (const ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { + createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, DTU); + NumPaths++; } + // After all paths are cloned, now update the last successor of the cloned + // path so it skips over the switch statement + for (const ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) + updateLastSuccessor(TPath, DuplicateMap, DTU); + // For each instruction that was cloned and used outside, update its uses updateSSA(NewDefs); @@ -1124,7 +1157,25 @@ private: } // SSAUpdater handles phi placement and renaming uses with the appropriate // value. - SSAUpdate.RewriteAllUses(DT); + SSAUpdate.RewriteAllUses(&DTU->getDomTree()); + } + + /// Helper to get the successor corresponding to a particular case value for + /// a switch statement. + /// TODO: Unify it with SwitchPaths->getNextCaseSuccessor(SwitchInst *Switch) + /// by updating cached value => successor mapping during threading. + static BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, + const APInt &NextState) { + BasicBlock *NextCase = nullptr; + for (auto Case : Switch->cases()) { + if (Case.getCaseValue()->getValue() == NextState) { + NextCase = Case.getCaseSuccessor(); + break; + } + } + if (!NextCase) + NextCase = Switch->getDefaultDest(); + return NextCase; } /// Clones a basic block, and adds it to the CFG. @@ -1341,28 +1392,13 @@ private: return It != ClonedBBs.end() ? (*It).BB : nullptr; } - /// Helper to get the successor corresponding to a particular case value for - /// a switch statement. - BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, const APInt &NextState) { - BasicBlock *NextCase = nullptr; - for (auto Case : Switch->cases()) { - if (Case.getCaseValue()->getValue() == NextState) { - NextCase = Case.getCaseSuccessor(); - break; - } - } - if (!NextCase) - NextCase = Switch->getDefaultDest(); - return NextCase; - } - /// Returns true if IncomingBB is a predecessor of BB. bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) { return llvm::is_contained(predecessors(BB), IncomingBB); } AllSwitchPaths *SwitchPaths; - DominatorTree *DT; + DomTreeUpdater *DTU; AssumptionCache *AC; TargetTransformInfo *TTI; OptimizationRemarkEmitter *ORE; @@ -1405,7 +1441,7 @@ bool DFAJumpThreading::run(Function &F) { << "candidate for jump threading\n"); LLVM_DEBUG(SI->dump()); - unfoldSelectInstrs(DT, Switch.getSelectInsts()); + unfoldSelectInstrs(Switch.getSelectInsts()); if (!Switch.getSelectInsts().empty()) MadeChanges = true; @@ -1427,7 +1463,7 @@ bool DFAJumpThreading::run(Function &F) { } #ifdef NDEBUG - LI->verify(*DT); + LI->verify(DTU->getDomTree()); #endif SmallPtrSet<const Value *, 32> EphValues; @@ -1435,13 +1471,15 @@ bool DFAJumpThreading::run(Function &F) { CodeMetrics::collectEphemeralValues(&F, AC, EphValues); for (AllSwitchPaths SwitchPaths : ThreadableLoops) { - TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues); + TransformDFA Transform(&SwitchPaths, DTU, AC, TTI, ORE, EphValues); if (Transform.run()) MadeChanges = LoopInfoBroken = true; } + DTU->flush(); + #ifdef EXPENSIVE_CHECKS - assert(DT->verify(DominatorTree::VerificationLevel::Full)); + assert(DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full)); verifyFunction(F, &dbgs()); #endif @@ -1456,7 +1494,9 @@ PreservedAnalyses DFAJumpThreadingPass::run(Function &F, LoopInfo &LI = AM.getResult<LoopAnalysis>(F); TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F); OptimizationRemarkEmitter ORE(&F); - DFAJumpThreading ThreadImpl(&AC, &DT, &LI, &TTI, &ORE); + + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); + DFAJumpThreading ThreadImpl(&AC, &DTU, &LI, &TTI, &ORE); if (!ThreadImpl.run(F)) return PreservedAnalyses::all(); diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp index 0f8cc6c..2afa7b7 100644 --- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -108,7 +108,7 @@ struct SimpleValue { // of instruction handled below (UnaryOperator, etc.). if (CallInst *CI = dyn_cast<CallInst>(Inst)) { if (Function *F = CI->getCalledFunction()) { - switch ((Intrinsic::ID)F->getIntrinsicID()) { + switch (F->getIntrinsicID()) { case Intrinsic::experimental_constrained_fadd: case Intrinsic::experimental_constrained_fsub: case Intrinsic::experimental_constrained_fmul: @@ -154,9 +154,7 @@ struct SimpleValue { } // end anonymous namespace -namespace llvm { - -template <> struct DenseMapInfo<SimpleValue> { +template <> struct llvm::DenseMapInfo<SimpleValue> { static inline SimpleValue getEmptyKey() { return DenseMapInfo<Instruction *>::getEmptyKey(); } @@ -169,8 +167,6 @@ template <> struct DenseMapInfo<SimpleValue> { static bool isEqual(SimpleValue LHS, SimpleValue RHS); }; -} // end namespace llvm - /// Match a 'select' including an optional 'not's of the condition. static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A, Value *&B, @@ -509,9 +505,7 @@ struct CallValue { } // end anonymous namespace -namespace llvm { - -template <> struct DenseMapInfo<CallValue> { +template <> struct llvm::DenseMapInfo<CallValue> { static inline CallValue getEmptyKey() { return DenseMapInfo<Instruction *>::getEmptyKey(); } @@ -524,8 +518,6 @@ template <> struct DenseMapInfo<CallValue> { static bool isEqual(CallValue LHS, CallValue RHS); }; -} // end namespace llvm - unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) { Instruction *Inst = Val.Inst; @@ -580,9 +572,7 @@ struct GEPValue { } // namespace -namespace llvm { - -template <> struct DenseMapInfo<GEPValue> { +template <> struct llvm::DenseMapInfo<GEPValue> { static inline GEPValue getEmptyKey() { return DenseMapInfo<Instruction *>::getEmptyKey(); } @@ -595,8 +585,6 @@ template <> struct DenseMapInfo<GEPValue> { static bool isEqual(const GEPValue &LHS, const GEPValue &RHS); }; -} // end namespace llvm - unsigned DenseMapInfo<GEPValue>::getHashValue(const GEPValue &Val) { auto *GEP = cast<GetElementPtrInst>(Val.Inst); if (Val.ConstantOffset.has_value()) diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index 638952a..42db424 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -170,9 +170,7 @@ struct llvm::GVNPass::Expression { } }; -namespace llvm { - -template <> struct DenseMapInfo<GVNPass::Expression> { +template <> struct llvm::DenseMapInfo<GVNPass::Expression> { static inline GVNPass::Expression getEmptyKey() { return ~0U; } static inline GVNPass::Expression getTombstoneKey() { return ~1U; } @@ -188,8 +186,6 @@ template <> struct DenseMapInfo<GVNPass::Expression> { } }; -} // end namespace llvm - /// Represents a particular available value that we know how to materialize. /// Materialization of an AvailableValue never fails. An AvailableValue is /// implicitly associated with a rematerialization point which is the @@ -2084,13 +2080,6 @@ bool GVNPass::processNonLocalLoad(LoadInst *Load) { return Changed; } -static bool hasUsersIn(Value *V, BasicBlock *BB) { - return any_of(V->users(), [BB](User *U) { - auto *I = dyn_cast<Instruction>(U); - return I && I->getParent() == BB; - }); -} - bool GVNPass::processAssumeIntrinsic(AssumeInst *IntrinsicI) { Value *V = IntrinsicI->getArgOperand(0); @@ -2149,85 +2138,7 @@ bool GVNPass::processAssumeIntrinsic(AssumeInst *IntrinsicI) { } Constant *True = ConstantInt::getTrue(V->getContext()); - bool Changed = false; - - for (BasicBlock *Successor : successors(IntrinsicI->getParent())) { - BasicBlockEdge Edge(IntrinsicI->getParent(), Successor); - - // This property is only true in dominated successors, propagateEquality - // will check dominance for us. - Changed |= propagateEquality(V, True, Edge, false); - } - - // We can replace assume value with true, which covers cases like this: - // call void @llvm.assume(i1 %cmp) - // br i1 %cmp, label %bb1, label %bb2 ; will change %cmp to true - ReplaceOperandsWithMap[V] = True; - - // Similarly, after assume(!NotV) we know that NotV == false. - Value *NotV; - if (match(V, m_Not(m_Value(NotV)))) - ReplaceOperandsWithMap[NotV] = ConstantInt::getFalse(V->getContext()); - - // If we find an equality fact, canonicalize all dominated uses in this block - // to one of the two values. We heuristically choice the "oldest" of the - // two where age is determined by value number. (Note that propagateEquality - // above handles the cross block case.) - // - // Key case to cover are: - // 1) - // %cmp = fcmp oeq float 3.000000e+00, %0 ; const on lhs could happen - // call void @llvm.assume(i1 %cmp) - // ret float %0 ; will change it to ret float 3.000000e+00 - // 2) - // %load = load float, float* %addr - // %cmp = fcmp oeq float %load, %0 - // call void @llvm.assume(i1 %cmp) - // ret float %load ; will change it to ret float %0 - if (auto *CmpI = dyn_cast<CmpInst>(V)) { - if (CmpI->isEquivalence()) { - Value *CmpLHS = CmpI->getOperand(0); - Value *CmpRHS = CmpI->getOperand(1); - // Heuristically pick the better replacement -- the choice of heuristic - // isn't terribly important here, but the fact we canonicalize on some - // replacement is for exposing other simplifications. - // TODO: pull this out as a helper function and reuse w/ existing - // (slightly different) logic. - if (isa<Constant>(CmpLHS) && !isa<Constant>(CmpRHS)) - std::swap(CmpLHS, CmpRHS); - if (!isa<Instruction>(CmpLHS) && isa<Instruction>(CmpRHS)) - std::swap(CmpLHS, CmpRHS); - if ((isa<Argument>(CmpLHS) && isa<Argument>(CmpRHS)) || - (isa<Instruction>(CmpLHS) && isa<Instruction>(CmpRHS))) { - // Move the 'oldest' value to the right-hand side, using the value - // number as a proxy for age. - uint32_t LVN = VN.lookupOrAdd(CmpLHS); - uint32_t RVN = VN.lookupOrAdd(CmpRHS); - if (LVN < RVN) - std::swap(CmpLHS, CmpRHS); - } - - // Handle degenerate case where we either haven't pruned a dead path or a - // removed a trivial assume yet. - if (isa<Constant>(CmpLHS) && isa<Constant>(CmpRHS)) - return Changed; - - LLVM_DEBUG(dbgs() << "Replacing dominated uses of " - << *CmpLHS << " with " - << *CmpRHS << " in block " - << IntrinsicI->getParent()->getName() << "\n"); - - // Setup the replacement map - this handles uses within the same block. - if (hasUsersIn(CmpLHS, IntrinsicI->getParent())) - ReplaceOperandsWithMap[CmpLHS] = CmpRHS; - - // NOTE: The non-block local cases are handled by the call to - // propagateEquality above; this block is just about handling the block - // local cases. TODO: There's a bunch of logic in propagateEqualiy which - // isn't duplicated for the block local case, can we share it somehow? - } - } - return Changed; + return propagateEquality(V, True, IntrinsicI); } static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) { @@ -2245,6 +2156,9 @@ bool GVNPass::processLoad(LoadInst *L) { if (!L->isUnordered()) return false; + if (L->getType()->isTokenLikeTy()) + return false; + if (L->use_empty()) { salvageAndRemoveInstruction(L); return true; @@ -2526,39 +2440,28 @@ void GVNPass::assignBlockRPONumber(Function &F) { InvalidBlockRPONumbers = false; } -bool GVNPass::replaceOperandsForInBlockEquality(Instruction *Instr) const { - bool Changed = false; - for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) { - Use &Operand = Instr->getOperandUse(OpNum); - auto It = ReplaceOperandsWithMap.find(Operand.get()); - if (It != ReplaceOperandsWithMap.end()) { - const DataLayout &DL = Instr->getDataLayout(); - if (!canReplacePointersInUseIfEqual(Operand, It->second, DL)) - continue; - - LLVM_DEBUG(dbgs() << "GVN replacing: " << *Operand << " with " - << *It->second << " in instruction " << *Instr << '\n'); - Instr->setOperand(OpNum, It->second); - Changed = true; - } - } - return Changed; -} - -/// The given values are known to be equal in every block +/// The given values are known to be equal in every use /// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with /// 'RHS' everywhere in the scope. Returns whether a change was made. -/// If DominatesByEdge is false, then it means that we will propagate the RHS -/// value starting from the end of Root.Start. -bool GVNPass::propagateEquality(Value *LHS, Value *RHS, - const BasicBlockEdge &Root, - bool DominatesByEdge) { +/// The Root may either be a basic block edge (for conditions) or an +/// instruction (for assumes). +bool GVNPass::propagateEquality( + Value *LHS, Value *RHS, + const std::variant<BasicBlockEdge, Instruction *> &Root) { SmallVector<std::pair<Value*, Value*>, 4> Worklist; Worklist.push_back(std::make_pair(LHS, RHS)); bool Changed = false; - // For speed, compute a conservative fast approximation to - // DT->dominates(Root, Root.getEnd()); - const bool RootDominatesEnd = isOnlyReachableViaThisEdge(Root, DT); + SmallVector<const BasicBlock *> DominatedBlocks; + if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(&Root)) { + // For speed, compute a conservative fast approximation to + // DT->dominates(Root, Root.getEnd()); + if (isOnlyReachableViaThisEdge(*Edge, DT)) + DominatedBlocks.push_back(Edge->getEnd()); + } else { + Instruction *I = std::get<Instruction *>(Root); + for (const auto *Node : DT->getNode(I->getParent())->children()) + DominatedBlocks.push_back(Node->getBlock()); + } while (!Worklist.empty()) { std::pair<Value*, Value*> Item = Worklist.pop_back_val(); @@ -2606,9 +2509,9 @@ bool GVNPass::propagateEquality(Value *LHS, Value *RHS, // using the leader table is about compiling faster, not optimizing better). // The leader table only tracks basic blocks, not edges. Only add to if we // have the simple case where the edge dominates the end. - if (RootDominatesEnd && !isa<Instruction>(RHS) && - canReplacePointersIfEqual(LHS, RHS, DL)) - LeaderTable.insert(LVN, RHS, Root.getEnd()); + if (!isa<Instruction>(RHS) && canReplacePointersIfEqual(LHS, RHS, DL)) + for (const BasicBlock *BB : DominatedBlocks) + LeaderTable.insert(LVN, RHS, BB); // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As // LHS always has at least one use that is not dominated by Root, this will @@ -2618,12 +2521,14 @@ bool GVNPass::propagateEquality(Value *LHS, Value *RHS, auto CanReplacePointersCallBack = [&DL](const Use &U, const Value *To) { return canReplacePointersInUseIfEqual(U, To, DL); }; - unsigned NumReplacements = - DominatesByEdge - ? replaceDominatedUsesWithIf(LHS, RHS, *DT, Root, - CanReplacePointersCallBack) - : replaceDominatedUsesWithIf(LHS, RHS, *DT, Root.getStart(), - CanReplacePointersCallBack); + unsigned NumReplacements; + if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(&Root)) + NumReplacements = replaceDominatedUsesWithIf( + LHS, RHS, *DT, *Edge, CanReplacePointersCallBack); + else + NumReplacements = replaceDominatedUsesWithIf( + LHS, RHS, *DT, std::get<Instruction *>(Root), + CanReplacePointersCallBack); if (NumReplacements > 0) { Changed = true; @@ -2682,26 +2587,45 @@ bool GVNPass::propagateEquality(Value *LHS, Value *RHS, // If the number we were assigned was brand new then there is no point in // looking for an instruction realizing it: there cannot be one! if (Num < NextNum) { - Value *NotCmp = findLeader(Root.getEnd(), Num); - if (NotCmp && isa<Instruction>(NotCmp)) { - unsigned NumReplacements = - DominatesByEdge - ? replaceDominatedUsesWith(NotCmp, NotVal, *DT, Root) - : replaceDominatedUsesWith(NotCmp, NotVal, *DT, - Root.getStart()); - Changed |= NumReplacements > 0; - NumGVNEqProp += NumReplacements; - // Cached information for anything that uses NotCmp will be invalid. - if (MD) - MD->invalidateCachedPointerInfo(NotCmp); + for (const auto &Entry : LeaderTable.getLeaders(Num)) { + // Only look at leaders that either dominate the start of the edge, + // or are dominated by the end. This check is not necessary for + // correctness, it only discards cases for which the following + // use replacement will not work anyway. + if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(&Root)) { + if (!DT->dominates(Entry.BB, Edge->getStart()) && + !DT->dominates(Edge->getEnd(), Entry.BB)) + continue; + } else { + auto *InstBB = std::get<Instruction *>(Root)->getParent(); + if (!DT->dominates(Entry.BB, InstBB) && + !DT->dominates(InstBB, Entry.BB)) + continue; + } + + Value *NotCmp = Entry.Val; + if (NotCmp && isa<Instruction>(NotCmp)) { + unsigned NumReplacements; + if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(&Root)) + NumReplacements = + replaceDominatedUsesWith(NotCmp, NotVal, *DT, *Edge); + else + NumReplacements = replaceDominatedUsesWith( + NotCmp, NotVal, *DT, std::get<Instruction *>(Root)); + Changed |= NumReplacements > 0; + NumGVNEqProp += NumReplacements; + // Cached information for anything that uses NotCmp will be invalid. + if (MD) + MD->invalidateCachedPointerInfo(NotCmp); + } } } // Ensure that any instruction in scope that gets the "A < B" value number // is replaced with false. // The leader table only tracks basic blocks, not edges. Only add to if we // have the simple case where the edge dominates the end. - if (RootDominatesEnd) - LeaderTable.insert(Num, NotVal, Root.getEnd()); + for (const BasicBlock *BB : DominatedBlocks) + LeaderTable.insert(Num, NotVal, BB); continue; } @@ -2789,11 +2713,11 @@ bool GVNPass::processInstruction(Instruction *I) { Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext()); BasicBlockEdge TrueE(Parent, TrueSucc); - Changed |= propagateEquality(BranchCond, TrueVal, TrueE, true); + Changed |= propagateEquality(BranchCond, TrueVal, TrueE); Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext()); BasicBlockEdge FalseE(Parent, FalseSucc); - Changed |= propagateEquality(BranchCond, FalseVal, FalseE, true); + Changed |= propagateEquality(BranchCond, FalseVal, FalseE); return Changed; } @@ -2814,7 +2738,7 @@ bool GVNPass::processInstruction(Instruction *I) { // If there is only a single edge, propagate the case value into it. if (SwitchEdges.lookup(Dst) == 1) { BasicBlockEdge E(Parent, Dst); - Changed |= propagateEquality(SwitchCond, Case.getCaseValue(), E, true); + Changed |= propagateEquality(SwitchCond, Case.getCaseValue(), E); } } return Changed; @@ -2942,8 +2866,6 @@ bool GVNPass::processBlock(BasicBlock *BB) { if (DeadBlocks.count(BB)) return false; - // Clearing map before every BB because it can be used only for single BB. - ReplaceOperandsWithMap.clear(); bool ChangedFunction = false; // Since we may not have visited the input blocks of the phis, we can't @@ -2955,11 +2877,8 @@ bool GVNPass::processBlock(BasicBlock *BB) { for (PHINode *PN : PHINodesToRemove) { removeInstruction(PN); } - for (Instruction &Inst : make_early_inc_range(*BB)) { - if (!ReplaceOperandsWithMap.empty()) - ChangedFunction |= replaceOperandsForInBlockEquality(&Inst); + for (Instruction &Inst : make_early_inc_range(*BB)) ChangedFunction |= processInstruction(&Inst); - } return ChangedFunction; } diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp index 3c1a8ba..80aa98d 100644 --- a/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -434,10 +434,6 @@ private: int StoreCount = 0; }; -} // end anonymous namespace - -namespace llvm { - struct ExactEqualsExpression { const Expression &E; @@ -449,8 +445,9 @@ struct ExactEqualsExpression { return E.exactlyEquals(Other); } }; +} // end anonymous namespace -template <> struct DenseMapInfo<const Expression *> { +template <> struct llvm::DenseMapInfo<const Expression *> { static const Expression *getEmptyKey() { auto Val = static_cast<uintptr_t>(-1); Val <<= PointerLikeTypeTraits<const Expression *>::NumLowBitsAvailable; @@ -493,8 +490,6 @@ template <> struct DenseMapInfo<const Expression *> { } }; -} // end namespace llvm - namespace { class NewGVN { |