diff options
Diffstat (limited to 'llvm/lib/Transforms/Scalar')
-rw-r--r-- | llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp | 67 |
1 files changed, 36 insertions, 31 deletions
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp index 41a6c80..ae34b4e 100644 --- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp @@ -89,6 +89,7 @@ STATISTIC(NumTransforms, "Number of transformations done"); STATISTIC(NumCloned, "Number of blocks cloned"); STATISTIC(NumPaths, "Number of individual paths threaded"); +namespace llvm { static cl::opt<bool> ClViewCfgBefore("dfa-jump-view-cfg-before", cl::desc("View the CFG before DFA Jump Threading"), @@ -119,6 +120,7 @@ static cl::opt<unsigned> CostThreshold("dfa-cost-threshold", cl::desc("Maximum cost accepted for the transformation"), cl::Hidden, cl::init(50)); +} // namespace llvm static cl::opt<double> MaxClonedRate( "dfa-max-cloned-rate", @@ -127,7 +129,6 @@ static cl::opt<double> MaxClonedRate( cl::Hidden, cl::init(7.5)); namespace { - class SelectInstToUnfold { SelectInst *SI; PHINode *SIUse; @@ -141,10 +142,6 @@ public: explicit operator bool() const { return SI && SIUse; } }; -void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold, - std::vector<SelectInstToUnfold> *NewSIsToUnfold, - std::vector<BasicBlock *> *NewBBs); - class DFAJumpThreading { public: DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI, @@ -158,7 +155,8 @@ private: void unfoldSelectInstrs(DominatorTree *DT, const SmallVector<SelectInstToUnfold, 4> &SelectInsts) { - DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); + // TODO: Have everything use a single lazy DTU + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); SmallVector<SelectInstToUnfold, 4> Stack(SelectInsts); while (!Stack.empty()) { @@ -173,16 +171,18 @@ private: } } + static void unfold(DomTreeUpdater *DTU, LoopInfo *LI, + SelectInstToUnfold SIToUnfold, + std::vector<SelectInstToUnfold> *NewSIsToUnfold, + std::vector<BasicBlock *> *NewBBs); + AssumptionCache *AC; DominatorTree *DT; LoopInfo *LI; TargetTransformInfo *TTI; OptimizationRemarkEmitter *ORE; }; - -} // end anonymous namespace - -namespace { +} // namespace /// Unfold the select instruction held in \p SIToUnfold by replacing it with /// control flow. @@ -191,9 +191,10 @@ namespace { /// created basic blocks into \p NewBBs. /// /// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible. -void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold, - std::vector<SelectInstToUnfold> *NewSIsToUnfold, - std::vector<BasicBlock *> *NewBBs) { +void DFAJumpThreading::unfold(DomTreeUpdater *DTU, LoopInfo *LI, + SelectInstToUnfold SIToUnfold, + std::vector<SelectInstToUnfold> *NewSIsToUnfold, + std::vector<BasicBlock *> *NewBBs) { SelectInst *SI = SIToUnfold.getInst(); PHINode *SIUse = SIToUnfold.getUse(); assert(SI->hasOneUse()); @@ -348,10 +349,12 @@ void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold, SI->eraseFromParent(); } +namespace { struct ClonedBlock { BasicBlock *BB; APInt State; ///< \p State corresponds to the next value of a switch stmnt. }; +} // namespace typedef std::deque<BasicBlock *> PathType; typedef std::vector<PathType> PathsType; @@ -381,6 +384,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) { return OS; } +namespace { /// ThreadingPath is a path in the control flow of a loop that can be threaded /// by cloning necessary basic blocks and replacing conditional branches with /// unconditional ones. A threading path includes a list of basic blocks, the @@ -820,11 +824,13 @@ struct TransformDFA { : SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE), EphValues(EphValues) {} - void run() { + bool run() { if (isLegalAndProfitableToTransform()) { createAllExitPaths(); NumTransforms++; + return true; } + return false; } private: @@ -975,8 +981,6 @@ private: /// Transform each threading path to effectively jump thread the DFA. void createAllExitPaths() { - DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Eager); - // Move the switch block to the end of the path, since it will be duplicated BasicBlock *SwitchBlock = SwitchPaths->getSwitchBlock(); for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { @@ -993,15 +997,18 @@ private: SmallPtrSet<BasicBlock *, 16> BlocksToClean; BlocksToClean.insert_range(successors(SwitchBlock)); - for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { - createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU); - NumPaths++; - } + { + DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy); + for (const ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { + createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU); + NumPaths++; + } - // After all paths are cloned, now update the last successor of the cloned - // path so it skips over the switch statement - for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) - updateLastSuccessor(TPath, DuplicateMap, &DTU); + // After all paths are cloned, now update the last successor of the cloned + // path so it skips over the switch statement + for (const ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) + updateLastSuccessor(TPath, DuplicateMap, &DTU); + } // For each instruction that was cloned and used outside, update its uses updateSSA(NewDefs); @@ -1017,7 +1024,7 @@ private: /// To remember the correct destination, we have to duplicate blocks /// corresponding to each state. Also update the terminating instruction of /// the predecessors, and phis in the successor blocks. - void createExitPath(DefMap &NewDefs, ThreadingPath &Path, + void createExitPath(DefMap &NewDefs, const ThreadingPath &Path, DuplicateBlockMap &DuplicateMap, SmallPtrSet<BasicBlock *, 16> &BlocksToClean, DomTreeUpdater *DTU) { @@ -1263,7 +1270,7 @@ private: /// /// Note that this is an optional step and would have been done in later /// optimizations, but it makes the CFG significantly easier to work with. - void updateLastSuccessor(ThreadingPath &TPath, + void updateLastSuccessor(const ThreadingPath &TPath, DuplicateBlockMap &DuplicateMap, DomTreeUpdater *DTU) { APInt NextState = TPath.getExitValue(); @@ -1360,6 +1367,7 @@ private: SmallPtrSet<const Value *, 32> EphValues; std::vector<ThreadingPath> TPaths; }; +} // namespace bool DFAJumpThreading::run(Function &F) { LLVM_DEBUG(dbgs() << "\nDFA Jump threading: " << F.getName() << "\n"); @@ -1426,9 +1434,8 @@ bool DFAJumpThreading::run(Function &F) { for (AllSwitchPaths SwitchPaths : ThreadableLoops) { TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues); - Transform.run(); - MadeChanges = true; - LoopInfoBroken = true; + if (Transform.run()) + MadeChanges = LoopInfoBroken = true; } #ifdef EXPENSIVE_CHECKS @@ -1439,8 +1446,6 @@ bool DFAJumpThreading::run(Function &F) { return MadeChanges; } -} // end anonymous namespace - /// Integrate with the new Pass Manager PreservedAnalyses DFAJumpThreadingPass::run(Function &F, FunctionAnalysisManager &AM) { |