aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Scalar
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Scalar')
-rw-r--r--llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp67
1 files changed, 36 insertions, 31 deletions
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index 41a6c80..ae34b4e 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -89,6 +89,7 @@ STATISTIC(NumTransforms, "Number of transformations done");
STATISTIC(NumCloned, "Number of blocks cloned");
STATISTIC(NumPaths, "Number of individual paths threaded");
+namespace llvm {
static cl::opt<bool>
ClViewCfgBefore("dfa-jump-view-cfg-before",
cl::desc("View the CFG before DFA Jump Threading"),
@@ -119,6 +120,7 @@ static cl::opt<unsigned>
CostThreshold("dfa-cost-threshold",
cl::desc("Maximum cost accepted for the transformation"),
cl::Hidden, cl::init(50));
+} // namespace llvm
static cl::opt<double> MaxClonedRate(
"dfa-max-cloned-rate",
@@ -127,7 +129,6 @@ static cl::opt<double> MaxClonedRate(
cl::Hidden, cl::init(7.5));
namespace {
-
class SelectInstToUnfold {
SelectInst *SI;
PHINode *SIUse;
@@ -141,10 +142,6 @@ public:
explicit operator bool() const { return SI && SIUse; }
};
-void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
- std::vector<SelectInstToUnfold> *NewSIsToUnfold,
- std::vector<BasicBlock *> *NewBBs);
-
class DFAJumpThreading {
public:
DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI,
@@ -158,7 +155,8 @@ private:
void
unfoldSelectInstrs(DominatorTree *DT,
const SmallVector<SelectInstToUnfold, 4> &SelectInsts) {
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ // TODO: Have everything use a single lazy DTU
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
SmallVector<SelectInstToUnfold, 4> Stack(SelectInsts);
while (!Stack.empty()) {
@@ -173,16 +171,18 @@ private:
}
}
+ static void unfold(DomTreeUpdater *DTU, LoopInfo *LI,
+ SelectInstToUnfold SIToUnfold,
+ std::vector<SelectInstToUnfold> *NewSIsToUnfold,
+ std::vector<BasicBlock *> *NewBBs);
+
AssumptionCache *AC;
DominatorTree *DT;
LoopInfo *LI;
TargetTransformInfo *TTI;
OptimizationRemarkEmitter *ORE;
};
-
-} // end anonymous namespace
-
-namespace {
+} // namespace
/// Unfold the select instruction held in \p SIToUnfold by replacing it with
/// control flow.
@@ -191,9 +191,10 @@ namespace {
/// created basic blocks into \p NewBBs.
///
/// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible.
-void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
- std::vector<SelectInstToUnfold> *NewSIsToUnfold,
- std::vector<BasicBlock *> *NewBBs) {
+void DFAJumpThreading::unfold(DomTreeUpdater *DTU, LoopInfo *LI,
+ SelectInstToUnfold SIToUnfold,
+ std::vector<SelectInstToUnfold> *NewSIsToUnfold,
+ std::vector<BasicBlock *> *NewBBs) {
SelectInst *SI = SIToUnfold.getInst();
PHINode *SIUse = SIToUnfold.getUse();
assert(SI->hasOneUse());
@@ -348,10 +349,12 @@ void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
SI->eraseFromParent();
}
+namespace {
struct ClonedBlock {
BasicBlock *BB;
APInt State; ///< \p State corresponds to the next value of a switch stmnt.
};
+} // namespace
typedef std::deque<BasicBlock *> PathType;
typedef std::vector<PathType> PathsType;
@@ -381,6 +384,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) {
return OS;
}
+namespace {
/// ThreadingPath is a path in the control flow of a loop that can be threaded
/// by cloning necessary basic blocks and replacing conditional branches with
/// unconditional ones. A threading path includes a list of basic blocks, the
@@ -820,11 +824,13 @@ struct TransformDFA {
: SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE),
EphValues(EphValues) {}
- void run() {
+ bool run() {
if (isLegalAndProfitableToTransform()) {
createAllExitPaths();
NumTransforms++;
+ return true;
}
+ return false;
}
private:
@@ -975,8 +981,6 @@ private:
/// Transform each threading path to effectively jump thread the DFA.
void createAllExitPaths() {
- DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Eager);
-
// Move the switch block to the end of the path, since it will be duplicated
BasicBlock *SwitchBlock = SwitchPaths->getSwitchBlock();
for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
@@ -993,15 +997,18 @@ private:
SmallPtrSet<BasicBlock *, 16> BlocksToClean;
BlocksToClean.insert_range(successors(SwitchBlock));
- for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
- createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU);
- NumPaths++;
- }
+ {
+ DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy);
+ for (const ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
+ createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU);
+ NumPaths++;
+ }
- // After all paths are cloned, now update the last successor of the cloned
- // path so it skips over the switch statement
- for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths())
- updateLastSuccessor(TPath, DuplicateMap, &DTU);
+ // After all paths are cloned, now update the last successor of the cloned
+ // path so it skips over the switch statement
+ for (const ThreadingPath &TPath : SwitchPaths->getThreadingPaths())
+ updateLastSuccessor(TPath, DuplicateMap, &DTU);
+ }
// For each instruction that was cloned and used outside, update its uses
updateSSA(NewDefs);
@@ -1017,7 +1024,7 @@ private:
/// To remember the correct destination, we have to duplicate blocks
/// corresponding to each state. Also update the terminating instruction of
/// the predecessors, and phis in the successor blocks.
- void createExitPath(DefMap &NewDefs, ThreadingPath &Path,
+ void createExitPath(DefMap &NewDefs, const ThreadingPath &Path,
DuplicateBlockMap &DuplicateMap,
SmallPtrSet<BasicBlock *, 16> &BlocksToClean,
DomTreeUpdater *DTU) {
@@ -1263,7 +1270,7 @@ private:
///
/// Note that this is an optional step and would have been done in later
/// optimizations, but it makes the CFG significantly easier to work with.
- void updateLastSuccessor(ThreadingPath &TPath,
+ void updateLastSuccessor(const ThreadingPath &TPath,
DuplicateBlockMap &DuplicateMap,
DomTreeUpdater *DTU) {
APInt NextState = TPath.getExitValue();
@@ -1360,6 +1367,7 @@ private:
SmallPtrSet<const Value *, 32> EphValues;
std::vector<ThreadingPath> TPaths;
};
+} // namespace
bool DFAJumpThreading::run(Function &F) {
LLVM_DEBUG(dbgs() << "\nDFA Jump threading: " << F.getName() << "\n");
@@ -1426,9 +1434,8 @@ bool DFAJumpThreading::run(Function &F) {
for (AllSwitchPaths SwitchPaths : ThreadableLoops) {
TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues);
- Transform.run();
- MadeChanges = true;
- LoopInfoBroken = true;
+ if (Transform.run())
+ MadeChanges = LoopInfoBroken = true;
}
#ifdef EXPENSIVE_CHECKS
@@ -1439,8 +1446,6 @@ bool DFAJumpThreading::run(Function &F) {
return MadeChanges;
}
-} // end anonymous namespace
-
/// Integrate with the new Pass Manager
PreservedAnalyses DFAJumpThreadingPass::run(Function &F,
FunctionAnalysisManager &AM) {