aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Utils/LoopUnroll.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Utils/LoopUnroll.cpp')
-rw-r--r--llvm/lib/Transforms/Utils/LoopUnroll.cpp155
1 files changed, 147 insertions, 8 deletions
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 6f0d000..e9969ae 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -18,17 +18,20 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/ilist_iterator.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/BasicBlock.h"
@@ -209,13 +212,140 @@ static bool isEpilogProfitable(Loop *L) {
return false;
}
+struct LoadValue {
+ Instruction *DefI = nullptr;
+ unsigned Generation = 0;
+ LoadValue() = default;
+ LoadValue(Instruction *Inst, unsigned Generation)
+ : DefI(Inst), Generation(Generation) {}
+};
+
+class StackNode {
+ ScopedHashTable<const SCEV *, LoadValue>::ScopeTy LoadScope;
+ unsigned CurrentGeneration;
+ unsigned ChildGeneration;
+ DomTreeNode *Node;
+ DomTreeNode::const_iterator ChildIter;
+ DomTreeNode::const_iterator EndIter;
+ bool Processed = false;
+
+public:
+ StackNode(ScopedHashTable<const SCEV *, LoadValue> &AvailableLoads,
+ unsigned cg, DomTreeNode *N, DomTreeNode::const_iterator Child,
+ DomTreeNode::const_iterator End)
+ : LoadScope(AvailableLoads), CurrentGeneration(cg), ChildGeneration(cg),
+ Node(N), ChildIter(Child), EndIter(End) {}
+ // Accessors.
+ unsigned currentGeneration() const { return CurrentGeneration; }
+ unsigned childGeneration() const { return ChildGeneration; }
+ void childGeneration(unsigned generation) { ChildGeneration = generation; }
+ DomTreeNode *node() { return Node; }
+ DomTreeNode::const_iterator childIter() const { return ChildIter; }
+
+ DomTreeNode *nextChild() {
+ DomTreeNode *Child = *ChildIter;
+ ++ChildIter;
+ return Child;
+ }
+
+ DomTreeNode::const_iterator end() const { return EndIter; }
+ bool isProcessed() const { return Processed; }
+ void process() { Processed = true; }
+};
+
+Value *getMatchingValue(LoadValue LV, LoadInst *LI, unsigned CurrentGeneration,
+ BatchAAResults &BAA,
+ function_ref<MemorySSA *()> GetMSSA) {
+ if (!LV.DefI)
+ return nullptr;
+ if (LV.DefI->getType() != LI->getType())
+ return nullptr;
+ if (LV.Generation != CurrentGeneration) {
+ MemorySSA *MSSA = GetMSSA();
+ if (!MSSA)
+ return nullptr;
+ auto *EarlierMA = MSSA->getMemoryAccess(LV.DefI);
+ MemoryAccess *LaterDef =
+ MSSA->getWalker()->getClobberingMemoryAccess(LI, BAA);
+ if (!MSSA->dominates(LaterDef, EarlierMA))
+ return nullptr;
+ }
+ return LV.DefI;
+}
+
+void loadCSE(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI,
+ BatchAAResults &BAA, function_ref<MemorySSA *()> GetMSSA) {
+ ScopedHashTable<const SCEV *, LoadValue> AvailableLoads;
+ SmallVector<std::unique_ptr<StackNode>> NodesToProcess;
+ DomTreeNode *HeaderD = DT.getNode(L->getHeader());
+ NodesToProcess.emplace_back(new StackNode(AvailableLoads, 0, HeaderD,
+ HeaderD->begin(), HeaderD->end()));
+
+ unsigned CurrentGeneration = 0;
+ while (!NodesToProcess.empty()) {
+ StackNode *NodeToProcess = &*NodesToProcess.back();
+
+ CurrentGeneration = NodeToProcess->currentGeneration();
+
+ if (!NodeToProcess->isProcessed()) {
+ // Process the node.
+
+ // If this block has a single predecessor, then the predecessor is the
+ // parent
+ // of the domtree node and all of the live out memory values are still
+ // current in this block. If this block has multiple predecessors, then
+ // they could have invalidated the live-out memory values of our parent
+ // value. For now, just be conservative and invalidate memory if this
+ // block has multiple predecessors.
+ if (!NodeToProcess->node()->getBlock()->getSinglePredecessor())
+ ++CurrentGeneration;
+ for (auto &I : make_early_inc_range(*NodeToProcess->node()->getBlock())) {
+
+ auto *Load = dyn_cast<LoadInst>(&I);
+ if (!Load || !Load->isSimple()) {
+ if (I.mayWriteToMemory())
+ CurrentGeneration++;
+ continue;
+ }
+
+ const SCEV *PtrSCEV = SE.getSCEV(Load->getPointerOperand());
+ LoadValue LV = AvailableLoads.lookup(PtrSCEV);
+ if (Value *M =
+ getMatchingValue(LV, Load, CurrentGeneration, BAA, GetMSSA)) {
+ if (LI.replacementPreservesLCSSAForm(Load, M)) {
+ Load->replaceAllUsesWith(M);
+ Load->eraseFromParent();
+ }
+ } else {
+ AvailableLoads.insert(PtrSCEV, LoadValue(Load, CurrentGeneration));
+ }
+ }
+ NodeToProcess->childGeneration(CurrentGeneration);
+ NodeToProcess->process();
+ } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
+ // Push the next child onto the stack.
+ DomTreeNode *Child = NodeToProcess->nextChild();
+ if (!L->contains(Child->getBlock()))
+ continue;
+ NodesToProcess.emplace_back(
+ new StackNode(AvailableLoads, NodeToProcess->childGeneration(), Child,
+ Child->begin(), Child->end()));
+ } else {
+ // It has been processed, and there are no more children to process,
+ // so delete it and pop it off the stack.
+ NodesToProcess.pop_back();
+ }
+ }
+}
+
/// Perform some cleanup and simplifications on loops after unrolling. It is
/// useful to simplify the IV's in the new loop, as well as do a quick
/// simplify/dce pass of the instructions.
void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
ScalarEvolution *SE, DominatorTree *DT,
AssumptionCache *AC,
- const TargetTransformInfo *TTI) {
+ const TargetTransformInfo *TTI,
+ AAResults *AA) {
using namespace llvm::PatternMatch;
// Simplify any new induction variables in the partially unrolled loop.
@@ -230,6 +360,16 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
RecursivelyDeleteTriviallyDeadInstructions(Inst);
}
+
+ if (AA) {
+ std::unique_ptr<MemorySSA> MSSA = nullptr;
+ BatchAAResults BAA(*AA);
+ loadCSE(L, *DT, *SE, *LI, BAA, [L, AA, DT, &MSSA]() -> MemorySSA * {
+ if (!MSSA)
+ MSSA.reset(new MemorySSA(*L, AA, DT));
+ return &*MSSA;
+ });
+ }
}
// At this point, the code is well formed. Perform constprop, instsimplify,
@@ -292,12 +432,11 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
///
/// If RemainderLoop is non-null, it will receive the remainder loop (if
/// required and not fully unrolled).
-LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
- ScalarEvolution *SE, DominatorTree *DT,
- AssumptionCache *AC,
- const TargetTransformInfo *TTI,
- OptimizationRemarkEmitter *ORE,
- bool PreserveLCSSA, Loop **RemainderLoop) {
+LoopUnrollResult
+llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
+ ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
+ const TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE,
+ bool PreserveLCSSA, Loop **RemainderLoop, AAResults *AA) {
assert(DT && "DomTree is required");
if (!L->getLoopPreheader()) {
@@ -852,7 +991,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
// At this point, the code is well formed. We now simplify the unrolled loop,
// doing constant propagation and dead code elimination as we go.
simplifyLoopAfterUnroll(L, !CompletelyUnroll && ULO.Count > 1, LI, SE, DT, AC,
- TTI);
+ TTI, AA);
NumCompletelyUnrolled += CompletelyUnroll;
++NumUnrolled;