aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
diff options
context:
space:
mode:
authorAnna Thomas <anna@azul.com>2017-06-30 17:57:07 +0000
committerAnna Thomas <anna@azul.com>2017-06-30 17:57:07 +0000
commite5e5e59d8bbe7ddb82b06e9e71a97e1a578467a1 (patch)
tree705ecf8d2bc83068ffb2c694eab83d57f6b2b2a1 /llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
parentd3213c792e1d427ab1c9bb181c1f06596bd352ab (diff)
downloadllvm-e5e5e59d8bbe7ddb82b06e9e71a97e1a578467a1.zip
llvm-e5e5e59d8bbe7ddb82b06e9e71a97e1a578467a1.tar.gz
llvm-e5e5e59d8bbe7ddb82b06e9e71a97e1a578467a1.tar.bz2
[RuntimeUnrolling] Add logic for loops with multiple exit blocks
Summary: Runtime unrolling is done for loops with a single exit block and a single exiting block (and this exiting block should be the latch block). This patch adds logic to support unrolling in the presence of multiple exit blocks (which also means multiple exiting blocks). Currently this is under an off-by-default option and is supported when epilog code is generated. Support in presence of prolog code will be in a future patch (we just need to add more tests, and update comments). This patch is essentially an implementation patch. I have not added any heuristic (in terms of branches added or code size) to decide when this should be enabled. Reviewers: mkuper, sanjoy, reames, evstupac Reviewed by: reames Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D33001 llvm-svn: 306846
Diffstat (limited to 'llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp')
-rw-r--r--llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp124
1 files changed, 101 insertions, 23 deletions
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index d915831..9ad2b70 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -36,6 +36,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include <algorithm>
@@ -45,6 +46,10 @@ using namespace llvm;
STATISTIC(NumRuntimeUnrolled,
"Number of loops unrolled with run-time trip counts");
+static cl::opt<bool> UnrollRuntimeMultiExit(
+ "unroll-runtime-multi-exit", cl::init(false), cl::Hidden,
+ cl::desc("Allow runtime unrolling for loops with multiple exits, when "
+ "epilog is generated"));
/// Connect the unrolling prolog code to the original loop.
/// The unrolling prolog code contains code to execute the
@@ -285,15 +290,13 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
/// The cloned blocks should be inserted between InsertTop and InsertBot.
/// If loop structure is cloned InsertTop should be new preheader, InsertBot
/// new loop exit.
-///
-static void CloneLoopBlocks(Loop *L, Value *NewIter,
- const bool CreateRemainderLoop,
- const bool UseEpilogRemainder,
- BasicBlock *InsertTop, BasicBlock *InsertBot,
- BasicBlock *Preheader,
- std::vector<BasicBlock *> &NewBlocks,
- LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
- DominatorTree *DT, LoopInfo *LI) {
+/// Return the new cloned loop that is created when CreateRemainderLoop is true.
+static Loop *
+CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
+ const bool UseEpilogRemainder, BasicBlock *InsertTop,
+ BasicBlock *InsertBot, BasicBlock *Preheader,
+ std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
+ ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) {
StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
BasicBlock *Header = L->getHeader();
BasicBlock *Latch = L->getLoopLatch();
@@ -418,7 +421,10 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter,
// Set operand 0 to refer to the loop id itself.
NewLoopID->replaceOperandWith(0, NewLoopID);
NewLoop->setLoopID(NewLoopID);
+ return NewLoop;
}
+ else
+ return nullptr;
}
/// Insert code in the prolog/epilog code when unrolling a loop with a
@@ -465,29 +471,52 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
LoopInfo *LI, ScalarEvolution *SE,
DominatorTree *DT, bool PreserveLCSSA) {
// for now, only unroll loops that contain a single exit
- if (!L->getExitingBlock())
+ if (!UnrollRuntimeMultiExit && !L->getExitingBlock())
return false;
- // Make sure the loop is in canonical form, and there is a single
- // exit block only.
+ // Make sure the loop is in canonical form.
if (!L->isLoopSimplifyForm())
return false;
// Guaranteed by LoopSimplifyForm.
BasicBlock *Latch = L->getLoopLatch();
+ BasicBlock *Header = L->getHeader();
BasicBlock *LatchExit = L->getUniqueExitBlock(); // successor out of loop
- if (!LatchExit)
+ if (!LatchExit && !UnrollRuntimeMultiExit)
return false;
+ // These are exit blocks other than the target of the latch exiting block.
+ SmallVector<BasicBlock *, 4> OtherExits;
+ BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
+ unsigned int ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
// Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
- // targets of the Latch be the single exit block out of the loop. This needs
+ // targets of the Latch be an exit block out of the loop. This needs
// to be guaranteed by the callers of UnrollRuntimeLoopRemainder.
- BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
- assert((LatchBR->getSuccessor(0) == LatchExit ||
- LatchBR->getSuccessor(1) == LatchExit) &&
- "one of the loop latch successors should be "
- "the exit block!");
- (void)LatchBR;
+ assert(!L->contains(LatchBR->getSuccessor(ExitIndex)) &&
+ "one of the loop latch successors should be the exit block!");
+ // Support runtime unrolling for multiple exit blocks and multiple exiting
+ // blocks.
+ if (!LatchExit) {
+ assert(UseEpilogRemainder && "Multi exit unrolling is currently supported "
+ "unrolling with epilog remainder only!");
+ LatchExit = LatchBR->getSuccessor(ExitIndex);
+ // We rely on LCSSA form being preserved when the exit blocks are
+ // transformed.
+ if (!PreserveLCSSA)
+ return false;
+ // TODO: Support multiple exiting blocks jumping to the `LatchExit`. This
+ // will need updating the logic in connectEpilog.
+ if (!LatchExit->getSinglePredecessor())
+ return false;
+ SmallVector<BasicBlock *, 4> Exits;
+ L->getUniqueExitBlocks(Exits);
+ for (auto *BB : Exits)
+ if (BB != LatchExit)
+ OtherExits.push_back(BB);
+ }
+
+ assert(LatchExit && "Latch Exit should exist!");
+
// Use Scalar Evolution to compute the trip count. This allows more loops to
// be unrolled than relying on induction var simplification.
if (!SE)
@@ -512,7 +541,6 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
if (isa<SCEVCouldNotCompute>(TripCountSC))
return false;
- BasicBlock *Header = L->getHeader();
BasicBlock *PreHeader = L->getLoopPreheader();
BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
const DataLayout &DL = Header->getModule()->getDataLayout();
@@ -654,8 +682,9 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// iterations. This function adds the appropriate CFG connections.
BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
- CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop,
- InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
+ Loop *remainderLoop = CloneLoopBlocks(
+ L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop, InsertBot,
+ NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
// Insert the cloned blocks into the function.
F->getBasicBlockList().splice(InsertBot->getIterator(),
@@ -663,6 +692,42 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
NewBlocks[0]->getIterator(),
F->end());
+ // Now the loop blocks are cloned and the other exiting blocks from the
+ // remainder are connected to the original Loop's exit blocks. The remaining
+ // work is to update the phi nodes in the original loop, and take in the
+ // values from the cloned region. Also update the dominator info for
+ // OtherExits, since we have new edges into OtherExits.
+ for (auto *BB : OtherExits) {
+ for (auto &II : *BB) {
+
+ // Given we preserve LCSSA form, we know that the values used outside the
+ // loop will be used through these phi nodes at the exit blocks that are
+ // transformed below.
+ if (!isa<PHINode>(II))
+ break;
+ PHINode *Phi = cast<PHINode>(&II);
+ unsigned oldNumOperands = Phi->getNumIncomingValues();
+ // Add the incoming values from the remainder code to the end of the phi
+ // node.
+ for (unsigned i =0; i < oldNumOperands; i++){
+ Value *newVal = VMap[Phi->getIncomingValue(i)];
+ if (!newVal) {
+ assert(isa<Constant>(Phi->getIncomingValue(i)) &&
+ "VMap should exist for all values except constants!");
+ newVal = Phi->getIncomingValue(i);
+ }
+ Phi->addIncoming(newVal,
+ cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)]));
+ }
+ }
+ // Update the dominator info because the immediate dominator is no longer the
+ // header of the original Loop. BB has edges both from L and remainder code.
+ // Since the preheader determines which loop is run (L or directly jump to
+ // the remainder code), we set the immediate dominator as the preheader.
+ if (DT)
+ DT->changeImmediateDominator(BB, PreHeader);
+ }
+
// Loop structure should be the following:
// Epilog Prolog
//
@@ -725,6 +790,19 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
if (Loop *ParentLoop = L->getParentLoop())
SE->forgetLoop(ParentLoop);
+ // Canonicalize to LoopSimplifyForm both original and remainder loops. We
+ // cannot rely on the LoopUnrollPass to do this because it only does
+ // canonicalization for parent/subloops and not the sibling loops.
+ if (OtherExits.size() > 0) {
+ // Generate dedicated exit blocks for the original loop, to preserve
+ // LoopSimplifyForm.
+ formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA);
+ // Generate dedicated exit blocks for the remainder loop if one exists, to
+ // preserve LoopSimplifyForm.
+ if (remainderLoop)
+ formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA);
+ }
+
NumRuntimeUnrolled++;
return true;
}