aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Utils/LoopPeel.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Utils/LoopPeel.cpp')
-rw-r--r--llvm/lib/Transforms/Utils/LoopPeel.cpp83
1 files changed, 62 insertions, 21 deletions
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index f348d24..bd025fd 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -38,6 +38,7 @@
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
@@ -330,11 +331,7 @@ static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L,
bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) {
const SCEV *BTC = SE.getBackedgeTakenCount(&L);
- // The loop must execute at least 2 iterations to guarantee that peeled
- // iteration executes.
- // TODO: Add checks during codegen.
- if (isa<SCEVCouldNotCompute>(BTC) ||
- !SE.isKnownPredicate(CmpInst::ICMP_UGT, BTC, SE.getZero(BTC->getType())))
+ if (isa<SCEVCouldNotCompute>(BTC))
return false;
// Check if the exit condition of the loop can be adjusted by the peeling
@@ -354,6 +351,7 @@ bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) {
m_BasicBlock(Succ1), m_BasicBlock(Succ2))) &&
((Pred == CmpInst::ICMP_EQ && Succ2 == L.getHeader()) ||
(Pred == CmpInst::ICMP_NE && Succ1 == L.getHeader())) &&
+ Bound->getType()->isIntegerTy() &&
SE.isLoopInvariant(SE.getSCEV(Bound), &L) &&
match(SE.getSCEV(Inc),
m_scev_AffineAddRec(m_SCEV(), m_scev_One(), m_SpecificLoop(&L)));
@@ -364,12 +362,18 @@ bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) {
/// is known at the second-to-last.
static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred,
const SCEVAddRecExpr *LeftAR,
- const SCEV *RightSCEV,
- ScalarEvolution &SE) {
+ const SCEV *RightSCEV, ScalarEvolution &SE,
+ const TargetTransformInfo &TTI) {
if (!canPeelLastIteration(L, SE))
return false;
const SCEV *BTC = SE.getBackedgeTakenCount(&L);
+ SCEVExpander Expander(SE, L.getHeader()->getDataLayout(), "loop-peel");
+ if (!SE.isKnownNonZero(BTC) &&
+ Expander.isHighCostExpansion(BTC, &L, SCEVCheapExpansionBudget, &TTI,
+ L.getLoopPredecessor()->getTerminator()))
+ return false;
+
const SCEV *ValAtLastIter = LeftAR->evaluateAtIteration(BTC, SE);
const SCEV *ValAtSecondToLastIter = LeftAR->evaluateAtIteration(
SE.getMinusSCEV(BTC, SE.getOne(BTC->getType())), SE);
@@ -391,7 +395,8 @@ static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred,
// ..
// }
static std::pair<unsigned, unsigned>
-countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) {
+countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE,
+ const TargetTransformInfo &TTI) {
assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
unsigned DesiredPeelCount = 0;
unsigned DesiredPeelCountLast = 0;
@@ -479,7 +484,7 @@ countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) {
const SCEV *Step = LeftAR->getStepRecurrence(SE);
if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, RightSCEV, Step,
Pred)) {
- if (shouldPeelLastIteration(L, Pred, LeftAR, RightSCEV, SE))
+ if (shouldPeelLastIteration(L, Pred, LeftAR, RightSCEV, SE, TTI))
DesiredPeelCountLast = 1;
return;
}
@@ -593,8 +598,8 @@ static bool violatesLegacyMultiExitLoopCheck(Loop *L) {
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::PeelingPreferences &PP,
unsigned TripCount, DominatorTree &DT,
- ScalarEvolution &SE, AssumptionCache *AC,
- unsigned Threshold) {
+ ScalarEvolution &SE, const TargetTransformInfo &TTI,
+ AssumptionCache *AC, unsigned Threshold) {
assert(LoopSize > 0 && "Zero loop size is not allowed!");
// Save the PP.PeelCount value set by the target in
// TTI.getPeelingPreferences or by the flag -unroll-peel-count.
@@ -656,7 +661,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
}
const auto &[CountToEliminateCmps, CountToEliminateCmpsLast] =
- countToEliminateCompares(*L, MaxPeelCount, SE);
+ countToEliminateCompares(*L, MaxPeelCount, SE, TTI);
DesiredPeelCount = std::max(DesiredPeelCount, CountToEliminateCmps);
if (DesiredPeelCount == 0)
@@ -822,7 +827,7 @@ static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos,
/// instructions in the last peeled-off iteration.
static void cloneLoopBlocks(
Loop *L, unsigned IterNumber, bool PeelLast, BasicBlock *InsertTop,
- BasicBlock *InsertBot,
+ BasicBlock *InsertBot, BasicBlock *OrigPreHeader,
SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
@@ -914,12 +919,22 @@ static void cloneLoopBlocks(
// loop iteration. Since this copy is no longer part of the loop, we
// resolve this statically:
if (PeelLast) {
- // For the last iteration, we use the value from the latch of the original
- // loop directly.
+ // For the last iteration, we introduce new phis for each header phi in
+ // InsertTop, using the incoming value from the preheader for the original
+ // preheader (when skipping the main loop) and the incoming value from the
+ // latch for the latch (when continuing from the main loop).
+ IRBuilder<> B(InsertTop, InsertTop->getFirstNonPHIIt());
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
- VMap[&*I] = NewPHI->getIncomingValueForBlock(Latch);
+ PHINode *PN = B.CreatePHI(NewPHI->getType(), 2);
NewPHI->eraseFromParent();
+ if (OrigPreHeader)
+ PN->addIncoming(cast<PHINode>(&*I)->getIncomingValueForBlock(PreHeader),
+ OrigPreHeader);
+
+ PN->addIncoming(cast<PHINode>(&*I)->getIncomingValueForBlock(Latch),
+ Latch);
+ VMap[&*I] = PN;
}
} else {
// For the first iteration, we use the value from the preheader directly.
@@ -1053,7 +1068,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
// Set up all the necessary basic blocks.
BasicBlock *InsertTop;
BasicBlock *InsertBot;
- BasicBlock *NewPreHeader;
+ BasicBlock *NewPreHeader = nullptr;
DenseMap<Instruction *, Value *> ExitValues;
if (PeelLast) {
// It is convenient to split the single exit block from the latch the
@@ -1084,11 +1099,34 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
for (PHINode &P : Exit->phis())
ExitValues[&P] = P.getIncomingValueForBlock(Latch);
+ const SCEV *BTC = SE->getBackedgeTakenCount(L);
+
InsertTop = SplitEdge(Latch, Exit, &DT, LI);
InsertBot = SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI);
InsertTop->setName(Exit->getName() + ".peel.begin");
InsertBot->setName(Exit->getName() + ".peel.next");
+ NewPreHeader = nullptr;
+
+ // If the original loop may only execute a single iteration we need to
+ // insert a trip count check and skip the original loop with the last
+ // iteration peeled off if necessary.
+ if (!SE->isKnownNonZero(BTC)) {
+ NewPreHeader = SplitEdge(PreHeader, Header, &DT, LI);
+ SCEVExpander Expander(*SE, Latch->getDataLayout(), "loop-peel");
+
+ BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
+ Value *BTCValue =
+ Expander.expandCodeFor(BTC, BTC->getType(), PreHeaderBR);
+ IRBuilder<> B(PreHeaderBR);
+ Value *Cond =
+ B.CreateICmpNE(BTCValue, ConstantInt::get(BTCValue->getType(), 0));
+ B.CreateCondBr(Cond, NewPreHeader, InsertTop);
+ PreHeaderBR->eraseFromParent();
+
+ // PreHeader now dominates InsertTop.
+ DT.changeImmediateDominator(InsertTop, PreHeader);
+ }
} else {
// It is convenient to split the preheader into 3 parts - two blocks to
// anchor the peeled copy of the loop body, and a new preheader for the
@@ -1162,8 +1200,9 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
SmallVector<BasicBlock *, 8> NewBlocks;
- cloneLoopBlocks(L, Iter, PeelLast, InsertTop, InsertBot, ExitEdges,
- NewBlocks, LoopBlocks, VMap, LVMap, &DT, LI,
+ cloneLoopBlocks(L, Iter, PeelLast, InsertTop, InsertBot,
+ NewPreHeader ? PreHeader : nullptr, ExitEdges, NewBlocks,
+ LoopBlocks, VMap, LVMap, &DT, LI,
LoopLocalNoAliasDeclScopes, *SE);
// Remap to use values from the current iteration instead of the
@@ -1216,9 +1255,11 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
if (PeelLast) {
// Now adjust users of the original exit values by replacing them with the
- // exit value from the peeled iteration.
- for (const auto &[P, E] : ExitValues)
+ // exit value from the peeled iteration and remove them.
+ for (const auto &[P, E] : ExitValues) {
P->replaceAllUsesWith(isa<Constant>(E) ? E : &*VMap.lookup(E));
+ P->eraseFromParent();
+ }
formLCSSA(*L, DT, LI, SE);
} else {
// Now adjust the phi nodes in the loop header to get their initial values