aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Utils/LoopPeel.cpp
diff options
context:
space:
mode:
authorFlorian Hahn <flo@fhahn.com>2025-05-26 20:08:02 +0100
committerGitHub <noreply@github.com>2025-05-26 20:08:02 +0100
commit24b97756decb7bf0e26dcf0e30a7a9aaf27f417c (patch)
tree71bf0d625d18fea35d2b81cf3a18ee354ec972aa /llvm/lib/Transforms/Utils/LoopPeel.cpp
parentc554fc9245e200b2e06509f4c44233974f6888f0 (diff)
downloadllvm-24b97756decb7bf0e26dcf0e30a7a9aaf27f417c.zip
llvm-24b97756decb7bf0e26dcf0e30a7a9aaf27f417c.tar.gz
llvm-24b97756decb7bf0e26dcf0e30a7a9aaf27f417c.tar.bz2
[LoopPeel] Remove known trip count restriction when peeling last. (#140792)
Remove the restriction that the loop must be known to execute at least 2 iterations when peeling the last iteration. If we cannot prove at least 2 iterations are executed, a check and branch to skip the peeled loop is inserted. PR: https://github.com/llvm/llvm-project/pull/140792
Diffstat (limited to 'llvm/lib/Transforms/Utils/LoopPeel.cpp')
-rw-r--r--llvm/lib/Transforms/Utils/LoopPeel.cpp82
1 files changed, 61 insertions, 21 deletions
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index f348d24..e1dc052 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -38,6 +38,7 @@
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
@@ -330,11 +331,7 @@ static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L,
bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) {
const SCEV *BTC = SE.getBackedgeTakenCount(&L);
- // The loop must execute at least 2 iterations to guarantee that peeled
- // iteration executes.
- // TODO: Add checks during codegen.
- if (isa<SCEVCouldNotCompute>(BTC) ||
- !SE.isKnownPredicate(CmpInst::ICMP_UGT, BTC, SE.getZero(BTC->getType())))
+ if (isa<SCEVCouldNotCompute>(BTC))
return false;
// Check if the exit condition of the loop can be adjusted by the peeling
@@ -364,12 +361,18 @@ bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) {
/// is known at the second-to-last.
static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred,
const SCEVAddRecExpr *LeftAR,
- const SCEV *RightSCEV,
- ScalarEvolution &SE) {
+ const SCEV *RightSCEV, ScalarEvolution &SE,
+ const TargetTransformInfo &TTI) {
if (!canPeelLastIteration(L, SE))
return false;
const SCEV *BTC = SE.getBackedgeTakenCount(&L);
+ SCEVExpander Expander(SE, L.getHeader()->getDataLayout(), "loop-peel");
+ if (!SE.isKnownNonZero(BTC) &&
+ Expander.isHighCostExpansion(BTC, &L, SCEVCheapExpansionBudget, &TTI,
+ L.getLoopPredecessor()->getTerminator()))
+ return false;
+
const SCEV *ValAtLastIter = LeftAR->evaluateAtIteration(BTC, SE);
const SCEV *ValAtSecondToLastIter = LeftAR->evaluateAtIteration(
SE.getMinusSCEV(BTC, SE.getOne(BTC->getType())), SE);
@@ -391,7 +394,8 @@ static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred,
// ..
// }
static std::pair<unsigned, unsigned>
-countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) {
+countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE,
+ const TargetTransformInfo &TTI) {
assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
unsigned DesiredPeelCount = 0;
unsigned DesiredPeelCountLast = 0;
@@ -479,7 +483,7 @@ countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) {
const SCEV *Step = LeftAR->getStepRecurrence(SE);
if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, RightSCEV, Step,
Pred)) {
- if (shouldPeelLastIteration(L, Pred, LeftAR, RightSCEV, SE))
+ if (shouldPeelLastIteration(L, Pred, LeftAR, RightSCEV, SE, TTI))
DesiredPeelCountLast = 1;
return;
}
@@ -593,8 +597,8 @@ static bool violatesLegacyMultiExitLoopCheck(Loop *L) {
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::PeelingPreferences &PP,
unsigned TripCount, DominatorTree &DT,
- ScalarEvolution &SE, AssumptionCache *AC,
- unsigned Threshold) {
+ ScalarEvolution &SE, const TargetTransformInfo &TTI,
+ AssumptionCache *AC, unsigned Threshold) {
assert(LoopSize > 0 && "Zero loop size is not allowed!");
// Save the PP.PeelCount value set by the target in
// TTI.getPeelingPreferences or by the flag -unroll-peel-count.
@@ -656,7 +660,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
}
const auto &[CountToEliminateCmps, CountToEliminateCmpsLast] =
- countToEliminateCompares(*L, MaxPeelCount, SE);
+ countToEliminateCompares(*L, MaxPeelCount, SE, TTI);
DesiredPeelCount = std::max(DesiredPeelCount, CountToEliminateCmps);
if (DesiredPeelCount == 0)
@@ -822,7 +826,7 @@ static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos,
/// instructions in the last peeled-off iteration.
static void cloneLoopBlocks(
Loop *L, unsigned IterNumber, bool PeelLast, BasicBlock *InsertTop,
- BasicBlock *InsertBot,
+ BasicBlock *InsertBot, BasicBlock *OrigPreHeader,
SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
@@ -914,12 +918,22 @@ static void cloneLoopBlocks(
// loop iteration. Since this copy is no longer part of the loop, we
// resolve this statically:
if (PeelLast) {
- // For the last iteration, we use the value from the latch of the original
- // loop directly.
+ // For the last iteration, we introduce new phis for each header phi in
+ // InsertTop, using the incoming value from the preheader for the original
+ // preheader (when skipping the main loop) and the incoming value from the
+ // latch for the latch (when continuing from the main loop).
+ IRBuilder<> B(InsertTop->getTerminator());
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
- VMap[&*I] = NewPHI->getIncomingValueForBlock(Latch);
+ PHINode *PN = B.CreatePHI(NewPHI->getType(), 2);
NewPHI->eraseFromParent();
+ if (OrigPreHeader)
+ PN->addIncoming(cast<PHINode>(&*I)->getIncomingValueForBlock(PreHeader),
+ OrigPreHeader);
+
+ PN->addIncoming(cast<PHINode>(&*I)->getIncomingValueForBlock(Latch),
+ Latch);
+ VMap[&*I] = PN;
}
} else {
// For the first iteration, we use the value from the preheader directly.
@@ -1053,7 +1067,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
// Set up all the necessary basic blocks.
BasicBlock *InsertTop;
BasicBlock *InsertBot;
- BasicBlock *NewPreHeader;
+ BasicBlock *NewPreHeader = nullptr;
DenseMap<Instruction *, Value *> ExitValues;
if (PeelLast) {
// It is convenient to split the single exit block from the latch the
@@ -1084,11 +1098,34 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
for (PHINode &P : Exit->phis())
ExitValues[&P] = P.getIncomingValueForBlock(Latch);
+ const SCEV *BTC = SE->getBackedgeTakenCount(L);
+
InsertTop = SplitEdge(Latch, Exit, &DT, LI);
InsertBot = SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI);
InsertTop->setName(Exit->getName() + ".peel.begin");
InsertBot->setName(Exit->getName() + ".peel.next");
+ NewPreHeader = nullptr;
+
+ // If the original loop may only execute a single iteration we need to
+ // insert a trip count check and skip the original loop with the last
+ // iteration peeled off if necessary.
+ if (!SE->isKnownNonZero(BTC)) {
+ NewPreHeader = SplitEdge(PreHeader, Header, &DT, LI);
+ SCEVExpander Expander(*SE, Latch->getDataLayout(), "loop-peel");
+
+ BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
+ Value *BTCValue =
+ Expander.expandCodeFor(BTC, BTC->getType(), PreHeaderBR);
+ IRBuilder<> B(PreHeaderBR);
+ Value *Cond =
+ B.CreateICmpNE(BTCValue, ConstantInt::get(BTCValue->getType(), 0));
+ B.CreateCondBr(Cond, NewPreHeader, InsertTop);
+ PreHeaderBR->eraseFromParent();
+
+ // PreHeader now dominates InsertTop.
+ DT.changeImmediateDominator(InsertTop, PreHeader);
+ }
} else {
// It is convenient to split the preheader into 3 parts - two blocks to
// anchor the peeled copy of the loop body, and a new preheader for the
@@ -1162,8 +1199,9 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
SmallVector<BasicBlock *, 8> NewBlocks;
- cloneLoopBlocks(L, Iter, PeelLast, InsertTop, InsertBot, ExitEdges,
- NewBlocks, LoopBlocks, VMap, LVMap, &DT, LI,
+ cloneLoopBlocks(L, Iter, PeelLast, InsertTop, InsertBot,
+ NewPreHeader ? PreHeader : nullptr, ExitEdges, NewBlocks,
+ LoopBlocks, VMap, LVMap, &DT, LI,
LoopLocalNoAliasDeclScopes, *SE);
// Remap to use values from the current iteration instead of the
@@ -1216,9 +1254,11 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
if (PeelLast) {
// Now adjust users of the original exit values by replacing them with the
- // exit value from the peeled iteration.
- for (const auto &[P, E] : ExitValues)
+ // exit value from the peeled iteration and remove them.
+ for (const auto &[P, E] : ExitValues) {
P->replaceAllUsesWith(isa<Constant>(E) ? E : &*VMap.lookup(E));
+ P->eraseFromParent();
+ }
formLCSSA(*L, DT, LI, SE);
} else {
// Now adjust the phi nodes in the loop header to get their initial values