diff options
author | Florian Hahn <flo@fhahn.com> | 2025-05-16 08:33:12 +0100 |
---|---|---|
committer | Florian Hahn <flo@fhahn.com> | 2025-05-16 08:33:12 +0100 |
commit | bf92b127d2637948f53d11a187e865aa10e2e74c (patch) | |
tree | e93694835b25107294e1ee8f50b61ad9543c8024 /llvm/lib/Transforms/Utils/LoopPeel.cpp | |
parent | 1c2c02c8cbb6949c06fe26a72200ccfb37ac8c96 (diff) | |
download | llvm-bf92b127d2637948f53d11a187e865aa10e2e74c.zip llvm-bf92b127d2637948f53d11a187e865aa10e2e74c.tar.gz llvm-bf92b127d2637948f53d11a187e865aa10e2e74c.tar.bz2 |
Revert "[LoopPeel] Implement initial peeling off the last loop iteration. (#139551)"
This reverts commit bb10c3ba7f77d40a7fbfd4ac815015d3a4ae476a.
Also reverts 4f663cca15f2b53c2bc6a84d1b1f5bd81679356d:
Revert "[LoopPeel] Make sure PeelLast is always initialized."
Revert for now to bring msan bots back to green
https://lab.llvm.org/buildbot/#/builders/164/builds/9992
https://lab.llvm.org/buildbot/#/builders/94/builds/7158
Diffstat (limited to 'llvm/lib/Transforms/Utils/LoopPeel.cpp')
-rw-r--r-- | llvm/lib/Transforms/Utils/LoopPeel.cpp | 378 |
1 files changed, 106 insertions, 272 deletions
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp index 99aac24..f6ace9c 100644 --- a/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -49,7 +49,6 @@ using namespace llvm::PatternMatch; #define DEBUG_TYPE "loop-peel" STATISTIC(NumPeeled, "Number of loops peeled"); -STATISTIC(NumPeeledEnd, "Number of loops peeled from end"); static cl::opt<unsigned> UnrollPeelCount( "unroll-peel-count", cl::Hidden, @@ -326,71 +325,19 @@ static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L, return 0; } -bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) { - const SCEV *BTC = SE.getBackedgeTakenCount(&L); - Value *Inc; - CmpPredicate Pred; - BasicBlock *Succ1; - BasicBlock *Succ2; - // The loop must execute at least 2 iterations to guarantee that peeled - // iteration executes. - // TODO: Add checks during codegen. - if (isa<SCEVCouldNotCompute>(BTC) || - !SE.isKnownPredicate(CmpInst::ICMP_UGT, BTC, SE.getZero(BTC->getType()))) - return false; - - // Check if the exit condition of the loop can be adjusted by the peeling - // codegen. For now, it must - // * exit via the latch, - // * the exit condition must be a NE/EQ compare of an induction with step - // of 1. - BasicBlock *Latch = L.getLoopLatch(); - return Latch && Latch == L.getExitingBlock() && - match(Latch->getTerminator(), - m_Br(m_ICmp(Pred, m_Value(Inc), m_Value()), m_BasicBlock(Succ1), - m_BasicBlock(Succ2))) && - ((Pred == CmpInst::ICMP_EQ && Succ2 == L.getHeader()) || - (Pred == CmpInst::ICMP_NE && Succ1 == L.getHeader())) && - isa<SCEVAddRecExpr>(SE.getSCEV(Inc)) && - cast<SCEVAddRecExpr>(SE.getSCEV(Inc))->getStepRecurrence(SE)->isOne(); -} - -/// Returns true if the last iteration can be peeled off and the condition (Pred -/// LeftAR, RightSCEV) is known at the last iteration and the inverse condition -/// is known at the second-to-last. -static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred, - const SCEVAddRecExpr *LeftAR, - const SCEV *RightSCEV, - ScalarEvolution &SE) { - if (!canPeelLastIteration(L, SE)) - return false; - - const SCEV *BTC = SE.getBackedgeTakenCount(&L); - const SCEV *ValAtLastIter = LeftAR->evaluateAtIteration(BTC, SE); - const SCEV *ValAtSecondToLastIter = LeftAR->evaluateAtIteration( - SE.getMinusSCEV(BTC, SE.getOne(BTC->getType())), SE); - - return SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), ValAtLastIter, - RightSCEV) && - SE.isKnownPredicate(Pred, ValAtSecondToLastIter, RightSCEV); -} - -// Return the number of iterations to peel off from the beginning and end of the -// loop respectively, that make conditions in the body true/false. For example, -// if we peel 2 iterations off the loop below, the condition i < 2 can be -// evaluated at compile time. -// +// Return the number of iterations to peel off that make conditions in the +// body true/false. For example, if we peel 2 iterations off the loop below, +// the condition i < 2 can be evaluated at compile time. // for (i = 0; i < n; i++) // if (i < 2) // .. // else // .. // } -static std::pair<unsigned, unsigned> -countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) { +static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, + ScalarEvolution &SE) { assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form"); unsigned DesiredPeelCount = 0; - unsigned DesiredPeelCountLast = 0; // Do not peel the entire loop. const SCEV *BE = SE.getConstantMaxBackedgeTakenCount(&L); @@ -474,11 +421,8 @@ countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) { const SCEV *Step = LeftAR->getStepRecurrence(SE); if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, RightSCEV, Step, - Pred)) { - if (shouldPeelLastIteration(L, Pred, LeftAR, RightSCEV, SE)) - DesiredPeelCountLast = 1; + Pred)) return; - } // However, for equality comparisons, that isn't always sufficient to // eliminate the comparsion in loop body, we may need to peel one more @@ -495,7 +439,6 @@ countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) { } DesiredPeelCount = std::max(DesiredPeelCount, NewPeelCount); - DesiredPeelCountLast = std::max(DesiredPeelCountLast, NewPeelCount); }; auto ComputePeelCountMinMax = [&](MinMaxIntrinsic *MinMax) { @@ -557,7 +500,7 @@ countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) { ComputePeelCount(BI->getCondition(), 0); } - return {DesiredPeelCount, DesiredPeelCountLast}; + return DesiredPeelCount; } /// This "heuristic" exactly matches implicit behavior which used to exist @@ -596,7 +539,6 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, // TTI.getPeelingPreferences or by the flag -unroll-peel-count. unsigned TargetPeelCount = PP.PeelCount; PP.PeelCount = 0; - PP.PeelLast = false; if (!canPeel(L)) return; @@ -651,9 +593,8 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, DesiredPeelCount = std::max(DesiredPeelCount, *NumPeels); } - const auto &[CountToEliminateCmps, CountToEliminateCmpsLast] = - countToEliminateCompares(*L, MaxPeelCount, SE); - DesiredPeelCount = std::max(DesiredPeelCount, CountToEliminateCmps); + DesiredPeelCount = std::max(DesiredPeelCount, + countToEliminateCompares(*L, MaxPeelCount, SE)); if (DesiredPeelCount == 0) DesiredPeelCount = peelToTurnInvariantLoadsDerefencebale(*L, DT, AC); @@ -668,23 +609,6 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, << " some Phis into invariants.\n"); PP.PeelCount = DesiredPeelCount; PP.PeelProfiledIterations = false; - PP.PeelLast = false; - return; - } - } - - if (CountToEliminateCmpsLast > 0) { - unsigned DesiredPeelCountLast = - std::min(CountToEliminateCmpsLast, MaxPeelCount); - // Consider max peel count limitation. - assert(DesiredPeelCountLast > 0 && "Wrong loop size estimation?"); - if (DesiredPeelCountLast + AlreadyPeeled <= UnrollPeelMaxCount) { - LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount - << " iteration(s) to turn" - << " some Phis into invariants.\n"); - PP.PeelCount = DesiredPeelCountLast; - PP.PeelProfiledIterations = false; - PP.PeelLast = true; return; } } @@ -809,7 +733,6 @@ static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos, /// InsertBot. /// \param IterNumber The serial number of the iteration currently being /// peeled off. -/// \param PeelLast Peel off the last iterations from \p L. /// \param ExitEdges The exit edges of the original loop. /// \param[out] NewBlocks A list of the blocks in the newly created clone /// \param[out] VMap The value map between the loop and the new clone. @@ -817,8 +740,7 @@ static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos, /// \param LVMap A value-map that maps instructions from the original loop to /// instructions in the last peeled-off iteration. static void cloneLoopBlocks( - Loop *L, unsigned IterNumber, bool PeelLast, BasicBlock *InsertTop, - BasicBlock *InsertBot, + Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot, SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges, SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, @@ -882,26 +804,16 @@ static void cloneLoopBlocks( // Similarly, for the latch: // The original exiting edge is still hooked up to the loop exit. + // The backedge now goes to the "bottom", which is either the loop's real + // header (for the last peeled iteration) or the copied header of the next + // iteration (for every other iteration) BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]); - if (PeelLast) { - // This is the last iteration and we definitely will go to the exit. Just - // set both successors to InsertBot and let the branch be simplified later. - assert(IterNumber == 0 && "Only peeling a single iteration implemented."); - auto *LatchTerm = cast<BranchInst>(NewLatch->getTerminator()); - LatchTerm->setSuccessor(0, InsertBot); - LatchTerm->setSuccessor(1, InsertBot); - } else { - auto *LatchTerm = cast<Instruction>(NewLatch->getTerminator()); - // The backedge now goes to the "bottom", which is either the loop's real - // header (for the last peeled iteration) or the copied header of the next - // iteration (for every other iteration) - for (unsigned idx = 0, e = LatchTerm->getNumSuccessors(); idx < e; ++idx) { - if (LatchTerm->getSuccessor(idx) == Header) { - LatchTerm->setSuccessor(idx, InsertBot); - break; - } + auto *LatchTerm = cast<Instruction>(NewLatch->getTerminator()); + for (unsigned idx = 0, e = LatchTerm->getNumSuccessors(); idx < e; ++idx) + if (LatchTerm->getSuccessor(idx) == Header) { + LatchTerm->setSuccessor(idx, InsertBot); + break; } - } if (DT) DT->changeImmediateDominator(InsertBot, NewLatch); @@ -909,33 +821,23 @@ static void cloneLoopBlocks( // that pick an incoming value from either the preheader, or the previous // loop iteration. Since this copy is no longer part of the loop, we // resolve this statically: - if (PeelLast) { - // For the last iteration, we use the value from the latch of the original - // loop directly. - for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { - PHINode *NewPHI = cast<PHINode>(VMap[&*I]); - VMap[&*I] = NewPHI->getIncomingValueForBlock(Latch); - NewPHI->eraseFromParent(); - } - } else { - // For the first iteration, we use the value from the preheader directly. - // For any other iteration, we replace the phi with the value generated by - // the immediately preceding clone of the loop body (which represents - // the previous iteration). - for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { - PHINode *NewPHI = cast<PHINode>(VMap[&*I]); - if (IterNumber == 0) { - VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader); - } else { - Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch); - Instruction *LatchInst = dyn_cast<Instruction>(LatchVal); - if (LatchInst && L->contains(LatchInst)) - VMap[&*I] = LVMap[LatchInst]; - else - VMap[&*I] = LatchVal; - } - NewPHI->eraseFromParent(); + // For the first iteration, we use the value from the preheader directly. + // For any other iteration, we replace the phi with the value generated by + // the immediately preceding clone of the loop body (which represents + // the previous iteration). + for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { + PHINode *NewPHI = cast<PHINode>(VMap[&*I]); + if (IterNumber == 0) { + VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader); + } else { + Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch); + Instruction *LatchInst = dyn_cast<Instruction>(LatchVal); + if (LatchInst && L->contains(LatchInst)) + VMap[&*I] = LVMap[LatchInst]; + else + VMap[&*I] = LatchVal; } + NewPHI->eraseFromParent(); } // Fix up the outgoing values - we need to add a value for the iteration @@ -1003,14 +905,11 @@ llvm::gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, /// this provides a benefit, since the peeled off iterations, which account /// for the bulk of dynamic execution, can be further simplified by scalar /// optimizations. -bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, +bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC, bool PreserveLCSSA, ValueToValueMapTy &LVMap) { assert(PeelCount > 0 && "Attempt to peel out zero iterations?"); assert(canPeel(L) && "Attempt to peel a loop which is not peelable?"); - assert((!PeelLast || (canPeelLastIteration(*L, *SE) && PeelCount == 1)) && - "when peeling the last iteration, the loop must be supported and can " - "only peel a single iteration"); LoopBlocksDFS LoopBlocks(L); LoopBlocks.perform(LI); @@ -1045,99 +944,60 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, Function *F = Header->getParent(); - // Set up all the necessary basic blocks. - BasicBlock *InsertTop; - BasicBlock *InsertBot; - BasicBlock *NewPreHeader; - DenseMap<Instruction *, Value *> ExitValues; - if (PeelLast) { - // It is convenient to split the single exit block from the latch the - // into 3 parts - two blocks to anchor the peeled copy of the loop body, - // and a new final exit block. - - // Peeling the last iteration transforms. - // - // PreHeader: - // ... - // Header: - // LoopBody - // If (cond) goto Header - // Exit: - // - // into - // - // Header: - // LoopBody - // If (cond) goto Header - // InsertTop: - // LoopBody - // If (!cond) goto InsertBot - // InsertBot: - // Exit: - // ... - BasicBlock *Exit = L->getExitBlock(); - for (PHINode &P : Exit->phis()) - ExitValues[&P] = P.getIncomingValueForBlock(Latch); - - InsertTop = SplitEdge(Latch, Exit, &DT, LI); - InsertBot = SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI); - - InsertTop->setName(Exit->getName() + ".peel.begin"); - InsertBot->setName(Exit->getName() + ".peel.next"); - } else { - // It is convenient to split the preheader into 3 parts - two blocks to - // anchor the peeled copy of the loop body, and a new preheader for the - // "real" loop. - - // Peeling the first iteration transforms. - // - // PreHeader: - // ... - // Header: - // LoopBody - // If (cond) goto Header - // Exit: - // - // into - // - // InsertTop: - // LoopBody - // If (!cond) goto Exit - // InsertBot: - // NewPreHeader: - // ... - // Header: - // LoopBody - // If (cond) goto Header - // Exit: - // - // Each following iteration will split the current bottom anchor in two, - // and put the new copy of the loop body between these two blocks. That - // is, after peeling another iteration from the example above, we'll - // split InsertBot, and get: - // - // InsertTop: - // LoopBody - // If (!cond) goto Exit - // InsertBot: - // LoopBody - // If (!cond) goto Exit - // InsertBot.next: - // NewPreHeader: - // ... - // Header: - // LoopBody - // If (cond) goto Header - // Exit: - // - InsertTop = SplitEdge(PreHeader, Header, &DT, LI); - InsertBot = SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI); - NewPreHeader = SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI); - - InsertTop->setName(Header->getName() + ".peel.begin"); - InsertBot->setName(Header->getName() + ".peel.next"); - NewPreHeader->setName(PreHeader->getName() + ".peel.newph"); - } + // Set up all the necessary basic blocks. It is convenient to split the + // preheader into 3 parts - two blocks to anchor the peeled copy of the loop + // body, and a new preheader for the "real" loop. + + // Peeling the first iteration transforms. + // + // PreHeader: + // ... + // Header: + // LoopBody + // If (cond) goto Header + // Exit: + // + // into + // + // InsertTop: + // LoopBody + // If (!cond) goto Exit + // InsertBot: + // NewPreHeader: + // ... + // Header: + // LoopBody + // If (cond) goto Header + // Exit: + // + // Each following iteration will split the current bottom anchor in two, + // and put the new copy of the loop body between these two blocks. That is, + // after peeling another iteration from the example above, we'll split + // InsertBot, and get: + // + // InsertTop: + // LoopBody + // If (!cond) goto Exit + // InsertBot: + // LoopBody + // If (!cond) goto Exit + // InsertBot.next: + // NewPreHeader: + // ... + // Header: + // LoopBody + // If (cond) goto Header + // Exit: + + BasicBlock *InsertTop = SplitEdge(PreHeader, Header, &DT, LI); + BasicBlock *InsertBot = + SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI); + BasicBlock *NewPreHeader = + SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI); + + InsertTop->setName(Header->getName() + ".peel.begin"); + InsertBot->setName(Header->getName() + ".peel.next"); + NewPreHeader->setName(PreHeader->getName() + ".peel.newph"); Instruction *LatchTerm = cast<Instruction>(cast<BasicBlock>(Latch)->getTerminator()); @@ -1153,40 +1013,23 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes); // For each peeled-off iteration, make a copy of the loop. - ValueToValueMapTy VMap; for (unsigned Iter = 0; Iter < PeelCount; ++Iter) { SmallVector<BasicBlock *, 8> NewBlocks; + ValueToValueMapTy VMap; - cloneLoopBlocks(L, Iter, PeelLast, InsertTop, InsertBot, ExitEdges, - NewBlocks, LoopBlocks, VMap, LVMap, &DT, LI, + cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks, + LoopBlocks, VMap, LVMap, &DT, LI, LoopLocalNoAliasDeclScopes, *SE); // Remap to use values from the current iteration instead of the // previous one. remapInstructionsInBlocks(NewBlocks, VMap); - if (Iter == 0) { - if (PeelLast) { - // Adjust the exit condition so the loop exits one iteration early. - // For now we simply subtract one form the second operand of the - // exit condition. This relies on the peel count computation to - // check that this is actually legal. In particular, it ensures that - // the first operand of the compare is an AddRec with step 1 and we - // execute more than one iteration. - auto *Cmp = - cast<ICmpInst>(L->getLoopLatch()->getTerminator()->getOperand(0)); - IRBuilder B(Cmp); - Cmp->setOperand( - 1, B.CreateSub(Cmp->getOperand(1), - ConstantInt::get(Cmp->getOperand(1)->getType(), 1))); - } else { - // Update IDoms of the blocks reachable through exits. - for (auto BBIDom : NonLoopBlocksIDom) - DT.changeImmediateDominator(BBIDom.first, - cast<BasicBlock>(LVMap[BBIDom.second])); - } - } - + // Update IDoms of the blocks reachable through exits. + if (Iter == 0) + for (auto BBIDom : NonLoopBlocksIDom) + DT.changeImmediateDominator(BBIDom.first, + cast<BasicBlock>(LVMap[BBIDom.second])); #ifdef EXPENSIVE_CHECKS assert(DT.verify(DominatorTree::VerificationLevel::Fast)); #endif @@ -1209,24 +1052,16 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, F->end()); } - if (PeelLast) { - // Now adjust users of the original exit values by replacing them with the - // exit value from the peeled iteration. - for (const auto &[P, E] : ExitValues) - P->replaceAllUsesWith(VMap.lookup(E)); - formLCSSA(*L, DT, LI, SE); - } else { - // Now adjust the phi nodes in the loop header to get their initial values - // from the last peeled-off iteration instead of the preheader. - for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { - PHINode *PHI = cast<PHINode>(I); - Value *NewVal = PHI->getIncomingValueForBlock(Latch); - Instruction *LatchInst = dyn_cast<Instruction>(NewVal); - if (LatchInst && L->contains(LatchInst)) - NewVal = LVMap[LatchInst]; + // Now adjust the phi nodes in the loop header to get their initial values + // from the last peeled-off iteration instead of the preheader. + for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { + PHINode *PHI = cast<PHINode>(I); + Value *NewVal = PHI->getIncomingValueForBlock(Latch); + Instruction *LatchInst = dyn_cast<Instruction>(NewVal); + if (LatchInst && L->contains(LatchInst)) + NewVal = LVMap[LatchInst]; - PHI->setIncomingValueForBlock(NewPreHeader, NewVal); - } + PHI->setIncomingValueForBlock(NewPreHeader, NewVal); } for (const auto &[Term, Info] : Weights) { @@ -1255,7 +1090,6 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, simplifyLoop(L, &DT, LI, SE, AC, nullptr, PreserveLCSSA); NumPeeled++; - NumPeeledEnd += PeelLast; return true; } |