diff options
Diffstat (limited to 'llvm/lib/Transforms/Utils/LoopPeel.cpp')
-rw-r--r-- | llvm/lib/Transforms/Utils/LoopPeel.cpp | 83 |
1 files changed, 62 insertions, 21 deletions
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp index f348d24..bd025fd 100644 --- a/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -38,6 +38,7 @@ #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include <algorithm> #include <cassert> @@ -330,11 +331,7 @@ static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L, bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) { const SCEV *BTC = SE.getBackedgeTakenCount(&L); - // The loop must execute at least 2 iterations to guarantee that peeled - // iteration executes. - // TODO: Add checks during codegen. - if (isa<SCEVCouldNotCompute>(BTC) || - !SE.isKnownPredicate(CmpInst::ICMP_UGT, BTC, SE.getZero(BTC->getType()))) + if (isa<SCEVCouldNotCompute>(BTC)) return false; // Check if the exit condition of the loop can be adjusted by the peeling @@ -354,6 +351,7 @@ bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) { m_BasicBlock(Succ1), m_BasicBlock(Succ2))) && ((Pred == CmpInst::ICMP_EQ && Succ2 == L.getHeader()) || (Pred == CmpInst::ICMP_NE && Succ1 == L.getHeader())) && + Bound->getType()->isIntegerTy() && SE.isLoopInvariant(SE.getSCEV(Bound), &L) && match(SE.getSCEV(Inc), m_scev_AffineAddRec(m_SCEV(), m_scev_One(), m_SpecificLoop(&L))); @@ -364,12 +362,18 @@ bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) { /// is known at the second-to-last. static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred, const SCEVAddRecExpr *LeftAR, - const SCEV *RightSCEV, - ScalarEvolution &SE) { + const SCEV *RightSCEV, ScalarEvolution &SE, + const TargetTransformInfo &TTI) { if (!canPeelLastIteration(L, SE)) return false; const SCEV *BTC = SE.getBackedgeTakenCount(&L); + SCEVExpander Expander(SE, L.getHeader()->getDataLayout(), "loop-peel"); + if (!SE.isKnownNonZero(BTC) && + Expander.isHighCostExpansion(BTC, &L, SCEVCheapExpansionBudget, &TTI, + L.getLoopPredecessor()->getTerminator())) + return false; + const SCEV *ValAtLastIter = LeftAR->evaluateAtIteration(BTC, SE); const SCEV *ValAtSecondToLastIter = LeftAR->evaluateAtIteration( SE.getMinusSCEV(BTC, SE.getOne(BTC->getType())), SE); @@ -391,7 +395,8 @@ static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred, // .. // } static std::pair<unsigned, unsigned> -countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) { +countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE, + const TargetTransformInfo &TTI) { assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form"); unsigned DesiredPeelCount = 0; unsigned DesiredPeelCountLast = 0; @@ -479,7 +484,7 @@ countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) { const SCEV *Step = LeftAR->getStepRecurrence(SE); if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, RightSCEV, Step, Pred)) { - if (shouldPeelLastIteration(L, Pred, LeftAR, RightSCEV, SE)) + if (shouldPeelLastIteration(L, Pred, LeftAR, RightSCEV, SE, TTI)) DesiredPeelCountLast = 1; return; } @@ -593,8 +598,8 @@ static bool violatesLegacyMultiExitLoopCheck(Loop *L) { void llvm::computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, unsigned TripCount, DominatorTree &DT, - ScalarEvolution &SE, AssumptionCache *AC, - unsigned Threshold) { + ScalarEvolution &SE, const TargetTransformInfo &TTI, + AssumptionCache *AC, unsigned Threshold) { assert(LoopSize > 0 && "Zero loop size is not allowed!"); // Save the PP.PeelCount value set by the target in // TTI.getPeelingPreferences or by the flag -unroll-peel-count. @@ -656,7 +661,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, } const auto &[CountToEliminateCmps, CountToEliminateCmpsLast] = - countToEliminateCompares(*L, MaxPeelCount, SE); + countToEliminateCompares(*L, MaxPeelCount, SE, TTI); DesiredPeelCount = std::max(DesiredPeelCount, CountToEliminateCmps); if (DesiredPeelCount == 0) @@ -822,7 +827,7 @@ static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos, /// instructions in the last peeled-off iteration. static void cloneLoopBlocks( Loop *L, unsigned IterNumber, bool PeelLast, BasicBlock *InsertTop, - BasicBlock *InsertBot, + BasicBlock *InsertBot, BasicBlock *OrigPreHeader, SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges, SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, @@ -914,12 +919,22 @@ static void cloneLoopBlocks( // loop iteration. Since this copy is no longer part of the loop, we // resolve this statically: if (PeelLast) { - // For the last iteration, we use the value from the latch of the original - // loop directly. + // For the last iteration, we introduce new phis for each header phi in + // InsertTop, using the incoming value from the preheader for the original + // preheader (when skipping the main loop) and the incoming value from the + // latch for the latch (when continuing from the main loop). + IRBuilder<> B(InsertTop, InsertTop->getFirstNonPHIIt()); for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *NewPHI = cast<PHINode>(VMap[&*I]); - VMap[&*I] = NewPHI->getIncomingValueForBlock(Latch); + PHINode *PN = B.CreatePHI(NewPHI->getType(), 2); NewPHI->eraseFromParent(); + if (OrigPreHeader) + PN->addIncoming(cast<PHINode>(&*I)->getIncomingValueForBlock(PreHeader), + OrigPreHeader); + + PN->addIncoming(cast<PHINode>(&*I)->getIncomingValueForBlock(Latch), + Latch); + VMap[&*I] = PN; } } else { // For the first iteration, we use the value from the preheader directly. @@ -1053,7 +1068,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, // Set up all the necessary basic blocks. BasicBlock *InsertTop; BasicBlock *InsertBot; - BasicBlock *NewPreHeader; + BasicBlock *NewPreHeader = nullptr; DenseMap<Instruction *, Value *> ExitValues; if (PeelLast) { // It is convenient to split the single exit block from the latch the @@ -1084,11 +1099,34 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, for (PHINode &P : Exit->phis()) ExitValues[&P] = P.getIncomingValueForBlock(Latch); + const SCEV *BTC = SE->getBackedgeTakenCount(L); + InsertTop = SplitEdge(Latch, Exit, &DT, LI); InsertBot = SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI); InsertTop->setName(Exit->getName() + ".peel.begin"); InsertBot->setName(Exit->getName() + ".peel.next"); + NewPreHeader = nullptr; + + // If the original loop may only execute a single iteration we need to + // insert a trip count check and skip the original loop with the last + // iteration peeled off if necessary. + if (!SE->isKnownNonZero(BTC)) { + NewPreHeader = SplitEdge(PreHeader, Header, &DT, LI); + SCEVExpander Expander(*SE, Latch->getDataLayout(), "loop-peel"); + + BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); + Value *BTCValue = + Expander.expandCodeFor(BTC, BTC->getType(), PreHeaderBR); + IRBuilder<> B(PreHeaderBR); + Value *Cond = + B.CreateICmpNE(BTCValue, ConstantInt::get(BTCValue->getType(), 0)); + B.CreateCondBr(Cond, NewPreHeader, InsertTop); + PreHeaderBR->eraseFromParent(); + + // PreHeader now dominates InsertTop. + DT.changeImmediateDominator(InsertTop, PreHeader); + } } else { // It is convenient to split the preheader into 3 parts - two blocks to // anchor the peeled copy of the loop body, and a new preheader for the @@ -1162,8 +1200,9 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, for (unsigned Iter = 0; Iter < PeelCount; ++Iter) { SmallVector<BasicBlock *, 8> NewBlocks; - cloneLoopBlocks(L, Iter, PeelLast, InsertTop, InsertBot, ExitEdges, - NewBlocks, LoopBlocks, VMap, LVMap, &DT, LI, + cloneLoopBlocks(L, Iter, PeelLast, InsertTop, InsertBot, + NewPreHeader ? PreHeader : nullptr, ExitEdges, NewBlocks, + LoopBlocks, VMap, LVMap, &DT, LI, LoopLocalNoAliasDeclScopes, *SE); // Remap to use values from the current iteration instead of the @@ -1216,9 +1255,11 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, if (PeelLast) { // Now adjust users of the original exit values by replacing them with the - // exit value from the peeled iteration. - for (const auto &[P, E] : ExitValues) + // exit value from the peeled iteration and remove them. + for (const auto &[P, E] : ExitValues) { P->replaceAllUsesWith(isa<Constant>(E) ? E : &*VMap.lookup(E)); + P->eraseFromParent(); + } formLCSSA(*L, DT, LI, SE); } else { // Now adjust the phi nodes in the loop header to get their initial values |