diff options
Diffstat (limited to 'llvm/lib/Transforms/Utils')
-rw-r--r-- | llvm/lib/Transforms/Utils/LoopPeel.cpp | 121 | ||||
-rw-r--r-- | llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 55 |
2 files changed, 71 insertions, 105 deletions
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp index 735bad1..e1dcaa85 100644 --- a/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -883,84 +883,6 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, } } -struct WeightInfo { - // Weights for current iteration. - SmallVector<uint32_t> Weights; - // Weights to subtract after each iteration. - const SmallVector<uint32_t> SubWeights; -}; - -/// Update the branch weights of an exiting block of a peeled-off loop -/// iteration. -/// Let F is a weight of the edge to continue (fallthrough) into the loop. -/// Let E is a weight of the edge to an exit. -/// F/(F+E) is a probability to go to loop and E/(F+E) is a probability to -/// go to exit. -/// Then, Estimated ExitCount = F / E. -/// For I-th (counting from 0) peeled off iteration we set the weights for -/// the peeled exit as (EC - I, 1). It gives us reasonable distribution, -/// The probability to go to exit 1/(EC-I) increases. At the same time -/// the estimated exit count in the remainder loop reduces by I. -/// To avoid dealing with division rounding we can just multiple both part -/// of weights to E and use weight as (F - I * E, E). -static void updateBranchWeights(Instruction *Term, WeightInfo &Info) { - setBranchWeights(*Term, Info.Weights, /*IsExpected=*/false); - for (auto [Idx, SubWeight] : enumerate(Info.SubWeights)) - if (SubWeight != 0) - // Don't set the probability of taking the edge from latch to loop header - // to less than 1:1 ratio (meaning Weight should not be lower than - // SubWeight), as this could significantly reduce the loop's hotness, - // which would be incorrect in the case of underestimating the trip count. - Info.Weights[Idx] = - Info.Weights[Idx] > SubWeight - ? std::max(Info.Weights[Idx] - SubWeight, SubWeight) - : SubWeight; -} - -/// Initialize the weights for all exiting blocks. -static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos, - Loop *L) { - SmallVector<BasicBlock *> ExitingBlocks; - L->getExitingBlocks(ExitingBlocks); - for (BasicBlock *ExitingBlock : ExitingBlocks) { - Instruction *Term = ExitingBlock->getTerminator(); - SmallVector<uint32_t> Weights; - if (!extractBranchWeights(*Term, Weights)) - continue; - - // See the comment on updateBranchWeights() for an explanation of what we - // do here. - uint32_t FallThroughWeights = 0; - uint32_t ExitWeights = 0; - for (auto [Succ, Weight] : zip(successors(Term), Weights)) { - if (L->contains(Succ)) - FallThroughWeights += Weight; - else - ExitWeights += Weight; - } - - // Don't try to update weights for degenerate case. - if (FallThroughWeights == 0) - continue; - - SmallVector<uint32_t> SubWeights; - for (auto [Succ, Weight] : zip(successors(Term), Weights)) { - if (!L->contains(Succ)) { - // Exit weights stay the same. - SubWeights.push_back(0); - continue; - } - - // Subtract exit weights on each iteration, distributed across all - // fallthrough edges. - double W = (double)Weight / (double)FallThroughWeights; - SubWeights.push_back((uint32_t)(ExitWeights * W)); - } - - WeightInfos.insert({Term, {std::move(Weights), std::move(SubWeights)}}); - } -} - /// Clones the body of the loop L, putting it between \p InsertTop and \p /// InsertBot. /// \param IterNumber The serial number of the iteration currently being @@ -1332,11 +1254,6 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, Instruction *LatchTerm = cast<Instruction>(cast<BasicBlock>(Latch)->getTerminator()); - // If we have branch weight information, we'll want to update it for the - // newly created branches. - DenseMap<Instruction *, WeightInfo> Weights; - initBranchWeights(Weights, L); - // Identify what noalias metadata is inside the loop: if it is inside the // loop, the associated metadata must be cloned for each iteration. SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes; @@ -1382,11 +1299,6 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, assert(DT.verify(DominatorTree::VerificationLevel::Fast)); #endif - for (auto &[Term, Info] : Weights) { - auto *TermCopy = cast<Instruction>(VMap[Term]); - updateBranchWeights(TermCopy, Info); - } - // Remove Loop metadata from the latch branch instruction // because it is not the Loop's latch branch anymore. auto *LatchTermCopy = cast<Instruction>(VMap[LatchTerm]); @@ -1426,15 +1338,38 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, } } - for (const auto &[Term, Info] : Weights) { - setBranchWeights(*Term, Info.Weights, /*IsExpected=*/false); - } - // Update Metadata for count of peeled off iterations. unsigned AlreadyPeeled = 0; if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData)) AlreadyPeeled = *Peeled; - addStringMetadataToLoop(L, PeeledCountMetaData, AlreadyPeeled + PeelCount); + unsigned TotalPeeled = AlreadyPeeled + PeelCount; + addStringMetadataToLoop(L, PeeledCountMetaData, TotalPeeled); + + // Update metadata for the estimated trip count. The original branch weight + // metadata is already correct for both the remaining loop and the peeled loop + // iterations, so do not adjust it. + // + // For example, consider what happens when peeling 2 iterations from a loop + // with an estimated trip count of 10 and inserting them before the remaining + // loop. Each of the peeled iterations and each iteration in the remaining + // loop still has the same probability of exiting the *entire original* loop + // as it did when in the original loop, and thus it should still have the same + // branch weights. The peeled iterations' non-zero probabilities of exiting + // already appropriately reduce the probability of reaching the remaining + // iterations just as they did in the original loop. Trying to also adjust + // the remaining loop's branch weights to reflect its new trip count of 8 will + // erroneously further reduce its block frequencies. However, in case an + // analysis later needs to determine the trip count of the remaining loop + // while examining it in isolation without considering the probability of + // actually reaching it, we store the new trip count as separate metadata. + if (auto EstimatedTripCount = getLoopEstimatedTripCount(L)) { + unsigned EstimatedTripCountNew = *EstimatedTripCount; + if (EstimatedTripCountNew < TotalPeeled) + EstimatedTripCountNew = 0; + else + EstimatedTripCountNew -= TotalPeeled; + setLoopEstimatedTripCount(L, EstimatedTripCountNew); + } if (Loop *ParentLoop = L->getParentLoop()) L = ParentLoop; diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 8bba634..48055ad 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -5152,14 +5152,18 @@ bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI, if (ExtraCase && Values.size() < 2) return false; - // TODO: Preserve branch weight metadata, similarly to how - // foldValueComparisonIntoPredecessors preserves it. + SmallVector<uint32_t> BranchWeights; + const bool HasProfile = !ProfcheckDisableMetadataFixes && + extractBranchWeights(*BI, BranchWeights); // Figure out which block is which destination. BasicBlock *DefaultBB = BI->getSuccessor(1); BasicBlock *EdgeBB = BI->getSuccessor(0); - if (!TrueWhenEqual) + if (!TrueWhenEqual) { std::swap(DefaultBB, EdgeBB); + if (HasProfile) + std::swap(BranchWeights[0], BranchWeights[1]); + } BasicBlock *BB = BI->getParent(); @@ -5190,10 +5194,11 @@ bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI, if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr)) ExtraCase = Builder.CreateFreeze(ExtraCase); - if (TrueWhenEqual) - Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB); - else - Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB); + // We don't have any info about this condition. + auto *Br = TrueWhenEqual ? Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB) + : Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB); + setExplicitlyUnknownBranchWeightsIfProfiled(*Br, *NewBB->getParent(), + DEBUG_TYPE); OldTI->eraseFromParent(); @@ -5220,6 +5225,17 @@ bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI, // Create the new switch instruction now. SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size()); + if (HasProfile) { + // We know the weight of the default case. We don't know the weight of the + // other cases, but rather than completely lose profiling info, we split + // the remaining probability equally over them. + SmallVector<uint32_t> NewWeights(Values.size() + 1); + NewWeights[0] = BranchWeights[1]; // this is the default, and we swapped if + // TrueWhenEqual. + for (auto &V : drop_begin(NewWeights)) + V = BranchWeights[0] / Values.size(); + setBranchWeights(*New, NewWeights, /*IsExpected=*/false); + } // Add all of the 'cases' to the switch instruction. for (ConstantInt *Val : Values) @@ -7211,6 +7227,7 @@ static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder, Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest); BranchInst *RangeCheckBranch = nullptr; + BranchInst *CondBranch = nullptr; Builder.SetInsertPoint(SI); const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize); @@ -7225,6 +7242,7 @@ static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder, TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize)); RangeCheckBranch = Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); + CondBranch = RangeCheckBranch; if (DTU) Updates.push_back({DominatorTree::Insert, BB, LookupBB}); } @@ -7263,7 +7281,7 @@ static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder, Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted"); Value *LoBit = Builder.CreateTrunc( Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit"); - Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest()); + CondBranch = Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest()); if (DTU) { Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB}); Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()}); @@ -7303,19 +7321,32 @@ static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder, if (DTU) Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest}); + SmallVector<uint32_t> BranchWeights; + const bool HasBranchWeights = CondBranch && !ProfcheckDisableMetadataFixes && + extractBranchWeights(*SI, BranchWeights); + uint64_t ToLookupWeight = 0; + uint64_t ToDefaultWeight = 0; + // Remove the switch. SmallPtrSet<BasicBlock *, 8> RemovedSuccessors; - for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) { - BasicBlock *Succ = SI->getSuccessor(i); + for (unsigned I = 0, E = SI->getNumSuccessors(); I < E; ++I) { + BasicBlock *Succ = SI->getSuccessor(I); - if (Succ == SI->getDefaultDest()) + if (Succ == SI->getDefaultDest()) { + if (HasBranchWeights) + ToDefaultWeight += BranchWeights[I]; continue; + } Succ->removePredecessor(BB); if (DTU && RemovedSuccessors.insert(Succ).second) Updates.push_back({DominatorTree::Delete, BB, Succ}); + if (HasBranchWeights) + ToLookupWeight += BranchWeights[I]; } SI->eraseFromParent(); - + if (HasBranchWeights) + setFittedBranchWeights(*CondBranch, {ToLookupWeight, ToDefaultWeight}, + /*IsExpected=*/false); if (DTU) DTU->applyUpdates(Updates); |