aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Utils
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Utils')
-rw-r--r--llvm/lib/Transforms/Utils/LoopPeel.cpp121
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyCFG.cpp55
2 files changed, 71 insertions, 105 deletions
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index 735bad1..e1dcaa85 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -883,84 +883,6 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
}
}
-struct WeightInfo {
- // Weights for current iteration.
- SmallVector<uint32_t> Weights;
- // Weights to subtract after each iteration.
- const SmallVector<uint32_t> SubWeights;
-};
-
-/// Update the branch weights of an exiting block of a peeled-off loop
-/// iteration.
-/// Let F is a weight of the edge to continue (fallthrough) into the loop.
-/// Let E is a weight of the edge to an exit.
-/// F/(F+E) is a probability to go to loop and E/(F+E) is a probability to
-/// go to exit.
-/// Then, Estimated ExitCount = F / E.
-/// For I-th (counting from 0) peeled off iteration we set the weights for
-/// the peeled exit as (EC - I, 1). It gives us reasonable distribution,
-/// The probability to go to exit 1/(EC-I) increases. At the same time
-/// the estimated exit count in the remainder loop reduces by I.
-/// To avoid dealing with division rounding we can just multiple both part
-/// of weights to E and use weight as (F - I * E, E).
-static void updateBranchWeights(Instruction *Term, WeightInfo &Info) {
- setBranchWeights(*Term, Info.Weights, /*IsExpected=*/false);
- for (auto [Idx, SubWeight] : enumerate(Info.SubWeights))
- if (SubWeight != 0)
- // Don't set the probability of taking the edge from latch to loop header
- // to less than 1:1 ratio (meaning Weight should not be lower than
- // SubWeight), as this could significantly reduce the loop's hotness,
- // which would be incorrect in the case of underestimating the trip count.
- Info.Weights[Idx] =
- Info.Weights[Idx] > SubWeight
- ? std::max(Info.Weights[Idx] - SubWeight, SubWeight)
- : SubWeight;
-}
-
-/// Initialize the weights for all exiting blocks.
-static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos,
- Loop *L) {
- SmallVector<BasicBlock *> ExitingBlocks;
- L->getExitingBlocks(ExitingBlocks);
- for (BasicBlock *ExitingBlock : ExitingBlocks) {
- Instruction *Term = ExitingBlock->getTerminator();
- SmallVector<uint32_t> Weights;
- if (!extractBranchWeights(*Term, Weights))
- continue;
-
- // See the comment on updateBranchWeights() for an explanation of what we
- // do here.
- uint32_t FallThroughWeights = 0;
- uint32_t ExitWeights = 0;
- for (auto [Succ, Weight] : zip(successors(Term), Weights)) {
- if (L->contains(Succ))
- FallThroughWeights += Weight;
- else
- ExitWeights += Weight;
- }
-
- // Don't try to update weights for degenerate case.
- if (FallThroughWeights == 0)
- continue;
-
- SmallVector<uint32_t> SubWeights;
- for (auto [Succ, Weight] : zip(successors(Term), Weights)) {
- if (!L->contains(Succ)) {
- // Exit weights stay the same.
- SubWeights.push_back(0);
- continue;
- }
-
- // Subtract exit weights on each iteration, distributed across all
- // fallthrough edges.
- double W = (double)Weight / (double)FallThroughWeights;
- SubWeights.push_back((uint32_t)(ExitWeights * W));
- }
-
- WeightInfos.insert({Term, {std::move(Weights), std::move(SubWeights)}});
- }
-}
-
/// Clones the body of the loop L, putting it between \p InsertTop and \p
/// InsertBot.
/// \param IterNumber The serial number of the iteration currently being
@@ -1332,11 +1254,6 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
Instruction *LatchTerm =
cast<Instruction>(cast<BasicBlock>(Latch)->getTerminator());
- // If we have branch weight information, we'll want to update it for the
- // newly created branches.
- DenseMap<Instruction *, WeightInfo> Weights;
- initBranchWeights(Weights, L);
-
// Identify what noalias metadata is inside the loop: if it is inside the
// loop, the associated metadata must be cloned for each iteration.
SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
@@ -1382,11 +1299,6 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
assert(DT.verify(DominatorTree::VerificationLevel::Fast));
#endif
- for (auto &[Term, Info] : Weights) {
- auto *TermCopy = cast<Instruction>(VMap[Term]);
- updateBranchWeights(TermCopy, Info);
- }
-
// Remove Loop metadata from the latch branch instruction
// because it is not the Loop's latch branch anymore.
auto *LatchTermCopy = cast<Instruction>(VMap[LatchTerm]);
@@ -1426,15 +1338,38 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
}
}
- for (const auto &[Term, Info] : Weights) {
- setBranchWeights(*Term, Info.Weights, /*IsExpected=*/false);
- }
-
// Update Metadata for count of peeled off iterations.
unsigned AlreadyPeeled = 0;
if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
AlreadyPeeled = *Peeled;
- addStringMetadataToLoop(L, PeeledCountMetaData, AlreadyPeeled + PeelCount);
+ unsigned TotalPeeled = AlreadyPeeled + PeelCount;
+ addStringMetadataToLoop(L, PeeledCountMetaData, TotalPeeled);
+
+ // Update metadata for the estimated trip count. The original branch weight
+ // metadata is already correct for both the remaining loop and the peeled loop
+ // iterations, so do not adjust it.
+ //
+ // For example, consider what happens when peeling 2 iterations from a loop
+ // with an estimated trip count of 10 and inserting them before the remaining
+ // loop. Each of the peeled iterations and each iteration in the remaining
+ // loop still has the same probability of exiting the *entire original* loop
+ // as it did when in the original loop, and thus it should still have the same
+ // branch weights. The peeled iterations' non-zero probabilities of exiting
+ // already appropriately reduce the probability of reaching the remaining
+ // iterations just as they did in the original loop. Trying to also adjust
+ // the remaining loop's branch weights to reflect its new trip count of 8 will
+ // erroneously further reduce its block frequencies. However, in case an
+ // analysis later needs to determine the trip count of the remaining loop
+ // while examining it in isolation without considering the probability of
+ // actually reaching it, we store the new trip count as separate metadata.
+ if (auto EstimatedTripCount = getLoopEstimatedTripCount(L)) {
+ unsigned EstimatedTripCountNew = *EstimatedTripCount;
+ if (EstimatedTripCountNew < TotalPeeled)
+ EstimatedTripCountNew = 0;
+ else
+ EstimatedTripCountNew -= TotalPeeled;
+ setLoopEstimatedTripCount(L, EstimatedTripCountNew);
+ }
if (Loop *ParentLoop = L->getParentLoop())
L = ParentLoop;
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 8bba634..48055ad 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5152,14 +5152,18 @@ bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
if (ExtraCase && Values.size() < 2)
return false;
- // TODO: Preserve branch weight metadata, similarly to how
- // foldValueComparisonIntoPredecessors preserves it.
+ SmallVector<uint32_t> BranchWeights;
+ const bool HasProfile = !ProfcheckDisableMetadataFixes &&
+ extractBranchWeights(*BI, BranchWeights);
// Figure out which block is which destination.
BasicBlock *DefaultBB = BI->getSuccessor(1);
BasicBlock *EdgeBB = BI->getSuccessor(0);
- if (!TrueWhenEqual)
+ if (!TrueWhenEqual) {
std::swap(DefaultBB, EdgeBB);
+ if (HasProfile)
+ std::swap(BranchWeights[0], BranchWeights[1]);
+ }
BasicBlock *BB = BI->getParent();
@@ -5190,10 +5194,11 @@ bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
ExtraCase = Builder.CreateFreeze(ExtraCase);
- if (TrueWhenEqual)
- Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
- else
- Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
+ // We don't have any info about this condition.
+ auto *Br = TrueWhenEqual ? Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB)
+ : Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
+ setExplicitlyUnknownBranchWeightsIfProfiled(*Br, *NewBB->getParent(),
+ DEBUG_TYPE);
OldTI->eraseFromParent();
@@ -5220,6 +5225,17 @@ bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
// Create the new switch instruction now.
SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
+ if (HasProfile) {
+ // We know the weight of the default case. We don't know the weight of the
+ // other cases, but rather than completely lose profiling info, we split
+ // the remaining probability equally over them.
+ SmallVector<uint32_t> NewWeights(Values.size() + 1);
+ NewWeights[0] = BranchWeights[1]; // this is the default, and we swapped if
+ // TrueWhenEqual.
+ for (auto &V : drop_begin(NewWeights))
+ V = BranchWeights[0] / Values.size();
+ setBranchWeights(*New, NewWeights, /*IsExpected=*/false);
+ }
// Add all of the 'cases' to the switch instruction.
for (ConstantInt *Val : Values)
@@ -7211,6 +7227,7 @@ static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder,
Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
BranchInst *RangeCheckBranch = nullptr;
+ BranchInst *CondBranch = nullptr;
Builder.SetInsertPoint(SI);
const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
@@ -7225,6 +7242,7 @@ static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder,
TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
RangeCheckBranch =
Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
+ CondBranch = RangeCheckBranch;
if (DTU)
Updates.push_back({DominatorTree::Insert, BB, LookupBB});
}
@@ -7263,7 +7281,7 @@ static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder,
Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
Value *LoBit = Builder.CreateTrunc(
Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
- Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
+ CondBranch = Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
if (DTU) {
Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
@@ -7303,19 +7321,32 @@ static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder,
if (DTU)
Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});
+ SmallVector<uint32_t> BranchWeights;
+ const bool HasBranchWeights = CondBranch && !ProfcheckDisableMetadataFixes &&
+ extractBranchWeights(*SI, BranchWeights);
+ uint64_t ToLookupWeight = 0;
+ uint64_t ToDefaultWeight = 0;
+
// Remove the switch.
SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
- for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
- BasicBlock *Succ = SI->getSuccessor(i);
+ for (unsigned I = 0, E = SI->getNumSuccessors(); I < E; ++I) {
+ BasicBlock *Succ = SI->getSuccessor(I);
- if (Succ == SI->getDefaultDest())
+ if (Succ == SI->getDefaultDest()) {
+ if (HasBranchWeights)
+ ToDefaultWeight += BranchWeights[I];
continue;
+ }
Succ->removePredecessor(BB);
if (DTU && RemovedSuccessors.insert(Succ).second)
Updates.push_back({DominatorTree::Delete, BB, Succ});
+ if (HasBranchWeights)
+ ToLookupWeight += BranchWeights[I];
}
SI->eraseFromParent();
-
+ if (HasBranchWeights)
+ setFittedBranchWeights(*CondBranch, {ToLookupWeight, ToDefaultWeight},
+ /*IsExpected=*/false);
if (DTU)
DTU->applyUpdates(Updates);