aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Analysis/BranchProbabilityInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Analysis/BranchProbabilityInfo.cpp')
-rw-r--r--llvm/lib/Analysis/BranchProbabilityInfo.cpp645
1 files changed, 349 insertions, 296 deletions
diff --git a/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/llvm/lib/Analysis/BranchProbabilityInfo.cpp
index d4cb46c..884ba48 100644
--- a/llvm/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -61,6 +61,7 @@ INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass, "branch-prob",
"Branch Probability Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass, "branch-prob",
"Branch Probability Analysis", false, true)
@@ -95,8 +96,6 @@ char BranchProbabilityInfoWrapperPass::ID = 0;
// Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125
static const uint32_t LBH_TAKEN_WEIGHT = 124;
static const uint32_t LBH_NONTAKEN_WEIGHT = 4;
-// Unlikely edges within a loop are half as likely as other edges
-static const uint32_t LBH_UNLIKELY_WEIGHT = 62;
/// Unreachable-terminating branch taken probability.
///
@@ -105,20 +104,6 @@ static const uint32_t LBH_UNLIKELY_WEIGHT = 62;
/// All reachable probability will proportionally share the remaining part.
static const BranchProbability UR_TAKEN_PROB = BranchProbability::getRaw(1);
-/// Weight for a branch taken going into a cold block.
-///
-/// This is the weight for a branch taken toward a block marked
-/// cold. A block is marked cold if it's postdominated by a
-/// block containing a call to a cold function. Cold functions
-/// are those marked with attribute 'cold'.
-static const uint32_t CC_TAKEN_WEIGHT = 4;
-
-/// Weight for a branch not-taken into a cold block.
-///
-/// This is the weight for a branch not taken toward a block marked
-/// cold.
-static const uint32_t CC_NONTAKEN_WEIGHT = 64;
-
static const uint32_t PH_TAKEN_WEIGHT = 20;
static const uint32_t PH_NONTAKEN_WEIGHT = 12;
@@ -135,18 +120,26 @@ static const uint32_t FPH_ORD_WEIGHT = 1024 * 1024 - 1;
/// exceptional case, so the result is unlikely.
static const uint32_t FPH_UNO_WEIGHT = 1;
-/// Invoke-terminating normal branch taken weight
-///
-/// This is the weight for branching to the normal destination of an invoke
-/// instruction. We expect this to happen most of the time. Set the weight to an
-/// absurdly high value so that nested loops subsume it.
-static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1;
-
-/// Invoke-terminating normal branch not-taken weight.
-///
-/// This is the weight for branching to the unwind destination of an invoke
-/// instruction. This is essentially never taken.
-static const uint32_t IH_NONTAKEN_WEIGHT = 1;
+/// Set of dedicated "absolute" execution weights for a block. These weights are
+/// meaningful relative to each other and their derivatives only.
+enum class BlockExecWeight : std::uint32_t {
+ /// Special weight used for cases with exact zero probability.
+ ZERO = 0x0,
+ /// Minimal possible non zero weight.
+ LOWEST_NON_ZERO = 0x1,
+ /// Weight to an 'unreachable' block.
+ UNREACHABLE = ZERO,
+ /// Weight to a block containing non returning call.
+ NORETURN = LOWEST_NON_ZERO,
+ /// Weight to 'unwind' block of an invoke instruction.
+ UNWIND = LOWEST_NON_ZERO,
+ /// Weight to a 'cold' block. Cold blocks are the ones containing calls marked
+ /// with attribute 'cold'.
+ COLD = 0xffff,
+ /// Default weight is used in cases when there is no dedicated execution
+ /// weight set. It is not propagated through the domination line either.
+ DEFAULT = 0xfffff
+};
BranchProbabilityInfo::SccInfo::SccInfo(const Function &F) {
// Record SCC numbers of blocks in the CFG to identify irreducible loops.
@@ -306,133 +299,6 @@ void BranchProbabilityInfo::getLoopExitBlocks(
}
}
-static void UpdatePDTWorklist(const BasicBlock *BB, PostDominatorTree *PDT,
- SmallVectorImpl<const BasicBlock *> &WorkList,
- SmallPtrSetImpl<const BasicBlock *> &TargetSet) {
- SmallVector<BasicBlock *, 8> Descendants;
- SmallPtrSet<const BasicBlock *, 16> NewItems;
-
- PDT->getDescendants(const_cast<BasicBlock *>(BB), Descendants);
- for (auto *BB : Descendants)
- if (TargetSet.insert(BB).second)
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- if (!TargetSet.count(*PI))
- NewItems.insert(*PI);
- WorkList.insert(WorkList.end(), NewItems.begin(), NewItems.end());
-}
-
-/// Compute a set of basic blocks that are post-dominated by unreachables.
-void BranchProbabilityInfo::computePostDominatedByUnreachable(
- const Function &F, PostDominatorTree *PDT) {
- SmallVector<const BasicBlock *, 8> WorkList;
- for (auto &BB : F) {
- const Instruction *TI = BB.getTerminator();
- if (TI->getNumSuccessors() == 0) {
- if (isa<UnreachableInst>(TI) ||
- // If this block is terminated by a call to
- // @llvm.experimental.deoptimize then treat it like an unreachable
- // since the @llvm.experimental.deoptimize call is expected to
- // practically never execute.
- BB.getTerminatingDeoptimizeCall())
- UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByUnreachable);
- }
- }
-
- while (!WorkList.empty()) {
- const BasicBlock *BB = WorkList.pop_back_val();
- if (PostDominatedByUnreachable.count(BB))
- continue;
- // If the terminator is an InvokeInst, check only the normal destination
- // block as the unwind edge of InvokeInst is also very unlikely taken.
- if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
- if (PostDominatedByUnreachable.count(II->getNormalDest()))
- UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable);
- }
- // If all the successors are unreachable, BB is unreachable as well.
- else if (!successors(BB).empty() &&
- llvm::all_of(successors(BB), [this](const BasicBlock *Succ) {
- return PostDominatedByUnreachable.count(Succ);
- }))
- UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable);
- }
-}
-
-/// compute a set of basic blocks that are post-dominated by ColdCalls.
-void BranchProbabilityInfo::computePostDominatedByColdCall(
- const Function &F, PostDominatorTree *PDT) {
- SmallVector<const BasicBlock *, 8> WorkList;
- for (auto &BB : F)
- for (auto &I : BB)
- if (const CallInst *CI = dyn_cast<CallInst>(&I))
- if (CI->hasFnAttr(Attribute::Cold))
- UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByColdCall);
-
- while (!WorkList.empty()) {
- const BasicBlock *BB = WorkList.pop_back_val();
-
- // If the terminator is an InvokeInst, check only the normal destination
- // block as the unwind edge of InvokeInst is also very unlikely taken.
- if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
- if (PostDominatedByColdCall.count(II->getNormalDest()))
- UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall);
- }
- // If all of successor are post dominated then BB is also done.
- else if (!successors(BB).empty() &&
- llvm::all_of(successors(BB), [this](const BasicBlock *Succ) {
- return PostDominatedByColdCall.count(Succ);
- }))
- UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall);
- }
-}
-
-/// Calculate edge weights for successors lead to unreachable.
-///
-/// Predict that a successor which leads necessarily to an
-/// unreachable-terminated block as extremely unlikely.
-bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) {
- const Instruction *TI = BB->getTerminator();
- (void) TI;
- assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
- assert(!isa<InvokeInst>(TI) &&
- "Invokes should have already been handled by calcInvokeHeuristics");
-
- SmallVector<unsigned, 4> UnreachableEdges;
- SmallVector<unsigned, 4> ReachableEdges;
-
- for (const_succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
- if (PostDominatedByUnreachable.count(*I))
- UnreachableEdges.push_back(I.getSuccessorIndex());
- else
- ReachableEdges.push_back(I.getSuccessorIndex());
-
- // Skip probabilities if all were reachable.
- if (UnreachableEdges.empty())
- return false;
-
- SmallVector<BranchProbability, 4> EdgeProbabilities(
- BB->getTerminator()->getNumSuccessors(), BranchProbability::getUnknown());
- if (ReachableEdges.empty()) {
- BranchProbability Prob(1, UnreachableEdges.size());
- for (unsigned SuccIdx : UnreachableEdges)
- EdgeProbabilities[SuccIdx] = Prob;
- setEdgeProbability(BB, EdgeProbabilities);
- return true;
- }
-
- auto UnreachableProb = UR_TAKEN_PROB;
- auto ReachableProb =
- (BranchProbability::getOne() - UR_TAKEN_PROB * UnreachableEdges.size()) /
- ReachableEdges.size();
-
- for (unsigned SuccIdx : UnreachableEdges)
- EdgeProbabilities[SuccIdx] = UnreachableProb;
- for (unsigned SuccIdx : ReachableEdges)
- EdgeProbabilities[SuccIdx] = ReachableProb;
-
- setEdgeProbability(BB, EdgeProbabilities);
- return true;
-}
-
// Propagate existing explicit probabilities from either profile data or
// 'expect' intrinsic processing. Examine metadata against unreachable
// heuristic. The probability of the edge coming to unreachable block is
@@ -473,7 +339,12 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
"Too many bits for uint32_t");
Weights.push_back(Weight->getZExtValue());
WeightSum += Weights.back();
- if (PostDominatedByUnreachable.count(TI->getSuccessor(I - 1)))
+ const LoopBlock SrcLoopBB = getLoopBlock(BB);
+ const LoopBlock DstLoopBB = getLoopBlock(TI->getSuccessor(I - 1));
+ auto EstimatedWeight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB});
+ if (EstimatedWeight &&
+ EstimatedWeight.getValue() <=
+ static_cast<uint32_t>(BlockExecWeight::UNREACHABLE))
UnreachableIdxs.push_back(I - 1);
else
ReachableIdxs.push_back(I - 1);
@@ -578,60 +449,6 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
return true;
}
-/// Calculate edge weights for edges leading to cold blocks.
-///
-/// A cold block is one post-dominated by a block with a call to a
-/// cold function. Those edges are unlikely to be taken, so we give
-/// them relatively low weight.
-///
-/// Return true if we could compute the weights for cold edges.
-/// Return false, otherwise.
-bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) {
- const Instruction *TI = BB->getTerminator();
- (void) TI;
- assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
- assert(!isa<InvokeInst>(TI) &&
- "Invokes should have already been handled by calcInvokeHeuristics");
-
- // Determine which successors are post-dominated by a cold block.
- SmallVector<unsigned, 4> ColdEdges;
- SmallVector<unsigned, 4> NormalEdges;
- for (const_succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
- if (PostDominatedByColdCall.count(*I))
- ColdEdges.push_back(I.getSuccessorIndex());
- else
- NormalEdges.push_back(I.getSuccessorIndex());
-
- // Skip probabilities if no cold edges.
- if (ColdEdges.empty())
- return false;
-
- SmallVector<BranchProbability, 4> EdgeProbabilities(
- BB->getTerminator()->getNumSuccessors(), BranchProbability::getUnknown());
- if (NormalEdges.empty()) {
- BranchProbability Prob(1, ColdEdges.size());
- for (unsigned SuccIdx : ColdEdges)
- EdgeProbabilities[SuccIdx] = Prob;
- setEdgeProbability(BB, EdgeProbabilities);
- return true;
- }
-
- auto ColdProb = BranchProbability::getBranchProbability(
- CC_TAKEN_WEIGHT,
- (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) * uint64_t(ColdEdges.size()));
- auto NormalProb = BranchProbability::getBranchProbability(
- CC_NONTAKEN_WEIGHT,
- (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) * uint64_t(NormalEdges.size()));
-
- for (unsigned SuccIdx : ColdEdges)
- EdgeProbabilities[SuccIdx] = ColdProb;
- for (unsigned SuccIdx : NormalEdges)
- EdgeProbabilities[SuccIdx] = NormalProb;
-
- setEdgeProbability(BB, EdgeProbabilities);
- return true;
-}
-
// Calculate Edge Weights using "Pointer Heuristics". Predict a comparison
// between two pointer or pointer and NULL will fail.
bool BranchProbabilityInfo::calcPointerHeuristics(const BasicBlock *BB) {
@@ -775,81 +592,324 @@ computeUnlikelySuccessors(const BasicBlock *BB, Loop *L,
}
}
-// Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges
-// as taken, exiting edges as not-taken.
-bool BranchProbabilityInfo::calcLoopBranchHeuristics(const BasicBlock *BB,
- const LoopInfo &LI) {
- LoopBlock LB(BB, LI, *SccI.get());
- if (!LB.belongsToLoop())
+Optional<uint32_t>
+BranchProbabilityInfo::getEstimatedBlockWeight(const BasicBlock *BB) const {
+ auto WeightIt = EstimatedBlockWeight.find(BB);
+ if (WeightIt == EstimatedBlockWeight.end())
+ return None;
+ return WeightIt->second;
+}
+
+Optional<uint32_t>
+BranchProbabilityInfo::getEstimatedLoopWeight(const LoopData &L) const {
+ auto WeightIt = EstimatedLoopWeight.find(L);
+ if (WeightIt == EstimatedLoopWeight.end())
+ return None;
+ return WeightIt->second;
+}
+
+Optional<uint32_t>
+BranchProbabilityInfo::getEstimatedEdgeWeight(const LoopEdge &Edge) const {
+ // For edges entering a loop take weight of a loop rather than an individual
+ // block in the loop.
+ return isLoopEnteringEdge(Edge)
+ ? getEstimatedLoopWeight(Edge.second.getLoopData())
+ : getEstimatedBlockWeight(Edge.second.getBlock());
+}
+
+template <class IterT>
+Optional<uint32_t> BranchProbabilityInfo::getMaxEstimatedEdgeWeight(
+ const LoopBlock &SrcLoopBB, iterator_range<IterT> Successors) const {
+ SmallVector<uint32_t, 4> Weights;
+ Optional<uint32_t> MaxWeight;
+ for (const BasicBlock *DstBB : Successors) {
+ const LoopBlock DstLoopBB = getLoopBlock(DstBB);
+ auto Weight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB});
+
+ if (!Weight)
+ return None;
+
+ if (!MaxWeight || MaxWeight.getValue() < Weight.getValue())
+ MaxWeight = Weight;
+ }
+
+ return MaxWeight;
+}
+
+// Updates \p LoopBB's weight and returns true. If \p LoopBB has already
+// an associated weight it is unchanged and false is returned.
+//
+// Please note by the algorithm the weight is not expected to change once set
+// thus 'false' status is used to track visited blocks.
+bool BranchProbabilityInfo::updateEstimatedBlockWeight(
+ LoopBlock &LoopBB, uint32_t BBWeight,
+ SmallVectorImpl<BasicBlock *> &BlockWorkList,
+ SmallVectorImpl<LoopBlock> &LoopWorkList) {
+ BasicBlock *BB = LoopBB.getBlock();
+
+ // In general, weight is assigned to a block when it has final value and
+ // can't/shouldn't be changed. However, there are cases when a block
+ // inherently has several (possibly "contradicting") weights. For example,
+ // "unwind" block may also contain "cold" call. In that case the first
+ // set weight is favored and all consequent weights are ignored.
+ if (!EstimatedBlockWeight.insert({BB, BBWeight}).second)
return false;
- SmallPtrSet<const BasicBlock*, 8> UnlikelyBlocks;
- if (LB.getLoop())
- computeUnlikelySuccessors(BB, LB.getLoop(), UnlikelyBlocks);
+ for (BasicBlock *PredBlock : predecessors(BB)) {
+ LoopBlock PredLoop = getLoopBlock(PredBlock);
+ // Add affected block/loop to a working list.
+ if (isLoopExitingEdge({PredLoop, LoopBB})) {
+ if (!EstimatedLoopWeight.count(PredLoop.getLoopData()))
+ LoopWorkList.push_back(PredLoop);
+ } else if (!EstimatedBlockWeight.count(PredBlock))
+ BlockWorkList.push_back(PredBlock);
+ }
+ return true;
+}
- SmallVector<unsigned, 8> BackEdges;
- SmallVector<unsigned, 8> ExitingEdges;
- SmallVector<unsigned, 8> InEdges; // Edges from header to the loop.
- SmallVector<unsigned, 8> UnlikelyEdges;
+// Starting from \p BB traverse through dominator blocks and assign \p BBWeight
+// to all such blocks that are post dominated by \p BB. In other words to all
+// blocks that the one is executed if and only if another one is executed.
+// Importantly, we skip loops here for two reasons. First weights of blocks in
+// a loop should be scaled by trip count (yet possibly unknown). Second there is
+// no value in doing that because that doesn't give any additional
+// information regarding distribution of probabilities inside the loop.
+// Exception is loop 'enter' and 'exit' edges that are handled in a special way
+// at calcEstimatedHeuristics.
+//
+// In addition, \p WorkList is populated with basic blocks if at least one
+// successor has updated estimated weight.
+void BranchProbabilityInfo::propagateEstimatedBlockWeight(
+ const LoopBlock &LoopBB, DominatorTree *DT, PostDominatorTree *PDT,
+ uint32_t BBWeight, SmallVectorImpl<BasicBlock *> &BlockWorkList,
+ SmallVectorImpl<LoopBlock> &LoopWorkList) {
+ const BasicBlock *BB = LoopBB.getBlock();
+ const auto *DTStartNode = DT->getNode(BB);
+ const auto *PDTStartNode = PDT->getNode(BB);
+
+ // TODO: Consider propagating weight down the domination line as well.
+ for (const auto *DTNode = DTStartNode; DTNode != nullptr;
+ DTNode = DTNode->getIDom()) {
+ auto *DomBB = DTNode->getBlock();
+ // Consider blocks which lie on one 'line'.
+ if (!PDT->dominates(PDTStartNode, PDT->getNode(DomBB)))
+ // If BB doesn't post dominate DomBB it will not post dominate dominators
+ // of DomBB as well.
+ break;
- for (const_succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
- LoopBlock SuccLB(*I, LI, *SccI.get());
- LoopEdge Edge(LB, SuccLB);
- bool IsUnlikelyEdge = LB.getLoop() && UnlikelyBlocks.contains(*I);
-
- if (IsUnlikelyEdge)
- UnlikelyEdges.push_back(I.getSuccessorIndex());
- else if (isLoopExitingEdge(Edge))
- ExitingEdges.push_back(I.getSuccessorIndex());
- else if (isLoopBackEdge(Edge))
- BackEdges.push_back(I.getSuccessorIndex());
- else {
- InEdges.push_back(I.getSuccessorIndex());
+ LoopBlock DomLoopBB = getLoopBlock(DomBB);
+ const LoopEdge Edge{DomLoopBB, LoopBB};
+ // Don't propagate weight to blocks belonging to different loops.
+ if (!isLoopEnteringExitingEdge(Edge)) {
+ if (!updateEstimatedBlockWeight(DomLoopBB, BBWeight, BlockWorkList,
+ LoopWorkList))
+        // If DomBB has weight set then all its predecessors are already
+ // processed (since we propagate weight up to the top of IR each time).
+ break;
+ } else if (isLoopExitingEdge(Edge)) {
+ LoopWorkList.push_back(DomLoopBB);
}
}
+}
+
+Optional<uint32_t> BranchProbabilityInfo::getInitialEstimatedBlockWeight(
+ const BasicBlock *BB) {
+ // Returns true if \p BB has call marked with "NoReturn" attribute.
+ auto hasNoReturn = [&](const BasicBlock *BB) {
+ for (const auto &I : reverse(*BB))
+ if (const CallInst *CI = dyn_cast<CallInst>(&I))
+ if (CI->hasFnAttr(Attribute::NoReturn))
+ return true;
- if (BackEdges.empty() && ExitingEdges.empty() && UnlikelyEdges.empty())
return false;
+ };
+
+ // Important note regarding the order of checks. They are ordered by weight
+  // from lowest to highest. Doing that allows us to avoid "unstable" results
+  // when several condition heuristics can be applied simultaneously.
+ if (isa<UnreachableInst>(BB->getTerminator()) ||
+ // If this block is terminated by a call to
+ // @llvm.experimental.deoptimize then treat it like an unreachable
+ // since it is expected to practically never execute.
+ // TODO: Should we actually treat as never returning call?
+ BB->getTerminatingDeoptimizeCall())
+ return hasNoReturn(BB)
+ ? static_cast<uint32_t>(BlockExecWeight::NORETURN)
+ : static_cast<uint32_t>(BlockExecWeight::UNREACHABLE);
+
+ // Check if the block is 'unwind' handler of some invoke instruction.
+ for (const auto *Pred : predecessors(BB))
+ if (Pred)
+ if (const auto *II = dyn_cast<InvokeInst>(Pred->getTerminator()))
+ if (II->getUnwindDest() == BB)
+ return static_cast<uint32_t>(BlockExecWeight::UNWIND);
+
+ // Check if the block contains 'cold' call.
+ for (const auto &I : *BB)
+ if (const CallInst *CI = dyn_cast<CallInst>(&I))
+ if (CI->hasFnAttr(Attribute::Cold))
+ return static_cast<uint32_t>(BlockExecWeight::COLD);
+
+ return None;
+}
- // Collect the sum of probabilities of back-edges/in-edges/exiting-edges, and
- // normalize them so that they sum up to one.
- unsigned Denom = (BackEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) +
- (InEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) +
- (UnlikelyEdges.empty() ? 0 : LBH_UNLIKELY_WEIGHT) +
- (ExitingEdges.empty() ? 0 : LBH_NONTAKEN_WEIGHT);
+// Does RPO traversal over all blocks in \p F and assigns weights to
+// 'unreachable', 'noreturn', 'cold', 'unwind' blocks. In addition it does its
+// best to propagate the weight up and down the IR.
+void BranchProbabilityInfo::computeEestimateBlockWeight(
+ const Function &F, DominatorTree *DT, PostDominatorTree *PDT) {
+ SmallVector<BasicBlock *, 8> BlockWorkList;
+ SmallVector<LoopBlock, 8> LoopWorkList;
+
+ // By doing RPO we make sure that all predecessors already have weights
+ // calculated before visiting theirs successors.
+ ReversePostOrderTraversal<const Function *> RPOT(&F);
+ for (const auto *BB : RPOT)
+ if (auto BBWeight = getInitialEstimatedBlockWeight(BB))
+ // If we were able to find estimated weight for the block set it to this
+ // block and propagate up the IR.
+ propagateEstimatedBlockWeight(getLoopBlock(BB), DT, PDT,
+ BBWeight.getValue(), BlockWorkList,
+ LoopWorkList);
+
+  // BlockWorkList/LoopWorkList contains blocks/loops with at least one
+ // successor/exit having estimated weight. Try to propagate weight to such
+ // blocks/loops from successors/exits.
+ // Process loops and blocks. Order is not important.
+ do {
+ while (!LoopWorkList.empty()) {
+ const LoopBlock LoopBB = LoopWorkList.pop_back_val();
+
+ if (EstimatedLoopWeight.count(LoopBB.getLoopData()))
+ continue;
- SmallVector<BranchProbability, 4> EdgeProbabilities(
- BB->getTerminator()->getNumSuccessors(), BranchProbability::getUnknown());
- if (uint32_t numBackEdges = BackEdges.size()) {
- BranchProbability TakenProb = BranchProbability(LBH_TAKEN_WEIGHT, Denom);
- auto Prob = TakenProb / numBackEdges;
- for (unsigned SuccIdx : BackEdges)
- EdgeProbabilities[SuccIdx] = Prob;
- }
+ SmallVector<BasicBlock *, 4> Exits;
+ getLoopExitBlocks(LoopBB, Exits);
+ auto LoopWeight = getMaxEstimatedEdgeWeight(
+ LoopBB, make_range(Exits.begin(), Exits.end()));
- if (uint32_t numInEdges = InEdges.size()) {
- BranchProbability TakenProb = BranchProbability(LBH_TAKEN_WEIGHT, Denom);
- auto Prob = TakenProb / numInEdges;
- for (unsigned SuccIdx : InEdges)
- EdgeProbabilities[SuccIdx] = Prob;
- }
+ if (LoopWeight) {
+ // If we never exit the loop then we can enter it once at maximum.
+ if (LoopWeight <= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE))
+ LoopWeight = static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO);
+
+ EstimatedLoopWeight.insert(
+ {LoopBB.getLoopData(), LoopWeight.getValue()});
+ // Add all blocks entering the loop into working list.
+ getLoopEnterBlocks(LoopBB, BlockWorkList);
+ }
+ }
- if (uint32_t numExitingEdges = ExitingEdges.size()) {
- BranchProbability NotTakenProb = BranchProbability(LBH_NONTAKEN_WEIGHT,
- Denom);
- auto Prob = NotTakenProb / numExitingEdges;
- for (unsigned SuccIdx : ExitingEdges)
- EdgeProbabilities[SuccIdx] = Prob;
+ while (!BlockWorkList.empty()) {
+ // We can reach here only if BlockWorkList is not empty.
+ const BasicBlock *BB = BlockWorkList.pop_back_val();
+ if (EstimatedBlockWeight.count(BB))
+ continue;
+
+ // We take maximum over all weights of successors. In other words we take
+ // weight of "hot" path. In theory we can probably find a better function
+ // which gives higher accuracy results (comparing to "maximum") but I
+ // can't
+ // think of any right now. And I doubt it will make any difference in
+ // practice.
+ const LoopBlock LoopBB = getLoopBlock(BB);
+ auto MaxWeight = getMaxEstimatedEdgeWeight(LoopBB, successors(BB));
+
+ if (MaxWeight)
+ propagateEstimatedBlockWeight(LoopBB, DT, PDT, MaxWeight.getValue(),
+ BlockWorkList, LoopWorkList);
+ }
+ } while (!BlockWorkList.empty() || !LoopWorkList.empty());
+}
+
+// Calculate edge probabilities based on block's estimated weight.
+// Note that gathered weights were not scaled for loops. Thus edges entering
+// and exiting loops requires special processing.
+bool BranchProbabilityInfo::calcEstimatedHeuristics(const BasicBlock *BB) {
+ assert(BB->getTerminator()->getNumSuccessors() > 1 &&
+ "expected more than one successor!");
+
+ const LoopBlock LoopBB = getLoopBlock(BB);
+
+ SmallPtrSet<const BasicBlock *, 8> UnlikelyBlocks;
+ uint32_t TC = LBH_TAKEN_WEIGHT / LBH_NONTAKEN_WEIGHT;
+ if (LoopBB.getLoop())
+ computeUnlikelySuccessors(BB, LoopBB.getLoop(), UnlikelyBlocks);
+
+ // Changed to 'true' if at least one successor has estimated weight.
+ bool FoundEstimatedWeight = false;
+ SmallVector<uint32_t, 4> SuccWeights;
+ uint64_t TotalWeight = 0;
+ // Go over all successors of BB and put their weights into SuccWeights.
+ for (const_succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ const BasicBlock *SuccBB = *I;
+ Optional<uint32_t> Weight;
+ const LoopBlock SuccLoopBB = getLoopBlock(SuccBB);
+ const LoopEdge Edge{LoopBB, SuccLoopBB};
+
+ Weight = getEstimatedEdgeWeight(Edge);
+
+ if (isLoopExitingEdge(Edge) &&
+ // Avoid adjustment of ZERO weight since it should remain unchanged.
+ Weight != static_cast<uint32_t>(BlockExecWeight::ZERO)) {
+ // Scale down loop exiting weight by trip count.
+ Weight = std::max(
+ static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO),
+ Weight.getValueOr(static_cast<uint32_t>(BlockExecWeight::DEFAULT)) /
+ TC);
+ }
+ bool IsUnlikelyEdge = LoopBB.getLoop() && UnlikelyBlocks.contains(SuccBB);
+ if (IsUnlikelyEdge &&
+ // Avoid adjustment of ZERO weight since it should remain unchanged.
+ Weight != static_cast<uint32_t>(BlockExecWeight::ZERO)) {
+ // 'Unlikely' blocks have twice lower weight.
+ Weight = std::max(
+ static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO),
+ Weight.getValueOr(static_cast<uint32_t>(BlockExecWeight::DEFAULT)) /
+ 2);
+ }
+
+ if (Weight)
+ FoundEstimatedWeight = true;
+
+ auto WeightVal =
+ Weight.getValueOr(static_cast<uint32_t>(BlockExecWeight::DEFAULT));
+ TotalWeight += WeightVal;
+ SuccWeights.push_back(WeightVal);
}
- if (uint32_t numUnlikelyEdges = UnlikelyEdges.size()) {
- BranchProbability UnlikelyProb = BranchProbability(LBH_UNLIKELY_WEIGHT,
- Denom);
- auto Prob = UnlikelyProb / numUnlikelyEdges;
- for (unsigned SuccIdx : UnlikelyEdges)
- EdgeProbabilities[SuccIdx] = Prob;
+  // If none of the blocks have an estimated weight bail out.
+  // If TotalWeight is 0 that means weight of each successor is 0 as well and
+  // equally likely. Bail out early to not deal with division by zero.
+ if (!FoundEstimatedWeight || TotalWeight == 0)
+ return false;
+
+ assert(SuccWeights.size() == succ_size(BB) && "Missed successor?");
+ const unsigned SuccCount = SuccWeights.size();
+
+ // If the sum of weights does not fit in 32 bits, scale every weight down
+ // accordingly.
+ if (TotalWeight > UINT32_MAX) {
+ uint64_t ScalingFactor = TotalWeight / UINT32_MAX + 1;
+ TotalWeight = 0;
+ for (unsigned Idx = 0; Idx < SuccCount; ++Idx) {
+ SuccWeights[Idx] /= ScalingFactor;
+ if (SuccWeights[Idx] == static_cast<uint32_t>(BlockExecWeight::ZERO))
+ SuccWeights[Idx] =
+ static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO);
+ TotalWeight += SuccWeights[Idx];
+ }
+ assert(TotalWeight <= UINT32_MAX && "Total weight overflows");
}
+ // Finally set probabilities to edges according to estimated block weights.
+ SmallVector<BranchProbability, 4> EdgeProbabilities(
+ SuccCount, BranchProbability::getUnknown());
+
+ for (unsigned Idx = 0; Idx < SuccCount; ++Idx) {
+ EdgeProbabilities[Idx] =
+ BranchProbability(SuccWeights[Idx], (uint32_t)TotalWeight);
+ }
setEdgeProbability(BB, EdgeProbabilities);
return true;
}
@@ -1015,18 +1075,6 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) {
return true;
}
-bool BranchProbabilityInfo::calcInvokeHeuristics(const BasicBlock *BB) {
- const InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator());
- if (!II)
- return false;
-
- BranchProbability TakenProb(IH_TAKEN_WEIGHT,
- IH_TAKEN_WEIGHT + IH_NONTAKEN_WEIGHT);
- setEdgeProbability(
- BB, SmallVector<BranchProbability, 2>({TakenProb, TakenProb.getCompl()}));
- return true;
-}
-
void BranchProbabilityInfo::releaseMemory() {
Probs.clear();
Handles.clear();
@@ -1202,26 +1250,34 @@ void BranchProbabilityInfo::eraseBlock(const BasicBlock *BB) {
}
}
-void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI,
+void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LoopI,
const TargetLibraryInfo *TLI,
+ DominatorTree *DT,
PostDominatorTree *PDT) {
LLVM_DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName()
<< " ----\n\n");
LastF = &F; // Store the last function we ran on for printing.
- assert(PostDominatedByUnreachable.empty());
- assert(PostDominatedByColdCall.empty());
+ LI = &LoopI;
SccI = std::make_unique<SccInfo>(F);
+ assert(EstimatedBlockWeight.empty());
+ assert(EstimatedLoopWeight.empty());
+
+ std::unique_ptr<DominatorTree> DTPtr;
std::unique_ptr<PostDominatorTree> PDTPtr;
+ if (!DT) {
+ DTPtr = std::make_unique<DominatorTree>(const_cast<Function &>(F));
+ DT = DTPtr.get();
+ }
+
if (!PDT) {
PDTPtr = std::make_unique<PostDominatorTree>(const_cast<Function &>(F));
PDT = PDTPtr.get();
}
- computePostDominatedByUnreachable(F, PDT);
- computePostDominatedByColdCall(F, PDT);
+ computeEestimateBlockWeight(F, DT, PDT);
// Walk the basic blocks in post-order so that we can build up state about
// the successors of a block iteratively.
@@ -1233,13 +1289,7 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI,
continue;
if (calcMetadataWeights(BB))
continue;
- if (calcInvokeHeuristics(BB))
- continue;
- if (calcUnreachableHeuristics(BB))
- continue;
- if (calcColdCallHeuristics(BB))
- continue;
- if (calcLoopBranchHeuristics(BB, LI))
+ if (calcEstimatedHeuristics(BB))
continue;
if (calcPointerHeuristics(BB))
continue;
@@ -1249,8 +1299,8 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI,
continue;
}
- PostDominatedByUnreachable.clear();
- PostDominatedByColdCall.clear();
+ EstimatedLoopWeight.clear();
+ EstimatedBlockWeight.clear();
SccI.reset();
if (PrintBranchProb &&
@@ -1268,6 +1318,7 @@ void BranchProbabilityInfoWrapperPass::getAnalysisUsage(
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<PostDominatorTreeWrapperPass>();
AU.setPreservesAll();
}
@@ -1276,9 +1327,10 @@ bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) {
const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
const TargetLibraryInfo &TLI =
getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
PostDominatorTree &PDT =
getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
- BPI.calculate(F, LI, &TLI, &PDT);
+ BPI.calculate(F, LI, &TLI, &DT, &PDT);
return false;
}
@@ -1295,6 +1347,7 @@ BranchProbabilityAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
BranchProbabilityInfo BPI;
BPI.calculate(F, AM.getResult<LoopAnalysis>(F),
&AM.getResult<TargetLibraryAnalysis>(F),
+ &AM.getResult<DominatorTreeAnalysis>(F),
&AM.getResult<PostDominatorTreeAnalysis>(F));
return BPI;
}