aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Utils
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Utils')
-rw-r--r--llvm/lib/Transforms/Utils/BasicBlockUtils.cpp73
-rw-r--r--llvm/lib/Transforms/Utils/ControlFlowUtils.cpp5
-rw-r--r--llvm/lib/Transforms/Utils/FixIrreducible.cpp126
-rw-r--r--llvm/lib/Transforms/Utils/LoopUnroll.cpp59
-rw-r--r--llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp118
-rw-r--r--llvm/lib/Transforms/Utils/LoopUtils.cpp48
-rw-r--r--llvm/lib/Transforms/Utils/PredicateInfo.cpp1
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyCFG.cpp27
-rw-r--r--llvm/lib/Transforms/Utils/UnifyLoopExits.cpp77
9 files changed, 429 insertions, 105 deletions
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 9829d4d..11db0ec 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -674,6 +674,79 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT,
return SplitBlock(BB, BB->getTerminator(), DT, LI, MSSAU, BBName);
}
+/// Helper function to update the cycle or loop information after inserting a
+/// new block between a callbr instruction and one of its target blocks. Adds
+/// the new block to the innermost cycle or loop that the callbr instruction and
+/// the original target block share.
+/// \p LCI cycle or loop information to update
+/// \p CallBrBlock block containing the callbr instruction
+/// \p CallBrTarget new target block of the callbr instruction
+/// \p Succ original target block of the callbr instruction
+template <typename TI, typename T>
+static bool updateCycleLoopInfo(TI *LCI, BasicBlock *CallBrBlock,
+ BasicBlock *CallBrTarget, BasicBlock *Succ) {
+ static_assert(std::is_same_v<TI, CycleInfo> || std::is_same_v<TI, LoopInfo>,
+ "type must be CycleInfo or LoopInfo");
+ if (!LCI)
+ return false;
+
+ T *LC;
+ if constexpr (std::is_same_v<TI, CycleInfo>)
+ LC = LCI->getSmallestCommonCycle(CallBrBlock, Succ);
+ else
+ LC = LCI->getSmallestCommonLoop(CallBrBlock, Succ);
+ if (!LC)
+ return false;
+
+ if constexpr (std::is_same_v<TI, CycleInfo>)
+ LCI->addBlockToCycle(CallBrTarget, LC);
+ else
+ LC->addBasicBlockToLoop(CallBrTarget, *LCI);
+
+ return true;
+}
+
+BasicBlock *llvm::SplitCallBrEdge(BasicBlock *CallBrBlock, BasicBlock *Succ,
+ unsigned SuccIdx, DomTreeUpdater *DTU,
+ CycleInfo *CI, LoopInfo *LI,
+ bool *UpdatedLI) {
+ CallBrInst *CallBr = dyn_cast<CallBrInst>(CallBrBlock->getTerminator());
+ assert(CallBr && "expected callbr terminator");
+ assert(SuccIdx < CallBr->getNumSuccessors() &&
+ Succ == CallBr->getSuccessor(SuccIdx) && "invalid successor index");
+
+ // Create a new block between callbr and the specified successor.
+ // splitBlockBefore cannot be re-used here since it cannot split if the split
+ // point is a PHI node (because BasicBlock::splitBasicBlockBefore cannot
+ // handle that). But we don't need to rewire every part of a potential PHI
+ // node. We only care about the edge between CallBrBlock and the original
+ // successor.
+ BasicBlock *CallBrTarget =
+ BasicBlock::Create(CallBrBlock->getContext(),
+ CallBrBlock->getName() + ".target." + Succ->getName(),
+ CallBrBlock->getParent());
+ // Rewire control flow from the new target block to the original successor.
+ Succ->replacePhiUsesWith(CallBrBlock, CallBrTarget);
+ // Rewire control flow from callbr to the new target block.
+ CallBr->setSuccessor(SuccIdx, CallBrTarget);
+ // Jump from the new target block to the original successor.
+ BranchInst::Create(Succ, CallBrTarget);
+
+ bool Updated =
+ updateCycleLoopInfo<LoopInfo, Loop>(LI, CallBrBlock, CallBrTarget, Succ);
+ if (UpdatedLI)
+ *UpdatedLI = Updated;
+ updateCycleLoopInfo<CycleInfo, Cycle>(CI, CallBrBlock, CallBrTarget, Succ);
+ if (DTU) {
+ DTU->applyUpdates({{DominatorTree::Insert, CallBrBlock, CallBrTarget}});
+ if (DTU->getDomTree().dominates(CallBrBlock, Succ))
+ DTU->applyUpdates({{DominatorTree::Delete, CallBrBlock, Succ},
+ {DominatorTree::Insert, CallBrTarget, Succ}});
+ }
+
+ return CallBrTarget;
+}
+
void llvm::setUnwindEdgeTo(Instruction *TI, BasicBlock *Succ) {
if (auto *II = dyn_cast<InvokeInst>(TI))
II->setUnwindDest(Succ);
diff --git a/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp b/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp
index 0046a00..287a177 100644
--- a/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp
+++ b/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp
@@ -13,6 +13,7 @@
#include "llvm/Transforms/Utils/ControlFlowUtils.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/ValueHandle.h"
@@ -281,7 +282,9 @@ std::pair<BasicBlock *, bool> ControlFlowHub::finalize(
for (auto [BB, Succ0, Succ1] : Branches) {
#ifndef NDEBUG
- assert(Incoming.insert(BB).second && "Duplicate entry for incoming block.");
+ assert(
+ (Incoming.insert(BB).second || isa<CallBrInst>(BB->getTerminator())) &&
+ "Duplicate entry for incoming block.");
#endif
if (Succ0)
Outgoing.insert(Succ0);
diff --git a/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
index 45e1d12..804af22 100644
--- a/llvm/lib/Transforms/Utils/FixIrreducible.cpp
+++ b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
@@ -79,6 +79,53 @@
// Limitation: The pass cannot handle switch statements and indirect
// branches. Both must be lowered to plain branches first.
//
+// CallBr support: CallBr is handled as a more general branch instruction which
+// can have multiple successors. The pass redirects the edges to intermediate
+// target blocks that unconditionally branch to the original callbr target
+// blocks. This allows the control flow hub to know to which of the original
+// target blocks to jump to.
+// Example input CFG:
+// Entry (callbr)
+// / \
+// v v
+// H ----> B
+// ^ /|
+// `----' |
+// v
+// Exit
+//
+// becomes:
+// Entry (callbr)
+// / \
+// v v
+// target.H target.B
+// | |
+// v v
+// H ----> B
+// ^ /|
+// `----' |
+// v
+// Exit
+//
+// Note
+// OUTPUT CFG: Converted to a natural loop with a new header N.
+//
+// Entry (callbr)
+// / \
+// v v
+// target.H target.B
+// \ /
+// \ /
+// v v
+// N <---.
+// / \ \
+// / \ |
+// v v /
+// H --> B --'
+// |
+// v
+// Exit
+//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/FixIrreducible.h"
@@ -231,6 +278,7 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
return false;
LLVM_DEBUG(dbgs() << "Processing cycle:\n" << CI.print(&C) << "\n";);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
ControlFlowHub CHub;
SetVector<BasicBlock *> Predecessors;
@@ -242,18 +290,32 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
}
for (BasicBlock *P : Predecessors) {
- auto *Branch = cast<BranchInst>(P->getTerminator());
- // Exactly one of the two successors is the header.
- BasicBlock *Succ0 = Branch->getSuccessor(0) == Header ? Header : nullptr;
- BasicBlock *Succ1 = Succ0 ? nullptr : Header;
- if (!Succ0)
- assert(Branch->getSuccessor(1) == Header);
- assert(Succ0 || Succ1);
- CHub.addBranch(P, Succ0, Succ1);
-
- LLVM_DEBUG(dbgs() << "Added internal branch: " << P->getName() << " -> "
- << (Succ0 ? Succ0->getName() : "") << " "
- << (Succ1 ? Succ1->getName() : "") << "\n");
+ if (BranchInst *Branch = dyn_cast<BranchInst>(P->getTerminator())) {
+ // Exactly one of the two successors is the header.
+ BasicBlock *Succ0 = Branch->getSuccessor(0) == Header ? Header : nullptr;
+ BasicBlock *Succ1 = Succ0 ? nullptr : Header;
+ assert(Succ0 || Branch->getSuccessor(1) == Header);
+ assert(Succ0 || Succ1);
+ CHub.addBranch(P, Succ0, Succ1);
+
+ LLVM_DEBUG(dbgs() << "Added internal branch: " << printBasicBlock(P)
+ << " -> " << printBasicBlock(Succ0)
+ << (Succ0 && Succ1 ? " " : "") << printBasicBlock(Succ1)
+ << '\n');
+ } else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(P->getTerminator())) {
+ for (unsigned I = 0; I < CallBr->getNumSuccessors(); ++I) {
+ BasicBlock *Succ = CallBr->getSuccessor(I);
+ if (Succ != Header)
+ continue;
+ BasicBlock *NewSucc = SplitCallBrEdge(P, Succ, I, &DTU, &CI, LI);
+ CHub.addBranch(NewSucc, Succ);
+ LLVM_DEBUG(dbgs() << "Added internal branch: "
+ << printBasicBlock(NewSucc) << " -> "
+ << printBasicBlock(Succ) << '\n');
+ }
+ } else {
+ llvm_unreachable("unsupported block terminator");
+ }
}
// Redirect external incoming edges. This includes the edges on the header.
@@ -266,17 +328,32 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
}
for (BasicBlock *P : Predecessors) {
- auto *Branch = cast<BranchInst>(P->getTerminator());
- BasicBlock *Succ0 = Branch->getSuccessor(0);
- Succ0 = C.contains(Succ0) ? Succ0 : nullptr;
- BasicBlock *Succ1 =
- Branch->isUnconditional() ? nullptr : Branch->getSuccessor(1);
- Succ1 = Succ1 && C.contains(Succ1) ? Succ1 : nullptr;
- CHub.addBranch(P, Succ0, Succ1);
-
- LLVM_DEBUG(dbgs() << "Added external branch: " << P->getName() << " -> "
- << (Succ0 ? Succ0->getName() : "") << " "
- << (Succ1 ? Succ1->getName() : "") << "\n");
+ if (BranchInst *Branch = dyn_cast<BranchInst>(P->getTerminator()); Branch) {
+ BasicBlock *Succ0 = Branch->getSuccessor(0);
+ Succ0 = C.contains(Succ0) ? Succ0 : nullptr;
+ BasicBlock *Succ1 =
+ Branch->isUnconditional() ? nullptr : Branch->getSuccessor(1);
+ Succ1 = Succ1 && C.contains(Succ1) ? Succ1 : nullptr;
+ CHub.addBranch(P, Succ0, Succ1);
+
+ LLVM_DEBUG(dbgs() << "Added external branch: " << printBasicBlock(P)
+ << " -> " << printBasicBlock(Succ0)
+ << (Succ0 && Succ1 ? " " : "") << printBasicBlock(Succ1)
+ << '\n');
+ } else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(P->getTerminator())) {
+ for (unsigned I = 0; I < CallBr->getNumSuccessors(); ++I) {
+ BasicBlock *Succ = CallBr->getSuccessor(I);
+ if (!C.contains(Succ))
+ continue;
+ BasicBlock *NewSucc = SplitCallBrEdge(P, Succ, I, &DTU, &CI, LI);
+ CHub.addBranch(NewSucc, Succ);
+ LLVM_DEBUG(dbgs() << "Added external branch: "
+ << printBasicBlock(NewSucc) << " -> "
+ << printBasicBlock(Succ) << '\n');
+ }
+ } else {
+ llvm_unreachable("unsupported block terminator");
+ }
}
// Redirect all the backedges through a "hub" consisting of a series
@@ -292,7 +369,6 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
SetVector<BasicBlock *> Entries;
Entries.insert(C.entry_rbegin(), C.entry_rend());
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
CHub.finalize(&DTU, GuardBlocks, "irr");
#if defined(EXPENSIVE_CHECKS)
assert(DT.verify(DominatorTree::VerificationLevel::Full));
@@ -325,8 +401,6 @@ static bool FixIrreducibleImpl(Function &F, CycleInfo &CI, DominatorTree &DT,
LLVM_DEBUG(dbgs() << "===== Fix irreducible control-flow in function: "
<< F.getName() << "\n");
- assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator.");
-
bool Changed = false;
for (Cycle *TopCycle : CI.toplevel_cycles()) {
for (Cycle *C : depth_first(TopCycle)) {
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 4fe736a..94dfd3a 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -499,9 +499,9 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
const unsigned MaxTripCount = SE->getSmallConstantMaxTripCount(L);
const bool MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L);
- unsigned EstimatedLoopInvocationWeight = 0;
std::optional<unsigned> OriginalTripCount =
- llvm::getLoopEstimatedTripCount(L, &EstimatedLoopInvocationWeight);
+ llvm::getLoopEstimatedTripCount(L);
+ BranchProbability OriginalLoopProb = llvm::getLoopProbability(L);
// Effectively "DCE" unrolled iterations that are beyond the max tripcount
// and will never be executed.
@@ -592,11 +592,11 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
: isEpilogProfitable(L);
if (ULO.Runtime &&
- !UnrollRuntimeLoopRemainder(L, ULO.Count, ULO.AllowExpensiveTripCount,
- EpilogProfitability, ULO.UnrollRemainder,
- ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI,
- PreserveLCSSA, ULO.SCEVExpansionBudget,
- ULO.RuntimeUnrollMultiExit, RemainderLoop)) {
+ !UnrollRuntimeLoopRemainder(
+ L, ULO.Count, ULO.AllowExpensiveTripCount, EpilogProfitability,
+ ULO.UnrollRemainder, ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI,
+ PreserveLCSSA, ULO.SCEVExpansionBudget, ULO.RuntimeUnrollMultiExit,
+ RemainderLoop, OriginalTripCount, OriginalLoopProb)) {
if (ULO.Force)
ULO.Runtime = false;
else {
@@ -1130,11 +1130,46 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
LI->erase(L);
// We shouldn't try to use `L` anymore.
L = nullptr;
- } else if (OriginalTripCount) {
- // Update the trip count. Note that the remainder has already logic
- // computing it in `UnrollRuntimeLoopRemainder`.
- setLoopEstimatedTripCount(L, *OriginalTripCount / ULO.Count,
- EstimatedLoopInvocationWeight);
+ } else {
+ // Update metadata for the loop's branch weights and estimated trip count:
+ // - If ULO.Runtime, UnrollRuntimeLoopRemainder sets the guard branch
+ // weights, latch branch weights, and estimated trip count of the
+ // remainder loop it creates. It also sets the branch weights for the
+ // unrolled loop guard it creates. The branch weights for the unrolled
+ // loop latch are adjusted below. FIXME: Handle prologue loops.
+ // - Otherwise, if unrolled loop iteration latches become unconditional,
+ // branch weights are adjusted above. FIXME: Actually handle such
+ // unconditional latches.
+ // - Otherwise, the original loop's branch weights are correct for the
+ // unrolled loop, so do not adjust them.
+ // - In all cases, the unrolled loop's estimated trip count is set below.
+ //
+ // As an example of the last case, consider what happens if the unroll count
+ // is 4 for a loop with an estimated trip count of 10 when we do not create
+ // a remainder loop and all iterations' latches remain conditional. Each
+ // unrolled iteration's latch still has the same probability of exiting the
+ // loop as it did when in the original loop, and thus it should still have
+ // the same branch weights. Each unrolled iteration's non-zero probability
+ // of exiting already appropriately reduces the probability of reaching the
+ // remaining iterations just as it did in the original loop. Trying to also
+ // adjust the branch weights of the final unrolled iteration's latch (i.e.,
+ // the backedge for the unrolled loop as a whole) to reflect its new trip
+ // count of 3 will erroneously further reduce its block frequencies.
+ // However, in case an analysis later needs to estimate the trip count of
+ // the unrolled loop as a whole without considering the branch weights for
+ // each unrolled iteration's latch within it, we store the new trip count as
+ // separate metadata.
+ if (!OriginalLoopProb.isUnknown() && ULO.Runtime && EpilogProfitability) {
+ // Where p is always the probability of executing at least 1 more
+ // iteration, the probability for at least n more iterations is p^n.
+ setLoopProbability(L, OriginalLoopProb.pow(ULO.Count));
+ }
+ if (OriginalTripCount) {
+ unsigned NewTripCount = *OriginalTripCount / ULO.Count;
+ if (!ULO.Runtime && *OriginalTripCount % ULO.Count)
+ ++NewTripCount;
+ setLoopEstimatedTripCount(L, NewTripCount);
+ }
}
// LoopInfo should not be valid, confirm that.
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 6312831..1e8f6cc 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -40,6 +40,7 @@
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include <cmath>
using namespace llvm;
@@ -195,6 +196,21 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
}
}
+/// Assume, due to our position in the remainder loop or its guard, anywhere
+/// from 0 to \p N more iterations can possibly execute. Among such cases in
+/// the original loop (with loop probability \p OriginalLoopProb), what is the
+/// probability of executing at least one more iteration?
+static BranchProbability
+probOfNextInRemainder(BranchProbability OriginalLoopProb, unsigned N) {
+ // Each of these variables holds the original loop's probability that the
+ // number of iterations it will execute is some m in the specified range.
+ BranchProbability ProbOne = OriginalLoopProb; // 1 <= m
+ BranchProbability ProbTooMany = ProbOne.pow(N + 1); // N + 1 <= m
+ BranchProbability ProbNotTooMany = ProbTooMany.getCompl(); // 0 <= m <= N
+ BranchProbability ProbOneNotTooMany = ProbOne - ProbTooMany; // 1 <= m <= N
+ return ProbOneNotTooMany / ProbNotTooMany;
+}
+
/// Connect the unrolling epilog code to the original loop.
/// The unrolling epilog code contains code to execute the
/// 'extra' iterations if the run-time trip count modulo the
@@ -221,7 +237,8 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
ValueToValueMapTy &VMap, DominatorTree *DT,
LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE,
- unsigned Count, AssumptionCache &AC) {
+ unsigned Count, AssumptionCache &AC,
+ BranchProbability OriginalLoopProb) {
BasicBlock *Latch = L->getLoopLatch();
assert(Latch && "Loop must have a latch");
BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
@@ -332,12 +349,19 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
PreserveLCSSA);
// Add the branch to the exit block (around the epilog loop)
MDNode *BranchWeights = nullptr;
- if (hasBranchWeightMD(*Latch->getTerminator())) {
+ if (OriginalLoopProb.isUnknown() &&
+ hasBranchWeightMD(*Latch->getTerminator())) {
// Assume equal distribution in interval [0, Count).
MDBuilder MDB(B.getContext());
BranchWeights = MDB.createBranchWeights(1, Count - 1);
}
- B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit, BranchWeights);
+ BranchInst *RemainderLoopGuard =
+ B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit, BranchWeights);
+ if (!OriginalLoopProb.isUnknown()) {
+ setBranchProbability(RemainderLoopGuard,
+ probOfNextInRemainder(OriginalLoopProb, Count - 1),
+ /*ForFirstTarget=*/true);
+ }
InsertPt->eraseFromParent();
if (DT) {
auto *NewDom = DT->findNearestCommonDominator(Exit, NewExit);
@@ -357,14 +381,15 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
/// The cloned blocks should be inserted between InsertTop and InsertBot.
/// InsertTop should be new preheader, InsertBot new loop exit.
/// Returns the new cloned loop that is created.
-static Loop *
-CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
- const bool UnrollRemainder,
- BasicBlock *InsertTop,
- BasicBlock *InsertBot, BasicBlock *Preheader,
+static Loop *CloneLoopBlocks(Loop *L, Value *NewIter,
+ const bool UseEpilogRemainder,
+ const bool UnrollRemainder, BasicBlock *InsertTop,
+ BasicBlock *InsertBot, BasicBlock *Preheader,
std::vector<BasicBlock *> &NewBlocks,
LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
- DominatorTree *DT, LoopInfo *LI, unsigned Count) {
+ DominatorTree *DT, LoopInfo *LI, unsigned Count,
+ std::optional<unsigned> OriginalTripCount,
+ BranchProbability OriginalLoopProb) {
StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
BasicBlock *Header = L->getHeader();
BasicBlock *Latch = L->getLoopLatch();
@@ -419,7 +444,8 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
Builder.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
Value *IdxCmp = Builder.CreateICmpNE(IdxNext, NewIter, NewIdx->getName() + ".cmp");
MDNode *BranchWeights = nullptr;
- if (hasBranchWeightMD(*LatchBR)) {
+ if ((OriginalLoopProb.isUnknown() || !UseEpilogRemainder) &&
+ hasBranchWeightMD(*LatchBR)) {
uint32_t ExitWeight;
uint32_t BackEdgeWeight;
if (Count >= 3) {
@@ -437,7 +463,29 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
MDBuilder MDB(Builder.getContext());
BranchWeights = MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
}
- Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot, BranchWeights);
+ BranchInst *RemainderLoopLatch =
+ Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot, BranchWeights);
+ if (!OriginalLoopProb.isUnknown() && UseEpilogRemainder) {
+ // Compute the total frequency of the original loop body from the
+ // remainder iterations. Once we've reached them, the first of them
+ // always executes, so its frequency and probability are 1.
+ double FreqRemIters = 1;
+ if (Count > 2) {
+ BranchProbability ProbReaching = BranchProbability::getOne();
+ for (unsigned N = Count - 2; N >= 1; --N) {
+ ProbReaching *= probOfNextInRemainder(OriginalLoopProb, N);
+ FreqRemIters += double(ProbReaching.getNumerator()) /
+ ProbReaching.getDenominator();
+ }
+ }
+ // Solve for the loop probability that would produce that frequency.
+ // Sum(i=0..inf)(Prob^i) = 1/(1-Prob) = FreqRemIters.
+ double ProbDouble = 1 - 1 / FreqRemIters;
+ BranchProbability Prob = BranchProbability::getBranchProbability(
+ std::round(ProbDouble * BranchProbability::getDenominator()),
+ BranchProbability::getDenominator());
+ setBranchProbability(RemainderLoopLatch, Prob, /*ForFirstTarget=*/true);
+ }
NewIdx->addIncoming(Zero, InsertTop);
NewIdx->addIncoming(IdxNext, NewBB);
LatchBR->eraseFromParent();
@@ -460,25 +508,13 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
Loop *NewLoop = NewLoops[L];
assert(NewLoop && "L should have been cloned");
- MDNode *LoopID = NewLoop->getLoopID();
- // Only add loop metadata if the loop is not going to be completely
- // unrolled.
- if (UnrollRemainder)
- return NewLoop;
-
- std::optional<MDNode *> NewLoopID = makeFollowupLoopID(
- LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder});
- if (NewLoopID) {
- NewLoop->setLoopID(*NewLoopID);
-
- // Do not setLoopAlreadyUnrolled if loop attributes have been defined
- // explicitly.
- return NewLoop;
- }
+ if (OriginalTripCount && UseEpilogRemainder)
+ setLoopEstimatedTripCount(NewLoop, *OriginalTripCount % Count);
// Add unroll disable metadata to disable future unrolling for this loop.
- NewLoop->setLoopAlreadyUnrolled();
+ if (!UnrollRemainder)
+ NewLoop->setLoopAlreadyUnrolled();
return NewLoop;
}
@@ -603,7 +639,8 @@ bool llvm::UnrollRuntimeLoopRemainder(
LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
const TargetTransformInfo *TTI, bool PreserveLCSSA,
unsigned SCEVExpansionBudget, bool RuntimeUnrollMultiExit,
- Loop **ResultLoop) {
+ Loop **ResultLoop, std::optional<unsigned> OriginalTripCount,
+ BranchProbability OriginalLoopProb) {
LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
LLVM_DEBUG(L->dump());
LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n"
@@ -823,12 +860,23 @@ bool llvm::UnrollRuntimeLoopRemainder(
BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
// Branch to either remainder (extra iterations) loop or unrolling loop.
MDNode *BranchWeights = nullptr;
- if (hasBranchWeightMD(*Latch->getTerminator())) {
+ if ((OriginalLoopProb.isUnknown() || !UseEpilogRemainder) &&
+ hasBranchWeightMD(*Latch->getTerminator())) {
// Assume loop is nearly always entered.
MDBuilder MDB(B.getContext());
BranchWeights = MDB.createBranchWeights(EpilogHeaderWeights);
}
- B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop, BranchWeights);
+ BranchInst *UnrollingLoopGuard =
+ B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop, BranchWeights);
+ if (!OriginalLoopProb.isUnknown() && UseEpilogRemainder) {
+ // The original loop's first iteration always happens. Compute the
+ // probability of the original loop executing Count-1 iterations after that
+ // to complete the first iteration of the unrolled loop.
+ BranchProbability ProbOne = OriginalLoopProb;
+ BranchProbability ProbRest = ProbOne.pow(Count - 1);
+ setBranchProbability(UnrollingLoopGuard, ProbRest,
+ /*ForFirstTarget=*/false);
+ }
PreHeaderBR->eraseFromParent();
if (DT) {
if (UseEpilogRemainder)
@@ -855,9 +903,10 @@ bool llvm::UnrollRuntimeLoopRemainder(
// iterations. This function adds the appropriate CFG connections.
BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
- Loop *remainderLoop = CloneLoopBlocks(
- L, ModVal, UseEpilogRemainder, UnrollRemainder, InsertTop, InsertBot,
- NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI, Count);
+ Loop *remainderLoop =
+ CloneLoopBlocks(L, ModVal, UseEpilogRemainder, UnrollRemainder, InsertTop,
+ InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT,
+ LI, Count, OriginalTripCount, OriginalLoopProb);
// Insert the cloned blocks into the function.
F->splice(InsertBot->getIterator(), F, NewBlocks[0]->getIterator(), F->end());
@@ -956,7 +1005,8 @@ bool llvm::UnrollRuntimeLoopRemainder(
// Connect the epilog code to the original loop and update the
// PHI functions.
ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader,
- NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE, Count, *AC);
+ NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE, Count, *AC,
+ OriginalLoopProb);
// Update counter in loop for unrolling.
// Use an incrementing IV. Pre-incr/post-incr is backedge/trip count.
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index b6ba822..8be471b 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -962,13 +962,51 @@ bool llvm::setLoopEstimatedTripCount(
if (LatchBranch->getSuccessor(0) != L->getHeader())
std::swap(BackedgeTakenWeight, LatchExitWeight);
- MDBuilder MDB(LatchBranch->getContext());
-
// Set/Update profile metadata.
- LatchBranch->setMetadata(
- LLVMContext::MD_prof,
- MDB.createBranchWeights(BackedgeTakenWeight, LatchExitWeight));
+ setBranchWeights(*LatchBranch, {BackedgeTakenWeight, LatchExitWeight},
+ /*IsExpected=*/false);
+
+ return true;
+}
+
+BranchProbability llvm::getLoopProbability(Loop *L) {
+ BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L);
+ if (!LatchBranch)
+ return BranchProbability::getUnknown();
+ bool FirstTargetIsLoop = LatchBranch->getSuccessor(0) == L->getHeader();
+ return getBranchProbability(LatchBranch, FirstTargetIsLoop);
+}
+bool llvm::setLoopProbability(Loop *L, BranchProbability P) {
+ BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L);
+ if (!LatchBranch)
+ return false;
+ bool FirstTargetIsLoop = LatchBranch->getSuccessor(0) == L->getHeader();
+ return setBranchProbability(LatchBranch, P, FirstTargetIsLoop);
+}
+
+BranchProbability llvm::getBranchProbability(BranchInst *B,
+ bool ForFirstTarget) {
+ if (B->getNumSuccessors() != 2)
+ return BranchProbability::getUnknown();
+ uint64_t Weight0, Weight1;
+ if (!extractBranchWeights(*B, Weight0, Weight1))
+ return BranchProbability::getUnknown();
+ if (!ForFirstTarget)
+ std::swap(Weight0, Weight1);
+ return BranchProbability::getBranchProbability(Weight0, Weight0 + Weight1);
+}
+
+bool llvm::setBranchProbability(BranchInst *B, BranchProbability P,
+ bool ForFirstTarget) {
+ if (B->getNumSuccessors() != 2)
+ return false;
+ BranchProbability Prob0 = P;
+ BranchProbability Prob1 = P.getCompl();
+ if (!ForFirstTarget)
+ std::swap(Prob0, Prob1);
+ setBranchWeights(*B, {Prob0.getNumerator(), Prob1.getNumerator()},
+ /*IsExpected=*/false);
return true;
}
diff --git a/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
index a9ab3b3..27fed73 100644
--- a/llvm/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
@@ -809,7 +809,6 @@ public:
void emitInstructionAnnot(const Instruction *I,
formatted_raw_ostream &OS) override {
if (const auto *PI = PredInfo->getPredicateInfoFor(I)) {
- OS << "; Has predicate info\n";
if (const auto *PB = dyn_cast<PredicateBranch>(PI)) {
OS << "; branch predicate info { TrueEdge: " << PB->TrueEdge
<< " Comparison:" << *PB->Condition << " Edge: [";
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 4fac5d3..7f6d779 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1866,10 +1866,19 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
// If either of the blocks has it's address taken, then we can't do this fold,
// because the code we'd hoist would no longer run when we jump into the block
// by it's address.
- for (auto *Succ : successors(BB))
- if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
+ for (auto *Succ : successors(BB)) {
+ if (Succ->hasAddressTaken())
return false;
-
+ if (Succ->getSinglePredecessor())
+ continue;
+ // If Succ has >1 predecessors, continue to check if the Succ contains only
+ // one `unreachable` inst. Since executing `unreachable` inst is an UB, we
+ // can relax the condition based on the assumptiom that the program would
+ // never enter Succ and trigger such an UB.
+ if (isa<UnreachableInst>(*Succ->begin()))
+ continue;
+ return false;
+ }
// The second of pair is a SkipFlags bitmask.
using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
SmallVector<SuccIterPair, 8> SuccIterPairs;
@@ -5968,14 +5977,14 @@ bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
}
// Prune obsolete incoming values off the successors' PHI nodes.
- for (auto BBI = Dest->begin(); isa<PHINode>(BBI); ++BBI) {
+ for (auto &PHI : make_early_inc_range(Dest->phis())) {
unsigned PreviousEdges = Cases->size();
if (Dest == SI->getDefaultDest())
++PreviousEdges;
for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
- cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
+ PHI.removeIncomingValue(SI->getParent());
}
- for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
+ for (auto &PHI : make_early_inc_range(OtherDest->phis())) {
unsigned PreviousEdges = OtherCases->size();
if (OtherDest == SI->getDefaultDest())
++PreviousEdges;
@@ -5984,7 +5993,7 @@ bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
if (NewBI->isUnconditional())
++E;
for (unsigned I = 0; I != E; ++I)
- cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
+ PHI.removeIncomingValue(SI->getParent());
}
// Clean up the default block - it may have phis or other instructions before
@@ -7623,7 +7632,9 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
auto *DefaultCaseBB = SI->getDefaultDest();
BasicBlock *SplitBB = SplitBlock(OrigBB, SI, DTU);
auto It = OrigBB->getTerminator()->getIterator();
- BranchInst::Create(SplitBB, DefaultCaseBB, IsPow2, It);
+ auto *BI = BranchInst::Create(SplitBB, DefaultCaseBB, IsPow2, It);
+ // BI is handling the default case for SI, and so should share its DebugLoc.
+ BI->setDebugLoc(SI->getDebugLoc());
It->eraseFromParent();
addPredecessorToBlock(DefaultCaseBB, OrigBB, SplitBB);
diff --git a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
index 9f338db..94c5c170 100644
--- a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
+++ b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
@@ -12,7 +12,11 @@
//
// Limitation: This assumes that all terminators in the CFG are direct branches
// (the "br" instruction). The presence of any other control flow
-// such as indirectbr, switch or callbr will cause an assert.
+// such as indirectbr or switch will cause an assert.
+// The callbr terminator is supported by creating intermediate
+// target blocks that unconditionally branch to the original target
+// blocks. These intermediate target blocks can then be redirected
+// through the ControlFlowHub as usual.
//
//===----------------------------------------------------------------------===//
@@ -150,25 +154,55 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
SmallVector<BasicBlock *, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ SmallVector<BasicBlock *, 8> CallBrTargetBlocksToFix;
// Redirect exiting edges through a control flow hub.
ControlFlowHub CHub;
- for (auto *BB : ExitingBlocks) {
- auto *Branch = cast<BranchInst>(BB->getTerminator());
- BasicBlock *Succ0 = Branch->getSuccessor(0);
- Succ0 = L->contains(Succ0) ? nullptr : Succ0;
-
- BasicBlock *Succ1 =
- Branch->isUnconditional() ? nullptr : Branch->getSuccessor(1);
- Succ1 = L->contains(Succ1) ? nullptr : Succ1;
- CHub.addBranch(BB, Succ0, Succ1);
-
- LLVM_DEBUG(dbgs() << "Added exiting branch: " << BB->getName() << " -> {"
- << (Succ0 ? Succ0->getName() : "<none>") << ", "
- << (Succ1 ? Succ1->getName() : "<none>") << "}\n");
+
+ for (unsigned I = 0; I < ExitingBlocks.size(); ++I) {
+ BasicBlock *BB = ExitingBlocks[I];
+ if (BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator())) {
+ BasicBlock *Succ0 = Branch->getSuccessor(0);
+ Succ0 = L->contains(Succ0) ? nullptr : Succ0;
+
+ BasicBlock *Succ1 =
+ Branch->isUnconditional() ? nullptr : Branch->getSuccessor(1);
+ Succ1 = L->contains(Succ1) ? nullptr : Succ1;
+ CHub.addBranch(BB, Succ0, Succ1);
+
+ LLVM_DEBUG(dbgs() << "Added extiting branch: " << printBasicBlock(BB)
+ << " -> " << printBasicBlock(Succ0)
+ << (Succ0 && Succ1 ? " " : "") << printBasicBlock(Succ1)
+ << '\n');
+ } else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(BB->getTerminator())) {
+ for (unsigned J = 0; J < CallBr->getNumSuccessors(); ++J) {
+ BasicBlock *Succ = CallBr->getSuccessor(J);
+ if (L->contains(Succ))
+ continue;
+ bool UpdatedLI = false;
+ BasicBlock *NewSucc =
+ SplitCallBrEdge(BB, Succ, J, &DTU, nullptr, &LI, &UpdatedLI);
+ // Even if CallBr and Succ do not have a common parent loop, we need to
+ // add the new target block to the parent loop of the current loop.
+ if (!UpdatedLI)
+ CallBrTargetBlocksToFix.push_back(NewSucc);
+ // ExitingBlocks is later used to restore SSA, so we need to make sure
+ // that the blocks used for phi nodes in the guard blocks match the
+ // predecessors of the guard blocks, which, in the case of callbr, are
+ // the new intermediate target blocks instead of the callbr blocks
+ // themselves.
+ ExitingBlocks[I] = NewSucc;
+ CHub.addBranch(NewSucc, Succ);
+ LLVM_DEBUG(dbgs() << "Added exiting branch: "
+ << printBasicBlock(NewSucc) << " -> "
+ << printBasicBlock(Succ) << '\n');
+ }
+ } else {
+ llvm_unreachable("unsupported block terminator");
+ }
}
SmallVector<BasicBlock *, 8> GuardBlocks;
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
BasicBlock *LoopExitBlock;
bool ChangedCFG;
std::tie(LoopExitBlock, ChangedCFG) = CHub.finalize(
@@ -187,10 +221,19 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
// The guard blocks were created outside the loop, so they need to become
// members of the parent loop.
- if (auto ParentLoop = L->getParentLoop()) {
+ // Same goes for the callbr target blocks. Although we try to add them to the
+ // smallest common parent loop of the callbr block and the corresponding
+ // original target block, there might not have been such a loop, in which case
+ // the newly created callbr target blocks are not part of any loop. For nested
+ // loops, this might result in them leading to a loop with multiple entry
+ // points.
+ if (auto *ParentLoop = L->getParentLoop()) {
for (auto *G : GuardBlocks) {
ParentLoop->addBasicBlockToLoop(G, LI);
}
+ for (auto *C : CallBrTargetBlocksToFix) {
+ ParentLoop->addBasicBlockToLoop(C, LI);
+ }
ParentLoop->verifyLoop();
}
@@ -218,8 +261,6 @@ bool UnifyLoopExitsLegacyPass::runOnFunction(Function &F) {
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator.");
-
return runImpl(LI, DT);
}