aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Scalar
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Scalar')
-rw-r--r--llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp38
-rw-r--r--llvm/lib/Transforms/Scalar/IndVarSimplify.cpp85
-rw-r--r--llvm/lib/Transforms/Scalar/LICM.cpp87
-rw-r--r--llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp38
-rw-r--r--llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp20
-rw-r--r--llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp60
6 files changed, 211 insertions, 117 deletions
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index 66e45ec..e84ca81 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -122,16 +122,22 @@ static cl::opt<unsigned>
cl::desc("Maximum cost accepted for the transformation"),
cl::Hidden, cl::init(50));
-extern cl::opt<bool> ProfcheckDisableMetadataFixes;
-
-} // namespace llvm
-
static cl::opt<double> MaxClonedRate(
"dfa-max-cloned-rate",
cl::desc(
"Maximum cloned instructions rate accepted for the transformation"),
cl::Hidden, cl::init(7.5));
+static cl::opt<unsigned>
+ MaxOuterUseBlocks("dfa-max-out-use-blocks",
+ cl::desc("Maximum unduplicated blocks with outer uses "
+ "accepted for the transformation"),
+ cl::Hidden, cl::init(40));
+
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+
+} // namespace llvm
+
namespace {
class SelectInstToUnfold {
SelectInst *SI;
@@ -965,8 +971,16 @@ private:
// SLPVectorizer.
// TODO: Thread the switch partially before reaching the threshold.
uint64_t NumOrigInst = 0;
- for (auto *BB : DuplicateMap.keys())
+ uint64_t NumOuterUseBlock = 0;
+ for (auto *BB : DuplicateMap.keys()) {
NumOrigInst += BB->sizeWithoutDebug();
+ // Only unduplicated blocks with single predecessor require new phi
+ // nodes.
+ for (auto *Succ : successors(BB))
+ if (!DuplicateMap.count(Succ) && Succ->getSinglePredecessor())
+ NumOuterUseBlock++;
+ }
+
if (double(NumClonedInst) / double(NumOrigInst) > MaxClonedRate) {
LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, too much "
"instructions wll be cloned\n");
@@ -977,6 +991,20 @@ private:
return false;
}
+ // Too much unduplicated blocks with outer uses may cause too much
+ // insertions of phi nodes for duplicated definitions. TODO: Drop this
+ // threshold if we come up with another way to reduce the number of inserted
+ // phi nodes.
+ if (NumOuterUseBlock > MaxOuterUseBlocks) {
+ LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, too much "
+ "blocks with outer uses\n");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch)
+ << "Too much blocks with outer uses.";
+ });
+ return false;
+ }
+
InstructionCost DuplicationCost = 0;
unsigned JumpTableSize = 0;
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 7ebcc21..4ba4ba3 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -162,8 +162,6 @@ class IndVarSimplify {
const SCEV *ExitCount,
PHINode *IndVar, SCEVExpander &Rewriter);
- bool sinkUnusedInvariants(Loop *L);
-
public:
IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
const DataLayout &DL, TargetLibraryInfo *TLI,
@@ -1079,85 +1077,6 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
return true;
}
-//===----------------------------------------------------------------------===//
-// sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
-//===----------------------------------------------------------------------===//
-
-/// If there's a single exit block, sink any loop-invariant values that
-/// were defined in the preheader but not used inside the loop into the
-/// exit block to reduce register pressure in the loop.
-bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
- BasicBlock *ExitBlock = L->getExitBlock();
- if (!ExitBlock) return false;
-
- BasicBlock *Preheader = L->getLoopPreheader();
- if (!Preheader) return false;
-
- bool MadeAnyChanges = false;
- for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {
-
- // Skip BB Terminator.
- if (Preheader->getTerminator() == &I)
- continue;
-
- // New instructions were inserted at the end of the preheader.
- if (isa<PHINode>(I))
- break;
-
- // Don't move instructions which might have side effects, since the side
- // effects need to complete before instructions inside the loop. Also don't
- // move instructions which might read memory, since the loop may modify
- // memory. Note that it's okay if the instruction might have undefined
- // behavior: LoopSimplify guarantees that the preheader dominates the exit
- // block.
- if (I.mayHaveSideEffects() || I.mayReadFromMemory())
- continue;
-
- // Skip debug or pseudo instructions.
- if (I.isDebugOrPseudoInst())
- continue;
-
- // Skip eh pad instructions.
- if (I.isEHPad())
- continue;
-
- // Don't sink alloca: we never want to sink static alloca's out of the
- // entry block, and correctly sinking dynamic alloca's requires
- // checks for stacksave/stackrestore intrinsics.
- // FIXME: Refactor this check somehow?
- if (isa<AllocaInst>(&I))
- continue;
-
- // Determine if there is a use in or before the loop (direct or
- // otherwise).
- bool UsedInLoop = false;
- for (Use &U : I.uses()) {
- Instruction *User = cast<Instruction>(U.getUser());
- BasicBlock *UseBB = User->getParent();
- if (PHINode *P = dyn_cast<PHINode>(User)) {
- unsigned i =
- PHINode::getIncomingValueNumForOperand(U.getOperandNo());
- UseBB = P->getIncomingBlock(i);
- }
- if (UseBB == Preheader || L->contains(UseBB)) {
- UsedInLoop = true;
- break;
- }
- }
-
- // If there is, the def must remain in the preheader.
- if (UsedInLoop)
- continue;
-
- // Otherwise, sink it to the exit block.
- I.moveBefore(ExitBlock->getFirstInsertionPt());
- SE->forgetValue(&I);
- MadeAnyChanges = true;
- }
-
- return MadeAnyChanges;
-}
-
static void replaceExitCond(BranchInst *BI, Value *NewCond,
SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
auto *OldCond = BI->getCondition();
@@ -2065,10 +1984,6 @@ bool IndVarSimplify::run(Loop *L) {
// The Rewriter may not be used from this point on.
- // Loop-invariant instructions in the preheader that aren't used in the
- // loop may be sunk below the loop to reduce register pressure.
- Changed |= sinkUnusedInvariants(L);
-
// rewriteFirstIterationLoopExitValues does not rely on the computation of
// trip count and therefore can further simplify exit values in addition to
// rewriteLoopExitValues.
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index b2c526b..d13b990 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -211,9 +211,15 @@ static Instruction *cloneInstructionInExitBlock(
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
MemorySSAUpdater &MSSAU);
-static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
- ICFLoopSafetyInfo &SafetyInfo,
- MemorySSAUpdater &MSSAU, ScalarEvolution *SE);
+static void moveInstructionBefore(
+ Instruction &I, BasicBlock::iterator Dest, ICFLoopSafetyInfo &SafetyInfo,
+ MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
+ MemorySSA::InsertionPlace Point = MemorySSA::BeforeTerminator);
+
+static bool sinkUnusedInvariantsFromPreheaderToExit(
+ Loop *L, AAResults *AA, ICFLoopSafetyInfo *SafetyInfo,
+ MemorySSAUpdater &MSSAU, ScalarEvolution *SE, DominatorTree *DT,
+ SinkAndHoistLICMFlags &SinkFlags, OptimizationRemarkEmitter *ORE);
static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
function_ref<void(Instruction *)> Fn);
@@ -471,6 +477,12 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI,
TLI, TTI, L, MSSAU, &SafetyInfo, Flags, ORE)
: sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
MSSAU, &SafetyInfo, Flags, ORE);
+
+ // sink pre-header defs that are unused in-loop into the unique exit to reduce
+ // pressure.
+ Changed |= sinkUnusedInvariantsFromPreheaderToExit(L, AA, &SafetyInfo, MSSAU,
+ SE, DT, Flags, ORE);
+
Flags.setIsSink(false);
if (Preheader)
Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, AC, TLI, L,
@@ -1456,19 +1468,80 @@ static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
ICFLoopSafetyInfo &SafetyInfo,
- MemorySSAUpdater &MSSAU,
- ScalarEvolution *SE) {
+ MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
+ MemorySSA::InsertionPlace Point) {
SafetyInfo.removeInstruction(&I);
SafetyInfo.insertInstructionTo(&I, Dest->getParent());
I.moveBefore(*Dest->getParent(), Dest);
if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
MSSAU.getMemorySSA()->getMemoryAccess(&I)))
- MSSAU.moveToPlace(OldMemAcc, Dest->getParent(),
- MemorySSA::BeforeTerminator);
+ MSSAU.moveToPlace(OldMemAcc, Dest->getParent(), Point);
if (SE)
SE->forgetBlockAndLoopDispositions(&I);
}
+// If there's a single exit block, sink any loop-invariant values that were
+// defined in the preheader but not used inside the loop into the exit block
+// to reduce register pressure in the loop.
+static bool sinkUnusedInvariantsFromPreheaderToExit(
+ Loop *L, AAResults *AA, ICFLoopSafetyInfo *SafetyInfo,
+ MemorySSAUpdater &MSSAU, ScalarEvolution *SE, DominatorTree *DT,
+ SinkAndHoistLICMFlags &SinkFlags, OptimizationRemarkEmitter *ORE) {
+ BasicBlock *ExitBlock = L->getExitBlock();
+ if (!ExitBlock)
+ return false;
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader)
+ return false;
+
+ bool MadeAnyChanges = false;
+
+ for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {
+
+ // Skip terminator.
+ if (Preheader->getTerminator() == &I)
+ continue;
+
+ // New instructions were inserted at the end of the preheader.
+ if (isa<PHINode>(I))
+ break;
+
+ // Don't move instructions which might have side effects, since the side
+ // effects need to complete before instructions inside the loop. Note that
+ // it's okay if the instruction might have undefined behavior: LoopSimplify
+ // guarantees that the preheader dominates the exit block.
+ if (I.mayHaveSideEffects())
+ continue;
+
+ if (!canSinkOrHoistInst(I, AA, DT, L, MSSAU, true, SinkFlags, nullptr))
+ continue;
+
+ // Determine if there is a use in or before the loop (direct or
+ // otherwise).
+ bool UsedInLoopOrPreheader = false;
+ for (Use &U : I.uses()) {
+ auto *UserI = cast<Instruction>(U.getUser());
+ BasicBlock *UseBB = UserI->getParent();
+ if (auto *PN = dyn_cast<PHINode>(UserI)) {
+ UseBB = PN->getIncomingBlock(U);
+ }
+ if (UseBB == Preheader || L->contains(UseBB)) {
+ UsedInLoopOrPreheader = true;
+ break;
+ }
+ }
+ if (UsedInLoopOrPreheader)
+ continue;
+
+ moveInstructionBefore(I, ExitBlock->getFirstInsertionPt(), *SafetyInfo,
+ MSSAU, SE, MemorySSA::Beginning);
+ MadeAnyChanges = true;
+ }
+
+ return MadeAnyChanges;
+}
+
static Instruction *sinkThroughTriviallyReplaceablePHI(
PHINode *TPN, Instruction *I, LoopInfo *LI,
SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 1a279b6..001215a 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1318,6 +1318,11 @@ public:
/// the loop, in which case some special-case heuristics may be used.
bool AllFixupsOutsideLoop = true;
+ /// This records whether all of the fixups using this LSRUse are unconditional
+ /// within the loop, meaning they will be executed on every path to the loop
+ /// latch. This includes fixups before early exits.
+ bool AllFixupsUnconditional = true;
+
/// RigidFormula is set to true to guarantee that this use will be associated
/// with a single formula--the one that initially matched. Some SCEV
/// expressions cannot be expanded. This allows LSR to consider the registers
@@ -1421,16 +1426,22 @@ void Cost::RateRegister(const Formula &F, const SCEV *Reg,
if (TTI->isIndexedLoadLegal(TTI->MIM_PostInc, AR->getType()) ||
TTI->isIndexedStoreLegal(TTI->MIM_PostInc, AR->getType())) {
const SCEV *Start;
- const SCEVConstant *Step;
- if (match(AR, m_scev_AffineAddRec(m_SCEV(Start), m_SCEVConstant(Step))))
+ const APInt *Step;
+ if (match(AR, m_scev_AffineAddRec(m_SCEV(Start), m_scev_APInt(Step)))) {
// If the step size matches the base offset, we could use pre-indexed
// addressing.
- if (((AMK & TTI::AMK_PreIndexed) && F.BaseOffset.isFixed() &&
- Step->getAPInt() == F.BaseOffset.getFixedValue()) ||
- ((AMK & TTI::AMK_PostIndexed) && !isa<SCEVConstant>(Start) &&
- SE->isLoopInvariant(Start, L)))
+ bool CanPreIndex = (AMK & TTI::AMK_PreIndexed) &&
+ F.BaseOffset.isFixed() &&
+ *Step == F.BaseOffset.getFixedValue();
+ bool CanPostIndex = (AMK & TTI::AMK_PostIndexed) &&
+ !isa<SCEVConstant>(Start) &&
+ SE->isLoopInvariant(Start, L);
+ // We can only pre or post index when the load/store is unconditional.
+ if ((CanPreIndex || CanPostIndex) && LU.AllFixupsUnconditional)
LoopCost = 0;
+ }
}
+
// If the loop counts down to zero and we'll be using a hardware loop then
// the addrec will be combined into the hardware loop instruction.
if (LU.Kind == LSRUse::ICmpZero && F.countsDownToZero() &&
@@ -1783,6 +1794,9 @@ void LSRUse::print(raw_ostream &OS) const {
if (AllFixupsOutsideLoop)
OS << ", all-fixups-outside-loop";
+ if (AllFixupsUnconditional)
+ OS << ", all-fixups-unconditional";
+
if (WidestFixupType)
OS << ", widest fixup type: " << *WidestFixupType;
}
@@ -2213,6 +2227,7 @@ class LSRInstance {
void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
void CountRegisters(const Formula &F, size_t LUIdx);
bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
+ bool IsFixupExecutedEachIncrement(const LSRFixup &LF) const;
void CollectLoopInvariantFixupsAndFormulae();
@@ -3607,6 +3622,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
LF.PostIncLoops = TmpPostIncLoops;
LF.Offset = Offset;
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+ LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
// Create SCEV as Formula for calculating baseline cost
if (!VisitedLSRUse.count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
@@ -3680,6 +3696,14 @@ bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
return true;
}
+/// Test whether this fixup will be executed each time the corresponding IV
+/// increment instruction is executed.
+bool LSRInstance::IsFixupExecutedEachIncrement(const LSRFixup &LF) const {
+ // If the fixup block dominates the IV increment block then there is no path
+ // through the loop to the increment that doesn't pass through the fixup.
+ return DT.dominates(LF.UserInst->getParent(), IVIncInsertPos->getParent());
+}
+
/// Check for other uses of loop-invariant values which we're tracking. These
/// other uses will pin these values in registers, making them less profitable
/// for elimination.
@@ -3803,6 +3827,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
LF.OperandValToReplace = U;
LF.Offset = Offset;
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+ LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
if (!LU.WidestFixupType ||
SE.getTypeSizeInBits(LU.WidestFixupType) <
SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
@@ -4940,6 +4965,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
LLVM_DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n');
LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
+ LUThatHas->AllFixupsUnconditional &= LU.AllFixupsUnconditional;
// Transfer the fixups of LU to LUThatHas.
for (LSRFixup &Fixup : LU.Fixups) {
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index e043d07..08be5df 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1534,8 +1534,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
bool SrcNotDom = false;
auto CaptureTrackingWithModRef =
- [&](Instruction *AI,
- function_ref<bool(Instruction *)> ModRefCallback) -> bool {
+ [&](Instruction *AI, function_ref<bool(Instruction *)> ModRefCallback,
+ bool &AddressCaptured) -> bool {
SmallVector<Instruction *, 8> Worklist;
Worklist.push_back(AI);
unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
@@ -1559,8 +1559,9 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
if (!Visited.insert(&U).second)
continue;
UseCaptureInfo CI = DetermineUseCaptureKind(U, AI);
- if (capturesAnything(CI.UseCC))
+ if (capturesAnyProvenance(CI.UseCC))
return false;
+ AddressCaptured |= capturesAddress(CI.UseCC);
if (UI->mayReadOrWriteMemory()) {
if (UI->isLifetimeStartOrEnd()) {
@@ -1627,7 +1628,9 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
return true;
};
- if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
+ bool DestAddressCaptured = false;
+ if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback,
+ DestAddressCaptured))
return false;
// Bailout if Dest may have any ModRef before Store.
if (!ReachabilityWorklist.empty() &&
@@ -1653,7 +1656,14 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
return true;
};
- if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
+ bool SrcAddressCaptured = false;
+ if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback,
+ SrcAddressCaptured))
+ return false;
+
+ // If both the source and destination address are captured, the fact that they
+ // are no longer two separate allocations may be observed.
+ if (DestAddressCaptured && SrcAddressCaptured)
return false;
// We can do the transformation. First, move the SrcAlloca to the start of the
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 5af6c96..bb6c879 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -81,6 +81,7 @@ STATISTIC(
STATISTIC(NumInvariantConditionsInjected,
"Number of invariant conditions injected and unswitched");
+namespace llvm {
static cl::opt<bool> EnableNonTrivialUnswitch(
"enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
cl::desc("Forcibly enables non-trivial loop unswitching rather than "
@@ -131,11 +132,17 @@ static cl::opt<bool> InjectInvariantConditions(
static cl::opt<unsigned> InjectInvariantConditionHotnesThreshold(
"simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
- cl::Hidden, cl::desc("Only try to inject loop invariant conditions and "
- "unswitch on them to eliminate branches that are "
- "not-taken 1/<this option> times or less."),
+ cl::Hidden,
+ cl::desc("Only try to inject loop invariant conditions and "
+ "unswitch on them to eliminate branches that are "
+ "not-taken 1/<this option> times or less."),
cl::init(16));
+static cl::opt<bool> EstimateProfile("simple-loop-unswitch-estimate-profile",
+ cl::Hidden, cl::init(true));
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+} // namespace llvm
+
AnalysisKey ShouldRunExtraSimpleLoopUnswitch::Key;
namespace {
struct CompareDesc {
@@ -268,13 +275,42 @@ static bool areLoopExitPHIsLoopInvariant(const Loop &L,
llvm_unreachable("Basic blocks should never be empty!");
}
-/// Copy a set of loop invariant values \p ToDuplicate and insert them at the
+/// Copy a set of loop invariant values \p Invariants and insert them at the
/// end of \p BB and conditionally branch on the copied condition. We only
/// branch on a single value.
+/// We attempt to estimate the profile of the resulting conditional branch from
+/// \p ComputeProfFrom, which is the original conditional branch we're
+/// unswitching.
+/// When \p Direction is true, the \p Invariants form a disjunction, and the
+/// branch conditioned on it exits the loop on the "true" case. When \p
+/// Direction is false, the \p Invariants form a conjunction and the branch
+/// exits on the "false" case.
static void buildPartialUnswitchConditionalBranch(
BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze,
- const Instruction *I, AssumptionCache *AC, const DominatorTree &DT) {
+ const Instruction *I, AssumptionCache *AC, const DominatorTree &DT,
+ const BranchInst &ComputeProfFrom) {
+
+ SmallVector<uint32_t> BranchWeights;
+ bool HasBranchWeights = EstimateProfile && !ProfcheckDisableMetadataFixes &&
+ extractBranchWeights(ComputeProfFrom, BranchWeights);
+ // If Direction is true, that means we had a disjunction and that the "true"
+ // case exits. The probability of the disjunction of the subset of terms is at
+ // most as high as the original one. So, if the probability is higher than the
+ // one we'd assign in absence of a profile (i.e. 0.5), we will use 0.5,
+ // but if it's lower, we will use the original probability.
+ // Conversely, if Direction is false, that means we had a conjunction, and the
+ // probability of exiting is captured in the second branch weight. That
+ // probability is a disjunction (of the negation of the original terms). The
+ // same reasoning applies as above.
+ // Issue #165649: should we expect BFI to conserve, and use that to calculate
+ // the branch weights?
+ if (HasBranchWeights &&
+ static_cast<double>(BranchWeights[Direction ? 0 : 1]) /
+ static_cast<double>(sum_of(BranchWeights)) >
+ 0.5)
+ HasBranchWeights = false;
+
IRBuilder<> IRB(&BB);
IRB.SetCurrentDebugLocation(DebugLoc::getCompilerGenerated());
@@ -287,8 +323,14 @@ static void buildPartialUnswitchConditionalBranch(
Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants)
: IRB.CreateAnd(FrozenInvariants);
- IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
- Direction ? &NormalSucc : &UnswitchedSucc);
+ auto *BR = IRB.CreateCondBr(
+ Cond, Direction ? &UnswitchedSucc : &NormalSucc,
+ Direction ? &NormalSucc : &UnswitchedSucc,
+ HasBranchWeights ? ComputeProfFrom.getMetadata(LLVMContext::MD_prof)
+ : nullptr);
+ if (!HasBranchWeights)
+ setExplicitlyUnknownBranchWeightsIfProfiled(
+ *BR, *BR->getParent()->getParent(), DEBUG_TYPE);
}
/// Copy a set of loop invariant values, and conditionally branch on them.
@@ -658,7 +700,7 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
" condition!");
buildPartialUnswitchConditionalBranch(
*OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
- FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT);
+ FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT, BI);
}
// Update the dominator tree with the added edge.
@@ -2477,7 +2519,7 @@ static void unswitchNontrivialInvariants(
else {
buildPartialUnswitchConditionalBranch(
*SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
- FreezeLoopUnswitchCond, BI, &AC, DT);
+ FreezeLoopUnswitchCond, BI, &AC, DT, *BI);
}
DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});