Diffstat (limited to 'llvm/lib/Transforms/Scalar')
-rw-r--r--   llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp   | 38
-rw-r--r--   llvm/lib/Transforms/Scalar/IndVarSimplify.cpp     | 85
-rw-r--r--   llvm/lib/Transforms/Scalar/LICM.cpp               | 87
-rw-r--r--   llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 38
-rw-r--r--   llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp    | 20
-rw-r--r--   llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp | 60
6 files changed, 211 insertions, 117 deletions
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index 66e45ec..e84ca81 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -122,16 +122,22 @@ static cl::opt<unsigned>
                   cl::desc("Maximum cost accepted for the transformation"),
                   cl::Hidden, cl::init(50));
-extern cl::opt<bool> ProfcheckDisableMetadataFixes;
-
-} // namespace llvm
-
 static cl::opt<double> MaxClonedRate(
     "dfa-max-cloned-rate",
     cl::desc(
         "Maximum cloned instructions rate accepted for the transformation"),
     cl::Hidden, cl::init(7.5));
 
+static cl::opt<unsigned>
+    MaxOuterUseBlocks("dfa-max-out-use-blocks",
+                      cl::desc("Maximum unduplicated blocks with outer uses "
+                               "accepted for the transformation"),
+                      cl::Hidden, cl::init(40));
+
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+
+} // namespace llvm
+
 namespace {
 class SelectInstToUnfold {
   SelectInst *SI;
@@ -965,8 +971,16 @@ private:
     // SLPVectorizer.
     // TODO: Thread the switch partially before reaching the threshold.
     uint64_t NumOrigInst = 0;
-    for (auto *BB : DuplicateMap.keys())
+    uint64_t NumOuterUseBlock = 0;
+    for (auto *BB : DuplicateMap.keys()) {
       NumOrigInst += BB->sizeWithoutDebug();
+      // Only unduplicated blocks with single predecessor require new phi
+      // nodes.
+      for (auto *Succ : successors(BB))
+        if (!DuplicateMap.count(Succ) && Succ->getSinglePredecessor())
+          NumOuterUseBlock++;
+    }
+
     if (double(NumClonedInst) / double(NumOrigInst) > MaxClonedRate) {
       LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, too much "
                            "instructions wll be cloned\n");
@@ -977,6 +991,20 @@ private:
       return false;
     }
 
+    // Too much unduplicated blocks with outer uses may cause too much
+    // insertions of phi nodes for duplicated definitions. TODO: Drop this
+    // threshold if we come up with another way to reduce the number of inserted
+    // phi nodes.
+    if (NumOuterUseBlock > MaxOuterUseBlocks) {
+      LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, too much "
+                           "blocks with outer uses\n");
+      ORE->emit([&]() {
+        return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch)
+               << "Too much blocks with outer uses.";
+      });
+      return false;
+    }
+
     InstructionCost DuplicationCost = 0;
     unsigned JumpTableSize = 0;
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 7ebcc21..4ba4ba3 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -162,8 +162,6 @@ class IndVarSimplify {
                                  const SCEV *ExitCount, PHINode *IndVar,
                                  SCEVExpander &Rewriter);
 
-  bool sinkUnusedInvariants(Loop *L);
-
 public:
   IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                  const DataLayout &DL, TargetLibraryInfo *TLI,
@@ -1079,85 +1077,6 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
   return true;
 }
 
-//===----------------------------------------------------------------------===//
-//  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
-//===----------------------------------------------------------------------===//
-
-/// If there's a single exit block, sink any loop-invariant values that
-/// were defined in the preheader but not used inside the loop into the
-/// exit block to reduce register pressure in the loop.
-bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
-  BasicBlock *ExitBlock = L->getExitBlock();
-  if (!ExitBlock) return false;
-
-  BasicBlock *Preheader = L->getLoopPreheader();
-  if (!Preheader) return false;
-
-  bool MadeAnyChanges = false;
-  for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {
-
-    // Skip BB Terminator.
-    if (Preheader->getTerminator() == &I)
-      continue;
-
-    // New instructions were inserted at the end of the preheader.
-    if (isa<PHINode>(I))
-      break;
-
-    // Don't move instructions which might have side effects, since the side
-    // effects need to complete before instructions inside the loop.  Also don't
-    // move instructions which might read memory, since the loop may modify
-    // memory. Note that it's okay if the instruction might have undefined
-    // behavior: LoopSimplify guarantees that the preheader dominates the exit
-    // block.
-    if (I.mayHaveSideEffects() || I.mayReadFromMemory())
-      continue;
-
-    // Skip debug or pseudo instructions.
-    if (I.isDebugOrPseudoInst())
-      continue;
-
-    // Skip eh pad instructions.
-    if (I.isEHPad())
-      continue;
-
-    // Don't sink alloca: we never want to sink static alloca's out of the
-    // entry block, and correctly sinking dynamic alloca's requires
-    // checks for stacksave/stackrestore intrinsics.
-    // FIXME: Refactor this check somehow?
-    if (isa<AllocaInst>(&I))
-      continue;
-
-    // Determine if there is a use in or before the loop (direct or
-    // otherwise).
-    bool UsedInLoop = false;
-    for (Use &U : I.uses()) {
-      Instruction *User = cast<Instruction>(U.getUser());
-      BasicBlock *UseBB = User->getParent();
-      if (PHINode *P = dyn_cast<PHINode>(User)) {
-        unsigned i =
-          PHINode::getIncomingValueNumForOperand(U.getOperandNo());
-        UseBB = P->getIncomingBlock(i);
-      }
-      if (UseBB == Preheader || L->contains(UseBB)) {
-        UsedInLoop = true;
-        break;
-      }
-    }
-
-    // If there is, the def must remain in the preheader.
-    if (UsedInLoop)
-      continue;
-
-    // Otherwise, sink it to the exit block.
-    I.moveBefore(ExitBlock->getFirstInsertionPt());
-    SE->forgetValue(&I);
-    MadeAnyChanges = true;
-  }
-
-  return MadeAnyChanges;
-}
-
 static void replaceExitCond(BranchInst *BI, Value *NewCond,
                             SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
   auto *OldCond = BI->getCondition();
@@ -2065,10 +1984,6 @@ bool IndVarSimplify::run(Loop *L) {
 
   // The Rewriter may not be used from this point on.
 
-  // Loop-invariant instructions in the preheader that aren't used in the
-  // loop may be sunk below the loop to reduce register pressure.
-  Changed |= sinkUnusedInvariants(L);
-
   // rewriteFirstIterationLoopExitValues does not rely on the computation of
   // trip count and therefore can further simplify exit values in addition to
   // rewriteLoopExitValues.
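The IndVarSimplify hunk above removes sinkUnusedInvariants, and the LICM hunk below reintroduces the same idea as sinkUnusedInvariantsFromPreheaderToExit. The core decision in both is a scan over the candidate's uses: a preheader definition may only be sunk into the loop's sole exit block if nothing in the preheader or the loop still uses it, where a phi use counts as living in the incoming block it flows in from. Below is a minimal standalone sketch of that scan in plain C++; Block, Value, Use, and canSinkToExit are simplified illustrative stand-ins, not LLVM classes or APIs.

#include <algorithm>
#include <iostream>
#include <vector>

struct Block {};

// One use of a value: the block holding the user, plus (for phi users) the
// incoming block the value flows in from, which is where the use "lives".
struct Use {
  const Block *UserBlock;
  const Block *PhiIncomingBlock; // nullptr for non-phi users
};

struct Value {
  std::vector<Use> Uses;
};

// A preheader definition can be sunk to the loop's sole exit block only if
// no use remains in the preheader or in any block of the loop.
bool canSinkToExit(const Value &V, const Block *Preheader,
                   const std::vector<const Block *> &LoopBlocks) {
  for (const Use &U : V.Uses) {
    const Block *UseBB = U.PhiIncomingBlock ? U.PhiIncomingBlock : U.UserBlock;
    bool InLoop = std::find(LoopBlocks.begin(), LoopBlocks.end(), UseBB) !=
                  LoopBlocks.end();
    if (UseBB == Preheader || InLoop)
      return false; // still needed in or before the loop; keep it in place
  }
  return true;
}

int main() {
  Block Preheader, Body, Exit;
  Value OnlyUsedAfterLoop{{{&Exit, nullptr}}};
  Value UsedInLoop{{{&Body, nullptr}}};
  std::vector<const Block *> LoopBlocks{&Body};
  std::cout << canSinkToExit(OnlyUsedAfterLoop, &Preheader, LoopBlocks)  // 1
            << canSinkToExit(UsedInLoop, &Preheader, LoopBlocks) << "\n"; // 0
}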
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index b2c526b..d13b990 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -211,9 +211,15 @@ static Instruction *cloneInstructionInExitBlock(
 static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
                              MemorySSAUpdater &MSSAU);
 
-static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
-                                  ICFLoopSafetyInfo &SafetyInfo,
-                                  MemorySSAUpdater &MSSAU, ScalarEvolution *SE);
+static void moveInstructionBefore(
+    Instruction &I, BasicBlock::iterator Dest, ICFLoopSafetyInfo &SafetyInfo,
+    MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
+    MemorySSA::InsertionPlace Point = MemorySSA::BeforeTerminator);
+
+static bool sinkUnusedInvariantsFromPreheaderToExit(
+    Loop *L, AAResults *AA, ICFLoopSafetyInfo *SafetyInfo,
+    MemorySSAUpdater &MSSAU, ScalarEvolution *SE, DominatorTree *DT,
+    SinkAndHoistLICMFlags &SinkFlags, OptimizationRemarkEmitter *ORE);
+
 static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
                                 function_ref<void(Instruction *)> Fn);
@@ -471,6 +477,12 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI,
                                TLI, TTI, L, MSSAU, &SafetyInfo, Flags, ORE)
                  : sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI,
                               TTI, L, MSSAU, &SafetyInfo, Flags, ORE);
+
+  // sink pre-header defs that are unused in-loop into the unique exit to reduce
+  // pressure.
+  Changed |= sinkUnusedInvariantsFromPreheaderToExit(L, AA, &SafetyInfo, MSSAU,
+                                                     SE, DT, Flags, ORE);
+
   Flags.setIsSink(false);
   if (Preheader)
     Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, AC, TLI, L,
@@ -1456,19 +1468,80 @@ static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
 
 static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
                                   ICFLoopSafetyInfo &SafetyInfo,
-                                  MemorySSAUpdater &MSSAU,
-                                  ScalarEvolution *SE) {
+                                  MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
+                                  MemorySSA::InsertionPlace Point) {
   SafetyInfo.removeInstruction(&I);
   SafetyInfo.insertInstructionTo(&I, Dest->getParent());
   I.moveBefore(*Dest->getParent(), Dest);
   if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
           MSSAU.getMemorySSA()->getMemoryAccess(&I)))
-    MSSAU.moveToPlace(OldMemAcc, Dest->getParent(),
-                      MemorySSA::BeforeTerminator);
+    MSSAU.moveToPlace(OldMemAcc, Dest->getParent(), Point);
   if (SE)
     SE->forgetBlockAndLoopDispositions(&I);
 }
 
+// If there's a single exit block, sink any loop-invariant values that were
+// defined in the preheader but not used inside the loop into the exit block
+// to reduce register pressure in the loop.
+static bool sinkUnusedInvariantsFromPreheaderToExit(
+    Loop *L, AAResults *AA, ICFLoopSafetyInfo *SafetyInfo,
+    MemorySSAUpdater &MSSAU, ScalarEvolution *SE, DominatorTree *DT,
+    SinkAndHoistLICMFlags &SinkFlags, OptimizationRemarkEmitter *ORE) {
+  BasicBlock *ExitBlock = L->getExitBlock();
+  if (!ExitBlock)
+    return false;
+
+  BasicBlock *Preheader = L->getLoopPreheader();
+  if (!Preheader)
+    return false;
+
+  bool MadeAnyChanges = false;
+
+  for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {
+
+    // Skip terminator.
+    if (Preheader->getTerminator() == &I)
+      continue;
+
+    // New instructions were inserted at the end of the preheader.
+    if (isa<PHINode>(I))
+      break;
+
+    // Don't move instructions which might have side effects, since the side
+    // effects need to complete before instructions inside the loop. Note that
+    // it's okay if the instruction might have undefined behavior: LoopSimplify
+    // guarantees that the preheader dominates the exit block.
+    if (I.mayHaveSideEffects())
+      continue;
+
+    if (!canSinkOrHoistInst(I, AA, DT, L, MSSAU, true, SinkFlags, nullptr))
+      continue;
+
+    // Determine if there is a use in or before the loop (direct or
+    // otherwise).
+    bool UsedInLoopOrPreheader = false;
+    for (Use &U : I.uses()) {
+      auto *UserI = cast<Instruction>(U.getUser());
+      BasicBlock *UseBB = UserI->getParent();
+      if (auto *PN = dyn_cast<PHINode>(UserI)) {
+        UseBB = PN->getIncomingBlock(U);
+      }
+      if (UseBB == Preheader || L->contains(UseBB)) {
+        UsedInLoopOrPreheader = true;
+        break;
+      }
+    }
+    if (UsedInLoopOrPreheader)
+      continue;
+
+    moveInstructionBefore(I, ExitBlock->getFirstInsertionPt(), *SafetyInfo,
+                          MSSAU, SE, MemorySSA::Beginning);
+    MadeAnyChanges = true;
+  }
+
+  return MadeAnyChanges;
+}
+
 static Instruction *sinkThroughTriviallyReplaceablePHI(
     PHINode *TPN, Instruction *I, LoopInfo *LI,
     SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 1a279b6..001215a 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1318,6 +1318,11 @@ public:
   /// the loop, in which case some special-case heuristics may be used.
   bool AllFixupsOutsideLoop = true;
 
+  /// This records whether all of the fixups using this LSRUse are unconditional
+  /// within the loop, meaning they will be executed on every path to the loop
+  /// latch. This includes fixups before early exits.
+  bool AllFixupsUnconditional = true;
+
   /// RigidFormula is set to true to guarantee that this use will be associated
   /// with a single formula--the one that initially matched. Some SCEV
   /// expressions cannot be expanded. This allows LSR to consider the registers
@@ -1421,16 +1426,22 @@ void Cost::RateRegister(const Formula &F, const SCEV *Reg,
     if (TTI->isIndexedLoadLegal(TTI->MIM_PostInc, AR->getType()) ||
         TTI->isIndexedStoreLegal(TTI->MIM_PostInc, AR->getType())) {
       const SCEV *Start;
-      const SCEVConstant *Step;
-      if (match(AR, m_scev_AffineAddRec(m_SCEV(Start), m_SCEVConstant(Step))))
+      const APInt *Step;
+      if (match(AR, m_scev_AffineAddRec(m_SCEV(Start), m_scev_APInt(Step)))) {
         // If the step size matches the base offset, we could use pre-indexed
         // addressing.
-        if (((AMK & TTI::AMK_PreIndexed) && F.BaseOffset.isFixed() &&
-             Step->getAPInt() == F.BaseOffset.getFixedValue()) ||
-            ((AMK & TTI::AMK_PostIndexed) && !isa<SCEVConstant>(Start) &&
-             SE->isLoopInvariant(Start, L)))
+        bool CanPreIndex = (AMK & TTI::AMK_PreIndexed) &&
+                           F.BaseOffset.isFixed() &&
+                           *Step == F.BaseOffset.getFixedValue();
+        bool CanPostIndex = (AMK & TTI::AMK_PostIndexed) &&
+                            !isa<SCEVConstant>(Start) &&
+                            SE->isLoopInvariant(Start, L);
+        // We can only pre or post index when the load/store is unconditional.
+        if ((CanPreIndex || CanPostIndex) && LU.AllFixupsUnconditional)
           LoopCost = 0;
+      }
     }
+
   // If the loop counts down to zero and we'll be using a hardware loop then
   // the addrec will be combined into the hardware loop instruction.
   if (LU.Kind == LSRUse::ICmpZero && F.countsDownToZero() &&
@@ -1783,6 +1794,9 @@ void LSRUse::print(raw_ostream &OS) const {
   if (AllFixupsOutsideLoop)
     OS << ", all-fixups-outside-loop";
 
+  if (AllFixupsUnconditional)
+    OS << ", all-fixups-unconditional";
+
   if (WidestFixupType)
     OS << ", widest fixup type: " << *WidestFixupType;
 }
@@ -2213,6 +2227,7 @@ class LSRInstance {
   void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
   void CountRegisters(const Formula &F, size_t LUIdx);
   bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
+  bool IsFixupExecutedEachIncrement(const LSRFixup &LF) const;
 
   void CollectLoopInvariantFixupsAndFormulae();
 
@@ -3607,6 +3622,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
     LF.PostIncLoops = TmpPostIncLoops;
     LF.Offset = Offset;
     LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+    LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
 
     // Create SCEV as Formula for calculating baseline cost
     if (!VisitedLSRUse.count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
@@ -3680,6 +3696,14 @@ bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
   return true;
 }
 
+/// Test whether this fixup will be executed each time the corresponding IV
+/// increment instruction is executed.
+bool LSRInstance::IsFixupExecutedEachIncrement(const LSRFixup &LF) const {
+  // If the fixup block dominates the IV increment block then there is no path
+  // through the loop to the increment that doesn't pass through the fixup.
+  return DT.dominates(LF.UserInst->getParent(), IVIncInsertPos->getParent());
+}
+
 /// Check for other uses of loop-invariant values which we're tracking. These
 /// other uses will pin these values in registers, making them less profitable
 /// for elimination.
@@ -3803,6 +3827,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
         LF.OperandValToReplace = U;
         LF.Offset = Offset;
         LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+        LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
         if (!LU.WidestFixupType ||
             SE.getTypeSizeInBits(LU.WidestFixupType) <
                 SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
@@ -4940,6 +4965,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
         LLVM_DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n');
 
         LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
+        LUThatHas->AllFixupsUnconditional &= LU.AllFixupsUnconditional;
 
         // Transfer the fixups of LU to LUThatHas.
         for (LSRFixup &Fixup : LU.Fixups) {
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index e043d07..08be5df 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1534,8 +1534,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
   bool SrcNotDom = false;
 
   auto CaptureTrackingWithModRef =
-      [&](Instruction *AI,
-          function_ref<bool(Instruction *)> ModRefCallback) -> bool {
+      [&](Instruction *AI, function_ref<bool(Instruction *)> ModRefCallback,
+          bool &AddressCaptured) -> bool {
     SmallVector<Instruction *, 8> Worklist;
     Worklist.push_back(AI);
     unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
@@ -1559,8 +1559,9 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
        if (!Visited.insert(&U).second)
          continue;
        UseCaptureInfo CI = DetermineUseCaptureKind(U, AI);
-        if (capturesAnything(CI.UseCC))
+        if (capturesAnyProvenance(CI.UseCC))
          return false;
+        AddressCaptured |= capturesAddress(CI.UseCC);
 
        if (UI->mayReadOrWriteMemory()) {
          if (UI->isLifetimeStartOrEnd()) {
@@ -1627,7 +1628,9 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
     return true;
   };
 
-  if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
+  bool DestAddressCaptured = false;
+  if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback,
+                                 DestAddressCaptured))
     return false;
   // Bailout if Dest may have any ModRef before Store.
   if (!ReachabilityWorklist.empty() &&
@@ -1653,7 +1656,14 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
     return true;
   };
 
-  if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
+  bool SrcAddressCaptured = false;
+  if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback,
+                                 SrcAddressCaptured))
+    return false;
+
+  // If both the source and destination address are captured, the fact that they
+  // are no longer two separate allocations may be observed.
+  if (DestAddressCaptured && SrcAddressCaptured)
     return false;
 
   // We can do the transformation. First, move the SrcAlloca to the start of the
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 5af6c96..bb6c879 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -81,6 +81,7 @@ STATISTIC(
 STATISTIC(NumInvariantConditionsInjected,
           "Number of invariant conditions injected and unswitched");
 
+namespace llvm {
 static cl::opt<bool> EnableNonTrivialUnswitch(
     "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
     cl::desc("Forcibly enables non-trivial loop unswitching rather than "
@@ -131,11 +132,17 @@ static cl::opt<bool> InjectInvariantConditions(
 static cl::opt<unsigned> InjectInvariantConditionHotnesThreshold(
     "simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
-    cl::Hidden, cl::desc("Only try to inject loop invariant conditions and "
-                         "unswitch on them to eliminate branches that are "
-                         "not-taken 1/<this option> times or less."),
+    cl::Hidden,
+    cl::desc("Only try to inject loop invariant conditions and "
+             "unswitch on them to eliminate branches that are "
+             "not-taken 1/<this option> times or less."),
     cl::init(16));
 
+static cl::opt<bool> EstimateProfile("simple-loop-unswitch-estimate-profile",
+                                     cl::Hidden, cl::init(true));
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+} // namespace llvm
+
 AnalysisKey ShouldRunExtraSimpleLoopUnswitch::Key;
 namespace {
 struct CompareDesc {
@@ -268,13 +275,42 @@ static bool areLoopExitPHIsLoopInvariant(const Loop &L,
   llvm_unreachable("Basic blocks should never be empty!");
 }
 
-/// Copy a set of loop invariant values \p ToDuplicate and insert them at the
+/// Copy a set of loop invariant values \p Invariants and insert them at the
 /// end of \p BB and conditionally branch on the copied condition. We only
 /// branch on a single value.
+/// We attempt to estimate the profile of the resulting conditional branch from
+/// \p ComputeProfFrom, which is the original conditional branch we're
+/// unswitching.
+/// When \p Direction is true, the \p Invariants form a disjunction, and the
+/// branch conditioned on it exits the loop on the "true" case. When \p
+/// Direction is false, the \p Invariants form a conjunction and the branch
+/// exits on the "false" case.
 static void buildPartialUnswitchConditionalBranch(
     BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
     BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze,
-    const Instruction *I, AssumptionCache *AC, const DominatorTree &DT) {
+    const Instruction *I, AssumptionCache *AC, const DominatorTree &DT,
+    const BranchInst &ComputeProfFrom) {
+
+  SmallVector<uint32_t> BranchWeights;
+  bool HasBranchWeights = EstimateProfile && !ProfcheckDisableMetadataFixes &&
+                          extractBranchWeights(ComputeProfFrom, BranchWeights);
+  // If Direction is true, that means we had a disjunction and that the "true"
+  // case exits. The probability of the disjunction of the subset of terms is at
+  // most as high as the original one. So, if the probability is higher than the
+  // one we'd assign in absence of a profile (i.e. 0.5), we will use 0.5,
+  // but if it's lower, we will use the original probability.
+  // Conversely, if Direction is false, that means we had a conjunction, and the
+  // probability of exiting is captured in the second branch weight. That
+  // probability is a disjunction (of the negation of the original terms). The
+  // same reasoning applies as above.
+  // Issue #165649: should we expect BFI to conserve, and use that to calculate
+  // the branch weights?
+  if (HasBranchWeights &&
+      static_cast<double>(BranchWeights[Direction ? 0 : 1]) /
+              static_cast<double>(sum_of(BranchWeights)) >
+          0.5)
+    HasBranchWeights = false;
+
   IRBuilder<> IRB(&BB);
   IRB.SetCurrentDebugLocation(DebugLoc::getCompilerGenerated());
@@ -287,8 +323,14 @@ static void buildPartialUnswitchConditionalBranch(
   Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants)
                           : IRB.CreateAnd(FrozenInvariants);
 
-  IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
-                   Direction ? &NormalSucc : &UnswitchedSucc);
+  auto *BR = IRB.CreateCondBr(
+      Cond, Direction ? &UnswitchedSucc : &NormalSucc,
+      Direction ? &NormalSucc : &UnswitchedSucc,
+      HasBranchWeights ? ComputeProfFrom.getMetadata(LLVMContext::MD_prof)
+                       : nullptr);
+  if (!HasBranchWeights)
+    setExplicitlyUnknownBranchWeightsIfProfiled(
+        *BR, *BR->getParent()->getParent(), DEBUG_TYPE);
 }
 
 /// Copy a set of loop invariant values, and conditionally branch on them.
@@ -658,7 +700,7 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
            " condition!");
     buildPartialUnswitchConditionalBranch(
         *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
-        FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT);
+        FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT, BI);
   }
 
   // Update the dominator tree with the added edge.
@@ -2477,7 +2519,7 @@ static void unswitchNontrivialInvariants(
   else {
     buildPartialUnswitchConditionalBranch(
         *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
-        FreezeLoopUnswitchCond, BI, &AC, DT);
+        FreezeLoopUnswitchCond, BI, &AC, DT, *BI);
   }
 
   DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
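The long comment in buildPartialUnswitchConditionalBranch reduces to a simple rule: the unswitched branch may reuse the original branch weights only when the implied probability of the exit edge is no higher than the 0.5 that would be assumed without a profile, since the partial condition can only make the exit less likely. Below is a minimal standalone sketch of that check in plain C++; canReuseBranchWeights is an illustrative name, not part of the pass's API.

#include <cstdint>
#include <iostream>
#include <numeric>
#include <vector>

// Direction==true: the exit is the "true" successor, whose weight is index 0.
// Direction==false: the exit is the "false" successor, whose weight is index 1.
bool canReuseBranchWeights(const std::vector<uint32_t> &Weights,
                           bool Direction) {
  if (Weights.size() != 2)
    return false;
  double Total = std::accumulate(Weights.begin(), Weights.end(), 0.0);
  if (Total == 0.0)
    return false;
  double ExitProb = Weights[Direction ? 0 : 1] / Total;
  // The unswitched condition uses only a subset of the original terms, so its
  // exit probability can only be lower; anything above 0.5 is dropped and the
  // branch is left with explicitly unknown weights instead.
  return ExitProb <= 0.5;
}

int main() {
  std::cout << canReuseBranchWeights({1, 99}, /*Direction=*/true) << "\n";  // 1
  std::cout << canReuseBranchWeights({99, 1}, /*Direction=*/true) << "\n";  // 0
}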
