Diffstat (limited to 'llvm/lib/Transforms/Scalar')
 llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp   | 38
 llvm/lib/Transforms/Scalar/IndVarSimplify.cpp     | 85
 llvm/lib/Transforms/Scalar/LICM.cpp               | 87
 llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 38
 llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp    | 20
 llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp | 60
6 files changed, 211 insertions(+), 117 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index 66e45ec..e84ca81 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -122,16 +122,22 @@ static cl::opt<unsigned>
                    cl::desc("Maximum cost accepted for the transformation"),
                    cl::Hidden, cl::init(50));
 
-extern cl::opt<bool> ProfcheckDisableMetadataFixes;
-
-} // namespace llvm
-
 static cl::opt<double> MaxClonedRate(
     "dfa-max-cloned-rate",
     cl::desc(
         "Maximum cloned instructions rate accepted for the transformation"),
     cl::Hidden, cl::init(7.5));
 
+static cl::opt<unsigned>
+    MaxOuterUseBlocks("dfa-max-out-use-blocks",
+                      cl::desc("Maximum unduplicated blocks with outer uses "
+                               "accepted for the transformation"),
+                      cl::Hidden, cl::init(40));
+
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+
+} // namespace llvm
+
 namespace {
 class SelectInstToUnfold {
   SelectInst *SI;
@@ -965,8 +971,16 @@ private:
     // SLPVectorizer.
     // TODO: Thread the switch partially before reaching the threshold.
    uint64_t NumOrigInst = 0;
-    for (auto *BB : DuplicateMap.keys())
+    uint64_t NumOuterUseBlock = 0;
+    for (auto *BB : DuplicateMap.keys()) {
       NumOrigInst += BB->sizeWithoutDebug();
+      // Only unduplicated blocks with single predecessor require new phi
+      // nodes.
+      for (auto *Succ : successors(BB))
+        if (!DuplicateMap.count(Succ) && Succ->getSinglePredecessor())
+          NumOuterUseBlock++;
+    }
+
     if (double(NumClonedInst) / double(NumOrigInst) > MaxClonedRate) {
       LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, too much "
                            "instructions wll be cloned\n");
@@ -977,6 +991,20 @@ private:
       return false;
     }
 
+    // Too much unduplicated blocks with outer uses may cause too much
+    // insertions of phi nodes for duplicated definitions. TODO: Drop this
+    // threshold if we come up with another way to reduce the number of inserted
+    // phi nodes.
+    if (NumOuterUseBlock > MaxOuterUseBlocks) {
+      LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, too much "
+                           "blocks with outer uses\n");
+      ORE->emit([&]() {
+        return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch)
+               << "Too much blocks with outer uses.";
+      });
+      return false;
+    }
+
     InstructionCost DuplicationCost = 0;
 
     unsigned JumpTableSize = 0;
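Note on the DFAJumpThreading hunks above: the new dfa-max-out-use-blocks cap counts successors that are not themselves duplicated but have a single predecessor, since those are exactly the blocks where the transform would have to insert fresh PHI nodes for duplicated definitions. The following is a standalone sketch (plain C++, not LLVM code) of the two profitability gates after this patch; the function name and sample numbers are illustrative, and the defaults simply mirror the cl::init values in the hunk.

#include <cstdint>
#include <iostream>

// Sketch of the profitability check: reject if cloning is too heavy, or if
// too many unduplicated successor blocks would need new PHI nodes.
bool profitable(uint64_t NumClonedInst, uint64_t NumOrigInst,
                unsigned NumOuterUseBlocks, double MaxClonedRate = 7.5,
                unsigned MaxOuterUseBlocks = 40) {
  if (double(NumClonedInst) / double(NumOrigInst) > MaxClonedRate)
    return false; // too many instructions would be cloned
  if (NumOuterUseBlocks > MaxOuterUseBlocks)
    return false; // too many blocks would need PHI nodes for outer uses
  return true;
}

int main() {
  std::cout << profitable(300, 100, 10) << '\n'; // rate 3.0, 10 blocks: 1
  std::cout << profitable(300, 100, 64) << '\n'; // 64 > 40 outer-use blocks: 0
}

Per the TODO in the patch, the second cap is a stopgap: it could be dropped if the number of inserted PHI nodes were reduced some other way.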
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 7ebcc21..4ba4ba3 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -162,8 +162,6 @@ class IndVarSimplify {
                                  const SCEV *ExitCount,
                                  PHINode *IndVar, SCEVExpander &Rewriter);
 
-  bool sinkUnusedInvariants(Loop *L);
-
 public:
   IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                  const DataLayout &DL, TargetLibraryInfo *TLI,
@@ -1079,85 +1077,6 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
   return true;
 }
 
-//===----------------------------------------------------------------------===//
-//  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
-//===----------------------------------------------------------------------===//
-
-/// If there's a single exit block, sink any loop-invariant values that
-/// were defined in the preheader but not used inside the loop into the
-/// exit block to reduce register pressure in the loop.
-bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
-  BasicBlock *ExitBlock = L->getExitBlock();
-  if (!ExitBlock) return false;
-
-  BasicBlock *Preheader = L->getLoopPreheader();
-  if (!Preheader) return false;
-
-  bool MadeAnyChanges = false;
-  for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {
-
-    // Skip BB Terminator.
-    if (Preheader->getTerminator() == &I)
-      continue;
-
-    // New instructions were inserted at the end of the preheader.
-    if (isa<PHINode>(I))
-      break;
-
-    // Don't move instructions which might have side effects, since the side
-    // effects need to complete before instructions inside the loop.  Also don't
-    // move instructions which might read memory, since the loop may modify
-    // memory. Note that it's okay if the instruction might have undefined
-    // behavior: LoopSimplify guarantees that the preheader dominates the exit
-    // block.
-    if (I.mayHaveSideEffects() || I.mayReadFromMemory())
-      continue;
-
-    // Skip debug or pseudo instructions.
-    if (I.isDebugOrPseudoInst())
-      continue;
-
-    // Skip eh pad instructions.
-    if (I.isEHPad())
-      continue;
-
-    // Don't sink alloca: we never want to sink static alloca's out of the
-    // entry block, and correctly sinking dynamic alloca's requires
-    // checks for stacksave/stackrestore intrinsics.
-    // FIXME: Refactor this check somehow?
-    if (isa<AllocaInst>(&I))
-      continue;
-
-    // Determine if there is a use in or before the loop (direct or
-    // otherwise).
-    bool UsedInLoop = false;
-    for (Use &U : I.uses()) {
-      Instruction *User = cast<Instruction>(U.getUser());
-      BasicBlock *UseBB = User->getParent();
-      if (PHINode *P = dyn_cast<PHINode>(User)) {
-        unsigned i =
-          PHINode::getIncomingValueNumForOperand(U.getOperandNo());
-        UseBB = P->getIncomingBlock(i);
-      }
-      if (UseBB == Preheader || L->contains(UseBB)) {
-        UsedInLoop = true;
-        break;
-      }
-    }
-
-    // If there is, the def must remain in the preheader.
-    if (UsedInLoop)
-      continue;
-
-    // Otherwise, sink it to the exit block.
-    I.moveBefore(ExitBlock->getFirstInsertionPt());
-    SE->forgetValue(&I);
-    MadeAnyChanges = true;
-  }
-
-  return MadeAnyChanges;
-}
-
 static void replaceExitCond(BranchInst *BI, Value *NewCond,
                             SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
   auto *OldCond = BI->getCondition();
@@ -2065,10 +1984,6 @@ bool IndVarSimplify::run(Loop *L) {
 
   // The Rewriter may not be used from this point on.
 
-  // Loop-invariant instructions in the preheader that aren't used in the
-  // loop may be sunk below the loop to reduce register pressure.
-  Changed |= sinkUnusedInvariants(L);
-
   // rewriteFirstIterationLoopExitValues does not rely on the computation of
   // trip count and therefore can further simplify exit values in addition to
   // rewriteLoopExitValues.
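The removal above is not a loss of functionality by itself: the preheader-to-exit sinking that IndVarSimplify used to perform late in its run is reintroduced inside LICM in the next file, where it can reuse canSinkOrHoistInst and the MemorySSA updater. As a plain C++ illustration (not LLVM code or IR) of the kind of value this cleanup targets:

#include <iostream>

// `inv` is loop-invariant, defined before the loop, and only used after it.
// Keeping it live across the loop body costs a register for the whole loop;
// sinking it into the exit block materializes it next to its only use.
long demo(const long *a, long n, long x, long y) {
  long inv = x * y + 42;   // computed in the "preheader", unused in the loop
  long sum = 0;
  for (long i = 0; i < n; ++i)
    sum += a[i];           // the loop body never touches `inv`
  return sum + inv;        // the only use is after the loop exits
}

int main() {
  long a[] = {1, 2, 3, 4};
  std::cout << demo(a, 4, 5, 6) << '\n'; // 10 + 72 = 82
}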
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index b2c526b..d13b990 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -211,9 +211,15 @@ static Instruction *cloneInstructionInExitBlock(
 static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
                              MemorySSAUpdater &MSSAU);
 
-static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
-                                  ICFLoopSafetyInfo &SafetyInfo,
-                                  MemorySSAUpdater &MSSAU, ScalarEvolution *SE);
+static void moveInstructionBefore(
+    Instruction &I, BasicBlock::iterator Dest, ICFLoopSafetyInfo &SafetyInfo,
+    MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
+    MemorySSA::InsertionPlace Point = MemorySSA::BeforeTerminator);
+
+static bool sinkUnusedInvariantsFromPreheaderToExit(
+    Loop *L, AAResults *AA, ICFLoopSafetyInfo *SafetyInfo,
+    MemorySSAUpdater &MSSAU, ScalarEvolution *SE, DominatorTree *DT,
+    SinkAndHoistLICMFlags &SinkFlags, OptimizationRemarkEmitter *ORE);
 
 static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
                                 function_ref<void(Instruction *)> Fn);
@@ -471,6 +477,12 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI,
                                     TLI, TTI, L, MSSAU, &SafetyInfo, Flags, ORE)
             : sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
                          MSSAU, &SafetyInfo, Flags, ORE);
+
+  // sink pre-header defs that are unused in-loop into the unique exit to reduce
+  // pressure.
+  Changed |= sinkUnusedInvariantsFromPreheaderToExit(L, AA, &SafetyInfo, MSSAU,
+                                                     SE, DT, Flags, ORE);
+
   Flags.setIsSink(false);
   if (Preheader)
     Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, AC, TLI, L,
@@ -1456,19 +1468,80 @@ static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
 
 static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
                                   ICFLoopSafetyInfo &SafetyInfo,
-                                  MemorySSAUpdater &MSSAU,
-                                  ScalarEvolution *SE) {
+                                  MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
+                                  MemorySSA::InsertionPlace Point) {
   SafetyInfo.removeInstruction(&I);
   SafetyInfo.insertInstructionTo(&I, Dest->getParent());
   I.moveBefore(*Dest->getParent(), Dest);
   if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
           MSSAU.getMemorySSA()->getMemoryAccess(&I)))
-    MSSAU.moveToPlace(OldMemAcc, Dest->getParent(),
-                      MemorySSA::BeforeTerminator);
+    MSSAU.moveToPlace(OldMemAcc, Dest->getParent(), Point);
   if (SE)
     SE->forgetBlockAndLoopDispositions(&I);
 }
 
+// If there's a single exit block, sink any loop-invariant values that were
+// defined in the preheader but not used inside the loop into the exit block
+// to reduce register pressure in the loop.
+static bool sinkUnusedInvariantsFromPreheaderToExit(
+    Loop *L, AAResults *AA, ICFLoopSafetyInfo *SafetyInfo,
+    MemorySSAUpdater &MSSAU, ScalarEvolution *SE, DominatorTree *DT,
+    SinkAndHoistLICMFlags &SinkFlags, OptimizationRemarkEmitter *ORE) {
+  BasicBlock *ExitBlock = L->getExitBlock();
+  if (!ExitBlock)
+    return false;
+
+  BasicBlock *Preheader = L->getLoopPreheader();
+  if (!Preheader)
+    return false;
+
+  bool MadeAnyChanges = false;
+
+  for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {
+
+    // Skip terminator.
+    if (Preheader->getTerminator() == &I)
+      continue;
+
+    // New instructions were inserted at the end of the preheader.
+    if (isa<PHINode>(I))
+      break;
+
+    // Don't move instructions which might have side effects, since the side
+    // effects need to complete before instructions inside the loop. Note that
+    // it's okay if the instruction might have undefined behavior: LoopSimplify
+    // guarantees that the preheader dominates the exit block.
+    if (I.mayHaveSideEffects())
+      continue;
+
+    if (!canSinkOrHoistInst(I, AA, DT, L, MSSAU, true, SinkFlags, nullptr))
+      continue;
+
+    // Determine if there is a use in or before the loop (direct or
+    // otherwise).
+    bool UsedInLoopOrPreheader = false;
+    for (Use &U : I.uses()) {
+      auto *UserI = cast<Instruction>(U.getUser());
+      BasicBlock *UseBB = UserI->getParent();
+      if (auto *PN = dyn_cast<PHINode>(UserI)) {
+        UseBB = PN->getIncomingBlock(U);
+      }
+      if (UseBB == Preheader || L->contains(UseBB)) {
+        UsedInLoopOrPreheader = true;
+        break;
+      }
+    }
+    if (UsedInLoopOrPreheader)
+      continue;
+
+    moveInstructionBefore(I, ExitBlock->getFirstInsertionPt(), *SafetyInfo,
+                          MSSAU, SE, MemorySSA::Beginning);
+    MadeAnyChanges = true;
+  }
+
+  return MadeAnyChanges;
+}
+
 static Instruction *sinkThroughTriviallyReplaceablePHI(
     PHINode *TPN, Instruction *I, LoopInfo *LI,
     SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,
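One small point about the LICM plumbing above: moveInstructionBefore gains a trailing MemorySSA::InsertionPlace parameter defaulted to MemorySSA::BeforeTerminator, so every existing caller keeps its behavior while the new preheader-to-exit sink can ask for MemorySSA::Beginning. A standalone sketch of that API-evolution pattern (plain C++, illustrative names, not LLVM code):

#include <iostream>

enum class InsertionPlace { Beginning, BeforeTerminator };

// A new trailing parameter with a default keeps old call sites compiling
// unchanged, while a new caller can request a different insertion point.
void moveTo(const char *What,
            InsertionPlace Point = InsertionPlace::BeforeTerminator) {
  std::cout << What << " -> "
            << (Point == InsertionPlace::Beginning ? "beginning"
                                                   : "before terminator")
            << '\n';
}

int main() {
  moveTo("hoisted instruction");                       // existing callers
  moveTo("sunk invariant", InsertionPlace::Beginning); // new sink caller
}

The trade-off is that any future caller who actually wanted the other insertion point will silently get the default unless they pass it explicitly.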
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 1a279b6..001215a 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1318,6 +1318,11 @@ public:
   /// the loop, in which case some special-case heuristics may be used.
   bool AllFixupsOutsideLoop = true;
 
+  /// This records whether all of the fixups using this LSRUse are unconditional
+  /// within the loop, meaning they will be executed on every path to the loop
+  /// latch. This includes fixups before early exits.
+  bool AllFixupsUnconditional = true;
+
   /// RigidFormula is set to true to guarantee that this use will be associated
   /// with a single formula--the one that initially matched. Some SCEV
   /// expressions cannot be expanded. This allows LSR to consider the registers
@@ -1421,16 +1426,22 @@ void Cost::RateRegister(const Formula &F, const SCEV *Reg,
     if (TTI->isIndexedLoadLegal(TTI->MIM_PostInc, AR->getType()) ||
         TTI->isIndexedStoreLegal(TTI->MIM_PostInc, AR->getType())) {
       const SCEV *Start;
-      const SCEVConstant *Step;
-      if (match(AR, m_scev_AffineAddRec(m_SCEV(Start), m_SCEVConstant(Step))))
+      const APInt *Step;
+      if (match(AR, m_scev_AffineAddRec(m_SCEV(Start), m_scev_APInt(Step)))) {
         // If the step size matches the base offset, we could use pre-indexed
         // addressing.
-        if (((AMK & TTI::AMK_PreIndexed) && F.BaseOffset.isFixed() &&
-             Step->getAPInt() == F.BaseOffset.getFixedValue()) ||
-            ((AMK & TTI::AMK_PostIndexed) && !isa<SCEVConstant>(Start) &&
-             SE->isLoopInvariant(Start, L)))
+        bool CanPreIndex = (AMK & TTI::AMK_PreIndexed) &&
+                           F.BaseOffset.isFixed() &&
+                           *Step == F.BaseOffset.getFixedValue();
+        bool CanPostIndex = (AMK & TTI::AMK_PostIndexed) &&
+                            !isa<SCEVConstant>(Start) &&
+                            SE->isLoopInvariant(Start, L);
+        // We can only pre or post index when the load/store is unconditional.
+        if ((CanPreIndex || CanPostIndex) && LU.AllFixupsUnconditional)
           LoopCost = 0;
+      }
     }
+
     // If the loop counts down to zero and we'll be using a hardware loop then
     // the addrec will be combined into the hardware loop instruction.
     if (LU.Kind == LSRUse::ICmpZero && F.countsDownToZero() &&
@@ -1783,6 +1794,9 @@ void LSRUse::print(raw_ostream &OS) const {
   if (AllFixupsOutsideLoop)
     OS << ", all-fixups-outside-loop";
 
+  if (AllFixupsUnconditional)
+    OS << ", all-fixups-unconditional";
+
   if (WidestFixupType)
     OS << ", widest fixup type: " << *WidestFixupType;
 }
@@ -2213,6 +2227,7 @@ class LSRInstance {
   void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
   void CountRegisters(const Formula &F, size_t LUIdx);
   bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
+  bool IsFixupExecutedEachIncrement(const LSRFixup &LF) const;
 
   void CollectLoopInvariantFixupsAndFormulae();
 
@@ -3607,6 +3622,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
     LF.PostIncLoops = TmpPostIncLoops;
     LF.Offset = Offset;
     LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+    LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
 
     // Create SCEV as Formula for calculating baseline cost
     if (!VisitedLSRUse.count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
@@ -3680,6 +3696,14 @@ bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
   return true;
 }
 
+/// Test whether this fixup will be executed each time the corresponding IV
+/// increment instruction is executed.
+bool LSRInstance::IsFixupExecutedEachIncrement(const LSRFixup &LF) const {
+  // If the fixup block dominates the IV increment block then there is no path
+  // through the loop to the increment that doesn't pass through the fixup.
+  return DT.dominates(LF.UserInst->getParent(), IVIncInsertPos->getParent());
+}
+
 /// Check for other uses of loop-invariant values which we're tracking. These
 /// other uses will pin these values in registers, making them less profitable
 /// for elimination.
@@ -3803,6 +3827,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
         LF.OperandValToReplace = U;
         LF.Offset = Offset;
         LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+        LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
         if (!LU.WidestFixupType ||
             SE.getTypeSizeInBits(LU.WidestFixupType) <
             SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
@@ -4940,6 +4965,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
       LLVM_DEBUG(dbgs() << "  Deleting use "; LU.print(dbgs()); dbgs() << '\n');
 
       LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
+      LUThatHas->AllFixupsUnconditional &= LU.AllFixupsUnconditional;
 
       // Transfer the fixups of LU to LUThatHas.
       for (LSRFixup &Fixup : LU.Fixups) {
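The LSR change keys the pre/post-indexed addressing bonus on the new AllFixupsUnconditional bit, which IsFixupExecutedEachIncrement computes by checking that the fixup's block dominates the IV increment's insertion block. A compile-only illustration (plain C++, not LLVM code) of the two situations being distinguished:

// In f1 the store runs on every iteration, so an indexed addressing mode can
// fold the pointer bump into the access and the formula may be treated as
// free (LoopCost = 0). In f2 the store is conditional, so the induction
// update must still exist on its own and the bonus no longer applies.
void f1(int *p, int n, int v) {
  for (int i = 0; i < n; ++i)
    p[i] = v;   // unconditional fixup: its block dominates the IV increment
}

void f2(int *p, const int *q, int n, int v) {
  for (int i = 0; i < n; ++i)
    if (q[i])
      p[i] = v; // conditional fixup: not executed on every path to the latch
}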
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index e043d07..08be5df 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1534,8 +1534,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
   bool SrcNotDom = false;
 
   auto CaptureTrackingWithModRef =
-      [&](Instruction *AI,
-          function_ref<bool(Instruction *)> ModRefCallback) -> bool {
+      [&](Instruction *AI, function_ref<bool(Instruction *)> ModRefCallback,
+          bool &AddressCaptured) -> bool {
     SmallVector<Instruction *, 8> Worklist;
     Worklist.push_back(AI);
     unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
@@ -1559,8 +1559,9 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
         if (!Visited.insert(&U).second)
           continue;
         UseCaptureInfo CI = DetermineUseCaptureKind(U, AI);
-        if (capturesAnything(CI.UseCC))
+        if (capturesAnyProvenance(CI.UseCC))
           return false;
+        AddressCaptured |= capturesAddress(CI.UseCC);
 
         if (UI->mayReadOrWriteMemory()) {
           if (UI->isLifetimeStartOrEnd()) {
@@ -1627,7 +1628,9 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
     return true;
   };
 
-  if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
+  bool DestAddressCaptured = false;
+  if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback,
+                                 DestAddressCaptured))
     return false;
   // Bailout if Dest may have any ModRef before Store.
   if (!ReachabilityWorklist.empty() &&
@@ -1653,7 +1656,14 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
     return true;
   };
 
-  if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
+  bool SrcAddressCaptured = false;
+  if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback,
+                                 SrcAddressCaptured))
     return false;
+
+  // If both the source and destination address are captured, the fact that they
+  // are no longer two separate allocations may be observed.
+  if (DestAddressCaptured && SrcAddressCaptured)
+    return false;
 
   // We can do the transformation. First, move the SrcAlloca to the start of the
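The MemCpyOpt change distinguishes provenance captures (which still abort the stack-move optimization) from plain address captures, which are now only fatal when both allocas have their address captured: once the two slots are merged into one, pointer identity between them becomes observable. A standalone illustration of that concern (plain C++, not LLVM code; the helper name is made up):

#include <cstdio>
#include <cstring>

// An address-only use: the pointers are compared, never dereferenced.
bool observed(const void *a, const void *b) { return a == b; }

void demo() {
  char src[16] = "payload";
  char dst[16];
  std::memcpy(dst, src, sizeof src);
  // Both addresses escape to `observed`; folding dst and src into a single
  // allocation would flip this comparison from false to true.
  std::printf("%d\n", observed(src, dst));
}

int main() { demo(); }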
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 5af6c96..bb6c879 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -81,6 +81,7 @@ STATISTIC(
 STATISTIC(NumInvariantConditionsInjected,
           "Number of invariant conditions injected and unswitched");
 
+namespace llvm {
 static cl::opt<bool> EnableNonTrivialUnswitch(
     "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
     cl::desc("Forcibly enables non-trivial loop unswitching rather than "
@@ -131,11 +132,17 @@ static cl::opt<bool> InjectInvariantConditions(
 
 static cl::opt<unsigned> InjectInvariantConditionHotnesThreshold(
     "simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
-    cl::Hidden, cl::desc("Only try to inject loop invariant conditions and "
-                         "unswitch on them to eliminate branches that are "
-                         "not-taken 1/<this option> times or less."),
+    cl::Hidden,
+    cl::desc("Only try to inject loop invariant conditions and "
+             "unswitch on them to eliminate branches that are "
+             "not-taken 1/<this option> times or less."),
     cl::init(16));
 
+static cl::opt<bool> EstimateProfile("simple-loop-unswitch-estimate-profile",
+                                     cl::Hidden, cl::init(true));
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+} // namespace llvm
+
 AnalysisKey ShouldRunExtraSimpleLoopUnswitch::Key;
 namespace {
 struct CompareDesc {
@@ -268,13 +275,42 @@ static bool areLoopExitPHIsLoopInvariant(const Loop &L,
   llvm_unreachable("Basic blocks should never be empty!");
 }
 
-/// Copy a set of loop invariant values \p ToDuplicate and insert them at the
+/// Copy a set of loop invariant values \p Invariants and insert them at the
 /// end of \p BB and conditionally branch on the copied condition. We only
 /// branch on a single value.
+/// We attempt to estimate the profile of the resulting conditional branch from
+/// \p ComputeProfFrom, which is the original conditional branch we're
+/// unswitching.
+/// When \p Direction is true, the \p Invariants form a disjunction, and the
+/// branch conditioned on it exits the loop on the "true" case. When \p
+/// Direction is false, the \p Invariants form a conjunction and the branch
+/// exits on the "false" case.
 static void buildPartialUnswitchConditionalBranch(
     BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
     BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze,
-    const Instruction *I, AssumptionCache *AC, const DominatorTree &DT) {
+    const Instruction *I, AssumptionCache *AC, const DominatorTree &DT,
+    const BranchInst &ComputeProfFrom) {
+
+  SmallVector<uint32_t> BranchWeights;
+  bool HasBranchWeights = EstimateProfile && !ProfcheckDisableMetadataFixes &&
+                          extractBranchWeights(ComputeProfFrom, BranchWeights);
+  // If Direction is true, that means we had a disjunction and that the "true"
+  // case exits. The probability of the disjunction of the subset of terms is at
+  // most as high as the original one. So, if the probability is higher than the
+  // one we'd assign in absence of a profile (i.e. 0.5), we will use 0.5,
+  // but if it's lower, we will use the original probability.
+  // Conversely, if Direction is false, that means we had a conjunction, and the
+  // probability of exiting is captured in the second branch weight. That
+  // probability is a disjunction (of the negation of the original terms). The
+  // same reasoning applies as above.
+  // Issue #165649: should we expect BFI to conserve, and use that to calculate
+  // the branch weights?
+  if (HasBranchWeights &&
+      static_cast<double>(BranchWeights[Direction ? 0 : 1]) /
+              static_cast<double>(sum_of(BranchWeights)) >
+          0.5)
+    HasBranchWeights = false;
+
   IRBuilder<> IRB(&BB);
   IRB.SetCurrentDebugLocation(DebugLoc::getCompilerGenerated());
 
@@ -287,8 +323,14 @@ static void buildPartialUnswitchConditionalBranch(
 
   Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants)
                           : IRB.CreateAnd(FrozenInvariants);
-  IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
-                   Direction ? &NormalSucc : &UnswitchedSucc);
+  auto *BR = IRB.CreateCondBr(
+      Cond, Direction ? &UnswitchedSucc : &NormalSucc,
+      Direction ? &NormalSucc : &UnswitchedSucc,
+      HasBranchWeights ? ComputeProfFrom.getMetadata(LLVMContext::MD_prof)
+                       : nullptr);
+  if (!HasBranchWeights)
+    setExplicitlyUnknownBranchWeightsIfProfiled(
+        *BR, *BR->getParent()->getParent(), DEBUG_TYPE);
 }
 
 /// Copy a set of loop invariant values, and conditionally branch on them.
@@ -658,7 +700,7 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
              " condition!");
     buildPartialUnswitchConditionalBranch(
         *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
-        FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT);
+        FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT, BI);
   }
 
   // Update the dominator tree with the added edge.
@@ -2477,7 +2519,7 @@ static void unswitchNontrivialInvariants(
     else {
       buildPartialUnswitchConditionalBranch(
           *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
-          FreezeLoopUnswitchCond, BI, &AC, DT);
+          FreezeLoopUnswitchCond, BI, &AC, DT, *BI);
     }
 
     DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
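The unswitching change reuses the original branch's !prof metadata for the new partially-unswitched branch only when that is conservative: the original weights bound the exit probability of the unswitched subset from above, so they are kept when the implied exit probability is at most 0.5 and dropped (marked explicitly unknown) otherwise. A standalone sketch of that acceptance test (plain C++, not LLVM code; the helper name and sample weights are illustrative):

#include <cstdint>
#include <iostream>
#include <numeric>
#include <vector>

// Direction selects which weight is the exit weight, as in
// buildPartialUnswitchConditionalBranch: index 0 when the "true" edge exits.
bool keepOriginalWeights(const std::vector<uint32_t> &W, bool Direction) {
  double Total = std::accumulate(W.begin(), W.end(), 0.0);
  double ExitWeight = W[Direction ? 0 : 1];
  return ExitWeight / Total <= 0.5; // otherwise weights are marked unknown
}

int main() {
  std::cout << keepOriginalWeights({1, 15}, true) << '\n'; // rare exit: 1
  std::cout << keepOriginalWeights({12, 4}, true) << '\n'; // likely exit: 0
}

The open question recorded as Issue #165649 in the patch is whether BFI conservation could be used to compute better weights instead of falling back to "unknown".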
