diff options
Diffstat (limited to 'llvm/lib/Transforms/Utils')
-rw-r--r-- | llvm/lib/Transforms/Utils/FunctionImportUtils.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Transforms/Utils/LoopPeel.cpp | 121 | ||||
-rw-r--r-- | llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 140 |
3 files changed, 84 insertions, 181 deletions
diff --git a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp index 1a9e16b..d31154f 100644 --- a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp +++ b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp @@ -17,6 +17,8 @@ using namespace llvm; +namespace llvm { + /// Uses the "source_filename" instead of a Module hash ID for the suffix of /// promoted locals during LTO. NOTE: This requires that the source filename /// has a unique name / path to avoid name collisions. @@ -35,6 +37,8 @@ cl::list<GlobalValue::GUID> MoveSymbolGUID( "used with the name of contextual profiling roots."), cl::Hidden); +} // end namespace llvm + FunctionImportGlobalProcessing::FunctionImportGlobalProcessing( Module &M, const ModuleSummaryIndex &Index, SetVector<GlobalValue *> *GlobalsToImport, bool ClearDSOLocalOnDeclarations) diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp index 735bad1..e1dcaa85 100644 --- a/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -883,84 +883,6 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, } } -struct WeightInfo { - // Weights for current iteration. - SmallVector<uint32_t> Weights; - // Weights to subtract after each iteration. - const SmallVector<uint32_t> SubWeights; -}; - -/// Update the branch weights of an exiting block of a peeled-off loop -/// iteration. -/// Let F is a weight of the edge to continue (fallthrough) into the loop. -/// Let E is a weight of the edge to an exit. -/// F/(F+E) is a probability to go to loop and E/(F+E) is a probability to -/// go to exit. -/// Then, Estimated ExitCount = F / E. -/// For I-th (counting from 0) peeled off iteration we set the weights for -/// the peeled exit as (EC - I, 1). It gives us reasonable distribution, -/// The probability to go to exit 1/(EC-I) increases. At the same time -/// the estimated exit count in the remainder loop reduces by I. -/// To avoid dealing with division rounding we can just multiple both part -/// of weights to E and use weight as (F - I * E, E). -static void updateBranchWeights(Instruction *Term, WeightInfo &Info) { - setBranchWeights(*Term, Info.Weights, /*IsExpected=*/false); - for (auto [Idx, SubWeight] : enumerate(Info.SubWeights)) - if (SubWeight != 0) - // Don't set the probability of taking the edge from latch to loop header - // to less than 1:1 ratio (meaning Weight should not be lower than - // SubWeight), as this could significantly reduce the loop's hotness, - // which would be incorrect in the case of underestimating the trip count. - Info.Weights[Idx] = - Info.Weights[Idx] > SubWeight - ? std::max(Info.Weights[Idx] - SubWeight, SubWeight) - : SubWeight; -} - -/// Initialize the weights for all exiting blocks. -static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos, - Loop *L) { - SmallVector<BasicBlock *> ExitingBlocks; - L->getExitingBlocks(ExitingBlocks); - for (BasicBlock *ExitingBlock : ExitingBlocks) { - Instruction *Term = ExitingBlock->getTerminator(); - SmallVector<uint32_t> Weights; - if (!extractBranchWeights(*Term, Weights)) - continue; - - // See the comment on updateBranchWeights() for an explanation of what we - // do here. - uint32_t FallThroughWeights = 0; - uint32_t ExitWeights = 0; - for (auto [Succ, Weight] : zip(successors(Term), Weights)) { - if (L->contains(Succ)) - FallThroughWeights += Weight; - else - ExitWeights += Weight; - } - - // Don't try to update weights for degenerate case. - if (FallThroughWeights == 0) - continue; - - SmallVector<uint32_t> SubWeights; - for (auto [Succ, Weight] : zip(successors(Term), Weights)) { - if (!L->contains(Succ)) { - // Exit weights stay the same. - SubWeights.push_back(0); - continue; - } - - // Subtract exit weights on each iteration, distributed across all - // fallthrough edges. - double W = (double)Weight / (double)FallThroughWeights; - SubWeights.push_back((uint32_t)(ExitWeights * W)); - } - - WeightInfos.insert({Term, {std::move(Weights), std::move(SubWeights)}}); - } -} - /// Clones the body of the loop L, putting it between \p InsertTop and \p /// InsertBot. /// \param IterNumber The serial number of the iteration currently being @@ -1332,11 +1254,6 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, Instruction *LatchTerm = cast<Instruction>(cast<BasicBlock>(Latch)->getTerminator()); - // If we have branch weight information, we'll want to update it for the - // newly created branches. - DenseMap<Instruction *, WeightInfo> Weights; - initBranchWeights(Weights, L); - // Identify what noalias metadata is inside the loop: if it is inside the // loop, the associated metadata must be cloned for each iteration. SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes; @@ -1382,11 +1299,6 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, assert(DT.verify(DominatorTree::VerificationLevel::Fast)); #endif - for (auto &[Term, Info] : Weights) { - auto *TermCopy = cast<Instruction>(VMap[Term]); - updateBranchWeights(TermCopy, Info); - } - // Remove Loop metadata from the latch branch instruction // because it is not the Loop's latch branch anymore. auto *LatchTermCopy = cast<Instruction>(VMap[LatchTerm]); @@ -1426,15 +1338,38 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, } } - for (const auto &[Term, Info] : Weights) { - setBranchWeights(*Term, Info.Weights, /*IsExpected=*/false); - } - // Update Metadata for count of peeled off iterations. unsigned AlreadyPeeled = 0; if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData)) AlreadyPeeled = *Peeled; - addStringMetadataToLoop(L, PeeledCountMetaData, AlreadyPeeled + PeelCount); + unsigned TotalPeeled = AlreadyPeeled + PeelCount; + addStringMetadataToLoop(L, PeeledCountMetaData, TotalPeeled); + + // Update metadata for the estimated trip count. The original branch weight + // metadata is already correct for both the remaining loop and the peeled loop + // iterations, so do not adjust it. + // + // For example, consider what happens when peeling 2 iterations from a loop + // with an estimated trip count of 10 and inserting them before the remaining + // loop. Each of the peeled iterations and each iteration in the remaining + // loop still has the same probability of exiting the *entire original* loop + // as it did when in the original loop, and thus it should still have the same + // branch weights. The peeled iterations' non-zero probabilities of exiting + // already appropriately reduce the probability of reaching the remaining + // iterations just as they did in the original loop. Trying to also adjust + // the remaining loop's branch weights to reflect its new trip count of 8 will + // erroneously further reduce its block frequencies. However, in case an + // analysis later needs to determine the trip count of the remaining loop + // while examining it in isolation without considering the probability of + // actually reaching it, we store the new trip count as separate metadata. + if (auto EstimatedTripCount = getLoopEstimatedTripCount(L)) { + unsigned EstimatedTripCountNew = *EstimatedTripCount; + if (EstimatedTripCountNew < TotalPeeled) + EstimatedTripCountNew = 0; + else + EstimatedTripCountNew -= TotalPeeled; + setLoopEstimatedTripCount(L, EstimatedTripCountNew); + } if (Loop *ParentLoop = L->getParentLoop()) L = ParentLoop; diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 216bdf4..8bba634 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -95,7 +95,9 @@ using namespace PatternMatch; #define DEBUG_TYPE "simplifycfg" -cl::opt<bool> llvm::RequireAndPreserveDomTree( +namespace llvm { + +cl::opt<bool> RequireAndPreserveDomTree( "simplifycfg-require-and-preserve-domtree", cl::Hidden, cl::desc( @@ -205,6 +207,8 @@ static cl::opt<unsigned> MaxJumpThreadingLiveBlocks( extern cl::opt<bool> ProfcheckDisableMetadataFixes; +} // end namespace llvm + STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping"); @@ -955,33 +959,6 @@ static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1, return false; } -// Set branch weights on SwitchInst. This sets the metadata if there is at -// least one non-zero weight. -static void setBranchWeights(SwitchInst *SI, ArrayRef<uint32_t> Weights, - bool IsExpected) { - // Check that there is at least one non-zero weight. Otherwise, pass - // nullptr to setMetadata which will erase the existing metadata. - MDNode *N = nullptr; - if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; })) - N = MDBuilder(SI->getParent()->getContext()) - .createBranchWeights(Weights, IsExpected); - SI->setMetadata(LLVMContext::MD_prof, N); -} - -// Similar to the above, but for branch and select instructions that take -// exactly 2 weights. -static void setBranchWeights(Instruction *I, uint32_t TrueWeight, - uint32_t FalseWeight, bool IsExpected) { - assert(isa<BranchInst>(I) || isa<SelectInst>(I)); - // Check that there is at least one non-zero weight. Otherwise, pass - // nullptr to setMetadata which will erase the existing metadata. - MDNode *N = nullptr; - if (TrueWeight || FalseWeight) - N = MDBuilder(I->getParent()->getContext()) - .createBranchWeights(TrueWeight, FalseWeight, IsExpected); - I->setMetadata(LLVMContext::MD_prof, N); -} - /// If TI is known to be a terminator instruction and its block is known to /// only have a single predecessor block, check to see if that predecessor is /// also a value comparison with the same value, and if that comparison @@ -1181,16 +1158,6 @@ static void getBranchWeights(Instruction *TI, } } -/// Keep halving the weights until all can fit in uint32_t. -static void fitWeights(MutableArrayRef<uint64_t> Weights) { - uint64_t Max = *llvm::max_element(Weights); - if (Max > UINT_MAX) { - unsigned Offset = 32 - llvm::countl_zero(Max); - for (uint64_t &I : Weights) - I >>= Offset; - } -} - static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses( BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) { Instruction *PTI = PredBlock->getTerminator(); @@ -1446,14 +1413,9 @@ bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding( for (ValueEqualityComparisonCase &V : PredCases) NewSI->addCase(V.Value, V.Dest); - if (PredHasWeights || SuccHasWeights) { - // Halve the weights if any of them cannot fit in an uint32_t - fitWeights(Weights); - - SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); - - setBranchWeights(NewSI, MDWeights, /*IsExpected=*/false); - } + if (PredHasWeights || SuccHasWeights) + setFittedBranchWeights(*NewSI, Weights, /*IsExpected=*/false, + /*ElideAllZero=*/true); eraseTerminatorAndDCECond(PTI); @@ -4053,39 +4015,34 @@ static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, // Try to update branch weights. uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; - SmallVector<uint32_t, 2> MDWeights; + SmallVector<uint64_t, 2> MDWeights; if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight)) { - SmallVector<uint64_t, 8> NewWeights; if (PBI->getSuccessor(0) == BB) { // PBI: br i1 %x, BB, FalseDest // BI: br i1 %y, UniqueSucc, FalseDest // TrueWeight is TrueWeight for PBI * TrueWeight for BI. - NewWeights.push_back(PredTrueWeight * SuccTrueWeight); + MDWeights.push_back(PredTrueWeight * SuccTrueWeight); // FalseWeight is FalseWeight for PBI * TotalWeight for BI + // TrueWeight for PBI * FalseWeight for BI. // We assume that total weights of a BranchInst can fit into 32 bits. // Therefore, we will not have overflow using 64-bit arithmetic. - NewWeights.push_back(PredFalseWeight * - (SuccFalseWeight + SuccTrueWeight) + - PredTrueWeight * SuccFalseWeight); + MDWeights.push_back(PredFalseWeight * (SuccFalseWeight + SuccTrueWeight) + + PredTrueWeight * SuccFalseWeight); } else { // PBI: br i1 %x, TrueDest, BB // BI: br i1 %y, TrueDest, UniqueSucc // TrueWeight is TrueWeight for PBI * TotalWeight for BI + // FalseWeight for PBI * TrueWeight for BI. - NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) + - PredFalseWeight * SuccTrueWeight); + MDWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) + + PredFalseWeight * SuccTrueWeight); // FalseWeight is FalseWeight for PBI * FalseWeight for BI. - NewWeights.push_back(PredFalseWeight * SuccFalseWeight); + MDWeights.push_back(PredFalseWeight * SuccFalseWeight); } - // Halve the weights if any of them cannot fit in an uint32_t - fitWeights(NewWeights); - - append_range(MDWeights, NewWeights); - setBranchWeights(PBI, MDWeights[0], MDWeights[1], /*IsExpected=*/false); + setFittedBranchWeights(*PBI, MDWeights, /*IsExpected=*/false, + /*ElideAllZero=*/true); // TODO: If BB is reachable from all paths through PredBlock, then we // could replace PBI's branch probabilities with BI's. @@ -4125,8 +4082,8 @@ static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, if (auto *SI = dyn_cast<SelectInst>(PBI->getCondition())) if (!MDWeights.empty()) { assert(isSelectInRoleOfConjunctionOrDisjunction(SI)); - setBranchWeights(SI, MDWeights[0], MDWeights[1], - /*IsExpected=*/false); + setFittedBranchWeights(*SI, {MDWeights[0], MDWeights[1]}, + /*IsExpected=*/false, /*ElideAllZero=*/true); } ++NumFoldBranchToCommonDest; @@ -4478,9 +4435,9 @@ static bool mergeConditionalStoreToAddress( if (InvertQCond) std::swap(QWeights[0], QWeights[1]); auto CombinedWeights = getDisjunctionWeights(PWeights, QWeights); - setBranchWeights(PostBB->getTerminator(), CombinedWeights[0], - CombinedWeights[1], - /*IsExpected=*/false); + setFittedBranchWeights(*PostBB->getTerminator(), + {CombinedWeights[0], CombinedWeights[1]}, + /*IsExpected=*/false, /*ElideAllZero=*/true); } QB.SetInsertPoint(T); @@ -4836,10 +4793,9 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) + PredOther * SuccCommon, PredOther * SuccOther}; - // Halve the weights if any of them cannot fit in an uint32_t - fitWeights(NewWeights); - setBranchWeights(PBI, NewWeights[0], NewWeights[1], /*IsExpected=*/false); + setFittedBranchWeights(*PBI, NewWeights, /*IsExpected=*/false, + /*ElideAllZero=*/true); // Cond may be a select instruction with the first operand set to "true", or // the second to "false" (see how createLogicalOp works for `and` and `or`) if (!ProfcheckDisableMetadataFixes) @@ -4849,8 +4805,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, assert(dyn_cast<SelectInst>(SI)->getCondition() == PBICond); // The corresponding probabilities are what was referred to above as // PredCommon and PredOther. - setBranchWeights(SI, PredCommon, PredOther, - /*IsExpected=*/false); + setFittedBranchWeights(*SI, {PredCommon, PredOther}, + /*IsExpected=*/false, /*ElideAllZero=*/true); } } @@ -4876,8 +4832,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, if (HasWeights) { uint64_t TrueWeight = PBIOp ? PredFalseWeight : PredTrueWeight; uint64_t FalseWeight = PBIOp ? PredTrueWeight : PredFalseWeight; - setBranchWeights(NV, TrueWeight, FalseWeight, - /*IsExpected=*/false); + setFittedBranchWeights(*NV, {TrueWeight, FalseWeight}, + /*IsExpected=*/false, /*ElideAllZero=*/true); } } } @@ -4940,7 +4896,8 @@ bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm, // Create a conditional branch sharing the condition of the select. BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB); if (TrueWeight != FalseWeight) - setBranchWeights(NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false); + setBranchWeights(*NewBI, {TrueWeight, FalseWeight}, + /*IsExpected=*/false, /*ElideAllZero=*/true); } } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) { // Neither of the selected blocks were successors, so this @@ -5889,7 +5846,8 @@ bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI, TrueWeight /= 2; FalseWeight /= 2; } - setBranchWeights(NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false); + setFittedBranchWeights(*NewBI, {TrueWeight, FalseWeight}, + /*IsExpected=*/false, /*ElideAllZero=*/true); } } @@ -6364,9 +6322,9 @@ static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, // BranchWeights. We want the probability and negative probability of // Condition == SecondCase. assert(BranchWeights.size() == 3); - setBranchWeights(SI, BranchWeights[2], - BranchWeights[0] + BranchWeights[1], - /*IsExpected=*/false); + setBranchWeights( + *SI, {BranchWeights[2], BranchWeights[0] + BranchWeights[1]}, + /*IsExpected=*/false, /*ElideAllZero=*/true); } } Value *ValueCompare = @@ -6381,9 +6339,10 @@ static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, size_t FirstCasePos = (Condition != nullptr); size_t SecondCasePos = FirstCasePos + 1; uint32_t DefaultCase = (Condition != nullptr) ? BranchWeights[0] : 0; - setBranchWeights(SI, BranchWeights[FirstCasePos], - DefaultCase + BranchWeights[SecondCasePos], - /*IsExpected=*/false); + setBranchWeights(*SI, + {BranchWeights[FirstCasePos], + DefaultCase + BranchWeights[SecondCasePos]}, + /*IsExpected=*/false, /*ElideAllZero=*/true); } return Ret; } @@ -6427,8 +6386,10 @@ static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, // We know there's a Default case. We base the resulting branch // weights off its probability. assert(BranchWeights.size() >= 2); - setBranchWeights(SI, accumulate(drop_begin(BranchWeights), 0), - BranchWeights[0], /*IsExpected=*/false); + setBranchWeights( + *SI, + {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]}, + /*IsExpected=*/false, /*ElideAllZero=*/true); } return Ret; } @@ -6451,8 +6412,10 @@ static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult); if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) { assert(BranchWeights.size() >= 2); - setBranchWeights(SI, accumulate(drop_begin(BranchWeights), 0), - BranchWeights[0], /*IsExpected=*/false); + setBranchWeights( + *SI, + {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]}, + /*IsExpected=*/false, /*ElideAllZero=*/true); } return Ret; } @@ -6469,8 +6432,9 @@ static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult); if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) { assert(BranchWeights.size() >= 2); - setBranchWeights(SI, accumulate(drop_begin(BranchWeights), 0), - BranchWeights[0], /*IsExpected=*/false); + setBranchWeights( + *SI, {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]}, + /*IsExpected=*/false, /*ElideAllZero=*/true); } return Ret; } @@ -8152,8 +8116,8 @@ static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU) { if (HasWeight) { uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight, BBTWeight * BB1TWeight + BBFWeight * BB2FWeight}; - fitWeights(Weights); - setBranchWeights(BI, Weights[0], Weights[1], /*IsExpected=*/false); + setFittedBranchWeights(*BI, Weights, /*IsExpected=*/false, + /*ElideAllZero=*/true); } return true; } |