aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Utils
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Utils')
-rw-r--r--llvm/lib/Transforms/Utils/FunctionImportUtils.cpp4
-rw-r--r--llvm/lib/Transforms/Utils/LoopPeel.cpp121
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyCFG.cpp140
3 files changed, 84 insertions, 181 deletions
diff --git a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
index 1a9e16b..d31154f 100644
--- a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -17,6 +17,8 @@
using namespace llvm;
+namespace llvm {
+
/// Uses the "source_filename" instead of a Module hash ID for the suffix of
/// promoted locals during LTO. NOTE: This requires that the source filename
/// has a unique name / path to avoid name collisions.
@@ -35,6 +37,8 @@ cl::list<GlobalValue::GUID> MoveSymbolGUID(
"used with the name of contextual profiling roots."),
cl::Hidden);
+} // end namespace llvm
+
FunctionImportGlobalProcessing::FunctionImportGlobalProcessing(
Module &M, const ModuleSummaryIndex &Index,
SetVector<GlobalValue *> *GlobalsToImport, bool ClearDSOLocalOnDeclarations)
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index 735bad1..e1dcaa85 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -883,84 +883,6 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
}
}
-struct WeightInfo {
- // Weights for current iteration.
- SmallVector<uint32_t> Weights;
- // Weights to subtract after each iteration.
- const SmallVector<uint32_t> SubWeights;
-};
-
-/// Update the branch weights of an exiting block of a peeled-off loop
-/// iteration.
-/// Let F is a weight of the edge to continue (fallthrough) into the loop.
-/// Let E is a weight of the edge to an exit.
-/// F/(F+E) is a probability to go to loop and E/(F+E) is a probability to
-/// go to exit.
-/// Then, Estimated ExitCount = F / E.
-/// For I-th (counting from 0) peeled off iteration we set the weights for
-/// the peeled exit as (EC - I, 1). It gives us reasonable distribution,
-/// The probability to go to exit 1/(EC-I) increases. At the same time
-/// the estimated exit count in the remainder loop reduces by I.
-/// To avoid dealing with division rounding we can just multiple both part
-/// of weights to E and use weight as (F - I * E, E).
-static void updateBranchWeights(Instruction *Term, WeightInfo &Info) {
- setBranchWeights(*Term, Info.Weights, /*IsExpected=*/false);
- for (auto [Idx, SubWeight] : enumerate(Info.SubWeights))
- if (SubWeight != 0)
- // Don't set the probability of taking the edge from latch to loop header
- // to less than 1:1 ratio (meaning Weight should not be lower than
- // SubWeight), as this could significantly reduce the loop's hotness,
- // which would be incorrect in the case of underestimating the trip count.
- Info.Weights[Idx] =
- Info.Weights[Idx] > SubWeight
- ? std::max(Info.Weights[Idx] - SubWeight, SubWeight)
- : SubWeight;
-}
-
-/// Initialize the weights for all exiting blocks.
-static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos,
- Loop *L) {
- SmallVector<BasicBlock *> ExitingBlocks;
- L->getExitingBlocks(ExitingBlocks);
- for (BasicBlock *ExitingBlock : ExitingBlocks) {
- Instruction *Term = ExitingBlock->getTerminator();
- SmallVector<uint32_t> Weights;
- if (!extractBranchWeights(*Term, Weights))
- continue;
-
- // See the comment on updateBranchWeights() for an explanation of what we
- // do here.
- uint32_t FallThroughWeights = 0;
- uint32_t ExitWeights = 0;
- for (auto [Succ, Weight] : zip(successors(Term), Weights)) {
- if (L->contains(Succ))
- FallThroughWeights += Weight;
- else
- ExitWeights += Weight;
- }
-
- // Don't try to update weights for degenerate case.
- if (FallThroughWeights == 0)
- continue;
-
- SmallVector<uint32_t> SubWeights;
- for (auto [Succ, Weight] : zip(successors(Term), Weights)) {
- if (!L->contains(Succ)) {
- // Exit weights stay the same.
- SubWeights.push_back(0);
- continue;
- }
-
- // Subtract exit weights on each iteration, distributed across all
- // fallthrough edges.
- double W = (double)Weight / (double)FallThroughWeights;
- SubWeights.push_back((uint32_t)(ExitWeights * W));
- }
-
- WeightInfos.insert({Term, {std::move(Weights), std::move(SubWeights)}});
- }
-}
-
/// Clones the body of the loop L, putting it between \p InsertTop and \p
/// InsertBot.
/// \param IterNumber The serial number of the iteration currently being
@@ -1332,11 +1254,6 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
Instruction *LatchTerm =
cast<Instruction>(cast<BasicBlock>(Latch)->getTerminator());
- // If we have branch weight information, we'll want to update it for the
- // newly created branches.
- DenseMap<Instruction *, WeightInfo> Weights;
- initBranchWeights(Weights, L);
-
// Identify what noalias metadata is inside the loop: if it is inside the
// loop, the associated metadata must be cloned for each iteration.
SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
@@ -1382,11 +1299,6 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
assert(DT.verify(DominatorTree::VerificationLevel::Fast));
#endif
- for (auto &[Term, Info] : Weights) {
- auto *TermCopy = cast<Instruction>(VMap[Term]);
- updateBranchWeights(TermCopy, Info);
- }
-
// Remove Loop metadata from the latch branch instruction
// because it is not the Loop's latch branch anymore.
auto *LatchTermCopy = cast<Instruction>(VMap[LatchTerm]);
@@ -1426,15 +1338,38 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
}
}
- for (const auto &[Term, Info] : Weights) {
- setBranchWeights(*Term, Info.Weights, /*IsExpected=*/false);
- }
-
// Update Metadata for count of peeled off iterations.
unsigned AlreadyPeeled = 0;
if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
AlreadyPeeled = *Peeled;
- addStringMetadataToLoop(L, PeeledCountMetaData, AlreadyPeeled + PeelCount);
+ unsigned TotalPeeled = AlreadyPeeled + PeelCount;
+ addStringMetadataToLoop(L, PeeledCountMetaData, TotalPeeled);
+
+ // Update metadata for the estimated trip count. The original branch weight
+ // metadata is already correct for both the remaining loop and the peeled loop
+ // iterations, so do not adjust it.
+ //
+ // For example, consider what happens when peeling 2 iterations from a loop
+ // with an estimated trip count of 10 and inserting them before the remaining
+ // loop. Each of the peeled iterations and each iteration in the remaining
+ // loop still has the same probability of exiting the *entire original* loop
+ // as it did when in the original loop, and thus it should still have the same
+ // branch weights. The peeled iterations' non-zero probabilities of exiting
+ // already appropriately reduce the probability of reaching the remaining
+ // iterations just as they did in the original loop. Trying to also adjust
+ // the remaining loop's branch weights to reflect its new trip count of 8 will
+ // erroneously further reduce its block frequencies. However, in case an
+ // analysis later needs to determine the trip count of the remaining loop
+ // while examining it in isolation without considering the probability of
+ // actually reaching it, we store the new trip count as separate metadata.
+ if (auto EstimatedTripCount = getLoopEstimatedTripCount(L)) {
+ unsigned EstimatedTripCountNew = *EstimatedTripCount;
+ if (EstimatedTripCountNew < TotalPeeled)
+ EstimatedTripCountNew = 0;
+ else
+ EstimatedTripCountNew -= TotalPeeled;
+ setLoopEstimatedTripCount(L, EstimatedTripCountNew);
+ }
if (Loop *ParentLoop = L->getParentLoop())
L = ParentLoop;
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 216bdf4..8bba634 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -95,7 +95,9 @@ using namespace PatternMatch;
#define DEBUG_TYPE "simplifycfg"
-cl::opt<bool> llvm::RequireAndPreserveDomTree(
+namespace llvm {
+
+cl::opt<bool> RequireAndPreserveDomTree(
"simplifycfg-require-and-preserve-domtree", cl::Hidden,
cl::desc(
@@ -205,6 +207,8 @@ static cl::opt<unsigned> MaxJumpThreadingLiveBlocks(
extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+} // end namespace llvm
+
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLinearMaps,
"Number of switch instructions turned into linear mapping");
@@ -955,33 +959,6 @@ static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
return false;
}
-// Set branch weights on SwitchInst. This sets the metadata if there is at
-// least one non-zero weight.
-static void setBranchWeights(SwitchInst *SI, ArrayRef<uint32_t> Weights,
- bool IsExpected) {
- // Check that there is at least one non-zero weight. Otherwise, pass
- // nullptr to setMetadata which will erase the existing metadata.
- MDNode *N = nullptr;
- if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; }))
- N = MDBuilder(SI->getParent()->getContext())
- .createBranchWeights(Weights, IsExpected);
- SI->setMetadata(LLVMContext::MD_prof, N);
-}
-
-// Similar to the above, but for branch and select instructions that take
-// exactly 2 weights.
-static void setBranchWeights(Instruction *I, uint32_t TrueWeight,
- uint32_t FalseWeight, bool IsExpected) {
- assert(isa<BranchInst>(I) || isa<SelectInst>(I));
- // Check that there is at least one non-zero weight. Otherwise, pass
- // nullptr to setMetadata which will erase the existing metadata.
- MDNode *N = nullptr;
- if (TrueWeight || FalseWeight)
- N = MDBuilder(I->getParent()->getContext())
- .createBranchWeights(TrueWeight, FalseWeight, IsExpected);
- I->setMetadata(LLVMContext::MD_prof, N);
-}
-
/// If TI is known to be a terminator instruction and its block is known to
/// only have a single predecessor block, check to see if that predecessor is
/// also a value comparison with the same value, and if that comparison
@@ -1181,16 +1158,6 @@ static void getBranchWeights(Instruction *TI,
}
}
-/// Keep halving the weights until all can fit in uint32_t.
-static void fitWeights(MutableArrayRef<uint64_t> Weights) {
- uint64_t Max = *llvm::max_element(Weights);
- if (Max > UINT_MAX) {
- unsigned Offset = 32 - llvm::countl_zero(Max);
- for (uint64_t &I : Weights)
- I >>= Offset;
- }
-}
-
static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
Instruction *PTI = PredBlock->getTerminator();
@@ -1446,14 +1413,9 @@ bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
for (ValueEqualityComparisonCase &V : PredCases)
NewSI->addCase(V.Value, V.Dest);
- if (PredHasWeights || SuccHasWeights) {
- // Halve the weights if any of them cannot fit in an uint32_t
- fitWeights(Weights);
-
- SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
-
- setBranchWeights(NewSI, MDWeights, /*IsExpected=*/false);
- }
+ if (PredHasWeights || SuccHasWeights)
+ setFittedBranchWeights(*NewSI, Weights, /*IsExpected=*/false,
+ /*ElideAllZero=*/true);
eraseTerminatorAndDCECond(PTI);
@@ -4053,39 +4015,34 @@ static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
// Try to update branch weights.
uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
- SmallVector<uint32_t, 2> MDWeights;
+ SmallVector<uint64_t, 2> MDWeights;
if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
SuccTrueWeight, SuccFalseWeight)) {
- SmallVector<uint64_t, 8> NewWeights;
if (PBI->getSuccessor(0) == BB) {
// PBI: br i1 %x, BB, FalseDest
// BI: br i1 %y, UniqueSucc, FalseDest
// TrueWeight is TrueWeight for PBI * TrueWeight for BI.
- NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
+ MDWeights.push_back(PredTrueWeight * SuccTrueWeight);
// FalseWeight is FalseWeight for PBI * TotalWeight for BI +
// TrueWeight for PBI * FalseWeight for BI.
// We assume that total weights of a BranchInst can fit into 32 bits.
// Therefore, we will not have overflow using 64-bit arithmetic.
- NewWeights.push_back(PredFalseWeight *
- (SuccFalseWeight + SuccTrueWeight) +
- PredTrueWeight * SuccFalseWeight);
+ MDWeights.push_back(PredFalseWeight * (SuccFalseWeight + SuccTrueWeight) +
+ PredTrueWeight * SuccFalseWeight);
} else {
// PBI: br i1 %x, TrueDest, BB
// BI: br i1 %y, TrueDest, UniqueSucc
// TrueWeight is TrueWeight for PBI * TotalWeight for BI +
// FalseWeight for PBI * TrueWeight for BI.
- NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
- PredFalseWeight * SuccTrueWeight);
+ MDWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
+ PredFalseWeight * SuccTrueWeight);
// FalseWeight is FalseWeight for PBI * FalseWeight for BI.
- NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
+ MDWeights.push_back(PredFalseWeight * SuccFalseWeight);
}
- // Halve the weights if any of them cannot fit in an uint32_t
- fitWeights(NewWeights);
-
- append_range(MDWeights, NewWeights);
- setBranchWeights(PBI, MDWeights[0], MDWeights[1], /*IsExpected=*/false);
+ setFittedBranchWeights(*PBI, MDWeights, /*IsExpected=*/false,
+ /*ElideAllZero=*/true);
// TODO: If BB is reachable from all paths through PredBlock, then we
// could replace PBI's branch probabilities with BI's.
@@ -4125,8 +4082,8 @@ static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
if (auto *SI = dyn_cast<SelectInst>(PBI->getCondition()))
if (!MDWeights.empty()) {
assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
- setBranchWeights(SI, MDWeights[0], MDWeights[1],
- /*IsExpected=*/false);
+ setFittedBranchWeights(*SI, {MDWeights[0], MDWeights[1]},
+ /*IsExpected=*/false, /*ElideAllZero=*/true);
}
++NumFoldBranchToCommonDest;
@@ -4478,9 +4435,9 @@ static bool mergeConditionalStoreToAddress(
if (InvertQCond)
std::swap(QWeights[0], QWeights[1]);
auto CombinedWeights = getDisjunctionWeights(PWeights, QWeights);
- setBranchWeights(PostBB->getTerminator(), CombinedWeights[0],
- CombinedWeights[1],
- /*IsExpected=*/false);
+ setFittedBranchWeights(*PostBB->getTerminator(),
+ {CombinedWeights[0], CombinedWeights[1]},
+ /*IsExpected=*/false, /*ElideAllZero=*/true);
}
QB.SetInsertPoint(T);
@@ -4836,10 +4793,9 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
PredOther * SuccCommon,
PredOther * SuccOther};
- // Halve the weights if any of them cannot fit in an uint32_t
- fitWeights(NewWeights);
- setBranchWeights(PBI, NewWeights[0], NewWeights[1], /*IsExpected=*/false);
+ setFittedBranchWeights(*PBI, NewWeights, /*IsExpected=*/false,
+ /*ElideAllZero=*/true);
// Cond may be a select instruction with the first operand set to "true", or
// the second to "false" (see how createLogicalOp works for `and` and `or`)
if (!ProfcheckDisableMetadataFixes)
@@ -4849,8 +4805,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
assert(dyn_cast<SelectInst>(SI)->getCondition() == PBICond);
// The corresponding probabilities are what was referred to above as
// PredCommon and PredOther.
- setBranchWeights(SI, PredCommon, PredOther,
- /*IsExpected=*/false);
+ setFittedBranchWeights(*SI, {PredCommon, PredOther},
+ /*IsExpected=*/false, /*ElideAllZero=*/true);
}
}
@@ -4876,8 +4832,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
if (HasWeights) {
uint64_t TrueWeight = PBIOp ? PredFalseWeight : PredTrueWeight;
uint64_t FalseWeight = PBIOp ? PredTrueWeight : PredFalseWeight;
- setBranchWeights(NV, TrueWeight, FalseWeight,
- /*IsExpected=*/false);
+ setFittedBranchWeights(*NV, {TrueWeight, FalseWeight},
+ /*IsExpected=*/false, /*ElideAllZero=*/true);
}
}
}
@@ -4940,7 +4896,8 @@ bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
// Create a conditional branch sharing the condition of the select.
BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
if (TrueWeight != FalseWeight)
- setBranchWeights(NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false);
+ setBranchWeights(*NewBI, {TrueWeight, FalseWeight},
+ /*IsExpected=*/false, /*ElideAllZero=*/true);
}
} else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
// Neither of the selected blocks were successors, so this
@@ -5889,7 +5846,8 @@ bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
TrueWeight /= 2;
FalseWeight /= 2;
}
- setBranchWeights(NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false);
+ setFittedBranchWeights(*NewBI, {TrueWeight, FalseWeight},
+ /*IsExpected=*/false, /*ElideAllZero=*/true);
}
}
@@ -6364,9 +6322,9 @@ static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
// BranchWeights. We want the probability and negative probability of
// Condition == SecondCase.
assert(BranchWeights.size() == 3);
- setBranchWeights(SI, BranchWeights[2],
- BranchWeights[0] + BranchWeights[1],
- /*IsExpected=*/false);
+ setBranchWeights(
+ *SI, {BranchWeights[2], BranchWeights[0] + BranchWeights[1]},
+ /*IsExpected=*/false, /*ElideAllZero=*/true);
}
}
Value *ValueCompare =
@@ -6381,9 +6339,10 @@ static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
size_t FirstCasePos = (Condition != nullptr);
size_t SecondCasePos = FirstCasePos + 1;
uint32_t DefaultCase = (Condition != nullptr) ? BranchWeights[0] : 0;
- setBranchWeights(SI, BranchWeights[FirstCasePos],
- DefaultCase + BranchWeights[SecondCasePos],
- /*IsExpected=*/false);
+ setBranchWeights(*SI,
+ {BranchWeights[FirstCasePos],
+ DefaultCase + BranchWeights[SecondCasePos]},
+ /*IsExpected=*/false, /*ElideAllZero=*/true);
}
return Ret;
}
@@ -6427,8 +6386,10 @@ static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
// We know there's a Default case. We base the resulting branch
// weights off its probability.
assert(BranchWeights.size() >= 2);
- setBranchWeights(SI, accumulate(drop_begin(BranchWeights), 0),
- BranchWeights[0], /*IsExpected=*/false);
+ setBranchWeights(
+ *SI,
+ {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
+ /*IsExpected=*/false, /*ElideAllZero=*/true);
}
return Ret;
}
@@ -6451,8 +6412,10 @@ static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
assert(BranchWeights.size() >= 2);
- setBranchWeights(SI, accumulate(drop_begin(BranchWeights), 0),
- BranchWeights[0], /*IsExpected=*/false);
+ setBranchWeights(
+ *SI,
+ {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
+ /*IsExpected=*/false, /*ElideAllZero=*/true);
}
return Ret;
}
@@ -6469,8 +6432,9 @@ static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
assert(BranchWeights.size() >= 2);
- setBranchWeights(SI, accumulate(drop_begin(BranchWeights), 0),
- BranchWeights[0], /*IsExpected=*/false);
+ setBranchWeights(
+ *SI, {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
+ /*IsExpected=*/false, /*ElideAllZero=*/true);
}
return Ret;
}
@@ -8152,8 +8116,8 @@ static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU) {
if (HasWeight) {
uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
- fitWeights(Weights);
- setBranchWeights(BI, Weights[0], Weights[1], /*IsExpected=*/false);
+ setFittedBranchWeights(*BI, Weights, /*IsExpected=*/false,
+ /*ElideAllZero=*/true);
}
return true;
}