aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShatianWang <38512325+ShatianWang@users.noreply.github.com>2023-11-29 15:43:21 -0500
committerGitHub <noreply@github.com>2023-11-29 15:43:21 -0500
commit076bd22f579ec4c90b41c700fca21a90fd2b6dbc (patch)
treef7b1ff09f16173345b06ca587278c0163a2f00e4
parent69b0cb9c567eb0f937474f5424b9ed23b61c04d7 (diff)
downloadllvm-076bd22f579ec4c90b41c700fca21a90fd2b6dbc.zip
llvm-076bd22f579ec4c90b41c700fca21a90fd2b6dbc.tar.gz
llvm-076bd22f579ec4c90b41c700fca21a90fd2b6dbc.tar.bz2
[BOLT] Add structure of CDSplit to SplitFunctions (#73430)
This commit establishes the general structure of the CDSplit strategy in SplitFunctions without incorporating the exact splitting logic. With -split-functions -split-strategy=cdsplit, the SplitFunctions pass will run twice: the first time is before function reordering and functions are hot-cold split; the second time is after function reordering and functions are hot-warm-cold split based on the fixed function ordering. Currently, all functions are hot-warm split after the entry block in the second splitting pass. Subsequent commits will introduce the precise splitting logic. NFC.
-rw-r--r--bolt/include/bolt/Core/BinaryContext.h3
-rw-r--r--bolt/include/bolt/Passes/SplitFunctions.h5
-rw-r--r--bolt/lib/Passes/ReorderFunctions.cpp2
-rw-r--r--bolt/lib/Passes/SplitFunctions.cpp85
-rw-r--r--bolt/lib/Rewrite/BinaryPassManager.cpp7
5 files changed, 93 insertions, 9 deletions
diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 312678f..48a8b2a 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -611,6 +611,9 @@ public:
/// Indicates if the binary contains split functions.
bool HasSplitFunctions{false};
+ /// Indicates if the function ordering of the binary is finalized.
+ bool HasFinalizedFunctionOrder{false};
+
/// Is the binary always loaded at a fixed address. Shared objects and
/// position-independent executables (PIEs) are examples of binaries that
/// will have HasFixedLoadAddress set to false.
diff --git a/bolt/include/bolt/Passes/SplitFunctions.h b/bolt/include/bolt/Passes/SplitFunctions.h
index 4058f33..28e9e79 100644
--- a/bolt/include/bolt/Passes/SplitFunctions.h
+++ b/bolt/include/bolt/Passes/SplitFunctions.h
@@ -23,6 +23,9 @@ enum SplitFunctionsStrategy : char {
/// Split each function into a hot and cold fragment using profiling
/// information.
Profile2 = 0,
+ /// Split each function into a hot, warm, and cold fragment using
+ /// profiling information.
+ CDSplit,
/// Split each function into a hot and cold fragment at a randomly chosen
/// split point (ignoring any available profiling information).
Random2,
@@ -40,7 +43,7 @@ public:
virtual ~SplitStrategy() = default;
virtual bool canSplit(const BinaryFunction &BF) = 0;
- virtual bool keepEmpty() = 0;
+ virtual bool compactFragments() = 0;
virtual void fragment(const BlockIt Start, const BlockIt End) = 0;
};
diff --git a/bolt/lib/Passes/ReorderFunctions.cpp b/bolt/lib/Passes/ReorderFunctions.cpp
index 92c50f4..7e67296 100644
--- a/bolt/lib/Passes/ReorderFunctions.cpp
+++ b/bolt/lib/Passes/ReorderFunctions.cpp
@@ -427,6 +427,8 @@ void ReorderFunctions::runOnFunctions(BinaryContext &BC) {
reorder(std::move(Clusters), BFs);
+ BC.HasFinalizedFunctionOrder = true;
+
std::unique_ptr<std::ofstream> FuncsFile;
if (!opts::GenerateFunctionOrderFile.empty()) {
FuncsFile = std::make_unique<std::ofstream>(opts::GenerateFunctionOrderFile,
diff --git a/bolt/lib/Passes/SplitFunctions.cpp b/bolt/lib/Passes/SplitFunctions.cpp
index 34973ce..af2a5aa 100644
--- a/bolt/lib/Passes/SplitFunctions.cpp
+++ b/bolt/lib/Passes/SplitFunctions.cpp
@@ -92,6 +92,9 @@ static cl::opt<SplitFunctionsStrategy> SplitStrategy(
cl::values(clEnumValN(SplitFunctionsStrategy::Profile2, "profile2",
"split each function into a hot and cold fragment "
"using profiling information")),
+ cl::values(clEnumValN(SplitFunctionsStrategy::CDSplit, "cdsplit",
+ "split each function into a hot, warm, and cold "
+ "fragment using profiling information")),
cl::values(clEnumValN(
SplitFunctionsStrategy::Random2, "random2",
"split each function into a hot and cold fragment at a randomly chosen "
@@ -126,7 +129,7 @@ struct SplitProfile2 final : public SplitStrategy {
return BF.hasValidProfile() && hasFullProfile(BF) && !allBlocksCold(BF);
}
- bool keepEmpty() override { return false; }
+ bool compactFragments() override { return true; }
void fragment(const BlockIt Start, const BlockIt End) override {
for (BinaryBasicBlock *const BB : llvm::make_range(Start, End)) {
@@ -136,6 +139,51 @@ struct SplitProfile2 final : public SplitStrategy {
}
};
+struct SplitCacheDirected final : public SplitStrategy {
+ using BasicBlockOrder = BinaryFunction::BasicBlockOrderType;
+
+ bool canSplit(const BinaryFunction &BF) override {
+ return BF.hasValidProfile() && hasFullProfile(BF) && !allBlocksCold(BF);
+ }
+
+ // When some functions are hot-warm split and others are hot-warm-cold split,
+ // we do not want to change the fragment numbers of the blocks in the hot-warm
+ // split functions.
+ bool compactFragments() override { return false; }
+
+ void fragment(const BlockIt Start, const BlockIt End) override {
+ BasicBlockOrder BlockOrder(Start, End);
+ BinaryFunction &BF = *BlockOrder.front()->getFunction();
+
+ size_t BestSplitIndex = findSplitIndex(BF, BlockOrder);
+
+ // Assign fragments based on the computed best split index.
+ // All basic blocks with index up to the best split index become hot.
+ // All remaining blocks are warm / cold depending on if count is
+ // greater than 0 or not.
+ FragmentNum Main(0);
+ FragmentNum Cold(1);
+ FragmentNum Warm(2);
+ for (size_t Index = 0; Index < BlockOrder.size(); Index++) {
+ BinaryBasicBlock *BB = BlockOrder[Index];
+ if (Index <= BestSplitIndex)
+ BB->setFragmentNum(Main);
+ else
+ BB->setFragmentNum(BB->getKnownExecutionCount() > 0 ? Warm : Cold);
+ }
+ }
+
+private:
+ /// Find the best index for splitting. The returned value is the index of the
+ /// last hot basic block. Hence, "no splitting" is equivalent to returning the
+ /// value which is one less than the size of the function.
+ size_t findSplitIndex(const BinaryFunction &BF,
+ const BasicBlockOrder &BlockOrder) {
+ // Placeholder: hot-warm split after entry block.
+ return 0;
+ }
+};
+
struct SplitRandom2 final : public SplitStrategy {
std::minstd_rand0 Gen;
@@ -143,7 +191,7 @@ struct SplitRandom2 final : public SplitStrategy {
bool canSplit(const BinaryFunction &BF) override { return true; }
- bool keepEmpty() override { return false; }
+ bool compactFragments() override { return true; }
void fragment(const BlockIt Start, const BlockIt End) override {
using DiffT = typename std::iterator_traits<BlockIt>::difference_type;
@@ -170,7 +218,7 @@ struct SplitRandomN final : public SplitStrategy {
bool canSplit(const BinaryFunction &BF) override { return true; }
- bool keepEmpty() override { return false; }
+ bool compactFragments() override { return true; }
void fragment(const BlockIt Start, const BlockIt End) override {
using DiffT = typename std::iterator_traits<BlockIt>::difference_type;
@@ -217,10 +265,10 @@ struct SplitRandomN final : public SplitStrategy {
struct SplitAll final : public SplitStrategy {
bool canSplit(const BinaryFunction &BF) override { return true; }
- bool keepEmpty() override {
+ bool compactFragments() override {
// Keeping empty fragments allows us to test, that empty fragments do not
// generate symbols.
- return true;
+ return false;
}
void fragment(const BlockIt Start, const BlockIt End) override {
@@ -246,10 +294,26 @@ void SplitFunctions::runOnFunctions(BinaryContext &BC) {
if (!opts::SplitFunctions)
return;
+ // If split strategy is not CDSplit, then a second run of the pass is not
+ // needed after function reordering.
+ if (BC.HasFinalizedFunctionOrder &&
+ opts::SplitStrategy != SplitFunctionsStrategy::CDSplit)
+ return;
+
std::unique_ptr<SplitStrategy> Strategy;
bool ForceSequential = false;
switch (opts::SplitStrategy) {
+ case SplitFunctionsStrategy::CDSplit:
+ // CDSplit runs two splitting passes: hot-cold splitting (SplitPrfoile2)
+ // before function reordering and hot-warm-cold splitting
+ // (SplitCacheDirected) after function reordering.
+ if (BC.HasFinalizedFunctionOrder)
+ Strategy = std::make_unique<SplitCacheDirected>();
+ else
+ Strategy = std::make_unique<SplitProfile2>();
+ opts::AggressiveSplitting = true;
+ break;
case SplitFunctionsStrategy::Profile2:
Strategy = std::make_unique<SplitProfile2>();
break;
@@ -382,7 +446,7 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) {
CurrentFragment = BB->getFragmentNum();
}
- if (!S.keepEmpty()) {
+ if (S.compactFragments()) {
FragmentNum CurrentFragment = FragmentNum::main();
FragmentNum NewFragment = FragmentNum::main();
for (BinaryBasicBlock *const BB : NewLayout) {
@@ -394,7 +458,7 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) {
}
}
- BF.getLayout().update(NewLayout);
+ const bool LayoutUpdated = BF.getLayout().update(NewLayout);
// For shared objects, invoke instructions and corresponding landing pads
// have to be placed in the same fragment. When we split them, create
@@ -404,7 +468,7 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) {
Trampolines = createEHTrampolines(BF);
// Check the new size to see if it's worth splitting the function.
- if (BC.isX86() && BF.isSplit()) {
+ if (BC.isX86() && LayoutUpdated) {
std::tie(HotSize, ColdSize) = BC.calculateEmittedSize(BF);
LLVM_DEBUG(dbgs() << "Estimated size for function " << BF
<< " post-split is <0x" << Twine::utohexstr(HotSize)
@@ -431,6 +495,11 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) {
SplitBytesCold += ColdSize;
}
}
+
+ // Fix branches if the splitting decision of the pass after function
+ // reordering is different from that of the pass before function reordering.
+ if (LayoutUpdated && BC.HasFinalizedFunctionOrder)
+ BF.fixBranches();
}
SplitFunctions::TrampolineSetType
diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
index 37de3ea..9946608 100644
--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
+++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
@@ -430,6 +430,13 @@ void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
Manager.registerPass(
std::make_unique<ReorderFunctions>(PrintReorderedFunctions));
+ // This is the second run of the SplitFunctions pass required by certain
+ // splitting strategies (e.g. cdsplit). Running the SplitFunctions pass again
+ // after ReorderFunctions allows the finalized function order to be utilized
+ // to make more sophisticated splitting decisions, like hot-warm-cold
+ // splitting.
+ Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit));
+
// Print final dyno stats right while CFG and instruction analysis are intact.
Manager.registerPass(
std::make_unique<DynoStatsPrintPass>(