diff options
author | ShatianWang <38512325+ShatianWang@users.noreply.github.com> | 2023-11-29 15:43:21 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-11-29 15:43:21 -0500 |
commit | 076bd22f579ec4c90b41c700fca21a90fd2b6dbc (patch) | |
tree | f7b1ff09f16173345b06ca587278c0163a2f00e4 | |
parent | 69b0cb9c567eb0f937474f5424b9ed23b61c04d7 (diff) | |
download | llvm-076bd22f579ec4c90b41c700fca21a90fd2b6dbc.zip llvm-076bd22f579ec4c90b41c700fca21a90fd2b6dbc.tar.gz llvm-076bd22f579ec4c90b41c700fca21a90fd2b6dbc.tar.bz2 |
[BOLT] Add structure of CDSplit to SplitFunctions (#73430)
This commit establishes the general structure of the CDSplit strategy in
SplitFunctions without incorporating the exact splitting logic. With
-split-functions -split-strategy=cdsplit, the SplitFunctions pass will
run twice: the first time is before function reordering and functions
are hot-cold split; the second time is after function reordering and
functions are hot-warm-cold split based on the fixed function ordering.
Currently, all functions are hot-warm split after the entry block in the
second splitting pass. Subsequent commits will introduce the precise
splitting logic. NFC.
-rw-r--r-- | bolt/include/bolt/Core/BinaryContext.h | 3 | ||||
-rw-r--r-- | bolt/include/bolt/Passes/SplitFunctions.h | 5 | ||||
-rw-r--r-- | bolt/lib/Passes/ReorderFunctions.cpp | 2 | ||||
-rw-r--r-- | bolt/lib/Passes/SplitFunctions.cpp | 85 | ||||
-rw-r--r-- | bolt/lib/Rewrite/BinaryPassManager.cpp | 7 |
5 files changed, 93 insertions, 9 deletions
diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index 312678f..48a8b2a 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -611,6 +611,9 @@ public: /// Indicates if the binary contains split functions. bool HasSplitFunctions{false}; + /// Indicates if the function ordering of the binary is finalized. + bool HasFinalizedFunctionOrder{false}; + /// Is the binary always loaded at a fixed address. Shared objects and /// position-independent executables (PIEs) are examples of binaries that /// will have HasFixedLoadAddress set to false. diff --git a/bolt/include/bolt/Passes/SplitFunctions.h b/bolt/include/bolt/Passes/SplitFunctions.h index 4058f33..28e9e79 100644 --- a/bolt/include/bolt/Passes/SplitFunctions.h +++ b/bolt/include/bolt/Passes/SplitFunctions.h @@ -23,6 +23,9 @@ enum SplitFunctionsStrategy : char { /// Split each function into a hot and cold fragment using profiling /// information. Profile2 = 0, + /// Split each function into a hot, warm, and cold fragment using + /// profiling information. + CDSplit, /// Split each function into a hot and cold fragment at a randomly chosen /// split point (ignoring any available profiling information). Random2, @@ -40,7 +43,7 @@ public: virtual ~SplitStrategy() = default; virtual bool canSplit(const BinaryFunction &BF) = 0; - virtual bool keepEmpty() = 0; + virtual bool compactFragments() = 0; virtual void fragment(const BlockIt Start, const BlockIt End) = 0; }; diff --git a/bolt/lib/Passes/ReorderFunctions.cpp b/bolt/lib/Passes/ReorderFunctions.cpp index 92c50f4..7e67296 100644 --- a/bolt/lib/Passes/ReorderFunctions.cpp +++ b/bolt/lib/Passes/ReorderFunctions.cpp @@ -427,6 +427,8 @@ void ReorderFunctions::runOnFunctions(BinaryContext &BC) { reorder(std::move(Clusters), BFs); + BC.HasFinalizedFunctionOrder = true; + std::unique_ptr<std::ofstream> FuncsFile; if (!opts::GenerateFunctionOrderFile.empty()) { FuncsFile = std::make_unique<std::ofstream>(opts::GenerateFunctionOrderFile, diff --git a/bolt/lib/Passes/SplitFunctions.cpp b/bolt/lib/Passes/SplitFunctions.cpp index 34973ce..af2a5aa 100644 --- a/bolt/lib/Passes/SplitFunctions.cpp +++ b/bolt/lib/Passes/SplitFunctions.cpp @@ -92,6 +92,9 @@ static cl::opt<SplitFunctionsStrategy> SplitStrategy( cl::values(clEnumValN(SplitFunctionsStrategy::Profile2, "profile2", "split each function into a hot and cold fragment " "using profiling information")), + cl::values(clEnumValN(SplitFunctionsStrategy::CDSplit, "cdsplit", + "split each function into a hot, warm, and cold " + "fragment using profiling information")), cl::values(clEnumValN( SplitFunctionsStrategy::Random2, "random2", "split each function into a hot and cold fragment at a randomly chosen " @@ -126,7 +129,7 @@ struct SplitProfile2 final : public SplitStrategy { return BF.hasValidProfile() && hasFullProfile(BF) && !allBlocksCold(BF); } - bool keepEmpty() override { return false; } + bool compactFragments() override { return true; } void fragment(const BlockIt Start, const BlockIt End) override { for (BinaryBasicBlock *const BB : llvm::make_range(Start, End)) { @@ -136,6 +139,51 @@ struct SplitProfile2 final : public SplitStrategy { } }; +struct SplitCacheDirected final : public SplitStrategy { + using BasicBlockOrder = BinaryFunction::BasicBlockOrderType; + + bool canSplit(const BinaryFunction &BF) override { + return BF.hasValidProfile() && hasFullProfile(BF) && !allBlocksCold(BF); + } + + // When some functions are hot-warm split and others are hot-warm-cold split, + // we do not want to change the fragment numbers of the blocks in the hot-warm + // split functions. + bool compactFragments() override { return false; } + + void fragment(const BlockIt Start, const BlockIt End) override { + BasicBlockOrder BlockOrder(Start, End); + BinaryFunction &BF = *BlockOrder.front()->getFunction(); + + size_t BestSplitIndex = findSplitIndex(BF, BlockOrder); + + // Assign fragments based on the computed best split index. + // All basic blocks with index up to the best split index become hot. + // All remaining blocks are warm / cold depending on if count is + // greater than 0 or not. + FragmentNum Main(0); + FragmentNum Cold(1); + FragmentNum Warm(2); + for (size_t Index = 0; Index < BlockOrder.size(); Index++) { + BinaryBasicBlock *BB = BlockOrder[Index]; + if (Index <= BestSplitIndex) + BB->setFragmentNum(Main); + else + BB->setFragmentNum(BB->getKnownExecutionCount() > 0 ? Warm : Cold); + } + } + +private: + /// Find the best index for splitting. The returned value is the index of the + /// last hot basic block. Hence, "no splitting" is equivalent to returning the + /// value which is one less than the size of the function. + size_t findSplitIndex(const BinaryFunction &BF, + const BasicBlockOrder &BlockOrder) { + // Placeholder: hot-warm split after entry block. + return 0; + } +}; + struct SplitRandom2 final : public SplitStrategy { std::minstd_rand0 Gen; @@ -143,7 +191,7 @@ struct SplitRandom2 final : public SplitStrategy { bool canSplit(const BinaryFunction &BF) override { return true; } - bool keepEmpty() override { return false; } + bool compactFragments() override { return true; } void fragment(const BlockIt Start, const BlockIt End) override { using DiffT = typename std::iterator_traits<BlockIt>::difference_type; @@ -170,7 +218,7 @@ struct SplitRandomN final : public SplitStrategy { bool canSplit(const BinaryFunction &BF) override { return true; } - bool keepEmpty() override { return false; } + bool compactFragments() override { return true; } void fragment(const BlockIt Start, const BlockIt End) override { using DiffT = typename std::iterator_traits<BlockIt>::difference_type; @@ -217,10 +265,10 @@ struct SplitRandomN final : public SplitStrategy { struct SplitAll final : public SplitStrategy { bool canSplit(const BinaryFunction &BF) override { return true; } - bool keepEmpty() override { + bool compactFragments() override { // Keeping empty fragments allows us to test, that empty fragments do not // generate symbols. - return true; + return false; } void fragment(const BlockIt Start, const BlockIt End) override { @@ -246,10 +294,26 @@ void SplitFunctions::runOnFunctions(BinaryContext &BC) { if (!opts::SplitFunctions) return; + // If split strategy is not CDSplit, then a second run of the pass is not + // needed after function reordering. + if (BC.HasFinalizedFunctionOrder && + opts::SplitStrategy != SplitFunctionsStrategy::CDSplit) + return; + std::unique_ptr<SplitStrategy> Strategy; bool ForceSequential = false; switch (opts::SplitStrategy) { + case SplitFunctionsStrategy::CDSplit: + // CDSplit runs two splitting passes: hot-cold splitting (SplitPrfoile2) + // before function reordering and hot-warm-cold splitting + // (SplitCacheDirected) after function reordering. + if (BC.HasFinalizedFunctionOrder) + Strategy = std::make_unique<SplitCacheDirected>(); + else + Strategy = std::make_unique<SplitProfile2>(); + opts::AggressiveSplitting = true; + break; case SplitFunctionsStrategy::Profile2: Strategy = std::make_unique<SplitProfile2>(); break; @@ -382,7 +446,7 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) { CurrentFragment = BB->getFragmentNum(); } - if (!S.keepEmpty()) { + if (S.compactFragments()) { FragmentNum CurrentFragment = FragmentNum::main(); FragmentNum NewFragment = FragmentNum::main(); for (BinaryBasicBlock *const BB : NewLayout) { @@ -394,7 +458,7 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) { } } - BF.getLayout().update(NewLayout); + const bool LayoutUpdated = BF.getLayout().update(NewLayout); // For shared objects, invoke instructions and corresponding landing pads // have to be placed in the same fragment. When we split them, create @@ -404,7 +468,7 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) { Trampolines = createEHTrampolines(BF); // Check the new size to see if it's worth splitting the function. - if (BC.isX86() && BF.isSplit()) { + if (BC.isX86() && LayoutUpdated) { std::tie(HotSize, ColdSize) = BC.calculateEmittedSize(BF); LLVM_DEBUG(dbgs() << "Estimated size for function " << BF << " post-split is <0x" << Twine::utohexstr(HotSize) @@ -431,6 +495,11 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) { SplitBytesCold += ColdSize; } } + + // Fix branches if the splitting decision of the pass after function + // reordering is different from that of the pass before function reordering. + if (LayoutUpdated && BC.HasFinalizedFunctionOrder) + BF.fixBranches(); } SplitFunctions::TrampolineSetType diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp index 37de3ea..9946608 100644 --- a/bolt/lib/Rewrite/BinaryPassManager.cpp +++ b/bolt/lib/Rewrite/BinaryPassManager.cpp @@ -430,6 +430,13 @@ void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { Manager.registerPass( std::make_unique<ReorderFunctions>(PrintReorderedFunctions)); + // This is the second run of the SplitFunctions pass required by certain + // splitting strategies (e.g. cdsplit). Running the SplitFunctions pass again + // after ReorderFunctions allows the finalized function order to be utilized + // to make more sophisticated splitting decisions, like hot-warm-cold + // splitting. + Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit)); + // Print final dyno stats right while CFG and instruction analysis are intact. Manager.registerPass( std::make_unique<DynoStatsPrintPass>( |