aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/CodeGen/MachineOutliner.cpp
diff options
context:
space:
mode:
authorKyungwoo Lee <kyulee@meta.com>2024-09-10 06:56:31 -0700
committerGitHub <noreply@github.com>2024-09-10 06:56:31 -0700
commit0f525452896771cc8c579eb362dc7645e38fd0b9 (patch)
tree0a484075a29372d4ca51e4137e74268fea9b7a93 /llvm/lib/CodeGen/MachineOutliner.cpp
parent46a76c33343c34c0eb5fa7ec849d0de42cfed412 (diff)
downloadllvm-0f525452896771cc8c579eb362dc7645e38fd0b9.zip
llvm-0f525452896771cc8c579eb362dc7645e38fd0b9.tar.gz
llvm-0f525452896771cc8c579eb362dc7645e38fd0b9.tar.bz2
[CGData][MachineOutliner] Global Outlining (#90074)
This commit introduces support for outlining functions across modules using codegen data generated from previous codegen. The codegen data currently manages the outlined hash tree, which records outlining instances that occurred locally in the past. The machine outliner now operates in one of three modes: 1. CGDataMode::None: This is the default outliner mode that uses the suffix tree to identify (local) outlining candidates within a module. This mode is also used by (full)LTO to maintain optimal behavior with the combined module. 2. CGDataMode::Write (`-codegen-data-generate`): This mode is identical to the default mode, but it also publishes the stable hash sequences of instructions in the outlined functions into a local outlined hash tree. It then encodes this into the `__llvm_outline` section, which will be dead-stripped at link time. 3. CGDataMode::Read (`-codegen-data-use-path={.cgdata}`): This mode reads a codegen data file (.cgdata) and initializes a global outlined hash tree. This tree is used to generate global outlining candidates. Note that the codegen data file has been post-processed with the raw `__llvm_outline` sections from all native objects using the `llvm-cgdata` tool (or a linker, `LLD`, or a new ThinLTO pipeline later). This depends on https://github.com/llvm/llvm-project/pull/105398. After this PR, LLD (https://github.com/llvm/llvm-project/pull/90166) and Clang (https://github.com/llvm/llvm-project/pull/90304) will follow for each client side support. This is a patch for https://discourse.llvm.org/t/rfc-enhanced-machine-outliner-part-2-thinlto-nolto/78753.
Diffstat (limited to 'llvm/lib/CodeGen/MachineOutliner.cpp')
-rw-r--r--llvm/lib/CodeGen/MachineOutliner.cpp254
1 files changed, 253 insertions, 1 deletions
diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index 97500d0..c9cb888 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -59,7 +59,9 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/CGData/CodeGenDataReader.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
@@ -75,6 +77,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/SuffixTree.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <functional>
#include <tuple>
#include <vector>
@@ -98,6 +101,10 @@ STATISTIC(NumInvisible,
"Invisible instructions skipped during mapping");
STATISTIC(UnsignedVecSize,
"Total number of instructions mapped and saved to mapping vector");
+STATISTIC(StableHashAttempts,
+ "Count of hashing attempts made for outlined functions");
+STATISTIC(StableHashDropped,
+ "Count of unsuccessful hashing attempts for outlined functions");
// Set to true if the user wants the outliner to run on linkonceodr linkage
// functions. This is false by default because the linker can dedupe linkonceodr
@@ -128,6 +135,19 @@ static cl::opt<bool> OutlinerLeafDescendants(
"tree as candidates for outlining (if false, only leaf children "
"are considered)"));
+static cl::opt<bool>
+ DisableGlobalOutlining("disable-global-outlining", cl::Hidden,
+ cl::desc("Disable global outlining only by ignoring "
+ "the codegen data generation or use"),
+ cl::init(false));
+
+static cl::opt<bool> AppendContentHashToOutlinedName(
+ "append-content-hash-outlined-name", cl::Hidden,
+ cl::desc("This appends the content hash to the globally outlined function "
+ "name. It's beneficial for enhancing the precision of the stable "
+ "hash and for ordering the outlined functions."),
+ cl::init(true));
+
namespace {
/// Maps \p MachineInstrs to unsigned integers and stores the mappings.
@@ -421,11 +441,29 @@ struct MachineOutliner : public ModulePass {
/// Set when the pass is constructed in TargetPassConfig.
bool RunOnAllFunctions = true;
+ /// This is a compact representation of hash sequences of outlined functions.
+ /// It is used when OutlinerMode = CGDataMode::Write.
+ /// The resulting hash tree will be emitted into __llvm_outlined section
+ /// which will be dead-stripped not going to the final binary.
+ /// A post-process using llvm-cgdata, lld, or ThinLTO can merge them into
+ /// a global oulined hash tree for the subsequent codegen.
+ std::unique_ptr<OutlinedHashTree> LocalHashTree;
+
+ /// The mode of the outliner.
+ /// When is's CGDataMode::None, candidates are populated with the suffix tree
+ /// within a module and outlined.
+ /// When it's CGDataMode::Write, in addition to CGDataMode::None, the hash
+ /// sequences of outlined functions are published into LocalHashTree.
+ /// When it's CGDataMode::Read, candidates are populated with the global
+ /// outlined hash tree that has been built by the previous codegen.
+ CGDataMode OutlinerMode = CGDataMode::None;
+
StringRef getPassName() const override { return "Machine Outliner"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineModuleInfoWrapperPass>();
AU.addPreserved<MachineModuleInfoWrapperPass>();
+ AU.addRequired<ImmutableModuleSummaryIndexWrapperPass>();
AU.setPreservesAll();
ModulePass::getAnalysisUsage(AU);
}
@@ -460,6 +498,16 @@ struct MachineOutliner : public ModulePass {
findCandidates(InstructionMapper &Mapper,
std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList);
+ /// Find all repeated substrings that match in the global outlined hash
+ /// tree built from the previous codegen.
+ ///
+ /// \param Mapper Contains outlining mapping information.
+ /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions
+ /// each type of candidate.
+ void findGlobalCandidates(
+ InstructionMapper &Mapper,
+ std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList);
+
/// Replace the sequences of instructions represented by \p OutlinedFunctions
/// with calls to functions.
///
@@ -476,6 +524,17 @@ struct MachineOutliner : public ModulePass {
InstructionMapper &Mapper,
unsigned Name);
+ /// Compute and publish the stable hash sequence of instructions in the
+ /// outlined function, \p MF. The parameter \p CandSize represents the number
+ /// of candidates that have identical instruction sequences to \p MF.
+ void computeAndPublishHashSequence(MachineFunction &MF, unsigned CandSize);
+
+ /// Initialize the outliner mode.
+ void initializeOutlinerMode(const Module &M);
+
+ /// Emit the outlined hash tree into __llvm_outline section.
+ void emitOutlinedHashTree(Module &M);
+
/// Calls 'doOutline()' 1 + OutlinerReruns times.
bool runOnModule(Module &M) override;
@@ -585,6 +644,104 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
MORE.emit(R);
}
+struct MatchedEntry {
+ unsigned StartIdx;
+ unsigned EndIdx;
+ unsigned Count;
+ MatchedEntry(unsigned StartIdx, unsigned EndIdx, unsigned Count)
+ : StartIdx(StartIdx), EndIdx(EndIdx), Count(Count) {}
+ MatchedEntry() = delete;
+};
+
+// Find all matches in the global outlined hash tree.
+// It's quadratic complexity in theory, but it's nearly linear in practice
+// since the length of outlined sequences are small within a block.
+static SmallVector<MatchedEntry> getMatchedEntries(InstructionMapper &Mapper) {
+ auto &InstrList = Mapper.InstrList;
+ auto &UnsignedVec = Mapper.UnsignedVec;
+
+ SmallVector<MatchedEntry> MatchedEntries;
+ auto Size = UnsignedVec.size();
+
+ // Get the global outlined hash tree built from the previous run.
+ assert(cgdata::hasOutlinedHashTree());
+ const auto *RootNode = cgdata::getOutlinedHashTree()->getRoot();
+
+ auto getValidInstr = [&](unsigned Index) -> const MachineInstr * {
+ if (UnsignedVec[Index] >= Mapper.LegalInstrNumber)
+ return nullptr;
+ return &(*InstrList[Index]);
+ };
+
+ auto getStableHashAndFollow =
+ [](const MachineInstr &MI, const HashNode *CurrNode) -> const HashNode * {
+ stable_hash StableHash = stableHashValue(MI);
+ if (!StableHash)
+ return nullptr;
+ auto It = CurrNode->Successors.find(StableHash);
+ return (It == CurrNode->Successors.end()) ? nullptr : It->second.get();
+ };
+
+ for (unsigned I = 0; I < Size; ++I) {
+ const MachineInstr *MI = getValidInstr(I);
+ if (!MI || MI->isDebugInstr())
+ continue;
+ const HashNode *CurrNode = getStableHashAndFollow(*MI, RootNode);
+ if (!CurrNode)
+ continue;
+
+ for (unsigned J = I + 1; J < Size; ++J) {
+ const MachineInstr *MJ = getValidInstr(J);
+ if (!MJ)
+ break;
+ // Skip debug instructions as we did for the outlined function.
+ if (MJ->isDebugInstr())
+ continue;
+ CurrNode = getStableHashAndFollow(*MJ, CurrNode);
+ if (!CurrNode)
+ break;
+ // Even with a match ending with a terminal, we continue finding
+ // matches to populate all candidates.
+ if (auto Count = CurrNode->Terminals)
+ MatchedEntries.emplace_back(I, J, *Count);
+ }
+ }
+
+ return MatchedEntries;
+}
+
+void MachineOutliner::findGlobalCandidates(
+ InstructionMapper &Mapper,
+ std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList) {
+ FunctionList.clear();
+ auto &InstrList = Mapper.InstrList;
+ auto &MBBFlagsMap = Mapper.MBBFlagsMap;
+
+ std::vector<Candidate> CandidatesForRepeatedSeq;
+ for (auto &ME : getMatchedEntries(Mapper)) {
+ CandidatesForRepeatedSeq.clear();
+ MachineBasicBlock::iterator StartIt = InstrList[ME.StartIdx];
+ MachineBasicBlock::iterator EndIt = InstrList[ME.EndIdx];
+ auto Length = ME.EndIdx - ME.StartIdx + 1;
+ MachineBasicBlock *MBB = StartIt->getParent();
+ CandidatesForRepeatedSeq.emplace_back(ME.StartIdx, Length, StartIt, EndIt,
+ MBB, FunctionList.size(),
+ MBBFlagsMap[MBB]);
+ const TargetInstrInfo *TII =
+ MBB->getParent()->getSubtarget().getInstrInfo();
+ unsigned MinRepeats = 1;
+ std::optional<std::unique_ptr<OutlinedFunction>> OF =
+ TII->getOutliningCandidateInfo(*MMI, CandidatesForRepeatedSeq,
+ MinRepeats);
+ if (!OF.has_value() || OF.value()->Candidates.empty())
+ continue;
+ // We create a global candidate for each match.
+ assert(OF.value()->Candidates.size() == MinRepeats);
+ FunctionList.emplace_back(std::make_unique<GlobalOutlinedFunction>(
+ std::move(OF.value()), ME.Count));
+ }
+}
+
void MachineOutliner::findCandidates(
InstructionMapper &Mapper,
std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList) {
@@ -695,6 +852,39 @@ void MachineOutliner::findCandidates(
}
}
+void MachineOutliner::computeAndPublishHashSequence(MachineFunction &MF,
+ unsigned CandSize) {
+ // Compute the hash sequence for the outlined function.
+ SmallVector<stable_hash> OutlinedHashSequence;
+ for (auto &MBB : MF) {
+ for (auto &NewMI : MBB) {
+ stable_hash Hash = stableHashValue(NewMI);
+ if (!Hash) {
+ OutlinedHashSequence.clear();
+ break;
+ }
+ OutlinedHashSequence.push_back(Hash);
+ }
+ }
+
+ // Append a unique name based on the non-empty hash sequence.
+ if (AppendContentHashToOutlinedName && !OutlinedHashSequence.empty()) {
+ auto CombinedHash = stable_hash_combine(OutlinedHashSequence);
+ auto NewName =
+ MF.getName().str() + ".content." + std::to_string(CombinedHash);
+ MF.getFunction().setName(NewName);
+ }
+
+ // Publish the non-empty hash sequence to the local hash tree.
+ if (OutlinerMode == CGDataMode::Write) {
+ StableHashAttempts++;
+ if (!OutlinedHashSequence.empty())
+ LocalHashTree->insert({OutlinedHashSequence, CandSize});
+ else
+ StableHashDropped++;
+ }
+}
+
MachineFunction *MachineOutliner::createOutlinedFunction(
Module &M, OutlinedFunction &OF, InstructionMapper &Mapper, unsigned Name) {
@@ -769,6 +959,9 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
}
}
+ if (OutlinerMode != CGDataMode::None)
+ computeAndPublishHashSequence(MF, OF.Candidates.size());
+
// Set normal properties for a late MachineFunction.
MF.getProperties().reset(MachineFunctionProperties::Property::IsSSA);
MF.getProperties().set(MachineFunctionProperties::Property::NoPHIs);
@@ -1133,12 +1326,65 @@ void MachineOutliner::emitInstrCountChangedRemark(
}
}
+void MachineOutliner::initializeOutlinerMode(const Module &M) {
+ if (DisableGlobalOutlining)
+ return;
+
+ if (auto *IndexWrapperPass =
+ getAnalysisIfAvailable<ImmutableModuleSummaryIndexWrapperPass>()) {
+ auto *TheIndex = IndexWrapperPass->getIndex();
+ // (Full)LTO module does not have functions added to the index.
+ // In this case, we run the outliner without using codegen data as usual.
+ if (TheIndex && !TheIndex->hasExportedFunctions(M))
+ return;
+ }
+
+ // When codegen data write is enabled, we want to write the local outlined
+ // hash tree to the custom section, `__llvm_outline`.
+ // When the outlined hash tree is available from the previous codegen data,
+ // we want to read it to optimistically create global outlining candidates.
+ if (cgdata::emitCGData()) {
+ OutlinerMode = CGDataMode::Write;
+ // Create a local outlined hash tree to be published.
+ LocalHashTree = std::make_unique<OutlinedHashTree>();
+ // We don't need to read the outlined hash tree from the previous codegen
+ } else if (cgdata::hasOutlinedHashTree())
+ OutlinerMode = CGDataMode::Read;
+}
+
+void MachineOutliner::emitOutlinedHashTree(Module &M) {
+ assert(LocalHashTree);
+ if (!LocalHashTree->empty()) {
+ LLVM_DEBUG({
+ dbgs() << "Emit outlined hash tree. Size: " << LocalHashTree->size()
+ << "\n";
+ });
+ SmallVector<char> Buf;
+ raw_svector_ostream OS(Buf);
+
+ OutlinedHashTreeRecord HTR(std::move(LocalHashTree));
+ HTR.serialize(OS);
+
+ llvm::StringRef Data(Buf.data(), Buf.size());
+ std::unique_ptr<MemoryBuffer> Buffer =
+ MemoryBuffer::getMemBuffer(Data, "in-memory outlined hash tree", false);
+
+ Triple TT(M.getTargetTriple());
+ embedBufferInModule(
+ M, *Buffer.get(),
+ getCodeGenDataSectionName(CG_outline, TT.getObjectFormat()));
+ }
+}
+
bool MachineOutliner::runOnModule(Module &M) {
// Check if there's anything in the module. If it's empty, then there's
// nothing to outline.
if (M.empty())
return false;
+ // Initialize the outliner mode.
+ initializeOutlinerMode(M);
+
MMI = &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
// Number to append to the current outlined function.
@@ -1160,6 +1406,9 @@ bool MachineOutliner::runOnModule(Module &M) {
}
}
+ if (OutlinerMode == CGDataMode::Write)
+ emitOutlinedHashTree(M);
+
return true;
}
@@ -1188,7 +1437,10 @@ bool MachineOutliner::doOutline(Module &M, unsigned &OutlinedFunctionNum) {
std::vector<std::unique_ptr<OutlinedFunction>> FunctionList;
// Find all of the outlining candidates.
- findCandidates(Mapper, FunctionList);
+ if (OutlinerMode == CGDataMode::Read)
+ findGlobalCandidates(Mapper, FunctionList);
+ else
+ findCandidates(Mapper, FunctionList);
// If we've requested size remarks, then collect the MI counts of every
// function before outlining, and the MI counts after outlining.