aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
diff options
context:
space:
mode:
author: Teresa Johnson <tejohnson@google.com> 2022-10-11 14:00:37 -0700
committer: Teresa Johnson <tejohnson@google.com> 2022-11-15 06:45:12 -0800
commit: 47459455009db4790ffc3765a2ec0f8b4934c2a4 (patch)
tree: a0a2cdb04b5514eb45f5ba5291a49f0c0806c104 /llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
parent: a8673b722989c209ca41f02ab09150362bf1afd4 (diff)
download: llvm-47459455009db4790ffc3765a2ec0f8b4934c2a4.zip
llvm-47459455009db4790ffc3765a2ec0f8b4934c2a4.tar.gz
llvm-47459455009db4790ffc3765a2ec0f8b4934c2a4.tar.bz2
[MemProf] ThinLTO summary support
Implements the ThinLTO summary support for memprof-related metadata. This includes support for the assembly format, and for building the summary from IR during ModuleSummaryAnalysis. To reduce space in both the bitcode format and the in-memory index, we do two things: 1. We keep a single vector of all unique stack id hashes, and record the index into this vector in the callsite and allocation memprof summaries. 2. When building the combined index during the LTO link, the callsite and allocation memprof summaries are only kept on the FunctionSummary of the prevailing copy. Differential Revision: https://reviews.llvm.org/D135714
Diffstat (limited to 'llvm/lib/Bitcode/Writer/BitcodeWriter.cpp')
-rw-r--r-- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp 185
1 files changed, 175 insertions, 10 deletions
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 4bf881a..bc81afb 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -211,12 +211,10 @@ protected:
void writePerModuleGlobalValueSummary();
private:
- void writePerModuleFunctionSummaryRecord(SmallVector<uint64_t, 64> &NameVals,
- GlobalValueSummary *Summary,
- unsigned ValueID,
- unsigned FSCallsAbbrev,
- unsigned FSCallsProfileAbbrev,
- const Function &F);
+ void writePerModuleFunctionSummaryRecord(
+ SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary,
+ unsigned ValueID, unsigned FSCallsAbbrev, unsigned FSCallsProfileAbbrev,
+ unsigned CallsiteAbbrev, unsigned AllocAbbrev, const Function &F);
void writeModuleLevelReferences(const GlobalVariable &V,
SmallVector<uint64_t, 64> &NameVals,
unsigned FSModRefsAbbrev,
@@ -424,6 +422,11 @@ class IndexBitcodeWriter : public BitcodeWriterBase {
/// index and a value id generated by this class to use in references.
std::map<GlobalValue::GUID, unsigned> GUIDToValueIdMap;
+ // The sorted stack id indices actually used in the summary entries being
+ // written, which will be a subset of those in the full index in the case of
+ // distributed indexes.
+ std::vector<unsigned> StackIdIndices;
+
/// Tracks the last value id recorded in the GUIDToValueMap.
unsigned GlobalValueId = 0;
@@ -441,9 +444,28 @@ public:
// in writing out the call graph edges. Save the mapping from GUID
// to the new global value id to use when writing those edges, which
// are currently saved in the index in terms of GUID.
- forEachSummary([&](GVInfo I, bool) {
+ forEachSummary([&](GVInfo I, bool IsAliasee) {
GUIDToValueIdMap[I.first] = ++GlobalValueId;
+ if (IsAliasee)
+ return;
+ auto *FS = dyn_cast<FunctionSummary>(I.second);
+ if (!FS)
+ return;
+ // Record all stack id indices actually used in the summary entries being
+ // written, so that we can compact them in the case of distributed ThinLTO
+ // indexes.
+ for (auto &CI : FS->callsites())
+ for (auto Idx : CI.StackIdIndices)
+ StackIdIndices.push_back(Idx);
+ for (auto &AI : FS->allocs())
+ for (auto &MIB : AI.MIBs)
+ for (auto Idx : MIB.StackIdIndices)
+ StackIdIndices.push_back(Idx);
});
+ llvm::sort(StackIdIndices);
+ StackIdIndices.erase(
+ std::unique(StackIdIndices.begin(), StackIdIndices.end()),
+ StackIdIndices.end());
}
/// The below iterator returns the GUID and associated summary.
@@ -3888,11 +3910,64 @@ static void writeTypeIdCompatibleVtableSummaryRecord(
}
}
+static void writeFunctionHeapProfileRecords(
+ BitstreamWriter &Stream, FunctionSummary *FS, unsigned CallsiteAbbrev,
+ unsigned AllocAbbrev, bool PerModule,
+ std::function<unsigned(const ValueInfo &VI)> GetValueID,
+ std::function<unsigned(unsigned)> GetStackIndex) {
+ SmallVector<uint64_t> Record;
+
+ for (auto &CI : FS->callsites()) {
+ Record.clear();
+ // Per module callsite clones should always have a single entry of
+ // value 0.
+ assert(!PerModule || (CI.Clones.size() == 1 && CI.Clones[0] == 0));
+ Record.push_back(GetValueID(CI.Callee));
+ if (!PerModule) {
+ Record.push_back(CI.StackIdIndices.size());
+ Record.push_back(CI.Clones.size());
+ }
+ for (auto Id : CI.StackIdIndices)
+ Record.push_back(GetStackIndex(Id));
+ if (!PerModule) {
+ for (auto V : CI.Clones)
+ Record.push_back(V);
+ }
+ Stream.EmitRecord(PerModule ? bitc::FS_PERMODULE_CALLSITE_INFO
+ : bitc::FS_COMBINED_CALLSITE_INFO,
+ Record, CallsiteAbbrev);
+ }
+
+ for (auto &AI : FS->allocs()) {
+ Record.clear();
+ // Per module alloc versions should always have a single entry of
+ // value 0.
+ assert(!PerModule || (AI.Versions.size() == 1 && AI.Versions[0] == 0));
+ if (!PerModule) {
+ Record.push_back(AI.MIBs.size());
+ Record.push_back(AI.Versions.size());
+ }
+ for (auto &MIB : AI.MIBs) {
+ Record.push_back((uint8_t)MIB.AllocType);
+ Record.push_back(MIB.StackIdIndices.size());
+ for (auto Id : MIB.StackIdIndices)
+ Record.push_back(GetStackIndex(Id));
+ }
+ if (!PerModule) {
+ for (auto V : AI.Versions)
+ Record.push_back(V);
+ }
+ Stream.EmitRecord(PerModule ? bitc::FS_PERMODULE_ALLOC_INFO
+ : bitc::FS_COMBINED_ALLOC_INFO,
+ Record, AllocAbbrev);
+ }
+}
+
// Helper to emit a single function summary record.
void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary,
unsigned ValueID, unsigned FSCallsAbbrev, unsigned FSCallsProfileAbbrev,
- const Function &F) {
+ unsigned CallsiteAbbrev, unsigned AllocAbbrev, const Function &F) {
NameVals.push_back(ValueID);
FunctionSummary *FS = cast<FunctionSummary>(Summary);
@@ -3902,6 +3977,12 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
return {VE.getValueID(VI.getValue())};
});
+ writeFunctionHeapProfileRecords(
+ Stream, FS, CallsiteAbbrev, AllocAbbrev,
+ /*PerModule*/ true,
+ /*GetValueId*/ [&](const ValueInfo &VI) { return getValueId(VI); },
+ /*GetStackIndex*/ [&](unsigned I) { return I; });
+
auto SpecialRefCnts = FS->specialRefCounts();
NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));
NameVals.push_back(FS->instCount());
@@ -4013,6 +4094,16 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
ArrayRef<uint64_t>{GVI.second, GVI.first});
}
+ if (!Index->stackIds().empty()) {
+ auto StackIdAbbv = std::make_shared<BitCodeAbbrev>();
+ StackIdAbbv->Add(BitCodeAbbrevOp(bitc::FS_STACK_IDS));
+ // numids x stackid
+ StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ unsigned StackIdAbbvId = Stream.EmitAbbrev(std::move(StackIdAbbv));
+ Stream.EmitRecord(bitc::FS_STACK_IDS, Index->stackIds(), StackIdAbbvId);
+ }
+
// Abbrev for FS_PERMODULE_PROFILE.
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_PROFILE));
@@ -4084,6 +4175,21 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned TypeIdCompatibleVtableAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+ Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_CALLSITE_INFO));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
+ // n x stackidindex
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ unsigned CallsiteAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
+ Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_ALLOC_INFO));
+ // n x (alloc type, numstackids, numstackids x stackidindex)
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
SmallVector<uint64_t, 64> NameVals;
// Iterate over the list of functions instead of the Index to
// ensure the ordering is stable.
@@ -4102,7 +4208,8 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
}
auto *Summary = VI.getSummaryList()[0].get();
writePerModuleFunctionSummaryRecord(NameVals, Summary, VE.getValueID(&F),
- FSCallsAbbrev, FSCallsProfileAbbrev, F);
+ FSCallsAbbrev, FSCallsProfileAbbrev,
+ CallsiteAbbrev, AllocAbbrev, F);
}
// Capture references from GlobalVariable initializers, which are outside
@@ -4144,7 +4251,7 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
/// Emit the combined summary section into the combined index file.
void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
- Stream.EnterSubblock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID, 3);
+ Stream.EnterSubblock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID, 4);
Stream.EmitRecord(
bitc::FS_VERSION,
ArrayRef<uint64_t>{ModuleSummaryIndex::BitcodeSummaryVersion});
@@ -4157,6 +4264,21 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
ArrayRef<uint64_t>{GVI.second, GVI.first});
}
+ if (!StackIdIndices.empty()) {
+ auto StackIdAbbv = std::make_shared<BitCodeAbbrev>();
+ StackIdAbbv->Add(BitCodeAbbrevOp(bitc::FS_STACK_IDS));
+ // numids x stackid
+ StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ unsigned StackIdAbbvId = Stream.EmitAbbrev(std::move(StackIdAbbv));
+ // Write the stack ids used by this index, which will be a subset of those in
+ // the full index in the case of distributed indexes.
+ std::vector<uint64_t> StackIds;
+ for (auto &I : StackIdIndices)
+ StackIds.push_back(Index.getStackIdAtIndex(I));
+ Stream.EmitRecord(bitc::FS_STACK_IDS, StackIds, StackIdAbbvId);
+ }
+
// Abbrev for FS_COMBINED.
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED));
@@ -4210,6 +4332,26 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
unsigned FSAliasAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+ Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_CALLSITE_INFO));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numstackindices
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numver
+ // numstackindices x stackidindex, numver x version
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ unsigned CallsiteAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
+ Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_ALLOC_INFO));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // nummib
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numver
+ // nummib x (alloc type, numstackids, numstackids x stackidindex),
+ // numver x version
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
// The aliases are emitted as a post-pass, and will point to the value
// id of the aliasee. Save them in a vector for post-processing.
SmallVector<AliasSummary *, 64> Aliases;
@@ -4286,6 +4428,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
}
auto GetValueId = [&](const ValueInfo &VI) -> Optional<unsigned> {
+ if (!VI)
+ return None;
return getValueId(VI.getGUID());
};
@@ -4293,6 +4437,27 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
writeFunctionTypeMetadataRecords(Stream, FS, GetValueId);
getReferencedTypeIds(FS, ReferencedTypeIds);
+ writeFunctionHeapProfileRecords(
+ Stream, FS, CallsiteAbbrev, AllocAbbrev,
+ /*PerModule*/ false,
+ /*GetValueId*/ [&](const ValueInfo &VI) -> unsigned {
+ Optional<unsigned> ValueID = GetValueId(VI);
+ // This can happen in shared index files for distributed ThinLTO if
+ // the callee function summary is not included. Record 0 which we
+ // will have to deal with conservatively when doing any kind of
+ // validation in the ThinLTO backends.
+ if (!ValueID)
+ return 0;
+ return *ValueID;
+ },
+ /*GetStackIndex*/ [&](unsigned I) {
+ // Get the corresponding index into the list of StackIdIndices
+ // actually being written for this combined index (which may be a
+ // subset in the case of distributed indexes).
+ auto Lower = llvm::lower_bound(StackIdIndices, I);
+ return std::distance(StackIdIndices.begin(), Lower);
+ });
+
NameVals.push_back(*ValueId);
NameVals.push_back(Index.getModuleId(FS->modulePath()));
NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));