diff options
author | Teresa Johnson <tejohnson@google.com> | 2024-11-15 08:24:44 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-15 08:24:44 -0800 |
commit | 9513f2fdf2ad50f55726154a6b6a4aa463bc457f (patch) | |
tree | 738757d593af34a24cc6fddb5c3881386b6f9bd0 /llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | |
parent | f6e1d64458130643377511baeec430de67ddddfb (diff) | |
download | llvm-9513f2fdf2ad50f55726154a6b6a4aa463bc457f.zip llvm-9513f2fdf2ad50f55726154a6b6a4aa463bc457f.tar.gz llvm-9513f2fdf2ad50f55726154a6b6a4aa463bc457f.tar.bz2 |
[MemProf] Print full context hash when reporting hinted bytes (#114465)
Improve the information printed when -memprof-report-hinted-sizes is
enabled. Now print the full context hash computed from the original
profile, similar to what we do when reporting matching statistics. This
will make it easier to correlate with the profile.
Note that the full context hash must be computed at profile match time
and saved in the metadata and summary, because we may trim the context
during matching when it isn't needed for distinguishing hotness.
Similarly, due to the context trimming, there may be more than one full
context id and total size pair per MIB; the metadata and summary now hold
a list of these pairs for each MIB.
Remove the old aggregate size from the metadata and summary support.
One other change from the prior support is that we no longer write the
size information into the combined index for the LTO backends, which do
not use it; this reduces unnecessary bloat in distributed index files.
Diffstat (limited to 'llvm/lib/Bitcode/Writer/BitcodeWriter.cpp')
-rw-r--r-- | llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 68 |
1 file changed, 54 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index cd6541c..5829af3 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -230,7 +230,8 @@ private: void writePerModuleFunctionSummaryRecord( SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary, unsigned ValueID, unsigned FSCallsAbbrev, unsigned FSCallsProfileAbbrev, - unsigned CallsiteAbbrev, unsigned AllocAbbrev, const Function &F); + unsigned CallsiteAbbrev, unsigned AllocAbbrev, unsigned ContextIdAbbvId, + const Function &F); void writeModuleLevelReferences(const GlobalVariable &V, SmallVector<uint64_t, 64> &NameVals, unsigned FSModRefsAbbrev, @@ -4196,9 +4197,10 @@ static void writeTypeIdCompatibleVtableSummaryRecord( static void writeFunctionHeapProfileRecords( BitstreamWriter &Stream, FunctionSummary *FS, unsigned CallsiteAbbrev, - unsigned AllocAbbrev, bool PerModule, + unsigned AllocAbbrev, unsigned ContextIdAbbvId, bool PerModule, std::function<unsigned(const ValueInfo &VI)> GetValueID, - std::function<unsigned(unsigned)> GetStackIndex) { + std::function<unsigned(unsigned)> GetStackIndex, + bool WriteContextSizeInfoIndex) { SmallVector<uint64_t> Record; for (auto &CI : FS->callsites()) { @@ -4240,10 +4242,34 @@ static void writeFunctionHeapProfileRecords( for (auto V : AI.Versions) Record.push_back(V); } - assert(AI.TotalSizes.empty() || AI.TotalSizes.size() == AI.MIBs.size()); - if (!AI.TotalSizes.empty()) { - for (auto Size : AI.TotalSizes) - Record.push_back(Size); + assert(AI.ContextSizeInfos.empty() || + AI.ContextSizeInfos.size() == AI.MIBs.size()); + // Optionally emit the context size information if it exists. + if (WriteContextSizeInfoIndex && !AI.ContextSizeInfos.empty()) { + // The abbreviation id for the context ids record should have been created + // if we are emitting the per-module index, which is where we write this + // info. 
+ assert(ContextIdAbbvId); + SmallVector<uint32_t> ContextIds; + // At least one context id per ContextSizeInfos entry (MIB), broken into 2 + // halves. + ContextIds.reserve(AI.ContextSizeInfos.size() * 2); + for (auto &Infos : AI.ContextSizeInfos) { + Record.push_back(Infos.size()); + for (auto [FullStackId, TotalSize] : Infos) { + // The context ids are emitted separately as a fixed width array, + // which is more efficient than a VBR given that these hashes are + // typically close to 64-bits. The max fixed width entry is 32 bits so + // it is split into 2. + ContextIds.push_back(static_cast<uint32_t>(FullStackId >> 32)); + ContextIds.push_back(static_cast<uint32_t>(FullStackId)); + Record.push_back(TotalSize); + } + } + // The context ids are expected by the reader to immediately precede the + // associated alloc info record. + Stream.EmitRecord(bitc::FS_ALLOC_CONTEXT_IDS, ContextIds, + ContextIdAbbvId); } Stream.EmitRecord(PerModule ? bitc::FS_PERMODULE_ALLOC_INFO : bitc::FS_COMBINED_ALLOC_INFO, @@ -4256,7 +4282,7 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord( SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary, unsigned ValueID, unsigned FSCallsRelBFAbbrev, unsigned FSCallsProfileAbbrev, unsigned CallsiteAbbrev, - unsigned AllocAbbrev, const Function &F) { + unsigned AllocAbbrev, unsigned ContextIdAbbvId, const Function &F) { NameVals.push_back(ValueID); FunctionSummary *FS = cast<FunctionSummary>(Summary); @@ -4267,10 +4293,11 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord( }); writeFunctionHeapProfileRecords( - Stream, FS, CallsiteAbbrev, AllocAbbrev, + Stream, FS, CallsiteAbbrev, AllocAbbrev, ContextIdAbbvId, /*PerModule*/ true, /*GetValueId*/ [&](const ValueInfo &VI) { return getValueId(VI); }, - /*GetStackIndex*/ [&](unsigned I) { return I; }); + /*GetStackIndex*/ [&](unsigned I) { return I; }, + /*WriteContextSizeInfoIndex*/ true); auto SpecialRefCnts = FS->specialRefCounts(); 
NameVals.push_back(getEncodedGVSummaryFlags(FS->flags())); @@ -4402,11 +4429,23 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() { StackIdAbbv->Add(BitCodeAbbrevOp(bitc::FS_STACK_IDS)); // numids x stackid StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + // FIXME: The stack ids are hashes that are close to 64 bits in size, so + // emitting as a pair of 32-bit fixed-width values, as we do for context + // ids, would be more efficient. StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned StackIdAbbvId = Stream.EmitAbbrev(std::move(StackIdAbbv)); Stream.EmitRecord(bitc::FS_STACK_IDS, Index->stackIds(), StackIdAbbvId); } + // n x context id + auto ContextIdAbbv = std::make_shared<BitCodeAbbrev>(); + ContextIdAbbv->Add(BitCodeAbbrevOp(bitc::FS_ALLOC_CONTEXT_IDS)); + ContextIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + // The context ids are hashes that are close to 64 bits in size, so emitting + // as a pair of 32-bit fixed-width values is more efficient than a VBR. + ContextIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + unsigned ContextIdAbbvId = Stream.EmitAbbrev(std::move(ContextIdAbbv)); + // Abbrev for FS_PERMODULE_PROFILE. 
Abbv = std::make_shared<BitCodeAbbrev>(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_PROFILE)); @@ -4487,7 +4526,7 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() { Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_ALLOC_INFO)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // nummib // n x (alloc type, numstackids, numstackids x stackidindex) - // optional: nummib x total size + // optional: nummib x (numcontext x total size) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv)); @@ -4511,7 +4550,7 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() { auto *Summary = VI.getSummaryList()[0].get(); writePerModuleFunctionSummaryRecord( NameVals, Summary, VE.getValueID(&F), FSCallsRelBFAbbrev, - FSCallsProfileAbbrev, CallsiteAbbrev, AllocAbbrev, F); + FSCallsProfileAbbrev, CallsiteAbbrev, AllocAbbrev, ContextIdAbbvId, F); } // Capture references from GlobalVariable initializers, which are outside @@ -4740,7 +4779,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { getReferencedTypeIds(FS, ReferencedTypeIds); writeFunctionHeapProfileRecords( - Stream, FS, CallsiteAbbrev, AllocAbbrev, + Stream, FS, CallsiteAbbrev, AllocAbbrev, /*ContextIdAbbvId*/ 0, /*PerModule*/ false, /*GetValueId*/ [&](const ValueInfo &VI) -> unsigned { @@ -4760,7 +4799,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { // the case of distributed indexes). assert(StackIdIndicesToIndex.contains(I)); return StackIdIndicesToIndex[I]; - }); + }, + /*WriteContextSizeInfoIndex*/ false); NameVals.push_back(*ValueId); assert(ModuleIdMap.count(FS->modulePath())); |