diff options
author | Kazu Hirata <kazu@google.com> | 2024-04-18 14:12:58 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-18 14:12:58 -0700 |
commit | 172f6ddfa7662a36e3a41db6bb944f7d72fd0b00 (patch) | |
tree | 57e5d885aeda67aa160c2134ebdce099d09d5c14 /llvm | |
parent | 7aad1ee70f2c6be603707ff52a13e0e22330e859 (diff) | |
download | llvm-172f6ddfa7662a36e3a41db6bb944f7d72fd0b00.zip llvm-172f6ddfa7662a36e3a41db6bb944f7d72fd0b00.tar.gz llvm-172f6ddfa7662a36e3a41db6bb944f7d72fd0b00.tar.bz2 |
[memprof] Add Version2 of the indexed MemProf format (#89100)
This patch adds Version2 of the indexed MemProf format. The new
format comes with a hash table from CallStackId to actual call stacks
llvm::SmallVector<FrameId>. The rest of the format refers to call
stacks with CallStackId. This "values + references" model effectively
deduplicates call stacks. Without this patch, a large indexed memprof
file of mine shrinks from 4.4GB to 1.6GB, a 64% reduction.
This patch does not make Version2 generally available yet as I am
planning to make a few more changes to the format.
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/include/llvm/ProfileData/InstrProfReader.h | 4 | ||||
-rw-r--r-- | llvm/include/llvm/ProfileData/InstrProfWriter.h | 15 | ||||
-rw-r--r-- | llvm/include/llvm/ProfileData/MemProf.h | 92 | ||||
-rw-r--r-- | llvm/include/llvm/ProfileData/MemProfReader.h | 6 | ||||
-rw-r--r-- | llvm/lib/ProfileData/InstrProfReader.cpp | 66 | ||||
-rw-r--r-- | llvm/lib/ProfileData/InstrProfWriter.cpp | 90 | ||||
-rw-r--r-- | llvm/test/tools/llvm-profdata/memprof-merge-v0.test | 3 | ||||
-rw-r--r-- | llvm/tools/llvm-profdata/llvm-profdata.cpp | 15 | ||||
-rw-r--r-- | llvm/unittests/ProfileData/InstrProfTest.cpp | 89 |
9 files changed, 362 insertions, 18 deletions
diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h index f662feb..c45b908 100644 --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -560,6 +560,8 @@ using MemProfRecordHashTable = OnDiskIterableChainedHashTable<memprof::RecordLookupTrait>; using MemProfFrameHashTable = OnDiskIterableChainedHashTable<memprof::FrameLookupTrait>; +using MemProfCallStackHashTable = + OnDiskIterableChainedHashTable<memprof::CallStackLookupTrait>; template <typename HashTableImpl> class InstrProfReaderItaniumRemapper; @@ -666,6 +668,8 @@ private: std::unique_ptr<MemProfRecordHashTable> MemProfRecordTable; /// MemProf frame profile data on-disk indexed via frame id. std::unique_ptr<MemProfFrameHashTable> MemProfFrameTable; + /// MemProf call stack data on-disk indexed via call stack id. + std::unique_ptr<MemProfCallStackHashTable> MemProfCallStackTable; /// VTableNamePtr points to the beginning of compressed vtable names. /// When a symtab is constructed from profiles by llvm-profdata, the list of /// names could be decompressed based on `VTableNamePtr` and diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h index 4b42392..b0ae8f3 100644 --- a/llvm/include/llvm/ProfileData/InstrProfWriter.h +++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h @@ -60,6 +60,10 @@ private: // inline. llvm::MapVector<memprof::FrameId, memprof::Frame> MemProfFrameData; + // A map to hold call stack id to call stacks. + llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>> + MemProfCallStackData; + // List of binary ids. std::vector<llvm::object::BuildID> BinaryIds; @@ -114,6 +118,12 @@ public: bool addMemProfFrame(const memprof::FrameId, const memprof::Frame &F, function_ref<void(Error)> Warn); + /// Add a call stack identified by the hash of the contents of the call stack + /// in \p CallStack. + bool addMemProfCallStack(const memprof::CallStackId CSId, + const llvm::SmallVector<memprof::FrameId> &CallStack, + function_ref<void(Error)> Warn); + // Add a binary id to the binary ids list. void addBinaryIds(ArrayRef<llvm::object::BuildID> BIs); @@ -187,9 +197,12 @@ public: return static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage); } - // Internal interface for testing purpose only. + // Internal interfaces for testing purpose only. void setValueProfDataEndianness(llvm::endianness Endianness); void setOutputSparse(bool Sparse); + void setMemProfVersionRequested(memprof::IndexedVersion Version) { + MemProfVersionRequested = Version; + } // Compute the overlap b/w this object and Other. Program level result is // stored in Overlap and function level result is stored in FuncLevelOverlap. void overlapRecord(NamedInstrProfRecord &&Other, OverlapStats &Overlap, diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index 7f3956b..faaec33 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -29,7 +29,7 @@ enum IndexedVersion : uint64_t { }; constexpr uint64_t MinimumSupportedVersion = Version0; -constexpr uint64_t MaximumSupportedVersion = Version1; +constexpr uint64_t MaximumSupportedVersion = Version2; // Verify that the minimum and maximum satisfy the obvious constraint. static_assert(MinimumSupportedVersion <= MaximumSupportedVersion); @@ -633,6 +633,96 @@ public: } }; +// Trait for writing call stacks to the on-disk hash table. +class CallStackWriterTrait { +public: + using key_type = CallStackId; + using key_type_ref = CallStackId; + + using data_type = llvm::SmallVector<FrameId>; + using data_type_ref = llvm::SmallVector<FrameId> &; + + using hash_value_type = CallStackId; + using offset_type = uint64_t; + + static hash_value_type ComputeHash(key_type_ref K) { return K; } + + static std::pair<offset_type, offset_type> + EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) { + using namespace support; + endian::Writer LE(Out, llvm::endianness::little); + offset_type N = sizeof(K); + LE.write<offset_type>(N); + offset_type M = sizeof(FrameId) * V.size(); + LE.write<offset_type>(M); + return std::make_pair(N, M); + } + + void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) { + using namespace support; + endian::Writer LE(Out, llvm::endianness::little); + LE.write<key_type>(K); + } + + void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V, + offset_type /*Unused*/) { + using namespace support; + endian::Writer LE(Out, llvm::endianness::little); + // Emit the frames. We do not explicitly emit the length of the vector + // because it can be inferred from the data length. + for (FrameId F : V) + LE.write<FrameId>(F); + } +}; + +// Trait for reading call stack mappings from the on-disk hash table. +class CallStackLookupTrait { +public: + using data_type = const llvm::SmallVector<FrameId>; + using internal_key_type = CallStackId; + using external_key_type = CallStackId; + using hash_value_type = CallStackId; + using offset_type = uint64_t; + + static bool EqualKey(internal_key_type A, internal_key_type B) { + return A == B; + } + static uint64_t GetInternalKey(internal_key_type K) { return K; } + static uint64_t GetExternalKey(external_key_type K) { return K; } + + hash_value_type ComputeHash(internal_key_type K) { return K; } + + static std::pair<offset_type, offset_type> + ReadKeyDataLength(const unsigned char *&D) { + using namespace support; + + offset_type KeyLen = + endian::readNext<offset_type, llvm::endianness::little>(D); + offset_type DataLen = + endian::readNext<offset_type, llvm::endianness::little>(D); + return std::make_pair(KeyLen, DataLen); + } + + uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) { + using namespace support; + return endian::readNext<external_key_type, llvm::endianness::little>(D); + } + + data_type ReadData(uint64_t K, const unsigned char *D, offset_type Length) { + using namespace support; + llvm::SmallVector<FrameId> CS; + // Derive the number of frames from the data length. + uint64_t NumFrames = Length / sizeof(FrameId); + assert(Length % sizeof(FrameId) == 0); + CS.reserve(NumFrames); + for (size_t I = 0; I != NumFrames; ++I) { + FrameId F = endian::readNext<FrameId, llvm::endianness::little>(D); + CS.push_back(F); + } + return CS; + } +}; + // Compute a CallStackId for a given call stack. CallStackId hashCallStack(ArrayRef<FrameId> CS); diff --git a/llvm/include/llvm/ProfileData/MemProfReader.h b/llvm/include/llvm/ProfileData/MemProfReader.h index 7fa8af1..444c58e 100644 --- a/llvm/include/llvm/ProfileData/MemProfReader.h +++ b/llvm/include/llvm/ProfileData/MemProfReader.h @@ -51,6 +51,12 @@ public: return IdToFrame; } + // Return a const reference to the internal Id to call stacks. + const llvm::DenseMap<CallStackId, llvm::SmallVector<FrameId>> & + getCallStacks() const { + return CSIdToCallStack; + } + // Return a const reference to the internal function profile data. const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> & getProfileData() const { diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index 8574a96..1e42ae8 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -1249,14 +1249,14 @@ Error IndexedInstrProfReader::readHeader() { // Read the first 64-bit word, which may be RecordTableOffset in // memprof::MemProfVersion0 or the MemProf version number in - // memprof::MemProfVersion1. + // memprof::MemProfVersion1 and above. const uint64_t FirstWord = support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr); memprof::IndexedVersion Version = memprof::Version0; - if (FirstWord == memprof::Version1) { + if (FirstWord == memprof::Version1 || FirstWord == memprof::Version2) { // Everything is good. We can proceed to deserialize the rest. - Version = memprof::Version1; + Version = static_cast<memprof::IndexedVersion>(FirstWord); } else if (FirstWord >= 24) { // This is a heuristic/hack to detect memprof::MemProfVersion0, // which does not have a version field in the header. @@ -1286,6 +1286,18 @@ Error IndexedInstrProfReader::readHeader() { const uint64_t FrameTableOffset = support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr); + // The offset in the stream right before invoking + // CallStackTableGenerator.Emit. + uint64_t CallStackPayloadOffset = 0; + // The value returned from CallStackTableGenerator.Emit. + uint64_t CallStackTableOffset = 0; + if (Version >= memprof::Version2) { + CallStackPayloadOffset = + support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr); + CallStackTableOffset = + support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr); + } + // Read the schema. auto SchemaOr = memprof::readMemProfSchema(Ptr); if (!SchemaOr) @@ -1296,7 +1308,7 @@ Error IndexedInstrProfReader::readHeader() { MemProfRecordTable.reset(MemProfRecordHashTable::Create( /*Buckets=*/Start + RecordTableOffset, /*Payload=*/Ptr, - /*Base=*/Start, memprof::RecordLookupTrait(memprof::Version1, Schema))); + /*Base=*/Start, memprof::RecordLookupTrait(Version, Schema))); // Initialize the frame table reader with the payload and bucket offsets. MemProfFrameTable.reset(MemProfFrameHashTable::Create( @@ -1304,12 +1316,22 @@ Error IndexedInstrProfReader::readHeader() { /*Payload=*/Start + FramePayloadOffset, /*Base=*/Start, memprof::FrameLookupTrait())); + if (Version >= memprof::Version2) + MemProfCallStackTable.reset(MemProfCallStackHashTable::Create( + /*Buckets=*/Start + CallStackTableOffset, + /*Payload=*/Start + CallStackPayloadOffset, + /*Base=*/Start, memprof::CallStackLookupTrait())); + #ifdef EXPENSIVE_CHECKS // Go through all the records and verify that CSId has been correctly // populated. Do this only under EXPENSIVE_CHECKS. Otherwise, we // would defeat the purpose of OnDiskIterableChainedHashTable. - for (const auto &Record : MemProfRecordTable->data()) - verifyIndexedMemProfRecord(Record); + // Note that we can compare CSId against actual call stacks only for + // Version0 and Version1 because IndexedAllocationInfo::CallStack and + // IndexedMemProfRecord::CallSites are not populated in Version2. + if (Version <= memprof::Version1) + for (const auto &Record : MemProfRecordTable->data()) + verifyIndexedMemProfRecord(Record); #endif } @@ -1502,7 +1524,29 @@ IndexedInstrProfReader::getMemProfRecord(const uint64_t FuncNameHash) { return *FrIter; }; - memprof::MemProfRecord Record(*Iter, IdToFrameCallback); + // Setup a callback to convert call stack ids to call stacks using the on-disk + // hash table. + std::optional<memprof::CallStackId> LastUnmappedCSId; + auto CSIdToCallStackCallback = [&](memprof::CallStackId CSId) { + llvm::SmallVector<memprof::Frame> Frames; + auto CSIter = MemProfCallStackTable->find(CSId); + if (CSIter == MemProfCallStackTable->end()) { + LastUnmappedCSId = CSId; + } else { + const llvm::SmallVector<memprof::FrameId> &CS = *CSIter; + Frames.reserve(CS.size()); + for (memprof::FrameId Id : CS) + Frames.push_back(IdToFrameCallback(Id)); + } + return Frames; + }; + + const memprof::IndexedMemProfRecord IndexedRecord = *Iter; + memprof::MemProfRecord Record; + if (MemProfCallStackTable) + Record = IndexedRecord.toMemProfRecord(CSIdToCallStackCallback); + else + Record = memprof::MemProfRecord(IndexedRecord, IdToFrameCallback); // Check that all frame ids were successfully converted to frames. if (LastUnmappedFrameId) { @@ -1510,6 +1554,14 @@ IndexedInstrProfReader::getMemProfRecord(const uint64_t FuncNameHash) { "memprof frame not found for frame id " + Twine(*LastUnmappedFrameId)); } + + // Check that all call stack ids were successfully converted to call stacks. + if (LastUnmappedCSId) { + return make_error<InstrProfError>( + instrprof_error::hash_mismatch, + "memprof call stack not found for call stack id " + + Twine(*LastUnmappedCSId)); + } return Record; } diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index ede042d..f70a497 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -297,6 +297,23 @@ bool InstrProfWriter::addMemProfFrame(const memprof::FrameId Id, return true; } +bool InstrProfWriter::addMemProfCallStack( + const memprof::CallStackId CSId, + const llvm::SmallVector<memprof::FrameId> &CallStack, + function_ref<void(Error)> Warn) { + auto [Iter, Inserted] = MemProfCallStackData.insert({CSId, CallStack}); + // If a mapping already exists for the current call stack id and it does not + // match the new mapping provided then reset the existing contents and bail + // out. We don't support the merging of memprof data whose CallStack -> Id + // mapping across profiles is inconsistent. + if (!Inserted && Iter->second != CallStack) { + Warn(make_error<InstrProfError>(instrprof_error::malformed, + "call stack to id mapping mismatch")); + return false; + } + return true; +} + void InstrProfWriter::addBinaryIds(ArrayRef<llvm::object::BuildID> BIs) { llvm::append_range(BinaryIds, BIs); } @@ -378,6 +395,12 @@ void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW, return; } + MemProfCallStackData.reserve(IPW.MemProfCallStackData.size()); + for (auto &[CSId, CallStack] : IPW.MemProfCallStackData) { + if (!addMemProfCallStack(CSId, CallStack, Warn)) + return; + } + MemProfRecordData.reserve(IPW.MemProfRecordData.size()); for (auto &I : IPW.MemProfRecordData) { addMemProfRecord(I.first, I.second); @@ -427,8 +450,8 @@ static uint64_t writeMemProfRecords( ProfOStream &OS, llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord> &MemProfRecordData, - memprof::MemProfSchema *Schema) { - memprof::RecordWriterTrait RecordWriter(memprof::Version1); + memprof::MemProfSchema *Schema, memprof::IndexedVersion Version) { + memprof::RecordWriterTrait RecordWriter(Version); RecordWriter.Schema = Schema; OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait> RecordTableGenerator; @@ -461,6 +484,20 @@ static uint64_t writeMemProfFrames( return FrameTableGenerator.Emit(OS.OS); } +static uint64_t writeMemProfCallStacks( + ProfOStream &OS, + llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>> + &MemProfCallStackData) { + OnDiskChainedHashTableGenerator<memprof::CallStackWriterTrait> + CallStackTableGenerator; + for (auto &[CSId, CallStack] : MemProfCallStackData) + CallStackTableGenerator.insert(CSId, CallStack); + // Release the memory of this vector as it is no longer needed. + MemProfCallStackData.clear(); + + return CallStackTableGenerator.Emit(OS.OS); +} + static Error writeMemProfV0( ProfOStream &OS, llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord> @@ -475,7 +512,7 @@ static Error writeMemProfV0( writeMemProfSchema(OS, Schema); uint64_t RecordTableOffset = - writeMemProfRecords(OS, MemProfRecordData, &Schema); + writeMemProfRecords(OS, MemProfRecordData, &Schema, memprof::Version0); uint64_t FramePayloadOffset = OS.tell(); uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfFrameData); @@ -501,7 +538,7 @@ static Error writeMemProfV1( writeMemProfSchema(OS, Schema); uint64_t RecordTableOffset = - writeMemProfRecords(OS, MemProfRecordData, &Schema); + writeMemProfRecords(OS, MemProfRecordData, &Schema, memprof::Version1); uint64_t FramePayloadOffset = OS.tell(); uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfFrameData); @@ -512,6 +549,43 @@ static Error writeMemProfV1( return Error::success(); } +static Error writeMemProfV2( + ProfOStream &OS, + llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord> + &MemProfRecordData, + llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData, + llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>> + &MemProfCallStackData) { + OS.write(memprof::Version2); + uint64_t HeaderUpdatePos = OS.tell(); + OS.write(0ULL); // Reserve space for the memprof record table offset. + OS.write(0ULL); // Reserve space for the memprof frame payload offset. + OS.write(0ULL); // Reserve space for the memprof frame table offset. + OS.write(0ULL); // Reserve space for the memprof call stack payload offset. + OS.write(0ULL); // Reserve space for the memprof call stack table offset. + + auto Schema = memprof::PortableMemInfoBlock::getSchema(); + writeMemProfSchema(OS, Schema); + + uint64_t RecordTableOffset = + writeMemProfRecords(OS, MemProfRecordData, &Schema, memprof::Version2); + + uint64_t FramePayloadOffset = OS.tell(); + uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfFrameData); + + uint64_t CallStackPayloadOffset = OS.tell(); + uint64_t CallStackTableOffset = + writeMemProfCallStacks(OS, MemProfCallStackData); + + uint64_t Header[] = { + RecordTableOffset, FramePayloadOffset, FrameTableOffset, + CallStackPayloadOffset, CallStackTableOffset, + }; + OS.patch({{HeaderUpdatePos, Header, std::size(Header)}}); + + return Error::success(); +} + // The MemProf profile data includes a simple schema // with the format described below followed by the hashtable: // uint64_t Version @@ -530,6 +604,8 @@ static Error writeMemProf( llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord> &MemProfRecordData, llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData, + llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>> + &MemProfCallStackData, memprof::IndexedVersion MemProfVersionRequested) { switch (MemProfVersionRequested) { @@ -538,8 +614,8 @@ static Error writeMemProf( case memprof::Version1: return writeMemProfV1(OS, MemProfRecordData, MemProfFrameData); case memprof::Version2: - // TODO: Implement. Fall through to the error handling below for now. - break; + return writeMemProfV2(OS, MemProfRecordData, MemProfFrameData, + MemProfCallStackData); } return make_error<InstrProfError>( @@ -658,7 +734,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf)) { MemProfSectionStart = OS.tell(); if (auto E = writeMemProf(OS, MemProfRecordData, MemProfFrameData, - MemProfVersionRequested)) + MemProfCallStackData, MemProfVersionRequested)) return E; } diff --git a/llvm/test/tools/llvm-profdata/memprof-merge-v0.test b/llvm/test/tools/llvm-profdata/memprof-merge-v0.test index 6813296..03ccbdd 100644 --- a/llvm/test/tools/llvm-profdata/memprof-merge-v0.test +++ b/llvm/test/tools/llvm-profdata/memprof-merge-v0.test @@ -13,6 +13,9 @@ RUN: llvm-profdata show %t.prof.v0 | FileCheck %s RUN: llvm-profdata merge %t.proftext %p/Inputs/basic.memprofraw --memprof-version=1 --profiled-binary %p/Inputs/basic.memprofexe -o %t.prof.v1 RUN: llvm-profdata show %t.prof.v1 | FileCheck %s +RUN: llvm-profdata merge %t.proftext %p/Inputs/basic.memprofraw --memprof-version=2 --profiled-binary %p/Inputs/basic.memprofexe -o %t.prof.v2 +RUN: llvm-profdata show %t.prof.v2 | FileCheck %s + For now we only check the validity of the instrumented profile since we don't have a way to display the contents of the memprof indexed format yet. diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index 66a4cea..78daf9f 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -305,7 +305,8 @@ cl::opt<memprof::IndexedVersion> MemProfVersionRequested( cl::desc("Specify the version of the memprof format to use"), cl::init(memprof::Version0), cl::values(clEnumValN(memprof::Version0, "0", "version 0"), - clEnumValN(memprof::Version1, "1", "version 1"))); + clEnumValN(memprof::Version1, "1", "version 1"), + clEnumValN(memprof::Version2, "2", "version 2"))); // Options specific to overlap subcommand. cl::opt<std::string> BaseFilename(cl::Positional, cl::Required, @@ -677,6 +678,18 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper, if (!Succeeded) return; } + + // Add the call stacks into the writer context. + const auto &CSIdToCallStacks = Reader->getCallStacks(); + for (const auto &I : CSIdToCallStacks) { + bool Succeeded = WC->Writer.addMemProfCallStack( + /*Id=*/I.first, /*Frame=*/I.getSecond(), MemProfError); + // If we weren't able to add the call stacks then it doesn't make sense + // to try to add the records from this profile. + if (!Succeeded) + return; + } + const auto &FunctionProfileData = Reader->getProfileData(); // Add the memprof records into the writer context. for (const auto &[GUID, Record] : FunctionProfileData) { diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp index 82701c4..16b8b50 100644 --- a/llvm/unittests/ProfileData/InstrProfTest.cpp +++ b/llvm/unittests/ProfileData/InstrProfTest.cpp @@ -347,6 +347,9 @@ using ::llvm::memprof::IndexedMemProfRecord; using ::llvm::memprof::MemInfoBlock; using FrameIdMapTy = llvm::DenseMap<::llvm::memprof::FrameId, ::llvm::memprof::Frame>; +using CallStackIdMapTy = + llvm::DenseMap<::llvm::memprof::CallStackId, + ::llvm::SmallVector<::llvm::memprof::FrameId>>; static FrameIdMapTy getFrameMapping() { FrameIdMapTy Mapping; @@ -359,6 +362,14 @@ static FrameIdMapTy getFrameMapping() { return Mapping; } +static CallStackIdMapTy getCallStackMapping() { + CallStackIdMapTy Mapping; + Mapping.insert({0x111, {0, 1}}); + Mapping.insert({0x222, {2, 3}}); + Mapping.insert({0x333, {4, 5}}); + return Mapping; +} + IndexedMemProfRecord makeRecord( std::initializer_list<std::initializer_list<::llvm::memprof::FrameId>> AllocFrames, @@ -374,6 +385,21 @@ IndexedMemProfRecord makeRecord( return MR; } +IndexedMemProfRecord +makeRecordV2(std::initializer_list<::llvm::memprof::CallStackId> AllocFrames, + std::initializer_list<::llvm::memprof::CallStackId> CallSiteFrames, + const MemInfoBlock &Block = MemInfoBlock()) { + llvm::memprof::IndexedMemProfRecord MR; + for (const auto &CSId : AllocFrames) + // We don't populate IndexedAllocationInfo::CallStack because we use it only + // in Version0 and Version1. + MR.AllocSites.emplace_back(::llvm::SmallVector<memprof::FrameId>(), CSId, + Block); + for (const auto &CSId : CallSiteFrames) + MR.CallSiteIds.push_back(CSId); + return MR; +} + MATCHER_P(EqualsRecord, Want, "") { const memprof::MemProfRecord &Got = arg; @@ -408,7 +434,7 @@ MATCHER_P(EqualsRecord, Want, "") { return true; } -TEST_F(InstrProfTest, test_memprof) { +TEST_F(InstrProfTest, test_memprof_v0) { ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf), Succeeded()); @@ -450,6 +476,67 @@ TEST_F(InstrProfTest, test_memprof) { EXPECT_THAT(WantRecord, EqualsRecord(Record)); } +TEST_F(InstrProfTest, test_memprof_v2) { + Writer.setMemProfVersionRequested(memprof::Version2); + + ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf), + Succeeded()); + + const IndexedMemProfRecord IndexedMR = makeRecordV2( + /*AllocFrames=*/{0x111, 0x222}, + /*CallSiteFrames=*/{0x333}); + const FrameIdMapTy IdToFrameMap = getFrameMapping(); + const auto CSIdToCallStackMap = getCallStackMapping(); + for (const auto &I : IdToFrameMap) { + Writer.addMemProfFrame(I.first, I.getSecond(), Err); + } + for (const auto &I : CSIdToCallStackMap) { + Writer.addMemProfCallStack(I.first, I.getSecond(), Err); + } + Writer.addMemProfRecord(/*Id=*/0x9999, IndexedMR); + + auto Profile = Writer.writeBuffer(); + readProfile(std::move(Profile)); + + auto RecordOr = Reader->getMemProfRecord(0x9999); + ASSERT_THAT_ERROR(RecordOr.takeError(), Succeeded()); + const memprof::MemProfRecord &Record = RecordOr.get(); + + std::optional<memprof::FrameId> LastUnmappedFrameId; + auto IdToFrameCallback = [&](const memprof::FrameId Id) { + auto Iter = IdToFrameMap.find(Id); + if (Iter == IdToFrameMap.end()) { + LastUnmappedFrameId = Id; + return memprof::Frame(0, 0, 0, false); + } + return Iter->second; + }; + + std::optional<::llvm::memprof::CallStackId> LastUnmappedCSId; + auto CSIdToCallStackCallback = [&](::llvm::memprof::CallStackId CSId) { + llvm::SmallVector<memprof::Frame> Frames; + auto CSIter = CSIdToCallStackMap.find(CSId); + if (CSIter == CSIdToCallStackMap.end()) { + LastUnmappedCSId = CSId; + } else { + const ::llvm::SmallVector<::llvm::memprof::FrameId> &CS = + CSIter->getSecond(); + Frames.reserve(CS.size()); + for (::llvm::memprof::FrameId Id : CS) + Frames.push_back(IdToFrameCallback(Id)); + } + return Frames; + }; + + const ::llvm::memprof::MemProfRecord WantRecord = + IndexedMR.toMemProfRecord(CSIdToCallStackCallback); + ASSERT_EQ(LastUnmappedFrameId, std::nullopt) + << "could not map frame id: " << *LastUnmappedFrameId; + ASSERT_EQ(LastUnmappedCSId, std::nullopt) + << "could not map call stack id: " << *LastUnmappedCSId; + EXPECT_THAT(WantRecord, EqualsRecord(Record)); +} + TEST_F(InstrProfTest, test_memprof_getrecord_error) { ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf), Succeeded()); |