diff options
author | Kazu Hirata <kazu@google.com> | 2025-04-23 15:39:45 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-04-23 15:39:45 -0700 |
commit | 9a8f90dba3f8c25cbb3525a482053d3abcd3fddc (patch) | |
tree | 83ae9f65e11cb86052598ca6658e5d28278f4447 /llvm/lib/ProfileData | |
parent | b6f32ad8b03dccaba0db7ded9d561ee83e4530ab (diff) | |
download | llvm-9a8f90dba3f8c25cbb3525a482053d3abcd3fddc.zip llvm-9a8f90dba3f8c25cbb3525a482053d3abcd3fddc.tar.gz llvm-9a8f90dba3f8c25cbb3525a482053d3abcd3fddc.tar.bz2 |
[memprof] Move writeMemProf to a separate file (#137051)
This patch moves writeMemProf and its subroutines to a separate file.
The intent is as follows:
- Reduce the size of InstrProfWriter.cpp.
- Move the subroutines to a separate file because they don't interact
with anything else in InstrProfWriter.cpp.
Remarks:
- The new file is named IndexedMemProfData.cpp without "Writer" in the
name so that we can move the reader code to this file in the future.
- This patch just moves code without changing the function signatures
for now. It might make sense to implement a class encompassing
"serialize" and "deserialize" methods for IndexedMemProfData, but
that's left to subsequent patches.
Diffstat (limited to 'llvm/lib/ProfileData')
-rw-r--r-- | llvm/lib/ProfileData/CMakeLists.txt | 1 | ||||
-rw-r--r-- | llvm/lib/ProfileData/IndexedMemProfData.cpp | 300 | ||||
-rw-r--r-- | llvm/lib/ProfileData/InstrProfWriter.cpp | 283 |
3 files changed, 302 insertions, 282 deletions
diff --git a/llvm/lib/ProfileData/CMakeLists.txt b/llvm/lib/ProfileData/CMakeLists.txt index 4fa1b76..eb7c2a3 100644 --- a/llvm/lib/ProfileData/CMakeLists.txt +++ b/llvm/lib/ProfileData/CMakeLists.txt @@ -1,5 +1,6 @@ add_llvm_component_library(LLVMProfileData GCOV.cpp + IndexedMemProfData.cpp InstrProf.cpp InstrProfCorrelator.cpp InstrProfReader.cpp diff --git a/llvm/lib/ProfileData/IndexedMemProfData.cpp b/llvm/lib/ProfileData/IndexedMemProfData.cpp new file mode 100644 index 0000000..fb4a891 --- /dev/null +++ b/llvm/lib/ProfileData/IndexedMemProfData.cpp @@ -0,0 +1,300 @@ +//===- IndexedMemProfData.h - MemProf format support ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// MemProf data is serialized in writeMemProf provided in this file. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ProfileData/InstrProf.h" +#include "llvm/ProfileData/MemProf.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/OnDiskHashTable.h" + +namespace llvm { + +// Serialize Schema. +static void writeMemProfSchema(ProfOStream &OS, + const memprof::MemProfSchema &Schema) { + OS.write(static_cast<uint64_t>(Schema.size())); + for (const auto Id : Schema) + OS.write(static_cast<uint64_t>(Id)); +} + +// Serialize MemProfRecordData. Return RecordTableOffset. +static uint64_t writeMemProfRecords( + ProfOStream &OS, + llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord> + &MemProfRecordData, + memprof::MemProfSchema *Schema, memprof::IndexedVersion Version, + llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId> + *MemProfCallStackIndexes = nullptr) { + memprof::RecordWriterTrait RecordWriter(Schema, Version, + MemProfCallStackIndexes); + OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait> + RecordTableGenerator; + for (auto &[GUID, Record] : MemProfRecordData) { + // Insert the key (func hash) and value (memprof record). + RecordTableGenerator.insert(GUID, Record, RecordWriter); + } + // Release the memory of this MapVector as it is no longer needed. + MemProfRecordData.clear(); + + // The call to Emit invokes RecordWriterTrait::EmitData which destructs + // the memprof record copies owned by the RecordTableGenerator. This works + // because the RecordTableGenerator is not used after this point. + return RecordTableGenerator.Emit(OS.OS, RecordWriter); +} + +// Serialize MemProfFrameData. Return FrameTableOffset. +static uint64_t writeMemProfFrames( + ProfOStream &OS, + llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData) { + OnDiskChainedHashTableGenerator<memprof::FrameWriterTrait> + FrameTableGenerator; + for (auto &[FrameId, Frame] : MemProfFrameData) { + // Insert the key (frame id) and value (frame contents). + FrameTableGenerator.insert(FrameId, Frame); + } + // Release the memory of this MapVector as it is no longer needed. + MemProfFrameData.clear(); + + return FrameTableGenerator.Emit(OS.OS); +} + +// Serialize MemProfFrameData. Return the mapping from FrameIds to their +// indexes within the frame array. +static llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> +writeMemProfFrameArray( + ProfOStream &OS, + llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData, + llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram) { + // Mappings from FrameIds to array indexes. + llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes; + + // Compute the order in which we serialize Frames. The order does not matter + // in terms of correctness, but we still compute it for deserialization + // performance. Specifically, if we serialize frequently used Frames one + // after another, we have better cache utilization. For two Frames that + // appear equally frequently, we break a tie by serializing the one that tends + // to appear earlier in call stacks. We implement the tie-breaking mechanism + // by computing the sum of indexes within call stacks for each Frame. If we + // still have a tie, then we just resort to compare two FrameIds, which is + // just for stability of output. + std::vector<std::pair<memprof::FrameId, const memprof::Frame *>> FrameIdOrder; + FrameIdOrder.reserve(MemProfFrameData.size()); + for (const auto &[Id, Frame] : MemProfFrameData) + FrameIdOrder.emplace_back(Id, &Frame); + assert(MemProfFrameData.size() == FrameIdOrder.size()); + llvm::sort(FrameIdOrder, + [&](const std::pair<memprof::FrameId, const memprof::Frame *> &L, + const std::pair<memprof::FrameId, const memprof::Frame *> &R) { + const auto &SL = FrameHistogram[L.first]; + const auto &SR = FrameHistogram[R.first]; + // Popular FrameIds should come first. + if (SL.Count != SR.Count) + return SL.Count > SR.Count; + // If they are equally popular, then the one that tends to appear + // earlier in call stacks should come first. + if (SL.PositionSum != SR.PositionSum) + return SL.PositionSum < SR.PositionSum; + // Compare their FrameIds for sort stability. + return L.first < R.first; + }); + + // Serialize all frames while creating mappings from linear IDs to FrameIds. + uint64_t Index = 0; + MemProfFrameIndexes.reserve(FrameIdOrder.size()); + for (const auto &[Id, F] : FrameIdOrder) { + F->serialize(OS.OS); + MemProfFrameIndexes.insert({Id, Index}); + ++Index; + } + assert(MemProfFrameData.size() == Index); + assert(MemProfFrameData.size() == MemProfFrameIndexes.size()); + + // Release the memory of this MapVector as it is no longer needed. + MemProfFrameData.clear(); + + return MemProfFrameIndexes; +} + +static uint64_t writeMemProfCallStacks( + ProfOStream &OS, + llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>> + &MemProfCallStackData) { + OnDiskChainedHashTableGenerator<memprof::CallStackWriterTrait> + CallStackTableGenerator; + for (auto &[CSId, CallStack] : MemProfCallStackData) + CallStackTableGenerator.insert(CSId, CallStack); + // Release the memory of this vector as it is no longer needed. + MemProfCallStackData.clear(); + + return CallStackTableGenerator.Emit(OS.OS); +} + +static llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId> +writeMemProfCallStackArray( + ProfOStream &OS, + llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>> + &MemProfCallStackData, + llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> + &MemProfFrameIndexes, + llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram, + unsigned &NumElements) { + llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId> + MemProfCallStackIndexes; + + memprof::CallStackRadixTreeBuilder<memprof::FrameId> Builder; + Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes, + FrameHistogram); + for (auto I : Builder.getRadixArray()) + OS.write32(I); + NumElements = Builder.getRadixArray().size(); + MemProfCallStackIndexes = Builder.takeCallStackPos(); + + // Release the memory of this vector as it is no longer needed. + MemProfCallStackData.clear(); + + return MemProfCallStackIndexes; +} + +// Write out MemProf Version2 as follows: +// uint64_t Version +// uint64_t RecordTableOffset = RecordTableGenerator.Emit +// uint64_t FramePayloadOffset = Offset for the frame payload +// uint64_t FrameTableOffset = FrameTableGenerator.Emit +// uint64_t CallStackPayloadOffset = Offset for the call stack payload (NEW V2) +// uint64_t CallStackTableOffset = CallStackTableGenerator.Emit (NEW in V2) +// uint64_t Num schema entries +// uint64_t Schema entry 0 +// uint64_t Schema entry 1 +// .... +// uint64_t Schema entry N - 1 +// OnDiskChainedHashTable MemProfRecordData +// OnDiskChainedHashTable MemProfFrameData +// OnDiskChainedHashTable MemProfCallStackData (NEW in V2) +static Error writeMemProfV2(ProfOStream &OS, + memprof::IndexedMemProfData &MemProfData, + bool MemProfFullSchema) { + OS.write(memprof::Version2); + uint64_t HeaderUpdatePos = OS.tell(); + OS.write(0ULL); // Reserve space for the memprof record table offset. + OS.write(0ULL); // Reserve space for the memprof frame payload offset. + OS.write(0ULL); // Reserve space for the memprof frame table offset. + OS.write(0ULL); // Reserve space for the memprof call stack payload offset. + OS.write(0ULL); // Reserve space for the memprof call stack table offset. + + auto Schema = memprof::getHotColdSchema(); + if (MemProfFullSchema) + Schema = memprof::getFullSchema(); + writeMemProfSchema(OS, Schema); + + uint64_t RecordTableOffset = + writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version2); + + uint64_t FramePayloadOffset = OS.tell(); + uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames); + + uint64_t CallStackPayloadOffset = OS.tell(); + uint64_t CallStackTableOffset = + writeMemProfCallStacks(OS, MemProfData.CallStacks); + + uint64_t Header[] = { + RecordTableOffset, FramePayloadOffset, FrameTableOffset, + CallStackPayloadOffset, CallStackTableOffset, + }; + OS.patch({{HeaderUpdatePos, Header}}); + + return Error::success(); +} + +// Write out MemProf Version3 as follows: +// uint64_t Version +// uint64_t CallStackPayloadOffset = Offset for the call stack payload +// uint64_t RecordPayloadOffset = Offset for the record payload +// uint64_t RecordTableOffset = RecordTableGenerator.Emit +// uint64_t Num schema entries +// uint64_t Schema entry 0 +// uint64_t Schema entry 1 +// .... +// uint64_t Schema entry N - 1 +// Frames serialized one after another +// Call stacks encoded as a radix tree +// OnDiskChainedHashTable MemProfRecordData +static Error writeMemProfV3(ProfOStream &OS, + memprof::IndexedMemProfData &MemProfData, + bool MemProfFullSchema) { + OS.write(memprof::Version3); + uint64_t HeaderUpdatePos = OS.tell(); + OS.write(0ULL); // Reserve space for the memprof call stack payload offset. + OS.write(0ULL); // Reserve space for the memprof record payload offset. + OS.write(0ULL); // Reserve space for the memprof record table offset. + + auto Schema = memprof::getHotColdSchema(); + if (MemProfFullSchema) + Schema = memprof::getFullSchema(); + writeMemProfSchema(OS, Schema); + + llvm::DenseMap<memprof::FrameId, memprof::FrameStat> FrameHistogram = + memprof::computeFrameHistogram(MemProfData.CallStacks); + assert(MemProfData.Frames.size() == FrameHistogram.size()); + + llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes = + writeMemProfFrameArray(OS, MemProfData.Frames, FrameHistogram); + + uint64_t CallStackPayloadOffset = OS.tell(); + // The number of elements in the call stack array. + unsigned NumElements = 0; + llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId> + MemProfCallStackIndexes = + writeMemProfCallStackArray(OS, MemProfData.CallStacks, + MemProfFrameIndexes, FrameHistogram, + NumElements); + + uint64_t RecordPayloadOffset = OS.tell(); + uint64_t RecordTableOffset = + writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version3, + &MemProfCallStackIndexes); + + // IndexedMemProfReader::deserializeV3 computes the number of elements in the + // call stack array from the difference between CallStackPayloadOffset and + // RecordPayloadOffset. Verify that the computation works. + assert(CallStackPayloadOffset + + NumElements * sizeof(memprof::LinearFrameId) == + RecordPayloadOffset); + + uint64_t Header[] = { + CallStackPayloadOffset, + RecordPayloadOffset, + RecordTableOffset, + }; + OS.patch({{HeaderUpdatePos, Header}}); + + return Error::success(); +} + +// Write out the MemProf data in a requested version. +Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData, + memprof::IndexedVersion MemProfVersionRequested, + bool MemProfFullSchema) { + switch (MemProfVersionRequested) { + case memprof::Version2: + return writeMemProfV2(OS, MemProfData, MemProfFullSchema); + case memprof::Version3: + return writeMemProfV3(OS, MemProfData, MemProfFullSchema); + } + + return make_error<InstrProfError>( + instrprof_error::unsupported_version, + formatv("MemProf version {} not supported; " + "requires version between {} and {}, inclusive", + MemProfVersionRequested, memprof::MinimumSupportedVersion, + memprof::MaximumSupportedVersion)); +} + +} // namespace llvm diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index f1882dc..2759346 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/ProfileSummary.h" +#include "llvm/ProfileData/IndexedMemProfData.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/MemProf.h" #include "llvm/ProfileData/ProfileCommon.h" @@ -23,7 +24,6 @@ #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/Error.h" -#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/OnDiskHashTable.h" #include "llvm/Support/raw_ostream.h" @@ -449,287 +449,6 @@ static void setSummary(IndexedInstrProf::Summary *TheSummary, TheSummary->setEntry(I, Res[I]); } -// Serialize Schema. -static void writeMemProfSchema(ProfOStream &OS, - const memprof::MemProfSchema &Schema) { - OS.write(static_cast<uint64_t>(Schema.size())); - for (const auto Id : Schema) - OS.write(static_cast<uint64_t>(Id)); -} - -// Serialize MemProfRecordData. Return RecordTableOffset. -static uint64_t writeMemProfRecords( - ProfOStream &OS, - llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord> - &MemProfRecordData, - memprof::MemProfSchema *Schema, memprof::IndexedVersion Version, - llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId> - *MemProfCallStackIndexes = nullptr) { - memprof::RecordWriterTrait RecordWriter(Schema, Version, - MemProfCallStackIndexes); - OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait> - RecordTableGenerator; - for (auto &[GUID, Record] : MemProfRecordData) { - // Insert the key (func hash) and value (memprof record). - RecordTableGenerator.insert(GUID, Record, RecordWriter); - } - // Release the memory of this MapVector as it is no longer needed. - MemProfRecordData.clear(); - - // The call to Emit invokes RecordWriterTrait::EmitData which destructs - // the memprof record copies owned by the RecordTableGenerator. This works - // because the RecordTableGenerator is not used after this point. - return RecordTableGenerator.Emit(OS.OS, RecordWriter); -} - -// Serialize MemProfFrameData. Return FrameTableOffset. -static uint64_t writeMemProfFrames( - ProfOStream &OS, - llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData) { - OnDiskChainedHashTableGenerator<memprof::FrameWriterTrait> - FrameTableGenerator; - for (auto &[FrameId, Frame] : MemProfFrameData) { - // Insert the key (frame id) and value (frame contents). - FrameTableGenerator.insert(FrameId, Frame); - } - // Release the memory of this MapVector as it is no longer needed. - MemProfFrameData.clear(); - - return FrameTableGenerator.Emit(OS.OS); -} - -// Serialize MemProfFrameData. Return the mapping from FrameIds to their -// indexes within the frame array. -static llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> -writeMemProfFrameArray( - ProfOStream &OS, - llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData, - llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram) { - // Mappings from FrameIds to array indexes. - llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes; - - // Compute the order in which we serialize Frames. The order does not matter - // in terms of correctness, but we still compute it for deserialization - // performance. Specifically, if we serialize frequently used Frames one - // after another, we have better cache utilization. For two Frames that - // appear equally frequently, we break a tie by serializing the one that tends - // to appear earlier in call stacks. We implement the tie-breaking mechanism - // by computing the sum of indexes within call stacks for each Frame. If we - // still have a tie, then we just resort to compare two FrameIds, which is - // just for stability of output. - std::vector<std::pair<memprof::FrameId, const memprof::Frame *>> FrameIdOrder; - FrameIdOrder.reserve(MemProfFrameData.size()); - for (const auto &[Id, Frame] : MemProfFrameData) - FrameIdOrder.emplace_back(Id, &Frame); - assert(MemProfFrameData.size() == FrameIdOrder.size()); - llvm::sort(FrameIdOrder, - [&](const std::pair<memprof::FrameId, const memprof::Frame *> &L, - const std::pair<memprof::FrameId, const memprof::Frame *> &R) { - const auto &SL = FrameHistogram[L.first]; - const auto &SR = FrameHistogram[R.first]; - // Popular FrameIds should come first. - if (SL.Count != SR.Count) - return SL.Count > SR.Count; - // If they are equally popular, then the one that tends to appear - // earlier in call stacks should come first. - if (SL.PositionSum != SR.PositionSum) - return SL.PositionSum < SR.PositionSum; - // Compare their FrameIds for sort stability. - return L.first < R.first; - }); - - // Serialize all frames while creating mappings from linear IDs to FrameIds. - uint64_t Index = 0; - MemProfFrameIndexes.reserve(FrameIdOrder.size()); - for (const auto &[Id, F] : FrameIdOrder) { - F->serialize(OS.OS); - MemProfFrameIndexes.insert({Id, Index}); - ++Index; - } - assert(MemProfFrameData.size() == Index); - assert(MemProfFrameData.size() == MemProfFrameIndexes.size()); - - // Release the memory of this MapVector as it is no longer needed. - MemProfFrameData.clear(); - - return MemProfFrameIndexes; -} - -static uint64_t writeMemProfCallStacks( - ProfOStream &OS, - llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>> - &MemProfCallStackData) { - OnDiskChainedHashTableGenerator<memprof::CallStackWriterTrait> - CallStackTableGenerator; - for (auto &[CSId, CallStack] : MemProfCallStackData) - CallStackTableGenerator.insert(CSId, CallStack); - // Release the memory of this vector as it is no longer needed. - MemProfCallStackData.clear(); - - return CallStackTableGenerator.Emit(OS.OS); -} - -static llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId> -writeMemProfCallStackArray( - ProfOStream &OS, - llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>> - &MemProfCallStackData, - llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> - &MemProfFrameIndexes, - llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram, - unsigned &NumElements) { - llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId> - MemProfCallStackIndexes; - - memprof::CallStackRadixTreeBuilder<memprof::FrameId> Builder; - Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes, - FrameHistogram); - for (auto I : Builder.getRadixArray()) - OS.write32(I); - NumElements = Builder.getRadixArray().size(); - MemProfCallStackIndexes = Builder.takeCallStackPos(); - - // Release the memory of this vector as it is no longer needed. - MemProfCallStackData.clear(); - - return MemProfCallStackIndexes; -} - -// Write out MemProf Version2 as follows: -// uint64_t Version -// uint64_t RecordTableOffset = RecordTableGenerator.Emit -// uint64_t FramePayloadOffset = Offset for the frame payload -// uint64_t FrameTableOffset = FrameTableGenerator.Emit -// uint64_t CallStackPayloadOffset = Offset for the call stack payload (NEW V2) -// uint64_t CallStackTableOffset = CallStackTableGenerator.Emit (NEW in V2) -// uint64_t Num schema entries -// uint64_t Schema entry 0 -// uint64_t Schema entry 1 -// .... -// uint64_t Schema entry N - 1 -// OnDiskChainedHashTable MemProfRecordData -// OnDiskChainedHashTable MemProfFrameData -// OnDiskChainedHashTable MemProfCallStackData (NEW in V2) -static Error writeMemProfV2(ProfOStream &OS, - memprof::IndexedMemProfData &MemProfData, - bool MemProfFullSchema) { - OS.write(memprof::Version2); - uint64_t HeaderUpdatePos = OS.tell(); - OS.write(0ULL); // Reserve space for the memprof record table offset. - OS.write(0ULL); // Reserve space for the memprof frame payload offset. - OS.write(0ULL); // Reserve space for the memprof frame table offset. - OS.write(0ULL); // Reserve space for the memprof call stack payload offset. - OS.write(0ULL); // Reserve space for the memprof call stack table offset. - - auto Schema = memprof::getHotColdSchema(); - if (MemProfFullSchema) - Schema = memprof::getFullSchema(); - writeMemProfSchema(OS, Schema); - - uint64_t RecordTableOffset = - writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version2); - - uint64_t FramePayloadOffset = OS.tell(); - uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames); - - uint64_t CallStackPayloadOffset = OS.tell(); - uint64_t CallStackTableOffset = - writeMemProfCallStacks(OS, MemProfData.CallStacks); - - uint64_t Header[] = { - RecordTableOffset, FramePayloadOffset, FrameTableOffset, - CallStackPayloadOffset, CallStackTableOffset, - }; - OS.patch({{HeaderUpdatePos, Header}}); - - return Error::success(); -} - -// Write out MemProf Version3 as follows: -// uint64_t Version -// uint64_t CallStackPayloadOffset = Offset for the call stack payload -// uint64_t RecordPayloadOffset = Offset for the record payload -// uint64_t RecordTableOffset = RecordTableGenerator.Emit -// uint64_t Num schema entries -// uint64_t Schema entry 0 -// uint64_t Schema entry 1 -// .... -// uint64_t Schema entry N - 1 -// Frames serialized one after another -// Call stacks encoded as a radix tree -// OnDiskChainedHashTable MemProfRecordData -static Error writeMemProfV3(ProfOStream &OS, - memprof::IndexedMemProfData &MemProfData, - bool MemProfFullSchema) { - OS.write(memprof::Version3); - uint64_t HeaderUpdatePos = OS.tell(); - OS.write(0ULL); // Reserve space for the memprof call stack payload offset. - OS.write(0ULL); // Reserve space for the memprof record payload offset. - OS.write(0ULL); // Reserve space for the memprof record table offset. - - auto Schema = memprof::getHotColdSchema(); - if (MemProfFullSchema) - Schema = memprof::getFullSchema(); - writeMemProfSchema(OS, Schema); - - llvm::DenseMap<memprof::FrameId, memprof::FrameStat> FrameHistogram = - memprof::computeFrameHistogram(MemProfData.CallStacks); - assert(MemProfData.Frames.size() == FrameHistogram.size()); - - llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes = - writeMemProfFrameArray(OS, MemProfData.Frames, FrameHistogram); - - uint64_t CallStackPayloadOffset = OS.tell(); - // The number of elements in the call stack array. - unsigned NumElements = 0; - llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId> - MemProfCallStackIndexes = - writeMemProfCallStackArray(OS, MemProfData.CallStacks, - MemProfFrameIndexes, FrameHistogram, - NumElements); - - uint64_t RecordPayloadOffset = OS.tell(); - uint64_t RecordTableOffset = - writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version3, - &MemProfCallStackIndexes); - - // IndexedMemProfReader::deserializeV3 computes the number of elements in the - // call stack array from the difference between CallStackPayloadOffset and - // RecordPayloadOffset. Verify that the computation works. - assert(CallStackPayloadOffset + - NumElements * sizeof(memprof::LinearFrameId) == - RecordPayloadOffset); - - uint64_t Header[] = { - CallStackPayloadOffset, - RecordPayloadOffset, - RecordTableOffset, - }; - OS.patch({{HeaderUpdatePos, Header}}); - - return Error::success(); -} - -// Write out the MemProf data in a requested version. -static Error writeMemProf(ProfOStream &OS, - memprof::IndexedMemProfData &MemProfData, - memprof::IndexedVersion MemProfVersionRequested, - bool MemProfFullSchema) { - switch (MemProfVersionRequested) { - case memprof::Version2: - return writeMemProfV2(OS, MemProfData, MemProfFullSchema); - case memprof::Version3: - return writeMemProfV3(OS, MemProfData, MemProfFullSchema); - } - - return make_error<InstrProfError>( - instrprof_error::unsupported_version, - formatv("MemProf version {} not supported; " - "requires version between {} and {}, inclusive", - MemProfVersionRequested, memprof::MinimumSupportedVersion, - memprof::MaximumSupportedVersion)); -} - uint64_t InstrProfWriter::writeHeader(const IndexedInstrProf::Header &Header, const bool WritePrevVersion, ProfOStream &OS) { |