| author | Kazu Hirata <kazu@google.com> | 2024-11-26 23:48:03 -0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-11-26 23:48:03 -0800 |
| commit | c00e53208db638c35499fc80b555f8e14baa35f0 (patch) | |
| tree | 6afae1dd49aec877b23bbd36e60d40c3ec8c0497 | |
| parent | 50dfb0772b3995c6e5543600b2813f56d0736442 (diff) | |
[memprof] Add YAML-based deserialization for MemProf profile (#117829)
This patch adds YAML-based deserialization for the MemProf profile.

It's been painful to write tests for MemProf passes because we do not
have a text format for the MemProf profile. We would write a test case
in C++, run it to produce a binary MemProf profile, and then finally
run a test written in LLVM IR against that binary profile.

This patch paves the way toward a YAML-based MemProf profile.
Specifically, it adds a new class, YAMLMemProfReader, derived from
MemProfReader. For now, it only adds a function that parses a StringRef
pointing to YAML data. Subsequent patches will wire it up to
llvm-profdata and read from a file.

The field names are based on various printYAML functions in MemProf.h.
I'm not aiming for compatibility with the format used in printYAML,
but I don't see a point in changing the field names.
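
As a usage sketch (not part of this commit's diff), the new reader can be driven roughly as follows. The YAML layout mirrors the unit test added below; the helper function name and the sample values are purely illustrative.

#include "llvm/ADT/StringRef.h"
#include "llvm/ProfileData/MemProfReader.h"

// Hypothetical helper: parse an in-memory YAML document with the new
// YAMLMemProfReader and take ownership of the resulting IndexedMemProfData,
// whose Frames, CallStacks, and Records maps are keyed by hashes.
static llvm::memprof::IndexedMemProfData parseExampleProfile() {
  llvm::StringRef YAMLData = R"YAML(
---
HeapProfileRecords:
- GUID: 0xdeadbeef12345678
  AllocSites:
  - Callstack:
    - {Function: 0x100, LineOffset: 11, Column: 10, Inline: true}
    MemInfoBlock:
      AllocCount: 1
      TotalSize: 8
  CallSites: []
)YAML";

  llvm::memprof::YAMLMemProfReader Reader;
  Reader.parse(YAMLData);
  return Reader.takeMemProfData();
}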
| -rw-r--r-- | llvm/include/llvm/ProfileData/MemProf.h | 22 |
| -rw-r--r-- | llvm/include/llvm/ProfileData/MemProfReader.h | 6 |
| -rw-r--r-- | llvm/lib/ProfileData/MemProfReader.cpp | 120 |
| -rw-r--r-- | llvm/unittests/ProfileData/MemProfTest.cpp | 77 |

4 files changed, 225 insertions(+), 0 deletions(-)
diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index d64addc..3b631fa 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -19,6 +19,10 @@
 #include <optional>
 
 namespace llvm {
+namespace yaml {
+template <typename T> struct CustomMappingTraits;
+} // namespace yaml
+
 namespace memprof {
 
 struct MemProfRecord;
@@ -193,6 +197,9 @@ struct PortableMemInfoBlock {
     return Result;
   }
 
+  // Give YAML access to the individual MIB fields.
+  friend struct yaml::CustomMappingTraits<memprof::PortableMemInfoBlock>;
+
 private:
   // The set of available fields, indexed by Meta::Name.
   std::bitset<llvm::to_underlying(Meta::Size)> Schema;
@@ -362,6 +369,8 @@ struct IndexedAllocationInfo {
   IndexedAllocationInfo(CallStackId CSId, const MemInfoBlock &MB,
                         const MemProfSchema &Schema = getFullSchema())
       : CSId(CSId), Info(MB, Schema) {}
+  IndexedAllocationInfo(CallStackId CSId, const PortableMemInfoBlock &MB)
+      : CSId(CSId), Info(MB) {}
 
   // Returns the size in bytes when this allocation info struct is serialized.
   size_t serializedSize(const MemProfSchema &Schema,
@@ -498,6 +507,19 @@ struct MemProfRecord {
   }
 };
 
+// Helper struct for AllMemProfData. In YAML, we treat the GUID and the fields
+// within MemProfRecord at the same level as if the GUID were part of
+// MemProfRecord.
+struct GUIDMemProfRecordPair {
+  GlobalValue::GUID GUID;
+  MemProfRecord Record;
+};
+
+// The top-level data structure, only used with YAML for now.
+struct AllMemProfData {
+  std::vector<GUIDMemProfRecordPair> HeapProfileRecords;
+};
+
 // Reads a memprof schema from a buffer. All entries in the buffer are
 // interpreted as uint64_t. The first entry in the buffer denotes the number of
 // ids in the schema. Subsequent entries are integers which map to memprof::Meta
diff --git a/llvm/include/llvm/ProfileData/MemProfReader.h b/llvm/include/llvm/ProfileData/MemProfReader.h
index bf2f915..0529f79 100644
--- a/llvm/include/llvm/ProfileData/MemProfReader.h
+++ b/llvm/include/llvm/ProfileData/MemProfReader.h
@@ -209,6 +209,12 @@ private:
   // A mapping of the hash to symbol name, only used if KeepSymbolName is true.
   llvm::DenseMap<uint64_t, std::string> GuidToSymbolName;
 };
+
+class YAMLMemProfReader final : public MemProfReader {
+public:
+  YAMLMemProfReader() = default;
+  void parse(StringRef YAMLData);
+};
 } // namespace memprof
 } // namespace llvm
 
diff --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp
index 921ff34..16729c8 100644
--- a/llvm/lib/ProfileData/MemProfReader.cpp
+++ b/llvm/lib/ProfileData/MemProfReader.cpp
@@ -40,6 +40,85 @@
 #include "llvm/Support/Path.h"
 
 #define DEBUG_TYPE "memprof"
+
+namespace llvm {
+namespace yaml {
+template <> struct MappingTraits<memprof::Frame> {
+  static void mapping(IO &Io, memprof::Frame &F) {
+    Io.mapRequired("Function", F.Function);
+    Io.mapRequired("LineOffset", F.LineOffset);
+    Io.mapRequired("Column", F.Column);
+    Io.mapRequired("Inline", F.IsInlineFrame);
+
+    // Assert that the definition of Frame matches what we expect. The
+    // structured bindings below detect changes to the number of fields.
+    // static_assert checks the type of each field.
+    const auto &[Function, SymbolName, LineOffset, Column, IsInlineFrame] = F;
+    static_assert(
+        std::is_same_v<remove_cvref_t<decltype(Function)>, GlobalValue::GUID>);
+    static_assert(std::is_same_v<remove_cvref_t<decltype(SymbolName)>,
+                                 std::unique_ptr<std::string>>);
+    static_assert(
+        std::is_same_v<remove_cvref_t<decltype(LineOffset)>, uint32_t>);
+    static_assert(std::is_same_v<remove_cvref_t<decltype(Column)>, uint32_t>);
+    static_assert(
+        std::is_same_v<remove_cvref_t<decltype(IsInlineFrame)>, bool>);
+  }
+};
+
+template <> struct CustomMappingTraits<memprof::PortableMemInfoBlock> {
+  static void inputOne(IO &Io, StringRef KeyStr,
+                       memprof::PortableMemInfoBlock &MIB) {
+    // PortableMemInfoBlock keeps track of the set of fields that actually have
+    // values. We update the set here as we receive a key-value pair from the
+    // YAML document.
+#define MIBEntryDef(NameTag, Name, Type)                                       \
+  if (KeyStr == #Name) {                                                       \
+    Io.mapRequired(KeyStr.str().c_str(), MIB.Name);                            \
+    MIB.Schema.set(llvm::to_underlying(memprof::Meta::Name));                  \
+    return;                                                                    \
+  }
+#include "llvm/ProfileData/MIBEntryDef.inc"
+#undef MIBEntryDef
+    Io.setError("Key is not a valid validation event");
+  }
+
+  static void output(IO &Io, memprof::PortableMemInfoBlock &VI) {
+    llvm_unreachable("To be implemented");
+  }
+};
+
+template <> struct MappingTraits<memprof::AllocationInfo> {
+  static void mapping(IO &Io, memprof::AllocationInfo &AI) {
+    Io.mapRequired("Callstack", AI.CallStack);
+    Io.mapRequired("MemInfoBlock", AI.Info);
+  }
+};
+
+// In YAML, we use GUIDMemProfRecordPair instead of MemProfRecord so that we can
+// treat the GUID and the fields within MemProfRecord at the same level as if
+// the GUID were part of MemProfRecord.
+template <> struct MappingTraits<memprof::GUIDMemProfRecordPair> {
+  static void mapping(IO &Io, memprof::GUIDMemProfRecordPair &Pair) {
+    Io.mapRequired("GUID", Pair.GUID);
+    Io.mapRequired("AllocSites", Pair.Record.AllocSites);
+    Io.mapRequired("CallSites", Pair.Record.CallSites);
+  }
+};
+
+template <> struct MappingTraits<memprof::AllMemProfData> {
+  static void mapping(IO &Io, memprof::AllMemProfData &Data) {
+    Io.mapRequired("HeapProfileRecords", Data.HeapProfileRecords);
+  }
+};
+} // namespace yaml
+} // namespace llvm
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::Frame)
+LLVM_YAML_IS_SEQUENCE_VECTOR(std::vector<memprof::Frame>)
+LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::AllocationInfo)
+LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDMemProfRecordPair)
+
 namespace llvm {
 namespace memprof {
 namespace {
@@ -756,5 +835,46 @@ Error RawMemProfReader::readNextRecord(
   };
   return MemProfReader::readNextRecord(GuidRecord, IdToFrameCallback);
 }
+
+void YAMLMemProfReader::parse(StringRef YAMLData) {
+  memprof::AllMemProfData Doc;
+  yaml::Input Yin(YAMLData);
+
+  Yin >> Doc;
+  if (Yin.error())
+    return;
+
+  // Add a call stack to MemProfData.CallStacks and return its CallStackId.
+  auto AddCallStack = [&](ArrayRef<Frame> CallStack) -> CallStackId {
+    SmallVector<FrameId> IndexedCallStack;
+    IndexedCallStack.reserve(CallStack.size());
+    for (const Frame &F : CallStack) {
+      FrameId Id = F.hash();
+      MemProfData.Frames.try_emplace(Id, F);
+      IndexedCallStack.push_back(Id);
+    }
+    CallStackId CSId = hashCallStack(IndexedCallStack);
+    MemProfData.CallStacks.try_emplace(CSId, std::move(IndexedCallStack));
+    return CSId;
+  };
+
+  for (const auto &[GUID, Record] : Doc.HeapProfileRecords) {
+    IndexedMemProfRecord IndexedRecord;
+
+    // Convert AllocationInfo to IndexedAllocationInfo.
+    for (const AllocationInfo &AI : Record.AllocSites) {
+      CallStackId CSId = AddCallStack(AI.CallStack);
+      IndexedRecord.AllocSites.emplace_back(CSId, AI.Info);
+    }
+
+    // Populate CallSiteIds.
+    for (const auto &CallSite : Record.CallSites) {
+      CallStackId CSId = AddCallStack(CallSite);
+      IndexedRecord.CallSiteIds.push_back(CSId);
+    }
+
+    MemProfData.Records.try_emplace(GUID, std::move(IndexedRecord));
+  }
+}
 } // namespace memprof
 } // namespace llvm
diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
index b3b6249..5ab860e 100644
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -34,6 +34,7 @@ using ::llvm::memprof::CallStackId;
 using ::llvm::memprof::CallStackMap;
 using ::llvm::memprof::Frame;
 using ::llvm::memprof::FrameId;
+using ::llvm::memprof::hashCallStack;
 using ::llvm::memprof::IndexedAllocationInfo;
 using ::llvm::memprof::IndexedMemProfRecord;
 using ::llvm::memprof::MemInfoBlock;
@@ -46,8 +47,11 @@ using ::llvm::memprof::RawMemProfReader;
 using ::llvm::memprof::SegmentEntry;
 using ::llvm::object::SectionedAddress;
 using ::llvm::symbolize::SymbolizableModule;
+using ::testing::ElementsAre;
+using ::testing::Pair;
 using ::testing::Return;
 using ::testing::SizeIs;
+using ::testing::UnorderedElementsAre;
 
 class MockSymbolizer : public SymbolizableModule {
 public:
@@ -742,4 +746,77 @@ TEST(MemProf, RadixTreeBuilderSuccessiveJumps) {
   EXPECT_THAT(Mappings, testing::Contains(testing::Pair(
                             llvm::memprof::hashCallStack(CS4), 10U)));
 }
+
+// Verify that we can parse YAML and retrieve IndexedMemProfData as expected.
+TEST(MemProf, YAMLParser) {
+  StringRef YAMLData = R"YAML(
+---
+HeapProfileRecords:
+- GUID: 0xdeadbeef12345678
+  AllocSites:
+  - Callstack:
+    - {Function: 0x100, LineOffset: 11, Column: 10, Inline: true}
+    - {Function: 0x200, LineOffset: 22, Column: 20, Inline: false}
+    MemInfoBlock:
+      AllocCount: 777
+      TotalSize: 888
+  - Callstack:
+    - {Function: 0x300, LineOffset: 33, Column: 30, Inline: false}
+    - {Function: 0x400, LineOffset: 44, Column: 40, Inline: true}
+    MemInfoBlock:
+      AllocCount: 666
+      TotalSize: 555
+  CallSites:
+  - - {Function: 0x500, LineOffset: 55, Column: 50, Inline: true}
+    - {Function: 0x600, LineOffset: 66, Column: 60, Inline: false}
+  - - {Function: 0x700, LineOffset: 77, Column: 70, Inline: true}
+    - {Function: 0x800, LineOffset: 88, Column: 80, Inline: false}
+)YAML";
+
+  llvm::memprof::YAMLMemProfReader YAMLReader;
+  YAMLReader.parse(YAMLData);
+  llvm::memprof::IndexedMemProfData MemProfData = YAMLReader.takeMemProfData();
+
+  Frame F1(0x100, 11, 10, true);
+  Frame F2(0x200, 22, 20, false);
+  Frame F3(0x300, 33, 30, false);
+  Frame F4(0x400, 44, 40, true);
+  Frame F5(0x500, 55, 50, true);
+  Frame F6(0x600, 66, 60, false);
+  Frame F7(0x700, 77, 70, true);
+  Frame F8(0x800, 88, 80, false);
+
+  llvm::SmallVector<FrameId> CS1 = {F1.hash(), F2.hash()};
+  llvm::SmallVector<FrameId> CS2 = {F3.hash(), F4.hash()};
+  llvm::SmallVector<FrameId> CS3 = {F5.hash(), F6.hash()};
+  llvm::SmallVector<FrameId> CS4 = {F7.hash(), F8.hash()};
+
+  // Verify the entire contents of MemProfData.Frames.
+  EXPECT_THAT(MemProfData.Frames,
+              UnorderedElementsAre(Pair(F1.hash(), F1), Pair(F2.hash(), F2),
+                                   Pair(F3.hash(), F3), Pair(F4.hash(), F4),
+                                   Pair(F5.hash(), F5), Pair(F6.hash(), F6),
+                                   Pair(F7.hash(), F7), Pair(F8.hash(), F8)));
+
+  // Verify the entire contents of MemProfData.CallStacks.
+  EXPECT_THAT(MemProfData.CallStacks,
+              UnorderedElementsAre(Pair(hashCallStack(CS1), CS1),
+                                   Pair(hashCallStack(CS2), CS2),
+                                   Pair(hashCallStack(CS3), CS3),
+                                   Pair(hashCallStack(CS4), CS4)));
+
+  // Verify the entire contents of MemProfData.Records.
+  ASSERT_THAT(MemProfData.Records, SizeIs(1));
+  const auto &[GUID, Record] = *MemProfData.Records.begin();
+  EXPECT_EQ(GUID, 0xdeadbeef12345678ULL);
+  ASSERT_THAT(Record.AllocSites, SizeIs(2));
+  EXPECT_EQ(Record.AllocSites[0].CSId, hashCallStack(CS1));
+  EXPECT_EQ(Record.AllocSites[0].Info.getAllocCount(), 777U);
+  EXPECT_EQ(Record.AllocSites[0].Info.getTotalSize(), 888U);
+  EXPECT_EQ(Record.AllocSites[1].CSId, hashCallStack(CS2));
+  EXPECT_EQ(Record.AllocSites[1].Info.getAllocCount(), 666U);
+  EXPECT_EQ(Record.AllocSites[1].Info.getTotalSize(), 555U);
+  EXPECT_THAT(Record.CallSiteIds,
+              ElementsAre(hashCallStack(CS3), hashCallStack(CS4)));
+}
 } // namespace