[memprof] Add Version2 of IndexedMemProfRecord serialization (#87455)

I'm currently developing a new version of the indexed memprof format where we deduplicate call stacks in IndexedAllocationInfo::CallStack and IndexedMemProfRecord::CallSites. We refer to call stacks with integer IDs, namely CallStackId, just as we refer to Frame with FrameId. The deduplication will cut down the profile file size by 80% in a large memprof file of mine. As a step toward the goal, this patch teaches IndexedMemProfRecord::{serialize,deserialize} to speak Version2. A subsequent patch will add Version2 support to llvm-profdata. The essence of the patch is to replace the serialization of a call stack, a vector of FrameIDs, with that of a CallStackId. That is: const IndexedAllocationInfo &N = ...; ... LE.write<uint64_t>(N.CallStack.size()); for (const FrameId &Id : N.CallStack) LE.write<FrameId>(Id); becomes: LE.write<CallStackId>(N.CSId);
author: Kazu Hirata <kazu@google.com> 2024-04-03 21:48:38 -0700
committer: GitHub <noreply@github.com> 2024-04-03 21:48:38 -0700
commit: d89914f30bc7c180fe349a5aa0f03438ae6c20a4 (patch)
tree: bd2319e84ed6e6cd439d6f97c12c137d96179ce5 /llvm/unittests/ProfileData/MemProfTest.cpp
parent: 3a7b5223a6639e497c856368da11b5d74ec9d6e8 (diff)
download: llvm-d89914f30bc7c180fe349a5aa0f03438ae6c20a4.zip
llvm-d89914f30bc7c180fe349a5aa0f03438ae6c20a4.tar.gz
llvm-d89914f30bc7c180fe349a5aa0f03438ae6c20a4.tar.bz2
1 files changed, 38 insertions, 3 deletions
diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
index 1cca44e..f1aa6f3 100644
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -265,7 +265,9 @@ TEST(MemProf, PortableWrapper) {
   EXPECT_EQ(3UL, ReadBlock.getAllocCpuId());
 }
 
-TEST(MemProf, RecordSerializationRoundTrip) {
+// Version0 and Version1 serialize IndexedMemProfRecord in the same format, so
+// we share one test.
+TEST(MemProf, RecordSerializationRoundTripVersion0And1) {
   const MemProfSchema Schema = getFullSchema();
 
   MemInfoBlock Info(/*size=*/16, /*access_count=*/7, /*alloc_timestamp=*/1000,
@@ -284,14 +286,47 @@ TEST(MemProf, RecordSerializationRoundTrip) {
                                    Info);
   }
   Record.CallSites.assign(CallSites);
+  for (const auto &CS : CallSites)
+    Record.CallSiteIds.push_back(llvm::memprof::hashCallStack(CS));
 
   std::string Buffer;
   llvm::raw_string_ostream OS(Buffer);
-  Record.serialize(Schema, OS);
+  Record.serialize(Schema, OS, llvm::memprof::Version0);
   OS.flush();
 
   const IndexedMemProfRecord GotRecord = IndexedMemProfRecord::deserialize(
-      Schema, reinterpret_cast<const unsigned char *>(Buffer.data()));
+      Schema, reinterpret_cast<const unsigned char *>(Buffer.data()),
+      llvm::memprof::Version0);
+
+  EXPECT_EQ(Record, GotRecord);
+}
+
+TEST(MemProf, RecordSerializationRoundTripVerion2) {
+  const MemProfSchema Schema = getFullSchema();
+
+  MemInfoBlock Info(/*size=*/16, /*access_count=*/7, /*alloc_timestamp=*/1000,
+                    /*dealloc_timestamp=*/2000, /*alloc_cpu=*/3,
+                    /*dealloc_cpu=*/4);
+
+  llvm::SmallVector<llvm::memprof::CallStackId> CallStackIds = {0x123, 0x456};
+
+  llvm::SmallVector<llvm::memprof::CallStackId> CallSiteIds = {0x333, 0x444};
+
+  IndexedMemProfRecord Record;
+  for (const auto &CSId : CallStackIds) {
+    // Use the same info block for both allocation sites.
+    Record.AllocSites.emplace_back(llvm::SmallVector<FrameId>(), CSId, Info);
+  }
+  Record.CallSiteIds.assign(CallSiteIds);
+
+  std::string Buffer;
+  llvm::raw_string_ostream OS(Buffer);
+  Record.serialize(Schema, OS, llvm::memprof::Version2);
+  OS.flush();
+
+  const IndexedMemProfRecord GotRecord = IndexedMemProfRecord::deserialize(
+      Schema, reinterpret_cast<const unsigned char *>(Buffer.data()),
+      llvm::memprof::Version2);
 
   EXPECT_EQ(Record, GotRecord);
 }
author	Kazu Hirata <kazu@google.com>	2024-04-03 21:48:38 -0700
committer	GitHub <noreply@github.com>	2024-04-03 21:48:38 -0700
commit	d89914f30bc7c180fe349a5aa0f03438ae6c20a4 (patch)
tree	bd2319e84ed6e6cd439d6f97c12c137d96179ce5 /llvm/unittests/ProfileData/MemProfTest.cpp
parent	3a7b5223a6639e497c856368da11b5d74ec9d6e8 (diff)
download	llvm-d89914f30bc7c180fe349a5aa0f03438ae6c20a4.zip llvm-d89914f30bc7c180fe349a5aa0f03438ae6c20a4.tar.gz llvm-d89914f30bc7c180fe349a5aa0f03438ae6c20a4.tar.bz2