diff options
author | Kazu Hirata <kazu@google.com> | 2024-03-23 19:50:15 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-23 19:50:15 -0700 |
commit | 74799f424063a2d751e0f9ea698db1f4efd0d8b2 (patch) | |
tree | 293b0e30843ddc22326ad5540e2ebdba6a183109 /llvm/lib/ProfileData | |
parent | 7c9b5228da94a44f5e3948814d896de537d162bb (diff) | |
download | llvm-74799f424063a2d751e0f9ea698db1f4efd0d8b2.zip llvm-74799f424063a2d751e0f9ea698db1f4efd0d8b2.tar.gz llvm-74799f424063a2d751e0f9ea698db1f4efd0d8b2.tar.bz2 |
[memprof] Add call stack IDs to IndexedAllocationInfo (#85888)
The indexed MemProf file has a huge amount of redundancy. In a large
internal application, 82% of call stacks, stored in
IndexedAllocationInfo::CallStack, are duplicates.
We should work toward deduplicating call stacks by referring to them
with unique IDs, with the actual call stacks stored in a separate data
structure, much like we refer to memprof::Frame with memprof::FrameId.
At the same time, we need to facilitate a graceful transition from the
current version of the MemProf format to the next. We should be able
to read (but not write) the current version of the MemProf file even
after we move on to the next one.
With those goals in mind, I propose to have an integer ID next to
CallStack in IndexedAllocationInfo to refer to a call stack in a
succinct manner. We'll gradually increase the areas of the compiler
where IDs and call stacks have a one-to-one correspondence and
eventually remove the existing CallStack field.
This patch adds call stack ID, named CSId, to IndexedAllocationInfo
and teaches the raw profile reader to compute unique call stack IDs
and store them in the new field. It does not introduce any user of
the call stack IDs yet, except in verifyFunctionProfileData.
Diffstat (limited to 'llvm/lib/ProfileData')
-rw-r--r-- | llvm/lib/ProfileData/MemProf.cpp | 25 | ||||
-rw-r--r-- | llvm/lib/ProfileData/RawMemProfReader.cpp | 6 |
2 files changed, 30 insertions, 1 deletions
diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
index 0461f0e..bffa4ed 100644
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -3,8 +3,10 @@
 #include "llvm/IR/Function.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/ProfileData/SampleProf.h"
+#include "llvm/Support/BLAKE3.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/EndianStream.h"
+#include "llvm/Support/HashBuilder.h"
 
 namespace llvm {
 namespace memprof {
@@ -117,5 +119,28 @@ Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer) {
   return Result;
 }
 
+CallStackId hashCallStack(ArrayRef<FrameId> CS) {
+  llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
+      HashBuilder;
+  for (FrameId F : CS)
+    HashBuilder.add(F);
+  llvm::BLAKE3Result<8> Hash = HashBuilder.final();
+  CallStackId CSId;
+  std::memcpy(&CSId, Hash.data(), sizeof(Hash));
+  return CSId;
+}
+
+void verifyFunctionProfileData(
+    const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>
+        &FunctionProfileData) {
+  for (const auto &[GUID, Record] : FunctionProfileData) {
+    (void)GUID;
+    for (const auto &AS : Record.AllocSites) {
+      assert(AS.CSId == hashCallStack(AS.CallStack));
+      (void)AS;
+    }
+  }
+}
+
 } // namespace memprof
 } // namespace llvm
diff --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/RawMemProfReader.cpp
index 60c37c4..5dc1ff8 100644
--- a/llvm/lib/ProfileData/RawMemProfReader.cpp
+++ b/llvm/lib/ProfileData/RawMemProfReader.cpp
@@ -446,6 +446,8 @@ Error RawMemProfReader::mapRawProfileToRecords() {
       Callstack.append(Frames.begin(), Frames.end());
     }
 
+    CallStackId CSId = hashCallStack(Callstack);
+
     // We attach the memprof record to each function bottom-up including the
     // first non-inline frame.
     for (size_t I = 0; /*Break out using the condition below*/; I++) {
@@ -453,7 +455,7 @@ Error RawMemProfReader::mapRawProfileToRecords() {
       auto Result =
           FunctionProfileData.insert({F.Function, IndexedMemProfRecord()});
       IndexedMemProfRecord &Record = Result.first->second;
-      Record.AllocSites.emplace_back(Callstack, Entry.second);
+      Record.AllocSites.emplace_back(Callstack, CSId, Entry.second);
 
       if (!F.IsInlineFrame)
         break;
@@ -471,6 +473,8 @@ Error RawMemProfReader::mapRawProfileToRecords() {
     }
   }
 
+  verifyFunctionProfileData(FunctionProfileData);
+
   return Error::success();
 }
 