diff options
author | Kazu Hirata <kazu@google.com> | 2024-11-14 15:54:55 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-14 15:54:55 -0800 |
commit | 59da1afd2ad74af2a8b8475412353c5d54a7d7f5 (patch) | |
tree | 25842b3ff84fc28031b031019fcaef7526690d4e /llvm/lib/ProfileData/InstrProfWriter.cpp | |
parent | d761b7485dbf0d951db34abcca270c405be1e93a (diff) | |
download | llvm-59da1afd2ad74af2a8b8475412353c5d54a7d7f5.zip llvm-59da1afd2ad74af2a8b8475412353c5d54a7d7f5.tar.gz llvm-59da1afd2ad74af2a8b8475412353c5d54a7d7f5.tar.bz2 |
[memprof] Speed up caller-callee pair extraction (#116184)
We know that the MemProf profile has a lot of duplicate call stacks.
Extracting caller-callee pairs from a call stack we've seen before is
a wasteful effort.
This patch makes the extraction more efficient by first coming up with
a work list of linear call stack IDs -- the set of starting positions
in the radix tree array -- and then extract caller-callee pairs from
each call stack in the work list.
We implement the work list as a bit vector because we expect the work
list to be dense in the range [0, RadixTreeSize). Also, we want the
set insertion to be cheap.
Without this patch, it takes 25 seconds to extract caller-callee pairs
from a large MemProf profile. This patch shortenes that down to 4
seconds.
Diffstat (limited to 'llvm/lib/ProfileData/InstrProfWriter.cpp')
-rw-r--r-- | llvm/lib/ProfileData/InstrProfWriter.cpp | 19 |
1 files changed, 16 insertions, 3 deletions
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index 0ab9f94..4560147 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -601,7 +601,8 @@ writeMemProfCallStackArray( &MemProfCallStackData, llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> &MemProfFrameIndexes, - llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram) { + llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram, + unsigned &NumElements) { llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId> MemProfCallStackIndexes; @@ -610,6 +611,7 @@ writeMemProfCallStackArray( FrameHistogram); for (auto I : Builder.getRadixArray()) OS.write32(I); + NumElements = Builder.getRadixArray().size(); MemProfCallStackIndexes = Builder.takeCallStackPos(); // Release the memory of this vector as it is no longer needed. @@ -771,15 +773,26 @@ static Error writeMemProfV3(ProfOStream &OS, writeMemProfFrameArray(OS, MemProfData.Frames, FrameHistogram); uint64_t CallStackPayloadOffset = OS.tell(); + // The number of elements in the call stack array. + unsigned NumElements = 0; llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId> - MemProfCallStackIndexes = writeMemProfCallStackArray( - OS, MemProfData.CallStacks, MemProfFrameIndexes, FrameHistogram); + MemProfCallStackIndexes = + writeMemProfCallStackArray(OS, MemProfData.CallStacks, + MemProfFrameIndexes, FrameHistogram, + NumElements); uint64_t RecordPayloadOffset = OS.tell(); uint64_t RecordTableOffset = writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version3, &MemProfCallStackIndexes); + // IndexedMemProfReader::deserializeV3 computes the number of elements in the + // call stack array from the difference between CallStackPayloadOffset and + // RecordPayloadOffset. Verify that the computation works. + assert(CallStackPayloadOffset + + NumElements * sizeof(memprof::LinearFrameId) == + RecordPayloadOffset); + uint64_t Header[] = { CallStackPayloadOffset, RecordPayloadOffset, |