aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/ProfileData/InstrProfWriter.cpp
diff options
context:
space:
mode:
authorKazu Hirata <kazu@google.com>2024-11-14 15:54:55 -0800
committerGitHub <noreply@github.com>2024-11-14 15:54:55 -0800
commit59da1afd2ad74af2a8b8475412353c5d54a7d7f5 (patch)
tree25842b3ff84fc28031b031019fcaef7526690d4e /llvm/lib/ProfileData/InstrProfWriter.cpp
parentd761b7485dbf0d951db34abcca270c405be1e93a (diff)
downloadllvm-59da1afd2ad74af2a8b8475412353c5d54a7d7f5.zip
llvm-59da1afd2ad74af2a8b8475412353c5d54a7d7f5.tar.gz
llvm-59da1afd2ad74af2a8b8475412353c5d54a7d7f5.tar.bz2
[memprof] Speed up caller-callee pair extraction (#116184)
We know that the MemProf profile has a lot of duplicate call stacks. Extracting caller-callee pairs from a call stack we've seen before is a wasteful effort. This patch makes the extraction more efficient by first coming up with a work list of linear call stack IDs -- the set of starting positions in the radix tree array -- and then extract caller-callee pairs from each call stack in the work list. We implement the work list as a bit vector because we expect the work list to be dense in the range [0, RadixTreeSize). Also, we want the set insertion to be cheap. Without this patch, it takes 25 seconds to extract caller-callee pairs from a large MemProf profile. This patch shortenes that down to 4 seconds.
Diffstat (limited to 'llvm/lib/ProfileData/InstrProfWriter.cpp')
-rw-r--r--llvm/lib/ProfileData/InstrProfWriter.cpp19
1 files changed, 16 insertions, 3 deletions
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 0ab9f94..4560147 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -601,7 +601,8 @@ writeMemProfCallStackArray(
&MemProfCallStackData,
llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>
&MemProfFrameIndexes,
- llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram) {
+ llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram,
+ unsigned &NumElements) {
llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
MemProfCallStackIndexes;
@@ -610,6 +611,7 @@ writeMemProfCallStackArray(
FrameHistogram);
for (auto I : Builder.getRadixArray())
OS.write32(I);
+ NumElements = Builder.getRadixArray().size();
MemProfCallStackIndexes = Builder.takeCallStackPos();
// Release the memory of this vector as it is no longer needed.
@@ -771,15 +773,26 @@ static Error writeMemProfV3(ProfOStream &OS,
writeMemProfFrameArray(OS, MemProfData.Frames, FrameHistogram);
uint64_t CallStackPayloadOffset = OS.tell();
+ // The number of elements in the call stack array.
+ unsigned NumElements = 0;
llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
- MemProfCallStackIndexes = writeMemProfCallStackArray(
- OS, MemProfData.CallStacks, MemProfFrameIndexes, FrameHistogram);
+ MemProfCallStackIndexes =
+ writeMemProfCallStackArray(OS, MemProfData.CallStacks,
+ MemProfFrameIndexes, FrameHistogram,
+ NumElements);
uint64_t RecordPayloadOffset = OS.tell();
uint64_t RecordTableOffset =
writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version3,
&MemProfCallStackIndexes);
+ // IndexedMemProfReader::deserializeV3 computes the number of elements in the
+ // call stack array from the difference between CallStackPayloadOffset and
+ // RecordPayloadOffset. Verify that the computation works.
+ assert(CallStackPayloadOffset +
+ NumElements * sizeof(memprof::LinearFrameId) ==
+ RecordPayloadOffset);
+
uint64_t Header[] = {
CallStackPayloadOffset,
RecordPayloadOffset,