diff options
author | Kazu Hirata <kazu@google.com> | 2024-11-13 23:40:12 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-13 23:40:12 -0800 |
commit | 9a730d878e96e2a992f337acc94f897d47c920e3 (patch) | |
tree | 070afad72773cf5211d4d8f2a9e9bc668bed7cfc /llvm/unittests/ProfileData/InstrProfTest.cpp | |
parent | 627b8f87e2c499c62df2e9bd6048f795fd085545 (diff) | |
download | llvm-9a730d878e96e2a992f337acc94f897d47c920e3.zip llvm-9a730d878e96e2a992f337acc94f897d47c920e3.tar.gz llvm-9a730d878e96e2a992f337acc94f897d47c920e3.tar.bz2 |
[memprof] Add IndexedMemProfReader::getMemProfCallerCalleePairs (#115807)
Undrifting the MemProf profile requires two sets of information:
- caller-callee pairs from the profile
- caller-callee pairs from the IR
This patch adds a function to do the former. The latter has been
addressed by extractCallsFromIR.
Unfortunately, the current MemProf format does not directly give us
the caller-callee pairs from the profile. "struct Frame" just tells
us where the call site is -- Caller GUID and line/column numbers; it
doesn't tell us what function a given Frame is calling. To extract
caller-callee pairs, we need to scan each call stack, look at two
adjacent Frames, and extract a caller-callee pair.
Conceptually, we would extract caller-callee pairs with:
for each MemProfRecord in the profile:
for each call stack in AllocSites:
extract caller-callee pairs from adjacent pairs of Frames
However, this is highly inefficient. Obtaining MemProfRecord involves
looking up the OnDiskHashTable, allocating several vectors on the
heap, and populating fields that are irrelevant to us, such as MIB and
CallSites.
This patch adds an efficient way of doing the above. Specifically, we
- go through all IndexedMemProfRecords,
- look at each linear call stack ID
- extract caller-callee pairs from each call stack
The extraction is done by a new class CallerCalleePairExtractor,
modified from LinearCallStackIdConverter, which reconstructs a call
stack from the radix tree array. For our purposes, we skip the
reconstruction and immediately populate the data structure for
caller-callee pairs.
The resulting caller-callee pairs are of the type:
DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> CallerCalleePairs;
which can be passed directly to longestCommonSequence just like the
result of extractCallsFromIR.
Further performance optimizations are possible for the new functions
in this patch. I'll address those in follow-up patches.
Diffstat (limited to 'llvm/unittests/ProfileData/InstrProfTest.cpp')
-rw-r--r-- | llvm/unittests/ProfileData/InstrProfTest.cpp | 62 |
1 files changed, 62 insertions, 0 deletions
diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp index 7fdfd15..cf3cf7f 100644 --- a/llvm/unittests/ProfileData/InstrProfTest.cpp +++ b/llvm/unittests/ProfileData/InstrProfTest.cpp @@ -580,6 +580,68 @@ TEST_F(InstrProfTest, test_memprof_v2_partial_schema) { EXPECT_THAT(WantRecord, EqualsRecord(Record)); } +TEST_F(InstrProfTest, test_caller_callee_pairs) { + const MemInfoBlock MIB = makePartialMIB(); + + Writer.setMemProfVersionRequested(memprof::Version3); + Writer.setMemProfFullSchema(false); + + ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf), + Succeeded()); + + // Call Hierarchy + // + // Function GUID:0x123 + // Line: 1, Column: 2 + // Function GUID: 0x234 + // Line: 3, Column: 4 + // new(...) + // Line: 5, Column: 6 + // Function GUID: 0x345 + // Line: 7, Column: 8 + // new(...) + + const std::pair<memprof::FrameId, memprof::Frame> Frames[] = { + {0, {0x123, 1, 2, false}}, + {1, {0x234, 3, 4, true}}, + {2, {0x123, 5, 6, false}}, + {3, {0x345, 7, 8, true}}}; + for (const auto &[FrameId, Frame] : Frames) + Writer.addMemProfFrame(FrameId, Frame, Err); + + const std::pair<memprof::CallStackId, SmallVector<memprof::FrameId>> + CallStacks[] = {{0x111, {1, 0}}, {0x222, {3, 2}}}; + for (const auto &[CSId, CallStack] : CallStacks) + Writer.addMemProfCallStack(CSId, CallStack, Err); + + const IndexedMemProfRecord IndexedMR = makeRecordV2( + /*AllocFrames=*/{0x111, 0x222}, + /*CallSiteFrames=*/{}, MIB, memprof::getHotColdSchema()); + Writer.addMemProfRecord(/*Id=*/0x9999, IndexedMR); + + auto Profile = Writer.writeBuffer(); + readProfile(std::move(Profile)); + + auto Pairs = Reader->getMemProfCallerCalleePairs(); + ASSERT_THAT(Pairs, SizeIs(3)); + + auto It = Pairs.find(0x123); + ASSERT_NE(It, Pairs.end()); + ASSERT_THAT(It->second, SizeIs(2)); + EXPECT_THAT(It->second[0], testing::Pair(testing::FieldsAre(1U, 2U), 0x234U)); + EXPECT_THAT(It->second[1], testing::Pair(testing::FieldsAre(5U, 6U), 
0x345U)); + + It = Pairs.find(0x234); + ASSERT_NE(It, Pairs.end()); + ASSERT_THAT(It->second, SizeIs(1)); + EXPECT_THAT(It->second[0], testing::Pair(testing::FieldsAre(3U, 4U), 0U)); + + It = Pairs.find(0x345); + ASSERT_NE(It, Pairs.end()); + ASSERT_THAT(It->second, SizeIs(1)); + EXPECT_THAT(It->second[0], testing::Pair(testing::FieldsAre(7U, 8U), 0U)); +} + TEST_F(InstrProfTest, test_memprof_getrecord_error) { ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf), Succeeded()); |