From ff7b42c194e0fa23e6a76f0a33a80c0c3af14e7d Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 24 Nov 2024 21:08:54 -0800 Subject: [memprof] Speed up llvm-profdata (#117446) CallStackRadixTreeBuilder::build takes the parameter MemProfFrameIndexes by value, involving copies: std::optional> MemProfFrameIndexes Then "build" makes another copy of MemProfFrameIndexe and passes it to encodeCallStack for every call stack, which is painfully slow. This patch changes the type to a pointer so that we don't have to make a copy every time we pass the argument. Without this patch, it takes 553 seconds to run "llvm-profdata merge" on a large MemProf raw profile. This patch shortenes that down to 67 seconds. --- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'llvm/lib/Bitcode/Writer/BitcodeWriter.cpp') diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 8f22a50..63f4e34 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -4235,7 +4235,7 @@ static DenseMap writeMemoryProfileRadixTree( CallStackRadixTreeBuilder Builder; // We don't need a MemProfFrameIndexes map as we have already converted the // full stack id hash to a linear offset into the StackIds array. - Builder.build(std::move(CallStacks), /*MemProfFrameIndexes=*/std::nullopt, + Builder.build(std::move(CallStacks), /*MemProfFrameIndexes=*/nullptr, FrameHistogram); Stream.EmitRecord(bitc::FS_CONTEXT_RADIX_TREE_ARRAY, Builder.getRadixArray(), RadixAbbrev); -- cgit v1.1