aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Utils/CloneFunction.cpp
diff options
context:
space:
mode:
author: Teresa Johnson <tejohnson@google.com> 2024-10-03 08:06:56 -0700
committer: GitHub <noreply@github.com> 2024-10-03 08:06:56 -0700
commit: 79b32bcda662a3e7789ad2835a021020fd2a5158 (patch)
tree: e5b349528d9b018aa7177d1e6b02c5fc62a4219d /llvm/lib/Transforms/Utils/CloneFunction.cpp
parent: dce5bf8efc13896ebf0ababfda00393eaa5cc99d (diff)
download: llvm-79b32bcda662a3e7789ad2835a021020fd2a5158.zip
llvm-79b32bcda662a3e7789ad2835a021020fd2a5158.tar.gz
llvm-79b32bcda662a3e7789ad2835a021020fd2a5158.tar.bz2
[MemProf] Strip callsite metadata when inlining an unprofiled callsite (#110998)
We weren't correctly flagging inlined callee functions that have callsite metadata but no memprof metadata. As a result, the callsite metadata was not stripped when such a function was inlined into a callsite that didn't itself have callsite metadata.

In practice, this meant that we went into the LTO link with many more calls than necessary carrying callsite metadata / summary records, which in turn made the graph larger than necessary.

Fixing this oversight resulted in huge reductions in the thin link of a large target:
- 99% fewer duplicated context ids (recall that we have to duplicate context ids when callsites containing the same stack ids are in different functions)
- 71% fewer graph edges
- 17% fewer graph nodes
- 13% fewer functions cloned
- 44% smaller peak memory
- 47% less time
Diffstat (limited to 'llvm/lib/Transforms/Utils/CloneFunction.cpp')
-rw-r--r-- llvm/lib/Transforms/Utils/CloneFunction.cpp 2
1 files changed, 2 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index dc9ca14..fc03643 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -70,6 +70,7 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
if (isa<CallInst>(I) && !I.isDebugOrPseudoInst()) {
hasCalls = true;
hasMemProfMetadata |= I.hasMetadata(LLVMContext::MD_memprof);
+ hasMemProfMetadata |= I.hasMetadata(LLVMContext::MD_callsite);
}
if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
if (!AI->isStaticAlloca()) {
@@ -556,6 +557,7 @@ void PruningFunctionCloner::CloneBlock(
if (isa<CallInst>(II) && !II->isDebugOrPseudoInst()) {
hasCalls = true;
hasMemProfMetadata |= II->hasMetadata(LLVMContext::MD_memprof);
+ hasMemProfMetadata |= II->hasMetadata(LLVMContext::MD_callsite);
}
CloneDbgRecordsToHere(NewInst, II);