| | | |
|---|---|---|
| author | Teresa Johnson <tejohnson@google.com> | 2024-10-11 13:53:35 -0700 |
| committer | GitHub <noreply@github.com> | 2024-10-11 13:53:35 -0700 |
| commit | 1de71652fd232163dadfee68e2f2b3f0d6dfb1e1 (patch) | |
| tree | 0493f5b192aebd69fc2b1aeec1335d994146537b /llvm/lib/Analysis/ModuleSummaryAnalysis.cpp | |
| parent | 111b062f63ba52552f41e425449ba1db048dc51a (diff) | |
[MemProf] Support cloning for indirect calls with ThinLTO (#110625)
This patch enables support for cloning at indirect callsites.
This is done by synthesizing callsite records for each virtual call
target from the profile metadata. In the thin link, all the synthesized
records for a particular indirect callsite initially share the same
context node, but support is added to partition the callsites and
outgoing edges based on the callee function, creating a separate node
for each target.
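As a rough standalone illustration of the summary-side synthesis (simplified stand-in types rather than the real LLVM summary classes; `TargetProfile`, `CallsiteRecord`, and `synthesizeCallsiteRecords` are hypothetical names), each profiled target of an indirect callsite gets its own record, and all of those records share the same stack-id context. The thin-link partitioning itself is sketched separately after the diff below.

```cpp
#include <cstdint>
#include <vector>

// Hypothetical, simplified stand-ins for the real summary data structures.
struct TargetProfile {
  uint64_t TargetGUID; // profiled callee, from the call's value-profile data
  uint64_t Count;      // call count attributed to this target
};

struct CallsiteRecord {
  uint64_t CalleeGUID;                  // one record per candidate target
  std::vector<unsigned> StackIdIndices; // shared inlined-call-stack context
};

// Summary time: synthesize one callsite record per profiled target; all
// records for the same indirect callsite share the same stack-id context.
std::vector<CallsiteRecord>
synthesizeCallsiteRecords(const std::vector<TargetProfile> &Candidates,
                          const std::vector<unsigned> &StackIdIndices) {
  std::vector<CallsiteRecord> Records;
  Records.reserve(Candidates.size());
  for (const TargetProfile &C : Candidates)
    Records.push_back({C.TargetGUID, StackIdIndices});
  return Records;
}
```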
In the LTO backend, when cloning is needed, we first perform indirect call
promotion and then change the target of the new direct call to the
desired clone.
Note this is ThinLTO-specific, since for regular LTO indirect call
promotion should have already occurred.
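A minimal sketch of that backend sequence, assuming it can be expressed with LLVM's indirect-call-promotion helper llvm::pgo::promoteIndirectCall; the wrapper `promoteThenRedirectToClone` and its parameters (`CB`, `TargetFn`, `CloneFn`, `Count`, `TotalCount`) are illustrative placeholders, not the actual MemProfContextDisambiguation code:

```cpp
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"

using namespace llvm;

// Sketch: promote the indirect call CB to a guarded direct call to TargetFn
// (using the profile-derived Count/TotalCount for branch weights), then point
// the new direct call at the clone selected from the imported summary.
static void promoteThenRedirectToClone(CallBase &CB, Function *TargetFn,
                                       Function *CloneFn, uint64_t Count,
                                       uint64_t TotalCount) {
  OptimizationRemarkEmitter ORE(CB.getCaller());
  CallBase &DirectCB = pgo::promoteIndirectCall(
      CB, TargetFn, Count, TotalCount, /*AttachProfToDirectCall=*/true, &ORE);
  // Retarget the promoted direct call at the MemProf-selected clone.
  DirectCB.setCalledFunction(CloneFn);
}
```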
Diffstat (limited to 'llvm/lib/Analysis/ModuleSummaryAnalysis.cpp')
-rw-r--r-- | llvm/lib/Analysis/ModuleSummaryAnalysis.cpp | 59 |
1 file changed, 36 insertions(+), 23 deletions(-)
```diff
diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index 2d4961d..1bd9ee6 100644
--- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -81,6 +81,11 @@ static cl::opt<std::string> ModuleSummaryDotFile(
     "module-summary-dot-file", cl::Hidden, cl::value_desc("filename"),
     cl::desc("File to emit dot graph of new summary into"));
 
+static cl::opt<bool> EnableMemProfIndirectCallSupport(
+    "enable-memprof-indirect-call-support", cl::init(true), cl::Hidden,
+    cl::desc(
+        "Enable MemProf support for summarizing and cloning indirect calls"));
+
 extern cl::opt<bool> ScalePartialSampleProfileWorkingSetSize;
 
 extern cl::opt<unsigned> MaxNumVTableAnnotations;
@@ -404,6 +409,11 @@ static void computeFunctionSummary(
       if (HasLocalsInUsedOrAsm && CI && CI->isInlineAsm())
         HasInlineAsmMaybeReferencingInternal = true;
 
+      // Compute this once per indirect call.
+      uint32_t NumCandidates = 0;
+      uint64_t TotalCount = 0;
+      MutableArrayRef<InstrProfValueData> CandidateProfileData;
+
       auto *CalledValue = CB->getCalledOperand();
       auto *CalledFunction = CB->getCalledFunction();
       if (CalledValue && !CalledFunction) {
@@ -481,9 +491,7 @@ static void computeFunctionSummary(
           }
         }
 
-        uint32_t NumCandidates;
-        uint64_t TotalCount;
-        auto CandidateProfileData =
+        CandidateProfileData =
            ICallAnalysis.getPromotionCandidatesForInstruction(&I, TotalCount,
                                                               NumCandidates);
        for (const auto &Candidate : CandidateProfileData)
@@ -495,16 +503,6 @@ static void computeFunctionSummary(
       if (!IsThinLTO)
         continue;
 
-      // TODO: Skip indirect calls for now. Need to handle these better, likely
-      // by creating multiple Callsites, one per target, then speculatively
-      // devirtualize while applying clone info in the ThinLTO backends. This
-      // will also be important because we will have a different set of clone
-      // versions per target. This handling needs to match that in the ThinLTO
-      // backend so we handle things consistently for matching of callsite
-      // summaries to instructions.
-      if (!CalledFunction)
-        continue;
-
       // Ensure we keep this analysis in sync with the handling in the ThinLTO
       // backend (see MemProfContextDisambiguation::applyImport). Save this call
       // so that we can skip it in checking the reverse case later.
@@ -555,13 +553,24 @@ static void computeFunctionSummary(
         SmallVector<unsigned> StackIdIndices;
         for (auto StackId : InstCallsite)
           StackIdIndices.push_back(Index.addOrGetStackIdIndex(StackId));
-        // Use the original CalledValue, in case it was an alias. We want
-        // to record the call edge to the alias in that case. Eventually
-        // an alias summary will be created to associate the alias and
-        // aliasee.
-        auto CalleeValueInfo =
-            Index.getOrInsertValueInfo(cast<GlobalValue>(CalledValue));
-        Callsites.push_back({CalleeValueInfo, StackIdIndices});
+        if (CalledFunction) {
+          // Use the original CalledValue, in case it was an alias. We want
+          // to record the call edge to the alias in that case. Eventually
+          // an alias summary will be created to associate the alias and
+          // aliasee.
+          auto CalleeValueInfo =
+              Index.getOrInsertValueInfo(cast<GlobalValue>(CalledValue));
+          Callsites.push_back({CalleeValueInfo, StackIdIndices});
+        } else if (EnableMemProfIndirectCallSupport) {
+          // For indirect callsites, create multiple Callsites, one per target.
+          // This enables having a different set of clone versions per target,
+          // and we will apply the cloning decisions while speculatively
+          // devirtualizing in the ThinLTO backends.
+          for (const auto &Candidate : CandidateProfileData) {
+            auto CalleeValueInfo = Index.getOrInsertValueInfo(Candidate.Value);
+            Callsites.push_back({CalleeValueInfo, StackIdIndices});
+          }
+        }
       }
     }
   }
@@ -1214,9 +1223,13 @@ bool llvm::mayHaveMemprofSummary(const CallBase *CB) {
     if (CI && CalledFunction->isIntrinsic())
       return false;
   } else {
-    // TODO: For now skip indirect calls. See comments in
-    // computeFunctionSummary for what is needed to handle this.
-    return false;
+    // Skip inline assembly calls.
+    if (CI && CI->isInlineAsm())
+      return false;
+    // Skip direct calls via Constant.
+    if (!CalledValue || isa<Constant>(CalledValue))
+      return false;
+    return true;
   }
   return true;
 }
```
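The thin-link partitioning described in the commit message lives outside this file, so it does not appear in the diff above. As a very rough standalone model of the idea (`CallsiteRecord` and `partitionByCallee` are simplified, hypothetical types and names), the synthesized records that initially share one context node are grouped by callee so that each target can be given its own node and outgoing edges:

```cpp
#include <cstdint>
#include <map>
#include <vector>

// Hypothetical, simplified stand-in for a synthesized callsite record.
struct CallsiteRecord {
  uint64_t CalleeGUID;                  // profiled target of the indirect call
  std::vector<unsigned> StackIdIndices; // shared inlined-call-stack context
};

// Group the records of one indirect callsite by callee GUID; each group
// models the separate context node (with its own outgoing edges) that the
// thin link creates per target.
std::map<uint64_t, std::vector<CallsiteRecord>>
partitionByCallee(const std::vector<CallsiteRecord> &SharedNodeRecords) {
  std::map<uint64_t, std::vector<CallsiteRecord>> PerTarget;
  for (const CallsiteRecord &R : SharedNodeRecords)
    PerTarget[R.CalleeGUID].push_back(R);
  return PerTarget;
}
```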