aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/ProfileData/SampleProf.cpp
diff options
context:
space:
mode:
authorLei Wang <wlei@fb.com>2024-07-17 10:33:00 -0700
committerGitHub <noreply@github.com>2024-07-17 10:33:00 -0700
commit18cdfa72e046a40deeee4372ee98602fd1a65a94 (patch)
treed2d4fd664545c7bf174898b9f95748db1fcb6b57 /llvm/lib/ProfileData/SampleProf.cpp
parent81955da03bd4731b668fee401b3d6aca8b7d4da6 (diff)
downloadllvm-18cdfa72e046a40deeee4372ee98602fd1a65a94.zip
llvm-18cdfa72e046a40deeee4372ee98602fd1a65a94.tar.gz
llvm-18cdfa72e046a40deeee4372ee98602fd1a65a94.tar.bz2
[SampleFDO] Stale profile call-graph matching (#95135)
Profile staleness can be caused by function renaming. Because the sample profile loader relies on exact string matching, a trivial change in a function signature (such as `int foo()` --> `long foo()`) can make the mangled name different, so the function profile (including all nested children profiles) becomes unavailable. This patch introduces stale profile call-graph level matching, which targets identifying trivial function renaming and reusing the old function profile. Some noteworthy details: 1. Extend the LCS-based CFG-level matching to identify new functions. - Extend the matching to handle a function and a profile that have different names, instead of requiring exact function name matching. This leverages LCS, i.e. while finding callsite anchor matches, when two function names are different, try matching the functions instead of returning. - In LCS, the function equality check is replaced by `functionMatchesProfile`. - Only try matching functions that are new (i.e. functions that do not appear on both sides). This reduces the matching scope, as we don't need to match functions that were already matched. 2. Determine the matching by a call-site anchor similarity check. - A new function `functionMatchesProfile(IRFunc, ProfFunc)` is used to check for renaming of a possible <IRFunc, ProfFunc> pair: it uses the LCS (diff) matching to compute the equal set, and we define `Similarity = |equalSet * 2| / (|A| + |B|)`. The profile name is marked as renamed if the similarity is above a threshold (`-func-profile-similarity-threshold`). 3. Process the matching in top-down function order. - When a caller's matching is done, the new function names are saved for later use; using top-down order maximizes the reuse of results. - `ProfileNameToFuncMap` is used to save or cache the matching result. 4. Update the original profile at the end using `ProfileNameToFuncMap`. 5. Add a new switch `--salvage-unused-profile` to control this; the default is false.
Verified on one of Meta's large internal services; confirmed that 90%+ of the renaming pairs found are good. (There could be incorrect renaming pairs if the number of anchors is small, but we checked that those functions are simple cold functions.)
Diffstat (limited to 'llvm/lib/ProfileData/SampleProf.cpp')
-rw-r--r--llvm/lib/ProfileData/SampleProf.cpp36
1 files changed, 26 insertions, 10 deletions
diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp
index 294f646..addb473 100644
--- a/llvm/lib/ProfileData/SampleProf.cpp
+++ b/llvm/lib/ProfileData/SampleProf.cpp
@@ -236,7 +236,9 @@ LineLocation FunctionSamples::getCallSiteIdentifier(const DILocation *DIL,
}
const FunctionSamples *FunctionSamples::findFunctionSamples(
- const DILocation *DIL, SampleProfileReaderItaniumRemapper *Remapper) const {
+ const DILocation *DIL, SampleProfileReaderItaniumRemapper *Remapper,
+ const HashKeyMap<std::unordered_map, FunctionId, FunctionId>
+ *FuncNameToProfNameMap) const {
assert(DIL);
SmallVector<std::pair<LineLocation, StringRef>, 10> S;
@@ -256,7 +258,8 @@ const FunctionSamples *FunctionSamples::findFunctionSamples(
return this;
const FunctionSamples *FS = this;
for (int i = S.size() - 1; i >= 0 && FS != nullptr; i--) {
- FS = FS->findFunctionSamplesAt(S[i].first, S[i].second, Remapper);
+ FS = FS->findFunctionSamplesAt(S[i].first, S[i].second, Remapper,
+ FuncNameToProfNameMap);
}
return FS;
}
@@ -277,19 +280,32 @@ void FunctionSamples::findAllNames(DenseSet<FunctionId> &NameSet) const {
const FunctionSamples *FunctionSamples::findFunctionSamplesAt(
const LineLocation &Loc, StringRef CalleeName,
- SampleProfileReaderItaniumRemapper *Remapper) const {
+ SampleProfileReaderItaniumRemapper *Remapper,
+ const HashKeyMap<std::unordered_map, FunctionId, FunctionId>
+ *FuncNameToProfNameMap) const {
CalleeName = getCanonicalFnName(CalleeName);
- auto iter = CallsiteSamples.find(mapIRLocToProfileLoc(Loc));
- if (iter == CallsiteSamples.end())
+ auto I = CallsiteSamples.find(mapIRLocToProfileLoc(Loc));
+ if (I == CallsiteSamples.end())
return nullptr;
- auto FS = iter->second.find(getRepInFormat(CalleeName));
- if (FS != iter->second.end())
+ auto FS = I->second.find(getRepInFormat(CalleeName));
+ if (FS != I->second.end())
return &FS->second;
+
+ if (FuncNameToProfNameMap && !FuncNameToProfNameMap->empty()) {
+ auto R = FuncNameToProfNameMap->find(FunctionId(CalleeName));
+ if (R != FuncNameToProfNameMap->end()) {
+ CalleeName = R->second.stringRef();
+ auto FS = I->second.find(getRepInFormat(CalleeName));
+ if (FS != I->second.end())
+ return &FS->second;
+ }
+ }
+
if (Remapper) {
if (auto NameInProfile = Remapper->lookUpNameInProfile(CalleeName)) {
- auto FS = iter->second.find(getRepInFormat(*NameInProfile));
- if (FS != iter->second.end())
+ auto FS = I->second.find(getRepInFormat(*NameInProfile));
+ if (FS != I->second.end())
return &FS->second;
}
}
@@ -300,7 +316,7 @@ const FunctionSamples *FunctionSamples::findFunctionSamplesAt(
return nullptr;
uint64_t MaxTotalSamples = 0;
const FunctionSamples *R = nullptr;
- for (const auto &NameFS : iter->second)
+ for (const auto &NameFS : I->second)
if (NameFS.second.getTotalSamples() >= MaxTotalSamples) {
MaxTotalSamples = NameFS.second.getTotalSamples();
R = &NameFS.second;