aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib
diff options
context:
space:
mode:
authorEllis Hoag <ellis.sparky.hoag@gmail.com>2023-08-04 11:26:14 -0700
committerEllis Hoag <ellis.sparky.hoag@gmail.com>2023-08-07 10:15:08 -0700
commitfe051934cbb0aaf25d960d7d45305135635d650b (patch)
tree8f168ed70ecb3bb7967ee78cfc3279c4aabc6b7a /llvm/lib
parent165f7f06a125d072574b74ca5540b4e6141b5b7b (diff)
downloadllvm-fe051934cbb0aaf25d960d7d45305135635d650b.zip
llvm-fe051934cbb0aaf25d960d7d45305135635d650b.tar.gz
llvm-fe051934cbb0aaf25d960d7d45305135635d650b.tar.bz2
[InstrProf] Encode linkage names in IRPGO counter names
Prior to this diff, names in the `__llvm_prf_names` section had the format `[<filepath>:]<function-name>`, e.g., `main.cpp:foo`, `bar`. `<filepath>` is used to discriminate between possibly identical function names when linkage is local and `<function-name>` simply comes from `F.getName()`. This has two problems: * `:` is commonly found in Objective-C functions so that names like `main.mm:-[C foo::]` and `-[C bar::]` are difficult to parse * `<function-name>` might be different from the linkage name, so it cannot be used to pass a function order to the linker via `-symbol-ordering-file` or `-order_file` (see https://discourse.llvm.org/t/rfc-temporal-profiling-extension-for-irpgo/68068) Instead, this diff changes the format to `[<filepath>;]<linkage-name>`, e.g., `main.cpp;_foo`, `_bar`. The hope is that `;` won't realistically be found in either `<filepath>` or `<linkage-name>`. To prevent invalidating all prior IRPGO profiles, we also lookup the prior name format when a record is not found (see `InstrProfSymtab::create()`, `readMemprof()`, and `getInstrProfRecord()`). It seems that Swift and Clang FE-PGO rely on the original `getPGOFuncName()`, so we cannot simply replace it. Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D156569
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/ProfileData/InstrProf.cpp150
-rw-r--r--llvm/lib/ProfileData/InstrProfReader.cpp21
-rw-r--r--llvm/lib/Transforms/Instrumentation/MemProfiler.cpp29
-rw-r--r--llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp10
4 files changed, 156 insertions, 54 deletions
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 0f9c33d..835dd69 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Mangler.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
@@ -264,6 +265,67 @@ static StringRef stripDirPrefix(StringRef PathNameStr, uint32_t NumPrefix) {
return PathNameStr.substr(LastPos);
}
+static StringRef getStrippedSourceFileName(const Function &F) {
+ StringRef FileName(F.getParent()->getSourceFileName());
+ uint32_t StripLevel = StaticFuncFullModulePrefix ? 0 : (uint32_t)-1;
+ if (StripLevel < StaticFuncStripDirNamePrefix)
+ StripLevel = StaticFuncStripDirNamePrefix;
+ if (StripLevel)
+ FileName = stripDirPrefix(FileName, StripLevel);
+ return FileName;
+}
+
+// The PGO name has the format [<filepath>;]<linkage-name> where <filepath>; is
+// provided if linkage is local and <linkage-name> is the mangled function
+// name. The filepath is used to discriminate possibly identical function names.
+// ; is used because it is unlikely to be found in either <filepath> or
+// <linkage-name>.
+//
+// Older compilers used getPGOFuncName() which has the format
+// [<filepath>:]<function-name>. <filepath> is used to discriminate between
+// possibly identical function names when linkage is local and <function-name>
+// simply comes from F.getName(). This caused trouble for Objective-C functions
+// which commonly have :'s in their names. Also, since <function-name> is not
+// mangled, they cannot be passed to Mach-O linkers via -order_file. We still
+// need to compute this name to lookup functions from profiles built by older
+// compilers.
+static std::string getIRPGOFuncName(const Function &F,
+ GlobalValue::LinkageTypes Linkage,
+ StringRef FileName) {
+ SmallString<64> Name;
+ if (llvm::GlobalValue::isLocalLinkage(Linkage)) {
+ Name.append(FileName.empty() ? "<unknown>" : FileName);
+ Name.append(";");
+ }
+ Mangler().getNameWithPrefix(Name, &F, /*CannotUsePrivateLabel=*/true);
+ return Name.str().str();
+}
+
+static std::optional<std::string> lookupPGOFuncName(const Function &F) {
+ if (MDNode *MD = getPGOFuncNameMetadata(F)) {
+ StringRef S = cast<MDString>(MD->getOperand(0))->getString();
+ return S.str();
+ }
+ return {};
+}
+
+// See getPGOFuncName()
+std::string getIRPGOFuncName(const Function &F, bool InLTO) {
+ if (!InLTO) {
+ auto FileName = getStrippedSourceFileName(F);
+ return getIRPGOFuncName(F, F.getLinkage(), FileName);
+ }
+
+ // In LTO mode (when InLTO is true), first check if there is a meta data.
+ if (auto IRPGOFuncName = lookupPGOFuncName(F))
+ return *IRPGOFuncName;
+
+ // If there is no meta data, the function must be a global before the value
+ // profile annotation pass. Its current linkage may be internal if it is
+ // internalized in LTO mode.
+ return getIRPGOFuncName(F, GlobalValue::ExternalLinkage, "");
+}
+
// Return the PGOFuncName. This function has some special handling when called
// in LTO optimization. The following only applies when calling in LTO passes
// (when \c InLTO is true): LTO's internalization privatizes many global linkage
@@ -279,20 +341,13 @@ static StringRef stripDirPrefix(StringRef PathNameStr, uint32_t NumPrefix) {
// data, its original linkage must be non-internal.
std::string getPGOFuncName(const Function &F, bool InLTO, uint64_t Version) {
if (!InLTO) {
- StringRef FileName(F.getParent()->getSourceFileName());
- uint32_t StripLevel = StaticFuncFullModulePrefix ? 0 : (uint32_t)-1;
- if (StripLevel < StaticFuncStripDirNamePrefix)
- StripLevel = StaticFuncStripDirNamePrefix;
- if (StripLevel)
- FileName = stripDirPrefix(FileName, StripLevel);
+ auto FileName = getStrippedSourceFileName(F);
return getPGOFuncName(F.getName(), F.getLinkage(), FileName, Version);
}
// In LTO mode (when InLTO is true), first check if there is a meta data.
- if (MDNode *MD = getPGOFuncNameMetadata(F)) {
- StringRef S = cast<MDString>(MD->getOperand(0))->getString();
- return S.str();
- }
+ if (auto PGOFuncName = lookupPGOFuncName(F))
+ return *PGOFuncName;
// If there is no meta data, the function must be a global before the value
// profile annotation pass. Its current linkage may be internal if it is
@@ -300,6 +355,15 @@ std::string getPGOFuncName(const Function &F, bool InLTO, uint64_t Version) {
return getPGOFuncName(F.getName(), GlobalValue::ExternalLinkage, "");
}
+// See getIRPGOFuncName() for a discription of the format.
+std::pair<StringRef, StringRef>
+getParsedIRPGOFuncName(StringRef IRPGOFuncName) {
+ auto [FileName, FuncName] = IRPGOFuncName.split(';');
+ if (FuncName.empty())
+ return std::make_pair(StringRef(), IRPGOFuncName);
+ return std::make_pair(FileName, FuncName);
+}
+
StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName) {
if (FileName.empty())
return PGOFuncName;
@@ -320,7 +384,7 @@ std::string getPGOFuncNameVarName(StringRef FuncName,
return VarName;
// Now fix up illegal chars in local VarName that may upset the assembler.
- const char *InvalidChars = "-:<>/\"'";
+ const char InvalidChars[] = "-:;<>/\"'";
size_t found = VarName.find_first_of(InvalidChars);
while (found != std::string::npos) {
VarName[found] = '_';
@@ -366,41 +430,49 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) {
// Ignore in this case.
if (!F.hasName())
continue;
- const std::string &PGOFuncName = getPGOFuncName(F, InLTO);
- if (Error E = addFuncName(PGOFuncName))
+ if (Error E = addFuncWithName(F, getIRPGOFuncName(F, InLTO)))
+ return E;
+ // Also use getPGOFuncName() so that we can find records from older profiles
+ if (Error E = addFuncWithName(F, getPGOFuncName(F, InLTO)))
return E;
- MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F);
- // In ThinLTO, local function may have been promoted to global and have
- // suffix ".llvm." added to the function name. We need to add the
- // stripped function name to the symbol table so that we can find a match
- // from profile.
- //
- // We may have other suffixes similar as ".llvm." which are needed to
- // be stripped before the matching, but ".__uniq." suffix which is used
- // to differentiate internal linkage functions in different modules
- // should be kept. Now this is the only suffix with the pattern ".xxx"
- // which is kept before matching.
- const std::string UniqSuffix = ".__uniq.";
- auto pos = PGOFuncName.find(UniqSuffix);
- // Search '.' after ".__uniq." if ".__uniq." exists, otherwise
- // search '.' from the beginning.
- if (pos != std::string::npos)
- pos += UniqSuffix.length();
- else
- pos = 0;
- pos = PGOFuncName.find('.', pos);
- if (pos != std::string::npos && pos != 0) {
- const std::string &OtherFuncName = PGOFuncName.substr(0, pos);
- if (Error E = addFuncName(OtherFuncName))
- return E;
- MD5FuncMap.emplace_back(Function::getGUID(OtherFuncName), &F);
- }
}
Sorted = false;
finalizeSymtab();
return Error::success();
}
+Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) {
+ if (Error E = addFuncName(PGOFuncName))
+ return E;
+ MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F);
+ // In ThinLTO, local function may have been promoted to global and have
+ // suffix ".llvm." added to the function name. We need to add the
+ // stripped function name to the symbol table so that we can find a match
+ // from profile.
+ //
+ // We may have other suffixes similar as ".llvm." which are needed to
+ // be stripped before the matching, but ".__uniq." suffix which is used
+ // to differentiate internal linkage functions in different modules
+ // should be kept. Now this is the only suffix with the pattern ".xxx"
+ // which is kept before matching.
+ const std::string UniqSuffix = ".__uniq.";
+ auto pos = PGOFuncName.find(UniqSuffix);
+ // Search '.' after ".__uniq." if ".__uniq." exists, otherwise
+ // search '.' from the beginning.
+ if (pos != std::string::npos)
+ pos += UniqSuffix.length();
+ else
+ pos = 0;
+ pos = PGOFuncName.find('.', pos);
+ if (pos != std::string::npos && pos != 0) {
+ StringRef OtherFuncName = PGOFuncName.substr(0, pos);
+ if (Error E = addFuncName(OtherFuncName))
+ return E;
+ MD5FuncMap.emplace_back(Function::getGUID(OtherFuncName), &F);
+ }
+ return Error::success();
+}
+
uint64_t InstrProfSymtab::getFunctionHashFromAddress(uint64_t Address) {
finalizeSymtab();
auto It = partition_point(AddrToMD5Map, [=](std::pair<uint64_t, uint64_t> A) {
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index 4160f7e..7f856c4 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1214,12 +1214,25 @@ InstrProfSymtab &IndexedInstrProfReader::getSymtab() {
}
Expected<InstrProfRecord> IndexedInstrProfReader::getInstrProfRecord(
- StringRef FuncName, uint64_t FuncHash, uint64_t *MismatchedFuncSum) {
+ StringRef FuncName, uint64_t FuncHash, StringRef DeprecatedFuncName,
+ uint64_t *MismatchedFuncSum) {
ArrayRef<NamedInstrProfRecord> Data;
uint64_t FuncSum = 0;
- Error Err = Remapper->getRecords(FuncName, Data);
- if (Err)
- return std::move(Err);
+ auto Err = Remapper->getRecords(FuncName, Data);
+ if (Err) {
+ // If we don't find FuncName, try DeprecatedFuncName to handle profiles
+ // built by older compilers.
+ auto Err2 =
+ handleErrors(std::move(Err), [&](const InstrProfError &IE) -> Error {
+ if (IE.get() != instrprof_error::unknown_function)
+ return make_error<InstrProfError>(IE);
+ if (auto Err = Remapper->getRecords(DeprecatedFuncName, Data))
+ return Err;
+ return Error::success();
+ });
+ if (Err2)
+ return std::move(Err2);
+ }
// Found it. Look for counters with the right hash.
// A flag to indicate if the records are from the same type
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 789ed00..453dd00 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -679,12 +679,26 @@ static void readMemprof(Module &M, Function &F,
const TargetLibraryInfo &TLI) {
auto &Ctx = M.getContext();
- auto FuncName = getPGOFuncName(F);
+ auto FuncName = getIRPGOFuncName(F);
auto FuncGUID = Function::getGUID(FuncName);
- Expected<memprof::MemProfRecord> MemProfResult =
- MemProfReader->getMemProfRecord(FuncGUID);
- if (Error E = MemProfResult.takeError()) {
- handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
+ std::optional<memprof::MemProfRecord> MemProfRec;
+ auto Err = MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec);
+ if (Err) {
+ // If we don't find getIRPGOFuncName(), try getPGOFuncName() to handle
+ // profiles built by older compilers
+ Err = handleErrors(std::move(Err), [&](const InstrProfError &IE) -> Error {
+ if (IE.get() != instrprof_error::unknown_function)
+ return make_error<InstrProfError>(IE);
+ auto FuncName = getPGOFuncName(F);
+ auto FuncGUID = Function::getGUID(FuncName);
+ if (auto Err =
+ MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec))
+ return Err;
+ return Error::success();
+ });
+ }
+ if (Err) {
+ handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
auto Err = IPE.get();
bool SkipWarning = false;
LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName
@@ -722,15 +736,14 @@ static void readMemprof(Module &M, Function &F,
// the frame array (see comments below where the map entries are added).
std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, unsigned>>>
LocHashToCallSites;
- const auto MemProfRec = std::move(MemProfResult.get());
- for (auto &AI : MemProfRec.AllocSites) {
+ for (auto &AI : MemProfRec->AllocSites) {
// Associate the allocation info with the leaf frame. The later matching
// code will match any inlined call sequences in the IR with a longer prefix
// of call stack frames.
uint64_t StackId = computeStackId(AI.CallStack[0]);
LocHashToAllocInfo[StackId].insert(&AI);
}
- for (auto &CS : MemProfRec.CallSites) {
+ for (auto &CS : MemProfRec->CallSites) {
// Need to record all frames from leaf up to and including this function,
// as any of these may or may not have been inlined at this point.
unsigned Idx = 0;
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 3c8f25d..1fe9c57 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -525,6 +525,7 @@ public:
std::vector<std::vector<VPCandidateInfo>> ValueSites;
SelectInstVisitor SIVisitor;
std::string FuncName;
+ std::string DeprecatedFuncName;
GlobalVariable *FuncNameVar;
// CFG hash value for this function.
@@ -590,7 +591,8 @@ public:
NumOfCSPGOBB += MST.BBInfos.size();
}
- FuncName = getPGOFuncName(F);
+ FuncName = getIRPGOFuncName(F);
+ DeprecatedFuncName = getPGOFuncName(F);
computeCFGHash();
if (!ComdatMembers.empty())
renameComdatFunction();
@@ -1336,7 +1338,8 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
auto &Ctx = M->getContext();
uint64_t MismatchedFuncSum = 0;
Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord(
- FuncInfo.FuncName, FuncInfo.FunctionHash, &MismatchedFuncSum);
+ FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
+ &MismatchedFuncSum);
if (Error E = Result.takeError()) {
handleInstrProfError(std::move(E), MismatchedFuncSum);
return false;
@@ -1381,7 +1384,8 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
void PGOUseFunc::populateCoverage(IndexedInstrProfReader *PGOReader) {
uint64_t MismatchedFuncSum = 0;
Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord(
- FuncInfo.FuncName, FuncInfo.FunctionHash, &MismatchedFuncSum);
+ FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
+ &MismatchedFuncSum);
if (auto Err = Result.takeError()) {
handleInstrProfError(std::move(Err), MismatchedFuncSum);
return;