diff options
author | Ellis Hoag <ellis.sparky.hoag@gmail.com> | 2023-08-04 11:26:14 -0700 |
---|---|---|
committer | Ellis Hoag <ellis.sparky.hoag@gmail.com> | 2023-08-07 10:15:08 -0700 |
commit | fe051934cbb0aaf25d960d7d45305135635d650b (patch) | |
tree | 8f168ed70ecb3bb7967ee78cfc3279c4aabc6b7a /llvm/lib | |
parent | 165f7f06a125d072574b74ca5540b4e6141b5b7b (diff) | |
download | llvm-fe051934cbb0aaf25d960d7d45305135635d650b.zip llvm-fe051934cbb0aaf25d960d7d45305135635d650b.tar.gz llvm-fe051934cbb0aaf25d960d7d45305135635d650b.tar.bz2 |
[InstrProf] Encode linkage names in IRPGO counter names
Prior to this diff, names in the `__llvm_prf_names` section had the format `[<filepath>:]<function-name>`, e.g., `main.cpp:foo`, `bar`. `<filepath>` is used to discriminate between possibly identical function names when linkage is local and `<function-name>` simply comes from `F.getName()`. This has two problems:
* `:` is commonly found in Objective-C functions so that names like `main.mm:-[C foo::]` and `-[C bar::]` are difficult to parse
* `<function-name>` might be different from the linkage name, so it cannot be used to pass a function order to the linker via `-symbol-ordering-file` or `-order_file` (see https://discourse.llvm.org/t/rfc-temporal-profiling-extension-for-irpgo/68068)
Instead, this diff changes the format to `[<filepath>;]<linkage-name>`, e.g., `main.cpp;_foo`, `_bar`. The hope is that `;` won't realistically be found in either `<filepath>` or `<linkage-name>`.
To prevent invalidating all prior IRPGO profiles, we also lookup the prior name format when a record is not found (see `InstrProfSymtab::create()`, `readMemprof()`, and `getInstrProfRecord()`). It seems that Swift and Clang FE-PGO rely on the original `getPGOFuncName()`, so we cannot simply replace it.
Reviewed By: MaskRay
Differential Revision: https://reviews.llvm.org/D156569
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/ProfileData/InstrProf.cpp | 150 | ||||
-rw-r--r-- | llvm/lib/ProfileData/InstrProfReader.cpp | 21 | ||||
-rw-r--r-- | llvm/lib/Transforms/Instrumentation/MemProfiler.cpp | 29 | ||||
-rw-r--r-- | llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp | 10 |
4 files changed, 156 insertions, 54 deletions
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index 0f9c33d..835dd69 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -27,6 +27,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Mangler.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" @@ -264,6 +265,67 @@ static StringRef stripDirPrefix(StringRef PathNameStr, uint32_t NumPrefix) { return PathNameStr.substr(LastPos); } +static StringRef getStrippedSourceFileName(const Function &F) { + StringRef FileName(F.getParent()->getSourceFileName()); + uint32_t StripLevel = StaticFuncFullModulePrefix ? 0 : (uint32_t)-1; + if (StripLevel < StaticFuncStripDirNamePrefix) + StripLevel = StaticFuncStripDirNamePrefix; + if (StripLevel) + FileName = stripDirPrefix(FileName, StripLevel); + return FileName; +} + +// The PGO name has the format [<filepath>;]<linkage-name> where <filepath>; is +// provided if linkage is local and <linkage-name> is the mangled function +// name. The filepath is used to discriminate possibly identical function names. +// ; is used because it is unlikely to be found in either <filepath> or +// <linkage-name>. +// +// Older compilers used getPGOFuncName() which has the format +// [<filepath>:]<function-name>. <filepath> is used to discriminate between +// possibly identical function names when linkage is local and <function-name> +// simply comes from F.getName(). This caused trouble for Objective-C functions +// which commonly have :'s in their names. Also, since <function-name> is not +// mangled, they cannot be passed to Mach-O linkers via -order_file. We still +// need to compute this name to lookup functions from profiles built by older +// compilers. +static std::string getIRPGOFuncName(const Function &F, + GlobalValue::LinkageTypes Linkage, + StringRef FileName) { + SmallString<64> Name; + if (llvm::GlobalValue::isLocalLinkage(Linkage)) { + Name.append(FileName.empty() ? "<unknown>" : FileName); + Name.append(";"); + } + Mangler().getNameWithPrefix(Name, &F, /*CannotUsePrivateLabel=*/true); + return Name.str().str(); +} + +static std::optional<std::string> lookupPGOFuncName(const Function &F) { + if (MDNode *MD = getPGOFuncNameMetadata(F)) { + StringRef S = cast<MDString>(MD->getOperand(0))->getString(); + return S.str(); + } + return {}; +} + +// See getPGOFuncName() +std::string getIRPGOFuncName(const Function &F, bool InLTO) { + if (!InLTO) { + auto FileName = getStrippedSourceFileName(F); + return getIRPGOFuncName(F, F.getLinkage(), FileName); + } + + // In LTO mode (when InLTO is true), first check if there is a meta data. + if (auto IRPGOFuncName = lookupPGOFuncName(F)) + return *IRPGOFuncName; + + // If there is no meta data, the function must be a global before the value + // profile annotation pass. Its current linkage may be internal if it is + // internalized in LTO mode. + return getIRPGOFuncName(F, GlobalValue::ExternalLinkage, ""); +} + // Return the PGOFuncName. This function has some special handling when called // in LTO optimization. The following only applies when calling in LTO passes // (when \c InLTO is true): LTO's internalization privatizes many global linkage @@ -279,20 +341,13 @@ static StringRef stripDirPrefix(StringRef PathNameStr, uint32_t NumPrefix) { // data, its original linkage must be non-internal. std::string getPGOFuncName(const Function &F, bool InLTO, uint64_t Version) { if (!InLTO) { - StringRef FileName(F.getParent()->getSourceFileName()); - uint32_t StripLevel = StaticFuncFullModulePrefix ? 0 : (uint32_t)-1; - if (StripLevel < StaticFuncStripDirNamePrefix) - StripLevel = StaticFuncStripDirNamePrefix; - if (StripLevel) - FileName = stripDirPrefix(FileName, StripLevel); + auto FileName = getStrippedSourceFileName(F); return getPGOFuncName(F.getName(), F.getLinkage(), FileName, Version); } // In LTO mode (when InLTO is true), first check if there is a meta data. - if (MDNode *MD = getPGOFuncNameMetadata(F)) { - StringRef S = cast<MDString>(MD->getOperand(0))->getString(); - return S.str(); - } + if (auto PGOFuncName = lookupPGOFuncName(F)) + return *PGOFuncName; // If there is no meta data, the function must be a global before the value // profile annotation pass. Its current linkage may be internal if it is @@ -300,6 +355,15 @@ std::string getPGOFuncName(const Function &F, bool InLTO, uint64_t Version) { return getPGOFuncName(F.getName(), GlobalValue::ExternalLinkage, ""); } +// See getIRPGOFuncName() for a discription of the format. +std::pair<StringRef, StringRef> +getParsedIRPGOFuncName(StringRef IRPGOFuncName) { + auto [FileName, FuncName] = IRPGOFuncName.split(';'); + if (FuncName.empty()) + return std::make_pair(StringRef(), IRPGOFuncName); + return std::make_pair(FileName, FuncName); +} + StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName) { if (FileName.empty()) return PGOFuncName; @@ -320,7 +384,7 @@ std::string getPGOFuncNameVarName(StringRef FuncName, return VarName; // Now fix up illegal chars in local VarName that may upset the assembler. - const char *InvalidChars = "-:<>/\"'"; + const char InvalidChars[] = "-:;<>/\"'"; size_t found = VarName.find_first_of(InvalidChars); while (found != std::string::npos) { VarName[found] = '_'; @@ -366,41 +430,49 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) { // Ignore in this case. if (!F.hasName()) continue; - const std::string &PGOFuncName = getPGOFuncName(F, InLTO); - if (Error E = addFuncName(PGOFuncName)) + if (Error E = addFuncWithName(F, getIRPGOFuncName(F, InLTO))) + return E; + // Also use getPGOFuncName() so that we can find records from older profiles + if (Error E = addFuncWithName(F, getPGOFuncName(F, InLTO))) return E; - MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F); - // In ThinLTO, local function may have been promoted to global and have - // suffix ".llvm." added to the function name. We need to add the - // stripped function name to the symbol table so that we can find a match - // from profile. - // - // We may have other suffixes similar as ".llvm." which are needed to - // be stripped before the matching, but ".__uniq." suffix which is used - // to differentiate internal linkage functions in different modules - // should be kept. Now this is the only suffix with the pattern ".xxx" - // which is kept before matching. - const std::string UniqSuffix = ".__uniq."; - auto pos = PGOFuncName.find(UniqSuffix); - // Search '.' after ".__uniq." if ".__uniq." exists, otherwise - // search '.' from the beginning. - if (pos != std::string::npos) - pos += UniqSuffix.length(); - else - pos = 0; - pos = PGOFuncName.find('.', pos); - if (pos != std::string::npos && pos != 0) { - const std::string &OtherFuncName = PGOFuncName.substr(0, pos); - if (Error E = addFuncName(OtherFuncName)) - return E; - MD5FuncMap.emplace_back(Function::getGUID(OtherFuncName), &F); - } } Sorted = false; finalizeSymtab(); return Error::success(); } +Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) { + if (Error E = addFuncName(PGOFuncName)) + return E; + MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F); + // In ThinLTO, local function may have been promoted to global and have + // suffix ".llvm." added to the function name. We need to add the + // stripped function name to the symbol table so that we can find a match + // from profile. + // + // We may have other suffixes similar as ".llvm." which are needed to + // be stripped before the matching, but ".__uniq." suffix which is used + // to differentiate internal linkage functions in different modules + // should be kept. Now this is the only suffix with the pattern ".xxx" + // which is kept before matching. + const std::string UniqSuffix = ".__uniq."; + auto pos = PGOFuncName.find(UniqSuffix); + // Search '.' after ".__uniq." if ".__uniq." exists, otherwise + // search '.' from the beginning. + if (pos != std::string::npos) + pos += UniqSuffix.length(); + else + pos = 0; + pos = PGOFuncName.find('.', pos); + if (pos != std::string::npos && pos != 0) { + StringRef OtherFuncName = PGOFuncName.substr(0, pos); + if (Error E = addFuncName(OtherFuncName)) + return E; + MD5FuncMap.emplace_back(Function::getGUID(OtherFuncName), &F); + } + return Error::success(); +} + uint64_t InstrProfSymtab::getFunctionHashFromAddress(uint64_t Address) { finalizeSymtab(); auto It = partition_point(AddrToMD5Map, [=](std::pair<uint64_t, uint64_t> A) { diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index 4160f7e..7f856c4 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -1214,12 +1214,25 @@ InstrProfSymtab &IndexedInstrProfReader::getSymtab() { } Expected<InstrProfRecord> IndexedInstrProfReader::getInstrProfRecord( - StringRef FuncName, uint64_t FuncHash, uint64_t *MismatchedFuncSum) { + StringRef FuncName, uint64_t FuncHash, StringRef DeprecatedFuncName, + uint64_t *MismatchedFuncSum) { ArrayRef<NamedInstrProfRecord> Data; uint64_t FuncSum = 0; - Error Err = Remapper->getRecords(FuncName, Data); - if (Err) - return std::move(Err); + auto Err = Remapper->getRecords(FuncName, Data); + if (Err) { + // If we don't find FuncName, try DeprecatedFuncName to handle profiles + // built by older compilers. + auto Err2 = + handleErrors(std::move(Err), [&](const InstrProfError &IE) -> Error { + if (IE.get() != instrprof_error::unknown_function) + return make_error<InstrProfError>(IE); + if (auto Err = Remapper->getRecords(DeprecatedFuncName, Data)) + return Err; + return Error::success(); + }); + if (Err2) + return std::move(Err2); + } // Found it. Look for counters with the right hash. // A flag to indicate if the records are from the same type diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp index 789ed00..453dd00 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -679,12 +679,26 @@ static void readMemprof(Module &M, Function &F, const TargetLibraryInfo &TLI) { auto &Ctx = M.getContext(); - auto FuncName = getPGOFuncName(F); + auto FuncName = getIRPGOFuncName(F); auto FuncGUID = Function::getGUID(FuncName); - Expected<memprof::MemProfRecord> MemProfResult = - MemProfReader->getMemProfRecord(FuncGUID); - if (Error E = MemProfResult.takeError()) { - handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { + std::optional<memprof::MemProfRecord> MemProfRec; + auto Err = MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec); + if (Err) { + // If we don't find getIRPGOFuncName(), try getPGOFuncName() to handle + // profiles built by older compilers + Err = handleErrors(std::move(Err), [&](const InstrProfError &IE) -> Error { + if (IE.get() != instrprof_error::unknown_function) + return make_error<InstrProfError>(IE); + auto FuncName = getPGOFuncName(F); + auto FuncGUID = Function::getGUID(FuncName); + if (auto Err = + MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec)) + return Err; + return Error::success(); + }); + } + if (Err) { + handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) { auto Err = IPE.get(); bool SkipWarning = false; LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName @@ -722,15 +736,14 @@ static void readMemprof(Module &M, Function &F, // the frame array (see comments below where the map entries are added). std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, unsigned>>> LocHashToCallSites; - const auto MemProfRec = std::move(MemProfResult.get()); - for (auto &AI : MemProfRec.AllocSites) { + for (auto &AI : MemProfRec->AllocSites) { // Associate the allocation info with the leaf frame. The later matching // code will match any inlined call sequences in the IR with a longer prefix // of call stack frames. uint64_t StackId = computeStackId(AI.CallStack[0]); LocHashToAllocInfo[StackId].insert(&AI); } - for (auto &CS : MemProfRec.CallSites) { + for (auto &CS : MemProfRec->CallSites) { // Need to record all frames from leaf up to and including this function, // as any of these may or may not have been inlined at this point. unsigned Idx = 0; diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 3c8f25d..1fe9c57 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -525,6 +525,7 @@ public: std::vector<std::vector<VPCandidateInfo>> ValueSites; SelectInstVisitor SIVisitor; std::string FuncName; + std::string DeprecatedFuncName; GlobalVariable *FuncNameVar; // CFG hash value for this function. @@ -590,7 +591,8 @@ public: NumOfCSPGOBB += MST.BBInfos.size(); } - FuncName = getPGOFuncName(F); + FuncName = getIRPGOFuncName(F); + DeprecatedFuncName = getPGOFuncName(F); computeCFGHash(); if (!ComdatMembers.empty()) renameComdatFunction(); @@ -1336,7 +1338,8 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, auto &Ctx = M->getContext(); uint64_t MismatchedFuncSum = 0; Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord( - FuncInfo.FuncName, FuncInfo.FunctionHash, &MismatchedFuncSum); + FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName, + &MismatchedFuncSum); if (Error E = Result.takeError()) { handleInstrProfError(std::move(E), MismatchedFuncSum); return false; @@ -1381,7 +1384,8 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, void PGOUseFunc::populateCoverage(IndexedInstrProfReader *PGOReader) { uint64_t MismatchedFuncSum = 0; Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord( - FuncInfo.FuncName, FuncInfo.FunctionHash, &MismatchedFuncSum); + FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName, + &MismatchedFuncSum); if (auto Err = Result.takeError()) { handleInstrProfError(std::move(Err), MismatchedFuncSum); return; |