//===- CtxProfAnalysis.cpp - contextual profile analysis ------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Implementation of the contextual profile analysis, which maintains contextual // profiling info through IPO passes. // //===----------------------------------------------------------------------===// #include "llvm/Analysis/CtxProfAnalysis.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/CFG.h" #include "llvm/IR/Analysis.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/ProfileData/PGOCtxProfReader.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include #include #define DEBUG_TYPE "ctx_prof" using namespace llvm; cl::opt UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden, cl::desc("Use the specified contextual profile file")); static cl::opt PrintLevel( "ctx-profile-printer-level", cl::init(CtxProfAnalysisPrinterPass::PrintMode::YAML), cl::Hidden, cl::values(clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::Everything, "everything", "print everything - most verbose"), clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::YAML, "yaml", "just the yaml representation of the profile")), cl::desc("Verbosity level of the contextual profile printer pass.")); static cl::opt ForceIsInSpecializedModule( "ctx-profile-force-is-specialized", cl::init(false), cl::desc("Treat the given module as-if it were containing the " "post-thinlink module containing the root")); const char *AssignGUIDPass::GUIDMetadataName = "guid"; namespace llvm { class ProfileAnnotatorImpl final { friend class ProfileAnnotator; class BBInfo; struct EdgeInfo { BBInfo *const Src; BBInfo *const Dest; std::optional Count; explicit EdgeInfo(BBInfo &Src, BBInfo &Dest) : Src(&Src), Dest(&Dest) {} }; class BBInfo { std::optional Count; // OutEdges is dimensioned to match the number of terminator operands. // Entries in the vector match the index in the terminator operand list. In // some cases - see `shouldExcludeEdge` and its implementation - an entry // will be nullptr. // InEdges doesn't have the above constraint. SmallVector OutEdges; SmallVector InEdges; size_t UnknownCountOutEdges = 0; size_t UnknownCountInEdges = 0; // Pass AssumeAllKnown when we try to propagate counts from edges to BBs - // because all the edge counters must be known. // Return std::nullopt if there were no edges to sum. The user can decide // how to interpret that. std::optional getEdgeSum(const SmallVector &Edges, bool AssumeAllKnown) const { std::optional Sum; for (const auto *E : Edges) { // `Edges` may be `OutEdges`, case in which `E` could be nullptr. if (E) { if (!Sum.has_value()) Sum = 0; *Sum += (AssumeAllKnown ? *E->Count : E->Count.value_or(0U)); } } return Sum; } bool computeCountFrom(const SmallVector &Edges) { assert(!Count.has_value()); Count = getEdgeSum(Edges, true); return Count.has_value(); } void setSingleUnknownEdgeCount(SmallVector &Edges) { uint64_t KnownSum = getEdgeSum(Edges, false).value_or(0U); uint64_t EdgeVal = *Count > KnownSum ? *Count - KnownSum : 0U; EdgeInfo *E = nullptr; for (auto *I : Edges) if (I && !I->Count.has_value()) { E = I; #ifdef NDEBUG break; #else assert((!E || E == I) && "Expected exactly one edge to have an unknown count, " "found a second one"); continue; #endif } assert(E && "Expected exactly one edge to have an unknown count"); assert(!E->Count.has_value()); E->Count = EdgeVal; assert(E->Src->UnknownCountOutEdges > 0); assert(E->Dest->UnknownCountInEdges > 0); --E->Src->UnknownCountOutEdges; --E->Dest->UnknownCountInEdges; } public: BBInfo(size_t NumInEdges, size_t NumOutEdges, std::optional Count) : Count(Count) { // For in edges, we just want to pre-allocate enough space, since we know // it at this stage. For out edges, we will insert edges at the indices // corresponding to positions in this BB's terminator instruction, so we // construct a default (nullptr values)-initialized vector. A nullptr edge // corresponds to those that are excluded (see shouldExcludeEdge). InEdges.reserve(NumInEdges); OutEdges.resize(NumOutEdges); } bool tryTakeCountFromKnownOutEdges(const BasicBlock &BB) { if (!UnknownCountOutEdges) { return computeCountFrom(OutEdges); } return false; } bool tryTakeCountFromKnownInEdges(const BasicBlock &BB) { if (!UnknownCountInEdges) { return computeCountFrom(InEdges); } return false; } void addInEdge(EdgeInfo &Info) { InEdges.push_back(&Info); ++UnknownCountInEdges; } // For the out edges, we care about the position we place them in, which is // the position in terminator instruction's list (at construction). Later, // we build branch_weights metadata with edge frequency values matching // these positions. void addOutEdge(size_t Index, EdgeInfo &Info) { OutEdges[Index] = &Info; ++UnknownCountOutEdges; } bool hasCount() const { return Count.has_value(); } uint64_t getCount() const { return *Count; } bool trySetSingleUnknownInEdgeCount() { if (UnknownCountInEdges == 1) { setSingleUnknownEdgeCount(InEdges); return true; } return false; } bool trySetSingleUnknownOutEdgeCount() { if (UnknownCountOutEdges == 1) { setSingleUnknownEdgeCount(OutEdges); return true; } return false; } size_t getNumOutEdges() const { return OutEdges.size(); } uint64_t getEdgeCount(size_t Index) const { if (auto *E = OutEdges[Index]) return *E->Count; return 0U; } }; const Function &F; ArrayRef Counters; // To be accessed through getBBInfo() after construction. std::map BBInfos; std::vector EdgeInfos; // The only criteria for exclusion is faux suspend -> exit edges in presplit // coroutines. The API serves for readability, currently. bool shouldExcludeEdge(const BasicBlock &Src, const BasicBlock &Dest) const { return llvm::isPresplitCoroSuspendExitEdge(Src, Dest); } BBInfo &getBBInfo(const BasicBlock &BB) { return BBInfos.find(&BB)->second; } const BBInfo &getBBInfo(const BasicBlock &BB) const { return BBInfos.find(&BB)->second; } // validation function after we propagate the counters: all BBs and edges' // counters must have a value. bool allCountersAreAssigned() const { for (const auto &BBInfo : BBInfos) if (!BBInfo.second.hasCount()) return false; for (const auto &EdgeInfo : EdgeInfos) if (!EdgeInfo.Count.has_value()) return false; return true; } /// Check that all paths from the entry basic block that use edges with /// non-zero counts arrive at a basic block with no successors (i.e. "exit") bool allTakenPathsExit() const { std::deque Worklist; DenseSet Visited; Worklist.push_back(&F.getEntryBlock()); bool HitExit = false; while (!Worklist.empty()) { const auto *BB = Worklist.front(); Worklist.pop_front(); if (!Visited.insert(BB).second) continue; if (succ_size(BB) == 0) { if (isa(BB->getTerminator())) return false; HitExit = true; continue; } if (succ_size(BB) == 1) { Worklist.push_back(BB->getUniqueSuccessor()); continue; } const auto &BBInfo = getBBInfo(*BB); bool HasAWayOut = false; for (auto I = 0U; I < BB->getTerminator()->getNumSuccessors(); ++I) { const auto *Succ = BB->getTerminator()->getSuccessor(I); if (!shouldExcludeEdge(*BB, *Succ)) { if (BBInfo.getEdgeCount(I) > 0) { HasAWayOut = true; Worklist.push_back(Succ); } } } if (!HasAWayOut) return false; } return HitExit; } bool allNonColdSelectsHaveProfile() const { for (const auto &BB : F) { if (getBBInfo(BB).getCount() > 0) { for (const auto &I : BB) { if (const auto *SI = dyn_cast(&I)) { if (const auto *Inst = CtxProfAnalysis::getSelectInstrumentation( *const_cast(SI))) { auto Index = Inst->getIndex()->getZExtValue(); assert(Index < Counters.size()); if (Counters[Index] == 0) return false; } } } } } return true; } // This is an adaptation of PGOUseFunc::populateCounters. // FIXME(mtrofin): look into factoring the code to share one implementation. void propagateCounterValues() { bool KeepGoing = true; while (KeepGoing) { KeepGoing = false; for (const auto &BB : F) { auto &Info = getBBInfo(BB); if (!Info.hasCount()) KeepGoing |= Info.tryTakeCountFromKnownOutEdges(BB) || Info.tryTakeCountFromKnownInEdges(BB); if (Info.hasCount()) { KeepGoing |= Info.trySetSingleUnknownOutEdgeCount(); KeepGoing |= Info.trySetSingleUnknownInEdgeCount(); } } } assert(allCountersAreAssigned() && "[ctx-prof] Expected all counters have been assigned."); assert(allTakenPathsExit() && "[ctx-prof] Encountered a BB with more than one successor, where " "all outgoing edges have a 0 count. This occurs in non-exiting " "functions (message pumps, usually) which are not supported in the " "contextual profiling case"); assert(allNonColdSelectsHaveProfile() && "[ctx-prof] All non-cold select instructions were expected to have " "a profile."); } public: ProfileAnnotatorImpl(const Function &F, ArrayRef Counters) : F(F), Counters(Counters) { assert(!F.isDeclaration()); assert(!Counters.empty()); size_t NrEdges = 0; for (const auto &BB : F) { std::optional Count; if (auto *Ins = CtxProfAnalysis::getBBInstrumentation( const_cast(BB))) { auto Index = Ins->getIndex()->getZExtValue(); assert(Index < Counters.size() && "The index must be inside the counters vector by construction - " "tripping this assertion indicates a bug in how the contextual " "profile is managed by IPO transforms"); (void)Index; Count = Counters[Ins->getIndex()->getZExtValue()]; } else if (isa(BB.getTerminator())) { // The program presumably didn't crash. Count = 0; } auto [It, Ins] = BBInfos.insert({&BB, {pred_size(&BB), succ_size(&BB), Count}}); (void)Ins; assert(Ins && "We iterate through the function's BBs, no reason to " "insert one more than once"); NrEdges += llvm::count_if(successors(&BB), [&](const auto *Succ) { return !shouldExcludeEdge(BB, *Succ); }); } // Pre-allocate the vector, we want references to its contents to be stable. EdgeInfos.reserve(NrEdges); for (const auto &BB : F) { auto &Info = getBBInfo(BB); for (auto I = 0U; I < BB.getTerminator()->getNumSuccessors(); ++I) { const auto *Succ = BB.getTerminator()->getSuccessor(I); if (!shouldExcludeEdge(BB, *Succ)) { auto &EI = EdgeInfos.emplace_back(getBBInfo(BB), getBBInfo(*Succ)); Info.addOutEdge(I, EI); getBBInfo(*Succ).addInEdge(EI); } } } assert(EdgeInfos.capacity() == NrEdges && "The capacity of EdgeInfos should have stayed unchanged it was " "populated, because we need pointers to its contents to be stable"); propagateCounterValues(); } uint64_t getBBCount(const BasicBlock &BB) { return getBBInfo(BB).getCount(); } }; } // namespace llvm ProfileAnnotator::ProfileAnnotator(const Function &F, ArrayRef RawCounters) : PImpl(std::make_unique(F, RawCounters)) {} ProfileAnnotator::~ProfileAnnotator() = default; uint64_t ProfileAnnotator::getBBCount(const BasicBlock &BB) const { return PImpl->getBBCount(BB); } bool ProfileAnnotator::getSelectInstrProfile(SelectInst &SI, uint64_t &TrueCount, uint64_t &FalseCount) const { const auto &BBInfo = PImpl->getBBInfo(*SI.getParent()); TrueCount = FalseCount = 0; if (BBInfo.getCount() == 0) return false; auto *Step = CtxProfAnalysis::getSelectInstrumentation(SI); if (!Step) return false; auto Index = Step->getIndex()->getZExtValue(); assert(Index < PImpl->Counters.size() && "The index of the step instruction must be inside the " "counters vector by " "construction - tripping this assertion indicates a bug in " "how the contextual profile is managed by IPO transforms"); auto TotalCount = BBInfo.getCount(); TrueCount = PImpl->Counters[Index]; FalseCount = (TotalCount > TrueCount ? TotalCount - TrueCount : 0U); return true; } bool ProfileAnnotator::getOutgoingBranchWeights( BasicBlock &BB, SmallVectorImpl &Profile, uint64_t &MaxCount) const { Profile.clear(); if (succ_size(&BB) < 2) return false; auto *Term = BB.getTerminator(); Profile.resize(Term->getNumSuccessors()); const auto &BBInfo = PImpl->getBBInfo(BB); MaxCount = 0; for (unsigned SuccIdx = 0, Size = BBInfo.getNumOutEdges(); SuccIdx < Size; ++SuccIdx) { uint64_t EdgeCount = BBInfo.getEdgeCount(SuccIdx); if (EdgeCount > MaxCount) MaxCount = EdgeCount; Profile[SuccIdx] = EdgeCount; } return MaxCount > 0; } PreservedAnalyses AssignGUIDPass::run(Module &M, ModuleAnalysisManager &MAM) { for (auto &F : M.functions()) { if (F.isDeclaration()) continue; if (F.getMetadata(GUIDMetadataName)) continue; const GlobalValue::GUID GUID = F.getGUID(); F.setMetadata(GUIDMetadataName, MDNode::get(M.getContext(), {ConstantAsMetadata::get(ConstantInt::get( Type::getInt64Ty(M.getContext()), GUID))})); } return PreservedAnalyses::none(); } GlobalValue::GUID AssignGUIDPass::getGUID(const Function &F) { if (F.isDeclaration()) { assert(GlobalValue::isExternalLinkage(F.getLinkage())); return F.getGUID(); } auto *MD = F.getMetadata(GUIDMetadataName); assert(MD && "guid not found for defined function"); return cast(cast(MD->getOperand(0)) ->getValue() ->stripPointerCasts()) ->getZExtValue(); } AnalysisKey CtxProfAnalysis::Key; CtxProfAnalysis::CtxProfAnalysis(std::optional Profile) : Profile([&]() -> std::optional { if (Profile) return *Profile; if (UseCtxProfile.getNumOccurrences()) return UseCtxProfile; return std::nullopt; }()) {} PGOContextualProfile CtxProfAnalysis::run(Module &M, ModuleAnalysisManager &MAM) { if (!Profile) return {}; ErrorOr> MB = MemoryBuffer::getFile(*Profile); if (auto EC = MB.getError()) { M.getContext().emitError("could not open contextual profile file: " + EC.message()); return {}; } PGOCtxProfileReader Reader(MB.get()->getBuffer()); auto MaybeProfiles = Reader.loadProfiles(); if (!MaybeProfiles) { M.getContext().emitError("contextual profile file is invalid: " + toString(MaybeProfiles.takeError())); return {}; } // FIXME: We should drive this from ThinLTO, but for the time being, use the // module name as indicator. // We want to *only* keep the contextual profiles in modules that capture // context trees. That allows us to compute specific PSIs, for example. auto DetermineRootsInModule = [&M]() -> const DenseSet { DenseSet ProfileRootsInModule; auto ModName = M.getName(); auto Filename = sys::path::filename(ModName); // Drop the file extension. Filename = Filename.substr(0, Filename.find_last_of('.')); // See if it parses APInt Guid; // getAsInteger returns true if there are more chars to read other than the // integer. So the "false" test is what we want. if (!Filename.getAsInteger(0, Guid)) ProfileRootsInModule.insert(Guid.getZExtValue()); return ProfileRootsInModule; }; const auto ProfileRootsInModule = DetermineRootsInModule(); PGOContextualProfile Result; // the logic from here on allows for modules that contain - by design - more // than one root. We currently don't support that, because the determination // happens based on the module name matching the root guid, but the logic can // avoid assuming that. if (!ProfileRootsInModule.empty()) { Result.IsInSpecializedModule = true; // Trim first the roots that aren't in this module. for (auto &[RootGuid, _] : llvm::make_early_inc_range(MaybeProfiles->Contexts)) if (!ProfileRootsInModule.contains(RootGuid)) MaybeProfiles->Contexts.erase(RootGuid); // we can also drop the flat profiles MaybeProfiles->FlatProfiles.clear(); } for (const auto &F : M) { if (F.isDeclaration()) continue; auto GUID = AssignGUIDPass::getGUID(F); assert(GUID && "guid not found for defined function"); const auto &Entry = F.begin(); uint32_t MaxCounters = 0; // we expect at least a counter. for (const auto &I : *Entry) if (auto *C = dyn_cast(&I)) { MaxCounters = static_cast(C->getNumCounters()->getZExtValue()); break; } if (!MaxCounters) continue; uint32_t MaxCallsites = 0; for (const auto &BB : F) for (const auto &I : BB) if (auto *C = dyn_cast(&I)) { MaxCallsites = static_cast(C->getNumCounters()->getZExtValue()); break; } auto [It, Ins] = Result.FuncInfo.insert( {GUID, PGOContextualProfile::FunctionInfo(F.getName())}); (void)Ins; assert(Ins); It->second.NextCallsiteIndex = MaxCallsites; It->second.NextCounterIndex = MaxCounters; } // If we made it this far, the Result is valid - which we mark by setting // .Profiles. Result.Profiles = std::move(*MaybeProfiles); Result.initIndex(); return Result; } GlobalValue::GUID PGOContextualProfile::getDefinedFunctionGUID(const Function &F) const { if (auto It = FuncInfo.find(AssignGUIDPass::getGUID(F)); It != FuncInfo.end()) return It->first; return 0; } CtxProfAnalysisPrinterPass::CtxProfAnalysisPrinterPass(raw_ostream &OS) : OS(OS), Mode(PrintLevel) {} PreservedAnalyses CtxProfAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &MAM) { CtxProfAnalysis::Result &C = MAM.getResult(M); if (C.contexts().empty()) { OS << "No contextual profile was provided.\n"; return PreservedAnalyses::all(); } if (Mode == PrintMode::Everything) { OS << "Function Info:\n"; for (const auto &[Guid, FuncInfo] : C.FuncInfo) OS << Guid << " : " << FuncInfo.Name << ". MaxCounterID: " << FuncInfo.NextCounterIndex << ". MaxCallsiteID: " << FuncInfo.NextCallsiteIndex << "\n"; } if (Mode == PrintMode::Everything) OS << "\nCurrent Profile:\n"; convertCtxProfToYaml(OS, C.profiles()); OS << "\n"; if (Mode == PrintMode::YAML) return PreservedAnalyses::all(); OS << "\nFlat Profile:\n"; auto Flat = C.flatten(); for (const auto &[Guid, Counters] : Flat) { OS << Guid << " : "; for (auto V : Counters) OS << V << " "; OS << "\n"; } return PreservedAnalyses::all(); } InstrProfCallsite *CtxProfAnalysis::getCallsiteInstrumentation(CallBase &CB) { if (!InstrProfCallsite::canInstrumentCallsite(CB)) return nullptr; for (auto *Prev = CB.getPrevNode(); Prev; Prev = Prev->getPrevNode()) { if (auto *IPC = dyn_cast(Prev)) return IPC; assert(!isa(Prev) && "didn't expect to find another call, that's not the callsite " "instrumentation, before an instrumentable callsite"); } return nullptr; } InstrProfIncrementInst *CtxProfAnalysis::getBBInstrumentation(BasicBlock &BB) { for (auto &I : BB) if (auto *Incr = dyn_cast(&I)) if (!isa(&I)) return Incr; return nullptr; } InstrProfIncrementInstStep * CtxProfAnalysis::getSelectInstrumentation(SelectInst &SI) { Instruction *Prev = &SI; while ((Prev = Prev->getPrevNode())) if (auto *Step = dyn_cast(Prev)) return Step; return nullptr; } template static void preorderVisitOneRoot(ProfTy &Profile, function_ref Visitor) { std::function Traverser = [&](auto &Ctx) { Visitor(Ctx); for (auto &[_, SubCtxSet] : Ctx.callsites()) for (auto &[__, Subctx] : SubCtxSet) Traverser(Subctx); }; Traverser(Profile); } template static void preorderVisit(ProfilesTy &Profiles, function_ref Visitor) { for (auto &[_, P] : Profiles) preorderVisitOneRoot(P, Visitor); } void PGOContextualProfile::initIndex() { // Initialize the head of the index list for each function. We don't need it // after this point. DenseMap InsertionPoints; for (auto &[Guid, FI] : FuncInfo) InsertionPoints[Guid] = &FI.Index; preorderVisit( Profiles.Contexts, [&](PGOCtxProfContext &Ctx) { auto InsertIt = InsertionPoints.find(Ctx.guid()); if (InsertIt == InsertionPoints.end()) return; // Insert at the end of the list. Since we traverse in preorder, it // means that when we iterate the list from the beginning, we'd // encounter the contexts in the order we would have, should we have // performed a full preorder traversal. InsertIt->second->Next = &Ctx; Ctx.Previous = InsertIt->second; InsertIt->second = &Ctx; }); } bool PGOContextualProfile::isInSpecializedModule() const { return ForceIsInSpecializedModule.getNumOccurrences() > 0 ? ForceIsInSpecializedModule : IsInSpecializedModule; } void PGOContextualProfile::update(Visitor V, const Function &F) { assert(isFunctionKnown(F)); GlobalValue::GUID G = getDefinedFunctionGUID(F); for (auto *Node = FuncInfo.find(G)->second.Index.Next; Node; Node = Node->Next) V(*reinterpret_cast(Node)); } void PGOContextualProfile::visit(ConstVisitor V, const Function *F) const { if (!F) return preorderVisit(Profiles.Contexts, V); assert(isFunctionKnown(*F)); GlobalValue::GUID G = getDefinedFunctionGUID(*F); for (const auto *Node = FuncInfo.find(G)->second.Index.Next; Node; Node = Node->Next) V(*reinterpret_cast(Node)); } const CtxProfFlatProfile PGOContextualProfile::flatten() const { CtxProfFlatProfile Flat; auto Accummulate = [](SmallVectorImpl &Into, const SmallVectorImpl &From, uint64_t SamplingRate) { if (Into.empty()) Into.resize(From.size()); assert(Into.size() == From.size() && "All contexts corresponding to a function should have the exact " "same number of counters."); for (size_t I = 0, E = Into.size(); I < E; ++I) Into[I] += From[I] * SamplingRate; }; for (const auto &[_, CtxRoot] : Profiles.Contexts) { const uint64_t SamplingFactor = CtxRoot.getTotalRootEntryCount(); preorderVisitOneRoot( CtxRoot, [&](const PGOCtxProfContext &Ctx) { Accummulate(Flat[Ctx.guid()], Ctx.counters(), SamplingFactor); }); for (const auto &[G, Unh] : CtxRoot.getUnhandled()) Accummulate(Flat[G], Unh, SamplingFactor); } // We don't sample "Flat" currently, so sampling rate is 1. for (const auto &[G, FC] : Profiles.FlatProfiles) Accummulate(Flat[G], FC, /*SamplingRate=*/1); return Flat; } const CtxProfFlatIndirectCallProfile PGOContextualProfile::flattenVirtCalls() const { CtxProfFlatIndirectCallProfile Ret; for (const auto &[_, CtxRoot] : Profiles.Contexts) { const uint64_t TotalRootEntryCount = CtxRoot.getTotalRootEntryCount(); preorderVisitOneRoot( CtxRoot, [&](const PGOCtxProfContext &Ctx) { auto &Targets = Ret[Ctx.guid()]; for (const auto &[ID, SubctxSet] : Ctx.callsites()) for (const auto &Subctx : SubctxSet) Targets[ID][Subctx.first] += Subctx.second.getEntrycount() * TotalRootEntryCount; }); } return Ret; } void CtxProfAnalysis::collectIndirectCallPromotionList( CallBase &IC, Result &Profile, SetVector> &Candidates) { const auto *Instr = CtxProfAnalysis::getCallsiteInstrumentation(IC); if (!Instr) return; Module &M = *IC.getParent()->getModule(); const uint32_t CallID = Instr->getIndex()->getZExtValue(); Profile.visit( [&](const PGOCtxProfContext &Ctx) { const auto &Targets = Ctx.callsites().find(CallID); if (Targets == Ctx.callsites().end()) return; for (const auto &[Guid, _] : Targets->second) if (auto Name = Profile.getFunctionName(Guid); !Name.empty()) if (auto *Target = M.getFunction(Name)) if (Target->hasFnAttribute(Attribute::AlwaysInline)) Candidates.insert({&IC, Target}); }, IC.getCaller()); }