diff options
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/include/llvm/ProfileData/InstrProf.h | 4 | ||||
-rw-r--r-- | llvm/include/llvm/ProfileData/InstrProfData.inc | 5 | ||||
-rw-r--r-- | llvm/include/llvm/ProfileData/InstrProfReader.h | 21 | ||||
-rw-r--r-- | llvm/include/llvm/ProfileData/InstrProfWriter.h | 4 | ||||
-rw-r--r-- | llvm/include/llvm/Transforms/Instrumentation/CFGMST.h | 41 | ||||
-rw-r--r-- | llvm/lib/ProfileData/InstrProfReader.cpp | 5 | ||||
-rw-r--r-- | llvm/lib/ProfileData/InstrProfWriter.cpp | 7 | ||||
-rw-r--r-- | llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp | 72 | ||||
-rw-r--r-- | llvm/test/Transforms/PGOProfile/loop_entries_gen.ll | 58 | ||||
-rw-r--r-- | llvm/test/Transforms/PGOProfile/loop_entries_use.ll | 106 | ||||
-rw-r--r-- | llvm/tools/llvm-profdata/llvm-profdata.cpp | 4 |
12 files changed, 301 insertions, 29 deletions
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index c5f7800..7133c0c 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -344,7 +344,9 @@ enum class InstrProfKind { MemProf = 0x40, // A temporal profile. TemporalProfile = 0x80, - LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/TemporalProfile) + // A profile with loop entry basic blocks instrumentation. + LoopEntriesInstrumentation = 0x100, + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/LoopEntriesInstrumentation) }; const std::error_category &instrprof_category(); diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc index c66b046..39613da 100644 --- a/llvm/include/llvm/ProfileData/InstrProfData.inc +++ b/llvm/include/llvm/ProfileData/InstrProfData.inc @@ -730,10 +730,12 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, #define INSTR_PROF_COVMAP_VERSION 6 /* Profile version is always of type uint64_t. Reserve the upper 32 bits in the - * version for other variants of profile. We set the 8th most significant bit + * version for other variants of profile. We set the 8th most significant bit * (i.e. bit 56) to 1 to indicate if this is an IR-level instrumentation * generated profile, and 0 if this is a Clang FE generated profile. * 1 in bit 57 indicates there are context-sensitive records in the profile. + * The 55th bit indicates whether to always instrument loop entry blocks. + * The 58th bit indicates whether to always instrument function entry blocks. * The 59th bit indicates whether to use debug info to correlate profiles. * The 60th bit indicates single byte coverage instrumentation. * The 61st bit indicates function entry instrumentation only. 
@@ -742,6 +744,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, */ #define VARIANT_MASKS_ALL 0xffffffff00000000ULL #define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL) +#define VARIANT_MASK_INSTR_LOOP_ENTRIES (0x1ULL << 55) #define VARIANT_MASK_IR_PROF (0x1ULL << 56) #define VARIANT_MASK_CSIR_PROF (0x1ULL << 57) #define VARIANT_MASK_INSTR_ENTRY (0x1ULL << 58) diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h index 1fad234..330cf54 100644 --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -123,6 +123,9 @@ public: virtual bool instrEntryBBEnabled() const = 0; + /// Return true if the profile instruments all loop entries. + virtual bool instrLoopEntriesEnabled() const = 0; + /// Return true if the profile has single byte counters representing coverage. virtual bool hasSingleByteCoverage() const = 0; @@ -274,6 +277,11 @@ public: InstrProfKind::FunctionEntryInstrumentation); } + bool instrLoopEntriesEnabled() const override { + return static_cast<bool>(ProfileKind & + InstrProfKind::LoopEntriesInstrumentation); + } + bool hasSingleByteCoverage() const override { return static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage); } @@ -398,6 +406,10 @@ public: return (Version & VARIANT_MASK_INSTR_ENTRY) != 0; } + bool instrLoopEntriesEnabled() const override { + return (Version & VARIANT_MASK_INSTR_LOOP_ENTRIES) != 0; + } + bool hasSingleByteCoverage() const override { return (Version & VARIANT_MASK_BYTE_COVERAGE) != 0; } @@ -564,6 +576,7 @@ struct InstrProfReaderIndexBase { virtual bool isIRLevelProfile() const = 0; virtual bool hasCSIRLevelProfile() const = 0; virtual bool instrEntryBBEnabled() const = 0; + virtual bool instrLoopEntriesEnabled() const = 0; virtual bool hasSingleByteCoverage() const = 0; virtual bool functionEntryOnly() const = 0; virtual bool hasMemoryProfile() const = 0; @@ -628,6 +641,10 @@ public: return 
(FormatVersion & VARIANT_MASK_INSTR_ENTRY) != 0; } + bool instrLoopEntriesEnabled() const override { + return (FormatVersion & VARIANT_MASK_INSTR_LOOP_ENTRIES) != 0; + } + bool hasSingleByteCoverage() const override { return (FormatVersion & VARIANT_MASK_BYTE_COVERAGE) != 0; } @@ -753,6 +770,10 @@ public: return Index->instrEntryBBEnabled(); } + bool instrLoopEntriesEnabled() const override { + return Index->instrLoopEntriesEnabled(); + } + bool hasSingleByteCoverage() const override { return Index->hasSingleByteCoverage(); } diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h index fa30926..fdb51c4 100644 --- a/llvm/include/llvm/ProfileData/InstrProfWriter.h +++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h @@ -190,7 +190,9 @@ public: return make_error<InstrProfError>(instrprof_error::unsupported_version); } if (testIncompatible(InstrProfKind::FunctionEntryOnly, - InstrProfKind::FunctionEntryInstrumentation)) { + InstrProfKind::FunctionEntryInstrumentation) || + testIncompatible(InstrProfKind::FunctionEntryOnly, + InstrProfKind::LoopEntriesInstrumentation)) { return make_error<InstrProfError>( instrprof_error::unsupported_version, "cannot merge FunctionEntryOnly profiles and BB profiles together"); diff --git a/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h b/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h index 35b3d61..f6bf045 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h +++ b/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h @@ -19,6 +19,7 @@ #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/BranchProbability.h" @@ -52,10 +53,14 @@ template <class Edge, class BBInfo> class CFGMST { BranchProbabilityInfo *const BPI; BlockFrequencyInfo *const BFI; + LoopInfo 
*const LI; // If function entry will be always instrumented. const bool InstrumentFuncEntry; + // If true loop entries will be always instrumented. + const bool InstrumentLoopEntries; + // Find the root group of the G and compress the path from G to the root. BBInfo *findAndCompressGroup(BBInfo *G) { if (G->Group != G) @@ -154,6 +159,16 @@ template <class Edge, class BBInfo> class CFGMST { } if (BPI != nullptr) Weight = BPI->getEdgeProbability(&BB, TargetBB).scale(scaleFactor); + // If InstrumentLoopEntries is on and the current edge leads to a loop + // (i.e., TargetBB is a loop head and BB is outside its loop), set + // Weight to be minimal, so that the edge won't be chosen for the MST + // and will be instrumented. + if (InstrumentLoopEntries && LI->isLoopHeader(TargetBB)) { + Loop *TargetLoop = LI->getLoopFor(TargetBB); + assert(TargetLoop); + if (!TargetLoop->contains(&BB)) + Weight = 0; + } if (Weight == 0) Weight++; auto *E = &addEdge(&BB, TargetBB, Weight); @@ -252,6 +267,19 @@ template <class Edge, class BBInfo> class CFGMST { } } + [[maybe_unused]] bool validateLoopEntryInstrumentation() { + if (!InstrumentLoopEntries) + return true; + for (auto &Ei : AllEdges) { + if (Ei->Removed) + continue; + if (Ei->DestBB && LI->isLoopHeader(Ei->DestBB) && + !LI->getLoopFor(Ei->DestBB)->contains(Ei->SrcBB) && Ei->InMST) + return false; + } + return true; + } + public: // Dump the Debug information about the instrumentation. 
void dumpEdges(raw_ostream &OS, const Twine &Message) const { @@ -291,13 +319,20 @@ public: return *AllEdges.back(); } - CFGMST(Function &Func, bool InstrumentFuncEntry, + CFGMST(Function &Func, bool InstrumentFuncEntry, bool InstrumentLoopEntries, BranchProbabilityInfo *BPI = nullptr, - BlockFrequencyInfo *BFI = nullptr) - : F(Func), BPI(BPI), BFI(BFI), InstrumentFuncEntry(InstrumentFuncEntry) { + BlockFrequencyInfo *BFI = nullptr, LoopInfo *LI = nullptr) + : F(Func), BPI(BPI), BFI(BFI), LI(LI), + InstrumentFuncEntry(InstrumentFuncEntry), + InstrumentLoopEntries(InstrumentLoopEntries) { + assert(!(InstrumentLoopEntries && !LI) && + "expected a LoopInfo to instrumenting loop entries"); buildEdges(); sortEdgesByWeight(); computeMinimumSpanningTree(); + assert(validateLoopEntryInstrumentation() && + "Loop entries should not be in MST when " + "InstrumentLoopEntries is on"); if (AllEdges.size() > 1 && InstrumentFuncEntry) std::iter_swap(std::move(AllEdges.begin()), std::move(AllEdges.begin() + AllEdges.size() - 1)); diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index 7663852..dad79b2 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -52,6 +52,9 @@ static InstrProfKind getProfileKindFromVersion(uint64_t Version) { if (Version & VARIANT_MASK_INSTR_ENTRY) { ProfileKind |= InstrProfKind::FunctionEntryInstrumentation; } + if (Version & VARIANT_MASK_INSTR_LOOP_ENTRIES) { + ProfileKind |= InstrProfKind::LoopEntriesInstrumentation; + } if (Version & VARIANT_MASK_BYTE_COVERAGE) { ProfileKind |= InstrProfKind::SingleByteCoverage; } @@ -262,6 +265,8 @@ Error TextInstrProfReader::readHeader() { ProfileKind |= InstrProfKind::FunctionEntryInstrumentation; else if (Str.equals_insensitive("not_entry_first")) ProfileKind &= ~InstrProfKind::FunctionEntryInstrumentation; + else if (Str.equals_insensitive("instrument_loop_entries")) + ProfileKind |= 
InstrProfKind::LoopEntriesInstrumentation; else if (Str.equals_insensitive("single_byte_coverage")) ProfileKind |= InstrProfKind::SingleByteCoverage; else if (Str.equals_insensitive("temporal_prof_traces")) { diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index d8ab18d..64625de 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -877,6 +877,9 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { if (static_cast<bool>(ProfileKind & InstrProfKind::FunctionEntryInstrumentation)) Header.Version |= VARIANT_MASK_INSTR_ENTRY; + if (static_cast<bool>(ProfileKind & + InstrProfKind::LoopEntriesInstrumentation)) + Header.Version |= VARIANT_MASK_INSTR_LOOP_ENTRIES; if (static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage)) Header.Version |= VARIANT_MASK_BYTE_COVERAGE; if (static_cast<bool>(ProfileKind & InstrProfKind::FunctionEntryOnly)) @@ -1120,6 +1123,10 @@ Error InstrProfWriter::writeText(raw_fd_ostream &OS) { if (static_cast<bool>(ProfileKind & InstrProfKind::FunctionEntryInstrumentation)) OS << "# Always instrument the function entry block\n:entry_first\n"; + if (static_cast<bool>(ProfileKind & + InstrProfKind::LoopEntriesInstrumentation)) + OS << "# Always instrument the loop entry " + "blocks\n:instrument_loop_entries\n"; if (static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage)) OS << "# Instrument block coverage\n:single_byte_coverage\n"; InstrProfSymtab Symtab; diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 2ea89be..f9be7f9 100644 --- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -820,7 +820,8 @@ bool GCOVProfiler::emitProfileNotes( SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI); - CFGMST<Edge, BBInfo> MST(F, /*InstrumentFuncEntry_=*/false, BPI, BFI); + 
CFGMST<Edge, BBInfo> MST(F, /*InstrumentFuncEntry=*/false, + /*InstrumentLoopEntries=*/false, BPI, BFI); // getInstrBB can split basic blocks and push elements to AllEdges. for (size_t I : llvm::seq<size_t>(0, MST.numEdges())) { diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 4d81414..471086c 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -259,6 +259,11 @@ static cl::opt<bool> PGOInstrumentEntry( "pgo-instrument-entry", cl::init(false), cl::Hidden, cl::desc("Force to instrument function entry basicblock.")); +static cl::opt<bool> + PGOInstrumentLoopEntries("pgo-instrument-loop-entries", cl::init(false), + cl::Hidden, + cl::desc("Force to instrument loop entries.")); + static cl::opt<bool> PGOFunctionEntryCoverage( "pgo-function-entry-coverage", cl::Hidden, cl::desc( @@ -359,6 +364,7 @@ class FunctionInstrumenter final { std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; BranchProbabilityInfo *const BPI; BlockFrequencyInfo *const BFI; + LoopInfo *const LI; const PGOInstrumentationType InstrumentationType; @@ -376,14 +382,17 @@ class FunctionInstrumenter final { InstrumentationType == PGOInstrumentationType::CTXPROF; } + bool shouldInstrumentLoopEntries() const { return PGOInstrumentLoopEntries; } + public: FunctionInstrumenter( Module &M, Function &F, TargetLibraryInfo &TLI, std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = nullptr, + LoopInfo *LI = nullptr, PGOInstrumentationType InstrumentationType = PGOInstrumentationType::FDO) : M(M), F(F), TLI(TLI), ComdatMembers(ComdatMembers), BPI(BPI), BFI(BFI), - InstrumentationType(InstrumentationType) {} + LI(LI), InstrumentationType(InstrumentationType) {} void instrument(); }; @@ -439,6 +448,8 @@ createIRLevelProfileFlagVar(Module &M, if 
(PGOInstrumentEntry || InstrumentationType == PGOInstrumentationType::CTXPROF) ProfileVersion |= VARIANT_MASK_INSTR_ENTRY; + if (PGOInstrumentLoopEntries) + ProfileVersion |= VARIANT_MASK_INSTR_LOOP_ENTRIES; if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) ProfileVersion |= VARIANT_MASK_DBG_CORRELATE; if (PGOFunctionEntryCoverage) @@ -625,12 +636,13 @@ public: Function &Func, TargetLibraryInfo &TLI, std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, - BlockFrequencyInfo *BFI = nullptr, bool IsCS = false, - bool InstrumentFuncEntry = true, bool HasSingleByteCoverage = false) + BlockFrequencyInfo *BFI = nullptr, LoopInfo *LI = nullptr, + bool IsCS = false, bool InstrumentFuncEntry = true, + bool InstrumentLoopEntries = false, bool HasSingleByteCoverage = false) : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI), TLI(TLI), ValueSites(IPVK_Last + 1), SIVisitor(Func, HasSingleByteCoverage), - MST(F, InstrumentFuncEntry, BPI, BFI), + MST(F, InstrumentFuncEntry, InstrumentLoopEntries, BPI, BFI, LI), BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) { if (BCI && PGOViewBlockCoverageGraph) BCI->viewBlockCoverageGraph(); @@ -916,9 +928,10 @@ void FunctionInstrumenter::instrument() { const bool IsCtxProf = InstrumentationType == PGOInstrumentationType::CTXPROF; FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo( - F, TLI, ComdatMembers, /*CreateGlobalVar=*/!IsCtxProf, BPI, BFI, + F, TLI, ComdatMembers, /*CreateGlobalVar=*/!IsCtxProf, BPI, BFI, LI, InstrumentationType == PGOInstrumentationType::CSFDO, - shouldInstrumentEntryBB(), PGOBlockCoverage); + shouldInstrumentEntryBB(), shouldInstrumentLoopEntries(), + PGOBlockCoverage); auto *const Name = IsCtxProf ? 
cast<GlobalValue>(&F) : FuncInfo.FuncNameVar; auto *const CFGHash = @@ -1136,11 +1149,13 @@ public: PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI, std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, - ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry, + LoopInfo *LI, ProfileSummaryInfo *PSI, bool IsCS, + bool InstrumentFuncEntry, bool InstrumentLoopEntries, bool HasSingleByteCoverage) : F(Func), M(Modu), BFI(BFIin), PSI(PSI), - FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS, - InstrumentFuncEntry, HasSingleByteCoverage), + FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, LI, IsCS, + InstrumentFuncEntry, InstrumentLoopEntries, + HasSingleByteCoverage), FreqAttr(FFA_Normal), IsCS(IsCS), VPC(Func, TLI) {} void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum); @@ -1923,6 +1938,7 @@ static bool InstrumentAllFunctions( Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI, function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, + function_ref<LoopInfo *(Function &)> LookupLI, PGOInstrumentationType InstrumentationType) { // For the context-sensitve instrumentation, we should have a separated pass // (before LTO/ThinLTO linking) to create these variables. 
@@ -1943,10 +1959,11 @@ static bool InstrumentAllFunctions( for (auto &F : M) { if (skipPGOGen(F)) continue; - auto &TLI = LookupTLI(F); - auto *BPI = LookupBPI(F); - auto *BFI = LookupBFI(F); - FunctionInstrumenter FI(M, F, TLI, ComdatMembers, BPI, BFI, + TargetLibraryInfo &TLI = LookupTLI(F); + BranchProbabilityInfo *BPI = LookupBPI(F); + BlockFrequencyInfo *BFI = LookupBFI(F); + LoopInfo *LI = LookupLI(F); + FunctionInstrumenter FI(M, F, TLI, ComdatMembers, BPI, BFI, LI, InstrumentationType); FI.instrument(); } @@ -1980,8 +1997,11 @@ PreservedAnalyses PGOInstrumentationGen::run(Module &M, auto LookupBFI = [&FAM](Function &F) { return &FAM.getResult<BlockFrequencyAnalysis>(F); }; + auto LookupLI = [&FAM](Function &F) { + return &FAM.getResult<LoopAnalysis>(F); + }; - if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, + if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, LookupLI, InstrumentationType)) return PreservedAnalyses::all(); @@ -2116,7 +2136,8 @@ static bool annotateAllFunctions( function_ref<TargetLibraryInfo &(Function &)> LookupTLI, function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, - ProfileSummaryInfo *PSI, bool IsCS) { + function_ref<LoopInfo *(Function &)> LookupLI, ProfileSummaryInfo *PSI, + bool IsCS) { LLVM_DEBUG(dbgs() << "Read in profile counters: "); auto &Ctx = M.getContext(); // Read the counter array from file. 
@@ -2181,22 +2202,27 @@ static bool annotateAllFunctions( bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled(); if (PGOInstrumentEntry.getNumOccurrences() > 0) InstrumentFuncEntry = PGOInstrumentEntry; + bool InstrumentLoopEntries = PGOReader->instrLoopEntriesEnabled(); + if (PGOInstrumentLoopEntries.getNumOccurrences() > 0) + InstrumentLoopEntries = PGOInstrumentLoopEntries; bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage(); for (auto &F : M) { if (skipPGOUse(F)) continue; - auto &TLI = LookupTLI(F); - auto *BPI = LookupBPI(F); - auto *BFI = LookupBFI(F); + TargetLibraryInfo &TLI = LookupTLI(F); + BranchProbabilityInfo *BPI = LookupBPI(F); + BlockFrequencyInfo *BFI = LookupBFI(F); + LoopInfo *LI = LookupLI(F); if (!HasSingleByteCoverage) { // Split indirectbr critical edges here before computing the MST rather // than later in getInstrBB() to avoid invalidating it. SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI); } - PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS, - InstrumentFuncEntry, HasSingleByteCoverage); + PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, LI, PSI, IsCS, + InstrumentFuncEntry, InstrumentLoopEntries, + HasSingleByteCoverage); if (HasSingleByteCoverage) { Func.populateCoverage(PGOReader.get()); continue; @@ -2335,10 +2361,14 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M, auto LookupBFI = [&FAM](Function &F) { return &FAM.getResult<BlockFrequencyAnalysis>(F); }; + auto LookupLI = [&FAM](Function &F) { + return &FAM.getResult<LoopAnalysis>(F); + }; auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M); if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS, - LookupTLI, LookupBPI, LookupBFI, PSI, IsCS)) + LookupTLI, LookupBPI, LookupBFI, LookupLI, PSI, + IsCS)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); diff --git a/llvm/test/Transforms/PGOProfile/loop_entries_gen.ll b/llvm/test/Transforms/PGOProfile/loop_entries_gen.ll new 
file mode 100644 index 0000000..ed10127 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/loop_entries_gen.ll @@ -0,0 +1,58 @@ +; RUN: opt %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefixes=CHECK,NOTLOOPENTRIES --implicit-check-not=@llvm.instrprof.increment +; RUN: opt %s -passes=pgo-instr-gen -pgo-instrument-loop-entries -S | FileCheck %s --check-prefixes=CHECK,LOOPENTRIES --implicit-check-not=@llvm.instrprof.increment +; RUN: opt %s -passes=pgo-instr-gen -pgo-instrument-entry -S | FileCheck %s --check-prefixes=CHECK,FUNCTIONENTRY --implicit-check-not=@llvm.instrprof.increment + +; CHECK: $__llvm_profile_raw_version = comdat any +; CHECK: @__llvm_profile_raw_version = hidden constant i64 {{[0-9]+}}, comdat +; CHECK: @__profn_test_simple_for_with_bypass = private constant [27 x i8] c"test_simple_for_with_bypass" + +define i32 @test_simple_for_with_bypass(i32 %n) { +entry: +; CHECK: entry: +; NOTLOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 1) +; LOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 1) +; FUNCTIONENTRY: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 0) + %mask = and i32 %n, 65535 + %skip = icmp eq i32 %mask, 0 + br i1 %skip, label %end, label %for.entry + +for.entry: +; CHECK: for.entry: +; LOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 2) + br label %for.cond + +for.cond: +; CHECK: for.cond: + %i = phi i32 [ 0, %for.entry ], [ %inc1, %for.inc ] + %sum = phi i32 [ 1, %for.entry ], [ %inc, %for.inc ] + %cmp = icmp slt i32 %i, %n + br i1 %cmp, label %for.body, label %for.end, !prof !1 + +for.body: +; CHECK: for.body: + %inc = add nsw i32 %sum, 1 + br label %for.inc + +for.inc: +; CHECK: for.inc: +; NOTLOOPENTRIES: call void @llvm.instrprof.increment(ptr 
@__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 0) +; LOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 0) +; FUNCTIONENTRY: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 1) + %inc1 = add nsw i32 %i, 1 + br label %for.cond + +for.end: +; CHECK: for.end: +; NOTLOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 2) +; FUNCTIONENTRY: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 2) + br label %end + +end: +; CHECK: end: + %final_sum = phi i32 [ %sum, %for.end ], [ 0, %entry ] + ret i32 %final_sum +} + +; CHECK: declare void @llvm.instrprof.increment(ptr, i64, i32, i32) #0 + +!1 = !{!"branch_weights", i32 100000, i32 80} diff --git a/llvm/test/Transforms/PGOProfile/loop_entries_use.ll b/llvm/test/Transforms/PGOProfile/loop_entries_use.ll new file mode 100644 index 0000000..616ecba --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/loop_entries_use.ll @@ -0,0 +1,106 @@ +; RUN: rm -rf %t && split-file %s %t + +; RUN: llvm-profdata merge %t/default.proftext -o %t/default.profdata +; RUN: opt %t/main.ll -passes=pgo-instr-use -pgo-test-profile-file=%t/default.profdata -S | FileCheck %s +; RUN: llvm-profdata merge %t/loop_entries.proftext -o %t/loop_entries.profdata +; RUN: opt %t/main.ll -passes=pgo-instr-use -pgo-test-profile-file=%t/loop_entries.profdata -S | FileCheck %s +; RUN: llvm-profdata merge %t/function_entry.proftext -o %t/function_entry.profdata +; RUN: opt %t/main.ll -passes=pgo-instr-use -pgo-test-profile-file=%t/function_entry.profdata -S | FileCheck %s + +;--- main.ll + +define i32 @test_simple_for_with_bypass(i32 %n) { +; CHECK: define i32 @test_simple_for_with_bypass(i32 %n) +; CHECK-SAME: !prof ![[ENTRY_COUNT:[0-9]*]] +entry: +; CHECK: entry: + %mask = and i32 %n, 65535 + %skip = icmp eq 
i32 %mask, 0 + br i1 %skip, label %end, label %for.entry +; CHECK: br i1 %skip, label %end, label %for.entry +; CHECK-SAME: !prof ![[BW_FOR_BYPASS:[0-9]+]] + +for.entry: +; CHECK: for.entry: + br label %for.cond + +for.cond: +; CHECK: for.cond: + %i = phi i32 [ 0, %for.entry ], [ %inc1, %for.inc ] + %sum = phi i32 [ 1, %for.entry ], [ %inc, %for.inc ] + %cmp = icmp slt i32 %i, %n + br i1 %cmp, label %for.body, label %for.end, !prof !1 +; CHECK: br i1 %cmp, label %for.body, label %for.end +; CHECK-SAME: !prof ![[BW_FOR_COND:[0-9]+]] + +for.body: +; CHECK: for.body: + %inc = add nsw i32 %sum, 1 + br label %for.inc + +for.inc: +; CHECK: for.inc: + %inc1 = add nsw i32 %i, 1 + br label %for.cond + +for.end: +; CHECK: for.end: + br label %end + +end: +; CHECK: end: + %final_sum = phi i32 [ %sum, %for.end ], [ 0, %entry ] + ret i32 %final_sum +} + +!1 = !{!"branch_weights", i32 100000, i32 80} + +; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 12} +; CHECK: ![[BW_FOR_BYPASS]] = !{!"branch_weights", i32 4, i32 8} +; CHECK: ![[BW_FOR_COND]] = !{!"branch_weights", i32 123456, i32 8} + +;--- default.proftext + +# :ir is the flag to indicate this is IR level profile. +:ir +test_simple_for_with_bypass +# Func Hash: +536873292337293370 +# Num Counters: +3 +# Counter Values: +123456 +12 +8 + +;--- loop_entries.proftext + +# :ir is the flag to indicate this is IR level profile. +:ir +# Always instrument the loop entry blocks +:instrument_loop_entries +test_simple_for_with_bypass +# Func Hash: +536873292337293370 +# Num Counters: +3 +# Counter Values: +123456 +12 +8 + +;--- function_entry.proftext + +# :ir is the flag to indicate this is IR level profile. 
+:ir +# Always instrument the function entry block +:entry_first +test_simple_for_with_bypass +# Func Hash: +536873292337293370 +# Num Counters: +3 +# Counter Values: +12 +123456 +8 diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index 2acf1cc..1d9d7bcf 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -2967,8 +2967,10 @@ static int showInstrProfile(ShowFormat SFormat, raw_fd_ostream &OS) { std::unique_ptr<ProfileSummary> PS(Builder.getSummary()); bool IsIR = Reader->isIRLevelProfile(); OS << "Instrumentation level: " << (IsIR ? "IR" : "Front-end"); - if (IsIR) + if (IsIR) { OS << " entry_first = " << Reader->instrEntryBBEnabled(); + OS << " instrument_loop_entries = " << Reader->instrLoopEntriesEnabled(); + } OS << "\n"; if (ShowAllFunctions || !FuncNameFilter.empty()) OS << "Functions shown: " << ShownFunctions << "\n"; |