diff options
author | Mircea Trofin <mtrofin@google.com> | 2024-05-01 14:47:49 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-01 14:47:49 -0700 |
commit | c2d892668b7fc296a37679a6b729f40ce08a6179 (patch) | |
tree | 76894bfd712cf681898d08aaca5b2ad4cc9c9869 | |
parent | a2be1b8d03320908c4d4b42e5bd8b7569b1f9aae (diff) | |
download | llvm-c2d892668b7fc296a37679a6b729f40ce08a6179.zip llvm-c2d892668b7fc296a37679a6b729f40ce08a6179.tar.gz llvm-c2d892668b7fc296a37679a6b729f40ce08a6179.tar.bz2 |
[llvm][ctx_profile] Add instrumentation (#90136)
This adds instrumenting callsites to PGOInstrumentation, *if* contextual profiling is requested. The latter also enables inserting counters in the entry basic block and disables value profiling (the latter is a point in time change)
This change adds the skeleton of the contextual profiling lowering pass, just so we can introduce the flag controlling that and the API to check that. The actual lowering pass will be introduced in a subsequent patch.
(Tracking Issue: #89287, RFC referenced there)
5 files changed, 149 insertions, 5 deletions
diff --git a/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h b/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h new file mode 100644 index 0000000..38afa0c --- /dev/null +++ b/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h @@ -0,0 +1,24 @@ +//===-- PGOCtxProfLowering.h - Contextual PGO Instr. Lowering ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the PGOCtxProfLoweringPass class. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_PGOCTXPROFLOWERING_H +#define LLVM_TRANSFORMS_INSTRUMENTATION_PGOCTXPROFLOWERING_H + +namespace llvm { +class Type; + +class PGOCtxProfLoweringPass { +public: + explicit PGOCtxProfLoweringPass() = default; + static bool isContextualIRPGOEnabled(); +}; +} // namespace llvm +#endif diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt index 9814053..8d345d3 100644 --- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt @@ -14,6 +14,7 @@ add_llvm_component_library(LLVMInstrumentation InstrProfiling.cpp KCFI.cpp LowerAllowCheckPass.cpp + PGOCtxProfLowering.cpp PGOForceFunctionAttrs.cpp PGOInstrumentation.cpp PGOMemOPSizeOpt.cpp diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp new file mode 100644 index 0000000..9d6dd5c --- /dev/null +++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp @@ -0,0 +1,24 @@ +//===- PGOCtxProfLowering.cpp - Contextual PGO Instr. Lowering ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// + +#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +static cl::list<std::string> ContextRoots( + "profile-context-root", cl::Hidden, + cl::desc( + "A function name, assumed to be global, which will be treated as the " + "root of an interesting graph, which will be profiled independently " + "from other similar graphs.")); + +bool PGOCtxProfLoweringPass::isContextualIRPGOEnabled() { + return !ContextRoots.empty(); +} diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index a7b7556..b333b15 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -110,6 +110,7 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Instrumentation/BlockCoverageInference.h" #include "llvm/Transforms/Instrumentation/CFGMST.h" +#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/MisExpect.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -333,6 +334,20 @@ extern cl::opt<bool> EnableVTableValueProfiling; extern cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate; } // namespace llvm +bool shouldInstrumentEntryBB() { + return PGOInstrumentEntry || + PGOCtxProfLoweringPass::isContextualIRPGOEnabled(); +} + +// FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls. Ctx +// profiling implicitly captures indirect call cases, but not other values. +// Supporting other values is relatively straight-forward - just another counter +// range within the context. +bool isValueProfilingDisabled() { + return DisableValueProfiling || + PGOCtxProfLoweringPass::isContextualIRPGOEnabled(); +} + // Return a string describing the branch condition that can be // used in static branch probability heuristics: static std::string getBranchCondString(Instruction *TI) { @@ -379,7 +394,7 @@ static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) { uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); if (IsCS) ProfileVersion |= VARIANT_MASK_CSIR_PROF; - if (PGOInstrumentEntry) + if (shouldInstrumentEntryBB()) ProfileVersion |= VARIANT_MASK_INSTR_ENTRY; if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) ProfileVersion |= VARIANT_MASK_DBG_CORRELATE; @@ -861,7 +876,7 @@ static void instrumentOneFunc( } FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo( - F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry, + F, TLI, ComdatMembers, true, BPI, BFI, IsCS, shouldInstrumentEntryBB(), PGOBlockCoverage); auto Name = FuncInfo.FuncNameVar; @@ -883,6 +898,43 @@ static void instrumentOneFunc( unsigned NumCounters = InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); + if (PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) { + auto *CSIntrinsic = + Intrinsic::getDeclaration(M, Intrinsic::instrprof_callsite); + // We want to count the instrumentable callsites, then instrument them. This + // is because the llvm.instrprof.callsite intrinsic has an argument (like + // the other instrprof intrinsics) capturing the total number of + // instrumented objects (counters, or callsites, in this case). In this + // case, we want that value so we can readily pass it to the compiler-rt + // APIs that may have to allocate memory based on the nr of callsites. + // The traversal logic is the same for both counting and instrumentation, + // just needs to be done in succession. + auto Visit = [&](llvm::function_ref<void(CallBase * CB)> Visitor) { + for (auto &BB : F) + for (auto &Instr : BB) + if (auto *CS = dyn_cast<CallBase>(&Instr)) { + if ((CS->getCalledFunction() && + CS->getCalledFunction()->isIntrinsic()) || + dyn_cast<InlineAsm>(CS->getCalledOperand())) + continue; + Visitor(CS); + } + }; + // First, count callsites. + uint32_t TotalNrCallsites = 0; + Visit([&TotalNrCallsites](auto *) { ++TotalNrCallsites; }); + + // Now instrument. + uint32_t CallsiteIndex = 0; + Visit([&](auto *CB) { + IRBuilder<> Builder(CB); + Builder.CreateCall(CSIntrinsic, + {Name, CFGHash, Builder.getInt32(TotalNrCallsites), + Builder.getInt32(CallsiteIndex++), + CB->getCalledOperand()}); + }); + } + uint32_t I = 0; if (PGOTemporalInstrumentation) { NumCounters += PGOBlockCoverage ? 8 : 1; @@ -914,7 +966,7 @@ static void instrumentOneFunc( FuncInfo.FunctionHash); assert(I == NumCounters); - if (DisableValueProfiling) + if (isValueProfilingDisabled()) return; NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size(); @@ -1676,7 +1728,7 @@ void SelectInstVisitor::visitSelectInst(SelectInst &SI) { // Traverse all valuesites and annotate the instructions for all value kind. void PGOUseFunc::annotateValueSites() { - if (DisableValueProfiling) + if (isValueProfilingDisabled()) return; // Create the PGOFuncName meta data. @@ -1779,7 +1831,7 @@ static bool InstrumentAllFunctions( function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) { // For the context-sensitve instrumentation, we should have a separated pass // (before LTO/ThinLTO linking) to create these variables. - if (!IsCS) + if (!IsCS && !PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) createIRLevelProfileFlagVar(M, /*IsCS=*/false); Triple TT(M.getTargetTriple()); @@ -2018,6 +2070,8 @@ static bool annotateAllFunctions( bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled(); if (PGOInstrumentEntry.getNumOccurrences() > 0) InstrumentFuncEntry = PGOInstrumentEntry; + InstrumentFuncEntry |= PGOCtxProfLoweringPass::isContextualIRPGOEnabled(); + bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage(); for (auto &F : M) { if (skipPGOUse(F)) diff --git a/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll b/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll new file mode 100644 index 0000000..2ad95ab --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll @@ -0,0 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4 +; RUN: opt -passes=pgo-instr-gen -profile-context-root=an_entrypoint \ +; RUN: -S < %s | FileCheck --check-prefix=INSTRUMENT %s + +declare void @bar() + +;. +; INSTRUMENT: @__profn_foo = private constant [3 x i8] c"foo" +;. +define void @foo(i32 %a, ptr %fct) { +; INSTRUMENT-LABEL: define void @foo( +; INSTRUMENT-SAME: i32 [[A:%.*]], ptr [[FCT:%.*]]) { +; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0) +; INSTRUMENT-NEXT: [[T:%.*]] = icmp eq i32 [[A]], 0 +; INSTRUMENT-NEXT: br i1 [[T]], label [[YES:%.*]], label [[NO:%.*]] +; INSTRUMENT: yes: +; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 1) +; INSTRUMENT-NEXT: call void @llvm.instrprof.callsite(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0, ptr [[FCT]]) +; INSTRUMENT-NEXT: call void [[FCT]](i32 [[A]]) +; INSTRUMENT-NEXT: br label [[EXIT:%.*]] +; INSTRUMENT: no: +; INSTRUMENT-NEXT: call void @llvm.instrprof.callsite(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 1, ptr @bar) +; INSTRUMENT-NEXT: call void @bar() +; INSTRUMENT-NEXT: br label [[EXIT]] +; INSTRUMENT: exit: +; INSTRUMENT-NEXT: ret void +; + %t = icmp eq i32 %a, 0 + br i1 %t, label %yes, label %no +yes: + call void %fct(i32 %a) + br label %exit +no: + call void @bar() + br label %exit +exit: + ret void +} +;. +; INSTRUMENT: attributes #[[ATTR0:[0-9]+]] = { nounwind } +;. |