diff options
5 files changed, 149 insertions, 5 deletions
diff --git a/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h b/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h new file mode 100644 index 0000000..38afa0c --- /dev/null +++ b/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h @@ -0,0 +1,24 @@ +//===-- PGOCtxProfLowering.h - Contextual PGO Instr. Lowering ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the PGOCtxProfLoweringPass class. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_PGOCTXPROFLOWERING_H +#define LLVM_TRANSFORMS_INSTRUMENTATION_PGOCTXPROFLOWERING_H + +namespace llvm { +class Type; + +class PGOCtxProfLoweringPass { +public: + explicit PGOCtxProfLoweringPass() = default; + static bool isContextualIRPGOEnabled(); +}; +} // namespace llvm +#endif diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt index 9814053..8d345d3 100644 --- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt @@ -14,6 +14,7 @@ add_llvm_component_library(LLVMInstrumentation InstrProfiling.cpp KCFI.cpp LowerAllowCheckPass.cpp + PGOCtxProfLowering.cpp PGOForceFunctionAttrs.cpp PGOInstrumentation.cpp PGOMemOPSizeOpt.cpp diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp new file mode 100644 index 0000000..9d6dd5c --- /dev/null +++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp @@ -0,0 +1,24 @@ +//===- PGOCtxProfLowering.cpp - Contextual PGO Instr. Lowering ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// + +#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +static cl::list<std::string> ContextRoots( + "profile-context-root", cl::Hidden, + cl::desc( + "A function name, assumed to be global, which will be treated as the " + "root of an interesting graph, which will be profiled independently " + "from other similar graphs.")); + +bool PGOCtxProfLoweringPass::isContextualIRPGOEnabled() { + return !ContextRoots.empty(); +} diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index a7b7556..b333b15 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -110,6 +110,7 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Instrumentation/BlockCoverageInference.h" #include "llvm/Transforms/Instrumentation/CFGMST.h" +#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/MisExpect.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -333,6 +334,20 @@ extern cl::opt<bool> EnableVTableValueProfiling; extern cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate; } // namespace llvm +bool shouldInstrumentEntryBB() { + return PGOInstrumentEntry || + PGOCtxProfLoweringPass::isContextualIRPGOEnabled(); +} + +// FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls. Ctx +// profiling implicitly captures indirect call cases, but not other values. +// Supporting other values is relatively straight-forward - just another counter +// range within the context. +bool isValueProfilingDisabled() { + return DisableValueProfiling || + PGOCtxProfLoweringPass::isContextualIRPGOEnabled(); +} + // Return a string describing the branch condition that can be // used in static branch probability heuristics: static std::string getBranchCondString(Instruction *TI) { @@ -379,7 +394,7 @@ static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) { uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); if (IsCS) ProfileVersion |= VARIANT_MASK_CSIR_PROF; - if (PGOInstrumentEntry) + if (shouldInstrumentEntryBB()) ProfileVersion |= VARIANT_MASK_INSTR_ENTRY; if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) ProfileVersion |= VARIANT_MASK_DBG_CORRELATE; @@ -861,7 +876,7 @@ static void instrumentOneFunc( } FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo( - F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry, + F, TLI, ComdatMembers, true, BPI, BFI, IsCS, shouldInstrumentEntryBB(), PGOBlockCoverage); auto Name = FuncInfo.FuncNameVar; @@ -883,6 +898,43 @@ static void instrumentOneFunc( unsigned NumCounters = InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); + if (PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) { + auto *CSIntrinsic = + Intrinsic::getDeclaration(M, Intrinsic::instrprof_callsite); + // We want to count the instrumentable callsites, then instrument them. This + // is because the llvm.instrprof.callsite intrinsic has an argument (like + // the other instrprof intrinsics) capturing the total number of + // instrumented objects (counters, or callsites, in this case). In this + // case, we want that value so we can readily pass it to the compiler-rt + // APIs that may have to allocate memory based on the nr of callsites. + // The traversal logic is the same for both counting and instrumentation, + // just needs to be done in succession. + auto Visit = [&](llvm::function_ref<void(CallBase * CB)> Visitor) { + for (auto &BB : F) + for (auto &Instr : BB) + if (auto *CS = dyn_cast<CallBase>(&Instr)) { + if ((CS->getCalledFunction() && + CS->getCalledFunction()->isIntrinsic()) || + dyn_cast<InlineAsm>(CS->getCalledOperand())) + continue; + Visitor(CS); + } + }; + // First, count callsites. + uint32_t TotalNrCallsites = 0; + Visit([&TotalNrCallsites](auto *) { ++TotalNrCallsites; }); + + // Now instrument. + uint32_t CallsiteIndex = 0; + Visit([&](auto *CB) { + IRBuilder<> Builder(CB); + Builder.CreateCall(CSIntrinsic, + {Name, CFGHash, Builder.getInt32(TotalNrCallsites), + Builder.getInt32(CallsiteIndex++), + CB->getCalledOperand()}); + }); + } + uint32_t I = 0; if (PGOTemporalInstrumentation) { NumCounters += PGOBlockCoverage ? 8 : 1; @@ -914,7 +966,7 @@ static void instrumentOneFunc( FuncInfo.FunctionHash); assert(I == NumCounters); - if (DisableValueProfiling) + if (isValueProfilingDisabled()) return; NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size(); @@ -1676,7 +1728,7 @@ void SelectInstVisitor::visitSelectInst(SelectInst &SI) { // Traverse all valuesites and annotate the instructions for all value kind. void PGOUseFunc::annotateValueSites() { - if (DisableValueProfiling) + if (isValueProfilingDisabled()) return; // Create the PGOFuncName meta data. @@ -1779,7 +1831,7 @@ static bool InstrumentAllFunctions( function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) { // For the context-sensitve instrumentation, we should have a separated pass // (before LTO/ThinLTO linking) to create these variables. - if (!IsCS) + if (!IsCS && !PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) createIRLevelProfileFlagVar(M, /*IsCS=*/false); Triple TT(M.getTargetTriple()); @@ -2018,6 +2070,8 @@ static bool annotateAllFunctions( bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled(); if (PGOInstrumentEntry.getNumOccurrences() > 0) InstrumentFuncEntry = PGOInstrumentEntry; + InstrumentFuncEntry |= PGOCtxProfLoweringPass::isContextualIRPGOEnabled(); + bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage(); for (auto &F : M) { if (skipPGOUse(F)) diff --git a/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll b/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll new file mode 100644 index 0000000..2ad95ab --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll @@ -0,0 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4 +; RUN: opt -passes=pgo-instr-gen -profile-context-root=an_entrypoint \ +; RUN: -S < %s | FileCheck --check-prefix=INSTRUMENT %s + +declare void @bar() + +;. +; INSTRUMENT: @__profn_foo = private constant [3 x i8] c"foo" +;. +define void @foo(i32 %a, ptr %fct) { +; INSTRUMENT-LABEL: define void @foo( +; INSTRUMENT-SAME: i32 [[A:%.*]], ptr [[FCT:%.*]]) { +; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0) +; INSTRUMENT-NEXT: [[T:%.*]] = icmp eq i32 [[A]], 0 +; INSTRUMENT-NEXT: br i1 [[T]], label [[YES:%.*]], label [[NO:%.*]] +; INSTRUMENT: yes: +; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 1) +; INSTRUMENT-NEXT: call void @llvm.instrprof.callsite(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0, ptr [[FCT]]) +; INSTRUMENT-NEXT: call void [[FCT]](i32 [[A]]) +; INSTRUMENT-NEXT: br label [[EXIT:%.*]] +; INSTRUMENT: no: +; INSTRUMENT-NEXT: call void @llvm.instrprof.callsite(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 1, ptr @bar) +; INSTRUMENT-NEXT: call void @bar() +; INSTRUMENT-NEXT: br label [[EXIT]] +; INSTRUMENT: exit: +; INSTRUMENT-NEXT: ret void +; + %t = icmp eq i32 %a, 0 + br i1 %t, label %yes, label %no +yes: + call void %fct(i32 %a) + br label %exit +no: + call void @bar() + br label %exit +exit: + ret void +} +;. +; INSTRUMENT: attributes #[[ATTR0:[0-9]+]] = { nounwind } +;. |