aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMircea Trofin <mtrofin@google.com>2024-05-01 14:47:49 -0700
committerGitHub <noreply@github.com>2024-05-01 14:47:49 -0700
commitc2d892668b7fc296a37679a6b729f40ce08a6179 (patch)
tree76894bfd712cf681898d08aaca5b2ad4cc9c9869
parenta2be1b8d03320908c4d4b42e5bd8b7569b1f9aae (diff)
downloadllvm-c2d892668b7fc296a37679a6b729f40ce08a6179.zip
llvm-c2d892668b7fc296a37679a6b729f40ce08a6179.tar.gz
llvm-c2d892668b7fc296a37679a6b729f40ce08a6179.tar.bz2
[llvm][ctx_profile] Add instrumentation (#90136)
This adds callsite instrumentation to PGOInstrumentation, *if* contextual profiling is requested. Requesting contextual profiling also enables inserting counters in the entry basic block and disables value profiling (the latter is a point-in-time change). This change also adds the skeleton of the contextual profiling lowering pass, just so we can introduce the flag that enables contextual profiling and the API to check it. The actual lowering pass will be introduced in a subsequent patch. (Tracking Issue: #89287, RFC referenced there.)
-rw-r--r--llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h24
-rw-r--r--llvm/lib/Transforms/Instrumentation/CMakeLists.txt1
-rw-r--r--llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp24
-rw-r--r--llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp64
-rw-r--r--llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll41
5 files changed, 149 insertions, 5 deletions
diff --git a/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h b/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h
new file mode 100644
index 0000000..38afa0c
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h
@@ -0,0 +1,24 @@
+//===-- PGOCtxProfLowering.h - Contextual PGO Instr. Lowering ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PGOCtxProfLoweringPass class.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_PGOCTXPROFLOWERING_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_PGOCTXPROFLOWERING_H
+
+namespace llvm {
+class Type;
+
+/// Skeleton of the contextual PGO lowering pass. At this point it only offers
+/// a static query for whether contextual IR PGO is enabled.
+class PGOCtxProfLoweringPass {
+public:
+  /// Reports whether contextual IR PGO is enabled.
+  static bool isContextualIRPGOEnabled();
+
+  explicit PGOCtxProfLoweringPass() = default;
+};
+} // namespace llvm
+#endif
diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
index 9814053..8d345d3 100644
--- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -14,6 +14,7 @@ add_llvm_component_library(LLVMInstrumentation
InstrProfiling.cpp
KCFI.cpp
LowerAllowCheckPass.cpp
+ PGOCtxProfLowering.cpp
PGOForceFunctionAttrs.cpp
PGOInstrumentation.cpp
PGOMemOPSizeOpt.cpp
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
new file mode 100644
index 0000000..9d6dd5c
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
@@ -0,0 +1,24 @@
+//===- PGOCtxProfLowering.cpp - Contextual PGO Instr. Lowering ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+// Hidden, list-valued option holding the names given via
+// -profile-context-root. Each name designates a function to be treated as the
+// root of an independently profiled graph.
+static cl::list<std::string>
+    ContextRoots("profile-context-root", cl::Hidden,
+                 cl::desc("A function name, assumed to be global, which will "
+                          "be treated as the root of an interesting graph, "
+                          "which will be profiled independently from other "
+                          "similar graphs."));
+
+// Contextual IR PGO counts as enabled as soon as ContextRoots holds at least
+// one entry.
+bool PGOCtxProfLoweringPass::isContextualIRPGOEnabled() {
+  const bool NoRootsGiven = ContextRoots.empty();
+  return !NoRootsGiven;
+}
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index a7b7556..b333b15 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -110,6 +110,7 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Instrumentation/BlockCoverageInference.h"
#include "llvm/Transforms/Instrumentation/CFGMST.h"
+#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/MisExpect.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -333,6 +334,20 @@ extern cl::opt<bool> EnableVTableValueProfiling;
extern cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate;
} // namespace llvm
+// Returns true when the function entry basic block must be instrumented:
+// either PGOInstrumentEntry is set, or contextual IR PGO is enabled.
+// File-local helper, hence `static`, so it does not leak into the global
+// namespace with external linkage.
+static bool shouldInstrumentEntryBB() {
+  return PGOInstrumentEntry ||
+         PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
+}
+
+// Returns true when value profiling must be skipped: either
+// DisableValueProfiling is set, or contextual IR PGO is enabled.
+// File-local helper, hence `static`.
+//
+// FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls. Ctx
+// profiling implicitly captures indirect call cases, but not other values.
+// Supporting other values is relatively straight-forward - just another counter
+// range within the context.
+static bool isValueProfilingDisabled() {
+  return DisableValueProfiling ||
+         PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
+}
+
// Return a string describing the branch condition that can be
// used in static branch probability heuristics:
static std::string getBranchCondString(Instruction *TI) {
@@ -379,7 +394,7 @@ static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) {
uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
if (IsCS)
ProfileVersion |= VARIANT_MASK_CSIR_PROF;
- if (PGOInstrumentEntry)
+ if (shouldInstrumentEntryBB())
ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO)
ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
@@ -861,7 +876,7 @@ static void instrumentOneFunc(
}
FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
- F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry,
+ F, TLI, ComdatMembers, true, BPI, BFI, IsCS, shouldInstrumentEntryBB(),
PGOBlockCoverage);
auto Name = FuncInfo.FuncNameVar;
@@ -883,6 +898,43 @@ static void instrumentOneFunc(
unsigned NumCounters =
InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
+ if (PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) {
+   auto *CSIntrinsic =
+       Intrinsic::getDeclaration(M, Intrinsic::instrprof_callsite);
+   // We want to count the instrumentable callsites, then instrument them. This
+   // is because the llvm.instrprof.callsite intrinsic has an argument (like
+   // the other instrprof intrinsics) capturing the total number of
+   // instrumented objects (counters, or callsites, in this case). In this
+   // case, we want that value so we can readily pass it to the compiler-rt
+   // APIs that may have to allocate memory based on the nr of callsites.
+   // The traversal logic is the same for both counting and instrumentation,
+   // just needs to be done in succession.
+   auto Visit = [&](llvm::function_ref<void(CallBase *)> Visitor) {
+     for (auto &BB : F)
+       for (auto &Instr : BB)
+         if (auto *CS = dyn_cast<CallBase>(&Instr)) {
+           // Calls to intrinsics and inline asm are not treated as
+           // instrumentable callsites. `isa<>` is used for the inline asm
+           // check because only the type test is needed, not the casted
+           // pointer.
+           if ((CS->getCalledFunction() &&
+                CS->getCalledFunction()->isIntrinsic()) ||
+               isa<InlineAsm>(CS->getCalledOperand()))
+             continue;
+           Visitor(CS);
+         }
+   };
+   // First, count callsites.
+   uint32_t TotalNrCallsites = 0;
+   Visit([&TotalNrCallsites](auto *) { ++TotalNrCallsites; });
+
+   // Now instrument. Each callsite gets an llvm.instrprof.callsite call
+   // inserted right before it, carrying its index and the called operand.
+   uint32_t CallsiteIndex = 0;
+   Visit([&](auto *CB) {
+     IRBuilder<> Builder(CB);
+     Builder.CreateCall(CSIntrinsic,
+                        {Name, CFGHash, Builder.getInt32(TotalNrCallsites),
+                         Builder.getInt32(CallsiteIndex++),
+                         CB->getCalledOperand()});
+   });
+ }
+
uint32_t I = 0;
if (PGOTemporalInstrumentation) {
NumCounters += PGOBlockCoverage ? 8 : 1;
@@ -914,7 +966,7 @@ static void instrumentOneFunc(
FuncInfo.FunctionHash);
assert(I == NumCounters);
- if (DisableValueProfiling)
+ if (isValueProfilingDisabled())
return;
NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
@@ -1676,7 +1728,7 @@ void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
// Traverse all valuesites and annotate the instructions for all value kind.
void PGOUseFunc::annotateValueSites() {
- if (DisableValueProfiling)
+ if (isValueProfilingDisabled())
return;
// Create the PGOFuncName meta data.
@@ -1779,7 +1831,7 @@ static bool InstrumentAllFunctions(
function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
// For the context-sensitive instrumentation, we should have a separate pass
// (before LTO/ThinLTO linking) to create these variables.
- if (!IsCS)
+ if (!IsCS && !PGOCtxProfLoweringPass::isContextualIRPGOEnabled())
createIRLevelProfileFlagVar(M, /*IsCS=*/false);
Triple TT(M.getTargetTriple());
@@ -2018,6 +2070,8 @@ static bool annotateAllFunctions(
bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
if (PGOInstrumentEntry.getNumOccurrences() > 0)
InstrumentFuncEntry = PGOInstrumentEntry;
+ InstrumentFuncEntry |= PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
+
bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
for (auto &F : M) {
if (skipPGOUse(F))
diff --git a/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll b/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll
new file mode 100644
index 0000000..2ad95ab
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; RUN: opt -passes=pgo-instr-gen -profile-context-root=an_entrypoint \
+; RUN: -S < %s | FileCheck --check-prefix=INSTRUMENT %s
+
+declare void @bar()
+
+;.
+; INSTRUMENT: @__profn_foo = private constant [3 x i8] c"foo"
+;.
+define void @foo(i32 %a, ptr %fct) {
+; INSTRUMENT-LABEL: define void @foo(
+; INSTRUMENT-SAME: i32 [[A:%.*]], ptr [[FCT:%.*]]) {
+; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0)
+; INSTRUMENT-NEXT: [[T:%.*]] = icmp eq i32 [[A]], 0
+; INSTRUMENT-NEXT: br i1 [[T]], label [[YES:%.*]], label [[NO:%.*]]
+; INSTRUMENT: yes:
+; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 1)
+; INSTRUMENT-NEXT: call void @llvm.instrprof.callsite(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0, ptr [[FCT]])
+; INSTRUMENT-NEXT: call void [[FCT]](i32 [[A]])
+; INSTRUMENT-NEXT: br label [[EXIT:%.*]]
+; INSTRUMENT: no:
+; INSTRUMENT-NEXT: call void @llvm.instrprof.callsite(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 1, ptr @bar)
+; INSTRUMENT-NEXT: call void @bar()
+; INSTRUMENT-NEXT: br label [[EXIT]]
+; INSTRUMENT: exit:
+; INSTRUMENT-NEXT: ret void
+;
+ %t = icmp eq i32 %a, 0
+ br i1 %t, label %yes, label %no
+yes:
+ call void %fct(i32 %a)
+ br label %exit
+no:
+ call void @bar()
+ br label %exit
+exit:
+ ret void
+}
+;.
+; INSTRUMENT: attributes #[[ATTR0:[0-9]+]] = { nounwind }
+;.