aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h | 24
-rw-r--r-- llvm/lib/Transforms/Instrumentation/CMakeLists.txt | 1
-rw-r--r-- llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp | 24
-rw-r--r-- llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp | 64
-rw-r--r-- llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll | 41
5 files changed, 149 insertions, 5 deletions
diff --git a/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h b/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h
new file mode 100644
index 0000000..38afa0c
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h
@@ -0,0 +1,24 @@
+//===-- PGOCtxProfLowering.h - Contextual PGO Instr. Lowering ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PGOCtxProfLoweringPass class.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_PGOCTXPROFLOWERING_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_PGOCTXPROFLOWERING_H
+
+namespace llvm {
+class Type;
+
+class PGOCtxProfLoweringPass { // Contextual PGO instrumentation lowering.
+public:
+ explicit PGOCtxProfLoweringPass() = default;
+ static bool isContextualIRPGOEnabled(); // Queried by PGOInstrumentation.cpp.
+};
+} // namespace llvm
+#endif
diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
index 9814053..8d345d3 100644
--- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -14,6 +14,7 @@ add_llvm_component_library(LLVMInstrumentation
InstrProfiling.cpp
KCFI.cpp
LowerAllowCheckPass.cpp
+ PGOCtxProfLowering.cpp
PGOForceFunctionAttrs.cpp
PGOInstrumentation.cpp
PGOMemOPSizeOpt.cpp
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
new file mode 100644
index 0000000..9d6dd5c
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
@@ -0,0 +1,24 @@
+//===- PGOCtxProfLowering.cpp - Contextual PGO Instr. Lowering ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+static cl::list<std::string> ContextRoots( // cl::list: may be repeated.
+ "profile-context-root", cl::Hidden,
+ cl::desc(
+ "A function name, assumed to be global, which will be treated as the "
+ "root of an interesting graph, which will be profiled independently "
+ "from other similar graphs."));
+
+bool PGOCtxProfLoweringPass::isContextualIRPGOEnabled() {
+ return !ContextRoots.empty(); // On iff at least one context root was named.
+}
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index a7b7556..b333b15 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -110,6 +110,7 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Instrumentation/BlockCoverageInference.h"
#include "llvm/Transforms/Instrumentation/CFGMST.h"
+#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/MisExpect.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -333,6 +334,20 @@ extern cl::opt<bool> EnableVTableValueProfiling;
extern cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate;
} // namespace llvm
+// Returns true when function-entry instrumentation is requested: either
+// PGOInstrumentEntry is set, or contextual IR PGO is enabled.
+// Internal linkage: this helper is not declared in any header, so it must not
+// leak a global symbol out of this translation unit.
+static bool shouldInstrumentEntryBB() {
+  return PGOInstrumentEntry ||
+         PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
+}
+
+// Returns true when value profiling must be skipped: either
+// DisableValueProfiling is set, or contextual IR PGO is enabled.
+// Internal linkage: this helper is not declared in any header, so it must not
+// leak a global symbol out of this translation unit.
+// FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls. Ctx
+// profiling implicitly captures indirect call cases, but not other values.
+// Supporting other values is relatively straight-forward - just another counter
+// range within the context.
+static bool isValueProfilingDisabled() {
+  return DisableValueProfiling ||
+         PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
+}
+
// Return a string describing the branch condition that can be
// used in static branch probability heuristics:
static std::string getBranchCondString(Instruction *TI) {
@@ -379,7 +394,7 @@ static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) {
uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
if (IsCS)
ProfileVersion |= VARIANT_MASK_CSIR_PROF;
- if (PGOInstrumentEntry)
+ if (shouldInstrumentEntryBB())
ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO)
ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
@@ -861,7 +876,7 @@ static void instrumentOneFunc(
}
FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
- F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry,
+ F, TLI, ComdatMembers, true, BPI, BFI, IsCS, shouldInstrumentEntryBB(),
PGOBlockCoverage);
auto Name = FuncInfo.FuncNameVar;
@@ -883,6 +898,43 @@ static void instrumentOneFunc(
unsigned NumCounters =
InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
+ // Contextual IR PGO: place an llvm.instrprof.callsite call immediately before
+ // every instrumentable callsite of F.
+ if (PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) {
+   auto *CSIntrinsic =
+       Intrinsic::getDeclaration(M, Intrinsic::instrprof_callsite);
+   // We want to count the instrumentable callsites, then instrument them. This
+   // is because the llvm.instrprof.callsite intrinsic has an argument (like
+   // the other instrprof intrinsics) capturing the total number of
+   // instrumented objects (counters, or callsites, in this case). In this
+   // case, we want that value so we can readily pass it to the compiler-rt
+   // APIs that may have to allocate memory based on the nr of callsites.
+   // The traversal logic is the same for both counting and instrumentation,
+   // just needs to be done in succession.
+   auto Visit = [&](llvm::function_ref<void(CallBase * CB)> Visitor) {
+     for (auto &BB : F)
+       for (auto &Instr : BB)
+         if (auto *CS = dyn_cast<CallBase>(&Instr)) {
+           // Calls to intrinsics and inline asm are not instrumented. This is
+           // a pure type test, so use isa<> rather than a discarded dyn_cast<>.
+           const Function *Callee = CS->getCalledFunction();
+           if ((Callee && Callee->isIntrinsic()) ||
+               isa<InlineAsm>(CS->getCalledOperand()))
+             continue;
+           Visitor(CS);
+         }
+   };
+   // First, count callsites.
+   uint32_t TotalNrCallsites = 0;
+   Visit([&TotalNrCallsites](auto *) { ++TotalNrCallsites; });
+
+   // Now instrument. Each new call is inserted before CB, i.e. behind the
+   // traversal cursor, and it is itself an intrinsic call, so the walk in
+   // progress never counts or instruments it.
+   uint32_t CallsiteIndex = 0;
+   Visit([&](auto *CB) {
+     IRBuilder<> Builder(CB);
+     // Operands: function name var, CFG hash, total number of callsites, this
+     // callsite's index, and the callee operand.
+     Builder.CreateCall(CSIntrinsic,
+                        {Name, CFGHash, Builder.getInt32(TotalNrCallsites),
+                         Builder.getInt32(CallsiteIndex++),
+                         CB->getCalledOperand()});
+   });
+ }
+
uint32_t I = 0;
if (PGOTemporalInstrumentation) {
NumCounters += PGOBlockCoverage ? 8 : 1;
@@ -914,7 +966,7 @@ static void instrumentOneFunc(
FuncInfo.FunctionHash);
assert(I == NumCounters);
- if (DisableValueProfiling)
+ if (isValueProfilingDisabled())
return;
NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
@@ -1676,7 +1728,7 @@ void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
// Traverse all valuesites and annotate the instructions for all value kind.
void PGOUseFunc::annotateValueSites() {
- if (DisableValueProfiling)
+ if (isValueProfilingDisabled())
return;
// Create the PGOFuncName meta data.
@@ -1779,7 +1831,7 @@ static bool InstrumentAllFunctions(
function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
// For the context-sensitve instrumentation, we should have a separated pass
// (before LTO/ThinLTO linking) to create these variables.
- if (!IsCS)
+ if (!IsCS && !PGOCtxProfLoweringPass::isContextualIRPGOEnabled())
createIRLevelProfileFlagVar(M, /*IsCS=*/false);
Triple TT(M.getTargetTriple());
@@ -2018,6 +2070,8 @@ static bool annotateAllFunctions(
bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
if (PGOInstrumentEntry.getNumOccurrences() > 0)
InstrumentFuncEntry = PGOInstrumentEntry;
+ InstrumentFuncEntry |= PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
+
bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
for (auto &F : M) {
if (skipPGOUse(F))
diff --git a/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll b/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll
new file mode 100644
index 0000000..2ad95ab
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; RUN: opt -passes=pgo-instr-gen -profile-context-root=an_entrypoint \
+; RUN: -S < %s | FileCheck --check-prefix=INSTRUMENT %s
+
+declare void @bar()
+
+;.
+; INSTRUMENT: @__profn_foo = private constant [3 x i8] c"foo"
+;.
+define void @foo(i32 %a, ptr %fct) {
+; INSTRUMENT-LABEL: define void @foo(
+; INSTRUMENT-SAME: i32 [[A:%.*]], ptr [[FCT:%.*]]) {
+; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0)
+; INSTRUMENT-NEXT: [[T:%.*]] = icmp eq i32 [[A]], 0
+; INSTRUMENT-NEXT: br i1 [[T]], label [[YES:%.*]], label [[NO:%.*]]
+; INSTRUMENT: yes:
+; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 1)
+; INSTRUMENT-NEXT: call void @llvm.instrprof.callsite(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0, ptr [[FCT]])
+; INSTRUMENT-NEXT: call void [[FCT]](i32 [[A]])
+; INSTRUMENT-NEXT: br label [[EXIT:%.*]]
+; INSTRUMENT: no:
+; INSTRUMENT-NEXT: call void @llvm.instrprof.callsite(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 1, ptr @bar)
+; INSTRUMENT-NEXT: call void @bar()
+; INSTRUMENT-NEXT: br label [[EXIT]]
+; INSTRUMENT: exit:
+; INSTRUMENT-NEXT: ret void
+;
+ %t = icmp eq i32 %a, 0
+ br i1 %t, label %yes, label %no
+yes:
+ call void %fct(i32 %a)
+ br label %exit
+no:
+ call void @bar()
+ br label %exit
+exit:
+ ret void
+}
+;.
+; INSTRUMENT: attributes #[[ATTR0:[0-9]+]] = { nounwind }
+;.