aboutsummaryrefslogtreecommitdiff
path: root/llvm
diff options
context:
space:
mode:
authorMircea Trofin <mtrofin@google.com>2024-08-02 20:51:27 -0400
committerGitHub <noreply@github.com>2024-08-02 20:51:27 -0400
commitba4da5a087f28c9522bc7f173e99673bb3009af9 (patch)
tree92b87009e6bbe820860cc936e9ffd1cfe284337a /llvm
parentac319a8dd8b7999ffa0342e50e02932c5c8f7a6a (diff)
downloadllvm-ba4da5a087f28c9522bc7f173e99673bb3009af9.zip
llvm-ba4da5a087f28c9522bc7f173e99673bb3009af9.tar.gz
llvm-ba4da5a087f28c9522bc7f173e99673bb3009af9.tar.bz2
[ctx_prof] "Use" support for pre-thinlink. (#101338)
There is currently no plan to support contextual profiling use in a non-ThinLTO scenario. In the pre-link phase, we only instrument and then immediately bail out to let the linker group functions under an entrypoint in the same module as the entrypoint. We don't actually care what the profile contains - just that we want to use a contextual profile. After that, in post-thinlink, we require the profile be passed again so we can actually use it. The earlier instrumentation will be used to match counter values. While the feature is in development, we add a hidden flag for the use scenario, but we can eventually tie it to the `PGOOptions` mechanism. We will use the same flag in both pre- and post-thinlink, because it simplifies things - usually the post-thinlink args are the same as the ones for pre-. This is despite the flag being basically treated as a boolean in pre-thinlink.
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/Passes/PassBuilderPipelines.cpp21
-rw-r--r--llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll38
2 files changed, 57 insertions, 2 deletions
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index a611872..adebbb5 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -304,6 +304,10 @@ static cl::opt<bool> UseLoopVersioningLICM(
"enable-loop-versioning-licm", cl::init(false), cl::Hidden,
cl::desc("Enable the experimental Loop Versioning LICM pass"));
+static cl::opt<std::string>
+ UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden,
+ cl::desc("Use the specified contextual profile file"));
+
namespace llvm {
extern cl::opt<bool> EnableMemProfContextDisambiguation;
@@ -1176,8 +1180,11 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
// Enable contextual profiling instrumentation.
const bool IsCtxProfGen = !IsPGOInstrGen && IsPreLink &&
PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
+ const bool IsCtxProfUse = !UseCtxProfile.empty() && !PGOOpt &&
+ Phase == ThinOrFullLTOPhase::ThinLTOPreLink;
- if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen)
+ if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
+ IsCtxProfUse)
addPreInlinerPasses(MPM, Level, Phase);
// Add all the requested passes for instrumentation PGO, if requested.
@@ -1187,8 +1194,13 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
/*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
PGOOpt->FS);
- } else if (IsCtxProfGen) {
+ } else if (IsCtxProfGen || IsCtxProfUse) {
MPM.addPass(PGOInstrumentationGen(false));
+ // In pre-link, we just want the instrumented IR. We use the contextual
+ // profile in the post-thinlink phase.
+ // The instrumentation will be removed in post-thinlink after IPO.
+ if (IsCtxProfUse)
+ return MPM;
addPostPGOLoopRotation(MPM, Level);
MPM.addPass(PGOCtxProfLoweringPass());
}
@@ -1655,6 +1667,11 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
// can.
MPM.addPass(buildModuleSimplificationPipeline(
Level, ThinOrFullLTOPhase::ThinLTOPreLink));
+ // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
+ // thinlto use the contextual info to perform imports; then use the contextual
+ // profile in the post-thinlink phase.
+ if (!UseCtxProfile.empty() && !PGOOpt)
+ return MPM;
// Run partial inlining pass to partially inline functions that have
// large bodies.
diff --git a/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll b/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll
new file mode 100644
index 0000000..b50a815
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; There is no profile, but that's OK because the prelink does not care about
+; the content of the profile, just that we intend to use one.
+; There is no scenario currently of doing ctx profile use without thinlto.
+;
+; RUN: opt -passes='thinlto-pre-link<O2>' -use-ctx-profile=something_that_does_not_exist %s -S | FileCheck %s
+
+declare void @bar()
+
+define void @foo(i32 %a, ptr %fct) {
+; CHECK-LABEL: define void @foo(
+; CHECK-SAME: i32 [[A:%.*]], ptr [[FCT:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[T:%.*]] = icmp eq i32 [[A]], 0
+; CHECK-NEXT: br i1 [[T]], label %[[YES:.*]], label %[[NO:.*]]
+; CHECK: [[YES]]:
+; CHECK-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 1)
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[FCT]] to i64
+; CHECK-NEXT: call void @llvm.instrprof.value.profile(ptr @__profn_foo, i64 728453322856651412, i64 [[TMP1]], i32 0, i32 0)
+; CHECK-NEXT: call void [[FCT]](i32 0)
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[NO]]:
+; CHECK-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0)
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+ %t = icmp eq i32 %a, 0
+ br i1 %t, label %yes, label %no
+yes:
+ call void %fct(i32 %a)
+ br label %exit
+no:
+ call void @bar()
+ br label %exit
+exit:
+ ret void
+}