aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Passes/PassBuilderPipelines.cpp | 21
-rw-r--r-- llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll | 38
2 files changed, 57 insertions, 2 deletions
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index a611872..adebbb5 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -304,6 +304,10 @@ static cl::opt<bool> UseLoopVersioningLICM(
"enable-loop-versioning-licm", cl::init(false), cl::Hidden,
cl::desc("Enable the experimental Loop Versioning LICM pass"));
+static cl::opt<std::string>
+ UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden,
+ cl::desc("Use the specified contextual profile file"));
+
namespace llvm {
extern cl::opt<bool> EnableMemProfContextDisambiguation;
@@ -1176,8 +1180,11 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
// Enable contextual profiling instrumentation.
const bool IsCtxProfGen = !IsPGOInstrGen && IsPreLink &&
PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
+ const bool IsCtxProfUse = !UseCtxProfile.empty() && !PGOOpt &&
+ Phase == ThinOrFullLTOPhase::ThinLTOPreLink;
- if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen)
+ if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
+ IsCtxProfUse)
addPreInlinerPasses(MPM, Level, Phase);
// Add all the requested passes for instrumentation PGO, if requested.
@@ -1187,8 +1194,13 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
/*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
PGOOpt->FS);
- } else if (IsCtxProfGen) {
+ } else if (IsCtxProfGen || IsCtxProfUse) {
MPM.addPass(PGOInstrumentationGen(false));
+ // In pre-link, we just want the instrumented IR. We use the contextual
+ // profile in the post-thinlink phase.
+ // The instrumentation will be removed in post-thinlink after IPO.
+ if (IsCtxProfUse)
+ return MPM;
addPostPGOLoopRotation(MPM, Level);
MPM.addPass(PGOCtxProfLoweringPass());
}
@@ -1655,6 +1667,11 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
// can.
MPM.addPass(buildModuleSimplificationPipeline(
Level, ThinOrFullLTOPhase::ThinLTOPreLink));
+ // In pre-link, for ctx prof use, we stop here with an instrumented IR.
+ // ThinLTO then uses the contextual info to perform imports, and the
+ // contextual profile itself is used in the post-thinlink phase.
+ if (!UseCtxProfile.empty() && !PGOOpt)
+ return MPM;
// Run partial inlining pass to partially inline functions that have
// large bodies.
diff --git a/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll b/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll
new file mode 100644
index 0000000..b50a815
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; The profile file does not exist, but that's OK because the prelink does not
+; care about the content of the profile, just that we intend to use one.
+; Currently there is no scenario that uses a ctx profile without ThinLTO.
+;
+; RUN: opt -passes='thinlto-pre-link<O2>' -use-ctx-profile=something_that_does_not_exist %s -S | FileCheck %s
+
+declare void @bar()
+
+define void @foo(i32 %a, ptr %fct) {
+; CHECK-LABEL: define void @foo(
+; CHECK-SAME: i32 [[A:%.*]], ptr [[FCT:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[T:%.*]] = icmp eq i32 [[A]], 0
+; CHECK-NEXT: br i1 [[T]], label %[[YES:.*]], label %[[NO:.*]]
+; CHECK: [[YES]]:
+; CHECK-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 1)
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[FCT]] to i64
+; CHECK-NEXT: call void @llvm.instrprof.value.profile(ptr @__profn_foo, i64 728453322856651412, i64 [[TMP1]], i32 0, i32 0)
+; CHECK-NEXT: call void [[FCT]](i32 0)
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[NO]]:
+; CHECK-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0)
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+ %t = icmp eq i32 %a, 0
+ br i1 %t, label %yes, label %no
+yes:
+ call void %fct(i32 %a)
+ br label %exit
+no:
+ call void @bar()
+ br label %exit
+exit:
+ ret void
+}