aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAmir Ayupov <aaupov@fb.com>2025-07-25 07:24:38 -0700
committerAmir Ayupov <aaupov@fb.com>2025-07-25 07:24:38 -0700
commit42339ce24b0767736d8835d512bcb4e2824867d0 (patch)
treeeac89c7f1b049265ae7ca5803fdd73a8d8bcb450
parente38f98f535f6e2ce2b42ea0413919f87b1239964 (diff)
parentf34ba3d7a3d12f585798b6a3c0e73fb69f2c7ba4 (diff)
downloadllvm-users/aaupov/spr/boltnfc-register-profiled-functions-once-1.zip
llvm-users/aaupov/spr/boltnfc-register-profiled-functions-once-1.tar.gz
llvm-users/aaupov/spr/boltnfc-register-profiled-functions-once-1.tar.bz2
Created using spr 1.3.4
-rw-r--r--bolt/include/bolt/Profile/DataAggregator.h3
-rw-r--r--bolt/lib/Profile/DataAggregator.cpp44
-rw-r--r--bolt/lib/Rewrite/RewriteInstance.cpp15
-rw-r--r--bolt/test/X86/unclaimed-jt-entries.s18
4 files changed, 42 insertions, 38 deletions
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index db0f690..cb1b87f 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -502,6 +502,9 @@ private:
/// entries).
void imputeFallThroughs();
+ /// Register profiled functions for lite mode.
+ void registerProfiledFunctions();
+
/// Debugging dump methods
void dump() const;
void dump(const PerfBranchSample &Sample) const;
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 905728de..c13fa6d 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -581,6 +581,26 @@ void DataAggregator::imputeFallThroughs() {
outs() << "BOLT-INFO: imputed " << InferredTraces << " traces\n";
}
+void DataAggregator::registerProfiledFunctions() {
+ DenseSet<uint64_t> Addrs;
+ for (const auto &Trace : llvm::make_first_range(Traces)) {
+ if (Trace.Branch != Trace::FT_ONLY &&
+ Trace.Branch != Trace::FT_EXTERNAL_ORIGIN)
+ Addrs.insert(Trace.Branch);
+ Addrs.insert(Trace.From);
+ }
+
+ for (const auto [PC, _] : BasicSamples)
+ Addrs.insert(PC);
+
+ for (const PerfMemSample &MemSample : MemSamples)
+ Addrs.insert(MemSample.PC);
+
+ for (const uint64_t Addr : Addrs)
+ if (BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr))
+ Func->setHasProfileAvailable();
+}
+
Error DataAggregator::preprocessProfile(BinaryContext &BC) {
this->BC = &BC;
@@ -603,6 +623,7 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
exit(0);
}
+ registerProfiledFunctions();
return Error::success();
}
@@ -906,11 +927,10 @@ DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, const Trace &Trace,
if (BF.isPseudo())
return Branches;
- if (!BF.isSimple())
+ // Can only record traces in CFG state
+ if (!BF.hasCFG())
return std::nullopt;
- assert(BF.hasCFG() && "can only record traces in CFG state");
-
const BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From);
const BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To);
@@ -1348,10 +1368,6 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
}
const uint64_t FromOffset = Addr[0]->Offset;
- BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(FromOffset);
- if (FromFunc)
- FromFunc->setHasProfileAvailable();
-
int64_t Count = Counters[0];
int64_t Mispreds = Counters[1];
@@ -1362,11 +1378,6 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
return std::error_code();
}
- const uint64_t ToOffset = Addr[1]->Offset;
- BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(ToOffset);
- if (ToFunc)
- ToFunc->setHasProfileAvailable();
-
/// For fall-through types, adjust locations to match Trace container.
if (Type == FT || Type == FT_EXTERNAL_ORIGIN || Type == FT_EXTERNAL_RETURN) {
Addr[2] = Location(Addr[1]->Offset); // Trace To
@@ -1614,9 +1625,6 @@ std::error_code DataAggregator::parseBranchEvents() {
Traces.reserve(TraceMap.size());
for (const auto &[Trace, Info] : TraceMap) {
Traces.emplace_back(Trace, Info);
- for (const uint64_t Addr : {Trace.Branch, Trace.From})
- if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Addr))
- BF->setHasProfileAvailable();
}
clear(TraceMap);
@@ -1677,9 +1685,6 @@ std::error_code DataAggregator::parseBasicEvents() {
continue;
++NumTotalSamples;
- if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
- BF->setHasProfileAvailable();
-
++BasicSamples[Sample->PC];
EventNames.insert(Sample->EventName);
}
@@ -1717,9 +1722,6 @@ std::error_code DataAggregator::parseMemEvents() {
if (std::error_code EC = Sample.getError())
return EC;
- if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
- BF->setHasProfileAvailable();
-
MemSamples.emplace_back(std::move(Sample.get()));
}
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index f1b94e4..9f243a1 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -714,21 +714,6 @@ Error RewriteInstance::run() {
preprocessProfileData();
- // Skip disassembling if we have a translation table and we are running an
- // aggregation job.
- if (opts::AggregateOnly && BAT->enabledFor(InputFile)) {
- // YAML profile in BAT mode requires CFG for .bolt.org.text functions
- if (!opts::SaveProfile.empty() ||
- opts::ProfileFormat == opts::ProfileFormatKind::PF_YAML) {
- selectFunctionsToProcess();
- disassembleFunctions();
- processMetadataPreCFG();
- buildFunctionsCFG();
- }
- processProfileData();
- return Error::success();
- }
-
selectFunctionsToProcess();
readDebugInfo();
diff --git a/bolt/test/X86/unclaimed-jt-entries.s b/bolt/test/X86/unclaimed-jt-entries.s
index 1102e4a..31b72c4 100644
--- a/bolt/test/X86/unclaimed-jt-entries.s
+++ b/bolt/test/X86/unclaimed-jt-entries.s
@@ -18,6 +18,18 @@
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
# RUN: %clang %cflags -no-pie %t.o -o %t.exe -Wl,-q
+
+## Check that non-simple function profile is emitted in perf2bolt mode
+# RUN: link_fdata %s %t.exe %t.pa PREAGG
+# RUN: llvm-strip -N L5 -N L5_ret %t.exe
+# RUN: perf2bolt %t.exe -p %t.pa --pa -o %t.fdata -strict=0 -print-profile \
+# RUN: -print-only=main | FileCheck %s --check-prefix=CHECK-P2B
+# CHECK-P2B: PERF2BOLT: traces mismatching disassembled function contents: 0
+# CHECK-P2B: Binary Function "main"
+# CHECK-P2B: IsSimple : 0
+# RUN: FileCheck %s --input-file %t.fdata --check-prefix=CHECK-FDATA
+# CHECK-FDATA: 1 main 0 1 main 7 0 1
+
# RUN: llvm-bolt %t.exe -v=1 -o %t.out 2>&1 | FileCheck %s
# CHECK: BOLT-WARNING: unclaimed data to code reference (possibly an unrecognized jump table entry) to .Ltmp[[#]] in main
@@ -33,8 +45,10 @@
.size main, .Lend-main
main:
jmp *L4-24(,%rdi,8)
-.L5:
+# PREAGG: T #main# #L5# #L5_ret# 1
+L5:
movl $4, %eax
+L5_ret:
ret
.L9:
movl $2, %eax
@@ -58,7 +72,7 @@ L4:
.quad .L3
.quad .L6
.quad .L3
- .quad .L5
+ .quad L5
.quad .L3
.quad .L3
.quad .L3