diff options
author | Amir Ayupov <aaupov@fb.com> | 2025-07-25 07:24:38 -0700 |
---|---|---|
committer | Amir Ayupov <aaupov@fb.com> | 2025-07-25 07:24:38 -0700 |
commit | 42339ce24b0767736d8835d512bcb4e2824867d0 (patch) | |
tree | eac89c7f1b049265ae7ca5803fdd73a8d8bcb450 | |
parent | e38f98f535f6e2ce2b42ea0413919f87b1239964 (diff) | |
parent | f34ba3d7a3d12f585798b6a3c0e73fb69f2c7ba4 (diff) | |
download | llvm-users/aaupov/spr/boltnfc-register-profiled-functions-once-1.zip llvm-users/aaupov/spr/boltnfc-register-profiled-functions-once-1.tar.gz llvm-users/aaupov/spr/boltnfc-register-profiled-functions-once-1.tar.bz2 |
[𝘀𝗽𝗿] initial version (users/aaupov/spr/boltnfc-register-profiled-functions-once-1)
Created using spr 1.3.4
-rw-r--r-- | bolt/include/bolt/Profile/DataAggregator.h | 3 | ||||
-rw-r--r-- | bolt/lib/Profile/DataAggregator.cpp | 44 | ||||
-rw-r--r-- | bolt/lib/Rewrite/RewriteInstance.cpp | 15 | ||||
-rw-r--r-- | bolt/test/X86/unclaimed-jt-entries.s | 18 |
4 files changed, 42 insertions, 38 deletions
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h index db0f690..cb1b87f 100644 --- a/bolt/include/bolt/Profile/DataAggregator.h +++ b/bolt/include/bolt/Profile/DataAggregator.h @@ -502,6 +502,9 @@ private: /// entries). void imputeFallThroughs(); + /// Register profiled functions for lite mode. + void registerProfiledFunctions(); + /// Debugging dump methods void dump() const; void dump(const PerfBranchSample &Sample) const; diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index 905728de..c13fa6d 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -581,6 +581,26 @@ void DataAggregator::imputeFallThroughs() { outs() << "BOLT-INFO: imputed " << InferredTraces << " traces\n"; } +void DataAggregator::registerProfiledFunctions() { + DenseSet<uint64_t> Addrs; + for (const auto &Trace : llvm::make_first_range(Traces)) { + if (Trace.Branch != Trace::FT_ONLY && + Trace.Branch != Trace::FT_EXTERNAL_ORIGIN) + Addrs.insert(Trace.Branch); + Addrs.insert(Trace.From); + } + + for (const auto [PC, _] : BasicSamples) + Addrs.insert(PC); + + for (const PerfMemSample &MemSample : MemSamples) + Addrs.insert(MemSample.PC); + + for (const uint64_t Addr : Addrs) + if (BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr)) + Func->setHasProfileAvailable(); +} + Error DataAggregator::preprocessProfile(BinaryContext &BC) { this->BC = &BC; @@ -603,6 +623,7 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) { exit(0); } + registerProfiledFunctions(); return Error::success(); } @@ -906,11 +927,10 @@ DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, const Trace &Trace, if (BF.isPseudo()) return Branches; - if (!BF.isSimple()) + // Can only record traces in CFG state + if (!BF.hasCFG()) return std::nullopt; - assert(BF.hasCFG() && "can only record traces in CFG state"); - const BinaryBasicBlock *FromBB = 
BF.getBasicBlockContainingOffset(From); const BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To); @@ -1348,10 +1368,6 @@ std::error_code DataAggregator::parseAggregatedLBREntry() { } const uint64_t FromOffset = Addr[0]->Offset; - BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(FromOffset); - if (FromFunc) - FromFunc->setHasProfileAvailable(); - int64_t Count = Counters[0]; int64_t Mispreds = Counters[1]; @@ -1362,11 +1378,6 @@ std::error_code DataAggregator::parseAggregatedLBREntry() { return std::error_code(); } - const uint64_t ToOffset = Addr[1]->Offset; - BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(ToOffset); - if (ToFunc) - ToFunc->setHasProfileAvailable(); - /// For fall-through types, adjust locations to match Trace container. if (Type == FT || Type == FT_EXTERNAL_ORIGIN || Type == FT_EXTERNAL_RETURN) { Addr[2] = Location(Addr[1]->Offset); // Trace To @@ -1614,9 +1625,6 @@ std::error_code DataAggregator::parseBranchEvents() { Traces.reserve(TraceMap.size()); for (const auto &[Trace, Info] : TraceMap) { Traces.emplace_back(Trace, Info); - for (const uint64_t Addr : {Trace.Branch, Trace.From}) - if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Addr)) - BF->setHasProfileAvailable(); } clear(TraceMap); @@ -1677,9 +1685,6 @@ std::error_code DataAggregator::parseBasicEvents() { continue; ++NumTotalSamples; - if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC)) - BF->setHasProfileAvailable(); - ++BasicSamples[Sample->PC]; EventNames.insert(Sample->EventName); } @@ -1717,9 +1722,6 @@ std::error_code DataAggregator::parseMemEvents() { if (std::error_code EC = Sample.getError()) return EC; - if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC)) - BF->setHasProfileAvailable(); - MemSamples.emplace_back(std::move(Sample.get())); } diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index f1b94e4..9f243a1 100644 --- 
a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -714,21 +714,6 @@ Error RewriteInstance::run() { preprocessProfileData(); - // Skip disassembling if we have a translation table and we are running an - // aggregation job. - if (opts::AggregateOnly && BAT->enabledFor(InputFile)) { - // YAML profile in BAT mode requires CFG for .bolt.org.text functions - if (!opts::SaveProfile.empty() || - opts::ProfileFormat == opts::ProfileFormatKind::PF_YAML) { - selectFunctionsToProcess(); - disassembleFunctions(); - processMetadataPreCFG(); - buildFunctionsCFG(); - } - processProfileData(); - return Error::success(); - } - selectFunctionsToProcess(); readDebugInfo(); diff --git a/bolt/test/X86/unclaimed-jt-entries.s b/bolt/test/X86/unclaimed-jt-entries.s index 1102e4a..31b72c4 100644 --- a/bolt/test/X86/unclaimed-jt-entries.s +++ b/bolt/test/X86/unclaimed-jt-entries.s @@ -18,6 +18,18 @@ # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o # RUN: %clang %cflags -no-pie %t.o -o %t.exe -Wl,-q + +## Check that non-simple function profile is emitted in perf2bolt mode +# RUN: link_fdata %s %t.exe %t.pa PREAGG +# RUN: llvm-strip -N L5 -N L5_ret %t.exe +# RUN: perf2bolt %t.exe -p %t.pa --pa -o %t.fdata -strict=0 -print-profile \ +# RUN: -print-only=main | FileCheck %s --check-prefix=CHECK-P2B +# CHECK-P2B: PERF2BOLT: traces mismatching disassembled function contents: 0 +# CHECK-P2B: Binary Function "main" +# CHECK-P2B: IsSimple : 0 +# RUN: FileCheck %s --input-file %t.fdata --check-prefix=CHECK-FDATA +# CHECK-FDATA: 1 main 0 1 main 7 0 1 + # RUN: llvm-bolt %t.exe -v=1 -o %t.out 2>&1 | FileCheck %s # CHECK: BOLT-WARNING: unclaimed data to code reference (possibly an unrecognized jump table entry) to .Ltmp[[#]] in main @@ -33,8 +45,10 @@ .size main, .Lend-main main: jmp *L4-24(,%rdi,8) -.L5: +# PREAGG: T #main# #L5# #L5_ret# 1 +L5: movl $4, %eax +L5_ret: ret .L9: movl $2, %eax @@ -58,7 +72,7 @@ L4: .quad .L3 .quad .L6 .quad .L3 - .quad 
.L5 + .quad L5 .quad .L3 .quad .L3 .quad .L3 |