author    Amir Ayupov <aaupov@fb.com>  2025-07-11 21:06:42 -0700
committer Amir Ayupov <aaupov@fb.com>  2025-07-11 21:06:42 -0700
commit    31279c10f03fd955480a4ec195433d5c676e6593 (patch)
tree      edd075f68b40b85a2044214810227d44315b4cf2
parent    0da9aacf4898fc9debfd930ab3dfbac7084c5e2a (diff)
[spr] changes to main this commit is based on (users/aaupov/spr/main.bolt-invoke-terminator)
Created using spr 1.3.4 [skip ci]
-rw-r--r--  bolt/include/bolt/Core/MCPlusBuilder.h      | 11
-rw-r--r--  bolt/include/bolt/Profile/DataAggregator.h  | 43
-rw-r--r--  bolt/lib/Profile/DataAggregator.cpp          | 95
3 files changed, 130 insertions, 19 deletions
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index b99cf8b..f902a8c 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -430,6 +430,17 @@ public:
return Analysis->isIndirectBranch(Inst);
}
+ /// Returns true if the instruction unconditionally transfers control to
+ /// another program point, interrupting sequential code execution, e.g. via a
+ /// call, return, or unconditional jump. This explicitly leaves out
+ /// conditional branches, as they may not be taken, but does allow a transfer
+ /// of control to the next instruction (zero-displacement jump/call).
+ bool isUnconditionalControlTransfer(const MCInst &Inst) const {
+ const MCInstrDesc &Desc = Info->get(Inst.getOpcode());
+ // isBarrier() captures returns and unconditional branches.
+ return Desc.isBarrier() || Desc.isCall();
+ }
+
/// Returns true if the instruction is memory indirect call or jump
virtual bool isBranchOnMem(const MCInst &Inst) const {
llvm_unreachable("not implemented");
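
The new isUnconditionalControlTransfer() helper above leans on MCInstrDesc:
Desc.isBarrier() covers returns and unconditional branches, and Desc.isCall()
adds calls, while conditional branches stay excluded because they may fall
through. Exercising the real helper requires a fully initialized target, so the
following is only a minimal, self-contained sketch of the same classification;
the InstKind enum and the free function below are illustrative stand-ins, not
BOLT API.

#include <cassert>

// Simplified stand-in for the instruction kinds the predicate distinguishes.
enum class InstKind { Call, Return, UncondBranch, CondBranch, Other };

// Mirrors Desc.isBarrier() || Desc.isCall(): returns and unconditional jumps
// are barriers, calls are matched explicitly, and conditional branches are
// not unconditional transfers because they may fall through.
static bool isUnconditionalControlTransfer(InstKind Kind) {
  switch (Kind) {
  case InstKind::Call:
  case InstKind::Return:
  case InstKind::UncondBranch:
    return true;
  case InstKind::CondBranch:
  case InstKind::Other:
    return false;
  }
  return false;
}

int main() {
  assert(isUnconditionalControlTransfer(InstKind::Return));
  assert(isUnconditionalControlTransfer(InstKind::Call));
  assert(!isUnconditionalControlTransfer(InstKind::CondBranch));
  return 0;
}
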
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index c7400b0..db0f690 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -137,7 +137,7 @@ private:
std::vector<std::pair<Trace, TakenBranchInfo>> Traces;
/// Pre-populated addresses of returns, coming from pre-aggregated data or
/// disassembly. Used to disambiguate call-continuation fall-throughs.
- std::unordered_set<uint64_t> Returns;
+ std::unordered_map<uint64_t, bool> Returns;
std::unordered_map<uint64_t, uint64_t> BasicSamples;
std::vector<PerfMemSample> MemSamples;
@@ -498,6 +498,10 @@ private:
/// If \p FileBuildID has no match, then issue an error and exit.
void processFileBuildID(StringRef FileBuildID);
+ /// Infer missing fall-throughs for branch-only traces (LBR top-of-stack
+ /// entries).
+ void imputeFallThroughs();
+
/// Debugging dump methods
void dump() const;
void dump(const PerfBranchSample &Sample) const;
@@ -509,6 +513,43 @@ private:
void printBasicSamplesDiagnostics(uint64_t OutOfRangeSamples) const;
void printBranchStacksDiagnostics(uint64_t IgnoredSamples) const;
+ /// Get the instruction at \p Addr, either from the containing binary
+ /// function or by disassembling it in place, and invoke \p Callback on the
+ /// resulting MCInst. Returns the result of the callback or std::nullopt.
+ template <typename T>
+ std::optional<T>
+ testInstructionAt(const uint64_t Addr,
+ std::function<T(const MCInst &)> Callback) const {
+ BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr);
+ if (!Func)
+ return std::nullopt;
+ const uint64_t Offset = Addr - Func->getAddress();
+ if (Func->hasInstructions()) {
+ if (auto *MI = Func->getInstructionAtOffset(Offset))
+ return Callback(*MI);
+ } else {
+ if (auto MI = Func->disassembleInstructionAtOffset(Offset))
+ return Callback(*MI);
+ }
+ return std::nullopt;
+ }
+
+ /// Apply \p Callback to the instruction at \p Addr and memoize the result
+ /// in \p Map.
+ template <typename T>
+ std::optional<T> testAndSet(const uint64_t Addr,
+ std::function<T(const MCInst &)> Callback,
+ std::unordered_map<uint64_t, T> &Map) {
+ auto It = Map.find(Addr);
+ if (It != Map.end())
+ return It->second;
+ if (std::optional<T> Res = testInstructionAt<T>(Addr, Callback)) {
+ Map.emplace(Addr, *Res);
+ return *Res;
+ }
+ return std::nullopt;
+ }
+
public:
/// If perf.data was collected without build ids, the buildid-list may contain
/// incomplete entries. Return true if the buffer containing
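
The testInstructionAt/testAndSet templates above factor out a memoized
"resolve the instruction at an address and evaluate a predicate on it" query.
Below is a standalone sketch of the same pattern, with the BOLT-specific
lookup replaced by a hypothetical lookupInstruction() over a tiny fake code
table; the Inst struct and all names here are assumptions for illustration
only.

#include <cassert>
#include <cstdint>
#include <functional>
#include <optional>
#include <unordered_map>

// Toy instruction record standing in for MCInst.
struct Inst {
  bool IsReturn;
};

// Stand-in for testInstructionAt(): resolve an address to an instruction.
static std::optional<Inst> lookupInstruction(uint64_t Addr) {
  static const std::unordered_map<uint64_t, Inst> Code = {
      {0x400010, {/*IsReturn=*/true}}, {0x400020, {/*IsReturn=*/false}}};
  auto It = Code.find(Addr);
  if (It == Code.end())
    return std::nullopt;
  return It->second;
}

// Same shape as DataAggregator::testAndSet: consult the cache first, otherwise
// evaluate the callback on the instruction and memoize the answer.
template <typename T>
static std::optional<T> testAndSet(uint64_t Addr,
                                   std::function<T(const Inst &)> Callback,
                                   std::unordered_map<uint64_t, T> &Map) {
  if (auto It = Map.find(Addr); It != Map.end())
    return It->second;
  if (std::optional<Inst> MI = lookupInstruction(Addr)) {
    T Res = Callback(*MI);
    Map.emplace(Addr, Res);
    return Res;
  }
  return std::nullopt;
}

int main() {
  // Usage mirroring the reworked checkReturn(): unknown addresses default to
  // "not a return" via value_or(false); known ones are cached in Returns.
  std::unordered_map<uint64_t, bool> Returns;
  auto IsReturn = [](const Inst &MI) { return MI.IsReturn; };
  assert(testAndSet<bool>(0x400010, IsReturn, Returns).value_or(false));
  assert(!testAndSet<bool>(0x400020, IsReturn, Returns).value_or(false));
  assert(!testAndSet<bool>(0xdead, IsReturn, Returns).value_or(false));
  assert(Returns.size() == 2); // the unknown address is not memoized
  return 0;
}
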
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index c60591c..905728de 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -77,6 +77,11 @@ FilterPID("pid",
cl::Optional,
cl::cat(AggregatorCategory));
+static cl::opt<bool> ImputeTraceFallthrough(
+ "impute-trace-fall-through",
+ cl::desc("impute missing fall-throughs for branch-only traces"),
+ cl::Optional, cl::cat(AggregatorCategory));
+
static cl::opt<bool>
IgnoreBuildID("ignore-build-id",
cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
@@ -513,6 +518,69 @@ void DataAggregator::parsePerfData(BinaryContext &BC) {
deleteTempFiles();
}
+void DataAggregator::imputeFallThroughs() {
+ if (Traces.empty())
+ return;
+
+ std::pair PrevBranch(Trace::EXTERNAL, Trace::EXTERNAL);
+ uint64_t AggregateCount = 0;
+ uint64_t AggregateFallthroughSize = 0;
+ uint64_t InferredTraces = 0;
+
+ // Helper map caching whether an instruction is a call/ret/unconditional branch.
+ std::unordered_map<uint64_t, bool> IsUncondCTMap;
+ auto checkUnconditionalControlTransfer = [&](const uint64_t Addr) {
+ auto isUncondCT = [&](const MCInst &MI) -> bool {
+ return BC->MIB->isUnconditionalControlTransfer(MI);
+ };
+ return testAndSet<bool>(Addr, isUncondCT, IsUncondCTMap).value_or(true);
+ };
+
+ // Traces are sorted by their component addresses (Branch, From, To).
+ // assert(is_sorted(Traces));
+
+ // Traces corresponding to the top-of-stack branch entry with a missing
+ // fall-through have BR_ONLY (-1ULL/UINT64_MAX) in the To field, meaning
+ // that, for fixed values of Branch and From, branch-only traces are stored
+ // after all traces with a valid fall-through.
+ //
+ // Group traces by (Branch, From) and compute weighted average fall-through
+ // length for the top-of-stack trace (closing the group) by accumulating the
+ // fall-through lengths of traces with valid fall-throughs earlier in the
+ // group.
+ for (auto &[Trace, Info] : Traces) {
+ // Skip fall-throughs in external code.
+ if (Trace.From == Trace::EXTERNAL)
+ continue;
+ std::pair CurrentBranch(Trace.Branch, Trace.From);
+ // BR_ONLY must be the last trace in the group
+ if (Trace.To == Trace::BR_ONLY) {
+ // If the group is not empty, use the aggregate values; otherwise use 0 bytes
+ // for unconditional control transfers (call/ret/uncond branch), 1 byte otherwise.
+ uint64_t InferredBytes =
+ PrevBranch == CurrentBranch
+ ? AggregateFallthroughSize / AggregateCount
+ : !checkUnconditionalControlTransfer(Trace.From);
+ Trace.To = Trace.From + InferredBytes;
+ LLVM_DEBUG(dbgs() << "imputed " << Trace << " (" << InferredBytes
+ << " bytes)\n");
+ ++InferredTraces;
+ } else {
+ // Trace with a valid fall-through
+ // New group: reset aggregates.
+ if (CurrentBranch != PrevBranch)
+ AggregateCount = AggregateFallthroughSize = 0;
+ // Only use valid fall-through lengths
+ if (Trace.To != Trace::EXTERNAL)
+ AggregateFallthroughSize += (Trace.To - Trace.From) * Info.TakenCount;
+ AggregateCount += Info.TakenCount;
+ }
+ PrevBranch = CurrentBranch;
+ }
+ if (opts::Verbosity >= 1)
+ outs() << "BOLT-INFO: imputed " << InferredTraces << " traces\n";
+}
+
Error DataAggregator::preprocessProfile(BinaryContext &BC) {
this->BC = &BC;
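
imputeFallThroughs() above boils down to a per-group weighted average: within
one (Branch, From) group, traces with valid fall-throughs contribute
length * TakenCount, and the BR_ONLY trace closing the group receives the
accumulated size divided by the accumulated count. A small, self-contained
sketch of that arithmetic follows; the lengths and counts are made up purely
for illustration.

#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

int main() {
  // One (Branch, From) group: traces with valid fall-throughs, as
  // {fall-through length in bytes, taken count}.
  std::vector<std::pair<uint64_t, uint64_t>> Group = {{16, 10}, {32, 30}};

  uint64_t AggregateFallthroughSize = 0;
  uint64_t AggregateCount = 0;
  for (const auto &[Bytes, TakenCount] : Group) {
    AggregateFallthroughSize += Bytes * TakenCount; // weight by sample count
    AggregateCount += TakenCount;
  }

  // The BR_ONLY trace that closes the group is imputed the weighted average:
  // (16*10 + 32*30) / (10+30) = 1120 / 40 = 28 bytes, i.e. To = From + 28.
  uint64_t InferredBytes = AggregateFallthroughSize / AggregateCount;
  std::cout << "imputed fall-through length: " << InferredBytes << " bytes\n";
  return 0;
}

When the group is empty (no earlier trace shares the same Branch and From),
the function instead falls back to 0 bytes if the branch is an unconditional
control transfer and 1 byte otherwise, and the whole pass only runs when
-impute-trace-fall-through is passed.
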
@@ -525,6 +593,9 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
// Sort parsed traces for faster processing.
llvm::sort(Traces, llvm::less_first());
+ if (opts::ImputeTraceFallthrough)
+ imputeFallThroughs();
+
if (opts::HeatmapMode) {
if (std::error_code EC = printLBRHeatMap())
return errorCodeToError(EC);
@@ -726,22 +797,10 @@ bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
}
bool DataAggregator::checkReturn(uint64_t Addr) {
- auto isReturn = [&](auto MI) { return MI && BC->MIB->isReturn(*MI); };
- if (llvm::is_contained(Returns, Addr))
- return true;
-
- BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr);
- if (!Func)
- return false;
-
- const uint64_t Offset = Addr - Func->getAddress();
- if (Func->hasInstructions()
- ? isReturn(Func->getInstructionAtOffset(Offset))
- : isReturn(Func->disassembleInstructionAtOffset(Offset))) {
- Returns.emplace(Addr);
- return true;
- }
- return false;
+ auto isReturn = [&](const MCInst &MI) -> bool {
+ return BC->MIB->isReturn(MI);
+ };
+ return testAndSet<bool>(Addr, isReturn, Returns).value_or(false);
}
bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
@@ -1331,7 +1390,7 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
if (!Addr[0]->Offset)
Addr[0]->Offset = Trace::FT_EXTERNAL_RETURN;
else
- Returns.emplace(Addr[0]->Offset);
+ Returns.emplace(Addr[0]->Offset, true);
}
/// Record a trace.
@@ -1592,7 +1651,7 @@ void DataAggregator::processBranchEvents() {
NamedRegionTimer T("processBranch", "Processing branch events",
TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
- Returns.emplace(Trace::FT_EXTERNAL_RETURN);
+ Returns.emplace(Trace::FT_EXTERNAL_RETURN, true);
for (const auto &[Trace, Info] : Traces) {
bool IsReturn = checkReturn(Trace.Branch);
// Ignore returns.