Diffstat (limited to 'bolt/lib')
-rw-r--r--  bolt/lib/Core/BinaryBasicBlock.cpp                       |   2
-rw-r--r--  bolt/lib/Core/BinaryContext.cpp                          |  61
-rw-r--r--  bolt/lib/Core/BinaryFunction.cpp                         | 169
-rw-r--r--  bolt/lib/Core/BinarySection.cpp                          |   6
-rw-r--r--  bolt/lib/Core/DebugNames.cpp                             |   2
-rw-r--r--  bolt/lib/Core/DynoStats.cpp                              |   2
-rw-r--r--  bolt/lib/Core/Exceptions.cpp                             |   8
-rw-r--r--  bolt/lib/Core/MCPlusBuilder.cpp                          |  27
-rw-r--r--  bolt/lib/Passes/CMakeLists.txt                           |   4
-rw-r--r--  bolt/lib/Passes/IdenticalCodeFolding.cpp                 |  11
-rw-r--r--  bolt/lib/Passes/Inliner.cpp                              |  45
-rw-r--r--  bolt/lib/Passes/InsertNegateRAStatePass.cpp              | 142
-rw-r--r--  bolt/lib/Passes/PAuthGadgetScanner.cpp                   |   2
-rw-r--r--  bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp (renamed from bolt/lib/Passes/MarkRAStates.cpp) | 78
-rw-r--r--  bolt/lib/Passes/PointerAuthCFIFixup.cpp                  | 268
-rw-r--r--  bolt/lib/Profile/DataAggregator.cpp                      |  18
-rw-r--r--  bolt/lib/Profile/StaleProfileMatching.cpp                |  24
-rw-r--r--  bolt/lib/Profile/YAMLProfileWriter.cpp                   | 103
-rw-r--r--  bolt/lib/Rewrite/BinaryPassManager.cpp                   |  19
-rw-r--r--  bolt/lib/Rewrite/CMakeLists.txt                          |   1
-rw-r--r--  bolt/lib/Rewrite/DWARFRewriter.cpp                       | 120
-rw-r--r--  bolt/lib/Rewrite/RSeqRewriter.cpp                        |  72
-rw-r--r--  bolt/lib/Rewrite/RewriteInstance.cpp                     | 280
-rw-r--r--  bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp         | 112
-rw-r--r--  bolt/lib/Target/X86/X86MCPlusBuilder.cpp                 |   6
-rw-r--r--  bolt/lib/Utils/CommandLineOpts.cpp                       |  10
26 files changed, 1168 insertions, 424 deletions
diff --git a/bolt/lib/Core/BinaryBasicBlock.cpp b/bolt/lib/Core/BinaryBasicBlock.cpp
index d680850..a6d0ca9 100644
--- a/bolt/lib/Core/BinaryBasicBlock.cpp
+++ b/bolt/lib/Core/BinaryBasicBlock.cpp
@@ -22,8 +22,6 @@
namespace llvm {
namespace bolt {
-constexpr uint32_t BinaryBasicBlock::INVALID_OFFSET;
-
bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS) {
return LHS.Index < RHS.Index;
}
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index b478925..51bc867 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -531,20 +531,40 @@ BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
}
MCSymbol *BinaryContext::handleExternalBranchTarget(uint64_t Address,
- BinaryFunction &BF) {
- if (BF.isInConstantIsland(Address)) {
- BF.setIgnored();
- this->outs() << "BOLT-WARNING: ignoring entry point at address 0x"
- << Twine::utohexstr(Address)
- << " in constant island of function " << BF << '\n';
- return nullptr;
+ BinaryFunction &Source,
+ BinaryFunction &Target) {
+ const uint64_t Offset = Address - Target.getAddress();
+ assert(Offset < Target.getSize() &&
+ "Address should be inside the referenced function");
+
+ bool IsValid = true;
+ if (Source.NeedBranchValidation) {
+ if (Target.CurrentState == BinaryFunction::State::Disassembled &&
+ !Target.getInstructionAtOffset(Offset)) {
+ this->errs()
+ << "BOLT-WARNING: corrupted control flow detected in function "
+ << Source
+ << ": an external branch/call targets an invalid instruction "
+ << "in function " << Target << " at address 0x"
+ << Twine::utohexstr(Address) << "; ignoring both functions\n";
+ IsValid = false;
+ }
+ if (Target.isInConstantIsland(Address)) {
+ this->errs() << "BOLT-WARNING: ignoring entry point at address 0x"
+ << Twine::utohexstr(Address)
+ << " in constant island of function " << Target << '\n';
+ IsValid = false;
+ }
}
- const uint64_t Offset = Address - BF.getAddress();
- assert(Offset < BF.getSize() &&
- "Address should be inside the referenced function");
+ if (!IsValid) {
+ Source.NeedBranchValidation = false;
+ Source.setIgnored();
+ Target.setIgnored();
+ return nullptr;
+ }
- return Offset ? BF.addEntryPointAtOffset(Offset) : BF.getSymbol();
+ return Offset ? Target.addEntryPointAtOffset(Offset) : Target.getSymbol();
}
MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
@@ -1433,13 +1453,11 @@ void BinaryContext::processInterproceduralReferences() {
// Create an extra entry point if needed. Can also render the target
// function ignored if the reference is invalid.
- handleExternalBranchTarget(Address, *TargetFunction);
+ handleExternalBranchTarget(Address, Function, *TargetFunction);
continue;
}
- // Check if address falls in function padding space - this could be
- // unmarked data in code. In this case adjust the padding space size.
ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
assert(Section && "cannot get section for referenced address");
@@ -1451,7 +1469,7 @@ void BinaryContext::processInterproceduralReferences() {
if (SectionName == ".plt" || SectionName == ".plt.got")
continue;
- // Check if it is aarch64 veneer written at Address
+ // Check if it is aarch64 veneer written at Address.
if (isAArch64() && handleAArch64Veneer(Address))
continue;
@@ -1463,6 +1481,8 @@ void BinaryContext::processInterproceduralReferences() {
exit(1);
}
+ // Check if the address falls into the function padding space - this could
+ // be unmarked data in code. In this case, adjust the padding space size.
TargetFunction = getBinaryFunctionContainingAddress(Address,
/*CheckPastEnd=*/false,
/*UseMaxSize=*/true);
@@ -1520,6 +1540,17 @@ void BinaryContext::foldFunction(BinaryFunction &ChildBF,
}
ChildBF.getSymbols().clear();
+ // Reset function mapping for local symbols.
+ for (uint64_t RelOffset : ChildBF.getInternalRefDataRelocations()) {
+ const Relocation *Rel = getRelocationAt(RelOffset);
+ if (!Rel || !Rel->Symbol)
+ continue;
+
+ WriteSymbolMapLock.lock();
+ SymbolToFunctionMap[Rel->Symbol] = nullptr;
+ WriteSymbolMapLock.unlock();
+ }
+
// Move other names the child function is known under.
llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases));
ChildBF.Aliases.clear();
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index ddaad6e..4ccef98 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -61,6 +61,8 @@ extern cl::OptionCategory BoltOptCategory;
extern cl::opt<bool> EnableBAT;
extern cl::opt<bool> Instrument;
+extern cl::list<std::string> PrintOnly;
+extern cl::opt<std::string> PrintOnlyFile;
extern cl::opt<bool> StrictMode;
extern cl::opt<bool> UpdateDebugSections;
extern cl::opt<unsigned> Verbosity;
@@ -133,14 +135,6 @@ PrintDynoStatsOnly("print-dyno-stats-only",
cl::Hidden,
cl::cat(BoltCategory));
-static cl::list<std::string>
-PrintOnly("print-only",
- cl::CommaSeparated,
- cl::desc("list of functions to print"),
- cl::value_desc("func1,func2,func3,..."),
- cl::Hidden,
- cl::cat(BoltCategory));
-
cl::opt<bool>
TimeBuild("time-build",
cl::desc("print time spent constructing binary functions"),
@@ -1044,8 +1038,10 @@ MCSymbol *BinaryFunction::getOrCreateLocalLabel(uint64_t Address) {
// For AArch64, check if this address is part of a constant island.
if (BC.isAArch64()) {
- if (MCSymbol *IslandSym = getOrCreateIslandAccess(Address))
+ if (MCSymbol *IslandSym = getOrCreateIslandAccess(Address)) {
+ Labels[Offset] = IslandSym;
return IslandSym;
+ }
}
if (Offset == getSize())
@@ -1414,9 +1410,7 @@ Error BinaryFunction::disassemble() {
// A recursive call. Calls to internal blocks are handled by
// ValidateInternalCalls pass.
TargetSymbol = getSymbol();
- }
-
- if (!TargetSymbol) {
+ } else {
// Create either local label or external symbol.
if (containsAddress(TargetAddress)) {
TargetSymbol = getOrCreateLocalLabel(TargetAddress);
@@ -1700,7 +1694,7 @@ bool BinaryFunction::scanExternalRefs() {
// Get a reference symbol for the function when address is a valid code
// reference.
BranchTargetSymbol =
- BC.handleExternalBranchTarget(TargetAddress, *TargetFunction);
+ BC.handleExternalBranchTarget(TargetAddress, *this, *TargetFunction);
if (!BranchTargetSymbol)
continue;
}
@@ -1896,16 +1890,6 @@ bool BinaryFunction::scanExternalRefs() {
}
}
- // Inform BinaryContext that this function symbols will not be defined and
- // relocations should not be created against them.
- if (BC.HasRelocations) {
- for (std::pair<const uint32_t, MCSymbol *> &LI : Labels)
- BC.UndefinedSymbols.insert(LI.second);
- for (MCSymbol *const EndLabel : FunctionEndLabels)
- if (EndLabel)
- BC.UndefinedSymbols.insert(EndLabel);
- }
-
clearList(Relocations);
clearList(ExternallyReferencedOffsets);
@@ -1918,6 +1902,36 @@ bool BinaryFunction::scanExternalRefs() {
return Success;
}
+bool BinaryFunction::validateInternalBranches() {
+ if (!isSimple() || TrapsOnEntry)
+ return true;
+
+ for (const auto &KV : Labels) {
+ MCSymbol *Label = KV.second;
+ if (getSecondaryEntryPointSymbol(Label))
+ continue;
+
+ const uint32_t Offset = KV.first;
+ // Skip empty functions and out-of-bounds offsets,
+ // as they may not be disassembled.
+ if (!Offset || (Offset > getSize()))
+ continue;
+
+ if (!getInstructionAtOffset(Offset) ||
+ isInConstantIsland(getAddress() + Offset)) {
+ BC.errs() << "BOLT-WARNING: corrupted control flow detected in function "
+ << *this << ": an internal branch/call targets an invalid "
+ << "instruction at address 0x"
+ << Twine::utohexstr(getAddress() + Offset)
+ << "; ignoring this function\n";
+ setIgnored();
+ return false;
+ }
+ }
+
+ return true;
+}
+
void BinaryFunction::postProcessEntryPoints() {
if (!isSimple())
return;
@@ -2059,41 +2073,47 @@ void BinaryFunction::postProcessJumpTables() {
}
}
-bool BinaryFunction::validateExternallyReferencedOffsets() {
- SmallPtrSet<MCSymbol *, 4> JTTargets;
- for (const JumpTable *JT : llvm::make_second_range(JumpTables))
- JTTargets.insert_range(JT->Entries);
+bool BinaryFunction::validateInternalRefDataRelocations() {
+ if (InternalRefDataRelocations.empty())
+ return true;
- bool HasUnclaimedReference = false;
- for (uint64_t Destination : ExternallyReferencedOffsets) {
- // Ignore __builtin_unreachable().
- if (Destination == getSize())
- continue;
- // Ignore constant islands
- if (isInConstantIsland(Destination + getAddress()))
- continue;
+ // Rely on the user hint that all data refs are valid and only used as
+ // destinations by indirect branch in the same function.
+ if (opts::StrictMode)
+ return true;
- if (BinaryBasicBlock *BB = getBasicBlockAtOffset(Destination)) {
- // Check if the externally referenced offset is a recognized jump table
- // target.
- if (JTTargets.contains(BB->getLabel()))
- continue;
+ DenseSet<uint64_t> UnclaimedRelocations(InternalRefDataRelocations);
+ for (const JumpTable *JT : llvm::make_second_range(JumpTables)) {
+ uint64_t EntryAddress = JT->getAddress();
+ while (EntryAddress < JT->getAddress() + JT->getSize()) {
+ UnclaimedRelocations.erase(EntryAddress);
+ EntryAddress += JT->EntrySize;
+ }
+ }
- if (opts::Verbosity >= 1) {
- BC.errs() << "BOLT-WARNING: unclaimed data to code reference (possibly "
- << "an unrecognized jump table entry) to " << BB->getName()
- << " in " << *this << "\n";
- }
- auto L = BC.scopeLock();
- addEntryPoint(*BB);
- } else {
- BC.errs() << "BOLT-WARNING: unknown data to code reference to offset "
- << Twine::utohexstr(Destination) << " in " << *this << "\n";
- setIgnored();
+ if (UnclaimedRelocations.empty())
+ return true;
+
+ BC.errs() << "BOLT-WARNING: " << UnclaimedRelocations.size()
+ << " unclaimed data relocation"
+ << (UnclaimedRelocations.size() > 1 ? "s" : "")
+ << " remain against function " << *this;
+ if (opts::Verbosity) {
+ BC.errs() << ":\n";
+ for (uint64_t RelocationAddress : UnclaimedRelocations) {
+ const Relocation *Relocation = BC.getRelocationAt(RelocationAddress);
+ BC.errs() << " ";
+ if (Relocation)
+ BC.errs() << *Relocation;
+ else
+ BC.errs() << "<missing relocation>";
+ BC.errs() << '\n';
}
- HasUnclaimedReference = true;
+ } else {
+ BC.errs() << ". Re-run with -v=1 to see the list\n";
}
- return !HasUnclaimedReference;
+
+ return false;
}
bool BinaryFunction::postProcessIndirectBranches(
@@ -2177,13 +2197,10 @@ bool BinaryFunction::postProcessIndirectBranches(
continue;
}
- // If this block contains an epilogue code and has an indirect branch,
- // then most likely it's a tail call. Otherwise, we cannot tell for sure
- // what it is and conservatively reject the function's CFG.
- bool IsEpilogue = llvm::any_of(BB, [&](const MCInst &Instr) {
- return BC.MIB->isLeave(Instr) || BC.MIB->isPop(Instr);
- });
- if (IsEpilogue) {
+ // If this block contains epilogue code and has an indirect branch,
+ // then most likely it's a tail call. Otherwise, we cannot tell for
+ // sure what it is and conservatively reject the function's CFG.
+ if (BC.MIB->isEpilogue(BB)) {
BC.MIB->convertJmpToTailCall(Instr);
BB.removeAllSuccessors();
continue;
@@ -2221,14 +2238,6 @@ bool BinaryFunction::postProcessIndirectBranches(
LastIndirectJumpBB->updateJumpTableSuccessors();
}
- // Validate that all data references to function offsets are claimed by
- // recognized jump tables. Register externally referenced blocks as entry
- // points.
- if (!opts::StrictMode && hasInternalReference()) {
- if (!validateExternallyReferencedOffsets())
- return false;
- }
-
if (HasUnknownControlFlow && !BC.HasRelocations)
return false;
@@ -2517,12 +2526,18 @@ Error BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
CurrentState = State::CFG;
// Make any necessary adjustments for indirect branches.
- if (!postProcessIndirectBranches(AllocatorId)) {
- if (opts::Verbosity) {
- BC.errs() << "BOLT-WARNING: failed to post-process indirect branches for "
- << *this << '\n';
- }
+ bool ValidCFG = postProcessIndirectBranches(AllocatorId);
+ if (!ValidCFG && opts::Verbosity) {
+ BC.errs() << "BOLT-WARNING: failed to post-process indirect branches for "
+ << *this << '\n';
+ }
+
+ // Validate that all data references to function offsets are claimed by
+ // recognized jump tables.
+ if (ValidCFG)
+ ValidCFG = validateInternalRefDataRelocations();
+ if (!ValidCFG) {
if (BC.isAArch64())
PreserveNops = BC.HasRelocations;
@@ -3234,14 +3249,6 @@ void BinaryFunction::clearDisasmState() {
clearList(Instructions);
clearList(IgnoredBranches);
clearList(TakenBranches);
-
- if (BC.HasRelocations) {
- for (std::pair<const uint32_t, MCSymbol *> &LI : Labels)
- BC.UndefinedSymbols.insert(LI.second);
- for (MCSymbol *const EndLabel : FunctionEndLabels)
- if (EndLabel)
- BC.UndefinedSymbols.insert(EndLabel);
- }
}
void BinaryFunction::setTrapOnEntry() {
diff --git a/bolt/lib/Core/BinarySection.cpp b/bolt/lib/Core/BinarySection.cpp
index 6f07017..e803d17 100644
--- a/bolt/lib/Core/BinarySection.cpp
+++ b/bolt/lib/Core/BinarySection.cpp
@@ -112,8 +112,10 @@ void BinarySection::emitAsData(MCStreamer &Streamer,
RI = ROE;
// Skip undefined symbols.
- auto HasUndefSym = [this](const auto &Relocation) {
- return BC.UndefinedSymbols.count(Relocation.Symbol);
+ auto HasUndefSym = [](const auto &Relocation) {
+ return Relocation.Symbol && Relocation.Symbol->isTemporary() &&
+ Relocation.Symbol->isUndefined() &&
+ !Relocation.Symbol->isRegistered();
};
if (std::any_of(ROI, ROE, HasUndefSym))
diff --git a/bolt/lib/Core/DebugNames.cpp b/bolt/lib/Core/DebugNames.cpp
index 6be2c5a..5272d40 100644
--- a/bolt/lib/Core/DebugNames.cpp
+++ b/bolt/lib/Core/DebugNames.cpp
@@ -555,7 +555,7 @@ void DWARF5AcceleratorTable::populateAbbrevsMap() {
void DWARF5AcceleratorTable::writeEntry(BOLTDWARF5AccelTableData &Entry) {
const uint64_t EntryID = getEntryID(Entry);
- if (EntryRelativeOffsets.find(EntryID) != EntryRelativeOffsets.end())
+ if (EntryRelativeOffsets.contains(EntryID))
EntryRelativeOffsets[EntryID] = EntriesBuffer->size();
const std::optional<DWARF5AccelTable::UnitIndexAndEncoding> EntryRet =
diff --git a/bolt/lib/Core/DynoStats.cpp b/bolt/lib/Core/DynoStats.cpp
index 1d98187..64a6d12 100644
--- a/bolt/lib/Core/DynoStats.cpp
+++ b/bolt/lib/Core/DynoStats.cpp
@@ -51,8 +51,6 @@ PrintDynoOpcodeStat("print-dyno-opcode-stats",
namespace llvm {
namespace bolt {
-constexpr const char *DynoStats::Desc[];
-
bool DynoStats::operator<(const DynoStats &Other) const {
return std::lexicographical_compare(
&Stats[FIRST_DYNO_STAT], &Stats[LAST_DYNO_STAT],
diff --git a/bolt/lib/Core/Exceptions.cpp b/bolt/lib/Core/Exceptions.cpp
index 27656c7..9c33a7c 100644
--- a/bolt/lib/Core/Exceptions.cpp
+++ b/bolt/lib/Core/Exceptions.cpp
@@ -572,7 +572,7 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
if (Function.getBinaryContext().isAArch64()) {
// Support for pointer authentication:
// We need to annotate instructions that modify the RA State, to work
- // out the state of each instruction in MarkRAStates Pass.
+ // out the state of each instruction in PointerAuthCFIAnalyzer Pass.
if (Offset != 0)
Function.setInstModifiesRAState(DW_CFA_remember_state, Offset);
}
@@ -583,7 +583,7 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
if (Function.getBinaryContext().isAArch64()) {
// Support for pointer authentication:
// We need to annotate instructions that modify the RA State, to work
- // out the state of each instruction in MarkRAStates Pass.
+ // out the state of each instruction in PointerAuthCFIAnalyzer Pass.
if (Offset != 0)
Function.setInstModifiesRAState(DW_CFA_restore_state, Offset);
}
@@ -652,7 +652,7 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
// BasicBlocks, which changes during optimizations. Instead of adding
// OpNegateRAState CFIs, an annotation is added to the instruction, to
// mark that the instruction modifies the RA State. The actual state for
- // instructions are worked out in MarkRAStates based on these
+ // instructions are worked out in PointerAuthCFIAnalyzer based on these
// annotations.
if (Offset != 0)
Function.setInstModifiesRAState(DW_CFA_AARCH64_negate_ra_state,
@@ -660,7 +660,7 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
else
// We cannot Annotate an instruction at Offset == 0.
// Instead, we save the initial (Signed) state, and push it to
- // MarkRAStates' RAStateStack.
+ // PointerAuthCFIAnalyzer's RAStateStack.
Function.setInitialRAState(true);
break;
}
diff --git a/bolt/lib/Core/MCPlusBuilder.cpp b/bolt/lib/Core/MCPlusBuilder.cpp
index e96de80..0cb4ba1 100644
--- a/bolt/lib/Core/MCPlusBuilder.cpp
+++ b/bolt/lib/Core/MCPlusBuilder.cpp
@@ -186,26 +186,21 @@ bool MCPlusBuilder::hasRestoreState(const MCInst &Inst) const {
return hasAnnotation(Inst, MCAnnotation::kRestoreState);
}
-void MCPlusBuilder::setRASigned(MCInst &Inst) const {
+void MCPlusBuilder::setRAState(MCInst &Inst, bool State) const {
assert(!hasAnnotation(Inst, MCAnnotation::kRASigned));
- setAnnotationOpValue(Inst, MCAnnotation::kRASigned, true);
-}
-
-bool MCPlusBuilder::isRASigned(const MCInst &Inst) const {
- return hasAnnotation(Inst, MCAnnotation::kRASigned);
-}
-
-void MCPlusBuilder::setRAUnsigned(MCInst &Inst) const {
assert(!hasAnnotation(Inst, MCAnnotation::kRAUnsigned));
- setAnnotationOpValue(Inst, MCAnnotation::kRAUnsigned, true);
+ if (State)
+ setAnnotationOpValue(Inst, MCAnnotation::kRASigned, true);
+ else
+ setAnnotationOpValue(Inst, MCAnnotation::kRAUnsigned, true);
}
-bool MCPlusBuilder::isRAUnsigned(const MCInst &Inst) const {
- return hasAnnotation(Inst, MCAnnotation::kRAUnsigned);
-}
-
-bool MCPlusBuilder::isRAStateUnknown(const MCInst &Inst) const {
- return !(isRAUnsigned(Inst) || isRASigned(Inst));
+std::optional<bool> MCPlusBuilder::getRAState(const MCInst &Inst) const {
+ if (hasAnnotation(Inst, MCAnnotation::kRASigned))
+ return true;
+ if (hasAnnotation(Inst, MCAnnotation::kRAUnsigned))
+ return false;
+ return std::nullopt;
}
std::optional<MCLandingPad> MCPlusBuilder::getEHInfo(const MCInst &Inst) const {
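The MCPlusBuilder change above collapses the separate signed/unsigned RA-state setters and getters into a single setRAState(bool)/getRAState() pair, with std::optional<bool> standing in for the old "unknown" case. A minimal standalone sketch of that tri-state pattern (illustrative only, not BOLT code; Instr and RAStateTracker are made-up stand-ins for MCInst and the annotation storage):

// Tri-state RA-state accessor sketch: absence of an entry means "unknown",
// true means signed, false means unsigned.
#include <cstdio>
#include <optional>
#include <unordered_map>

struct Instr { int Id; };

class RAStateTracker {
  // Missing entry == RA state unknown; true == signed, false == unsigned.
  std::unordered_map<int, bool> States;

public:
  void setRAState(const Instr &I, bool Signed) { States[I.Id] = Signed; }

  std::optional<bool> getRAState(const Instr &I) const {
    auto It = States.find(I.Id);
    if (It == States.end())
      return std::nullopt;
    return It->second;
  }
};

int main() {
  RAStateTracker T;
  Instr A{0}, B{1};
  T.setRAState(A, /*Signed=*/true);
  const Instr Insts[] = {A, B};
  for (const Instr &I : Insts) {
    std::optional<bool> S = T.getRAState(I);
    std::printf("inst %d: %s\n", I.Id,
                S ? (*S ? "signed" : "unsigned") : "unknown");
  }
}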
diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt
index 3197e62..ec012f0 100644
--- a/bolt/lib/Passes/CMakeLists.txt
+++ b/bolt/lib/Passes/CMakeLists.txt
@@ -17,18 +17,18 @@ add_llvm_library(LLVMBOLTPasses
IdenticalCodeFolding.cpp
IndirectCallPromotion.cpp
Inliner.cpp
- InsertNegateRAStatePass.cpp
Instrumentation.cpp
JTFootprintReduction.cpp
LongJmp.cpp
LoopInversionPass.cpp
LivenessAnalysis.cpp
MCF.cpp
- MarkRAStates.cpp
PatchEntries.cpp
PAuthGadgetScanner.cpp
PettisAndHansen.cpp
PLTCall.cpp
+ PointerAuthCFIAnalyzer.cpp
+ PointerAuthCFIFixup.cpp
ProfileQualityStats.cpp
RegAnalysis.cpp
RegReAssign.cpp
diff --git a/bolt/lib/Passes/IdenticalCodeFolding.cpp b/bolt/lib/Passes/IdenticalCodeFolding.cpp
index 8923562..c5c33b7 100644
--- a/bolt/lib/Passes/IdenticalCodeFolding.cpp
+++ b/bolt/lib/Passes/IdenticalCodeFolding.cpp
@@ -377,9 +377,11 @@ namespace bolt {
void IdenticalCodeFolding::initVTableReferences(const BinaryContext &BC) {
for (const auto &[Address, Data] : BC.getBinaryData()) {
// Filter out all symbols that are not vtables.
- if (!Data->getName().starts_with("_ZTV"))
+ if (!Data->getName().starts_with("_ZTV") && // vtable
+ !Data->getName().starts_with("_ZTCN")) // construction vtable
continue;
- for (uint64_t I = Address, End = I + Data->getSize(); I < End; I += 8)
+ for (uint64_t I = Address, End = I + Data->getSize(); I < End;
+ I += VTableAddressGranularity)
setAddressUsedInVTable(I);
}
}
@@ -437,8 +439,9 @@ void IdenticalCodeFolding::markFunctionsUnsafeToFold(BinaryContext &BC) {
NamedRegionTimer MarkFunctionsUnsafeToFoldTimer(
"markFunctionsUnsafeToFold", "markFunctionsUnsafeToFold", "ICF breakdown",
"ICF breakdown", opts::TimeICF);
- if (!BC.isX86())
- BC.outs() << "BOLT-WARNING: safe ICF is only supported for x86\n";
+ if (!BC.isX86() && !BC.isAArch64())
+ BC.outs()
+ << "BOLT-WARNING: safe ICF is only supported for x86 and AArch64\n";
analyzeDataRelocations(BC);
analyzeFunctions(BC);
}
diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp
index 9b28c7e..0740fce 100644
--- a/bolt/lib/Passes/Inliner.cpp
+++ b/bolt/lib/Passes/Inliner.cpp
@@ -195,6 +195,13 @@ InliningInfo getInliningInfo(const BinaryFunction &BF) {
if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
continue;
+ // Pointer signing and authentication instructions are used around
+ // Push and Pop. These are also straightforward to handle.
+ if (BC.isAArch64() &&
+ (BC.MIB->isPSignOnLR(Inst) || BC.MIB->isPAuthOnLR(Inst) ||
+ BC.MIB->isPAuthAndRet(Inst)))
+ continue;
+
DirectSP |= BC.MIB->hasDefOfPhysReg(Inst, SPReg) ||
BC.MIB->hasUseOfPhysReg(Inst, SPReg);
}
@@ -338,6 +345,18 @@ Inliner::inlineCall(BinaryBasicBlock &CallerBB,
BC.Ctx.get());
}
+ // Handling fused authentication and return instructions (Armv8.3-A):
+ // if the Callee does not end in a tailcall, the return will be removed
+ // from the inlined block. If that return is RETA(A|B), we have to keep
+ // the authentication part.
+ // RETAA -> AUTIASP
+ // RETAB -> AUTIBSP
+ if (!CSIsTailCall && BC.isAArch64() && BC.MIB->isPAuthAndRet(Inst)) {
+ MCInst Auth;
+ BC.MIB->createMatchingAuth(Inst, Auth);
+ InsertII =
+ std::next(InlinedBB->insertInstruction(InsertII, std::move(Auth)));
+ }
if (CSIsTailCall || (!MIB.isCall(Inst) && !MIB.isReturn(Inst))) {
InsertII =
std::next(InlinedBB->insertInstruction(InsertII, std::move(Inst)));
@@ -472,6 +491,32 @@ bool Inliner::inlineCallsInFunction(BinaryFunction &Function) {
}
}
+ // AArch64 BTI:
+ // If the callee has an indirect tailcall (BR), we would transform it to
+ // an indirect call (BLR) in InlineCall. Because of this, we would have to
+ // update the BTI at the target of the tailcall. However, these targets
+ // are not known. Instead, we skip inlining blocks with indirect
+ // tailcalls.
+ auto HasIndirectTailCall = [&](const BinaryFunction &BF) -> bool {
+ for (const auto &BB : BF) {
+ for (const auto &II : BB) {
+ if (BC.MIB->isIndirectBranch(II) && BC.MIB->isTailCall(II)) {
+ return true;
+ }
+ }
+ }
+ return false;
+ };
+
+ if (BC.isAArch64() && BC.usesBTI() &&
+ HasIndirectTailCall(*TargetFunction)) {
+ ++InstIt;
+ LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Skipping inlining block with tailcall"
+ << " in " << Function << " : " << BB->getName()
+ << " to keep BTIs consistent.\n");
+ continue;
+ }
+
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: inlining call to " << *TargetFunction
<< " in " << Function << " : " << BB->getName()
<< ". Count: " << BB->getKnownExecutionCount()
diff --git a/bolt/lib/Passes/InsertNegateRAStatePass.cpp b/bolt/lib/Passes/InsertNegateRAStatePass.cpp
deleted file mode 100644
index 33664e1..0000000
--- a/bolt/lib/Passes/InsertNegateRAStatePass.cpp
+++ /dev/null
@@ -1,142 +0,0 @@
-//===- bolt/Passes/InsertNegateRAStatePass.cpp ----------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the InsertNegateRAStatePass class. It inserts
-// OpNegateRAState CFIs to places where the state of two consecutive
-// instructions are different.
-//
-//===----------------------------------------------------------------------===//
-#include "bolt/Passes/InsertNegateRAStatePass.h"
-#include "bolt/Core/BinaryFunction.h"
-#include "bolt/Core/ParallelUtilities.h"
-#include <cstdlib>
-
-using namespace llvm;
-
-namespace llvm {
-namespace bolt {
-
-void InsertNegateRAState::runOnFunction(BinaryFunction &BF) {
- BinaryContext &BC = BF.getBinaryContext();
-
- if (BF.getState() == BinaryFunction::State::Empty)
- return;
-
- if (BF.getState() != BinaryFunction::State::CFG &&
- BF.getState() != BinaryFunction::State::CFG_Finalized) {
- BC.outs() << "BOLT-INFO: no CFG for " << BF.getPrintName()
- << " in InsertNegateRAStatePass\n";
- return;
- }
-
- inferUnknownStates(BF);
-
- for (FunctionFragment &FF : BF.getLayout().fragments()) {
- coverFunctionFragmentStart(BF, FF);
- bool FirstIter = true;
- MCInst PrevInst;
- // As this pass runs after function splitting, we should only check
- // consecutive instructions inside FunctionFragments.
- for (BinaryBasicBlock *BB : FF) {
- for (auto It = BB->begin(); It != BB->end(); ++It) {
- MCInst &Inst = *It;
- if (BC.MIB->isCFI(Inst))
- continue;
- if (!FirstIter) {
- // Consecutive instructions with different RAState means we need to
- // add a OpNegateRAState.
- if ((BC.MIB->isRASigned(PrevInst) && BC.MIB->isRAUnsigned(Inst)) ||
- (BC.MIB->isRAUnsigned(PrevInst) && BC.MIB->isRASigned(Inst))) {
- It = BF.addCFIInstruction(
- BB, It, MCCFIInstruction::createNegateRAState(nullptr));
- }
- } else {
- FirstIter = false;
- }
- PrevInst = *It;
- }
- }
- }
-}
-
-void InsertNegateRAState::coverFunctionFragmentStart(BinaryFunction &BF,
- FunctionFragment &FF) {
- BinaryContext &BC = BF.getBinaryContext();
- if (FF.empty())
- return;
- // Find the first BB in the FF which has Instructions.
- // BOLT can generate empty BBs at function splitting which are only used as
- // target labels. We should add the negate-ra-state CFI to the first
- // non-empty BB.
- auto *FirstNonEmpty =
- std::find_if(FF.begin(), FF.end(), [](BinaryBasicBlock *BB) {
- // getFirstNonPseudo returns BB.end() if it does not find any
- // Instructions.
- return BB->getFirstNonPseudo() != BB->end();
- });
- // If a function is already split in the input, the first FF can also start
- // with Signed state. This covers that scenario as well.
- if (BC.MIB->isRASigned(*((*FirstNonEmpty)->begin()))) {
- BF.addCFIInstruction(*FirstNonEmpty, (*FirstNonEmpty)->begin(),
- MCCFIInstruction::createNegateRAState(nullptr));
- }
-}
-
-void InsertNegateRAState::inferUnknownStates(BinaryFunction &BF) {
- BinaryContext &BC = BF.getBinaryContext();
- bool FirstIter = true;
- MCInst PrevInst;
- for (BinaryBasicBlock &BB : BF) {
- for (MCInst &Inst : BB) {
- if (BC.MIB->isCFI(Inst))
- continue;
-
- if (!FirstIter && BC.MIB->isRAStateUnknown(Inst)) {
- if (BC.MIB->isRASigned(PrevInst) || BC.MIB->isPSignOnLR(PrevInst)) {
- BC.MIB->setRASigned(Inst);
- } else if (BC.MIB->isRAUnsigned(PrevInst) ||
- BC.MIB->isPAuthOnLR(PrevInst)) {
- BC.MIB->setRAUnsigned(Inst);
- }
- } else {
- FirstIter = false;
- }
- PrevInst = Inst;
- }
- }
-}
-
-Error InsertNegateRAState::runOnFunctions(BinaryContext &BC) {
- std::atomic<uint64_t> FunctionsModified{0};
- ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
- FunctionsModified++;
- runOnFunction(BF);
- };
-
- ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) {
- // We can skip functions which did not include negate-ra-state CFIs. This
- // includes code using pac-ret hardening as well, if the binary is
- // compiled with `-fno-exceptions -fno-unwind-tables
- // -fno-asynchronous-unwind-tables`
- return !BF.containedNegateRAState() || BF.isIgnored();
- };
-
- ParallelUtilities::runOnEachFunction(
- BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
- SkipPredicate, "InsertNegateRAStatePass");
-
- BC.outs() << "BOLT-INFO: rewritten pac-ret DWARF info in "
- << FunctionsModified << " out of " << BC.getBinaryFunctions().size()
- << " functions "
- << format("(%.2lf%%).\n", (100.0 * FunctionsModified) /
- BC.getBinaryFunctions().size());
- return Error::success();
-}
-
-} // end namespace bolt
-} // end namespace llvm
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 01b350b..d38a7fa 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -547,7 +547,7 @@ protected:
// Being trusted is a strictly stronger property than being
// safe-to-dereference.
- assert(!Next.TrustedRegs.test(Next.SafeToDerefRegs) &&
+ assert(Next.TrustedRegs.subsetOf(Next.SafeToDerefRegs) &&
"SafeToDerefRegs should contain all TrustedRegs");
return Next;
diff --git a/bolt/lib/Passes/MarkRAStates.cpp b/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
index b262d66..697b1bb 100644
--- a/bolt/lib/Passes/MarkRAStates.cpp
+++ b/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
@@ -1,4 +1,4 @@
-//===- bolt/Passes/MarkRAStates.cpp ---------------------------------===//
+//===- bolt/Passes/PointerAuthCFIAnalyzer.cpp -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the MarkRAStates class.
+// This file implements the PointerAuthCFIAnalyzer class.
// Three CFIs have an influence on the RA State of an instruction:
// - NegateRAState flips the RA State,
// - RememberState pushes the RA State to a stack,
@@ -16,10 +16,10 @@
// the RA State of each instruction, and save it as new MCAnnotations. The new
// annotations are Signing, Signed, Authenticating and Unsigned. After
// optimizations, .cfi_negate_ra_state CFIs are added to the places where the
-// state changes in InsertNegateRAStatePass.
+// state changes in PointerAuthCFIFixup.
//
//===----------------------------------------------------------------------===//
-#include "bolt/Passes/MarkRAStates.h"
+#include "bolt/Passes/PointerAuthCFIAnalyzer.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Core/ParallelUtilities.h"
#include <cstdlib>
@@ -28,10 +28,14 @@
using namespace llvm;
+namespace opts {
+extern llvm::cl::opt<unsigned> Verbosity;
+} // namespace opts
+
namespace llvm {
namespace bolt {
-bool MarkRAStates::runOnFunction(BinaryFunction &BF) {
+bool PointerAuthCFIAnalyzer::runOnFunction(BinaryFunction &BF) {
BinaryContext &BC = BF.getBinaryContext();
@@ -43,9 +47,10 @@ bool MarkRAStates::runOnFunction(BinaryFunction &BF) {
// Not all functions have .cfi_negate_ra_state in them. But if one does,
// we expect psign/pauth instructions to have the hasNegateRAState
// annotation.
- BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
- << BF.getPrintName()
- << ": ptr sign/auth inst without .cfi_negate_ra_state\n";
+ if (opts::Verbosity >= 1)
+ BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
+ << BF.getPrintName()
+ << ": ptr sign/auth inst without .cfi_negate_ra_state\n";
std::lock_guard<std::mutex> Lock(IgnoreMutex);
BF.setIgnored();
return false;
@@ -65,36 +70,30 @@ bool MarkRAStates::runOnFunction(BinaryFunction &BF) {
if (BC.MIB->isPSignOnLR(Inst)) {
if (RAState) {
// RA signing instructions should only follow unsigned RA state.
- BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
- << BF.getPrintName()
- << ": ptr signing inst encountered in Signed RA state\n";
+ if (opts::Verbosity >= 1)
+ BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
+ << BF.getPrintName()
+ << ": ptr signing inst encountered in Signed RA state\n";
std::lock_guard<std::mutex> Lock(IgnoreMutex);
BF.setIgnored();
return false;
}
- // The signing instruction itself is unsigned, the next will be
- // signed.
- BC.MIB->setRAUnsigned(Inst);
} else if (BC.MIB->isPAuthOnLR(Inst)) {
if (!RAState) {
// RA authenticating instructions should only follow signed RA state.
- BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
- << BF.getPrintName()
- << ": ptr authenticating inst encountered in Unsigned RA "
- "state\n";
+ if (opts::Verbosity >= 1)
+ BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
+ << BF.getPrintName()
+ << ": ptr authenticating inst encountered in Unsigned RA "
+ "state\n";
std::lock_guard<std::mutex> Lock(IgnoreMutex);
BF.setIgnored();
return false;
}
- // The authenticating instruction itself is signed, but the next will be
- // unsigned.
- BC.MIB->setRASigned(Inst);
- } else if (RAState) {
- BC.MIB->setRASigned(Inst);
- } else {
- BC.MIB->setRAUnsigned(Inst);
}
+ BC.MIB->setRAState(Inst, RAState);
+
// Updating RAState. All updates are valid from the next instruction.
// Because the same instruction can have remember and restore, the order
// here is relevant. This is the reason to loop over Annotations instead
@@ -118,7 +117,7 @@ bool MarkRAStates::runOnFunction(BinaryFunction &BF) {
return true;
}
-Error MarkRAStates::runOnFunctions(BinaryContext &BC) {
+Error PointerAuthCFIAnalyzer::runOnFunctions(BinaryContext &BC) {
std::atomic<uint64_t> FunctionsIgnored{0};
ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
if (!runOnFunction(BF)) {
@@ -138,14 +137,35 @@ Error MarkRAStates::runOnFunctions(BinaryContext &BC) {
return P.second.containedNegateRAState() && !P.second.isIgnored();
});
+ if (Total == 0)
+ return Error::success();
+
ParallelUtilities::runOnEachFunction(
BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
- SkipPredicate, "MarkRAStates");
- BC.outs() << "BOLT-INFO: MarkRAStates ran on " << Total
+ SkipPredicate, "PointerAuthCFIAnalyzer");
+
+ float IgnoredPercent = (100.0 * FunctionsIgnored) / Total;
+ BC.outs() << "BOLT-INFO: PointerAuthCFIAnalyzer ran on " << Total
<< " functions. Ignored " << FunctionsIgnored << " functions "
- << format("(%.2lf%%)", (100.0 * FunctionsIgnored) / Total)
+ << format("(%.2lf%%)", IgnoredPercent)
<< " because of CFI inconsistencies\n";
+ // Errors in the input are expected from two sources:
+ // - compilers emitting incorrect CFIs. This happens more frequently with
+ // older compiler versions, but it should not account for a large
+ // percentage.
+ // - input binary is using synchronous unwind tables. This means that after
+ // call sites, the unwind CFIs are dropped: the pass sees missing
+ // .cfi_negate_ra_state from autiasp instructions. If this is the case, a
+ // larger percentage of functions will be ignored.
+ //
+ // This is why the 10% threshold was chosen: we should not warn about
+ // synchronous unwind tables if only a few % are ignored.
+ if (IgnoredPercent >= 10.0)
+ BC.outs() << "BOLT-WARNING: PointerAuthCFIAnalyzer only supports "
+ "asynchronous unwind tables. For C compilers, see "
+ "-fasynchronous-unwind-tables.\n";
+
return Error::success();
}
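As the renamed file's header comment describes, the analyzer derives a per-instruction RA state from three CFIs: negate-ra-state flips the current state, remember-state pushes it onto a stack, and restore-state pops it. A small self-contained sketch of that bookkeeping under those rules (illustrative only, not BOLT code):

// RA-state bookkeeping sketch driven by a sequence of CFI operations.
#include <cassert>
#include <stack>
#include <vector>

enum class CFIOp { NegateRAState, RememberState, RestoreState };

// Returns the RA state in effect after applying each CFI in order,
// starting from InitialSigned.
std::vector<bool> trackRAState(const std::vector<CFIOp> &Ops,
                               bool InitialSigned) {
  std::vector<bool> StateAfter;
  std::stack<bool> Saved;
  bool Signed = InitialSigned;
  for (CFIOp Op : Ops) {
    switch (Op) {
    case CFIOp::NegateRAState:
      Signed = !Signed;
      break;
    case CFIOp::RememberState:
      Saved.push(Signed);
      break;
    case CFIOp::RestoreState:
      assert(!Saved.empty() && "restore without matching remember");
      Signed = Saved.top();
      Saved.pop();
      break;
    }
    StateAfter.push_back(Signed);
  }
  return StateAfter;
}

int main() {
  // paciasp (negate), remember, autiasp (negate), restore.
  std::vector<CFIOp> Ops = {CFIOp::NegateRAState, CFIOp::RememberState,
                            CFIOp::NegateRAState, CFIOp::RestoreState};
  std::vector<bool> S = trackRAState(Ops, /*InitialSigned=*/false);
  assert(S.back() == true && "restored to the remembered (signed) state");
  return 0;
}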
diff --git a/bolt/lib/Passes/PointerAuthCFIFixup.cpp b/bolt/lib/Passes/PointerAuthCFIFixup.cpp
new file mode 100644
index 0000000..56086da
--- /dev/null
+++ b/bolt/lib/Passes/PointerAuthCFIFixup.cpp
@@ -0,0 +1,268 @@
+//===- bolt/Passes/PointerAuthCFIFixup.cpp --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PointerAuthCFIFixup class. It inserts
+// OpNegateRAState CFIs at places where the RA states of two consecutive
+// instructions differ.
+//
+//===----------------------------------------------------------------------===//
+#include "bolt/Passes/PointerAuthCFIFixup.h"
+#include "bolt/Core/BinaryFunction.h"
+#include "bolt/Core/ParallelUtilities.h"
+#include <cstdlib>
+
+using namespace llvm;
+
+namespace llvm {
+namespace bolt {
+
+static bool PassFailed = false;
+
+void PointerAuthCFIFixup::runOnFunction(BinaryFunction &BF) {
+ if (PassFailed)
+ return;
+
+ BinaryContext &BC = BF.getBinaryContext();
+
+ if (BF.getState() == BinaryFunction::State::Empty)
+ return;
+
+ if (BF.getState() != BinaryFunction::State::CFG &&
+ BF.getState() != BinaryFunction::State::CFG_Finalized) {
+ BC.outs() << "BOLT-INFO: no CFG for " << BF.getPrintName()
+ << " in PointerAuthCFIFixup\n";
+ return;
+ }
+
+ inferUnknownStates(BF);
+
+ for (FunctionFragment &FF : BF.getLayout().fragments()) {
+ coverFunctionFragmentStart(BF, FF);
+ bool FirstIter = true;
+ bool PrevRAState = false;
+ // As this pass runs after function splitting, we should only check
+ // consecutive instructions inside FunctionFragments.
+ for (BinaryBasicBlock *BB : FF) {
+ for (auto It = BB->begin(); It != BB->end(); ++It) {
+ MCInst &Inst = *It;
+ if (BC.MIB->isCFI(Inst))
+ continue;
+ std::optional<bool> RAState = BC.MIB->getRAState(Inst);
+ if (!RAState.has_value()) {
+ BC.errs() << "BOLT-ERROR: unknown RAState after inferUnknownStates "
+ << " in function " << BF.getPrintName() << "\n";
+ PassFailed = true;
+ return;
+ }
+ if (!FirstIter) {
+ // Consecutive instructions with different RAStates mean we need to
+ // add an OpNegateRAState.
+ if (*RAState != PrevRAState)
+ It = BF.addCFIInstruction(
+ BB, It, MCCFIInstruction::createNegateRAState(nullptr));
+ } else {
+ FirstIter = false;
+ }
+ PrevRAState = *RAState;
+ }
+ }
+ }
+}
+
+void PointerAuthCFIFixup::inferUnknownStates(BinaryFunction &BF) {
+ BinaryContext &BC = BF.getBinaryContext();
+
+ // Fill in missing RAStates in simple cases (inside BBs).
+ for (BinaryBasicBlock &BB : BF) {
+ fillUnknownStateInBB(BC, BB);
+ }
+ // BasicBlocks which are made entirely of "new instructions" (instructions
+ // without RAState annotation) are stubs, and do not have correct unwind info.
+ // We should iterate in layout order and fill them based on previous known
+ // RAState.
+ fillUnknownStubs(BF);
+}
+
+void PointerAuthCFIFixup::coverFunctionFragmentStart(BinaryFunction &BF,
+ FunctionFragment &FF) {
+ BinaryContext &BC = BF.getBinaryContext();
+ if (FF.empty())
+ return;
+ // Find the first BB in the FF which has Instructions.
+ // BOLT can generate empty BBs at function splitting which are only used as
+ // target labels. We should add the negate-ra-state CFI to the first
+ // non-empty BB.
+ auto *FirstNonEmpty =
+ std::find_if(FF.begin(), FF.end(), [](BinaryBasicBlock *BB) {
+ // getFirstNonPseudo returns BB.end() if it does not find any
+ // Instructions.
+ return BB->getFirstNonPseudo() != BB->end();
+ });
+ // If a function is already split in the input, the first FF can also start
+ // with Signed state. This covers that scenario as well.
+ auto II = (*FirstNonEmpty)->getFirstNonPseudo();
+ std::optional<bool> RAState = BC.MIB->getRAState(*II);
+ if (!RAState.has_value()) {
+ BC.errs() << "BOLT-ERROR: unknown RAState after inferUnknownStates "
+ << " in function " << BF.getPrintName() << "\n";
+ PassFailed = true;
+ return;
+ }
+ if (*RAState)
+ BF.addCFIInstruction(*FirstNonEmpty, II,
+ MCCFIInstruction::createNegateRAState(nullptr));
+}
+
+std::optional<bool>
+PointerAuthCFIFixup::getFirstKnownRAState(BinaryContext &BC,
+ BinaryBasicBlock &BB) {
+ for (const MCInst &Inst : BB) {
+ if (BC.MIB->isCFI(Inst))
+ continue;
+ std::optional<bool> RAState = BC.MIB->getRAState(Inst);
+ if (RAState.has_value())
+ return RAState;
+ }
+ return std::nullopt;
+}
+
+bool PointerAuthCFIFixup::isUnknownBlock(BinaryContext &BC,
+ BinaryBasicBlock &BB) {
+ std::optional<bool> FirstRAState = getFirstKnownRAState(BC, BB);
+ return !FirstRAState.has_value();
+}
+
+void PointerAuthCFIFixup::fillUnknownStateInBB(BinaryContext &BC,
+ BinaryBasicBlock &BB) {
+
+ auto First = BB.getFirstNonPseudo();
+ if (First == BB.end())
+ return;
+ // If the first instruction has unknown RAState, we should copy the first
+ // known RAState.
+ std::optional<bool> RAState = BC.MIB->getRAState(*First);
+ if (!RAState.has_value()) {
+ std::optional<bool> FirstRAState = getFirstKnownRAState(BC, BB);
+ if (!FirstRAState.has_value())
+ // We fill unknown BBs later.
+ return;
+
+ BC.MIB->setRAState(*First, *FirstRAState);
+ }
+
+ // At this point we know the RAState of the first instruction,
+ // so we can propagate the RAStates to all subsequent unknown instructions.
+ MCInst Prev = *First;
+ for (auto It = First + 1; It != BB.end(); ++It) {
+ MCInst &Inst = *It;
+ if (BC.MIB->isCFI(Inst))
+ continue;
+
+ // No need to check for nullopt: we only entered this loop after the first
+ // instruction had its RAState set, and RAState is always set for the
+ // previous instruction in the previous iteration of the loop.
+ std::optional<bool> PrevRAState = BC.MIB->getRAState(Prev);
+
+ std::optional<bool> RAState = BC.MIB->getRAState(Inst);
+ if (!RAState.has_value()) {
+ if (BC.MIB->isPSignOnLR(Prev))
+ PrevRAState = true;
+ else if (BC.MIB->isPAuthOnLR(Prev))
+ PrevRAState = false;
+ BC.MIB->setRAState(Inst, *PrevRAState);
+ }
+ Prev = Inst;
+ }
+}
+
+void PointerAuthCFIFixup::markUnknownBlock(BinaryContext &BC,
+ BinaryBasicBlock &BB, bool State) {
+ // If we call this when an Instruction has either kRASigned or kRAUnsigned
+ // annotation, setRASigned or setRAUnsigned would fail.
+ assert(isUnknownBlock(BC, BB) &&
+ "markUnknownBlock should only be called on unknown blocks");
+ for (MCInst &Inst : BB) {
+ if (BC.MIB->isCFI(Inst))
+ continue;
+ BC.MIB->setRAState(Inst, State);
+ }
+}
+
+void PointerAuthCFIFixup::fillUnknownStubs(BinaryFunction &BF) {
+ BinaryContext &BC = BF.getBinaryContext();
+ bool FirstIter = true;
+ MCInst PrevInst;
+ for (FunctionFragment &FF : BF.getLayout().fragments()) {
+ for (BinaryBasicBlock *BB : FF) {
+ if (FirstIter) {
+ FirstIter = false;
+ if (isUnknownBlock(BC, *BB))
+ // If the first BasicBlock is unknown, the function's entry RAState
+ // should be used.
+ markUnknownBlock(BC, *BB, BF.getInitialRAState());
+ } else if (isUnknownBlock(BC, *BB)) {
+ // As explained in issue #160989, the unwind info is incorrect for
+ // stubs. Indicating the correct RAState without the rest of the unwind
+ // info being correct is not useful. Instead, we copy the RAState from
+ // the previous instruction.
+ std::optional<bool> PrevRAState = BC.MIB->getRAState(PrevInst);
+ if (!PrevRAState.has_value()) {
+ // No non-cfi instruction encountered in the function yet.
+ // This means the RAState is the same as at the function entry.
+ markUnknownBlock(BC, *BB, BF.getInitialRAState());
+ continue;
+ }
+
+ if (BC.MIB->isPSignOnLR(PrevInst))
+ PrevRAState = true;
+ else if (BC.MIB->isPAuthOnLR(PrevInst))
+ PrevRAState = false;
+ markUnknownBlock(BC, *BB, *PrevRAState);
+ }
+ // This function iterates on BasicBlocks, so the PrevInst has to be
+ // updated to the last instruction of the current BasicBlock. If the
+ // BasicBlock is empty, or only has PseudoInstructions, PrevInst will not
+ // be updated.
+ auto Last = BB->getLastNonPseudo();
+ if (Last != BB->rend())
+ PrevInst = *Last;
+ }
+ }
+}
+
+Error PointerAuthCFIFixup::runOnFunctions(BinaryContext &BC) {
+ std::atomic<uint64_t> FunctionsModified{0};
+ ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
+ FunctionsModified++;
+ runOnFunction(BF);
+ };
+
+ ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) {
+ // We can skip functions which did not include negate-ra-state CFIs. This
+ // includes code using pac-ret hardening as well, if the binary is
+ // compiled with `-fno-exceptions -fno-unwind-tables
+ // -fno-asynchronous-unwind-tables`
+ return !BF.containedNegateRAState() || BF.isIgnored();
+ };
+
+ ParallelUtilities::runOnEachFunction(
+ BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
+ SkipPredicate, "PointerAuthCFIFixup");
+
+ BC.outs() << "BOLT-INFO: rewritten pac-ret DWARF info in "
+ << FunctionsModified << " out of " << BC.getBinaryFunctions().size()
+ << " functions "
+ << format("(%.2lf%%).\n", (100.0 * FunctionsModified) /
+ BC.getBinaryFunctions().size());
+ if (PassFailed)
+ return createFatalBOLTError("");
+ return Error::success();
+}
+
+} // end namespace bolt
+} // end namespace llvm
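PointerAuthCFIFixup then works in the opposite direction: once every instruction carries a known RA state, a .cfi_negate_ra_state is needed exactly where two consecutive instructions in layout order disagree. A minimal sketch of that rule (illustrative only, not BOLT code; negatePositions is a made-up helper):

// Finds where negate-ra-state CFIs must be inserted in a layout-ordered
// sequence of per-instruction RA states.
#include <cstddef>
#include <vector>

// Returns the indices of instructions that must be preceded by a
// .cfi_negate_ra_state, assuming index 0 already matches the fragment's
// entry state.
std::vector<std::size_t> negatePositions(const std::vector<bool> &RAStates) {
  std::vector<std::size_t> Positions;
  for (std::size_t I = 1; I < RAStates.size(); ++I)
    if (RAStates[I] != RAStates[I - 1])
      Positions.push_back(I);
  return Positions;
}

int main() {
  // unsigned, signed, signed, unsigned -> negate before indices 1 and 3.
  std::vector<bool> States = {false, true, true, false};
  std::vector<std::size_t> P = negatePositions(States);
  return (P == std::vector<std::size_t>{1, 3}) ? 0 : 1;
}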
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 8554683..6b96901 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -159,8 +159,6 @@ std::vector<SectionNameAndRange> getTextSections(const BinaryContext *BC) {
}
}
-constexpr uint64_t DataAggregator::KernelBaseAddr;
-
DataAggregator::~DataAggregator() { deleteTempFiles(); }
namespace {
@@ -2399,10 +2397,7 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
PseudoProbeDecoder->getAddress2ProbesMap();
BinaryFunction::FragmentsSetTy Fragments(BF->Fragments);
Fragments.insert(BF);
- DenseMap<
- uint32_t,
- std::vector<std::reference_wrapper<const MCDecodedPseudoProbe>>>
- BlockProbes;
+ DenseMap<uint32_t, YAMLProfileWriter::BlockProbeCtx> BlockCtx;
for (const BinaryFunction *F : Fragments) {
const uint64_t FuncAddr = F->getAddress();
for (const MCDecodedPseudoProbe &Probe :
@@ -2410,15 +2405,14 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
const uint32_t OutputAddress = Probe.getAddress();
const uint32_t InputOffset = BAT->translate(
FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true);
- const unsigned BlockIndex = getBlock(InputOffset).second;
- BlockProbes[BlockIndex].emplace_back(Probe);
+ const auto &[BlockOffset, BlockIndex] = getBlock(InputOffset);
+ BlockCtx[BlockIndex].addBlockProbe(InlineTreeNodeId, Probe,
+ InputOffset - BlockOffset);
}
}
- for (auto &[Block, Probes] : BlockProbes) {
- YamlBF.Blocks[Block].PseudoProbes =
- YAMLProfileWriter::writeBlockProbes(Probes, InlineTreeNodeId);
- }
+ for (auto &[Block, Ctx] : BlockCtx)
+ Ctx.finalize(YamlBF.Blocks[Block]);
}
// Skip printing if there's no profile data
llvm::erase_if(
diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 1a61949..5fb65153 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -348,26 +348,10 @@ private:
return It->second;
};
- auto matchPseudoProbeInfo = [&](const yaml::bolt::PseudoProbeInfo
- &ProfileProbe,
- uint32_t NodeId) {
- for (uint64_t Index = 0; Index < 64; ++Index)
- if (ProfileProbe.BlockMask & 1ull << Index)
- ++FlowBlockMatchCount[matchProfileProbeToBlock(NodeId, Index + 1)];
- for (const auto &ProfileProbes :
- {ProfileProbe.BlockProbes, ProfileProbe.IndCallProbes,
- ProfileProbe.CallProbes})
- for (uint64_t ProfileProbe : ProfileProbes)
- ++FlowBlockMatchCount[matchProfileProbeToBlock(NodeId, ProfileProbe)];
- };
-
- for (const yaml::bolt::PseudoProbeInfo &ProfileProbe : BlockPseudoProbes) {
- if (!ProfileProbe.InlineTreeNodes.empty())
- for (uint32_t ProfileInlineTreeNode : ProfileProbe.InlineTreeNodes)
- matchPseudoProbeInfo(ProfileProbe, ProfileInlineTreeNode);
- else
- matchPseudoProbeInfo(ProfileProbe, ProfileProbe.InlineTreeIndex);
- }
+ for (const yaml::bolt::PseudoProbeInfo &ProfileProbe : BlockPseudoProbes)
+ for (uint32_t Node : ProfileProbe.InlineTreeNodes)
+ for (uint64_t Probe : ProfileProbe.BlockProbes)
+ ++FlowBlockMatchCount[matchProfileProbeToBlock(Node, Probe)];
uint32_t BestMatchCount = 0;
uint32_t TotalMatchCount = 0;
const FlowBlock *BestMatchBlock = nullptr;
diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp
index 5c631f9..cd4e77b 100644
--- a/bolt/lib/Profile/YAMLProfileWriter.cpp
+++ b/bolt/lib/Profile/YAMLProfileWriter.cpp
@@ -129,50 +129,62 @@ YAMLProfileWriter::convertPseudoProbeDesc(const MCPseudoProbeDecoder &Decoder) {
return {Desc, InlineTree};
}
-std::vector<yaml::bolt::PseudoProbeInfo>
-YAMLProfileWriter::convertNodeProbes(NodeIdToProbes &NodeProbes) {
- struct BlockProbeInfoHasher {
- size_t operator()(const yaml::bolt::PseudoProbeInfo &BPI) const {
- return llvm::hash_combine(llvm::hash_combine_range(BPI.BlockProbes),
- llvm::hash_combine_range(BPI.CallProbes),
- llvm::hash_combine_range(BPI.IndCallProbes));
+void YAMLProfileWriter::BlockProbeCtx::addBlockProbe(
+ const InlineTreeMapTy &Map, const MCDecodedPseudoProbe &Probe,
+ uint32_t ProbeOffset) {
+ auto It = Map.find(Probe.getInlineTreeNode());
+ if (It == Map.end())
+ return;
+ auto NodeId = It->second;
+ uint32_t Index = Probe.getIndex();
+ if (Probe.isCall())
+ CallProbes[ProbeOffset] =
+ Call{Index, NodeId, Probe.isIndirectCall(), false};
+ else
+ NodeToProbes[NodeId].emplace_back(Index);
+}
+
+void YAMLProfileWriter::BlockProbeCtx::finalize(
+ yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
+ // Hash block probes by vector
+ struct ProbeHasher {
+ size_t operator()(const ArrayRef<uint64_t> Probes) const {
+ return llvm::hash_combine_range(Probes);
}
};
- // Check identical BlockProbeInfo structs and merge them
- std::unordered_map<yaml::bolt::PseudoProbeInfo, std::vector<uint32_t>,
- BlockProbeInfoHasher>
- BPIToNodes;
- for (auto &[NodeId, Probes] : NodeProbes) {
- yaml::bolt::PseudoProbeInfo BPI;
- BPI.BlockProbes = std::vector(Probes[0].begin(), Probes[0].end());
- BPI.IndCallProbes = std::vector(Probes[1].begin(), Probes[1].end());
- BPI.CallProbes = std::vector(Probes[2].begin(), Probes[2].end());
- BPIToNodes[BPI].push_back(NodeId);
+ // Check identical block probes and merge them
+ std::unordered_map<std::vector<uint64_t>, std::vector<uint32_t>, ProbeHasher>
+ ProbesToNodes;
+ for (auto &[NodeId, Probes] : NodeToProbes) {
+ llvm::sort(Probes);
+ ProbesToNodes[Probes].emplace_back(NodeId);
}
-
- auto handleMask = [](const auto &Ids, auto &Vec, auto &Mask) {
- for (auto Id : Ids)
- if (Id > 64)
- Vec.emplace_back(Id);
- else
- Mask |= 1ull << (Id - 1);
- };
-
- // Add to YAML with merged nodes/block mask optimizations
- std::vector<yaml::bolt::PseudoProbeInfo> YamlProbes;
- YamlProbes.reserve(BPIToNodes.size());
- for (const auto &[BPI, Nodes] : BPIToNodes) {
- auto &YamlBPI = YamlProbes.emplace_back(yaml::bolt::PseudoProbeInfo());
- YamlBPI.CallProbes = BPI.CallProbes;
- YamlBPI.IndCallProbes = BPI.IndCallProbes;
- if (Nodes.size() == 1)
- YamlBPI.InlineTreeIndex = Nodes.front();
- else
- YamlBPI.InlineTreeNodes = Nodes;
- handleMask(BPI.BlockProbes, YamlBPI.BlockProbes, YamlBPI.BlockMask);
+ for (auto &[Probes, Nodes] : ProbesToNodes) {
+ llvm::sort(Nodes);
+ YamlBB.PseudoProbes.emplace_back(
+ yaml::bolt::PseudoProbeInfo{Probes, Nodes});
+ }
+ for (yaml::bolt::CallSiteInfo &CSI : YamlBB.CallSites) {
+ auto It = CallProbes.find(CSI.Offset);
+ if (It == CallProbes.end())
+ continue;
+ Call &Probe = It->second;
+ CSI.Probe = Probe.Id;
+ CSI.InlineTreeNode = Probe.Node;
+ CSI.Indirect = Probe.Indirect;
+ Probe.Used = true;
+ }
+ for (const auto &[Offset, Probe] : CallProbes) {
+ if (Probe.Used)
+ continue;
+ yaml::bolt::CallSiteInfo CSI;
+ CSI.Offset = Offset;
+ CSI.Probe = Probe.Id;
+ CSI.InlineTreeNode = Probe.Node;
+ CSI.Indirect = Probe.Indirect;
+ YamlBB.CallSites.emplace_back(CSI);
}
- return YamlProbes;
}
std::tuple<std::vector<yaml::bolt::InlineTreeNode>,
@@ -343,12 +355,13 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
const AddressProbesMap &ProbeMap =
PseudoProbeDecoder->getAddress2ProbesMap();
const uint64_t FuncAddr = BF.getAddress();
- const std::pair<uint64_t, uint64_t> &BlockRange =
- BB->getInputAddressRange();
- const std::pair<uint64_t, uint64_t> BlockAddrRange = {
- FuncAddr + BlockRange.first, FuncAddr + BlockRange.second};
- auto Probes = ProbeMap.find(BlockAddrRange.first, BlockAddrRange.second);
- YamlBB.PseudoProbes = writeBlockProbes(Probes, InlineTreeNodeId);
+ auto [Start, End] = BB->getInputAddressRange();
+ Start += FuncAddr;
+ End += FuncAddr;
+ BlockProbeCtx Ctx;
+ for (const MCDecodedPseudoProbe &Probe : ProbeMap.find(Start, End))
+ Ctx.addBlockProbe(InlineTreeNodeId, Probe, Probe.getAddress() - Start);
+ Ctx.finalize(YamlBB);
}
YamlBF.Blocks.emplace_back(YamlBB);
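The BlockProbeCtx::finalize logic above merges inline-tree nodes that ended up with identical block-probe sets, keyed by the sorted probe vector, so each probe list is emitted only once. A compact illustration of that grouping step (not BOLT code; it uses std::map to sidestep the custom hasher the real code defines for unordered_map):

// Group inline-tree nodes by their sorted block-probe vectors.
#include <algorithm>
#include <cstdio>
#include <map>
#include <vector>

int main() {
  // NodeId -> probe indices observed for that node.
  std::map<unsigned, std::vector<unsigned>> NodeToProbes = {
      {1, {3, 1}}, {2, {1, 3}}, {3, {2}}};

  // Nodes 1 and 2 carry the same probes once sorted, so they share an entry.
  std::map<std::vector<unsigned>, std::vector<unsigned>> ProbesToNodes;
  for (auto &[Node, Probes] : NodeToProbes) {
    std::sort(Probes.begin(), Probes.end());
    ProbesToNodes[Probes].push_back(Node);
  }

  for (const auto &[Probes, Nodes] : ProbesToNodes) {
    std::printf("probes:");
    for (unsigned P : Probes)
      std::printf(" %u", P);
    std::printf("  nodes:");
    for (unsigned N : Nodes)
      std::printf(" %u", N);
    std::printf("\n");
  }
}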
diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
index 1a0f6d7..85f23dc 100644
--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
+++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
@@ -19,15 +19,15 @@
#include "bolt/Passes/IdenticalCodeFolding.h"
#include "bolt/Passes/IndirectCallPromotion.h"
#include "bolt/Passes/Inliner.h"
-#include "bolt/Passes/InsertNegateRAStatePass.h"
#include "bolt/Passes/Instrumentation.h"
#include "bolt/Passes/JTFootprintReduction.h"
#include "bolt/Passes/LongJmp.h"
#include "bolt/Passes/LoopInversionPass.h"
#include "bolt/Passes/MCF.h"
-#include "bolt/Passes/MarkRAStates.h"
#include "bolt/Passes/PLTCall.h"
#include "bolt/Passes/PatchEntries.h"
+#include "bolt/Passes/PointerAuthCFIAnalyzer.h"
+#include "bolt/Passes/PointerAuthCFIFixup.h"
#include "bolt/Passes/ProfileQualityStats.h"
#include "bolt/Passes/RegReAssign.h"
#include "bolt/Passes/ReorderData.h"
@@ -134,6 +134,15 @@ static cl::opt<bool> PrintAArch64Relaxation(
cl::desc("print functions after ADR/LDR Relaxation pass"), cl::Hidden,
cl::cat(BoltOptCategory));
+cl::opt<bool> PrintPAuthCFIAnalyzer(
+ "print-pointer-auth-cfi-analyzer",
+ cl::desc("print functions after PointerAuthCFIAnalyzer pass"), cl::Hidden,
+ cl::cat(BoltOptCategory));
+static cl::opt<bool> PrintPAuthCFIFixup(
+ "print-pointer-auth-cfi-fixup",
+ cl::desc("print functions after PointerAuthCFIFixup pass"), cl::Hidden,
+ cl::cat(BoltOptCategory));
+
static cl::opt<bool>
PrintLongJmp("print-longjmp",
cl::desc("print functions after longjmp pass"), cl::Hidden,
@@ -362,7 +371,8 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
BinaryFunctionPassManager Manager(BC);
if (BC.isAArch64())
- Manager.registerPass(std::make_unique<MarkRAStates>());
+ Manager.registerPass(
+ std::make_unique<PointerAuthCFIAnalyzer>(PrintPAuthCFIAnalyzer));
Manager.registerPass(
std::make_unique<EstimateEdgeCounts>(PrintEstimateEdgeCounts));
@@ -524,7 +534,8 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
// relocations out of range and crash during linking.
Manager.registerPass(std::make_unique<LongJmpPass>(PrintLongJmp));
- Manager.registerPass(std::make_unique<InsertNegateRAState>());
+ Manager.registerPass(
+ std::make_unique<PointerAuthCFIFixup>(PrintPAuthCFIFixup));
}
// This pass should always run last.*
diff --git a/bolt/lib/Rewrite/CMakeLists.txt b/bolt/lib/Rewrite/CMakeLists.txt
index 5b15edc..bc1b2ed 100644
--- a/bolt/lib/Rewrite/CMakeLists.txt
+++ b/bolt/lib/Rewrite/CMakeLists.txt
@@ -24,6 +24,7 @@ add_llvm_library(LLVMBOLTRewrite
BuildIDRewriter.cpp
PseudoProbeRewriter.cpp
RewriteInstance.cpp
+ RSeqRewriter.cpp
SDTRewriter.cpp
GNUPropertyRewriter.cpp
diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp
index 5e3fa93..816acb2 100644
--- a/bolt/lib/Rewrite/DWARFRewriter.cpp
+++ b/bolt/lib/Rewrite/DWARFRewriter.cpp
@@ -1723,7 +1723,76 @@ StringRef getSectionName(const SectionRef &Section) {
return Name;
}
-// Extracts an appropriate slice if input is DWP.
+/// Extracts the slice of the .debug_str.dwo section for a given CU from a DWP
+/// file, based on the .debug_str_offsets.dwo section. This helps address DWO
+/// bloat that may occur after updates.
+///
+/// A slice of .debug_str.dwo may be composed of several non-contiguous
+/// fragments. These non-contiguous string views will be written out
+/// sequentially, avoiding the copying overhead caused by assembling them.
+///
+/// The .debug_str_offsets for the first CU often does not need to be updated,
+/// so copying is only performed when .debug_str_offsets requires updating.
+static void UpdateStrAndStrOffsets(StringRef StrDWOContent,
+ StringRef StrOffsetsContent,
+ SmallVectorImpl<StringRef> &StrDWOOutData,
+ std::string &StrOffsetsOutData,
+ unsigned DwarfVersion, bool IsLittleEndian) {
+ const llvm::endianness Endian =
+ IsLittleEndian ? llvm::endianness::little : llvm::endianness::big;
+ const uint64_t HeaderOffset = (DwarfVersion >= 5) ? 8 : 0;
+ constexpr size_t SizeOfOffset = sizeof(int32_t);
+ const uint64_t NumOffsets =
+ (StrOffsetsContent.size() - HeaderOffset) / SizeOfOffset;
+
+ DataExtractor Extractor(StrOffsetsContent, IsLittleEndian, 0);
+ uint64_t ExtractionOffset = HeaderOffset;
+
+ using StringFragment = DWARFUnitIndex::Entry::SectionContribution;
+ const auto getStringLength = [](StringRef Content,
+ uint64_t Offset) -> uint64_t {
+ size_t NullPos = Content.find('\0', Offset);
+ return (NullPos != StringRef::npos) ? (NullPos - Offset + 1) : 0;
+ };
+ const auto isContiguous = [](const StringFragment &Fragment,
+ uint64_t NextOffset) -> bool {
+ return NextOffset == Fragment.getOffset() + Fragment.getLength();
+ };
+ std::optional<StringFragment> CurrentFragment;
+ uint64_t AccumulatedStrLen = 0;
+ for (uint64_t I = 0; I < NumOffsets; ++I) {
+ const uint64_t StrOffset = Extractor.getU32(&ExtractionOffset);
+ const uint64_t StringLength = getStringLength(StrDWOContent, StrOffset);
+ if (!CurrentFragment) {
+      // Initialize the first fragment.
+ CurrentFragment = StringFragment(StrOffset, StringLength);
+ } else {
+ if (isContiguous(*CurrentFragment, StrOffset)) {
+        // Extend the current fragment.
+ CurrentFragment->setLength(CurrentFragment->getLength() + StringLength);
+ } else {
+        // Save the current fragment and start a new one.
+ StrDWOOutData.push_back(StrDWOContent.substr(
+ CurrentFragment->getOffset(), CurrentFragment->getLength()));
+ CurrentFragment = StringFragment(StrOffset, StringLength);
+ }
+ }
+ if (AccumulatedStrLen != StrOffset) {
+      // Update the string offset for this entry.
+ if (StrOffsetsOutData.empty())
+ StrOffsetsOutData = StrOffsetsContent.str();
+ llvm::support::endian::write32(
+ &StrOffsetsOutData[HeaderOffset + I * SizeOfOffset],
+ static_cast<uint32_t>(AccumulatedStrLen), Endian);
+ }
+ AccumulatedStrLen += StringLength;
+ }
+ if (CurrentFragment)
+ StrDWOOutData.push_back(StrDWOContent.substr(CurrentFragment->getOffset(),
+ CurrentFragment->getLength()));
+}
+
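For illustration, the coalescing logic above can be modeled on plain offset/length pairs as follows (hypothetical names, independent of the DWARF reader/writer classes):

#include <cstdint>
#include <utility>
#include <vector>

// Merge per-string (offset, length) pairs read from .debug_str_offsets.dwo
// into contiguous fragments and record the offset each string receives in the
// rebuilt .debug_str.dwo slice.
static void coalesce(const std::vector<std::pair<uint64_t, uint64_t>> &Strings,
                     std::vector<std::pair<uint64_t, uint64_t>> &Fragments,
                     std::vector<uint64_t> &NewOffsets) {
  uint64_t Accumulated = 0;
  for (const auto &[Offset, Length] : Strings) {
    if (Fragments.empty() ||
        Offset != Fragments.back().first + Fragments.back().second)
      Fragments.push_back({Offset, Length}); // start a new fragment
    else
      Fragments.back().second += Length; // extend the current fragment
    NewOffsets.push_back(Accumulated); // position in the rebuilt slice
    Accumulated += Length;
  }
}

For example, strings at offsets 0, 12 and 40 with lengths 12, 8 and 5 yield two fragments, [0, 20) and [40, 45), and rewritten offsets 0, 12 and 20, matching the AccumulatedStrLen bookkeeping above.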
+// Extracts an appropriate slice if input is DWP.
// Applies patches or overwrites the section.
std::optional<StringRef> updateDebugData(
DWARFContext &DWCtx, StringRef SectionName, StringRef SectionContents,
@@ -1772,6 +1841,8 @@ std::optional<StringRef> updateDebugData(
errs() << "BOLT-WARNING: unsupported debug section: " << SectionName
<< "\n";
if (StrWriter.isInitialized()) {
+ if (CUDWOEntry)
+ return StrWriter.getBufferStr();
OutputBuffer = StrWriter.releaseBuffer();
return StringRef(reinterpret_cast<const char *>(OutputBuffer->data()),
OutputBuffer->size());
@@ -1786,6 +1857,8 @@ std::optional<StringRef> updateDebugData(
}
case DWARFSectionKind::DW_SECT_STR_OFFSETS: {
if (StrOffstsWriter.isFinalized()) {
+ if (CUDWOEntry)
+ return StrOffstsWriter.getBufferStr();
OutputBuffer = StrOffstsWriter.releaseBuffer();
return StringRef(reinterpret_cast<const char *>(OutputBuffer->data()),
OutputBuffer->size());
@@ -1888,6 +1961,10 @@ void DWARFRewriter::writeDWOFiles(
}
}
+ StringRef StrDWOContent;
+ StringRef StrOffsetsContent;
+ llvm::SmallVector<StringRef, 3> StrDWOOutData;
+ std::string StrOffsetsOutData;
for (const SectionRef &Section : File->sections()) {
std::unique_ptr<DebugBufferVector> OutputData;
StringRef SectionName = getSectionName(Section);
@@ -1895,11 +1972,50 @@ void DWARFRewriter::writeDWOFiles(
continue;
Expected<StringRef> ContentsExp = Section.getContents();
assert(ContentsExp && "Invalid contents.");
+ if (IsDWP && SectionName == "debug_str.dwo") {
+ if (StrWriter.isInitialized())
+ StrDWOContent = StrWriter.getBufferStr();
+ else
+ StrDWOContent = *ContentsExp;
+ continue;
+ }
if (std::optional<StringRef> OutData = updateDebugData(
(*DWOCU)->getContext(), SectionName, *ContentsExp, KnownSections,
*Streamer, *this, CUDWOEntry, DWOId, OutputData, RangeListssWriter,
- LocWriter, StrOffstsWriter, StrWriter, OverridenSections))
+ LocWriter, StrOffstsWriter, StrWriter, OverridenSections)) {
+ if (IsDWP && SectionName == "debug_str_offsets.dwo") {
+ StrOffsetsContent = *OutData;
+ continue;
+ }
Streamer->emitBytes(*OutData);
+ }
+ }
+
+ if (IsDWP) {
+    // Handle .debug_str.dwo and .debug_str_offsets.dwo together. In the
+    // original DWP, .debug_str.dwo is a deduplicated global string table, so
+    // the slice for a single CU has to be extracted according to
+    // .debug_str_offsets.dwo.
+ UpdateStrAndStrOffsets(StrDWOContent, StrOffsetsContent, StrDWOOutData,
+ StrOffsetsOutData, CU.getVersion(),
+ (*DWOCU)->getContext().isLittleEndian());
+ auto SectionIter = KnownSections.find("debug_str.dwo");
+ if (SectionIter != KnownSections.end()) {
+ Streamer->switchSection(SectionIter->second.first);
+      for (size_t I = 0; I < StrDWOOutData.size(); ++I) {
+        StringRef OutData = StrDWOOutData[I];
+ if (!OutData.empty())
+ Streamer->emitBytes(OutData);
+ }
+ }
+ SectionIter = KnownSections.find("debug_str_offsets.dwo");
+ if (SectionIter != KnownSections.end()) {
+ Streamer->switchSection(SectionIter->second.first);
+ if (!StrOffsetsOutData.empty())
+ Streamer->emitBytes(StrOffsetsOutData);
+ else
+ Streamer->emitBytes(StrOffsetsContent);
+ }
}
Streamer->finish();
TempOut->keep();
diff --git a/bolt/lib/Rewrite/RSeqRewriter.cpp b/bolt/lib/Rewrite/RSeqRewriter.cpp
new file mode 100644
index 0000000..46bce66
--- /dev/null
+++ b/bolt/lib/Rewrite/RSeqRewriter.cpp
@@ -0,0 +1,72 @@
+//===- bolt/Rewrite/RSeqRewriter.cpp --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Basic support for restartable sequences used by tcmalloc. Functions that
+// contain rseq critical sections are excluded from optimization entirely.
+//
+// References:
+// * https://google.github.io/tcmalloc/rseq.html
+// * tcmalloc/internal/percpu_rseq_x86_64.S
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Core/BinaryFunction.h"
+#include "bolt/Rewrite/MetadataRewriter.h"
+#include "bolt/Rewrite/MetadataRewriters.h"
+#include "llvm/Support/Errc.h"
+
+using namespace llvm;
+using namespace bolt;
+
+namespace {
+
+class RSeqRewriter final : public MetadataRewriter {
+public:
+ RSeqRewriter(StringRef Name, BinaryContext &BC)
+ : MetadataRewriter(Name, BC) {}
+
+ Error preCFGInitializer() override {
+ for (const BinarySection &Section : BC.allocatableSections()) {
+ if (Section.getName() != "__rseq_cs")
+ continue;
+
+ auto handleRelocation = [&](const Relocation &Rel, bool IsDynamic) {
+ BinaryFunction *BF = nullptr;
+ if (Rel.Symbol)
+ BF = BC.getFunctionForSymbol(Rel.Symbol);
+ else if (Relocation::isRelative(Rel.Type))
+ BF = BC.getBinaryFunctionContainingAddress(Rel.Addend);
+
+ if (!BF) {
+ BC.errs() << "BOLT-WARNING: no function found matching "
+ << (IsDynamic ? "dynamic " : "")
+ << "relocation in __rseq_cs\n";
+ } else if (!BF->isIgnored()) {
+ BC.outs() << "BOLT-INFO: restartable sequence reference detected in "
+ << *BF << ". Function will not be optimized\n";
+ BF->setIgnored();
+ }
+ };
+
+ for (const Relocation &Rel : Section.dynamicRelocations())
+ handleRelocation(Rel, /*IsDynamic*/ true);
+
+ for (const Relocation &Rel : Section.relocations())
+ handleRelocation(Rel, /*IsDynamic*/ false);
+ }
+
+ return Error::success();
+ }
+};
+
+} // namespace
+
+std::unique_ptr<MetadataRewriter>
+llvm::bolt::createRSeqRewriter(BinaryContext &BC) {
+ return std::make_unique<RSeqRewriter>("rseq-cs-rewriter", BC);
+}
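For background, each __rseq_cs entry that the relocations above point into has roughly the layout below (see linux/rseq.h; the struct name here is made up for illustration). Since the kernel consumes these absolute code addresses at run time, BOLT must leave the containing functions untouched:

#include <cstdint>

// Approximate layout of one __rseq_cs descriptor.
struct RSeqCriticalSection {
  uint32_t Version;
  uint32_t Flags;
  uint64_t StartIP;          // first instruction of the critical section
  uint64_t PostCommitOffset; // length of the critical section in bytes
  uint64_t AbortIP;          // where execution resumes if the section aborts
};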
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 77e5688..0e14500 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -80,8 +80,11 @@ namespace opts {
extern cl::list<std::string> HotTextMoveSections;
extern cl::opt<bool> Hugify;
extern cl::opt<bool> Instrument;
+extern cl::opt<uint32_t> InstrumentationSleepTime;
extern cl::opt<bool> KeepNops;
extern cl::opt<bool> Lite;
+extern cl::list<std::string> PrintOnly;
+extern cl::opt<std::string> PrintOnlyFile;
extern cl::list<std::string> ReorderData;
extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions;
extern cl::opt<bool> TerminalHLT;
@@ -292,10 +295,31 @@ cl::bits<GadgetScannerKind> GadgetScannersToRun(
clEnumValN(GS_ALL, "all", "All implemented scanners")),
cl::ZeroOrMore, cl::CommaSeparated, cl::cat(BinaryAnalysisCategory));
+// Primary targets for hooking runtime library initialization, with fallback
+// to the next item when the current one is not available in the input
+// binary.
+enum RuntimeLibInitHookTarget : char {
+  RLIH_ENTRY_POINT = 0, ///< Use ELF Header Entry Point
+  RLIH_INIT = 1,        ///< Use ELF DT_INIT entry
+  RLIH_INIT_ARRAY = 2,  ///< Use ELF .init_array entry
+};
+
+cl::opt<RuntimeLibInitHookTarget> RuntimeLibInitHook(
+ "runtime-lib-init-hook",
+ cl::desc("Primary target for hooking runtime library initialization, used "
+ "in fallback order of availabiliy in input binary (entry_point -> "
+ "init -> init_array) (default: entry_point)"),
+ cl::Hidden, cl::init(RLIH_ENTRY_POINT),
+ cl::values(clEnumValN(RLIH_ENTRY_POINT, "entry_point",
+ "use ELF Header Entry Point"),
+ clEnumValN(RLIH_INIT, "init", "use ELF DT_INIT entry"),
+ clEnumValN(RLIH_INIT_ARRAY, "init_array",
+ "use ELF .init_array entry")),
+ cl::ZeroOrMore, cl::cat(BoltOptCategory));
+
} // namespace opts
// FIXME: implement a better way to mark sections for replacement.
-constexpr const char *RewriteInstance::SectionsToOverwrite[];
std::vector<std::string> RewriteInstance::DebugSectionsToOverwrite = {
".debug_abbrev", ".debug_aranges", ".debug_line", ".debug_line_str",
".debug_loc", ".debug_loclists", ".debug_ranges", ".debug_rnglists",
@@ -731,6 +755,8 @@ Error RewriteInstance::run() {
<< "\n";
BC->outs() << "BOLT-INFO: BOLT version: " << BoltRevision << "\n";
+ selectFunctionsToPrint();
+
if (Error E = discoverStorage())
return E;
if (Error E = readSpecialSections())
@@ -738,9 +764,12 @@ Error RewriteInstance::run() {
adjustCommandLineOptions();
discoverFileObjects();
- if (opts::Instrument && !BC->IsStaticExecutable)
+ if (opts::Instrument && !BC->IsStaticExecutable) {
+ if (Error E = discoverRtInitAddress())
+ return E;
if (Error E = discoverRtFiniAddress())
return E;
+ }
preprocessProfileData();
@@ -782,8 +811,12 @@ Error RewriteInstance::run() {
updateMetadata();
- if (opts::Instrument && !BC->IsStaticExecutable)
- updateRtFiniReloc();
+ if (opts::Instrument && !BC->IsStaticExecutable) {
+ if (Error E = updateRtInitReloc())
+ return E;
+ if (Error E = updateRtFiniReloc())
+ return E;
+ }
if (opts::OutputFilename == "/dev/null") {
BC->outs() << "BOLT-INFO: skipping writing final binary to disk\n";
@@ -1408,6 +1441,65 @@ void RewriteInstance::discoverBOLTReserved() {
NextAvailableAddress = BC->BOLTReserved.start();
}
+Error RewriteInstance::discoverRtInitAddress() {
+ if (BC->HasInterpHeader && opts::RuntimeLibInitHook == opts::RLIH_ENTRY_POINT)
+ return Error::success();
+
+ // Use DT_INIT if it's available.
+ if (BC->InitAddress && opts::RuntimeLibInitHook <= opts::RLIH_INIT) {
+ BC->StartFunctionAddress = BC->InitAddress;
+ return Error::success();
+ }
+
+ if (!BC->InitArrayAddress || !BC->InitArraySize) {
+ return createStringError(std::errc::not_supported,
+ "Instrumentation of shared library needs either "
+ "DT_INIT or DT_INIT_ARRAY");
+ }
+
+ if (*BC->InitArraySize < BC->AsmInfo->getCodePointerSize()) {
+ return createStringError(std::errc::not_supported,
+ "Need at least 1 DT_INIT_ARRAY slot");
+ }
+
+ ErrorOr<BinarySection &> InitArraySection =
+ BC->getSectionForAddress(*BC->InitArrayAddress);
+ if (auto EC = InitArraySection.getError())
+ return errorCodeToError(EC);
+
+ if (InitArraySection->getAddress() != *BC->InitArrayAddress) {
+ return createStringError(std::errc::not_supported,
+ "Inconsistent address of .init_array section");
+ }
+
+ if (const Relocation *Reloc = InitArraySection->getDynamicRelocationAt(0)) {
+ if (Reloc->isRelative()) {
+ BC->StartFunctionAddress = Reloc->Addend;
+ } else {
+ MCSymbol *Sym = Reloc->Symbol;
+ if (!Sym)
+ return createStringError(
+ std::errc::not_supported,
+ "Failed to locate symbol for 0 entry of .init_array");
+ const BinaryFunction *BF = BC->getFunctionForSymbol(Sym);
+ if (!BF)
+ return createStringError(
+ std::errc::not_supported,
+ "Failed to locate binary function for 0 entry of .init_array");
+ BC->StartFunctionAddress = BF->getAddress() + Reloc->Addend;
+ }
+ return Error::success();
+ }
+
+ if (const Relocation *Reloc = InitArraySection->getRelocationAt(0)) {
+ BC->StartFunctionAddress = Reloc->Value;
+ return Error::success();
+ }
+
+ return createStringError(std::errc::not_supported,
+ "No relocation for first DT_INIT_ARRAY slot");
+}
+
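A condensed sketch of the fallback order implemented above and selected by -runtime-lib-init-hook; the helper and its parameters are hypothetical and only restate the control flow of discoverRtInitAddress():

#include <optional>

enum InitHookKind { HookEntryPoint = 0, HookDtInit = 1, HookInitArray = 2 };

static std::optional<InitHookKind>
pickInitHook(InitHookKind Preferred, bool HasInterpHeader, bool HasDtInit,
             bool HasInitArray) {
  if (Preferred == HookEntryPoint && HasInterpHeader)
    return HookEntryPoint; // patch the ELF header entry point
  if (HasDtInit && Preferred <= HookDtInit)
    return HookDtInit;     // patch the DT_INIT dynamic entry
  if (HasInitArray)
    return HookInitArray;  // patch the first .init_array slot
  return std::nullopt;     // instrumentation is rejected with an error
}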
Error RewriteInstance::discoverRtFiniAddress() {
// Use DT_FINI if it's available.
if (BC->FiniAddress) {
@@ -1416,6 +1508,9 @@ Error RewriteInstance::discoverRtFiniAddress() {
}
if (!BC->FiniArrayAddress || !BC->FiniArraySize) {
+ // Missing fini hooks are allowed when instrumentation-sleep-time is in use.
+ if (opts::InstrumentationSleepTime > 0)
+ return Error::success();
return createStringError(
std::errc::not_supported,
"Instrumentation needs either DT_FINI or DT_FINI_ARRAY");
@@ -1431,6 +1526,11 @@ Error RewriteInstance::discoverRtFiniAddress() {
if (auto EC = FiniArraySection.getError())
return errorCodeToError(EC);
+ if (FiniArraySection->getAddress() != *BC->FiniArrayAddress) {
+ return createStringError(std::errc::not_supported,
+ "Inconsistent address of .fini_array section");
+ }
+
if (const Relocation *Reloc = FiniArraySection->getDynamicRelocationAt(0)) {
BC->FiniFunctionAddress = Reloc->Addend;
return Error::success();
@@ -1445,26 +1545,99 @@ Error RewriteInstance::discoverRtFiniAddress() {
"No relocation for first DT_FINI_ARRAY slot");
}
-void RewriteInstance::updateRtFiniReloc() {
+Error RewriteInstance::updateRtInitReloc() {
+ if (BC->HasInterpHeader && opts::RuntimeLibInitHook == opts::RLIH_ENTRY_POINT)
+ return Error::success();
+
+ // Updating DT_INIT is handled by patchELFDynamic.
+ if (BC->InitAddress && opts::RuntimeLibInitHook <= opts::RLIH_INIT)
+ return Error::success();
+
+ const RuntimeLibrary *RT = BC->getRuntimeLibrary();
+ if (!RT || !RT->getRuntimeStartAddress())
+ return Error::success();
+
+ if (!BC->InitArrayAddress)
+ return Error::success();
+
+  if (!BC->InitArraySize)
+ return createStringError(std::errc::not_supported,
+ "inconsistent .init_array state");
+
+ ErrorOr<BinarySection &> InitArraySection =
+ BC->getSectionForAddress(*BC->InitArrayAddress);
+ if (!InitArraySection)
+ return createStringError(std::errc::not_supported, ".init_array removed");
+
+ if (std::optional<Relocation> Reloc =
+ InitArraySection->takeDynamicRelocationAt(0)) {
+ if (Reloc->isRelative()) {
+ if (Reloc->Addend != BC->StartFunctionAddress)
+ return createStringError(std::errc::not_supported,
+ "inconsistent .init_array dynamic relocation");
+ Reloc->Addend = RT->getRuntimeStartAddress();
+ InitArraySection->addDynamicRelocation(*Reloc);
+ } else {
+ MCSymbol *Sym = Reloc->Symbol;
+ if (!Sym)
+ return createStringError(
+ std::errc::not_supported,
+ "Failed to locate symbol for 0 entry of .init_array");
+ const BinaryFunction *BF = BC->getFunctionForSymbol(Sym);
+ if (!BF)
+ return createStringError(
+ std::errc::not_supported,
+ "Failed to locate binary function for 0 entry of .init_array");
+ if (BF->getAddress() + Reloc->Addend != BC->StartFunctionAddress)
+ return createStringError(std::errc::not_supported,
+ "inconsistent .init_array dynamic relocation");
+ InitArraySection->addDynamicRelocation(Relocation{
+ /*Offset*/ 0, /*Symbol*/ nullptr, /*Type*/ Relocation::getAbs64(),
+ /*Addend*/ RT->getRuntimeStartAddress(), /*Value*/ 0});
+ }
+ }
+ // Update the static relocation by adding a pending relocation which will get
+ // patched when flushPendingRelocations is called in rewriteFile. Note that
+ // flushPendingRelocations will calculate the value to patch as
+ // "Symbol + Addend". Since we don't have a symbol, just set the addend to the
+ // desired value.
+ InitArraySection->addPendingRelocation(Relocation{
+ /*Offset*/ 0, /*Symbol*/ nullptr, /*Type*/ Relocation::getAbs64(),
+ /*Addend*/ RT->getRuntimeStartAddress(), /*Value*/ 0});
+ BC->outs()
+ << "BOLT-INFO: runtime library initialization was hooked via .init_array "
+ "entry, set to 0x"
+ << Twine::utohexstr(RT->getRuntimeStartAddress()) << "\n";
+ return Error::success();
+}
+
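To make the pending-relocation comment above concrete, the value that flushPendingRelocations eventually writes into slot 0 of .init_array reduces to the symbol address plus the addend, which with a null symbol is the addend alone (helper name is illustrative):

#include <cstdint>

// Value patched for a pending relocation: symbol address (0 when Symbol is
// nullptr) plus addend, so the addend alone carries the runtime start address.
static uint64_t pendingRelocValue(uint64_t SymbolAddress, uint64_t Addend) {
  return SymbolAddress + Addend;
}
// e.g. pendingRelocValue(0, RuntimeStartAddress) is what lands in .init_array[0].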
+Error RewriteInstance::updateRtFiniReloc() {
// Updating DT_FINI is handled by patchELFDynamic.
if (BC->FiniAddress)
- return;
+ return Error::success();
const RuntimeLibrary *RT = BC->getRuntimeLibrary();
if (!RT || !RT->getRuntimeFiniAddress())
- return;
+ return Error::success();
- assert(BC->FiniArrayAddress && BC->FiniArraySize &&
- "inconsistent .fini_array state");
+ if (!BC->FiniArrayAddress || !BC->FiniArraySize) {
+ // Missing fini hooks are allowed when instrumentation-sleep-time is in use.
+ if (opts::InstrumentationSleepTime > 0)
+ return Error::success();
+ return createStringError(std::errc::not_supported,
+ "inconsistent .fini_array state");
+ }
ErrorOr<BinarySection &> FiniArraySection =
BC->getSectionForAddress(*BC->FiniArrayAddress);
- assert(FiniArraySection && ".fini_array removed");
+ if (!FiniArraySection)
+ return createStringError(std::errc::not_supported, ".fini_array removed");
if (std::optional<Relocation> Reloc =
FiniArraySection->takeDynamicRelocationAt(0)) {
- assert(Reloc->Addend == BC->FiniFunctionAddress &&
- "inconsistent .fini_array dynamic relocation");
+ if (Reloc->Addend != BC->FiniFunctionAddress)
+ return createStringError(std::errc::not_supported,
+ "inconsistent .fini_array dynamic relocation");
Reloc->Addend = RT->getRuntimeFiniAddress();
FiniArraySection->addDynamicRelocation(*Reloc);
}
@@ -1477,6 +1650,10 @@ void RewriteInstance::updateRtFiniReloc() {
FiniArraySection->addPendingRelocation(Relocation{
/*Offset*/ 0, /*Symbol*/ nullptr, /*Type*/ Relocation::getAbs64(),
/*Addend*/ RT->getRuntimeFiniAddress(), /*Value*/ 0});
+ BC->outs() << "BOLT-INFO: runtime library finalization was hooked via "
+ ".fini_array entry, set to 0x"
+ << Twine::utohexstr(RT->getRuntimeFiniAddress()) << "\n";
+ return Error::success();
}
void RewriteInstance::registerFragments() {
@@ -2075,7 +2252,7 @@ Error RewriteInstance::readSpecialSections() {
if (BC->IsStripped && !opts::AllowStripped) {
BC->errs()
<< "BOLT-ERROR: stripped binaries are not supported. If you know "
- "what you're doing, use --allow-stripped to proceed";
+ "what you're doing, use --allow-stripped to proceed\n";
exit(1);
}
@@ -2175,6 +2352,14 @@ void RewriteInstance::adjustCommandLineOptions() {
exit(1);
}
+ if (opts::Instrument && opts::RuntimeLibInitHook == opts::RLIH_ENTRY_POINT &&
+ !BC->HasInterpHeader) {
+ BC->errs()
+ << "BOLT-WARNING: adjusted runtime-lib-init-hook to 'init' due to "
+ "absence of INTERP header\n";
+ opts::RuntimeLibInitHook = opts::RLIH_INIT;
+ }
+
if (opts::HotText && opts::HotTextMoveSections.getNumOccurrences() == 0) {
opts::HotTextMoveSections.addValue(".stub");
opts::HotTextMoveSections.addValue(".mover");
@@ -2955,8 +3140,10 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
// if-condition above) so we're handling a relocation from a function
// to itself. RISC-V uses such relocations for branches, for example.
      // These should not be registered as externally referenced offsets.
- if (!ContainingBF)
- ReferencedBF->registerReferencedOffset(RefFunctionOffset);
+ if (!ContainingBF && !ReferencedBF->isInConstantIsland(Address)) {
+ ReferencedBF->registerInternalRefDataRelocation(RefFunctionOffset,
+ Rel.getOffset());
+ }
}
if (opts::Verbosity > 1 &&
BinarySection(*BC, RelocatedSection).isWritable())
@@ -3099,17 +3286,22 @@ static BinaryFunction *getInitFunctionIfStaticBinary(BinaryContext &BC) {
return BC.getBinaryFunctionAtAddress(BD->getAddress());
}
+static void populateFunctionNames(cl::opt<std::string> &FunctionNamesFile,
+ cl::list<std::string> &FunctionNames) {
+ if (FunctionNamesFile.empty())
+ return;
+ std::ifstream FuncsFile(FunctionNamesFile, std::ios::in);
+ std::string FuncName;
+ while (std::getline(FuncsFile, FuncName))
+ FunctionNames.push_back(FuncName);
+}
+
+void RewriteInstance::selectFunctionsToPrint() {
+ populateFunctionNames(opts::PrintOnlyFile, opts::PrintOnly);
+}
+
void RewriteInstance::selectFunctionsToProcess() {
// Extend the list of functions to process or skip from a file.
- auto populateFunctionNames = [](cl::opt<std::string> &FunctionNamesFile,
- cl::list<std::string> &FunctionNames) {
- if (FunctionNamesFile.empty())
- return;
- std::ifstream FuncsFile(FunctionNamesFile, std::ios::in);
- std::string FuncName;
- while (std::getline(FuncsFile, FuncName))
- FunctionNames.push_back(FuncName);
- };
populateFunctionNames(opts::FunctionNamesFile, opts::ForceFunctionNames);
populateFunctionNames(opts::SkipFunctionNamesFile, opts::SkipFunctionNames);
populateFunctionNames(opts::FunctionNamesFileNR, opts::ForceFunctionNamesNR);
@@ -3345,6 +3537,8 @@ void RewriteInstance::initializeMetadataManager() {
MetadataManager.registerRewriter(createPseudoProbeRewriter(*BC));
+ MetadataManager.registerRewriter(createRSeqRewriter(*BC));
+
MetadataManager.registerRewriter(createSDTRewriter(*BC));
MetadataManager.registerRewriter(createGNUPropertyRewriter(*BC));
@@ -3495,6 +3689,7 @@ void RewriteInstance::disassembleFunctions() {
if (!shouldDisassemble(Function))
continue;
+ Function.validateInternalBranches();
Function.postProcessEntryPoints();
Function.postProcessJumpTables();
}
@@ -4837,9 +5032,14 @@ void RewriteInstance::patchELFSectionHeaderTable(ELFObjectFile<ELFT> *File) {
ELFEhdrTy NewEhdr = Obj.getHeader();
if (BC->HasRelocations) {
- if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
+ RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary();
+ if (RtLibrary && opts::RuntimeLibInitHook == opts::RLIH_ENTRY_POINT) {
NewEhdr.e_entry = RtLibrary->getRuntimeStartAddress();
- else
+ BC->outs()
+ << "BOLT-INFO: runtime library initialization was hooked via ELF "
+ "Header Entry Point, set to 0x"
+ << Twine::utohexstr(NewEhdr.e_entry) << "\n";
+ } else
NewEhdr.e_entry = getNewFunctionAddress(NewEhdr.e_entry);
assert((NewEhdr.e_entry || !Obj.getHeader().e_entry) &&
"cannot find new address for entry point");
@@ -5680,14 +5880,23 @@ void RewriteInstance::patchELFDynamic(ELFObjectFile<ELFT> *File) {
}
RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary();
if (RtLibrary && Dyn.getTag() == ELF::DT_FINI) {
- if (uint64_t Addr = RtLibrary->getRuntimeFiniAddress())
+ if (uint64_t Addr = RtLibrary->getRuntimeFiniAddress()) {
NewDE.d_un.d_ptr = Addr;
+ BC->outs()
+ << "BOLT-INFO: runtime library finalization was hooked via "
+ "DT_FINI, set to 0x"
+ << Twine::utohexstr(Addr) << "\n";
+ }
}
- if (RtLibrary && Dyn.getTag() == ELF::DT_INIT && !BC->HasInterpHeader) {
+ if (RtLibrary && Dyn.getTag() == ELF::DT_INIT &&
+ (!BC->HasInterpHeader ||
+ opts::RuntimeLibInitHook == opts::RLIH_INIT)) {
if (auto Addr = RtLibrary->getRuntimeStartAddress()) {
- LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set DT_INIT to 0x"
- << Twine::utohexstr(Addr) << '\n');
NewDE.d_un.d_ptr = Addr;
+ BC->outs()
+ << "BOLT-INFO: runtime library initialization was hooked via "
+ "DT_INIT, set to 0x"
+ << Twine::utohexstr(Addr) << "\n";
}
}
break;
@@ -5755,10 +5964,13 @@ Error RewriteInstance::readELFDynamic(ELFObjectFile<ELFT> *File) {
for (const Elf_Dyn &Dyn : DynamicEntries) {
switch (Dyn.d_tag) {
case ELF::DT_INIT:
- if (!BC->HasInterpHeader) {
- LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set start function address\n");
- BC->StartFunctionAddress = Dyn.getPtr();
- }
+ BC->InitAddress = Dyn.getPtr();
+ break;
+ case ELF::DT_INIT_ARRAY:
+ BC->InitArrayAddress = Dyn.getPtr();
+ break;
+ case ELF::DT_INIT_ARRAYSZ:
+ BC->InitArraySize = Dyn.getPtr();
break;
case ELF::DT_FINI:
BC->FiniAddress = Dyn.getPtr();
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 3c77091..5881d3f 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -164,11 +164,53 @@ public:
bool isPush(const MCInst &Inst) const override {
return isStoreToStack(Inst);
- };
+ }
bool isPop(const MCInst &Inst) const override {
return isLoadFromStack(Inst);
- };
+ }
+
+  // We look for instructions that load from the stack or adjust the stack
+  // pointer, and treat a basic block as an epilogue only if such instructions
+  // are present and immediately precede the return (or branch) that ends the
+  // block.
+ bool isEpilogue(const BinaryBasicBlock &BB) const override {
+ if (BB.succ_size())
+ return false;
+
+ bool SeenLoadFromStack = false;
+ bool SeenStackPointerAdjustment = false;
+ for (const MCInst &Instr : BB) {
+ // Skip CFI pseudo instruction.
+ if (isCFI(Instr))
+ continue;
+
+ bool IsPop = isPop(Instr);
+      // A load-from-stack instruction may also adjust SP in pre-index or
+      // post-index form; for epilogue recognition there is no need to
+      // distinguish it from a separate SP adjustment.
+ bool IsSPAdj = (isADD(Instr) || isMOVW(Instr)) &&
+ Instr.getOperand(0).isReg() &&
+ Instr.getOperand(0).getReg() == AArch64::SP;
+ SeenLoadFromStack |= IsPop;
+ SeenStackPointerAdjustment |= IsSPAdj;
+
+ if (!SeenLoadFromStack && !SeenStackPointerAdjustment)
+ continue;
+ if (IsPop || IsSPAdj || isPAuthOnLR(Instr))
+ continue;
+ if (isReturn(Instr))
+ return true;
+ if (isBranch(Instr))
+ break;
+
+      // Any previously seen load from stack or stack pointer adjustment is
+      // definitely not part of the epilogue sequence, so reset both flags.
+ SeenLoadFromStack = false;
+ SeenStackPointerAdjustment = false;
+ }
+ return SeenLoadFromStack || SeenStackPointerAdjustment;
+ }
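For illustration, a typical AArch64 epilogue accepted by this heuristic is ldp x29, x30, [sp], #16 (a pop that also adjusts SP through post-index writeback), optionally followed by autiasp, and terminated by ret; if an unrelated instruction appears between the pops/SP adjustments and the terminator, the flags are reset and the block is not treated as an epilogue.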
void createCall(MCInst &Inst, const MCSymbol *Target,
MCContext *Ctx) override {
@@ -271,6 +313,33 @@ public:
Inst.getOpcode() == AArch64::RETABSPPCr;
}
+ void createMatchingAuth(const MCInst &AuthAndRet, MCInst &Auth) override {
+ Auth.clear();
+ Auth.setOperands(AuthAndRet.getOperands());
+ switch (AuthAndRet.getOpcode()) {
+ case AArch64::RETAA:
+ Auth.setOpcode(AArch64::AUTIASP);
+ break;
+ case AArch64::RETAB:
+ Auth.setOpcode(AArch64::AUTIBSP);
+ break;
+ case AArch64::RETAASPPCi:
+ Auth.setOpcode(AArch64::AUTIASPPCi);
+ break;
+ case AArch64::RETABSPPCi:
+ Auth.setOpcode(AArch64::AUTIBSPPCi);
+ break;
+ case AArch64::RETAASPPCr:
+ Auth.setOpcode(AArch64::AUTIASPPCr);
+ break;
+ case AArch64::RETABSPPCr:
+ Auth.setOpcode(AArch64::AUTIBSPPCr);
+ break;
+ default:
+ llvm_unreachable("Unhandled fused pauth-and-return instruction");
+ }
+ }
+
std::optional<MCPhysReg> getSignedReg(const MCInst &Inst) const override {
switch (Inst.getOpcode()) {
case AArch64::PACIA:
@@ -1793,14 +1862,12 @@ public:
}
bool isNoop(const MCInst &Inst) const override {
- return Inst.getOpcode() == AArch64::HINT &&
- Inst.getOperand(0).getImm() == 0;
+ return Inst.getOpcode() == AArch64::NOP;
}
void createNoop(MCInst &Inst) const override {
- Inst.setOpcode(AArch64::HINT);
+ Inst.setOpcode(AArch64::NOP);
Inst.clear();
- Inst.addOperand(MCOperand::createImm(0));
}
bool isTrap(const MCInst &Inst) const override {
@@ -2706,6 +2773,39 @@ public:
return Insts;
}
+ void createBTI(MCInst &Inst, bool CallTarget,
+ bool JumpTarget) const override {
+ Inst.setOpcode(AArch64::HINT);
+ unsigned HintNum = getBTIHintNum(CallTarget, JumpTarget);
+ Inst.addOperand(MCOperand::createImm(HintNum));
+ }
+
+ bool isBTILandingPad(MCInst &Inst, bool CallTarget,
+ bool JumpTarget) const override {
+ unsigned HintNum = getBTIHintNum(CallTarget, JumpTarget);
+ bool IsExplicitBTI =
+ Inst.getOpcode() == AArch64::HINT && Inst.getNumOperands() == 1 &&
+ Inst.getOperand(0).isImm() && Inst.getOperand(0).getImm() == HintNum;
+
+ bool IsImplicitBTI = HintNum == 34 && isImplicitBTIC(Inst);
+ return IsExplicitBTI || IsImplicitBTI;
+ }
+
+ bool isImplicitBTIC(MCInst &Inst) const override {
+ // PACI[AB]SP are always implicitly BTI C, independently of
+ // SCTLR_EL1.BT[01].
+ return Inst.getOpcode() == AArch64::PACIASP ||
+ Inst.getOpcode() == AArch64::PACIBSP;
+ }
+
+ void updateBTIVariant(MCInst &Inst, bool CallTarget,
+ bool JumpTarget) const override {
+ assert(Inst.getOpcode() == AArch64::HINT && "Not a BTI instruction.");
+ unsigned HintNum = getBTIHintNum(CallTarget, JumpTarget);
+ Inst.clear();
+ Inst.addOperand(MCOperand::createImm(HintNum));
+ }
+
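The helpers above call getBTIHintNum(), which is not shown in this diff. Assuming the architectural HINT immediates (BTI = 32, BTI c = 34, BTI j = 36, BTI jc = 38), a plausible sketch is:

// Hypothetical sketch only; the actual helper is defined elsewhere.
static unsigned getBTIHintNum(bool CallTarget, bool JumpTarget) {
  unsigned HintNum = 32; // plain BTI
  if (CallTarget)
    HintNum += 2;        // BTI c
  if (JumpTarget)
    HintNum += 4;        // BTI j, or BTI jc when combined with c
  return HintNum;
}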
InstructionListType materializeAddress(const MCSymbol *Target, MCContext *Ctx,
MCPhysReg RegName,
int64_t Addend = 0) const override {
diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
index 5fca5e8..7c24c2c 100644
--- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
+++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
@@ -219,6 +219,12 @@ public:
return getPopSize(Inst) == 0 ? false : true;
}
+ bool isEpilogue(const BinaryBasicBlock &BB) const override {
+ return ::llvm::any_of(BB, [&](const MCInst &Instr) {
+ return isLeave(Instr) || isPop(Instr);
+ });
+ }
+
bool isTerminateBranch(const MCInst &Inst) const override {
return Inst.getOpcode() == X86::ENDBR32 || Inst.getOpcode() == X86::ENDBR64;
}
diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp
index 5be04d2..b7eb209 100644
--- a/bolt/lib/Utils/CommandLineOpts.cpp
+++ b/bolt/lib/Utils/CommandLineOpts.cpp
@@ -245,6 +245,16 @@ cl::opt<bool> PrintCacheMetrics(
cl::desc("calculate and print various metrics for instruction cache"),
cl::cat(BoltOptCategory));
+cl::list<std::string> PrintOnly("print-only", cl::CommaSeparated,
+ cl::desc("list of functions to print"),
+ cl::value_desc("func1,func2,func3,..."),
+ cl::Hidden, cl::cat(BoltCategory));
+
+cl::opt<std::string>
+ PrintOnlyFile("print-only-file",
+ cl::desc("file with list of functions to print"), cl::Hidden,
+ cl::cat(BoltCategory));
+
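For example, -print-only=main limits the output of the per-pass -print-* options (such as -print-cfg) to the function main, while -print-only-file supplies the same list from a file with one function name per line.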
cl::opt<bool> PrintSections("print-sections",
cl::desc("print all registered sections"),
cl::Hidden, cl::cat(BoltCategory));