Diffstat (limited to 'bolt/lib')
26 files changed, 1168 insertions, 424 deletions
diff --git a/bolt/lib/Core/BinaryBasicBlock.cpp b/bolt/lib/Core/BinaryBasicBlock.cpp index d680850..a6d0ca9 100644 --- a/bolt/lib/Core/BinaryBasicBlock.cpp +++ b/bolt/lib/Core/BinaryBasicBlock.cpp @@ -22,8 +22,6 @@ namespace llvm { namespace bolt { -constexpr uint32_t BinaryBasicBlock::INVALID_OFFSET; - bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS) { return LHS.Index < RHS.Index; } diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index b478925..51bc867 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -531,20 +531,40 @@ BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF, } MCSymbol *BinaryContext::handleExternalBranchTarget(uint64_t Address, - BinaryFunction &BF) { - if (BF.isInConstantIsland(Address)) { - BF.setIgnored(); - this->outs() << "BOLT-WARNING: ignoring entry point at address 0x" - << Twine::utohexstr(Address) - << " in constant island of function " << BF << '\n'; - return nullptr; + BinaryFunction &Source, + BinaryFunction &Target) { + const uint64_t Offset = Address - Target.getAddress(); + assert(Offset < Target.getSize() && + "Address should be inside the referenced function"); + + bool IsValid = true; + if (Source.NeedBranchValidation) { + if (Target.CurrentState == BinaryFunction::State::Disassembled && + !Target.getInstructionAtOffset(Offset)) { + this->errs() + << "BOLT-WARNING: corrupted control flow detected in function " + << Source + << ": an external branch/call targets an invalid instruction " + << "in function " << Target << " at address 0x" + << Twine::utohexstr(Address) << "; ignoring both functions\n"; + IsValid = false; + } + if (Target.isInConstantIsland(Address)) { + this->errs() << "BOLT-WARNING: ignoring entry point at address 0x" + << Twine::utohexstr(Address) + << " in constant island of function " << Target << '\n'; + IsValid = false; + } } - const uint64_t Offset = Address - BF.getAddress(); - assert(Offset < BF.getSize() && - "Address should be inside the referenced function"); + if (!IsValid) { + Source.NeedBranchValidation = false; + Source.setIgnored(); + Target.setIgnored(); + return nullptr; + } - return Offset ? BF.addEntryPointAtOffset(Offset) : BF.getSymbol(); + return Offset ? Target.addEntryPointAtOffset(Offset) : Target.getSymbol(); } MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address, @@ -1433,13 +1453,11 @@ void BinaryContext::processInterproceduralReferences() { // Create an extra entry point if needed. Can also render the target // function ignored if the reference is invalid. - handleExternalBranchTarget(Address, *TargetFunction); + handleExternalBranchTarget(Address, Function, *TargetFunction); continue; } - // Check if address falls in function padding space - this could be - // unmarked data in code. In this case adjust the padding space size. ErrorOr<BinarySection &> Section = getSectionForAddress(Address); assert(Section && "cannot get section for referenced address"); @@ -1451,7 +1469,7 @@ void BinaryContext::processInterproceduralReferences() { if (SectionName == ".plt" || SectionName == ".plt.got") continue; - // Check if it is aarch64 veneer written at Address + // Check if it is aarch64 veneer written at Address. if (isAArch64() && handleAArch64Veneer(Address)) continue; @@ -1463,6 +1481,8 @@ void BinaryContext::processInterproceduralReferences() { exit(1); } + // Check if the address falls into the function padding space - this could + // be an unmarked data in code. 
In this case, adjust the padding space size. TargetFunction = getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false, /*UseMaxSize=*/true); @@ -1520,6 +1540,17 @@ void BinaryContext::foldFunction(BinaryFunction &ChildBF, } ChildBF.getSymbols().clear(); + // Reset function mapping for local symbols. + for (uint64_t RelOffset : ChildBF.getInternalRefDataRelocations()) { + const Relocation *Rel = getRelocationAt(RelOffset); + if (!Rel || !Rel->Symbol) + continue; + + WriteSymbolMapLock.lock(); + SymbolToFunctionMap[Rel->Symbol] = nullptr; + WriteSymbolMapLock.unlock(); + } + // Move other names the child function is known under. llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases)); ChildBF.Aliases.clear(); diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index ddaad6e..4ccef98 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -61,6 +61,8 @@ extern cl::OptionCategory BoltOptCategory; extern cl::opt<bool> EnableBAT; extern cl::opt<bool> Instrument; +extern cl::list<std::string> PrintOnly; +extern cl::opt<std::string> PrintOnlyFile; extern cl::opt<bool> StrictMode; extern cl::opt<bool> UpdateDebugSections; extern cl::opt<unsigned> Verbosity; @@ -133,14 +135,6 @@ PrintDynoStatsOnly("print-dyno-stats-only", cl::Hidden, cl::cat(BoltCategory)); -static cl::list<std::string> -PrintOnly("print-only", - cl::CommaSeparated, - cl::desc("list of functions to print"), - cl::value_desc("func1,func2,func3,..."), - cl::Hidden, - cl::cat(BoltCategory)); - cl::opt<bool> TimeBuild("time-build", cl::desc("print time spent constructing binary functions"), @@ -1044,8 +1038,10 @@ MCSymbol *BinaryFunction::getOrCreateLocalLabel(uint64_t Address) { // For AArch64, check if this address is part of a constant island. if (BC.isAArch64()) { - if (MCSymbol *IslandSym = getOrCreateIslandAccess(Address)) + if (MCSymbol *IslandSym = getOrCreateIslandAccess(Address)) { + Labels[Offset] = IslandSym; return IslandSym; + } } if (Offset == getSize()) @@ -1414,9 +1410,7 @@ Error BinaryFunction::disassemble() { // A recursive call. Calls to internal blocks are handled by // ValidateInternalCalls pass. TargetSymbol = getSymbol(); - } - - if (!TargetSymbol) { + } else { // Create either local label or external symbol. if (containsAddress(TargetAddress)) { TargetSymbol = getOrCreateLocalLabel(TargetAddress); @@ -1700,7 +1694,7 @@ bool BinaryFunction::scanExternalRefs() { // Get a reference symbol for the function when address is a valid code // reference. BranchTargetSymbol = - BC.handleExternalBranchTarget(TargetAddress, *TargetFunction); + BC.handleExternalBranchTarget(TargetAddress, *this, *TargetFunction); if (!BranchTargetSymbol) continue; } @@ -1896,16 +1890,6 @@ bool BinaryFunction::scanExternalRefs() { } } - // Inform BinaryContext that this function symbols will not be defined and - // relocations should not be created against them. 
- if (BC.HasRelocations) { - for (std::pair<const uint32_t, MCSymbol *> &LI : Labels) - BC.UndefinedSymbols.insert(LI.second); - for (MCSymbol *const EndLabel : FunctionEndLabels) - if (EndLabel) - BC.UndefinedSymbols.insert(EndLabel); - } - clearList(Relocations); clearList(ExternallyReferencedOffsets); @@ -1918,6 +1902,36 @@ bool BinaryFunction::scanExternalRefs() { return Success; } +bool BinaryFunction::validateInternalBranches() { + if (!isSimple() || TrapsOnEntry) + return true; + + for (const auto &KV : Labels) { + MCSymbol *Label = KV.second; + if (getSecondaryEntryPointSymbol(Label)) + continue; + + const uint32_t Offset = KV.first; + // Skip empty functions and out-of-bounds offsets, + // as they may not be disassembled. + if (!Offset || (Offset > getSize())) + continue; + + if (!getInstructionAtOffset(Offset) || + isInConstantIsland(getAddress() + Offset)) { + BC.errs() << "BOLT-WARNING: corrupted control flow detected in function " + << *this << ": an internal branch/call targets an invalid " + << "instruction at address 0x" + << Twine::utohexstr(getAddress() + Offset) + << "; ignoring this function\n"; + setIgnored(); + return false; + } + } + + return true; +} + void BinaryFunction::postProcessEntryPoints() { if (!isSimple()) return; @@ -2059,41 +2073,47 @@ void BinaryFunction::postProcessJumpTables() { } } -bool BinaryFunction::validateExternallyReferencedOffsets() { - SmallPtrSet<MCSymbol *, 4> JTTargets; - for (const JumpTable *JT : llvm::make_second_range(JumpTables)) - JTTargets.insert_range(JT->Entries); +bool BinaryFunction::validateInternalRefDataRelocations() { + if (InternalRefDataRelocations.empty()) + return true; - bool HasUnclaimedReference = false; - for (uint64_t Destination : ExternallyReferencedOffsets) { - // Ignore __builtin_unreachable(). - if (Destination == getSize()) - continue; - // Ignore constant islands - if (isInConstantIsland(Destination + getAddress())) - continue; + // Rely on the user hint that all data refs are valid and only used as + // destinations by indirect branch in the same function. + if (opts::StrictMode) + return true; - if (BinaryBasicBlock *BB = getBasicBlockAtOffset(Destination)) { - // Check if the externally referenced offset is a recognized jump table - // target. - if (JTTargets.contains(BB->getLabel())) - continue; + DenseSet<uint64_t> UnclaimedRelocations(InternalRefDataRelocations); + for (const JumpTable *JT : llvm::make_second_range(JumpTables)) { + uint64_t EntryAddress = JT->getAddress(); + while (EntryAddress < JT->getAddress() + JT->getSize()) { + UnclaimedRelocations.erase(EntryAddress); + EntryAddress += JT->EntrySize; + } + } - if (opts::Verbosity >= 1) { - BC.errs() << "BOLT-WARNING: unclaimed data to code reference (possibly " - << "an unrecognized jump table entry) to " << BB->getName() - << " in " << *this << "\n"; - } - auto L = BC.scopeLock(); - addEntryPoint(*BB); - } else { - BC.errs() << "BOLT-WARNING: unknown data to code reference to offset " - << Twine::utohexstr(Destination) << " in " << *this << "\n"; - setIgnored(); + if (UnclaimedRelocations.empty()) + return true; + + BC.errs() << "BOLT-WARNING: " << UnclaimedRelocations.size() + << " unclaimed data relocation" + << (UnclaimedRelocations.size() > 1 ? 
"s" : "") + << " remain against function " << *this; + if (opts::Verbosity) { + BC.errs() << ":\n"; + for (uint64_t RelocationAddress : UnclaimedRelocations) { + const Relocation *Relocation = BC.getRelocationAt(RelocationAddress); + BC.errs() << " "; + if (Relocation) + BC.errs() << *Relocation; + else + BC.errs() << "<missing relocation>"; + BC.errs() << '\n'; } - HasUnclaimedReference = true; + } else { + BC.errs() << ". Re-run with -v=1 to see the list\n"; } - return !HasUnclaimedReference; + + return false; } bool BinaryFunction::postProcessIndirectBranches( @@ -2177,13 +2197,10 @@ bool BinaryFunction::postProcessIndirectBranches( continue; } - // If this block contains an epilogue code and has an indirect branch, - // then most likely it's a tail call. Otherwise, we cannot tell for sure - // what it is and conservatively reject the function's CFG. - bool IsEpilogue = llvm::any_of(BB, [&](const MCInst &Instr) { - return BC.MIB->isLeave(Instr) || BC.MIB->isPop(Instr); - }); - if (IsEpilogue) { + // If this block contains epilogue code and has an indirect branch, + // then most likely it's a tail call. Otherwise, we cannot tell for + // sure what it is and conservatively reject the function's CFG. + if (BC.MIB->isEpilogue(BB)) { BC.MIB->convertJmpToTailCall(Instr); BB.removeAllSuccessors(); continue; @@ -2221,14 +2238,6 @@ bool BinaryFunction::postProcessIndirectBranches( LastIndirectJumpBB->updateJumpTableSuccessors(); } - // Validate that all data references to function offsets are claimed by - // recognized jump tables. Register externally referenced blocks as entry - // points. - if (!opts::StrictMode && hasInternalReference()) { - if (!validateExternallyReferencedOffsets()) - return false; - } - if (HasUnknownControlFlow && !BC.HasRelocations) return false; @@ -2517,12 +2526,18 @@ Error BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) { CurrentState = State::CFG; // Make any necessary adjustments for indirect branches. - if (!postProcessIndirectBranches(AllocatorId)) { - if (opts::Verbosity) { - BC.errs() << "BOLT-WARNING: failed to post-process indirect branches for " - << *this << '\n'; - } + bool ValidCFG = postProcessIndirectBranches(AllocatorId); + if (!ValidCFG && opts::Verbosity) { + BC.errs() << "BOLT-WARNING: failed to post-process indirect branches for " + << *this << '\n'; + } + + // Validate that all data references to function offsets are claimed by + // recognized jump tables. + if (ValidCFG) + ValidCFG = validateInternalRefDataRelocations(); + if (!ValidCFG) { if (BC.isAArch64()) PreserveNops = BC.HasRelocations; @@ -3234,14 +3249,6 @@ void BinaryFunction::clearDisasmState() { clearList(Instructions); clearList(IgnoredBranches); clearList(TakenBranches); - - if (BC.HasRelocations) { - for (std::pair<const uint32_t, MCSymbol *> &LI : Labels) - BC.UndefinedSymbols.insert(LI.second); - for (MCSymbol *const EndLabel : FunctionEndLabels) - if (EndLabel) - BC.UndefinedSymbols.insert(EndLabel); - } } void BinaryFunction::setTrapOnEntry() { diff --git a/bolt/lib/Core/BinarySection.cpp b/bolt/lib/Core/BinarySection.cpp index 6f07017..e803d17 100644 --- a/bolt/lib/Core/BinarySection.cpp +++ b/bolt/lib/Core/BinarySection.cpp @@ -112,8 +112,10 @@ void BinarySection::emitAsData(MCStreamer &Streamer, RI = ROE; // Skip undefined symbols. 
- auto HasUndefSym = [this](const auto &Relocation) { - return BC.UndefinedSymbols.count(Relocation.Symbol); + auto HasUndefSym = [](const auto &Relocation) { + return Relocation.Symbol && Relocation.Symbol->isTemporary() && + Relocation.Symbol->isUndefined() && + !Relocation.Symbol->isRegistered(); }; if (std::any_of(ROI, ROE, HasUndefSym)) diff --git a/bolt/lib/Core/DebugNames.cpp b/bolt/lib/Core/DebugNames.cpp index 6be2c5a..5272d40 100644 --- a/bolt/lib/Core/DebugNames.cpp +++ b/bolt/lib/Core/DebugNames.cpp @@ -555,7 +555,7 @@ void DWARF5AcceleratorTable::populateAbbrevsMap() { void DWARF5AcceleratorTable::writeEntry(BOLTDWARF5AccelTableData &Entry) { const uint64_t EntryID = getEntryID(Entry); - if (EntryRelativeOffsets.find(EntryID) != EntryRelativeOffsets.end()) + if (EntryRelativeOffsets.contains(EntryID)) EntryRelativeOffsets[EntryID] = EntriesBuffer->size(); const std::optional<DWARF5AccelTable::UnitIndexAndEncoding> EntryRet = diff --git a/bolt/lib/Core/DynoStats.cpp b/bolt/lib/Core/DynoStats.cpp index 1d98187..64a6d12 100644 --- a/bolt/lib/Core/DynoStats.cpp +++ b/bolt/lib/Core/DynoStats.cpp @@ -51,8 +51,6 @@ PrintDynoOpcodeStat("print-dyno-opcode-stats", namespace llvm { namespace bolt { -constexpr const char *DynoStats::Desc[]; - bool DynoStats::operator<(const DynoStats &Other) const { return std::lexicographical_compare( &Stats[FIRST_DYNO_STAT], &Stats[LAST_DYNO_STAT], diff --git a/bolt/lib/Core/Exceptions.cpp b/bolt/lib/Core/Exceptions.cpp index 27656c7..9c33a7c 100644 --- a/bolt/lib/Core/Exceptions.cpp +++ b/bolt/lib/Core/Exceptions.cpp @@ -572,7 +572,7 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const { if (Function.getBinaryContext().isAArch64()) { // Support for pointer authentication: // We need to annotate instructions that modify the RA State, to work - // out the state of each instruction in MarkRAStates Pass. + // out the state of each instruction in PointerAuthCFIAnalyzer Pass. if (Offset != 0) Function.setInstModifiesRAState(DW_CFA_remember_state, Offset); } @@ -583,7 +583,7 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const { if (Function.getBinaryContext().isAArch64()) { // Support for pointer authentication: // We need to annotate instructions that modify the RA State, to work - // out the state of each instruction in MarkRAStates Pass. + // out the state of each instruction in PointerAuthCFIAnalyzer Pass. if (Offset != 0) Function.setInstModifiesRAState(DW_CFA_restore_state, Offset); } @@ -652,7 +652,7 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const { // BasicBlocks, which changes during optimizations. Instead of adding // OpNegateRAState CFIs, an annotation is added to the instruction, to // mark that the instruction modifies the RA State. The actual state for - // instructions are worked out in MarkRAStates based on these + // instructions are worked out in PointerAuthCFIAnalyzer based on these // annotations. if (Offset != 0) Function.setInstModifiesRAState(DW_CFA_AARCH64_negate_ra_state, @@ -660,7 +660,7 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const { else // We cannot Annotate an instruction at Offset == 0. // Instead, we save the initial (Signed) state, and push it to - // MarkRAStates' RAStateStack. + // PointerAuthCFIAnalyzer's RAStateStack. 
Function.setInitialRAState(true); break; } diff --git a/bolt/lib/Core/MCPlusBuilder.cpp b/bolt/lib/Core/MCPlusBuilder.cpp index e96de80..0cb4ba1 100644 --- a/bolt/lib/Core/MCPlusBuilder.cpp +++ b/bolt/lib/Core/MCPlusBuilder.cpp @@ -186,26 +186,21 @@ bool MCPlusBuilder::hasRestoreState(const MCInst &Inst) const { return hasAnnotation(Inst, MCAnnotation::kRestoreState); } -void MCPlusBuilder::setRASigned(MCInst &Inst) const { +void MCPlusBuilder::setRAState(MCInst &Inst, bool State) const { assert(!hasAnnotation(Inst, MCAnnotation::kRASigned)); - setAnnotationOpValue(Inst, MCAnnotation::kRASigned, true); -} - -bool MCPlusBuilder::isRASigned(const MCInst &Inst) const { - return hasAnnotation(Inst, MCAnnotation::kRASigned); -} - -void MCPlusBuilder::setRAUnsigned(MCInst &Inst) const { assert(!hasAnnotation(Inst, MCAnnotation::kRAUnsigned)); - setAnnotationOpValue(Inst, MCAnnotation::kRAUnsigned, true); + if (State) + setAnnotationOpValue(Inst, MCAnnotation::kRASigned, true); + else + setAnnotationOpValue(Inst, MCAnnotation::kRAUnsigned, true); } -bool MCPlusBuilder::isRAUnsigned(const MCInst &Inst) const { - return hasAnnotation(Inst, MCAnnotation::kRAUnsigned); -} - -bool MCPlusBuilder::isRAStateUnknown(const MCInst &Inst) const { - return !(isRAUnsigned(Inst) || isRASigned(Inst)); +std::optional<bool> MCPlusBuilder::getRAState(const MCInst &Inst) const { + if (hasAnnotation(Inst, MCAnnotation::kRASigned)) + return true; + if (hasAnnotation(Inst, MCAnnotation::kRAUnsigned)) + return false; + return std::nullopt; } std::optional<MCLandingPad> MCPlusBuilder::getEHInfo(const MCInst &Inst) const { diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt index 3197e62..ec012f0 100644 --- a/bolt/lib/Passes/CMakeLists.txt +++ b/bolt/lib/Passes/CMakeLists.txt @@ -17,18 +17,18 @@ add_llvm_library(LLVMBOLTPasses IdenticalCodeFolding.cpp IndirectCallPromotion.cpp Inliner.cpp - InsertNegateRAStatePass.cpp Instrumentation.cpp JTFootprintReduction.cpp LongJmp.cpp LoopInversionPass.cpp LivenessAnalysis.cpp MCF.cpp - MarkRAStates.cpp PatchEntries.cpp PAuthGadgetScanner.cpp PettisAndHansen.cpp PLTCall.cpp + PointerAuthCFIAnalyzer.cpp + PointerAuthCFIFixup.cpp ProfileQualityStats.cpp RegAnalysis.cpp RegReAssign.cpp diff --git a/bolt/lib/Passes/IdenticalCodeFolding.cpp b/bolt/lib/Passes/IdenticalCodeFolding.cpp index 8923562..c5c33b7 100644 --- a/bolt/lib/Passes/IdenticalCodeFolding.cpp +++ b/bolt/lib/Passes/IdenticalCodeFolding.cpp @@ -377,9 +377,11 @@ namespace bolt { void IdenticalCodeFolding::initVTableReferences(const BinaryContext &BC) { for (const auto &[Address, Data] : BC.getBinaryData()) { // Filter out all symbols that are not vtables. 
- if (!Data->getName().starts_with("_ZTV")) + if (!Data->getName().starts_with("_ZTV") && // vtable + !Data->getName().starts_with("_ZTCN")) // construction vtable continue; - for (uint64_t I = Address, End = I + Data->getSize(); I < End; I += 8) + for (uint64_t I = Address, End = I + Data->getSize(); I < End; + I += VTableAddressGranularity) setAddressUsedInVTable(I); } } @@ -437,8 +439,9 @@ void IdenticalCodeFolding::markFunctionsUnsafeToFold(BinaryContext &BC) { NamedRegionTimer MarkFunctionsUnsafeToFoldTimer( "markFunctionsUnsafeToFold", "markFunctionsUnsafeToFold", "ICF breakdown", "ICF breakdown", opts::TimeICF); - if (!BC.isX86()) - BC.outs() << "BOLT-WARNING: safe ICF is only supported for x86\n"; + if (!BC.isX86() && !BC.isAArch64()) + BC.outs() + << "BOLT-WARNING: safe ICF is only supported for x86 and AArch64\n"; analyzeDataRelocations(BC); analyzeFunctions(BC); } diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp index 9b28c7e..0740fce 100644 --- a/bolt/lib/Passes/Inliner.cpp +++ b/bolt/lib/Passes/Inliner.cpp @@ -195,6 +195,13 @@ InliningInfo getInliningInfo(const BinaryFunction &BF) { if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst)) continue; + // Pointer signing and authenticatin instructions are used around + // Push and Pop. These are also straightforward to handle. + if (BC.isAArch64() && + (BC.MIB->isPSignOnLR(Inst) || BC.MIB->isPAuthOnLR(Inst) || + BC.MIB->isPAuthAndRet(Inst))) + continue; + DirectSP |= BC.MIB->hasDefOfPhysReg(Inst, SPReg) || BC.MIB->hasUseOfPhysReg(Inst, SPReg); } @@ -338,6 +345,18 @@ Inliner::inlineCall(BinaryBasicBlock &CallerBB, BC.Ctx.get()); } + // Handling fused authentication and return instructions (Armv8.3-A): + // if the Callee does not end in a tailcall, the return will be removed + // from the inlined block. If that return is RETA(A|B), we have to keep + // the authentication part. + // RETAA -> AUTIASP + // RETAB -> AUTIBSP + if (!CSIsTailCall && BC.isAArch64() && BC.MIB->isPAuthAndRet(Inst)) { + MCInst Auth; + BC.MIB->createMatchingAuth(Inst, Auth); + InsertII = + std::next(InlinedBB->insertInstruction(InsertII, std::move(Auth))); + } if (CSIsTailCall || (!MIB.isCall(Inst) && !MIB.isReturn(Inst))) { InsertII = std::next(InlinedBB->insertInstruction(InsertII, std::move(Inst))); @@ -472,6 +491,32 @@ bool Inliner::inlineCallsInFunction(BinaryFunction &Function) { } } + // AArch64 BTI: + // If the callee has an indirect tailcall (BR), we would transform it to + // an indirect call (BLR) in InlineCall. Because of this, we would have to + // update the BTI at the target of the tailcall. However, these targets + // are not known. Instead, we skip inlining blocks with indirect + // tailcalls. + auto HasIndirectTailCall = [&](const BinaryFunction &BF) -> bool { + for (const auto &BB : BF) { + for (const auto &II : BB) { + if (BC.MIB->isIndirectBranch(II) && BC.MIB->isTailCall(II)) { + return true; + } + } + } + return false; + }; + + if (BC.isAArch64() && BC.usesBTI() && + HasIndirectTailCall(*TargetFunction)) { + ++InstIt; + LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Skipping inlining block with tailcall" + << " in " << Function << " : " << BB->getName() + << " to keep BTIs consistent.\n"); + continue; + } + LLVM_DEBUG(dbgs() << "BOLT-DEBUG: inlining call to " << *TargetFunction << " in " << Function << " : " << BB->getName() << ". 
Count: " << BB->getKnownExecutionCount() diff --git a/bolt/lib/Passes/InsertNegateRAStatePass.cpp b/bolt/lib/Passes/InsertNegateRAStatePass.cpp deleted file mode 100644 index 33664e1..0000000 --- a/bolt/lib/Passes/InsertNegateRAStatePass.cpp +++ /dev/null @@ -1,142 +0,0 @@ -//===- bolt/Passes/InsertNegateRAStatePass.cpp ----------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the InsertNegateRAStatePass class. It inserts -// OpNegateRAState CFIs to places where the state of two consecutive -// instructions are different. -// -//===----------------------------------------------------------------------===// -#include "bolt/Passes/InsertNegateRAStatePass.h" -#include "bolt/Core/BinaryFunction.h" -#include "bolt/Core/ParallelUtilities.h" -#include <cstdlib> - -using namespace llvm; - -namespace llvm { -namespace bolt { - -void InsertNegateRAState::runOnFunction(BinaryFunction &BF) { - BinaryContext &BC = BF.getBinaryContext(); - - if (BF.getState() == BinaryFunction::State::Empty) - return; - - if (BF.getState() != BinaryFunction::State::CFG && - BF.getState() != BinaryFunction::State::CFG_Finalized) { - BC.outs() << "BOLT-INFO: no CFG for " << BF.getPrintName() - << " in InsertNegateRAStatePass\n"; - return; - } - - inferUnknownStates(BF); - - for (FunctionFragment &FF : BF.getLayout().fragments()) { - coverFunctionFragmentStart(BF, FF); - bool FirstIter = true; - MCInst PrevInst; - // As this pass runs after function splitting, we should only check - // consecutive instructions inside FunctionFragments. - for (BinaryBasicBlock *BB : FF) { - for (auto It = BB->begin(); It != BB->end(); ++It) { - MCInst &Inst = *It; - if (BC.MIB->isCFI(Inst)) - continue; - if (!FirstIter) { - // Consecutive instructions with different RAState means we need to - // add a OpNegateRAState. - if ((BC.MIB->isRASigned(PrevInst) && BC.MIB->isRAUnsigned(Inst)) || - (BC.MIB->isRAUnsigned(PrevInst) && BC.MIB->isRASigned(Inst))) { - It = BF.addCFIInstruction( - BB, It, MCCFIInstruction::createNegateRAState(nullptr)); - } - } else { - FirstIter = false; - } - PrevInst = *It; - } - } - } -} - -void InsertNegateRAState::coverFunctionFragmentStart(BinaryFunction &BF, - FunctionFragment &FF) { - BinaryContext &BC = BF.getBinaryContext(); - if (FF.empty()) - return; - // Find the first BB in the FF which has Instructions. - // BOLT can generate empty BBs at function splitting which are only used as - // target labels. We should add the negate-ra-state CFI to the first - // non-empty BB. - auto *FirstNonEmpty = - std::find_if(FF.begin(), FF.end(), [](BinaryBasicBlock *BB) { - // getFirstNonPseudo returns BB.end() if it does not find any - // Instructions. - return BB->getFirstNonPseudo() != BB->end(); - }); - // If a function is already split in the input, the first FF can also start - // with Signed state. This covers that scenario as well. 
- if (BC.MIB->isRASigned(*((*FirstNonEmpty)->begin()))) { - BF.addCFIInstruction(*FirstNonEmpty, (*FirstNonEmpty)->begin(), - MCCFIInstruction::createNegateRAState(nullptr)); - } -} - -void InsertNegateRAState::inferUnknownStates(BinaryFunction &BF) { - BinaryContext &BC = BF.getBinaryContext(); - bool FirstIter = true; - MCInst PrevInst; - for (BinaryBasicBlock &BB : BF) { - for (MCInst &Inst : BB) { - if (BC.MIB->isCFI(Inst)) - continue; - - if (!FirstIter && BC.MIB->isRAStateUnknown(Inst)) { - if (BC.MIB->isRASigned(PrevInst) || BC.MIB->isPSignOnLR(PrevInst)) { - BC.MIB->setRASigned(Inst); - } else if (BC.MIB->isRAUnsigned(PrevInst) || - BC.MIB->isPAuthOnLR(PrevInst)) { - BC.MIB->setRAUnsigned(Inst); - } - } else { - FirstIter = false; - } - PrevInst = Inst; - } - } -} - -Error InsertNegateRAState::runOnFunctions(BinaryContext &BC) { - std::atomic<uint64_t> FunctionsModified{0}; - ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { - FunctionsModified++; - runOnFunction(BF); - }; - - ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) { - // We can skip functions which did not include negate-ra-state CFIs. This - // includes code using pac-ret hardening as well, if the binary is - // compiled with `-fno-exceptions -fno-unwind-tables - // -fno-asynchronous-unwind-tables` - return !BF.containedNegateRAState() || BF.isIgnored(); - }; - - ParallelUtilities::runOnEachFunction( - BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun, - SkipPredicate, "InsertNegateRAStatePass"); - - BC.outs() << "BOLT-INFO: rewritten pac-ret DWARF info in " - << FunctionsModified << " out of " << BC.getBinaryFunctions().size() - << " functions " - << format("(%.2lf%%).\n", (100.0 * FunctionsModified) / - BC.getBinaryFunctions().size()); - return Error::success(); -} - -} // end namespace bolt -} // end namespace llvm diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp index 01b350b..d38a7fa 100644 --- a/bolt/lib/Passes/PAuthGadgetScanner.cpp +++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp @@ -547,7 +547,7 @@ protected: // Being trusted is a strictly stronger property than being // safe-to-dereference. - assert(!Next.TrustedRegs.test(Next.SafeToDerefRegs) && + assert(Next.TrustedRegs.subsetOf(Next.SafeToDerefRegs) && "SafeToDerefRegs should contain all TrustedRegs"); return Next; diff --git a/bolt/lib/Passes/MarkRAStates.cpp b/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp index b262d66..697b1bb 100644 --- a/bolt/lib/Passes/MarkRAStates.cpp +++ b/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp @@ -1,4 +1,4 @@ -//===- bolt/Passes/MarkRAStates.cpp ---------------------------------===// +//===- bolt/Passes/PointerAuthCFIAnalyzer.cpp -----------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// This file implements the MarkRAStates class. +// This file implements the PointerAuthCFIAnalyzer class. // Three CFIs have an influence on the RA State of an instruction: // - NegateRAState flips the RA State, // - RememberState pushes the RA State to a stack, @@ -16,10 +16,10 @@ // the RA State of each instruction, and save it as new MCAnnotations. The new // annotations are Signing, Signed, Authenticating and Unsigned. 
After // optimizations, .cfi_negate_ra_state CFIs are added to the places where the -// state changes in InsertNegateRAStatePass. +// state changes in PointerAuthCFIFixup. // //===----------------------------------------------------------------------===// -#include "bolt/Passes/MarkRAStates.h" +#include "bolt/Passes/PointerAuthCFIAnalyzer.h" #include "bolt/Core/BinaryFunction.h" #include "bolt/Core/ParallelUtilities.h" #include <cstdlib> @@ -28,10 +28,14 @@ using namespace llvm; +namespace opts { +extern llvm::cl::opt<unsigned> Verbosity; +} // namespace opts + namespace llvm { namespace bolt { -bool MarkRAStates::runOnFunction(BinaryFunction &BF) { +bool PointerAuthCFIAnalyzer::runOnFunction(BinaryFunction &BF) { BinaryContext &BC = BF.getBinaryContext(); @@ -43,9 +47,10 @@ bool MarkRAStates::runOnFunction(BinaryFunction &BF) { // Not all functions have .cfi_negate_ra_state in them. But if one does, // we expect psign/pauth instructions to have the hasNegateRAState // annotation. - BC.outs() << "BOLT-INFO: inconsistent RAStates in function " - << BF.getPrintName() - << ": ptr sign/auth inst without .cfi_negate_ra_state\n"; + if (opts::Verbosity >= 1) + BC.outs() << "BOLT-INFO: inconsistent RAStates in function " + << BF.getPrintName() + << ": ptr sign/auth inst without .cfi_negate_ra_state\n"; std::lock_guard<std::mutex> Lock(IgnoreMutex); BF.setIgnored(); return false; @@ -65,36 +70,30 @@ bool MarkRAStates::runOnFunction(BinaryFunction &BF) { if (BC.MIB->isPSignOnLR(Inst)) { if (RAState) { // RA signing instructions should only follow unsigned RA state. - BC.outs() << "BOLT-INFO: inconsistent RAStates in function " - << BF.getPrintName() - << ": ptr signing inst encountered in Signed RA state\n"; + if (opts::Verbosity >= 1) + BC.outs() << "BOLT-INFO: inconsistent RAStates in function " + << BF.getPrintName() + << ": ptr signing inst encountered in Signed RA state\n"; std::lock_guard<std::mutex> Lock(IgnoreMutex); BF.setIgnored(); return false; } - // The signing instruction itself is unsigned, the next will be - // signed. - BC.MIB->setRAUnsigned(Inst); } else if (BC.MIB->isPAuthOnLR(Inst)) { if (!RAState) { // RA authenticating instructions should only follow signed RA state. - BC.outs() << "BOLT-INFO: inconsistent RAStates in function " - << BF.getPrintName() - << ": ptr authenticating inst encountered in Unsigned RA " - "state\n"; + if (opts::Verbosity >= 1) + BC.outs() << "BOLT-INFO: inconsistent RAStates in function " + << BF.getPrintName() + << ": ptr authenticating inst encountered in Unsigned RA " + "state\n"; std::lock_guard<std::mutex> Lock(IgnoreMutex); BF.setIgnored(); return false; } - // The authenticating instruction itself is signed, but the next will be - // unsigned. - BC.MIB->setRASigned(Inst); - } else if (RAState) { - BC.MIB->setRASigned(Inst); - } else { - BC.MIB->setRAUnsigned(Inst); } + BC.MIB->setRAState(Inst, RAState); + // Updating RAState. All updates are valid from the next instruction. // Because the same instruction can have remember and restore, the order // here is relevant. 
This is the reason to loop over Annotations instead @@ -118,7 +117,7 @@ bool MarkRAStates::runOnFunction(BinaryFunction &BF) { return true; } -Error MarkRAStates::runOnFunctions(BinaryContext &BC) { +Error PointerAuthCFIAnalyzer::runOnFunctions(BinaryContext &BC) { std::atomic<uint64_t> FunctionsIgnored{0}; ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { if (!runOnFunction(BF)) { @@ -138,14 +137,35 @@ Error MarkRAStates::runOnFunctions(BinaryContext &BC) { return P.second.containedNegateRAState() && !P.second.isIgnored(); }); + if (Total == 0) + return Error::success(); + ParallelUtilities::runOnEachFunction( BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun, - SkipPredicate, "MarkRAStates"); - BC.outs() << "BOLT-INFO: MarkRAStates ran on " << Total + SkipPredicate, "PointerAuthCFIAnalyzer"); + + float IgnoredPercent = (100.0 * FunctionsIgnored) / Total; + BC.outs() << "BOLT-INFO: PointerAuthCFIAnalyzer ran on " << Total << " functions. Ignored " << FunctionsIgnored << " functions " - << format("(%.2lf%%)", (100.0 * FunctionsIgnored) / Total) + << format("(%.2lf%%)", IgnoredPercent) << " because of CFI inconsistencies\n"; + // Errors in the input are expected from two sources: + // - compilers emitting incorrect CFIs. This happens more frequently with + // older compiler versions, but it should not account for a large + // percentage. + // - input binary is using synchronous unwind tables. This means that after + // call sites, the unwind CFIs are dropped: the pass sees missing + // .cfi_negate_ra_state from autiasp instructions. If this is the case, a + // larger percentage of functions will be ignored. + // + // This is why the 10% threshold was chosen: we should not warn about + // synchronous unwind tables if only a few % are ignored. + if (IgnoredPercent >= 10.0) + BC.outs() << "BOLT-WARNING: PointerAuthCFIAnalyzer only supports " + "asynchronous unwind tables. For C compilers, see " + "-fasynchronous-unwind-tables.\n"; + return Error::success(); } diff --git a/bolt/lib/Passes/PointerAuthCFIFixup.cpp b/bolt/lib/Passes/PointerAuthCFIFixup.cpp new file mode 100644 index 0000000..56086da --- /dev/null +++ b/bolt/lib/Passes/PointerAuthCFIFixup.cpp @@ -0,0 +1,268 @@ +//===- bolt/Passes/PointerAuthCFIFixup.cpp --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the PointerAuthCFIFixup class. It inserts +// OpNegateRAState CFIs to places where the state of two consecutive +// instructions are different. 
+// +//===----------------------------------------------------------------------===// +#include "bolt/Passes/PointerAuthCFIFixup.h" +#include "bolt/Core/BinaryFunction.h" +#include "bolt/Core/ParallelUtilities.h" +#include <cstdlib> + +using namespace llvm; + +namespace llvm { +namespace bolt { + +static bool PassFailed = false; + +void PointerAuthCFIFixup::runOnFunction(BinaryFunction &BF) { + if (PassFailed) + return; + + BinaryContext &BC = BF.getBinaryContext(); + + if (BF.getState() == BinaryFunction::State::Empty) + return; + + if (BF.getState() != BinaryFunction::State::CFG && + BF.getState() != BinaryFunction::State::CFG_Finalized) { + BC.outs() << "BOLT-INFO: no CFG for " << BF.getPrintName() + << " in PointerAuthCFIFixup\n"; + return; + } + + inferUnknownStates(BF); + + for (FunctionFragment &FF : BF.getLayout().fragments()) { + coverFunctionFragmentStart(BF, FF); + bool FirstIter = true; + bool PrevRAState = false; + // As this pass runs after function splitting, we should only check + // consecutive instructions inside FunctionFragments. + for (BinaryBasicBlock *BB : FF) { + for (auto It = BB->begin(); It != BB->end(); ++It) { + MCInst &Inst = *It; + if (BC.MIB->isCFI(Inst)) + continue; + std::optional<bool> RAState = BC.MIB->getRAState(Inst); + if (!RAState.has_value()) { + BC.errs() << "BOLT-ERROR: unknown RAState after inferUnknownStates " + << " in function " << BF.getPrintName() << "\n"; + PassFailed = true; + return; + } + if (!FirstIter) { + // Consecutive instructions with different RAState means we need to + // add a OpNegateRAState. + if (*RAState != PrevRAState) + It = BF.addCFIInstruction( + BB, It, MCCFIInstruction::createNegateRAState(nullptr)); + } else { + FirstIter = false; + } + PrevRAState = *RAState; + } + } + } +} + +void PointerAuthCFIFixup::inferUnknownStates(BinaryFunction &BF) { + BinaryContext &BC = BF.getBinaryContext(); + + // Fill in missing RAStates in simple cases (inside BBs). + for (BinaryBasicBlock &BB : BF) { + fillUnknownStateInBB(BC, BB); + } + // BasicBlocks which are made entirely of "new instructions" (instructions + // without RAState annotation) are stubs, and do not have correct unwind info. + // We should iterate in layout order and fill them based on previous known + // RAState. + fillUnknownStubs(BF); +} + +void PointerAuthCFIFixup::coverFunctionFragmentStart(BinaryFunction &BF, + FunctionFragment &FF) { + BinaryContext &BC = BF.getBinaryContext(); + if (FF.empty()) + return; + // Find the first BB in the FF which has Instructions. + // BOLT can generate empty BBs at function splitting which are only used as + // target labels. We should add the negate-ra-state CFI to the first + // non-empty BB. + auto *FirstNonEmpty = + std::find_if(FF.begin(), FF.end(), [](BinaryBasicBlock *BB) { + // getFirstNonPseudo returns BB.end() if it does not find any + // Instructions. + return BB->getFirstNonPseudo() != BB->end(); + }); + // If a function is already split in the input, the first FF can also start + // with Signed state. This covers that scenario as well. 
+ auto II = (*FirstNonEmpty)->getFirstNonPseudo(); + std::optional<bool> RAState = BC.MIB->getRAState(*II); + if (!RAState.has_value()) { + BC.errs() << "BOLT-ERROR: unknown RAState after inferUnknownStates " + << " in function " << BF.getPrintName() << "\n"; + PassFailed = true; + return; + } + if (*RAState) + BF.addCFIInstruction(*FirstNonEmpty, II, + MCCFIInstruction::createNegateRAState(nullptr)); +} + +std::optional<bool> +PointerAuthCFIFixup::getFirstKnownRAState(BinaryContext &BC, + BinaryBasicBlock &BB) { + for (const MCInst &Inst : BB) { + if (BC.MIB->isCFI(Inst)) + continue; + std::optional<bool> RAState = BC.MIB->getRAState(Inst); + if (RAState.has_value()) + return RAState; + } + return std::nullopt; +} + +bool PointerAuthCFIFixup::isUnknownBlock(BinaryContext &BC, + BinaryBasicBlock &BB) { + std::optional<bool> FirstRAState = getFirstKnownRAState(BC, BB); + return !FirstRAState.has_value(); +} + +void PointerAuthCFIFixup::fillUnknownStateInBB(BinaryContext &BC, + BinaryBasicBlock &BB) { + + auto First = BB.getFirstNonPseudo(); + if (First == BB.end()) + return; + // If the first instruction has unknown RAState, we should copy the first + // known RAState. + std::optional<bool> RAState = BC.MIB->getRAState(*First); + if (!RAState.has_value()) { + std::optional<bool> FirstRAState = getFirstKnownRAState(BC, BB); + if (!FirstRAState.has_value()) + // We fill unknown BBs later. + return; + + BC.MIB->setRAState(*First, *FirstRAState); + } + + // At this point we know the RAState of the first instruction, + // so we can propagate the RAStates to all subsequent unknown instructions. + MCInst Prev = *First; + for (auto It = First + 1; It != BB.end(); ++It) { + MCInst &Inst = *It; + if (BC.MIB->isCFI(Inst)) + continue; + + // No need to check for nullopt: we only entered this loop after the first + // instruction had its RAState set, and RAState is always set for the + // previous instruction in the previous iteration of the loop. + std::optional<bool> PrevRAState = BC.MIB->getRAState(Prev); + + std::optional<bool> RAState = BC.MIB->getRAState(Inst); + if (!RAState.has_value()) { + if (BC.MIB->isPSignOnLR(Prev)) + PrevRAState = true; + else if (BC.MIB->isPAuthOnLR(Prev)) + PrevRAState = false; + BC.MIB->setRAState(Inst, *PrevRAState); + } + Prev = Inst; + } +} + +void PointerAuthCFIFixup::markUnknownBlock(BinaryContext &BC, + BinaryBasicBlock &BB, bool State) { + // If we call this when an Instruction has either kRASigned or kRAUnsigned + // annotation, setRASigned or setRAUnsigned would fail. + assert(isUnknownBlock(BC, BB) && + "markUnknownBlock should only be called on unknown blocks"); + for (MCInst &Inst : BB) { + if (BC.MIB->isCFI(Inst)) + continue; + BC.MIB->setRAState(Inst, State); + } +} + +void PointerAuthCFIFixup::fillUnknownStubs(BinaryFunction &BF) { + BinaryContext &BC = BF.getBinaryContext(); + bool FirstIter = true; + MCInst PrevInst; + for (FunctionFragment &FF : BF.getLayout().fragments()) { + for (BinaryBasicBlock *BB : FF) { + if (FirstIter) { + FirstIter = false; + if (isUnknownBlock(BC, *BB)) + // If the first BasicBlock is unknown, the function's entry RAState + // should be used. + markUnknownBlock(BC, *BB, BF.getInitialRAState()); + } else if (isUnknownBlock(BC, *BB)) { + // As explained in issue #160989, the unwind info is incorrect for + // stubs. Indicating the correct RAState without the rest of the unwind + // info being correct is not useful. Instead, we copy the RAState from + // the previous instruction. 
+ std::optional<bool> PrevRAState = BC.MIB->getRAState(PrevInst); + if (!PrevRAState.has_value()) { + // No non-cfi instruction encountered in the function yet. + // This means the RAState is the same as at the function entry. + markUnknownBlock(BC, *BB, BF.getInitialRAState()); + continue; + } + + if (BC.MIB->isPSignOnLR(PrevInst)) + PrevRAState = true; + else if (BC.MIB->isPAuthOnLR(PrevInst)) + PrevRAState = false; + markUnknownBlock(BC, *BB, *PrevRAState); + } + // This function iterates on BasicBlocks, so the PrevInst has to be + // updated to the last instruction of the current BasicBlock. If the + // BasicBlock is empty, or only has PseudoInstructions, PrevInst will not + // be updated. + auto Last = BB->getLastNonPseudo(); + if (Last != BB->rend()) + PrevInst = *Last; + } + } +} + +Error PointerAuthCFIFixup::runOnFunctions(BinaryContext &BC) { + std::atomic<uint64_t> FunctionsModified{0}; + ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { + FunctionsModified++; + runOnFunction(BF); + }; + + ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) { + // We can skip functions which did not include negate-ra-state CFIs. This + // includes code using pac-ret hardening as well, if the binary is + // compiled with `-fno-exceptions -fno-unwind-tables + // -fno-asynchronous-unwind-tables` + return !BF.containedNegateRAState() || BF.isIgnored(); + }; + + ParallelUtilities::runOnEachFunction( + BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun, + SkipPredicate, "PointerAuthCFIFixup"); + + BC.outs() << "BOLT-INFO: rewritten pac-ret DWARF info in " + << FunctionsModified << " out of " << BC.getBinaryFunctions().size() + << " functions " + << format("(%.2lf%%).\n", (100.0 * FunctionsModified) / + BC.getBinaryFunctions().size()); + if (PassFailed) + return createFatalBOLTError(""); + return Error::success(); +} + +} // end namespace bolt +} // end namespace llvm diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index 8554683..6b96901 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -159,8 +159,6 @@ std::vector<SectionNameAndRange> getTextSections(const BinaryContext *BC) { } } -constexpr uint64_t DataAggregator::KernelBaseAddr; - DataAggregator::~DataAggregator() { deleteTempFiles(); } namespace { @@ -2399,10 +2397,7 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC, PseudoProbeDecoder->getAddress2ProbesMap(); BinaryFunction::FragmentsSetTy Fragments(BF->Fragments); Fragments.insert(BF); - DenseMap< - uint32_t, - std::vector<std::reference_wrapper<const MCDecodedPseudoProbe>>> - BlockProbes; + DenseMap<uint32_t, YAMLProfileWriter::BlockProbeCtx> BlockCtx; for (const BinaryFunction *F : Fragments) { const uint64_t FuncAddr = F->getAddress(); for (const MCDecodedPseudoProbe &Probe : @@ -2410,15 +2405,14 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC, const uint32_t OutputAddress = Probe.getAddress(); const uint32_t InputOffset = BAT->translate( FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true); - const unsigned BlockIndex = getBlock(InputOffset).second; - BlockProbes[BlockIndex].emplace_back(Probe); + const auto &[BlockOffset, BlockIndex] = getBlock(InputOffset); + BlockCtx[BlockIndex].addBlockProbe(InlineTreeNodeId, Probe, + InputOffset - BlockOffset); } } - for (auto &[Block, Probes] : BlockProbes) { - YamlBF.Blocks[Block].PseudoProbes = - YAMLProfileWriter::writeBlockProbes(Probes, InlineTreeNodeId); - } + for (auto 
&[Block, Ctx] : BlockCtx) + Ctx.finalize(YamlBF.Blocks[Block]); } // Skip printing if there's no profile data llvm::erase_if( diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp index 1a61949..5fb65153 100644 --- a/bolt/lib/Profile/StaleProfileMatching.cpp +++ b/bolt/lib/Profile/StaleProfileMatching.cpp @@ -348,26 +348,10 @@ private: return It->second; }; - auto matchPseudoProbeInfo = [&](const yaml::bolt::PseudoProbeInfo - &ProfileProbe, - uint32_t NodeId) { - for (uint64_t Index = 0; Index < 64; ++Index) - if (ProfileProbe.BlockMask & 1ull << Index) - ++FlowBlockMatchCount[matchProfileProbeToBlock(NodeId, Index + 1)]; - for (const auto &ProfileProbes : - {ProfileProbe.BlockProbes, ProfileProbe.IndCallProbes, - ProfileProbe.CallProbes}) - for (uint64_t ProfileProbe : ProfileProbes) - ++FlowBlockMatchCount[matchProfileProbeToBlock(NodeId, ProfileProbe)]; - }; - - for (const yaml::bolt::PseudoProbeInfo &ProfileProbe : BlockPseudoProbes) { - if (!ProfileProbe.InlineTreeNodes.empty()) - for (uint32_t ProfileInlineTreeNode : ProfileProbe.InlineTreeNodes) - matchPseudoProbeInfo(ProfileProbe, ProfileInlineTreeNode); - else - matchPseudoProbeInfo(ProfileProbe, ProfileProbe.InlineTreeIndex); - } + for (const yaml::bolt::PseudoProbeInfo &ProfileProbe : BlockPseudoProbes) + for (uint32_t Node : ProfileProbe.InlineTreeNodes) + for (uint64_t Probe : ProfileProbe.BlockProbes) + ++FlowBlockMatchCount[matchProfileProbeToBlock(Node, Probe)]; uint32_t BestMatchCount = 0; uint32_t TotalMatchCount = 0; const FlowBlock *BestMatchBlock = nullptr; diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp index 5c631f9..cd4e77b 100644 --- a/bolt/lib/Profile/YAMLProfileWriter.cpp +++ b/bolt/lib/Profile/YAMLProfileWriter.cpp @@ -129,50 +129,62 @@ YAMLProfileWriter::convertPseudoProbeDesc(const MCPseudoProbeDecoder &Decoder) { return {Desc, InlineTree}; } -std::vector<yaml::bolt::PseudoProbeInfo> -YAMLProfileWriter::convertNodeProbes(NodeIdToProbes &NodeProbes) { - struct BlockProbeInfoHasher { - size_t operator()(const yaml::bolt::PseudoProbeInfo &BPI) const { - return llvm::hash_combine(llvm::hash_combine_range(BPI.BlockProbes), - llvm::hash_combine_range(BPI.CallProbes), - llvm::hash_combine_range(BPI.IndCallProbes)); +void YAMLProfileWriter::BlockProbeCtx::addBlockProbe( + const InlineTreeMapTy &Map, const MCDecodedPseudoProbe &Probe, + uint32_t ProbeOffset) { + auto It = Map.find(Probe.getInlineTreeNode()); + if (It == Map.end()) + return; + auto NodeId = It->second; + uint32_t Index = Probe.getIndex(); + if (Probe.isCall()) + CallProbes[ProbeOffset] = + Call{Index, NodeId, Probe.isIndirectCall(), false}; + else + NodeToProbes[NodeId].emplace_back(Index); +} + +void YAMLProfileWriter::BlockProbeCtx::finalize( + yaml::bolt::BinaryBasicBlockProfile &YamlBB) { + // Hash block probes by vector + struct ProbeHasher { + size_t operator()(const ArrayRef<uint64_t> Probes) const { + return llvm::hash_combine_range(Probes); } }; - // Check identical BlockProbeInfo structs and merge them - std::unordered_map<yaml::bolt::PseudoProbeInfo, std::vector<uint32_t>, - BlockProbeInfoHasher> - BPIToNodes; - for (auto &[NodeId, Probes] : NodeProbes) { - yaml::bolt::PseudoProbeInfo BPI; - BPI.BlockProbes = std::vector(Probes[0].begin(), Probes[0].end()); - BPI.IndCallProbes = std::vector(Probes[1].begin(), Probes[1].end()); - BPI.CallProbes = std::vector(Probes[2].begin(), Probes[2].end()); - BPIToNodes[BPI].push_back(NodeId); + // Check identical 
block probes and merge them + std::unordered_map<std::vector<uint64_t>, std::vector<uint32_t>, ProbeHasher> + ProbesToNodes; + for (auto &[NodeId, Probes] : NodeToProbes) { + llvm::sort(Probes); + ProbesToNodes[Probes].emplace_back(NodeId); } - - auto handleMask = [](const auto &Ids, auto &Vec, auto &Mask) { - for (auto Id : Ids) - if (Id > 64) - Vec.emplace_back(Id); - else - Mask |= 1ull << (Id - 1); - }; - - // Add to YAML with merged nodes/block mask optimizations - std::vector<yaml::bolt::PseudoProbeInfo> YamlProbes; - YamlProbes.reserve(BPIToNodes.size()); - for (const auto &[BPI, Nodes] : BPIToNodes) { - auto &YamlBPI = YamlProbes.emplace_back(yaml::bolt::PseudoProbeInfo()); - YamlBPI.CallProbes = BPI.CallProbes; - YamlBPI.IndCallProbes = BPI.IndCallProbes; - if (Nodes.size() == 1) - YamlBPI.InlineTreeIndex = Nodes.front(); - else - YamlBPI.InlineTreeNodes = Nodes; - handleMask(BPI.BlockProbes, YamlBPI.BlockProbes, YamlBPI.BlockMask); + for (auto &[Probes, Nodes] : ProbesToNodes) { + llvm::sort(Nodes); + YamlBB.PseudoProbes.emplace_back( + yaml::bolt::PseudoProbeInfo{Probes, Nodes}); + } + for (yaml::bolt::CallSiteInfo &CSI : YamlBB.CallSites) { + auto It = CallProbes.find(CSI.Offset); + if (It == CallProbes.end()) + continue; + Call &Probe = It->second; + CSI.Probe = Probe.Id; + CSI.InlineTreeNode = Probe.Node; + CSI.Indirect = Probe.Indirect; + Probe.Used = true; + } + for (const auto &[Offset, Probe] : CallProbes) { + if (Probe.Used) + continue; + yaml::bolt::CallSiteInfo CSI; + CSI.Offset = Offset; + CSI.Probe = Probe.Id; + CSI.InlineTreeNode = Probe.Node; + CSI.Indirect = Probe.Indirect; + YamlBB.CallSites.emplace_back(CSI); } - return YamlProbes; } std::tuple<std::vector<yaml::bolt::InlineTreeNode>, @@ -343,12 +355,13 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS, const AddressProbesMap &ProbeMap = PseudoProbeDecoder->getAddress2ProbesMap(); const uint64_t FuncAddr = BF.getAddress(); - const std::pair<uint64_t, uint64_t> &BlockRange = - BB->getInputAddressRange(); - const std::pair<uint64_t, uint64_t> BlockAddrRange = { - FuncAddr + BlockRange.first, FuncAddr + BlockRange.second}; - auto Probes = ProbeMap.find(BlockAddrRange.first, BlockAddrRange.second); - YamlBB.PseudoProbes = writeBlockProbes(Probes, InlineTreeNodeId); + auto [Start, End] = BB->getInputAddressRange(); + Start += FuncAddr; + End += FuncAddr; + BlockProbeCtx Ctx; + for (const MCDecodedPseudoProbe &Probe : ProbeMap.find(Start, End)) + Ctx.addBlockProbe(InlineTreeNodeId, Probe, Probe.getAddress() - Start); + Ctx.finalize(YamlBB); } YamlBF.Blocks.emplace_back(YamlBB); diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp index 1a0f6d7..85f23dc 100644 --- a/bolt/lib/Rewrite/BinaryPassManager.cpp +++ b/bolt/lib/Rewrite/BinaryPassManager.cpp @@ -19,15 +19,15 @@ #include "bolt/Passes/IdenticalCodeFolding.h" #include "bolt/Passes/IndirectCallPromotion.h" #include "bolt/Passes/Inliner.h" -#include "bolt/Passes/InsertNegateRAStatePass.h" #include "bolt/Passes/Instrumentation.h" #include "bolt/Passes/JTFootprintReduction.h" #include "bolt/Passes/LongJmp.h" #include "bolt/Passes/LoopInversionPass.h" #include "bolt/Passes/MCF.h" -#include "bolt/Passes/MarkRAStates.h" #include "bolt/Passes/PLTCall.h" #include "bolt/Passes/PatchEntries.h" +#include "bolt/Passes/PointerAuthCFIAnalyzer.h" +#include "bolt/Passes/PointerAuthCFIFixup.h" #include "bolt/Passes/ProfileQualityStats.h" #include "bolt/Passes/RegReAssign.h" #include "bolt/Passes/ReorderData.h" @@ -134,6 
+134,15 @@ static cl::opt<bool> PrintAArch64Relaxation( cl::desc("print functions after ADR/LDR Relaxation pass"), cl::Hidden, cl::cat(BoltOptCategory)); +cl::opt<bool> PrintPAuthCFIAnalyzer( + "print-pointer-auth-cfi-analyzer", + cl::desc("print functions after PointerAuthCFIAnalyzer pass"), cl::Hidden, + cl::cat(BoltOptCategory)); +static cl::opt<bool> PrintPAuthCFIFixup( + "print-pointer-auth-cfi-fixup", + cl::desc("print functions after PointerAuthCFIFixup pass"), cl::Hidden, + cl::cat(BoltOptCategory)); + static cl::opt<bool> PrintLongJmp("print-longjmp", cl::desc("print functions after longjmp pass"), cl::Hidden, @@ -362,7 +371,8 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { BinaryFunctionPassManager Manager(BC); if (BC.isAArch64()) - Manager.registerPass(std::make_unique<MarkRAStates>()); + Manager.registerPass( + std::make_unique<PointerAuthCFIAnalyzer>(PrintPAuthCFIAnalyzer)); Manager.registerPass( std::make_unique<EstimateEdgeCounts>(PrintEstimateEdgeCounts)); @@ -524,7 +534,8 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { // relocations out of range and crash during linking. Manager.registerPass(std::make_unique<LongJmpPass>(PrintLongJmp)); - Manager.registerPass(std::make_unique<InsertNegateRAState>()); + Manager.registerPass( + std::make_unique<PointerAuthCFIFixup>(PrintPAuthCFIFixup)); } // This pass should always run last.* diff --git a/bolt/lib/Rewrite/CMakeLists.txt b/bolt/lib/Rewrite/CMakeLists.txt index 5b15edc..bc1b2ed 100644 --- a/bolt/lib/Rewrite/CMakeLists.txt +++ b/bolt/lib/Rewrite/CMakeLists.txt @@ -24,6 +24,7 @@ add_llvm_library(LLVMBOLTRewrite BuildIDRewriter.cpp PseudoProbeRewriter.cpp RewriteInstance.cpp + RSeqRewriter.cpp SDTRewriter.cpp GNUPropertyRewriter.cpp diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp index 5e3fa93..816acb2 100644 --- a/bolt/lib/Rewrite/DWARFRewriter.cpp +++ b/bolt/lib/Rewrite/DWARFRewriter.cpp @@ -1723,7 +1723,76 @@ StringRef getSectionName(const SectionRef &Section) { return Name; } -// Extracts an appropriate slice if input is DWP. +/// Extracts the slice of the .debug_str.dwo section for a given CU from a DWP +/// file, based on the .debug_str_offsets.dwo section. This helps address DWO +/// bloat that may occur after updates. +/// +/// A slice of .debug_str.dwo may be composed of several non-contiguous +/// fragments. These non-contiguous string views will be written out +/// sequentially, avoiding the copying overhead caused by assembling them. +/// +/// The .debug_str_offsets for the first CU often does not need to be updated, +/// so copying is only performed when .debug_str_offsets requires updating. +static void UpdateStrAndStrOffsets(StringRef StrDWOContent, + StringRef StrOffsetsContent, + SmallVectorImpl<StringRef> &StrDWOOutData, + std::string &StrOffsetsOutData, + unsigned DwarfVersion, bool IsLittleEndian) { + const llvm::endianness Endian = + IsLittleEndian ? llvm::endianness::little : llvm::endianness::big; + const uint64_t HeaderOffset = (DwarfVersion >= 5) ? 
8 : 0; + constexpr size_t SizeOfOffset = sizeof(int32_t); + const uint64_t NumOffsets = + (StrOffsetsContent.size() - HeaderOffset) / SizeOfOffset; + + DataExtractor Extractor(StrOffsetsContent, IsLittleEndian, 0); + uint64_t ExtractionOffset = HeaderOffset; + + using StringFragment = DWARFUnitIndex::Entry::SectionContribution; + const auto getStringLength = [](StringRef Content, + uint64_t Offset) -> uint64_t { + size_t NullPos = Content.find('\0', Offset); + return (NullPos != StringRef::npos) ? (NullPos - Offset + 1) : 0; + }; + const auto isContiguous = [](const StringFragment &Fragment, + uint64_t NextOffset) -> bool { + return NextOffset == Fragment.getOffset() + Fragment.getLength(); + }; + std::optional<StringFragment> CurrentFragment; + uint64_t AccumulatedStrLen = 0; + for (uint64_t I = 0; I < NumOffsets; ++I) { + const uint64_t StrOffset = Extractor.getU32(&ExtractionOffset); + const uint64_t StringLength = getStringLength(StrDWOContent, StrOffset); + if (!CurrentFragment) { + // First init. + CurrentFragment = StringFragment(StrOffset, StringLength); + } else { + if (isContiguous(*CurrentFragment, StrOffset)) { + // Expanding the current fragment. + CurrentFragment->setLength(CurrentFragment->getLength() + StringLength); + } else { + // Saving the current fragment and start a new one. + StrDWOOutData.push_back(StrDWOContent.substr( + CurrentFragment->getOffset(), CurrentFragment->getLength())); + CurrentFragment = StringFragment(StrOffset, StringLength); + } + } + if (AccumulatedStrLen != StrOffset) { + // Updating str offsets. + if (StrOffsetsOutData.empty()) + StrOffsetsOutData = StrOffsetsContent.str(); + llvm::support::endian::write32( + &StrOffsetsOutData[HeaderOffset + I * SizeOfOffset], + static_cast<uint32_t>(AccumulatedStrLen), Endian); + } + AccumulatedStrLen += StringLength; + } + if (CurrentFragment) + StrDWOOutData.push_back(StrDWOContent.substr(CurrentFragment->getOffset(), + CurrentFragment->getLength())); +} + +// Exctracts an appropriate slice if input is DWP. // Applies patches or overwrites the section. 
std::optional<StringRef> updateDebugData( DWARFContext &DWCtx, StringRef SectionName, StringRef SectionContents, @@ -1772,6 +1841,8 @@ std::optional<StringRef> updateDebugData( errs() << "BOLT-WARNING: unsupported debug section: " << SectionName << "\n"; if (StrWriter.isInitialized()) { + if (CUDWOEntry) + return StrWriter.getBufferStr(); OutputBuffer = StrWriter.releaseBuffer(); return StringRef(reinterpret_cast<const char *>(OutputBuffer->data()), OutputBuffer->size()); @@ -1786,6 +1857,8 @@ std::optional<StringRef> updateDebugData( } case DWARFSectionKind::DW_SECT_STR_OFFSETS: { if (StrOffstsWriter.isFinalized()) { + if (CUDWOEntry) + return StrOffstsWriter.getBufferStr(); OutputBuffer = StrOffstsWriter.releaseBuffer(); return StringRef(reinterpret_cast<const char *>(OutputBuffer->data()), OutputBuffer->size()); @@ -1888,6 +1961,10 @@ void DWARFRewriter::writeDWOFiles( } } + StringRef StrDWOContent; + StringRef StrOffsetsContent; + llvm::SmallVector<StringRef, 3> StrDWOOutData; + std::string StrOffsetsOutData; for (const SectionRef &Section : File->sections()) { std::unique_ptr<DebugBufferVector> OutputData; StringRef SectionName = getSectionName(Section); @@ -1895,11 +1972,50 @@ void DWARFRewriter::writeDWOFiles( continue; Expected<StringRef> ContentsExp = Section.getContents(); assert(ContentsExp && "Invalid contents."); + if (IsDWP && SectionName == "debug_str.dwo") { + if (StrWriter.isInitialized()) + StrDWOContent = StrWriter.getBufferStr(); + else + StrDWOContent = *ContentsExp; + continue; + } if (std::optional<StringRef> OutData = updateDebugData( (*DWOCU)->getContext(), SectionName, *ContentsExp, KnownSections, *Streamer, *this, CUDWOEntry, DWOId, OutputData, RangeListssWriter, - LocWriter, StrOffstsWriter, StrWriter, OverridenSections)) + LocWriter, StrOffstsWriter, StrWriter, OverridenSections)) { + if (IsDWP && SectionName == "debug_str_offsets.dwo") { + StrOffsetsContent = *OutData; + continue; + } Streamer->emitBytes(*OutData); + } + } + + if (IsDWP) { + // Handling both .debug_str.dwo and .debug_str_offsets.dwo concurrently. In + // the original DWP, .debug_str is a deduplicated global table, and the + // .debug_str.dwo slice for a single CU needs to be extracted according to + // .debug_str_offsets.dwo. + UpdateStrAndStrOffsets(StrDWOContent, StrOffsetsContent, StrDWOOutData, + StrOffsetsOutData, CU.getVersion(), + (*DWOCU)->getContext().isLittleEndian()); + auto SectionIter = KnownSections.find("debug_str.dwo"); + if (SectionIter != KnownSections.end()) { + Streamer->switchSection(SectionIter->second.first); + for (size_t i = 0; i < StrDWOOutData.size(); ++i) { + StringRef OutData = StrDWOOutData[i]; + if (!OutData.empty()) + Streamer->emitBytes(OutData); + } + } + SectionIter = KnownSections.find("debug_str_offsets.dwo"); + if (SectionIter != KnownSections.end()) { + Streamer->switchSection(SectionIter->second.first); + if (!StrOffsetsOutData.empty()) + Streamer->emitBytes(StrOffsetsOutData); + else + Streamer->emitBytes(StrOffsetsContent); + } } Streamer->finish(); TempOut->keep(); diff --git a/bolt/lib/Rewrite/RSeqRewriter.cpp b/bolt/lib/Rewrite/RSeqRewriter.cpp new file mode 100644 index 0000000..46bce66 --- /dev/null +++ b/bolt/lib/Rewrite/RSeqRewriter.cpp @@ -0,0 +1,72 @@ +//===- bolt/Rewrite/RSeqRewriter.cpp --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Basic support for restartable sequences used by tcmalloc. Prevent critical +// section overrides by ignoring optimizations in containing functions. +// +// References: +// * https://google.github.io/tcmalloc/rseq.html +// * tcmalloc/internal/percpu_rseq_x86_64.S +// +//===----------------------------------------------------------------------===// + +#include "bolt/Core/BinaryFunction.h" +#include "bolt/Rewrite/MetadataRewriter.h" +#include "bolt/Rewrite/MetadataRewriters.h" +#include "llvm/Support/Errc.h" + +using namespace llvm; +using namespace bolt; + +namespace { + +class RSeqRewriter final : public MetadataRewriter { +public: + RSeqRewriter(StringRef Name, BinaryContext &BC) + : MetadataRewriter(Name, BC) {} + + Error preCFGInitializer() override { + for (const BinarySection &Section : BC.allocatableSections()) { + if (Section.getName() != "__rseq_cs") + continue; + + auto handleRelocation = [&](const Relocation &Rel, bool IsDynamic) { + BinaryFunction *BF = nullptr; + if (Rel.Symbol) + BF = BC.getFunctionForSymbol(Rel.Symbol); + else if (Relocation::isRelative(Rel.Type)) + BF = BC.getBinaryFunctionContainingAddress(Rel.Addend); + + if (!BF) { + BC.errs() << "BOLT-WARNING: no function found matching " + << (IsDynamic ? "dynamic " : "") + << "relocation in __rseq_cs\n"; + } else if (!BF->isIgnored()) { + BC.outs() << "BOLT-INFO: restartable sequence reference detected in " + << *BF << ". Function will not be optimized\n"; + BF->setIgnored(); + } + }; + + for (const Relocation &Rel : Section.dynamicRelocations()) + handleRelocation(Rel, /*IsDynamic*/ true); + + for (const Relocation &Rel : Section.relocations()) + handleRelocation(Rel, /*IsDynamic*/ false); + } + + return Error::success(); + } +}; + +} // namespace + +std::unique_ptr<MetadataRewriter> +llvm::bolt::createRSeqRewriter(BinaryContext &BC) { + return std::make_unique<RSeqRewriter>("rseq-cs-rewriter", BC); +} diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 77e5688..0e14500 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -80,8 +80,11 @@ namespace opts { extern cl::list<std::string> HotTextMoveSections; extern cl::opt<bool> Hugify; extern cl::opt<bool> Instrument; +extern cl::opt<uint32_t> InstrumentationSleepTime; extern cl::opt<bool> KeepNops; extern cl::opt<bool> Lite; +extern cl::list<std::string> PrintOnly; +extern cl::opt<std::string> PrintOnlyFile; extern cl::list<std::string> ReorderData; extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions; extern cl::opt<bool> TerminalHLT; @@ -292,10 +295,31 @@ cl::bits<GadgetScannerKind> GadgetScannersToRun( clEnumValN(GS_ALL, "all", "All implemented scanners")), cl::ZeroOrMore, cl::CommaSeparated, cl::cat(BinaryAnalysisCategory)); +// Primary targets for hooking runtime library initialization hooking +// with fallback to next item in case if current item is not available +// in the input binary. 
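[Editorial illustration, not part of the patch] The fallback order described above can be exercised explicitly; a hypothetical invocation for instrumenting a shared library might look like

  llvm-bolt libfoo.so -o libfoo.instr.so -instrument --runtime-lib-init-hook=init

where libfoo.so is a placeholder input. Per adjustCommandLineOptions later in this diff, a binary without an INTERP header is expected to fall back from entry_point to init automatically, and discoverRtInitAddress falls back further to the first .init_array slot when DT_INIT is absent.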
+enum RuntimeLibInitHookTarget : char { + RLIH_ENTRY_POINT = 0, /// Use ELF Header Entry Point + RLIH_INIT = 1, /// Use ELF DT_INIT entry + RLIH_INIT_ARRAY = 2, /// Use ELF .init_array entry +}; + +cl::opt<RuntimeLibInitHookTarget> RuntimeLibInitHook( + "runtime-lib-init-hook", + cl::desc("Primary target for hooking runtime library initialization, used " + "in fallback order of availabiliy in input binary (entry_point -> " + "init -> init_array) (default: entry_point)"), + cl::Hidden, cl::init(RLIH_ENTRY_POINT), + cl::values(clEnumValN(RLIH_ENTRY_POINT, "entry_point", + "use ELF Header Entry Point"), + clEnumValN(RLIH_INIT, "init", "use ELF DT_INIT entry"), + clEnumValN(RLIH_INIT_ARRAY, "init_array", + "use ELF .init_array entry")), + cl::ZeroOrMore, cl::cat(BoltOptCategory)); + } // namespace opts // FIXME: implement a better way to mark sections for replacement. -constexpr const char *RewriteInstance::SectionsToOverwrite[]; std::vector<std::string> RewriteInstance::DebugSectionsToOverwrite = { ".debug_abbrev", ".debug_aranges", ".debug_line", ".debug_line_str", ".debug_loc", ".debug_loclists", ".debug_ranges", ".debug_rnglists", @@ -731,6 +755,8 @@ Error RewriteInstance::run() { << "\n"; BC->outs() << "BOLT-INFO: BOLT version: " << BoltRevision << "\n"; + selectFunctionsToPrint(); + if (Error E = discoverStorage()) return E; if (Error E = readSpecialSections()) @@ -738,9 +764,12 @@ Error RewriteInstance::run() { adjustCommandLineOptions(); discoverFileObjects(); - if (opts::Instrument && !BC->IsStaticExecutable) + if (opts::Instrument && !BC->IsStaticExecutable) { + if (Error E = discoverRtInitAddress()) + return E; if (Error E = discoverRtFiniAddress()) return E; + } preprocessProfileData(); @@ -782,8 +811,12 @@ Error RewriteInstance::run() { updateMetadata(); - if (opts::Instrument && !BC->IsStaticExecutable) - updateRtFiniReloc(); + if (opts::Instrument && !BC->IsStaticExecutable) { + if (Error E = updateRtInitReloc()) + return E; + if (Error E = updateRtFiniReloc()) + return E; + } if (opts::OutputFilename == "/dev/null") { BC->outs() << "BOLT-INFO: skipping writing final binary to disk\n"; @@ -1408,6 +1441,65 @@ void RewriteInstance::discoverBOLTReserved() { NextAvailableAddress = BC->BOLTReserved.start(); } +Error RewriteInstance::discoverRtInitAddress() { + if (BC->HasInterpHeader && opts::RuntimeLibInitHook == opts::RLIH_ENTRY_POINT) + return Error::success(); + + // Use DT_INIT if it's available. 
+ if (BC->InitAddress && opts::RuntimeLibInitHook <= opts::RLIH_INIT) { + BC->StartFunctionAddress = BC->InitAddress; + return Error::success(); + } + + if (!BC->InitArrayAddress || !BC->InitArraySize) { + return createStringError(std::errc::not_supported, + "Instrumentation of shared library needs either " + "DT_INIT or DT_INIT_ARRAY"); + } + + if (*BC->InitArraySize < BC->AsmInfo->getCodePointerSize()) { + return createStringError(std::errc::not_supported, + "Need at least 1 DT_INIT_ARRAY slot"); + } + + ErrorOr<BinarySection &> InitArraySection = + BC->getSectionForAddress(*BC->InitArrayAddress); + if (auto EC = InitArraySection.getError()) + return errorCodeToError(EC); + + if (InitArraySection->getAddress() != *BC->InitArrayAddress) { + return createStringError(std::errc::not_supported, + "Inconsistent address of .init_array section"); + } + + if (const Relocation *Reloc = InitArraySection->getDynamicRelocationAt(0)) { + if (Reloc->isRelative()) { + BC->StartFunctionAddress = Reloc->Addend; + } else { + MCSymbol *Sym = Reloc->Symbol; + if (!Sym) + return createStringError( + std::errc::not_supported, + "Failed to locate symbol for 0 entry of .init_array"); + const BinaryFunction *BF = BC->getFunctionForSymbol(Sym); + if (!BF) + return createStringError( + std::errc::not_supported, + "Failed to locate binary function for 0 entry of .init_array"); + BC->StartFunctionAddress = BF->getAddress() + Reloc->Addend; + } + return Error::success(); + } + + if (const Relocation *Reloc = InitArraySection->getRelocationAt(0)) { + BC->StartFunctionAddress = Reloc->Value; + return Error::success(); + } + + return createStringError(std::errc::not_supported, + "No relocation for first DT_INIT_ARRAY slot"); +} + Error RewriteInstance::discoverRtFiniAddress() { // Use DT_FINI if it's available. if (BC->FiniAddress) { @@ -1416,6 +1508,9 @@ Error RewriteInstance::discoverRtFiniAddress() { } if (!BC->FiniArrayAddress || !BC->FiniArraySize) { + // Missing fini hooks are allowed when instrumentation-sleep-time is in use. + if (opts::InstrumentationSleepTime > 0) + return Error::success(); return createStringError( std::errc::not_supported, "Instrumentation needs either DT_FINI or DT_FINI_ARRAY"); @@ -1431,6 +1526,11 @@ Error RewriteInstance::discoverRtFiniAddress() { if (auto EC = FiniArraySection.getError()) return errorCodeToError(EC); + if (FiniArraySection->getAddress() != *BC->FiniArrayAddress) { + return createStringError(std::errc::not_supported, + "Inconsistent address of .fini_array section"); + } + if (const Relocation *Reloc = FiniArraySection->getDynamicRelocationAt(0)) { BC->FiniFunctionAddress = Reloc->Addend; return Error::success(); @@ -1445,26 +1545,99 @@ Error RewriteInstance::discoverRtFiniAddress() { "No relocation for first DT_FINI_ARRAY slot"); } -void RewriteInstance::updateRtFiniReloc() { +Error RewriteInstance::updateRtInitReloc() { + if (BC->HasInterpHeader && opts::RuntimeLibInitHook == opts::RLIH_ENTRY_POINT) + return Error::success(); + + // Updating DT_INIT is handled by patchELFDynamic. 
+ if (BC->InitAddress && opts::RuntimeLibInitHook <= opts::RLIH_INIT) + return Error::success(); + + const RuntimeLibrary *RT = BC->getRuntimeLibrary(); + if (!RT || !RT->getRuntimeStartAddress()) + return Error::success(); + + if (!BC->InitArrayAddress) + return Error::success(); + + if (!BC->InitArrayAddress || !BC->InitArraySize) + return createStringError(std::errc::not_supported, + "inconsistent .init_array state"); + + ErrorOr<BinarySection &> InitArraySection = + BC->getSectionForAddress(*BC->InitArrayAddress); + if (!InitArraySection) + return createStringError(std::errc::not_supported, ".init_array removed"); + + if (std::optional<Relocation> Reloc = + InitArraySection->takeDynamicRelocationAt(0)) { + if (Reloc->isRelative()) { + if (Reloc->Addend != BC->StartFunctionAddress) + return createStringError(std::errc::not_supported, + "inconsistent .init_array dynamic relocation"); + Reloc->Addend = RT->getRuntimeStartAddress(); + InitArraySection->addDynamicRelocation(*Reloc); + } else { + MCSymbol *Sym = Reloc->Symbol; + if (!Sym) + return createStringError( + std::errc::not_supported, + "Failed to locate symbol for 0 entry of .init_array"); + const BinaryFunction *BF = BC->getFunctionForSymbol(Sym); + if (!BF) + return createStringError( + std::errc::not_supported, + "Failed to locate binary function for 0 entry of .init_array"); + if (BF->getAddress() + Reloc->Addend != BC->StartFunctionAddress) + return createStringError(std::errc::not_supported, + "inconsistent .init_array dynamic relocation"); + InitArraySection->addDynamicRelocation(Relocation{ + /*Offset*/ 0, /*Symbol*/ nullptr, /*Type*/ Relocation::getAbs64(), + /*Addend*/ RT->getRuntimeStartAddress(), /*Value*/ 0}); + } + } + // Update the static relocation by adding a pending relocation which will get + // patched when flushPendingRelocations is called in rewriteFile. Note that + // flushPendingRelocations will calculate the value to patch as + // "Symbol + Addend". Since we don't have a symbol, just set the addend to the + // desired value. + InitArraySection->addPendingRelocation(Relocation{ + /*Offset*/ 0, /*Symbol*/ nullptr, /*Type*/ Relocation::getAbs64(), + /*Addend*/ RT->getRuntimeStartAddress(), /*Value*/ 0}); + BC->outs() + << "BOLT-INFO: runtime library initialization was hooked via .init_array " + "entry, set to 0x" + << Twine::utohexstr(RT->getRuntimeStartAddress()) << "\n"; + return Error::success(); +} + +Error RewriteInstance::updateRtFiniReloc() { // Updating DT_FINI is handled by patchELFDynamic. if (BC->FiniAddress) - return; + return Error::success(); const RuntimeLibrary *RT = BC->getRuntimeLibrary(); if (!RT || !RT->getRuntimeFiniAddress()) - return; + return Error::success(); - assert(BC->FiniArrayAddress && BC->FiniArraySize && - "inconsistent .fini_array state"); + if (!BC->FiniArrayAddress || !BC->FiniArraySize) { + // Missing fini hooks are allowed when instrumentation-sleep-time is in use. 
+ if (opts::InstrumentationSleepTime > 0) + return Error::success(); + return createStringError(std::errc::not_supported, + "inconsistent .fini_array state"); + } ErrorOr<BinarySection &> FiniArraySection = BC->getSectionForAddress(*BC->FiniArrayAddress); - assert(FiniArraySection && ".fini_array removed"); + if (!FiniArraySection) + return createStringError(std::errc::not_supported, ".fini_array removed"); if (std::optional<Relocation> Reloc = FiniArraySection->takeDynamicRelocationAt(0)) { - assert(Reloc->Addend == BC->FiniFunctionAddress && - "inconsistent .fini_array dynamic relocation"); + if (Reloc->Addend != BC->FiniFunctionAddress) + return createStringError(std::errc::not_supported, + "inconsistent .fini_array dynamic relocation"); Reloc->Addend = RT->getRuntimeFiniAddress(); FiniArraySection->addDynamicRelocation(*Reloc); } @@ -1477,6 +1650,10 @@ void RewriteInstance::updateRtFiniReloc() { FiniArraySection->addPendingRelocation(Relocation{ /*Offset*/ 0, /*Symbol*/ nullptr, /*Type*/ Relocation::getAbs64(), /*Addend*/ RT->getRuntimeFiniAddress(), /*Value*/ 0}); + BC->outs() << "BOLT-INFO: runtime library finalization was hooked via " + ".fini_array entry, set to 0x" + << Twine::utohexstr(RT->getRuntimeFiniAddress()) << "\n"; + return Error::success(); } void RewriteInstance::registerFragments() { @@ -2075,7 +2252,7 @@ Error RewriteInstance::readSpecialSections() { if (BC->IsStripped && !opts::AllowStripped) { BC->errs() << "BOLT-ERROR: stripped binaries are not supported. If you know " - "what you're doing, use --allow-stripped to proceed"; + "what you're doing, use --allow-stripped to proceed\n"; exit(1); } @@ -2175,6 +2352,14 @@ void RewriteInstance::adjustCommandLineOptions() { exit(1); } + if (opts::Instrument && opts::RuntimeLibInitHook == opts::RLIH_ENTRY_POINT && + !BC->HasInterpHeader) { + BC->errs() + << "BOLT-WARNING: adjusted runtime-lib-init-hook to 'init' due to " + "absence of INTERP header\n"; + opts::RuntimeLibInitHook = opts::RLIH_INIT; + } + if (opts::HotText && opts::HotTextMoveSections.getNumOccurrences() == 0) { opts::HotTextMoveSections.addValue(".stub"); opts::HotTextMoveSections.addValue(".mover"); @@ -2955,8 +3140,10 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection, // if-condition above) so we're handling a relocation from a function // to itself. RISC-V uses such relocations for branches, for example. // These should not be registered as externally references offsets. - if (!ContainingBF) - ReferencedBF->registerReferencedOffset(RefFunctionOffset); + if (!ContainingBF && !ReferencedBF->isInConstantIsland(Address)) { + ReferencedBF->registerInternalRefDataRelocation(RefFunctionOffset, + Rel.getOffset()); + } } if (opts::Verbosity > 1 && BinarySection(*BC, RelocatedSection).isWritable()) @@ -3099,17 +3286,22 @@ static BinaryFunction *getInitFunctionIfStaticBinary(BinaryContext &BC) { return BC.getBinaryFunctionAtAddress(BD->getAddress()); } +static void populateFunctionNames(cl::opt<std::string> &FunctionNamesFile, + cl::list<std::string> &FunctionNames) { + if (FunctionNamesFile.empty()) + return; + std::ifstream FuncsFile(FunctionNamesFile, std::ios::in); + std::string FuncName; + while (std::getline(FuncsFile, FuncName)) + FunctionNames.push_back(FuncName); +} + +void RewriteInstance::selectFunctionsToPrint() { + populateFunctionNames(opts::PrintOnlyFile, opts::PrintOnly); +} + void RewriteInstance::selectFunctionsToProcess() { // Extend the list of functions to process or skip from a file. 
- auto populateFunctionNames = [](cl::opt<std::string> &FunctionNamesFile, - cl::list<std::string> &FunctionNames) { - if (FunctionNamesFile.empty()) - return; - std::ifstream FuncsFile(FunctionNamesFile, std::ios::in); - std::string FuncName; - while (std::getline(FuncsFile, FuncName)) - FunctionNames.push_back(FuncName); - }; populateFunctionNames(opts::FunctionNamesFile, opts::ForceFunctionNames); populateFunctionNames(opts::SkipFunctionNamesFile, opts::SkipFunctionNames); populateFunctionNames(opts::FunctionNamesFileNR, opts::ForceFunctionNamesNR); @@ -3345,6 +3537,8 @@ void RewriteInstance::initializeMetadataManager() { MetadataManager.registerRewriter(createPseudoProbeRewriter(*BC)); + MetadataManager.registerRewriter(createRSeqRewriter(*BC)); + MetadataManager.registerRewriter(createSDTRewriter(*BC)); MetadataManager.registerRewriter(createGNUPropertyRewriter(*BC)); @@ -3495,6 +3689,7 @@ void RewriteInstance::disassembleFunctions() { if (!shouldDisassemble(Function)) continue; + Function.validateInternalBranches(); Function.postProcessEntryPoints(); Function.postProcessJumpTables(); } @@ -4837,9 +5032,14 @@ void RewriteInstance::patchELFSectionHeaderTable(ELFObjectFile<ELFT> *File) { ELFEhdrTy NewEhdr = Obj.getHeader(); if (BC->HasRelocations) { - if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) + RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary(); + if (RtLibrary && opts::RuntimeLibInitHook == opts::RLIH_ENTRY_POINT) { NewEhdr.e_entry = RtLibrary->getRuntimeStartAddress(); - else + BC->outs() + << "BOLT-INFO: runtime library initialization was hooked via ELF " + "Header Entry Point, set to 0x" + << Twine::utohexstr(NewEhdr.e_entry) << "\n"; + } else NewEhdr.e_entry = getNewFunctionAddress(NewEhdr.e_entry); assert((NewEhdr.e_entry || !Obj.getHeader().e_entry) && "cannot find new address for entry point"); @@ -5680,14 +5880,23 @@ void RewriteInstance::patchELFDynamic(ELFObjectFile<ELFT> *File) { } RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary(); if (RtLibrary && Dyn.getTag() == ELF::DT_FINI) { - if (uint64_t Addr = RtLibrary->getRuntimeFiniAddress()) + if (uint64_t Addr = RtLibrary->getRuntimeFiniAddress()) { NewDE.d_un.d_ptr = Addr; + BC->outs() + << "BOLT-INFO: runtime library finalization was hooked via " + "DT_FINI, set to 0x" + << Twine::utohexstr(Addr) << "\n"; + } } - if (RtLibrary && Dyn.getTag() == ELF::DT_INIT && !BC->HasInterpHeader) { + if (RtLibrary && Dyn.getTag() == ELF::DT_INIT && + (!BC->HasInterpHeader || + opts::RuntimeLibInitHook == opts::RLIH_INIT)) { if (auto Addr = RtLibrary->getRuntimeStartAddress()) { - LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set DT_INIT to 0x" - << Twine::utohexstr(Addr) << '\n'); NewDE.d_un.d_ptr = Addr; + BC->outs() + << "BOLT-INFO: runtime library initialization was hooked via " + "DT_INIT, set to 0x" + << Twine::utohexstr(Addr) << "\n"; } } break; @@ -5755,10 +5964,13 @@ Error RewriteInstance::readELFDynamic(ELFObjectFile<ELFT> *File) { for (const Elf_Dyn &Dyn : DynamicEntries) { switch (Dyn.d_tag) { case ELF::DT_INIT: - if (!BC->HasInterpHeader) { - LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set start function address\n"); - BC->StartFunctionAddress = Dyn.getPtr(); - } + BC->InitAddress = Dyn.getPtr(); + break; + case ELF::DT_INIT_ARRAY: + BC->InitArrayAddress = Dyn.getPtr(); + break; + case ELF::DT_INIT_ARRAYSZ: + BC->InitArraySize = Dyn.getPtr(); break; case ELF::DT_FINI: BC->FiniAddress = Dyn.getPtr(); diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp index 3c77091..5881d3f 
100644 --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -164,11 +164,53 @@ public: bool isPush(const MCInst &Inst) const override { return isStoreToStack(Inst); - }; + } bool isPop(const MCInst &Inst) const override { return isLoadFromStack(Inst); - }; + } + + // We look for instructions that load from stack or make stack pointer + // adjustment, and assume the basic block is an epilogue if and only if + // such instructions are present and also immediately precede the branch + // instruction that ends the basic block. + bool isEpilogue(const BinaryBasicBlock &BB) const override { + if (BB.succ_size()) + return false; + + bool SeenLoadFromStack = false; + bool SeenStackPointerAdjustment = false; + for (const MCInst &Instr : BB) { + // Skip CFI pseudo instruction. + if (isCFI(Instr)) + continue; + + bool IsPop = isPop(Instr); + // A load from stack instruction could do SP adjustment in pre-index or + // post-index form, which we can skip to check for epilogue recognition + // purpose. + bool IsSPAdj = (isADD(Instr) || isMOVW(Instr)) && + Instr.getOperand(0).isReg() && + Instr.getOperand(0).getReg() == AArch64::SP; + SeenLoadFromStack |= IsPop; + SeenStackPointerAdjustment |= IsSPAdj; + + if (!SeenLoadFromStack && !SeenStackPointerAdjustment) + continue; + if (IsPop || IsSPAdj || isPAuthOnLR(Instr)) + continue; + if (isReturn(Instr)) + return true; + if (isBranch(Instr)) + break; + + // Any previously seen load from stack or stack adjustment instruction + // is definitely not part of epilogue code sequence, so reset these two. + SeenLoadFromStack = false; + SeenStackPointerAdjustment = false; + } + return SeenLoadFromStack || SeenStackPointerAdjustment; + } void createCall(MCInst &Inst, const MCSymbol *Target, MCContext *Ctx) override { @@ -271,6 +313,33 @@ public: Inst.getOpcode() == AArch64::RETABSPPCr; } + void createMatchingAuth(const MCInst &AuthAndRet, MCInst &Auth) override { + Auth.clear(); + Auth.setOperands(AuthAndRet.getOperands()); + switch (AuthAndRet.getOpcode()) { + case AArch64::RETAA: + Auth.setOpcode(AArch64::AUTIASP); + break; + case AArch64::RETAB: + Auth.setOpcode(AArch64::AUTIBSP); + break; + case AArch64::RETAASPPCi: + Auth.setOpcode(AArch64::AUTIASPPCi); + break; + case AArch64::RETABSPPCi: + Auth.setOpcode(AArch64::AUTIBSPPCi); + break; + case AArch64::RETAASPPCr: + Auth.setOpcode(AArch64::AUTIASPPCr); + break; + case AArch64::RETABSPPCr: + Auth.setOpcode(AArch64::AUTIBSPPCr); + break; + default: + llvm_unreachable("Unhandled fused pauth-and-return instruction"); + } + } + std::optional<MCPhysReg> getSignedReg(const MCInst &Inst) const override { switch (Inst.getOpcode()) { case AArch64::PACIA: @@ -1793,14 +1862,12 @@ public: } bool isNoop(const MCInst &Inst) const override { - return Inst.getOpcode() == AArch64::HINT && - Inst.getOperand(0).getImm() == 0; + return Inst.getOpcode() == AArch64::NOP; } void createNoop(MCInst &Inst) const override { - Inst.setOpcode(AArch64::HINT); + Inst.setOpcode(AArch64::NOP); Inst.clear(); - Inst.addOperand(MCOperand::createImm(0)); } bool isTrap(const MCInst &Inst) const override { @@ -2706,6 +2773,39 @@ public: return Insts; } + void createBTI(MCInst &Inst, bool CallTarget, + bool JumpTarget) const override { + Inst.setOpcode(AArch64::HINT); + unsigned HintNum = getBTIHintNum(CallTarget, JumpTarget); + Inst.addOperand(MCOperand::createImm(HintNum)); + } + + bool isBTILandingPad(MCInst &Inst, bool CallTarget, + bool JumpTarget) const override { + unsigned HintNum = 
getBTIHintNum(CallTarget, JumpTarget); + bool IsExplicitBTI = + Inst.getOpcode() == AArch64::HINT && Inst.getNumOperands() == 1 && + Inst.getOperand(0).isImm() && Inst.getOperand(0).getImm() == HintNum; + + bool IsImplicitBTI = HintNum == 34 && isImplicitBTIC(Inst); + return IsExplicitBTI || IsImplicitBTI; + } + + bool isImplicitBTIC(MCInst &Inst) const override { + // PACI[AB]SP are always implicitly BTI C, independently of + // SCTLR_EL1.BT[01]. + return Inst.getOpcode() == AArch64::PACIASP || + Inst.getOpcode() == AArch64::PACIBSP; + } + + void updateBTIVariant(MCInst &Inst, bool CallTarget, + bool JumpTarget) const override { + assert(Inst.getOpcode() == AArch64::HINT && "Not a BTI instruction."); + unsigned HintNum = getBTIHintNum(CallTarget, JumpTarget); + Inst.clear(); + Inst.addOperand(MCOperand::createImm(HintNum)); + } + InstructionListType materializeAddress(const MCSymbol *Target, MCContext *Ctx, MCPhysReg RegName, int64_t Addend = 0) const override { diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp index 5fca5e8..7c24c2c 100644 --- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp +++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp @@ -219,6 +219,12 @@ public: return getPopSize(Inst) == 0 ? false : true; } + bool isEpilogue(const BinaryBasicBlock &BB) const override { + return ::llvm::any_of(BB, [&](const MCInst &Instr) { + return isLeave(Instr) || isPop(Instr); + }); + } + bool isTerminateBranch(const MCInst &Inst) const override { return Inst.getOpcode() == X86::ENDBR32 || Inst.getOpcode() == X86::ENDBR64; } diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp index 5be04d2..b7eb209 100644 --- a/bolt/lib/Utils/CommandLineOpts.cpp +++ b/bolt/lib/Utils/CommandLineOpts.cpp @@ -245,6 +245,16 @@ cl::opt<bool> PrintCacheMetrics( cl::desc("calculate and print various metrics for instruction cache"), cl::cat(BoltOptCategory)); +cl::list<std::string> PrintOnly("print-only", cl::CommaSeparated, + cl::desc("list of functions to print"), + cl::value_desc("func1,func2,func3,..."), + cl::Hidden, cl::cat(BoltCategory)); + +cl::opt<std::string> + PrintOnlyFile("print-only-file", + cl::desc("file with list of functions to print"), cl::Hidden, + cl::cat(BoltCategory)); + cl::opt<bool> PrintSections("print-sections", cl::desc("print all registered sections"), cl::Hidden, cl::cat(BoltCategory)); |
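[Editorial illustration, not part of the patch] A self-contained sketch of the file-to-list plumbing behind the new --print-only-file option, mirroring the populateFunctionNames helper factored out of selectFunctionsToProcess above; the file name and output here are hypothetical:

#include <fstream>
#include <iostream>
#include <string>
#include <vector>

// Read one function name per line, as the helper in RewriteInstance.cpp does.
static void readFunctionNames(const std::string &Path,
                              std::vector<std::string> &Names) {
  std::ifstream FuncsFile(Path, std::ios::in);
  std::string FuncName;
  while (std::getline(FuncsFile, FuncName))
    Names.push_back(FuncName);
}

int main() {
  std::vector<std::string> PrintOnly;
  readFunctionNames("print-only.txt", PrintOnly); // hypothetical input file
  for (const std::string &Name : PrintOnly)
    std::cout << "will print: " << Name << "\n";
  return 0;
}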

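[Editorial illustration, not part of the patch] Example blocks for the new AArch64 isEpilogue heuristic above, assuming a basic block with no CFG successors and that SP-based loads count as pops; register choices are hypothetical:

  Classified as an epilogue (the restores immediately precede the return):
    ldp  x19, x20, [sp, #16]
    ldp  x29, x30, [sp], #32
    autiasp
    ret

  Not classified (an unrelated instruction between the restore and the return
  resets the tracked state, so the ret is no longer preceded by a pending
  load or SP adjustment):
    ldp  x29, x30, [sp], #32
    ldr  x0, [x19]
    ret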