diff options
Diffstat (limited to 'bolt/lib')
-rw-r--r-- | bolt/lib/Core/CMakeLists.txt | 1 | ||||
-rw-r--r-- | bolt/lib/Core/MCInstUtils.cpp | 86 | ||||
-rw-r--r-- | bolt/lib/Passes/PAuthGadgetScanner.cpp | 108 | ||||
-rw-r--r-- | bolt/lib/Rewrite/DWARFRewriter.cpp | 4 | ||||
-rw-r--r-- | bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp | 91 |
5 files changed, 156 insertions, 134 deletions
diff --git a/bolt/lib/Core/CMakeLists.txt b/bolt/lib/Core/CMakeLists.txt index fc72dc0..58cfcab 100644 --- a/bolt/lib/Core/CMakeLists.txt +++ b/bolt/lib/Core/CMakeLists.txt @@ -32,6 +32,7 @@ add_llvm_library(LLVMBOLTCore GDBIndex.cpp HashUtilities.cpp JumpTable.cpp + MCInstUtils.cpp MCPlusBuilder.cpp ParallelUtilities.cpp Relocation.cpp diff --git a/bolt/lib/Core/MCInstUtils.cpp b/bolt/lib/Core/MCInstUtils.cpp new file mode 100644 index 0000000..f505bf7 --- /dev/null +++ b/bolt/lib/Core/MCInstUtils.cpp @@ -0,0 +1,86 @@ +//===- bolt/Core/MCInstUtils.cpp ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "bolt/Core/MCInstUtils.h" +#include "bolt/Core/BinaryBasicBlock.h" +#include "bolt/Core/BinaryFunction.h" + +#include <type_traits> + +using namespace llvm; +using namespace llvm::bolt; + +// It is assumed in a few places that BinaryBasicBlock stores its instructions +// in a contiguous vector. +using BasicBlockStorageIsVector = + std::is_same<BinaryBasicBlock::const_iterator, + std::vector<MCInst>::const_iterator>; +static_assert(BasicBlockStorageIsVector::value); + +MCInstReference MCInstReference::get(const MCInst &Inst, + const BinaryFunction &BF) { + if (BF.hasCFG()) { + for (BinaryBasicBlock &BB : BF) { + for (MCInst &MI : BB) + if (&MI == &Inst) + return MCInstReference(BB, Inst); + } + llvm_unreachable("Inst is not contained in BF"); + } + + for (auto I = BF.instrs().begin(), E = BF.instrs().end(); I != E; ++I) { + if (&I->second == &Inst) + return MCInstReference(BF, I); + } + llvm_unreachable("Inst is not contained in BF"); +} + +uint64_t MCInstReference::computeAddress(const MCCodeEmitter *Emitter) const { + assert(!empty() && "Taking instruction address by empty reference"); + + const BinaryContext &BC = getFunction()->getBinaryContext(); + if (auto *Ref = tryGetRefInBB()) { + const uint64_t AddressOfBB = + getFunction()->getAddress() + Ref->BB->getOffset(); + const MCInst *FirstInstInBB = &*Ref->BB->begin(); + const MCInst *ThisInst = &getMCInst(); + + // Usage of plain 'const MCInst *' as iterators assumes the instructions + // are stored in a vector, see BasicBlockStorageIsVector. + const uint64_t OffsetInBB = + BC.computeCodeSize(FirstInstInBB, ThisInst, Emitter); + + return AddressOfBB + OffsetInBB; + } + + auto &Ref = getRefInBF(); + const uint64_t OffsetInBF = Ref.It->first; + + return getFunction()->getAddress() + OffsetInBF; +} + +raw_ostream &MCInstReference::print(raw_ostream &OS) const { + if (const RefInBB *Ref = tryGetRefInBB()) { + OS << "MCInstBBRef<"; + if (Ref->BB == nullptr) + OS << "BB:(null)"; + else + OS << "BB:" << Ref->BB->getName() << ":" << Ref->Index; + OS << ">"; + return OS; + } + + const RefInBF &Ref = getRefInBF(); + OS << "MCInstBFRef<"; + if (Ref.BF == nullptr) + OS << "BF:(null)"; + else + OS << "BF:" << Ref.BF->getPrintName() << ":" << Ref.It->first; + OS << ">"; + return OS; +} diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp index 65c84eb..cfe4b6b 100644 --- a/bolt/lib/Passes/PAuthGadgetScanner.cpp +++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp @@ -24,39 +24,6 @@ namespace llvm { namespace bolt { - -raw_ostream &operator<<(raw_ostream &OS, const MCInstInBBReference &Ref) { - OS << "MCInstBBRef<"; - if (Ref.BB == nullptr) - OS << "BB:(null)"; - else - OS << "BB:" << Ref.BB->getName() << ":" << Ref.BBIndex; - OS << ">"; - return OS; -} - -raw_ostream &operator<<(raw_ostream &OS, const MCInstInBFReference &Ref) { - OS << "MCInstBFRef<"; - if (Ref.BF == nullptr) - OS << "BF:(null)"; - else - OS << "BF:" << Ref.BF->getPrintName() << ":" << Ref.getOffset(); - OS << ">"; - return OS; -} - -raw_ostream &operator<<(raw_ostream &OS, const MCInstReference &Ref) { - switch (Ref.ParentKind) { - case MCInstReference::BasicBlockParent: - OS << Ref.U.BBRef; - return OS; - case MCInstReference::FunctionParent: - OS << Ref.U.BFRef; - return OS; - } - llvm_unreachable(""); -} - namespace PAuthGadgetScanner { [[maybe_unused]] static void traceInst(const BinaryContext &BC, StringRef Label, @@ -91,10 +58,10 @@ template <typename T> static void iterateOverInstrs(BinaryFunction &BF, T Fn) { if (BF.hasCFG()) { for (BinaryBasicBlock &BB : BF) for (int64_t I = 0, E = BB.size(); I < E; ++I) - Fn(MCInstInBBReference(&BB, I)); + Fn(MCInstReference(BB, I)); } else { - for (auto I : BF.instrs()) - Fn(MCInstInBFReference(&BF, I.first)); + for (auto I = BF.instrs().begin(), E = BF.instrs().end(); I != E; ++I) + Fn(MCInstReference(BF, I)); } } @@ -564,11 +531,8 @@ public: const SrcState &S = getStateBefore(Inst); std::vector<MCInstReference> Result; - for (const MCInst *Inst : lastWritingInsts(S, ClobberedReg)) { - MCInstReference Ref = MCInstReference::get(Inst, BF); - assert(Ref && "Expected Inst to be found"); - Result.push_back(Ref); - } + for (const MCInst *Inst : lastWritingInsts(S, ClobberedReg)) + Result.push_back(MCInstReference::get(*Inst, BF)); return Result; } }; @@ -1136,11 +1100,8 @@ public: const DstState &S = getStateAfter(Inst); std::vector<MCInstReference> Result; - for (const MCInst *Inst : firstLeakingInsts(S, LeakedReg)) { - MCInstReference Ref = MCInstReference::get(Inst, BF); - assert(Ref && "Expected Inst to be found"); - Result.push_back(Ref); - } + for (const MCInst *Inst : firstLeakingInsts(S, LeakedReg)) + Result.push_back(MCInstReference::get(*Inst, BF)); return Result; } }; @@ -1345,8 +1306,7 @@ static bool shouldAnalyzeTailCallInst(const BinaryContext &BC, // (such as isBranch at the time of writing this comment), some don't (such // as isCall). For that reason, call MCInstrDesc's methods explicitly when // it is important. - const MCInstrDesc &Desc = - BC.MII->get(static_cast<const MCInst &>(Inst).getOpcode()); + const MCInstrDesc &Desc = BC.MII->get(Inst.getMCInst().getOpcode()); // Tail call should be a branch (but not necessarily an indirect one). if (!Desc.isBranch()) return false; @@ -1541,7 +1501,7 @@ void FunctionAnalysisContext::findUnsafeUses( // This is printed as "[message] in function [name], basic block ..., // at address ..." when the issue is reported to the user. Reports.push_back(make_generic_report( - MCInstReference::get(FirstInst, BF), + MCInstReference(BB, *FirstInst), "Warning: possibly imprecise CFG, the analysis quality may be " "degraded in this function. According to BOLT, unreachable code is " "found" /* in function [name]... */)); @@ -1705,48 +1665,44 @@ void Analysis::runOnFunction(BinaryFunction &BF, } } -static void printBB(const BinaryContext &BC, const BinaryBasicBlock *BB, +static void printBB(const BinaryContext &BC, const BinaryBasicBlock &BB, size_t StartIndex = 0, size_t EndIndex = -1) { if (EndIndex == (size_t)-1) - EndIndex = BB->size() - 1; - const BinaryFunction *BF = BB->getFunction(); + EndIndex = BB.size() - 1; + const BinaryFunction *BF = BB.getFunction(); for (unsigned I = StartIndex; I <= EndIndex; ++I) { - // FIXME: this assumes all instructions are 4 bytes in size. This is true - // for AArch64, but it might be good to extract this function so it can be - // used elsewhere and for other targets too. - uint64_t Address = BB->getOffset() + BF->getAddress() + 4 * I; - const MCInst &Inst = BB->getInstructionAtIndex(I); + MCInstReference Inst(BB, I); if (BC.MIB->isCFI(Inst)) continue; - BC.printInstruction(outs(), Inst, Address, BF); + BC.printInstruction(outs(), Inst, Inst.computeAddress(), BF); } } static void reportFoundGadgetInSingleBBSingleRelatedInst( raw_ostream &OS, const BinaryContext &BC, const MCInstReference RelatedInst, const MCInstReference Location) { - BinaryBasicBlock *BB = Location.getBasicBlock(); - assert(RelatedInst.ParentKind == MCInstReference::BasicBlockParent); - assert(Location.ParentKind == MCInstReference::BasicBlockParent); - MCInstInBBReference RelatedInstBB = RelatedInst.U.BBRef; - if (BB == RelatedInstBB.BB) { + const BinaryBasicBlock *BB = Location.getBasicBlock(); + assert(RelatedInst.hasCFG()); + assert(Location.hasCFG()); + if (BB == RelatedInst.getBasicBlock()) { OS << " This happens in the following basic block:\n"; - printBB(BC, BB); + printBB(BC, *BB); } } void Diagnostic::printBasicInfo(raw_ostream &OS, const BinaryContext &BC, StringRef IssueKind) const { - BinaryFunction *BF = Location.getFunction(); - BinaryBasicBlock *BB = Location.getBasicBlock(); + const BinaryBasicBlock *BB = Location.getBasicBlock(); + const BinaryFunction *BF = Location.getFunction(); + const uint64_t Address = Location.computeAddress(); OS << "\nGS-PAUTH: " << IssueKind; OS << " in function " << BF->getPrintName(); if (BB) OS << ", basic block " << BB->getName(); - OS << ", at address " << llvm::format("%x", Location.getAddress()) << "\n"; + OS << ", at address " << llvm::format("%x", Address) << "\n"; OS << " The instruction is "; - BC.printInstruction(OS, Location, Location.getAddress(), BF); + BC.printInstruction(OS, Location, Address, BF); } void GadgetDiagnostic::generateReport(raw_ostream &OS, @@ -1760,21 +1716,23 @@ static void printRelatedInstrs(raw_ostream &OS, const MCInstReference Location, const BinaryContext &BC = BF.getBinaryContext(); // Sort by address to ensure output is deterministic. - SmallVector<MCInstReference> RI(RelatedInstrs); - llvm::sort(RI, [](const MCInstReference &A, const MCInstReference &B) { - return A.getAddress() < B.getAddress(); - }); + SmallVector<std::pair<uint64_t, MCInstReference>> RI; + for (auto &InstRef : RelatedInstrs) + RI.push_back(std::make_pair(InstRef.computeAddress(), InstRef)); + llvm::sort(RI, [](auto A, auto B) { return A.first < B.first; }); + for (unsigned I = 0; I < RI.size(); ++I) { - MCInstReference InstRef = RI[I]; + auto [Address, InstRef] = RI[I]; OS << " " << (I + 1) << ". "; - BC.printInstruction(OS, InstRef, InstRef.getAddress(), &BF); + BC.printInstruction(OS, InstRef, Address, &BF); }; + if (RelatedInstrs.size() == 1) { const MCInstReference RelatedInst = RelatedInstrs[0]; // Printing the details for the MCInstReference::FunctionParent case // is not implemented not to overcomplicate the code, as most functions // are expected to have CFG information. - if (RelatedInst.ParentKind == MCInstReference::BasicBlockParent) + if (RelatedInst.hasCFG()) reportFoundGadgetInSingleBBSingleRelatedInst(OS, BC, RelatedInst, Location); } diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp index 6752489..5c89a42 100644 --- a/bolt/lib/Rewrite/DWARFRewriter.cpp +++ b/bolt/lib/Rewrite/DWARFRewriter.cpp @@ -504,9 +504,7 @@ static void emitDWOBuilder(const std::string &DWOName, } emitUnit(DWODIEBuilder, *Streamer, SplitCU); } else { - for (std::unique_ptr<llvm::DWARFUnit> &CU : - SplitCU.getContext().dwo_compile_units()) - emitUnit(DWODIEBuilder, *Streamer, *CU); + emitUnit(DWODIEBuilder, *Streamer, SplitCU); // emit debug_types sections for dwarf4 for (DWARFUnit *CU : DWODIEBuilder.getDWARF4TUVector()) diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp index a6589f8..f271867 100644 --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -19,6 +19,7 @@ #include "Utils/AArch64BaseInfo.h" #include "bolt/Core/BinaryBasicBlock.h" #include "bolt/Core/BinaryFunction.h" +#include "bolt/Core/MCInstUtils.h" #include "bolt/Core/MCPlusBuilder.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" @@ -401,81 +402,59 @@ public: // Iterate over the instructions of BB in reverse order, matching opcodes // and operands. - MCPhysReg TestedReg = 0; - MCPhysReg ScratchReg = 0; + auto It = BB.end(); - auto StepAndGetOpcode = [&It, &BB]() -> int { - if (It == BB.begin()) - return -1; - --It; - return It->getOpcode(); + auto StepBack = [&]() { + while (It != BB.begin()) { + --It; + // Skip any CFI instructions, but no other pseudos are expected here. + if (!isCFI(*It)) + return true; + } + return false; }; - - switch (StepAndGetOpcode()) { - default: - // Not matched the branch instruction. + // Step to the last non-CFI instruction. + if (!StepBack()) return std::nullopt; - case AArch64::Bcc: - // Bcc EQ, .Lon_success - if (It->getOperand(0).getImm() != AArch64CC::EQ) - return std::nullopt; - // Not checking .Lon_success (see above). - // SUBSXrs XZR, TestedReg, ScratchReg, 0 (used by "CMP reg, reg" alias) - if (StepAndGetOpcode() != AArch64::SUBSXrs || - It->getOperand(0).getReg() != AArch64::XZR || - It->getOperand(3).getImm() != 0) + using namespace llvm::bolt::LowLevelInstMatcherDSL; + Reg TestedReg; + Reg ScratchReg; + + if (matchInst(*It, AArch64::Bcc, Imm(AArch64CC::EQ) /*, .Lon_success*/)) { + if (!StepBack() || !matchInst(*It, AArch64::SUBSXrs, Reg(AArch64::XZR), + TestedReg, ScratchReg, Imm(0))) return std::nullopt; - TestedReg = It->getOperand(1).getReg(); - ScratchReg = It->getOperand(2).getReg(); // Either XPAC(I|D) ScratchReg, ScratchReg // or XPACLRI - switch (StepAndGetOpcode()) { - default: + if (!StepBack()) return std::nullopt; - case AArch64::XPACLRI: + if (matchInst(*It, AArch64::XPACLRI)) { // No operands to check, but using XPACLRI forces TestedReg to be X30. - if (TestedReg != AArch64::LR) - return std::nullopt; - break; - case AArch64::XPACI: - case AArch64::XPACD: - if (It->getOperand(0).getReg() != ScratchReg || - It->getOperand(1).getReg() != ScratchReg) + if (TestedReg.get() != AArch64::LR) return std::nullopt; - break; + } else if (!matchInst(*It, AArch64::XPACI, ScratchReg, ScratchReg) && + !matchInst(*It, AArch64::XPACD, ScratchReg, ScratchReg)) { + return std::nullopt; } - // ORRXrs ScratchReg, XZR, TestedReg, 0 (used by "MOV reg, reg" alias) - if (StepAndGetOpcode() != AArch64::ORRXrs) + if (!StepBack() || !matchInst(*It, AArch64::ORRXrs, ScratchReg, + Reg(AArch64::XZR), TestedReg, Imm(0))) return std::nullopt; - if (It->getOperand(0).getReg() != ScratchReg || - It->getOperand(1).getReg() != AArch64::XZR || - It->getOperand(2).getReg() != TestedReg || - It->getOperand(3).getImm() != 0) - return std::nullopt; - - return std::make_pair(TestedReg, &*It); - case AArch64::TBZX: - // TBZX ScratchReg, 62, .Lon_success - ScratchReg = It->getOperand(0).getReg(); - if (It->getOperand(1).getImm() != 62) - return std::nullopt; - // Not checking .Lon_success (see above). + return std::make_pair(TestedReg.get(), &*It); + } - // EORXrs ScratchReg, TestedReg, TestedReg, 1 - if (StepAndGetOpcode() != AArch64::EORXrs) - return std::nullopt; - TestedReg = It->getOperand(1).getReg(); - if (It->getOperand(0).getReg() != ScratchReg || - It->getOperand(2).getReg() != TestedReg || - It->getOperand(3).getImm() != 1) + if (matchInst(*It, AArch64::TBZX, ScratchReg, Imm(62) /*, .Lon_success*/)) { + if (!StepBack() || !matchInst(*It, AArch64::EORXrs, ScratchReg, TestedReg, + TestedReg, Imm(1))) return std::nullopt; - return std::make_pair(TestedReg, &*It); + return std::make_pair(TestedReg.get(), &*It); } + + return std::nullopt; } std::optional<MCPhysReg> getAuthCheckedReg(const MCInst &Inst, |