diff options
Diffstat (limited to 'bolt')
39 files changed, 1166 insertions, 156 deletions
diff --git a/bolt/docs/CommandLineArgumentReference.md b/bolt/docs/CommandLineArgumentReference.md index f3881c9..d65cf39 100644 --- a/bolt/docs/CommandLineArgumentReference.md +++ b/bolt/docs/CommandLineArgumentReference.md @@ -138,6 +138,12 @@ Dump function CFGs to graphviz format after each stage;enable '-print-loops' for color-coded blocks +- `--dump-dot-func=<func1,func2,func3...>` + + Dump function CFGs to graphviz format for specified functions only; + takes function name patterns (regex supported). Note: C++ function names + must be passed using their mangled names + - `--dump-linux-exceptions` Dump Linux kernel exception table diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index ae58052..b59926c 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -1196,11 +1196,6 @@ public: return getSecondaryEntryPointSymbol(BB.getLabel()); } - /// Remove a label from the secondary entry point map. - void removeSymbolFromSecondaryEntryPointMap(const MCSymbol *Label) { - SecondaryEntryPoints.erase(Label); - } - /// Return true if the basic block is an entry point into the function /// (either primary or secondary). bool isEntryPoint(const BinaryBasicBlock &BB) const { diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index f902a8c..ae04891 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -718,6 +718,20 @@ public: return false; } + /// Returns true if Inst is a trap instruction. + /// + /// Tests if Inst is an instruction that immediately causes an abnormal + /// program termination, for example when a security violation is detected + /// by a compiler-inserted check. 
+ /// + /// @note An implementation of this method should likely return false for + /// calls to library functions like abort(), as it is possible that the + /// execution state is partially attacker-controlled at this point. + virtual bool isTrap(const MCInst &Inst) const { + llvm_unreachable("not implemented"); + return false; + } + virtual bool isBreakpoint(const MCInst &Inst) const { llvm_unreachable("not implemented"); return false; @@ -740,6 +754,10 @@ public: return false; } + /// Return true if the hlt instruction under the x86, otherwise, default to + /// false. + virtual bool isX86HLT(const MCInst &Inst) const { return false; } + /// Return the width, in bytes, of the memory access performed by \p Inst, if /// this is a pop instruction. Return zero otherwise. virtual int getPopSize(const MCInst &Inst) const { diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h index 91d62a7..19dcce8 100644 --- a/bolt/include/bolt/Rewrite/RewriteInstance.h +++ b/bolt/include/bolt/Rewrite/RewriteInstance.h @@ -241,7 +241,7 @@ private: /// Adjust function sizes and set proper maximum size values after the whole /// symbol table has been processed. - void adjustFunctionBoundaries(); + void adjustFunctionBoundaries(DenseMap<uint64_t, MarkerSymType> &MarkerSyms); /// Make .eh_frame section relocatable. void relocateEHFrameSection(); diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h index a75b6bf..859d6f3 100644 --- a/bolt/include/bolt/Utils/CommandLineOpts.h +++ b/bolt/include/bolt/Utils/CommandLineOpts.h @@ -15,6 +15,12 @@ #include "llvm/Support/CommandLine.h" +namespace llvm { +namespace bolt { +class BinaryFunction; +} +} // namespace llvm + namespace opts { enum HeatmapModeKind { @@ -100,6 +106,9 @@ extern llvm::cl::opt<unsigned> Verbosity; /// Return true if we should process all functions in the binary. 
bool processAllFunctions(); +/// Return true if we should dump dot graphs for the given function. +bool shouldDumpDot(const llvm::bolt::BinaryFunction &Function); + enum GadgetScannerKind { GS_PACRET, GS_PAUTH, GS_ALL }; extern llvm::cl::bits<GadgetScannerKind> GadgetScannersToRun; diff --git a/bolt/lib/Core/BinaryBasicBlock.cpp b/bolt/lib/Core/BinaryBasicBlock.cpp index 311d5c1..eeab1ed 100644 --- a/bolt/lib/Core/BinaryBasicBlock.cpp +++ b/bolt/lib/Core/BinaryBasicBlock.cpp @@ -103,9 +103,18 @@ bool BinaryBasicBlock::validateSuccessorInvariants() { Valid &= (Sym == Function->getFunctionEndLabel() || Sym == Function->getFunctionEndLabel(getFragmentNum())); if (!Valid) { - BC.errs() << "BOLT-WARNING: Jump table contains illegal entry: " - << Sym->getName() << "\n"; + const BinaryFunction *TargetBF = BC.getFunctionForSymbol(Sym); + if (TargetBF) { + // It's possible for another function to be in the jump table entry + // as a result of built-in unreachable. + Valid = true; + } else { + BC.errs() << "BOLT-WARNING: Jump table contains illegal entry: " + << Sym->getName() << "\n"; + } } + if (!Valid) + break; } } } else { diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index 84f1853..dd0d041 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -1568,23 +1568,19 @@ unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID); const DWARFDebugLine::LineTable *LineTable = DwCtx->getLineTableForUnit(SrcUnit); - const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = - LineTable->Prologue.FileNames; - // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 + const DWARFDebugLine::FileNameEntry &FileNameEntry = + LineTable->Prologue.getFileNameEntry(FileIndex); + // Dir indexes start at 1 and a dir index 0 // means empty dir. 
- assert(FileIndex > 0 && FileIndex <= FileNames.size() && - "FileIndex out of range for the compilation unit."); StringRef Dir = ""; - if (FileNames[FileIndex - 1].DirIdx != 0) { + if (FileNameEntry.DirIdx != 0) { if (std::optional<const char *> DirName = dwarf::toString( - LineTable->Prologue - .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) { + LineTable->Prologue.IncludeDirectories[FileNameEntry.DirIdx - 1])) { Dir = *DirName; } } StringRef FileName = ""; - if (std::optional<const char *> FName = - dwarf::toString(FileNames[FileIndex - 1].Name)) + if (std::optional<const char *> FName = dwarf::toString(FileNameEntry.Name)) FileName = *FName; assert(FileName != ""); DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID); @@ -1925,7 +1921,7 @@ static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction, const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; StringRef FileName = ""; if (std::optional<const char *> FName = - dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name)) + dwarf::toString(LineTable->Prologue.getFileNameEntry(Row.File).Name)) FileName = *FName; OS << " # debug line " << FileName << ":" << Row.Line; if (Row.Column) @@ -2517,7 +2513,7 @@ BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) { // Clean-up the effect of the code emission. 
for (const MCSymbol &Symbol : Assembler.symbols()) { MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol); - MutableSymbol->setUndefined(); + MutableSymbol->setFragment(nullptr); MutableSymbol->setIsRegistered(false); } diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index eec68ff..6cac2d0 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -1915,13 +1915,9 @@ void BinaryFunction::postProcessEntryPoints() { continue; // If we have grabbed a wrong code label which actually points to some - // constant island inside the function, ignore this label and remove it - // from the secondary entry point map. - if (isStartOfConstantIsland(Offset)) { - BC.SymbolToFunctionMap.erase(Label); - removeSymbolFromSecondaryEntryPointMap(Label); + // constant island inside the function, ignore this label. + if (isStartOfConstantIsland(Offset)) continue; - } BC.errs() << "BOLT-WARNING: reference in the middle of instruction " "detected in function " @@ -1963,7 +1959,9 @@ void BinaryFunction::postProcessJumpTables() { return EntryAddress == Parent->getAddress() + Parent->getSize(); }); if (IsBuiltinUnreachable) { - MCSymbol *Label = getOrCreateLocalLabel(EntryAddress, true); + BinaryFunction *TargetBF = BC.getBinaryFunctionAtAddress(EntryAddress); + MCSymbol *Label = TargetBF ? 
TargetBF->getSymbol() + : getOrCreateLocalLabel(EntryAddress, true); JT.Entries.push_back(Label); continue; } @@ -3775,6 +3773,8 @@ MCSymbol *BinaryFunction::addEntryPointAtOffset(uint64_t Offset) { assert(Offset && "cannot add primary entry point"); const uint64_t EntryPointAddress = getAddress() + Offset; + assert(!isInConstantIsland(EntryPointAddress) && + "cannot add entry point that points to constant data"); MCSymbol *LocalSymbol = getOrCreateLocalLabel(EntryPointAddress); MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(LocalSymbol); diff --git a/bolt/lib/Core/MCPlusBuilder.cpp b/bolt/lib/Core/MCPlusBuilder.cpp index fa8f4d1..7f962e1 100644 --- a/bolt/lib/Core/MCPlusBuilder.cpp +++ b/bolt/lib/Core/MCPlusBuilder.cpp @@ -31,6 +31,11 @@ using namespace MCPlus; namespace opts { cl::opt<bool> + TerminalHLT("terminal-x86-hlt", + cl::desc("Assume that execution stops at x86 HLT instruction"), + cl::init(true), cl::Hidden, cl::cat(BoltCategory)); + +cl::opt<bool> TerminalTrap("terminal-trap", cl::desc("Assume that execution stops at trap instruction"), cl::init(true), cl::Hidden, cl::cat(BoltCategory)); @@ -132,8 +137,13 @@ bool MCPlusBuilder::equals(const MCSpecifierExpr &A, const MCSpecifierExpr &B, } bool MCPlusBuilder::isTerminator(const MCInst &Inst) const { - return Analysis->isTerminator(Inst) || - (opts::TerminalTrap && Info->get(Inst.getOpcode()).isTrap()); + if (isX86HLT(Inst)) + return opts::TerminalHLT; + + if (Info->get(Inst.getOpcode()).isTrap()) + return opts::TerminalTrap; + + return Analysis->isTerminator(Inst); } void MCPlusBuilder::setTailCall(MCInst &Inst) const { diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp index 5d44e1a..d7f02b9 100644 --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -662,7 +662,7 @@ Error CleanMCState::runOnFunctions(BinaryContext &BC) { if (S->isDefined()) { LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Symbol \"" << S->getName() << "\" is already defined\n"); - 
const_cast<MCSymbol *>(S)->setUndefined(); + const_cast<MCSymbol *>(S)->setFragment(nullptr); } if (S->isRegistered()) { LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Symbol \"" << S->getName() diff --git a/bolt/lib/Passes/FrameOptimizer.cpp b/bolt/lib/Passes/FrameOptimizer.cpp index 81d4d93..b0b7207f 100644 --- a/bolt/lib/Passes/FrameOptimizer.cpp +++ b/bolt/lib/Passes/FrameOptimizer.cpp @@ -224,6 +224,11 @@ Error FrameOptimizerPass::runOnFunctions(BinaryContext &BC) { if (opts::FrameOptimization == FOP_NONE) return Error::success(); + if (!BC.isX86()) { + BC.errs() << "BOLT-ERROR: " << getName() << " is supported only on X86\n"; + exit(1); + } + std::unique_ptr<BinaryFunctionCallGraph> CG; std::unique_ptr<FrameAnalysis> FA; std::unique_ptr<RegAnalysis> RA; diff --git a/bolt/lib/Passes/IndirectCallPromotion.cpp b/bolt/lib/Passes/IndirectCallPromotion.cpp index 2b5a591..8a01cb9 100644 --- a/bolt/lib/Passes/IndirectCallPromotion.cpp +++ b/bolt/lib/Passes/IndirectCallPromotion.cpp @@ -261,10 +261,7 @@ IndirectCallPromotion::getCallTargets(BinaryBasicBlock &BB, for (size_t I = Range.first; I < Range.second; ++I, JI += JIAdj) { MCSymbol *Entry = JT->Entries[I]; const BinaryBasicBlock *ToBB = BF.getBasicBlockForLabel(Entry); - assert(ToBB || Entry == BF.getFunctionEndLabel() || - Entry == BF.getFunctionEndLabel(FragmentNum::cold())); - if (Entry == BF.getFunctionEndLabel() || - Entry == BF.getFunctionEndLabel(FragmentNum::cold())) + if (!ToBB) continue; const Location To(Entry); const BinaryBasicBlock::BinaryBranchInfo &BI = BB.getBranchInfo(*ToBB); diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp index f928dd4..65c84eb 100644 --- a/bolt/lib/Passes/PAuthGadgetScanner.cpp +++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp @@ -1078,6 +1078,15 @@ protected: dbgs() << ")\n"; }); + // If this instruction terminates the program immediately, no + // authentication oracles are possible past this point. 
+ if (BC.MIB->isTrap(Point)) { + LLVM_DEBUG({ traceInst(BC, "Trap instruction found", Point); }); + DstState Next(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters()); + Next.CannotEscapeUnchecked.set(); + return Next; + } + // If this instruction is reachable by the analysis, a non-empty state will // be propagated to it sooner or later. Until then, skip computeNext(). if (Cur.empty()) { @@ -1185,8 +1194,8 @@ protected: // // A basic block without any successors, on the other hand, can be // pessimistically initialized to everything-is-unsafe: this will naturally - // handle both return and tail call instructions and is harmless for - // internal indirect branch instructions (such as computed gotos). + // handle return, trap and tail call instructions. At the same time, it is + // harmless for internal indirect branch instructions, like computed gotos. if (BB.succ_empty()) return createUnsafeState(); diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp index 996d2e9..0ddb73f 100644 --- a/bolt/lib/Rewrite/BinaryPassManager.cpp +++ b/bolt/lib/Rewrite/BinaryPassManager.cpp @@ -52,6 +52,7 @@ namespace opts { extern cl::opt<bool> PrintAll; extern cl::opt<bool> PrintDynoStats; extern cl::opt<bool> DumpDotAll; +extern bool shouldDumpDot(const bolt::BinaryFunction &Function); extern cl::opt<std::string> AsmDump; extern cl::opt<bolt::PLTCall::OptType> PLT; extern cl::opt<bolt::IdenticalCodeFolding::ICFLevel, false, @@ -340,7 +341,7 @@ Error BinaryFunctionPassManager::runPasses() { Function.print(BC.outs(), Message); - if (opts::DumpDotAll) + if (opts::shouldDumpDot(Function)) Function.dumpGraphForPass(PassIdName); } } diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index fe4a23c..a6e4dbc 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -84,6 +84,7 @@ extern cl::opt<bool> KeepNops; extern cl::opt<bool> Lite; extern cl::list<std::string> 
ReorderData; extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions; +extern cl::opt<bool> TerminalHLT; extern cl::opt<bool> TerminalTrap; extern cl::opt<bool> TimeBuild; extern cl::opt<bool> TimeRewrite; @@ -114,6 +115,35 @@ cl::opt<bool> DumpDotAll( "enable '-print-loops' for color-coded blocks"), cl::Hidden, cl::cat(BoltCategory)); +cl::list<std::string> DumpDotFunc( + "dump-dot-func", cl::CommaSeparated, + cl::desc( + "dump function CFGs to graphviz format for specified functions only;" + "takes function name patterns (regex supported)"), + cl::value_desc("func1,func2,func3,..."), cl::Hidden, cl::cat(BoltCategory)); + +bool shouldDumpDot(const bolt::BinaryFunction &Function) { + // If dump-dot-all is enabled, dump all functions + if (DumpDotAll) + return !Function.isIgnored(); + + // If no specific functions specified in dump-dot-func, don't dump any + if (DumpDotFunc.empty()) + return false; + + if (Function.isIgnored()) + return false; + + // Check if function matches any of the specified patterns + for (const std::string &Name : DumpDotFunc) { + if (Function.hasNameRegex(Name)) { + return true; + } + } + + return false; +} + static cl::list<std::string> ForceFunctionNames("funcs", cl::CommaSeparated, @@ -880,14 +910,9 @@ void RewriteInstance::discoverFileObjects() { // code section (see IHI0056B). $d identifies data contents. // Compilers usually merge multiple data objects in a single $d-$x interval, // but we need every data object to be marked with $d. Because of that we - // create a vector of MarkerSyms with all locations of data objects. + // keep track of marker symbols with all locations of data objects. 
- struct MarkerSym { - uint64_t Address; - MarkerSymType Type; - }; - - std::vector<MarkerSym> SortedMarkerSymbols; + DenseMap<uint64_t, MarkerSymType> MarkerSymbols; auto addExtraDataMarkerPerSymbol = [&]() { bool IsData = false; uint64_t LastAddr = 0; @@ -911,14 +936,14 @@ void RewriteInstance::discoverFileObjects() { } if (MarkerType != MarkerSymType::NONE) { - SortedMarkerSymbols.push_back(MarkerSym{SymInfo.Address, MarkerType}); + MarkerSymbols[SymInfo.Address] = MarkerType; LastAddr = SymInfo.Address; IsData = MarkerType == MarkerSymType::DATA; continue; } if (IsData) { - SortedMarkerSymbols.push_back({SymInfo.Address, MarkerSymType::DATA}); + MarkerSymbols[SymInfo.Address] = MarkerSymType::DATA; LastAddr = SymInfo.Address; } } @@ -1283,27 +1308,24 @@ void RewriteInstance::discoverFileObjects() { BC->setHasSymbolsWithFileName(FileSymbols.size()); // Now that all the functions were created - adjust their boundaries. - adjustFunctionBoundaries(); + adjustFunctionBoundaries(MarkerSymbols); // Annotate functions with code/data markers in AArch64 - for (auto ISym = SortedMarkerSymbols.begin(); - ISym != SortedMarkerSymbols.end(); ++ISym) { - - auto *BF = - BC->getBinaryFunctionContainingAddress(ISym->Address, true, true); + for (auto &[Address, Type] : MarkerSymbols) { + auto *BF = BC->getBinaryFunctionContainingAddress(Address, true, true); if (!BF) { // Stray marker continue; } - const auto EntryOffset = ISym->Address - BF->getAddress(); - if (ISym->Type == MarkerSymType::CODE) { + const auto EntryOffset = Address - BF->getAddress(); + if (Type == MarkerSymType::CODE) { BF->markCodeAtOffset(EntryOffset); continue; } - if (ISym->Type == MarkerSymType::DATA) { + if (Type == MarkerSymType::DATA) { BF->markDataAtOffset(EntryOffset); - BC->AddressToConstantIslandMap[ISym->Address] = BF; + BC->AddressToConstantIslandMap[Address] = BF; continue; } llvm_unreachable("Unknown marker"); @@ -1832,7 +1854,8 @@ void RewriteInstance::disassemblePLT() { } } -void 
RewriteInstance::adjustFunctionBoundaries() { +void RewriteInstance::adjustFunctionBoundaries( + DenseMap<uint64_t, MarkerSymType> &MarkerSyms) { for (auto BFI = BC->getBinaryFunctions().begin(), BFE = BC->getBinaryFunctions().end(); BFI != BFE; ++BFI) { @@ -1870,12 +1893,15 @@ void RewriteInstance::adjustFunctionBoundaries() { continue; } - // This is potentially another entry point into the function. - uint64_t EntryOffset = NextSymRefI->first - Function.getAddress(); - LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function " - << Function << " at offset 0x" - << Twine::utohexstr(EntryOffset) << '\n'); - Function.addEntryPointAtOffset(EntryOffset); + auto It = MarkerSyms.find(NextSymRefI->first); + if (It == MarkerSyms.end() || It->second != MarkerSymType::DATA) { + // This is potentially another entry point into the function. + uint64_t EntryOffset = NextSymRefI->first - Function.getAddress(); + LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function " + << Function << " at offset 0x" + << Twine::utohexstr(EntryOffset) << '\n'); + Function.addEntryPointAtOffset(EntryOffset); + } ++NextSymRefI; } @@ -2177,7 +2203,9 @@ void RewriteInstance::adjustCommandLineOptions() { if (!opts::KeepNops.getNumOccurrences()) opts::KeepNops = true; - // Linux kernel may resume execution after a trap instruction in some cases. + // Linux kernel may resume execution after a trap or x86 HLT instruction. 
+ if (!opts::TerminalHLT.getNumOccurrences()) + opts::TerminalHLT = false; if (!opts::TerminalTrap.getNumOccurrences()) opts::TerminalTrap = false; } @@ -2907,7 +2935,8 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection, ReferencedSymbol = nullptr; ExtractedValue = Address; } else if (RefFunctionOffset) { - if (ContainingBF && ContainingBF != ReferencedBF) { + if (ContainingBF && ContainingBF != ReferencedBF && + !ReferencedBF->isInConstantIsland(Address)) { ReferencedSymbol = ReferencedBF->addEntryPointAtOffset(RefFunctionOffset); } else { @@ -3570,7 +3599,7 @@ void RewriteInstance::postProcessFunctions() { if (opts::PrintAll || opts::PrintCFG) Function.print(BC->outs(), "after building cfg"); - if (opts::DumpDotAll) + if (opts::shouldDumpDot(Function)) Function.dumpGraphForPass("00_build-cfg"); if (opts::PrintLoopInfo) { diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp index 9732617..72f95ce 100644 --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -382,10 +382,9 @@ public: // the list of successors of this basic block as appropriate. // Any of the above code sequences assume the fall-through basic block - // is a dead-end BRK instruction (any immediate operand is accepted). + // is a dead-end trap instruction. 
const BinaryBasicBlock *BreakBB = BB.getFallthrough(); - if (!BreakBB || BreakBB->empty() || - BreakBB->front().getOpcode() != AArch64::BRK) + if (!BreakBB || BreakBB->empty() || !isTrap(BreakBB->front())) return std::nullopt; // Iterate over the instructions of BB in reverse order, matching opcodes @@ -1744,6 +1743,34 @@ public: Inst.addOperand(MCOperand::createImm(0)); } + bool isTrap(const MCInst &Inst) const override { + if (Inst.getOpcode() != AArch64::BRK) + return false; + // Only match the immediate values that are likely to indicate this BRK + // instruction is emitted to terminate the program immediately and not to + // be handled by a SIGTRAP handler, for example. + switch (Inst.getOperand(0).getImm()) { + case 0xc470: + case 0xc471: + case 0xc472: + case 0xc473: + // Explicit Pointer Authentication check failed, see + // AArch64AsmPrinter::emitPtrauthCheckAuthenticatedValue(). + return true; + case 0x1: + // __builtin_trap(), as emitted by Clang. + return true; + case 0x3e8: // decimal 1000 + // __builtin_trap(), as emitted by GCC. + return true; + default: + // Some constants may indicate intentionally recoverable break-points. + // This is the case at least for 0xf000, which is used by + // __builtin_debugtrap() supported by Clang. 
+ return false; + } + } + bool isStorePair(const MCInst &Inst) const { const unsigned opcode = Inst.getOpcode(); diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp index a60c1a6..1842509 100644 --- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp +++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp @@ -223,6 +223,10 @@ public: return Inst.getOpcode() == X86::ENDBR32 || Inst.getOpcode() == X86::ENDBR64; } + bool isX86HLT(const MCInst &Inst) const override { + return Inst.getOpcode() == X86::HLT; + } + int getPopSize(const MCInst &Inst) const override { switch (Inst.getOpcode()) { case X86::POP16r: diff --git a/bolt/test/AArch64/data-marker-invalidates-extra-entrypoint.s b/bolt/test/AArch64/data-marker-invalidates-extra-entrypoint.s new file mode 100644 index 0000000..3bcbcbb --- /dev/null +++ b/bolt/test/AArch64/data-marker-invalidates-extra-entrypoint.s @@ -0,0 +1,38 @@ +# This test is to ensure that we query data marker symbols to avoid +# misidentifying constant data island symbol as extra entry point. + +# RUN: %clang %cflags %s -o %t.so -Wl,-q -Wl,--init=_bar -Wl,--fini=_bar +# RUN: llvm-bolt %t.so -o %t.instr.so + + .text + .global _start + .type _start, %function +_start: + ret + + .text + .global _foo + .type _foo, %function +_foo: + cbz x1, _foo_2 +_foo_1: + add x1, x2, x0 + b _foo +_foo_2: + ret + +# None of these constant island symbols should be identified as extra entry +# point for function `_foo'. 
+ .align 4 +_const1: .short 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80 +_const2: .short 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0 +_const3: .short 0x04, 0x08, 0x0c, 0x20, 0x60, 0x80, 0xa0, 0xc0 + + .text + .global _bar + .type _bar, %function +_bar: + ret + + # Dummy relocation to force relocation mode + .reloc 0, R_AARCH64_NONE diff --git a/bolt/test/AArch64/unsupported-passes.test b/bolt/test/AArch64/unsupported-passes.test new file mode 100644 index 0000000..886fc1c --- /dev/null +++ b/bolt/test/AArch64/unsupported-passes.test @@ -0,0 +1,8 @@ +// Checks that non-fully supported passes on AArch64 are handled appropriately. + +// REQUIRES: system-linux,asserts,target=aarch64{{.*}} + +RUN: %clang %cflags %p/../Inputs/hello.c -o %t -Wl,-q +RUN: not llvm-bolt %t -o %t.bolt --frame-opt=all 2>&1 | FileCheck %s + +CHECK: BOLT-ERROR: frame-optimizer is supported only on X86 diff --git a/bolt/test/AArch64/validate-secondary-entry-point.s b/bolt/test/AArch64/validate-secondary-entry-point.s index 0099a0e..3ad6946 100644 --- a/bolt/test/AArch64/validate-secondary-entry-point.s +++ b/bolt/test/AArch64/validate-secondary-entry-point.s @@ -1,13 +1,23 @@ # This test is to verify that BOLT won't take a label pointing to constant -# island as a secondary entry point (function `_start` doesn't have ELF size -# set originally) and the function won't otherwise be mistaken as non-simple. +# island as a secondary entry point. This could happen when function doesn't +# have ELF size set if it is from assembly code, or a constant island is +# referenced by another function discovered during relocation processing. 
-# RUN: %clang %cflags -pie %s -o %t.so -Wl,-q -Wl,--init=_foo -Wl,--fini=_foo +# RUN: split-file %s %t + +# RUN: %clang %cflags -pie %t/tt.asm -o %t.so \ +# RUN: -Wl,-q -Wl,--init=_foo -Wl,--fini=_foo # RUN: llvm-bolt %t.so -o %t.bolt.so --print-cfg 2>&1 | FileCheck %s # CHECK-NOT: BOLT-WARNING: reference in the middle of instruction detected \ # CHECK-NOT: function _start at offset 0x{{[0-9a-f]+}} # CHECK: Binary Function "_start" after building cfg +# RUN: %clang %cflags -ffunction-sections -shared %t/tt.c %t/ss.c -o %tt.so \ +# RUN: -Wl,-q -Wl,--init=_start -Wl,--fini=_start \ +# RUN: -Wl,--version-script=%t/linker_script +# RUN: llvm-bolt %tt.so -o %tt.bolted.so + +;--- tt.asm .text .global _foo @@ -32,3 +42,31 @@ _bar: # Dummy relocation to force relocation mode .reloc 0, R_AARCH64_NONE + +;--- tt.c +void _start() {} + +__attribute__((naked)) void foo() { + asm("ldr x16, .L_fnptr\n" + "blr x16\n" + "ret\n" + + "_rodatx:" + ".global _rodatx;" + ".quad 0;" + ".L_fnptr:" + ".quad 0;"); +} + +;--- ss.c +__attribute__((visibility("hidden"))) extern void* _rodatx; +void* bar() { return &_rodatx; } + +;--- linker_script +{ +global: + _start; + foo; + bar; +local: *; +}; diff --git a/bolt/test/AArch64/veneer-lld-abs.s b/bolt/test/AArch64/veneer-lld-abs.s index b22301d..77d6f0ce2 100644 --- a/bolt/test/AArch64/veneer-lld-abs.s +++ b/bolt/test/AArch64/veneer-lld-abs.s @@ -12,7 +12,7 @@ ## Occasionally, we see the linker not generating $d symbols for long veneers ## causing BOLT to fail veneer elimination. 
-# RUN: llvm-objcopy --remove-symbol-prefix=\$d %t.exe %t.no-marker.exe +# RUN: llvm-objcopy --remove-symbol-prefix='$d' %t.exe %t.no-marker.exe # RUN: llvm-bolt %t.no-marker.exe -o %t.no-marker.bolt \ # RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-BOLT # RUN: llvm-objdump -d -j .text %t.no-marker.bolt | \ diff --git a/bolt/test/Inputs/multi-func.cpp b/bolt/test/Inputs/multi-func.cpp new file mode 100644 index 0000000..61c968f --- /dev/null +++ b/bolt/test/Inputs/multi-func.cpp @@ -0,0 +1,24 @@ +#include <iostream> + +// Multiple functions to test selective dumping +int add(int a, int b) { return a + b; } + +int multiply(int a, int b) { return a * b; } + +int main_helper() { + std::cout << "Helper function" << std::endl; + return 42; +} + +int main_secondary() { return add(5, 3); } + +void other_function() { std::cout << "Other function" << std::endl; } + +int main() { + int result = add(10, 20); + result = multiply(result, 2); + main_helper(); + main_secondary(); + other_function(); + return result; +} diff --git a/bolt/test/X86/double-jump.test b/bolt/test/X86/double-jump.test index 424747c..94b1578 100644 --- a/bolt/test/X86/double-jump.test +++ b/bolt/test/X86/double-jump.test @@ -1,15 +1,11 @@ ## Test the double jump removal peephole. -## This test has commands that rely on shell capabilities that won't execute -## correctly on Windows e.g. subshell execution -REQUIRES: shell - RUN: %clangxx %cxxflags %p/Inputs/double_jump.cpp -o %t.exe -RUN: (llvm-bolt %t.exe --peepholes=double-jumps \ -RUN: --eliminate-unreachable -o %t 2>&1 \ -RUN: && llvm-objdump -d %t --print-imm-hex --no-show-raw-insn) | FileCheck %s +RUN: llvm-bolt %t.exe --peepholes=double-jumps \ +RUN: --eliminate-unreachable -o %t | FileCheck --check-prefix CHECK-BOLT %s +RUN: llvm-objdump -d %t --print-imm-hex --no-show-raw-insn | FileCheck %s -CHECK: BOLT-INFO: Peephole: 1 double jumps patched. +CHECK-BOLT: BOLT-INFO: Peephole: 1 double jumps patched. 
CHECK: <_Z3foom>: CHECK-NEXT: pushq %rbp diff --git a/bolt/test/X86/dwarf5-debug-line-print.s b/bolt/test/X86/dwarf5-debug-line-print.s new file mode 100644 index 0000000..b0a5bab --- /dev/null +++ b/bolt/test/X86/dwarf5-debug-line-print.s @@ -0,0 +1,148 @@ +# REQUIRES: system-linux + +## Check that BOLT correctly prints debug line comments for DWARF-5. + + +# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %s -o %t1.o +# RUN: %clang %cflags -dwarf-5 %t1.o -o %t.exe -Wl,-q +# RUN: llvm-bolt %t.exe --update-debug-sections --print-debug-info \ +# RUN: --print-after-lowering -o %t.bolt | FileCheck %s + +# CHECK: xorq %rdi, %rdi # debug line main.c:2:5 + +# __attribute__((naked)) void _start() { +# __asm__( +# "xor %rdi, %rdi\n" // exit code 0 +# "mov $60, %rax\n" // syscall number for exit +# "syscall\n" +# ); +# } + + .file "main.c" + .text + .globl _start # -- Begin function _start + .p2align 4 + .type _start,@function +_start: # @_start +.Lfunc_begin0: + .file 0 "/home/gpastukhov/tmp2" "main.c" md5 0x94c0e54a615c2a21415ddb904991abd8 + .cfi_startproc +# %bb.0: + .loc 0 2 5 prologue_end # main.c:2:5 + #APP + xorq %rdi, %rdi + movq $60, %rax + syscall + + #NO_APP +.Ltmp0: +.Lfunc_end0: + .size _start, .Lfunc_end0-_start + .cfi_endproc + # -- End function + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 37 # DW_FORM_strx1 + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 114 # DW_AT_str_offsets_base + .byte 23 # DW_FORM_sec_offset + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 37 # DW_FORM_strx1 + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 115 # DW_AT_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # 
Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 0 # DW_CHILDREN_no + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 1 # Abbrev [1] 0xc:0x23 DW_TAG_compile_unit + .byte 0 # DW_AT_producer + .short 29 # DW_AT_language + .byte 1 # DW_AT_name + .long .Lstr_offsets_base0 # DW_AT_str_offsets_base + .long .Lline_table_start0 # DW_AT_stmt_list + .byte 2 # DW_AT_comp_dir + .byte 0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .long .Laddr_table_base0 # DW_AT_addr_base + .byte 2 # Abbrev [2] 0x23:0xb DW_TAG_subprogram + .byte 0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 87 + .byte 3 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + # DW_AT_external + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + .section .debug_str_offsets,"",@progbits + .long 20 # Length of String Offsets Set + .short 5 + .short 0 +.Lstr_offsets_base0: + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 20.1.8 (CentOS 20.1.8-1.el9)" # string offset=0 +.Linfo_string1: + .asciz "main.c" # string offset=43 +.Linfo_string2: + .asciz "/home/gpastukhov/tmp2" # string offset=50 +.Linfo_string3: + .asciz "_start" # string offset=72 + .section .debug_str_offsets,"",@progbits + .long .Linfo_string0 
+ .long .Linfo_string1 + .long .Linfo_string2 + .long .Linfo_string3 + .section .debug_addr,"",@progbits + .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution +.Ldebug_addr_start0: + .short 5 # DWARF version number + .byte 8 # Address size + .byte 0 # Segment selector size +.Laddr_table_base0: + .quad .Lfunc_begin0 +.Ldebug_addr_end0: + .ident "clang version 20.1.8 (CentOS 20.1.8-1.el9)" + .section ".note.GNU-stack","",@progbits + .addrsig + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/dwarf5-two-cus.s b/bolt/test/X86/dwarf5-two-cus.s new file mode 100644 index 0000000..8b5afb4 --- /dev/null +++ b/bolt/test/X86/dwarf5-two-cus.s @@ -0,0 +1,251 @@ +## Check that BOLT correctly handles two CUs with DWARF-5 debug info (does not crash), when +## a function from one CU is forced to be inlined into another. + +# REQUIRES: system-linux + +# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %s -o %t-main.o +# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5_helper.s -o %thelper.o +# RUN: %clang %cflags -gdwarf-5 -Wl,-q %t-main.o %thelper.o -o %t.exe +# RUN: llvm-bolt %t.exe --update-debug-sections --force-inline=_Z3fooi \ +# RUN: -o %t.bolt | FileCheck %s + +# CHECK-NOT: BOLT-ERROR +# CHECK-NOT: BOLT-WARNING +# CHECK: BOLT-INFO: inlined {{[0-9]+}} calls at {{[1-9][0-9]*}} call sites + +# extern int foo(int); +# int main(){ +# foo(10); +# return 0; +# } + .file "main.cpp" + .text + .globl main # -- Begin function main + .p2align 4 + .type main,@function +main: # @main +.Lfunc_begin0: + .file 0 "/home/gpastukhov/tmp2" "main.cpp" md5 0x5c930f5d3a068b09fd18ece59c58bdcf + .loc 0 2 0 # main.cpp:2:0 + .cfi_startproc +# %bb.0: + pushq %rax + .cfi_def_cfa_offset 16 +.Ltmp0: + .loc 0 3 5 prologue_end # main.cpp:3:5 + movl $10, %edi + callq _Z3fooi +.Ltmp1: + .loc 0 4 5 # main.cpp:4:5 + xorl %eax, %eax + .loc 0 4 5 epilogue_begin is_stmt 0 # main.cpp:4:5 + popq %rcx + 
.cfi_def_cfa_offset 8 + retq +.Ltmp2: +.Lfunc_end0: + .size main, .Lfunc_end0-main + .cfi_endproc + # -- End function + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 37 # DW_FORM_strx1 + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 114 # DW_AT_str_offsets_base + .byte 23 # DW_FORM_sec_offset + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 37 # DW_FORM_strx1 + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 115 # DW_AT_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 122 # DW_AT_call_all_calls + .byte 25 # DW_FORM_flag_present + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 72 # DW_TAG_call_site + .byte 1 # DW_CHILDREN_yes + .byte 127 # DW_AT_call_origin + .byte 19 # DW_FORM_ref4 + .byte 125 # DW_AT_call_return_pc + .byte 27 # DW_FORM_addrx + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 73 # DW_TAG_call_site_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 126 # DW_AT_call_value + .byte 24 # DW_FORM_exprloc + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 
# DW_CHILDREN_yes + .byte 110 # DW_AT_linkage_name + .byte 37 # DW_FORM_strx1 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 60 # DW_AT_declaration + .byte 25 # DW_FORM_flag_present + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 6 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 7 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. 
Section + .byte 1 # Abbrev [1] 0xc:0x47 DW_TAG_compile_unit + .byte 0 # DW_AT_producer + .short 33 # DW_AT_language + .byte 1 # DW_AT_name + .long .Lstr_offsets_base0 # DW_AT_str_offsets_base + .long .Lline_table_start0 # DW_AT_stmt_list + .byte 2 # DW_AT_comp_dir + .byte 0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .long .Laddr_table_base0 # DW_AT_addr_base + .byte 2 # Abbrev [2] 0x23:0x1c DW_TAG_subprogram + .byte 0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 87 + # DW_AT_call_all_calls + .byte 6 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 2 # DW_AT_decl_line + .long 78 # DW_AT_type + # DW_AT_external + .byte 3 # Abbrev [3] 0x32:0xc DW_TAG_call_site + .long 63 # DW_AT_call_origin + .byte 1 # DW_AT_call_return_pc + .byte 4 # Abbrev [4] 0x38:0x5 DW_TAG_call_site_parameter + .byte 1 # DW_AT_location + .byte 85 + .byte 1 # DW_AT_call_value + .byte 58 + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 5 # Abbrev [5] 0x3f:0xf DW_TAG_subprogram + .byte 3 # DW_AT_linkage_name + .byte 4 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 78 # DW_AT_type + # DW_AT_declaration + # DW_AT_external + .byte 6 # Abbrev [6] 0x48:0x5 DW_TAG_formal_parameter + .long 78 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 7 # Abbrev [7] 0x4e:0x4 DW_TAG_base_type + .byte 5 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + .section .debug_str_offsets,"",@progbits + .long 32 # Length of String Offsets Set + .short 5 + .short 0 +.Lstr_offsets_base0: + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 20.1.8 (CentOS 20.1.8-1.el9)" # string offset=0 +.Linfo_string1: + .asciz "main.cpp" # string offset=43 +.Linfo_string2: + .asciz "/home/gpastukhov/tmp2" # string offset=52 +.Linfo_string3: + .asciz "_Z3fooi" # string offset=74 +.Linfo_string4: + .asciz 
"foo" # string offset=82 +.Linfo_string5: + .asciz "int" # string offset=86 +.Linfo_string6: + .asciz "main" # string offset=90 + .section .debug_str_offsets,"",@progbits + .long .Linfo_string0 + .long .Linfo_string1 + .long .Linfo_string2 + .long .Linfo_string3 + .long .Linfo_string4 + .long .Linfo_string5 + .long .Linfo_string6 + .section .debug_addr,"",@progbits + .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution +.Ldebug_addr_start0: + .short 5 # DWARF version number + .byte 8 # Address size + .byte 0 # Segment selector size +.Laddr_table_base0: + .quad .Lfunc_begin0 + .quad .Ltmp1 +.Ldebug_addr_end0: + .ident "clang version 20.1.8 (CentOS 20.1.8-1.el9)" + .section ".note.GNU-stack","",@progbits + .addrsig + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/hlt-terminator.s b/bolt/test/X86/hlt-terminator.s new file mode 100644 index 0000000..3f67182 --- /dev/null +++ b/bolt/test/X86/hlt-terminator.s @@ -0,0 +1,24 @@ +## Check that HLT instruction is handled differently depending on the flags. +## It's a terminator in the user-level code, but the execution can resume in +## ring 0. 
+ +# RUN: %clang %cflags %s -static -o %t.exe -nostdlib +# RUN: llvm-bolt %t.exe --print-cfg --print-only=main --terminal-x86-hlt=0 \ +# RUN: -o %t.ring0 2>&1 | FileCheck %s --check-prefix=CHECK-RING0 +# RUN: llvm-bolt %t.exe --print-cfg --print-only=main \ +# RUN: -o %t.ring3 2>&1 | FileCheck %s --check-prefix=CHECK-RING3 +# RUN: llvm-objdump -d %t.ring0 --print-imm-hex | FileCheck %s --check-prefix=CHECK-BIN + +# CHECK-RING0: BB Count : 1 +# CHECK-RING3: BB Count : 2 + +# CHECK-BIN: <main>: +# CHECK-BIN-NEXT: f4 hlt +# CHECK-BIN-NEXT: c3 retq + +.global main + .type main, %function +main: + hlt + retq +.size main, .-main diff --git a/bolt/test/X86/jmp-optimization.test b/bolt/test/X86/jmp-optimization.test index f969578..847c4822 100644 --- a/bolt/test/X86/jmp-optimization.test +++ b/bolt/test/X86/jmp-optimization.test @@ -1,10 +1,7 @@ ## Tests the optimization of functions that just do a tail call in the beginning. -## This test has commands that rely on shell capabilities that won't execute -## correctly on Windows e.g. unsupported parameter expansion -REQUIRES: shell - -RUN: %clangxx %cxxflags -O2 %S/Inputs/jmp_opt{,2,3}.cpp -o %t +RUN: %clangxx %cxxflags -O2 %S/Inputs/jmp_opt.cpp %S/Inputs/jmp_opt2.cpp \ +RUN: %S/Inputs/jmp_opt3.cpp -o %t RUN: llvm-bolt -inline-small-functions %t -o %t.bolt RUN: llvm-objdump -d %t.bolt --print-imm-hex | FileCheck %s diff --git a/bolt/test/X86/jump-table-ambiguous-unreachable.s b/bolt/test/X86/jump-table-ambiguous-unreachable.s new file mode 100644 index 0000000..eb87b96 --- /dev/null +++ b/bolt/test/X86/jump-table-ambiguous-unreachable.s @@ -0,0 +1,87 @@ +## Check that llvm-bolt correctly updates ambiguous jump table entries that +## can correspond to either builtin_unreachable() or could be a pointer to +## the next function. 
+ +# REQUIRES: system-linux + +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o +# RUN: %clang %cflags %t.o -o %t.exe -no-pie -Wl,-q + +# RUN: llvm-bolt %t.exe --print-normalized --print-only=foo -o %t.out \ +# RUN: 2>&1 | FileCheck %s + + + + .text + .globl _start + .type _start, %function +_start: + .cfi_startproc + call foo + ret + .cfi_endproc + .size _start, .-_start + + .globl foo + .type foo, %function +foo: + .cfi_startproc +.LBB00: + movq 0x8(%rdi), %rdi + movzbl 0x1(%rdi), %eax +.LBB00_br: + jmpq *"JUMP_TABLE/foo.0"(,%rax,8) +# CHECK: jmpq {{.*}} # JUMPTABLE +# CHECK-NEXT: Successors: {{.*}}, {{.*}}, {{.*}}, {{.*}}, {{.*}} + +.Ltmp87085: + xorl %eax, %eax + retq + +.Ltmp87086: + cmpb $0x0, 0x8(%rdi) + setne %al + retq + +.Ltmp87088: + movb $0x1, %al + retq + +.Ltmp87087: + movzbl 0x14(%rdi), %eax + andb $0x2, %al + shrb %al + retq + + .cfi_endproc +.size foo, .-foo + + .globl bar + .type bar, %function +bar: + .cfi_startproc + ret + .cfi_endproc + .size bar, .-bar + +# Jump tables +.section .rodata + .global jump_table +jump_table: +"JUMP_TABLE/foo.0": + .quad bar + .quad .Ltmp87085 + .quad bar + .quad .Ltmp87086 + .quad .Ltmp87087 + .quad .LBB00 + .quad .Ltmp87088 + .quad bar + .quad .LBB00 + +# CHECK: Jump table {{.*}} for function foo +# CHECK-NEXT: 0x{{.*}} : bar +# CHECK-NEXT: 0x{{.*}} : +# CHECK-NEXT: 0x{{.*}} : bar +# CHECK-NEXT: 0x{{.*}} : +# CHECK-NEXT: 0x{{.*}} : diff --git a/bolt/test/X86/jump-table-icp.test b/bolt/test/X86/jump-table-icp.test index f147432..a095929 100644 --- a/bolt/test/X86/jump-table-icp.test +++ b/bolt/test/X86/jump-table-icp.test @@ -4,11 +4,7 @@ RUN: link_fdata %p/Inputs/jump_table_icp.s %t.o %t.fdata --nmtool llvm-nm RUN: llvm-strip --strip-unneeded %t.o RUN: %clang %cflags -no-pie %t.o -o %t.exe -Wl,-q -## This test has commands that rely on shell capabilities that won't execute -## correctly on Windows e.g. 
subshell execution -REQUIRES: shell - -RUN: (llvm-bolt %t.exe --data %t.fdata -o %t --relocs \ +RUN: llvm-bolt %t.exe --data %t.fdata -o %t --relocs \ RUN: --reorder-blocks=cache --split-functions --split-all-cold \ RUN: --use-gnu-stack --dyno-stats --indirect-call-promotion=jump-tables \ RUN: --print-icp -v=0 \ @@ -16,8 +12,8 @@ RUN: --enable-bat --print-cache-metrics \ RUN: --icp-jt-remaining-percent-threshold=10 \ RUN: --icp-jt-total-percent-threshold=2 \ RUN: --indirect-call-promotion-topn=1 \ -RUN: --icp-jump-tables-targets --align-functions-max-bytes=7 2>&1 && \ -RUN: llvm-objdump -d %t --print-imm-hex) | FileCheck %s +RUN: --icp-jump-tables-targets --align-functions-max-bytes=7 | FileCheck %s +RUN: llvm-objdump -d %t --print-imm-hex | FileCheck --check-prefix CHECK-ASM %s BOLT-INFO: ICP total indirect callsites = 0 BOLT-INFO: ICP total jump table callsites = 2 @@ -107,14 +103,14 @@ CHECK-NEXT: Exec Count : 140 CHECK: Predecessors: .Ltmp{{.*}}, .LFT{{.*}} CHECK: Successors: .Ltmp{{.*}} (mispreds: 0, count: 98) -CHECK: <_Z3inci>: -CHECK: movq 0x{{.*}}(,%rax,8), %rax -CHECK-NEXT: cmpq $0x{{.*}}, %rax -CHECK-NEXT: je {{.*}} <_Z3inci+0x{{.*}}> -CHECK-NEXT: jmpq *%rax - -CHECK: <_Z7inc_dupi>: -CHECK: movq 0x{{.*}}(,%rax,8), %rax -CHECK-NEXT: cmpq $0x{{.*}}, %rax -CHECK-NEXT: je {{.*}} <_Z7inc_dupi+0x{{.*}}> -CHECK-NEXT: jmpq *%rax +CHECK-ASM: <_Z3inci>: +CHECK-ASM: movq 0x{{.*}}(,%rax,8), %rax +CHECK-ASM-NEXT: cmpq $0x{{.*}}, %rax +CHECK-ASM-NEXT: je {{.*}} <_Z3inci+0x{{.*}}> +CHECK-ASM-NEXT: jmpq *%rax + +CHECK-ASM: <_Z7inc_dupi>: +CHECK-ASM: movq 0x{{.*}}(,%rax,8), %rax +CHECK-ASM-NEXT: cmpq $0x{{.*}}, %rax +CHECK-ASM-NEXT: je {{.*}} <_Z7inc_dupi+0x{{.*}}> +CHECK-ASM-NEXT: jmpq *%rax diff --git a/bolt/test/X86/shrinkwrapping.test b/bolt/test/X86/shrinkwrapping.test index 521b456..5470b5d 100644 --- a/bolt/test/X86/shrinkwrapping.test +++ b/bolt/test/X86/shrinkwrapping.test @@ -2,23 +2,21 @@ ## shrink-wrapping when optimizing a function without ## frame pointers. 
-## This test has commands that rely on shell capabilities that won't execute -## correctly on Windows e.g. subshell execution to capture command output. -REQUIRES: shell - RUN: %clangxx %cxxflags -no-pie %S/Inputs/exc4sw.S -o %t.exe -Wl,-q RUN: llvm-bolt %t.exe -o %t --relocs --frame-opt=all \ RUN: --print-only=main --print-cfg \ RUN: --data=%p/Inputs/exc4sw.fdata --reorder-blocks=cache 2>&1 | \ RUN: FileCheck %s --check-prefix=CHECK-BOLT -RUN: llvm-objdump --dwarf=frames %t.exe | grep -A20 -e \ -RUN: `llvm-nm --numeric-sort %t.exe | grep main | tail -n 1 | \ -RUN: cut -f1 -d' ' | tail -c9` 2>&1 | FileCheck %s --check-prefix=CHECK-INPUT +RUN: llvm-nm --numeric-sort %t.exe | grep main | tail -n 1 | \ +RUN: cut -f1 -d' ' | tail -c9 > %t.input_address +RUN: llvm-objdump --dwarf=frames %t.exe | grep -A20 -f %t.input_address \ +RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-INPUT -RUN: llvm-objdump --dwarf=frames %t | grep -A20 -e \ -RUN: `llvm-nm --numeric-sort %t | grep main | tail -n 1 | cut -f1 -d' ' | \ -RUN: tail -c9` 2>&1 | FileCheck %s --check-prefix=CHECK-OUTPUT +RUN: llvm-nm --numeric-sort %t | grep main | tail -n 1 | \ +RUN: cut -f1 -d' ' | tail -c9 > %t.output_address +RUN: llvm-objdump --dwarf=frames %t | grep -A20 -f %t.output_address \ +RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-OUTPUT CHECK-BOLT: Extern Entry Count: 100 CHECK-BOLT: Shrink wrapping moved 2 spills inserting load/stores and 0 spills inserting push/pops diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-address-checks.s b/bolt/test/binary-analysis/AArch64/gs-pauth-address-checks.s index 3f982dd..74f2761 100644 --- a/bolt/test/binary-analysis/AArch64/gs-pauth-address-checks.s +++ b/bolt/test/binary-analysis/AArch64/gs-pauth-address-checks.s @@ -31,7 +31,7 @@ resign_xpaci_good: xpaci x16 cmp x0, x16 b.eq 1f - brk 0x1234 + brk 0xc471 1: pacia x0, x2 ret @@ -46,7 +46,7 @@ resign_xpacd_good: xpacd x16 cmp x0, x16 b.eq 1f - brk 0x1234 + brk 0xc473 1: pacda x0, x2 ret @@ -117,7 +117,7 @@ 
resign_xpaci_unrelated_auth_and_check: xpaci x16 cmp x0, x16 b.eq 1f - brk 0x1234 + brk 0xc471 1: pacia x10, x2 ret @@ -139,7 +139,7 @@ resign_xpaci_wrong_pattern_1: xpaci x16 cmp x0, x16 b.eq 1f - brk 0x1234 + brk 0xc471 1: pacia x0, x2 ret @@ -157,7 +157,7 @@ resign_xpaci_wrong_pattern_2: xpaci x0 // x0 instead of x16 cmp x0, x16 b.eq 1f - brk 0x1234 + brk 0xc471 1: pacia x0, x2 ret @@ -174,7 +174,7 @@ resign_xpaci_wrong_pattern_3: xpaci x16 cmp x16, x16 // x16 instead of x0 b.eq 1f - brk 0x1234 + brk 0xc471 1: pacia x0, x2 ret @@ -191,7 +191,7 @@ resign_xpaci_wrong_pattern_4: xpaci x16 cmp x0, x0 // x0 instead of x16 b.eq 1f - brk 0x1234 + brk 0xc471 1: pacia x0, x2 ret @@ -208,7 +208,7 @@ resign_xpaci_wrong_pattern_5: mov x16, x16 // replace xpaci with a no-op instruction cmp x0, x16 b.eq 1f - brk 0x1234 + brk 0xc471 1: pacia x0, x2 ret @@ -228,7 +228,7 @@ resign_xpaclri_good: xpaclri cmp x30, x16 b.eq 1f - brk 0x1234 + brk 0xc471 1: pacia x30, x2 @@ -246,7 +246,7 @@ xpaclri_check_keeps_lr_safe: xpaclri // clobbers LR cmp x30, x16 b.eq 1f - brk 0x1234 // marks LR as trusted and safe-to-dereference + brk 0xc471 // marks LR as trusted and safe-to-dereference 1: ret // not reporting non-protected return .size xpaclri_check_keeps_lr_safe, .-xpaclri_check_keeps_lr_safe @@ -265,7 +265,7 @@ xpaclri_check_requires_safe_lr: xpaclri cmp x30, x16 b.eq 1f - brk 0x1234 + brk 0xc471 1: ret .size xpaclri_check_requires_safe_lr, .-xpaclri_check_requires_safe_lr @@ -283,7 +283,7 @@ resign_xpaclri_wrong_reg: xpaclri // ... 
but xpaclri still operates on x30 cmp x20, x16 b.eq 1f - brk 0x1234 + brk 0xc471 1: pacia x20, x2 @@ -303,7 +303,7 @@ resign_checked_not_authenticated: xpaci x16 cmp x0, x16 b.eq 1f - brk 0x1234 + brk 0xc471 1: pacia x0, x2 ret @@ -323,7 +323,7 @@ resign_checked_before_authenticated: xpaci x16 cmp x0, x16 b.eq 1f - brk 0x1234 + brk 0xc471 1: autib x0, x1 pacia x0, x2 @@ -339,7 +339,7 @@ resign_high_bits_tbz_good: autib x0, x1 eor x16, x0, x0, lsl #1 tbz x16, #62, 1f - brk 0x1234 + brk 0xc471 1: pacia x0, x2 ret @@ -378,7 +378,7 @@ resign_high_bits_tbz_wrong_bit: autib x0, x1 eor x16, x0, x0, lsl #1 tbz x16, #63, 1f - brk 0x1234 + brk 0xc471 1: pacia x0, x2 ret @@ -393,7 +393,7 @@ resign_high_bits_tbz_wrong_shift_amount: autib x0, x1 eor x16, x0, x0, lsl #2 tbz x16, #62, 1f - brk 0x1234 + brk 0xc471 1: pacia x0, x2 ret @@ -408,7 +408,7 @@ resign_high_bits_tbz_wrong_shift_type: autib x0, x1 eor x16, x0, x0, lsr #1 tbz x16, #62, 1f - brk 0x1234 + brk 0xc471 1: pacia x0, x2 ret @@ -423,7 +423,7 @@ resign_high_bits_tbz_wrong_pattern_1: autib x0, x1 eor x16, x0, x0, lsl #1 tbz x17, #62, 1f - brk 0x1234 + brk 0xc471 1: pacia x0, x2 ret @@ -438,7 +438,7 @@ resign_high_bits_tbz_wrong_pattern_2: autib x0, x1 eor x16, x10, x0, lsl #1 tbz x16, #62, 1f - brk 0x1234 + brk 0xc471 1: pacia x0, x2 ret @@ -453,7 +453,7 @@ resign_high_bits_tbz_wrong_pattern_3: autib x0, x1 eor x16, x0, x10, lsl #1 tbz x16, #62, 1f - brk 0x1234 + brk 0xc471 1: pacia x0, x2 ret @@ -648,7 +648,7 @@ many_checked_regs: xpacd x16 // ... cmp x2, x16 // ... 
b.eq 2f // end of basic block - brk 0x1234 + brk 0xc473 2: pacdza x0 pacdza x1 diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-authentication-oracles.s b/bolt/test/binary-analysis/AArch64/gs-pauth-authentication-oracles.s index c314bc7..f44ba21 100644 --- a/bolt/test/binary-analysis/AArch64/gs-pauth-authentication-oracles.s +++ b/bolt/test/binary-analysis/AArch64/gs-pauth-authentication-oracles.s @@ -79,7 +79,7 @@ good_explicit_check: autia x0, x1 eor x16, x0, x0, lsl #1 tbz x16, #62, 1f - brk 0x1234 + brk 0xc470 1: ret .size good_explicit_check, .-good_explicit_check @@ -373,7 +373,7 @@ good_explicit_check_multi_bb: 1: eor x16, x0, x0, lsl #1 tbz x16, #62, 2f - brk 0x1234 + brk 0xc470 2: cbz x1, 3f nop @@ -685,8 +685,7 @@ good_address_arith_nocfg: .globl good_explicit_check_unrelated_reg .type good_explicit_check_unrelated_reg,@function good_explicit_check_unrelated_reg: -// CHECK-LABEL: GS-PAUTH: authentication oracle found in function good_explicit_check_unrelated_reg, basic block {{[^,]+}}, at address - // FIXME: The below instruction is not an authentication oracle +// CHECK-NOT: good_explicit_check_unrelated_reg autia x2, x3 // One of possible execution paths after this instruction // ends at BRK below, thus BRK used as a trap instruction // should formally "check everything" not to introduce @@ -694,7 +693,7 @@ good_explicit_check_unrelated_reg: autia x0, x1 eor x16, x0, x0, lsl #1 tbz x16, #62, 1f - brk 0x1234 + brk 0xc470 1: ldr x4, [x2] // Right before this instruction X2 is checked - this // should be propagated to the basic block ending with diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s b/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s index 3a4d383..4d4bb7b 100644 --- a/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s +++ b/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s @@ -57,7 +57,7 @@ good_sign_auted_checked_brk: autda x0, x2 eor x16, x0, x0, lsl #1 tbz x16, #62, 1f - brk 
 0x1234 + brk 0xc472 1: pacda x0, x1 ret @@ -351,7 +351,7 @@ good_sign_auted_checked_brk_multi_bb: 1: eor x16, x0, x0, lsl #1 tbz x16, #62, 2f - brk 0x1234 + brk 0xc472 2: cbz x4, 3f nop @@ -705,7 +705,7 @@ good_resign_with_increment_brk: add x0, x0, #8 eor x16, x0, x0, lsl #1 tbz x16, #62, 1f - brk 0x1234 + brk 0xc472 1: mov x2, x0 pacda x2, x1 diff --git a/bolt/test/binary-analysis/AArch64/trap-instructions.s b/bolt/test/binary-analysis/AArch64/trap-instructions.s new file mode 100644 index 0000000..7810b2d --- /dev/null +++ b/bolt/test/binary-analysis/AArch64/trap-instructions.s @@ -0,0 +1,213 @@ +// RUN: %clang %cflags -march=armv8.3-a %s -o %t.exe -Wl,--emit-relocs +// RUN: llvm-bolt-binary-analysis --scanners=pauth %t.exe 2>&1 | FileCheck %s + +// Test what instructions can be used to terminate the program abnormally +// on security violation. +// +// All test cases have the same structure: +// +// cbz x0, 1f // [a], ensures [c] is never reported as unreachable +// autia x2, x3 +// cbz x1, 2f // [b] +// [instruction under test] +// 1: +// ret // [c] +// 2: +// ldr x0, [x2] +// ret +// +// This is to handle three possible cases: the instruction under test may be +// considered by BOLT as +// * trapping (and thus no-return): after being authenticated, x2 is either +// checked by LDR (if [b] is taken) or the program is terminated +// immediately without leaking x2 (if [b] falls through to the trapping +// instruction under test). Nothing is reported. +// * non-trapping, but no-return (such as calling abort()): x2 is leaked if [b] +// falls through. Authentication oracle is reported. +// * non-trapping and falling-through (i.e. a regular instruction): +// x2 is leaked by [c]. Authentication oracle is reported.
+ + .text + + .globl brk_key_ia + .type brk_key_ia,@function +brk_key_ia: +// CHECK-NOT: brk_key_ia + cbz x0, 1f + autia x2, x3 + cbz x1, 2f + brk 0xc470 +1: + ret +2: + ldr x0, [x2] + ret + .size brk_key_ia, .-brk_key_ia + + .globl brk_key_ib + .type brk_key_ib,@function +brk_key_ib: +// CHECK-NOT: brk_key_ib + cbz x0, 1f + autia x2, x3 + cbz x1, 2f + brk 0xc471 +1: + ret +2: + ldr x0, [x2] + ret + .size brk_key_ib, .-brk_key_ib + + .globl brk_key_da + .type brk_key_da,@function +brk_key_da: +// CHECK-NOT: brk_key_da + cbz x0, 1f + autia x2, x3 + cbz x1, 2f + brk 0xc472 +1: + ret +2: + ldr x0, [x2] + ret + .size brk_key_da, .-brk_key_da + + .globl brk_key_db + .type brk_key_db,@function +brk_key_db: +// CHECK-NOT: brk_key_db + cbz x0, 1f + autia x2, x3 + cbz x1, 2f + brk 0xc473 +1: + ret +2: + ldr x0, [x2] + ret + .size brk_key_db, .-brk_key_db + +// The immediate operand of BRK instruction may indicate whether the instruction +// is intended to be a non-recoverable trap: for example, for this code +// +// int test_trap(void) { +// __builtin_trap(); +// return 42; +// } +// int test_debugtrap(void) { +// __builtin_debugtrap(); +// return 42; +// } +// +// Clang produces the following assembly: +// +// test_trap: +// brk #0x1 +// test_debugtrap: +// brk #0xf000 +// mov w0, #42 +// ret +// +// In GCC, __builtin_trap() uses "brk 0x3e8" (i.e. decimal 1000) and +// __builtin_debugtrap() is not supported. +// +// At the time of writing these test cases, any BRK instruction is considered +// no-return by BOLT, thus it ends its basic block and prevents falling through +// to the next BB. +// FIXME: Make BOLT handle __builtin_debugtrap() properly from the CFG point +// of view. 
+ + .globl brk_gcc_builtin_trap + .type brk_gcc_builtin_trap,@function +brk_gcc_builtin_trap: +// CHECK-NOT: brk_gcc_builtin_trap + cbz x0, 1f + autia x2, x3 + cbz x1, 2f + brk 0x3e8 // __builtin_trap() +1: + ret +2: + ldr x0, [x2] + ret + .size brk_gcc_builtin_trap, .-brk_gcc_builtin_trap + + .globl brk_clang_builtin_trap + .type brk_clang_builtin_trap,@function +brk_clang_builtin_trap: +// CHECK-NOT: brk_clang_builtin_trap + cbz x0, 1f + autia x2, x3 + cbz x1, 2f + brk 0x1 // __builtin_trap() +1: + ret +2: + ldr x0, [x2] + ret + .size brk_clang_builtin_trap, .-brk_clang_builtin_trap + + .globl brk_clang_builtin_debugtrap + .type brk_clang_builtin_debugtrap,@function +brk_clang_builtin_debugtrap: +// CHECK-LABEL: GS-PAUTH: authentication oracle found in function brk_clang_builtin_debugtrap, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x2, x3 +// CHECK-NEXT: The 0 instructions that leak the affected registers are: + cbz x0, 1f + autia x2, x3 + cbz x1, 2f + brk 0xf000 // __builtin_debugtrap() +1: + ret +2: + ldr x0, [x2] + ret + .size brk_clang_builtin_debugtrap, .-brk_clang_builtin_debugtrap + +// Conservatively assume BRK with an unknown immediate operand as not suitable +// for terminating the program on security violation. + .globl brk_unknown_imm + .type brk_unknown_imm,@function +brk_unknown_imm: +// CHECK-LABEL: GS-PAUTH: authentication oracle found in function brk_unknown_imm, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x2, x3 +// CHECK-NEXT: The 0 instructions that leak the affected registers are: + cbz x0, 1f + autia x2, x3 + cbz x1, 2f + brk 0x3572 +1: + ret +2: + ldr x0, [x2] + ret + .size brk_unknown_imm, .-brk_unknown_imm + +// Conservatively assume calling the abort() function may be an unsafe way to +// terminate the program, as there is some amount of instructions that would +// be executed when the program state is already tampered with. 
+ .globl call_abort_fn + .type call_abort_fn,@function +call_abort_fn: +// CHECK-LABEL: GS-PAUTH: authentication oracle found in function call_abort_fn, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x2, x3 +// CHECK-NEXT: The 0 instructions that leak the affected registers are: + cbz x0, 1f + autia x2, x3 + cbz x1, 2f + b abort // a no-return tail call to abort() +1: + ret +2: + ldr x0, [x2] + ret + .size call_abort_fn, .-call_abort_fn + + .globl main + .type main,@function +main: + mov x0, 0 + ret + .size main, .-main diff --git a/bolt/test/dump-dot-func.test b/bolt/test/dump-dot-func.test new file mode 100644 index 0000000..f05bfc1 --- /dev/null +++ b/bolt/test/dump-dot-func.test @@ -0,0 +1,52 @@ +# Test the --dump-dot-func option with multiple functions +# (includes tests for both mangled/unmangled names) + +RUN: %clangxx %p/Inputs/multi-func.cpp -o %t.exe -Wl,-q + +# Test 1: --dump-dot-func with specific function name (mangled) +RUN: llvm-bolt %t.exe -o %t.bolt1 --dump-dot-func=_Z3addii -v=1 2>&1 | FileCheck %s --check-prefix=ADD + +# Test 2: --dump-dot-func with regex pattern (main.*) +RUN: llvm-bolt %t.exe -o %t.bolt2 --dump-dot-func="main.*" -v=1 2>&1 | FileCheck %s --check-prefix=MAIN-REGEX + +# Test 3: --dump-dot-func with multiple specific functions (mangled names) +RUN: llvm-bolt %t.exe -o %t.bolt3 --dump-dot-func=_Z3addii,_Z8multiplyii -v=1 2>&1 | FileCheck %s --check-prefix=MULTI + +# Test 4: No option specified should create no dot files +RUN: llvm-bolt %t.exe -o %t.bolt4 2>&1 | FileCheck %s --check-prefix=NONE + +# Test 5: --dump-dot-func with non-existent function +RUN: llvm-bolt %t.exe -o %t.bolt5 --dump-dot-func=nonexistent -v=1 2>&1 | FileCheck %s --check-prefix=NONEXISTENT + +# Test 6: Backward compatibility - --dump-dot-all should still work +RUN: llvm-bolt %t.exe -o %t.bolt6 --dump-dot-all -v=1 2>&1 | FileCheck %s --check-prefix=ALL + +# Test 7: Test with unmangled function name (main function) +RUN: 
llvm-bolt %t.exe -o %t.bolt7 --dump-dot-func=main -v=1 2>&1 | FileCheck %s --check-prefix=MAIN-UNMANGLED + +# Check that specific functions are dumped +ADD: BOLT-INFO: dumping CFG to _Z3addii-00_build-cfg.dot +ADD-NOT: BOLT-INFO: dumping CFG to main-00_build-cfg.dot +ADD-NOT: BOLT-INFO: dumping CFG to _Z8multiplyii-00_build-cfg.dot +ADD-NOT: BOLT-INFO: dumping CFG to _Z11main_helperv-00_build-cfg.dot + +MAIN-REGEX-DAG: BOLT-INFO: dumping CFG to main-00_build-cfg.dot +MAIN-REGEX-NOT: BOLT-INFO: dumping CFG to _Z3addii-00_build-cfg.dot +MAIN-REGEX-NOT: BOLT-INFO: dumping CFG to _Z8multiplyii-00_build-cfg.dot + +MULTI-DAG: BOLT-INFO: dumping CFG to _Z3addii-00_build-cfg.dot +MULTI-DAG: BOLT-INFO: dumping CFG to _Z8multiplyii-00_build-cfg.dot +MULTI-NOT: BOLT-INFO: dumping CFG to main-00_build-cfg.dot +MULTI-NOT: BOLT-INFO: dumping CFG to _Z11main_helperv-00_build-cfg.dot + +# Should be no dumping messages when no option is specified +NONE-NOT: BOLT-INFO: dumping CFG + +# Should be no dumping messages for non-existent function +NONEXISTENT-NOT: BOLT-INFO: dumping CFG + +ALL: BOLT-INFO: dumping CFG to main-00_build-cfg.dot + +MAIN-UNMANGLED: BOLT-INFO: dumping CFG to main-00_build-cfg.dot +MAIN-UNMANGLED-NOT: BOLT-INFO: dumping CFG to _Z3addii-00_build-cfg.dot +MAIN-UNMANGLED-NOT: BOLT-INFO: dumping CFG to _Z8multiplyii-00_build-cfg.dot
\ No newline at end of file diff --git a/bolt/test/lit.cfg.py b/bolt/test/lit.cfg.py index 0d05229..bef570b 100644 --- a/bolt/test/lit.cfg.py +++ b/bolt/test/lit.cfg.py @@ -18,11 +18,22 @@ from lit.llvm.subst import FindTool # name: The name of this test suite. config.name = "BOLT" +# TODO: Consolidate the logic for turning on the internal shell by default for all LLVM test suites. +# See https://github.com/llvm/llvm-project/issues/106636 for more details. +# +# We prefer the lit internal shell which provides a better user experience on failures +# and is faster unless the user explicitly disables it with LIT_USE_INTERNAL_SHELL=0 +# env var. +use_lit_shell = True +lit_shell_env = os.environ.get("LIT_USE_INTERNAL_SHELL") +if lit_shell_env: + use_lit_shell = lit.util.pythonize_bool(lit_shell_env) + # testFormat: The test format to use to interpret tests. # # For now we require '&&' between commands, until they get globally killed and # the test runner updated. -config.test_format = lit.formats.ShTest(not llvm_config.use_lit_shell) +config.test_format = lit.formats.ShTest(execute_external=not use_lit_shell) # suffixes: A list of file extensions to treat as test files. 
config.suffixes = [ diff --git a/bolt/test/perf2bolt/perf_test.test b/bolt/test/perf2bolt/perf_test.test index 434d4d2..08b3413 100644 --- a/bolt/test/perf2bolt/perf_test.test +++ b/bolt/test/perf2bolt/perf_test.test @@ -2,7 +2,7 @@ REQUIRES: system-linux, perf -RUN: %clang %S/Inputs/perf_test.c -fuse-ld=lld -Wl,--script=%S/Inputs/perf_test.lds -o %t +RUN: %clang %S/Inputs/perf_test.c -fuse-ld=lld -pie -Wl,--script=%S/Inputs/perf_test.lds -o %t RUN: perf record -Fmax -e cycles:u -o %t2 -- %t RUN: perf2bolt %t -p=%t2 -o %t3 -nl -ignore-build-id --show-density \ RUN: --heatmap %t.hm 2>&1 | FileCheck %s diff --git a/bolt/test/permission.test b/bolt/test/permission.test index f495e87..ecb51fc 100644 --- a/bolt/test/permission.test +++ b/bolt/test/permission.test @@ -1,13 +1,28 @@ # Ensure that the permissions of the optimized binary file comply with the # system's umask. -# This test performs a logical AND operation on the results of the `stat -c %a -# %t.bolt` and `umask` commands (both results are displayed in octal), and -# checks whether the result is equal to 0. -REQUIRES: shell, system-linux +# This test uses umask, which is Linux specific. 
+REQUIRES: system-linux -RUN: %clang %cflags %p/Inputs/hello.c -o %t -Wl,-q -RUN: llvm-bolt %t -o %t.bolt -RUN: echo $(( 8#$(stat -c %a %t.bolt) & 8#$(umask) )) | FileCheck %s +# RUN: rm -f %t +# RUN: touch %t +# RUN: chmod 0755 %t +# RUN: ls -l %t | cut -f 1 -d ' ' > %t.0755 +# RUN: chmod 0600 %t +# RUN: ls -l %t | cut -f 1 -d ' ' > %t.0600 +# RUN: chmod 0655 %t +# RUN: ls -l %t | cut -f 1 -d ' ' > %t.0655 -CHECK: 0 +RUN: %clang %cflags %p/Inputs/hello.c -o %t.exe -Wl,-q + +RUN: umask 0022 +RUN: llvm-bolt %t.exe -o %t1 +RUN: ls -l %t1 | cut -f 1 -d ' ' | cmp - %t.0755 + +RUN: umask 0177 +RUN: llvm-bolt %t.exe -o %t2 +RUN: ls -l %t2 | cut -f 1 -d ' ' | cmp - %t.0600 + +RUN: umask 0122 +RUN: llvm-bolt %t.exe -o %t3 +RUN: ls -l %t3 | cut -f 1 -d ' ' | cmp - %t.0655 diff --git a/bolt/test/runtime/X86/tail-duplication-constant-prop.s b/bolt/test/runtime/X86/tail-duplication-constant-prop.s index 863c6ff..c28c2f4 100644 --- a/bolt/test/runtime/X86/tail-duplication-constant-prop.s +++ b/bolt/test/runtime/X86/tail-duplication-constant-prop.s @@ -8,8 +8,8 @@ # RUN: --print-finalized \ # RUN: --tail-duplication=moderate --tail-duplication-minimum-offset=1 \ # RUN: --tail-duplication-const-copy-propagation=1 -o %t.out | FileCheck %s -# RUN: %t.exe; echo $? -# RUN: %t.out; echo $? +# RUN: not %t.exe +# RUN: not %t.out # FDATA: 1 main 14 1 main #.BB2# 0 10 # FDATA: 1 main 16 1 main #.BB2# 0 20 |