aboutsummaryrefslogtreecommitdiff
path: root/bolt
diff options
context:
space:
mode:
Diffstat (limited to 'bolt')
-rw-r--r-- bolt/docs/CommandLineArgumentReference.md | 6
-rw-r--r-- bolt/include/bolt/Core/BinaryFunction.h | 5
-rw-r--r-- bolt/include/bolt/Core/MCPlusBuilder.h | 18
-rw-r--r-- bolt/include/bolt/Rewrite/RewriteInstance.h | 2
-rw-r--r-- bolt/include/bolt/Utils/CommandLineOpts.h | 9
-rw-r--r-- bolt/lib/Core/BinaryBasicBlock.cpp | 13
-rw-r--r-- bolt/lib/Core/BinaryContext.cpp | 20
-rw-r--r-- bolt/lib/Core/BinaryFunction.cpp | 14
-rw-r--r-- bolt/lib/Core/MCPlusBuilder.cpp | 14
-rw-r--r-- bolt/lib/Passes/BinaryPasses.cpp | 2
-rw-r--r-- bolt/lib/Passes/FrameOptimizer.cpp | 5
-rw-r--r-- bolt/lib/Passes/IndirectCallPromotion.cpp | 5
-rw-r--r-- bolt/lib/Passes/PAuthGadgetScanner.cpp | 13
-rw-r--r-- bolt/lib/Rewrite/BinaryPassManager.cpp | 3
-rw-r--r-- bolt/lib/Rewrite/RewriteInstance.cpp | 87
-rw-r--r-- bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp | 33
-rw-r--r-- bolt/lib/Target/X86/X86MCPlusBuilder.cpp | 4
-rw-r--r-- bolt/test/AArch64/data-marker-invalidates-extra-entrypoint.s | 38
-rw-r--r-- bolt/test/AArch64/unsupported-passes.test | 8
-rw-r--r-- bolt/test/AArch64/validate-secondary-entry-point.s | 44
-rw-r--r-- bolt/test/AArch64/veneer-lld-abs.s | 2
-rw-r--r-- bolt/test/Inputs/multi-func.cpp | 24
-rw-r--r-- bolt/test/X86/double-jump.test | 12
-rw-r--r-- bolt/test/X86/dwarf5-debug-line-print.s | 148
-rw-r--r-- bolt/test/X86/dwarf5-two-cus.s | 251
-rw-r--r-- bolt/test/X86/hlt-terminator.s | 24
-rw-r--r-- bolt/test/X86/jmp-optimization.test | 7
-rw-r--r-- bolt/test/X86/jump-table-ambiguous-unreachable.s | 87
-rw-r--r-- bolt/test/X86/jump-table-icp.test | 32
-rw-r--r-- bolt/test/X86/shrinkwrapping.test | 18
-rw-r--r-- bolt/test/binary-analysis/AArch64/gs-pauth-address-checks.s | 44
-rw-r--r-- bolt/test/binary-analysis/AArch64/gs-pauth-authentication-oracles.s | 9
-rw-r--r-- bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s | 6
-rw-r--r-- bolt/test/binary-analysis/AArch64/trap-instructions.s | 213
-rw-r--r-- bolt/test/dump-dot-func.test | 52
-rw-r--r-- bolt/test/lit.cfg.py | 13
-rw-r--r-- bolt/test/perf2bolt/perf_test.test | 2
-rw-r--r-- bolt/test/permission.test | 31
-rw-r--r-- bolt/test/runtime/X86/tail-duplication-constant-prop.s | 4
39 files changed, 1166 insertions, 156 deletions
diff --git a/bolt/docs/CommandLineArgumentReference.md b/bolt/docs/CommandLineArgumentReference.md
index f3881c9..d65cf39 100644
--- a/bolt/docs/CommandLineArgumentReference.md
+++ b/bolt/docs/CommandLineArgumentReference.md
@@ -138,6 +138,12 @@
Dump function CFGs to graphviz format after each stage;enable '-print-loops'
for color-coded blocks
+- `--dump-dot-func=<func1,func2,func3...>`
+
+ Dump function CFGs to graphviz format for specified functions only;
+ takes function name patterns (regex supported). Note: C++ function names
+ must be passed using their mangled names
+
- `--dump-linux-exceptions`
Dump Linux kernel exception table
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index ae58052..b59926c 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -1196,11 +1196,6 @@ public:
return getSecondaryEntryPointSymbol(BB.getLabel());
}
- /// Remove a label from the secondary entry point map.
- void removeSymbolFromSecondaryEntryPointMap(const MCSymbol *Label) {
- SecondaryEntryPoints.erase(Label);
- }
-
/// Return true if the basic block is an entry point into the function
/// (either primary or secondary).
bool isEntryPoint(const BinaryBasicBlock &BB) const {
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index f902a8c..ae04891 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -718,6 +718,20 @@ public:
return false;
}
+ /// Returns true if Inst is a trap instruction.
+ ///
+ /// Tests if Inst is an instruction that immediately causes an abnormal
+ /// program termination, for example when a security violation is detected
+ /// by a compiler-inserted check.
+ ///
+ /// @note An implementation of this method should likely return false for
+ /// calls to library functions like abort(), as it is possible that the
+ /// execution state is partially attacker-controlled at this point.
+ virtual bool isTrap(const MCInst &Inst) const {
+ llvm_unreachable("not implemented");
+ return false;
+ }
+
virtual bool isBreakpoint(const MCInst &Inst) const {
llvm_unreachable("not implemented");
return false;
@@ -740,6 +754,10 @@ public:
return false;
}
+ /// Return true if \p Inst is the x86 HLT instruction. This default
+ /// implementation returns false; the X86 target overrides it.
+ virtual bool isX86HLT(const MCInst &Inst) const { return false; }
+
/// Return the width, in bytes, of the memory access performed by \p Inst, if
/// this is a pop instruction. Return zero otherwise.
virtual int getPopSize(const MCInst &Inst) const {
diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h
index 91d62a7..19dcce8 100644
--- a/bolt/include/bolt/Rewrite/RewriteInstance.h
+++ b/bolt/include/bolt/Rewrite/RewriteInstance.h
@@ -241,7 +241,7 @@ private:
/// Adjust function sizes and set proper maximum size values after the whole
/// symbol table has been processed.
- void adjustFunctionBoundaries();
+ void adjustFunctionBoundaries(DenseMap<uint64_t, MarkerSymType> &MarkerSyms);
/// Make .eh_frame section relocatable.
void relocateEHFrameSection();
diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h
index a75b6bf..859d6f3 100644
--- a/bolt/include/bolt/Utils/CommandLineOpts.h
+++ b/bolt/include/bolt/Utils/CommandLineOpts.h
@@ -15,6 +15,12 @@
#include "llvm/Support/CommandLine.h"
+namespace llvm {
+namespace bolt {
+class BinaryFunction;
+}
+} // namespace llvm
+
namespace opts {
enum HeatmapModeKind {
@@ -100,6 +106,9 @@ extern llvm::cl::opt<unsigned> Verbosity;
/// Return true if we should process all functions in the binary.
bool processAllFunctions();
+/// Return true if we should dump dot graphs for the given function.
+bool shouldDumpDot(const llvm::bolt::BinaryFunction &Function);
+
enum GadgetScannerKind { GS_PACRET, GS_PAUTH, GS_ALL };
extern llvm::cl::bits<GadgetScannerKind> GadgetScannersToRun;
diff --git a/bolt/lib/Core/BinaryBasicBlock.cpp b/bolt/lib/Core/BinaryBasicBlock.cpp
index 311d5c1..eeab1ed 100644
--- a/bolt/lib/Core/BinaryBasicBlock.cpp
+++ b/bolt/lib/Core/BinaryBasicBlock.cpp
@@ -103,9 +103,18 @@ bool BinaryBasicBlock::validateSuccessorInvariants() {
Valid &= (Sym == Function->getFunctionEndLabel() ||
Sym == Function->getFunctionEndLabel(getFragmentNum()));
if (!Valid) {
- BC.errs() << "BOLT-WARNING: Jump table contains illegal entry: "
- << Sym->getName() << "\n";
+ const BinaryFunction *TargetBF = BC.getFunctionForSymbol(Sym);
+ if (TargetBF) {
+ // A jump table entry may refer to another function as a result of
+ // built-in unreachable; treat such an entry as valid.
+ Valid = true;
+ } else {
+ BC.errs() << "BOLT-WARNING: Jump table contains illegal entry: "
+ << Sym->getName() << "\n";
+ }
}
+ if (!Valid)
+ break;
}
}
} else {
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index 84f1853..dd0d041 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -1568,23 +1568,19 @@ unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
const DWARFDebugLine::LineTable *LineTable =
DwCtx->getLineTableForUnit(SrcUnit);
- const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
- LineTable->Prologue.FileNames;
- // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
+ const DWARFDebugLine::FileNameEntry &FileNameEntry =
+ LineTable->Prologue.getFileNameEntry(FileIndex);
+ // Dir indexes start at 1 and a dir index 0
// means empty dir.
- assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
- "FileIndex out of range for the compilation unit.");
StringRef Dir = "";
- if (FileNames[FileIndex - 1].DirIdx != 0) {
+ if (FileNameEntry.DirIdx != 0) {
if (std::optional<const char *> DirName = dwarf::toString(
- LineTable->Prologue
- .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
+ LineTable->Prologue.IncludeDirectories[FileNameEntry.DirIdx - 1])) {
Dir = *DirName;
}
}
StringRef FileName = "";
- if (std::optional<const char *> FName =
- dwarf::toString(FileNames[FileIndex - 1].Name))
+ if (std::optional<const char *> FName = dwarf::toString(FileNameEntry.Name))
FileName = *FName;
assert(FileName != "");
DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID);
@@ -1925,7 +1921,7 @@ static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
StringRef FileName = "";
if (std::optional<const char *> FName =
- dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name))
+ dwarf::toString(LineTable->Prologue.getFileNameEntry(Row.File).Name))
FileName = *FName;
OS << " # debug line " << FileName << ":" << Row.Line;
if (Row.Column)
@@ -2517,7 +2513,7 @@ BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
// Clean-up the effect of the code emission.
for (const MCSymbol &Symbol : Assembler.symbols()) {
MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
- MutableSymbol->setUndefined();
+ MutableSymbol->setFragment(nullptr);
MutableSymbol->setIsRegistered(false);
}
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index eec68ff..6cac2d0 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1915,13 +1915,9 @@ void BinaryFunction::postProcessEntryPoints() {
continue;
// If we have grabbed a wrong code label which actually points to some
- // constant island inside the function, ignore this label and remove it
- // from the secondary entry point map.
- if (isStartOfConstantIsland(Offset)) {
- BC.SymbolToFunctionMap.erase(Label);
- removeSymbolFromSecondaryEntryPointMap(Label);
+ // constant island inside the function, ignore this label.
+ if (isStartOfConstantIsland(Offset))
continue;
- }
BC.errs() << "BOLT-WARNING: reference in the middle of instruction "
"detected in function "
@@ -1963,7 +1959,9 @@ void BinaryFunction::postProcessJumpTables() {
return EntryAddress == Parent->getAddress() + Parent->getSize();
});
if (IsBuiltinUnreachable) {
- MCSymbol *Label = getOrCreateLocalLabel(EntryAddress, true);
+ BinaryFunction *TargetBF = BC.getBinaryFunctionAtAddress(EntryAddress);
+ MCSymbol *Label = TargetBF ? TargetBF->getSymbol()
+ : getOrCreateLocalLabel(EntryAddress, true);
JT.Entries.push_back(Label);
continue;
}
@@ -3775,6 +3773,8 @@ MCSymbol *BinaryFunction::addEntryPointAtOffset(uint64_t Offset) {
assert(Offset && "cannot add primary entry point");
const uint64_t EntryPointAddress = getAddress() + Offset;
+ assert(!isInConstantIsland(EntryPointAddress) &&
+ "cannot add entry point that points to constant data");
MCSymbol *LocalSymbol = getOrCreateLocalLabel(EntryPointAddress);
MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(LocalSymbol);
diff --git a/bolt/lib/Core/MCPlusBuilder.cpp b/bolt/lib/Core/MCPlusBuilder.cpp
index fa8f4d1..7f962e1 100644
--- a/bolt/lib/Core/MCPlusBuilder.cpp
+++ b/bolt/lib/Core/MCPlusBuilder.cpp
@@ -31,6 +31,11 @@ using namespace MCPlus;
namespace opts {
cl::opt<bool>
+ TerminalHLT("terminal-x86-hlt",
+ cl::desc("Assume that execution stops at x86 HLT instruction"),
+ cl::init(true), cl::Hidden, cl::cat(BoltCategory));
+
+cl::opt<bool>
TerminalTrap("terminal-trap",
cl::desc("Assume that execution stops at trap instruction"),
cl::init(true), cl::Hidden, cl::cat(BoltCategory));
@@ -132,8 +137,13 @@ bool MCPlusBuilder::equals(const MCSpecifierExpr &A, const MCSpecifierExpr &B,
}
bool MCPlusBuilder::isTerminator(const MCInst &Inst) const {
- return Analysis->isTerminator(Inst) ||
- (opts::TerminalTrap && Info->get(Inst.getOpcode()).isTrap());
+ if (isX86HLT(Inst))
+ return opts::TerminalHLT;
+
+ if (Info->get(Inst.getOpcode()).isTrap())
+ return opts::TerminalTrap;
+
+ return Analysis->isTerminator(Inst);
}
void MCPlusBuilder::setTailCall(MCInst &Inst) const {
diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp
index 5d44e1a..d7f02b9 100644
--- a/bolt/lib/Passes/BinaryPasses.cpp
+++ b/bolt/lib/Passes/BinaryPasses.cpp
@@ -662,7 +662,7 @@ Error CleanMCState::runOnFunctions(BinaryContext &BC) {
if (S->isDefined()) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Symbol \"" << S->getName()
<< "\" is already defined\n");
- const_cast<MCSymbol *>(S)->setUndefined();
+ const_cast<MCSymbol *>(S)->setFragment(nullptr);
}
if (S->isRegistered()) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Symbol \"" << S->getName()
diff --git a/bolt/lib/Passes/FrameOptimizer.cpp b/bolt/lib/Passes/FrameOptimizer.cpp
index 81d4d93..b0b7207f 100644
--- a/bolt/lib/Passes/FrameOptimizer.cpp
+++ b/bolt/lib/Passes/FrameOptimizer.cpp
@@ -224,6 +224,11 @@ Error FrameOptimizerPass::runOnFunctions(BinaryContext &BC) {
if (opts::FrameOptimization == FOP_NONE)
return Error::success();
+ if (!BC.isX86()) {
+ BC.errs() << "BOLT-ERROR: " << getName() << " is supported only on X86\n";
+ exit(1);
+ }
+
std::unique_ptr<BinaryFunctionCallGraph> CG;
std::unique_ptr<FrameAnalysis> FA;
std::unique_ptr<RegAnalysis> RA;
diff --git a/bolt/lib/Passes/IndirectCallPromotion.cpp b/bolt/lib/Passes/IndirectCallPromotion.cpp
index 2b5a591..8a01cb9 100644
--- a/bolt/lib/Passes/IndirectCallPromotion.cpp
+++ b/bolt/lib/Passes/IndirectCallPromotion.cpp
@@ -261,10 +261,7 @@ IndirectCallPromotion::getCallTargets(BinaryBasicBlock &BB,
for (size_t I = Range.first; I < Range.second; ++I, JI += JIAdj) {
MCSymbol *Entry = JT->Entries[I];
const BinaryBasicBlock *ToBB = BF.getBasicBlockForLabel(Entry);
- assert(ToBB || Entry == BF.getFunctionEndLabel() ||
- Entry == BF.getFunctionEndLabel(FragmentNum::cold()));
- if (Entry == BF.getFunctionEndLabel() ||
- Entry == BF.getFunctionEndLabel(FragmentNum::cold()))
+ if (!ToBB)
continue;
const Location To(Entry);
const BinaryBasicBlock::BinaryBranchInfo &BI = BB.getBranchInfo(*ToBB);
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index f928dd4..65c84eb 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -1078,6 +1078,15 @@ protected:
dbgs() << ")\n";
});
+ // If this instruction terminates the program immediately, no
+ // authentication oracles are possible past this point.
+ if (BC.MIB->isTrap(Point)) {
+ LLVM_DEBUG({ traceInst(BC, "Trap instruction found", Point); });
+ DstState Next(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters());
+ Next.CannotEscapeUnchecked.set();
+ return Next;
+ }
+
// If this instruction is reachable by the analysis, a non-empty state will
// be propagated to it sooner or later. Until then, skip computeNext().
if (Cur.empty()) {
@@ -1185,8 +1194,8 @@ protected:
//
// A basic block without any successors, on the other hand, can be
// pessimistically initialized to everything-is-unsafe: this will naturally
- // handle both return and tail call instructions and is harmless for
- // internal indirect branch instructions (such as computed gotos).
+ // handle return, trap and tail call instructions. At the same time, it is
+ // harmless for internal indirect branch instructions, like computed gotos.
if (BB.succ_empty())
return createUnsafeState();
diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
index 996d2e9..0ddb73f 100644
--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
+++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
@@ -52,6 +52,7 @@ namespace opts {
extern cl::opt<bool> PrintAll;
extern cl::opt<bool> PrintDynoStats;
extern cl::opt<bool> DumpDotAll;
+extern bool shouldDumpDot(const bolt::BinaryFunction &Function);
extern cl::opt<std::string> AsmDump;
extern cl::opt<bolt::PLTCall::OptType> PLT;
extern cl::opt<bolt::IdenticalCodeFolding::ICFLevel, false,
@@ -340,7 +341,7 @@ Error BinaryFunctionPassManager::runPasses() {
Function.print(BC.outs(), Message);
- if (opts::DumpDotAll)
+ if (opts::shouldDumpDot(Function))
Function.dumpGraphForPass(PassIdName);
}
}
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index fe4a23c..a6e4dbc 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -84,6 +84,7 @@ extern cl::opt<bool> KeepNops;
extern cl::opt<bool> Lite;
extern cl::list<std::string> ReorderData;
extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions;
+extern cl::opt<bool> TerminalHLT;
extern cl::opt<bool> TerminalTrap;
extern cl::opt<bool> TimeBuild;
extern cl::opt<bool> TimeRewrite;
@@ -114,6 +115,35 @@ cl::opt<bool> DumpDotAll(
"enable '-print-loops' for color-coded blocks"),
cl::Hidden, cl::cat(BoltCategory));
+cl::list<std::string> DumpDotFunc(
+ "dump-dot-func", cl::CommaSeparated,
+ cl::desc(
+ "dump function CFGs to graphviz format for specified functions only;"
+ "takes function name patterns (regex supported)"),
+ cl::value_desc("func1,func2,func3,..."), cl::Hidden, cl::cat(BoltCategory));
+
+bool shouldDumpDot(const bolt::BinaryFunction &Function) {
+ // If dump-dot-all is enabled, dump every function that is not ignored
+ if (DumpDotAll)
+ return !Function.isIgnored();
+
+ // If no specific functions specified in dump-dot-func, don't dump any
+ if (DumpDotFunc.empty())
+ return false;
+
+ if (Function.isIgnored())
+ return false;
+
+ // Check if function matches any of the specified patterns
+ for (const std::string &Name : DumpDotFunc) {
+ if (Function.hasNameRegex(Name)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
static cl::list<std::string>
ForceFunctionNames("funcs",
cl::CommaSeparated,
@@ -880,14 +910,9 @@ void RewriteInstance::discoverFileObjects() {
// code section (see IHI0056B). $d identifies data contents.
// Compilers usually merge multiple data objects in a single $d-$x interval,
// but we need every data object to be marked with $d. Because of that we
- // create a vector of MarkerSyms with all locations of data objects.
+ // keep track of marker symbols with all locations of data objects.
- struct MarkerSym {
- uint64_t Address;
- MarkerSymType Type;
- };
-
- std::vector<MarkerSym> SortedMarkerSymbols;
+ DenseMap<uint64_t, MarkerSymType> MarkerSymbols;
auto addExtraDataMarkerPerSymbol = [&]() {
bool IsData = false;
uint64_t LastAddr = 0;
@@ -911,14 +936,14 @@ void RewriteInstance::discoverFileObjects() {
}
if (MarkerType != MarkerSymType::NONE) {
- SortedMarkerSymbols.push_back(MarkerSym{SymInfo.Address, MarkerType});
+ MarkerSymbols[SymInfo.Address] = MarkerType;
LastAddr = SymInfo.Address;
IsData = MarkerType == MarkerSymType::DATA;
continue;
}
if (IsData) {
- SortedMarkerSymbols.push_back({SymInfo.Address, MarkerSymType::DATA});
+ MarkerSymbols[SymInfo.Address] = MarkerSymType::DATA;
LastAddr = SymInfo.Address;
}
}
@@ -1283,27 +1308,24 @@ void RewriteInstance::discoverFileObjects() {
BC->setHasSymbolsWithFileName(FileSymbols.size());
// Now that all the functions were created - adjust their boundaries.
- adjustFunctionBoundaries();
+ adjustFunctionBoundaries(MarkerSymbols);
// Annotate functions with code/data markers in AArch64
- for (auto ISym = SortedMarkerSymbols.begin();
- ISym != SortedMarkerSymbols.end(); ++ISym) {
-
- auto *BF =
- BC->getBinaryFunctionContainingAddress(ISym->Address, true, true);
+ for (auto &[Address, Type] : MarkerSymbols) {
+ auto *BF = BC->getBinaryFunctionContainingAddress(Address, true, true);
if (!BF) {
// Stray marker
continue;
}
- const auto EntryOffset = ISym->Address - BF->getAddress();
- if (ISym->Type == MarkerSymType::CODE) {
+ const auto EntryOffset = Address - BF->getAddress();
+ if (Type == MarkerSymType::CODE) {
BF->markCodeAtOffset(EntryOffset);
continue;
}
- if (ISym->Type == MarkerSymType::DATA) {
+ if (Type == MarkerSymType::DATA) {
BF->markDataAtOffset(EntryOffset);
- BC->AddressToConstantIslandMap[ISym->Address] = BF;
+ BC->AddressToConstantIslandMap[Address] = BF;
continue;
}
llvm_unreachable("Unknown marker");
@@ -1832,7 +1854,8 @@ void RewriteInstance::disassemblePLT() {
}
}
-void RewriteInstance::adjustFunctionBoundaries() {
+void RewriteInstance::adjustFunctionBoundaries(
+ DenseMap<uint64_t, MarkerSymType> &MarkerSyms) {
for (auto BFI = BC->getBinaryFunctions().begin(),
BFE = BC->getBinaryFunctions().end();
BFI != BFE; ++BFI) {
@@ -1870,12 +1893,15 @@ void RewriteInstance::adjustFunctionBoundaries() {
continue;
}
- // This is potentially another entry point into the function.
- uint64_t EntryOffset = NextSymRefI->first - Function.getAddress();
- LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function "
- << Function << " at offset 0x"
- << Twine::utohexstr(EntryOffset) << '\n');
- Function.addEntryPointAtOffset(EntryOffset);
+ auto It = MarkerSyms.find(NextSymRefI->first);
+ if (It == MarkerSyms.end() || It->second != MarkerSymType::DATA) {
+ // This is potentially another entry point into the function.
+ uint64_t EntryOffset = NextSymRefI->first - Function.getAddress();
+ LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function "
+ << Function << " at offset 0x"
+ << Twine::utohexstr(EntryOffset) << '\n');
+ Function.addEntryPointAtOffset(EntryOffset);
+ }
++NextSymRefI;
}
@@ -2177,7 +2203,9 @@ void RewriteInstance::adjustCommandLineOptions() {
if (!opts::KeepNops.getNumOccurrences())
opts::KeepNops = true;
- // Linux kernel may resume execution after a trap instruction in some cases.
+ // Linux kernel may resume execution after a trap or x86 HLT instruction.
+ if (!opts::TerminalHLT.getNumOccurrences())
+ opts::TerminalHLT = false;
if (!opts::TerminalTrap.getNumOccurrences())
opts::TerminalTrap = false;
}
@@ -2907,7 +2935,8 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
ReferencedSymbol = nullptr;
ExtractedValue = Address;
} else if (RefFunctionOffset) {
- if (ContainingBF && ContainingBF != ReferencedBF) {
+ if (ContainingBF && ContainingBF != ReferencedBF &&
+ !ReferencedBF->isInConstantIsland(Address)) {
ReferencedSymbol =
ReferencedBF->addEntryPointAtOffset(RefFunctionOffset);
} else {
@@ -3570,7 +3599,7 @@ void RewriteInstance::postProcessFunctions() {
if (opts::PrintAll || opts::PrintCFG)
Function.print(BC->outs(), "after building cfg");
- if (opts::DumpDotAll)
+ if (opts::shouldDumpDot(Function))
Function.dumpGraphForPass("00_build-cfg");
if (opts::PrintLoopInfo) {
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 9732617..72f95ce 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -382,10 +382,9 @@ public:
// the list of successors of this basic block as appropriate.
// Any of the above code sequences assume the fall-through basic block
- // is a dead-end BRK instruction (any immediate operand is accepted).
+ // is a dead-end trap instruction.
const BinaryBasicBlock *BreakBB = BB.getFallthrough();
- if (!BreakBB || BreakBB->empty() ||
- BreakBB->front().getOpcode() != AArch64::BRK)
+ if (!BreakBB || BreakBB->empty() || !isTrap(BreakBB->front()))
return std::nullopt;
// Iterate over the instructions of BB in reverse order, matching opcodes
@@ -1744,6 +1743,34 @@ public:
Inst.addOperand(MCOperand::createImm(0));
}
+ bool isTrap(const MCInst &Inst) const override {
+ if (Inst.getOpcode() != AArch64::BRK)
+ return false;
+ // Only match the immediate values that are likely to indicate this BRK
+ // instruction is emitted to terminate the program immediately and not to
+ // be handled by a SIGTRAP handler, for example.
+ switch (Inst.getOperand(0).getImm()) {
+ case 0xc470:
+ case 0xc471:
+ case 0xc472:
+ case 0xc473:
+ // Explicit Pointer Authentication check failed, see
+ // AArch64AsmPrinter::emitPtrauthCheckAuthenticatedValue().
+ return true;
+ case 0x1:
+ // __builtin_trap(), as emitted by Clang.
+ return true;
+ case 0x3e8: // decimal 1000
+ // __builtin_trap(), as emitted by GCC.
+ return true;
+ default:
+ // Some constants may indicate intentionally recoverable break-points.
+ // This is the case at least for 0xf000, which is used by
+ // __builtin_debugtrap() supported by Clang.
+ return false;
+ }
+ }
+
bool isStorePair(const MCInst &Inst) const {
const unsigned opcode = Inst.getOpcode();
diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
index a60c1a6..1842509 100644
--- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
+++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
@@ -223,6 +223,10 @@ public:
return Inst.getOpcode() == X86::ENDBR32 || Inst.getOpcode() == X86::ENDBR64;
}
+ bool isX86HLT(const MCInst &Inst) const override {
+ return Inst.getOpcode() == X86::HLT;
+ }
+
int getPopSize(const MCInst &Inst) const override {
switch (Inst.getOpcode()) {
case X86::POP16r:
diff --git a/bolt/test/AArch64/data-marker-invalidates-extra-entrypoint.s b/bolt/test/AArch64/data-marker-invalidates-extra-entrypoint.s
new file mode 100644
index 0000000..3bcbcbb
--- /dev/null
+++ b/bolt/test/AArch64/data-marker-invalidates-extra-entrypoint.s
@@ -0,0 +1,38 @@
+# This test ensures that we query data marker symbols to avoid
+# misidentifying a constant data island symbol as an extra entry point.
+
+# RUN: %clang %cflags %s -o %t.so -Wl,-q -Wl,--init=_bar -Wl,--fini=_bar
+# RUN: llvm-bolt %t.so -o %t.instr.so
+
+ .text
+ .global _start
+ .type _start, %function
+_start:
+ ret
+
+ .text
+ .global _foo
+ .type _foo, %function
+_foo:
+ cbz x1, _foo_2
+_foo_1:
+ add x1, x2, x0
+ b _foo
+_foo_2:
+ ret
+
+# None of these constant island symbols should be identified as an extra
+# entry point for function `_foo'.
+ .align 4
+_const1: .short 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80
+_const2: .short 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0
+_const3: .short 0x04, 0x08, 0x0c, 0x20, 0x60, 0x80, 0xa0, 0xc0
+
+ .text
+ .global _bar
+ .type _bar, %function
+_bar:
+ ret
+
+ # Dummy relocation to force relocation mode
+ .reloc 0, R_AARCH64_NONE
diff --git a/bolt/test/AArch64/unsupported-passes.test b/bolt/test/AArch64/unsupported-passes.test
new file mode 100644
index 0000000..886fc1c
--- /dev/null
+++ b/bolt/test/AArch64/unsupported-passes.test
@@ -0,0 +1,8 @@
+// Checks that passes not fully supported on AArch64 are handled appropriately.
+
+// REQUIRES: system-linux,asserts,target=aarch64{{.*}}
+
+RUN: %clang %cflags %p/../Inputs/hello.c -o %t -Wl,-q
+RUN: not llvm-bolt %t -o %t.bolt --frame-opt=all 2>&1 | FileCheck %s
+
+CHECK: BOLT-ERROR: frame-optimizer is supported only on X86
diff --git a/bolt/test/AArch64/validate-secondary-entry-point.s b/bolt/test/AArch64/validate-secondary-entry-point.s
index 0099a0e..3ad6946 100644
--- a/bolt/test/AArch64/validate-secondary-entry-point.s
+++ b/bolt/test/AArch64/validate-secondary-entry-point.s
@@ -1,13 +1,23 @@
# This test is to verify that BOLT won't take a label pointing to constant
-# island as a secondary entry point (function `_start` doesn't have ELF size
-# set originally) and the function won't otherwise be mistaken as non-simple.
+# island as a secondary entry point. This can happen when a function has no
+# ELF size set (e.g. it comes from assembly code), or when a constant island
+# is referenced by another function discovered during relocation processing.
-# RUN: %clang %cflags -pie %s -o %t.so -Wl,-q -Wl,--init=_foo -Wl,--fini=_foo
+# RUN: split-file %s %t
+
+# RUN: %clang %cflags -pie %t/tt.asm -o %t.so \
+# RUN: -Wl,-q -Wl,--init=_foo -Wl,--fini=_foo
# RUN: llvm-bolt %t.so -o %t.bolt.so --print-cfg 2>&1 | FileCheck %s
# CHECK-NOT: BOLT-WARNING: reference in the middle of instruction detected \
# CHECK-NOT: function _start at offset 0x{{[0-9a-f]+}}
# CHECK: Binary Function "_start" after building cfg
+# RUN: %clang %cflags -ffunction-sections -shared %t/tt.c %t/ss.c -o %tt.so \
+# RUN: -Wl,-q -Wl,--init=_start -Wl,--fini=_start \
+# RUN: -Wl,--version-script=%t/linker_script
+# RUN: llvm-bolt %tt.so -o %tt.bolted.so
+
+;--- tt.asm
.text
.global _foo
@@ -32,3 +42,31 @@ _bar:
# Dummy relocation to force relocation mode
.reloc 0, R_AARCH64_NONE
+
+;--- tt.c
+void _start() {}
+
+__attribute__((naked)) void foo() {
+ asm("ldr x16, .L_fnptr\n"
+ "blr x16\n"
+ "ret\n"
+
+ "_rodatx:"
+ ".global _rodatx;"
+ ".quad 0;"
+ ".L_fnptr:"
+ ".quad 0;");
+}
+
+;--- ss.c
+__attribute__((visibility("hidden"))) extern void* _rodatx;
+void* bar() { return &_rodatx; }
+
+;--- linker_script
+{
+global:
+ _start;
+ foo;
+ bar;
+local: *;
+};
diff --git a/bolt/test/AArch64/veneer-lld-abs.s b/bolt/test/AArch64/veneer-lld-abs.s
index b22301d..77d6f0ce2 100644
--- a/bolt/test/AArch64/veneer-lld-abs.s
+++ b/bolt/test/AArch64/veneer-lld-abs.s
@@ -12,7 +12,7 @@
## Occasionally, we see the linker not generating $d symbols for long veneers
## causing BOLT to fail veneer elimination.
-# RUN: llvm-objcopy --remove-symbol-prefix=\$d %t.exe %t.no-marker.exe
+# RUN: llvm-objcopy --remove-symbol-prefix='$d' %t.exe %t.no-marker.exe
# RUN: llvm-bolt %t.no-marker.exe -o %t.no-marker.bolt \
# RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-BOLT
# RUN: llvm-objdump -d -j .text %t.no-marker.bolt | \
diff --git a/bolt/test/Inputs/multi-func.cpp b/bolt/test/Inputs/multi-func.cpp
new file mode 100644
index 0000000..61c968f
--- /dev/null
+++ b/bolt/test/Inputs/multi-func.cpp
@@ -0,0 +1,24 @@
+#include <iostream>
+
+// Multiple functions to test selective dumping
+int add(int a, int b) { return a + b; }
+
+int multiply(int a, int b) { return a * b; }
+
+int main_helper() {
+ std::cout << "Helper function" << std::endl;
+ return 42;
+}
+
+int main_secondary() { return add(5, 3); }
+
+void other_function() { std::cout << "Other function" << std::endl; }
+
+int main() {
+ int result = add(10, 20);
+ result = multiply(result, 2);
+ main_helper();
+ main_secondary();
+ other_function();
+ return result;
+}
diff --git a/bolt/test/X86/double-jump.test b/bolt/test/X86/double-jump.test
index 424747c..94b1578 100644
--- a/bolt/test/X86/double-jump.test
+++ b/bolt/test/X86/double-jump.test
@@ -1,15 +1,11 @@
## Test the double jump removal peephole.
-## This test has commands that rely on shell capabilities that won't execute
-## correctly on Windows e.g. subshell execution
-REQUIRES: shell
-
RUN: %clangxx %cxxflags %p/Inputs/double_jump.cpp -o %t.exe
-RUN: (llvm-bolt %t.exe --peepholes=double-jumps \
-RUN: --eliminate-unreachable -o %t 2>&1 \
-RUN: && llvm-objdump -d %t --print-imm-hex --no-show-raw-insn) | FileCheck %s
+RUN: llvm-bolt %t.exe --peepholes=double-jumps \
+RUN: --eliminate-unreachable -o %t | FileCheck --check-prefix CHECK-BOLT %s
+RUN: llvm-objdump -d %t --print-imm-hex --no-show-raw-insn | FileCheck %s
-CHECK: BOLT-INFO: Peephole: 1 double jumps patched.
+CHECK-BOLT: BOLT-INFO: Peephole: 1 double jumps patched.
CHECK: <_Z3foom>:
CHECK-NEXT: pushq %rbp
diff --git a/bolt/test/X86/dwarf5-debug-line-print.s b/bolt/test/X86/dwarf5-debug-line-print.s
new file mode 100644
index 0000000..b0a5bab
--- /dev/null
+++ b/bolt/test/X86/dwarf5-debug-line-print.s
@@ -0,0 +1,148 @@
+# REQUIRES: system-linux
+
+## Check that BOLT correctly prints debug line comments for DWARF-5.
+
+
+# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %s -o %t1.o
+# RUN: %clang %cflags -dwarf-5 %t1.o -o %t.exe -Wl,-q
+# RUN: llvm-bolt %t.exe --update-debug-sections --print-debug-info \
+# RUN: --print-after-lowering -o %t.bolt | FileCheck %s
+
+# CHECK: xorq %rdi, %rdi # debug line main.c:2:5
+
+# __attribute__((naked)) void _start() {
+# __asm__(
+# "xor %rdi, %rdi\n" // exit code 0
+# "mov $60, %rax\n" // syscall number for exit
+# "syscall\n"
+# );
+# }
+
+ .file "main.c"
+ .text
+ .globl _start # -- Begin function _start
+ .p2align 4
+ .type _start,@function
+_start: # @_start
+.Lfunc_begin0:
+ .file 0 "/home/gpastukhov/tmp2" "main.c" md5 0x94c0e54a615c2a21415ddb904991abd8
+ .cfi_startproc
+# %bb.0:
+ .loc 0 2 5 prologue_end # main.c:2:5
+ #APP
+ xorq %rdi, %rdi
+ movq $60, %rax
+ syscall
+
+ #NO_APP
+.Ltmp0:
+.Lfunc_end0:
+ .size _start, .Lfunc_end0-_start
+ .cfi_endproc
+ # -- End function
+ .section .debug_abbrev,"",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 1 # DW_CHILDREN_yes
+ .byte 37 # DW_AT_producer
+ .byte 37 # DW_FORM_strx1
+ .byte 19 # DW_AT_language
+ .byte 5 # DW_FORM_data2
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 114 # DW_AT_str_offsets_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 16 # DW_AT_stmt_list
+ .byte 23 # DW_FORM_sec_offset
+ .byte 27 # DW_AT_comp_dir
+ .byte 37 # DW_FORM_strx1
+ .byte 17 # DW_AT_low_pc
+ .byte 27 # DW_FORM_addrx
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .byte 115 # DW_AT_addr_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 2 # Abbreviation Code
+ .byte 46 # DW_TAG_subprogram
+ .byte 0 # DW_CHILDREN_no
+ .byte 17 # DW_AT_low_pc
+ .byte 27 # DW_FORM_addrx
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .byte 64 # DW_AT_frame_base
+ .byte 24 # DW_FORM_exprloc
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 63 # DW_AT_external
+ .byte 25 # DW_FORM_flag_present
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_info,"",@progbits
+.Lcu_begin0:
+ .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+ .short 5 # DWARF version number
+ .byte 1 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .byte 1 # Abbrev [1] 0xc:0x23 DW_TAG_compile_unit
+ .byte 0 # DW_AT_producer
+ .short 29 # DW_AT_language
+ .byte 1 # DW_AT_name
+ .long .Lstr_offsets_base0 # DW_AT_str_offsets_base
+ .long .Lline_table_start0 # DW_AT_stmt_list
+ .byte 2 # DW_AT_comp_dir
+ .byte 0 # DW_AT_low_pc
+ .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+ .long .Laddr_table_base0 # DW_AT_addr_base
+ .byte 2 # Abbrev [2] 0x23:0xb DW_TAG_subprogram
+ .byte 0 # DW_AT_low_pc
+ .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 87
+ .byte 3 # DW_AT_name
+ .byte 0 # DW_AT_decl_file
+ .byte 1 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 0 # End Of Children Mark
+.Ldebug_info_end0:
+ .section .debug_str_offsets,"",@progbits
+ .long 20 # Length of String Offsets Set
+ .short 5
+ .short 0
+.Lstr_offsets_base0:
+ .section .debug_str,"MS",@progbits,1
+.Linfo_string0:
+ .asciz "clang version 20.1.8 (CentOS 20.1.8-1.el9)" # string offset=0
+.Linfo_string1:
+ .asciz "main.c" # string offset=43
+.Linfo_string2:
+ .asciz "/home/gpastukhov/tmp2" # string offset=50
+.Linfo_string3:
+ .asciz "_start" # string offset=72
+ .section .debug_str_offsets,"",@progbits
+ .long .Linfo_string0
+ .long .Linfo_string1
+ .long .Linfo_string2
+ .long .Linfo_string3
+ .section .debug_addr,"",@progbits
+ .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution
+.Ldebug_addr_start0:
+ .short 5 # DWARF version number
+ .byte 8 # Address size
+ .byte 0 # Segment selector size
+.Laddr_table_base0:
+ .quad .Lfunc_begin0
+.Ldebug_addr_end0:
+ .ident "clang version 20.1.8 (CentOS 20.1.8-1.el9)"
+ .section ".note.GNU-stack","",@progbits
+ .addrsig
+ .section .debug_line,"",@progbits
+.Lline_table_start0:
diff --git a/bolt/test/X86/dwarf5-two-cus.s b/bolt/test/X86/dwarf5-two-cus.s
new file mode 100644
index 0000000..8b5afb4
--- /dev/null
+++ b/bolt/test/X86/dwarf5-two-cus.s
@@ -0,0 +1,251 @@
+## Check that BOLT correctly handles two CUs with DWARF-5 debug info (does not crash), when
+## a function from one CU is forced to be inlined into another.
+
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %s -o %t-main.o
+# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5_helper.s -o %thelper.o
+# RUN: %clang %cflags -gdwarf-5 -Wl,-q %t-main.o %thelper.o -o %t.exe
+# RUN: llvm-bolt %t.exe --update-debug-sections --force-inline=_Z3fooi \
+# RUN: -o %t.bolt | FileCheck %s
+
+# CHECK-NOT: BOLT-ERROR
+# CHECK-NOT: BOLT-WARNING
+# CHECK: BOLT-INFO: inlined {{[0-9]+}} calls at {{[1-9][0-9]*}} call sites
+
+# extern int foo(int);
+# int main(){
+# foo(10);
+# return 0;
+# }
+ .file "main.cpp"
+ .text
+ .globl main # -- Begin function main
+ .p2align 4
+ .type main,@function
+main: # @main
+.Lfunc_begin0:
+ .file 0 "/home/gpastukhov/tmp2" "main.cpp" md5 0x5c930f5d3a068b09fd18ece59c58bdcf
+ .loc 0 2 0 # main.cpp:2:0
+ .cfi_startproc
+# %bb.0:
+ pushq %rax
+ .cfi_def_cfa_offset 16
+.Ltmp0:
+ .loc 0 3 5 prologue_end # main.cpp:3:5
+ movl $10, %edi
+ callq _Z3fooi
+.Ltmp1:
+ .loc 0 4 5 # main.cpp:4:5
+ xorl %eax, %eax
+ .loc 0 4 5 epilogue_begin is_stmt 0 # main.cpp:4:5
+ popq %rcx
+ .cfi_def_cfa_offset 8
+ retq
+.Ltmp2:
+.Lfunc_end0:
+ .size main, .Lfunc_end0-main
+ .cfi_endproc
+ # -- End function
+ .section .debug_abbrev,"",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 1 # DW_CHILDREN_yes
+ .byte 37 # DW_AT_producer
+ .byte 37 # DW_FORM_strx1
+ .byte 19 # DW_AT_language
+ .byte 5 # DW_FORM_data2
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 114 # DW_AT_str_offsets_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 16 # DW_AT_stmt_list
+ .byte 23 # DW_FORM_sec_offset
+ .byte 27 # DW_AT_comp_dir
+ .byte 37 # DW_FORM_strx1
+ .byte 17 # DW_AT_low_pc
+ .byte 27 # DW_FORM_addrx
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .byte 115 # DW_AT_addr_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 2 # Abbreviation Code
+ .byte 46 # DW_TAG_subprogram
+ .byte 1 # DW_CHILDREN_yes
+ .byte 17 # DW_AT_low_pc
+ .byte 27 # DW_FORM_addrx
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .byte 64 # DW_AT_frame_base
+ .byte 24 # DW_FORM_exprloc
+ .byte 122 # DW_AT_call_all_calls
+ .byte 25 # DW_FORM_flag_present
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 63 # DW_AT_external
+ .byte 25 # DW_FORM_flag_present
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 3 # Abbreviation Code
+ .byte 72 # DW_TAG_call_site
+ .byte 1 # DW_CHILDREN_yes
+ .byte 127 # DW_AT_call_origin
+ .byte 19 # DW_FORM_ref4
+ .byte 125 # DW_AT_call_return_pc
+ .byte 27 # DW_FORM_addrx
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 4 # Abbreviation Code
+ .byte 73 # DW_TAG_call_site_parameter
+ .byte 0 # DW_CHILDREN_no
+ .byte 2 # DW_AT_location
+ .byte 24 # DW_FORM_exprloc
+ .byte 126 # DW_AT_call_value
+ .byte 24 # DW_FORM_exprloc
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 5 # Abbreviation Code
+ .byte 46 # DW_TAG_subprogram
+ .byte 1 # DW_CHILDREN_yes
+ .byte 110 # DW_AT_linkage_name
+ .byte 37 # DW_FORM_strx1
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 60 # DW_AT_declaration
+ .byte 25 # DW_FORM_flag_present
+ .byte 63 # DW_AT_external
+ .byte 25 # DW_FORM_flag_present
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 6 # Abbreviation Code
+ .byte 5 # DW_TAG_formal_parameter
+ .byte 0 # DW_CHILDREN_no
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 7 # Abbreviation Code
+ .byte 36 # DW_TAG_base_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 62 # DW_AT_encoding
+ .byte 11 # DW_FORM_data1
+ .byte 11 # DW_AT_byte_size
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_info,"",@progbits
+.Lcu_begin0:
+ .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+ .short 5 # DWARF version number
+ .byte 1 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .byte 1 # Abbrev [1] 0xc:0x47 DW_TAG_compile_unit
+ .byte 0 # DW_AT_producer
+ .short 33 # DW_AT_language
+ .byte 1 # DW_AT_name
+ .long .Lstr_offsets_base0 # DW_AT_str_offsets_base
+ .long .Lline_table_start0 # DW_AT_stmt_list
+ .byte 2 # DW_AT_comp_dir
+ .byte 0 # DW_AT_low_pc
+ .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+ .long .Laddr_table_base0 # DW_AT_addr_base
+ .byte 2 # Abbrev [2] 0x23:0x1c DW_TAG_subprogram
+ .byte 0 # DW_AT_low_pc
+ .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 87
+ # DW_AT_call_all_calls
+ .byte 6 # DW_AT_name
+ .byte 0 # DW_AT_decl_file
+ .byte 2 # DW_AT_decl_line
+ .long 78 # DW_AT_type
+ # DW_AT_external
+ .byte 3 # Abbrev [3] 0x32:0xc DW_TAG_call_site
+ .long 63 # DW_AT_call_origin
+ .byte 1 # DW_AT_call_return_pc
+ .byte 4 # Abbrev [4] 0x38:0x5 DW_TAG_call_site_parameter
+ .byte 1 # DW_AT_location
+ .byte 85
+ .byte 1 # DW_AT_call_value
+ .byte 58
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 5 # Abbrev [5] 0x3f:0xf DW_TAG_subprogram
+ .byte 3 # DW_AT_linkage_name
+ .byte 4 # DW_AT_name
+ .byte 0 # DW_AT_decl_file
+ .byte 1 # DW_AT_decl_line
+ .long 78 # DW_AT_type
+ # DW_AT_declaration
+ # DW_AT_external
+ .byte 6 # Abbrev [6] 0x48:0x5 DW_TAG_formal_parameter
+ .long 78 # DW_AT_type
+ .byte 0 # End Of Children Mark
+ .byte 7 # Abbrev [7] 0x4e:0x4 DW_TAG_base_type
+ .byte 5 # DW_AT_name
+ .byte 5 # DW_AT_encoding
+ .byte 4 # DW_AT_byte_size
+ .byte 0 # End Of Children Mark
+.Ldebug_info_end0:
+ .section .debug_str_offsets,"",@progbits
+ .long 32 # Length of String Offsets Set
+ .short 5
+ .short 0
+.Lstr_offsets_base0:
+ .section .debug_str,"MS",@progbits,1
+.Linfo_string0:
+ .asciz "clang version 20.1.8 (CentOS 20.1.8-1.el9)" # string offset=0
+.Linfo_string1:
+ .asciz "main.cpp" # string offset=43
+.Linfo_string2:
+ .asciz "/home/gpastukhov/tmp2" # string offset=52
+.Linfo_string3:
+ .asciz "_Z3fooi" # string offset=74
+.Linfo_string4:
+ .asciz "foo" # string offset=82
+.Linfo_string5:
+ .asciz "int" # string offset=86
+.Linfo_string6:
+ .asciz "main" # string offset=90
+ .section .debug_str_offsets,"",@progbits
+ .long .Linfo_string0
+ .long .Linfo_string1
+ .long .Linfo_string2
+ .long .Linfo_string3
+ .long .Linfo_string4
+ .long .Linfo_string5
+ .long .Linfo_string6
+ .section .debug_addr,"",@progbits
+ .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution
+.Ldebug_addr_start0:
+ .short 5 # DWARF version number
+ .byte 8 # Address size
+ .byte 0 # Segment selector size
+.Laddr_table_base0:
+ .quad .Lfunc_begin0
+ .quad .Ltmp1
+.Ldebug_addr_end0:
+ .ident "clang version 20.1.8 (CentOS 20.1.8-1.el9)"
+ .section ".note.GNU-stack","",@progbits
+ .addrsig
+ .section .debug_line,"",@progbits
+.Lline_table_start0:
diff --git a/bolt/test/X86/hlt-terminator.s b/bolt/test/X86/hlt-terminator.s
new file mode 100644
index 0000000..3f67182
--- /dev/null
+++ b/bolt/test/X86/hlt-terminator.s
@@ -0,0 +1,24 @@
+## Check that the HLT instruction is handled differently depending on the flags.
+## It's a terminator in the user-level code, but the execution can resume in
+## ring 0.
+
+# RUN: %clang %cflags %s -static -o %t.exe -nostdlib
+# RUN: llvm-bolt %t.exe --print-cfg --print-only=main --terminal-x86-hlt=0 \
+# RUN: -o %t.ring0 2>&1 | FileCheck %s --check-prefix=CHECK-RING0
+# RUN: llvm-bolt %t.exe --print-cfg --print-only=main \
+# RUN: -o %t.ring3 2>&1 | FileCheck %s --check-prefix=CHECK-RING3
+# RUN: llvm-objdump -d %t.ring0 --print-imm-hex | FileCheck %s --check-prefix=CHECK-BIN
+
+# CHECK-RING0: BB Count : 1
+# CHECK-RING3: BB Count : 2
+
+# CHECK-BIN: <main>:
+# CHECK-BIN-NEXT: f4 hlt
+# CHECK-BIN-NEXT: c3 retq
+
+.global main
+ .type main, %function
+main:
+ hlt
+ retq
+.size main, .-main
diff --git a/bolt/test/X86/jmp-optimization.test b/bolt/test/X86/jmp-optimization.test
index f969578..847c4822 100644
--- a/bolt/test/X86/jmp-optimization.test
+++ b/bolt/test/X86/jmp-optimization.test
@@ -1,10 +1,7 @@
## Tests the optimization of functions that just do a tail call in the beginning.
-## This test has commands that rely on shell capabilities that won't execute
-## correctly on Windows e.g. unsupported parameter expansion
-REQUIRES: shell
-
-RUN: %clangxx %cxxflags -O2 %S/Inputs/jmp_opt{,2,3}.cpp -o %t
+RUN: %clangxx %cxxflags -O2 %S/Inputs/jmp_opt.cpp %S/Inputs/jmp_opt2.cpp \
+RUN: %S/Inputs/jmp_opt3.cpp -o %t
RUN: llvm-bolt -inline-small-functions %t -o %t.bolt
RUN: llvm-objdump -d %t.bolt --print-imm-hex | FileCheck %s
diff --git a/bolt/test/X86/jump-table-ambiguous-unreachable.s b/bolt/test/X86/jump-table-ambiguous-unreachable.s
new file mode 100644
index 0000000..eb87b96
--- /dev/null
+++ b/bolt/test/X86/jump-table-ambiguous-unreachable.s
@@ -0,0 +1,87 @@
+## Check that llvm-bolt correctly updates ambiguous jump table entries that
+## can correspond either to __builtin_unreachable() or to a pointer to
+## the next function.
+
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.exe -no-pie -Wl,-q
+
+# RUN: llvm-bolt %t.exe --print-normalized --print-only=foo -o %t.out \
+# RUN: 2>&1 | FileCheck %s
+
+
+
+ .text
+ .globl _start
+ .type _start, %function
+_start:
+ .cfi_startproc
+ call foo
+ ret
+ .cfi_endproc
+ .size _start, .-_start
+
+ .globl foo
+ .type foo, %function
+foo:
+ .cfi_startproc
+.LBB00:
+ movq 0x8(%rdi), %rdi
+ movzbl 0x1(%rdi), %eax
+.LBB00_br:
+ jmpq *"JUMP_TABLE/foo.0"(,%rax,8)
+# CHECK: jmpq {{.*}} # JUMPTABLE
+# CHECK-NEXT: Successors: {{.*}}, {{.*}}, {{.*}}, {{.*}}, {{.*}}
+
+.Ltmp87085:
+ xorl %eax, %eax
+ retq
+
+.Ltmp87086:
+ cmpb $0x0, 0x8(%rdi)
+ setne %al
+ retq
+
+.Ltmp87088:
+ movb $0x1, %al
+ retq
+
+.Ltmp87087:
+ movzbl 0x14(%rdi), %eax
+ andb $0x2, %al
+ shrb %al
+ retq
+
+ .cfi_endproc
+.size foo, .-foo
+
+ .globl bar
+ .type bar, %function
+bar:
+ .cfi_startproc
+ ret
+ .cfi_endproc
+ .size bar, .-bar
+
+# Jump tables
+.section .rodata
+ .global jump_table
+jump_table:
+"JUMP_TABLE/foo.0":
+ .quad bar
+ .quad .Ltmp87085
+ .quad bar
+ .quad .Ltmp87086
+ .quad .Ltmp87087
+ .quad .LBB00
+ .quad .Ltmp87088
+ .quad bar
+ .quad .LBB00
+
+# CHECK: Jump table {{.*}} for function foo
+# CHECK-NEXT: 0x{{.*}} : bar
+# CHECK-NEXT: 0x{{.*}} :
+# CHECK-NEXT: 0x{{.*}} : bar
+# CHECK-NEXT: 0x{{.*}} :
+# CHECK-NEXT: 0x{{.*}} :
diff --git a/bolt/test/X86/jump-table-icp.test b/bolt/test/X86/jump-table-icp.test
index f147432..a095929 100644
--- a/bolt/test/X86/jump-table-icp.test
+++ b/bolt/test/X86/jump-table-icp.test
@@ -4,11 +4,7 @@ RUN: link_fdata %p/Inputs/jump_table_icp.s %t.o %t.fdata --nmtool llvm-nm
RUN: llvm-strip --strip-unneeded %t.o
RUN: %clang %cflags -no-pie %t.o -o %t.exe -Wl,-q
-## This test has commands that rely on shell capabilities that won't execute
-## correctly on Windows e.g. subshell execution
-REQUIRES: shell
-
-RUN: (llvm-bolt %t.exe --data %t.fdata -o %t --relocs \
+RUN: llvm-bolt %t.exe --data %t.fdata -o %t --relocs \
RUN: --reorder-blocks=cache --split-functions --split-all-cold \
RUN: --use-gnu-stack --dyno-stats --indirect-call-promotion=jump-tables \
RUN: --print-icp -v=0 \
@@ -16,8 +12,8 @@ RUN: --enable-bat --print-cache-metrics \
RUN: --icp-jt-remaining-percent-threshold=10 \
RUN: --icp-jt-total-percent-threshold=2 \
RUN: --indirect-call-promotion-topn=1 \
-RUN: --icp-jump-tables-targets --align-functions-max-bytes=7 2>&1 && \
-RUN: llvm-objdump -d %t --print-imm-hex) | FileCheck %s
+RUN: --icp-jump-tables-targets --align-functions-max-bytes=7 | FileCheck %s
+RUN: llvm-objdump -d %t --print-imm-hex | FileCheck --check-prefix CHECK-ASM %s
BOLT-INFO: ICP total indirect callsites = 0
BOLT-INFO: ICP total jump table callsites = 2
@@ -107,14 +103,14 @@ CHECK-NEXT: Exec Count : 140
CHECK: Predecessors: .Ltmp{{.*}}, .LFT{{.*}}
CHECK: Successors: .Ltmp{{.*}} (mispreds: 0, count: 98)
-CHECK: <_Z3inci>:
-CHECK: movq 0x{{.*}}(,%rax,8), %rax
-CHECK-NEXT: cmpq $0x{{.*}}, %rax
-CHECK-NEXT: je {{.*}} <_Z3inci+0x{{.*}}>
-CHECK-NEXT: jmpq *%rax
-
-CHECK: <_Z7inc_dupi>:
-CHECK: movq 0x{{.*}}(,%rax,8), %rax
-CHECK-NEXT: cmpq $0x{{.*}}, %rax
-CHECK-NEXT: je {{.*}} <_Z7inc_dupi+0x{{.*}}>
-CHECK-NEXT: jmpq *%rax
+CHECK-ASM: <_Z3inci>:
+CHECK-ASM: movq 0x{{.*}}(,%rax,8), %rax
+CHECK-ASM-NEXT: cmpq $0x{{.*}}, %rax
+CHECK-ASM-NEXT: je {{.*}} <_Z3inci+0x{{.*}}>
+CHECK-ASM-NEXT: jmpq *%rax
+
+CHECK-ASM: <_Z7inc_dupi>:
+CHECK-ASM: movq 0x{{.*}}(,%rax,8), %rax
+CHECK-ASM-NEXT: cmpq $0x{{.*}}, %rax
+CHECK-ASM-NEXT: je {{.*}} <_Z7inc_dupi+0x{{.*}}>
+CHECK-ASM-NEXT: jmpq *%rax
diff --git a/bolt/test/X86/shrinkwrapping.test b/bolt/test/X86/shrinkwrapping.test
index 521b456..5470b5d 100644
--- a/bolt/test/X86/shrinkwrapping.test
+++ b/bolt/test/X86/shrinkwrapping.test
@@ -2,23 +2,21 @@
## shrink-wrapping when optimizing a function without
## frame pointers.
-## This test has commands that rely on shell capabilities that won't execute
-## correctly on Windows e.g. subshell execution to capture command output.
-REQUIRES: shell
-
RUN: %clangxx %cxxflags -no-pie %S/Inputs/exc4sw.S -o %t.exe -Wl,-q
RUN: llvm-bolt %t.exe -o %t --relocs --frame-opt=all \
RUN: --print-only=main --print-cfg \
RUN: --data=%p/Inputs/exc4sw.fdata --reorder-blocks=cache 2>&1 | \
RUN: FileCheck %s --check-prefix=CHECK-BOLT
-RUN: llvm-objdump --dwarf=frames %t.exe | grep -A20 -e \
-RUN: `llvm-nm --numeric-sort %t.exe | grep main | tail -n 1 | \
-RUN: cut -f1 -d' ' | tail -c9` 2>&1 | FileCheck %s --check-prefix=CHECK-INPUT
+RUN: llvm-nm --numeric-sort %t.exe | grep main | tail -n 1 | \
+RUN: cut -f1 -d' ' | tail -c9 > %t.input_address
+RUN: llvm-objdump --dwarf=frames %t.exe | grep -A20 -f %t.input_address \
+RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-INPUT
-RUN: llvm-objdump --dwarf=frames %t | grep -A20 -e \
-RUN: `llvm-nm --numeric-sort %t | grep main | tail -n 1 | cut -f1 -d' ' | \
-RUN: tail -c9` 2>&1 | FileCheck %s --check-prefix=CHECK-OUTPUT
+RUN: llvm-nm --numeric-sort %t | grep main | tail -n 1 | \
+RUN: cut -f1 -d' ' | tail -c9 > %t.output_address
+RUN: llvm-objdump --dwarf=frames %t | grep -A20 -f %t.output_address \
+RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-OUTPUT
CHECK-BOLT: Extern Entry Count: 100
CHECK-BOLT: Shrink wrapping moved 2 spills inserting load/stores and 0 spills inserting push/pops
diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-address-checks.s b/bolt/test/binary-analysis/AArch64/gs-pauth-address-checks.s
index 3f982dd..74f2761 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pauth-address-checks.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pauth-address-checks.s
@@ -31,7 +31,7 @@ resign_xpaci_good:
xpaci x16
cmp x0, x16
b.eq 1f
- brk 0x1234
+ brk 0xc471
1:
pacia x0, x2
ret
@@ -46,7 +46,7 @@ resign_xpacd_good:
xpacd x16
cmp x0, x16
b.eq 1f
- brk 0x1234
+ brk 0xc473
1:
pacda x0, x2
ret
@@ -117,7 +117,7 @@ resign_xpaci_unrelated_auth_and_check:
xpaci x16
cmp x0, x16
b.eq 1f
- brk 0x1234
+ brk 0xc471
1:
pacia x10, x2
ret
@@ -139,7 +139,7 @@ resign_xpaci_wrong_pattern_1:
xpaci x16
cmp x0, x16
b.eq 1f
- brk 0x1234
+ brk 0xc471
1:
pacia x0, x2
ret
@@ -157,7 +157,7 @@ resign_xpaci_wrong_pattern_2:
xpaci x0 // x0 instead of x16
cmp x0, x16
b.eq 1f
- brk 0x1234
+ brk 0xc471
1:
pacia x0, x2
ret
@@ -174,7 +174,7 @@ resign_xpaci_wrong_pattern_3:
xpaci x16
cmp x16, x16 // x16 instead of x0
b.eq 1f
- brk 0x1234
+ brk 0xc471
1:
pacia x0, x2
ret
@@ -191,7 +191,7 @@ resign_xpaci_wrong_pattern_4:
xpaci x16
cmp x0, x0 // x0 instead of x16
b.eq 1f
- brk 0x1234
+ brk 0xc471
1:
pacia x0, x2
ret
@@ -208,7 +208,7 @@ resign_xpaci_wrong_pattern_5:
mov x16, x16 // replace xpaci with a no-op instruction
cmp x0, x16
b.eq 1f
- brk 0x1234
+ brk 0xc471
1:
pacia x0, x2
ret
@@ -228,7 +228,7 @@ resign_xpaclri_good:
xpaclri
cmp x30, x16
b.eq 1f
- brk 0x1234
+ brk 0xc471
1:
pacia x30, x2
@@ -246,7 +246,7 @@ xpaclri_check_keeps_lr_safe:
xpaclri // clobbers LR
cmp x30, x16
b.eq 1f
- brk 0x1234 // marks LR as trusted and safe-to-dereference
+ brk 0xc471 // marks LR as trusted and safe-to-dereference
1:
ret // not reporting non-protected return
.size xpaclri_check_keeps_lr_safe, .-xpaclri_check_keeps_lr_safe
@@ -265,7 +265,7 @@ xpaclri_check_requires_safe_lr:
xpaclri
cmp x30, x16
b.eq 1f
- brk 0x1234
+ brk 0xc471
1:
ret
.size xpaclri_check_requires_safe_lr, .-xpaclri_check_requires_safe_lr
@@ -283,7 +283,7 @@ resign_xpaclri_wrong_reg:
xpaclri // ... but xpaclri still operates on x30
cmp x20, x16
b.eq 1f
- brk 0x1234
+ brk 0xc471
1:
pacia x20, x2
@@ -303,7 +303,7 @@ resign_checked_not_authenticated:
xpaci x16
cmp x0, x16
b.eq 1f
- brk 0x1234
+ brk 0xc471
1:
pacia x0, x2
ret
@@ -323,7 +323,7 @@ resign_checked_before_authenticated:
xpaci x16
cmp x0, x16
b.eq 1f
- brk 0x1234
+ brk 0xc471
1:
autib x0, x1
pacia x0, x2
@@ -339,7 +339,7 @@ resign_high_bits_tbz_good:
autib x0, x1
eor x16, x0, x0, lsl #1
tbz x16, #62, 1f
- brk 0x1234
+ brk 0xc471
1:
pacia x0, x2
ret
@@ -378,7 +378,7 @@ resign_high_bits_tbz_wrong_bit:
autib x0, x1
eor x16, x0, x0, lsl #1
tbz x16, #63, 1f
- brk 0x1234
+ brk 0xc471
1:
pacia x0, x2
ret
@@ -393,7 +393,7 @@ resign_high_bits_tbz_wrong_shift_amount:
autib x0, x1
eor x16, x0, x0, lsl #2
tbz x16, #62, 1f
- brk 0x1234
+ brk 0xc471
1:
pacia x0, x2
ret
@@ -408,7 +408,7 @@ resign_high_bits_tbz_wrong_shift_type:
autib x0, x1
eor x16, x0, x0, lsr #1
tbz x16, #62, 1f
- brk 0x1234
+ brk 0xc471
1:
pacia x0, x2
ret
@@ -423,7 +423,7 @@ resign_high_bits_tbz_wrong_pattern_1:
autib x0, x1
eor x16, x0, x0, lsl #1
tbz x17, #62, 1f
- brk 0x1234
+ brk 0xc471
1:
pacia x0, x2
ret
@@ -438,7 +438,7 @@ resign_high_bits_tbz_wrong_pattern_2:
autib x0, x1
eor x16, x10, x0, lsl #1
tbz x16, #62, 1f
- brk 0x1234
+ brk 0xc471
1:
pacia x0, x2
ret
@@ -453,7 +453,7 @@ resign_high_bits_tbz_wrong_pattern_3:
autib x0, x1
eor x16, x0, x10, lsl #1
tbz x16, #62, 1f
- brk 0x1234
+ brk 0xc471
1:
pacia x0, x2
ret
@@ -648,7 +648,7 @@ many_checked_regs:
xpacd x16 // ...
cmp x2, x16 // ...
b.eq 2f // end of basic block
- brk 0x1234
+ brk 0xc473
2:
pacdza x0
pacdza x1
diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-authentication-oracles.s b/bolt/test/binary-analysis/AArch64/gs-pauth-authentication-oracles.s
index c314bc7..f44ba21 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pauth-authentication-oracles.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pauth-authentication-oracles.s
@@ -79,7 +79,7 @@ good_explicit_check:
autia x0, x1
eor x16, x0, x0, lsl #1
tbz x16, #62, 1f
- brk 0x1234
+ brk 0xc470
1:
ret
.size good_explicit_check, .-good_explicit_check
@@ -373,7 +373,7 @@ good_explicit_check_multi_bb:
1:
eor x16, x0, x0, lsl #1
tbz x16, #62, 2f
- brk 0x1234
+ brk 0xc470
2:
cbz x1, 3f
nop
@@ -685,8 +685,7 @@ good_address_arith_nocfg:
.globl good_explicit_check_unrelated_reg
.type good_explicit_check_unrelated_reg,@function
good_explicit_check_unrelated_reg:
-// CHECK-LABEL: GS-PAUTH: authentication oracle found in function good_explicit_check_unrelated_reg, basic block {{[^,]+}}, at address
- // FIXME: The below instruction is not an authentication oracle
+// CHECK-NOT: good_explicit_check_unrelated_reg
autia x2, x3 // One of possible execution paths after this instruction
// ends at BRK below, thus BRK used as a trap instruction
// should formally "check everything" not to introduce
@@ -694,7 +693,7 @@ good_explicit_check_unrelated_reg:
autia x0, x1
eor x16, x0, x0, lsl #1
tbz x16, #62, 1f
- brk 0x1234
+ brk 0xc470
1:
ldr x4, [x2] // Right before this instruction X2 is checked - this
// should be propagated to the basic block ending with
diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s b/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s
index 3a4d383..4d4bb7b 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s
@@ -57,7 +57,7 @@ good_sign_auted_checked_brk:
autda x0, x2
eor x16, x0, x0, lsl #1
tbz x16, #62, 1f
- brk 0x1234
+ brk 0xc472
1:
pacda x0, x1
ret
@@ -351,7 +351,7 @@ good_sign_auted_checked_brk_multi_bb:
1:
eor x16, x0, x0, lsl #1
tbz x16, #62, 2f
- brk 0x1234
+ brk 0xc472
2:
cbz x4, 3f
nop
@@ -705,7 +705,7 @@ good_resign_with_increment_brk:
add x0, x0, #8
eor x16, x0, x0, lsl #1
tbz x16, #62, 1f
- brk 0x1234
+ brk 0xc472
1:
mov x2, x0
pacda x2, x1
diff --git a/bolt/test/binary-analysis/AArch64/trap-instructions.s b/bolt/test/binary-analysis/AArch64/trap-instructions.s
new file mode 100644
index 0000000..7810b2d
--- /dev/null
+++ b/bolt/test/binary-analysis/AArch64/trap-instructions.s
@@ -0,0 +1,213 @@
+// RUN: %clang %cflags -march=armv8.3-a %s -o %t.exe -Wl,--emit-relocs
+// RUN: llvm-bolt-binary-analysis --scanners=pauth %t.exe 2>&1 | FileCheck %s
+
+// Test what instructions can be used to terminate the program abnormally
+// on security violation.
+//
+// All test cases have the same structure:
+//
+// cbz x0, 1f // [a], ensures [c] is never reported as unreachable
+// autia x2, x3
+// cbz x1, 2f // [b]
+// [instruction under test]
+// 1:
+// ret // [c]
+// 2:
+// ldr x0, [x2]
+// ret
+//
+// This is to handle three possible cases: the instruction under test may be
+// considered by BOLT as
+// * trapping (and thus no-return): after being authenticated, x2 is either
+// checked by LDR (if [b] is taken) or the program is terminated
+// immediately without leaking x2 (if [b] falls through to the trapping
+// instruction under test). Nothing is reported.
+// * non-trapping, but no-return (such as calling abort()): x2 is leaked if [b]
+// falls through. Authentication oracle is reported.
+// * non-trapping and falling-through (i.e. a regular instruction):
+// x2 is leaked by [c]. Authentication oracle is reported.
+
+ .text
+
+ .globl brk_key_ia
+ .type brk_key_ia,@function
+brk_key_ia:
+// CHECK-NOT: brk_key_ia
+ cbz x0, 1f
+ autia x2, x3
+ cbz x1, 2f
+ brk 0xc470
+1:
+ ret
+2:
+ ldr x0, [x2]
+ ret
+ .size brk_key_ia, .-brk_key_ia
+
+ .globl brk_key_ib
+ .type brk_key_ib,@function
+brk_key_ib:
+// CHECK-NOT: brk_key_ib
+ cbz x0, 1f
+ autia x2, x3
+ cbz x1, 2f
+ brk 0xc471
+1:
+ ret
+2:
+ ldr x0, [x2]
+ ret
+ .size brk_key_ib, .-brk_key_ib
+
+ .globl brk_key_da
+ .type brk_key_da,@function
+brk_key_da:
+// CHECK-NOT: brk_key_da
+ cbz x0, 1f
+ autia x2, x3
+ cbz x1, 2f
+ brk 0xc472
+1:
+ ret
+2:
+ ldr x0, [x2]
+ ret
+ .size brk_key_da, .-brk_key_da
+
+ .globl brk_key_db
+ .type brk_key_db,@function
+brk_key_db:
+// CHECK-NOT: brk_key_db
+ cbz x0, 1f
+ autia x2, x3
+ cbz x1, 2f
+ brk 0xc473
+1:
+ ret
+2:
+ ldr x0, [x2]
+ ret
+ .size brk_key_db, .-brk_key_db
+
+// The immediate operand of BRK instruction may indicate whether the instruction
+// is intended to be a non-recoverable trap: for example, for this code
+//
+// int test_trap(void) {
+// __builtin_trap();
+// return 42;
+// }
+// int test_debugtrap(void) {
+// __builtin_debugtrap();
+// return 42;
+// }
+//
+// Clang produces the following assembly:
+//
+// test_trap:
+// brk #0x1
+// test_debugtrap:
+// brk #0xf000
+// mov w0, #42
+// ret
+//
+// In GCC, __builtin_trap() uses "brk 0x3e8" (i.e. decimal 1000) and
+// __builtin_debugtrap() is not supported.
+//
+// At the time of writing these test cases, any BRK instruction is considered
+// no-return by BOLT, thus it ends its basic block and prevents falling through
+// to the next BB.
+// FIXME: Make BOLT handle __builtin_debugtrap() properly from the CFG point
+// of view.
+
+ .globl brk_gcc_builtin_trap
+ .type brk_gcc_builtin_trap,@function
+brk_gcc_builtin_trap:
+// CHECK-NOT: brk_gcc_builtin_trap
+ cbz x0, 1f
+ autia x2, x3
+ cbz x1, 2f
+ brk 0x3e8 // __builtin_trap()
+1:
+ ret
+2:
+ ldr x0, [x2]
+ ret
+ .size brk_gcc_builtin_trap, .-brk_gcc_builtin_trap
+
+ .globl brk_clang_builtin_trap
+ .type brk_clang_builtin_trap,@function
+brk_clang_builtin_trap:
+// CHECK-NOT: brk_clang_builtin_trap
+ cbz x0, 1f
+ autia x2, x3
+ cbz x1, 2f
+ brk 0x1 // __builtin_trap()
+1:
+ ret
+2:
+ ldr x0, [x2]
+ ret
+ .size brk_clang_builtin_trap, .-brk_clang_builtin_trap
+
+ .globl brk_clang_builtin_debugtrap
+ .type brk_clang_builtin_debugtrap,@function
+brk_clang_builtin_debugtrap:
+// CHECK-LABEL: GS-PAUTH: authentication oracle found in function brk_clang_builtin_debugtrap, basic block {{[^,]+}}, at address
+// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x2, x3
+// CHECK-NEXT: The 0 instructions that leak the affected registers are:
+ cbz x0, 1f
+ autia x2, x3
+ cbz x1, 2f
+ brk 0xf000 // __builtin_debugtrap()
+1:
+ ret
+2:
+ ldr x0, [x2]
+ ret
+ .size brk_clang_builtin_debugtrap, .-brk_clang_builtin_debugtrap
+
+// Conservatively treat BRK with an unknown immediate operand as not suitable
+// for terminating the program on security violation.
+ .globl brk_unknown_imm
+ .type brk_unknown_imm,@function
+brk_unknown_imm:
+// CHECK-LABEL: GS-PAUTH: authentication oracle found in function brk_unknown_imm, basic block {{[^,]+}}, at address
+// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x2, x3
+// CHECK-NEXT: The 0 instructions that leak the affected registers are:
+ cbz x0, 1f
+ autia x2, x3
+ cbz x1, 2f
+ brk 0x3572
+1:
+ ret
+2:
+ ldr x0, [x2]
+ ret
+ .size brk_unknown_imm, .-brk_unknown_imm
+
+// Conservatively assume calling the abort() function may be an unsafe way to
+// terminate the program, as there are a number of instructions that would
+// be executed when the program state is already tampered with.
+ .globl call_abort_fn
+ .type call_abort_fn,@function
+call_abort_fn:
+// CHECK-LABEL: GS-PAUTH: authentication oracle found in function call_abort_fn, basic block {{[^,]+}}, at address
+// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x2, x3
+// CHECK-NEXT: The 0 instructions that leak the affected registers are:
+ cbz x0, 1f
+ autia x2, x3
+ cbz x1, 2f
+ b abort // a no-return tail call to abort()
+1:
+ ret
+2:
+ ldr x0, [x2]
+ ret
+ .size call_abort_fn, .-call_abort_fn
+
+ .globl main
+ .type main,@function
+main:
+ mov x0, 0
+ ret
+ .size main, .-main
diff --git a/bolt/test/dump-dot-func.test b/bolt/test/dump-dot-func.test
new file mode 100644
index 0000000..f05bfc1
--- /dev/null
+++ b/bolt/test/dump-dot-func.test
@@ -0,0 +1,52 @@
+# Test the --dump-dot-func option with multiple functions
+# (includes tests for both mangled/unmangled names)
+
+RUN: %clangxx %p/Inputs/multi-func.cpp -o %t.exe -Wl,-q
+
+# Test 1: --dump-dot-func with specific function name (mangled)
+RUN: llvm-bolt %t.exe -o %t.bolt1 --dump-dot-func=_Z3addii -v=1 2>&1 | FileCheck %s --check-prefix=ADD
+
+# Test 2: --dump-dot-func with regex pattern (main.*)
+RUN: llvm-bolt %t.exe -o %t.bolt2 --dump-dot-func="main.*" -v=1 2>&1 | FileCheck %s --check-prefix=MAIN-REGEX
+
+# Test 3: --dump-dot-func with multiple specific functions (mangled names)
+RUN: llvm-bolt %t.exe -o %t.bolt3 --dump-dot-func=_Z3addii,_Z8multiplyii -v=1 2>&1 | FileCheck %s --check-prefix=MULTI
+
+# Test 4: No option specified should create no dot files
+RUN: llvm-bolt %t.exe -o %t.bolt4 2>&1 | FileCheck %s --check-prefix=NONE
+
+# Test 5: --dump-dot-func with non-existent function
+RUN: llvm-bolt %t.exe -o %t.bolt5 --dump-dot-func=nonexistent -v=1 2>&1 | FileCheck %s --check-prefix=NONEXISTENT
+
+# Test 6: Backward compatibility - --dump-dot-all should still work
+RUN: llvm-bolt %t.exe -o %t.bolt6 --dump-dot-all -v=1 2>&1 | FileCheck %s --check-prefix=ALL
+
+# Test 7: Test with unmangled function name (main function)
+RUN: llvm-bolt %t.exe -o %t.bolt7 --dump-dot-func=main -v=1 2>&1 | FileCheck %s --check-prefix=MAIN-UNMANGLED
+
+# Check that specific functions are dumped
+ADD: BOLT-INFO: dumping CFG to _Z3addii-00_build-cfg.dot
+ADD-NOT: BOLT-INFO: dumping CFG to main-00_build-cfg.dot
+ADD-NOT: BOLT-INFO: dumping CFG to _Z8multiplyii-00_build-cfg.dot
+ADD-NOT: BOLT-INFO: dumping CFG to _Z11main_helperv-00_build-cfg.dot
+
+MAIN-REGEX-DAG: BOLT-INFO: dumping CFG to main-00_build-cfg.dot
+MAIN-REGEX-NOT: BOLT-INFO: dumping CFG to _Z3addii-00_build-cfg.dot
+MAIN-REGEX-NOT: BOLT-INFO: dumping CFG to _Z8multiplyii-00_build-cfg.dot
+
+MULTI-DAG: BOLT-INFO: dumping CFG to _Z3addii-00_build-cfg.dot
+MULTI-DAG: BOLT-INFO: dumping CFG to _Z8multiplyii-00_build-cfg.dot
+MULTI-NOT: BOLT-INFO: dumping CFG to main-00_build-cfg.dot
+MULTI-NOT: BOLT-INFO: dumping CFG to _Z11main_helperv-00_build-cfg.dot
+
+# Should be no dumping messages when no option is specified
+NONE-NOT: BOLT-INFO: dumping CFG
+
+# Should be no dumping messages for non-existent function
+NONEXISTENT-NOT: BOLT-INFO: dumping CFG
+
+ALL: BOLT-INFO: dumping CFG to main-00_build-cfg.dot
+
+MAIN-UNMANGLED: BOLT-INFO: dumping CFG to main-00_build-cfg.dot
+MAIN-UNMANGLED-NOT: BOLT-INFO: dumping CFG to _Z3addii-00_build-cfg.dot
+MAIN-UNMANGLED-NOT: BOLT-INFO: dumping CFG to _Z8multiplyii-00_build-cfg.dot
\ No newline at end of file
diff --git a/bolt/test/lit.cfg.py b/bolt/test/lit.cfg.py
index 0d05229..bef570b 100644
--- a/bolt/test/lit.cfg.py
+++ b/bolt/test/lit.cfg.py
@@ -18,11 +18,22 @@ from lit.llvm.subst import FindTool
# name: The name of this test suite.
config.name = "BOLT"
+# TODO: Consolidate the logic for turning on the internal shell by default for all LLVM test suites.
+# See https://github.com/llvm/llvm-project/issues/106636 for more details.
+#
+# We prefer the lit internal shell, which provides a better user experience on
+# failures and is faster, unless the user explicitly disables it with the
+# LIT_USE_INTERNAL_SHELL=0 env var.
+use_lit_shell = True
+lit_shell_env = os.environ.get("LIT_USE_INTERNAL_SHELL")
+if lit_shell_env:
+ use_lit_shell = lit.util.pythonize_bool(lit_shell_env)
+
# testFormat: The test format to use to interpret tests.
#
# For now we require '&&' between commands, until they get globally killed and
# the test runner updated.
-config.test_format = lit.formats.ShTest(not llvm_config.use_lit_shell)
+config.test_format = lit.formats.ShTest(execute_external=not use_lit_shell)
# suffixes: A list of file extensions to treat as test files.
config.suffixes = [
diff --git a/bolt/test/perf2bolt/perf_test.test b/bolt/test/perf2bolt/perf_test.test
index 434d4d2..08b3413 100644
--- a/bolt/test/perf2bolt/perf_test.test
+++ b/bolt/test/perf2bolt/perf_test.test
@@ -2,7 +2,7 @@
REQUIRES: system-linux, perf
-RUN: %clang %S/Inputs/perf_test.c -fuse-ld=lld -Wl,--script=%S/Inputs/perf_test.lds -o %t
+RUN: %clang %S/Inputs/perf_test.c -fuse-ld=lld -pie -Wl,--script=%S/Inputs/perf_test.lds -o %t
RUN: perf record -Fmax -e cycles:u -o %t2 -- %t
RUN: perf2bolt %t -p=%t2 -o %t3 -nl -ignore-build-id --show-density \
RUN: --heatmap %t.hm 2>&1 | FileCheck %s
diff --git a/bolt/test/permission.test b/bolt/test/permission.test
index f495e87..ecb51fc 100644
--- a/bolt/test/permission.test
+++ b/bolt/test/permission.test
@@ -1,13 +1,28 @@
# Ensure that the permissions of the optimized binary file comply with the
# system's umask.
-# This test performs a logical AND operation on the results of the `stat -c %a
-# %t.bolt` and `umask` commands (both results are displayed in octal), and
-# checks whether the result is equal to 0.
-REQUIRES: shell, system-linux
+# This test uses umask, which is Linux specific.
+REQUIRES: system-linux
-RUN: %clang %cflags %p/Inputs/hello.c -o %t -Wl,-q
-RUN: llvm-bolt %t -o %t.bolt
-RUN: echo $(( 8#$(stat -c %a %t.bolt) & 8#$(umask) )) | FileCheck %s
+# RUN: rm -f %t
+# RUN: touch %t
+# RUN: chmod 0755 %t
+# RUN: ls -l %t | cut -f 1 -d ' ' > %t.0755
+# RUN: chmod 0600 %t
+# RUN: ls -l %t | cut -f 1 -d ' ' > %t.0600
+# RUN: chmod 0655 %t
+# RUN: ls -l %t | cut -f 1 -d ' ' > %t.0655
-CHECK: 0
+RUN: %clang %cflags %p/Inputs/hello.c -o %t.exe -Wl,-q
+
+RUN: umask 0022
+RUN: llvm-bolt %t.exe -o %t1
+RUN: ls -l %t1 | cut -f 1 -d ' ' | cmp - %t.0755
+
+RUN: umask 0177
+RUN: llvm-bolt %t.exe -o %t2
+RUN: ls -l %t2 | cut -f 1 -d ' ' | cmp - %t.0600
+
+RUN: umask 0122
+RUN: llvm-bolt %t.exe -o %t3
+RUN: ls -l %t3 | cut -f 1 -d ' ' | cmp - %t.0655
diff --git a/bolt/test/runtime/X86/tail-duplication-constant-prop.s b/bolt/test/runtime/X86/tail-duplication-constant-prop.s
index 863c6ff..c28c2f4 100644
--- a/bolt/test/runtime/X86/tail-duplication-constant-prop.s
+++ b/bolt/test/runtime/X86/tail-duplication-constant-prop.s
@@ -8,8 +8,8 @@
# RUN: --print-finalized \
# RUN: --tail-duplication=moderate --tail-duplication-minimum-offset=1 \
# RUN: --tail-duplication-const-copy-propagation=1 -o %t.out | FileCheck %s
-# RUN: %t.exe; echo $?
-# RUN: %t.out; echo $?
+# RUN: not %t.exe
+# RUN: not %t.out
# FDATA: 1 main 14 1 main #.BB2# 0 10
# FDATA: 1 main 16 1 main #.BB2# 0 20