author    Amir Ayupov <aaupov@fb.com> 2025-04-09 16:52:45 -0700
committer Amir Ayupov <aaupov@fb.com> 2025-04-09 16:52:45 -0700
commit    3d2d135d70b4cdc661ad1a16e44eb740f3c337b6 (patch)
tree      01b4b78c34196f7c7093664135acce67de7a501f
parent    cde2ea377d457e272ce1572d588643e5ee533c30 (diff)
[spr] changes to main this commit is based on (users/aaupov/spr/main.bolt-jump-table-trampoline-insertion-pass)
Created using spr 1.3.4 [skip ci]
 bolt/include/bolt/Core/BinaryContext.h        |  40
 bolt/include/bolt/Core/BinaryFunction.h       |   8
 bolt/include/bolt/Core/JumpTable.h            |  40
 bolt/include/bolt/Passes/LongJmp.h            |  22
 bolt/include/bolt/Rewrite/MetadataManager.h   |   4
 bolt/include/bolt/Rewrite/MetadataRewriter.h  |   4
 bolt/include/bolt/Rewrite/MetadataRewriters.h |   2
 bolt/include/bolt/Rewrite/RewriteInstance.h   |   3
 bolt/lib/Core/BinaryContext.cpp               | 175
 bolt/lib/Core/BinaryEmitter.cpp               | 113
 bolt/lib/Core/BinaryFunction.cpp              |  32
 bolt/lib/Core/JumpTable.cpp                   |   8
 bolt/lib/Passes/Aligner.cpp                   |   5
 bolt/lib/Passes/BinaryPasses.cpp              |   2
 bolt/lib/Passes/IndirectCallPromotion.cpp     |   4
 bolt/lib/Passes/JTFootprintReduction.cpp      |   2
 bolt/lib/Passes/LongJmp.cpp                   | 186
 bolt/lib/Passes/PatchEntries.cpp              |  14
 bolt/lib/Rewrite/BinaryPassManager.cpp        |   6
 bolt/lib/Rewrite/CMakeLists.txt               |   1
 bolt/lib/Rewrite/JumpTableInfoReader.cpp      |  91
 bolt/lib/Rewrite/MetadataManager.cpp          |  12
 bolt/lib/Rewrite/RewriteInstance.cpp          |  61
 bolt/test/AArch64/Inputs/jump-table.c         |  20
 bolt/test/AArch64/jump-table-info.s           | 186
 bolt/test/program-header.test                 |  14
 clang/include/clang/Driver/Options.td         |   4
 clang/lib/Driver/ToolChains/Gnu.cpp           |  29
 cross-project-tests/lit.cfg.py                |  14
 cross-project-tests/lit.site.cfg.py.in        |   4
 lldb/test/API/lit.cfg.py                      |  11
 lldb/test/API/lit.site.cfg.py.in              |   8
 lldb/test/Shell/helper/toolchain.py           |   5
 lldb/test/Shell/lit.site.cfg.py.in            |   9
 llvm/CMakeLists.txt                           |   4
 35 files changed, 986 insertions(+), 157 deletions(-)
diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 88313a6..77595bd 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -230,6 +230,12 @@ class BinaryContext {
/// Functions injected by BOLT
std::vector<BinaryFunction *> InjectedBinaryFunctions;
+ /// Thunk functions.
+ std::vector<BinaryFunction *> ThunkBinaryFunctions;
+
+ /// Function that precedes thunks in the binary.
+ const BinaryFunction *ThunkLocation{nullptr};
+
/// Jump tables for all functions mapped by address.
std::map<uint64_t, JumpTable *> JumpTables;
@@ -435,7 +441,18 @@ public:
/// Return size of an entry for the given jump table \p Type.
uint64_t getJumpTableEntrySize(JumpTable::JumpTableType Type) const {
- return Type == JumpTable::JTT_PIC ? 4 : AsmInfo->getCodePointerSize();
+ switch (Type) {
+ case JumpTable::JTT_X86_64_PIC4:
+ return 4;
+ case JumpTable::JTT_X86_64_ABS:
+ return AsmInfo->getCodePointerSize();
+ case JumpTable::JTT_AARCH64_REL1:
+ return 1;
+ case JumpTable::JTT_AARCH64_REL2:
+ return 2;
+ case JumpTable::JTT_AARCH64_REL4:
+ return 4;
+ }
}
/// Return JumpTable containing a given \p Address.
@@ -553,6 +570,16 @@ public:
return InjectedBinaryFunctions;
}
+ BinaryFunction *createThunkBinaryFunction(const std::string &Name);
+
+ std::vector<BinaryFunction *> &getThunkBinaryFunctions() {
+ return ThunkBinaryFunctions;
+ }
+
+ const BinaryFunction *getThunkLocation() const { return ThunkLocation; }
+
+ void setThunkLocation(const BinaryFunction *BF) { ThunkLocation = BF; }
+
/// Return vector with all functions, i.e. include functions from the input
/// binary and functions created by BOLT.
std::vector<BinaryFunction *> getAllBinaryFunctions();
@@ -574,14 +601,13 @@ public:
/// If \p NextJTAddress is different from zero, it is used as an upper
/// bound for jump table memory layout.
///
- /// Optionally, populate \p Address from jump table entries. The entries
- /// could be partially populated if the jump table detection fails.
+ /// If \p JT is set, populate it with jump table entries. The entries could be
+ /// partially populated if the jump table detection fails.
bool analyzeJumpTable(const uint64_t Address,
const JumpTable::JumpTableType Type,
const BinaryFunction &BF,
const uint64_t NextJTAddress = 0,
- JumpTable::AddressesType *EntriesAsAddress = nullptr,
- bool *HasEntryInFragment = nullptr) const;
+ JumpTable *JT = nullptr) const;
/// After jump table locations are established, this function will populate
/// their EntriesAsAddress based on memory contents.
@@ -1372,6 +1398,10 @@ public:
uint64_t
computeInstructionSize(const MCInst &Inst,
const MCCodeEmitter *Emitter = nullptr) const {
+ // FIXME: hack for faster size computation on aarch64.
+ if (isAArch64())
+ return MIB->isPseudo(Inst) ? 0 : 4;
+
if (std::optional<uint32_t> Size = MIB->getSize(Inst))
return *Size;
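
Editor's note on the new AArch64 table types above: entries are stored as unsigned offsets from a base location inside the function, pre-shifted right by 2 for the 1- and 2-byte variants. A minimal decoding sketch (illustrative names, not BOLT API; it mirrors the switch added to `analyzeJumpTable` further down):

```cpp
#include <cassert>
#include <cstdint>

// Decode one AArch64 relative jump-table entry into an absolute target.
// REL1/REL2 entries hold (Target - Base) >> 2; REL4 entries hold the
// unshifted byte offset.
uint64_t decodeAArch64Entry(uint64_t Base, uint64_t RawEntry,
                            unsigned EntrySize) {
  const unsigned ShiftAmt = (EntrySize == 4) ? 0 : 2;
  return Base + (RawEntry << ShiftAmt);
}

int main() {
  // A 1-byte entry of 0x05 with base 0x400000 targets 0x400014.
  assert(decodeAArch64Entry(0x400000, 0x05, /*EntrySize=*/1) == 0x400014);
}
```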
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index d3d11f8..c18a43f 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -363,6 +363,10 @@ private:
/// True if the function should not have an associated symbol table entry.
bool IsAnonymous{false};
+ /// True if the function is used for remapping hot text and shall not be
+ /// placed on a huge page.
+ bool IsHotTextMover{false};
+
/// Name for the section this function code should reside in.
std::string CodeSectionName;
@@ -1385,6 +1389,8 @@ public:
/// Return true if the function uses ORC format for stack unwinding.
bool hasORC() const { return HasORC; }
+ bool isHotTextMover() const { return IsHotTextMover; }
+
const JumpTable *getJumpTable(const MCInst &Inst) const {
const uint64_t Address = BC.MIB->getJumpTable(Inst);
return getJumpTableContainingAddress(Address);
@@ -1735,6 +1741,8 @@ public:
/// Mark function that should not be emitted.
void setIgnored();
+ void setHotTextMover(bool V) { IsHotTextMover = V; }
+
void setHasIndirectTargetToSplitFragment(bool V) {
HasIndirectTargetToSplitFragment = V;
}
diff --git a/bolt/include/bolt/Core/JumpTable.h b/bolt/include/bolt/Core/JumpTable.h
index 52b9cce..c76e2a9 100644
--- a/bolt/include/bolt/Core/JumpTable.h
+++ b/bolt/include/bolt/Core/JumpTable.h
@@ -16,6 +16,7 @@
#include "bolt/Core/BinaryData.h"
#include <map>
+#include <variant>
#include <vector>
namespace llvm {
@@ -40,6 +41,7 @@ class BinaryFunction;
/// a different label at a different offset in this jump table.
class JumpTable : public BinaryData {
friend class BinaryContext;
+ friend class JumpTableInfoReader;
JumpTable() = delete;
JumpTable(const JumpTable &) = delete;
@@ -47,10 +49,34 @@ class JumpTable : public BinaryData {
public:
enum JumpTableType : char {
- JTT_NORMAL,
- JTT_PIC,
+ JTT_X86_64_FIRST = 0,
+ JTT_X86_64_ABS = JTT_X86_64_FIRST,
+ JTT_X86_64_PIC4,
+ JTT_X86_64_LAST = JTT_X86_64_PIC4,
+ JTT_AARCH64_FIRST,
+ JTT_AARCH64_REL1 = JTT_AARCH64_FIRST,
+ JTT_AARCH64_REL2,
+ JTT_AARCH64_REL4,
+ JTT_AARCH64_LAST = JTT_AARCH64_REL4
};
+ static StringRef getTypeStr(JumpTableType Type) {
+ switch (Type) {
+ case JTT_X86_64_ABS:
+ return "X86_64_ABS";
+ case JTT_X86_64_PIC4:
+ return "X86_64_PIC4";
+ case JTT_AARCH64_REL1:
+ return "AARCH64_REL1";
+ case JTT_AARCH64_REL2:
+ return "AARCH64_REL2";
+ case JTT_AARCH64_REL4:
+ return "AARCH64_REL4";
+ }
+ }
+
+  StringRef getTypeStr() const { return getTypeStr(Type); }
+
/// Branch statistics for jump table entries.
struct JumpInfo {
uint64_t Mispreds{0};
@@ -92,6 +118,16 @@ public:
/// BinaryFunction this jump tables belongs to.
SmallVector<BinaryFunction *, 1> Parents;
+ ///
+ /// AArch64-specific fields
+ ///
+
+ /// Entries are offsets relative to an arbitrary function location.
+ std::variant<uint64_t, MCSymbol *> BaseAddress;
+
+ /// Address of the instruction referencing the jump table (MemLocInstr).
+ uint64_t MemLocInstrAddress{0};
+
private:
/// Constructor should only be called by a BinaryContext.
JumpTable(MCSymbol &Symbol, uint64_t Address, size_t EntrySize,
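
The `BaseAddress` variant above is two-phase: the metadata reader stores a raw address, and `postProcessJumpTables` (later in this patch) replaces it with a local label once one can be created. A standalone sketch of the pattern, with stand-in types:

```cpp
#include <cstdint>
#include <variant>

struct Symbol {}; // stand-in for MCSymbol

// Phase 1 (pre-disasm): the reader records the numeric base address.
// Phase 2 (post-process): once a label exists at that address, the variant
// is overwritten so the emitter can form Entry - Base expressions.
void promoteBase(std::variant<uint64_t, Symbol *> &Base, Symbol *Label) {
  if (std::holds_alternative<uint64_t>(Base))
    Base = Label;
}
```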
diff --git a/bolt/include/bolt/Passes/LongJmp.h b/bolt/include/bolt/Passes/LongJmp.h
index df3ea96..5388d7e 100644
--- a/bolt/include/bolt/Passes/LongJmp.h
+++ b/bolt/include/bolt/Passes/LongJmp.h
@@ -76,6 +76,28 @@ class LongJmpPass : public BinaryFunctionPass {
/// 128MB of each other.
void relaxLocalBranches(BinaryFunction &BF);
+ struct FunctionCluster {
+ DenseSet<BinaryFunction *> Functions;
+
+ // Functions that this cluster of functions is calling. Note that it
+ // excludes all functions in the cluster itself.
+ DenseSet<BinaryFunction *> Callees;
+
+ uint64_t Size{0};
+
+ // Last function in the cluster.
+ BinaryFunction *LastBF{nullptr};
+ };
+
+  /// Maximum size of a function cluster. Kept below 128MB so that a
+  /// cluster plus its trailing thunk island stays within branch range.
+ static constexpr uint64_t MaxClusterSize = 125 * 1024 * 1024;
+
+ /// Relax calls for medium code model where code is < 256MB.
+ /// A thunk island will be introduced between two clusters of functions to
+ /// enable calls over 128MB.
+ void relaxCalls(BinaryContext &BC);
+
/// -- Layout estimation methods --
/// Try to do layout before running the emitter, by looking at BinaryFunctions
/// and MCInsts -- this is an estimation. To be correct for longjmp inserter
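
For the `MaxClusterSize` constant above, a quick sanity check of the headroom it leaves under the ±128MB reach of AArch64 direct branches (a sketch, not BOLT code):

```cpp
#include <cstdint>

// AArch64 direct branches (B/BL) reach +/-128MiB. A call from anywhere in
// a cluster must reach the thunk island placed right after it, so cluster
// size plus island size must stay under that range.
constexpr uint64_t BranchRange = 128ULL * 1024 * 1024;    // 134,217,728
constexpr uint64_t MaxClusterSize = 125ULL * 1024 * 1024; // 131,072,000

// ~3MiB left over for thunks and alignment padding.
static_assert(BranchRange - MaxClusterSize == 3ULL * 1024 * 1024,
              "unexpected thunk island headroom");
```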
diff --git a/bolt/include/bolt/Rewrite/MetadataManager.h b/bolt/include/bolt/Rewrite/MetadataManager.h
index 6001b70..cc6e3f9 100644
--- a/bolt/include/bolt/Rewrite/MetadataManager.h
+++ b/bolt/include/bolt/Rewrite/MetadataManager.h
@@ -31,6 +31,10 @@ public:
/// Run initializers after sections are discovered.
void runSectionInitializers();
+ /// Execute metadata initializers when functions are discovered but not yet
+ /// disassembled.
+ void runInitializersPreDisasm();
+
/// Execute initialization of rewriters while functions are disassembled, but
/// CFG is not yet built.
void runInitializersPreCFG();
diff --git a/bolt/include/bolt/Rewrite/MetadataRewriter.h b/bolt/include/bolt/Rewrite/MetadataRewriter.h
index 6ff8f0a..d39500c 100644
--- a/bolt/include/bolt/Rewrite/MetadataRewriter.h
+++ b/bolt/include/bolt/Rewrite/MetadataRewriter.h
@@ -49,6 +49,10 @@ public:
/// but before functions are discovered.
virtual Error sectionInitializer() { return Error::success(); }
+ /// Run initialization after the functions are identified but not yet
+ /// disassembled.
+ virtual Error preDisasmInitializer() { return Error::success(); }
+
/// Interface for modifying/annotating functions in the binary based on the
/// contents of the section. Functions are in pre-cfg state.
virtual Error preCFGInitializer() { return Error::success(); }
diff --git a/bolt/include/bolt/Rewrite/MetadataRewriters.h b/bolt/include/bolt/Rewrite/MetadataRewriters.h
index b71bd6c..ae34194 100644
--- a/bolt/include/bolt/Rewrite/MetadataRewriters.h
+++ b/bolt/include/bolt/Rewrite/MetadataRewriters.h
@@ -27,6 +27,8 @@ std::unique_ptr<MetadataRewriter> createPseudoProbeRewriter(BinaryContext &);
std::unique_ptr<MetadataRewriter> createSDTRewriter(BinaryContext &);
+std::unique_ptr<MetadataRewriter> createJumpTableInfoReader(BinaryContext &);
+
} // namespace bolt
} // namespace llvm
diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h
index 94dd06e..8fd6a77 100644
--- a/bolt/include/bolt/Rewrite/RewriteInstance.h
+++ b/bolt/include/bolt/Rewrite/RewriteInstance.h
@@ -181,6 +181,9 @@ private:
/// Process metadata in sections before functions are discovered.
void processSectionMetadata();
+ /// Process metadata in special sections before functions are disassembled.
+ void processMetadataPreDisasm();
+
/// Process metadata in special sections before CFG is built for functions.
void processMetadataPreCFG();
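
Taken together with the MetadataRewriter change above, the hook order becomes: `sectionInitializer`, then the new `preDisasmInitializer`, then `preCFGInitializer`. A hypothetical rewriter showing where the new slot fits (a sketch only; the hook names come from this patch):

```cpp
#include "bolt/Rewrite/MetadataRewriter.h"

namespace llvm {
namespace bolt {

// Hypothetical example: a rewriter that needs function addresses but must
// run before instructions exist (e.g. to register metadata-described jump
// tables) overrides the new pre-disasm hook.
class ExampleRewriter : public MetadataRewriter {
public:
  ExampleRewriter(StringRef Name, BinaryContext &BC)
      : MetadataRewriter(Name, BC) {}

  Error preDisasmInitializer() override {
    // Functions are identified here; no MCInsts yet.
    return Error::success();
  }
};

} // namespace bolt
} // namespace llvm
```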
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index 80b15d7..25a39f1 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -497,7 +497,7 @@ BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
const MemoryContentsType MemType = analyzeMemoryAt(Address, BF);
if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
const MCSymbol *Symbol =
- getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC);
+ getOrCreateJumpTable(BF, Address, JumpTable::JTT_X86_64_PIC4);
return std::make_pair(Symbol, 0);
}
@@ -541,10 +541,10 @@ MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
// Start with checking for PIC jump table. We expect non-PIC jump tables
// to have high 32 bits set to 0.
- if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF))
+ if (analyzeJumpTable(Address, JumpTable::JTT_X86_64_PIC4, BF))
return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
- if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF))
+ if (analyzeJumpTable(Address, JumpTable::JTT_X86_64_ABS, BF))
return MemoryContentsType::POSSIBLE_JUMP_TABLE;
return MemoryContentsType::UNKNOWN;
@@ -554,8 +554,7 @@ bool BinaryContext::analyzeJumpTable(const uint64_t Address,
const JumpTable::JumpTableType Type,
const BinaryFunction &BF,
const uint64_t NextJTAddress,
- JumpTable::AddressesType *EntriesAsAddress,
- bool *HasEntryInFragment) const {
+ JumpTable *JT) const {
// Target address of __builtin_unreachable.
const uint64_t UnreachableAddress = BF.getAddress() + BF.getSize();
@@ -572,11 +571,11 @@ bool BinaryContext::analyzeJumpTable(const uint64_t Address,
size_t TrimmedSize = 0;
auto addEntryAddress = [&](uint64_t EntryAddress, bool Unreachable = false) {
- if (!EntriesAsAddress)
+ if (!JT)
return;
- EntriesAsAddress->emplace_back(EntryAddress);
+ JT->EntriesAsAddress.emplace_back(EntryAddress);
if (!Unreachable)
- TrimmedSize = EntriesAsAddress->size();
+ TrimmedSize = JT->EntriesAsAddress.size();
};
ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
@@ -595,12 +594,9 @@ bool BinaryContext::analyzeJumpTable(const uint64_t Address,
if (NextJTAddress)
UpperBound = std::min(NextJTAddress, UpperBound);
- LLVM_DEBUG({
- using JTT = JumpTable::JumpTableType;
- dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n",
- Address, BF.getPrintName(),
- Type == JTT::JTT_PIC ? "PIC" : "Normal");
- });
+ LLVM_DEBUG(
+ dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n",
+ Address, BF, JumpTable::getTypeStr(Type)));
const uint64_t EntrySize = getJumpTableEntrySize(Type);
for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize;
EntryAddress += EntrySize) {
@@ -608,13 +604,13 @@ bool BinaryContext::analyzeJumpTable(const uint64_t Address,
<< " -> ");
// Check if there's a proper relocation against the jump table entry.
if (HasRelocations) {
- if (Type == JumpTable::JTT_PIC &&
+ if (Type == JumpTable::JTT_X86_64_PIC4 &&
!DataPCRelocations.count(EntryAddress)) {
LLVM_DEBUG(
dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
break;
}
- if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) {
+ if (Type == JumpTable::JTT_X86_64_ABS && !getRelocationAt(EntryAddress)) {
LLVM_DEBUG(
dbgs()
<< "FAIL: JTT_NORMAL table, no relocation for this address\n");
@@ -622,10 +618,24 @@ bool BinaryContext::analyzeJumpTable(const uint64_t Address,
}
}
- const uint64_t Value =
- (Type == JumpTable::JTT_PIC)
- ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize)
- : *getPointerAtAddress(EntryAddress);
+ uint64_t Value = 0;
+ switch (Type) {
+ case JumpTable::JTT_X86_64_PIC4:
+ Value = Address + *getSignedValueAtAddress(EntryAddress, EntrySize);
+ break;
+ case JumpTable::JTT_X86_64_ABS:
+ Value = *getPointerAtAddress(EntryAddress);
+ break;
+ case JumpTable::JTT_AARCH64_REL1:
+ case JumpTable::JTT_AARCH64_REL2:
+ case JumpTable::JTT_AARCH64_REL4:
+ unsigned ShiftAmt = Type == JumpTable::JTT_AARCH64_REL4 ? 0 : 2;
+ assert(JT &&
+ "jump table must be non-null for AArch64 in analyzeJumpTable");
+ Value = std::get<uint64_t>(JT->BaseAddress) +
+ (*getUnsignedValueAtAddress(EntryAddress, EntrySize) << ShiftAmt);
+ break;
+ }
// __builtin_unreachable() case.
if (Value == UnreachableAddress) {
@@ -646,24 +656,19 @@ bool BinaryContext::analyzeJumpTable(const uint64_t Address,
// Function or one of its fragments.
const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value);
- const bool DoesBelongToFunction =
- BF.containsAddress(Value) ||
- (TargetBF && areRelatedFragments(TargetBF, &BF));
- if (!DoesBelongToFunction) {
+ if (!TargetBF || !areRelatedFragments(TargetBF, &BF)) {
LLVM_DEBUG({
- if (!BF.containsAddress(Value)) {
- dbgs() << "FAIL: function doesn't contain this address\n";
- if (TargetBF) {
- dbgs() << " ! function containing this address: "
- << TargetBF->getPrintName() << '\n';
- if (TargetBF->isFragment()) {
- dbgs() << " ! is a fragment";
- for (BinaryFunction *Parent : TargetBF->ParentFragments)
- dbgs() << ", parent: " << Parent->getPrintName();
- dbgs() << '\n';
- }
- }
- }
+ dbgs() << "FAIL: function doesn't contain this address\n";
+ if (!TargetBF)
+ break;
+ dbgs() << " ! function containing this address: " << *TargetBF << '\n';
+ if (!TargetBF->isFragment())
+ break;
+ dbgs() << " ! is a fragment with parents: ";
+ ListSeparator LS;
+ for (BinaryFunction *Parent : TargetBF->ParentFragments)
+ dbgs() << LS << *Parent;
+ dbgs() << '\n';
});
break;
}
@@ -678,17 +683,17 @@ bool BinaryContext::analyzeJumpTable(const uint64_t Address,
++NumRealEntries;
LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value));
- if (TargetBF != &BF && HasEntryInFragment)
- *HasEntryInFragment = true;
+ if (TargetBF != &BF && JT)
+ JT->IsSplit = true;
addEntryAddress(Value);
}
// Trim direct/normal jump table to exclude trailing unreachable entries that
// can collide with a function address.
- if (Type == JumpTable::JTT_NORMAL && EntriesAsAddress &&
- TrimmedSize != EntriesAsAddress->size() &&
+ if (Type == JumpTable::JTT_X86_64_ABS && JT &&
+ TrimmedSize != JT->EntriesAsAddress.size() &&
getBinaryFunctionAtAddress(UnreachableAddress))
- EntriesAsAddress->resize(TrimmedSize);
+ JT->EntriesAsAddress.resize(TrimmedSize);
// It's a jump table if the number of real entries is more than 1, or there's
// one real entry and one or more special targets. If there are only multiple
@@ -703,20 +708,20 @@ void BinaryContext::populateJumpTables() {
++JTI) {
JumpTable *JT = JTI->second;
- bool NonSimpleParent = false;
- for (BinaryFunction *BF : JT->Parents)
- NonSimpleParent |= !BF->isSimple();
- if (NonSimpleParent)
+ auto isSimple = std::bind(&BinaryFunction::isSimple, std::placeholders::_1);
+ if (!llvm::all_of(JT->Parents, isSimple))
continue;
uint64_t NextJTAddress = 0;
auto NextJTI = std::next(JTI);
- if (NextJTI != JTE)
+ if (isAArch64()) {
+ NextJTAddress = JT->getAddress() + JT->getSize();
+ JT->Entries.clear();
+ } else if (NextJTI != JTE)
NextJTAddress = NextJTI->second->getAddress();
- const bool Success =
- analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]),
- NextJTAddress, &JT->EntriesAsAddress, &JT->IsSplit);
+ const bool Success = analyzeJumpTable(
+ JT->getAddress(), JT->Type, *JT->Parents.front(), NextJTAddress, JT);
if (!Success) {
LLVM_DEBUG({
dbgs() << "failed to analyze ";
@@ -744,7 +749,7 @@ void BinaryContext::populateJumpTables() {
// In strict mode, erase PC-relative relocation record. Later we check that
// all such records are erased and thus have been accounted for.
- if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) {
+ if (opts::StrictMode && JT->Type == JumpTable::JTT_X86_64_PIC4) {
for (uint64_t Address = JT->getAddress();
Address < JT->getAddress() + JT->getSize();
Address += JT->EntrySize) {
@@ -840,33 +845,26 @@ BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
assert(JT->Type == Type && "jump table types have to match");
assert(Address == JT->getAddress() && "unexpected non-empty jump table");
- // Prevent associating a jump table to a specific fragment twice.
- if (!llvm::is_contained(JT->Parents, &Function)) {
- assert(llvm::all_of(JT->Parents,
- [&](const BinaryFunction *BF) {
- return areRelatedFragments(&Function, BF);
- }) &&
- "cannot re-use jump table of a different function");
- // Duplicate the entry for the parent function for easy access
- JT->Parents.push_back(&Function);
- if (opts::Verbosity > 2) {
- this->outs() << "BOLT-INFO: Multiple fragments access same jump table: "
- << JT->Parents[0]->getPrintName() << "; "
- << Function.getPrintName() << "\n";
- JT->print(this->outs());
- }
- Function.JumpTables.emplace(Address, JT);
- for (BinaryFunction *Parent : JT->Parents)
- Parent->setHasIndirectTargetToSplitFragment(true);
- }
+ if (llvm::is_contained(JT->Parents, &Function))
+ return JT->getFirstLabel();
- bool IsJumpTableParent = false;
- (void)IsJumpTableParent;
- for (BinaryFunction *Frag : JT->Parents)
- if (Frag == &Function)
- IsJumpTableParent = true;
- assert(IsJumpTableParent &&
+ // Prevent associating a jump table to a specific fragment twice.
+ auto isSibling = std::bind(&BinaryContext::areRelatedFragments, this,
+ &Function, std::placeholders::_1);
+ assert(llvm::all_of(JT->Parents, isSibling) &&
"cannot re-use jump table of a different function");
+ if (opts::Verbosity > 2) {
+ this->outs() << "BOLT-INFO: Multiple fragments access same jump table: "
+ << JT->Parents[0]->getPrintName() << "; "
+ << Function.getPrintName() << "\n";
+ JT->print(this->outs());
+ }
+ if (JT->Parents.size() == 1)
+ JT->Parents.front()->setHasIndirectTargetToSplitFragment(true);
+ Function.setHasIndirectTargetToSplitFragment(true);
+ // Duplicate the entry for the parent function for easy access
+ JT->Parents.push_back(&Function);
+ Function.JumpTables.emplace(Address, JT);
return JT->getFirstLabel();
}
@@ -1611,7 +1609,21 @@ std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
SortedFunctions.begin(),
[](BinaryFunction &BF) { return &BF; });
- llvm::stable_sort(SortedFunctions, compareBinaryFunctionByIndex);
+ llvm::stable_sort(SortedFunctions,
+ [](const BinaryFunction *A, const BinaryFunction *B) {
+ // Place hot text movers at the start.
+ if (A->isHotTextMover() && !B->isHotTextMover())
+ return true;
+ if (!A->isHotTextMover() && B->isHotTextMover())
+ return false;
+ if (A->hasValidIndex() && B->hasValidIndex()) {
+ return A->getIndex() < B->getIndex();
+ }
+ if (opts::HotFunctionsAtEnd)
+ return B->hasValidIndex();
+ else
+ return A->hasValidIndex();
+ });
return SortedFunctions;
}
@@ -2434,6 +2446,15 @@ BinaryContext::createInstructionPatch(uint64_t Address,
return PBF;
}
+BinaryFunction *
+BinaryContext::createThunkBinaryFunction(const std::string &Name) {
+ ThunkBinaryFunctions.push_back(new BinaryFunction(Name, *this, true));
+ BinaryFunction *BF = ThunkBinaryFunctions.back();
+ setSymbolToFunctionMap(BF->getSymbol(), BF);
+ BF->CurrentState = BinaryFunction::State::CFG;
+ return BF;
+}
+
std::pair<size_t, size_t>
BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
// Adjust branch instruction to match the current layout.
diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp
index 1aad252..db0f11bc 100644
--- a/bolt/lib/Core/BinaryEmitter.cpp
+++ b/bolt/lib/Core/BinaryEmitter.cpp
@@ -19,6 +19,7 @@
#include "bolt/Utils/CommandLineOpts.h"
#include "bolt/Utils/Utils.h"
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/CommandLine.h"
@@ -271,6 +272,14 @@ void BinaryEmitter::emitFunctions() {
if (Emitted)
Function->setEmitted(/*KeepCFG=*/opts::PrintCacheMetrics);
+
+ // Emit thunks.
+ if (BC.getThunkLocation() != Function)
+ continue;
+
+ for (BinaryFunction *Thunk : BC.getThunkBinaryFunctions()) {
+ emitFunction(*Thunk, Thunk->getLayout().getMainFragment());
+ }
}
};
@@ -809,57 +818,71 @@ void BinaryEmitter::emitJumpTable(const JumpTable &JT, MCSection *HotSection,
Streamer.switchSection(JT.Count > 0 ? HotSection : ColdSection);
Streamer.emitValueToAlignment(Align(JT.EntrySize));
}
- MCSymbol *LastLabel = nullptr;
+ MCSymbol *JTLabel = nullptr;
+ MCContext &Context = Streamer.getContext();
uint64_t Offset = 0;
for (MCSymbol *Entry : JT.Entries) {
auto LI = JT.Labels.find(Offset);
- if (LI != JT.Labels.end()) {
- LLVM_DEBUG({
- dbgs() << "BOLT-DEBUG: emitting jump table " << LI->second->getName()
- << " (originally was at address 0x"
- << Twine::utohexstr(JT.getAddress() + Offset)
- << (Offset ? ") as part of larger jump table\n" : ")\n");
- });
- if (!LabelCounts.empty()) {
- LLVM_DEBUG(dbgs() << "BOLT-DEBUG: jump table count: "
- << LabelCounts[LI->second] << '\n');
- if (LabelCounts[LI->second] > 0)
- Streamer.switchSection(HotSection);
- else
- Streamer.switchSection(ColdSection);
- Streamer.emitValueToAlignment(Align(JT.EntrySize));
- }
- // Emit all labels registered at the address of this jump table
- // to sync with our global symbol table. We may have two labels
- // registered at this address if one label was created via
- // getOrCreateGlobalSymbol() (e.g. LEA instructions referencing
- // this location) and another via getOrCreateJumpTable(). This
- // creates a race where the symbols created by these two
- // functions may or may not be the same, but they are both
- // registered in our symbol table at the same address. By
- // emitting them all here we make sure there is no ambiguity
- // that depends on the order that these symbols were created, so
- // whenever this address is referenced in the binary, it is
- // certain to point to the jump table identified at this
- // address.
- if (BinaryData *BD = BC.getBinaryDataByName(LI->second->getName())) {
- for (MCSymbol *S : BD->getSymbols())
- Streamer.emitLabel(S);
- } else {
- Streamer.emitLabel(LI->second);
- }
- LastLabel = LI->second;
+ if (LI == JT.Labels.end())
+ goto emitEntry;
+ JTLabel = LI->second;
+ LLVM_DEBUG({
+ dbgs() << "BOLT-DEBUG: emitting jump table " << JTLabel->getName()
+ << " (originally was at address 0x"
+ << Twine::utohexstr(JT.getAddress() + Offset)
+ << (Offset ? ") as part of larger jump table\n" : ")\n");
+ });
+ if (!LabelCounts.empty()) {
+ uint64_t JTCount = LabelCounts[JTLabel];
+ LLVM_DEBUG(dbgs() << "BOLT-DEBUG: jump table count: " << JTCount << '\n');
+ Streamer.switchSection(JTCount ? HotSection : ColdSection);
+ Streamer.emitValueToAlignment(Align(JT.EntrySize));
}
- if (JT.Type == JumpTable::JTT_NORMAL) {
+ // Emit all labels registered at the address of this jump table
+ // to sync with our global symbol table. We may have two labels
+ // registered at this address if one label was created via
+ // getOrCreateGlobalSymbol() (e.g. LEA instructions referencing
+ // this location) and another via getOrCreateJumpTable(). This
+ // creates a race where the symbols created by these two
+ // functions may or may not be the same, but they are both
+ // registered in our symbol table at the same address. By
+ // emitting them all here we make sure there is no ambiguity
+ // that depends on the order that these symbols were created, so
+ // whenever this address is referenced in the binary, it is
+ // certain to point to the jump table identified at this
+ // address.
+ if (BinaryData *BD = BC.getBinaryDataByName(JTLabel->getName())) {
+ for (MCSymbol *S : BD->getSymbols())
+ Streamer.emitLabel(S);
+ } else {
+ Streamer.emitLabel(JTLabel);
+ }
+ emitEntry:
+ switch (JT.Type) {
+ case JumpTable::JTT_X86_64_ABS:
Streamer.emitSymbolValue(Entry, JT.OutputEntrySize);
- } else { // JTT_PIC
- const MCSymbolRefExpr *JTExpr =
- MCSymbolRefExpr::create(LastLabel, Streamer.getContext());
- const MCSymbolRefExpr *E =
- MCSymbolRefExpr::create(Entry, Streamer.getContext());
- const MCBinaryExpr *Value =
- MCBinaryExpr::createSub(E, JTExpr, Streamer.getContext());
+ break;
+ case JumpTable::JTT_X86_64_PIC4: {
+ const MCSymbolRefExpr *JTExpr = MCSymbolRefExpr::create(JTLabel, Context);
+ const MCSymbolRefExpr *E = MCSymbolRefExpr::create(Entry, Context);
+ const MCBinaryExpr *Value = MCBinaryExpr::createSub(E, JTExpr, Context);
+ Streamer.emitValue(Value, JT.EntrySize);
+ break;
+ }
+ case JumpTable::JTT_AARCH64_REL1:
+ case JumpTable::JTT_AARCH64_REL2:
+ case JumpTable::JTT_AARCH64_REL4: {
+ MCSymbol *BaseSym = std::get<MCSymbol *>(JT.BaseAddress);
+ const MCExpr *Base = MCSymbolRefExpr::create(BaseSym, Context);
+ const MCExpr *E = MCSymbolRefExpr::create(Entry, Context);
+ const MCBinaryExpr *Value = MCBinaryExpr::createSub(E, Base, Context);
+ if (JT.EntrySize != 4)
+ Value = MCBinaryExpr::createLShr(
+ Value, MCConstantExpr::create(2, Context), Context);
+
Streamer.emitValue(Value, JT.EntrySize);
+ break;
+ }
}
Offset += JT.EntrySize;
}
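
The emitter side reverses the decode rule: for 1- and 2-byte AArch64 tables it emits `(Entry - Base) >> 2`. A minimal sketch of building that expression shape (assumes an initialized MCContext; mirrors the case added above):

```cpp
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"

using namespace llvm;

// Build the value expression for one AArch64 relative jump-table entry:
// (Entry - Base), logically shifted right by 2 unless entries are 4 bytes.
const MCExpr *buildEntryExpr(MCSymbol *Entry, MCSymbol *Base,
                             unsigned EntrySize, MCContext &Ctx) {
  const MCExpr *Diff = MCBinaryExpr::createSub(
      MCSymbolRefExpr::create(Entry, Ctx), MCSymbolRefExpr::create(Base, Ctx),
      Ctx);
  if (EntrySize != 4)
    Diff = MCBinaryExpr::createLShr(Diff, MCConstantExpr::create(2, Ctx), Ctx);
  return Diff;
}
```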
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index d1b293a..678b944 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -112,6 +112,10 @@ cl::opt<bool>
cl::desc("try to preserve basic block alignment"),
cl::cat(BoltOptCategory));
+static cl::opt<bool> PrintOffsets("print-offsets",
+ cl::desc("print basic block offsets"),
+ cl::Hidden, cl::cat(BoltOptCategory));
+
static cl::opt<bool> PrintOutputAddressRange(
"print-output-address-range",
cl::desc(
@@ -556,6 +560,11 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
if (BB->isLandingPad())
OS << " Landing Pad\n";
+ if (opts::PrintOffsets && BB->getOutputStartAddress()) {
+ OS << " OutputOffset: 0x"
+ << Twine::utohexstr(BB->getOutputStartAddress()) << '\n';
+ }
+
uint64_t BBExecCount = BB->getExecutionCount();
if (hasValidProfile()) {
OS << " Exec Count : ";
@@ -909,7 +918,7 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size,
"Invalid memory instruction");
const MCExpr *FixedEntryDispExpr = FixedEntryDispOperand->getExpr();
const uint64_t EntryAddress = getExprValue(FixedEntryDispExpr);
- uint64_t EntrySize = BC.getJumpTableEntrySize(JumpTable::JTT_PIC);
+ uint64_t EntrySize = BC.getJumpTableEntrySize(JumpTable::JTT_X86_64_PIC4);
ErrorOr<int64_t> Value =
BC.getSignedValueAtAddress(EntryAddress, EntrySize);
if (!Value)
@@ -979,12 +988,14 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size,
MemoryContentsType MemType;
if (JumpTable *JT = BC.getJumpTableContainingAddress(ArrayStart)) {
switch (JT->Type) {
- case JumpTable::JTT_NORMAL:
+ case JumpTable::JTT_X86_64_ABS:
MemType = MemoryContentsType::POSSIBLE_JUMP_TABLE;
break;
- case JumpTable::JTT_PIC:
+ case JumpTable::JTT_X86_64_PIC4:
MemType = MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
break;
+ default:
+ llvm_unreachable("Unhandled jump table type");
}
} else {
MemType = BC.analyzeMemoryAt(ArrayStart, *this);
@@ -995,7 +1006,7 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size,
if (BranchType == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE) {
if (MemType != MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE)
return IndirectBranchType::UNKNOWN;
- JTType = JumpTable::JTT_PIC;
+ JTType = JumpTable::JTT_X86_64_PIC4;
} else {
if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE)
return IndirectBranchType::UNKNOWN;
@@ -1004,7 +1015,7 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size,
return IndirectBranchType::POSSIBLE_TAIL_CALL;
BranchType = IndirectBranchType::POSSIBLE_JUMP_TABLE;
- JTType = JumpTable::JTT_NORMAL;
+ JTType = JumpTable::JTT_X86_64_ABS;
}
// Convert the instruction into jump table branch.
@@ -1908,7 +1919,8 @@ void BinaryFunction::postProcessJumpTables() {
// Create labels for all entries.
for (auto &JTI : JumpTables) {
JumpTable &JT = *JTI.second;
- if (JT.Type == JumpTable::JTT_PIC && opts::JumpTables == JTS_BASIC) {
+ if ((JT.Type == JumpTable::JTT_X86_64_PIC4 || BC.isAArch64()) &&
+ opts::JumpTables == JTS_BASIC) {
opts::JumpTables = JTS_MOVE;
BC.outs() << "BOLT-INFO: forcing -jump-tables=move as PIC jump table was "
"detected in function "
@@ -1953,6 +1965,12 @@ void BinaryFunction::postProcessJumpTables() {
}
JT.Entries.push_back(Label);
}
+ // Register jump table base address as a local symbol
+ if (uint64_t BaseAddress = std::get<0>(JT.BaseAddress)) {
+ BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(BaseAddress);
+ assert(BF && "must have a valid jump table base address");
+ JT.BaseAddress = BF->getOrCreateLocalLabel(BaseAddress);
+ }
}
// Add TakenBranches from JumpTables.
@@ -2103,7 +2121,7 @@ bool BinaryFunction::postProcessIndirectBranches(
BC.MIB->unsetJumpTable(Instr);
JumpTable *JT = BC.getJumpTableContainingAddress(LastJT);
- if (JT->Type == JumpTable::JTT_NORMAL) {
+ if (JT->Type == JumpTable::JTT_X86_64_ABS) {
// Invalidating the jump table may also invalidate other jump table
// boundaries. Until we have/need a support for this, mark the
// function as non-simple.
diff --git a/bolt/lib/Core/JumpTable.cpp b/bolt/lib/Core/JumpTable.cpp
index 6f588d2..e780c73 100644
--- a/bolt/lib/Core/JumpTable.cpp
+++ b/bolt/lib/Core/JumpTable.cpp
@@ -84,10 +84,10 @@ void bolt::JumpTable::updateOriginal() {
const uint64_t BaseOffset = getAddress() - getSection().getAddress();
uint64_t EntryOffset = BaseOffset;
for (MCSymbol *Entry : Entries) {
- const uint32_t RelType =
- Type == JTT_NORMAL ? ELF::R_X86_64_64 : ELF::R_X86_64_PC32;
+ const uint64_t RelType =
+ Type == JTT_X86_64_ABS ? ELF::R_X86_64_64 : ELF::R_X86_64_PC32;
const uint64_t RelAddend =
- Type == JTT_NORMAL ? 0 : EntryOffset - BaseOffset;
+ Type == JTT_X86_64_ABS ? 0 : EntryOffset - BaseOffset;
// Replace existing relocation with the new one to allow any modifications
// to the original jump table.
if (BC.HasRelocations)
@@ -99,7 +99,7 @@ void bolt::JumpTable::updateOriginal() {
void bolt::JumpTable::print(raw_ostream &OS) const {
uint64_t Offset = 0;
- if (Type == JTT_PIC)
+ if (Type == JTT_X86_64_PIC4)
OS << "PIC ";
ListSeparator LS;
diff --git a/bolt/lib/Passes/Aligner.cpp b/bolt/lib/Passes/Aligner.cpp
index c3ddeda..1b499ac 100644
--- a/bolt/lib/Passes/Aligner.cpp
+++ b/bolt/lib/Passes/Aligner.cpp
@@ -77,6 +77,11 @@ static void alignCompact(BinaryFunction &Function,
size_t HotSize = 0;
size_t ColdSize = 0;
+ if (!Function.hasProfile() && BC.isAArch64()) {
+ Function.setAlignment(Function.getMinAlignment());
+ return;
+ }
+
for (const BinaryBasicBlock &BB : Function)
if (BB.isSplit())
ColdSize += BC.computeCodeSize(BB.begin(), BB.end(), Emitter);
diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp
index d8628c6..6b5e08b 100644
--- a/bolt/lib/Passes/BinaryPasses.cpp
+++ b/bolt/lib/Passes/BinaryPasses.cpp
@@ -1287,6 +1287,8 @@ Error AssignSections::runOnFunctions(BinaryContext &BC) {
if (opts::isHotTextMover(Function)) {
Function.setCodeSectionName(BC.getHotTextMoverSectionName());
Function.setColdCodeSectionName(BC.getHotTextMoverSectionName());
+ // TODO: find a better place to mark a function as a mover.
+ Function.setHotTextMover(true);
continue;
}
diff --git a/bolt/lib/Passes/IndirectCallPromotion.cpp b/bolt/lib/Passes/IndirectCallPromotion.cpp
index 2b5a591..d70fd0e 100644
--- a/bolt/lib/Passes/IndirectCallPromotion.cpp
+++ b/bolt/lib/Passes/IndirectCallPromotion.cpp
@@ -246,7 +246,7 @@ IndirectCallPromotion::getCallTargets(BinaryBasicBlock &BB,
if (const JumpTable *JT = BF.getJumpTable(Inst)) {
// Don't support PIC jump tables for now
- if (!opts::ICPJumpTablesByTarget && JT->Type == JumpTable::JTT_PIC)
+ if (!opts::ICPJumpTablesByTarget && JT->Type == JumpTable::JTT_X86_64_PIC4)
return Targets;
const Location From(BF.getSymbol());
const std::pair<size_t, size_t> Range =
@@ -256,7 +256,7 @@ IndirectCallPromotion::getCallTargets(BinaryBasicBlock &BB,
const JumpTable::JumpInfo *JI =
JT->Counts.empty() ? &DefaultJI : &JT->Counts[Range.first];
const size_t JIAdj = JT->Counts.empty() ? 0 : 1;
- assert(JT->Type == JumpTable::JTT_PIC ||
+ assert(JT->Type == JumpTable::JTT_X86_64_PIC4 ||
JT->EntrySize == BC.AsmInfo->getCodePointerSize());
for (size_t I = Range.first; I < Range.second; ++I, JI += JIAdj) {
MCSymbol *Entry = JT->Entries[I];
diff --git a/bolt/lib/Passes/JTFootprintReduction.cpp b/bolt/lib/Passes/JTFootprintReduction.cpp
index 71bdbba..13b37dc3 100644
--- a/bolt/lib/Passes/JTFootprintReduction.cpp
+++ b/bolt/lib/Passes/JTFootprintReduction.cpp
@@ -202,7 +202,7 @@ bool JTFootprintReduction::tryOptimizePIC(BinaryContext &BC,
JumpTable->OutputEntrySize = 4;
// DePICify
- JumpTable->Type = JumpTable::JTT_NORMAL;
+ JumpTable->Type = JumpTable::JTT_X86_64_ABS;
BB.replaceInstruction(Inst, NewFrag.begin(), NewFrag.end());
return true;
diff --git a/bolt/lib/Passes/LongJmp.cpp b/bolt/lib/Passes/LongJmp.cpp
index e6bd417..75227da 100644
--- a/bolt/lib/Passes/LongJmp.cpp
+++ b/bolt/lib/Passes/LongJmp.cpp
@@ -31,6 +31,11 @@ static cl::opt<bool>
cl::desc("generate code for binaries <128MB on AArch64"),
cl::init(false), cl::cat(BoltCategory));
+static cl::opt<bool>
+ ExperimentalRelaxation("relax-exp",
+ cl::desc("run experimental relaxation pass"),
+ cl::init(false), cl::cat(BoltOptCategory));
+
static cl::opt<bool> GroupStubs("group-stubs",
cl::desc("share stubs across functions"),
cl::init(true), cl::cat(BoltOptCategory));
@@ -897,12 +902,185 @@ void LongJmpPass::relaxLocalBranches(BinaryFunction &BF) {
}
}
+void LongJmpPass::relaxCalls(BinaryContext &BC) {
+ // Map every function to its direct callees. Note that this is different from
+ // a typical call graph as we completely ignore indirect calls.
+ uint64_t EstimatedSize = 0;
+ // Conservatively estimate emitted function size.
+ auto estimateFunctionSize = [&](const BinaryFunction &BF) -> uint64_t {
+ if (!BC.shouldEmit(BF))
+ return 0;
+ uint64_t Size = BF.estimateSize();
+ if (BF.hasValidIndex())
+ Size += BF.getAlignment();
+ if (BF.hasIslandsInfo()) {
+ Size += BF.estimateConstantIslandSize();
+ Size += BF.getConstantIslandAlignment();
+ }
+
+ return Size;
+ };
+
+ std::unordered_map<BinaryFunction *, std::set<BinaryFunction *>> CallMap;
+ for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
+ if (!BC.shouldEmit(BF))
+ continue;
+
+ EstimatedSize += estimateFunctionSize(BF);
+
+ for (const BinaryBasicBlock &BB : BF) {
+ for (const MCInst &Inst : BB) {
+ if (!BC.MIB->isCall(Inst) || BC.MIB->isIndirectCall(Inst) ||
+ BC.MIB->isIndirectBranch(Inst))
+ continue;
+ const MCSymbol *TargetSymbol = BC.MIB->getTargetSymbol(Inst);
+ assert(TargetSymbol);
+
+ BinaryFunction *Callee = BC.getFunctionForSymbol(TargetSymbol);
+ if (!Callee) {
+          /* Ignore internal calls */
+ continue;
+ }
+
+ CallMap[&BF].insert(Callee);
+ }
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "LongJmp: estimated code size : " << EstimatedSize
+ << '\n');
+
+ // Build clusters in the order the functions will appear in the output.
+ std::vector<FunctionCluster> Clusters;
+ Clusters.emplace_back(FunctionCluster());
+
+ for (BinaryFunction *BF : BC.getSortedFunctions()) {
+ if (!BC.shouldEmit(*BF))
+ continue;
+
+ const uint64_t BFSize = estimateFunctionSize(*BF);
+ if (Clusters.empty() || Clusters.back().Size + BFSize > MaxClusterSize) {
+ Clusters.emplace_back(FunctionCluster());
+ }
+
+ FunctionCluster &FC = Clusters.back();
+ FC.Functions.insert(BF);
+ auto It = FC.Callees.find(BF);
+ if (It != FC.Callees.end()) {
+ FC.Callees.erase(It);
+ }
+ FC.Size += BFSize;
+ FC.LastBF = BF;
+
+ for (BinaryFunction *Callee : CallMap[BF])
+ if (!FC.Functions.count(Callee))
+ FC.Callees.insert(Callee);
+ }
+
+ // Print cluster stats.
+ dbgs() << "Built " << Clusters.size() << " clusters\n";
+ uint64_t Index = 0;
+ for (const FunctionCluster &FC : Clusters) {
+ dbgs() << " Cluster: " << Index++ << '\n';
+ dbgs() << " " << FC.Functions.size() << " functions\n";
+ dbgs() << " " << FC.Callees.size() << " callees\n";
+ dbgs() << " " << FC.Size << " bytes\n";
+ }
+
+ if (Clusters.size() > 2) {
+    BC.errs() << "BOLT-ERROR: large code model is unsupported\n";
+ exit(1);
+ }
+
+ if (Clusters.size() == 1)
+ return;
+
+ // Populate one of the clusters with PLT functions based on the proximity of
+ // the PLT section to avoid unneeded thunk redirection.
+ // FIXME: this part is extremely fragile as it depends on the placement
+ // of PLT section and its proximity to old or new .text.
+  // FIXME: a slightly better approach would be to always use thunks for PLT and
+ // eliminate redirection later using final addresses in address maps.
+ const size_t PLTClusterNum = opts::UseOldText ? 1 : 0;
+ for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
+ if (BF.isPLTFunction()) {
+ auto &PLTCluster = Clusters[PLTClusterNum];
+ PLTCluster.Functions.insert(&BF);
+ auto It = PLTCluster.Callees.find(&BF);
+ if (It != PLTCluster.Callees.end())
+ PLTCluster.Callees.erase(It);
+ }
+ }
+
+ // FIXME: section name to use for thunks.
+ std::string SectionName =
+ Clusters[0].LastBF->getCodeSectionName().str().str();
+
+ // Build thunk functions.
+ auto createSmallThunk = [&](BinaryFunction &Callee) {
+ BinaryFunction *ThunkBF =
+ BC.createThunkBinaryFunction("__BThunk__" + Callee.getOneName().str());
+ MCInst Inst;
+ BC.MIB->createTailCall(Inst, Callee.getSymbol(), BC.Ctx.get());
+ ThunkBF->addBasicBlock()->addInstruction(Inst);
+ ThunkBF->setCodeSectionName(SectionName);
+
+ return ThunkBF;
+ };
+
+ DenseMap<BinaryFunction *, BinaryFunction *> Thunks;
+ for (FunctionCluster &FC : Clusters) {
+ SmallVector<BinaryFunction *, 16> Callees(FC.Callees.begin(),
+ FC.Callees.end());
+ llvm::sort(Callees, compareBinaryFunctionByIndex);
+ for (BinaryFunction *Callee : Callees)
+ Thunks[Callee] = createSmallThunk(*Callee);
+ }
+
+ BC.outs() << "BOLT-INFO: " << Thunks.size() << " thunks created\n";
+
+ // Replace callees with thunks.
+ for (FunctionCluster &FC : Clusters) {
+ for (BinaryFunction *BF : FC.Functions) {
+ if (!CallMap.count(BF))
+ continue;
+
+ for (BinaryBasicBlock &BB : *BF) {
+ for (MCInst &Inst : BB) {
+ if (!BC.MIB->isCall(Inst) || BC.MIB->isIndirectCall(Inst) ||
+ BC.MIB->isIndirectBranch(Inst))
+ continue;
+ const MCSymbol *TargetSymbol = BC.MIB->getTargetSymbol(Inst);
+ assert(TargetSymbol);
+
+ BinaryFunction *Callee = BC.getFunctionForSymbol(TargetSymbol);
+ if (!Callee) {
+ /* Ignore internal calls */
+ continue;
+ }
+
+ // Check if the callee is in the same cluster.
+ if (!FC.Callees.count(Callee))
+ continue;
+
+ // Use thunk as the call destination.
+ BC.MIB->replaceBranchTarget(Inst, Thunks[Callee]->getSymbol(),
+ BC.Ctx.get());
+ }
+ }
+ }
+ }
+
+ BC.setThunkLocation(Clusters[0].LastBF);
+}
+
Error LongJmpPass::runOnFunctions(BinaryContext &BC) {
- if (opts::CompactCodeModel) {
+ if (opts::CompactCodeModel || opts::ExperimentalRelaxation) {
BC.outs()
<< "BOLT-INFO: relaxing branches for compact code model (<128MB)\n";
+ // TODO: set correct code model based on the total size of split-code.
ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
relaxLocalBranches(BF);
};
@@ -916,6 +1094,12 @@ Error LongJmpPass::runOnFunctions(BinaryContext &BC) {
BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
SkipPredicate, "RelaxLocalBranches");
+ if (!opts::ExperimentalRelaxation)
+ return Error::success();
+
+ BC.outs() << "BOLT-INFO: starting experimental relaxation pass\n";
+ relaxCalls(BC);
+
return Error::success();
}
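
Why a single island between two clusters is enough for `relaxCalls`: the island sits right after cluster 0, so a caller anywhere in cluster 0 reaches its thunk, and the thunk reaches any callee in cluster 1 as long as total code stays under 256MiB. A worked check under those assumptions (sketch, not BOLT code):

```cpp
#include <cassert>
#include <cstdint>

constexpr uint64_t MiB = 1024 * 1024;
constexpr uint64_t Range = 128 * MiB; // AArch64 B/BL reach

bool reachable(uint64_t From, uint64_t To) {
  return (From > To ? From - To : To - From) <= Range;
}

int main() {
  const uint64_t ClusterSize = 125 * MiB;
  const uint64_t Island = ClusterSize;     // island placed after cluster 0
  const uint64_t Caller = 0;               // worst case: start of cluster 0
  const uint64_t Callee = 2 * ClusterSize; // worst case: end of cluster 1
  assert(reachable(Caller, Island) && reachable(Island, Callee));
  assert(!reachable(Caller, Callee)); // the direct call alone would not relax
}
```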
diff --git a/bolt/lib/Passes/PatchEntries.cpp b/bolt/lib/Passes/PatchEntries.cpp
index 8a2f0a3..a37ee33 100644
--- a/bolt/lib/Passes/PatchEntries.cpp
+++ b/bolt/lib/Passes/PatchEntries.cpp
@@ -36,16 +36,20 @@ Error PatchEntries::runOnFunctions(BinaryContext &BC) {
if (!opts::ForcePatch) {
// Mark the binary for patching if we did not create external references
// for original code in any of functions we are not going to emit.
- bool NeedsPatching = llvm::any_of(
- llvm::make_second_range(BC.getBinaryFunctions()),
- [&](BinaryFunction &BF) {
- return !BC.shouldEmit(BF) && !BF.hasExternalRefRelocations();
- });
+ bool NeedsPatching =
+ llvm::any_of(llvm::make_second_range(BC.getBinaryFunctions()),
+ [&](BinaryFunction &BF) {
+ return !BF.isPseudo() && !BC.shouldEmit(BF) &&
+ !BF.hasExternalRefRelocations();
+ });
if (!NeedsPatching)
return Error::success();
}
+ assert(!opts::UseOldText &&
+ "Cannot patch entries while overwriting original .text");
+
if (opts::Verbosity >= 1)
BC.outs() << "BOLT-INFO: patching entries in original code\n";
diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
index dd48653..8304693 100644
--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
+++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
@@ -497,6 +497,9 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
// memory profiling data.
Manager.registerPass(std::make_unique<ReorderData>());
+ // Assign each function an output section.
+ Manager.registerPass(std::make_unique<AssignSections>());
+
if (BC.isAArch64()) {
Manager.registerPass(
std::make_unique<ADRRelaxationPass>(PrintAdrRelaxation));
@@ -521,9 +524,6 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
Manager.registerPass(
std::make_unique<RetpolineInsertion>(PrintRetpolineInsertion));
- // Assign each function an output section.
- Manager.registerPass(std::make_unique<AssignSections>());
-
// Patch original function entries
if (BC.HasRelocations)
Manager.registerPass(std::make_unique<PatchEntries>());
diff --git a/bolt/lib/Rewrite/CMakeLists.txt b/bolt/lib/Rewrite/CMakeLists.txt
index c83cf36..d27bd22 100644
--- a/bolt/lib/Rewrite/CMakeLists.txt
+++ b/bolt/lib/Rewrite/CMakeLists.txt
@@ -17,6 +17,7 @@ add_llvm_library(LLVMBOLTRewrite
DWARFRewriter.cpp
ExecutableFileMemoryManager.cpp
JITLinkLinker.cpp
+ JumpTableInfoReader.cpp
LinuxKernelRewriter.cpp
MachORewriteInstance.cpp
MetadataManager.cpp
diff --git a/bolt/lib/Rewrite/JumpTableInfoReader.cpp b/bolt/lib/Rewrite/JumpTableInfoReader.cpp
new file mode 100644
index 0000000..98230a2
--- /dev/null
+++ b/bolt/lib/Rewrite/JumpTableInfoReader.cpp
@@ -0,0 +1,91 @@
+//===- bolt/Rewrite/JumpTableInfoReader.cpp -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Read .llvm_jump_table_info section and register jump tables.
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Core/JumpTable.h"
+#include "bolt/Rewrite/MetadataRewriter.h"
+#include "bolt/Rewrite/MetadataRewriters.h"
+#include "llvm/Support/DataExtractor.h"
+
+using namespace llvm;
+using namespace bolt;
+
+namespace {
+class JumpTableInfoReader final : public MetadataRewriter {
+
+public:
+ JumpTableInfoReader(StringRef Name, BinaryContext &BC)
+ : MetadataRewriter(Name, BC) {}
+ Error preDisasmInitializer() override;
+};
+
+Error JumpTableInfoReader::preDisasmInitializer() {
+ if (!BC.isAArch64())
+ return Error::success();
+
+ ErrorOr<BinarySection &> ErrorOrJTInfoSection =
+ BC.getUniqueSectionByName(".llvm_jump_table_info");
+ if (std::error_code E = ErrorOrJTInfoSection.getError())
+ return Error::success();
+ BinarySection &JTInfoSection = *ErrorOrJTInfoSection;
+ StringRef Buf = JTInfoSection.getContents();
+ DataExtractor DE = DataExtractor(Buf, BC.AsmInfo->isLittleEndian(),
+ BC.AsmInfo->getCodePointerSize());
+ DataExtractor::Cursor Cursor(0);
+ while (Cursor && !DE.eof(Cursor)) {
+ const uint8_t Format = DE.getU8(Cursor);
+ const uint64_t JTAddr = DE.getAddress(Cursor);
+ const uint64_t JTBase = DE.getAddress(Cursor);
+ const uint64_t JTLoad = DE.getAddress(Cursor);
+ const uint64_t Branch = DE.getAddress(Cursor);
+ const uint64_t NumEntries = DE.getULEB128(Cursor);
+
+ JumpTable::JumpTableType Type = JumpTable::JTT_AARCH64_LAST;
+ switch (Format) {
+ case 2:
+ Type = JumpTable::JTT_AARCH64_REL1;
+ break;
+ case 3:
+ Type = JumpTable::JTT_AARCH64_REL2;
+ break;
+ case 4:
+ Type = JumpTable::JTT_AARCH64_REL4;
+ break;
+ default:
+      errs() << "BOLT-WARNING: unknown jump table info type " << (int)Format
+ << " for jump table " << Twine::utohexstr(JTAddr) << '\n';
+ continue;
+ }
+
+ BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(Branch);
+ if (!BF) {
+ BC.errs() << "BOLT-WARNING: binary function not found for jump table "
+ "with address "
+ << Twine::utohexstr(JTAddr) << " and branch "
+ << Twine::utohexstr(Branch) << '\n';
+ continue;
+ }
+ const MCSymbol *JTSym = BC.getOrCreateJumpTable(*BF, JTAddr, Type);
+ assert(JTSym && "failed to create a jump table");
+ JumpTable *JT = BC.getJumpTableContainingAddress(JTAddr);
+ assert(JT && "internal error creating jump table");
+ JT->BaseAddress = JTBase;
+ JT->MemLocInstrAddress = JTLoad;
+ JT->Entries.resize(NumEntries);
+ }
+ return Cursor.takeError();
+}
+} // namespace
+
+std::unique_ptr<MetadataRewriter>
+llvm::bolt::createJumpTableInfoReader(BinaryContext &BC) {
+ return std::make_unique<JumpTableInfoReader>("jump-table-info-reader", BC);
+}
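
Each `.llvm_jump_table_info` record the reader consumes has this shape (a descriptive sketch in parse order; field names are illustrative, and the on-disk record is variable length because of the ULEB128 count):

```cpp
#include <cstdint>

// One record of .llvm_jump_table_info as read above. Addresses are
// code-pointer sized (8 bytes on AArch64).
struct JumpTableInfoRecord {
  uint8_t Format;      // 2/3/4 -> 1/2/4-byte relative entries
  uint64_t JTAddr;     // address of the jump table itself
  uint64_t JTBase;     // base address the entries are relative to
  uint64_t JTLoad;     // address of the load instruction (MemLocInstr)
  uint64_t Branch;     // address of the indirect branch using the table
  uint64_t NumEntries; // ULEB128-encoded in the section
};
```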
diff --git a/bolt/lib/Rewrite/MetadataManager.cpp b/bolt/lib/Rewrite/MetadataManager.cpp
index 713d2e4..8114e156 100644
--- a/bolt/lib/Rewrite/MetadataManager.cpp
+++ b/bolt/lib/Rewrite/MetadataManager.cpp
@@ -32,6 +32,18 @@ void MetadataManager::runSectionInitializers() {
}
}
+void MetadataManager::runInitializersPreDisasm() {
+ for (auto &Rewriter : Rewriters) {
+ LLVM_DEBUG(dbgs() << "BOLT-DEBUG: invoking " << Rewriter->getName()
+                      << " before disassembly\n");
+ if (Error E = Rewriter->preDisasmInitializer()) {
+ errs() << "BOLT-ERROR: while running " << Rewriter->getName()
+ << " in pre-disasm state: " << toString(std::move(E)) << '\n';
+ exit(1);
+ }
+ }
+}
+
void MetadataManager::runInitializersPreCFG() {
for (auto &Rewriter : Rewriters) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: invoking " << Rewriter->getName()
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index f204aa3..33526b0 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -695,7 +695,7 @@ Error RewriteInstance::run() {
selectFunctionsToProcess();
- readDebugInfo();
+ processMetadataPreDisasm();
disassembleFunctions();
@@ -2479,6 +2479,13 @@ void RewriteInstance::readDynamicRelocations(const SectionRef &Section,
exit(1);
}
+ // Workaround for AArch64 issue with hot text.
+ if (BC->isAArch64() && (SymbolName == "__hot_start" ||
+ SymbolName == "__hot_end")) {
+ BC->addRelocation(Rel.getOffset(), Symbol, ELF::R_AARCH64_ABS64, Addend);
+ continue;
+ }
+
BC->addDynamicRelocation(Rel.getOffset(), Symbol, RType, Addend);
}
}
@@ -3249,6 +3256,8 @@ void RewriteInstance::initializeMetadataManager() {
MetadataManager.registerRewriter(createPseudoProbeRewriter(*BC));
MetadataManager.registerRewriter(createSDTRewriter(*BC));
+
+ MetadataManager.registerRewriter(createJumpTableInfoReader(*BC));
}
void RewriteInstance::processSectionMetadata() {
@@ -3259,6 +3268,14 @@ void RewriteInstance::processSectionMetadata() {
MetadataManager.runSectionInitializers();
}
+void RewriteInstance::processMetadataPreDisasm() {
+ NamedRegionTimer T("processmetadata-predisasm", "process metadata pre-disasm",
+ TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
+ MetadataManager.runInitializersPreDisasm();
+
+ readDebugInfo();
+}
+
void RewriteInstance::processMetadataPreCFG() {
NamedRegionTimer T("processmetadata-precfg", "process metadata pre-CFG",
TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
@@ -3854,15 +3871,41 @@ void RewriteInstance::mapCodeSections(BOLTLinker::SectionMapper MapSection) {
return Address;
};
+  // Try to allocate sections before \p Address and return the address of the
+  // first allocated section, or 0 if \p Address is not large enough.
+ auto allocateBefore = [&](uint64_t Address) -> uint64_t {
+ for (auto SI = CodeSections.rbegin(), SE = CodeSections.rend(); SI != SE;
+ ++SI) {
+ BinarySection *Section = *SI;
+ if (Section->getOutputSize() > Address)
+ return 0;
+ Address -= Section->getOutputSize();
+ Address = alignDown(Address, Section->getAlignment());
+ Section->setOutputAddress(Address);
+ }
+ return Address;
+ };
+
// Check if we can fit code in the original .text
bool AllocationDone = false;
if (opts::UseOldText) {
- const uint64_t CodeSize =
- allocateAt(BC->OldTextSectionAddress) - BC->OldTextSectionAddress;
+ uint64_t StartAddress;
+ uint64_t EndAddress;
+ if (opts::HotFunctionsAtEnd) {
+ EndAddress = BC->OldTextSectionAddress + BC->OldTextSectionSize;
+ StartAddress = allocateBefore(EndAddress);
+ } else {
+ StartAddress = BC->OldTextSectionAddress;
+ EndAddress = allocateAt(BC->OldTextSectionAddress);
+ }
+ const uint64_t CodeSize = EndAddress - StartAddress;
if (CodeSize <= BC->OldTextSectionSize) {
BC->outs() << "BOLT-INFO: using original .text for new code with 0x"
- << Twine::utohexstr(opts::AlignText) << " alignment\n";
+ << Twine::utohexstr(opts::AlignText) << " alignment";
+ if (StartAddress != BC->OldTextSectionAddress)
+ BC->outs() << " at 0x" << Twine::utohexstr(StartAddress);
+ BC->outs() << '\n';
AllocationDone = true;
} else {
BC->errs()
@@ -4133,6 +4176,11 @@ void RewriteInstance::patchELFPHDRTable() {
NewWritableSegmentSize = NextAvailableAddress - NewWritableSegmentAddress;
}
+ if (!NewTextSegmentSize && !NewWritableSegmentSize) {
+ BC->outs() << "BOLT-INFO: not adding new segments\n";
+ return;
+ }
+
const uint64_t SavedPos = OS.tell();
OS.seek(PHDRTableOffset);
@@ -4487,6 +4535,11 @@ bool RewriteInstance::shouldStrip(const ELFShdrTy &Section,
if (opts::RemoveSymtab && Section.sh_type == ELF::SHT_SYMTAB)
return true;
+ // Strip jump table metadata by default.
+ // TBD: add a flag to rewrite it.
+ if (SectionName == ".llvm_jump_table_info")
+ return true;
+
return false;
}
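
The `allocateBefore` helper added above lays sections out from the top of old `.text` downward, aligning each start address down. A standalone sketch of that placement loop (illustrative types, not the BOLT API; assumes power-of-two alignments):

```cpp
#include <cstdint>
#include <vector>

struct Section { uint64_t Size, Align, OutAddr; };

// Place sections so the last one ends at Limit, walking the list in
// reverse and rounding each start down to its alignment. Returns the
// start of the first section, or 0 on underflow.
uint64_t allocateBefore(std::vector<Section> &Sections, uint64_t Limit) {
  for (auto It = Sections.rbegin(); It != Sections.rend(); ++It) {
    if (It->Size > Limit)
      return 0;
    Limit -= It->Size;
    Limit &= ~(It->Align - 1); // alignDown for power-of-two alignments
    It->OutAddr = Limit;
  }
  return Limit;
}
```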
diff --git a/bolt/test/AArch64/Inputs/jump-table.c b/bolt/test/AArch64/Inputs/jump-table.c
new file mode 100644
index 0000000..198c483
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/jump-table.c
@@ -0,0 +1,20 @@
+volatile int g;
+void switchy(int x) {
+ switch (x) {
+ case 0: g--; break;
+ case 1: g++; break;
+ case 2: g = 42; break;
+ case 3: g += 17; break;
+ case 4: g -= 66; break;
+ case 5: g++; g--; break;
+ case 6: g--; g++; break;
+ case 66: g-=3; g++; break;
+ case 8: g+=5; g--; break;
+ case 10: g+=5; g--; break;
+ case 12: g+=42; g--; break;
+ case 15: g+=99; g--; break;
+ case 20: switchy(g); break;
+ case 21: g -= 1234; break;
+ default: g = 0; break;
+ }
+}
diff --git a/bolt/test/AArch64/jump-table-info.s b/bolt/test/AArch64/jump-table-info.s
new file mode 100644
index 0000000..e2b67c6
--- /dev/null
+++ b/bolt/test/AArch64/jump-table-info.s
@@ -0,0 +1,186 @@
+## Check parsing of a .llvm_jump_table_info section
+## The assembly is produced from bolt/test/AArch64/Inputs/jump-table.c
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -static
+# RUN: llvm-bolt %t.exe -o %t.null -print-jump-tables | FileCheck %s
+
+# Confirm 67 entries are parsed:
+# CHECK: jump tables for function _Z7switchyi:
+# CHECK-NEXT: Jump table {{.*}} for function _Z7switchyi
+# CHECK: 0x0042 : .Ltmp16
+
+ .text
+ .globl _Z7switchyi // -- Begin function _Z7switchyi
+ .p2align 2
+ .type _Z7switchyi,@function
+_Z7switchyi: // @_Z7switchyi
+ .cfi_startproc
+// %bb.0: // %entry
+ adrp x8, g
+ cmp w0, #20
+ b.ne .LBB0_2
+.LBB0_1: // %sw.bb26
+ // =>This Inner Loop Header: Depth=1
+ ldr w0, [x8, :lo12:g]
+ cmp w0, #20
+ b.eq .LBB0_1
+.LBB0_2: // %tailrecurse
+ cmp w0, #66
+ b.hi .LBB0_18
+// %bb.3: // %tailrecurse
+ mov w9, w0
+ adrp x10, .LJTI0_0
+ add x10, x10, :lo12:.LJTI0_0
+ adr x11, .LBB0_4
+.Ltmp0:
+ ldrb w12, [x10, x9]
+ add x11, x11, x12, lsl #2
+.Ltmp1:
+ br x11
+.LBB0_4: // %sw.bb17
+ ldr w9, [x8, :lo12:g]
+ add w9, w9, #5
+ b .LBB0_13
+.LBB0_5: // %sw.bb11
+ ldr w9, [x8, :lo12:g]
+ sub w9, w9, #3
+ b .LBB0_10
+.LBB0_6: // %sw.bb5
+ ldr w9, [x8, :lo12:g]
+ add w9, w9, #1
+ b .LBB0_13
+.LBB0_7: // %sw.bb3
+ ldr w9, [x8, :lo12:g]
+ add w9, w9, #17
+ str w9, [x8, :lo12:g]
+ ret
+.LBB0_8: // %sw.bb23
+ ldr w9, [x8, :lo12:g]
+ add w9, w9, #99
+ b .LBB0_13
+.LBB0_9: // %sw.bb8
+ ldr w9, [x8, :lo12:g]
+ sub w9, w9, #1
+.LBB0_10: // %sw.epilog
+ str w9, [x8, :lo12:g]
+.LBB0_11: // %sw.bb1
+ ldr w9, [x8, :lo12:g]
+ add w9, w9, #1
+ str w9, [x8, :lo12:g]
+ ret
+.LBB0_12: // %sw.bb20
+ ldr w9, [x8, :lo12:g]
+ add w9, w9, #42
+.LBB0_13: // %sw.epilog
+ str w9, [x8, :lo12:g]
+.LBB0_14: // %sw.bb
+ ldr w9, [x8, :lo12:g]
+ sub w9, w9, #1
+ str w9, [x8, :lo12:g]
+ ret
+.LBB0_15: // %sw.epilog.loopexit
+ mov w9, #42 // =0x2a
+ str w9, [x8, :lo12:g]
+ ret
+.LBB0_16: // %sw.bb27
+ ldr w9, [x8, :lo12:g]
+ sub w9, w9, #1234
+ str w9, [x8, :lo12:g]
+ ret
+.LBB0_17: // %sw.bb4
+ ldr w9, [x8, :lo12:g]
+ sub w9, w9, #66
+ str w9, [x8, :lo12:g]
+ ret
+.LBB0_18: // %sw.epilog.loopexit29
+ str wzr, [x8, :lo12:g]
+ ret
+.Lfunc_end0:
+ .size _Z7switchyi, .Lfunc_end0-_Z7switchyi
+ .cfi_endproc
+ .section .rodata,"a",@progbits
+.LJTI0_0:
+ .byte (.LBB0_14-.LBB0_4)>>2
+ .byte (.LBB0_11-.LBB0_4)>>2
+ .byte (.LBB0_15-.LBB0_4)>>2
+ .byte (.LBB0_7-.LBB0_4)>>2
+ .byte (.LBB0_17-.LBB0_4)>>2
+ .byte (.LBB0_6-.LBB0_4)>>2
+ .byte (.LBB0_9-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_4-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_4-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_12-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_8-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_16-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_18-.LBB0_4)>>2
+ .byte (.LBB0_5-.LBB0_4)>>2
+ .section .llvm_jump_table_info,"",@0x6fff4c0e
+ .byte 2 // Format 2: 1-byte relative entries, shifted right by 2
+ .xword .LJTI0_0
+ .xword .LBB0_4 // Base
+ .xword .Ltmp0 // Load Instruction
+ .xword .Ltmp1 // Branch Instruction
+ .byte 67 // Number of Entries
+ // -- End function
+ .type g,@object // @g
+ .bss
+ .globl g
+ .p2align 2, 0x0
+g:
+ .word 0 // 0x0
+ .size g, 4
+ .section ".note.GNU-stack","",@progbits
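The .llvm_jump_table_info record emitted above has a fixed layout: a one-byte format tag, then four 8-byte addresses (the jump table, the base label, the load instruction, and the branch instruction), and finally a one-byte entry count. As a rough sketch of what a consumer of this section has to do, assuming little-endian data and ignoring relocations (struct and function names are hypothetical, and this is not BOLT's actual reader):

    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Hypothetical decoded form of one record, mirroring the directives
    // in the test: format byte, four 8-byte addresses, entry count.
    struct JumpTableInfoRecord {
      uint8_t Format;         // 2 = 1-byte entries storing (Target - Base) >> 2
      uint64_t TableAddress;  // .LJTI0_0
      uint64_t BaseAddress;   // .LBB0_4
      uint64_t LoadAddress;   // .Ltmp0, the ldrb
      uint64_t BranchAddress; // .Ltmp1, the br
      uint8_t NumEntries;     // 67 in this test
    };

    // Walk the raw section contents record by record.
    std::vector<JumpTableInfoRecord> parseJumpTableInfo(const uint8_t *Data,
                                                        size_t Size) {
      std::vector<JumpTableInfoRecord> Records;
      size_t Offset = 0;
      const size_t RecordSize = 1 + 4 * sizeof(uint64_t) + 1; // 34 bytes
      auto ReadU64 = [&](size_t &Off) {
        uint64_t Value;
        std::memcpy(&Value, Data + Off, sizeof(Value));
        Off += sizeof(Value);
        return Value;
      };
      while (Offset + RecordSize <= Size) {
        JumpTableInfoRecord R;
        R.Format = Data[Offset++];
        R.TableAddress = ReadU64(Offset);
        R.BaseAddress = ReadU64(Offset);
        R.LoadAddress = ReadU64(Offset);
        R.BranchAddress = ReadU64(Offset);
        R.NumEntries = Data[Offset++];
        Records.push_back(R);
      }
      return Records;
    }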
diff --git a/bolt/test/program-header.test b/bolt/test/program-header.test
new file mode 100644
index 0000000..4552303
--- /dev/null
+++ b/bolt/test/program-header.test
@@ -0,0 +1,14 @@
+# Check that llvm-bolt does not add new segments when writing code in-place.
+
+REQUIRES: system-linux
+
+RUN: %clang %cflags %p/Inputs/hello.c -o %t -no-pie -Wl,-q
+RUN: llvm-bolt %t -o %t.bolt --use-old-text --align-functions=1 \
+RUN: --no-huge-pages --align-text=1 --use-gnu-stack \
+RUN: | FileCheck %s --check-prefix=CHECK-BOLT
+RUN: llvm-readelf -WS %t.bolt | FileCheck %s
+
+CHECK-BOLT: rewriting .eh_frame_hdr in-place
+CHECK-BOLT: not adding new segments
+
+CHECK-NOT: .bolt.org.eh_frame_hdr
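Note: this test exercises the early return added to patchELFPHDRTable() above. With --use-old-text and --use-gnu-stack the rewritten code and headers fit inside the original binary, so no new text or writable segment is created, BOLT prints "not adding new segments", and no .bolt.org.eh_frame_hdr backup section is left behind.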
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 4c01088..d3e038d 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5730,6 +5730,10 @@ def pg : Flag<["-"], "pg">, HelpText<"Enable mcount instrumentation">,
MarshallingInfoFlag<CodeGenOpts<"InstrumentForProfiling">>;
def pipe : Flag<["-", "--"], "pipe">,
HelpText<"Use pipes between commands, when possible">;
+// Facebook T92898286
+def post_link_optimize : Flag<["--"], "post-link-optimize">,
+ HelpText<"Apply post-link optimizations using BOLT">;
+// End Facebook T92898286
def prebind__all__twolevel__modules : Flag<["-"], "prebind_all_twolevel_modules">;
def prebind : Flag<["-"], "prebind">;
def preload : Flag<["-"], "preload">;
diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp
index a0fa3c6..9ffff3e 100644
--- a/clang/lib/Driver/ToolChains/Gnu.cpp
+++ b/clang/lib/Driver/ToolChains/Gnu.cpp
@@ -671,12 +671,41 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
}
}
+ // Facebook T92898286
+ if (Args.hasArg(options::OPT_post_link_optimize))
+ CmdArgs.push_back("-q");
+ // End Facebook T92898286
+
Args.addAllArgs(CmdArgs, {options::OPT_T, options::OPT_t});
const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath());
C.addCommand(std::make_unique<Command>(JA, *this,
ResponseFileSupport::AtFileCurCP(),
Exec, CmdArgs, Inputs, Output));
+ // Facebook T92898286
+ if (!Args.hasArg(options::OPT_post_link_optimize) || !Output.isFilename())
+ return;
+
+ const char *MvExec = Args.MakeArgString(ToolChain.GetProgramPath("mv"));
+ ArgStringList MoveCmdArgs;
+ MoveCmdArgs.push_back(Output.getFilename());
+ const char *PreBoltBin =
+ Args.MakeArgString(Twine(Output.getFilename()) + ".pre-bolt");
+ MoveCmdArgs.push_back(PreBoltBin);
+ C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
+ MvExec, MoveCmdArgs, std::nullopt));
+
+ ArgStringList BoltCmdArgs;
+ const char *BoltExec =
+ Args.MakeArgString(ToolChain.GetProgramPath("llvm-bolt"));
+ BoltCmdArgs.push_back(PreBoltBin);
+ BoltCmdArgs.push_back("-reorder-blocks=reverse");
+ BoltCmdArgs.push_back("-update-debug-sections");
+ BoltCmdArgs.push_back("-o");
+ BoltCmdArgs.push_back(Output.getFilename());
+ C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
+ BoltExec, BoltCmdArgs, std::nullopt));
+ // End Facebook T92898286
}
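In effect, --post-link-optimize changes the link step in two ways: the linker is invoked with -q (--emit-relocs) so that BOLT can later rewrite the output, and after linking the driver appends two more commands, equivalent to mv <output> <output>.pre-bolt followed by llvm-bolt <output>.pre-bolt -reorder-blocks=reverse -update-debug-sections -o <output>. Note that -reorder-blocks=reverse is a pessimizing layout normally used to stress-test BOLT's correctness rather than to improve performance, which matches the testing-oriented LLVM_TEST_BOLT option introduced below.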
void tools::gnutools::Assembler::ConstructJob(Compilation &C,
diff --git a/cross-project-tests/lit.cfg.py b/cross-project-tests/lit.cfg.py
index ccd3d01..d38df0e 100644
--- a/cross-project-tests/lit.cfg.py
+++ b/cross-project-tests/lit.cfg.py
@@ -92,7 +92,13 @@ if is_msvc:
# use_clang() and use_lld() respectively, so set them to "", if needed.
if not hasattr(config, "clang_src_dir"):
config.clang_src_dir = ""
-llvm_config.use_clang(required=("clang" in config.llvm_enabled_projects))
+# Facebook T92898286
+should_test_bolt = get_required_attr(config, "llvm_test_bolt")
+if should_test_bolt:
+ llvm_config.use_clang(required=("clang" in config.llvm_enabled_projects), additional_flags=["--post-link-optimize"])
+else:
+ llvm_config.use_clang(required=("clang" in config.llvm_enabled_projects))
+# End Facebook T92898286
if not hasattr(config, "lld_src_dir"):
config.lld_src_dir = ""
@@ -305,3 +311,9 @@ llvm_config.feature_config([("--build-mode", {"Debug|RelWithDebInfo": "debug-inf
# Allow 'REQUIRES: XXX-registered-target' in tests.
for arch in config.targets_to_build:
config.available_features.add(arch.lower() + "-registered-target")
+
+# Facebook T92898286
+# Ensure the user's PYTHONPATH is included.
+if "PYTHONPATH" in os.environ:
+ config.environment["PYTHONPATH"] = os.environ["PYTHONPATH"]
+# End Facebook T92898286
diff --git a/cross-project-tests/lit.site.cfg.py.in b/cross-project-tests/lit.site.cfg.py.in
index 39458df..2d53cd3 100644
--- a/cross-project-tests/lit.site.cfg.py.in
+++ b/cross-project-tests/lit.site.cfg.py.in
@@ -21,6 +21,10 @@ config.mlir_src_root = "@MLIR_SOURCE_DIR@"
config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
+# Facebook T92898286
+config.llvm_test_bolt = lit.util.pythonize_bool("@LLVM_TEST_BOLT@")
+# End Facebook T92898286
+
import lit.llvm
lit.llvm.initialize(lit_config, config)
diff --git a/lldb/test/API/lit.cfg.py b/lldb/test/API/lit.cfg.py
index 08cf11c..077fb4c 100644
--- a/lldb/test/API/lit.cfg.py
+++ b/lldb/test/API/lit.cfg.py
@@ -271,6 +271,17 @@ if is_configured("lldb_libs_dir"):
if is_configured("lldb_framework_dir"):
dotest_cmd += ["--framework", config.lldb_framework_dir]
+# Facebook T92898286
+if is_configured("llvm_test_bolt"):
+ dotest_cmd += ["-E", '"--post-link-optimize"']
+# End Facebook T92898286
+
+if (
+ "lldb-repro-capture" in config.available_features
+ or "lldb-repro-replay" in config.available_features
+):
+ dotest_cmd += ["--skip-category=lldb-dap", "--skip-category=std-module"]
+
if "lldb-simulator-ios" in config.available_features:
dotest_cmd += ["--apple-sdk", "iphonesimulator", "--platform-name", "ios-simulator"]
elif "lldb-simulator-watchos" in config.available_features:
diff --git a/lldb/test/API/lit.site.cfg.py.in b/lldb/test/API/lit.site.cfg.py.in
index ecebc44..1c2ab4d 100644
--- a/lldb/test/API/lit.site.cfg.py.in
+++ b/lldb/test/API/lit.site.cfg.py.in
@@ -1,5 +1,9 @@
@LIT_SITE_CFG_IN_HEADER@
+# Facebook T92898286
+import lit.util
+# End Facebook T92898286
+
config.llvm_src_root = "@LLVM_SOURCE_DIR@"
config.llvm_obj_root = "@LLVM_BINARY_DIR@"
config.llvm_tools_dir = lit_config.substitute("@LLVM_TOOLS_DIR@")
@@ -44,6 +48,10 @@ config.libcxx_include_target_dir = "@LIBCXX_GENERATED_INCLUDE_TARGET_DIR@"
config.lldb_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_LLDB@", "lldb-api")
config.clang_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_CLANG@", "lldb-api")
+# Facebook T92898286
+config.llvm_test_bolt = lit.util.pythonize_bool("@LLVM_TEST_BOLT@")
+# End Facebook T92898286
+
# Plugins
lldb_build_intel_pt = '@LLDB_BUILD_INTEL_PT@'
if lldb_build_intel_pt == '1':
diff --git a/lldb/test/Shell/helper/toolchain.py b/lldb/test/Shell/helper/toolchain.py
index 42968128..ac895e8d 100644
--- a/lldb/test/Shell/helper/toolchain.py
+++ b/lldb/test/Shell/helper/toolchain.py
@@ -241,6 +241,11 @@ def use_support_substitutions(config):
"-lc++",
]
+ # Facebook T92898286
+ if config.llvm_test_bolt:
+ host_flags += ["--post-link-optimize"]
+ # End Facebook T92898286
+
host_flags = " ".join(host_flags)
config.substitutions.append(("%clang_host", "%clang " + host_flags))
config.substitutions.append(("%clangxx_host", "%clangxx " + host_flags))
diff --git a/lldb/test/Shell/lit.site.cfg.py.in b/lldb/test/Shell/lit.site.cfg.py.in
index 31a6d68..8b37d98 100644
--- a/lldb/test/Shell/lit.site.cfg.py.in
+++ b/lldb/test/Shell/lit.site.cfg.py.in
@@ -1,5 +1,9 @@
@LIT_SITE_CFG_IN_HEADER@
+# Facebook T92898286
+import lit.util
+# End Facebook T92898286
+
config.llvm_src_root = "@LLVM_SOURCE_DIR@"
config.llvm_obj_root = "@LLVM_BINARY_DIR@"
config.llvm_tools_dir = lit_config.substitute("@LLVM_TOOLS_DIR@")
@@ -36,6 +41,10 @@ config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
config.lldb_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_LLDB@", "lldb-shell")
config.clang_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_CLANG@", "lldb-shell")
+# Facebook T92898286
+config.llvm_test_bolt = lit.util.pythonize_bool("@LLVM_TEST_BOLT@")
+# End Facebook T92898286
+
import lit.llvm
lit.llvm.initialize(lit_config, config)
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index cfd1a08..1478eea 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -729,6 +729,10 @@ set(LLVM_LIB_FUZZING_ENGINE "" CACHE PATH
option(LLVM_USE_SPLIT_DWARF
"Use -gsplit-dwarf when compiling llvm and --gdb-index when linking." OFF)
+# Facebook T92898286
+option(LLVM_TEST_BOLT "Enable BOLT testing in non-BOLT tests that use clang" OFF)
+# End Facebook T92898286
+
# Define an option controlling whether we should build for 32-bit on 64-bit
# platforms, where supported.
if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT (WIN32 OR ${CMAKE_SYSTEM_NAME} MATCHES "AIX"))
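Taken together, the LLVM_TEST_BOLT option closes the loop: when it is ON, the cross-project and lldb test configurations above build their test binaries with --post-link-optimize, so every clang-compiled test binary is round-tripped through llvm-bolt and the existing debug-info and lldb suites double as BOLT integration coverage.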