author    Amir Ayupov <aaupov@fb.com>  2025-04-09 16:52:45 -0700
committer Amir Ayupov <aaupov@fb.com>  2025-04-09 16:52:45 -0700
commit    3d2d135d70b4cdc661ad1a16e44eb740f3c337b6
tree      01b4b78c34196f7c7093664135acce67de7a501f
parent    cde2ea377d457e272ce1572d588643e5ee533c30
[spr] changes to main this commit is based on
(branch: users/aaupov/spr/main.bolt-jump-table-trampoline-insertion-pass)
Created using spr 1.3.4
[skip ci]
35 files changed, 986 insertions(+), 157 deletions(-)
diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 88313a6..77595bd 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -230,6 +230,12 @@ class BinaryContext {
   /// Functions injected by BOLT
   std::vector<BinaryFunction *> InjectedBinaryFunctions;

+  /// Thunk functions.
+  std::vector<BinaryFunction *> ThunkBinaryFunctions;
+
+  /// Function that precedes thunks in the binary.
+  const BinaryFunction *ThunkLocation{nullptr};
+
   /// Jump tables for all functions mapped by address.
   std::map<uint64_t, JumpTable *> JumpTables;

@@ -435,7 +441,18 @@ public:
   /// Return size of an entry for the given jump table \p Type.
   uint64_t getJumpTableEntrySize(JumpTable::JumpTableType Type) const {
-    return Type == JumpTable::JTT_PIC ? 4 : AsmInfo->getCodePointerSize();
+    switch (Type) {
+    case JumpTable::JTT_X86_64_PIC4:
+      return 4;
+    case JumpTable::JTT_X86_64_ABS:
+      return AsmInfo->getCodePointerSize();
+    case JumpTable::JTT_AARCH64_REL1:
+      return 1;
+    case JumpTable::JTT_AARCH64_REL2:
+      return 2;
+    case JumpTable::JTT_AARCH64_REL4:
+      return 4;
+    }
   }

   /// Return JumpTable containing a given \p Address.
@@ -553,6 +570,16 @@ public:
     return InjectedBinaryFunctions;
   }

+  BinaryFunction *createThunkBinaryFunction(const std::string &Name);
+
+  std::vector<BinaryFunction *> &getThunkBinaryFunctions() {
+    return ThunkBinaryFunctions;
+  }
+
+  const BinaryFunction *getThunkLocation() const { return ThunkLocation; }
+
+  void setThunkLocation(const BinaryFunction *BF) { ThunkLocation = BF; }
+
   /// Return vector with all functions, i.e. include functions from the input
   /// binary and functions created by BOLT.
   std::vector<BinaryFunction *> getAllBinaryFunctions();
@@ -574,14 +601,13 @@ public:
   /// If \p NextJTAddress is different from zero, it is used as an upper
   /// bound for jump table memory layout.
   ///
-  /// Optionally, populate \p Address from jump table entries. The entries
-  /// could be partially populated if the jump table detection fails.
+  /// If \p JT is set, populate it with jump table entries. The entries could
+  /// be partially populated if the jump table detection fails.
   bool analyzeJumpTable(const uint64_t Address,
                         const JumpTable::JumpTableType Type,
                         const BinaryFunction &BF,
                         const uint64_t NextJTAddress = 0,
-                        JumpTable::AddressesType *EntriesAsAddress = nullptr,
-                        bool *HasEntryInFragment = nullptr) const;
+                        JumpTable *JT = nullptr) const;

   /// After jump table locations are established, this function will populate
   /// their EntriesAsAddress based on memory contents.
@@ -1372,6 +1398,10 @@ public:
   uint64_t computeInstructionSize(const MCInst &Inst,
                                   const MCCodeEmitter *Emitter = nullptr) const {
+    // FIXME: hack for faster size computation on aarch64.
+    if (isAArch64())
+      return MIB->isPseudo(Inst) ? 0 : 4;
+
     if (std::optional<uint32_t> Size = MIB->getSize(Inst))
       return *Size;
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index d3d11f8..c18a43f 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -363,6 +363,10 @@ private:
   /// True if the function should not have an associated symbol table entry.
   bool IsAnonymous{false};

+  /// True if the function is used for remapping hot text and shall not be
+  /// placed on a huge page.
+  bool IsHotTextMover{false};
+
   /// Name for the section this function code should reside in.
   std::string CodeSectionName;
@@ -1385,6 +1389,8 @@ public:
   /// Return true if the function uses ORC format for stack unwinding.
   bool hasORC() const { return HasORC; }

+  bool isHotTextMover() const { return IsHotTextMover; }
+
   const JumpTable *getJumpTable(const MCInst &Inst) const {
     const uint64_t Address = BC.MIB->getJumpTable(Inst);
     return getJumpTableContainingAddress(Address);
@@ -1735,6 +1741,8 @@ public:
   /// Mark function that should not be emitted.
   void setIgnored();

+  void setHotTextMover(bool V) { IsHotTextMover = V; }
+
   void setHasIndirectTargetToSplitFragment(bool V) {
     HasIndirectTargetToSplitFragment = V;
   }
diff --git a/bolt/include/bolt/Core/JumpTable.h b/bolt/include/bolt/Core/JumpTable.h
index 52b9cce..c76e2a9 100644
--- a/bolt/include/bolt/Core/JumpTable.h
+++ b/bolt/include/bolt/Core/JumpTable.h
@@ -16,6 +16,7 @@
 #include "bolt/Core/BinaryData.h"
 #include <map>
+#include <variant>
 #include <vector>

 namespace llvm {
@@ -40,6 +41,7 @@ class BinaryFunction;
 /// a different label at a different offset in this jump table.
 class JumpTable : public BinaryData {
   friend class BinaryContext;
+  friend class JumpTableInfoReader;

   JumpTable() = delete;
   JumpTable(const JumpTable &) = delete;
@@ -47,10 +49,34 @@ class JumpTable : public BinaryData {

 public:
   enum JumpTableType : char {
-    JTT_NORMAL,
-    JTT_PIC,
+    JTT_X86_64_FIRST = 0,
+    JTT_X86_64_ABS = JTT_X86_64_FIRST,
+    JTT_X86_64_PIC4,
+    JTT_X86_64_LAST = JTT_X86_64_PIC4,
+    JTT_AARCH64_FIRST,
+    JTT_AARCH64_REL1 = JTT_AARCH64_FIRST,
+    JTT_AARCH64_REL2,
+    JTT_AARCH64_REL4,
+    JTT_AARCH64_LAST = JTT_AARCH64_REL4
   };

+  static StringRef getTypeStr(JumpTableType Type) {
+    switch (Type) {
+    case JTT_X86_64_ABS:
+      return "X86_64_ABS";
+    case JTT_X86_64_PIC4:
+      return "X86_64_PIC4";
+    case JTT_AARCH64_REL1:
+      return "AARCH64_REL1";
+    case JTT_AARCH64_REL2:
+      return "AARCH64_REL2";
+    case JTT_AARCH64_REL4:
+      return "AARCH64_REL4";
+    }
+  }
+
+  const StringRef getTypeStr() { return getTypeStr(Type); }
+
   /// Branch statistics for jump table entries.
   struct JumpInfo {
     uint64_t Mispreds{0};
@@ -92,6 +118,16 @@ public:
   /// BinaryFunction this jump tables belongs to.
   SmallVector<BinaryFunction *, 1> Parents;

+  ///
+  /// AArch64-specific fields
+  ///
+
+  /// Entries are offsets relative to an arbitrary function location.
+  std::variant<uint64_t, MCSymbol *> BaseAddress;
+
+  /// Address of the instruction referencing the jump table (MemLocInstr).
+  uint64_t MemLocInstrAddress{0};
+
 private:
   /// Constructor should only be called by a BinaryContext.
   JumpTable(MCSymbol &Symbol, uint64_t Address, size_t EntrySize,
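A note on the encoding introduced above: each AArch64 jump table entry stores the distance from a per-table base location to the target, with REL1 and REL2 entries additionally scaled down by 4 (the fixed AArch64 instruction size). A minimal decoding sketch, not part of the patch (the helper name is made up), mirroring what analyzeJumpTable does later in this diff:

    // Recover a target address from one raw AArch64 jump table entry.
    // REL1/REL2 entries are scaled by the 4-byte instruction size; REL4 is not.
    static uint64_t decodeAArch64Entry(JumpTable::JumpTableType Type,
                                       uint64_t Base, uint64_t RawEntry) {
      unsigned Shift = (Type == JumpTable::JTT_AARCH64_REL4) ? 0 : 2;
      return Base + (RawEntry << Shift);
    }
    // e.g. a REL2 entry 0x1a with Base 0x400100 yields
    // 0x400100 + (0x1a << 2) = 0x400168
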
diff --git a/bolt/include/bolt/Passes/LongJmp.h b/bolt/include/bolt/Passes/LongJmp.h
index df3ea96..5388d7e 100644
--- a/bolt/include/bolt/Passes/LongJmp.h
+++ b/bolt/include/bolt/Passes/LongJmp.h
@@ -76,6 +76,28 @@ class LongJmpPass : public BinaryFunctionPass {
   /// 128MB of each other.
   void relaxLocalBranches(BinaryFunction &BF);

+  struct FunctionCluster {
+    DenseSet<BinaryFunction *> Functions;
+
+    // Functions that this cluster of functions is calling. Note that it
+    // excludes all functions in the cluster itself.
+    DenseSet<BinaryFunction *> Callees;
+
+    uint64_t Size{0};
+
+    // Last function in the cluster.
+    BinaryFunction *LastBF{nullptr};
+  };
+
+  /// Maximum size of the function cluster. Note that it's less than 128MB
+  /// as the size of the cluster plus thunk island should be less than 128MB.
+  static constexpr uint64_t MaxClusterSize = 125 * 1024 * 1024;
+
+  /// Relax calls for medium code model where code is < 256MB.
+  /// A thunk island will be introduced between two clusters of functions to
+  /// enable calls over 128MB.
+  void relaxCalls(BinaryContext &BC);
+
   /// -- Layout estimation methods --
   /// Try to do layout before running the emitter, by looking at BinaryFunctions
   /// and MCInsts -- this is an estimation. To be correct for longjmp inserter
diff --git a/bolt/include/bolt/Rewrite/MetadataManager.h b/bolt/include/bolt/Rewrite/MetadataManager.h
index 6001b70..cc6e3f9 100644
--- a/bolt/include/bolt/Rewrite/MetadataManager.h
+++ b/bolt/include/bolt/Rewrite/MetadataManager.h
@@ -31,6 +31,10 @@ public:
   /// Run initializers after sections are discovered.
   void runSectionInitializers();

+  /// Execute metadata initializers when functions are discovered but not yet
+  /// disassembled.
+  void runInitializersPreDisasm();
+
   /// Execute initialization of rewriters while functions are disassembled, but
   /// CFG is not yet built.
   void runInitializersPreCFG();
diff --git a/bolt/include/bolt/Rewrite/MetadataRewriter.h b/bolt/include/bolt/Rewrite/MetadataRewriter.h
index 6ff8f0a..d39500c 100644
--- a/bolt/include/bolt/Rewrite/MetadataRewriter.h
+++ b/bolt/include/bolt/Rewrite/MetadataRewriter.h
@@ -49,6 +49,10 @@ public:
   /// but before functions are discovered.
   virtual Error sectionInitializer() { return Error::success(); }

+  /// Run initialization after the functions are identified but not yet
+  /// disassembled.
+  virtual Error preDisasmInitializer() { return Error::success(); }
+
   /// Interface for modifying/annotating functions in the binary based on the
   /// contents of the section. Functions are in pre-cfg state.
   virtual Error preCFGInitializer() { return Error::success(); }
diff --git a/bolt/include/bolt/Rewrite/MetadataRewriters.h b/bolt/include/bolt/Rewrite/MetadataRewriters.h
index b71bd6c..ae34194 100644
--- a/bolt/include/bolt/Rewrite/MetadataRewriters.h
+++ b/bolt/include/bolt/Rewrite/MetadataRewriters.h
@@ -27,6 +27,8 @@ std::unique_ptr<MetadataRewriter> createPseudoProbeRewriter(BinaryContext &);

 std::unique_ptr<MetadataRewriter> createSDTRewriter(BinaryContext &);

+std::unique_ptr<MetadataRewriter> createJumpTableInfoReader(BinaryContext &);
+
 } // namespace bolt
 } // namespace llvm
diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h
index 94dd06e..8fd6a77 100644
--- a/bolt/include/bolt/Rewrite/RewriteInstance.h
+++ b/bolt/include/bolt/Rewrite/RewriteInstance.h
@@ -181,6 +181,9 @@ private:
   /// Process metadata in sections before functions are discovered.
   void processSectionMetadata();

+  /// Process metadata in special sections before functions are disassembled.
+  void processMetadataPreDisasm();
+
   /// Process metadata in special sections before CFG is built for functions.
   void processMetadataPreCFG();
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index 80b15d7..25a39f1 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -497,7 +497,7 @@ BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
     const MemoryContentsType MemType = analyzeMemoryAt(Address, BF);
     if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
       const MCSymbol *Symbol =
-          getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC);
+          getOrCreateJumpTable(BF, Address, JumpTable::JTT_X86_64_PIC4);

       return std::make_pair(Symbol, 0);
     }
@@ -541,10 +541,10 @@ MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
   // Start with checking for PIC jump table. We expect non-PIC jump tables
   // to have high 32 bits set to 0.
-  if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF))
+  if (analyzeJumpTable(Address, JumpTable::JTT_X86_64_PIC4, BF))
     return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;

-  if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF))
+  if (analyzeJumpTable(Address, JumpTable::JTT_X86_64_ABS, BF))
     return MemoryContentsType::POSSIBLE_JUMP_TABLE;

   return MemoryContentsType::UNKNOWN;
@@ -554,8 +554,7 @@ bool BinaryContext::analyzeJumpTable(const uint64_t Address,
                                      const JumpTable::JumpTableType Type,
                                      const BinaryFunction &BF,
                                      const uint64_t NextJTAddress,
-                                     JumpTable::AddressesType *EntriesAsAddress,
-                                     bool *HasEntryInFragment) const {
+                                     JumpTable *JT) const {
   // Target address of __builtin_unreachable.
   const uint64_t UnreachableAddress = BF.getAddress() + BF.getSize();

@@ -572,11 +571,11 @@ bool BinaryContext::analyzeJumpTable(const uint64_t Address,
   size_t TrimmedSize = 0;

   auto addEntryAddress = [&](uint64_t EntryAddress, bool Unreachable = false) {
-    if (!EntriesAsAddress)
+    if (!JT)
       return;
-    EntriesAsAddress->emplace_back(EntryAddress);
+    JT->EntriesAsAddress.emplace_back(EntryAddress);
     if (!Unreachable)
-      TrimmedSize = EntriesAsAddress->size();
+      TrimmedSize = JT->EntriesAsAddress.size();
   };

   ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
@@ -595,12 +594,9 @@ bool BinaryContext::analyzeJumpTable(const uint64_t Address,
   if (NextJTAddress)
     UpperBound = std::min(NextJTAddress, UpperBound);

-  LLVM_DEBUG({
-    using JTT = JumpTable::JumpTableType;
-    dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n",
-                      Address, BF.getPrintName(),
-                      Type == JTT::JTT_PIC ? "PIC" : "Normal");
-  });
+  LLVM_DEBUG(
+      dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n",
+                        Address, BF, JumpTable::getTypeStr(Type)));
   const uint64_t EntrySize = getJumpTableEntrySize(Type);
   for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize;
        EntryAddress += EntrySize) {
@@ -608,13 +604,13 @@ bool BinaryContext::analyzeJumpTable(const uint64_t Address,
                       << " -> ");
     // Check if there's a proper relocation against the jump table entry.
     if (HasRelocations) {
-      if (Type == JumpTable::JTT_PIC &&
+      if (Type == JumpTable::JTT_X86_64_PIC4 &&
           !DataPCRelocations.count(EntryAddress)) {
         LLVM_DEBUG(
             dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
         break;
       }
-      if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) {
+      if (Type == JumpTable::JTT_X86_64_ABS && !getRelocationAt(EntryAddress)) {
         LLVM_DEBUG(
             dbgs()
             << "FAIL: JTT_NORMAL table, no relocation for this address\n");
@@ -622,10 +618,24 @@ bool BinaryContext::analyzeJumpTable(const uint64_t Address,
         break;
       }
     }

-    const uint64_t Value =
-        (Type == JumpTable::JTT_PIC)
-            ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize)
-            : *getPointerAtAddress(EntryAddress);
+    uint64_t Value = 0;
+    switch (Type) {
+    case JumpTable::JTT_X86_64_PIC4:
+      Value = Address + *getSignedValueAtAddress(EntryAddress, EntrySize);
+      break;
+    case JumpTable::JTT_X86_64_ABS:
+      Value = *getPointerAtAddress(EntryAddress);
+      break;
+    case JumpTable::JTT_AARCH64_REL1:
+    case JumpTable::JTT_AARCH64_REL2:
+    case JumpTable::JTT_AARCH64_REL4:
+      unsigned ShiftAmt = Type == JumpTable::JTT_AARCH64_REL4 ? 0 : 2;
+      assert(JT &&
+             "jump table must be non-null for AArch64 in analyzeJumpTable");
+      Value = std::get<uint64_t>(JT->BaseAddress) +
+              (*getUnsignedValueAtAddress(EntryAddress, EntrySize) << ShiftAmt);
+      break;
+    }

     // __builtin_unreachable() case.
     if (Value == UnreachableAddress) {
@@ -646,24 +656,19 @@ bool BinaryContext::analyzeJumpTable(const uint64_t Address,

     // Function or one of its fragments.
     const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value);
-    const bool DoesBelongToFunction =
-        BF.containsAddress(Value) ||
-        (TargetBF && areRelatedFragments(TargetBF, &BF));
-    if (!DoesBelongToFunction) {
+    if (!TargetBF || !areRelatedFragments(TargetBF, &BF)) {
       LLVM_DEBUG({
-        if (!BF.containsAddress(Value)) {
-          dbgs() << "FAIL: function doesn't contain this address\n";
-          if (TargetBF) {
-            dbgs() << "  ! function containing this address: "
-                   << TargetBF->getPrintName() << '\n';
-            if (TargetBF->isFragment()) {
-              dbgs() << "  ! is a fragment";
-              for (BinaryFunction *Parent : TargetBF->ParentFragments)
-                dbgs() << ", parent: " << Parent->getPrintName();
-              dbgs() << '\n';
-            }
-          }
-        }
+        dbgs() << "FAIL: function doesn't contain this address\n";
+        if (!TargetBF)
+          break;
+        dbgs() << "  ! function containing this address: " << *TargetBF << '\n';
+        if (!TargetBF->isFragment())
+          break;
+        dbgs() << "  ! is a fragment with parents: ";
+        ListSeparator LS;
+        for (BinaryFunction *Parent : TargetBF->ParentFragments)
+          dbgs() << LS << *Parent;
+        dbgs() << '\n';
       });
       break;
     }
@@ -678,17 +683,17 @@ bool BinaryContext::analyzeJumpTable(const uint64_t Address,
     ++NumRealEntries;
     LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value));

-    if (TargetBF != &BF && HasEntryInFragment)
-      *HasEntryInFragment = true;
+    if (TargetBF != &BF && JT)
+      JT->IsSplit = true;
     addEntryAddress(Value);
   }

   // Trim direct/normal jump table to exclude trailing unreachable entries that
   // can collide with a function address.
-  if (Type == JumpTable::JTT_NORMAL && EntriesAsAddress &&
-      TrimmedSize != EntriesAsAddress->size() &&
+  if (Type == JumpTable::JTT_X86_64_ABS && JT &&
+      TrimmedSize != JT->EntriesAsAddress.size() &&
       getBinaryFunctionAtAddress(UnreachableAddress))
-    EntriesAsAddress->resize(TrimmedSize);
+    JT->EntriesAsAddress.resize(TrimmedSize);

   // It's a jump table if the number of real entries is more than 1, or there's
   // one real entry and one or more special targets. If there are only multiple
@@ -703,20 +708,20 @@ void BinaryContext::populateJumpTables() {
        ++JTI) {
     JumpTable *JT = JTI->second;

-    bool NonSimpleParent = false;
-    for (BinaryFunction *BF : JT->Parents)
-      NonSimpleParent |= !BF->isSimple();
-    if (NonSimpleParent)
+    auto isSimple = std::bind(&BinaryFunction::isSimple, std::placeholders::_1);
+    if (!llvm::all_of(JT->Parents, isSimple))
       continue;

     uint64_t NextJTAddress = 0;
     auto NextJTI = std::next(JTI);
-    if (NextJTI != JTE)
+    if (isAArch64()) {
+      NextJTAddress = JT->getAddress() + JT->getSize();
+      JT->Entries.clear();
+    } else if (NextJTI != JTE)
       NextJTAddress = NextJTI->second->getAddress();

-    const bool Success =
-        analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]),
-                         NextJTAddress, &JT->EntriesAsAddress, &JT->IsSplit);
+    const bool Success = analyzeJumpTable(
+        JT->getAddress(), JT->Type, *JT->Parents.front(), NextJTAddress, JT);
     if (!Success) {
       LLVM_DEBUG({
         dbgs() << "failed to analyze ";
@@ -744,7 +749,7 @@ void BinaryContext::populateJumpTables() {

     // In strict mode, erase PC-relative relocation record. Later we check that
     // all such records are erased and thus have been accounted for.
-    if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) {
+    if (opts::StrictMode && JT->Type == JumpTable::JTT_X86_64_PIC4) {
       for (uint64_t Address = JT->getAddress();
            Address < JT->getAddress() + JT->getSize();
            Address += JT->EntrySize) {
@@ -840,33 +845,26 @@ BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
     assert(JT->Type == Type && "jump table types have to match");
     assert(Address == JT->getAddress() && "unexpected non-empty jump table");

-    // Prevent associating a jump table to a specific fragment twice.
-    if (!llvm::is_contained(JT->Parents, &Function)) {
-      assert(llvm::all_of(JT->Parents,
-                          [&](const BinaryFunction *BF) {
-                            return areRelatedFragments(&Function, BF);
-                          }) &&
-             "cannot re-use jump table of a different function");
-      // Duplicate the entry for the parent function for easy access
-      JT->Parents.push_back(&Function);
-      if (opts::Verbosity > 2) {
-        this->outs() << "BOLT-INFO: Multiple fragments access same jump table: "
-                     << JT->Parents[0]->getPrintName() << "; "
-                     << Function.getPrintName() << "\n";
-        JT->print(this->outs());
-      }
-      Function.JumpTables.emplace(Address, JT);
-      for (BinaryFunction *Parent : JT->Parents)
-        Parent->setHasIndirectTargetToSplitFragment(true);
-    }
+    if (llvm::is_contained(JT->Parents, &Function))
+      return JT->getFirstLabel();

-    bool IsJumpTableParent = false;
-    (void)IsJumpTableParent;
-    for (BinaryFunction *Frag : JT->Parents)
-      if (Frag == &Function)
-        IsJumpTableParent = true;
-    assert(IsJumpTableParent &&
+    // Prevent associating a jump table to a specific fragment twice.
+    auto isSibling = std::bind(&BinaryContext::areRelatedFragments, this,
+                               &Function, std::placeholders::_1);
+    assert(llvm::all_of(JT->Parents, isSibling) &&
           "cannot re-use jump table of a different function");
+    if (opts::Verbosity > 2) {
+      this->outs() << "BOLT-INFO: Multiple fragments access same jump table: "
+                   << JT->Parents[0]->getPrintName() << "; "
+                   << Function.getPrintName() << "\n";
+      JT->print(this->outs());
+    }
+    if (JT->Parents.size() == 1)
+      JT->Parents.front()->setHasIndirectTargetToSplitFragment(true);
+    Function.setHasIndirectTargetToSplitFragment(true);
+    // Duplicate the entry for the parent function for easy access
+    JT->Parents.push_back(&Function);
+    Function.JumpTables.emplace(Address, JT);
     return JT->getFirstLabel();
   }

@@ -1611,7 +1609,21 @@ std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
                  SortedFunctions.begin(),
                  [](BinaryFunction &BF) { return &BF; });

-  llvm::stable_sort(SortedFunctions, compareBinaryFunctionByIndex);
+  llvm::stable_sort(SortedFunctions,
+                    [](const BinaryFunction *A, const BinaryFunction *B) {
+                      // Place hot text movers at the start.
+                      if (A->isHotTextMover() && !B->isHotTextMover())
+                        return true;
+                      if (!A->isHotTextMover() && B->isHotTextMover())
+                        return false;
+                      if (A->hasValidIndex() && B->hasValidIndex()) {
+                        return A->getIndex() < B->getIndex();
+                      }
+                      if (opts::HotFunctionsAtEnd)
+                        return B->hasValidIndex();
+                      else
+                        return A->hasValidIndex();
+                    });

   return SortedFunctions;
 }
@@ -2434,6 +2446,15 @@ BinaryContext::createInstructionPatch(uint64_t Address,
   return PBF;
 }

+BinaryFunction *
+BinaryContext::createThunkBinaryFunction(const std::string &Name) {
+  ThunkBinaryFunctions.push_back(new BinaryFunction(Name, *this, true));
+  BinaryFunction *BF = ThunkBinaryFunctions.back();
+  setSymbolToFunctionMap(BF->getSymbol(), BF);
+  BF->CurrentState = BinaryFunction::State::CFG;
+  return BF;
+}
+
 std::pair<size_t, size_t>
 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
   // Adjust branch instruction to match the current layout.
diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp
index 1aad252..db0f11bc 100644
--- a/bolt/lib/Core/BinaryEmitter.cpp
+++ b/bolt/lib/Core/BinaryEmitter.cpp
@@ -19,6 +19,7 @@
 #include "bolt/Utils/CommandLineOpts.h"
 #include "bolt/Utils/Utils.h"
 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/Support/CommandLine.h"
@@ -271,6 +272,14 @@ void BinaryEmitter::emitFunctions() {

       if (Emitted)
         Function->setEmitted(/*KeepCFG=*/opts::PrintCacheMetrics);
+
+      // Emit thunks.
+      if (BC.getThunkLocation() != Function)
+        continue;
+
+      for (BinaryFunction *Thunk : BC.getThunkBinaryFunctions()) {
+        emitFunction(*Thunk, Thunk->getLayout().getMainFragment());
+      }
     }
   };

@@ -809,57 +818,71 @@ void BinaryEmitter::emitJumpTable(const JumpTable &JT, MCSection *HotSection,
     Streamer.switchSection(JT.Count > 0 ? HotSection : ColdSection);
     Streamer.emitValueToAlignment(Align(JT.EntrySize));
   }
-  MCSymbol *LastLabel = nullptr;
+  MCSymbol *JTLabel = nullptr;
+  MCContext &Context = Streamer.getContext();
   uint64_t Offset = 0;
   for (MCSymbol *Entry : JT.Entries) {
     auto LI = JT.Labels.find(Offset);
-    if (LI != JT.Labels.end()) {
-      LLVM_DEBUG({
-        dbgs() << "BOLT-DEBUG: emitting jump table " << LI->second->getName()
-               << " (originally was at address 0x"
-               << Twine::utohexstr(JT.getAddress() + Offset)
-               << (Offset ? ") as part of larger jump table\n" : ")\n");
-      });
-      if (!LabelCounts.empty()) {
-        LLVM_DEBUG(dbgs() << "BOLT-DEBUG: jump table count: "
-                          << LabelCounts[LI->second] << '\n');
-        if (LabelCounts[LI->second] > 0)
-          Streamer.switchSection(HotSection);
-        else
-          Streamer.switchSection(ColdSection);
-        Streamer.emitValueToAlignment(Align(JT.EntrySize));
-      }
-      // Emit all labels registered at the address of this jump table
-      // to sync with our global symbol table. We may have two labels
-      // registered at this address if one label was created via
-      // getOrCreateGlobalSymbol() (e.g. LEA instructions referencing
-      // this location) and another via getOrCreateJumpTable(). This
-      // creates a race where the symbols created by these two
-      // functions may or may not be the same, but they are both
-      // registered in our symbol table at the same address. By
-      // emitting them all here we make sure there is no ambiguity
-      // that depends on the order that these symbols were created, so
-      // whenever this address is referenced in the binary, it is
-      // certain to point to the jump table identified at this
-      // address.
-      if (BinaryData *BD = BC.getBinaryDataByName(LI->second->getName())) {
-        for (MCSymbol *S : BD->getSymbols())
-          Streamer.emitLabel(S);
-      } else {
-        Streamer.emitLabel(LI->second);
-      }
-      LastLabel = LI->second;
+    if (LI == JT.Labels.end())
+      goto emitEntry;
+    JTLabel = LI->second;
+    LLVM_DEBUG({
+      dbgs() << "BOLT-DEBUG: emitting jump table " << JTLabel->getName()
+             << " (originally was at address 0x"
+             << Twine::utohexstr(JT.getAddress() + Offset)
+             << (Offset ? ") as part of larger jump table\n" : ")\n");
+    });
+    if (!LabelCounts.empty()) {
+      uint64_t JTCount = LabelCounts[JTLabel];
+      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: jump table count: " << JTCount << '\n');
+      Streamer.switchSection(JTCount ? HotSection : ColdSection);
+      Streamer.emitValueToAlignment(Align(JT.EntrySize));
     }
-    if (JT.Type == JumpTable::JTT_NORMAL) {
+    // Emit all labels registered at the address of this jump table
+    // to sync with our global symbol table. We may have two labels
+    // registered at this address if one label was created via
+    // getOrCreateGlobalSymbol() (e.g. LEA instructions referencing
+    // this location) and another via getOrCreateJumpTable(). This
+    // creates a race where the symbols created by these two
+    // functions may or may not be the same, but they are both
+    // registered in our symbol table at the same address. By
+    // emitting them all here we make sure there is no ambiguity
+    // that depends on the order that these symbols were created, so
+    // whenever this address is referenced in the binary, it is
+    // certain to point to the jump table identified at this
+    // address.
+    if (BinaryData *BD = BC.getBinaryDataByName(JTLabel->getName())) {
+      for (MCSymbol *S : BD->getSymbols())
+        Streamer.emitLabel(S);
+    } else {
+      Streamer.emitLabel(JTLabel);
+    }
+  emitEntry:
+    switch (JT.Type) {
+    case JumpTable::JTT_X86_64_ABS:
       Streamer.emitSymbolValue(Entry, JT.OutputEntrySize);
-    } else { // JTT_PIC
-      const MCSymbolRefExpr *JTExpr =
-          MCSymbolRefExpr::create(LastLabel, Streamer.getContext());
-      const MCSymbolRefExpr *E =
-          MCSymbolRefExpr::create(Entry, Streamer.getContext());
-      const MCBinaryExpr *Value =
-          MCBinaryExpr::createSub(E, JTExpr, Streamer.getContext());
+      break;
+    case JumpTable::JTT_X86_64_PIC4: {
+      const MCSymbolRefExpr *JTExpr = MCSymbolRefExpr::create(JTLabel, Context);
+      const MCSymbolRefExpr *E = MCSymbolRefExpr::create(Entry, Context);
+      const MCBinaryExpr *Value = MCBinaryExpr::createSub(E, JTExpr, Context);
       Streamer.emitValue(Value, JT.EntrySize);
+      break;
+    }
+    case JumpTable::JTT_AARCH64_REL1:
+    case JumpTable::JTT_AARCH64_REL2:
+    case JumpTable::JTT_AARCH64_REL4: {
+      MCSymbol *BaseSym = std::get<MCSymbol *>(JT.BaseAddress);
+      const MCExpr *Base = MCSymbolRefExpr::create(BaseSym, Context);
+      const MCExpr *E = MCSymbolRefExpr::create(Entry, Context);
+      const MCBinaryExpr *Value = MCBinaryExpr::createSub(E, Base, Context);
+      if (JT.EntrySize != 4)
+        Value = MCBinaryExpr::createLShr(
+            Value, MCConstantExpr::create(2, Context), Context);
+      Streamer.emitValue(Value, JT.EntrySize);
+      break;
+    }
     }
     Offset += JT.EntrySize;
   }
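For the AArch64 types, the emitValue expression above regenerates the same form the compiler originally used for each entry: a label difference scaled back down, e.g. `.byte (.Ltarget-.Lbase)>>2` for JTT_AARCH64_REL1 (compare the `.byte (.LBB0_14-.LBB0_4)>>2` entries in the test input further down). For JTT_AARCH64_REL4 the shift is skipped, matching the `JT.EntrySize != 4` guard.
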
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index d1b293a..678b944 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -112,6 +112,10 @@ cl::opt<bool>
                       cl::desc("try to preserve basic block alignment"),
                       cl::cat(BoltOptCategory));

+static cl::opt<bool> PrintOffsets("print-offsets",
+                                  cl::desc("print basic block offsets"),
+                                  cl::Hidden, cl::cat(BoltOptCategory));
+
 static cl::opt<bool>
     PrintOutputAddressRange(
         "print-output-address-range",
         cl::desc(
@@ -556,6 +560,11 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
     if (BB->isLandingPad())
       OS << "  Landing Pad\n";

+    if (opts::PrintOffsets && BB->getOutputStartAddress()) {
+      OS << "  OutputOffset: 0x"
+         << Twine::utohexstr(BB->getOutputStartAddress()) << '\n';
+    }
+
     uint64_t BBExecCount = BB->getExecutionCount();
     if (hasValidProfile()) {
       OS << "  Exec Count : ";
@@ -909,7 +918,7 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size,
            "Invalid memory instruction");
     const MCExpr *FixedEntryDispExpr = FixedEntryDispOperand->getExpr();
     const uint64_t EntryAddress = getExprValue(FixedEntryDispExpr);
-    uint64_t EntrySize = BC.getJumpTableEntrySize(JumpTable::JTT_PIC);
+    uint64_t EntrySize = BC.getJumpTableEntrySize(JumpTable::JTT_X86_64_PIC4);
     ErrorOr<int64_t> Value =
         BC.getSignedValueAtAddress(EntryAddress, EntrySize);
     if (!Value)
@@ -979,12 +988,14 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size,
   MemoryContentsType MemType;
   if (JumpTable *JT = BC.getJumpTableContainingAddress(ArrayStart)) {
     switch (JT->Type) {
-    case JumpTable::JTT_NORMAL:
+    case JumpTable::JTT_X86_64_ABS:
       MemType = MemoryContentsType::POSSIBLE_JUMP_TABLE;
       break;
-    case JumpTable::JTT_PIC:
+    case JumpTable::JTT_X86_64_PIC4:
       MemType = MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
       break;
+    default:
+      llvm_unreachable("Unhandled jump table type");
     }
   } else {
     MemType = BC.analyzeMemoryAt(ArrayStart, *this);
@@ -995,7 +1006,7 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size,
   if (BranchType == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE) {
     if (MemType != MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE)
       return IndirectBranchType::UNKNOWN;
-    JTType = JumpTable::JTT_PIC;
+    JTType = JumpTable::JTT_X86_64_PIC4;
   } else {
     if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE)
       return IndirectBranchType::UNKNOWN;
@@ -1004,7 +1015,7 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size,
       return IndirectBranchType::POSSIBLE_TAIL_CALL;

     BranchType = IndirectBranchType::POSSIBLE_JUMP_TABLE;
-    JTType = JumpTable::JTT_NORMAL;
+    JTType = JumpTable::JTT_X86_64_ABS;
   }

   // Convert the instruction into jump table branch.
@@ -1908,7 +1919,8 @@ void BinaryFunction::postProcessJumpTables() {
   // Create labels for all entries.
   for (auto &JTI : JumpTables) {
     JumpTable &JT = *JTI.second;
-    if (JT.Type == JumpTable::JTT_PIC && opts::JumpTables == JTS_BASIC) {
+    if ((JT.Type == JumpTable::JTT_X86_64_PIC4 || BC.isAArch64()) &&
+        opts::JumpTables == JTS_BASIC) {
       opts::JumpTables = JTS_MOVE;
       BC.outs() << "BOLT-INFO: forcing -jump-tables=move as PIC jump table was "
                    "detected in function "
@@ -1953,6 +1965,12 @@ void BinaryFunction::postProcessJumpTables() {
       }
       JT.Entries.push_back(Label);
     }
+    // Register jump table base address as a local symbol.
+    if (uint64_t BaseAddress = std::get<0>(JT.BaseAddress)) {
+      BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(BaseAddress);
+      assert(BF && "must have a valid jump table base address");
+      JT.BaseAddress = BF->getOrCreateLocalLabel(BaseAddress);
+    }
   }

   // Add TakenBranches from JumpTables.
@@ -2103,7 +2121,7 @@ bool BinaryFunction::postProcessIndirectBranches(
       BC.MIB->unsetJumpTable(Instr);

       JumpTable *JT = BC.getJumpTableContainingAddress(LastJT);
-      if (JT->Type == JumpTable::JTT_NORMAL) {
+      if (JT->Type == JumpTable::JTT_X86_64_ABS) {
        // Invalidating the jump table may also invalidate other jump table
        // boundaries. Until we have/need a support for this, mark the
        // function as non-simple.
diff --git a/bolt/lib/Core/JumpTable.cpp b/bolt/lib/Core/JumpTable.cpp
index 6f588d2..e780c73 100644
--- a/bolt/lib/Core/JumpTable.cpp
+++ b/bolt/lib/Core/JumpTable.cpp
@@ -84,10 +84,10 @@ void bolt::JumpTable::updateOriginal() {
   const uint64_t BaseOffset = getAddress() - getSection().getAddress();
   uint64_t EntryOffset = BaseOffset;
   for (MCSymbol *Entry : Entries) {
-    const uint32_t RelType =
-        Type == JTT_NORMAL ? ELF::R_X86_64_64 : ELF::R_X86_64_PC32;
+    const uint64_t RelType =
+        Type == JTT_X86_64_ABS ? ELF::R_X86_64_64 : ELF::R_X86_64_PC32;
     const uint64_t RelAddend =
-        Type == JTT_NORMAL ? 0 : EntryOffset - BaseOffset;
+        Type == JTT_X86_64_ABS ? 0 : EntryOffset - BaseOffset;
     // Replace existing relocation with the new one to allow any modifications
     // to the original jump table.
     if (BC.HasRelocations)
@@ -99,7 +99,7 @@ void bolt::JumpTable::updateOriginal() {
 void bolt::JumpTable::print(raw_ostream &OS) const {
   uint64_t Offset = 0;
-  if (Type == JTT_PIC)
+  if (Type == JTT_X86_64_PIC4)
     OS << "PIC ";
   ListSeparator LS;
diff --git a/bolt/lib/Passes/Aligner.cpp b/bolt/lib/Passes/Aligner.cpp
index c3ddeda..1b499ac 100644
--- a/bolt/lib/Passes/Aligner.cpp
+++ b/bolt/lib/Passes/Aligner.cpp
@@ -77,6 +77,11 @@ static void alignCompact(BinaryFunction &Function,
   size_t HotSize = 0;
   size_t ColdSize = 0;

+  if (!Function.hasProfile() && BC.isAArch64()) {
+    Function.setAlignment(Function.getMinAlignment());
+    return;
+  }
+
   for (const BinaryBasicBlock &BB : Function)
     if (BB.isSplit())
       ColdSize += BC.computeCodeSize(BB.begin(), BB.end(), Emitter);
diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp
index d8628c6..6b5e08b 100644
--- a/bolt/lib/Passes/BinaryPasses.cpp
+++ b/bolt/lib/Passes/BinaryPasses.cpp
@@ -1287,6 +1287,8 @@ Error AssignSections::runOnFunctions(BinaryContext &BC) {
     if (opts::isHotTextMover(Function)) {
       Function.setCodeSectionName(BC.getHotTextMoverSectionName());
       Function.setColdCodeSectionName(BC.getHotTextMoverSectionName());
+      // TODO: find a better place to mark a function as a mover.
+      Function.setHotTextMover(true);
       continue;
     }
diff --git a/bolt/lib/Passes/IndirectCallPromotion.cpp b/bolt/lib/Passes/IndirectCallPromotion.cpp
index 2b5a591..d70fd0e 100644
--- a/bolt/lib/Passes/IndirectCallPromotion.cpp
+++ b/bolt/lib/Passes/IndirectCallPromotion.cpp
@@ -246,7 +246,7 @@ IndirectCallPromotion::getCallTargets(BinaryBasicBlock &BB,
   if (const JumpTable *JT = BF.getJumpTable(Inst)) {
     // Don't support PIC jump tables for now
-    if (!opts::ICPJumpTablesByTarget && JT->Type == JumpTable::JTT_PIC)
+    if (!opts::ICPJumpTablesByTarget && JT->Type == JumpTable::JTT_X86_64_PIC4)
       return Targets;
     const Location From(BF.getSymbol());
     const std::pair<size_t, size_t> Range =
@@ -256,7 +256,7 @@ IndirectCallPromotion::getCallTargets(BinaryBasicBlock &BB,
     const JumpTable::JumpInfo *JI =
         JT->Counts.empty() ? &DefaultJI : &JT->Counts[Range.first];
     const size_t JIAdj = JT->Counts.empty() ? 0 : 1;
-    assert(JT->Type == JumpTable::JTT_PIC ||
+    assert(JT->Type == JumpTable::JTT_X86_64_PIC4 ||
            JT->EntrySize == BC.AsmInfo->getCodePointerSize());
     for (size_t I = Range.first; I < Range.second; ++I, JI += JIAdj) {
       MCSymbol *Entry = JT->Entries[I];
diff --git a/bolt/lib/Passes/JTFootprintReduction.cpp b/bolt/lib/Passes/JTFootprintReduction.cpp
index 71bdbba..13b37dc3 100644
--- a/bolt/lib/Passes/JTFootprintReduction.cpp
+++ b/bolt/lib/Passes/JTFootprintReduction.cpp
@@ -202,7 +202,7 @@ bool JTFootprintReduction::tryOptimizePIC(BinaryContext &BC,
   JumpTable->OutputEntrySize = 4;
   // DePICify
-  JumpTable->Type = JumpTable::JTT_NORMAL;
+  JumpTable->Type = JumpTable::JTT_X86_64_ABS;

   BB.replaceInstruction(Inst, NewFrag.begin(), NewFrag.end());
   return true;
diff --git a/bolt/lib/Passes/LongJmp.cpp b/bolt/lib/Passes/LongJmp.cpp
index e6bd417..75227da 100644
--- a/bolt/lib/Passes/LongJmp.cpp
+++ b/bolt/lib/Passes/LongJmp.cpp
@@ -31,6 +31,11 @@ static cl::opt<bool>
         cl::desc("generate code for binaries <128MB on AArch64"),
         cl::init(false), cl::cat(BoltCategory));

+static cl::opt<bool>
+    ExperimentalRelaxation("relax-exp",
+                           cl::desc("run experimental relaxation pass"),
+                           cl::init(false), cl::cat(BoltOptCategory));
+
 static cl::opt<bool>
     GroupStubs("group-stubs", cl::desc("share stubs across functions"),
                cl::init(true), cl::cat(BoltOptCategory));
@@ -897,12 +902,185 @@ void LongJmpPass::relaxLocalBranches(BinaryFunction &BF) {
   }
 }

+void LongJmpPass::relaxCalls(BinaryContext &BC) {
+  // Map every function to its direct callees. Note that this is different from
+  // a typical call graph as we completely ignore indirect calls.
+  uint64_t EstimatedSize = 0;
+  // Conservatively estimate emitted function size.
+  auto estimateFunctionSize = [&](const BinaryFunction &BF) -> uint64_t {
+    if (!BC.shouldEmit(BF))
+      return 0;
+    uint64_t Size = BF.estimateSize();
+    if (BF.hasValidIndex())
+      Size += BF.getAlignment();
+    if (BF.hasIslandsInfo()) {
+      Size += BF.estimateConstantIslandSize();
+      Size += BF.getConstantIslandAlignment();
+    }
+
+    return Size;
+  };
+
+  std::unordered_map<BinaryFunction *, std::set<BinaryFunction *>> CallMap;
+  for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
+    if (!BC.shouldEmit(BF))
+      continue;
+
+    EstimatedSize += estimateFunctionSize(BF);
+
+    for (const BinaryBasicBlock &BB : BF) {
+      for (const MCInst &Inst : BB) {
+        if (!BC.MIB->isCall(Inst) || BC.MIB->isIndirectCall(Inst) ||
+            BC.MIB->isIndirectBranch(Inst))
+          continue;
+        const MCSymbol *TargetSymbol = BC.MIB->getTargetSymbol(Inst);
+        assert(TargetSymbol);
+
+        BinaryFunction *Callee = BC.getFunctionForSymbol(TargetSymbol);
+        if (!Callee) {
+          /* Ignore internal calls */
+          continue;
+        }
+
+        CallMap[&BF].insert(Callee);
+      }
+    }
+  }
+
+  LLVM_DEBUG(dbgs() << "LongJmp: estimated code size : " << EstimatedSize
+                    << '\n');
+
+  // Build clusters in the order the functions will appear in the output.
+  std::vector<FunctionCluster> Clusters;
+  Clusters.emplace_back(FunctionCluster());
+
+  for (BinaryFunction *BF : BC.getSortedFunctions()) {
+    if (!BC.shouldEmit(*BF))
+      continue;
+
+    const uint64_t BFSize = estimateFunctionSize(*BF);
+    if (Clusters.empty() || Clusters.back().Size + BFSize > MaxClusterSize) {
+      Clusters.emplace_back(FunctionCluster());
+    }
+
+    FunctionCluster &FC = Clusters.back();
+    FC.Functions.insert(BF);
+    auto It = FC.Callees.find(BF);
+    if (It != FC.Callees.end()) {
+      FC.Callees.erase(It);
+    }
+    FC.Size += BFSize;
+    FC.LastBF = BF;
+
+    for (BinaryFunction *Callee : CallMap[BF])
+      if (!FC.Functions.count(Callee))
+        FC.Callees.insert(Callee);
+  }
+
+  // Print cluster stats.
+  dbgs() << "Built " << Clusters.size() << " clusters\n";
+  uint64_t Index = 0;
+  for (const FunctionCluster &FC : Clusters) {
+    dbgs() << "  Cluster: " << Index++ << '\n';
+    dbgs() << "    " << FC.Functions.size() << " functions\n";
+    dbgs() << "    " << FC.Callees.size() << " callees\n";
+    dbgs() << "    " << FC.Size << " bytes\n";
+  }
+
+  if (Clusters.size() > 2) {
+    BC.errs() << "Large code model is unsupported\n";
+    exit(1);
+  }
+
+  if (Clusters.size() == 1)
+    return;
+
+  // Populate one of the clusters with PLT functions based on the proximity of
+  // the PLT section to avoid unneeded thunk redirection.
+  // FIXME: this part is extremely fragile as it depends on the placement
+  // of PLT section and its proximity to old or new .text.
+  // FIXME: a slightly better approach will be to always use thunks for PLT and
+  // eliminate redirection later using final addresses in address maps.
+  const size_t PLTClusterNum = opts::UseOldText ? 1 : 0;
+  for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
+    if (BF.isPLTFunction()) {
+      auto &PLTCluster = Clusters[PLTClusterNum];
+      PLTCluster.Functions.insert(&BF);
+      auto It = PLTCluster.Callees.find(&BF);
+      if (It != PLTCluster.Callees.end())
+        PLTCluster.Callees.erase(It);
+    }
+  }
+
+  // FIXME: section name to use for thunks.
+  std::string SectionName = Clusters[0].LastBF->getCodeSectionName().str();
+
+  // Build thunk functions.
+  auto createSmallThunk = [&](BinaryFunction &Callee) {
+    BinaryFunction *ThunkBF =
+        BC.createThunkBinaryFunction("__BThunk__" + Callee.getOneName().str());
+    MCInst Inst;
+    BC.MIB->createTailCall(Inst, Callee.getSymbol(), BC.Ctx.get());
+    ThunkBF->addBasicBlock()->addInstruction(Inst);
+    ThunkBF->setCodeSectionName(SectionName);
+
+    return ThunkBF;
+  };
+
+  DenseMap<BinaryFunction *, BinaryFunction *> Thunks;
+  for (FunctionCluster &FC : Clusters) {
+    SmallVector<BinaryFunction *, 16> Callees(FC.Callees.begin(),
+                                              FC.Callees.end());
+    llvm::sort(Callees, compareBinaryFunctionByIndex);
+    for (BinaryFunction *Callee : Callees)
+      Thunks[Callee] = createSmallThunk(*Callee);
+  }
+
+  BC.outs() << "BOLT-INFO: " << Thunks.size() << " thunks created\n";
+
+  // Replace callees with thunks.
+  for (FunctionCluster &FC : Clusters) {
+    for (BinaryFunction *BF : FC.Functions) {
+      if (!CallMap.count(BF))
+        continue;
+
+      for (BinaryBasicBlock &BB : *BF) {
+        for (MCInst &Inst : BB) {
+          if (!BC.MIB->isCall(Inst) || BC.MIB->isIndirectCall(Inst) ||
+              BC.MIB->isIndirectBranch(Inst))
+            continue;
+          const MCSymbol *TargetSymbol = BC.MIB->getTargetSymbol(Inst);
+          assert(TargetSymbol);
+
+          BinaryFunction *Callee = BC.getFunctionForSymbol(TargetSymbol);
+          if (!Callee) {
+            /* Ignore internal calls */
+            continue;
+          }
+
+          // Check if the callee is in the same cluster.
+          if (!FC.Callees.count(Callee))
+            continue;
+
+          // Use thunk as the call destination.
+          BC.MIB->replaceBranchTarget(Inst, Thunks[Callee]->getSymbol(),
+                                      BC.Ctx.get());
+        }
+      }
+    }
+  }
+
+  BC.setThunkLocation(Clusters[0].LastBF);
+}
+
 Error LongJmpPass::runOnFunctions(BinaryContext &BC) {
-  if (opts::CompactCodeModel) {
+  if (opts::CompactCodeModel || opts::ExperimentalRelaxation) {
     BC.outs()
         << "BOLT-INFO: relaxing branches for compact code model (<128MB)\n";

+    // TODO: set correct code model based on the total size of split-code.
     ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
       relaxLocalBranches(BF);
     };
@@ -916,6 +1094,12 @@ Error LongJmpPass::runOnFunctions(BinaryContext &BC) {
         BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
         SkipPredicate, "RelaxLocalBranches");

+    if (!opts::ExperimentalRelaxation)
+      return Error::success();
+
+    BC.outs() << "BOLT-INFO: starting experimental relaxation pass\n";
+    relaxCalls(BC);
+
     return Error::success();
   }
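Why MaxClusterSize is 125MB rather than 128MB: an AArch64 direct call (BL) encodes a signed 26-bit immediate scaled by 4, giving a reach of ±2^27 bytes = ±128MiB. With each cluster capped at 125MiB and the thunks placed right after the last function of the first cluster (setThunkLocation above), every call site in either cluster stays within direct-branch range of the thunk island, and the remaining ~3MiB of headroom absorbs the island itself plus alignment padding.
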
diff --git a/bolt/lib/Passes/PatchEntries.cpp b/bolt/lib/Passes/PatchEntries.cpp
index 8a2f0a3..a37ee33 100644
--- a/bolt/lib/Passes/PatchEntries.cpp
+++ b/bolt/lib/Passes/PatchEntries.cpp
@@ -36,16 +36,20 @@ Error PatchEntries::runOnFunctions(BinaryContext &BC) {
   if (!opts::ForcePatch) {
     // Mark the binary for patching if we did not create external references
     // for original code in any of functions we are not going to emit.
-    bool NeedsPatching = llvm::any_of(
-        llvm::make_second_range(BC.getBinaryFunctions()),
-        [&](BinaryFunction &BF) {
-          return !BC.shouldEmit(BF) && !BF.hasExternalRefRelocations();
-        });
+    bool NeedsPatching =
+        llvm::any_of(llvm::make_second_range(BC.getBinaryFunctions()),
+                     [&](BinaryFunction &BF) {
+                       return !BF.isPseudo() && !BC.shouldEmit(BF) &&
+                              !BF.hasExternalRefRelocations();
+                     });

     if (!NeedsPatching)
       return Error::success();
   }

+  assert(!opts::UseOldText &&
+         "Cannot patch entries while overwriting original .text");
+
   if (opts::Verbosity >= 1)
     BC.outs() << "BOLT-INFO: patching entries in original code\n";
diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
index dd48653..8304693 100644
--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
+++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
@@ -497,6 +497,9 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
   // memory profiling data.
   Manager.registerPass(std::make_unique<ReorderData>());

+  // Assign each function an output section.
+  Manager.registerPass(std::make_unique<AssignSections>());
+
   if (BC.isAArch64()) {
     Manager.registerPass(
         std::make_unique<ADRRelaxationPass>(PrintAdrRelaxation));
@@ -521,9 +524,6 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
   Manager.registerPass(
       std::make_unique<RetpolineInsertion>(PrintRetpolineInsertion));

-  // Assign each function an output section.
-  Manager.registerPass(std::make_unique<AssignSections>());
-
   // Patch original function entries
   if (BC.HasRelocations)
     Manager.registerPass(std::make_unique<PatchEntries>());
diff --git a/bolt/lib/Rewrite/CMakeLists.txt b/bolt/lib/Rewrite/CMakeLists.txt
index c83cf36..d27bd22 100644
--- a/bolt/lib/Rewrite/CMakeLists.txt
+++ b/bolt/lib/Rewrite/CMakeLists.txt
@@ -17,6 +17,7 @@ add_llvm_library(LLVMBOLTRewrite
   DWARFRewriter.cpp
   ExecutableFileMemoryManager.cpp
   JITLinkLinker.cpp
+  JumpTableInfoReader.cpp
  LinuxKernelRewriter.cpp
   MachORewriteInstance.cpp
   MetadataManager.cpp
diff --git a/bolt/lib/Rewrite/JumpTableInfoReader.cpp b/bolt/lib/Rewrite/JumpTableInfoReader.cpp
new file mode 100644
index 0000000..98230a2
--- /dev/null
+++ b/bolt/lib/Rewrite/JumpTableInfoReader.cpp
@@ -0,0 +1,91 @@
+//===- bolt/Rewrite/JumpTableInfoReader.cpp -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Read .llvm_jump_table_info section and register jump tables.
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Core/JumpTable.h"
+#include "bolt/Rewrite/MetadataRewriter.h"
+#include "bolt/Rewrite/MetadataRewriters.h"
+#include "llvm/Support/DataExtractor.h"
+
+using namespace llvm;
+using namespace bolt;
+
+namespace {
+class JumpTableInfoReader final : public MetadataRewriter {
+
+public:
+  JumpTableInfoReader(StringRef Name, BinaryContext &BC)
+      : MetadataRewriter(Name, BC) {}
+  Error preDisasmInitializer() override;
+};
+
+Error JumpTableInfoReader::preDisasmInitializer() {
+  if (!BC.isAArch64())
+    return Error::success();
+
+  ErrorOr<BinarySection &> ErrorOrJTInfoSection =
+      BC.getUniqueSectionByName(".llvm_jump_table_info");
+  if (std::error_code E = ErrorOrJTInfoSection.getError())
+    return Error::success();
+
+  BinarySection &JTInfoSection = *ErrorOrJTInfoSection;
+  StringRef Buf = JTInfoSection.getContents();
+  DataExtractor DE = DataExtractor(Buf, BC.AsmInfo->isLittleEndian(),
+                                   BC.AsmInfo->getCodePointerSize());
+  DataExtractor::Cursor Cursor(0);
+  while (Cursor && !DE.eof(Cursor)) {
+    const uint8_t Format = DE.getU8(Cursor);
+    const uint64_t JTAddr = DE.getAddress(Cursor);
+    const uint64_t JTBase = DE.getAddress(Cursor);
+    const uint64_t JTLoad = DE.getAddress(Cursor);
+    const uint64_t Branch = DE.getAddress(Cursor);
+    const uint64_t NumEntries = DE.getULEB128(Cursor);
+
+    JumpTable::JumpTableType Type = JumpTable::JTT_AARCH64_LAST;
+    switch (Format) {
+    case 2:
+      Type = JumpTable::JTT_AARCH64_REL1;
+      break;
+    case 3:
+      Type = JumpTable::JTT_AARCH64_REL2;
+      break;
+    case 4:
+      Type = JumpTable::JTT_AARCH64_REL4;
+      break;
+    default:
+      errs() << "BOLT-WARNING: unknown jump table info type " << Format
+             << " for jump table " << Twine::utohexstr(JTAddr) << '\n';
+      continue;
+    }
+
+    BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(Branch);
+    if (!BF) {
+      BC.errs() << "BOLT-WARNING: binary function not found for jump table "
+                   "with address "
+                << Twine::utohexstr(JTAddr) << " and branch "
+                << Twine::utohexstr(Branch) << '\n';
+      continue;
+    }
+
+    const MCSymbol *JTSym = BC.getOrCreateJumpTable(*BF, JTAddr, Type);
+    assert(JTSym && "failed to create a jump table");
+    JumpTable *JT = BC.getJumpTableContainingAddress(JTAddr);
+    assert(JT && "internal error creating jump table");
+    JT->BaseAddress = JTBase;
+    JT->MemLocInstrAddress = JTLoad;
+    JT->Entries.resize(NumEntries);
+  }
+  return Cursor.takeError();
+}
+} // namespace
+
+std::unique_ptr<MetadataRewriter>
+llvm::bolt::createJumpTableInfoReader(BinaryContext &BC) {
+  return std::make_unique<JumpTableInfoReader>("jump-table-info-reader", BC);
+}
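For reference, each .llvm_jump_table_info record consumed by the reader above has the following on-disk shape (a sketch inferred from the parsing code; field names are illustrative, and addresses are 8 bytes on AArch64):

    // One record, little-endian:
    //   uint8_t  Format;      // 2 = REL1, 3 = REL2, 4 = REL4
    //   uint64_t JTAddr;      // address of the jump table data
    //   uint64_t JTBase;      // base address the entries are relative to
    //   uint64_t JTLoad;      // address of the entry-load instruction (MemLocInstr)
    //   uint64_t Branch;      // address of the indirect branch
    //   uleb128  NumEntries;  // number of table entries
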
diff --git a/bolt/lib/Rewrite/MetadataManager.cpp b/bolt/lib/Rewrite/MetadataManager.cpp
index 713d2e4..8114e156 100644
--- a/bolt/lib/Rewrite/MetadataManager.cpp
+++ b/bolt/lib/Rewrite/MetadataManager.cpp
@@ -32,6 +32,18 @@ void MetadataManager::runSectionInitializers() {
   }
 }

+void MetadataManager::runInitializersPreDisasm() {
+  for (auto &Rewriter : Rewriters) {
+    LLVM_DEBUG(dbgs() << "BOLT-DEBUG: invoking " << Rewriter->getName()
+                      << " after reading sections\n");
+    if (Error E = Rewriter->preDisasmInitializer()) {
+      errs() << "BOLT-ERROR: while running " << Rewriter->getName()
+             << " in pre-disasm state: " << toString(std::move(E)) << '\n';
+      exit(1);
+    }
+  }
+}
+
 void MetadataManager::runInitializersPreCFG() {
   for (auto &Rewriter : Rewriters) {
     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: invoking " << Rewriter->getName()
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index f204aa3..33526b0 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -695,7 +695,7 @@ Error RewriteInstance::run() {

   selectFunctionsToProcess();

-  readDebugInfo();
+  processMetadataPreDisasm();

   disassembleFunctions();

@@ -2479,6 +2479,13 @@ void RewriteInstance::readDynamicRelocations(const SectionRef &Section,
       exit(1);
     }

+    // Workaround for AArch64 issue with hot text.
+    if (BC->isAArch64() &&
+        (SymbolName == "__hot_start" || SymbolName == "__hot_end")) {
+      BC->addRelocation(Rel.getOffset(), Symbol, ELF::R_AARCH64_ABS64, Addend);
+      continue;
+    }
+
     BC->addDynamicRelocation(Rel.getOffset(), Symbol, RType, Addend);
   }
 }
@@ -3249,6 +3256,8 @@ void RewriteInstance::initializeMetadataManager() {
   MetadataManager.registerRewriter(createPseudoProbeRewriter(*BC));

   MetadataManager.registerRewriter(createSDTRewriter(*BC));
+
+  MetadataManager.registerRewriter(createJumpTableInfoReader(*BC));
 }

 void RewriteInstance::processSectionMetadata() {
@@ -3259,6 +3268,14 @@ void RewriteInstance::processSectionMetadata() {
   MetadataManager.runSectionInitializers();
 }

+void RewriteInstance::processMetadataPreDisasm() {
+  NamedRegionTimer T("processmetadata-predisasm",
+                     "process metadata pre-disasm", TimerGroupName,
+                     TimerGroupDesc, opts::TimeRewrite);
+  MetadataManager.runInitializersPreDisasm();
+
+  readDebugInfo();
+}
+
 void RewriteInstance::processMetadataPreCFG() {
   NamedRegionTimer T("processmetadata-precfg", "process metadata pre-CFG",
                      TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
@@ -3854,15 +3871,41 @@ void RewriteInstance::mapCodeSections(BOLTLinker::SectionMapper MapSection) {
     return Address;
   };

+  // Try to allocate sections before \p Address and return an address for the
+  // allocation of the first section, or 0 if \p Address is not big enough.
+  auto allocateBefore = [&](uint64_t Address) -> uint64_t {
+    for (auto SI = CodeSections.rbegin(), SE = CodeSections.rend(); SI != SE;
+         ++SI) {
+      BinarySection *Section = *SI;
+      if (Section->getOutputSize() > Address)
+        return 0;
+      Address -= Section->getOutputSize();
+      Address = alignDown(Address, Section->getAlignment());
+      Section->setOutputAddress(Address);
+    }
+    return Address;
+  };
+
   // Check if we can fit code in the original .text
   bool AllocationDone = false;
   if (opts::UseOldText) {
-    const uint64_t CodeSize =
-        allocateAt(BC->OldTextSectionAddress) - BC->OldTextSectionAddress;
+    uint64_t StartAddress;
+    uint64_t EndAddress;
+    if (opts::HotFunctionsAtEnd) {
+      EndAddress = BC->OldTextSectionAddress + BC->OldTextSectionSize;
+      StartAddress = allocateBefore(EndAddress);
+    } else {
+      StartAddress = BC->OldTextSectionAddress;
+      EndAddress = allocateAt(BC->OldTextSectionAddress);
+    }

+    const uint64_t CodeSize = EndAddress - StartAddress;
     if (CodeSize <= BC->OldTextSectionSize) {
       BC->outs() << "BOLT-INFO: using original .text for new code with 0x"
-                 << Twine::utohexstr(opts::AlignText) << " alignment\n";
+                 << Twine::utohexstr(opts::AlignText) << " alignment";
+      if (StartAddress != BC->OldTextSectionAddress)
+        BC->outs() << " at 0x" << Twine::utohexstr(StartAddress);
+      BC->outs() << '\n';
       AllocationDone = true;
     } else {
       BC->errs()
@@ -4133,6 +4176,11 @@ void RewriteInstance::patchELFPHDRTable() {
     NewWritableSegmentSize = NextAvailableAddress - NewWritableSegmentAddress;
   }

+  if (!NewTextSegmentSize && !NewWritableSegmentSize) {
+    BC->outs() << "BOLT-INFO: not adding new segments\n";
+    return;
+  }
+
   const uint64_t SavedPos = OS.tell();
   OS.seek(PHDRTableOffset);

@@ -4487,6 +4535,11 @@ bool RewriteInstance::shouldStrip(const ELFShdrTy &Section,
   if (opts::RemoveSymtab && Section.sh_type == ELF::SHT_SYMTAB)
     return true;

+  // Strip jump table metadata by default.
+  // TBD: add a flag to rewrite it.
+  if (SectionName == ".llvm_jump_table_info")
+    return true;
+
   return false;
 }
diff --git a/bolt/test/AArch64/Inputs/jump-table.c b/bolt/test/AArch64/Inputs/jump-table.c
new file mode 100644
index 0000000..198c483
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/jump-table.c
@@ -0,0 +1,20 @@
+volatile int g;
+void switchy(int x) {
+  switch (x) {
+  case 0: g--; break;
+  case 1: g++; break;
+  case 2: g = 42; break;
+  case 3: g += 17; break;
+  case 4: g -= 66; break;
+  case 5: g++; g--; break;
+  case 6: g--; g++; break;
+  case 66: g-=3; g++; break;
+  case 8: g+=5; g--; break;
+  case 10: g+=5; g--; break;
+  case 12: g+=42; g--; break;
+  case 15: g+=99; g--; break;
+  case 20: switchy(g); break;
+  case 21: g -= 1234; break;
+  default: g = 0; break;
+  }
+}
diff --git a/bolt/test/AArch64/jump-table-info.s b/bolt/test/AArch64/jump-table-info.s
new file mode 100644
index 0000000..e2b67c6
--- /dev/null
+++ b/bolt/test/AArch64/jump-table-info.s
@@ -0,0 +1,186 @@
+## Check parsing of a .llvm_jump_table_info section
+## The assembly is produced from bolt/test/AArch64/Inputs/jump-table.c
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -static
+# RUN: llvm-bolt %t.exe -o %t.null -print-jump-tables | FileCheck %s
+
+# Confirm 67 entries are parsed:
+# CHECK: jump tables for function _Z7switchyi:
+# CHECK-NEXT: Jump table {{.*}} for function _Z7switchyi
+# CHECK: 0x0042 : .Ltmp16
+
+	.text
+	.globl	_Z7switchyi             // -- Begin function _Z7switchyi
+	.p2align	2
+	.type	_Z7switchyi,@function
+_Z7switchyi:                    // @_Z7switchyi
+	.cfi_startproc
+// %bb.0:                       // %entry
+	adrp	x8, g
+	cmp	w0, #20
+	b.ne	.LBB0_2
+.LBB0_1:                        // %sw.bb26
+                                // =>This Inner Loop Header: Depth=1
+	ldr	w0, [x8, :lo12:g]
+	cmp	w0, #20
+	b.eq	.LBB0_1
+.LBB0_2:                        // %tailrecurse
+	cmp	w0, #66
+	b.hi	.LBB0_18
+// %bb.3:                       // %tailrecurse
+	mov	w9, w0
+	adrp	x10, .LJTI0_0
+	add	x10, x10, :lo12:.LJTI0_0
+	adr	x11, .LBB0_4
+.Ltmp0:
+	ldrb	w12, [x10, x9]
+	add	x11, x11, x12, lsl #2
+.Ltmp1:
+	br	x11
+.LBB0_4:                        // %sw.bb17
+	ldr	w9, [x8, :lo12:g]
+	add	w9, w9, #5
+	b	.LBB0_13
+.LBB0_5:                        // %sw.bb11
+	ldr	w9, [x8, :lo12:g]
+	sub	w9, w9, #3
+	b	.LBB0_10
+.LBB0_6:                        // %sw.bb5
+	ldr	w9, [x8, :lo12:g]
+	add	w9, w9, #1
+	b	.LBB0_13
+.LBB0_7:                        // %sw.bb3
+	ldr	w9, [x8, :lo12:g]
+	add	w9, w9, #17
+	str	w9, [x8, :lo12:g]
+	ret
+.LBB0_8:                        // %sw.bb23
+	ldr	w9, [x8, :lo12:g]
+	add	w9, w9, #99
+	b	.LBB0_13
+.LBB0_9:                        // %sw.bb8
+	ldr	w9, [x8, :lo12:g]
+	sub	w9, w9, #1
+.LBB0_10:                       // %sw.epilog
+	str	w9, [x8, :lo12:g]
+.LBB0_11:                       // %sw.bb1
+	ldr	w9, [x8, :lo12:g]
+	add	w9, w9, #1
+	str	w9, [x8, :lo12:g]
+	ret
+.LBB0_12:                       // %sw.bb20
+	ldr	w9, [x8, :lo12:g]
+	add	w9, w9, #42
+.LBB0_13:                       // %sw.epilog
+	str	w9, [x8, :lo12:g]
+.LBB0_14:                       // %sw.bb
+	ldr	w9, [x8, :lo12:g]
+	sub	w9, w9, #1
+	str	w9, [x8, :lo12:g]
+	ret
+.LBB0_15:                       // %sw.epilog.loopexit
+	mov	w9, #42                 // =0x2a
+	str	w9, [x8, :lo12:g]
+	ret
+.LBB0_16:                       // %sw.bb27
+	ldr	w9, [x8, :lo12:g]
+	sub	w9, w9, #1234
+	str	w9, [x8, :lo12:g]
+	ret
+.LBB0_17:                       // %sw.bb4
+	ldr	w9, [x8, :lo12:g]
+	sub	w9, w9, #66
+	str	w9, [x8, :lo12:g]
+	ret
+.LBB0_18:                       // %sw.epilog.loopexit29
+	str	wzr, [x8, :lo12:g]
+	ret
+.Lfunc_end0:
+	.size	_Z7switchyi, .Lfunc_end0-_Z7switchyi
+	.cfi_endproc
+	.section	.rodata,"a",@progbits
+.LJTI0_0:
+	.byte	(.LBB0_14-.LBB0_4)>>2
+	.byte	(.LBB0_11-.LBB0_4)>>2
+	.byte	(.LBB0_15-.LBB0_4)>>2
+	.byte	(.LBB0_7-.LBB0_4)>>2
+	.byte	(.LBB0_17-.LBB0_4)>>2
+	.byte	(.LBB0_6-.LBB0_4)>>2
+	.byte	(.LBB0_9-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_4-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_4-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_12-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_8-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_16-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_18-.LBB0_4)>>2
+	.byte	(.LBB0_5-.LBB0_4)>>2
+	.section	.llvm_jump_table_info,"",@0x6fff4c0e
+	.byte	2                       // format 2: 1b relative; shr 2
+	.xword	.LJTI0_0
+	.xword	.LBB0_4                 // Base
+	.xword	.Ltmp0                  // Load Instruction
+	.xword	.Ltmp1                  // Branch Instruction
+	.byte	67                      // Number of Entries
+                                // -- End function
+	.type	g,@object               // @g
+	.bss
+	.globl	g
+	.p2align	2, 0x0
+g:
+	.word	0                       // 0x0
+	.size	g, 4
+	.section	".note.GNU-stack","",@progbits
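Reading the metadata record at the end of the test against the record layout sketched earlier: format byte 2 selects JTT_AARCH64_REL1 (1-byte entries, scaled by 4), the four .xword fields give the table, base, load, and branch addresses, and the entry count is 67 (a single ULEB128 byte). That is also why the FileCheck line verifies offset 0x0042: with 1-byte entries, offset 0x42 = 66 is the last of the 67 entries, corresponding to the `case 66` handler (.LBB0_5 in the input).
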
+
+REQUIRES: system-linux
+
+RUN: %clang %cflags %p/Inputs/hello.c -o %t -no-pie -Wl,-q
+RUN: llvm-bolt %t -o %t.bolt --use-old-text --align-functions=1 \
+RUN:   --no-huge-pages --align-text=1 --use-gnu-stack \
+RUN:   | FileCheck %s --check-prefix=CHECK-BOLT
+RUN: llvm-readelf -WS %t.bolt | FileCheck %s
+
+CHECK-BOLT: rewriting .eh_frame_hdr in-place
+CHECK-BOLT: not adding new segments
+
+CHECK-NOT: .bolt.org.eh_frame_hdr
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 4c01088..d3e038d 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5730,6 +5730,10 @@ def pg : Flag<["-"], "pg">, HelpText<"Enable mcount instrumentation">,
   MarshallingInfoFlag<CodeGenOpts<"InstrumentForProfiling">>;
 def pipe : Flag<["-", "--"], "pipe">,
   HelpText<"Use pipes between commands, when possible">;
+// Facebook T92898286
+def post_link_optimize : Flag<["--"], "post-link-optimize">,
+  HelpText<"Apply post-link optimizations using BOLT">;
+// End Facebook T92898286
 def prebind__all__twolevel__modules : Flag<["-"], "prebind_all_twolevel_modules">;
 def prebind : Flag<["-"], "prebind">;
 def preload : Flag<["-"], "preload">;
diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp
index a0fa3c6..9ffff3e 100644
--- a/clang/lib/Driver/ToolChains/Gnu.cpp
+++ b/clang/lib/Driver/ToolChains/Gnu.cpp
@@ -671,12 +671,41 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
     }
   }
 
+  // Facebook T92898286
+  if (Args.hasArg(options::OPT_post_link_optimize))
+    CmdArgs.push_back("-q");
+  // End Facebook T92898286
+
   Args.addAllArgs(CmdArgs, {options::OPT_T, options::OPT_t});
 
   const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath());
   C.addCommand(std::make_unique<Command>(JA, *this,
                                          ResponseFileSupport::AtFileCurCP(),
                                          Exec, CmdArgs, Inputs, Output));
+  // Facebook T92898286
+  if (!Args.hasArg(options::OPT_post_link_optimize) || !Output.isFilename())
+    return;
+
+  const char *MvExec = Args.MakeArgString(ToolChain.GetProgramPath("mv"));
+  ArgStringList MoveCmdArgs;
+  MoveCmdArgs.push_back(Output.getFilename());
+  const char *PreBoltBin =
+      Args.MakeArgString(Twine(Output.getFilename()) + ".pre-bolt");
+  MoveCmdArgs.push_back(PreBoltBin);
+  C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
+                                         MvExec, MoveCmdArgs, std::nullopt));
+
+  ArgStringList BoltCmdArgs;
+  const char *BoltExec =
+      Args.MakeArgString(ToolChain.GetProgramPath("llvm-bolt"));
+  BoltCmdArgs.push_back(PreBoltBin);
+  BoltCmdArgs.push_back("-reorder-blocks=reverse");
+  BoltCmdArgs.push_back("-update-debug-sections");
+  BoltCmdArgs.push_back("-o");
+  BoltCmdArgs.push_back(Output.getFilename());
+  C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
+                                         BoltExec, BoltCmdArgs, std::nullopt));
+  // End Facebook T92898286
 }
 
 void tools::gnutools::Assembler::ConstructJob(Compilation &C,
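In effect, a link driven by clang --post-link-optimize -o app ... (output name illustrative) now asks the linker to keep relocations (-q), renames the linked binary to app.pre-bolt, and runs llvm-bolt app.pre-bolt -reorder-blocks=reverse -update-debug-sections -o app, so the final artifact is the BOLT-rewritten binary. The lit changes below use this flag to run existing test suites against such binaries.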
diff --git a/cross-project-tests/lit.cfg.py b/cross-project-tests/lit.cfg.py
index ccd3d01..d38df0e 100644
--- a/cross-project-tests/lit.cfg.py
+++ b/cross-project-tests/lit.cfg.py
@@ -92,7 +92,13 @@ if is_msvc:
 # use_clang() and use_lld() respectively, so set them to "", if needed.
 if not hasattr(config, "clang_src_dir"):
     config.clang_src_dir = ""
-llvm_config.use_clang(required=("clang" in config.llvm_enabled_projects))
+# Facebook T92898286
+should_test_bolt = get_required_attr(config, "llvm_test_bolt")
+if should_test_bolt:
+    llvm_config.use_clang(required=("clang" in config.llvm_enabled_projects), additional_flags=["--post-link-optimize"])
+else:
+    llvm_config.use_clang(required=("clang" in config.llvm_enabled_projects))
+# End Facebook T92898286
 
 if not hasattr(config, "lld_src_dir"):
     config.lld_src_dir = ""
@@ -305,3 +311,9 @@ llvm_config.feature_config([("--build-mode", {"Debug|RelWithDebInfo": "debug-inf
 # Allow 'REQUIRES: XXX-registered-target' in tests.
 for arch in config.targets_to_build:
     config.available_features.add(arch.lower() + "-registered-target")
+
+# Facebook T92898286
+# Ensure the user's PYTHONPATH is included.
+if "PYTHONPATH" in os.environ:
+    config.environment["PYTHONPATH"] = os.environ["PYTHONPATH"]
+# End Facebook T92898286
diff --git a/cross-project-tests/lit.site.cfg.py.in b/cross-project-tests/lit.site.cfg.py.in
index 39458df..2d53cd3 100644
--- a/cross-project-tests/lit.site.cfg.py.in
+++ b/cross-project-tests/lit.site.cfg.py.in
@@ -21,6 +21,10 @@ config.mlir_src_root = "@MLIR_SOURCE_DIR@"
 
 config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
 
+# Facebook T92898286
+config.llvm_test_bolt = lit.util.pythonize_bool("@LLVM_TEST_BOLT@")
+# End Facebook T92898286
+
 import lit.llvm
 lit.llvm.initialize(lit_config, config)
diff --git a/lldb/test/API/lit.cfg.py b/lldb/test/API/lit.cfg.py
index 08cf11c..077fb4c 100644
--- a/lldb/test/API/lit.cfg.py
+++ b/lldb/test/API/lit.cfg.py
@@ -271,6 +271,17 @@ if is_configured("lldb_libs_dir"):
 if is_configured("lldb_framework_dir"):
     dotest_cmd += ["--framework", config.lldb_framework_dir]
 
+# Facebook T92898286
+if is_configured("llvm_test_bolt"):
+    dotest_cmd += ["-E", '"--post-link-optimize"']
+# End Facebook T92898286
+
+if (
+    "lldb-repro-capture" in config.available_features
+    or "lldb-repro-replay" in config.available_features
+):
+    dotest_cmd += ["--skip-category=lldb-dap", "--skip-category=std-module"]
+
 if "lldb-simulator-ios" in config.available_features:
     dotest_cmd += ["--apple-sdk", "iphonesimulator", "--platform-name", "ios-simulator"]
 elif "lldb-simulator-watchos" in config.available_features:
diff --git a/lldb/test/API/lit.site.cfg.py.in b/lldb/test/API/lit.site.cfg.py.in
index ecebc44..1c2ab4d 100644
--- a/lldb/test/API/lit.site.cfg.py.in
+++ b/lldb/test/API/lit.site.cfg.py.in
@@ -1,5 +1,9 @@
 @LIT_SITE_CFG_IN_HEADER@
 
+# Facebook T92898286
+import lit.util
+# End Facebook T92898286
+
 config.llvm_src_root = "@LLVM_SOURCE_DIR@"
 config.llvm_obj_root = "@LLVM_BINARY_DIR@"
 config.llvm_tools_dir = lit_config.substitute("@LLVM_TOOLS_DIR@")
@@ -44,6 +48,10 @@ config.libcxx_include_target_dir = "@LIBCXX_GENERATED_INCLUDE_TARGET_DIR@"
 config.lldb_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_LLDB@", "lldb-api")
 config.clang_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_CLANG@", "lldb-api")
 
+# Facebook T92898286
+config.llvm_test_bolt = lit.util.pythonize_bool("@LLVM_TEST_BOLT@")
+# End Facebook T92898286
+
 # Plugins
 lldb_build_intel_pt = '@LLDB_BUILD_INTEL_PT@'
 if lldb_build_intel_pt == '1':
diff --git a/lldb/test/Shell/helper/toolchain.py b/lldb/test/Shell/helper/toolchain.py
index 42968128..ac895e8d 100644
--- a/lldb/test/Shell/helper/toolchain.py
+++ b/lldb/test/Shell/helper/toolchain.py
@@ -241,6 +241,11 @@ def use_support_substitutions(config):
             "-lc++",
         ]
 
+    # Facebook T92898286
+    if config.llvm_test_bolt:
+        host_flags += ["--post-link-optimize"]
+    # End Facebook T92898286
+
     host_flags = " ".join(host_flags)
     config.substitutions.append(("%clang_host", "%clang " + host_flags))
     config.substitutions.append(("%clangxx_host", "%clangxx " + host_flags))
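The site configs above normalize the CMake cache value substituted for @LLVM_TEST_BOLT@ with lit.util.pythonize_bool, which folds the common CMake true/false spellings into a Python bool. A small sketch of the behavior being relied on (example values are illustrative):

    from lit.util import pythonize_bool

    # CMake writes spellings such as "ON"/"OFF" or "1"/"0" into the
    # generated site config; pythonize_bool maps them to a real bool.
    assert pythonize_bool("ON") is True
    assert pythonize_bool("0") is False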
diff --git a/lldb/test/Shell/lit.site.cfg.py.in b/lldb/test/Shell/lit.site.cfg.py.in
index 31a6d68..8b37d98 100644
--- a/lldb/test/Shell/lit.site.cfg.py.in
+++ b/lldb/test/Shell/lit.site.cfg.py.in
@@ -1,5 +1,10 @@
 @LIT_SITE_CFG_IN_HEADER@
 
+# Facebook T92898286
+import lit.util
+# End Facebook T92898286
+
+
 config.llvm_src_root = "@LLVM_SOURCE_DIR@"
 config.llvm_obj_root = "@LLVM_BINARY_DIR@"
 config.llvm_tools_dir = lit_config.substitute("@LLVM_TOOLS_DIR@")
@@ -36,6 +41,10 @@ config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
 config.lldb_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_LLDB@", "lldb-shell")
 config.clang_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_CLANG@", "lldb-shell")
 
+# Facebook T92898286
+config.llvm_test_bolt = lit.util.pythonize_bool("@LLVM_TEST_BOLT@")
+# End Facebook T92898286
+
 import lit.llvm
 lit.llvm.initialize(lit_config, config)
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index cfd1a08..1478eea 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -729,6 +729,10 @@ set(LLVM_LIB_FUZZING_ENGINE "" CACHE PATH
 option(LLVM_USE_SPLIT_DWARF
   "Use -gsplit-dwarf when compiling llvm and --gdb-index when linking." OFF)
 
+# Facebook T92898286
+option(LLVM_TEST_BOLT "Enable BOLT testing in non-BOLT tests that use clang" OFF)
+# End Facebook T92898286
+
 # Define an option controlling whether we should build for 32-bit on 64-bit
 # platforms, where supported.
 if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT (WIN32 OR ${CMAKE_SYSTEM_NAME} MATCHES "AIX"))
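End to end: configuring with -DLLVM_TEST_BOLT=ON propagates through the generated lit.site.cfg files into config.llvm_test_bolt, which adds --post-link-optimize to the clang invocations used by the cross-project and lldb test suites, so every test binary they build is rewritten by llvm-bolt before the tests run.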