Diffstat (limited to 'lld/MachO')
-rw-r--r--  lld/MachO/Arch/X86_64.cpp            3
-rw-r--r--  lld/MachO/ConcatOutputSection.cpp    6
-rw-r--r--  lld/MachO/Config.h                   1
-rw-r--r--  lld/MachO/Driver.cpp                54
-rw-r--r--  lld/MachO/ICF.cpp                   63
-rw-r--r--  lld/MachO/InputFiles.cpp            18
-rw-r--r--  lld/MachO/Options.td                 8
-rw-r--r--  lld/MachO/SectionPriorities.cpp    130
-rw-r--r--  lld/MachO/SectionPriorities.h       28
-rw-r--r--  lld/MachO/SymbolTable.cpp           35
-rw-r--r--  lld/MachO/SyntheticSections.cpp     62
-rw-r--r--  lld/MachO/UnwindInfoSection.cpp     75
12 files changed, 275 insertions(+), 208 deletions(-)
diff --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp
index 111c4d9..f351399 100644
--- a/lld/MachO/Arch/X86_64.cpp
+++ b/lld/MachO/Arch/X86_64.cpp
@@ -54,7 +54,8 @@ static constexpr std::array<RelocAttrs, 10> relocAttrsArray{{
{"UNSIGNED", B(UNSIGNED) | B(ABSOLUTE) | B(EXTERN) | B(LOCAL) | B(BYTE1) |
B(BYTE4) | B(BYTE8)},
{"SIGNED", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)},
- {"BRANCH", B(PCREL) | B(EXTERN) | B(BRANCH) | B(BYTE1) | B(BYTE4)},
+ {"BRANCH",
+ B(PCREL) | B(EXTERN) | B(LOCAL) | B(BRANCH) | B(BYTE1) | B(BYTE4)},
{"GOT_LOAD", B(PCREL) | B(EXTERN) | B(GOT) | B(LOAD) | B(BYTE4)},
{"GOT", B(PCREL) | B(EXTERN) | B(GOT) | B(POINTER) | B(BYTE4)},
{"SUBTRACTOR", B(SUBTRAHEND) | B(EXTERN) | B(BYTE4) | B(BYTE8)},
diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp
index 8067d23..e559676 100644
--- a/lld/MachO/ConcatOutputSection.cpp
+++ b/lld/MachO/ConcatOutputSection.cpp
@@ -306,7 +306,7 @@ void TextOutputSection::finalize() {
// contains several branch instructions in succession, then the distance
// from the current position to the position where the thunks are inserted
// grows. So leave room for a bunch of thunks.
- unsigned slop = 256 * thunkSize;
+ unsigned slop = config->slopScale * thunkSize;
while (finalIdx < endIdx) {
uint64_t expectedNewSize =
alignToPowerOf2(addr + size, inputs[finalIdx]->align) +
@@ -384,7 +384,9 @@ void TextOutputSection::finalize() {
// above. If you hit this: For the current algorithm, just bumping up
// slop above and trying again is probably simplest. (See also PR51578
// comment 5).
- fatal(Twine(__FUNCTION__) + ": FIXME: thunk range overrun");
+ fatal(Twine(__FUNCTION__) +
+ ": FIXME: thunk range overrun. Consider increasing the "
+ "slop-scale with `--slop-scale=<unsigned_int>`.");
}
thunkInfo.isec =
makeSyntheticInputSection(isec->getSegName(), isec->getName());
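
The two hunks above, together with the Config.h, Driver.cpp, and Options.td changes below, turn the previously hard-coded slop factor of 256 into a tunable. A minimal standalone sketch (not lld code) of what the knob controls, namely the address-range headroom reserved for branch thunks, assuming a hypothetical 12-byte thunk:

#include <cstdio>

int main() {
  const unsigned thunkSize = 12; // hypothetical per-thunk size in bytes
  // slop = slopScale * thunkSize; the default scale of 256 reserves about
  // 3 KiB here, and raising --slop-scale doubles the headroom as it doubles.
  for (unsigned slopScale : {256u, 512u, 1024u})
    printf("slopScale=%u -> slop=%u bytes\n", slopScale,
           slopScale * thunkSize);
  return 0;
}
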
diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index a2ca577..759a8cb 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -224,6 +224,7 @@ struct Configuration {
bool disableVerify;
bool separateCstringLiteralSections;
bool tailMergeStrings;
+ unsigned slopScale = 256;
bool callGraphProfileSort = false;
llvm::StringRef printSymbolOrder;
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 32b2099..f4f3aba 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -41,6 +41,7 @@
#include "llvm/Object/Archive.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/Path.h"
@@ -53,6 +54,10 @@
#include "llvm/TextAPI/Architecture.h"
#include "llvm/TextAPI/PackedVersion.h"
+#if !_WIN32
+#include <sys/mman.h>
+#endif
+
using namespace llvm;
using namespace llvm::MachO;
using namespace llvm::object;
@@ -292,12 +297,13 @@ struct DeferredFile {
using DeferredFiles = std::vector<DeferredFile>;
#if LLVM_ENABLE_THREADS
-class SerialBackgroundQueue {
+class SerialBackgroundWorkQueue {
std::deque<std::function<void()>> queue;
std::thread *running;
std::mutex mutex;
public:
+ std::atomic_bool stopAllWork = false;
void queueWork(std::function<void()> work) {
mutex.lock();
if (running && queue.empty()) {
@@ -312,7 +318,7 @@ public:
queue.emplace_back(std::move(work));
if (!running)
running = new std::thread([&]() {
- while (true) {
+ while (!stopAllWork) {
mutex.lock();
if (queue.empty()) {
mutex.unlock();
@@ -331,6 +337,8 @@ public:
}
};
+static SerialBackgroundWorkQueue pageInQueue;
+
// Most input files have been mapped but not yet paged in.
// This code forces the page-ins on multiple threads so
// the process is not stalled waiting on disk buffer i/o.
@@ -339,8 +347,8 @@ void multiThreadedPageInBackground(DeferredFiles &deferred) {
static const size_t largeArchive = 10 * 1024 * 1024;
#ifndef NDEBUG
using namespace std::chrono;
- std::atomic_int numDeferedFilesTouched = 0;
static std::atomic_uint64_t totalBytes = 0;
+  std::atomic_int numDeferredFilesAdvised = 0;
auto t0 = high_resolution_clock::now();
#endif
@@ -348,24 +356,34 @@ void multiThreadedPageInBackground(DeferredFiles &deferred) {
const StringRef &buff = deferredFile.buffer.getBuffer();
if (buff.size() > largeArchive)
return;
+
#ifndef NDEBUG
totalBytes += buff.size();
- numDeferedFilesTouched += 1;
+    numDeferredFilesAdvised += 1;
#endif
-
+#if _WIN32
// Reference all file's mmap'd pages to load them into memory.
- for (const char *page = buff.data(), *end = page + buff.size(); page < end;
- page += pageSize) {
+ for (const char *page = buff.data(), *end = page + buff.size();
+ page < end && !pageInQueue.stopAllWork; page += pageSize) {
[[maybe_unused]] volatile char t = *page;
(void)t;
}
+#else
+#define DEBUG_TYPE "lld-madvise"
+ auto aligned =
+      llvm::alignDown(reinterpret_cast<uintptr_t>(buff.data()), pageSize);
+  if (madvise((void *)aligned, buff.size(), MADV_WILLNEED) < 0)
+    LLVM_DEBUG(llvm::dbgs() << "madvise error: " << strerror(errno) << "\n");
+#undef DEBUG_TYPE
+#endif
};
+
{ // Create scope for waiting for the taskGroup
std::atomic_size_t index = 0;
llvm::parallel::TaskGroup taskGroup;
for (int w = 0; w < config->readWorkers; w++)
taskGroup.spawn([&index, &preloadDeferredFile, &deferred]() {
- while (true) {
+ while (!pageInQueue.stopAllWork) {
size_t localIndex = index.fetch_add(1);
if (localIndex >= deferred.size())
break;
@@ -373,17 +391,17 @@ void multiThreadedPageInBackground(DeferredFiles &deferred) {
}
});
}
+
#ifndef NDEBUG
auto dt = high_resolution_clock::now() - t0;
if (Process::GetEnv("LLD_MULTI_THREAD_PAGE"))
llvm::dbgs() << "multiThreadedPageIn " << totalBytes << "/"
- << numDeferedFilesTouched << "/" << deferred.size() << "/"
+                 << numDeferredFilesAdvised << "/" << deferred.size() << "/"
<< duration_cast<milliseconds>(dt).count() / 1000. << "\n";
#endif
}
static void multiThreadedPageIn(const DeferredFiles &deferred) {
- static SerialBackgroundQueue pageInQueue;
pageInQueue.queueWork([=]() {
DeferredFiles files = deferred;
multiThreadedPageInBackground(files);
@@ -489,7 +507,7 @@ static InputFile *processFile(std::optional<MemoryBufferRef> buffer,
continue;
}
- if (archiveContents)
+ if (config->readWorkers && archiveContents)
archiveContents->push_back({path, isLazy, *mb});
if (!hasObjCSection(*mb))
continue;
@@ -1447,6 +1465,8 @@ static void createFiles(const InputArgList &args) {
multiThreadedPageIn(archiveContents);
for (auto *archive : archives)
archive->addLazySymbols();
+
+ pageInQueue.stopAllWork = true;
}
#endif
}
@@ -1845,8 +1865,8 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
"'");
config->readWorkers = workers;
#else
- error(arg->getSpelling() +
- ": option unavailable because lld was not built with thread support");
+ warn(arg->getSpelling() +
+ ": option unavailable because lld was not built with thread support");
#endif
}
if (auto *arg = args.getLastArg(OPT_threads_eq)) {
@@ -1995,6 +2015,14 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
OPT_no_separate_cstring_literal_sections, false);
config->tailMergeStrings =
args.hasFlag(OPT_tail_merge_strings, OPT_no_tail_merge_strings, false);
+ if (auto *arg = args.getLastArg(OPT_slop_scale_eq)) {
+    StringRef v(arg->getValue());
+    unsigned slop = 0;
+    if (!llvm::to_integer(v, slop))
+      error(arg->getSpelling() +
+            ": expected a non-negative integer, but got '" + v + "'");
+    config->slopScale = slop;
+ }
auto IncompatWithCGSort = [&](StringRef firstArgStr) {
// Throw an error only if --call-graph-profile-sort is explicitly specified
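
For readers unfamiliar with the MADV_WILLNEED path added above, here is a minimal standalone sketch (POSIX only, not lld code) of the same prefetch idea: map a file read-only, then ask the kernel to page the mapping in asynchronously instead of touching every page from a worker thread.

#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <cerrno>
#include <cstdio>
#include <cstring>

int main(int argc, char **argv) {
  if (argc < 2)
    return 1;
  int fd = open(argv[1], O_RDONLY);
  if (fd < 0)
    return 1;
  struct stat st;
  if (fstat(fd, &st) != 0 || st.st_size == 0)
    return 1;
  void *map = mmap(nullptr, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
  if (map == MAP_FAILED)
    return 1;
  // mmap returns a page-aligned address, so no alignDown is needed here;
  // lld aligns down because it advises a sub-range of a larger mapping.
  if (madvise(map, st.st_size, MADV_WILLNEED) < 0)
    fprintf(stderr, "madvise: %s\n", strerror(errno));
  munmap(map, st.st_size);
  close(fd);
  return 0;
}
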
diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index 7b31378c3..e0fc897 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -173,14 +173,37 @@ bool ICF::equalsConstant(const ConcatInputSection *ia,
// a valid offset in the literal section.
return isecA->getOffset(valueA) == isecB->getOffset(valueB) &&
ra.addend == rb.addend;
- else {
- assert(valueA == 0 && valueB == 0);
- // For section relocs, we compare the content at the section offset.
- return isecA->getOffset(ra.addend) == isecB->getOffset(rb.addend);
- }
+ assert(valueA == 0 && valueB == 0);
+ // For section relocs, we compare the content at the section offset.
+ return isecA->getOffset(ra.addend) == isecB->getOffset(rb.addend);
};
- return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(),
- f);
+ if (!llvm::equal(ia->relocs, ib->relocs, f))
+ return false;
+
+ // Check unwind info structural compatibility: if there are symbols with
+ // associated unwind info, check that both sections have compatible symbol
+ // layouts. For simplicity, we only attempt folding when all symbols are at
+ // offset zero within the section (which is typically the case with
+ // .subsections_via_symbols.)
+ auto hasUnwind = [](Defined *d) { return d->unwindEntry() != nullptr; };
+ const auto *itA = llvm::find_if(ia->symbols, hasUnwind);
+ const auto *itB = llvm::find_if(ib->symbols, hasUnwind);
+ if (itA == ia->symbols.end())
+ return itB == ib->symbols.end();
+ if (itB == ib->symbols.end())
+ return false;
+ const Defined *da = *itA;
+ const Defined *db = *itB;
+ if (da->value != 0 || db->value != 0)
+ return false;
+ auto isZero = [](Defined *d) { return d->value == 0; };
+ // Since symbols are stored in order of value, and since we have already
+ // checked that da/db have value zero, we just need to do the isZero check on
+ // the subsequent symbols.
+ return std::find_if_not(std::next(itA), ia->symbols.end(), isZero) ==
+ ia->symbols.end() &&
+ std::find_if_not(std::next(itB), ib->symbols.end(), isZero) ==
+ ib->symbols.end();
}
// Compare the "moving" parts of two ConcatInputSections -- i.e. everything not
@@ -217,31 +240,19 @@ bool ICF::equalsVariable(const ConcatInputSection *ia,
}
return isecA->icfEqClass[icfPass % 2] == isecB->icfEqClass[icfPass % 2];
};
- if (!std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), f))
+ if (!llvm::equal(ia->relocs, ib->relocs, f))
return false;
- // If there are symbols with associated unwind info, check that the unwind
- // info matches. For simplicity, we only handle the case where there are only
- // symbols at offset zero within the section (which is typically the case with
- // .subsections_via_symbols.)
+ // Compare unwind info equivalence classes.
auto hasUnwind = [](Defined *d) { return d->unwindEntry() != nullptr; };
const auto *itA = llvm::find_if(ia->symbols, hasUnwind);
- const auto *itB = llvm::find_if(ib->symbols, hasUnwind);
if (itA == ia->symbols.end())
- return itB == ib->symbols.end();
- if (itB == ib->symbols.end())
- return false;
+ return true;
const Defined *da = *itA;
- const Defined *db = *itB;
- if (da->unwindEntry()->icfEqClass[icfPass % 2] !=
- db->unwindEntry()->icfEqClass[icfPass % 2] ||
- da->value != 0 || db->value != 0)
- return false;
- auto isZero = [](Defined *d) { return d->value == 0; };
- return std::find_if_not(std::next(itA), ia->symbols.end(), isZero) ==
- ia->symbols.end() &&
- std::find_if_not(std::next(itB), ib->symbols.end(), isZero) ==
- ib->symbols.end();
+ // equalsConstant() guarantees that both sections have unwind info.
+ const Defined *db = *llvm::find_if(ib->symbols, hasUnwind);
+ return da->unwindEntry()->icfEqClass[icfPass % 2] ==
+ db->unwindEntry()->icfEqClass[icfPass % 2];
}
// Find the first InputSection after BEGIN whose equivalence class differs
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 20e4a1d..81caef5 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -217,7 +217,8 @@ std::optional<MemoryBufferRef> macho::readFile(StringRef path) {
if (entry != cachedReads.end())
return entry->second;
- ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr = MemoryBuffer::getFile(path);
+ ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr =
+ MemoryBuffer::getFile(path, false, /*RequiresNullTerminator=*/false);
if (std::error_code ec = mbOrErr.getError()) {
error("cannot open " + path + ": " + ec.message());
return std::nullopt;
@@ -594,8 +595,8 @@ void ObjFile::parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
// FIXME This logic was written around x86_64 behavior -- ARM64 doesn't
// have pcrel section relocations. We may want to factor this out into
// the arch-specific .cpp file.
- assert(target->hasAttr(r.type, RelocAttrBits::BYTE4));
- referentOffset = sec.addr + relInfo.r_address + 4 + totalAddend -
+ referentOffset = sec.addr + relInfo.r_address +
+ (1ull << relInfo.r_length) + totalAddend -
referentSecHead.addr;
} else {
// The addend for a non-pcrel relocation is its absolute address.
@@ -808,6 +809,17 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
continue;
if ((sym.n_type & N_TYPE) == N_SECT) {
+ if (sym.n_sect == 0) {
+ fatal("section symbol " + StringRef(strtab + sym.n_strx) + " in " +
+              toString(this) + " has an invalid section index [0]");
+      }
+      if (sym.n_sect > sections.size()) {
+        fatal("section symbol " + StringRef(strtab + sym.n_strx) + " in " +
+              toString(this) + " has an invalid section index [" +
+              Twine(static_cast<unsigned>(sym.n_sect)) +
+              "] greater than the total number of sections [" +
+              Twine(sections.size()) + "]");
+ }
Subsections &subsections = sections[sym.n_sect - 1]->subsections;
// parseSections() may have chosen not to parse this section.
if (subsections.empty())
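
The pcrel hunk above replaces a hard-coded 4-byte width with a value derived from the relocation itself. A small sketch of the arithmetic (illustrative only): r_length stores log2 of the relocation width in bytes, so (1ull << r_length) reproduces the old "+ 4" for the common 4-byte case and also covers the other widths.

#include <cstdio>

int main() {
  for (int rLength : {0, 1, 2, 3})
    printf("r_length=%d -> width=%llu bytes\n", rLength, 1ull << rLength);
  return 0;
}
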
diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index be1a1cc..f5f7549 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -1095,6 +1095,14 @@ defm tail_merge_strings
: BB<"tail-merge-strings", "Enable string tail merging",
"Disable string tail merging to improve link-time performance">,
Group<grp_rare>;
+def slop_scale_eq
+    : Joined<["--"], "slop-scale=">,
+      MetaVarName<"<unsigned_int>">,
+      HelpText<"Specify the slop scale. The default value is 256. If the link "
+               "fails with a thunk-range overrun because the binary has many "
+               "consecutive branch instructions, increase this value, e.g. to "
+               "512 or 1024">,
+ Group<grp_rare>;
def grp_deprecated : OptionGroup<"deprecated">, HelpText<"DEPRECATED">;
diff --git a/lld/MachO/SectionPriorities.cpp b/lld/MachO/SectionPriorities.cpp
index cf657aa..b652d1e 100644
--- a/lld/MachO/SectionPriorities.cpp
+++ b/lld/MachO/SectionPriorities.cpp
@@ -27,6 +27,7 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/xxhash.h"
#include <numeric>
@@ -246,33 +247,45 @@ DenseMap<const InputSection *, int> CallGraphSort::run() {
return orderMap;
}
-std::optional<int>
-macho::PriorityBuilder::getSymbolOrCStringPriority(const StringRef key,
- InputFile *f) {
+void macho::PriorityBuilder::SymbolPriorityEntry::setPriority(
+ int priority, StringRef objectFile) {
+ if (!objectFile.empty())
+ objectFiles.try_emplace(objectFile, priority);
+ else
+ anyObjectFile = std::min(anyObjectFile, priority);
+}
- auto it = priorities.find(key);
- if (it == priorities.end())
- return std::nullopt;
- const SymbolPriorityEntry &entry = it->second;
+int macho::PriorityBuilder::SymbolPriorityEntry::getPriority(
+ const InputFile *f) const {
if (!f)
- return entry.anyObjectFile;
+ return anyObjectFile;
// We don't use toString(InputFile *) here because it returns the full path
// for object files, and we only want the basename.
- StringRef filename;
- if (f->archiveName.empty())
- filename = path::filename(f->getName());
- else
- filename = saver().save(path::filename(f->archiveName) + "(" +
- path::filename(f->getName()) + ")");
- return std::min(entry.objectFiles.lookup(filename), entry.anyObjectFile);
+ StringRef basename = path::filename(f->getName());
+ StringRef filename =
+ f->archiveName.empty()
+ ? basename
+ : saver().save(path::filename(f->archiveName) + "(" + basename + ")");
+ return std::min(objectFiles.lookup(filename), anyObjectFile);
}
std::optional<int>
-macho::PriorityBuilder::getSymbolPriority(const Defined *sym) {
+macho::PriorityBuilder::getCStringPriority(uint32_t hash,
+ const InputFile *f) const {
+ auto it = cStringPriorities.find(hash);
+ if (it == cStringPriorities.end())
+ return std::nullopt;
+ return it->second.getPriority(f);
+}
+
+std::optional<int>
+macho::PriorityBuilder::getSymbolPriority(const Defined *sym) const {
if (sym->isAbsolute())
return std::nullopt;
- return getSymbolOrCStringPriority(utils::getRootSymbol(sym->getName()),
- sym->isec()->getFile());
+ auto it = priorities.find(utils::getRootSymbol(sym->getName()));
+ if (it == priorities.end())
+ return std::nullopt;
+ return it->second.getPriority(sym->isec()->getFile());
}
void macho::PriorityBuilder::extractCallGraphProfile() {
@@ -307,7 +320,7 @@ void macho::PriorityBuilder::parseOrderFile(StringRef path) {
int prio = std::numeric_limits<int>::min();
MemoryBufferRef mbref = *buffer;
for (StringRef line : args::getLines(mbref)) {
- StringRef objectFile, symbolOrCStrHash;
+ StringRef objectFile;
line = line.take_until([](char c) { return c == '#'; }); // ignore comments
line = line.ltrim();
@@ -338,22 +351,16 @@ void macho::PriorityBuilder::parseOrderFile(StringRef path) {
}
// The rest of the line is either <symbol name> or
- // CStringEntryPrefix<cstring hash>
+ // cStringEntryPrefix<cstring hash>
line = line.trim();
- if (line.starts_with(CStringEntryPrefix)) {
- StringRef possibleHash = line.drop_front(CStringEntryPrefix.size());
+ if (line.consume_front(cStringEntryPrefix)) {
uint32_t hash = 0;
- if (to_integer(possibleHash, hash))
- symbolOrCStrHash = possibleHash;
- } else
- symbolOrCStrHash = utils::getRootSymbol(line);
-
- if (!symbolOrCStrHash.empty()) {
- SymbolPriorityEntry &entry = priorities[symbolOrCStrHash];
- if (!objectFile.empty())
- entry.objectFiles.insert(std::make_pair(objectFile, prio));
- else
- entry.anyObjectFile = std::min(entry.anyObjectFile, prio);
+ if (to_integer(line, hash))
+ cStringPriorities[hash].setPriority(prio, objectFile);
+ } else {
+ StringRef symbol = utils::getRootSymbol(line);
+ if (!symbol.empty())
+ priorities[symbol].setPriority(prio, objectFile);
}
++prio;
@@ -405,40 +412,39 @@ macho::PriorityBuilder::buildInputSectionPriorities() {
return sectionPriorities;
}
-std::vector<StringPiecePair> macho::PriorityBuilder::buildCStringPriorities(
- ArrayRef<CStringInputSection *> inputs) {
- // Split the input strings into hold and cold sets.
- // Order hot set based on -order_file_cstring for performance improvement;
- // TODO: Order cold set of cstrings for compression via BP.
- std::vector<std::pair<int, StringPiecePair>>
- hotStringPrioritiesAndStringPieces;
- std::vector<StringPiecePair> coldStringPieces;
- std::vector<StringPiecePair> orderedStringPieces;
-
+void macho::PriorityBuilder::forEachStringPiece(
+ ArrayRef<CStringInputSection *> inputs,
+ std::function<void(CStringInputSection &, StringPiece &, size_t)> f,
+ bool forceInputOrder, bool computeHash) const {
+ std::vector<std::tuple<int, CStringInputSection *, size_t>> orderedPieces;
+ std::vector<std::pair<CStringInputSection *, size_t>> unorderedPieces;
for (CStringInputSection *isec : inputs) {
for (const auto &[stringPieceIdx, piece] : llvm::enumerate(isec->pieces)) {
if (!piece.live)
continue;
-
- std::optional<int> priority = getSymbolOrCStringPriority(
- std::to_string(piece.hash), isec->getFile());
- if (!priority)
- coldStringPieces.emplace_back(isec, stringPieceIdx);
+ // Process pieces in input order if we have no cstrings in our orderfile
+ if (forceInputOrder || cStringPriorities.empty()) {
+ f(*isec, piece, stringPieceIdx);
+ continue;
+ }
+ uint32_t hash =
+ computeHash
+ ? (xxh3_64bits(isec->getStringRef(stringPieceIdx)) & 0x7fffffff)
+ : piece.hash;
+ if (auto priority = getCStringPriority(hash, isec->getFile()))
+ orderedPieces.emplace_back(*priority, isec, stringPieceIdx);
else
- hotStringPrioritiesAndStringPieces.emplace_back(
- *priority, std::make_pair(isec, stringPieceIdx));
+ unorderedPieces.emplace_back(isec, stringPieceIdx);
}
}
-
- // Order hot set for perf
- llvm::stable_sort(hotStringPrioritiesAndStringPieces);
- for (auto &[priority, stringPiecePair] : hotStringPrioritiesAndStringPieces)
- orderedStringPieces.push_back(stringPiecePair);
-
- // TODO: Order cold set for compression
-
- orderedStringPieces.insert(orderedStringPieces.end(),
- coldStringPieces.begin(), coldStringPieces.end());
-
- return orderedStringPieces;
+ if (orderedPieces.empty() && unorderedPieces.empty())
+ return;
+ llvm::stable_sort(orderedPieces, [](const auto &left, const auto &right) {
+ return std::get<0>(left) < std::get<0>(right);
+ });
+ for (auto &[priority, isec, pieceIdx] : orderedPieces)
+ f(*isec, isec->pieces[pieceIdx], pieceIdx);
+ // TODO: Add option to order the remaining cstrings for compression
+ for (auto &[isec, pieceIdx] : unorderedPieces)
+ f(*isec, isec->pieces[pieceIdx], pieceIdx);
}
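
Since cstring literals are not symbolized, the order file identifies them by hash. A small sketch (assumes an LLVM build environment; the literal is hypothetical) of how an entry consumed by parseOrderFile() above could be produced: hash the string the same way forEachStringPiece() does when computeHash is set, mask to 31 bits, and prefix the result with "CSTR;".

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/xxhash.h"
#include <cstdint>
#include <cstdio>

int main() {
  llvm::StringRef literal = "hello world"; // hypothetical string literal
  uint32_t hash = llvm::xxh3_64bits(literal) & 0x7fffffff;
  printf("CSTR;%u\n", hash); // one line of an -order_file
  return 0;
}
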
diff --git a/lld/MachO/SectionPriorities.h b/lld/MachO/SectionPriorities.h
index cc4e30f..24d2dbc 100644
--- a/lld/MachO/SectionPriorities.h
+++ b/lld/MachO/SectionPriorities.h
@@ -16,7 +16,6 @@
namespace lld::macho {
using SectionPair = std::pair<const InputSection *, const InputSection *>;
-using StringPiecePair = std::pair<CStringInputSection *, size_t>;
class PriorityBuilder {
public:
@@ -29,7 +28,7 @@ public:
//
// An order file has one entry per line, in the following format:
//
- // <cpu>:<object file>:[<symbol name> | CStringEntryPrefix <cstring hash>]
+ // <cpu>:<object file>:[<symbol name> | cStringEntryPrefix <cstring hash>]
//
// <cpu> and <object file> are optional.
// If not specified, then that entry tries to match either,
@@ -42,7 +41,7 @@ public:
// lowest-ordered entry (the one nearest to the front of the list.)
//
// or 2) any cstring literal with the given hash, if the entry has the
- // CStringEntryPrefix prefix defined below in the file. <cstring hash> is the
+ // cStringEntryPrefix prefix defined below in the file. <cstring hash> is the
// hash of cstring literal content.
//
// Cstring literals are not symbolized, we can't identify them by name
@@ -54,6 +53,16 @@ public:
// The file can also have line comments that start with '#'.
void parseOrderFile(StringRef path);
+ /// Call \p f for each string piece in \p inputs. If there are any cstring
+ /// literals in the orderfile (and \p forceInputOrder is false) then string
+  /// pieces are ordered by the orderfile. \p computeHash must be set when
+  /// string deduplication is disabled, because the string piece hashes are
+  /// not populated in that case.
+ void forEachStringPiece(
+ ArrayRef<CStringInputSection *> inputs,
+ std::function<void(CStringInputSection &, StringPiece &, size_t)> f,
+ bool forceInputOrder = false, bool computeHash = false) const;
+
// Returns layout priorities for some or all input sections. Sections are laid
// out in decreasing order; that is, a higher priority section will be closer
// to the beginning of its output section.
@@ -66,8 +75,6 @@ public:
// Each section gets assigned the priority of the highest-priority symbol it
// contains.
llvm::DenseMap<const InputSection *, int> buildInputSectionPriorities();
- std::vector<StringPiecePair>
- buildCStringPriorities(ArrayRef<CStringInputSection *>);
private:
// The symbol with the smallest priority should be ordered first in the output
@@ -78,13 +85,16 @@ private:
int anyObjectFile = 0;
// The priority given to a matching symbol from a particular object file.
llvm::DenseMap<llvm::StringRef, int> objectFiles;
+ void setPriority(int priority, StringRef objectFile);
+ int getPriority(const InputFile *f) const;
};
- const llvm::StringRef CStringEntryPrefix = "CSTR;";
+ const llvm::StringRef cStringEntryPrefix = "CSTR;";
- std::optional<int> getSymbolPriority(const Defined *sym);
- std::optional<int> getSymbolOrCStringPriority(const StringRef key,
- InputFile *f);
+ std::optional<int> getSymbolPriority(const Defined *sym) const;
+ std::optional<int> getCStringPriority(uint32_t hash,
+ const InputFile *f) const;
llvm::DenseMap<llvm::StringRef, SymbolPriorityEntry> priorities;
+ llvm::DenseMap<uint32_t, SymbolPriorityEntry> cStringPriorities;
llvm::MapVector<SectionPair, uint64_t> callGraphProfile;
};
diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
index baddddc..a7db5a3a 100644
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -61,8 +61,8 @@ struct DuplicateSymbolDiag {
SmallVector<DuplicateSymbolDiag> dupSymDiags;
} // namespace
-// Move symbols at \p fromOff in \p fromIsec into \p toIsec, unless that symbol
-// is \p skip.
+// Move local symbols at \p fromOff in \p fromIsec into \p toIsec, unless that
+// symbol is \p skip, in which case we just remove it.
static void transplantSymbolsAtOffset(InputSection *fromIsec,
InputSection *toIsec, Defined *skip,
uint64_t fromOff, uint64_t toOff) {
@@ -78,22 +78,23 @@ static void transplantSymbolsAtOffset(InputSection *fromIsec,
auto insertIt = llvm::upper_bound(toIsec->symbols, toOff, symSucceedsOff);
llvm::erase_if(fromIsec->symbols, [&](Symbol *s) {
auto *d = cast<Defined>(s);
- if (d->value != fromOff)
+ if (d == skip)
+ return true;
+ if (d->value != fromOff || d->isExternal())
return false;
- if (d != skip) {
- // This repeated insertion will be quadratic unless insertIt is the end
- // iterator. However, that is typically the case for files that have
- // .subsections_via_symbols set.
- insertIt = toIsec->symbols.insert(insertIt, d);
- d->originalIsec = toIsec;
- d->value = toOff;
- // We don't want to have more than one unwindEntry at a given address, so
- // drop the redundant ones. We We can safely drop the unwindEntries of
- // the symbols in fromIsec since we will be adding another unwindEntry as
- // we finish parsing toIsec's file. (We can assume that toIsec has its
- // own unwindEntry because of the ODR.)
- d->originalUnwindEntry = nullptr;
- }
+
+ // This repeated insertion will be quadratic unless insertIt is the end
+ // iterator. However, that is typically the case for files that have
+ // .subsections_via_symbols set.
+ insertIt = toIsec->symbols.insert(insertIt, d);
+ d->originalIsec = toIsec;
+ d->value = toOff;
+ // We don't want to have more than one unwindEntry at a given address, so
+ // drop the redundant ones. We can safely drop the unwindEntries of the
+ // symbols in fromIsec since we will be adding another unwindEntry as we
+ // finish parsing toIsec's file. (We can assume that toIsec has its own
+ // unwindEntry because of the ODR.)
+ d->originalUnwindEntry = nullptr;
return true;
});
}
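
The transplant loop above relies on llvm::upper_bound to keep toIsec->symbols sorted as it inserts. A tiny sketch of that pattern (illustrative only, plain STL):

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> offsets = {0, 4, 8, 16};
  // upper_bound returns the first element greater than the key, so inserting
  // there preserves the sorted order; when it is the end iterator (the common
  // .subsections_via_symbols case), repeated inserts stay cheap.
  auto insertIt = std::upper_bound(offsets.begin(), offsets.end(), 8);
  offsets.insert(insertIt, 8);
  for (int v : offsets)
    printf("%d ", v); // 0 4 8 8 16
  printf("\n");
  return 0;
}
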
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 187cccb..fecc51f 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -1721,26 +1721,24 @@ void CStringSection::writeTo(uint8_t *buf) const {
// and don't need this alignment. They will be emitted at some arbitrary address
// `A`, but ld64 will treat them as being 16-byte aligned with an offset of
// `16 % A`.
-static Align getStringPieceAlignment(const CStringInputSection *isec,
+static Align getStringPieceAlignment(const CStringInputSection &isec,
const StringPiece &piece) {
- return llvm::Align(1ULL << llvm::countr_zero(isec->align | piece.inSecOff));
+ return llvm::Align(1ULL << llvm::countr_zero(isec.align | piece.inSecOff));
}
void CStringSection::finalizeContents() {
size = 0;
- // TODO: Call buildCStringPriorities() to support cstring ordering when
- // deduplication is off, although this may negatively impact build
- // performance.
- for (CStringInputSection *isec : inputs) {
- for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
- if (!piece.live)
- continue;
- piece.outSecOff = alignTo(size, getStringPieceAlignment(isec, piece));
- StringRef string = isec->getStringRef(i);
- size = piece.outSecOff + string.size() + 1; // account for null terminator
- }
+ priorityBuilder.forEachStringPiece(
+ inputs,
+ [&](CStringInputSection &isec, StringPiece &piece, size_t pieceIdx) {
+ piece.outSecOff = alignTo(size, getStringPieceAlignment(isec, piece));
+ StringRef string = isec.getStringRef(pieceIdx);
+ size =
+ piece.outSecOff + string.size() + 1; // account for null terminator
+ },
+ /*forceInputOrder=*/false, /*computeHash=*/true);
+ for (CStringInputSection *isec : inputs)
isec->isFinal = true;
- }
}
void DeduplicatedCStringSection::finalizeContents() {
@@ -1748,20 +1746,19 @@ void DeduplicatedCStringSection::finalizeContents() {
DenseMap<CachedHashStringRef, Align> strToAlignment;
// Used for tail merging only
std::vector<CachedHashStringRef> deduplicatedStrs;
- for (const CStringInputSection *isec : inputs) {
- for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
- if (!piece.live)
- continue;
- auto s = isec->getCachedHashStringRef(i);
- assert(isec->align != 0);
- auto align = getStringPieceAlignment(isec, piece);
- auto [it, wasInserted] = strToAlignment.try_emplace(s, align);
- if (config->tailMergeStrings && wasInserted)
- deduplicatedStrs.push_back(s);
- if (!wasInserted && it->second < align)
- it->second = align;
- }
- }
+ priorityBuilder.forEachStringPiece(
+ inputs,
+ [&](CStringInputSection &isec, StringPiece &piece, size_t pieceIdx) {
+ auto s = isec.getCachedHashStringRef(pieceIdx);
+ assert(isec.align != 0);
+ auto align = getStringPieceAlignment(isec, piece);
+ auto [it, wasInserted] = strToAlignment.try_emplace(s, align);
+ if (config->tailMergeStrings && wasInserted)
+ deduplicatedStrs.push_back(s);
+ if (!wasInserted && it->second < align)
+ it->second = align;
+ },
+ /*forceInputOrder=*/true);
// Like lexigraphical sort, except we read strings in reverse and take the
// longest string first
@@ -1801,9 +1798,10 @@ void DeduplicatedCStringSection::finalizeContents() {
// Sort the strings for performance and compression size win, and then
// assign an offset for each string and save it to the corresponding
// StringPieces for easy access.
- for (auto &[isec, i] : priorityBuilder.buildCStringPriorities(inputs)) {
- auto &piece = isec->pieces[i];
- auto s = isec->getCachedHashStringRef(i);
+ priorityBuilder.forEachStringPiece(inputs, [&](CStringInputSection &isec,
+                                                    StringPiece &piece,
+                                                    size_t pieceIdx) {
+ auto s = isec.getCachedHashStringRef(pieceIdx);
// Any string can be tail merged with itself with an offset of zero
uint64_t tailMergeOffset = 0;
auto mergeIt =
@@ -1829,7 +1827,7 @@ void DeduplicatedCStringSection::finalizeContents() {
stringOffsetMap[tailMergedString] = piece.outSecOff;
assert(isAligned(strToAlignment.at(tailMergedString), piece.outSecOff));
}
- }
+ });
for (CStringInputSection *isec : inputs)
isec->isFinal = true;
}
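
getStringPieceAlignment() above now takes the section by reference; its result is the largest power of two that divides both the section alignment and the piece's offset within the section. A quick sketch of the arithmetic (illustrative only):

#include <bit>
#include <cstdint>
#include <cstdio>

static uint64_t pieceAlign(uint64_t secAlign, uint64_t inSecOff) {
  // Same formula as getStringPieceAlignment(): 1 << countr_zero(align | off).
  return uint64_t(1) << std::countr_zero(secAlign | inSecOff);
}

int main() {
  printf("%llu\n", (unsigned long long)pieceAlign(16, 0)); // 16
  printf("%llu\n", (unsigned long long)pieceAlign(16, 4)); // 4
  printf("%llu\n", (unsigned long long)pieceAlign(16, 6)); // 2
  return 0;
}
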
diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
index 6e9f6c2..bf01b12 100644
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -153,8 +153,6 @@ private:
// The entries here will be in the same order as their originating symbols
// in symbolsVec.
std::vector<CompactUnwindEntry> cuEntries;
- // Indices into the cuEntries vector.
- std::vector<size_t> cuIndices;
std::vector<Symbol *> personalities;
SmallDenseMap<std::pair<InputSection *, uint64_t /* addend */>, Symbol *>
personalityTable;
@@ -400,8 +398,7 @@ void UnwindInfoSectionImpl::relocateCompactUnwind(
// There should only be a handful of unique personality pointers, so we can
// encode them as 2-bit indices into a small array.
void UnwindInfoSectionImpl::encodePersonalities() {
- for (size_t idx : cuIndices) {
- CompactUnwindEntry &cu = cuEntries[idx];
+ for (CompactUnwindEntry &cu : cuEntries) {
if (cu.personality == nullptr)
continue;
// Linear search is fast enough for a small array.
@@ -467,27 +464,24 @@ void UnwindInfoSectionImpl::finalize() {
symbolsVec = symbols.takeVector();
relocateCompactUnwind(cuEntries);
- // Rather than sort & fold the 32-byte entries directly, we create a
- // vector of indices to entries and sort & fold that instead.
- cuIndices.resize(cuEntries.size());
- std::iota(cuIndices.begin(), cuIndices.end(), 0);
- llvm::sort(cuIndices, [&](size_t a, size_t b) {
- return cuEntries[a].functionAddress < cuEntries[b].functionAddress;
+ // Sort the entries by address.
+ llvm::sort(cuEntries, [&](auto &a, auto &b) {
+ return a.functionAddress < b.functionAddress;
});
// Record the ending boundary before we fold the entries.
- cueEndBoundary = cuEntries[cuIndices.back()].functionAddress +
- cuEntries[cuIndices.back()].functionLength;
+ cueEndBoundary =
+ cuEntries.back().functionAddress + cuEntries.back().functionLength;
// Fold adjacent entries with matching encoding+personality and without LSDA
- // We use three iterators on the same cuIndices to fold in-situ:
+ // We use three iterators to fold in-situ:
// (1) `foldBegin` is the first of a potential sequence of matching entries
// (2) `foldEnd` is the first non-matching entry after `foldBegin`.
// The semi-open interval [ foldBegin .. foldEnd ) contains a range
// entries that can be folded into a single entry and written to ...
// (3) `foldWrite`
- auto foldWrite = cuIndices.begin();
- for (auto foldBegin = cuIndices.begin(); foldBegin < cuIndices.end();) {
+ auto foldWrite = cuEntries.begin();
+ for (auto foldBegin = cuEntries.begin(); foldBegin != cuEntries.end();) {
auto foldEnd = foldBegin;
// Common LSDA encodings (e.g. for C++ and Objective-C) contain offsets from
// a base address. The base address is normally not contained directly in
@@ -503,9 +497,9 @@ void UnwindInfoSectionImpl::finalize() {
// directly in the LSDA, two functions at different addresses would
// necessarily have different LSDAs, so their CU entries would not have been
// folded anyway.
- while (++foldEnd < cuIndices.end() &&
- cuEntries[*foldBegin].encoding == cuEntries[*foldEnd].encoding &&
- !cuEntries[*foldBegin].lsda && !cuEntries[*foldEnd].lsda &&
+ while (++foldEnd != cuEntries.end() &&
+ foldBegin->encoding == foldEnd->encoding && !foldBegin->lsda &&
+ !foldEnd->lsda &&
// If we've gotten to this point, we don't have an LSDA, which should
// also imply that we don't have a personality function, since in all
// likelihood a personality function needs the LSDA to do anything
@@ -513,21 +507,20 @@ void UnwindInfoSectionImpl::finalize() {
// and no LSDA though (e.g. the C++ personality __gxx_personality_v0
// is just a no-op without LSDA), so we still check for personality
// function equivalence to handle that case.
- cuEntries[*foldBegin].personality ==
- cuEntries[*foldEnd].personality &&
- canFoldEncoding(cuEntries[*foldEnd].encoding))
+ foldBegin->personality == foldEnd->personality &&
+ canFoldEncoding(foldEnd->encoding))
;
*foldWrite++ = *foldBegin;
foldBegin = foldEnd;
}
- cuIndices.erase(foldWrite, cuIndices.end());
+ cuEntries.erase(foldWrite, cuEntries.end());
encodePersonalities();
// Count frequencies of the folded encodings
EncodingMap encodingFrequencies;
- for (size_t idx : cuIndices)
- encodingFrequencies[cuEntries[idx].encoding]++;
+ for (const CompactUnwindEntry &cu : cuEntries)
+ encodingFrequencies[cu.encoding]++;
// Make a vector of encodings, sorted by descending frequency
for (const auto &frequency : encodingFrequencies)
@@ -558,21 +551,19 @@ void UnwindInfoSectionImpl::finalize() {
// and 127..255 references a local per-second-level-page table.
// First we try the compact format and determine how many entries fit.
// If more entries fit in the regular format, we use that.
- for (size_t i = 0; i < cuIndices.size();) {
- size_t idx = cuIndices[i];
+ for (size_t i = 0; i < cuEntries.size();) {
secondLevelPages.emplace_back();
SecondLevelPage &page = secondLevelPages.back();
page.entryIndex = i;
uint64_t functionAddressMax =
- cuEntries[idx].functionAddress + COMPRESSED_ENTRY_FUNC_OFFSET_MASK;
+ cuEntries[i].functionAddress + COMPRESSED_ENTRY_FUNC_OFFSET_MASK;
size_t n = commonEncodings.size();
size_t wordsRemaining =
SECOND_LEVEL_PAGE_WORDS -
sizeof(unwind_info_compressed_second_level_page_header) /
sizeof(uint32_t);
- while (wordsRemaining >= 1 && i < cuIndices.size()) {
- idx = cuIndices[i];
- const CompactUnwindEntry *cuPtr = &cuEntries[idx];
+ while (wordsRemaining >= 1 && i < cuEntries.size()) {
+ const CompactUnwindEntry *cuPtr = &cuEntries[i];
if (cuPtr->functionAddress >= functionAddressMax)
break;
if (commonEncodingIndexes.count(cuPtr->encoding) ||
@@ -593,21 +584,21 @@ void UnwindInfoSectionImpl::finalize() {
// If this is not the final page, see if it's possible to fit more entries
// by using the regular format. This can happen when there are many unique
// encodings, and we saturated the local encoding table early.
- if (i < cuIndices.size() &&
+ if (i < cuEntries.size() &&
page.entryCount < REGULAR_SECOND_LEVEL_ENTRIES_MAX) {
page.kind = UNWIND_SECOND_LEVEL_REGULAR;
page.entryCount = std::min(REGULAR_SECOND_LEVEL_ENTRIES_MAX,
- cuIndices.size() - page.entryIndex);
+ cuEntries.size() - page.entryIndex);
i = page.entryIndex + page.entryCount;
} else {
page.kind = UNWIND_SECOND_LEVEL_COMPRESSED;
}
}
- for (size_t idx : cuIndices) {
- lsdaIndex[idx] = entriesWithLsda.size();
- if (cuEntries[idx].lsda)
- entriesWithLsda.push_back(idx);
+ for (size_t i = 0; i < cuEntries.size(); ++i) {
+ lsdaIndex[i] = entriesWithLsda.size();
+ if (cuEntries[i].lsda)
+ entriesWithLsda.push_back(i);
}
// compute size of __TEXT,__unwind_info section
@@ -626,7 +617,7 @@ void UnwindInfoSectionImpl::finalize() {
// All inputs are relocated and output addresses are known, so write!
void UnwindInfoSectionImpl::writeTo(uint8_t *buf) const {
- assert(!cuIndices.empty() && "call only if there is unwind info");
+ assert(!cuEntries.empty() && "call only if there is unwind info");
// section header
auto *uip = reinterpret_cast<unwind_info_section_header *>(buf);
@@ -660,7 +651,7 @@ void UnwindInfoSectionImpl::writeTo(uint8_t *buf) const {
uint64_t l2PagesOffset = level2PagesOffset;
auto *iep = reinterpret_cast<unwind_info_section_header_index_entry *>(i32p);
for (const SecondLevelPage &page : secondLevelPages) {
- size_t idx = cuIndices[page.entryIndex];
+ size_t idx = page.entryIndex;
iep->functionOffset = cuEntries[idx].functionAddress - in.header->addr;
iep->secondLevelPagesSectionOffset = l2PagesOffset;
iep->lsdaIndexArraySectionOffset =
@@ -695,7 +686,7 @@ void UnwindInfoSectionImpl::writeTo(uint8_t *buf) const {
for (const SecondLevelPage &page : secondLevelPages) {
if (page.kind == UNWIND_SECOND_LEVEL_COMPRESSED) {
uintptr_t functionAddressBase =
- cuEntries[cuIndices[page.entryIndex]].functionAddress;
+ cuEntries[page.entryIndex].functionAddress;
auto *p2p =
reinterpret_cast<unwind_info_compressed_second_level_page_header *>(
pp);
@@ -708,8 +699,7 @@ void UnwindInfoSectionImpl::writeTo(uint8_t *buf) const {
p2p->encodingsCount = page.localEncodings.size();
auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]);
for (size_t i = 0; i < page.entryCount; i++) {
- const CompactUnwindEntry &cue =
- cuEntries[cuIndices[page.entryIndex + i]];
+ const CompactUnwindEntry &cue = cuEntries[page.entryIndex + i];
auto it = commonEncodingIndexes.find(cue.encoding);
if (it == commonEncodingIndexes.end())
it = page.localEncodingIndexes.find(cue.encoding);
@@ -728,8 +718,7 @@ void UnwindInfoSectionImpl::writeTo(uint8_t *buf) const {
p2p->entryCount = page.entryCount;
auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]);
for (size_t i = 0; i < page.entryCount; i++) {
- const CompactUnwindEntry &cue =
- cuEntries[cuIndices[page.entryIndex + i]];
+ const CompactUnwindEntry &cue = cuEntries[page.entryIndex + i];
*ep++ = cue.functionAddress;
*ep++ = cue.encoding;
}
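
With cuIndices gone, the folding above runs directly over cuEntries using a write cursor. Stripped of the unwind-specific checks, the in-situ fold looks like this (Entry and mergeable() are hypothetical stand-ins for CompactUnwindEntry and the encoding/personality/LSDA tests):

#include <vector>

struct Entry {
  int key;
};

static bool mergeable(const Entry &a, const Entry &b) { return a.key == b.key; }

// Keep the first entry of every run of mergeable neighbors, dropping the rest.
static void fold(std::vector<Entry> &entries) {
  auto foldWrite = entries.begin();
  for (auto foldBegin = entries.begin(); foldBegin != entries.end();) {
    auto foldEnd = foldBegin;
    while (++foldEnd != entries.end() && mergeable(*foldBegin, *foldEnd))
      ; // extend the run of foldable entries
    *foldWrite++ = *foldBegin; // keep one representative per run
    foldBegin = foldEnd;
  }
  entries.erase(foldWrite, entries.end());
}

int main() {
  std::vector<Entry> e = {{1}, {1}, {2}, {3}, {3}, {3}};
  fold(e);                // leaves {1}, {2}, {3}
  return int(e.size());   // 3
}
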