diff options
Diffstat (limited to 'lld/MachO')
| -rw-r--r-- | lld/MachO/Arch/X86_64.cpp | 3 | ||||
| -rw-r--r-- | lld/MachO/ConcatOutputSection.cpp | 6 | ||||
| -rw-r--r-- | lld/MachO/Config.h | 1 | ||||
| -rw-r--r-- | lld/MachO/Driver.cpp | 54 | ||||
| -rw-r--r-- | lld/MachO/ICF.cpp | 63 | ||||
| -rw-r--r-- | lld/MachO/InputFiles.cpp | 18 | ||||
| -rw-r--r-- | lld/MachO/Options.td | 8 | ||||
| -rw-r--r-- | lld/MachO/SectionPriorities.cpp | 130 | ||||
| -rw-r--r-- | lld/MachO/SectionPriorities.h | 28 | ||||
| -rw-r--r-- | lld/MachO/SymbolTable.cpp | 35 | ||||
| -rw-r--r-- | lld/MachO/SyntheticSections.cpp | 62 | ||||
| -rw-r--r-- | lld/MachO/UnwindInfoSection.cpp | 75 |
12 files changed, 275 insertions, 208 deletions
diff --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp index 111c4d9..f351399 100644 --- a/lld/MachO/Arch/X86_64.cpp +++ b/lld/MachO/Arch/X86_64.cpp @@ -54,7 +54,8 @@ static constexpr std::array<RelocAttrs, 10> relocAttrsArray{{ {"UNSIGNED", B(UNSIGNED) | B(ABSOLUTE) | B(EXTERN) | B(LOCAL) | B(BYTE1) | B(BYTE4) | B(BYTE8)}, {"SIGNED", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)}, - {"BRANCH", B(PCREL) | B(EXTERN) | B(BRANCH) | B(BYTE1) | B(BYTE4)}, + {"BRANCH", + B(PCREL) | B(EXTERN) | B(LOCAL) | B(BRANCH) | B(BYTE1) | B(BYTE4)}, {"GOT_LOAD", B(PCREL) | B(EXTERN) | B(GOT) | B(LOAD) | B(BYTE4)}, {"GOT", B(PCREL) | B(EXTERN) | B(GOT) | B(POINTER) | B(BYTE4)}, {"SUBTRACTOR", B(SUBTRAHEND) | B(EXTERN) | B(BYTE4) | B(BYTE8)}, diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp index 8067d23..e559676 100644 --- a/lld/MachO/ConcatOutputSection.cpp +++ b/lld/MachO/ConcatOutputSection.cpp @@ -306,7 +306,7 @@ void TextOutputSection::finalize() { // contains several branch instructions in succession, then the distance // from the current position to the position where the thunks are inserted // grows. So leave room for a bunch of thunks. - unsigned slop = 256 * thunkSize; + unsigned slop = config->slopScale * thunkSize; while (finalIdx < endIdx) { uint64_t expectedNewSize = alignToPowerOf2(addr + size, inputs[finalIdx]->align) + @@ -384,7 +384,9 @@ void TextOutputSection::finalize() { // above. If you hit this: For the current algorithm, just bumping up // slop above and trying again is probably simplest. (See also PR51578 // comment 5). - fatal(Twine(__FUNCTION__) + ": FIXME: thunk range overrun"); + fatal(Twine(__FUNCTION__) + + ": FIXME: thunk range overrun. Consider increasing the " + "slop-scale with `--slop-scale=<unsigned_int>`."); } thunkInfo.isec = makeSyntheticInputSection(isec->getSegName(), isec->getName()); diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h index a2ca577..759a8cb 100644 --- a/lld/MachO/Config.h +++ b/lld/MachO/Config.h @@ -224,6 +224,7 @@ struct Configuration { bool disableVerify; bool separateCstringLiteralSections; bool tailMergeStrings; + unsigned slopScale = 256; bool callGraphProfileSort = false; llvm::StringRef printSymbolOrder; diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index 32b2099..f4f3aba 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -41,6 +41,7 @@ #include "llvm/Object/Archive.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Parallel.h" #include "llvm/Support/Path.h" @@ -53,6 +54,10 @@ #include "llvm/TextAPI/Architecture.h" #include "llvm/TextAPI/PackedVersion.h" +#if !_WIN32 +#include <sys/mman.h> +#endif + using namespace llvm; using namespace llvm::MachO; using namespace llvm::object; @@ -292,12 +297,13 @@ struct DeferredFile { using DeferredFiles = std::vector<DeferredFile>; #if LLVM_ENABLE_THREADS -class SerialBackgroundQueue { +class SerialBackgroundWorkQueue { std::deque<std::function<void()>> queue; std::thread *running; std::mutex mutex; public: + std::atomic_bool stopAllWork = false; void queueWork(std::function<void()> work) { mutex.lock(); if (running && queue.empty()) { @@ -312,7 +318,7 @@ public: queue.emplace_back(std::move(work)); if (!running) running = new std::thread([&]() { - while (true) { + while (!stopAllWork) { mutex.lock(); if (queue.empty()) { mutex.unlock(); @@ -331,6 +337,8 @@ public: } }; +static SerialBackgroundWorkQueue pageInQueue; + // Most input files have been mapped but not yet paged in. // This code forces the page-ins on multiple threads so // the process is not stalled waiting on disk buffer i/o. @@ -339,8 +347,8 @@ void multiThreadedPageInBackground(DeferredFiles &deferred) { static const size_t largeArchive = 10 * 1024 * 1024; #ifndef NDEBUG using namespace std::chrono; - std::atomic_int numDeferedFilesTouched = 0; static std::atomic_uint64_t totalBytes = 0; + std::atomic_int numDeferedFilesAdvised = 0; auto t0 = high_resolution_clock::now(); #endif @@ -348,24 +356,34 @@ void multiThreadedPageInBackground(DeferredFiles &deferred) { const StringRef &buff = deferredFile.buffer.getBuffer(); if (buff.size() > largeArchive) return; + #ifndef NDEBUG totalBytes += buff.size(); - numDeferedFilesTouched += 1; + numDeferedFilesAdvised += 1; #endif - +#if _WIN32 // Reference all file's mmap'd pages to load them into memory. - for (const char *page = buff.data(), *end = page + buff.size(); page < end; - page += pageSize) { + for (const char *page = buff.data(), *end = page + buff.size(); + page < end && !pageInQueue.stopAllWork; page += pageSize) { [[maybe_unused]] volatile char t = *page; (void)t; } +#else +#define DEBUG_TYPE "lld-madvise" + auto aligned = + llvm::alignDown(reinterpret_cast<uintptr_t>(buff.data()), pageSize); + if (madvise((void *)aligned, buff.size(), MADV_WILLNEED) < 0) + LLVM_DEBUG(llvm::dbgs() << "madvise error: " << strerror(errno) << "\n"); +#undef DEBUG_TYPE +#endif }; + { // Create scope for waiting for the taskGroup std::atomic_size_t index = 0; llvm::parallel::TaskGroup taskGroup; for (int w = 0; w < config->readWorkers; w++) taskGroup.spawn([&index, &preloadDeferredFile, &deferred]() { - while (true) { + while (!pageInQueue.stopAllWork) { size_t localIndex = index.fetch_add(1); if (localIndex >= deferred.size()) break; @@ -373,17 +391,17 @@ void multiThreadedPageInBackground(DeferredFiles &deferred) { } }); } + #ifndef NDEBUG auto dt = high_resolution_clock::now() - t0; if (Process::GetEnv("LLD_MULTI_THREAD_PAGE")) llvm::dbgs() << "multiThreadedPageIn " << totalBytes << "/" - << numDeferedFilesTouched << "/" << deferred.size() << "/" + << numDeferedFilesAdvised << "/" << deferred.size() << "/" << duration_cast<milliseconds>(dt).count() / 1000. << "\n"; #endif } static void multiThreadedPageIn(const DeferredFiles &deferred) { - static SerialBackgroundQueue pageInQueue; pageInQueue.queueWork([=]() { DeferredFiles files = deferred; multiThreadedPageInBackground(files); @@ -489,7 +507,7 @@ static InputFile *processFile(std::optional<MemoryBufferRef> buffer, continue; } - if (archiveContents) + if (config->readWorkers && archiveContents) archiveContents->push_back({path, isLazy, *mb}); if (!hasObjCSection(*mb)) continue; @@ -1447,6 +1465,8 @@ static void createFiles(const InputArgList &args) { multiThreadedPageIn(archiveContents); for (auto *archive : archives) archive->addLazySymbols(); + + pageInQueue.stopAllWork = true; } #endif } @@ -1845,8 +1865,8 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS, "'"); config->readWorkers = workers; #else - error(arg->getSpelling() + - ": option unavailable because lld was not built with thread support"); + warn(arg->getSpelling() + + ": option unavailable because lld was not built with thread support"); #endif } if (auto *arg = args.getLastArg(OPT_threads_eq)) { @@ -1995,6 +2015,14 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS, OPT_no_separate_cstring_literal_sections, false); config->tailMergeStrings = args.hasFlag(OPT_tail_merge_strings, OPT_no_tail_merge_strings, false); + if (auto *arg = args.getLastArg(OPT_slop_scale_eq)) { + StringRef v(arg->getValue()); + unsigned slop = 0; + if (!llvm::to_integer(v, slop)) + error(arg->getSpelling() + + ": expected a non-negative integer, but got '" + v + "'"); + config->slopScale = slop; + } auto IncompatWithCGSort = [&](StringRef firstArgStr) { // Throw an error only if --call-graph-profile-sort is explicitly specified diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp index 7b31378c3..e0fc897 100644 --- a/lld/MachO/ICF.cpp +++ b/lld/MachO/ICF.cpp @@ -173,14 +173,37 @@ bool ICF::equalsConstant(const ConcatInputSection *ia, // a valid offset in the literal section. return isecA->getOffset(valueA) == isecB->getOffset(valueB) && ra.addend == rb.addend; - else { - assert(valueA == 0 && valueB == 0); - // For section relocs, we compare the content at the section offset. - return isecA->getOffset(ra.addend) == isecB->getOffset(rb.addend); - } + assert(valueA == 0 && valueB == 0); + // For section relocs, we compare the content at the section offset. + return isecA->getOffset(ra.addend) == isecB->getOffset(rb.addend); }; - return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), - f); + if (!llvm::equal(ia->relocs, ib->relocs, f)) + return false; + + // Check unwind info structural compatibility: if there are symbols with + // associated unwind info, check that both sections have compatible symbol + // layouts. For simplicity, we only attempt folding when all symbols are at + // offset zero within the section (which is typically the case with + // .subsections_via_symbols.) + auto hasUnwind = [](Defined *d) { return d->unwindEntry() != nullptr; }; + const auto *itA = llvm::find_if(ia->symbols, hasUnwind); + const auto *itB = llvm::find_if(ib->symbols, hasUnwind); + if (itA == ia->symbols.end()) + return itB == ib->symbols.end(); + if (itB == ib->symbols.end()) + return false; + const Defined *da = *itA; + const Defined *db = *itB; + if (da->value != 0 || db->value != 0) + return false; + auto isZero = [](Defined *d) { return d->value == 0; }; + // Since symbols are stored in order of value, and since we have already + // checked that da/db have value zero, we just need to do the isZero check on + // the subsequent symbols. + return std::find_if_not(std::next(itA), ia->symbols.end(), isZero) == + ia->symbols.end() && + std::find_if_not(std::next(itB), ib->symbols.end(), isZero) == + ib->symbols.end(); } // Compare the "moving" parts of two ConcatInputSections -- i.e. everything not @@ -217,31 +240,19 @@ bool ICF::equalsVariable(const ConcatInputSection *ia, } return isecA->icfEqClass[icfPass % 2] == isecB->icfEqClass[icfPass % 2]; }; - if (!std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), f)) + if (!llvm::equal(ia->relocs, ib->relocs, f)) return false; - // If there are symbols with associated unwind info, check that the unwind - // info matches. For simplicity, we only handle the case where there are only - // symbols at offset zero within the section (which is typically the case with - // .subsections_via_symbols.) + // Compare unwind info equivalence classes. auto hasUnwind = [](Defined *d) { return d->unwindEntry() != nullptr; }; const auto *itA = llvm::find_if(ia->symbols, hasUnwind); - const auto *itB = llvm::find_if(ib->symbols, hasUnwind); if (itA == ia->symbols.end()) - return itB == ib->symbols.end(); - if (itB == ib->symbols.end()) - return false; + return true; const Defined *da = *itA; - const Defined *db = *itB; - if (da->unwindEntry()->icfEqClass[icfPass % 2] != - db->unwindEntry()->icfEqClass[icfPass % 2] || - da->value != 0 || db->value != 0) - return false; - auto isZero = [](Defined *d) { return d->value == 0; }; - return std::find_if_not(std::next(itA), ia->symbols.end(), isZero) == - ia->symbols.end() && - std::find_if_not(std::next(itB), ib->symbols.end(), isZero) == - ib->symbols.end(); + // equalsConstant() guarantees that both sections have unwind info. + const Defined *db = *llvm::find_if(ib->symbols, hasUnwind); + return da->unwindEntry()->icfEqClass[icfPass % 2] == + db->unwindEntry()->icfEqClass[icfPass % 2]; } // Find the first InputSection after BEGIN whose equivalence class differs diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index 20e4a1d..81caef5 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -217,7 +217,8 @@ std::optional<MemoryBufferRef> macho::readFile(StringRef path) { if (entry != cachedReads.end()) return entry->second; - ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr = MemoryBuffer::getFile(path); + ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr = + MemoryBuffer::getFile(path, false, /*RequiresNullTerminator=*/false); if (std::error_code ec = mbOrErr.getError()) { error("cannot open " + path + ": " + ec.message()); return std::nullopt; @@ -594,8 +595,8 @@ void ObjFile::parseRelocations(ArrayRef<SectionHeader> sectionHeaders, // FIXME This logic was written around x86_64 behavior -- ARM64 doesn't // have pcrel section relocations. We may want to factor this out into // the arch-specific .cpp file. - assert(target->hasAttr(r.type, RelocAttrBits::BYTE4)); - referentOffset = sec.addr + relInfo.r_address + 4 + totalAddend - + referentOffset = sec.addr + relInfo.r_address + + (1ull << relInfo.r_length) + totalAddend - referentSecHead.addr; } else { // The addend for a non-pcrel relocation is its absolute address. @@ -808,6 +809,17 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders, continue; if ((sym.n_type & N_TYPE) == N_SECT) { + if (sym.n_sect == 0) { + fatal("section symbol " + StringRef(strtab + sym.n_strx) + " in " + + toString(this) + " has an invalid section index [0]"); + } + if (sym.n_sect > sections.size()) { + fatal("section symbol " + StringRef(strtab + sym.n_strx) + " in " + + toString(this) + " has an invalid section index [" + + Twine(static_cast<unsigned>(sym.n_sect)) + + "] greater than the total number of sections [" + + Twine(sections.size()) + "]"); + } Subsections &subsections = sections[sym.n_sect - 1]->subsections; // parseSections() may have chosen not to parse this section. if (subsections.empty()) diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td index be1a1cc..f5f7549 100644 --- a/lld/MachO/Options.td +++ b/lld/MachO/Options.td @@ -1095,6 +1095,14 @@ defm tail_merge_strings : BB<"tail-merge-strings", "Enable string tail merging", "Disable string tail merging to improve link-time performance">, Group<grp_rare>; +def slop_scale_eq + : Joined<["--"], "slop_scale=">, + MetaVarName<"<unsigned_int>">, + HelpText<"Specify the slop scale. Default value is 256. If your binary " + "has too many consecutive branch instructions resulting in " + "thunk-range overrun, then you need to increase this value to a " + "higher value, such as 512 or 1024, etc">, + Group<grp_rare>; def grp_deprecated : OptionGroup<"deprecated">, HelpText<"DEPRECATED">; diff --git a/lld/MachO/SectionPriorities.cpp b/lld/MachO/SectionPriorities.cpp index cf657aa..b652d1e 100644 --- a/lld/MachO/SectionPriorities.cpp +++ b/lld/MachO/SectionPriorities.cpp @@ -27,6 +27,7 @@ #include "llvm/Support/Path.h" #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/xxhash.h" #include <numeric> @@ -246,33 +247,45 @@ DenseMap<const InputSection *, int> CallGraphSort::run() { return orderMap; } -std::optional<int> -macho::PriorityBuilder::getSymbolOrCStringPriority(const StringRef key, - InputFile *f) { +void macho::PriorityBuilder::SymbolPriorityEntry::setPriority( + int priority, StringRef objectFile) { + if (!objectFile.empty()) + objectFiles.try_emplace(objectFile, priority); + else + anyObjectFile = std::min(anyObjectFile, priority); +} - auto it = priorities.find(key); - if (it == priorities.end()) - return std::nullopt; - const SymbolPriorityEntry &entry = it->second; +int macho::PriorityBuilder::SymbolPriorityEntry::getPriority( + const InputFile *f) const { if (!f) - return entry.anyObjectFile; + return anyObjectFile; // We don't use toString(InputFile *) here because it returns the full path // for object files, and we only want the basename. - StringRef filename; - if (f->archiveName.empty()) - filename = path::filename(f->getName()); - else - filename = saver().save(path::filename(f->archiveName) + "(" + - path::filename(f->getName()) + ")"); - return std::min(entry.objectFiles.lookup(filename), entry.anyObjectFile); + StringRef basename = path::filename(f->getName()); + StringRef filename = + f->archiveName.empty() + ? basename + : saver().save(path::filename(f->archiveName) + "(" + basename + ")"); + return std::min(objectFiles.lookup(filename), anyObjectFile); } std::optional<int> -macho::PriorityBuilder::getSymbolPriority(const Defined *sym) { +macho::PriorityBuilder::getCStringPriority(uint32_t hash, + const InputFile *f) const { + auto it = cStringPriorities.find(hash); + if (it == cStringPriorities.end()) + return std::nullopt; + return it->second.getPriority(f); +} + +std::optional<int> +macho::PriorityBuilder::getSymbolPriority(const Defined *sym) const { if (sym->isAbsolute()) return std::nullopt; - return getSymbolOrCStringPriority(utils::getRootSymbol(sym->getName()), - sym->isec()->getFile()); + auto it = priorities.find(utils::getRootSymbol(sym->getName())); + if (it == priorities.end()) + return std::nullopt; + return it->second.getPriority(sym->isec()->getFile()); } void macho::PriorityBuilder::extractCallGraphProfile() { @@ -307,7 +320,7 @@ void macho::PriorityBuilder::parseOrderFile(StringRef path) { int prio = std::numeric_limits<int>::min(); MemoryBufferRef mbref = *buffer; for (StringRef line : args::getLines(mbref)) { - StringRef objectFile, symbolOrCStrHash; + StringRef objectFile; line = line.take_until([](char c) { return c == '#'; }); // ignore comments line = line.ltrim(); @@ -338,22 +351,16 @@ void macho::PriorityBuilder::parseOrderFile(StringRef path) { } // The rest of the line is either <symbol name> or - // CStringEntryPrefix<cstring hash> + // cStringEntryPrefix<cstring hash> line = line.trim(); - if (line.starts_with(CStringEntryPrefix)) { - StringRef possibleHash = line.drop_front(CStringEntryPrefix.size()); + if (line.consume_front(cStringEntryPrefix)) { uint32_t hash = 0; - if (to_integer(possibleHash, hash)) - symbolOrCStrHash = possibleHash; - } else - symbolOrCStrHash = utils::getRootSymbol(line); - - if (!symbolOrCStrHash.empty()) { - SymbolPriorityEntry &entry = priorities[symbolOrCStrHash]; - if (!objectFile.empty()) - entry.objectFiles.insert(std::make_pair(objectFile, prio)); - else - entry.anyObjectFile = std::min(entry.anyObjectFile, prio); + if (to_integer(line, hash)) + cStringPriorities[hash].setPriority(prio, objectFile); + } else { + StringRef symbol = utils::getRootSymbol(line); + if (!symbol.empty()) + priorities[symbol].setPriority(prio, objectFile); } ++prio; @@ -405,40 +412,39 @@ macho::PriorityBuilder::buildInputSectionPriorities() { return sectionPriorities; } -std::vector<StringPiecePair> macho::PriorityBuilder::buildCStringPriorities( - ArrayRef<CStringInputSection *> inputs) { - // Split the input strings into hold and cold sets. - // Order hot set based on -order_file_cstring for performance improvement; - // TODO: Order cold set of cstrings for compression via BP. - std::vector<std::pair<int, StringPiecePair>> - hotStringPrioritiesAndStringPieces; - std::vector<StringPiecePair> coldStringPieces; - std::vector<StringPiecePair> orderedStringPieces; - +void macho::PriorityBuilder::forEachStringPiece( + ArrayRef<CStringInputSection *> inputs, + std::function<void(CStringInputSection &, StringPiece &, size_t)> f, + bool forceInputOrder, bool computeHash) const { + std::vector<std::tuple<int, CStringInputSection *, size_t>> orderedPieces; + std::vector<std::pair<CStringInputSection *, size_t>> unorderedPieces; for (CStringInputSection *isec : inputs) { for (const auto &[stringPieceIdx, piece] : llvm::enumerate(isec->pieces)) { if (!piece.live) continue; - - std::optional<int> priority = getSymbolOrCStringPriority( - std::to_string(piece.hash), isec->getFile()); - if (!priority) - coldStringPieces.emplace_back(isec, stringPieceIdx); + // Process pieces in input order if we have no cstrings in our orderfile + if (forceInputOrder || cStringPriorities.empty()) { + f(*isec, piece, stringPieceIdx); + continue; + } + uint32_t hash = + computeHash + ? (xxh3_64bits(isec->getStringRef(stringPieceIdx)) & 0x7fffffff) + : piece.hash; + if (auto priority = getCStringPriority(hash, isec->getFile())) + orderedPieces.emplace_back(*priority, isec, stringPieceIdx); else - hotStringPrioritiesAndStringPieces.emplace_back( - *priority, std::make_pair(isec, stringPieceIdx)); + unorderedPieces.emplace_back(isec, stringPieceIdx); } } - - // Order hot set for perf - llvm::stable_sort(hotStringPrioritiesAndStringPieces); - for (auto &[priority, stringPiecePair] : hotStringPrioritiesAndStringPieces) - orderedStringPieces.push_back(stringPiecePair); - - // TODO: Order cold set for compression - - orderedStringPieces.insert(orderedStringPieces.end(), - coldStringPieces.begin(), coldStringPieces.end()); - - return orderedStringPieces; + if (orderedPieces.empty() && unorderedPieces.empty()) + return; + llvm::stable_sort(orderedPieces, [](const auto &left, const auto &right) { + return std::get<0>(left) < std::get<0>(right); + }); + for (auto &[priority, isec, pieceIdx] : orderedPieces) + f(*isec, isec->pieces[pieceIdx], pieceIdx); + // TODO: Add option to order the remaining cstrings for compression + for (auto &[isec, pieceIdx] : unorderedPieces) + f(*isec, isec->pieces[pieceIdx], pieceIdx); } diff --git a/lld/MachO/SectionPriorities.h b/lld/MachO/SectionPriorities.h index cc4e30f..24d2dbc 100644 --- a/lld/MachO/SectionPriorities.h +++ b/lld/MachO/SectionPriorities.h @@ -16,7 +16,6 @@ namespace lld::macho { using SectionPair = std::pair<const InputSection *, const InputSection *>; -using StringPiecePair = std::pair<CStringInputSection *, size_t>; class PriorityBuilder { public: @@ -29,7 +28,7 @@ public: // // An order file has one entry per line, in the following format: // - // <cpu>:<object file>:[<symbol name> | CStringEntryPrefix <cstring hash>] + // <cpu>:<object file>:[<symbol name> | cStringEntryPrefix <cstring hash>] // // <cpu> and <object file> are optional. // If not specified, then that entry tries to match either, @@ -42,7 +41,7 @@ public: // lowest-ordered entry (the one nearest to the front of the list.) // // or 2) any cstring literal with the given hash, if the entry has the - // CStringEntryPrefix prefix defined below in the file. <cstring hash> is the + // cStringEntryPrefix prefix defined below in the file. <cstring hash> is the // hash of cstring literal content. // // Cstring literals are not symbolized, we can't identify them by name @@ -54,6 +53,16 @@ public: // The file can also have line comments that start with '#'. void parseOrderFile(StringRef path); + /// Call \p f for each string piece in \p inputs. If there are any cstring + /// literals in the orderfile (and \p forceInputOrder is false) then string + /// pieces are ordered by the orderfile. \p computeHash must be set when + /// \p deduplicateLiterals is false because then the string piece hash is not + /// set. + void forEachStringPiece( + ArrayRef<CStringInputSection *> inputs, + std::function<void(CStringInputSection &, StringPiece &, size_t)> f, + bool forceInputOrder = false, bool computeHash = false) const; + // Returns layout priorities for some or all input sections. Sections are laid // out in decreasing order; that is, a higher priority section will be closer // to the beginning of its output section. @@ -66,8 +75,6 @@ public: // Each section gets assigned the priority of the highest-priority symbol it // contains. llvm::DenseMap<const InputSection *, int> buildInputSectionPriorities(); - std::vector<StringPiecePair> - buildCStringPriorities(ArrayRef<CStringInputSection *>); private: // The symbol with the smallest priority should be ordered first in the output @@ -78,13 +85,16 @@ private: int anyObjectFile = 0; // The priority given to a matching symbol from a particular object file. llvm::DenseMap<llvm::StringRef, int> objectFiles; + void setPriority(int priority, StringRef objectFile); + int getPriority(const InputFile *f) const; }; - const llvm::StringRef CStringEntryPrefix = "CSTR;"; + const llvm::StringRef cStringEntryPrefix = "CSTR;"; - std::optional<int> getSymbolPriority(const Defined *sym); - std::optional<int> getSymbolOrCStringPriority(const StringRef key, - InputFile *f); + std::optional<int> getSymbolPriority(const Defined *sym) const; + std::optional<int> getCStringPriority(uint32_t hash, + const InputFile *f) const; llvm::DenseMap<llvm::StringRef, SymbolPriorityEntry> priorities; + llvm::DenseMap<uint32_t, SymbolPriorityEntry> cStringPriorities; llvm::MapVector<SectionPair, uint64_t> callGraphProfile; }; diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp index baddddc..a7db5a3a 100644 --- a/lld/MachO/SymbolTable.cpp +++ b/lld/MachO/SymbolTable.cpp @@ -61,8 +61,8 @@ struct DuplicateSymbolDiag { SmallVector<DuplicateSymbolDiag> dupSymDiags; } // namespace -// Move symbols at \p fromOff in \p fromIsec into \p toIsec, unless that symbol -// is \p skip. +// Move local symbols at \p fromOff in \p fromIsec into \p toIsec, unless that +// symbol is \p skip, in which case we just remove it. static void transplantSymbolsAtOffset(InputSection *fromIsec, InputSection *toIsec, Defined *skip, uint64_t fromOff, uint64_t toOff) { @@ -78,22 +78,23 @@ static void transplantSymbolsAtOffset(InputSection *fromIsec, auto insertIt = llvm::upper_bound(toIsec->symbols, toOff, symSucceedsOff); llvm::erase_if(fromIsec->symbols, [&](Symbol *s) { auto *d = cast<Defined>(s); - if (d->value != fromOff) + if (d == skip) + return true; + if (d->value != fromOff || d->isExternal()) return false; - if (d != skip) { - // This repeated insertion will be quadratic unless insertIt is the end - // iterator. However, that is typically the case for files that have - // .subsections_via_symbols set. - insertIt = toIsec->symbols.insert(insertIt, d); - d->originalIsec = toIsec; - d->value = toOff; - // We don't want to have more than one unwindEntry at a given address, so - // drop the redundant ones. We We can safely drop the unwindEntries of - // the symbols in fromIsec since we will be adding another unwindEntry as - // we finish parsing toIsec's file. (We can assume that toIsec has its - // own unwindEntry because of the ODR.) - d->originalUnwindEntry = nullptr; - } + + // This repeated insertion will be quadratic unless insertIt is the end + // iterator. However, that is typically the case for files that have + // .subsections_via_symbols set. + insertIt = toIsec->symbols.insert(insertIt, d); + d->originalIsec = toIsec; + d->value = toOff; + // We don't want to have more than one unwindEntry at a given address, so + // drop the redundant ones. We can safely drop the unwindEntries of the + // symbols in fromIsec since we will be adding another unwindEntry as we + // finish parsing toIsec's file. (We can assume that toIsec has its own + // unwindEntry because of the ODR.) + d->originalUnwindEntry = nullptr; return true; }); } diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index 187cccb..fecc51f 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -1721,26 +1721,24 @@ void CStringSection::writeTo(uint8_t *buf) const { // and don't need this alignment. They will be emitted at some arbitrary address // `A`, but ld64 will treat them as being 16-byte aligned with an offset of // `16 % A`. -static Align getStringPieceAlignment(const CStringInputSection *isec, +static Align getStringPieceAlignment(const CStringInputSection &isec, const StringPiece &piece) { - return llvm::Align(1ULL << llvm::countr_zero(isec->align | piece.inSecOff)); + return llvm::Align(1ULL << llvm::countr_zero(isec.align | piece.inSecOff)); } void CStringSection::finalizeContents() { size = 0; - // TODO: Call buildCStringPriorities() to support cstring ordering when - // deduplication is off, although this may negatively impact build - // performance. - for (CStringInputSection *isec : inputs) { - for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) { - if (!piece.live) - continue; - piece.outSecOff = alignTo(size, getStringPieceAlignment(isec, piece)); - StringRef string = isec->getStringRef(i); - size = piece.outSecOff + string.size() + 1; // account for null terminator - } + priorityBuilder.forEachStringPiece( + inputs, + [&](CStringInputSection &isec, StringPiece &piece, size_t pieceIdx) { + piece.outSecOff = alignTo(size, getStringPieceAlignment(isec, piece)); + StringRef string = isec.getStringRef(pieceIdx); + size = + piece.outSecOff + string.size() + 1; // account for null terminator + }, + /*forceInputOrder=*/false, /*computeHash=*/true); + for (CStringInputSection *isec : inputs) isec->isFinal = true; - } } void DeduplicatedCStringSection::finalizeContents() { @@ -1748,20 +1746,19 @@ void DeduplicatedCStringSection::finalizeContents() { DenseMap<CachedHashStringRef, Align> strToAlignment; // Used for tail merging only std::vector<CachedHashStringRef> deduplicatedStrs; - for (const CStringInputSection *isec : inputs) { - for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) { - if (!piece.live) - continue; - auto s = isec->getCachedHashStringRef(i); - assert(isec->align != 0); - auto align = getStringPieceAlignment(isec, piece); - auto [it, wasInserted] = strToAlignment.try_emplace(s, align); - if (config->tailMergeStrings && wasInserted) - deduplicatedStrs.push_back(s); - if (!wasInserted && it->second < align) - it->second = align; - } - } + priorityBuilder.forEachStringPiece( + inputs, + [&](CStringInputSection &isec, StringPiece &piece, size_t pieceIdx) { + auto s = isec.getCachedHashStringRef(pieceIdx); + assert(isec.align != 0); + auto align = getStringPieceAlignment(isec, piece); + auto [it, wasInserted] = strToAlignment.try_emplace(s, align); + if (config->tailMergeStrings && wasInserted) + deduplicatedStrs.push_back(s); + if (!wasInserted && it->second < align) + it->second = align; + }, + /*forceInputOrder=*/true); // Like lexigraphical sort, except we read strings in reverse and take the // longest string first @@ -1801,9 +1798,10 @@ void DeduplicatedCStringSection::finalizeContents() { // Sort the strings for performance and compression size win, and then // assign an offset for each string and save it to the corresponding // StringPieces for easy access. - for (auto &[isec, i] : priorityBuilder.buildCStringPriorities(inputs)) { - auto &piece = isec->pieces[i]; - auto s = isec->getCachedHashStringRef(i); + priorityBuilder.forEachStringPiece(inputs, [&](CStringInputSection &isec, + StringPiece &piece, + size_t pieceIdx) { + auto s = isec.getCachedHashStringRef(pieceIdx); // Any string can be tail merged with itself with an offset of zero uint64_t tailMergeOffset = 0; auto mergeIt = @@ -1829,7 +1827,7 @@ void DeduplicatedCStringSection::finalizeContents() { stringOffsetMap[tailMergedString] = piece.outSecOff; assert(isAligned(strToAlignment.at(tailMergedString), piece.outSecOff)); } - } + }); for (CStringInputSection *isec : inputs) isec->isFinal = true; } diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp index 6e9f6c2..bf01b12 100644 --- a/lld/MachO/UnwindInfoSection.cpp +++ b/lld/MachO/UnwindInfoSection.cpp @@ -153,8 +153,6 @@ private: // The entries here will be in the same order as their originating symbols // in symbolsVec. std::vector<CompactUnwindEntry> cuEntries; - // Indices into the cuEntries vector. - std::vector<size_t> cuIndices; std::vector<Symbol *> personalities; SmallDenseMap<std::pair<InputSection *, uint64_t /* addend */>, Symbol *> personalityTable; @@ -400,8 +398,7 @@ void UnwindInfoSectionImpl::relocateCompactUnwind( // There should only be a handful of unique personality pointers, so we can // encode them as 2-bit indices into a small array. void UnwindInfoSectionImpl::encodePersonalities() { - for (size_t idx : cuIndices) { - CompactUnwindEntry &cu = cuEntries[idx]; + for (CompactUnwindEntry &cu : cuEntries) { if (cu.personality == nullptr) continue; // Linear search is fast enough for a small array. @@ -467,27 +464,24 @@ void UnwindInfoSectionImpl::finalize() { symbolsVec = symbols.takeVector(); relocateCompactUnwind(cuEntries); - // Rather than sort & fold the 32-byte entries directly, we create a - // vector of indices to entries and sort & fold that instead. - cuIndices.resize(cuEntries.size()); - std::iota(cuIndices.begin(), cuIndices.end(), 0); - llvm::sort(cuIndices, [&](size_t a, size_t b) { - return cuEntries[a].functionAddress < cuEntries[b].functionAddress; + // Sort the entries by address. + llvm::sort(cuEntries, [&](auto &a, auto &b) { + return a.functionAddress < b.functionAddress; }); // Record the ending boundary before we fold the entries. - cueEndBoundary = cuEntries[cuIndices.back()].functionAddress + - cuEntries[cuIndices.back()].functionLength; + cueEndBoundary = + cuEntries.back().functionAddress + cuEntries.back().functionLength; // Fold adjacent entries with matching encoding+personality and without LSDA - // We use three iterators on the same cuIndices to fold in-situ: + // We use three iterators to fold in-situ: // (1) `foldBegin` is the first of a potential sequence of matching entries // (2) `foldEnd` is the first non-matching entry after `foldBegin`. // The semi-open interval [ foldBegin .. foldEnd ) contains a range // entries that can be folded into a single entry and written to ... // (3) `foldWrite` - auto foldWrite = cuIndices.begin(); - for (auto foldBegin = cuIndices.begin(); foldBegin < cuIndices.end();) { + auto foldWrite = cuEntries.begin(); + for (auto foldBegin = cuEntries.begin(); foldBegin != cuEntries.end();) { auto foldEnd = foldBegin; // Common LSDA encodings (e.g. for C++ and Objective-C) contain offsets from // a base address. The base address is normally not contained directly in @@ -503,9 +497,9 @@ void UnwindInfoSectionImpl::finalize() { // directly in the LSDA, two functions at different addresses would // necessarily have different LSDAs, so their CU entries would not have been // folded anyway. - while (++foldEnd < cuIndices.end() && - cuEntries[*foldBegin].encoding == cuEntries[*foldEnd].encoding && - !cuEntries[*foldBegin].lsda && !cuEntries[*foldEnd].lsda && + while (++foldEnd != cuEntries.end() && + foldBegin->encoding == foldEnd->encoding && !foldBegin->lsda && + !foldEnd->lsda && // If we've gotten to this point, we don't have an LSDA, which should // also imply that we don't have a personality function, since in all // likelihood a personality function needs the LSDA to do anything @@ -513,21 +507,20 @@ void UnwindInfoSectionImpl::finalize() { // and no LSDA though (e.g. the C++ personality __gxx_personality_v0 // is just a no-op without LSDA), so we still check for personality // function equivalence to handle that case. - cuEntries[*foldBegin].personality == - cuEntries[*foldEnd].personality && - canFoldEncoding(cuEntries[*foldEnd].encoding)) + foldBegin->personality == foldEnd->personality && + canFoldEncoding(foldEnd->encoding)) ; *foldWrite++ = *foldBegin; foldBegin = foldEnd; } - cuIndices.erase(foldWrite, cuIndices.end()); + cuEntries.erase(foldWrite, cuEntries.end()); encodePersonalities(); // Count frequencies of the folded encodings EncodingMap encodingFrequencies; - for (size_t idx : cuIndices) - encodingFrequencies[cuEntries[idx].encoding]++; + for (const CompactUnwindEntry &cu : cuEntries) + encodingFrequencies[cu.encoding]++; // Make a vector of encodings, sorted by descending frequency for (const auto &frequency : encodingFrequencies) @@ -558,21 +551,19 @@ void UnwindInfoSectionImpl::finalize() { // and 127..255 references a local per-second-level-page table. // First we try the compact format and determine how many entries fit. // If more entries fit in the regular format, we use that. - for (size_t i = 0; i < cuIndices.size();) { - size_t idx = cuIndices[i]; + for (size_t i = 0; i < cuEntries.size();) { secondLevelPages.emplace_back(); SecondLevelPage &page = secondLevelPages.back(); page.entryIndex = i; uint64_t functionAddressMax = - cuEntries[idx].functionAddress + COMPRESSED_ENTRY_FUNC_OFFSET_MASK; + cuEntries[i].functionAddress + COMPRESSED_ENTRY_FUNC_OFFSET_MASK; size_t n = commonEncodings.size(); size_t wordsRemaining = SECOND_LEVEL_PAGE_WORDS - sizeof(unwind_info_compressed_second_level_page_header) / sizeof(uint32_t); - while (wordsRemaining >= 1 && i < cuIndices.size()) { - idx = cuIndices[i]; - const CompactUnwindEntry *cuPtr = &cuEntries[idx]; + while (wordsRemaining >= 1 && i < cuEntries.size()) { + const CompactUnwindEntry *cuPtr = &cuEntries[i]; if (cuPtr->functionAddress >= functionAddressMax) break; if (commonEncodingIndexes.count(cuPtr->encoding) || @@ -593,21 +584,21 @@ void UnwindInfoSectionImpl::finalize() { // If this is not the final page, see if it's possible to fit more entries // by using the regular format. This can happen when there are many unique // encodings, and we saturated the local encoding table early. - if (i < cuIndices.size() && + if (i < cuEntries.size() && page.entryCount < REGULAR_SECOND_LEVEL_ENTRIES_MAX) { page.kind = UNWIND_SECOND_LEVEL_REGULAR; page.entryCount = std::min(REGULAR_SECOND_LEVEL_ENTRIES_MAX, - cuIndices.size() - page.entryIndex); + cuEntries.size() - page.entryIndex); i = page.entryIndex + page.entryCount; } else { page.kind = UNWIND_SECOND_LEVEL_COMPRESSED; } } - for (size_t idx : cuIndices) { - lsdaIndex[idx] = entriesWithLsda.size(); - if (cuEntries[idx].lsda) - entriesWithLsda.push_back(idx); + for (size_t i = 0; i < cuEntries.size(); ++i) { + lsdaIndex[i] = entriesWithLsda.size(); + if (cuEntries[i].lsda) + entriesWithLsda.push_back(i); } // compute size of __TEXT,__unwind_info section @@ -626,7 +617,7 @@ void UnwindInfoSectionImpl::finalize() { // All inputs are relocated and output addresses are known, so write! void UnwindInfoSectionImpl::writeTo(uint8_t *buf) const { - assert(!cuIndices.empty() && "call only if there is unwind info"); + assert(!cuEntries.empty() && "call only if there is unwind info"); // section header auto *uip = reinterpret_cast<unwind_info_section_header *>(buf); @@ -660,7 +651,7 @@ void UnwindInfoSectionImpl::writeTo(uint8_t *buf) const { uint64_t l2PagesOffset = level2PagesOffset; auto *iep = reinterpret_cast<unwind_info_section_header_index_entry *>(i32p); for (const SecondLevelPage &page : secondLevelPages) { - size_t idx = cuIndices[page.entryIndex]; + size_t idx = page.entryIndex; iep->functionOffset = cuEntries[idx].functionAddress - in.header->addr; iep->secondLevelPagesSectionOffset = l2PagesOffset; iep->lsdaIndexArraySectionOffset = @@ -695,7 +686,7 @@ void UnwindInfoSectionImpl::writeTo(uint8_t *buf) const { for (const SecondLevelPage &page : secondLevelPages) { if (page.kind == UNWIND_SECOND_LEVEL_COMPRESSED) { uintptr_t functionAddressBase = - cuEntries[cuIndices[page.entryIndex]].functionAddress; + cuEntries[page.entryIndex].functionAddress; auto *p2p = reinterpret_cast<unwind_info_compressed_second_level_page_header *>( pp); @@ -708,8 +699,7 @@ void UnwindInfoSectionImpl::writeTo(uint8_t *buf) const { p2p->encodingsCount = page.localEncodings.size(); auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]); for (size_t i = 0; i < page.entryCount; i++) { - const CompactUnwindEntry &cue = - cuEntries[cuIndices[page.entryIndex + i]]; + const CompactUnwindEntry &cue = cuEntries[page.entryIndex + i]; auto it = commonEncodingIndexes.find(cue.encoding); if (it == commonEncodingIndexes.end()) it = page.localEncodingIndexes.find(cue.encoding); @@ -728,8 +718,7 @@ void UnwindInfoSectionImpl::writeTo(uint8_t *buf) const { p2p->entryCount = page.entryCount; auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]); for (size_t i = 0; i < page.entryCount; i++) { - const CompactUnwindEntry &cue = - cuEntries[cuIndices[page.entryIndex + i]]; + const CompactUnwindEntry &cue = cuEntries[page.entryIndex + i]; *ep++ = cue.functionAddress; *ep++ = cue.encoding; } |
