diff options
author | Sam Clegg <sbc@chromium.org> | 2022-12-06 16:49:13 -0800 |
---|---|---|
committer | Tom Stellard <tstellar@redhat.com> | 2023-06-01 09:02:00 -0700 |
commit | 74b5a0af52eb5681d7897d161e0984dbf7b18702 (patch) | |
tree | 4afe6b660a4809982450dbdf3a3bfe61c9791ef2 | |
parent | 9c865c230791b111f3df550288ac4f2dc7cb1c51 (diff) | |
download | llvm-74b5a0af52eb5681d7897d161e0984dbf7b18702.zip llvm-74b5a0af52eb5681d7897d161e0984dbf7b18702.tar.gz llvm-74b5a0af52eb5681d7897d161e0984dbf7b18702.tar.bz2 |
[lld][WebAssembly] Initial support for stub libraries
See the docs in lld/docs/WebAssembly.rst for more on this.
This feature unlocks a lot of simplification in the emscripten toolchain
since we can represent the JS libraries to wasm-ld as stub libraries.
See https://github.com/emscripten-core/emscripten/issues/18875
Differential Revision: https://reviews.llvm.org/D145308
(cherry picked from commit 3111784ff7d3d51a9e981b1a0bbc8f6511c34d25)
-rw-r--r-- | lld/docs/WebAssembly.rst | 33 | ||||
-rw-r--r-- | lld/test/wasm/Inputs/libstub-missing-dep.so | 2 | ||||
-rw-r--r-- | lld/test/wasm/Inputs/libstub-missing-sym.so | 3 | ||||
-rw-r--r-- | lld/test/wasm/Inputs/libstub.so | 5 | ||||
-rw-r--r-- | lld/test/wasm/stub_library.s | 48 | ||||
-rw-r--r-- | lld/wasm/Driver.cpp | 55 | ||||
-rw-r--r-- | lld/wasm/InputFiles.cpp | 43 | ||||
-rw-r--r-- | lld/wasm/InputFiles.h | 13 | ||||
-rw-r--r-- | lld/wasm/Relocations.cpp | 4 | ||||
-rw-r--r-- | lld/wasm/SymbolTable.cpp | 7 | ||||
-rw-r--r-- | lld/wasm/SymbolTable.h | 1 | ||||
-rw-r--r-- | lld/wasm/Symbols.cpp | 4 | ||||
-rw-r--r-- | lld/wasm/Symbols.h | 7 | ||||
-rw-r--r-- | lld/wasm/Writer.cpp | 4 |
14 files changed, 224 insertions, 5 deletions
diff --git a/lld/docs/WebAssembly.rst b/lld/docs/WebAssembly.rst index c40d4b3..dad3177 100644 --- a/lld/docs/WebAssembly.rst +++ b/lld/docs/WebAssembly.rst @@ -75,6 +75,11 @@ WebAssembly-specific options: flag which corresponds to ``--unresolve-symbols=ignore`` + ``--import-undefined``. +.. option:: --allow-undefined-file=<filename> + + Like ``--allow-undefined``, but the filename specified a flat list of + symbols, one per line, which are allowed to be undefined. + .. option:: --unresolved-symbols=<method> This is a more full featured version of ``--allow-undefined``. @@ -182,11 +187,39 @@ Imports By default no undefined symbols are allowed in the final binary. The flag ``--allow-undefined`` results in a WebAssembly import being defined for each undefined symbol. It is then up to the runtime to provide such symbols. +``--allow-undefined-file`` is the same but allows a list of symbols to be +specified. Alternatively symbols can be marked in the source code as with the ``import_name`` and/or ``import_module`` clang attributes which signals that they are expected to be undefined at static link time. +Stub Libraries +~~~~~~~~~~~~~~ + +Another way to specify imports and exports is via a "stub library". This +feature is inspired by the ELF stub objects which are supported by the Solaris +linker. Stub libraries are text files that can be passed as normal linker +inputs, similar to how linker scripts can be passed to the ELF linker. The stub +library is a stand-in for a set of symbols that will be available at runtime, +but doesn't contain any actual code or data. Instead it contains just a list of +symbols, one per line. Each symbol can specify zero or more dependencies. +These dependencies are symbols that must be defined, and exported, by the output +module if the symbol is question is imported/required by the output module. + +For example, imagine the runtime provides an external symbol ``foo`` that +depends on the ``malloc`` and ``free``. This can be expressed simply as:: + + #STUB + foo: malloc,free + +Here we are saying that ``foo`` is allowed to be imported (undefined) but that +if it is imported, then the output module must also export ``malloc`` and +``free`` to the runtime. If ``foo`` is imported (undefined), but the output +module does not define ``malloc`` and ``free`` then the link will fail. + +Stub libraries must begin with ``#STUB`` on a line by itself. + Garbage Collection ~~~~~~~~~~~~~~~~~~ diff --git a/lld/test/wasm/Inputs/libstub-missing-dep.so b/lld/test/wasm/Inputs/libstub-missing-dep.so new file mode 100644 index 0000000..f2345b7 --- /dev/null +++ b/lld/test/wasm/Inputs/libstub-missing-dep.so @@ -0,0 +1,2 @@ +#STUB +foo: missing_dep,missing_dep2 diff --git a/lld/test/wasm/Inputs/libstub-missing-sym.so b/lld/test/wasm/Inputs/libstub-missing-sym.so new file mode 100644 index 0000000..2120b94 --- /dev/null +++ b/lld/test/wasm/Inputs/libstub-missing-sym.so @@ -0,0 +1,3 @@ +#STUB +# Symbol `foo` is missing from this file which causes stub_object.s to fail +bar diff --git a/lld/test/wasm/Inputs/libstub.so b/lld/test/wasm/Inputs/libstub.so new file mode 100644 index 0000000..57e61f6 --- /dev/null +++ b/lld/test/wasm/Inputs/libstub.so @@ -0,0 +1,5 @@ +#STUB +# This is a comment +foo: foodep1,foodep2 +# This symbols as no dependencies +bar diff --git a/lld/test/wasm/stub_library.s b/lld/test/wasm/stub_library.s new file mode 100644 index 0000000..9cbf250 --- /dev/null +++ b/lld/test/wasm/stub_library.s @@ -0,0 +1,48 @@ +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s +# RUN: wasm-ld %t.o %p/Inputs/libstub.so -o %t.wasm +# RUN: obj2yaml %t.wasm | FileCheck %s + +# When the dependencies are missing the link fails +# RUN: not wasm-ld %t.o %p/Inputs/libstub-missing-dep.so -o %t.wasm 2>&1 | FileCheck --check-prefix=MISSING-DEP %s + +# When the dependencies are missing the link fails +# RUN: not wasm-ld %t.o %p/Inputs/libstub-missing-sym.so -o %t.wasm 2>&1 | FileCheck --check-prefix=MISSING-SYM %s + +# MISSING-DEP: libstub-missing-dep.so: undefined symbol: missing_dep. Required by foo +# MISSING-DEP: libstub-missing-dep.so: undefined symbol: missing_dep2. Required by foo + +# MISSING-SYM: undefined symbol: foo + +# The function foo is defined in libstub.so but depend on foodep1 and foodep2 +.functype foo () -> () + +.globl foodep1 +foodep1: + .functype foodep1 () -> () + end_function + +.globl foodep2 +foodep2: + .functype foodep2 () -> () + end_function + +.globl _start +_start: + .functype _start () -> () + call foo + end_function + +# CHECK: - Type: EXPORT +# CHECK-NEXT: Exports: +# CHECK-NEXT: - Name: memory +# CHECK-NEXT: Kind: MEMORY +# CHECK-NEXT: Index: 0 +# CHECK-NEXT: - Name: foodep1 +# CHECK-NEXT: Kind: FUNCTION +# CHECK-NEXT: Index: 1 +# CHECK-NEXT: - Name: foodep2 +# CHECK-NEXT: Kind: FUNCTION +# CHECK-NEXT: Index: 2 +# CHECK-NEXT: - Name: _start +# CHECK-NEXT: Kind: FUNCTION +# CHECK-NEXT: Index: 3 diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index 79d9813..8f7ed8b 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -280,6 +280,12 @@ void LinkerDriver::addFile(StringRef path) { case file_magic::wasm_object: files.push_back(createObjectFile(mbref)); break; + case file_magic::unknown: + if (mbref.getBuffer().starts_with("#STUB\n")) { + files.push_back(make<StubFile>(mbref)); + break; + } + [[fallthrough]]; default: error("unknown file type: " + mbref.getBufferIdentifier()); } @@ -834,6 +840,53 @@ static void createOptionalSymbols() { WasmSym::tlsBase = createOptionalGlobal("__tls_base", false); } +static void processStubLibraries() { + log("-- processStubLibraries"); + for (auto &stub_file : symtab->stubFiles) { + LLVM_DEBUG(llvm::dbgs() + << "processing stub file: " << stub_file->getName() << "\n"); + for (auto [name, deps]: stub_file->symbolDependencies) { + auto* sym = symtab->find(name); + if (!sym || !sym->isUndefined() || !sym->isUsedInRegularObj || + sym->forceImport) { + LLVM_DEBUG(llvm::dbgs() << "stub not in needed: " << name << "\n"); + continue; + } + // The first stub library to define a given symbol sets this and + // definitions in later stub libraries are ignored. + sym->forceImport = true; + if (sym->traced) + message(toString(stub_file) + ": importing " + name); + else + LLVM_DEBUG(llvm::dbgs() + << toString(stub_file) << ": importing " << name << "\n"); + for (const auto dep : deps) { + auto* needed = symtab->find(dep); + if (!needed) { + error(toString(stub_file) + ": undefined symbol: " + dep + + ". Required by " + toString(*sym)); + } else if (needed->isUndefined()) { + error(toString(stub_file) + + ": undefined symbol: " + toString(*needed) + + ". Required by " + toString(*sym)); + } else { + LLVM_DEBUG(llvm::dbgs() + << "force export: " << toString(*needed) << "\n"); + needed->forceExport = true; + needed->isUsedInRegularObj = true; + if (auto *lazy = dyn_cast<LazySymbol>(needed)) { + lazy->fetch(); + if (!config->whyExtract.empty()) + config->whyExtractRecords.emplace_back(stub_file->getName(), + sym->getFile(), *sym); + } + } + } + } + } + log("-- done processStubLibraries"); +} + // Reconstructs command line arguments so that so that you can re-run // the same command with the same inputs. This is for --reproduce. static std::string createResponseFile(const opt::InputArgList &args) { @@ -1132,6 +1185,8 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { if (errorCount()) return; + processStubLibraries(); + createOptionalSymbols(); // Resolve any variant symbols that were created due to signature diff --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp index e8a3701..3d4fe93 100644 --- a/lld/wasm/InputFiles.cpp +++ b/lld/wasm/InputFiles.cpp @@ -12,6 +12,7 @@ #include "InputElement.h" #include "OutputSegment.h" #include "SymbolTable.h" +#include "lld/Common/Args.h" #include "lld/Common/CommonLinkerContext.h" #include "lld/Common/Reproduce.h" #include "llvm/Object/Binary.h" @@ -678,6 +679,48 @@ Symbol *ObjFile::createUndefined(const WasmSymbol &sym, bool isCalledDirectly) { llvm_unreachable("unknown symbol kind"); } + +StringRef strip(StringRef s) { + while (s.starts_with(" ")) { + s = s.drop_front(); + } + while (s.ends_with(" ")) { + s = s.drop_back(); + } + return s; +} + +void StubFile::parse() { + bool first = false; + + for (StringRef line : args::getLines(mb)) { + // File must begin with #STUB + if (first) { + assert(line == "#STUB\n"); + first = false; + } + + // Lines starting with # are considered comments + if (line.startswith("#")) + continue; + + StringRef sym; + StringRef rest; + std::tie(sym, rest) = line.split(':'); + sym = strip(sym); + rest = strip(rest); + + symbolDependencies[sym] = {}; + + while (rest.size()) { + StringRef first; + std::tie(first, rest) = rest.split(','); + first = strip(first); + symbolDependencies[sym].push_back(first); + } + } +} + void ArchiveFile::parse() { // Parse a MemoryBufferRef as an archive file. LLVM_DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n"); diff --git a/lld/wasm/InputFiles.h b/lld/wasm/InputFiles.h index 22066cb..4c46ae8 100644 --- a/lld/wasm/InputFiles.h +++ b/lld/wasm/InputFiles.h @@ -47,6 +47,7 @@ public: SharedKind, ArchiveKind, BitcodeKind, + StubKind, }; virtual ~InputFile() {} @@ -183,6 +184,18 @@ public: static bool doneLTO; }; +// Stub libray (See docs/WebAssembly.rst) +class StubFile : public InputFile { +public: + explicit StubFile(MemoryBufferRef m) : InputFile(StubKind, m) {} + + static bool classof(const InputFile *f) { return f->kind() == StubKind; } + + void parse(); + + llvm::DenseMap<StringRef, std::vector<StringRef>> symbolDependencies; +}; + inline bool isBitcode(MemoryBufferRef mb) { return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode; } diff --git a/lld/wasm/Relocations.cpp b/lld/wasm/Relocations.cpp index c7710a9..a725b6d 100644 --- a/lld/wasm/Relocations.cpp +++ b/lld/wasm/Relocations.cpp @@ -33,9 +33,9 @@ static bool requiresGOTAccess(const Symbol *sym) { } static bool allowUndefined(const Symbol* sym) { - // Symbols with explicit import names are always allowed to be undefined at + // Symbols that are explicitly imported are always allowed to be undefined at // link time. - if (sym->importName) + if (sym->isImported()) return true; if (isa<UndefinedFunction>(sym) && config->importUndefined) return true; diff --git a/lld/wasm/SymbolTable.cpp b/lld/wasm/SymbolTable.cpp index 2cd659b..e5898c5 100644 --- a/lld/wasm/SymbolTable.cpp +++ b/lld/wasm/SymbolTable.cpp @@ -39,6 +39,13 @@ void SymbolTable::addFile(InputFile *file) { return; } + // stub file + if (auto *f = dyn_cast<StubFile>(file)) { + f->parse(); + stubFiles.push_back(f); + return; + } + if (config->trace) message(toString(file)); diff --git a/lld/wasm/SymbolTable.h b/lld/wasm/SymbolTable.h index f624b8b..311d418 100644 --- a/lld/wasm/SymbolTable.h +++ b/lld/wasm/SymbolTable.h @@ -103,6 +103,7 @@ public: DefinedFunction *createUndefinedStub(const WasmSignature &sig); std::vector<ObjFile *> objectFiles; + std::vector<StubFile *> stubFiles; std::vector<SharedFile *> sharedFiles; std::vector<BitcodeFile *> bitcodeFiles; std::vector<InputFunction *> syntheticFunctions; diff --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp index 5ef92dd..9a92355 100644 --- a/lld/wasm/Symbols.cpp +++ b/lld/wasm/Symbols.cpp @@ -220,6 +220,10 @@ void Symbol::setHidden(bool isHidden) { flags |= WASM_SYMBOL_VISIBILITY_DEFAULT; } +bool Symbol::isImported() const { + return isUndefined() && (importName.has_value() || forceImport); +} + bool Symbol::isExported() const { if (!isDefined() || isLocal()) return false; diff --git a/lld/wasm/Symbols.h b/lld/wasm/Symbols.h index 6bdf587f..232339f 100644 --- a/lld/wasm/Symbols.h +++ b/lld/wasm/Symbols.h @@ -114,6 +114,7 @@ public: void setOutputSymbolIndex(uint32_t index); WasmSymbolType getWasmType() const; + bool isImported() const; bool isExported() const; bool isExportedExplicit() const; @@ -135,7 +136,8 @@ protected: Symbol(StringRef name, Kind k, uint32_t flags, InputFile *f) : name(name), file(f), symbolKind(k), referenced(!config->gcSections), requiresGOT(false), isUsedInRegularObj(false), forceExport(false), - canInline(false), traced(false), isStub(false), flags(flags) {} + forceImport(false), canInline(false), traced(false), isStub(false), + flags(flags) {} StringRef name; InputFile *file; @@ -160,6 +162,8 @@ public: // -e/--export command line flag) bool forceExport : 1; + bool forceImport : 1; + // False if LTO shouldn't inline whatever this symbol points to. If a symbol // is overwritten after LTO, LTO shouldn't inline the symbol because it // doesn't know the final contents of the symbol. @@ -656,6 +660,7 @@ T *replaceSymbol(Symbol *s, ArgT &&... arg) { T *s2 = new (s) T(std::forward<ArgT>(arg)...); s2->isUsedInRegularObj = symCopy.isUsedInRegularObj; s2->forceExport = symCopy.forceExport; + s2->forceImport = symCopy.forceImport; s2->canInline = symCopy.canInline; s2->traced = symCopy.traced; s2->referenced = symCopy.referenced; diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index 3600503..304897b 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -647,7 +647,7 @@ static bool shouldImport(Symbol *sym) { if (config->allowUndefinedSymbols.count(sym->getName()) != 0) return true; - return sym->importName.has_value(); + return sym->isImported(); } void Writer::calculateImports() { @@ -1570,7 +1570,7 @@ void Writer::run() { sym->forceExport = true; } - // Delay reporting error about explicit exports until after + // Delay reporting errors about explicit exports until after // addStartStopSymbols which can create optional symbols. for (auto &name : config->requiredExports) { Symbol *sym = symtab->find(name); |