diff options
author | Derek Schuff <dschuff@chromium.org> | 2024-01-25 09:48:38 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-25 09:48:38 -0800 |
commit | 7f409cd82b322038f08a984a07377758e76b0e4c (patch) | |
tree | 84f9327a3a41423a37abd8852d40d743911a9ba6 /llvm/lib/Object/WasmObjectFile.cpp | |
parent | 7fdb932c4e78706cec3468a1f149e5a54a865d36 (diff) | |
download | llvm-7f409cd82b322038f08a984a07377758e76b0e4c.zip llvm-7f409cd82b322038f08a984a07377758e76b0e4c.tar.gz llvm-7f409cd82b322038f08a984a07377758e76b0e4c.tar.bz2 |
[Object][Wasm] Allow parsing of GC types in type and table sections (#79235)
This change allows a WasmObjectFile to be created from a wasm file even
if it uses typed funcrefs and GC types. It does not significantly change how
lib/Object models its various internal types (e.g. WasmSignature,
WasmElemSegment), so LLVM does not really "support" or understand such
files, but it is sufficient to parse the type, global and element sections, discarding
types that are not understood. This is useful for low-level binary tools such as
nm and objcopy, which use only limited aspects of the binary (such as function
definitions) or deal with sections as opaque blobs.
This is done by allowing `WasmValType` to have a value of `OTHERREF`
(representing any unmodeled reference type), and adding a field to
`WasmSignature` indicating it's a placeholder for an unmodeled reference
type (since there is a 1:1 correspondence between WasmSignature objects
and types in the type section).
Then the object file parsers for the type and element sections are expanded
to parse encoded reference types and discard any unmodeled fields.
Diffstat (limited to 'llvm/lib/Object/WasmObjectFile.cpp')
-rw-r--r-- | llvm/lib/Object/WasmObjectFile.cpp | 173 |
1 files changed, 145 insertions, 28 deletions
diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp index b9a8e97..953e7c7 100644 --- a/llvm/lib/Object/WasmObjectFile.cpp +++ b/llvm/lib/Object/WasmObjectFile.cpp @@ -21,6 +21,7 @@ #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/ScopedPrinter.h" #include "llvm/TargetParser/SubtargetFeature.h" @@ -29,6 +30,7 @@ #include <cassert> #include <cstdint> #include <cstring> +#include <limits> #define DEBUG_TYPE "wasm-object" @@ -173,6 +175,26 @@ static uint8_t readOpcode(WasmObjectFile::ReadContext &Ctx) { return readUint8(Ctx); } +static wasm::ValType parseValType(WasmObjectFile::ReadContext &Ctx, + uint32_t Code) { + // only directly encoded FUNCREF/EXTERNREF are supported + // (not ref null func or ref null extern) + switch (Code) { + case wasm::WASM_TYPE_I32: + case wasm::WASM_TYPE_I64: + case wasm::WASM_TYPE_F32: + case wasm::WASM_TYPE_F64: + case wasm::WASM_TYPE_V128: + case wasm::WASM_TYPE_FUNCREF: + case wasm::WASM_TYPE_EXTERNREF: + return wasm::ValType(Code); + } + if (Code == wasm::WASM_TYPE_NULLABLE || Code == wasm::WASM_TYPE_NONNULLABLE) { + /* Discard HeapType */ readVarint64(Ctx); + } + return wasm::ValType(wasm::ValType::OTHERREF); +} + static Error readInitExpr(wasm::WasmInitExpr &Expr, WasmObjectFile::ReadContext &Ctx) { auto Start = Ctx.Ptr; @@ -196,11 +218,7 @@ static Error readInitExpr(wasm::WasmInitExpr &Expr, Expr.Inst.Value.Global = readULEB128(Ctx); break; case wasm::WASM_OPCODE_REF_NULL: { - wasm::ValType Ty = static_cast<wasm::ValType>(readULEB128(Ctx)); - if (Ty != wasm::ValType::EXTERNREF) { - return make_error<GenericBinaryError>("invalid type for ref.null", - object_error::parse_failed); - } + /* Discard type */ parseValType(Ctx, readVaruint32(Ctx)); break; } default: @@ -221,10 +239,15 @@ static Error readInitExpr(wasm::WasmInitExpr &Expr, case wasm::WASM_OPCODE_I32_CONST: case wasm::WASM_OPCODE_GLOBAL_GET: case wasm::WASM_OPCODE_REF_NULL: + case wasm::WASM_OPCODE_REF_FUNC: case wasm::WASM_OPCODE_I64_CONST: + readULEB128(Ctx); + break; case wasm::WASM_OPCODE_F32_CONST: + readFloat32(Ctx); + break; case wasm::WASM_OPCODE_F64_CONST: - readULEB128(Ctx); + readFloat64(Ctx); break; case wasm::WASM_OPCODE_I32_ADD: case wasm::WASM_OPCODE_I32_SUB: @@ -233,6 +256,23 @@ static Error readInitExpr(wasm::WasmInitExpr &Expr, case wasm::WASM_OPCODE_I64_SUB: case wasm::WASM_OPCODE_I64_MUL: break; + case wasm::WASM_OPCODE_GC_PREFIX: + break; + // The GC opcodes are in a separate (prefixed space). This flat switch + // structure works as long as there is no overlap between the GC and + // general opcodes used in init exprs. + case wasm::WASM_OPCODE_STRUCT_NEW: + case wasm::WASM_OPCODE_STRUCT_NEW_DEFAULT: + case wasm::WASM_OPCODE_ARRAY_NEW: + case wasm::WASM_OPCODE_ARRAY_NEW_DEFAULT: + readULEB128(Ctx); // heap type index + break; + case wasm::WASM_OPCODE_ARRAY_NEW_FIXED: + readULEB128(Ctx); // heap type index + readULEB128(Ctx); // array size + break; + case wasm::WASM_OPCODE_REF_I31: + break; case wasm::WASM_OPCODE_END: Expr.Body = ArrayRef<uint8_t>(Start, Ctx.Ptr - Start); return Error::success(); @@ -258,7 +298,8 @@ static wasm::WasmLimits readLimits(WasmObjectFile::ReadContext &Ctx) { static wasm::WasmTableType readTableType(WasmObjectFile::ReadContext &Ctx) { wasm::WasmTableType TableType; - TableType.ElemType = wasm::ValType(readVaruint32(Ctx)); + auto ElemType = parseValType(Ctx, readVaruint32(Ctx)); + TableType.ElemType = ElemType; TableType.Limits = readLimits(Ctx); return TableType; } @@ -1104,26 +1145,75 @@ Error WasmObjectFile::parseCustomSection(WasmSection &Sec, ReadContext &Ctx) { } Error WasmObjectFile::parseTypeSection(ReadContext &Ctx) { + auto parseFieldDef = [&]() { + uint32_t TypeCode = readVaruint32((Ctx)); + /* Discard StorageType */ parseValType(Ctx, TypeCode); + /* Discard Mutability */ readVaruint32(Ctx); + }; + uint32_t Count = readVaruint32(Ctx); Signatures.reserve(Count); while (Count--) { wasm::WasmSignature Sig; uint8_t Form = readUint8(Ctx); + if (Form == wasm::WASM_TYPE_REC) { + // Rec groups expand the type index space (beyond what was declared at + // the top of the section, and also consume one element in that space. + uint32_t RecSize = readVaruint32(Ctx); + if (RecSize == 0) + return make_error<GenericBinaryError>("Rec group size cannot be 0", + object_error::parse_failed); + Signatures.reserve(Signatures.size() + RecSize); + Count += RecSize; + Sig.Kind = wasm::WasmSignature::Placeholder; + Signatures.push_back(std::move(Sig)); + HasUnmodeledTypes = true; + continue; + } if (Form != wasm::WASM_TYPE_FUNC) { - return make_error<GenericBinaryError>("invalid signature type", - object_error::parse_failed); + // Currently LLVM only models function types, and not other composite + // types. Here we parse the type declarations just enough to skip past + // them in the binary. + if (Form == wasm::WASM_TYPE_SUB || Form == wasm::WASM_TYPE_SUB_FINAL) { + uint32_t Supers = readVaruint32(Ctx); + if (Supers > 0) { + if (Supers != 1) + return make_error<GenericBinaryError>( + "Invalid number of supertypes", object_error::parse_failed); + /* Discard SuperIndex */ readVaruint32(Ctx); + } + Form = readVaruint32(Ctx); + } + if (Form == wasm::WASM_TYPE_STRUCT) { + uint32_t FieldCount = readVaruint32(Ctx); + while (FieldCount--) { + parseFieldDef(); + } + } else if (Form == wasm::WASM_TYPE_ARRAY) { + parseFieldDef(); + } else { + return make_error<GenericBinaryError>("bad form", + object_error::parse_failed); + } + Sig.Kind = wasm::WasmSignature::Placeholder; + Signatures.push_back(std::move(Sig)); + HasUnmodeledTypes = true; + continue; } + uint32_t ParamCount = readVaruint32(Ctx); Sig.Params.reserve(ParamCount); while (ParamCount--) { uint32_t ParamType = readUint8(Ctx); - Sig.Params.push_back(wasm::ValType(ParamType)); + Sig.Params.push_back(parseValType(Ctx, ParamType)); + continue; } uint32_t ReturnCount = readVaruint32(Ctx); while (ReturnCount--) { uint32_t ReturnType = readUint8(Ctx); - Sig.Returns.push_back(wasm::ValType(ReturnType)); + Sig.Returns.push_back(parseValType(Ctx, ReturnType)); } + Signatures.push_back(std::move(Sig)); } if (Ctx.Ptr != Ctx.End) @@ -1164,7 +1254,8 @@ Error WasmObjectFile::parseImportSection(ReadContext &Ctx) { NumImportedTables++; auto ElemType = Im.Table.ElemType; if (ElemType != wasm::ValType::FUNCREF && - ElemType != wasm::ValType::EXTERNREF) + ElemType != wasm::ValType::EXTERNREF && + ElemType != wasm::ValType::OTHERREF) return make_error<GenericBinaryError>("invalid table element type", object_error::parse_failed); break; @@ -1221,7 +1312,8 @@ Error WasmObjectFile::parseTableSection(ReadContext &Ctx) { Tables.push_back(T); auto ElemType = Tables.back().Type.ElemType; if (ElemType != wasm::ValType::FUNCREF && - ElemType != wasm::ValType::EXTERNREF) { + ElemType != wasm::ValType::EXTERNREF && + ElemType != wasm::ValType::OTHERREF) { return make_error<GenericBinaryError>("invalid table element type", object_error::parse_failed); } @@ -1263,6 +1355,7 @@ Error WasmObjectFile::parseTagSection(ReadContext &Ctx) { wasm::WasmTag Tag; Tag.Index = NumImportedTags + Tags.size(); Tag.SigIndex = Type; + Signatures[Type].Kind = wasm::WasmSignature::Tag; Tags.push_back(Tag); } @@ -1279,7 +1372,10 @@ Error WasmObjectFile::parseGlobalSection(ReadContext &Ctx) { while (Count--) { wasm::WasmGlobal Global; Global.Index = NumImportedGlobals + Globals.size(); - Global.Type.Type = readUint8(Ctx); + auto GlobalOpcode = readVaruint32(Ctx); + auto GlobalType = parseValType(Ctx, GlobalOpcode); + // assert(GlobalType <= std::numeric_limits<wasm::ValType>::max()); + Global.Type.Type = (uint8_t)GlobalType; Global.Type.Mutable = readVaruint1(Ctx); if (Error Err = readInitExpr(Global.InitExpr, Ctx)) return Err; @@ -1516,15 +1612,28 @@ Error WasmObjectFile::parseElemSection(ReadContext &Ctx) { return make_error<GenericBinaryError>( "Unsupported flags for element segment", object_error::parse_failed); - if (Segment.Flags & wasm::WASM_ELEM_SEGMENT_HAS_TABLE_NUMBER) + bool IsPassive = (Segment.Flags & wasm::WASM_ELEM_SEGMENT_IS_PASSIVE) != 0; + bool IsDeclarative = + IsPassive && (Segment.Flags & wasm::WASM_ELEM_SEGMENT_IS_DECLARATIVE); + bool HasTableNumber = + !IsPassive && + (Segment.Flags & wasm::WASM_ELEM_SEGMENT_HAS_TABLE_NUMBER); + bool HasInitExprs = + (Segment.Flags & wasm::WASM_ELEM_SEGMENT_HAS_INIT_EXPRS); + bool HasElemKind = + (Segment.Flags & wasm::WASM_ELEM_SEGMENT_MASK_HAS_ELEM_KIND) && + !HasInitExprs; + + if (HasTableNumber) Segment.TableNumber = readVaruint32(Ctx); else Segment.TableNumber = 0; + if (!isValidTableNumber(Segment.TableNumber)) return make_error<GenericBinaryError>("invalid TableNumber", object_error::parse_failed); - if (Segment.Flags & wasm::WASM_ELEM_SEGMENT_IS_PASSIVE) { + if (IsPassive || IsDeclarative) { Segment.Offset.Extended = false; Segment.Offset.Inst.Opcode = wasm::WASM_OPCODE_I32_CONST; Segment.Offset.Inst.Value.Int32 = 0; @@ -1533,33 +1642,41 @@ Error WasmObjectFile::parseElemSection(ReadContext &Ctx) { return Err; } - if (Segment.Flags & wasm::WASM_ELEM_SEGMENT_MASK_HAS_ELEM_KIND) { + if (HasElemKind) { auto ElemKind = readVaruint32(Ctx); if (Segment.Flags & wasm::WASM_ELEM_SEGMENT_HAS_INIT_EXPRS) { - Segment.ElemKind = wasm::ValType(ElemKind); + Segment.ElemKind = parseValType(Ctx, ElemKind); if (Segment.ElemKind != wasm::ValType::FUNCREF && - Segment.ElemKind != wasm::ValType::EXTERNREF) { - return make_error<GenericBinaryError>("invalid reference type", + Segment.ElemKind != wasm::ValType::EXTERNREF && + Segment.ElemKind != wasm::ValType::OTHERREF) { + return make_error<GenericBinaryError>("invalid elem type", object_error::parse_failed); } } else { if (ElemKind != 0) - return make_error<GenericBinaryError>("invalid elemtype", + return make_error<GenericBinaryError>("invalid elem type", object_error::parse_failed); Segment.ElemKind = wasm::ValType::FUNCREF; } + } else if (HasInitExprs) { + auto ElemType = parseValType(Ctx, readVaruint32(Ctx)); + Segment.ElemKind = ElemType; } else { Segment.ElemKind = wasm::ValType::FUNCREF; } - if (Segment.Flags & wasm::WASM_ELEM_SEGMENT_HAS_INIT_EXPRS) - return make_error<GenericBinaryError>( - "elem segment init expressions not yet implemented", - object_error::parse_failed); - uint32_t NumElems = readVaruint32(Ctx); - while (NumElems--) { - Segment.Functions.push_back(readVaruint32(Ctx)); + + if (HasInitExprs) { + while (NumElems--) { + wasm::WasmInitExpr Expr; + if (Error Err = readInitExpr(Expr, Ctx)) + return Err; + } + } else { + while (NumElems--) { + Segment.Functions.push_back(readVaruint32(Ctx)); + } } ElemSegments.push_back(Segment); } |