aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Object/WasmObjectFile.cpp
diff options
context:
space:
mode:
authorDerek Schuff <dschuff@chromium.org>2024-01-25 09:48:38 -0800
committerGitHub <noreply@github.com>2024-01-25 09:48:38 -0800
commit7f409cd82b322038f08a984a07377758e76b0e4c (patch)
tree84f9327a3a41423a37abd8852d40d743911a9ba6 /llvm/lib/Object/WasmObjectFile.cpp
parent7fdb932c4e78706cec3468a1f149e5a54a865d36 (diff)
downloadllvm-7f409cd82b322038f08a984a07377758e76b0e4c.zip
llvm-7f409cd82b322038f08a984a07377758e76b0e4c.tar.gz
llvm-7f409cd82b322038f08a984a07377758e76b0e4c.tar.bz2
[Object][Wasm] Allow parsing of GC types in type and table sections (#79235)
This change allows a WasmObjectFile to be created from a wasm file even if it uses typed funcrefs and GC types. It does not significantly change how lib/Object models its various internal types (e.g. WasmSignature, WasmElemSegment), so LLVM does not really "support" or understand such files, but it is sufficient to parse the type, global and element sections, discarding types that are not understood. This is useful for low-level binary tools such as nm and objcopy, which use only limited aspects of the binary (such as function definitions) or deal with sections as opaque blobs. This is done by allowing `WasmValType` to have a value of `OTHERREF` (representing any unmodeled reference type), and adding a field to `WasmSignature` indicating it's a placeholder for an unmodeled reference type (since there is a 1:1 correspondence between WasmSignature objects and types in the type section). Then the object file parsers for the type and element sections are expanded to parse encoded reference types and discard any unmodeled fields.
Diffstat (limited to 'llvm/lib/Object/WasmObjectFile.cpp')
-rw-r--r--llvm/lib/Object/WasmObjectFile.cpp173
1 files changed, 145 insertions, 28 deletions
diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp
index b9a8e97..953e7c7 100644
--- a/llvm/lib/Object/WasmObjectFile.cpp
+++ b/llvm/lib/Object/WasmObjectFile.cpp
@@ -21,6 +21,7 @@
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/TargetParser/SubtargetFeature.h"
@@ -29,6 +30,7 @@
#include <cassert>
#include <cstdint>
#include <cstring>
+#include <limits>
#define DEBUG_TYPE "wasm-object"
@@ -173,6 +175,26 @@ static uint8_t readOpcode(WasmObjectFile::ReadContext &Ctx) {
return readUint8(Ctx);
}
+static wasm::ValType parseValType(WasmObjectFile::ReadContext &Ctx,
+ uint32_t Code) {
+ // Maps an already-read type code to a wasm::ValType. Only I32/I64/F32/F64/V128 and the directly encoded FUNCREF/EXTERNREF codes are returned as themselves
+ // (not ref null func or ref null extern); every other code falls through below and is reported as OTHERREF.
+ switch (Code) {
+ case wasm::WASM_TYPE_I32:
+ case wasm::WASM_TYPE_I64:
+ case wasm::WASM_TYPE_F32:
+ case wasm::WASM_TYPE_F64:
+ case wasm::WASM_TYPE_V128:
+ case wasm::WASM_TYPE_FUNCREF:
+ case wasm::WASM_TYPE_EXTERNREF:
+ return wasm::ValType(Code);
+ }
+ if (Code == wasm::WASM_TYPE_NULLABLE || Code == wasm::WASM_TYPE_NONNULLABLE) {
+ /* These two codes carry a HeapType operand; read it from Ctx and discard it. */ readVarint64(Ctx);
+ }
+ return wasm::ValType(wasm::ValType::OTHERREF);
+}
+
static Error readInitExpr(wasm::WasmInitExpr &Expr,
WasmObjectFile::ReadContext &Ctx) {
auto Start = Ctx.Ptr;
@@ -196,11 +218,7 @@ static Error readInitExpr(wasm::WasmInitExpr &Expr,
Expr.Inst.Value.Global = readULEB128(Ctx);
break;
case wasm::WASM_OPCODE_REF_NULL: {
- wasm::ValType Ty = static_cast<wasm::ValType>(readULEB128(Ctx));
- if (Ty != wasm::ValType::EXTERNREF) {
- return make_error<GenericBinaryError>("invalid type for ref.null",
- object_error::parse_failed);
- }
+ /* Discard type */ parseValType(Ctx, readVaruint32(Ctx));
break;
}
default:
@@ -221,10 +239,15 @@ static Error readInitExpr(wasm::WasmInitExpr &Expr,
case wasm::WASM_OPCODE_I32_CONST:
case wasm::WASM_OPCODE_GLOBAL_GET:
case wasm::WASM_OPCODE_REF_NULL:
+ case wasm::WASM_OPCODE_REF_FUNC:
case wasm::WASM_OPCODE_I64_CONST:
+ readULEB128(Ctx);
+ break;
case wasm::WASM_OPCODE_F32_CONST:
+ readFloat32(Ctx);
+ break;
case wasm::WASM_OPCODE_F64_CONST:
- readULEB128(Ctx);
+ readFloat64(Ctx);
break;
case wasm::WASM_OPCODE_I32_ADD:
case wasm::WASM_OPCODE_I32_SUB:
@@ -233,6 +256,23 @@ static Error readInitExpr(wasm::WasmInitExpr &Expr,
case wasm::WASM_OPCODE_I64_SUB:
case wasm::WASM_OPCODE_I64_MUL:
break;
+ case wasm::WASM_OPCODE_GC_PREFIX:
+ break;
+ // The GC opcodes are in a separate (prefixed) space. This flat switch
+ // structure works as long as there is no overlap between the GC and
+ // general opcodes used in init exprs.
+ case wasm::WASM_OPCODE_STRUCT_NEW:
+ case wasm::WASM_OPCODE_STRUCT_NEW_DEFAULT:
+ case wasm::WASM_OPCODE_ARRAY_NEW:
+ case wasm::WASM_OPCODE_ARRAY_NEW_DEFAULT:
+ readULEB128(Ctx); // heap type index
+ break;
+ case wasm::WASM_OPCODE_ARRAY_NEW_FIXED:
+ readULEB128(Ctx); // heap type index
+ readULEB128(Ctx); // array size
+ break;
+ case wasm::WASM_OPCODE_REF_I31:
+ break;
case wasm::WASM_OPCODE_END:
Expr.Body = ArrayRef<uint8_t>(Start, Ctx.Ptr - Start);
return Error::success();
@@ -258,7 +298,8 @@ static wasm::WasmLimits readLimits(WasmObjectFile::ReadContext &Ctx) {
static wasm::WasmTableType readTableType(WasmObjectFile::ReadContext &Ctx) {
wasm::WasmTableType TableType;
- TableType.ElemType = wasm::ValType(readVaruint32(Ctx));
+ auto ElemType = parseValType(Ctx, readVaruint32(Ctx));
+ TableType.ElemType = ElemType;
TableType.Limits = readLimits(Ctx);
return TableType;
}
@@ -1104,26 +1145,75 @@ Error WasmObjectFile::parseCustomSection(WasmSection &Sec, ReadContext &Ctx) {
}
Error WasmObjectFile::parseTypeSection(ReadContext &Ctx) {
+ auto parseFieldDef = [&]() {
+ uint32_t TypeCode = readVaruint32((Ctx));
+ /* Discard StorageType */ parseValType(Ctx, TypeCode);
+ /* Discard Mutability */ readVaruint32(Ctx);
+ };
+
uint32_t Count = readVaruint32(Ctx);
Signatures.reserve(Count);
while (Count--) {
wasm::WasmSignature Sig;
uint8_t Form = readUint8(Ctx);
+ if (Form == wasm::WASM_TYPE_REC) {
+ // Rec groups expand the type index space (beyond what was declared at
+ // the top of the section), and also consume one element in that space.
+ uint32_t RecSize = readVaruint32(Ctx);
+ if (RecSize == 0)
+ return make_error<GenericBinaryError>("Rec group size cannot be 0",
+ object_error::parse_failed);
+ Signatures.reserve(Signatures.size() + RecSize);
+ Count += RecSize;
+ Sig.Kind = wasm::WasmSignature::Placeholder;
+ Signatures.push_back(std::move(Sig));
+ HasUnmodeledTypes = true;
+ continue;
+ }
if (Form != wasm::WASM_TYPE_FUNC) {
- return make_error<GenericBinaryError>("invalid signature type",
- object_error::parse_failed);
+ // Currently LLVM only models function types, and not other composite
+ // types. Here we parse the type declarations just enough to skip past
+ // them in the binary.
+ if (Form == wasm::WASM_TYPE_SUB || Form == wasm::WASM_TYPE_SUB_FINAL) {
+ uint32_t Supers = readVaruint32(Ctx);
+ if (Supers > 0) {
+ if (Supers != 1)
+ return make_error<GenericBinaryError>(
+ "Invalid number of supertypes", object_error::parse_failed);
+ /* Discard SuperIndex */ readVaruint32(Ctx);
+ }
+ Form = readVaruint32(Ctx);
+ }
+ if (Form == wasm::WASM_TYPE_STRUCT) {
+ uint32_t FieldCount = readVaruint32(Ctx);
+ while (FieldCount--) {
+ parseFieldDef();
+ }
+ } else if (Form == wasm::WASM_TYPE_ARRAY) {
+ parseFieldDef();
+ } else {
+ return make_error<GenericBinaryError>("bad form",
+ object_error::parse_failed);
+ }
+ Sig.Kind = wasm::WasmSignature::Placeholder;
+ Signatures.push_back(std::move(Sig));
+ HasUnmodeledTypes = true;
+ continue;
}
+
uint32_t ParamCount = readVaruint32(Ctx);
Sig.Params.reserve(ParamCount);
while (ParamCount--) {
uint32_t ParamType = readUint8(Ctx);
- Sig.Params.push_back(wasm::ValType(ParamType));
+ Sig.Params.push_back(parseValType(Ctx, ParamType));
+ continue;
}
uint32_t ReturnCount = readVaruint32(Ctx);
while (ReturnCount--) {
uint32_t ReturnType = readUint8(Ctx);
- Sig.Returns.push_back(wasm::ValType(ReturnType));
+ Sig.Returns.push_back(parseValType(Ctx, ReturnType));
}
+
Signatures.push_back(std::move(Sig));
}
if (Ctx.Ptr != Ctx.End)
@@ -1164,7 +1254,8 @@ Error WasmObjectFile::parseImportSection(ReadContext &Ctx) {
NumImportedTables++;
auto ElemType = Im.Table.ElemType;
if (ElemType != wasm::ValType::FUNCREF &&
- ElemType != wasm::ValType::EXTERNREF)
+ ElemType != wasm::ValType::EXTERNREF &&
+ ElemType != wasm::ValType::OTHERREF)
return make_error<GenericBinaryError>("invalid table element type",
object_error::parse_failed);
break;
@@ -1221,7 +1312,8 @@ Error WasmObjectFile::parseTableSection(ReadContext &Ctx) {
Tables.push_back(T);
auto ElemType = Tables.back().Type.ElemType;
if (ElemType != wasm::ValType::FUNCREF &&
- ElemType != wasm::ValType::EXTERNREF) {
+ ElemType != wasm::ValType::EXTERNREF &&
+ ElemType != wasm::ValType::OTHERREF) {
return make_error<GenericBinaryError>("invalid table element type",
object_error::parse_failed);
}
@@ -1263,6 +1355,7 @@ Error WasmObjectFile::parseTagSection(ReadContext &Ctx) {
wasm::WasmTag Tag;
Tag.Index = NumImportedTags + Tags.size();
Tag.SigIndex = Type;
+ Signatures[Type].Kind = wasm::WasmSignature::Tag;
Tags.push_back(Tag);
}
@@ -1279,7 +1372,10 @@ Error WasmObjectFile::parseGlobalSection(ReadContext &Ctx) {
while (Count--) {
wasm::WasmGlobal Global;
Global.Index = NumImportedGlobals + Globals.size();
- Global.Type.Type = readUint8(Ctx);
+ auto GlobalOpcode = readVaruint32(Ctx);
+ auto GlobalType = parseValType(Ctx, GlobalOpcode);
+ // assert(GlobalType <= std::numeric_limits<wasm::ValType>::max());
+ Global.Type.Type = (uint8_t)GlobalType;
Global.Type.Mutable = readVaruint1(Ctx);
if (Error Err = readInitExpr(Global.InitExpr, Ctx))
return Err;
@@ -1516,15 +1612,28 @@ Error WasmObjectFile::parseElemSection(ReadContext &Ctx) {
return make_error<GenericBinaryError>(
"Unsupported flags for element segment", object_error::parse_failed);
- if (Segment.Flags & wasm::WASM_ELEM_SEGMENT_HAS_TABLE_NUMBER)
+ bool IsPassive = (Segment.Flags & wasm::WASM_ELEM_SEGMENT_IS_PASSIVE) != 0;
+ bool IsDeclarative =
+ IsPassive && (Segment.Flags & wasm::WASM_ELEM_SEGMENT_IS_DECLARATIVE);
+ bool HasTableNumber =
+ !IsPassive &&
+ (Segment.Flags & wasm::WASM_ELEM_SEGMENT_HAS_TABLE_NUMBER);
+ bool HasInitExprs =
+ (Segment.Flags & wasm::WASM_ELEM_SEGMENT_HAS_INIT_EXPRS);
+ bool HasElemKind =
+ (Segment.Flags & wasm::WASM_ELEM_SEGMENT_MASK_HAS_ELEM_KIND) &&
+ !HasInitExprs;
+
+ if (HasTableNumber)
Segment.TableNumber = readVaruint32(Ctx);
else
Segment.TableNumber = 0;
+
if (!isValidTableNumber(Segment.TableNumber))
return make_error<GenericBinaryError>("invalid TableNumber",
object_error::parse_failed);
- if (Segment.Flags & wasm::WASM_ELEM_SEGMENT_IS_PASSIVE) {
+ if (IsPassive || IsDeclarative) {
Segment.Offset.Extended = false;
Segment.Offset.Inst.Opcode = wasm::WASM_OPCODE_I32_CONST;
Segment.Offset.Inst.Value.Int32 = 0;
@@ -1533,33 +1642,41 @@ Error WasmObjectFile::parseElemSection(ReadContext &Ctx) {
return Err;
}
- if (Segment.Flags & wasm::WASM_ELEM_SEGMENT_MASK_HAS_ELEM_KIND) {
+ if (HasElemKind) {
auto ElemKind = readVaruint32(Ctx);
if (Segment.Flags & wasm::WASM_ELEM_SEGMENT_HAS_INIT_EXPRS) {
- Segment.ElemKind = wasm::ValType(ElemKind);
+ Segment.ElemKind = parseValType(Ctx, ElemKind);
if (Segment.ElemKind != wasm::ValType::FUNCREF &&
- Segment.ElemKind != wasm::ValType::EXTERNREF) {
- return make_error<GenericBinaryError>("invalid reference type",
+ Segment.ElemKind != wasm::ValType::EXTERNREF &&
+ Segment.ElemKind != wasm::ValType::OTHERREF) {
+ return make_error<GenericBinaryError>("invalid elem type",
object_error::parse_failed);
}
} else {
if (ElemKind != 0)
- return make_error<GenericBinaryError>("invalid elemtype",
+ return make_error<GenericBinaryError>("invalid elem type",
object_error::parse_failed);
Segment.ElemKind = wasm::ValType::FUNCREF;
}
+ } else if (HasInitExprs) {
+ auto ElemType = parseValType(Ctx, readVaruint32(Ctx));
+ Segment.ElemKind = ElemType;
} else {
Segment.ElemKind = wasm::ValType::FUNCREF;
}
- if (Segment.Flags & wasm::WASM_ELEM_SEGMENT_HAS_INIT_EXPRS)
- return make_error<GenericBinaryError>(
- "elem segment init expressions not yet implemented",
- object_error::parse_failed);
-
uint32_t NumElems = readVaruint32(Ctx);
- while (NumElems--) {
- Segment.Functions.push_back(readVaruint32(Ctx));
+
+ if (HasInitExprs) {
+ while (NumElems--) {
+ wasm::WasmInitExpr Expr;
+ if (Error Err = readInitExpr(Expr, Ctx))
+ return Err;
+ }
+ } else {
+ while (NumElems--) {
+ Segment.Functions.push_back(readVaruint32(Ctx));
+ }
}
ElemSegments.push_back(Segment);
}