diff options
Diffstat (limited to 'libgo/go/encoding')
45 files changed, 18233 insertions, 2 deletions
diff --git a/libgo/go/encoding/asn1/asn1.go b/libgo/go/encoding/asn1/asn1.go new file mode 100644 index 0000000..a006665 --- /dev/null +++ b/libgo/go/encoding/asn1/asn1.go @@ -0,0 +1,841 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package asn1 implements parsing of DER-encoded ASN.1 data structures, +// as defined in ITU-T Rec X.690. +// +// See also ``A Layman's Guide to a Subset of ASN.1, BER, and DER,'' +// http://luca.ntop.org/Teaching/Appunti/asn1.html. +package asn1 + +// ASN.1 is a syntax for specifying abstract objects and BER, DER, PER, XER etc +// are different encoding formats for those objects. Here, we'll be dealing +// with DER, the Distinguished Encoding Rules. DER is used in X.509 because +// it's fast to parse and, unlike BER, has a unique encoding for every object. +// When calculating hashes over objects, it's important that the resulting +// bytes be the same at both ends and DER removes this margin of error. +// +// ASN.1 is very complex and this package doesn't attempt to implement +// everything by any means. + +import ( + "fmt" + "math/big" + "reflect" + "time" +) + +// A StructuralError suggests that the ASN.1 data is valid, but the Go type +// which is receiving it doesn't match. +type StructuralError struct { + Msg string +} + +func (e StructuralError) Error() string { return "ASN.1 structure error: " + e.Msg } + +// A SyntaxError suggests that the ASN.1 data is invalid. +type SyntaxError struct { + Msg string +} + +func (e SyntaxError) Error() string { return "ASN.1 syntax error: " + e.Msg } + +// We start by dealing with each of the primitive types in turn. + +// BOOLEAN + +func parseBool(bytes []byte) (ret bool, err error) { + if len(bytes) != 1 { + err = SyntaxError{"invalid boolean"} + return + } + + return bytes[0] != 0, nil +} + +// INTEGER + +// parseInt64 treats the given bytes as a big-endian, signed integer and +// returns the result. +func parseInt64(bytes []byte) (ret int64, err error) { + if len(bytes) > 8 { + // We'll overflow an int64 in this case. + err = StructuralError{"integer too large"} + return + } + for bytesRead := 0; bytesRead < len(bytes); bytesRead++ { + ret <<= 8 + ret |= int64(bytes[bytesRead]) + } + + // Shift up and down in order to sign extend the result. + ret <<= 64 - uint8(len(bytes))*8 + ret >>= 64 - uint8(len(bytes))*8 + return +} + +// parseInt treats the given bytes as a big-endian, signed integer and returns +// the result. +func parseInt(bytes []byte) (int, error) { + ret64, err := parseInt64(bytes) + if err != nil { + return 0, err + } + if ret64 != int64(int(ret64)) { + return 0, StructuralError{"integer too large"} + } + return int(ret64), nil +} + +var bigOne = big.NewInt(1) + +// parseBigInt treats the given bytes as a big-endian, signed integer and returns +// the result. +func parseBigInt(bytes []byte) *big.Int { + ret := new(big.Int) + if len(bytes) > 0 && bytes[0]&0x80 == 0x80 { + // This is a negative number. + notBytes := make([]byte, len(bytes)) + for i := range notBytes { + notBytes[i] = ^bytes[i] + } + ret.SetBytes(notBytes) + ret.Add(ret, bigOne) + ret.Neg(ret) + return ret + } + ret.SetBytes(bytes) + return ret +} + +// BIT STRING + +// BitString is the structure to use when you want an ASN.1 BIT STRING type. A +// bit string is padded up to the nearest byte in memory and the number of +// valid bits is recorded. Padding bits will be zero. +type BitString struct { + Bytes []byte // bits packed into bytes. + BitLength int // length in bits. +} + +// At returns the bit at the given index. If the index is out of range it +// returns false. +func (b BitString) At(i int) int { + if i < 0 || i >= b.BitLength { + return 0 + } + x := i / 8 + y := 7 - uint(i%8) + return int(b.Bytes[x]>>y) & 1 +} + +// RightAlign returns a slice where the padding bits are at the beginning. The +// slice may share memory with the BitString. +func (b BitString) RightAlign() []byte { + shift := uint(8 - (b.BitLength % 8)) + if shift == 8 || len(b.Bytes) == 0 { + return b.Bytes + } + + a := make([]byte, len(b.Bytes)) + a[0] = b.Bytes[0] >> shift + for i := 1; i < len(b.Bytes); i++ { + a[i] = b.Bytes[i-1] << (8 - shift) + a[i] |= b.Bytes[i] >> shift + } + + return a +} + +// parseBitString parses an ASN.1 bit string from the given byte slice and returns it. +func parseBitString(bytes []byte) (ret BitString, err error) { + if len(bytes) == 0 { + err = SyntaxError{"zero length BIT STRING"} + return + } + paddingBits := int(bytes[0]) + if paddingBits > 7 || + len(bytes) == 1 && paddingBits > 0 || + bytes[len(bytes)-1]&((1<<bytes[0])-1) != 0 { + err = SyntaxError{"invalid padding bits in BIT STRING"} + return + } + ret.BitLength = (len(bytes)-1)*8 - paddingBits + ret.Bytes = bytes[1:] + return +} + +// OBJECT IDENTIFIER + +// An ObjectIdentifier represents an ASN.1 OBJECT IDENTIFIER. +type ObjectIdentifier []int + +// Equal returns true iff oi and other represent the same identifier. +func (oi ObjectIdentifier) Equal(other ObjectIdentifier) bool { + if len(oi) != len(other) { + return false + } + for i := 0; i < len(oi); i++ { + if oi[i] != other[i] { + return false + } + } + + return true +} + +// parseObjectIdentifier parses an OBJECT IDENTIFIER from the given bytes and +// returns it. An object identifier is a sequence of variable length integers +// that are assigned in a hierarchy. +func parseObjectIdentifier(bytes []byte) (s []int, err error) { + if len(bytes) == 0 { + err = SyntaxError{"zero length OBJECT IDENTIFIER"} + return + } + + // In the worst case, we get two elements from the first byte (which is + // encoded differently) and then every varint is a single byte long. + s = make([]int, len(bytes)+1) + + // The first byte is 40*value1 + value2: + s[0] = int(bytes[0]) / 40 + s[1] = int(bytes[0]) % 40 + i := 2 + for offset := 1; offset < len(bytes); i++ { + var v int + v, offset, err = parseBase128Int(bytes, offset) + if err != nil { + return + } + s[i] = v + } + s = s[0:i] + return +} + +// ENUMERATED + +// An Enumerated is represented as a plain int. +type Enumerated int + +// FLAG + +// A Flag accepts any data and is set to true if present. +type Flag bool + +// parseBase128Int parses a base-128 encoded int from the given offset in the +// given byte slice. It returns the value and the new offset. +func parseBase128Int(bytes []byte, initOffset int) (ret, offset int, err error) { + offset = initOffset + for shifted := 0; offset < len(bytes); shifted++ { + if shifted > 4 { + err = StructuralError{"base 128 integer too large"} + return + } + ret <<= 7 + b := bytes[offset] + ret |= int(b & 0x7f) + offset++ + if b&0x80 == 0 { + return + } + } + err = SyntaxError{"truncated base 128 integer"} + return +} + +// UTCTime + +func parseUTCTime(bytes []byte) (ret *time.Time, err error) { + s := string(bytes) + ret, err = time.Parse("0601021504Z0700", s) + if err == nil { + return + } + ret, err = time.Parse("060102150405Z0700", s) + return +} + +// parseGeneralizedTime parses the GeneralizedTime from the given byte slice +// and returns the resulting time. +func parseGeneralizedTime(bytes []byte) (ret *time.Time, err error) { + return time.Parse("20060102150405Z0700", string(bytes)) +} + +// PrintableString + +// parsePrintableString parses a ASN.1 PrintableString from the given byte +// array and returns it. +func parsePrintableString(bytes []byte) (ret string, err error) { + for _, b := range bytes { + if !isPrintable(b) { + err = SyntaxError{"PrintableString contains invalid character"} + return + } + } + ret = string(bytes) + return +} + +// isPrintable returns true iff the given b is in the ASN.1 PrintableString set. +func isPrintable(b byte) bool { + return 'a' <= b && b <= 'z' || + 'A' <= b && b <= 'Z' || + '0' <= b && b <= '9' || + '\'' <= b && b <= ')' || + '+' <= b && b <= '/' || + b == ' ' || + b == ':' || + b == '=' || + b == '?' || + // This is technically not allowed in a PrintableString. + // However, x509 certificates with wildcard strings don't + // always use the correct string type so we permit it. + b == '*' +} + +// IA5String + +// parseIA5String parses a ASN.1 IA5String (ASCII string) from the given +// byte slice and returns it. +func parseIA5String(bytes []byte) (ret string, err error) { + for _, b := range bytes { + if b >= 0x80 { + err = SyntaxError{"IA5String contains invalid character"} + return + } + } + ret = string(bytes) + return +} + +// T61String + +// parseT61String parses a ASN.1 T61String (8-bit clean string) from the given +// byte slice and returns it. +func parseT61String(bytes []byte) (ret string, err error) { + return string(bytes), nil +} + +// UTF8String + +// parseUTF8String parses a ASN.1 UTF8String (raw UTF-8) from the given byte +// array and returns it. +func parseUTF8String(bytes []byte) (ret string, err error) { + return string(bytes), nil +} + +// A RawValue represents an undecoded ASN.1 object. +type RawValue struct { + Class, Tag int + IsCompound bool + Bytes []byte + FullBytes []byte // includes the tag and length +} + +// RawContent is used to signal that the undecoded, DER data needs to be +// preserved for a struct. To use it, the first field of the struct must have +// this type. It's an error for any of the other fields to have this type. +type RawContent []byte + +// Tagging + +// parseTagAndLength parses an ASN.1 tag and length pair from the given offset +// into a byte slice. It returns the parsed data and the new offset. SET and +// SET OF (tag 17) are mapped to SEQUENCE and SEQUENCE OF (tag 16) since we +// don't distinguish between ordered and unordered objects in this code. +func parseTagAndLength(bytes []byte, initOffset int) (ret tagAndLength, offset int, err error) { + offset = initOffset + b := bytes[offset] + offset++ + ret.class = int(b >> 6) + ret.isCompound = b&0x20 == 0x20 + ret.tag = int(b & 0x1f) + + // If the bottom five bits are set, then the tag number is actually base 128 + // encoded afterwards + if ret.tag == 0x1f { + ret.tag, offset, err = parseBase128Int(bytes, offset) + if err != nil { + return + } + } + if offset >= len(bytes) { + err = SyntaxError{"truncated tag or length"} + return + } + b = bytes[offset] + offset++ + if b&0x80 == 0 { + // The length is encoded in the bottom 7 bits. + ret.length = int(b & 0x7f) + } else { + // Bottom 7 bits give the number of length bytes to follow. + numBytes := int(b & 0x7f) + // We risk overflowing a signed 32-bit number if we accept more than 3 bytes. + if numBytes > 3 { + err = StructuralError{"length too large"} + return + } + if numBytes == 0 { + err = SyntaxError{"indefinite length found (not DER)"} + return + } + ret.length = 0 + for i := 0; i < numBytes; i++ { + if offset >= len(bytes) { + err = SyntaxError{"truncated tag or length"} + return + } + b = bytes[offset] + offset++ + ret.length <<= 8 + ret.length |= int(b) + } + } + + return +} + +// parseSequenceOf is used for SEQUENCE OF and SET OF values. It tries to parse +// a number of ASN.1 values from the given byte slice and returns them as a +// slice of Go values of the given type. +func parseSequenceOf(bytes []byte, sliceType reflect.Type, elemType reflect.Type) (ret reflect.Value, err error) { + expectedTag, compoundType, ok := getUniversalType(elemType) + if !ok { + err = StructuralError{"unknown Go type for slice"} + return + } + + // First we iterate over the input and count the number of elements, + // checking that the types are correct in each case. + numElements := 0 + for offset := 0; offset < len(bytes); { + var t tagAndLength + t, offset, err = parseTagAndLength(bytes, offset) + if err != nil { + return + } + // We pretend that GENERAL STRINGs are PRINTABLE STRINGs so + // that a sequence of them can be parsed into a []string. + if t.tag == tagGeneralString { + t.tag = tagPrintableString + } + if t.class != classUniversal || t.isCompound != compoundType || t.tag != expectedTag { + err = StructuralError{"sequence tag mismatch"} + return + } + if invalidLength(offset, t.length, len(bytes)) { + err = SyntaxError{"truncated sequence"} + return + } + offset += t.length + numElements++ + } + ret = reflect.MakeSlice(sliceType, numElements, numElements) + params := fieldParameters{} + offset := 0 + for i := 0; i < numElements; i++ { + offset, err = parseField(ret.Index(i), bytes, offset, params) + if err != nil { + return + } + } + return +} + +var ( + bitStringType = reflect.TypeOf(BitString{}) + objectIdentifierType = reflect.TypeOf(ObjectIdentifier{}) + enumeratedType = reflect.TypeOf(Enumerated(0)) + flagType = reflect.TypeOf(Flag(false)) + timeType = reflect.TypeOf(&time.Time{}) + rawValueType = reflect.TypeOf(RawValue{}) + rawContentsType = reflect.TypeOf(RawContent(nil)) + bigIntType = reflect.TypeOf(new(big.Int)) +) + +// invalidLength returns true iff offset + length > sliceLength, or if the +// addition would overflow. +func invalidLength(offset, length, sliceLength int) bool { + return offset+length < offset || offset+length > sliceLength +} + +// parseField is the main parsing function. Given a byte slice and an offset +// into the array, it will try to parse a suitable ASN.1 value out and store it +// in the given Value. +func parseField(v reflect.Value, bytes []byte, initOffset int, params fieldParameters) (offset int, err error) { + offset = initOffset + fieldType := v.Type() + + // If we have run out of data, it may be that there are optional elements at the end. + if offset == len(bytes) { + if !setDefaultValue(v, params) { + err = SyntaxError{"sequence truncated"} + } + return + } + + // Deal with raw values. + if fieldType == rawValueType { + var t tagAndLength + t, offset, err = parseTagAndLength(bytes, offset) + if err != nil { + return + } + if invalidLength(offset, t.length, len(bytes)) { + err = SyntaxError{"data truncated"} + return + } + result := RawValue{t.class, t.tag, t.isCompound, bytes[offset : offset+t.length], bytes[initOffset : offset+t.length]} + offset += t.length + v.Set(reflect.ValueOf(result)) + return + } + + // Deal with the ANY type. + if ifaceType := fieldType; ifaceType.Kind() == reflect.Interface && ifaceType.NumMethod() == 0 { + var t tagAndLength + t, offset, err = parseTagAndLength(bytes, offset) + if err != nil { + return + } + if invalidLength(offset, t.length, len(bytes)) { + err = SyntaxError{"data truncated"} + return + } + var result interface{} + if !t.isCompound && t.class == classUniversal { + innerBytes := bytes[offset : offset+t.length] + switch t.tag { + case tagPrintableString: + result, err = parsePrintableString(innerBytes) + case tagIA5String: + result, err = parseIA5String(innerBytes) + case tagT61String: + result, err = parseT61String(innerBytes) + case tagUTF8String: + result, err = parseUTF8String(innerBytes) + case tagInteger: + result, err = parseInt64(innerBytes) + case tagBitString: + result, err = parseBitString(innerBytes) + case tagOID: + result, err = parseObjectIdentifier(innerBytes) + case tagUTCTime: + result, err = parseUTCTime(innerBytes) + case tagOctetString: + result = innerBytes + default: + // If we don't know how to handle the type, we just leave Value as nil. + } + } + offset += t.length + if err != nil { + return + } + if result != nil { + v.Set(reflect.ValueOf(result)) + } + return + } + universalTag, compoundType, ok1 := getUniversalType(fieldType) + if !ok1 { + err = StructuralError{fmt.Sprintf("unknown Go type: %v", fieldType)} + return + } + + t, offset, err := parseTagAndLength(bytes, offset) + if err != nil { + return + } + if params.explicit { + expectedClass := classContextSpecific + if params.application { + expectedClass = classApplication + } + if t.class == expectedClass && t.tag == *params.tag && (t.length == 0 || t.isCompound) { + if t.length > 0 { + t, offset, err = parseTagAndLength(bytes, offset) + if err != nil { + return + } + } else { + if fieldType != flagType { + err = StructuralError{"Zero length explicit tag was not an asn1.Flag"} + return + } + v.SetBool(true) + return + } + } else { + // The tags didn't match, it might be an optional element. + ok := setDefaultValue(v, params) + if ok { + offset = initOffset + } else { + err = StructuralError{"explicitly tagged member didn't match"} + } + return + } + } + + // Special case for strings: all the ASN.1 string types map to the Go + // type string. getUniversalType returns the tag for PrintableString + // when it sees a string, so if we see a different string type on the + // wire, we change the universal type to match. + if universalTag == tagPrintableString { + switch t.tag { + case tagIA5String, tagGeneralString, tagT61String, tagUTF8String: + universalTag = t.tag + } + } + + // Special case for time: UTCTime and GeneralizedTime both map to the + // Go type time.Time. + if universalTag == tagUTCTime && t.tag == tagGeneralizedTime { + universalTag = tagGeneralizedTime + } + + expectedClass := classUniversal + expectedTag := universalTag + + if !params.explicit && params.tag != nil { + expectedClass = classContextSpecific + expectedTag = *params.tag + } + + if !params.explicit && params.application && params.tag != nil { + expectedClass = classApplication + expectedTag = *params.tag + } + + // We have unwrapped any explicit tagging at this point. + if t.class != expectedClass || t.tag != expectedTag || t.isCompound != compoundType { + // Tags don't match. Again, it could be an optional element. + ok := setDefaultValue(v, params) + if ok { + offset = initOffset + } else { + err = StructuralError{fmt.Sprintf("tags don't match (%d vs %+v) %+v %s @%d", expectedTag, t, params, fieldType.Name(), offset)} + } + return + } + if invalidLength(offset, t.length, len(bytes)) { + err = SyntaxError{"data truncated"} + return + } + innerBytes := bytes[offset : offset+t.length] + offset += t.length + + // We deal with the structures defined in this package first. + switch fieldType { + case objectIdentifierType: + newSlice, err1 := parseObjectIdentifier(innerBytes) + v.Set(reflect.MakeSlice(v.Type(), len(newSlice), len(newSlice))) + if err1 == nil { + reflect.Copy(v, reflect.ValueOf(newSlice)) + } + err = err1 + return + case bitStringType: + bs, err1 := parseBitString(innerBytes) + if err1 == nil { + v.Set(reflect.ValueOf(bs)) + } + err = err1 + return + case timeType: + var time *time.Time + var err1 error + if universalTag == tagUTCTime { + time, err1 = parseUTCTime(innerBytes) + } else { + time, err1 = parseGeneralizedTime(innerBytes) + } + if err1 == nil { + v.Set(reflect.ValueOf(time)) + } + err = err1 + return + case enumeratedType: + parsedInt, err1 := parseInt(innerBytes) + if err1 == nil { + v.SetInt(int64(parsedInt)) + } + err = err1 + return + case flagType: + v.SetBool(true) + return + case bigIntType: + parsedInt := parseBigInt(innerBytes) + v.Set(reflect.ValueOf(parsedInt)) + return + } + switch val := v; val.Kind() { + case reflect.Bool: + parsedBool, err1 := parseBool(innerBytes) + if err1 == nil { + val.SetBool(parsedBool) + } + err = err1 + return + case reflect.Int, reflect.Int32: + parsedInt, err1 := parseInt(innerBytes) + if err1 == nil { + val.SetInt(int64(parsedInt)) + } + err = err1 + return + case reflect.Int64: + parsedInt, err1 := parseInt64(innerBytes) + if err1 == nil { + val.SetInt(parsedInt) + } + err = err1 + return + // TODO(dfc) Add support for the remaining integer types + case reflect.Struct: + structType := fieldType + + if structType.NumField() > 0 && + structType.Field(0).Type == rawContentsType { + bytes := bytes[initOffset:offset] + val.Field(0).Set(reflect.ValueOf(RawContent(bytes))) + } + + innerOffset := 0 + for i := 0; i < structType.NumField(); i++ { + field := structType.Field(i) + if i == 0 && field.Type == rawContentsType { + continue + } + innerOffset, err = parseField(val.Field(i), innerBytes, innerOffset, parseFieldParameters(field.Tag.Get("asn1"))) + if err != nil { + return + } + } + // We allow extra bytes at the end of the SEQUENCE because + // adding elements to the end has been used in X.509 as the + // version numbers have increased. + return + case reflect.Slice: + sliceType := fieldType + if sliceType.Elem().Kind() == reflect.Uint8 { + val.Set(reflect.MakeSlice(sliceType, len(innerBytes), len(innerBytes))) + reflect.Copy(val, reflect.ValueOf(innerBytes)) + return + } + newSlice, err1 := parseSequenceOf(innerBytes, sliceType, sliceType.Elem()) + if err1 == nil { + val.Set(newSlice) + } + err = err1 + return + case reflect.String: + var v string + switch universalTag { + case tagPrintableString: + v, err = parsePrintableString(innerBytes) + case tagIA5String: + v, err = parseIA5String(innerBytes) + case tagT61String: + v, err = parseT61String(innerBytes) + case tagUTF8String: + v, err = parseUTF8String(innerBytes) + case tagGeneralString: + // GeneralString is specified in ISO-2022/ECMA-35, + // A brief review suggests that it includes structures + // that allow the encoding to change midstring and + // such. We give up and pass it as an 8-bit string. + v, err = parseT61String(innerBytes) + default: + err = SyntaxError{fmt.Sprintf("internal error: unknown string type %d", universalTag)} + } + if err == nil { + val.SetString(v) + } + return + } + err = StructuralError{"unsupported: " + v.Type().String()} + return +} + +// setDefaultValue is used to install a default value, from a tag string, into +// a Value. It is successful is the field was optional, even if a default value +// wasn't provided or it failed to install it into the Value. +func setDefaultValue(v reflect.Value, params fieldParameters) (ok bool) { + if !params.optional { + return + } + ok = true + if params.defaultValue == nil { + return + } + switch val := v; val.Kind() { + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + val.SetInt(*params.defaultValue) + } + return +} + +// Unmarshal parses the DER-encoded ASN.1 data structure b +// and uses the reflect package to fill in an arbitrary value pointed at by val. +// Because Unmarshal uses the reflect package, the structs +// being written to must use upper case field names. +// +// An ASN.1 INTEGER can be written to an int, int32 or int64. +// If the encoded value does not fit in the Go type, +// Unmarshal returns a parse error. +// +// An ASN.1 BIT STRING can be written to a BitString. +// +// An ASN.1 OCTET STRING can be written to a []byte. +// +// An ASN.1 OBJECT IDENTIFIER can be written to an +// ObjectIdentifier. +// +// An ASN.1 ENUMERATED can be written to an Enumerated. +// +// An ASN.1 UTCTIME or GENERALIZEDTIME can be written to a *time.Time. +// +// An ASN.1 PrintableString or IA5String can be written to a string. +// +// Any of the above ASN.1 values can be written to an interface{}. +// The value stored in the interface has the corresponding Go type. +// For integers, that type is int64. +// +// An ASN.1 SEQUENCE OF x or SET OF x can be written +// to a slice if an x can be written to the slice's element type. +// +// An ASN.1 SEQUENCE or SET can be written to a struct +// if each of the elements in the sequence can be +// written to the corresponding element in the struct. +// +// The following tags on struct fields have special meaning to Unmarshal: +// +// optional marks the field as ASN.1 OPTIONAL +// [explicit] tag:x specifies the ASN.1 tag number; implies ASN.1 CONTEXT SPECIFIC +// default:x sets the default value for optional integer fields +// +// If the type of the first field of a structure is RawContent then the raw +// ASN1 contents of the struct will be stored in it. +// +// Other ASN.1 types are not supported; if it encounters them, +// Unmarshal returns a parse error. +func Unmarshal(b []byte, val interface{}) (rest []byte, err error) { + return UnmarshalWithParams(b, val, "") +} + +// UnmarshalWithParams allows field parameters to be specified for the +// top-level element. The form of the params is the same as the field tags. +func UnmarshalWithParams(b []byte, val interface{}, params string) (rest []byte, err error) { + v := reflect.ValueOf(val).Elem() + offset, err := parseField(v, b, 0, parseFieldParameters(params)) + if err != nil { + return nil, err + } + return b[offset:], nil +} diff --git a/libgo/go/encoding/asn1/asn1_test.go b/libgo/go/encoding/asn1/asn1_test.go new file mode 100644 index 0000000..1c529bd --- /dev/null +++ b/libgo/go/encoding/asn1/asn1_test.go @@ -0,0 +1,689 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package asn1 + +import ( + "bytes" + "reflect" + "testing" + "time" +) + +type int64Test struct { + in []byte + ok bool + out int64 +} + +var int64TestData = []int64Test{ + {[]byte{0x00}, true, 0}, + {[]byte{0x7f}, true, 127}, + {[]byte{0x00, 0x80}, true, 128}, + {[]byte{0x01, 0x00}, true, 256}, + {[]byte{0x80}, true, -128}, + {[]byte{0xff, 0x7f}, true, -129}, + {[]byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, true, -1}, + {[]byte{0xff}, true, -1}, + {[]byte{0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, true, -9223372036854775808}, + {[]byte{0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, false, 0}, +} + +func TestParseInt64(t *testing.T) { + for i, test := range int64TestData { + ret, err := parseInt64(test.in) + if (err == nil) != test.ok { + t.Errorf("#%d: Incorrect error result (did fail? %v, expected: %v)", i, err == nil, test.ok) + } + if test.ok && ret != test.out { + t.Errorf("#%d: Bad result: %v (expected %v)", i, ret, test.out) + } + } +} + +type int32Test struct { + in []byte + ok bool + out int32 +} + +var int32TestData = []int32Test{ + {[]byte{0x00}, true, 0}, + {[]byte{0x7f}, true, 127}, + {[]byte{0x00, 0x80}, true, 128}, + {[]byte{0x01, 0x00}, true, 256}, + {[]byte{0x80}, true, -128}, + {[]byte{0xff, 0x7f}, true, -129}, + {[]byte{0xff, 0xff, 0xff, 0xff}, true, -1}, + {[]byte{0xff}, true, -1}, + {[]byte{0x80, 0x00, 0x00, 0x00}, true, -2147483648}, + {[]byte{0x80, 0x00, 0x00, 0x00, 0x00}, false, 0}, +} + +func TestParseInt32(t *testing.T) { + for i, test := range int32TestData { + ret, err := parseInt(test.in) + if (err == nil) != test.ok { + t.Errorf("#%d: Incorrect error result (did fail? %v, expected: %v)", i, err == nil, test.ok) + } + if test.ok && int32(ret) != test.out { + t.Errorf("#%d: Bad result: %v (expected %v)", i, ret, test.out) + } + } +} + +var bigIntTests = []struct { + in []byte + base10 string +}{ + {[]byte{0xff}, "-1"}, + {[]byte{0x00}, "0"}, + {[]byte{0x01}, "1"}, + {[]byte{0x00, 0xff}, "255"}, + {[]byte{0xff, 0x00}, "-256"}, + {[]byte{0x01, 0x00}, "256"}, +} + +func TestParseBigInt(t *testing.T) { + for i, test := range bigIntTests { + ret := parseBigInt(test.in) + if ret.String() != test.base10 { + t.Errorf("#%d: bad result from %x, got %s want %s", i, test.in, ret.String(), test.base10) + } + fw := newForkableWriter() + marshalBigInt(fw, ret) + result := fw.Bytes() + if !bytes.Equal(result, test.in) { + t.Errorf("#%d: got %x from marshaling %s, want %x", i, result, ret, test.in) + } + } +} + +type bitStringTest struct { + in []byte + ok bool + out []byte + bitLength int +} + +var bitStringTestData = []bitStringTest{ + {[]byte{}, false, []byte{}, 0}, + {[]byte{0x00}, true, []byte{}, 0}, + {[]byte{0x07, 0x00}, true, []byte{0x00}, 1}, + {[]byte{0x07, 0x01}, false, []byte{}, 0}, + {[]byte{0x07, 0x40}, false, []byte{}, 0}, + {[]byte{0x08, 0x00}, false, []byte{}, 0}, +} + +func TestBitString(t *testing.T) { + for i, test := range bitStringTestData { + ret, err := parseBitString(test.in) + if (err == nil) != test.ok { + t.Errorf("#%d: Incorrect error result (did fail? %v, expected: %v)", i, err == nil, test.ok) + } + if err == nil { + if test.bitLength != ret.BitLength || bytes.Compare(ret.Bytes, test.out) != 0 { + t.Errorf("#%d: Bad result: %v (expected %v %v)", i, ret, test.out, test.bitLength) + } + } + } +} + +func TestBitStringAt(t *testing.T) { + bs := BitString{[]byte{0x82, 0x40}, 16} + if bs.At(0) != 1 { + t.Error("#1: Failed") + } + if bs.At(1) != 0 { + t.Error("#2: Failed") + } + if bs.At(6) != 1 { + t.Error("#3: Failed") + } + if bs.At(9) != 1 { + t.Error("#4: Failed") + } +} + +type bitStringRightAlignTest struct { + in []byte + inlen int + out []byte +} + +var bitStringRightAlignTests = []bitStringRightAlignTest{ + {[]byte{0x80}, 1, []byte{0x01}}, + {[]byte{0x80, 0x80}, 9, []byte{0x01, 0x01}}, + {[]byte{}, 0, []byte{}}, + {[]byte{0xce}, 8, []byte{0xce}}, + {[]byte{0xce, 0x47}, 16, []byte{0xce, 0x47}}, + {[]byte{0x34, 0x50}, 12, []byte{0x03, 0x45}}, +} + +func TestBitStringRightAlign(t *testing.T) { + for i, test := range bitStringRightAlignTests { + bs := BitString{test.in, test.inlen} + out := bs.RightAlign() + if bytes.Compare(out, test.out) != 0 { + t.Errorf("#%d got: %x want: %x", i, out, test.out) + } + } +} + +type objectIdentifierTest struct { + in []byte + ok bool + out []int +} + +var objectIdentifierTestData = []objectIdentifierTest{ + {[]byte{}, false, []int{}}, + {[]byte{85}, true, []int{2, 5}}, + {[]byte{85, 0x02}, true, []int{2, 5, 2}}, + {[]byte{85, 0x02, 0xc0, 0x00}, true, []int{2, 5, 2, 0x2000}}, + {[]byte{85, 0x02, 0xc0, 0x80, 0x80, 0x80, 0x80}, false, []int{}}, +} + +func TestObjectIdentifier(t *testing.T) { + for i, test := range objectIdentifierTestData { + ret, err := parseObjectIdentifier(test.in) + if (err == nil) != test.ok { + t.Errorf("#%d: Incorrect error result (did fail? %v, expected: %v)", i, err == nil, test.ok) + } + if err == nil { + if !reflect.DeepEqual(test.out, ret) { + t.Errorf("#%d: Bad result: %v (expected %v)", i, ret, test.out) + } + } + } +} + +type timeTest struct { + in string + ok bool + out *time.Time +} + +var utcTestData = []timeTest{ + {"910506164540-0700", true, &time.Time{1991, 05, 06, 16, 45, 40, 0, -7 * 60 * 60, ""}}, + {"910506164540+0730", true, &time.Time{1991, 05, 06, 16, 45, 40, 0, 7*60*60 + 30*60, ""}}, + {"910506234540Z", true, &time.Time{1991, 05, 06, 23, 45, 40, 0, 0, "UTC"}}, + {"9105062345Z", true, &time.Time{1991, 05, 06, 23, 45, 0, 0, 0, "UTC"}}, + {"a10506234540Z", false, nil}, + {"91a506234540Z", false, nil}, + {"9105a6234540Z", false, nil}, + {"910506a34540Z", false, nil}, + {"910506334a40Z", false, nil}, + {"91050633444aZ", false, nil}, + {"910506334461Z", false, nil}, + {"910506334400Za", false, nil}, +} + +func TestUTCTime(t *testing.T) { + for i, test := range utcTestData { + ret, err := parseUTCTime([]byte(test.in)) + if (err == nil) != test.ok { + t.Errorf("#%d: Incorrect error result (did fail? %v, expected: %v)", i, err == nil, test.ok) + } + if err == nil { + if !reflect.DeepEqual(test.out, ret) { + t.Errorf("#%d: Bad result: %v (expected %v)", i, ret, test.out) + } + } + } +} + +var generalizedTimeTestData = []timeTest{ + {"20100102030405Z", true, &time.Time{2010, 01, 02, 03, 04, 05, 0, 0, "UTC"}}, + {"20100102030405", false, nil}, + {"20100102030405+0607", true, &time.Time{2010, 01, 02, 03, 04, 05, 0, 6*60*60 + 7*60, ""}}, + {"20100102030405-0607", true, &time.Time{2010, 01, 02, 03, 04, 05, 0, -6*60*60 - 7*60, ""}}, +} + +func TestGeneralizedTime(t *testing.T) { + for i, test := range generalizedTimeTestData { + ret, err := parseGeneralizedTime([]byte(test.in)) + if (err == nil) != test.ok { + t.Errorf("#%d: Incorrect error result (did fail? %v, expected: %v)", i, err == nil, test.ok) + } + if err == nil { + if !reflect.DeepEqual(test.out, ret) { + t.Errorf("#%d: Bad result: %v (expected %v)", i, ret, test.out) + } + } + } +} + +type tagAndLengthTest struct { + in []byte + ok bool + out tagAndLength +} + +var tagAndLengthData = []tagAndLengthTest{ + {[]byte{0x80, 0x01}, true, tagAndLength{2, 0, 1, false}}, + {[]byte{0xa0, 0x01}, true, tagAndLength{2, 0, 1, true}}, + {[]byte{0x02, 0x00}, true, tagAndLength{0, 2, 0, false}}, + {[]byte{0xfe, 0x00}, true, tagAndLength{3, 30, 0, true}}, + {[]byte{0x1f, 0x01, 0x00}, true, tagAndLength{0, 1, 0, false}}, + {[]byte{0x1f, 0x81, 0x00, 0x00}, true, tagAndLength{0, 128, 0, false}}, + {[]byte{0x1f, 0x81, 0x80, 0x01, 0x00}, true, tagAndLength{0, 0x4001, 0, false}}, + {[]byte{0x00, 0x81, 0x01}, true, tagAndLength{0, 0, 1, false}}, + {[]byte{0x00, 0x82, 0x01, 0x00}, true, tagAndLength{0, 0, 256, false}}, + {[]byte{0x00, 0x83, 0x01, 0x00}, false, tagAndLength{}}, + {[]byte{0x1f, 0x85}, false, tagAndLength{}}, + {[]byte{0x30, 0x80}, false, tagAndLength{}}, +} + +func TestParseTagAndLength(t *testing.T) { + for i, test := range tagAndLengthData { + tagAndLength, _, err := parseTagAndLength(test.in, 0) + if (err == nil) != test.ok { + t.Errorf("#%d: Incorrect error result (did pass? %v, expected: %v)", i, err == nil, test.ok) + } + if err == nil && !reflect.DeepEqual(test.out, tagAndLength) { + t.Errorf("#%d: Bad result: %v (expected %v)", i, tagAndLength, test.out) + } + } +} + +type parseFieldParametersTest struct { + in string + out fieldParameters +} + +func newInt(n int) *int { return &n } + +func newInt64(n int64) *int64 { return &n } + +func newString(s string) *string { return &s } + +func newBool(b bool) *bool { return &b } + +var parseFieldParametersTestData []parseFieldParametersTest = []parseFieldParametersTest{ + {"", fieldParameters{}}, + {"ia5", fieldParameters{stringType: tagIA5String}}, + {"printable", fieldParameters{stringType: tagPrintableString}}, + {"optional", fieldParameters{optional: true}}, + {"explicit", fieldParameters{explicit: true, tag: new(int)}}, + {"application", fieldParameters{application: true, tag: new(int)}}, + {"optional,explicit", fieldParameters{optional: true, explicit: true, tag: new(int)}}, + {"default:42", fieldParameters{defaultValue: newInt64(42)}}, + {"tag:17", fieldParameters{tag: newInt(17)}}, + {"optional,explicit,default:42,tag:17", fieldParameters{optional: true, explicit: true, defaultValue: newInt64(42), tag: newInt(17)}}, + {"optional,explicit,default:42,tag:17,rubbish1", fieldParameters{true, true, false, newInt64(42), newInt(17), 0, false}}, + {"set", fieldParameters{set: true}}, +} + +func TestParseFieldParameters(t *testing.T) { + for i, test := range parseFieldParametersTestData { + f := parseFieldParameters(test.in) + if !reflect.DeepEqual(f, test.out) { + t.Errorf("#%d: Bad result: %v (expected %v)", i, f, test.out) + } + } +} + +type TestObjectIdentifierStruct struct { + OID ObjectIdentifier +} + +type TestContextSpecificTags struct { + A int `asn1:"tag:1"` +} + +type TestContextSpecificTags2 struct { + A int `asn1:"explicit,tag:1"` + B int +} + +type TestElementsAfterString struct { + S string + A, B int +} + +var unmarshalTestData = []struct { + in []byte + out interface{} +}{ + {[]byte{0x02, 0x01, 0x42}, newInt(0x42)}, + {[]byte{0x30, 0x08, 0x06, 0x06, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d}, &TestObjectIdentifierStruct{[]int{1, 2, 840, 113549}}}, + {[]byte{0x03, 0x04, 0x06, 0x6e, 0x5d, 0xc0}, &BitString{[]byte{110, 93, 192}, 18}}, + {[]byte{0x30, 0x09, 0x02, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x01, 0x03}, &[]int{1, 2, 3}}, + {[]byte{0x02, 0x01, 0x10}, newInt(16)}, + {[]byte{0x13, 0x04, 't', 'e', 's', 't'}, newString("test")}, + {[]byte{0x16, 0x04, 't', 'e', 's', 't'}, newString("test")}, + {[]byte{0x16, 0x04, 't', 'e', 's', 't'}, &RawValue{0, 22, false, []byte("test"), []byte("\x16\x04test")}}, + {[]byte{0x04, 0x04, 1, 2, 3, 4}, &RawValue{0, 4, false, []byte{1, 2, 3, 4}, []byte{4, 4, 1, 2, 3, 4}}}, + {[]byte{0x30, 0x03, 0x81, 0x01, 0x01}, &TestContextSpecificTags{1}}, + {[]byte{0x30, 0x08, 0xa1, 0x03, 0x02, 0x01, 0x01, 0x02, 0x01, 0x02}, &TestContextSpecificTags2{1, 2}}, + {[]byte{0x01, 0x01, 0x00}, newBool(false)}, + {[]byte{0x01, 0x01, 0x01}, newBool(true)}, + {[]byte{0x30, 0x0b, 0x13, 0x03, 0x66, 0x6f, 0x6f, 0x02, 0x01, 0x22, 0x02, 0x01, 0x33}, &TestElementsAfterString{"foo", 0x22, 0x33}}, +} + +func TestUnmarshal(t *testing.T) { + for i, test := range unmarshalTestData { + pv := reflect.New(reflect.TypeOf(test.out).Elem()) + val := pv.Interface() + _, err := Unmarshal(test.in, val) + if err != nil { + t.Errorf("Unmarshal failed at index %d %v", i, err) + } + if !reflect.DeepEqual(val, test.out) { + t.Errorf("#%d:\nhave %#v\nwant %#v", i, val, test.out) + } + } +} + +type Certificate struct { + TBSCertificate TBSCertificate + SignatureAlgorithm AlgorithmIdentifier + SignatureValue BitString +} + +type TBSCertificate struct { + Version int `asn1:"optional,explicit,default:0,tag:0"` + SerialNumber RawValue + SignatureAlgorithm AlgorithmIdentifier + Issuer RDNSequence + Validity Validity + Subject RDNSequence + PublicKey PublicKeyInfo +} + +type AlgorithmIdentifier struct { + Algorithm ObjectIdentifier +} + +type RDNSequence []RelativeDistinguishedNameSET + +type RelativeDistinguishedNameSET []AttributeTypeAndValue + +type AttributeTypeAndValue struct { + Type ObjectIdentifier + Value interface{} +} + +type Validity struct { + NotBefore, NotAfter *time.Time +} + +type PublicKeyInfo struct { + Algorithm AlgorithmIdentifier + PublicKey BitString +} + +func TestCertificate(t *testing.T) { + // This is a minimal, self-signed certificate that should parse correctly. + var cert Certificate + if _, err := Unmarshal(derEncodedSelfSignedCertBytes, &cert); err != nil { + t.Errorf("Unmarshal failed: %v", err) + } + if !reflect.DeepEqual(cert, derEncodedSelfSignedCert) { + t.Errorf("Bad result:\ngot: %+v\nwant: %+v", cert, derEncodedSelfSignedCert) + } +} + +func TestCertificateWithNUL(t *testing.T) { + // This is the paypal NUL-hack certificate. It should fail to parse because + // NUL isn't a permitted character in a PrintableString. + + var cert Certificate + if _, err := Unmarshal(derEncodedPaypalNULCertBytes, &cert); err == nil { + t.Error("Unmarshal succeeded, should not have") + } +} + +type rawStructTest struct { + Raw RawContent + A int +} + +func TestRawStructs(t *testing.T) { + var s rawStructTest + input := []byte{0x30, 0x03, 0x02, 0x01, 0x50} + + rest, err := Unmarshal(input, &s) + if len(rest) != 0 { + t.Errorf("incomplete parse: %x", rest) + return + } + if err != nil { + t.Error(err) + return + } + if s.A != 0x50 { + t.Errorf("bad value for A: got %d want %d", s.A, 0x50) + } + if bytes.Compare([]byte(s.Raw), input) != 0 { + t.Errorf("bad value for Raw: got %x want %x", s.Raw, input) + } +} + +var derEncodedSelfSignedCert = Certificate{ + TBSCertificate: TBSCertificate{ + Version: 0, + SerialNumber: RawValue{Class: 0, Tag: 2, IsCompound: false, Bytes: []uint8{0x0, 0x8c, 0xc3, 0x37, 0x92, 0x10, 0xec, 0x2c, 0x98}, FullBytes: []byte{2, 9, 0x0, 0x8c, 0xc3, 0x37, 0x92, 0x10, 0xec, 0x2c, 0x98}}, + SignatureAlgorithm: AlgorithmIdentifier{Algorithm: ObjectIdentifier{1, 2, 840, 113549, 1, 1, 5}}, + Issuer: RDNSequence{ + RelativeDistinguishedNameSET{AttributeTypeAndValue{Type: ObjectIdentifier{2, 5, 4, 6}, Value: "XX"}}, + RelativeDistinguishedNameSET{AttributeTypeAndValue{Type: ObjectIdentifier{2, 5, 4, 8}, Value: "Some-State"}}, + RelativeDistinguishedNameSET{AttributeTypeAndValue{Type: ObjectIdentifier{2, 5, 4, 7}, Value: "City"}}, + RelativeDistinguishedNameSET{AttributeTypeAndValue{Type: ObjectIdentifier{2, 5, 4, 10}, Value: "Internet Widgits Pty Ltd"}}, + RelativeDistinguishedNameSET{AttributeTypeAndValue{Type: ObjectIdentifier{2, 5, 4, 3}, Value: "false.example.com"}}, + RelativeDistinguishedNameSET{AttributeTypeAndValue{Type: ObjectIdentifier{1, 2, 840, 113549, 1, 9, 1}, Value: "false@example.com"}}, + }, + Validity: Validity{NotBefore: &time.Time{Year: 2009, Month: 10, Day: 8, Hour: 0, Minute: 25, Second: 53, ZoneOffset: 0, Zone: "UTC"}, NotAfter: &time.Time{Year: 2010, Month: 10, Day: 8, Hour: 0, Minute: 25, Second: 53, ZoneOffset: 0, Zone: "UTC"}}, + Subject: RDNSequence{ + RelativeDistinguishedNameSET{AttributeTypeAndValue{Type: ObjectIdentifier{2, 5, 4, 6}, Value: "XX"}}, + RelativeDistinguishedNameSET{AttributeTypeAndValue{Type: ObjectIdentifier{2, 5, 4, 8}, Value: "Some-State"}}, + RelativeDistinguishedNameSET{AttributeTypeAndValue{Type: ObjectIdentifier{2, 5, 4, 7}, Value: "City"}}, + RelativeDistinguishedNameSET{AttributeTypeAndValue{Type: ObjectIdentifier{2, 5, 4, 10}, Value: "Internet Widgits Pty Ltd"}}, + RelativeDistinguishedNameSET{AttributeTypeAndValue{Type: ObjectIdentifier{2, 5, 4, 3}, Value: "false.example.com"}}, + RelativeDistinguishedNameSET{AttributeTypeAndValue{Type: ObjectIdentifier{1, 2, 840, 113549, 1, 9, 1}, Value: "false@example.com"}}, + }, + PublicKey: PublicKeyInfo{ + Algorithm: AlgorithmIdentifier{Algorithm: ObjectIdentifier{1, 2, 840, 113549, 1, 1, 1}}, + PublicKey: BitString{ + Bytes: []uint8{ + 0x30, 0x48, 0x2, 0x41, 0x0, 0xcd, 0xb7, + 0x63, 0x9c, 0x32, 0x78, 0xf0, 0x6, 0xaa, 0x27, 0x7f, 0x6e, 0xaf, 0x42, + 0x90, 0x2b, 0x59, 0x2d, 0x8c, 0xbc, 0xbe, 0x38, 0xa1, 0xc9, 0x2b, 0xa4, + 0x69, 0x5a, 0x33, 0x1b, 0x1d, 0xea, 0xde, 0xad, 0xd8, 0xe9, 0xa5, 0xc2, + 0x7e, 0x8c, 0x4c, 0x2f, 0xd0, 0xa8, 0x88, 0x96, 0x57, 0x72, 0x2a, 0x4f, + 0x2a, 0xf7, 0x58, 0x9c, 0xf2, 0xc7, 0x70, 0x45, 0xdc, 0x8f, 0xde, 0xec, + 0x35, 0x7d, 0x2, 0x3, 0x1, 0x0, 0x1, + }, + BitLength: 592, + }, + }, + }, + SignatureAlgorithm: AlgorithmIdentifier{Algorithm: ObjectIdentifier{1, 2, 840, 113549, 1, 1, 5}}, + SignatureValue: BitString{ + Bytes: []uint8{ + 0xa6, 0x7b, 0x6, 0xec, 0x5e, 0xce, + 0x92, 0x77, 0x2c, 0xa4, 0x13, 0xcb, 0xa3, 0xca, 0x12, 0x56, 0x8f, 0xdc, 0x6c, + 0x7b, 0x45, 0x11, 0xcd, 0x40, 0xa7, 0xf6, 0x59, 0x98, 0x4, 0x2, 0xdf, 0x2b, + 0x99, 0x8b, 0xb9, 0xa4, 0xa8, 0xcb, 0xeb, 0x34, 0xc0, 0xf0, 0xa7, 0x8c, 0xf8, + 0xd9, 0x1e, 0xde, 0x14, 0xa5, 0xed, 0x76, 0xbf, 0x11, 0x6f, 0xe3, 0x60, 0xaa, + 0xfa, 0x88, 0x21, 0x49, 0x4, 0x35, + }, + BitLength: 512, + }, +} + +var derEncodedSelfSignedCertBytes = []byte{ + 0x30, 0x82, 0x02, 0x18, 0x30, + 0x82, 0x01, 0xc2, 0x02, 0x09, 0x00, 0x8c, 0xc3, 0x37, 0x92, 0x10, 0xec, 0x2c, + 0x98, 0x30, 0x0d, 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, + 0x05, 0x05, 0x00, 0x30, 0x81, 0x92, 0x31, 0x0b, 0x30, 0x09, 0x06, 0x03, 0x55, + 0x04, 0x06, 0x13, 0x02, 0x58, 0x58, 0x31, 0x13, 0x30, 0x11, 0x06, 0x03, 0x55, + 0x04, 0x08, 0x13, 0x0a, 0x53, 0x6f, 0x6d, 0x65, 0x2d, 0x53, 0x74, 0x61, 0x74, + 0x65, 0x31, 0x0d, 0x30, 0x0b, 0x06, 0x03, 0x55, 0x04, 0x07, 0x13, 0x04, 0x43, + 0x69, 0x74, 0x79, 0x31, 0x21, 0x30, 0x1f, 0x06, 0x03, 0x55, 0x04, 0x0a, 0x13, + 0x18, 0x49, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x65, 0x74, 0x20, 0x57, 0x69, 0x64, + 0x67, 0x69, 0x74, 0x73, 0x20, 0x50, 0x74, 0x79, 0x20, 0x4c, 0x74, 0x64, 0x31, + 0x1a, 0x30, 0x18, 0x06, 0x03, 0x55, 0x04, 0x03, 0x13, 0x11, 0x66, 0x61, 0x6c, + 0x73, 0x65, 0x2e, 0x65, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x2e, 0x63, 0x6f, + 0x6d, 0x31, 0x20, 0x30, 0x1e, 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, + 0x01, 0x09, 0x01, 0x16, 0x11, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x40, 0x65, 0x78, + 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x2e, 0x63, 0x6f, 0x6d, 0x30, 0x1e, 0x17, 0x0d, + 0x30, 0x39, 0x31, 0x30, 0x30, 0x38, 0x30, 0x30, 0x32, 0x35, 0x35, 0x33, 0x5a, + 0x17, 0x0d, 0x31, 0x30, 0x31, 0x30, 0x30, 0x38, 0x30, 0x30, 0x32, 0x35, 0x35, + 0x33, 0x5a, 0x30, 0x81, 0x92, 0x31, 0x0b, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, + 0x06, 0x13, 0x02, 0x58, 0x58, 0x31, 0x13, 0x30, 0x11, 0x06, 0x03, 0x55, 0x04, + 0x08, 0x13, 0x0a, 0x53, 0x6f, 0x6d, 0x65, 0x2d, 0x53, 0x74, 0x61, 0x74, 0x65, + 0x31, 0x0d, 0x30, 0x0b, 0x06, 0x03, 0x55, 0x04, 0x07, 0x13, 0x04, 0x43, 0x69, + 0x74, 0x79, 0x31, 0x21, 0x30, 0x1f, 0x06, 0x03, 0x55, 0x04, 0x0a, 0x13, 0x18, + 0x49, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x65, 0x74, 0x20, 0x57, 0x69, 0x64, 0x67, + 0x69, 0x74, 0x73, 0x20, 0x50, 0x74, 0x79, 0x20, 0x4c, 0x74, 0x64, 0x31, 0x1a, + 0x30, 0x18, 0x06, 0x03, 0x55, 0x04, 0x03, 0x13, 0x11, 0x66, 0x61, 0x6c, 0x73, + 0x65, 0x2e, 0x65, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x2e, 0x63, 0x6f, 0x6d, + 0x31, 0x20, 0x30, 0x1e, 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, + 0x09, 0x01, 0x16, 0x11, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x40, 0x65, 0x78, 0x61, + 0x6d, 0x70, 0x6c, 0x65, 0x2e, 0x63, 0x6f, 0x6d, 0x30, 0x5c, 0x30, 0x0d, 0x06, + 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x01, 0x05, 0x00, 0x03, + 0x4b, 0x00, 0x30, 0x48, 0x02, 0x41, 0x00, 0xcd, 0xb7, 0x63, 0x9c, 0x32, 0x78, + 0xf0, 0x06, 0xaa, 0x27, 0x7f, 0x6e, 0xaf, 0x42, 0x90, 0x2b, 0x59, 0x2d, 0x8c, + 0xbc, 0xbe, 0x38, 0xa1, 0xc9, 0x2b, 0xa4, 0x69, 0x5a, 0x33, 0x1b, 0x1d, 0xea, + 0xde, 0xad, 0xd8, 0xe9, 0xa5, 0xc2, 0x7e, 0x8c, 0x4c, 0x2f, 0xd0, 0xa8, 0x88, + 0x96, 0x57, 0x72, 0x2a, 0x4f, 0x2a, 0xf7, 0x58, 0x9c, 0xf2, 0xc7, 0x70, 0x45, + 0xdc, 0x8f, 0xde, 0xec, 0x35, 0x7d, 0x02, 0x03, 0x01, 0x00, 0x01, 0x30, 0x0d, + 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x05, 0x05, 0x00, + 0x03, 0x41, 0x00, 0xa6, 0x7b, 0x06, 0xec, 0x5e, 0xce, 0x92, 0x77, 0x2c, 0xa4, + 0x13, 0xcb, 0xa3, 0xca, 0x12, 0x56, 0x8f, 0xdc, 0x6c, 0x7b, 0x45, 0x11, 0xcd, + 0x40, 0xa7, 0xf6, 0x59, 0x98, 0x04, 0x02, 0xdf, 0x2b, 0x99, 0x8b, 0xb9, 0xa4, + 0xa8, 0xcb, 0xeb, 0x34, 0xc0, 0xf0, 0xa7, 0x8c, 0xf8, 0xd9, 0x1e, 0xde, 0x14, + 0xa5, 0xed, 0x76, 0xbf, 0x11, 0x6f, 0xe3, 0x60, 0xaa, 0xfa, 0x88, 0x21, 0x49, + 0x04, 0x35, +} + +var derEncodedPaypalNULCertBytes = []byte{ + 0x30, 0x82, 0x06, 0x44, 0x30, + 0x82, 0x05, 0xad, 0xa0, 0x03, 0x02, 0x01, 0x02, 0x02, 0x03, 0x00, 0xf0, 0x9b, + 0x30, 0x0d, 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x05, + 0x05, 0x00, 0x30, 0x82, 0x01, 0x12, 0x31, 0x0b, 0x30, 0x09, 0x06, 0x03, 0x55, + 0x04, 0x06, 0x13, 0x02, 0x45, 0x53, 0x31, 0x12, 0x30, 0x10, 0x06, 0x03, 0x55, + 0x04, 0x08, 0x13, 0x09, 0x42, 0x61, 0x72, 0x63, 0x65, 0x6c, 0x6f, 0x6e, 0x61, + 0x31, 0x12, 0x30, 0x10, 0x06, 0x03, 0x55, 0x04, 0x07, 0x13, 0x09, 0x42, 0x61, + 0x72, 0x63, 0x65, 0x6c, 0x6f, 0x6e, 0x61, 0x31, 0x29, 0x30, 0x27, 0x06, 0x03, + 0x55, 0x04, 0x0a, 0x13, 0x20, 0x49, 0x50, 0x53, 0x20, 0x43, 0x65, 0x72, 0x74, + 0x69, 0x66, 0x69, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x41, 0x75, 0x74, + 0x68, 0x6f, 0x72, 0x69, 0x74, 0x79, 0x20, 0x73, 0x2e, 0x6c, 0x2e, 0x31, 0x2e, + 0x30, 0x2c, 0x06, 0x03, 0x55, 0x04, 0x0a, 0x14, 0x25, 0x67, 0x65, 0x6e, 0x65, + 0x72, 0x61, 0x6c, 0x40, 0x69, 0x70, 0x73, 0x63, 0x61, 0x2e, 0x63, 0x6f, 0x6d, + 0x20, 0x43, 0x2e, 0x49, 0x2e, 0x46, 0x2e, 0x20, 0x20, 0x42, 0x2d, 0x42, 0x36, + 0x32, 0x32, 0x31, 0x30, 0x36, 0x39, 0x35, 0x31, 0x2e, 0x30, 0x2c, 0x06, 0x03, + 0x55, 0x04, 0x0b, 0x13, 0x25, 0x69, 0x70, 0x73, 0x43, 0x41, 0x20, 0x43, 0x4c, + 0x41, 0x53, 0x45, 0x41, 0x31, 0x20, 0x43, 0x65, 0x72, 0x74, 0x69, 0x66, 0x69, + 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x41, 0x75, 0x74, 0x68, 0x6f, 0x72, + 0x69, 0x74, 0x79, 0x31, 0x2e, 0x30, 0x2c, 0x06, 0x03, 0x55, 0x04, 0x03, 0x13, + 0x25, 0x69, 0x70, 0x73, 0x43, 0x41, 0x20, 0x43, 0x4c, 0x41, 0x53, 0x45, 0x41, + 0x31, 0x20, 0x43, 0x65, 0x72, 0x74, 0x69, 0x66, 0x69, 0x63, 0x61, 0x74, 0x69, + 0x6f, 0x6e, 0x20, 0x41, 0x75, 0x74, 0x68, 0x6f, 0x72, 0x69, 0x74, 0x79, 0x31, + 0x20, 0x30, 0x1e, 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x09, + 0x01, 0x16, 0x11, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x6c, 0x40, 0x69, 0x70, + 0x73, 0x63, 0x61, 0x2e, 0x63, 0x6f, 0x6d, 0x30, 0x1e, 0x17, 0x0d, 0x30, 0x39, + 0x30, 0x32, 0x32, 0x34, 0x32, 0x33, 0x30, 0x34, 0x31, 0x37, 0x5a, 0x17, 0x0d, + 0x31, 0x31, 0x30, 0x32, 0x32, 0x34, 0x32, 0x33, 0x30, 0x34, 0x31, 0x37, 0x5a, + 0x30, 0x81, 0x94, 0x31, 0x0b, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, + 0x02, 0x55, 0x53, 0x31, 0x13, 0x30, 0x11, 0x06, 0x03, 0x55, 0x04, 0x08, 0x13, + 0x0a, 0x43, 0x61, 0x6c, 0x69, 0x66, 0x6f, 0x72, 0x6e, 0x69, 0x61, 0x31, 0x16, + 0x30, 0x14, 0x06, 0x03, 0x55, 0x04, 0x07, 0x13, 0x0d, 0x53, 0x61, 0x6e, 0x20, + 0x46, 0x72, 0x61, 0x6e, 0x63, 0x69, 0x73, 0x63, 0x6f, 0x31, 0x11, 0x30, 0x0f, + 0x06, 0x03, 0x55, 0x04, 0x0a, 0x13, 0x08, 0x53, 0x65, 0x63, 0x75, 0x72, 0x69, + 0x74, 0x79, 0x31, 0x14, 0x30, 0x12, 0x06, 0x03, 0x55, 0x04, 0x0b, 0x13, 0x0b, + 0x53, 0x65, 0x63, 0x75, 0x72, 0x65, 0x20, 0x55, 0x6e, 0x69, 0x74, 0x31, 0x2f, + 0x30, 0x2d, 0x06, 0x03, 0x55, 0x04, 0x03, 0x13, 0x26, 0x77, 0x77, 0x77, 0x2e, + 0x70, 0x61, 0x79, 0x70, 0x61, 0x6c, 0x2e, 0x63, 0x6f, 0x6d, 0x00, 0x73, 0x73, + 0x6c, 0x2e, 0x73, 0x65, 0x63, 0x75, 0x72, 0x65, 0x63, 0x6f, 0x6e, 0x6e, 0x65, + 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x63, 0x63, 0x30, 0x81, 0x9f, 0x30, 0x0d, + 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x01, 0x05, 0x00, + 0x03, 0x81, 0x8d, 0x00, 0x30, 0x81, 0x89, 0x02, 0x81, 0x81, 0x00, 0xd2, 0x69, + 0xfa, 0x6f, 0x3a, 0x00, 0xb4, 0x21, 0x1b, 0xc8, 0xb1, 0x02, 0xd7, 0x3f, 0x19, + 0xb2, 0xc4, 0x6d, 0xb4, 0x54, 0xf8, 0x8b, 0x8a, 0xcc, 0xdb, 0x72, 0xc2, 0x9e, + 0x3c, 0x60, 0xb9, 0xc6, 0x91, 0x3d, 0x82, 0xb7, 0x7d, 0x99, 0xff, 0xd1, 0x29, + 0x84, 0xc1, 0x73, 0x53, 0x9c, 0x82, 0xdd, 0xfc, 0x24, 0x8c, 0x77, 0xd5, 0x41, + 0xf3, 0xe8, 0x1e, 0x42, 0xa1, 0xad, 0x2d, 0x9e, 0xff, 0x5b, 0x10, 0x26, 0xce, + 0x9d, 0x57, 0x17, 0x73, 0x16, 0x23, 0x38, 0xc8, 0xd6, 0xf1, 0xba, 0xa3, 0x96, + 0x5b, 0x16, 0x67, 0x4a, 0x4f, 0x73, 0x97, 0x3a, 0x4d, 0x14, 0xa4, 0xf4, 0xe2, + 0x3f, 0x8b, 0x05, 0x83, 0x42, 0xd1, 0xd0, 0xdc, 0x2f, 0x7a, 0xe5, 0xb6, 0x10, + 0xb2, 0x11, 0xc0, 0xdc, 0x21, 0x2a, 0x90, 0xff, 0xae, 0x97, 0x71, 0x5a, 0x49, + 0x81, 0xac, 0x40, 0xf3, 0x3b, 0xb8, 0x59, 0xb2, 0x4f, 0x02, 0x03, 0x01, 0x00, + 0x01, 0xa3, 0x82, 0x03, 0x21, 0x30, 0x82, 0x03, 0x1d, 0x30, 0x09, 0x06, 0x03, + 0x55, 0x1d, 0x13, 0x04, 0x02, 0x30, 0x00, 0x30, 0x11, 0x06, 0x09, 0x60, 0x86, + 0x48, 0x01, 0x86, 0xf8, 0x42, 0x01, 0x01, 0x04, 0x04, 0x03, 0x02, 0x06, 0x40, + 0x30, 0x0b, 0x06, 0x03, 0x55, 0x1d, 0x0f, 0x04, 0x04, 0x03, 0x02, 0x03, 0xf8, + 0x30, 0x13, 0x06, 0x03, 0x55, 0x1d, 0x25, 0x04, 0x0c, 0x30, 0x0a, 0x06, 0x08, + 0x2b, 0x06, 0x01, 0x05, 0x05, 0x07, 0x03, 0x01, 0x30, 0x1d, 0x06, 0x03, 0x55, + 0x1d, 0x0e, 0x04, 0x16, 0x04, 0x14, 0x61, 0x8f, 0x61, 0x34, 0x43, 0x55, 0x14, + 0x7f, 0x27, 0x09, 0xce, 0x4c, 0x8b, 0xea, 0x9b, 0x7b, 0x19, 0x25, 0xbc, 0x6e, + 0x30, 0x1f, 0x06, 0x03, 0x55, 0x1d, 0x23, 0x04, 0x18, 0x30, 0x16, 0x80, 0x14, + 0x0e, 0x07, 0x60, 0xd4, 0x39, 0xc9, 0x1b, 0x5b, 0x5d, 0x90, 0x7b, 0x23, 0xc8, + 0xd2, 0x34, 0x9d, 0x4a, 0x9a, 0x46, 0x39, 0x30, 0x09, 0x06, 0x03, 0x55, 0x1d, + 0x11, 0x04, 0x02, 0x30, 0x00, 0x30, 0x1c, 0x06, 0x03, 0x55, 0x1d, 0x12, 0x04, + 0x15, 0x30, 0x13, 0x81, 0x11, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x6c, 0x40, + 0x69, 0x70, 0x73, 0x63, 0x61, 0x2e, 0x63, 0x6f, 0x6d, 0x30, 0x72, 0x06, 0x09, + 0x60, 0x86, 0x48, 0x01, 0x86, 0xf8, 0x42, 0x01, 0x0d, 0x04, 0x65, 0x16, 0x63, + 0x4f, 0x72, 0x67, 0x61, 0x6e, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x20, + 0x49, 0x6e, 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x4e, + 0x4f, 0x54, 0x20, 0x56, 0x41, 0x4c, 0x49, 0x44, 0x41, 0x54, 0x45, 0x44, 0x2e, + 0x20, 0x43, 0x4c, 0x41, 0x53, 0x45, 0x41, 0x31, 0x20, 0x53, 0x65, 0x72, 0x76, + 0x65, 0x72, 0x20, 0x43, 0x65, 0x72, 0x74, 0x69, 0x66, 0x69, 0x63, 0x61, 0x74, + 0x65, 0x20, 0x69, 0x73, 0x73, 0x75, 0x65, 0x64, 0x20, 0x62, 0x79, 0x20, 0x68, + 0x74, 0x74, 0x70, 0x73, 0x3a, 0x2f, 0x2f, 0x77, 0x77, 0x77, 0x2e, 0x69, 0x70, + 0x73, 0x63, 0x61, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x30, 0x2f, 0x06, 0x09, 0x60, + 0x86, 0x48, 0x01, 0x86, 0xf8, 0x42, 0x01, 0x02, 0x04, 0x22, 0x16, 0x20, 0x68, + 0x74, 0x74, 0x70, 0x73, 0x3a, 0x2f, 0x2f, 0x77, 0x77, 0x77, 0x2e, 0x69, 0x70, + 0x73, 0x63, 0x61, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x69, 0x70, 0x73, 0x63, 0x61, + 0x32, 0x30, 0x30, 0x32, 0x2f, 0x30, 0x43, 0x06, 0x09, 0x60, 0x86, 0x48, 0x01, + 0x86, 0xf8, 0x42, 0x01, 0x04, 0x04, 0x36, 0x16, 0x34, 0x68, 0x74, 0x74, 0x70, + 0x73, 0x3a, 0x2f, 0x2f, 0x77, 0x77, 0x77, 0x2e, 0x69, 0x70, 0x73, 0x63, 0x61, + 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x69, 0x70, 0x73, 0x63, 0x61, 0x32, 0x30, 0x30, + 0x32, 0x2f, 0x69, 0x70, 0x73, 0x63, 0x61, 0x32, 0x30, 0x30, 0x32, 0x43, 0x4c, + 0x41, 0x53, 0x45, 0x41, 0x31, 0x2e, 0x63, 0x72, 0x6c, 0x30, 0x46, 0x06, 0x09, + 0x60, 0x86, 0x48, 0x01, 0x86, 0xf8, 0x42, 0x01, 0x03, 0x04, 0x39, 0x16, 0x37, + 0x68, 0x74, 0x74, 0x70, 0x73, 0x3a, 0x2f, 0x2f, 0x77, 0x77, 0x77, 0x2e, 0x69, + 0x70, 0x73, 0x63, 0x61, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x69, 0x70, 0x73, 0x63, + 0x61, 0x32, 0x30, 0x30, 0x32, 0x2f, 0x72, 0x65, 0x76, 0x6f, 0x63, 0x61, 0x74, + 0x69, 0x6f, 0x6e, 0x43, 0x4c, 0x41, 0x53, 0x45, 0x41, 0x31, 0x2e, 0x68, 0x74, + 0x6d, 0x6c, 0x3f, 0x30, 0x43, 0x06, 0x09, 0x60, 0x86, 0x48, 0x01, 0x86, 0xf8, + 0x42, 0x01, 0x07, 0x04, 0x36, 0x16, 0x34, 0x68, 0x74, 0x74, 0x70, 0x73, 0x3a, + 0x2f, 0x2f, 0x77, 0x77, 0x77, 0x2e, 0x69, 0x70, 0x73, 0x63, 0x61, 0x2e, 0x63, + 0x6f, 0x6d, 0x2f, 0x69, 0x70, 0x73, 0x63, 0x61, 0x32, 0x30, 0x30, 0x32, 0x2f, + 0x72, 0x65, 0x6e, 0x65, 0x77, 0x61, 0x6c, 0x43, 0x4c, 0x41, 0x53, 0x45, 0x41, + 0x31, 0x2e, 0x68, 0x74, 0x6d, 0x6c, 0x3f, 0x30, 0x41, 0x06, 0x09, 0x60, 0x86, + 0x48, 0x01, 0x86, 0xf8, 0x42, 0x01, 0x08, 0x04, 0x34, 0x16, 0x32, 0x68, 0x74, + 0x74, 0x70, 0x73, 0x3a, 0x2f, 0x2f, 0x77, 0x77, 0x77, 0x2e, 0x69, 0x70, 0x73, + 0x63, 0x61, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x69, 0x70, 0x73, 0x63, 0x61, 0x32, + 0x30, 0x30, 0x32, 0x2f, 0x70, 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x43, 0x4c, 0x41, + 0x53, 0x45, 0x41, 0x31, 0x2e, 0x68, 0x74, 0x6d, 0x6c, 0x30, 0x81, 0x83, 0x06, + 0x03, 0x55, 0x1d, 0x1f, 0x04, 0x7c, 0x30, 0x7a, 0x30, 0x39, 0xa0, 0x37, 0xa0, + 0x35, 0x86, 0x33, 0x68, 0x74, 0x74, 0x70, 0x3a, 0x2f, 0x2f, 0x77, 0x77, 0x77, + 0x2e, 0x69, 0x70, 0x73, 0x63, 0x61, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x69, 0x70, + 0x73, 0x63, 0x61, 0x32, 0x30, 0x30, 0x32, 0x2f, 0x69, 0x70, 0x73, 0x63, 0x61, + 0x32, 0x30, 0x30, 0x32, 0x43, 0x4c, 0x41, 0x53, 0x45, 0x41, 0x31, 0x2e, 0x63, + 0x72, 0x6c, 0x30, 0x3d, 0xa0, 0x3b, 0xa0, 0x39, 0x86, 0x37, 0x68, 0x74, 0x74, + 0x70, 0x3a, 0x2f, 0x2f, 0x77, 0x77, 0x77, 0x62, 0x61, 0x63, 0x6b, 0x2e, 0x69, + 0x70, 0x73, 0x63, 0x61, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x69, 0x70, 0x73, 0x63, + 0x61, 0x32, 0x30, 0x30, 0x32, 0x2f, 0x69, 0x70, 0x73, 0x63, 0x61, 0x32, 0x30, + 0x30, 0x32, 0x43, 0x4c, 0x41, 0x53, 0x45, 0x41, 0x31, 0x2e, 0x63, 0x72, 0x6c, + 0x30, 0x32, 0x06, 0x08, 0x2b, 0x06, 0x01, 0x05, 0x05, 0x07, 0x01, 0x01, 0x04, + 0x26, 0x30, 0x24, 0x30, 0x22, 0x06, 0x08, 0x2b, 0x06, 0x01, 0x05, 0x05, 0x07, + 0x30, 0x01, 0x86, 0x16, 0x68, 0x74, 0x74, 0x70, 0x3a, 0x2f, 0x2f, 0x6f, 0x63, + 0x73, 0x70, 0x2e, 0x69, 0x70, 0x73, 0x63, 0x61, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, + 0x30, 0x0d, 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x05, + 0x05, 0x00, 0x03, 0x81, 0x81, 0x00, 0x68, 0xee, 0x79, 0x97, 0x97, 0xdd, 0x3b, + 0xef, 0x16, 0x6a, 0x06, 0xf2, 0x14, 0x9a, 0x6e, 0xcd, 0x9e, 0x12, 0xf7, 0xaa, + 0x83, 0x10, 0xbd, 0xd1, 0x7c, 0x98, 0xfa, 0xc7, 0xae, 0xd4, 0x0e, 0x2c, 0x9e, + 0x38, 0x05, 0x9d, 0x52, 0x60, 0xa9, 0x99, 0x0a, 0x81, 0xb4, 0x98, 0x90, 0x1d, + 0xae, 0xbb, 0x4a, 0xd7, 0xb9, 0xdc, 0x88, 0x9e, 0x37, 0x78, 0x41, 0x5b, 0xf7, + 0x82, 0xa5, 0xf2, 0xba, 0x41, 0x25, 0x5a, 0x90, 0x1a, 0x1e, 0x45, 0x38, 0xa1, + 0x52, 0x58, 0x75, 0x94, 0x26, 0x44, 0xfb, 0x20, 0x07, 0xba, 0x44, 0xcc, 0xe5, + 0x4a, 0x2d, 0x72, 0x3f, 0x98, 0x47, 0xf6, 0x26, 0xdc, 0x05, 0x46, 0x05, 0x07, + 0x63, 0x21, 0xab, 0x46, 0x9b, 0x9c, 0x78, 0xd5, 0x54, 0x5b, 0x3d, 0x0c, 0x1e, + 0xc8, 0x64, 0x8c, 0xb5, 0x50, 0x23, 0x82, 0x6f, 0xdb, 0xb8, 0x22, 0x1c, 0x43, + 0x96, 0x07, 0xa8, 0xbb, +} diff --git a/libgo/go/encoding/asn1/common.go b/libgo/go/encoding/asn1/common.go new file mode 100644 index 0000000..01f4f7b --- /dev/null +++ b/libgo/go/encoding/asn1/common.go @@ -0,0 +1,158 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package asn1 + +import ( + "reflect" + "strconv" + "strings" +) + +// ASN.1 objects have metadata preceding them: +// the tag: the type of the object +// a flag denoting if this object is compound or not +// the class type: the namespace of the tag +// the length of the object, in bytes + +// Here are some standard tags and classes + +const ( + tagBoolean = 1 + tagInteger = 2 + tagBitString = 3 + tagOctetString = 4 + tagOID = 6 + tagEnum = 10 + tagUTF8String = 12 + tagSequence = 16 + tagSet = 17 + tagPrintableString = 19 + tagT61String = 20 + tagIA5String = 22 + tagUTCTime = 23 + tagGeneralizedTime = 24 + tagGeneralString = 27 +) + +const ( + classUniversal = 0 + classApplication = 1 + classContextSpecific = 2 + classPrivate = 3 +) + +type tagAndLength struct { + class, tag, length int + isCompound bool +} + +// ASN.1 has IMPLICIT and EXPLICIT tags, which can be translated as "instead +// of" and "in addition to". When not specified, every primitive type has a +// default tag in the UNIVERSAL class. +// +// For example: a BIT STRING is tagged [UNIVERSAL 3] by default (although ASN.1 +// doesn't actually have a UNIVERSAL keyword). However, by saying [IMPLICIT +// CONTEXT-SPECIFIC 42], that means that the tag is replaced by another. +// +// On the other hand, if it said [EXPLICIT CONTEXT-SPECIFIC 10], then an +// /additional/ tag would wrap the default tag. This explicit tag will have the +// compound flag set. +// +// (This is used in order to remove ambiguity with optional elements.) +// +// You can layer EXPLICIT and IMPLICIT tags to an arbitrary depth, however we +// don't support that here. We support a single layer of EXPLICIT or IMPLICIT +// tagging with tag strings on the fields of a structure. + +// fieldParameters is the parsed representation of tag string from a structure field. +type fieldParameters struct { + optional bool // true iff the field is OPTIONAL + explicit bool // true iff an EXPLICIT tag is in use. + application bool // true iff an APPLICATION tag is in use. + defaultValue *int64 // a default value for INTEGER typed fields (maybe nil). + tag *int // the EXPLICIT or IMPLICIT tag (maybe nil). + stringType int // the string tag to use when marshaling. + set bool // true iff this should be encoded as a SET + + // Invariants: + // if explicit is set, tag is non-nil. +} + +// Given a tag string with the format specified in the package comment, +// parseFieldParameters will parse it into a fieldParameters structure, +// ignoring unknown parts of the string. +func parseFieldParameters(str string) (ret fieldParameters) { + for _, part := range strings.Split(str, ",") { + switch { + case part == "optional": + ret.optional = true + case part == "explicit": + ret.explicit = true + if ret.tag == nil { + ret.tag = new(int) + } + case part == "ia5": + ret.stringType = tagIA5String + case part == "printable": + ret.stringType = tagPrintableString + case strings.HasPrefix(part, "default:"): + i, err := strconv.Atoi64(part[8:]) + if err == nil { + ret.defaultValue = new(int64) + *ret.defaultValue = i + } + case strings.HasPrefix(part, "tag:"): + i, err := strconv.Atoi(part[4:]) + if err == nil { + ret.tag = new(int) + *ret.tag = i + } + case part == "set": + ret.set = true + case part == "application": + ret.application = true + if ret.tag == nil { + ret.tag = new(int) + } + } + } + return +} + +// Given a reflected Go type, getUniversalType returns the default tag number +// and expected compound flag. +func getUniversalType(t reflect.Type) (tagNumber int, isCompound, ok bool) { + switch t { + case objectIdentifierType: + return tagOID, false, true + case bitStringType: + return tagBitString, false, true + case timeType: + return tagUTCTime, false, true + case enumeratedType: + return tagEnum, false, true + case bigIntType: + return tagInteger, false, true + } + switch t.Kind() { + case reflect.Bool: + return tagBoolean, false, true + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return tagInteger, false, true + case reflect.Struct: + return tagSequence, true, true + case reflect.Slice: + if t.Elem().Kind() == reflect.Uint8 { + return tagOctetString, false, true + } + if strings.HasSuffix(t.Name(), "SET") { + return tagSet, true, true + } + return tagSequence, true, true + case reflect.String: + return tagPrintableString, false, true + } + return 0, false, false +} diff --git a/libgo/go/encoding/asn1/marshal.go b/libgo/go/encoding/asn1/marshal.go new file mode 100644 index 0000000..89c50a7 --- /dev/null +++ b/libgo/go/encoding/asn1/marshal.go @@ -0,0 +1,547 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package asn1 + +import ( + "bytes" + "fmt" + "io" + "math/big" + "reflect" + "time" +) + +// A forkableWriter is an in-memory buffer that can be +// 'forked' to create new forkableWriters that bracket the +// original. After +// pre, post := w.fork(); +// the overall sequence of bytes represented is logically w+pre+post. +type forkableWriter struct { + *bytes.Buffer + pre, post *forkableWriter +} + +func newForkableWriter() *forkableWriter { + return &forkableWriter{bytes.NewBuffer(nil), nil, nil} +} + +func (f *forkableWriter) fork() (pre, post *forkableWriter) { + if f.pre != nil || f.post != nil { + panic("have already forked") + } + f.pre = newForkableWriter() + f.post = newForkableWriter() + return f.pre, f.post +} + +func (f *forkableWriter) Len() (l int) { + l += f.Buffer.Len() + if f.pre != nil { + l += f.pre.Len() + } + if f.post != nil { + l += f.post.Len() + } + return +} + +func (f *forkableWriter) writeTo(out io.Writer) (n int, err error) { + n, err = out.Write(f.Bytes()) + if err != nil { + return + } + + var nn int + + if f.pre != nil { + nn, err = f.pre.writeTo(out) + n += nn + if err != nil { + return + } + } + + if f.post != nil { + nn, err = f.post.writeTo(out) + n += nn + } + return +} + +func marshalBase128Int(out *forkableWriter, n int64) (err error) { + if n == 0 { + err = out.WriteByte(0) + return + } + + l := 0 + for i := n; i > 0; i >>= 7 { + l++ + } + + for i := l - 1; i >= 0; i-- { + o := byte(n >> uint(i*7)) + o &= 0x7f + if i != 0 { + o |= 0x80 + } + err = out.WriteByte(o) + if err != nil { + return + } + } + + return nil +} + +func marshalInt64(out *forkableWriter, i int64) (err error) { + n := int64Length(i) + + for ; n > 0; n-- { + err = out.WriteByte(byte(i >> uint((n-1)*8))) + if err != nil { + return + } + } + + return nil +} + +func int64Length(i int64) (numBytes int) { + numBytes = 1 + + for i > 127 { + numBytes++ + i >>= 8 + } + + for i < -128 { + numBytes++ + i >>= 8 + } + + return +} + +func marshalBigInt(out *forkableWriter, n *big.Int) (err error) { + if n.Sign() < 0 { + // A negative number has to be converted to two's-complement + // form. So we'll subtract 1 and invert. If the + // most-significant-bit isn't set then we'll need to pad the + // beginning with 0xff in order to keep the number negative. + nMinus1 := new(big.Int).Neg(n) + nMinus1.Sub(nMinus1, bigOne) + bytes := nMinus1.Bytes() + for i := range bytes { + bytes[i] ^= 0xff + } + if len(bytes) == 0 || bytes[0]&0x80 == 0 { + err = out.WriteByte(0xff) + if err != nil { + return + } + } + _, err = out.Write(bytes) + } else if n.Sign() == 0 { + // Zero is written as a single 0 zero rather than no bytes. + err = out.WriteByte(0x00) + } else { + bytes := n.Bytes() + if len(bytes) > 0 && bytes[0]&0x80 != 0 { + // We'll have to pad this with 0x00 in order to stop it + // looking like a negative number. + err = out.WriteByte(0) + if err != nil { + return + } + } + _, err = out.Write(bytes) + } + return +} + +func marshalLength(out *forkableWriter, i int) (err error) { + n := lengthLength(i) + + for ; n > 0; n-- { + err = out.WriteByte(byte(i >> uint((n-1)*8))) + if err != nil { + return + } + } + + return nil +} + +func lengthLength(i int) (numBytes int) { + numBytes = 1 + for i > 255 { + numBytes++ + i >>= 8 + } + return +} + +func marshalTagAndLength(out *forkableWriter, t tagAndLength) (err error) { + b := uint8(t.class) << 6 + if t.isCompound { + b |= 0x20 + } + if t.tag >= 31 { + b |= 0x1f + err = out.WriteByte(b) + if err != nil { + return + } + err = marshalBase128Int(out, int64(t.tag)) + if err != nil { + return + } + } else { + b |= uint8(t.tag) + err = out.WriteByte(b) + if err != nil { + return + } + } + + if t.length >= 128 { + l := lengthLength(t.length) + err = out.WriteByte(0x80 | byte(l)) + if err != nil { + return + } + err = marshalLength(out, t.length) + if err != nil { + return + } + } else { + err = out.WriteByte(byte(t.length)) + if err != nil { + return + } + } + + return nil +} + +func marshalBitString(out *forkableWriter, b BitString) (err error) { + paddingBits := byte((8 - b.BitLength%8) % 8) + err = out.WriteByte(paddingBits) + if err != nil { + return + } + _, err = out.Write(b.Bytes) + return +} + +func marshalObjectIdentifier(out *forkableWriter, oid []int) (err error) { + if len(oid) < 2 || oid[0] > 6 || oid[1] >= 40 { + return StructuralError{"invalid object identifier"} + } + + err = out.WriteByte(byte(oid[0]*40 + oid[1])) + if err != nil { + return + } + for i := 2; i < len(oid); i++ { + err = marshalBase128Int(out, int64(oid[i])) + if err != nil { + return + } + } + + return +} + +func marshalPrintableString(out *forkableWriter, s string) (err error) { + b := []byte(s) + for _, c := range b { + if !isPrintable(c) { + return StructuralError{"PrintableString contains invalid character"} + } + } + + _, err = out.Write(b) + return +} + +func marshalIA5String(out *forkableWriter, s string) (err error) { + b := []byte(s) + for _, c := range b { + if c > 127 { + return StructuralError{"IA5String contains invalid character"} + } + } + + _, err = out.Write(b) + return +} + +func marshalTwoDigits(out *forkableWriter, v int) (err error) { + err = out.WriteByte(byte('0' + (v/10)%10)) + if err != nil { + return + } + return out.WriteByte(byte('0' + v%10)) +} + +func marshalUTCTime(out *forkableWriter, t *time.Time) (err error) { + switch { + case 1950 <= t.Year && t.Year < 2000: + err = marshalTwoDigits(out, int(t.Year-1900)) + case 2000 <= t.Year && t.Year < 2050: + err = marshalTwoDigits(out, int(t.Year-2000)) + default: + return StructuralError{"Cannot represent time as UTCTime"} + } + + if err != nil { + return + } + + err = marshalTwoDigits(out, t.Month) + if err != nil { + return + } + + err = marshalTwoDigits(out, t.Day) + if err != nil { + return + } + + err = marshalTwoDigits(out, t.Hour) + if err != nil { + return + } + + err = marshalTwoDigits(out, t.Minute) + if err != nil { + return + } + + err = marshalTwoDigits(out, t.Second) + if err != nil { + return + } + + switch { + case t.ZoneOffset/60 == 0: + err = out.WriteByte('Z') + return + case t.ZoneOffset > 0: + err = out.WriteByte('+') + case t.ZoneOffset < 0: + err = out.WriteByte('-') + } + + if err != nil { + return + } + + offsetMinutes := t.ZoneOffset / 60 + if offsetMinutes < 0 { + offsetMinutes = -offsetMinutes + } + + err = marshalTwoDigits(out, offsetMinutes/60) + if err != nil { + return + } + + err = marshalTwoDigits(out, offsetMinutes%60) + return +} + +func stripTagAndLength(in []byte) []byte { + _, offset, err := parseTagAndLength(in, 0) + if err != nil { + return in + } + return in[offset:] +} + +func marshalBody(out *forkableWriter, value reflect.Value, params fieldParameters) (err error) { + switch value.Type() { + case timeType: + return marshalUTCTime(out, value.Interface().(*time.Time)) + case bitStringType: + return marshalBitString(out, value.Interface().(BitString)) + case objectIdentifierType: + return marshalObjectIdentifier(out, value.Interface().(ObjectIdentifier)) + case bigIntType: + return marshalBigInt(out, value.Interface().(*big.Int)) + } + + switch v := value; v.Kind() { + case reflect.Bool: + if v.Bool() { + return out.WriteByte(255) + } else { + return out.WriteByte(0) + } + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return marshalInt64(out, int64(v.Int())) + case reflect.Struct: + t := v.Type() + + startingField := 0 + + // If the first element of the structure is a non-empty + // RawContents, then we don't bother serializing the rest. + if t.NumField() > 0 && t.Field(0).Type == rawContentsType { + s := v.Field(0) + if s.Len() > 0 { + bytes := make([]byte, s.Len()) + for i := 0; i < s.Len(); i++ { + bytes[i] = uint8(s.Index(i).Uint()) + } + /* The RawContents will contain the tag and + * length fields but we'll also be writing + * those ourselves, so we strip them out of + * bytes */ + _, err = out.Write(stripTagAndLength(bytes)) + return + } else { + startingField = 1 + } + } + + for i := startingField; i < t.NumField(); i++ { + var pre *forkableWriter + pre, out = out.fork() + err = marshalField(pre, v.Field(i), parseFieldParameters(t.Field(i).Tag.Get("asn1"))) + if err != nil { + return + } + } + return + case reflect.Slice: + sliceType := v.Type() + if sliceType.Elem().Kind() == reflect.Uint8 { + bytes := make([]byte, v.Len()) + for i := 0; i < v.Len(); i++ { + bytes[i] = uint8(v.Index(i).Uint()) + } + _, err = out.Write(bytes) + return + } + + var params fieldParameters + for i := 0; i < v.Len(); i++ { + var pre *forkableWriter + pre, out = out.fork() + err = marshalField(pre, v.Index(i), params) + if err != nil { + return + } + } + return + case reflect.String: + if params.stringType == tagIA5String { + return marshalIA5String(out, v.String()) + } else { + return marshalPrintableString(out, v.String()) + } + return + } + + return StructuralError{"unknown Go type"} +} + +func marshalField(out *forkableWriter, v reflect.Value, params fieldParameters) (err error) { + // If the field is an interface{} then recurse into it. + if v.Kind() == reflect.Interface && v.Type().NumMethod() == 0 { + return marshalField(out, v.Elem(), params) + } + + if params.optional && reflect.DeepEqual(v.Interface(), reflect.Zero(v.Type()).Interface()) { + return + } + + if v.Type() == rawValueType { + rv := v.Interface().(RawValue) + if len(rv.FullBytes) != 0 { + _, err = out.Write(rv.FullBytes) + } else { + err = marshalTagAndLength(out, tagAndLength{rv.Class, rv.Tag, len(rv.Bytes), rv.IsCompound}) + if err != nil { + return + } + _, err = out.Write(rv.Bytes) + } + return + } + + tag, isCompound, ok := getUniversalType(v.Type()) + if !ok { + err = StructuralError{fmt.Sprintf("unknown Go type: %v", v.Type())} + return + } + class := classUniversal + + if params.stringType != 0 { + if tag != tagPrintableString { + return StructuralError{"Explicit string type given to non-string member"} + } + tag = params.stringType + } + + if params.set { + if tag != tagSequence { + return StructuralError{"Non sequence tagged as set"} + } + tag = tagSet + } + + tags, body := out.fork() + + err = marshalBody(body, v, params) + if err != nil { + return + } + + bodyLen := body.Len() + + var explicitTag *forkableWriter + if params.explicit { + explicitTag, tags = tags.fork() + } + + if !params.explicit && params.tag != nil { + // implicit tag. + tag = *params.tag + class = classContextSpecific + } + + err = marshalTagAndLength(tags, tagAndLength{class, tag, bodyLen, isCompound}) + if err != nil { + return + } + + if params.explicit { + err = marshalTagAndLength(explicitTag, tagAndLength{ + class: classContextSpecific, + tag: *params.tag, + length: bodyLen + tags.Len(), + isCompound: true, + }) + } + + return nil +} + +// Marshal returns the ASN.1 encoding of val. +func Marshal(val interface{}) ([]byte, error) { + var out bytes.Buffer + v := reflect.ValueOf(val) + f := newForkableWriter() + err := marshalField(f, v, fieldParameters{}) + if err != nil { + return nil, err + } + _, err = f.writeTo(&out) + return out.Bytes(), nil +} diff --git a/libgo/go/encoding/asn1/marshal_test.go b/libgo/go/encoding/asn1/marshal_test.go new file mode 100644 index 0000000..03df5f1 --- /dev/null +++ b/libgo/go/encoding/asn1/marshal_test.go @@ -0,0 +1,129 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package asn1 + +import ( + "bytes" + "encoding/hex" + "testing" + "time" +) + +type intStruct struct { + A int +} + +type twoIntStruct struct { + A int + B int +} + +type nestedStruct struct { + A intStruct +} + +type rawContentsStruct struct { + Raw RawContent + A int +} + +type implicitTagTest struct { + A int `asn1:"implicit,tag:5"` +} + +type explicitTagTest struct { + A int `asn1:"explicit,tag:5"` +} + +type ia5StringTest struct { + A string `asn1:"ia5"` +} + +type printableStringTest struct { + A string `asn1:"printable"` +} + +type optionalRawValueTest struct { + A RawValue `asn1:"optional"` +} + +type testSET []int + +func setPST(t *time.Time) *time.Time { + t.ZoneOffset = -28800 + return t +} + +type marshalTest struct { + in interface{} + out string // hex encoded +} + +var marshalTests = []marshalTest{ + {10, "02010a"}, + {127, "02017f"}, + {128, "02020080"}, + {-128, "020180"}, + {-129, "0202ff7f"}, + {intStruct{64}, "3003020140"}, + {twoIntStruct{64, 65}, "3006020140020141"}, + {nestedStruct{intStruct{127}}, "3005300302017f"}, + {[]byte{1, 2, 3}, "0403010203"}, + {implicitTagTest{64}, "3003850140"}, + {explicitTagTest{64}, "3005a503020140"}, + {time.SecondsToUTC(0), "170d3730303130313030303030305a"}, + {time.SecondsToUTC(1258325776), "170d3039313131353232353631365a"}, + {setPST(time.SecondsToUTC(1258325776)), "17113039313131353232353631362d30383030"}, + {BitString{[]byte{0x80}, 1}, "03020780"}, + {BitString{[]byte{0x81, 0xf0}, 12}, "03030481f0"}, + {ObjectIdentifier([]int{1, 2, 3, 4}), "06032a0304"}, + {ObjectIdentifier([]int{1, 2, 840, 133549, 1, 1, 5}), "06092a864888932d010105"}, + {"test", "130474657374"}, + { + "" + + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", // This is 127 times 'x' + "137f" + + "7878787878787878787878787878787878787878787878787878787878787878" + + "7878787878787878787878787878787878787878787878787878787878787878" + + "7878787878787878787878787878787878787878787878787878787878787878" + + "78787878787878787878787878787878787878787878787878787878787878", + }, + { + "" + + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", // This is 128 times 'x' + "138180" + + "7878787878787878787878787878787878787878787878787878787878787878" + + "7878787878787878787878787878787878787878787878787878787878787878" + + "7878787878787878787878787878787878787878787878787878787878787878" + + "7878787878787878787878787878787878787878787878787878787878787878", + }, + {ia5StringTest{"test"}, "3006160474657374"}, + {optionalRawValueTest{}, "3000"}, + {printableStringTest{"test"}, "3006130474657374"}, + {printableStringTest{"test*"}, "30071305746573742a"}, + {rawContentsStruct{nil, 64}, "3003020140"}, + {rawContentsStruct{[]byte{0x30, 3, 1, 2, 3}, 64}, "3003010203"}, + {RawValue{Tag: 1, Class: 2, IsCompound: false, Bytes: []byte{1, 2, 3}}, "8103010203"}, + {testSET([]int{10}), "310302010a"}, +} + +func TestMarshal(t *testing.T) { + for i, test := range marshalTests { + data, err := Marshal(test.in) + if err != nil { + t.Errorf("#%d failed: %s", i, err) + } + out, _ := hex.DecodeString(test.out) + if bytes.Compare(out, data) != 0 { + t.Errorf("#%d got: %x want %x", i, data, out) + } + } +} diff --git a/libgo/go/encoding/binary/binary.go b/libgo/go/encoding/binary/binary.go index 65b9f01..d2f8b1e 100644 --- a/libgo/go/encoding/binary/binary.go +++ b/libgo/go/encoding/binary/binary.go @@ -9,8 +9,8 @@ package binary import ( "errors" - "math" "io" + "math" "reflect" ) diff --git a/libgo/go/encoding/binary/binary_test.go b/libgo/go/encoding/binary/binary_test.go index e753aa0..fd4fdb0 100644 --- a/libgo/go/encoding/binary/binary_test.go +++ b/libgo/go/encoding/binary/binary_test.go @@ -7,7 +7,6 @@ package binary import ( "bytes" "io" - "bytes" "math" "reflect" "testing" diff --git a/libgo/go/encoding/csv/reader.go b/libgo/go/encoding/csv/reader.go new file mode 100644 index 0000000..ae0f567 --- /dev/null +++ b/libgo/go/encoding/csv/reader.go @@ -0,0 +1,372 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package csv reads and writes comma-separated values (CSV) files. +// +// A csv file contains zero or more records of one or more fields per record. +// Each record is separated by the newline character. The final record may +// optionally be followed by a newline character. +// +// field1,field2,field3 +// +// White space is considered part of a field. +// +// Carriage returns before newline characters are silently removed. +// +// Blank lines are ignored. A line with only whitespace characters (excluding +// the ending newline character) is not considered a blank line. +// +// Fields which start and stop with the quote character " are called +// quoted-fields. The beginning and ending quote are not part of the +// field. +// +// The source: +// +// normal string,"quoted-field" +// +// results in the fields +// +// {`normal string`, `quoted-field`} +// +// Within a quoted-field a quote character followed by a second quote +// character is considered a single quote. +// +// "the ""word"" is true","a ""quoted-field""" +// +// results in +// +// {`the "word" is true`, `a "quoted-field"`} +// +// Newlines and commas may be included in a quoted-field +// +// "Multi-line +// field","comma is ," +// +// results in +// +// {`Multi-line +// field`, `comma is ,`} +package csv + +import ( + "bufio" + "bytes" + "errors" + "fmt" + "io" + "unicode" +) + +// A ParseError is returned for parsing errors. +// The first line is 1. The first column is 0. +type ParseError struct { + Line int // Line where the error occurred + Column int // Column (rune index) where the error occurred + Err error // The actual error +} + +func (e *ParseError) Error() string { + return fmt.Sprintf("line %d, column %d: %s", e.Line, e.Column, e.Err) +} + +// These are the errors that can be returned in ParseError.Error +var ( + ErrTrailingComma = errors.New("extra delimiter at end of line") + ErrBareQuote = errors.New("bare \" in non-quoted-field") + ErrQuote = errors.New("extraneous \" in field") + ErrFieldCount = errors.New("wrong number of fields in line") +) + +// A Reader reads records from a CSV-encoded file. +// +// As returned by NewReader, a Reader expects input conforming to RFC 4180. +// The exported fields can be changed to customize the details before the +// first call to Read or ReadAll. +// +// Comma is the field delimiter. It defaults to ','. +// +// Comment, if not 0, is the comment character. Lines beginning with the +// Comment character are ignored. +// +// If FieldsPerRecord is positive, Read requires each record to +// have the given number of fields. If FieldsPerRecord is 0, Read sets it to +// the number of fields in the first record, so that future records must +// have the same field count. +// +// If LazyQuotes is true, a quote may appear in an unquoted field and a +// non-doubled quote may appear in a quoted field. +// +// If TrailingComma is true, the last field may be an unquoted empty field. +// +// If TrimLeadingSpace is true, leading white space in a field is ignored. +type Reader struct { + Comma rune // Field delimiter (set to ',' by NewReader) + Comment rune // Comment character for start of line + FieldsPerRecord int // Number of expected fields per record + LazyQuotes bool // Allow lazy quotes + TrailingComma bool // Allow trailing comma + TrimLeadingSpace bool // Trim leading space + line int + column int + r *bufio.Reader + field bytes.Buffer +} + +// NewReader returns a new Reader that reads from r. +func NewReader(r io.Reader) *Reader { + return &Reader{ + Comma: ',', + r: bufio.NewReader(r), + } +} + +// error creates a new ParseError based on err. +func (r *Reader) error(err error) error { + return &ParseError{ + Line: r.line, + Column: r.column, + Err: err, + } +} + +// Read reads one record from r. The record is a slice of strings with each +// string representing one field. +func (r *Reader) Read() (record []string, err error) { + for { + record, err = r.parseRecord() + if record != nil { + break + } + if err != nil { + return nil, err + } + } + + if r.FieldsPerRecord > 0 { + if len(record) != r.FieldsPerRecord { + r.column = 0 // report at start of record + return record, r.error(ErrFieldCount) + } + } else if r.FieldsPerRecord == 0 { + r.FieldsPerRecord = len(record) + } + return record, nil +} + +// ReadAll reads all the remaining records from r. +// Each record is a slice of fields. +func (r *Reader) ReadAll() (records [][]string, err error) { + for { + record, err := r.Read() + if err == io.EOF { + return records, nil + } + if err != nil { + return nil, err + } + records = append(records, record) + } + panic("unreachable") +} + +// readRune reads one rune from r, folding \r\n to \n and keeping track +// of how far into the line we have read. r.column will point to the start +// of this rune, not the end of this rune. +func (r *Reader) readRune() (rune, error) { + r1, _, err := r.r.ReadRune() + + // Handle \r\n here. We make the simplifying assumption that + // anytime \r is followed by \n that it can be folded to \n. + // We will not detect files which contain both \r\n and bare \n. + if r1 == '\r' { + r1, _, err = r.r.ReadRune() + if err == nil { + if r1 != '\n' { + r.r.UnreadRune() + r1 = '\r' + } + } + } + r.column++ + return r1, err +} + +// unreadRune puts the last rune read from r back. +func (r *Reader) unreadRune() { + r.r.UnreadRune() + r.column-- +} + +// skip reads runes up to and including the rune delim or until error. +func (r *Reader) skip(delim rune) error { + for { + r1, err := r.readRune() + if err != nil { + return err + } + if r1 == delim { + return nil + } + } + panic("unreachable") +} + +// parseRecord reads and parses a single csv record from r. +func (r *Reader) parseRecord() (fields []string, err error) { + // Each record starts on a new line. We increment our line + // number (lines start at 1, not 0) and set column to -1 + // so as we increment in readRune it points to the character we read. + r.line++ + r.column = -1 + + // Peek at the first rune. If it is an error we are done. + // If we are support comments and it is the comment character + // then skip to the end of line. + + r1, _, err := r.r.ReadRune() + if err != nil { + return nil, err + } + + if r.Comment != 0 && r1 == r.Comment { + return nil, r.skip('\n') + } + r.r.UnreadRune() + + // At this point we have at least one field. + for { + haveField, delim, err := r.parseField() + if haveField { + fields = append(fields, r.field.String()) + } + if delim == '\n' || err == io.EOF { + return fields, err + } else if err != nil { + return nil, err + } + } + panic("unreachable") +} + +// parseField parses the next field in the record. The read field is +// located in r.field. Delim is the first character not part of the field +// (r.Comma or '\n'). +func (r *Reader) parseField() (haveField bool, delim rune, err error) { + r.field.Reset() + + r1, err := r.readRune() + if err != nil { + // If we have EOF and are not at the start of a line + // then we return the empty field. We have already + // checked for trailing commas if needed. + if err == io.EOF && r.column != 0 { + return true, 0, err + } + return false, 0, err + } + + if r.TrimLeadingSpace { + for r1 != '\n' && unicode.IsSpace(r1) { + r1, err = r.readRune() + if err != nil { + return false, 0, err + } + } + } + + switch r1 { + case r.Comma: + // will check below + + case '\n': + // We are a trailing empty field or a blank line + if r.column == 0 { + return false, r1, nil + } + return true, r1, nil + + case '"': + // quoted field + Quoted: + for { + r1, err = r.readRune() + if err != nil { + if err == io.EOF { + if r.LazyQuotes { + return true, 0, err + } + return false, 0, r.error(ErrQuote) + } + return false, 0, err + } + switch r1 { + case '"': + r1, err = r.readRune() + if err != nil || r1 == r.Comma { + break Quoted + } + if r1 == '\n' { + return true, r1, nil + } + if r1 != '"' { + if !r.LazyQuotes { + r.column-- + return false, 0, r.error(ErrQuote) + } + // accept the bare quote + r.field.WriteRune('"') + } + case '\n': + r.line++ + r.column = -1 + } + r.field.WriteRune(r1) + } + + default: + // unquoted field + for { + r.field.WriteRune(r1) + r1, err = r.readRune() + if err != nil || r1 == r.Comma { + break + } + if r1 == '\n' { + return true, r1, nil + } + if !r.LazyQuotes && r1 == '"' { + return false, 0, r.error(ErrBareQuote) + } + } + } + + if err != nil { + if err == io.EOF { + return true, 0, err + } + return false, 0, err + } + + if !r.TrailingComma { + // We don't allow trailing commas. See if we + // are at the end of the line (being mindful + // of trimming spaces). + c := r.column + r1, err = r.readRune() + if r.TrimLeadingSpace { + for r1 != '\n' && unicode.IsSpace(r1) { + r1, err = r.readRune() + if err != nil { + break + } + } + } + if err == io.EOF || r1 == '\n' { + r.column = c // report the comma + return false, 0, r.error(ErrTrailingComma) + } + r.unreadRune() + } + return true, r1, nil +} diff --git a/libgo/go/encoding/csv/reader_test.go b/libgo/go/encoding/csv/reader_test.go new file mode 100644 index 0000000..5fd84a7 --- /dev/null +++ b/libgo/go/encoding/csv/reader_test.go @@ -0,0 +1,281 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package csv + +import ( + "reflect" + "strings" + "testing" +) + +var readTests = []struct { + Name string + Input string + Output [][]string + UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1 + + // These fields are copied into the Reader + Comma rune + Comment rune + FieldsPerRecord int + LazyQuotes bool + TrailingComma bool + TrimLeadingSpace bool + + Error string + Line int // Expected error line if != 0 + Column int // Expected error column if line != 0 +}{ + { + Name: "Simple", + Input: "a,b,c\n", + Output: [][]string{{"a", "b", "c"}}, + }, + { + Name: "CRLF", + Input: "a,b\r\nc,d\r\n", + Output: [][]string{{"a", "b"}, {"c", "d"}}, + }, + { + Name: "BareCR", + Input: "a,b\rc,d\r\n", + Output: [][]string{{"a", "b\rc", "d"}}, + }, + { + Name: "RFC4180test", + UseFieldsPerRecord: true, + Input: `#field1,field2,field3 +"aaa","bb +b","ccc" +"a,a","b""bb","ccc" +zzz,yyy,xxx +`, + Output: [][]string{ + {"#field1", "field2", "field3"}, + {"aaa", "bb\nb", "ccc"}, + {"a,a", `b"bb`, "ccc"}, + {"zzz", "yyy", "xxx"}, + }, + }, + { + Name: "NoEOLTest", + Input: "a,b,c", + Output: [][]string{{"a", "b", "c"}}, + }, + { + Name: "Semicolon", + Comma: ';', + Input: "a;b;c\n", + Output: [][]string{{"a", "b", "c"}}, + }, + { + Name: "MultiLine", + Input: `"two +line","one line","three +line +field"`, + Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}}, + }, + { + Name: "BlankLine", + Input: "a,b,c\n\nd,e,f\n\n", + Output: [][]string{ + {"a", "b", "c"}, + {"d", "e", "f"}, + }, + }, + { + Name: "TrimSpace", + Input: " a, b, c\n", + TrimLeadingSpace: true, + Output: [][]string{{"a", "b", "c"}}, + }, + { + Name: "LeadingSpace", + Input: " a, b, c\n", + Output: [][]string{{" a", " b", " c"}}, + }, + { + Name: "Comment", + Comment: '#', + Input: "#1,2,3\na,b,c\n#comment", + Output: [][]string{{"a", "b", "c"}}, + }, + { + Name: "NoComment", + Input: "#1,2,3\na,b,c", + Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}}, + }, + { + Name: "LazyQuotes", + LazyQuotes: true, + Input: `a "word","1"2",a","b`, + Output: [][]string{{`a "word"`, `1"2`, `a"`, `b`}}, + }, + { + Name: "BareQuotes", + LazyQuotes: true, + Input: `a "word","1"2",a"`, + Output: [][]string{{`a "word"`, `1"2`, `a"`}}, + }, + { + Name: "BareDoubleQuotes", + LazyQuotes: true, + Input: `a""b,c`, + Output: [][]string{{`a""b`, `c`}}, + }, + { + Name: "BadDoubleQuotes", + Input: `a""b,c`, + Error: `bare " in non-quoted-field`, Line: 1, Column: 1, + }, + { + Name: "TrimQuote", + Input: ` "a"," b",c`, + TrimLeadingSpace: true, + Output: [][]string{{"a", " b", "c"}}, + }, + { + Name: "BadBareQuote", + Input: `a "word","b"`, + Error: `bare " in non-quoted-field`, Line: 1, Column: 2, + }, + { + Name: "BadTrailingQuote", + Input: `"a word",b"`, + Error: `bare " in non-quoted-field`, Line: 1, Column: 10, + }, + { + Name: "ExtraneousQuote", + Input: `"a "word","b"`, + Error: `extraneous " in field`, Line: 1, Column: 3, + }, + { + Name: "BadFieldCount", + UseFieldsPerRecord: true, + Input: "a,b,c\nd,e", + Error: "wrong number of fields", Line: 2, + }, + { + Name: "BadFieldCount1", + UseFieldsPerRecord: true, + FieldsPerRecord: 2, + Input: `a,b,c`, + Error: "wrong number of fields", Line: 1, + }, + { + Name: "FieldCount", + Input: "a,b,c\nd,e", + Output: [][]string{{"a", "b", "c"}, {"d", "e"}}, + }, + { + Name: "BadTrailingCommaEOF", + Input: "a,b,c,", + Error: "extra delimiter at end of line", Line: 1, Column: 5, + }, + { + Name: "BadTrailingCommaEOL", + Input: "a,b,c,\n", + Error: "extra delimiter at end of line", Line: 1, Column: 5, + }, + { + Name: "BadTrailingCommaSpaceEOF", + TrimLeadingSpace: true, + Input: "a,b,c, ", + Error: "extra delimiter at end of line", Line: 1, Column: 5, + }, + { + Name: "BadTrailingCommaSpaceEOL", + TrimLeadingSpace: true, + Input: "a,b,c, \n", + Error: "extra delimiter at end of line", Line: 1, Column: 5, + }, + { + Name: "BadTrailingCommaLine3", + TrimLeadingSpace: true, + Input: "a,b,c\nd,e,f\ng,hi,", + Error: "extra delimiter at end of line", Line: 3, Column: 4, + }, + { + Name: "NotTrailingComma3", + Input: "a,b,c, \n", + Output: [][]string{{"a", "b", "c", " "}}, + }, + { + Name: "CommaFieldTest", + TrailingComma: true, + Input: `x,y,z,w +x,y,z, +x,y,, +x,,, +,,, +"x","y","z","w" +"x","y","z","" +"x","y","","" +"x","","","" +"","","","" +`, + Output: [][]string{ + {"x", "y", "z", "w"}, + {"x", "y", "z", ""}, + {"x", "y", "", ""}, + {"x", "", "", ""}, + {"", "", "", ""}, + {"x", "y", "z", "w"}, + {"x", "y", "z", ""}, + {"x", "y", "", ""}, + {"x", "", "", ""}, + {"", "", "", ""}, + }, + }, + { + Name: "Issue 2366", + TrailingComma: true, + TrimLeadingSpace: true, + Input: "a,b,\nc,d,e", + Output: [][]string{ + {"a", "b", ""}, + {"c", "d", "e"}, + }, + }, + { + Name: "Issue 2366a", + TrailingComma: false, + TrimLeadingSpace: true, + Input: "a,b,\nc,d,e", + Error: "extra delimiter at end of line", + }, +} + +func TestRead(t *testing.T) { + for _, tt := range readTests { + r := NewReader(strings.NewReader(tt.Input)) + r.Comment = tt.Comment + if tt.UseFieldsPerRecord { + r.FieldsPerRecord = tt.FieldsPerRecord + } else { + r.FieldsPerRecord = -1 + } + r.LazyQuotes = tt.LazyQuotes + r.TrailingComma = tt.TrailingComma + r.TrimLeadingSpace = tt.TrimLeadingSpace + if tt.Comma != 0 { + r.Comma = tt.Comma + } + out, err := r.ReadAll() + perr, _ := err.(*ParseError) + if tt.Error != "" { + if err == nil || !strings.Contains(err.Error(), tt.Error) { + t.Errorf("%s: error %v, want error %q", tt.Name, err, tt.Error) + } else if tt.Line != 0 && (tt.Line != perr.Line || tt.Column != perr.Column) { + t.Errorf("%s: error at %d:%d expected %d:%d", tt.Name, perr.Line, perr.Column, tt.Line, tt.Column) + } + } else if err != nil { + t.Errorf("%s: unexpected error %v", tt.Name, err) + } else if !reflect.DeepEqual(out, tt.Output) { + t.Errorf("%s: out=%q want %q", tt.Name, out, tt.Output) + } + } +} diff --git a/libgo/go/encoding/csv/writer.go b/libgo/go/encoding/csv/writer.go new file mode 100644 index 0000000..c4dcba5 --- /dev/null +++ b/libgo/go/encoding/csv/writer.go @@ -0,0 +1,121 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package csv + +import ( + "bufio" + "io" + "strings" + "unicode" + "unicode/utf8" +) + +// A Writer writes records to a CSV encoded file. +// +// As returned by NewWriter, a Writer writes records terminated by a +// newline and uses ',' as the field delimiter. The exported fields can be +// changed to customize the details before the first call to Write or WriteAll. +// +// Comma is the field delimiter. +// +// If UseCRLF is true, the Writer ends each record with \r\n instead of \n. +type Writer struct { + Comma rune // Field delimiter (set to to ',' by NewWriter) + UseCRLF bool // True to use \r\n as the line terminator + w *bufio.Writer +} + +// NewWriter returns a new Writer that writes to w. +func NewWriter(w io.Writer) *Writer { + return &Writer{ + Comma: ',', + w: bufio.NewWriter(w), + } +} + +// Writer writes a single CSV record to w along with any necessary quoting. +// A record is a slice of strings with each string being one field. +func (w *Writer) Write(record []string) (err error) { + for n, field := range record { + if n > 0 { + if _, err = w.w.WriteRune(w.Comma); err != nil { + return + } + } + + // If we don't have to have a quoted field then just + // write out the field and continue to the next field. + if !w.fieldNeedsQuotes(field) { + if _, err = w.w.WriteString(field); err != nil { + return + } + continue + } + if err = w.w.WriteByte('"'); err != nil { + return + } + + for _, r1 := range field { + switch r1 { + case '"': + _, err = w.w.WriteString(`""`) + case '\r': + if !w.UseCRLF { + err = w.w.WriteByte('\r') + } + case '\n': + if w.UseCRLF { + _, err = w.w.WriteString("\r\n") + } else { + err = w.w.WriteByte('\n') + } + default: + _, err = w.w.WriteRune(r1) + } + if err != nil { + return + } + } + + if err = w.w.WriteByte('"'); err != nil { + return + } + } + if w.UseCRLF { + _, err = w.w.WriteString("\r\n") + } else { + err = w.w.WriteByte('\n') + } + return +} + +// Flush writes any buffered data to the underlying io.Writer. +func (w *Writer) Flush() { + w.w.Flush() +} + +// WriteAll writes multiple CSV records to w using Write and then calls Flush. +func (w *Writer) WriteAll(records [][]string) (err error) { + for _, record := range records { + err = w.Write(record) + if err != nil { + break + } + } + w.Flush() + return nil +} + +// fieldNeedsQuotes returns true if our field must be enclosed in quotes. +// Empty fields, files with a Comma, fields with a quote or newline, and +// fields which start with a space must be enclosed in quotes. +func (w *Writer) fieldNeedsQuotes(field string) bool { + if len(field) == 0 || strings.IndexRune(field, w.Comma) >= 0 || strings.IndexAny(field, "\"\r\n") >= 0 { + return true + } + + r1, _ := utf8.DecodeRuneInString(field) + return unicode.IsSpace(r1) +} diff --git a/libgo/go/encoding/csv/writer_test.go b/libgo/go/encoding/csv/writer_test.go new file mode 100644 index 0000000..5789590 --- /dev/null +++ b/libgo/go/encoding/csv/writer_test.go @@ -0,0 +1,44 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package csv + +import ( + "bytes" + "testing" +) + +var writeTests = []struct { + Input [][]string + Output string + UseCRLF bool +}{ + {Input: [][]string{{"abc"}}, Output: "abc\n"}, + {Input: [][]string{{"abc"}}, Output: "abc\r\n", UseCRLF: true}, + {Input: [][]string{{`"abc"`}}, Output: `"""abc"""` + "\n"}, + {Input: [][]string{{`a"b`}}, Output: `"a""b"` + "\n"}, + {Input: [][]string{{`"a"b"`}}, Output: `"""a""b"""` + "\n"}, + {Input: [][]string{{" abc"}}, Output: `" abc"` + "\n"}, + {Input: [][]string{{"abc,def"}}, Output: `"abc,def"` + "\n"}, + {Input: [][]string{{"abc", "def"}}, Output: "abc,def\n"}, + {Input: [][]string{{"abc"}, {"def"}}, Output: "abc\ndef\n"}, + {Input: [][]string{{"abc\ndef"}}, Output: "\"abc\ndef\"\n"}, + {Input: [][]string{{"abc\ndef"}}, Output: "\"abc\r\ndef\"\r\n", UseCRLF: true}, +} + +func TestWrite(t *testing.T) { + for n, tt := range writeTests { + b := &bytes.Buffer{} + f := NewWriter(b) + f.UseCRLF = tt.UseCRLF + err := f.WriteAll(tt.Input) + if err != nil { + t.Errorf("Unexpected error: %s\n", err) + } + out := b.String() + if out != tt.Output { + t.Errorf("#%d: out=%q want %q", n, out, tt.Output) + } + } +} diff --git a/libgo/go/encoding/gob/codec_test.go b/libgo/go/encoding/gob/codec_test.go new file mode 100644 index 0000000..dc0e007 --- /dev/null +++ b/libgo/go/encoding/gob/codec_test.go @@ -0,0 +1,1406 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gob + +import ( + "bytes" + "errors" + "math" + "reflect" + "strings" + "testing" + "unsafe" +) + +// Guarantee encoding format by comparing some encodings to hand-written values +type EncodeT struct { + x uint64 + b []byte +} + +var encodeT = []EncodeT{ + {0x00, []byte{0x00}}, + {0x0F, []byte{0x0F}}, + {0xFF, []byte{0xFF, 0xFF}}, + {0xFFFF, []byte{0xFE, 0xFF, 0xFF}}, + {0xFFFFFF, []byte{0xFD, 0xFF, 0xFF, 0xFF}}, + {0xFFFFFFFF, []byte{0xFC, 0xFF, 0xFF, 0xFF, 0xFF}}, + {0xFFFFFFFFFF, []byte{0xFB, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}}, + {0xFFFFFFFFFFFF, []byte{0xFA, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}}, + {0xFFFFFFFFFFFFFF, []byte{0xF9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}}, + {0xFFFFFFFFFFFFFFFF, []byte{0xF8, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}}, + {0x1111, []byte{0xFE, 0x11, 0x11}}, + {0x1111111111111111, []byte{0xF8, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}}, + {0x8888888888888888, []byte{0xF8, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88}}, + {1 << 63, []byte{0xF8, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, +} + +// testError is meant to be used as a deferred function to turn a panic(gobError) into a +// plain test.Error call. +func testError(t *testing.T) { + if e := recover(); e != nil { + t.Error(e.(gobError).err) // Will re-panic if not one of our errors, such as a runtime error. + } + return +} + +// Test basic encode/decode routines for unsigned integers +func TestUintCodec(t *testing.T) { + defer testError(t) + b := new(bytes.Buffer) + encState := newEncoderState(b) + for _, tt := range encodeT { + b.Reset() + encState.encodeUint(tt.x) + if !bytes.Equal(tt.b, b.Bytes()) { + t.Errorf("encodeUint: %#x encode: expected % x got % x", tt.x, tt.b, b.Bytes()) + } + } + decState := newDecodeState(b) + for u := uint64(0); ; u = (u + 1) * 7 { + b.Reset() + encState.encodeUint(u) + v := decState.decodeUint() + if u != v { + t.Errorf("Encode/Decode: sent %#x received %#x", u, v) + } + if u&(1<<63) != 0 { + break + } + } +} + +func verifyInt(i int64, t *testing.T) { + defer testError(t) + var b = new(bytes.Buffer) + encState := newEncoderState(b) + encState.encodeInt(i) + decState := newDecodeState(b) + decState.buf = make([]byte, 8) + j := decState.decodeInt() + if i != j { + t.Errorf("Encode/Decode: sent %#x received %#x", uint64(i), uint64(j)) + } +} + +// Test basic encode/decode routines for signed integers +func TestIntCodec(t *testing.T) { + for u := uint64(0); ; u = (u + 1) * 7 { + // Do positive and negative values + i := int64(u) + verifyInt(i, t) + verifyInt(-i, t) + verifyInt(^i, t) + if u&(1<<63) != 0 { + break + } + } + verifyInt(-1<<63, t) // a tricky case +} + +// The result of encoding a true boolean with field number 7 +var boolResult = []byte{0x07, 0x01} +// The result of encoding a number 17 with field number 7 +var signedResult = []byte{0x07, 2 * 17} +var unsignedResult = []byte{0x07, 17} +var floatResult = []byte{0x07, 0xFE, 0x31, 0x40} +// The result of encoding a number 17+19i with field number 7 +var complexResult = []byte{0x07, 0xFE, 0x31, 0x40, 0xFE, 0x33, 0x40} +// The result of encoding "hello" with field number 7 +var bytesResult = []byte{0x07, 0x05, 'h', 'e', 'l', 'l', 'o'} + +func newDecodeState(buf *bytes.Buffer) *decoderState { + d := new(decoderState) + d.b = buf + d.buf = make([]byte, uint64Size) + return d +} + +func newEncoderState(b *bytes.Buffer) *encoderState { + b.Reset() + state := &encoderState{enc: nil, b: b} + state.fieldnum = -1 + return state +} + +// Test instruction execution for encoding. +// Do not run the machine yet; instead do individual instructions crafted by hand. +func TestScalarEncInstructions(t *testing.T) { + var b = new(bytes.Buffer) + + // bool + { + data := struct{ a bool }{true} + instr := &encInstr{encBool, 6, 0, 0} + state := newEncoderState(b) + instr.op(instr, state, unsafe.Pointer(&data)) + if !bytes.Equal(boolResult, b.Bytes()) { + t.Errorf("bool enc instructions: expected % x got % x", boolResult, b.Bytes()) + } + } + + // int + { + b.Reset() + data := struct{ a int }{17} + instr := &encInstr{encInt, 6, 0, 0} + state := newEncoderState(b) + instr.op(instr, state, unsafe.Pointer(&data)) + if !bytes.Equal(signedResult, b.Bytes()) { + t.Errorf("int enc instructions: expected % x got % x", signedResult, b.Bytes()) + } + } + + // uint + { + b.Reset() + data := struct{ a uint }{17} + instr := &encInstr{encUint, 6, 0, 0} + state := newEncoderState(b) + instr.op(instr, state, unsafe.Pointer(&data)) + if !bytes.Equal(unsignedResult, b.Bytes()) { + t.Errorf("uint enc instructions: expected % x got % x", unsignedResult, b.Bytes()) + } + } + + // int8 + { + b.Reset() + data := struct{ a int8 }{17} + instr := &encInstr{encInt8, 6, 0, 0} + state := newEncoderState(b) + instr.op(instr, state, unsafe.Pointer(&data)) + if !bytes.Equal(signedResult, b.Bytes()) { + t.Errorf("int8 enc instructions: expected % x got % x", signedResult, b.Bytes()) + } + } + + // uint8 + { + b.Reset() + data := struct{ a uint8 }{17} + instr := &encInstr{encUint8, 6, 0, 0} + state := newEncoderState(b) + instr.op(instr, state, unsafe.Pointer(&data)) + if !bytes.Equal(unsignedResult, b.Bytes()) { + t.Errorf("uint8 enc instructions: expected % x got % x", unsignedResult, b.Bytes()) + } + } + + // int16 + { + b.Reset() + data := struct{ a int16 }{17} + instr := &encInstr{encInt16, 6, 0, 0} + state := newEncoderState(b) + instr.op(instr, state, unsafe.Pointer(&data)) + if !bytes.Equal(signedResult, b.Bytes()) { + t.Errorf("int16 enc instructions: expected % x got % x", signedResult, b.Bytes()) + } + } + + // uint16 + { + b.Reset() + data := struct{ a uint16 }{17} + instr := &encInstr{encUint16, 6, 0, 0} + state := newEncoderState(b) + instr.op(instr, state, unsafe.Pointer(&data)) + if !bytes.Equal(unsignedResult, b.Bytes()) { + t.Errorf("uint16 enc instructions: expected % x got % x", unsignedResult, b.Bytes()) + } + } + + // int32 + { + b.Reset() + data := struct{ a int32 }{17} + instr := &encInstr{encInt32, 6, 0, 0} + state := newEncoderState(b) + instr.op(instr, state, unsafe.Pointer(&data)) + if !bytes.Equal(signedResult, b.Bytes()) { + t.Errorf("int32 enc instructions: expected % x got % x", signedResult, b.Bytes()) + } + } + + // uint32 + { + b.Reset() + data := struct{ a uint32 }{17} + instr := &encInstr{encUint32, 6, 0, 0} + state := newEncoderState(b) + instr.op(instr, state, unsafe.Pointer(&data)) + if !bytes.Equal(unsignedResult, b.Bytes()) { + t.Errorf("uint32 enc instructions: expected % x got % x", unsignedResult, b.Bytes()) + } + } + + // int64 + { + b.Reset() + data := struct{ a int64 }{17} + instr := &encInstr{encInt64, 6, 0, 0} + state := newEncoderState(b) + instr.op(instr, state, unsafe.Pointer(&data)) + if !bytes.Equal(signedResult, b.Bytes()) { + t.Errorf("int64 enc instructions: expected % x got % x", signedResult, b.Bytes()) + } + } + + // uint64 + { + b.Reset() + data := struct{ a uint64 }{17} + instr := &encInstr{encUint64, 6, 0, 0} + state := newEncoderState(b) + instr.op(instr, state, unsafe.Pointer(&data)) + if !bytes.Equal(unsignedResult, b.Bytes()) { + t.Errorf("uint64 enc instructions: expected % x got % x", unsignedResult, b.Bytes()) + } + } + + // float32 + { + b.Reset() + data := struct{ a float32 }{17} + instr := &encInstr{encFloat32, 6, 0, 0} + state := newEncoderState(b) + instr.op(instr, state, unsafe.Pointer(&data)) + if !bytes.Equal(floatResult, b.Bytes()) { + t.Errorf("float32 enc instructions: expected % x got % x", floatResult, b.Bytes()) + } + } + + // float64 + { + b.Reset() + data := struct{ a float64 }{17} + instr := &encInstr{encFloat64, 6, 0, 0} + state := newEncoderState(b) + instr.op(instr, state, unsafe.Pointer(&data)) + if !bytes.Equal(floatResult, b.Bytes()) { + t.Errorf("float64 enc instructions: expected % x got % x", floatResult, b.Bytes()) + } + } + + // bytes == []uint8 + { + b.Reset() + data := struct{ a []byte }{[]byte("hello")} + instr := &encInstr{encUint8Array, 6, 0, 0} + state := newEncoderState(b) + instr.op(instr, state, unsafe.Pointer(&data)) + if !bytes.Equal(bytesResult, b.Bytes()) { + t.Errorf("bytes enc instructions: expected % x got % x", bytesResult, b.Bytes()) + } + } + + // string + { + b.Reset() + data := struct{ a string }{"hello"} + instr := &encInstr{encString, 6, 0, 0} + state := newEncoderState(b) + instr.op(instr, state, unsafe.Pointer(&data)) + if !bytes.Equal(bytesResult, b.Bytes()) { + t.Errorf("string enc instructions: expected % x got % x", bytesResult, b.Bytes()) + } + } +} + +func execDec(typ string, instr *decInstr, state *decoderState, t *testing.T, p unsafe.Pointer) { + defer testError(t) + v := int(state.decodeUint()) + if v+state.fieldnum != 6 { + t.Fatalf("decoding field number %d, got %d", 6, v+state.fieldnum) + } + instr.op(instr, state, decIndirect(p, instr.indir)) + state.fieldnum = 6 +} + +func newDecodeStateFromData(data []byte) *decoderState { + b := bytes.NewBuffer(data) + state := newDecodeState(b) + state.fieldnum = -1 + return state +} + +// Test instruction execution for decoding. +// Do not run the machine yet; instead do individual instructions crafted by hand. +func TestScalarDecInstructions(t *testing.T) { + ovfl := errors.New("overflow") + + // bool + { + var data struct { + a bool + } + instr := &decInstr{decBool, 6, 0, 0, ovfl} + state := newDecodeStateFromData(boolResult) + execDec("bool", instr, state, t, unsafe.Pointer(&data)) + if data.a != true { + t.Errorf("bool a = %v not true", data.a) + } + } + // int + { + var data struct { + a int + } + instr := &decInstr{decOpTable[reflect.Int], 6, 0, 0, ovfl} + state := newDecodeStateFromData(signedResult) + execDec("int", instr, state, t, unsafe.Pointer(&data)) + if data.a != 17 { + t.Errorf("int a = %v not 17", data.a) + } + } + + // uint + { + var data struct { + a uint + } + instr := &decInstr{decOpTable[reflect.Uint], 6, 0, 0, ovfl} + state := newDecodeStateFromData(unsignedResult) + execDec("uint", instr, state, t, unsafe.Pointer(&data)) + if data.a != 17 { + t.Errorf("uint a = %v not 17", data.a) + } + } + + // int8 + { + var data struct { + a int8 + } + instr := &decInstr{decInt8, 6, 0, 0, ovfl} + state := newDecodeStateFromData(signedResult) + execDec("int8", instr, state, t, unsafe.Pointer(&data)) + if data.a != 17 { + t.Errorf("int8 a = %v not 17", data.a) + } + } + + // uint8 + { + var data struct { + a uint8 + } + instr := &decInstr{decUint8, 6, 0, 0, ovfl} + state := newDecodeStateFromData(unsignedResult) + execDec("uint8", instr, state, t, unsafe.Pointer(&data)) + if data.a != 17 { + t.Errorf("uint8 a = %v not 17", data.a) + } + } + + // int16 + { + var data struct { + a int16 + } + instr := &decInstr{decInt16, 6, 0, 0, ovfl} + state := newDecodeStateFromData(signedResult) + execDec("int16", instr, state, t, unsafe.Pointer(&data)) + if data.a != 17 { + t.Errorf("int16 a = %v not 17", data.a) + } + } + + // uint16 + { + var data struct { + a uint16 + } + instr := &decInstr{decUint16, 6, 0, 0, ovfl} + state := newDecodeStateFromData(unsignedResult) + execDec("uint16", instr, state, t, unsafe.Pointer(&data)) + if data.a != 17 { + t.Errorf("uint16 a = %v not 17", data.a) + } + } + + // int32 + { + var data struct { + a int32 + } + instr := &decInstr{decInt32, 6, 0, 0, ovfl} + state := newDecodeStateFromData(signedResult) + execDec("int32", instr, state, t, unsafe.Pointer(&data)) + if data.a != 17 { + t.Errorf("int32 a = %v not 17", data.a) + } + } + + // uint32 + { + var data struct { + a uint32 + } + instr := &decInstr{decUint32, 6, 0, 0, ovfl} + state := newDecodeStateFromData(unsignedResult) + execDec("uint32", instr, state, t, unsafe.Pointer(&data)) + if data.a != 17 { + t.Errorf("uint32 a = %v not 17", data.a) + } + } + + // uintptr + { + var data struct { + a uintptr + } + instr := &decInstr{decOpTable[reflect.Uintptr], 6, 0, 0, ovfl} + state := newDecodeStateFromData(unsignedResult) + execDec("uintptr", instr, state, t, unsafe.Pointer(&data)) + if data.a != 17 { + t.Errorf("uintptr a = %v not 17", data.a) + } + } + + // int64 + { + var data struct { + a int64 + } + instr := &decInstr{decInt64, 6, 0, 0, ovfl} + state := newDecodeStateFromData(signedResult) + execDec("int64", instr, state, t, unsafe.Pointer(&data)) + if data.a != 17 { + t.Errorf("int64 a = %v not 17", data.a) + } + } + + // uint64 + { + var data struct { + a uint64 + } + instr := &decInstr{decUint64, 6, 0, 0, ovfl} + state := newDecodeStateFromData(unsignedResult) + execDec("uint64", instr, state, t, unsafe.Pointer(&data)) + if data.a != 17 { + t.Errorf("uint64 a = %v not 17", data.a) + } + } + + // float32 + { + var data struct { + a float32 + } + instr := &decInstr{decFloat32, 6, 0, 0, ovfl} + state := newDecodeStateFromData(floatResult) + execDec("float32", instr, state, t, unsafe.Pointer(&data)) + if data.a != 17 { + t.Errorf("float32 a = %v not 17", data.a) + } + } + + // float64 + { + var data struct { + a float64 + } + instr := &decInstr{decFloat64, 6, 0, 0, ovfl} + state := newDecodeStateFromData(floatResult) + execDec("float64", instr, state, t, unsafe.Pointer(&data)) + if data.a != 17 { + t.Errorf("float64 a = %v not 17", data.a) + } + } + + // complex64 + { + var data struct { + a complex64 + } + instr := &decInstr{decOpTable[reflect.Complex64], 6, 0, 0, ovfl} + state := newDecodeStateFromData(complexResult) + execDec("complex", instr, state, t, unsafe.Pointer(&data)) + if data.a != 17+19i { + t.Errorf("complex a = %v not 17+19i", data.a) + } + } + + // complex128 + { + var data struct { + a complex128 + } + instr := &decInstr{decOpTable[reflect.Complex128], 6, 0, 0, ovfl} + state := newDecodeStateFromData(complexResult) + execDec("complex", instr, state, t, unsafe.Pointer(&data)) + if data.a != 17+19i { + t.Errorf("complex a = %v not 17+19i", data.a) + } + } + + // bytes == []uint8 + { + var data struct { + a []byte + } + instr := &decInstr{decUint8Slice, 6, 0, 0, ovfl} + state := newDecodeStateFromData(bytesResult) + execDec("bytes", instr, state, t, unsafe.Pointer(&data)) + if string(data.a) != "hello" { + t.Errorf(`bytes a = %q not "hello"`, string(data.a)) + } + } + + // string + { + var data struct { + a string + } + instr := &decInstr{decString, 6, 0, 0, ovfl} + state := newDecodeStateFromData(bytesResult) + execDec("bytes", instr, state, t, unsafe.Pointer(&data)) + if data.a != "hello" { + t.Errorf(`bytes a = %q not "hello"`, data.a) + } + } +} + +func TestEndToEnd(t *testing.T) { + type T2 struct { + T string + } + s1 := "string1" + s2 := "string2" + type T1 struct { + A, B, C int + M map[string]*float64 + EmptyMap map[string]int // to check that we receive a non-nil map. + N *[3]float64 + Strs *[2]string + Int64s *[]int64 + RI complex64 + S string + Y []byte + T *T2 + } + pi := 3.14159 + e := 2.71828 + t1 := &T1{ + A: 17, + B: 18, + C: -5, + M: map[string]*float64{"pi": &pi, "e": &e}, + EmptyMap: make(map[string]int), + N: &[3]float64{1.5, 2.5, 3.5}, + Strs: &[2]string{s1, s2}, + Int64s: &[]int64{77, 89, 123412342134}, + RI: 17 - 23i, + S: "Now is the time", + Y: []byte("hello, sailor"), + T: &T2{"this is T2"}, + } + b := new(bytes.Buffer) + err := NewEncoder(b).Encode(t1) + if err != nil { + t.Error("encode:", err) + } + var _t1 T1 + err = NewDecoder(b).Decode(&_t1) + if err != nil { + t.Fatal("decode:", err) + } + if !reflect.DeepEqual(t1, &_t1) { + t.Errorf("encode expected %v got %v", *t1, _t1) + } + // Be absolutely sure the received map is non-nil. + if t1.EmptyMap == nil { + t.Errorf("nil map sent") + } + if _t1.EmptyMap == nil { + t.Errorf("nil map received") + } +} + +func TestOverflow(t *testing.T) { + type inputT struct { + Maxi int64 + Mini int64 + Maxu uint64 + Maxf float64 + Minf float64 + Maxc complex128 + Minc complex128 + } + var it inputT + var err error + b := new(bytes.Buffer) + enc := NewEncoder(b) + dec := NewDecoder(b) + + // int8 + b.Reset() + it = inputT{ + Maxi: math.MaxInt8 + 1, + } + type outi8 struct { + Maxi int8 + Mini int8 + } + var o1 outi8 + enc.Encode(it) + err = dec.Decode(&o1) + if err == nil || err.Error() != `value for "Maxi" out of range` { + t.Error("wrong overflow error for int8:", err) + } + it = inputT{ + Mini: math.MinInt8 - 1, + } + b.Reset() + enc.Encode(it) + err = dec.Decode(&o1) + if err == nil || err.Error() != `value for "Mini" out of range` { + t.Error("wrong underflow error for int8:", err) + } + + // int16 + b.Reset() + it = inputT{ + Maxi: math.MaxInt16 + 1, + } + type outi16 struct { + Maxi int16 + Mini int16 + } + var o2 outi16 + enc.Encode(it) + err = dec.Decode(&o2) + if err == nil || err.Error() != `value for "Maxi" out of range` { + t.Error("wrong overflow error for int16:", err) + } + it = inputT{ + Mini: math.MinInt16 - 1, + } + b.Reset() + enc.Encode(it) + err = dec.Decode(&o2) + if err == nil || err.Error() != `value for "Mini" out of range` { + t.Error("wrong underflow error for int16:", err) + } + + // int32 + b.Reset() + it = inputT{ + Maxi: math.MaxInt32 + 1, + } + type outi32 struct { + Maxi int32 + Mini int32 + } + var o3 outi32 + enc.Encode(it) + err = dec.Decode(&o3) + if err == nil || err.Error() != `value for "Maxi" out of range` { + t.Error("wrong overflow error for int32:", err) + } + it = inputT{ + Mini: math.MinInt32 - 1, + } + b.Reset() + enc.Encode(it) + err = dec.Decode(&o3) + if err == nil || err.Error() != `value for "Mini" out of range` { + t.Error("wrong underflow error for int32:", err) + } + + // uint8 + b.Reset() + it = inputT{ + Maxu: math.MaxUint8 + 1, + } + type outu8 struct { + Maxu uint8 + } + var o4 outu8 + enc.Encode(it) + err = dec.Decode(&o4) + if err == nil || err.Error() != `value for "Maxu" out of range` { + t.Error("wrong overflow error for uint8:", err) + } + + // uint16 + b.Reset() + it = inputT{ + Maxu: math.MaxUint16 + 1, + } + type outu16 struct { + Maxu uint16 + } + var o5 outu16 + enc.Encode(it) + err = dec.Decode(&o5) + if err == nil || err.Error() != `value for "Maxu" out of range` { + t.Error("wrong overflow error for uint16:", err) + } + + // uint32 + b.Reset() + it = inputT{ + Maxu: math.MaxUint32 + 1, + } + type outu32 struct { + Maxu uint32 + } + var o6 outu32 + enc.Encode(it) + err = dec.Decode(&o6) + if err == nil || err.Error() != `value for "Maxu" out of range` { + t.Error("wrong overflow error for uint32:", err) + } + + // float32 + b.Reset() + it = inputT{ + Maxf: math.MaxFloat32 * 2, + } + type outf32 struct { + Maxf float32 + Minf float32 + } + var o7 outf32 + enc.Encode(it) + err = dec.Decode(&o7) + if err == nil || err.Error() != `value for "Maxf" out of range` { + t.Error("wrong overflow error for float32:", err) + } + + // complex64 + b.Reset() + it = inputT{ + Maxc: complex(math.MaxFloat32*2, math.MaxFloat32*2), + } + type outc64 struct { + Maxc complex64 + Minc complex64 + } + var o8 outc64 + enc.Encode(it) + err = dec.Decode(&o8) + if err == nil || err.Error() != `value for "Maxc" out of range` { + t.Error("wrong overflow error for complex64:", err) + } +} + +func TestNesting(t *testing.T) { + type RT struct { + A string + Next *RT + } + rt := new(RT) + rt.A = "level1" + rt.Next = new(RT) + rt.Next.A = "level2" + b := new(bytes.Buffer) + NewEncoder(b).Encode(rt) + var drt RT + dec := NewDecoder(b) + err := dec.Decode(&drt) + if err != nil { + t.Fatal("decoder error:", err) + } + if drt.A != rt.A { + t.Errorf("nesting: encode expected %v got %v", *rt, drt) + } + if drt.Next == nil { + t.Errorf("nesting: recursion failed") + } + if drt.Next.A != rt.Next.A { + t.Errorf("nesting: encode expected %v got %v", *rt.Next, *drt.Next) + } +} + +// These three structures have the same data with different indirections +type T0 struct { + A int + B int + C int + D int +} +type T1 struct { + A int + B *int + C **int + D ***int +} +type T2 struct { + A ***int + B **int + C *int + D int +} + +func TestAutoIndirection(t *testing.T) { + // First transfer t1 into t0 + var t1 T1 + t1.A = 17 + t1.B = new(int) + *t1.B = 177 + t1.C = new(*int) + *t1.C = new(int) + **t1.C = 1777 + t1.D = new(**int) + *t1.D = new(*int) + **t1.D = new(int) + ***t1.D = 17777 + b := new(bytes.Buffer) + enc := NewEncoder(b) + enc.Encode(t1) + dec := NewDecoder(b) + var t0 T0 + dec.Decode(&t0) + if t0.A != 17 || t0.B != 177 || t0.C != 1777 || t0.D != 17777 { + t.Errorf("t1->t0: expected {17 177 1777 17777}; got %v", t0) + } + + // Now transfer t2 into t0 + var t2 T2 + t2.D = 17777 + t2.C = new(int) + *t2.C = 1777 + t2.B = new(*int) + *t2.B = new(int) + **t2.B = 177 + t2.A = new(**int) + *t2.A = new(*int) + **t2.A = new(int) + ***t2.A = 17 + b.Reset() + enc.Encode(t2) + t0 = T0{} + dec.Decode(&t0) + if t0.A != 17 || t0.B != 177 || t0.C != 1777 || t0.D != 17777 { + t.Errorf("t2->t0 expected {17 177 1777 17777}; got %v", t0) + } + + // Now transfer t0 into t1 + t0 = T0{17, 177, 1777, 17777} + b.Reset() + enc.Encode(t0) + t1 = T1{} + dec.Decode(&t1) + if t1.A != 17 || *t1.B != 177 || **t1.C != 1777 || ***t1.D != 17777 { + t.Errorf("t0->t1 expected {17 177 1777 17777}; got {%d %d %d %d}", t1.A, *t1.B, **t1.C, ***t1.D) + } + + // Now transfer t0 into t2 + b.Reset() + enc.Encode(t0) + t2 = T2{} + dec.Decode(&t2) + if ***t2.A != 17 || **t2.B != 177 || *t2.C != 1777 || t2.D != 17777 { + t.Errorf("t0->t2 expected {17 177 1777 17777}; got {%d %d %d %d}", ***t2.A, **t2.B, *t2.C, t2.D) + } + + // Now do t2 again but without pre-allocated pointers. + b.Reset() + enc.Encode(t0) + ***t2.A = 0 + **t2.B = 0 + *t2.C = 0 + t2.D = 0 + dec.Decode(&t2) + if ***t2.A != 17 || **t2.B != 177 || *t2.C != 1777 || t2.D != 17777 { + t.Errorf("t0->t2 expected {17 177 1777 17777}; got {%d %d %d %d}", ***t2.A, **t2.B, *t2.C, t2.D) + } +} + +type RT0 struct { + A int + B string + C float64 +} +type RT1 struct { + C float64 + B string + A int + NotSet string +} + +func TestReorderedFields(t *testing.T) { + var rt0 RT0 + rt0.A = 17 + rt0.B = "hello" + rt0.C = 3.14159 + b := new(bytes.Buffer) + NewEncoder(b).Encode(rt0) + dec := NewDecoder(b) + var rt1 RT1 + // Wire type is RT0, local type is RT1. + err := dec.Decode(&rt1) + if err != nil { + t.Fatal("decode error:", err) + } + if rt0.A != rt1.A || rt0.B != rt1.B || rt0.C != rt1.C { + t.Errorf("rt1->rt0: expected %v; got %v", rt0, rt1) + } +} + +// Like an RT0 but with fields we'll ignore on the decode side. +type IT0 struct { + A int64 + B string + Ignore_d []int + Ignore_e [3]float64 + Ignore_f bool + Ignore_g string + Ignore_h []byte + Ignore_i *RT1 + Ignore_m map[string]int + C float64 +} + +func TestIgnoredFields(t *testing.T) { + var it0 IT0 + it0.A = 17 + it0.B = "hello" + it0.C = 3.14159 + it0.Ignore_d = []int{1, 2, 3} + it0.Ignore_e[0] = 1.0 + it0.Ignore_e[1] = 2.0 + it0.Ignore_e[2] = 3.0 + it0.Ignore_f = true + it0.Ignore_g = "pay no attention" + it0.Ignore_h = []byte("to the curtain") + it0.Ignore_i = &RT1{3.1, "hi", 7, "hello"} + it0.Ignore_m = map[string]int{"one": 1, "two": 2} + + b := new(bytes.Buffer) + NewEncoder(b).Encode(it0) + dec := NewDecoder(b) + var rt1 RT1 + // Wire type is IT0, local type is RT1. + err := dec.Decode(&rt1) + if err != nil { + t.Error("error: ", err) + } + if int(it0.A) != rt1.A || it0.B != rt1.B || it0.C != rt1.C { + t.Errorf("rt0->rt1: expected %v; got %v", it0, rt1) + } +} + +func TestBadRecursiveType(t *testing.T) { + type Rec ***Rec + var rec Rec + b := new(bytes.Buffer) + err := NewEncoder(b).Encode(&rec) + if err == nil { + t.Error("expected error; got none") + } else if strings.Index(err.Error(), "recursive") < 0 { + t.Error("expected recursive type error; got", err) + } + // Can't test decode easily because we can't encode one, so we can't pass one to a Decoder. +} + +type Bad0 struct { + CH chan int + C float64 +} + +func TestInvalidField(t *testing.T) { + var bad0 Bad0 + bad0.CH = make(chan int) + b := new(bytes.Buffer) + dummyEncoder := new(Encoder) // sufficient for this purpose. + dummyEncoder.encode(b, reflect.ValueOf(&bad0), userType(reflect.TypeOf(&bad0))) + if err := dummyEncoder.err; err == nil { + t.Error("expected error; got none") + } else if strings.Index(err.Error(), "type") < 0 { + t.Error("expected type error; got", err) + } +} + +type Indirect struct { + A ***[3]int + S ***[]int + M ****map[string]int +} + +type Direct struct { + A [3]int + S []int + M map[string]int +} + +func TestIndirectSliceMapArray(t *testing.T) { + // Marshal indirect, unmarshal to direct. + i := new(Indirect) + i.A = new(**[3]int) + *i.A = new(*[3]int) + **i.A = new([3]int) + ***i.A = [3]int{1, 2, 3} + i.S = new(**[]int) + *i.S = new(*[]int) + **i.S = new([]int) + ***i.S = []int{4, 5, 6} + i.M = new(***map[string]int) + *i.M = new(**map[string]int) + **i.M = new(*map[string]int) + ***i.M = new(map[string]int) + ****i.M = map[string]int{"one": 1, "two": 2, "three": 3} + b := new(bytes.Buffer) + NewEncoder(b).Encode(i) + dec := NewDecoder(b) + var d Direct + err := dec.Decode(&d) + if err != nil { + t.Error("error: ", err) + } + if len(d.A) != 3 || d.A[0] != 1 || d.A[1] != 2 || d.A[2] != 3 { + t.Errorf("indirect to direct: d.A is %v not %v", d.A, ***i.A) + } + if len(d.S) != 3 || d.S[0] != 4 || d.S[1] != 5 || d.S[2] != 6 { + t.Errorf("indirect to direct: d.S is %v not %v", d.S, ***i.S) + } + if len(d.M) != 3 || d.M["one"] != 1 || d.M["two"] != 2 || d.M["three"] != 3 { + t.Errorf("indirect to direct: d.M is %v not %v", d.M, ***i.M) + } + // Marshal direct, unmarshal to indirect. + d.A = [3]int{11, 22, 33} + d.S = []int{44, 55, 66} + d.M = map[string]int{"four": 4, "five": 5, "six": 6} + i = new(Indirect) + b.Reset() + NewEncoder(b).Encode(d) + dec = NewDecoder(b) + err = dec.Decode(&i) + if err != nil { + t.Fatal("error: ", err) + } + if len(***i.A) != 3 || (***i.A)[0] != 11 || (***i.A)[1] != 22 || (***i.A)[2] != 33 { + t.Errorf("direct to indirect: ***i.A is %v not %v", ***i.A, d.A) + } + if len(***i.S) != 3 || (***i.S)[0] != 44 || (***i.S)[1] != 55 || (***i.S)[2] != 66 { + t.Errorf("direct to indirect: ***i.S is %v not %v", ***i.S, ***i.S) + } + if len(****i.M) != 3 || (****i.M)["four"] != 4 || (****i.M)["five"] != 5 || (****i.M)["six"] != 6 { + t.Errorf("direct to indirect: ****i.M is %v not %v", ****i.M, d.M) + } +} + +// An interface with several implementations +type Squarer interface { + Square() int +} + +type Int int + +func (i Int) Square() int { + return int(i * i) +} + +type Float float64 + +func (f Float) Square() int { + return int(f * f) +} + +type Vector []int + +func (v Vector) Square() int { + sum := 0 + for _, x := range v { + sum += x * x + } + return sum +} + +type Point struct { + X, Y int +} + +func (p Point) Square() int { + return p.X*p.X + p.Y*p.Y +} + +// A struct with interfaces in it. +type InterfaceItem struct { + I int + Sq1, Sq2, Sq3 Squarer + F float64 + Sq []Squarer +} + +// The same struct without interfaces +type NoInterfaceItem struct { + I int + F float64 +} + +func TestInterface(t *testing.T) { + iVal := Int(3) + fVal := Float(5) + // Sending a Vector will require that the receiver define a type in the middle of + // receiving the value for item2. + vVal := Vector{1, 2, 3} + b := new(bytes.Buffer) + item1 := &InterfaceItem{1, iVal, fVal, vVal, 11.5, []Squarer{iVal, fVal, nil, vVal}} + // Register the types. + Register(Int(0)) + Register(Float(0)) + Register(Vector{}) + err := NewEncoder(b).Encode(item1) + if err != nil { + t.Error("expected no encode error; got", err) + } + + item2 := InterfaceItem{} + err = NewDecoder(b).Decode(&item2) + if err != nil { + t.Fatal("decode:", err) + } + if item2.I != item1.I { + t.Error("normal int did not decode correctly") + } + if item2.Sq1 == nil || item2.Sq1.Square() != iVal.Square() { + t.Error("Int did not decode correctly") + } + if item2.Sq2 == nil || item2.Sq2.Square() != fVal.Square() { + t.Error("Float did not decode correctly") + } + if item2.Sq3 == nil || item2.Sq3.Square() != vVal.Square() { + t.Error("Vector did not decode correctly") + } + if item2.F != item1.F { + t.Error("normal float did not decode correctly") + } + // Now check that we received a slice of Squarers correctly, including a nil element + if len(item1.Sq) != len(item2.Sq) { + t.Fatalf("[]Squarer length wrong: got %d; expected %d", len(item2.Sq), len(item1.Sq)) + } + for i, v1 := range item1.Sq { + v2 := item2.Sq[i] + if v1 == nil || v2 == nil { + if v1 != nil || v2 != nil { + t.Errorf("item %d inconsistent nils", i) + } + continue + if v1.Square() != v2.Square() { + t.Errorf("item %d inconsistent values: %v %v", i, v1, v2) + } + } + } +} + +// A struct with all basic types, stored in interfaces. +type BasicInterfaceItem struct { + Int, Int8, Int16, Int32, Int64 interface{} + Uint, Uint8, Uint16, Uint32, Uint64 interface{} + Float32, Float64 interface{} + Complex64, Complex128 interface{} + Bool interface{} + String interface{} + Bytes interface{} +} + +func TestInterfaceBasic(t *testing.T) { + b := new(bytes.Buffer) + item1 := &BasicInterfaceItem{ + int(1), int8(1), int16(1), int32(1), int64(1), + uint(1), uint8(1), uint16(1), uint32(1), uint64(1), + float32(1), 1.0, + complex64(1i), complex128(1i), + true, + "hello", + []byte("sailor"), + } + err := NewEncoder(b).Encode(item1) + if err != nil { + t.Error("expected no encode error; got", err) + } + + item2 := &BasicInterfaceItem{} + err = NewDecoder(b).Decode(&item2) + if err != nil { + t.Fatal("decode:", err) + } + if !reflect.DeepEqual(item1, item2) { + t.Errorf("encode expected %v got %v", item1, item2) + } + // Hand check a couple for correct types. + if v, ok := item2.Bool.(bool); !ok || !v { + t.Error("boolean should be true") + } + if v, ok := item2.String.(string); !ok || v != item1.String.(string) { + t.Errorf("string should be %v is %v", item1.String, v) + } +} + +type String string + +type PtrInterfaceItem struct { + Str1 interface{} // basic + Str2 interface{} // derived +} + +// We'll send pointers; should receive values. +// Also check that we can register T but send *T. +func TestInterfacePointer(t *testing.T) { + b := new(bytes.Buffer) + str1 := "howdy" + str2 := String("kiddo") + item1 := &PtrInterfaceItem{ + &str1, + &str2, + } + // Register the type. + Register(str2) + err := NewEncoder(b).Encode(item1) + if err != nil { + t.Error("expected no encode error; got", err) + } + + item2 := &PtrInterfaceItem{} + err = NewDecoder(b).Decode(&item2) + if err != nil { + t.Fatal("decode:", err) + } + // Hand test for correct types and values. + if v, ok := item2.Str1.(string); !ok || v != str1 { + t.Errorf("basic string failed: %q should be %q", v, str1) + } + if v, ok := item2.Str2.(String); !ok || v != str2 { + t.Errorf("derived type String failed: %q should be %q", v, str2) + } +} + +func TestIgnoreInterface(t *testing.T) { + iVal := Int(3) + fVal := Float(5) + // Sending a Point will require that the receiver define a type in the middle of + // receiving the value for item2. + pVal := Point{2, 3} + b := new(bytes.Buffer) + item1 := &InterfaceItem{1, iVal, fVal, pVal, 11.5, nil} + // Register the types. + Register(Int(0)) + Register(Float(0)) + Register(Point{}) + err := NewEncoder(b).Encode(item1) + if err != nil { + t.Error("expected no encode error; got", err) + } + + item2 := NoInterfaceItem{} + err = NewDecoder(b).Decode(&item2) + if err != nil { + t.Fatal("decode:", err) + } + if item2.I != item1.I { + t.Error("normal int did not decode correctly") + } + if item2.F != item2.F { + t.Error("normal float did not decode correctly") + } +} + +type U struct { + A int + B string + c float64 + D uint +} + +func TestUnexportedFields(t *testing.T) { + var u0 U + u0.A = 17 + u0.B = "hello" + u0.c = 3.14159 + u0.D = 23 + b := new(bytes.Buffer) + NewEncoder(b).Encode(u0) + dec := NewDecoder(b) + var u1 U + u1.c = 1234. + err := dec.Decode(&u1) + if err != nil { + t.Fatal("decode error:", err) + } + if u0.A != u0.A || u0.B != u1.B || u0.D != u1.D { + t.Errorf("u1->u0: expected %v; got %v", u0, u1) + } + if u1.c != 1234. { + t.Error("u1.c modified") + } +} + +var singletons = []interface{}{ + true, + 7, + 3.2, + "hello", + [3]int{11, 22, 33}, + []float32{0.5, 0.25, 0.125}, + map[string]int{"one": 1, "two": 2}, +} + +func TestDebugSingleton(t *testing.T) { + if debugFunc == nil { + return + } + b := new(bytes.Buffer) + // Accumulate a number of values and print them out all at once. + for _, x := range singletons { + err := NewEncoder(b).Encode(x) + if err != nil { + t.Fatal("encode:", err) + } + } + debugFunc(b) +} + +// A type that won't be defined in the gob until we send it in an interface value. +type OnTheFly struct { + A int +} + +type DT struct { + // X OnTheFly + A int + B string + C float64 + I interface{} + J interface{} + I_nil interface{} + M map[string]int + T [3]int + S []string +} + +func TestDebugStruct(t *testing.T) { + if debugFunc == nil { + return + } + Register(OnTheFly{}) + var dt DT + dt.A = 17 + dt.B = "hello" + dt.C = 3.14159 + dt.I = 271828 + dt.J = OnTheFly{3} + dt.I_nil = nil + dt.M = map[string]int{"one": 1, "two": 2} + dt.T = [3]int{11, 22, 33} + dt.S = []string{"hi", "joe"} + b := new(bytes.Buffer) + err := NewEncoder(b).Encode(dt) + if err != nil { + t.Fatal("encode:", err) + } + debugBuffer := bytes.NewBuffer(b.Bytes()) + dt2 := &DT{} + err = NewDecoder(b).Decode(&dt2) + if err != nil { + t.Error("decode:", err) + } + debugFunc(debugBuffer) +} diff --git a/libgo/go/encoding/gob/debug.go b/libgo/go/encoding/gob/debug.go new file mode 100644 index 0000000..b21c7fa --- /dev/null +++ b/libgo/go/encoding/gob/debug.go @@ -0,0 +1,687 @@ +package gob + +// This file is not normally included in the gob package. Used only for debugging the package itself. +// Add debug.go to the files listed in the Makefile to add Debug to the gob package. +// Except for reading uints, it is an implementation of a reader that is independent of +// the one implemented by Decoder. + +import ( + "bytes" + "fmt" + "io" + "os" + "strings" + "sync" +) + +var dumpBytes = false // If true, print the remaining bytes in the input buffer at each item. + +// Init installs the debugging facility. If this file is not compiled in the +// package, the tests in codec_test.go are no-ops. +func init() { + debugFunc = Debug +} + +var ( + blanks = bytes.Repeat([]byte{' '}, 3*10) + empty = []byte(": <empty>\n") + tabs = strings.Repeat("\t", 100) +) + +// tab indents itself when printed. +type tab int + +func (t tab) String() string { + n := int(t) + if n > len(tabs) { + n = len(tabs) + } + return tabs[0:n] +} + +func (t tab) print() { + fmt.Fprint(os.Stderr, t) +} + +// A peekReader wraps an io.Reader, allowing one to peek ahead to see +// what's coming without stealing the data from the client of the Reader. +type peekReader struct { + r io.Reader + data []byte // read-ahead data +} + +// newPeekReader returns a peekReader that wraps r. +func newPeekReader(r io.Reader) *peekReader { + return &peekReader{r: r} +} + +// Read is the usual method. It will first take data that has been read ahead. +func (p *peekReader) Read(b []byte) (n int, err error) { + if len(p.data) == 0 { + return p.r.Read(b) + } + // Satisfy what's possible from the read-ahead data. + n = copy(b, p.data) + // Move data down to beginning of slice, to avoid endless growth + copy(p.data, p.data[n:]) + p.data = p.data[:len(p.data)-n] + return +} + +// peek returns as many bytes as possible from the unread +// portion of the stream, up to the length of b. +func (p *peekReader) peek(b []byte) (n int, err error) { + if len(p.data) > 0 { + n = copy(b, p.data) + if n == len(b) { + return + } + b = b[n:] + } + if len(b) == 0 { + return + } + m, e := io.ReadFull(p.r, b) + if m > 0 { + p.data = append(p.data, b[:m]...) + } + n += m + if e == io.ErrUnexpectedEOF { + // That means m > 0 but we reached EOF. If we got data + // we won't complain about not being able to peek enough. + if n > 0 { + e = nil + } else { + e = io.EOF + } + } + return n, e +} + +type debugger struct { + mutex sync.Mutex + remain int // the number of bytes known to remain in the input + remainingKnown bool // the value of 'remain' is valid + r *peekReader + wireType map[typeId]*wireType + tmp []byte // scratch space for decoding uints. +} + +// dump prints the next nBytes of the input. +// It arranges to print the output aligned from call to +// call, to make it easy to see what has been consumed. +func (deb *debugger) dump(format string, args ...interface{}) { + if !dumpBytes { + return + } + fmt.Fprintf(os.Stderr, format+" ", args...) + if !deb.remainingKnown { + return + } + if deb.remain < 0 { + fmt.Fprintf(os.Stderr, "remaining byte count is negative! %d\n", deb.remain) + return + } + data := make([]byte, deb.remain) + n, _ := deb.r.peek(data) + if n == 0 { + os.Stderr.Write(empty) + return + } + b := new(bytes.Buffer) + fmt.Fprintf(b, "[%d]{\n", deb.remain) + // Blanks until first byte + lineLength := 0 + if n := len(data); n%10 != 0 { + lineLength = 10 - n%10 + fmt.Fprintf(b, "\t%s", blanks[:lineLength*3]) + } + // 10 bytes per line + for len(data) > 0 { + if lineLength == 0 { + fmt.Fprint(b, "\t") + } + m := 10 - lineLength + lineLength = 0 + if m > len(data) { + m = len(data) + } + fmt.Fprintf(b, "% x\n", data[:m]) + data = data[m:] + } + fmt.Fprint(b, "}\n") + os.Stderr.Write(b.Bytes()) +} + +// Debug prints a human-readable representation of the gob data read from r. +// It is a no-op unless debugging was enabled when the package was built. +func Debug(r io.Reader) { + err := debug(r) + if err != nil { + fmt.Fprintf(os.Stderr, "gob debug: %s\n", err) + } +} + +// debug implements Debug, but catches panics and returns +// them as errors to be printed by Debug. +func debug(r io.Reader) (err error) { + defer catchError(&err) + fmt.Fprintln(os.Stderr, "Start of debugging") + deb := &debugger{ + r: newPeekReader(r), + wireType: make(map[typeId]*wireType), + tmp: make([]byte, 16), + } + if b, ok := r.(*bytes.Buffer); ok { + deb.remain = b.Len() + deb.remainingKnown = true + } + deb.gobStream() + return +} + +// note that we've consumed some bytes +func (deb *debugger) consumed(n int) { + if deb.remainingKnown { + deb.remain -= n + } +} + +// int64 decodes and returns the next integer, which must be present. +// Don't call this if you could be at EOF. +func (deb *debugger) int64() int64 { + return toInt(deb.uint64()) +} + +// uint64 returns and decodes the next unsigned integer, which must be present. +// Don't call this if you could be at EOF. +// TODO: handle errors better. +func (deb *debugger) uint64() uint64 { + n, w, err := decodeUintReader(deb.r, deb.tmp) + if err != nil { + errorf("debug: read error: %s", err) + } + deb.consumed(w) + return n +} + +// GobStream: +// DelimitedMessage* (until EOF) +func (deb *debugger) gobStream() { + // Make sure we're single-threaded through here. + deb.mutex.Lock() + defer deb.mutex.Unlock() + + for deb.delimitedMessage(0) { + } +} + +// DelimitedMessage: +// uint(lengthOfMessage) Message +func (deb *debugger) delimitedMessage(indent tab) bool { + for { + n := deb.loadBlock(true) + if n < 0 { + return false + } + deb.dump("Delimited message of length %d", n) + deb.message(indent) + } + return true +} + +// loadBlock preps us to read a message +// of the length specified next in the input. It returns +// the length of the block. The argument tells whether +// an EOF is acceptable now. If it is and one is found, +// the return value is negative. +func (deb *debugger) loadBlock(eofOK bool) int { + n64, w, err := decodeUintReader(deb.r, deb.tmp) // deb.uint64 will error at EOF + if err != nil { + if eofOK && err == io.EOF { + return -1 + } + errorf("debug: unexpected error: %s", err) + } + deb.consumed(w) + n := int(n64) + if n < 0 { + errorf("huge value for message length: %d", n64) + } + return int(n) +} + +// Message: +// TypeSequence TypedValue +// TypeSequence +// (TypeDefinition DelimitedTypeDefinition*)? +// DelimitedTypeDefinition: +// uint(lengthOfTypeDefinition) TypeDefinition +// TypedValue: +// int(typeId) Value +func (deb *debugger) message(indent tab) bool { + for { + // Convert the uint64 to a signed integer typeId + uid := deb.int64() + id := typeId(uid) + deb.dump("type id=%d", id) + if id < 0 { + deb.typeDefinition(indent, -id) + n := deb.loadBlock(false) + deb.dump("Message of length %d", n) + continue + } else { + deb.value(indent, id) + break + } + } + return true +} + +// Helper methods to make it easy to scan a type descriptor. + +// common returns the CommonType at the input point. +func (deb *debugger) common() CommonType { + fieldNum := -1 + name := "" + id := typeId(0) + for { + delta := deb.delta(-1) + if delta == 0 { + break + } + fieldNum += delta + switch fieldNum { + case 0: + name = deb.string() + case 1: + // Id typeId + id = deb.typeId() + default: + errorf("corrupted CommonType") + } + } + return CommonType{name, id} +} + +// uint returns the unsigned int at the input point, as a uint (not uint64). +func (deb *debugger) uint() uint { + return uint(deb.uint64()) +} + +// int returns the signed int at the input point, as an int (not int64). +func (deb *debugger) int() int { + return int(deb.int64()) +} + +// typeId returns the type id at the input point. +func (deb *debugger) typeId() typeId { + return typeId(deb.int64()) +} + +// string returns the string at the input point. +func (deb *debugger) string() string { + x := int(deb.uint64()) + b := make([]byte, x) + nb, _ := deb.r.Read(b) + if nb != x { + errorf("corrupted type") + } + deb.consumed(nb) + return string(b) +} + +// delta returns the field delta at the input point. The expect argument, +// if non-negative, identifies what the value should be. +func (deb *debugger) delta(expect int) int { + delta := int(deb.uint64()) + if delta < 0 || (expect >= 0 && delta != expect) { + errorf("decode: corrupted type: delta %d expected %d", delta, expect) + } + return delta +} + +// TypeDefinition: +// [int(-typeId) (already read)] encodingOfWireType +func (deb *debugger) typeDefinition(indent tab, id typeId) { + deb.dump("type definition for id %d", id) + // Encoding is of a wireType. Decode the structure as usual + fieldNum := -1 + wire := new(wireType) + // A wireType defines a single field. + delta := deb.delta(-1) + fieldNum += delta + switch fieldNum { + case 0: // array type, one field of {{Common}, elem, length} + // Field number 0 is CommonType + deb.delta(1) + com := deb.common() + // Field number 1 is type Id of elem + deb.delta(1) + id := deb.typeId() + // Field number 3 is length + deb.delta(1) + length := deb.int() + wire.ArrayT = &arrayType{com, id, length} + + case 1: // slice type, one field of {{Common}, elem} + // Field number 0 is CommonType + deb.delta(1) + com := deb.common() + // Field number 1 is type Id of elem + deb.delta(1) + id := deb.typeId() + wire.SliceT = &sliceType{com, id} + + case 2: // struct type, one field of {{Common}, []fieldType} + // Field number 0 is CommonType + deb.delta(1) + com := deb.common() + // Field number 1 is slice of FieldType + deb.delta(1) + numField := int(deb.uint()) + field := make([]*fieldType, numField) + for i := 0; i < numField; i++ { + field[i] = new(fieldType) + deb.delta(1) // field 0 of fieldType: name + field[i].Name = deb.string() + deb.delta(1) // field 1 of fieldType: id + field[i].Id = deb.typeId() + deb.delta(0) // end of fieldType + } + wire.StructT = &structType{com, field} + + case 3: // map type, one field of {{Common}, key, elem} + // Field number 0 is CommonType + deb.delta(1) + com := deb.common() + // Field number 1 is type Id of key + deb.delta(1) + keyId := deb.typeId() + // Field number 2 is type Id of elem + deb.delta(1) + elemId := deb.typeId() + wire.MapT = &mapType{com, keyId, elemId} + case 4: // GobEncoder type, one field of {{Common}} + // Field number 0 is CommonType + deb.delta(1) + com := deb.common() + wire.GobEncoderT = &gobEncoderType{com} + default: + errorf("bad field in type %d", fieldNum) + } + deb.printWireType(indent, wire) + deb.delta(0) // end inner type (arrayType, etc.) + deb.delta(0) // end wireType + // Remember we've seen this type. + deb.wireType[id] = wire +} + +// Value: +// SingletonValue | StructValue +func (deb *debugger) value(indent tab, id typeId) { + wire, ok := deb.wireType[id] + if ok && wire.StructT != nil { + deb.structValue(indent, id) + } else { + deb.singletonValue(indent, id) + } +} + +// SingletonValue: +// uint(0) FieldValue +func (deb *debugger) singletonValue(indent tab, id typeId) { + deb.dump("Singleton value") + // is it a builtin type? + wire := deb.wireType[id] + _, ok := builtinIdToType[id] + if !ok && wire == nil { + errorf("type id %d not defined", id) + } + m := deb.uint64() + if m != 0 { + errorf("expected zero; got %d", m) + } + deb.fieldValue(indent, id) +} + +// InterfaceValue: +// NilInterfaceValue | NonNilInterfaceValue +func (deb *debugger) interfaceValue(indent tab) { + deb.dump("Start of interface value") + if nameLen := deb.uint64(); nameLen == 0 { + deb.nilInterfaceValue(indent) + } else { + deb.nonNilInterfaceValue(indent, int(nameLen)) + } +} + +// NilInterfaceValue: +// uint(0) [already read] +func (deb *debugger) nilInterfaceValue(indent tab) int { + fmt.Fprintf(os.Stderr, "%snil interface\n", indent) + return 0 +} + +// NonNilInterfaceValue: +// ConcreteTypeName TypeSequence InterfaceContents +// ConcreteTypeName: +// uint(lengthOfName) [already read=n] name +// InterfaceContents: +// int(concreteTypeId) DelimitedValue +// DelimitedValue: +// uint(length) Value +func (deb *debugger) nonNilInterfaceValue(indent tab, nameLen int) { + // ConcreteTypeName + b := make([]byte, nameLen) + deb.r.Read(b) // TODO: CHECK THESE READS!! + deb.consumed(nameLen) + name := string(b) + + for { + id := deb.typeId() + if id < 0 { + deb.typeDefinition(indent, -id) + n := deb.loadBlock(false) + deb.dump("Nested message of length %d", n) + } else { + // DelimitedValue + x := deb.uint64() // in case we want to ignore the value; we don't. + fmt.Fprintf(os.Stderr, "%sinterface value, type %q id=%d; valueLength %d\n", indent, name, id, x) + deb.value(indent, id) + break + } + } +} + +// printCommonType prints a common type; used by printWireType. +func (deb *debugger) printCommonType(indent tab, kind string, common *CommonType) { + indent.print() + fmt.Fprintf(os.Stderr, "%s %q id=%d\n", kind, common.Name, common.Id) +} + +// printWireType prints the contents of a wireType. +func (deb *debugger) printWireType(indent tab, wire *wireType) { + fmt.Fprintf(os.Stderr, "%stype definition {\n", indent) + indent++ + switch { + case wire.ArrayT != nil: + deb.printCommonType(indent, "array", &wire.ArrayT.CommonType) + fmt.Fprintf(os.Stderr, "%slen %d\n", indent+1, wire.ArrayT.Len) + fmt.Fprintf(os.Stderr, "%selemid %d\n", indent+1, wire.ArrayT.Elem) + case wire.MapT != nil: + deb.printCommonType(indent, "map", &wire.MapT.CommonType) + fmt.Fprintf(os.Stderr, "%skey id=%d\n", indent+1, wire.MapT.Key) + fmt.Fprintf(os.Stderr, "%selem id=%d\n", indent+1, wire.MapT.Elem) + case wire.SliceT != nil: + deb.printCommonType(indent, "slice", &wire.SliceT.CommonType) + fmt.Fprintf(os.Stderr, "%selem id=%d\n", indent+1, wire.SliceT.Elem) + case wire.StructT != nil: + deb.printCommonType(indent, "struct", &wire.StructT.CommonType) + for i, field := range wire.StructT.Field { + fmt.Fprintf(os.Stderr, "%sfield %d:\t%s\tid=%d\n", indent+1, i, field.Name, field.Id) + } + case wire.GobEncoderT != nil: + deb.printCommonType(indent, "GobEncoder", &wire.GobEncoderT.CommonType) + } + indent-- + fmt.Fprintf(os.Stderr, "%s}\n", indent) +} + +// fieldValue prints a value of any type, such as a struct field. +// FieldValue: +// builtinValue | ArrayValue | MapValue | SliceValue | StructValue | InterfaceValue +func (deb *debugger) fieldValue(indent tab, id typeId) { + _, ok := builtinIdToType[id] + if ok { + if id == tInterface { + deb.interfaceValue(indent) + } else { + deb.printBuiltin(indent, id) + } + return + } + wire, ok := deb.wireType[id] + if !ok { + errorf("type id %d not defined", id) + } + switch { + case wire.ArrayT != nil: + deb.arrayValue(indent, wire) + case wire.MapT != nil: + deb.mapValue(indent, wire) + case wire.SliceT != nil: + deb.sliceValue(indent, wire) + case wire.StructT != nil: + deb.structValue(indent, id) + case wire.GobEncoderT != nil: + deb.gobEncoderValue(indent, id) + default: + panic("bad wire type for field") + } +} + +// printBuiltin prints a value not of a fundamental type, that is, +// one whose type is known to gobs at bootstrap time. +func (deb *debugger) printBuiltin(indent tab, id typeId) { + switch id { + case tBool: + x := deb.int64() + if x == 0 { + fmt.Fprintf(os.Stderr, "%sfalse\n", indent) + } else { + fmt.Fprintf(os.Stderr, "%strue\n", indent) + } + case tInt: + x := deb.int64() + fmt.Fprintf(os.Stderr, "%s%d\n", indent, x) + case tUint: + x := deb.int64() + fmt.Fprintf(os.Stderr, "%s%d\n", indent, x) + case tFloat: + x := deb.uint64() + fmt.Fprintf(os.Stderr, "%s%g\n", indent, floatFromBits(x)) + case tComplex: + r := deb.uint64() + i := deb.uint64() + fmt.Fprintf(os.Stderr, "%s%g+%gi\n", indent, floatFromBits(r), floatFromBits(i)) + case tBytes: + x := int(deb.uint64()) + b := make([]byte, x) + deb.r.Read(b) + deb.consumed(x) + fmt.Fprintf(os.Stderr, "%s{% x}=%q\n", indent, b, b) + case tString: + x := int(deb.uint64()) + b := make([]byte, x) + deb.r.Read(b) + deb.consumed(x) + fmt.Fprintf(os.Stderr, "%s%q\n", indent, b) + default: + panic("unknown builtin") + } +} + +// ArrayValue: +// uint(n) FieldValue*n +func (deb *debugger) arrayValue(indent tab, wire *wireType) { + elemId := wire.ArrayT.Elem + u := deb.uint64() + length := int(u) + for i := 0; i < length; i++ { + deb.fieldValue(indent, elemId) + } + if length != wire.ArrayT.Len { + fmt.Fprintf(os.Stderr, "%s(wrong length for array: %d should be %d)\n", indent, length, wire.ArrayT.Len) + } +} + +// MapValue: +// uint(n) (FieldValue FieldValue)*n [n (key, value) pairs] +func (deb *debugger) mapValue(indent tab, wire *wireType) { + keyId := wire.MapT.Key + elemId := wire.MapT.Elem + u := deb.uint64() + length := int(u) + for i := 0; i < length; i++ { + deb.fieldValue(indent+1, keyId) + deb.fieldValue(indent+1, elemId) + } +} + +// SliceValue: +// uint(n) (n FieldValue) +func (deb *debugger) sliceValue(indent tab, wire *wireType) { + elemId := wire.SliceT.Elem + u := deb.uint64() + length := int(u) + deb.dump("Start of slice of length %d", length) + + for i := 0; i < length; i++ { + deb.fieldValue(indent, elemId) + } +} + +// StructValue: +// (uint(fieldDelta) FieldValue)* +func (deb *debugger) structValue(indent tab, id typeId) { + deb.dump("Start of struct value of %q id=%d\n<<\n", id.name(), id) + fmt.Fprintf(os.Stderr, "%s%s struct {\n", indent, id.name()) + wire, ok := deb.wireType[id] + if !ok { + errorf("type id %d not defined", id) + } + strct := wire.StructT + fieldNum := -1 + indent++ + for { + delta := deb.uint64() + if delta == 0 { // struct terminator is zero delta fieldnum + break + } + fieldNum += int(delta) + if fieldNum < 0 || fieldNum >= len(strct.Field) { + deb.dump("field number out of range: prevField=%d delta=%d", fieldNum-int(delta), delta) + break + } + fmt.Fprintf(os.Stderr, "%sfield %d:\t%s\n", indent, fieldNum, wire.StructT.Field[fieldNum].Name) + deb.fieldValue(indent+1, strct.Field[fieldNum].Id) + } + indent-- + fmt.Fprintf(os.Stderr, "%s} // end %s struct\n", indent, id.name()) + deb.dump(">> End of struct value of type %d %q", id, id.name()) +} + +// GobEncoderValue: +// uint(n) byte*n +func (deb *debugger) gobEncoderValue(indent tab, id typeId) { + len := deb.uint64() + deb.dump("GobEncoder value of %q id=%d, length %d\n", id.name(), id, len) + fmt.Fprintf(os.Stderr, "%s%s (implements GobEncoder)\n", indent, id.name()) + data := make([]byte, len) + _, err := deb.r.Read(data) + if err != nil { + errorf("gobEncoder data read: %s", err) + } + fmt.Fprintf(os.Stderr, "%s[% .2x]\n", indent+1, data) +} diff --git a/libgo/go/encoding/gob/decode.go b/libgo/go/encoding/gob/decode.go new file mode 100644 index 0000000..1515d12 --- /dev/null +++ b/libgo/go/encoding/gob/decode.go @@ -0,0 +1,1279 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gob + +// TODO(rsc): When garbage collector changes, revisit +// the allocations in this file that use unsafe.Pointer. + +import ( + "bytes" + "errors" + "io" + "math" + "reflect" + "unsafe" +) + +var ( + errBadUint = errors.New("gob: encoded unsigned integer out of range") + errBadType = errors.New("gob: unknown type id or corrupted data") + errRange = errors.New("gob: bad data: field numbers out of bounds") +) + +// decoderState is the execution state of an instance of the decoder. A new state +// is created for nested objects. +type decoderState struct { + dec *Decoder + // The buffer is stored with an extra indirection because it may be replaced + // if we load a type during decode (when reading an interface value). + b *bytes.Buffer + fieldnum int // the last field number read. + buf []byte + next *decoderState // for free list +} + +// We pass the bytes.Buffer separately for easier testing of the infrastructure +// without requiring a full Decoder. +func (dec *Decoder) newDecoderState(buf *bytes.Buffer) *decoderState { + d := dec.freeList + if d == nil { + d = new(decoderState) + d.dec = dec + d.buf = make([]byte, uint64Size) + } else { + dec.freeList = d.next + } + d.b = buf + return d +} + +func (dec *Decoder) freeDecoderState(d *decoderState) { + d.next = dec.freeList + dec.freeList = d +} + +func overflow(name string) error { + return errors.New(`value for "` + name + `" out of range`) +} + +// decodeUintReader reads an encoded unsigned integer from an io.Reader. +// Used only by the Decoder to read the message length. +func decodeUintReader(r io.Reader, buf []byte) (x uint64, width int, err error) { + width = 1 + _, err = r.Read(buf[0:width]) + if err != nil { + return + } + b := buf[0] + if b <= 0x7f { + return uint64(b), width, nil + } + n := -int(int8(b)) + if n > uint64Size { + err = errBadUint + return + } + width, err = io.ReadFull(r, buf[0:n]) + if err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + return + } + // Could check that the high byte is zero but it's not worth it. + for _, b := range buf[0:width] { + x = x<<8 | uint64(b) + } + width++ // +1 for length byte + return +} + +// decodeUint reads an encoded unsigned integer from state.r. +// Does not check for overflow. +func (state *decoderState) decodeUint() (x uint64) { + b, err := state.b.ReadByte() + if err != nil { + error_(err) + } + if b <= 0x7f { + return uint64(b) + } + n := -int(int8(b)) + if n > uint64Size { + error_(errBadUint) + } + width, err := state.b.Read(state.buf[0:n]) + if err != nil { + error_(err) + } + // Don't need to check error; it's safe to loop regardless. + // Could check that the high byte is zero but it's not worth it. + for _, b := range state.buf[0:width] { + x = x<<8 | uint64(b) + } + return x +} + +// decodeInt reads an encoded signed integer from state.r. +// Does not check for overflow. +func (state *decoderState) decodeInt() int64 { + x := state.decodeUint() + if x&1 != 0 { + return ^int64(x >> 1) + } + return int64(x >> 1) +} + +// decOp is the signature of a decoding operator for a given type. +type decOp func(i *decInstr, state *decoderState, p unsafe.Pointer) + +// The 'instructions' of the decoding machine +type decInstr struct { + op decOp + field int // field number of the wire type + indir int // how many pointer indirections to reach the value in the struct + offset uintptr // offset in the structure of the field to encode + ovfl error // error message for overflow/underflow (for arrays, of the elements) +} + +// Since the encoder writes no zeros, if we arrive at a decoder we have +// a value to extract and store. The field number has already been read +// (it's how we knew to call this decoder). +// Each decoder is responsible for handling any indirections associated +// with the data structure. If any pointer so reached is nil, allocation must +// be done. + +// Walk the pointer hierarchy, allocating if we find a nil. Stop one before the end. +func decIndirect(p unsafe.Pointer, indir int) unsafe.Pointer { + for ; indir > 1; indir-- { + if *(*unsafe.Pointer)(p) == nil { + // Allocation required + *(*unsafe.Pointer)(p) = unsafe.Pointer(new(unsafe.Pointer)) + } + p = *(*unsafe.Pointer)(p) + } + return p +} + +// ignoreUint discards a uint value with no destination. +func ignoreUint(i *decInstr, state *decoderState, p unsafe.Pointer) { + state.decodeUint() +} + +// ignoreTwoUints discards a uint value with no destination. It's used to skip +// complex values. +func ignoreTwoUints(i *decInstr, state *decoderState, p unsafe.Pointer) { + state.decodeUint() + state.decodeUint() +} + +// decBool decodes a uint and stores it as a boolean through p. +func decBool(i *decInstr, state *decoderState, p unsafe.Pointer) { + if i.indir > 0 { + if *(*unsafe.Pointer)(p) == nil { + *(*unsafe.Pointer)(p) = unsafe.Pointer(new(bool)) + } + p = *(*unsafe.Pointer)(p) + } + *(*bool)(p) = state.decodeUint() != 0 +} + +// decInt8 decodes an integer and stores it as an int8 through p. +func decInt8(i *decInstr, state *decoderState, p unsafe.Pointer) { + if i.indir > 0 { + if *(*unsafe.Pointer)(p) == nil { + *(*unsafe.Pointer)(p) = unsafe.Pointer(new(int8)) + } + p = *(*unsafe.Pointer)(p) + } + v := state.decodeInt() + if v < math.MinInt8 || math.MaxInt8 < v { + error_(i.ovfl) + } else { + *(*int8)(p) = int8(v) + } +} + +// decUint8 decodes an unsigned integer and stores it as a uint8 through p. +func decUint8(i *decInstr, state *decoderState, p unsafe.Pointer) { + if i.indir > 0 { + if *(*unsafe.Pointer)(p) == nil { + *(*unsafe.Pointer)(p) = unsafe.Pointer(new(uint8)) + } + p = *(*unsafe.Pointer)(p) + } + v := state.decodeUint() + if math.MaxUint8 < v { + error_(i.ovfl) + } else { + *(*uint8)(p) = uint8(v) + } +} + +// decInt16 decodes an integer and stores it as an int16 through p. +func decInt16(i *decInstr, state *decoderState, p unsafe.Pointer) { + if i.indir > 0 { + if *(*unsafe.Pointer)(p) == nil { + *(*unsafe.Pointer)(p) = unsafe.Pointer(new(int16)) + } + p = *(*unsafe.Pointer)(p) + } + v := state.decodeInt() + if v < math.MinInt16 || math.MaxInt16 < v { + error_(i.ovfl) + } else { + *(*int16)(p) = int16(v) + } +} + +// decUint16 decodes an unsigned integer and stores it as a uint16 through p. +func decUint16(i *decInstr, state *decoderState, p unsafe.Pointer) { + if i.indir > 0 { + if *(*unsafe.Pointer)(p) == nil { + *(*unsafe.Pointer)(p) = unsafe.Pointer(new(uint16)) + } + p = *(*unsafe.Pointer)(p) + } + v := state.decodeUint() + if math.MaxUint16 < v { + error_(i.ovfl) + } else { + *(*uint16)(p) = uint16(v) + } +} + +// decInt32 decodes an integer and stores it as an int32 through p. +func decInt32(i *decInstr, state *decoderState, p unsafe.Pointer) { + if i.indir > 0 { + if *(*unsafe.Pointer)(p) == nil { + *(*unsafe.Pointer)(p) = unsafe.Pointer(new(int32)) + } + p = *(*unsafe.Pointer)(p) + } + v := state.decodeInt() + if v < math.MinInt32 || math.MaxInt32 < v { + error_(i.ovfl) + } else { + *(*int32)(p) = int32(v) + } +} + +// decUint32 decodes an unsigned integer and stores it as a uint32 through p. +func decUint32(i *decInstr, state *decoderState, p unsafe.Pointer) { + if i.indir > 0 { + if *(*unsafe.Pointer)(p) == nil { + *(*unsafe.Pointer)(p) = unsafe.Pointer(new(uint32)) + } + p = *(*unsafe.Pointer)(p) + } + v := state.decodeUint() + if math.MaxUint32 < v { + error_(i.ovfl) + } else { + *(*uint32)(p) = uint32(v) + } +} + +// decInt64 decodes an integer and stores it as an int64 through p. +func decInt64(i *decInstr, state *decoderState, p unsafe.Pointer) { + if i.indir > 0 { + if *(*unsafe.Pointer)(p) == nil { + *(*unsafe.Pointer)(p) = unsafe.Pointer(new(int64)) + } + p = *(*unsafe.Pointer)(p) + } + *(*int64)(p) = int64(state.decodeInt()) +} + +// decUint64 decodes an unsigned integer and stores it as a uint64 through p. +func decUint64(i *decInstr, state *decoderState, p unsafe.Pointer) { + if i.indir > 0 { + if *(*unsafe.Pointer)(p) == nil { + *(*unsafe.Pointer)(p) = unsafe.Pointer(new(uint64)) + } + p = *(*unsafe.Pointer)(p) + } + *(*uint64)(p) = uint64(state.decodeUint()) +} + +// Floating-point numbers are transmitted as uint64s holding the bits +// of the underlying representation. They are sent byte-reversed, with +// the exponent end coming out first, so integer floating point numbers +// (for example) transmit more compactly. This routine does the +// unswizzling. +func floatFromBits(u uint64) float64 { + var v uint64 + for i := 0; i < 8; i++ { + v <<= 8 + v |= u & 0xFF + u >>= 8 + } + return math.Float64frombits(v) +} + +// storeFloat32 decodes an unsigned integer, treats it as a 32-bit floating-point +// number, and stores it through p. It's a helper function for float32 and complex64. +func storeFloat32(i *decInstr, state *decoderState, p unsafe.Pointer) { + v := floatFromBits(state.decodeUint()) + av := v + if av < 0 { + av = -av + } + // +Inf is OK in both 32- and 64-bit floats. Underflow is always OK. + if math.MaxFloat32 < av && av <= math.MaxFloat64 { + error_(i.ovfl) + } else { + *(*float32)(p) = float32(v) + } +} + +// decFloat32 decodes an unsigned integer, treats it as a 32-bit floating-point +// number, and stores it through p. +func decFloat32(i *decInstr, state *decoderState, p unsafe.Pointer) { + if i.indir > 0 { + if *(*unsafe.Pointer)(p) == nil { + *(*unsafe.Pointer)(p) = unsafe.Pointer(new(float32)) + } + p = *(*unsafe.Pointer)(p) + } + storeFloat32(i, state, p) +} + +// decFloat64 decodes an unsigned integer, treats it as a 64-bit floating-point +// number, and stores it through p. +func decFloat64(i *decInstr, state *decoderState, p unsafe.Pointer) { + if i.indir > 0 { + if *(*unsafe.Pointer)(p) == nil { + *(*unsafe.Pointer)(p) = unsafe.Pointer(new(float64)) + } + p = *(*unsafe.Pointer)(p) + } + *(*float64)(p) = floatFromBits(uint64(state.decodeUint())) +} + +// decComplex64 decodes a pair of unsigned integers, treats them as a +// pair of floating point numbers, and stores them as a complex64 through p. +// The real part comes first. +func decComplex64(i *decInstr, state *decoderState, p unsafe.Pointer) { + if i.indir > 0 { + if *(*unsafe.Pointer)(p) == nil { + *(*unsafe.Pointer)(p) = unsafe.Pointer(new(complex64)) + } + p = *(*unsafe.Pointer)(p) + } + storeFloat32(i, state, p) + storeFloat32(i, state, unsafe.Pointer(uintptr(p)+unsafe.Sizeof(float32(0)))) +} + +// decComplex128 decodes a pair of unsigned integers, treats them as a +// pair of floating point numbers, and stores them as a complex128 through p. +// The real part comes first. +func decComplex128(i *decInstr, state *decoderState, p unsafe.Pointer) { + if i.indir > 0 { + if *(*unsafe.Pointer)(p) == nil { + *(*unsafe.Pointer)(p) = unsafe.Pointer(new(complex128)) + } + p = *(*unsafe.Pointer)(p) + } + real := floatFromBits(uint64(state.decodeUint())) + imag := floatFromBits(uint64(state.decodeUint())) + *(*complex128)(p) = complex(real, imag) +} + +// decUint8Slice decodes a byte slice and stores through p a slice header +// describing the data. +// uint8 slices are encoded as an unsigned count followed by the raw bytes. +func decUint8Slice(i *decInstr, state *decoderState, p unsafe.Pointer) { + if i.indir > 0 { + if *(*unsafe.Pointer)(p) == nil { + *(*unsafe.Pointer)(p) = unsafe.Pointer(new([]uint8)) + } + p = *(*unsafe.Pointer)(p) + } + n := int(state.decodeUint()) + if n < 0 { + errorf("negative length decoding []byte") + } + slice := (*[]uint8)(p) + if cap(*slice) < n { + *slice = make([]uint8, n) + } else { + *slice = (*slice)[0:n] + } + if _, err := state.b.Read(*slice); err != nil { + errorf("error decoding []byte: %s", err) + } +} + +// decString decodes byte array and stores through p a string header +// describing the data. +// Strings are encoded as an unsigned count followed by the raw bytes. +func decString(i *decInstr, state *decoderState, p unsafe.Pointer) { + if i.indir > 0 { + if *(*unsafe.Pointer)(p) == nil { + *(*unsafe.Pointer)(p) = unsafe.Pointer(new(string)) + } + p = *(*unsafe.Pointer)(p) + } + b := make([]byte, state.decodeUint()) + state.b.Read(b) + // It would be a shame to do the obvious thing here, + // *(*string)(p) = string(b) + // because we've already allocated the storage and this would + // allocate again and copy. So we do this ugly hack, which is even + // even more unsafe than it looks as it depends the memory + // representation of a string matching the beginning of the memory + // representation of a byte slice (a byte slice is longer). + *(*string)(p) = *(*string)(unsafe.Pointer(&b)) +} + +// ignoreUint8Array skips over the data for a byte slice value with no destination. +func ignoreUint8Array(i *decInstr, state *decoderState, p unsafe.Pointer) { + b := make([]byte, state.decodeUint()) + state.b.Read(b) +} + +// Execution engine + +// The encoder engine is an array of instructions indexed by field number of the incoming +// decoder. It is executed with random access according to field number. +type decEngine struct { + instr []decInstr + numInstr int // the number of active instructions +} + +// allocate makes sure storage is available for an object of underlying type rtyp +// that is indir levels of indirection through p. +func allocate(rtyp reflect.Type, p uintptr, indir int) uintptr { + if indir == 0 { + return p + } + up := unsafe.Pointer(p) + if indir > 1 { + up = decIndirect(up, indir) + } + if *(*unsafe.Pointer)(up) == nil { + // Allocate object. + *(*unsafe.Pointer)(up) = unsafe.New(rtyp) + } + return *(*uintptr)(up) +} + +// decodeSingle decodes a top-level value that is not a struct and stores it through p. +// Such values are preceded by a zero, making them have the memory layout of a +// struct field (although with an illegal field number). +func (dec *Decoder) decodeSingle(engine *decEngine, ut *userTypeInfo, basep uintptr) (err error) { + state := dec.newDecoderState(&dec.buf) + state.fieldnum = singletonField + delta := int(state.decodeUint()) + if delta != 0 { + errorf("decode: corrupted data: non-zero delta for singleton") + } + instr := &engine.instr[singletonField] + if instr.indir != ut.indir { + return errors.New("gob: internal error: inconsistent indirection") + } + ptr := unsafe.Pointer(basep) // offset will be zero + if instr.indir > 1 { + ptr = decIndirect(ptr, instr.indir) + } + instr.op(instr, state, ptr) + dec.freeDecoderState(state) + return nil +} + +// decodeSingle decodes a top-level struct and stores it through p. +// Indir is for the value, not the type. At the time of the call it may +// differ from ut.indir, which was computed when the engine was built. +// This state cannot arise for decodeSingle, which is called directly +// from the user's value, not from the innards of an engine. +func (dec *Decoder) decodeStruct(engine *decEngine, ut *userTypeInfo, p uintptr, indir int) { + p = allocate(ut.base, p, indir) + state := dec.newDecoderState(&dec.buf) + state.fieldnum = -1 + basep := p + for state.b.Len() > 0 { + delta := int(state.decodeUint()) + if delta < 0 { + errorf("decode: corrupted data: negative delta") + } + if delta == 0 { // struct terminator is zero delta fieldnum + break + } + fieldnum := state.fieldnum + delta + if fieldnum >= len(engine.instr) { + error_(errRange) + break + } + instr := &engine.instr[fieldnum] + p := unsafe.Pointer(basep + instr.offset) + if instr.indir > 1 { + p = decIndirect(p, instr.indir) + } + instr.op(instr, state, p) + state.fieldnum = fieldnum + } + dec.freeDecoderState(state) +} + +// ignoreStruct discards the data for a struct with no destination. +func (dec *Decoder) ignoreStruct(engine *decEngine) { + state := dec.newDecoderState(&dec.buf) + state.fieldnum = -1 + for state.b.Len() > 0 { + delta := int(state.decodeUint()) + if delta < 0 { + errorf("ignore decode: corrupted data: negative delta") + } + if delta == 0 { // struct terminator is zero delta fieldnum + break + } + fieldnum := state.fieldnum + delta + if fieldnum >= len(engine.instr) { + error_(errRange) + } + instr := &engine.instr[fieldnum] + instr.op(instr, state, unsafe.Pointer(nil)) + state.fieldnum = fieldnum + } + dec.freeDecoderState(state) +} + +// ignoreSingle discards the data for a top-level non-struct value with no +// destination. It's used when calling Decode with a nil value. +func (dec *Decoder) ignoreSingle(engine *decEngine) { + state := dec.newDecoderState(&dec.buf) + state.fieldnum = singletonField + delta := int(state.decodeUint()) + if delta != 0 { + errorf("decode: corrupted data: non-zero delta for singleton") + } + instr := &engine.instr[singletonField] + instr.op(instr, state, unsafe.Pointer(nil)) + dec.freeDecoderState(state) +} + +// decodeArrayHelper does the work for decoding arrays and slices. +func (dec *Decoder) decodeArrayHelper(state *decoderState, p uintptr, elemOp decOp, elemWid uintptr, length, elemIndir int, ovfl error) { + instr := &decInstr{elemOp, 0, elemIndir, 0, ovfl} + for i := 0; i < length; i++ { + up := unsafe.Pointer(p) + if elemIndir > 1 { + up = decIndirect(up, elemIndir) + } + elemOp(instr, state, up) + p += uintptr(elemWid) + } +} + +// decodeArray decodes an array and stores it through p, that is, p points to the zeroth element. +// The length is an unsigned integer preceding the elements. Even though the length is redundant +// (it's part of the type), it's a useful check and is included in the encoding. +func (dec *Decoder) decodeArray(atyp reflect.Type, state *decoderState, p uintptr, elemOp decOp, elemWid uintptr, length, indir, elemIndir int, ovfl error) { + if indir > 0 { + p = allocate(atyp, p, 1) // All but the last level has been allocated by dec.Indirect + } + if n := state.decodeUint(); n != uint64(length) { + errorf("length mismatch in decodeArray") + } + dec.decodeArrayHelper(state, p, elemOp, elemWid, length, elemIndir, ovfl) +} + +// decodeIntoValue is a helper for map decoding. Since maps are decoded using reflection, +// unlike the other items we can't use a pointer directly. +func decodeIntoValue(state *decoderState, op decOp, indir int, v reflect.Value, ovfl error) reflect.Value { + instr := &decInstr{op, 0, indir, 0, ovfl} + up := unsafe.Pointer(unsafeAddr(v)) + if indir > 1 { + up = decIndirect(up, indir) + } + op(instr, state, up) + return v +} + +// decodeMap decodes a map and stores its header through p. +// Maps are encoded as a length followed by key:value pairs. +// Because the internals of maps are not visible to us, we must +// use reflection rather than pointer magic. +func (dec *Decoder) decodeMap(mtyp reflect.Type, state *decoderState, p uintptr, keyOp, elemOp decOp, indir, keyIndir, elemIndir int, ovfl error) { + if indir > 0 { + p = allocate(mtyp, p, 1) // All but the last level has been allocated by dec.Indirect + } + up := unsafe.Pointer(p) + if *(*unsafe.Pointer)(up) == nil { // maps are represented as a pointer in the runtime + // Allocate map. + *(*unsafe.Pointer)(up) = unsafe.Pointer(reflect.MakeMap(mtyp).Pointer()) + } + // Maps cannot be accessed by moving addresses around the way + // that slices etc. can. We must recover a full reflection value for + // the iteration. + v := reflect.ValueOf(unsafe.Unreflect(mtyp, unsafe.Pointer(p))) + n := int(state.decodeUint()) + for i := 0; i < n; i++ { + key := decodeIntoValue(state, keyOp, keyIndir, allocValue(mtyp.Key()), ovfl) + elem := decodeIntoValue(state, elemOp, elemIndir, allocValue(mtyp.Elem()), ovfl) + v.SetMapIndex(key, elem) + } +} + +// ignoreArrayHelper does the work for discarding arrays and slices. +func (dec *Decoder) ignoreArrayHelper(state *decoderState, elemOp decOp, length int) { + instr := &decInstr{elemOp, 0, 0, 0, errors.New("no error")} + for i := 0; i < length; i++ { + elemOp(instr, state, nil) + } +} + +// ignoreArray discards the data for an array value with no destination. +func (dec *Decoder) ignoreArray(state *decoderState, elemOp decOp, length int) { + if n := state.decodeUint(); n != uint64(length) { + errorf("length mismatch in ignoreArray") + } + dec.ignoreArrayHelper(state, elemOp, length) +} + +// ignoreMap discards the data for a map value with no destination. +func (dec *Decoder) ignoreMap(state *decoderState, keyOp, elemOp decOp) { + n := int(state.decodeUint()) + keyInstr := &decInstr{keyOp, 0, 0, 0, errors.New("no error")} + elemInstr := &decInstr{elemOp, 0, 0, 0, errors.New("no error")} + for i := 0; i < n; i++ { + keyOp(keyInstr, state, nil) + elemOp(elemInstr, state, nil) + } +} + +// decodeSlice decodes a slice and stores the slice header through p. +// Slices are encoded as an unsigned length followed by the elements. +func (dec *Decoder) decodeSlice(atyp reflect.Type, state *decoderState, p uintptr, elemOp decOp, elemWid uintptr, indir, elemIndir int, ovfl error) { + n := int(uintptr(state.decodeUint())) + if indir > 0 { + up := unsafe.Pointer(p) + if *(*unsafe.Pointer)(up) == nil { + // Allocate the slice header. + *(*unsafe.Pointer)(up) = unsafe.Pointer(new([]unsafe.Pointer)) + } + p = *(*uintptr)(up) + } + // Allocate storage for the slice elements, that is, the underlying array, + // if the existing slice does not have the capacity. + // Always write a header at p. + hdrp := (*reflect.SliceHeader)(unsafe.Pointer(p)) + if hdrp.Cap < n { + hdrp.Data = uintptr(unsafe.NewArray(atyp.Elem(), n)) + hdrp.Cap = n + } + hdrp.Len = n + dec.decodeArrayHelper(state, hdrp.Data, elemOp, elemWid, n, elemIndir, ovfl) +} + +// ignoreSlice skips over the data for a slice value with no destination. +func (dec *Decoder) ignoreSlice(state *decoderState, elemOp decOp) { + dec.ignoreArrayHelper(state, elemOp, int(state.decodeUint())) +} + +// setInterfaceValue sets an interface value to a concrete value, +// but first it checks that the assignment will succeed. +func setInterfaceValue(ivalue reflect.Value, value reflect.Value) { + if !value.Type().AssignableTo(ivalue.Type()) { + errorf("cannot assign value of type %s to %s", value.Type(), ivalue.Type()) + } + ivalue.Set(value) +} + +// decodeInterface decodes an interface value and stores it through p. +// Interfaces are encoded as the name of a concrete type followed by a value. +// If the name is empty, the value is nil and no value is sent. +func (dec *Decoder) decodeInterface(ityp reflect.Type, state *decoderState, p uintptr, indir int) { + // Create a writable interface reflect.Value. We need one even for the nil case. + ivalue := allocValue(ityp) + // Read the name of the concrete type. + b := make([]byte, state.decodeUint()) + state.b.Read(b) + name := string(b) + if name == "" { + // Copy the representation of the nil interface value to the target. + // This is horribly unsafe and special. + *(*[2]uintptr)(unsafe.Pointer(p)) = ivalue.InterfaceData() + return + } + // The concrete type must be registered. + typ, ok := nameToConcreteType[name] + if !ok { + errorf("name not registered for interface: %q", name) + } + // Read the type id of the concrete value. + concreteId := dec.decodeTypeSequence(true) + if concreteId < 0 { + error_(dec.err) + } + // Byte count of value is next; we don't care what it is (it's there + // in case we want to ignore the value by skipping it completely). + state.decodeUint() + // Read the concrete value. + value := allocValue(typ) + dec.decodeValue(concreteId, value) + if dec.err != nil { + error_(dec.err) + } + // Allocate the destination interface value. + if indir > 0 { + p = allocate(ityp, p, 1) // All but the last level has been allocated by dec.Indirect + } + // Assign the concrete value to the interface. + // Tread carefully; it might not satisfy the interface. + setInterfaceValue(ivalue, value) + // Copy the representation of the interface value to the target. + // This is horribly unsafe and special. + *(*[2]uintptr)(unsafe.Pointer(p)) = ivalue.InterfaceData() +} + +// ignoreInterface discards the data for an interface value with no destination. +func (dec *Decoder) ignoreInterface(state *decoderState) { + // Read the name of the concrete type. + b := make([]byte, state.decodeUint()) + _, err := state.b.Read(b) + if err != nil { + error_(err) + } + id := dec.decodeTypeSequence(true) + if id < 0 { + error_(dec.err) + } + // At this point, the decoder buffer contains a delimited value. Just toss it. + state.b.Next(int(state.decodeUint())) +} + +// decodeGobDecoder decodes something implementing the GobDecoder interface. +// The data is encoded as a byte slice. +func (dec *Decoder) decodeGobDecoder(state *decoderState, v reflect.Value) { + // Read the bytes for the value. + b := make([]byte, state.decodeUint()) + _, err := state.b.Read(b) + if err != nil { + error_(err) + } + // We know it's a GobDecoder, so just call the method directly. + err = v.Interface().(GobDecoder).GobDecode(b) + if err != nil { + error_(err) + } +} + +// ignoreGobDecoder discards the data for a GobDecoder value with no destination. +func (dec *Decoder) ignoreGobDecoder(state *decoderState) { + // Read the bytes for the value. + b := make([]byte, state.decodeUint()) + _, err := state.b.Read(b) + if err != nil { + error_(err) + } +} + +// Index by Go types. +var decOpTable = [...]decOp{ + reflect.Bool: decBool, + reflect.Int8: decInt8, + reflect.Int16: decInt16, + reflect.Int32: decInt32, + reflect.Int64: decInt64, + reflect.Uint8: decUint8, + reflect.Uint16: decUint16, + reflect.Uint32: decUint32, + reflect.Uint64: decUint64, + reflect.Float32: decFloat32, + reflect.Float64: decFloat64, + reflect.Complex64: decComplex64, + reflect.Complex128: decComplex128, + reflect.String: decString, +} + +// Indexed by gob types. tComplex will be added during type.init(). +var decIgnoreOpMap = map[typeId]decOp{ + tBool: ignoreUint, + tInt: ignoreUint, + tUint: ignoreUint, + tFloat: ignoreUint, + tBytes: ignoreUint8Array, + tString: ignoreUint8Array, + tComplex: ignoreTwoUints, +} + +// decOpFor returns the decoding op for the base type under rt and +// the indirection count to reach it. +func (dec *Decoder) decOpFor(wireId typeId, rt reflect.Type, name string, inProgress map[reflect.Type]*decOp) (*decOp, int) { + ut := userType(rt) + // If the type implements GobEncoder, we handle it without further processing. + if ut.isGobDecoder { + return dec.gobDecodeOpFor(ut) + } + // If this type is already in progress, it's a recursive type (e.g. map[string]*T). + // Return the pointer to the op we're already building. + if opPtr := inProgress[rt]; opPtr != nil { + return opPtr, ut.indir + } + typ := ut.base + indir := ut.indir + var op decOp + k := typ.Kind() + if int(k) < len(decOpTable) { + op = decOpTable[k] + } + if op == nil { + inProgress[rt] = &op + // Special cases + switch t := typ; t.Kind() { + case reflect.Array: + name = "element of " + name + elemId := dec.wireType[wireId].ArrayT.Elem + elemOp, elemIndir := dec.decOpFor(elemId, t.Elem(), name, inProgress) + ovfl := overflow(name) + op = func(i *decInstr, state *decoderState, p unsafe.Pointer) { + state.dec.decodeArray(t, state, uintptr(p), *elemOp, t.Elem().Size(), t.Len(), i.indir, elemIndir, ovfl) + } + + case reflect.Map: + name = "element of " + name + keyId := dec.wireType[wireId].MapT.Key + elemId := dec.wireType[wireId].MapT.Elem + keyOp, keyIndir := dec.decOpFor(keyId, t.Key(), name, inProgress) + elemOp, elemIndir := dec.decOpFor(elemId, t.Elem(), name, inProgress) + ovfl := overflow(name) + op = func(i *decInstr, state *decoderState, p unsafe.Pointer) { + up := unsafe.Pointer(p) + state.dec.decodeMap(t, state, uintptr(up), *keyOp, *elemOp, i.indir, keyIndir, elemIndir, ovfl) + } + + case reflect.Slice: + name = "element of " + name + if t.Elem().Kind() == reflect.Uint8 { + op = decUint8Slice + break + } + var elemId typeId + if tt, ok := builtinIdToType[wireId]; ok { + elemId = tt.(*sliceType).Elem + } else { + elemId = dec.wireType[wireId].SliceT.Elem + } + elemOp, elemIndir := dec.decOpFor(elemId, t.Elem(), name, inProgress) + ovfl := overflow(name) + op = func(i *decInstr, state *decoderState, p unsafe.Pointer) { + state.dec.decodeSlice(t, state, uintptr(p), *elemOp, t.Elem().Size(), i.indir, elemIndir, ovfl) + } + + case reflect.Struct: + // Generate a closure that calls out to the engine for the nested type. + enginePtr, err := dec.getDecEnginePtr(wireId, userType(typ)) + if err != nil { + error_(err) + } + op = func(i *decInstr, state *decoderState, p unsafe.Pointer) { + // indirect through enginePtr to delay evaluation for recursive structs. + dec.decodeStruct(*enginePtr, userType(typ), uintptr(p), i.indir) + } + case reflect.Interface: + op = func(i *decInstr, state *decoderState, p unsafe.Pointer) { + state.dec.decodeInterface(t, state, uintptr(p), i.indir) + } + } + } + if op == nil { + errorf("decode can't handle type %s", rt) + } + return &op, indir +} + +// decIgnoreOpFor returns the decoding op for a field that has no destination. +func (dec *Decoder) decIgnoreOpFor(wireId typeId) decOp { + op, ok := decIgnoreOpMap[wireId] + if !ok { + if wireId == tInterface { + // Special case because it's a method: the ignored item might + // define types and we need to record their state in the decoder. + op = func(i *decInstr, state *decoderState, p unsafe.Pointer) { + state.dec.ignoreInterface(state) + } + return op + } + // Special cases + wire := dec.wireType[wireId] + switch { + case wire == nil: + errorf("bad data: undefined type %s", wireId.string()) + case wire.ArrayT != nil: + elemId := wire.ArrayT.Elem + elemOp := dec.decIgnoreOpFor(elemId) + op = func(i *decInstr, state *decoderState, p unsafe.Pointer) { + state.dec.ignoreArray(state, elemOp, wire.ArrayT.Len) + } + + case wire.MapT != nil: + keyId := dec.wireType[wireId].MapT.Key + elemId := dec.wireType[wireId].MapT.Elem + keyOp := dec.decIgnoreOpFor(keyId) + elemOp := dec.decIgnoreOpFor(elemId) + op = func(i *decInstr, state *decoderState, p unsafe.Pointer) { + state.dec.ignoreMap(state, keyOp, elemOp) + } + + case wire.SliceT != nil: + elemId := wire.SliceT.Elem + elemOp := dec.decIgnoreOpFor(elemId) + op = func(i *decInstr, state *decoderState, p unsafe.Pointer) { + state.dec.ignoreSlice(state, elemOp) + } + + case wire.StructT != nil: + // Generate a closure that calls out to the engine for the nested type. + enginePtr, err := dec.getIgnoreEnginePtr(wireId) + if err != nil { + error_(err) + } + op = func(i *decInstr, state *decoderState, p unsafe.Pointer) { + // indirect through enginePtr to delay evaluation for recursive structs + state.dec.ignoreStruct(*enginePtr) + } + + case wire.GobEncoderT != nil: + op = func(i *decInstr, state *decoderState, p unsafe.Pointer) { + state.dec.ignoreGobDecoder(state) + } + } + } + if op == nil { + errorf("bad data: ignore can't handle type %s", wireId.string()) + } + return op +} + +// gobDecodeOpFor returns the op for a type that is known to implement +// GobDecoder. +func (dec *Decoder) gobDecodeOpFor(ut *userTypeInfo) (*decOp, int) { + rcvrType := ut.user + if ut.decIndir == -1 { + rcvrType = reflect.PtrTo(rcvrType) + } else if ut.decIndir > 0 { + for i := int8(0); i < ut.decIndir; i++ { + rcvrType = rcvrType.Elem() + } + } + var op decOp + op = func(i *decInstr, state *decoderState, p unsafe.Pointer) { + // Caller has gotten us to within one indirection of our value. + if i.indir > 0 { + if *(*unsafe.Pointer)(p) == nil { + *(*unsafe.Pointer)(p) = unsafe.New(ut.base) + } + } + // Now p is a pointer to the base type. Do we need to climb out to + // get to the receiver type? + var v reflect.Value + if ut.decIndir == -1 { + v = reflect.ValueOf(unsafe.Unreflect(rcvrType, unsafe.Pointer(&p))) + } else { + v = reflect.ValueOf(unsafe.Unreflect(rcvrType, p)) + } + state.dec.decodeGobDecoder(state, v) + } + return &op, int(ut.indir) + +} + +// compatibleType asks: Are these two gob Types compatible? +// Answers the question for basic types, arrays, maps and slices, plus +// GobEncoder/Decoder pairs. +// Structs are considered ok; fields will be checked later. +func (dec *Decoder) compatibleType(fr reflect.Type, fw typeId, inProgress map[reflect.Type]typeId) bool { + if rhs, ok := inProgress[fr]; ok { + return rhs == fw + } + inProgress[fr] = fw + ut := userType(fr) + wire, ok := dec.wireType[fw] + // If fr is a GobDecoder, the wire type must be GobEncoder. + // And if fr is not a GobDecoder, the wire type must not be either. + if ut.isGobDecoder != (ok && wire.GobEncoderT != nil) { // the parentheses look odd but are correct. + return false + } + if ut.isGobDecoder { // This test trumps all others. + return true + } + switch t := ut.base; t.Kind() { + default: + // chan, etc: cannot handle. + return false + case reflect.Bool: + return fw == tBool + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return fw == tInt + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + return fw == tUint + case reflect.Float32, reflect.Float64: + return fw == tFloat + case reflect.Complex64, reflect.Complex128: + return fw == tComplex + case reflect.String: + return fw == tString + case reflect.Interface: + return fw == tInterface + case reflect.Array: + if !ok || wire.ArrayT == nil { + return false + } + array := wire.ArrayT + return t.Len() == array.Len && dec.compatibleType(t.Elem(), array.Elem, inProgress) + case reflect.Map: + if !ok || wire.MapT == nil { + return false + } + MapType := wire.MapT + return dec.compatibleType(t.Key(), MapType.Key, inProgress) && dec.compatibleType(t.Elem(), MapType.Elem, inProgress) + case reflect.Slice: + // Is it an array of bytes? + if t.Elem().Kind() == reflect.Uint8 { + return fw == tBytes + } + // Extract and compare element types. + var sw *sliceType + if tt, ok := builtinIdToType[fw]; ok { + sw = tt.(*sliceType) + } else { + sw = dec.wireType[fw].SliceT + } + elem := userType(t.Elem()).base + return sw != nil && dec.compatibleType(elem, sw.Elem, inProgress) + case reflect.Struct: + return true + } + return true +} + +// typeString returns a human-readable description of the type identified by remoteId. +func (dec *Decoder) typeString(remoteId typeId) string { + if t := idToType[remoteId]; t != nil { + // globally known type. + return t.string() + } + return dec.wireType[remoteId].string() +} + +// compileSingle compiles the decoder engine for a non-struct top-level value, including +// GobDecoders. +func (dec *Decoder) compileSingle(remoteId typeId, ut *userTypeInfo) (engine *decEngine, err error) { + rt := ut.user + engine = new(decEngine) + engine.instr = make([]decInstr, 1) // one item + name := rt.String() // best we can do + if !dec.compatibleType(rt, remoteId, make(map[reflect.Type]typeId)) { + return nil, errors.New("gob: wrong type received for local value " + name + ": " + dec.typeString(remoteId)) + } + op, indir := dec.decOpFor(remoteId, rt, name, make(map[reflect.Type]*decOp)) + ovfl := errors.New(`value for "` + name + `" out of range`) + engine.instr[singletonField] = decInstr{*op, singletonField, indir, 0, ovfl} + engine.numInstr = 1 + return +} + +// compileIgnoreSingle compiles the decoder engine for a non-struct top-level value that will be discarded. +func (dec *Decoder) compileIgnoreSingle(remoteId typeId) (engine *decEngine, err error) { + engine = new(decEngine) + engine.instr = make([]decInstr, 1) // one item + op := dec.decIgnoreOpFor(remoteId) + ovfl := overflow(dec.typeString(remoteId)) + engine.instr[0] = decInstr{op, 0, 0, 0, ovfl} + engine.numInstr = 1 + return +} + +// compileDec compiles the decoder engine for a value. If the value is not a struct, +// it calls out to compileSingle. +func (dec *Decoder) compileDec(remoteId typeId, ut *userTypeInfo) (engine *decEngine, err error) { + rt := ut.base + srt := rt + if srt.Kind() != reflect.Struct || + ut.isGobDecoder { + return dec.compileSingle(remoteId, ut) + } + var wireStruct *structType + // Builtin types can come from global pool; the rest must be defined by the decoder. + // Also we know we're decoding a struct now, so the client must have sent one. + if t, ok := builtinIdToType[remoteId]; ok { + wireStruct, _ = t.(*structType) + } else { + wire := dec.wireType[remoteId] + if wire == nil { + error_(errBadType) + } + wireStruct = wire.StructT + } + if wireStruct == nil { + errorf("type mismatch in decoder: want struct type %s; got non-struct", rt) + } + engine = new(decEngine) + engine.instr = make([]decInstr, len(wireStruct.Field)) + seen := make(map[reflect.Type]*decOp) + // Loop over the fields of the wire type. + for fieldnum := 0; fieldnum < len(wireStruct.Field); fieldnum++ { + wireField := wireStruct.Field[fieldnum] + if wireField.Name == "" { + errorf("empty name for remote field of type %s", wireStruct.Name) + } + ovfl := overflow(wireField.Name) + // Find the field of the local type with the same name. + localField, present := srt.FieldByName(wireField.Name) + // TODO(r): anonymous names + if !present || !isExported(wireField.Name) { + op := dec.decIgnoreOpFor(wireField.Id) + engine.instr[fieldnum] = decInstr{op, fieldnum, 0, 0, ovfl} + continue + } + if !dec.compatibleType(localField.Type, wireField.Id, make(map[reflect.Type]typeId)) { + errorf("wrong type (%s) for received field %s.%s", localField.Type, wireStruct.Name, wireField.Name) + } + op, indir := dec.decOpFor(wireField.Id, localField.Type, localField.Name, seen) + engine.instr[fieldnum] = decInstr{*op, fieldnum, indir, uintptr(localField.Offset), ovfl} + engine.numInstr++ + } + return +} + +// getDecEnginePtr returns the engine for the specified type. +func (dec *Decoder) getDecEnginePtr(remoteId typeId, ut *userTypeInfo) (enginePtr **decEngine, err error) { + rt := ut.base + decoderMap, ok := dec.decoderCache[rt] + if !ok { + decoderMap = make(map[typeId]**decEngine) + dec.decoderCache[rt] = decoderMap + } + if enginePtr, ok = decoderMap[remoteId]; !ok { + // To handle recursive types, mark this engine as underway before compiling. + enginePtr = new(*decEngine) + decoderMap[remoteId] = enginePtr + *enginePtr, err = dec.compileDec(remoteId, ut) + if err != nil { + delete(decoderMap, remoteId) + } + } + return +} + +// emptyStruct is the type we compile into when ignoring a struct value. +type emptyStruct struct{} + +var emptyStructType = reflect.TypeOf(emptyStruct{}) + +// getDecEnginePtr returns the engine for the specified type when the value is to be discarded. +func (dec *Decoder) getIgnoreEnginePtr(wireId typeId) (enginePtr **decEngine, err error) { + var ok bool + if enginePtr, ok = dec.ignorerCache[wireId]; !ok { + // To handle recursive types, mark this engine as underway before compiling. + enginePtr = new(*decEngine) + dec.ignorerCache[wireId] = enginePtr + wire := dec.wireType[wireId] + if wire != nil && wire.StructT != nil { + *enginePtr, err = dec.compileDec(wireId, userType(emptyStructType)) + } else { + *enginePtr, err = dec.compileIgnoreSingle(wireId) + } + if err != nil { + delete(dec.ignorerCache, wireId) + } + } + return +} + +// decodeValue decodes the data stream representing a value and stores it in val. +func (dec *Decoder) decodeValue(wireId typeId, val reflect.Value) { + defer catchError(&dec.err) + // If the value is nil, it means we should just ignore this item. + if !val.IsValid() { + dec.decodeIgnoredValue(wireId) + return + } + // Dereference down to the underlying type. + ut := userType(val.Type()) + base := ut.base + var enginePtr **decEngine + enginePtr, dec.err = dec.getDecEnginePtr(wireId, ut) + if dec.err != nil { + return + } + engine := *enginePtr + if st := base; st.Kind() == reflect.Struct && !ut.isGobDecoder { + if engine.numInstr == 0 && st.NumField() > 0 && len(dec.wireType[wireId].StructT.Field) > 0 { + name := base.Name() + errorf("type mismatch: no fields matched compiling decoder for %s", name) + } + dec.decodeStruct(engine, ut, uintptr(unsafeAddr(val)), ut.indir) + } else { + dec.decodeSingle(engine, ut, uintptr(unsafeAddr(val))) + } +} + +// decodeIgnoredValue decodes the data stream representing a value of the specified type and discards it. +func (dec *Decoder) decodeIgnoredValue(wireId typeId) { + var enginePtr **decEngine + enginePtr, dec.err = dec.getIgnoreEnginePtr(wireId) + if dec.err != nil { + return + } + wire := dec.wireType[wireId] + if wire != nil && wire.StructT != nil { + dec.ignoreStruct(*enginePtr) + } else { + dec.ignoreSingle(*enginePtr) + } +} + +func init() { + var iop, uop decOp + switch reflect.TypeOf(int(0)).Bits() { + case 32: + iop = decInt32 + uop = decUint32 + case 64: + iop = decInt64 + uop = decUint64 + default: + panic("gob: unknown size of int/uint") + } + decOpTable[reflect.Int] = iop + decOpTable[reflect.Uint] = uop + + // Finally uintptr + switch reflect.TypeOf(uintptr(0)).Bits() { + case 32: + uop = decUint32 + case 64: + uop = decUint64 + default: + panic("gob: unknown size of uintptr") + } + decOpTable[reflect.Uintptr] = uop +} + +// Gob assumes it can call UnsafeAddr on any Value +// in order to get a pointer it can copy data from. +// Values that have just been created and do not point +// into existing structs or slices cannot be addressed, +// so simulate it by returning a pointer to a copy. +// Each call allocates once. +func unsafeAddr(v reflect.Value) uintptr { + if v.CanAddr() { + return v.UnsafeAddr() + } + x := reflect.New(v.Type()).Elem() + x.Set(v) + return x.UnsafeAddr() +} + +// Gob depends on being able to take the address +// of zeroed Values it creates, so use this wrapper instead +// of the standard reflect.Zero. +// Each call allocates once. +func allocValue(t reflect.Type) reflect.Value { + return reflect.New(t).Elem() +} diff --git a/libgo/go/encoding/gob/decoder.go b/libgo/go/encoding/gob/decoder.go new file mode 100644 index 0000000..5e684d3 --- /dev/null +++ b/libgo/go/encoding/gob/decoder.go @@ -0,0 +1,214 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gob + +import ( + "bufio" + "bytes" + "errors" + "io" + "reflect" + "sync" +) + +// A Decoder manages the receipt of type and data information read from the +// remote side of a connection. +type Decoder struct { + mutex sync.Mutex // each item must be received atomically + r io.Reader // source of the data + buf bytes.Buffer // buffer for more efficient i/o from r + wireType map[typeId]*wireType // map from remote ID to local description + decoderCache map[reflect.Type]map[typeId]**decEngine // cache of compiled engines + ignorerCache map[typeId]**decEngine // ditto for ignored objects + freeList *decoderState // list of free decoderStates; avoids reallocation + countBuf []byte // used for decoding integers while parsing messages + tmp []byte // temporary storage for i/o; saves reallocating + err error +} + +// NewDecoder returns a new decoder that reads from the io.Reader. +// If r does not also implement io.ByteReader, it will be wrapped in a +// bufio.Reader. +func NewDecoder(r io.Reader) *Decoder { + dec := new(Decoder) + // We use the ability to read bytes as a plausible surrogate for buffering. + if _, ok := r.(io.ByteReader); !ok { + r = bufio.NewReader(r) + } + dec.r = r + dec.wireType = make(map[typeId]*wireType) + dec.decoderCache = make(map[reflect.Type]map[typeId]**decEngine) + dec.ignorerCache = make(map[typeId]**decEngine) + dec.countBuf = make([]byte, 9) // counts may be uint64s (unlikely!), require 9 bytes + + return dec +} + +// recvType loads the definition of a type. +func (dec *Decoder) recvType(id typeId) { + // Have we already seen this type? That's an error + if id < firstUserId || dec.wireType[id] != nil { + dec.err = errors.New("gob: duplicate type received") + return + } + + // Type: + wire := new(wireType) + dec.decodeValue(tWireType, reflect.ValueOf(wire)) + if dec.err != nil { + return + } + // Remember we've seen this type. + dec.wireType[id] = wire +} + +var errBadCount = errors.New("invalid message length") + +// recvMessage reads the next count-delimited item from the input. It is the converse +// of Encoder.writeMessage. It returns false on EOF or other error reading the message. +func (dec *Decoder) recvMessage() bool { + // Read a count. + nbytes, _, err := decodeUintReader(dec.r, dec.countBuf) + if err != nil { + dec.err = err + return false + } + if nbytes >= 1<<31 { + dec.err = errBadCount + return false + } + dec.readMessage(int(nbytes)) + return dec.err == nil +} + +// readMessage reads the next nbytes bytes from the input. +func (dec *Decoder) readMessage(nbytes int) { + // Allocate the buffer. + if cap(dec.tmp) < nbytes { + dec.tmp = make([]byte, nbytes+100) // room to grow + } + dec.tmp = dec.tmp[:nbytes] + + // Read the data + _, dec.err = io.ReadFull(dec.r, dec.tmp) + if dec.err != nil { + if dec.err == io.EOF { + dec.err = io.ErrUnexpectedEOF + } + return + } + dec.buf.Write(dec.tmp) +} + +// toInt turns an encoded uint64 into an int, according to the marshaling rules. +func toInt(x uint64) int64 { + i := int64(x >> 1) + if x&1 != 0 { + i = ^i + } + return i +} + +func (dec *Decoder) nextInt() int64 { + n, _, err := decodeUintReader(&dec.buf, dec.countBuf) + if err != nil { + dec.err = err + } + return toInt(n) +} + +func (dec *Decoder) nextUint() uint64 { + n, _, err := decodeUintReader(&dec.buf, dec.countBuf) + if err != nil { + dec.err = err + } + return n +} + +// decodeTypeSequence parses: +// TypeSequence +// (TypeDefinition DelimitedTypeDefinition*)? +// and returns the type id of the next value. It returns -1 at +// EOF. Upon return, the remainder of dec.buf is the value to be +// decoded. If this is an interface value, it can be ignored by +// simply resetting that buffer. +func (dec *Decoder) decodeTypeSequence(isInterface bool) typeId { + for dec.err == nil { + if dec.buf.Len() == 0 { + if !dec.recvMessage() { + break + } + } + // Receive a type id. + id := typeId(dec.nextInt()) + if id >= 0 { + // Value follows. + return id + } + // Type definition for (-id) follows. + dec.recvType(-id) + // When decoding an interface, after a type there may be a + // DelimitedValue still in the buffer. Skip its count. + // (Alternatively, the buffer is empty and the byte count + // will be absorbed by recvMessage.) + if dec.buf.Len() > 0 { + if !isInterface { + dec.err = errors.New("extra data in buffer") + break + } + dec.nextUint() + } + } + return -1 +} + +// Decode reads the next value from the connection and stores +// it in the data represented by the empty interface value. +// If e is nil, the value will be discarded. Otherwise, +// the value underlying e must be a pointer to the +// correct type for the next data item received. +func (dec *Decoder) Decode(e interface{}) error { + if e == nil { + return dec.DecodeValue(reflect.Value{}) + } + value := reflect.ValueOf(e) + // If e represents a value as opposed to a pointer, the answer won't + // get back to the caller. Make sure it's a pointer. + if value.Type().Kind() != reflect.Ptr { + dec.err = errors.New("gob: attempt to decode into a non-pointer") + return dec.err + } + return dec.DecodeValue(value) +} + +// DecodeValue reads the next value from the connection. +// If v is the zero reflect.Value (v.Kind() == Invalid), DecodeValue discards the value. +// Otherwise, it stores the value into v. In that case, v must represent +// a non-nil pointer to data or be an assignable reflect.Value (v.CanSet()) +func (dec *Decoder) DecodeValue(v reflect.Value) error { + if v.IsValid() { + if v.Kind() == reflect.Ptr && !v.IsNil() { + // That's okay, we'll store through the pointer. + } else if !v.CanSet() { + return errors.New("gob: DecodeValue of unassignable value") + } + } + // Make sure we're single-threaded through here. + dec.mutex.Lock() + defer dec.mutex.Unlock() + + dec.buf.Reset() // In case data lingers from previous invocation. + dec.err = nil + id := dec.decodeTypeSequence(false) + if dec.err == nil { + dec.decodeValue(id, v) + } + return dec.err +} + +// If debug.go is compiled into the program , debugFunc prints a human-readable +// representation of the gob data read from r by calling that file's Debug function. +// Otherwise it is nil. +var debugFunc func(io.Reader) diff --git a/libgo/go/encoding/gob/doc.go b/libgo/go/encoding/gob/doc.go new file mode 100644 index 0000000..05ebef1 --- /dev/null +++ b/libgo/go/encoding/gob/doc.go @@ -0,0 +1,366 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package gob manages streams of gobs - binary values exchanged between an +Encoder (transmitter) and a Decoder (receiver). A typical use is transporting +arguments and results of remote procedure calls (RPCs) such as those provided by +package "rpc". + +A stream of gobs is self-describing. Each data item in the stream is preceded by +a specification of its type, expressed in terms of a small set of predefined +types. Pointers are not transmitted, but the things they point to are +transmitted; that is, the values are flattened. Recursive types work fine, but +recursive values (data with cycles) are problematic. This may change. + +To use gobs, create an Encoder and present it with a series of data items as +values or addresses that can be dereferenced to values. The Encoder makes sure +all type information is sent before it is needed. At the receive side, a +Decoder retrieves values from the encoded stream and unpacks them into local +variables. + +The source and destination values/types need not correspond exactly. For structs, +fields (identified by name) that are in the source but absent from the receiving +variable will be ignored. Fields that are in the receiving variable but missing +from the transmitted type or value will be ignored in the destination. If a field +with the same name is present in both, their types must be compatible. Both the +receiver and transmitter will do all necessary indirection and dereferencing to +convert between gobs and actual Go values. For instance, a gob type that is +schematically, + + struct { A, B int } + +can be sent from or received into any of these Go types: + + struct { A, B int } // the same + *struct { A, B int } // extra indirection of the struct + struct { *A, **B int } // extra indirection of the fields + struct { A, B int64 } // different concrete value type; see below + +It may also be received into any of these: + + struct { A, B int } // the same + struct { B, A int } // ordering doesn't matter; matching is by name + struct { A, B, C int } // extra field (C) ignored + struct { B int } // missing field (A) ignored; data will be dropped + struct { B, C int } // missing field (A) ignored; extra field (C) ignored. + +Attempting to receive into these types will draw a decode error: + + struct { A int; B uint } // change of signedness for B + struct { A int; B float } // change of type for B + struct { } // no field names in common + struct { C, D int } // no field names in common + +Integers are transmitted two ways: arbitrary precision signed integers or +arbitrary precision unsigned integers. There is no int8, int16 etc. +discrimination in the gob format; there are only signed and unsigned integers. As +described below, the transmitter sends the value in a variable-length encoding; +the receiver accepts the value and stores it in the destination variable. +Floating-point numbers are always sent using IEEE-754 64-bit precision (see +below). + +Signed integers may be received into any signed integer variable: int, int16, etc.; +unsigned integers may be received into any unsigned integer variable; and floating +point values may be received into any floating point variable. However, +the destination variable must be able to represent the value or the decode +operation will fail. + +Structs, arrays and slices are also supported. Strings and arrays of bytes are +supported with a special, efficient representation (see below). When a slice is +decoded, if the existing slice has capacity the slice will be extended in place; +if not, a new array is allocated. Regardless, the length of the resuling slice +reports the number of elements decoded. + +Functions and channels cannot be sent in a gob. Attempting +to encode a value that contains one will fail. + +The rest of this comment documents the encoding, details that are not important +for most users. Details are presented bottom-up. + +An unsigned integer is sent one of two ways. If it is less than 128, it is sent +as a byte with that value. Otherwise it is sent as a minimal-length big-endian +(high byte first) byte stream holding the value, preceded by one byte holding the +byte count, negated. Thus 0 is transmitted as (00), 7 is transmitted as (07) and +256 is transmitted as (FE 01 00). + +A boolean is encoded within an unsigned integer: 0 for false, 1 for true. + +A signed integer, i, is encoded within an unsigned integer, u. Within u, bits 1 +upward contain the value; bit 0 says whether they should be complemented upon +receipt. The encode algorithm looks like this: + + uint u; + if i < 0 { + u = (^i << 1) | 1 // complement i, bit 0 is 1 + } else { + u = (i << 1) // do not complement i, bit 0 is 0 + } + encodeUnsigned(u) + +The low bit is therefore analogous to a sign bit, but making it the complement bit +instead guarantees that the largest negative integer is not a special case. For +example, -129=^128=(^256>>1) encodes as (FE 01 01). + +Floating-point numbers are always sent as a representation of a float64 value. +That value is converted to a uint64 using math.Float64bits. The uint64 is then +byte-reversed and sent as a regular unsigned integer. The byte-reversal means the +exponent and high-precision part of the mantissa go first. Since the low bits are +often zero, this can save encoding bytes. For instance, 17.0 is encoded in only +three bytes (FE 31 40). + +Strings and slices of bytes are sent as an unsigned count followed by that many +uninterpreted bytes of the value. + +All other slices and arrays are sent as an unsigned count followed by that many +elements using the standard gob encoding for their type, recursively. + +Maps are sent as an unsigned count followed by that man key, element +pairs. Empty but non-nil maps are sent, so if the sender has allocated +a map, the receiver will allocate a map even no elements are +transmitted. + +Structs are sent as a sequence of (field number, field value) pairs. The field +value is sent using the standard gob encoding for its type, recursively. If a +field has the zero value for its type, it is omitted from the transmission. The +field number is defined by the type of the encoded struct: the first field of the +encoded type is field 0, the second is field 1, etc. When encoding a value, the +field numbers are delta encoded for efficiency and the fields are always sent in +order of increasing field number; the deltas are therefore unsigned. The +initialization for the delta encoding sets the field number to -1, so an unsigned +integer field 0 with value 7 is transmitted as unsigned delta = 1, unsigned value += 7 or (01 07). Finally, after all the fields have been sent a terminating mark +denotes the end of the struct. That mark is a delta=0 value, which has +representation (00). + +Interface types are not checked for compatibility; all interface types are +treated, for transmission, as members of a single "interface" type, analogous to +int or []byte - in effect they're all treated as interface{}. Interface values +are transmitted as a string identifying the concrete type being sent (a name +that must be pre-defined by calling Register), followed by a byte count of the +length of the following data (so the value can be skipped if it cannot be +stored), followed by the usual encoding of concrete (dynamic) value stored in +the interface value. (A nil interface value is identified by the empty string +and transmits no value.) Upon receipt, the decoder verifies that the unpacked +concrete item satisfies the interface of the receiving variable. + +The representation of types is described below. When a type is defined on a given +connection between an Encoder and Decoder, it is assigned a signed integer type +id. When Encoder.Encode(v) is called, it makes sure there is an id assigned for +the type of v and all its elements and then it sends the pair (typeid, encoded-v) +where typeid is the type id of the encoded type of v and encoded-v is the gob +encoding of the value v. + +To define a type, the encoder chooses an unused, positive type id and sends the +pair (-type id, encoded-type) where encoded-type is the gob encoding of a wireType +description, constructed from these types: + + type wireType struct { + ArrayT *ArrayType + SliceT *SliceType + StructT *StructType + MapT *MapType + } + type ArrayType struct { + CommonType + Elem typeId + Len int + } + type CommonType struct { + Name string // the name of the struct type + Id int // the id of the type, repeated so it's inside the type + } + type SliceType struct { + CommonType + Elem typeId + } + type StructType struct { + CommonType + Field []*fieldType // the fields of the struct. + } + type FieldType struct { + Name string // the name of the field. + Id int // the type id of the field, which must be already defined + } + type MapType struct { + CommonType + Key typeId + Elem typeId + } + +If there are nested type ids, the types for all inner type ids must be defined +before the top-level type id is used to describe an encoded-v. + +For simplicity in setup, the connection is defined to understand these types a +priori, as well as the basic gob types int, uint, etc. Their ids are: + + bool 1 + int 2 + uint 3 + float 4 + []byte 5 + string 6 + complex 7 + interface 8 + // gap for reserved ids. + WireType 16 + ArrayType 17 + CommonType 18 + SliceType 19 + StructType 20 + FieldType 21 + // 22 is slice of fieldType. + MapType 23 + +Finally, each message created by a call to Encode is preceded by an encoded +unsigned integer count of the number of bytes remaining in the message. After +the initial type name, interface values are wrapped the same way; in effect, the +interface value acts like a recursive invocation of Encode. + +In summary, a gob stream looks like + + (byteCount (-type id, encoding of a wireType)* (type id, encoding of a value))* + +where * signifies zero or more repetitions and the type id of a value must +be predefined or be defined before the value in the stream. + +See "Gobs of data" for a design discussion of the gob wire format: +http://blog.golang.org/2011/03/gobs-of-data.html +*/ +package gob + +/* +Grammar: + +Tokens starting with a lower case letter are terminals; int(n) +and uint(n) represent the signed/unsigned encodings of the value n. + +GobStream: + DelimitedMessage* +DelimitedMessage: + uint(lengthOfMessage) Message +Message: + TypeSequence TypedValue +TypeSequence + (TypeDefinition DelimitedTypeDefinition*)? +DelimitedTypeDefinition: + uint(lengthOfTypeDefinition) TypeDefinition +TypedValue: + int(typeId) Value +TypeDefinition: + int(-typeId) encodingOfWireType +Value: + SingletonValue | StructValue +SingletonValue: + uint(0) FieldValue +FieldValue: + builtinValue | ArrayValue | MapValue | SliceValue | StructValue | InterfaceValue +InterfaceValue: + NilInterfaceValue | NonNilInterfaceValue +NilInterfaceValue: + uint(0) +NonNilInterfaceValue: + ConcreteTypeName TypeSequence InterfaceContents +ConcreteTypeName: + uint(lengthOfName) [already read=n] name +InterfaceContents: + int(concreteTypeId) DelimitedValue +DelimitedValue: + uint(length) Value +ArrayValue: + uint(n) FieldValue*n [n elements] +MapValue: + uint(n) (FieldValue FieldValue)*n [n (key, value) pairs] +SliceValue: + uint(n) FieldValue*n [n elements] +StructValue: + (uint(fieldDelta) FieldValue)* +*/ + +/* +For implementers and the curious, here is an encoded example. Given + type Point struct {X, Y int} +and the value + p := Point{22, 33} +the bytes transmitted that encode p will be: + 1f ff 81 03 01 01 05 50 6f 69 6e 74 01 ff 82 00 + 01 02 01 01 58 01 04 00 01 01 59 01 04 00 00 00 + 07 ff 82 01 2c 01 42 00 +They are determined as follows. + +Since this is the first transmission of type Point, the type descriptor +for Point itself must be sent before the value. This is the first type +we've sent on this Encoder, so it has type id 65 (0 through 64 are +reserved). + + 1f // This item (a type descriptor) is 31 bytes long. + ff 81 // The negative of the id for the type we're defining, -65. + // This is one byte (indicated by FF = -1) followed by + // ^-65<<1 | 1. The low 1 bit signals to complement the + // rest upon receipt. + + // Now we send a type descriptor, which is itself a struct (wireType). + // The type of wireType itself is known (it's built in, as is the type of + // all its components), so we just need to send a *value* of type wireType + // that represents type "Point". + // Here starts the encoding of that value. + // Set the field number implicitly to -1; this is done at the beginning + // of every struct, including nested structs. + 03 // Add 3 to field number; now 2 (wireType.structType; this is a struct). + // structType starts with an embedded commonType, which appears + // as a regular structure here too. + 01 // add 1 to field number (now 0); start of embedded commonType. + 01 // add 1 to field number (now 0, the name of the type) + 05 // string is (unsigned) 5 bytes long + 50 6f 69 6e 74 // wireType.structType.commonType.name = "Point" + 01 // add 1 to field number (now 1, the id of the type) + ff 82 // wireType.structType.commonType._id = 65 + 00 // end of embedded wiretype.structType.commonType struct + 01 // add 1 to field number (now 1, the field array in wireType.structType) + 02 // There are two fields in the type (len(structType.field)) + 01 // Start of first field structure; add 1 to get field number 0: field[0].name + 01 // 1 byte + 58 // structType.field[0].name = "X" + 01 // Add 1 to get field number 1: field[0].id + 04 // structType.field[0].typeId is 2 (signed int). + 00 // End of structType.field[0]; start structType.field[1]; set field number to -1. + 01 // Add 1 to get field number 0: field[1].name + 01 // 1 byte + 59 // structType.field[1].name = "Y" + 01 // Add 1 to get field number 1: field[0].id + 04 // struct.Type.field[1].typeId is 2 (signed int). + 00 // End of structType.field[1]; end of structType.field. + 00 // end of wireType.structType structure + 00 // end of wireType structure + +Now we can send the Point value. Again the field number resets to -1: + + 07 // this value is 7 bytes long + ff 82 // the type number, 65 (1 byte (-FF) followed by 65<<1) + 01 // add one to field number, yielding field 0 + 2c // encoding of signed "22" (0x22 = 44 = 22<<1); Point.x = 22 + 01 // add one to field number, yielding field 1 + 42 // encoding of signed "33" (0x42 = 66 = 33<<1); Point.y = 33 + 00 // end of structure + +The type encoding is long and fairly intricate but we send it only once. +If p is transmitted a second time, the type is already known so the +output will be just: + + 07 ff 82 01 2c 01 42 00 + +A single non-struct value at top level is transmitted like a field with +delta tag 0. For instance, a signed integer with value 3 presented as +the argument to Encode will emit: + + 03 04 00 06 + +Which represents: + + 03 // this value is 3 bytes long + 04 // the type number, 2, represents an integer + 00 // tag delta 0 + 06 // value 3 + +*/ diff --git a/libgo/go/encoding/gob/dump.go b/libgo/go/encoding/gob/dump.go new file mode 100644 index 0000000..0d0017c --- /dev/null +++ b/libgo/go/encoding/gob/dump.go @@ -0,0 +1,22 @@ +package main + +// Need to compile package gob with debug.go to build this program. + +import ( + "encoding/gob" + "fmt" + "os" +) + +func main() { + var err error + file := os.Stdin + if len(os.Args) > 1 { + file, err = os.Open(os.Args[1]) + if err != nil { + fmt.Fprintf(os.Stderr, "dump: %s\n", err) + os.Exit(1) + } + } + gob.Debug(file) +} diff --git a/libgo/go/encoding/gob/encode.go b/libgo/go/encoding/gob/encode.go new file mode 100644 index 0000000..c7e4823 --- /dev/null +++ b/libgo/go/encoding/gob/encode.go @@ -0,0 +1,717 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gob + +import ( + "bytes" + "math" + "reflect" + "unsafe" +) + +const uint64Size = int(unsafe.Sizeof(uint64(0))) + +// encoderState is the global execution state of an instance of the encoder. +// Field numbers are delta encoded and always increase. The field +// number is initialized to -1 so 0 comes out as delta(1). A delta of +// 0 terminates the structure. +type encoderState struct { + enc *Encoder + b *bytes.Buffer + sendZero bool // encoding an array element or map key/value pair; send zero values + fieldnum int // the last field number written. + buf [1 + uint64Size]byte // buffer used by the encoder; here to avoid allocation. + next *encoderState // for free list +} + +func (enc *Encoder) newEncoderState(b *bytes.Buffer) *encoderState { + e := enc.freeList + if e == nil { + e = new(encoderState) + e.enc = enc + } else { + enc.freeList = e.next + } + e.sendZero = false + e.fieldnum = 0 + e.b = b + return e +} + +func (enc *Encoder) freeEncoderState(e *encoderState) { + e.next = enc.freeList + enc.freeList = e +} + +// Unsigned integers have a two-state encoding. If the number is less +// than 128 (0 through 0x7F), its value is written directly. +// Otherwise the value is written in big-endian byte order preceded +// by the byte length, negated. + +// encodeUint writes an encoded unsigned integer to state.b. +func (state *encoderState) encodeUint(x uint64) { + if x <= 0x7F { + err := state.b.WriteByte(uint8(x)) + if err != nil { + error_(err) + } + return + } + i := uint64Size + for x > 0 { + state.buf[i] = uint8(x) + x >>= 8 + i-- + } + state.buf[i] = uint8(i - uint64Size) // = loop count, negated + _, err := state.b.Write(state.buf[i : uint64Size+1]) + if err != nil { + error_(err) + } +} + +// encodeInt writes an encoded signed integer to state.w. +// The low bit of the encoding says whether to bit complement the (other bits of the) +// uint to recover the int. +func (state *encoderState) encodeInt(i int64) { + var x uint64 + if i < 0 { + x = uint64(^i<<1) | 1 + } else { + x = uint64(i << 1) + } + state.encodeUint(uint64(x)) +} + +// encOp is the signature of an encoding operator for a given type. +type encOp func(i *encInstr, state *encoderState, p unsafe.Pointer) + +// The 'instructions' of the encoding machine +type encInstr struct { + op encOp + field int // field number + indir int // how many pointer indirections to reach the value in the struct + offset uintptr // offset in the structure of the field to encode +} + +// update emits a field number and updates the state to record its value for delta encoding. +// If the instruction pointer is nil, it does nothing +func (state *encoderState) update(instr *encInstr) { + if instr != nil { + state.encodeUint(uint64(instr.field - state.fieldnum)) + state.fieldnum = instr.field + } +} + +// Each encoder for a composite is responsible for handling any +// indirections associated with the elements of the data structure. +// If any pointer so reached is nil, no bytes are written. If the +// data item is zero, no bytes are written. Single values - ints, +// strings etc. - are indirected before calling their encoders. +// Otherwise, the output (for a scalar) is the field number, as an +// encoded integer, followed by the field data in its appropriate +// format. + +// encIndirect dereferences p indir times and returns the result. +func encIndirect(p unsafe.Pointer, indir int) unsafe.Pointer { + for ; indir > 0; indir-- { + p = *(*unsafe.Pointer)(p) + if p == nil { + return unsafe.Pointer(nil) + } + } + return p +} + +// encBool encodes the bool with address p as an unsigned 0 or 1. +func encBool(i *encInstr, state *encoderState, p unsafe.Pointer) { + b := *(*bool)(p) + if b || state.sendZero { + state.update(i) + if b { + state.encodeUint(1) + } else { + state.encodeUint(0) + } + } +} + +// encInt encodes the int with address p. +func encInt(i *encInstr, state *encoderState, p unsafe.Pointer) { + v := int64(*(*int)(p)) + if v != 0 || state.sendZero { + state.update(i) + state.encodeInt(v) + } +} + +// encUint encodes the uint with address p. +func encUint(i *encInstr, state *encoderState, p unsafe.Pointer) { + v := uint64(*(*uint)(p)) + if v != 0 || state.sendZero { + state.update(i) + state.encodeUint(v) + } +} + +// encInt8 encodes the int8 with address p. +func encInt8(i *encInstr, state *encoderState, p unsafe.Pointer) { + v := int64(*(*int8)(p)) + if v != 0 || state.sendZero { + state.update(i) + state.encodeInt(v) + } +} + +// encUint8 encodes the uint8 with address p. +func encUint8(i *encInstr, state *encoderState, p unsafe.Pointer) { + v := uint64(*(*uint8)(p)) + if v != 0 || state.sendZero { + state.update(i) + state.encodeUint(v) + } +} + +// encInt16 encodes the int16 with address p. +func encInt16(i *encInstr, state *encoderState, p unsafe.Pointer) { + v := int64(*(*int16)(p)) + if v != 0 || state.sendZero { + state.update(i) + state.encodeInt(v) + } +} + +// encUint16 encodes the uint16 with address p. +func encUint16(i *encInstr, state *encoderState, p unsafe.Pointer) { + v := uint64(*(*uint16)(p)) + if v != 0 || state.sendZero { + state.update(i) + state.encodeUint(v) + } +} + +// encInt32 encodes the int32 with address p. +func encInt32(i *encInstr, state *encoderState, p unsafe.Pointer) { + v := int64(*(*int32)(p)) + if v != 0 || state.sendZero { + state.update(i) + state.encodeInt(v) + } +} + +// encUint encodes the uint32 with address p. +func encUint32(i *encInstr, state *encoderState, p unsafe.Pointer) { + v := uint64(*(*uint32)(p)) + if v != 0 || state.sendZero { + state.update(i) + state.encodeUint(v) + } +} + +// encInt64 encodes the int64 with address p. +func encInt64(i *encInstr, state *encoderState, p unsafe.Pointer) { + v := *(*int64)(p) + if v != 0 || state.sendZero { + state.update(i) + state.encodeInt(v) + } +} + +// encInt64 encodes the uint64 with address p. +func encUint64(i *encInstr, state *encoderState, p unsafe.Pointer) { + v := *(*uint64)(p) + if v != 0 || state.sendZero { + state.update(i) + state.encodeUint(v) + } +} + +// encUintptr encodes the uintptr with address p. +func encUintptr(i *encInstr, state *encoderState, p unsafe.Pointer) { + v := uint64(*(*uintptr)(p)) + if v != 0 || state.sendZero { + state.update(i) + state.encodeUint(v) + } +} + +// floatBits returns a uint64 holding the bits of a floating-point number. +// Floating-point numbers are transmitted as uint64s holding the bits +// of the underlying representation. They are sent byte-reversed, with +// the exponent end coming out first, so integer floating point numbers +// (for example) transmit more compactly. This routine does the +// swizzling. +func floatBits(f float64) uint64 { + u := math.Float64bits(f) + var v uint64 + for i := 0; i < 8; i++ { + v <<= 8 + v |= u & 0xFF + u >>= 8 + } + return v +} + +// encFloat32 encodes the float32 with address p. +func encFloat32(i *encInstr, state *encoderState, p unsafe.Pointer) { + f := *(*float32)(p) + if f != 0 || state.sendZero { + v := floatBits(float64(f)) + state.update(i) + state.encodeUint(v) + } +} + +// encFloat64 encodes the float64 with address p. +func encFloat64(i *encInstr, state *encoderState, p unsafe.Pointer) { + f := *(*float64)(p) + if f != 0 || state.sendZero { + state.update(i) + v := floatBits(f) + state.encodeUint(v) + } +} + +// encComplex64 encodes the complex64 with address p. +// Complex numbers are just a pair of floating-point numbers, real part first. +func encComplex64(i *encInstr, state *encoderState, p unsafe.Pointer) { + c := *(*complex64)(p) + if c != 0+0i || state.sendZero { + rpart := floatBits(float64(real(c))) + ipart := floatBits(float64(imag(c))) + state.update(i) + state.encodeUint(rpart) + state.encodeUint(ipart) + } +} + +// encComplex128 encodes the complex128 with address p. +func encComplex128(i *encInstr, state *encoderState, p unsafe.Pointer) { + c := *(*complex128)(p) + if c != 0+0i || state.sendZero { + rpart := floatBits(real(c)) + ipart := floatBits(imag(c)) + state.update(i) + state.encodeUint(rpart) + state.encodeUint(ipart) + } +} + +// encUint8Array encodes the byte slice whose header has address p. +// Byte arrays are encoded as an unsigned count followed by the raw bytes. +func encUint8Array(i *encInstr, state *encoderState, p unsafe.Pointer) { + b := *(*[]byte)(p) + if len(b) > 0 || state.sendZero { + state.update(i) + state.encodeUint(uint64(len(b))) + state.b.Write(b) + } +} + +// encString encodes the string whose header has address p. +// Strings are encoded as an unsigned count followed by the raw bytes. +func encString(i *encInstr, state *encoderState, p unsafe.Pointer) { + s := *(*string)(p) + if len(s) > 0 || state.sendZero { + state.update(i) + state.encodeUint(uint64(len(s))) + state.b.WriteString(s) + } +} + +// encStructTerminator encodes the end of an encoded struct +// as delta field number of 0. +func encStructTerminator(i *encInstr, state *encoderState, p unsafe.Pointer) { + state.encodeUint(0) +} + +// Execution engine + +// encEngine an array of instructions indexed by field number of the encoding +// data, typically a struct. It is executed top to bottom, walking the struct. +type encEngine struct { + instr []encInstr +} + +const singletonField = 0 + +// encodeSingle encodes a single top-level non-struct value. +func (enc *Encoder) encodeSingle(b *bytes.Buffer, engine *encEngine, basep uintptr) { + state := enc.newEncoderState(b) + state.fieldnum = singletonField + // There is no surrounding struct to frame the transmission, so we must + // generate data even if the item is zero. To do this, set sendZero. + state.sendZero = true + instr := &engine.instr[singletonField] + p := unsafe.Pointer(basep) // offset will be zero + if instr.indir > 0 { + if p = encIndirect(p, instr.indir); p == nil { + return + } + } + instr.op(instr, state, p) + enc.freeEncoderState(state) +} + +// encodeStruct encodes a single struct value. +func (enc *Encoder) encodeStruct(b *bytes.Buffer, engine *encEngine, basep uintptr) { + state := enc.newEncoderState(b) + state.fieldnum = -1 + for i := 0; i < len(engine.instr); i++ { + instr := &engine.instr[i] + p := unsafe.Pointer(basep + instr.offset) + if instr.indir > 0 { + if p = encIndirect(p, instr.indir); p == nil { + continue + } + } + instr.op(instr, state, p) + } + enc.freeEncoderState(state) +} + +// encodeArray encodes the array whose 0th element is at p. +func (enc *Encoder) encodeArray(b *bytes.Buffer, p uintptr, op encOp, elemWid uintptr, elemIndir int, length int) { + state := enc.newEncoderState(b) + state.fieldnum = -1 + state.sendZero = true + state.encodeUint(uint64(length)) + for i := 0; i < length; i++ { + elemp := p + up := unsafe.Pointer(elemp) + if elemIndir > 0 { + if up = encIndirect(up, elemIndir); up == nil { + errorf("encodeArray: nil element") + } + elemp = uintptr(up) + } + op(nil, state, unsafe.Pointer(elemp)) + p += uintptr(elemWid) + } + enc.freeEncoderState(state) +} + +// encodeReflectValue is a helper for maps. It encodes the value v. +func encodeReflectValue(state *encoderState, v reflect.Value, op encOp, indir int) { + for i := 0; i < indir && v.IsValid(); i++ { + v = reflect.Indirect(v) + } + if !v.IsValid() { + errorf("encodeReflectValue: nil element") + } + op(nil, state, unsafe.Pointer(unsafeAddr(v))) +} + +// encodeMap encodes a map as unsigned count followed by key:value pairs. +// Because map internals are not exposed, we must use reflection rather than +// addresses. +func (enc *Encoder) encodeMap(b *bytes.Buffer, mv reflect.Value, keyOp, elemOp encOp, keyIndir, elemIndir int) { + state := enc.newEncoderState(b) + state.fieldnum = -1 + state.sendZero = true + keys := mv.MapKeys() + state.encodeUint(uint64(len(keys))) + for _, key := range keys { + encodeReflectValue(state, key, keyOp, keyIndir) + encodeReflectValue(state, mv.MapIndex(key), elemOp, elemIndir) + } + enc.freeEncoderState(state) +} + +// encodeInterface encodes the interface value iv. +// To send an interface, we send a string identifying the concrete type, followed +// by the type identifier (which might require defining that type right now), followed +// by the concrete value. A nil value gets sent as the empty string for the name, +// followed by no value. +func (enc *Encoder) encodeInterface(b *bytes.Buffer, iv reflect.Value) { + state := enc.newEncoderState(b) + state.fieldnum = -1 + state.sendZero = true + if iv.IsNil() { + state.encodeUint(0) + return + } + + ut := userType(iv.Elem().Type()) + name, ok := concreteTypeToName[ut.base] + if !ok { + errorf("type not registered for interface: %s", ut.base) + } + // Send the name. + state.encodeUint(uint64(len(name))) + _, err := state.b.WriteString(name) + if err != nil { + error_(err) + } + // Define the type id if necessary. + enc.sendTypeDescriptor(enc.writer(), state, ut) + // Send the type id. + enc.sendTypeId(state, ut) + // Encode the value into a new buffer. Any nested type definitions + // should be written to b, before the encoded value. + enc.pushWriter(b) + data := new(bytes.Buffer) + data.Write(spaceForLength) + enc.encode(data, iv.Elem(), ut) + if enc.err != nil { + error_(enc.err) + } + enc.popWriter() + enc.writeMessage(b, data) + if enc.err != nil { + error_(err) + } + enc.freeEncoderState(state) +} + +// isZero returns whether the value is the zero of its type. +func isZero(val reflect.Value) bool { + switch val.Kind() { + case reflect.Array, reflect.Map, reflect.Slice, reflect.String: + return val.Len() == 0 + case reflect.Bool: + return !val.Bool() + case reflect.Complex64, reflect.Complex128: + return val.Complex() == 0 + case reflect.Chan, reflect.Func, reflect.Ptr: + return val.IsNil() + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return val.Int() == 0 + case reflect.Float32, reflect.Float64: + return val.Float() == 0 + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + return val.Uint() == 0 + } + panic("unknown type in isZero " + val.Type().String()) +} + +// encGobEncoder encodes a value that implements the GobEncoder interface. +// The data is sent as a byte array. +func (enc *Encoder) encodeGobEncoder(b *bytes.Buffer, v reflect.Value) { + // TODO: should we catch panics from the called method? + // We know it's a GobEncoder, so just call the method directly. + data, err := v.Interface().(GobEncoder).GobEncode() + if err != nil { + error_(err) + } + state := enc.newEncoderState(b) + state.fieldnum = -1 + state.encodeUint(uint64(len(data))) + state.b.Write(data) + enc.freeEncoderState(state) +} + +var encOpTable = [...]encOp{ + reflect.Bool: encBool, + reflect.Int: encInt, + reflect.Int8: encInt8, + reflect.Int16: encInt16, + reflect.Int32: encInt32, + reflect.Int64: encInt64, + reflect.Uint: encUint, + reflect.Uint8: encUint8, + reflect.Uint16: encUint16, + reflect.Uint32: encUint32, + reflect.Uint64: encUint64, + reflect.Uintptr: encUintptr, + reflect.Float32: encFloat32, + reflect.Float64: encFloat64, + reflect.Complex64: encComplex64, + reflect.Complex128: encComplex128, + reflect.String: encString, +} + +// encOpFor returns (a pointer to) the encoding op for the base type under rt and +// the indirection count to reach it. +func (enc *Encoder) encOpFor(rt reflect.Type, inProgress map[reflect.Type]*encOp) (*encOp, int) { + ut := userType(rt) + // If the type implements GobEncoder, we handle it without further processing. + if ut.isGobEncoder { + return enc.gobEncodeOpFor(ut) + } + // If this type is already in progress, it's a recursive type (e.g. map[string]*T). + // Return the pointer to the op we're already building. + if opPtr := inProgress[rt]; opPtr != nil { + return opPtr, ut.indir + } + typ := ut.base + indir := ut.indir + k := typ.Kind() + var op encOp + if int(k) < len(encOpTable) { + op = encOpTable[k] + } + if op == nil { + inProgress[rt] = &op + // Special cases + switch t := typ; t.Kind() { + case reflect.Slice: + if t.Elem().Kind() == reflect.Uint8 { + op = encUint8Array + break + } + // Slices have a header; we decode it to find the underlying array. + elemOp, indir := enc.encOpFor(t.Elem(), inProgress) + op = func(i *encInstr, state *encoderState, p unsafe.Pointer) { + slice := (*reflect.SliceHeader)(p) + if !state.sendZero && slice.Len == 0 { + return + } + state.update(i) + state.enc.encodeArray(state.b, slice.Data, *elemOp, t.Elem().Size(), indir, int(slice.Len)) + } + case reflect.Array: + // True arrays have size in the type. + elemOp, indir := enc.encOpFor(t.Elem(), inProgress) + op = func(i *encInstr, state *encoderState, p unsafe.Pointer) { + state.update(i) + state.enc.encodeArray(state.b, uintptr(p), *elemOp, t.Elem().Size(), indir, t.Len()) + } + case reflect.Map: + keyOp, keyIndir := enc.encOpFor(t.Key(), inProgress) + elemOp, elemIndir := enc.encOpFor(t.Elem(), inProgress) + op = func(i *encInstr, state *encoderState, p unsafe.Pointer) { + // Maps cannot be accessed by moving addresses around the way + // that slices etc. can. We must recover a full reflection value for + // the iteration. + v := reflect.ValueOf(unsafe.Unreflect(t, unsafe.Pointer(p))) + mv := reflect.Indirect(v) + // We send zero-length (but non-nil) maps because the + // receiver might want to use the map. (Maps don't use append.) + if !state.sendZero && mv.IsNil() { + return + } + state.update(i) + state.enc.encodeMap(state.b, mv, *keyOp, *elemOp, keyIndir, elemIndir) + } + case reflect.Struct: + // Generate a closure that calls out to the engine for the nested type. + enc.getEncEngine(userType(typ)) + info := mustGetTypeInfo(typ) + op = func(i *encInstr, state *encoderState, p unsafe.Pointer) { + state.update(i) + // indirect through info to delay evaluation for recursive structs + state.enc.encodeStruct(state.b, info.encoder, uintptr(p)) + } + case reflect.Interface: + op = func(i *encInstr, state *encoderState, p unsafe.Pointer) { + // Interfaces transmit the name and contents of the concrete + // value they contain. + v := reflect.ValueOf(unsafe.Unreflect(t, unsafe.Pointer(p))) + iv := reflect.Indirect(v) + if !state.sendZero && (!iv.IsValid() || iv.IsNil()) { + return + } + state.update(i) + state.enc.encodeInterface(state.b, iv) + } + } + } + if op == nil { + errorf("can't happen: encode type %s", rt) + } + return &op, indir +} + +// gobEncodeOpFor returns the op for a type that is known to implement +// GobEncoder. +func (enc *Encoder) gobEncodeOpFor(ut *userTypeInfo) (*encOp, int) { + rt := ut.user + if ut.encIndir == -1 { + rt = reflect.PtrTo(rt) + } else if ut.encIndir > 0 { + for i := int8(0); i < ut.encIndir; i++ { + rt = rt.Elem() + } + } + var op encOp + op = func(i *encInstr, state *encoderState, p unsafe.Pointer) { + var v reflect.Value + if ut.encIndir == -1 { + // Need to climb up one level to turn value into pointer. + v = reflect.ValueOf(unsafe.Unreflect(rt, unsafe.Pointer(&p))) + } else { + v = reflect.ValueOf(unsafe.Unreflect(rt, p)) + } + if !state.sendZero && isZero(v) { + return + } + state.update(i) + state.enc.encodeGobEncoder(state.b, v) + } + return &op, int(ut.encIndir) // encIndir: op will get called with p == address of receiver. +} + +// compileEnc returns the engine to compile the type. +func (enc *Encoder) compileEnc(ut *userTypeInfo) *encEngine { + srt := ut.base + engine := new(encEngine) + seen := make(map[reflect.Type]*encOp) + rt := ut.base + if ut.isGobEncoder { + rt = ut.user + } + if !ut.isGobEncoder && + srt.Kind() == reflect.Struct { + for fieldNum, wireFieldNum := 0, 0; fieldNum < srt.NumField(); fieldNum++ { + f := srt.Field(fieldNum) + if !isExported(f.Name) { + continue + } + op, indir := enc.encOpFor(f.Type, seen) + engine.instr = append(engine.instr, encInstr{*op, wireFieldNum, indir, uintptr(f.Offset)}) + wireFieldNum++ + } + if srt.NumField() > 0 && len(engine.instr) == 0 { + errorf("type %s has no exported fields", rt) + } + engine.instr = append(engine.instr, encInstr{encStructTerminator, 0, 0, 0}) + } else { + engine.instr = make([]encInstr, 1) + op, indir := enc.encOpFor(rt, seen) + engine.instr[0] = encInstr{*op, singletonField, indir, 0} // offset is zero + } + return engine +} + +// getEncEngine returns the engine to compile the type. +// typeLock must be held (or we're in initialization and guaranteed single-threaded). +func (enc *Encoder) getEncEngine(ut *userTypeInfo) *encEngine { + info, err1 := getTypeInfo(ut) + if err1 != nil { + error_(err1) + } + if info.encoder == nil { + // mark this engine as underway before compiling to handle recursive types. + info.encoder = new(encEngine) + info.encoder = enc.compileEnc(ut) + } + return info.encoder +} + +// lockAndGetEncEngine is a function that locks and compiles. +// This lets us hold the lock only while compiling, not when encoding. +func (enc *Encoder) lockAndGetEncEngine(ut *userTypeInfo) *encEngine { + typeLock.Lock() + defer typeLock.Unlock() + return enc.getEncEngine(ut) +} + +func (enc *Encoder) encode(b *bytes.Buffer, value reflect.Value, ut *userTypeInfo) { + defer catchError(&enc.err) + engine := enc.lockAndGetEncEngine(ut) + indir := ut.indir + if ut.isGobEncoder { + indir = int(ut.encIndir) + } + for i := 0; i < indir; i++ { + value = reflect.Indirect(value) + } + if !ut.isGobEncoder && value.Type().Kind() == reflect.Struct { + enc.encodeStruct(b, engine, unsafeAddr(value)) + } else { + enc.encodeSingle(b, engine, unsafeAddr(value)) + } +} diff --git a/libgo/go/encoding/gob/encoder.go b/libgo/go/encoding/gob/encoder.go new file mode 100644 index 0000000..e4a48df --- /dev/null +++ b/libgo/go/encoding/gob/encoder.go @@ -0,0 +1,251 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gob + +import ( + "bytes" + "errors" + "io" + "reflect" + "sync" +) + +// An Encoder manages the transmission of type and data information to the +// other side of a connection. +type Encoder struct { + mutex sync.Mutex // each item must be sent atomically + w []io.Writer // where to send the data + sent map[reflect.Type]typeId // which types we've already sent + countState *encoderState // stage for writing counts + freeList *encoderState // list of free encoderStates; avoids reallocation + byteBuf bytes.Buffer // buffer for top-level encoderState + err error +} + +// Before we encode a message, we reserve space at the head of the +// buffer in which to encode its length. This means we can use the +// buffer to assemble the message without another allocation. +const maxLength = 9 // Maximum size of an encoded length. +var spaceForLength = make([]byte, maxLength) + +// NewEncoder returns a new encoder that will transmit on the io.Writer. +func NewEncoder(w io.Writer) *Encoder { + enc := new(Encoder) + enc.w = []io.Writer{w} + enc.sent = make(map[reflect.Type]typeId) + enc.countState = enc.newEncoderState(new(bytes.Buffer)) + return enc +} + +// writer() returns the innermost writer the encoder is using +func (enc *Encoder) writer() io.Writer { + return enc.w[len(enc.w)-1] +} + +// pushWriter adds a writer to the encoder. +func (enc *Encoder) pushWriter(w io.Writer) { + enc.w = append(enc.w, w) +} + +// popWriter pops the innermost writer. +func (enc *Encoder) popWriter() { + enc.w = enc.w[0 : len(enc.w)-1] +} + +func (enc *Encoder) badType(rt reflect.Type) { + enc.setError(errors.New("gob: can't encode type " + rt.String())) +} + +func (enc *Encoder) setError(err error) { + if enc.err == nil { // remember the first. + enc.err = err + } +} + +// writeMessage sends the data item preceded by a unsigned count of its length. +func (enc *Encoder) writeMessage(w io.Writer, b *bytes.Buffer) { + // Space has been reserved for the length at the head of the message. + // This is a little dirty: we grab the slice from the bytes.Buffer and massage + // it by hand. + message := b.Bytes() + messageLen := len(message) - maxLength + // Encode the length. + enc.countState.b.Reset() + enc.countState.encodeUint(uint64(messageLen)) + // Copy the length to be a prefix of the message. + offset := maxLength - enc.countState.b.Len() + copy(message[offset:], enc.countState.b.Bytes()) + // Write the data. + _, err := w.Write(message[offset:]) + // Drain the buffer and restore the space at the front for the count of the next message. + b.Reset() + b.Write(spaceForLength) + if err != nil { + enc.setError(err) + } +} + +// sendActualType sends the requested type, without further investigation, unless +// it's been sent before. +func (enc *Encoder) sendActualType(w io.Writer, state *encoderState, ut *userTypeInfo, actual reflect.Type) (sent bool) { + if _, alreadySent := enc.sent[actual]; alreadySent { + return false + } + typeLock.Lock() + info, err := getTypeInfo(ut) + typeLock.Unlock() + if err != nil { + enc.setError(err) + return + } + // Send the pair (-id, type) + // Id: + state.encodeInt(-int64(info.id)) + // Type: + enc.encode(state.b, reflect.ValueOf(info.wire), wireTypeUserInfo) + enc.writeMessage(w, state.b) + if enc.err != nil { + return + } + + // Remember we've sent this type, both what the user gave us and the base type. + enc.sent[ut.base] = info.id + if ut.user != ut.base { + enc.sent[ut.user] = info.id + } + // Now send the inner types + switch st := actual; st.Kind() { + case reflect.Struct: + for i := 0; i < st.NumField(); i++ { + enc.sendType(w, state, st.Field(i).Type) + } + case reflect.Array, reflect.Slice: + enc.sendType(w, state, st.Elem()) + case reflect.Map: + enc.sendType(w, state, st.Key()) + enc.sendType(w, state, st.Elem()) + } + return true +} + +// sendType sends the type info to the other side, if necessary. +func (enc *Encoder) sendType(w io.Writer, state *encoderState, origt reflect.Type) (sent bool) { + ut := userType(origt) + if ut.isGobEncoder { + // The rules are different: regardless of the underlying type's representation, + // we need to tell the other side that this exact type is a GobEncoder. + return enc.sendActualType(w, state, ut, ut.user) + } + + // It's a concrete value, so drill down to the base type. + switch rt := ut.base; rt.Kind() { + default: + // Basic types and interfaces do not need to be described. + return + case reflect.Slice: + // If it's []uint8, don't send; it's considered basic. + if rt.Elem().Kind() == reflect.Uint8 { + return + } + // Otherwise we do send. + break + case reflect.Array: + // arrays must be sent so we know their lengths and element types. + break + case reflect.Map: + // maps must be sent so we know their lengths and key/value types. + break + case reflect.Struct: + // structs must be sent so we know their fields. + break + case reflect.Chan, reflect.Func: + // Probably a bad field in a struct. + enc.badType(rt) + return + } + + return enc.sendActualType(w, state, ut, ut.base) +} + +// Encode transmits the data item represented by the empty interface value, +// guaranteeing that all necessary type information has been transmitted first. +func (enc *Encoder) Encode(e interface{}) error { + return enc.EncodeValue(reflect.ValueOf(e)) +} + +// sendTypeDescriptor makes sure the remote side knows about this type. +// It will send a descriptor if this is the first time the type has been +// sent. +func (enc *Encoder) sendTypeDescriptor(w io.Writer, state *encoderState, ut *userTypeInfo) { + // Make sure the type is known to the other side. + // First, have we already sent this type? + rt := ut.base + if ut.isGobEncoder { + rt = ut.user + } + if _, alreadySent := enc.sent[rt]; !alreadySent { + // No, so send it. + sent := enc.sendType(w, state, rt) + if enc.err != nil { + return + } + // If the type info has still not been transmitted, it means we have + // a singleton basic type (int, []byte etc.) at top level. We don't + // need to send the type info but we do need to update enc.sent. + if !sent { + typeLock.Lock() + info, err := getTypeInfo(ut) + typeLock.Unlock() + if err != nil { + enc.setError(err) + return + } + enc.sent[rt] = info.id + } + } +} + +// sendTypeId sends the id, which must have already been defined. +func (enc *Encoder) sendTypeId(state *encoderState, ut *userTypeInfo) { + // Identify the type of this top-level value. + state.encodeInt(int64(enc.sent[ut.base])) +} + +// EncodeValue transmits the data item represented by the reflection value, +// guaranteeing that all necessary type information has been transmitted first. +func (enc *Encoder) EncodeValue(value reflect.Value) error { + // Make sure we're single-threaded through here, so multiple + // goroutines can share an encoder. + enc.mutex.Lock() + defer enc.mutex.Unlock() + + // Remove any nested writers remaining due to previous errors. + enc.w = enc.w[0:1] + + ut, err := validUserType(value.Type()) + if err != nil { + return err + } + + enc.err = nil + enc.byteBuf.Reset() + enc.byteBuf.Write(spaceForLength) + state := enc.newEncoderState(&enc.byteBuf) + + enc.sendTypeDescriptor(enc.writer(), state, ut) + enc.sendTypeId(state, ut) + if enc.err != nil { + return enc.err + } + + // Encode the object. + enc.encode(state.b, value, ut) + if enc.err == nil { + enc.writeMessage(enc.writer(), state.b) + } + + enc.freeEncoderState(state) + return enc.err +} diff --git a/libgo/go/encoding/gob/encoder_test.go b/libgo/go/encoding/gob/encoder_test.go new file mode 100644 index 0000000..bc5af12 --- /dev/null +++ b/libgo/go/encoding/gob/encoder_test.go @@ -0,0 +1,664 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gob + +import ( + "bytes" + "fmt" + "io" + "reflect" + "strings" + "testing" +) + +type ET2 struct { + X string +} + +type ET1 struct { + A int + Et2 *ET2 + Next *ET1 +} + +// Like ET1 but with a different name for a field +type ET3 struct { + A int + Et2 *ET2 + DifferentNext *ET1 +} + +// Like ET1 but with a different type for a field +type ET4 struct { + A int + Et2 float64 + Next int +} + +func TestEncoderDecoder(t *testing.T) { + b := new(bytes.Buffer) + enc := NewEncoder(b) + et1 := new(ET1) + et1.A = 7 + et1.Et2 = new(ET2) + err := enc.Encode(et1) + if err != nil { + t.Error("encoder fail:", err) + } + dec := NewDecoder(b) + newEt1 := new(ET1) + err = dec.Decode(newEt1) + if err != nil { + t.Fatal("error decoding ET1:", err) + } + + if !reflect.DeepEqual(et1, newEt1) { + t.Fatalf("invalid data for et1: expected %+v; got %+v", *et1, *newEt1) + } + if b.Len() != 0 { + t.Error("not at eof;", b.Len(), "bytes left") + } + + enc.Encode(et1) + newEt1 = new(ET1) + err = dec.Decode(newEt1) + if err != nil { + t.Fatal("round 2: error decoding ET1:", err) + } + if !reflect.DeepEqual(et1, newEt1) { + t.Fatalf("round 2: invalid data for et1: expected %+v; got %+v", *et1, *newEt1) + } + if b.Len() != 0 { + t.Error("round 2: not at eof;", b.Len(), "bytes left") + } + + // Now test with a running encoder/decoder pair that we recognize a type mismatch. + err = enc.Encode(et1) + if err != nil { + t.Error("round 3: encoder fail:", err) + } + newEt2 := new(ET2) + err = dec.Decode(newEt2) + if err == nil { + t.Fatal("round 3: expected `bad type' error decoding ET2") + } +} + +// Run one value through the encoder/decoder, but use the wrong type. +// Input is always an ET1; we compare it to whatever is under 'e'. +func badTypeCheck(e interface{}, shouldFail bool, msg string, t *testing.T) { + b := new(bytes.Buffer) + enc := NewEncoder(b) + et1 := new(ET1) + et1.A = 7 + et1.Et2 = new(ET2) + err := enc.Encode(et1) + if err != nil { + t.Error("encoder fail:", err) + } + dec := NewDecoder(b) + err = dec.Decode(e) + if shouldFail && err == nil { + t.Error("expected error for", msg) + } + if !shouldFail && err != nil { + t.Error("unexpected error for", msg, err) + } +} + +// Test that we recognize a bad type the first time. +func TestWrongTypeDecoder(t *testing.T) { + badTypeCheck(new(ET2), true, "no fields in common", t) + badTypeCheck(new(ET3), false, "different name of field", t) + badTypeCheck(new(ET4), true, "different type of field", t) +} + +func corruptDataCheck(s string, err error, t *testing.T) { + b := bytes.NewBufferString(s) + dec := NewDecoder(b) + err1 := dec.Decode(new(ET2)) + if err1 != err { + t.Errorf("from %q expected error %s; got %s", s, err, err1) + } +} + +// Check that we survive bad data. +func TestBadData(t *testing.T) { + corruptDataCheck("", io.EOF, t) + corruptDataCheck("\x7Fhi", io.ErrUnexpectedEOF, t) + corruptDataCheck("\x03now is the time for all good men", errBadType, t) +} + +// Types not supported by the Encoder. +var unsupportedValues = []interface{}{ + make(chan int), + func(a int) bool { return true }, +} + +func TestUnsupported(t *testing.T) { + var b bytes.Buffer + enc := NewEncoder(&b) + for _, v := range unsupportedValues { + err := enc.Encode(v) + if err == nil { + t.Errorf("expected error for %T; got none", v) + } + } +} + +func encAndDec(in, out interface{}) error { + b := new(bytes.Buffer) + enc := NewEncoder(b) + err := enc.Encode(in) + if err != nil { + return err + } + dec := NewDecoder(b) + err = dec.Decode(out) + if err != nil { + return err + } + return nil +} + +func TestTypeToPtrType(t *testing.T) { + // Encode a T, decode a *T + type Type0 struct { + A int + } + t0 := Type0{7} + t0p := new(Type0) + if err := encAndDec(t0, t0p); err != nil { + t.Error(err) + } +} + +func TestPtrTypeToType(t *testing.T) { + // Encode a *T, decode a T + type Type1 struct { + A uint + } + t1p := &Type1{17} + var t1 Type1 + if err := encAndDec(t1, t1p); err != nil { + t.Error(err) + } +} + +func TestTypeToPtrPtrPtrPtrType(t *testing.T) { + type Type2 struct { + A ****float64 + } + t2 := Type2{} + t2.A = new(***float64) + *t2.A = new(**float64) + **t2.A = new(*float64) + ***t2.A = new(float64) + ****t2.A = 27.4 + t2pppp := new(***Type2) + if err := encAndDec(t2, t2pppp); err != nil { + t.Fatal(err) + } + if ****(****t2pppp).A != ****t2.A { + t.Errorf("wrong value after decode: %g not %g", ****(****t2pppp).A, ****t2.A) + } +} + +func TestSlice(t *testing.T) { + type Type3 struct { + A []string + } + t3p := &Type3{[]string{"hello", "world"}} + var t3 Type3 + if err := encAndDec(t3, t3p); err != nil { + t.Error(err) + } +} + +func TestValueError(t *testing.T) { + // Encode a *T, decode a T + type Type4 struct { + A int + } + t4p := &Type4{3} + var t4 Type4 // note: not a pointer. + if err := encAndDec(t4p, t4); err == nil || strings.Index(err.Error(), "pointer") < 0 { + t.Error("expected error about pointer; got", err) + } +} + +func TestArray(t *testing.T) { + type Type5 struct { + A [3]string + B [3]byte + } + type Type6 struct { + A [2]string // can't hold t5.a + } + t5 := Type5{[3]string{"hello", ",", "world"}, [3]byte{1, 2, 3}} + var t5p Type5 + if err := encAndDec(t5, &t5p); err != nil { + t.Error(err) + } + var t6 Type6 + if err := encAndDec(t5, &t6); err == nil { + t.Error("should fail with mismatched array sizes") + } +} + +func TestRecursiveMapType(t *testing.T) { + type recursiveMap map[string]recursiveMap + r1 := recursiveMap{"A": recursiveMap{"B": nil, "C": nil}, "D": nil} + r2 := make(recursiveMap) + if err := encAndDec(r1, &r2); err != nil { + t.Error(err) + } +} + +func TestRecursiveSliceType(t *testing.T) { + type recursiveSlice []recursiveSlice + r1 := recursiveSlice{0: recursiveSlice{0: nil}, 1: nil} + r2 := make(recursiveSlice, 0) + if err := encAndDec(r1, &r2); err != nil { + t.Error(err) + } +} + +// Regression test for bug: must send zero values inside arrays +func TestDefaultsInArray(t *testing.T) { + type Type7 struct { + B []bool + I []int + S []string + F []float64 + } + t7 := Type7{ + []bool{false, false, true}, + []int{0, 0, 1}, + []string{"hi", "", "there"}, + []float64{0, 0, 1}, + } + var t7p Type7 + if err := encAndDec(t7, &t7p); err != nil { + t.Error(err) + } +} + +var testInt int +var testFloat32 float32 +var testString string +var testSlice []string +var testMap map[string]int +var testArray [7]int + +type SingleTest struct { + in interface{} + out interface{} + err string +} + +var singleTests = []SingleTest{ + {17, &testInt, ""}, + {float32(17.5), &testFloat32, ""}, + {"bike shed", &testString, ""}, + {[]string{"bike", "shed", "paint", "color"}, &testSlice, ""}, + {map[string]int{"seven": 7, "twelve": 12}, &testMap, ""}, + {[7]int{4, 55, 0, 0, 0, 0, 0}, &testArray, ""}, // case that once triggered a bug + {[7]int{4, 55, 1, 44, 22, 66, 1234}, &testArray, ""}, + + // Decode errors + {172, &testFloat32, "wrong type"}, +} + +func TestSingletons(t *testing.T) { + b := new(bytes.Buffer) + enc := NewEncoder(b) + dec := NewDecoder(b) + for _, test := range singleTests { + b.Reset() + err := enc.Encode(test.in) + if err != nil { + t.Errorf("error encoding %v: %s", test.in, err) + continue + } + err = dec.Decode(test.out) + switch { + case err != nil && test.err == "": + t.Errorf("error decoding %v: %s", test.in, err) + continue + case err == nil && test.err != "": + t.Errorf("expected error decoding %v: %s", test.in, test.err) + continue + case err != nil && test.err != "": + if strings.Index(err.Error(), test.err) < 0 { + t.Errorf("wrong error decoding %v: wanted %s, got %v", test.in, test.err, err) + } + continue + } + // Get rid of the pointer in the rhs + val := reflect.ValueOf(test.out).Elem().Interface() + if !reflect.DeepEqual(test.in, val) { + t.Errorf("decoding singleton: expected %v got %v", test.in, val) + } + } +} + +func TestStructNonStruct(t *testing.T) { + type Struct struct { + A string + } + type NonStruct string + s := Struct{"hello"} + var sp Struct + if err := encAndDec(s, &sp); err != nil { + t.Error(err) + } + var ns NonStruct + if err := encAndDec(s, &ns); err == nil { + t.Error("should get error for struct/non-struct") + } else if strings.Index(err.Error(), "type") < 0 { + t.Error("for struct/non-struct expected type error; got", err) + } + // Now try the other way + var nsp NonStruct + if err := encAndDec(ns, &nsp); err != nil { + t.Error(err) + } + if err := encAndDec(ns, &s); err == nil { + t.Error("should get error for non-struct/struct") + } else if strings.Index(err.Error(), "type") < 0 { + t.Error("for non-struct/struct expected type error; got", err) + } +} + +type interfaceIndirectTestI interface { + F() bool +} + +type interfaceIndirectTestT struct{} + +func (this *interfaceIndirectTestT) F() bool { + return true +} + +// A version of a bug reported on golang-nuts. Also tests top-level +// slice of interfaces. The issue was registering *T caused T to be +// stored as the concrete type. +func TestInterfaceIndirect(t *testing.T) { + Register(&interfaceIndirectTestT{}) + b := new(bytes.Buffer) + w := []interfaceIndirectTestI{&interfaceIndirectTestT{}} + err := NewEncoder(b).Encode(w) + if err != nil { + t.Fatal("encode error:", err) + } + + var r []interfaceIndirectTestI + err = NewDecoder(b).Decode(&r) + if err != nil { + t.Fatal("decode error:", err) + } +} + +// Now follow various tests that decode into things that can't represent the +// encoded value, all of which should be legal. + +// Also, when the ignored object contains an interface value, it may define +// types. Make sure that skipping the value still defines the types by using +// the encoder/decoder pair to send a value afterwards. If an interface +// is sent, its type in the test is always NewType0, so this checks that the +// encoder and decoder don't skew with respect to type definitions. + +type Struct0 struct { + I interface{} +} + +type NewType0 struct { + S string +} + +type ignoreTest struct { + in, out interface{} +} + +var ignoreTests = []ignoreTest{ + // Decode normal struct into an empty struct + {&struct{ A int }{23}, &struct{}{}}, + // Decode normal struct into a nil. + {&struct{ A int }{23}, nil}, + // Decode singleton string into a nil. + {"hello, world", nil}, + // Decode singleton slice into a nil. + {[]int{1, 2, 3, 4}, nil}, + // Decode struct containing an interface into a nil. + {&Struct0{&NewType0{"value0"}}, nil}, + // Decode singleton slice of interfaces into a nil. + {[]interface{}{"hi", &NewType0{"value1"}, 23}, nil}, +} + +func TestDecodeIntoNothing(t *testing.T) { + Register(new(NewType0)) + for i, test := range ignoreTests { + b := new(bytes.Buffer) + enc := NewEncoder(b) + err := enc.Encode(test.in) + if err != nil { + t.Errorf("%d: encode error %s:", i, err) + continue + } + dec := NewDecoder(b) + err = dec.Decode(test.out) + if err != nil { + t.Errorf("%d: decode error: %s", i, err) + continue + } + // Now see if the encoder and decoder are in a consistent state. + str := fmt.Sprintf("Value %d", i) + err = enc.Encode(&NewType0{str}) + if err != nil { + t.Fatalf("%d: NewType0 encode error: %s", i, err) + } + ns := new(NewType0) + err = dec.Decode(ns) + if err != nil { + t.Fatalf("%d: NewType0 decode error: %s", i, err) + } + if ns.S != str { + t.Fatalf("%d: expected %q got %q", i, str, ns.S) + } + } +} + +// Another bug from golang-nuts, involving nested interfaces. +type Bug0Outer struct { + Bug0Field interface{} +} + +type Bug0Inner struct { + A int +} + +func TestNestedInterfaces(t *testing.T) { + var buf bytes.Buffer + e := NewEncoder(&buf) + d := NewDecoder(&buf) + Register(new(Bug0Outer)) + Register(new(Bug0Inner)) + f := &Bug0Outer{&Bug0Outer{&Bug0Inner{7}}} + var v interface{} = f + err := e.Encode(&v) + if err != nil { + t.Fatal("Encode:", err) + } + err = d.Decode(&v) + if err != nil { + t.Fatal("Decode:", err) + } + // Make sure it decoded correctly. + outer1, ok := v.(*Bug0Outer) + if !ok { + t.Fatalf("v not Bug0Outer: %T", v) + } + outer2, ok := outer1.Bug0Field.(*Bug0Outer) + if !ok { + t.Fatalf("v.Bug0Field not Bug0Outer: %T", outer1.Bug0Field) + } + inner, ok := outer2.Bug0Field.(*Bug0Inner) + if !ok { + t.Fatalf("v.Bug0Field.Bug0Field not Bug0Inner: %T", outer2.Bug0Field) + } + if inner.A != 7 { + t.Fatalf("final value %d; expected %d", inner.A, 7) + } +} + +// The bugs keep coming. We forgot to send map subtypes before the map. + +type Bug1Elem struct { + Name string + Id int +} + +type Bug1StructMap map[string]Bug1Elem + +func bug1EncDec(in Bug1StructMap, out *Bug1StructMap) error { + return nil +} + +func TestMapBug1(t *testing.T) { + in := make(Bug1StructMap) + in["val1"] = Bug1Elem{"elem1", 1} + in["val2"] = Bug1Elem{"elem2", 2} + + b := new(bytes.Buffer) + enc := NewEncoder(b) + err := enc.Encode(in) + if err != nil { + t.Fatal("encode:", err) + } + dec := NewDecoder(b) + out := make(Bug1StructMap) + err = dec.Decode(&out) + if err != nil { + t.Fatal("decode:", err) + } + if !reflect.DeepEqual(in, out) { + t.Errorf("mismatch: %v %v", in, out) + } +} + +func TestGobMapInterfaceEncode(t *testing.T) { + m := map[string]interface{}{ + "up": uintptr(0), + "i0": []int{-1}, + "i1": []int8{-1}, + "i2": []int16{-1}, + "i3": []int32{-1}, + "i4": []int64{-1}, + "u0": []uint{1}, + "u1": []uint8{1}, + "u2": []uint16{1}, + "u3": []uint32{1}, + "u4": []uint64{1}, + "f0": []float32{1}, + "f1": []float64{1}, + "c0": []complex64{complex(2, -2)}, + "c1": []complex128{complex(2, float64(-2))}, + "us": []uintptr{0}, + "bo": []bool{false}, + "st": []string{"s"}, + } + buf := bytes.NewBuffer(nil) + enc := NewEncoder(buf) + err := enc.Encode(m) + if err != nil { + t.Errorf("encode map: %s", err) + } +} + +func TestSliceReusesMemory(t *testing.T) { + buf := bytes.NewBuffer(nil) + // Bytes + { + x := []byte("abcd") + enc := NewEncoder(buf) + err := enc.Encode(x) + if err != nil { + t.Errorf("bytes: encode: %s", err) + } + // Decode into y, which is big enough. + y := []byte("ABCDE") + addr := &y[0] + dec := NewDecoder(buf) + err = dec.Decode(&y) + if err != nil { + t.Fatal("bytes: decode:", err) + } + if !bytes.Equal(x, y) { + t.Errorf("bytes: expected %q got %q\n", x, y) + } + if addr != &y[0] { + t.Errorf("bytes: unnecessary reallocation") + } + } + // general slice + { + x := []rune("abcd") + enc := NewEncoder(buf) + err := enc.Encode(x) + if err != nil { + t.Errorf("ints: encode: %s", err) + } + // Decode into y, which is big enough. + y := []rune("ABCDE") + addr := &y[0] + dec := NewDecoder(buf) + err = dec.Decode(&y) + if err != nil { + t.Fatal("ints: decode:", err) + } + if !reflect.DeepEqual(x, y) { + t.Errorf("ints: expected %q got %q\n", x, y) + } + if addr != &y[0] { + t.Errorf("ints: unnecessary reallocation") + } + } +} + +// Used to crash: negative count in recvMessage. +func TestBadCount(t *testing.T) { + b := []byte{0xfb, 0xa5, 0x82, 0x2f, 0xca, 0x1} + if err := NewDecoder(bytes.NewBuffer(b)).Decode(nil); err == nil { + t.Error("expected error from bad count") + } else if err.Error() != errBadCount.Error() { + t.Error("expected bad count error; got", err) + } +} + +// Verify that sequential Decoders built on a single input will +// succeed if the input implements ReadByte and there is no +// type information in the stream. +func TestSequentialDecoder(t *testing.T) { + b := new(bytes.Buffer) + enc := NewEncoder(b) + const count = 10 + for i := 0; i < count; i++ { + s := fmt.Sprintf("%d", i) + if err := enc.Encode(s); err != nil { + t.Error("encoder fail:", err) + } + } + for i := 0; i < count; i++ { + dec := NewDecoder(b) + var s string + if err := dec.Decode(&s); err != nil { + t.Fatal("decoder fail:", err) + } + if s != fmt.Sprintf("%d", i) { + t.Fatalf("decode expected %d got %s", i, s) + } + } +} diff --git a/libgo/go/encoding/gob/error.go b/libgo/go/encoding/gob/error.go new file mode 100644 index 0000000..fbae8b6 --- /dev/null +++ b/libgo/go/encoding/gob/error.go @@ -0,0 +1,39 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gob + +import "fmt" + +// Errors in decoding and encoding are handled using panic and recover. +// Panics caused by user error (that is, everything except run-time panics +// such as "index out of bounds" errors) do not leave the file that caused +// them, but are instead turned into plain error returns. Encoding and +// decoding functions and methods that do not return an error either use +// panic to report an error or are guaranteed error-free. + +// A gobError is used to distinguish errors (panics) generated in this package. +type gobError struct { + err error +} + +// errorf is like error_ but takes Printf-style arguments to construct an error. +// It always prefixes the message with "gob: ". +func errorf(format string, args ...interface{}) { + error_(fmt.Errorf("gob: "+format, args...)) +} + +// error wraps the argument error and uses it as the argument to panic. +func error_(err error) { + panic(gobError{err}) +} + +// catchError is meant to be used as a deferred function to turn a panic(gobError) into a +// plain error. It overwrites the error return of the function that deferred its call. +func catchError(err *error) { + if e := recover(); e != nil { + *err = e.(gobError).err // Will re-panic if not one of our errors, such as a runtime error. + } + return +} diff --git a/libgo/go/encoding/gob/gobencdec_test.go b/libgo/go/encoding/gob/gobencdec_test.go new file mode 100644 index 0000000..eacfd84 --- /dev/null +++ b/libgo/go/encoding/gob/gobencdec_test.go @@ -0,0 +1,528 @@ +// Copyright 20011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file contains tests of the GobEncoder/GobDecoder support. + +package gob + +import ( + "bytes" + "errors" + "fmt" + "io" + "strings" + "testing" +) + +// Types that implement the GobEncoder/Decoder interfaces. + +type ByteStruct struct { + a byte // not an exported field +} + +type StringStruct struct { + s string // not an exported field +} + +type ArrayStruct struct { + a [8192]byte // not an exported field +} + +type Gobber int + +type ValueGobber string // encodes with a value, decodes with a pointer. + +// The relevant methods + +func (g *ByteStruct) GobEncode() ([]byte, error) { + b := make([]byte, 3) + b[0] = g.a + b[1] = g.a + 1 + b[2] = g.a + 2 + return b, nil +} + +func (g *ByteStruct) GobDecode(data []byte) error { + if g == nil { + return errors.New("NIL RECEIVER") + } + // Expect N sequential-valued bytes. + if len(data) == 0 { + return io.EOF + } + g.a = data[0] + for i, c := range data { + if c != g.a+byte(i) { + return errors.New("invalid data sequence") + } + } + return nil +} + +func (g *StringStruct) GobEncode() ([]byte, error) { + return []byte(g.s), nil +} + +func (g *StringStruct) GobDecode(data []byte) error { + // Expect N sequential-valued bytes. + if len(data) == 0 { + return io.EOF + } + a := data[0] + for i, c := range data { + if c != a+byte(i) { + return errors.New("invalid data sequence") + } + } + g.s = string(data) + return nil +} + +func (a *ArrayStruct) GobEncode() ([]byte, error) { + return a.a[:], nil +} + +func (a *ArrayStruct) GobDecode(data []byte) error { + if len(data) != len(a.a) { + return errors.New("wrong length in array decode") + } + copy(a.a[:], data) + return nil +} + +func (g *Gobber) GobEncode() ([]byte, error) { + return []byte(fmt.Sprintf("VALUE=%d", *g)), nil +} + +func (g *Gobber) GobDecode(data []byte) error { + _, err := fmt.Sscanf(string(data), "VALUE=%d", (*int)(g)) + return err +} + +func (v ValueGobber) GobEncode() ([]byte, error) { + return []byte(fmt.Sprintf("VALUE=%s", v)), nil +} + +func (v *ValueGobber) GobDecode(data []byte) error { + _, err := fmt.Sscanf(string(data), "VALUE=%s", (*string)(v)) + return err +} + +// Structs that include GobEncodable fields. + +type GobTest0 struct { + X int // guarantee we have something in common with GobTest* + G *ByteStruct +} + +type GobTest1 struct { + X int // guarantee we have something in common with GobTest* + G *StringStruct +} + +type GobTest2 struct { + X int // guarantee we have something in common with GobTest* + G string // not a GobEncoder - should give us errors +} + +type GobTest3 struct { + X int // guarantee we have something in common with GobTest* + G *Gobber +} + +type GobTest4 struct { + X int // guarantee we have something in common with GobTest* + V ValueGobber +} + +type GobTest5 struct { + X int // guarantee we have something in common with GobTest* + V *ValueGobber +} + +type GobTestIgnoreEncoder struct { + X int // guarantee we have something in common with GobTest* +} + +type GobTestValueEncDec struct { + X int // guarantee we have something in common with GobTest* + G StringStruct // not a pointer. +} + +type GobTestIndirectEncDec struct { + X int // guarantee we have something in common with GobTest* + G ***StringStruct // indirections to the receiver. +} + +type GobTestArrayEncDec struct { + X int // guarantee we have something in common with GobTest* + A ArrayStruct // not a pointer. +} + +type GobTestIndirectArrayEncDec struct { + X int // guarantee we have something in common with GobTest* + A ***ArrayStruct // indirections to a large receiver. +} + +func TestGobEncoderField(t *testing.T) { + b := new(bytes.Buffer) + // First a field that's a structure. + enc := NewEncoder(b) + err := enc.Encode(GobTest0{17, &ByteStruct{'A'}}) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := new(GobTest0) + err = dec.Decode(x) + if err != nil { + t.Fatal("decode error:", err) + } + if x.G.a != 'A' { + t.Errorf("expected 'A' got %c", x.G.a) + } + // Now a field that's not a structure. + b.Reset() + gobber := Gobber(23) + err = enc.Encode(GobTest3{17, &gobber}) + if err != nil { + t.Fatal("encode error:", err) + } + y := new(GobTest3) + err = dec.Decode(y) + if err != nil { + t.Fatal("decode error:", err) + } + if *y.G != 23 { + t.Errorf("expected '23 got %d", *y.G) + } +} + +// Even though the field is a value, we can still take its address +// and should be able to call the methods. +func TestGobEncoderValueField(t *testing.T) { + b := new(bytes.Buffer) + // First a field that's a structure. + enc := NewEncoder(b) + err := enc.Encode(GobTestValueEncDec{17, StringStruct{"HIJKL"}}) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := new(GobTestValueEncDec) + err = dec.Decode(x) + if err != nil { + t.Fatal("decode error:", err) + } + if x.G.s != "HIJKL" { + t.Errorf("expected `HIJKL` got %s", x.G.s) + } +} + +// GobEncode/Decode should work even if the value is +// more indirect than the receiver. +func TestGobEncoderIndirectField(t *testing.T) { + b := new(bytes.Buffer) + // First a field that's a structure. + enc := NewEncoder(b) + s := &StringStruct{"HIJKL"} + sp := &s + err := enc.Encode(GobTestIndirectEncDec{17, &sp}) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := new(GobTestIndirectEncDec) + err = dec.Decode(x) + if err != nil { + t.Fatal("decode error:", err) + } + if (***x.G).s != "HIJKL" { + t.Errorf("expected `HIJKL` got %s", (***x.G).s) + } +} + +// Test with a large field with methods. +func TestGobEncoderArrayField(t *testing.T) { + b := new(bytes.Buffer) + enc := NewEncoder(b) + var a GobTestArrayEncDec + a.X = 17 + for i := range a.A.a { + a.A.a[i] = byte(i) + } + err := enc.Encode(a) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := new(GobTestArrayEncDec) + err = dec.Decode(x) + if err != nil { + t.Fatal("decode error:", err) + } + for i, v := range x.A.a { + if v != byte(i) { + t.Errorf("expected %x got %x", byte(i), v) + break + } + } +} + +// Test an indirection to a large field with methods. +func TestGobEncoderIndirectArrayField(t *testing.T) { + b := new(bytes.Buffer) + enc := NewEncoder(b) + var a GobTestIndirectArrayEncDec + a.X = 17 + var array ArrayStruct + ap := &array + app := &ap + a.A = &app + for i := range array.a { + array.a[i] = byte(i) + } + err := enc.Encode(a) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := new(GobTestIndirectArrayEncDec) + err = dec.Decode(x) + if err != nil { + t.Fatal("decode error:", err) + } + for i, v := range (***x.A).a { + if v != byte(i) { + t.Errorf("expected %x got %x", byte(i), v) + break + } + } +} + +// As long as the fields have the same name and implement the +// interface, we can cross-connect them. Not sure it's useful +// and may even be bad but it works and it's hard to prevent +// without exposing the contents of the object, which would +// defeat the purpose. +func TestGobEncoderFieldsOfDifferentType(t *testing.T) { + // first, string in field to byte in field + b := new(bytes.Buffer) + enc := NewEncoder(b) + err := enc.Encode(GobTest1{17, &StringStruct{"ABC"}}) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := new(GobTest0) + err = dec.Decode(x) + if err != nil { + t.Fatal("decode error:", err) + } + if x.G.a != 'A' { + t.Errorf("expected 'A' got %c", x.G.a) + } + // now the other direction, byte in field to string in field + b.Reset() + err = enc.Encode(GobTest0{17, &ByteStruct{'X'}}) + if err != nil { + t.Fatal("encode error:", err) + } + y := new(GobTest1) + err = dec.Decode(y) + if err != nil { + t.Fatal("decode error:", err) + } + if y.G.s != "XYZ" { + t.Fatalf("expected `XYZ` got %c", y.G.s) + } +} + +// Test that we can encode a value and decode into a pointer. +func TestGobEncoderValueEncoder(t *testing.T) { + // first, string in field to byte in field + b := new(bytes.Buffer) + enc := NewEncoder(b) + err := enc.Encode(GobTest4{17, ValueGobber("hello")}) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := new(GobTest5) + err = dec.Decode(x) + if err != nil { + t.Fatal("decode error:", err) + } + if *x.V != "hello" { + t.Errorf("expected `hello` got %s", x.V) + } +} + +func TestGobEncoderFieldTypeError(t *testing.T) { + // GobEncoder to non-decoder: error + b := new(bytes.Buffer) + enc := NewEncoder(b) + err := enc.Encode(GobTest1{17, &StringStruct{"ABC"}}) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := &GobTest2{} + err = dec.Decode(x) + if err == nil { + t.Fatal("expected decode error for mismatched fields (encoder to non-decoder)") + } + if strings.Index(err.Error(), "type") < 0 { + t.Fatal("expected type error; got", err) + } + // Non-encoder to GobDecoder: error + b.Reset() + err = enc.Encode(GobTest2{17, "ABC"}) + if err != nil { + t.Fatal("encode error:", err) + } + y := &GobTest1{} + err = dec.Decode(y) + if err == nil { + t.Fatal("expected decode error for mismatched fields (non-encoder to decoder)") + } + if strings.Index(err.Error(), "type") < 0 { + t.Fatal("expected type error; got", err) + } +} + +// Even though ByteStruct is a struct, it's treated as a singleton at the top level. +func TestGobEncoderStructSingleton(t *testing.T) { + b := new(bytes.Buffer) + enc := NewEncoder(b) + err := enc.Encode(&ByteStruct{'A'}) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := new(ByteStruct) + err = dec.Decode(x) + if err != nil { + t.Fatal("decode error:", err) + } + if x.a != 'A' { + t.Errorf("expected 'A' got %c", x.a) + } +} + +func TestGobEncoderNonStructSingleton(t *testing.T) { + b := new(bytes.Buffer) + enc := NewEncoder(b) + err := enc.Encode(Gobber(1234)) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + var x Gobber + err = dec.Decode(&x) + if err != nil { + t.Fatal("decode error:", err) + } + if x != 1234 { + t.Errorf("expected 1234 got %d", x) + } +} + +func TestGobEncoderIgnoreStructField(t *testing.T) { + b := new(bytes.Buffer) + // First a field that's a structure. + enc := NewEncoder(b) + err := enc.Encode(GobTest0{17, &ByteStruct{'A'}}) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := new(GobTestIgnoreEncoder) + err = dec.Decode(x) + if err != nil { + t.Fatal("decode error:", err) + } + if x.X != 17 { + t.Errorf("expected 17 got %c", x.X) + } +} + +func TestGobEncoderIgnoreNonStructField(t *testing.T) { + b := new(bytes.Buffer) + // First a field that's a structure. + enc := NewEncoder(b) + gobber := Gobber(23) + err := enc.Encode(GobTest3{17, &gobber}) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := new(GobTestIgnoreEncoder) + err = dec.Decode(x) + if err != nil { + t.Fatal("decode error:", err) + } + if x.X != 17 { + t.Errorf("expected 17 got %c", x.X) + } +} + +func TestGobEncoderIgnoreNilEncoder(t *testing.T) { + b := new(bytes.Buffer) + // First a field that's a structure. + enc := NewEncoder(b) + err := enc.Encode(GobTest0{X: 18}) // G is nil + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := new(GobTest0) + err = dec.Decode(x) + if err != nil { + t.Fatal("decode error:", err) + } + if x.X != 18 { + t.Errorf("expected x.X = 18, got %v", x.X) + } + if x.G != nil { + t.Errorf("expected x.G = nil, got %v", x.G) + } +} + +type gobDecoderBug0 struct { + foo, bar string +} + +func (br *gobDecoderBug0) String() string { + return br.foo + "-" + br.bar +} + +func (br *gobDecoderBug0) GobEncode() ([]byte, error) { + return []byte(br.String()), nil +} + +func (br *gobDecoderBug0) GobDecode(b []byte) error { + br.foo = "foo" + br.bar = "bar" + return nil +} + +// This was a bug: the receiver has a different indirection level +// than the variable. +func TestGobEncoderExtraIndirect(t *testing.T) { + gdb := &gobDecoderBug0{"foo", "bar"} + buf := new(bytes.Buffer) + e := NewEncoder(buf) + if err := e.Encode(gdb); err != nil { + t.Fatalf("encode: %v", err) + } + d := NewDecoder(buf) + var got *gobDecoderBug0 + if err := d.Decode(&got); err != nil { + t.Fatalf("decode: %v", err) + } + if got.foo != gdb.foo || got.bar != gdb.bar { + t.Errorf("got = %q, want %q", got, gdb) + } +} diff --git a/libgo/go/encoding/gob/timing_test.go b/libgo/go/encoding/gob/timing_test.go new file mode 100644 index 0000000..47437a6 --- /dev/null +++ b/libgo/go/encoding/gob/timing_test.go @@ -0,0 +1,94 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gob + +import ( + "bytes" + "fmt" + "io" + "os" + "runtime" + "testing" +) + +type Bench struct { + A int + B float64 + C string + D []byte +} + +func benchmarkEndToEnd(r io.Reader, w io.Writer, b *testing.B) { + b.StopTimer() + enc := NewEncoder(w) + dec := NewDecoder(r) + bench := &Bench{7, 3.2, "now is the time", []byte("for all good men")} + b.StartTimer() + for i := 0; i < b.N; i++ { + if enc.Encode(bench) != nil { + panic("encode error") + } + if dec.Decode(bench) != nil { + panic("decode error") + } + } +} + +func BenchmarkEndToEndPipe(b *testing.B) { + r, w, err := os.Pipe() + if err != nil { + panic("can't get pipe:" + err.Error()) + } + benchmarkEndToEnd(r, w, b) +} + +func BenchmarkEndToEndByteBuffer(b *testing.B) { + var buf bytes.Buffer + benchmarkEndToEnd(&buf, &buf, b) +} + +func TestCountEncodeMallocs(t *testing.T) { + var buf bytes.Buffer + enc := NewEncoder(&buf) + bench := &Bench{7, 3.2, "now is the time", []byte("for all good men")} + runtime.UpdateMemStats() + mallocs := 0 - runtime.MemStats.Mallocs + const count = 1000 + for i := 0; i < count; i++ { + err := enc.Encode(bench) + if err != nil { + t.Fatal("encode:", err) + } + } + runtime.UpdateMemStats() + mallocs += runtime.MemStats.Mallocs + fmt.Printf("mallocs per encode of type Bench: %d\n", mallocs/count) +} + +func TestCountDecodeMallocs(t *testing.T) { + var buf bytes.Buffer + enc := NewEncoder(&buf) + bench := &Bench{7, 3.2, "now is the time", []byte("for all good men")} + const count = 1000 + for i := 0; i < count; i++ { + err := enc.Encode(bench) + if err != nil { + t.Fatal("encode:", err) + } + } + dec := NewDecoder(&buf) + runtime.UpdateMemStats() + mallocs := 0 - runtime.MemStats.Mallocs + for i := 0; i < count; i++ { + *bench = Bench{} + err := dec.Decode(&bench) + if err != nil { + t.Fatal("decode:", err) + } + } + runtime.UpdateMemStats() + mallocs += runtime.MemStats.Mallocs + fmt.Printf("mallocs per decode of type Bench: %d\n", mallocs/count) +} diff --git a/libgo/go/encoding/gob/type.go b/libgo/go/encoding/gob/type.go new file mode 100644 index 0000000..1b20843 --- /dev/null +++ b/libgo/go/encoding/gob/type.go @@ -0,0 +1,788 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gob + +import ( + "errors" + "fmt" + "os" + "reflect" + "sync" + "unicode" + "unicode/utf8" +) + +// userTypeInfo stores the information associated with a type the user has handed +// to the package. It's computed once and stored in a map keyed by reflection +// type. +type userTypeInfo struct { + user reflect.Type // the type the user handed us + base reflect.Type // the base type after all indirections + indir int // number of indirections to reach the base type + isGobEncoder bool // does the type implement GobEncoder? + isGobDecoder bool // does the type implement GobDecoder? + encIndir int8 // number of indirections to reach the receiver type; may be negative + decIndir int8 // number of indirections to reach the receiver type; may be negative +} + +var ( + // Protected by an RWMutex because we read it a lot and write + // it only when we see a new type, typically when compiling. + userTypeLock sync.RWMutex + userTypeCache = make(map[reflect.Type]*userTypeInfo) +) + +// validType returns, and saves, the information associated with user-provided type rt. +// If the user type is not valid, err will be non-nil. To be used when the error handler +// is not set up. +func validUserType(rt reflect.Type) (ut *userTypeInfo, err error) { + userTypeLock.RLock() + ut = userTypeCache[rt] + userTypeLock.RUnlock() + if ut != nil { + return + } + // Now set the value under the write lock. + userTypeLock.Lock() + defer userTypeLock.Unlock() + if ut = userTypeCache[rt]; ut != nil { + // Lost the race; not a problem. + return + } + ut = new(userTypeInfo) + ut.base = rt + ut.user = rt + // A type that is just a cycle of pointers (such as type T *T) cannot + // be represented in gobs, which need some concrete data. We use a + // cycle detection algorithm from Knuth, Vol 2, Section 3.1, Ex 6, + // pp 539-540. As we step through indirections, run another type at + // half speed. If they meet up, there's a cycle. + slowpoke := ut.base // walks half as fast as ut.base + for { + pt := ut.base + if pt.Kind() != reflect.Ptr { + break + } + ut.base = pt.Elem() + if ut.base == slowpoke { // ut.base lapped slowpoke + // recursive pointer type. + return nil, errors.New("can't represent recursive pointer type " + ut.base.String()) + } + if ut.indir%2 == 0 { + slowpoke = slowpoke.Elem() + } + ut.indir++ + } + ut.isGobEncoder, ut.encIndir = implementsInterface(ut.user, gobEncoderInterfaceType) + ut.isGobDecoder, ut.decIndir = implementsInterface(ut.user, gobDecoderInterfaceType) + userTypeCache[rt] = ut + return +} + +var ( + gobEncoderInterfaceType = reflect.TypeOf((*GobEncoder)(nil)).Elem() + gobDecoderInterfaceType = reflect.TypeOf((*GobDecoder)(nil)).Elem() +) + +// implementsInterface reports whether the type implements the +// gobEncoder/gobDecoder interface. +// It also returns the number of indirections required to get to the +// implementation. +func implementsInterface(typ, gobEncDecType reflect.Type) (success bool, indir int8) { + if typ == nil { + return + } + rt := typ + // The type might be a pointer and we need to keep + // dereferencing to the base type until we find an implementation. + for { + if rt.Implements(gobEncDecType) { + return true, indir + } + if p := rt; p.Kind() == reflect.Ptr { + indir++ + if indir > 100 { // insane number of indirections + return false, 0 + } + rt = p.Elem() + continue + } + break + } + // No luck yet, but if this is a base type (non-pointer), the pointer might satisfy. + if typ.Kind() != reflect.Ptr { + // Not a pointer, but does the pointer work? + if reflect.PtrTo(typ).Implements(gobEncDecType) { + return true, -1 + } + } + return false, 0 +} + +// userType returns, and saves, the information associated with user-provided type rt. +// If the user type is not valid, it calls error. +func userType(rt reflect.Type) *userTypeInfo { + ut, err := validUserType(rt) + if err != nil { + error_(err) + } + return ut +} +// A typeId represents a gob Type as an integer that can be passed on the wire. +// Internally, typeIds are used as keys to a map to recover the underlying type info. +type typeId int32 + +var nextId typeId // incremented for each new type we build +var typeLock sync.Mutex // set while building a type +const firstUserId = 64 // lowest id number granted to user + +type gobType interface { + id() typeId + setId(id typeId) + name() string + string() string // not public; only for debugging + safeString(seen map[typeId]bool) string +} + +var types = make(map[reflect.Type]gobType) +var idToType = make(map[typeId]gobType) +var builtinIdToType map[typeId]gobType // set in init() after builtins are established + +func setTypeId(typ gobType) { + nextId++ + typ.setId(nextId) + idToType[nextId] = typ +} + +func (t typeId) gobType() gobType { + if t == 0 { + return nil + } + return idToType[t] +} + +// string returns the string representation of the type associated with the typeId. +func (t typeId) string() string { + if t.gobType() == nil { + return "<nil>" + } + return t.gobType().string() +} + +// Name returns the name of the type associated with the typeId. +func (t typeId) name() string { + if t.gobType() == nil { + return "<nil>" + } + return t.gobType().name() +} + +// Common elements of all types. +type CommonType struct { + Name string + Id typeId +} + +func (t *CommonType) id() typeId { return t.Id } + +func (t *CommonType) setId(id typeId) { t.Id = id } + +func (t *CommonType) string() string { return t.Name } + +func (t *CommonType) safeString(seen map[typeId]bool) string { + return t.Name +} + +func (t *CommonType) name() string { return t.Name } + +// Create and check predefined types +// The string for tBytes is "bytes" not "[]byte" to signify its specialness. + +var ( + // Primordial types, needed during initialization. + // Always passed as pointers so the interface{} type + // goes through without losing its interfaceness. + tBool = bootstrapType("bool", (*bool)(nil), 1) + tInt = bootstrapType("int", (*int)(nil), 2) + tUint = bootstrapType("uint", (*uint)(nil), 3) + tFloat = bootstrapType("float", (*float64)(nil), 4) + tBytes = bootstrapType("bytes", (*[]byte)(nil), 5) + tString = bootstrapType("string", (*string)(nil), 6) + tComplex = bootstrapType("complex", (*complex128)(nil), 7) + tInterface = bootstrapType("interface", (*interface{})(nil), 8) + // Reserve some Ids for compatible expansion + tReserved7 = bootstrapType("_reserved1", (*struct{ r7 int })(nil), 9) + tReserved6 = bootstrapType("_reserved1", (*struct{ r6 int })(nil), 10) + tReserved5 = bootstrapType("_reserved1", (*struct{ r5 int })(nil), 11) + tReserved4 = bootstrapType("_reserved1", (*struct{ r4 int })(nil), 12) + tReserved3 = bootstrapType("_reserved1", (*struct{ r3 int })(nil), 13) + tReserved2 = bootstrapType("_reserved1", (*struct{ r2 int })(nil), 14) + tReserved1 = bootstrapType("_reserved1", (*struct{ r1 int })(nil), 15) +) + +// Predefined because it's needed by the Decoder +var tWireType = mustGetTypeInfo(reflect.TypeOf(wireType{})).id +var wireTypeUserInfo *userTypeInfo // userTypeInfo of (*wireType) + +func init() { + // Some magic numbers to make sure there are no surprises. + checkId(16, tWireType) + checkId(17, mustGetTypeInfo(reflect.TypeOf(arrayType{})).id) + checkId(18, mustGetTypeInfo(reflect.TypeOf(CommonType{})).id) + checkId(19, mustGetTypeInfo(reflect.TypeOf(sliceType{})).id) + checkId(20, mustGetTypeInfo(reflect.TypeOf(structType{})).id) + checkId(21, mustGetTypeInfo(reflect.TypeOf(fieldType{})).id) + checkId(23, mustGetTypeInfo(reflect.TypeOf(mapType{})).id) + + builtinIdToType = make(map[typeId]gobType) + for k, v := range idToType { + builtinIdToType[k] = v + } + + // Move the id space upwards to allow for growth in the predefined world + // without breaking existing files. + if nextId > firstUserId { + panic(fmt.Sprintln("nextId too large:", nextId)) + } + nextId = firstUserId + registerBasics() + wireTypeUserInfo = userType(reflect.TypeOf((*wireType)(nil))) +} + +// Array type +type arrayType struct { + CommonType + Elem typeId + Len int +} + +func newArrayType(name string) *arrayType { + a := &arrayType{CommonType{Name: name}, 0, 0} + return a +} + +func (a *arrayType) init(elem gobType, len int) { + // Set our type id before evaluating the element's, in case it's our own. + setTypeId(a) + a.Elem = elem.id() + a.Len = len +} + +func (a *arrayType) safeString(seen map[typeId]bool) string { + if seen[a.Id] { + return a.Name + } + seen[a.Id] = true + return fmt.Sprintf("[%d]%s", a.Len, a.Elem.gobType().safeString(seen)) +} + +func (a *arrayType) string() string { return a.safeString(make(map[typeId]bool)) } + +// GobEncoder type (something that implements the GobEncoder interface) +type gobEncoderType struct { + CommonType +} + +func newGobEncoderType(name string) *gobEncoderType { + g := &gobEncoderType{CommonType{Name: name}} + setTypeId(g) + return g +} + +func (g *gobEncoderType) safeString(seen map[typeId]bool) string { + return g.Name +} + +func (g *gobEncoderType) string() string { return g.Name } + +// Map type +type mapType struct { + CommonType + Key typeId + Elem typeId +} + +func newMapType(name string) *mapType { + m := &mapType{CommonType{Name: name}, 0, 0} + return m +} + +func (m *mapType) init(key, elem gobType) { + // Set our type id before evaluating the element's, in case it's our own. + setTypeId(m) + m.Key = key.id() + m.Elem = elem.id() +} + +func (m *mapType) safeString(seen map[typeId]bool) string { + if seen[m.Id] { + return m.Name + } + seen[m.Id] = true + key := m.Key.gobType().safeString(seen) + elem := m.Elem.gobType().safeString(seen) + return fmt.Sprintf("map[%s]%s", key, elem) +} + +func (m *mapType) string() string { return m.safeString(make(map[typeId]bool)) } + +// Slice type +type sliceType struct { + CommonType + Elem typeId +} + +func newSliceType(name string) *sliceType { + s := &sliceType{CommonType{Name: name}, 0} + return s +} + +func (s *sliceType) init(elem gobType) { + // Set our type id before evaluating the element's, in case it's our own. + setTypeId(s) + s.Elem = elem.id() +} + +func (s *sliceType) safeString(seen map[typeId]bool) string { + if seen[s.Id] { + return s.Name + } + seen[s.Id] = true + return fmt.Sprintf("[]%s", s.Elem.gobType().safeString(seen)) +} + +func (s *sliceType) string() string { return s.safeString(make(map[typeId]bool)) } + +// Struct type +type fieldType struct { + Name string + Id typeId +} + +type structType struct { + CommonType + Field []*fieldType +} + +func (s *structType) safeString(seen map[typeId]bool) string { + if s == nil { + return "<nil>" + } + if _, ok := seen[s.Id]; ok { + return s.Name + } + seen[s.Id] = true + str := s.Name + " = struct { " + for _, f := range s.Field { + str += fmt.Sprintf("%s %s; ", f.Name, f.Id.gobType().safeString(seen)) + } + str += "}" + return str +} + +func (s *structType) string() string { return s.safeString(make(map[typeId]bool)) } + +func newStructType(name string) *structType { + s := &structType{CommonType{Name: name}, nil} + // For historical reasons we set the id here rather than init. + // See the comment in newTypeObject for details. + setTypeId(s) + return s +} + +// newTypeObject allocates a gobType for the reflection type rt. +// Unless ut represents a GobEncoder, rt should be the base type +// of ut. +// This is only called from the encoding side. The decoding side +// works through typeIds and userTypeInfos alone. +func newTypeObject(name string, ut *userTypeInfo, rt reflect.Type) (gobType, error) { + // Does this type implement GobEncoder? + if ut.isGobEncoder { + return newGobEncoderType(name), nil + } + var err error + var type0, type1 gobType + defer func() { + if err != nil { + delete(types, rt) + } + }() + // Install the top-level type before the subtypes (e.g. struct before + // fields) so recursive types can be constructed safely. + switch t := rt; t.Kind() { + // All basic types are easy: they are predefined. + case reflect.Bool: + return tBool.gobType(), nil + + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return tInt.gobType(), nil + + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + return tUint.gobType(), nil + + case reflect.Float32, reflect.Float64: + return tFloat.gobType(), nil + + case reflect.Complex64, reflect.Complex128: + return tComplex.gobType(), nil + + case reflect.String: + return tString.gobType(), nil + + case reflect.Interface: + return tInterface.gobType(), nil + + case reflect.Array: + at := newArrayType(name) + types[rt] = at + type0, err = getBaseType("", t.Elem()) + if err != nil { + return nil, err + } + // Historical aside: + // For arrays, maps, and slices, we set the type id after the elements + // are constructed. This is to retain the order of type id allocation after + // a fix made to handle recursive types, which changed the order in + // which types are built. Delaying the setting in this way preserves + // type ids while allowing recursive types to be described. Structs, + // done below, were already handling recursion correctly so they + // assign the top-level id before those of the field. + at.init(type0, t.Len()) + return at, nil + + case reflect.Map: + mt := newMapType(name) + types[rt] = mt + type0, err = getBaseType("", t.Key()) + if err != nil { + return nil, err + } + type1, err = getBaseType("", t.Elem()) + if err != nil { + return nil, err + } + mt.init(type0, type1) + return mt, nil + + case reflect.Slice: + // []byte == []uint8 is a special case + if t.Elem().Kind() == reflect.Uint8 { + return tBytes.gobType(), nil + } + st := newSliceType(name) + types[rt] = st + type0, err = getBaseType(t.Elem().Name(), t.Elem()) + if err != nil { + return nil, err + } + st.init(type0) + return st, nil + + case reflect.Struct: + st := newStructType(name) + types[rt] = st + idToType[st.id()] = st + for i := 0; i < t.NumField(); i++ { + f := t.Field(i) + if !isExported(f.Name) { + continue + } + typ := userType(f.Type).base + tname := typ.Name() + if tname == "" { + t := userType(f.Type).base + tname = t.String() + } + gt, err := getBaseType(tname, f.Type) + if err != nil { + return nil, err + } + st.Field = append(st.Field, &fieldType{f.Name, gt.id()}) + } + return st, nil + + default: + return nil, errors.New("gob NewTypeObject can't handle type: " + rt.String()) + } + return nil, nil +} + +// isExported reports whether this is an exported - upper case - name. +func isExported(name string) bool { + rune, _ := utf8.DecodeRuneInString(name) + return unicode.IsUpper(rune) +} + +// getBaseType returns the Gob type describing the given reflect.Type's base type. +// typeLock must be held. +func getBaseType(name string, rt reflect.Type) (gobType, error) { + ut := userType(rt) + return getType(name, ut, ut.base) +} + +// getType returns the Gob type describing the given reflect.Type. +// Should be called only when handling GobEncoders/Decoders, +// which may be pointers. All other types are handled through the +// base type, never a pointer. +// typeLock must be held. +func getType(name string, ut *userTypeInfo, rt reflect.Type) (gobType, error) { + typ, present := types[rt] + if present { + return typ, nil + } + typ, err := newTypeObject(name, ut, rt) + if err == nil { + types[rt] = typ + } + return typ, err +} + +func checkId(want, got typeId) { + if want != got { + fmt.Fprintf(os.Stderr, "checkId: %d should be %d\n", int(got), int(want)) + panic("bootstrap type wrong id: " + got.name() + " " + got.string() + " not " + want.string()) + } +} + +// used for building the basic types; called only from init(). the incoming +// interface always refers to a pointer. +func bootstrapType(name string, e interface{}, expect typeId) typeId { + rt := reflect.TypeOf(e).Elem() + _, present := types[rt] + if present { + panic("bootstrap type already present: " + name + ", " + rt.String()) + } + typ := &CommonType{Name: name} + types[rt] = typ + setTypeId(typ) + checkId(expect, nextId) + userType(rt) // might as well cache it now + return nextId +} + +// Representation of the information we send and receive about this type. +// Each value we send is preceded by its type definition: an encoded int. +// However, the very first time we send the value, we first send the pair +// (-id, wireType). +// For bootstrapping purposes, we assume that the recipient knows how +// to decode a wireType; it is exactly the wireType struct here, interpreted +// using the gob rules for sending a structure, except that we assume the +// ids for wireType and structType etc. are known. The relevant pieces +// are built in encode.go's init() function. +// To maintain binary compatibility, if you extend this type, always put +// the new fields last. +type wireType struct { + ArrayT *arrayType + SliceT *sliceType + StructT *structType + MapT *mapType + GobEncoderT *gobEncoderType +} + +func (w *wireType) string() string { + const unknown = "unknown type" + if w == nil { + return unknown + } + switch { + case w.ArrayT != nil: + return w.ArrayT.Name + case w.SliceT != nil: + return w.SliceT.Name + case w.StructT != nil: + return w.StructT.Name + case w.MapT != nil: + return w.MapT.Name + case w.GobEncoderT != nil: + return w.GobEncoderT.Name + } + return unknown +} + +type typeInfo struct { + id typeId + encoder *encEngine + wire *wireType +} + +var typeInfoMap = make(map[reflect.Type]*typeInfo) // protected by typeLock + +// typeLock must be held. +func getTypeInfo(ut *userTypeInfo) (*typeInfo, error) { + rt := ut.base + if ut.isGobEncoder { + // We want the user type, not the base type. + rt = ut.user + } + info, ok := typeInfoMap[rt] + if ok { + return info, nil + } + info = new(typeInfo) + gt, err := getBaseType(rt.Name(), rt) + if err != nil { + return nil, err + } + info.id = gt.id() + + if ut.isGobEncoder { + userType, err := getType(rt.Name(), ut, rt) + if err != nil { + return nil, err + } + info.wire = &wireType{GobEncoderT: userType.id().gobType().(*gobEncoderType)} + typeInfoMap[ut.user] = info + return info, nil + } + + t := info.id.gobType() + switch typ := rt; typ.Kind() { + case reflect.Array: + info.wire = &wireType{ArrayT: t.(*arrayType)} + case reflect.Map: + info.wire = &wireType{MapT: t.(*mapType)} + case reflect.Slice: + // []byte == []uint8 is a special case handled separately + if typ.Elem().Kind() != reflect.Uint8 { + info.wire = &wireType{SliceT: t.(*sliceType)} + } + case reflect.Struct: + info.wire = &wireType{StructT: t.(*structType)} + } + typeInfoMap[rt] = info + return info, nil +} + +// Called only when a panic is acceptable and unexpected. +func mustGetTypeInfo(rt reflect.Type) *typeInfo { + t, err := getTypeInfo(userType(rt)) + if err != nil { + panic("getTypeInfo: " + err.Error()) + } + return t +} + +// GobEncoder is the interface describing data that provides its own +// representation for encoding values for transmission to a GobDecoder. +// A type that implements GobEncoder and GobDecoder has complete +// control over the representation of its data and may therefore +// contain things such as private fields, channels, and functions, +// which are not usually transmissible in gob streams. +// +// Note: Since gobs can be stored permanently, It is good design +// to guarantee the encoding used by a GobEncoder is stable as the +// software evolves. For instance, it might make sense for GobEncode +// to include a version number in the encoding. +type GobEncoder interface { + // GobEncode returns a byte slice representing the encoding of the + // receiver for transmission to a GobDecoder, usually of the same + // concrete type. + GobEncode() ([]byte, error) +} + +// GobDecoder is the interface describing data that provides its own +// routine for decoding transmitted values sent by a GobEncoder. +type GobDecoder interface { + // GobDecode overwrites the receiver, which must be a pointer, + // with the value represented by the byte slice, which was written + // by GobEncode, usually for the same concrete type. + GobDecode([]byte) error +} + +var ( + nameToConcreteType = make(map[string]reflect.Type) + concreteTypeToName = make(map[reflect.Type]string) +) + +// RegisterName is like Register but uses the provided name rather than the +// type's default. +func RegisterName(name string, value interface{}) { + if name == "" { + // reserved for nil + panic("attempt to register empty name") + } + ut := userType(reflect.TypeOf(value)) + // Check for incompatible duplicates. The name must refer to the + // same user type, and vice versa. + if t, ok := nameToConcreteType[name]; ok && t != ut.user { + panic(fmt.Sprintf("gob: registering duplicate types for %q: %s != %s", name, t, ut.user)) + } + if n, ok := concreteTypeToName[ut.base]; ok && n != name { + panic(fmt.Sprintf("gob: registering duplicate names for %s: %q != %q", ut.user, n, name)) + } + // Store the name and type provided by the user.... + nameToConcreteType[name] = reflect.TypeOf(value) + // but the flattened type in the type table, since that's what decode needs. + concreteTypeToName[ut.base] = name +} + +// Register records a type, identified by a value for that type, under its +// internal type name. That name will identify the concrete type of a value +// sent or received as an interface variable. Only types that will be +// transferred as implementations of interface values need to be registered. +// Expecting to be used only during initialization, it panics if the mapping +// between types and names is not a bijection. +func Register(value interface{}) { + // Default to printed representation for unnamed types + rt := reflect.TypeOf(value) + name := rt.String() + + // But for named types (or pointers to them), qualify with import path. + // Dereference one pointer looking for a named type. + star := "" + if rt.Name() == "" { + if pt := rt; pt.Kind() == reflect.Ptr { + star = "*" + rt = pt + } + } + if rt.Name() != "" { + if rt.PkgPath() == "" { + name = star + rt.Name() + } else { + name = star + rt.PkgPath() + "." + rt.Name() + } + } + + RegisterName(name, value) +} + +func registerBasics() { + Register(int(0)) + Register(int8(0)) + Register(int16(0)) + Register(int32(0)) + Register(int64(0)) + Register(uint(0)) + Register(uint8(0)) + Register(uint16(0)) + Register(uint32(0)) + Register(uint64(0)) + Register(float32(0)) + Register(float64(0)) + Register(complex64(0i)) + Register(complex128(0i)) + Register(uintptr(0)) + Register(false) + Register("") + Register([]byte(nil)) + Register([]int(nil)) + Register([]int8(nil)) + Register([]int16(nil)) + Register([]int32(nil)) + Register([]int64(nil)) + Register([]uint(nil)) + Register([]uint8(nil)) + Register([]uint16(nil)) + Register([]uint32(nil)) + Register([]uint64(nil)) + Register([]float32(nil)) + Register([]float64(nil)) + Register([]complex64(nil)) + Register([]complex128(nil)) + Register([]uintptr(nil)) + Register([]bool(nil)) + Register([]string(nil)) +} diff --git a/libgo/go/encoding/gob/type_test.go b/libgo/go/encoding/gob/type_test.go new file mode 100644 index 0000000..42bdb4c --- /dev/null +++ b/libgo/go/encoding/gob/type_test.go @@ -0,0 +1,161 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gob + +import ( + "reflect" + "testing" +) + +type typeT struct { + id typeId + str string +} + +var basicTypes = []typeT{ + {tBool, "bool"}, + {tInt, "int"}, + {tUint, "uint"}, + {tFloat, "float"}, + {tBytes, "bytes"}, + {tString, "string"}, +} + +func getTypeUnlocked(name string, rt reflect.Type) gobType { + typeLock.Lock() + defer typeLock.Unlock() + t, err := getBaseType(name, rt) + if err != nil { + panic("getTypeUnlocked: " + err.Error()) + } + return t +} + +// Sanity checks +func TestBasic(t *testing.T) { + for _, tt := range basicTypes { + if tt.id.string() != tt.str { + t.Errorf("checkType: expected %q got %s", tt.str, tt.id.string()) + } + if tt.id == 0 { + t.Errorf("id for %q is zero", tt.str) + } + } +} + +// Reregister some basic types to check registration is idempotent. +func TestReregistration(t *testing.T) { + newtyp := getTypeUnlocked("int", reflect.TypeOf(int(0))) + if newtyp != tInt.gobType() { + t.Errorf("reregistration of %s got new type", newtyp.string()) + } + newtyp = getTypeUnlocked("uint", reflect.TypeOf(uint(0))) + if newtyp != tUint.gobType() { + t.Errorf("reregistration of %s got new type", newtyp.string()) + } + newtyp = getTypeUnlocked("string", reflect.TypeOf("hello")) + if newtyp != tString.gobType() { + t.Errorf("reregistration of %s got new type", newtyp.string()) + } +} + +func TestArrayType(t *testing.T) { + var a3 [3]int + a3int := getTypeUnlocked("foo", reflect.TypeOf(a3)) + newa3int := getTypeUnlocked("bar", reflect.TypeOf(a3)) + if a3int != newa3int { + t.Errorf("second registration of [3]int creates new type") + } + var a4 [4]int + a4int := getTypeUnlocked("goo", reflect.TypeOf(a4)) + if a3int == a4int { + t.Errorf("registration of [3]int creates same type as [4]int") + } + var b3 [3]bool + a3bool := getTypeUnlocked("", reflect.TypeOf(b3)) + if a3int == a3bool { + t.Errorf("registration of [3]bool creates same type as [3]int") + } + str := a3bool.string() + expected := "[3]bool" + if str != expected { + t.Errorf("array printed as %q; expected %q", str, expected) + } +} + +func TestSliceType(t *testing.T) { + var s []int + sint := getTypeUnlocked("slice", reflect.TypeOf(s)) + var news []int + newsint := getTypeUnlocked("slice1", reflect.TypeOf(news)) + if sint != newsint { + t.Errorf("second registration of []int creates new type") + } + var b []bool + sbool := getTypeUnlocked("", reflect.TypeOf(b)) + if sbool == sint { + t.Errorf("registration of []bool creates same type as []int") + } + str := sbool.string() + expected := "[]bool" + if str != expected { + t.Errorf("slice printed as %q; expected %q", str, expected) + } +} + +func TestMapType(t *testing.T) { + var m map[string]int + mapStringInt := getTypeUnlocked("map", reflect.TypeOf(m)) + var newm map[string]int + newMapStringInt := getTypeUnlocked("map1", reflect.TypeOf(newm)) + if mapStringInt != newMapStringInt { + t.Errorf("second registration of map[string]int creates new type") + } + var b map[string]bool + mapStringBool := getTypeUnlocked("", reflect.TypeOf(b)) + if mapStringBool == mapStringInt { + t.Errorf("registration of map[string]bool creates same type as map[string]int") + } + str := mapStringBool.string() + expected := "map[string]bool" + if str != expected { + t.Errorf("map printed as %q; expected %q", str, expected) + } +} + +type Bar struct { + X string +} + +// This structure has pointers and refers to itself, making it a good test case. +type Foo struct { + A int + B int32 // will become int + C string + D []byte + E *float64 // will become float64 + F ****float64 // will become float64 + G *Bar + H *Bar // should not interpolate the definition of Bar again + I *Foo // will not explode +} + +func TestStructType(t *testing.T) { + sstruct := getTypeUnlocked("Foo", reflect.TypeOf(Foo{})) + str := sstruct.string() + // If we can print it correctly, we built it correctly. + expected := "Foo = struct { A int; B int; C string; D bytes; E float; F float; G Bar = struct { X string; }; H Bar; I Foo; }" + if str != expected { + t.Errorf("struct printed as %q; expected %q", str, expected) + } +} + +// Should be OK to register the same type multiple times, as long as they're +// at the same level of indirection. +func TestRegistration(t *testing.T) { + type T struct{ a int } + Register(new(T)) + Register(new(T)) +} diff --git a/libgo/go/encoding/json/decode.go b/libgo/go/encoding/json/decode.go new file mode 100644 index 0000000..41295d2 --- /dev/null +++ b/libgo/go/encoding/json/decode.go @@ -0,0 +1,939 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Represents JSON data structure using native Go types: booleans, floats, +// strings, arrays, and maps. + +package json + +import ( + "encoding/base64" + "errors" + "reflect" + "runtime" + "strconv" + "strings" + "unicode" + "unicode/utf16" + "unicode/utf8" +) + +// Unmarshal parses the JSON-encoded data and stores the result +// in the value pointed to by v. +// +// Unmarshal uses the inverse of the encodings that +// Marshal uses, allocating maps, slices, and pointers as necessary, +// with the following additional rules: +// +// To unmarshal JSON into a pointer, Unmarshal first handles the case of +// the JSON being the JSON literal null. In that case, Unmarshal sets +// the pointer to nil. Otherwise, Unmarshal unmarshals the JSON into +// the value pointed at by the pointer. If the pointer is nil, Unmarshal +// allocates a new value for it to point to. +// +// To unmarshal JSON into an interface value, Unmarshal unmarshals +// the JSON into the concrete value contained in the interface value. +// If the interface value is nil, that is, has no concrete value stored in it, +// Unmarshal stores one of these in the interface value: +// +// bool, for JSON booleans +// float64, for JSON numbers +// string, for JSON strings +// []interface{}, for JSON arrays +// map[string]interface{}, for JSON objects +// nil for JSON null +// +// If a JSON value is not appropriate for a given target type, +// or if a JSON number overflows the target type, Unmarshal +// skips that field and completes the unmarshalling as best it can. +// If no more serious errors are encountered, Unmarshal returns +// an UnmarshalTypeError describing the earliest such error. +// +func Unmarshal(data []byte, v interface{}) error { + d := new(decodeState).init(data) + + // Quick check for well-formedness. + // Avoids filling out half a data structure + // before discovering a JSON syntax error. + err := checkValid(data, &d.scan) + if err != nil { + return err + } + + return d.unmarshal(v) +} + +// Unmarshaler is the interface implemented by objects +// that can unmarshal a JSON description of themselves. +// The input can be assumed to be a valid JSON object +// encoding. UnmarshalJSON must copy the JSON data +// if it wishes to retain the data after returning. +type Unmarshaler interface { + UnmarshalJSON([]byte) error +} + +// An UnmarshalTypeError describes a JSON value that was +// not appropriate for a value of a specific Go type. +type UnmarshalTypeError struct { + Value string // description of JSON value - "bool", "array", "number -5" + Type reflect.Type // type of Go value it could not be assigned to +} + +func (e *UnmarshalTypeError) Error() string { + return "json: cannot unmarshal " + e.Value + " into Go value of type " + e.Type.String() +} + +// An UnmarshalFieldError describes a JSON object key that +// led to an unexported (and therefore unwritable) struct field. +type UnmarshalFieldError struct { + Key string + Type reflect.Type + Field reflect.StructField +} + +func (e *UnmarshalFieldError) Error() string { + return "json: cannot unmarshal object key " + strconv.Quote(e.Key) + " into unexported field " + e.Field.Name + " of type " + e.Type.String() +} + +// An InvalidUnmarshalError describes an invalid argument passed to Unmarshal. +// (The argument to Unmarshal must be a non-nil pointer.) +type InvalidUnmarshalError struct { + Type reflect.Type +} + +func (e *InvalidUnmarshalError) Error() string { + if e.Type == nil { + return "json: Unmarshal(nil)" + } + + if e.Type.Kind() != reflect.Ptr { + return "json: Unmarshal(non-pointer " + e.Type.String() + ")" + } + return "json: Unmarshal(nil " + e.Type.String() + ")" +} + +func (d *decodeState) unmarshal(v interface{}) (err error) { + defer func() { + if r := recover(); r != nil { + if _, ok := r.(runtime.Error); ok { + panic(r) + } + err = r.(error) + } + }() + + rv := reflect.ValueOf(v) + pv := rv + if pv.Kind() != reflect.Ptr || pv.IsNil() { + return &InvalidUnmarshalError{reflect.TypeOf(v)} + } + + d.scan.reset() + // We decode rv not pv.Elem because the Unmarshaler interface + // test must be applied at the top level of the value. + d.value(rv) + return d.savedError +} + +// decodeState represents the state while decoding a JSON value. +type decodeState struct { + data []byte + off int // read offset in data + scan scanner + nextscan scanner // for calls to nextValue + savedError error + tempstr string // scratch space to avoid some allocations +} + +// errPhase is used for errors that should not happen unless +// there is a bug in the JSON decoder or something is editing +// the data slice while the decoder executes. +var errPhase = errors.New("JSON decoder out of sync - data changing underfoot?") + +func (d *decodeState) init(data []byte) *decodeState { + d.data = data + d.off = 0 + d.savedError = nil + return d +} + +// error aborts the decoding by panicking with err. +func (d *decodeState) error(err error) { + panic(err) +} + +// saveError saves the first err it is called with, +// for reporting at the end of the unmarshal. +func (d *decodeState) saveError(err error) { + if d.savedError == nil { + d.savedError = err + } +} + +// next cuts off and returns the next full JSON value in d.data[d.off:]. +// The next value is known to be an object or array, not a literal. +func (d *decodeState) next() []byte { + c := d.data[d.off] + item, rest, err := nextValue(d.data[d.off:], &d.nextscan) + if err != nil { + d.error(err) + } + d.off = len(d.data) - len(rest) + + // Our scanner has seen the opening brace/bracket + // and thinks we're still in the middle of the object. + // invent a closing brace/bracket to get it out. + if c == '{' { + d.scan.step(&d.scan, '}') + } else { + d.scan.step(&d.scan, ']') + } + + return item +} + +// scanWhile processes bytes in d.data[d.off:] until it +// receives a scan code not equal to op. +// It updates d.off and returns the new scan code. +func (d *decodeState) scanWhile(op int) int { + var newOp int + for { + if d.off >= len(d.data) { + newOp = d.scan.eof() + d.off = len(d.data) + 1 // mark processed EOF with len+1 + } else { + c := int(d.data[d.off]) + d.off++ + newOp = d.scan.step(&d.scan, c) + } + if newOp != op { + break + } + } + return newOp +} + +// value decodes a JSON value from d.data[d.off:] into the value. +// it updates d.off to point past the decoded value. +func (d *decodeState) value(v reflect.Value) { + if !v.IsValid() { + _, rest, err := nextValue(d.data[d.off:], &d.nextscan) + if err != nil { + d.error(err) + } + d.off = len(d.data) - len(rest) + + // d.scan thinks we're still at the beginning of the item. + // Feed in an empty string - the shortest, simplest value - + // so that it knows we got to the end of the value. + if d.scan.step == stateRedo { + panic("redo") + } + d.scan.step(&d.scan, '"') + d.scan.step(&d.scan, '"') + return + } + + switch op := d.scanWhile(scanSkipSpace); op { + default: + d.error(errPhase) + + case scanBeginArray: + d.array(v) + + case scanBeginObject: + d.object(v) + + case scanBeginLiteral: + d.literal(v) + } +} + +// indirect walks down v allocating pointers as needed, +// until it gets to a non-pointer. +// if it encounters an Unmarshaler, indirect stops and returns that. +// if decodingNull is true, indirect stops at the last pointer so it can be set to nil. +func (d *decodeState) indirect(v reflect.Value, decodingNull bool) (Unmarshaler, reflect.Value) { + // If v is a named type and is addressable, + // start with its address, so that if the type has pointer methods, + // we find them. + if v.Kind() != reflect.Ptr && v.Type().Name() != "" && v.CanAddr() { + v = v.Addr() + } + for { + var isUnmarshaler bool + if v.Type().NumMethod() > 0 { + // Remember that this is an unmarshaler, + // but wait to return it until after allocating + // the pointer (if necessary). + _, isUnmarshaler = v.Interface().(Unmarshaler) + } + + if iv := v; iv.Kind() == reflect.Interface && !iv.IsNil() { + v = iv.Elem() + continue + } + + pv := v + if pv.Kind() != reflect.Ptr { + break + } + + if pv.Elem().Kind() != reflect.Ptr && decodingNull && pv.CanSet() { + return nil, pv + } + if pv.IsNil() { + pv.Set(reflect.New(pv.Type().Elem())) + } + if isUnmarshaler { + // Using v.Interface().(Unmarshaler) + // here means that we have to use a pointer + // as the struct field. We cannot use a value inside + // a pointer to a struct, because in that case + // v.Interface() is the value (x.f) not the pointer (&x.f). + // This is an unfortunate consequence of reflect. + // An alternative would be to look up the + // UnmarshalJSON method and return a FuncValue. + return v.Interface().(Unmarshaler), reflect.Value{} + } + v = pv.Elem() + } + return nil, v +} + +// array consumes an array from d.data[d.off-1:], decoding into the value v. +// the first byte of the array ('[') has been read already. +func (d *decodeState) array(v reflect.Value) { + // Check for unmarshaler. + unmarshaler, pv := d.indirect(v, false) + if unmarshaler != nil { + d.off-- + err := unmarshaler.UnmarshalJSON(d.next()) + if err != nil { + d.error(err) + } + return + } + v = pv + + // Decoding into nil interface? Switch to non-reflect code. + iv := v + ok := iv.Kind() == reflect.Interface + if ok { + iv.Set(reflect.ValueOf(d.arrayInterface())) + return + } + + // Check type of target. + av := v + if av.Kind() != reflect.Array && av.Kind() != reflect.Slice { + d.saveError(&UnmarshalTypeError{"array", v.Type()}) + d.off-- + d.next() + return + } + + sv := v + + i := 0 + for { + // Look ahead for ] - can only happen on first iteration. + op := d.scanWhile(scanSkipSpace) + if op == scanEndArray { + break + } + + // Back up so d.value can have the byte we just read. + d.off-- + d.scan.undo(op) + + // Get element of array, growing if necessary. + if i >= av.Cap() && sv.IsValid() { + newcap := sv.Cap() + sv.Cap()/2 + if newcap < 4 { + newcap = 4 + } + newv := reflect.MakeSlice(sv.Type(), sv.Len(), newcap) + reflect.Copy(newv, sv) + sv.Set(newv) + } + if i >= av.Len() && sv.IsValid() { + // Must be slice; gave up on array during i >= av.Cap(). + sv.SetLen(i + 1) + } + + // Decode into element. + if i < av.Len() { + d.value(av.Index(i)) + } else { + // Ran out of fixed array: skip. + d.value(reflect.Value{}) + } + i++ + + // Next token must be , or ]. + op = d.scanWhile(scanSkipSpace) + if op == scanEndArray { + break + } + if op != scanArrayValue { + d.error(errPhase) + } + } + if i < av.Len() { + if !sv.IsValid() { + // Array. Zero the rest. + z := reflect.Zero(av.Type().Elem()) + for ; i < av.Len(); i++ { + av.Index(i).Set(z) + } + } else { + sv.SetLen(i) + } + } +} + +// object consumes an object from d.data[d.off-1:], decoding into the value v. +// the first byte of the object ('{') has been read already. +func (d *decodeState) object(v reflect.Value) { + // Check for unmarshaler. + unmarshaler, pv := d.indirect(v, false) + if unmarshaler != nil { + d.off-- + err := unmarshaler.UnmarshalJSON(d.next()) + if err != nil { + d.error(err) + } + return + } + v = pv + + // Decoding into nil interface? Switch to non-reflect code. + iv := v + if iv.Kind() == reflect.Interface { + iv.Set(reflect.ValueOf(d.objectInterface())) + return + } + + // Check type of target: struct or map[string]T + var ( + mv reflect.Value + sv reflect.Value + ) + switch v.Kind() { + case reflect.Map: + // map must have string type + t := v.Type() + if t.Key() != reflect.TypeOf("") { + d.saveError(&UnmarshalTypeError{"object", v.Type()}) + break + } + mv = v + if mv.IsNil() { + mv.Set(reflect.MakeMap(t)) + } + case reflect.Struct: + sv = v + default: + d.saveError(&UnmarshalTypeError{"object", v.Type()}) + } + + if !mv.IsValid() && !sv.IsValid() { + d.off-- + d.next() // skip over { } in input + return + } + + var mapElem reflect.Value + + for { + // Read opening " of string key or closing }. + op := d.scanWhile(scanSkipSpace) + if op == scanEndObject { + // closing } - can only happen on first iteration. + break + } + if op != scanBeginLiteral { + d.error(errPhase) + } + + // Read string key. + start := d.off - 1 + op = d.scanWhile(scanContinue) + item := d.data[start : d.off-1] + key, ok := unquote(item) + if !ok { + d.error(errPhase) + } + + // Figure out field corresponding to key. + var subv reflect.Value + destring := false // whether the value is wrapped in a string to be decoded first + + if mv.IsValid() { + elemType := mv.Type().Elem() + if !mapElem.IsValid() { + mapElem = reflect.New(elemType).Elem() + } else { + mapElem.Set(reflect.Zero(elemType)) + } + subv = mapElem + } else { + var f reflect.StructField + var ok bool + st := sv.Type() + for i := 0; i < sv.NumField(); i++ { + sf := st.Field(i) + tag := sf.Tag.Get("json") + if tag == "-" { + // Pretend this field doesn't exist. + continue + } + // First, tag match + tagName, _ := parseTag(tag) + if tagName == key { + f = sf + ok = true + break // no better match possible + } + // Second, exact field name match + if sf.Name == key { + f = sf + ok = true + } + // Third, case-insensitive field name match, + // but only if a better match hasn't already been seen + if !ok && strings.EqualFold(sf.Name, key) { + f = sf + ok = true + } + } + + // Extract value; name must be exported. + if ok { + if f.PkgPath != "" { + d.saveError(&UnmarshalFieldError{key, st, f}) + } else { + subv = sv.FieldByIndex(f.Index) + } + _, opts := parseTag(f.Tag.Get("json")) + destring = opts.Contains("string") + } + } + + // Read : before value. + if op == scanSkipSpace { + op = d.scanWhile(scanSkipSpace) + } + if op != scanObjectKey { + d.error(errPhase) + } + + // Read value. + if destring { + d.value(reflect.ValueOf(&d.tempstr)) + d.literalStore([]byte(d.tempstr), subv) + } else { + d.value(subv) + } + // Write value back to map; + // if using struct, subv points into struct already. + if mv.IsValid() { + mv.SetMapIndex(reflect.ValueOf(key), subv) + } + + // Next token must be , or }. + op = d.scanWhile(scanSkipSpace) + if op == scanEndObject { + break + } + if op != scanObjectValue { + d.error(errPhase) + } + } +} + +// literal consumes a literal from d.data[d.off-1:], decoding into the value v. +// The first byte of the literal has been read already +// (that's how the caller knows it's a literal). +func (d *decodeState) literal(v reflect.Value) { + // All bytes inside literal return scanContinue op code. + start := d.off - 1 + op := d.scanWhile(scanContinue) + + // Scan read one byte too far; back up. + d.off-- + d.scan.undo(op) + + d.literalStore(d.data[start:d.off], v) +} + +// literalStore decodes a literal stored in item into v. +func (d *decodeState) literalStore(item []byte, v reflect.Value) { + // Check for unmarshaler. + wantptr := item[0] == 'n' // null + unmarshaler, pv := d.indirect(v, wantptr) + if unmarshaler != nil { + err := unmarshaler.UnmarshalJSON(item) + if err != nil { + d.error(err) + } + return + } + v = pv + + switch c := item[0]; c { + case 'n': // null + switch v.Kind() { + default: + d.saveError(&UnmarshalTypeError{"null", v.Type()}) + case reflect.Interface, reflect.Ptr, reflect.Map, reflect.Slice: + v.Set(reflect.Zero(v.Type())) + } + + case 't', 'f': // true, false + value := c == 't' + switch v.Kind() { + default: + d.saveError(&UnmarshalTypeError{"bool", v.Type()}) + case reflect.Bool: + v.SetBool(value) + case reflect.Interface: + v.Set(reflect.ValueOf(value)) + } + + case '"': // string + s, ok := unquoteBytes(item) + if !ok { + d.error(errPhase) + } + switch v.Kind() { + default: + d.saveError(&UnmarshalTypeError{"string", v.Type()}) + case reflect.Slice: + if v.Type() != byteSliceType { + d.saveError(&UnmarshalTypeError{"string", v.Type()}) + break + } + b := make([]byte, base64.StdEncoding.DecodedLen(len(s))) + n, err := base64.StdEncoding.Decode(b, s) + if err != nil { + d.saveError(err) + break + } + v.Set(reflect.ValueOf(b[0:n])) + case reflect.String: + v.SetString(string(s)) + case reflect.Interface: + v.Set(reflect.ValueOf(string(s))) + } + + default: // number + if c != '-' && (c < '0' || c > '9') { + d.error(errPhase) + } + s := string(item) + switch v.Kind() { + default: + d.error(&UnmarshalTypeError{"number", v.Type()}) + case reflect.Interface: + n, err := strconv.Atof64(s) + if err != nil { + d.saveError(&UnmarshalTypeError{"number " + s, v.Type()}) + break + } + v.Set(reflect.ValueOf(n)) + + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + n, err := strconv.Atoi64(s) + if err != nil || v.OverflowInt(n) { + d.saveError(&UnmarshalTypeError{"number " + s, v.Type()}) + break + } + v.SetInt(n) + + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + n, err := strconv.Atoui64(s) + if err != nil || v.OverflowUint(n) { + d.saveError(&UnmarshalTypeError{"number " + s, v.Type()}) + break + } + v.SetUint(n) + + case reflect.Float32, reflect.Float64: + n, err := strconv.AtofN(s, v.Type().Bits()) + if err != nil || v.OverflowFloat(n) { + d.saveError(&UnmarshalTypeError{"number " + s, v.Type()}) + break + } + v.SetFloat(n) + } + } +} + +// The xxxInterface routines build up a value to be stored +// in an empty interface. They are not strictly necessary, +// but they avoid the weight of reflection in this common case. + +// valueInterface is like value but returns interface{} +func (d *decodeState) valueInterface() interface{} { + switch d.scanWhile(scanSkipSpace) { + default: + d.error(errPhase) + case scanBeginArray: + return d.arrayInterface() + case scanBeginObject: + return d.objectInterface() + case scanBeginLiteral: + return d.literalInterface() + } + panic("unreachable") +} + +// arrayInterface is like array but returns []interface{}. +func (d *decodeState) arrayInterface() []interface{} { + var v []interface{} + for { + // Look ahead for ] - can only happen on first iteration. + op := d.scanWhile(scanSkipSpace) + if op == scanEndArray { + break + } + + // Back up so d.value can have the byte we just read. + d.off-- + d.scan.undo(op) + + v = append(v, d.valueInterface()) + + // Next token must be , or ]. + op = d.scanWhile(scanSkipSpace) + if op == scanEndArray { + break + } + if op != scanArrayValue { + d.error(errPhase) + } + } + return v +} + +// objectInterface is like object but returns map[string]interface{}. +func (d *decodeState) objectInterface() map[string]interface{} { + m := make(map[string]interface{}) + for { + // Read opening " of string key or closing }. + op := d.scanWhile(scanSkipSpace) + if op == scanEndObject { + // closing } - can only happen on first iteration. + break + } + if op != scanBeginLiteral { + d.error(errPhase) + } + + // Read string key. + start := d.off - 1 + op = d.scanWhile(scanContinue) + item := d.data[start : d.off-1] + key, ok := unquote(item) + if !ok { + d.error(errPhase) + } + + // Read : before value. + if op == scanSkipSpace { + op = d.scanWhile(scanSkipSpace) + } + if op != scanObjectKey { + d.error(errPhase) + } + + // Read value. + m[key] = d.valueInterface() + + // Next token must be , or }. + op = d.scanWhile(scanSkipSpace) + if op == scanEndObject { + break + } + if op != scanObjectValue { + d.error(errPhase) + } + } + return m +} + +// literalInterface is like literal but returns an interface value. +func (d *decodeState) literalInterface() interface{} { + // All bytes inside literal return scanContinue op code. + start := d.off - 1 + op := d.scanWhile(scanContinue) + + // Scan read one byte too far; back up. + d.off-- + d.scan.undo(op) + item := d.data[start:d.off] + + switch c := item[0]; c { + case 'n': // null + return nil + + case 't', 'f': // true, false + return c == 't' + + case '"': // string + s, ok := unquote(item) + if !ok { + d.error(errPhase) + } + return s + + default: // number + if c != '-' && (c < '0' || c > '9') { + d.error(errPhase) + } + n, err := strconv.Atof64(string(item)) + if err != nil { + d.saveError(&UnmarshalTypeError{"number " + string(item), reflect.TypeOf(0.0)}) + } + return n + } + panic("unreachable") +} + +// getu4 decodes \uXXXX from the beginning of s, returning the hex value, +// or it returns -1. +func getu4(s []byte) rune { + if len(s) < 6 || s[0] != '\\' || s[1] != 'u' { + return -1 + } + r, err := strconv.Btoui64(string(s[2:6]), 16) + if err != nil { + return -1 + } + return rune(r) +} + +// unquote converts a quoted JSON string literal s into an actual string t. +// The rules are different than for Go, so cannot use strconv.Unquote. +func unquote(s []byte) (t string, ok bool) { + s, ok = unquoteBytes(s) + t = string(s) + return +} + +func unquoteBytes(s []byte) (t []byte, ok bool) { + if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' { + return + } + s = s[1 : len(s)-1] + + // Check for unusual characters. If there are none, + // then no unquoting is needed, so return a slice of the + // original bytes. + r := 0 + for r < len(s) { + c := s[r] + if c == '\\' || c == '"' || c < ' ' { + break + } + if c < utf8.RuneSelf { + r++ + continue + } + rr, size := utf8.DecodeRune(s[r:]) + if rr == utf8.RuneError && size == 1 { + break + } + r += size + } + if r == len(s) { + return s, true + } + + b := make([]byte, len(s)+2*utf8.UTFMax) + w := copy(b, s[0:r]) + for r < len(s) { + // Out of room? Can only happen if s is full of + // malformed UTF-8 and we're replacing each + // byte with RuneError. + if w >= len(b)-2*utf8.UTFMax { + nb := make([]byte, (len(b)+utf8.UTFMax)*2) + copy(nb, b[0:w]) + b = nb + } + switch c := s[r]; { + case c == '\\': + r++ + if r >= len(s) { + return + } + switch s[r] { + default: + return + case '"', '\\', '/', '\'': + b[w] = s[r] + r++ + w++ + case 'b': + b[w] = '\b' + r++ + w++ + case 'f': + b[w] = '\f' + r++ + w++ + case 'n': + b[w] = '\n' + r++ + w++ + case 'r': + b[w] = '\r' + r++ + w++ + case 't': + b[w] = '\t' + r++ + w++ + case 'u': + r-- + rr := getu4(s[r:]) + if rr < 0 { + return + } + r += 6 + if utf16.IsSurrogate(rr) { + rr1 := getu4(s[r:]) + if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar { + // A valid pair; consume. + r += 6 + w += utf8.EncodeRune(b[w:], dec) + break + } + // Invalid surrogate; fall back to replacement rune. + rr = unicode.ReplacementChar + } + w += utf8.EncodeRune(b[w:], rr) + } + + // Quote, control characters are invalid. + case c == '"', c < ' ': + return + + // ASCII + case c < utf8.RuneSelf: + b[w] = c + r++ + w++ + + // Coerce to well-formed UTF-8. + default: + rr, size := utf8.DecodeRune(s[r:]) + r += size + w += utf8.EncodeRune(b[w:], rr) + } + } + return b[0:w], true +} diff --git a/libgo/go/encoding/json/decode_test.go b/libgo/go/encoding/json/decode_test.go new file mode 100644 index 0000000..bd4326a --- /dev/null +++ b/libgo/go/encoding/json/decode_test.go @@ -0,0 +1,564 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "bytes" + "reflect" + "strings" + "testing" +) + +type T struct { + X string + Y int + Z int `json:"-"` +} + +type tx struct { + x int +} + +var txType = reflect.TypeOf((*tx)(nil)).Elem() + +// A type that can unmarshal itself. + +type unmarshaler struct { + T bool +} + +func (u *unmarshaler) UnmarshalJSON(b []byte) error { + *u = unmarshaler{true} // All we need to see that UnmarshalJson is called. + return nil +} + +type ustruct struct { + M unmarshaler +} + +var ( + um0, um1 unmarshaler // target2 of unmarshaling + ump = &um1 + umtrue = unmarshaler{true} + umslice = []unmarshaler{{true}} + umslicep = new([]unmarshaler) + umstruct = ustruct{unmarshaler{true}} +) + +type unmarshalTest struct { + in string + ptr interface{} + out interface{} + err error +} + +var unmarshalTests = []unmarshalTest{ + // basic types + {`true`, new(bool), true, nil}, + {`1`, new(int), 1, nil}, + {`1.2`, new(float64), 1.2, nil}, + {`-5`, new(int16), int16(-5), nil}, + {`"a\u1234"`, new(string), "a\u1234", nil}, + {`"http:\/\/"`, new(string), "http://", nil}, + {`"g-clef: \uD834\uDD1E"`, new(string), "g-clef: \U0001D11E", nil}, + {`"invalid: \uD834x\uDD1E"`, new(string), "invalid: \uFFFDx\uFFFD", nil}, + {"null", new(interface{}), nil, nil}, + {`{"X": [1,2,3], "Y": 4}`, new(T), T{Y: 4}, &UnmarshalTypeError{"array", reflect.TypeOf("")}}, + {`{"x": 1}`, new(tx), tx{}, &UnmarshalFieldError{"x", txType, txType.Field(0)}}, + + // Z has a "-" tag. + {`{"Y": 1, "Z": 2}`, new(T), T{Y: 1}, nil}, + + // syntax errors + {`{"X": "foo", "Y"}`, nil, nil, &SyntaxError{"invalid character '}' after object key", 17}}, + + // composite tests + {allValueIndent, new(All), allValue, nil}, + {allValueCompact, new(All), allValue, nil}, + {allValueIndent, new(*All), &allValue, nil}, + {allValueCompact, new(*All), &allValue, nil}, + {pallValueIndent, new(All), pallValue, nil}, + {pallValueCompact, new(All), pallValue, nil}, + {pallValueIndent, new(*All), &pallValue, nil}, + {pallValueCompact, new(*All), &pallValue, nil}, + + // unmarshal interface test + {`{"T":false}`, &um0, umtrue, nil}, // use "false" so test will fail if custom unmarshaler is not called + {`{"T":false}`, &ump, &umtrue, nil}, + {`[{"T":false}]`, &umslice, umslice, nil}, + {`[{"T":false}]`, &umslicep, &umslice, nil}, + {`{"M":{"T":false}}`, &umstruct, umstruct, nil}, +} + +func TestMarshal(t *testing.T) { + b, err := Marshal(allValue) + if err != nil { + t.Fatalf("Marshal allValue: %v", err) + } + if string(b) != allValueCompact { + t.Errorf("Marshal allValueCompact") + diff(t, b, []byte(allValueCompact)) + return + } + + b, err = Marshal(pallValue) + if err != nil { + t.Fatalf("Marshal pallValue: %v", err) + } + if string(b) != pallValueCompact { + t.Errorf("Marshal pallValueCompact") + diff(t, b, []byte(pallValueCompact)) + return + } +} + +func TestMarshalBadUTF8(t *testing.T) { + s := "hello\xffworld" + b, err := Marshal(s) + if err == nil { + t.Fatal("Marshal bad UTF8: no error") + } + if len(b) != 0 { + t.Fatal("Marshal returned data") + } + if _, ok := err.(*InvalidUTF8Error); !ok { + t.Fatalf("Marshal did not return InvalidUTF8Error: %T %v", err, err) + } +} + +func TestUnmarshal(t *testing.T) { + for i, tt := range unmarshalTests { + var scan scanner + in := []byte(tt.in) + if err := checkValid(in, &scan); err != nil { + if !reflect.DeepEqual(err, tt.err) { + t.Errorf("#%d: checkValid: %#v", i, err) + continue + } + } + if tt.ptr == nil { + continue + } + // v = new(right-type) + v := reflect.New(reflect.TypeOf(tt.ptr).Elem()) + if err := Unmarshal([]byte(in), v.Interface()); !reflect.DeepEqual(err, tt.err) { + t.Errorf("#%d: %v want %v", i, err, tt.err) + continue + } + if !reflect.DeepEqual(v.Elem().Interface(), tt.out) { + t.Errorf("#%d: mismatch\nhave: %#+v\nwant: %#+v", i, v.Elem().Interface(), tt.out) + data, _ := Marshal(v.Elem().Interface()) + println(string(data)) + data, _ = Marshal(tt.out) + println(string(data)) + continue + } + } +} + +func TestUnmarshalMarshal(t *testing.T) { + initBig() + var v interface{} + if err := Unmarshal(jsonBig, &v); err != nil { + t.Fatalf("Unmarshal: %v", err) + } + b, err := Marshal(v) + if err != nil { + t.Fatalf("Marshal: %v", err) + } + if bytes.Compare(jsonBig, b) != 0 { + t.Errorf("Marshal jsonBig") + diff(t, b, jsonBig) + return + } +} + +func TestLargeByteSlice(t *testing.T) { + s0 := make([]byte, 2000) + for i := range s0 { + s0[i] = byte(i) + } + b, err := Marshal(s0) + if err != nil { + t.Fatalf("Marshal: %v", err) + } + var s1 []byte + if err := Unmarshal(b, &s1); err != nil { + t.Fatalf("Unmarshal: %v", err) + } + if bytes.Compare(s0, s1) != 0 { + t.Errorf("Marshal large byte slice") + diff(t, s0, s1) + } +} + +type Xint struct { + X int +} + +func TestUnmarshalInterface(t *testing.T) { + var xint Xint + var i interface{} = &xint + if err := Unmarshal([]byte(`{"X":1}`), &i); err != nil { + t.Fatalf("Unmarshal: %v", err) + } + if xint.X != 1 { + t.Fatalf("Did not write to xint") + } +} + +func TestUnmarshalPtrPtr(t *testing.T) { + var xint Xint + pxint := &xint + if err := Unmarshal([]byte(`{"X":1}`), &pxint); err != nil { + t.Fatalf("Unmarshal: %v", err) + } + if xint.X != 1 { + t.Fatalf("Did not write to xint") + } +} + +func TestEscape(t *testing.T) { + const input = `"foobar"<html>` + const expected = `"\"foobar\"\u003chtml\u003e"` + b, err := Marshal(input) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + if s := string(b); s != expected { + t.Errorf("Encoding of [%s] was [%s], want [%s]", input, s, expected) + } +} + +func TestHTMLEscape(t *testing.T) { + b, err := MarshalForHTML("foobarbaz<>&quux") + if err != nil { + t.Fatalf("MarshalForHTML error: %v", err) + } + if !bytes.Equal(b, []byte(`"foobarbaz\u003c\u003e\u0026quux"`)) { + t.Fatalf("Unexpected encoding of \"<>&\": %s", b) + } +} + +func noSpace(c rune) rune { + if isSpace(c) { + return -1 + } + return c +} + +type All struct { + Bool bool + Int int + Int8 int8 + Int16 int16 + Int32 int32 + Int64 int64 + Uint uint + Uint8 uint8 + Uint16 uint16 + Uint32 uint32 + Uint64 uint64 + Uintptr uintptr + Float32 float32 + Float64 float64 + + Foo string `json:"bar"` + Foo2 string `json:"bar2,dummyopt"` + + IntStr int64 `json:",string"` + + PBool *bool + PInt *int + PInt8 *int8 + PInt16 *int16 + PInt32 *int32 + PInt64 *int64 + PUint *uint + PUint8 *uint8 + PUint16 *uint16 + PUint32 *uint32 + PUint64 *uint64 + PUintptr *uintptr + PFloat32 *float32 + PFloat64 *float64 + + String string + PString *string + + Map map[string]Small + MapP map[string]*Small + PMap *map[string]Small + PMapP *map[string]*Small + + EmptyMap map[string]Small + NilMap map[string]Small + + Slice []Small + SliceP []*Small + PSlice *[]Small + PSliceP *[]*Small + + EmptySlice []Small + NilSlice []Small + + StringSlice []string + ByteSlice []byte + + Small Small + PSmall *Small + PPSmall **Small + + Interface interface{} + PInterface *interface{} + + unexported int +} + +type Small struct { + Tag string +} + +var allValue = All{ + Bool: true, + Int: 2, + Int8: 3, + Int16: 4, + Int32: 5, + Int64: 6, + Uint: 7, + Uint8: 8, + Uint16: 9, + Uint32: 10, + Uint64: 11, + Uintptr: 12, + Float32: 14.1, + Float64: 15.1, + Foo: "foo", + Foo2: "foo2", + IntStr: 42, + String: "16", + Map: map[string]Small{ + "17": {Tag: "tag17"}, + "18": {Tag: "tag18"}, + }, + MapP: map[string]*Small{ + "19": &Small{Tag: "tag19"}, + "20": nil, + }, + EmptyMap: map[string]Small{}, + Slice: []Small{{Tag: "tag20"}, {Tag: "tag21"}}, + SliceP: []*Small{&Small{Tag: "tag22"}, nil, &Small{Tag: "tag23"}}, + EmptySlice: []Small{}, + StringSlice: []string{"str24", "str25", "str26"}, + ByteSlice: []byte{27, 28, 29}, + Small: Small{Tag: "tag30"}, + PSmall: &Small{Tag: "tag31"}, + Interface: 5.2, +} + +var pallValue = All{ + PBool: &allValue.Bool, + PInt: &allValue.Int, + PInt8: &allValue.Int8, + PInt16: &allValue.Int16, + PInt32: &allValue.Int32, + PInt64: &allValue.Int64, + PUint: &allValue.Uint, + PUint8: &allValue.Uint8, + PUint16: &allValue.Uint16, + PUint32: &allValue.Uint32, + PUint64: &allValue.Uint64, + PUintptr: &allValue.Uintptr, + PFloat32: &allValue.Float32, + PFloat64: &allValue.Float64, + PString: &allValue.String, + PMap: &allValue.Map, + PMapP: &allValue.MapP, + PSlice: &allValue.Slice, + PSliceP: &allValue.SliceP, + PPSmall: &allValue.PSmall, + PInterface: &allValue.Interface, +} + +var allValueIndent = `{ + "Bool": true, + "Int": 2, + "Int8": 3, + "Int16": 4, + "Int32": 5, + "Int64": 6, + "Uint": 7, + "Uint8": 8, + "Uint16": 9, + "Uint32": 10, + "Uint64": 11, + "Uintptr": 12, + "Float32": 14.1, + "Float64": 15.1, + "bar": "foo", + "bar2": "foo2", + "IntStr": "42", + "PBool": null, + "PInt": null, + "PInt8": null, + "PInt16": null, + "PInt32": null, + "PInt64": null, + "PUint": null, + "PUint8": null, + "PUint16": null, + "PUint32": null, + "PUint64": null, + "PUintptr": null, + "PFloat32": null, + "PFloat64": null, + "String": "16", + "PString": null, + "Map": { + "17": { + "Tag": "tag17" + }, + "18": { + "Tag": "tag18" + } + }, + "MapP": { + "19": { + "Tag": "tag19" + }, + "20": null + }, + "PMap": null, + "PMapP": null, + "EmptyMap": {}, + "NilMap": null, + "Slice": [ + { + "Tag": "tag20" + }, + { + "Tag": "tag21" + } + ], + "SliceP": [ + { + "Tag": "tag22" + }, + null, + { + "Tag": "tag23" + } + ], + "PSlice": null, + "PSliceP": null, + "EmptySlice": [], + "NilSlice": null, + "StringSlice": [ + "str24", + "str25", + "str26" + ], + "ByteSlice": "Gxwd", + "Small": { + "Tag": "tag30" + }, + "PSmall": { + "Tag": "tag31" + }, + "PPSmall": null, + "Interface": 5.2, + "PInterface": null +}` + +var allValueCompact = strings.Map(noSpace, allValueIndent) + +var pallValueIndent = `{ + "Bool": false, + "Int": 0, + "Int8": 0, + "Int16": 0, + "Int32": 0, + "Int64": 0, + "Uint": 0, + "Uint8": 0, + "Uint16": 0, + "Uint32": 0, + "Uint64": 0, + "Uintptr": 0, + "Float32": 0, + "Float64": 0, + "bar": "", + "bar2": "", + "IntStr": "0", + "PBool": true, + "PInt": 2, + "PInt8": 3, + "PInt16": 4, + "PInt32": 5, + "PInt64": 6, + "PUint": 7, + "PUint8": 8, + "PUint16": 9, + "PUint32": 10, + "PUint64": 11, + "PUintptr": 12, + "PFloat32": 14.1, + "PFloat64": 15.1, + "String": "", + "PString": "16", + "Map": null, + "MapP": null, + "PMap": { + "17": { + "Tag": "tag17" + }, + "18": { + "Tag": "tag18" + } + }, + "PMapP": { + "19": { + "Tag": "tag19" + }, + "20": null + }, + "EmptyMap": null, + "NilMap": null, + "Slice": null, + "SliceP": null, + "PSlice": [ + { + "Tag": "tag20" + }, + { + "Tag": "tag21" + } + ], + "PSliceP": [ + { + "Tag": "tag22" + }, + null, + { + "Tag": "tag23" + } + ], + "EmptySlice": null, + "NilSlice": null, + "StringSlice": null, + "ByteSlice": null, + "Small": { + "Tag": "" + }, + "PSmall": null, + "PPSmall": { + "Tag": "tag31" + }, + "Interface": null, + "PInterface": 5.2 +}` + +var pallValueCompact = strings.Map(noSpace, pallValueIndent) diff --git a/libgo/go/encoding/json/encode.go b/libgo/go/encoding/json/encode.go new file mode 100644 index 0000000..35964c5 --- /dev/null +++ b/libgo/go/encoding/json/encode.go @@ -0,0 +1,472 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package json implements encoding and decoding of JSON objects as defined in +// RFC 4627. +// +// See "JSON and Go" for an introduction to this package: +// http://blog.golang.org/2011/01/json-and-go.html +package json + +import ( + "bytes" + "encoding/base64" + "reflect" + "runtime" + "sort" + "strconv" + "unicode" + "unicode/utf8" +) + +// Marshal returns the JSON encoding of v. +// +// Marshal traverses the value v recursively. +// If an encountered value implements the Marshaler interface +// and is not a nil pointer, Marshal calls its MarshalJSON method +// to produce JSON. The nil pointer exception is not strictly necessary +// but mimics a similar, necessary exception in the behavior of +// UnmarshalJSON. +// +// Otherwise, Marshal uses the following type-dependent default encodings: +// +// Boolean values encode as JSON booleans. +// +// Floating point and integer values encode as JSON numbers. +// +// String values encode as JSON strings, with each invalid UTF-8 sequence +// replaced by the encoding of the Unicode replacement character U+FFFD. +// +// Array and slice values encode as JSON arrays, except that +// []byte encodes as a base64-encoded string. +// +// Struct values encode as JSON objects. Each exported struct field +// becomes a member of the object unless +// - the field's tag is "-", or +// - the field is empty and its tag specifies the "omitempty" option. +// The empty values are false, 0, any +// nil pointer or interface value, and any array, slice, map, or string of +// length zero. The object's default key string is the struct field name +// but can be specified in the struct field's tag value. The "json" key in +// struct field's tag value is the key name, followed by an optional comma +// and options. Examples: +// +// // Field is ignored by this package. +// Field int `json:"-"` +// +// // Field appears in JSON as key "myName". +// Field int `json:"myName"` +// +// // Field appears in JSON as key "myName" and +// // the field is omitted from the object if its value is empty, +// // as defined above. +// Field int `json:"myName,omitempty"` +// +// // Field appears in JSON as key "Field" (the default), but +// // the field is skipped if empty. +// // Note the leading comma. +// Field int `json:",omitempty"` +// +// The "string" option signals that a field is stored as JSON inside a +// JSON-encoded string. This extra level of encoding is sometimes +// used when communicating with JavaScript programs: +// +// Int64String int64 `json:",string"` +// +// The key name will be used if it's a non-empty string consisting of +// only Unicode letters, digits, dollar signs, hyphens, and underscores. +// +// Map values encode as JSON objects. +// The map's key type must be string; the object keys are used directly +// as map keys. +// +// Pointer values encode as the value pointed to. +// A nil pointer encodes as the null JSON object. +// +// Interface values encode as the value contained in the interface. +// A nil interface value encodes as the null JSON object. +// +// Channel, complex, and function values cannot be encoded in JSON. +// Attempting to encode such a value causes Marshal to return +// an InvalidTypeError. +// +// JSON cannot represent cyclic data structures and Marshal does not +// handle them. Passing cyclic structures to Marshal will result in +// an infinite recursion. +// +func Marshal(v interface{}) ([]byte, error) { + e := &encodeState{} + err := e.marshal(v) + if err != nil { + return nil, err + } + return e.Bytes(), nil +} + +// MarshalIndent is like Marshal but applies Indent to format the output. +func MarshalIndent(v interface{}, prefix, indent string) ([]byte, error) { + b, err := Marshal(v) + if err != nil { + return nil, err + } + var buf bytes.Buffer + err = Indent(&buf, b, prefix, indent) + if err != nil { + return nil, err + } + return buf.Bytes(), nil +} + +// MarshalForHTML is like Marshal but applies HTMLEscape to the output. +func MarshalForHTML(v interface{}) ([]byte, error) { + b, err := Marshal(v) + if err != nil { + return nil, err + } + var buf bytes.Buffer + HTMLEscape(&buf, b) + return buf.Bytes(), nil +} + +// HTMLEscape appends to dst the JSON-encoded src with <, >, and & +// characters inside string literals changed to \u003c, \u003e, \u0026 +// so that the JSON will be safe to embed inside HTML <script> tags. +// For historical reasons, web browsers don't honor standard HTML +// escaping within <script> tags, so an alternative JSON encoding must +// be used. +func HTMLEscape(dst *bytes.Buffer, src []byte) { + // < > & can only appear in string literals, + // so just scan the string one byte at a time. + start := 0 + for i, c := range src { + if c == '<' || c == '>' || c == '&' { + if start < i { + dst.Write(src[start:i]) + } + dst.WriteString(`\u00`) + dst.WriteByte(hex[c>>4]) + dst.WriteByte(hex[c&0xF]) + start = i + 1 + } + } + if start < len(src) { + dst.Write(src[start:]) + } +} + +// Marshaler is the interface implemented by objects that +// can marshal themselves into valid JSON. +type Marshaler interface { + MarshalJSON() ([]byte, error) +} + +type UnsupportedTypeError struct { + Type reflect.Type +} + +func (e *UnsupportedTypeError) Error() string { + return "json: unsupported type: " + e.Type.String() +} + +type InvalidUTF8Error struct { + S string +} + +func (e *InvalidUTF8Error) Error() string { + return "json: invalid UTF-8 in string: " + strconv.Quote(e.S) +} + +type MarshalerError struct { + Type reflect.Type + Err error +} + +func (e *MarshalerError) Error() string { + return "json: error calling MarshalJSON for type " + e.Type.String() + ": " + e.Err.Error() +} + +type interfaceOrPtrValue interface { + IsNil() bool + Elem() reflect.Value +} + +var hex = "0123456789abcdef" + +// An encodeState encodes JSON into a bytes.Buffer. +type encodeState struct { + bytes.Buffer // accumulated output +} + +func (e *encodeState) marshal(v interface{}) (err error) { + defer func() { + if r := recover(); r != nil { + if _, ok := r.(runtime.Error); ok { + panic(r) + } + err = r.(error) + } + }() + e.reflectValue(reflect.ValueOf(v)) + return nil +} + +func (e *encodeState) error(err error) { + panic(err) +} + +var byteSliceType = reflect.TypeOf([]byte(nil)) + +func isEmptyValue(v reflect.Value) bool { + switch v.Kind() { + case reflect.Array, reflect.Map, reflect.Slice, reflect.String: + return v.Len() == 0 + case reflect.Bool: + return !v.Bool() + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return v.Int() == 0 + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + return v.Uint() == 0 + case reflect.Float32, reflect.Float64: + return v.Float() == 0 + case reflect.Interface, reflect.Ptr: + return v.IsNil() + } + return false +} + +func (e *encodeState) reflectValue(v reflect.Value) { + e.reflectValueQuoted(v, false) +} + +// reflectValueQuoted writes the value in v to the output. +// If quoted is true, the serialization is wrapped in a JSON string. +func (e *encodeState) reflectValueQuoted(v reflect.Value, quoted bool) { + if !v.IsValid() { + e.WriteString("null") + return + } + + if j, ok := v.Interface().(Marshaler); ok && (v.Kind() != reflect.Ptr || !v.IsNil()) { + b, err := j.MarshalJSON() + if err == nil { + // copy JSON into buffer, checking validity. + err = Compact(&e.Buffer, b) + } + if err != nil { + e.error(&MarshalerError{v.Type(), err}) + } + return + } + + writeString := (*encodeState).WriteString + if quoted { + writeString = (*encodeState).string + } + + switch v.Kind() { + case reflect.Bool: + x := v.Bool() + if x { + writeString(e, "true") + } else { + writeString(e, "false") + } + + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + writeString(e, strconv.Itoa64(v.Int())) + + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + writeString(e, strconv.Uitoa64(v.Uint())) + + case reflect.Float32, reflect.Float64: + writeString(e, strconv.FtoaN(v.Float(), 'g', -1, v.Type().Bits())) + + case reflect.String: + if quoted { + sb, err := Marshal(v.String()) + if err != nil { + e.error(err) + } + e.string(string(sb)) + } else { + e.string(v.String()) + } + + case reflect.Struct: + e.WriteByte('{') + t := v.Type() + n := v.NumField() + first := true + for i := 0; i < n; i++ { + f := t.Field(i) + if f.PkgPath != "" { + continue + } + tag, omitEmpty, quoted := f.Name, false, false + if tv := f.Tag.Get("json"); tv != "" { + if tv == "-" { + continue + } + name, opts := parseTag(tv) + if isValidTag(name) { + tag = name + } + omitEmpty = opts.Contains("omitempty") + quoted = opts.Contains("string") + } + fieldValue := v.Field(i) + if omitEmpty && isEmptyValue(fieldValue) { + continue + } + if first { + first = false + } else { + e.WriteByte(',') + } + e.string(tag) + e.WriteByte(':') + e.reflectValueQuoted(fieldValue, quoted) + } + e.WriteByte('}') + + case reflect.Map: + if v.Type().Key().Kind() != reflect.String { + e.error(&UnsupportedTypeError{v.Type()}) + } + if v.IsNil() { + e.WriteString("null") + break + } + e.WriteByte('{') + var sv stringValues = v.MapKeys() + sort.Sort(sv) + for i, k := range sv { + if i > 0 { + e.WriteByte(',') + } + e.string(k.String()) + e.WriteByte(':') + e.reflectValue(v.MapIndex(k)) + } + e.WriteByte('}') + + case reflect.Slice: + if v.IsNil() { + e.WriteString("null") + break + } + // Slices can be marshalled as nil, but otherwise are handled + // as arrays. + fallthrough + case reflect.Array: + if v.Type() == byteSliceType { + e.WriteByte('"') + s := v.Interface().([]byte) + if len(s) < 1024 { + // for small buffers, using Encode directly is much faster. + dst := make([]byte, base64.StdEncoding.EncodedLen(len(s))) + base64.StdEncoding.Encode(dst, s) + e.Write(dst) + } else { + // for large buffers, avoid unnecessary extra temporary + // buffer space. + enc := base64.NewEncoder(base64.StdEncoding, e) + enc.Write(s) + enc.Close() + } + e.WriteByte('"') + break + } + e.WriteByte('[') + n := v.Len() + for i := 0; i < n; i++ { + if i > 0 { + e.WriteByte(',') + } + e.reflectValue(v.Index(i)) + } + e.WriteByte(']') + + case reflect.Interface, reflect.Ptr: + if v.IsNil() { + e.WriteString("null") + return + } + e.reflectValue(v.Elem()) + + default: + e.error(&UnsupportedTypeError{v.Type()}) + } + return +} + +func isValidTag(s string) bool { + if s == "" { + return false + } + for _, c := range s { + if c != '$' && c != '-' && c != '_' && !unicode.IsLetter(c) && !unicode.IsDigit(c) { + return false + } + } + return true +} + +// stringValues is a slice of reflect.Value holding *reflect.StringValue. +// It implements the methods to sort by string. +type stringValues []reflect.Value + +func (sv stringValues) Len() int { return len(sv) } +func (sv stringValues) Swap(i, j int) { sv[i], sv[j] = sv[j], sv[i] } +func (sv stringValues) Less(i, j int) bool { return sv.get(i) < sv.get(j) } +func (sv stringValues) get(i int) string { return sv[i].String() } + +func (e *encodeState) string(s string) (int, error) { + len0 := e.Len() + e.WriteByte('"') + start := 0 + for i := 0; i < len(s); { + if b := s[i]; b < utf8.RuneSelf { + if 0x20 <= b && b != '\\' && b != '"' && b != '<' && b != '>' { + i++ + continue + } + if start < i { + e.WriteString(s[start:i]) + } + switch b { + case '\\', '"': + e.WriteByte('\\') + e.WriteByte(b) + case '\n': + e.WriteByte('\\') + e.WriteByte('n') + case '\r': + e.WriteByte('\\') + e.WriteByte('r') + default: + // This encodes bytes < 0x20 except for \n and \r, + // as well as < and >. The latter are escaped because they + // can lead to security holes when user-controlled strings + // are rendered into JSON and served to some browsers. + e.WriteString(`\u00`) + e.WriteByte(hex[b>>4]) + e.WriteByte(hex[b&0xF]) + } + i++ + start = i + continue + } + c, size := utf8.DecodeRuneInString(s[i:]) + if c == utf8.RuneError && size == 1 { + e.error(&InvalidUTF8Error{s}) + } + i += size + } + if start < len(s) { + e.WriteString(s[start:]) + } + e.WriteByte('"') + return e.Len() - len0, nil +} diff --git a/libgo/go/encoding/json/encode_test.go b/libgo/go/encoding/json/encode_test.go new file mode 100644 index 0000000..92f266a --- /dev/null +++ b/libgo/go/encoding/json/encode_test.go @@ -0,0 +1,84 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "bytes" + "reflect" + "testing" +) + +type Optionals struct { + Sr string `json:"sr"` + So string `json:"so,omitempty"` + Sw string `json:"-"` + + Ir int `json:"omitempty"` // actually named omitempty, not an option + Io int `json:"io,omitempty"` + + Slr []string `json:"slr,random"` + Slo []string `json:"slo,omitempty"` + + Mr map[string]interface{} `json:"mr"` + Mo map[string]interface{} `json:",omitempty"` +} + +var optionalsExpected = `{ + "sr": "", + "omitempty": 0, + "slr": null, + "mr": {} +}` + +func TestOmitEmpty(t *testing.T) { + var o Optionals + o.Sw = "something" + o.Mr = map[string]interface{}{} + o.Mo = map[string]interface{}{} + + got, err := MarshalIndent(&o, "", " ") + if err != nil { + t.Fatal(err) + } + if got := string(got); got != optionalsExpected { + t.Errorf(" got: %s\nwant: %s\n", got, optionalsExpected) + } +} + +type StringTag struct { + BoolStr bool `json:",string"` + IntStr int64 `json:",string"` + StrStr string `json:",string"` +} + +var stringTagExpected = `{ + "BoolStr": "true", + "IntStr": "42", + "StrStr": "\"xzbit\"" +}` + +func TestStringTag(t *testing.T) { + var s StringTag + s.BoolStr = true + s.IntStr = 42 + s.StrStr = "xzbit" + got, err := MarshalIndent(&s, "", " ") + if err != nil { + t.Fatal(err) + } + if got := string(got); got != stringTagExpected { + t.Fatalf(" got: %s\nwant: %s\n", got, stringTagExpected) + } + + // Verify that it round-trips. + var s2 StringTag + err = NewDecoder(bytes.NewBuffer(got)).Decode(&s2) + if err != nil { + t.Fatalf("Decode: %v", err) + } + if !reflect.DeepEqual(s, s2) { + t.Fatalf("decode didn't match.\nsource: %#v\nEncoded as:\n%s\ndecode: %#v", s, string(got), s2) + } +} diff --git a/libgo/go/encoding/json/indent.go b/libgo/go/encoding/json/indent.go new file mode 100644 index 0000000..5ba19b0 --- /dev/null +++ b/libgo/go/encoding/json/indent.go @@ -0,0 +1,114 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import "bytes" + +// Compact appends to dst the JSON-encoded src with +// insignificant space characters elided. +func Compact(dst *bytes.Buffer, src []byte) error { + origLen := dst.Len() + var scan scanner + scan.reset() + start := 0 + for i, c := range src { + v := scan.step(&scan, int(c)) + if v >= scanSkipSpace { + if v == scanError { + break + } + if start < i { + dst.Write(src[start:i]) + } + start = i + 1 + } + } + if scan.eof() == scanError { + dst.Truncate(origLen) + return scan.err + } + if start < len(src) { + dst.Write(src[start:]) + } + return nil +} + +func newline(dst *bytes.Buffer, prefix, indent string, depth int) { + dst.WriteByte('\n') + dst.WriteString(prefix) + for i := 0; i < depth; i++ { + dst.WriteString(indent) + } +} + +// Indent appends to dst an indented form of the JSON-encoded src. +// Each element in a JSON object or array begins on a new, +// indented line beginning with prefix followed by one or more +// copies of indent according to the indentation nesting. +// The data appended to dst has no trailing newline, to make it easier +// to embed inside other formatted JSON data. +func Indent(dst *bytes.Buffer, src []byte, prefix, indent string) error { + origLen := dst.Len() + var scan scanner + scan.reset() + needIndent := false + depth := 0 + for _, c := range src { + scan.bytes++ + v := scan.step(&scan, int(c)) + if v == scanSkipSpace { + continue + } + if v == scanError { + break + } + if needIndent && v != scanEndObject && v != scanEndArray { + needIndent = false + depth++ + newline(dst, prefix, indent, depth) + } + + // Emit semantically uninteresting bytes + // (in particular, punctuation in strings) unmodified. + if v == scanContinue { + dst.WriteByte(c) + continue + } + + // Add spacing around real punctuation. + switch c { + case '{', '[': + // delay indent so that empty object and array are formatted as {} and []. + needIndent = true + dst.WriteByte(c) + + case ',': + dst.WriteByte(c) + newline(dst, prefix, indent, depth) + + case ':': + dst.WriteByte(c) + dst.WriteByte(' ') + + case '}', ']': + if needIndent { + // suppress indent in empty object/array + needIndent = false + } else { + depth-- + newline(dst, prefix, indent, depth) + } + dst.WriteByte(c) + + default: + dst.WriteByte(c) + } + } + if scan.eof() == scanError { + dst.Truncate(origLen) + return scan.err + } + return nil +} diff --git a/libgo/go/encoding/json/scanner.go b/libgo/go/encoding/json/scanner.go new file mode 100644 index 0000000..1796904 --- /dev/null +++ b/libgo/go/encoding/json/scanner.go @@ -0,0 +1,621 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +// JSON value parser state machine. +// Just about at the limit of what is reasonable to write by hand. +// Some parts are a bit tedious, but overall it nicely factors out the +// otherwise common code from the multiple scanning functions +// in this package (Compact, Indent, checkValid, nextValue, etc). +// +// This file starts with two simple examples using the scanner +// before diving into the scanner itself. + +import "strconv" + +// checkValid verifies that data is valid JSON-encoded data. +// scan is passed in for use by checkValid to avoid an allocation. +func checkValid(data []byte, scan *scanner) error { + scan.reset() + for _, c := range data { + scan.bytes++ + if scan.step(scan, int(c)) == scanError { + return scan.err + } + } + if scan.eof() == scanError { + return scan.err + } + return nil +} + +// nextValue splits data after the next whole JSON value, +// returning that value and the bytes that follow it as separate slices. +// scan is passed in for use by nextValue to avoid an allocation. +func nextValue(data []byte, scan *scanner) (value, rest []byte, err error) { + scan.reset() + for i, c := range data { + v := scan.step(scan, int(c)) + if v >= scanEnd { + switch v { + case scanError: + return nil, nil, scan.err + case scanEnd: + return data[0:i], data[i:], nil + } + } + } + if scan.eof() == scanError { + return nil, nil, scan.err + } + return data, nil, nil +} + +// A SyntaxError is a description of a JSON syntax error. +type SyntaxError struct { + msg string // description of error + Offset int64 // error occurred after reading Offset bytes +} + +func (e *SyntaxError) Error() string { return e.msg } + +// A scanner is a JSON scanning state machine. +// Callers call scan.reset() and then pass bytes in one at a time +// by calling scan.step(&scan, c) for each byte. +// The return value, referred to as an opcode, tells the +// caller about significant parsing events like beginning +// and ending literals, objects, and arrays, so that the +// caller can follow along if it wishes. +// The return value scanEnd indicates that a single top-level +// JSON value has been completed, *before* the byte that +// just got passed in. (The indication must be delayed in order +// to recognize the end of numbers: is 123 a whole value or +// the beginning of 12345e+6?). +type scanner struct { + // The step is a func to be called to execute the next transition. + // Also tried using an integer constant and a single func + // with a switch, but using the func directly was 10% faster + // on a 64-bit Mac Mini, and it's nicer to read. + step func(*scanner, int) int + + // Stack of what we're in the middle of - array values, object keys, object values. + parseState []int + + // Error that happened, if any. + err error + + // 1-byte redo (see undo method) + redoCode int + redoState func(*scanner, int) int + + // total bytes consumed, updated by decoder.Decode + bytes int64 +} + +// These values are returned by the state transition functions +// assigned to scanner.state and the method scanner.eof. +// They give details about the current state of the scan that +// callers might be interested to know about. +// It is okay to ignore the return value of any particular +// call to scanner.state: if one call returns scanError, +// every subsequent call will return scanError too. +const ( + // Continue. + scanContinue = iota // uninteresting byte + scanBeginLiteral // end implied by next result != scanContinue + scanBeginObject // begin object + scanObjectKey // just finished object key (string) + scanObjectValue // just finished non-last object value + scanEndObject // end object (implies scanObjectValue if possible) + scanBeginArray // begin array + scanArrayValue // just finished array value + scanEndArray // end array (implies scanArrayValue if possible) + scanSkipSpace // space byte; can skip; known to be last "continue" result + + // Stop. + scanEnd // top-level value ended *before* this byte; known to be first "stop" result + scanError // hit an error, scanner.err. +) + +// These values are stored in the parseState stack. +// They give the current state of a composite value +// being scanned. If the parser is inside a nested value +// the parseState describes the nested state, outermost at entry 0. +const ( + parseObjectKey = iota // parsing object key (before colon) + parseObjectValue // parsing object value (after colon) + parseArrayValue // parsing array value +) + +// reset prepares the scanner for use. +// It must be called before calling s.step. +func (s *scanner) reset() { + s.step = stateBeginValue + s.parseState = s.parseState[0:0] + s.err = nil +} + +// eof tells the scanner that the end of input has been reached. +// It returns a scan status just as s.step does. +func (s *scanner) eof() int { + if s.err != nil { + return scanError + } + if s.step == stateEndTop { + return scanEnd + } + s.step(s, ' ') + if s.step == stateEndTop { + return scanEnd + } + if s.err == nil { + s.err = &SyntaxError{"unexpected end of JSON input", s.bytes} + } + return scanError +} + +// pushParseState pushes a new parse state p onto the parse stack. +func (s *scanner) pushParseState(p int) { + s.parseState = append(s.parseState, p) +} + +// popParseState pops a parse state (already obtained) off the stack +// and updates s.step accordingly. +func (s *scanner) popParseState() { + n := len(s.parseState) - 1 + s.parseState = s.parseState[0:n] + if n == 0 { + s.step = stateEndTop + } else { + s.step = stateEndValue + } +} + +func isSpace(c rune) bool { + return c == ' ' || c == '\t' || c == '\r' || c == '\n' +} + +// NOTE(rsc): The various instances of +// +// if c <= ' ' && (c == ' ' || c == '\t' || c == '\r' || c == '\n') +// +// below should all be if c <= ' ' && isSpace(c), but inlining +// the checks makes a significant difference (>10%) in tight loops +// such as nextValue. These should be rewritten with the clearer +// function call once 6g knows to inline the call. + +// stateBeginValueOrEmpty is the state after reading `[`. +func stateBeginValueOrEmpty(s *scanner, c int) int { + if c <= ' ' && (c == ' ' || c == '\t' || c == '\r' || c == '\n') { + return scanSkipSpace + } + if c == ']' { + return stateEndValue(s, c) + } + return stateBeginValue(s, c) +} + +// stateBeginValue is the state at the beginning of the input. +func stateBeginValue(s *scanner, c int) int { + if c <= ' ' && (c == ' ' || c == '\t' || c == '\r' || c == '\n') { + return scanSkipSpace + } + switch c { + case '{': + s.step = stateBeginStringOrEmpty + s.pushParseState(parseObjectKey) + return scanBeginObject + case '[': + s.step = stateBeginValueOrEmpty + s.pushParseState(parseArrayValue) + return scanBeginArray + case '"': + s.step = stateInString + return scanBeginLiteral + case '-': + s.step = stateNeg + return scanBeginLiteral + case '0': // beginning of 0.123 + s.step = state0 + return scanBeginLiteral + case 't': // beginning of true + s.step = stateT + return scanBeginLiteral + case 'f': // beginning of false + s.step = stateF + return scanBeginLiteral + case 'n': // beginning of null + s.step = stateN + return scanBeginLiteral + } + if '1' <= c && c <= '9' { // beginning of 1234.5 + s.step = state1 + return scanBeginLiteral + } + return s.error(c, "looking for beginning of value") +} + +// stateBeginStringOrEmpty is the state after reading `{`. +func stateBeginStringOrEmpty(s *scanner, c int) int { + if c <= ' ' && (c == ' ' || c == '\t' || c == '\r' || c == '\n') { + return scanSkipSpace + } + if c == '}' { + n := len(s.parseState) + s.parseState[n-1] = parseObjectValue + return stateEndValue(s, c) + } + return stateBeginString(s, c) +} + +// stateBeginString is the state after reading `{"key": value,`. +func stateBeginString(s *scanner, c int) int { + if c <= ' ' && (c == ' ' || c == '\t' || c == '\r' || c == '\n') { + return scanSkipSpace + } + if c == '"' { + s.step = stateInString + return scanBeginLiteral + } + return s.error(c, "looking for beginning of object key string") +} + +// stateEndValue is the state after completing a value, +// such as after reading `{}` or `true` or `["x"`. +func stateEndValue(s *scanner, c int) int { + n := len(s.parseState) + if n == 0 { + // Completed top-level before the current byte. + s.step = stateEndTop + return stateEndTop(s, c) + } + if c <= ' ' && (c == ' ' || c == '\t' || c == '\r' || c == '\n') { + s.step = stateEndValue + return scanSkipSpace + } + ps := s.parseState[n-1] + switch ps { + case parseObjectKey: + if c == ':' { + s.parseState[n-1] = parseObjectValue + s.step = stateBeginValue + return scanObjectKey + } + return s.error(c, "after object key") + case parseObjectValue: + if c == ',' { + s.parseState[n-1] = parseObjectKey + s.step = stateBeginString + return scanObjectValue + } + if c == '}' { + s.popParseState() + return scanEndObject + } + return s.error(c, "after object key:value pair") + case parseArrayValue: + if c == ',' { + s.step = stateBeginValue + return scanArrayValue + } + if c == ']' { + s.popParseState() + return scanEndArray + } + return s.error(c, "after array element") + } + return s.error(c, "") +} + +// stateEndTop is the state after finishing the top-level value, +// such as after reading `{}` or `[1,2,3]`. +// Only space characters should be seen now. +func stateEndTop(s *scanner, c int) int { + if c != ' ' && c != '\t' && c != '\r' && c != '\n' { + // Complain about non-space byte on next call. + s.error(c, "after top-level value") + } + return scanEnd +} + +// stateInString is the state after reading `"`. +func stateInString(s *scanner, c int) int { + if c == '"' { + s.step = stateEndValue + return scanContinue + } + if c == '\\' { + s.step = stateInStringEsc + return scanContinue + } + if c < 0x20 { + return s.error(c, "in string literal") + } + return scanContinue +} + +// stateInStringEsc is the state after reading `"\` during a quoted string. +func stateInStringEsc(s *scanner, c int) int { + switch c { + case 'b', 'f', 'n', 'r', 't', '\\', '/', '"': + s.step = stateInString + return scanContinue + } + if c == 'u' { + s.step = stateInStringEscU + return scanContinue + } + return s.error(c, "in string escape code") +} + +// stateInStringEscU is the state after reading `"\u` during a quoted string. +func stateInStringEscU(s *scanner, c int) int { + if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { + s.step = stateInStringEscU1 + return scanContinue + } + // numbers + return s.error(c, "in \\u hexadecimal character escape") +} + +// stateInStringEscU1 is the state after reading `"\u1` during a quoted string. +func stateInStringEscU1(s *scanner, c int) int { + if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { + s.step = stateInStringEscU12 + return scanContinue + } + // numbers + return s.error(c, "in \\u hexadecimal character escape") +} + +// stateInStringEscU12 is the state after reading `"\u12` during a quoted string. +func stateInStringEscU12(s *scanner, c int) int { + if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { + s.step = stateInStringEscU123 + return scanContinue + } + // numbers + return s.error(c, "in \\u hexadecimal character escape") +} + +// stateInStringEscU123 is the state after reading `"\u123` during a quoted string. +func stateInStringEscU123(s *scanner, c int) int { + if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { + s.step = stateInString + return scanContinue + } + // numbers + return s.error(c, "in \\u hexadecimal character escape") +} + +// stateInStringEscU123 is the state after reading `-` during a number. +func stateNeg(s *scanner, c int) int { + if c == '0' { + s.step = state0 + return scanContinue + } + if '1' <= c && c <= '9' { + s.step = state1 + return scanContinue + } + return s.error(c, "in numeric literal") +} + +// state1 is the state after reading a non-zero integer during a number, +// such as after reading `1` or `100` but not `0`. +func state1(s *scanner, c int) int { + if '0' <= c && c <= '9' { + s.step = state1 + return scanContinue + } + return state0(s, c) +} + +// state0 is the state after reading `0` during a number. +func state0(s *scanner, c int) int { + if c == '.' { + s.step = stateDot + return scanContinue + } + if c == 'e' || c == 'E' { + s.step = stateE + return scanContinue + } + return stateEndValue(s, c) +} + +// stateDot is the state after reading the integer and decimal point in a number, +// such as after reading `1.`. +func stateDot(s *scanner, c int) int { + if '0' <= c && c <= '9' { + s.step = stateDot0 + return scanContinue + } + return s.error(c, "after decimal point in numeric literal") +} + +// stateDot0 is the state after reading the integer, decimal point, and subsequent +// digits of a number, such as after reading `3.14`. +func stateDot0(s *scanner, c int) int { + if '0' <= c && c <= '9' { + s.step = stateDot0 + return scanContinue + } + if c == 'e' || c == 'E' { + s.step = stateE + return scanContinue + } + return stateEndValue(s, c) +} + +// stateE is the state after reading the mantissa and e in a number, +// such as after reading `314e` or `0.314e`. +func stateE(s *scanner, c int) int { + if c == '+' { + s.step = stateESign + return scanContinue + } + if c == '-' { + s.step = stateESign + return scanContinue + } + return stateESign(s, c) +} + +// stateESign is the state after reading the mantissa, e, and sign in a number, +// such as after reading `314e-` or `0.314e+`. +func stateESign(s *scanner, c int) int { + if '0' <= c && c <= '9' { + s.step = stateE0 + return scanContinue + } + return s.error(c, "in exponent of numeric literal") +} + +// stateE0 is the state after reading the mantissa, e, optional sign, +// and at least one digit of the exponent in a number, +// such as after reading `314e-2` or `0.314e+1` or `3.14e0`. +func stateE0(s *scanner, c int) int { + if '0' <= c && c <= '9' { + s.step = stateE0 + return scanContinue + } + return stateEndValue(s, c) +} + +// stateT is the state after reading `t`. +func stateT(s *scanner, c int) int { + if c == 'r' { + s.step = stateTr + return scanContinue + } + return s.error(c, "in literal true (expecting 'r')") +} + +// stateTr is the state after reading `tr`. +func stateTr(s *scanner, c int) int { + if c == 'u' { + s.step = stateTru + return scanContinue + } + return s.error(c, "in literal true (expecting 'u')") +} + +// stateTru is the state after reading `tru`. +func stateTru(s *scanner, c int) int { + if c == 'e' { + s.step = stateEndValue + return scanContinue + } + return s.error(c, "in literal true (expecting 'e')") +} + +// stateF is the state after reading `f`. +func stateF(s *scanner, c int) int { + if c == 'a' { + s.step = stateFa + return scanContinue + } + return s.error(c, "in literal false (expecting 'a')") +} + +// stateFa is the state after reading `fa`. +func stateFa(s *scanner, c int) int { + if c == 'l' { + s.step = stateFal + return scanContinue + } + return s.error(c, "in literal false (expecting 'l')") +} + +// stateFal is the state after reading `fal`. +func stateFal(s *scanner, c int) int { + if c == 's' { + s.step = stateFals + return scanContinue + } + return s.error(c, "in literal false (expecting 's')") +} + +// stateFals is the state after reading `fals`. +func stateFals(s *scanner, c int) int { + if c == 'e' { + s.step = stateEndValue + return scanContinue + } + return s.error(c, "in literal false (expecting 'e')") +} + +// stateN is the state after reading `n`. +func stateN(s *scanner, c int) int { + if c == 'u' { + s.step = stateNu + return scanContinue + } + return s.error(c, "in literal null (expecting 'u')") +} + +// stateNu is the state after reading `nu`. +func stateNu(s *scanner, c int) int { + if c == 'l' { + s.step = stateNul + return scanContinue + } + return s.error(c, "in literal null (expecting 'l')") +} + +// stateNul is the state after reading `nul`. +func stateNul(s *scanner, c int) int { + if c == 'l' { + s.step = stateEndValue + return scanContinue + } + return s.error(c, "in literal null (expecting 'l')") +} + +// stateError is the state after reaching a syntax error, +// such as after reading `[1}` or `5.1.2`. +func stateError(s *scanner, c int) int { + return scanError +} + +// error records an error and switches to the error state. +func (s *scanner) error(c int, context string) int { + s.step = stateError + s.err = &SyntaxError{"invalid character " + quoteChar(c) + " " + context, s.bytes} + return scanError +} + +// quoteChar formats c as a quoted character literal +func quoteChar(c int) string { + // special cases - different from quoted strings + if c == '\'' { + return `'\''` + } + if c == '"' { + return `'"'` + } + + // use quoted string with different quotation marks + s := strconv.Quote(string(c)) + return "'" + s[1:len(s)-1] + "'" +} + +// undo causes the scanner to return scanCode from the next state transition. +// This gives callers a simple 1-byte undo mechanism. +func (s *scanner) undo(scanCode int) { + if s.step == stateRedo { + panic("invalid use of scanner") + } + s.redoCode = scanCode + s.redoState = s.step + s.step = stateRedo +} + +// stateRedo helps implement the scanner's 1-byte undo. +func stateRedo(s *scanner, c int) int { + s.step = s.redoState + return s.redoCode +} diff --git a/libgo/go/encoding/json/scanner_test.go b/libgo/go/encoding/json/scanner_test.go new file mode 100644 index 0000000..a0a5995 --- /dev/null +++ b/libgo/go/encoding/json/scanner_test.go @@ -0,0 +1,302 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "bytes" + "math" + "math/rand" + "reflect" + "testing" +) + +// Tests of simple examples. + +type example struct { + compact string + indent string +} + +var examples = []example{ + {`1`, `1`}, + {`{}`, `{}`}, + {`[]`, `[]`}, + {`{"":2}`, "{\n\t\"\": 2\n}"}, + {`[3]`, "[\n\t3\n]"}, + {`[1,2,3]`, "[\n\t1,\n\t2,\n\t3\n]"}, + {`{"x":1}`, "{\n\t\"x\": 1\n}"}, + {ex1, ex1i}, +} + +var ex1 = `[true,false,null,"x",1,1.5,0,-5e+2]` + +var ex1i = `[ + true, + false, + null, + "x", + 1, + 1.5, + 0, + -5e+2 +]` + +func TestCompact(t *testing.T) { + var buf bytes.Buffer + for _, tt := range examples { + buf.Reset() + if err := Compact(&buf, []byte(tt.compact)); err != nil { + t.Errorf("Compact(%#q): %v", tt.compact, err) + } else if s := buf.String(); s != tt.compact { + t.Errorf("Compact(%#q) = %#q, want original", tt.compact, s) + } + + buf.Reset() + if err := Compact(&buf, []byte(tt.indent)); err != nil { + t.Errorf("Compact(%#q): %v", tt.indent, err) + continue + } else if s := buf.String(); s != tt.compact { + t.Errorf("Compact(%#q) = %#q, want %#q", tt.indent, s, tt.compact) + } + } +} + +func TestIndent(t *testing.T) { + var buf bytes.Buffer + for _, tt := range examples { + buf.Reset() + if err := Indent(&buf, []byte(tt.indent), "", "\t"); err != nil { + t.Errorf("Indent(%#q): %v", tt.indent, err) + } else if s := buf.String(); s != tt.indent { + t.Errorf("Indent(%#q) = %#q, want original", tt.indent, s) + } + + buf.Reset() + if err := Indent(&buf, []byte(tt.compact), "", "\t"); err != nil { + t.Errorf("Indent(%#q): %v", tt.compact, err) + continue + } else if s := buf.String(); s != tt.indent { + t.Errorf("Indent(%#q) = %#q, want %#q", tt.compact, s, tt.indent) + } + } +} + +// Tests of a large random structure. + +func TestCompactBig(t *testing.T) { + initBig() + var buf bytes.Buffer + if err := Compact(&buf, jsonBig); err != nil { + t.Fatalf("Compact: %v", err) + } + b := buf.Bytes() + if bytes.Compare(b, jsonBig) != 0 { + t.Error("Compact(jsonBig) != jsonBig") + diff(t, b, jsonBig) + return + } +} + +func TestIndentBig(t *testing.T) { + initBig() + var buf bytes.Buffer + if err := Indent(&buf, jsonBig, "", "\t"); err != nil { + t.Fatalf("Indent1: %v", err) + } + b := buf.Bytes() + if len(b) == len(jsonBig) { + // jsonBig is compact (no unnecessary spaces); + // indenting should make it bigger + t.Fatalf("Indent(jsonBig) did not get bigger") + } + + // should be idempotent + var buf1 bytes.Buffer + if err := Indent(&buf1, b, "", "\t"); err != nil { + t.Fatalf("Indent2: %v", err) + } + b1 := buf1.Bytes() + if bytes.Compare(b1, b) != 0 { + t.Error("Indent(Indent(jsonBig)) != Indent(jsonBig)") + diff(t, b1, b) + return + } + + // should get back to original + buf1.Reset() + if err := Compact(&buf1, b); err != nil { + t.Fatalf("Compact: %v", err) + } + b1 = buf1.Bytes() + if bytes.Compare(b1, jsonBig) != 0 { + t.Error("Compact(Indent(jsonBig)) != jsonBig") + diff(t, b1, jsonBig) + return + } +} + +type indentErrorTest struct { + in string + err error +} + +var indentErrorTests = []indentErrorTest{ + {`{"X": "foo", "Y"}`, &SyntaxError{"invalid character '}' after object key", 17}}, + {`{"X": "foo" "Y": "bar"}`, &SyntaxError{"invalid character '\"' after object key:value pair", 13}}, +} + +func TestIndentErrors(t *testing.T) { + for i, tt := range indentErrorTests { + slice := make([]uint8, 0) + buf := bytes.NewBuffer(slice) + if err := Indent(buf, []uint8(tt.in), "", ""); err != nil { + if !reflect.DeepEqual(err, tt.err) { + t.Errorf("#%d: Indent: %#v", i, err) + continue + } + } + } +} + +func TestNextValueBig(t *testing.T) { + initBig() + var scan scanner + item, rest, err := nextValue(jsonBig, &scan) + if err != nil { + t.Fatalf("nextValue: %s", err) + } + if len(item) != len(jsonBig) || &item[0] != &jsonBig[0] { + t.Errorf("invalid item: %d %d", len(item), len(jsonBig)) + } + if len(rest) != 0 { + t.Errorf("invalid rest: %d", len(rest)) + } + + item, rest, err = nextValue(append(jsonBig, "HELLO WORLD"...), &scan) + if err != nil { + t.Fatalf("nextValue extra: %s", err) + } + if len(item) != len(jsonBig) { + t.Errorf("invalid item: %d %d", len(item), len(jsonBig)) + } + if string(rest) != "HELLO WORLD" { + t.Errorf("invalid rest: %d", len(rest)) + } +} + +func BenchmarkSkipValue(b *testing.B) { + initBig() + var scan scanner + for i := 0; i < b.N; i++ { + nextValue(jsonBig, &scan) + } + b.SetBytes(int64(len(jsonBig))) +} + +func diff(t *testing.T, a, b []byte) { + for i := 0; ; i++ { + if i >= len(a) || i >= len(b) || a[i] != b[i] { + j := i - 10 + if j < 0 { + j = 0 + } + t.Errorf("diverge at %d: «%s» vs «%s»", i, trim(a[j:]), trim(b[j:])) + return + } + } +} + +func trim(b []byte) []byte { + if len(b) > 20 { + return b[0:20] + } + return b +} + +// Generate a random JSON object. + +var jsonBig []byte + +const ( + big = 10000 + small = 100 +) + +func initBig() { + n := big + if testing.Short() { + n = small + } + if len(jsonBig) != n { + b, err := Marshal(genValue(n)) + if err != nil { + panic(err) + } + jsonBig = b + } +} + +func genValue(n int) interface{} { + if n > 1 { + switch rand.Intn(2) { + case 0: + return genArray(n) + case 1: + return genMap(n) + } + } + switch rand.Intn(3) { + case 0: + return rand.Intn(2) == 0 + case 1: + return rand.NormFloat64() + case 2: + return genString(30) + } + panic("unreachable") +} + +func genString(stddev float64) string { + n := int(math.Abs(rand.NormFloat64()*stddev + stddev/2)) + c := make([]rune, n) + for i := range c { + f := math.Abs(rand.NormFloat64()*64 + 32) + if f > 0x10ffff { + f = 0x10ffff + } + c[i] = rune(f) + } + return string(c) +} + +func genArray(n int) []interface{} { + f := int(math.Abs(rand.NormFloat64()) * math.Min(10, float64(n/2))) + if f > n { + f = n + } + if n > 0 && f == 0 { + f = 1 + } + x := make([]interface{}, f) + for i := range x { + x[i] = genValue(((i+1)*n)/f - (i*n)/f) + } + return x +} + +func genMap(n int) map[string]interface{} { + f := int(math.Abs(rand.NormFloat64()) * math.Min(10, float64(n/2))) + if f > n { + f = n + } + if n > 0 && f == 0 { + f = 1 + } + x := make(map[string]interface{}) + for i := 0; i < f; i++ { + x[genString(10)] = genValue(((i+1)*n)/f - (i*n)/f) + } + return x +} diff --git a/libgo/go/encoding/json/stream.go b/libgo/go/encoding/json/stream.go new file mode 100644 index 0000000..f247639 --- /dev/null +++ b/libgo/go/encoding/json/stream.go @@ -0,0 +1,185 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "errors" + "io" +) + +// A Decoder reads and decodes JSON objects from an input stream. +type Decoder struct { + r io.Reader + buf []byte + d decodeState + scan scanner + err error +} + +// NewDecoder returns a new decoder that reads from r. +func NewDecoder(r io.Reader) *Decoder { + return &Decoder{r: r} +} + +// Decode reads the next JSON-encoded value from its +// input and stores it in the value pointed to by v. +// +// See the documentation for Unmarshal for details about +// the conversion of JSON into a Go value. +func (dec *Decoder) Decode(v interface{}) error { + if dec.err != nil { + return dec.err + } + + n, err := dec.readValue() + if err != nil { + return err + } + + // Don't save err from unmarshal into dec.err: + // the connection is still usable since we read a complete JSON + // object from it before the error happened. + dec.d.init(dec.buf[0:n]) + err = dec.d.unmarshal(v) + + // Slide rest of data down. + rest := copy(dec.buf, dec.buf[n:]) + dec.buf = dec.buf[0:rest] + + return err +} + +// readValue reads a JSON value into dec.buf. +// It returns the length of the encoding. +func (dec *Decoder) readValue() (int, error) { + dec.scan.reset() + + scanp := 0 + var err error +Input: + for { + // Look in the buffer for a new value. + for i, c := range dec.buf[scanp:] { + dec.scan.bytes++ + v := dec.scan.step(&dec.scan, int(c)) + if v == scanEnd { + scanp += i + break Input + } + // scanEnd is delayed one byte. + // We might block trying to get that byte from src, + // so instead invent a space byte. + if v == scanEndObject && dec.scan.step(&dec.scan, ' ') == scanEnd { + scanp += i + 1 + break Input + } + if v == scanError { + dec.err = dec.scan.err + return 0, dec.scan.err + } + } + scanp = len(dec.buf) + + // Did the last read have an error? + // Delayed until now to allow buffer scan. + if err != nil { + if err == io.EOF { + if dec.scan.step(&dec.scan, ' ') == scanEnd { + break Input + } + if nonSpace(dec.buf) { + err = io.ErrUnexpectedEOF + } + } + dec.err = err + return 0, err + } + + // Make room to read more into the buffer. + const minRead = 512 + if cap(dec.buf)-len(dec.buf) < minRead { + newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead) + copy(newBuf, dec.buf) + dec.buf = newBuf + } + + // Read. Delay error for next iteration (after scan). + var n int + n, err = dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)]) + dec.buf = dec.buf[0 : len(dec.buf)+n] + } + return scanp, nil +} + +func nonSpace(b []byte) bool { + for _, c := range b { + if !isSpace(rune(c)) { + return true + } + } + return false +} + +// An Encoder writes JSON objects to an output stream. +type Encoder struct { + w io.Writer + e encodeState + err error +} + +// NewEncoder returns a new encoder that writes to w. +func NewEncoder(w io.Writer) *Encoder { + return &Encoder{w: w} +} + +// Encode writes the JSON encoding of v to the connection. +// +// See the documentation for Marshal for details about the +// conversion of Go values to JSON. +func (enc *Encoder) Encode(v interface{}) error { + if enc.err != nil { + return enc.err + } + enc.e.Reset() + err := enc.e.marshal(v) + if err != nil { + return err + } + + // Terminate each value with a newline. + // This makes the output look a little nicer + // when debugging, and some kind of space + // is required if the encoded value was a number, + // so that the reader knows there aren't more + // digits coming. + enc.e.WriteByte('\n') + + if _, err = enc.w.Write(enc.e.Bytes()); err != nil { + enc.err = err + } + return err +} + +// RawMessage is a raw encoded JSON object. +// It implements Marshaler and Unmarshaler and can +// be used to delay JSON decoding or precompute a JSON encoding. +type RawMessage []byte + +// MarshalJSON returns *m as the JSON encoding of m. +func (m *RawMessage) MarshalJSON() ([]byte, error) { + return *m, nil +} + +// UnmarshalJSON sets *m to a copy of data. +func (m *RawMessage) UnmarshalJSON(data []byte) error { + if m == nil { + return errors.New("json.RawMessage: UnmarshalJSON on nil pointer") + } + *m = append((*m)[0:0], data...) + return nil +} + +var _ Marshaler = (*RawMessage)(nil) +var _ Unmarshaler = (*RawMessage)(nil) diff --git a/libgo/go/encoding/json/stream_test.go b/libgo/go/encoding/json/stream_test.go new file mode 100644 index 0000000..ce5a7e6 --- /dev/null +++ b/libgo/go/encoding/json/stream_test.go @@ -0,0 +1,147 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "bytes" + "reflect" + "testing" +) + +// Test values for the stream test. +// One of each JSON kind. +var streamTest = []interface{}{ + 0.1, + "hello", + nil, + true, + false, + []interface{}{"a", "b", "c"}, + map[string]interface{}{"K": "Kelvin", "ß": "long s"}, + 3.14, // another value to make sure something can follow map +} + +var streamEncoded = `0.1 +"hello" +null +true +false +["a","b","c"] +{"ß":"long s","K":"Kelvin"} +3.14 +` + +func TestEncoder(t *testing.T) { + for i := 0; i <= len(streamTest); i++ { + var buf bytes.Buffer + enc := NewEncoder(&buf) + for j, v := range streamTest[0:i] { + if err := enc.Encode(v); err != nil { + t.Fatalf("encode #%d: %v", j, err) + } + } + if have, want := buf.String(), nlines(streamEncoded, i); have != want { + t.Errorf("encoding %d items: mismatch", i) + diff(t, []byte(have), []byte(want)) + break + } + } +} + +func TestDecoder(t *testing.T) { + for i := 0; i <= len(streamTest); i++ { + // Use stream without newlines as input, + // just to stress the decoder even more. + // Our test input does not include back-to-back numbers. + // Otherwise stripping the newlines would + // merge two adjacent JSON values. + var buf bytes.Buffer + for _, c := range nlines(streamEncoded, i) { + if c != '\n' { + buf.WriteRune(c) + } + } + out := make([]interface{}, i) + dec := NewDecoder(&buf) + for j := range out { + if err := dec.Decode(&out[j]); err != nil { + t.Fatalf("decode #%d/%d: %v", j, i, err) + } + } + if !reflect.DeepEqual(out, streamTest[0:i]) { + t.Errorf("decoding %d items: mismatch", i) + for j := range out { + if !reflect.DeepEqual(out[j], streamTest[j]) { + t.Errorf("#%d: have %v want %v", j, out[j], streamTest[j]) + } + } + break + } + } +} + +func nlines(s string, n int) string { + if n <= 0 { + return "" + } + for i, c := range s { + if c == '\n' { + if n--; n == 0 { + return s[0 : i+1] + } + } + } + return s +} + +func TestRawMessage(t *testing.T) { + // TODO(rsc): Should not need the * in *RawMessage + var data struct { + X float64 + Id *RawMessage + Y float32 + } + const raw = `["\u0056",null]` + const msg = `{"X":0.1,"Id":["\u0056",null],"Y":0.2}` + err := Unmarshal([]byte(msg), &data) + if err != nil { + t.Fatalf("Unmarshal: %v", err) + } + if string([]byte(*data.Id)) != raw { + t.Fatalf("Raw mismatch: have %#q want %#q", []byte(*data.Id), raw) + } + b, err := Marshal(&data) + if err != nil { + t.Fatalf("Marshal: %v", err) + } + if string(b) != msg { + t.Fatalf("Marshal: have %#q want %#q", b, msg) + } +} + +func TestNullRawMessage(t *testing.T) { + // TODO(rsc): Should not need the * in *RawMessage + var data struct { + X float64 + Id *RawMessage + Y float32 + } + data.Id = new(RawMessage) + const msg = `{"X":0.1,"Id":null,"Y":0.2}` + err := Unmarshal([]byte(msg), &data) + if err != nil { + t.Fatalf("Unmarshal: %v", err) + } + if data.Id != nil { + t.Fatalf("Raw mismatch: have non-nil, want nil") + } + b, err := Marshal(&data) + if err != nil { + t.Fatalf("Marshal: %v", err) + } + if string(b) != msg { + t.Fatalf("Marshal: have %#q want %#q", b, msg) + } +} diff --git a/libgo/go/encoding/json/tagkey_test.go b/libgo/go/encoding/json/tagkey_test.go new file mode 100644 index 0000000..31fe2be --- /dev/null +++ b/libgo/go/encoding/json/tagkey_test.go @@ -0,0 +1,95 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "testing" +) + +type basicLatin2xTag struct { + V string `json:"$-"` +} + +type basicLatin3xTag struct { + V string `json:"0123456789"` +} + +type basicLatin4xTag struct { + V string `json:"ABCDEFGHIJKLMO"` +} + +type basicLatin5xTag struct { + V string `json:"PQRSTUVWXYZ_"` +} + +type basicLatin6xTag struct { + V string `json:"abcdefghijklmno"` +} + +type basicLatin7xTag struct { + V string `json:"pqrstuvwxyz"` +} + +type miscPlaneTag struct { + V string `json:"色は匂へど"` +} + +type emptyTag struct { + W string +} + +type misnamedTag struct { + X string `jsom:"Misnamed"` +} + +type badFormatTag struct { + Y string `:"BadFormat"` +} + +type badCodeTag struct { + Z string `json:" !\"#%&'()*+,./"` +} + +var structTagObjectKeyTests = []struct { + raw interface{} + value string + key string +}{ + {basicLatin2xTag{"2x"}, "2x", "$-"}, + {basicLatin3xTag{"3x"}, "3x", "0123456789"}, + {basicLatin4xTag{"4x"}, "4x", "ABCDEFGHIJKLMO"}, + {basicLatin5xTag{"5x"}, "5x", "PQRSTUVWXYZ_"}, + {basicLatin6xTag{"6x"}, "6x", "abcdefghijklmno"}, + {basicLatin7xTag{"7x"}, "7x", "pqrstuvwxyz"}, + {miscPlaneTag{"いろはにほへと"}, "いろはにほへと", "色は匂へど"}, + {emptyTag{"Pour Moi"}, "Pour Moi", "W"}, + {misnamedTag{"Animal Kingdom"}, "Animal Kingdom", "X"}, + {badFormatTag{"Orfevre"}, "Orfevre", "Y"}, + {badCodeTag{"Reliable Man"}, "Reliable Man", "Z"}, +} + +func TestStructTagObjectKey(t *testing.T) { + for _, tt := range structTagObjectKeyTests { + b, err := Marshal(tt.raw) + if err != nil { + t.Fatalf("Marshal(%#q) failed: %v", tt.raw, err) + } + var f interface{} + err = Unmarshal(b, &f) + if err != nil { + t.Fatalf("Unmarshal(%#q) failed: %v", b, err) + } + for i, v := range f.(map[string]interface{}) { + switch i { + case tt.key: + if s, ok := v.(string); !ok || s != tt.value { + t.Fatalf("Unexpected value: %#q, want %v", s, tt.value) + } + default: + t.Fatalf("Unexpected key: %#q", i) + } + } + } +} diff --git a/libgo/go/encoding/json/tags.go b/libgo/go/encoding/json/tags.go new file mode 100644 index 0000000..58cda20 --- /dev/null +++ b/libgo/go/encoding/json/tags.go @@ -0,0 +1,44 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "strings" +) + +// tagOptions is the string following a comma in a struct field's "json" +// tag, or the empty string. It does not include the leading comma. +type tagOptions string + +// parseTag splits a struct field's json tag into its name and +// comma-separated options. +func parseTag(tag string) (string, tagOptions) { + if idx := strings.Index(tag, ","); idx != -1 { + return tag[:idx], tagOptions(tag[idx+1:]) + } + return tag, tagOptions("") +} + +// Contains returns whether checks that a comma-separated list of options +// contains a particular substr flag. substr must be surrounded by a +// string boundary or commas. +func (o tagOptions) Contains(optionName string) bool { + if len(o) == 0 { + return false + } + s := string(o) + for s != "" { + var next string + i := strings.Index(s, ",") + if i >= 0 { + s, next = s[:i], s[i+1:] + } + if s == optionName { + return true + } + s = next + } + return false +} diff --git a/libgo/go/encoding/json/tags_test.go b/libgo/go/encoding/json/tags_test.go new file mode 100644 index 0000000..91fb188 --- /dev/null +++ b/libgo/go/encoding/json/tags_test.go @@ -0,0 +1,28 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "testing" +) + +func TestTagParsing(t *testing.T) { + name, opts := parseTag("field,foobar,foo") + if name != "field" { + t.Fatalf("name = %q, want field", name) + } + for _, tt := range []struct { + opt string + want bool + }{ + {"foobar", true}, + {"foo", true}, + {"bar", false}, + } { + if opts.Contains(tt.opt) != tt.want { + t.Errorf("Contains(%q) = %v", tt.opt, !tt.want) + } + } +} diff --git a/libgo/go/encoding/xml/atom_test.go b/libgo/go/encoding/xml/atom_test.go new file mode 100644 index 0000000..d365510 --- /dev/null +++ b/libgo/go/encoding/xml/atom_test.go @@ -0,0 +1,50 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xml + +var atomValue = &Feed{ + Title: "Example Feed", + Link: []Link{{Href: "http://example.org/"}}, + Updated: ParseTime("2003-12-13T18:30:02Z"), + Author: Person{Name: "John Doe"}, + Id: "urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6", + + Entry: []Entry{ + { + Title: "Atom-Powered Robots Run Amok", + Link: []Link{{Href: "http://example.org/2003/12/13/atom03"}}, + Id: "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a", + Updated: ParseTime("2003-12-13T18:30:02Z"), + Summary: NewText("Some text."), + }, + }, +} + +var atomXml = `` + + `<feed xmlns="http://www.w3.org/2005/Atom">` + + `<Title>Example Feed</Title>` + + `<Id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</Id>` + + `<Link href="http://example.org/"></Link>` + + `<Updated>2003-12-13T18:30:02Z</Updated>` + + `<Author><Name>John Doe</Name><URI></URI><Email></Email></Author>` + + `<Entry>` + + `<Title>Atom-Powered Robots Run Amok</Title>` + + `<Id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</Id>` + + `<Link href="http://example.org/2003/12/13/atom03"></Link>` + + `<Updated>2003-12-13T18:30:02Z</Updated>` + + `<Author><Name></Name><URI></URI><Email></Email></Author>` + + `<Summary>Some text.</Summary>` + + `</Entry>` + + `</feed>` + +func ParseTime(str string) Time { + return Time(str) +} + +func NewText(text string) Text { + return Text{ + Body: text, + } +} diff --git a/libgo/go/encoding/xml/embed_test.go b/libgo/go/encoding/xml/embed_test.go new file mode 100644 index 0000000..ec7f478 --- /dev/null +++ b/libgo/go/encoding/xml/embed_test.go @@ -0,0 +1,124 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xml + +import "testing" + +type C struct { + Name string + Open bool +} + +type A struct { + XMLName Name `xml:"http://domain a"` + C + B B + FieldA string +} + +type B struct { + XMLName Name `xml:"b"` + C + FieldB string +} + +const _1a = ` +<?xml version="1.0" encoding="UTF-8"?> +<a xmlns="http://domain"> + <name>KmlFile</name> + <open>1</open> + <b> + <name>Absolute</name> + <open>0</open> + <fieldb>bar</fieldb> + </b> + <fielda>foo</fielda> +</a> +` + +// Tests that embedded structs are marshalled. +func TestEmbedded1(t *testing.T) { + var a A + if e := Unmarshal(StringReader(_1a), &a); e != nil { + t.Fatalf("Unmarshal: %s", e) + } + if a.FieldA != "foo" { + t.Fatalf("Unmarshal: expected 'foo' but found '%s'", a.FieldA) + } + if a.Name != "KmlFile" { + t.Fatalf("Unmarshal: expected 'KmlFile' but found '%s'", a.Name) + } + if !a.Open { + t.Fatal("Unmarshal: expected 'true' but found otherwise") + } + if a.B.FieldB != "bar" { + t.Fatalf("Unmarshal: expected 'bar' but found '%s'", a.B.FieldB) + } + if a.B.Name != "Absolute" { + t.Fatalf("Unmarshal: expected 'Absolute' but found '%s'", a.B.Name) + } + if a.B.Open { + t.Fatal("Unmarshal: expected 'false' but found otherwise") + } +} + +type A2 struct { + XMLName Name `xml:"http://domain a"` + XY string + Xy string +} + +const _2a = ` +<?xml version="1.0" encoding="UTF-8"?> +<a xmlns="http://domain"> + <xy>foo</xy> +</a> +` + +// Tests that conflicting field names get excluded. +func TestEmbedded2(t *testing.T) { + var a A2 + if e := Unmarshal(StringReader(_2a), &a); e != nil { + t.Fatalf("Unmarshal: %s", e) + } + if a.XY != "" { + t.Fatalf("Unmarshal: expected empty string but found '%s'", a.XY) + } + if a.Xy != "" { + t.Fatalf("Unmarshal: expected empty string but found '%s'", a.Xy) + } +} + +type A3 struct { + XMLName Name `xml:"http://domain a"` + xy string +} + +// Tests that private fields are not set. +func TestEmbedded3(t *testing.T) { + var a A3 + if e := Unmarshal(StringReader(_2a), &a); e != nil { + t.Fatalf("Unmarshal: %s", e) + } + if a.xy != "" { + t.Fatalf("Unmarshal: expected empty string but found '%s'", a.xy) + } +} + +type A4 struct { + XMLName Name `xml:"http://domain a"` + Any string +} + +// Tests that private fields are not set. +func TestEmbedded4(t *testing.T) { + var a A4 + if e := Unmarshal(StringReader(_2a), &a); e != nil { + t.Fatalf("Unmarshal: %s", e) + } + if a.Any != "foo" { + t.Fatalf("Unmarshal: expected 'foo' but found '%s'", a.Any) + } +} diff --git a/libgo/go/encoding/xml/marshal.go b/libgo/go/encoding/xml/marshal.go new file mode 100644 index 0000000..691b70d --- /dev/null +++ b/libgo/go/encoding/xml/marshal.go @@ -0,0 +1,304 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xml + +import ( + "bufio" + "io" + "reflect" + "strconv" + "strings" +) + +const ( + // A generic XML header suitable for use with the output of Marshal and + // MarshalIndent. This is not automatically added to any output of this + // package, it is provided as a convenience. + Header = `<?xml version="1.0" encoding="UTF-8"?>` + "\n" +) + +// A Marshaler can produce well-formatted XML representing its internal state. +// It is used by both Marshal and MarshalIndent. +type Marshaler interface { + MarshalXML() ([]byte, error) +} + +type printer struct { + *bufio.Writer +} + +// Marshal writes an XML-formatted representation of v to w. +// +// If v implements Marshaler, then Marshal calls its MarshalXML method. +// Otherwise, Marshal uses the following procedure to create the XML. +// +// Marshal handles an array or slice by marshalling each of the elements. +// Marshal handles a pointer by marshalling the value it points at or, if the +// pointer is nil, by writing nothing. Marshal handles an interface value by +// marshalling the value it contains or, if the interface value is nil, by +// writing nothing. Marshal handles all other data by writing one or more XML +// elements containing the data. +// +// The name for the XML elements is taken from, in order of preference: +// - the tag on an XMLName field, if the data is a struct +// - the value of an XMLName field of type xml.Name +// - the tag of the struct field used to obtain the data +// - the name of the struct field used to obtain the data +// - the name '???'. +// +// The XML element for a struct contains marshalled elements for each of the +// exported fields of the struct, with these exceptions: +// - the XMLName field, described above, is omitted. +// - a field with tag "attr" becomes an attribute in the XML element. +// - a field with tag "chardata" is written as character data, +// not as an XML element. +// - a field with tag "innerxml" is written verbatim, +// not subject to the usual marshalling procedure. +// +// If a field uses a tag "a>b>c", then the element c will be nested inside +// parent elements a and b. Fields that appear next to each other that name +// the same parent will be enclosed in one XML element. For example: +// +// type Result struct { +// XMLName xml.Name `xml:"result"` +// FirstName string `xml:"person>name>first"` +// LastName string `xml:"person>name>last"` +// Age int `xml:"person>age"` +// } +// +// xml.Marshal(w, &Result{FirstName: "John", LastName: "Doe", Age: 42}) +// +// would be marshalled as: +// +// <result> +// <person> +// <name> +// <first>John</first> +// <last>Doe</last> +// </name> +// <age>42</age> +// </person> +// </result> +// +// Marshal will return an error if asked to marshal a channel, function, or map. +func Marshal(w io.Writer, v interface{}) (err error) { + p := &printer{bufio.NewWriter(w)} + err = p.marshalValue(reflect.ValueOf(v), "???") + p.Flush() + return err +} + +func (p *printer) marshalValue(val reflect.Value, name string) error { + if !val.IsValid() { + return nil + } + + kind := val.Kind() + typ := val.Type() + + // Try Marshaler + if typ.NumMethod() > 0 { + if marshaler, ok := val.Interface().(Marshaler); ok { + bytes, err := marshaler.MarshalXML() + if err != nil { + return err + } + p.Write(bytes) + return nil + } + } + + // Drill into pointers/interfaces + if kind == reflect.Ptr || kind == reflect.Interface { + if val.IsNil() { + return nil + } + return p.marshalValue(val.Elem(), name) + } + + // Slices and arrays iterate over the elements. They do not have an enclosing tag. + if (kind == reflect.Slice || kind == reflect.Array) && typ.Elem().Kind() != reflect.Uint8 { + for i, n := 0, val.Len(); i < n; i++ { + if err := p.marshalValue(val.Index(i), name); err != nil { + return err + } + } + return nil + } + + // Find XML name + xmlns := "" + if kind == reflect.Struct { + if f, ok := typ.FieldByName("XMLName"); ok { + if tag := f.Tag.Get("xml"); tag != "" { + if i := strings.Index(tag, " "); i >= 0 { + xmlns, name = tag[:i], tag[i+1:] + } else { + name = tag + } + } else if v, ok := val.FieldByIndex(f.Index).Interface().(Name); ok && v.Local != "" { + xmlns, name = v.Space, v.Local + } + } + } + + p.WriteByte('<') + p.WriteString(name) + + // Attributes + if kind == reflect.Struct { + if len(xmlns) > 0 { + p.WriteString(` xmlns="`) + Escape(p, []byte(xmlns)) + p.WriteByte('"') + } + + for i, n := 0, typ.NumField(); i < n; i++ { + if f := typ.Field(i); f.PkgPath == "" && f.Tag.Get("xml") == "attr" { + if f.Type.Kind() == reflect.String { + if str := val.Field(i).String(); str != "" { + p.WriteByte(' ') + p.WriteString(strings.ToLower(f.Name)) + p.WriteString(`="`) + Escape(p, []byte(str)) + p.WriteByte('"') + } + } + } + } + } + p.WriteByte('>') + + switch k := val.Kind(); k { + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + p.WriteString(strconv.Itoa64(val.Int())) + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + p.WriteString(strconv.Uitoa64(val.Uint())) + case reflect.Float32, reflect.Float64: + p.WriteString(strconv.Ftoa64(val.Float(), 'g', -1)) + case reflect.String: + Escape(p, []byte(val.String())) + case reflect.Bool: + p.WriteString(strconv.Btoa(val.Bool())) + case reflect.Array: + // will be [...]byte + bytes := make([]byte, val.Len()) + for i := range bytes { + bytes[i] = val.Index(i).Interface().(byte) + } + Escape(p, bytes) + case reflect.Slice: + // will be []byte + bytes := val.Interface().([]byte) + Escape(p, bytes) + case reflect.Struct: + s := parentStack{printer: p} + for i, n := 0, val.NumField(); i < n; i++ { + if f := typ.Field(i); f.Name != "XMLName" && f.PkgPath == "" { + name := f.Name + vf := val.Field(i) + switch tag := f.Tag.Get("xml"); tag { + case "": + s.trim(nil) + case "chardata": + if tk := f.Type.Kind(); tk == reflect.String { + Escape(p, []byte(vf.String())) + } else if tk == reflect.Slice { + if elem, ok := vf.Interface().([]byte); ok { + Escape(p, elem) + } + } + continue + case "innerxml": + iface := vf.Interface() + switch raw := iface.(type) { + case []byte: + p.Write(raw) + continue + case string: + p.WriteString(raw) + continue + } + case "attr": + continue + default: + parents := strings.Split(tag, ">") + if len(parents) == 1 { + parents, name = nil, tag + } else { + parents, name = parents[:len(parents)-1], parents[len(parents)-1] + if parents[0] == "" { + parents[0] = f.Name + } + } + + s.trim(parents) + if !(vf.Kind() == reflect.Ptr || vf.Kind() == reflect.Interface) || !vf.IsNil() { + s.push(parents[len(s.stack):]) + } + } + + if err := p.marshalValue(vf, name); err != nil { + return err + } + } + } + s.trim(nil) + default: + return &UnsupportedTypeError{typ} + } + + p.WriteByte('<') + p.WriteByte('/') + p.WriteString(name) + p.WriteByte('>') + + return nil +} + +type parentStack struct { + *printer + stack []string +} + +// trim updates the XML context to match the longest common prefix of the stack +// and the given parents. A closing tag will be written for every parent +// popped. Passing a zero slice or nil will close all the elements. +func (s *parentStack) trim(parents []string) { + split := 0 + for ; split < len(parents) && split < len(s.stack); split++ { + if parents[split] != s.stack[split] { + break + } + } + + for i := len(s.stack) - 1; i >= split; i-- { + s.WriteString("</") + s.WriteString(s.stack[i]) + s.WriteByte('>') + } + + s.stack = parents[:split] +} + +// push adds parent elements to the stack and writes open tags. +func (s *parentStack) push(parents []string) { + for i := 0; i < len(parents); i++ { + s.WriteString("<") + s.WriteString(parents[i]) + s.WriteByte('>') + } + s.stack = append(s.stack, parents...) +} + +// A MarshalXMLError is returned when Marshal or MarshalIndent encounter a type +// that cannot be converted into XML. +type UnsupportedTypeError struct { + Type reflect.Type +} + +func (e *UnsupportedTypeError) Error() string { + return "xml: unsupported type: " + e.Type.String() +} diff --git a/libgo/go/encoding/xml/marshal_test.go b/libgo/go/encoding/xml/marshal_test.go new file mode 100644 index 0000000..a6f7d2d --- /dev/null +++ b/libgo/go/encoding/xml/marshal_test.go @@ -0,0 +1,423 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xml + +import ( + "bytes" + "reflect" + "strconv" + "strings" + "testing" +) + +type DriveType int + +const ( + HyperDrive DriveType = iota + ImprobabilityDrive +) + +type Passenger struct { + Name []string `xml:"name"` + Weight float32 `xml:"weight"` +} + +type Ship struct { + XMLName Name `xml:"spaceship"` + + Name string `xml:"attr"` + Pilot string `xml:"attr"` + Drive DriveType `xml:"drive"` + Age uint `xml:"age"` + Passenger []*Passenger `xml:"passenger"` + secret string +} + +type RawXML string + +func (rx RawXML) MarshalXML() ([]byte, error) { + return []byte(rx), nil +} + +type NamedType string + +type Port struct { + XMLName Name `xml:"port"` + Type string `xml:"attr"` + Number string `xml:"chardata"` +} + +type Domain struct { + XMLName Name `xml:"domain"` + Country string `xml:"attr"` + Name []byte `xml:"chardata"` +} + +type Book struct { + XMLName Name `xml:"book"` + Title string `xml:"chardata"` +} + +type SecretAgent struct { + XMLName Name `xml:"agent"` + Handle string `xml:"attr"` + Identity string + Obfuscate string `xml:"innerxml"` +} + +type NestedItems struct { + XMLName Name `xml:"result"` + Items []string `xml:">item"` + Item1 []string `xml:"Items>item1"` +} + +type NestedOrder struct { + XMLName Name `xml:"result"` + Field1 string `xml:"parent>c"` + Field2 string `xml:"parent>b"` + Field3 string `xml:"parent>a"` +} + +type MixedNested struct { + XMLName Name `xml:"result"` + A string `xml:"parent1>a"` + B string `xml:"b"` + C string `xml:"parent1>parent2>c"` + D string `xml:"parent1>d"` +} + +type NilTest struct { + A interface{} `xml:"parent1>parent2>a"` + B interface{} `xml:"parent1>b"` + C interface{} `xml:"parent1>parent2>c"` +} + +type Service struct { + XMLName Name `xml:"service"` + Domain *Domain `xml:"host>domain"` + Port *Port `xml:"host>port"` + Extra1 interface{} + Extra2 interface{} `xml:"host>extra2"` +} + +var nilStruct *Ship + +var marshalTests = []struct { + Value interface{} + ExpectXML string +}{ + // Test nil marshals to nothing + {Value: nil, ExpectXML: ``}, + {Value: nilStruct, ExpectXML: ``}, + + // Test value types (no tag name, so ???) + {Value: true, ExpectXML: `<???>true</???>`}, + {Value: int(42), ExpectXML: `<???>42</???>`}, + {Value: int8(42), ExpectXML: `<???>42</???>`}, + {Value: int16(42), ExpectXML: `<???>42</???>`}, + {Value: int32(42), ExpectXML: `<???>42</???>`}, + {Value: uint(42), ExpectXML: `<???>42</???>`}, + {Value: uint8(42), ExpectXML: `<???>42</???>`}, + {Value: uint16(42), ExpectXML: `<???>42</???>`}, + {Value: uint32(42), ExpectXML: `<???>42</???>`}, + {Value: float32(1.25), ExpectXML: `<???>1.25</???>`}, + {Value: float64(1.25), ExpectXML: `<???>1.25</???>`}, + {Value: uintptr(0xFFDD), ExpectXML: `<???>65501</???>`}, + {Value: "gopher", ExpectXML: `<???>gopher</???>`}, + {Value: []byte("gopher"), ExpectXML: `<???>gopher</???>`}, + {Value: "</>", ExpectXML: `<???></></???>`}, + {Value: []byte("</>"), ExpectXML: `<???></></???>`}, + {Value: [3]byte{'<', '/', '>'}, ExpectXML: `<???></></???>`}, + {Value: NamedType("potato"), ExpectXML: `<???>potato</???>`}, + {Value: []int{1, 2, 3}, ExpectXML: `<???>1</???><???>2</???><???>3</???>`}, + {Value: [3]int{1, 2, 3}, ExpectXML: `<???>1</???><???>2</???><???>3</???>`}, + + // Test innerxml + {Value: RawXML("</>"), ExpectXML: `</>`}, + { + Value: &SecretAgent{ + Handle: "007", + Identity: "James Bond", + Obfuscate: "<redacted/>", + }, + //ExpectXML: `<agent handle="007"><redacted/></agent>`, + ExpectXML: `<agent handle="007"><Identity>James Bond</Identity><redacted/></agent>`, + }, + + // Test structs + {Value: &Port{Type: "ssl", Number: "443"}, ExpectXML: `<port type="ssl">443</port>`}, + {Value: &Port{Number: "443"}, ExpectXML: `<port>443</port>`}, + {Value: &Port{Type: "<unix>"}, ExpectXML: `<port type="<unix>"></port>`}, + {Value: &Domain{Name: []byte("google.com&friends")}, ExpectXML: `<domain>google.com&friends</domain>`}, + {Value: &Book{Title: "Pride & Prejudice"}, ExpectXML: `<book>Pride & Prejudice</book>`}, + {Value: atomValue, ExpectXML: atomXml}, + { + Value: &Ship{ + Name: "Heart of Gold", + Pilot: "Computer", + Age: 1, + Drive: ImprobabilityDrive, + Passenger: []*Passenger{ + &Passenger{ + Name: []string{"Zaphod", "Beeblebrox"}, + Weight: 7.25, + }, + &Passenger{ + Name: []string{"Trisha", "McMillen"}, + Weight: 5.5, + }, + &Passenger{ + Name: []string{"Ford", "Prefect"}, + Weight: 7, + }, + &Passenger{ + Name: []string{"Arthur", "Dent"}, + Weight: 6.75, + }, + }, + }, + ExpectXML: `<spaceship name="Heart of Gold" pilot="Computer">` + + `<drive>` + strconv.Itoa(int(ImprobabilityDrive)) + `</drive>` + + `<age>1</age>` + + `<passenger>` + + `<name>Zaphod</name>` + + `<name>Beeblebrox</name>` + + `<weight>7.25</weight>` + + `</passenger>` + + `<passenger>` + + `<name>Trisha</name>` + + `<name>McMillen</name>` + + `<weight>5.5</weight>` + + `</passenger>` + + `<passenger>` + + `<name>Ford</name>` + + `<name>Prefect</name>` + + `<weight>7</weight>` + + `</passenger>` + + `<passenger>` + + `<name>Arthur</name>` + + `<name>Dent</name>` + + `<weight>6.75</weight>` + + `</passenger>` + + `</spaceship>`, + }, + // Test a>b + { + Value: NestedItems{Items: []string{}, Item1: []string{}}, + ExpectXML: `<result>` + + `<Items>` + + `</Items>` + + `</result>`, + }, + { + Value: NestedItems{Items: []string{}, Item1: []string{"A"}}, + ExpectXML: `<result>` + + `<Items>` + + `<item1>A</item1>` + + `</Items>` + + `</result>`, + }, + { + Value: NestedItems{Items: []string{"A", "B"}, Item1: []string{}}, + ExpectXML: `<result>` + + `<Items>` + + `<item>A</item>` + + `<item>B</item>` + + `</Items>` + + `</result>`, + }, + { + Value: NestedItems{Items: []string{"A", "B"}, Item1: []string{"C"}}, + ExpectXML: `<result>` + + `<Items>` + + `<item>A</item>` + + `<item>B</item>` + + `<item1>C</item1>` + + `</Items>` + + `</result>`, + }, + { + Value: NestedOrder{Field1: "C", Field2: "B", Field3: "A"}, + ExpectXML: `<result>` + + `<parent>` + + `<c>C</c>` + + `<b>B</b>` + + `<a>A</a>` + + `</parent>` + + `</result>`, + }, + { + Value: NilTest{A: "A", B: nil, C: "C"}, + ExpectXML: `<???>` + + `<parent1>` + + `<parent2><a>A</a></parent2>` + + `<parent2><c>C</c></parent2>` + + `</parent1>` + + `</???>`, + }, + { + Value: MixedNested{A: "A", B: "B", C: "C", D: "D"}, + ExpectXML: `<result>` + + `<parent1><a>A</a></parent1>` + + `<b>B</b>` + + `<parent1>` + + `<parent2><c>C</c></parent2>` + + `<d>D</d>` + + `</parent1>` + + `</result>`, + }, + { + Value: Service{Port: &Port{Number: "80"}}, + ExpectXML: `<service><host><port>80</port></host></service>`, + }, + { + Value: Service{}, + ExpectXML: `<service></service>`, + }, + { + Value: Service{Port: &Port{Number: "80"}, Extra1: "A", Extra2: "B"}, + ExpectXML: `<service>` + + `<host><port>80</port></host>` + + `<Extra1>A</Extra1>` + + `<host><extra2>B</extra2></host>` + + `</service>`, + }, + { + Value: Service{Port: &Port{Number: "80"}, Extra2: "example"}, + ExpectXML: `<service>` + + `<host><port>80</port></host>` + + `<host><extra2>example</extra2></host>` + + `</service>`, + }, +} + +func TestMarshal(t *testing.T) { + for idx, test := range marshalTests { + buf := bytes.NewBuffer(nil) + err := Marshal(buf, test.Value) + if err != nil { + t.Errorf("#%d: Error: %s", idx, err) + continue + } + if got, want := buf.String(), test.ExpectXML; got != want { + if strings.Contains(want, "\n") { + t.Errorf("#%d: marshal(%#v) - GOT:\n%s\nWANT:\n%s", idx, test.Value, got, want) + } else { + t.Errorf("#%d: marshal(%#v) = %#q want %#q", idx, test.Value, got, want) + } + } + } +} + +var marshalErrorTests = []struct { + Value interface{} + Err string + Kind reflect.Kind +}{ + { + Value: make(chan bool), + Err: "xml: unsupported type: chan bool", + Kind: reflect.Chan, + }, + { + Value: map[string]string{ + "question": "What do you get when you multiply six by nine?", + "answer": "42", + }, + Err: "xml: unsupported type: map[string] string", + Kind: reflect.Map, + }, + { + Value: map[*Ship]bool{nil: false}, + Err: "xml: unsupported type: map[*xml.Ship] bool", + Kind: reflect.Map, + }, +} + +func TestMarshalErrors(t *testing.T) { + for idx, test := range marshalErrorTests { + buf := bytes.NewBuffer(nil) + err := Marshal(buf, test.Value) + if err == nil || err.Error() != test.Err { + t.Errorf("#%d: marshal(%#v) = [error] %q, want %q", idx, test.Value, err, test.Err) + } + if kind := err.(*UnsupportedTypeError).Type.Kind(); kind != test.Kind { + t.Errorf("#%d: marshal(%#v) = [error kind] %s, want %s", idx, test.Value, kind, test.Kind) + } + } +} + +// Do invertibility testing on the various structures that we test +func TestUnmarshal(t *testing.T) { + for i, test := range marshalTests { + // Skip the nil pointers + if i <= 1 { + continue + } + + var dest interface{} + + switch test.Value.(type) { + case *Ship, Ship: + dest = &Ship{} + case *Port, Port: + dest = &Port{} + case *Domain, Domain: + dest = &Domain{} + case *Feed, Feed: + dest = &Feed{} + default: + continue + } + + buffer := bytes.NewBufferString(test.ExpectXML) + err := Unmarshal(buffer, dest) + + // Don't compare XMLNames + switch fix := dest.(type) { + case *Ship: + fix.XMLName = Name{} + case *Port: + fix.XMLName = Name{} + case *Domain: + fix.XMLName = Name{} + case *Feed: + fix.XMLName = Name{} + fix.Author.InnerXML = "" + for i := range fix.Entry { + fix.Entry[i].Author.InnerXML = "" + } + } + + if err != nil { + t.Errorf("#%d: unexpected error: %#v", i, err) + } else if got, want := dest, test.Value; !reflect.DeepEqual(got, want) { + t.Errorf("#%d: unmarshal(%#s) = %#v, want %#v", i, test.ExpectXML, got, want) + } + } +} + +func BenchmarkMarshal(b *testing.B) { + idx := len(marshalTests) - 1 + test := marshalTests[idx] + + buf := bytes.NewBuffer(nil) + for i := 0; i < b.N; i++ { + Marshal(buf, test.Value) + buf.Truncate(0) + } +} + +func BenchmarkUnmarshal(b *testing.B) { + idx := len(marshalTests) - 1 + test := marshalTests[idx] + sm := &Ship{} + xml := []byte(test.ExpectXML) + + for i := 0; i < b.N; i++ { + buffer := bytes.NewBuffer(xml) + Unmarshal(buffer, sm) + } +} diff --git a/libgo/go/encoding/xml/read.go b/libgo/go/encoding/xml/read.go new file mode 100644 index 0000000..c6a3d75 --- /dev/null +++ b/libgo/go/encoding/xml/read.go @@ -0,0 +1,630 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xml + +import ( + "bytes" + "errors" + "fmt" + "io" + "reflect" + "strconv" + "strings" + "unicode" + "unicode/utf8" +) + +// BUG(rsc): Mapping between XML elements and data structures is inherently flawed: +// an XML element is an order-dependent collection of anonymous +// values, while a data structure is an order-independent collection +// of named values. +// See package json for a textual representation more suitable +// to data structures. + +// Unmarshal parses an XML element from r and uses the +// reflect library to fill in an arbitrary struct, slice, or string +// pointed at by val. Well-formed data that does not fit +// into val is discarded. +// +// For example, given these definitions: +// +// type Email struct { +// Where string `xml:"attr"` +// Addr string +// } +// +// type Result struct { +// XMLName xml.Name `xml:"result"` +// Name string +// Phone string +// Email []Email +// Groups []string `xml:"group>value"` +// } +// +// result := Result{Name: "name", Phone: "phone", Email: nil} +// +// unmarshalling the XML input +// +// <result> +// <email where="home"> +// <addr>gre@example.com</addr> +// </email> +// <email where='work'> +// <addr>gre@work.com</addr> +// </email> +// <name>Grace R. Emlin</name> +// <group> +// <value>Friends</value> +// <value>Squash</value> +// </group> +// <address>123 Main Street</address> +// </result> +// +// via Unmarshal(r, &result) is equivalent to assigning +// +// r = Result{xml.Name{"", "result"}, +// "Grace R. Emlin", // name +// "phone", // no phone given +// []Email{ +// Email{"home", "gre@example.com"}, +// Email{"work", "gre@work.com"}, +// }, +// []string{"Friends", "Squash"}, +// } +// +// Note that the field r.Phone has not been modified and +// that the XML <address> element was discarded. Also, the field +// Groups was assigned considering the element path provided in the +// field tag. +// +// Because Unmarshal uses the reflect package, it can only assign +// to exported (upper case) fields. Unmarshal uses a case-insensitive +// comparison to match XML element names to struct field names. +// +// Unmarshal maps an XML element to a struct using the following rules. +// In the rules, the tag of a field refers to the value associated with the +// key 'xml' in the struct field's tag (see the example above). +// +// * If the struct has a field of type []byte or string with tag "innerxml", +// Unmarshal accumulates the raw XML nested inside the element +// in that field. The rest of the rules still apply. +// +// * If the struct has a field named XMLName of type xml.Name, +// Unmarshal records the element name in that field. +// +// * If the XMLName field has an associated tag of the form +// "name" or "namespace-URL name", the XML element must have +// the given name (and, optionally, name space) or else Unmarshal +// returns an error. +// +// * If the XML element has an attribute whose name matches a +// struct field of type string with tag "attr", Unmarshal records +// the attribute value in that field. +// +// * If the XML element contains character data, that data is +// accumulated in the first struct field that has tag "chardata". +// The struct field may have type []byte or string. +// If there is no such field, the character data is discarded. +// +// * If the XML element contains comments, they are accumulated in +// the first struct field that has tag "comments". The struct +// field may have type []byte or string. If there is no such +// field, the comments are discarded. +// +// * If the XML element contains a sub-element whose name matches +// the prefix of a tag formatted as "a>b>c", unmarshal +// will descend into the XML structure looking for elements with the +// given names, and will map the innermost elements to that struct field. +// A tag starting with ">" is equivalent to one starting +// with the field name followed by ">". +// +// * If the XML element contains a sub-element whose name +// matches a field whose tag is neither "attr" nor "chardata", +// Unmarshal maps the sub-element to that struct field. +// Otherwise, if the struct has a field named Any, unmarshal +// maps the sub-element to that struct field. +// +// Unmarshal maps an XML element to a string or []byte by saving the +// concatenation of that element's character data in the string or +// []byte. +// +// Unmarshal maps an attribute value to a string or []byte by saving +// the value in the string or slice. +// +// Unmarshal maps an XML element to a slice by extending the length of +// the slice and mapping the element to the newly created value. +// +// Unmarshal maps an XML element or attribute value to a bool by +// setting it to the boolean value represented by the string. +// +// Unmarshal maps an XML element or attribute value to an integer or +// floating-point field by setting the field to the result of +// interpreting the string value in decimal. There is no check for +// overflow. +// +// Unmarshal maps an XML element to an xml.Name by recording the +// element name. +// +// Unmarshal maps an XML element to a pointer by setting the pointer +// to a freshly allocated value and then mapping the element to that value. +// +func Unmarshal(r io.Reader, val interface{}) error { + v := reflect.ValueOf(val) + if v.Kind() != reflect.Ptr { + return errors.New("non-pointer passed to Unmarshal") + } + p := NewParser(r) + elem := v.Elem() + err := p.unmarshal(elem, nil) + if err != nil { + return err + } + return nil +} + +// An UnmarshalError represents an error in the unmarshalling process. +type UnmarshalError string + +func (e UnmarshalError) Error() string { return string(e) } + +// A TagPathError represents an error in the unmarshalling process +// caused by the use of field tags with conflicting paths. +type TagPathError struct { + Struct reflect.Type + Field1, Tag1 string + Field2, Tag2 string +} + +func (e *TagPathError) Error() string { + return fmt.Sprintf("%s field %q with tag %q conflicts with field %q with tag %q", e.Struct, e.Field1, e.Tag1, e.Field2, e.Tag2) +} + +// The Parser's Unmarshal method is like xml.Unmarshal +// except that it can be passed a pointer to the initial start element, +// useful when a client reads some raw XML tokens itself +// but also defers to Unmarshal for some elements. +// Passing a nil start element indicates that Unmarshal should +// read the token stream to find the start element. +func (p *Parser) Unmarshal(val interface{}, start *StartElement) error { + v := reflect.ValueOf(val) + if v.Kind() != reflect.Ptr { + return errors.New("non-pointer passed to Unmarshal") + } + return p.unmarshal(v.Elem(), start) +} + +// fieldName strips invalid characters from an XML name +// to create a valid Go struct name. It also converts the +// name to lower case letters. +func fieldName(original string) string { + + var i int + //remove leading underscores, without exhausting all characters + for i = 0; i < len(original)-1 && original[i] == '_'; i++ { + } + + return strings.Map( + func(x rune) rune { + if x == '_' || unicode.IsDigit(x) || unicode.IsLetter(x) { + return unicode.ToLower(x) + } + return -1 + }, + original[i:]) +} + +// Unmarshal a single XML element into val. +func (p *Parser) unmarshal(val reflect.Value, start *StartElement) error { + // Find start element if we need it. + if start == nil { + for { + tok, err := p.Token() + if err != nil { + return err + } + if t, ok := tok.(StartElement); ok { + start = &t + break + } + } + } + + if pv := val; pv.Kind() == reflect.Ptr { + if pv.IsNil() { + pv.Set(reflect.New(pv.Type().Elem())) + } + val = pv.Elem() + } + + var ( + data []byte + saveData reflect.Value + comment []byte + saveComment reflect.Value + saveXML reflect.Value + saveXMLIndex int + saveXMLData []byte + sv reflect.Value + styp reflect.Type + fieldPaths map[string]pathInfo + ) + + switch v := val; v.Kind() { + default: + return errors.New("unknown type " + v.Type().String()) + + case reflect.Slice: + typ := v.Type() + if typ.Elem().Kind() == reflect.Uint8 { + // []byte + saveData = v + break + } + + // Slice of element values. + // Grow slice. + n := v.Len() + if n >= v.Cap() { + ncap := 2 * n + if ncap < 4 { + ncap = 4 + } + new := reflect.MakeSlice(typ, n, ncap) + reflect.Copy(new, v) + v.Set(new) + } + v.SetLen(n + 1) + + // Recur to read element into slice. + if err := p.unmarshal(v.Index(n), start); err != nil { + v.SetLen(n) + return err + } + return nil + + case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, reflect.String: + saveData = v + + case reflect.Struct: + if _, ok := v.Interface().(Name); ok { + v.Set(reflect.ValueOf(start.Name)) + break + } + + sv = v + typ := sv.Type() + styp = typ + // Assign name. + if f, ok := typ.FieldByName("XMLName"); ok { + // Validate element name. + if tag := f.Tag.Get("xml"); tag != "" { + ns := "" + i := strings.LastIndex(tag, " ") + if i >= 0 { + ns, tag = tag[0:i], tag[i+1:] + } + if tag != start.Name.Local { + return UnmarshalError("expected element type <" + tag + "> but have <" + start.Name.Local + ">") + } + if ns != "" && ns != start.Name.Space { + e := "expected element <" + tag + "> in name space " + ns + " but have " + if start.Name.Space == "" { + e += "no name space" + } else { + e += start.Name.Space + } + return UnmarshalError(e) + } + } + + // Save + v := sv.FieldByIndex(f.Index) + if _, ok := v.Interface().(Name); ok { + v.Set(reflect.ValueOf(start.Name)) + } + } + + // Assign attributes. + // Also, determine whether we need to save character data or comments. + for i, n := 0, typ.NumField(); i < n; i++ { + f := typ.Field(i) + switch f.Tag.Get("xml") { + case "attr": + strv := sv.FieldByIndex(f.Index) + // Look for attribute. + val := "" + k := strings.ToLower(f.Name) + for _, a := range start.Attr { + if fieldName(a.Name.Local) == k { + val = a.Value + break + } + } + copyValue(strv, []byte(val)) + + case "comment": + if !saveComment.IsValid() { + saveComment = sv.FieldByIndex(f.Index) + } + + case "chardata": + if !saveData.IsValid() { + saveData = sv.FieldByIndex(f.Index) + } + + case "innerxml": + if !saveXML.IsValid() { + saveXML = sv.FieldByIndex(f.Index) + if p.saved == nil { + saveXMLIndex = 0 + p.saved = new(bytes.Buffer) + } else { + saveXMLIndex = p.savedOffset() + } + } + + default: + if tag := f.Tag.Get("xml"); strings.Contains(tag, ">") { + if fieldPaths == nil { + fieldPaths = make(map[string]pathInfo) + } + path := strings.ToLower(tag) + if strings.HasPrefix(tag, ">") { + path = strings.ToLower(f.Name) + path + } + if strings.HasSuffix(tag, ">") { + path = path[:len(path)-1] + } + err := addFieldPath(sv, fieldPaths, path, f.Index) + if err != nil { + return err + } + } + } + } + } + + // Find end element. + // Process sub-elements along the way. +Loop: + for { + var savedOffset int + if saveXML.IsValid() { + savedOffset = p.savedOffset() + } + tok, err := p.Token() + if err != nil { + return err + } + switch t := tok.(type) { + case StartElement: + // Sub-element. + // Look up by tag name. + if sv.IsValid() { + k := fieldName(t.Name.Local) + + if fieldPaths != nil { + if _, found := fieldPaths[k]; found { + if err := p.unmarshalPaths(sv, fieldPaths, k, &t); err != nil { + return err + } + continue Loop + } + } + + match := func(s string) bool { + // check if the name matches ignoring case + if strings.ToLower(s) != k { + return false + } + // now check that it's public + c, _ := utf8.DecodeRuneInString(s) + return unicode.IsUpper(c) + } + + f, found := styp.FieldByNameFunc(match) + if !found { // fall back to mop-up field named "Any" + f, found = styp.FieldByName("Any") + } + if found { + if err := p.unmarshal(sv.FieldByIndex(f.Index), &t); err != nil { + return err + } + continue Loop + } + } + // Not saving sub-element but still have to skip over it. + if err := p.Skip(); err != nil { + return err + } + + case EndElement: + if saveXML.IsValid() { + saveXMLData = p.saved.Bytes()[saveXMLIndex:savedOffset] + if saveXMLIndex == 0 { + p.saved = nil + } + } + break Loop + + case CharData: + if saveData.IsValid() { + data = append(data, t...) + } + + case Comment: + if saveComment.IsValid() { + comment = append(comment, t...) + } + } + } + + if err := copyValue(saveData, data); err != nil { + return err + } + + switch t := saveComment; t.Kind() { + case reflect.String: + t.SetString(string(comment)) + case reflect.Slice: + t.Set(reflect.ValueOf(comment)) + } + + switch t := saveXML; t.Kind() { + case reflect.String: + t.SetString(string(saveXMLData)) + case reflect.Slice: + t.Set(reflect.ValueOf(saveXMLData)) + } + + return nil +} + +func copyValue(dst reflect.Value, src []byte) (err error) { + // Helper functions for integer and unsigned integer conversions + var itmp int64 + getInt64 := func() bool { + itmp, err = strconv.Atoi64(string(src)) + // TODO: should check sizes + return err == nil + } + var utmp uint64 + getUint64 := func() bool { + utmp, err = strconv.Atoui64(string(src)) + // TODO: check for overflow? + return err == nil + } + var ftmp float64 + getFloat64 := func() bool { + ftmp, err = strconv.Atof64(string(src)) + // TODO: check for overflow? + return err == nil + } + + // Save accumulated data and comments + switch t := dst; t.Kind() { + case reflect.Invalid: + // Probably a comment, handled below + default: + return errors.New("cannot happen: unknown type " + t.Type().String()) + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + if !getInt64() { + return err + } + t.SetInt(itmp) + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + if !getUint64() { + return err + } + t.SetUint(utmp) + case reflect.Float32, reflect.Float64: + if !getFloat64() { + return err + } + t.SetFloat(ftmp) + case reflect.Bool: + value, err := strconv.Atob(strings.TrimSpace(string(src))) + if err != nil { + return err + } + t.SetBool(value) + case reflect.String: + t.SetString(string(src)) + case reflect.Slice: + t.Set(reflect.ValueOf(src)) + } + return nil +} + +type pathInfo struct { + fieldIdx []int + complete bool +} + +// addFieldPath takes an element path such as "a>b>c" and fills the +// paths map with all paths leading to it ("a", "a>b", and "a>b>c"). +// It is okay for paths to share a common, shorter prefix but not ok +// for one path to itself be a prefix of another. +func addFieldPath(sv reflect.Value, paths map[string]pathInfo, path string, fieldIdx []int) error { + if info, found := paths[path]; found { + return tagError(sv, info.fieldIdx, fieldIdx) + } + paths[path] = pathInfo{fieldIdx, true} + for { + i := strings.LastIndex(path, ">") + if i < 0 { + break + } + path = path[:i] + if info, found := paths[path]; found { + if info.complete { + return tagError(sv, info.fieldIdx, fieldIdx) + } + } else { + paths[path] = pathInfo{fieldIdx, false} + } + } + return nil + +} + +func tagError(sv reflect.Value, idx1 []int, idx2 []int) error { + t := sv.Type() + f1 := t.FieldByIndex(idx1) + f2 := t.FieldByIndex(idx2) + return &TagPathError{t, f1.Name, f1.Tag.Get("xml"), f2.Name, f2.Tag.Get("xml")} +} + +// unmarshalPaths walks down an XML structure looking for +// wanted paths, and calls unmarshal on them. +func (p *Parser) unmarshalPaths(sv reflect.Value, paths map[string]pathInfo, path string, start *StartElement) error { + if info, _ := paths[path]; info.complete { + return p.unmarshal(sv.FieldByIndex(info.fieldIdx), start) + } + for { + tok, err := p.Token() + if err != nil { + return err + } + switch t := tok.(type) { + case StartElement: + k := path + ">" + fieldName(t.Name.Local) + if _, found := paths[k]; found { + if err := p.unmarshalPaths(sv, paths, k, &t); err != nil { + return err + } + continue + } + if err := p.Skip(); err != nil { + return err + } + case EndElement: + return nil + } + } + panic("unreachable") +} + +// Have already read a start element. +// Read tokens until we find the end element. +// Token is taking care of making sure the +// end element matches the start element we saw. +func (p *Parser) Skip() error { + for { + tok, err := p.Token() + if err != nil { + return err + } + switch tok.(type) { + case StartElement: + if err := p.Skip(); err != nil { + return err + } + case EndElement: + return nil + } + } + panic("unreachable") +} diff --git a/libgo/go/encoding/xml/read_test.go b/libgo/go/encoding/xml/read_test.go new file mode 100644 index 0000000..fbb7fd5 --- /dev/null +++ b/libgo/go/encoding/xml/read_test.go @@ -0,0 +1,402 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xml + +import ( + "reflect" + "testing" +) + +// Stripped down Atom feed data structures. + +func TestUnmarshalFeed(t *testing.T) { + var f Feed + if err := Unmarshal(StringReader(atomFeedString), &f); err != nil { + t.Fatalf("Unmarshal: %s", err) + } + if !reflect.DeepEqual(f, atomFeed) { + t.Fatalf("have %#v\nwant %#v", f, atomFeed) + } +} + +// hget http://codereview.appspot.com/rss/mine/rsc +const atomFeedString = ` +<?xml version="1.0" encoding="utf-8"?> +<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-us"><title>Code Review - My issues</title><link href="http://codereview.appspot.com/" rel="alternate"></link><li-nk href="http://codereview.appspot.com/rss/mine/rsc" rel="self"></li-nk><id>http://codereview.appspot.com/</id><updated>2009-10-04T01:35:58+00:00</updated><author><name>rietveld<></name></author><entry><title>rietveld: an attempt at pubsubhubbub +</title><link hre-f="http://codereview.appspot.com/126085" rel="alternate"></link><updated>2009-10-04T01:35:58+00:00</updated><author><name>email-address-removed</name></author><id>urn:md5:134d9179c41f806be79b3a5f7877d19a</id><summary type="html"> + An attempt at adding pubsubhubbub support to Rietveld. +http://code.google.com/p/pubsubhubbub +http://code.google.com/p/rietveld/issues/detail?id=155 + +The server side of the protocol is trivial: + 1. add a &lt;link rel=&quot;hub&quot; href=&quot;hub-server&quot;&gt; tag to all + feeds that will be pubsubhubbubbed. + 2. every time one of those feeds changes, tell the hub + with a simple POST request. + +I have tested this by adding debug prints to a local hub +server and checking that the server got the right publish +requests. + +I can&#39;t quite get the server to work, but I think the bug +is not in my code. I think that the server expects to be +able to grab the feed and see the feed&#39;s actual URL in +the link rel=&quot;self&quot;, but the default value for that drops +the :port from the URL, and I cannot for the life of me +figure out how to get the Atom generator deep inside +django not to do that, or even where it is doing that, +or even what code is running to generate the Atom feed. +(I thought I knew but I added some assert False statements +and it kept running!) + +Ignoring that particular problem, I would appreciate +feedback on the right way to get the two values at +the top of feeds.py marked NOTE(rsc). + + +</summary></entry><entry><title>rietveld: correct tab handling +</title><link href="http://codereview.appspot.com/124106" rel="alternate"></link><updated>2009-10-03T23:02:17+00:00</updated><author><name>email-address-removed</name></author><id>urn:md5:0a2a4f19bb815101f0ba2904aed7c35a</id><summary type="html"> + This fixes the buggy tab rendering that can be seen at +http://codereview.appspot.com/116075/diff/1/2 + +The fundamental problem was that the tab code was +not being told what column the text began in, so it +didn&#39;t know where to put the tab stops. Another problem +was that some of the code assumed that string byte +offsets were the same as column offsets, which is only +true if there are no tabs. + +In the process of fixing this, I cleaned up the arguments +to Fold and ExpandTabs and renamed them Break and +_ExpandTabs so that I could be sure that I found all the +call sites. I also wanted to verify that ExpandTabs was +not being used from outside intra_region_diff.py. + + +</summary></entry></feed> ` + +type Feed struct { + XMLName Name `xml:"http://www.w3.org/2005/Atom feed"` + Title string + Id string + Link []Link + Updated Time + Author Person + Entry []Entry +} + +type Entry struct { + Title string + Id string + Link []Link + Updated Time + Author Person + Summary Text +} + +type Link struct { + Rel string `xml:"attr"` + Href string `xml:"attr"` +} + +type Person struct { + Name string + URI string + Email string + InnerXML string `xml:"innerxml"` +} + +type Text struct { + Type string `xml:"attr"` + Body string `xml:"chardata"` +} + +type Time string + +var atomFeed = Feed{ + XMLName: Name{"http://www.w3.org/2005/Atom", "feed"}, + Title: "Code Review - My issues", + Link: []Link{ + {Rel: "alternate", Href: "http://codereview.appspot.com/"}, + {Rel: "self", Href: "http://codereview.appspot.com/rss/mine/rsc"}, + }, + Id: "http://codereview.appspot.com/", + Updated: "2009-10-04T01:35:58+00:00", + Author: Person{ + Name: "rietveld<>", + InnerXML: "<name>rietveld<></name>", + }, + Entry: []Entry{ + { + Title: "rietveld: an attempt at pubsubhubbub\n", + Link: []Link{ + {Rel: "alternate", Href: "http://codereview.appspot.com/126085"}, + }, + Updated: "2009-10-04T01:35:58+00:00", + Author: Person{ + Name: "email-address-removed", + InnerXML: "<name>email-address-removed</name>", + }, + Id: "urn:md5:134d9179c41f806be79b3a5f7877d19a", + Summary: Text{ + Type: "html", + Body: ` + An attempt at adding pubsubhubbub support to Rietveld. +http://code.google.com/p/pubsubhubbub +http://code.google.com/p/rietveld/issues/detail?id=155 + +The server side of the protocol is trivial: + 1. add a <link rel="hub" href="hub-server"> tag to all + feeds that will be pubsubhubbubbed. + 2. every time one of those feeds changes, tell the hub + with a simple POST request. + +I have tested this by adding debug prints to a local hub +server and checking that the server got the right publish +requests. + +I can't quite get the server to work, but I think the bug +is not in my code. I think that the server expects to be +able to grab the feed and see the feed's actual URL in +the link rel="self", but the default value for that drops +the :port from the URL, and I cannot for the life of me +figure out how to get the Atom generator deep inside +django not to do that, or even where it is doing that, +or even what code is running to generate the Atom feed. +(I thought I knew but I added some assert False statements +and it kept running!) + +Ignoring that particular problem, I would appreciate +feedback on the right way to get the two values at +the top of feeds.py marked NOTE(rsc). + + +`, + }, + }, + { + Title: "rietveld: correct tab handling\n", + Link: []Link{ + {Rel: "alternate", Href: "http://codereview.appspot.com/124106"}, + }, + Updated: "2009-10-03T23:02:17+00:00", + Author: Person{ + Name: "email-address-removed", + InnerXML: "<name>email-address-removed</name>", + }, + Id: "urn:md5:0a2a4f19bb815101f0ba2904aed7c35a", + Summary: Text{ + Type: "html", + Body: ` + This fixes the buggy tab rendering that can be seen at +http://codereview.appspot.com/116075/diff/1/2 + +The fundamental problem was that the tab code was +not being told what column the text began in, so it +didn't know where to put the tab stops. Another problem +was that some of the code assumed that string byte +offsets were the same as column offsets, which is only +true if there are no tabs. + +In the process of fixing this, I cleaned up the arguments +to Fold and ExpandTabs and renamed them Break and +_ExpandTabs so that I could be sure that I found all the +call sites. I also wanted to verify that ExpandTabs was +not being used from outside intra_region_diff.py. + + +`, + }, + }, + }, +} + +type FieldNameTest struct { + in, out string +} + +var FieldNameTests = []FieldNameTest{ + {"Profile-Image", "profileimage"}, + {"_score", "score"}, +} + +func TestFieldName(t *testing.T) { + for _, tt := range FieldNameTests { + a := fieldName(tt.in) + if a != tt.out { + t.Fatalf("have %#v\nwant %#v\n\n", a, tt.out) + } + } +} + +const pathTestString = ` +<result> + <before>1</before> + <items> + <item1> + <value>A</value> + </item1> + <item2> + <value>B</value> + </item2> + <Item1> + <Value>C</Value> + <Value>D</Value> + </Item1> + <_> + <value>E</value> + </_> + </items> + <after>2</after> +</result> +` + +type PathTestItem struct { + Value string +} + +type PathTestA struct { + Items []PathTestItem `xml:">item1"` + Before, After string +} + +type PathTestB struct { + Other []PathTestItem `xml:"items>Item1"` + Before, After string +} + +type PathTestC struct { + Values1 []string `xml:"items>item1>value"` + Values2 []string `xml:"items>item2>value"` + Before, After string +} + +type PathTestSet struct { + Item1 []PathTestItem +} + +type PathTestD struct { + Other PathTestSet `xml:"items>"` + Before, After string +} + +type PathTestE struct { + Underline string `xml:"items>_>value"` + Before, After string +} + +var pathTests = []interface{}{ + &PathTestA{Items: []PathTestItem{{"A"}, {"D"}}, Before: "1", After: "2"}, + &PathTestB{Other: []PathTestItem{{"A"}, {"D"}}, Before: "1", After: "2"}, + &PathTestC{Values1: []string{"A", "C", "D"}, Values2: []string{"B"}, Before: "1", After: "2"}, + &PathTestD{Other: PathTestSet{Item1: []PathTestItem{{"A"}, {"D"}}}, Before: "1", After: "2"}, + &PathTestE{Underline: "E", Before: "1", After: "2"}, +} + +func TestUnmarshalPaths(t *testing.T) { + for _, pt := range pathTests { + v := reflect.New(reflect.TypeOf(pt).Elem()).Interface() + if err := Unmarshal(StringReader(pathTestString), v); err != nil { + t.Fatalf("Unmarshal: %s", err) + } + if !reflect.DeepEqual(v, pt) { + t.Fatalf("have %#v\nwant %#v", v, pt) + } + } +} + +type BadPathTestA struct { + First string `xml:"items>item1"` + Other string `xml:"items>item2"` + Second string `xml:"items>"` +} + +type BadPathTestB struct { + Other string `xml:"items>item2>value"` + First string `xml:"items>item1"` + Second string `xml:"items>item1>value"` +} + +var badPathTests = []struct { + v, e interface{} +}{ + {&BadPathTestA{}, &TagPathError{reflect.TypeOf(BadPathTestA{}), "First", "items>item1", "Second", "items>"}}, + {&BadPathTestB{}, &TagPathError{reflect.TypeOf(BadPathTestB{}), "First", "items>item1", "Second", "items>item1>value"}}, +} + +func TestUnmarshalBadPaths(t *testing.T) { + for _, tt := range badPathTests { + err := Unmarshal(StringReader(pathTestString), tt.v) + if !reflect.DeepEqual(err, tt.e) { + t.Fatalf("Unmarshal with %#v didn't fail properly: %#v", tt.v, err) + } + } +} + +func TestUnmarshalAttrs(t *testing.T) { + var f AttrTest + if err := Unmarshal(StringReader(attrString), &f); err != nil { + t.Fatalf("Unmarshal: %s", err) + } + if !reflect.DeepEqual(f, attrStruct) { + t.Fatalf("have %#v\nwant %#v", f, attrStruct) + } +} + +type AttrTest struct { + Test1 Test1 + Test2 Test2 +} + +type Test1 struct { + Int int `xml:"attr"` + Float float64 `xml:"attr"` + Uint8 uint8 `xml:"attr"` +} + +type Test2 struct { + Bool bool `xml:"attr"` +} + +const attrString = ` +<?xml version="1.0" charset="utf-8"?> +<attrtest> + <test1 int="8" float="23.5" uint8="255"/> + <test2 bool="true"/> +</attrtest> +` + +var attrStruct = AttrTest{ + Test1: Test1{ + Int: 8, + Float: 23.5, + Uint8: 255, + }, + Test2: Test2{ + Bool: true, + }, +} + +// test data for TestUnmarshalWithoutNameType + +const OK = "OK" +const withoutNameTypeData = ` +<?xml version="1.0" charset="utf-8"?> +<Test3 attr="OK" />` + +type TestThree struct { + XMLName bool `xml:"Test3"` // XMLName field without an xml.Name type + Attr string `xml:"attr"` +} + +func TestUnmarshalWithoutNameType(t *testing.T) { + var x TestThree + if err := Unmarshal(StringReader(withoutNameTypeData), &x); err != nil { + t.Fatalf("Unmarshal: %s", err) + } + if x.Attr != OK { + t.Fatalf("have %v\nwant %v", x.Attr, OK) + } +} diff --git a/libgo/go/encoding/xml/xml.go b/libgo/go/encoding/xml/xml.go new file mode 100644 index 0000000..216d888 --- /dev/null +++ b/libgo/go/encoding/xml/xml.go @@ -0,0 +1,1697 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package xml implements a simple XML 1.0 parser that +// understands XML name spaces. +package xml + +// References: +// Annotated XML spec: http://www.xml.com/axml/testaxml.htm +// XML name spaces: http://www.w3.org/TR/REC-xml-names/ + +// TODO(rsc): +// Test error handling. + +import ( + "bufio" + "bytes" + "fmt" + "io" + "strconv" + "strings" + "unicode" + "unicode/utf8" +) + +// A SyntaxError represents a syntax error in the XML input stream. +type SyntaxError struct { + Msg string + Line int +} + +func (e *SyntaxError) Error() string { + return "XML syntax error on line " + strconv.Itoa(e.Line) + ": " + e.Msg +} + +// A Name represents an XML name (Local) annotated +// with a name space identifier (Space). +// In tokens returned by Parser.Token, the Space identifier +// is given as a canonical URL, not the short prefix used +// in the document being parsed. +type Name struct { + Space, Local string +} + +// An Attr represents an attribute in an XML element (Name=Value). +type Attr struct { + Name Name + Value string +} + +// A Token is an interface holding one of the token types: +// StartElement, EndElement, CharData, Comment, ProcInst, or Directive. +type Token interface{} + +// A StartElement represents an XML start element. +type StartElement struct { + Name Name + Attr []Attr +} + +func (e StartElement) Copy() StartElement { + attrs := make([]Attr, len(e.Attr)) + copy(e.Attr, attrs) + e.Attr = attrs + return e +} + +// An EndElement represents an XML end element. +type EndElement struct { + Name Name +} + +// A CharData represents XML character data (raw text), +// in which XML escape sequences have been replaced by +// the characters they represent. +type CharData []byte + +func makeCopy(b []byte) []byte { + b1 := make([]byte, len(b)) + copy(b1, b) + return b1 +} + +func (c CharData) Copy() CharData { return CharData(makeCopy(c)) } + +// A Comment represents an XML comment of the form <!--comment-->. +// The bytes do not include the <!-- and --> comment markers. +type Comment []byte + +func (c Comment) Copy() Comment { return Comment(makeCopy(c)) } + +// A ProcInst represents an XML processing instruction of the form <?target inst?> +type ProcInst struct { + Target string + Inst []byte +} + +func (p ProcInst) Copy() ProcInst { + p.Inst = makeCopy(p.Inst) + return p +} + +// A Directive represents an XML directive of the form <!text>. +// The bytes do not include the <! and > markers. +type Directive []byte + +func (d Directive) Copy() Directive { return Directive(makeCopy(d)) } + +// CopyToken returns a copy of a Token. +func CopyToken(t Token) Token { + switch v := t.(type) { + case CharData: + return v.Copy() + case Comment: + return v.Copy() + case Directive: + return v.Copy() + case ProcInst: + return v.Copy() + case StartElement: + return v.Copy() + } + return t +} + +// A Parser represents an XML parser reading a particular input stream. +// The parser assumes that its input is encoded in UTF-8. +type Parser struct { + // Strict defaults to true, enforcing the requirements + // of the XML specification. + // If set to false, the parser allows input containing common + // mistakes: + // * If an element is missing an end tag, the parser invents + // end tags as necessary to keep the return values from Token + // properly balanced. + // * In attribute values and character data, unknown or malformed + // character entities (sequences beginning with &) are left alone. + // + // Setting: + // + // p.Strict = false; + // p.AutoClose = HTMLAutoClose; + // p.Entity = HTMLEntity + // + // creates a parser that can handle typical HTML. + Strict bool + + // When Strict == false, AutoClose indicates a set of elements to + // consider closed immediately after they are opened, regardless + // of whether an end element is present. + AutoClose []string + + // Entity can be used to map non-standard entity names to string replacements. + // The parser behaves as if these standard mappings are present in the map, + // regardless of the actual map content: + // + // "lt": "<", + // "gt": ">", + // "amp": "&", + // "apos": "'", + // "quot": `"`, + Entity map[string]string + + // CharsetReader, if non-nil, defines a function to generate + // charset-conversion readers, converting from the provided + // non-UTF-8 charset into UTF-8. If CharsetReader is nil or + // returns an error, parsing stops with an error. One of the + // the CharsetReader's result values must be non-nil. + CharsetReader func(charset string, input io.Reader) (io.Reader, error) + + r io.ByteReader + buf bytes.Buffer + saved *bytes.Buffer + stk *stack + free *stack + needClose bool + toClose Name + nextToken Token + nextByte int + ns map[string]string + err error + line int + tmp [32]byte +} + +// NewParser creates a new XML parser reading from r. +func NewParser(r io.Reader) *Parser { + p := &Parser{ + ns: make(map[string]string), + nextByte: -1, + line: 1, + Strict: true, + } + p.switchToReader(r) + return p +} + +// Token returns the next XML token in the input stream. +// At the end of the input stream, Token returns nil, io.EOF. +// +// Slices of bytes in the returned token data refer to the +// parser's internal buffer and remain valid only until the next +// call to Token. To acquire a copy of the bytes, call CopyToken +// or the token's Copy method. +// +// Token expands self-closing elements such as <br/> +// into separate start and end elements returned by successive calls. +// +// Token guarantees that the StartElement and EndElement +// tokens it returns are properly nested and matched: +// if Token encounters an unexpected end element, +// it will return an error. +// +// Token implements XML name spaces as described by +// http://www.w3.org/TR/REC-xml-names/. Each of the +// Name structures contained in the Token has the Space +// set to the URL identifying its name space when known. +// If Token encounters an unrecognized name space prefix, +// it uses the prefix as the Space rather than report an error. +func (p *Parser) Token() (t Token, err error) { + if p.nextToken != nil { + t = p.nextToken + p.nextToken = nil + } else if t, err = p.RawToken(); err != nil { + return + } + + if !p.Strict { + if t1, ok := p.autoClose(t); ok { + p.nextToken = t + t = t1 + } + } + switch t1 := t.(type) { + case StartElement: + // In XML name spaces, the translations listed in the + // attributes apply to the element name and + // to the other attribute names, so process + // the translations first. + for _, a := range t1.Attr { + if a.Name.Space == "xmlns" { + v, ok := p.ns[a.Name.Local] + p.pushNs(a.Name.Local, v, ok) + p.ns[a.Name.Local] = a.Value + } + if a.Name.Space == "" && a.Name.Local == "xmlns" { + // Default space for untagged names + v, ok := p.ns[""] + p.pushNs("", v, ok) + p.ns[""] = a.Value + } + } + + p.translate(&t1.Name, true) + for i := range t1.Attr { + p.translate(&t1.Attr[i].Name, false) + } + p.pushElement(t1.Name) + t = t1 + + case EndElement: + p.translate(&t1.Name, true) + if !p.popElement(&t1) { + return nil, p.err + } + t = t1 + } + return +} + +// Apply name space translation to name n. +// The default name space (for Space=="") +// applies only to element names, not to attribute names. +func (p *Parser) translate(n *Name, isElementName bool) { + switch { + case n.Space == "xmlns": + return + case n.Space == "" && !isElementName: + return + case n.Space == "" && n.Local == "xmlns": + return + } + if v, ok := p.ns[n.Space]; ok { + n.Space = v + } +} + +func (p *Parser) switchToReader(r io.Reader) { + // Get efficient byte at a time reader. + // Assume that if reader has its own + // ReadByte, it's efficient enough. + // Otherwise, use bufio. + if rb, ok := r.(io.ByteReader); ok { + p.r = rb + } else { + p.r = bufio.NewReader(r) + } +} + +// Parsing state - stack holds old name space translations +// and the current set of open elements. The translations to pop when +// ending a given tag are *below* it on the stack, which is +// more work but forced on us by XML. +type stack struct { + next *stack + kind int + name Name + ok bool +} + +const ( + stkStart = iota + stkNs +) + +func (p *Parser) push(kind int) *stack { + s := p.free + if s != nil { + p.free = s.next + } else { + s = new(stack) + } + s.next = p.stk + s.kind = kind + p.stk = s + return s +} + +func (p *Parser) pop() *stack { + s := p.stk + if s != nil { + p.stk = s.next + s.next = p.free + p.free = s + } + return s +} + +// Record that we are starting an element with the given name. +func (p *Parser) pushElement(name Name) { + s := p.push(stkStart) + s.name = name +} + +// Record that we are changing the value of ns[local]. +// The old value is url, ok. +func (p *Parser) pushNs(local string, url string, ok bool) { + s := p.push(stkNs) + s.name.Local = local + s.name.Space = url + s.ok = ok +} + +// Creates a SyntaxError with the current line number. +func (p *Parser) syntaxError(msg string) error { + return &SyntaxError{Msg: msg, Line: p.line} +} + +// Record that we are ending an element with the given name. +// The name must match the record at the top of the stack, +// which must be a pushElement record. +// After popping the element, apply any undo records from +// the stack to restore the name translations that existed +// before we saw this element. +func (p *Parser) popElement(t *EndElement) bool { + s := p.pop() + name := t.Name + switch { + case s == nil || s.kind != stkStart: + p.err = p.syntaxError("unexpected end element </" + name.Local + ">") + return false + case s.name.Local != name.Local: + if !p.Strict { + p.needClose = true + p.toClose = t.Name + t.Name = s.name + return true + } + p.err = p.syntaxError("element <" + s.name.Local + "> closed by </" + name.Local + ">") + return false + case s.name.Space != name.Space: + p.err = p.syntaxError("element <" + s.name.Local + "> in space " + s.name.Space + + "closed by </" + name.Local + "> in space " + name.Space) + return false + } + + // Pop stack until a Start is on the top, undoing the + // translations that were associated with the element we just closed. + for p.stk != nil && p.stk.kind != stkStart { + s := p.pop() + if s.ok { + p.ns[s.name.Local] = s.name.Space + } else { + delete(p.ns, s.name.Local) + } + } + + return true +} + +// If the top element on the stack is autoclosing and +// t is not the end tag, invent the end tag. +func (p *Parser) autoClose(t Token) (Token, bool) { + if p.stk == nil || p.stk.kind != stkStart { + return nil, false + } + name := strings.ToLower(p.stk.name.Local) + for _, s := range p.AutoClose { + if strings.ToLower(s) == name { + // This one should be auto closed if t doesn't close it. + et, ok := t.(EndElement) + if !ok || et.Name.Local != name { + return EndElement{p.stk.name}, true + } + break + } + } + return nil, false +} + +// RawToken is like Token but does not verify that +// start and end elements match and does not translate +// name space prefixes to their corresponding URLs. +func (p *Parser) RawToken() (Token, error) { + if p.err != nil { + return nil, p.err + } + if p.needClose { + // The last element we read was self-closing and + // we returned just the StartElement half. + // Return the EndElement half now. + p.needClose = false + return EndElement{p.toClose}, nil + } + + b, ok := p.getc() + if !ok { + return nil, p.err + } + + if b != '<' { + // Text section. + p.ungetc(b) + data := p.text(-1, false) + if data == nil { + return nil, p.err + } + return CharData(data), nil + } + + if b, ok = p.mustgetc(); !ok { + return nil, p.err + } + switch b { + case '/': + // </: End element + var name Name + if name, ok = p.nsname(); !ok { + if p.err == nil { + p.err = p.syntaxError("expected element name after </") + } + return nil, p.err + } + p.space() + if b, ok = p.mustgetc(); !ok { + return nil, p.err + } + if b != '>' { + p.err = p.syntaxError("invalid characters between </" + name.Local + " and >") + return nil, p.err + } + return EndElement{name}, nil + + case '?': + // <?: Processing instruction. + // TODO(rsc): Should parse the <?xml declaration to make sure + // the version is 1.0 and the encoding is UTF-8. + var target string + if target, ok = p.name(); !ok { + if p.err == nil { + p.err = p.syntaxError("expected target name after <?") + } + return nil, p.err + } + p.space() + p.buf.Reset() + var b0 byte + for { + if b, ok = p.mustgetc(); !ok { + return nil, p.err + } + p.buf.WriteByte(b) + if b0 == '?' && b == '>' { + break + } + b0 = b + } + data := p.buf.Bytes() + data = data[0 : len(data)-2] // chop ?> + + if target == "xml" { + enc := procInstEncoding(string(data)) + if enc != "" && enc != "utf-8" && enc != "UTF-8" { + if p.CharsetReader == nil { + p.err = fmt.Errorf("xml: encoding %q declared but Parser.CharsetReader is nil", enc) + return nil, p.err + } + newr, err := p.CharsetReader(enc, p.r.(io.Reader)) + if err != nil { + p.err = fmt.Errorf("xml: opening charset %q: %v", enc, err) + return nil, p.err + } + if newr == nil { + panic("CharsetReader returned a nil Reader for charset " + enc) + } + p.switchToReader(newr) + } + } + return ProcInst{target, data}, nil + + case '!': + // <!: Maybe comment, maybe CDATA. + if b, ok = p.mustgetc(); !ok { + return nil, p.err + } + switch b { + case '-': // <!- + // Probably <!-- for a comment. + if b, ok = p.mustgetc(); !ok { + return nil, p.err + } + if b != '-' { + p.err = p.syntaxError("invalid sequence <!- not part of <!--") + return nil, p.err + } + // Look for terminator. + p.buf.Reset() + var b0, b1 byte + for { + if b, ok = p.mustgetc(); !ok { + return nil, p.err + } + p.buf.WriteByte(b) + if b0 == '-' && b1 == '-' && b == '>' { + break + } + b0, b1 = b1, b + } + data := p.buf.Bytes() + data = data[0 : len(data)-3] // chop --> + return Comment(data), nil + + case '[': // <![ + // Probably <![CDATA[. + for i := 0; i < 6; i++ { + if b, ok = p.mustgetc(); !ok { + return nil, p.err + } + if b != "CDATA["[i] { + p.err = p.syntaxError("invalid <![ sequence") + return nil, p.err + } + } + // Have <![CDATA[. Read text until ]]>. + data := p.text(-1, true) + if data == nil { + return nil, p.err + } + return CharData(data), nil + } + + // Probably a directive: <!DOCTYPE ...>, <!ENTITY ...>, etc. + // We don't care, but accumulate for caller. Quoted angle + // brackets do not count for nesting. + p.buf.Reset() + p.buf.WriteByte(b) + inquote := uint8(0) + depth := 0 + for { + if b, ok = p.mustgetc(); !ok { + return nil, p.err + } + if inquote == 0 && b == '>' && depth == 0 { + break + } + p.buf.WriteByte(b) + switch { + case b == inquote: + inquote = 0 + + case inquote != 0: + // in quotes, no special action + + case b == '\'' || b == '"': + inquote = b + + case b == '>' && inquote == 0: + depth-- + + case b == '<' && inquote == 0: + depth++ + } + } + return Directive(p.buf.Bytes()), nil + } + + // Must be an open element like <a href="foo"> + p.ungetc(b) + + var ( + name Name + empty bool + attr []Attr + ) + if name, ok = p.nsname(); !ok { + if p.err == nil { + p.err = p.syntaxError("expected element name after <") + } + return nil, p.err + } + + attr = make([]Attr, 0, 4) + for { + p.space() + if b, ok = p.mustgetc(); !ok { + return nil, p.err + } + if b == '/' { + empty = true + if b, ok = p.mustgetc(); !ok { + return nil, p.err + } + if b != '>' { + p.err = p.syntaxError("expected /> in element") + return nil, p.err + } + break + } + if b == '>' { + break + } + p.ungetc(b) + + n := len(attr) + if n >= cap(attr) { + nattr := make([]Attr, n, 2*cap(attr)) + copy(nattr, attr) + attr = nattr + } + attr = attr[0 : n+1] + a := &attr[n] + if a.Name, ok = p.nsname(); !ok { + if p.err == nil { + p.err = p.syntaxError("expected attribute name in element") + } + return nil, p.err + } + p.space() + if b, ok = p.mustgetc(); !ok { + return nil, p.err + } + if b != '=' { + if p.Strict { + p.err = p.syntaxError("attribute name without = in element") + return nil, p.err + } else { + p.ungetc(b) + a.Value = a.Name.Local + } + } else { + p.space() + data := p.attrval() + if data == nil { + return nil, p.err + } + a.Value = string(data) + } + } + if empty { + p.needClose = true + p.toClose = name + } + return StartElement{name, attr}, nil +} + +func (p *Parser) attrval() []byte { + b, ok := p.mustgetc() + if !ok { + return nil + } + // Handle quoted attribute values + if b == '"' || b == '\'' { + return p.text(int(b), false) + } + // Handle unquoted attribute values for strict parsers + if p.Strict { + p.err = p.syntaxError("unquoted or missing attribute value in element") + return nil + } + // Handle unquoted attribute values for unstrict parsers + p.ungetc(b) + p.buf.Reset() + for { + b, ok = p.mustgetc() + if !ok { + return nil + } + // http://www.w3.org/TR/REC-html40/intro/sgmltut.html#h-3.2.2 + if 'a' <= b && b <= 'z' || 'A' <= b && b <= 'Z' || + '0' <= b && b <= '9' || b == '_' || b == ':' || b == '-' { + p.buf.WriteByte(b) + } else { + p.ungetc(b) + break + } + } + return p.buf.Bytes() +} + +// Skip spaces if any +func (p *Parser) space() { + for { + b, ok := p.getc() + if !ok { + return + } + switch b { + case ' ', '\r', '\n', '\t': + default: + p.ungetc(b) + return + } + } +} + +// Read a single byte. +// If there is no byte to read, return ok==false +// and leave the error in p.err. +// Maintain line number. +func (p *Parser) getc() (b byte, ok bool) { + if p.err != nil { + return 0, false + } + if p.nextByte >= 0 { + b = byte(p.nextByte) + p.nextByte = -1 + } else { + b, p.err = p.r.ReadByte() + if p.err != nil { + return 0, false + } + if p.saved != nil { + p.saved.WriteByte(b) + } + } + if b == '\n' { + p.line++ + } + return b, true +} + +// Return saved offset. +// If we did ungetc (nextByte >= 0), have to back up one. +func (p *Parser) savedOffset() int { + n := p.saved.Len() + if p.nextByte >= 0 { + n-- + } + return n +} + +// Must read a single byte. +// If there is no byte to read, +// set p.err to SyntaxError("unexpected EOF") +// and return ok==false +func (p *Parser) mustgetc() (b byte, ok bool) { + if b, ok = p.getc(); !ok { + if p.err == io.EOF { + p.err = p.syntaxError("unexpected EOF") + } + } + return +} + +// Unread a single byte. +func (p *Parser) ungetc(b byte) { + if b == '\n' { + p.line-- + } + p.nextByte = int(b) +} + +var entity = map[string]int{ + "lt": '<', + "gt": '>', + "amp": '&', + "apos": '\'', + "quot": '"', +} + +// Read plain text section (XML calls it character data). +// If quote >= 0, we are in a quoted string and need to find the matching quote. +// If cdata == true, we are in a <![CDATA[ section and need to find ]]>. +// On failure return nil and leave the error in p.err. +func (p *Parser) text(quote int, cdata bool) []byte { + var b0, b1 byte + var trunc int + p.buf.Reset() +Input: + for { + b, ok := p.getc() + if !ok { + if cdata { + if p.err == io.EOF { + p.err = p.syntaxError("unexpected EOF in CDATA section") + } + return nil + } + break Input + } + + // <![CDATA[ section ends with ]]>. + // It is an error for ]]> to appear in ordinary text. + if b0 == ']' && b1 == ']' && b == '>' { + if cdata { + trunc = 2 + break Input + } + p.err = p.syntaxError("unescaped ]]> not in CDATA section") + return nil + } + + // Stop reading text if we see a <. + if b == '<' && !cdata { + if quote >= 0 { + p.err = p.syntaxError("unescaped < inside quoted string") + return nil + } + p.ungetc('<') + break Input + } + if quote >= 0 && b == byte(quote) { + break Input + } + if b == '&' && !cdata { + // Read escaped character expression up to semicolon. + // XML in all its glory allows a document to define and use + // its own character names with <!ENTITY ...> directives. + // Parsers are required to recognize lt, gt, amp, apos, and quot + // even if they have not been declared. That's all we allow. + var i int + for i = 0; i < len(p.tmp); i++ { + var ok bool + p.tmp[i], ok = p.getc() + if !ok { + if p.err == io.EOF { + p.err = p.syntaxError("unexpected EOF") + } + return nil + } + c := p.tmp[i] + if c == ';' { + break + } + if 'a' <= c && c <= 'z' || + 'A' <= c && c <= 'Z' || + '0' <= c && c <= '9' || + c == '_' || c == '#' { + continue + } + p.ungetc(c) + break + } + s := string(p.tmp[0:i]) + if i >= len(p.tmp) { + if !p.Strict { + b0, b1 = 0, 0 + p.buf.WriteByte('&') + p.buf.Write(p.tmp[0:i]) + continue Input + } + p.err = p.syntaxError("character entity expression &" + s + "... too long") + return nil + } + var haveText bool + var text string + if i >= 2 && s[0] == '#' { + var n uint64 + var err error + if i >= 3 && s[1] == 'x' { + n, err = strconv.Btoui64(s[2:], 16) + } else { + n, err = strconv.Btoui64(s[1:], 10) + } + if err == nil && n <= unicode.MaxRune { + text = string(n) + haveText = true + } + } else { + if r, ok := entity[s]; ok { + text = string(r) + haveText = true + } else if p.Entity != nil { + text, haveText = p.Entity[s] + } + } + if !haveText { + if !p.Strict { + b0, b1 = 0, 0 + p.buf.WriteByte('&') + p.buf.Write(p.tmp[0:i]) + continue Input + } + p.err = p.syntaxError("invalid character entity &" + s + ";") + return nil + } + p.buf.Write([]byte(text)) + b0, b1 = 0, 0 + continue Input + } + p.buf.WriteByte(b) + b0, b1 = b1, b + } + data := p.buf.Bytes() + data = data[0 : len(data)-trunc] + + // Inspect each rune for being a disallowed character. + buf := data + for len(buf) > 0 { + r, size := utf8.DecodeRune(buf) + if r == utf8.RuneError && size == 1 { + p.err = p.syntaxError("invalid UTF-8") + return nil + } + buf = buf[size:] + if !isInCharacterRange(r) { + p.err = p.syntaxError(fmt.Sprintf("illegal character code %U", r)) + return nil + } + } + + // Must rewrite \r and \r\n into \n. + w := 0 + for r := 0; r < len(data); r++ { + b := data[r] + if b == '\r' { + if r+1 < len(data) && data[r+1] == '\n' { + continue + } + b = '\n' + } + data[w] = b + w++ + } + return data[0:w] +} + +// Decide whether the given rune is in the XML Character Range, per +// the Char production of http://www.xml.com/axml/testaxml.htm, +// Section 2.2 Characters. +func isInCharacterRange(r rune) (inrange bool) { + return r == 0x09 || + r == 0x0A || + r == 0x0D || + r >= 0x20 && r <= 0xDF77 || + r >= 0xE000 && r <= 0xFFFD || + r >= 0x10000 && r <= 0x10FFFF +} + +// Get name space name: name with a : stuck in the middle. +// The part before the : is the name space identifier. +func (p *Parser) nsname() (name Name, ok bool) { + s, ok := p.name() + if !ok { + return + } + i := strings.Index(s, ":") + if i < 0 { + name.Local = s + } else { + name.Space = s[0:i] + name.Local = s[i+1:] + } + return name, true +} + +// Get name: /first(first|second)*/ +// Do not set p.err if the name is missing (unless unexpected EOF is received): +// let the caller provide better context. +func (p *Parser) name() (s string, ok bool) { + var b byte + if b, ok = p.mustgetc(); !ok { + return + } + + // As a first approximation, we gather the bytes [A-Za-z_:.-\x80-\xFF]* + if b < utf8.RuneSelf && !isNameByte(b) { + p.ungetc(b) + return "", false + } + p.buf.Reset() + p.buf.WriteByte(b) + for { + if b, ok = p.mustgetc(); !ok { + return + } + if b < utf8.RuneSelf && !isNameByte(b) { + p.ungetc(b) + break + } + p.buf.WriteByte(b) + } + + // Then we check the characters. + s = p.buf.String() + for i, c := range s { + if !unicode.Is(first, c) && (i == 0 || !unicode.Is(second, c)) { + p.err = p.syntaxError("invalid XML name: " + s) + return "", false + } + } + return s, true +} + +func isNameByte(c byte) bool { + return 'A' <= c && c <= 'Z' || + 'a' <= c && c <= 'z' || + '0' <= c && c <= '9' || + c == '_' || c == ':' || c == '.' || c == '-' +} + +// These tables were generated by cut and paste from Appendix B of +// the XML spec at http://www.xml.com/axml/testaxml.htm +// and then reformatting. First corresponds to (Letter | '_' | ':') +// and second corresponds to NameChar. + +var first = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0x003A, 0x003A, 1}, + {0x0041, 0x005A, 1}, + {0x005F, 0x005F, 1}, + {0x0061, 0x007A, 1}, + {0x00C0, 0x00D6, 1}, + {0x00D8, 0x00F6, 1}, + {0x00F8, 0x00FF, 1}, + {0x0100, 0x0131, 1}, + {0x0134, 0x013E, 1}, + {0x0141, 0x0148, 1}, + {0x014A, 0x017E, 1}, + {0x0180, 0x01C3, 1}, + {0x01CD, 0x01F0, 1}, + {0x01F4, 0x01F5, 1}, + {0x01FA, 0x0217, 1}, + {0x0250, 0x02A8, 1}, + {0x02BB, 0x02C1, 1}, + {0x0386, 0x0386, 1}, + {0x0388, 0x038A, 1}, + {0x038C, 0x038C, 1}, + {0x038E, 0x03A1, 1}, + {0x03A3, 0x03CE, 1}, + {0x03D0, 0x03D6, 1}, + {0x03DA, 0x03E0, 2}, + {0x03E2, 0x03F3, 1}, + {0x0401, 0x040C, 1}, + {0x040E, 0x044F, 1}, + {0x0451, 0x045C, 1}, + {0x045E, 0x0481, 1}, + {0x0490, 0x04C4, 1}, + {0x04C7, 0x04C8, 1}, + {0x04CB, 0x04CC, 1}, + {0x04D0, 0x04EB, 1}, + {0x04EE, 0x04F5, 1}, + {0x04F8, 0x04F9, 1}, + {0x0531, 0x0556, 1}, + {0x0559, 0x0559, 1}, + {0x0561, 0x0586, 1}, + {0x05D0, 0x05EA, 1}, + {0x05F0, 0x05F2, 1}, + {0x0621, 0x063A, 1}, + {0x0641, 0x064A, 1}, + {0x0671, 0x06B7, 1}, + {0x06BA, 0x06BE, 1}, + {0x06C0, 0x06CE, 1}, + {0x06D0, 0x06D3, 1}, + {0x06D5, 0x06D5, 1}, + {0x06E5, 0x06E6, 1}, + {0x0905, 0x0939, 1}, + {0x093D, 0x093D, 1}, + {0x0958, 0x0961, 1}, + {0x0985, 0x098C, 1}, + {0x098F, 0x0990, 1}, + {0x0993, 0x09A8, 1}, + {0x09AA, 0x09B0, 1}, + {0x09B2, 0x09B2, 1}, + {0x09B6, 0x09B9, 1}, + {0x09DC, 0x09DD, 1}, + {0x09DF, 0x09E1, 1}, + {0x09F0, 0x09F1, 1}, + {0x0A05, 0x0A0A, 1}, + {0x0A0F, 0x0A10, 1}, + {0x0A13, 0x0A28, 1}, + {0x0A2A, 0x0A30, 1}, + {0x0A32, 0x0A33, 1}, + {0x0A35, 0x0A36, 1}, + {0x0A38, 0x0A39, 1}, + {0x0A59, 0x0A5C, 1}, + {0x0A5E, 0x0A5E, 1}, + {0x0A72, 0x0A74, 1}, + {0x0A85, 0x0A8B, 1}, + {0x0A8D, 0x0A8D, 1}, + {0x0A8F, 0x0A91, 1}, + {0x0A93, 0x0AA8, 1}, + {0x0AAA, 0x0AB0, 1}, + {0x0AB2, 0x0AB3, 1}, + {0x0AB5, 0x0AB9, 1}, + {0x0ABD, 0x0AE0, 0x23}, + {0x0B05, 0x0B0C, 1}, + {0x0B0F, 0x0B10, 1}, + {0x0B13, 0x0B28, 1}, + {0x0B2A, 0x0B30, 1}, + {0x0B32, 0x0B33, 1}, + {0x0B36, 0x0B39, 1}, + {0x0B3D, 0x0B3D, 1}, + {0x0B5C, 0x0B5D, 1}, + {0x0B5F, 0x0B61, 1}, + {0x0B85, 0x0B8A, 1}, + {0x0B8E, 0x0B90, 1}, + {0x0B92, 0x0B95, 1}, + {0x0B99, 0x0B9A, 1}, + {0x0B9C, 0x0B9C, 1}, + {0x0B9E, 0x0B9F, 1}, + {0x0BA3, 0x0BA4, 1}, + {0x0BA8, 0x0BAA, 1}, + {0x0BAE, 0x0BB5, 1}, + {0x0BB7, 0x0BB9, 1}, + {0x0C05, 0x0C0C, 1}, + {0x0C0E, 0x0C10, 1}, + {0x0C12, 0x0C28, 1}, + {0x0C2A, 0x0C33, 1}, + {0x0C35, 0x0C39, 1}, + {0x0C60, 0x0C61, 1}, + {0x0C85, 0x0C8C, 1}, + {0x0C8E, 0x0C90, 1}, + {0x0C92, 0x0CA8, 1}, + {0x0CAA, 0x0CB3, 1}, + {0x0CB5, 0x0CB9, 1}, + {0x0CDE, 0x0CDE, 1}, + {0x0CE0, 0x0CE1, 1}, + {0x0D05, 0x0D0C, 1}, + {0x0D0E, 0x0D10, 1}, + {0x0D12, 0x0D28, 1}, + {0x0D2A, 0x0D39, 1}, + {0x0D60, 0x0D61, 1}, + {0x0E01, 0x0E2E, 1}, + {0x0E30, 0x0E30, 1}, + {0x0E32, 0x0E33, 1}, + {0x0E40, 0x0E45, 1}, + {0x0E81, 0x0E82, 1}, + {0x0E84, 0x0E84, 1}, + {0x0E87, 0x0E88, 1}, + {0x0E8A, 0x0E8D, 3}, + {0x0E94, 0x0E97, 1}, + {0x0E99, 0x0E9F, 1}, + {0x0EA1, 0x0EA3, 1}, + {0x0EA5, 0x0EA7, 2}, + {0x0EAA, 0x0EAB, 1}, + {0x0EAD, 0x0EAE, 1}, + {0x0EB0, 0x0EB0, 1}, + {0x0EB2, 0x0EB3, 1}, + {0x0EBD, 0x0EBD, 1}, + {0x0EC0, 0x0EC4, 1}, + {0x0F40, 0x0F47, 1}, + {0x0F49, 0x0F69, 1}, + {0x10A0, 0x10C5, 1}, + {0x10D0, 0x10F6, 1}, + {0x1100, 0x1100, 1}, + {0x1102, 0x1103, 1}, + {0x1105, 0x1107, 1}, + {0x1109, 0x1109, 1}, + {0x110B, 0x110C, 1}, + {0x110E, 0x1112, 1}, + {0x113C, 0x1140, 2}, + {0x114C, 0x1150, 2}, + {0x1154, 0x1155, 1}, + {0x1159, 0x1159, 1}, + {0x115F, 0x1161, 1}, + {0x1163, 0x1169, 2}, + {0x116D, 0x116E, 1}, + {0x1172, 0x1173, 1}, + {0x1175, 0x119E, 0x119E - 0x1175}, + {0x11A8, 0x11AB, 0x11AB - 0x11A8}, + {0x11AE, 0x11AF, 1}, + {0x11B7, 0x11B8, 1}, + {0x11BA, 0x11BA, 1}, + {0x11BC, 0x11C2, 1}, + {0x11EB, 0x11F0, 0x11F0 - 0x11EB}, + {0x11F9, 0x11F9, 1}, + {0x1E00, 0x1E9B, 1}, + {0x1EA0, 0x1EF9, 1}, + {0x1F00, 0x1F15, 1}, + {0x1F18, 0x1F1D, 1}, + {0x1F20, 0x1F45, 1}, + {0x1F48, 0x1F4D, 1}, + {0x1F50, 0x1F57, 1}, + {0x1F59, 0x1F5B, 0x1F5B - 0x1F59}, + {0x1F5D, 0x1F5D, 1}, + {0x1F5F, 0x1F7D, 1}, + {0x1F80, 0x1FB4, 1}, + {0x1FB6, 0x1FBC, 1}, + {0x1FBE, 0x1FBE, 1}, + {0x1FC2, 0x1FC4, 1}, + {0x1FC6, 0x1FCC, 1}, + {0x1FD0, 0x1FD3, 1}, + {0x1FD6, 0x1FDB, 1}, + {0x1FE0, 0x1FEC, 1}, + {0x1FF2, 0x1FF4, 1}, + {0x1FF6, 0x1FFC, 1}, + {0x2126, 0x2126, 1}, + {0x212A, 0x212B, 1}, + {0x212E, 0x212E, 1}, + {0x2180, 0x2182, 1}, + {0x3007, 0x3007, 1}, + {0x3021, 0x3029, 1}, + {0x3041, 0x3094, 1}, + {0x30A1, 0x30FA, 1}, + {0x3105, 0x312C, 1}, + {0x4E00, 0x9FA5, 1}, + {0xAC00, 0xD7A3, 1}, + }, +} + +var second = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0x002D, 0x002E, 1}, + {0x0030, 0x0039, 1}, + {0x00B7, 0x00B7, 1}, + {0x02D0, 0x02D1, 1}, + {0x0300, 0x0345, 1}, + {0x0360, 0x0361, 1}, + {0x0387, 0x0387, 1}, + {0x0483, 0x0486, 1}, + {0x0591, 0x05A1, 1}, + {0x05A3, 0x05B9, 1}, + {0x05BB, 0x05BD, 1}, + {0x05BF, 0x05BF, 1}, + {0x05C1, 0x05C2, 1}, + {0x05C4, 0x0640, 0x0640 - 0x05C4}, + {0x064B, 0x0652, 1}, + {0x0660, 0x0669, 1}, + {0x0670, 0x0670, 1}, + {0x06D6, 0x06DC, 1}, + {0x06DD, 0x06DF, 1}, + {0x06E0, 0x06E4, 1}, + {0x06E7, 0x06E8, 1}, + {0x06EA, 0x06ED, 1}, + {0x06F0, 0x06F9, 1}, + {0x0901, 0x0903, 1}, + {0x093C, 0x093C, 1}, + {0x093E, 0x094C, 1}, + {0x094D, 0x094D, 1}, + {0x0951, 0x0954, 1}, + {0x0962, 0x0963, 1}, + {0x0966, 0x096F, 1}, + {0x0981, 0x0983, 1}, + {0x09BC, 0x09BC, 1}, + {0x09BE, 0x09BF, 1}, + {0x09C0, 0x09C4, 1}, + {0x09C7, 0x09C8, 1}, + {0x09CB, 0x09CD, 1}, + {0x09D7, 0x09D7, 1}, + {0x09E2, 0x09E3, 1}, + {0x09E6, 0x09EF, 1}, + {0x0A02, 0x0A3C, 0x3A}, + {0x0A3E, 0x0A3F, 1}, + {0x0A40, 0x0A42, 1}, + {0x0A47, 0x0A48, 1}, + {0x0A4B, 0x0A4D, 1}, + {0x0A66, 0x0A6F, 1}, + {0x0A70, 0x0A71, 1}, + {0x0A81, 0x0A83, 1}, + {0x0ABC, 0x0ABC, 1}, + {0x0ABE, 0x0AC5, 1}, + {0x0AC7, 0x0AC9, 1}, + {0x0ACB, 0x0ACD, 1}, + {0x0AE6, 0x0AEF, 1}, + {0x0B01, 0x0B03, 1}, + {0x0B3C, 0x0B3C, 1}, + {0x0B3E, 0x0B43, 1}, + {0x0B47, 0x0B48, 1}, + {0x0B4B, 0x0B4D, 1}, + {0x0B56, 0x0B57, 1}, + {0x0B66, 0x0B6F, 1}, + {0x0B82, 0x0B83, 1}, + {0x0BBE, 0x0BC2, 1}, + {0x0BC6, 0x0BC8, 1}, + {0x0BCA, 0x0BCD, 1}, + {0x0BD7, 0x0BD7, 1}, + {0x0BE7, 0x0BEF, 1}, + {0x0C01, 0x0C03, 1}, + {0x0C3E, 0x0C44, 1}, + {0x0C46, 0x0C48, 1}, + {0x0C4A, 0x0C4D, 1}, + {0x0C55, 0x0C56, 1}, + {0x0C66, 0x0C6F, 1}, + {0x0C82, 0x0C83, 1}, + {0x0CBE, 0x0CC4, 1}, + {0x0CC6, 0x0CC8, 1}, + {0x0CCA, 0x0CCD, 1}, + {0x0CD5, 0x0CD6, 1}, + {0x0CE6, 0x0CEF, 1}, + {0x0D02, 0x0D03, 1}, + {0x0D3E, 0x0D43, 1}, + {0x0D46, 0x0D48, 1}, + {0x0D4A, 0x0D4D, 1}, + {0x0D57, 0x0D57, 1}, + {0x0D66, 0x0D6F, 1}, + {0x0E31, 0x0E31, 1}, + {0x0E34, 0x0E3A, 1}, + {0x0E46, 0x0E46, 1}, + {0x0E47, 0x0E4E, 1}, + {0x0E50, 0x0E59, 1}, + {0x0EB1, 0x0EB1, 1}, + {0x0EB4, 0x0EB9, 1}, + {0x0EBB, 0x0EBC, 1}, + {0x0EC6, 0x0EC6, 1}, + {0x0EC8, 0x0ECD, 1}, + {0x0ED0, 0x0ED9, 1}, + {0x0F18, 0x0F19, 1}, + {0x0F20, 0x0F29, 1}, + {0x0F35, 0x0F39, 2}, + {0x0F3E, 0x0F3F, 1}, + {0x0F71, 0x0F84, 1}, + {0x0F86, 0x0F8B, 1}, + {0x0F90, 0x0F95, 1}, + {0x0F97, 0x0F97, 1}, + {0x0F99, 0x0FAD, 1}, + {0x0FB1, 0x0FB7, 1}, + {0x0FB9, 0x0FB9, 1}, + {0x20D0, 0x20DC, 1}, + {0x20E1, 0x3005, 0x3005 - 0x20E1}, + {0x302A, 0x302F, 1}, + {0x3031, 0x3035, 1}, + {0x3099, 0x309A, 1}, + {0x309D, 0x309E, 1}, + {0x30FC, 0x30FE, 1}, + }, +} + +// HTMLEntity is an entity map containing translations for the +// standard HTML entity characters. +var HTMLEntity = htmlEntity + +var htmlEntity = map[string]string{ + /* + hget http://www.w3.org/TR/html4/sgml/entities.html | + ssam ' + ,y /\>/ x/\<(.|\n)+/ s/\n/ /g + ,x v/^\<!ENTITY/d + ,s/\<!ENTITY ([^ ]+) .*U\+([0-9A-F][0-9A-F][0-9A-F][0-9A-F]) .+/ "\1": "\\u\2",/g + ' + */ + "nbsp": "\u00A0", + "iexcl": "\u00A1", + "cent": "\u00A2", + "pound": "\u00A3", + "curren": "\u00A4", + "yen": "\u00A5", + "brvbar": "\u00A6", + "sect": "\u00A7", + "uml": "\u00A8", + "copy": "\u00A9", + "ordf": "\u00AA", + "laquo": "\u00AB", + "not": "\u00AC", + "shy": "\u00AD", + "reg": "\u00AE", + "macr": "\u00AF", + "deg": "\u00B0", + "plusmn": "\u00B1", + "sup2": "\u00B2", + "sup3": "\u00B3", + "acute": "\u00B4", + "micro": "\u00B5", + "para": "\u00B6", + "middot": "\u00B7", + "cedil": "\u00B8", + "sup1": "\u00B9", + "ordm": "\u00BA", + "raquo": "\u00BB", + "frac14": "\u00BC", + "frac12": "\u00BD", + "frac34": "\u00BE", + "iquest": "\u00BF", + "Agrave": "\u00C0", + "Aacute": "\u00C1", + "Acirc": "\u00C2", + "Atilde": "\u00C3", + "Auml": "\u00C4", + "Aring": "\u00C5", + "AElig": "\u00C6", + "Ccedil": "\u00C7", + "Egrave": "\u00C8", + "Eacute": "\u00C9", + "Ecirc": "\u00CA", + "Euml": "\u00CB", + "Igrave": "\u00CC", + "Iacute": "\u00CD", + "Icirc": "\u00CE", + "Iuml": "\u00CF", + "ETH": "\u00D0", + "Ntilde": "\u00D1", + "Ograve": "\u00D2", + "Oacute": "\u00D3", + "Ocirc": "\u00D4", + "Otilde": "\u00D5", + "Ouml": "\u00D6", + "times": "\u00D7", + "Oslash": "\u00D8", + "Ugrave": "\u00D9", + "Uacute": "\u00DA", + "Ucirc": "\u00DB", + "Uuml": "\u00DC", + "Yacute": "\u00DD", + "THORN": "\u00DE", + "szlig": "\u00DF", + "agrave": "\u00E0", + "aacute": "\u00E1", + "acirc": "\u00E2", + "atilde": "\u00E3", + "auml": "\u00E4", + "aring": "\u00E5", + "aelig": "\u00E6", + "ccedil": "\u00E7", + "egrave": "\u00E8", + "eacute": "\u00E9", + "ecirc": "\u00EA", + "euml": "\u00EB", + "igrave": "\u00EC", + "iacute": "\u00ED", + "icirc": "\u00EE", + "iuml": "\u00EF", + "eth": "\u00F0", + "ntilde": "\u00F1", + "ograve": "\u00F2", + "oacute": "\u00F3", + "ocirc": "\u00F4", + "otilde": "\u00F5", + "ouml": "\u00F6", + "divide": "\u00F7", + "oslash": "\u00F8", + "ugrave": "\u00F9", + "uacute": "\u00FA", + "ucirc": "\u00FB", + "uuml": "\u00FC", + "yacute": "\u00FD", + "thorn": "\u00FE", + "yuml": "\u00FF", + "fnof": "\u0192", + "Alpha": "\u0391", + "Beta": "\u0392", + "Gamma": "\u0393", + "Delta": "\u0394", + "Epsilon": "\u0395", + "Zeta": "\u0396", + "Eta": "\u0397", + "Theta": "\u0398", + "Iota": "\u0399", + "Kappa": "\u039A", + "Lambda": "\u039B", + "Mu": "\u039C", + "Nu": "\u039D", + "Xi": "\u039E", + "Omicron": "\u039F", + "Pi": "\u03A0", + "Rho": "\u03A1", + "Sigma": "\u03A3", + "Tau": "\u03A4", + "Upsilon": "\u03A5", + "Phi": "\u03A6", + "Chi": "\u03A7", + "Psi": "\u03A8", + "Omega": "\u03A9", + "alpha": "\u03B1", + "beta": "\u03B2", + "gamma": "\u03B3", + "delta": "\u03B4", + "epsilon": "\u03B5", + "zeta": "\u03B6", + "eta": "\u03B7", + "theta": "\u03B8", + "iota": "\u03B9", + "kappa": "\u03BA", + "lambda": "\u03BB", + "mu": "\u03BC", + "nu": "\u03BD", + "xi": "\u03BE", + "omicron": "\u03BF", + "pi": "\u03C0", + "rho": "\u03C1", + "sigmaf": "\u03C2", + "sigma": "\u03C3", + "tau": "\u03C4", + "upsilon": "\u03C5", + "phi": "\u03C6", + "chi": "\u03C7", + "psi": "\u03C8", + "omega": "\u03C9", + "thetasym": "\u03D1", + "upsih": "\u03D2", + "piv": "\u03D6", + "bull": "\u2022", + "hellip": "\u2026", + "prime": "\u2032", + "Prime": "\u2033", + "oline": "\u203E", + "frasl": "\u2044", + "weierp": "\u2118", + "image": "\u2111", + "real": "\u211C", + "trade": "\u2122", + "alefsym": "\u2135", + "larr": "\u2190", + "uarr": "\u2191", + "rarr": "\u2192", + "darr": "\u2193", + "harr": "\u2194", + "crarr": "\u21B5", + "lArr": "\u21D0", + "uArr": "\u21D1", + "rArr": "\u21D2", + "dArr": "\u21D3", + "hArr": "\u21D4", + "forall": "\u2200", + "part": "\u2202", + "exist": "\u2203", + "empty": "\u2205", + "nabla": "\u2207", + "isin": "\u2208", + "notin": "\u2209", + "ni": "\u220B", + "prod": "\u220F", + "sum": "\u2211", + "minus": "\u2212", + "lowast": "\u2217", + "radic": "\u221A", + "prop": "\u221D", + "infin": "\u221E", + "ang": "\u2220", + "and": "\u2227", + "or": "\u2228", + "cap": "\u2229", + "cup": "\u222A", + "int": "\u222B", + "there4": "\u2234", + "sim": "\u223C", + "cong": "\u2245", + "asymp": "\u2248", + "ne": "\u2260", + "equiv": "\u2261", + "le": "\u2264", + "ge": "\u2265", + "sub": "\u2282", + "sup": "\u2283", + "nsub": "\u2284", + "sube": "\u2286", + "supe": "\u2287", + "oplus": "\u2295", + "otimes": "\u2297", + "perp": "\u22A5", + "sdot": "\u22C5", + "lceil": "\u2308", + "rceil": "\u2309", + "lfloor": "\u230A", + "rfloor": "\u230B", + "lang": "\u2329", + "rang": "\u232A", + "loz": "\u25CA", + "spades": "\u2660", + "clubs": "\u2663", + "hearts": "\u2665", + "diams": "\u2666", + "quot": "\u0022", + "amp": "\u0026", + "lt": "\u003C", + "gt": "\u003E", + "OElig": "\u0152", + "oelig": "\u0153", + "Scaron": "\u0160", + "scaron": "\u0161", + "Yuml": "\u0178", + "circ": "\u02C6", + "tilde": "\u02DC", + "ensp": "\u2002", + "emsp": "\u2003", + "thinsp": "\u2009", + "zwnj": "\u200C", + "zwj": "\u200D", + "lrm": "\u200E", + "rlm": "\u200F", + "ndash": "\u2013", + "mdash": "\u2014", + "lsquo": "\u2018", + "rsquo": "\u2019", + "sbquo": "\u201A", + "ldquo": "\u201C", + "rdquo": "\u201D", + "bdquo": "\u201E", + "dagger": "\u2020", + "Dagger": "\u2021", + "permil": "\u2030", + "lsaquo": "\u2039", + "rsaquo": "\u203A", + "euro": "\u20AC", +} + +// HTMLAutoClose is the set of HTML elements that +// should be considered to close automatically. +var HTMLAutoClose = htmlAutoClose + +var htmlAutoClose = []string{ + /* + hget http://www.w3.org/TR/html4/loose.dtd | + 9 sed -n 's/<!ELEMENT (.*) - O EMPTY.+/ "\1",/p' | tr A-Z a-z + */ + "basefont", + "br", + "area", + "link", + "img", + "param", + "hr", + "input", + "col ", + "frame", + "isindex", + "base", + "meta", +} + +var ( + esc_quot = []byte(""") // shorter than """ + esc_apos = []byte("'") // shorter than "'" + esc_amp = []byte("&") + esc_lt = []byte("<") + esc_gt = []byte(">") +) + +// Escape writes to w the properly escaped XML equivalent +// of the plain text data s. +func Escape(w io.Writer, s []byte) { + var esc []byte + last := 0 + for i, c := range s { + switch c { + case '"': + esc = esc_quot + case '\'': + esc = esc_apos + case '&': + esc = esc_amp + case '<': + esc = esc_lt + case '>': + esc = esc_gt + default: + continue + } + w.Write(s[last:i]) + w.Write(esc) + last = i + 1 + } + w.Write(s[last:]) +} + +// procInstEncoding parses the `encoding="..."` or `encoding='...'` +// value out of the provided string, returning "" if not found. +func procInstEncoding(s string) string { + // TODO: this parsing is somewhat lame and not exact. + // It works for all actual cases, though. + idx := strings.Index(s, "encoding=") + if idx == -1 { + return "" + } + v := s[idx+len("encoding="):] + if v == "" { + return "" + } + if v[0] != '\'' && v[0] != '"' { + return "" + } + idx = strings.IndexRune(v[1:], rune(v[0])) + if idx == -1 { + return "" + } + return v[1 : idx+1] +} diff --git a/libgo/go/encoding/xml/xml_test.go b/libgo/go/encoding/xml/xml_test.go new file mode 100644 index 0000000..6c874fa --- /dev/null +++ b/libgo/go/encoding/xml/xml_test.go @@ -0,0 +1,609 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xml + +import ( + "bytes" + "io" + "os" + "reflect" + "strings" + "testing" +) + +const testInput = ` +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` + + "\r\n\t" + ` > + <hello lang="en">World <>'" 白鵬翔</hello> + <goodbye /> + <outer foo:attr="value" xmlns:tag="ns4"> + <inner/> + </outer> + <tag:name> + <![CDATA[Some text here.]]> + </tag:name> +</body><!-- missing final newline -->` + +var rawTokens = []Token{ + CharData([]byte("\n")), + ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, + CharData([]byte("\n")), + Directive([]byte(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), + ), + CharData([]byte("\n")), + StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, + CharData([]byte("\n ")), + StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, + CharData([]byte("World <>'\" 白鵬翔")), + EndElement{Name{"", "hello"}}, + CharData([]byte("\n ")), + StartElement{Name{"", "goodbye"}, nil}, + EndElement{Name{"", "goodbye"}}, + CharData([]byte("\n ")), + StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, + CharData([]byte("\n ")), + StartElement{Name{"", "inner"}, nil}, + EndElement{Name{"", "inner"}}, + CharData([]byte("\n ")), + EndElement{Name{"", "outer"}}, + CharData([]byte("\n ")), + StartElement{Name{"tag", "name"}, nil}, + CharData([]byte("\n ")), + CharData([]byte("Some text here.")), + CharData([]byte("\n ")), + EndElement{Name{"tag", "name"}}, + CharData([]byte("\n")), + EndElement{Name{"", "body"}}, + Comment([]byte(" missing final newline ")), +} + +var cookedTokens = []Token{ + CharData([]byte("\n")), + ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, + CharData([]byte("\n")), + Directive([]byte(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), + ), + CharData([]byte("\n")), + StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, + CharData([]byte("\n ")), + StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, + CharData([]byte("World <>'\" 白鵬翔")), + EndElement{Name{"ns2", "hello"}}, + CharData([]byte("\n ")), + StartElement{Name{"ns2", "goodbye"}, nil}, + EndElement{Name{"ns2", "goodbye"}}, + CharData([]byte("\n ")), + StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, + CharData([]byte("\n ")), + StartElement{Name{"ns2", "inner"}, nil}, + EndElement{Name{"ns2", "inner"}}, + CharData([]byte("\n ")), + EndElement{Name{"ns2", "outer"}}, + CharData([]byte("\n ")), + StartElement{Name{"ns3", "name"}, nil}, + CharData([]byte("\n ")), + CharData([]byte("Some text here.")), + CharData([]byte("\n ")), + EndElement{Name{"ns3", "name"}}, + CharData([]byte("\n")), + EndElement{Name{"ns2", "body"}}, + Comment([]byte(" missing final newline ")), +} + +const testInputAltEncoding = ` +<?xml version="1.0" encoding="x-testing-uppercase"?> +<TAG>VALUE</TAG>` + +var rawTokensAltEncoding = []Token{ + CharData([]byte("\n")), + ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)}, + CharData([]byte("\n")), + StartElement{Name{"", "tag"}, nil}, + CharData([]byte("value")), + EndElement{Name{"", "tag"}}, +} + +var xmlInput = []string{ + // unexpected EOF cases + "<", + "<t", + "<t ", + "<t/", + "<!", + "<!-", + "<!--", + "<!--c-", + "<!--c--", + "<!d", + "<t></", + "<t></t", + "<?", + "<?p", + "<t a", + "<t a=", + "<t a='", + "<t a=''", + "<t/><![", + "<t/><![C", + "<t/><![CDATA[d", + "<t/><![CDATA[d]", + "<t/><![CDATA[d]]", + + // other Syntax errors + "<>", + "<t/a", + "<0 />", + "<?0 >", + // "<!0 >", // let the Token() caller handle + "</0>", + "<t 0=''>", + "<t a='&'>", + "<t a='<'>", + "<t> c;</t>", + "<t a>", + "<t a=>", + "<t a=v>", + // "<![CDATA[d]]>", // let the Token() caller handle + "<t></e>", + "<t></>", + "<t></t!", + "<t>cdata]]></t>", +} + +type stringReader struct { + s string + off int +} + +func (r *stringReader) Read(b []byte) (n int, err error) { + if r.off >= len(r.s) { + return 0, io.EOF + } + for r.off < len(r.s) && n < len(b) { + b[n] = r.s[r.off] + n++ + r.off++ + } + return +} + +func (r *stringReader) ReadByte() (b byte, err error) { + if r.off >= len(r.s) { + return 0, io.EOF + } + b = r.s[r.off] + r.off++ + return +} + +func StringReader(s string) io.Reader { return &stringReader{s, 0} } + +func TestRawToken(t *testing.T) { + p := NewParser(StringReader(testInput)) + testRawToken(t, p, rawTokens) +} + +type downCaser struct { + t *testing.T + r io.ByteReader +} + +func (d *downCaser) ReadByte() (c byte, err error) { + c, err = d.r.ReadByte() + if c >= 'A' && c <= 'Z' { + c += 'a' - 'A' + } + return +} + +func (d *downCaser) Read(p []byte) (int, error) { + d.t.Fatalf("unexpected Read call on downCaser reader") + return 0, os.EINVAL +} + +func TestRawTokenAltEncoding(t *testing.T) { + sawEncoding := "" + p := NewParser(StringReader(testInputAltEncoding)) + p.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { + sawEncoding = charset + if charset != "x-testing-uppercase" { + t.Fatalf("unexpected charset %q", charset) + } + return &downCaser{t, input.(io.ByteReader)}, nil + } + testRawToken(t, p, rawTokensAltEncoding) +} + +func TestRawTokenAltEncodingNoConverter(t *testing.T) { + p := NewParser(StringReader(testInputAltEncoding)) + token, err := p.RawToken() + if token == nil { + t.Fatalf("expected a token on first RawToken call") + } + if err != nil { + t.Fatal(err) + } + token, err = p.RawToken() + if token != nil { + t.Errorf("expected a nil token; got %#v", token) + } + if err == nil { + t.Fatalf("expected an error on second RawToken call") + } + const encoding = "x-testing-uppercase" + if !strings.Contains(err.Error(), encoding) { + t.Errorf("expected error to contain %q; got error: %v", + encoding, err) + } +} + +func testRawToken(t *testing.T, p *Parser, rawTokens []Token) { + for i, want := range rawTokens { + have, err := p.RawToken() + if err != nil { + t.Fatalf("token %d: unexpected error: %s", i, err) + } + if !reflect.DeepEqual(have, want) { + t.Errorf("token %d = %#v want %#v", i, have, want) + } + } +} + +// Ensure that directives (specifically !DOCTYPE) include the complete +// text of any nested directives, noting that < and > do not change +// nesting depth if they are in single or double quotes. + +var nestedDirectivesInput = ` +<!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]> +<!DOCTYPE [<!ENTITY xlt ">">]> +<!DOCTYPE [<!ENTITY xlt "<">]> +<!DOCTYPE [<!ENTITY xlt '>'>]> +<!DOCTYPE [<!ENTITY xlt '<'>]> +<!DOCTYPE [<!ENTITY xlt '">'>]> +<!DOCTYPE [<!ENTITY xlt "'<">]> +` + +var nestedDirectivesTokens = []Token{ + CharData([]byte("\n")), + Directive([]byte(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`)), + CharData([]byte("\n")), + Directive([]byte(`DOCTYPE [<!ENTITY xlt ">">]`)), + CharData([]byte("\n")), + Directive([]byte(`DOCTYPE [<!ENTITY xlt "<">]`)), + CharData([]byte("\n")), + Directive([]byte(`DOCTYPE [<!ENTITY xlt '>'>]`)), + CharData([]byte("\n")), + Directive([]byte(`DOCTYPE [<!ENTITY xlt '<'>]`)), + CharData([]byte("\n")), + Directive([]byte(`DOCTYPE [<!ENTITY xlt '">'>]`)), + CharData([]byte("\n")), + Directive([]byte(`DOCTYPE [<!ENTITY xlt "'<">]`)), + CharData([]byte("\n")), +} + +func TestNestedDirectives(t *testing.T) { + p := NewParser(StringReader(nestedDirectivesInput)) + + for i, want := range nestedDirectivesTokens { + have, err := p.Token() + if err != nil { + t.Fatalf("token %d: unexpected error: %s", i, err) + } + if !reflect.DeepEqual(have, want) { + t.Errorf("token %d = %#v want %#v", i, have, want) + } + } +} + +func TestToken(t *testing.T) { + p := NewParser(StringReader(testInput)) + + for i, want := range cookedTokens { + have, err := p.Token() + if err != nil { + t.Fatalf("token %d: unexpected error: %s", i, err) + } + if !reflect.DeepEqual(have, want) { + t.Errorf("token %d = %#v want %#v", i, have, want) + } + } +} + +func TestSyntax(t *testing.T) { + for i := range xmlInput { + p := NewParser(StringReader(xmlInput[i])) + var err error + for _, err = p.Token(); err == nil; _, err = p.Token() { + } + if _, ok := err.(*SyntaxError); !ok { + t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i]) + } + } +} + +type allScalars struct { + True1 bool + True2 bool + False1 bool + False2 bool + Int int + Int8 int8 + Int16 int16 + Int32 int32 + Int64 int64 + Uint int + Uint8 uint8 + Uint16 uint16 + Uint32 uint32 + Uint64 uint64 + Uintptr uintptr + Float32 float32 + Float64 float64 + String string + PtrString *string +} + +var all = allScalars{ + True1: true, + True2: true, + False1: false, + False2: false, + Int: 1, + Int8: -2, + Int16: 3, + Int32: -4, + Int64: 5, + Uint: 6, + Uint8: 7, + Uint16: 8, + Uint32: 9, + Uint64: 10, + Uintptr: 11, + Float32: 13.0, + Float64: 14.0, + String: "15", + PtrString: &sixteen, +} + +var sixteen = "16" + +const testScalarsInput = `<allscalars> + <true1>true</true1> + <true2>1</true2> + <false1>false</false1> + <false2>0</false2> + <int>1</int> + <int8>-2</int8> + <int16>3</int16> + <int32>-4</int32> + <int64>5</int64> + <uint>6</uint> + <uint8>7</uint8> + <uint16>8</uint16> + <uint32>9</uint32> + <uint64>10</uint64> + <uintptr>11</uintptr> + <float>12.0</float> + <float32>13.0</float32> + <float64>14.0</float64> + <string>15</string> + <ptrstring>16</ptrstring> +</allscalars>` + +func TestAllScalars(t *testing.T) { + var a allScalars + buf := bytes.NewBufferString(testScalarsInput) + err := Unmarshal(buf, &a) + + if err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(a, all) { + t.Errorf("have %+v want %+v", a, all) + } +} + +type item struct { + Field_a string +} + +func TestIssue569(t *testing.T) { + data := `<item><field_a>abcd</field_a></item>` + var i item + buf := bytes.NewBufferString(data) + err := Unmarshal(buf, &i) + + if err != nil || i.Field_a != "abcd" { + t.Fatal("Expecting abcd") + } +} + +func TestUnquotedAttrs(t *testing.T) { + data := "<tag attr=azAZ09:-_\t>" + p := NewParser(StringReader(data)) + p.Strict = false + token, err := p.Token() + if _, ok := err.(*SyntaxError); ok { + t.Errorf("Unexpected error: %v", err) + } + if token.(StartElement).Name.Local != "tag" { + t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) + } + attr := token.(StartElement).Attr[0] + if attr.Value != "azAZ09:-_" { + t.Errorf("Unexpected attribute value: %v", attr.Value) + } + if attr.Name.Local != "attr" { + t.Errorf("Unexpected attribute name: %v", attr.Name.Local) + } +} + +func TestValuelessAttrs(t *testing.T) { + tests := [][3]string{ + {"<p nowrap>", "p", "nowrap"}, + {"<p nowrap >", "p", "nowrap"}, + {"<input checked/>", "input", "checked"}, + {"<input checked />", "input", "checked"}, + } + for _, test := range tests { + p := NewParser(StringReader(test[0])) + p.Strict = false + token, err := p.Token() + if _, ok := err.(*SyntaxError); ok { + t.Errorf("Unexpected error: %v", err) + } + if token.(StartElement).Name.Local != test[1] { + t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) + } + attr := token.(StartElement).Attr[0] + if attr.Value != test[2] { + t.Errorf("Unexpected attribute value: %v", attr.Value) + } + if attr.Name.Local != test[2] { + t.Errorf("Unexpected attribute name: %v", attr.Name.Local) + } + } +} + +func TestCopyTokenCharData(t *testing.T) { + data := []byte("same data") + var tok1 Token = CharData(data) + tok2 := CopyToken(tok1) + if !reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(CharData) != CharData") + } + data[1] = 'o' + if reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(CharData) uses same buffer.") + } +} + +func TestCopyTokenStartElement(t *testing.T) { + elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}} + var tok1 Token = elt + tok2 := CopyToken(tok1) + if !reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(StartElement) != StartElement") + } + elt.Attr[0] = Attr{Name{"", "lang"}, "de"} + if reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(CharData) uses same buffer.") + } +} + +func TestSyntaxErrorLineNum(t *testing.T) { + testInput := "<P>Foo<P>\n\n<P>Bar</>\n" + p := NewParser(StringReader(testInput)) + var err error + for _, err = p.Token(); err == nil; _, err = p.Token() { + } + synerr, ok := err.(*SyntaxError) + if !ok { + t.Error("Expected SyntaxError.") + } + if synerr.Line != 3 { + t.Error("SyntaxError didn't have correct line number.") + } +} + +func TestTrailingRawToken(t *testing.T) { + input := `<FOO></FOO> ` + p := NewParser(StringReader(input)) + var err error + for _, err = p.RawToken(); err == nil; _, err = p.RawToken() { + } + if err != io.EOF { + t.Fatalf("p.RawToken() = _, %v, want _, io.EOF", err) + } +} + +func TestTrailingToken(t *testing.T) { + input := `<FOO></FOO> ` + p := NewParser(StringReader(input)) + var err error + for _, err = p.Token(); err == nil; _, err = p.Token() { + } + if err != io.EOF { + t.Fatalf("p.Token() = _, %v, want _, io.EOF", err) + } +} + +func TestEntityInsideCDATA(t *testing.T) { + input := `<test><![CDATA[ &val=foo ]]></test>` + p := NewParser(StringReader(input)) + var err error + for _, err = p.Token(); err == nil; _, err = p.Token() { + } + if err != io.EOF { + t.Fatalf("p.Token() = _, %v, want _, io.EOF", err) + } +} + +// The last three tests (respectively one for characters in attribute +// names and two for character entities) pass not because of code +// changed for issue 1259, but instead pass with the given messages +// from other parts of xml.Parser. I provide these to note the +// current behavior of situations where one might think that character +// range checking would detect the error, but it does not in fact. + +var characterTests = []struct { + in string + err string +}{ + {"\x12<doc/>", "illegal character code U+0012"}, + {"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"}, + {"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"}, + {"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"}, + {"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"}, + {"<doc>&\x01;</doc>", "invalid character entity &;"}, + {"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &;"}, +} + +func TestDisallowedCharacters(t *testing.T) { + + for i, tt := range characterTests { + p := NewParser(StringReader(tt.in)) + var err error + + for err == nil { + _, err = p.Token() + } + synerr, ok := err.(*SyntaxError) + if !ok { + t.Fatalf("input %d p.Token() = _, %v, want _, *SyntaxError", i, err) + } + if synerr.Msg != tt.err { + t.Fatalf("input %d synerr.Msg wrong: want '%s', got '%s'", i, tt.err, synerr.Msg) + } + } +} + +type procInstEncodingTest struct { + expect, got string +} + +var procInstTests = []struct { + input, expect string +}{ + {`version="1.0" encoding="utf-8"`, "utf-8"}, + {`version="1.0" encoding='utf-8'`, "utf-8"}, + {`version="1.0" encoding='utf-8' `, "utf-8"}, + {`version="1.0" encoding=utf-8`, ""}, + {`encoding="FOO" `, "FOO"}, +} + +func TestProcInstEncoding(t *testing.T) { + for _, test := range procInstTests { + got := procInstEncoding(test.input) + if got != test.expect { + t.Errorf("procInstEncoding(%q) = %q; want %q", test.input, got, test.expect) + } + } +} |