diff options
author | Ian Lance Taylor <iant@golang.org> | 2020-01-02 15:05:27 -0800 |
---|---|---|
committer | Ian Lance Taylor <iant@golang.org> | 2020-01-21 23:53:22 -0800 |
commit | 5a8ea165926cb0737ab03bc48c18dc5198ab5305 (patch) | |
tree | 962dc3357c57f019f85658f99e2e753e30201c27 /libgo/go/encoding | |
parent | 6ac6529e155c9baa0aaaed7aca06bd38ebda5b43 (diff) | |
download | gcc-5a8ea165926cb0737ab03bc48c18dc5198ab5305.zip gcc-5a8ea165926cb0737ab03bc48c18dc5198ab5305.tar.gz gcc-5a8ea165926cb0737ab03bc48c18dc5198ab5305.tar.bz2 |
libgo: update to Go1.14beta1
Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/214297
Diffstat (limited to 'libgo/go/encoding')
27 files changed, 946 insertions, 277 deletions
diff --git a/libgo/go/encoding/asn1/asn1.go b/libgo/go/encoding/asn1/asn1.go index 3cfd9d1..fd4dd68 100644 --- a/libgo/go/encoding/asn1/asn1.go +++ b/libgo/go/encoding/asn1/asn1.go @@ -27,6 +27,7 @@ import ( "reflect" "strconv" "time" + "unicode/utf16" "unicode/utf8" ) @@ -475,6 +476,29 @@ func parseUTF8String(bytes []byte) (ret string, err error) { return string(bytes), nil } +// BMPString + +// parseBMPString parses an ASN.1 BMPString (Basic Multilingual Plane of +// ISO/IEC/ITU 10646-1) from the given byte slice and returns it. +func parseBMPString(bmpString []byte) (string, error) { + if len(bmpString)%2 != 0 { + return "", errors.New("pkcs12: odd-length BMP string") + } + + // Strip terminator if present. + if l := len(bmpString); l >= 2 && bmpString[l-1] == 0 && bmpString[l-2] == 0 { + bmpString = bmpString[:l-2] + } + + s := make([]uint16, 0, len(bmpString)/2) + for len(bmpString) > 0 { + s = append(s, uint16(bmpString[0])<<8+uint16(bmpString[1])) + bmpString = bmpString[2:] + } + + return string(utf16.Decode(s)), nil +} + // A RawValue represents an undecoded ASN.1 object. type RawValue struct { Class, Tag int @@ -589,7 +613,7 @@ func parseSequenceOf(bytes []byte, sliceType reflect.Type, elemType reflect.Type return } switch t.tag { - case TagIA5String, TagGeneralString, TagT61String, TagUTF8String, TagNumericString: + case TagIA5String, TagGeneralString, TagT61String, TagUTF8String, TagNumericString, TagBMPString: // We pretend that various other string types are // PRINTABLE STRINGs so that a sequence of them can be // parsed into a []string. @@ -691,6 +715,8 @@ func parseField(v reflect.Value, bytes []byte, initOffset int, params fieldParam result, err = parseGeneralizedTime(innerBytes) case TagOctetString: result = innerBytes + case TagBMPString: + result, err = parseBMPString(innerBytes) default: // If we don't know how to handle the type, we just leave Value as nil. } @@ -759,7 +785,7 @@ func parseField(v reflect.Value, bytes []byte, initOffset int, params fieldParam if universalTag == TagPrintableString { if t.class == ClassUniversal { switch t.tag { - case TagIA5String, TagGeneralString, TagT61String, TagUTF8String, TagNumericString: + case TagIA5String, TagGeneralString, TagT61String, TagUTF8String, TagNumericString, TagBMPString: universalTag = t.tag } } else if params.stringType != 0 { @@ -957,6 +983,9 @@ func parseField(v reflect.Value, bytes []byte, initOffset int, params fieldParam // that allow the encoding to change midstring and // such. We give up and pass it as an 8-bit string. v, err = parseT61String(innerBytes) + case TagBMPString: + v, err = parseBMPString(innerBytes) + default: err = SyntaxError{fmt.Sprintf("internal error: unknown string type %d", universalTag)} } diff --git a/libgo/go/encoding/asn1/asn1_test.go b/libgo/go/encoding/asn1/asn1_test.go index f0a54e0..d5649bf 100644 --- a/libgo/go/encoding/asn1/asn1_test.go +++ b/libgo/go/encoding/asn1/asn1_test.go @@ -6,6 +6,7 @@ package asn1 import ( "bytes" + "encoding/hex" "fmt" "math" "math/big" @@ -1096,3 +1097,35 @@ func TestTaggedRawValue(t *testing.T) { } } } + +var bmpStringTests = []struct { + decoded string + encodedHex string +}{ + {"", "0000"}, + // Example from https://tools.ietf.org/html/rfc7292#appendix-B. + {"Beavis", "0042006500610076006900730000"}, + // Some characters from the "Letterlike Symbols Unicode block". + {"\u2115 - Double-struck N", "21150020002d00200044006f00750062006c0065002d00730074007200750063006b0020004e0000"}, +} + +func TestBMPString(t *testing.T) { + for i, test := range bmpStringTests { + encoded, err := hex.DecodeString(test.encodedHex) + if err != nil { + t.Fatalf("#%d: failed to decode from hex string", i) + } + + decoded, err := parseBMPString(encoded) + + if err != nil { + t.Errorf("#%d: decoding output gave an error: %s", i, err) + continue + } + + if decoded != test.decoded { + t.Errorf("#%d: decoding output resulted in %q, but it should have been %q", i, decoded, test.decoded) + continue + } + } +} diff --git a/libgo/go/encoding/asn1/common.go b/libgo/go/encoding/asn1/common.go index 255d1eb..e2aa8bd 100644 --- a/libgo/go/encoding/asn1/common.go +++ b/libgo/go/encoding/asn1/common.go @@ -37,6 +37,7 @@ const ( TagUTCTime = 23 TagGeneralizedTime = 24 TagGeneralString = 27 + TagBMPString = 30 ) // ASN.1 class types represent the namespace of the tag. diff --git a/libgo/go/encoding/base32/base32.go b/libgo/go/encoding/base32/base32.go index e14d2d4..2f7d363 100644 --- a/libgo/go/encoding/base32/base32.go +++ b/libgo/go/encoding/base32/base32.go @@ -6,10 +6,8 @@ package base32 import ( - "bytes" "io" "strconv" - "strings" ) /* @@ -62,13 +60,6 @@ var StdEncoding = NewEncoding(encodeStd) // It is typically used in DNS. var HexEncoding = NewEncoding(encodeHex) -var removeNewlinesMapper = func(r rune) rune { - if r == '\r' || r == '\n' { - return -1 - } - return r -} - // WithPadding creates a new encoding identical to enc except // with a specified padding character, or NoPadding to disable padding. // The padding character must not be '\r' or '\n', must not @@ -302,7 +293,7 @@ func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) { // We have reached the end and are missing padding return n, false, CorruptInputError(olen - len(src) - j) } - // We have reached the end and are not expecing any padding + // We have reached the end and are not expecting any padding dlen, end = j, true break } @@ -372,17 +363,18 @@ func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) { // number of bytes successfully written and CorruptInputError. // New line characters (\r and \n) are ignored. func (enc *Encoding) Decode(dst, src []byte) (n int, err error) { - src = bytes.Map(removeNewlinesMapper, src) - n, _, err = enc.decode(dst, src) + buf := make([]byte, len(src)) + l := stripNewlines(buf, src) + n, _, err = enc.decode(dst, buf[:l]) return } // DecodeString returns the bytes represented by the base32 string s. func (enc *Encoding) DecodeString(s string) ([]byte, error) { - s = strings.Map(removeNewlinesMapper, s) - dbuf := make([]byte, enc.DecodedLen(len(s))) - n, _, err := enc.decode(dbuf, []byte(s)) - return dbuf[:n], err + buf := []byte(s) + l := stripNewlines(buf, buf) + n, _, err := enc.decode(buf, buf[:l]) + return buf[:n], err } type decoder struct { @@ -497,18 +489,25 @@ type newlineFilteringReader struct { wrapped io.Reader } +// stripNewlines removes newline characters and returns the number +// of non-newline characters copied to dst. +func stripNewlines(dst, src []byte) int { + offset := 0 + for _, b := range src { + if b == '\r' || b == '\n' { + continue + } + dst[offset] = b + offset++ + } + return offset +} + func (r *newlineFilteringReader) Read(p []byte) (int, error) { n, err := r.wrapped.Read(p) for n > 0 { - offset := 0 - for i, b := range p[0:n] { - if b != '\r' && b != '\n' { - if i != offset { - p[offset] = b - } - offset++ - } - } + s := p[0:n] + offset := stripNewlines(s, s) if err != nil || offset > 0 { return offset, err } diff --git a/libgo/go/encoding/base32/base32_test.go b/libgo/go/encoding/base32/base32_test.go index eb14f1e..0b611db 100644 --- a/libgo/go/encoding/base32/base32_test.go +++ b/libgo/go/encoding/base32/base32_test.go @@ -445,6 +445,15 @@ LNEBUWIIDFON2CA3DBMJXXE5LNFY== } } +func BenchmarkEncode(b *testing.B) { + data := make([]byte, 8192) + buf := make([]byte, StdEncoding.EncodedLen(len(data))) + b.SetBytes(int64(len(data))) + for i := 0; i < b.N; i++ { + StdEncoding.Encode(buf, data) + } +} + func BenchmarkEncodeToString(b *testing.B) { data := make([]byte, 8192) b.SetBytes(int64(len(data))) @@ -453,6 +462,15 @@ func BenchmarkEncodeToString(b *testing.B) { } } +func BenchmarkDecode(b *testing.B) { + data := make([]byte, StdEncoding.EncodedLen(8192)) + StdEncoding.Encode(data, make([]byte, 8192)) + buf := make([]byte, 8192) + b.SetBytes(int64(len(data))) + for i := 0; i < b.N; i++ { + StdEncoding.Decode(buf, data) + } +} func BenchmarkDecodeString(b *testing.B) { data := StdEncoding.EncodeToString(make([]byte, 8192)) b.SetBytes(int64(len(data))) diff --git a/libgo/go/encoding/base64/base64.go b/libgo/go/encoding/base64/base64.go index 0822101..690d3ce 100644 --- a/libgo/go/encoding/base64/base64.go +++ b/libgo/go/encoding/base64/base64.go @@ -86,6 +86,9 @@ func (enc Encoding) WithPadding(padding rune) *Encoding { // Strict creates a new encoding identical to enc except with // strict decoding enabled. In this mode, the decoder requires that // trailing padding bits are zero, as described in RFC 4648 section 3.5. +// +// Note that the input is still malleable, as new line characters +// (CR and LF) are still ignored. func (enc Encoding) Strict() *Encoding { enc.strict = true return &enc diff --git a/libgo/go/encoding/binary/binary.go b/libgo/go/encoding/binary/binary.go index 8c2d1d9..a99ca01 100644 --- a/libgo/go/encoding/binary/binary.go +++ b/libgo/go/encoding/binary/binary.go @@ -26,6 +26,7 @@ import ( "io" "math" "reflect" + "sync" ) // A ByteOrder specifies how to convert byte sequences into @@ -184,6 +185,10 @@ func Read(r io.Reader, order ByteOrder, data interface{}) error { *data = int64(order.Uint64(bs)) case *uint64: *data = order.Uint64(bs) + case *float32: + *data = math.Float32frombits(order.Uint32(bs)) + case *float64: + *data = math.Float64frombits(order.Uint64(bs)) case []bool: for i, x := range bs { // Easier to loop over the input for 8-bit values. data[i] = x != 0 @@ -218,8 +223,20 @@ func Read(r io.Reader, order ByteOrder, data interface{}) error { for i := range data { data[i] = order.Uint64(bs[8*i:]) } + case []float32: + for i := range data { + data[i] = math.Float32frombits(order.Uint32(bs[4*i:])) + } + case []float64: + for i := range data { + data[i] = math.Float64frombits(order.Uint64(bs[8*i:])) + } + default: + n = 0 // fast path doesn't apply + } + if n != 0 { + return nil } - return nil } // Fallback to reflect-based decoding. @@ -338,6 +355,22 @@ func Write(w io.Writer, order ByteOrder, data interface{}) error { for i, x := range v { order.PutUint64(bs[8*i:], x) } + case *float32: + order.PutUint32(bs, math.Float32bits(*v)) + case float32: + order.PutUint32(bs, math.Float32bits(v)) + case []float32: + for i, x := range v { + order.PutUint32(bs[4*i:], math.Float32bits(x)) + } + case *float64: + order.PutUint64(bs, math.Float64bits(*v)) + case float64: + order.PutUint64(bs, math.Float64bits(v)) + case []float64: + for i, x := range v { + order.PutUint64(bs[8*i:], math.Float64bits(x)) + } } _, err := w.Write(bs) return err @@ -363,18 +396,32 @@ func Size(v interface{}) int { return dataSize(reflect.Indirect(reflect.ValueOf(v))) } +var structSize sync.Map // map[reflect.Type]int + // dataSize returns the number of bytes the actual data represented by v occupies in memory. // For compound structures, it sums the sizes of the elements. Thus, for instance, for a slice // it returns the length of the slice times the element size and does not count the memory // occupied by the header. If the type of v is not acceptable, dataSize returns -1. func dataSize(v reflect.Value) int { - if v.Kind() == reflect.Slice { + switch v.Kind() { + case reflect.Slice: if s := sizeof(v.Type().Elem()); s >= 0 { return s * v.Len() } return -1 + + case reflect.Struct: + t := v.Type() + if size, ok := structSize.Load(t); ok { + return size.(int) + } + size := sizeof(t) + structSize.Store(t, size) + return size + + default: + return sizeof(v.Type()) } - return sizeof(v.Type()) } // sizeof returns the size >= 0 of variables for the given type or -1 if the type is not acceptable. @@ -677,6 +724,14 @@ func intDataSize(data interface{}) int { return 8 * len(data) case []uint64: return 8 * len(data) + case float32, *float32: + return 4 + case float64, *float64: + return 8 + case []float32: + return 4 * len(data) + case []float64: + return 8 * len(data) } return 0 } diff --git a/libgo/go/encoding/binary/binary_test.go b/libgo/go/encoding/binary/binary_test.go index af40257..aeb4212 100644 --- a/libgo/go/encoding/binary/binary_test.go +++ b/libgo/go/encoding/binary/binary_test.go @@ -6,10 +6,13 @@ package binary import ( "bytes" + "fmt" "io" + "io/ioutil" "math" "reflect" "strings" + "sync" "testing" ) @@ -296,6 +299,58 @@ func TestBlankFields(t *testing.T) { } } +func TestSizeStructCache(t *testing.T) { + // Reset the cache, otherwise multiple test runs fail. + structSize = sync.Map{} + + count := func() int { + var i int + structSize.Range(func(_, _ interface{}) bool { + i++ + return true + }) + return i + } + + var total int + added := func() int { + delta := count() - total + total += delta + return delta + } + + type foo struct { + A uint32 + } + + type bar struct { + A Struct + B foo + C Struct + } + + testcases := []struct { + val interface{} + want int + }{ + {new(foo), 1}, + {new(bar), 1}, + {new(bar), 0}, + {new(struct{ A Struct }), 1}, + {new(struct{ A Struct }), 0}, + } + + for _, tc := range testcases { + if Size(tc.val) == -1 { + t.Fatalf("Can't get the size of %T", tc.val) + } + + if n := added(); n != tc.want { + t.Errorf("Sizing %T added %d entries to the cache, want %d", tc.val, n, tc.want) + } + } +} + // An attempt to read into a struct with an unexported field will // panic. This is probably not the best choice, but at this point // anything else would be an API change. @@ -397,6 +452,35 @@ func TestEarlyBoundsChecks(t *testing.T) { } } +func TestReadInvalidDestination(t *testing.T) { + testReadInvalidDestination(t, BigEndian) + testReadInvalidDestination(t, LittleEndian) +} + +func testReadInvalidDestination(t *testing.T, order ByteOrder) { + destinations := []interface{}{ + int8(0), + int16(0), + int32(0), + int64(0), + + uint8(0), + uint16(0), + uint32(0), + uint64(0), + + bool(false), + } + + for _, dst := range destinations { + err := Read(bytes.NewReader([]byte{1, 2, 3, 4, 5, 6, 7, 8}), order, dst) + want := fmt.Sprintf("binary.Read: invalid type %T", dst) + if err == nil || err.Error() != want { + t.Fatalf("for type %T: got %q; want %q", dst, err, want) + } + } +} + type byteSliceReader struct { remain []byte } @@ -436,6 +520,14 @@ func BenchmarkReadStruct(b *testing.B) { } } +func BenchmarkWriteStruct(b *testing.B) { + b.SetBytes(int64(Size(&s))) + b.ResetTimer() + for i := 0; i < b.N; i++ { + Write(ioutil.Discard, BigEndian, &s) + } +} + func BenchmarkReadInts(b *testing.B) { var ls Struct bsr := &byteSliceReader{} @@ -542,3 +634,75 @@ func BenchmarkLittleEndianPutUint64(b *testing.B) { LittleEndian.PutUint64(putbuf[:], uint64(i)) } } + +func BenchmarkReadFloats(b *testing.B) { + var ls Struct + bsr := &byteSliceReader{} + var r io.Reader = bsr + b.SetBytes(4 + 8) + b.ResetTimer() + for i := 0; i < b.N; i++ { + bsr.remain = big[30:] + Read(r, BigEndian, &ls.Float32) + Read(r, BigEndian, &ls.Float64) + } + b.StopTimer() + want := s + want.Int8 = 0 + want.Int16 = 0 + want.Int32 = 0 + want.Int64 = 0 + want.Uint8 = 0 + want.Uint16 = 0 + want.Uint32 = 0 + want.Uint64 = 0 + want.Complex64 = 0 + want.Complex128 = 0 + want.Array = [4]uint8{0, 0, 0, 0} + want.Bool = false + want.BoolArray = [4]bool{false, false, false, false} + if b.N > 0 && !reflect.DeepEqual(ls, want) { + b.Fatalf("struct doesn't match:\ngot %v;\nwant %v", ls, want) + } +} + +func BenchmarkWriteFloats(b *testing.B) { + buf := new(bytes.Buffer) + var w io.Writer = buf + b.SetBytes(4 + 8) + b.ResetTimer() + for i := 0; i < b.N; i++ { + buf.Reset() + Write(w, BigEndian, s.Float32) + Write(w, BigEndian, s.Float64) + } + b.StopTimer() + if b.N > 0 && !bytes.Equal(buf.Bytes(), big[30:30+4+8]) { + b.Fatalf("first half doesn't match: %x %x", buf.Bytes(), big[30:30+4+8]) + } +} + +func BenchmarkReadSlice1000Float32s(b *testing.B) { + bsr := &byteSliceReader{} + slice := make([]float32, 1000) + buf := make([]byte, len(slice)*4) + b.SetBytes(int64(len(buf))) + b.ResetTimer() + for i := 0; i < b.N; i++ { + bsr.remain = buf + Read(bsr, BigEndian, slice) + } +} + +func BenchmarkWriteSlice1000Float32s(b *testing.B) { + slice := make([]float32, 1000) + buf := new(bytes.Buffer) + var w io.Writer = buf + b.SetBytes(4 * 1000) + b.ResetTimer() + for i := 0; i < b.N; i++ { + buf.Reset() + Write(w, BigEndian, slice) + } + b.StopTimer() +} diff --git a/libgo/go/encoding/csv/fuzz.go b/libgo/go/encoding/csv/fuzz.go index dc33893..8be21d5 100644 --- a/libgo/go/encoding/csv/fuzz.go +++ b/libgo/go/encoding/csv/fuzz.go @@ -17,13 +17,13 @@ func Fuzz(data []byte) int { buf := new(bytes.Buffer) for _, tt := range []Reader{ - Reader{}, - Reader{Comma: ';'}, - Reader{Comma: '\t'}, - Reader{LazyQuotes: true}, - Reader{TrimLeadingSpace: true}, - Reader{Comment: '#'}, - Reader{Comment: ';'}, + {}, + {Comma: ';'}, + {Comma: '\t'}, + {LazyQuotes: true}, + {TrimLeadingSpace: true}, + {Comment: '#'}, + {Comment: ';'}, } { r := NewReader(bytes.NewReader(data)) r.Comma = tt.Comma diff --git a/libgo/go/encoding/gob/codec_test.go b/libgo/go/encoding/gob/codec_test.go index 494abc9..f38e88b 100644 --- a/libgo/go/encoding/gob/codec_test.go +++ b/libgo/go/encoding/gob/codec_test.go @@ -591,7 +591,7 @@ func TestEndToEnd(t *testing.T) { B: 18, C: -5, M: map[string]*float64{"pi": &pi, "e": &e}, - M2: map[int]T3{4: T3{X: pi, Z: &meaning}, 10: T3{X: e, Z: &fingers}}, + M2: map[int]T3{4: {X: pi, Z: &meaning}, 10: {X: e, Z: &fingers}}, Mstring: map[string]string{"pi": "3.14", "e": "2.71"}, Mintptr: map[int]*int{meaning: &fingers, fingers: &meaning}, Mcomp: map[complex128]complex128{comp1: comp2, comp2: comp1}, diff --git a/libgo/go/encoding/gob/gobencdec_test.go b/libgo/go/encoding/gob/gobencdec_test.go index 41a06b2..6d2c8db 100644 --- a/libgo/go/encoding/gob/gobencdec_test.go +++ b/libgo/go/encoding/gob/gobencdec_test.go @@ -707,7 +707,7 @@ func TestGobEncoderExtraIndirect(t *testing.T) { // Another bug: this caused a crash with the new Go1 Time type. // We throw in a gob-encoding array, to test another case of isZero, -// and a struct containing an nil interface, to test a third. +// and a struct containing a nil interface, to test a third. type isZeroBug struct { T time.Time S string diff --git a/libgo/go/encoding/json/bench_test.go b/libgo/go/encoding/json/bench_test.go index f2592e3..4a5fe7e 100644 --- a/libgo/go/encoding/json/bench_test.go +++ b/libgo/go/encoding/json/bench_test.go @@ -297,6 +297,22 @@ func BenchmarkIssue10335(b *testing.B) { }) } +func BenchmarkIssue34127(b *testing.B) { + b.ReportAllocs() + j := struct { + Bar string `json:"bar,string"` + }{ + Bar: `foobar`, + } + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + if _, err := Marshal(&j); err != nil { + b.Fatal(err) + } + } + }) +} + func BenchmarkUnmapped(b *testing.B) { b.ReportAllocs() j := []byte(`{"s": "hello", "y": 2, "o": {"x": 0}, "a": [1, 99, {"x": 1}]}`) @@ -373,3 +389,22 @@ func BenchmarkTypeFieldsCache(b *testing.B) { }) } } + +func BenchmarkEncodeMarshaler(b *testing.B) { + b.ReportAllocs() + + m := struct { + A int + B RawMessage + }{} + + b.RunParallel(func(pb *testing.PB) { + enc := NewEncoder(ioutil.Discard) + + for pb.Next() { + if err := enc.Encode(&m); err != nil { + b.Fatal("Encode:", err) + } + } + }) +} diff --git a/libgo/go/encoding/json/decode.go b/libgo/go/encoding/json/decode.go index cbd71ac..b434846 100644 --- a/libgo/go/encoding/json/decode.go +++ b/libgo/go/encoding/json/decode.go @@ -72,7 +72,8 @@ import ( // use. If the map is nil, Unmarshal allocates a new map. Otherwise Unmarshal // reuses the existing map, keeping existing entries. Unmarshal then stores // key-value pairs from the JSON object into the map. The map's key type must -// either be a string, an integer, or implement encoding.TextUnmarshaler. +// either be any string type, an integer, implement json.Unmarshaler, or +// implement encoding.TextUnmarshaler. // // If a JSON value is not appropriate for a given target type, // or if a JSON number overflows the target type, Unmarshal @@ -199,66 +200,6 @@ func (n Number) Int64() (int64, error) { return strconv.ParseInt(string(n), 10, 64) } -// isValidNumber reports whether s is a valid JSON number literal. -func isValidNumber(s string) bool { - // This function implements the JSON numbers grammar. - // See https://tools.ietf.org/html/rfc7159#section-6 - // and https://json.org/number.gif - - if s == "" { - return false - } - - // Optional - - if s[0] == '-' { - s = s[1:] - if s == "" { - return false - } - } - - // Digits - switch { - default: - return false - - case s[0] == '0': - s = s[1:] - - case '1' <= s[0] && s[0] <= '9': - s = s[1:] - for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { - s = s[1:] - } - } - - // . followed by 1 or more digits. - if len(s) >= 2 && s[0] == '.' && '0' <= s[1] && s[1] <= '9' { - s = s[2:] - for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { - s = s[1:] - } - } - - // e or E followed by an optional - or + and - // 1 or more digits. - if len(s) >= 2 && (s[0] == 'e' || s[0] == 'E') { - s = s[1:] - if s[0] == '+' || s[0] == '-' { - s = s[1:] - if s == "" { - return false - } - } - for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { - s = s[1:] - } - } - - // Make sure we are at the end. - return s == "" -} - // decodeState represents the state while decoding a JSON value. type decodeState struct { data []byte @@ -272,6 +213,9 @@ type decodeState struct { savedError error useNumber bool disallowUnknownFields bool + // safeUnquote is the number of current string literal bytes that don't + // need to be unquoted. When negative, no bytes need unquoting. + safeUnquote int } // readIndex returns the position of the last byte read. @@ -373,13 +317,27 @@ func (d *decodeState) rescanLiteral() { Switch: switch data[i-1] { case '"': // string + // safeUnquote is initialized at -1, which means that all bytes + // checked so far can be unquoted at a later time with no work + // at all. When reaching the closing '"', if safeUnquote is + // still -1, all bytes can be unquoted with no work. Otherwise, + // only those bytes up until the first '\\' or non-ascii rune + // can be safely unquoted. + safeUnquote := -1 for ; i < len(data); i++ { - switch data[i] { - case '\\': + if c := data[i]; c == '\\' { + if safeUnquote < 0 { // first unsafe byte + safeUnquote = int(i - d.off) + } i++ // escaped char - case '"': + } else if c == '"' { + d.safeUnquote = safeUnquote i++ // tokenize the closing quote too break Switch + } else if c >= utf8.RuneSelf { + if safeUnquote < 0 { // first unsafe byte + safeUnquote = int(i - d.off) + } } } case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-': // number @@ -475,8 +433,9 @@ func (d *decodeState) valueQuoted() interface{} { // indirect walks down v allocating pointers as needed, // until it gets to a non-pointer. -// if it encounters an Unmarshaler, indirect stops and returns that. -// if decodingNull is true, indirect stops at the last pointer so it can be set to nil. +// If it encounters an Unmarshaler, indirect stops and returns that. +// If decodingNull is true, indirect stops at the first settable pointer so it +// can be set to nil. func indirect(v reflect.Value, decodingNull bool) (Unmarshaler, encoding.TextUnmarshaler, reflect.Value) { // Issue #24153 indicates that it is generally not a guaranteed property // that you may round-trip a reflect.Value by calling Value.Addr().Elem() @@ -515,7 +474,7 @@ func indirect(v reflect.Value, decodingNull bool) (Unmarshaler, encoding.TextUnm break } - if v.Elem().Kind() != reflect.Ptr && decodingNull && v.CanSet() { + if decodingNull && v.CanSet() { break } @@ -732,7 +691,7 @@ func (d *decodeState) object(v reflect.Value) error { start := d.readIndex() d.rescanLiteral() item := d.data[start:d.readIndex()] - key, ok := unquoteBytes(item) + key, ok := d.unquoteBytes(item) if !ok { panic(phasePanicMsg) } @@ -831,14 +790,14 @@ func (d *decodeState) object(v reflect.Value) error { kt := t.Key() var kv reflect.Value switch { - case kt.Kind() == reflect.String: - kv = reflect.ValueOf(key).Convert(kt) case reflect.PtrTo(kt).Implements(textUnmarshalerType): kv = reflect.New(kt) if err := d.literalStore(item, kv, true); err != nil { return err } kv = kv.Elem() + case kt.Kind() == reflect.String: + kv = reflect.ValueOf(key).Convert(kt) default: switch kt.Kind() { case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: @@ -933,7 +892,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool d.saveError(&UnmarshalTypeError{Value: val, Type: v.Type(), Offset: int64(d.readIndex())}) return nil } - s, ok := unquoteBytes(item) + s, ok := d.unquoteBytes(item) if !ok { if fromQuoted { return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) @@ -984,7 +943,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool } case '"': // string - s, ok := unquoteBytes(item) + s, ok := d.unquoteBytes(item) if !ok { if fromQuoted { return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) @@ -1007,6 +966,9 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool } v.SetBytes(b[:n]) case reflect.String: + if v.Type() == numberType && !isValidNumber(string(s)) { + return fmt.Errorf("json: invalid number literal, trying to unmarshal %q into Number", item) + } v.SetString(string(s)) case reflect.Interface: if v.NumMethod() == 0 { @@ -1027,10 +989,9 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool switch v.Kind() { default: if v.Kind() == reflect.String && v.Type() == numberType { + // s must be a valid number, because it's + // already been tokenized. v.SetString(s) - if !isValidNumber(s) { - return fmt.Errorf("json: invalid number literal, trying to unmarshal %q into Number", item) - } break } if fromQuoted { @@ -1142,7 +1103,7 @@ func (d *decodeState) objectInterface() map[string]interface{} { start := d.readIndex() d.rescanLiteral() item := d.data[start:d.readIndex()] - key, ok := unquote(item) + key, ok := d.unquote(item) if !ok { panic(phasePanicMsg) } @@ -1191,7 +1152,7 @@ func (d *decodeState) literalInterface() interface{} { return c == 't' case '"': // string - s, ok := unquote(item) + s, ok := d.unquote(item) if !ok { panic(phasePanicMsg) } @@ -1234,38 +1195,26 @@ func getu4(s []byte) rune { // unquote converts a quoted JSON string literal s into an actual string t. // The rules are different than for Go, so cannot use strconv.Unquote. -func unquote(s []byte) (t string, ok bool) { - s, ok = unquoteBytes(s) +// The first byte in s must be '"'. +func (d *decodeState) unquote(s []byte) (t string, ok bool) { + s, ok = d.unquoteBytes(s) t = string(s) return } -func unquoteBytes(s []byte) (t []byte, ok bool) { - if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' { +func (d *decodeState) unquoteBytes(s []byte) (t []byte, ok bool) { + // We already know that s[0] == '"'. However, we don't know that the + // closing quote exists in all cases, such as when the string is nested + // via the ",string" option. + if len(s) < 2 || s[len(s)-1] != '"' { return } s = s[1 : len(s)-1] - // Check for unusual characters. If there are none, - // then no unquoting is needed, so return a slice of the - // original bytes. - r := 0 - for r < len(s) { - c := s[r] - if c == '\\' || c == '"' || c < ' ' { - break - } - if c < utf8.RuneSelf { - r++ - continue - } - rr, size := utf8.DecodeRune(s[r:]) - if rr == utf8.RuneError && size == 1 { - break - } - r += size - } - if r == len(s) { + // If there are no unusual characters, no unquoting is needed, so return + // a slice of the original bytes. + r := d.safeUnquote + if r == -1 { return s, true } diff --git a/libgo/go/encoding/json/decode_test.go b/libgo/go/encoding/json/decode_test.go index 3f25893..498bd97 100644 --- a/libgo/go/encoding/json/decode_test.go +++ b/libgo/go/encoding/json/decode_test.go @@ -145,23 +145,15 @@ func (u8 *u8marshal) UnmarshalText(b []byte) error { var _ encoding.TextUnmarshaler = (*u8marshal)(nil) var ( - um0, um1 unmarshaler // target2 of unmarshaling - ump = &um1 umtrue = unmarshaler{true} umslice = []unmarshaler{{true}} - umslicep = new([]unmarshaler) umstruct = ustruct{unmarshaler{true}} - um0T, um1T unmarshalerText // target2 of unmarshaling - umpType = &um1T - umtrueXY = unmarshalerText{"x", "y"} - umsliceXY = []unmarshalerText{{"x", "y"}} - umslicepType = new([]unmarshalerText) - umstructType = new(ustructText) - umstructXY = ustructText{unmarshalerText{"x", "y"}} + umtrueXY = unmarshalerText{"x", "y"} + umsliceXY = []unmarshalerText{{"x", "y"}} + umstructXY = ustructText{unmarshalerText{"x", "y"}} - ummapType = map[unmarshalerText]bool{} - ummapXY = map[unmarshalerText]bool{{"x", "y"}: true} + ummapXY = map[unmarshalerText]bool{{"x", "y"}: true} ) // Test data structures for anonymous fields. @@ -279,9 +271,6 @@ type unexportedWithMethods struct{} func (unexportedWithMethods) F() {} -func sliceAddr(x []int) *[]int { return &x } -func mapAddr(x map[string]int) *map[string]int { return &x } - type byteWithMarshalJSON byte func (b byteWithMarshalJSON) MarshalJSON() ([]byte, error) { @@ -400,7 +389,7 @@ type mapStringToStringData struct { type unmarshalTest struct { in string - ptr interface{} + ptr interface{} // new(type) out interface{} err error useNumber bool @@ -412,6 +401,11 @@ type B struct { B bool `json:",string"` } +type DoublePtr struct { + I **int + J **int +} + var unmarshalTests = []unmarshalTest{ // basic types {in: `true`, ptr: new(bool), out: true}, @@ -459,6 +453,7 @@ var unmarshalTests = []unmarshalTest{ {in: `[1, 2, 3+]`, err: &SyntaxError{"invalid character '+' after array element", 9}}, {in: `{"X":12x}`, err: &SyntaxError{"invalid character 'x' after object key:value pair", 8}, useNumber: true}, {in: `[2, 3`, err: &SyntaxError{msg: "unexpected end of JSON input", Offset: 5}}, + {in: `{"F3": -}`, ptr: new(V), out: V{F3: Number("-")}, err: &SyntaxError{msg: "invalid character '}' in numeric literal", Offset: 9}}, // raw value errors {in: "\x01 42", err: &SyntaxError{"invalid character '\\x01' looking for beginning of value", 1}}, @@ -493,18 +488,18 @@ var unmarshalTests = []unmarshalTest{ {in: pallValueCompact, ptr: new(*All), out: &pallValue}, // unmarshal interface test - {in: `{"T":false}`, ptr: &um0, out: umtrue}, // use "false" so test will fail if custom unmarshaler is not called - {in: `{"T":false}`, ptr: &ump, out: &umtrue}, - {in: `[{"T":false}]`, ptr: &umslice, out: umslice}, - {in: `[{"T":false}]`, ptr: &umslicep, out: &umslice}, - {in: `{"M":{"T":"x:y"}}`, ptr: &umstruct, out: umstruct}, + {in: `{"T":false}`, ptr: new(unmarshaler), out: umtrue}, // use "false" so test will fail if custom unmarshaler is not called + {in: `{"T":false}`, ptr: new(*unmarshaler), out: &umtrue}, + {in: `[{"T":false}]`, ptr: new([]unmarshaler), out: umslice}, + {in: `[{"T":false}]`, ptr: new(*[]unmarshaler), out: &umslice}, + {in: `{"M":{"T":"x:y"}}`, ptr: new(ustruct), out: umstruct}, // UnmarshalText interface test - {in: `"x:y"`, ptr: &um0T, out: umtrueXY}, - {in: `"x:y"`, ptr: &umpType, out: &umtrueXY}, - {in: `["x:y"]`, ptr: &umsliceXY, out: umsliceXY}, - {in: `["x:y"]`, ptr: &umslicepType, out: &umsliceXY}, - {in: `{"M":"x:y"}`, ptr: umstructType, out: umstructXY}, + {in: `"x:y"`, ptr: new(unmarshalerText), out: umtrueXY}, + {in: `"x:y"`, ptr: new(*unmarshalerText), out: &umtrueXY}, + {in: `["x:y"]`, ptr: new([]unmarshalerText), out: umsliceXY}, + {in: `["x:y"]`, ptr: new(*[]unmarshalerText), out: &umsliceXY}, + {in: `{"M":"x:y"}`, ptr: new(ustructText), out: umstructXY}, // integer-keyed map test { @@ -579,15 +574,9 @@ var unmarshalTests = []unmarshalTest{ }, // Map keys can be encoding.TextUnmarshalers. - {in: `{"x:y":true}`, ptr: &ummapType, out: ummapXY}, + {in: `{"x:y":true}`, ptr: new(map[unmarshalerText]bool), out: ummapXY}, // If multiple values for the same key exists, only the most recent value is used. - {in: `{"x:y":false,"x:y":true}`, ptr: &ummapType, out: ummapXY}, - - // Overwriting of data. - // This is different from package xml, but it's what we've always done. - // Now documented and tested. - {in: `[2]`, ptr: sliceAddr([]int{1}), out: []int{2}}, - {in: `{"key": 2}`, ptr: mapAddr(map[string]int{"old": 0, "key": 1}), out: map[string]int{"key": 2}}, + {in: `{"x:y":false,"x:y":true}`, ptr: new(map[unmarshalerText]bool), out: ummapXY}, { in: `{ @@ -672,6 +661,11 @@ var unmarshalTests = []unmarshalTest{ err: fmt.Errorf("json: unknown field \"X\""), disallowUnknownFields: true, }, + { + in: `{"I": 0, "I": null, "J": null}`, + ptr: new(DoublePtr), + out: DoublePtr{I: nil, J: nil}, + }, // invalid UTF-8 is coerced to valid UTF-8. { @@ -713,19 +707,19 @@ var unmarshalTests = []unmarshalTest{ // Used to be issue 8305, but time.Time implements encoding.TextUnmarshaler so this works now. { in: `{"2009-11-10T23:00:00Z": "hello world"}`, - ptr: &map[time.Time]string{}, + ptr: new(map[time.Time]string), out: map[time.Time]string{time.Date(2009, 11, 10, 23, 0, 0, 0, time.UTC): "hello world"}, }, // issue 8305 { in: `{"2009-11-10T23:00:00Z": "hello world"}`, - ptr: &map[Point]string{}, + ptr: new(map[Point]string), err: &UnmarshalTypeError{Value: "object", Type: reflect.TypeOf(map[Point]string{}), Offset: 1}, }, { in: `{"asdf": "hello world"}`, - ptr: &map[unmarshaler]string{}, + ptr: new(map[unmarshaler]string), err: &UnmarshalTypeError{Value: "object", Type: reflect.TypeOf(map[unmarshaler]string{}), Offset: 1}, }, @@ -955,6 +949,37 @@ var unmarshalTests = []unmarshalTest{ Offset: 29, }, }, + // #14702 + { + in: `invalid`, + ptr: new(Number), + err: &SyntaxError{ + msg: "invalid character 'i' looking for beginning of value", + Offset: 1, + }, + }, + { + in: `"invalid"`, + ptr: new(Number), + err: fmt.Errorf("json: invalid number literal, trying to unmarshal %q into Number", `"invalid"`), + }, + { + in: `{"A":"invalid"}`, + ptr: new(struct{ A Number }), + err: fmt.Errorf("json: invalid number literal, trying to unmarshal %q into Number", `"invalid"`), + }, + { + in: `{"A":"invalid"}`, + ptr: new(struct { + A Number `json:",string"` + }), + err: fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into json.Number", `invalid`), + }, + { + in: `{"A":"invalid"}`, + ptr: new(map[string]Number), + err: fmt.Errorf("json: invalid number literal, trying to unmarshal %q into Number", `"invalid"`), + }, } func TestMarshal(t *testing.T) { @@ -1077,8 +1102,27 @@ func TestUnmarshal(t *testing.T) { continue } + typ := reflect.TypeOf(tt.ptr) + if typ.Kind() != reflect.Ptr { + t.Errorf("#%d: unmarshalTest.ptr %T is not a pointer type", i, tt.ptr) + continue + } + typ = typ.Elem() + // v = new(right-type) - v := reflect.New(reflect.TypeOf(tt.ptr).Elem()) + v := reflect.New(typ) + + if !reflect.DeepEqual(tt.ptr, v.Interface()) { + // There's no reason for ptr to point to non-zero data, + // as we decode into new(right-type), so the data is + // discarded. + // This can easily mean tests that silently don't test + // what they should. To test decoding into existing + // data, see TestPrefilled. + t.Errorf("#%d: unmarshalTest.ptr %#v is not a pointer to a zero value", i, tt.ptr) + continue + } + dec := NewDecoder(bytes.NewReader(in)) if tt.useNumber { dec.UseNumber() @@ -1741,41 +1785,6 @@ type NullTest struct { Struct struct{} } -type NullTestStrings struct { - Bool bool `json:",string"` - Int int `json:",string"` - Int8 int8 `json:",string"` - Int16 int16 `json:",string"` - Int32 int32 `json:",string"` - Int64 int64 `json:",string"` - Uint uint `json:",string"` - Uint8 uint8 `json:",string"` - Uint16 uint16 `json:",string"` - Uint32 uint32 `json:",string"` - Uint64 uint64 `json:",string"` - Float32 float32 `json:",string"` - Float64 float64 `json:",string"` - String string `json:",string"` - PBool *bool `json:",string"` - Map map[string]string `json:",string"` - Slice []string `json:",string"` - Interface interface{} `json:",string"` - - PRaw *RawMessage `json:",string"` - PTime *time.Time `json:",string"` - PBigInt *big.Int `json:",string"` - PText *MustNotUnmarshalText `json:",string"` - PBuffer *bytes.Buffer `json:",string"` - PStruct *struct{} `json:",string"` - - Raw RawMessage `json:",string"` - Time time.Time `json:",string"` - BigInt big.Int `json:",string"` - Text MustNotUnmarshalText `json:",string"` - Buffer bytes.Buffer `json:",string"` - Struct struct{} `json:",string"` -} - // JSON null values should be ignored for primitives and string values instead of resulting in an error. // Issue 2540 func TestUnmarshalNulls(t *testing.T) { @@ -2086,11 +2095,10 @@ func TestSkipArrayObjects(t *testing.T) { } } -// Test semantics of pre-filled struct fields and pre-filled map fields. -// Issue 4900. +// Test semantics of pre-filled data, such as struct fields, map elements, +// slices, and arrays. +// Issues 4900 and 8837, among others. func TestPrefilled(t *testing.T) { - ptrToMap := func(m map[string]interface{}) *map[string]interface{} { return &m } - // Values here change, cannot reuse table across runs. var prefillTests = []struct { in string @@ -2104,8 +2112,28 @@ func TestPrefilled(t *testing.T) { }, { in: `{"X": 1, "Y": 2}`, - ptr: ptrToMap(map[string]interface{}{"X": float32(3), "Y": int16(4), "Z": 1.5}), - out: ptrToMap(map[string]interface{}{"X": float64(1), "Y": float64(2), "Z": 1.5}), + ptr: &map[string]interface{}{"X": float32(3), "Y": int16(4), "Z": 1.5}, + out: &map[string]interface{}{"X": float64(1), "Y": float64(2), "Z": 1.5}, + }, + { + in: `[2]`, + ptr: &[]int{1}, + out: &[]int{2}, + }, + { + in: `[2, 3]`, + ptr: &[]int{1}, + out: &[]int{2, 3}, + }, + { + in: `[2, 3]`, + ptr: &[...]int{1}, + out: &[...]int{2}, + }, + { + in: `[3]`, + ptr: &[...]int{1, 2}, + out: &[...]int{3, 0}, }, } @@ -2323,6 +2351,41 @@ func TestUnmarshalEmbeddedUnexported(t *testing.T) { } } +func TestUnmarshalErrorAfterMultipleJSON(t *testing.T) { + tests := []struct { + in string + err error + }{{ + in: `1 false null :`, + err: &SyntaxError{"invalid character ':' looking for beginning of value", 14}, + }, { + in: `1 [] [,]`, + err: &SyntaxError{"invalid character ',' looking for beginning of value", 7}, + }, { + in: `1 [] [true:]`, + err: &SyntaxError{"invalid character ':' after array element", 11}, + }, { + in: `1 {} {"x"=}`, + err: &SyntaxError{"invalid character '=' after object key", 14}, + }, { + in: `falsetruenul#`, + err: &SyntaxError{"invalid character '#' in literal null (expecting 'l')", 13}, + }} + for i, tt := range tests { + dec := NewDecoder(strings.NewReader(tt.in)) + var err error + for { + var v interface{} + if err = dec.Decode(&v); err != nil { + break + } + } + if !reflect.DeepEqual(err, tt.err) { + t.Errorf("#%d: got %#v, want %#v", i, err, tt.err) + } + } +} + type unmarshalPanic struct{} func (unmarshalPanic) UnmarshalJSON([]byte) error { panic(0xdead) } @@ -2348,3 +2411,23 @@ func TestUnmarshalRecursivePointer(t *testing.T) { t.Fatal(err) } } + +type textUnmarshalerString string + +func (m *textUnmarshalerString) UnmarshalText(text []byte) error { + *m = textUnmarshalerString(strings.ToLower(string(text))) + return nil +} + +// Test unmarshal to a map, with map key is a user defined type. +// See golang.org/issues/34437. +func TestUnmarshalMapWithTextUnmarshalerStringKey(t *testing.T) { + var p map[textUnmarshalerString]string + if err := Unmarshal([]byte(`{"FOO": "1"}`), &p); err != nil { + t.Fatalf("Unmarshal unexpected error: %v", err) + } + + if _, ok := p["foo"]; !ok { + t.Errorf(`Key "foo" is not existed in map: %v`, p) + } +} diff --git a/libgo/go/encoding/json/encode.go b/libgo/go/encoding/json/encode.go index 6741276..39cdaeb 100644 --- a/libgo/go/encoding/json/encode.go +++ b/libgo/go/encoding/json/encode.go @@ -153,7 +153,7 @@ import ( // // JSON cannot represent cyclic data structures and Marshal does not // handle them. Passing cyclic structures to Marshal will result in -// an infinite recursion. +// an error. // func Marshal(v interface{}) ([]byte, error) { e := newEncodeState() @@ -164,7 +164,6 @@ func Marshal(v interface{}) ([]byte, error) { } buf := append([]byte(nil), e.Bytes()...) - e.Reset() encodeStatePool.Put(e) return buf, nil @@ -262,14 +261,22 @@ func (e *InvalidUTF8Error) Error() string { // A MarshalerError represents an error from calling a MarshalJSON or MarshalText method. type MarshalerError struct { - Type reflect.Type - Err error + Type reflect.Type + Err error + sourceFunc string } func (e *MarshalerError) Error() string { - return "json: error calling MarshalJSON for type " + e.Type.String() + ": " + e.Err.Error() + srcFunc := e.sourceFunc + if srcFunc == "" { + srcFunc = "MarshalJSON" + } + return "json: error calling " + srcFunc + + " for type " + e.Type.String() + + ": " + e.Err.Error() } +// Unwrap returns the underlying error. func (e *MarshalerError) Unwrap() error { return e.Err } var hex = "0123456789abcdef" @@ -278,17 +285,31 @@ var hex = "0123456789abcdef" type encodeState struct { bytes.Buffer // accumulated output scratch [64]byte + + // Keep track of what pointers we've seen in the current recursive call + // path, to avoid cycles that could lead to a stack overflow. Only do + // the relatively expensive map operations if ptrLevel is larger than + // startDetectingCyclesAfter, so that we skip the work if we're within a + // reasonable amount of nested pointers deep. + ptrLevel uint + ptrSeen map[interface{}]struct{} } +const startDetectingCyclesAfter = 1000 + var encodeStatePool sync.Pool func newEncodeState() *encodeState { if v := encodeStatePool.Get(); v != nil { e := v.(*encodeState) e.Reset() + if len(e.ptrSeen) > 0 { + panic("ptrEncoder.encode should have emptied ptrSeen via defers") + } + e.ptrLevel = 0 return e } - return new(encodeState) + return &encodeState{ptrSeen: make(map[interface{}]struct{})} } // jsonError is an error wrapper type for internal use only. @@ -392,19 +413,22 @@ var ( // newTypeEncoder constructs an encoderFunc for a type. // The returned encoder only checks CanAddr when allowAddr is true. func newTypeEncoder(t reflect.Type, allowAddr bool) encoderFunc { - if t.Implements(marshalerType) { - return marshalerEncoder - } + // If we have a non-pointer value whose type implements + // Marshaler with a value receiver, then we're better off taking + // the address of the value - otherwise we end up with an + // allocation as we cast the value to an interface. if t.Kind() != reflect.Ptr && allowAddr && reflect.PtrTo(t).Implements(marshalerType) { return newCondAddrEncoder(addrMarshalerEncoder, newTypeEncoder(t, false)) } - - if t.Implements(textMarshalerType) { - return textMarshalerEncoder + if t.Implements(marshalerType) { + return marshalerEncoder } if t.Kind() != reflect.Ptr && allowAddr && reflect.PtrTo(t).Implements(textMarshalerType) { return newCondAddrEncoder(addrTextMarshalerEncoder, newTypeEncoder(t, false)) } + if t.Implements(textMarshalerType) { + return textMarshalerEncoder + } switch t.Kind() { case reflect.Bool: @@ -456,7 +480,7 @@ func marshalerEncoder(e *encodeState, v reflect.Value, opts encOpts) { err = compact(&e.Buffer, b, opts.escapeHTML) } if err != nil { - e.error(&MarshalerError{v.Type(), err}) + e.error(&MarshalerError{v.Type(), err, "MarshalJSON"}) } } @@ -473,7 +497,7 @@ func addrMarshalerEncoder(e *encodeState, v reflect.Value, opts encOpts) { err = compact(&e.Buffer, b, opts.escapeHTML) } if err != nil { - e.error(&MarshalerError{v.Type(), err}) + e.error(&MarshalerError{v.Type(), err, "MarshalJSON"}) } } @@ -482,10 +506,14 @@ func textMarshalerEncoder(e *encodeState, v reflect.Value, opts encOpts) { e.WriteString("null") return } - m := v.Interface().(encoding.TextMarshaler) + m, ok := v.Interface().(encoding.TextMarshaler) + if !ok { + e.WriteString("null") + return + } b, err := m.MarshalText() if err != nil { - e.error(&MarshalerError{v.Type(), err}) + e.error(&MarshalerError{v.Type(), err, "MarshalText"}) } e.stringBytes(b, opts.escapeHTML) } @@ -499,7 +527,7 @@ func addrTextMarshalerEncoder(e *encodeState, v reflect.Value, opts encOpts) { m := va.Interface().(encoding.TextMarshaler) b, err := m.MarshalText() if err != nil { - e.error(&MarshalerError{v.Type(), err}) + e.error(&MarshalerError{v.Type(), err, "MarshalText"}) } e.stringBytes(b, opts.escapeHTML) } @@ -597,20 +625,86 @@ func stringEncoder(e *encodeState, v reflect.Value, opts encOpts) { if !isValidNumber(numStr) { e.error(fmt.Errorf("json: invalid number literal %q", numStr)) } + if opts.quoted { + e.WriteByte('"') + } e.WriteString(numStr) + if opts.quoted { + e.WriteByte('"') + } return } if opts.quoted { - sb, err := Marshal(v.String()) - if err != nil { - e.error(err) - } - e.string(string(sb), opts.escapeHTML) + b := make([]byte, 0, v.Len()+2) + b = append(b, '"') + b = append(b, []byte(v.String())...) + b = append(b, '"') + e.stringBytes(b, opts.escapeHTML) } else { e.string(v.String(), opts.escapeHTML) } } +// isValidNumber reports whether s is a valid JSON number literal. +func isValidNumber(s string) bool { + // This function implements the JSON numbers grammar. + // See https://tools.ietf.org/html/rfc7159#section-6 + // and https://json.org/number.gif + + if s == "" { + return false + } + + // Optional - + if s[0] == '-' { + s = s[1:] + if s == "" { + return false + } + } + + // Digits + switch { + default: + return false + + case s[0] == '0': + s = s[1:] + + case '1' <= s[0] && s[0] <= '9': + s = s[1:] + for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { + s = s[1:] + } + } + + // . followed by 1 or more digits. + if len(s) >= 2 && s[0] == '.' && '0' <= s[1] && s[1] <= '9' { + s = s[2:] + for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { + s = s[1:] + } + } + + // e or E followed by an optional - or + and + // 1 or more digits. + if len(s) >= 2 && (s[0] == 'e' || s[0] == 'E') { + s = s[1:] + if s[0] == '+' || s[0] == '-' { + s = s[1:] + if s == "" { + return false + } + } + for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { + s = s[1:] + } + } + + // Make sure we are at the end. + return s == "" +} + func interfaceEncoder(e *encodeState, v reflect.Value, opts encOpts) { if v.IsNil() { e.WriteString("null") @@ -692,7 +786,7 @@ func (me mapEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { for i, v := range keys { sv[i].v = v if err := sv[i].resolve(); err != nil { - e.error(&MarshalerError{v.Type(), err}) + e.error(fmt.Errorf("json: encoding error for type %q: %q", v.Type().String(), err.Error())) } } sort.Slice(sv, func(i, j int) bool { return sv[i].s < sv[j].s }) @@ -807,7 +901,18 @@ func (pe ptrEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { e.WriteString("null") return } + if e.ptrLevel++; e.ptrLevel > startDetectingCyclesAfter { + // We're a large number of nested ptrEncoder.encode calls deep; + // start checking if we've run into a pointer cycle. + ptr := v.Interface() + if _, ok := e.ptrSeen[ptr]; ok { + e.error(&UnsupportedValueError{v, fmt.Sprintf("encountered a cycle via %s", v.Type())}) + } + e.ptrSeen[ptr] = struct{}{} + defer delete(e.ptrSeen, ptr) + } pe.elemEnc(e, v.Elem(), opts) + e.ptrLevel-- } func newPtrEncoder(t reflect.Type) encoderFunc { @@ -872,6 +977,9 @@ func (w *reflectWithString) resolve() error { return nil } if tm, ok := w.v.Interface().(encoding.TextMarshaler); ok { + if w.v.Kind() == reflect.Ptr && w.v.IsNil() { + return nil + } buf, err := tm.MarshalText() w.s = string(buf) return err diff --git a/libgo/go/encoding/json/encode_test.go b/libgo/go/encoding/json/encode_test.go index bdf2a9f..5110c7d 100644 --- a/libgo/go/encoding/json/encode_test.go +++ b/libgo/go/encoding/json/encode_test.go @@ -6,6 +6,7 @@ package json import ( "bytes" + "encoding" "fmt" "log" "math" @@ -75,13 +76,15 @@ type StringTag struct { IntStr int64 `json:",string"` UintptrStr uintptr `json:",string"` StrStr string `json:",string"` + NumberStr Number `json:",string"` } var stringTagExpected = `{ "BoolStr": "true", "IntStr": "42", "UintptrStr": "44", - "StrStr": "\"xzbit\"" + "StrStr": "\"xzbit\"", + "NumberStr": "46" }` func TestStringTag(t *testing.T) { @@ -90,6 +93,7 @@ func TestStringTag(t *testing.T) { s.IntStr = 42 s.UintptrStr = 44 s.StrStr = "xzbit" + s.NumberStr = "46" got, err := MarshalIndent(&s, "", " ") if err != nil { t.Fatal(err) @@ -134,10 +138,45 @@ func TestEncodeRenamedByteSlice(t *testing.T) { } } +type SamePointerNoCycle struct { + Ptr1, Ptr2 *SamePointerNoCycle +} + +var samePointerNoCycle = &SamePointerNoCycle{} + +type PointerCycle struct { + Ptr *PointerCycle +} + +var pointerCycle = &PointerCycle{} + +type PointerCycleIndirect struct { + Ptrs []interface{} +} + +var pointerCycleIndirect = &PointerCycleIndirect{} + +func init() { + ptr := &SamePointerNoCycle{} + samePointerNoCycle.Ptr1 = ptr + samePointerNoCycle.Ptr2 = ptr + + pointerCycle.Ptr = pointerCycle + pointerCycleIndirect.Ptrs = []interface{}{pointerCycleIndirect} +} + +func TestSamePointerNoCycle(t *testing.T) { + if _, err := Marshal(samePointerNoCycle); err != nil { + t.Fatalf("unexpected error: %v", err) + } +} + var unsupportedValues = []interface{}{ math.NaN(), math.Inf(-1), math.Inf(1), + pointerCycle, + pointerCycleIndirect, } func TestUnsupportedValues(t *testing.T) { @@ -453,18 +492,31 @@ type BugX struct { BugB } -// Issue 16042. Even if a nil interface value is passed in -// as long as it implements MarshalJSON, it should be marshaled. -type nilMarshaler string +// golang.org/issue/16042. +// Even if a nil interface value is passed in, as long as +// it implements Marshaler, it should be marshaled. +type nilJSONMarshaler string -func (nm *nilMarshaler) MarshalJSON() ([]byte, error) { +func (nm *nilJSONMarshaler) MarshalJSON() ([]byte, error) { if nm == nil { return Marshal("0zenil0") } return Marshal("zenil:" + string(*nm)) } -// Issue 16042. +// golang.org/issue/34235. +// Even if a nil interface value is passed in, as long as +// it implements encoding.TextMarshaler, it should be marshaled. +type nilTextMarshaler string + +func (nm *nilTextMarshaler) MarshalText() ([]byte, error) { + if nm == nil { + return []byte("0zenil0"), nil + } + return []byte("zenil:" + string(*nm)), nil +} + +// See golang.org/issue/16042 and golang.org/issue/34235. func TestNilMarshal(t *testing.T) { testCases := []struct { v interface{} @@ -478,8 +530,11 @@ func TestNilMarshal(t *testing.T) { {v: []byte(nil), want: `null`}, {v: struct{ M string }{"gopher"}, want: `{"M":"gopher"}`}, {v: struct{ M Marshaler }{}, want: `{"M":null}`}, - {v: struct{ M Marshaler }{(*nilMarshaler)(nil)}, want: `{"M":"0zenil0"}`}, - {v: struct{ M interface{} }{(*nilMarshaler)(nil)}, want: `{"M":null}`}, + {v: struct{ M Marshaler }{(*nilJSONMarshaler)(nil)}, want: `{"M":"0zenil0"}`}, + {v: struct{ M interface{} }{(*nilJSONMarshaler)(nil)}, want: `{"M":null}`}, + {v: struct{ M encoding.TextMarshaler }{}, want: `{"M":null}`}, + {v: struct{ M encoding.TextMarshaler }{(*nilTextMarshaler)(nil)}, want: `{"M":"0zenil0"}`}, + {v: struct{ M interface{} }{(*nilTextMarshaler)(nil)}, want: `{"M":null}`}, } for _, tt := range testCases { @@ -793,6 +848,21 @@ func TestTextMarshalerMapKeysAreSorted(t *testing.T) { } } +// https://golang.org/issue/33675 +func TestNilMarshalerTextMapKey(t *testing.T) { + b, err := Marshal(map[*unmarshalerText]int{ + (*unmarshalerText)(nil): 1, + {"A", "B"}: 2, + }) + if err != nil { + t.Fatalf("Failed to Marshal *text.Marshaler: %v", err) + } + const want = `{"":1,"A:B":2}` + if string(b) != want { + t.Errorf("Marshal map with *text.Marshaler keys: got %#q, want %#q", b, want) + } +} + var re = regexp.MustCompile // syntactic checks on form of marshaled floating point numbers. @@ -1029,3 +1099,30 @@ func TestMarshalUncommonFieldNames(t *testing.T) { t.Fatalf("Marshal: got %s want %s", got, want) } } + +func TestMarshalerError(t *testing.T) { + s := "test variable" + st := reflect.TypeOf(s) + errText := "json: test error" + + tests := []struct { + err *MarshalerError + want string + }{ + { + &MarshalerError{st, fmt.Errorf(errText), ""}, + "json: error calling MarshalJSON for type " + st.String() + ": " + errText, + }, + { + &MarshalerError{st, fmt.Errorf(errText), "TestMarshalerError"}, + "json: error calling TestMarshalerError for type " + st.String() + ": " + errText, + }, + } + + for i, tt := range tests { + got := tt.err.Error() + if got != tt.want { + t.Errorf("MarshalerError test %d, got: %s, want: %s", i, got, tt.want) + } + } +} diff --git a/libgo/go/encoding/json/indent.go b/libgo/go/encoding/json/indent.go index fba1954..2924d3b 100644 --- a/libgo/go/encoding/json/indent.go +++ b/libgo/go/encoding/json/indent.go @@ -4,7 +4,9 @@ package json -import "bytes" +import ( + "bytes" +) // Compact appends to dst the JSON-encoded src with // insignificant space characters elided. @@ -14,8 +16,8 @@ func Compact(dst *bytes.Buffer, src []byte) error { func compact(dst *bytes.Buffer, src []byte, escape bool) error { origLen := dst.Len() - var scan scanner - scan.reset() + scan := newScanner() + defer freeScanner(scan) start := 0 for i, c := range src { if escape && (c == '<' || c == '>' || c == '&') { @@ -28,7 +30,7 @@ func compact(dst *bytes.Buffer, src []byte, escape bool) error { start = i + 1 } // Convert U+2028 and U+2029 (E2 80 A8 and E2 80 A9). - if c == 0xE2 && i+2 < len(src) && src[i+1] == 0x80 && src[i+2]&^1 == 0xA8 { + if escape && c == 0xE2 && i+2 < len(src) && src[i+1] == 0x80 && src[i+2]&^1 == 0xA8 { if start < i { dst.Write(src[start:i]) } @@ -36,7 +38,7 @@ func compact(dst *bytes.Buffer, src []byte, escape bool) error { dst.WriteByte(hex[src[i+2]&0xF]) start = i + 3 } - v := scan.step(&scan, c) + v := scan.step(scan, c) if v >= scanSkipSpace { if v == scanError { break @@ -78,13 +80,13 @@ func newline(dst *bytes.Buffer, prefix, indent string, depth int) { // if src ends in a trailing newline, so will dst. func Indent(dst *bytes.Buffer, src []byte, prefix, indent string) error { origLen := dst.Len() - var scan scanner - scan.reset() + scan := newScanner() + defer freeScanner(scan) needIndent := false depth := 0 for _, c := range src { scan.bytes++ - v := scan.step(&scan, c) + v := scan.step(scan, c) if v == scanSkipSpace { continue } diff --git a/libgo/go/encoding/json/scanner.go b/libgo/go/encoding/json/scanner.go index 8857224..552bd70 100644 --- a/libgo/go/encoding/json/scanner.go +++ b/libgo/go/encoding/json/scanner.go @@ -13,11 +13,16 @@ package json // This file starts with two simple examples using the scanner // before diving into the scanner itself. -import "strconv" +import ( + "strconv" + "sync" +) // Valid reports whether data is a valid JSON encoding. func Valid(data []byte) bool { - return checkValid(data, &scanner{}) == nil + scan := newScanner() + defer freeScanner(scan) + return checkValid(data, scan) == nil } // checkValid verifies that data is valid JSON-encoded data. @@ -45,7 +50,7 @@ type SyntaxError struct { func (e *SyntaxError) Error() string { return e.msg } // A scanner is a JSON scanning state machine. -// Callers call scan.reset() and then pass bytes in one at a time +// Callers call scan.reset and then pass bytes in one at a time // by calling scan.step(&scan, c) for each byte. // The return value, referred to as an opcode, tells the // caller about significant parsing events like beginning @@ -72,10 +77,33 @@ type scanner struct { // Error that happened, if any. err error - // total bytes consumed, updated by decoder.Decode + // total bytes consumed, updated by decoder.Decode (and deliberately + // not set to zero by scan.reset) bytes int64 } +var scannerPool = sync.Pool{ + New: func() interface{} { + return &scanner{} + }, +} + +func newScanner() *scanner { + scan := scannerPool.Get().(*scanner) + // scan.reset by design doesn't set bytes to zero + scan.bytes = 0 + scan.reset() + return scan +} + +func freeScanner(scan *scanner) { + // Avoid hanging on to too much memory in extreme cases. + if len(scan.parseState) > 1024 { + scan.parseState = nil + } + scannerPool.Put(scan) +} + // These values are returned by the state transition functions // assigned to scanner.state and the method scanner.eof. // They give details about the current state of the scan that diff --git a/libgo/go/encoding/json/scanner_test.go b/libgo/go/encoding/json/scanner_test.go index 6cdbe7d..3737516 100644 --- a/libgo/go/encoding/json/scanner_test.go +++ b/libgo/go/encoding/json/scanner_test.go @@ -48,6 +48,7 @@ var examples = []example{ {`[1,2,3]`, "[\n\t1,\n\t2,\n\t3\n]"}, {`{"x":1}`, "{\n\t\"x\": 1\n}"}, {ex1, ex1i}, + {"{\"\":\"<>&\u2028\u2029\"}", "{\n\t\"\": \"<>&\u2028\u2029\"\n}"}, // See golang.org/issue/34070 } var ex1 = `[true,false,null,"x",1,1.5,0,-5e+2]` @@ -89,8 +90,8 @@ func TestCompactSeparators(t *testing.T) { tests := []struct { in, compact string }{ - {"{\"\u2028\": 1}", `{"\u2028":1}`}, - {"{\"\u2029\" :2}", `{"\u2029":2}`}, + {"{\"\u2028\": 1}", "{\"\u2028\":1}"}, + {"{\"\u2029\" :2}", "{\"\u2029\":2}"}, } for _, tt := range tests { var buf bytes.Buffer diff --git a/libgo/go/encoding/json/stream.go b/libgo/go/encoding/json/stream.go index e291274..81f404f 100644 --- a/libgo/go/encoding/json/stream.go +++ b/libgo/go/encoding/json/stream.go @@ -56,7 +56,7 @@ func (dec *Decoder) Decode(v interface{}) error { } if !dec.tokenValueAllowed() { - return &SyntaxError{msg: "not at beginning of value", Offset: dec.offset()} + return &SyntaxError{msg: "not at beginning of value", Offset: dec.InputOffset()} } // Read whole value into buffer. @@ -102,6 +102,10 @@ Input: dec.scan.bytes++ switch dec.scan.step(&dec.scan, c) { case scanEnd: + // scanEnd is delayed one byte so we decrement + // the scanner bytes count by 1 to ensure that + // this value is correct in the next call of Decode. + dec.scan.bytes-- break Input case scanEndObject, scanEndArray: // scanEnd is delayed one byte. @@ -310,7 +314,7 @@ func (dec *Decoder) tokenPrepareForDecode() error { return err } if c != ',' { - return &SyntaxError{"expected comma after array element", dec.offset()} + return &SyntaxError{"expected comma after array element", dec.InputOffset()} } dec.scanp++ dec.tokenState = tokenArrayValue @@ -320,7 +324,7 @@ func (dec *Decoder) tokenPrepareForDecode() error { return err } if c != ':' { - return &SyntaxError{"expected colon after object key", dec.offset()} + return &SyntaxError{"expected colon after object key", dec.InputOffset()} } dec.scanp++ dec.tokenState = tokenObjectValue @@ -473,7 +477,7 @@ func (dec *Decoder) tokenError(c byte) (Token, error) { case tokenObjectComma: context = " after object key:value pair" } - return nil, &SyntaxError{"invalid character " + quoteChar(c) + context, dec.offset()} + return nil, &SyntaxError{"invalid character " + quoteChar(c) + context, dec.InputOffset()} } // More reports whether there is another element in the @@ -502,6 +506,9 @@ func (dec *Decoder) peek() (byte, error) { } } -func (dec *Decoder) offset() int64 { +// InputOffset returns the input stream byte offset of the current decoder position. +// The offset gives the location of the end of the most recently returned token +// and the beginning of the next token. +func (dec *Decoder) InputOffset() int64 { return dec.scanned + int64(dec.scanp) } diff --git a/libgo/go/encoding/json/stream_test.go b/libgo/go/encoding/json/stream_test.go index e3317dd..ebb4f23 100644 --- a/libgo/go/encoding/json/stream_test.go +++ b/libgo/go/encoding/json/stream_test.go @@ -118,6 +118,11 @@ func TestEncoderSetEscapeHTML(t *testing.T) { Ptr strPtrMarshaler }{`"<str>"`, `"<str>"`} + // https://golang.org/issue/34154 + stringOption := struct { + Bar string `json:"bar,string"` + }{`<html>foobar</html>`} + for _, tt := range []struct { name string v interface{} @@ -137,6 +142,11 @@ func TestEncoderSetEscapeHTML(t *testing.T) { `{"NonPtr":"\u003cstr\u003e","Ptr":"\u003cstr\u003e"}`, `{"NonPtr":"<str>","Ptr":"<str>"}`, }, + { + "stringOption", stringOption, + `{"bar":"\"\u003chtml\u003efoobar\u003c/html\u003e\""}`, + `{"bar":"\"<html>foobar</html>\""}`, + }, } { var buf bytes.Buffer enc := NewEncoder(&buf) diff --git a/libgo/go/encoding/xml/marshal.go b/libgo/go/encoding/xml/marshal.go index add5ece..e325f31 100644 --- a/libgo/go/encoding/xml/marshal.go +++ b/libgo/go/encoding/xml/marshal.go @@ -914,7 +914,7 @@ func (p *printer) marshalStruct(tinfo *typeInfo, val reflect.Value) error { p.WriteString("-->") continue - case fInnerXml: + case fInnerXML: vf = indirect(vf) iface := vf.Interface() switch raw := iface.(type) { diff --git a/libgo/go/encoding/xml/marshal_test.go b/libgo/go/encoding/xml/marshal_test.go index a0ccf44..8473158b 100644 --- a/libgo/go/encoding/xml/marshal_test.go +++ b/libgo/go/encoding/xml/marshal_test.go @@ -392,7 +392,6 @@ func stringptr(x string) *string { type T1 struct{} type T2 struct{} -type T3 struct{} type IndirComment struct { T1 T1 diff --git a/libgo/go/encoding/xml/read.go b/libgo/go/encoding/xml/read.go index 12102bc..10a60ee 100644 --- a/libgo/go/encoding/xml/read.go +++ b/libgo/go/encoding/xml/read.go @@ -491,7 +491,7 @@ func (d *Decoder) unmarshal(val reflect.Value, start *StartElement) error { saveAny = finfo.value(sv) } - case fInnerXml: + case fInnerXML: if !saveXML.IsValid() { saveXML = finfo.value(sv) if d.saved == nil { diff --git a/libgo/go/encoding/xml/typeinfo.go b/libgo/go/encoding/xml/typeinfo.go index 48de3d7..639952c 100644 --- a/libgo/go/encoding/xml/typeinfo.go +++ b/libgo/go/encoding/xml/typeinfo.go @@ -33,13 +33,13 @@ const ( fAttr fCDATA fCharData - fInnerXml + fInnerXML fComment fAny fOmitEmpty - fMode = fElement | fAttr | fCDATA | fCharData | fInnerXml | fComment | fAny + fMode = fElement | fAttr | fCDATA | fCharData | fInnerXML | fComment | fAny xmlName = "XMLName" ) @@ -134,7 +134,7 @@ func structFieldInfo(typ reflect.Type, f *reflect.StructField) (*fieldInfo, erro case "chardata": finfo.flags |= fCharData case "innerxml": - finfo.flags |= fInnerXml + finfo.flags |= fInnerXML case "comment": finfo.flags |= fComment case "any": @@ -149,7 +149,7 @@ func structFieldInfo(typ reflect.Type, f *reflect.StructField) (*fieldInfo, erro switch mode := finfo.flags & fMode; mode { case 0: finfo.flags |= fElement - case fAttr, fCDATA, fCharData, fInnerXml, fComment, fAny, fAny | fAttr: + case fAttr, fCDATA, fCharData, fInnerXML, fComment, fAny, fAny | fAttr: if f.Name == xmlName || tag != "" && mode != fAttr { valid = false } diff --git a/libgo/go/encoding/xml/xml.go b/libgo/go/encoding/xml/xml.go index ca05944..5e73dcf 100644 --- a/libgo/go/encoding/xml/xml.go +++ b/libgo/go/encoding/xml/xml.go @@ -286,7 +286,10 @@ func (d *Decoder) Token() (Token, error) { t = d.nextToken d.nextToken = nil } else if t, err = d.rawToken(); err != nil { - if err == io.EOF && d.stk != nil && d.stk.kind != stkEOF { + switch { + case err == io.EOF && d.t != nil: + err = nil + case err == io.EOF && d.stk != nil && d.stk.kind != stkEOF: err = d.syntaxError("unexpected EOF") } return t, err diff --git a/libgo/go/encoding/xml/xml_test.go b/libgo/go/encoding/xml/xml_test.go index ee4ffa2..efddca4 100644 --- a/libgo/go/encoding/xml/xml_test.go +++ b/libgo/go/encoding/xml/xml_test.go @@ -14,6 +14,51 @@ import ( "unicode/utf8" ) +type toks struct { + earlyEOF bool + t []Token +} + +func (t *toks) Token() (Token, error) { + if len(t.t) == 0 { + return nil, io.EOF + } + var tok Token + tok, t.t = t.t[0], t.t[1:] + if t.earlyEOF && len(t.t) == 0 { + return tok, io.EOF + } + return tok, nil +} + +func TestDecodeEOF(t *testing.T) { + start := StartElement{Name: Name{Local: "test"}} + t.Run("EarlyEOF", func(t *testing.T) { + d := NewTokenDecoder(&toks{earlyEOF: true, t: []Token{ + start, + start.End(), + }}) + err := d.Decode(&struct { + XMLName Name `xml:"test"` + }{}) + if err != nil { + t.Error(err) + } + }) + t.Run("LateEOF", func(t *testing.T) { + d := NewTokenDecoder(&toks{t: []Token{ + start, + start.End(), + }}) + err := d.Decode(&struct { + XMLName Name `xml:"test"` + }{}) + if err != nil { + t.Error(err) + } + }) +} + const testInput = ` <?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" |