aboutsummaryrefslogtreecommitdiff
path: root/libgo/go/encoding/json/decode.go
diff options
context:
space:
mode:
Diffstat (limited to 'libgo/go/encoding/json/decode.go')
-rw-r--r--libgo/go/encoding/json/decode.go173
1 files changed, 118 insertions, 55 deletions
diff --git a/libgo/go/encoding/json/decode.go b/libgo/go/encoding/json/decode.go
index 731553d..bdd94e3 100644
--- a/libgo/go/encoding/json/decode.go
+++ b/libgo/go/encoding/json/decode.go
@@ -8,12 +8,12 @@
package json
import (
- "bytes"
"encoding"
"encoding/base64"
"fmt"
"reflect"
"strconv"
+ "strings"
"unicode"
"unicode/utf16"
"unicode/utf8"
@@ -125,7 +125,7 @@ type UnmarshalTypeError struct {
Type reflect.Type // type of Go value it could not be assigned to
Offset int64 // error occurred after reading Offset bytes
Struct string // name of the struct type containing the field
- Field string // name of the field holding the Go value
+ Field string // the full path from root node to the field
}
func (e *UnmarshalTypeError) Error() string {
@@ -266,12 +266,15 @@ type decodeState struct {
opcode int // last read result
scan scanner
errorContext struct { // provides context for type errors
- Struct reflect.Type
- Field string
+ Struct reflect.Type
+ FieldStack []string
}
savedError error
useNumber bool
disallowUnknownFields bool
+ // safeUnquote is the number of current string literal bytes that don't
+ // need to be unquoted. When negative, no bytes need unquoting.
+ safeUnquote int
}
// readIndex returns the position of the last byte read.
@@ -289,7 +292,9 @@ func (d *decodeState) init(data []byte) *decodeState {
d.off = 0
d.savedError = nil
d.errorContext.Struct = nil
- d.errorContext.Field = ""
+
+ // Reuse the allocated space for the FieldStack slice.
+ d.errorContext.FieldStack = d.errorContext.FieldStack[:0]
return d
}
@@ -303,11 +308,11 @@ func (d *decodeState) saveError(err error) {
// addErrorContext returns a new error enhanced with information from d.errorContext
func (d *decodeState) addErrorContext(err error) error {
- if d.errorContext.Struct != nil || d.errorContext.Field != "" {
+ if d.errorContext.Struct != nil || len(d.errorContext.FieldStack) > 0 {
switch err := err.(type) {
case *UnmarshalTypeError:
err.Struct = d.errorContext.Struct.Name()
- err.Field = d.errorContext.Field
+ err.Field = strings.Join(d.errorContext.FieldStack, ".")
return err
}
}
@@ -358,6 +363,66 @@ func (d *decodeState) scanWhile(op int) {
d.opcode = d.scan.eof()
}
+// rescanLiteral is similar to scanWhile(scanContinue), but it specialises the
+// common case where we're decoding a literal. The decoder scans the input
+// twice, once for syntax errors and to check the length of the value, and the
+// second to perform the decoding.
+//
+// Only in the second step do we use decodeState to tokenize literals, so we
+// know there aren't any syntax errors. We can take advantage of that knowledge,
+// and scan a literal's bytes much more quickly.
+func (d *decodeState) rescanLiteral() {
+ data, i := d.data, d.off
+Switch:
+ switch data[i-1] {
+ case '"': // string
+ // safeUnquote is initialized at -1, which means that all bytes
+ // checked so far can be unquoted at a later time with no work
+ // at all. When reaching the closing '"', if safeUnquote is
+ // still -1, all bytes can be unquoted with no work. Otherwise,
+ // only those bytes up until the first '\\' or non-ascii rune
+ // can be safely unquoted.
+ safeUnquote := -1
+ for ; i < len(data); i++ {
+ if c := data[i]; c == '\\' {
+ if safeUnquote < 0 { // first unsafe byte
+ safeUnquote = int(i - d.off)
+ }
+ i++ // escaped char
+ } else if c == '"' {
+ d.safeUnquote = safeUnquote
+ i++ // tokenize the closing quote too
+ break Switch
+ } else if c >= utf8.RuneSelf {
+ if safeUnquote < 0 { // first unsafe byte
+ safeUnquote = int(i - d.off)
+ }
+ }
+ }
+ case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-': // number
+ for ; i < len(data); i++ {
+ switch data[i] {
+ case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+ '.', 'e', 'E', '+', '-':
+ default:
+ break Switch
+ }
+ }
+ case 't': // true
+ i += len("rue")
+ case 'f': // false
+ i += len("alse")
+ case 'n': // null
+ i += len("ull")
+ }
+ if i < len(data) {
+ d.opcode = stateEndValue(&d.scan, data[i])
+ } else {
+ d.opcode = scanEnd
+ }
+ d.off = i + 1
+}
+
// value consumes a JSON value from d.data[d.off-1:], decoding into v, and
// reads the following byte ahead. If v is invalid, the value is discarded.
// The first byte of the value has been read already.
@@ -389,7 +454,7 @@ func (d *decodeState) value(v reflect.Value) error {
case scanBeginLiteral:
// All bytes inside literal return scanContinue op code.
start := d.readIndex()
- d.scanWhile(scanContinue)
+ d.rescanLiteral()
if v.IsValid() {
if err := d.literalStore(d.data[start:d.readIndex()], v, false); err != nil {
@@ -470,6 +535,14 @@ func indirect(v reflect.Value, decodingNull bool) (Unmarshaler, encoding.TextUnm
if v.Elem().Kind() != reflect.Ptr && decodingNull && v.CanSet() {
break
}
+
+ // Prevent infinite loop if v is an interface pointing to its own address:
+ // var v interface{}
+ // v = &v
+ if v.Elem().Kind() == reflect.Interface && v.Elem().Elem() == v {
+ v = v.Elem()
+ break
+ }
if v.IsNil() {
v.Set(reflect.New(v.Type().Elem()))
}
@@ -625,7 +698,7 @@ func (d *decodeState) object(v reflect.Value) error {
return nil
}
- var fields []field
+ var fields structFields
// Check type of target:
// struct or
@@ -659,7 +732,7 @@ func (d *decodeState) object(v reflect.Value) error {
}
var mapElem reflect.Value
- originalErrorContext := d.errorContext
+ origErrorContext := d.errorContext
for {
// Read opening " of string key or closing }.
@@ -674,9 +747,9 @@ func (d *decodeState) object(v reflect.Value) error {
// Read key.
start := d.readIndex()
- d.scanWhile(scanContinue)
+ d.rescanLiteral()
item := d.data[start:d.readIndex()]
- key, ok := unquoteBytes(item)
+ key, ok := d.unquoteBytes(item)
if !ok {
panic(phasePanicMsg)
}
@@ -695,14 +768,18 @@ func (d *decodeState) object(v reflect.Value) error {
subv = mapElem
} else {
var f *field
- for i := range fields {
- ff := &fields[i]
- if bytes.Equal(ff.nameBytes, key) {
- f = ff
- break
- }
- if f == nil && ff.equalFold(ff.nameBytes, key) {
- f = ff
+ if i, ok := fields.nameIndex[string(key)]; ok {
+ // Found an exact name match.
+ f = &fields.list[i]
+ } else {
+ // Fall back to the expensive case-insensitive
+ // linear search.
+ for i := range fields.list {
+ ff := &fields.list[i]
+ if ff.equalFold(ff.nameBytes, key) {
+ f = ff
+ break
+ }
}
}
if f != nil {
@@ -730,7 +807,7 @@ func (d *decodeState) object(v reflect.Value) error {
}
subv = subv.Field(i)
}
- d.errorContext.Field = f.name
+ d.errorContext.FieldStack = append(d.errorContext.FieldStack, f.name)
d.errorContext.Struct = t
} else if d.disallowUnknownFields {
d.saveError(fmt.Errorf("json: unknown field %q", key))
@@ -810,14 +887,17 @@ func (d *decodeState) object(v reflect.Value) error {
if d.opcode == scanSkipSpace {
d.scanWhile(scanSkipSpace)
}
+ // Reset errorContext to its original state.
+ // Keep the same underlying array for FieldStack, to reuse the
+ // space and avoid unnecessary allocs.
+ d.errorContext.FieldStack = d.errorContext.FieldStack[:len(origErrorContext.FieldStack)]
+ d.errorContext.Struct = origErrorContext.Struct
if d.opcode == scanEndObject {
break
}
if d.opcode != scanObjectValue {
panic(phasePanicMsg)
}
-
- d.errorContext = originalErrorContext
}
return nil
}
@@ -870,7 +950,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
d.saveError(&UnmarshalTypeError{Value: val, Type: v.Type(), Offset: int64(d.readIndex())})
return nil
}
- s, ok := unquoteBytes(item)
+ s, ok := d.unquoteBytes(item)
if !ok {
if fromQuoted {
return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())
@@ -921,7 +1001,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
}
case '"': // string
- s, ok := unquoteBytes(item)
+ s, ok := d.unquoteBytes(item)
if !ok {
if fromQuoted {
return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())
@@ -1077,9 +1157,9 @@ func (d *decodeState) objectInterface() map[string]interface{} {
// Read string key.
start := d.readIndex()
- d.scanWhile(scanContinue)
+ d.rescanLiteral()
item := d.data[start:d.readIndex()]
- key, ok := unquote(item)
+ key, ok := d.unquote(item)
if !ok {
panic(phasePanicMsg)
}
@@ -1116,7 +1196,7 @@ func (d *decodeState) objectInterface() map[string]interface{} {
func (d *decodeState) literalInterface() interface{} {
// All bytes inside literal return scanContinue op code.
start := d.readIndex()
- d.scanWhile(scanContinue)
+ d.rescanLiteral()
item := d.data[start:d.readIndex()]
@@ -1128,7 +1208,7 @@ func (d *decodeState) literalInterface() interface{} {
return c == 't'
case '"': // string
- s, ok := unquote(item)
+ s, ok := d.unquote(item)
if !ok {
panic(phasePanicMsg)
}
@@ -1171,38 +1251,21 @@ func getu4(s []byte) rune {
// unquote converts a quoted JSON string literal s into an actual string t.
// The rules are different than for Go, so cannot use strconv.Unquote.
-func unquote(s []byte) (t string, ok bool) {
- s, ok = unquoteBytes(s)
+func (d *decodeState) unquote(s []byte) (t string, ok bool) {
+ s, ok = d.unquoteBytes(s)
t = string(s)
return
}
-func unquoteBytes(s []byte) (t []byte, ok bool) {
- if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' {
- return
- }
+func (d *decodeState) unquoteBytes(s []byte) (t []byte, ok bool) {
+ r := d.safeUnquote
+ // The bytes have been scanned, so we know that the first and last bytes
+ // are double quotes.
s = s[1 : len(s)-1]
- // Check for unusual characters. If there are none,
- // then no unquoting is needed, so return a slice of the
- // original bytes.
- r := 0
- for r < len(s) {
- c := s[r]
- if c == '\\' || c == '"' || c < ' ' {
- break
- }
- if c < utf8.RuneSelf {
- r++
- continue
- }
- rr, size := utf8.DecodeRune(s[r:])
- if rr == utf8.RuneError && size == 1 {
- break
- }
- r += size
- }
- if r == len(s) {
+ // If there are no unusual characters, no unquoting is needed, so return
+ // a slice of the original bytes.
+ if r == -1 {
return s, true
}