diff options
Diffstat (limited to 'libgo/go/encoding/json/decode.go')
-rw-r--r-- | libgo/go/encoding/json/decode.go | 173 |
1 files changed, 118 insertions, 55 deletions
diff --git a/libgo/go/encoding/json/decode.go b/libgo/go/encoding/json/decode.go index 731553d..bdd94e3 100644 --- a/libgo/go/encoding/json/decode.go +++ b/libgo/go/encoding/json/decode.go @@ -8,12 +8,12 @@ package json import ( - "bytes" "encoding" "encoding/base64" "fmt" "reflect" "strconv" + "strings" "unicode" "unicode/utf16" "unicode/utf8" @@ -125,7 +125,7 @@ type UnmarshalTypeError struct { Type reflect.Type // type of Go value it could not be assigned to Offset int64 // error occurred after reading Offset bytes Struct string // name of the struct type containing the field - Field string // name of the field holding the Go value + Field string // the full path from root node to the field } func (e *UnmarshalTypeError) Error() string { @@ -266,12 +266,15 @@ type decodeState struct { opcode int // last read result scan scanner errorContext struct { // provides context for type errors - Struct reflect.Type - Field string + Struct reflect.Type + FieldStack []string } savedError error useNumber bool disallowUnknownFields bool + // safeUnquote is the number of current string literal bytes that don't + // need to be unquoted. When negative, no bytes need unquoting. + safeUnquote int } // readIndex returns the position of the last byte read. @@ -289,7 +292,9 @@ func (d *decodeState) init(data []byte) *decodeState { d.off = 0 d.savedError = nil d.errorContext.Struct = nil - d.errorContext.Field = "" + + // Reuse the allocated space for the FieldStack slice. + d.errorContext.FieldStack = d.errorContext.FieldStack[:0] return d } @@ -303,11 +308,11 @@ func (d *decodeState) saveError(err error) { // addErrorContext returns a new error enhanced with information from d.errorContext func (d *decodeState) addErrorContext(err error) error { - if d.errorContext.Struct != nil || d.errorContext.Field != "" { + if d.errorContext.Struct != nil || len(d.errorContext.FieldStack) > 0 { switch err := err.(type) { case *UnmarshalTypeError: err.Struct = d.errorContext.Struct.Name() - err.Field = d.errorContext.Field + err.Field = strings.Join(d.errorContext.FieldStack, ".") return err } } @@ -358,6 +363,66 @@ func (d *decodeState) scanWhile(op int) { d.opcode = d.scan.eof() } +// rescanLiteral is similar to scanWhile(scanContinue), but it specialises the +// common case where we're decoding a literal. The decoder scans the input +// twice, once for syntax errors and to check the length of the value, and the +// second to perform the decoding. +// +// Only in the second step do we use decodeState to tokenize literals, so we +// know there aren't any syntax errors. We can take advantage of that knowledge, +// and scan a literal's bytes much more quickly. +func (d *decodeState) rescanLiteral() { + data, i := d.data, d.off +Switch: + switch data[i-1] { + case '"': // string + // safeUnquote is initialized at -1, which means that all bytes + // checked so far can be unquoted at a later time with no work + // at all. When reaching the closing '"', if safeUnquote is + // still -1, all bytes can be unquoted with no work. Otherwise, + // only those bytes up until the first '\\' or non-ascii rune + // can be safely unquoted. + safeUnquote := -1 + for ; i < len(data); i++ { + if c := data[i]; c == '\\' { + if safeUnquote < 0 { // first unsafe byte + safeUnquote = int(i - d.off) + } + i++ // escaped char + } else if c == '"' { + d.safeUnquote = safeUnquote + i++ // tokenize the closing quote too + break Switch + } else if c >= utf8.RuneSelf { + if safeUnquote < 0 { // first unsafe byte + safeUnquote = int(i - d.off) + } + } + } + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-': // number + for ; i < len(data); i++ { + switch data[i] { + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + '.', 'e', 'E', '+', '-': + default: + break Switch + } + } + case 't': // true + i += len("rue") + case 'f': // false + i += len("alse") + case 'n': // null + i += len("ull") + } + if i < len(data) { + d.opcode = stateEndValue(&d.scan, data[i]) + } else { + d.opcode = scanEnd + } + d.off = i + 1 +} + // value consumes a JSON value from d.data[d.off-1:], decoding into v, and // reads the following byte ahead. If v is invalid, the value is discarded. // The first byte of the value has been read already. @@ -389,7 +454,7 @@ func (d *decodeState) value(v reflect.Value) error { case scanBeginLiteral: // All bytes inside literal return scanContinue op code. start := d.readIndex() - d.scanWhile(scanContinue) + d.rescanLiteral() if v.IsValid() { if err := d.literalStore(d.data[start:d.readIndex()], v, false); err != nil { @@ -470,6 +535,14 @@ func indirect(v reflect.Value, decodingNull bool) (Unmarshaler, encoding.TextUnm if v.Elem().Kind() != reflect.Ptr && decodingNull && v.CanSet() { break } + + // Prevent infinite loop if v is an interface pointing to its own address: + // var v interface{} + // v = &v + if v.Elem().Kind() == reflect.Interface && v.Elem().Elem() == v { + v = v.Elem() + break + } if v.IsNil() { v.Set(reflect.New(v.Type().Elem())) } @@ -625,7 +698,7 @@ func (d *decodeState) object(v reflect.Value) error { return nil } - var fields []field + var fields structFields // Check type of target: // struct or @@ -659,7 +732,7 @@ func (d *decodeState) object(v reflect.Value) error { } var mapElem reflect.Value - originalErrorContext := d.errorContext + origErrorContext := d.errorContext for { // Read opening " of string key or closing }. @@ -674,9 +747,9 @@ func (d *decodeState) object(v reflect.Value) error { // Read key. start := d.readIndex() - d.scanWhile(scanContinue) + d.rescanLiteral() item := d.data[start:d.readIndex()] - key, ok := unquoteBytes(item) + key, ok := d.unquoteBytes(item) if !ok { panic(phasePanicMsg) } @@ -695,14 +768,18 @@ func (d *decodeState) object(v reflect.Value) error { subv = mapElem } else { var f *field - for i := range fields { - ff := &fields[i] - if bytes.Equal(ff.nameBytes, key) { - f = ff - break - } - if f == nil && ff.equalFold(ff.nameBytes, key) { - f = ff + if i, ok := fields.nameIndex[string(key)]; ok { + // Found an exact name match. + f = &fields.list[i] + } else { + // Fall back to the expensive case-insensitive + // linear search. + for i := range fields.list { + ff := &fields.list[i] + if ff.equalFold(ff.nameBytes, key) { + f = ff + break + } } } if f != nil { @@ -730,7 +807,7 @@ func (d *decodeState) object(v reflect.Value) error { } subv = subv.Field(i) } - d.errorContext.Field = f.name + d.errorContext.FieldStack = append(d.errorContext.FieldStack, f.name) d.errorContext.Struct = t } else if d.disallowUnknownFields { d.saveError(fmt.Errorf("json: unknown field %q", key)) @@ -810,14 +887,17 @@ func (d *decodeState) object(v reflect.Value) error { if d.opcode == scanSkipSpace { d.scanWhile(scanSkipSpace) } + // Reset errorContext to its original state. + // Keep the same underlying array for FieldStack, to reuse the + // space and avoid unnecessary allocs. + d.errorContext.FieldStack = d.errorContext.FieldStack[:len(origErrorContext.FieldStack)] + d.errorContext.Struct = origErrorContext.Struct if d.opcode == scanEndObject { break } if d.opcode != scanObjectValue { panic(phasePanicMsg) } - - d.errorContext = originalErrorContext } return nil } @@ -870,7 +950,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool d.saveError(&UnmarshalTypeError{Value: val, Type: v.Type(), Offset: int64(d.readIndex())}) return nil } - s, ok := unquoteBytes(item) + s, ok := d.unquoteBytes(item) if !ok { if fromQuoted { return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) @@ -921,7 +1001,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool } case '"': // string - s, ok := unquoteBytes(item) + s, ok := d.unquoteBytes(item) if !ok { if fromQuoted { return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) @@ -1077,9 +1157,9 @@ func (d *decodeState) objectInterface() map[string]interface{} { // Read string key. start := d.readIndex() - d.scanWhile(scanContinue) + d.rescanLiteral() item := d.data[start:d.readIndex()] - key, ok := unquote(item) + key, ok := d.unquote(item) if !ok { panic(phasePanicMsg) } @@ -1116,7 +1196,7 @@ func (d *decodeState) objectInterface() map[string]interface{} { func (d *decodeState) literalInterface() interface{} { // All bytes inside literal return scanContinue op code. start := d.readIndex() - d.scanWhile(scanContinue) + d.rescanLiteral() item := d.data[start:d.readIndex()] @@ -1128,7 +1208,7 @@ func (d *decodeState) literalInterface() interface{} { return c == 't' case '"': // string - s, ok := unquote(item) + s, ok := d.unquote(item) if !ok { panic(phasePanicMsg) } @@ -1171,38 +1251,21 @@ func getu4(s []byte) rune { // unquote converts a quoted JSON string literal s into an actual string t. // The rules are different than for Go, so cannot use strconv.Unquote. -func unquote(s []byte) (t string, ok bool) { - s, ok = unquoteBytes(s) +func (d *decodeState) unquote(s []byte) (t string, ok bool) { + s, ok = d.unquoteBytes(s) t = string(s) return } -func unquoteBytes(s []byte) (t []byte, ok bool) { - if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' { - return - } +func (d *decodeState) unquoteBytes(s []byte) (t []byte, ok bool) { + r := d.safeUnquote + // The bytes have been scanned, so we know that the first and last bytes + // are double quotes. s = s[1 : len(s)-1] - // Check for unusual characters. If there are none, - // then no unquoting is needed, so return a slice of the - // original bytes. - r := 0 - for r < len(s) { - c := s[r] - if c == '\\' || c == '"' || c < ' ' { - break - } - if c < utf8.RuneSelf { - r++ - continue - } - rr, size := utf8.DecodeRune(s[r:]) - if rr == utf8.RuneError && size == 1 { - break - } - r += size - } - if r == len(s) { + // If there are no unusual characters, no unquoting is needed, so return + // a slice of the original bytes. + if r == -1 { return s, true } |