diff options
author | Giuliano Belinassi <giuliano.belinassi@usp.br> | 2020-08-22 17:43:43 -0300 |
---|---|---|
committer | Giuliano Belinassi <giuliano.belinassi@usp.br> | 2020-08-22 17:43:43 -0300 |
commit | a926878ddbd5a98b272c22171ce58663fc04c3e0 (patch) | |
tree | 86af256e5d9a9c06263c00adc90e5fe348008c43 /libgo/go/encoding/json | |
parent | 542730f087133690b47e036dfd43eb0db8a650ce (diff) | |
parent | 07cbaed8ba7d1b6e4ab3a9f44175502a4e1ecdb1 (diff) | |
download | gcc-devel/autopar_devel.zip gcc-devel/autopar_devel.tar.gz gcc-devel/autopar_devel.tar.bz2 |
Merge branch 'autopar_rebase2' into autopar_develdevel/autopar_devel
Quickly commit changes in the rebase branch.
Diffstat (limited to 'libgo/go/encoding/json')
-rw-r--r-- | libgo/go/encoding/json/decode.go | 69 | ||||
-rw-r--r-- | libgo/go/encoding/json/decode_test.go | 145 | ||||
-rw-r--r-- | libgo/go/encoding/json/encode.go | 13 | ||||
-rw-r--r-- | libgo/go/encoding/json/encode_test.go | 87 | ||||
-rw-r--r-- | libgo/go/encoding/json/scanner.go | 31 | ||||
-rw-r--r-- | libgo/go/encoding/json/stream_test.go | 8 |
6 files changed, 264 insertions, 89 deletions
diff --git a/libgo/go/encoding/json/decode.go b/libgo/go/encoding/json/decode.go index b434846..86d8a69 100644 --- a/libgo/go/encoding/json/decode.go +++ b/libgo/go/encoding/json/decode.go @@ -213,9 +213,6 @@ type decodeState struct { savedError error useNumber bool disallowUnknownFields bool - // safeUnquote is the number of current string literal bytes that don't - // need to be unquoted. When negative, no bytes need unquoting. - safeUnquote int } // readIndex returns the position of the last byte read. @@ -317,27 +314,13 @@ func (d *decodeState) rescanLiteral() { Switch: switch data[i-1] { case '"': // string - // safeUnquote is initialized at -1, which means that all bytes - // checked so far can be unquoted at a later time with no work - // at all. When reaching the closing '"', if safeUnquote is - // still -1, all bytes can be unquoted with no work. Otherwise, - // only those bytes up until the first '\\' or non-ascii rune - // can be safely unquoted. - safeUnquote := -1 for ; i < len(data); i++ { - if c := data[i]; c == '\\' { - if safeUnquote < 0 { // first unsafe byte - safeUnquote = int(i - d.off) - } + switch data[i] { + case '\\': i++ // escaped char - } else if c == '"' { - d.safeUnquote = safeUnquote + case '"': i++ // tokenize the closing quote too break Switch - } else if c >= utf8.RuneSelf { - if safeUnquote < 0 { // first unsafe byte - safeUnquote = int(i - d.off) - } } } case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-': // number @@ -691,7 +674,7 @@ func (d *decodeState) object(v reflect.Value) error { start := d.readIndex() d.rescanLiteral() item := d.data[start:d.readIndex()] - key, ok := d.unquoteBytes(item) + key, ok := unquoteBytes(item) if !ok { panic(phasePanicMsg) } @@ -892,7 +875,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool d.saveError(&UnmarshalTypeError{Value: val, Type: v.Type(), Offset: int64(d.readIndex())}) return nil } - s, ok := d.unquoteBytes(item) + s, ok := unquoteBytes(item) if !ok { if fromQuoted { return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) @@ -943,7 +926,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool } case '"': // string - s, ok := d.unquoteBytes(item) + s, ok := unquoteBytes(item) if !ok { if fromQuoted { return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) @@ -1103,7 +1086,7 @@ func (d *decodeState) objectInterface() map[string]interface{} { start := d.readIndex() d.rescanLiteral() item := d.data[start:d.readIndex()] - key, ok := d.unquote(item) + key, ok := unquote(item) if !ok { panic(phasePanicMsg) } @@ -1152,7 +1135,7 @@ func (d *decodeState) literalInterface() interface{} { return c == 't' case '"': // string - s, ok := d.unquote(item) + s, ok := unquote(item) if !ok { panic(phasePanicMsg) } @@ -1195,26 +1178,38 @@ func getu4(s []byte) rune { // unquote converts a quoted JSON string literal s into an actual string t. // The rules are different than for Go, so cannot use strconv.Unquote. -// The first byte in s must be '"'. -func (d *decodeState) unquote(s []byte) (t string, ok bool) { - s, ok = d.unquoteBytes(s) +func unquote(s []byte) (t string, ok bool) { + s, ok = unquoteBytes(s) t = string(s) return } -func (d *decodeState) unquoteBytes(s []byte) (t []byte, ok bool) { - // We already know that s[0] == '"'. However, we don't know that the - // closing quote exists in all cases, such as when the string is nested - // via the ",string" option. - if len(s) < 2 || s[len(s)-1] != '"' { +func unquoteBytes(s []byte) (t []byte, ok bool) { + if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' { return } s = s[1 : len(s)-1] - // If there are no unusual characters, no unquoting is needed, so return - // a slice of the original bytes. - r := d.safeUnquote - if r == -1 { + // Check for unusual characters. If there are none, + // then no unquoting is needed, so return a slice of the + // original bytes. + r := 0 + for r < len(s) { + c := s[r] + if c == '\\' || c == '"' || c < ' ' { + break + } + if c < utf8.RuneSelf { + r++ + continue + } + rr, size := utf8.DecodeRune(s[r:]) + if rr == utf8.RuneError && size == 1 { + break + } + r += size + } + if r == len(s) { return s, true } diff --git a/libgo/go/encoding/json/decode_test.go b/libgo/go/encoding/json/decode_test.go index 498bd97..219e845 100644 --- a/libgo/go/encoding/json/decode_test.go +++ b/libgo/go/encoding/json/decode_test.go @@ -2419,7 +2419,7 @@ func (m *textUnmarshalerString) UnmarshalText(text []byte) error { return nil } -// Test unmarshal to a map, with map key is a user defined type. +// Test unmarshal to a map, where the map key is a user defined type. // See golang.org/issues/34437. func TestUnmarshalMapWithTextUnmarshalerStringKey(t *testing.T) { var p map[textUnmarshalerString]string @@ -2428,6 +2428,147 @@ func TestUnmarshalMapWithTextUnmarshalerStringKey(t *testing.T) { } if _, ok := p["foo"]; !ok { - t.Errorf(`Key "foo" is not existed in map: %v`, p) + t.Errorf(`Key "foo" does not exist in map: %v`, p) + } +} + +func TestUnmarshalRescanLiteralMangledUnquote(t *testing.T) { + // See golang.org/issues/38105. + var p map[textUnmarshalerString]string + if err := Unmarshal([]byte(`{"开源":"12345开源"}`), &p); err != nil { + t.Fatalf("Unmarshal unexpected error: %v", err) + } + if _, ok := p["开源"]; !ok { + t.Errorf(`Key "开源" does not exist in map: %v`, p) + } + + // See golang.org/issues/38126. + type T struct { + F1 string `json:"F1,string"` + } + t1 := T{"aaa\tbbb"} + + b, err := Marshal(t1) + if err != nil { + t.Fatalf("Marshal unexpected error: %v", err) + } + var t2 T + if err := Unmarshal(b, &t2); err != nil { + t.Fatalf("Unmarshal unexpected error: %v", err) + } + if t1 != t2 { + t.Errorf("Marshal and Unmarshal roundtrip mismatch: want %q got %q", t1, t2) + } + + // See golang.org/issues/39555. + input := map[textUnmarshalerString]string{"FOO": "", `"`: ""} + + encoded, err := Marshal(input) + if err != nil { + t.Fatalf("Marshal unexpected error: %v", err) + } + var got map[textUnmarshalerString]string + if err := Unmarshal(encoded, &got); err != nil { + t.Fatalf("Unmarshal unexpected error: %v", err) + } + want := map[textUnmarshalerString]string{"foo": "", `"`: ""} + if !reflect.DeepEqual(want, got) { + t.Fatalf("Unexpected roundtrip result:\nwant: %q\ngot: %q", want, got) + } +} + +func TestUnmarshalMaxDepth(t *testing.T) { + testcases := []struct { + name string + data string + errMaxDepth bool + }{ + { + name: "ArrayUnderMaxNestingDepth", + data: `{"a":` + strings.Repeat(`[`, 10000-1) + strings.Repeat(`]`, 10000-1) + `}`, + errMaxDepth: false, + }, + { + name: "ArrayOverMaxNestingDepth", + data: `{"a":` + strings.Repeat(`[`, 10000) + strings.Repeat(`]`, 10000) + `}`, + errMaxDepth: true, + }, + { + name: "ArrayOverStackDepth", + data: `{"a":` + strings.Repeat(`[`, 3000000) + strings.Repeat(`]`, 3000000) + `}`, + errMaxDepth: true, + }, + { + name: "ObjectUnderMaxNestingDepth", + data: `{"a":` + strings.Repeat(`{"a":`, 10000-1) + `0` + strings.Repeat(`}`, 10000-1) + `}`, + errMaxDepth: false, + }, + { + name: "ObjectOverMaxNestingDepth", + data: `{"a":` + strings.Repeat(`{"a":`, 10000) + `0` + strings.Repeat(`}`, 10000) + `}`, + errMaxDepth: true, + }, + { + name: "ObjectOverStackDepth", + data: `{"a":` + strings.Repeat(`{"a":`, 3000000) + `0` + strings.Repeat(`}`, 3000000) + `}`, + errMaxDepth: true, + }, + } + + targets := []struct { + name string + newValue func() interface{} + }{ + { + name: "unstructured", + newValue: func() interface{} { + var v interface{} + return &v + }, + }, + { + name: "typed named field", + newValue: func() interface{} { + v := struct { + A interface{} `json:"a"` + }{} + return &v + }, + }, + { + name: "typed missing field", + newValue: func() interface{} { + v := struct { + B interface{} `json:"b"` + }{} + return &v + }, + }, + { + name: "custom unmarshaler", + newValue: func() interface{} { + v := unmarshaler{} + return &v + }, + }, + } + + for _, tc := range testcases { + for _, target := range targets { + t.Run(target.name+"-"+tc.name, func(t *testing.T) { + err := Unmarshal([]byte(tc.data), target.newValue()) + if !tc.errMaxDepth { + if err != nil { + t.Errorf("unexpected error: %v", err) + } + } else { + if err == nil { + t.Errorf("expected error containing 'exceeded max depth', got none") + } else if !strings.Contains(err.Error(), "exceeded max depth") { + t.Errorf("expected error containing 'exceeded max depth', got: %v", err) + } + } + }) + } } } diff --git a/libgo/go/encoding/json/encode.go b/libgo/go/encoding/json/encode.go index 39cdaeb..578d551 100644 --- a/libgo/go/encoding/json/encode.go +++ b/libgo/go/encoding/json/encode.go @@ -635,11 +635,12 @@ func stringEncoder(e *encodeState, v reflect.Value, opts encOpts) { return } if opts.quoted { - b := make([]byte, 0, v.Len()+2) - b = append(b, '"') - b = append(b, []byte(v.String())...) - b = append(b, '"') - e.stringBytes(b, opts.escapeHTML) + e2 := newEncodeState() + // Since we encode the string twice, we only need to escape HTML + // the first time. + e2.string(v.String(), opts.escapeHTML) + e.stringBytes(e2.Bytes(), false) + encodeStatePool.Put(e2) } else { e.string(v.String(), opts.escapeHTML) } @@ -649,7 +650,7 @@ func stringEncoder(e *encodeState, v reflect.Value, opts encOpts) { func isValidNumber(s string) bool { // This function implements the JSON numbers grammar. // See https://tools.ietf.org/html/rfc7159#section-6 - // and https://json.org/number.gif + // and https://www.json.org/img/number.png if s == "" { return false diff --git a/libgo/go/encoding/json/encode_test.go b/libgo/go/encoding/json/encode_test.go index 5110c7d..7290eca 100644 --- a/libgo/go/encoding/json/encode_test.go +++ b/libgo/go/encoding/json/encode_test.go @@ -79,37 +79,66 @@ type StringTag struct { NumberStr Number `json:",string"` } -var stringTagExpected = `{ - "BoolStr": "true", - "IntStr": "42", - "UintptrStr": "44", - "StrStr": "\"xzbit\"", - "NumberStr": "46" -}` - -func TestStringTag(t *testing.T) { - var s StringTag - s.BoolStr = true - s.IntStr = 42 - s.UintptrStr = 44 - s.StrStr = "xzbit" - s.NumberStr = "46" - got, err := MarshalIndent(&s, "", " ") - if err != nil { - t.Fatal(err) - } - if got := string(got); got != stringTagExpected { - t.Fatalf(" got: %s\nwant: %s\n", got, stringTagExpected) +func TestRoundtripStringTag(t *testing.T) { + tests := []struct { + name string + in StringTag + want string // empty to just test that we roundtrip + }{ + { + name: "AllTypes", + in: StringTag{ + BoolStr: true, + IntStr: 42, + UintptrStr: 44, + StrStr: "xzbit", + NumberStr: "46", + }, + want: `{ + "BoolStr": "true", + "IntStr": "42", + "UintptrStr": "44", + "StrStr": "\"xzbit\"", + "NumberStr": "46" + }`, + }, + { + // See golang.org/issues/38173. + name: "StringDoubleEscapes", + in: StringTag{ + StrStr: "\b\f\n\r\t\"\\", + NumberStr: "0", // just to satisfy the roundtrip + }, + want: `{ + "BoolStr": "false", + "IntStr": "0", + "UintptrStr": "0", + "StrStr": "\"\\u0008\\u000c\\n\\r\\t\\\"\\\\\"", + "NumberStr": "0" + }`, + }, } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + // Indent with a tab prefix to make the multi-line string + // literals in the table nicer to read. + got, err := MarshalIndent(&test.in, "\t\t\t", "\t") + if err != nil { + t.Fatal(err) + } + if got := string(got); got != test.want { + t.Fatalf(" got: %s\nwant: %s\n", got, test.want) + } - // Verify that it round-trips. - var s2 StringTag - err = NewDecoder(bytes.NewReader(got)).Decode(&s2) - if err != nil { - t.Fatalf("Decode: %v", err) - } - if !reflect.DeepEqual(s, s2) { - t.Fatalf("decode didn't match.\nsource: %#v\nEncoded as:\n%s\ndecode: %#v", s, string(got), s2) + // Verify that it round-trips. + var s2 StringTag + if err := Unmarshal(got, &s2); err != nil { + t.Fatalf("Decode: %v", err) + } + if !reflect.DeepEqual(test.in, s2) { + t.Fatalf("decode didn't match.\nsource: %#v\nEncoded as:\n%s\ndecode: %#v", test.in, string(got), s2) + } + }) } } diff --git a/libgo/go/encoding/json/scanner.go b/libgo/go/encoding/json/scanner.go index 552bd70..9dc1903 100644 --- a/libgo/go/encoding/json/scanner.go +++ b/libgo/go/encoding/json/scanner.go @@ -139,6 +139,10 @@ const ( parseArrayValue // parsing array value ) +// This limits the max nesting depth to prevent stack overflow. +// This is permitted by https://tools.ietf.org/html/rfc7159#section-9 +const maxNestingDepth = 10000 + // reset prepares the scanner for use. // It must be called before calling s.step. func (s *scanner) reset() { @@ -168,8 +172,13 @@ func (s *scanner) eof() int { } // pushParseState pushes a new parse state p onto the parse stack. -func (s *scanner) pushParseState(p int) { - s.parseState = append(s.parseState, p) +// an error state is returned if maxNestingDepth was exceeded, otherwise successState is returned. +func (s *scanner) pushParseState(c byte, newParseState int, successState int) int { + s.parseState = append(s.parseState, newParseState) + if len(s.parseState) <= maxNestingDepth { + return successState + } + return s.error(c, "exceeded max depth") } // popParseState pops a parse state (already obtained) off the stack @@ -186,12 +195,12 @@ func (s *scanner) popParseState() { } func isSpace(c byte) bool { - return c == ' ' || c == '\t' || c == '\r' || c == '\n' + return c <= ' ' && (c == ' ' || c == '\t' || c == '\r' || c == '\n') } // stateBeginValueOrEmpty is the state after reading `[`. func stateBeginValueOrEmpty(s *scanner, c byte) int { - if c <= ' ' && isSpace(c) { + if isSpace(c) { return scanSkipSpace } if c == ']' { @@ -202,18 +211,16 @@ func stateBeginValueOrEmpty(s *scanner, c byte) int { // stateBeginValue is the state at the beginning of the input. func stateBeginValue(s *scanner, c byte) int { - if c <= ' ' && isSpace(c) { + if isSpace(c) { return scanSkipSpace } switch c { case '{': s.step = stateBeginStringOrEmpty - s.pushParseState(parseObjectKey) - return scanBeginObject + return s.pushParseState(c, parseObjectKey, scanBeginObject) case '[': s.step = stateBeginValueOrEmpty - s.pushParseState(parseArrayValue) - return scanBeginArray + return s.pushParseState(c, parseArrayValue, scanBeginArray) case '"': s.step = stateInString return scanBeginLiteral @@ -242,7 +249,7 @@ func stateBeginValue(s *scanner, c byte) int { // stateBeginStringOrEmpty is the state after reading `{`. func stateBeginStringOrEmpty(s *scanner, c byte) int { - if c <= ' ' && isSpace(c) { + if isSpace(c) { return scanSkipSpace } if c == '}' { @@ -255,7 +262,7 @@ func stateBeginStringOrEmpty(s *scanner, c byte) int { // stateBeginString is the state after reading `{"key": value,`. func stateBeginString(s *scanner, c byte) int { - if c <= ' ' && isSpace(c) { + if isSpace(c) { return scanSkipSpace } if c == '"' { @@ -275,7 +282,7 @@ func stateEndValue(s *scanner, c byte) int { s.endTop = true return stateEndTop(s, c) } - if c <= ' ' && isSpace(c) { + if isSpace(c) { s.step = stateEndValue return scanSkipSpace } diff --git a/libgo/go/encoding/json/stream_test.go b/libgo/go/encoding/json/stream_test.go index ebb4f23..c9e5334 100644 --- a/libgo/go/encoding/json/stream_test.go +++ b/libgo/go/encoding/json/stream_test.go @@ -144,14 +144,15 @@ func TestEncoderSetEscapeHTML(t *testing.T) { }, { "stringOption", stringOption, - `{"bar":"\"\u003chtml\u003efoobar\u003c/html\u003e\""}`, + `{"bar":"\"\\u003chtml\\u003efoobar\\u003c/html\\u003e\""}`, `{"bar":"\"<html>foobar</html>\""}`, }, } { var buf bytes.Buffer enc := NewEncoder(&buf) if err := enc.Encode(tt.v); err != nil { - t.Fatalf("Encode(%s): %s", tt.name, err) + t.Errorf("Encode(%s): %s", tt.name, err) + continue } if got := strings.TrimSpace(buf.String()); got != tt.wantEscape { t.Errorf("Encode(%s) = %#q, want %#q", tt.name, got, tt.wantEscape) @@ -159,7 +160,8 @@ func TestEncoderSetEscapeHTML(t *testing.T) { buf.Reset() enc.SetEscapeHTML(false) if err := enc.Encode(tt.v); err != nil { - t.Fatalf("SetEscapeHTML(false) Encode(%s): %s", tt.name, err) + t.Errorf("SetEscapeHTML(false) Encode(%s): %s", tt.name, err) + continue } if got := strings.TrimSpace(buf.String()); got != tt.want { t.Errorf("SetEscapeHTML(false) Encode(%s) = %#q, want %#q", |