aboutsummaryrefslogtreecommitdiff
path: root/libgo/go/encoding
diff options
context:
space:
mode:
author	Ian Lance Taylor <iant@golang.org>	2021-07-30 14:28:58 -0700
committer	Ian Lance Taylor <iant@golang.org>	2021-08-12 20:23:07 -0700
commitc5b21c3f4c17b0649155035d2f9aa97b2da8a813 (patch)
treec6d3a68b503ba5b16182acbb958e3e5dbc95a43b /libgo/go/encoding
parent72be20e20299ec57b4bc9ba03d5b7d6bf10e97cc (diff)
downloadgcc-c5b21c3f4c17b0649155035d2f9aa97b2da8a813.zip
gcc-c5b21c3f4c17b0649155035d2f9aa97b2da8a813.tar.gz
gcc-c5b21c3f4c17b0649155035d2f9aa97b2da8a813.tar.bz2
libgo: update to Go1.17rc2
Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/341629
Diffstat (limited to 'libgo/go/encoding')
-rw-r--r--libgo/go/encoding/asn1/asn1.go2
-rw-r--r--libgo/go/encoding/asn1/marshal.go2
-rw-r--r--libgo/go/encoding/asn1/marshal_test.go2
-rw-r--r--libgo/go/encoding/binary/varint.go7
-rw-r--r--libgo/go/encoding/binary/varint_test.go63
-rw-r--r--libgo/go/encoding/csv/fuzz.go1
-rw-r--r--libgo/go/encoding/csv/reader.go71
-rw-r--r--libgo/go/encoding/csv/reader_test.go909
-rw-r--r--libgo/go/encoding/gob/debug.go1
-rw-r--r--libgo/go/encoding/gob/decgen.go1
-rw-r--r--libgo/go/encoding/gob/decoder.go5
-rw-r--r--libgo/go/encoding/gob/dump.go1
-rw-r--r--libgo/go/encoding/gob/encgen.go1
-rw-r--r--libgo/go/encoding/gob/encoder_test.go25
-rw-r--r--libgo/go/encoding/json/decode.go51
-rw-r--r--libgo/go/encoding/json/encode.go41
-rw-r--r--libgo/go/encoding/json/encode_test.go21
-rw-r--r--libgo/go/encoding/json/fuzz.go1
-rw-r--r--libgo/go/encoding/xml/typeinfo.go2
-rw-r--r--libgo/go/encoding/xml/xml.go13
-rw-r--r--libgo/go/encoding/xml/xml_test.go65
21 files changed, 834 insertions, 451 deletions
diff --git a/libgo/go/encoding/asn1/asn1.go b/libgo/go/encoding/asn1/asn1.go
index f9b9cb4..cffc06d 100644
--- a/libgo/go/encoding/asn1/asn1.go
+++ b/libgo/go/encoding/asn1/asn1.go
@@ -914,7 +914,7 @@ func parseField(v reflect.Value, bytes []byte, initOffset int, params fieldParam
structType := fieldType
for i := 0; i < structType.NumField(); i++ {
- if structType.Field(i).PkgPath != "" {
+ if !structType.Field(i).IsExported() {
err = StructuralError{"struct contains unexported fields"}
return
}
diff --git a/libgo/go/encoding/asn1/marshal.go b/libgo/go/encoding/asn1/marshal.go
index 0d34d5a..5b4d786 100644
--- a/libgo/go/encoding/asn1/marshal.go
+++ b/libgo/go/encoding/asn1/marshal.go
@@ -488,7 +488,7 @@ func makeBody(value reflect.Value, params fieldParameters) (e encoder, err error
t := v.Type()
for i := 0; i < t.NumField(); i++ {
- if t.Field(i).PkgPath != "" {
+ if !t.Field(i).IsExported() {
return nil, StructuralError{"struct contains unexported fields"}
}
}
diff --git a/libgo/go/encoding/asn1/marshal_test.go b/libgo/go/encoding/asn1/marshal_test.go
index e3a7d8f..f0217ba 100644
--- a/libgo/go/encoding/asn1/marshal_test.go
+++ b/libgo/go/encoding/asn1/marshal_test.go
@@ -306,7 +306,7 @@ func TestIssue11130(t *testing.T) {
return
}
if !reflect.DeepEqual(v, v1) {
- t.Errorf("got: %#v data=%q , want : %#v data=%q\n ", v1, data1, v, data)
+ t.Errorf("got: %#v data=%q, want : %#v data=%q\n ", v1, data1, v, data)
}
}
diff --git a/libgo/go/encoding/binary/varint.go b/libgo/go/encoding/binary/varint.go
index 1fa325d..8fe20b5 100644
--- a/libgo/go/encoding/binary/varint.go
+++ b/libgo/go/encoding/binary/varint.go
@@ -61,8 +61,13 @@ func Uvarint(buf []byte) (uint64, int) {
var x uint64
var s uint
for i, b := range buf {
+ if i == MaxVarintLen64 {
+ // Catch byte reads past MaxVarintLen64.
+ // See issue https://golang.org/issues/41185
+ return 0, -(i + 1) // overflow
+ }
if b < 0x80 {
- if i >= MaxVarintLen64 || i == MaxVarintLen64-1 && b > 1 {
+ if i == MaxVarintLen64-1 && b > 1 {
return 0, -(i + 1) // overflow
}
return x | uint64(b)<<s, i + 1
diff --git a/libgo/go/encoding/binary/varint_test.go b/libgo/go/encoding/binary/varint_test.go
index 6ef4c99..d025a67 100644
--- a/libgo/go/encoding/binary/varint_test.go
+++ b/libgo/go/encoding/binary/varint_test.go
@@ -7,6 +7,7 @@ package binary
import (
"bytes"
"io"
+ "math"
"testing"
)
@@ -121,10 +122,66 @@ func TestBufferTooSmall(t *testing.T) {
}
}
+// Ensure that we catch overflows of bytes going past MaxVarintLen64.
+// See issue https://golang.org/issues/41185
+func TestBufferTooBigWithOverflow(t *testing.T) {
+ tests := []struct {
+ in []byte
+ name string
+ wantN int
+ wantValue uint64
+ }{
+ {
+ name: "invalid: 1000 bytes",
+ in: func() []byte {
+ b := make([]byte, 1000)
+ for i := range b {
+ b[i] = 0xff
+ }
+ b[999] = 0
+ return b
+ }(),
+ wantN: -11,
+ wantValue: 0,
+ },
+ {
+ name: "valid: math.MaxUint64-40",
+ in: []byte{0xd7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01},
+ wantValue: math.MaxUint64 - 40,
+ wantN: 10,
+ },
+ {
+ name: "invalid: with more than MaxVarintLen64 bytes",
+ in: []byte{0xd7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01},
+ wantN: -11,
+ wantValue: 0,
+ },
+ {
+ name: "invalid: 10th byte",
+ in: []byte{0xd7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f},
+ wantN: -10,
+ wantValue: 0,
+ },
+ }
+
+ for _, tt := range tests {
+ tt := tt
+ t.Run(tt.name, func(t *testing.T) {
+ value, n := Uvarint(tt.in)
+ if g, w := n, tt.wantN; g != w {
+ t.Errorf("bytes returned=%d, want=%d", g, w)
+ }
+ if g, w := value, tt.wantValue; g != w {
+ t.Errorf("value=%d, want=%d", g, w)
+ }
+ })
+ }
+}
+
func testOverflow(t *testing.T, buf []byte, x0 uint64, n0 int, err0 error) {
x, n := Uvarint(buf)
if x != 0 || n != n0 {
- t.Errorf("Uvarint(%v): got x = %d, n = %d; want 0, %d", buf, x, n, n0)
+ t.Errorf("Uvarint(% X): got x = %d, n = %d; want 0, %d", buf, x, n, n0)
}
r := bytes.NewReader(buf)
@@ -140,8 +197,8 @@ func testOverflow(t *testing.T, buf []byte, x0 uint64, n0 int, err0 error) {
func TestOverflow(t *testing.T) {
testOverflow(t, []byte{0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x2}, 0, -10, overflow)
- testOverflow(t, []byte{0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x1, 0, 0}, 0, -13, overflow)
- testOverflow(t, []byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, 1<<64-1, 0, overflow) // 11 bytes, should overflow
+ testOverflow(t, []byte{0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x1, 0, 0}, 0, -11, overflow)
+ testOverflow(t, []byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, 1<<64-1, -11, overflow) // 11 bytes, should overflow
}
func TestNonCanonicalZero(t *testing.T) {
diff --git a/libgo/go/encoding/csv/fuzz.go b/libgo/go/encoding/csv/fuzz.go
index 8be21d5..a03fa83 100644
--- a/libgo/go/encoding/csv/fuzz.go
+++ b/libgo/go/encoding/csv/fuzz.go
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+//go:build gofuzz
// +build gofuzz
package csv
diff --git a/libgo/go/encoding/csv/reader.go b/libgo/go/encoding/csv/reader.go
index c40aa50..f860f4f 100644
--- a/libgo/go/encoding/csv/reader.go
+++ b/libgo/go/encoding/csv/reader.go
@@ -66,7 +66,7 @@ import (
type ParseError struct {
StartLine int // Line where the record starts
Line int // Line where the error occurred
- Column int // Column (rune index) where the error occurred
+ Column int // Column (1-based byte index) where the error occurred
Err error // The actual error
}
@@ -162,6 +162,10 @@ type Reader struct {
// The i'th field ends at offset fieldIndexes[i] in recordBuffer.
fieldIndexes []int
+ // fieldPositions is an index of field positions for the
+ // last record returned by Read.
+ fieldPositions []position
+
// lastRecord is a record cache and only used when ReuseRecord == true.
lastRecord []string
}
@@ -192,6 +196,25 @@ func (r *Reader) Read() (record []string, err error) {
return record, err
}
+// FieldPos returns the line and column corresponding to
+// the start of the field with the given index in the slice most recently
+// returned by Read. Numbering of lines and columns starts at 1;
+// columns are counted in bytes, not runes.
+//
+// If this is called with an out-of-bounds index, it panics.
+func (r *Reader) FieldPos(field int) (line, column int) {
+ if field < 0 || field >= len(r.fieldPositions) {
+ panic("out of range index passed to FieldPos")
+ }
+ p := &r.fieldPositions[field]
+ return p.line, p.col
+}
+
+// pos holds the position of a field in the current line.
+type position struct {
+ line, col int
+}
+
// ReadAll reads all the remaining records from r.
// Each record is a slice of fields.
// A successful call returns err == nil, not err == io.EOF. Because ReadAll is
@@ -260,7 +283,7 @@ func (r *Reader) readRecord(dst []string) ([]string, error) {
}
// Read line (automatically skipping past empty lines and any comments).
- var line, fullLine []byte
+ var line []byte
var errRead error
for errRead == nil {
line, errRead = r.readLine()
@@ -272,7 +295,6 @@ func (r *Reader) readRecord(dst []string) ([]string, error) {
line = nil
continue // Skip empty lines
}
- fullLine = line
break
}
if errRead == io.EOF {
@@ -286,10 +308,20 @@ func (r *Reader) readRecord(dst []string) ([]string, error) {
recLine := r.numLine // Starting line for record
r.recordBuffer = r.recordBuffer[:0]
r.fieldIndexes = r.fieldIndexes[:0]
+ r.fieldPositions = r.fieldPositions[:0]
+ pos := position{line: r.numLine, col: 1}
parseField:
for {
if r.TrimLeadingSpace {
- line = bytes.TrimLeftFunc(line, unicode.IsSpace)
+ i := bytes.IndexFunc(line, func(r rune) bool {
+ return !unicode.IsSpace(r)
+ })
+ if i < 0 {
+ i = len(line)
+ pos.col -= lengthNL(line)
+ }
+ line = line[i:]
+ pos.col += i
}
if len(line) == 0 || line[0] != '"' {
// Non-quoted string field
@@ -303,48 +335,56 @@ parseField:
// Check to make sure a quote does not appear in field.
if !r.LazyQuotes {
if j := bytes.IndexByte(field, '"'); j >= 0 {
- col := utf8.RuneCount(fullLine[:len(fullLine)-len(line[j:])])
+ col := pos.col + j
err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrBareQuote}
break parseField
}
}
r.recordBuffer = append(r.recordBuffer, field...)
r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
+ r.fieldPositions = append(r.fieldPositions, pos)
if i >= 0 {
line = line[i+commaLen:]
+ pos.col += i + commaLen
continue parseField
}
break parseField
} else {
// Quoted string field
+ fieldPos := pos
line = line[quoteLen:]
+ pos.col += quoteLen
for {
i := bytes.IndexByte(line, '"')
if i >= 0 {
// Hit next quote.
r.recordBuffer = append(r.recordBuffer, line[:i]...)
line = line[i+quoteLen:]
+ pos.col += i + quoteLen
switch rn := nextRune(line); {
case rn == '"':
// `""` sequence (append quote).
r.recordBuffer = append(r.recordBuffer, '"')
line = line[quoteLen:]
+ pos.col += quoteLen
case rn == r.Comma:
// `",` sequence (end of field).
line = line[commaLen:]
+ pos.col += commaLen
r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
+ r.fieldPositions = append(r.fieldPositions, fieldPos)
continue parseField
case lengthNL(line) == len(line):
// `"\n` sequence (end of line).
r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
+ r.fieldPositions = append(r.fieldPositions, fieldPos)
break parseField
case r.LazyQuotes:
// `"` sequence (bare quote).
r.recordBuffer = append(r.recordBuffer, '"')
default:
// `"*` sequence (invalid non-escaped quote).
- col := utf8.RuneCount(fullLine[:len(fullLine)-len(line)-quoteLen])
- err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrQuote}
+ err = &ParseError{StartLine: recLine, Line: r.numLine, Column: pos.col - quoteLen, Err: ErrQuote}
break parseField
}
} else if len(line) > 0 {
@@ -353,19 +393,23 @@ parseField:
if errRead != nil {
break parseField
}
+ pos.col += len(line)
line, errRead = r.readLine()
+ if len(line) > 0 {
+ pos.line++
+ pos.col = 1
+ }
if errRead == io.EOF {
errRead = nil
}
- fullLine = line
} else {
// Abrupt end of file (EOF or error).
if !r.LazyQuotes && errRead == nil {
- col := utf8.RuneCount(fullLine)
- err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrQuote}
+ err = &ParseError{StartLine: recLine, Line: pos.line, Column: pos.col, Err: ErrQuote}
break parseField
}
r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
+ r.fieldPositions = append(r.fieldPositions, fieldPos)
break parseField
}
}
@@ -392,7 +436,12 @@ parseField:
// Check or update the expected fields per record.
if r.FieldsPerRecord > 0 {
if len(dst) != r.FieldsPerRecord && err == nil {
- err = &ParseError{StartLine: recLine, Line: recLine, Err: ErrFieldCount}
+ err = &ParseError{
+ StartLine: recLine,
+ Line: recLine,
+ Column: 1,
+ Err: ErrFieldCount,
+ }
}
} else if r.FieldsPerRecord == 0 {
r.FieldsPerRecord = len(dst)
diff --git a/libgo/go/encoding/csv/reader_test.go b/libgo/go/encoding/csv/reader_test.go
index 5121791..abe3fdf 100644
--- a/libgo/go/encoding/csv/reader_test.go
+++ b/libgo/go/encoding/csv/reader_test.go
@@ -5,6 +5,8 @@
package csv
import (
+ "errors"
+ "fmt"
"io"
"reflect"
"strings"
@@ -12,405 +14,544 @@ import (
"unicode/utf8"
)
-func TestRead(t *testing.T) {
- tests := []struct {
- Name string
- Input string
- Output [][]string
- Error error
+type readTest struct {
+ Name string
+ Input string
+ Output [][]string
+ Positions [][][2]int
+ Errors []error
+
+ // These fields are copied into the Reader
+ Comma rune
+ Comment rune
+ UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1
+ FieldsPerRecord int
+ LazyQuotes bool
+ TrimLeadingSpace bool
+ ReuseRecord bool
+}
- // These fields are copied into the Reader
- Comma rune
- Comment rune
- UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1
- FieldsPerRecord int
- LazyQuotes bool
- TrimLeadingSpace bool
- ReuseRecord bool
- }{{
- Name: "Simple",
- Input: "a,b,c\n",
- Output: [][]string{{"a", "b", "c"}},
- }, {
- Name: "CRLF",
- Input: "a,b\r\nc,d\r\n",
- Output: [][]string{{"a", "b"}, {"c", "d"}},
- }, {
- Name: "BareCR",
- Input: "a,b\rc,d\r\n",
- Output: [][]string{{"a", "b\rc", "d"}},
- }, {
- Name: "RFC4180test",
- Input: `#field1,field2,field3
-"aaa","bb
-b","ccc"
-"a,a","b""bb","ccc"
-zzz,yyy,xxx
+// In these tests, the §, ¶ and ∑ characters in readTest.Input are used to denote
+// the start of a field, a record boundary and the position of an error respectively.
+// They are removed before parsing and are used to verify the position
+// information reported by FieldPos.
+
+var readTests = []readTest{{
+ Name: "Simple",
+ Input: "§a,§b,§c\n",
+ Output: [][]string{{"a", "b", "c"}},
+}, {
+ Name: "CRLF",
+ Input: "§a,§b\r\n¶§c,§d\r\n",
+ Output: [][]string{{"a", "b"}, {"c", "d"}},
+}, {
+ Name: "BareCR",
+ Input: "§a,§b\rc,§d\r\n",
+ Output: [][]string{{"a", "b\rc", "d"}},
+}, {
+ Name: "RFC4180test",
+ Input: `§#field1,§field2,§field3
+¶§"aaa",§"bb
+b",§"ccc"
+¶§"a,a",§"b""bb",§"ccc"
+¶§zzz,§yyy,§xxx
`,
- Output: [][]string{
- {"#field1", "field2", "field3"},
- {"aaa", "bb\nb", "ccc"},
- {"a,a", `b"bb`, "ccc"},
- {"zzz", "yyy", "xxx"},
- },
- UseFieldsPerRecord: true,
- FieldsPerRecord: 0,
- }, {
- Name: "NoEOLTest",
- Input: "a,b,c",
- Output: [][]string{{"a", "b", "c"}},
- }, {
- Name: "Semicolon",
- Input: "a;b;c\n",
- Output: [][]string{{"a", "b", "c"}},
- Comma: ';',
- }, {
- Name: "MultiLine",
- Input: `"two
-line","one line","three
+ Output: [][]string{
+ {"#field1", "field2", "field3"},
+ {"aaa", "bb\nb", "ccc"},
+ {"a,a", `b"bb`, "ccc"},
+ {"zzz", "yyy", "xxx"},
+ },
+ UseFieldsPerRecord: true,
+ FieldsPerRecord: 0,
+}, {
+ Name: "NoEOLTest",
+ Input: "§a,§b,§c",
+ Output: [][]string{{"a", "b", "c"}},
+}, {
+ Name: "Semicolon",
+ Input: "§a;§b;§c\n",
+ Output: [][]string{{"a", "b", "c"}},
+ Comma: ';',
+}, {
+ Name: "MultiLine",
+ Input: `§"two
+line",§"one line",§"three
line
field"`,
- Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}},
- }, {
- Name: "BlankLine",
- Input: "a,b,c\n\nd,e,f\n\n",
- Output: [][]string{
- {"a", "b", "c"},
- {"d", "e", "f"},
- },
- }, {
- Name: "BlankLineFieldCount",
- Input: "a,b,c\n\nd,e,f\n\n",
- Output: [][]string{
- {"a", "b", "c"},
- {"d", "e", "f"},
- },
- UseFieldsPerRecord: true,
- FieldsPerRecord: 0,
- }, {
- Name: "TrimSpace",
- Input: " a, b, c\n",
- Output: [][]string{{"a", "b", "c"}},
- TrimLeadingSpace: true,
- }, {
- Name: "LeadingSpace",
- Input: " a, b, c\n",
- Output: [][]string{{" a", " b", " c"}},
- }, {
- Name: "Comment",
- Input: "#1,2,3\na,b,c\n#comment",
- Output: [][]string{{"a", "b", "c"}},
- Comment: '#',
- }, {
- Name: "NoComment",
- Input: "#1,2,3\na,b,c",
- Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}},
- }, {
- Name: "LazyQuotes",
- Input: `a "word","1"2",a","b`,
- Output: [][]string{{`a "word"`, `1"2`, `a"`, `b`}},
- LazyQuotes: true,
- }, {
- Name: "BareQuotes",
- Input: `a "word","1"2",a"`,
- Output: [][]string{{`a "word"`, `1"2`, `a"`}},
- LazyQuotes: true,
- }, {
- Name: "BareDoubleQuotes",
- Input: `a""b,c`,
- Output: [][]string{{`a""b`, `c`}},
- LazyQuotes: true,
- }, {
- Name: "BadDoubleQuotes",
- Input: `a""b,c`,
- Error: &ParseError{StartLine: 1, Line: 1, Column: 1, Err: ErrBareQuote},
- }, {
- Name: "TrimQuote",
- Input: ` "a"," b",c`,
- Output: [][]string{{"a", " b", "c"}},
- TrimLeadingSpace: true,
- }, {
- Name: "BadBareQuote",
- Input: `a "word","b"`,
- Error: &ParseError{StartLine: 1, Line: 1, Column: 2, Err: ErrBareQuote},
- }, {
- Name: "BadTrailingQuote",
- Input: `"a word",b"`,
- Error: &ParseError{StartLine: 1, Line: 1, Column: 10, Err: ErrBareQuote},
- }, {
- Name: "ExtraneousQuote",
- Input: `"a "word","b"`,
- Error: &ParseError{StartLine: 1, Line: 1, Column: 3, Err: ErrQuote},
- }, {
- Name: "BadFieldCount",
- Input: "a,b,c\nd,e",
- Error: &ParseError{StartLine: 2, Line: 2, Err: ErrFieldCount},
- UseFieldsPerRecord: true,
- FieldsPerRecord: 0,
- }, {
- Name: "BadFieldCount1",
- Input: `a,b,c`,
- Error: &ParseError{StartLine: 1, Line: 1, Err: ErrFieldCount},
- UseFieldsPerRecord: true,
- FieldsPerRecord: 2,
- }, {
- Name: "FieldCount",
- Input: "a,b,c\nd,e",
- Output: [][]string{{"a", "b", "c"}, {"d", "e"}},
- }, {
- Name: "TrailingCommaEOF",
- Input: "a,b,c,",
- Output: [][]string{{"a", "b", "c", ""}},
- }, {
- Name: "TrailingCommaEOL",
- Input: "a,b,c,\n",
- Output: [][]string{{"a", "b", "c", ""}},
- }, {
- Name: "TrailingCommaSpaceEOF",
- Input: "a,b,c, ",
- Output: [][]string{{"a", "b", "c", ""}},
- TrimLeadingSpace: true,
- }, {
- Name: "TrailingCommaSpaceEOL",
- Input: "a,b,c, \n",
- Output: [][]string{{"a", "b", "c", ""}},
- TrimLeadingSpace: true,
- }, {
- Name: "TrailingCommaLine3",
- Input: "a,b,c\nd,e,f\ng,hi,",
- Output: [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}},
- TrimLeadingSpace: true,
- }, {
- Name: "NotTrailingComma3",
- Input: "a,b,c, \n",
- Output: [][]string{{"a", "b", "c", " "}},
- }, {
- Name: "CommaFieldTest",
- Input: `x,y,z,w
-x,y,z,
-x,y,,
-x,,,
-,,,
-"x","y","z","w"
-"x","y","z",""
-"x","y","",""
-"x","","",""
-"","","",""
+ Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}},
+}, {
+ Name: "BlankLine",
+ Input: "§a,§b,§c\n\n¶§d,§e,§f\n\n",
+ Output: [][]string{
+ {"a", "b", "c"},
+ {"d", "e", "f"},
+ },
+}, {
+ Name: "BlankLineFieldCount",
+ Input: "§a,§b,§c\n\n¶§d,§e,§f\n\n",
+ Output: [][]string{
+ {"a", "b", "c"},
+ {"d", "e", "f"},
+ },
+ UseFieldsPerRecord: true,
+ FieldsPerRecord: 0,
+}, {
+ Name: "TrimSpace",
+ Input: " §a, §b, §c\n",
+ Output: [][]string{{"a", "b", "c"}},
+ TrimLeadingSpace: true,
+}, {
+ Name: "LeadingSpace",
+ Input: "§ a,§ b,§ c\n",
+ Output: [][]string{{" a", " b", " c"}},
+}, {
+ Name: "Comment",
+ Input: "#1,2,3\n§a,§b,§c\n#comment",
+ Output: [][]string{{"a", "b", "c"}},
+ Comment: '#',
+}, {
+ Name: "NoComment",
+ Input: "§#1,§2,§3\n¶§a,§b,§c",
+ Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}},
+}, {
+ Name: "LazyQuotes",
+ Input: `§a "word",§"1"2",§a",§"b`,
+ Output: [][]string{{`a "word"`, `1"2`, `a"`, `b`}},
+ LazyQuotes: true,
+}, {
+ Name: "BareQuotes",
+ Input: `§a "word",§"1"2",§a"`,
+ Output: [][]string{{`a "word"`, `1"2`, `a"`}},
+ LazyQuotes: true,
+}, {
+ Name: "BareDoubleQuotes",
+ Input: `§a""b,§c`,
+ Output: [][]string{{`a""b`, `c`}},
+ LazyQuotes: true,
+}, {
+ Name: "BadDoubleQuotes",
+ Input: `§a∑""b,c`,
+ Errors: []error{&ParseError{Err: ErrBareQuote}},
+}, {
+ Name: "TrimQuote",
+ Input: ` §"a",§" b",§c`,
+ Output: [][]string{{"a", " b", "c"}},
+ TrimLeadingSpace: true,
+}, {
+ Name: "BadBareQuote",
+ Input: `§a ∑"word","b"`,
+ Errors: []error{&ParseError{Err: ErrBareQuote}},
+}, {
+ Name: "BadTrailingQuote",
+ Input: `§"a word",b∑"`,
+ Errors: []error{&ParseError{Err: ErrBareQuote}},
+}, {
+ Name: "ExtraneousQuote",
+ Input: `§"a ∑"word","b"`,
+ Errors: []error{&ParseError{Err: ErrQuote}},
+}, {
+ Name: "BadFieldCount",
+ Input: "§a,§b,§c\n¶∑§d,§e",
+ Errors: []error{nil, &ParseError{Err: ErrFieldCount}},
+ Output: [][]string{{"a", "b", "c"}, {"d", "e"}},
+ UseFieldsPerRecord: true,
+ FieldsPerRecord: 0,
+}, {
+ Name: "BadFieldCountMultiple",
+ Input: "§a,§b,§c\n¶∑§d,§e\n¶∑§f",
+ Errors: []error{nil, &ParseError{Err: ErrFieldCount}, &ParseError{Err: ErrFieldCount}},
+ Output: [][]string{{"a", "b", "c"}, {"d", "e"}, {"f"}},
+ UseFieldsPerRecord: true,
+ FieldsPerRecord: 0,
+}, {
+ Name: "BadFieldCount1",
+ Input: `§∑a,§b,§c`,
+ Errors: []error{&ParseError{Err: ErrFieldCount}},
+ Output: [][]string{{"a", "b", "c"}},
+ UseFieldsPerRecord: true,
+ FieldsPerRecord: 2,
+}, {
+ Name: "FieldCount",
+ Input: "§a,§b,§c\n¶§d,§e",
+ Output: [][]string{{"a", "b", "c"}, {"d", "e"}},
+}, {
+ Name: "TrailingCommaEOF",
+ Input: "§a,§b,§c,§",
+ Output: [][]string{{"a", "b", "c", ""}},
+}, {
+ Name: "TrailingCommaEOL",
+ Input: "§a,§b,§c,§\n",
+ Output: [][]string{{"a", "b", "c", ""}},
+}, {
+ Name: "TrailingCommaSpaceEOF",
+ Input: "§a,§b,§c, §",
+ Output: [][]string{{"a", "b", "c", ""}},
+ TrimLeadingSpace: true,
+}, {
+ Name: "TrailingCommaSpaceEOL",
+ Input: "§a,§b,§c, §\n",
+ Output: [][]string{{"a", "b", "c", ""}},
+ TrimLeadingSpace: true,
+}, {
+ Name: "TrailingCommaLine3",
+ Input: "§a,§b,§c\n¶§d,§e,§f\n¶§g,§hi,§",
+ Output: [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}},
+ TrimLeadingSpace: true,
+}, {
+ Name: "NotTrailingComma3",
+ Input: "§a,§b,§c,§ \n",
+ Output: [][]string{{"a", "b", "c", " "}},
+}, {
+ Name: "CommaFieldTest",
+ Input: `§x,§y,§z,§w
+¶§x,§y,§z,§
+¶§x,§y,§,§
+¶§x,§,§,§
+¶§,§,§,§
+¶§"x",§"y",§"z",§"w"
+¶§"x",§"y",§"z",§""
+¶§"x",§"y",§"",§""
+¶§"x",§"",§"",§""
+¶§"",§"",§"",§""
`,
- Output: [][]string{
- {"x", "y", "z", "w"},
- {"x", "y", "z", ""},
- {"x", "y", "", ""},
- {"x", "", "", ""},
- {"", "", "", ""},
- {"x", "y", "z", "w"},
- {"x", "y", "z", ""},
- {"x", "y", "", ""},
- {"x", "", "", ""},
- {"", "", "", ""},
- },
- }, {
- Name: "TrailingCommaIneffective1",
- Input: "a,b,\nc,d,e",
- Output: [][]string{
- {"a", "b", ""},
- {"c", "d", "e"},
- },
- TrimLeadingSpace: true,
- }, {
- Name: "ReadAllReuseRecord",
- Input: "a,b\nc,d",
- Output: [][]string{
- {"a", "b"},
- {"c", "d"},
- },
- ReuseRecord: true,
- }, {
- Name: "StartLine1", // Issue 19019
- Input: "a,\"b\nc\"d,e",
- Error: &ParseError{StartLine: 1, Line: 2, Column: 1, Err: ErrQuote},
- }, {
- Name: "StartLine2",
- Input: "a,b\n\"d\n\n,e",
- Error: &ParseError{StartLine: 2, Line: 5, Column: 0, Err: ErrQuote},
- }, {
- Name: "CRLFInQuotedField", // Issue 21201
- Input: "A,\"Hello\r\nHi\",B\r\n",
- Output: [][]string{
- {"A", "Hello\nHi", "B"},
- },
- }, {
- Name: "BinaryBlobField", // Issue 19410
- Input: "x09\x41\xb4\x1c,aktau",
- Output: [][]string{{"x09A\xb4\x1c", "aktau"}},
- }, {
- Name: "TrailingCR",
- Input: "field1,field2\r",
- Output: [][]string{{"field1", "field2"}},
- }, {
- Name: "QuotedTrailingCR",
- Input: "\"field\"\r",
- Output: [][]string{{"field"}},
- }, {
- Name: "QuotedTrailingCRCR",
- Input: "\"field\"\r\r",
- Error: &ParseError{StartLine: 1, Line: 1, Column: 6, Err: ErrQuote},
- }, {
- Name: "FieldCR",
- Input: "field\rfield\r",
- Output: [][]string{{"field\rfield"}},
- }, {
- Name: "FieldCRCR",
- Input: "field\r\rfield\r\r",
- Output: [][]string{{"field\r\rfield\r"}},
- }, {
- Name: "FieldCRCRLF",
- Input: "field\r\r\nfield\r\r\n",
- Output: [][]string{{"field\r"}, {"field\r"}},
- }, {
- Name: "FieldCRCRLFCR",
- Input: "field\r\r\n\rfield\r\r\n\r",
- Output: [][]string{{"field\r"}, {"\rfield\r"}},
- }, {
- Name: "FieldCRCRLFCRCR",
- Input: "field\r\r\n\r\rfield\r\r\n\r\r",
- Output: [][]string{{"field\r"}, {"\r\rfield\r"}, {"\r"}},
- }, {
- Name: "MultiFieldCRCRLFCRCR",
- Input: "field1,field2\r\r\n\r\rfield1,field2\r\r\n\r\r,",
- Output: [][]string{
- {"field1", "field2\r"},
- {"\r\rfield1", "field2\r"},
- {"\r\r", ""},
- },
- }, {
- Name: "NonASCIICommaAndComment",
- Input: "a£b,c£ \td,e\n€ comment\n",
- Output: [][]string{{"a", "b,c", "d,e"}},
- TrimLeadingSpace: true,
- Comma: '£',
- Comment: '€',
- }, {
- Name: "NonASCIICommaAndCommentWithQuotes",
- Input: "a€\" b,\"€ c\nλ comment\n",
- Output: [][]string{{"a", " b,", " c"}},
- Comma: '€',
- Comment: 'λ',
- }, {
- // λ and θ start with the same byte.
- // This tests that the parser doesn't confuse such characters.
- Name: "NonASCIICommaConfusion",
- Input: "\"abθcd\"λefθgh",
- Output: [][]string{{"abθcd", "efθgh"}},
- Comma: 'λ',
- Comment: '€',
- }, {
- Name: "NonASCIICommentConfusion",
- Input: "λ\nλ\nθ\nλ\n",
- Output: [][]string{{"λ"}, {"λ"}, {"λ"}},
- Comment: 'θ',
- }, {
- Name: "QuotedFieldMultipleLF",
- Input: "\"\n\n\n\n\"",
- Output: [][]string{{"\n\n\n\n"}},
- }, {
- Name: "MultipleCRLF",
- Input: "\r\n\r\n\r\n\r\n",
- }, {
- // The implementation may read each line in several chunks if it doesn't fit entirely
- // in the read buffer, so we should test the code to handle that condition.
- Name: "HugeLines",
- Input: strings.Repeat("#ignore\n", 10000) + strings.Repeat("@", 5000) + "," + strings.Repeat("*", 5000),
- Output: [][]string{{strings.Repeat("@", 5000), strings.Repeat("*", 5000)}},
- Comment: '#',
- }, {
- Name: "QuoteWithTrailingCRLF",
- Input: "\"foo\"bar\"\r\n",
- Error: &ParseError{StartLine: 1, Line: 1, Column: 4, Err: ErrQuote},
- }, {
- Name: "LazyQuoteWithTrailingCRLF",
- Input: "\"foo\"bar\"\r\n",
- Output: [][]string{{`foo"bar`}},
- LazyQuotes: true,
- }, {
- Name: "DoubleQuoteWithTrailingCRLF",
- Input: "\"foo\"\"bar\"\r\n",
- Output: [][]string{{`foo"bar`}},
- }, {
- Name: "EvenQuotes",
- Input: `""""""""`,
- Output: [][]string{{`"""`}},
- }, {
- Name: "OddQuotes",
- Input: `"""""""`,
- Error: &ParseError{StartLine: 1, Line: 1, Column: 7, Err: ErrQuote},
- }, {
- Name: "LazyOddQuotes",
- Input: `"""""""`,
- Output: [][]string{{`"""`}},
- LazyQuotes: true,
- }, {
- Name: "BadComma1",
- Comma: '\n',
- Error: errInvalidDelim,
- }, {
- Name: "BadComma2",
- Comma: '\r',
- Error: errInvalidDelim,
- }, {
- Name: "BadComma3",
- Comma: '"',
- Error: errInvalidDelim,
- }, {
- Name: "BadComma4",
- Comma: utf8.RuneError,
- Error: errInvalidDelim,
- }, {
- Name: "BadComment1",
- Comment: '\n',
- Error: errInvalidDelim,
- }, {
- Name: "BadComment2",
- Comment: '\r',
- Error: errInvalidDelim,
- }, {
- Name: "BadComment3",
- Comment: utf8.RuneError,
- Error: errInvalidDelim,
- }, {
- Name: "BadCommaComment",
- Comma: 'X',
- Comment: 'X',
- Error: errInvalidDelim,
- }}
+ Output: [][]string{
+ {"x", "y", "z", "w"},
+ {"x", "y", "z", ""},
+ {"x", "y", "", ""},
+ {"x", "", "", ""},
+ {"", "", "", ""},
+ {"x", "y", "z", "w"},
+ {"x", "y", "z", ""},
+ {"x", "y", "", ""},
+ {"x", "", "", ""},
+ {"", "", "", ""},
+ },
+}, {
+ Name: "TrailingCommaIneffective1",
+ Input: "§a,§b,§\n¶§c,§d,§e",
+ Output: [][]string{
+ {"a", "b", ""},
+ {"c", "d", "e"},
+ },
+ TrimLeadingSpace: true,
+}, {
+ Name: "ReadAllReuseRecord",
+ Input: "§a,§b\n¶§c,§d",
+ Output: [][]string{
+ {"a", "b"},
+ {"c", "d"},
+ },
+ ReuseRecord: true,
+}, {
+ Name: "StartLine1", // Issue 19019
+ Input: "§a,\"b\nc∑\"d,e",
+ Errors: []error{&ParseError{Err: ErrQuote}},
+}, {
+ Name: "StartLine2",
+ Input: "§a,§b\n¶§\"d\n\n,e∑",
+ Errors: []error{nil, &ParseError{Err: ErrQuote}},
+ Output: [][]string{{"a", "b"}},
+}, {
+ Name: "CRLFInQuotedField", // Issue 21201
+ Input: "§A,§\"Hello\r\nHi\",§B\r\n",
+ Output: [][]string{
+ {"A", "Hello\nHi", "B"},
+ },
+}, {
+ Name: "BinaryBlobField", // Issue 19410
+ Input: "§x09\x41\xb4\x1c,§aktau",
+ Output: [][]string{{"x09A\xb4\x1c", "aktau"}},
+}, {
+ Name: "TrailingCR",
+ Input: "§field1,§field2\r",
+ Output: [][]string{{"field1", "field2"}},
+}, {
+ Name: "QuotedTrailingCR",
+ Input: "§\"field\"\r",
+ Output: [][]string{{"field"}},
+}, {
+ Name: "QuotedTrailingCRCR",
+ Input: "§\"field∑\"\r\r",
+ Errors: []error{&ParseError{Err: ErrQuote}},
+}, {
+ Name: "FieldCR",
+ Input: "§field\rfield\r",
+ Output: [][]string{{"field\rfield"}},
+}, {
+ Name: "FieldCRCR",
+ Input: "§field\r\rfield\r\r",
+ Output: [][]string{{"field\r\rfield\r"}},
+}, {
+ Name: "FieldCRCRLF",
+ Input: "§field\r\r\n¶§field\r\r\n",
+ Output: [][]string{{"field\r"}, {"field\r"}},
+}, {
+ Name: "FieldCRCRLFCR",
+ Input: "§field\r\r\n¶§\rfield\r\r\n\r",
+ Output: [][]string{{"field\r"}, {"\rfield\r"}},
+}, {
+ Name: "FieldCRCRLFCRCR",
+ Input: "§field\r\r\n¶§\r\rfield\r\r\n¶§\r\r",
+ Output: [][]string{{"field\r"}, {"\r\rfield\r"}, {"\r"}},
+}, {
+ Name: "MultiFieldCRCRLFCRCR",
+ Input: "§field1,§field2\r\r\n¶§\r\rfield1,§field2\r\r\n¶§\r\r,§",
+ Output: [][]string{
+ {"field1", "field2\r"},
+ {"\r\rfield1", "field2\r"},
+ {"\r\r", ""},
+ },
+}, {
+ Name: "NonASCIICommaAndComment",
+ Input: "§a£§b,c£ \t§d,e\n€ comment\n",
+ Output: [][]string{{"a", "b,c", "d,e"}},
+ TrimLeadingSpace: true,
+ Comma: '£',
+ Comment: '€',
+}, {
+ Name: "NonASCIICommaAndCommentWithQuotes",
+ Input: "§a€§\" b,\"€§ c\nλ comment\n",
+ Output: [][]string{{"a", " b,", " c"}},
+ Comma: '€',
+ Comment: 'λ',
+}, {
+ // λ and θ start with the same byte.
+ // This tests that the parser doesn't confuse such characters.
+ Name: "NonASCIICommaConfusion",
+ Input: "§\"abθcd\"λ§efθgh",
+ Output: [][]string{{"abθcd", "efθgh"}},
+ Comma: 'λ',
+ Comment: '€',
+}, {
+ Name: "NonASCIICommentConfusion",
+ Input: "§λ\n¶§λ\nθ\n¶§λ\n",
+ Output: [][]string{{"λ"}, {"λ"}, {"λ"}},
+ Comment: 'θ',
+}, {
+ Name: "QuotedFieldMultipleLF",
+ Input: "§\"\n\n\n\n\"",
+ Output: [][]string{{"\n\n\n\n"}},
+}, {
+ Name: "MultipleCRLF",
+ Input: "\r\n\r\n\r\n\r\n",
+}, {
+ // The implementation may read each line in several chunks if it doesn't fit entirely
+ // in the read buffer, so we should test the code to handle that condition.
+ Name: "HugeLines",
+ Input: strings.Repeat("#ignore\n", 10000) + "§" + strings.Repeat("@", 5000) + ",§" + strings.Repeat("*", 5000),
+ Output: [][]string{{strings.Repeat("@", 5000), strings.Repeat("*", 5000)}},
+ Comment: '#',
+}, {
+ Name: "QuoteWithTrailingCRLF",
+ Input: "§\"foo∑\"bar\"\r\n",
+ Errors: []error{&ParseError{Err: ErrQuote}},
+}, {
+ Name: "LazyQuoteWithTrailingCRLF",
+ Input: "§\"foo\"bar\"\r\n",
+ Output: [][]string{{`foo"bar`}},
+ LazyQuotes: true,
+}, {
+ Name: "DoubleQuoteWithTrailingCRLF",
+ Input: "§\"foo\"\"bar\"\r\n",
+ Output: [][]string{{`foo"bar`}},
+}, {
+ Name: "EvenQuotes",
+ Input: `§""""""""`,
+ Output: [][]string{{`"""`}},
+}, {
+ Name: "OddQuotes",
+ Input: `§"""""""∑`,
+ Errors: []error{&ParseError{Err: ErrQuote}},
+}, {
+ Name: "LazyOddQuotes",
+ Input: `§"""""""`,
+ Output: [][]string{{`"""`}},
+ LazyQuotes: true,
+}, {
+ Name: "BadComma1",
+ Comma: '\n',
+ Errors: []error{errInvalidDelim},
+}, {
+ Name: "BadComma2",
+ Comma: '\r',
+ Errors: []error{errInvalidDelim},
+}, {
+ Name: "BadComma3",
+ Comma: '"',
+ Errors: []error{errInvalidDelim},
+}, {
+ Name: "BadComma4",
+ Comma: utf8.RuneError,
+ Errors: []error{errInvalidDelim},
+}, {
+ Name: "BadComment1",
+ Comment: '\n',
+ Errors: []error{errInvalidDelim},
+}, {
+ Name: "BadComment2",
+ Comment: '\r',
+ Errors: []error{errInvalidDelim},
+}, {
+ Name: "BadComment3",
+ Comment: utf8.RuneError,
+ Errors: []error{errInvalidDelim},
+}, {
+ Name: "BadCommaComment",
+ Comma: 'X',
+ Comment: 'X',
+ Errors: []error{errInvalidDelim},
+}}
- for _, tt := range tests {
- t.Run(tt.Name, func(t *testing.T) {
- r := NewReader(strings.NewReader(tt.Input))
+func TestRead(t *testing.T) {
+ newReader := func(tt readTest) (*Reader, [][][2]int, map[int][2]int) {
+ positions, errPositions, input := makePositions(tt.Input)
+ r := NewReader(strings.NewReader(input))
- if tt.Comma != 0 {
- r.Comma = tt.Comma
- }
- r.Comment = tt.Comment
- if tt.UseFieldsPerRecord {
- r.FieldsPerRecord = tt.FieldsPerRecord
+ if tt.Comma != 0 {
+ r.Comma = tt.Comma
+ }
+ r.Comment = tt.Comment
+ if tt.UseFieldsPerRecord {
+ r.FieldsPerRecord = tt.FieldsPerRecord
+ } else {
+ r.FieldsPerRecord = -1
+ }
+ r.LazyQuotes = tt.LazyQuotes
+ r.TrimLeadingSpace = tt.TrimLeadingSpace
+ r.ReuseRecord = tt.ReuseRecord
+ return r, positions, errPositions
+ }
+
+ for _, tt := range readTests {
+ t.Run(tt.Name, func(t *testing.T) {
+ r, positions, errPositions := newReader(tt)
+ out, err := r.ReadAll()
+ if wantErr := firstError(tt.Errors, positions, errPositions); wantErr != nil {
+ if !reflect.DeepEqual(err, wantErr) {
+ t.Fatalf("ReadAll() error mismatch:\ngot %v (%#v)\nwant %v (%#v)", err, err, wantErr, wantErr)
+ }
+ if out != nil {
+ t.Fatalf("ReadAll() output:\ngot %q\nwant nil", out)
+ }
} else {
- r.FieldsPerRecord = -1
+ if err != nil {
+ t.Fatalf("unexpected Readall() error: %v", err)
+ }
+ if !reflect.DeepEqual(out, tt.Output) {
+ t.Fatalf("ReadAll() output:\ngot %q\nwant %q", out, tt.Output)
+ }
}
- r.LazyQuotes = tt.LazyQuotes
- r.TrimLeadingSpace = tt.TrimLeadingSpace
- r.ReuseRecord = tt.ReuseRecord
- out, err := r.ReadAll()
- if !reflect.DeepEqual(err, tt.Error) {
- t.Errorf("ReadAll() error:\ngot %v\nwant %v", err, tt.Error)
- } else if !reflect.DeepEqual(out, tt.Output) {
- t.Errorf("ReadAll() output:\ngot %q\nwant %q", out, tt.Output)
+ // Check field and error positions.
+ r, _, _ = newReader(tt)
+ for recNum := 0; ; recNum++ {
+ rec, err := r.Read()
+ var wantErr error
+ if recNum < len(tt.Errors) && tt.Errors[recNum] != nil {
+ wantErr = errorWithPosition(tt.Errors[recNum], recNum, positions, errPositions)
+ } else if recNum >= len(tt.Output) {
+ wantErr = io.EOF
+ }
+ if !reflect.DeepEqual(err, wantErr) {
+ t.Fatalf("Read() error at record %d:\ngot %v (%#v)\nwant %v (%#v)", recNum, err, err, wantErr, wantErr)
+ }
+ // ErrFieldCount is explicitly non-fatal.
+ if err != nil && !errors.Is(err, ErrFieldCount) {
+ if recNum < len(tt.Output) {
+ t.Fatalf("need more records; got %d want %d", recNum, len(tt.Output))
+ }
+ break
+ }
+ if got, want := rec, tt.Output[recNum]; !reflect.DeepEqual(got, want) {
+ t.Errorf("Read vs ReadAll mismatch;\ngot %q\nwant %q", got, want)
+ }
+ pos := positions[recNum]
+ if len(pos) != len(rec) {
+ t.Fatalf("mismatched position length at record %d", recNum)
+ }
+ for i := range rec {
+ line, col := r.FieldPos(i)
+ if got, want := [2]int{line, col}, pos[i]; got != want {
+ t.Errorf("position mismatch at record %d, field %d;\ngot %v\nwant %v", recNum, i, got, want)
+ }
+ }
}
})
}
}
+// firstError returns the first non-nil error in errs,
+// with the position adjusted according to the error's
+// index inside positions.
+func firstError(errs []error, positions [][][2]int, errPositions map[int][2]int) error {
+ for i, err := range errs {
+ if err != nil {
+ return errorWithPosition(err, i, positions, errPositions)
+ }
+ }
+ return nil
+}
+
+func errorWithPosition(err error, recNum int, positions [][][2]int, errPositions map[int][2]int) error {
+ parseErr, ok := err.(*ParseError)
+ if !ok {
+ return err
+ }
+ if recNum >= len(positions) {
+ panic(fmt.Errorf("no positions found for error at record %d", recNum))
+ }
+ errPos, ok := errPositions[recNum]
+ if !ok {
+ panic(fmt.Errorf("no error position found for error at record %d", recNum))
+ }
+ parseErr1 := *parseErr
+ parseErr1.StartLine = positions[recNum][0][0]
+ parseErr1.Line = errPos[0]
+ parseErr1.Column = errPos[1]
+ return &parseErr1
+}
+
+// makePositions returns the expected field positions of all
+// the fields in text, the positions of any errors, and the text with the position markers
+// removed.
+//
+// The start of each field is marked with a § symbol;
+// CSV lines are separated by ¶ symbols;
+// Error positions are marked with ∑ symbols.
+func makePositions(text string) ([][][2]int, map[int][2]int, string) {
+ buf := make([]byte, 0, len(text))
+ var positions [][][2]int
+ errPositions := make(map[int][2]int)
+ line, col := 1, 1
+ recNum := 0
+
+ for len(text) > 0 {
+ r, size := utf8.DecodeRuneInString(text)
+ switch r {
+ case '\n':
+ line++
+ col = 1
+ buf = append(buf, '\n')
+ case '§':
+ if len(positions) == 0 {
+ positions = append(positions, [][2]int{})
+ }
+ positions[len(positions)-1] = append(positions[len(positions)-1], [2]int{line, col})
+ case '¶':
+ positions = append(positions, [][2]int{})
+ recNum++
+ case '∑':
+ errPositions[recNum] = [2]int{line, col}
+ default:
+ buf = append(buf, text[:size]...)
+ col += size
+ }
+ text = text[size:]
+ }
+ return positions, errPositions, string(buf)
+}
+
// nTimes is an io.Reader which yields the string s n times.
type nTimes struct {
s string
diff --git a/libgo/go/encoding/gob/debug.go b/libgo/go/encoding/gob/debug.go
index 8f93742..5965fea 100644
--- a/libgo/go/encoding/gob/debug.go
+++ b/libgo/go/encoding/gob/debug.go
@@ -3,6 +3,7 @@
// license that can be found in the LICENSE file.
// Delete the next line to include in the gob package.
+//go:build ignore
// +build ignore
package gob
diff --git a/libgo/go/encoding/gob/decgen.go b/libgo/go/encoding/gob/decgen.go
index 1c31e66..994be87 100644
--- a/libgo/go/encoding/gob/decgen.go
+++ b/libgo/go/encoding/gob/decgen.go
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+//go:build ignore
// +build ignore
// encgen writes the helper functions for encoding. Intended to be
diff --git a/libgo/go/encoding/gob/decoder.go b/libgo/go/encoding/gob/decoder.go
index b52aabe..b476aaa 100644
--- a/libgo/go/encoding/gob/decoder.go
+++ b/libgo/go/encoding/gob/decoder.go
@@ -152,6 +152,9 @@ func (dec *Decoder) decodeTypeSequence(isInterface bool) typeId {
}
// Type definition for (-id) follows.
dec.recvType(-id)
+ if dec.err != nil {
+ break
+ }
// When decoding an interface, after a type there may be a
// DelimitedValue still in the buffer. Skip its count.
// (Alternatively, the buffer is empty and the byte count
@@ -215,7 +218,7 @@ func (dec *Decoder) DecodeValue(v reflect.Value) error {
return dec.err
}
-// If debug.go is compiled into the program , debugFunc prints a human-readable
+// If debug.go is compiled into the program, debugFunc prints a human-readable
// representation of the gob data read from r by calling that file's Debug function.
// Otherwise it is nil.
var debugFunc func(io.Reader)
diff --git a/libgo/go/encoding/gob/dump.go b/libgo/go/encoding/gob/dump.go
index 17238c9..8c0bbc4 100644
--- a/libgo/go/encoding/gob/dump.go
+++ b/libgo/go/encoding/gob/dump.go
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+//go:build ignore
// +build ignore
package main
diff --git a/libgo/go/encoding/gob/encgen.go b/libgo/go/encoding/gob/encgen.go
index 409b8c9..b562da1 100644
--- a/libgo/go/encoding/gob/encgen.go
+++ b/libgo/go/encoding/gob/encgen.go
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+//go:build ignore
// +build ignore
// encgen writes the helper functions for encoding. Intended to be
diff --git a/libgo/go/encoding/gob/encoder_test.go b/libgo/go/encoding/gob/encoder_test.go
index fe27749..6183646 100644
--- a/libgo/go/encoding/gob/encoder_test.go
+++ b/libgo/go/encoding/gob/encoder_test.go
@@ -1127,3 +1127,28 @@ func TestBadData(t *testing.T) {
}
}
}
+
+func TestDecodeErrorMultipleTypes(t *testing.T) {
+ type Test struct {
+ A string
+ B int
+ }
+ var b bytes.Buffer
+ NewEncoder(&b).Encode(Test{"one", 1})
+
+ var result, result2 Test
+ dec := NewDecoder(&b)
+ err := dec.Decode(&result)
+ if err != nil {
+ t.Errorf("decode: unexpected error %v", err)
+ }
+
+ b.Reset()
+ NewEncoder(&b).Encode(Test{"two", 2})
+ err = dec.Decode(&result2)
+ if err == nil {
+ t.Errorf("decode: expected duplicate type error, got nil")
+ } else if !strings.Contains(err.Error(), "duplicate type") {
+ t.Errorf("decode: expected duplicate type error, got %s", err.Error())
+ }
+}
diff --git a/libgo/go/encoding/json/decode.go b/libgo/go/encoding/json/decode.go
index 86d8a69..a9917e7 100644
--- a/libgo/go/encoding/json/decode.go
+++ b/libgo/go/encoding/json/decode.go
@@ -200,16 +200,19 @@ func (n Number) Int64() (int64, error) {
return strconv.ParseInt(string(n), 10, 64)
}
+// An errorContext provides context for type errors during decoding.
+type errorContext struct {
+ Struct reflect.Type
+ FieldStack []string
+}
+
// decodeState represents the state while decoding a JSON value.
type decodeState struct {
- data []byte
- off int // next read offset in data
- opcode int // last read result
- scan scanner
- errorContext struct { // provides context for type errors
- Struct reflect.Type
- FieldStack []string
- }
+ data []byte
+ off int // next read offset in data
+ opcode int // last read result
+ scan scanner
+ errorContext *errorContext
savedError error
useNumber bool
disallowUnknownFields bool
@@ -229,10 +232,11 @@ func (d *decodeState) init(data []byte) *decodeState {
d.data = data
d.off = 0
d.savedError = nil
- d.errorContext.Struct = nil
-
- // Reuse the allocated space for the FieldStack slice.
- d.errorContext.FieldStack = d.errorContext.FieldStack[:0]
+ if d.errorContext != nil {
+ d.errorContext.Struct = nil
+ // Reuse the allocated space for the FieldStack slice.
+ d.errorContext.FieldStack = d.errorContext.FieldStack[:0]
+ }
return d
}
@@ -246,12 +250,11 @@ func (d *decodeState) saveError(err error) {
// addErrorContext returns a new error enhanced with information from d.errorContext
func (d *decodeState) addErrorContext(err error) error {
- if d.errorContext.Struct != nil || len(d.errorContext.FieldStack) > 0 {
+ if d.errorContext != nil && (d.errorContext.Struct != nil || len(d.errorContext.FieldStack) > 0) {
switch err := err.(type) {
case *UnmarshalTypeError:
err.Struct = d.errorContext.Struct.Name()
err.Field = strings.Join(d.errorContext.FieldStack, ".")
- return err
}
}
return err
@@ -657,7 +660,10 @@ func (d *decodeState) object(v reflect.Value) error {
}
var mapElem reflect.Value
- origErrorContext := d.errorContext
+ var origErrorContext errorContext
+ if d.errorContext != nil {
+ origErrorContext = *d.errorContext
+ }
for {
// Read opening " of string key or closing }.
@@ -732,6 +738,9 @@ func (d *decodeState) object(v reflect.Value) error {
}
subv = subv.Field(i)
}
+ if d.errorContext == nil {
+ d.errorContext = new(errorContext)
+ }
d.errorContext.FieldStack = append(d.errorContext.FieldStack, f.name)
d.errorContext.Struct = t
} else if d.disallowUnknownFields {
@@ -812,11 +821,13 @@ func (d *decodeState) object(v reflect.Value) error {
if d.opcode == scanSkipSpace {
d.scanWhile(scanSkipSpace)
}
- // Reset errorContext to its original state.
- // Keep the same underlying array for FieldStack, to reuse the
- // space and avoid unnecessary allocs.
- d.errorContext.FieldStack = d.errorContext.FieldStack[:len(origErrorContext.FieldStack)]
- d.errorContext.Struct = origErrorContext.Struct
+ if d.errorContext != nil {
+ // Reset errorContext to its original state.
+ // Keep the same underlying array for FieldStack, to reuse the
+ // space and avoid unnecessary allocs.
+ d.errorContext.FieldStack = d.errorContext.FieldStack[:len(origErrorContext.FieldStack)]
+ d.errorContext.Struct = origErrorContext.Struct
+ }
if d.opcode == scanEndObject {
break
}
diff --git a/libgo/go/encoding/json/encode.go b/libgo/go/encoding/json/encode.go
index 483b9d8..e473e61 100644
--- a/libgo/go/encoding/json/encode.go
+++ b/libgo/go/encoding/json/encode.go
@@ -794,23 +794,24 @@ func (me mapEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) {
e.WriteByte('{')
// Extract and sort the keys.
- keys := v.MapKeys()
- sv := make([]reflectWithString, len(keys))
- for i, v := range keys {
- sv[i].v = v
+ sv := make([]reflectWithString, v.Len())
+ mi := v.MapRange()
+ for i := 0; mi.Next(); i++ {
+ sv[i].k = mi.Key()
+ sv[i].v = mi.Value()
if err := sv[i].resolve(); err != nil {
e.error(fmt.Errorf("json: encoding error for type %q: %q", v.Type().String(), err.Error()))
}
}
- sort.Slice(sv, func(i, j int) bool { return sv[i].s < sv[j].s })
+ sort.Slice(sv, func(i, j int) bool { return sv[i].ks < sv[j].ks })
for i, kv := range sv {
if i > 0 {
e.WriteByte(',')
}
- e.string(kv.s, opts.escapeHTML)
+ e.string(kv.ks, opts.escapeHTML)
e.WriteByte(':')
- me.elemEnc(e, v.MapIndex(kv.v), opts)
+ me.elemEnc(e, kv.v, opts)
}
e.WriteByte('}')
e.ptrLevel--
@@ -997,29 +998,30 @@ func typeByIndex(t reflect.Type, index []int) reflect.Type {
}
type reflectWithString struct {
- v reflect.Value
- s string
+ k reflect.Value
+ v reflect.Value
+ ks string
}
func (w *reflectWithString) resolve() error {
- if w.v.Kind() == reflect.String {
- w.s = w.v.String()
+ if w.k.Kind() == reflect.String {
+ w.ks = w.k.String()
return nil
}
- if tm, ok := w.v.Interface().(encoding.TextMarshaler); ok {
- if w.v.Kind() == reflect.Ptr && w.v.IsNil() {
+ if tm, ok := w.k.Interface().(encoding.TextMarshaler); ok {
+ if w.k.Kind() == reflect.Ptr && w.k.IsNil() {
return nil
}
buf, err := tm.MarshalText()
- w.s = string(buf)
+ w.ks = string(buf)
return err
}
- switch w.v.Kind() {
+ switch w.k.Kind() {
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
- w.s = strconv.FormatInt(w.v.Int(), 10)
+ w.ks = strconv.FormatInt(w.k.Int(), 10)
return nil
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
- w.s = strconv.FormatUint(w.v.Uint(), 10)
+ w.ks = strconv.FormatUint(w.k.Uint(), 10)
return nil
}
panic("unexpected map key type")
@@ -1239,19 +1241,18 @@ func typeFields(t reflect.Type) structFields {
// Scan f.typ for fields to include.
for i := 0; i < f.typ.NumField(); i++ {
sf := f.typ.Field(i)
- isUnexported := sf.PkgPath != ""
if sf.Anonymous {
t := sf.Type
if t.Kind() == reflect.Ptr {
t = t.Elem()
}
- if isUnexported && t.Kind() != reflect.Struct {
+ if !sf.IsExported() && t.Kind() != reflect.Struct {
// Ignore embedded fields of unexported non-struct types.
continue
}
// Do not ignore embedded fields of unexported struct types
// since they may have exported fields.
- } else if isUnexported {
+ } else if !sf.IsExported() {
// Ignore unexported non-embedded fields.
continue
}
diff --git a/libgo/go/encoding/json/encode_test.go b/libgo/go/encoding/json/encode_test.go
index 42bb09d..0dad951 100644
--- a/libgo/go/encoding/json/encode_test.go
+++ b/libgo/go/encoding/json/encode_test.go
@@ -245,6 +245,22 @@ func TestUnsupportedValues(t *testing.T) {
}
}
+// Issue 43207
+func TestMarshalTextFloatMap(t *testing.T) {
+ m := map[textfloat]string{
+ textfloat(math.NaN()): "1",
+ textfloat(math.NaN()): "1",
+ }
+ got, err := Marshal(m)
+ if err != nil {
+ t.Errorf("Marshal() error: %v", err)
+ }
+ want := `{"TF:NaN":"1","TF:NaN":"1"}`
+ if string(got) != want {
+ t.Errorf("Marshal() = %s, want %s", got, want)
+ }
+}
+
// Ref has Marshaler and Unmarshaler methods with pointer receiver.
type Ref int
@@ -854,6 +870,10 @@ func tenc(format string, a ...interface{}) ([]byte, error) {
return buf.Bytes(), nil
}
+type textfloat float64
+
+func (f textfloat) MarshalText() ([]byte, error) { return tenc(`TF:%0.2f`, f) }
+
// Issue 13783
func TestEncodeBytekind(t *testing.T) {
testdata := []struct {
@@ -872,6 +892,7 @@ func TestEncodeBytekind(t *testing.T) {
{[]jsonint{5, 4}, `[{"JI":5},{"JI":4}]`},
{[]textint{9, 3}, `["TI:9","TI:3"]`},
{[]int{9, 3}, `[9,3]`},
+ {[]textfloat{12, 3}, `["TF:12.00","TF:3.00"]`},
}
for _, d := range testdata {
js, err := Marshal(d.data)
diff --git a/libgo/go/encoding/json/fuzz.go b/libgo/go/encoding/json/fuzz.go
index be03f0d..d3fa2d11 100644
--- a/libgo/go/encoding/json/fuzz.go
+++ b/libgo/go/encoding/json/fuzz.go
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+//go:build gofuzz
// +build gofuzz
package json
diff --git a/libgo/go/encoding/xml/typeinfo.go b/libgo/go/encoding/xml/typeinfo.go
index f30fe58..162724e 100644
--- a/libgo/go/encoding/xml/typeinfo.go
+++ b/libgo/go/encoding/xml/typeinfo.go
@@ -60,7 +60,7 @@ func getTypeInfo(typ reflect.Type) (*typeInfo, error) {
n := typ.NumField()
for i := 0; i < n; i++ {
f := typ.Field(i)
- if (f.PkgPath != "" && !f.Anonymous) || f.Tag.Get("xml") == "-" {
+ if (!f.IsExported() && !f.Anonymous) || f.Tag.Get("xml") == "-" {
continue // Private field
}
diff --git a/libgo/go/encoding/xml/xml.go b/libgo/go/encoding/xml/xml.go
index 6f9594d..c14954d 100644
--- a/libgo/go/encoding/xml/xml.go
+++ b/libgo/go/encoding/xml/xml.go
@@ -261,7 +261,7 @@ func NewTokenDecoder(t TokenReader) *Decoder {
// call to Token. To acquire a copy of the bytes, call CopyToken
// or the token's Copy method.
//
-// Token expands self-closing elements such as <br/>
+// Token expands self-closing elements such as <br>
// into separate start and end elements returned by successive calls.
//
// Token guarantees that the StartElement and EndElement
@@ -768,6 +768,12 @@ func (d *Decoder) rawToken() (Token, error) {
}
b0, b1 = b1, b
}
+
+ // Replace the comment with a space in the returned Directive
+ // body, so that markup parts that were separated by the comment
+ // (like a "<" and a "!") don't get joined when re-encoding the
+ // Directive, taking new semantic meaning.
+ d.buf.WriteByte(' ')
}
}
return Directive(d.buf.Bytes()), nil
@@ -1156,8 +1162,9 @@ func (d *Decoder) nsname() (name Name, ok bool) {
if !ok {
return
}
- i := strings.Index(s, ":")
- if i < 0 {
+ if strings.Count(s, ":") > 1 {
+ name.Local = s
+ } else if i := strings.Index(s, ":"); i < 1 || i > len(s)-2 {
name.Local = s
} else {
name.Space = s[0:i]
diff --git a/libgo/go/encoding/xml/xml_test.go b/libgo/go/encoding/xml/xml_test.go
index 5672ebb..19152db 100644
--- a/libgo/go/encoding/xml/xml_test.go
+++ b/libgo/go/encoding/xml/xml_test.go
@@ -802,11 +802,11 @@ var directivesWithCommentsInput = `
var directivesWithCommentsTokens = []Token{
CharData("\n"),
- Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
+ Directive(`DOCTYPE [ <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
CharData("\n"),
- Directive(`DOCTYPE [<!ENTITY go "Golang">]`),
+ Directive(`DOCTYPE [<!ENTITY go "Golang"> ]`),
CharData("\n"),
- Directive(`DOCTYPE <!-> <!> [<!ENTITY go "Golang">]`),
+ Directive(`DOCTYPE <!-> <!> [<!ENTITY go "Golang"> ]`),
CharData("\n"),
}
@@ -940,7 +940,7 @@ func (m mapper) Token() (Token, error) {
}
func TestNewTokenDecoderIdempotent(t *testing.T) {
- d := NewDecoder(strings.NewReader(`<br/>`))
+ d := NewDecoder(strings.NewReader(`<br>`))
d2 := NewTokenDecoder(d)
if d != d2 {
t.Error("NewTokenDecoder did not detect underlying Decoder")
@@ -1003,3 +1003,60 @@ func TestTokenUnmarshaler(t *testing.T) {
d := NewTokenDecoder(tokReader{})
d.Decode(&Failure{})
}
+
+func testRoundTrip(t *testing.T, input string) {
+ d := NewDecoder(strings.NewReader(input))
+ var tokens []Token
+ var buf bytes.Buffer
+ e := NewEncoder(&buf)
+ for {
+ tok, err := d.Token()
+ if err == io.EOF {
+ break
+ }
+ if err != nil {
+ t.Fatalf("invalid input: %v", err)
+ }
+ if err := e.EncodeToken(tok); err != nil {
+ t.Fatalf("failed to re-encode input: %v", err)
+ }
+ tokens = append(tokens, CopyToken(tok))
+ }
+ if err := e.Flush(); err != nil {
+ t.Fatal(err)
+ }
+
+ d = NewDecoder(&buf)
+ for {
+ tok, err := d.Token()
+ if err == io.EOF {
+ break
+ }
+ if err != nil {
+ t.Fatalf("failed to decode output: %v", err)
+ }
+ if len(tokens) == 0 {
+ t.Fatalf("unexpected token: %#v", tok)
+ }
+ a, b := tokens[0], tok
+ if !reflect.DeepEqual(a, b) {
+ t.Fatalf("token mismatch: %#v vs %#v", a, b)
+ }
+ tokens = tokens[1:]
+ }
+ if len(tokens) > 0 {
+ t.Fatalf("lost tokens: %#v", tokens)
+ }
+}
+
+func TestRoundTrip(t *testing.T) {
+ tests := map[string]string{
+ "leading colon": `<::Test ::foo="bar"><:::Hello></:::Hello><Hello></Hello></::Test>`,
+ "trailing colon": `<foo abc:="x"></foo>`,
+ "double colon": `<x:y:foo></x:y:foo>`,
+ "comments in directives": `<!ENTITY x<!<!-- c1 [ " -->--x --> > <e></e> <!DOCTYPE xxx [ x<!-- c2 " -->--x ]>`,
+ }
+ for name, input := range tests {
+ t.Run(name, func(t *testing.T) { testRoundTrip(t, input) })
+ }
+}