diff options
author | Ian Lance Taylor <ian@gcc.gnu.org> | 2015-10-31 00:59:47 +0000 |
---|---|---|
committer | Ian Lance Taylor <ian@gcc.gnu.org> | 2015-10-31 00:59:47 +0000 |
commit | af146490bb04205107cb23e301ec7a8ff927b5fc (patch) | |
tree | 13beeaed3698c61903fe93fb1ce70bd9b18d4e7f /libgo/go/encoding | |
parent | 725e1be3406315d9bcc8195d7eef0a7082b3c7cc (diff) | |
download | gcc-af146490bb04205107cb23e301ec7a8ff927b5fc.zip gcc-af146490bb04205107cb23e301ec7a8ff927b5fc.tar.gz gcc-af146490bb04205107cb23e301ec7a8ff927b5fc.tar.bz2 |
runtime: Remove now unnecessary pad field from ParFor.
It is not needed due to the removal of the ctx field.
Reviewed-on: https://go-review.googlesource.com/16525
From-SVN: r229616
Diffstat (limited to 'libgo/go/encoding')
34 files changed, 2105 insertions, 319 deletions
diff --git a/libgo/go/encoding/asn1/asn1.go b/libgo/go/encoding/asn1/asn1.go index 8b3d1b3..2ac411a 100644 --- a/libgo/go/encoding/asn1/asn1.go +++ b/libgo/go/encoding/asn1/asn1.go @@ -20,11 +20,13 @@ package asn1 // everything by any means. import ( + "errors" "fmt" "math/big" "reflect" "strconv" "time" + "unicode/utf8" ) // A StructuralError suggests that the ASN.1 data is valid, but the Go type @@ -287,11 +289,23 @@ func parseBase128Int(bytes []byte, initOffset int) (ret, offset int, err error) func parseUTCTime(bytes []byte) (ret time.Time, err error) { s := string(bytes) - ret, err = time.Parse("0601021504Z0700", s) + + formatStr := "0601021504Z0700" + ret, err = time.Parse(formatStr, s) + if err != nil { + formatStr = "060102150405Z0700" + ret, err = time.Parse(formatStr, s) + } if err != nil { - ret, err = time.Parse("060102150405Z0700", s) + return } - if err == nil && ret.Year() >= 2050 { + + if serialized := ret.Format(formatStr); serialized != s { + err = fmt.Errorf("asn1: time did not serialize back to the original value and may be invalid: given %q, but serialized as %q", s, serialized) + return + } + + if ret.Year() >= 2050 { // UTCTime only encodes times prior to 2050. See https://tools.ietf.org/html/rfc5280#section-4.1.2.5.1 ret = ret.AddDate(-100, 0, 0) } @@ -302,7 +316,18 @@ func parseUTCTime(bytes []byte) (ret time.Time, err error) { // parseGeneralizedTime parses the GeneralizedTime from the given byte slice // and returns the resulting time. 
func parseGeneralizedTime(bytes []byte) (ret time.Time, err error) { - return time.Parse("20060102150405Z0700", string(bytes)) + const formatStr = "20060102150405Z0700" + s := string(bytes) + + if ret, err = time.Parse(formatStr, s); err != nil { + return + } + + if serialized := ret.Format(formatStr); serialized != s { + err = fmt.Errorf("asn1: time did not serialize back to the original value and may be invalid: given %q, but serialized as %q", s, serialized) + } + + return } // PrintableString @@ -320,7 +345,7 @@ func parsePrintableString(bytes []byte) (ret string, err error) { return } -// isPrintable returns true iff the given b is in the ASN.1 PrintableString set. +// isPrintable reports whether the given b is in the ASN.1 PrintableString set. func isPrintable(b byte) bool { return 'a' <= b && b <= 'z' || 'A' <= b && b <= 'Z' || @@ -365,6 +390,9 @@ func parseT61String(bytes []byte) (ret string, err error) { // parseUTF8String parses a ASN.1 UTF8String (raw UTF-8) from the given byte // array and returns it. func parseUTF8String(bytes []byte) (ret string, err error) { + if !utf8.Valid(bytes) { + return "", errors.New("asn1: invalid UTF-8 string") + } return string(bytes), nil } @@ -389,6 +417,12 @@ type RawContent []byte // don't distinguish between ordered and unordered objects in this code. func parseTagAndLength(bytes []byte, initOffset int) (ret tagAndLength, offset int, err error) { offset = initOffset + // parseTagAndLength should not be called without at least a single + // byte to read. 
Thus this check is for robustness: + if offset >= len(bytes) { + err = errors.New("asn1: internal error in parseTagAndLength") + return + } b := bytes[offset] offset++ ret.class = int(b >> 6) @@ -579,6 +613,8 @@ func parseField(v reflect.Value, bytes []byte, initOffset int, params fieldParam result, err = parseObjectIdentifier(innerBytes) case tagUTCTime: result, err = parseUTCTime(innerBytes) + case tagGeneralizedTime: + result, err = parseGeneralizedTime(innerBytes) case tagOctetString: result = innerBytes default: @@ -609,6 +645,10 @@ func parseField(v reflect.Value, bytes []byte, initOffset int, params fieldParam if params.application { expectedClass = classApplication } + if offset == len(bytes) { + err = StructuralError{"explicit tag has no child"} + return + } if t.class == expectedClass && t.tag == *params.tag && (t.length == 0 || t.isCompound) { if t.length > 0 { t, offset, err = parseTagAndLength(bytes, offset) diff --git a/libgo/go/encoding/asn1/asn1_test.go b/libgo/go/encoding/asn1/asn1_test.go index 4e864d0..893d080 100644 --- a/libgo/go/encoding/asn1/asn1_test.go +++ b/libgo/go/encoding/asn1/asn1_test.go @@ -9,6 +9,7 @@ import ( "fmt" "math/big" "reflect" + "strings" "testing" "time" ) @@ -258,6 +259,24 @@ var utcTestData = []timeTest{ {"91050633444aZ", false, time.Time{}}, {"910506334461Z", false, time.Time{}}, {"910506334400Za", false, time.Time{}}, + /* These are invalid times. However, the time package normalises times + * and they were accepted in some versions. See #11134. 
*/ + {"000100000000Z", false, time.Time{}}, + {"101302030405Z", false, time.Time{}}, + {"100002030405Z", false, time.Time{}}, + {"100100030405Z", false, time.Time{}}, + {"100132030405Z", false, time.Time{}}, + {"100231030405Z", false, time.Time{}}, + {"100102240405Z", false, time.Time{}}, + {"100102036005Z", false, time.Time{}}, + {"100102030460Z", false, time.Time{}}, + {"-100102030410Z", false, time.Time{}}, + {"10-0102030410Z", false, time.Time{}}, + {"10-0002030410Z", false, time.Time{}}, + {"1001-02030410Z", false, time.Time{}}, + {"100102-030410Z", false, time.Time{}}, + {"10010203-0410Z", false, time.Time{}}, + {"1001020304-10Z", false, time.Time{}}, } func TestUTCTime(t *testing.T) { @@ -287,6 +306,24 @@ var generalizedTimeTestData = []timeTest{ {"20100102030405", false, time.Time{}}, {"20100102030405+0607", true, time.Date(2010, 01, 02, 03, 04, 05, 0, time.FixedZone("", 6*60*60+7*60))}, {"20100102030405-0607", true, time.Date(2010, 01, 02, 03, 04, 05, 0, time.FixedZone("", -6*60*60-7*60))}, + /* These are invalid times. However, the time package normalises times + * and they were accepted in some versions. See #11134. 
*/ + {"00000100000000Z", false, time.Time{}}, + {"20101302030405Z", false, time.Time{}}, + {"20100002030405Z", false, time.Time{}}, + {"20100100030405Z", false, time.Time{}}, + {"20100132030405Z", false, time.Time{}}, + {"20100231030405Z", false, time.Time{}}, + {"20100102240405Z", false, time.Time{}}, + {"20100102036005Z", false, time.Time{}}, + {"20100102030460Z", false, time.Time{}}, + {"-20100102030410Z", false, time.Time{}}, + {"2010-0102030410Z", false, time.Time{}}, + {"2010-0002030410Z", false, time.Time{}}, + {"201001-02030410Z", false, time.Time{}}, + {"20100102-030410Z", false, time.Time{}}, + {"2010010203-0410Z", false, time.Time{}}, + {"201001020304-10Z", false, time.Time{}}, } func TestGeneralizedTime(t *testing.T) { @@ -297,7 +334,7 @@ func TestGeneralizedTime(t *testing.T) { } if err == nil { if !reflect.DeepEqual(test.out, ret) { - t.Errorf("#%d: Bad result: %v (expected %v)", i, ret, test.out) + t.Errorf("#%d: Bad result: %q → %v (expected %v)", i, test.in, ret, test.out) } } } @@ -358,6 +395,8 @@ func newBool(b bool) *bool { return &b } var parseFieldParametersTestData []parseFieldParametersTest = []parseFieldParametersTest{ {"", fieldParameters{}}, {"ia5", fieldParameters{stringType: tagIA5String}}, + {"generalized", fieldParameters{timeType: tagGeneralizedTime}}, + {"utc", fieldParameters{timeType: tagUTCTime}}, {"printable", fieldParameters{stringType: tagPrintableString}}, {"optional", fieldParameters{optional: true}}, {"explicit", fieldParameters{explicit: true, tag: new(int)}}, @@ -366,7 +405,7 @@ var parseFieldParametersTestData []parseFieldParametersTest = []parseFieldParame {"default:42", fieldParameters{defaultValue: newInt64(42)}}, {"tag:17", fieldParameters{tag: newInt(17)}}, {"optional,explicit,default:42,tag:17", fieldParameters{optional: true, explicit: true, defaultValue: newInt64(42), tag: newInt(17)}}, - {"optional,explicit,default:42,tag:17,rubbish1", fieldParameters{true, true, false, newInt64(42), newInt(17), 0, false, 
false}}, + {"optional,explicit,default:42,tag:17,rubbish1", fieldParameters{true, true, false, newInt64(42), newInt(17), 0, 0, false, false}}, {"set", fieldParameters{set: true}}, } @@ -865,3 +904,39 @@ func TestImplicitTaggedTime(t *testing.T) { t.Errorf("Wrong result. Got %v, want %v", result.Time, expected) } } + +type truncatedExplicitTagTest struct { + Test int `asn1:"explicit,tag:0"` +} + +func TestTruncatedExplicitTag(t *testing.T) { + // This crashed Unmarshal in the past. See #11154. + der := []byte{ + 0x30, // SEQUENCE + 0x02, // two bytes long + 0xa0, // context-specific, tag 0 + 0x30, // 48 bytes long + } + + var result truncatedExplicitTagTest + if _, err := Unmarshal(der, &result); err == nil { + t.Error("Unmarshal returned without error") + } +} + +type invalidUTF8Test struct { + Str string `asn1:"utf8"` +} + +func TestUnmarshalInvalidUTF8(t *testing.T) { + data := []byte("0\x05\f\x03a\xc9c") + var result invalidUTF8Test + _, err := Unmarshal(data, &result) + + const expectedSubstring = "UTF" + if err == nil { + t.Fatal("Successfully unmarshaled invalid UTF-8 data") + } else if !strings.Contains(err.Error(), expectedSubstring) { + t.Fatalf("Expected error to mention %q but error was %q", expectedSubstring, err.Error()) + } +} diff --git a/libgo/go/encoding/asn1/common.go b/libgo/go/encoding/asn1/common.go index 33a117e..ab85e04 100644 --- a/libgo/go/encoding/asn1/common.go +++ b/libgo/go/encoding/asn1/common.go @@ -74,6 +74,7 @@ type fieldParameters struct { defaultValue *int64 // a default value for INTEGER typed fields (maybe nil). tag *int // the EXPLICIT or IMPLICIT tag (maybe nil). stringType int // the string tag to use when marshaling. + timeType int // the time tag to use when marshaling. set bool // true iff this should be encoded as a SET omitEmpty bool // true iff this should be omitted if empty when marshaling. 
@@ -94,6 +95,10 @@ func parseFieldParameters(str string) (ret fieldParameters) { if ret.tag == nil { ret.tag = new(int) } + case part == "generalized": + ret.timeType = tagGeneralizedTime + case part == "utc": + ret.timeType = tagUTCTime case part == "ia5": ret.stringType = tagIA5String case part == "printable": diff --git a/libgo/go/encoding/asn1/marshal.go b/libgo/go/encoding/asn1/marshal.go index b2f104b..67a019d 100644 --- a/libgo/go/encoding/asn1/marshal.go +++ b/libgo/go/encoding/asn1/marshal.go @@ -18,7 +18,7 @@ import ( // A forkableWriter is an in-memory buffer that can be // 'forked' to create new forkableWriters that bracket the // original. After -// pre, post := w.fork(); +// pre, post := w.fork() // the overall sequence of bytes represented is logically w+pre+post. type forkableWriter struct { *bytes.Buffer @@ -410,9 +410,11 @@ func stripTagAndLength(in []byte) []byte { func marshalBody(out *forkableWriter, value reflect.Value, params fieldParameters) (err error) { switch value.Type() { + case flagType: + return nil case timeType: t := value.Interface().(time.Time) - if outsideUTCRange(t) { + if params.timeType == tagGeneralizedTime || outsideUTCRange(t) { return marshalGeneralizedTime(out, t) } else { return marshalUTCTime(out, t) @@ -552,6 +554,10 @@ func marshalField(out *forkableWriter, v reflect.Value, params fieldParameters) } class := classUniversal + if params.timeType != 0 && tag != tagUTCTime { + return StructuralError{"explicit time type given to non-time member"} + } + if params.stringType != 0 && tag != tagPrintableString { return StructuralError{"explicit string type given to non-string member"} } @@ -575,7 +581,7 @@ func marshalField(out *forkableWriter, v reflect.Value, params fieldParameters) tag = params.stringType } case tagUTCTime: - if outsideUTCRange(v.Interface().(time.Time)) { + if params.timeType == tagGeneralizedTime || outsideUTCRange(v.Interface().(time.Time)) { tag = tagGeneralizedTime } } diff --git 
a/libgo/go/encoding/asn1/marshal_test.go b/libgo/go/encoding/asn1/marshal_test.go index 5b0115f2..cdca8aa 100644 --- a/libgo/go/encoding/asn1/marshal_test.go +++ b/libgo/go/encoding/asn1/marshal_test.go @@ -42,6 +42,14 @@ type explicitTagTest struct { A int `asn1:"explicit,tag:5"` } +type flagTest struct { + A Flag `asn1:"tag:0,optional"` +} + +type generalizedTimeTest struct { + A time.Time `asn1:"generalized"` +} + type ia5StringTest struct { A string `asn1:"ia5"` } @@ -92,10 +100,13 @@ var marshalTests = []marshalTest{ {[]byte{1, 2, 3}, "0403010203"}, {implicitTagTest{64}, "3003850140"}, {explicitTagTest{64}, "3005a503020140"}, + {flagTest{true}, "30028000"}, + {flagTest{false}, "3000"}, {time.Unix(0, 0).UTC(), "170d3730303130313030303030305a"}, {time.Unix(1258325776, 0).UTC(), "170d3039313131353232353631365a"}, {time.Unix(1258325776, 0).In(PST), "17113039313131353134353631362d30383030"}, {farFuture(), "180f32313030303430353132303130315a"}, + {generalizedTimeTest{time.Unix(1258325776, 0).UTC()}, "3011180f32303039313131353232353631365a"}, {BitString{[]byte{0x80}, 1}, "03020780"}, {BitString{[]byte{0x81, 0xf0}, 12}, "03030481f0"}, {ObjectIdentifier([]int{1, 2, 3, 4}), "06032a0304"}, diff --git a/libgo/go/encoding/base64/base64.go b/libgo/go/encoding/base64/base64.go index ad3abe6..3302fb4 100644 --- a/libgo/go/encoding/base64/base64.go +++ b/libgo/go/encoding/base64/base64.go @@ -6,10 +6,8 @@ package base64 import ( - "bytes" "io" "strconv" - "strings" ) /* @@ -22,18 +20,32 @@ import ( // (RFC 1421). RFC 4648 also defines an alternate encoding, which is // the standard encoding with - and _ substituted for + and /. 
type Encoding struct { - encode string + encode [64]byte decodeMap [256]byte + padChar rune } +const ( + StdPadding rune = '=' // Standard padding character + NoPadding rune = -1 // No padding +) + const encodeStd = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" const encodeURL = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" -// NewEncoding returns a new Encoding defined by the given alphabet, +// NewEncoding returns a new padded Encoding defined by the given alphabet, // which must be a 64-byte string. +// The resulting Encoding uses the default padding character ('='), +// which may be changed or disabled via WithPadding. func NewEncoding(encoder string) *Encoding { + if len(encoder) != 64 { + panic("encoding alphabet is not 64-bytes long") + } + e := new(Encoding) - e.encode = encoder + e.padChar = StdPadding + copy(e.encode[:], encoder) + for i := 0; i < len(e.decodeMap); i++ { e.decodeMap[i] = 0xFF } @@ -43,6 +55,13 @@ func NewEncoding(encoder string) *Encoding { return e } +// WithPadding creates a new encoding identical to enc except +// with a specified padding character, or NoPadding to disable padding. +func (enc Encoding) WithPadding(padding rune) *Encoding { + enc.padChar = padding + return &enc +} + // StdEncoding is the standard base64 encoding, as defined in // RFC 4648. var StdEncoding = NewEncoding(encodeStd) @@ -51,12 +70,15 @@ var StdEncoding = NewEncoding(encodeStd) // It is typically used in URLs and file names. var URLEncoding = NewEncoding(encodeURL) -var removeNewlinesMapper = func(r rune) rune { - if r == '\r' || r == '\n' { - return -1 - } - return r -} +// RawStdEncoding is the standard raw, unpadded base64 encoding, +// as defined in RFC 4648 section 3.2. +// This is the same as StdEncoding but omits padding characters. +var RawStdEncoding = StdEncoding.WithPadding(NoPadding) + +// RawURLEncoding is the unpadded alternate base64 encoding defined in RFC 4648. 
+// It is typically used in URLs and file names. +// This is the same as URLEncoding but omits padding characters. +var RawURLEncoding = URLEncoding.WithPadding(NoPadding) /* * Encoder @@ -73,42 +95,45 @@ func (enc *Encoding) Encode(dst, src []byte) { return } - for len(src) > 0 { - var b0, b1, b2, b3 byte + di, si := 0, 0 + n := (len(src) / 3) * 3 + for si < n { + // Convert 3x 8bit source bytes into 4 bytes + val := uint(src[si+0])<<16 | uint(src[si+1])<<8 | uint(src[si+2]) - // Unpack 4x 6-bit source blocks into a 4 byte - // destination quantum - switch len(src) { - default: - b3 = src[2] & 0x3F - b2 = src[2] >> 6 - fallthrough - case 2: - b2 |= (src[1] << 2) & 0x3F - b1 = src[1] >> 4 - fallthrough - case 1: - b1 |= (src[0] << 4) & 0x3F - b0 = src[0] >> 2 - } + dst[di+0] = enc.encode[val>>18&0x3F] + dst[di+1] = enc.encode[val>>12&0x3F] + dst[di+2] = enc.encode[val>>6&0x3F] + dst[di+3] = enc.encode[val&0x3F] - // Encode 6-bit blocks using the base64 alphabet - dst[0] = enc.encode[b0] - dst[1] = enc.encode[b1] - dst[2] = enc.encode[b2] - dst[3] = enc.encode[b3] - - // Pad the final quantum - if len(src) < 3 { - dst[3] = '=' - if len(src) < 2 { - dst[2] = '=' - } - break - } + si += 3 + di += 4 + } + + remain := len(src) - si + if remain == 0 { + return + } + // Add the remaining small block + val := uint(src[si+0]) << 16 + if remain == 2 { + val |= uint(src[si+1]) << 8 + } + + dst[di+0] = enc.encode[val>>18&0x3F] + dst[di+1] = enc.encode[val>>12&0x3F] - src = src[3:] - dst = dst[4:] + switch remain { + case 2: + dst[di+2] = enc.encode[val>>6&0x3F] + if enc.padChar != NoPadding { + dst[di+3] = byte(enc.padChar) + } + case 1: + if enc.padChar != NoPadding { + dst[di+2] = byte(enc.padChar) + dst[di+3] = byte(enc.padChar) + } } } @@ -145,8 +170,8 @@ func (e *encoder) Write(p []byte) (n int, err error) { if e.nbuf < 3 { return } - e.enc.Encode(e.out[0:], e.buf[0:]) - if _, e.err = e.w.Write(e.out[0:4]); e.err != nil { + e.enc.Encode(e.out[:], e.buf[:]) + if _, e.err = 
e.w.Write(e.out[:4]); e.err != nil { return n, e.err } e.nbuf = 0 @@ -159,7 +184,7 @@ func (e *encoder) Write(p []byte) (n int, err error) { nn = len(p) nn -= nn % 3 } - e.enc.Encode(e.out[0:], p[0:nn]) + e.enc.Encode(e.out[:], p[:nn]) if _, e.err = e.w.Write(e.out[0 : nn/3*4]); e.err != nil { return n, e.err } @@ -181,9 +206,9 @@ func (e *encoder) Write(p []byte) (n int, err error) { func (e *encoder) Close() error { // If there's anything left in the buffer, flush it out if e.err == nil && e.nbuf > 0 { - e.enc.Encode(e.out[0:], e.buf[0:e.nbuf]) + e.enc.Encode(e.out[:], e.buf[:e.nbuf]) + _, e.err = e.w.Write(e.out[:e.enc.EncodedLen(e.nbuf)]) e.nbuf = 0 - _, e.err = e.w.Write(e.out[0:4]) } return e.err } @@ -199,7 +224,12 @@ func NewEncoder(enc *Encoding, w io.Writer) io.WriteCloser { // EncodedLen returns the length in bytes of the base64 encoding // of an input buffer of length n. -func (enc *Encoding) EncodedLen(n int) int { return (n + 2) / 3 * 4 } +func (enc *Encoding) EncodedLen(n int) int { + if enc.padChar == NoPadding { + return (n*8 + 5) / 6 // minimum # chars at 6 bits per char + } + return (n + 2) / 3 * 4 // minimum # 4-char quanta, 3 bytes each +} /* * Decoder @@ -212,66 +242,86 @@ func (e CorruptInputError) Error() string { } // decode is like Decode but returns an additional 'end' value, which -// indicates if end-of-message padding was encountered and thus any -// additional data is an error. This method assumes that src has been -// stripped of all supported whitespace ('\r' and '\n'). +// indicates if end-of-message padding or a partial quantum was encountered +// and thus any additional data is an error. 
func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) { - olen := len(src) - for len(src) > 0 && !end { + si := 0 + + // skip over newlines + for si < len(src) && (src[si] == '\n' || src[si] == '\r') { + si++ + } + + for si < len(src) && !end { // Decode quantum using the base64 alphabet var dbuf [4]byte - dlen := 4 + dinc, dlen := 3, 4 for j := range dbuf { - if len(src) == 0 { - return n, false, CorruptInputError(olen - len(src) - j) + if len(src) == si { + if enc.padChar != NoPadding || j < 2 { + return n, false, CorruptInputError(si - j) + } + dinc, dlen, end = j-1, j, true + break } - in := src[0] - src = src[1:] - if in == '=' { + in := src[si] + + si++ + // skip over newlines + for si < len(src) && (src[si] == '\n' || src[si] == '\r') { + si++ + } + + if rune(in) == enc.padChar { // We've reached the end and there's padding switch j { case 0, 1: // incorrect padding - return n, false, CorruptInputError(olen - len(src) - 1) + return n, false, CorruptInputError(si - 1) case 2: // "==" is expected, the first "=" is already consumed. 
- if len(src) == 0 { + if si == len(src) { // not enough padding - return n, false, CorruptInputError(olen) + return n, false, CorruptInputError(len(src)) } - if src[0] != '=' { + if rune(src[si]) != enc.padChar { // incorrect padding - return n, false, CorruptInputError(olen - len(src) - 1) + return n, false, CorruptInputError(si - 1) + } + + si++ + // skip over newlines + for si < len(src) && (src[si] == '\n' || src[si] == '\r') { + si++ } - src = src[1:] } - if len(src) > 0 { + if si < len(src) { // trailing garbage - err = CorruptInputError(olen - len(src)) + err = CorruptInputError(si) } - dlen, end = j, true + dinc, dlen, end = 3, j, true break } dbuf[j] = enc.decodeMap[in] if dbuf[j] == 0xFF { - return n, false, CorruptInputError(olen - len(src) - 1) + return n, false, CorruptInputError(si - 1) } } - // Pack 4x 6-bit source blocks into 3 byte destination - // quantum + // Convert 4x 6bit source bytes into 3 bytes + val := uint(dbuf[0])<<18 | uint(dbuf[1])<<12 | uint(dbuf[2])<<6 | uint(dbuf[3]) switch dlen { case 4: - dst[2] = dbuf[2]<<6 | dbuf[3] + dst[2] = byte(val >> 0) fallthrough case 3: - dst[1] = dbuf[1]<<4 | dbuf[2]>>2 + dst[1] = byte(val >> 8) fallthrough case 2: - dst[0] = dbuf[0]<<2 | dbuf[1]>>4 + dst[0] = byte(val >> 16) } - dst = dst[3:] + dst = dst[dinc:] n += dlen - 1 } @@ -284,14 +334,12 @@ func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) { // number of bytes successfully written and CorruptInputError. // New line characters (\r and \n) are ignored. func (enc *Encoding) Decode(dst, src []byte) (n int, err error) { - src = bytes.Map(removeNewlinesMapper, src) n, _, err = enc.decode(dst, src) return } // DecodeString returns the bytes represented by the base64 string s. 
func (enc *Encoding) DecodeString(s string) ([]byte, error) { - s = strings.Map(removeNewlinesMapper, s) dbuf := make([]byte, enc.DecodedLen(len(s))) n, _, err := enc.decode(dbuf, []byte(s)) return dbuf[:n], err @@ -320,6 +368,8 @@ func (d *decoder) Read(p []byte) (n int, err error) { return n, nil } + // This code assumes that d.r strips supported whitespace ('\r' and '\n'). + // Read a chunk. nn := len(p) / 3 * 4 if nn < 4 { @@ -338,12 +388,12 @@ func (d *decoder) Read(p []byte) (n int, err error) { nr := d.nbuf / 4 * 4 nw := d.nbuf / 4 * 3 if nw > len(p) { - nw, d.end, d.err = d.enc.decode(d.outbuf[0:], d.buf[0:nr]) - d.out = d.outbuf[0:nw] + nw, d.end, d.err = d.enc.decode(d.outbuf[:], d.buf[:nr]) + d.out = d.outbuf[:nw] n = copy(p, d.out) d.out = d.out[n:] } else { - n, d.end, d.err = d.enc.decode(p, d.buf[0:nr]) + n, d.end, d.err = d.enc.decode(p, d.buf[:nr]) } d.nbuf -= nr for i := 0; i < d.nbuf; i++ { @@ -364,7 +414,7 @@ func (r *newlineFilteringReader) Read(p []byte) (int, error) { n, err := r.wrapped.Read(p) for n > 0 { offset := 0 - for i, b := range p[0:n] { + for i, b := range p[:n] { if b != '\r' && b != '\n' { if i != offset { p[offset] = b @@ -388,4 +438,11 @@ func NewDecoder(enc *Encoding, r io.Reader) io.Reader { // DecodedLen returns the maximum length in bytes of the decoded data // corresponding to n bytes of base64-encoded data. -func (enc *Encoding) DecodedLen(n int) int { return n / 4 * 3 } +func (enc *Encoding) DecodedLen(n int) int { + if enc.padChar == NoPadding { + // Unpadded data may end with partial block of 2-3 characters. + return (n*6 + 7) / 8 + } + // Padded base64 should always be a multiple of 4 characters in length. 
+ return n / 4 * 3 +} diff --git a/libgo/go/encoding/base64/base64_test.go b/libgo/go/encoding/base64/base64_test.go index 7d199bf..d144b96 100644 --- a/libgo/go/encoding/base64/base64_test.go +++ b/libgo/go/encoding/base64/base64_test.go @@ -45,6 +45,48 @@ var pairs = []testpair{ {"sure.", "c3VyZS4="}, } +// Do nothing to a reference base64 string (leave in standard format) +func stdRef(ref string) string { + return ref +} + +// Convert a reference string to URL-encoding +func urlRef(ref string) string { + ref = strings.Replace(ref, "+", "-", -1) + ref = strings.Replace(ref, "/", "_", -1) + return ref +} + +// Convert a reference string to raw, unpadded format +func rawRef(ref string) string { + return strings.TrimRight(ref, "=") +} + +// Both URL and unpadding conversions +func rawUrlRef(ref string) string { + return rawRef(urlRef(ref)) +} + +// A nonstandard encoding with a funny padding character, for testing +var funnyEncoding = NewEncoding(encodeStd).WithPadding(rune('@')) + +func funnyRef(ref string) string { + return strings.Replace(ref, "=", "@", -1) +} + +type encodingTest struct { + enc *Encoding // Encoding to test + conv func(string) string // Reference string converter +} + +var encodingTests = []encodingTest{ + encodingTest{StdEncoding, stdRef}, + encodingTest{URLEncoding, urlRef}, + encodingTest{RawStdEncoding, rawRef}, + encodingTest{RawURLEncoding, rawUrlRef}, + encodingTest{funnyEncoding, funnyRef}, +} + var bigtest = testpair{ "Twas brillig, and the slithy toves", "VHdhcyBicmlsbGlnLCBhbmQgdGhlIHNsaXRoeSB0b3Zlcw==", @@ -60,8 +102,11 @@ func testEqual(t *testing.T, msg string, args ...interface{}) bool { func TestEncode(t *testing.T) { for _, p := range pairs { - got := StdEncoding.EncodeToString([]byte(p.decoded)) - testEqual(t, "Encode(%q) = %q, want %q", p.decoded, got, p.encoded) + for _, tt := range encodingTests { + got := tt.enc.EncodeToString([]byte(p.decoded)) + testEqual(t, "Encode(%q) = %q, want %q", p.decoded, + got, 
tt.conv(p.encoded)) + } } } @@ -97,18 +142,21 @@ func TestEncoderBuffering(t *testing.T) { func TestDecode(t *testing.T) { for _, p := range pairs { - dbuf := make([]byte, StdEncoding.DecodedLen(len(p.encoded))) - count, end, err := StdEncoding.decode(dbuf, []byte(p.encoded)) - testEqual(t, "Decode(%q) = error %v, want %v", p.encoded, err, error(nil)) - testEqual(t, "Decode(%q) = length %v, want %v", p.encoded, count, len(p.decoded)) - if len(p.encoded) > 0 { - testEqual(t, "Decode(%q) = end %v, want %v", p.encoded, end, (p.encoded[len(p.encoded)-1] == '=')) - } - testEqual(t, "Decode(%q) = %q, want %q", p.encoded, string(dbuf[0:count]), p.decoded) + for _, tt := range encodingTests { + encoded := tt.conv(p.encoded) + dbuf := make([]byte, tt.enc.DecodedLen(len(encoded))) + count, end, err := tt.enc.decode(dbuf, []byte(encoded)) + testEqual(t, "Decode(%q) = error %v, want %v", encoded, err, error(nil)) + testEqual(t, "Decode(%q) = length %v, want %v", encoded, count, len(p.decoded)) + if len(encoded) > 0 { + testEqual(t, "Decode(%q) = end %v, want %v", encoded, end, len(p.decoded)%3 != 0) + } + testEqual(t, "Decode(%q) = %q, want %q", encoded, string(dbuf[0:count]), p.decoded) - dbuf, err = StdEncoding.DecodeString(p.encoded) - testEqual(t, "DecodeString(%q) = error %v, want %v", p.encoded, err, error(nil)) - testEqual(t, "DecodeString(%q) = %q, want %q", string(dbuf), p.decoded) + dbuf, err = tt.enc.DecodeString(encoded) + testEqual(t, "DecodeString(%q) = error %v, want %v", encoded, err, error(nil)) + testEqual(t, "DecodeString(%q) = %q, want %q", string(dbuf), p.decoded) + } } } diff --git a/libgo/go/encoding/binary/binary.go b/libgo/go/encoding/binary/binary.go index 466bf97..2bbe07c 100644 --- a/libgo/go/encoding/binary/binary.go +++ b/libgo/go/encoding/binary/binary.go @@ -13,7 +13,7 @@ // The varint functions encode and decode single integer values using // a variable-length encoding; smaller values require fewer bytes. 
// For a specification, see -// http://code.google.com/apis/protocolbuffers/docs/encoding.html. +// https://developers.google.com/protocol-buffers/docs/encoding. // // This package favors simplicity over efficiency. Clients that require // high-performance serialization, especially for large data structures, @@ -239,78 +239,62 @@ func Write(w io.Writer, order ByteOrder, data interface{}) error { } switch v := data.(type) { case *int8: - bs = b[:1] b[0] = byte(*v) case int8: - bs = b[:1] b[0] = byte(v) case []int8: for i, x := range v { bs[i] = byte(x) } case *uint8: - bs = b[:1] b[0] = *v case uint8: - bs = b[:1] b[0] = byte(v) case []uint8: bs = v case *int16: - bs = b[:2] order.PutUint16(bs, uint16(*v)) case int16: - bs = b[:2] order.PutUint16(bs, uint16(v)) case []int16: for i, x := range v { order.PutUint16(bs[2*i:], uint16(x)) } case *uint16: - bs = b[:2] order.PutUint16(bs, *v) case uint16: - bs = b[:2] order.PutUint16(bs, v) case []uint16: for i, x := range v { order.PutUint16(bs[2*i:], x) } case *int32: - bs = b[:4] order.PutUint32(bs, uint32(*v)) case int32: - bs = b[:4] order.PutUint32(bs, uint32(v)) case []int32: for i, x := range v { order.PutUint32(bs[4*i:], uint32(x)) } case *uint32: - bs = b[:4] order.PutUint32(bs, *v) case uint32: - bs = b[:4] order.PutUint32(bs, v) case []uint32: for i, x := range v { order.PutUint32(bs[4*i:], x) } case *int64: - bs = b[:8] order.PutUint64(bs, uint64(*v)) case int64: - bs = b[:8] order.PutUint64(bs, uint64(v)) case []int64: for i, x := range v { order.PutUint64(bs[8*i:], uint64(x)) } case *uint64: - bs = b[:8] order.PutUint64(bs, *v) case uint64: - bs = b[:8] order.PutUint64(bs, v) case []uint64: for i, x := range v { @@ -605,25 +589,25 @@ func (e *encoder) skip(v reflect.Value) { // It returns zero if the type cannot be implemented by the fast path in Read or Write. 
func intDataSize(data interface{}) int { switch data := data.(type) { - case int8, *int8, *uint8: + case int8, uint8, *int8, *uint8: return 1 case []int8: return len(data) case []uint8: return len(data) - case int16, *int16, *uint16: + case int16, uint16, *int16, *uint16: return 2 case []int16: return 2 * len(data) case []uint16: return 2 * len(data) - case int32, *int32, *uint32: + case int32, uint32, *int32, *uint32: return 4 case []int32: return 4 * len(data) case []uint32: return 4 * len(data) - case int64, *int64, *uint64: + case int64, uint64, *int64, *uint64: return 8 case []int64: return 8 * len(data) diff --git a/libgo/go/encoding/csv/example_test.go b/libgo/go/encoding/csv/example_test.go new file mode 100644 index 0000000..e3c3bd5 --- /dev/null +++ b/libgo/go/encoding/csv/example_test.go @@ -0,0 +1,133 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +package csv_test + +import ( + "encoding/csv" + "fmt" + "io" + "log" + "os" + "strings" +) + +func ExampleReader() { + in := `first_name,last_name,username +"Rob","Pike",rob +Ken,Thompson,ken +"Robert","Griesemer","gri" +` + r := csv.NewReader(strings.NewReader(in)) + + for { + record, err := r.Read() + if err == io.EOF { + break + } + if err != nil { + log.Fatal(err) + } + + fmt.Println(record) + } + // Output: + // [first_name last_name username] + // [Rob Pike rob] + // [Ken Thompson ken] + // [Robert Griesemer gri] +} + +// This example shows how csv.Reader can be configured to handle other +// types of CSV files. 
+func ExampleReader_options() { + in := `first_name;last_name;username +"Rob";"Pike";rob +# lines beginning with a # character are ignored +Ken;Thompson;ken +"Robert";"Griesemer";"gri" +` + r := csv.NewReader(strings.NewReader(in)) + r.Comma = ';' + r.Comment = '#' + + records, err := r.ReadAll() + if err != nil { + log.Fatal(err) + } + + fmt.Print(records) + // Output: + // [[first_name last_name username] [Rob Pike rob] [Ken Thompson ken] [Robert Griesemer gri]] +} + +func ExampleReader_ReadAll() { + in := `first_name,last_name,username +"Rob","Pike",rob +Ken,Thompson,ken +"Robert","Griesemer","gri" +` + r := csv.NewReader(strings.NewReader(in)) + + records, err := r.ReadAll() + if err != nil { + log.Fatal(err) + } + + fmt.Print(records) + // Output: + // [[first_name last_name username] [Rob Pike rob] [Ken Thompson ken] [Robert Griesemer gri]] +} + +func ExampleWriter() { + records := [][]string{ + {"first_name", "last_name", "username"}, + {"Rob", "Pike", "rob"}, + {"Ken", "Thompson", "ken"}, + {"Robert", "Griesemer", "gri"}, + } + + w := csv.NewWriter(os.Stdout) + + for _, record := range records { + if err := w.Write(record); err != nil { + log.Fatalln("error writing record to csv:", err) + } + } + + // Write any buffered data to the underlying writer (standard output). 
+ w.Flush() + + if err := w.Error(); err != nil { + log.Fatal(err) + } + // Output: + // first_name,last_name,username + // Rob,Pike,rob + // Ken,Thompson,ken + // Robert,Griesemer,gri +} + +func ExampleWriter_WriteAll() { + records := [][]string{ + {"first_name", "last_name", "username"}, + {"Rob", "Pike", "rob"}, + {"Ken", "Thompson", "ken"}, + {"Robert", "Griesemer", "gri"}, + } + + w := csv.NewWriter(os.Stdout) + w.WriteAll(records) // calls Flush internally + + if err := w.Error(); err != nil { + log.Fatalln("error writing csv:", err) + } + // Output: + // first_name,last_name,username + // Rob,Pike,rob + // Ken,Thompson,ken + // Robert,Griesemer,gri +} diff --git a/libgo/go/encoding/csv/reader.go b/libgo/go/encoding/csv/reader.go index d943295..37bf80c 100644 --- a/libgo/go/encoding/csv/reader.go +++ b/libgo/go/encoding/csv/reader.go @@ -215,7 +215,7 @@ func (r *Reader) parseRecord() (fields []string, err error) { r.column = -1 // Peek at the first rune. If it is an error we are done. - // If we are support comments and it is the comment character + // If we support comments and it is the comment character // then skip to the end of line. r1, _, err := r.r.ReadRune() @@ -232,6 +232,11 @@ func (r *Reader) parseRecord() (fields []string, err error) { for { haveField, delim, err := r.parseField() if haveField { + // If FieldsPerRecord is greater then 0 we can assume the final + // length of fields to be equal to FieldsPerRecord. 
+ if r.FieldsPerRecord > 0 && fields == nil { + fields = make([]string, 0, r.FieldsPerRecord) + } fields = append(fields, r.field.String()) } if delim == '\n' || err == io.EOF { diff --git a/libgo/go/encoding/csv/reader_test.go b/libgo/go/encoding/csv/reader_test.go index 123df06..be1002d 100644 --- a/libgo/go/encoding/csv/reader_test.go +++ b/libgo/go/encoding/csv/reader_test.go @@ -87,6 +87,15 @@ field"`, }, }, { + Name: "BlankLineFieldCount", + Input: "a,b,c\n\nd,e,f\n\n", + UseFieldsPerRecord: true, + Output: [][]string{ + {"a", "b", "c"}, + {"d", "e", "f"}, + }, + }, + { Name: "TrimSpace", Input: " a, b, c\n", TrimLeadingSpace: true, @@ -282,3 +291,25 @@ func TestRead(t *testing.T) { } } } + +func BenchmarkRead(b *testing.B) { + data := `x,y,z,w +x,y,z, +x,y,, +x,,, +,,, +"x","y","z","w" +"x","y","z","" +"x","y","","" +"x","","","" +"","","","" +` + + for i := 0; i < b.N; i++ { + _, err := NewReader(strings.NewReader(data)).ReadAll() + + if err != nil { + b.Fatalf("could not read data: %s", err) + } + } +} diff --git a/libgo/go/encoding/csv/writer.go b/libgo/go/encoding/csv/writer.go index 17e7bb7..353d91f 100644 --- a/libgo/go/encoding/csv/writer.go +++ b/libgo/go/encoding/csv/writer.go @@ -114,7 +114,7 @@ func (w *Writer) WriteAll(records [][]string) (err error) { return w.w.Flush() } -// fieldNeedsQuotes returns true if our field must be enclosed in quotes. +// fieldNeedsQuotes reports whether our field must be enclosed in quotes. // Fields with a Comma, fields with a quote or newline, and // fields which start with a space must be enclosed in quotes. // We used to quote empty strings, but we do not anymore (as of Go 1.4). @@ -125,7 +125,7 @@ func (w *Writer) WriteAll(records [][]string) (err error) { // CSV with quoted empty strings strictly less useful. // Not quoting the empty string also makes this package match the behavior // of Microsoft Excel and Google Drive. -// For Postgres, quote the data termating string `\.`. 
+// For Postgres, quote the data terminating string `\.`. func (w *Writer) fieldNeedsQuotes(field string) bool { if field == "" { return false diff --git a/libgo/go/encoding/gob/codec_test.go b/libgo/go/encoding/gob/codec_test.go index 56a7298..c2583bf 100644 --- a/libgo/go/encoding/gob/codec_test.go +++ b/libgo/go/encoding/gob/codec_test.go @@ -1473,3 +1473,22 @@ func TestFuzzOneByte(t *testing.T) { } } } + +// Don't crash, just give error with invalid type id. +// Issue 9649. +func TestErrorInvalidTypeId(t *testing.T) { + data := []byte{0x01, 0x00, 0x01, 0x00} + d := NewDecoder(bytes.NewReader(data)) + // When running d.Decode(&foo) the first time the decoder stops + // after []byte{0x01, 0x00} and reports an errBadType. Running + // d.Decode(&foo) again on exactly the same input sequence should + // give another errBadType, but instead caused a panic because + // decoderMap wasn't cleaned up properly after the first error. + for i := 0; i < 2; i++ { + var foo struct{} + err := d.Decode(&foo) + if err != errBadType { + t.Fatal("decode: expected %s, got %s", errBadType, err) + } + } +} diff --git a/libgo/go/encoding/gob/decode.go b/libgo/go/encoding/gob/decode.go index a5bef93..e913f15 100644 --- a/libgo/go/encoding/gob/decode.go +++ b/libgo/go/encoding/gob/decode.go @@ -182,6 +182,17 @@ func (state *decoderState) decodeInt() int64 { return int64(x >> 1) } +// getLength decodes the next uint and makes sure it is a possible +// size for a data item that follows, which means it must fit in a +// non-negative int and fit in the buffer. +func (state *decoderState) getLength() (int, bool) { + n := int(state.decodeUint()) + if n < 0 || state.b.Len() < n || tooBig <= n { + return 0, false + } + return n, true +} + // decOp is the signature of a decoding operator for a given type. type decOp func(i *decInstr, state *decoderState, v reflect.Value) @@ -363,16 +374,9 @@ func decComplex128(i *decInstr, state *decoderState, value reflect.Value) { // describing the data. 
// uint8 slices are encoded as an unsigned count followed by the raw bytes. func decUint8Slice(i *decInstr, state *decoderState, value reflect.Value) { - u := state.decodeUint() - n := int(u) - if n < 0 || uint64(n) != u { - errorf("length of %s exceeds input size (%d bytes)", value.Type(), u) - } - if n > state.b.Len() { - errorf("%s data too long for buffer: %d", value.Type(), n) - } - if n > tooBig { - errorf("byte slice too big: %d", n) + n, ok := state.getLength() + if !ok { + errorf("bad %s slice length: %d", value.Type(), n) } if value.Cap() < n { value.Set(reflect.MakeSlice(value.Type(), n, n)) @@ -388,13 +392,9 @@ func decUint8Slice(i *decInstr, state *decoderState, value reflect.Value) { // describing the data. // Strings are encoded as an unsigned count followed by the raw bytes. func decString(i *decInstr, state *decoderState, value reflect.Value) { - u := state.decodeUint() - n := int(u) - if n < 0 || uint64(n) != u || n > state.b.Len() { - errorf("length of %s exceeds input size (%d bytes)", value.Type(), u) - } - if n > state.b.Len() { - errorf("%s data too long for buffer: %d", value.Type(), n) + n, ok := state.getLength() + if !ok { + errorf("bad %s slice length: %d", value.Type(), n) } // Read the data. data := make([]byte, n) @@ -406,7 +406,11 @@ func decString(i *decInstr, state *decoderState, value reflect.Value) { // ignoreUint8Array skips over the data for a byte slice value with no destination. 
func ignoreUint8Array(i *decInstr, state *decoderState, value reflect.Value) { - b := make([]byte, state.decodeUint()) + n, ok := state.getLength() + if !ok { + errorf("slice length too large") + } + b := make([]byte, n) state.b.Read(b) } @@ -571,6 +575,9 @@ func (dec *Decoder) decodeMap(mtyp reflect.Type, state *decoderState, value refl func (dec *Decoder) ignoreArrayHelper(state *decoderState, elemOp decOp, length int) { instr := &decInstr{elemOp, 0, nil, errors.New("no error")} for i := 0; i < length; i++ { + if state.b.Len() == 0 { + errorf("decoding array or slice: length exceeds input size (%d elements)", length) + } elemOp(instr, state, noValue) } } @@ -678,7 +685,11 @@ func (dec *Decoder) decodeInterface(ityp reflect.Type, state *decoderState, valu // ignoreInterface discards the data for an interface value with no destination. func (dec *Decoder) ignoreInterface(state *decoderState) { // Read the name of the concrete type. - b := make([]byte, state.decodeUint()) + n, ok := state.getLength() + if !ok { + errorf("bad interface encoding: name too large for buffer") + } + b := make([]byte, n) _, err := state.b.Read(b) if err != nil { error_(err) @@ -688,14 +699,22 @@ func (dec *Decoder) ignoreInterface(state *decoderState) { error_(dec.err) } // At this point, the decoder buffer contains a delimited value. Just toss it. - state.b.Drop(int(state.decodeUint())) + n, ok = state.getLength() + if !ok { + errorf("bad interface encoding: data length too large for buffer") + } + state.b.Drop(n) } // decodeGobDecoder decodes something implementing the GobDecoder interface. // The data is encoded as a byte slice. func (dec *Decoder) decodeGobDecoder(ut *userTypeInfo, state *decoderState, value reflect.Value) { // Read the bytes for the value. 
- b := make([]byte, state.decodeUint()) + n, ok := state.getLength() + if !ok { + errorf("GobDecoder: length too large for buffer") + } + b := make([]byte, n) _, err := state.b.Read(b) if err != nil { error_(err) @@ -717,7 +736,11 @@ func (dec *Decoder) decodeGobDecoder(ut *userTypeInfo, state *decoderState, valu // ignoreGobDecoder discards the data for a GobDecoder value with no destination. func (dec *Decoder) ignoreGobDecoder(state *decoderState) { // Read the bytes for the value. - b := make([]byte, state.decodeUint()) + n, ok := state.getLength() + if !ok { + errorf("GobDecoder: length too large for buffer") + } + b := make([]byte, n) _, err := state.b.Read(b) if err != nil { error_(err) @@ -840,16 +863,22 @@ func (dec *Decoder) decOpFor(wireId typeId, rt reflect.Type, name string, inProg } // decIgnoreOpFor returns the decoding op for a field that has no destination. -func (dec *Decoder) decIgnoreOpFor(wireId typeId) decOp { +func (dec *Decoder) decIgnoreOpFor(wireId typeId, inProgress map[typeId]*decOp) *decOp { + // If this type is already in progress, it's a recursive type (e.g. map[string]*T). + // Return the pointer to the op we're already building. + if opPtr := inProgress[wireId]; opPtr != nil { + return opPtr + } op, ok := decIgnoreOpMap[wireId] if !ok { + inProgress[wireId] = &op if wireId == tInterface { // Special case because it's a method: the ignored item might // define types and we need to record their state in the decoder. 
op = func(i *decInstr, state *decoderState, value reflect.Value) { state.dec.ignoreInterface(state) } - return op + return &op } // Special cases wire := dec.wireType[wireId] @@ -858,25 +887,25 @@ func (dec *Decoder) decIgnoreOpFor(wireId typeId) decOp { errorf("bad data: undefined type %s", wireId.string()) case wire.ArrayT != nil: elemId := wire.ArrayT.Elem - elemOp := dec.decIgnoreOpFor(elemId) + elemOp := dec.decIgnoreOpFor(elemId, inProgress) op = func(i *decInstr, state *decoderState, value reflect.Value) { - state.dec.ignoreArray(state, elemOp, wire.ArrayT.Len) + state.dec.ignoreArray(state, *elemOp, wire.ArrayT.Len) } case wire.MapT != nil: keyId := dec.wireType[wireId].MapT.Key elemId := dec.wireType[wireId].MapT.Elem - keyOp := dec.decIgnoreOpFor(keyId) - elemOp := dec.decIgnoreOpFor(elemId) + keyOp := dec.decIgnoreOpFor(keyId, inProgress) + elemOp := dec.decIgnoreOpFor(elemId, inProgress) op = func(i *decInstr, state *decoderState, value reflect.Value) { - state.dec.ignoreMap(state, keyOp, elemOp) + state.dec.ignoreMap(state, *keyOp, *elemOp) } case wire.SliceT != nil: elemId := wire.SliceT.Elem - elemOp := dec.decIgnoreOpFor(elemId) + elemOp := dec.decIgnoreOpFor(elemId, inProgress) op = func(i *decInstr, state *decoderState, value reflect.Value) { - state.dec.ignoreSlice(state, elemOp) + state.dec.ignoreSlice(state, *elemOp) } case wire.StructT != nil: @@ -899,7 +928,7 @@ func (dec *Decoder) decIgnoreOpFor(wireId typeId) decOp { if op == nil { errorf("bad data: ignore can't handle type %s", wireId.string()) } - return op + return &op } // gobDecodeOpFor returns the op for a type that is known to implement @@ -1033,9 +1062,9 @@ func (dec *Decoder) compileSingle(remoteId typeId, ut *userTypeInfo) (engine *de func (dec *Decoder) compileIgnoreSingle(remoteId typeId) (engine *decEngine, err error) { engine = new(decEngine) engine.instr = make([]decInstr, 1) // one item - op := dec.decIgnoreOpFor(remoteId) + op := dec.decIgnoreOpFor(remoteId, 
make(map[typeId]*decOp)) ovfl := overflow(dec.typeString(remoteId)) - engine.instr[0] = decInstr{op, 0, nil, ovfl} + engine.instr[0] = decInstr{*op, 0, nil, ovfl} engine.numInstr = 1 return } @@ -1043,6 +1072,7 @@ func (dec *Decoder) compileIgnoreSingle(remoteId typeId) (engine *decEngine, err // compileDec compiles the decoder engine for a value. If the value is not a struct, // it calls out to compileSingle. func (dec *Decoder) compileDec(remoteId typeId, ut *userTypeInfo) (engine *decEngine, err error) { + defer catchError(&err) rt := ut.base srt := rt if srt.Kind() != reflect.Struct || ut.externalDec != 0 { @@ -1077,8 +1107,8 @@ func (dec *Decoder) compileDec(remoteId typeId, ut *userTypeInfo) (engine *decEn localField, present := srt.FieldByName(wireField.Name) // TODO(r): anonymous names if !present || !isExported(wireField.Name) { - op := dec.decIgnoreOpFor(wireField.Id) - engine.instr[fieldnum] = decInstr{op, fieldnum, nil, ovfl} + op := dec.decIgnoreOpFor(wireField.Id, make(map[typeId]*decOp)) + engine.instr[fieldnum] = decInstr{*op, fieldnum, nil, ovfl} continue } if !dec.compatibleType(localField.Type, wireField.Id, make(map[reflect.Type]typeId)) { @@ -1116,7 +1146,7 @@ type emptyStruct struct{} var emptyStructType = reflect.TypeOf(emptyStruct{}) -// getDecEnginePtr returns the engine for the specified type when the value is to be discarded. +// getIgnoreEnginePtr returns the engine for the specified type when the value is to be discarded. 
func (dec *Decoder) getIgnoreEnginePtr(wireId typeId) (enginePtr **decEngine, err error) { var ok bool if enginePtr, ok = dec.ignorerCache[wireId]; !ok { @@ -1155,8 +1185,9 @@ func (dec *Decoder) decodeValue(wireId typeId, value reflect.Value) { value = decAlloc(value) engine := *enginePtr if st := base; st.Kind() == reflect.Struct && ut.externalDec == 0 { + wt := dec.wireType[wireId] if engine.numInstr == 0 && st.NumField() > 0 && - dec.wireType[wireId] != nil && len(dec.wireType[wireId].StructT.Field) > 0 { + wt != nil && len(wt.StructT.Field) > 0 { name := base.Name() errorf("type mismatch: no fields matched compiling decoder for %s", name) } diff --git a/libgo/go/encoding/gob/doc.go b/libgo/go/encoding/gob/doc.go index d0acaba..4d3d007 100644 --- a/libgo/go/encoding/gob/doc.go +++ b/libgo/go/encoding/gob/doc.go @@ -6,7 +6,7 @@ Package gob manages streams of gobs - binary values exchanged between an Encoder (transmitter) and a Decoder (receiver). A typical use is transporting arguments and results of remote procedure calls (RPCs) such as those provided by -package "rpc". +package "net/rpc". The implementation compiles a custom codec for each data type in the stream and is most efficient when a single Encoder is used to transmit a stream of values, @@ -83,7 +83,7 @@ allocated. Regardless, the length of the resulting slice reports the number of elements decoded. Functions and channels will not be sent in a gob. Attempting to encode such a value -at top the level will fail. A struct field of chan or func type is treated exactly +at the top level will fail. A struct field of chan or func type is treated exactly like an unexported field and is ignored. Gob can encode a value of any type implementing the GobEncoder or @@ -111,11 +111,11 @@ A signed integer, i, is encoded within an unsigned integer, u. Within u, bits 1 upward contain the value; bit 0 says whether they should be complemented upon receipt. 
The encode algorithm looks like this: - uint u; + var u uint if i < 0 { - u = (^i << 1) | 1 // complement i, bit 0 is 1 + u = (^uint(i) << 1) | 1 // complement i, bit 0 is 1 } else { - u = (i << 1) // do not complement i, bit 0 is 0 + u = (uint(i) << 1) // do not complement i, bit 0 is 0 } encodeUnsigned(u) @@ -137,9 +137,9 @@ All other slices and arrays are sent as an unsigned count followed by that many elements using the standard gob encoding for their type, recursively. Maps are sent as an unsigned count followed by that many key, element -pairs. Empty but non-nil maps are sent, so if the sender has allocated -a map, the receiver will allocate a map even if no elements are -transmitted. +pairs. Empty but non-nil maps are sent, so if the receiver has not allocated +one already, one will always be allocated on receipt unless the transmitted map +is nil and not at the top level. Structs are sent as a sequence of (field number, field value) pairs. The field value is sent using the standard gob encoding for its type, recursively. If a @@ -246,7 +246,7 @@ where * signifies zero or more repetitions and the type id of a value must be predefined or be defined before the value in the stream. See "Gobs of data" for a design discussion of the gob wire format: -http://golang.org/doc/articles/gobs_of_data.html +https://blog.golang.org/gobs-of-data */ package gob diff --git a/libgo/go/encoding/gob/encoder.go b/libgo/go/encoding/gob/encoder.go index a340e47..62d0f42 100644 --- a/libgo/go/encoding/gob/encoder.go +++ b/libgo/go/encoding/gob/encoder.go @@ -5,6 +5,7 @@ package gob import ( + "errors" "io" "reflect" "sync" @@ -65,6 +66,11 @@ func (enc *Encoder) writeMessage(w io.Writer, b *encBuffer) { // it by hand. message := b.Bytes() messageLen := len(message) - maxLength + // Length cannot be bigger than the decoder can handle. + if messageLen >= tooBig { + enc.setError(errors.New("gob: encoder: message too big")) + return + } // Encode the length. 
enc.countState.b.Reset() enc.countState.encodeUint(uint64(messageLen)) diff --git a/libgo/go/encoding/gob/encoder_test.go b/libgo/go/encoding/gob/encoder_test.go index 0ea4c0e..dc65734 100644 --- a/libgo/go/encoding/gob/encoder_test.go +++ b/libgo/go/encoding/gob/encoder_test.go @@ -6,8 +6,8 @@ package gob import ( "bytes" + "encoding/hex" "fmt" - "io" "reflect" "strings" "testing" @@ -187,24 +187,6 @@ func TestWrongTypeDecoder(t *testing.T) { badTypeCheck(new(ET4), true, "different type of field", t) } -func corruptDataCheck(s string, err error, t *testing.T) { - b := bytes.NewBufferString(s) - dec := NewDecoder(b) - err1 := dec.Decode(new(ET2)) - if err1 != err { - t.Errorf("from %q expected error %s; got %s", s, err, err1) - } -} - -// Check that we survive bad data. -func TestBadData(t *testing.T) { - corruptDataCheck("", io.EOF, t) - corruptDataCheck("\x7Fhi", io.ErrUnexpectedEOF, t) - corruptDataCheck("\x03now is the time for all good men", errBadType, t) - // issue 6323. - corruptDataCheck("\x04\x24foo", errRange, t) -} - // Types not supported at top level by the Encoder. var unsupportedValues = []interface{}{ make(chan int), @@ -545,6 +527,30 @@ func TestDecodeIntoNothing(t *testing.T) { } } +func TestIgnoreRecursiveType(t *testing.T) { + // It's hard to build a self-contained test for this because + // we can't build compatible types in one package with + // different items so something is ignored. Here is + // some data that represents, according to debug.go: + // type definition { + // slice "recursiveSlice" id=106 + // elem id=106 + // } + data := []byte{ + 0x1d, 0xff, 0xd3, 0x02, 0x01, 0x01, 0x0e, 0x72, + 0x65, 0x63, 0x75, 0x72, 0x73, 0x69, 0x76, 0x65, + 0x53, 0x6c, 0x69, 0x63, 0x65, 0x01, 0xff, 0xd4, + 0x00, 0x01, 0xff, 0xd4, 0x00, 0x00, 0x07, 0xff, + 0xd4, 0x00, 0x02, 0x01, 0x00, 0x00, + } + dec := NewDecoder(bytes.NewReader(data)) + // Issue 10415: This caused infinite recursion. 
+ err := dec.Decode(nil) + if err != nil { + t.Fatal(err) + } +} + // Another bug from golang-nuts, involving nested interfaces. type Bug0Outer struct { Bug0Field interface{} @@ -951,6 +957,64 @@ func TestErrorForHugeSlice(t *testing.T) { t.Fatal("decode: no error") } if !strings.Contains(err.Error(), "slice too big") { - t.Fatal("decode: expected slice too big error, got %s", err.Error()) + t.Fatalf("decode: expected slice too big error, got %s", err.Error()) + } +} + +type badDataTest struct { + input string // The input encoded as a hex string. + error string // A substring of the error that should result. + data interface{} // What to decode into. +} + +var badDataTests = []badDataTest{ + {"", "EOF", nil}, + {"7F6869", "unexpected EOF", nil}, + {"036e6f77206973207468652074696d6520666f7220616c6c20676f6f64206d656e", "unknown type id", new(ET2)}, + {"0424666f6f", "field numbers out of bounds", new(ET2)}, // Issue 6323. + {"05100028557b02027f8302", "interface encoding", nil}, // Issue 10270. + // Issue 10273. + {"130a00fb5dad0bf8ff020263e70002fa28020202a89859", "slice length too large", nil}, + {"0f1000fb285d003316020735ff023a65c5", "interface encoding", nil}, + {"03fffb0616fffc00f902ff02ff03bf005d02885802a311a8120228022c028ee7", "GobDecoder", nil}, + // Issue 10491. + {"10fe010f020102fe01100001fe010e000016fe010d030102fe010e00010101015801fe01100000000bfe011000f85555555555555555", "length exceeds input size", nil}, +} + +// TestBadData tests that various problems caused by malformed input +// are caught as errors and do not cause panics. 
+func TestBadData(t *testing.T) { + for i, test := range badDataTests { + data, err := hex.DecodeString(test.input) + if err != nil { + t.Fatalf("#%d: hex error: %s", i, err) + } + d := NewDecoder(bytes.NewReader(data)) + err = d.Decode(test.data) + if err == nil { + t.Errorf("decode: no error") + continue + } + if !strings.Contains(err.Error(), test.error) { + t.Errorf("#%d: decode: expected %q error, got %s", i, test.error, err.Error()) + } + } +} + +// TestHugeWriteFails tests that enormous messages trigger an error. +func TestHugeWriteFails(t *testing.T) { + if testing.Short() { + // Requires allocating a monster, so don't do this from all.bash. + t.Skip("skipping huge allocation in short mode") + } + huge := make([]byte, tooBig) + huge[0] = 7 // Make sure it's not all zeros. + buf := new(bytes.Buffer) + err := NewEncoder(buf).Encode(huge) + if err == nil { + t.Fatalf("expected error for huge slice") + } + if !strings.Contains(err.Error(), "message too big") { + t.Fatalf("expected 'too big' error; got %s\n", err.Error()) } } diff --git a/libgo/go/encoding/json/bench_test.go b/libgo/go/encoding/json/bench_test.go index 29dbc26..ed89d11 100644 --- a/libgo/go/encoding/json/bench_test.go +++ b/libgo/go/encoding/json/bench_test.go @@ -15,6 +15,7 @@ import ( "compress/gzip" "io/ioutil" "os" + "strings" "testing" ) @@ -126,6 +127,28 @@ func BenchmarkCodeDecoder(b *testing.B) { b.SetBytes(int64(len(codeJSON))) } +func BenchmarkDecoderStream(b *testing.B) { + b.StopTimer() + var buf bytes.Buffer + dec := NewDecoder(&buf) + buf.WriteString(`"` + strings.Repeat("x", 1000000) + `"` + "\n\n\n") + var x interface{} + if err := dec.Decode(&x); err != nil { + b.Fatal("Decode:", err) + } + ones := strings.Repeat(" 1\n", 300000) + "\n\n\n" + b.StartTimer() + for i := 0; i < b.N; i++ { + if i%300000 == 0 { + buf.WriteString(ones) + } + x = nil + if err := dec.Decode(&x); err != nil || x != 1.0 { + b.Fatalf("Decode: %v after %d", err, i) + } + } +} + func BenchmarkCodeUnmarshal(b 
*testing.B) { if codeJSON == nil { b.StopTimer() @@ -187,3 +210,14 @@ func BenchmarkUnmarshalInt64(b *testing.B) { } } } + +func BenchmarkIssue10335(b *testing.B) { + b.ReportAllocs() + var s struct{} + j := []byte(`{"a":{ }}`) + for n := 0; n < b.N; n++ { + if err := Unmarshal(j, &s); err != nil { + b.Fatal(err) + } + } +} diff --git a/libgo/go/encoding/json/decode.go b/libgo/go/encoding/json/decode.go index 705bc2e..530e852 100644 --- a/libgo/go/encoding/json/decode.go +++ b/libgo/go/encoding/json/decode.go @@ -48,6 +48,13 @@ import ( // map[string]interface{}, for JSON objects // nil for JSON null // +// To unmarshal a JSON array into a slice, Unmarshal resets the slice to nil +// and then appends each element to the slice. +// +// To unmarshal a JSON object into a map, Unmarshal replaces the map +// with an empty map and then adds key-value pairs from the object to +// the map. +// // If a JSON value is not appropriate for a given target type, // or if a JSON number overflows the target type, Unmarshal // skips that field and completes the unmarshalling as best it can. @@ -90,8 +97,9 @@ type Unmarshaler interface { // An UnmarshalTypeError describes a JSON value that was // not appropriate for a value of a specific Go type. 
type UnmarshalTypeError struct { - Value string // description of JSON value - "bool", "array", "number -5" - Type reflect.Type // type of Go value it could not be assigned to + Value string // description of JSON value - "bool", "array", "number -5" + Type reflect.Type // type of Go value it could not be assigned to + Offset int64 // error occurred after reading Offset bytes } func (e *UnmarshalTypeError) Error() string { @@ -377,7 +385,7 @@ func (d *decodeState) array(v reflect.Value) { return } if ut != nil { - d.saveError(&UnmarshalTypeError{"array", v.Type()}) + d.saveError(&UnmarshalTypeError{"array", v.Type(), int64(d.off)}) d.off-- d.next() return @@ -396,7 +404,7 @@ func (d *decodeState) array(v reflect.Value) { // Otherwise it's invalid. fallthrough default: - d.saveError(&UnmarshalTypeError{"array", v.Type()}) + d.saveError(&UnmarshalTypeError{"array", v.Type(), int64(d.off)}) d.off-- d.next() return @@ -485,7 +493,7 @@ func (d *decodeState) object(v reflect.Value) { return } if ut != nil { - d.saveError(&UnmarshalTypeError{"object", v.Type()}) + d.saveError(&UnmarshalTypeError{"object", v.Type(), int64(d.off)}) d.off-- d.next() // skip over { } in input return @@ -504,7 +512,7 @@ func (d *decodeState) object(v reflect.Value) { // map must have string kind t := v.Type() if t.Key().Kind() != reflect.String { - d.saveError(&UnmarshalTypeError{"object", v.Type()}) + d.saveError(&UnmarshalTypeError{"object", v.Type(), int64(d.off)}) d.off-- d.next() // skip over { } in input return @@ -515,7 +523,7 @@ func (d *decodeState) object(v reflect.Value) { case reflect.Struct: default: - d.saveError(&UnmarshalTypeError{"object", v.Type()}) + d.saveError(&UnmarshalTypeError{"object", v.Type(), int64(d.off)}) d.off-- d.next() // skip over { } in input return @@ -599,7 +607,7 @@ func (d *decodeState) object(v reflect.Value) { case string: d.literalStore([]byte(qv), subv, true) default: - d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to 
unmarshal unquoted value into %v", item, v.Type())) + d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal unquoted value into %v", subv.Type())) } } else { d.value(subv) @@ -646,7 +654,7 @@ func (d *decodeState) convertNumber(s string) (interface{}, error) { } f, err := strconv.ParseFloat(s, 64) if err != nil { - return nil, &UnmarshalTypeError{"number " + s, reflect.TypeOf(0.0)} + return nil, &UnmarshalTypeError{"number " + s, reflect.TypeOf(0.0), int64(d.off)} } return f, nil } @@ -679,8 +687,9 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool if fromQuoted { d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) } else { - d.saveError(&UnmarshalTypeError{"string", v.Type()}) + d.saveError(&UnmarshalTypeError{"string", v.Type(), int64(d.off)}) } + return } s, ok := unquoteBytes(item) if !ok { @@ -713,7 +722,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool if fromQuoted { d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) } else { - d.saveError(&UnmarshalTypeError{"bool", v.Type()}) + d.saveError(&UnmarshalTypeError{"bool", v.Type(), int64(d.off)}) } case reflect.Bool: v.SetBool(value) @@ -721,7 +730,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool if v.NumMethod() == 0 { v.Set(reflect.ValueOf(value)) } else { - d.saveError(&UnmarshalTypeError{"bool", v.Type()}) + d.saveError(&UnmarshalTypeError{"bool", v.Type(), int64(d.off)}) } } @@ -736,10 +745,10 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool } switch v.Kind() { default: - d.saveError(&UnmarshalTypeError{"string", v.Type()}) + d.saveError(&UnmarshalTypeError{"string", v.Type(), int64(d.off)}) case reflect.Slice: - if v.Type() != byteSliceType { - d.saveError(&UnmarshalTypeError{"string", v.Type()}) + if 
v.Type().Elem().Kind() != reflect.Uint8 { + d.saveError(&UnmarshalTypeError{"string", v.Type(), int64(d.off)}) break } b := make([]byte, base64.StdEncoding.DecodedLen(len(s))) @@ -755,7 +764,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool if v.NumMethod() == 0 { v.Set(reflect.ValueOf(string(s))) } else { - d.saveError(&UnmarshalTypeError{"string", v.Type()}) + d.saveError(&UnmarshalTypeError{"string", v.Type(), int64(d.off)}) } } @@ -777,7 +786,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool if fromQuoted { d.error(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) } else { - d.error(&UnmarshalTypeError{"number", v.Type()}) + d.error(&UnmarshalTypeError{"number", v.Type(), int64(d.off)}) } case reflect.Interface: n, err := d.convertNumber(s) @@ -786,7 +795,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool break } if v.NumMethod() != 0 { - d.saveError(&UnmarshalTypeError{"number", v.Type()}) + d.saveError(&UnmarshalTypeError{"number", v.Type(), int64(d.off)}) break } v.Set(reflect.ValueOf(n)) @@ -794,7 +803,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: n, err := strconv.ParseInt(s, 10, 64) if err != nil || v.OverflowInt(n) { - d.saveError(&UnmarshalTypeError{"number " + s, v.Type()}) + d.saveError(&UnmarshalTypeError{"number " + s, v.Type(), int64(d.off)}) break } v.SetInt(n) @@ -802,7 +811,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: n, err := strconv.ParseUint(s, 10, 64) if err != nil || v.OverflowUint(n) { - d.saveError(&UnmarshalTypeError{"number " + s, v.Type()}) + d.saveError(&UnmarshalTypeError{"number " + s, v.Type(), int64(d.off)}) break } 
v.SetUint(n) @@ -810,7 +819,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool case reflect.Float32, reflect.Float64: n, err := strconv.ParseFloat(s, v.Type().Bits()) if err != nil || v.OverflowFloat(n) { - d.saveError(&UnmarshalTypeError{"number " + s, v.Type()}) + d.saveError(&UnmarshalTypeError{"number " + s, v.Type(), int64(d.off)}) break } v.SetFloat(n) diff --git a/libgo/go/encoding/json/decode_test.go b/libgo/go/encoding/json/decode_test.go index 7235969..8aa158f 100644 --- a/libgo/go/encoding/json/decode_test.go +++ b/libgo/go/encoding/json/decode_test.go @@ -9,6 +9,7 @@ import ( "encoding" "fmt" "image" + "net" "reflect" "strings" "testing" @@ -216,6 +217,9 @@ type XYZ struct { Z interface{} } +func sliceAddr(x []int) *[]int { return &x } +func mapAddr(x map[string]int) *map[string]int { return &x } + var unmarshalTests = []unmarshalTest{ // basic types {in: `true`, ptr: new(bool), out: true}, @@ -231,7 +235,7 @@ var unmarshalTests = []unmarshalTest{ {in: `"g-clef: \uD834\uDD1E"`, ptr: new(string), out: "g-clef: \U0001D11E"}, {in: `"invalid: \uD834x\uDD1E"`, ptr: new(string), out: "invalid: \uFFFDx\uFFFD"}, {in: "null", ptr: new(interface{}), out: nil}, - {in: `{"X": [1,2,3], "Y": 4}`, ptr: new(T), out: T{Y: 4}, err: &UnmarshalTypeError{"array", reflect.TypeOf("")}}, + {in: `{"X": [1,2,3], "Y": 4}`, ptr: new(T), out: T{Y: 4}, err: &UnmarshalTypeError{"array", reflect.TypeOf(""), 7}}, {in: `{"x": 1}`, ptr: new(tx), out: tx{}}, {in: `{"F1":1,"F2":2,"F3":3}`, ptr: new(V), out: V{F1: float64(1), F2: int32(2), F3: Number("3")}}, {in: `{"F1":1,"F2":2,"F3":3}`, ptr: new(V), out: V{F1: Number("1"), F2: int32(2), F3: Number("3")}, useNumber: true}, @@ -302,6 +306,12 @@ var unmarshalTests = []unmarshalTest{ {in: `["X"]`, ptr: &umslicepT, out: &umsliceT}, {in: `{"M":"X"}`, ptr: &umstructT, out: umstructT}, + // Overwriting of data. + // This is different from package xml, but it's what we've always done. 
+ // Now documented and tested. + {in: `[2]`, ptr: sliceAddr([]int{1}), out: []int{2}}, + {in: `{"key": 2}`, ptr: mapAddr(map[string]int{"old": 0, "key": 1}), out: map[string]int{"key": 2}}, + { in: `{ "Level0": 1, @@ -411,7 +421,7 @@ var unmarshalTests = []unmarshalTest{ { in: `{"2009-11-10T23:00:00Z": "hello world"}`, ptr: &map[time.Time]string{}, - err: &UnmarshalTypeError{"object", reflect.TypeOf(map[time.Time]string{})}, + err: &UnmarshalTypeError{"object", reflect.TypeOf(map[time.Time]string{}), 1}, }, } @@ -688,6 +698,7 @@ var wrongStringTests = []wrongStringTest{ {`{"result":"x"}`, `json: invalid use of ,string struct tag, trying to unmarshal "x" into string`}, {`{"result":"foo"}`, `json: invalid use of ,string struct tag, trying to unmarshal "foo" into string`}, {`{"result":"123"}`, `json: invalid use of ,string struct tag, trying to unmarshal "123" into string`}, + {`{"result":123}`, `json: invalid use of ,string struct tag, trying to unmarshal unquoted value into string`}, } // If people misuse the ,string modifier, the error message should be @@ -1085,7 +1096,7 @@ func TestNullString(t *testing.T) { *s.C = 2 err := Unmarshal(data, &s) if err != nil { - t.Fatalf("Unmarshal: %v") + t.Fatalf("Unmarshal: %v", err) } if s.B != 1 || s.C != nil { t.Fatalf("after Unmarshal, s.B=%d, s.C=%p, want 1, nil", s.B, s.C) @@ -1206,7 +1217,28 @@ func TestStringKind(t *testing.T) { if !reflect.DeepEqual(m1, m2) { t.Error("Items should be equal after encoding and then decoding") } +} + +// Custom types with []byte as underlying type could not be marshalled +// and then unmarshalled. +// Issue 8962. 
+func TestByteKind(t *testing.T) { + type byteKind []byte + a := byteKind("hello") + + data, err := Marshal(a) + if err != nil { + t.Error(err) + } + var b byteKind + err = Unmarshal(data, &b) + if err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(a, b) { + t.Errorf("expected %v == %v", a, b) + } } var decodeTypeErrorTests = []struct { @@ -1371,3 +1403,51 @@ func TestInvalidUnmarshal(t *testing.T) { } } } + +var invalidUnmarshalTextTests = []struct { + v interface{} + want string +}{ + {nil, "json: Unmarshal(nil)"}, + {struct{}{}, "json: Unmarshal(non-pointer struct {})"}, + {(*int)(nil), "json: Unmarshal(nil *int)"}, + {new(net.IP), "json: cannot unmarshal string into Go value of type *net.IP"}, +} + +func TestInvalidUnmarshalText(t *testing.T) { + buf := []byte(`123`) + for _, tt := range invalidUnmarshalTextTests { + err := Unmarshal(buf, tt.v) + if err == nil { + t.Errorf("Unmarshal expecting error, got nil") + continue + } + if got := err.Error(); got != tt.want { + t.Errorf("Unmarshal = %q; want %q", got, tt.want) + } + } +} + +// Test that string option is ignored for invalid types. +// Issue 9812. +func TestInvalidStringOption(t *testing.T) { + num := 0 + item := struct { + T time.Time `json:",string"` + M map[string]string `json:",string"` + S []string `json:",string"` + A [1]string `json:",string"` + I interface{} `json:",string"` + P *int `json:",string"` + }{M: make(map[string]string), S: make([]string, 0), I: num, P: &num} + + data, err := Marshal(item) + if err != nil { + t.Fatalf("Marshal: %v", err) + } + + err = Unmarshal(data, &item) + if err != nil { + t.Fatalf("Unmarshal: %v", err) + } +} diff --git a/libgo/go/encoding/json/encode.go b/libgo/go/encoding/json/encode.go index fca2a09..90782de 100644 --- a/libgo/go/encoding/json/encode.go +++ b/libgo/go/encoding/json/encode.go @@ -7,7 +7,7 @@ // in the documentation for the Marshal and Unmarshal functions. 
// // See "JSON and Go" for an introduction to this package: -// http://golang.org/doc/articles/json_and_go.html +// https://golang.org/doc/articles/json_and_go.html package json import ( @@ -79,8 +79,8 @@ import ( // // The "string" option signals that a field is stored as JSON inside a // JSON-encoded string. It applies only to fields of string, floating point, -// or integer types. This extra level of encoding is sometimes used when -// communicating with JavaScript programs: +// integer, or boolean types. This extra level of encoding is sometimes used +// when communicating with JavaScript programs: // // Int64String int64 `json:",string"` // @@ -113,8 +113,8 @@ import ( // a JSON tag of "-". // // Map values encode as JSON objects. -// The map's key type must be string; the object keys are used directly -// as map keys. +// The map's key type must be string; the map keys are used as JSON object +// keys, subject to the UTF-8 coercion described for string values above. // // Pointer values encode as the value pointed to. // A nil pointer encodes as the null JSON object. @@ -275,8 +275,6 @@ func (e *encodeState) error(err error) { panic(err) } -var byteSliceType = reflect.TypeOf([]byte(nil)) - func isEmptyValue(v reflect.Value) bool { switch v.Kind() { case reflect.Array, reflect.Map, reflect.Slice, reflect.String: @@ -1045,6 +1043,19 @@ func typeFields(t reflect.Type) []field { ft = ft.Elem() } + // Only strings, floats, integers, and booleans can be quoted. + quoted := false + if opts.Contains("string") { + switch ft.Kind() { + case reflect.Bool, + reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, + reflect.Float32, reflect.Float64, + reflect.String: + quoted = true + } + } + // Record found field and index sequence. 
if name != "" || !sf.Anonymous || ft.Kind() != reflect.Struct { tagged := name != "" @@ -1057,7 +1068,7 @@ func typeFields(t reflect.Type) []field { index: index, typ: ft, omitEmpty: opts.Contains("omitempty"), - quoted: opts.Contains("string"), + quoted: quoted, })) if count[f.typ] > 1 { // If there were multiple instances, add a second, diff --git a/libgo/go/encoding/json/fold.go b/libgo/go/encoding/json/fold.go index d6f77c9..9e17012 100644 --- a/libgo/go/encoding/json/fold.go +++ b/libgo/go/encoding/json/fold.go @@ -26,7 +26,7 @@ const ( // The letters S and K are special because they map to 3 runes, not just 2: // * S maps to s and to U+017F 'ſ' Latin small letter long s // * k maps to K and to U+212A 'K' Kelvin sign -// See http://play.golang.org/p/tTxjOc0OGo +// See https://play.golang.org/p/tTxjOc0OGo // // The returned function is specialized for matching against s and // should only be given s. It's not curried for performance reasons. diff --git a/libgo/go/encoding/json/scanner.go b/libgo/go/encoding/json/scanner.go index a4609c8..38d0b08 100644 --- a/libgo/go/encoding/json/scanner.go +++ b/libgo/go/encoding/json/scanner.go @@ -38,8 +38,15 @@ func nextValue(data []byte, scan *scanner) (value, rest []byte, err error) { scan.reset() for i, c := range data { v := scan.step(scan, int(c)) - if v >= scanEnd { + if v >= scanEndObject { switch v { + // probe the scanner with a space to determine whether we will + // get scanEnd on the next character. Otherwise, if the next character + // is not a space, scanEndTop allocates a needless error. 
+ case scanEndObject, scanEndArray: + if scan.step(scan, ' ') == scanEnd { + return data[:i+1], data[i+1:], nil + } case scanError: return nil, nil, scan.err case scanEnd: diff --git a/libgo/go/encoding/json/scanner_test.go b/libgo/go/encoding/json/scanner_test.go index 7880342..66383ef 100644 --- a/libgo/go/encoding/json/scanner_test.go +++ b/libgo/go/encoding/json/scanner_test.go @@ -209,6 +209,7 @@ var benchScan scanner func BenchmarkSkipValue(b *testing.B) { initBig() + b.ResetTimer() for i := 0; i < b.N; i++ { nextValue(jsonBig, &benchScan) } diff --git a/libgo/go/encoding/json/stream.go b/libgo/go/encoding/json/stream.go index 9566eca..dc53bce 100644 --- a/libgo/go/encoding/json/stream.go +++ b/libgo/go/encoding/json/stream.go @@ -12,11 +12,15 @@ import ( // A Decoder reads and decodes JSON objects from an input stream. type Decoder struct { - r io.Reader - buf []byte - d decodeState - scan scanner - err error + r io.Reader + buf []byte + d decodeState + scanp int // start of unread data in buf + scan scanner + err error + + tokenState int + tokenStack []int } // NewDecoder returns a new decoder that reads from r. @@ -41,20 +45,29 @@ func (dec *Decoder) Decode(v interface{}) error { return dec.err } + if err := dec.tokenPrepareForDecode(); err != nil { + return err + } + + if !dec.tokenValueAllowed() { + return &SyntaxError{msg: "not at beginning of value"} + } + + // Read whole value into buffer. n, err := dec.readValue() if err != nil { return err } + dec.d.init(dec.buf[dec.scanp : dec.scanp+n]) + dec.scanp += n // Don't save err from unmarshal into dec.err: // the connection is still usable since we read a complete JSON // object from it before the error happened. - dec.d.init(dec.buf[0:n]) err = dec.d.unmarshal(v) - // Slide rest of data down. 
- rest := copy(dec.buf, dec.buf[n:]) - dec.buf = dec.buf[0:rest] + // fixup token streaming state + dec.tokenValueEnd() return err } @@ -62,7 +75,7 @@ func (dec *Decoder) Decode(v interface{}) error { // Buffered returns a reader of the data remaining in the Decoder's // buffer. The reader is valid until the next call to Decode. func (dec *Decoder) Buffered() io.Reader { - return bytes.NewReader(dec.buf) + return bytes.NewReader(dec.buf[dec.scanp:]) } // readValue reads a JSON value into dec.buf. @@ -70,7 +83,7 @@ func (dec *Decoder) Buffered() io.Reader { func (dec *Decoder) readValue() (int, error) { dec.scan.reset() - scanp := 0 + scanp := dec.scanp var err error Input: for { @@ -111,20 +124,35 @@ Input: return 0, err } - // Make room to read more into the buffer. - const minRead = 512 - if cap(dec.buf)-len(dec.buf) < minRead { - newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead) - copy(newBuf, dec.buf) - dec.buf = newBuf - } + n := scanp - dec.scanp + err = dec.refill() + scanp = dec.scanp + n + } + return scanp - dec.scanp, nil +} - // Read. Delay error for next iteration (after scan). - var n int - n, err = dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)]) - dec.buf = dec.buf[0 : len(dec.buf)+n] +func (dec *Decoder) refill() error { + // Make room to read more into the buffer. + // First slide down data already consumed. + if dec.scanp > 0 { + n := copy(dec.buf, dec.buf[dec.scanp:]) + dec.buf = dec.buf[:n] + dec.scanp = 0 } - return scanp, nil + + // Grow buffer if not large enough. + const minRead = 512 + if cap(dec.buf)-len(dec.buf) < minRead { + newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead) + copy(newBuf, dec.buf) + dec.buf = newBuf + } + + // Read. Delay error for next iteration (after scan). 
+ n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)]) + dec.buf = dec.buf[0 : len(dec.buf)+n] + + return err } func nonSpace(b []byte) bool { @@ -198,3 +226,255 @@ func (m *RawMessage) UnmarshalJSON(data []byte) error { var _ Marshaler = (*RawMessage)(nil) var _ Unmarshaler = (*RawMessage)(nil) + +// A Token holds a value of one of these types: +// +// Delim, for the four JSON delimiters [ ] { } +// bool, for JSON booleans +// float64, for JSON numbers +// Number, for JSON numbers +// string, for JSON string literals +// nil, for JSON null +// +type Token interface{} + +const ( + tokenTopValue = iota + tokenArrayStart + tokenArrayValue + tokenArrayComma + tokenObjectStart + tokenObjectKey + tokenObjectColon + tokenObjectValue + tokenObjectComma +) + +// advance tokenstate from a separator state to a value state +func (dec *Decoder) tokenPrepareForDecode() error { + // Note: Not calling peek before switch, to avoid + // putting peek into the standard Decode path. + // peek is only called when using the Token API. + switch dec.tokenState { + case tokenArrayComma: + c, err := dec.peek() + if err != nil { + return err + } + if c != ',' { + return &SyntaxError{"expected comma after array element", 0} + } + dec.scanp++ + dec.tokenState = tokenArrayValue + case tokenObjectColon: + c, err := dec.peek() + if err != nil { + return err + } + if c != ':' { + return &SyntaxError{"expected colon after object key", 0} + } + dec.scanp++ + dec.tokenState = tokenObjectValue + } + return nil +} + +func (dec *Decoder) tokenValueAllowed() bool { + switch dec.tokenState { + case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue: + return true + } + return false +} + +func (dec *Decoder) tokenValueEnd() { + switch dec.tokenState { + case tokenArrayStart, tokenArrayValue: + dec.tokenState = tokenArrayComma + case tokenObjectValue: + dec.tokenState = tokenObjectComma + } +} + +// A Delim is a JSON array or object delimiter, one of [ ] { or }. 
+type Delim rune + +func (d Delim) String() string { + return string(d) +} + +// Token returns the next JSON token in the input stream. +// At the end of the input stream, Token returns nil, io.EOF. +// +// Token guarantees that the delimiters [ ] { } it returns are +// properly nested and matched: if Token encounters an unexpected +// delimiter in the input, it will return an error. +// +// The input stream consists of basic JSON values—bool, string, +// number, and null—along with delimiters [ ] { } of type Delim +// to mark the start and end of arrays and objects. +// Commas and colons are elided. +func (dec *Decoder) Token() (Token, error) { + for { + c, err := dec.peek() + if err != nil { + return nil, err + } + switch c { + case '[': + if !dec.tokenValueAllowed() { + return dec.tokenError(c) + } + dec.scanp++ + dec.tokenStack = append(dec.tokenStack, dec.tokenState) + dec.tokenState = tokenArrayStart + return Delim('['), nil + + case ']': + if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma { + return dec.tokenError(c) + } + dec.scanp++ + dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] + dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] + dec.tokenValueEnd() + return Delim(']'), nil + + case '{': + if !dec.tokenValueAllowed() { + return dec.tokenError(c) + } + dec.scanp++ + dec.tokenStack = append(dec.tokenStack, dec.tokenState) + dec.tokenState = tokenObjectStart + return Delim('{'), nil + + case '}': + if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma { + return dec.tokenError(c) + } + dec.scanp++ + dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] + dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] + dec.tokenValueEnd() + return Delim('}'), nil + + case ':': + if dec.tokenState != tokenObjectColon { + return dec.tokenError(c) + } + dec.scanp++ + dec.tokenState = tokenObjectValue + continue + + case ',': + if dec.tokenState == tokenArrayComma { + dec.scanp++ + dec.tokenState = 
tokenArrayValue + continue + } + if dec.tokenState == tokenObjectComma { + dec.scanp++ + dec.tokenState = tokenObjectKey + continue + } + return dec.tokenError(c) + + case '"': + if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey { + var x string + old := dec.tokenState + dec.tokenState = tokenTopValue + err := dec.Decode(&x) + dec.tokenState = old + if err != nil { + clearOffset(err) + return nil, err + } + dec.tokenState = tokenObjectColon + return x, nil + } + fallthrough + + default: + if !dec.tokenValueAllowed() { + return dec.tokenError(c) + } + var x interface{} + if err := dec.Decode(&x); err != nil { + clearOffset(err) + return nil, err + } + return x, nil + } + } +} + +func clearOffset(err error) { + if s, ok := err.(*SyntaxError); ok { + s.Offset = 0 + } +} + +func (dec *Decoder) tokenError(c byte) (Token, error) { + var context string + switch dec.tokenState { + case tokenTopValue: + context = " looking for beginning of value" + case tokenArrayStart, tokenArrayValue, tokenObjectValue: + context = " looking for beginning of value" + case tokenArrayComma: + context = " after array element" + case tokenObjectKey: + context = " looking for beginning of object key string" + case tokenObjectColon: + context = " after object key" + case tokenObjectComma: + context = " after object key:value pair" + } + return nil, &SyntaxError{"invalid character " + quoteChar(int(c)) + " " + context, 0} +} + +// More reports whether there is another element in the +// current array or object being parsed. 
+func (dec *Decoder) More() bool { + c, err := dec.peek() + return err == nil && c != ']' && c != '}' +} + +func (dec *Decoder) peek() (byte, error) { + var err error + for { + for i := dec.scanp; i < len(dec.buf); i++ { + c := dec.buf[i] + if isSpace(rune(c)) { + continue + } + dec.scanp = i + return c, nil + } + // buffer has been scanned, now report any error + if err != nil { + return 0, err + } + err = dec.refill() + } +} + +/* +TODO + +// EncodeToken writes the given JSON token to the stream. +// It returns an error if the delimiters [ ] { } are not properly used. +// +// EncodeToken does not call Flush, because usually it is part of +// a larger operation such as Encode, and those will call Flush when finished. +// Callers that create an Encoder and then invoke EncodeToken directly, +// without using Encode, need to call Flush when finished to ensure that +// the JSON is written to the underlying writer. +func (e *Encoder) EncodeToken(t Token) error { + ... +} + +*/ diff --git a/libgo/go/encoding/json/stream_test.go b/libgo/go/encoding/json/stream_test.go index b562e87..c2e3040 100644 --- a/libgo/go/encoding/json/stream_test.go +++ b/libgo/go/encoding/json/stream_test.go @@ -6,8 +6,12 @@ package json import ( "bytes" + "io" "io/ioutil" + "log" "net" + "net/http" + "net/http/httptest" "reflect" "strings" "testing" @@ -204,3 +208,147 @@ func BenchmarkEncoderEncode(b *testing.B) { } } } + +type tokenStreamCase struct { + json string + expTokens []interface{} +} + +type decodeThis struct { + v interface{} +} + +var tokenStreamCases []tokenStreamCase = []tokenStreamCase{ + // streaming token cases + {json: `10`, expTokens: []interface{}{float64(10)}}, + {json: ` [10] `, expTokens: []interface{}{ + Delim('['), float64(10), Delim(']')}}, + {json: ` [false,10,"b"] `, expTokens: []interface{}{ + Delim('['), false, float64(10), "b", Delim(']')}}, + {json: `{ "a": 1 }`, expTokens: []interface{}{ + Delim('{'), "a", float64(1), Delim('}')}}, + {json: `{"a": 1, "b":"3"}`, 
expTokens: []interface{}{ + Delim('{'), "a", float64(1), "b", "3", Delim('}')}}, + {json: ` [{"a": 1},{"a": 2}] `, expTokens: []interface{}{ + Delim('['), + Delim('{'), "a", float64(1), Delim('}'), + Delim('{'), "a", float64(2), Delim('}'), + Delim(']')}}, + {json: `{"obj": {"a": 1}}`, expTokens: []interface{}{ + Delim('{'), "obj", Delim('{'), "a", float64(1), Delim('}'), + Delim('}')}}, + {json: `{"obj": [{"a": 1}]}`, expTokens: []interface{}{ + Delim('{'), "obj", Delim('['), + Delim('{'), "a", float64(1), Delim('}'), + Delim(']'), Delim('}')}}, + + // streaming tokens with intermittent Decode() + {json: `{ "a": 1 }`, expTokens: []interface{}{ + Delim('{'), "a", + decodeThis{float64(1)}, + Delim('}')}}, + {json: ` [ { "a" : 1 } ] `, expTokens: []interface{}{ + Delim('['), + decodeThis{map[string]interface{}{"a": float64(1)}}, + Delim(']')}}, + {json: ` [{"a": 1},{"a": 2}] `, expTokens: []interface{}{ + Delim('['), + decodeThis{map[string]interface{}{"a": float64(1)}}, + decodeThis{map[string]interface{}{"a": float64(2)}}, + Delim(']')}}, + {json: `{ "obj" : [ { "a" : 1 } ] }`, expTokens: []interface{}{ + Delim('{'), "obj", Delim('['), + decodeThis{map[string]interface{}{"a": float64(1)}}, + Delim(']'), Delim('}')}}, + + {json: `{"obj": {"a": 1}}`, expTokens: []interface{}{ + Delim('{'), "obj", + decodeThis{map[string]interface{}{"a": float64(1)}}, + Delim('}')}}, + {json: `{"obj": [{"a": 1}]}`, expTokens: []interface{}{ + Delim('{'), "obj", + decodeThis{[]interface{}{ + map[string]interface{}{"a": float64(1)}, + }}, + Delim('}')}}, + {json: ` [{"a": 1} {"a": 2}] `, expTokens: []interface{}{ + Delim('['), + decodeThis{map[string]interface{}{"a": float64(1)}}, + decodeThis{&SyntaxError{"expected comma after array element", 0}}, + }}, + {json: `{ "a" 1 }`, expTokens: []interface{}{ + Delim('{'), "a", + decodeThis{&SyntaxError{"expected colon after object key", 0}}, + }}, +} + +func TestDecodeInStream(t *testing.T) { + + for ci, tcase := range tokenStreamCases { + + 
dec := NewDecoder(strings.NewReader(tcase.json)) + for i, etk := range tcase.expTokens { + + var tk interface{} + var err error + + if dt, ok := etk.(decodeThis); ok { + etk = dt.v + err = dec.Decode(&tk) + } else { + tk, err = dec.Token() + } + if experr, ok := etk.(error); ok { + if err == nil || err.Error() != experr.Error() { + t.Errorf("case %v: Expected error %v in %q, but was %v", ci, experr, tcase.json, err) + } + break + } else if err == io.EOF { + t.Errorf("case %v: Unexpected EOF in %q", ci, tcase.json) + break + } else if err != nil { + t.Errorf("case %v: Unexpected error '%v' in %q", ci, err, tcase.json) + break + } + if !reflect.DeepEqual(tk, etk) { + t.Errorf(`case %v: %q @ %v expected %T(%v) was %T(%v)`, ci, tcase.json, i, etk, etk, tk, tk) + break + } + } + } + +} + +// Test from golang.org/issue/11893 +func TestHTTPDecoding(t *testing.T) { + const raw = `{ "foo": "bar" }` + + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write([]byte(raw)) + })) + defer ts.Close() + res, err := http.Get(ts.URL) + if err != nil { + log.Fatalf("GET failed: %v", err) + } + defer res.Body.Close() + + foo := struct { + Foo string + }{} + + d := NewDecoder(res.Body) + err = d.Decode(&foo) + if err != nil { + t.Fatalf("Decode: %v", err) + } + if foo.Foo != "bar" { + t.Errorf("decoded %q; want \"bar\"", foo.Foo) + } + + // make sure we get the EOF the second time + err = d.Decode(&foo) + if err != io.EOF { + t.Errorf("err = %v; want io.EOF", err) + } +} diff --git a/libgo/go/encoding/json/tagkey_test.go b/libgo/go/encoding/json/tagkey_test.go index 23e71c7..85bb4ba 100644 --- a/libgo/go/encoding/json/tagkey_test.go +++ b/libgo/go/encoding/json/tagkey_test.go @@ -37,11 +37,11 @@ type miscPlaneTag struct { } type percentSlashTag struct { - V string `json:"text/html%"` // http://golang.org/issue/2718 + V string `json:"text/html%"` // https://golang.org/issue/2718 } type punctuationTag struct { - V string 
`json:"!#$%&()*+-./:<=>?@[]^_{|}~"` // http://golang.org/issue/3546 + V string `json:"!#$%&()*+-./:<=>?@[]^_{|}~"` // https://golang.org/issue/3546 } type emptyTag struct { diff --git a/libgo/go/encoding/pem/pem.go b/libgo/go/encoding/pem/pem.go index 8ff7ee8..506196b 100644 --- a/libgo/go/encoding/pem/pem.go +++ b/libgo/go/encoding/pem/pem.go @@ -10,8 +10,10 @@ package pem import ( "bytes" "encoding/base64" + "errors" "io" "sort" + "strings" ) // A Block represents a PEM encoded structure. @@ -110,27 +112,37 @@ func Decode(data []byte) (p *Block, rest []byte) { } // TODO(agl): need to cope with values that spread across lines. - key, val := line[0:i], line[i+1:] + key, val := line[:i], line[i+1:] key = bytes.TrimSpace(key) val = bytes.TrimSpace(val) p.Headers[string(key)] = string(val) rest = next } - i := bytes.Index(rest, pemEnd) - if i < 0 { + var endIndex int + // If there were no headers, the END line might occur + // immediately, without a leading newline. + if len(p.Headers) == 0 && bytes.HasPrefix(rest, pemEnd[1:]) { + endIndex = 0 + } else { + endIndex = bytes.Index(rest, pemEnd) + } + + if endIndex < 0 { return decodeError(data, rest) } - base64Data := removeWhitespace(rest[0:i]) + base64Data := removeWhitespace(rest[:endIndex]) p.Bytes = make([]byte, base64.StdEncoding.DecodedLen(len(base64Data))) n, err := base64.StdEncoding.Decode(p.Bytes, base64Data) if err != nil { return decodeError(data, rest) } - p.Bytes = p.Bytes[0:n] + p.Bytes = p.Bytes[:n] - _, rest = getLine(rest[i+len(pemEnd):]) + // the -1 is because we might have only matched pemEnd without the + // leading newline if the PEM block was empty. 
+ _, rest = getLine(rest[endIndex+len(pemEnd)-1:]) return } @@ -171,6 +183,8 @@ type lineBreaker struct { out io.Writer } +var nl = []byte{'\n'} + func (l *lineBreaker) Write(b []byte) (n int, err error) { if l.used+len(b) < pemLineLength { copy(l.line[l.used:], b) @@ -190,7 +204,7 @@ func (l *lineBreaker) Write(b []byte) (n int, err error) { return } - n, err = l.out.Write([]byte{'\n'}) + n, err = l.out.Write(nl) if err != nil { return } @@ -204,7 +218,7 @@ func (l *lineBreaker) Close() (err error) { if err != nil { return } - _, err = l.out.Write([]byte{'\n'}) + _, err = l.out.Write(nl) } return @@ -244,11 +258,14 @@ func Encode(out io.Writer, b *Block) error { // For consistency of output, write other headers sorted by key. sort.Strings(h) for _, k := range h { + if strings.Contains(k, ":") { + return errors.New("pem: cannot encode a header key that contains a colon") + } if err := writeHeader(out, k, b.Headers[k]); err != nil { return err } } - if _, err := out.Write([]byte{'\n'}); err != nil { + if _, err := out.Write(nl); err != nil { return err } } diff --git a/libgo/go/encoding/pem/pem_test.go b/libgo/go/encoding/pem/pem_test.go index ccce42c..ab656c6 100644 --- a/libgo/go/encoding/pem/pem_test.go +++ b/libgo/go/encoding/pem/pem_test.go @@ -6,8 +6,11 @@ package pem import ( "bytes" + "io/ioutil" "reflect" + "strings" "testing" + "testing/quick" ) type GetLineTest struct { @@ -43,6 +46,32 @@ func TestDecode(t *testing.T) { if !reflect.DeepEqual(result, privateKey) { t.Errorf("#1 got:%#v want:%#v", result, privateKey) } + + isEmpty := func(block *Block) bool { + return block != nil && block.Type == "EMPTY" && len(block.Headers) == 0 && len(block.Bytes) == 0 + } + result, remainder = Decode(remainder) + if !isEmpty(result) { + t.Errorf("#2 should be empty but got:%#v", result) + } + result, remainder = Decode(remainder) + if !isEmpty(result) { + t.Errorf("#3 should be empty but got:%#v", result) + } + result, remainder = Decode(remainder) + if !isEmpty(result) 
{ + t.Errorf("#4 should be empty but got:%#v", result) + } + + result, remainder = Decode(remainder) + if result == nil || result.Type != "HEADERS" || len(result.Headers) != 1 { + t.Errorf("#5 expected single header block but got :%v", result) + } + + if len(remainder) != 0 { + t.Errorf("expected nothing remaining of pemData, but found %s", string(remainder)) + } + result, _ = Decode([]byte(pemPrivateKey2)) if !reflect.DeepEqual(result, privateKey2) { t.Errorf("#2 got:%#v want:%#v", result, privateKey2) @@ -116,6 +145,62 @@ func TestLineBreaker(t *testing.T) { } } +func TestFuzz(t *testing.T) { + testRoundtrip := func(block Block) bool { + for key := range block.Headers { + if strings.Contains(key, ":") { + // Keys with colons cannot be encoded. + return true + } + } + + var buf bytes.Buffer + err := Encode(&buf, &block) + decoded, rest := Decode(buf.Bytes()) + + switch { + case err != nil: + t.Errorf("Encode of %#v resulted in error: %s", &block, err) + case !reflect.DeepEqual(&block, decoded): + t.Errorf("Encode of %#v decoded as %#v", &block, decoded) + case len(rest) != 0: + t.Errorf("Encode of %#v decoded correctly, but with %x left over", block, rest) + default: + return true + } + return false + } + + // Explicitly test the empty block. 
+ if !testRoundtrip(Block{ + Type: "EMPTY", + Headers: make(map[string]string), + Bytes: []byte{}, + }) { + return + } + + quick.Check(testRoundtrip, nil) +} + +func BenchmarkEncode(b *testing.B) { + data := &Block{Bytes: make([]byte, 65536)} + b.SetBytes(int64(len(data.Bytes))) + for i := 0; i < b.N; i++ { + Encode(ioutil.Discard, data) + } +} + +func BenchmarkDecode(b *testing.B) { + block := &Block{Bytes: make([]byte, 65536)} + data := EncodeToMemory(block) + b.SetBytes(int64(len(data))) + b.ResetTimer() + for i := 0; i < b.N; i++ { + Decode(data) + } +} + var pemData = `verify return:0 -----BEGIN CERTIFICATE----- sdlfkjskldfj @@ -169,7 +254,32 @@ BTiHcL3s3KrJu1vDVrshvxfnz71KTeNnZH8UbOqT5i7fPGyXtY1XJddcbI/Q6tXf wHFsZc20TzSdsVLBtwksUacpbDogcEVMctnNrB8FIrB3vZEv9Q0Z1VeY7nmTpF+6 a+z2P7acL7j6A6Pr3+q8P9CPiPC7zFonVzuVPyB8GchGR2hytyiOVpuD9+k8hcuw ZWAaUoVtWIQ52aKS0p19G99hhb+IVANC4akkdHV4SP8i7MVNZhfUmg== ------END RSA PRIVATE KEY-----` +-----END RSA PRIVATE KEY----- + + +-----BEGIN EMPTY----- +-----END EMPTY----- + +-----BEGIN EMPTY----- + +-----END EMPTY----- + +-----BEGIN EMPTY----- + + +-----END EMPTY----- + +# This shouldn't be recognised because of the missing newline after the +headers. +-----BEGIN HEADERS----- +Header: 1 +-----END HEADERS----- + +# This should be valid, however. +-----BEGIN HEADERS----- +Header: 1 + +-----END HEADERS-----` var certificate = &Block{Type: "CERTIFICATE", Headers: map[string]string{}, diff --git a/libgo/go/encoding/xml/marshal.go b/libgo/go/encoding/xml/marshal.go index 8c63420..86d1422 100644 --- a/libgo/go/encoding/xml/marshal.go +++ b/libgo/go/encoding/xml/marshal.go @@ -173,6 +173,7 @@ func (enc *Encoder) EncodeElement(v interface{}, start StartElement) error { } var ( + begComment = []byte("<!--") endComment = []byte("-->") endProcInst = []byte("?>") endDirective = []byte(">") @@ -191,6 +192,7 @@ var ( // EncodeToken allows writing a ProcInst with Target set to "xml" only as the first token // in the stream. 
func (enc *Encoder) EncodeToken(t Token) error { + p := &enc.p switch t := t.(type) { case StartElement: @@ -202,7 +204,7 @@ func (enc *Encoder) EncodeToken(t Token) error { return err } case CharData: - EscapeText(p, t) + escapeText(p, t, false) case Comment: if bytes.Contains(t, endComment) { return fmt.Errorf("xml: EncodeToken of Comment containing --> marker") @@ -231,16 +233,59 @@ func (enc *Encoder) EncodeToken(t Token) error { } p.WriteString("?>") case Directive: - if bytes.Contains(t, endDirective) { - return fmt.Errorf("xml: EncodeToken of Directive containing > marker") + if !isValidDirective(t) { + return fmt.Errorf("xml: EncodeToken of Directive containing wrong < or > markers") } p.WriteString("<!") p.Write(t) p.WriteString(">") + default: + return fmt.Errorf("xml: EncodeToken of invalid token type") + } return p.cachedWriteError() } +// isValidDirective reports whether dir is a valid directive text, +// meaning angle brackets are matched, ignoring comments and strings. +func isValidDirective(dir Directive) bool { + var ( + depth int + inquote uint8 + incomment bool + ) + for i, c := range dir { + switch { + case incomment: + if c == '>' { + if n := 1 + i - len(endComment); n >= 0 && bytes.Equal(dir[n:i+1], endComment) { + incomment = false + } + } + // Just ignore anything in comment + case inquote != 0: + if c == inquote { + inquote = 0 + } + // Just ignore anything within quotes + case c == '\'' || c == '"': + inquote = c + case c == '<': + if i+len(begComment) < len(dir) && bytes.Equal(dir[i:i+len(begComment)], begComment) { + incomment = true + } else { + depth++ + } + case c == '>': + if depth == 0 { + return false + } + depth-- + } + } + return depth == 0 && inquote == 0 && !incomment +} + // Flush flushes any buffered XML to the underlying writer. // See the EncodeToken documentation for details about when it is necessary. 
func (enc *Encoder) Flush() error { @@ -724,6 +769,9 @@ func (p *printer) marshalStruct(tinfo *typeInfo, val reflect.Value) error { switch finfo.flags & fMode { case fCharData: + if err := s.trim(finfo.parents); err != nil { + return err + } if vf.CanInterface() && vf.Type().Implements(textMarshalerType) { data, err := vf.Interface().(encoding.TextMarshaler).MarshalText() if err != nil { @@ -767,6 +815,9 @@ func (p *printer) marshalStruct(tinfo *typeInfo, val reflect.Value) error { continue case fComment: + if err := s.trim(finfo.parents); err != nil { + return err + } k := vf.Kind() if !(k == reflect.String || k == reflect.Slice && vf.Type().Elem().Kind() == reflect.Uint8) { return fmt.Errorf("xml: bad type for comment field of %s", val.Type()) @@ -894,7 +945,7 @@ func (s *parentStack) trim(parents []string) error { return err } } - s.stack = parents[:split] + s.stack = s.stack[:split] return nil } diff --git a/libgo/go/encoding/xml/marshal_test.go b/libgo/go/encoding/xml/marshal_test.go index 14f73a7..66675d7 100644 --- a/libgo/go/encoding/xml/marshal_test.go +++ b/libgo/go/encoding/xml/marshal_test.go @@ -12,6 +12,7 @@ import ( "reflect" "strconv" "strings" + "sync" "testing" "time" ) @@ -339,6 +340,16 @@ type OuterOuterStruct struct { OuterStruct } +type NestedAndChardata struct { + AB []string `xml:"A>B"` + Chardata string `xml:",chardata"` +} + +type NestedAndComment struct { + AB []string `xml:"A>B"` + Comment string `xml:",comment"` +} + func ifaceptr(x interface{}) interface{} { return &x } @@ -617,6 +628,69 @@ var marshalTests = []struct { `</service>`, MarshalOnly: true, }, + { + Value: &struct { + XMLName struct{} `xml:"space top"` + A string `xml:"x>a"` + B string `xml:"x>b"` + C string `xml:"space x>c"` + C1 string `xml:"space1 x>c"` + D1 string `xml:"space1 x>d"` + }{ + A: "a", + B: "b", + C: "c", + C1: "c1", + D1: "d1", + }, + ExpectXML: `<top xmlns="space">` + + `<x><a>a</a><b>b</b><c xmlns="space">c</c>` + + `<c xmlns="space1">c1</c>` + + `<d 
xmlns="space1">d1</d>` + + `</x>` + + `</top>`, + }, + { + Value: &struct { + XMLName Name + A string `xml:"x>a"` + B string `xml:"x>b"` + C string `xml:"space x>c"` + C1 string `xml:"space1 x>c"` + D1 string `xml:"space1 x>d"` + }{ + XMLName: Name{ + Space: "space0", + Local: "top", + }, + A: "a", + B: "b", + C: "c", + C1: "c1", + D1: "d1", + }, + ExpectXML: `<top xmlns="space0">` + + `<x><a>a</a><b>b</b>` + + `<c xmlns="space">c</c>` + + `<c xmlns="space1">c1</c>` + + `<d xmlns="space1">d1</d>` + + `</x>` + + `</top>`, + }, + { + Value: &struct { + XMLName struct{} `xml:"top"` + B string `xml:"space x>b"` + B1 string `xml:"space1 x>b"` + }{ + B: "b", + B1: "b1", + }, + ExpectXML: `<top>` + + `<x><b xmlns="space">b</b>` + + `<b xmlns="space1">b1</b></x>` + + `</top>`, + }, // Test struct embedding { @@ -924,6 +998,14 @@ var marshalTests = []struct { ExpectXML: `<outer xmlns="testns" int="10"></outer>`, Value: &OuterOuterStruct{OuterStruct{IntAttr: 10}}, }, + { + ExpectXML: `<NestedAndChardata><A><B></B><B></B></A>test</NestedAndChardata>`, + Value: &NestedAndChardata{AB: make([]string, 2), Chardata: "test"}, + }, + { + ExpectXML: `<NestedAndComment><A><B></B><B></B></A><!--test--></NestedAndComment>`, + Value: &NestedAndComment{AB: make([]string, 2), Comment: "test"}, + }, } func TestMarshal(t *testing.T) { @@ -933,7 +1015,7 @@ func TestMarshal(t *testing.T) { } data, err := Marshal(test.Value) if err != nil { - t.Errorf("#%d: Error: %s", idx, err) + t.Errorf("#%d: marshal(%#v): %s", idx, test.Value, err) continue } if got, want := string(data), test.ExpectXML; got != want { @@ -1037,6 +1119,14 @@ func TestUnmarshal(t *testing.T) { if _, ok := test.Value.(*Plain); ok { continue } + if test.ExpectXML == `<top>`+ + `<x><b xmlns="space">b</b>`+ + `<b xmlns="space1">b1</b></x>`+ + `</top>` { + // TODO(rogpeppe): re-enable this test in + // https://go-review.googlesource.com/#/c/5910/ + continue + } vt := reflect.TypeOf(test.Value) dest := 
reflect.New(vt.Elem()).Interface() @@ -1148,12 +1238,14 @@ func TestMarshalFlush(t *testing.T) { } func BenchmarkMarshal(b *testing.B) { + b.ReportAllocs() for i := 0; i < b.N; i++ { Marshal(atomValue) } } func BenchmarkUnmarshal(b *testing.B) { + b.ReportAllocs() xml := []byte(atomXml) for i := 0; i < b.N; i++ { Unmarshal(xml, &Feed{}) @@ -1192,41 +1284,369 @@ func TestStructPointerMarshal(t *testing.T) { } var encodeTokenTests = []struct { - tok Token + desc string + toks []Token want string - ok bool -}{ - {StartElement{Name{"space", "local"}, nil}, "<local xmlns=\"space\">", true}, - {StartElement{Name{"space", ""}, nil}, "", false}, - {EndElement{Name{"space", ""}}, "", false}, - {CharData("foo"), "foo", true}, - {Comment("foo"), "<!--foo-->", true}, - {Comment("foo-->"), "", false}, - {ProcInst{"Target", []byte("Instruction")}, "<?Target Instruction?>", true}, - {ProcInst{"", []byte("Instruction")}, "", false}, - {ProcInst{"Target", []byte("Instruction?>")}, "", false}, - {Directive("foo"), "<!foo>", true}, - {Directive("foo>"), "", false}, -} + err string +}{{ + desc: "start element with name space", + toks: []Token{ + StartElement{Name{"space", "local"}, nil}, + }, + want: `<local xmlns="space">`, +}, { + desc: "start element with no name", + toks: []Token{ + StartElement{Name{"space", ""}, nil}, + }, + err: "xml: start tag with no name", +}, { + desc: "end element with no name", + toks: []Token{ + EndElement{Name{"space", ""}}, + }, + err: "xml: end tag with no name", +}, { + desc: "char data", + toks: []Token{ + CharData("foo"), + }, + want: `foo`, +}, { + desc: "char data with escaped chars", + toks: []Token{ + CharData(" \t\n"), + }, + want: " 	\n", +}, { + desc: "comment", + toks: []Token{ + Comment("foo"), + }, + want: `<!--foo-->`, +}, { + desc: "comment with invalid content", + toks: []Token{ + Comment("foo-->"), + }, + err: "xml: EncodeToken of Comment containing --> marker", +}, { + desc: "proc instruction", + toks: []Token{ + ProcInst{"Target", 
[]byte("Instruction")}, + }, + want: `<?Target Instruction?>`, +}, { + desc: "proc instruction with empty target", + toks: []Token{ + ProcInst{"", []byte("Instruction")}, + }, + err: "xml: EncodeToken of ProcInst with invalid Target", +}, { + desc: "proc instruction with bad content", + toks: []Token{ + ProcInst{"", []byte("Instruction?>")}, + }, + err: "xml: EncodeToken of ProcInst with invalid Target", +}, { + desc: "directive", + toks: []Token{ + Directive("foo"), + }, + want: `<!foo>`, +}, { + desc: "more complex directive", + toks: []Token{ + Directive("DOCTYPE doc [ <!ELEMENT doc '>'> <!-- com>ment --> ]"), + }, + want: `<!DOCTYPE doc [ <!ELEMENT doc '>'> <!-- com>ment --> ]>`, +}, { + desc: "directive instruction with bad name", + toks: []Token{ + Directive("foo>"), + }, + err: "xml: EncodeToken of Directive containing wrong < or > markers", +}, { + desc: "end tag without start tag", + toks: []Token{ + EndElement{Name{"foo", "bar"}}, + }, + err: "xml: end tag </bar> without start tag", +}, { + desc: "mismatching end tag local name", + toks: []Token{ + StartElement{Name{"", "foo"}, nil}, + EndElement{Name{"", "bar"}}, + }, + err: "xml: end tag </bar> does not match start tag <foo>", + want: `<foo>`, +}, { + desc: "mismatching end tag namespace", + toks: []Token{ + StartElement{Name{"space", "foo"}, nil}, + EndElement{Name{"another", "foo"}}, + }, + err: "xml: end tag </foo> in namespace another does not match start tag <foo> in namespace space", + want: `<foo xmlns="space">`, +}, { + desc: "start element with explicit namespace", + toks: []Token{ + StartElement{Name{"space", "local"}, []Attr{ + {Name{"xmlns", "x"}, "space"}, + {Name{"space", "foo"}, "value"}, + }}, + }, + want: `<local xmlns="space" xmlns:_xmlns="xmlns" _xmlns:x="space" xmlns:space="space" space:foo="value">`, +}, { + desc: "start element with explicit namespace and colliding prefix", + toks: []Token{ + StartElement{Name{"space", "local"}, []Attr{ + {Name{"xmlns", "x"}, "space"}, + 
{Name{"space", "foo"}, "value"}, + {Name{"x", "bar"}, "other"}, + }}, + }, + want: `<local xmlns="space" xmlns:_xmlns="xmlns" _xmlns:x="space" xmlns:space="space" space:foo="value" xmlns:x="x" x:bar="other">`, +}, { + desc: "start element using previously defined namespace", + toks: []Token{ + StartElement{Name{"", "local"}, []Attr{ + {Name{"xmlns", "x"}, "space"}, + }}, + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"space", "x"}, "y"}, + }}, + }, + want: `<local xmlns:_xmlns="xmlns" _xmlns:x="space"><foo xmlns="space" xmlns:space="space" space:x="y">`, +}, { + desc: "nested name space with same prefix", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"xmlns", "x"}, "space1"}, + }}, + StartElement{Name{"", "foo"}, []Attr{ + {Name{"xmlns", "x"}, "space2"}, + }}, + StartElement{Name{"", "foo"}, []Attr{ + {Name{"space1", "a"}, "space1 value"}, + {Name{"space2", "b"}, "space2 value"}, + }}, + EndElement{Name{"", "foo"}}, + EndElement{Name{"", "foo"}}, + StartElement{Name{"", "foo"}, []Attr{ + {Name{"space1", "a"}, "space1 value"}, + {Name{"space2", "b"}, "space2 value"}, + }}, + }, + want: `<foo xmlns:_xmlns="xmlns" _xmlns:x="space1"><foo _xmlns:x="space2"><foo xmlns:space1="space1" space1:a="space1 value" xmlns:space2="space2" space2:b="space2 value"></foo></foo><foo xmlns:space1="space1" space1:a="space1 value" xmlns:space2="space2" space2:b="space2 value">`, +}, { + desc: "start element defining several prefixes for the same name space", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"xmlns", "a"}, "space"}, + {Name{"xmlns", "b"}, "space"}, + {Name{"space", "x"}, "value"}, + }}, + }, + want: `<foo xmlns="space" xmlns:_xmlns="xmlns" _xmlns:a="space" _xmlns:b="space" xmlns:space="space" space:x="value">`, +}, { + desc: "nested element redefines name space", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"xmlns", "x"}, "space"}, + }}, + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"xmlns", "y"}, 
"space"}, + {Name{"space", "a"}, "value"}, + }}, + }, + want: `<foo xmlns:_xmlns="xmlns" _xmlns:x="space"><foo xmlns="space" _xmlns:y="space" xmlns:space="space" space:a="value">`, +}, { + desc: "nested element creates alias for default name space", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + }}, + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"xmlns", "y"}, "space"}, + {Name{"space", "a"}, "value"}, + }}, + }, + want: `<foo xmlns="space" xmlns="space"><foo xmlns="space" xmlns:_xmlns="xmlns" _xmlns:y="space" xmlns:space="space" space:a="value">`, +}, { + desc: "nested element defines default name space with existing prefix", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"xmlns", "x"}, "space"}, + }}, + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + {Name{"space", "a"}, "value"}, + }}, + }, + want: `<foo xmlns:_xmlns="xmlns" _xmlns:x="space"><foo xmlns="space" xmlns="space" xmlns:space="space" space:a="value">`, +}, { + desc: "nested element uses empty attribute name space when default ns defined", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + }}, + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "attr"}, "value"}, + }}, + }, + want: `<foo xmlns="space" xmlns="space"><foo xmlns="space" attr="value">`, +}, { + desc: "redefine xmlns", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"foo", "xmlns"}, "space"}, + }}, + }, + want: `<foo xmlns:foo="foo" foo:xmlns="space">`, +}, { + desc: "xmlns with explicit name space #1", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"xml", "xmlns"}, "space"}, + }}, + }, + want: `<foo xmlns="space" xmlns:_xml="xml" _xml:xmlns="space">`, +}, { + desc: "xmlns with explicit name space #2", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{xmlURL, "xmlns"}, "space"}, + }}, + }, + want: `<foo xmlns="space" 
xml:xmlns="space">`, +}, { + desc: "empty name space declaration is ignored", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"xmlns", "foo"}, ""}, + }}, + }, + want: `<foo xmlns:_xmlns="xmlns" _xmlns:foo="">`, +}, { + desc: "attribute with no name is ignored", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"", ""}, "value"}, + }}, + }, + want: `<foo>`, +}, { + desc: "namespace URL with non-valid name", + toks: []Token{ + StartElement{Name{"/34", "foo"}, []Attr{ + {Name{"/34", "x"}, "value"}, + }}, + }, + want: `<foo xmlns="/34" xmlns:_="/34" _:x="value">`, +}, { + desc: "nested element resets default namespace to empty", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + }}, + StartElement{Name{"", "foo"}, []Attr{ + {Name{"", "xmlns"}, ""}, + {Name{"", "x"}, "value"}, + {Name{"space", "x"}, "value"}, + }}, + }, + want: `<foo xmlns="space" xmlns="space"><foo xmlns="" x="value" xmlns:space="space" space:x="value">`, +}, { + desc: "nested element requires empty default name space", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + }}, + StartElement{Name{"", "foo"}, nil}, + }, + want: `<foo xmlns="space" xmlns="space"><foo>`, +}, { + desc: "attribute uses name space from xmlns", + toks: []Token{ + StartElement{Name{"some/space", "foo"}, []Attr{ + {Name{"", "attr"}, "value"}, + {Name{"some/space", "other"}, "other value"}, + }}, + }, + want: `<foo xmlns="some/space" attr="value" xmlns:space="some/space" space:other="other value">`, +}, { + desc: "default name space should not be used by attributes", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + {Name{"xmlns", "bar"}, "space"}, + {Name{"space", "baz"}, "foo"}, + }}, + StartElement{Name{"space", "baz"}, nil}, + EndElement{Name{"space", "baz"}}, + EndElement{Name{"space", "foo"}}, + }, + want: `<foo xmlns="space" xmlns="space" xmlns:_xmlns="xmlns" 
_xmlns:bar="space" xmlns:space="space" space:baz="foo"><baz xmlns="space"></baz></foo>`, +}, { + desc: "default name space not used by attributes, not explicitly defined", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + {Name{"space", "baz"}, "foo"}, + }}, + StartElement{Name{"space", "baz"}, nil}, + EndElement{Name{"space", "baz"}}, + EndElement{Name{"space", "foo"}}, + }, + want: `<foo xmlns="space" xmlns="space" xmlns:space="space" space:baz="foo"><baz xmlns="space"></baz></foo>`, +}, { + desc: "impossible xmlns declaration", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + }}, + StartElement{Name{"space", "bar"}, []Attr{ + {Name{"space", "attr"}, "value"}, + }}, + }, + want: `<foo xmlns="space"><bar xmlns="space" xmlns:space="space" space:attr="value">`, +}} func TestEncodeToken(t *testing.T) { - for _, tt := range encodeTokenTests { +loop: + for i, tt := range encodeTokenTests { var buf bytes.Buffer enc := NewEncoder(&buf) - err := enc.EncodeToken(tt.tok) + var err error + for j, tok := range tt.toks { + err = enc.EncodeToken(tok) + if err != nil && j < len(tt.toks)-1 { + t.Errorf("#%d %s token #%d: %v", i, tt.desc, j, err) + continue loop + } + } + errorf := func(f string, a ...interface{}) { + t.Errorf("#%d %s token #%d:%s", i, tt.desc, len(tt.toks)-1, fmt.Sprintf(f, a...)) + } switch { - case !tt.ok && err == nil: - t.Errorf("enc.EncodeToken(%#v): expected error; got none", tt.tok) - case tt.ok && err != nil: - t.Fatalf("enc.EncodeToken: %v", err) - case !tt.ok && err != nil: - // expected error, got one + case tt.err != "" && err == nil: + errorf(" expected error; got none") + continue + case tt.err == "" && err != nil: + errorf(" got error: %v", err) + continue + case tt.err != "" && err != nil && tt.err != err.Error(): + errorf(" error mismatch; got %v, want %v", err, tt.err) + continue } if err := enc.Flush(); err != nil { - t.Fatalf("enc.EncodeToken: %v", err) + 
errorf(" %v", err) + continue } if got := buf.String(); got != tt.want { - t.Errorf("enc.EncodeToken = %s; want: %s", got, tt.want) + errorf("\ngot %v\nwant %v", got, tt.want) + continue } } } @@ -1264,3 +1684,83 @@ func TestDecodeEncode(t *testing.T) { } } } + +// Issue 9796. Used to fail with GORACE="halt_on_error=1" -race. +func TestRace9796(t *testing.T) { + type A struct{} + type B struct { + C []A `xml:"X>Y"` + } + var wg sync.WaitGroup + for i := 0; i < 2; i++ { + wg.Add(1) + go func() { + Marshal(B{[]A{A{}}}) + wg.Done() + }() + } + wg.Wait() +} + +func TestIsValidDirective(t *testing.T) { + testOK := []string{ + "<>", + "< < > >", + "<!DOCTYPE '<' '>' '>' <!--nothing-->>", + "<!DOCTYPE doc [ <!ELEMENT doc ANY> <!ELEMENT doc ANY> ]>", + "<!DOCTYPE doc [ <!ELEMENT doc \"ANY> '<' <!E\" LEMENT '>' doc ANY> ]>", + "<!DOCTYPE doc <!-- just>>>> a < comment --> [ <!ITEM anything> ] >", + } + testKO := []string{ + "<", + ">", + "<!--", + "-->", + "< > > < < >", + "<!dummy <!-- > -->", + "<!DOCTYPE doc '>", + "<!DOCTYPE doc '>'", + "<!DOCTYPE doc <!--comment>", + } + for _, s := range testOK { + if !isValidDirective(Directive(s)) { + t.Errorf("Directive %q is expected to be valid", s) + } + } + for _, s := range testKO { + if isValidDirective(Directive(s)) { + t.Errorf("Directive %q is expected to be invalid", s) + } + } +} + +// Issue 11719. EncodeToken used to silently eat tokens with an invalid type. 
+func TestSimpleUseOfEncodeToken(t *testing.T) { + var buf bytes.Buffer + enc := NewEncoder(&buf) + if err := enc.EncodeToken(&StartElement{Name: Name{"", "object1"}}); err == nil { + t.Errorf("enc.EncodeToken: pointer type should be rejected") + } + if err := enc.EncodeToken(&EndElement{Name: Name{"", "object1"}}); err == nil { + t.Errorf("enc.EncodeToken: pointer type should be rejected") + } + if err := enc.EncodeToken(StartElement{Name: Name{"", "object2"}}); err != nil { + t.Errorf("enc.EncodeToken: StartElement %s", err) + } + if err := enc.EncodeToken(EndElement{Name: Name{"", "object2"}}); err != nil { + t.Errorf("enc.EncodeToken: EndElement %s", err) + } + if err := enc.EncodeToken(Universe{}); err == nil { + t.Errorf("enc.EncodeToken: invalid type not caught") + } + if err := enc.Flush(); err != nil { + t.Errorf("enc.Flush: %s", err) + } + if buf.Len() == 0 { + t.Errorf("enc.EncodeToken: empty buffer") + } + want := "<object2></object2>" + if buf.String() != want { + t.Errorf("enc.EncodeToken: expected %q; got %q", want, buf.String()) + } +} diff --git a/libgo/go/encoding/xml/read_test.go b/libgo/go/encoding/xml/read_test.go index 01f55d0..7d004dc 100644 --- a/libgo/go/encoding/xml/read_test.go +++ b/libgo/go/encoding/xml/read_test.go @@ -694,7 +694,7 @@ type Pod struct { Pea interface{} `xml:"Pea"` } -// https://code.google.com/p/go/issues/detail?id=6836 +// https://golang.org/issue/6836 func TestUnmarshalIntoInterface(t *testing.T) { pod := new(Pod) pod.Pea = new(Pea) diff --git a/libgo/go/encoding/xml/xml.go b/libgo/go/encoding/xml/xml.go index 8c15b98..0a21c93 100644 --- a/libgo/go/encoding/xml/xml.go +++ b/libgo/go/encoding/xml/xml.go @@ -549,7 +549,6 @@ func (d *Decoder) rawToken() (Token, error) { case '?': // <?: Processing instruction. - // TODO(rsc): Should parse the <?xml declaration to make sure the version is 1.0. 
var target string if target, ok = d.name(); !ok { if d.err == nil { @@ -574,7 +573,13 @@ func (d *Decoder) rawToken() (Token, error) { data = data[0 : len(data)-2] // chop ?> if target == "xml" { - enc := procInstEncoding(string(data)) + content := string(data) + ver := procInst("version", content) + if ver != "" && ver != "1.0" { + d.err = fmt.Errorf("xml: unsupported version %q; only version 1.0 is supported", ver) + return nil, d.err + } + enc := procInst("encoding", content) if enc != "" && enc != "utf-8" && enc != "UTF-8" { if d.CharsetReader == nil { d.err = fmt.Errorf("xml: encoding %q declared but Decoder.CharsetReader is nil", enc) @@ -723,7 +728,7 @@ func (d *Decoder) rawToken() (Token, error) { return nil, d.err } - attr = make([]Attr, 0, 4) + attr = []Attr{} for { d.space() if b, ok = d.mustgetc(); !ok { @@ -747,7 +752,11 @@ func (d *Decoder) rawToken() (Token, error) { n := len(attr) if n >= cap(attr) { - nattr := make([]Attr, n, 2*cap(attr)) + nCap := 2 * cap(attr) + if nCap == 0 { + nCap = 4 + } + nattr := make([]Attr, n, nCap) copy(nattr, attr) attr = nattr } @@ -1119,12 +1128,12 @@ func (d *Decoder) name() (s string, ok bool) { } // Now we check the characters. - s = d.buf.String() - if !isName([]byte(s)) { - d.err = d.syntaxError("invalid XML name: " + s) + b := d.buf.Bytes() + if !isName(b) { + d.err = d.syntaxError("invalid XML name: " + string(b)) return "", false } - return s, true + return string(b), true } // Read a name and append its bytes to d.buf. @@ -1832,6 +1841,13 @@ var ( // EscapeText writes to w the properly escaped XML equivalent // of the plain text data s. func EscapeText(w io.Writer, s []byte) error { + return escapeText(w, s, true) +} + +// escapeText writes to w the properly escaped XML equivalent +// of the plain text data s. If escapeNewline is true, newline +// characters will be escaped. 
+func escapeText(w io.Writer, s []byte, escapeNewline bool) error { var esc []byte last := 0 for i := 0; i < len(s); { @@ -1851,6 +1867,9 @@ func EscapeText(w io.Writer, s []byte) error { case '\t': esc = esc_tab case '\n': + if !escapeNewline { + continue + } esc = esc_nl case '\r': esc = esc_cr @@ -1921,16 +1940,17 @@ func Escape(w io.Writer, s []byte) { EscapeText(w, s) } -// procInstEncoding parses the `encoding="..."` or `encoding='...'` +// procInst parses the `param="..."` or `param='...'` // value out of the provided string, returning "" if not found. -func procInstEncoding(s string) string { +func procInst(param, s string) string { // TODO: this parsing is somewhat lame and not exact. // It works for all actual cases, though. - idx := strings.Index(s, "encoding=") + param = param + "=" + idx := strings.Index(s, param) if idx == -1 { return "" } - v := s[idx+len("encoding="):] + v := s[idx+len(param):] if v == "" { return "" } diff --git a/libgo/go/encoding/xml/xml_test.go b/libgo/go/encoding/xml/xml_test.go index be995c0..312a7c9 100644 --- a/libgo/go/encoding/xml/xml_test.go +++ b/libgo/go/encoding/xml/xml_test.go @@ -657,20 +657,23 @@ type procInstEncodingTest struct { } var procInstTests = []struct { - input, expect string + input string + expect [2]string }{ - {`version="1.0" encoding="utf-8"`, "utf-8"}, - {`version="1.0" encoding='utf-8'`, "utf-8"}, - {`version="1.0" encoding='utf-8' `, "utf-8"}, - {`version="1.0" encoding=utf-8`, ""}, - {`encoding="FOO" `, "FOO"}, + {`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}}, + {`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}}, + {`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}}, + {`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}}, + {`encoding="FOO" `, [2]string{"", "FOO"}}, } func TestProcInstEncoding(t *testing.T) { for _, test := range procInstTests { - got := procInstEncoding(test.input) - if got != test.expect { - t.Errorf("procInstEncoding(%q) = %q; want %q", 
test.input, got, test.expect) + if got := procInst("version", test.input); got != test.expect[0] { + t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0]) + } + if got := procInst("encoding", test.input); got != test.expect[1] { + t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1]) } } } |