diff options
| author | Ian Lance Taylor <iant@golang.org> | 2018-01-09 01:23:08 +0000 |
|---|---|---|
| committer | Ian Lance Taylor <ian@gcc.gnu.org> | 2018-01-09 01:23:08 +0000 |
| commit | 1a2f01efa63036a5104f203a4789e682c0e0915d (patch) | |
| tree | 373e15778dc8295354584e1f86915ae493b604ff /libgo/go/encoding | |
| parent | 8799df67f2dab88f9fda11739c501780a85575e2 (diff) | |
| download | gcc-1a2f01efa63036a5104f203a4789e682c0e0915d.zip gcc-1a2f01efa63036a5104f203a4789e682c0e0915d.tar.gz gcc-1a2f01efa63036a5104f203a4789e682c0e0915d.tar.bz2 | |
libgo: update to Go1.10beta1
Update the Go library to the 1.10beta1 release.
Requires a few changes to the compiler for modifications to the map
runtime code, and to handle some nowritebarrier cases in the runtime.
Reviewed-on: https://go-review.googlesource.com/86455
gotools/:
* Makefile.am (go_cmd_vet_files): New variable.
(go_cmd_buildid_files, go_cmd_test2json_files): New variables.
(s-zdefaultcc): Change from constants to functions.
(noinst_PROGRAMS): Add vet, buildid, and test2json.
(cgo$(EXEEXT)): Link against $(LIBGOTOOL).
(vet$(EXEEXT)): New target.
(buildid$(EXEEXT)): New target.
(test2json$(EXEEXT)): New target.
(install-exec-local): Install all $(noinst_PROGRAMS).
(uninstall-local): Uninstall all $(noinst_PROGRAMS).
(check-go-tool): Depend on $(noinst_PROGRAMS). Copy down
objabi.go.
(check-runtime): Depend on $(noinst_PROGRAMS).
(check-cgo-test, check-carchive-test): Likewise.
(check-vet): New target.
(check): Depend on check-vet. Look at cmd_vet-testlog.
(.PHONY): Add check-vet.
* Makefile.in: Rebuild.
From-SVN: r256365
Diffstat (limited to 'libgo/go/encoding')
36 files changed, 1930 insertions, 823 deletions
diff --git a/libgo/go/encoding/asn1/asn1.go b/libgo/go/encoding/asn1/asn1.go index b8e2770..26868a3 100644 --- a/libgo/go/encoding/asn1/asn1.go +++ b/libgo/go/encoding/asn1/asn1.go @@ -372,13 +372,32 @@ func parseGeneralizedTime(bytes []byte) (ret time.Time, err error) { return } +// NumericString + +// parseNumericString parses an ASN.1 NumericString from the given byte array +// and returns it. +func parseNumericString(bytes []byte) (ret string, err error) { + for _, b := range bytes { + if !isNumeric(b) { + return "", SyntaxError{"NumericString contains invalid character"} + } + } + return string(bytes), nil +} + +// isNumeric reports whether the given b is in the ASN.1 NumericString set. +func isNumeric(b byte) bool { + return '0' <= b && b <= '9' || + b == ' ' +} + // PrintableString -// parsePrintableString parses a ASN.1 PrintableString from the given byte +// parsePrintableString parses an ASN.1 PrintableString from the given byte // array and returns it. func parsePrintableString(bytes []byte) (ret string, err error) { for _, b := range bytes { - if !isPrintable(b) { + if !isPrintable(b, allowAsterisk, allowAmpersand) { err = SyntaxError{"PrintableString contains invalid character"} return } @@ -387,8 +406,21 @@ func parsePrintableString(bytes []byte) (ret string, err error) { return } +type asteriskFlag bool +type ampersandFlag bool + +const ( + allowAsterisk asteriskFlag = true + rejectAsterisk asteriskFlag = false + + allowAmpersand ampersandFlag = true + rejectAmpersand ampersandFlag = false +) + // isPrintable reports whether the given b is in the ASN.1 PrintableString set. -func isPrintable(b byte) bool { +// If asterisk is allowAsterisk then '*' is also allowed, reflecting existing +// practice. If ampersand is allowAmpersand then '&' is allowed as well. 
+func isPrintable(b byte, asterisk asteriskFlag, ampersand ampersandFlag) bool { return 'a' <= b && b <= 'z' || 'A' <= b && b <= 'Z' || '0' <= b && b <= '9' || @@ -401,12 +433,17 @@ func isPrintable(b byte) bool { // This is technically not allowed in a PrintableString. // However, x509 certificates with wildcard strings don't // always use the correct string type so we permit it. - b == '*' + (bool(asterisk) && b == '*') || + // This is not technically allowed either. However, not + // only is it relatively common, but there are also a + // handful of CA certificates that contain it. At least + // one of which will not expire until 2027. + (bool(ampersand) && b == '&') } // IA5String -// parseIA5String parses a ASN.1 IA5String (ASCII string) from the given +// parseIA5String parses an ASN.1 IA5String (ASCII string) from the given // byte slice and returns it. func parseIA5String(bytes []byte) (ret string, err error) { for _, b := range bytes { @@ -421,7 +458,7 @@ func parseIA5String(bytes []byte) (ret string, err error) { // T61String -// parseT61String parses a ASN.1 T61String (8-bit clean string) from the given +// parseT61String parses an ASN.1 T61String (8-bit clean string) from the given // byte slice and returns it. func parseT61String(bytes []byte) (ret string, err error) { return string(bytes), nil @@ -429,7 +466,7 @@ func parseT61String(bytes []byte) (ret string, err error) { // UTF8String -// parseUTF8String parses a ASN.1 UTF8String (raw UTF-8) from the given byte +// parseUTF8String parses an ASN.1 UTF8String (raw UTF-8) from the given byte // array and returns it. func parseUTF8String(bytes []byte) (ret string, err error) { if !utf8.Valid(bytes) { @@ -536,7 +573,7 @@ func parseTagAndLength(bytes []byte, initOffset int) (ret tagAndLength, offset i // a number of ASN.1 values from the given byte slice and returns them as a // slice of Go values of the given type. 
func parseSequenceOf(bytes []byte, sliceType reflect.Type, elemType reflect.Type) (ret reflect.Value, err error) { - expectedTag, compoundType, ok := getUniversalType(elemType) + matchAny, expectedTag, compoundType, ok := getUniversalType(elemType) if !ok { err = StructuralError{"unknown Go type for slice"} return @@ -552,7 +589,7 @@ func parseSequenceOf(bytes []byte, sliceType reflect.Type, elemType reflect.Type return } switch t.tag { - case TagIA5String, TagGeneralString, TagT61String, TagUTF8String: + case TagIA5String, TagGeneralString, TagT61String, TagUTF8String, TagNumericString: // We pretend that various other string types are // PRINTABLE STRINGs so that a sequence of them can be // parsed into a []string. @@ -562,7 +599,7 @@ func parseSequenceOf(bytes []byte, sliceType reflect.Type, elemType reflect.Type t.tag = TagUTCTime } - if t.class != ClassUniversal || t.isCompound != compoundType || t.tag != expectedTag { + if !matchAny && (t.class != ClassUniversal || t.isCompound != compoundType || t.tag != expectedTag) { err = StructuralError{"sequence tag mismatch"} return } @@ -617,23 +654,6 @@ func parseField(v reflect.Value, bytes []byte, initOffset int, params fieldParam return } - // Deal with raw values. - if fieldType == rawValueType { - var t tagAndLength - t, offset, err = parseTagAndLength(bytes, offset) - if err != nil { - return - } - if invalidLength(offset, t.length, len(bytes)) { - err = SyntaxError{"data truncated"} - return - } - result := RawValue{t.class, t.tag, t.isCompound, bytes[offset : offset+t.length], bytes[initOffset : offset+t.length]} - offset += t.length - v.Set(reflect.ValueOf(result)) - return - } - // Deal with the ANY type. 
if ifaceType := fieldType; ifaceType.Kind() == reflect.Interface && ifaceType.NumMethod() == 0 { var t tagAndLength @@ -651,6 +671,8 @@ func parseField(v reflect.Value, bytes []byte, initOffset int, params fieldParam switch t.tag { case TagPrintableString: result, err = parsePrintableString(innerBytes) + case TagNumericString: + result, err = parseNumericString(innerBytes) case TagIA5String: result, err = parseIA5String(innerBytes) case TagT61String: @@ -682,11 +704,6 @@ func parseField(v reflect.Value, bytes []byte, initOffset int, params fieldParam } return } - universalTag, compoundType, ok1 := getUniversalType(fieldType) - if !ok1 { - err = StructuralError{fmt.Sprintf("unknown Go type: %v", fieldType)} - return - } t, offset, err := parseTagAndLength(bytes, offset) if err != nil { @@ -702,7 +719,9 @@ func parseField(v reflect.Value, bytes []byte, initOffset int, params fieldParam return } if t.class == expectedClass && t.tag == *params.tag && (t.length == 0 || t.isCompound) { - if t.length > 0 { + if fieldType == rawValueType { + // The inner element should not be parsed for RawValues. + } else if t.length > 0 { t, offset, err = parseTagAndLength(bytes, offset) if err != nil { return @@ -727,6 +746,12 @@ func parseField(v reflect.Value, bytes []byte, initOffset int, params fieldParam } } + matchAny, universalTag, compoundType, ok1 := getUniversalType(fieldType) + if !ok1 { + err = StructuralError{fmt.Sprintf("unknown Go type: %v", fieldType)} + return + } + // Special case for strings: all the ASN.1 string types map to the Go // type string. 
getUniversalType returns the tag for PrintableString // when it sees a string, so if we see a different string type on the @@ -734,7 +759,7 @@ func parseField(v reflect.Value, bytes []byte, initOffset int, params fieldParam if universalTag == TagPrintableString { if t.class == ClassUniversal { switch t.tag { - case TagIA5String, TagGeneralString, TagT61String, TagUTF8String: + case TagIA5String, TagGeneralString, TagT61String, TagUTF8String, TagNumericString: universalTag = t.tag } } else if params.stringType != 0 { @@ -752,21 +777,25 @@ func parseField(v reflect.Value, bytes []byte, initOffset int, params fieldParam universalTag = TagSet } + matchAnyClassAndTag := matchAny expectedClass := ClassUniversal expectedTag := universalTag if !params.explicit && params.tag != nil { expectedClass = ClassContextSpecific expectedTag = *params.tag + matchAnyClassAndTag = false } if !params.explicit && params.application && params.tag != nil { expectedClass = ClassApplication expectedTag = *params.tag + matchAnyClassAndTag = false } // We have unwrapped any explicit tagging at this point. - if t.class != expectedClass || t.tag != expectedTag || t.isCompound != compoundType { + if !matchAnyClassAndTag && (t.class != expectedClass || t.tag != expectedTag) || + (!matchAny && t.isCompound != compoundType) { // Tags don't match. Again, it could be an optional element. ok := setDefaultValue(v, params) if ok { @@ -785,6 +814,10 @@ func parseField(v reflect.Value, bytes []byte, initOffset int, params fieldParam // We deal with the structures defined in this package first. 
switch fieldType { + case rawValueType: + result := RawValue{t.class, t.tag, t.isCompound, innerBytes, bytes[initOffset:offset]} + v.Set(reflect.ValueOf(result)) + return case objectIdentifierType: newSlice, err1 := parseObjectIdentifier(innerBytes) v.Set(reflect.MakeSlice(v.Type(), len(newSlice), len(newSlice))) @@ -904,6 +937,8 @@ func parseField(v reflect.Value, bytes []byte, initOffset int, params fieldParam switch universalTag { case TagPrintableString: v, err = parsePrintableString(innerBytes) + case TagNumericString: + v, err = parseNumericString(innerBytes) case TagIA5String: v, err = parseIA5String(innerBytes) case TagT61String: @@ -977,7 +1012,7 @@ func setDefaultValue(v reflect.Value, params fieldParameters) (ok bool) { // // An ASN.1 UTCTIME or GENERALIZEDTIME can be written to a time.Time. // -// An ASN.1 PrintableString or IA5String can be written to a string. +// An ASN.1 PrintableString, IA5String, or NumericString can be written to a string. // // Any of the above ASN.1 values can be written to an interface{}. // The value stored in the interface has the corresponding Go type. 
@@ -992,7 +1027,7 @@ func setDefaultValue(v reflect.Value, params fieldParameters) (ok bool) { // // The following tags on struct fields have special meaning to Unmarshal: // -// application specifies that a APPLICATION tag is used +// application specifies that an APPLICATION tag is used // default:x sets the default value for optional integer fields (only used if optional is also present) // explicit specifies that an additional, explicit tag wraps the implicit one // optional marks the field as ASN.1 OPTIONAL diff --git a/libgo/go/encoding/asn1/asn1_test.go b/libgo/go/encoding/asn1/asn1_test.go index c9eda40..5e67dc5 100644 --- a/libgo/go/encoding/asn1/asn1_test.go +++ b/libgo/go/encoding/asn1/asn1_test.go @@ -424,6 +424,7 @@ var parseFieldParametersTestData []parseFieldParametersTest = []parseFieldParame {"generalized", fieldParameters{timeType: TagGeneralizedTime}}, {"utc", fieldParameters{timeType: TagUTCTime}}, {"printable", fieldParameters{stringType: TagPrintableString}}, + {"numeric", fieldParameters{stringType: TagNumericString}}, {"optional", fieldParameters{optional: true}}, {"explicit", fieldParameters{explicit: true, tag: new(int)}}, {"application", fieldParameters{application: true, tag: new(int)}}, @@ -486,6 +487,8 @@ var unmarshalTestData = []struct { {[]byte{0x02, 0x01, 0x10}, newInt(16)}, {[]byte{0x13, 0x04, 't', 'e', 's', 't'}, newString("test")}, {[]byte{0x16, 0x04, 't', 'e', 's', 't'}, newString("test")}, + // Ampersand is allowed in PrintableString due to mistakes by major CAs. 
+ {[]byte{0x13, 0x05, 't', 'e', 's', 't', '&'}, newString("test&")}, {[]byte{0x16, 0x04, 't', 'e', 's', 't'}, &RawValue{0, 22, false, []byte("test"), []byte("\x16\x04test")}}, {[]byte{0x04, 0x04, 1, 2, 3, 4}, &RawValue{0, 4, false, []byte{1, 2, 3, 4}, []byte{4, 4, 1, 2, 3, 4}}}, {[]byte{0x30, 0x03, 0x81, 0x01, 0x01}, &TestContextSpecificTags{1}}, @@ -496,6 +499,7 @@ var unmarshalTestData = []struct { {[]byte{0x30, 0x0b, 0x13, 0x03, 0x66, 0x6f, 0x6f, 0x02, 0x01, 0x22, 0x02, 0x01, 0x33}, &TestElementsAfterString{"foo", 0x22, 0x33}}, {[]byte{0x30, 0x05, 0x02, 0x03, 0x12, 0x34, 0x56}, &TestBigInt{big.NewInt(0x123456)}}, {[]byte{0x30, 0x0b, 0x31, 0x09, 0x02, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x01, 0x03}, &TestSet{Ints: []int{1, 2, 3}}}, + {[]byte{0x12, 0x0b, '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ' '}, newString("0123456789 ")}, } func TestUnmarshal(t *testing.T) { @@ -1015,7 +1019,7 @@ func TestNull(t *testing.T) { t.Fatal(err) } if !bytes.Equal(NullBytes, marshaled) { - t.Errorf("Expected Marshal of NullRawValue to yeild %x, got %x", NullBytes, marshaled) + t.Errorf("Expected Marshal of NullRawValue to yield %x, got %x", NullBytes, marshaled) } unmarshaled := RawValue{} @@ -1033,3 +1037,60 @@ func TestNull(t *testing.T) { t.Errorf("Expected Unmarshal of NullBytes to yield %v, got %v", NullRawValue, unmarshaled) } } + +func TestExplicitTagRawValueStruct(t *testing.T) { + type foo struct { + A RawValue `asn1:"optional,explicit,tag:5"` + B []byte `asn1:"optional,explicit,tag:6"` + } + before := foo{B: []byte{1, 2, 3}} + derBytes, err := Marshal(before) + if err != nil { + t.Fatal(err) + } + + var after foo + if rest, err := Unmarshal(derBytes, &after); err != nil || len(rest) != 0 { + t.Fatal(err) + } + + got := fmt.Sprintf("%#v", after) + want := fmt.Sprintf("%#v", before) + if got != want { + t.Errorf("got %s, want %s (DER: %x)", got, want, derBytes) + } +} + +func TestTaggedRawValue(t *testing.T) { + type taggedRawValue struct { + A RawValue 
`asn1:"tag:5"` + } + type untaggedRawValue struct { + A RawValue + } + const isCompound = 0x20 + const tag = 5 + + tests := []struct { + shouldMatch bool + derBytes []byte + }{ + {false, []byte{0x30, 3, TagInteger, 1, 1}}, + {true, []byte{0x30, 3, (ClassContextSpecific << 6) | tag, 1, 1}}, + {true, []byte{0x30, 3, (ClassContextSpecific << 6) | tag | isCompound, 1, 1}}, + {false, []byte{0x30, 3, (ClassApplication << 6) | tag | isCompound, 1, 1}}, + } + + for i, test := range tests { + var tagged taggedRawValue + if _, err := Unmarshal(test.derBytes, &tagged); (err == nil) != test.shouldMatch { + t.Errorf("#%d: unexpected result parsing %x: %s", i, test.derBytes, err) + } + + // An untagged RawValue should accept anything. + var untagged untaggedRawValue + if _, err := Unmarshal(test.derBytes, &untagged); err != nil { + t.Errorf("#%d: unexpected failure parsing %x with untagged RawValue: %s", i, test.derBytes, err) + } + } +} diff --git a/libgo/go/encoding/asn1/common.go b/libgo/go/encoding/asn1/common.go index cd93b27..a6589a5 100644 --- a/libgo/go/encoding/asn1/common.go +++ b/libgo/go/encoding/asn1/common.go @@ -30,6 +30,7 @@ const ( TagUTF8String = 12 TagSequence = 16 TagSet = 17 + TagNumericString = 18 TagPrintableString = 19 TagT61String = 20 TagIA5String = 22 @@ -106,6 +107,8 @@ func parseFieldParameters(str string) (ret fieldParameters) { ret.stringType = TagIA5String case part == "printable": ret.stringType = TagPrintableString + case part == "numeric": + ret.stringType = TagNumericString case part == "utf8": ret.stringType = TagUTF8String case strings.HasPrefix(part, "default:"): @@ -136,36 +139,38 @@ func parseFieldParameters(str string) (ret fieldParameters) { // Given a reflected Go type, getUniversalType returns the default tag number // and expected compound flag. 
-func getUniversalType(t reflect.Type) (tagNumber int, isCompound, ok bool) { +func getUniversalType(t reflect.Type) (matchAny bool, tagNumber int, isCompound, ok bool) { switch t { + case rawValueType: + return true, -1, false, true case objectIdentifierType: - return TagOID, false, true + return false, TagOID, false, true case bitStringType: - return TagBitString, false, true + return false, TagBitString, false, true case timeType: - return TagUTCTime, false, true + return false, TagUTCTime, false, true case enumeratedType: - return TagEnum, false, true + return false, TagEnum, false, true case bigIntType: - return TagInteger, false, true + return false, TagInteger, false, true } switch t.Kind() { case reflect.Bool: - return TagBoolean, false, true + return false, TagBoolean, false, true case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - return TagInteger, false, true + return false, TagInteger, false, true case reflect.Struct: - return TagSequence, true, true + return false, TagSequence, true, true case reflect.Slice: if t.Elem().Kind() == reflect.Uint8 { - return TagOctetString, false, true + return false, TagOctetString, false, true } if strings.HasSuffix(t.Name(), "SET") { - return TagSet, true, true + return false, TagSet, true, true } - return TagSequence, true, true + return false, TagSequence, true, true case reflect.String: - return TagPrintableString, false, true + return false, TagPrintableString, false, true } - return 0, false, false + return false, 0, false, false } diff --git a/libgo/go/encoding/asn1/marshal.go b/libgo/go/encoding/asn1/marshal.go index fdadb39..3e85651 100644 --- a/libgo/go/encoding/asn1/marshal.go +++ b/libgo/go/encoding/asn1/marshal.go @@ -18,7 +18,7 @@ var ( byteFFEncoder encoder = byteEncoder(0xff) ) -// encoder represents a ASN.1 element that is waiting to be marshaled. +// encoder represents an ASN.1 element that is waiting to be marshaled. 
type encoder interface { // Len returns the number of bytes needed to marshal this element. Len() int @@ -268,7 +268,13 @@ func makeObjectIdentifier(oid []int) (e encoder, err error) { func makePrintableString(s string) (e encoder, err error) { for i := 0; i < len(s); i++ { - if !isPrintable(s[i]) { + // The asterisk is often used in PrintableString, even though + // it is invalid. If a PrintableString was specifically + // requested then the asterisk is permitted by this code. + // Ampersand is allowed in parsing due a handful of CA + // certificates, however when making new certificates + // it is rejected. + if !isPrintable(s[i], allowAsterisk, rejectAmpersand) { return nil, StructuralError{"PrintableString contains invalid character"} } } @@ -286,6 +292,16 @@ func makeIA5String(s string) (e encoder, err error) { return stringEncoder(s), nil } +func makeNumericString(s string) (e encoder, err error) { + for i := 0; i < len(s); i++ { + if !isNumeric(s[i]) { + return nil, StructuralError{"NumericString contains invalid character"} + } + } + + return stringEncoder(s), nil +} + func makeUTF8String(s string) encoder { return stringEncoder(s) } @@ -503,6 +519,8 @@ func makeBody(value reflect.Value, params fieldParameters) (e encoder, err error return makeIA5String(v.String()) case TagPrintableString: return makePrintableString(v.String()) + case TagNumericString: + return makeNumericString(v.String()) default: return makeUTF8String(v.String()), nil } @@ -556,11 +574,10 @@ func makeField(v reflect.Value, params fieldParameters) (e encoder, err error) { return t, nil } - tag, isCompound, ok := getUniversalType(v.Type()) - if !ok { + matchAny, tag, isCompound, ok := getUniversalType(v.Type()) + if !ok || matchAny { return nil, StructuralError{fmt.Sprintf("unknown Go type: %v", v.Type())} } - class := ClassUniversal if params.timeType != 0 && tag != TagUTCTime { return nil, StructuralError{"explicit time type given to non-time member"} @@ -577,7 +594,7 @@ func makeField(v 
reflect.Value, params fieldParameters) (e encoder, err error) { // a PrintableString if the character set in the string is // sufficiently limited, otherwise we'll use a UTF8String. for _, r := range v.String() { - if r >= utf8.RuneSelf || !isPrintable(byte(r)) { + if r >= utf8.RuneSelf || !isPrintable(byte(r), rejectAsterisk, rejectAmpersand) { if !utf8.ValidString(v.String()) { return nil, errors.New("asn1: string not valid UTF-8") } @@ -610,27 +627,33 @@ func makeField(v reflect.Value, params fieldParameters) (e encoder, err error) { bodyLen := t.body.Len() - if params.explicit { - t.tag = bytesEncoder(appendTagAndLength(t.scratch[:0], tagAndLength{class, tag, bodyLen, isCompound})) + class := ClassUniversal + if params.tag != nil { + if params.application { + class = ClassApplication + } else { + class = ClassContextSpecific + } + + if params.explicit { + t.tag = bytesEncoder(appendTagAndLength(t.scratch[:0], tagAndLength{ClassUniversal, tag, bodyLen, isCompound})) - tt := new(taggedEncoder) + tt := new(taggedEncoder) - tt.body = t + tt.body = t - tt.tag = bytesEncoder(appendTagAndLength(tt.scratch[:0], tagAndLength{ - class: ClassContextSpecific, - tag: *params.tag, - length: bodyLen + t.tag.Len(), - isCompound: true, - })) + tt.tag = bytesEncoder(appendTagAndLength(tt.scratch[:0], tagAndLength{ + class: class, + tag: *params.tag, + length: bodyLen + t.tag.Len(), + isCompound: true, + })) - return tt, nil - } + return tt, nil + } - if params.tag != nil { // implicit tag. 
tag = *params.tag - class = ClassContextSpecific } t.tag = bytesEncoder(appendTagAndLength(t.scratch[:0], tagAndLength{class, tag, bodyLen, isCompound})) @@ -650,7 +673,13 @@ func makeField(v reflect.Value, params fieldParameters) (e encoder, err error) { // utc: causes time.Time to be marshaled as ASN.1, UTCTime values // generalized: causes time.Time to be marshaled as ASN.1, GeneralizedTime values func Marshal(val interface{}) ([]byte, error) { - e, err := makeField(reflect.ValueOf(val), fieldParameters{}) + return MarshalWithParams(val, "") +} + +// MarshalWithParams allows field parameters to be specified for the +// top-level element. The form of the params is the same as the field tags. +func MarshalWithParams(val interface{}, params string) ([]byte, error) { + e, err := makeField(reflect.ValueOf(val), parseFieldParameters(params)) if err != nil { return nil, err } diff --git a/libgo/go/encoding/asn1/marshal_test.go b/libgo/go/encoding/asn1/marshal_test.go index 10db1aa..4f755a1 100644 --- a/libgo/go/encoding/asn1/marshal_test.go +++ b/libgo/go/encoding/asn1/marshal_test.go @@ -59,6 +59,10 @@ type printableStringTest struct { A string `asn1:"printable"` } +type genericStringTest struct { + A string +} + type optionalRawValueTest struct { A RawValue `asn1:"optional"` } @@ -71,6 +75,15 @@ type defaultTest struct { A int `asn1:"optional,default:1"` } +type applicationTest struct { + A int `asn1:"application,tag:0"` + B int `asn1:"application,tag:1,explicit"` +} + +type numericStringTest struct { + A string `asn1:"numeric"` +} + type testSET []int var PST = time.FixedZone("PST", -8*60*60) @@ -142,6 +155,9 @@ var marshalTests = []marshalTest{ {optionalRawValueTest{}, "3000"}, {printableStringTest{"test"}, "3006130474657374"}, {printableStringTest{"test*"}, "30071305746573742a"}, + {genericStringTest{"test"}, "3006130474657374"}, + {genericStringTest{"test*"}, "30070c05746573742a"}, + {genericStringTest{"test&"}, "30070c057465737426"}, {rawContentsStruct{nil, 64}, 
"3003020140"}, {rawContentsStruct{[]byte{0x30, 3, 1, 2, 3}, 64}, "3003010203"}, {RawValue{Tag: 1, Class: 2, IsCompound: false, Bytes: []byte{1, 2, 3}}, "8103010203"}, @@ -152,6 +168,8 @@ var marshalTests = []marshalTest{ {defaultTest{0}, "3003020100"}, {defaultTest{1}, "3000"}, {defaultTest{2}, "3003020102"}, + {applicationTest{1, 2}, "30084001016103020102"}, + {numericStringTest{"1 9"}, "30051203312039"}, } func TestMarshal(t *testing.T) { @@ -168,6 +186,31 @@ func TestMarshal(t *testing.T) { } } +type marshalWithParamsTest struct { + in interface{} + params string + out string // hex encoded +} + +var marshalWithParamsTests = []marshalWithParamsTest{ + {intStruct{10}, "set", "310302010a"}, + {intStruct{10}, "application", "600302010a"}, +} + +func TestMarshalWithParams(t *testing.T) { + for i, test := range marshalWithParamsTests { + data, err := MarshalWithParams(test.in, test.params) + if err != nil { + t.Errorf("#%d failed: %s", i, err) + } + out, _ := hex.DecodeString(test.out) + if !bytes.Equal(out, data) { + t.Errorf("#%d got: %x want %x\n\t%q\n\t%q", i, data, out, data, out) + + } + } +} + type marshalErrTest struct { in interface{} err string @@ -175,6 +218,9 @@ type marshalErrTest struct { var marshalErrTests = []marshalErrTest{ {bigIntStruct{nil}, "empty integer"}, + {numericStringTest{"a"}, "invalid character"}, + {ia5StringTest{"\xb0"}, "invalid character"}, + {printableStringTest{"!"}, "invalid character"}, } func TestMarshalError(t *testing.T) { diff --git a/libgo/go/encoding/base32/base32.go b/libgo/go/encoding/base32/base32.go index bf341b5..e72ba74 100644 --- a/libgo/go/encoding/base32/base32.go +++ b/libgo/go/encoding/base32/base32.go @@ -130,8 +130,19 @@ func (enc *Encoding) Encode(dst, src []byte) { } // Encode 5-bit blocks using the base32 alphabet - for i := 0; i < 8; i++ { - if len(dst) > i { + size := len(dst) + if size >= 8 { + // Common case, unrolled for extra performance + dst[0] = enc.encode[b[0]] + dst[1] = enc.encode[b[1]] + dst[2] 
= enc.encode[b[2]] + dst[3] = enc.encode[b[3]] + dst[4] = enc.encode[b[4]] + dst[5] = enc.encode[b[5]] + dst[6] = enc.encode[b[6]] + dst[7] = enc.encode[b[7]] + } else { + for i := 0; i < size; i++ { dst[i] = enc.encode[b[i]] } } diff --git a/libgo/go/encoding/base64/base64.go b/libgo/go/encoding/base64/base64.go index b208f9e..9a99370 100644 --- a/libgo/go/encoding/base64/base64.go +++ b/libgo/go/encoding/base64/base64.go @@ -6,6 +6,7 @@ package base64 import ( + "encoding/binary" "io" "strconv" ) @@ -269,121 +270,110 @@ func (e CorruptInputError) Error() string { return "illegal base64 data at input byte " + strconv.FormatInt(int64(e), 10) } -// decode is like Decode but returns an additional 'end' value, which -// indicates if end-of-message padding or a partial quantum was encountered -// and thus any additional data is an error. -func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) { - si := 0 - - for si < len(src) && !end { - // Decode quantum using the base64 alphabet - var dbuf [4]byte - dinc, dlen := 3, 4 - - for j := 0; j < len(dbuf); j++ { - if len(src) == si { - switch { - case j == 0: - return n, false, nil - case j == 1, enc.padChar != NoPadding: - return n, false, CorruptInputError(si - j) - } - dinc, dlen, end = j-1, j, true - break +// decodeQuantum decodes up to 4 base64 bytes. It takes for parameters +// the destination buffer dst, the source buffer src and an index in the +// source buffer si. +// It returns the number of bytes read from src, the number of bytes written +// to dst, and an error, if any. 
+func (enc *Encoding) decodeQuantum(dst, src []byte, si int) (nsi, n int, err error) { + // Decode quantum using the base64 alphabet + var dbuf [4]byte + dinc, dlen := 3, 4 + + for j := 0; j < len(dbuf); j++ { + if len(src) == si { + switch { + case j == 0: + return si, 0, nil + case j == 1, enc.padChar != NoPadding: + return si, 0, CorruptInputError(si - j) } - in := src[si] + dinc, dlen = j-1, j + break + } + in := src[si] + si++ - si++ + out := enc.decodeMap[in] + if out != 0xff { + dbuf[j] = out + continue + } - out := enc.decodeMap[in] - if out != 0xFF { - dbuf[j] = out - continue - } + if in == '\n' || in == '\r' { + j-- + continue + } - if in == '\n' || in == '\r' { - j-- - continue - } - if rune(in) == enc.padChar { - // We've reached the end and there's padding - switch j { - case 0, 1: - // incorrect padding - return n, false, CorruptInputError(si - 1) - case 2: - // "==" is expected, the first "=" is already consumed. - // skip over newlines - for si < len(src) && (src[si] == '\n' || src[si] == '\r') { - si++ - } - if si == len(src) { - // not enough padding - return n, false, CorruptInputError(len(src)) - } - if rune(src[si]) != enc.padChar { - // incorrect padding - return n, false, CorruptInputError(si - 1) - } - - si++ - } - // skip over newlines - for si < len(src) && (src[si] == '\n' || src[si] == '\r') { - si++ - } - if si < len(src) { - // trailing garbage - err = CorruptInputError(si) - } - dinc, dlen, end = 3, j, true - break - } - return n, false, CorruptInputError(si - 1) + if rune(in) != enc.padChar { + return si, 0, CorruptInputError(si - 1) } - // Convert 4x 6bit source bytes into 3 bytes - val := uint(dbuf[0])<<18 | uint(dbuf[1])<<12 | uint(dbuf[2])<<6 | uint(dbuf[3]) - dbuf[2], dbuf[1], dbuf[0] = byte(val>>0), byte(val>>8), byte(val>>16) - switch dlen { - case 4: - dst[2] = dbuf[2] - dbuf[2] = 0 - fallthrough - case 3: - dst[1] = dbuf[1] - if enc.strict && dbuf[2] != 0 { - return n, end, CorruptInputError(si - 1) - } - dbuf[1] = 0 - 
fallthrough + // We've reached the end and there's padding + switch j { + case 0, 1: + // incorrect padding + return si, 0, CorruptInputError(si - 1) case 2: - dst[0] = dbuf[0] - if enc.strict && (dbuf[1] != 0 || dbuf[2] != 0) { - return n, end, CorruptInputError(si - 2) + // "==" is expected, the first "=" is already consumed. + // skip over newlines + for si < len(src) && (src[si] == '\n' || src[si] == '\r') { + si++ + } + if si == len(src) { + // not enough padding + return si, 0, CorruptInputError(len(src)) } + if rune(src[si]) != enc.padChar { + // incorrect padding + return si, 0, CorruptInputError(si - 1) + } + + si++ + } + + // skip over newlines + for si < len(src) && (src[si] == '\n' || src[si] == '\r') { + si++ + } + if si < len(src) { + // trailing garbage + err = CorruptInputError(si) } - dst = dst[dinc:] - n += dlen - 1 + dinc, dlen = 3, j + break } - return n, end, err -} + // Convert 4x 6bit source bytes into 3 bytes + val := uint(dbuf[0])<<18 | uint(dbuf[1])<<12 | uint(dbuf[2])<<6 | uint(dbuf[3]) + dbuf[2], dbuf[1], dbuf[0] = byte(val>>0), byte(val>>8), byte(val>>16) + switch dlen { + case 4: + dst[2] = dbuf[2] + dbuf[2] = 0 + fallthrough + case 3: + dst[1] = dbuf[1] + if enc.strict && dbuf[2] != 0 { + return si, 0, CorruptInputError(si - 1) + } + dbuf[1] = 0 + fallthrough + case 2: + dst[0] = dbuf[0] + if enc.strict && (dbuf[1] != 0 || dbuf[2] != 0) { + return si, 0, CorruptInputError(si - 2) + } + } + dst = dst[dinc:] -// Decode decodes src using the encoding enc. It writes at most -// DecodedLen(len(src)) bytes to dst and returns the number of bytes -// written. If src contains invalid base64 data, it will return the -// number of bytes successfully written and CorruptInputError. -// New line characters (\r and \n) are ignored. -func (enc *Encoding) Decode(dst, src []byte) (n int, err error) { - n, _, err = enc.decode(dst, src) - return + return si, dlen - 1, err } // DecodeString returns the bytes represented by the base64 string s. 
func (enc *Encoding) DecodeString(s string) ([]byte, error) { dbuf := make([]byte, enc.DecodedLen(len(s))) - n, _, err := enc.decode(dbuf, []byte(s)) + n, err := enc.Decode(dbuf, []byte(s)) return dbuf[:n], err } @@ -392,7 +382,6 @@ type decoder struct { readErr error // error from r.Read enc *Encoding r io.Reader - end bool // saw end of message buf [1024]byte // leftover input nbuf int out []byte // leftover decoded output @@ -430,9 +419,8 @@ func (d *decoder) Read(p []byte) (n int, err error) { if d.enc.padChar == NoPadding && d.nbuf > 0 { // Decode final fragment, without padding. var nw int - nw, _, d.err = d.enc.decode(d.outbuf[:], d.buf[:d.nbuf]) + nw, d.err = d.enc.Decode(d.outbuf[:], d.buf[:d.nbuf]) d.nbuf = 0 - d.end = true d.out = d.outbuf[:nw] n = copy(p, d.out) d.out = d.out[n:] @@ -454,18 +442,138 @@ func (d *decoder) Read(p []byte) (n int, err error) { nr := d.nbuf / 4 * 4 nw := d.nbuf / 4 * 3 if nw > len(p) { - nw, d.end, d.err = d.enc.decode(d.outbuf[:], d.buf[:nr]) + nw, d.err = d.enc.Decode(d.outbuf[:], d.buf[:nr]) d.out = d.outbuf[:nw] n = copy(p, d.out) d.out = d.out[n:] } else { - n, d.end, d.err = d.enc.decode(p, d.buf[:nr]) + n, d.err = d.enc.Decode(p, d.buf[:nr]) } d.nbuf -= nr copy(d.buf[:d.nbuf], d.buf[nr:]) return n, d.err } +// Decode decodes src using the encoding enc. It writes at most +// DecodedLen(len(src)) bytes to dst and returns the number of bytes +// written. If src contains invalid base64 data, it will return the +// number of bytes successfully written and CorruptInputError. +// New line characters (\r and \n) are ignored. 
+func (enc *Encoding) Decode(dst, src []byte) (n int, err error) { + if len(src) == 0 { + return 0, nil + } + + si := 0 + ilen := len(src) + olen := len(dst) + for strconv.IntSize >= 64 && ilen-si >= 8 && olen-n >= 8 { + if ok := enc.decode64(dst[n:], src[si:]); ok { + n += 6 + si += 8 + } else { + var ninc int + si, ninc, err = enc.decodeQuantum(dst[n:], src, si) + n += ninc + if err != nil { + return n, err + } + } + } + + for ilen-si >= 4 && olen-n >= 4 { + if ok := enc.decode32(dst[n:], src[si:]); ok { + n += 3 + si += 4 + } else { + var ninc int + si, ninc, err = enc.decodeQuantum(dst[n:], src, si) + n += ninc + if err != nil { + return n, err + } + } + } + + for si < len(src) { + var ninc int + si, ninc, err = enc.decodeQuantum(dst[n:], src, si) + n += ninc + if err != nil { + return n, err + } + } + return n, err +} + +// decode32 tries to decode 4 base64 char into 3 bytes. +// len(dst) and len(src) must both be >= 4. +// Returns true if decode succeeded. +func (enc *Encoding) decode32(dst, src []byte) bool { + var dn, n uint32 + if n = uint32(enc.decodeMap[src[0]]); n == 0xff { + return false + } + dn |= n << 26 + if n = uint32(enc.decodeMap[src[1]]); n == 0xff { + return false + } + dn |= n << 20 + if n = uint32(enc.decodeMap[src[2]]); n == 0xff { + return false + } + dn |= n << 14 + if n = uint32(enc.decodeMap[src[3]]); n == 0xff { + return false + } + dn |= n << 8 + + binary.BigEndian.PutUint32(dst, dn) + return true +} + +// decode64 tries to decode 8 base64 char into 6 bytes. +// len(dst) and len(src) must both be >= 8. +// Returns true if decode succeeded. 
+func (enc *Encoding) decode64(dst, src []byte) bool { + var dn, n uint64 + if n = uint64(enc.decodeMap[src[0]]); n == 0xff { + return false + } + dn |= n << 58 + if n = uint64(enc.decodeMap[src[1]]); n == 0xff { + return false + } + dn |= n << 52 + if n = uint64(enc.decodeMap[src[2]]); n == 0xff { + return false + } + dn |= n << 46 + if n = uint64(enc.decodeMap[src[3]]); n == 0xff { + return false + } + dn |= n << 40 + if n = uint64(enc.decodeMap[src[4]]); n == 0xff { + return false + } + dn |= n << 34 + if n = uint64(enc.decodeMap[src[5]]); n == 0xff { + return false + } + dn |= n << 28 + if n = uint64(enc.decodeMap[src[6]]); n == 0xff { + return false + } + dn |= n << 22 + if n = uint64(enc.decodeMap[src[7]]); n == 0xff { + return false + } + dn |= n << 16 + + binary.BigEndian.PutUint64(dst, dn) + return true +} + type newlineFilteringReader struct { wrapped io.Reader } diff --git a/libgo/go/encoding/base64/base64_test.go b/libgo/go/encoding/base64/base64_test.go index 05011fb..9f5c493 100644 --- a/libgo/go/encoding/base64/base64_test.go +++ b/libgo/go/encoding/base64/base64_test.go @@ -152,12 +152,9 @@ func TestDecode(t *testing.T) { for _, tt := range encodingTests { encoded := tt.conv(p.encoded) dbuf := make([]byte, tt.enc.DecodedLen(len(encoded))) - count, end, err := tt.enc.decode(dbuf, []byte(encoded)) + count, err := tt.enc.Decode(dbuf, []byte(encoded)) testEqual(t, "Decode(%q) = error %v, want %v", encoded, err, error(nil)) testEqual(t, "Decode(%q) = length %v, want %v", encoded, count, len(p.decoded)) - if len(encoded) > 0 { - testEqual(t, "Decode(%q) = end %v, want %v", encoded, end, len(p.decoded)%3 != 0) - } testEqual(t, "Decode(%q) = %q, want %q", encoded, string(dbuf[0:count]), p.decoded) dbuf, err = tt.enc.DecodeString(encoded) diff --git a/libgo/go/encoding/binary/binary_test.go b/libgo/go/encoding/binary/binary_test.go index 0547bee..af40257 100644 --- a/libgo/go/encoding/binary/binary_test.go +++ b/libgo/go/encoding/binary/binary_test.go @@ 
-109,6 +109,7 @@ var little = []byte{ var src = []byte{1, 2, 3, 4, 5, 6, 7, 8} var res = []int32{0x01020304, 0x05060708} +var putbuf = []byte{0, 0, 0, 0, 0, 0, 0, 0} func checkResult(t *testing.T, dir string, order ByteOrder, err error, have, want interface{}) { if err != nil { @@ -502,25 +503,42 @@ func BenchmarkWriteSlice1000Int32s(b *testing.B) { } func BenchmarkPutUint16(b *testing.B) { - buf := [2]byte{} b.SetBytes(2) for i := 0; i < b.N; i++ { - BigEndian.PutUint16(buf[:], uint16(i)) + BigEndian.PutUint16(putbuf[:], uint16(i)) } } func BenchmarkPutUint32(b *testing.B) { - buf := [4]byte{} b.SetBytes(4) for i := 0; i < b.N; i++ { - BigEndian.PutUint32(buf[:], uint32(i)) + BigEndian.PutUint32(putbuf[:], uint32(i)) } } func BenchmarkPutUint64(b *testing.B) { - buf := [8]byte{} b.SetBytes(8) for i := 0; i < b.N; i++ { - BigEndian.PutUint64(buf[:], uint64(i)) + BigEndian.PutUint64(putbuf[:], uint64(i)) + } +} +func BenchmarkLittleEndianPutUint16(b *testing.B) { + b.SetBytes(2) + for i := 0; i < b.N; i++ { + LittleEndian.PutUint16(putbuf[:], uint16(i)) + } +} + +func BenchmarkLittleEndianPutUint32(b *testing.B) { + b.SetBytes(4) + for i := 0; i < b.N; i++ { + LittleEndian.PutUint32(putbuf[:], uint32(i)) + } +} + +func BenchmarkLittleEndianPutUint64(b *testing.B) { + b.SetBytes(8) + for i := 0; i < b.N; i++ { + LittleEndian.PutUint64(putbuf[:], uint64(i)) } } diff --git a/libgo/go/encoding/csv/reader.go b/libgo/go/encoding/csv/reader.go index a3497c8..2efc7ad 100644 --- a/libgo/go/encoding/csv/reader.go +++ b/libgo/go/encoding/csv/reader.go @@ -58,44 +58,67 @@ import ( "fmt" "io" "unicode" + "unicode/utf8" ) // A ParseError is returned for parsing errors. -// The first line is 1. The first column is 0. +// Line numbers are 1-indexed and columns are 0-indexed. 
type ParseError struct { - Line int // Line where the error occurred - Column int // Column (rune index) where the error occurred - Err error // The actual error + StartLine int // Line where the record starts + Line int // Line where the error occurred + Column int // Column (rune index) where the error occurred + Err error // The actual error } func (e *ParseError) Error() string { - return fmt.Sprintf("line %d, column %d: %s", e.Line, e.Column, e.Err) + if e.Err == ErrFieldCount { + return fmt.Sprintf("record on line %d: %v", e.Line, e.Err) + } + if e.StartLine != e.Line { + return fmt.Sprintf("record on line %d; parse error on line %d, column %d: %v", e.StartLine, e.Line, e.Column, e.Err) + } + return fmt.Sprintf("parse error on line %d, column %d: %v", e.Line, e.Column, e.Err) } -// These are the errors that can be returned in ParseError.Error +// These are the errors that can be returned in ParseError.Err. var ( - ErrTrailingComma = errors.New("extra delimiter at end of line") // no longer used + ErrTrailingComma = errors.New("extra delimiter at end of line") // Deprecated: No longer used. ErrBareQuote = errors.New("bare \" in non-quoted-field") - ErrQuote = errors.New("extraneous \" in field") - ErrFieldCount = errors.New("wrong number of fields in line") + ErrQuote = errors.New("extraneous or missing \" in quoted-field") + ErrFieldCount = errors.New("wrong number of fields") ) +var errInvalidDelim = errors.New("csv: invalid field or comment delimiter") + +func validDelim(r rune) bool { + return r != 0 && r != '\r' && r != '\n' && utf8.ValidRune(r) && r != utf8.RuneError +} + // A Reader reads records from a CSV-encoded file. // // As returned by NewReader, a Reader expects input conforming to RFC 4180. // The exported fields can be changed to customize the details before the // first call to Read or ReadAll. 
// -// +// The Reader converts all \r\n sequences in its input to plain \n, +// including in multiline field values, so that the returned data does +// not depend on which line-ending convention an input file uses. type Reader struct { // Comma is the field delimiter. // It is set to comma (',') by NewReader. + // Comma must be a valid rune and must not be \r, \n, + // or the Unicode replacement character (0xFFFD). Comma rune + // Comment, if not 0, is the comment character. Lines beginning with the // Comment character without preceding whitespace are ignored. // With leading whitespace the Comment character becomes part of the // field, even if TrimLeadingSpace is true. + // Comment must be a valid rune and must not be \r, \n, + // or the Unicode replacement character (0xFFFD). + // It must also not be equal to Comma. Comment rune + // FieldsPerRecord is the number of expected fields per record. // If FieldsPerRecord is positive, Read requires each record to // have the given number of fields. If FieldsPerRecord is 0, Read sets it to @@ -103,31 +126,41 @@ type Reader struct { // have the same field count. If FieldsPerRecord is negative, no check is // made and records may have a variable number of fields. FieldsPerRecord int + // If LazyQuotes is true, a quote may appear in an unquoted field and a // non-doubled quote may appear in a quoted field. - LazyQuotes bool - TrailingComma bool // ignored; here for backwards compatibility + LazyQuotes bool + // If TrimLeadingSpace is true, leading white space in a field is ignored. // This is done even if the field delimiter, Comma, is white space. TrimLeadingSpace bool + // ReuseRecord controls whether calls to Read may return a slice sharing // the backing array of the previous call's returned slice for performance. // By default, each call to Read returns newly allocated memory owned by the caller. 
ReuseRecord bool - line int - column int - r *bufio.Reader - // lineBuffer holds the unescaped fields read by readField, one after another. + TrailingComma bool // Deprecated: No longer used. + + r *bufio.Reader + + // numLine is the current line being read in the CSV file. + numLine int + + // rawBuffer is a line buffer only used by the readLine method. + rawBuffer []byte + + // recordBuffer holds the unescaped fields, one after another. // The fields can be accessed by using the indexes in fieldIndexes. - // Example: for the row `a,"b","c""d",e` lineBuffer will contain `abc"de` and - // fieldIndexes will contain the indexes 0, 1, 2, 5. - lineBuffer bytes.Buffer - // Indexes of fields inside lineBuffer - // The i'th field starts at offset fieldIndexes[i] in lineBuffer. + // E.g., For the row `a,"b","c""d",e`, recordBuffer will contain `abc"de` + // and fieldIndexes will contain the indexes [1, 2, 5, 6]. + recordBuffer []byte + + // fieldIndexes is an index of fields inside recordBuffer. + // The i'th field ends at offset fieldIndexes[i] in recordBuffer. fieldIndexes []int - // only used when ReuseRecord == true + // lastRecord is a record cache and only used when ReuseRecord == true. lastRecord []string } @@ -139,15 +172,6 @@ func NewReader(r io.Reader) *Reader { } } -// error creates a new ParseError based on err. -func (r *Reader) error(err error) error { - return &ParseError{ - Line: r.line, - Column: r.column, - Err: err, - } -} - // Read reads one record (a slice of fields) from r. // If the record has an unexpected number of fields, // Read returns the record along with the error ErrFieldCount. @@ -163,7 +187,6 @@ func (r *Reader) Read() (record []string, err error) { } else { record, err = r.readRecord(nil) } - return record, err } @@ -185,226 +208,192 @@ func (r *Reader) ReadAll() (records [][]string, err error) { } } -// readRecord reads and parses a single csv record from r. -// Unlike parseRecord, readRecord handles FieldsPerRecord. 
-// If dst has enough capacity it will be used for the returned record. -func (r *Reader) readRecord(dst []string) (record []string, err error) { - for { - record, err = r.parseRecord(dst) - if record != nil { - break - } - if err != nil { - return nil, err +// readLine reads the next line (with the trailing endline). +// If EOF is hit without a trailing endline, it will be omitted. +// If some bytes were read, then the error is never io.EOF. +// The result is only valid until the next call to readLine. +func (r *Reader) readLine() ([]byte, error) { + line, err := r.r.ReadSlice('\n') + if err == bufio.ErrBufferFull { + r.rawBuffer = append(r.rawBuffer[:0], line...) + for err == bufio.ErrBufferFull { + line, err = r.r.ReadSlice('\n') + r.rawBuffer = append(r.rawBuffer, line...) } + line = r.rawBuffer } - - if r.FieldsPerRecord > 0 { - if len(record) != r.FieldsPerRecord { - r.column = 0 // report at start of record - return record, r.error(ErrFieldCount) + if len(line) > 0 && err == io.EOF { + err = nil + // For backwards compatibility, drop trailing \r before EOF. + if line[len(line)-1] == '\r' { + line = line[:len(line)-1] } - } else if r.FieldsPerRecord == 0 { - r.FieldsPerRecord = len(record) } - return record, nil -} - -// readRune reads one rune from r, folding \r\n to \n and keeping track -// of how far into the line we have read. r.column will point to the start -// of this rune, not the end of this rune. -func (r *Reader) readRune() (rune, error) { - r1, _, err := r.r.ReadRune() - - // Handle \r\n here. We make the simplifying assumption that - // anytime \r is followed by \n that it can be folded to \n. - // We will not detect files which contain both \r\n and bare \n. - if r1 == '\r' { - r1, _, err = r.r.ReadRune() - if err == nil { - if r1 != '\n' { - r.r.UnreadRune() - r1 = '\r' - } - } + r.numLine++ + // Normalize \r\n to \n on all input lines. 
+ if n := len(line); n >= 2 && line[n-2] == '\r' && line[n-1] == '\n' { + line[n-2] = '\n' + line = line[:n-1] } - r.column++ - return r1, err + return line, err } -// skip reads runes up to and including the rune delim or until error. -func (r *Reader) skip(delim rune) error { - for { - r1, err := r.readRune() - if err != nil { - return err - } - if r1 == delim { - return nil - } +// lengthNL reports the number of bytes for the trailing \n. +func lengthNL(b []byte) int { + if len(b) > 0 && b[len(b)-1] == '\n' { + return 1 } + return 0 } -// parseRecord reads and parses a single csv record from r. -// If dst has enough capacity it will be used for the returned fields. -func (r *Reader) parseRecord(dst []string) (fields []string, err error) { - // Each record starts on a new line. We increment our line - // number (lines start at 1, not 0) and set column to -1 - // so as we increment in readRune it points to the character we read. - r.line++ - r.column = -1 - - // Peek at the first rune. If it is an error we are done. - // If we support comments and it is the comment character - // then skip to the end of line. - - r1, _, err := r.r.ReadRune() - if err != nil { - return nil, err - } +// nextRune returns the next rune in b or utf8.RuneError. +func nextRune(b []byte) rune { + r, _ := utf8.DecodeRune(b) + return r +} - if r.Comment != 0 && r1 == r.Comment { - return nil, r.skip('\n') +func (r *Reader) readRecord(dst []string) ([]string, error) { + if r.Comma == r.Comment || !validDelim(r.Comma) || (r.Comment != 0 && !validDelim(r.Comment)) { + return nil, errInvalidDelim } - r.r.UnreadRune() - - r.lineBuffer.Reset() - r.fieldIndexes = r.fieldIndexes[:0] - - // At this point we have at least one field. 
- for { - idx := r.lineBuffer.Len() - - haveField, delim, err := r.parseField() - if haveField { - r.fieldIndexes = append(r.fieldIndexes, idx) - } - if delim == '\n' || err == io.EOF { - if len(r.fieldIndexes) == 0 { - return nil, err - } - break + // Read line (automatically skipping past empty lines and any comments). + var line, fullLine []byte + var errRead error + for errRead == nil { + line, errRead = r.readLine() + if r.Comment != 0 && nextRune(line) == r.Comment { + line = nil + continue // Skip comment lines } - - if err != nil { - return nil, err + if errRead == nil && len(line) == lengthNL(line) { + line = nil + continue // Skip empty lines } + fullLine = line + break } - - fieldCount := len(r.fieldIndexes) - // Using this approach (creating a single string and taking slices of it) - // means that a single reference to any of the fields will retain the whole - // string. The risk of a nontrivial space leak caused by this is considered - // minimal and a tradeoff for better performance through the combined - // allocations. - line := r.lineBuffer.String() - - if cap(dst) >= fieldCount { - fields = dst[:fieldCount] - } else { - fields = make([]string, fieldCount) + if errRead == io.EOF { + return nil, errRead } - for i, idx := range r.fieldIndexes { - if i == fieldCount-1 { - fields[i] = line[idx:] - } else { - fields[i] = line[idx:r.fieldIndexes[i+1]] - } - } - - return fields, nil -} - -// parseField parses the next field in the record. The read field is -// appended to r.lineBuffer. Delim is the first character not part of the field -// (r.Comma or '\n'). 
-func (r *Reader) parseField() (haveField bool, delim rune, err error) { - r1, err := r.readRune() - for err == nil && r.TrimLeadingSpace && r1 != '\n' && unicode.IsSpace(r1) { - r1, err = r.readRune() - } - - if err == io.EOF && r.column != 0 { - return true, 0, err - } - if err != nil { - return false, 0, err - } - - switch r1 { - case r.Comma: - // will check below - - case '\n': - // We are a trailing empty field or a blank line - if r.column == 0 { - return false, r1, nil + // Parse each field in the record. + var err error + const quoteLen = len(`"`) + commaLen := utf8.RuneLen(r.Comma) + recLine := r.numLine // Starting line for record + r.recordBuffer = r.recordBuffer[:0] + r.fieldIndexes = r.fieldIndexes[:0] +parseField: + for { + if r.TrimLeadingSpace { + line = bytes.TrimLeftFunc(line, unicode.IsSpace) } - return true, r1, nil - - case '"': - // quoted field - Quoted: - for { - r1, err = r.readRune() - if err != nil { - if err == io.EOF { - if r.LazyQuotes { - return true, 0, err - } - return false, 0, r.error(ErrQuote) - } - return false, 0, err + if len(line) == 0 || line[0] != '"' { + // Non-quoted string field + i := bytes.IndexRune(line, r.Comma) + field := line + if i >= 0 { + field = field[:i] + } else { + field = field[:len(field)-lengthNL(field)] } - switch r1 { - case '"': - r1, err = r.readRune() - if err != nil || r1 == r.Comma { - break Quoted + // Check to make sure a quote does not appear in field. + if !r.LazyQuotes { + if j := bytes.IndexByte(field, '"'); j >= 0 { + col := utf8.RuneCount(fullLine[:len(fullLine)-len(line[j:])]) + err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrBareQuote} + break parseField } - if r1 == '\n' { - return true, r1, nil - } - if r1 != '"' { - if !r.LazyQuotes { - r.column-- - return false, 0, r.error(ErrQuote) + } + r.recordBuffer = append(r.recordBuffer, field...) 
+ r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer)) + if i >= 0 { + line = line[i+commaLen:] + continue parseField + } + break parseField + } else { + // Quoted string field + line = line[quoteLen:] + for { + i := bytes.IndexByte(line, '"') + if i >= 0 { + // Hit next quote. + r.recordBuffer = append(r.recordBuffer, line[:i]...) + line = line[i+quoteLen:] + switch rn := nextRune(line); { + case rn == '"': + // `""` sequence (append quote). + r.recordBuffer = append(r.recordBuffer, '"') + line = line[quoteLen:] + case rn == r.Comma: + // `",` sequence (end of field). + line = line[commaLen:] + r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer)) + continue parseField + case lengthNL(line) == len(line): + // `"\n` sequence (end of line). + r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer)) + break parseField + case r.LazyQuotes: + // `"` sequence (bare quote). + r.recordBuffer = append(r.recordBuffer, '"') + default: + // `"*` sequence (invalid non-escaped quote). + col := utf8.RuneCount(fullLine[:len(fullLine)-len(line)-quoteLen]) + err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrQuote} + break parseField + } + } else if len(line) > 0 { + // Hit end of line (copy all data so far). + r.recordBuffer = append(r.recordBuffer, line...) + if errRead != nil { + break parseField + } + line, errRead = r.readLine() + if errRead == io.EOF { + errRead = nil } - // accept the bare quote - r.lineBuffer.WriteRune('"') + fullLine = line + } else { + // Abrupt end of file (EOF or error). 
+ if !r.LazyQuotes && errRead == nil { + col := utf8.RuneCount(fullLine) + err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrQuote} + break parseField + } + r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer)) + break parseField } - case '\n': - r.line++ - r.column = -1 } - r.lineBuffer.WriteRune(r1) } + } + if err == nil { + err = errRead + } - default: - // unquoted field - for { - r.lineBuffer.WriteRune(r1) - r1, err = r.readRune() - if err != nil || r1 == r.Comma { - break - } - if r1 == '\n' { - return true, r1, nil - } - if !r.LazyQuotes && r1 == '"' { - return false, 0, r.error(ErrBareQuote) - } - } + // Create a single string and create slices out of it. + // This pins the memory of the fields together, but allocates once. + str := string(r.recordBuffer) // Convert to string once to batch allocations + dst = dst[:0] + if cap(dst) < len(r.fieldIndexes) { + dst = make([]string, len(r.fieldIndexes)) + } + dst = dst[:len(r.fieldIndexes)] + var preIdx int + for i, idx := range r.fieldIndexes { + dst[i] = str[preIdx:idx] + preIdx = idx } - if err != nil { - if err == io.EOF { - return true, 0, err + // Check or update the expected fields per record. 
+ if r.FieldsPerRecord > 0 { + if len(dst) != r.FieldsPerRecord && err == nil { + err = &ParseError{StartLine: recLine, Line: recLine, Err: ErrFieldCount} } - return false, 0, err + } else if r.FieldsPerRecord == 0 { + r.FieldsPerRecord = len(dst) } - - return true, r1, nil + return dst, err } diff --git a/libgo/go/encoding/csv/reader_test.go b/libgo/go/encoding/csv/reader_test.go index 5ab1b61..1fc69f9 100644 --- a/libgo/go/encoding/csv/reader_test.go +++ b/libgo/go/encoding/csv/reader_test.go @@ -9,45 +9,38 @@ import ( "reflect" "strings" "testing" + "unicode/utf8" ) -var readTests = []struct { - Name string - Input string - Output [][]string - UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1 - - // These fields are copied into the Reader - Comma rune - Comment rune - FieldsPerRecord int - LazyQuotes bool - TrailingComma bool - TrimLeadingSpace bool - ReuseRecord bool +func TestRead(t *testing.T) { + tests := []struct { + Name string + Input string + Output [][]string + Error error - Error string - Line int // Expected error line if != 0 - Column int // Expected error column if line != 0 -}{ - { + // These fields are copied into the Reader + Comma rune + Comment rune + UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1 + FieldsPerRecord int + LazyQuotes bool + TrimLeadingSpace bool + ReuseRecord bool + }{{ Name: "Simple", Input: "a,b,c\n", Output: [][]string{{"a", "b", "c"}}, - }, - { + }, { Name: "CRLF", Input: "a,b\r\nc,d\r\n", Output: [][]string{{"a", "b"}, {"c", "d"}}, - }, - { + }, { Name: "BareCR", Input: "a,b\rc,d\r\n", Output: [][]string{{"a", "b\rc", "d"}}, - }, - { - Name: "RFC4180test", - UseFieldsPerRecord: true, + }, { + Name: "RFC4180test", Input: `#field1,field2,field3 "aaa","bb b","ccc" @@ -60,163 +53,139 @@ zzz,yyy,xxx {"a,a", `b"bb`, "ccc"}, {"zzz", "yyy", "xxx"}, }, - }, - { + UseFieldsPerRecord: true, + FieldsPerRecord: 0, + }, { Name: "NoEOLTest", Input: "a,b,c", Output: [][]string{{"a", "b", "c"}}, - 
}, - { + }, { Name: "Semicolon", - Comma: ';', Input: "a;b;c\n", Output: [][]string{{"a", "b", "c"}}, - }, - { + Comma: ';', + }, { Name: "MultiLine", Input: `"two line","one line","three line field"`, Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}}, - }, - { + }, { Name: "BlankLine", Input: "a,b,c\n\nd,e,f\n\n", Output: [][]string{ {"a", "b", "c"}, {"d", "e", "f"}, }, - }, - { - Name: "BlankLineFieldCount", - Input: "a,b,c\n\nd,e,f\n\n", - UseFieldsPerRecord: true, + }, { + Name: "BlankLineFieldCount", + Input: "a,b,c\n\nd,e,f\n\n", Output: [][]string{ {"a", "b", "c"}, {"d", "e", "f"}, }, - }, - { + UseFieldsPerRecord: true, + FieldsPerRecord: 0, + }, { Name: "TrimSpace", Input: " a, b, c\n", - TrimLeadingSpace: true, Output: [][]string{{"a", "b", "c"}}, - }, - { + TrimLeadingSpace: true, + }, { Name: "LeadingSpace", Input: " a, b, c\n", Output: [][]string{{" a", " b", " c"}}, - }, - { + }, { Name: "Comment", - Comment: '#', Input: "#1,2,3\na,b,c\n#comment", Output: [][]string{{"a", "b", "c"}}, - }, - { + Comment: '#', + }, { Name: "NoComment", Input: "#1,2,3\na,b,c", Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}}, - }, - { + }, { Name: "LazyQuotes", - LazyQuotes: true, Input: `a "word","1"2",a","b`, Output: [][]string{{`a "word"`, `1"2`, `a"`, `b`}}, - }, - { - Name: "BareQuotes", LazyQuotes: true, + }, { + Name: "BareQuotes", Input: `a "word","1"2",a"`, Output: [][]string{{`a "word"`, `1"2`, `a"`}}, - }, - { - Name: "BareDoubleQuotes", LazyQuotes: true, + }, { + Name: "BareDoubleQuotes", Input: `a""b,c`, Output: [][]string{{`a""b`, `c`}}, - }, - { + LazyQuotes: true, + }, { Name: "BadDoubleQuotes", Input: `a""b,c`, - Error: `bare " in non-quoted-field`, Line: 1, Column: 1, - }, - { + Error: &ParseError{StartLine: 1, Line: 1, Column: 1, Err: ErrBareQuote}, + }, { Name: "TrimQuote", Input: ` "a"," b",c`, - TrimLeadingSpace: true, Output: [][]string{{"a", " b", "c"}}, - }, - { + TrimLeadingSpace: true, + }, { Name: "BadBareQuote", Input: 
`a "word","b"`, - Error: `bare " in non-quoted-field`, Line: 1, Column: 2, - }, - { + Error: &ParseError{StartLine: 1, Line: 1, Column: 2, Err: ErrBareQuote}, + }, { Name: "BadTrailingQuote", Input: `"a word",b"`, - Error: `bare " in non-quoted-field`, Line: 1, Column: 10, - }, - { + Error: &ParseError{StartLine: 1, Line: 1, Column: 10, Err: ErrBareQuote}, + }, { Name: "ExtraneousQuote", Input: `"a "word","b"`, - Error: `extraneous " in field`, Line: 1, Column: 3, - }, - { + Error: &ParseError{StartLine: 1, Line: 1, Column: 3, Err: ErrQuote}, + }, { Name: "BadFieldCount", - UseFieldsPerRecord: true, Input: "a,b,c\nd,e", - Error: "wrong number of fields", Line: 2, - }, - { + Error: &ParseError{StartLine: 2, Line: 2, Err: ErrFieldCount}, + UseFieldsPerRecord: true, + FieldsPerRecord: 0, + }, { Name: "BadFieldCount1", + Input: `a,b,c`, + Error: &ParseError{StartLine: 1, Line: 1, Err: ErrFieldCount}, UseFieldsPerRecord: true, FieldsPerRecord: 2, - Input: `a,b,c`, - Error: "wrong number of fields", Line: 1, - }, - { + }, { Name: "FieldCount", Input: "a,b,c\nd,e", Output: [][]string{{"a", "b", "c"}, {"d", "e"}}, - }, - { + }, { Name: "TrailingCommaEOF", Input: "a,b,c,", Output: [][]string{{"a", "b", "c", ""}}, - }, - { + }, { Name: "TrailingCommaEOL", Input: "a,b,c,\n", Output: [][]string{{"a", "b", "c", ""}}, - }, - { + }, { Name: "TrailingCommaSpaceEOF", - TrimLeadingSpace: true, Input: "a,b,c, ", Output: [][]string{{"a", "b", "c", ""}}, - }, - { - Name: "TrailingCommaSpaceEOL", TrimLeadingSpace: true, + }, { + Name: "TrailingCommaSpaceEOL", Input: "a,b,c, \n", Output: [][]string{{"a", "b", "c", ""}}, - }, - { - Name: "TrailingCommaLine3", TrimLeadingSpace: true, + }, { + Name: "TrailingCommaLine3", Input: "a,b,c\nd,e,f\ng,hi,", Output: [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}}, - }, - { + TrimLeadingSpace: true, + }, { Name: "NotTrailingComma3", Input: "a,b,c, \n", Output: [][]string{{"a", "b", "c", " "}}, - }, - { - Name: "CommaFieldTest", - 
TrailingComma: true, + }, { + Name: "CommaFieldTest", Input: `x,y,z,w x,y,z, x,y,, @@ -240,67 +209,201 @@ x,,, {"x", "", "", ""}, {"", "", "", ""}, }, - }, - { - Name: "TrailingCommaIneffective1", - TrailingComma: true, - TrimLeadingSpace: true, - Input: "a,b,\nc,d,e", + }, { + Name: "TrailingCommaIneffective1", + Input: "a,b,\nc,d,e", Output: [][]string{ {"a", "b", ""}, {"c", "d", "e"}, }, - }, - { - Name: "TrailingCommaIneffective2", - TrailingComma: false, TrimLeadingSpace: true, - Input: "a,b,\nc,d,e", + }, { + Name: "ReadAllReuseRecord", + Input: "a,b\nc,d", Output: [][]string{ - {"a", "b", ""}, - {"c", "d", "e"}, + {"a", "b"}, + {"c", "d"}, }, - }, - { - Name: "ReadAllReuseRecord", ReuseRecord: true, - Input: "a,b\nc,d", + }, { + Name: "StartLine1", // Issue 19019 + Input: "a,\"b\nc\"d,e", + Error: &ParseError{StartLine: 1, Line: 2, Column: 1, Err: ErrQuote}, + }, { + Name: "StartLine2", + Input: "a,b\n\"d\n\n,e", + Error: &ParseError{StartLine: 2, Line: 5, Column: 0, Err: ErrQuote}, + }, { + Name: "CRLFInQuotedField", // Issue 21201 + Input: "A,\"Hello\r\nHi\",B\r\n", Output: [][]string{ - {"a", "b"}, - {"c", "d"}, + {"A", "Hello\nHi", "B"}, }, - }, -} + }, { + Name: "BinaryBlobField", // Issue 19410 + Input: "x09\x41\xb4\x1c,aktau", + Output: [][]string{{"x09A\xb4\x1c", "aktau"}}, + }, { + Name: "TrailingCR", + Input: "field1,field2\r", + Output: [][]string{{"field1", "field2"}}, + }, { + Name: "QuotedTrailingCR", + Input: "\"field\"\r", + Output: [][]string{{"field"}}, + }, { + Name: "QuotedTrailingCRCR", + Input: "\"field\"\r\r", + Error: &ParseError{StartLine: 1, Line: 1, Column: 6, Err: ErrQuote}, + }, { + Name: "FieldCR", + Input: "field\rfield\r", + Output: [][]string{{"field\rfield"}}, + }, { + Name: "FieldCRCR", + Input: "field\r\rfield\r\r", + Output: [][]string{{"field\r\rfield\r"}}, + }, { + Name: "FieldCRCRLF", + Input: "field\r\r\nfield\r\r\n", + Output: [][]string{{"field\r"}, {"field\r"}}, + }, { + Name: "FieldCRCRLFCR", + Input: 
"field\r\r\n\rfield\r\r\n\r", + Output: [][]string{{"field\r"}, {"\rfield\r"}}, + }, { + Name: "FieldCRCRLFCRCR", + Input: "field\r\r\n\r\rfield\r\r\n\r\r", + Output: [][]string{{"field\r"}, {"\r\rfield\r"}, {"\r"}}, + }, { + Name: "MultiFieldCRCRLFCRCR", + Input: "field1,field2\r\r\n\r\rfield1,field2\r\r\n\r\r,", + Output: [][]string{ + {"field1", "field2\r"}, + {"\r\rfield1", "field2\r"}, + {"\r\r", ""}, + }, + }, { + Name: "NonASCIICommaAndComment", + Input: "a£b,c£ \td,e\n€ comment\n", + Output: [][]string{{"a", "b,c", "d,e"}}, + TrimLeadingSpace: true, + Comma: '£', + Comment: '€', + }, { + Name: "NonASCIICommaAndCommentWithQuotes", + Input: "a€\" b,\"€ c\nλ comment\n", + Output: [][]string{{"a", " b,", " c"}}, + Comma: '€', + Comment: 'λ', + }, { + // λ and θ start with the same byte. + // This tests that the parser doesn't confuse such characters. + Name: "NonASCIICommaConfusion", + Input: "\"abθcd\"λefθgh", + Output: [][]string{{"abθcd", "efθgh"}}, + Comma: 'λ', + Comment: '€', + }, { + Name: "NonASCIICommentConfusion", + Input: "λ\nλ\nθ\nλ\n", + Output: [][]string{{"λ"}, {"λ"}, {"λ"}}, + Comment: 'θ', + }, { + Name: "QuotedFieldMultipleLF", + Input: "\"\n\n\n\n\"", + Output: [][]string{{"\n\n\n\n"}}, + }, { + Name: "MultipleCRLF", + Input: "\r\n\r\n\r\n\r\n", + }, { + // The implementation may read each line in several chunks if it doesn't fit entirely + // in the read buffer, so we should test the code to handle that condition. 
+ Name: "HugeLines", + Input: strings.Repeat("#ignore\n", 10000) + strings.Repeat("@", 5000) + "," + strings.Repeat("*", 5000), + Output: [][]string{{strings.Repeat("@", 5000), strings.Repeat("*", 5000)}}, + Comment: '#', + }, { + Name: "QuoteWithTrailingCRLF", + Input: "\"foo\"bar\"\r\n", + Error: &ParseError{StartLine: 1, Line: 1, Column: 4, Err: ErrQuote}, + }, { + Name: "LazyQuoteWithTrailingCRLF", + Input: "\"foo\"bar\"\r\n", + Output: [][]string{{`foo"bar`}}, + LazyQuotes: true, + }, { + Name: "DoubleQuoteWithTrailingCRLF", + Input: "\"foo\"\"bar\"\r\n", + Output: [][]string{{`foo"bar`}}, + }, { + Name: "EvenQuotes", + Input: `""""""""`, + Output: [][]string{{`"""`}}, + }, { + Name: "OddQuotes", + Input: `"""""""`, + Error: &ParseError{StartLine: 1, Line: 1, Column: 7, Err: ErrQuote}, + }, { + Name: "LazyOddQuotes", + Input: `"""""""`, + Output: [][]string{{`"""`}}, + LazyQuotes: true, + }, { + Name: "BadComma1", + Comma: '\n', + Error: errInvalidDelim, + }, { + Name: "BadComma2", + Comma: '\r', + Error: errInvalidDelim, + }, { + Name: "BadComma3", + Comma: utf8.RuneError, + Error: errInvalidDelim, + }, { + Name: "BadComment1", + Comment: '\n', + Error: errInvalidDelim, + }, { + Name: "BadComment2", + Comment: '\r', + Error: errInvalidDelim, + }, { + Name: "BadComment3", + Comment: utf8.RuneError, + Error: errInvalidDelim, + }, { + Name: "BadCommaComment", + Comma: 'X', + Comment: 'X', + Error: errInvalidDelim, + }} -func TestRead(t *testing.T) { - for _, tt := range readTests { - r := NewReader(strings.NewReader(tt.Input)) - r.Comment = tt.Comment - if tt.UseFieldsPerRecord { - r.FieldsPerRecord = tt.FieldsPerRecord - } else { - r.FieldsPerRecord = -1 - } - r.LazyQuotes = tt.LazyQuotes - r.TrailingComma = tt.TrailingComma - r.TrimLeadingSpace = tt.TrimLeadingSpace - r.ReuseRecord = tt.ReuseRecord - if tt.Comma != 0 { - r.Comma = tt.Comma - } - out, err := r.ReadAll() - perr, _ := err.(*ParseError) - if tt.Error != "" { - if err == nil || 
!strings.Contains(err.Error(), tt.Error) { - t.Errorf("%s: error %v, want error %q", tt.Name, err, tt.Error) - } else if tt.Line != 0 && (tt.Line != perr.Line || tt.Column != perr.Column) { - t.Errorf("%s: error at %d:%d expected %d:%d", tt.Name, perr.Line, perr.Column, tt.Line, tt.Column) + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + r := NewReader(strings.NewReader(tt.Input)) + + if tt.Comma != 0 { + r.Comma = tt.Comma } - } else if err != nil { - t.Errorf("%s: unexpected error %v", tt.Name, err) - } else if !reflect.DeepEqual(out, tt.Output) { - t.Errorf("%s: out=%q want %q", tt.Name, out, tt.Output) - } + r.Comment = tt.Comment + if tt.UseFieldsPerRecord { + r.FieldsPerRecord = tt.FieldsPerRecord + } else { + r.FieldsPerRecord = -1 + } + r.LazyQuotes = tt.LazyQuotes + r.TrimLeadingSpace = tt.TrimLeadingSpace + r.ReuseRecord = tt.ReuseRecord + + out, err := r.ReadAll() + if !reflect.DeepEqual(err, tt.Error) { + t.Errorf("ReadAll() error:\ngot %v\nwant %v", err, tt.Error) + } else if !reflect.DeepEqual(out, tt.Output) { + t.Errorf("ReadAll() output:\ngot %q\nwant %q", out, tt.Output) + } + }) } } diff --git a/libgo/go/encoding/csv/writer.go b/libgo/go/encoding/csv/writer.go index 84b7aa1..ef3594e 100644 --- a/libgo/go/encoding/csv/writer.go +++ b/libgo/go/encoding/csv/writer.go @@ -20,7 +20,7 @@ import ( // // Comma is the field delimiter. // -// If UseCRLF is true, the Writer ends each record with \r\n instead of \n. +// If UseCRLF is true, the Writer ends each output line with \r\n instead of \n. type Writer struct { Comma rune // Field delimiter (set to ',' by NewWriter) UseCRLF bool // True to use \r\n as the line terminator @@ -38,6 +38,10 @@ func NewWriter(w io.Writer) *Writer { // Writer writes a single CSV record to w along with any necessary quoting. // A record is a slice of strings with each string being one field. 
func (w *Writer) Write(record []string) error { + if !validDelim(w.Comma) { + return errInvalidDelim + } + for n, field := range record { if n > 0 { if _, err := w.w.WriteRune(w.Comma); err != nil { diff --git a/libgo/go/encoding/gob/codec_test.go b/libgo/go/encoding/gob/codec_test.go index eb9f306..8f7b6f3 100644 --- a/libgo/go/encoding/gob/codec_test.go +++ b/libgo/go/encoding/gob/codec_test.go @@ -1321,6 +1321,7 @@ func TestUnexportedFields(t *testing.T) { var singletons = []interface{}{ true, 7, + uint(10), 3.2, "hello", [3]int{11, 22, 33}, diff --git a/libgo/go/encoding/gob/debug.go b/libgo/go/encoding/gob/debug.go index d69d36f..8f93742 100644 --- a/libgo/go/encoding/gob/debug.go +++ b/libgo/go/encoding/gob/debug.go @@ -594,7 +594,7 @@ func (deb *debugger) printBuiltin(indent tab, id typeId) { x := deb.int64() fmt.Fprintf(os.Stderr, "%s%d\n", indent, x) case tUint: - x := deb.int64() + x := deb.uint64() fmt.Fprintf(os.Stderr, "%s%d\n", indent, x) case tFloat: x := deb.uint64() diff --git a/libgo/go/encoding/gob/decoder.go b/libgo/go/encoding/gob/decoder.go index 8e0b1dd..5ef0388 100644 --- a/libgo/go/encoding/gob/decoder.go +++ b/libgo/go/encoding/gob/decoder.go @@ -55,7 +55,7 @@ func NewDecoder(r io.Reader) *Decoder { // recvType loads the definition of a type. func (dec *Decoder) recvType(id typeId) { - // Have we already seen this type? That's an error + // Have we already seen this type? 
That's an error if id < firstUserId || dec.wireType[id] != nil { dec.err = errors.New("gob: duplicate type received") return @@ -99,10 +99,8 @@ func (dec *Decoder) readMessage(nbytes int) { // Read the data dec.buf.Size(nbytes) _, dec.err = io.ReadFull(dec.r, dec.buf.Bytes()) - if dec.err != nil { - if dec.err == io.EOF { - dec.err = io.ErrUnexpectedEOF - } + if dec.err == io.EOF { + dec.err = io.ErrUnexpectedEOF } } diff --git a/libgo/go/encoding/gob/doc.go b/libgo/go/encoding/gob/doc.go index db734ec..fa53431 100644 --- a/libgo/go/encoding/gob/doc.go +++ b/libgo/go/encoding/gob/doc.go @@ -381,7 +381,7 @@ Now we can send the Point value. Again the field number resets to -1: 07 // this value is 7 bytes long ff 82 // the type number, 65 (1 byte (-FF) followed by 65<<1) 01 // add one to field number, yielding field 0 - 2c // encoding of signed "22" (0x22 = 44 = 22<<1); Point.x = 22 + 2c // encoding of signed "22" (0x2c = 44 = 22<<1); Point.x = 22 01 // add one to field number, yielding field 1 42 // encoding of signed "33" (0x42 = 66 = 33<<1); Point.y = 33 00 // end of structure diff --git a/libgo/go/encoding/hex/hex.go b/libgo/go/encoding/hex/hex.go index 2768f1b..e4df6cb 100644 --- a/libgo/go/encoding/hex/hex.go +++ b/libgo/go/encoding/hex/hex.go @@ -31,7 +31,9 @@ func Encode(dst, src []byte) int { return len(src) * 2 } -// ErrLength results from decoding an odd length slice. +// ErrLength reports an attempt to decode an odd-length input +// using Decode or DecodeString. +// The stream-based Decoder returns io.ErrUnexpectedEOF instead of ErrLength. var ErrLength = errors.New("encoding/hex: odd length hex string") // InvalidByteError values describe errors resulting from an invalid byte in a hex string. @@ -50,24 +52,30 @@ func DecodedLen(x int) int { return x / 2 } // // Decode expects that src contain only hexadecimal // characters and that src should have an even length. 
+// If the input is malformed, Decode returns the number +// of bytes decoded before the error. func Decode(dst, src []byte) (int, error) { - if len(src)%2 == 1 { - return 0, ErrLength - } - - for i := 0; i < len(src)/2; i++ { + var i int + for i = 0; i < len(src)/2; i++ { a, ok := fromHexChar(src[i*2]) if !ok { - return 0, InvalidByteError(src[i*2]) + return i, InvalidByteError(src[i*2]) } b, ok := fromHexChar(src[i*2+1]) if !ok { - return 0, InvalidByteError(src[i*2+1]) + return i, InvalidByteError(src[i*2+1]) } dst[i] = (a << 4) | b } - - return len(src) / 2, nil + if len(src)%2 == 1 { + // Check for invalid char before reporting bad length, + // since the invalid char (if present) is an earlier problem. + if _, ok := fromHexChar(src[i*2]); !ok { + return i, InvalidByteError(src[i*2]) + } + return i, ErrLength + } + return i, nil } // fromHexChar converts a hex character into its value and a success flag. @@ -92,14 +100,17 @@ func EncodeToString(src []byte) string { } // DecodeString returns the bytes represented by the hexadecimal string s. +// +// DecodeString expects that src contain only hexadecimal +// characters and that src should have an even length. +// If the input is malformed, DecodeString returns a string +// containing the bytes decoded before the error. func DecodeString(s string) ([]byte, error) { src := []byte(s) - dst := make([]byte, DecodedLen(len(src))) - _, err := Decode(dst, src) - if err != nil { - return nil, err - } - return dst, nil + // We can use the source slice itself as the destination + // because the decode loop increments by one and then the 'seen' byte is not used anymore. + n, err := Decode(src, src) + return src[:n], err } // Dump returns a string that contains a hex dump of the given data. The format @@ -112,6 +123,81 @@ func Dump(data []byte) string { return buf.String() } +// bufferSize is the number of hexadecimal characters to buffer in encoder and decoder. 
+const bufferSize = 1024 + +type encoder struct { + w io.Writer + err error + out [bufferSize]byte // output buffer +} + +// NewEncoder returns an io.Writer that writes lowercase hexadecimal characters to w. +func NewEncoder(w io.Writer) io.Writer { + return &encoder{w: w} +} + +func (e *encoder) Write(p []byte) (n int, err error) { + for len(p) > 0 && e.err == nil { + chunkSize := bufferSize / 2 + if len(p) < chunkSize { + chunkSize = len(p) + } + + var written int + encoded := Encode(e.out[:], p[:chunkSize]) + written, e.err = e.w.Write(e.out[:encoded]) + n += written / 2 + p = p[chunkSize:] + } + return n, e.err +} + +type decoder struct { + r io.Reader + err error + in []byte // input buffer (encoded form) + arr [bufferSize]byte // backing array for in +} + +// NewDecoder returns an io.Reader that decodes hexadecimal characters from r. +// NewDecoder expects that r contain only an even number of hexadecimal characters. +func NewDecoder(r io.Reader) io.Reader { + return &decoder{r: r} +} + +func (d *decoder) Read(p []byte) (n int, err error) { + // Fill internal buffer with sufficient bytes to decode + if len(d.in) < 2 && d.err == nil { + var numCopy, numRead int + numCopy = copy(d.arr[:], d.in) // Copies either 0 or 1 bytes + numRead, d.err = d.r.Read(d.arr[numCopy:]) + d.in = d.arr[:numCopy+numRead] + if d.err == io.EOF && len(d.in)%2 != 0 { + if _, ok := fromHexChar(d.in[len(d.in)-1]); !ok { + d.err = InvalidByteError(d.in[len(d.in)-1]) + } else { + d.err = io.ErrUnexpectedEOF + } + } + } + + // Decode internal buffer into output buffer + if numAvail := len(d.in) / 2; len(p) > numAvail { + p = p[:numAvail] + } + numDec, err := Decode(p, d.in[:len(p)*2]) + d.in = d.in[2*numDec:] + if err != nil { + d.in, d.err = nil, err // Decode error; discard input remainder + } + + if len(d.in) < 2 { + return numDec, d.err // Only expose errors when buffer fully consumed + } + return numDec, nil +} + // Dumper returns a WriteCloser that writes a hex dump of all written 
data to // w. The format of the dump matches the output of `hexdump -C` on the command // line. diff --git a/libgo/go/encoding/hex/hex_test.go b/libgo/go/encoding/hex/hex_test.go index 64dabbd..b6bab21 100644 --- a/libgo/go/encoding/hex/hex_test.go +++ b/libgo/go/encoding/hex/hex_test.go @@ -7,6 +7,9 @@ package hex import ( "bytes" "fmt" + "io" + "io/ioutil" + "strings" "testing" ) @@ -75,37 +78,86 @@ func TestDecodeString(t *testing.T) { } } -type errTest struct { +var errTests = []struct { in string - err string + out string + err error +}{ + {"", "", nil}, + {"0", "", ErrLength}, + {"zd4aa", "", InvalidByteError('z')}, + {"d4aaz", "\xd4\xaa", InvalidByteError('z')}, + {"30313", "01", ErrLength}, + {"0g", "", InvalidByteError('g')}, + {"00gg", "\x00", InvalidByteError('g')}, + {"0\x01", "", InvalidByteError('\x01')}, + {"ffeed", "\xff\xee", ErrLength}, } -var errTests = []errTest{ - {"0", "encoding/hex: odd length hex string"}, - {"0g", "encoding/hex: invalid byte: U+0067 'g'"}, - {"00gg", "encoding/hex: invalid byte: U+0067 'g'"}, - {"0\x01", "encoding/hex: invalid byte: U+0001"}, +func TestDecodeErr(t *testing.T) { + for _, tt := range errTests { + out := make([]byte, len(tt.in)+10) + n, err := Decode(out, []byte(tt.in)) + if string(out[:n]) != tt.out || err != tt.err { + t.Errorf("Decode(%q) = %q, %v, want %q, %v", tt.in, string(out[:n]), err, tt.out, tt.err) + } + } } -func TestInvalidErr(t *testing.T) { - for i, test := range errTests { - dst := make([]byte, DecodedLen(len(test.in))) - _, err := Decode(dst, []byte(test.in)) - if err == nil { - t.Errorf("#%d: expected error; got none", i) - } else if err.Error() != test.err { - t.Errorf("#%d: got: %v want: %v", i, err, test.err) +func TestDecodeStringErr(t *testing.T) { + for _, tt := range errTests { + out, err := DecodeString(tt.in) + if string(out) != tt.out || err != tt.err { + t.Errorf("DecodeString(%q) = %q, %v, want %q, %v", tt.in, out, err, tt.out, tt.err) + } + } +} + +func TestEncoderDecoder(t 
*testing.T) { + for _, multiplier := range []int{1, 128, 192} { + for _, test := range encDecTests { + input := bytes.Repeat(test.dec, multiplier) + output := strings.Repeat(test.enc, multiplier) + + var buf bytes.Buffer + enc := NewEncoder(&buf) + r := struct{ io.Reader }{bytes.NewReader(input)} // io.Reader only; not io.WriterTo + if n, err := io.CopyBuffer(enc, r, make([]byte, 7)); n != int64(len(input)) || err != nil { + t.Errorf("encoder.Write(%q*%d) = (%d, %v), want (%d, nil)", test.dec, multiplier, n, err, len(input)) + continue + } + + if encDst := buf.String(); encDst != output { + t.Errorf("buf(%q*%d) = %v, want %v", test.dec, multiplier, encDst, output) + continue + } + + dec := NewDecoder(&buf) + var decBuf bytes.Buffer + w := struct{ io.Writer }{&decBuf} // io.Writer only; not io.ReaderFrom + if _, err := io.CopyBuffer(w, dec, make([]byte, 7)); err != nil || decBuf.Len() != len(input) { + t.Errorf("decoder.Read(%q*%d) = (%d, %v), want (%d, nil)", test.enc, multiplier, decBuf.Len(), err, len(input)) + } + + if !bytes.Equal(decBuf.Bytes(), input) { + t.Errorf("decBuf(%q*%d) = %v, want %v", test.dec, multiplier, decBuf.Bytes(), input) + continue + } } } } -func TestInvalidStringErr(t *testing.T) { - for i, test := range errTests { - _, err := DecodeString(test.in) - if err == nil { - t.Errorf("#%d: expected error; got none", i) - } else if err.Error() != test.err { - t.Errorf("#%d: got: %v want: %v", i, err, test.err) +func TestDecoderErr(t *testing.T) { + for _, tt := range errTests { + dec := NewDecoder(strings.NewReader(tt.in)) + out, err := ioutil.ReadAll(dec) + wantErr := tt.err + // Decoder is reading from stream, so it reports io.ErrUnexpectedEOF instead of ErrLength. 
+ if wantErr == ErrLength { + wantErr = io.ErrUnexpectedEOF + } + if string(out) != tt.out || err != wantErr { + t.Errorf("NewDecoder(%q) = %q, %v, want %q, %v", tt.in, out, err, tt.out, wantErr) } } } diff --git a/libgo/go/encoding/json/bench_test.go b/libgo/go/encoding/json/bench_test.go index 85d7ae0..42439eb7 100644 --- a/libgo/go/encoding/json/bench_test.go +++ b/libgo/go/encoding/json/bench_test.go @@ -133,6 +133,21 @@ func BenchmarkCodeDecoder(b *testing.B) { b.SetBytes(int64(len(codeJSON))) } +func BenchmarkUnicodeDecoder(b *testing.B) { + j := []byte(`"\uD83D\uDE01"`) + b.SetBytes(int64(len(j))) + r := bytes.NewReader(j) + dec := NewDecoder(r) + var out string + b.ResetTimer() + for i := 0; i < b.N; i++ { + if err := dec.Decode(&out); err != nil { + b.Fatal("Decode:", err) + } + r.Seek(0, 0) + } +} + func BenchmarkDecoderStream(b *testing.B) { b.StopTimer() var buf bytes.Buffer diff --git a/libgo/go/encoding/json/decode.go b/libgo/go/encoding/json/decode.go index 420a07e..536f25d 100644 --- a/libgo/go/encoding/json/decode.go +++ b/libgo/go/encoding/json/decode.go @@ -44,8 +44,9 @@ import ( // // To unmarshal JSON into a struct, Unmarshal matches incoming object // keys to the keys used by Marshal (either the struct field name or its tag), -// preferring an exact match but also accepting a case-insensitive match. -// Unmarshal will only set exported fields of the struct. +// preferring an exact match but also accepting a case-insensitive match. By +// default, object keys which don't have a corresponding struct field are +// ignored (see Decoder.DisallowUnknownFields for an alternative). // // To unmarshal JSON into an interface value, // Unmarshal stores one of these in the interface value: @@ -138,7 +139,8 @@ func (e *UnmarshalTypeError) Error() string { // An UnmarshalFieldError describes a JSON object key that // led to an unexported (and therefore unwritable) struct field. -// (No longer used; kept for compatibility.) 
+// +// Deprecated: No longer used; kept for compatibility. type UnmarshalFieldError struct { Key string Type reflect.Type @@ -274,8 +276,9 @@ type decodeState struct { Struct string Field string } - savedError error - useNumber bool + savedError error + useNumber bool + disallowUnknownFields bool } // errPhase is used for errors that should not happen unless @@ -508,7 +511,7 @@ func (d *decodeState) array(v reflect.Value) { switch v.Kind() { case reflect.Interface: if v.NumMethod() == 0 { - // Decoding into nil interface? Switch to non-reflect code. + // Decoding into nil interface? Switch to non-reflect code. v.Set(reflect.ValueOf(d.arrayInterface())) return } @@ -612,7 +615,7 @@ func (d *decodeState) object(v reflect.Value) { } v = pv - // Decoding into nil interface? Switch to non-reflect code. + // Decoding into nil interface? Switch to non-reflect code. if v.Kind() == reflect.Interface && v.NumMethod() == 0 { v.Set(reflect.ValueOf(d.objectInterface())) return @@ -704,6 +707,19 @@ func (d *decodeState) object(v reflect.Value) { for _, i := range f.index { if subv.Kind() == reflect.Ptr { if subv.IsNil() { + // If a struct embeds a pointer to an unexported type, + // it is not possible to set a newly allocated value + // since the field is unexported. + // + // See https://golang.org/issue/21357 + if !subv.CanSet() { + d.saveError(fmt.Errorf("json: cannot set embedded pointer to unexported struct: %v", subv.Type().Elem())) + // Invalidate subv to ensure d.value(subv) skips over + // the JSON value without assigning it to subv. 
+ subv = reflect.Value{} + destring = false + break + } subv.Set(reflect.New(subv.Type().Elem())) } subv = subv.Elem() @@ -712,6 +728,8 @@ func (d *decodeState) object(v reflect.Value) { } d.errorContext.Field = f.name d.errorContext.Struct = v.Type().Name() + } else if d.disallowUnknownFields { + d.saveError(fmt.Errorf("json: unknown field %q", key)) } } @@ -1143,11 +1161,21 @@ func getu4(s []byte) rune { if len(s) < 6 || s[0] != '\\' || s[1] != 'u' { return -1 } - r, err := strconv.ParseUint(string(s[2:6]), 16, 64) - if err != nil { - return -1 + var r rune + for _, c := range s[2:6] { + switch { + case '0' <= c && c <= '9': + c = c - '0' + case 'a' <= c && c <= 'f': + c = c - 'a' + 10 + case 'A' <= c && c <= 'F': + c = c - 'A' + 10 + default: + return -1 + } + r = r*16 + rune(c) } - return rune(r) + return r } // unquote converts a quoted JSON string literal s into an actual string t. @@ -1190,7 +1218,7 @@ func unquoteBytes(s []byte) (t []byte, ok bool) { b := make([]byte, len(s)+2*utf8.UTFMax) w := copy(b, s[0:r]) for r < len(s) { - // Out of room? Can only happen if s is full of + // Out of room? Can only happen if s is full of // malformed UTF-8 and we're replacing each // byte with RuneError. 
if w >= len(b)-2*utf8.UTFMax { diff --git a/libgo/go/encoding/json/decode_test.go b/libgo/go/encoding/json/decode_test.go index bd38ddd..34b7ec6 100644 --- a/libgo/go/encoding/json/decode_test.go +++ b/libgo/go/encoding/json/decode_test.go @@ -88,7 +88,7 @@ func (u unmarshalerText) MarshalText() ([]byte, error) { } func (u *unmarshalerText) UnmarshalText(b []byte) error { - pos := bytes.Index(b, []byte(":")) + pos := bytes.IndexByte(b, ':') if pos == -1 { return errors.New("missing separator") } @@ -372,12 +372,13 @@ func (b *intWithPtrMarshalText) UnmarshalText(data []byte) error { } type unmarshalTest struct { - in string - ptr interface{} - out interface{} - err error - useNumber bool - golden bool + in string + ptr interface{} + out interface{} + err error + useNumber bool + golden bool + disallowUnknownFields bool } type B struct { @@ -401,6 +402,7 @@ var unmarshalTests = []unmarshalTest{ {in: "null", ptr: new(interface{}), out: nil}, {in: `{"X": [1,2,3], "Y": 4}`, ptr: new(T), out: T{Y: 4}, err: &UnmarshalTypeError{"array", reflect.TypeOf(""), 7, "T", "X"}}, {in: `{"x": 1}`, ptr: new(tx), out: tx{}}, + {in: `{"x": 1}`, ptr: new(tx), err: fmt.Errorf("json: unknown field \"x\""), disallowUnknownFields: true}, {in: `{"F1":1,"F2":2,"F3":3}`, ptr: new(V), out: V{F1: float64(1), F2: int32(2), F3: Number("3")}}, {in: `{"F1":1,"F2":2,"F3":3}`, ptr: new(V), out: V{F1: Number("1"), F2: int32(2), F3: Number("3")}, useNumber: true}, {in: `{"k1":1,"k2":"s","k3":[1,2.0,3e-3],"k4":{"kk1":"s","kk2":2}}`, ptr: new(interface{}), out: ifaceNumAsFloat64}, @@ -415,10 +417,13 @@ var unmarshalTests = []unmarshalTest{ // Z has a "-" tag. 
{in: `{"Y": 1, "Z": 2}`, ptr: new(T), out: T{Y: 1}}, + {in: `{"Y": 1, "Z": 2}`, ptr: new(T), err: fmt.Errorf("json: unknown field \"Z\""), disallowUnknownFields: true}, {in: `{"alpha": "abc", "alphabet": "xyz"}`, ptr: new(U), out: U{Alphabet: "abc"}}, + {in: `{"alpha": "abc", "alphabet": "xyz"}`, ptr: new(U), err: fmt.Errorf("json: unknown field \"alphabet\""), disallowUnknownFields: true}, {in: `{"alpha": "abc"}`, ptr: new(U), out: U{Alphabet: "abc"}}, {in: `{"alphabet": "xyz"}`, ptr: new(U), out: U{}}, + {in: `{"alphabet": "xyz"}`, ptr: new(U), err: fmt.Errorf("json: unknown field \"alphabet\""), disallowUnknownFields: true}, // syntax errors {in: `{"X": "foo", "Y"}`, err: &SyntaxError{"invalid character '}' after object key", 17}}, @@ -611,9 +616,21 @@ var unmarshalTests = []unmarshalTest{ }, { in: `{"X": 1,"Y":2}`, + ptr: new(S5), + err: fmt.Errorf("json: unknown field \"X\""), + disallowUnknownFields: true, + }, + { + in: `{"X": 1,"Y":2}`, ptr: new(S10), out: S10{S13: S13{S8: S8{S9: S9{Y: 2}}}}, }, + { + in: `{"X": 1,"Y":2}`, + ptr: new(S10), + err: fmt.Errorf("json: unknown field \"X\""), + disallowUnknownFields: true, + }, // invalid UTF-8 is coerced to valid UTF-8. 
{ @@ -793,6 +810,62 @@ var unmarshalTests = []unmarshalTest{ {in: `{"B": "False"}`, ptr: new(B), err: errors.New(`json: invalid use of ,string struct tag, trying to unmarshal "False" into bool`)}, {in: `{"B": "null"}`, ptr: new(B), out: B{false}}, {in: `{"B": "nul"}`, ptr: new(B), err: errors.New(`json: invalid use of ,string struct tag, trying to unmarshal "nul" into bool`)}, + + // additional tests for disallowUnknownFields + { + in: `{ + "Level0": 1, + "Level1b": 2, + "Level1c": 3, + "x": 4, + "Level1a": 5, + "LEVEL1B": 6, + "e": { + "Level1a": 8, + "Level1b": 9, + "Level1c": 10, + "Level1d": 11, + "x": 12 + }, + "Loop1": 13, + "Loop2": 14, + "X": 15, + "Y": 16, + "Z": 17, + "Q": 18, + "extra": true + }`, + ptr: new(Top), + err: fmt.Errorf("json: unknown field \"extra\""), + disallowUnknownFields: true, + }, + { + in: `{ + "Level0": 1, + "Level1b": 2, + "Level1c": 3, + "x": 4, + "Level1a": 5, + "LEVEL1B": 6, + "e": { + "Level1a": 8, + "Level1b": 9, + "Level1c": 10, + "Level1d": 11, + "x": 12, + "extra": null + }, + "Loop1": 13, + "Loop2": 14, + "X": 15, + "Y": 16, + "Z": 17, + "Q": 18 + }`, + ptr: new(Top), + err: fmt.Errorf("json: unknown field \"extra\""), + disallowUnknownFields: true, + }, } func TestMarshal(t *testing.T) { @@ -911,6 +984,9 @@ func TestUnmarshal(t *testing.T) { if tt.useNumber { dec.UseNumber() } + if tt.disallowUnknownFields { + dec.DisallowUnknownFields() + } if err := dec.Decode(v.Interface()); !reflect.DeepEqual(err, tt.err) { t.Errorf("#%d: %v, want %v", i, err, tt.err) continue @@ -1117,7 +1193,8 @@ type All struct { Foo string `json:"bar"` Foo2 string `json:"bar2,dummyopt"` - IntStr int64 `json:",string"` + IntStr int64 `json:",string"` + UintptrStr uintptr `json:",string"` PBool *bool PInt *int @@ -1171,24 +1248,25 @@ type Small struct { } var allValue = All{ - Bool: true, - Int: 2, - Int8: 3, - Int16: 4, - Int32: 5, - Int64: 6, - Uint: 7, - Uint8: 8, - Uint16: 9, - Uint32: 10, - Uint64: 11, - Uintptr: 12, - Float32: 14.1, - Float64: 
15.1, - Foo: "foo", - Foo2: "foo2", - IntStr: 42, - String: "16", + Bool: true, + Int: 2, + Int8: 3, + Int16: 4, + Int32: 5, + Int64: 6, + Uint: 7, + Uint8: 8, + Uint16: 9, + Uint32: 10, + Uint64: 11, + Uintptr: 12, + Float32: 14.1, + Float64: 15.1, + Foo: "foo", + Foo2: "foo2", + IntStr: 42, + UintptrStr: 44, + String: "16", Map: map[string]Small{ "17": {Tag: "tag17"}, "18": {Tag: "tag18"}, @@ -1250,6 +1328,7 @@ var allValueIndent = `{ "bar": "foo", "bar2": "foo2", "IntStr": "42", + "UintptrStr": "44", "PBool": null, "PInt": null, "PInt8": null, @@ -1342,6 +1421,7 @@ var pallValueIndent = `{ "bar": "", "bar2": "", "IntStr": "0", + "UintptrStr": "0", "PBool": true, "PInt": 2, "PInt8": 3, @@ -2008,3 +2088,81 @@ func TestInvalidStringOption(t *testing.T) { t.Fatalf("Unmarshal: %v", err) } } + +// Test unmarshal behavior with regards to embedded pointers to unexported structs. +// If unallocated, this returns an error because unmarshal cannot set the field. +// Issue 21357. +func TestUnmarshalEmbeddedPointerUnexported(t *testing.T) { + type ( + embed1 struct{ Q int } + embed2 struct{ Q int } + embed3 struct { + Q int64 `json:",string"` + } + S1 struct { + *embed1 + R int + } + S2 struct { + *embed1 + Q int + } + S3 struct { + embed1 + R int + } + S4 struct { + *embed1 + embed2 + } + S5 struct { + *embed3 + R int + } + ) + + tests := []struct { + in string + ptr interface{} + out interface{} + err error + }{{ + // Error since we cannot set S1.embed1, but still able to set S1.R. + in: `{"R":2,"Q":1}`, + ptr: new(S1), + out: &S1{R: 2}, + err: fmt.Errorf("json: cannot set embedded pointer to unexported struct: json.embed1"), + }, { + // The top level Q field takes precedence. + in: `{"Q":1}`, + ptr: new(S2), + out: &S2{Q: 1}, + }, { + // No issue with non-pointer variant. + in: `{"R":2,"Q":1}`, + ptr: new(S3), + out: &S3{embed1: embed1{Q: 1}, R: 2}, + }, { + // No error since both embedded structs have field R, which annihilate each other. 
+ // Thus, no attempt is made at setting S4.embed1. + in: `{"R":2}`, + ptr: new(S4), + out: new(S4), + }, { + // Error since we cannot set S5.embed1, but still able to set S5.R. + in: `{"R":2,"Q":1}`, + ptr: new(S5), + out: &S5{R: 2}, + err: fmt.Errorf("json: cannot set embedded pointer to unexported struct: json.embed3"), + }} + + for i, tt := range tests { + err := Unmarshal([]byte(tt.in), tt.ptr) + if !reflect.DeepEqual(err, tt.err) { + t.Errorf("#%d: %v, want %v", i, err, tt.err) + } + if !reflect.DeepEqual(tt.ptr, tt.out) { + t.Errorf("#%d: mismatch\ngot: %#+v\nwant: %#+v", i, tt.ptr, tt.out) + } + } +} diff --git a/libgo/go/encoding/json/encode.go b/libgo/go/encoding/json/encode.go index 0371f0a..1e45e44 100644 --- a/libgo/go/encoding/json/encode.go +++ b/libgo/go/encoding/json/encode.go @@ -3,7 +3,7 @@ // license that can be found in the LICENSE file. // Package json implements encoding and decoding of JSON as defined in -// RFC 4627. The mapping between JSON and Go values is described +// RFC 7159. The mapping between JSON and Go values is described // in the documentation for the Marshal and Unmarshal functions. // // See "JSON and Go" for an introduction to this package: @@ -166,6 +166,8 @@ func Marshal(v interface{}) ([]byte, error) { } // MarshalIndent is like Marshal but applies Indent to format the output. +// Each JSON element in the output will begin on a new line beginning with prefix +// followed by one or more copies of indent according to the indentation nesting. func MarshalIndent(v interface{}, prefix, indent string) ([]byte, error) { b, err := Marshal(v) if err != nil { @@ -243,8 +245,8 @@ func (e *UnsupportedValueError) Error() string { // attempting to encode a string value with invalid UTF-8 sequences. // As of Go 1.2, Marshal instead coerces the string to valid UTF-8 by // replacing invalid bytes with the Unicode replacement rune U+FFFD. 
-// This error is no longer generated but is kept for backwards compatibility -// with programs that might mention it. +// +// Deprecated: No longer used; kept for compatibility. type InvalidUTF8Error struct { S string // the whole string value that caused the error } @@ -871,8 +873,7 @@ func (w *reflectWithString) resolve() error { } // NOTE: keep in sync with stringBytes below. -func (e *encodeState) string(s string, escapeHTML bool) int { - len0 := e.Len() +func (e *encodeState) string(s string, escapeHTML bool) { e.WriteByte('"') start := 0 for i := 0; i < len(s); { @@ -944,12 +945,10 @@ func (e *encodeState) string(s string, escapeHTML bool) int { e.WriteString(s[start:]) } e.WriteByte('"') - return e.Len() - len0 } // NOTE: keep in sync with string above. -func (e *encodeState) stringBytes(s []byte, escapeHTML bool) int { - len0 := e.Len() +func (e *encodeState) stringBytes(s []byte, escapeHTML bool) { e.WriteByte('"') start := 0 for i := 0; i < len(s); { @@ -1021,7 +1020,6 @@ func (e *encodeState) stringBytes(s []byte, escapeHTML bool) int { e.Write(s[start:]) } e.WriteByte('"') - return e.Len() - len0 } // A field represents a single field found in a struct. @@ -1093,21 +1091,19 @@ func typeFields(t reflect.Type) []field { // Scan f.typ for fields to include. for i := 0; i < f.typ.NumField(); i++ { sf := f.typ.Field(i) + isUnexported := sf.PkgPath != "" if sf.Anonymous { t := sf.Type if t.Kind() == reflect.Ptr { t = t.Elem() } - // If embedded, StructField.PkgPath is not a reliable - // indicator of whether the field is exported. - // See https://golang.org/issue/21122 - if !isExported(t.Name()) && t.Kind() != reflect.Struct { + if isUnexported && t.Kind() != reflect.Struct { // Ignore embedded fields of unexported non-struct types. - // Do not ignore embedded fields of unexported struct types - // since they may have exported fields. 
continue } - } else if sf.PkgPath != "" { + // Do not ignore embedded fields of unexported struct types + // since they may have exported fields. + } else if isUnexported { // Ignore unexported non-embedded fields. continue } @@ -1135,7 +1131,7 @@ func typeFields(t reflect.Type) []field { switch ft.Kind() { case reflect.Bool, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, - reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, reflect.Float32, reflect.Float64, reflect.String: quoted = true @@ -1226,12 +1222,6 @@ func typeFields(t reflect.Type) []field { return fields } -// isExported reports whether the identifier is exported. -func isExported(id string) bool { - r, _ := utf8.DecodeRuneInString(id) - return unicode.IsUpper(r) -} - // dominantField looks through the fields, all of which are known to // have the same name, to find the single field that dominates the // others using Go's embedding rules, modified by the presence of diff --git a/libgo/go/encoding/json/encode_test.go b/libgo/go/encoding/json/encode_test.go index 3fda6a0..0f194e1 100644 --- a/libgo/go/encoding/json/encode_test.go +++ b/libgo/go/encoding/json/encode_test.go @@ -71,14 +71,16 @@ func TestOmitEmpty(t *testing.T) { } type StringTag struct { - BoolStr bool `json:",string"` - IntStr int64 `json:",string"` - StrStr string `json:",string"` + BoolStr bool `json:",string"` + IntStr int64 `json:",string"` + UintptrStr uintptr `json:",string"` + StrStr string `json:",string"` } var stringTagExpected = `{ "BoolStr": "true", "IntStr": "42", + "UintptrStr": "44", "StrStr": "\"xzbit\"" }` @@ -86,6 +88,7 @@ func TestStringTag(t *testing.T) { var s StringTag s.BoolStr = true s.IntStr = 42 + s.UintptrStr = 44 s.StrStr = "xzbit" got, err := MarshalIndent(&s, "", " ") if err != nil { @@ -943,7 +946,7 @@ func TestMarshalRawMessageValue(t *testing.T) { // // The tests 
below marked with Issue6458 used to generate "ImZvbyI=" instead "foo". // This behavior was intentionally changed in Go 1.8. - // See https://github.com/golang/go/issues/14493#issuecomment-255857318 + // See https://golang.org/issues/14493#issuecomment-255857318 {rawText, `"foo"`, true}, // Issue6458 {&rawText, `"foo"`, true}, {[]interface{}{rawText}, `["foo"]`, true}, // Issue6458 diff --git a/libgo/go/encoding/json/stream.go b/libgo/go/encoding/json/stream.go index 95e30ce..75a4270 100644 --- a/libgo/go/encoding/json/stream.go +++ b/libgo/go/encoding/json/stream.go @@ -12,12 +12,13 @@ import ( // A Decoder reads and decodes JSON values from an input stream. type Decoder struct { - r io.Reader - buf []byte - d decodeState - scanp int // start of unread data in buf - scan scanner - err error + r io.Reader + buf []byte + d decodeState + scanp int // start of unread data in buf + scanned int64 // amount of data already scanned + scan scanner + err error tokenState int tokenStack []int @@ -35,6 +36,11 @@ func NewDecoder(r io.Reader) *Decoder { // Number instead of as a float64. func (dec *Decoder) UseNumber() { dec.d.useNumber = true } +// DisallowUnknownFields causes the Decoder to return an error when the destination +// is a struct and the input contains object keys which do not match any +// non-ignored, exported fields in the destination. +func (dec *Decoder) DisallowUnknownFields() { dec.d.disallowUnknownFields = true } + // Decode reads the next JSON-encoded value from its // input and stores it in the value pointed to by v. // @@ -50,7 +56,7 @@ func (dec *Decoder) Decode(v interface{}) error { } if !dec.tokenValueAllowed() { - return &SyntaxError{msg: "not at beginning of value"} + return &SyntaxError{msg: "not at beginning of value", Offset: dec.offset()} } // Read whole value into buffer. @@ -135,6 +141,7 @@ func (dec *Decoder) refill() error { // Make room to read more into the buffer. // First slide down data already consumed. 
if dec.scanp > 0 { + dec.scanned += int64(dec.scanp) n := copy(dec.buf, dec.buf[dec.scanp:]) dec.buf = dec.buf[:n] dec.scanp = 0 @@ -301,7 +308,7 @@ func (dec *Decoder) tokenPrepareForDecode() error { return err } if c != ',' { - return &SyntaxError{"expected comma after array element", 0} + return &SyntaxError{"expected comma after array element", dec.offset()} } dec.scanp++ dec.tokenState = tokenArrayValue @@ -311,7 +318,7 @@ func (dec *Decoder) tokenPrepareForDecode() error { return err } if c != ':' { - return &SyntaxError{"expected colon after object key", 0} + return &SyntaxError{"expected colon after object key", dec.offset()} } dec.scanp++ dec.tokenState = tokenObjectValue @@ -428,7 +435,6 @@ func (dec *Decoder) Token() (Token, error) { err := dec.Decode(&x) dec.tokenState = old if err != nil { - clearOffset(err) return nil, err } dec.tokenState = tokenObjectColon @@ -442,7 +448,6 @@ func (dec *Decoder) Token() (Token, error) { } var x interface{} if err := dec.Decode(&x); err != nil { - clearOffset(err) return nil, err } return x, nil @@ -450,12 +455,6 @@ func (dec *Decoder) Token() (Token, error) { } } -func clearOffset(err error) { - if s, ok := err.(*SyntaxError); ok { - s.Offset = 0 - } -} - func (dec *Decoder) tokenError(c byte) (Token, error) { var context string switch dec.tokenState { @@ -472,7 +471,7 @@ func (dec *Decoder) tokenError(c byte) (Token, error) { case tokenObjectComma: context = " after object key:value pair" } - return nil, &SyntaxError{"invalid character " + quoteChar(c) + " " + context, 0} + return nil, &SyntaxError{"invalid character " + quoteChar(c) + " " + context, dec.offset()} } // More reports whether there is another element in the @@ -501,19 +500,6 @@ func (dec *Decoder) peek() (byte, error) { } } -/* -TODO - -// EncodeToken writes the given JSON token to the stream. -// It returns an error if the delimiters [ ] { } are not properly used. 
-// -// EncodeToken does not call Flush, because usually it is part of -// a larger operation such as Encode, and those will call Flush when finished. -// Callers that create an Encoder and then invoke EncodeToken directly, -// without using Encode, need to call Flush when finished to ensure that -// the JSON is written to the underlying writer. -func (e *Encoder) EncodeToken(t Token) error { - ... +func (dec *Decoder) offset() int64 { + return dec.scanned + int64(dec.scanp) } - -*/ diff --git a/libgo/go/encoding/json/stream_test.go b/libgo/go/encoding/json/stream_test.go index d0b3ffb..83c01d1 100644 --- a/libgo/go/encoding/json/stream_test.go +++ b/libgo/go/encoding/json/stream_test.go @@ -342,11 +342,18 @@ var tokenStreamCases []tokenStreamCase = []tokenStreamCase{ {json: ` [{"a": 1} {"a": 2}] `, expTokens: []interface{}{ Delim('['), decodeThis{map[string]interface{}{"a": float64(1)}}, - decodeThis{&SyntaxError{"expected comma after array element", 0}}, + decodeThis{&SyntaxError{"expected comma after array element", 11}}, }}, - {json: `{ "a" 1 }`, expTokens: []interface{}{ - Delim('{'), "a", - decodeThis{&SyntaxError{"expected colon after object key", 0}}, + {json: `{ "` + strings.Repeat("a", 513) + `" 1 }`, expTokens: []interface{}{ + Delim('{'), strings.Repeat("a", 513), + decodeThis{&SyntaxError{"expected colon after object key", 518}}, + }}, + {json: `{ "\a" }`, expTokens: []interface{}{ + Delim('{'), + &SyntaxError{"invalid character 'a' in string escape code", 3}, + }}, + {json: ` \a`, expTokens: []interface{}{ + &SyntaxError{"invalid character '\\\\' looking for beginning of value", 1}, }}, } @@ -367,15 +374,15 @@ func TestDecodeInStream(t *testing.T) { tk, err = dec.Token() } if experr, ok := etk.(error); ok { - if err == nil || err.Error() != experr.Error() { - t.Errorf("case %v: Expected error %v in %q, but was %v", ci, experr, tcase.json, err) + if err == nil || !reflect.DeepEqual(err, experr) { + t.Errorf("case %v: Expected error %#v in %q, but was 
%#v", ci, experr, tcase.json, err) } break } else if err == io.EOF { t.Errorf("case %v: Unexpected EOF in %q", ci, tcase.json) break } else if err != nil { - t.Errorf("case %v: Unexpected error '%v' in %q", ci, err, tcase.json) + t.Errorf("case %v: Unexpected error '%#v' in %q", ci, err, tcase.json) break } if !reflect.DeepEqual(tk, etk) { diff --git a/libgo/go/encoding/pem/example_test.go b/libgo/go/encoding/pem/example_test.go index ffd962b..22081b4 100644 --- a/libgo/go/encoding/pem/example_test.go +++ b/libgo/go/encoding/pem/example_test.go @@ -11,6 +11,7 @@ import ( "encoding/pem" "fmt" "log" + "os" ) func ExampleDecode() { @@ -44,3 +45,23 @@ and some more`) fmt.Printf("Got a %T, with remaining data: %q", pub, rest) // Output: Got a *rsa.PublicKey, with remaining data: "and some more" } + +func ExampleEncode() { + block := &pem.Block{ + Type: "MESSAGE", + Headers: map[string]string{ + "Animal": "Gopher", + }, + Bytes: []byte("test"), + } + + if err := pem.Encode(os.Stdout, block); err != nil { + log.Fatal(err) + } + // Output: + // -----BEGIN MESSAGE----- + // Animal: Gopher + // + // dGVzdA== + // -----END MESSAGE----- +} diff --git a/libgo/go/encoding/pem/pem.go b/libgo/go/encoding/pem/pem.go index 5e1ab90..35058c3 100644 --- a/libgo/go/encoding/pem/pem.go +++ b/libgo/go/encoding/pem/pem.go @@ -36,7 +36,7 @@ type Block struct { // bytes) is also returned and this will always be smaller than the original // argument. func getLine(data []byte) (line, rest []byte) { - i := bytes.Index(data, []byte{'\n'}) + i := bytes.IndexByte(data, '\n') var j int if i < 0 { i = len(data) @@ -106,7 +106,7 @@ func Decode(data []byte) (p *Block, rest []byte) { } line, next := getLine(rest) - i := bytes.Index(line, []byte{':'}) + i := bytes.IndexByte(line, ':') if i == -1 { break } @@ -252,7 +252,18 @@ func writeHeader(out io.Writer, k, v string) error { return err } +// Encode writes the PEM encoding of b to out. 
func Encode(out io.Writer, b *Block) error { + // Check for invalid block before writing any output. + for k := range b.Headers { + if strings.Contains(k, ":") { + return errors.New("pem: cannot encode a header key that contains a colon") + } + } + + // All errors below are relayed from underlying io.Writer, + // so it is now safe to write data. + if _, err := out.Write(pemStart[1:]); err != nil { return err } @@ -281,9 +292,6 @@ func Encode(out io.Writer, b *Block) error { // For consistency of output, write other headers sorted by key. sort.Strings(h) for _, k := range h { - if strings.Contains(k, ":") { - return errors.New("pem: cannot encode a header key that contains a colon") - } if err := writeHeader(out, k, b.Headers[k]); err != nil { return err } @@ -310,8 +318,15 @@ func Encode(out io.Writer, b *Block) error { return err } +// EncodeToMemory returns the PEM encoding of b. +// +// If b has invalid headers and cannot be encoded, +// EncodeToMemory returns nil. If it is important to +// report details about this error case, use Encode instead. 
func EncodeToMemory(b *Block) []byte { var buf bytes.Buffer - Encode(&buf, b) + if err := Encode(&buf, b); err != nil { + return nil + } return buf.Bytes() } diff --git a/libgo/go/encoding/pem/pem_test.go b/libgo/go/encoding/pem/pem_test.go index 1a1250a..6a17516 100644 --- a/libgo/go/encoding/pem/pem_test.go +++ b/libgo/go/encoding/pem/pem_test.go @@ -590,3 +590,17 @@ N4XPksobn/NO2IDvPM7N9ZCe+aeyDEkE8QmP6mPScLuGvzSrsgOxWTMWF7Dbdzj0 tJQLJRZ+ItT5Irl4owSEBNLahC1j3fhQavbj9WVAfKk= -----END RSA PRIVATE KEY----- ` + +func TestBadEncode(t *testing.T) { + b := &Block{Type: "BAD", Headers: map[string]string{"X:Y": "Z"}} + var buf bytes.Buffer + if err := Encode(&buf, b); err == nil { + t.Fatalf("Encode did not report invalid header") + } + if buf.Len() != 0 { + t.Fatalf("Encode wrote data before reporting invalid header") + } + if data := EncodeToMemory(b); data != nil { + t.Fatalf("EncodeToMemory returned non-nil data") + } +} diff --git a/libgo/go/encoding/xml/atom_test.go b/libgo/go/encoding/xml/atom_test.go index a712843..f394dab 100644 --- a/libgo/go/encoding/xml/atom_test.go +++ b/libgo/go/encoding/xml/atom_test.go @@ -12,20 +12,20 @@ var atomValue = &Feed{ Link: []Link{{Href: "http://example.org/"}}, Updated: ParseTime("2003-12-13T18:30:02Z"), Author: Person{Name: "John Doe"}, - Id: "urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6", + ID: "urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6", Entry: []Entry{ { Title: "Atom-Powered Robots Run Amok", Link: []Link{{Href: "http://example.org/2003/12/13/atom03"}}, - Id: "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a", + ID: "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a", Updated: ParseTime("2003-12-13T18:30:02Z"), Summary: NewText("Some text."), }, }, } -var atomXml = `` + +var atomXML = `` + `<feed xmlns="http://www.w3.org/2005/Atom" updated="2003-12-13T18:30:02Z">` + `<title>Example Feed</title>` + `<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>` + diff --git a/libgo/go/encoding/xml/marshal.go 
b/libgo/go/encoding/xml/marshal.go index 4c6ba8c..d393d06 100644 --- a/libgo/go/encoding/xml/marshal.go +++ b/libgo/go/encoding/xml/marshal.go @@ -16,7 +16,7 @@ import ( ) const ( - // A generic XML header suitable for use with the output of Marshal. + // Header is a generic XML header suitable for use with the output of Marshal. // This is not automatically added to any output of this package, // it is provided as a convenience. Header = `<?xml version="1.0" encoding="UTF-8"?>` + "\n" @@ -66,6 +66,9 @@ const ( // parent elements a and b. Fields that appear next to each other that name // the same parent will be enclosed in one XML element. // +// If the XML name for a struct field is defined by both the field tag and the +// struct's XMLName field, the names must match. +// // See MarshalIndent for an example. // // Marshal will return an error if asked to marshal a channel, function, or map. @@ -320,7 +323,7 @@ func (p *printer) createAttrPrefix(url string) string { // (The "http://www.w3.org/2000/xmlns/" name space is also predefined as "xmlns", // but users should not be trying to use that one directly - that's our job.) if url == xmlURL { - return "xml" + return xmlPrefix } // Need to define a new name space. @@ -1011,7 +1014,7 @@ func (s *parentStack) push(parents []string) error { return nil } -// A MarshalXMLError is returned when Marshal encounters a type +// UnsupportedTypeError is returned when Marshal encounters a type // that cannot be converted into XML. 
type UnsupportedTypeError struct { Type reflect.Type diff --git a/libgo/go/encoding/xml/marshal_test.go b/libgo/go/encoding/xml/marshal_test.go index 674c6b5..5c79a48 100644 --- a/libgo/go/encoding/xml/marshal_test.go +++ b/libgo/go/encoding/xml/marshal_test.go @@ -646,7 +646,7 @@ var marshalTests = []struct { {Value: &Universe{Visible: 9.3e13}, ExpectXML: `<universe>9.3e+13</universe>`}, {Value: &Particle{HasMass: true}, ExpectXML: `<particle>true</particle>`}, {Value: &Departure{When: ParseTime("2013-01-09T00:15:00-09:00")}, ExpectXML: `<departure>2013-01-09T00:15:00-09:00</departure>`}, - {Value: atomValue, ExpectXML: atomXml}, + {Value: atomValue, ExpectXML: atomXML}, { Value: &Ship{ Name: "Heart of Gold", @@ -1910,7 +1910,7 @@ func BenchmarkMarshal(b *testing.B) { func BenchmarkUnmarshal(b *testing.B) { b.ReportAllocs() - xml := []byte(atomXml) + xml := []byte(atomXML) b.RunParallel(func(pb *testing.PB) { for pb.Next() { Unmarshal(xml, &Feed{}) @@ -2441,3 +2441,22 @@ func TestIssue16158(t *testing.T) { t.Errorf("Unmarshal: expected error, got nil") } } + +// Issue 20953. Crash on invalid XMLName attribute. + +type InvalidXMLName struct { + XMLName Name `xml:"error"` + Type struct { + XMLName Name `xml:"type,attr"` + } +} + +func TestInvalidXMLName(t *testing.T) { + var buf bytes.Buffer + enc := NewEncoder(&buf) + if err := enc.Encode(InvalidXMLName{}); err == nil { + t.Error("unexpected success") + } else if want := "invalid tag"; !strings.Contains(err.Error(), want) { + t.Errorf("error %q does not contain %q", err, want) + } +} diff --git a/libgo/go/encoding/xml/read.go b/libgo/go/encoding/xml/read.go index 000d9fb..36c7ba6 100644 --- a/libgo/go/encoding/xml/read.go +++ b/libgo/go/encoding/xml/read.go @@ -107,12 +107,13 @@ import ( // to the newly created value. // // Unmarshal maps an XML element or attribute value to a bool by -// setting it to the boolean value represented by the string. +// setting it to the boolean value represented by the string. 
Whitespace +// is trimmed and ignored. // // Unmarshal maps an XML element or attribute value to an integer or // floating-point field by setting the field to the result of // interpreting the string value in decimal. There is no check for -// overflow. +// overflow. Whitespace is trimmed and ignored. // // Unmarshal maps an XML element to a Name by recording the element // name. @@ -160,7 +161,7 @@ func (e UnmarshalError) Error() string { return string(e) } // UnmarshalXML must consume exactly one XML element. // One common implementation strategy is to unmarshal into // a separate value with a layout matching the expected XML -// using d.DecodeElement, and then to copy the data from +// using d.DecodeElement, and then to copy the data from // that value into the receiver. // Another common strategy is to use d.Token to process the // XML object one token at a time. @@ -192,19 +193,19 @@ func receiverType(val interface{}) string { // unmarshalInterface unmarshals a single XML element into val. // start is the opening tag of the element. -func (p *Decoder) unmarshalInterface(val Unmarshaler, start *StartElement) error { +func (d *Decoder) unmarshalInterface(val Unmarshaler, start *StartElement) error { // Record that decoder must stop at end tag corresponding to start. - p.pushEOF() + d.pushEOF() - p.unmarshalDepth++ - err := val.UnmarshalXML(p, *start) - p.unmarshalDepth-- + d.unmarshalDepth++ + err := val.UnmarshalXML(d, *start) + d.unmarshalDepth-- if err != nil { - p.popEOF() + d.popEOF() return err } - if !p.popEOF() { + if !d.popEOF() { return fmt.Errorf("xml: %s.UnmarshalXML did not consume entire <%s> element", receiverType(val), start.Name.Local) } @@ -214,11 +215,11 @@ func (p *Decoder) unmarshalInterface(val Unmarshaler, start *StartElement) error // unmarshalTextInterface unmarshals a single XML element into val. // The chardata contained in the element (but not its children) // is passed to the text unmarshaler. 
-func (p *Decoder) unmarshalTextInterface(val encoding.TextUnmarshaler) error { +func (d *Decoder) unmarshalTextInterface(val encoding.TextUnmarshaler) error { var buf []byte depth := 1 for depth > 0 { - t, err := p.Token() + t, err := d.Token() if err != nil { return err } @@ -237,7 +238,7 @@ func (p *Decoder) unmarshalTextInterface(val encoding.TextUnmarshaler) error { } // unmarshalAttr unmarshals a single XML attribute into val. -func (p *Decoder) unmarshalAttr(val reflect.Value, attr Attr) error { +func (d *Decoder) unmarshalAttr(val reflect.Value, attr Attr) error { if val.Kind() == reflect.Ptr { if val.IsNil() { val.Set(reflect.New(val.Type().Elem())) @@ -276,7 +277,7 @@ func (p *Decoder) unmarshalAttr(val reflect.Value, attr Attr) error { val.Set(reflect.Append(val, reflect.Zero(val.Type().Elem()))) // Recur to read element into slice. - if err := p.unmarshalAttr(val.Index(n), attr); err != nil { + if err := d.unmarshalAttr(val.Index(n), attr); err != nil { val.SetLen(n) return err } @@ -299,11 +300,11 @@ var ( ) // Unmarshal a single XML element into val. -func (p *Decoder) unmarshal(val reflect.Value, start *StartElement) error { +func (d *Decoder) unmarshal(val reflect.Value, start *StartElement) error { // Find start element if we need it. if start == nil { for { - tok, err := p.Token() + tok, err := d.Token() if err != nil { return err } @@ -333,24 +334,24 @@ func (p *Decoder) unmarshal(val reflect.Value, start *StartElement) error { if val.CanInterface() && val.Type().Implements(unmarshalerType) { // This is an unmarshaler with a non-pointer receiver, // so it's likely to be incorrect, but we do what we're told. 
- return p.unmarshalInterface(val.Interface().(Unmarshaler), start) + return d.unmarshalInterface(val.Interface().(Unmarshaler), start) } if val.CanAddr() { pv := val.Addr() if pv.CanInterface() && pv.Type().Implements(unmarshalerType) { - return p.unmarshalInterface(pv.Interface().(Unmarshaler), start) + return d.unmarshalInterface(pv.Interface().(Unmarshaler), start) } } if val.CanInterface() && val.Type().Implements(textUnmarshalerType) { - return p.unmarshalTextInterface(val.Interface().(encoding.TextUnmarshaler)) + return d.unmarshalTextInterface(val.Interface().(encoding.TextUnmarshaler)) } if val.CanAddr() { pv := val.Addr() if pv.CanInterface() && pv.Type().Implements(textUnmarshalerType) { - return p.unmarshalTextInterface(pv.Interface().(encoding.TextUnmarshaler)) + return d.unmarshalTextInterface(pv.Interface().(encoding.TextUnmarshaler)) } } @@ -376,7 +377,7 @@ func (p *Decoder) unmarshal(val reflect.Value, start *StartElement) error { // TODO: For now, simply ignore the field. In the near // future we may choose to unmarshal the start // element on it, if not nil. - return p.Skip() + return d.Skip() case reflect.Slice: typ := v.Type() @@ -392,7 +393,7 @@ func (p *Decoder) unmarshal(val reflect.Value, start *StartElement) error { v.Set(reflect.Append(val, reflect.Zero(v.Type().Elem()))) // Recur to read element into slice. 
- if err := p.unmarshal(v.Index(n), start); err != nil { + if err := d.unmarshal(v.Index(n), start); err != nil { v.SetLen(n) return err } @@ -445,7 +446,7 @@ func (p *Decoder) unmarshal(val reflect.Value, start *StartElement) error { case fAttr: strv := finfo.value(sv) if a.Name.Local == finfo.name && (finfo.xmlns == "" || finfo.xmlns == a.Name.Space) { - if err := p.unmarshalAttr(strv, a); err != nil { + if err := d.unmarshalAttr(strv, a); err != nil { return err } handled = true @@ -460,7 +461,7 @@ func (p *Decoder) unmarshal(val reflect.Value, start *StartElement) error { if !handled && any >= 0 { finfo := &tinfo.fields[any] strv := finfo.value(sv) - if err := p.unmarshalAttr(strv, a); err != nil { + if err := d.unmarshalAttr(strv, a); err != nil { return err } } @@ -488,11 +489,11 @@ func (p *Decoder) unmarshal(val reflect.Value, start *StartElement) error { case fInnerXml: if !saveXML.IsValid() { saveXML = finfo.value(sv) - if p.saved == nil { + if d.saved == nil { saveXMLIndex = 0 - p.saved = new(bytes.Buffer) + d.saved = new(bytes.Buffer) } else { - saveXMLIndex = p.savedOffset() + saveXMLIndex = d.savedOffset() } } } @@ -505,9 +506,9 @@ Loop: for { var savedOffset int if saveXML.IsValid() { - savedOffset = p.savedOffset() + savedOffset = d.savedOffset() } - tok, err := p.Token() + tok, err := d.Token() if err != nil { return err } @@ -515,28 +516,28 @@ Loop: case StartElement: consumed := false if sv.IsValid() { - consumed, err = p.unmarshalPath(tinfo, sv, nil, &t) + consumed, err = d.unmarshalPath(tinfo, sv, nil, &t) if err != nil { return err } if !consumed && saveAny.IsValid() { consumed = true - if err := p.unmarshal(saveAny, &t); err != nil { + if err := d.unmarshal(saveAny, &t); err != nil { return err } } } if !consumed { - if err := p.Skip(); err != nil { + if err := d.Skip(); err != nil { return err } } case EndElement: if saveXML.IsValid() { - saveXMLData = p.saved.Bytes()[saveXMLIndex:savedOffset] + saveXMLData = 
d.saved.Bytes()[saveXMLIndex:savedOffset] if saveXMLIndex == 0 { - p.saved = nil + d.saved = nil } } break Loop @@ -614,7 +615,7 @@ func copyValue(dst reflect.Value, src []byte) (err error) { dst.SetInt(0) return nil } - itmp, err := strconv.ParseInt(string(src), 10, dst.Type().Bits()) + itmp, err := strconv.ParseInt(strings.TrimSpace(string(src)), 10, dst.Type().Bits()) if err != nil { return err } @@ -624,7 +625,7 @@ func copyValue(dst reflect.Value, src []byte) (err error) { dst.SetUint(0) return nil } - utmp, err := strconv.ParseUint(string(src), 10, dst.Type().Bits()) + utmp, err := strconv.ParseUint(strings.TrimSpace(string(src)), 10, dst.Type().Bits()) if err != nil { return err } @@ -634,7 +635,7 @@ func copyValue(dst reflect.Value, src []byte) (err error) { dst.SetFloat(0) return nil } - ftmp, err := strconv.ParseFloat(string(src), dst.Type().Bits()) + ftmp, err := strconv.ParseFloat(strings.TrimSpace(string(src)), dst.Type().Bits()) if err != nil { return err } @@ -666,7 +667,7 @@ func copyValue(dst reflect.Value, src []byte) (err error) { // The consumed result tells whether XML elements have been consumed // from the Decoder until start's matching end element, or if it's // still untouched because start is uninteresting for sv's fields. -func (p *Decoder) unmarshalPath(tinfo *typeInfo, sv reflect.Value, parents []string, start *StartElement) (consumed bool, err error) { +func (d *Decoder) unmarshalPath(tinfo *typeInfo, sv reflect.Value, parents []string, start *StartElement) (consumed bool, err error) { recurse := false Loop: for i := range tinfo.fields { @@ -681,7 +682,7 @@ Loop: } if len(finfo.parents) == len(parents) && finfo.name == start.Name.Local { // It's a perfect match, unmarshal the field. - return true, p.unmarshal(finfo.value(sv), start) + return true, d.unmarshal(finfo.value(sv), start) } if len(finfo.parents) > len(parents) && finfo.parents[len(parents)] == start.Name.Local { // It's a prefix for the field. 
Break and recurse @@ -704,18 +705,18 @@ Loop: // prefix. Recurse and attempt to match these. for { var tok Token - tok, err = p.Token() + tok, err = d.Token() if err != nil { return true, err } switch t := tok.(type) { case StartElement: - consumed2, err := p.unmarshalPath(tinfo, sv, parents, &t) + consumed2, err := d.unmarshalPath(tinfo, sv, parents, &t) if err != nil { return true, err } if !consumed2 { - if err := p.Skip(); err != nil { + if err := d.Skip(); err != nil { return true, err } } diff --git a/libgo/go/encoding/xml/read_test.go b/libgo/go/encoding/xml/read_test.go index a1eb516..8c2e70f 100644 --- a/libgo/go/encoding/xml/read_test.go +++ b/libgo/go/encoding/xml/read_test.go @@ -83,7 +83,7 @@ not being used from outside intra_region_diff.py. type Feed struct { XMLName Name `xml:"http://www.w3.org/2005/Atom feed"` Title string `xml:"title"` - Id string `xml:"id"` + ID string `xml:"id"` Link []Link `xml:"link"` Updated time.Time `xml:"updated,attr"` Author Person `xml:"author"` @@ -92,7 +92,7 @@ type Feed struct { type Entry struct { Title string `xml:"title"` - Id string `xml:"id"` + ID string `xml:"id"` Link []Link `xml:"link"` Updated time.Time `xml:"updated"` Author Person `xml:"author"` @@ -123,7 +123,7 @@ var atomFeed = Feed{ {Rel: "alternate", Href: "http://codereview.appspot.com/"}, {Rel: "self", Href: "http://codereview.appspot.com/rss/mine/rsc"}, }, - Id: "http://codereview.appspot.com/", + ID: "http://codereview.appspot.com/", Updated: ParseTime("2009-10-04T01:35:58+00:00"), Author: Person{ Name: "rietveld<>", @@ -140,7 +140,7 @@ var atomFeed = Feed{ Name: "email-address-removed", InnerXML: "<name>email-address-removed</name>", }, - Id: "urn:md5:134d9179c41f806be79b3a5f7877d19a", + ID: "urn:md5:134d9179c41f806be79b3a5f7877d19a", Summary: Text{ Type: "html", Body: ` @@ -187,7 +187,7 @@ the top of feeds.py marked NOTE(rsc). 
Name: "email-address-removed", InnerXML: "<name>email-address-removed</name>", }, - Id: "urn:md5:0a2a4f19bb815101f0ba2904aed7c35a", + ID: "urn:md5:0a2a4f19bb815101f0ba2904aed7c35a", Summary: Text{ Type: "html", Body: ` @@ -819,7 +819,7 @@ const ( ` ) -// github.com/golang/go/issues/13417 +// golang.org/issues/13417 func TestUnmarshalEmptyValues(t *testing.T) { // Test first with a zero-valued dst. v := new(Parent) @@ -908,3 +908,174 @@ func TestUnmarshalEmptyValues(t *testing.T) { t.Fatalf("populated: Unmarshal:\nhave: %#+v\nwant: %#+v", v, want) } } + +type WhitespaceValuesParent struct { + BFalse bool + BTrue bool + I int + INeg int + I8 int8 + I8Neg int8 + I16 int16 + I16Neg int16 + I32 int32 + I32Neg int32 + I64 int64 + I64Neg int64 + UI uint + UI8 uint8 + UI16 uint16 + UI32 uint32 + UI64 uint64 + F32 float32 + F32Neg float32 + F64 float64 + F64Neg float64 +} + +const whitespaceValuesXML = ` +<WhitespaceValuesParent> + <BFalse> false </BFalse> + <BTrue> true </BTrue> + <I> 266703 </I> + <INeg> -266703 </INeg> + <I8> 112 </I8> + <I8Neg> -112 </I8Neg> + <I16> 6703 </I16> + <I16Neg> -6703 </I16Neg> + <I32> 266703 </I32> + <I32Neg> -266703 </I32Neg> + <I64> 266703 </I64> + <I64Neg> -266703 </I64Neg> + <UI> 266703 </UI> + <UI8> 112 </UI8> + <UI16> 6703 </UI16> + <UI32> 266703 </UI32> + <UI64> 266703 </UI64> + <F32> 266.703 </F32> + <F32Neg> -266.703 </F32Neg> + <F64> 266.703 </F64> + <F64Neg> -266.703 </F64Neg> +</WhitespaceValuesParent> +` + +// golang.org/issues/22146 +func TestUnmarshalWhitespaceValues(t *testing.T) { + v := WhitespaceValuesParent{} + if err := Unmarshal([]byte(whitespaceValuesXML), &v); err != nil { + t.Fatalf("whitespace values: Unmarshal failed: got %v", err) + } + + want := WhitespaceValuesParent{ + BFalse: false, + BTrue: true, + I: 266703, + INeg: -266703, + I8: 112, + I8Neg: -112, + I16: 6703, + I16Neg: -6703, + I32: 266703, + I32Neg: -266703, + I64: 266703, + I64Neg: -266703, + UI: 266703, + UI8: 112, + UI16: 6703, + UI32: 266703, + UI64: 
266703, + F32: 266.703, + F32Neg: -266.703, + F64: 266.703, + F64Neg: -266.703, + } + if v != want { + t.Fatalf("whitespace values: Unmarshal:\nhave: %#+v\nwant: %#+v", v, want) + } +} + +type WhitespaceAttrsParent struct { + BFalse bool `xml:",attr"` + BTrue bool `xml:",attr"` + I int `xml:",attr"` + INeg int `xml:",attr"` + I8 int8 `xml:",attr"` + I8Neg int8 `xml:",attr"` + I16 int16 `xml:",attr"` + I16Neg int16 `xml:",attr"` + I32 int32 `xml:",attr"` + I32Neg int32 `xml:",attr"` + I64 int64 `xml:",attr"` + I64Neg int64 `xml:",attr"` + UI uint `xml:",attr"` + UI8 uint8 `xml:",attr"` + UI16 uint16 `xml:",attr"` + UI32 uint32 `xml:",attr"` + UI64 uint64 `xml:",attr"` + F32 float32 `xml:",attr"` + F32Neg float32 `xml:",attr"` + F64 float64 `xml:",attr"` + F64Neg float64 `xml:",attr"` +} + +const whitespaceAttrsXML = ` +<WhitespaceAttrsParent + BFalse=" false " + BTrue=" true " + I=" 266703 " + INeg=" -266703 " + I8=" 112 " + I8Neg=" -112 " + I16=" 6703 " + I16Neg=" -6703 " + I32=" 266703 " + I32Neg=" -266703 " + I64=" 266703 " + I64Neg=" -266703 " + UI=" 266703 " + UI8=" 112 " + UI16=" 6703 " + UI32=" 266703 " + UI64=" 266703 " + F32=" 266.703 " + F32Neg=" -266.703 " + F64=" 266.703 " + F64Neg=" -266.703 " +> +</WhitespaceAttrsParent> +` + +// golang.org/issues/22146 +func TestUnmarshalWhitespaceAttrs(t *testing.T) { + v := WhitespaceAttrsParent{} + if err := Unmarshal([]byte(whitespaceAttrsXML), &v); err != nil { + t.Fatalf("whitespace attrs: Unmarshal failed: got %v", err) + } + + want := WhitespaceAttrsParent{ + BFalse: false, + BTrue: true, + I: 266703, + INeg: -266703, + I8: 112, + I8Neg: -112, + I16: 6703, + I16Neg: -6703, + I32: 266703, + I32Neg: -266703, + I64: 266703, + I64Neg: -266703, + UI: 266703, + UI8: 112, + UI16: 6703, + UI32: 266703, + UI64: 266703, + F32: 266.703, + F32Neg: -266.703, + F64: 266.703, + F64Neg: -266.703, + } + if v != want { + t.Fatalf("whitespace attrs: Unmarshal:\nhave: %#+v\nwant: %#+v", v, want) + } +} diff --git 
a/libgo/go/encoding/xml/typeinfo.go b/libgo/go/encoding/xml/typeinfo.go index 751caa9..48de3d7 100644 --- a/libgo/go/encoding/xml/typeinfo.go +++ b/libgo/go/encoding/xml/typeinfo.go @@ -40,6 +40,8 @@ const ( fOmitEmpty fMode = fElement | fAttr | fCDATA | fCharData | fInnerXml | fComment | fAny + + xmlName = "XMLName" ) var tinfoMap sync.Map // map[reflect.Type]*typeInfo @@ -91,7 +93,7 @@ func getTypeInfo(typ reflect.Type) (*typeInfo, error) { return nil, err } - if f.Name == "XMLName" { + if f.Name == xmlName { tinfo.xmlname = finfo continue } @@ -148,7 +150,7 @@ func structFieldInfo(typ reflect.Type, f *reflect.StructField) (*fieldInfo, erro case 0: finfo.flags |= fElement case fAttr, fCDATA, fCharData, fInnerXml, fComment, fAny, fAny | fAttr: - if f.Name == "XMLName" || tag != "" && mode != fAttr { + if f.Name == xmlName || tag != "" && mode != fAttr { valid = false } default: @@ -173,7 +175,7 @@ func structFieldInfo(typ reflect.Type, f *reflect.StructField) (*fieldInfo, erro f.Name, typ, f.Tag.Get("xml")) } - if f.Name == "XMLName" { + if f.Name == xmlName { // The XMLName field records the XML element name. Don't // process it as usual because its name should default to // empty rather than to the field name. @@ -235,11 +237,11 @@ func lookupXMLName(typ reflect.Type) (xmlname *fieldInfo) { } for i, n := 0, typ.NumField(); i < n; i++ { f := typ.Field(i) - if f.Name != "XMLName" { + if f.Name != xmlName { continue } finfo, err := structFieldInfo(typ, &f) - if finfo.name != "" && err == nil { + if err == nil && finfo.name != "" { return finfo } // Also consider errors as a non-existent field tag diff --git a/libgo/go/encoding/xml/xml.go b/libgo/go/encoding/xml/xml.go index 9a3b792..f408623 100644 --- a/libgo/go/encoding/xml/xml.go +++ b/libgo/go/encoding/xml/xml.go @@ -60,6 +60,7 @@ type StartElement struct { Attr []Attr } +// Copy creates a new copy of StartElement. 
func (e StartElement) Copy() StartElement { attrs := make([]Attr, len(e.Attr)) copy(attrs, e.Attr) @@ -88,12 +89,14 @@ func makeCopy(b []byte) []byte { return b1 } +// Copy creates a new copy of CharData. func (c CharData) Copy() CharData { return CharData(makeCopy(c)) } // A Comment represents an XML comment of the form <!--comment-->. // The bytes do not include the <!-- and --> comment markers. type Comment []byte +// Copy creates a new copy of Comment. func (c Comment) Copy() Comment { return Comment(makeCopy(c)) } // A ProcInst represents an XML processing instruction of the form <?target inst?> @@ -102,6 +105,7 @@ type ProcInst struct { Inst []byte } +// Copy creates a new copy of ProcInst. func (p ProcInst) Copy() ProcInst { p.Inst = makeCopy(p.Inst) return p @@ -111,6 +115,7 @@ func (p ProcInst) Copy() ProcInst { // The bytes do not include the <! and > markers. type Directive []byte +// Copy creates a new copy of Directive. func (d Directive) Copy() Directive { return Directive(makeCopy(d)) } // CopyToken returns a copy of a Token. @@ -130,6 +135,23 @@ func CopyToken(t Token) Token { return t } +// A TokenReader is anything that can decode a stream of XML tokens, including a +// Decoder. +// +// When Token encounters an error or end-of-file condition after successfully +// reading a token, it returns the token. It may return the (non-nil) error from +// the same call or return the error (and a nil token) from a subsequent call. +// An instance of this general case is that a TokenReader returning a non-nil +// token at the end of the token stream may return either io.EOF or a nil error. +// The next Read should return nil, io.EOF. +// +// Implementations of Token are discouraged from returning a nil token with a +// nil error. Callers should treat a return of nil, nil as indicating that +// nothing happened; in particular it does not indicate EOF. 
+type TokenReader interface { + Token() (Token, error) +} + // A Decoder represents an XML parser reading a particular input stream. // The parser assumes that its input is encoded in UTF-8. type Decoder struct { @@ -185,6 +207,7 @@ type Decoder struct { DefaultSpace string r io.ByteReader + t TokenReader buf bytes.Buffer saved *bytes.Buffer stk *stack @@ -214,6 +237,22 @@ func NewDecoder(r io.Reader) *Decoder { return d } +// NewTokenDecoder creates a new XML parser using an underlying token stream. +func NewTokenDecoder(t TokenReader) *Decoder { + // Is it already a Decoder? + if d, ok := t.(*Decoder); ok { + return d + } + d := &Decoder{ + ns: make(map[string]string), + t: t, + nextByte: -1, + line: 1, + Strict: true, + } + return d +} + // Token returns the next XML token in the input stream. // At the end of the input stream, Token returns nil, io.EOF. // @@ -266,12 +305,12 @@ func (d *Decoder) Token() (Token, error) { // to the other attribute names, so process // the translations first. for _, a := range t1.Attr { - if a.Name.Space == "xmlns" { + if a.Name.Space == xmlnsPrefix { v, ok := d.ns[a.Name.Local] d.pushNs(a.Name.Local, v, ok) d.ns[a.Name.Local] = a.Value } - if a.Name.Space == "" && a.Name.Local == "xmlns" { + if a.Name.Space == "" && a.Name.Local == xmlnsPrefix { // Default space for untagged names v, ok := d.ns[""] d.pushNs("", v, ok) @@ -296,20 +335,24 @@ func (d *Decoder) Token() (Token, error) { return t, err } -const xmlURL = "http://www.w3.org/XML/1998/namespace" +const ( + xmlURL = "http://www.w3.org/XML/1998/namespace" + xmlnsPrefix = "xmlns" + xmlPrefix = "xml" +) // Apply name space translation to name n. // The default name space (for Space=="") // applies only to element names, not to attribute names. 
func (d *Decoder) translate(n *Name, isElementName bool) { switch { - case n.Space == "xmlns": + case n.Space == xmlnsPrefix: return case n.Space == "" && !isElementName: return - case n.Space == "xml": + case n.Space == xmlPrefix: n.Space = xmlURL - case n.Space == "" && n.Local == "xmlns": + case n.Space == "" && n.Local == xmlnsPrefix: return } if v, ok := d.ns[n.Space]; ok { @@ -503,6 +546,9 @@ func (d *Decoder) RawToken() (Token, error) { } func (d *Decoder) rawToken() (Token, error) { + if d.t != nil { + return d.t.Token() + } if d.err != nil { return nil, d.err } @@ -786,10 +832,9 @@ func (d *Decoder) rawToken() (Token, error) { if d.Strict { d.err = d.syntaxError("attribute name without = in element") return nil, d.err - } else { - d.ungetc(b) - a.Value = a.Name.Local } + d.ungetc(b) + a.Value = a.Name.Local } else { d.space() data := d.attrval() @@ -1027,7 +1072,6 @@ Input: if d.err != nil { return nil } - ok = false } if b, ok = d.mustgetc(); !ok { return nil @@ -1837,15 +1881,15 @@ var htmlAutoClose = []string{ } var ( - esc_quot = []byte(""") // shorter than """ - esc_apos = []byte("'") // shorter than "'" - esc_amp = []byte("&") - esc_lt = []byte("<") - esc_gt = []byte(">") - esc_tab = []byte("	") - esc_nl = []byte("
") - esc_cr = []byte("
") - esc_fffd = []byte("\uFFFD") // Unicode replacement character + escQuot = []byte(""") // shorter than """ + escApos = []byte("'") // shorter than "'" + escAmp = []byte("&") + escLT = []byte("<") + escGT = []byte(">") + escTab = []byte("	") + escNL = []byte("
") + escCR = []byte("
") + escFFFD = []byte("\uFFFD") // Unicode replacement character ) // EscapeText writes to w the properly escaped XML equivalent @@ -1865,27 +1909,27 @@ func escapeText(w io.Writer, s []byte, escapeNewline bool) error { i += width switch r { case '"': - esc = esc_quot + esc = escQuot case '\'': - esc = esc_apos + esc = escApos case '&': - esc = esc_amp + esc = escAmp case '<': - esc = esc_lt + esc = escLT case '>': - esc = esc_gt + esc = escGT case '\t': - esc = esc_tab + esc = escTab case '\n': if !escapeNewline { continue } - esc = esc_nl + esc = escNL case '\r': - esc = esc_cr + esc = escCR default: if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) { - esc = esc_fffd + esc = escFFFD break } continue @@ -1914,24 +1958,24 @@ func (p *printer) EscapeString(s string) { i += width switch r { case '"': - esc = esc_quot + esc = escQuot case '\'': - esc = esc_apos + esc = escApos case '&': - esc = esc_amp + esc = escAmp case '<': - esc = esc_lt + esc = escLT case '>': - esc = esc_gt + esc = escGT case '\t': - esc = esc_tab + esc = escTab case '\n': - esc = esc_nl + esc = escNL case '\r': - esc = esc_cr + esc = escCR default: if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) { - esc = esc_fffd + esc = escFFFD break } continue diff --git a/libgo/go/encoding/xml/xml_test.go b/libgo/go/encoding/xml/xml_test.go index dad6ed9..7a3511d 100644 --- a/libgo/go/encoding/xml/xml_test.go +++ b/libgo/go/encoding/xml/xml_test.go @@ -479,15 +479,15 @@ func TestAllScalars(t *testing.T) { } type item struct { - Field_a string + FieldA string } func TestIssue569(t *testing.T) { - data := `<item><Field_a>abcd</Field_a></item>` + data := `<item><FieldA>abcd</FieldA></item>` var i item err := Unmarshal([]byte(data), &i) - if err != nil || i.Field_a != "abcd" { + if err != nil || i.FieldA != "abcd" { t.Fatal("Expecting abcd") } } @@ -797,3 +797,90 @@ func TestIssue12417(t *testing.T) { } } } + +func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader { + return 
func(src TokenReader) TokenReader { + return mapper{ + t: src, + f: mapping, + } + } +} + +type mapper struct { + t TokenReader + f func(Token) Token +} + +func (m mapper) Token() (Token, error) { + tok, err := m.t.Token() + if err != nil { + return nil, err + } + return m.f(tok), nil +} + +func TestNewTokenDecoderIdempotent(t *testing.T) { + d := NewDecoder(strings.NewReader(`<br/>`)) + d2 := NewTokenDecoder(d) + if d != d2 { + t.Error("NewTokenDecoder did not detect underlying Decoder") + } +} + +func TestWrapDecoder(t *testing.T) { + d := NewDecoder(strings.NewReader(`<quote>[Re-enter Clown with a letter, and FABIAN]</quote>`)) + m := tokenMap(func(t Token) Token { + switch tok := t.(type) { + case StartElement: + if tok.Name.Local == "quote" { + tok.Name.Local = "blocking" + return tok + } + case EndElement: + if tok.Name.Local == "quote" { + tok.Name.Local = "blocking" + return tok + } + } + return t + }) + + d = NewTokenDecoder(m(d)) + + o := struct { + XMLName Name `xml:"blocking"` + Chardata string `xml:",chardata"` + }{} + + if err := d.Decode(&o); err != nil { + t.Fatal("Got unexpected error while decoding:", err) + } + + if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" { + t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata) + } +} + +type tokReader struct{} + +func (tokReader) Token() (Token, error) { + return StartElement{}, nil +} + +type Failure struct{} + +func (Failure) UnmarshalXML(*Decoder, StartElement) error { + return nil +} + +func TestTokenUnmarshaler(t *testing.T) { + defer func() { + if r := recover(); r != nil { + t.Error("Unexpected panic using custom token unmarshaler") + } + }() + + d := NewTokenDecoder(tokReader{}) + d.Decode(&Failure{}) +} |
