diff options
author | Ian Lance Taylor <ian@gcc.gnu.org> | 2011-12-07 01:11:29 +0000 |
---|---|---|
committer | Ian Lance Taylor <ian@gcc.gnu.org> | 2011-12-07 01:11:29 +0000 |
commit | 9c63abc9a1d127f95162756467284cf76b47aff8 (patch) | |
tree | 84f27a6ab44d932e4b0455f18390b070b4de626e /libgo/go/xml | |
parent | 374280238f934fa851273e2ee16ba53be890c6b8 (diff) | |
download | gcc-9c63abc9a1d127f95162756467284cf76b47aff8.zip gcc-9c63abc9a1d127f95162756467284cf76b47aff8.tar.gz gcc-9c63abc9a1d127f95162756467284cf76b47aff8.tar.bz2 |
libgo: Update to weekly 2011-11-09.
From-SVN: r182073
Diffstat (limited to 'libgo/go/xml')
-rw-r--r-- | libgo/go/xml/atom_test.go | 50 | ||||
-rw-r--r-- | libgo/go/xml/embed_test.go | 124 | ||||
-rw-r--r-- | libgo/go/xml/marshal.go | 304 | ||||
-rw-r--r-- | libgo/go/xml/marshal_test.go | 423 | ||||
-rw-r--r-- | libgo/go/xml/read.go | 630 | ||||
-rw-r--r-- | libgo/go/xml/read_test.go | 393 | ||||
-rw-r--r-- | libgo/go/xml/xml.go | 1697 | ||||
-rw-r--r-- | libgo/go/xml/xml_test.go | 609 |
8 files changed, 0 insertions, 4230 deletions
diff --git a/libgo/go/xml/atom_test.go b/libgo/go/xml/atom_test.go deleted file mode 100644 index d365510..0000000 --- a/libgo/go/xml/atom_test.go +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package xml - -var atomValue = &Feed{ - Title: "Example Feed", - Link: []Link{{Href: "http://example.org/"}}, - Updated: ParseTime("2003-12-13T18:30:02Z"), - Author: Person{Name: "John Doe"}, - Id: "urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6", - - Entry: []Entry{ - { - Title: "Atom-Powered Robots Run Amok", - Link: []Link{{Href: "http://example.org/2003/12/13/atom03"}}, - Id: "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a", - Updated: ParseTime("2003-12-13T18:30:02Z"), - Summary: NewText("Some text."), - }, - }, -} - -var atomXml = `` + - `<feed xmlns="http://www.w3.org/2005/Atom">` + - `<Title>Example Feed</Title>` + - `<Id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</Id>` + - `<Link href="http://example.org/"></Link>` + - `<Updated>2003-12-13T18:30:02Z</Updated>` + - `<Author><Name>John Doe</Name><URI></URI><Email></Email></Author>` + - `<Entry>` + - `<Title>Atom-Powered Robots Run Amok</Title>` + - `<Id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</Id>` + - `<Link href="http://example.org/2003/12/13/atom03"></Link>` + - `<Updated>2003-12-13T18:30:02Z</Updated>` + - `<Author><Name></Name><URI></URI><Email></Email></Author>` + - `<Summary>Some text.</Summary>` + - `</Entry>` + - `</feed>` - -func ParseTime(str string) Time { - return Time(str) -} - -func NewText(text string) Text { - return Text{ - Body: text, - } -} diff --git a/libgo/go/xml/embed_test.go b/libgo/go/xml/embed_test.go deleted file mode 100644 index ec7f478..0000000 --- a/libgo/go/xml/embed_test.go +++ /dev/null @@ -1,124 +0,0 @@ -// Copyright 2010 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package xml - -import "testing" - -type C struct { - Name string - Open bool -} - -type A struct { - XMLName Name `xml:"http://domain a"` - C - B B - FieldA string -} - -type B struct { - XMLName Name `xml:"b"` - C - FieldB string -} - -const _1a = ` -<?xml version="1.0" encoding="UTF-8"?> -<a xmlns="http://domain"> - <name>KmlFile</name> - <open>1</open> - <b> - <name>Absolute</name> - <open>0</open> - <fieldb>bar</fieldb> - </b> - <fielda>foo</fielda> -</a> -` - -// Tests that embedded structs are marshalled. -func TestEmbedded1(t *testing.T) { - var a A - if e := Unmarshal(StringReader(_1a), &a); e != nil { - t.Fatalf("Unmarshal: %s", e) - } - if a.FieldA != "foo" { - t.Fatalf("Unmarshal: expected 'foo' but found '%s'", a.FieldA) - } - if a.Name != "KmlFile" { - t.Fatalf("Unmarshal: expected 'KmlFile' but found '%s'", a.Name) - } - if !a.Open { - t.Fatal("Unmarshal: expected 'true' but found otherwise") - } - if a.B.FieldB != "bar" { - t.Fatalf("Unmarshal: expected 'bar' but found '%s'", a.B.FieldB) - } - if a.B.Name != "Absolute" { - t.Fatalf("Unmarshal: expected 'Absolute' but found '%s'", a.B.Name) - } - if a.B.Open { - t.Fatal("Unmarshal: expected 'false' but found otherwise") - } -} - -type A2 struct { - XMLName Name `xml:"http://domain a"` - XY string - Xy string -} - -const _2a = ` -<?xml version="1.0" encoding="UTF-8"?> -<a xmlns="http://domain"> - <xy>foo</xy> -</a> -` - -// Tests that conflicting field names get excluded. -func TestEmbedded2(t *testing.T) { - var a A2 - if e := Unmarshal(StringReader(_2a), &a); e != nil { - t.Fatalf("Unmarshal: %s", e) - } - if a.XY != "" { - t.Fatalf("Unmarshal: expected empty string but found '%s'", a.XY) - } - if a.Xy != "" { - t.Fatalf("Unmarshal: expected empty string but found '%s'", a.Xy) - } -} - -type A3 struct { - XMLName Name `xml:"http://domain a"` - xy string -} - -// Tests that private fields are not set. -func TestEmbedded3(t *testing.T) { - var a A3 - if e := Unmarshal(StringReader(_2a), &a); e != nil { - t.Fatalf("Unmarshal: %s", e) - } - if a.xy != "" { - t.Fatalf("Unmarshal: expected empty string but found '%s'", a.xy) - } -} - -type A4 struct { - XMLName Name `xml:"http://domain a"` - Any string -} - -// Tests that private fields are not set. -func TestEmbedded4(t *testing.T) { - var a A4 - if e := Unmarshal(StringReader(_2a), &a); e != nil { - t.Fatalf("Unmarshal: %s", e) - } - if a.Any != "foo" { - t.Fatalf("Unmarshal: expected 'foo' but found '%s'", a.Any) - } -} diff --git a/libgo/go/xml/marshal.go b/libgo/go/xml/marshal.go deleted file mode 100644 index 691b70d..0000000 --- a/libgo/go/xml/marshal.go +++ /dev/null @@ -1,304 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package xml - -import ( - "bufio" - "io" - "reflect" - "strconv" - "strings" -) - -const ( - // A generic XML header suitable for use with the output of Marshal and - // MarshalIndent. This is not automatically added to any output of this - // package, it is provided as a convenience. - Header = `<?xml version="1.0" encoding="UTF-8"?>` + "\n" -) - -// A Marshaler can produce well-formatted XML representing its internal state. -// It is used by both Marshal and MarshalIndent. -type Marshaler interface { - MarshalXML() ([]byte, error) -} - -type printer struct { - *bufio.Writer -} - -// Marshal writes an XML-formatted representation of v to w. -// -// If v implements Marshaler, then Marshal calls its MarshalXML method. -// Otherwise, Marshal uses the following procedure to create the XML. -// -// Marshal handles an array or slice by marshalling each of the elements. -// Marshal handles a pointer by marshalling the value it points at or, if the -// pointer is nil, by writing nothing. Marshal handles an interface value by -// marshalling the value it contains or, if the interface value is nil, by -// writing nothing. Marshal handles all other data by writing one or more XML -// elements containing the data. -// -// The name for the XML elements is taken from, in order of preference: -// - the tag on an XMLName field, if the data is a struct -// - the value of an XMLName field of type xml.Name -// - the tag of the struct field used to obtain the data -// - the name of the struct field used to obtain the data -// - the name '???'. -// -// The XML element for a struct contains marshalled elements for each of the -// exported fields of the struct, with these exceptions: -// - the XMLName field, described above, is omitted. -// - a field with tag "attr" becomes an attribute in the XML element. -// - a field with tag "chardata" is written as character data, -// not as an XML element. -// - a field with tag "innerxml" is written verbatim, -// not subject to the usual marshalling procedure. -// -// If a field uses a tag "a>b>c", then the element c will be nested inside -// parent elements a and b. Fields that appear next to each other that name -// the same parent will be enclosed in one XML element. For example: -// -// type Result struct { -// XMLName xml.Name `xml:"result"` -// FirstName string `xml:"person>name>first"` -// LastName string `xml:"person>name>last"` -// Age int `xml:"person>age"` -// } -// -// xml.Marshal(w, &Result{FirstName: "John", LastName: "Doe", Age: 42}) -// -// would be marshalled as: -// -// <result> -// <person> -// <name> -// <first>John</first> -// <last>Doe</last> -// </name> -// <age>42</age> -// </person> -// </result> -// -// Marshal will return an error if asked to marshal a channel, function, or map. -func Marshal(w io.Writer, v interface{}) (err error) { - p := &printer{bufio.NewWriter(w)} - err = p.marshalValue(reflect.ValueOf(v), "???") - p.Flush() - return err -} - -func (p *printer) marshalValue(val reflect.Value, name string) error { - if !val.IsValid() { - return nil - } - - kind := val.Kind() - typ := val.Type() - - // Try Marshaler - if typ.NumMethod() > 0 { - if marshaler, ok := val.Interface().(Marshaler); ok { - bytes, err := marshaler.MarshalXML() - if err != nil { - return err - } - p.Write(bytes) - return nil - } - } - - // Drill into pointers/interfaces - if kind == reflect.Ptr || kind == reflect.Interface { - if val.IsNil() { - return nil - } - return p.marshalValue(val.Elem(), name) - } - - // Slices and arrays iterate over the elements. They do not have an enclosing tag. - if (kind == reflect.Slice || kind == reflect.Array) && typ.Elem().Kind() != reflect.Uint8 { - for i, n := 0, val.Len(); i < n; i++ { - if err := p.marshalValue(val.Index(i), name); err != nil { - return err - } - } - return nil - } - - // Find XML name - xmlns := "" - if kind == reflect.Struct { - if f, ok := typ.FieldByName("XMLName"); ok { - if tag := f.Tag.Get("xml"); tag != "" { - if i := strings.Index(tag, " "); i >= 0 { - xmlns, name = tag[:i], tag[i+1:] - } else { - name = tag - } - } else if v, ok := val.FieldByIndex(f.Index).Interface().(Name); ok && v.Local != "" { - xmlns, name = v.Space, v.Local - } - } - } - - p.WriteByte('<') - p.WriteString(name) - - // Attributes - if kind == reflect.Struct { - if len(xmlns) > 0 { - p.WriteString(` xmlns="`) - Escape(p, []byte(xmlns)) - p.WriteByte('"') - } - - for i, n := 0, typ.NumField(); i < n; i++ { - if f := typ.Field(i); f.PkgPath == "" && f.Tag.Get("xml") == "attr" { - if f.Type.Kind() == reflect.String { - if str := val.Field(i).String(); str != "" { - p.WriteByte(' ') - p.WriteString(strings.ToLower(f.Name)) - p.WriteString(`="`) - Escape(p, []byte(str)) - p.WriteByte('"') - } - } - } - } - } - p.WriteByte('>') - - switch k := val.Kind(); k { - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - p.WriteString(strconv.Itoa64(val.Int())) - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - p.WriteString(strconv.Uitoa64(val.Uint())) - case reflect.Float32, reflect.Float64: - p.WriteString(strconv.Ftoa64(val.Float(), 'g', -1)) - case reflect.String: - Escape(p, []byte(val.String())) - case reflect.Bool: - p.WriteString(strconv.Btoa(val.Bool())) - case reflect.Array: - // will be [...]byte - bytes := make([]byte, val.Len()) - for i := range bytes { - bytes[i] = val.Index(i).Interface().(byte) - } - Escape(p, bytes) - case reflect.Slice: - // will be []byte - bytes := val.Interface().([]byte) - Escape(p, bytes) - case reflect.Struct: - s := parentStack{printer: p} - for i, n := 0, val.NumField(); i < n; i++ { - if f := typ.Field(i); f.Name != "XMLName" && f.PkgPath == "" { - name := f.Name - vf := val.Field(i) - switch tag := f.Tag.Get("xml"); tag { - case "": - s.trim(nil) - case "chardata": - if tk := f.Type.Kind(); tk == reflect.String { - Escape(p, []byte(vf.String())) - } else if tk == reflect.Slice { - if elem, ok := vf.Interface().([]byte); ok { - Escape(p, elem) - } - } - continue - case "innerxml": - iface := vf.Interface() - switch raw := iface.(type) { - case []byte: - p.Write(raw) - continue - case string: - p.WriteString(raw) - continue - } - case "attr": - continue - default: - parents := strings.Split(tag, ">") - if len(parents) == 1 { - parents, name = nil, tag - } else { - parents, name = parents[:len(parents)-1], parents[len(parents)-1] - if parents[0] == "" { - parents[0] = f.Name - } - } - - s.trim(parents) - if !(vf.Kind() == reflect.Ptr || vf.Kind() == reflect.Interface) || !vf.IsNil() { - s.push(parents[len(s.stack):]) - } - } - - if err := p.marshalValue(vf, name); err != nil { - return err - } - } - } - s.trim(nil) - default: - return &UnsupportedTypeError{typ} - } - - p.WriteByte('<') - p.WriteByte('/') - p.WriteString(name) - p.WriteByte('>') - - return nil -} - -type parentStack struct { - *printer - stack []string -} - -// trim updates the XML context to match the longest common prefix of the stack -// and the given parents. A closing tag will be written for every parent -// popped. Passing a zero slice or nil will close all the elements. -func (s *parentStack) trim(parents []string) { - split := 0 - for ; split < len(parents) && split < len(s.stack); split++ { - if parents[split] != s.stack[split] { - break - } - } - - for i := len(s.stack) - 1; i >= split; i-- { - s.WriteString("</") - s.WriteString(s.stack[i]) - s.WriteByte('>') - } - - s.stack = parents[:split] -} - -// push adds parent elements to the stack and writes open tags. -func (s *parentStack) push(parents []string) { - for i := 0; i < len(parents); i++ { - s.WriteString("<") - s.WriteString(parents[i]) - s.WriteByte('>') - } - s.stack = append(s.stack, parents...) -} - -// A MarshalXMLError is returned when Marshal or MarshalIndent encounter a type -// that cannot be converted into XML. -type UnsupportedTypeError struct { - Type reflect.Type -} - -func (e *UnsupportedTypeError) Error() string { - return "xml: unsupported type: " + e.Type.String() -} diff --git a/libgo/go/xml/marshal_test.go b/libgo/go/xml/marshal_test.go deleted file mode 100644 index 59007b3..0000000 --- a/libgo/go/xml/marshal_test.go +++ /dev/null @@ -1,423 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package xml - -import ( - "reflect" - "testing" - "bytes" - "strings" - "strconv" -) - -type DriveType int - -const ( - HyperDrive DriveType = iota - ImprobabilityDrive -) - -type Passenger struct { - Name []string `xml:"name"` - Weight float32 `xml:"weight"` -} - -type Ship struct { - XMLName Name `xml:"spaceship"` - - Name string `xml:"attr"` - Pilot string `xml:"attr"` - Drive DriveType `xml:"drive"` - Age uint `xml:"age"` - Passenger []*Passenger `xml:"passenger"` - secret string -} - -type RawXML string - -func (rx RawXML) MarshalXML() ([]byte, error) { - return []byte(rx), nil -} - -type NamedType string - -type Port struct { - XMLName Name `xml:"port"` - Type string `xml:"attr"` - Number string `xml:"chardata"` -} - -type Domain struct { - XMLName Name `xml:"domain"` - Country string `xml:"attr"` - Name []byte `xml:"chardata"` -} - -type Book struct { - XMLName Name `xml:"book"` - Title string `xml:"chardata"` -} - -type SecretAgent struct { - XMLName Name `xml:"agent"` - Handle string `xml:"attr"` - Identity string - Obfuscate string `xml:"innerxml"` -} - -type NestedItems struct { - XMLName Name `xml:"result"` - Items []string `xml:">item"` - Item1 []string `xml:"Items>item1"` -} - -type NestedOrder struct { - XMLName Name `xml:"result"` - Field1 string `xml:"parent>c"` - Field2 string `xml:"parent>b"` - Field3 string `xml:"parent>a"` -} - -type MixedNested struct { - XMLName Name `xml:"result"` - A string `xml:"parent1>a"` - B string `xml:"b"` - C string `xml:"parent1>parent2>c"` - D string `xml:"parent1>d"` -} - -type NilTest struct { - A interface{} `xml:"parent1>parent2>a"` - B interface{} `xml:"parent1>b"` - C interface{} `xml:"parent1>parent2>c"` -} - -type Service struct { - XMLName Name `xml:"service"` - Domain *Domain `xml:"host>domain"` - Port *Port `xml:"host>port"` - Extra1 interface{} - Extra2 interface{} `xml:"host>extra2"` -} - -var nilStruct *Ship - -var marshalTests = []struct { - Value interface{} - ExpectXML string -}{ - // Test nil marshals to nothing - {Value: nil, ExpectXML: ``}, - {Value: nilStruct, ExpectXML: ``}, - - // Test value types (no tag name, so ???) - {Value: true, ExpectXML: `<???>true</???>`}, - {Value: int(42), ExpectXML: `<???>42</???>`}, - {Value: int8(42), ExpectXML: `<???>42</???>`}, - {Value: int16(42), ExpectXML: `<???>42</???>`}, - {Value: int32(42), ExpectXML: `<???>42</???>`}, - {Value: uint(42), ExpectXML: `<???>42</???>`}, - {Value: uint8(42), ExpectXML: `<???>42</???>`}, - {Value: uint16(42), ExpectXML: `<???>42</???>`}, - {Value: uint32(42), ExpectXML: `<???>42</???>`}, - {Value: float32(1.25), ExpectXML: `<???>1.25</???>`}, - {Value: float64(1.25), ExpectXML: `<???>1.25</???>`}, - {Value: uintptr(0xFFDD), ExpectXML: `<???>65501</???>`}, - {Value: "gopher", ExpectXML: `<???>gopher</???>`}, - {Value: []byte("gopher"), ExpectXML: `<???>gopher</???>`}, - {Value: "</>", ExpectXML: `<???></></???>`}, - {Value: []byte("</>"), ExpectXML: `<???></></???>`}, - {Value: [3]byte{'<', '/', '>'}, ExpectXML: `<???></></???>`}, - {Value: NamedType("potato"), ExpectXML: `<???>potato</???>`}, - {Value: []int{1, 2, 3}, ExpectXML: `<???>1</???><???>2</???><???>3</???>`}, - {Value: [3]int{1, 2, 3}, ExpectXML: `<???>1</???><???>2</???><???>3</???>`}, - - // Test innerxml - {Value: RawXML("</>"), ExpectXML: `</>`}, - { - Value: &SecretAgent{ - Handle: "007", - Identity: "James Bond", - Obfuscate: "<redacted/>", - }, - //ExpectXML: `<agent handle="007"><redacted/></agent>`, - ExpectXML: `<agent handle="007"><Identity>James Bond</Identity><redacted/></agent>`, - }, - - // Test structs - {Value: &Port{Type: "ssl", Number: "443"}, ExpectXML: `<port type="ssl">443</port>`}, - {Value: &Port{Number: "443"}, ExpectXML: `<port>443</port>`}, - {Value: &Port{Type: "<unix>"}, ExpectXML: `<port type="<unix>"></port>`}, - {Value: &Domain{Name: []byte("google.com&friends")}, ExpectXML: `<domain>google.com&friends</domain>`}, - {Value: &Book{Title: "Pride & Prejudice"}, ExpectXML: `<book>Pride & Prejudice</book>`}, - {Value: atomValue, ExpectXML: atomXml}, - { - Value: &Ship{ - Name: "Heart of Gold", - Pilot: "Computer", - Age: 1, - Drive: ImprobabilityDrive, - Passenger: []*Passenger{ - &Passenger{ - Name: []string{"Zaphod", "Beeblebrox"}, - Weight: 7.25, - }, - &Passenger{ - Name: []string{"Trisha", "McMillen"}, - Weight: 5.5, - }, - &Passenger{ - Name: []string{"Ford", "Prefect"}, - Weight: 7, - }, - &Passenger{ - Name: []string{"Arthur", "Dent"}, - Weight: 6.75, - }, - }, - }, - ExpectXML: `<spaceship name="Heart of Gold" pilot="Computer">` + - `<drive>` + strconv.Itoa(int(ImprobabilityDrive)) + `</drive>` + - `<age>1</age>` + - `<passenger>` + - `<name>Zaphod</name>` + - `<name>Beeblebrox</name>` + - `<weight>7.25</weight>` + - `</passenger>` + - `<passenger>` + - `<name>Trisha</name>` + - `<name>McMillen</name>` + - `<weight>5.5</weight>` + - `</passenger>` + - `<passenger>` + - `<name>Ford</name>` + - `<name>Prefect</name>` + - `<weight>7</weight>` + - `</passenger>` + - `<passenger>` + - `<name>Arthur</name>` + - `<name>Dent</name>` + - `<weight>6.75</weight>` + - `</passenger>` + - `</spaceship>`, - }, - // Test a>b - { - Value: NestedItems{Items: []string{}, Item1: []string{}}, - ExpectXML: `<result>` + - `<Items>` + - `</Items>` + - `</result>`, - }, - { - Value: NestedItems{Items: []string{}, Item1: []string{"A"}}, - ExpectXML: `<result>` + - `<Items>` + - `<item1>A</item1>` + - `</Items>` + - `</result>`, - }, - { - Value: NestedItems{Items: []string{"A", "B"}, Item1: []string{}}, - ExpectXML: `<result>` + - `<Items>` + - `<item>A</item>` + - `<item>B</item>` + - `</Items>` + - `</result>`, - }, - { - Value: NestedItems{Items: []string{"A", "B"}, Item1: []string{"C"}}, - ExpectXML: `<result>` + - `<Items>` + - `<item>A</item>` + - `<item>B</item>` + - `<item1>C</item1>` + - `</Items>` + - `</result>`, - }, - { - Value: NestedOrder{Field1: "C", Field2: "B", Field3: "A"}, - ExpectXML: `<result>` + - `<parent>` + - `<c>C</c>` + - `<b>B</b>` + - `<a>A</a>` + - `</parent>` + - `</result>`, - }, - { - Value: NilTest{A: "A", B: nil, C: "C"}, - ExpectXML: `<???>` + - `<parent1>` + - `<parent2><a>A</a></parent2>` + - `<parent2><c>C</c></parent2>` + - `</parent1>` + - `</???>`, - }, - { - Value: MixedNested{A: "A", B: "B", C: "C", D: "D"}, - ExpectXML: `<result>` + - `<parent1><a>A</a></parent1>` + - `<b>B</b>` + - `<parent1>` + - `<parent2><c>C</c></parent2>` + - `<d>D</d>` + - `</parent1>` + - `</result>`, - }, - { - Value: Service{Port: &Port{Number: "80"}}, - ExpectXML: `<service><host><port>80</port></host></service>`, - }, - { - Value: Service{}, - ExpectXML: `<service></service>`, - }, - { - Value: Service{Port: &Port{Number: "80"}, Extra1: "A", Extra2: "B"}, - ExpectXML: `<service>` + - `<host><port>80</port></host>` + - `<Extra1>A</Extra1>` + - `<host><extra2>B</extra2></host>` + - `</service>`, - }, - { - Value: Service{Port: &Port{Number: "80"}, Extra2: "example"}, - ExpectXML: `<service>` + - `<host><port>80</port></host>` + - `<host><extra2>example</extra2></host>` + - `</service>`, - }, -} - -func TestMarshal(t *testing.T) { - for idx, test := range marshalTests { - buf := bytes.NewBuffer(nil) - err := Marshal(buf, test.Value) - if err != nil { - t.Errorf("#%d: Error: %s", idx, err) - continue - } - if got, want := buf.String(), test.ExpectXML; got != want { - if strings.Contains(want, "\n") { - t.Errorf("#%d: marshal(%#v) - GOT:\n%s\nWANT:\n%s", idx, test.Value, got, want) - } else { - t.Errorf("#%d: marshal(%#v) = %#q want %#q", idx, test.Value, got, want) - } - } - } -} - -var marshalErrorTests = []struct { - Value interface{} - Err string - Kind reflect.Kind -}{ - { - Value: make(chan bool), - Err: "xml: unsupported type: chan bool", - Kind: reflect.Chan, - }, - { - Value: map[string]string{ - "question": "What do you get when you multiply six by nine?", - "answer": "42", - }, - Err: "xml: unsupported type: map[string] string", - Kind: reflect.Map, - }, - { - Value: map[*Ship]bool{nil: false}, - Err: "xml: unsupported type: map[*xml.Ship] bool", - Kind: reflect.Map, - }, -} - -func TestMarshalErrors(t *testing.T) { - for idx, test := range marshalErrorTests { - buf := bytes.NewBuffer(nil) - err := Marshal(buf, test.Value) - if err == nil || err.Error() != test.Err { - t.Errorf("#%d: marshal(%#v) = [error] %q, want %q", idx, test.Value, err, test.Err) - } - if kind := err.(*UnsupportedTypeError).Type.Kind(); kind != test.Kind { - t.Errorf("#%d: marshal(%#v) = [error kind] %s, want %s", idx, test.Value, kind, test.Kind) - } - } -} - -// Do invertibility testing on the various structures that we test -func TestUnmarshal(t *testing.T) { - for i, test := range marshalTests { - // Skip the nil pointers - if i <= 1 { - continue - } - - var dest interface{} - - switch test.Value.(type) { - case *Ship, Ship: - dest = &Ship{} - case *Port, Port: - dest = &Port{} - case *Domain, Domain: - dest = &Domain{} - case *Feed, Feed: - dest = &Feed{} - default: - continue - } - - buffer := bytes.NewBufferString(test.ExpectXML) - err := Unmarshal(buffer, dest) - - // Don't compare XMLNames - switch fix := dest.(type) { - case *Ship: - fix.XMLName = Name{} - case *Port: - fix.XMLName = Name{} - case *Domain: - fix.XMLName = Name{} - case *Feed: - fix.XMLName = Name{} - fix.Author.InnerXML = "" - for i := range fix.Entry { - fix.Entry[i].Author.InnerXML = "" - } - } - - if err != nil { - t.Errorf("#%d: unexpected error: %#v", i, err) - } else if got, want := dest, test.Value; !reflect.DeepEqual(got, want) { - t.Errorf("#%d: unmarshal(%#s) = %#v, want %#v", i, test.ExpectXML, got, want) - } - } -} - -func BenchmarkMarshal(b *testing.B) { - idx := len(marshalTests) - 1 - test := marshalTests[idx] - - buf := bytes.NewBuffer(nil) - for i := 0; i < b.N; i++ { - Marshal(buf, test.Value) - buf.Truncate(0) - } -} - -func BenchmarkUnmarshal(b *testing.B) { - idx := len(marshalTests) - 1 - test := marshalTests[idx] - sm := &Ship{} - xml := []byte(test.ExpectXML) - - for i := 0; i < b.N; i++ { - buffer := bytes.NewBuffer(xml) - Unmarshal(buffer, sm) - } -} diff --git a/libgo/go/xml/read.go b/libgo/go/xml/read.go deleted file mode 100644 index a88941c..0000000 --- a/libgo/go/xml/read.go +++ /dev/null @@ -1,630 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package xml - -import ( - "bytes" - "errors" - "fmt" - "io" - "reflect" - "strconv" - "strings" - "unicode" - "utf8" -) - -// BUG(rsc): Mapping between XML elements and data structures is inherently flawed: -// an XML element is an order-dependent collection of anonymous -// values, while a data structure is an order-independent collection -// of named values. -// See package json for a textual representation more suitable -// to data structures. - -// Unmarshal parses an XML element from r and uses the -// reflect library to fill in an arbitrary struct, slice, or string -// pointed at by val. Well-formed data that does not fit -// into val is discarded. -// -// For example, given these definitions: -// -// type Email struct { -// Where string `xml:"attr"` -// Addr string -// } -// -// type Result struct { -// XMLName xml.Name `xml:"result"` -// Name string -// Phone string -// Email []Email -// Groups []string `xml:"group>value"` -// } -// -// result := Result{Name: "name", Phone: "phone", Email: nil} -// -// unmarshalling the XML input -// -// <result> -// <email where="home"> -// <addr>gre@example.com</addr> -// </email> -// <email where='work'> -// <addr>gre@work.com</addr> -// </email> -// <name>Grace R. Emlin</name> -// <group> -// <value>Friends</value> -// <value>Squash</value> -// </group> -// <address>123 Main Street</address> -// </result> -// -// via Unmarshal(r, &result) is equivalent to assigning -// -// r = Result{xml.Name{"", "result"}, -// "Grace R. Emlin", // name -// "phone", // no phone given -// []Email{ -// Email{"home", "gre@example.com"}, -// Email{"work", "gre@work.com"}, -// }, -// []string{"Friends", "Squash"}, -// } -// -// Note that the field r.Phone has not been modified and -// that the XML <address> element was discarded. Also, the field -// Groups was assigned considering the element path provided in the -// field tag. -// -// Because Unmarshal uses the reflect package, it can only assign -// to exported (upper case) fields. Unmarshal uses a case-insensitive -// comparison to match XML element names to struct field names. -// -// Unmarshal maps an XML element to a struct using the following rules. -// In the rules, the tag of a field refers to the value associated with the -// key 'xml' in the struct field's tag (see the example above). -// -// * If the struct has a field of type []byte or string with tag "innerxml", -// Unmarshal accumulates the raw XML nested inside the element -// in that field. The rest of the rules still apply. -// -// * If the struct has a field named XMLName of type xml.Name, -// Unmarshal records the element name in that field. -// -// * If the XMLName field has an associated tag of the form -// "name" or "namespace-URL name", the XML element must have -// the given name (and, optionally, name space) or else Unmarshal -// returns an error. -// -// * If the XML element has an attribute whose name matches a -// struct field of type string with tag "attr", Unmarshal records -// the attribute value in that field. -// -// * If the XML element contains character data, that data is -// accumulated in the first struct field that has tag "chardata". -// The struct field may have type []byte or string. -// If there is no such field, the character data is discarded. -// -// * If the XML element contains comments, they are accumulated in -// the first struct field that has tag "comments". The struct -// field may have type []byte or string. If there is no such -// field, the comments are discarded. -// -// * If the XML element contains a sub-element whose name matches -// the prefix of a tag formatted as "a>b>c", unmarshal -// will descend into the XML structure looking for elements with the -// given names, and will map the innermost elements to that struct field. -// A tag starting with ">" is equivalent to one starting -// with the field name followed by ">". -// -// * If the XML element contains a sub-element whose name -// matches a field whose tag is neither "attr" nor "chardata", -// Unmarshal maps the sub-element to that struct field. -// Otherwise, if the struct has a field named Any, unmarshal -// maps the sub-element to that struct field. -// -// Unmarshal maps an XML element to a string or []byte by saving the -// concatenation of that element's character data in the string or -// []byte. -// -// Unmarshal maps an attribute value to a string or []byte by saving -// the value in the string or slice. -// -// Unmarshal maps an XML element to a slice by extending the length of -// the slice and mapping the element to the newly created value. -// -// Unmarshal maps an XML element or attribute value to a bool by -// setting it to the boolean value represented by the string. -// -// Unmarshal maps an XML element or attribute value to an integer or -// floating-point field by setting the field to the result of -// interpreting the string value in decimal. There is no check for -// overflow. -// -// Unmarshal maps an XML element to an xml.Name by recording the -// element name. -// -// Unmarshal maps an XML element to a pointer by setting the pointer -// to a freshly allocated value and then mapping the element to that value. -// -func Unmarshal(r io.Reader, val interface{}) error { - v := reflect.ValueOf(val) - if v.Kind() != reflect.Ptr { - return errors.New("non-pointer passed to Unmarshal") - } - p := NewParser(r) - elem := v.Elem() - err := p.unmarshal(elem, nil) - if err != nil { - return err - } - return nil -} - -// An UnmarshalError represents an error in the unmarshalling process. -type UnmarshalError string - -func (e UnmarshalError) Error() string { return string(e) } - -// A TagPathError represents an error in the unmarshalling process -// caused by the use of field tags with conflicting paths. -type TagPathError struct { - Struct reflect.Type - Field1, Tag1 string - Field2, Tag2 string -} - -func (e *TagPathError) Error() string { - return fmt.Sprintf("%s field %q with tag %q conflicts with field %q with tag %q", e.Struct, e.Field1, e.Tag1, e.Field2, e.Tag2) -} - -// The Parser's Unmarshal method is like xml.Unmarshal -// except that it can be passed a pointer to the initial start element, -// useful when a client reads some raw XML tokens itself -// but also defers to Unmarshal for some elements. -// Passing a nil start element indicates that Unmarshal should -// read the token stream to find the start element. -func (p *Parser) Unmarshal(val interface{}, start *StartElement) error { - v := reflect.ValueOf(val) - if v.Kind() != reflect.Ptr { - return errors.New("non-pointer passed to Unmarshal") - } - return p.unmarshal(v.Elem(), start) -} - -// fieldName strips invalid characters from an XML name -// to create a valid Go struct name. It also converts the -// name to lower case letters. -func fieldName(original string) string { - - var i int - //remove leading underscores - for i = 0; i < len(original) && original[i] == '_'; i++ { - } - - return strings.Map( - func(x rune) rune { - if x == '_' || unicode.IsDigit(x) || unicode.IsLetter(x) { - return unicode.ToLower(x) - } - return -1 - }, - original[i:]) -} - -// Unmarshal a single XML element into val. -func (p *Parser) unmarshal(val reflect.Value, start *StartElement) error { - // Find start element if we need it. - if start == nil { - for { - tok, err := p.Token() - if err != nil { - return err - } - if t, ok := tok.(StartElement); ok { - start = &t - break - } - } - } - - if pv := val; pv.Kind() == reflect.Ptr { - if pv.IsNil() { - pv.Set(reflect.New(pv.Type().Elem())) - } - val = pv.Elem() - } - - var ( - data []byte - saveData reflect.Value - comment []byte - saveComment reflect.Value - saveXML reflect.Value - saveXMLIndex int - saveXMLData []byte - sv reflect.Value - styp reflect.Type - fieldPaths map[string]pathInfo - ) - - switch v := val; v.Kind() { - default: - return errors.New("unknown type " + v.Type().String()) - - case reflect.Slice: - typ := v.Type() - if typ.Elem().Kind() == reflect.Uint8 { - // []byte - saveData = v - break - } - - // Slice of element values. - // Grow slice. - n := v.Len() - if n >= v.Cap() { - ncap := 2 * n - if ncap < 4 { - ncap = 4 - } - new := reflect.MakeSlice(typ, n, ncap) - reflect.Copy(new, v) - v.Set(new) - } - v.SetLen(n + 1) - - // Recur to read element into slice. - if err := p.unmarshal(v.Index(n), start); err != nil { - v.SetLen(n) - return err - } - return nil - - case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, reflect.String: - saveData = v - - case reflect.Struct: - if _, ok := v.Interface().(Name); ok { - v.Set(reflect.ValueOf(start.Name)) - break - } - - sv = v - typ := sv.Type() - styp = typ - // Assign name. - if f, ok := typ.FieldByName("XMLName"); ok { - // Validate element name. - if tag := f.Tag.Get("xml"); tag != "" { - ns := "" - i := strings.LastIndex(tag, " ") - if i >= 0 { - ns, tag = tag[0:i], tag[i+1:] - } - if tag != start.Name.Local { - return UnmarshalError("expected element type <" + tag + "> but have <" + start.Name.Local + ">") - } - if ns != "" && ns != start.Name.Space { - e := "expected element <" + tag + "> in name space " + ns + " but have " - if start.Name.Space == "" { - e += "no name space" - } else { - e += start.Name.Space - } - return UnmarshalError(e) - } - } - - // Save - v := sv.FieldByIndex(f.Index) - if _, ok := v.Interface().(Name); ok { - v.Set(reflect.ValueOf(start.Name)) - } - } - - // Assign attributes. - // Also, determine whether we need to save character data or comments. - for i, n := 0, typ.NumField(); i < n; i++ { - f := typ.Field(i) - switch f.Tag.Get("xml") { - case "attr": - strv := sv.FieldByIndex(f.Index) - // Look for attribute. - val := "" - k := strings.ToLower(f.Name) - for _, a := range start.Attr { - if fieldName(a.Name.Local) == k { - val = a.Value - break - } - } - copyValue(strv, []byte(val)) - - case "comment": - if !saveComment.IsValid() { - saveComment = sv.FieldByIndex(f.Index) - } - - case "chardata": - if !saveData.IsValid() { - saveData = sv.FieldByIndex(f.Index) - } - - case "innerxml": - if !saveXML.IsValid() { - saveXML = sv.FieldByIndex(f.Index) - if p.saved == nil { - saveXMLIndex = 0 - p.saved = new(bytes.Buffer) - } else { - saveXMLIndex = p.savedOffset() - } - } - - default: - if tag := f.Tag.Get("xml"); strings.Contains(tag, ">") { - if fieldPaths == nil { - fieldPaths = make(map[string]pathInfo) - } - path := strings.ToLower(tag) - if strings.HasPrefix(tag, ">") { - path = strings.ToLower(f.Name) + path - } - if strings.HasSuffix(tag, ">") { - path = path[:len(path)-1] - } - err := addFieldPath(sv, fieldPaths, path, f.Index) - if err != nil { - return err - } - } - } - } - } - - // Find end element. - // Process sub-elements along the way. -Loop: - for { - var savedOffset int - if saveXML.IsValid() { - savedOffset = p.savedOffset() - } - tok, err := p.Token() - if err != nil { - return err - } - switch t := tok.(type) { - case StartElement: - // Sub-element. - // Look up by tag name. - if sv.IsValid() { - k := fieldName(t.Name.Local) - - if fieldPaths != nil { - if _, found := fieldPaths[k]; found { - if err := p.unmarshalPaths(sv, fieldPaths, k, &t); err != nil { - return err - } - continue Loop - } - } - - match := func(s string) bool { - // check if the name matches ignoring case - if strings.ToLower(s) != k { - return false - } - // now check that it's public - c, _ := utf8.DecodeRuneInString(s) - return unicode.IsUpper(c) - } - - f, found := styp.FieldByNameFunc(match) - if !found { // fall back to mop-up field named "Any" - f, found = styp.FieldByName("Any") - } - if found { - if err := p.unmarshal(sv.FieldByIndex(f.Index), &t); err != nil { - return err - } - continue Loop - } - } - // Not saving sub-element but still have to skip over it. - if err := p.Skip(); err != nil { - return err - } - - case EndElement: - if saveXML.IsValid() { - saveXMLData = p.saved.Bytes()[saveXMLIndex:savedOffset] - if saveXMLIndex == 0 { - p.saved = nil - } - } - break Loop - - case CharData: - if saveData.IsValid() { - data = append(data, t...) - } - - case Comment: - if saveComment.IsValid() { - comment = append(comment, t...) - } - } - } - - if err := copyValue(saveData, data); err != nil { - return err - } - - switch t := saveComment; t.Kind() { - case reflect.String: - t.SetString(string(comment)) - case reflect.Slice: - t.Set(reflect.ValueOf(comment)) - } - - switch t := saveXML; t.Kind() { - case reflect.String: - t.SetString(string(saveXMLData)) - case reflect.Slice: - t.Set(reflect.ValueOf(saveXMLData)) - } - - return nil -} - -func copyValue(dst reflect.Value, src []byte) (err error) { - // Helper functions for integer and unsigned integer conversions - var itmp int64 - getInt64 := func() bool { - itmp, err = strconv.Atoi64(string(src)) - // TODO: should check sizes - return err == nil - } - var utmp uint64 - getUint64 := func() bool { - utmp, err = strconv.Atoui64(string(src)) - // TODO: check for overflow? - return err == nil - } - var ftmp float64 - getFloat64 := func() bool { - ftmp, err = strconv.Atof64(string(src)) - // TODO: check for overflow? - return err == nil - } - - // Save accumulated data and comments - switch t := dst; t.Kind() { - case reflect.Invalid: - // Probably a comment, handled below - default: - return errors.New("cannot happen: unknown type " + t.Type().String()) - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - if !getInt64() { - return err - } - t.SetInt(itmp) - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - if !getUint64() { - return err - } - t.SetUint(utmp) - case reflect.Float32, reflect.Float64: - if !getFloat64() { - return err - } - t.SetFloat(ftmp) - case reflect.Bool: - value, err := strconv.Atob(strings.TrimSpace(string(src))) - if err != nil { - return err - } - t.SetBool(value) - case reflect.String: - t.SetString(string(src)) - case reflect.Slice: - t.Set(reflect.ValueOf(src)) - } - return nil -} - -type pathInfo struct { - fieldIdx []int - complete bool -} - -// addFieldPath takes an element path such as "a>b>c" and fills the -// paths map with all paths leading to it ("a", "a>b", and "a>b>c"). -// It is okay for paths to share a common, shorter prefix but not ok -// for one path to itself be a prefix of another. -func addFieldPath(sv reflect.Value, paths map[string]pathInfo, path string, fieldIdx []int) error { - if info, found := paths[path]; found { - return tagError(sv, info.fieldIdx, fieldIdx) - } - paths[path] = pathInfo{fieldIdx, true} - for { - i := strings.LastIndex(path, ">") - if i < 0 { - break - } - path = path[:i] - if info, found := paths[path]; found { - if info.complete { - return tagError(sv, info.fieldIdx, fieldIdx) - } - } else { - paths[path] = pathInfo{fieldIdx, false} - } - } - return nil - -} - -func tagError(sv reflect.Value, idx1 []int, idx2 []int) error { - t := sv.Type() - f1 := t.FieldByIndex(idx1) - f2 := t.FieldByIndex(idx2) - return &TagPathError{t, f1.Name, f1.Tag.Get("xml"), f2.Name, f2.Tag.Get("xml")} -} - -// unmarshalPaths walks down an XML structure looking for -// wanted paths, and calls unmarshal on them. -func (p *Parser) unmarshalPaths(sv reflect.Value, paths map[string]pathInfo, path string, start *StartElement) error { - if info, _ := paths[path]; info.complete { - return p.unmarshal(sv.FieldByIndex(info.fieldIdx), start) - } - for { - tok, err := p.Token() - if err != nil { - return err - } - switch t := tok.(type) { - case StartElement: - k := path + ">" + fieldName(t.Name.Local) - if _, found := paths[k]; found { - if err := p.unmarshalPaths(sv, paths, k, &t); err != nil { - return err - } - continue - } - if err := p.Skip(); err != nil { - return err - } - case EndElement: - return nil - } - } - panic("unreachable") -} - -// Have already read a start element. -// Read tokens until we find the end element. -// Token is taking care of making sure the -// end element matches the start element we saw. -func (p *Parser) Skip() error { - for { - tok, err := p.Token() - if err != nil { - return err - } - switch t := tok.(type) { - case StartElement: - if err := p.Skip(); err != nil { - return err - } - case EndElement: - return nil - } - } - panic("unreachable") -} diff --git a/libgo/go/xml/read_test.go b/libgo/go/xml/read_test.go deleted file mode 100644 index d39c2d5..0000000 --- a/libgo/go/xml/read_test.go +++ /dev/null @@ -1,393 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package xml - -import ( - "reflect" - "testing" -) - -// Stripped down Atom feed data structures. - -func TestUnmarshalFeed(t *testing.T) { - var f Feed - if err := Unmarshal(StringReader(atomFeedString), &f); err != nil { - t.Fatalf("Unmarshal: %s", err) - } - if !reflect.DeepEqual(f, atomFeed) { - t.Fatalf("have %#v\nwant %#v", f, atomFeed) - } -} - -// hget http://codereview.appspot.com/rss/mine/rsc -const atomFeedString = ` -<?xml version="1.0" encoding="utf-8"?> -<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-us"><title>Code Review - My issues</title><link href="http://codereview.appspot.com/" rel="alternate"></link><li-nk href="http://codereview.appspot.com/rss/mine/rsc" rel="self"></li-nk><id>http://codereview.appspot.com/</id><updated>2009-10-04T01:35:58+00:00</updated><author><name>rietveld<></name></author><entry><title>rietveld: an attempt at pubsubhubbub -</title><link hre-f="http://codereview.appspot.com/126085" rel="alternate"></link><updated>2009-10-04T01:35:58+00:00</updated><author><name>email-address-removed</name></author><id>urn:md5:134d9179c41f806be79b3a5f7877d19a</id><summary type="html"> - An attempt at adding pubsubhubbub support to Rietveld. -http://code.google.com/p/pubsubhubbub -http://code.google.com/p/rietveld/issues/detail?id=155 - -The server side of the protocol is trivial: - 1. add a &lt;link rel=&quot;hub&quot; href=&quot;hub-server&quot;&gt; tag to all - feeds that will be pubsubhubbubbed. - 2. every time one of those feeds changes, tell the hub - with a simple POST request. - -I have tested this by adding debug prints to a local hub -server and checking that the server got the right publish -requests. - -I can&#39;t quite get the server to work, but I think the bug -is not in my code. I think that the server expects to be -able to grab the feed and see the feed&#39;s actual URL in -the link rel=&quot;self&quot;, but the default value for that drops -the :port from the URL, and I cannot for the life of me -figure out how to get the Atom generator deep inside -django not to do that, or even where it is doing that, -or even what code is running to generate the Atom feed. -(I thought I knew but I added some assert False statements -and it kept running!) - -Ignoring that particular problem, I would appreciate -feedback on the right way to get the two values at -the top of feeds.py marked NOTE(rsc). - - -</summary></entry><entry><title>rietveld: correct tab handling -</title><link href="http://codereview.appspot.com/124106" rel="alternate"></link><updated>2009-10-03T23:02:17+00:00</updated><author><name>email-address-removed</name></author><id>urn:md5:0a2a4f19bb815101f0ba2904aed7c35a</id><summary type="html"> - This fixes the buggy tab rendering that can be seen at -http://codereview.appspot.com/116075/diff/1/2 - -The fundamental problem was that the tab code was -not being told what column the text began in, so it -didn&#39;t know where to put the tab stops. Another problem -was that some of the code assumed that string byte -offsets were the same as column offsets, which is only -true if there are no tabs. - -In the process of fixing this, I cleaned up the arguments -to Fold and ExpandTabs and renamed them Break and -_ExpandTabs so that I could be sure that I found all the -call sites. I also wanted to verify that ExpandTabs was -not being used from outside intra_region_diff.py. - - -</summary></entry></feed> ` - -type Feed struct { - XMLName Name `xml:"http://www.w3.org/2005/Atom feed"` - Title string - Id string - Link []Link - Updated Time - Author Person - Entry []Entry -} - -type Entry struct { - Title string - Id string - Link []Link - Updated Time - Author Person - Summary Text -} - -type Link struct { - Rel string `xml:"attr"` - Href string `xml:"attr"` -} - -type Person struct { - Name string - URI string - Email string - InnerXML string `xml:"innerxml"` -} - -type Text struct { - Type string `xml:"attr"` - Body string `xml:"chardata"` -} - -type Time string - -var atomFeed = Feed{ - XMLName: Name{"http://www.w3.org/2005/Atom", "feed"}, - Title: "Code Review - My issues", - Link: []Link{ - {Rel: "alternate", Href: "http://codereview.appspot.com/"}, - {Rel: "self", Href: "http://codereview.appspot.com/rss/mine/rsc"}, - }, - Id: "http://codereview.appspot.com/", - Updated: "2009-10-04T01:35:58+00:00", - Author: Person{ - Name: "rietveld<>", - InnerXML: "<name>rietveld<></name>", - }, - Entry: []Entry{ - { - Title: "rietveld: an attempt at pubsubhubbub\n", - Link: []Link{ - {Rel: "alternate", Href: "http://codereview.appspot.com/126085"}, - }, - Updated: "2009-10-04T01:35:58+00:00", - Author: Person{ - Name: "email-address-removed", - InnerXML: "<name>email-address-removed</name>", - }, - Id: "urn:md5:134d9179c41f806be79b3a5f7877d19a", - Summary: Text{ - Type: "html", - Body: ` - An attempt at adding pubsubhubbub support to Rietveld. -http://code.google.com/p/pubsubhubbub -http://code.google.com/p/rietveld/issues/detail?id=155 - -The server side of the protocol is trivial: - 1. add a <link rel="hub" href="hub-server"> tag to all - feeds that will be pubsubhubbubbed. - 2. every time one of those feeds changes, tell the hub - with a simple POST request. - -I have tested this by adding debug prints to a local hub -server and checking that the server got the right publish -requests. - -I can't quite get the server to work, but I think the bug -is not in my code. I think that the server expects to be -able to grab the feed and see the feed's actual URL in -the link rel="self", but the default value for that drops -the :port from the URL, and I cannot for the life of me -figure out how to get the Atom generator deep inside -django not to do that, or even where it is doing that, -or even what code is running to generate the Atom feed. -(I thought I knew but I added some assert False statements -and it kept running!) - -Ignoring that particular problem, I would appreciate -feedback on the right way to get the two values at -the top of feeds.py marked NOTE(rsc). - - -`, - }, - }, - { - Title: "rietveld: correct tab handling\n", - Link: []Link{ - {Rel: "alternate", Href: "http://codereview.appspot.com/124106"}, - }, - Updated: "2009-10-03T23:02:17+00:00", - Author: Person{ - Name: "email-address-removed", - InnerXML: "<name>email-address-removed</name>", - }, - Id: "urn:md5:0a2a4f19bb815101f0ba2904aed7c35a", - Summary: Text{ - Type: "html", - Body: ` - This fixes the buggy tab rendering that can be seen at -http://codereview.appspot.com/116075/diff/1/2 - -The fundamental problem was that the tab code was -not being told what column the text began in, so it -didn't know where to put the tab stops. Another problem -was that some of the code assumed that string byte -offsets were the same as column offsets, which is only -true if there are no tabs. - -In the process of fixing this, I cleaned up the arguments -to Fold and ExpandTabs and renamed them Break and -_ExpandTabs so that I could be sure that I found all the -call sites. I also wanted to verify that ExpandTabs was -not being used from outside intra_region_diff.py. - - -`, - }, - }, - }, -} - -type FieldNameTest struct { - in, out string -} - -var FieldNameTests = []FieldNameTest{ - {"Profile-Image", "profileimage"}, - {"_score", "score"}, -} - -func TestFieldName(t *testing.T) { - for _, tt := range FieldNameTests { - a := fieldName(tt.in) - if a != tt.out { - t.Fatalf("have %#v\nwant %#v\n\n", a, tt.out) - } - } -} - -const pathTestString = ` -<result> - <before>1</before> - <items> - <item1> - <value>A</value> - </item1> - <item2> - <value>B</value> - </item2> - <Item1> - <Value>C</Value> - <Value>D</Value> - </Item1> - </items> - <after>2</after> -</result> -` - -type PathTestItem struct { - Value string -} - -type PathTestA struct { - Items []PathTestItem `xml:">item1"` - Before, After string -} - -type PathTestB struct { - Other []PathTestItem `xml:"items>Item1"` - Before, After string -} - -type PathTestC struct { - Values1 []string `xml:"items>item1>value"` - Values2 []string `xml:"items>item2>value"` - Before, After string -} - -type PathTestSet struct { - Item1 []PathTestItem -} - -type PathTestD struct { - Other PathTestSet `xml:"items>"` - Before, After string -} - -var pathTests = []interface{}{ - &PathTestA{Items: []PathTestItem{{"A"}, {"D"}}, Before: "1", After: "2"}, - &PathTestB{Other: []PathTestItem{{"A"}, {"D"}}, Before: "1", After: "2"}, - &PathTestC{Values1: []string{"A", "C", "D"}, Values2: []string{"B"}, Before: "1", After: "2"}, - &PathTestD{Other: PathTestSet{Item1: []PathTestItem{{"A"}, {"D"}}}, Before: "1", After: "2"}, -} - -func TestUnmarshalPaths(t *testing.T) { - for _, pt := range pathTests { - v := reflect.New(reflect.TypeOf(pt).Elem()).Interface() - if err := Unmarshal(StringReader(pathTestString), v); err != nil { - t.Fatalf("Unmarshal: %s", err) - } - if !reflect.DeepEqual(v, pt) { - t.Fatalf("have %#v\nwant %#v", v, pt) - } - } -} - -type BadPathTestA struct { - First string `xml:"items>item1"` - Other string `xml:"items>item2"` - Second string `xml:"items>"` -} - -type BadPathTestB struct { - Other string `xml:"items>item2>value"` - First string `xml:"items>item1"` - Second string `xml:"items>item1>value"` -} - -var badPathTests = []struct { - v, e interface{} -}{ - {&BadPathTestA{}, &TagPathError{reflect.TypeOf(BadPathTestA{}), "First", "items>item1", "Second", "items>"}}, - {&BadPathTestB{}, &TagPathError{reflect.TypeOf(BadPathTestB{}), "First", "items>item1", "Second", "items>item1>value"}}, -} - -func TestUnmarshalBadPaths(t *testing.T) { - for _, tt := range badPathTests { - err := Unmarshal(StringReader(pathTestString), tt.v) - if !reflect.DeepEqual(err, tt.e) { - t.Fatalf("Unmarshal with %#v didn't fail properly: %#v", tt.v, err) - } - } -} - -func TestUnmarshalAttrs(t *testing.T) { - var f AttrTest - if err := Unmarshal(StringReader(attrString), &f); err != nil { - t.Fatalf("Unmarshal: %s", err) - } - if !reflect.DeepEqual(f, attrStruct) { - t.Fatalf("have %#v\nwant %#v", f, attrStruct) - } -} - -type AttrTest struct { - Test1 Test1 - Test2 Test2 -} - -type Test1 struct { - Int int `xml:"attr"` - Float float64 `xml:"attr"` - Uint8 uint8 `xml:"attr"` -} - -type Test2 struct { - Bool bool `xml:"attr"` -} - -const attrString = ` -<?xml version="1.0" charset="utf-8"?> -<attrtest> - <test1 int="8" float="23.5" uint8="255"/> - <test2 bool="true"/> -</attrtest> -` - -var attrStruct = AttrTest{ - Test1: Test1{ - Int: 8, - Float: 23.5, - Uint8: 255, - }, - Test2: Test2{ - Bool: true, - }, -} - -// test data for TestUnmarshalWithoutNameType - -const OK = "OK" -const withoutNameTypeData = ` -<?xml version="1.0" charset="utf-8"?> -<Test3 attr="OK" />` - -type TestThree struct { - XMLName bool `xml:"Test3"` // XMLName field without an xml.Name type - Attr string `xml:"attr"` -} - -func TestUnmarshalWithoutNameType(t *testing.T) { - var x TestThree - if err := Unmarshal(StringReader(withoutNameTypeData), &x); err != nil { - t.Fatalf("Unmarshal: %s", err) - } - if x.Attr != OK { - t.Fatalf("have %v\nwant %v", x.Attr, OK) - } -} diff --git a/libgo/go/xml/xml.go b/libgo/go/xml/xml.go deleted file mode 100644 index d534c52..0000000 --- a/libgo/go/xml/xml.go +++ /dev/null @@ -1,1697 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package xml implements a simple XML 1.0 parser that -// understands XML name spaces. -package xml - -// References: -// Annotated XML spec: http://www.xml.com/axml/testaxml.htm -// XML name spaces: http://www.w3.org/TR/REC-xml-names/ - -// TODO(rsc): -// Test error handling. - -import ( - "bufio" - "bytes" - "fmt" - "io" - "strconv" - "strings" - "unicode" - "utf8" -) - -// A SyntaxError represents a syntax error in the XML input stream. -type SyntaxError struct { - Msg string - Line int -} - -func (e *SyntaxError) Error() string { - return "XML syntax error on line " + strconv.Itoa(e.Line) + ": " + e.Msg -} - -// A Name represents an XML name (Local) annotated -// with a name space identifier (Space). -// In tokens returned by Parser.Token, the Space identifier -// is given as a canonical URL, not the short prefix used -// in the document being parsed. -type Name struct { - Space, Local string -} - -// An Attr represents an attribute in an XML element (Name=Value). -type Attr struct { - Name Name - Value string -} - -// A Token is an interface holding one of the token types: -// StartElement, EndElement, CharData, Comment, ProcInst, or Directive. -type Token interface{} - -// A StartElement represents an XML start element. -type StartElement struct { - Name Name - Attr []Attr -} - -func (e StartElement) Copy() StartElement { - attrs := make([]Attr, len(e.Attr)) - copy(e.Attr, attrs) - e.Attr = attrs - return e -} - -// An EndElement represents an XML end element. -type EndElement struct { - Name Name -} - -// A CharData represents XML character data (raw text), -// in which XML escape sequences have been replaced by -// the characters they represent. -type CharData []byte - -func makeCopy(b []byte) []byte { - b1 := make([]byte, len(b)) - copy(b1, b) - return b1 -} - -func (c CharData) Copy() CharData { return CharData(makeCopy(c)) } - -// A Comment represents an XML comment of the form <!--comment-->. -// The bytes do not include the <!-- and --> comment markers. -type Comment []byte - -func (c Comment) Copy() Comment { return Comment(makeCopy(c)) } - -// A ProcInst represents an XML processing instruction of the form <?target inst?> -type ProcInst struct { - Target string - Inst []byte -} - -func (p ProcInst) Copy() ProcInst { - p.Inst = makeCopy(p.Inst) - return p -} - -// A Directive represents an XML directive of the form <!text>. -// The bytes do not include the <! and > markers. -type Directive []byte - -func (d Directive) Copy() Directive { return Directive(makeCopy(d)) } - -// CopyToken returns a copy of a Token. -func CopyToken(t Token) Token { - switch v := t.(type) { - case CharData: - return v.Copy() - case Comment: - return v.Copy() - case Directive: - return v.Copy() - case ProcInst: - return v.Copy() - case StartElement: - return v.Copy() - } - return t -} - -// A Parser represents an XML parser reading a particular input stream. -// The parser assumes that its input is encoded in UTF-8. -type Parser struct { - // Strict defaults to true, enforcing the requirements - // of the XML specification. - // If set to false, the parser allows input containing common - // mistakes: - // * If an element is missing an end tag, the parser invents - // end tags as necessary to keep the return values from Token - // properly balanced. - // * In attribute values and character data, unknown or malformed - // character entities (sequences beginning with &) are left alone. - // - // Setting: - // - // p.Strict = false; - // p.AutoClose = HTMLAutoClose; - // p.Entity = HTMLEntity - // - // creates a parser that can handle typical HTML. - Strict bool - - // When Strict == false, AutoClose indicates a set of elements to - // consider closed immediately after they are opened, regardless - // of whether an end element is present. - AutoClose []string - - // Entity can be used to map non-standard entity names to string replacements. - // The parser behaves as if these standard mappings are present in the map, - // regardless of the actual map content: - // - // "lt": "<", - // "gt": ">", - // "amp": "&", - // "apos": "'", - // "quot": `"`, - Entity map[string]string - - // CharsetReader, if non-nil, defines a function to generate - // charset-conversion readers, converting from the provided - // non-UTF-8 charset into UTF-8. If CharsetReader is nil or - // returns an error, parsing stops with an error. One of the - // the CharsetReader's result values must be non-nil. - CharsetReader func(charset string, input io.Reader) (io.Reader, error) - - r io.ByteReader - buf bytes.Buffer - saved *bytes.Buffer - stk *stack - free *stack - needClose bool - toClose Name - nextToken Token - nextByte int - ns map[string]string - err error - line int - tmp [32]byte -} - -// NewParser creates a new XML parser reading from r. -func NewParser(r io.Reader) *Parser { - p := &Parser{ - ns: make(map[string]string), - nextByte: -1, - line: 1, - Strict: true, - } - p.switchToReader(r) - return p -} - -// Token returns the next XML token in the input stream. -// At the end of the input stream, Token returns nil, os.EOF. -// -// Slices of bytes in the returned token data refer to the -// parser's internal buffer and remain valid only until the next -// call to Token. To acquire a copy of the bytes, call CopyToken -// or the token's Copy method. -// -// Token expands self-closing elements such as <br/> -// into separate start and end elements returned by successive calls. -// -// Token guarantees that the StartElement and EndElement -// tokens it returns are properly nested and matched: -// if Token encounters an unexpected end element, -// it will return an error. -// -// Token implements XML name spaces as described by -// http://www.w3.org/TR/REC-xml-names/. Each of the -// Name structures contained in the Token has the Space -// set to the URL identifying its name space when known. -// If Token encounters an unrecognized name space prefix, -// it uses the prefix as the Space rather than report an error. -func (p *Parser) Token() (t Token, err error) { - if p.nextToken != nil { - t = p.nextToken - p.nextToken = nil - } else if t, err = p.RawToken(); err != nil { - return - } - - if !p.Strict { - if t1, ok := p.autoClose(t); ok { - p.nextToken = t - t = t1 - } - } - switch t1 := t.(type) { - case StartElement: - // In XML name spaces, the translations listed in the - // attributes apply to the element name and - // to the other attribute names, so process - // the translations first. - for _, a := range t1.Attr { - if a.Name.Space == "xmlns" { - v, ok := p.ns[a.Name.Local] - p.pushNs(a.Name.Local, v, ok) - p.ns[a.Name.Local] = a.Value - } - if a.Name.Space == "" && a.Name.Local == "xmlns" { - // Default space for untagged names - v, ok := p.ns[""] - p.pushNs("", v, ok) - p.ns[""] = a.Value - } - } - - p.translate(&t1.Name, true) - for i := range t1.Attr { - p.translate(&t1.Attr[i].Name, false) - } - p.pushElement(t1.Name) - t = t1 - - case EndElement: - p.translate(&t1.Name, true) - if !p.popElement(&t1) { - return nil, p.err - } - t = t1 - } - return -} - -// Apply name space translation to name n. -// The default name space (for Space=="") -// applies only to element names, not to attribute names. -func (p *Parser) translate(n *Name, isElementName bool) { - switch { - case n.Space == "xmlns": - return - case n.Space == "" && !isElementName: - return - case n.Space == "" && n.Local == "xmlns": - return - } - if v, ok := p.ns[n.Space]; ok { - n.Space = v - } -} - -func (p *Parser) switchToReader(r io.Reader) { - // Get efficient byte at a time reader. - // Assume that if reader has its own - // ReadByte, it's efficient enough. - // Otherwise, use bufio. - if rb, ok := r.(io.ByteReader); ok { - p.r = rb - } else { - p.r = bufio.NewReader(r) - } -} - -// Parsing state - stack holds old name space translations -// and the current set of open elements. The translations to pop when -// ending a given tag are *below* it on the stack, which is -// more work but forced on us by XML. -type stack struct { - next *stack - kind int - name Name - ok bool -} - -const ( - stkStart = iota - stkNs -) - -func (p *Parser) push(kind int) *stack { - s := p.free - if s != nil { - p.free = s.next - } else { - s = new(stack) - } - s.next = p.stk - s.kind = kind - p.stk = s - return s -} - -func (p *Parser) pop() *stack { - s := p.stk - if s != nil { - p.stk = s.next - s.next = p.free - p.free = s - } - return s -} - -// Record that we are starting an element with the given name. -func (p *Parser) pushElement(name Name) { - s := p.push(stkStart) - s.name = name -} - -// Record that we are changing the value of ns[local]. -// The old value is url, ok. -func (p *Parser) pushNs(local string, url string, ok bool) { - s := p.push(stkNs) - s.name.Local = local - s.name.Space = url - s.ok = ok -} - -// Creates a SyntaxError with the current line number. -func (p *Parser) syntaxError(msg string) error { - return &SyntaxError{Msg: msg, Line: p.line} -} - -// Record that we are ending an element with the given name. -// The name must match the record at the top of the stack, -// which must be a pushElement record. -// After popping the element, apply any undo records from -// the stack to restore the name translations that existed -// before we saw this element. -func (p *Parser) popElement(t *EndElement) bool { - s := p.pop() - name := t.Name - switch { - case s == nil || s.kind != stkStart: - p.err = p.syntaxError("unexpected end element </" + name.Local + ">") - return false - case s.name.Local != name.Local: - if !p.Strict { - p.needClose = true - p.toClose = t.Name - t.Name = s.name - return true - } - p.err = p.syntaxError("element <" + s.name.Local + "> closed by </" + name.Local + ">") - return false - case s.name.Space != name.Space: - p.err = p.syntaxError("element <" + s.name.Local + "> in space " + s.name.Space + - "closed by </" + name.Local + "> in space " + name.Space) - return false - } - - // Pop stack until a Start is on the top, undoing the - // translations that were associated with the element we just closed. - for p.stk != nil && p.stk.kind != stkStart { - s := p.pop() - if s.ok { - p.ns[s.name.Local] = s.name.Space - } else { - delete(p.ns, s.name.Local) - } - } - - return true -} - -// If the top element on the stack is autoclosing and -// t is not the end tag, invent the end tag. -func (p *Parser) autoClose(t Token) (Token, bool) { - if p.stk == nil || p.stk.kind != stkStart { - return nil, false - } - name := strings.ToLower(p.stk.name.Local) - for _, s := range p.AutoClose { - if strings.ToLower(s) == name { - // This one should be auto closed if t doesn't close it. - et, ok := t.(EndElement) - if !ok || et.Name.Local != name { - return EndElement{p.stk.name}, true - } - break - } - } - return nil, false -} - -// RawToken is like Token but does not verify that -// start and end elements match and does not translate -// name space prefixes to their corresponding URLs. -func (p *Parser) RawToken() (Token, error) { - if p.err != nil { - return nil, p.err - } - if p.needClose { - // The last element we read was self-closing and - // we returned just the StartElement half. - // Return the EndElement half now. - p.needClose = false - return EndElement{p.toClose}, nil - } - - b, ok := p.getc() - if !ok { - return nil, p.err - } - - if b != '<' { - // Text section. - p.ungetc(b) - data := p.text(-1, false) - if data == nil { - return nil, p.err - } - return CharData(data), nil - } - - if b, ok = p.mustgetc(); !ok { - return nil, p.err - } - switch b { - case '/': - // </: End element - var name Name - if name, ok = p.nsname(); !ok { - if p.err == nil { - p.err = p.syntaxError("expected element name after </") - } - return nil, p.err - } - p.space() - if b, ok = p.mustgetc(); !ok { - return nil, p.err - } - if b != '>' { - p.err = p.syntaxError("invalid characters between </" + name.Local + " and >") - return nil, p.err - } - return EndElement{name}, nil - - case '?': - // <?: Processing instruction. - // TODO(rsc): Should parse the <?xml declaration to make sure - // the version is 1.0 and the encoding is UTF-8. - var target string - if target, ok = p.name(); !ok { - if p.err == nil { - p.err = p.syntaxError("expected target name after <?") - } - return nil, p.err - } - p.space() - p.buf.Reset() - var b0 byte - for { - if b, ok = p.mustgetc(); !ok { - return nil, p.err - } - p.buf.WriteByte(b) - if b0 == '?' && b == '>' { - break - } - b0 = b - } - data := p.buf.Bytes() - data = data[0 : len(data)-2] // chop ?> - - if target == "xml" { - enc := procInstEncoding(string(data)) - if enc != "" && enc != "utf-8" && enc != "UTF-8" { - if p.CharsetReader == nil { - p.err = fmt.Errorf("xml: encoding %q declared but Parser.CharsetReader is nil", enc) - return nil, p.err - } - newr, err := p.CharsetReader(enc, p.r.(io.Reader)) - if err != nil { - p.err = fmt.Errorf("xml: opening charset %q: %v", enc, err) - return nil, p.err - } - if newr == nil { - panic("CharsetReader returned a nil Reader for charset " + enc) - } - p.switchToReader(newr) - } - } - return ProcInst{target, data}, nil - - case '!': - // <!: Maybe comment, maybe CDATA. - if b, ok = p.mustgetc(); !ok { - return nil, p.err - } - switch b { - case '-': // <!- - // Probably <!-- for a comment. - if b, ok = p.mustgetc(); !ok { - return nil, p.err - } - if b != '-' { - p.err = p.syntaxError("invalid sequence <!- not part of <!--") - return nil, p.err - } - // Look for terminator. - p.buf.Reset() - var b0, b1 byte - for { - if b, ok = p.mustgetc(); !ok { - return nil, p.err - } - p.buf.WriteByte(b) - if b0 == '-' && b1 == '-' && b == '>' { - break - } - b0, b1 = b1, b - } - data := p.buf.Bytes() - data = data[0 : len(data)-3] // chop --> - return Comment(data), nil - - case '[': // <![ - // Probably <![CDATA[. - for i := 0; i < 6; i++ { - if b, ok = p.mustgetc(); !ok { - return nil, p.err - } - if b != "CDATA["[i] { - p.err = p.syntaxError("invalid <![ sequence") - return nil, p.err - } - } - // Have <![CDATA[. Read text until ]]>. - data := p.text(-1, true) - if data == nil { - return nil, p.err - } - return CharData(data), nil - } - - // Probably a directive: <!DOCTYPE ...>, <!ENTITY ...>, etc. - // We don't care, but accumulate for caller. Quoted angle - // brackets do not count for nesting. - p.buf.Reset() - p.buf.WriteByte(b) - inquote := uint8(0) - depth := 0 - for { - if b, ok = p.mustgetc(); !ok { - return nil, p.err - } - if inquote == 0 && b == '>' && depth == 0 { - break - } - p.buf.WriteByte(b) - switch { - case b == inquote: - inquote = 0 - - case inquote != 0: - // in quotes, no special action - - case b == '\'' || b == '"': - inquote = b - - case b == '>' && inquote == 0: - depth-- - - case b == '<' && inquote == 0: - depth++ - } - } - return Directive(p.buf.Bytes()), nil - } - - // Must be an open element like <a href="foo"> - p.ungetc(b) - - var ( - name Name - empty bool - attr []Attr - ) - if name, ok = p.nsname(); !ok { - if p.err == nil { - p.err = p.syntaxError("expected element name after <") - } - return nil, p.err - } - - attr = make([]Attr, 0, 4) - for { - p.space() - if b, ok = p.mustgetc(); !ok { - return nil, p.err - } - if b == '/' { - empty = true - if b, ok = p.mustgetc(); !ok { - return nil, p.err - } - if b != '>' { - p.err = p.syntaxError("expected /> in element") - return nil, p.err - } - break - } - if b == '>' { - break - } - p.ungetc(b) - - n := len(attr) - if n >= cap(attr) { - nattr := make([]Attr, n, 2*cap(attr)) - copy(nattr, attr) - attr = nattr - } - attr = attr[0 : n+1] - a := &attr[n] - if a.Name, ok = p.nsname(); !ok { - if p.err == nil { - p.err = p.syntaxError("expected attribute name in element") - } - return nil, p.err - } - p.space() - if b, ok = p.mustgetc(); !ok { - return nil, p.err - } - if b != '=' { - if p.Strict { - p.err = p.syntaxError("attribute name without = in element") - return nil, p.err - } else { - p.ungetc(b) - a.Value = a.Name.Local - } - } else { - p.space() - data := p.attrval() - if data == nil { - return nil, p.err - } - a.Value = string(data) - } - } - if empty { - p.needClose = true - p.toClose = name - } - return StartElement{name, attr}, nil -} - -func (p *Parser) attrval() []byte { - b, ok := p.mustgetc() - if !ok { - return nil - } - // Handle quoted attribute values - if b == '"' || b == '\'' { - return p.text(int(b), false) - } - // Handle unquoted attribute values for strict parsers - if p.Strict { - p.err = p.syntaxError("unquoted or missing attribute value in element") - return nil - } - // Handle unquoted attribute values for unstrict parsers - p.ungetc(b) - p.buf.Reset() - for { - b, ok = p.mustgetc() - if !ok { - return nil - } - // http://www.w3.org/TR/REC-html40/intro/sgmltut.html#h-3.2.2 - if 'a' <= b && b <= 'z' || 'A' <= b && b <= 'Z' || - '0' <= b && b <= '9' || b == '_' || b == ':' || b == '-' { - p.buf.WriteByte(b) - } else { - p.ungetc(b) - break - } - } - return p.buf.Bytes() -} - -// Skip spaces if any -func (p *Parser) space() { - for { - b, ok := p.getc() - if !ok { - return - } - switch b { - case ' ', '\r', '\n', '\t': - default: - p.ungetc(b) - return - } - } -} - -// Read a single byte. -// If there is no byte to read, return ok==false -// and leave the error in p.err. -// Maintain line number. -func (p *Parser) getc() (b byte, ok bool) { - if p.err != nil { - return 0, false - } - if p.nextByte >= 0 { - b = byte(p.nextByte) - p.nextByte = -1 - } else { - b, p.err = p.r.ReadByte() - if p.err != nil { - return 0, false - } - if p.saved != nil { - p.saved.WriteByte(b) - } - } - if b == '\n' { - p.line++ - } - return b, true -} - -// Return saved offset. -// If we did ungetc (nextByte >= 0), have to back up one. -func (p *Parser) savedOffset() int { - n := p.saved.Len() - if p.nextByte >= 0 { - n-- - } - return n -} - -// Must read a single byte. -// If there is no byte to read, -// set p.err to SyntaxError("unexpected EOF") -// and return ok==false -func (p *Parser) mustgetc() (b byte, ok bool) { - if b, ok = p.getc(); !ok { - if p.err == io.EOF { - p.err = p.syntaxError("unexpected EOF") - } - } - return -} - -// Unread a single byte. -func (p *Parser) ungetc(b byte) { - if b == '\n' { - p.line-- - } - p.nextByte = int(b) -} - -var entity = map[string]int{ - "lt": '<', - "gt": '>', - "amp": '&', - "apos": '\'', - "quot": '"', -} - -// Read plain text section (XML calls it character data). -// If quote >= 0, we are in a quoted string and need to find the matching quote. -// If cdata == true, we are in a <![CDATA[ section and need to find ]]>. -// On failure return nil and leave the error in p.err. -func (p *Parser) text(quote int, cdata bool) []byte { - var b0, b1 byte - var trunc int - p.buf.Reset() -Input: - for { - b, ok := p.getc() - if !ok { - if cdata { - if p.err == io.EOF { - p.err = p.syntaxError("unexpected EOF in CDATA section") - } - return nil - } - break Input - } - - // <![CDATA[ section ends with ]]>. - // It is an error for ]]> to appear in ordinary text. - if b0 == ']' && b1 == ']' && b == '>' { - if cdata { - trunc = 2 - break Input - } - p.err = p.syntaxError("unescaped ]]> not in CDATA section") - return nil - } - - // Stop reading text if we see a <. - if b == '<' && !cdata { - if quote >= 0 { - p.err = p.syntaxError("unescaped < inside quoted string") - return nil - } - p.ungetc('<') - break Input - } - if quote >= 0 && b == byte(quote) { - break Input - } - if b == '&' && !cdata { - // Read escaped character expression up to semicolon. - // XML in all its glory allows a document to define and use - // its own character names with <!ENTITY ...> directives. - // Parsers are required to recognize lt, gt, amp, apos, and quot - // even if they have not been declared. That's all we allow. - var i int - for i = 0; i < len(p.tmp); i++ { - var ok bool - p.tmp[i], ok = p.getc() - if !ok { - if p.err == io.EOF { - p.err = p.syntaxError("unexpected EOF") - } - return nil - } - c := p.tmp[i] - if c == ';' { - break - } - if 'a' <= c && c <= 'z' || - 'A' <= c && c <= 'Z' || - '0' <= c && c <= '9' || - c == '_' || c == '#' { - continue - } - p.ungetc(c) - break - } - s := string(p.tmp[0:i]) - if i >= len(p.tmp) { - if !p.Strict { - b0, b1 = 0, 0 - p.buf.WriteByte('&') - p.buf.Write(p.tmp[0:i]) - continue Input - } - p.err = p.syntaxError("character entity expression &" + s + "... too long") - return nil - } - var haveText bool - var text string - if i >= 2 && s[0] == '#' { - var n uint64 - var err error - if i >= 3 && s[1] == 'x' { - n, err = strconv.Btoui64(s[2:], 16) - } else { - n, err = strconv.Btoui64(s[1:], 10) - } - if err == nil && n <= unicode.MaxRune { - text = string(n) - haveText = true - } - } else { - if r, ok := entity[s]; ok { - text = string(r) - haveText = true - } else if p.Entity != nil { - text, haveText = p.Entity[s] - } - } - if !haveText { - if !p.Strict { - b0, b1 = 0, 0 - p.buf.WriteByte('&') - p.buf.Write(p.tmp[0:i]) - continue Input - } - p.err = p.syntaxError("invalid character entity &" + s + ";") - return nil - } - p.buf.Write([]byte(text)) - b0, b1 = 0, 0 - continue Input - } - p.buf.WriteByte(b) - b0, b1 = b1, b - } - data := p.buf.Bytes() - data = data[0 : len(data)-trunc] - - // Inspect each rune for being a disallowed character. - buf := data - for len(buf) > 0 { - r, size := utf8.DecodeRune(buf) - if r == utf8.RuneError && size == 1 { - p.err = p.syntaxError("invalid UTF-8") - return nil - } - buf = buf[size:] - if !isInCharacterRange(r) { - p.err = p.syntaxError(fmt.Sprintf("illegal character code %U", r)) - return nil - } - } - - // Must rewrite \r and \r\n into \n. - w := 0 - for r := 0; r < len(data); r++ { - b := data[r] - if b == '\r' { - if r+1 < len(data) && data[r+1] == '\n' { - continue - } - b = '\n' - } - data[w] = b - w++ - } - return data[0:w] -} - -// Decide whether the given rune is in the XML Character Range, per -// the Char production of http://www.xml.com/axml/testaxml.htm, -// Section 2.2 Characters. -func isInCharacterRange(r rune) (inrange bool) { - return r == 0x09 || - r == 0x0A || - r == 0x0D || - r >= 0x20 && r <= 0xDF77 || - r >= 0xE000 && r <= 0xFFFD || - r >= 0x10000 && r <= 0x10FFFF -} - -// Get name space name: name with a : stuck in the middle. -// The part before the : is the name space identifier. -func (p *Parser) nsname() (name Name, ok bool) { - s, ok := p.name() - if !ok { - return - } - i := strings.Index(s, ":") - if i < 0 { - name.Local = s - } else { - name.Space = s[0:i] - name.Local = s[i+1:] - } - return name, true -} - -// Get name: /first(first|second)*/ -// Do not set p.err if the name is missing (unless unexpected EOF is received): -// let the caller provide better context. -func (p *Parser) name() (s string, ok bool) { - var b byte - if b, ok = p.mustgetc(); !ok { - return - } - - // As a first approximation, we gather the bytes [A-Za-z_:.-\x80-\xFF]* - if b < utf8.RuneSelf && !isNameByte(b) { - p.ungetc(b) - return "", false - } - p.buf.Reset() - p.buf.WriteByte(b) - for { - if b, ok = p.mustgetc(); !ok { - return - } - if b < utf8.RuneSelf && !isNameByte(b) { - p.ungetc(b) - break - } - p.buf.WriteByte(b) - } - - // Then we check the characters. - s = p.buf.String() - for i, c := range s { - if !unicode.Is(first, c) && (i == 0 || !unicode.Is(second, c)) { - p.err = p.syntaxError("invalid XML name: " + s) - return "", false - } - } - return s, true -} - -func isNameByte(c byte) bool { - return 'A' <= c && c <= 'Z' || - 'a' <= c && c <= 'z' || - '0' <= c && c <= '9' || - c == '_' || c == ':' || c == '.' || c == '-' -} - -// These tables were generated by cut and paste from Appendix B of -// the XML spec at http://www.xml.com/axml/testaxml.htm -// and then reformatting. First corresponds to (Letter | '_' | ':') -// and second corresponds to NameChar. - -var first = &unicode.RangeTable{ - R16: []unicode.Range16{ - {0x003A, 0x003A, 1}, - {0x0041, 0x005A, 1}, - {0x005F, 0x005F, 1}, - {0x0061, 0x007A, 1}, - {0x00C0, 0x00D6, 1}, - {0x00D8, 0x00F6, 1}, - {0x00F8, 0x00FF, 1}, - {0x0100, 0x0131, 1}, - {0x0134, 0x013E, 1}, - {0x0141, 0x0148, 1}, - {0x014A, 0x017E, 1}, - {0x0180, 0x01C3, 1}, - {0x01CD, 0x01F0, 1}, - {0x01F4, 0x01F5, 1}, - {0x01FA, 0x0217, 1}, - {0x0250, 0x02A8, 1}, - {0x02BB, 0x02C1, 1}, - {0x0386, 0x0386, 1}, - {0x0388, 0x038A, 1}, - {0x038C, 0x038C, 1}, - {0x038E, 0x03A1, 1}, - {0x03A3, 0x03CE, 1}, - {0x03D0, 0x03D6, 1}, - {0x03DA, 0x03E0, 2}, - {0x03E2, 0x03F3, 1}, - {0x0401, 0x040C, 1}, - {0x040E, 0x044F, 1}, - {0x0451, 0x045C, 1}, - {0x045E, 0x0481, 1}, - {0x0490, 0x04C4, 1}, - {0x04C7, 0x04C8, 1}, - {0x04CB, 0x04CC, 1}, - {0x04D0, 0x04EB, 1}, - {0x04EE, 0x04F5, 1}, - {0x04F8, 0x04F9, 1}, - {0x0531, 0x0556, 1}, - {0x0559, 0x0559, 1}, - {0x0561, 0x0586, 1}, - {0x05D0, 0x05EA, 1}, - {0x05F0, 0x05F2, 1}, - {0x0621, 0x063A, 1}, - {0x0641, 0x064A, 1}, - {0x0671, 0x06B7, 1}, - {0x06BA, 0x06BE, 1}, - {0x06C0, 0x06CE, 1}, - {0x06D0, 0x06D3, 1}, - {0x06D5, 0x06D5, 1}, - {0x06E5, 0x06E6, 1}, - {0x0905, 0x0939, 1}, - {0x093D, 0x093D, 1}, - {0x0958, 0x0961, 1}, - {0x0985, 0x098C, 1}, - {0x098F, 0x0990, 1}, - {0x0993, 0x09A8, 1}, - {0x09AA, 0x09B0, 1}, - {0x09B2, 0x09B2, 1}, - {0x09B6, 0x09B9, 1}, - {0x09DC, 0x09DD, 1}, - {0x09DF, 0x09E1, 1}, - {0x09F0, 0x09F1, 1}, - {0x0A05, 0x0A0A, 1}, - {0x0A0F, 0x0A10, 1}, - {0x0A13, 0x0A28, 1}, - {0x0A2A, 0x0A30, 1}, - {0x0A32, 0x0A33, 1}, - {0x0A35, 0x0A36, 1}, - {0x0A38, 0x0A39, 1}, - {0x0A59, 0x0A5C, 1}, - {0x0A5E, 0x0A5E, 1}, - {0x0A72, 0x0A74, 1}, - {0x0A85, 0x0A8B, 1}, - {0x0A8D, 0x0A8D, 1}, - {0x0A8F, 0x0A91, 1}, - {0x0A93, 0x0AA8, 1}, - {0x0AAA, 0x0AB0, 1}, - {0x0AB2, 0x0AB3, 1}, - {0x0AB5, 0x0AB9, 1}, - {0x0ABD, 0x0AE0, 0x23}, - {0x0B05, 0x0B0C, 1}, - {0x0B0F, 0x0B10, 1}, - {0x0B13, 0x0B28, 1}, - {0x0B2A, 0x0B30, 1}, - {0x0B32, 0x0B33, 1}, - {0x0B36, 0x0B39, 1}, - {0x0B3D, 0x0B3D, 1}, - {0x0B5C, 0x0B5D, 1}, - {0x0B5F, 0x0B61, 1}, - {0x0B85, 0x0B8A, 1}, - {0x0B8E, 0x0B90, 1}, - {0x0B92, 0x0B95, 1}, - {0x0B99, 0x0B9A, 1}, - {0x0B9C, 0x0B9C, 1}, - {0x0B9E, 0x0B9F, 1}, - {0x0BA3, 0x0BA4, 1}, - {0x0BA8, 0x0BAA, 1}, - {0x0BAE, 0x0BB5, 1}, - {0x0BB7, 0x0BB9, 1}, - {0x0C05, 0x0C0C, 1}, - {0x0C0E, 0x0C10, 1}, - {0x0C12, 0x0C28, 1}, - {0x0C2A, 0x0C33, 1}, - {0x0C35, 0x0C39, 1}, - {0x0C60, 0x0C61, 1}, - {0x0C85, 0x0C8C, 1}, - {0x0C8E, 0x0C90, 1}, - {0x0C92, 0x0CA8, 1}, - {0x0CAA, 0x0CB3, 1}, - {0x0CB5, 0x0CB9, 1}, - {0x0CDE, 0x0CDE, 1}, - {0x0CE0, 0x0CE1, 1}, - {0x0D05, 0x0D0C, 1}, - {0x0D0E, 0x0D10, 1}, - {0x0D12, 0x0D28, 1}, - {0x0D2A, 0x0D39, 1}, - {0x0D60, 0x0D61, 1}, - {0x0E01, 0x0E2E, 1}, - {0x0E30, 0x0E30, 1}, - {0x0E32, 0x0E33, 1}, - {0x0E40, 0x0E45, 1}, - {0x0E81, 0x0E82, 1}, - {0x0E84, 0x0E84, 1}, - {0x0E87, 0x0E88, 1}, - {0x0E8A, 0x0E8D, 3}, - {0x0E94, 0x0E97, 1}, - {0x0E99, 0x0E9F, 1}, - {0x0EA1, 0x0EA3, 1}, - {0x0EA5, 0x0EA7, 2}, - {0x0EAA, 0x0EAB, 1}, - {0x0EAD, 0x0EAE, 1}, - {0x0EB0, 0x0EB0, 1}, - {0x0EB2, 0x0EB3, 1}, - {0x0EBD, 0x0EBD, 1}, - {0x0EC0, 0x0EC4, 1}, - {0x0F40, 0x0F47, 1}, - {0x0F49, 0x0F69, 1}, - {0x10A0, 0x10C5, 1}, - {0x10D0, 0x10F6, 1}, - {0x1100, 0x1100, 1}, - {0x1102, 0x1103, 1}, - {0x1105, 0x1107, 1}, - {0x1109, 0x1109, 1}, - {0x110B, 0x110C, 1}, - {0x110E, 0x1112, 1}, - {0x113C, 0x1140, 2}, - {0x114C, 0x1150, 2}, - {0x1154, 0x1155, 1}, - {0x1159, 0x1159, 1}, - {0x115F, 0x1161, 1}, - {0x1163, 0x1169, 2}, - {0x116D, 0x116E, 1}, - {0x1172, 0x1173, 1}, - {0x1175, 0x119E, 0x119E - 0x1175}, - {0x11A8, 0x11AB, 0x11AB - 0x11A8}, - {0x11AE, 0x11AF, 1}, - {0x11B7, 0x11B8, 1}, - {0x11BA, 0x11BA, 1}, - {0x11BC, 0x11C2, 1}, - {0x11EB, 0x11F0, 0x11F0 - 0x11EB}, - {0x11F9, 0x11F9, 1}, - {0x1E00, 0x1E9B, 1}, - {0x1EA0, 0x1EF9, 1}, - {0x1F00, 0x1F15, 1}, - {0x1F18, 0x1F1D, 1}, - {0x1F20, 0x1F45, 1}, - {0x1F48, 0x1F4D, 1}, - {0x1F50, 0x1F57, 1}, - {0x1F59, 0x1F5B, 0x1F5B - 0x1F59}, - {0x1F5D, 0x1F5D, 1}, - {0x1F5F, 0x1F7D, 1}, - {0x1F80, 0x1FB4, 1}, - {0x1FB6, 0x1FBC, 1}, - {0x1FBE, 0x1FBE, 1}, - {0x1FC2, 0x1FC4, 1}, - {0x1FC6, 0x1FCC, 1}, - {0x1FD0, 0x1FD3, 1}, - {0x1FD6, 0x1FDB, 1}, - {0x1FE0, 0x1FEC, 1}, - {0x1FF2, 0x1FF4, 1}, - {0x1FF6, 0x1FFC, 1}, - {0x2126, 0x2126, 1}, - {0x212A, 0x212B, 1}, - {0x212E, 0x212E, 1}, - {0x2180, 0x2182, 1}, - {0x3007, 0x3007, 1}, - {0x3021, 0x3029, 1}, - {0x3041, 0x3094, 1}, - {0x30A1, 0x30FA, 1}, - {0x3105, 0x312C, 1}, - {0x4E00, 0x9FA5, 1}, - {0xAC00, 0xD7A3, 1}, - }, -} - -var second = &unicode.RangeTable{ - R16: []unicode.Range16{ - {0x002D, 0x002E, 1}, - {0x0030, 0x0039, 1}, - {0x00B7, 0x00B7, 1}, - {0x02D0, 0x02D1, 1}, - {0x0300, 0x0345, 1}, - {0x0360, 0x0361, 1}, - {0x0387, 0x0387, 1}, - {0x0483, 0x0486, 1}, - {0x0591, 0x05A1, 1}, - {0x05A3, 0x05B9, 1}, - {0x05BB, 0x05BD, 1}, - {0x05BF, 0x05BF, 1}, - {0x05C1, 0x05C2, 1}, - {0x05C4, 0x0640, 0x0640 - 0x05C4}, - {0x064B, 0x0652, 1}, - {0x0660, 0x0669, 1}, - {0x0670, 0x0670, 1}, - {0x06D6, 0x06DC, 1}, - {0x06DD, 0x06DF, 1}, - {0x06E0, 0x06E4, 1}, - {0x06E7, 0x06E8, 1}, - {0x06EA, 0x06ED, 1}, - {0x06F0, 0x06F9, 1}, - {0x0901, 0x0903, 1}, - {0x093C, 0x093C, 1}, - {0x093E, 0x094C, 1}, - {0x094D, 0x094D, 1}, - {0x0951, 0x0954, 1}, - {0x0962, 0x0963, 1}, - {0x0966, 0x096F, 1}, - {0x0981, 0x0983, 1}, - {0x09BC, 0x09BC, 1}, - {0x09BE, 0x09BF, 1}, - {0x09C0, 0x09C4, 1}, - {0x09C7, 0x09C8, 1}, - {0x09CB, 0x09CD, 1}, - {0x09D7, 0x09D7, 1}, - {0x09E2, 0x09E3, 1}, - {0x09E6, 0x09EF, 1}, - {0x0A02, 0x0A3C, 0x3A}, - {0x0A3E, 0x0A3F, 1}, - {0x0A40, 0x0A42, 1}, - {0x0A47, 0x0A48, 1}, - {0x0A4B, 0x0A4D, 1}, - {0x0A66, 0x0A6F, 1}, - {0x0A70, 0x0A71, 1}, - {0x0A81, 0x0A83, 1}, - {0x0ABC, 0x0ABC, 1}, - {0x0ABE, 0x0AC5, 1}, - {0x0AC7, 0x0AC9, 1}, - {0x0ACB, 0x0ACD, 1}, - {0x0AE6, 0x0AEF, 1}, - {0x0B01, 0x0B03, 1}, - {0x0B3C, 0x0B3C, 1}, - {0x0B3E, 0x0B43, 1}, - {0x0B47, 0x0B48, 1}, - {0x0B4B, 0x0B4D, 1}, - {0x0B56, 0x0B57, 1}, - {0x0B66, 0x0B6F, 1}, - {0x0B82, 0x0B83, 1}, - {0x0BBE, 0x0BC2, 1}, - {0x0BC6, 0x0BC8, 1}, - {0x0BCA, 0x0BCD, 1}, - {0x0BD7, 0x0BD7, 1}, - {0x0BE7, 0x0BEF, 1}, - {0x0C01, 0x0C03, 1}, - {0x0C3E, 0x0C44, 1}, - {0x0C46, 0x0C48, 1}, - {0x0C4A, 0x0C4D, 1}, - {0x0C55, 0x0C56, 1}, - {0x0C66, 0x0C6F, 1}, - {0x0C82, 0x0C83, 1}, - {0x0CBE, 0x0CC4, 1}, - {0x0CC6, 0x0CC8, 1}, - {0x0CCA, 0x0CCD, 1}, - {0x0CD5, 0x0CD6, 1}, - {0x0CE6, 0x0CEF, 1}, - {0x0D02, 0x0D03, 1}, - {0x0D3E, 0x0D43, 1}, - {0x0D46, 0x0D48, 1}, - {0x0D4A, 0x0D4D, 1}, - {0x0D57, 0x0D57, 1}, - {0x0D66, 0x0D6F, 1}, - {0x0E31, 0x0E31, 1}, - {0x0E34, 0x0E3A, 1}, - {0x0E46, 0x0E46, 1}, - {0x0E47, 0x0E4E, 1}, - {0x0E50, 0x0E59, 1}, - {0x0EB1, 0x0EB1, 1}, - {0x0EB4, 0x0EB9, 1}, - {0x0EBB, 0x0EBC, 1}, - {0x0EC6, 0x0EC6, 1}, - {0x0EC8, 0x0ECD, 1}, - {0x0ED0, 0x0ED9, 1}, - {0x0F18, 0x0F19, 1}, - {0x0F20, 0x0F29, 1}, - {0x0F35, 0x0F39, 2}, - {0x0F3E, 0x0F3F, 1}, - {0x0F71, 0x0F84, 1}, - {0x0F86, 0x0F8B, 1}, - {0x0F90, 0x0F95, 1}, - {0x0F97, 0x0F97, 1}, - {0x0F99, 0x0FAD, 1}, - {0x0FB1, 0x0FB7, 1}, - {0x0FB9, 0x0FB9, 1}, - {0x20D0, 0x20DC, 1}, - {0x20E1, 0x3005, 0x3005 - 0x20E1}, - {0x302A, 0x302F, 1}, - {0x3031, 0x3035, 1}, - {0x3099, 0x309A, 1}, - {0x309D, 0x309E, 1}, - {0x30FC, 0x30FE, 1}, - }, -} - -// HTMLEntity is an entity map containing translations for the -// standard HTML entity characters. -var HTMLEntity = htmlEntity - -var htmlEntity = map[string]string{ - /* - hget http://www.w3.org/TR/html4/sgml/entities.html | - ssam ' - ,y /\>/ x/\<(.|\n)+/ s/\n/ /g - ,x v/^\<!ENTITY/d - ,s/\<!ENTITY ([^ ]+) .*U\+([0-9A-F][0-9A-F][0-9A-F][0-9A-F]) .+/ "\1": "\\u\2",/g - ' - */ - "nbsp": "\u00A0", - "iexcl": "\u00A1", - "cent": "\u00A2", - "pound": "\u00A3", - "curren": "\u00A4", - "yen": "\u00A5", - "brvbar": "\u00A6", - "sect": "\u00A7", - "uml": "\u00A8", - "copy": "\u00A9", - "ordf": "\u00AA", - "laquo": "\u00AB", - "not": "\u00AC", - "shy": "\u00AD", - "reg": "\u00AE", - "macr": "\u00AF", - "deg": "\u00B0", - "plusmn": "\u00B1", - "sup2": "\u00B2", - "sup3": "\u00B3", - "acute": "\u00B4", - "micro": "\u00B5", - "para": "\u00B6", - "middot": "\u00B7", - "cedil": "\u00B8", - "sup1": "\u00B9", - "ordm": "\u00BA", - "raquo": "\u00BB", - "frac14": "\u00BC", - "frac12": "\u00BD", - "frac34": "\u00BE", - "iquest": "\u00BF", - "Agrave": "\u00C0", - "Aacute": "\u00C1", - "Acirc": "\u00C2", - "Atilde": "\u00C3", - "Auml": "\u00C4", - "Aring": "\u00C5", - "AElig": "\u00C6", - "Ccedil": "\u00C7", - "Egrave": "\u00C8", - "Eacute": "\u00C9", - "Ecirc": "\u00CA", - "Euml": "\u00CB", - "Igrave": "\u00CC", - "Iacute": "\u00CD", - "Icirc": "\u00CE", - "Iuml": "\u00CF", - "ETH": "\u00D0", - "Ntilde": "\u00D1", - "Ograve": "\u00D2", - "Oacute": "\u00D3", - "Ocirc": "\u00D4", - "Otilde": "\u00D5", - "Ouml": "\u00D6", - "times": "\u00D7", - "Oslash": "\u00D8", - "Ugrave": "\u00D9", - "Uacute": "\u00DA", - "Ucirc": "\u00DB", - "Uuml": "\u00DC", - "Yacute": "\u00DD", - "THORN": "\u00DE", - "szlig": "\u00DF", - "agrave": "\u00E0", - "aacute": "\u00E1", - "acirc": "\u00E2", - "atilde": "\u00E3", - "auml": "\u00E4", - "aring": "\u00E5", - "aelig": "\u00E6", - "ccedil": "\u00E7", - "egrave": "\u00E8", - "eacute": "\u00E9", - "ecirc": "\u00EA", - "euml": "\u00EB", - "igrave": "\u00EC", - "iacute": "\u00ED", - "icirc": "\u00EE", - "iuml": "\u00EF", - "eth": "\u00F0", - "ntilde": "\u00F1", - "ograve": "\u00F2", - "oacute": "\u00F3", - "ocirc": "\u00F4", - "otilde": "\u00F5", - "ouml": "\u00F6", - "divide": "\u00F7", - "oslash": "\u00F8", - "ugrave": "\u00F9", - "uacute": "\u00FA", - "ucirc": "\u00FB", - "uuml": "\u00FC", - "yacute": "\u00FD", - "thorn": "\u00FE", - "yuml": "\u00FF", - "fnof": "\u0192", - "Alpha": "\u0391", - "Beta": "\u0392", - "Gamma": "\u0393", - "Delta": "\u0394", - "Epsilon": "\u0395", - "Zeta": "\u0396", - "Eta": "\u0397", - "Theta": "\u0398", - "Iota": "\u0399", - "Kappa": "\u039A", - "Lambda": "\u039B", - "Mu": "\u039C", - "Nu": "\u039D", - "Xi": "\u039E", - "Omicron": "\u039F", - "Pi": "\u03A0", - "Rho": "\u03A1", - "Sigma": "\u03A3", - "Tau": "\u03A4", - "Upsilon": "\u03A5", - "Phi": "\u03A6", - "Chi": "\u03A7", - "Psi": "\u03A8", - "Omega": "\u03A9", - "alpha": "\u03B1", - "beta": "\u03B2", - "gamma": "\u03B3", - "delta": "\u03B4", - "epsilon": "\u03B5", - "zeta": "\u03B6", - "eta": "\u03B7", - "theta": "\u03B8", - "iota": "\u03B9", - "kappa": "\u03BA", - "lambda": "\u03BB", - "mu": "\u03BC", - "nu": "\u03BD", - "xi": "\u03BE", - "omicron": "\u03BF", - "pi": "\u03C0", - "rho": "\u03C1", - "sigmaf": "\u03C2", - "sigma": "\u03C3", - "tau": "\u03C4", - "upsilon": "\u03C5", - "phi": "\u03C6", - "chi": "\u03C7", - "psi": "\u03C8", - "omega": "\u03C9", - "thetasym": "\u03D1", - "upsih": "\u03D2", - "piv": "\u03D6", - "bull": "\u2022", - "hellip": "\u2026", - "prime": "\u2032", - "Prime": "\u2033", - "oline": "\u203E", - "frasl": "\u2044", - "weierp": "\u2118", - "image": "\u2111", - "real": "\u211C", - "trade": "\u2122", - "alefsym": "\u2135", - "larr": "\u2190", - "uarr": "\u2191", - "rarr": "\u2192", - "darr": "\u2193", - "harr": "\u2194", - "crarr": "\u21B5", - "lArr": "\u21D0", - "uArr": "\u21D1", - "rArr": "\u21D2", - "dArr": "\u21D3", - "hArr": "\u21D4", - "forall": "\u2200", - "part": "\u2202", - "exist": "\u2203", - "empty": "\u2205", - "nabla": "\u2207", - "isin": "\u2208", - "notin": "\u2209", - "ni": "\u220B", - "prod": "\u220F", - "sum": "\u2211", - "minus": "\u2212", - "lowast": "\u2217", - "radic": "\u221A", - "prop": "\u221D", - "infin": "\u221E", - "ang": "\u2220", - "and": "\u2227", - "or": "\u2228", - "cap": "\u2229", - "cup": "\u222A", - "int": "\u222B", - "there4": "\u2234", - "sim": "\u223C", - "cong": "\u2245", - "asymp": "\u2248", - "ne": "\u2260", - "equiv": "\u2261", - "le": "\u2264", - "ge": "\u2265", - "sub": "\u2282", - "sup": "\u2283", - "nsub": "\u2284", - "sube": "\u2286", - "supe": "\u2287", - "oplus": "\u2295", - "otimes": "\u2297", - "perp": "\u22A5", - "sdot": "\u22C5", - "lceil": "\u2308", - "rceil": "\u2309", - "lfloor": "\u230A", - "rfloor": "\u230B", - "lang": "\u2329", - "rang": "\u232A", - "loz": "\u25CA", - "spades": "\u2660", - "clubs": "\u2663", - "hearts": "\u2665", - "diams": "\u2666", - "quot": "\u0022", - "amp": "\u0026", - "lt": "\u003C", - "gt": "\u003E", - "OElig": "\u0152", - "oelig": "\u0153", - "Scaron": "\u0160", - "scaron": "\u0161", - "Yuml": "\u0178", - "circ": "\u02C6", - "tilde": "\u02DC", - "ensp": "\u2002", - "emsp": "\u2003", - "thinsp": "\u2009", - "zwnj": "\u200C", - "zwj": "\u200D", - "lrm": "\u200E", - "rlm": "\u200F", - "ndash": "\u2013", - "mdash": "\u2014", - "lsquo": "\u2018", - "rsquo": "\u2019", - "sbquo": "\u201A", - "ldquo": "\u201C", - "rdquo": "\u201D", - "bdquo": "\u201E", - "dagger": "\u2020", - "Dagger": "\u2021", - "permil": "\u2030", - "lsaquo": "\u2039", - "rsaquo": "\u203A", - "euro": "\u20AC", -} - -// HTMLAutoClose is the set of HTML elements that -// should be considered to close automatically. -var HTMLAutoClose = htmlAutoClose - -var htmlAutoClose = []string{ - /* - hget http://www.w3.org/TR/html4/loose.dtd | - 9 sed -n 's/<!ELEMENT (.*) - O EMPTY.+/ "\1",/p' | tr A-Z a-z - */ - "basefont", - "br", - "area", - "link", - "img", - "param", - "hr", - "input", - "col ", - "frame", - "isindex", - "base", - "meta", -} - -var ( - esc_quot = []byte(""") // shorter than """ - esc_apos = []byte("'") // shorter than "'" - esc_amp = []byte("&") - esc_lt = []byte("<") - esc_gt = []byte(">") -) - -// Escape writes to w the properly escaped XML equivalent -// of the plain text data s. -func Escape(w io.Writer, s []byte) { - var esc []byte - last := 0 - for i, c := range s { - switch c { - case '"': - esc = esc_quot - case '\'': - esc = esc_apos - case '&': - esc = esc_amp - case '<': - esc = esc_lt - case '>': - esc = esc_gt - default: - continue - } - w.Write(s[last:i]) - w.Write(esc) - last = i + 1 - } - w.Write(s[last:]) -} - -// procInstEncoding parses the `encoding="..."` or `encoding='...'` -// value out of the provided string, returning "" if not found. -func procInstEncoding(s string) string { - // TODO: this parsing is somewhat lame and not exact. - // It works for all actual cases, though. - idx := strings.Index(s, "encoding=") - if idx == -1 { - return "" - } - v := s[idx+len("encoding="):] - if v == "" { - return "" - } - if v[0] != '\'' && v[0] != '"' { - return "" - } - idx = strings.IndexRune(v[1:], rune(v[0])) - if idx == -1 { - return "" - } - return v[1 : idx+1] -} diff --git a/libgo/go/xml/xml_test.go b/libgo/go/xml/xml_test.go deleted file mode 100644 index 1b40d0c4..0000000 --- a/libgo/go/xml/xml_test.go +++ /dev/null @@ -1,609 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package xml - -import ( - "bytes" - "io" - "os" - "reflect" - "strings" - "testing" -) - -const testInput = ` -<?xml version="1.0" encoding="UTF-8"?> -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" - "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> -<body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` + - "\r\n\t" + ` > - <hello lang="en">World <>'" 白鵬翔</hello> - <goodbye /> - <outer foo:attr="value" xmlns:tag="ns4"> - <inner/> - </outer> - <tag:name> - <![CDATA[Some text here.]]> - </tag:name> -</body><!-- missing final newline -->` - -var rawTokens = []Token{ - CharData([]byte("\n")), - ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, - CharData([]byte("\n")), - Directive([]byte(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" - "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), - ), - CharData([]byte("\n")), - StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, - CharData([]byte("\n ")), - StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, - CharData([]byte("World <>'\" 白鵬翔")), - EndElement{Name{"", "hello"}}, - CharData([]byte("\n ")), - StartElement{Name{"", "goodbye"}, nil}, - EndElement{Name{"", "goodbye"}}, - CharData([]byte("\n ")), - StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, - CharData([]byte("\n ")), - StartElement{Name{"", "inner"}, nil}, - EndElement{Name{"", "inner"}}, - CharData([]byte("\n ")), - EndElement{Name{"", "outer"}}, - CharData([]byte("\n ")), - StartElement{Name{"tag", "name"}, nil}, - CharData([]byte("\n ")), - CharData([]byte("Some text here.")), - CharData([]byte("\n ")), - EndElement{Name{"tag", "name"}}, - CharData([]byte("\n")), - EndElement{Name{"", "body"}}, - Comment([]byte(" missing final newline ")), -} - -var cookedTokens = []Token{ - CharData([]byte("\n")), - ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, - CharData([]byte("\n")), - Directive([]byte(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" - "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), - ), - CharData([]byte("\n")), - StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, - CharData([]byte("\n ")), - StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, - CharData([]byte("World <>'\" 白鵬翔")), - EndElement{Name{"ns2", "hello"}}, - CharData([]byte("\n ")), - StartElement{Name{"ns2", "goodbye"}, nil}, - EndElement{Name{"ns2", "goodbye"}}, - CharData([]byte("\n ")), - StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, - CharData([]byte("\n ")), - StartElement{Name{"ns2", "inner"}, nil}, - EndElement{Name{"ns2", "inner"}}, - CharData([]byte("\n ")), - EndElement{Name{"ns2", "outer"}}, - CharData([]byte("\n ")), - StartElement{Name{"ns3", "name"}, nil}, - CharData([]byte("\n ")), - CharData([]byte("Some text here.")), - CharData([]byte("\n ")), - EndElement{Name{"ns3", "name"}}, - CharData([]byte("\n")), - EndElement{Name{"ns2", "body"}}, - Comment([]byte(" missing final newline ")), -} - -const testInputAltEncoding = ` -<?xml version="1.0" encoding="x-testing-uppercase"?> -<TAG>VALUE</TAG>` - -var rawTokensAltEncoding = []Token{ - CharData([]byte("\n")), - ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)}, - CharData([]byte("\n")), - StartElement{Name{"", "tag"}, nil}, - CharData([]byte("value")), - EndElement{Name{"", "tag"}}, -} - -var xmlInput = []string{ - // unexpected EOF cases - "<", - "<t", - "<t ", - "<t/", - "<!", - "<!-", - "<!--", - "<!--c-", - "<!--c--", - "<!d", - "<t></", - "<t></t", - "<?", - "<?p", - "<t a", - "<t a=", - "<t a='", - "<t a=''", - "<t/><![", - "<t/><![C", - "<t/><![CDATA[d", - "<t/><![CDATA[d]", - "<t/><![CDATA[d]]", - - // other Syntax errors - "<>", - "<t/a", - "<0 />", - "<?0 >", - // "<!0 >", // let the Token() caller handle - "</0>", - "<t 0=''>", - "<t a='&'>", - "<t a='<'>", - "<t> c;</t>", - "<t a>", - "<t a=>", - "<t a=v>", - // "<![CDATA[d]]>", // let the Token() caller handle - "<t></e>", - "<t></>", - "<t></t!", - "<t>cdata]]></t>", -} - -type stringReader struct { - s string - off int -} - -func (r *stringReader) Read(b []byte) (n int, err error) { - if r.off >= len(r.s) { - return 0, io.EOF - } - for r.off < len(r.s) && n < len(b) { - b[n] = r.s[r.off] - n++ - r.off++ - } - return -} - -func (r *stringReader) ReadByte() (b byte, err error) { - if r.off >= len(r.s) { - return 0, io.EOF - } - b = r.s[r.off] - r.off++ - return -} - -func StringReader(s string) io.Reader { return &stringReader{s, 0} } - -func TestRawToken(t *testing.T) { - p := NewParser(StringReader(testInput)) - testRawToken(t, p, rawTokens) -} - -type downCaser struct { - t *testing.T - r io.ByteReader -} - -func (d *downCaser) ReadByte() (c byte, err error) { - c, err = d.r.ReadByte() - if c >= 'A' && c <= 'Z' { - c += 'a' - 'A' - } - return -} - -func (d *downCaser) Read(p []byte) (int, error) { - d.t.Fatalf("unexpected Read call on downCaser reader") - return 0, os.EINVAL -} - -func TestRawTokenAltEncoding(t *testing.T) { - sawEncoding := "" - p := NewParser(StringReader(testInputAltEncoding)) - p.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { - sawEncoding = charset - if charset != "x-testing-uppercase" { - t.Fatalf("unexpected charset %q", charset) - } - return &downCaser{t, input.(io.ByteReader)}, nil - } - testRawToken(t, p, rawTokensAltEncoding) -} - -func TestRawTokenAltEncodingNoConverter(t *testing.T) { - p := NewParser(StringReader(testInputAltEncoding)) - token, err := p.RawToken() - if token == nil { - t.Fatalf("expected a token on first RawToken call") - } - if err != nil { - t.Fatal(err) - } - token, err = p.RawToken() - if token != nil { - t.Errorf("expected a nil token; got %#v", token) - } - if err == nil { - t.Fatalf("expected an error on second RawToken call") - } - const encoding = "x-testing-uppercase" - if !strings.Contains(err.Error(), encoding) { - t.Errorf("expected error to contain %q; got error: %v", - encoding, err) - } -} - -func testRawToken(t *testing.T, p *Parser, rawTokens []Token) { - for i, want := range rawTokens { - have, err := p.RawToken() - if err != nil { - t.Fatalf("token %d: unexpected error: %s", i, err) - } - if !reflect.DeepEqual(have, want) { - t.Errorf("token %d = %#v want %#v", i, have, want) - } - } -} - -// Ensure that directives (specifically !DOCTYPE) include the complete -// text of any nested directives, noting that < and > do not change -// nesting depth if they are in single or double quotes. - -var nestedDirectivesInput = ` -<!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]> -<!DOCTYPE [<!ENTITY xlt ">">]> -<!DOCTYPE [<!ENTITY xlt "<">]> -<!DOCTYPE [<!ENTITY xlt '>'>]> -<!DOCTYPE [<!ENTITY xlt '<'>]> -<!DOCTYPE [<!ENTITY xlt '">'>]> -<!DOCTYPE [<!ENTITY xlt "'<">]> -` - -var nestedDirectivesTokens = []Token{ - CharData([]byte("\n")), - Directive([]byte(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`)), - CharData([]byte("\n")), - Directive([]byte(`DOCTYPE [<!ENTITY xlt ">">]`)), - CharData([]byte("\n")), - Directive([]byte(`DOCTYPE [<!ENTITY xlt "<">]`)), - CharData([]byte("\n")), - Directive([]byte(`DOCTYPE [<!ENTITY xlt '>'>]`)), - CharData([]byte("\n")), - Directive([]byte(`DOCTYPE [<!ENTITY xlt '<'>]`)), - CharData([]byte("\n")), - Directive([]byte(`DOCTYPE [<!ENTITY xlt '">'>]`)), - CharData([]byte("\n")), - Directive([]byte(`DOCTYPE [<!ENTITY xlt "'<">]`)), - CharData([]byte("\n")), -} - -func TestNestedDirectives(t *testing.T) { - p := NewParser(StringReader(nestedDirectivesInput)) - - for i, want := range nestedDirectivesTokens { - have, err := p.Token() - if err != nil { - t.Fatalf("token %d: unexpected error: %s", i, err) - } - if !reflect.DeepEqual(have, want) { - t.Errorf("token %d = %#v want %#v", i, have, want) - } - } -} - -func TestToken(t *testing.T) { - p := NewParser(StringReader(testInput)) - - for i, want := range cookedTokens { - have, err := p.Token() - if err != nil { - t.Fatalf("token %d: unexpected error: %s", i, err) - } - if !reflect.DeepEqual(have, want) { - t.Errorf("token %d = %#v want %#v", i, have, want) - } - } -} - -func TestSyntax(t *testing.T) { - for i := range xmlInput { - p := NewParser(StringReader(xmlInput[i])) - var err error - for _, err = p.Token(); err == nil; _, err = p.Token() { - } - if _, ok := err.(*SyntaxError); !ok { - t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i]) - } - } -} - -type allScalars struct { - True1 bool - True2 bool - False1 bool - False2 bool - Int int - Int8 int8 - Int16 int16 - Int32 int32 - Int64 int64 - Uint int - Uint8 uint8 - Uint16 uint16 - Uint32 uint32 - Uint64 uint64 - Uintptr uintptr - Float32 float32 - Float64 float64 - String string - PtrString *string -} - -var all = allScalars{ - True1: true, - True2: true, - False1: false, - False2: false, - Int: 1, - Int8: -2, - Int16: 3, - Int32: -4, - Int64: 5, - Uint: 6, - Uint8: 7, - Uint16: 8, - Uint32: 9, - Uint64: 10, - Uintptr: 11, - Float32: 13.0, - Float64: 14.0, - String: "15", - PtrString: &sixteen, -} - -var sixteen = "16" - -const testScalarsInput = `<allscalars> - <true1>true</true1> - <true2>1</true2> - <false1>false</false1> - <false2>0</false2> - <int>1</int> - <int8>-2</int8> - <int16>3</int16> - <int32>-4</int32> - <int64>5</int64> - <uint>6</uint> - <uint8>7</uint8> - <uint16>8</uint16> - <uint32>9</uint32> - <uint64>10</uint64> - <uintptr>11</uintptr> - <float>12.0</float> - <float32>13.0</float32> - <float64>14.0</float64> - <string>15</string> - <ptrstring>16</ptrstring> -</allscalars>` - -func TestAllScalars(t *testing.T) { - var a allScalars - buf := bytes.NewBufferString(testScalarsInput) - err := Unmarshal(buf, &a) - - if err != nil { - t.Fatal(err) - } - if !reflect.DeepEqual(a, all) { - t.Errorf("have %+v want %+v", a, all) - } -} - -type item struct { - Field_a string -} - -func TestIssue569(t *testing.T) { - data := `<item><field_a>abcd</field_a></item>` - var i item - buf := bytes.NewBufferString(data) - err := Unmarshal(buf, &i) - - if err != nil || i.Field_a != "abcd" { - t.Fatal("Expecting abcd") - } -} - -func TestUnquotedAttrs(t *testing.T) { - data := "<tag attr=azAZ09:-_\t>" - p := NewParser(StringReader(data)) - p.Strict = false - token, err := p.Token() - if _, ok := err.(*SyntaxError); ok { - t.Errorf("Unexpected error: %v", err) - } - if token.(StartElement).Name.Local != "tag" { - t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) - } - attr := token.(StartElement).Attr[0] - if attr.Value != "azAZ09:-_" { - t.Errorf("Unexpected attribute value: %v", attr.Value) - } - if attr.Name.Local != "attr" { - t.Errorf("Unexpected attribute name: %v", attr.Name.Local) - } -} - -func TestValuelessAttrs(t *testing.T) { - tests := [][3]string{ - {"<p nowrap>", "p", "nowrap"}, - {"<p nowrap >", "p", "nowrap"}, - {"<input checked/>", "input", "checked"}, - {"<input checked />", "input", "checked"}, - } - for _, test := range tests { - p := NewParser(StringReader(test[0])) - p.Strict = false - token, err := p.Token() - if _, ok := err.(*SyntaxError); ok { - t.Errorf("Unexpected error: %v", err) - } - if token.(StartElement).Name.Local != test[1] { - t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) - } - attr := token.(StartElement).Attr[0] - if attr.Value != test[2] { - t.Errorf("Unexpected attribute value: %v", attr.Value) - } - if attr.Name.Local != test[2] { - t.Errorf("Unexpected attribute name: %v", attr.Name.Local) - } - } -} - -func TestCopyTokenCharData(t *testing.T) { - data := []byte("same data") - var tok1 Token = CharData(data) - tok2 := CopyToken(tok1) - if !reflect.DeepEqual(tok1, tok2) { - t.Error("CopyToken(CharData) != CharData") - } - data[1] = 'o' - if reflect.DeepEqual(tok1, tok2) { - t.Error("CopyToken(CharData) uses same buffer.") - } -} - -func TestCopyTokenStartElement(t *testing.T) { - elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}} - var tok1 Token = elt - tok2 := CopyToken(tok1) - if !reflect.DeepEqual(tok1, tok2) { - t.Error("CopyToken(StartElement) != StartElement") - } - elt.Attr[0] = Attr{Name{"", "lang"}, "de"} - if reflect.DeepEqual(tok1, tok2) { - t.Error("CopyToken(CharData) uses same buffer.") - } -} - -func TestSyntaxErrorLineNum(t *testing.T) { - testInput := "<P>Foo<P>\n\n<P>Bar</>\n" - p := NewParser(StringReader(testInput)) - var err error - for _, err = p.Token(); err == nil; _, err = p.Token() { - } - synerr, ok := err.(*SyntaxError) - if !ok { - t.Error("Expected SyntaxError.") - } - if synerr.Line != 3 { - t.Error("SyntaxError didn't have correct line number.") - } -} - -func TestTrailingRawToken(t *testing.T) { - input := `<FOO></FOO> ` - p := NewParser(StringReader(input)) - var err error - for _, err = p.RawToken(); err == nil; _, err = p.RawToken() { - } - if err != io.EOF { - t.Fatalf("p.RawToken() = _, %v, want _, os.EOF", err) - } -} - -func TestTrailingToken(t *testing.T) { - input := `<FOO></FOO> ` - p := NewParser(StringReader(input)) - var err error - for _, err = p.Token(); err == nil; _, err = p.Token() { - } - if err != io.EOF { - t.Fatalf("p.Token() = _, %v, want _, os.EOF", err) - } -} - -func TestEntityInsideCDATA(t *testing.T) { - input := `<test><![CDATA[ &val=foo ]]></test>` - p := NewParser(StringReader(input)) - var err error - for _, err = p.Token(); err == nil; _, err = p.Token() { - } - if err != io.EOF { - t.Fatalf("p.Token() = _, %v, want _, os.EOF", err) - } -} - -// The last three tests (respectively one for characters in attribute -// names and two for character entities) pass not because of code -// changed for issue 1259, but instead pass with the given messages -// from other parts of xml.Parser. I provide these to note the -// current behavior of situations where one might think that character -// range checking would detect the error, but it does not in fact. - -var characterTests = []struct { - in string - err string -}{ - {"\x12<doc/>", "illegal character code U+0012"}, - {"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"}, - {"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"}, - {"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"}, - {"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"}, - {"<doc>&\x01;</doc>", "invalid character entity &;"}, - {"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &;"}, -} - -func TestDisallowedCharacters(t *testing.T) { - - for i, tt := range characterTests { - p := NewParser(StringReader(tt.in)) - var err error - - for err == nil { - _, err = p.Token() - } - synerr, ok := err.(*SyntaxError) - if !ok { - t.Fatalf("input %d p.Token() = _, %v, want _, *SyntaxError", i, err) - } - if synerr.Msg != tt.err { - t.Fatalf("input %d synerr.Msg wrong: want '%s', got '%s'", i, tt.err, synerr.Msg) - } - } -} - -type procInstEncodingTest struct { - expect, got string -} - -var procInstTests = []struct { - input, expect string -}{ - {`version="1.0" encoding="utf-8"`, "utf-8"}, - {`version="1.0" encoding='utf-8'`, "utf-8"}, - {`version="1.0" encoding='utf-8' `, "utf-8"}, - {`version="1.0" encoding=utf-8`, ""}, - {`encoding="FOO" `, "FOO"}, -} - -func TestProcInstEncoding(t *testing.T) { - for _, test := range procInstTests { - got := procInstEncoding(test.input) - if got != test.expect { - t.Errorf("procInstEncoding(%q) = %q; want %q", test.input, got, test.expect) - } - } -} |