diff options
Diffstat (limited to 'libgo/go/xml')
-rw-r--r-- | libgo/go/xml/read_test.go | 10 | ||||
-rw-r--r-- | libgo/go/xml/xml.go | 27 | ||||
-rw-r--r-- | libgo/go/xml/xml_test.go | 46 |
3 files changed, 74 insertions, 9 deletions
diff --git a/libgo/go/xml/read_test.go b/libgo/go/xml/read_test.go index 71ceddc..a6b9a8e 100644 --- a/libgo/go/xml/read_test.go +++ b/libgo/go/xml/read_test.go @@ -13,16 +13,16 @@ import ( func TestUnmarshalFeed(t *testing.T) { var f Feed - if err := Unmarshal(StringReader(rssFeedString), &f); err != nil { + if err := Unmarshal(StringReader(atomFeedString), &f); err != nil { t.Fatalf("Unmarshal: %s", err) } - if !reflect.DeepEqual(f, rssFeed) { - t.Fatalf("have %#v\nwant %#v", f, rssFeed) + if !reflect.DeepEqual(f, atomFeed) { + t.Fatalf("have %#v\nwant %#v", f, atomFeed) } } // hget http://codereview.appspot.com/rss/mine/rsc -const rssFeedString = ` +const atomFeedString = ` <?xml version="1.0" encoding="utf-8"?> <feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-us"><title>Code Review - My issues</title><link href="http://codereview.appspot.com/" rel="alternate"></link><li-nk href="http://codereview.appspot.com/rss/mine/rsc" rel="self"></li-nk><id>http://codereview.appspot.com/</id><updated>2009-10-04T01:35:58+00:00</updated><author><name>rietveld<></name></author><entry><title>rietveld: an attempt at pubsubhubbub </title><link hre-f="http://codereview.appspot.com/126085" rel="alternate"></link><updated>2009-10-04T01:35:58+00:00</updated><author><name>email-address-removed</name></author><id>urn:md5:134d9179c41f806be79b3a5f7877d19a</id><summary type="html"> @@ -115,7 +115,7 @@ type Text struct { type Time string -var rssFeed = Feed{ +var atomFeed = Feed{ XMLName: Name{"http://www.w3.org/2005/Atom", "feed"}, Title: "Code Review - My issues", Link: []Link{ diff --git a/libgo/go/xml/xml.go b/libgo/go/xml/xml.go index 4d9c672..691c13a 100644 --- a/libgo/go/xml/xml.go +++ b/libgo/go/xml/xml.go @@ -163,7 +163,7 @@ type Parser struct { // "quot": `"`, Entity map[string]string - r io.ReadByter + r io.ByteReader buf bytes.Buffer saved *bytes.Buffer stk *stack @@ -191,7 +191,7 @@ func NewParser(r io.Reader) *Parser { // Assume that if reader has its own // ReadByte, it's efficient enough. // Otherwise, use bufio. - if rb, ok := r.(io.ReadByter); ok { + if rb, ok := r.(io.ByteReader); ok { p.r = rb } else { p.r = bufio.NewReader(r) @@ -541,17 +541,36 @@ func (p *Parser) RawToken() (Token, os.Error) { } // Probably a directive: <!DOCTYPE ...>, <!ENTITY ...>, etc. - // We don't care, but accumulate for caller. + // We don't care, but accumulate for caller. Quoted angle + // brackets do not count for nesting. p.buf.Reset() p.buf.WriteByte(b) + inquote := uint8(0) + depth := 0 for { if b, ok = p.mustgetc(); !ok { return nil, p.err } - if b == '>' { + if inquote == 0 && b == '>' && depth == 0 { break } p.buf.WriteByte(b) + switch { + case b == inquote: + inquote = 0 + + case inquote != 0: + // in quotes, no special action + + case b == '\'' || b == '"': + inquote = b + + case b == '>' && inquote == 0: + depth-- + + case b == '<' && inquote == 0: + depth++ + } } return Directive(p.buf.Bytes()), nil } diff --git a/libgo/go/xml/xml_test.go b/libgo/go/xml/xml_test.go index 317ecab..887bc3d 100644 --- a/libgo/go/xml/xml_test.go +++ b/libgo/go/xml/xml_test.go @@ -185,6 +185,52 @@ func TestRawToken(t *testing.T) { } } +// Ensure that directives (specifically !DOCTYPE) include the complete +// text of any nested directives, noting that < and > do not change +// nesting depth if they are in single or double quotes. + +var nestedDirectivesInput = ` +<!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]> +<!DOCTYPE [<!ENTITY xlt ">">]> +<!DOCTYPE [<!ENTITY xlt "<">]> +<!DOCTYPE [<!ENTITY xlt '>'>]> +<!DOCTYPE [<!ENTITY xlt '<'>]> +<!DOCTYPE [<!ENTITY xlt '">'>]> +<!DOCTYPE [<!ENTITY xlt "'<">]> +` + +var nestedDirectivesTokens = []Token{ + CharData([]byte("\n")), + Directive([]byte(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`)), + CharData([]byte("\n")), + Directive([]byte(`DOCTYPE [<!ENTITY xlt ">">]`)), + CharData([]byte("\n")), + Directive([]byte(`DOCTYPE [<!ENTITY xlt "<">]`)), + CharData([]byte("\n")), + Directive([]byte(`DOCTYPE [<!ENTITY xlt '>'>]`)), + CharData([]byte("\n")), + Directive([]byte(`DOCTYPE [<!ENTITY xlt '<'>]`)), + CharData([]byte("\n")), + Directive([]byte(`DOCTYPE [<!ENTITY xlt '">'>]`)), + CharData([]byte("\n")), + Directive([]byte(`DOCTYPE [<!ENTITY xlt "'<">]`)), + CharData([]byte("\n")), +} + +func TestNestedDirectives(t *testing.T) { + p := NewParser(StringReader(nestedDirectivesInput)) + + for i, want := range nestedDirectivesTokens { + have, err := p.Token() + if err != nil { + t.Fatalf("token %d: unexpected error: %s", i, err) + } + if !reflect.DeepEqual(have, want) { + t.Errorf("token %d = %#v want %#v", i, have, want) + } + } +} + func TestToken(t *testing.T) { p := NewParser(StringReader(testInput)) |