diff options
author | Ian Lance Taylor <ian@gcc.gnu.org> | 2011-12-07 01:11:29 +0000 |
---|---|---|
committer | Ian Lance Taylor <ian@gcc.gnu.org> | 2011-12-07 01:11:29 +0000 |
commit | 9c63abc9a1d127f95162756467284cf76b47aff8 (patch) | |
tree | 84f27a6ab44d932e4b0455f18390b070b4de626e /libgo/go/encoding/xml/xml_test.go | |
parent | 374280238f934fa851273e2ee16ba53be890c6b8 (diff) | |
download | gcc-9c63abc9a1d127f95162756467284cf76b47aff8.zip gcc-9c63abc9a1d127f95162756467284cf76b47aff8.tar.gz gcc-9c63abc9a1d127f95162756467284cf76b47aff8.tar.bz2 |
libgo: Update to weekly 2011-11-09.
From-SVN: r182073
Diffstat (limited to 'libgo/go/encoding/xml/xml_test.go')
-rw-r--r-- | libgo/go/encoding/xml/xml_test.go | 609 |
1 files changed, 609 insertions, 0 deletions
diff --git a/libgo/go/encoding/xml/xml_test.go b/libgo/go/encoding/xml/xml_test.go new file mode 100644 index 0000000..6c874fa --- /dev/null +++ b/libgo/go/encoding/xml/xml_test.go @@ -0,0 +1,609 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xml + +import ( + "bytes" + "io" + "os" + "reflect" + "strings" + "testing" +) + +const testInput = ` +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` + + "\r\n\t" + ` > + <hello lang="en">World <>'" 白鵬翔</hello> + <goodbye /> + <outer foo:attr="value" xmlns:tag="ns4"> + <inner/> + </outer> + <tag:name> + <![CDATA[Some text here.]]> + </tag:name> +</body><!-- missing final newline -->` + +var rawTokens = []Token{ + CharData([]byte("\n")), + ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, + CharData([]byte("\n")), + Directive([]byte(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), + ), + CharData([]byte("\n")), + StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, + CharData([]byte("\n ")), + StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, + CharData([]byte("World <>'\" 白鵬翔")), + EndElement{Name{"", "hello"}}, + CharData([]byte("\n ")), + StartElement{Name{"", "goodbye"}, nil}, + EndElement{Name{"", "goodbye"}}, + CharData([]byte("\n ")), + StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, + CharData([]byte("\n ")), + StartElement{Name{"", "inner"}, nil}, + EndElement{Name{"", "inner"}}, + CharData([]byte("\n ")), + EndElement{Name{"", "outer"}}, + CharData([]byte("\n ")), + StartElement{Name{"tag", "name"}, nil}, + CharData([]byte("\n ")), + CharData([]byte("Some text here.")), + CharData([]byte("\n ")), + EndElement{Name{"tag", "name"}}, + CharData([]byte("\n")), + EndElement{Name{"", "body"}}, + Comment([]byte(" missing final newline ")), +} + +var cookedTokens = []Token{ + CharData([]byte("\n")), + ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, + CharData([]byte("\n")), + Directive([]byte(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), + ), + CharData([]byte("\n")), + StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, + CharData([]byte("\n ")), + StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, + CharData([]byte("World <>'\" 白鵬翔")), + EndElement{Name{"ns2", "hello"}}, + CharData([]byte("\n ")), + StartElement{Name{"ns2", "goodbye"}, nil}, + EndElement{Name{"ns2", "goodbye"}}, + CharData([]byte("\n ")), + StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, + CharData([]byte("\n ")), + StartElement{Name{"ns2", "inner"}, nil}, + EndElement{Name{"ns2", "inner"}}, + CharData([]byte("\n ")), + EndElement{Name{"ns2", "outer"}}, + CharData([]byte("\n ")), + StartElement{Name{"ns3", "name"}, nil}, + CharData([]byte("\n ")), + CharData([]byte("Some text here.")), + CharData([]byte("\n ")), + EndElement{Name{"ns3", "name"}}, + CharData([]byte("\n")), + EndElement{Name{"ns2", "body"}}, + Comment([]byte(" missing final newline ")), +} + +const testInputAltEncoding = ` +<?xml version="1.0" encoding="x-testing-uppercase"?> +<TAG>VALUE</TAG>` + +var rawTokensAltEncoding = []Token{ + CharData([]byte("\n")), + ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)}, + CharData([]byte("\n")), + StartElement{Name{"", "tag"}, nil}, + CharData([]byte("value")), + EndElement{Name{"", "tag"}}, +} + +var xmlInput = []string{ + // unexpected EOF cases + "<", + "<t", + "<t ", + "<t/", + "<!", + "<!-", + "<!--", + "<!--c-", + "<!--c--", + "<!d", + "<t></", + "<t></t", + "<?", + "<?p", + "<t a", + "<t a=", + "<t a='", + "<t a=''", + "<t/><![", + "<t/><![C", + "<t/><![CDATA[d", + "<t/><![CDATA[d]", + "<t/><![CDATA[d]]", + + // other Syntax errors + "<>", + "<t/a", + "<0 />", + "<?0 >", + // "<!0 >", // let the Token() caller handle + "</0>", + "<t 0=''>", + "<t a='&'>", + "<t a='<'>", + "<t> c;</t>", + "<t a>", + "<t a=>", + "<t a=v>", + // "<![CDATA[d]]>", // let the Token() caller handle + "<t></e>", + "<t></>", + "<t></t!", + "<t>cdata]]></t>", +} + +type stringReader struct { + s string + off int +} + +func (r *stringReader) Read(b []byte) (n int, err error) { + if r.off >= len(r.s) { + return 0, io.EOF + } + for r.off < len(r.s) && n < len(b) { + b[n] = r.s[r.off] + n++ + r.off++ + } + return +} + +func (r *stringReader) ReadByte() (b byte, err error) { + if r.off >= len(r.s) { + return 0, io.EOF + } + b = r.s[r.off] + r.off++ + return +} + +func StringReader(s string) io.Reader { return &stringReader{s, 0} } + +func TestRawToken(t *testing.T) { + p := NewParser(StringReader(testInput)) + testRawToken(t, p, rawTokens) +} + +type downCaser struct { + t *testing.T + r io.ByteReader +} + +func (d *downCaser) ReadByte() (c byte, err error) { + c, err = d.r.ReadByte() + if c >= 'A' && c <= 'Z' { + c += 'a' - 'A' + } + return +} + +func (d *downCaser) Read(p []byte) (int, error) { + d.t.Fatalf("unexpected Read call on downCaser reader") + return 0, os.EINVAL +} + +func TestRawTokenAltEncoding(t *testing.T) { + sawEncoding := "" + p := NewParser(StringReader(testInputAltEncoding)) + p.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { + sawEncoding = charset + if charset != "x-testing-uppercase" { + t.Fatalf("unexpected charset %q", charset) + } + return &downCaser{t, input.(io.ByteReader)}, nil + } + testRawToken(t, p, rawTokensAltEncoding) +} + +func TestRawTokenAltEncodingNoConverter(t *testing.T) { + p := NewParser(StringReader(testInputAltEncoding)) + token, err := p.RawToken() + if token == nil { + t.Fatalf("expected a token on first RawToken call") + } + if err != nil { + t.Fatal(err) + } + token, err = p.RawToken() + if token != nil { + t.Errorf("expected a nil token; got %#v", token) + } + if err == nil { + t.Fatalf("expected an error on second RawToken call") + } + const encoding = "x-testing-uppercase" + if !strings.Contains(err.Error(), encoding) { + t.Errorf("expected error to contain %q; got error: %v", + encoding, err) + } +} + +func testRawToken(t *testing.T, p *Parser, rawTokens []Token) { + for i, want := range rawTokens { + have, err := p.RawToken() + if err != nil { + t.Fatalf("token %d: unexpected error: %s", i, err) + } + if !reflect.DeepEqual(have, want) { + t.Errorf("token %d = %#v want %#v", i, have, want) + } + } +} + +// Ensure that directives (specifically !DOCTYPE) include the complete +// text of any nested directives, noting that < and > do not change +// nesting depth if they are in single or double quotes. + +var nestedDirectivesInput = ` +<!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]> +<!DOCTYPE [<!ENTITY xlt ">">]> +<!DOCTYPE [<!ENTITY xlt "<">]> +<!DOCTYPE [<!ENTITY xlt '>'>]> +<!DOCTYPE [<!ENTITY xlt '<'>]> +<!DOCTYPE [<!ENTITY xlt '">'>]> +<!DOCTYPE [<!ENTITY xlt "'<">]> +` + +var nestedDirectivesTokens = []Token{ + CharData([]byte("\n")), + Directive([]byte(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`)), + CharData([]byte("\n")), + Directive([]byte(`DOCTYPE [<!ENTITY xlt ">">]`)), + CharData([]byte("\n")), + Directive([]byte(`DOCTYPE [<!ENTITY xlt "<">]`)), + CharData([]byte("\n")), + Directive([]byte(`DOCTYPE [<!ENTITY xlt '>'>]`)), + CharData([]byte("\n")), + Directive([]byte(`DOCTYPE [<!ENTITY xlt '<'>]`)), + CharData([]byte("\n")), + Directive([]byte(`DOCTYPE [<!ENTITY xlt '">'>]`)), + CharData([]byte("\n")), + Directive([]byte(`DOCTYPE [<!ENTITY xlt "'<">]`)), + CharData([]byte("\n")), +} + +func TestNestedDirectives(t *testing.T) { + p := NewParser(StringReader(nestedDirectivesInput)) + + for i, want := range nestedDirectivesTokens { + have, err := p.Token() + if err != nil { + t.Fatalf("token %d: unexpected error: %s", i, err) + } + if !reflect.DeepEqual(have, want) { + t.Errorf("token %d = %#v want %#v", i, have, want) + } + } +} + +func TestToken(t *testing.T) { + p := NewParser(StringReader(testInput)) + + for i, want := range cookedTokens { + have, err := p.Token() + if err != nil { + t.Fatalf("token %d: unexpected error: %s", i, err) + } + if !reflect.DeepEqual(have, want) { + t.Errorf("token %d = %#v want %#v", i, have, want) + } + } +} + +func TestSyntax(t *testing.T) { + for i := range xmlInput { + p := NewParser(StringReader(xmlInput[i])) + var err error + for _, err = p.Token(); err == nil; _, err = p.Token() { + } + if _, ok := err.(*SyntaxError); !ok { + t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i]) + } + } +} + +type allScalars struct { + True1 bool + True2 bool + False1 bool + False2 bool + Int int + Int8 int8 + Int16 int16 + Int32 int32 + Int64 int64 + Uint int + Uint8 uint8 + Uint16 uint16 + Uint32 uint32 + Uint64 uint64 + Uintptr uintptr + Float32 float32 + Float64 float64 + String string + PtrString *string +} + +var all = allScalars{ + True1: true, + True2: true, + False1: false, + False2: false, + Int: 1, + Int8: -2, + Int16: 3, + Int32: -4, + Int64: 5, + Uint: 6, + Uint8: 7, + Uint16: 8, + Uint32: 9, + Uint64: 10, + Uintptr: 11, + Float32: 13.0, + Float64: 14.0, + String: "15", + PtrString: &sixteen, +} + +var sixteen = "16" + +const testScalarsInput = `<allscalars> + <true1>true</true1> + <true2>1</true2> + <false1>false</false1> + <false2>0</false2> + <int>1</int> + <int8>-2</int8> + <int16>3</int16> + <int32>-4</int32> + <int64>5</int64> + <uint>6</uint> + <uint8>7</uint8> + <uint16>8</uint16> + <uint32>9</uint32> + <uint64>10</uint64> + <uintptr>11</uintptr> + <float>12.0</float> + <float32>13.0</float32> + <float64>14.0</float64> + <string>15</string> + <ptrstring>16</ptrstring> +</allscalars>` + +func TestAllScalars(t *testing.T) { + var a allScalars + buf := bytes.NewBufferString(testScalarsInput) + err := Unmarshal(buf, &a) + + if err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(a, all) { + t.Errorf("have %+v want %+v", a, all) + } +} + +type item struct { + Field_a string +} + +func TestIssue569(t *testing.T) { + data := `<item><field_a>abcd</field_a></item>` + var i item + buf := bytes.NewBufferString(data) + err := Unmarshal(buf, &i) + + if err != nil || i.Field_a != "abcd" { + t.Fatal("Expecting abcd") + } +} + +func TestUnquotedAttrs(t *testing.T) { + data := "<tag attr=azAZ09:-_\t>" + p := NewParser(StringReader(data)) + p.Strict = false + token, err := p.Token() + if _, ok := err.(*SyntaxError); ok { + t.Errorf("Unexpected error: %v", err) + } + if token.(StartElement).Name.Local != "tag" { + t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) + } + attr := token.(StartElement).Attr[0] + if attr.Value != "azAZ09:-_" { + t.Errorf("Unexpected attribute value: %v", attr.Value) + } + if attr.Name.Local != "attr" { + t.Errorf("Unexpected attribute name: %v", attr.Name.Local) + } +} + +func TestValuelessAttrs(t *testing.T) { + tests := [][3]string{ + {"<p nowrap>", "p", "nowrap"}, + {"<p nowrap >", "p", "nowrap"}, + {"<input checked/>", "input", "checked"}, + {"<input checked />", "input", "checked"}, + } + for _, test := range tests { + p := NewParser(StringReader(test[0])) + p.Strict = false + token, err := p.Token() + if _, ok := err.(*SyntaxError); ok { + t.Errorf("Unexpected error: %v", err) + } + if token.(StartElement).Name.Local != test[1] { + t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) + } + attr := token.(StartElement).Attr[0] + if attr.Value != test[2] { + t.Errorf("Unexpected attribute value: %v", attr.Value) + } + if attr.Name.Local != test[2] { + t.Errorf("Unexpected attribute name: %v", attr.Name.Local) + } + } +} + +func TestCopyTokenCharData(t *testing.T) { + data := []byte("same data") + var tok1 Token = CharData(data) + tok2 := CopyToken(tok1) + if !reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(CharData) != CharData") + } + data[1] = 'o' + if reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(CharData) uses same buffer.") + } +} + +func TestCopyTokenStartElement(t *testing.T) { + elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}} + var tok1 Token = elt + tok2 := CopyToken(tok1) + if !reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(StartElement) != StartElement") + } + elt.Attr[0] = Attr{Name{"", "lang"}, "de"} + if reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(CharData) uses same buffer.") + } +} + +func TestSyntaxErrorLineNum(t *testing.T) { + testInput := "<P>Foo<P>\n\n<P>Bar</>\n" + p := NewParser(StringReader(testInput)) + var err error + for _, err = p.Token(); err == nil; _, err = p.Token() { + } + synerr, ok := err.(*SyntaxError) + if !ok { + t.Error("Expected SyntaxError.") + } + if synerr.Line != 3 { + t.Error("SyntaxError didn't have correct line number.") + } +} + +func TestTrailingRawToken(t *testing.T) { + input := `<FOO></FOO> ` + p := NewParser(StringReader(input)) + var err error + for _, err = p.RawToken(); err == nil; _, err = p.RawToken() { + } + if err != io.EOF { + t.Fatalf("p.RawToken() = _, %v, want _, io.EOF", err) + } +} + +func TestTrailingToken(t *testing.T) { + input := `<FOO></FOO> ` + p := NewParser(StringReader(input)) + var err error + for _, err = p.Token(); err == nil; _, err = p.Token() { + } + if err != io.EOF { + t.Fatalf("p.Token() = _, %v, want _, io.EOF", err) + } +} + +func TestEntityInsideCDATA(t *testing.T) { + input := `<test><![CDATA[ &val=foo ]]></test>` + p := NewParser(StringReader(input)) + var err error + for _, err = p.Token(); err == nil; _, err = p.Token() { + } + if err != io.EOF { + t.Fatalf("p.Token() = _, %v, want _, io.EOF", err) + } +} + +// The last three tests (respectively one for characters in attribute +// names and two for character entities) pass not because of code +// changed for issue 1259, but instead pass with the given messages +// from other parts of xml.Parser. I provide these to note the +// current behavior of situations where one might think that character +// range checking would detect the error, but it does not in fact. + +var characterTests = []struct { + in string + err string +}{ + {"\x12<doc/>", "illegal character code U+0012"}, + {"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"}, + {"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"}, + {"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"}, + {"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"}, + {"<doc>&\x01;</doc>", "invalid character entity &;"}, + {"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &;"}, +} + +func TestDisallowedCharacters(t *testing.T) { + + for i, tt := range characterTests { + p := NewParser(StringReader(tt.in)) + var err error + + for err == nil { + _, err = p.Token() + } + synerr, ok := err.(*SyntaxError) + if !ok { + t.Fatalf("input %d p.Token() = _, %v, want _, *SyntaxError", i, err) + } + if synerr.Msg != tt.err { + t.Fatalf("input %d synerr.Msg wrong: want '%s', got '%s'", i, tt.err, synerr.Msg) + } + } +} + +type procInstEncodingTest struct { + expect, got string +} + +var procInstTests = []struct { + input, expect string +}{ + {`version="1.0" encoding="utf-8"`, "utf-8"}, + {`version="1.0" encoding='utf-8'`, "utf-8"}, + {`version="1.0" encoding='utf-8' `, "utf-8"}, + {`version="1.0" encoding=utf-8`, ""}, + {`encoding="FOO" `, "FOO"}, +} + +func TestProcInstEncoding(t *testing.T) { + for _, test := range procInstTests { + got := procInstEncoding(test.input) + if got != test.expect { + t.Errorf("procInstEncoding(%q) = %q; want %q", test.input, got, test.expect) + } + } +} |