diff options
author | Ian Lance Taylor <ian@gcc.gnu.org> | 2013-11-06 19:49:01 +0000 |
---|---|---|
committer | Ian Lance Taylor <ian@gcc.gnu.org> | 2013-11-06 19:49:01 +0000 |
commit | f038dae646bac2b31be98ab592c0e5206d2d96f5 (patch) | |
tree | 39530b071991b2326f881b2a30a2d82d6c133fd6 /libgo/go/encoding/xml/xml.go | |
parent | f20f261304993444741e0f0a14d3147e591bc660 (diff) | |
download | gcc-f038dae646bac2b31be98ab592c0e5206d2d96f5.zip gcc-f038dae646bac2b31be98ab592c0e5206d2d96f5.tar.gz gcc-f038dae646bac2b31be98ab592c0e5206d2d96f5.tar.bz2 |
libgo: Update to October 24 version of master library.
From-SVN: r204466
Diffstat (limited to 'libgo/go/encoding/xml/xml.go')
-rw-r--r-- | libgo/go/encoding/xml/xml.go | 160 |
1 files changed, 142 insertions, 18 deletions
diff --git a/libgo/go/encoding/xml/xml.go b/libgo/go/encoding/xml/xml.go index 021f7e4..5b9d670 100644 --- a/libgo/go/encoding/xml/xml.go +++ b/libgo/go/encoding/xml/xml.go @@ -16,6 +16,7 @@ package xml import ( "bufio" "bytes" + "errors" "fmt" "io" "strconv" @@ -66,6 +67,11 @@ func (e StartElement) Copy() StartElement { return e } +// End returns the corresponding XML end element. +func (e StartElement) End() EndElement { + return EndElement{e.Name} +} + // An EndElement represents an XML end element. type EndElement struct { Name Name @@ -144,6 +150,10 @@ type Decoder struct { // d.Entity = HTMLEntity // // creates a parser that can handle typical HTML. + // + // Strict mode does not enforce the requirements of the XML name spaces TR. + // In particular it does not reject name space tags using undefined prefixes. + // Such tags are recorded with the unknown prefix as the name space URL. Strict bool // When Strict == false, AutoClose indicates a set of elements to @@ -174,18 +184,19 @@ type Decoder struct { // the attribute xmlns="DefaultSpace". DefaultSpace string - r io.ByteReader - buf bytes.Buffer - saved *bytes.Buffer - stk *stack - free *stack - needClose bool - toClose Name - nextToken Token - nextByte int - ns map[string]string - err error - line int + r io.ByteReader + buf bytes.Buffer + saved *bytes.Buffer + stk *stack + free *stack + needClose bool + toClose Name + nextToken Token + nextByte int + ns map[string]string + err error + line int + unmarshalDepth int } // NewDecoder creates a new XML parser reading from r. @@ -223,10 +234,14 @@ func NewDecoder(r io.Reader) *Decoder { // If Token encounters an unrecognized name space prefix, // it uses the prefix as the Space rather than report an error. func (d *Decoder) Token() (t Token, err error) { + if d.stk != nil && d.stk.kind == stkEOF { + err = io.EOF + return + } if d.nextToken != nil { t = d.nextToken d.nextToken = nil - } else if t, err = d.RawToken(); err != nil { + } else if t, err = d.rawToken(); err != nil { return } @@ -322,6 +337,7 @@ type stack struct { const ( stkStart = iota stkNs + stkEOF ) func (d *Decoder) push(kind int) *stack { @@ -347,6 +363,43 @@ func (d *Decoder) pop() *stack { return s } +// Record that after the current element is finished +// (that element is already pushed on the stack) +// Token should return EOF until popEOF is called. +func (d *Decoder) pushEOF() { + // Walk down stack to find Start. + // It might not be the top, because there might be stkNs + // entries above it. + start := d.stk + for start.kind != stkStart { + start = start.next + } + // The stkNs entries below a start are associated with that + // element too; skip over them. + for start.next != nil && start.next.kind == stkNs { + start = start.next + } + s := d.free + if s != nil { + d.free = s.next + } else { + s = new(stack) + } + s.kind = stkEOF + s.next = start.next + start.next = s +} + +// Undo a pushEOF. +// The element must have been finished, so the EOF should be at the top of the stack. +func (d *Decoder) popEOF() bool { + if d.stk == nil || d.stk.kind != stkEOF { + return false + } + d.pop() + return true +} + // Record that we are starting an element with the given name. func (d *Decoder) pushElement(name Name) { s := d.push(stkStart) @@ -395,9 +448,9 @@ func (d *Decoder) popElement(t *EndElement) bool { return false } - // Pop stack until a Start is on the top, undoing the + // Pop stack until a Start or EOF is on the top, undoing the // translations that were associated with the element we just closed. - for d.stk != nil && d.stk.kind != stkStart { + for d.stk != nil && d.stk.kind != stkStart && d.stk.kind != stkEOF { s := d.pop() if s.ok { d.ns[s.name.Local] = s.name.Space @@ -429,10 +482,19 @@ func (d *Decoder) autoClose(t Token) (Token, bool) { return nil, false } +var errRawToken = errors.New("xml: cannot use RawToken from UnmarshalXML method") + // RawToken is like Token but does not verify that // start and end elements match and does not translate // name space prefixes to their corresponding URLs. func (d *Decoder) RawToken() (Token, error) { + if d.unmarshalDepth > 0 { + return nil, errRawToken + } + return d.rawToken() +} + +func (d *Decoder) rawToken() (Token, error) { if d.err != nil { return nil, d.err } @@ -484,8 +546,7 @@ func (d *Decoder) RawToken() (Token, error) { case '?': // <?: Processing instruction. - // TODO(rsc): Should parse the <?xml declaration to make sure - // the version is 1.0 and the encoding is UTF-8. + // TODO(rsc): Should parse the <?xml declaration to make sure the version is 1.0. var target string if target, ok = d.name(); !ok { if d.err == nil { @@ -1112,6 +1173,30 @@ func isName(s []byte) bool { return true } +func isNameString(s string) bool { + if len(s) == 0 { + return false + } + c, n := utf8.DecodeRuneInString(s) + if c == utf8.RuneError && n == 1 { + return false + } + if !unicode.Is(first, c) { + return false + } + for n < len(s) { + s = s[n:] + c, n = utf8.DecodeRuneInString(s) + if c == utf8.RuneError && n == 1 { + return false + } + if !unicode.Is(first, c) && !unicode.Is(second, c) { + return false + } + } + return true +} + // These tables were generated by cut and paste from Appendix B of // the XML spec at http://www.xml.com/axml/testaxml.htm // and then reformatting. First corresponds to (Letter | '_' | ':') @@ -1758,7 +1843,7 @@ func EscapeText(w io.Writer, s []byte) error { case '\r': esc = esc_cr default: - if !isInCharacterRange(r) { + if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) { esc = esc_fffd break } @@ -1778,6 +1863,45 @@ func EscapeText(w io.Writer, s []byte) error { return nil } +// EscapeString writes to p the properly escaped XML equivalent +// of the plain text data s. +func (p *printer) EscapeString(s string) { + var esc []byte + last := 0 + for i := 0; i < len(s); { + r, width := utf8.DecodeRuneInString(s[i:]) + i += width + switch r { + case '"': + esc = esc_quot + case '\'': + esc = esc_apos + case '&': + esc = esc_amp + case '<': + esc = esc_lt + case '>': + esc = esc_gt + case '\t': + esc = esc_tab + case '\n': + esc = esc_nl + case '\r': + esc = esc_cr + default: + if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) { + esc = esc_fffd + break + } + continue + } + p.WriteString(s[last : i-width]) + p.Write(esc) + last = i + } + p.WriteString(s[last:]) +} + // Escape is like EscapeText but omits the error return value. // It is provided for backwards compatibility with Go 1.0. // Code targeting Go 1.1 or later should use EscapeText. |