diff options
Diffstat (limited to 'libgo/go/old/template/parse.go')
-rw-r--r-- | libgo/go/old/template/parse.go | 743 |
1 files changed, 743 insertions, 0 deletions
diff --git a/libgo/go/old/template/parse.go b/libgo/go/old/template/parse.go new file mode 100644 index 0000000..dedf9ad --- /dev/null +++ b/libgo/go/old/template/parse.go @@ -0,0 +1,743 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Code to parse a template. + +package template + +import ( + "fmt" + "io" + "io/ioutil" + "os" + "reflect" + "strconv" + "strings" + "unicode" + "utf8" +) + +// Errors returned during parsing and execution. Users may extract the information and reformat +// if they desire. +type Error struct { + Line int + Msg string +} + +func (e *Error) String() string { return fmt.Sprintf("line %d: %s", e.Line, e.Msg) } + +// checkError is a deferred function to turn a panic with type *Error into a plain error return. +// Other panics are unexpected and so are re-enabled. +func checkError(error *os.Error) { + if v := recover(); v != nil { + if e, ok := v.(*Error); ok { + *error = e + } else { + // runtime errors should crash + panic(v) + } + } +} + +// Most of the literals are aces. +var lbrace = []byte{'{'} +var rbrace = []byte{'}'} +var space = []byte{' '} +var tab = []byte{'\t'} + +// The various types of "tokens", which are plain text or (usually) brace-delimited descriptors +const ( + tokAlternates = iota + tokComment + tokEnd + tokLiteral + tokOr + tokRepeated + tokSection + tokText + tokVariable +) + +// FormatterMap is the type describing the mapping from formatter +// names to the functions that implement them. +type FormatterMap map[string]func(io.Writer, string, ...interface{}) + +// Built-in formatters. +var builtins = FormatterMap{ + "html": HTMLFormatter, + "str": StringFormatter, + "": StringFormatter, +} + +// The parsed state of a template is a vector of xxxElement structs. +// Sections have line numbers so errors can be reported better during execution. + +// Plain text. +type textElement struct { + text []byte +} + +// A literal such as .meta-left or .meta-right +type literalElement struct { + text []byte +} + +// A variable invocation to be evaluated +type variableElement struct { + linenum int + args []interface{} // The fields and literals in the invocation. + fmts []string // Names of formatters to apply. len(fmts) > 0 +} + +// A variableElement arg to be evaluated as a field name +type fieldName string + +// A .section block, possibly with a .or +type sectionElement struct { + linenum int // of .section itself + field string // cursor field for this block + start int // first element + or int // first element of .or block + end int // one beyond last element +} + +// A .repeated block, possibly with a .or and a .alternates +type repeatedElement struct { + sectionElement // It has the same structure... + altstart int // ... except for alternates + altend int +} + +// Template is the type that represents a template definition. +// It is unchanged after parsing. +type Template struct { + fmap FormatterMap // formatters for variables + // Used during parsing: + ldelim, rdelim []byte // delimiters; default {} + buf []byte // input text to process + p int // position in buf + linenum int // position in input + // Parsed results: + elems []interface{} +} + +// New creates a new template with the specified formatter map (which +// may be nil) to define auxiliary functions for formatting variables. +func New(fmap FormatterMap) *Template { + t := new(Template) + t.fmap = fmap + t.ldelim = lbrace + t.rdelim = rbrace + t.elems = make([]interface{}, 0, 16) + return t +} + +// Report error and stop executing. The line number must be provided explicitly. +func (t *Template) execError(st *state, line int, err string, args ...interface{}) { + panic(&Error{line, fmt.Sprintf(err, args...)}) +} + +// Report error, panic to terminate parsing. +// The line number comes from the template state. +func (t *Template) parseError(err string, args ...interface{}) { + panic(&Error{t.linenum, fmt.Sprintf(err, args...)}) +} + +// Is this an exported - upper case - name? +func isExported(name string) bool { + rune, _ := utf8.DecodeRuneInString(name) + return unicode.IsUpper(rune) +} + +// -- Lexical analysis + +// Is c a space character? +func isSpace(c uint8) bool { return c == ' ' || c == '\t' || c == '\r' || c == '\n' } + +// Safely, does s[n:n+len(t)] == t? +func equal(s []byte, n int, t []byte) bool { + b := s[n:] + if len(t) > len(b) { // not enough space left for a match. + return false + } + for i, c := range t { + if c != b[i] { + return false + } + } + return true +} + +// isQuote returns true if c is a string- or character-delimiting quote character. +func isQuote(c byte) bool { + return c == '"' || c == '`' || c == '\'' +} + +// endQuote returns the end quote index for the quoted string that +// starts at n, or -1 if no matching end quote is found before the end +// of the line. +func endQuote(s []byte, n int) int { + quote := s[n] + for n++; n < len(s); n++ { + switch s[n] { + case '\\': + if quote == '"' || quote == '\'' { + n++ + } + case '\n': + return -1 + case quote: + return n + } + } + return -1 +} + +// nextItem returns the next item from the input buffer. If the returned +// item is empty, we are at EOF. The item will be either a +// delimited string or a non-empty string between delimited +// strings. Tokens stop at (but include, if plain text) a newline. +// Action tokens on a line by themselves drop any space on +// either side, up to and including the newline. +func (t *Template) nextItem() []byte { + startOfLine := t.p == 0 || t.buf[t.p-1] == '\n' + start := t.p + var i int + newline := func() { + t.linenum++ + i++ + } + // Leading space up to but not including newline + for i = start; i < len(t.buf); i++ { + if t.buf[i] == '\n' || !isSpace(t.buf[i]) { + break + } + } + leadingSpace := i > start + // What's left is nothing, newline, delimited string, or plain text + switch { + case i == len(t.buf): + // EOF; nothing to do + case t.buf[i] == '\n': + newline() + case equal(t.buf, i, t.ldelim): + left := i // Start of left delimiter. + right := -1 // Will be (immediately after) right delimiter. + haveText := false // Delimiters contain text. + i += len(t.ldelim) + // Find the end of the action. + for ; i < len(t.buf); i++ { + if t.buf[i] == '\n' { + break + } + if isQuote(t.buf[i]) { + i = endQuote(t.buf, i) + if i == -1 { + t.parseError("unmatched quote") + return nil + } + continue + } + if equal(t.buf, i, t.rdelim) { + i += len(t.rdelim) + right = i + break + } + haveText = true + } + if right < 0 { + t.parseError("unmatched opening delimiter") + return nil + } + // Is this a special action (starts with '.' or '#') and the only thing on the line? + if startOfLine && haveText { + firstChar := t.buf[left+len(t.ldelim)] + if firstChar == '.' || firstChar == '#' { + // It's special and the first thing on the line. Is it the last? + for j := right; j < len(t.buf) && isSpace(t.buf[j]); j++ { + if t.buf[j] == '\n' { + // Yes it is. Drop the surrounding space and return the {.foo} + t.linenum++ + t.p = j + 1 + return t.buf[left:right] + } + } + } + } + // No it's not. If there's leading space, return that. + if leadingSpace { + // not trimming space: return leading space if there is some. + t.p = left + return t.buf[start:left] + } + // Return the word, leave the trailing space. + start = left + break + default: + for ; i < len(t.buf); i++ { + if t.buf[i] == '\n' { + newline() + break + } + if equal(t.buf, i, t.ldelim) { + break + } + } + } + item := t.buf[start:i] + t.p = i + return item +} + +// Turn a byte array into a space-split array of strings, +// taking into account quoted strings. +func words(buf []byte) []string { + s := make([]string, 0, 5) + for i := 0; i < len(buf); { + // One word per loop + for i < len(buf) && isSpace(buf[i]) { + i++ + } + if i == len(buf) { + break + } + // Got a word + start := i + if isQuote(buf[i]) { + i = endQuote(buf, i) + if i < 0 { + i = len(buf) + } else { + i++ + } + } + // Even with quotes, break on space only. This handles input + // such as {""|} and catches quoting mistakes. + for i < len(buf) && !isSpace(buf[i]) { + i++ + } + s = append(s, string(buf[start:i])) + } + return s +} + +// Analyze an item and return its token type and, if it's an action item, an array of +// its constituent words. +func (t *Template) analyze(item []byte) (tok int, w []string) { + // item is known to be non-empty + if !equal(item, 0, t.ldelim) { // doesn't start with left delimiter + tok = tokText + return + } + if !equal(item, len(item)-len(t.rdelim), t.rdelim) { // doesn't end with right delimiter + t.parseError("internal error: unmatched opening delimiter") // lexing should prevent this + return + } + if len(item) <= len(t.ldelim)+len(t.rdelim) { // no contents + t.parseError("empty directive") + return + } + // Comment + if item[len(t.ldelim)] == '#' { + tok = tokComment + return + } + // Split into words + w = words(item[len(t.ldelim) : len(item)-len(t.rdelim)]) // drop final delimiter + if len(w) == 0 { + t.parseError("empty directive") + return + } + first := w[0] + if first[0] != '.' { + tok = tokVariable + return + } + if len(first) > 1 && first[1] >= '0' && first[1] <= '9' { + // Must be a float. + tok = tokVariable + return + } + switch first { + case ".meta-left", ".meta-right", ".space", ".tab": + tok = tokLiteral + return + case ".or": + tok = tokOr + return + case ".end": + tok = tokEnd + return + case ".section": + if len(w) != 2 { + t.parseError("incorrect fields for .section: %s", item) + return + } + tok = tokSection + return + case ".repeated": + if len(w) != 3 || w[1] != "section" { + t.parseError("incorrect fields for .repeated: %s", item) + return + } + tok = tokRepeated + return + case ".alternates": + if len(w) != 2 || w[1] != "with" { + t.parseError("incorrect fields for .alternates: %s", item) + return + } + tok = tokAlternates + return + } + t.parseError("bad directive: %s", item) + return +} + +// formatter returns the Formatter with the given name in the Template, or nil if none exists. +func (t *Template) formatter(name string) func(io.Writer, string, ...interface{}) { + if t.fmap != nil { + if fn := t.fmap[name]; fn != nil { + return fn + } + } + return builtins[name] +} + +// -- Parsing + +// newVariable allocates a new variable-evaluation element. +func (t *Template) newVariable(words []string) *variableElement { + formatters := extractFormatters(words) + args := make([]interface{}, len(words)) + + // Build argument list, processing any literals + for i, word := range words { + var lerr os.Error + switch word[0] { + case '"', '`', '\'': + v, err := strconv.Unquote(word) + if err == nil && word[0] == '\'' { + args[i] = []int(v)[0] + } else { + args[i], lerr = v, err + } + + case '.', '+', '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + v, err := strconv.Btoi64(word, 0) + if err == nil { + args[i] = v + } else { + v, err := strconv.Atof64(word) + args[i], lerr = v, err + } + + default: + args[i] = fieldName(word) + } + if lerr != nil { + t.parseError("invalid literal: %q: %s", word, lerr) + } + } + + // We could remember the function address here and avoid the lookup later, + // but it's more dynamic to let the user change the map contents underfoot. + // We do require the name to be present, though. + + // Is it in user-supplied map? + for _, f := range formatters { + if t.formatter(f) == nil { + t.parseError("unknown formatter: %q", f) + } + } + + return &variableElement{t.linenum, args, formatters} +} + +// extractFormatters extracts a list of formatters from words. +// After the final space-separated argument in a variable, formatters may be +// specified separated by pipe symbols. For example: {a b c|d|e} +// The words parameter still has the formatters joined by '|' in the last word. +// extractFormatters splits formatters, replaces the last word with the content +// found before the first '|' within it, and returns the formatters obtained. +// If no formatters are found in words, the default formatter is returned. +func extractFormatters(words []string) (formatters []string) { + // "" is the default formatter. + formatters = []string{""} + if len(words) == 0 { + return + } + var bar int + lastWord := words[len(words)-1] + if isQuote(lastWord[0]) { + end := endQuote([]byte(lastWord), 0) + if end < 0 || end+1 == len(lastWord) || lastWord[end+1] != '|' { + return + } + bar = end + 1 + } else { + bar = strings.IndexRune(lastWord, '|') + if bar < 0 { + return + } + } + words[len(words)-1] = lastWord[0:bar] + formatters = strings.Split(lastWord[bar+1:], "|") + return +} + +// Grab the next item. If it's simple, just append it to the template. +// Otherwise return its details. +func (t *Template) parseSimple(item []byte) (done bool, tok int, w []string) { + tok, w = t.analyze(item) + done = true // assume for simplicity + switch tok { + case tokComment: + return + case tokText: + t.elems = append(t.elems, &textElement{item}) + return + case tokLiteral: + switch w[0] { + case ".meta-left": + t.elems = append(t.elems, &literalElement{t.ldelim}) + case ".meta-right": + t.elems = append(t.elems, &literalElement{t.rdelim}) + case ".space": + t.elems = append(t.elems, &literalElement{space}) + case ".tab": + t.elems = append(t.elems, &literalElement{tab}) + default: + t.parseError("internal error: unknown literal: %s", w[0]) + } + return + case tokVariable: + t.elems = append(t.elems, t.newVariable(w)) + return + } + return false, tok, w +} + +// parseRepeated and parseSection are mutually recursive + +func (t *Template) parseRepeated(words []string) *repeatedElement { + r := new(repeatedElement) + t.elems = append(t.elems, r) + r.linenum = t.linenum + r.field = words[2] + // Scan section, collecting true and false (.or) blocks. + r.start = len(t.elems) + r.or = -1 + r.altstart = -1 + r.altend = -1 +Loop: + for { + item := t.nextItem() + if len(item) == 0 { + t.parseError("missing .end for .repeated section") + break + } + done, tok, w := t.parseSimple(item) + if done { + continue + } + switch tok { + case tokEnd: + break Loop + case tokOr: + if r.or >= 0 { + t.parseError("extra .or in .repeated section") + break Loop + } + r.altend = len(t.elems) + r.or = len(t.elems) + case tokSection: + t.parseSection(w) + case tokRepeated: + t.parseRepeated(w) + case tokAlternates: + if r.altstart >= 0 { + t.parseError("extra .alternates in .repeated section") + break Loop + } + if r.or >= 0 { + t.parseError(".alternates inside .or block in .repeated section") + break Loop + } + r.altstart = len(t.elems) + default: + t.parseError("internal error: unknown repeated section item: %s", item) + break Loop + } + } + if r.altend < 0 { + r.altend = len(t.elems) + } + r.end = len(t.elems) + return r +} + +func (t *Template) parseSection(words []string) *sectionElement { + s := new(sectionElement) + t.elems = append(t.elems, s) + s.linenum = t.linenum + s.field = words[1] + // Scan section, collecting true and false (.or) blocks. + s.start = len(t.elems) + s.or = -1 +Loop: + for { + item := t.nextItem() + if len(item) == 0 { + t.parseError("missing .end for .section") + break + } + done, tok, w := t.parseSimple(item) + if done { + continue + } + switch tok { + case tokEnd: + break Loop + case tokOr: + if s.or >= 0 { + t.parseError("extra .or in .section") + break Loop + } + s.or = len(t.elems) + case tokSection: + t.parseSection(w) + case tokRepeated: + t.parseRepeated(w) + case tokAlternates: + t.parseError(".alternates not in .repeated") + default: + t.parseError("internal error: unknown section item: %s", item) + } + } + s.end = len(t.elems) + return s +} + +func (t *Template) parse() { + for { + item := t.nextItem() + if len(item) == 0 { + break + } + done, tok, w := t.parseSimple(item) + if done { + continue + } + switch tok { + case tokOr, tokEnd, tokAlternates: + t.parseError("unexpected %s", w[0]) + case tokSection: + t.parseSection(w) + case tokRepeated: + t.parseRepeated(w) + default: + t.parseError("internal error: bad directive in parse: %s", item) + } + } +} + +// -- Execution + +// -- Public interface + +// Parse initializes a Template by parsing its definition. The string +// s contains the template text. If any errors occur, Parse returns +// the error. +func (t *Template) Parse(s string) (err os.Error) { + if t.elems == nil { + return &Error{1, "template not allocated with New"} + } + if !validDelim(t.ldelim) || !validDelim(t.rdelim) { + return &Error{1, fmt.Sprintf("bad delimiter strings %q %q", t.ldelim, t.rdelim)} + } + defer checkError(&err) + t.buf = []byte(s) + t.p = 0 + t.linenum = 1 + t.parse() + return nil +} + +// ParseFile is like Parse but reads the template definition from the +// named file. +func (t *Template) ParseFile(filename string) (err os.Error) { + b, err := ioutil.ReadFile(filename) + if err != nil { + return err + } + return t.Parse(string(b)) +} + +// Execute applies a parsed template to the specified data object, +// generating output to wr. +func (t *Template) Execute(wr io.Writer, data interface{}) (err os.Error) { + // Extract the driver data. + val := reflect.ValueOf(data) + defer checkError(&err) + t.p = 0 + t.execute(0, len(t.elems), &state{parent: nil, data: val, wr: wr}) + return nil +} + +// SetDelims sets the left and right delimiters for operations in the +// template. They are validated during parsing. They could be +// validated here but it's better to keep the routine simple. The +// delimiters are very rarely invalid and Parse has the necessary +// error-handling interface already. +func (t *Template) SetDelims(left, right string) { + t.ldelim = []byte(left) + t.rdelim = []byte(right) +} + +// Parse creates a Template with default parameters (such as {} for +// metacharacters). The string s contains the template text while +// the formatter map fmap, which may be nil, defines auxiliary functions +// for formatting variables. The template is returned. If any errors +// occur, err will be non-nil. +func Parse(s string, fmap FormatterMap) (t *Template, err os.Error) { + t = New(fmap) + err = t.Parse(s) + if err != nil { + t = nil + } + return +} + +// ParseFile is a wrapper function that creates a Template with default +// parameters (such as {} for metacharacters). The filename identifies +// a file containing the template text, while the formatter map fmap, which +// may be nil, defines auxiliary functions for formatting variables. +// The template is returned. If any errors occur, err will be non-nil. +func ParseFile(filename string, fmap FormatterMap) (t *Template, err os.Error) { + b, err := ioutil.ReadFile(filename) + if err != nil { + return nil, err + } + return Parse(string(b), fmap) +} + +// MustParse is like Parse but panics if the template cannot be parsed. +func MustParse(s string, fmap FormatterMap) *Template { + t, err := Parse(s, fmap) + if err != nil { + panic("template.MustParse error: " + err.String()) + } + return t +} + +// MustParseFile is like ParseFile but panics if the file cannot be read +// or the template cannot be parsed. +func MustParseFile(filename string, fmap FormatterMap) *Template { + b, err := ioutil.ReadFile(filename) + if err != nil { + panic("template.MustParseFile error: " + err.String()) + } + return MustParse(string(b), fmap) +} |