diff options
Diffstat (limited to 'libgo/go/net/http/lex.go')
-rw-r--r-- | libgo/go/net/http/lex.go | 206 |
1 files changed, 83 insertions, 123 deletions
diff --git a/libgo/go/net/http/lex.go b/libgo/go/net/http/lex.go index ffb393c..cb33318 100644 --- a/libgo/go/net/http/lex.go +++ b/libgo/go/net/http/lex.go @@ -6,131 +6,91 @@ package http // This file deals with lexical matters of HTTP -func isSeparator(c byte) bool { - switch c { - case '(', ')', '<', '>', '@', ',', ';', ':', '\\', '"', '/', '[', ']', '?', '=', '{', '}', ' ', '\t': - return true - } - return false +var isTokenTable = [127]bool{ + '!': true, + '#': true, + '$': true, + '%': true, + '&': true, + '\'': true, + '*': true, + '+': true, + '-': true, + '.': true, + '0': true, + '1': true, + '2': true, + '3': true, + '4': true, + '5': true, + '6': true, + '7': true, + '8': true, + '9': true, + 'A': true, + 'B': true, + 'C': true, + 'D': true, + 'E': true, + 'F': true, + 'G': true, + 'H': true, + 'I': true, + 'J': true, + 'K': true, + 'L': true, + 'M': true, + 'N': true, + 'O': true, + 'P': true, + 'Q': true, + 'R': true, + 'S': true, + 'T': true, + 'U': true, + 'W': true, + 'V': true, + 'X': true, + 'Y': true, + 'Z': true, + '^': true, + '_': true, + '`': true, + 'a': true, + 'b': true, + 'c': true, + 'd': true, + 'e': true, + 'f': true, + 'g': true, + 'h': true, + 'i': true, + 'j': true, + 'k': true, + 'l': true, + 'm': true, + 'n': true, + 'o': true, + 'p': true, + 'q': true, + 'r': true, + 's': true, + 't': true, + 'u': true, + 'v': true, + 'w': true, + 'x': true, + 'y': true, + 'z': true, + '|': true, + '~': true, } -func isCtl(c byte) bool { return (0 <= c && c <= 31) || c == 127 } - -func isChar(c byte) bool { return 0 <= c && c <= 127 } - -func isAnyText(c byte) bool { return !isCtl(c) } - -func isQdText(c byte) bool { return isAnyText(c) && c != '"' } - -func isToken(c byte) bool { return isChar(c) && !isCtl(c) && !isSeparator(c) } - -// Valid escaped sequences are not specified in RFC 2616, so for now, we assume -// that they coincide with the common sense ones used by GO. Malformed -// characters should probably not be treated as errors by a robust (forgiving) -// parser, so we replace them with the '?' character. -func httpUnquotePair(b byte) byte { - // skip the first byte, which should always be '\' - switch b { - case 'a': - return '\a' - case 'b': - return '\b' - case 'f': - return '\f' - case 'n': - return '\n' - case 'r': - return '\r' - case 't': - return '\t' - case 'v': - return '\v' - case '\\': - return '\\' - case '\'': - return '\'' - case '"': - return '"' - } - return '?' -} - -// raw must begin with a valid quoted string. Only the first quoted string is -// parsed and is unquoted in result. eaten is the number of bytes parsed, or -1 -// upon failure. -func httpUnquote(raw []byte) (eaten int, result string) { - buf := make([]byte, len(raw)) - if raw[0] != '"' { - return -1, "" - } - eaten = 1 - j := 0 // # of bytes written in buf - for i := 1; i < len(raw); i++ { - switch b := raw[i]; b { - case '"': - eaten++ - buf = buf[0:j] - return i + 1, string(buf) - case '\\': - if len(raw) < i+2 { - return -1, "" - } - buf[j] = httpUnquotePair(raw[i+1]) - eaten += 2 - j++ - i++ - default: - if isQdText(b) { - buf[j] = b - } else { - buf[j] = '?' - } - eaten++ - j++ - } - } - return -1, "" +func isToken(r rune) bool { + i := int(r) + return i < len(isTokenTable) && isTokenTable[i] } -// This is a best effort parse, so errors are not returned, instead not all of -// the input string might be parsed. result is always non-nil. -func httpSplitFieldValue(fv string) (eaten int, result []string) { - result = make([]string, 0, len(fv)) - raw := []byte(fv) - i := 0 - chunk := "" - for i < len(raw) { - b := raw[i] - switch { - case b == '"': - eaten, unq := httpUnquote(raw[i:len(raw)]) - if eaten < 0 { - return i, result - } else { - i += eaten - chunk += unq - } - case isSeparator(b): - if chunk != "" { - result = result[0 : len(result)+1] - result[len(result)-1] = chunk - chunk = "" - } - i++ - case isToken(b): - chunk += string(b) - i++ - case b == '\n' || b == '\r': - i++ - default: - chunk += "?" - i++ - } - } - if chunk != "" { - result = result[0 : len(result)+1] - result[len(result)-1] = chunk - chunk = "" - } - return i, result +func isNotToken(r rune) bool { + return !isToken(r) } |