diff options
author | Ian Lance Taylor <ian@gcc.gnu.org> | 2010-12-03 04:34:57 +0000 |
---|---|---|
committer | Ian Lance Taylor <ian@gcc.gnu.org> | 2010-12-03 04:34:57 +0000 |
commit | 7a9389330e91acc3ed05deac2d198af25d13cf3c (patch) | |
tree | 38fe54a4f38ede5d949c915d66191f24a6fe5153 /libgo/go/http/request.go | |
parent | 1aa6700378e5188a853c018256113ce6e1fb5c05 (diff) | |
download | gcc-7a9389330e91acc3ed05deac2d198af25d13cf3c.zip gcc-7a9389330e91acc3ed05deac2d198af25d13cf3c.tar.gz gcc-7a9389330e91acc3ed05deac2d198af25d13cf3c.tar.bz2 |
Add Go frontend, libgo library, and Go testsuite.
gcc/:
* gcc.c (default_compilers): Add entry for ".go".
* common.opt: Add -static-libgo as a driver option.
* doc/install.texi (Configuration): Mention libgo as an option for
--enable-shared. Mention go as an option for --enable-languages.
* doc/invoke.texi (Overall Options): Mention .go as a file name
suffix. Mention go as a -x option.
* doc/frontends.texi (G++ and GCC): Mention Go as a supported
language.
* doc/sourcebuild.texi (Top Level): Mention libgo.
* doc/standards.texi (Standards): Add section on Go language.
Move references for other languages into their own section.
* doc/contrib.texi (Contributors): Mention that I contributed the
Go frontend.
gcc/testsuite/:
* lib/go.exp: New file.
* lib/go-dg.exp: New file.
* lib/go-torture.exp: New file.
* lib/target-supports.exp (check_compile): Match // Go.
From-SVN: r167407
Diffstat (limited to 'libgo/go/http/request.go')
-rw-r--r-- | libgo/go/http/request.go | 693 |
1 files changed, 693 insertions, 0 deletions
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// HTTP Request reading and parsing.

// The http package implements parsing of HTTP requests, replies,
// and URLs and provides an extensible HTTP server and a basic
// HTTP client.
package http

import (
	"bufio"
	"bytes"
	"container/vector"
	"fmt"
	"io"
	"io/ioutil"
	"mime"
	"mime/multipart"
	"os"
	"strconv"
	"strings"
)

// Limits applied while reading a request.
const (
	// readLineBytes rejects any line whose length reaches maxLineLength.
	maxLineLength = 4096 // assumed <= bufio.defaultBufSize
	// readKeyValue rejects a folded (multi-line) header value whose
	// length reaches maxValueLength.
	maxValueLength = 4096
	// ReadRequest returns ErrHeaderTooLong once the number of header
	// lines reaches maxHeaderLines.
	maxHeaderLines = 1024
	// NOTE(review): chunkSize is not referenced in this file; presumably
	// it is used by the chunked writer elsewhere in the package.
	chunkSize = 4 << 10 // 4 KB chunks
)

// HTTP request parsing errors.
// A ProtocolError wraps a fixed message in an os.ErrorString.
type ProtocolError struct {
	os.ErrorString
}

// Errors returned by this package. They are compared by identity, so
// callers can test for a specific failure with ==.
var (
	ErrLineTooLong          = &ProtocolError{"header line too long"}
	ErrHeaderTooLong        = &ProtocolError{"header too long"}
	ErrShortBody            = &ProtocolError{"entity body too short"}
	ErrNotSupported         = &ProtocolError{"feature not supported"}
	ErrUnexpectedTrailer    = &ProtocolError{"trailer header without chunked transfer encoding"}
	ErrMissingContentLength = &ProtocolError{"missing ContentLength in HEAD response"}
	ErrNotMultipart         = &ProtocolError{"request Content-Type isn't multipart/form-data"}
	ErrMissingBoundary      = &ProtocolError{"no multipart boundary param Content-Type"}
)

// A badStringError reports a problem (what) together with the
// offending input text (str).
type badStringError struct {
	what string
	str  string
}

// String formats the error as the description followed by the quoted input.
func (e *badStringError) String() string { return fmt.Sprintf("%s %q", e.what, e.str) }

// reqExcludeHeader is handed to writeSortedKeyValue by Request.Write.
// NOTE(review): presumably these are the header names that Write (or the
// transfer writer) emits itself and that must therefore not be written a
// second time from Request.Header -- confirm against writeSortedKeyValue.
var reqExcludeHeader = map[string]bool{
	"Host":              true,
	"User-Agent":        true,
	"Referer":           true,
	"Content-Length":    true,
	"Transfer-Encoding": true,
	"Trailer":           true,
}

// A Request represents a parsed HTTP request header.
type Request struct {
	Method     string // GET, POST, PUT, etc.
	RawURL     string // The raw URL given in the request.
	URL        *URL   // Parsed URL.
	Proto      string // "HTTP/1.0"
	ProtoMajor int    // 1
	ProtoMinor int    // 0

	// A header maps request lines to their values.
	// If the header says
	//
	//	accept-encoding: gzip, deflate
	//	Accept-Language: en-us
	//	Connection: keep-alive
	//
	// then
	//
	//	Header = map[string]string{
	//		"Accept-Encoding": "gzip, deflate",
	//		"Accept-Language": "en-us",
	//		"Connection": "keep-alive",
	//	}
	//
	// HTTP defines that header names are case-insensitive.
	// The request parser implements this by canonicalizing the
	// name, making the first character and any characters
	// following a hyphen uppercase and the rest lowercase.
	Header map[string]string

	// The message body.
	Body io.ReadCloser

	// ContentLength records the length of the associated content.
	// The value -1 indicates that the length is unknown.
	// Values >= 0 indicate that the given number of bytes may be read from Body.
	ContentLength int64

	// TransferEncoding lists the transfer encodings from outermost to innermost.
	// An empty list denotes the "identity" encoding.
	TransferEncoding []string

	// Whether to close the connection after replying to this request.
	Close bool

	// The host on which the URL is sought.
	// Per RFC 2616, this is either the value of the Host: header
	// or the host name given in the URL itself.
	Host string

	// The referring URL, if sent in the request.
	//
	// Referer is misspelled as in the request itself,
	// a mistake from the earliest days of HTTP.
	// This value can also be fetched from the Header map
	// as Header["Referer"]; the benefit of making it
	// available as a structure field is that the compiler
	// can diagnose programs that use the alternate
	// (correct English) spelling req.Referrer but cannot
	// diagnose programs that use Header["Referrer"].
	Referer string

	// The User-Agent: header string, if sent in the request.
	UserAgent string

	// The parsed form. Only available after ParseForm is called.
	Form map[string][]string

	// Trailer maps trailer keys to values. Like for Header, if the
	// request has multiple trailer lines with the same key, they will be
	// concatenated, delimited by commas.
	Trailer map[string]string
}

// ProtoAtLeast returns whether the HTTP protocol used
// in the request is at least major.minor.
func (r *Request) ProtoAtLeast(major, minor int) bool {
	// A higher major version always qualifies; with an equal major
	// version the minor version decides.
	return r.ProtoMajor > major ||
		r.ProtoMajor == major && r.ProtoMinor >= minor
}

// MultipartReader returns a MIME multipart reader if this is a
// multipart/form-data POST request, else returns nil and an error.
// The error is ErrNotMultipart when the Content-Type header is absent or
// names another media type, and ErrMissingBoundary when the header has no
// "boundary" parameter.
func (r *Request) MultipartReader() (multipart.Reader, os.Error) {
	v, ok := r.Header["Content-Type"]
	if !ok {
		return nil, ErrNotMultipart
	}
	d, params := mime.ParseMediaType(v)
	if d != "multipart/form-data" {
		return nil, ErrNotMultipart
	}
	boundary, ok := params["boundary"]
	if !ok {
		return nil, ErrMissingBoundary
	}
	return multipart.NewReader(r.Body, boundary), nil
}

// Return value if nonempty, def otherwise.
func valueOrDefault(value, def string) string {
	if value != "" {
		return value
	}
	return def
}

// defaultUserAgent is the User-Agent that Request.Write sends when the
// request's UserAgent field is empty.
const defaultUserAgent = "Go http package"

// Write writes an HTTP/1.1 request -- header and body -- in wire format.
// This method consults the following fields of req:
//	Host
//	RawURL, if non-empty, or else URL
//	Method (defaults to "GET")
//	UserAgent (defaults to defaultUserAgent)
//	Referer
//	Header
//	Body
//
// If Body is present, Write forces "Transfer-Encoding: chunked" as a header
// and then closes Body when finished sending it.
+func (req *Request) Write(w io.Writer) os.Error { + host := req.Host + if host == "" { + host = req.URL.Host + } + + uri := req.RawURL + if uri == "" { + uri = valueOrDefault(urlEscape(req.URL.Path, encodePath), "/") + if req.URL.RawQuery != "" { + uri += "?" + req.URL.RawQuery + } + } + + fmt.Fprintf(w, "%s %s HTTP/1.1\r\n", valueOrDefault(req.Method, "GET"), uri) + + // Header lines + fmt.Fprintf(w, "Host: %s\r\n", host) + fmt.Fprintf(w, "User-Agent: %s\r\n", valueOrDefault(req.UserAgent, defaultUserAgent)) + if req.Referer != "" { + fmt.Fprintf(w, "Referer: %s\r\n", req.Referer) + } + + // Process Body,ContentLength,Close,Trailer + tw, err := newTransferWriter(req) + if err != nil { + return err + } + err = tw.WriteHeader(w) + if err != nil { + return err + } + + // TODO: split long values? (If so, should share code with Conn.Write) + // TODO: if Header includes values for Host, User-Agent, or Referer, this + // may conflict with the User-Agent or Referer headers we add manually. + // One solution would be to remove the Host, UserAgent, and Referer fields + // from Request, and introduce Request methods along the lines of + // Response.{GetHeader,AddHeader} and string constants for "Host", + // "User-Agent" and "Referer". + err = writeSortedKeyValue(w, req.Header, reqExcludeHeader) + if err != nil { + return err + } + + io.WriteString(w, "\r\n") + + // Write body and trailer + err = tw.WriteBody(w) + if err != nil { + return err + } + + return nil +} + +// Read a line of bytes (up to \n) from b. +// Give up if the line exceeds maxLineLength. +// The returned bytes are a pointer into storage in +// the bufio, so they are only valid until the next bufio read. +func readLineBytes(b *bufio.Reader) (p []byte, err os.Error) { + if p, err = b.ReadSlice('\n'); err != nil { + // We always know when EOF is coming. + // If the caller asked for a line, there should be a line. 
+ if err == os.EOF { + err = io.ErrUnexpectedEOF + } else if err == bufio.ErrBufferFull { + err = ErrLineTooLong + } + return nil, err + } + if len(p) >= maxLineLength { + return nil, ErrLineTooLong + } + + // Chop off trailing white space. + var i int + for i = len(p); i > 0; i-- { + if c := p[i-1]; c != ' ' && c != '\r' && c != '\t' && c != '\n' { + break + } + } + return p[0:i], nil +} + +// readLineBytes, but convert the bytes into a string. +func readLine(b *bufio.Reader) (s string, err os.Error) { + p, e := readLineBytes(b) + if e != nil { + return "", e + } + return string(p), nil +} + +var colon = []byte{':'} + +// Read a key/value pair from b. +// A key/value has the form Key: Value\r\n +// and the Value can continue on multiple lines if each continuation line +// starts with a space. +func readKeyValue(b *bufio.Reader) (key, value string, err os.Error) { + line, e := readLineBytes(b) + if e != nil { + return "", "", e + } + if len(line) == 0 { + return "", "", nil + } + + // Scan first line for colon. + i := bytes.Index(line, colon) + if i < 0 { + goto Malformed + } + + key = string(line[0:i]) + if strings.Contains(key, " ") { + // Key field has space - no good. + goto Malformed + } + + // Skip initial space before value. + for i++; i < len(line); i++ { + if line[i] != ' ' { + break + } + } + value = string(line[i:]) + + // Look for extension lines, which must begin with space. + for { + c, e := b.ReadByte() + if c != ' ' { + if e != os.EOF { + b.UnreadByte() + } + break + } + + // Eat leading space. + for c == ' ' { + if c, e = b.ReadByte(); e != nil { + if e == os.EOF { + e = io.ErrUnexpectedEOF + } + return "", "", e + } + } + b.UnreadByte() + + // Read the rest of the line and add to value. 
+ if line, e = readLineBytes(b); e != nil { + return "", "", e + } + value += " " + string(line) + + if len(value) >= maxValueLength { + return "", "", &badStringError{"value too long for key", key} + } + } + return key, value, nil + +Malformed: + return "", "", &badStringError{"malformed header line", string(line)} +} + +// Convert decimal at s[i:len(s)] to integer, +// returning value, string position where the digits stopped, +// and whether there was a valid number (digits, not too big). +func atoi(s string, i int) (n, i1 int, ok bool) { + const Big = 1000000 + if i >= len(s) || s[i] < '0' || s[i] > '9' { + return 0, 0, false + } + n = 0 + for ; i < len(s) && '0' <= s[i] && s[i] <= '9'; i++ { + n = n*10 + int(s[i]-'0') + if n > Big { + return 0, 0, false + } + } + return n, i, true +} + +// Parse HTTP version: "HTTP/1.2" -> (1, 2, true). +func parseHTTPVersion(vers string) (int, int, bool) { + if len(vers) < 5 || vers[0:5] != "HTTP/" { + return 0, 0, false + } + major, i, ok := atoi(vers, 5) + if !ok || i >= len(vers) || vers[i] != '.' { + return 0, 0, false + } + var minor int + minor, i, ok = atoi(vers, i+1) + if !ok || i != len(vers) { + return 0, 0, false + } + return major, minor, true +} + +// CanonicalHeaderKey returns the canonical format of the +// HTTP header key s. The canonicalization converts the first +// letter and any letter following a hyphen to upper case; +// the rest are converted to lowercase. For example, the +// canonical key for "accept-encoding" is "Accept-Encoding". +func CanonicalHeaderKey(s string) string { + // canonicalize: first letter upper case + // and upper case after each dash. + // (Host, User-Agent, If-Modified-Since). + // HTTP headers are ASCII only, so no Unicode issues. 
+ var a []byte + upper := true + for i := 0; i < len(s); i++ { + v := s[i] + if upper && 'a' <= v && v <= 'z' { + if a == nil { + a = []byte(s) + } + a[i] = v + 'A' - 'a' + } + if !upper && 'A' <= v && v <= 'Z' { + if a == nil { + a = []byte(s) + } + a[i] = v + 'a' - 'A' + } + upper = false + if v == '-' { + upper = true + } + } + if a != nil { + return string(a) + } + return s +} + +type chunkedReader struct { + r *bufio.Reader + n uint64 // unread bytes in chunk + err os.Error +} + +func newChunkedReader(r *bufio.Reader) *chunkedReader { + return &chunkedReader{r: r} +} + +func (cr *chunkedReader) beginChunk() { + // chunk-size CRLF + var line string + line, cr.err = readLine(cr.r) + if cr.err != nil { + return + } + cr.n, cr.err = strconv.Btoui64(line, 16) + if cr.err != nil { + return + } + if cr.n == 0 { + // trailer CRLF + for { + line, cr.err = readLine(cr.r) + if cr.err != nil { + return + } + if line == "" { + break + } + } + cr.err = os.EOF + } +} + +func (cr *chunkedReader) Read(b []uint8) (n int, err os.Error) { + if cr.err != nil { + return 0, cr.err + } + if cr.n == 0 { + cr.beginChunk() + if cr.err != nil { + return 0, cr.err + } + } + if uint64(len(b)) > cr.n { + b = b[0:cr.n] + } + n, cr.err = cr.r.Read(b) + cr.n -= uint64(n) + if cr.n == 0 && cr.err == nil { + // end of chunk (CRLF) + b := make([]byte, 2) + if _, cr.err = io.ReadFull(cr.r, b); cr.err == nil { + if b[0] != '\r' || b[1] != '\n' { + cr.err = os.NewError("malformed chunked encoding") + } + } + } + return n, cr.err +} + +// ReadRequest reads and parses a request from b. 
+func ReadRequest(b *bufio.Reader) (req *Request, err os.Error) { + req = new(Request) + + // First line: GET /index.html HTTP/1.0 + var s string + if s, err = readLine(b); err != nil { + return nil, err + } + + var f []string + if f = strings.Split(s, " ", 3); len(f) < 3 { + return nil, &badStringError{"malformed HTTP request", s} + } + req.Method, req.RawURL, req.Proto = f[0], f[1], f[2] + var ok bool + if req.ProtoMajor, req.ProtoMinor, ok = parseHTTPVersion(req.Proto); !ok { + return nil, &badStringError{"malformed HTTP version", req.Proto} + } + + if req.URL, err = ParseURL(req.RawURL); err != nil { + return nil, err + } + + // Subsequent lines: Key: value. + nheader := 0 + req.Header = make(map[string]string) + for { + var key, value string + if key, value, err = readKeyValue(b); err != nil { + return nil, err + } + if key == "" { + break + } + if nheader++; nheader >= maxHeaderLines { + return nil, ErrHeaderTooLong + } + + key = CanonicalHeaderKey(key) + + // RFC 2616 says that if you send the same header key + // multiple times, it has to be semantically equivalent + // to concatenating the values separated by commas. + oldvalue, present := req.Header[key] + if present { + req.Header[key] = oldvalue + "," + value + } else { + req.Header[key] = value + } + } + + // RFC2616: Must treat + // GET /index.html HTTP/1.1 + // Host: www.google.com + // and + // GET http://www.google.com/index.html HTTP/1.1 + // Host: doesntmatter + // the same. In the second case, any Host line is ignored. + req.Host = req.URL.Host + if req.Host == "" { + req.Host = req.Header["Host"] + } + req.Header["Host"] = "", false + + fixPragmaCacheControl(req.Header) + + // Pull out useful fields as a convenience to clients. 
+ req.Referer = req.Header["Referer"] + req.Header["Referer"] = "", false + + req.UserAgent = req.Header["User-Agent"] + req.Header["User-Agent"] = "", false + + // TODO: Parse specific header values: + // Accept + // Accept-Encoding + // Accept-Language + // Authorization + // Cache-Control + // Connection + // Date + // Expect + // From + // If-Match + // If-Modified-Since + // If-None-Match + // If-Range + // If-Unmodified-Since + // Max-Forwards + // Proxy-Authorization + // Referer [sic] + // TE (transfer-codings) + // Trailer + // Transfer-Encoding + // Upgrade + // User-Agent + // Via + // Warning + + err = readTransfer(req, b) + if err != nil { + return nil, err + } + + return req, nil +} + +// ParseQuery parses the URL-encoded query string and returns +// a map listing the values specified for each key. +// ParseQuery always returns a non-nil map containing all the +// valid query parameters found; err describes the first decoding error +// encountered, if any. +func ParseQuery(query string) (m map[string][]string, err os.Error) { + m = make(map[string][]string) + err = parseQuery(m, query) + return +} + +func parseQuery(m map[string][]string, query string) (err os.Error) { + for _, kv := range strings.Split(query, "&", -1) { + if len(kv) == 0 { + continue + } + kvPair := strings.Split(kv, "=", 2) + + var key, value string + var e os.Error + key, e = URLUnescape(kvPair[0]) + if e == nil && len(kvPair) > 1 { + value, e = URLUnescape(kvPair[1]) + } + if e != nil { + err = e + continue + } + vec := vector.StringVector(m[key]) + vec.Push(value) + m[key] = vec + } + return err +} + +// ParseForm parses the request body as a form for POST requests, or the raw query for GET requests. +// It is idempotent. 
+func (r *Request) ParseForm() (err os.Error) { + if r.Form != nil { + return + } + + r.Form = make(map[string][]string) + if r.URL != nil { + err = parseQuery(r.Form, r.URL.RawQuery) + } + if r.Method == "POST" { + if r.Body == nil { + return os.ErrorString("missing form body") + } + ct := r.Header["Content-Type"] + switch strings.Split(ct, ";", 2)[0] { + case "text/plain", "application/x-www-form-urlencoded", "": + b, e := ioutil.ReadAll(r.Body) + if e != nil { + if err == nil { + err = e + } + break + } + e = parseQuery(r.Form, string(b)) + if err == nil { + err = e + } + // TODO(dsymonds): Handle multipart/form-data + default: + return &badStringError{"unknown Content-Type", ct} + } + } + return err +} + +// FormValue returns the first value for the named component of the query. +// FormValue calls ParseForm if necessary. +func (r *Request) FormValue(key string) string { + if r.Form == nil { + r.ParseForm() + } + if vs := r.Form[key]; len(vs) > 0 { + return vs[0] + } + return "" +} + +func (r *Request) expectsContinue() bool { + expectation, ok := r.Header["Expect"] + return ok && strings.ToLower(expectation) == "100-continue" +} + +func (r *Request) wantsHttp10KeepAlive() bool { + if r.ProtoMajor != 1 || r.ProtoMinor != 0 { + return false + } + value, exists := r.Header["Connection"] + if !exists { + return false + } + return strings.Contains(strings.ToLower(value), "keep-alive") +} |