diff options
Diffstat (limited to 'libgo/go/net/url/url.go')
-rw-r--r-- | libgo/go/net/url/url.go | 119 |
1 files changed, 85 insertions, 34 deletions
diff --git a/libgo/go/net/url/url.go b/libgo/go/net/url/url.go index 8ffad66..1a93e34 100644 --- a/libgo/go/net/url/url.go +++ b/libgo/go/net/url/url.go @@ -24,6 +24,24 @@ type Error struct { func (e *Error) Error() string { return e.Op + " " + e.URL + ": " + e.Err.Error() } +type timeout interface { + Timeout() bool +} + +func (e *Error) Timeout() bool { + t, ok := e.Err.(timeout) + return ok && t.Timeout() +} + +type temporary interface { + Temporary() bool +} + +func (e *Error) Temporary() bool { + t, ok := e.Err.(temporary) + return ok && t.Temporary() +} + func ishex(c byte) bool { switch { case '0' <= c && c <= '9': @@ -53,6 +71,7 @@ type encoding int const ( encodePath encoding = 1 + iota encodeHost + encodeZone encodeUserPassword encodeQueryComponent encodeFragment @@ -64,6 +83,12 @@ func (e EscapeError) Error() string { return "invalid URL escape " + strconv.Quote(string(e)) } +type InvalidHostError string + +func (e InvalidHostError) Error() string { + return "invalid character " + strconv.Quote(string(e)) + " in host name" +} + // Return true if the specified character should be escaped when // appearing in a URL string, according to RFC 3986. // @@ -75,14 +100,18 @@ func shouldEscape(c byte, mode encoding) bool { return false } - if mode == encodeHost { + if mode == encodeHost || mode == encodeZone { // §3.2.2 Host allows // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" // as part of reg-name. // We add : because we include :port as part of host. - // We add [ ] because we include [ipv6]:port as part of host + // We add [ ] because we include [ipv6]:port as part of host. + // We add < > because they're the only characters left that + // we could possibly allow, and Parse will reject them if we + // escape them (because hosts can't use %-encoding for + // ASCII bytes). 
switch c { - case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']': + case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']', '<', '>', '"': return false } } @@ -148,11 +177,36 @@ func unescape(s string, mode encoding) (string, error) { } return "", EscapeError(s) } + // Per https://tools.ietf.org/html/rfc3986#page-21 + // in the host component %-encoding can only be used + // for non-ASCII bytes. + // But https://tools.ietf.org/html/rfc6874#section-2 + // introduces %25 being allowed to escape a percent sign + // in IPv6 scoped-address literals. Yay. + if mode == encodeHost && unhex(s[i+1]) < 8 && s[i:i+3] != "%25" { + return "", EscapeError(s[i : i+3]) + } + if mode == encodeZone { + // RFC 6874 says basically "anything goes" for zone identifiers + // and that even non-ASCII can be redundantly escaped, + // but it seems prudent to restrict %-escaped bytes here to those + // that are valid host name bytes in their unescaped form. + // That is, you can use escaping in the zone identifier but not + // to introduce bytes you couldn't just write directly. + // But Windows puts spaces here! Yay. + v := unhex(s[i+1])<<4 | unhex(s[i+2]) + if s[i:i+3] != "%25" && v != ' ' && shouldEscape(v, encodeHost) { + return "", EscapeError(s[i : i+3]) + } + } i += 3 case '+': hasPlus = mode == encodeQueryComponent i++ default: + if (mode == encodeHost || mode == encodeZone) && s[i] < 0x80 && shouldEscape(s[i], mode) { + return "", InvalidHostError(s[i : i+1]) + } i++ } } @@ -246,7 +300,7 @@ func escape(s string, mode encoding) string { // Go 1.5 introduced the RawPath field to hold the encoded form of Path. // The Parse function sets both Path and RawPath in the URL it returns, // and URL's String method uses RawPath if it is a valid encoding of Path, -// by calling the EncodedPath method. +// by calling the EscapedPath method. 
// // In earlier versions of Go, the more indirect workarounds were that an // HTTP server could consult req.RequestURI and an HTTP client could @@ -431,7 +485,7 @@ func parse(rawurl string, viaRequest bool) (url *URL, err error) { goto Error } // RawPath is a hint as to the encoding of Path to use - // in url.EncodedPath. If that method already gets the + // in url.EscapedPath. If that method already gets the // right answer without RawPath, leave it empty. // This will help make sure that people don't rely on it in general. if url.EscapedPath() != rest && validEncodedPath(rest) { @@ -478,14 +532,9 @@ func parseAuthority(authority string) (user *Userinfo, host string, err error) { // parseHost parses host as an authority without user // information. That is, as host[:port]. func parseHost(host string) (string, error) { - litOrName := host if strings.HasPrefix(host, "[") { // Parse an IP-Literal in RFC 3986 and RFC 6874. - // E.g., "[fe80::1], "[fe80::1%25en0]" - // - // RFC 4007 defines "%" as a delimiter character in - // the textual representation of IPv6 addresses. - // Per RFC 6874, in URIs that "%" is encoded as "%25". + // E.g., "[fe80::1]", "[fe80::1%25en0]", "[fe80::1]:80". i := strings.LastIndex(host, "]") if i < 0 { return "", errors.New("missing ']' in host") @@ -494,29 +543,31 @@ func parseHost(host string) (string, error) { if !validOptionalPort(colonPort) { return "", fmt.Errorf("invalid port %q after host", colonPort) } - // Parse a host subcomponent without a ZoneID in RFC - // 6874 because the ZoneID is allowed to use the - // percent encoded form. - j := strings.Index(host[:i], "%25") - if j < 0 { - litOrName = host[1:i] - } else { - litOrName = host[1:j] + + // RFC 6874 defines that %25 (%-encoded percent) introduces + // the zone identifier, and the zone identifier can use basically + // any %-encoding it likes. That's different from the host, which + // can only %-encode non-ASCII bytes. 
+ // We do impose some restrictions on the zone, to avoid stupidity + // like newlines. + zone := strings.Index(host[:i], "%25") + if zone >= 0 { + host1, err := unescape(host[:zone], encodeHost) + if err != nil { + return "", err + } + host2, err := unescape(host[zone:i], encodeZone) + if err != nil { + return "", err + } + host3, err := unescape(host[i:], encodeHost) + if err != nil { + return "", err + } + return host1 + host2 + host3, nil } } - // A URI containing an IP-Literal without a ZoneID or - // IPv4address in RFC 3986 and RFC 6847 must not be - // percent-encoded. - // - // A URI containing a DNS registered name in RFC 3986 is - // allowed to be percent-encoded, though we don't use it for - // now to avoid messing up with the gap between allowed - // characters in URI and allowed characters in DNS. - // See golang.org/issue/7991. - if strings.Contains(litOrName, "%") { - return "", errors.New("percent-encoded characters in host") - } var err error if host, err = unescape(host, encodeHost); err != nil { return "", err @@ -572,12 +623,12 @@ func validEncodedPath(s string) bool { } // validOptionalPort reports whether port is either an empty string -// or matches /^:\d+$/ +// or matches /^:\d*$/ func validOptionalPort(port string) bool { if port == "" { return true } - if port[0] != ':' || len(port) == 1 { + if port[0] != ':' { return false } for _, b := range port[1:] { @@ -596,7 +647,7 @@ func validOptionalPort(port string) bool { // // If u.Opaque is non-empty, String uses the first form; // otherwise it uses the second form. -// To obtain the path, String uses u.EncodedPath(). +// To obtain the path, String uses u.EscapedPath(). // // In the second form, the following rules apply: // - if u.Scheme is empty, scheme: is omitted. |