aboutsummaryrefslogtreecommitdiff
path: root/libgo/go/strconv/quote.go
diff options
context:
space:
mode:
Diffstat (limited to 'libgo/go/strconv/quote.go')
-rw-r--r-- libgo/go/strconv/quote.go 171
1 files changed, 111 insertions, 60 deletions
diff --git a/libgo/go/strconv/quote.go b/libgo/go/strconv/quote.go
index bcbdbc5..b3bbb16 100644
--- a/libgo/go/strconv/quote.go
+++ b/libgo/go/strconv/quote.go
@@ -7,7 +7,6 @@
package strconv
import (
- "internal/bytealg"
"unicode/utf8"
)
@@ -16,6 +15,11 @@ const (
upperhex = "0123456789ABCDEF"
)
+// contains reports whether the string contains the byte c.
+func contains(s string, c byte) bool {
+ return index(s, c) != -1
+}
+
func quoteWith(s string, quote byte, ASCIIonly, graphicOnly bool) string {
return string(appendQuotedWith(make([]byte, 0, 3*len(s)/2), s, quote, ASCIIonly, graphicOnly))
}
@@ -360,85 +364,132 @@ func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string,
return
}
+// QuotedPrefix returns the quoted string (as understood by Unquote) at the prefix of s.
+// If s does not start with a valid quoted string, QuotedPrefix returns an error.
+func QuotedPrefix(s string) (string, error) {
+ out, _, err := unquote(s, false)
+ return out, err
+}
+
// Unquote interprets s as a single-quoted, double-quoted,
// or backquoted Go string literal, returning the string value
// that s quotes. (If s is single-quoted, it would be a Go
// character literal; Unquote returns the corresponding
// one-character string.)
func Unquote(s string) (string, error) {
- n := len(s)
- if n < 2 {
+ out, rem, err := unquote(s, true)
+ if len(rem) > 0 {
return "", ErrSyntax
}
- quote := s[0]
- if quote != s[n-1] {
- return "", ErrSyntax
+ return out, err
+}
+
+// unquote parses a quoted string at the start of the input,
+// returning the parsed prefix, the remaining suffix, and any parse errors.
+// If unescape is true, the parsed prefix is unescaped,
+// otherwise the input prefix is provided verbatim.
+func unquote(in string, unescape bool) (out, rem string, err error) {
+ // Determine the quote form and optimistically find the terminating quote.
+ if len(in) < 2 {
+ return "", in, ErrSyntax
}
- s = s[1 : n-1]
+ quote := in[0]
+ end := index(in[1:], quote)
+ if end < 0 {
+ return "", in, ErrSyntax
+ }
+ end += 2 // position after terminating quote; may be wrong if escape sequences are present
- if quote == '`' {
- if contains(s, '`') {
- return "", ErrSyntax
+ switch quote {
+ case '`':
+ switch {
+ case !unescape:
+ out = in[:end] // include quotes
+ case !contains(in[:end], '\r'):
+ out = in[len("`") : end-len("`")] // exclude quotes
+ default:
+ // Carriage return characters ('\r') inside raw string literals
+ // are discarded from the raw string value.
+ buf := make([]byte, 0, end-len("`")-len("\r")-len("`"))
+ for i := len("`"); i < end-len("`"); i++ {
+ if in[i] != '\r' {
+ buf = append(buf, in[i])
+ }
+ }
+ out = string(buf)
}
- if contains(s, '\r') {
- // -1 because we know there is at least one \r to remove.
- buf := make([]byte, 0, len(s)-1)
- for i := 0; i < len(s); i++ {
- if s[i] != '\r' {
- buf = append(buf, s[i])
+ // NOTE: Prior implementations did not verify that raw strings consist
+ // of valid UTF-8 characters and we continue to not verify it as such.
+ // The Go specification does not explicitly require valid UTF-8,
+ // but only mentions that it is implicitly valid for Go source code
+ // (which must be valid UTF-8).
+ return out, in[end:], nil
+ case '"', '\'':
+ // Handle quoted strings without any escape sequences.
+ if !contains(in[:end], '\\') && !contains(in[:end], '\n') {
+ var valid bool
+ switch quote {
+ case '"':
+ valid = utf8.ValidString(in[len(`"`) : end-len(`"`)])
+ case '\'':
+ r, n := utf8.DecodeRuneInString(in[len("'") : end-len("'")])
+ valid = len("'")+n+len("'") == end && (r != utf8.RuneError || n != 1)
+ }
+ if valid {
+ out = in[:end]
+ if unescape {
+ out = out[1 : end-1] // exclude quotes
}
+ return out, in[end:], nil
}
- return string(buf), nil
}
- return s, nil
- }
- if quote != '"' && quote != '\'' {
- return "", ErrSyntax
- }
- if contains(s, '\n') {
- return "", ErrSyntax
- }
- // Is it trivial? Avoid allocation.
- if !contains(s, '\\') && !contains(s, quote) {
- switch quote {
- case '"':
- if utf8.ValidString(s) {
- return s, nil
+ // Handle quoted strings with escape sequences.
+ var buf []byte
+ in0 := in
+ in = in[1:] // skip starting quote
+ if unescape {
+ buf = make([]byte, 0, 3*end/2) // try to avoid more allocations
+ }
+ for len(in) > 0 && in[0] != quote {
+ // Process the next character,
+ // rejecting any unescaped newline characters which are invalid.
+ r, multibyte, rem, err := UnquoteChar(in, quote)
+ if in[0] == '\n' || err != nil {
+ return "", in0, ErrSyntax
}
- case '\'':
- r, size := utf8.DecodeRuneInString(s)
- if size == len(s) && (r != utf8.RuneError || size != 1) {
- return s, nil
+ in = rem
+
+ // Append the character if unescaping the input.
+ if unescape {
+ if r < utf8.RuneSelf || !multibyte {
+ buf = append(buf, byte(r))
+ } else {
+ var arr [utf8.UTFMax]byte
+ n := utf8.EncodeRune(arr[:], r)
+ buf = append(buf, arr[:n]...)
+ }
}
- }
- }
- var runeTmp [utf8.UTFMax]byte
- buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
- for len(s) > 0 {
- c, multibyte, ss, err := UnquoteChar(s, quote)
- if err != nil {
- return "", err
+ // Single quoted strings must be a single character.
+ if quote == '\'' {
+ break
+ }
}
- s = ss
- if c < utf8.RuneSelf || !multibyte {
- buf = append(buf, byte(c))
- } else {
- n := utf8.EncodeRune(runeTmp[:], c)
- buf = append(buf, runeTmp[:n]...)
+
+ // Verify that the string ends with a terminating quote.
+ if !(len(in) > 0 && in[0] == quote) {
+ return "", in0, ErrSyntax
}
- if quote == '\'' && len(s) != 0 {
- // single-quoted must be single character
- return "", ErrSyntax
+ in = in[1:] // skip terminating quote
+
+ if unescape {
+ return string(buf), in, nil
}
+ return in0[:len(in0)-len(in)], in, nil
+ default:
+ return "", in, ErrSyntax
}
- return string(buf), nil
-}
-
-// contains reports whether the string contains the byte c.
-func contains(s string, c byte) bool {
- return bytealg.IndexByteString(s, c) != -1
}
// bsearch16 returns the smallest i such that a[i] >= x.
@@ -446,7 +497,7 @@ func contains(s string, c byte) bool {
func bsearch16(a []uint16, x uint16) int {
i, j := 0, len(a)
for i < j {
- h := i + (j-i)/2
+ h := i + (j-i)>>1
if a[h] < x {
i = h + 1
} else {
@@ -461,7 +512,7 @@ func bsearch16(a []uint16, x uint16) int {
func bsearch32(a []uint32, x uint32) int {
i, j := 0, len(a)
for i < j {
- h := i + (j-i)/2
+ h := i + (j-i)>>1
if a[h] < x {
i = h + 1
} else {