diff options
author | Ian Lance Taylor <iant@golang.org> | 2022-02-11 14:53:56 -0800 |
---|---|---|
committer | Ian Lance Taylor <iant@golang.org> | 2022-02-11 15:01:19 -0800 |
commit | 8dc2499aa62f768c6395c9754b8cabc1ce25c494 (patch) | |
tree | 43d7fd2bbfd7ad8c9625a718a5e8718889351994 /libgo/go/regexp/regexp.go | |
parent | 9a56779dbc4e2d9c15be8d31e36f2f59be7331a8 (diff) | |
download | gcc-8dc2499aa62f768c6395c9754b8cabc1ce25c494.zip gcc-8dc2499aa62f768c6395c9754b8cabc1ce25c494.tar.gz gcc-8dc2499aa62f768c6395c9754b8cabc1ce25c494.tar.bz2 |
libgo: update to Go1.18beta2
gotools/
* Makefile.am (go_cmd_cgo_files): Add ast_go118.go
(check-go-tool): Copy golang.org/x/tools directories.
* Makefile.in: Regenerate.
Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/384695
Diffstat (limited to 'libgo/go/regexp/regexp.go')
-rw-r--r-- | libgo/go/regexp/regexp.go | 32 |
1 file changed, 18 insertions, 14 deletions
```diff
diff --git a/libgo/go/regexp/regexp.go b/libgo/go/regexp/regexp.go
index b547a2a..f975bb3 100644
--- a/libgo/go/regexp/regexp.go
+++ b/libgo/go/regexp/regexp.go
@@ -20,6 +20,8 @@
 // or any book about automata theory.
 //
 // All characters are UTF-8-encoded code points.
+// Following utf8.DecodeRune, each byte of an invalid UTF-8 sequence
+// is treated as if it encoded utf8.RuneError (U+FFFD).
 //
 // There are 16 methods of Regexp that match a regular expression and identify
 // the matched text. Their names are matched by this regular expression:
@@ -40,7 +42,7 @@
 // successive submatches of the expression. Submatches are matches of
 // parenthesized subexpressions (also known as capturing groups) within the
 // regular expression, numbered from left to right in order of opening
-// parenthesis. Submatch 0 is the match of the entire expression, submatch 1
+// parenthesis. Submatch 0 is the match of the entire expression, submatch 1 is
 // the match of the first parenthesized subexpression, and so on.
 //
 // If 'Index' is present, matches and submatches are identified by byte index
@@ -276,7 +278,11 @@ func minInputLen(re *syntax.Regexp) int {
 	case syntax.OpLiteral:
 		l := 0
 		for _, r := range re.Rune {
-			l += utf8.RuneLen(r)
+			if r == utf8.RuneError {
+				l++
+			} else {
+				l += utf8.RuneLen(r)
+			}
 		}
 		return l
 	case syntax.OpCapture, syntax.OpPlus:
@@ -922,23 +928,22 @@ func (re *Regexp) ExpandString(dst []byte, template string, src string, match []
 
 func (re *Regexp) expand(dst []byte, template string, bsrc []byte, src string, match []int) []byte {
 	for len(template) > 0 {
-		i := strings.Index(template, "$")
-		if i < 0 {
+		before, after, ok := strings.Cut(template, "$")
+		if !ok {
 			break
 		}
-		dst = append(dst, template[:i]...)
-		template = template[i:]
-		if len(template) > 1 && template[1] == '$' {
+		dst = append(dst, before...)
+		template = after
+		if template != "" && template[0] == '$' {
 			// Treat $$ as $.
 			dst = append(dst, '$')
-			template = template[2:]
+			template = template[1:]
 			continue
 		}
 		name, num, rest, ok := extract(template)
 		if !ok {
 			// Malformed; treat $ as raw text.
 			dst = append(dst, '$')
-			template = template[1:]
 			continue
 		}
 		template = rest
@@ -967,17 +972,16 @@ func (re *Regexp) expand(dst []byte, template string, bsrc []byte, src string, m
 	return dst
 }
 
-// extract returns the name from a leading "$name" or "${name}" in str.
+// extract returns the name from a leading "name" or "{name}" in str.
+// (The $ has already been removed by the caller.)
 // If it is a number, extract returns num set to that number; otherwise num = -1.
 func extract(str string) (name string, num int, rest string, ok bool) {
-	if len(str) < 2 || str[0] != '$' {
+	if str == "" {
 		return
 	}
 	brace := false
-	if str[1] == '{' {
+	if str[0] == '{' {
 		brace = true
-		str = str[2:]
-	} else {
 		str = str[1:]
 	}
 	i := 0
```