aboutsummaryrefslogtreecommitdiff
path: root/libgo/go/regexp/regexp.go
diff options
context:
space:
mode:
authorIan Lance Taylor <iant@golang.org>2022-02-11 14:53:56 -0800
committerIan Lance Taylor <iant@golang.org>2022-02-11 15:01:19 -0800
commit8dc2499aa62f768c6395c9754b8cabc1ce25c494 (patch)
tree43d7fd2bbfd7ad8c9625a718a5e8718889351994 /libgo/go/regexp/regexp.go
parent9a56779dbc4e2d9c15be8d31e36f2f59be7331a8 (diff)
downloadgcc-8dc2499aa62f768c6395c9754b8cabc1ce25c494.zip
gcc-8dc2499aa62f768c6395c9754b8cabc1ce25c494.tar.gz
gcc-8dc2499aa62f768c6395c9754b8cabc1ce25c494.tar.bz2
libgo: update to Go1.18beta2
gotools/ * Makefile.am (go_cmd_cgo_files): Add ast_go118.go (check-go-tool): Copy golang.org/x/tools directories. * Makefile.in: Regenerate. Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/384695
Diffstat (limited to 'libgo/go/regexp/regexp.go')
-rw-r--r--libgo/go/regexp/regexp.go32
1 files changed, 18 insertions, 14 deletions
diff --git a/libgo/go/regexp/regexp.go b/libgo/go/regexp/regexp.go
index b547a2a..f975bb3 100644
--- a/libgo/go/regexp/regexp.go
+++ b/libgo/go/regexp/regexp.go
@@ -20,6 +20,8 @@
// or any book about automata theory.
//
// All characters are UTF-8-encoded code points.
+// Following utf8.DecodeRune, each byte of an invalid UTF-8 sequence
+// is treated as if it encoded utf8.RuneError (U+FFFD).
//
// There are 16 methods of Regexp that match a regular expression and identify
// the matched text. Their names are matched by this regular expression:
@@ -40,7 +42,7 @@
// successive submatches of the expression. Submatches are matches of
// parenthesized subexpressions (also known as capturing groups) within the
// regular expression, numbered from left to right in order of opening
-// parenthesis. Submatch 0 is the match of the entire expression, submatch 1
+// parenthesis. Submatch 0 is the match of the entire expression, submatch 1 is
// the match of the first parenthesized subexpression, and so on.
//
// If 'Index' is present, matches and submatches are identified by byte index
@@ -276,7 +278,11 @@ func minInputLen(re *syntax.Regexp) int {
case syntax.OpLiteral:
l := 0
for _, r := range re.Rune {
- l += utf8.RuneLen(r)
+ if r == utf8.RuneError {
+ l++
+ } else {
+ l += utf8.RuneLen(r)
+ }
}
return l
case syntax.OpCapture, syntax.OpPlus:
@@ -922,23 +928,22 @@ func (re *Regexp) ExpandString(dst []byte, template string, src string, match []
func (re *Regexp) expand(dst []byte, template string, bsrc []byte, src string, match []int) []byte {
for len(template) > 0 {
- i := strings.Index(template, "$")
- if i < 0 {
+ before, after, ok := strings.Cut(template, "$")
+ if !ok {
break
}
- dst = append(dst, template[:i]...)
- template = template[i:]
- if len(template) > 1 && template[1] == '$' {
+ dst = append(dst, before...)
+ template = after
+ if template != "" && template[0] == '$' {
// Treat $$ as $.
dst = append(dst, '$')
- template = template[2:]
+ template = template[1:]
continue
}
name, num, rest, ok := extract(template)
if !ok {
// Malformed; treat $ as raw text.
dst = append(dst, '$')
- template = template[1:]
continue
}
template = rest
@@ -967,17 +972,16 @@ func (re *Regexp) expand(dst []byte, template string, bsrc []byte, src string, m
return dst
}
-// extract returns the name from a leading "$name" or "${name}" in str.
+// extract returns the name from a leading "name" or "{name}" in str.
+// (The $ has already been removed by the caller.)
// If it is a number, extract returns num set to that number; otherwise num = -1.
func extract(str string) (name string, num int, rest string, ok bool) {
- if len(str) < 2 || str[0] != '$' {
+ if str == "" {
return
}
brace := false
- if str[1] == '{' {
+ if str[0] == '{' {
brace = true
- str = str[2:]
- } else {
str = str[1:]
}
i := 0