about summary refs log tree commit diff
path: root/libgo/go/regexp
diff options
context:
space:
mode:
Diffstat (limited to 'libgo/go/regexp')
-rw-r--r-- libgo/go/regexp/all_test.go 3
-rw-r--r-- libgo/go/regexp/exec2_test.go 1
-rw-r--r-- libgo/go/regexp/exec_test.go 14
-rw-r--r-- libgo/go/regexp/find_test.go 7
-rw-r--r-- libgo/go/regexp/onepass.go 3
-rw-r--r-- libgo/go/regexp/regexp.go 32
-rw-r--r-- libgo/go/regexp/syntax/parse.go 8
-rw-r--r-- libgo/go/regexp/syntax/prog.go 3
8 files changed, 38 insertions, 33 deletions
diff --git a/libgo/go/regexp/all_test.go b/libgo/go/regexp/all_test.go
index be7a2e7..c233cfa 100644
--- a/libgo/go/regexp/all_test.go
+++ b/libgo/go/regexp/all_test.go
@@ -372,6 +372,9 @@ var literalPrefixTests = []MetaTest{
{`^^0$$`, ``, ``, false},
{`^$^$`, ``, ``, false},
{`$$0^^`, ``, ``, false},
+ {`a\x{fffd}b`, ``, `a`, false},
+ {`\x{fffd}b`, ``, ``, false},
+ {"\ufffd", ``, ``, false},
}
func TestQuoteMeta(t *testing.T) {
diff --git a/libgo/go/regexp/exec2_test.go b/libgo/go/regexp/exec2_test.go
index 6444bc1..b6dac4a 100644
--- a/libgo/go/regexp/exec2_test.go
+++ b/libgo/go/regexp/exec2_test.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build !race
-// +build !race
package regexp
diff --git a/libgo/go/regexp/exec_test.go b/libgo/go/regexp/exec_test.go
index 1f9a7a9..5f84426 100644
--- a/libgo/go/regexp/exec_test.go
+++ b/libgo/go/regexp/exec_test.go
@@ -294,12 +294,9 @@ func parseResult(t *testing.T, file string, lineno int, res string) []int {
out[n] = -1
out[n+1] = -1
} else {
- k := strings.Index(pair, "-")
- if k < 0 {
- t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
- }
- lo, err1 := strconv.Atoi(pair[:k])
- hi, err2 := strconv.Atoi(pair[k+1:])
+ loStr, hiStr, _ := strings.Cut(pair, "-")
+ lo, err1 := strconv.Atoi(loStr)
+ hi, err2 := strconv.Atoi(hiStr)
if err1 != nil || err2 != nil || lo > hi {
t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
}
@@ -457,12 +454,11 @@ Reading:
continue Reading
}
case ':':
- i := strings.Index(flag[1:], ":")
- if i < 0 {
+ var ok bool
+ if _, flag, ok = strings.Cut(flag[1:], ":"); !ok {
t.Logf("skip: %s", line)
continue Reading
}
- flag = flag[1+i+1:]
case 'C', 'N', 'T', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
t.Logf("skip: %s", line)
continue Reading
diff --git a/libgo/go/regexp/find_test.go b/libgo/go/regexp/find_test.go
index 64c2239..2edbe9b 100644
--- a/libgo/go/regexp/find_test.go
+++ b/libgo/go/regexp/find_test.go
@@ -116,6 +116,13 @@ var findTests = []FindTest{
{"\\`", "`", build(1, 0, 1)},
{"[\\`]+", "`", build(1, 0, 1)},
+ {"\ufffd", "\xff", build(1, 0, 1)},
+ {"\ufffd", "hello\xffworld", build(1, 5, 6)},
+ {`.*`, "hello\xffworld", build(1, 0, 11)},
+ {`\x{fffd}`, "\xc2\x00", build(1, 0, 1)},
+ {"[\ufffd]", "\xff", build(1, 0, 1)},
+ {`[\x{fffd}]`, "\xc2\x00", build(1, 0, 1)},
+
// long set of matches (longer than startSize)
{
".",
diff --git a/libgo/go/regexp/onepass.go b/libgo/go/regexp/onepass.go
index 2f3ce6f..bc47f4c 100644
--- a/libgo/go/regexp/onepass.go
+++ b/libgo/go/regexp/onepass.go
@@ -9,6 +9,7 @@ import (
"sort"
"strings"
"unicode"
+ "unicode/utf8"
)
// "One-pass" regexp execution.
@@ -55,7 +56,7 @@ func onePassPrefix(p *syntax.Prog) (prefix string, complete bool, pc uint32) {
// Have prefix; gather characters.
var buf strings.Builder
- for iop(i) == syntax.InstRune && len(i.Rune) == 1 && syntax.Flags(i.Arg)&syntax.FoldCase == 0 {
+ for iop(i) == syntax.InstRune && len(i.Rune) == 1 && syntax.Flags(i.Arg)&syntax.FoldCase == 0 && i.Rune[0] != utf8.RuneError {
buf.WriteRune(i.Rune[0])
pc, i = i.Out, &p.Inst[i.Out]
}
diff --git a/libgo/go/regexp/regexp.go b/libgo/go/regexp/regexp.go
index b547a2a..f975bb3 100644
--- a/libgo/go/regexp/regexp.go
+++ b/libgo/go/regexp/regexp.go
@@ -20,6 +20,8 @@
// or any book about automata theory.
//
// All characters are UTF-8-encoded code points.
+// Following utf8.DecodeRune, each byte of an invalid UTF-8 sequence
+// is treated as if it encoded utf8.RuneError (U+FFFD).
//
// There are 16 methods of Regexp that match a regular expression and identify
// the matched text. Their names are matched by this regular expression:
@@ -40,7 +42,7 @@
// successive submatches of the expression. Submatches are matches of
// parenthesized subexpressions (also known as capturing groups) within the
// regular expression, numbered from left to right in order of opening
-// parenthesis. Submatch 0 is the match of the entire expression, submatch 1
+// parenthesis. Submatch 0 is the match of the entire expression, submatch 1 is
// the match of the first parenthesized subexpression, and so on.
//
// If 'Index' is present, matches and submatches are identified by byte index
@@ -276,7 +278,11 @@ func minInputLen(re *syntax.Regexp) int {
case syntax.OpLiteral:
l := 0
for _, r := range re.Rune {
- l += utf8.RuneLen(r)
+ if r == utf8.RuneError {
+ l++
+ } else {
+ l += utf8.RuneLen(r)
+ }
}
return l
case syntax.OpCapture, syntax.OpPlus:
@@ -922,23 +928,22 @@ func (re *Regexp) ExpandString(dst []byte, template string, src string, match []
func (re *Regexp) expand(dst []byte, template string, bsrc []byte, src string, match []int) []byte {
for len(template) > 0 {
- i := strings.Index(template, "$")
- if i < 0 {
+ before, after, ok := strings.Cut(template, "$")
+ if !ok {
break
}
- dst = append(dst, template[:i]...)
- template = template[i:]
- if len(template) > 1 && template[1] == '$' {
+ dst = append(dst, before...)
+ template = after
+ if template != "" && template[0] == '$' {
// Treat $$ as $.
dst = append(dst, '$')
- template = template[2:]
+ template = template[1:]
continue
}
name, num, rest, ok := extract(template)
if !ok {
// Malformed; treat $ as raw text.
dst = append(dst, '$')
- template = template[1:]
continue
}
template = rest
@@ -967,17 +972,16 @@ func (re *Regexp) expand(dst []byte, template string, bsrc []byte, src string, m
return dst
}
-// extract returns the name from a leading "$name" or "${name}" in str.
+// extract returns the name from a leading "name" or "{name}" in str.
+// (The $ has already been removed by the caller.)
// If it is a number, extract returns num set to that number; otherwise num = -1.
func extract(str string) (name string, num int, rest string, ok bool) {
- if len(str) < 2 || str[0] != '$' {
+ if str == "" {
return
}
brace := false
- if str[1] == '{' {
+ if str[0] == '{' {
brace = true
- str = str[2:]
- } else {
str = str[1:]
}
i := 0
diff --git a/libgo/go/regexp/syntax/parse.go b/libgo/go/regexp/syntax/parse.go
index 7b40309..06a92fb 100644
--- a/libgo/go/regexp/syntax/parse.go
+++ b/libgo/go/regexp/syntax/parse.go
@@ -824,13 +824,7 @@ func Parse(s string, flags Flags) (*Regexp, error) {
case 'Q':
// \Q ... \E: the ... is always literals
var lit string
- if i := strings.Index(t, `\E`); i < 0 {
- lit = t[2:]
- t = ""
- } else {
- lit = t[2:i]
- t = t[i+2:]
- }
+ lit, t, _ = strings.Cut(t[2:], `\E`)
for lit != "" {
c, rest, err := nextRune(lit)
if err != nil {
diff --git a/libgo/go/regexp/syntax/prog.go b/libgo/go/regexp/syntax/prog.go
index ae7a9a2..8583f55 100644
--- a/libgo/go/regexp/syntax/prog.go
+++ b/libgo/go/regexp/syntax/prog.go
@@ -8,6 +8,7 @@ import (
"strconv"
"strings"
"unicode"
+ "unicode/utf8"
)
// Compiled program.
@@ -154,7 +155,7 @@ func (p *Prog) Prefix() (prefix string, complete bool) {
// Have prefix; gather characters.
var buf strings.Builder
- for i.op() == InstRune && len(i.Rune) == 1 && Flags(i.Arg)&FoldCase == 0 {
+ for i.op() == InstRune && len(i.Rune) == 1 && Flags(i.Arg)&FoldCase == 0 && i.Rune[0] != utf8.RuneError {
buf.WriteRune(i.Rune[0])
i = p.skipNop(i.Out)
}