8 files changed, 38 insertions, 33 deletions
diff --git a/libgo/go/regexp/all_test.go b/libgo/go/regexp/all_test.go
index be7a2e7..c233cfa 100644
--- a/libgo/go/regexp/all_test.go
+++ b/libgo/go/regexp/all_test.go
@@ -372,6 +372,9 @@ var literalPrefixTests = []MetaTest{
 	{`^^0$$`, ``, ``, false},
 	{`^$^$`, ``, ``, false},
 	{`$$0^^`, ``, ``, false},
+	{`a\x{fffd}b`, ``, `a`, false},
+	{`\x{fffd}b`, ``, ``, false},
+	{"\ufffd", ``, ``, false},
 }
 
 func TestQuoteMeta(t *testing.T) {
diff --git a/libgo/go/regexp/exec2_test.go b/libgo/go/regexp/exec2_test.go
index 6444bc1..b6dac4a 100644
--- a/libgo/go/regexp/exec2_test.go
+++ b/libgo/go/regexp/exec2_test.go
@@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 
 //go:build !race
-// +build !race
 
 package regexp
 
diff --git a/libgo/go/regexp/exec_test.go b/libgo/go/regexp/exec_test.go
index 1f9a7a9..5f84426 100644
--- a/libgo/go/regexp/exec_test.go
+++ b/libgo/go/regexp/exec_test.go
@@ -294,12 +294,9 @@ func parseResult(t *testing.T, file string, lineno int, res string) []int {
 				out[n] = -1
 				out[n+1] = -1
 			} else {
-				k := strings.Index(pair, "-")
-				if k < 0 {
-					t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
-				}
-				lo, err1 := strconv.Atoi(pair[:k])
-				hi, err2 := strconv.Atoi(pair[k+1:])
+				loStr, hiStr, _ := strings.Cut(pair, "-")
+				lo, err1 := strconv.Atoi(loStr)
+				hi, err2 := strconv.Atoi(hiStr)
 				if err1 != nil || err2 != nil || lo > hi {
 					t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
 				}
@@ -457,12 +454,11 @@ Reading:
 				continue Reading
 			}
 		case ':':
-			i := strings.Index(flag[1:], ":")
-			if i < 0 {
+			var ok bool
+			if _, flag, ok = strings.Cut(flag[1:], ":"); !ok {
 				t.Logf("skip: %s", line)
 				continue Reading
 			}
-			flag = flag[1+i+1:]
 		case 'C', 'N', 'T', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 			t.Logf("skip: %s", line)
 			continue Reading
diff --git a/libgo/go/regexp/find_test.go b/libgo/go/regexp/find_test.go
index 64c2239..2edbe9b 100644
--- a/libgo/go/regexp/find_test.go
+++ b/libgo/go/regexp/find_test.go
@@ -116,6 +116,13 @@ var findTests = []FindTest{
 	{"\\`", "`", build(1, 0, 1)},
 	{"[\\`]+", "`", build(1, 0, 1)},
 
+	{"\ufffd", "\xff", build(1, 0, 1)},
+	{"\ufffd", "hello\xffworld", build(1, 5, 6)},
+	{`.*`, "hello\xffworld", build(1, 0, 11)},
+	{`\x{fffd}`, "\xc2\x00", build(1, 0, 1)},
+	{"[\ufffd]", "\xff", build(1, 0, 1)},
+	{`[\x{fffd}]`, "\xc2\x00", build(1, 0, 1)},
+
 	// long set of matches (longer than startSize)
 	{
 		".",
diff --git a/libgo/go/regexp/onepass.go b/libgo/go/regexp/onepass.go
index 2f3ce6f..bc47f4c 100644
--- a/libgo/go/regexp/onepass.go
+++ b/libgo/go/regexp/onepass.go
@@ -9,6 +9,7 @@ import (
 	"sort"
 	"strings"
 	"unicode"
+	"unicode/utf8"
 )
 
 // "One-pass" regexp execution.
@@ -55,7 +56,7 @@ func onePassPrefix(p *syntax.Prog) (prefix string, complete bool, pc uint32) {
 
 	// Have prefix; gather characters.
 	var buf strings.Builder
-	for iop(i) == syntax.InstRune && len(i.Rune) == 1 && syntax.Flags(i.Arg)&syntax.FoldCase == 0 {
+	for iop(i) == syntax.InstRune && len(i.Rune) == 1 && syntax.Flags(i.Arg)&syntax.FoldCase == 0 && i.Rune[0] != utf8.RuneError {
 		buf.WriteRune(i.Rune[0])
 		pc, i = i.Out, &p.Inst[i.Out]
 	}
diff --git a/libgo/go/regexp/regexp.go b/libgo/go/regexp/regexp.go
index b547a2a..f975bb3 100644
--- a/libgo/go/regexp/regexp.go
+++ b/libgo/go/regexp/regexp.go
@@ -20,6 +20,8 @@
 // or any book about automata theory.
 //
 // All characters are UTF-8-encoded code points.
+// Following utf8.DecodeRune, each byte of an invalid UTF-8 sequence
+// is treated as if it encoded utf8.RuneError (U+FFFD).
 //
 // There are 16 methods of Regexp that match a regular expression and identify
 // the matched text. Their names are matched by this regular expression:
@@ -40,7 +42,7 @@
 // successive submatches of the expression. Submatches are matches of
 // parenthesized subexpressions (also known as capturing groups) within the
 // regular expression, numbered from left to right in order of opening
-// parenthesis. Submatch 0 is the match of the entire expression, submatch 1
+// parenthesis. Submatch 0 is the match of the entire expression, submatch 1 is
 // the match of the first parenthesized subexpression, and so on.
 //
 // If 'Index' is present, matches and submatches are identified by byte index
@@ -276,7 +278,11 @@ func minInputLen(re *syntax.Regexp) int {
 	case syntax.OpLiteral:
 		l := 0
 		for _, r := range re.Rune {
-			l += utf8.RuneLen(r)
+			if r == utf8.RuneError {
+				l++
+			} else {
+				l += utf8.RuneLen(r)
+			}
 		}
 		return l
 	case syntax.OpCapture, syntax.OpPlus:
@@ -922,23 +928,22 @@ func (re *Regexp) ExpandString(dst []byte, template string, src string, match []
 
 func (re *Regexp) expand(dst []byte, template string, bsrc []byte, src string, match []int) []byte {
 	for len(template) > 0 {
-		i := strings.Index(template, "$")
-		if i < 0 {
+		before, after, ok := strings.Cut(template, "$")
+		if !ok {
 			break
 		}
-		dst = append(dst, template[:i]...)
-		template = template[i:]
-		if len(template) > 1 && template[1] == '$' {
+		dst = append(dst, before...)
+		template = after
+		if template != "" && template[0] == '$' {
 			// Treat $$ as $.
 			dst = append(dst, '$')
-			template = template[2:]
+			template = template[1:]
 			continue
 		}
 		name, num, rest, ok := extract(template)
 		if !ok {
 			// Malformed; treat $ as raw text.
 			dst = append(dst, '$')
-			template = template[1:]
 			continue
 		}
 		template = rest
@@ -967,17 +972,16 @@ func (re *Regexp) expand(dst []byte, template string, bsrc []byte, src string, m
 	return dst
 }
 
-// extract returns the name from a leading "$name" or "${name}" in str.
+// extract returns the name from a leading "name" or "{name}" in str.
+// (The $ has already been removed by the caller.)
 // If it is a number, extract returns num set to that number; otherwise num = -1.
 func extract(str string) (name string, num int, rest string, ok bool) {
-	if len(str) < 2 || str[0] != '$' {
+	if str == "" {
 		return
 	}
 	brace := false
-	if str[1] == '{' {
+	if str[0] == '{' {
 		brace = true
-		str = str[2:]
-	} else {
 		str = str[1:]
 	}
 	i := 0
diff --git a/libgo/go/regexp/syntax/parse.go b/libgo/go/regexp/syntax/parse.go
index 7b40309..06a92fb 100644
--- a/libgo/go/regexp/syntax/parse.go
+++ b/libgo/go/regexp/syntax/parse.go
@@ -824,13 +824,7 @@ func Parse(s string, flags Flags) (*Regexp, error) {
 				case 'Q':
 					// \Q ... \E: the ... is always literals
 					var lit string
-					if i := strings.Index(t, `\E`); i < 0 {
-						lit = t[2:]
-						t = ""
-					} else {
-						lit = t[2:i]
-						t = t[i+2:]
-					}
+					lit, t, _ = strings.Cut(t[2:], `\E`)
 					for lit != "" {
 						c, rest, err := nextRune(lit)
 						if err != nil {
diff --git a/libgo/go/regexp/syntax/prog.go b/libgo/go/regexp/syntax/prog.go
index ae7a9a2..8583f55 100644
--- a/libgo/go/regexp/syntax/prog.go
+++ b/libgo/go/regexp/syntax/prog.go
@@ -8,6 +8,7 @@ import (
 	"strconv"
 	"strings"
 	"unicode"
+	"unicode/utf8"
 )
 
 // Compiled program.
@@ -154,7 +155,7 @@ func (p *Prog) Prefix() (prefix string, complete bool) {
 
 	// Have prefix; gather characters.
 	var buf strings.Builder
-	for i.op() == InstRune && len(i.Rune) == 1 && Flags(i.Arg)&FoldCase == 0 {
+	for i.op() == InstRune && len(i.Rune) == 1 && Flags(i.Arg)&FoldCase == 0 && i.Rune[0] != utf8.RuneError {
 		buf.WriteRune(i.Rune[0])
 		i = p.skipNop(i.Out)
 	}