From 8dc2499aa62f768c6395c9754b8cabc1ce25c494 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Fri, 11 Feb 2022 14:53:56 -0800 Subject: libgo: update to Go1.18beta2 gotools/ * Makefile.am (go_cmd_cgo_files): Add ast_go118.go (check-go-tool): Copy golang.org/x/tools directories. * Makefile.in: Regenerate. Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/384695 --- libgo/go/regexp/all_test.go | 3 +++ libgo/go/regexp/exec2_test.go | 1 - libgo/go/regexp/exec_test.go | 14 +++++--------- libgo/go/regexp/find_test.go | 7 +++++++ libgo/go/regexp/onepass.go | 3 ++- libgo/go/regexp/regexp.go | 32 ++++++++++++++++++-------------- libgo/go/regexp/syntax/parse.go | 8 +------- libgo/go/regexp/syntax/prog.go | 3 ++- 8 files changed, 38 insertions(+), 33 deletions(-) (limited to 'libgo/go/regexp') diff --git a/libgo/go/regexp/all_test.go b/libgo/go/regexp/all_test.go index be7a2e7..c233cfa 100644 --- a/libgo/go/regexp/all_test.go +++ b/libgo/go/regexp/all_test.go @@ -372,6 +372,9 @@ var literalPrefixTests = []MetaTest{ {`^^0$$`, ``, ``, false}, {`^$^$`, ``, ``, false}, {`$$0^^`, ``, ``, false}, + {`a\x{fffd}b`, ``, `a`, false}, + {`\x{fffd}b`, ``, ``, false}, + {"\ufffd", ``, ``, false}, } func TestQuoteMeta(t *testing.T) { diff --git a/libgo/go/regexp/exec2_test.go b/libgo/go/regexp/exec2_test.go index 6444bc1..b6dac4a 100644 --- a/libgo/go/regexp/exec2_test.go +++ b/libgo/go/regexp/exec2_test.go @@ -3,7 +3,6 @@ // license that can be found in the LICENSE file. //go:build !race -// +build !race package regexp diff --git a/libgo/go/regexp/exec_test.go b/libgo/go/regexp/exec_test.go index 1f9a7a9..5f84426 100644 --- a/libgo/go/regexp/exec_test.go +++ b/libgo/go/regexp/exec_test.go @@ -294,12 +294,9 @@ func parseResult(t *testing.T, file string, lineno int, res string) []int { out[n] = -1 out[n+1] = -1 } else { - k := strings.Index(pair, "-") - if k < 0 { - t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair) - } - lo, err1 := strconv.Atoi(pair[:k]) - hi, err2 := strconv.Atoi(pair[k+1:]) + loStr, hiStr, _ := strings.Cut(pair, "-") + lo, err1 := strconv.Atoi(loStr) + hi, err2 := strconv.Atoi(hiStr) if err1 != nil || err2 != nil || lo > hi { t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair) } @@ -457,12 +454,11 @@ Reading: continue Reading } case ':': - i := strings.Index(flag[1:], ":") - if i < 0 { + var ok bool + if _, flag, ok = strings.Cut(flag[1:], ":"); !ok { t.Logf("skip: %s", line) continue Reading } - flag = flag[1+i+1:] case 'C', 'N', 'T', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': t.Logf("skip: %s", line) continue Reading diff --git a/libgo/go/regexp/find_test.go b/libgo/go/regexp/find_test.go index 64c2239..2edbe9b 100644 --- a/libgo/go/regexp/find_test.go +++ b/libgo/go/regexp/find_test.go @@ -116,6 +116,13 @@ var findTests = []FindTest{ {"\\`", "`", build(1, 0, 1)}, {"[\\`]+", "`", build(1, 0, 1)}, + {"\ufffd", "\xff", build(1, 0, 1)}, + {"\ufffd", "hello\xffworld", build(1, 5, 6)}, + {`.*`, "hello\xffworld", build(1, 0, 11)}, + {`\x{fffd}`, "\xc2\x00", build(1, 0, 1)}, + {"[\ufffd]", "\xff", build(1, 0, 1)}, + {`[\x{fffd}]`, "\xc2\x00", build(1, 0, 1)}, + // long set of matches (longer than startSize) { ".", diff --git a/libgo/go/regexp/onepass.go b/libgo/go/regexp/onepass.go index 2f3ce6f..bc47f4c 100644 --- a/libgo/go/regexp/onepass.go +++ b/libgo/go/regexp/onepass.go @@ -9,6 +9,7 @@ import ( "sort" "strings" "unicode" + "unicode/utf8" ) // "One-pass" regexp execution. @@ -55,7 +56,7 @@ func onePassPrefix(p *syntax.Prog) (prefix string, complete bool, pc uint32) { // Have prefix; gather characters. var buf strings.Builder - for iop(i) == syntax.InstRune && len(i.Rune) == 1 && syntax.Flags(i.Arg)&syntax.FoldCase == 0 { + for iop(i) == syntax.InstRune && len(i.Rune) == 1 && syntax.Flags(i.Arg)&syntax.FoldCase == 0 && i.Rune[0] != utf8.RuneError { buf.WriteRune(i.Rune[0]) pc, i = i.Out, &p.Inst[i.Out] } diff --git a/libgo/go/regexp/regexp.go b/libgo/go/regexp/regexp.go index b547a2a..f975bb3 100644 --- a/libgo/go/regexp/regexp.go +++ b/libgo/go/regexp/regexp.go @@ -20,6 +20,8 @@ // or any book about automata theory. // // All characters are UTF-8-encoded code points. +// Following utf8.DecodeRune, each byte of an invalid UTF-8 sequence +// is treated as if it encoded utf8.RuneError (U+FFFD). // // There are 16 methods of Regexp that match a regular expression and identify // the matched text. Their names are matched by this regular expression: @@ -40,7 +42,7 @@ // successive submatches of the expression. Submatches are matches of // parenthesized subexpressions (also known as capturing groups) within the // regular expression, numbered from left to right in order of opening -// parenthesis. Submatch 0 is the match of the entire expression, submatch 1 +// parenthesis. Submatch 0 is the match of the entire expression, submatch 1 is // the match of the first parenthesized subexpression, and so on. // // If 'Index' is present, matches and submatches are identified by byte index @@ -276,7 +278,11 @@ func minInputLen(re *syntax.Regexp) int { case syntax.OpLiteral: l := 0 for _, r := range re.Rune { - l += utf8.RuneLen(r) + if r == utf8.RuneError { + l++ + } else { + l += utf8.RuneLen(r) + } } return l case syntax.OpCapture, syntax.OpPlus: @@ -922,23 +928,22 @@ func (re *Regexp) ExpandString(dst []byte, template string, src string, match [] func (re *Regexp) expand(dst []byte, template string, bsrc []byte, src string, match []int) []byte { for len(template) > 0 { - i := strings.Index(template, "$") - if i < 0 { + before, after, ok := strings.Cut(template, "$") + if !ok { break } - dst = append(dst, template[:i]...) - template = template[i:] - if len(template) > 1 && template[1] == '$' { + dst = append(dst, before...) + template = after + if template != "" && template[0] == '$' { // Treat $$ as $. dst = append(dst, '$') - template = template[2:] + template = template[1:] continue } name, num, rest, ok := extract(template) if !ok { // Malformed; treat $ as raw text. dst = append(dst, '$') - template = template[1:] continue } template = rest @@ -967,17 +972,16 @@ func (re *Regexp) expand(dst []byte, template string, bsrc []byte, src string, m return dst } -// extract returns the name from a leading "$name" or "${name}" in str. +// extract returns the name from a leading "name" or "{name}" in str. +// (The $ has already been removed by the caller.) // If it is a number, extract returns num set to that number; otherwise num = -1. func extract(str string) (name string, num int, rest string, ok bool) { - if len(str) < 2 || str[0] != '$' { + if str == "" { return } brace := false - if str[1] == '{' { + if str[0] == '{' { brace = true - str = str[2:] - } else { str = str[1:] } i := 0 diff --git a/libgo/go/regexp/syntax/parse.go b/libgo/go/regexp/syntax/parse.go index 7b40309..06a92fb 100644 --- a/libgo/go/regexp/syntax/parse.go +++ b/libgo/go/regexp/syntax/parse.go @@ -824,13 +824,7 @@ func Parse(s string, flags Flags) (*Regexp, error) { case 'Q': // \Q ... \E: the ... is always literals var lit string - if i := strings.Index(t, `\E`); i < 0 { - lit = t[2:] - t = "" - } else { - lit = t[2:i] - t = t[i+2:] - } + lit, t, _ = strings.Cut(t[2:], `\E`) for lit != "" { c, rest, err := nextRune(lit) if err != nil { diff --git a/libgo/go/regexp/syntax/prog.go b/libgo/go/regexp/syntax/prog.go index ae7a9a2..8583f55 100644 --- a/libgo/go/regexp/syntax/prog.go +++ b/libgo/go/regexp/syntax/prog.go @@ -8,6 +8,7 @@ import ( "strconv" "strings" "unicode" + "unicode/utf8" ) // Compiled program. @@ -154,7 +155,7 @@ func (p *Prog) Prefix() (prefix string, complete bool) { // Have prefix; gather characters. var buf strings.Builder - for i.op() == InstRune && len(i.Rune) == 1 && Flags(i.Arg)&FoldCase == 0 { + for i.op() == InstRune && len(i.Rune) == 1 && Flags(i.Arg)&FoldCase == 0 && i.Rune[0] != utf8.RuneError { buf.WriteRune(i.Rune[0]) i = p.skipNop(i.Out) } -- cgit v1.1