diff options
author | Ian Lance Taylor <iant@golang.org> | 2019-09-06 18:12:46 +0000 |
---|---|---|
committer | Ian Lance Taylor <ian@gcc.gnu.org> | 2019-09-06 18:12:46 +0000 |
commit | aa8901e9bb0399d2c16f988ba2fe46eb0c0c5d13 (patch) | |
tree | 7e63b06d1eec92beec6997c9d3ab47a5d6a835be /libgo/go/regexp | |
parent | 920ea3b8ba3164b61ac9490dfdfceb6936eda6dd (diff) | |
download | gcc-aa8901e9bb0399d2c16f988ba2fe46eb0c0c5d13.zip gcc-aa8901e9bb0399d2c16f988ba2fe46eb0c0c5d13.tar.gz gcc-aa8901e9bb0399d2c16f988ba2fe46eb0c0c5d13.tar.bz2 |
libgo: update to Go 1.13beta1 release
Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/193497
From-SVN: r275473
Diffstat (limited to 'libgo/go/regexp')
-rw-r--r-- | libgo/go/regexp/all_test.go | 47 | ||||
-rw-r--r-- | libgo/go/regexp/exec.go | 4 | ||||
-rw-r--r-- | libgo/go/regexp/exec_test.go | 1 | ||||
-rw-r--r-- | libgo/go/regexp/find_test.go | 20 | ||||
-rw-r--r-- | libgo/go/regexp/onepass_test.go | 10 | ||||
-rw-r--r-- | libgo/go/regexp/regexp.go | 53 | ||||
-rw-r--r-- | libgo/go/regexp/syntax/parse_test.go | 1 | ||||
-rw-r--r-- | libgo/go/regexp/syntax/regexp.go | 2 |
8 files changed, 116 insertions, 22 deletions
diff --git a/libgo/go/regexp/all_test.go b/libgo/go/regexp/all_test.go index 623f82d..626a691 100644 --- a/libgo/go/regexp/all_test.go +++ b/libgo/go/regexp/all_test.go @@ -860,6 +860,25 @@ func BenchmarkQuoteMetaNone(b *testing.B) { } } +var compileBenchData = []struct{ name, re string }{ + {"Onepass", `^a.[l-nA-Cg-j]?e$`}, + {"Medium", `^((a|b|[d-z0-9])*(日){4,5}.)+$`}, + {"Hard", strings.Repeat(`((abc)*|`, 50) + strings.Repeat(`)`, 50)}, +} + +func BenchmarkCompile(b *testing.B) { + for _, data := range compileBenchData { + b.Run(data.name, func(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + if _, err := Compile(data.re); err != nil { + b.Fatal(err) + } + } + }) + } +} + func TestDeepEqual(t *testing.T) { re1 := MustCompile("a.*b.*c.*d") re2 := MustCompile("a.*b.*c.*d") @@ -882,3 +901,31 @@ func TestDeepEqual(t *testing.T) { t.Errorf("DeepEqual(re1, re2) = false, want true") } } + +var minInputLenTests = []struct { + Regexp string + min int +}{ + {``, 0}, + {`a`, 1}, + {`aa`, 2}, + {`(aa)a`, 3}, + {`(?:aa)a`, 3}, + {`a?a`, 1}, + {`(aaa)|(aa)`, 2}, + {`(aa)+a`, 3}, + {`(aa)*a`, 1}, + {`(aa){3,5}`, 6}, + {`[a-z]`, 1}, + {`日`, 3}, +} + +func TestMinInputLen(t *testing.T) { + for _, tt := range minInputLenTests { + re, _ := syntax.Parse(tt.Regexp, syntax.Perl) + m := minInputLen(re) + if m != tt.min { + t.Errorf("regexp %#q has minInputLen %d, should be %d", tt.Regexp, m, tt.min) + } + } +} diff --git a/libgo/go/regexp/exec.go b/libgo/go/regexp/exec.go index efe764e..4411e4c 100644 --- a/libgo/go/regexp/exec.go +++ b/libgo/go/regexp/exec.go @@ -524,6 +524,10 @@ func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap i dstCap = arrayNoInts[:0:0] } + if r == nil && len(b)+len(s) < re.minInputLen { + return nil + } + if re.onepass != nil { return re.doOnePass(r, b, s, pos, ncap, dstCap) } diff --git a/libgo/go/regexp/exec_test.go b/libgo/go/regexp/exec_test.go index 1489219..1e87955 100644 --- a/libgo/go/regexp/exec_test.go +++ b/libgo/go/regexp/exec_test.go @@ -717,6 +717,7 @@ var benchSizes = []struct { name string n int }{ + {"16", 16}, {"32", 32}, {"1K", 1 << 10}, {"32K", 32 << 10}, diff --git a/libgo/go/regexp/find_test.go b/libgo/go/regexp/find_test.go index e07eb7d..87c49b0 100644 --- a/libgo/go/regexp/find_test.go +++ b/libgo/go/regexp/find_test.go @@ -161,6 +161,9 @@ func TestFind(t *testing.T) { t.Errorf("expected match; got none: %s", test) case test.matches != nil && result != nil: expect := test.text[test.matches[0][0]:test.matches[0][1]] + if len(result) != cap(result) { + t.Errorf("expected capacity %d got %d: %s", len(result), cap(result), test) + } if expect != string(result) { t.Errorf("expected %q got %q: %s", expect, result, test) } @@ -242,9 +245,13 @@ func TestFindAll(t *testing.T) { continue } for k, e := range test.matches { + got := result[k] + if len(got) != cap(got) { + t.Errorf("match %d: expected capacity %d got %d: %s", k, len(got), cap(got), test) + } expect := test.text[e[0]:e[1]] - if expect != string(result[k]) { - t.Errorf("match %d: expected %q got %q: %s", k, expect, result[k], test) + if expect != string(got) { + t.Errorf("match %d: expected %q got %q: %s", k, expect, got, test) } } } @@ -323,9 +330,14 @@ func testSubmatchBytes(test *FindTest, n int, submatches []int, result [][]byte, } continue } + got := result[k/2] + if len(got) != cap(got) { + t.Errorf("match %d: expected capacity %d got %d: %s", n, len(got), cap(got), test) + return + } expect := test.text[submatches[k]:submatches[k+1]] - if expect != string(result[k/2]) { - t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test) + if expect != string(got) { + t.Errorf("match %d: expected %q got %q: %s", n, expect, got, test) return } } diff --git a/libgo/go/regexp/onepass_test.go b/libgo/go/regexp/onepass_test.go index a0f2e39..32264d5 100644 --- a/libgo/go/regexp/onepass_test.go +++ b/libgo/go/regexp/onepass_test.go @@ -223,13 +223,3 @@ func TestRunOnePass(t *testing.T) { } } } - -func BenchmarkCompileOnepass(b *testing.B) { - b.ReportAllocs() - const re = `^a.[l-nA-Cg-j]?e$` - for i := 0; i < b.N; i++ { - if _, err := Compile(re); err != nil { - b.Fatal(err) - } - } -} diff --git a/libgo/go/regexp/regexp.go b/libgo/go/regexp/regexp.go index 38b3c86..19ca6f2 100644 --- a/libgo/go/regexp/regexp.go +++ b/libgo/go/regexp/regexp.go @@ -46,9 +46,10 @@ // If 'Index' is present, matches and submatches are identified by byte index // pairs within the input string: result[2*n:2*n+1] identifies the indexes of // the nth submatch. The pair for n==0 identifies the match of the entire -// expression. If 'Index' is not present, the match is identified by the -// text of the match/submatch. If an index is negative, it means that -// subexpression did not match any string in the input. +// expression. If 'Index' is not present, the match is identified by the text +// of the match/submatch. If an index is negative or text is nil, it means that +// subexpression did not match any string in the input. For 'String' versions +// an empty string means either no match or an empty match. // // There is also a subset of the methods that can be applied to text read // from a RuneReader: @@ -93,6 +94,7 @@ type Regexp struct { matchcap int // size of recorded match lengths prefixComplete bool // prefix is the entire regexp cond syntax.EmptyOp // empty-width conditions required at start of match + minInputLen int // minimum length of the input in bytes // This field can be modified by the Longest method, // but it is otherwise read-only. @@ -190,6 +192,7 @@ func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) { cond: prog.StartCond(), longest: longest, matchcap: matchcap, + minInputLen: minInputLen(re), } if regexp.onepass == nil { regexp.prefix, regexp.prefixComplete = prog.Prefix() @@ -263,6 +266,42 @@ func (re *Regexp) put(m *machine) { matchPool[re.mpool].Put(m) } +// minInputLen walks the regexp to find the minimum length of any matchable input +func minInputLen(re *syntax.Regexp) int { + switch re.Op { + default: + return 0 + case syntax.OpAnyChar, syntax.OpAnyCharNotNL, syntax.OpCharClass: + return 1 + case syntax.OpLiteral: + l := 0 + for _, r := range re.Rune { + l += utf8.RuneLen(r) + } + return l + case syntax.OpCapture, syntax.OpPlus: + return minInputLen(re.Sub[0]) + case syntax.OpRepeat: + return re.Min * minInputLen(re.Sub[0]) + case syntax.OpConcat: + l := 0 + for _, sub := range re.Sub { + l += minInputLen(sub) + } + return l + case syntax.OpAlternate: + l := minInputLen(re.Sub[0]) + var lnext int + for _, sub := range re.Sub[1:] { + lnext = minInputLen(sub) + if lnext < l { + l = lnext + } + } + return l + } +} + // MustCompile is like Compile but panics if the expression cannot be parsed. // It simplifies safe initialization of global variables holding compiled regular // expressions. @@ -761,7 +800,7 @@ func (re *Regexp) Find(b []byte) []byte { if a == nil { return nil } - return b[a[0]:a[1]] + return b[a[0]:a[1]:a[1]] } // FindIndex returns a two-element slice of integers defining the location of @@ -829,7 +868,7 @@ func (re *Regexp) FindSubmatch(b []byte) [][]byte { ret := make([][]byte, 1+re.numSubexp) for i := range ret { if 2*i < len(a) && a[2*i] >= 0 { - ret[i] = b[a[2*i]:a[2*i+1]] + ret[i] = b[a[2*i]:a[2*i+1]:a[2*i+1]] } } return ret @@ -1025,7 +1064,7 @@ func (re *Regexp) FindAll(b []byte, n int) [][]byte { if result == nil { result = make([][]byte, 0, startSize) } - result = append(result, b[match[0]:match[1]]) + result = append(result, b[match[0]:match[1]:match[1]]) }) return result } @@ -1100,7 +1139,7 @@ func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte { slice := make([][]byte, len(match)/2) for j := range slice { if match[2*j] >= 0 { - slice[j] = b[match[2*j]:match[2*j+1]] + slice[j] = b[match[2*j]:match[2*j+1]:match[2*j+1]] } } result = append(result, slice) diff --git a/libgo/go/regexp/syntax/parse_test.go b/libgo/go/regexp/syntax/parse_test.go index fe3d251..5581ba1 100644 --- a/libgo/go/regexp/syntax/parse_test.go +++ b/libgo/go/regexp/syntax/parse_test.go @@ -185,6 +185,7 @@ var parseTests = []parseTest{ {`(?-s).`, `dnl{}`}, {`(?:(?:^).)`, `cat{bol{}dot{}}`}, {`(?-s)(?:(?:^).)`, `cat{bol{}dnl{}}`}, + {`[\s\S]a`, `cat{cc{0x0-0x10ffff}lit{a}}`}, // RE2 prefix_tests {`abc|abd`, `cat{str{ab}cc{0x63-0x64}}`}, diff --git a/libgo/go/regexp/syntax/regexp.go b/libgo/go/regexp/syntax/regexp.go index ae5fa05..3a4d2d2 100644 --- a/libgo/go/regexp/syntax/regexp.go +++ b/libgo/go/regexp/syntax/regexp.go @@ -139,7 +139,7 @@ func writeRegexp(b *strings.Builder, re *Regexp) { b.WriteRune('[') if len(re.Rune) == 0 { b.WriteString(`^\x00-\x{10FFFF}`) - } else if re.Rune[0] == 0 && re.Rune[len(re.Rune)-1] == unicode.MaxRune { + } else if re.Rune[0] == 0 && re.Rune[len(re.Rune)-1] == unicode.MaxRune && len(re.Rune) > 2 { // Contains 0 and MaxRune. Probably a negated class. // Print the gaps. b.WriteRune('^') |