diff options
author | Ian Lance Taylor <ian@gcc.gnu.org> | 2012-01-12 01:31:45 +0000 |
---|---|---|
committer | Ian Lance Taylor <ian@gcc.gnu.org> | 2012-01-12 01:31:45 +0000 |
commit | 9a0e3259f44ad2de9c65f14f756dab01b3598391 (patch) | |
tree | 86a3b8019380d5fad53258c4baba3dd9e1e7c736 /libgo/go/regexp | |
parent | c6135f063335419e4b5df0b4e1caf145882c8a4b (diff) | |
download | gcc-9a0e3259f44ad2de9c65f14f756dab01b3598391.zip gcc-9a0e3259f44ad2de9c65f14f756dab01b3598391.tar.gz gcc-9a0e3259f44ad2de9c65f14f756dab01b3598391.tar.bz2 |
libgo: Update to weekly.2011-12-14.
From-SVN: r183118
Diffstat (limited to 'libgo/go/regexp')
-rw-r--r-- | libgo/go/regexp/exec.go | 40 | ||||
-rw-r--r-- | libgo/go/regexp/exec_test.go | 65 | ||||
-rw-r--r-- | libgo/go/regexp/regexp.go | 50 | ||||
-rw-r--r-- | libgo/go/regexp/syntax/parse.go | 8 |
4 files changed, 78 insertions, 85 deletions
diff --git a/libgo/go/regexp/exec.go b/libgo/go/regexp/exec.go index d7057a1..e16a1b5 100644 --- a/libgo/go/regexp/exec.go +++ b/libgo/go/regexp/exec.go @@ -1,6 +1,9 @@ package regexp -import "regexp/syntax" +import ( + "io" + "regexp/syntax" +) // A queue is a 'sparse array' holding pending threads of execution. // See http://research.swtch.com/2008/03/using-uninitialized-memory-for-fun-and.html @@ -34,6 +37,28 @@ type machine struct { pool []*thread // pool of available threads matched bool // whether a match was found matchcap []int // capture information for the match + + // cached inputs, to avoid allocation + inputBytes inputBytes + inputString inputString + inputReader inputReader +} + +func (m *machine) newInputBytes(b []byte) input { + m.inputBytes.str = b + return &m.inputBytes +} + +func (m *machine) newInputString(s string) input { + m.inputString.str = s + return &m.inputString +} + +func (m *machine) newInputReader(r io.RuneReader) input { + m.inputReader.r = r + m.inputReader.atEOT = false + m.inputReader.pos = 0 + return &m.inputReader } // progMachine returns a new machine running the prog p. @@ -74,6 +99,9 @@ func (m *machine) alloc(i *syntax.Inst) *thread { // free returns t to the free pool. func (m *machine) free(t *thread) { + m.inputBytes.str = nil + m.inputString.str = "" + m.inputReader.r = nil m.pool = append(m.pool, t) } @@ -287,8 +315,16 @@ var empty = make([]int, 0) // doExecute finds the leftmost match in the input and returns // the position of its subexpressions. -func (re *Regexp) doExecute(i input, pos int, ncap int) []int { +func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap int) []int { m := re.get() + var i input + if r != nil { + i = m.newInputReader(r) + } else if b != nil { + i = m.newInputBytes(b) + } else { + i = m.newInputString(s) + } m.init(ncap) if !m.match(i, pos) { re.put(m) diff --git a/libgo/go/regexp/exec_test.go b/libgo/go/regexp/exec_test.go index d981f54..312bf02 100644 --- a/libgo/go/regexp/exec_test.go +++ b/libgo/go/regexp/exec_test.go @@ -10,7 +10,6 @@ import ( "fmt" "io" "math/rand" - old "old/regexp" "os" "path/filepath" "regexp/syntax" @@ -679,18 +678,6 @@ func benchmark(b *testing.B, re string, n int) { } } -func benchold(b *testing.B, re string, n int) { - r := old.MustCompile(re) - t := makeText(n) - b.ResetTimer() - b.SetBytes(int64(n)) - for i := 0; i < b.N; i++ { - if r.Match(t) { - panic("match!") - } - } -} - const ( easy0 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ$" easy1 = "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$" @@ -700,35 +687,23 @@ const ( "(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$" ) -func BenchmarkMatchEasy0_1K(b *testing.B) { benchmark(b, easy0, 1<<10) } -func BenchmarkMatchEasy0_1K_Old(b *testing.B) { benchold(b, easy0, 1<<10) } -func BenchmarkMatchEasy0_1M(b *testing.B) { benchmark(b, easy0, 1<<20) } -func BenchmarkMatchEasy0_1M_Old(b *testing.B) { benchold(b, easy0, 1<<20) } -func BenchmarkMatchEasy0_32K(b *testing.B) { benchmark(b, easy0, 32<<10) } -func BenchmarkMatchEasy0_32K_Old(b *testing.B) { benchold(b, easy0, 32<<10) } -func BenchmarkMatchEasy0_32M(b *testing.B) { benchmark(b, easy0, 32<<20) } -func BenchmarkMatchEasy0_32M_Old(b *testing.B) { benchold(b, easy0, 32<<20) } -func BenchmarkMatchEasy1_1K(b *testing.B) { benchmark(b, easy1, 1<<10) } -func BenchmarkMatchEasy1_1K_Old(b *testing.B) { benchold(b, easy1, 1<<10) } -func BenchmarkMatchEasy1_1M(b *testing.B) { benchmark(b, easy1, 1<<20) } -func BenchmarkMatchEasy1_1M_Old(b *testing.B) { benchold(b, easy1, 1<<20) } -func BenchmarkMatchEasy1_32K(b *testing.B) { benchmark(b, easy1, 32<<10) } -func BenchmarkMatchEasy1_32K_Old(b *testing.B) { benchold(b, easy1, 32<<10) } -func BenchmarkMatchEasy1_32M(b *testing.B) { benchmark(b, easy1, 32<<20) } -func BenchmarkMatchEasy1_32M_Old(b *testing.B) { benchold(b, easy1, 32<<20) } -func BenchmarkMatchMedium_1K(b *testing.B) { benchmark(b, medium, 1<<10) } -func BenchmarkMatchMedium_1K_Old(b *testing.B) { benchold(b, medium, 1<<10) } -func BenchmarkMatchMedium_1M(b *testing.B) { benchmark(b, medium, 1<<20) } -func BenchmarkMatchMedium_1M_Old(b *testing.B) { benchold(b, medium, 1<<20) } -func BenchmarkMatchMedium_32K(b *testing.B) { benchmark(b, medium, 32<<10) } -func BenchmarkMatchMedium_32K_Old(b *testing.B) { benchold(b, medium, 32<<10) } -func BenchmarkMatchMedium_32M(b *testing.B) { benchmark(b, medium, 32<<20) } -func BenchmarkMatchMedium_32M_Old(b *testing.B) { benchold(b, medium, 32<<20) } -func BenchmarkMatchHard_1K(b *testing.B) { benchmark(b, hard, 1<<10) } -func BenchmarkMatchHard_1K_Old(b *testing.B) { benchold(b, hard, 1<<10) } -func BenchmarkMatchHard_1M(b *testing.B) { benchmark(b, hard, 1<<20) } -func BenchmarkMatchHard_1M_Old(b *testing.B) { benchold(b, hard, 1<<20) } -func BenchmarkMatchHard_32K(b *testing.B) { benchmark(b, hard, 32<<10) } -func BenchmarkMatchHard_32K_Old(b *testing.B) { benchold(b, hard, 32<<10) } -func BenchmarkMatchHard_32M(b *testing.B) { benchmark(b, hard, 32<<20) } -func BenchmarkMatchHard_32M_Old(b *testing.B) { benchold(b, hard, 32<<20) } +func BenchmarkMatchEasy0_32(b *testing.B) { benchmark(b, easy0, 32<<0) } +func BenchmarkMatchEasy0_1K(b *testing.B) { benchmark(b, easy0, 1<<10) } +func BenchmarkMatchEasy0_32K(b *testing.B) { benchmark(b, easy0, 32<<10) } +func BenchmarkMatchEasy0_1M(b *testing.B) { benchmark(b, easy0, 1<<20) } +func BenchmarkMatchEasy0_32M(b *testing.B) { benchmark(b, easy0, 32<<20) } +func BenchmarkMatchEasy1_32(b *testing.B) { benchmark(b, easy1, 32<<0) } +func BenchmarkMatchEasy1_1K(b *testing.B) { benchmark(b, easy1, 1<<10) } +func BenchmarkMatchEasy1_32K(b *testing.B) { benchmark(b, easy1, 32<<10) } +func BenchmarkMatchEasy1_1M(b *testing.B) { benchmark(b, easy1, 1<<20) } +func BenchmarkMatchEasy1_32M(b *testing.B) { benchmark(b, easy1, 32<<20) } +func BenchmarkMatchMedium_32(b *testing.B) { benchmark(b, medium, 1<<0) } +func BenchmarkMatchMedium_1K(b *testing.B) { benchmark(b, medium, 1<<10) } +func BenchmarkMatchMedium_32K(b *testing.B) { benchmark(b, medium, 32<<10) } +func BenchmarkMatchMedium_1M(b *testing.B) { benchmark(b, medium, 1<<20) } +func BenchmarkMatchMedium_32M(b *testing.B) { benchmark(b, medium, 32<<20) } +func BenchmarkMatchHard_32(b *testing.B) { benchmark(b, hard, 32<<0) } +func BenchmarkMatchHard_1K(b *testing.B) { benchmark(b, hard, 1<<10) } +func BenchmarkMatchHard_32K(b *testing.B) { benchmark(b, hard, 32<<10) } +func BenchmarkMatchHard_1M(b *testing.B) { benchmark(b, hard, 1<<20) } +func BenchmarkMatchHard_32M(b *testing.B) { benchmark(b, hard, 32<<20) } diff --git a/libgo/go/regexp/regexp.go b/libgo/go/regexp/regexp.go index 59f3be3..b0c6a0b 100644 --- a/libgo/go/regexp/regexp.go +++ b/libgo/go/regexp/regexp.go @@ -240,10 +240,6 @@ type inputString struct { str string } -func newInputString(str string) *inputString { - return &inputString{str: str} -} - func (i *inputString) step(pos int) (rune, int) { if pos < len(i.str) { c := i.str[pos] @@ -283,10 +279,6 @@ type inputBytes struct { str []byte } -func newInputBytes(str []byte) *inputBytes { - return &inputBytes{str: str} -} - func (i *inputBytes) step(pos int) (rune, int) { if pos < len(i.str) { c := i.str[pos] @@ -328,10 +320,6 @@ type inputReader struct { pos int } -func newInputReader(r io.RuneReader) *inputReader { - return &inputReader{r: r} -} - func (i *inputReader) step(pos int) (rune, int) { if !i.atEOT && pos != i.pos { return endOfText, 0 @@ -373,19 +361,19 @@ func (re *Regexp) LiteralPrefix() (prefix string, complete bool) { // RuneReader. The return value is a boolean: true for match, false for no // match. func (re *Regexp) MatchReader(r io.RuneReader) bool { - return re.doExecute(newInputReader(r), 0, 0) != nil + return re.doExecute(r, nil, "", 0, 0) != nil } // MatchString returns whether the Regexp matches the string s. // The return value is a boolean: true for match, false for no match. func (re *Regexp) MatchString(s string) bool { - return re.doExecute(newInputString(s), 0, 0) != nil + return re.doExecute(nil, nil, s, 0, 0) != nil } // Match returns whether the Regexp matches the byte slice b. // The return value is a boolean: true for match, false for no match. func (re *Regexp) Match(b []byte) bool { - return re.doExecute(newInputBytes(b), 0, 0) != nil + return re.doExecute(nil, b, "", 0, 0) != nil } // MatchReader checks whether a textual regular expression matches the text @@ -437,7 +425,7 @@ func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) str searchPos := 0 // position where we next look for a match buf := new(bytes.Buffer) for searchPos <= len(src) { - a := re.doExecute(newInputString(src), searchPos, 2) + a := re.doExecute(nil, nil, src, searchPos, 2) if len(a) == 0 { break // no more matches } @@ -489,7 +477,7 @@ func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte { searchPos := 0 // position where we next look for a match buf := new(bytes.Buffer) for searchPos <= len(src) { - a := re.doExecute(newInputBytes(src), searchPos, 2) + a := re.doExecute(nil, src, "", searchPos, 2) if len(a) == 0 { break // no more matches } @@ -577,13 +565,7 @@ func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) { } for pos, i, prevMatchEnd := 0, 0, -1; i < n && pos <= end; { - var in input - if b == nil { - in = newInputString(s) - } else { - in = newInputBytes(b) - } - matches := re.doExecute(in, pos, re.prog.NumCap) + matches := re.doExecute(nil, b, s, pos, re.prog.NumCap) if len(matches) == 0 { break } @@ -623,7 +605,7 @@ func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) { // Find returns a slice holding the text of the leftmost match in b of the regular expression. // A return value of nil indicates no match. func (re *Regexp) Find(b []byte) []byte { - a := re.doExecute(newInputBytes(b), 0, 2) + a := re.doExecute(nil, b, "", 0, 2) if a == nil { return nil } @@ -635,7 +617,7 @@ func (re *Regexp) Find(b []byte) []byte { // b[loc[0]:loc[1]]. // A return value of nil indicates no match. func (re *Regexp) FindIndex(b []byte) (loc []int) { - a := re.doExecute(newInputBytes(b), 0, 2) + a := re.doExecute(nil, b, "", 0, 2) if a == nil { return nil } @@ -648,7 +630,7 @@ func (re *Regexp) FindIndex(b []byte) (loc []int) { // an empty string. Use FindStringIndex or FindStringSubmatch if it is // necessary to distinguish these cases. func (re *Regexp) FindString(s string) string { - a := re.doExecute(newInputString(s), 0, 2) + a := re.doExecute(nil, nil, s, 0, 2) if a == nil { return "" } @@ -660,7 +642,7 @@ func (re *Regexp) FindString(s string) string { // itself is at s[loc[0]:loc[1]]. // A return value of nil indicates no match. func (re *Regexp) FindStringIndex(s string) []int { - a := re.doExecute(newInputString(s), 0, 2) + a := re.doExecute(nil, nil, s, 0, 2) if a == nil { return nil } @@ -672,7 +654,7 @@ func (re *Regexp) FindStringIndex(s string) []int { // the RuneReader. The match itself is at s[loc[0]:loc[1]]. A return // value of nil indicates no match. func (re *Regexp) FindReaderIndex(r io.RuneReader) []int { - a := re.doExecute(newInputReader(r), 0, 2) + a := re.doExecute(r, nil, "", 0, 2) if a == nil { return nil } @@ -685,7 +667,7 @@ func (re *Regexp) FindReaderIndex(r io.RuneReader) []int { // comment. // A return value of nil indicates no match. func (re *Regexp) FindSubmatch(b []byte) [][]byte { - a := re.doExecute(newInputBytes(b), 0, re.prog.NumCap) + a := re.doExecute(nil, b, "", 0, re.prog.NumCap) if a == nil { return nil } @@ -704,7 +686,7 @@ func (re *Regexp) FindSubmatch(b []byte) [][]byte { // in the package comment. // A return value of nil indicates no match. func (re *Regexp) FindSubmatchIndex(b []byte) []int { - return re.pad(re.doExecute(newInputBytes(b), 0, re.prog.NumCap)) + return re.pad(re.doExecute(nil, b, "", 0, re.prog.NumCap)) } // FindStringSubmatch returns a slice of strings holding the text of the @@ -713,7 +695,7 @@ func (re *Regexp) FindSubmatchIndex(b []byte) []int { // package comment. // A return value of nil indicates no match. func (re *Regexp) FindStringSubmatch(s string) []string { - a := re.doExecute(newInputString(s), 0, re.prog.NumCap) + a := re.doExecute(nil, nil, s, 0, re.prog.NumCap) if a == nil { return nil } @@ -732,7 +714,7 @@ func (re *Regexp) FindStringSubmatch(s string) []string { // 'Index' descriptions in the package comment. // A return value of nil indicates no match. func (re *Regexp) FindStringSubmatchIndex(s string) []int { - return re.pad(re.doExecute(newInputString(s), 0, re.prog.NumCap)) + return re.pad(re.doExecute(nil, nil, s, 0, re.prog.NumCap)) } // FindReaderSubmatchIndex returns a slice holding the index pairs @@ -741,7 +723,7 @@ func (re *Regexp) FindStringSubmatchIndex(s string) []int { // by the 'Submatch' and 'Index' descriptions in the package comment. A // return value of nil indicates no match. func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int { - return re.pad(re.doExecute(newInputReader(r), 0, re.prog.NumCap)) + return re.pad(re.doExecute(r, nil, "", 0, re.prog.NumCap)) } const startSize = 10 // The size at which to start a slice in the 'All' routines. diff --git a/libgo/go/regexp/syntax/parse.go b/libgo/go/regexp/syntax/parse.go index 2ad682f..ba641c1 100644 --- a/libgo/go/regexp/syntax/parse.go +++ b/libgo/go/regexp/syntax/parse.go @@ -1694,7 +1694,7 @@ func appendFoldedClass(r []rune, x []rune) []rune { // appendNegatedClass returns the result of appending the negation of the class x to the class r. // It assumes x is clean. func appendNegatedClass(r []rune, x []rune) []rune { - nextLo := rune('\u0000') + nextLo := '\u0000' for i := 0; i < len(x); i += 2 { lo, hi := x[i], x[i+1] if nextLo <= lo-1 { @@ -1735,7 +1735,7 @@ func appendTable(r []rune, x *unicode.RangeTable) []rune { // appendNegatedTable returns the result of appending the negation of x to the class r. func appendNegatedTable(r []rune, x *unicode.RangeTable) []rune { - nextLo := rune('\u0000') // lo end of next class to add + nextLo := '\u0000' // lo end of next class to add for _, xr := range x.R16 { lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride) if stride == 1 { @@ -1777,8 +1777,8 @@ func appendNegatedTable(r []rune, x *unicode.RangeTable) []rune { // negateClass overwrites r and returns r's negation. // It assumes the class r is already clean. func negateClass(r []rune) []rune { - nextLo := rune('\u0000') // lo end of next class to add - w := 0 // write index + nextLo := '\u0000' // lo end of next class to add + w := 0 // write index for i := 0; i < len(r); i += 2 { lo, hi := r[i], r[i+1] if nextLo <= lo-1 { |