diff options
Diffstat (limited to 'libgo/go/regexp')
-rw-r--r-- | libgo/go/regexp/backtrack.go | 5
-rw-r--r-- | libgo/go/regexp/exec.go | 14
-rw-r--r-- | libgo/go/regexp/exec_test.go | 83
-rw-r--r-- | libgo/go/regexp/onepass.go | 6
-rw-r--r-- | libgo/go/regexp/onepass_test.go | 4
-rw-r--r-- | libgo/go/regexp/regexp.go | 103
-rw-r--r-- | libgo/go/regexp/syntax/compile.go | 4
-rw-r--r-- | libgo/go/regexp/syntax/doc.go | 4
-rw-r--r-- | libgo/go/regexp/syntax/parse.go | 34
-rw-r--r-- | libgo/go/regexp/syntax/parse_test.go | 2
-rw-r--r-- | libgo/go/regexp/syntax/prog.go | 4
-rw-r--r-- | libgo/go/regexp/syntax/regexp.go | 6
-rw-r--r-- | libgo/go/regexp/syntax/simplify.go | 8
-rw-r--r-- | libgo/go/regexp/syntax/simplify_test.go | 6
14 files changed, 133 insertions, 150 deletions
diff --git a/libgo/go/regexp/backtrack.go b/libgo/go/regexp/backtrack.go index fd95604..29f624b 100644 --- a/libgo/go/regexp/backtrack.go +++ b/libgo/go/regexp/backtrack.go @@ -36,7 +36,6 @@ type bitState struct { end int cap []int - input input jobs []job visited []uint32 } @@ -146,7 +145,7 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool { // Optimization: rather than push and pop, // code that is going to Push and continue // the loop simply updates ip, p, and arg - // and jumps to CheckAndLoop. We have to + // and jumps to CheckAndLoop. We have to // do the ShouldVisit check that Push // would have, but we avoid the stack // manipulation. @@ -254,7 +253,6 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool { } panic("bad arg in InstCapture") - continue case syntax.InstEmptyWidth: if syntax.EmptyOp(inst.Arg)&^i.context(pos) != 0 { @@ -299,7 +297,6 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool { // Otherwise, continue on in hope of a longer match. continue } - panic("unreachable") } return m.matched diff --git a/libgo/go/regexp/exec.go b/libgo/go/regexp/exec.go index 5182720..4fd61b5 100644 --- a/libgo/go/regexp/exec.go +++ b/libgo/go/regexp/exec.go @@ -19,7 +19,7 @@ type queue struct { // A entry is an entry on a queue. // It holds both the instruction pc and the actual thread. // Some queue entries are just place holders so that the machine -// knows it has considered that pc. Such entries have t == nil. +// knows it has considered that pc. Such entries have t == nil. type entry struct { pc uint32 t *thread @@ -107,14 +107,6 @@ func (m *machine) alloc(i *syntax.Inst) *thread { return t } -// free returns t to the free pool. -func (m *machine) free(t *thread) { - m.inputBytes.str = nil - m.inputString.str = "" - m.inputReader.r = nil - m.pool = append(m.pool, t) -} - // match runs the machine over the input starting at pos. // It reports whether a match was found. 
// If so, m.matchcap holds the submatch information. @@ -192,7 +184,6 @@ func (m *machine) match(i input, pos int) bool { func (m *machine) clear(q *queue) { for _, d := range q.dense { if d.t != nil { - // m.free(d.t) m.pool = append(m.pool, d.t) } } @@ -213,7 +204,6 @@ func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond sy continue } if longest && m.matched && len(t.cap) > 0 && m.matchcap[0] < t.cap[0] { - // m.free(t) m.pool = append(m.pool, t) continue } @@ -232,7 +222,6 @@ func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond sy // First-match mode: cut off all lower-priority threads. for _, d := range runq.dense[j+1:] { if d.t != nil { - // m.free(d.t) m.pool = append(m.pool, d.t) } } @@ -253,7 +242,6 @@ func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond sy t = m.add(nextq, i.Out, nextPos, t.cap, nextCond, t) } if t != nil { - // m.free(t) m.pool = append(m.pool, t) } } diff --git a/libgo/go/regexp/exec_test.go b/libgo/go/regexp/exec_test.go index 4872cb3..69f187e 100644 --- a/libgo/go/regexp/exec_test.go +++ b/libgo/go/regexp/exec_test.go @@ -22,7 +22,7 @@ import ( // considered during RE2's exhaustive tests, which run all possible // regexps over a given set of atoms and operators, up to a given // complexity, over all possible strings over a given alphabet, -// up to a given size. Rather than try to link with RE2, we read a +// up to a given size. Rather than try to link with RE2, we read a // log file containing the test cases and the expected matches. // The log file, re2-exhaustive.txt, is generated by running 'make log' // in the open source RE2 distribution https://github.com/google/re2/. @@ -41,21 +41,21 @@ import ( // -;0-3 0-1 1-2 2-3 // // The stanza begins by defining a set of strings, quoted -// using Go double-quote syntax, one per line. Then the +// using Go double-quote syntax, one per line. 
Then the // regexps section gives a sequence of regexps to run on -// the strings. In the block that follows a regexp, each line +// the strings. In the block that follows a regexp, each line // gives the semicolon-separated match results of running // the regexp on the corresponding string. // Each match result is either a single -, meaning no match, or a // space-separated sequence of pairs giving the match and -// submatch indices. An unmatched subexpression formats +// submatch indices. An unmatched subexpression formats // its pair as a single - (not illustrated above). For now // each regexp run produces two match results, one for a // ``full match'' that restricts the regexp to matching the entire // string or nothing, and one for a ``partial match'' that gives // the leftmost first match found in the string. // -// Lines beginning with # are comments. Lines beginning with +// Lines beginning with # are comments. Lines beginning with // a capital letter are test names printed during RE2's test suite // and are echoed into t but otherwise ignored. // @@ -155,9 +155,9 @@ func testRE2(t *testing.T, file string) { if !isSingleBytes(text) && strings.Contains(re.String(), `\B`) { // RE2's \B considers every byte position, // so it sees 'not word boundary' in the - // middle of UTF-8 sequences. This package + // middle of UTF-8 sequences. This package // only considers the positions between runes, - // so it disagrees. Skip those cases. + // so it disagrees. Skip those cases. continue } res := strings.Split(line, ";") @@ -409,7 +409,7 @@ Reading: // h REG_MULTIREF multiple digit backref // i REG_ICASE ignore case // j REG_SPAN . matches \n - // k REG_ESCAPE \ to ecape [...] delimiter + // k REG_ESCAPE \ to escape [...] delimiter // l REG_LEFT implicit ^... 
// m REG_MINIMAL minimal match // n REG_NEWLINE explicit \n match @@ -658,47 +658,42 @@ func makeText(n int) []byte { return text } -func benchmark(b *testing.B, re string, n int) { - r := MustCompile(re) - t := makeText(n) - b.ResetTimer() - b.SetBytes(int64(n)) - for i := 0; i < b.N; i++ { - if r.Match(t) { - b.Fatal("match!") +func BenchmarkMatch(b *testing.B) { + for _, data := range benchData { + r := MustCompile(data.re) + for _, size := range benchSizes { + t := makeText(size.n) + b.Run(data.name+"/"+size.name, func(b *testing.B) { + b.SetBytes(int64(size.n)) + for i := 0; i < b.N; i++ { + if r.Match(t) { + b.Fatal("match!") + } + } + }) } } } -const ( - easy0 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ$" - easy1 = "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$" - medium = "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$" - hard = "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$" - parens = "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" + - "(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$" -) +var benchData = []struct{ name, re string }{ + {"Easy0", "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"}, + {"Easy0i", "(?i)ABCDEFGHIJklmnopqrstuvwxyz$"}, + {"Easy1", "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"}, + {"Medium", "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"}, + {"Hard", "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"}, + {"Hard1", "ABCD|CDEF|EFGH|GHIJ|IJKL|KLMN|MNOP|OPQR|QRST|STUV|UVWX|WXYZ"}, +} -func BenchmarkMatchEasy0_32(b *testing.B) { benchmark(b, easy0, 32<<0) } -func BenchmarkMatchEasy0_1K(b *testing.B) { benchmark(b, easy0, 1<<10) } -func BenchmarkMatchEasy0_32K(b *testing.B) { benchmark(b, easy0, 32<<10) } -func BenchmarkMatchEasy0_1M(b *testing.B) { benchmark(b, easy0, 1<<20) } -func BenchmarkMatchEasy0_32M(b *testing.B) { benchmark(b, easy0, 32<<20) } -func BenchmarkMatchEasy1_32(b *testing.B) { benchmark(b, easy1, 32<<0) } -func BenchmarkMatchEasy1_1K(b *testing.B) { benchmark(b, easy1, 1<<10) } -func BenchmarkMatchEasy1_32K(b *testing.B) { benchmark(b, easy1, 32<<10) } -func BenchmarkMatchEasy1_1M(b *testing.B) { 
benchmark(b, easy1, 1<<20) } -func BenchmarkMatchEasy1_32M(b *testing.B) { benchmark(b, easy1, 32<<20) } -func BenchmarkMatchMedium_32(b *testing.B) { benchmark(b, medium, 32<<0) } -func BenchmarkMatchMedium_1K(b *testing.B) { benchmark(b, medium, 1<<10) } -func BenchmarkMatchMedium_32K(b *testing.B) { benchmark(b, medium, 32<<10) } -func BenchmarkMatchMedium_1M(b *testing.B) { benchmark(b, medium, 1<<20) } -func BenchmarkMatchMedium_32M(b *testing.B) { benchmark(b, medium, 32<<20) } -func BenchmarkMatchHard_32(b *testing.B) { benchmark(b, hard, 32<<0) } -func BenchmarkMatchHard_1K(b *testing.B) { benchmark(b, hard, 1<<10) } -func BenchmarkMatchHard_32K(b *testing.B) { benchmark(b, hard, 32<<10) } -func BenchmarkMatchHard_1M(b *testing.B) { benchmark(b, hard, 1<<20) } -func BenchmarkMatchHard_32M(b *testing.B) { benchmark(b, hard, 32<<20) } +var benchSizes = []struct { + name string + n int +}{ + {"32", 32}, + {"1K", 1 << 10}, + {"32K", 32 << 10}, + {"1M", 1 << 20}, + {"32M", 32 << 20}, +} func TestLongest(t *testing.T) { re, err := Compile(`a(|b)`) diff --git a/libgo/go/regexp/onepass.go b/libgo/go/regexp/onepass.go index 2ce3902..4991954 100644 --- a/libgo/go/regexp/onepass.go +++ b/libgo/go/regexp/onepass.go @@ -1,4 +1,4 @@ -// Copyright 2014 The Go Authors. All rights reserved. +// Copyright 2014 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. @@ -33,7 +33,7 @@ type onePassInst struct { } // OnePassPrefix returns a literal string that all matches for the -// regexp must start with. Complete is true if the prefix +// regexp must start with. Complete is true if the prefix // is the entire match. Pc is the index of the last rune instruction // in the string. 
The OnePassPrefix skips over the mandatory // EmptyBeginText @@ -450,7 +450,7 @@ func makeOnePass(p *onePassProg) *onePassProg { for !instQueue.empty() { visitQueue.clear() pc := instQueue.next() - if !check(uint32(pc), m) { + if !check(pc, m) { p = notOnePass break } diff --git a/libgo/go/regexp/onepass_test.go b/libgo/go/regexp/onepass_test.go index 8202ebe..f4e336c 100644 --- a/libgo/go/regexp/onepass_test.go +++ b/libgo/go/regexp/onepass_test.go @@ -1,4 +1,4 @@ -// Copyright 2014 The Go Authors. All rights reserved. +// Copyright 2014 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. @@ -133,8 +133,6 @@ func TestMergeRuneSet(t *testing.T) { } } -const noStr = `!` - var onePass = &onePassProg{} var onePassTests = []struct { diff --git a/libgo/go/regexp/regexp.go b/libgo/go/regexp/regexp.go index d7d0edb..fe3db9f 100644 --- a/libgo/go/regexp/regexp.go +++ b/libgo/go/regexp/regexp.go @@ -22,14 +22,14 @@ // All characters are UTF-8-encoded code points. // // There are 16 methods of Regexp that match a regular expression and identify -// the matched text. Their names are matched by this regular expression: +// the matched text. Their names are matched by this regular expression: // // Find(All)?(String)?(Submatch)?(Index)? // // If 'All' is present, the routine matches successive non-overlapping -// matches of the entire expression. Empty matches abutting a preceding -// match are ignored. The return value is a slice containing the successive -// return values of the corresponding non-'All' routine. These routines take +// matches of the entire expression. Empty matches abutting a preceding +// match are ignored. The return value is a slice containing the successive +// return values of the corresponding non-'All' routine. These routines take // an extra integer argument, n; if n >= 0, the function returns at most n // matches/submatches. 
// @@ -45,9 +45,9 @@ // // If 'Index' is present, matches and submatches are identified by byte index // pairs within the input string: result[2*n:2*n+1] identifies the indexes of -// the nth submatch. The pair for n==0 identifies the match of the entire -// expression. If 'Index' is not present, the match is identified by the -// text of the match/submatch. If an index is negative, it means that +// the nth submatch. The pair for n==0 identifies the match of the entire +// expression. If 'Index' is not present, the match is identified by the +// text of the match/submatch. If an index is negative, it means that // subexpression did not match any string in the input. // // There is also a subset of the methods that can be applied to text read @@ -55,7 +55,7 @@ // // MatchReader, FindReaderIndex, FindReaderSubmatchIndex // -// This set may grow. Note that regular expression matches may need to +// This set may grow. Note that regular expression matches may need to // examine text beyond the text returned by a match, so the methods that // match text from a RuneReader may read arbitrarily far into the input // before returning. @@ -75,12 +75,18 @@ import ( "unicode/utf8" ) -var debug = false - // Regexp is the representation of a compiled regular expression. // A Regexp is safe for concurrent use by multiple goroutines. type Regexp struct { // read-only after Compile + regexpRO + + // cache of machines for running regexp + mu sync.Mutex + machine []*machine +} + +type regexpRO struct { expr string // as passed to Compile prog *syntax.Prog // compiled program onepass *onePassProg // onepass program or nil @@ -93,10 +99,6 @@ type Regexp struct { numSubexp int subexpNames []string longest bool - - // cache of machines for running regexp - mu sync.Mutex - machine []*machine } // String returns the source text used to compile the regular expression. 
@@ -109,10 +111,11 @@ func (re *Regexp) String() string { // When using a Regexp in multiple goroutines, giving each goroutine // its own copy helps to avoid lock contention. func (re *Regexp) Copy() *Regexp { - r := *re - r.mu = sync.Mutex{} - r.machine = nil - return &r + // It is not safe to copy Regexp by value + // since it contains a sync.Mutex. + return &Regexp{ + regexpRO: re.regexpRO, + } } // Compile parses a regular expression and returns, if successful, @@ -174,13 +177,15 @@ func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) { return nil, err } regexp := &Regexp{ - expr: expr, - prog: prog, - onepass: compileOnePass(prog), - numSubexp: maxCap, - subexpNames: capNames, - cond: prog.StartCond(), - longest: longest, + regexpRO: regexpRO{ + expr: expr, + prog: prog, + onepass: compileOnePass(prog), + numSubexp: maxCap, + subexpNames: capNames, + cond: prog.StartCond(), + longest: longest, + }, } if regexp.onepass == notOnePass { regexp.prefix, regexp.prefixComplete = prog.Prefix() @@ -258,10 +263,10 @@ func (re *Regexp) NumSubexp() int { } // SubexpNames returns the names of the parenthesized subexpressions -// in this Regexp. The name for the first sub-expression is names[1], +// in this Regexp. The name for the first sub-expression is names[1], // so that if m is a match slice, the name for m[i] is SubexpNames()[i]. // Since the Regexp as a whole cannot be named, names[0] is always -// the empty string. The slice should not be modified. +// the empty string. The slice should not be modified. func (re *Regexp) SubexpNames() []string { return re.subexpNames } @@ -394,7 +399,7 @@ func (i *inputReader) context(pos int) syntax.EmptyOp { } // LiteralPrefix returns a literal string that must begin any match -// of the regular expression re. It returns the boolean true if the +// of the regular expression re. It returns the boolean true if the // literal string comprises the entire regular expression. 
func (re *Regexp) LiteralPrefix() (prefix string, complete bool) { return re.prefix, re.prefixComplete @@ -417,7 +422,7 @@ func (re *Regexp) Match(b []byte) bool { } // MatchReader checks whether a textual regular expression matches the text -// read by the RuneReader. More complicated queries need to use Compile and +// read by the RuneReader. More complicated queries need to use Compile and // the full Regexp interface. func MatchReader(pattern string, r io.RuneReader) (matched bool, err error) { re, err := Compile(pattern) @@ -428,7 +433,7 @@ func MatchReader(pattern string, r io.RuneReader) (matched bool, err error) { } // MatchString checks whether a textual regular expression -// matches a string. More complicated queries need +// matches a string. More complicated queries need // to use Compile and the full Regexp interface. func MatchString(pattern string, s string) (matched bool, err error) { re, err := Compile(pattern) @@ -439,7 +444,7 @@ func MatchString(pattern string, s string) (matched bool, err error) { } // Match checks whether a textual regular expression -// matches a byte slice. More complicated queries need +// matches a byte slice. More complicated queries need // to use Compile and the full Regexp interface. func Match(pattern string, b []byte) (matched bool, err error) { re, err := Compile(pattern) @@ -450,11 +455,11 @@ func Match(pattern string, b []byte) (matched bool, err error) { } // ReplaceAllString returns a copy of src, replacing matches of the Regexp -// with the replacement string repl. Inside repl, $ signs are interpreted as +// with the replacement string repl. Inside repl, $ signs are interpreted as // in Expand, so for instance $1 represents the text of the first submatch. 
func (re *Regexp) ReplaceAllString(src, repl string) string { n := 2 - if strings.Index(repl, "$") >= 0 { + if strings.Contains(repl, "$") { n = 2 * (re.numSubexp + 1) } b := re.replaceAll(nil, src, n, func(dst []byte, match []int) []byte { @@ -464,7 +469,7 @@ func (re *Regexp) ReplaceAllString(src, repl string) string { } // ReplaceAllLiteralString returns a copy of src, replacing matches of the Regexp -// with the replacement string repl. The replacement repl is substituted directly, +// with the replacement string repl. The replacement repl is substituted directly, // without using Expand. func (re *Regexp) ReplaceAllLiteralString(src, repl string) string { return string(re.replaceAll(nil, src, 2, func(dst []byte, match []int) []byte { @@ -474,7 +479,7 @@ func (re *Regexp) ReplaceAllLiteralString(src, repl string) string { // ReplaceAllStringFunc returns a copy of src in which all matches of the // Regexp have been replaced by the return value of function repl applied -// to the matched substring. The replacement returned by repl is substituted +// to the matched substring. The replacement returned by repl is substituted // directly, without using Expand. func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string { b := re.replaceAll(nil, src, 2, func(dst []byte, match []int) []byte { @@ -530,7 +535,7 @@ func (re *Regexp) replaceAll(bsrc []byte, src string, nmatch int, repl func(dst searchPos += width } else if searchPos+1 > a[1] { // This clause is only needed at the end of the input - // string. In that case, DecodeRuneInString returns width=0. + // string. In that case, DecodeRuneInString returns width=0. searchPos++ } else { searchPos = a[1] @@ -548,7 +553,7 @@ func (re *Regexp) replaceAll(bsrc []byte, src string, nmatch int, repl func(dst } // ReplaceAll returns a copy of src, replacing matches of the Regexp -// with the replacement text repl. Inside repl, $ signs are interpreted as +// with the replacement text repl. 
Inside repl, $ signs are interpreted as // in Expand, so for instance $1 represents the text of the first submatch. func (re *Regexp) ReplaceAll(src, repl []byte) []byte { n := 2 @@ -566,7 +571,7 @@ func (re *Regexp) ReplaceAll(src, repl []byte) []byte { } // ReplaceAllLiteral returns a copy of src, replacing matches of the Regexp -// with the replacement bytes repl. The replacement repl is substituted directly, +// with the replacement bytes repl. The replacement repl is substituted directly, // without using Expand. func (re *Regexp) ReplaceAllLiteral(src, repl []byte) []byte { return re.replaceAll(src, "", 2, func(dst []byte, match []int) []byte { @@ -576,7 +581,7 @@ func (re *Regexp) ReplaceAllLiteral(src, repl []byte) []byte { // ReplaceAllFunc returns a copy of src in which all matches of the // Regexp have been replaced by the return value of function repl applied -// to the matched byte slice. The replacement returned by repl is substituted +// to the matched byte slice. The replacement returned by repl is substituted // directly, without using Expand. func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte { return re.replaceAll(src, "", 2, func(dst []byte, match []int) []byte { @@ -592,7 +597,7 @@ func special(b byte) bool { // QuoteMeta returns a string that quotes all regular expression metacharacters // inside the argument text; the returned string is a regular expression matching -// the literal text. For example, QuoteMeta(`[foo]`) returns `\[foo\]`. +// the literal text. For example, QuoteMeta(`[foo]`) returns `\[foo\]`. func QuoteMeta(s string) string { b := make([]byte, 2*len(s)) @@ -684,7 +689,7 @@ func (re *Regexp) Find(b []byte) []byte { } // FindIndex returns a two-element slice of integers defining the location of -// the leftmost match in b of the regular expression. The match itself is at +// the leftmost match in b of the regular expression. The match itself is at // b[loc[0]:loc[1]]. 
// A return value of nil indicates no match. func (re *Regexp) FindIndex(b []byte) (loc []int) { @@ -696,9 +701,9 @@ func (re *Regexp) FindIndex(b []byte) (loc []int) { } // FindString returns a string holding the text of the leftmost match in s of the regular -// expression. If there is no match, the return value is an empty string, +// expression. If there is no match, the return value is an empty string, // but it will also be empty if the regular expression successfully matches -// an empty string. Use FindStringIndex or FindStringSubmatch if it is +// an empty string. Use FindStringIndex or FindStringSubmatch if it is // necessary to distinguish these cases. func (re *Regexp) FindString(s string) string { a := re.doExecute(nil, nil, s, 0, 2) @@ -709,7 +714,7 @@ func (re *Regexp) FindString(s string) string { } // FindStringIndex returns a two-element slice of integers defining the -// location of the leftmost match in s of the regular expression. The match +// location of the leftmost match in s of the regular expression. The match // itself is at s[loc[0]:loc[1]]. // A return value of nil indicates no match. func (re *Regexp) FindStringIndex(s string) (loc []int) { @@ -722,7 +727,7 @@ func (re *Regexp) FindStringIndex(s string) (loc []int) { // FindReaderIndex returns a two-element slice of integers defining the // location of the leftmost match of the regular expression in text read from -// the RuneReader. The match text was found in the input stream at +// the RuneReader. The match text was found in the input stream at // byte offset loc[0] through loc[1]-1. // A return value of nil indicates no match. func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) { @@ -754,14 +759,14 @@ func (re *Regexp) FindSubmatch(b []byte) [][]byte { // Expand appends template to dst and returns the result; during the // append, Expand replaces variables in the template with corresponding -// matches drawn from src. 
The match slice should have been returned by +// matches drawn from src. The match slice should have been returned by // FindSubmatchIndex. // // In the template, a variable is denoted by a substring of the form // $name or ${name}, where name is a non-empty sequence of letters, -// digits, and underscores. A purely numeric name like $1 refers to +// digits, and underscores. A purely numeric name like $1 refers to // the submatch with the corresponding index; other names refer to -// capturing parentheses named with the (?P<name>...) syntax. A +// capturing parentheses named with the (?P<name>...) syntax. A // reference to an out of range or unmatched index or a name that is not // present in the regular expression is replaced with an empty slice. // @@ -920,7 +925,7 @@ func (re *Regexp) FindStringSubmatchIndex(s string) []int { // FindReaderSubmatchIndex returns a slice holding the index pairs // identifying the leftmost match of the regular expression of text read by // the RuneReader, and the matches, if any, of its subexpressions, as defined -// by the 'Submatch' and 'Index' descriptions in the package comment. A +// by the 'Submatch' and 'Index' descriptions in the package comment. A // return value of nil indicates no match. func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int { return re.pad(re.doExecute(r, nil, "", 0, re.prog.NumCap)) diff --git a/libgo/go/regexp/syntax/compile.go b/libgo/go/regexp/syntax/compile.go index 95f6f15..83e53ba 100644 --- a/libgo/go/regexp/syntax/compile.go +++ b/libgo/go/regexp/syntax/compile.go @@ -8,11 +8,11 @@ import "unicode" // A patchList is a list of instruction pointers that need to be filled in (patched). // Because the pointers haven't been filled in yet, we can reuse their storage -// to hold the list. It's kind of sleazy, but works well in practice. +// to hold the list. It's kind of sleazy, but works well in practice. // See http://swtch.com/~rsc/regexp/regexp1.html for inspiration. 
// // These aren't really pointers: they're integers, so we can reinterpret them -// this way without using package unsafe. A value l denotes +// this way without using package unsafe. A value l denotes // p.inst[l>>1].Out (l&1==0) or .Arg (l&1==1). // l == 0 denotes the empty list, okay because we start every program // with a fail instruction, so we'll never want to point at its output link. diff --git a/libgo/go/regexp/syntax/doc.go b/libgo/go/regexp/syntax/doc.go index e5e71f1..efc0b43 100644 --- a/libgo/go/regexp/syntax/doc.go +++ b/libgo/go/regexp/syntax/doc.go @@ -1,4 +1,4 @@ -// Copyright 2012 The Go Authors. All rights reserved. +// Copyright 2012 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. @@ -66,7 +66,7 @@ Grouping: Empty strings: ^ at beginning of text or line (flag m=true) - $ at end of text (like \z not \Z) or line (flag m=true) + $ at end of text (like \z not Perl's \Z) or line (flag m=true) \A at beginning of text \b at ASCII word boundary (\w on one side and \W, \A, or \z on the other) \B not at ASCII word boundary diff --git a/libgo/go/regexp/syntax/parse.go b/libgo/go/regexp/syntax/parse.go index f38bbf6..7b8be55 100644 --- a/libgo/go/regexp/syntax/parse.go +++ b/libgo/go/regexp/syntax/parse.go @@ -1,4 +1,4 @@ -// Copyright 2011 The Go Authors. All rights reserved. +// Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. @@ -141,9 +141,9 @@ func (p *parser) push(re *Regexp) *Regexp { } // maybeConcat implements incremental concatenation -// of literal runes into string nodes. The parser calls this +// of literal runes into string nodes. The parser calls this // before each push, so only the top fragment of the stack -// might need processing. Since this is called before a push, +// might need processing. 
Since this is called before a push, // the topmost literal is no longer subject to operators like * // (Otherwise ab* would turn into (ab)*.) // If r >= 0 and there's a node left over, maybeConcat uses it @@ -600,7 +600,7 @@ func (p *parser) leadingString(re *Regexp) ([]rune, Flags) { } // removeLeadingString removes the first n leading runes -// from the beginning of re. It returns the replacement for re. +// from the beginning of re. It returns the replacement for re. func (p *parser) removeLeadingString(re *Regexp, n int) *Regexp { if re.Op == OpConcat && len(re.Sub) > 0 { // Removing a leading string in a concatenation @@ -957,11 +957,11 @@ func (p *parser) parsePerlFlags(s string) (rest string, err error) { // Perl 5.10 gave in and implemented the Python version too, // but they claim that the last two are the preferred forms. // PCRE and languages based on it (specifically, PHP and Ruby) - // support all three as well. EcmaScript 4 uses only the Python form. + // support all three as well. EcmaScript 4 uses only the Python form. // // In both the open source world (via Code Search) and the // Google source tree, (?P<expr>name) is the dominant form, - // so that's the one we implement. One is enough. + // so that's the one we implement. One is enough. if len(t) > 4 && t[2] == 'P' && t[3] == '<' { // Pull out name. end := strings.IndexRune(t, '>') @@ -989,7 +989,7 @@ func (p *parser) parsePerlFlags(s string) (rest string, err error) { return t[end+1:], nil } - // Non-capturing group. Might also twiddle Perl flags. + // Non-capturing group. Might also twiddle Perl flags. var c rune t = t[2:] // skip (? flags := p.flags @@ -1257,7 +1257,7 @@ Switch: if c < utf8.RuneSelf && !isalnum(c) { // Escaped non-word characters are always themselves. // PCRE is not quite so rigorous: it accepts things like - // \q, but we don't. We once rejected \_, but too many + // \q, but we don't. We once rejected \_, but too many // programs and people insist on using it, so allow \_. 
return c, t, nil } @@ -1292,7 +1292,7 @@ Switch: if c == '{' { // Any number of digits in braces. // Perl accepts any text at all; it ignores all text - // after the first non-hex digit. We require only hex digits, + // after the first non-hex digit. We require only hex digits, // and at least one. nhex := 0 r = 0 @@ -1333,10 +1333,10 @@ Switch: } return x*16 + y, t, nil - // C escapes. There is no case 'b', to avoid misparsing + // C escapes. There is no case 'b', to avoid misparsing // the Perl word-boundary \b as the C backspace \b - // when in POSIX mode. In Perl, /\b/ means word-boundary - // but /[\b]/ means backspace. We don't support that. + // when in POSIX mode. In Perl, /\b/ means word-boundary + // but /[\b]/ means backspace. We don't support that. // If you want a backspace, embed a literal backspace // character or use \x08. case 'a': @@ -1377,7 +1377,7 @@ type charGroup struct { } // parsePerlClassEscape parses a leading Perl character class escape like \d -// from the beginning of s. If one is present, it appends the characters to r +// from the beginning of s. If one is present, it appends the characters to r // and returns the new slice r and the remainder of the string. func (p *parser) parsePerlClassEscape(s string, r []rune) (out []rune, rest string) { if p.flags&PerlX == 0 || len(s) < 2 || s[0] != '\\' { @@ -1391,7 +1391,7 @@ func (p *parser) parsePerlClassEscape(s string, r []rune) (out []rune, rest stri } // parseNamedClass parses a leading POSIX named character class like [:alnum:] -// from the beginning of s. If one is present, it appends the characters to r +// from the beginning of s. If one is present, it appends the characters to r // and returns the new slice r and the remainder of the string. 
func (p *parser) parseNamedClass(s string, r []rune) (out []rune, rest string, err error) { if len(s) < 2 || s[0] != '[' || s[1] != ':' { @@ -1454,7 +1454,7 @@ func unicodeTable(name string) (*unicode.RangeTable, *unicode.RangeTable) { } // parseUnicodeClass parses a leading Unicode character class like \p{Han} -// from the beginning of s. If one is present, it appends the characters to r +// from the beginning of s. If one is present, it appends the characters to r // and returns the new slice r and the remainder of the string. func (p *parser) parseUnicodeClass(s string, r []rune) (out []rune, rest string, err error) { if p.flags&UnicodeGroups == 0 || len(s) < 2 || s[0] != '\\' || s[1] != 'p' && s[1] != 'P' { @@ -1692,7 +1692,7 @@ const ( // minimum and maximum runes involved in folding. // checked during test. minFold = 0x0041 - maxFold = 0x118df + maxFold = 0x1e943 ) // appendFoldedRange returns the result of appending the range lo-hi @@ -1718,7 +1718,7 @@ func appendFoldedRange(r []rune, lo, hi rune) []rune { hi = maxFold } - // Brute force. Depend on appendRange to coalesce ranges on the fly. + // Brute force. Depend on appendRange to coalesce ranges on the fly. for c := lo; c <= hi; c++ { r = appendRange(r, c, c) f := unicode.SimpleFold(c) diff --git a/libgo/go/regexp/syntax/parse_test.go b/libgo/go/regexp/syntax/parse_test.go index 5ca54bb..dd6529f 100644 --- a/libgo/go/regexp/syntax/parse_test.go +++ b/libgo/go/regexp/syntax/parse_test.go @@ -1,4 +1,4 @@ -// Copyright 2011 The Go Authors. All rights reserved. +// Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
diff --git a/libgo/go/regexp/syntax/prog.go b/libgo/go/regexp/syntax/prog.go index ae6db31..c32ae8d 100644 --- a/libgo/go/regexp/syntax/prog.go +++ b/libgo/go/regexp/syntax/prog.go @@ -144,7 +144,7 @@ func (i *Inst) op() InstOp { } // Prefix returns a literal string that all matches for the -// regexp must start with. Complete is true if the prefix +// regexp must start with. Complete is true if the prefix // is the entire match. func (p *Prog) Prefix() (prefix string, complete bool) { i, _ := p.skipNop(uint32(p.Start)) @@ -164,7 +164,7 @@ func (p *Prog) Prefix() (prefix string, complete bool) { } // StartCond returns the leading empty-width conditions that must -// be true in any match. It returns ^EmptyOp(0) if no matches are possible. +// be true in any match. It returns ^EmptyOp(0) if no matches are possible. func (p *Prog) StartCond() EmptyOp { var flag EmptyOp pc := uint32(p.Start) diff --git a/libgo/go/regexp/syntax/regexp.go b/libgo/go/regexp/syntax/regexp.go index 75822cf..0fe9269 100644 --- a/libgo/go/regexp/syntax/regexp.go +++ b/libgo/go/regexp/syntax/regexp.go @@ -1,4 +1,4 @@ -// Copyright 2011 The Go Authors. All rights reserved. +// Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. @@ -139,7 +139,7 @@ func writeRegexp(b *bytes.Buffer, re *Regexp) { if len(re.Rune) == 0 { b.WriteString(`^\x00-\x{10FFFF}`) } else if re.Rune[0] == 0 && re.Rune[len(re.Rune)-1] == unicode.MaxRune { - // Contains 0 and MaxRune. Probably a negated class. + // Contains 0 and MaxRune. Probably a negated class. // Print the gaps. 
b.WriteRune('^') for i := 1; i < len(re.Rune)-1; i += 2 { @@ -252,7 +252,7 @@ const meta = `\.+*?()|[]{}^$` func escape(b *bytes.Buffer, r rune, force bool) { if unicode.IsPrint(r) { - if strings.IndexRune(meta, r) >= 0 || force { + if strings.ContainsRune(meta, r) || force { b.WriteRune('\\') } b.WriteRune(r) diff --git a/libgo/go/regexp/syntax/simplify.go b/libgo/go/regexp/syntax/simplify.go index 7239041..e439325 100644 --- a/libgo/go/regexp/syntax/simplify.go +++ b/libgo/go/regexp/syntax/simplify.go @@ -1,4 +1,4 @@ -// Copyright 2011 The Go Authors. All rights reserved. +// Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. @@ -8,7 +8,7 @@ package syntax // and with various other simplifications, such as rewriting /(?:a+)+/ to /a+/. // The resulting regexp will execute correctly but its string representation // will not produce the same parse tree, because capturing parentheses -// may have been duplicated or removed. For example, the simplified form +// may have been duplicated or removed. For example, the simplified form // for /(x){1,2}/ is /(x)(x)?/ but both parentheses capture as $1. // The returned regexp may share structure with or be the original. func (re *Regexp) Simplify() *Regexp { @@ -117,13 +117,13 @@ func (re *Regexp) Simplify() *Regexp { } // simplify1 implements Simplify for the unary OpStar, -// OpPlus, and OpQuest operators. It returns the simple regexp +// OpPlus, and OpQuest operators. It returns the simple regexp // equivalent to // // Regexp{Op: op, Flags: flags, Sub: {sub}} // // under the assumption that sub is already simple, and -// without first allocating that structure. If the regexp +// without first allocating that structure. If the regexp // to be returned turns out to be equivalent to re, simplify1 // returns re instead. 
// diff --git a/libgo/go/regexp/syntax/simplify_test.go b/libgo/go/regexp/syntax/simplify_test.go index 5d0f1de..9877db3 100644 --- a/libgo/go/regexp/syntax/simplify_test.go +++ b/libgo/go/regexp/syntax/simplify_test.go @@ -1,4 +1,4 @@ -// Copyright 2011 The Go Authors. All rights reserved. +// Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. @@ -59,7 +59,7 @@ var simplifyTests = []struct { {`a{0,1}`, `a?`}, // The next three are illegible because Simplify inserts (?:) // parens instead of () parens to avoid creating extra - // captured subexpressions. The comments show a version with fewer parens. + // captured subexpressions. The comments show a version with fewer parens. {`(a){0,2}`, `(?:(a)(a)?)?`}, // (aa?)? {`(a){0,4}`, `(?:(a)(?:(a)(?:(a)(a)?)?)?)?`}, // (a(a(aa?)?)?)? {`(a){2,6}`, `(a)(a)(?:(a)(?:(a)(?:(a)(a)?)?)?)?`}, // aa(a(a(aa?)?)?)? @@ -117,7 +117,7 @@ var simplifyTests = []struct { // Empty string as a regular expression. // The empty string must be preserved inside parens in order // to make submatches work right, so these tests are less - // interesting than they might otherwise be. String inserts + // interesting than they might otherwise be. String inserts // explicit (?:) in place of non-parenthesized empty strings, // to make them easier to spot for other parsers. {`(a|b|)`, `([a-b]|(?:))`}, |