aboutsummaryrefslogtreecommitdiff
path: root/libgo/go/regexp
diff options
context:
space:
mode:
authorIan Lance Taylor <iant@golang.org>2019-01-18 19:04:36 +0000
committerIan Lance Taylor <ian@gcc.gnu.org>2019-01-18 19:04:36 +0000
commit4f4a855d82a889cebcfca150a7a43909bcb6a346 (patch)
treef12bae0781920fa34669fe30b6f4615a86d9fb80 /libgo/go/regexp
parent225220d668dafb8262db7012bced688acbe63b33 (diff)
downloadgcc-4f4a855d82a889cebcfca150a7a43909bcb6a346.zip
gcc-4f4a855d82a889cebcfca150a7a43909bcb6a346.tar.gz
gcc-4f4a855d82a889cebcfca150a7a43909bcb6a346.tar.bz2
libgo: update to Go1.12beta2
Reviewed-on: https://go-review.googlesource.com/c/158019 gotools/: * Makefile.am (go_cmd_vet_files): Update for Go1.12beta2 release. (GOTOOLS_TEST_TIMEOUT): Increase to 600. (check-runtime): Export LD_LIBRARY_PATH before computing GOARCH and GOOS. (check-vet): Copy golang.org/x/tools into check-vet-dir. * Makefile.in: Regenerate. gcc/testsuite/: * go.go-torture/execute/names-1.go: Stop using debug/xcoff, which is no longer externally visible. From-SVN: r268084
Diffstat (limited to 'libgo/go/regexp')
-rw-r--r--libgo/go/regexp/all_test.go27
-rw-r--r--libgo/go/regexp/backtrack.go179
-rw-r--r--libgo/go/regexp/exec.go312
-rw-r--r--libgo/go/regexp/exec_test.go12
-rw-r--r--libgo/go/regexp/onepass.go24
-rw-r--r--libgo/go/regexp/onepass_test.go106
-rw-r--r--libgo/go/regexp/regexp.go163
-rw-r--r--libgo/go/regexp/syntax/prog.go32
-rw-r--r--libgo/go/regexp/syntax/regexp.go2
9 files changed, 501 insertions, 356 deletions
diff --git a/libgo/go/regexp/all_test.go b/libgo/go/regexp/all_test.go
index 0fabeae..623f82d 100644
--- a/libgo/go/regexp/all_test.go
+++ b/libgo/go/regexp/all_test.go
@@ -550,8 +550,8 @@ func TestOnePassCutoff(t *testing.T) {
if err != nil {
t.Fatalf("compile: %v", err)
}
- if compileOnePass(p) != notOnePass {
- t.Fatalf("makeOnePass succeeded; wanted notOnePass")
+ if compileOnePass(p) != nil {
+ t.Fatalf("makeOnePass succeeded; wanted nil")
}
}
@@ -859,3 +859,26 @@ func BenchmarkQuoteMetaNone(b *testing.B) {
sink = QuoteMeta(s)
}
}
+
+func TestDeepEqual(t *testing.T) {
+ re1 := MustCompile("a.*b.*c.*d")
+ re2 := MustCompile("a.*b.*c.*d")
+ if !reflect.DeepEqual(re1, re2) { // has always been true, since Go 1.
+ t.Errorf("DeepEqual(re1, re2) = false, want true")
+ }
+
+ re1.MatchString("abcdefghijklmn")
+ if !reflect.DeepEqual(re1, re2) {
+ t.Errorf("DeepEqual(re1, re2) = false, want true")
+ }
+
+ re2.MatchString("abcdefghijklmn")
+ if !reflect.DeepEqual(re1, re2) {
+ t.Errorf("DeepEqual(re1, re2) = false, want true")
+ }
+
+ re2.MatchString(strings.Repeat("abcdefghijklmn", 100))
+ if !reflect.DeepEqual(re1, re2) {
+ t.Errorf("DeepEqual(re1, re2) = false, want true")
+ }
+}
diff --git a/libgo/go/regexp/backtrack.go b/libgo/go/regexp/backtrack.go
index 440bf7f..9fb7d1e 100644
--- a/libgo/go/regexp/backtrack.go
+++ b/libgo/go/regexp/backtrack.go
@@ -14,7 +14,10 @@
package regexp
-import "regexp/syntax"
+import (
+ "regexp/syntax"
+ "sync"
+)
// A job is an entry on the backtracker's job stack. It holds
// the instruction pc and the position in the input.
@@ -32,15 +35,29 @@ const (
// bitState holds state for the backtracker.
type bitState struct {
- prog *syntax.Prog
+ end int
+ cap []int
+ matchcap []int
+ jobs []job
+ visited []uint32
+
+ inputs inputs
+}
+
+var bitStatePool sync.Pool
- end int
- cap []int
- jobs []job
- visited []uint32
+func newBitState() *bitState {
+ b, ok := bitStatePool.Get().(*bitState)
+ if !ok {
+ b = new(bitState)
+ }
+ return b
}
-var notBacktrack *bitState = nil
+func freeBitState(b *bitState) {
+ b.inputs.clear()
+ bitStatePool.Put(b)
+}
// maxBitStateLen returns the maximum length of a string to search with
// the backtracker using prog.
@@ -51,18 +68,6 @@ func maxBitStateLen(prog *syntax.Prog) int {
return maxBacktrackVector / len(prog.Inst)
}
-// newBitState returns a new bitState for the given prog,
-// or notBacktrack if the size of the prog exceeds the maximum size that
-// the backtracker will be run for.
-func newBitState(prog *syntax.Prog) *bitState {
- if !shouldBacktrack(prog) {
- return notBacktrack
- }
- return &bitState{
- prog: prog,
- }
-}
-
// shouldBacktrack reports whether the program is too
// long for the backtracker to run.
func shouldBacktrack(prog *syntax.Prog) bool {
@@ -72,7 +77,7 @@ func shouldBacktrack(prog *syntax.Prog) bool {
// reset resets the state of the backtracker.
// end is the end position in the input.
// ncap is the number of captures.
-func (b *bitState) reset(end int, ncap int) {
+func (b *bitState) reset(prog *syntax.Prog, end int, ncap int) {
b.end = end
if cap(b.jobs) == 0 {
@@ -81,7 +86,7 @@ func (b *bitState) reset(end int, ncap int) {
b.jobs = b.jobs[:0]
}
- visitedSize := (len(b.prog.Inst)*(end+1) + visitedBits - 1) / visitedBits
+ visitedSize := (len(prog.Inst)*(end+1) + visitedBits - 1) / visitedBits
if cap(b.visited) < visitedSize {
b.visited = make([]uint32, visitedSize, maxBacktrackVector/visitedBits)
} else {
@@ -99,6 +104,15 @@ func (b *bitState) reset(end int, ncap int) {
for i := range b.cap {
b.cap[i] = -1
}
+
+ if cap(b.matchcap) < ncap {
+ b.matchcap = make([]int, ncap)
+ } else {
+ b.matchcap = b.matchcap[:ncap]
+ }
+ for i := range b.matchcap {
+ b.matchcap[i] = -1
+ }
}
// shouldVisit reports whether the combination of (pc, pos) has not
@@ -114,20 +128,19 @@ func (b *bitState) shouldVisit(pc uint32, pos int) bool {
// push pushes (pc, pos, arg) onto the job stack if it should be
// visited.
-func (b *bitState) push(pc uint32, pos int, arg bool) {
+func (b *bitState) push(re *Regexp, pc uint32, pos int, arg bool) {
// Only check shouldVisit when arg is false.
// When arg is true, we are continuing a previous visit.
- if b.prog.Inst[pc].Op != syntax.InstFail && (arg || b.shouldVisit(pc, pos)) {
+ if re.prog.Inst[pc].Op != syntax.InstFail && (arg || b.shouldVisit(pc, pos)) {
b.jobs = append(b.jobs, job{pc: pc, arg: arg, pos: pos})
}
}
// tryBacktrack runs a backtracking search starting at pos.
-func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {
- longest := m.re.longest
- m.matched = false
+func (re *Regexp) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {
+ longest := re.longest
- b.push(pc, pos, false)
+ b.push(re, pc, pos, false)
for len(b.jobs) > 0 {
l := len(b.jobs) - 1
// Pop job off the stack.
@@ -150,7 +163,7 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {
}
Skip:
- inst := b.prog.Inst[pc]
+ inst := re.prog.Inst[pc]
switch inst.Op {
default:
@@ -172,23 +185,23 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {
pc = inst.Arg
goto CheckAndLoop
} else {
- b.push(pc, pos, true)
+ b.push(re, pc, pos, true)
pc = inst.Out
goto CheckAndLoop
}
case syntax.InstAltMatch:
// One opcode consumes runes; the other leads to match.
- switch b.prog.Inst[inst.Out].Op {
+ switch re.prog.Inst[inst.Out].Op {
case syntax.InstRune, syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
// inst.Arg is the match.
- b.push(inst.Arg, pos, false)
+ b.push(re, inst.Arg, pos, false)
pc = inst.Arg
pos = b.end
goto CheckAndLoop
}
// inst.Out is the match - non-greedy
- b.push(inst.Out, b.end, false)
+ b.push(re, inst.Out, b.end, false)
pc = inst.Out
goto CheckAndLoop
@@ -236,7 +249,7 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {
} else {
if 0 <= inst.Arg && inst.Arg < uint32(len(b.cap)) {
// Capture pos to register, but save old value.
- b.push(pc, b.cap[inst.Arg], true) // come back when we're done.
+ b.push(re, pc, b.cap[inst.Arg], true) // come back when we're done.
b.cap[inst.Arg] = pos
}
pc = inst.Out
@@ -244,7 +257,8 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {
}
case syntax.InstEmptyWidth:
- if syntax.EmptyOp(inst.Arg)&^i.context(pos) != 0 {
+ flag := i.context(pos)
+ if !flag.match(syntax.EmptyOp(inst.Arg)) {
continue
}
pc = inst.Out
@@ -258,8 +272,7 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {
// We found a match. If the caller doesn't care
// where the match is, no point going further.
if len(b.cap) == 0 {
- m.matched = true
- return m.matched
+ return true
}
// Record best match so far.
@@ -268,19 +281,18 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {
if len(b.cap) > 1 {
b.cap[1] = pos
}
- if !m.matched || (longest && pos > 0 && pos > m.matchcap[1]) {
- copy(m.matchcap, b.cap)
+ if old := b.matchcap[1]; old == -1 || (longest && pos > 0 && pos > old) {
+ copy(b.matchcap, b.cap)
}
- m.matched = true
// If going for first match, we're done.
if !longest {
- return m.matched
+ return true
}
// If we used the entire text, no longer match is possible.
if pos == b.end {
- return m.matched
+ return true
}
// Otherwise, continue on in hope of a longer match.
@@ -288,65 +300,68 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {
}
}
- return m.matched
+ return longest && len(b.matchcap) > 1 && b.matchcap[1] >= 0
}
// backtrack runs a backtracking search of prog on the input starting at pos.
-func (m *machine) backtrack(i input, pos int, end int, ncap int) bool {
- if !i.canCheckPrefix() {
- panic("backtrack called for a RuneReader")
- }
-
- startCond := m.re.cond
+func (re *Regexp) backtrack(ib []byte, is string, pos int, ncap int, dstCap []int) []int {
+ startCond := re.cond
if startCond == ^syntax.EmptyOp(0) { // impossible
- return false
+ return nil
}
if startCond&syntax.EmptyBeginText != 0 && pos != 0 {
// Anchored match, past beginning of text.
- return false
+ return nil
}
- b := m.b
- b.reset(end, ncap)
-
- m.matchcap = m.matchcap[:ncap]
- for i := range m.matchcap {
- m.matchcap[i] = -1
- }
+ b := newBitState()
+ i, end := b.inputs.init(nil, ib, is)
+ b.reset(re.prog, end, ncap)
// Anchored search must start at the beginning of the input
if startCond&syntax.EmptyBeginText != 0 {
if len(b.cap) > 0 {
b.cap[0] = pos
}
- return m.tryBacktrack(b, i, uint32(m.p.Start), pos)
- }
+ if !re.tryBacktrack(b, i, uint32(re.prog.Start), pos) {
+ freeBitState(b)
+ return nil
+ }
+ } else {
- // Unanchored search, starting from each possible text position.
- // Notice that we have to try the empty string at the end of
- // the text, so the loop condition is pos <= end, not pos < end.
- // This looks like it's quadratic in the size of the text,
- // but we are not clearing visited between calls to TrySearch,
- // so no work is duplicated and it ends up still being linear.
- width := -1
- for ; pos <= end && width != 0; pos += width {
- if len(m.re.prefix) > 0 {
- // Match requires literal prefix; fast search for it.
- advance := i.index(m.re, pos)
- if advance < 0 {
- return false
+ // Unanchored search, starting from each possible text position.
+ // Notice that we have to try the empty string at the end of
+ // the text, so the loop condition is pos <= end, not pos < end.
+ // This looks like it's quadratic in the size of the text,
+ // but we are not clearing visited between calls to TrySearch,
+ // so no work is duplicated and it ends up still being linear.
+ width := -1
+ for ; pos <= end && width != 0; pos += width {
+ if len(re.prefix) > 0 {
+ // Match requires literal prefix; fast search for it.
+ advance := i.index(re, pos)
+ if advance < 0 {
+ freeBitState(b)
+ return nil
+ }
+ pos += advance
}
- pos += advance
- }
- if len(b.cap) > 0 {
- b.cap[0] = pos
- }
- if m.tryBacktrack(b, i, uint32(m.p.Start), pos) {
- // Match must be leftmost; done.
- return true
+ if len(b.cap) > 0 {
+ b.cap[0] = pos
+ }
+ if re.tryBacktrack(b, i, uint32(re.prog.Start), pos) {
+ // Match must be leftmost; done.
+ goto Match
+ }
+ _, width = i.step(pos)
}
- _, width = i.step(pos)
+ freeBitState(b)
+ return nil
}
- return false
+
+Match:
+ dstCap = append(dstCap, b.matchcap...)
+ freeBitState(b)
+ return dstCap
}
diff --git a/libgo/go/regexp/exec.go b/libgo/go/regexp/exec.go
index 1c7b02d..efe764e 100644
--- a/libgo/go/regexp/exec.go
+++ b/libgo/go/regexp/exec.go
@@ -7,6 +7,7 @@ package regexp
import (
"io"
"regexp/syntax"
+ "sync"
)
// A queue is a 'sparse array' holding pending threads of execution.
@@ -35,54 +36,60 @@ type thread struct {
// A machine holds all the state during an NFA simulation for p.
type machine struct {
- re *Regexp // corresponding Regexp
- p *syntax.Prog // compiled program
- op *onePassProg // compiled onepass program, or notOnePass
- maxBitStateLen int // max length of string to search with bitstate
- b *bitState // state for backtracker, allocated lazily
- q0, q1 queue // two queues for runq, nextq
- pool []*thread // pool of available threads
- matched bool // whether a match was found
- matchcap []int // capture information for the match
+ re *Regexp // corresponding Regexp
+ p *syntax.Prog // compiled program
+ q0, q1 queue // two queues for runq, nextq
+ pool []*thread // pool of available threads
+ matched bool // whether a match was found
+ matchcap []int // capture information for the match
+ inputs inputs
+}
+
+type inputs struct {
// cached inputs, to avoid allocation
- inputBytes inputBytes
- inputString inputString
- inputReader inputReader
+ bytes inputBytes
+ string inputString
+ reader inputReader
}
-func (m *machine) newInputBytes(b []byte) input {
- m.inputBytes.str = b
- return &m.inputBytes
+func (i *inputs) newBytes(b []byte) input {
+ i.bytes.str = b
+ return &i.bytes
}
-func (m *machine) newInputString(s string) input {
- m.inputString.str = s
- return &m.inputString
+func (i *inputs) newString(s string) input {
+ i.string.str = s
+ return &i.string
}
-func (m *machine) newInputReader(r io.RuneReader) input {
- m.inputReader.r = r
- m.inputReader.atEOT = false
- m.inputReader.pos = 0
- return &m.inputReader
+func (i *inputs) newReader(r io.RuneReader) input {
+ i.reader.r = r
+ i.reader.atEOT = false
+ i.reader.pos = 0
+ return &i.reader
+}
+
+func (i *inputs) clear() {
+ // We need to clear 1 of these.
+ // Avoid the expense of clearing the others (pointer write barrier).
+ if i.bytes.str != nil {
+ i.bytes.str = nil
+ } else if i.reader.r != nil {
+ i.reader.r = nil
+ } else {
+ i.string.str = ""
+ }
}
-// progMachine returns a new machine running the prog p.
-func progMachine(p *syntax.Prog, op *onePassProg) *machine {
- m := &machine{p: p, op: op}
- n := len(m.p.Inst)
- m.q0 = queue{make([]uint32, n), make([]entry, 0, n)}
- m.q1 = queue{make([]uint32, n), make([]entry, 0, n)}
- ncap := p.NumCap
- if ncap < 2 {
- ncap = 2
+func (i *inputs) init(r io.RuneReader, b []byte, s string) (input, int) {
+ if r != nil {
+ return i.newReader(r), 0
}
- if op == notOnePass {
- m.maxBitStateLen = maxBitStateLen(p)
+ if b != nil {
+ return i.newBytes(b), len(b)
}
- m.matchcap = make([]int, ncap)
- return m
+ return i.newString(s), len(s)
}
func (m *machine) init(ncap int) {
@@ -107,6 +114,61 @@ func (m *machine) alloc(i *syntax.Inst) *thread {
return t
}
+// A lazyFlag is a lazily-evaluated syntax.EmptyOp,
+// for checking zero-width flags like ^ $ \A \z \B \b.
+// It records the pair of relevant runes and does not
+// determine the implied flags until absolutely necessary
+// (most of the time, that means never).
+type lazyFlag uint64
+
+func newLazyFlag(r1, r2 rune) lazyFlag {
+ return lazyFlag(uint64(r1)<<32 | uint64(uint32(r2)))
+}
+
+func (f lazyFlag) match(op syntax.EmptyOp) bool {
+ if op == 0 {
+ return true
+ }
+ r1 := rune(f >> 32)
+ if op&syntax.EmptyBeginLine != 0 {
+ if r1 != '\n' && r1 >= 0 {
+ return false
+ }
+ op &^= syntax.EmptyBeginLine
+ }
+ if op&syntax.EmptyBeginText != 0 {
+ if r1 >= 0 {
+ return false
+ }
+ op &^= syntax.EmptyBeginText
+ }
+ if op == 0 {
+ return true
+ }
+ r2 := rune(f)
+ if op&syntax.EmptyEndLine != 0 {
+ if r2 != '\n' && r2 >= 0 {
+ return false
+ }
+ op &^= syntax.EmptyEndLine
+ }
+ if op&syntax.EmptyEndText != 0 {
+ if r2 >= 0 {
+ return false
+ }
+ op &^= syntax.EmptyEndText
+ }
+ if op == 0 {
+ return true
+ }
+ if syntax.IsWordChar(r1) != syntax.IsWordChar(r2) {
+ op &^= syntax.EmptyWordBoundary
+ } else {
+ op &^= syntax.EmptyNoWordBoundary
+ }
+ return op == 0
+}
+
// match runs the machine over the input starting at pos.
// It reports whether a match was found.
// If so, m.matchcap holds the submatch information.
@@ -126,9 +188,9 @@ func (m *machine) match(i input, pos int) bool {
if r != endOfText {
r1, width1 = i.step(pos + width)
}
- var flag syntax.EmptyOp
+ var flag lazyFlag
if pos == 0 {
- flag = syntax.EmptyOpContext(-1, r)
+ flag = newLazyFlag(-1, r)
} else {
flag = i.context(pos)
}
@@ -157,10 +219,10 @@ func (m *machine) match(i input, pos int) bool {
if len(m.matchcap) > 0 {
m.matchcap[0] = pos
}
- m.add(runq, uint32(m.p.Start), pos, m.matchcap, flag, nil)
+ m.add(runq, uint32(m.p.Start), pos, m.matchcap, &flag, nil)
}
- flag = syntax.EmptyOpContext(r, r1)
- m.step(runq, nextq, pos, pos+width, r, flag)
+ flag = newLazyFlag(r, r1)
+ m.step(runq, nextq, pos, pos+width, r, &flag)
if width == 0 {
break
}
@@ -195,7 +257,7 @@ func (m *machine) clear(q *queue) {
// The step processes the rune c (which may be endOfText),
// which starts at position pos and ends at nextPos.
// nextCond gives the setting for the empty-width flags after c.
-func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond syntax.EmptyOp) {
+func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond *lazyFlag) {
longest := m.re.longest
for j := 0; j < len(runq.dense); j++ {
d := &runq.dense[j]
@@ -252,7 +314,8 @@ func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond sy
// It also recursively adds an entry for all instructions reachable from pc by following
// empty-width conditions satisfied by cond. pos gives the current position
// in the input.
-func (m *machine) add(q *queue, pc uint32, pos int, cap []int, cond syntax.EmptyOp, t *thread) *thread {
+func (m *machine) add(q *queue, pc uint32, pos int, cap []int, cond *lazyFlag, t *thread) *thread {
+Again:
if pc == 0 {
return t
}
@@ -275,13 +338,16 @@ func (m *machine) add(q *queue, pc uint32, pos int, cap []int, cond syntax.Empty
// nothing
case syntax.InstAlt, syntax.InstAltMatch:
t = m.add(q, i.Out, pos, cap, cond, t)
- t = m.add(q, i.Arg, pos, cap, cond, t)
+ pc = i.Arg
+ goto Again
case syntax.InstEmptyWidth:
- if syntax.EmptyOp(i.Arg)&^cond == 0 {
- t = m.add(q, i.Out, pos, cap, cond, t)
+ if cond.match(syntax.EmptyOp(i.Arg)) {
+ pc = i.Out
+ goto Again
}
case syntax.InstNop:
- t = m.add(q, i.Out, pos, cap, cond, t)
+ pc = i.Out
+ goto Again
case syntax.InstCapture:
if int(i.Arg) < len(cap) {
opos := cap[i.Arg]
@@ -289,7 +355,8 @@ func (m *machine) add(q *queue, pc uint32, pos int, cap []int, cond syntax.Empty
m.add(q, i.Out, pos, cap, cond, nil)
cap[i.Arg] = opos
} else {
- t = m.add(q, i.Out, pos, cap, cond, t)
+ pc = i.Out
+ goto Again
}
case syntax.InstMatch, syntax.InstRune, syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
if t == nil {
@@ -306,85 +373,112 @@ func (m *machine) add(q *queue, pc uint32, pos int, cap []int, cond syntax.Empty
return t
}
-// onepass runs the machine over the input starting at pos.
-// It reports whether a match was found.
-// If so, m.matchcap holds the submatch information.
-// ncap is the number of captures.
-func (m *machine) onepass(i input, pos, ncap int) bool {
- startCond := m.re.cond
+type onePassMachine struct {
+ inputs inputs
+ matchcap []int
+}
+
+var onePassPool sync.Pool
+
+func newOnePassMachine() *onePassMachine {
+ m, ok := onePassPool.Get().(*onePassMachine)
+ if !ok {
+ m = new(onePassMachine)
+ }
+ return m
+}
+
+func freeOnePassMachine(m *onePassMachine) {
+ m.inputs.clear()
+ onePassPool.Put(m)
+}
+
+// doOnePass implements r.doExecute using the one-pass execution engine.
+func (re *Regexp) doOnePass(ir io.RuneReader, ib []byte, is string, pos, ncap int, dstCap []int) []int {
+ startCond := re.cond
if startCond == ^syntax.EmptyOp(0) { // impossible
- return false
+ return nil
}
- m.matched = false
- m.matchcap = m.matchcap[:ncap]
+
+ m := newOnePassMachine()
+ if cap(m.matchcap) < ncap {
+ m.matchcap = make([]int, ncap)
+ } else {
+ m.matchcap = m.matchcap[:ncap]
+ }
+
+ matched := false
for i := range m.matchcap {
m.matchcap[i] = -1
}
+
+ i, _ := m.inputs.init(ir, ib, is)
+
r, r1 := endOfText, endOfText
width, width1 := 0, 0
r, width = i.step(pos)
if r != endOfText {
r1, width1 = i.step(pos + width)
}
- var flag syntax.EmptyOp
+ var flag lazyFlag
if pos == 0 {
- flag = syntax.EmptyOpContext(-1, r)
+ flag = newLazyFlag(-1, r)
} else {
flag = i.context(pos)
}
- pc := m.op.Start
- inst := m.op.Inst[pc]
+ pc := re.onepass.Start
+ inst := re.onepass.Inst[pc]
// If there is a simple literal prefix, skip over it.
- if pos == 0 && syntax.EmptyOp(inst.Arg)&^flag == 0 &&
- len(m.re.prefix) > 0 && i.canCheckPrefix() {
+ if pos == 0 && flag.match(syntax.EmptyOp(inst.Arg)) &&
+ len(re.prefix) > 0 && i.canCheckPrefix() {
// Match requires literal prefix; fast search for it.
- if !i.hasPrefix(m.re) {
- return m.matched
+ if !i.hasPrefix(re) {
+ goto Return
}
- pos += len(m.re.prefix)
+ pos += len(re.prefix)
r, width = i.step(pos)
r1, width1 = i.step(pos + width)
flag = i.context(pos)
- pc = int(m.re.prefixEnd)
+ pc = int(re.prefixEnd)
}
for {
- inst = m.op.Inst[pc]
+ inst = re.onepass.Inst[pc]
pc = int(inst.Out)
switch inst.Op {
default:
panic("bad inst")
case syntax.InstMatch:
- m.matched = true
+ matched = true
if len(m.matchcap) > 0 {
m.matchcap[0] = 0
m.matchcap[1] = pos
}
- return m.matched
+ goto Return
case syntax.InstRune:
if !inst.MatchRune(r) {
- return m.matched
+ goto Return
}
case syntax.InstRune1:
if r != inst.Rune[0] {
- return m.matched
+ goto Return
}
case syntax.InstRuneAny:
// Nothing
case syntax.InstRuneAnyNotNL:
if r == '\n' {
- return m.matched
+ goto Return
}
// peek at the input rune to see which branch of the Alt to take
case syntax.InstAlt, syntax.InstAltMatch:
pc = int(onePassNext(&inst, r))
continue
case syntax.InstFail:
- return m.matched
+ goto Return
case syntax.InstNop:
continue
case syntax.InstEmptyWidth:
- if syntax.EmptyOp(inst.Arg)&^flag != 0 {
- return m.matched
+ if !flag.match(syntax.EmptyOp(inst.Arg)) {
+ goto Return
}
continue
case syntax.InstCapture:
@@ -396,14 +490,23 @@ func (m *machine) onepass(i input, pos, ncap int) bool {
if width == 0 {
break
}
- flag = syntax.EmptyOpContext(r, r1)
+ flag = newLazyFlag(r, r1)
pos += width
r, width = r1, width1
if r != endOfText {
r1, width1 = i.step(pos + width)
}
}
- return m.matched
+
+Return:
+ if !matched {
+ freeOnePassMachine(m)
+ return nil
+ }
+
+ dstCap = append(dstCap, m.matchcap...)
+ freeOnePassMachine(m)
+ return dstCap
}
// doMatch reports whether either r, b or s match the regexp.
@@ -416,43 +519,28 @@ func (re *Regexp) doMatch(r io.RuneReader, b []byte, s string) bool {
//
// nil is returned if no matches are found and non-nil if matches are found.
func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap int, dstCap []int) []int {
- m := re.get()
- var i input
- var size int
- if r != nil {
- i = m.newInputReader(r)
- } else if b != nil {
- i = m.newInputBytes(b)
- size = len(b)
- } else {
- i = m.newInputString(s)
- size = len(s)
+ if dstCap == nil {
+ // Make sure 'return dstCap' is non-nil.
+ dstCap = arrayNoInts[:0:0]
}
- if m.op != notOnePass {
- if !m.onepass(i, pos, ncap) {
- re.put(m)
- return nil
- }
- } else if size < m.maxBitStateLen && r == nil {
- if m.b == nil {
- m.b = newBitState(m.p)
- }
- if !m.backtrack(i, pos, size, ncap) {
- re.put(m)
- return nil
- }
- } else {
- m.init(ncap)
- if !m.match(i, pos) {
- re.put(m)
- return nil
- }
+
+ if re.onepass != nil {
+ return re.doOnePass(r, b, s, pos, ncap, dstCap)
}
- dstCap = append(dstCap, m.matchcap...)
- if dstCap == nil {
- // Keep the promise of returning non-nil value on match.
- dstCap = arrayNoInts[:0]
+ if r == nil && len(b)+len(s) < re.maxBitStateLen {
+ return re.backtrack(b, s, pos, ncap, dstCap)
}
+
+ m := re.get()
+ i, _ := m.inputs.init(r, b, s)
+
+ m.init(ncap)
+ if !m.match(i, pos) {
+ re.put(m)
+ return nil
+ }
+
+ dstCap = append(dstCap, m.matchcap...)
re.put(m)
return dstCap
}
diff --git a/libgo/go/regexp/exec_test.go b/libgo/go/regexp/exec_test.go
index 5f8e747..1489219 100644
--- a/libgo/go/regexp/exec_test.go
+++ b/libgo/go/regexp/exec_test.go
@@ -684,7 +684,7 @@ func BenchmarkMatch(b *testing.B) {
func BenchmarkMatch_onepass_regex(b *testing.B) {
isRaceBuilder := strings.HasSuffix(testenv.Builder(), "-race")
r := MustCompile(`(?s)\A.*\z`)
- if r.get().op == notOnePass {
+ if r.onepass == nil {
b.Fatalf("want onepass regex, but %q is not onepass", r)
}
for _, size := range benchSizes {
@@ -692,18 +692,12 @@ func BenchmarkMatch_onepass_regex(b *testing.B) {
continue
}
t := makeText(size.n)
- bs := make([][]byte, len(t))
- for i, s := range t {
- bs[i] = []byte{s}
- }
b.Run(size.name, func(b *testing.B) {
b.SetBytes(int64(size.n))
b.ReportAllocs()
for i := 0; i < b.N; i++ {
- for _, byts := range bs {
- if !r.Match(byts) {
- b.Fatal("not match!")
- }
+ if !r.Match(t) {
+ b.Fatal("not match!")
}
}
})
diff --git a/libgo/go/regexp/onepass.go b/libgo/go/regexp/onepass.go
index 125be59..2f3ce6f 100644
--- a/libgo/go/regexp/onepass.go
+++ b/libgo/go/regexp/onepass.go
@@ -294,12 +294,12 @@ var anyRune = []rune{0, unicode.MaxRune}
// makeOnePass creates a onepass Prog, if possible. It is possible if at any alt,
// the match engine can always tell which branch to take. The routine may modify
// p if it is turned into a onepass Prog. If it isn't possible for this to be a
-// onepass Prog, the Prog notOnePass is returned. makeOnePass is recursive
+// onepass Prog, the Prog nil is returned. makeOnePass is recursive
// to the size of the Prog.
func makeOnePass(p *onePassProg) *onePassProg {
// If the machine is very long, it's not worth the time to check if we can use one pass.
if len(p.Inst) >= 1000 {
- return notOnePass
+ return nil
}
var (
@@ -446,11 +446,11 @@ func makeOnePass(p *onePassProg) *onePassProg {
visitQueue.clear()
pc := instQueue.next()
if !check(pc, m) {
- p = notOnePass
+ p = nil
break
}
}
- if p != notOnePass {
+ if p != nil {
for i := range p.Inst {
p.Inst[i].Rune = onePassRunes[i]
}
@@ -458,20 +458,18 @@ func makeOnePass(p *onePassProg) *onePassProg {
return p
}
-var notOnePass *onePassProg = nil
-
// compileOnePass returns a new *syntax.Prog suitable for onePass execution if the original Prog
-// can be recharacterized as a one-pass regexp program, or syntax.notOnePass if the
+// can be recharacterized as a one-pass regexp program, or syntax.nil if the
// Prog cannot be converted. For a one pass prog, the fundamental condition that must
// be true is: at any InstAlt, there must be no ambiguity about what branch to take.
func compileOnePass(prog *syntax.Prog) (p *onePassProg) {
if prog.Start == 0 {
- return notOnePass
+ return nil
}
// onepass regexp is anchored
if prog.Inst[prog.Start].Op != syntax.InstEmptyWidth ||
syntax.EmptyOp(prog.Inst[prog.Start].Arg)&syntax.EmptyBeginText != syntax.EmptyBeginText {
- return notOnePass
+ return nil
}
// every instruction leading to InstMatch must be EmptyEndText
for _, inst := range prog.Inst {
@@ -479,18 +477,18 @@ func compileOnePass(prog *syntax.Prog) (p *onePassProg) {
switch inst.Op {
default:
if opOut == syntax.InstMatch {
- return notOnePass
+ return nil
}
case syntax.InstAlt, syntax.InstAltMatch:
if opOut == syntax.InstMatch || prog.Inst[inst.Arg].Op == syntax.InstMatch {
- return notOnePass
+ return nil
}
case syntax.InstEmptyWidth:
if opOut == syntax.InstMatch {
if syntax.EmptyOp(inst.Arg)&syntax.EmptyEndText == syntax.EmptyEndText {
continue
}
- return notOnePass
+ return nil
}
}
}
@@ -501,7 +499,7 @@ func compileOnePass(prog *syntax.Prog) (p *onePassProg) {
// checkAmbiguity on InstAlts, build onepass Prog if possible
p = makeOnePass(p)
- if p != notOnePass {
+ if p != nil {
cleanupOnePass(p, prog)
}
return p
diff --git a/libgo/go/regexp/onepass_test.go b/libgo/go/regexp/onepass_test.go
index b1caa44..a0f2e39 100644
--- a/libgo/go/regexp/onepass_test.go
+++ b/libgo/go/regexp/onepass_test.go
@@ -134,47 +134,45 @@ func TestMergeRuneSet(t *testing.T) {
}
}
-var onePass = &onePassProg{}
-
var onePassTests = []struct {
- re string
- onePass *onePassProg
+ re string
+ isOnePass bool
}{
- {`^(?:a|(?:a*))$`, notOnePass},
- {`^(?:(a)|(?:a*))$`, notOnePass},
- {`^(?:(?:(?:.(?:$))?))$`, onePass},
- {`^abcd$`, onePass},
- {`^(?:(?:a{0,})*?)$`, onePass},
- {`^(?:(?:a+)*)$`, onePass},
- {`^(?:(?:a|(?:aa)))$`, onePass},
- {`^(?:[^\s\S])$`, onePass},
- {`^(?:(?:a{3,4}){0,})$`, notOnePass},
- {`^(?:(?:(?:a*)+))$`, onePass},
- {`^[a-c]+$`, onePass},
- {`^[a-c]*$`, onePass},
- {`^(?:a*)$`, onePass},
- {`^(?:(?:aa)|a)$`, onePass},
- {`^[a-c]*`, notOnePass},
- {`^...$`, onePass},
- {`^(?:a|(?:aa))$`, onePass},
- {`^a((b))c$`, onePass},
- {`^a.[l-nA-Cg-j]?e$`, onePass},
- {`^a((b))$`, onePass},
- {`^a(?:(b)|(c))c$`, onePass},
- {`^a(?:(b*)|(c))c$`, notOnePass},
- {`^a(?:b|c)$`, onePass},
- {`^a(?:b?|c)$`, onePass},
- {`^a(?:b?|c?)$`, notOnePass},
- {`^a(?:b?|c+)$`, onePass},
- {`^a(?:b+|(bc))d$`, notOnePass},
- {`^a(?:bc)+$`, onePass},
- {`^a(?:[bcd])+$`, onePass},
- {`^a((?:[bcd])+)$`, onePass},
- {`^a(:?b|c)*d$`, onePass},
- {`^.bc(d|e)*$`, onePass},
- {`^(?:(?:aa)|.)$`, notOnePass},
- {`^(?:(?:a{1,2}){1,2})$`, notOnePass},
- {`^l` + strings.Repeat("o", 2<<8) + `ng$`, onePass},
+ {`^(?:a|(?:a*))$`, false},
+ {`^(?:(a)|(?:a*))$`, false},
+ {`^(?:(?:(?:.(?:$))?))$`, true},
+ {`^abcd$`, true},
+ {`^(?:(?:a{0,})*?)$`, true},
+ {`^(?:(?:a+)*)$`, true},
+ {`^(?:(?:a|(?:aa)))$`, true},
+ {`^(?:[^\s\S])$`, true},
+ {`^(?:(?:a{3,4}){0,})$`, false},
+ {`^(?:(?:(?:a*)+))$`, true},
+ {`^[a-c]+$`, true},
+ {`^[a-c]*$`, true},
+ {`^(?:a*)$`, true},
+ {`^(?:(?:aa)|a)$`, true},
+ {`^[a-c]*`, false},
+ {`^...$`, true},
+ {`^(?:a|(?:aa))$`, true},
+ {`^a((b))c$`, true},
+ {`^a.[l-nA-Cg-j]?e$`, true},
+ {`^a((b))$`, true},
+ {`^a(?:(b)|(c))c$`, true},
+ {`^a(?:(b*)|(c))c$`, false},
+ {`^a(?:b|c)$`, true},
+ {`^a(?:b?|c)$`, true},
+ {`^a(?:b?|c?)$`, false},
+ {`^a(?:b?|c+)$`, true},
+ {`^a(?:b+|(bc))d$`, false},
+ {`^a(?:bc)+$`, true},
+ {`^a(?:[bcd])+$`, true},
+ {`^a((?:[bcd])+)$`, true},
+ {`^a(:?b|c)*d$`, true},
+ {`^.bc(d|e)*$`, true},
+ {`^(?:(?:aa)|.)$`, false},
+ {`^(?:(?:a{1,2}){1,2})$`, false},
+ {`^l` + strings.Repeat("o", 2<<8) + `ng$`, true},
}
func TestCompileOnePass(t *testing.T) {
@@ -194,9 +192,9 @@ func TestCompileOnePass(t *testing.T) {
t.Errorf("Compile(%q) got err:%s, want success", test.re, err)
continue
}
- onePass = compileOnePass(p)
- if (onePass == notOnePass) != (test.onePass == notOnePass) {
- t.Errorf("CompileOnePass(%q) got %v, expected %v", test.re, onePass, test.onePass)
+ isOnePass := compileOnePass(p) != nil
+ if isOnePass != test.isOnePass {
+ t.Errorf("CompileOnePass(%q) got isOnePass=%v, expected %v", test.re, isOnePass, test.isOnePass)
}
}
}
@@ -216,8 +214,8 @@ func TestRunOnePass(t *testing.T) {
t.Errorf("Compile(%q): got err: %s", test.re, err)
continue
}
- if re.onepass == notOnePass {
- t.Errorf("Compile(%q): got notOnePass, want one-pass", test.re)
+ if re.onepass == nil {
+ t.Errorf("Compile(%q): got nil, want one-pass", test.re)
continue
}
if !re.MatchString(test.match) {
@@ -227,21 +225,11 @@ func TestRunOnePass(t *testing.T) {
}
func BenchmarkCompileOnepass(b *testing.B) {
- for _, test := range onePassTests {
- if test.onePass == notOnePass {
- continue
- }
- name := test.re
- if len(name) > 20 {
- name = name[:20] + "..."
+ b.ReportAllocs()
+ const re = `^a.[l-nA-Cg-j]?e$`
+ for i := 0; i < b.N; i++ {
+ if _, err := Compile(re); err != nil {
+ b.Fatal(err)
}
- b.Run(name, func(b *testing.B) {
- b.ReportAllocs()
- for i := 0; i < b.N; i++ {
- if _, err := Compile(test.re); err != nil {
- b.Fatal(err)
- }
- }
- })
}
}
diff --git a/libgo/go/regexp/regexp.go b/libgo/go/regexp/regexp.go
index 61ed9c5..38b3c86 100644
--- a/libgo/go/regexp/regexp.go
+++ b/libgo/go/regexp/regexp.go
@@ -79,27 +79,24 @@ import (
// A Regexp is safe for concurrent use by multiple goroutines,
// except for configuration methods, such as Longest.
type Regexp struct {
- // read-only after Compile
- regexpRO
-
- // cache of machines for running regexp
- mu sync.Mutex
- machine []*machine
-}
-
-type regexpRO struct {
- expr string // as passed to Compile
- prog *syntax.Prog // compiled program
- onepass *onePassProg // onepass program or nil
+ expr string // as passed to Compile
+ prog *syntax.Prog // compiled program
+ onepass *onePassProg // onepass program or nil
+ numSubexp int
+ maxBitStateLen int
+ subexpNames []string
prefix string // required prefix in unanchored matches
prefixBytes []byte // prefix, as a []byte
- prefixComplete bool // prefix is the entire regexp
prefixRune rune // first rune in prefix
prefixEnd uint32 // pc for last rune in prefix
+ mpool int // pool for machines
+ matchcap int // size of recorded match lengths
+ prefixComplete bool // prefix is the entire regexp
cond syntax.EmptyOp // empty-width conditions required at start of match
- numSubexp int
- subexpNames []string
- longest bool
+
+ // This field can be modified by the Longest method,
+ // but it is otherwise read-only.
+ longest bool // whether regexp prefers leftmost-longest match
}
// String returns the source text used to compile the regular expression.
@@ -108,15 +105,16 @@ func (re *Regexp) String() string {
}
// Copy returns a new Regexp object copied from re.
+// Calling Longest on one copy does not affect another.
//
-// When using a Regexp in multiple goroutines, giving each goroutine
-// its own copy helps to avoid lock contention.
+// Deprecated: In earlier releases, when using a Regexp in multiple goroutines,
+// giving each goroutine its own copy helped to avoid lock contention.
+// As of Go 1.12, using Copy is no longer necessary to avoid lock contention.
+// Copy may still be appropriate if the reason for its use is to make
+// two copies with different Longest settings.
func (re *Regexp) Copy() *Regexp {
- // It is not safe to copy Regexp by value
- // since it contains a sync.Mutex.
- return &Regexp{
- regexpRO: re.regexpRO,
- }
+ re2 := *re
+ return &re2
}
// Compile parses a regular expression and returns, if successful,
@@ -179,19 +177,23 @@ func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) {
if err != nil {
return nil, err
}
+ matchcap := prog.NumCap
+ if matchcap < 2 {
+ matchcap = 2
+ }
regexp := &Regexp{
- regexpRO: regexpRO{
- expr: expr,
- prog: prog,
- onepass: compileOnePass(prog),
- numSubexp: maxCap,
- subexpNames: capNames,
- cond: prog.StartCond(),
- longest: longest,
- },
- }
- if regexp.onepass == notOnePass {
+ expr: expr,
+ prog: prog,
+ onepass: compileOnePass(prog),
+ numSubexp: maxCap,
+ subexpNames: capNames,
+ cond: prog.StartCond(),
+ longest: longest,
+ matchcap: matchcap,
+ }
+ if regexp.onepass == nil {
regexp.prefix, regexp.prefixComplete = prog.Prefix()
+ regexp.maxBitStateLen = maxBitStateLen(prog)
} else {
regexp.prefix, regexp.prefixComplete, regexp.prefixEnd = onePassPrefix(prog)
}
@@ -201,39 +203,64 @@ func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) {
regexp.prefixBytes = []byte(regexp.prefix)
regexp.prefixRune, _ = utf8.DecodeRuneInString(regexp.prefix)
}
+
+ n := len(prog.Inst)
+ i := 0
+ for matchSize[i] != 0 && matchSize[i] < n {
+ i++
+ }
+ regexp.mpool = i
+
return regexp, nil
}
+// Pools of *machine for use during (*Regexp).doExecute,
+// split up by the size of the execution queues.
+// matchPool[i] machines have queue size matchSize[i].
+// On a 64-bit system each queue entry is 16 bytes,
+// so matchPool[0] has 16*2*128 = 4kB queues, etc.
+// The final matchPool is a catch-all for very large queues.
+var (
+ matchSize = [...]int{128, 512, 2048, 16384, 0}
+ matchPool [len(matchSize)]sync.Pool
+)
+
// get returns a machine to use for matching re.
// It uses the re's machine cache if possible, to avoid
// unnecessary allocation.
func (re *Regexp) get() *machine {
- re.mu.Lock()
- if n := len(re.machine); n > 0 {
- z := re.machine[n-1]
- re.machine = re.machine[:n-1]
- re.mu.Unlock()
- return z
- }
- re.mu.Unlock()
- z := progMachine(re.prog, re.onepass)
- z.re = re
- return z
-}
-
-// put returns a machine to the re's machine cache.
-// There is no attempt to limit the size of the cache, so it will
-// grow to the maximum number of simultaneous matches
-// run using re. (The cache empties when re gets garbage collected.)
-func (re *Regexp) put(z *machine) {
- // Remove references to input data that we no longer need.
- z.inputBytes.str = nil
- z.inputString.str = ""
- z.inputReader.r = nil
-
- re.mu.Lock()
- re.machine = append(re.machine, z)
- re.mu.Unlock()
+ m, ok := matchPool[re.mpool].Get().(*machine)
+ if !ok {
+ m = new(machine)
+ }
+ m.re = re
+ m.p = re.prog
+ if cap(m.matchcap) < re.matchcap {
+ m.matchcap = make([]int, re.matchcap)
+ for _, t := range m.pool {
+ t.cap = make([]int, re.matchcap)
+ }
+ }
+
+ // Allocate queues if needed.
+ // Or reallocate, for "large" match pool.
+ n := matchSize[re.mpool]
+ if n == 0 { // large pool
+ n = len(re.prog.Inst)
+ }
+ if len(m.q0.sparse) < n {
+ m.q0 = queue{make([]uint32, n), make([]entry, 0, n)}
+ m.q1 = queue{make([]uint32, n), make([]entry, 0, n)}
+ }
+ return m
+}
+
+// put returns a machine to the correct machine pool.
+func (re *Regexp) put(m *machine) {
+ m.re = nil
+ m.p = nil
+ m.inputs.clear()
+ matchPool[re.mpool].Put(m)
}
// MustCompile is like Compile but panics if the expression cannot be parsed.
@@ -288,7 +315,7 @@ type input interface {
canCheckPrefix() bool // can we look ahead without losing info?
hasPrefix(re *Regexp) bool
index(re *Regexp, pos int) int
- context(pos int) syntax.EmptyOp
+ context(pos int) lazyFlag
}
// inputString scans a string.
@@ -319,7 +346,7 @@ func (i *inputString) index(re *Regexp, pos int) int {
return strings.Index(i.str[pos:], re.prefix)
}
-func (i *inputString) context(pos int) syntax.EmptyOp {
+func (i *inputString) context(pos int) lazyFlag {
r1, r2 := endOfText, endOfText
// 0 < pos && pos <= len(i.str)
if uint(pos-1) < uint(len(i.str)) {
@@ -335,7 +362,7 @@ func (i *inputString) context(pos int) syntax.EmptyOp {
r2, _ = utf8.DecodeRuneInString(i.str[pos:])
}
}
- return syntax.EmptyOpContext(r1, r2)
+ return newLazyFlag(r1, r2)
}
// inputBytes scans a byte slice.
@@ -366,7 +393,7 @@ func (i *inputBytes) index(re *Regexp, pos int) int {
return bytes.Index(i.str[pos:], re.prefixBytes)
}
-func (i *inputBytes) context(pos int) syntax.EmptyOp {
+func (i *inputBytes) context(pos int) lazyFlag {
r1, r2 := endOfText, endOfText
// 0 < pos && pos <= len(i.str)
if uint(pos-1) < uint(len(i.str)) {
@@ -382,7 +409,7 @@ func (i *inputBytes) context(pos int) syntax.EmptyOp {
r2, _ = utf8.DecodeRune(i.str[pos:])
}
}
- return syntax.EmptyOpContext(r1, r2)
+ return newLazyFlag(r1, r2)
}
// inputReader scans a RuneReader.
@@ -418,8 +445,8 @@ func (i *inputReader) index(re *Regexp, pos int) int {
return -1
}
-func (i *inputReader) context(pos int) syntax.EmptyOp {
- return 0
+func (i *inputReader) context(pos int) lazyFlag {
+ return 0 // not used
}
// LiteralPrefix returns a literal string that must begin any match
@@ -469,7 +496,7 @@ func MatchString(pattern string, s string) (matched bool, err error) {
return re.MatchString(s), nil
}
-// MatchString reports whether the byte slice b
+// Match reports whether the byte slice b
// contains any match of the regular expression pattern.
// More complicated queries need to use Compile and the full Regexp interface.
func Match(pattern string, b []byte) (matched bool, err error) {
diff --git a/libgo/go/regexp/syntax/prog.go b/libgo/go/regexp/syntax/prog.go
index 49a06bb..ae7a9a2 100644
--- a/libgo/go/regexp/syntax/prog.go
+++ b/libgo/go/regexp/syntax/prog.go
@@ -201,8 +201,12 @@ func (i *Inst) MatchRune(r rune) bool {
func (i *Inst) MatchRunePos(r rune) int {
rune := i.Rune
- // Special case: single-rune slice is from literal string, not char class.
- if len(rune) == 1 {
+ switch len(rune) {
+ case 0:
+ return noMatch
+
+ case 1:
+ // Special case: single-rune slice is from literal string, not char class.
r0 := rune[0]
if r == r0 {
return 0
@@ -215,17 +219,25 @@ func (i *Inst) MatchRunePos(r rune) int {
}
}
return noMatch
- }
- // Peek at the first few pairs.
- // Should handle ASCII well.
- for j := 0; j < len(rune) && j <= 8; j += 2 {
- if r < rune[j] {
- return noMatch
+ case 2:
+ if r >= rune[0] && r <= rune[1] {
+ return 0
}
- if r <= rune[j+1] {
- return j / 2
+ return noMatch
+
+ case 4, 6, 8:
+ // Linear search for a few pairs.
+ // Should handle ASCII well.
+ for j := 0; j < len(rune); j += 2 {
+ if r < rune[j] {
+ return noMatch
+ }
+ if r <= rune[j+1] {
+ return j / 2
+ }
}
+ return noMatch
}
// Otherwise binary search.
diff --git a/libgo/go/regexp/syntax/regexp.go b/libgo/go/regexp/syntax/regexp.go
index a3f56f8..ae5fa05 100644
--- a/libgo/go/regexp/syntax/regexp.go
+++ b/libgo/go/regexp/syntax/regexp.go
@@ -59,7 +59,7 @@ const (
const opPseudo Op = 128 // where pseudo-ops start
-// Equal returns true if x and y have identical structure.
+// Equal reports whether x and y have identical structure.
func (x *Regexp) Equal(y *Regexp) bool {
if x == nil || y == nil {
return x == y