diff options
author | Ian Lance Taylor <iant@golang.org> | 2019-01-18 19:04:36 +0000 |
---|---|---|
committer | Ian Lance Taylor <ian@gcc.gnu.org> | 2019-01-18 19:04:36 +0000 |
commit | 4f4a855d82a889cebcfca150a7a43909bcb6a346 (patch) | |
tree | f12bae0781920fa34669fe30b6f4615a86d9fb80 /libgo/go/regexp | |
parent | 225220d668dafb8262db7012bced688acbe63b33 (diff) | |
download | gcc-4f4a855d82a889cebcfca150a7a43909bcb6a346.zip gcc-4f4a855d82a889cebcfca150a7a43909bcb6a346.tar.gz gcc-4f4a855d82a889cebcfca150a7a43909bcb6a346.tar.bz2 |
libgo: update to Go1.12beta2
Reviewed-on: https://go-review.googlesource.com/c/158019
gotools/:
* Makefile.am (go_cmd_vet_files): Update for Go1.12beta2 release.
(GOTOOLS_TEST_TIMEOUT): Increase to 600.
(check-runtime): Export LD_LIBRARY_PATH before computing GOARCH
and GOOS.
(check-vet): Copy golang.org/x/tools into check-vet-dir.
* Makefile.in: Regenerate.
gcc/testsuite/:
* go.go-torture/execute/names-1.go: Stop using debug/xcoff, which
is no longer externally visible.
From-SVN: r268084
Diffstat (limited to 'libgo/go/regexp')
-rw-r--r-- | libgo/go/regexp/all_test.go | 27 | ||||
-rw-r--r-- | libgo/go/regexp/backtrack.go | 179 | ||||
-rw-r--r-- | libgo/go/regexp/exec.go | 312 | ||||
-rw-r--r-- | libgo/go/regexp/exec_test.go | 12 | ||||
-rw-r--r-- | libgo/go/regexp/onepass.go | 24 | ||||
-rw-r--r-- | libgo/go/regexp/onepass_test.go | 106 | ||||
-rw-r--r-- | libgo/go/regexp/regexp.go | 163 | ||||
-rw-r--r-- | libgo/go/regexp/syntax/prog.go | 32 | ||||
-rw-r--r-- | libgo/go/regexp/syntax/regexp.go | 2 |
9 files changed, 501 insertions, 356 deletions
diff --git a/libgo/go/regexp/all_test.go b/libgo/go/regexp/all_test.go index 0fabeae..623f82d 100644 --- a/libgo/go/regexp/all_test.go +++ b/libgo/go/regexp/all_test.go @@ -550,8 +550,8 @@ func TestOnePassCutoff(t *testing.T) { if err != nil { t.Fatalf("compile: %v", err) } - if compileOnePass(p) != notOnePass { - t.Fatalf("makeOnePass succeeded; wanted notOnePass") + if compileOnePass(p) != nil { + t.Fatalf("makeOnePass succeeded; wanted nil") } } @@ -859,3 +859,26 @@ func BenchmarkQuoteMetaNone(b *testing.B) { sink = QuoteMeta(s) } } + +func TestDeepEqual(t *testing.T) { + re1 := MustCompile("a.*b.*c.*d") + re2 := MustCompile("a.*b.*c.*d") + if !reflect.DeepEqual(re1, re2) { // has always been true, since Go 1. + t.Errorf("DeepEqual(re1, re2) = false, want true") + } + + re1.MatchString("abcdefghijklmn") + if !reflect.DeepEqual(re1, re2) { + t.Errorf("DeepEqual(re1, re2) = false, want true") + } + + re2.MatchString("abcdefghijklmn") + if !reflect.DeepEqual(re1, re2) { + t.Errorf("DeepEqual(re1, re2) = false, want true") + } + + re2.MatchString(strings.Repeat("abcdefghijklmn", 100)) + if !reflect.DeepEqual(re1, re2) { + t.Errorf("DeepEqual(re1, re2) = false, want true") + } +} diff --git a/libgo/go/regexp/backtrack.go b/libgo/go/regexp/backtrack.go index 440bf7f..9fb7d1e 100644 --- a/libgo/go/regexp/backtrack.go +++ b/libgo/go/regexp/backtrack.go @@ -14,7 +14,10 @@ package regexp -import "regexp/syntax" +import ( + "regexp/syntax" + "sync" +) // A job is an entry on the backtracker's job stack. It holds // the instruction pc and the position in the input. @@ -32,15 +35,29 @@ const ( // bitState holds state for the backtracker. type bitState struct { - prog *syntax.Prog + end int + cap []int + matchcap []int + jobs []job + visited []uint32 + + inputs inputs +} + +var bitStatePool sync.Pool - end int - cap []int - jobs []job - visited []uint32 +func newBitState() *bitState { + b, ok := bitStatePool.Get().(*bitState) + if !ok { + b = new(bitState) + } + return b } -var notBacktrack *bitState = nil +func freeBitState(b *bitState) { + b.inputs.clear() + bitStatePool.Put(b) +} // maxBitStateLen returns the maximum length of a string to search with // the backtracker using prog. @@ -51,18 +68,6 @@ func maxBitStateLen(prog *syntax.Prog) int { return maxBacktrackVector / len(prog.Inst) } -// newBitState returns a new bitState for the given prog, -// or notBacktrack if the size of the prog exceeds the maximum size that -// the backtracker will be run for. -func newBitState(prog *syntax.Prog) *bitState { - if !shouldBacktrack(prog) { - return notBacktrack - } - return &bitState{ - prog: prog, - } -} - // shouldBacktrack reports whether the program is too // long for the backtracker to run. func shouldBacktrack(prog *syntax.Prog) bool { @@ -72,7 +77,7 @@ func shouldBacktrack(prog *syntax.Prog) bool { // reset resets the state of the backtracker. // end is the end position in the input. // ncap is the number of captures. -func (b *bitState) reset(end int, ncap int) { +func (b *bitState) reset(prog *syntax.Prog, end int, ncap int) { b.end = end if cap(b.jobs) == 0 { @@ -81,7 +86,7 @@ func (b *bitState) reset(end int, ncap int) { b.jobs = b.jobs[:0] } - visitedSize := (len(b.prog.Inst)*(end+1) + visitedBits - 1) / visitedBits + visitedSize := (len(prog.Inst)*(end+1) + visitedBits - 1) / visitedBits if cap(b.visited) < visitedSize { b.visited = make([]uint32, visitedSize, maxBacktrackVector/visitedBits) } else { @@ -99,6 +104,15 @@ func (b *bitState) reset(end int, ncap int) { for i := range b.cap { b.cap[i] = -1 } + + if cap(b.matchcap) < ncap { + b.matchcap = make([]int, ncap) + } else { + b.matchcap = b.matchcap[:ncap] + } + for i := range b.matchcap { + b.matchcap[i] = -1 + } } // shouldVisit reports whether the combination of (pc, pos) has not @@ -114,20 +128,19 @@ func (b *bitState) shouldVisit(pc uint32, pos int) bool { // push pushes (pc, pos, arg) onto the job stack if it should be // visited. -func (b *bitState) push(pc uint32, pos int, arg bool) { +func (b *bitState) push(re *Regexp, pc uint32, pos int, arg bool) { // Only check shouldVisit when arg is false. // When arg is true, we are continuing a previous visit. - if b.prog.Inst[pc].Op != syntax.InstFail && (arg || b.shouldVisit(pc, pos)) { + if re.prog.Inst[pc].Op != syntax.InstFail && (arg || b.shouldVisit(pc, pos)) { b.jobs = append(b.jobs, job{pc: pc, arg: arg, pos: pos}) } } // tryBacktrack runs a backtracking search starting at pos. -func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool { - longest := m.re.longest - m.matched = false +func (re *Regexp) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool { + longest := re.longest - b.push(pc, pos, false) + b.push(re, pc, pos, false) for len(b.jobs) > 0 { l := len(b.jobs) - 1 // Pop job off the stack. @@ -150,7 +163,7 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool { } Skip: - inst := b.prog.Inst[pc] + inst := re.prog.Inst[pc] switch inst.Op { default: @@ -172,23 +185,23 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool { pc = inst.Arg goto CheckAndLoop } else { - b.push(pc, pos, true) + b.push(re, pc, pos, true) pc = inst.Out goto CheckAndLoop } case syntax.InstAltMatch: // One opcode consumes runes; the other leads to match. - switch b.prog.Inst[inst.Out].Op { + switch re.prog.Inst[inst.Out].Op { case syntax.InstRune, syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL: // inst.Arg is the match. - b.push(inst.Arg, pos, false) + b.push(re, inst.Arg, pos, false) pc = inst.Arg pos = b.end goto CheckAndLoop } // inst.Out is the match - non-greedy - b.push(inst.Out, b.end, false) + b.push(re, inst.Out, b.end, false) pc = inst.Out goto CheckAndLoop @@ -236,7 +249,7 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool { } else { if 0 <= inst.Arg && inst.Arg < uint32(len(b.cap)) { // Capture pos to register, but save old value. - b.push(pc, b.cap[inst.Arg], true) // come back when we're done. + b.push(re, pc, b.cap[inst.Arg], true) // come back when we're done. b.cap[inst.Arg] = pos } pc = inst.Out @@ -244,7 +257,8 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool { } case syntax.InstEmptyWidth: - if syntax.EmptyOp(inst.Arg)&^i.context(pos) != 0 { + flag := i.context(pos) + if !flag.match(syntax.EmptyOp(inst.Arg)) { continue } pc = inst.Out @@ -258,8 +272,7 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool { // We found a match. If the caller doesn't care // where the match is, no point going further. if len(b.cap) == 0 { - m.matched = true - return m.matched + return true } // Record best match so far. @@ -268,19 +281,18 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool { if len(b.cap) > 1 { b.cap[1] = pos } - if !m.matched || (longest && pos > 0 && pos > m.matchcap[1]) { - copy(m.matchcap, b.cap) + if old := b.matchcap[1]; old == -1 || (longest && pos > 0 && pos > old) { + copy(b.matchcap, b.cap) } - m.matched = true // If going for first match, we're done. if !longest { - return m.matched + return true } // If we used the entire text, no longer match is possible. if pos == b.end { - return m.matched + return true } // Otherwise, continue on in hope of a longer match. @@ -288,65 +300,68 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool { } } - return m.matched + return longest && len(b.matchcap) > 1 && b.matchcap[1] >= 0 } // backtrack runs a backtracking search of prog on the input starting at pos. -func (m *machine) backtrack(i input, pos int, end int, ncap int) bool { - if !i.canCheckPrefix() { - panic("backtrack called for a RuneReader") - } - - startCond := m.re.cond +func (re *Regexp) backtrack(ib []byte, is string, pos int, ncap int, dstCap []int) []int { + startCond := re.cond if startCond == ^syntax.EmptyOp(0) { // impossible - return false + return nil } if startCond&syntax.EmptyBeginText != 0 && pos != 0 { // Anchored match, past beginning of text. - return false + return nil } - b := m.b - b.reset(end, ncap) - - m.matchcap = m.matchcap[:ncap] - for i := range m.matchcap { - m.matchcap[i] = -1 - } + b := newBitState() + i, end := b.inputs.init(nil, ib, is) + b.reset(re.prog, end, ncap) // Anchored search must start at the beginning of the input if startCond&syntax.EmptyBeginText != 0 { if len(b.cap) > 0 { b.cap[0] = pos } - return m.tryBacktrack(b, i, uint32(m.p.Start), pos) - } + if !re.tryBacktrack(b, i, uint32(re.prog.Start), pos) { + freeBitState(b) + return nil + } + } else { - // Unanchored search, starting from each possible text position. - // Notice that we have to try the empty string at the end of - // the text, so the loop condition is pos <= end, not pos < end. - // This looks like it's quadratic in the size of the text, - // but we are not clearing visited between calls to TrySearch, - // so no work is duplicated and it ends up still being linear. - width := -1 - for ; pos <= end && width != 0; pos += width { - if len(m.re.prefix) > 0 { - // Match requires literal prefix; fast search for it. - advance := i.index(m.re, pos) - if advance < 0 { - return false + // Unanchored search, starting from each possible text position. + // Notice that we have to try the empty string at the end of + // the text, so the loop condition is pos <= end, not pos < end. + // This looks like it's quadratic in the size of the text, + // but we are not clearing visited between calls to TrySearch, + // so no work is duplicated and it ends up still being linear. + width := -1 + for ; pos <= end && width != 0; pos += width { + if len(re.prefix) > 0 { + // Match requires literal prefix; fast search for it. + advance := i.index(re, pos) + if advance < 0 { + freeBitState(b) + return nil + } + pos += advance } - pos += advance - } - if len(b.cap) > 0 { - b.cap[0] = pos - } - if m.tryBacktrack(b, i, uint32(m.p.Start), pos) { - // Match must be leftmost; done. - return true + if len(b.cap) > 0 { + b.cap[0] = pos + } + if re.tryBacktrack(b, i, uint32(re.prog.Start), pos) { + // Match must be leftmost; done. + goto Match + } + _, width = i.step(pos) } - _, width = i.step(pos) + freeBitState(b) + return nil } - return false + +Match: + dstCap = append(dstCap, b.matchcap...) + freeBitState(b) + return dstCap } diff --git a/libgo/go/regexp/exec.go b/libgo/go/regexp/exec.go index 1c7b02d..efe764e 100644 --- a/libgo/go/regexp/exec.go +++ b/libgo/go/regexp/exec.go @@ -7,6 +7,7 @@ package regexp import ( "io" "regexp/syntax" + "sync" ) // A queue is a 'sparse array' holding pending threads of execution. @@ -35,54 +36,60 @@ type thread struct { // A machine holds all the state during an NFA simulation for p. type machine struct { - re *Regexp // corresponding Regexp - p *syntax.Prog // compiled program - op *onePassProg // compiled onepass program, or notOnePass - maxBitStateLen int // max length of string to search with bitstate - b *bitState // state for backtracker, allocated lazily - q0, q1 queue // two queues for runq, nextq - pool []*thread // pool of available threads - matched bool // whether a match was found - matchcap []int // capture information for the match + re *Regexp // corresponding Regexp + p *syntax.Prog // compiled program + q0, q1 queue // two queues for runq, nextq + pool []*thread // pool of available threads + matched bool // whether a match was found + matchcap []int // capture information for the match + inputs inputs +} + +type inputs struct { // cached inputs, to avoid allocation - inputBytes inputBytes - inputString inputString - inputReader inputReader + bytes inputBytes + string inputString + reader inputReader } -func (m *machine) newInputBytes(b []byte) input { - m.inputBytes.str = b - return &m.inputBytes +func (i *inputs) newBytes(b []byte) input { + i.bytes.str = b + return &i.bytes } -func (m *machine) newInputString(s string) input { - m.inputString.str = s - return &m.inputString +func (i *inputs) newString(s string) input { + i.string.str = s + return &i.string } -func (m *machine) newInputReader(r io.RuneReader) input { - m.inputReader.r = r - m.inputReader.atEOT = false - m.inputReader.pos = 0 - return &m.inputReader +func (i *inputs) newReader(r io.RuneReader) input { + i.reader.r = r + i.reader.atEOT = false + i.reader.pos = 0 + return &i.reader +} + +func (i *inputs) clear() { + // We need to clear 1 of these. + // Avoid the expense of clearing the others (pointer write barrier). + if i.bytes.str != nil { + i.bytes.str = nil + } else if i.reader.r != nil { + i.reader.r = nil + } else { + i.string.str = "" + } } -// progMachine returns a new machine running the prog p. -func progMachine(p *syntax.Prog, op *onePassProg) *machine { - m := &machine{p: p, op: op} - n := len(m.p.Inst) - m.q0 = queue{make([]uint32, n), make([]entry, 0, n)} - m.q1 = queue{make([]uint32, n), make([]entry, 0, n)} - ncap := p.NumCap - if ncap < 2 { - ncap = 2 +func (i *inputs) init(r io.RuneReader, b []byte, s string) (input, int) { + if r != nil { + return i.newReader(r), 0 } - if op == notOnePass { - m.maxBitStateLen = maxBitStateLen(p) + if b != nil { + return i.newBytes(b), len(b) } - m.matchcap = make([]int, ncap) - return m + return i.newString(s), len(s) } func (m *machine) init(ncap int) { @@ -107,6 +114,61 @@ func (m *machine) alloc(i *syntax.Inst) *thread { return t } +// A lazyFlag is a lazily-evaluated syntax.EmptyOp, +// for checking zero-width flags like ^ $ \A \z \B \b. +// It records the pair of relevant runes and does not +// determine the implied flags until absolutely necessary +// (most of the time, that means never). +type lazyFlag uint64 + +func newLazyFlag(r1, r2 rune) lazyFlag { + return lazyFlag(uint64(r1)<<32 | uint64(uint32(r2))) +} + +func (f lazyFlag) match(op syntax.EmptyOp) bool { + if op == 0 { + return true + } + r1 := rune(f >> 32) + if op&syntax.EmptyBeginLine != 0 { + if r1 != '\n' && r1 >= 0 { + return false + } + op &^= syntax.EmptyBeginLine + } + if op&syntax.EmptyBeginText != 0 { + if r1 >= 0 { + return false + } + op &^= syntax.EmptyBeginText + } + if op == 0 { + return true + } + r2 := rune(f) + if op&syntax.EmptyEndLine != 0 { + if r2 != '\n' && r2 >= 0 { + return false + } + op &^= syntax.EmptyEndLine + } + if op&syntax.EmptyEndText != 0 { + if r2 >= 0 { + return false + } + op &^= syntax.EmptyEndText + } + if op == 0 { + return true + } + if syntax.IsWordChar(r1) != syntax.IsWordChar(r2) { + op &^= syntax.EmptyWordBoundary + } else { + op &^= syntax.EmptyNoWordBoundary + } + return op == 0 +} + // match runs the machine over the input starting at pos. // It reports whether a match was found. // If so, m.matchcap holds the submatch information. @@ -126,9 +188,9 @@ func (m *machine) match(i input, pos int) bool { if r != endOfText { r1, width1 = i.step(pos + width) } - var flag syntax.EmptyOp + var flag lazyFlag if pos == 0 { - flag = syntax.EmptyOpContext(-1, r) + flag = newLazyFlag(-1, r) } else { flag = i.context(pos) } @@ -157,10 +219,10 @@ func (m *machine) match(i input, pos int) bool { if len(m.matchcap) > 0 { m.matchcap[0] = pos } - m.add(runq, uint32(m.p.Start), pos, m.matchcap, flag, nil) + m.add(runq, uint32(m.p.Start), pos, m.matchcap, &flag, nil) } - flag = syntax.EmptyOpContext(r, r1) - m.step(runq, nextq, pos, pos+width, r, flag) + flag = newLazyFlag(r, r1) + m.step(runq, nextq, pos, pos+width, r, &flag) if width == 0 { break } @@ -195,7 +257,7 @@ func (m *machine) clear(q *queue) { // The step processes the rune c (which may be endOfText), // which starts at position pos and ends at nextPos. // nextCond gives the setting for the empty-width flags after c. -func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond syntax.EmptyOp) { +func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond *lazyFlag) { longest := m.re.longest for j := 0; j < len(runq.dense); j++ { d := &runq.dense[j] @@ -252,7 +314,8 @@ func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond sy // It also recursively adds an entry for all instructions reachable from pc by following // empty-width conditions satisfied by cond. pos gives the current position // in the input. -func (m *machine) add(q *queue, pc uint32, pos int, cap []int, cond syntax.EmptyOp, t *thread) *thread { +func (m *machine) add(q *queue, pc uint32, pos int, cap []int, cond *lazyFlag, t *thread) *thread { +Again: if pc == 0 { return t } @@ -275,13 +338,16 @@ func (m *machine) add(q *queue, pc uint32, pos int, cap []int, cond syntax.Empty // nothing case syntax.InstAlt, syntax.InstAltMatch: t = m.add(q, i.Out, pos, cap, cond, t) - t = m.add(q, i.Arg, pos, cap, cond, t) + pc = i.Arg + goto Again case syntax.InstEmptyWidth: - if syntax.EmptyOp(i.Arg)&^cond == 0 { - t = m.add(q, i.Out, pos, cap, cond, t) + if cond.match(syntax.EmptyOp(i.Arg)) { + pc = i.Out + goto Again } case syntax.InstNop: - t = m.add(q, i.Out, pos, cap, cond, t) + pc = i.Out + goto Again case syntax.InstCapture: if int(i.Arg) < len(cap) { opos := cap[i.Arg] @@ -289,7 +355,8 @@ func (m *machine) add(q *queue, pc uint32, pos int, cap []int, cond syntax.Empty m.add(q, i.Out, pos, cap, cond, nil) cap[i.Arg] = opos } else { - t = m.add(q, i.Out, pos, cap, cond, t) + pc = i.Out + goto Again } case syntax.InstMatch, syntax.InstRune, syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL: if t == nil { @@ -306,85 +373,112 @@ func (m *machine) add(q *queue, pc uint32, pos int, cap []int, cond syntax.Empty return t } -// onepass runs the machine over the input starting at pos. -// It reports whether a match was found. -// If so, m.matchcap holds the submatch information. -// ncap is the number of captures. -func (m *machine) onepass(i input, pos, ncap int) bool { - startCond := m.re.cond +type onePassMachine struct { + inputs inputs + matchcap []int +} + +var onePassPool sync.Pool + +func newOnePassMachine() *onePassMachine { + m, ok := onePassPool.Get().(*onePassMachine) + if !ok { + m = new(onePassMachine) + } + return m +} + +func freeOnePassMachine(m *onePassMachine) { + m.inputs.clear() + onePassPool.Put(m) +} + +// doOnePass implements r.doExecute using the one-pass execution engine. +func (re *Regexp) doOnePass(ir io.RuneReader, ib []byte, is string, pos, ncap int, dstCap []int) []int { + startCond := re.cond if startCond == ^syntax.EmptyOp(0) { // impossible - return false + return nil } - m.matched = false - m.matchcap = m.matchcap[:ncap] + + m := newOnePassMachine() + if cap(m.matchcap) < ncap { + m.matchcap = make([]int, ncap) + } else { + m.matchcap = m.matchcap[:ncap] + } + + matched := false for i := range m.matchcap { m.matchcap[i] = -1 } + + i, _ := m.inputs.init(ir, ib, is) + r, r1 := endOfText, endOfText width, width1 := 0, 0 r, width = i.step(pos) if r != endOfText { r1, width1 = i.step(pos + width) } - var flag syntax.EmptyOp + var flag lazyFlag if pos == 0 { - flag = syntax.EmptyOpContext(-1, r) + flag = newLazyFlag(-1, r) } else { flag = i.context(pos) } - pc := m.op.Start - inst := m.op.Inst[pc] + pc := re.onepass.Start + inst := re.onepass.Inst[pc] // If there is a simple literal prefix, skip over it. - if pos == 0 && syntax.EmptyOp(inst.Arg)&^flag == 0 && - len(m.re.prefix) > 0 && i.canCheckPrefix() { + if pos == 0 && flag.match(syntax.EmptyOp(inst.Arg)) && + len(re.prefix) > 0 && i.canCheckPrefix() { // Match requires literal prefix; fast search for it. - if !i.hasPrefix(m.re) { - return m.matched + if !i.hasPrefix(re) { + goto Return } - pos += len(m.re.prefix) + pos += len(re.prefix) r, width = i.step(pos) r1, width1 = i.step(pos + width) flag = i.context(pos) - pc = int(m.re.prefixEnd) + pc = int(re.prefixEnd) } for { - inst = m.op.Inst[pc] + inst = re.onepass.Inst[pc] pc = int(inst.Out) switch inst.Op { default: panic("bad inst") case syntax.InstMatch: - m.matched = true + matched = true if len(m.matchcap) > 0 { m.matchcap[0] = 0 m.matchcap[1] = pos } - return m.matched + goto Return case syntax.InstRune: if !inst.MatchRune(r) { - return m.matched + goto Return } case syntax.InstRune1: if r != inst.Rune[0] { - return m.matched + goto Return } case syntax.InstRuneAny: // Nothing case syntax.InstRuneAnyNotNL: if r == '\n' { - return m.matched + goto Return } // peek at the input rune to see which branch of the Alt to take case syntax.InstAlt, syntax.InstAltMatch: pc = int(onePassNext(&inst, r)) continue case syntax.InstFail: - return m.matched + goto Return case syntax.InstNop: continue case syntax.InstEmptyWidth: - if syntax.EmptyOp(inst.Arg)&^flag != 0 { - return m.matched + if !flag.match(syntax.EmptyOp(inst.Arg)) { + goto Return } continue case syntax.InstCapture: @@ -396,14 +490,23 @@ func (m *machine) onepass(i input, pos, ncap int) bool { if width == 0 { break } - flag = syntax.EmptyOpContext(r, r1) + flag = newLazyFlag(r, r1) pos += width r, width = r1, width1 if r != endOfText { r1, width1 = i.step(pos + width) } } - return m.matched + +Return: + if !matched { + freeOnePassMachine(m) + return nil + } + + dstCap = append(dstCap, m.matchcap...) + freeOnePassMachine(m) + return dstCap } // doMatch reports whether either r, b or s match the regexp. @@ -416,43 +519,28 @@ func (re *Regexp) doMatch(r io.RuneReader, b []byte, s string) bool { // // nil is returned if no matches are found and non-nil if matches are found. func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap int, dstCap []int) []int { - m := re.get() - var i input - var size int - if r != nil { - i = m.newInputReader(r) - } else if b != nil { - i = m.newInputBytes(b) - size = len(b) - } else { - i = m.newInputString(s) - size = len(s) + if dstCap == nil { + // Make sure 'return dstCap' is non-nil. + dstCap = arrayNoInts[:0:0] } - if m.op != notOnePass { - if !m.onepass(i, pos, ncap) { - re.put(m) - return nil - } - } else if size < m.maxBitStateLen && r == nil { - if m.b == nil { - m.b = newBitState(m.p) - } - if !m.backtrack(i, pos, size, ncap) { - re.put(m) - return nil - } - } else { - m.init(ncap) - if !m.match(i, pos) { - re.put(m) - return nil - } + + if re.onepass != nil { + return re.doOnePass(r, b, s, pos, ncap, dstCap) } - dstCap = append(dstCap, m.matchcap...) - if dstCap == nil { - // Keep the promise of returning non-nil value on match. - dstCap = arrayNoInts[:0] + if r == nil && len(b)+len(s) < re.maxBitStateLen { + return re.backtrack(b, s, pos, ncap, dstCap) } + + m := re.get() + i, _ := m.inputs.init(r, b, s) + + m.init(ncap) + if !m.match(i, pos) { + re.put(m) + return nil + } + + dstCap = append(dstCap, m.matchcap...) re.put(m) return dstCap } diff --git a/libgo/go/regexp/exec_test.go b/libgo/go/regexp/exec_test.go index 5f8e747..1489219 100644 --- a/libgo/go/regexp/exec_test.go +++ b/libgo/go/regexp/exec_test.go @@ -684,7 +684,7 @@ func BenchmarkMatch(b *testing.B) { func BenchmarkMatch_onepass_regex(b *testing.B) { isRaceBuilder := strings.HasSuffix(testenv.Builder(), "-race") r := MustCompile(`(?s)\A.*\z`) - if r.get().op == notOnePass { + if r.onepass == nil { b.Fatalf("want onepass regex, but %q is not onepass", r) } for _, size := range benchSizes { @@ -692,18 +692,12 @@ func BenchmarkMatch_onepass_regex(b *testing.B) { continue } t := makeText(size.n) - bs := make([][]byte, len(t)) - for i, s := range t { - bs[i] = []byte{s} - } b.Run(size.name, func(b *testing.B) { b.SetBytes(int64(size.n)) b.ReportAllocs() for i := 0; i < b.N; i++ { - for _, byts := range bs { - if !r.Match(byts) { - b.Fatal("not match!") - } + if !r.Match(t) { + b.Fatal("not match!") } } }) diff --git a/libgo/go/regexp/onepass.go b/libgo/go/regexp/onepass.go index 125be59..2f3ce6f 100644 --- a/libgo/go/regexp/onepass.go +++ b/libgo/go/regexp/onepass.go @@ -294,12 +294,12 @@ var anyRune = []rune{0, unicode.MaxRune} // makeOnePass creates a onepass Prog, if possible. It is possible if at any alt, // the match engine can always tell which branch to take. The routine may modify // p if it is turned into a onepass Prog. If it isn't possible for this to be a -// onepass Prog, the Prog notOnePass is returned. makeOnePass is recursive +// onepass Prog, the Prog nil is returned. makeOnePass is recursive // to the size of the Prog. func makeOnePass(p *onePassProg) *onePassProg { // If the machine is very long, it's not worth the time to check if we can use one pass. if len(p.Inst) >= 1000 { - return notOnePass + return nil } var ( @@ -446,11 +446,11 @@ func makeOnePass(p *onePassProg) *onePassProg { visitQueue.clear() pc := instQueue.next() if !check(pc, m) { - p = notOnePass + p = nil break } } - if p != notOnePass { + if p != nil { for i := range p.Inst { p.Inst[i].Rune = onePassRunes[i] } @@ -458,20 +458,18 @@ func makeOnePass(p *onePassProg) *onePassProg { return p } -var notOnePass *onePassProg = nil - // compileOnePass returns a new *syntax.Prog suitable for onePass execution if the original Prog -// can be recharacterized as a one-pass regexp program, or syntax.notOnePass if the +// can be recharacterized as a one-pass regexp program, or syntax.nil if the // Prog cannot be converted. For a one pass prog, the fundamental condition that must // be true is: at any InstAlt, there must be no ambiguity about what branch to take. func compileOnePass(prog *syntax.Prog) (p *onePassProg) { if prog.Start == 0 { - return notOnePass + return nil } // onepass regexp is anchored if prog.Inst[prog.Start].Op != syntax.InstEmptyWidth || syntax.EmptyOp(prog.Inst[prog.Start].Arg)&syntax.EmptyBeginText != syntax.EmptyBeginText { - return notOnePass + return nil } // every instruction leading to InstMatch must be EmptyEndText for _, inst := range prog.Inst { @@ -479,18 +477,18 @@ func compileOnePass(prog *syntax.Prog) (p *onePassProg) { switch inst.Op { default: if opOut == syntax.InstMatch { - return notOnePass + return nil } case syntax.InstAlt, syntax.InstAltMatch: if opOut == syntax.InstMatch || prog.Inst[inst.Arg].Op == syntax.InstMatch { - return notOnePass + return nil } case syntax.InstEmptyWidth: if opOut == syntax.InstMatch { if syntax.EmptyOp(inst.Arg)&syntax.EmptyEndText == syntax.EmptyEndText { continue } - return notOnePass + return nil } } } @@ -501,7 +499,7 @@ func compileOnePass(prog *syntax.Prog) (p *onePassProg) { // checkAmbiguity on InstAlts, build onepass Prog if possible p = makeOnePass(p) - if p != notOnePass { + if p != nil { cleanupOnePass(p, prog) } return p diff --git a/libgo/go/regexp/onepass_test.go b/libgo/go/regexp/onepass_test.go index b1caa44..a0f2e39 100644 --- a/libgo/go/regexp/onepass_test.go +++ b/libgo/go/regexp/onepass_test.go @@ -134,47 +134,45 @@ func TestMergeRuneSet(t *testing.T) { } } -var onePass = &onePassProg{} - var onePassTests = []struct { - re string - onePass *onePassProg + re string + isOnePass bool }{ - {`^(?:a|(?:a*))$`, notOnePass}, - {`^(?:(a)|(?:a*))$`, notOnePass}, - {`^(?:(?:(?:.(?:$))?))$`, onePass}, - {`^abcd$`, onePass}, - {`^(?:(?:a{0,})*?)$`, onePass}, - {`^(?:(?:a+)*)$`, onePass}, - {`^(?:(?:a|(?:aa)))$`, onePass}, - {`^(?:[^\s\S])$`, onePass}, - {`^(?:(?:a{3,4}){0,})$`, notOnePass}, - {`^(?:(?:(?:a*)+))$`, onePass}, - {`^[a-c]+$`, onePass}, - {`^[a-c]*$`, onePass}, - {`^(?:a*)$`, onePass}, - {`^(?:(?:aa)|a)$`, onePass}, - {`^[a-c]*`, notOnePass}, - {`^...$`, onePass}, - {`^(?:a|(?:aa))$`, onePass}, - {`^a((b))c$`, onePass}, - {`^a.[l-nA-Cg-j]?e$`, onePass}, - {`^a((b))$`, onePass}, - {`^a(?:(b)|(c))c$`, onePass}, - {`^a(?:(b*)|(c))c$`, notOnePass}, - {`^a(?:b|c)$`, onePass}, - {`^a(?:b?|c)$`, onePass}, - {`^a(?:b?|c?)$`, notOnePass}, - {`^a(?:b?|c+)$`, onePass}, - {`^a(?:b+|(bc))d$`, notOnePass}, - {`^a(?:bc)+$`, onePass}, - {`^a(?:[bcd])+$`, onePass}, - {`^a((?:[bcd])+)$`, onePass}, - {`^a(:?b|c)*d$`, onePass}, - {`^.bc(d|e)*$`, onePass}, - {`^(?:(?:aa)|.)$`, notOnePass}, - {`^(?:(?:a{1,2}){1,2})$`, notOnePass}, - {`^l` + strings.Repeat("o", 2<<8) + `ng$`, onePass}, + {`^(?:a|(?:a*))$`, false}, + {`^(?:(a)|(?:a*))$`, false}, + {`^(?:(?:(?:.(?:$))?))$`, true}, + {`^abcd$`, true}, + {`^(?:(?:a{0,})*?)$`, true}, + {`^(?:(?:a+)*)$`, true}, + {`^(?:(?:a|(?:aa)))$`, true}, + {`^(?:[^\s\S])$`, true}, + {`^(?:(?:a{3,4}){0,})$`, false}, + {`^(?:(?:(?:a*)+))$`, true}, + {`^[a-c]+$`, true}, + {`^[a-c]*$`, true}, + {`^(?:a*)$`, true}, + {`^(?:(?:aa)|a)$`, true}, + {`^[a-c]*`, false}, + {`^...$`, true}, + {`^(?:a|(?:aa))$`, true}, + {`^a((b))c$`, true}, + {`^a.[l-nA-Cg-j]?e$`, true}, + {`^a((b))$`, true}, + {`^a(?:(b)|(c))c$`, true}, + {`^a(?:(b*)|(c))c$`, false}, + {`^a(?:b|c)$`, true}, + {`^a(?:b?|c)$`, true}, + {`^a(?:b?|c?)$`, false}, + {`^a(?:b?|c+)$`, true}, + {`^a(?:b+|(bc))d$`, false}, + {`^a(?:bc)+$`, true}, + {`^a(?:[bcd])+$`, true}, + {`^a((?:[bcd])+)$`, true}, + {`^a(:?b|c)*d$`, true}, + {`^.bc(d|e)*$`, true}, + {`^(?:(?:aa)|.)$`, false}, + {`^(?:(?:a{1,2}){1,2})$`, false}, + {`^l` + strings.Repeat("o", 2<<8) + `ng$`, true}, } func TestCompileOnePass(t *testing.T) { @@ -194,9 +192,9 @@ func TestCompileOnePass(t *testing.T) { t.Errorf("Compile(%q) got err:%s, want success", test.re, err) continue } - onePass = compileOnePass(p) - if (onePass == notOnePass) != (test.onePass == notOnePass) { - t.Errorf("CompileOnePass(%q) got %v, expected %v", test.re, onePass, test.onePass) + isOnePass := compileOnePass(p) != nil + if isOnePass != test.isOnePass { + t.Errorf("CompileOnePass(%q) got isOnePass=%v, expected %v", test.re, isOnePass, test.isOnePass) } } } @@ -216,8 +214,8 @@ func TestRunOnePass(t *testing.T) { t.Errorf("Compile(%q): got err: %s", test.re, err) continue } - if re.onepass == notOnePass { - t.Errorf("Compile(%q): got notOnePass, want one-pass", test.re) + if re.onepass == nil { + t.Errorf("Compile(%q): got nil, want one-pass", test.re) continue } if !re.MatchString(test.match) { @@ -227,21 +225,11 @@ func TestRunOnePass(t *testing.T) { } func BenchmarkCompileOnepass(b *testing.B) { - for _, test := range onePassTests { - if test.onePass == notOnePass { - continue - } - name := test.re - if len(name) > 20 { - name = name[:20] + "..." + b.ReportAllocs() + const re = `^a.[l-nA-Cg-j]?e$` + for i := 0; i < b.N; i++ { + if _, err := Compile(re); err != nil { + b.Fatal(err) } - b.Run(name, func(b *testing.B) { - b.ReportAllocs() - for i := 0; i < b.N; i++ { - if _, err := Compile(test.re); err != nil { - b.Fatal(err) - } - } - }) } } diff --git a/libgo/go/regexp/regexp.go b/libgo/go/regexp/regexp.go index 61ed9c5..38b3c86 100644 --- a/libgo/go/regexp/regexp.go +++ b/libgo/go/regexp/regexp.go @@ -79,27 +79,24 @@ import ( // A Regexp is safe for concurrent use by multiple goroutines, // except for configuration methods, such as Longest. type Regexp struct { - // read-only after Compile - regexpRO - - // cache of machines for running regexp - mu sync.Mutex - machine []*machine -} - -type regexpRO struct { - expr string // as passed to Compile - prog *syntax.Prog // compiled program - onepass *onePassProg // onepass program or nil + expr string // as passed to Compile + prog *syntax.Prog // compiled program + onepass *onePassProg // onepass program or nil + numSubexp int + maxBitStateLen int + subexpNames []string prefix string // required prefix in unanchored matches prefixBytes []byte // prefix, as a []byte - prefixComplete bool // prefix is the entire regexp prefixRune rune // first rune in prefix prefixEnd uint32 // pc for last rune in prefix + mpool int // pool for machines + matchcap int // size of recorded match lengths + prefixComplete bool // prefix is the entire regexp cond syntax.EmptyOp // empty-width conditions required at start of match - numSubexp int - subexpNames []string - longest bool + + // This field can be modified by the Longest method, + // but it is otherwise read-only. + longest bool // whether regexp prefers leftmost-longest match } // String returns the source text used to compile the regular expression. @@ -108,15 +105,16 @@ func (re *Regexp) String() string { } // Copy returns a new Regexp object copied from re. +// Calling Longest on one copy does not affect another. // -// When using a Regexp in multiple goroutines, giving each goroutine -// its own copy helps to avoid lock contention. +// Deprecated: In earlier releases, when using a Regexp in multiple goroutines, +// giving each goroutine its own copy helped to avoid lock contention. +// As of Go 1.12, using Copy is no longer necessary to avoid lock contention. +// Copy may still be appropriate if the reason for its use is to make +// two copies with different Longest settings. func (re *Regexp) Copy() *Regexp { - // It is not safe to copy Regexp by value - // since it contains a sync.Mutex. - return &Regexp{ - regexpRO: re.regexpRO, - } + re2 := *re + return &re2 } // Compile parses a regular expression and returns, if successful, @@ -179,19 +177,23 @@ func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) { if err != nil { return nil, err } + matchcap := prog.NumCap + if matchcap < 2 { + matchcap = 2 + } regexp := &Regexp{ - regexpRO: regexpRO{ - expr: expr, - prog: prog, - onepass: compileOnePass(prog), - numSubexp: maxCap, - subexpNames: capNames, - cond: prog.StartCond(), - longest: longest, - }, - } - if regexp.onepass == notOnePass { + expr: expr, + prog: prog, + onepass: compileOnePass(prog), + numSubexp: maxCap, + subexpNames: capNames, + cond: prog.StartCond(), + longest: longest, + matchcap: matchcap, + } + if regexp.onepass == nil { regexp.prefix, regexp.prefixComplete = prog.Prefix() + regexp.maxBitStateLen = maxBitStateLen(prog) } else { regexp.prefix, regexp.prefixComplete, regexp.prefixEnd = onePassPrefix(prog) } @@ -201,39 +203,64 @@ func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) { regexp.prefixBytes = []byte(regexp.prefix) regexp.prefixRune, _ = utf8.DecodeRuneInString(regexp.prefix) } + + n := len(prog.Inst) + i := 0 + for matchSize[i] != 0 && matchSize[i] < n { + i++ + } + regexp.mpool = i + return regexp, nil } +// Pools of *machine for use during (*Regexp).doExecute, +// split up by the size of the execution queues. +// matchPool[i] machines have queue size matchSize[i]. +// On a 64-bit system each queue entry is 16 bytes, +// so matchPool[0] has 16*2*128 = 4kB queues, etc. +// The final matchPool is a catch-all for very large queues. +var ( + matchSize = [...]int{128, 512, 2048, 16384, 0} + matchPool [len(matchSize)]sync.Pool +) + // get returns a machine to use for matching re. // It uses the re's machine cache if possible, to avoid // unnecessary allocation. func (re *Regexp) get() *machine { - re.mu.Lock() - if n := len(re.machine); n > 0 { - z := re.machine[n-1] - re.machine = re.machine[:n-1] - re.mu.Unlock() - return z - } - re.mu.Unlock() - z := progMachine(re.prog, re.onepass) - z.re = re - return z -} - -// put returns a machine to the re's machine cache. -// There is no attempt to limit the size of the cache, so it will -// grow to the maximum number of simultaneous matches -// run using re. (The cache empties when re gets garbage collected.) -func (re *Regexp) put(z *machine) { - // Remove references to input data that we no longer need. - z.inputBytes.str = nil - z.inputString.str = "" - z.inputReader.r = nil - - re.mu.Lock() - re.machine = append(re.machine, z) - re.mu.Unlock() + m, ok := matchPool[re.mpool].Get().(*machine) + if !ok { + m = new(machine) + } + m.re = re + m.p = re.prog + if cap(m.matchcap) < re.matchcap { + m.matchcap = make([]int, re.matchcap) + for _, t := range m.pool { + t.cap = make([]int, re.matchcap) + } + } + + // Allocate queues if needed. + // Or reallocate, for "large" match pool. + n := matchSize[re.mpool] + if n == 0 { // large pool + n = len(re.prog.Inst) + } + if len(m.q0.sparse) < n { + m.q0 = queue{make([]uint32, n), make([]entry, 0, n)} + m.q1 = queue{make([]uint32, n), make([]entry, 0, n)} + } + return m +} + +// put returns a machine to the correct machine pool. +func (re *Regexp) put(m *machine) { + m.re = nil + m.p = nil + m.inputs.clear() + matchPool[re.mpool].Put(m) } // MustCompile is like Compile but panics if the expression cannot be parsed. @@ -288,7 +315,7 @@ type input interface { canCheckPrefix() bool // can we look ahead without losing info? hasPrefix(re *Regexp) bool index(re *Regexp, pos int) int - context(pos int) syntax.EmptyOp + context(pos int) lazyFlag } // inputString scans a string. @@ -319,7 +346,7 @@ func (i *inputString) index(re *Regexp, pos int) int { return strings.Index(i.str[pos:], re.prefix) } -func (i *inputString) context(pos int) syntax.EmptyOp { +func (i *inputString) context(pos int) lazyFlag { r1, r2 := endOfText, endOfText // 0 < pos && pos <= len(i.str) if uint(pos-1) < uint(len(i.str)) { @@ -335,7 +362,7 @@ func (i *inputString) context(pos int) syntax.EmptyOp { r2, _ = utf8.DecodeRuneInString(i.str[pos:]) } } - return syntax.EmptyOpContext(r1, r2) + return newLazyFlag(r1, r2) } // inputBytes scans a byte slice. @@ -366,7 +393,7 @@ func (i *inputBytes) index(re *Regexp, pos int) int { return bytes.Index(i.str[pos:], re.prefixBytes) } -func (i *inputBytes) context(pos int) syntax.EmptyOp { +func (i *inputBytes) context(pos int) lazyFlag { r1, r2 := endOfText, endOfText // 0 < pos && pos <= len(i.str) if uint(pos-1) < uint(len(i.str)) { @@ -382,7 +409,7 @@ func (i *inputBytes) context(pos int) syntax.EmptyOp { r2, _ = utf8.DecodeRune(i.str[pos:]) } } - return syntax.EmptyOpContext(r1, r2) + return newLazyFlag(r1, r2) } // inputReader scans a RuneReader. @@ -418,8 +445,8 @@ func (i *inputReader) index(re *Regexp, pos int) int { return -1 } -func (i *inputReader) context(pos int) syntax.EmptyOp { - return 0 +func (i *inputReader) context(pos int) lazyFlag { + return 0 // not used } // LiteralPrefix returns a literal string that must begin any match @@ -469,7 +496,7 @@ func MatchString(pattern string, s string) (matched bool, err error) { return re.MatchString(s), nil } -// MatchString reports whether the byte slice b +// Match reports whether the byte slice b // contains any match of the regular expression pattern. // More complicated queries need to use Compile and the full Regexp interface. func Match(pattern string, b []byte) (matched bool, err error) { diff --git a/libgo/go/regexp/syntax/prog.go b/libgo/go/regexp/syntax/prog.go index 49a06bb..ae7a9a2 100644 --- a/libgo/go/regexp/syntax/prog.go +++ b/libgo/go/regexp/syntax/prog.go @@ -201,8 +201,12 @@ func (i *Inst) MatchRune(r rune) bool { func (i *Inst) MatchRunePos(r rune) int { rune := i.Rune - // Special case: single-rune slice is from literal string, not char class. - if len(rune) == 1 { + switch len(rune) { + case 0: + return noMatch + + case 1: + // Special case: single-rune slice is from literal string, not char class. r0 := rune[0] if r == r0 { return 0 @@ -215,17 +219,25 @@ func (i *Inst) MatchRunePos(r rune) int { } } return noMatch - } - // Peek at the first few pairs. - // Should handle ASCII well. - for j := 0; j < len(rune) && j <= 8; j += 2 { - if r < rune[j] { - return noMatch + case 2: + if r >= rune[0] && r <= rune[1] { + return 0 } - if r <= rune[j+1] { - return j / 2 + return noMatch + + case 4, 6, 8: + // Linear search for a few pairs. + // Should handle ASCII well. + for j := 0; j < len(rune); j += 2 { + if r < rune[j] { + return noMatch + } + if r <= rune[j+1] { + return j / 2 + } } + return noMatch } // Otherwise binary search. diff --git a/libgo/go/regexp/syntax/regexp.go b/libgo/go/regexp/syntax/regexp.go index a3f56f8..ae5fa05 100644 --- a/libgo/go/regexp/syntax/regexp.go +++ b/libgo/go/regexp/syntax/regexp.go @@ -59,7 +59,7 @@ const ( const opPseudo Op = 128 // where pseudo-ops start -// Equal returns true if x and y have identical structure. +// Equal reports whether x and y have identical structure. func (x *Regexp) Equal(y *Regexp) bool { if x == nil || y == nil { return x == y |