diff options
Diffstat (limited to 'libgo/go/regexp')
-rw-r--r-- | libgo/go/regexp/all_test.go | 42 | ||||
-rw-r--r-- | libgo/go/regexp/example_test.go | 13 | ||||
-rw-r--r-- | libgo/go/regexp/regexp.go | 18 | ||||
-rw-r--r-- | libgo/go/regexp/syntax/compile.go | 68 | ||||
-rw-r--r-- | libgo/go/regexp/syntax/parse.go | 16 |
5 files changed, 93 insertions, 64 deletions
diff --git a/libgo/go/regexp/all_test.go b/libgo/go/regexp/all_test.go index 626a691..be7a2e7 100644 --- a/libgo/go/regexp/all_test.go +++ b/libgo/go/regexp/all_test.go @@ -418,24 +418,32 @@ func TestLiteralPrefix(t *testing.T) { } } +type subexpIndex struct { + name string + index int +} + type subexpCase struct { - input string - num int - names []string + input string + num int + names []string + indices []subexpIndex } +var emptySubexpIndices = []subexpIndex{{"", -1}, {"missing", -1}} + var subexpCases = []subexpCase{ - {``, 0, nil}, - {`.*`, 0, nil}, - {`abba`, 0, nil}, - {`ab(b)a`, 1, []string{"", ""}}, - {`ab(.*)a`, 1, []string{"", ""}}, - {`(.*)ab(.*)a`, 2, []string{"", "", ""}}, - {`(.*)(ab)(.*)a`, 3, []string{"", "", "", ""}}, - {`(.*)((a)b)(.*)a`, 4, []string{"", "", "", "", ""}}, - {`(.*)(\(ab)(.*)a`, 3, []string{"", "", "", ""}}, - {`(.*)(\(a\)b)(.*)a`, 3, []string{"", "", "", ""}}, - {`(?P<foo>.*)(?P<bar>(a)b)(?P<foo>.*)a`, 4, []string{"", "foo", "bar", "", "foo"}}, + {``, 0, nil, emptySubexpIndices}, + {`.*`, 0, nil, emptySubexpIndices}, + {`abba`, 0, nil, emptySubexpIndices}, + {`ab(b)a`, 1, []string{"", ""}, emptySubexpIndices}, + {`ab(.*)a`, 1, []string{"", ""}, emptySubexpIndices}, + {`(.*)ab(.*)a`, 2, []string{"", "", ""}, emptySubexpIndices}, + {`(.*)(ab)(.*)a`, 3, []string{"", "", "", ""}, emptySubexpIndices}, + {`(.*)((a)b)(.*)a`, 4, []string{"", "", "", "", ""}, emptySubexpIndices}, + {`(.*)(\(ab)(.*)a`, 3, []string{"", "", "", ""}, emptySubexpIndices}, + {`(.*)(\(a\)b)(.*)a`, 3, []string{"", "", "", ""}, emptySubexpIndices}, + {`(?P<foo>.*)(?P<bar>(a)b)(?P<foo>.*)a`, 4, []string{"", "foo", "bar", "", "foo"}, []subexpIndex{{"", -1}, {"missing", -1}, {"foo", 1}, {"bar", 2}}}, } func TestSubexp(t *testing.T) { @@ -458,6 +466,12 @@ func TestSubexp(t *testing.T) { } } } + for _, subexp := range c.indices { + index := re.SubexpIndex(subexp.name) + if index != subexp.index { + t.Errorf("%q: SubexpIndex(%q) = %d, want %d", c.input, subexp.name, index, subexp.index) + } + } } } diff --git a/libgo/go/regexp/example_test.go b/libgo/go/regexp/example_test.go index ea35a2e..466b38b 100644 --- a/libgo/go/regexp/example_test.go +++ b/libgo/go/regexp/example_test.go @@ -280,6 +280,19 @@ func ExampleRegexp_SubexpNames() { // Turing Alan } +func ExampleRegexp_SubexpIndex() { + re := regexp.MustCompile(`(?P<first>[a-zA-Z]+) (?P<last>[a-zA-Z]+)`) + fmt.Println(re.MatchString("Alan Turing")) + matches := re.FindStringSubmatch("Alan Turing") + lastIndex := re.SubexpIndex("last") + fmt.Printf("last => %d\n", lastIndex) + fmt.Println(matches[lastIndex]) + // Output: + // true + // last => 2 + // Turing +} + func ExampleRegexp_Split() { a := regexp.MustCompile(`a`) fmt.Println(a.Split("banana", -1)) diff --git a/libgo/go/regexp/regexp.go b/libgo/go/regexp/regexp.go index 19ca6f2..b547a2a 100644 --- a/libgo/go/regexp/regexp.go +++ b/libgo/go/regexp/regexp.go @@ -345,6 +345,24 @@ func (re *Regexp) SubexpNames() []string { return re.subexpNames } +// SubexpIndex returns the index of the first subexpression with the given name, +// or -1 if there is no subexpression with that name. +// +// Note that multiple subexpressions can be written using the same name, as in +// (?P<bob>a+)(?P<bob>b+), which declares two subexpressions named "bob". +// In this case, SubexpIndex returns the index of the leftmost such subexpression +// in the regular expression. +func (re *Regexp) SubexpIndex(name string) int { + if name != "" { + for i, s := range re.subexpNames { + if name == s { + return i + } + } + } + return -1 +} + const endOfText rune = -1 // input abstracts different representations of the input text. It provides diff --git a/libgo/go/regexp/syntax/compile.go b/libgo/go/regexp/syntax/compile.go index 1d8ab87..7524d62 100644 --- a/libgo/go/regexp/syntax/compile.go +++ b/libgo/go/regexp/syntax/compile.go @@ -12,57 +12,47 @@ import "unicode" // See https://swtch.com/~rsc/regexp/regexp1.html for inspiration. // // These aren't really pointers: they're integers, so we can reinterpret them -// this way without using package unsafe. A value l denotes -// p.inst[l>>1].Out (l&1==0) or .Arg (l&1==1). -// l == 0 denotes the empty list, okay because we start every program +// this way without using package unsafe. A value l.head denotes +// p.inst[l.head>>1].Out (l.head&1==0) or .Arg (l.head&1==1). +// head == 0 denotes the empty list, okay because we start every program // with a fail instruction, so we'll never want to point at its output link. -type patchList uint32 +type patchList struct { + head, tail uint32 +} -func (l patchList) next(p *Prog) patchList { - i := &p.Inst[l>>1] - if l&1 == 0 { - return patchList(i.Out) - } - return patchList(i.Arg) +func makePatchList(n uint32) patchList { + return patchList{n, n} } func (l patchList) patch(p *Prog, val uint32) { - for l != 0 { - i := &p.Inst[l>>1] - if l&1 == 0 { - l = patchList(i.Out) + head := l.head + for head != 0 { + i := &p.Inst[head>>1] + if head&1 == 0 { + head = i.Out i.Out = val } else { - l = patchList(i.Arg) + head = i.Arg i.Arg = val } } } func (l1 patchList) append(p *Prog, l2 patchList) patchList { - if l1 == 0 { + if l1.head == 0 { return l2 } - if l2 == 0 { + if l2.head == 0 { return l1 } - last := l1 - for { - next := last.next(p) - if next == 0 { - break - } - last = next - } - - i := &p.Inst[last>>1] - if last&1 == 0 { - i.Out = uint32(l2) + i := &p.Inst[l1.tail>>1] + if l1.tail&1 == 0 { + i.Out = l2.head } else { - i.Arg = uint32(l2) + i.Arg = l2.head } - return l1 + return patchList{l1.head, l2.tail} } // A frag represents a compiled program fragment. @@ -176,7 +166,7 @@ func (c *compiler) inst(op InstOp) frag { func (c *compiler) nop() frag { f := c.inst(InstNop) - f.out = patchList(f.i << 1) + f.out = makePatchList(f.i << 1) return f } @@ -186,7 +176,7 @@ func (c *compiler) fail() frag { func (c *compiler) cap(arg uint32) frag { f := c.inst(InstCapture) - f.out = patchList(f.i << 1) + f.out = makePatchList(f.i << 1) c.p.Inst[f.i].Arg = arg if c.p.NumCap < int(arg)+1 { @@ -229,10 +219,10 @@ func (c *compiler) quest(f1 frag, nongreedy bool) frag { i := &c.p.Inst[f.i] if nongreedy { i.Arg = f1.i - f.out = patchList(f.i << 1) + f.out = makePatchList(f.i << 1) } else { i.Out = f1.i - f.out = patchList(f.i<<1 | 1) + f.out = makePatchList(f.i<<1 | 1) } f.out = f.out.append(c.p, f1.out) return f @@ -243,10 +233,10 @@ func (c *compiler) star(f1 frag, nongreedy bool) frag { i := &c.p.Inst[f.i] if nongreedy { i.Arg = f1.i - f.out = patchList(f.i << 1) + f.out = makePatchList(f.i << 1) } else { i.Out = f1.i - f.out = patchList(f.i<<1 | 1) + f.out = makePatchList(f.i<<1 | 1) } f1.out.patch(c.p, f.i) return f @@ -259,7 +249,7 @@ func (c *compiler) plus(f1 frag, nongreedy bool) frag { func (c *compiler) empty(op EmptyOp) frag { f := c.inst(InstEmptyWidth) c.p.Inst[f.i].Arg = uint32(op) - f.out = patchList(f.i << 1) + f.out = makePatchList(f.i << 1) return f } @@ -273,7 +263,7 @@ func (c *compiler) rune(r []rune, flags Flags) frag { flags &^= FoldCase } i.Arg = uint32(flags) - f.out = patchList(f.i << 1) + f.out = makePatchList(f.i << 1) // Special cases for exec machine. switch { diff --git a/libgo/go/regexp/syntax/parse.go b/libgo/go/regexp/syntax/parse.go index 8c6d43a..7b40309 100644 --- a/libgo/go/regexp/syntax/parse.go +++ b/libgo/go/regexp/syntax/parse.go @@ -177,16 +177,16 @@ func (p *parser) maybeConcat(r rune, flags Flags) bool { return false // did not push r } -// newLiteral returns a new OpLiteral Regexp with the given flags -func (p *parser) newLiteral(r rune, flags Flags) *Regexp { +// literal pushes a literal regexp for the rune r on the stack. +func (p *parser) literal(r rune) { re := p.newRegexp(OpLiteral) - re.Flags = flags - if flags&FoldCase != 0 { + re.Flags = p.flags + if p.flags&FoldCase != 0 { r = minFoldRune(r) } re.Rune0[0] = r re.Rune = re.Rune0[:1] - return re + p.push(re) } // minFoldRune returns the minimum rune fold-equivalent to r. @@ -204,12 +204,6 @@ func minFoldRune(r rune) rune { return min } -// literal pushes a literal regexp for the rune r on the stack -// and returns that regexp. -func (p *parser) literal(r rune) { - p.push(p.newLiteral(r, p.flags)) -} - // op pushes a regexp with the given op onto the stack // and returns that regexp. func (p *parser) op(op Op) *Regexp { |