about | summary | refs | log | tree | commit | diff
path: root/libgo/go/regexp
diff options
context:
space:
mode:
Diffstat (limited to 'libgo/go/regexp')
-rw-r--r-- libgo/go/regexp/all_test.go 42
-rw-r--r-- libgo/go/regexp/example_test.go 13
-rw-r--r-- libgo/go/regexp/regexp.go 18
-rw-r--r-- libgo/go/regexp/syntax/compile.go 68
-rw-r--r-- libgo/go/regexp/syntax/parse.go 16
5 files changed, 93 insertions, 64 deletions
diff --git a/libgo/go/regexp/all_test.go b/libgo/go/regexp/all_test.go
index 626a691..be7a2e7 100644
--- a/libgo/go/regexp/all_test.go
+++ b/libgo/go/regexp/all_test.go
@@ -418,24 +418,32 @@ func TestLiteralPrefix(t *testing.T) {
}
}
+type subexpIndex struct {
+ name string
+ index int
+}
+
type subexpCase struct {
- input string
- num int
- names []string
+ input string
+ num int
+ names []string
+ indices []subexpIndex
}
+var emptySubexpIndices = []subexpIndex{{"", -1}, {"missing", -1}}
+
var subexpCases = []subexpCase{
- {``, 0, nil},
- {`.*`, 0, nil},
- {`abba`, 0, nil},
- {`ab(b)a`, 1, []string{"", ""}},
- {`ab(.*)a`, 1, []string{"", ""}},
- {`(.*)ab(.*)a`, 2, []string{"", "", ""}},
- {`(.*)(ab)(.*)a`, 3, []string{"", "", "", ""}},
- {`(.*)((a)b)(.*)a`, 4, []string{"", "", "", "", ""}},
- {`(.*)(\(ab)(.*)a`, 3, []string{"", "", "", ""}},
- {`(.*)(\(a\)b)(.*)a`, 3, []string{"", "", "", ""}},
- {`(?P<foo>.*)(?P<bar>(a)b)(?P<foo>.*)a`, 4, []string{"", "foo", "bar", "", "foo"}},
+ {``, 0, nil, emptySubexpIndices},
+ {`.*`, 0, nil, emptySubexpIndices},
+ {`abba`, 0, nil, emptySubexpIndices},
+ {`ab(b)a`, 1, []string{"", ""}, emptySubexpIndices},
+ {`ab(.*)a`, 1, []string{"", ""}, emptySubexpIndices},
+ {`(.*)ab(.*)a`, 2, []string{"", "", ""}, emptySubexpIndices},
+ {`(.*)(ab)(.*)a`, 3, []string{"", "", "", ""}, emptySubexpIndices},
+ {`(.*)((a)b)(.*)a`, 4, []string{"", "", "", "", ""}, emptySubexpIndices},
+ {`(.*)(\(ab)(.*)a`, 3, []string{"", "", "", ""}, emptySubexpIndices},
+ {`(.*)(\(a\)b)(.*)a`, 3, []string{"", "", "", ""}, emptySubexpIndices},
+ {`(?P<foo>.*)(?P<bar>(a)b)(?P<foo>.*)a`, 4, []string{"", "foo", "bar", "", "foo"}, []subexpIndex{{"", -1}, {"missing", -1}, {"foo", 1}, {"bar", 2}}},
}
func TestSubexp(t *testing.T) {
@@ -458,6 +466,12 @@ func TestSubexp(t *testing.T) {
}
}
}
+ for _, subexp := range c.indices {
+ index := re.SubexpIndex(subexp.name)
+ if index != subexp.index {
+ t.Errorf("%q: SubexpIndex(%q) = %d, want %d", c.input, subexp.name, index, subexp.index)
+ }
+ }
}
}
diff --git a/libgo/go/regexp/example_test.go b/libgo/go/regexp/example_test.go
index ea35a2e..466b38b 100644
--- a/libgo/go/regexp/example_test.go
+++ b/libgo/go/regexp/example_test.go
@@ -280,6 +280,19 @@ func ExampleRegexp_SubexpNames() {
// Turing Alan
}
+func ExampleRegexp_SubexpIndex() {
+ re := regexp.MustCompile(`(?P<first>[a-zA-Z]+) (?P<last>[a-zA-Z]+)`)
+ fmt.Println(re.MatchString("Alan Turing"))
+ matches := re.FindStringSubmatch("Alan Turing")
+ lastIndex := re.SubexpIndex("last")
+ fmt.Printf("last => %d\n", lastIndex)
+ fmt.Println(matches[lastIndex])
+ // Output:
+ // true
+ // last => 2
+ // Turing
+}
+
func ExampleRegexp_Split() {
a := regexp.MustCompile(`a`)
fmt.Println(a.Split("banana", -1))
diff --git a/libgo/go/regexp/regexp.go b/libgo/go/regexp/regexp.go
index 19ca6f2..b547a2a 100644
--- a/libgo/go/regexp/regexp.go
+++ b/libgo/go/regexp/regexp.go
@@ -345,6 +345,24 @@ func (re *Regexp) SubexpNames() []string {
return re.subexpNames
}
+// SubexpIndex returns the index of the first subexpression with the given name,
+// or -1 if there is no subexpression with that name.
+//
+// Note that multiple subexpressions can be written using the same name, as in
+// (?P<bob>a+)(?P<bob>b+), which declares two subexpressions named "bob".
+// In this case, SubexpIndex returns the index of the leftmost such subexpression
+// in the regular expression.
+func (re *Regexp) SubexpIndex(name string) int {
+ if name != "" {
+ for i, s := range re.subexpNames {
+ if name == s {
+ return i
+ }
+ }
+ }
+ return -1
+}
+
const endOfText rune = -1
// input abstracts different representations of the input text. It provides
diff --git a/libgo/go/regexp/syntax/compile.go b/libgo/go/regexp/syntax/compile.go
index 1d8ab87..7524d62 100644
--- a/libgo/go/regexp/syntax/compile.go
+++ b/libgo/go/regexp/syntax/compile.go
@@ -12,57 +12,47 @@ import "unicode"
// See https://swtch.com/~rsc/regexp/regexp1.html for inspiration.
//
// These aren't really pointers: they're integers, so we can reinterpret them
-// this way without using package unsafe. A value l denotes
-// p.inst[l>>1].Out (l&1==0) or .Arg (l&1==1).
-// l == 0 denotes the empty list, okay because we start every program
+// this way without using package unsafe. A value l.head denotes
+// p.Inst[l.head>>1].Out (l.head&1==0) or .Arg (l.head&1==1).
+// head == 0 denotes the empty list, okay because we start every program
// with a fail instruction, so we'll never want to point at its output link.
-type patchList uint32
+type patchList struct {
+ head, tail uint32
+}
-func (l patchList) next(p *Prog) patchList {
- i := &p.Inst[l>>1]
- if l&1 == 0 {
- return patchList(i.Out)
- }
- return patchList(i.Arg)
+func makePatchList(n uint32) patchList {
+ return patchList{n, n}
}
func (l patchList) patch(p *Prog, val uint32) {
- for l != 0 {
- i := &p.Inst[l>>1]
- if l&1 == 0 {
- l = patchList(i.Out)
+ head := l.head
+ for head != 0 {
+ i := &p.Inst[head>>1]
+ if head&1 == 0 {
+ head = i.Out
i.Out = val
} else {
- l = patchList(i.Arg)
+ head = i.Arg
i.Arg = val
}
}
}
func (l1 patchList) append(p *Prog, l2 patchList) patchList {
- if l1 == 0 {
+ if l1.head == 0 {
return l2
}
- if l2 == 0 {
+ if l2.head == 0 {
return l1
}
- last := l1
- for {
- next := last.next(p)
- if next == 0 {
- break
- }
- last = next
- }
-
- i := &p.Inst[last>>1]
- if last&1 == 0 {
- i.Out = uint32(l2)
+ i := &p.Inst[l1.tail>>1]
+ if l1.tail&1 == 0 {
+ i.Out = l2.head
} else {
- i.Arg = uint32(l2)
+ i.Arg = l2.head
}
- return l1
+ return patchList{l1.head, l2.tail}
}
// A frag represents a compiled program fragment.
@@ -176,7 +166,7 @@ func (c *compiler) inst(op InstOp) frag {
func (c *compiler) nop() frag {
f := c.inst(InstNop)
- f.out = patchList(f.i << 1)
+ f.out = makePatchList(f.i << 1)
return f
}
@@ -186,7 +176,7 @@ func (c *compiler) fail() frag {
func (c *compiler) cap(arg uint32) frag {
f := c.inst(InstCapture)
- f.out = patchList(f.i << 1)
+ f.out = makePatchList(f.i << 1)
c.p.Inst[f.i].Arg = arg
if c.p.NumCap < int(arg)+1 {
@@ -229,10 +219,10 @@ func (c *compiler) quest(f1 frag, nongreedy bool) frag {
i := &c.p.Inst[f.i]
if nongreedy {
i.Arg = f1.i
- f.out = patchList(f.i << 1)
+ f.out = makePatchList(f.i << 1)
} else {
i.Out = f1.i
- f.out = patchList(f.i<<1 | 1)
+ f.out = makePatchList(f.i<<1 | 1)
}
f.out = f.out.append(c.p, f1.out)
return f
@@ -243,10 +233,10 @@ func (c *compiler) star(f1 frag, nongreedy bool) frag {
i := &c.p.Inst[f.i]
if nongreedy {
i.Arg = f1.i
- f.out = patchList(f.i << 1)
+ f.out = makePatchList(f.i << 1)
} else {
i.Out = f1.i
- f.out = patchList(f.i<<1 | 1)
+ f.out = makePatchList(f.i<<1 | 1)
}
f1.out.patch(c.p, f.i)
return f
@@ -259,7 +249,7 @@ func (c *compiler) plus(f1 frag, nongreedy bool) frag {
func (c *compiler) empty(op EmptyOp) frag {
f := c.inst(InstEmptyWidth)
c.p.Inst[f.i].Arg = uint32(op)
- f.out = patchList(f.i << 1)
+ f.out = makePatchList(f.i << 1)
return f
}
@@ -273,7 +263,7 @@ func (c *compiler) rune(r []rune, flags Flags) frag {
flags &^= FoldCase
}
i.Arg = uint32(flags)
- f.out = patchList(f.i << 1)
+ f.out = makePatchList(f.i << 1)
// Special cases for exec machine.
switch {
diff --git a/libgo/go/regexp/syntax/parse.go b/libgo/go/regexp/syntax/parse.go
index 8c6d43a..7b40309 100644
--- a/libgo/go/regexp/syntax/parse.go
+++ b/libgo/go/regexp/syntax/parse.go
@@ -177,16 +177,16 @@ func (p *parser) maybeConcat(r rune, flags Flags) bool {
return false // did not push r
}
-// newLiteral returns a new OpLiteral Regexp with the given flags
-func (p *parser) newLiteral(r rune, flags Flags) *Regexp {
+// literal pushes a literal regexp for the rune r on the stack.
+func (p *parser) literal(r rune) {
re := p.newRegexp(OpLiteral)
- re.Flags = flags
- if flags&FoldCase != 0 {
+ re.Flags = p.flags
+ if p.flags&FoldCase != 0 {
r = minFoldRune(r)
}
re.Rune0[0] = r
re.Rune = re.Rune0[:1]
- return re
+ p.push(re)
}
// minFoldRune returns the minimum rune fold-equivalent to r.
@@ -204,12 +204,6 @@ func minFoldRune(r rune) rune {
return min
}
-// literal pushes a literal regexp for the rune r on the stack
-// and returns that regexp.
-func (p *parser) literal(r rune) {
- p.push(p.newLiteral(r, p.flags))
-}
-
// op pushes a regexp with the given op onto the stack
// and returns that regexp.
func (p *parser) op(op Op) *Regexp {