aboutsummaryrefslogtreecommitdiff
path: root/libgo/go/regexp
diff options
context:
space:
mode:
authorIan Lance Taylor <iant@google.com>2011-01-21 18:19:03 +0000
committerIan Lance Taylor <ian@gcc.gnu.org>2011-01-21 18:19:03 +0000
commitff5f50c52c421d75940ef9392211e3ab24d71332 (patch)
tree27d8768fb1d25696d3c40b42535eb5e073c278da /libgo/go/regexp
parentd6ed1c8903e728f4233122554bab5910853338bd (diff)
downloadgcc-ff5f50c52c421d75940ef9392211e3ab24d71332.zip
gcc-ff5f50c52c421d75940ef9392211e3ab24d71332.tar.gz
gcc-ff5f50c52c421d75940ef9392211e3ab24d71332.tar.bz2
Remove the types float and complex.
Update to current version of Go library. Update testsuite for removed types. * go-lang.c (go_langhook_init): Omit float_type_size when calling go_create_gogo. * go-c.h: Update declaration of go_create_gogo. From-SVN: r169098
Diffstat (limited to 'libgo/go/regexp')
-rw-r--r--libgo/go/regexp/all_test.go80
-rw-r--r--libgo/go/regexp/find_test.go7
-rw-r--r--libgo/go/regexp/regexp.go132
3 files changed, 159 insertions, 60 deletions
diff --git a/libgo/go/regexp/all_test.go b/libgo/go/regexp/all_test.go
index d5a0e7d..aed7330 100644
--- a/libgo/go/regexp/all_test.go
+++ b/libgo/go/regexp/all_test.go
@@ -38,6 +38,8 @@ type stringError struct {
var bad_re = []stringError{
{`*`, ErrBareClosure},
+ {`+`, ErrBareClosure},
+ {`?`, ErrBareClosure},
{`(abc`, ErrUnmatchedLpar},
{`abc)`, ErrUnmatchedRpar},
{`x[a-z`, ErrUnmatchedLbkt},
@@ -47,7 +49,6 @@ var bad_re = []stringError{
{`a**`, ErrBadClosure},
{`a*+`, ErrBadClosure},
{`a??`, ErrBadClosure},
- {`*`, ErrBareClosure},
{`\x`, ErrBadBackslash},
}
@@ -229,18 +230,21 @@ func TestReplaceAllFunc(t *testing.T) {
}
}
-type QuoteMetaTest struct {
- pattern, output string
+type MetaTest struct {
+ pattern, output, literal string
+ isLiteral bool
}
-var quoteMetaTests = []QuoteMetaTest{
- {``, ``},
- {`foo`, `foo`},
- {`!@#$%^&*()_+-=[{]}\|,<.>/?~`, `!@#\$%\^&\*\(\)_\+-=\[{\]}\\\|,<\.>/\?~`},
+var metaTests = []MetaTest{
+ {``, ``, ``, true},
+ {`foo`, `foo`, `foo`, true},
+ {`foo\.\$`, `foo\\\.\\\$`, `foo.$`, true}, // has meta but no operator
+ {`foo.\$`, `foo\.\\\$`, `foo`, false}, // has escaped operators and real operators
+ {`!@#$%^&*()_+-=[{]}\|,<.>/?~`, `!@#\$%\^&\*\(\)_\+-=\[{\]}\\\|,<\.>/\?~`, `!@#`, false},
}
func TestQuoteMeta(t *testing.T) {
- for _, tc := range quoteMetaTests {
+ for _, tc := range metaTests {
// Verify that QuoteMeta returns the expected string.
quoted := QuoteMeta(tc.pattern)
if quoted != tc.output {
@@ -269,6 +273,20 @@ func TestQuoteMeta(t *testing.T) {
}
}
+func TestLiteralPrefix(t *testing.T) {
+ for _, tc := range metaTests {
+ // Literal method needs to scan the pattern.
+ re := MustCompile(tc.pattern)
+ str, complete := re.LiteralPrefix()
+ if complete != tc.isLiteral {
+ t.Errorf("LiteralPrefix(`%s`) = %t; want %t", tc.pattern, complete, tc.isLiteral)
+ }
+ if str != tc.literal {
+ t.Errorf("LiteralPrefix(`%s`) = `%s`; want `%s`", tc.pattern, str, tc.literal)
+ }
+ }
+}
+
type numSubexpCase struct {
input string
expected int
@@ -360,3 +378,49 @@ func BenchmarkReplaceAll(b *testing.B) {
re.ReplaceAllString(x, "")
}
}
+
+func BenchmarkAnchoredLiteralShortNonMatch(b *testing.B) {
+ b.StopTimer()
+ x := []byte("abcdefghijklmnopqrstuvwxyz")
+ re := MustCompile("^zbc(d|e)")
+ b.StartTimer()
+ for i := 0; i < b.N; i++ {
+ re.Match(x)
+ }
+}
+
+func BenchmarkAnchoredLiteralLongNonMatch(b *testing.B) {
+ b.StopTimer()
+ x := []byte("abcdefghijklmnopqrstuvwxyz")
+ for i := 0; i < 15; i++ {
+ x = append(x, x...)
+ }
+ re := MustCompile("^zbc(d|e)")
+ b.StartTimer()
+ for i := 0; i < b.N; i++ {
+ re.Match(x)
+ }
+}
+
+func BenchmarkAnchoredShortMatch(b *testing.B) {
+ b.StopTimer()
+ x := []byte("abcdefghijklmnopqrstuvwxyz")
+ re := MustCompile("^.bc(d|e)")
+ b.StartTimer()
+ for i := 0; i < b.N; i++ {
+ re.Match(x)
+ }
+}
+
+func BenchmarkAnchoredLongMatch(b *testing.B) {
+ b.StopTimer()
+ x := []byte("abcdefghijklmnopqrstuvwxyz")
+ for i := 0; i < 15; i++ {
+ x = append(x, x...)
+ }
+ re := MustCompile("^.bc(d|e)")
+ b.StartTimer()
+ for i := 0; i < b.N; i++ {
+ re.Match(x)
+ }
+}
diff --git a/libgo/go/regexp/find_test.go b/libgo/go/regexp/find_test.go
index 07f5586..1690711 100644
--- a/libgo/go/regexp/find_test.go
+++ b/libgo/go/regexp/find_test.go
@@ -78,6 +78,7 @@ var findTests = []FindTest{
{`axxb$`, "axxcb", nil},
{`data`, "daXY data", build(1, 5, 9)},
{`da(.)a$`, "daXY data", build(1, 5, 9, 7, 8)},
+ {`zx+`, "zzx", build(1, 1, 3)},
// can backslash-escape any punctuation
{`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`,
@@ -119,7 +120,11 @@ func build(n int, x ...int) [][]int {
func TestFind(t *testing.T) {
for _, test := range findTests {
- result := MustCompile(test.pat).Find([]byte(test.text))
+ re := MustCompile(test.pat)
+ if re.String() != test.pat {
+ t.Errorf("String() = `%s`; should be `%s`", re.String(), test.pat)
+ }
+ result := re.Find([]byte(test.text))
switch {
case len(test.matches) == 0 && len(result) == 0:
// ok
diff --git a/libgo/go/regexp/regexp.go b/libgo/go/regexp/regexp.go
index 2c041cd..d274ccd 100644
--- a/libgo/go/regexp/regexp.go
+++ b/libgo/go/regexp/regexp.go
@@ -283,6 +283,24 @@ func escape(c int) int {
return -1
}
+func (p *parser) checkBackslash() int {
+ c := p.c()
+ if c == '\\' {
+ c = p.nextc()
+ switch {
+ case c == endOfFile:
+ p.error(ErrExtraneousBackslash)
+ case ispunct(c):
+ // c is as delivered
+ case escape(c) >= 0:
+ c = int(escaped[escape(c)])
+ default:
+ p.error(ErrBadBackslash)
+ }
+ }
+ return c
+}
+
func (p *parser) charClass() *instr {
i := newCharClass()
cc := i.cclass
@@ -314,20 +332,8 @@ func (p *parser) charClass() *instr {
return i
case '-': // do this before backslash processing
p.error(ErrBadRange)
- case '\\':
- c = p.nextc()
- switch {
- case c == endOfFile:
- p.error(ErrExtraneousBackslash)
- case ispunct(c):
- // c is as delivered
- case escape(c) >= 0:
- c = int(escaped[escape(c)])
- default:
- p.error(ErrBadBackslash)
- }
- fallthrough
default:
+ c = p.checkBackslash()
p.nextc()
switch {
case left < 0: // first of pair
@@ -345,14 +351,14 @@ func (p *parser) charClass() *instr {
}
}
}
- return nil
+ panic("unreachable")
}
func (p *parser) term() (start, end *instr) {
switch c := p.c(); c {
case '|', endOfFile:
return nil, nil
- case '*', '+':
+ case '*', '+', '?':
p.error(ErrBareClosure)
case ')':
if p.nlpar == 0 {
@@ -407,20 +413,8 @@ func (p *parser) term() (start, end *instr) {
}
bra.next = start
return bra, ebra
- case '\\':
- c = p.nextc()
- switch {
- case c == endOfFile:
- p.error(ErrExtraneousBackslash)
- case ispunct(c):
- // c is as delivered
- case escape(c) >= 0:
- c = int(escaped[escape(c)])
- default:
- p.error(ErrBadBackslash)
- }
- fallthrough
default:
+ c = p.checkBackslash()
p.nextc()
start = &instr{kind: iChar, char: c}
p.re.add(start)
@@ -571,15 +565,20 @@ func (re *Regexp) doParse() {
}
}
-// Extract regular text from the beginning of the pattern.
+// Extract regular text from the beginning of the pattern,
+// possibly after a leading iBOT.
// That text can be used by doExecute to speed up matching.
func (re *Regexp) setPrefix() {
var b []byte
var utf = make([]byte, utf8.UTFMax)
var inst *instr
- // First instruction is start; skip that.
+ // First instruction is start; skip that. Also skip any initial iBOT.
+ inst = re.inst[0].next
+ for inst.kind == iBOT {
+ inst = inst.next
+ }
Loop:
- for inst = re.inst[0].next; inst.kind != iEnd; inst = inst.next {
+ for ; inst.kind != iEnd; inst = inst.next {
// stop if this is not a char
if inst.kind != iChar {
break
@@ -590,7 +589,7 @@ Loop:
case iBOT, iEOT, iAlt:
break Loop
}
- n := utf8.EncodeRune(inst.char, utf)
+ n := utf8.EncodeRune(utf, inst.char)
b = append(b, utf[0:n]...)
}
// point prefixStart instruction to first non-CHAR after prefix
@@ -599,6 +598,11 @@ Loop:
re.prefix = string(b)
}
+// String returns the source text used to compile the regular expression.
+func (re *Regexp) String() string {
+ return re.expr
+}
+
// Compile parses a regular expression and returns, if successful, a Regexp
// object that can be used to match against text.
func Compile(str string) (regexp *Regexp, error os.Error) {
@@ -743,34 +747,46 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
if bytestr != nil {
end = len(bytestr)
}
+ anchored := re.inst[0].next.kind == iBOT
+ if anchored && pos > 0 {
+ return nil
+ }
// fast check for initial plain substring
- prefixed := false // has this iteration begun by skipping a prefix?
if re.prefix != "" {
- var advance int
- if bytestr == nil {
- advance = strings.Index(str[pos:], re.prefix)
+ advance := 0
+ if anchored {
+ if bytestr == nil {
+ if !strings.HasPrefix(str, re.prefix) {
+ return nil
+ }
+ } else {
+ if !bytes.HasPrefix(bytestr, re.prefixBytes) {
+ return nil
+ }
+ }
} else {
- advance = bytes.Index(bytestr[pos:], re.prefixBytes)
+ if bytestr == nil {
+ advance = strings.Index(str[pos:], re.prefix)
+ } else {
+ advance = bytes.Index(bytestr[pos:], re.prefixBytes)
+ }
}
if advance == -1 {
return nil
}
- pos += advance + len(re.prefix)
- prefixed = true
+ pos += advance
}
arena := &matchArena{nil, 2 * (re.nbra + 1)}
- for pos <= end {
- if !found {
+ for startPos := pos; pos <= end; {
+ if !found && (pos == startPos || !anchored) {
// prime the pump if we haven't seen a match yet
match := arena.noMatch()
match.m[0] = pos
- if prefixed {
- s[out] = arena.addState(s[out], re.prefixStart, true, match, pos, end)
- prefixed = false // next iteration should start at beginning of machine.
- } else {
- s[out] = arena.addState(s[out], re.start.next, false, match, pos, end)
- }
+ s[out] = arena.addState(s[out], re.start.next, false, match, pos, end)
arena.free(match) // if addState saved it, ref was incremented
+ } else if len(s[out]) == 0 {
+ // machine has completed
+ break
}
in, out = out, in // old out state is new in state
// clear out old state
@@ -779,10 +795,6 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
arena.free(state.match)
}
s[out] = old[0:0] // truncate state vector
- if found && len(s[in]) == 0 {
- // machine has completed
- break
- }
charwidth := 1
c := endOfFile
if pos < end {
@@ -844,6 +856,24 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
return final.match.m
}
+// LiteralPrefix returns a literal string that must begin any match
+// of the regular expression re. It returns the boolean true if the
+// literal string comprises the entire regular expression.
+func (re *Regexp) LiteralPrefix() (prefix string, complete bool) {
+ c := make([]int, len(re.inst)-2) // minus start and end.
+ // First instruction is start; skip that.
+ i := 0
+ for inst := re.inst[0].next; inst.kind != iEnd; inst = inst.next {
+ // stop if this is not a char
+ if inst.kind != iChar {
+ return string(c[:i]), false
+ }
+ c[i] = inst.char
+ i++
+ }
+ return string(c[:i]), true
+}
+
// MatchString returns whether the Regexp matches the string s.
// The return value is a boolean: true for match, false for no match.
func (re *Regexp) MatchString(s string) bool { return len(re.doExecute(s, nil, 0)) > 0 }