diff options
Diffstat (limited to 'libgo/go/regexp/syntax')
-rw-r--r-- | libgo/go/regexp/syntax/parse.go | 24 |
-rw-r--r-- | libgo/go/regexp/syntax/parse_test.go | 9 |
-rw-r--r-- | libgo/go/regexp/syntax/regexp.go | 4 |
-rw-r--r-- | libgo/go/regexp/syntax/simplify_test.go | 4 |
4 files changed, 29 insertions, 12 deletions
diff --git a/libgo/go/regexp/syntax/parse.go b/libgo/go/regexp/syntax/parse.go index d579a40..f38bbf6 100644 --- a/libgo/go/regexp/syntax/parse.go +++ b/libgo/go/regexp/syntax/parse.go @@ -470,9 +470,14 @@ func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp { } sub = out - // Round 2: Factor out common complex prefixes, - // just the first piece of each concatenation, - // whatever it is. This is good enough a lot of the time. + // Round 2: Factor out common simple prefixes, + // just the first piece of each concatenation. + // This will be good enough a lot of the time. + // + // Complex subexpressions (e.g. involving quantifiers) + // are not safe to factor because that collapses their + // distinct paths through the automaton, which affects + // correctness in some cases. start = 0 out = sub[:0] var first *Regexp @@ -485,7 +490,9 @@ func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp { var ifirst *Regexp if i < len(sub) { ifirst = p.leadingRegexp(sub[i]) - if first != nil && first.Equal(ifirst) { + if first != nil && first.Equal(ifirst) && + // first must be a character class OR a fixed repeat of a character class. 
+ (isCharClass(first) || (first.Op == OpRepeat && first.Min == first.Max && isCharClass(first.Sub[0]))) { continue } } @@ -830,7 +837,14 @@ func Parse(s string, flags Flags) (*Regexp, error) { lit = t[2:i] t = t[i+2:] } - p.push(literalRegexp(lit, p.flags)) + for lit != "" { + c, rest, err := nextRune(lit) + if err != nil { + return nil, err + } + p.literal(c) + lit = rest + } break BigSwitch case 'z': p.op(OpEndText) diff --git a/libgo/go/regexp/syntax/parse_test.go b/libgo/go/regexp/syntax/parse_test.go index c4a1117..5ca54bb 100644 --- a/libgo/go/regexp/syntax/parse_test.go +++ b/libgo/go/regexp/syntax/parse_test.go @@ -144,6 +144,7 @@ var parseTests = []parseTest{ // Test Perl quoted literals {`\Q+|*?{[\E`, `str{+|*?{[}`}, {`\Q+\E+`, `plus{lit{+}}`}, + {`\Qab\E+`, `cat{lit{a}plus{lit{b}}}`}, {`\Q\\E`, `lit{\}`}, {`\Q\\\E`, `str{\\}`}, @@ -171,7 +172,7 @@ var parseTests = []parseTest{ // Factoring. {`abc|abd|aef|bcx|bcy`, `alt{cat{lit{a}alt{cat{lit{b}cc{0x63-0x64}}str{ef}}}cat{str{bc}cc{0x78-0x79}}}`}, - {`ax+y|ax+z|ay+w`, `cat{lit{a}alt{cat{plus{lit{x}}cc{0x79-0x7a}}cat{plus{lit{y}}lit{w}}}}`}, + {`ax+y|ax+z|ay+w`, `cat{lit{a}alt{cat{plus{lit{x}}lit{y}}cat{plus{lit{x}}lit{z}}cat{plus{lit{y}}lit{w}}}}`}, // Bug fixes. {`(?:.)`, `dot{}`}, @@ -194,12 +195,13 @@ var parseTests = []parseTest{ {`abc|x|abd`, `alt{str{abc}lit{x}str{abd}}`}, {`(?i)abc|ABD`, `cat{strfold{AB}cc{0x43-0x44 0x63-0x64}}`}, {`[ab]c|[ab]d`, `cat{cc{0x61-0x62}cc{0x63-0x64}}`}, - {`(?:xx|yy)c|(?:xx|yy)d`, - `cat{alt{str{xx}str{yy}}cc{0x63-0x64}}`}, + {`.c|.d`, `cat{dot{}cc{0x63-0x64}}`}, {`x{2}|x{2}[0-9]`, `cat{rep{2,2 lit{x}}alt{emp{}cc{0x30-0x39}}}`}, {`x{2}y|x{2}[0-9]y`, `cat{rep{2,2 lit{x}}alt{lit{y}cat{cc{0x30-0x39}lit{y}}}}`}, + {`a.*?c|a.*?b`, + `cat{lit{a}alt{cat{nstar{dot{}}lit{c}}cat{nstar{dot{}}lit{b}}}}`}, // Valid repetitions. 
{`((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}))`, ``}, @@ -479,6 +481,7 @@ var invalidRegexps = []string{ `a{100000}`, `a{100000,}`, "((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})", + `\Q\E*`, } var onlyPerl = []string{ diff --git a/libgo/go/regexp/syntax/regexp.go b/libgo/go/regexp/syntax/regexp.go index cea7d9e..75822cf 100644 --- a/libgo/go/regexp/syntax/regexp.go +++ b/libgo/go/regexp/syntax/regexp.go @@ -166,9 +166,9 @@ func writeRegexp(b *bytes.Buffer, re *Regexp) { case OpAnyChar: b.WriteString(`(?s:.)`) case OpBeginLine: - b.WriteRune('^') + b.WriteString(`(?m:^)`) case OpEndLine: - b.WriteRune('$') + b.WriteString(`(?m:$)`) case OpBeginText: b.WriteString(`\A`) case OpEndText: diff --git a/libgo/go/regexp/syntax/simplify_test.go b/libgo/go/regexp/syntax/simplify_test.go index 879eff5..5d0f1de 100644 --- a/libgo/go/regexp/syntax/simplify_test.go +++ b/libgo/go/regexp/syntax/simplify_test.go @@ -19,8 +19,8 @@ var simplifyTests = []struct { {`(ab)+`, `(ab)+`}, {`(ab)?`, `(ab)?`}, {`.`, `(?s:.)`}, - {`^`, `^`}, - {`$`, `$`}, + {`^`, `(?m:^)`}, + {`$`, `(?m:$)`}, {`[ac]`, `[ac]`}, {`[^ac]`, `[^ac]`}, |