aboutsummaryrefslogtreecommitdiff
path: root/libgo/go/regexp
diff options
context:
space:
mode:
authorIan Lance Taylor <ian@gcc.gnu.org>2012-03-02 16:38:43 +0000
committerIan Lance Taylor <ian@gcc.gnu.org>2012-03-02 16:38:43 +0000
commitcbb6491d76c7aa81cdf5d3b3a81386129c5e2fce (patch)
treeefa0c55763b34cbc633bc494c2743d1b5d9aaff3 /libgo/go/regexp
parentff2f581b00ac6759f6366c16ef902c935163aa13 (diff)
downloadgcc-cbb6491d76c7aa81cdf5d3b3a81386129c5e2fce.zip
gcc-cbb6491d76c7aa81cdf5d3b3a81386129c5e2fce.tar.gz
gcc-cbb6491d76c7aa81cdf5d3b3a81386129c5e2fce.tar.bz2
libgo: Update to weekly.2012-02-14 release.
From-SVN: r184798
Diffstat (limited to 'libgo/go/regexp')
-rw-r--r--libgo/go/regexp/all_test.go88
-rw-r--r--libgo/go/regexp/regexp.go284
-rw-r--r--libgo/go/regexp/syntax/parse.go7
-rw-r--r--libgo/go/regexp/syntax/regexp.go2
4 files changed, 313 insertions, 68 deletions
diff --git a/libgo/go/regexp/all_test.go b/libgo/go/regexp/all_test.go
index 107dfe3..f7b41a6 100644
--- a/libgo/go/regexp/all_test.go
+++ b/libgo/go/regexp/all_test.go
@@ -176,6 +176,45 @@ var replaceTests = []ReplaceTest{
{"[a-c]*", "x", "def", "xdxexfx"},
{"[a-c]+", "x", "abcbcdcdedef", "xdxdedef"},
{"[a-c]*", "x", "abcbcdcdedef", "xdxdxexdxexfx"},
+
+ // Substitutions
+ {"a+", "($0)", "banana", "b(a)n(a)n(a)"},
+ {"a+", "(${0})", "banana", "b(a)n(a)n(a)"},
+ {"a+", "(${0})$0", "banana", "b(a)an(a)an(a)a"},
+ {"a+", "(${0})$0", "banana", "b(a)an(a)an(a)a"},
+ {"hello, (.+)", "goodbye, ${1}", "hello, world", "goodbye, world"},
+ {"hello, (.+)", "goodbye, $1x", "hello, world", "goodbye, "},
+ {"hello, (.+)", "goodbye, ${1}x", "hello, world", "goodbye, worldx"},
+ {"hello, (.+)", "<$0><$1><$2><$3>", "hello, world", "<hello, world><world><><>"},
+ {"hello, (?P<noun>.+)", "goodbye, $noun!", "hello, world", "goodbye, world!"},
+ {"hello, (?P<noun>.+)", "goodbye, ${noun}", "hello, world", "goodbye, world"},
+ {"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "hi", "hihihi"},
+ {"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "bye", "byebyebye"},
+ {"(?P<x>hi)|(?P<x>bye)", "$xyz", "hi", ""},
+ {"(?P<x>hi)|(?P<x>bye)", "${x}yz", "hi", "hiyz"},
+ {"(?P<x>hi)|(?P<x>bye)", "hello $$x", "hi", "hello $x"},
+ {"a+", "${oops", "aaa", "${oops"},
+ {"a+", "$$", "aaa", "$"},
+ {"a+", "$", "aaa", "$"},
+}
+
+var replaceLiteralTests = []ReplaceTest{
+ // Substitutions
+ {"a+", "($0)", "banana", "b($0)n($0)n($0)"},
+ {"a+", "(${0})", "banana", "b(${0})n(${0})n(${0})"},
+ {"a+", "(${0})$0", "banana", "b(${0})$0n(${0})$0n(${0})$0"},
+ {"a+", "(${0})$0", "banana", "b(${0})$0n(${0})$0n(${0})$0"},
+ {"hello, (.+)", "goodbye, ${1}", "hello, world", "goodbye, ${1}"},
+ {"hello, (?P<noun>.+)", "goodbye, $noun!", "hello, world", "goodbye, $noun!"},
+ {"hello, (?P<noun>.+)", "goodbye, ${noun}", "hello, world", "goodbye, ${noun}"},
+ {"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "hi", "$x$x$x"},
+ {"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "bye", "$x$x$x"},
+ {"(?P<x>hi)|(?P<x>bye)", "$xyz", "hi", "$xyz"},
+ {"(?P<x>hi)|(?P<x>bye)", "${x}yz", "hi", "${x}yz"},
+ {"(?P<x>hi)|(?P<x>bye)", "hello $$x", "hi", "hello $$x"},
+ {"a+", "${oops", "aaa", "${oops"},
+ {"a+", "$$", "aaa", "$$"},
+ {"a+", "$", "aaa", "$"},
}
type ReplaceFuncTest struct {
@@ -199,13 +238,58 @@ func TestReplaceAll(t *testing.T) {
}
actual := re.ReplaceAllString(tc.input, tc.replacement)
if actual != tc.output {
- t.Errorf("%q.Replace(%q,%q) = %q; want %q",
+ t.Errorf("%q.ReplaceAllString(%q,%q) = %q; want %q",
tc.pattern, tc.input, tc.replacement, actual, tc.output)
}
// now try bytes
actual = string(re.ReplaceAll([]byte(tc.input), []byte(tc.replacement)))
if actual != tc.output {
- t.Errorf("%q.Replace(%q,%q) = %q; want %q",
+ t.Errorf("%q.ReplaceAll(%q,%q) = %q; want %q",
+ tc.pattern, tc.input, tc.replacement, actual, tc.output)
+ }
+ }
+}
+
+func TestReplaceAllLiteral(t *testing.T) {
+ // Run ReplaceAll tests that do not have $ expansions.
+ for _, tc := range replaceTests {
+ if strings.Contains(tc.replacement, "$") {
+ continue
+ }
+ re, err := Compile(tc.pattern)
+ if err != nil {
+ t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err)
+ continue
+ }
+ actual := re.ReplaceAllLiteralString(tc.input, tc.replacement)
+ if actual != tc.output {
+ t.Errorf("%q.ReplaceAllLiteralString(%q,%q) = %q; want %q",
+ tc.pattern, tc.input, tc.replacement, actual, tc.output)
+ }
+ // now try bytes
+ actual = string(re.ReplaceAllLiteral([]byte(tc.input), []byte(tc.replacement)))
+ if actual != tc.output {
+ t.Errorf("%q.ReplaceAllLiteral(%q,%q) = %q; want %q",
+ tc.pattern, tc.input, tc.replacement, actual, tc.output)
+ }
+ }
+
+ // Run literal-specific tests.
+ for _, tc := range replaceLiteralTests {
+ re, err := Compile(tc.pattern)
+ if err != nil {
+ t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err)
+ continue
+ }
+ actual := re.ReplaceAllLiteralString(tc.input, tc.replacement)
+ if actual != tc.output {
+ t.Errorf("%q.ReplaceAllLiteralString(%q,%q) = %q; want %q",
+ tc.pattern, tc.input, tc.replacement, actual, tc.output)
+ }
+ // now try bytes
+ actual = string(re.ReplaceAllLiteral([]byte(tc.input), []byte(tc.replacement)))
+ if actual != tc.output {
+ t.Errorf("%q.ReplaceAllLiteral(%q,%q) = %q; want %q",
tc.pattern, tc.input, tc.replacement, actual, tc.output)
}
}
diff --git a/libgo/go/regexp/regexp.go b/libgo/go/regexp/regexp.go
index 7aebd37..54c5377 100644
--- a/libgo/go/regexp/regexp.go
+++ b/libgo/go/regexp/regexp.go
@@ -61,6 +61,7 @@ import (
"strconv"
"strings"
"sync"
+ "unicode"
"unicode/utf8"
)
@@ -416,41 +417,79 @@ func Match(pattern string, b []byte) (matched bool, error error) {
return re.Match(b), nil
}
-// ReplaceAllString returns a copy of src in which all matches for the Regexp
-// have been replaced by repl. No support is provided for expressions
-// (e.g. \1 or $1) in the replacement string.
+// ReplaceAllString returns a copy of src, replacing matches of the Regexp
+// with the replacement string repl. Inside repl, $ signs are interpreted as
+// in Expand, so for instance $1 represents the text of the first submatch.
func (re *Regexp) ReplaceAllString(src, repl string) string {
- return re.ReplaceAllStringFunc(src, func(string) string { return repl })
+ n := 2
+ if strings.Index(repl, "$") >= 0 {
+ n = 2 * (re.numSubexp + 1)
+ }
+ b := re.replaceAll(nil, src, n, func(dst []byte, match []int) []byte {
+ return re.expand(dst, repl, nil, src, match)
+ })
+ return string(b)
}
-// ReplaceAllStringFunc returns a copy of src in which all matches for the
-// Regexp have been replaced by the return value of of function repl (whose
-// first argument is the matched string). No support is provided for
-// expressions (e.g. \1 or $1) in the replacement string.
+// ReplaceAllLiteralString returns a copy of src, replacing matches of the Regexp
+// with the replacement string repl. The replacement repl is substituted directly,
+// without using Expand.
+func (re *Regexp) ReplaceAllLiteralString(src, repl string) string {
+ return string(re.replaceAll(nil, src, 2, func(dst []byte, match []int) []byte {
+ return append(dst, repl...)
+ }))
+}
+
+// ReplaceAllStringFunc returns a copy of src in which all matches of the
+// Regexp have been replaced by the return value of function repl applied
+// to the matched substring. The replacement returned by repl is substituted
+// directly, without using Expand.
func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string {
+ b := re.replaceAll(nil, src, 2, func(dst []byte, match []int) []byte {
+ return append(dst, repl(src[match[0]:match[1]])...)
+ })
+ return string(b)
+}
+
+func (re *Regexp) replaceAll(bsrc []byte, src string, nmatch int, repl func(dst []byte, m []int) []byte) []byte {
lastMatchEnd := 0 // end position of the most recent match
searchPos := 0 // position where we next look for a match
- buf := new(bytes.Buffer)
- for searchPos <= len(src) {
- a := re.doExecute(nil, nil, src, searchPos, 2)
+ var buf []byte
+ var endPos int
+ if bsrc != nil {
+ endPos = len(bsrc)
+ } else {
+ endPos = len(src)
+ }
+ for searchPos <= endPos {
+ a := re.doExecute(nil, bsrc, src, searchPos, nmatch)
if len(a) == 0 {
break // no more matches
}
// Copy the unmatched characters before this match.
- io.WriteString(buf, src[lastMatchEnd:a[0]])
+ if bsrc != nil {
+ buf = append(buf, bsrc[lastMatchEnd:a[0]]...)
+ } else {
+ buf = append(buf, src[lastMatchEnd:a[0]]...)
+ }
// Now insert a copy of the replacement string, but not for a
// match of the empty string immediately after another match.
// (Otherwise, we get double replacement for patterns that
// match both empty and nonempty strings.)
if a[1] > lastMatchEnd || a[0] == 0 {
- io.WriteString(buf, repl(src[a[0]:a[1]]))
+ buf = repl(buf, a)
}
lastMatchEnd = a[1]
// Advance past this match; always advance at least one character.
- _, width := utf8.DecodeRuneInString(src[searchPos:])
+ var width int
+ if bsrc != nil {
+ _, width = utf8.DecodeRune(bsrc[searchPos:])
+ } else {
+ _, width = utf8.DecodeRuneInString(src[searchPos:])
+ }
if searchPos+width > a[1] {
searchPos += width
} else if searchPos+1 > a[1] {
@@ -463,61 +502,50 @@ func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) str
}
// Copy the unmatched characters after the last match.
- io.WriteString(buf, src[lastMatchEnd:])
+ if bsrc != nil {
+ buf = append(buf, bsrc[lastMatchEnd:]...)
+ } else {
+ buf = append(buf, src[lastMatchEnd:]...)
+ }
- return buf.String()
+ return buf
}
-// ReplaceAll returns a copy of src in which all matches for the Regexp
-// have been replaced by repl. No support is provided for expressions
-// (e.g. \1 or $1) in the replacement text.
+// ReplaceAll returns a copy of src, replacing matches of the Regexp
+// with the replacement string repl. Inside repl, $ signs are interpreted as
+// in Expand, so for instance $1 represents the text of the first submatch.
func (re *Regexp) ReplaceAll(src, repl []byte) []byte {
- return re.ReplaceAllFunc(src, func([]byte) []byte { return repl })
-}
-
-// ReplaceAllFunc returns a copy of src in which all matches for the
-// Regexp have been replaced by the return value of of function repl (whose
-// first argument is the matched []byte). No support is provided for
-// expressions (e.g. \1 or $1) in the replacement string.
-func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte {
- lastMatchEnd := 0 // end position of the most recent match
- searchPos := 0 // position where we next look for a match
- buf := new(bytes.Buffer)
- for searchPos <= len(src) {
- a := re.doExecute(nil, src, "", searchPos, 2)
- if len(a) == 0 {
- break // no more matches
- }
-
- // Copy the unmatched characters before this match.
- buf.Write(src[lastMatchEnd:a[0]])
-
- // Now insert a copy of the replacement string, but not for a
- // match of the empty string immediately after another match.
- // (Otherwise, we get double replacement for patterns that
- // match both empty and nonempty strings.)
- if a[1] > lastMatchEnd || a[0] == 0 {
- buf.Write(repl(src[a[0]:a[1]]))
- }
- lastMatchEnd = a[1]
-
- // Advance past this match; always advance at least one character.
- _, width := utf8.DecodeRune(src[searchPos:])
- if searchPos+width > a[1] {
- searchPos += width
- } else if searchPos+1 > a[1] {
- // This clause is only needed at the end of the input
- // string. In that case, DecodeRuneInString returns width=0.
- searchPos++
- } else {
- searchPos = a[1]
- }
+ n := 2
+ if bytes.IndexByte(repl, '$') >= 0 {
+ n = 2 * (re.numSubexp + 1)
}
+ srepl := ""
+ b := re.replaceAll(src, "", n, func(dst []byte, match []int) []byte {
+ if len(srepl) != len(repl) {
+ srepl = string(repl)
+ }
+ return re.expand(dst, srepl, src, "", match)
+ })
+ return b
+}
- // Copy the unmatched characters after the last match.
- buf.Write(src[lastMatchEnd:])
+// ReplaceAllLiteral returns a copy of src, replacing matches of the Regexp
+// with the replacement bytes repl. The replacement repl is substituted directly,
+// without using Expand.
+func (re *Regexp) ReplaceAllLiteral(src, repl []byte) []byte {
+ return re.replaceAll(src, "", 2, func(dst []byte, match []int) []byte {
+ return append(dst, repl...)
+ })
+}
- return buf.Bytes()
+// ReplaceAllFunc returns a copy of src in which all matches of the
+// Regexp have been replaced by the return value of function repl applied
+// to the matched byte slice. The replacement returned by repl is substituted
+// directly, without using Expand.
+func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte {
+ return re.replaceAll(src, "", 2, func(dst []byte, match []int) []byte {
+ return append(dst, repl(src[match[0]:match[1]])...)
+ })
}
var specialBytes = []byte(`\.+*?()|[]{}^$`)
@@ -648,7 +676,7 @@ func (re *Regexp) FindString(s string) string {
// location of the leftmost match in s of the regular expression. The match
// itself is at s[loc[0]:loc[1]].
// A return value of nil indicates no match.
-func (re *Regexp) FindStringIndex(s string) []int {
+func (re *Regexp) FindStringIndex(s string) (loc []int) {
a := re.doExecute(nil, nil, s, 0, 2)
if a == nil {
return nil
@@ -660,7 +688,7 @@ func (re *Regexp) FindStringIndex(s string) []int {
// location of the leftmost match of the regular expression in text read from
// the RuneReader. The match itself is at s[loc[0]:loc[1]]. A return
// value of nil indicates no match.
-func (re *Regexp) FindReaderIndex(r io.RuneReader) []int {
+func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) {
a := re.doExecute(r, nil, "", 0, 2)
if a == nil {
return nil
@@ -687,6 +715,134 @@ func (re *Regexp) FindSubmatch(b []byte) [][]byte {
return ret
}
+// Expand appends template to dst and returns the result; during the
+// append, Expand replaces variables in the template with corresponding
+// matches drawn from src. The match slice should have been returned by
+// FindSubmatchIndex.
+//
+// In the template, a variable is denoted by a substring of the form
+// $name or ${name}, where name is a non-empty sequence of letters,
+// digits, and underscores. A purely numeric name like $1 refers to
+// the submatch with the corresponding index; other names refer to
+// capturing parentheses named with the (?P<name>...) syntax. A
+// reference to an out of range or unmatched index or a name that is not
+// present in the regular expression is replaced with an empty string.
+//
+// In the $name form, name is taken to be as long as possible: $1x is
+// equivalent to ${1x}, not ${1}x, and, $10 is equivalent to ${10}, not ${1}0.
+//
+// To insert a literal $ in the output, use $$ in the template.
+func (re *Regexp) Expand(dst []byte, template []byte, src []byte, match []int) []byte {
+ return re.expand(dst, string(template), src, "", match)
+}
+
+// ExpandString is like Expand but the template and source are strings.
+// It appends to and returns a byte slice in order to give the calling
+// code control over allocation.
+func (re *Regexp) ExpandString(dst []byte, template string, src string, match []int) []byte {
+ return re.expand(dst, template, nil, src, match)
+}
+
+func (re *Regexp) expand(dst []byte, template string, bsrc []byte, src string, match []int) []byte {
+ for len(template) > 0 {
+ i := strings.Index(template, "$")
+ if i < 0 {
+ break
+ }
+ dst = append(dst, template[:i]...)
+ template = template[i:]
+ if len(template) > 1 && template[1] == '$' {
+ // Treat $$ as $.
+ dst = append(dst, '$')
+ template = template[2:]
+ continue
+ }
+ name, num, rest, ok := extract(template)
+ if !ok {
+ // Malformed; treat $ as raw text.
+ dst = append(dst, '$')
+ template = template[1:]
+ continue
+ }
+ template = rest
+ if num >= 0 {
+ if 2*num+1 < len(match) {
+ if bsrc != nil {
+ dst = append(dst, bsrc[match[2*num]:match[2*num+1]]...)
+ } else {
+ dst = append(dst, src[match[2*num]:match[2*num+1]]...)
+ }
+ }
+ } else {
+ for i, namei := range re.subexpNames {
+ if name == namei && 2*i+1 < len(match) && match[2*i] >= 0 {
+ if bsrc != nil {
+ dst = append(dst, bsrc[match[2*i]:match[2*i+1]]...)
+ } else {
+ dst = append(dst, src[match[2*i]:match[2*i+1]]...)
+ }
+ break
+ }
+ }
+ }
+ }
+ dst = append(dst, template...)
+ return dst
+}
+
+// extract returns the name from a leading "$name" or "${name}" in str.
+// If it is a number, extract returns num set to that number; otherwise num = -1.
+func extract(str string) (name string, num int, rest string, ok bool) {
+ if len(str) < 2 || str[0] != '$' {
+ return
+ }
+ brace := false
+ if str[1] == '{' {
+ brace = true
+ str = str[2:]
+ } else {
+ str = str[1:]
+ }
+ i := 0
+ for i < len(str) {
+ rune, size := utf8.DecodeRuneInString(str[i:])
+ if !unicode.IsLetter(rune) && !unicode.IsDigit(rune) && rune != '_' {
+ break
+ }
+ i += size
+ }
+ if i == 0 {
+ // empty name is not okay
+ return
+ }
+ name = str[:i]
+ if brace {
+ if i >= len(str) || str[i] != '}' {
+ // missing closing brace
+ return
+ }
+ i++
+ }
+
+ // Parse number.
+ num = 0
+ for i := 0; i < len(name); i++ {
+ if name[i] < '0' || '9' < name[i] || num >= 1e8 {
+ num = -1
+ break
+ }
+ num = num*10 + int(name[i]) - '0'
+ }
+ // Disallow leading zeros.
+ if name[0] == '0' && len(name) > 1 {
+ num = -1
+ }
+
+ rest = str[i:]
+ ok = true
+ return
+}
+
// FindSubmatchIndex returns a slice holding the index pairs identifying the
// leftmost match of the regular expression in b and the matches, if any, of
// its subexpressions, as defined by the 'Submatch' and 'Index' descriptions
diff --git a/libgo/go/regexp/syntax/parse.go b/libgo/go/regexp/syntax/parse.go
index 07688be..71b07b9 100644
--- a/libgo/go/regexp/syntax/parse.go
+++ b/libgo/go/regexp/syntax/parse.go
@@ -2,6 +2,10 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+// Package syntax parses regular expressions into parse trees and compiles
+// parse trees into programs. Most clients of regular expressions will use
+// the facilities of package regexp (such as Compile and Match) instead of
+// this package.
package syntax
import (
@@ -648,6 +652,9 @@ func literalRegexp(s string, flags Flags) *Regexp {
// Parsing.
+// Parse parses a regular expression string s, controlled by the specified
+// Flags, and returns a regular expression parse tree. The syntax is
+// described in the top-level comment for package regexp.
func Parse(s string, flags Flags) (*Regexp, error) {
if flags&Literal != 0 {
// Trivial parser for literal string.
diff --git a/libgo/go/regexp/syntax/regexp.go b/libgo/go/regexp/syntax/regexp.go
index 668a077..329a90e 100644
--- a/libgo/go/regexp/syntax/regexp.go
+++ b/libgo/go/regexp/syntax/regexp.go
@@ -2,8 +2,6 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// Package syntax parses regular expressions into syntax trees.
-// WORK IN PROGRESS.
package syntax
// Note to implementers: