libgo: Update to weekly.2012-02-14 release.

From-SVN: r184798
author: Ian Lance Taylor <ian@gcc.gnu.org> 2012-03-02 16:38:43 +0000
committer: Ian Lance Taylor <ian@gcc.gnu.org> 2012-03-02 16:38:43 +0000
commit: cbb6491d76c7aa81cdf5d3b3a81386129c5e2fce (patch)
tree: efa0c55763b34cbc633bc494c2743d1b5d9aaff3 /libgo/go/regexp/regexp.go
parent: ff2f581b00ac6759f6366c16ef902c935163aa13 (diff)
download: gcc-cbb6491d76c7aa81cdf5d3b3a81386129c5e2fce.zip
gcc-cbb6491d76c7aa81cdf5d3b3a81386129c5e2fce.tar.gz
gcc-cbb6491d76c7aa81cdf5d3b3a81386129c5e2fce.tar.bz2
1 files changed, 220 insertions, 64 deletions
diff --git a/libgo/go/regexp/regexp.go b/libgo/go/regexp/regexp.go
index 7aebd37..54c5377 100644
--- a/libgo/go/regexp/regexp.go
+++ b/libgo/go/regexp/regexp.go
@@ -61,6 +61,7 @@ import (
 	"strconv"
 	"strings"
 	"sync"
+	"unicode"
 	"unicode/utf8"
 )
 
@@ -416,41 +417,79 @@ func Match(pattern string, b []byte) (matched bool, error error) {
 	return re.Match(b), nil
 }
 
-// ReplaceAllString returns a copy of src in which all matches for the Regexp
-// have been replaced by repl.  No support is provided for expressions
-// (e.g. \1 or $1) in the replacement string.
+// ReplaceAllString returns a copy of src, replacing matches of the Regexp
+// with the replacement string repl.  Inside repl, $ signs are interpreted as
+// in Expand, so for instance $1 represents the text of the first submatch.
 func (re *Regexp) ReplaceAllString(src, repl string) string {
-	return re.ReplaceAllStringFunc(src, func(string) string { return repl })
+	n := 2
+	if strings.Index(repl, "$") >= 0 {
+		n = 2 * (re.numSubexp + 1)
+	}
+	b := re.replaceAll(nil, src, n, func(dst []byte, match []int) []byte {
+		return re.expand(dst, repl, nil, src, match)
+	})
+	return string(b)
 }
 
-// ReplaceAllStringFunc returns a copy of src in which all matches for the
-// Regexp have been replaced by the return value of of function repl (whose
-// first argument is the matched string).  No support is provided for
-// expressions (e.g. \1 or $1) in the replacement string.
+// ReplaceAllLiteralString returns a copy of src, replacing matches of the Regexp
+// with the replacement string repl.  The replacement repl is substituted directly,
+// without using Expand.
+func (re *Regexp) ReplaceAllLiteralString(src, repl string) string {
+	return string(re.replaceAll(nil, src, 2, func(dst []byte, match []int) []byte {
+		return append(dst, repl...)
+	}))
+}
+
+// ReplaceAllStringFunc returns a copy of src in which all matches of the
+// Regexp have been replaced by the return value of function repl applied
+// to the matched substring.  The replacement returned by repl is substituted
+// directly, without using Expand.
 func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string {
+	b := re.replaceAll(nil, src, 2, func(dst []byte, match []int) []byte {
+		return append(dst, repl(src[match[0]:match[1]])...)
+	})
+	return string(b)
+}
+
+func (re *Regexp) replaceAll(bsrc []byte, src string, nmatch int, repl func(dst []byte, m []int) []byte) []byte {
 	lastMatchEnd := 0 // end position of the most recent match
 	searchPos := 0    // position where we next look for a match
-	buf := new(bytes.Buffer)
-	for searchPos <= len(src) {
-		a := re.doExecute(nil, nil, src, searchPos, 2)
+	var buf []byte
+	var endPos int
+	if bsrc != nil {
+		endPos = len(bsrc)
+	} else {
+		endPos = len(src)
+	}
+	for searchPos <= endPos {
+		a := re.doExecute(nil, bsrc, src, searchPos, nmatch)
 		if len(a) == 0 {
 			break // no more matches
 		}
 
 		// Copy the unmatched characters before this match.
-		io.WriteString(buf, src[lastMatchEnd:a[0]])
+		if bsrc != nil {
+			buf = append(buf, bsrc[lastMatchEnd:a[0]]...)
+		} else {
+			buf = append(buf, src[lastMatchEnd:a[0]]...)
+		}
 
 		// Now insert a copy of the replacement string, but not for a
 		// match of the empty string immediately after another match.
 		// (Otherwise, we get double replacement for patterns that
 		// match both empty and nonempty strings.)
 		if a[1] > lastMatchEnd || a[0] == 0 {
-			io.WriteString(buf, repl(src[a[0]:a[1]]))
+			buf = repl(buf, a)
 		}
 		lastMatchEnd = a[1]
 
 		// Advance past this match; always advance at least one character.
-		_, width := utf8.DecodeRuneInString(src[searchPos:])
+		var width int
+		if bsrc != nil {
+			_, width = utf8.DecodeRune(bsrc[searchPos:])
+		} else {
+			_, width = utf8.DecodeRuneInString(src[searchPos:])
+		}
 		if searchPos+width > a[1] {
 			searchPos += width
 		} else if searchPos+1 > a[1] {
@@ -463,61 +502,50 @@ func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) str
 	}
 
 	// Copy the unmatched characters after the last match.
-	io.WriteString(buf, src[lastMatchEnd:])
+	if bsrc != nil {
+		buf = append(buf, bsrc[lastMatchEnd:]...)
+	} else {
+		buf = append(buf, src[lastMatchEnd:]...)
+	}
 
-	return buf.String()
+	return buf
 }
 
-// ReplaceAll returns a copy of src in which all matches for the Regexp
-// have been replaced by repl.  No support is provided for expressions
-// (e.g. \1 or $1) in the replacement text.
+// ReplaceAll returns a copy of src, replacing matches of the Regexp
+// with the replacement text repl.  Inside repl, $ signs are interpreted as
+// in Expand, so for instance $1 represents the text of the first submatch.
 func (re *Regexp) ReplaceAll(src, repl []byte) []byte {
-	return re.ReplaceAllFunc(src, func([]byte) []byte { return repl })
-}
-
-// ReplaceAllFunc returns a copy of src in which all matches for the
-// Regexp have been replaced by the return value of of function repl (whose
-// first argument is the matched []byte).  No support is provided for
-// expressions (e.g. \1 or $1) in the replacement string.
-func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte {
-	lastMatchEnd := 0 // end position of the most recent match
-	searchPos := 0    // position where we next look for a match
-	buf := new(bytes.Buffer)
-	for searchPos <= len(src) {
-		a := re.doExecute(nil, src, "", searchPos, 2)
-		if len(a) == 0 {
-			break // no more matches
-		}
-
-		// Copy the unmatched characters before this match.
-		buf.Write(src[lastMatchEnd:a[0]])
-
-		// Now insert a copy of the replacement string, but not for a
-		// match of the empty string immediately after another match.
-		// (Otherwise, we get double replacement for patterns that
-		// match both empty and nonempty strings.)
-		if a[1] > lastMatchEnd || a[0] == 0 {
-			buf.Write(repl(src[a[0]:a[1]]))
-		}
-		lastMatchEnd = a[1]
-
-		// Advance past this match; always advance at least one character.
-		_, width := utf8.DecodeRune(src[searchPos:])
-		if searchPos+width > a[1] {
-			searchPos += width
-		} else if searchPos+1 > a[1] {
-			// This clause is only needed at the end of the input
-			// string.  In that case, DecodeRuneInString returns width=0.
-			searchPos++
-		} else {
-			searchPos = a[1]
-		}
+	n := 2
+	if bytes.IndexByte(repl, '$') >= 0 {
+		n = 2 * (re.numSubexp + 1)
 	}
+	srepl := ""
+	b := re.replaceAll(src, "", n, func(dst []byte, match []int) []byte {
+		if len(srepl) != len(repl) {
+			srepl = string(repl)
+		}
+		return re.expand(dst, srepl, src, "", match)
+	})
+	return b
+}
 
-	// Copy the unmatched characters after the last match.
-	buf.Write(src[lastMatchEnd:])
+// ReplaceAllLiteral returns a copy of src, replacing matches of the Regexp
+// with the replacement bytes repl.  The replacement repl is substituted directly,
+// without using Expand.
+func (re *Regexp) ReplaceAllLiteral(src, repl []byte) []byte {
+	return re.replaceAll(src, "", 2, func(dst []byte, match []int) []byte {
+		return append(dst, repl...)
+	})
+}
 
-	return buf.Bytes()
+// ReplaceAllFunc returns a copy of src in which all matches of the
+// Regexp have been replaced by the return value of function repl applied
+// to the matched byte slice.  The replacement returned by repl is substituted
+// directly, without using Expand.
+func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte {
+	return re.replaceAll(src, "", 2, func(dst []byte, match []int) []byte {
+		return append(dst, repl(src[match[0]:match[1]])...)
+	})
 }
 
 var specialBytes = []byte(`\.+*?()|[]{}^$`)
@@ -648,7 +676,7 @@ func (re *Regexp) FindString(s string) string {
 // location of the leftmost match in s of the regular expression.  The match
 // itself is at s[loc[0]:loc[1]].
 // A return value of nil indicates no match.
-func (re *Regexp) FindStringIndex(s string) []int {
+func (re *Regexp) FindStringIndex(s string) (loc []int) {
 	a := re.doExecute(nil, nil, s, 0, 2)
 	if a == nil {
 		return nil
@@ -660,7 +688,7 @@ func (re *Regexp) FindStringIndex(s string) []int {
 // location of the leftmost match of the regular expression in text read from
 // the RuneReader.  The match itself is at s[loc[0]:loc[1]].  A return
 // value of nil indicates no match.
-func (re *Regexp) FindReaderIndex(r io.RuneReader) []int {
+func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) {
 	a := re.doExecute(r, nil, "", 0, 2)
 	if a == nil {
 		return nil
@@ -687,6 +715,134 @@ func (re *Regexp) FindSubmatch(b []byte) [][]byte {
 	return ret
 }
 
+// Expand appends template to dst and returns the result; during the
+// append, Expand replaces variables in the template with corresponding
+// matches drawn from src.  The match slice should have been returned by
+// FindSubmatchIndex.
+// 
+// In the template, a variable is denoted by a substring of the form
+// $name or ${name}, where name is a non-empty sequence of letters,
+// digits, and underscores.  A purely numeric name like $1 refers to
+// the submatch with the corresponding index; other names refer to
+// capturing parentheses named with the (?P<name>...) syntax.  A
+// reference to an out of range or unmatched index or a name that is not
+// present in the regular expression is replaced with an empty string.
+// 
+// In the $name form, name is taken to be as long as possible: $1x is
+// equivalent to ${1x}, not ${1}x, and $10 is equivalent to ${10}, not ${1}0.
+// 
+// To insert a literal $ in the output, use $$ in the template.
+func (re *Regexp) Expand(dst []byte, template []byte, src []byte, match []int) []byte {
+	return re.expand(dst, string(template), src, "", match)
+}
+
+// ExpandString is like Expand but the template and source are strings.
+// It appends to and returns a byte slice in order to give the calling
+// code control over allocation.
+func (re *Regexp) ExpandString(dst []byte, template string, src string, match []int) []byte {
+	return re.expand(dst, template, nil, src, match)
+}
+
+func (re *Regexp) expand(dst []byte, template string, bsrc []byte, src string, match []int) []byte {
+	for len(template) > 0 {
+		i := strings.Index(template, "$")
+		if i < 0 {
+			break
+		}
+		dst = append(dst, template[:i]...)
+		template = template[i:]
+		if len(template) > 1 && template[1] == '$' {
+			// Treat $$ as $.
+			dst = append(dst, '$')
+			template = template[2:]
+			continue
+		}
+		name, num, rest, ok := extract(template)
+		if !ok {
+			// Malformed; treat $ as raw text.
+			dst = append(dst, '$')
+			template = template[1:]
+			continue
+		}
+		template = rest
+		if num >= 0 {
+			if 2*num+1 < len(match) {
+				if bsrc != nil {
+					dst = append(dst, bsrc[match[2*num]:match[2*num+1]]...)
+				} else {
+					dst = append(dst, src[match[2*num]:match[2*num+1]]...)
+				}
+			}
+		} else {
+			for i, namei := range re.subexpNames {
+				if name == namei && 2*i+1 < len(match) && match[2*i] >= 0 {
+					if bsrc != nil {
+						dst = append(dst, bsrc[match[2*i]:match[2*i+1]]...)
+					} else {
+						dst = append(dst, src[match[2*i]:match[2*i+1]]...)
+					}
+					break
+				}
+			}
+		}
+	}
+	dst = append(dst, template...)
+	return dst
+}
+
+// extract returns the name from a leading "$name" or "${name}" in str.
+// If it is a number, extract returns num set to that number; otherwise num = -1.
+func extract(str string) (name string, num int, rest string, ok bool) {
+	if len(str) < 2 || str[0] != '$' {
+		return
+	}
+	brace := false
+	if str[1] == '{' {
+		brace = true
+		str = str[2:]
+	} else {
+		str = str[1:]
+	}
+	i := 0
+	for i < len(str) {
+		rune, size := utf8.DecodeRuneInString(str[i:])
+		if !unicode.IsLetter(rune) && !unicode.IsDigit(rune) && rune != '_' {
+			break
+		}
+		i += size
+	}
+	if i == 0 {
+		// empty name is not okay
+		return
+	}
+	name = str[:i]
+	if brace {
+		if i >= len(str) || str[i] != '}' {
+			// missing closing brace
+			return
+		}
+		i++
+	}
+
+	// Parse number.
+	num = 0
+	for i := 0; i < len(name); i++ {
+		if name[i] < '0' || '9' < name[i] || num >= 1e8 {
+			num = -1
+			break
+		}
+		num = num*10 + int(name[i]) - '0'
+	}
+	// Disallow leading zeros.
+	if name[0] == '0' && len(name) > 1 {
+		num = -1
+	}
+
+	rest = str[i:]
+	ok = true
+	return
+}
+
 // FindSubmatchIndex returns a slice holding the index pairs identifying the
 // leftmost match of the regular expression in b and the matches, if any, of
 // its subexpressions, as defined by the 'Submatch' and 'Index' descriptions
author	Ian Lance Taylor <ian@gcc.gnu.org>	2012-03-02 16:38:43 +0000
committer	Ian Lance Taylor <ian@gcc.gnu.org>	2012-03-02 16:38:43 +0000
commit	cbb6491d76c7aa81cdf5d3b3a81386129c5e2fce (patch)
tree	efa0c55763b34cbc633bc494c2743d1b5d9aaff3 /libgo/go/regexp/regexp.go
parent	ff2f581b00ac6759f6366c16ef902c935163aa13 (diff)
download	gcc-cbb6491d76c7aa81cdf5d3b3a81386129c5e2fce.zip gcc-cbb6491d76c7aa81cdf5d3b3a81386129c5e2fce.tar.gz gcc-cbb6491d76c7aa81cdf5d3b3a81386129c5e2fce.tar.bz2