aboutsummaryrefslogtreecommitdiff
path: root/libgo/go/strings
diff options
context:
space:
mode:
authorIan Lance Taylor <iant@google.com>2015-01-15 00:27:56 +0000
committerIan Lance Taylor <ian@gcc.gnu.org>2015-01-15 00:27:56 +0000
commitf8d9fa9e80b57f89e7877ce6cad8a3464879009b (patch)
tree58a1724fee16d2b03c65678c4dd9b50bb97137a9 /libgo/go/strings
parent6bd3f109d8d8fa58eeccd6b3504721b4f20c00c2 (diff)
downloadgcc-f8d9fa9e80b57f89e7877ce6cad8a3464879009b.zip
gcc-f8d9fa9e80b57f89e7877ce6cad8a3464879009b.tar.gz
gcc-f8d9fa9e80b57f89e7877ce6cad8a3464879009b.tar.bz2
libgo, compiler: Upgrade libgo to Go 1.4, except for runtime.
This upgrades all of libgo other than the runtime package to the Go 1.4 release. In Go 1.4 much of the runtime was rewritten into Go. Merging that code will take more time and will not change the API, so I'm putting it off for now. There are a few runtime changes anyhow, to accomodate other packages that rely on minor modifications to the runtime support. The compiler changes slightly to add a one-bit flag to each type descriptor kind that is stored directly in an interface, which for gccgo is currently only pointer types. Another one-bit flag (gcprog) is reserved because it is used by the gc compiler, but gccgo does not currently use it. There is another error check in the compiler since I ran across it during testing. gotools/: * Makefile.am (go_cmd_go_files): Sort entries. Add generate.go. * Makefile.in: Rebuild. From-SVN: r219627
Diffstat (limited to 'libgo/go/strings')
-rw-r--r--libgo/go/strings/replace.go149
-rw-r--r--libgo/go/strings/replace_test.go60
-rw-r--r--libgo/go/strings/strings.go81
-rw-r--r--libgo/go/strings/strings_test.go30
4 files changed, 195 insertions, 125 deletions
diff --git a/libgo/go/strings/replace.go b/libgo/go/strings/replace.go
index 3e05d20..4752641 100644
--- a/libgo/go/strings/replace.go
+++ b/libgo/go/strings/replace.go
@@ -6,7 +6,8 @@ package strings
import "io"
-// A Replacer replaces a list of strings with replacements.
+// Replacer replaces a list of strings with replacements.
+// It is safe for concurrent use by multiple goroutines.
type Replacer struct {
r replacer
}
@@ -17,15 +18,6 @@ type replacer interface {
WriteString(w io.Writer, s string) (n int, err error)
}
-// byteBitmap represents bytes which are sought for replacement.
-// byteBitmap is 256 bits wide, with a bit set for each old byte to be
-// replaced.
-type byteBitmap [256 / 32]uint32
-
-func (m *byteBitmap) set(b byte) {
- m[b>>5] |= uint32(1 << (b & 31))
-}
-
// NewReplacer returns a new Replacer from a list of old, new string pairs.
// Replacements are performed in order, without overlapping matches.
func NewReplacer(oldnew ...string) *Replacer {
@@ -48,30 +40,29 @@ func NewReplacer(oldnew ...string) *Replacer {
}
if allNewBytes {
- bb := &byteReplacer{}
- for i := 0; i < len(oldnew); i += 2 {
- o, n := oldnew[i][0], oldnew[i+1][0]
- if bb.old[o>>5]&uint32(1<<(o&31)) != 0 {
- // Later old->new maps do not override previous ones with the same old string.
- continue
- }
- bb.old.set(o)
- bb.new[o] = n
+ r := byteReplacer{}
+ for i := range r {
+ r[i] = byte(i)
}
- return &Replacer{r: bb}
+ // The first occurrence of old->new map takes precedence
+ // over the others with the same old string.
+ for i := len(oldnew) - 2; i >= 0; i -= 2 {
+ o := oldnew[i][0]
+ n := oldnew[i+1][0]
+ r[o] = n
+ }
+ return &Replacer{r: &r}
}
- bs := &byteStringReplacer{}
- for i := 0; i < len(oldnew); i += 2 {
- o, new := oldnew[i][0], oldnew[i+1]
- if bs.old[o>>5]&uint32(1<<(o&31)) != 0 {
- // Later old->new maps do not override previous ones with the same old string.
- continue
- }
- bs.old.set(o)
- bs.new[o] = []byte(new)
+ r := byteStringReplacer{}
+ // The first occurrence of old->new map takes precedence
+ // over the others with the same old string.
+ for i := len(oldnew) - 2; i >= 0; i -= 2 {
+ o := oldnew[i][0]
+ n := oldnew[i+1]
+ r[o] = []byte(n)
}
- return &Replacer{r: bs}
+ return &Replacer{r: &r}
}
// Replace returns a copy of s with all replacements performed.
@@ -323,6 +314,15 @@ func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error)
var last, wn int
var prevMatchEmpty bool
for i := 0; i <= len(s); {
+ // Fast path: s[i] is not a prefix of any pattern.
+ if i != len(s) && r.root.priority == 0 {
+ index := int(r.mapping[s[i]])
+ if index == r.tableSize || r.root.table[index] == nil {
+ i++
+ continue
+ }
+ }
+
// Ignore the empty match iff the previous loop found the empty match.
val, keylen, match := r.lookup(s[i:], prevMatchEmpty)
prevMatchEmpty = match && keylen == 0
@@ -409,24 +409,18 @@ func (r *singleStringReplacer) WriteString(w io.Writer, s string) (n int, err er
// byteReplacer is the implementation that's used when all the "old"
// and "new" values are single ASCII bytes.
-type byteReplacer struct {
- // old has a bit set for each old byte that should be replaced.
- old byteBitmap
-
- // replacement byte, indexed by old byte. only valid if
- // corresponding old bit is set.
- new [256]byte
-}
+// The array contains replacement bytes indexed by old byte.
+type byteReplacer [256]byte
func (r *byteReplacer) Replace(s string) string {
var buf []byte // lazily allocated
for i := 0; i < len(s); i++ {
b := s[i]
- if r.old[b>>5]&uint32(1<<(b&31)) != 0 {
+ if r[b] != b {
if buf == nil {
buf = []byte(s)
}
- buf[i] = r.new[b]
+ buf[i] = r[b]
}
}
if buf == nil {
@@ -447,9 +441,7 @@ func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) {
ncopy := copy(buf, s[:])
s = s[ncopy:]
for i, b := range buf[:ncopy] {
- if r.old[b>>5]&uint32(1<<(b&31)) != 0 {
- buf[i] = r.new[b]
- }
+ buf[i] = r[b]
}
wn, err := w.Write(buf[:ncopy])
n += wn
@@ -461,27 +453,20 @@ func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) {
}
// byteStringReplacer is the implementation that's used when all the
-// "old" values are single ASCII bytes but the "new" values vary in
-// size.
-type byteStringReplacer struct {
- // old has a bit set for each old byte that should be replaced.
- old byteBitmap
-
- // replacement string, indexed by old byte. only valid if
- // corresponding old bit is set.
- new [256][]byte
-}
+// "old" values are single ASCII bytes but the "new" values vary in size.
+// The array contains replacement byte slices indexed by old byte.
+// A nil []byte means that the old byte should not be replaced.
+type byteStringReplacer [256][]byte
func (r *byteStringReplacer) Replace(s string) string {
- newSize := 0
+ newSize := len(s)
anyChanges := false
for i := 0; i < len(s); i++ {
b := s[i]
- if r.old[b>>5]&uint32(1<<(b&31)) != 0 {
+ if r[b] != nil {
anyChanges = true
- newSize += len(r.new[b])
- } else {
- newSize++
+ // The -1 is because we are replacing 1 byte with len(r[b]) bytes.
+ newSize += len(r[b]) - 1
}
}
if !anyChanges {
@@ -491,8 +476,8 @@ func (r *byteStringReplacer) Replace(s string) string {
bi := buf
for i := 0; i < len(s); i++ {
b := s[i]
- if r.old[b>>5]&uint32(1<<(b&31)) != 0 {
- n := copy(bi, r.new[b])
+ if r[b] != nil {
+ n := copy(bi, r[b])
bi = bi[n:]
} else {
bi[0] = b
@@ -502,48 +487,32 @@ func (r *byteStringReplacer) Replace(s string) string {
return string(buf)
}
-// WriteString maintains one buffer that's at most 32KB. The bytes in
-// s are enumerated and the buffer is filled. If it reaches its
-// capacity or a byte has a replacement, the buffer is flushed to w.
func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
- // TODO(bradfitz): use io.WriteString with slices of s instead.
- bufsize := 32 << 10
- if len(s) < bufsize {
- bufsize = len(s)
- }
- buf := make([]byte, bufsize)
- bi := buf[:0]
-
+ sw := getStringWriter(w)
+ last := 0
for i := 0; i < len(s); i++ {
b := s[i]
- var new []byte
- if r.old[b>>5]&uint32(1<<(b&31)) != 0 {
- new = r.new[b]
- } else {
- bi = append(bi, b)
- }
- if len(bi) == cap(bi) || (len(bi) > 0 && len(new) > 0) {
- nw, err := w.Write(bi)
- n += nw
- if err != nil {
- return n, err
- }
- bi = buf[:0]
+ if r[b] == nil {
+ continue
}
- if len(new) > 0 {
- nw, err := w.Write(new)
+ if last != i {
+ nw, err := sw.WriteString(s[last:i])
n += nw
if err != nil {
return n, err
}
}
- }
- if len(bi) > 0 {
- nw, err := w.Write(bi)
+ last = i + 1
+ nw, err := w.Write(r[b])
n += nw
if err != nil {
return n, err
}
}
- return n, nil
+ if last != len(s) {
+ var nw int
+ nw, err = sw.WriteString(s[last:])
+ n += nw
+ }
+ return
}
diff --git a/libgo/go/strings/replace_test.go b/libgo/go/strings/replace_test.go
index 82e4b6e..77e48b9 100644
--- a/libgo/go/strings/replace_test.go
+++ b/libgo/go/strings/replace_test.go
@@ -308,20 +308,21 @@ func TestReplacer(t *testing.T) {
}
}
+var algorithmTestCases = []struct {
+ r *Replacer
+ want string
+}{
+ {capitalLetters, "*strings.byteReplacer"},
+ {htmlEscaper, "*strings.byteStringReplacer"},
+ {NewReplacer("12", "123"), "*strings.singleStringReplacer"},
+ {NewReplacer("1", "12"), "*strings.byteStringReplacer"},
+ {NewReplacer("", "X"), "*strings.genericReplacer"},
+ {NewReplacer("a", "1", "b", "12", "cde", "123"), "*strings.genericReplacer"},
+}
+
// TestPickAlgorithm tests that NewReplacer picks the correct algorithm.
func TestPickAlgorithm(t *testing.T) {
- testCases := []struct {
- r *Replacer
- want string
- }{
- {capitalLetters, "*strings.byteReplacer"},
- {htmlEscaper, "*strings.byteStringReplacer"},
- {NewReplacer("12", "123"), "*strings.singleStringReplacer"},
- {NewReplacer("1", "12"), "*strings.byteStringReplacer"},
- {NewReplacer("", "X"), "*strings.genericReplacer"},
- {NewReplacer("a", "1", "b", "12", "cde", "123"), "*strings.genericReplacer"},
- }
- for i, tc := range testCases {
+ for i, tc := range algorithmTestCases {
got := fmt.Sprintf("%T", tc.r.Replacer())
if got != tc.want {
t.Errorf("%d. algorithm = %s, want %s", i, got, tc.want)
@@ -329,6 +330,23 @@ func TestPickAlgorithm(t *testing.T) {
}
}
+type errWriter struct{}
+
+func (errWriter) Write(p []byte) (n int, err error) {
+ return 0, fmt.Errorf("unwritable")
+}
+
+// TestWriteStringError tests that WriteString returns an error
+// received from the underlying io.Writer.
+func TestWriteStringError(t *testing.T) {
+ for i, tc := range algorithmTestCases {
+ n, err := tc.r.WriteString(errWriter{}, "abc")
+ if n != 0 || err == nil || err.Error() != "unwritable" {
+ t.Errorf("%d. WriteStringError = %d, %v, want 0, unwritable", i, n, err)
+ }
+ }
+}
+
// TestGenericTrieBuilding verifies the structure of the generated trie. There
// is one node per line, and the key ending with the current line is in the
// trie if it ends with a "+".
@@ -480,6 +498,24 @@ func BenchmarkHTMLEscapeOld(b *testing.B) {
}
}
+func BenchmarkByteStringReplacerWriteString(b *testing.B) {
+ str := Repeat("I <3 to escape HTML & other text too.", 100)
+ buf := new(bytes.Buffer)
+ for i := 0; i < b.N; i++ {
+ htmlEscaper.WriteString(buf, str)
+ buf.Reset()
+ }
+}
+
+func BenchmarkByteReplacerWriteString(b *testing.B) {
+ str := Repeat("abcdefghijklmnopqrstuvwxyz", 100)
+ buf := new(bytes.Buffer)
+ for i := 0; i < b.N; i++ {
+ capitalLetters.WriteString(buf, str)
+ buf.Reset()
+ }
+}
+
// BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces.
func BenchmarkByteByteReplaces(b *testing.B) {
str := Repeat("a", 100) + Repeat("b", 100)
diff --git a/libgo/go/strings/strings.go b/libgo/go/strings/strings.go
index 5d46211..27d3849 100644
--- a/libgo/go/strings/strings.go
+++ b/libgo/go/strings/strings.go
@@ -43,13 +43,29 @@ func explode(s string, n int) []string {
// primeRK is the prime base used in Rabin-Karp algorithm.
const primeRK = 16777619
-// hashstr returns the hash and the appropriate multiplicative
+// hashStr returns the hash and the appropriate multiplicative
// factor for use in Rabin-Karp algorithm.
-func hashstr(sep string) (uint32, uint32) {
+func hashStr(sep string) (uint32, uint32) {
hash := uint32(0)
for i := 0; i < len(sep); i++ {
hash = hash*primeRK + uint32(sep[i])
+ }
+ var pow, sq uint32 = 1, primeRK
+ for i := len(sep); i > 0; i >>= 1 {
+ if i&1 != 0 {
+ pow *= sq
+ }
+ sq *= sq
+ }
+ return hash, pow
+}
+// hashStrRev returns the hash of the reverse of sep and the
+// appropriate multiplicative factor for use in Rabin-Karp algorithm.
+func hashStrRev(sep string) (uint32, uint32) {
+ hash := uint32(0)
+ for i := len(sep) - 1; i >= 0; i-- {
+ hash = hash*primeRK + uint32(sep[i])
}
var pow, sq uint32 = 1, primeRK
for i := len(sep); i > 0; i >>= 1 {
@@ -85,7 +101,8 @@ func Count(s, sep string) int {
}
return 0
}
- hashsep, pow := hashstr(sep)
+ // Rabin-Karp search
+ hashsep, pow := hashStr(sep)
h := uint32(0)
for i := 0; i < len(sep); i++ {
h = h*primeRK + uint32(s[i])
@@ -139,8 +156,8 @@ func Index(s, sep string) int {
case n > len(s):
return -1
}
- // Hash sep.
- hashsep, pow := hashstr(sep)
+ // Rabin-Karp search
+ hashsep, pow := hashStr(sep)
var h uint32
for i := 0; i < n; i++ {
h = h*primeRK + uint32(s[i])
@@ -163,22 +180,41 @@ func Index(s, sep string) int {
// LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s.
func LastIndex(s, sep string) int {
n := len(sep)
- if n == 0 {
+ switch {
+ case n == 0:
return len(s)
- }
- c := sep[0]
- if n == 1 {
+ case n == 1:
// special case worth making fast
+ c := sep[0]
for i := len(s) - 1; i >= 0; i-- {
if s[i] == c {
return i
}
}
return -1
+ case n == len(s):
+ if sep == s {
+ return 0
+ }
+ return -1
+ case n > len(s):
+ return -1
+ }
+ // Rabin-Karp search from the end of the string
+ hashsep, pow := hashStrRev(sep)
+ last := len(s) - n
+ var h uint32
+ for i := len(s) - 1; i >= last; i-- {
+ h = h*primeRK + uint32(s[i])
+ }
+ if h == hashsep && s[last:] == sep {
+ return last
}
- // n > 1
- for i := len(s) - n; i >= 0; i-- {
- if s[i] == c && s[i:i+n] == sep {
+ for i := last - 1; i >= 0; i-- {
+ h *= primeRK
+ h += uint32(s[i])
+ h -= pow * uint32(s[i+n])
+ if h == hashsep && s[i:i+n] == sep {
return i
}
}
@@ -189,13 +225,8 @@ func LastIndex(s, sep string) int {
// r, or -1 if rune is not present in s.
func IndexRune(s string, r rune) int {
switch {
- case r < 0x80:
- b := byte(r)
- for i := 0; i < len(s); i++ {
- if s[i] == b {
- return i
- }
- }
+ case r < utf8.RuneSelf:
+ return IndexByte(s, byte(r))
default:
for i, c := range s {
if c == r {
@@ -311,6 +342,8 @@ func Fields(s string) []string {
// FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c)
// and returns an array of slices of s. If all code points in s satisfy f(c) or the
// string is empty, an empty slice is returned.
+// FieldsFunc makes no guarantees about the order in which it calls f(c).
+// If f does not return consistent results for a given c, FieldsFunc may crash.
func FieldsFunc(s string, f func(rune) bool) []string {
// First count the fields.
n := 0
@@ -423,9 +456,10 @@ func Map(mapping func(rune) rune, s string) string {
// Repeat returns a new string consisting of count copies of the string s.
func Repeat(s string, count int) string {
b := make([]byte, len(s)*count)
- bp := 0
- for i := 0; i < count; i++ {
- bp += copy(b[bp:], s)
+ bp := copy(b, s)
+ for bp < len(b) {
+ copy(b[bp:], b[:bp])
+ bp *= 2
}
return string(b)
}
@@ -634,6 +668,9 @@ func TrimSuffix(s, suffix string) string {
// Replace returns a copy of the string s with the first n
// non-overlapping instances of old replaced by new.
+// If old is empty, it matches at the beginning of the string
+// and after each UTF-8 sequence, yielding up to k+1 replacements
+// for a k-rune string.
// If n < 0, there is no limit on the number of replacements.
func Replace(s, old, new string, n int) string {
if old == new || n == 0 {
diff --git a/libgo/go/strings/strings_test.go b/libgo/go/strings/strings_test.go
index e40a180..7bb81ef 100644
--- a/libgo/go/strings/strings_test.go
+++ b/libgo/go/strings/strings_test.go
@@ -168,6 +168,15 @@ func BenchmarkIndex(b *testing.B) {
}
}
+func BenchmarkLastIndex(b *testing.B) {
+ if got := Index(benchmarkString, "v"); got != 17 {
+ b.Fatalf("wrong index: expected 17, got=%d", got)
+ }
+ for i := 0; i < b.N; i++ {
+ LastIndex(benchmarkString, "v")
+ }
+}
+
func BenchmarkIndexByte(b *testing.B) {
if got := IndexByte(benchmarkString, 'v'); got != 17 {
b.Fatalf("wrong index: expected 17, got=%d", got)
@@ -1069,8 +1078,11 @@ func makeBenchInputHard() string {
"hello", "world",
}
x := make([]byte, 0, 1<<20)
- for len(x) < 1<<20 {
+ for {
i := rand.Intn(len(tokens))
+ if len(x)+len(tokens[i]) >= 1<<20 {
+ break
+ }
x = append(x, tokens[i]...)
}
return string(x)
@@ -1084,6 +1096,12 @@ func benchmarkIndexHard(b *testing.B, sep string) {
}
}
+func benchmarkLastIndexHard(b *testing.B, sep string) {
+ for i := 0; i < b.N; i++ {
+ LastIndex(benchInputHard, sep)
+ }
+}
+
func benchmarkCountHard(b *testing.B, sep string) {
for i := 0; i < b.N; i++ {
Count(benchInputHard, sep)
@@ -1094,6 +1112,10 @@ func BenchmarkIndexHard1(b *testing.B) { benchmarkIndexHard(b, "<>") }
func BenchmarkIndexHard2(b *testing.B) { benchmarkIndexHard(b, "</pre>") }
func BenchmarkIndexHard3(b *testing.B) { benchmarkIndexHard(b, "<b>hello world</b>") }
+func BenchmarkLastIndexHard1(b *testing.B) { benchmarkLastIndexHard(b, "<>") }
+func BenchmarkLastIndexHard2(b *testing.B) { benchmarkLastIndexHard(b, "</pre>") }
+func BenchmarkLastIndexHard3(b *testing.B) { benchmarkLastIndexHard(b, "<b>hello world</b>") }
+
func BenchmarkCountHard1(b *testing.B) { benchmarkCountHard(b, "<>") }
func BenchmarkCountHard2(b *testing.B) { benchmarkCountHard(b, "</pre>") }
func BenchmarkCountHard3(b *testing.B) { benchmarkCountHard(b, "<b>hello world</b>") }
@@ -1174,3 +1196,9 @@ func BenchmarkSplit3(b *testing.B) {
Split(benchInputHard, "hello")
}
}
+
+func BenchmarkRepeat(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ Repeat("-", 80)
+ }
+}