diff options
author | Ian Lance Taylor <iant@golang.org> | 2020-07-27 22:27:54 -0700 |
---|---|---|
committer | Ian Lance Taylor <iant@golang.org> | 2020-08-01 11:21:40 -0700 |
commit | f75af8c1464e948b5e166cf5ab09ebf0d82fc253 (patch) | |
tree | 3ba3299859b504bdeb477727471216bd094a0191 /libgo/go/strings | |
parent | 75a23e59031fe673fc3b2e60fd1fe5f4c70ecb85 (diff) | |
download | gcc-f75af8c1464e948b5e166cf5ab09ebf0d82fc253.zip gcc-f75af8c1464e948b5e166cf5ab09ebf0d82fc253.tar.gz gcc-f75af8c1464e948b5e166cf5ab09ebf0d82fc253.tar.bz2 |
libgo: update to go1.15rc1
Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/245157
Diffstat (limited to 'libgo/go/strings')
-rw-r--r-- | libgo/go/strings/strings.go | 157 | ||||
-rw-r--r-- | libgo/go/strings/strings_test.go | 58 |
2 files changed, 117 insertions, 98 deletions
diff --git a/libgo/go/strings/strings.go b/libgo/go/strings/strings.go index 238d657..d6f5cea 100644 --- a/libgo/go/strings/strings.go +++ b/libgo/go/strings/strings.go @@ -36,43 +36,6 @@ func explode(s string, n int) []string { return a } -// primeRK is the prime base used in Rabin-Karp algorithm. -const primeRK = 16777619 - -// hashStr returns the hash and the appropriate multiplicative -// factor for use in Rabin-Karp algorithm. -func hashStr(sep string) (uint32, uint32) { - hash := uint32(0) - for i := 0; i < len(sep); i++ { - hash = hash*primeRK + uint32(sep[i]) - } - var pow, sq uint32 = 1, primeRK - for i := len(sep); i > 0; i >>= 1 { - if i&1 != 0 { - pow *= sq - } - sq *= sq - } - return hash, pow -} - -// hashStrRev returns the hash of the reverse of sep and the -// appropriate multiplicative factor for use in Rabin-Karp algorithm. -func hashStrRev(sep string) (uint32, uint32) { - hash := uint32(0) - for i := len(sep) - 1; i >= 0; i-- { - hash = hash*primeRK + uint32(sep[i]) - } - var pow, sq uint32 = 1, primeRK - for i := len(sep); i > 0; i >>= 1 { - if i&1 != 0 { - pow *= sq - } - sq *= sq - } - return hash, pow -} - // Count counts the number of non-overlapping instances of substr in s. // If substr is an empty string, Count returns 1 + the number of Unicode code points in s. func Count(s, substr string) int { @@ -126,17 +89,17 @@ func LastIndex(s, substr string) int { return -1 } // Rabin-Karp search from the end of the string - hashss, pow := hashStrRev(substr) + hashss, pow := bytealg.HashStrRev(substr) last := len(s) - n var h uint32 for i := len(s) - 1; i >= last; i-- { - h = h*primeRK + uint32(s[i]) + h = h*bytealg.PrimeRK + uint32(s[i]) } if h == hashss && s[last:] == substr { return last } for i := last - 1; i >= 0; i-- { - h *= primeRK + h *= bytealg.PrimeRK h += uint32(s[i]) h -= pow * uint32(s[i+n]) if h == hashss && s[i:i+n] == substr { @@ -180,6 +143,14 @@ func IndexAny(s, chars string) int { // Avoid scanning all of s. return -1 } + if len(chars) == 1 { + // Avoid scanning all of s. + r := rune(chars[0]) + if r >= utf8.RuneSelf { + r = utf8.RuneError + } + return IndexRune(s, r) + } if len(s) > 8 { if as, isASCII := makeASCIISet(chars); isASCII { for i := 0; i < len(s); i++ { @@ -191,10 +162,8 @@ func IndexAny(s, chars string) int { } } for i, c := range s { - for _, m := range chars { - if c == m { - return i - } + if IndexRune(chars, c) >= 0 { + return i } } return -1 @@ -208,6 +177,16 @@ func LastIndexAny(s, chars string) int { // Avoid scanning all of s. return -1 } + if len(s) == 1 { + rc := rune(s[0]) + if rc >= utf8.RuneSelf { + rc = utf8.RuneError + } + if IndexRune(chars, rc) >= 0 { + return 0 + } + return -1 + } if len(s) > 8 { if as, isASCII := makeASCIISet(chars); isASCII { for i := len(s) - 1; i >= 0; i-- { @@ -218,13 +197,25 @@ func LastIndexAny(s, chars string) int { return -1 } } + if len(chars) == 1 { + rc := rune(chars[0]) + if rc >= utf8.RuneSelf { + rc = utf8.RuneError + } + for i := len(s); i > 0; { + r, size := utf8.DecodeLastRuneInString(s[:i]) + i -= size + if rc == r { + return i + } + } + return -1 + } for i := len(s); i > 0; { r, size := utf8.DecodeLastRuneInString(s[:i]) i -= size - for _, c := range chars { - if r == c { - return i - } + if IndexRune(chars, r) >= 0 { + return i } } return -1 @@ -378,8 +369,9 @@ func Fields(s string) []string { // FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c) // and returns an array of slices of s. If all code points in s satisfy f(c) or the // string is empty, an empty slice is returned. -// FieldsFunc makes no guarantees about the order in which it calls f(c). -// If f does not return consistent results for a given c, FieldsFunc may crash. +// +// FieldsFunc makes no guarantees about the order in which it calls f(c) +// and assumes that f always returns the same value for a given c. func FieldsFunc(s string, f func(rune) bool) []string { // A span is used to record a slice of s of the form s[start:end]. // The start index is inclusive and the end index is exclusive. @@ -390,25 +382,29 @@ func FieldsFunc(s string, f func(rune) bool) []string { spans := make([]span, 0, 32) // Find the field start and end indices. - wasField := false - fromIndex := 0 - for i, rune := range s { + // Doing this in a separate pass (rather than slicing the string s + // and collecting the result substrings right away) is significantly + // more efficient, possibly due to cache effects. + start := -1 // valid span start if >= 0 + for end, rune := range s { if f(rune) { - if wasField { - spans = append(spans, span{start: fromIndex, end: i}) - wasField = false + if start >= 0 { + spans = append(spans, span{start, end}) + // Set start to a negative value. + // Note: using -1 here consistently and reproducibly + // slows down this code by a several percent on amd64. + start = ^start } } else { - if !wasField { - fromIndex = i - wasField = true + if start < 0 { + start = end } } } // Last field might end at EOF. - if wasField { - spans = append(spans, span{fromIndex, len(s)}) + if start >= 0 { + spans = append(spans, span{start, len(s)}) } // Create strings from recorded field indices. @@ -837,7 +833,7 @@ func makeCutsetFunc(cutset string) func(rune) bool { // Trim returns a slice of the string s with all leading and // trailing Unicode code points contained in cutset removed. -func Trim(s string, cutset string) string { +func Trim(s, cutset string) string { if s == "" || cutset == "" { return s } @@ -848,7 +844,7 @@ func Trim(s string, cutset string) string { // Unicode code points contained in cutset removed. // // To remove a prefix, use TrimPrefix instead. -func TrimLeft(s string, cutset string) string { +func TrimLeft(s, cutset string) string { if s == "" || cutset == "" { return s } @@ -859,7 +855,7 @@ func TrimLeft(s string, cutset string) string { // Unicode code points contained in cutset removed. // // To remove a suffix, use TrimSuffix instead. -func TrimRight(s string, cutset string) string { +func TrimRight(s, cutset string) string { if s == "" || cutset == "" { return s } @@ -1053,11 +1049,11 @@ func Index(s, substr string) int { if s[i] != c0 { // IndexByte is faster than bytealg.IndexString, so use it as long as // we're not getting lots of false positives. - o := IndexByte(s[i:t], c0) + o := IndexByte(s[i+1:t], c0) if o < 0 { return -1 } - i += o + i += o + 1 } if s[i+1] == c1 && s[i:i+n] == substr { return i @@ -1082,11 +1078,11 @@ func Index(s, substr string) int { fails := 0 for i < t { if s[i] != c0 { - o := IndexByte(s[i:t], c0) + o := IndexByte(s[i+1:t], c0) if o < 0 { return -1 } - i += o + i += o + 1 } if s[i+1] == c1 && s[i:i+n] == substr { return i @@ -1095,7 +1091,7 @@ func Index(s, substr string) int { fails++ if fails >= 4+i>>4 && i < t { // See comment in ../bytes/bytes.go. - j := indexRabinKarp(s[i:], substr) + j := bytealg.IndexRabinKarp(s[i:], substr) if j < 0 { return -1 } @@ -1104,26 +1100,3 @@ func Index(s, substr string) int { } return -1 } - -func indexRabinKarp(s, substr string) int { - // Rabin-Karp search - hashss, pow := hashStr(substr) - n := len(substr) - var h uint32 - for i := 0; i < n; i++ { - h = h*primeRK + uint32(s[i]) - } - if h == hashss && s[:n] == substr { - return 0 - } - for i := n; i < len(s); { - h *= primeRK - h += uint32(s[i]) - h -= pow * uint32(s[i-n]) - i++ - if h == hashss && s[i-n:i] == substr { - return i - n - } - } - return -1 -} diff --git a/libgo/go/strings/strings_test.go b/libgo/go/strings/strings_test.go index 7eddce5..095e482 100644 --- a/libgo/go/strings/strings_test.go +++ b/libgo/go/strings/strings_test.go @@ -154,6 +154,7 @@ var indexAnyTests = []IndexTest{ {"", "abc", -1}, {"a", "", -1}, {"a", "a", 0}, + {"\x80", "\xffb", 0}, {"aaa", "a", 0}, {"abc", "xyz", -1}, {"abc", "xcz", 2}, @@ -164,6 +165,7 @@ var indexAnyTests = []IndexTest{ {dots + dots + dots, " ", -1}, {"012abcba210", "\xffb", 4}, {"012\x80bcb\x80210", "\xffb", 3}, + {"0123456\xcf\x80abc", "\xcfb\x80", 10}, } var lastIndexAnyTests = []IndexTest{ @@ -172,6 +174,7 @@ var lastIndexAnyTests = []IndexTest{ {"", "abc", -1}, {"a", "", -1}, {"a", "a", 0}, + {"\x80", "\xffb", 0}, {"aaa", "a", 2}, {"abc", "xyz", -1}, {"abc", "ab", 1}, @@ -182,6 +185,7 @@ var lastIndexAnyTests = []IndexTest{ {dots + dots + dots, " ", -1}, {"012abcba210", "\xffb", 6}, {"012\x80bcb\x80210", "\xffb", 7}, + {"0123456\xcf\x80abc", "\xcfb\x80", 10}, } // Execute f on each test case. funcName should be the name of f; it's used @@ -682,8 +686,8 @@ func TestMap(t *testing.T) { } return r } - s := string(utf8.RuneSelf) + string(utf8.MaxRune) - r := string(utf8.MaxRune) + string(utf8.RuneSelf) // reverse of s + s := string(rune(utf8.RuneSelf)) + string(utf8.MaxRune) + r := string(utf8.MaxRune) + string(rune(utf8.RuneSelf)) // reverse of s m = Map(encode, s) if m != r { t.Errorf("encoding not handled correctly: expected %q got %q", r, m) @@ -1791,10 +1795,24 @@ func BenchmarkRepeat(b *testing.B) { } func BenchmarkIndexAnyASCII(b *testing.B) { - x := Repeat("#", 4096) // Never matches set - cs := "0123456789abcdef" - for k := 1; k <= 4096; k <<= 4 { - for j := 1; j <= 16; j <<= 1 { + x := Repeat("#", 2048) // Never matches set + cs := "0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz" + for k := 1; k <= 2048; k <<= 4 { + for j := 1; j <= 64; j <<= 1 { + b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) { + for i := 0; i < b.N; i++ { + IndexAny(x[:k], cs[:j]) + } + }) + } + } +} + +func BenchmarkIndexAnyUTF8(b *testing.B) { + x := Repeat("#", 2048) // Never matches set + cs := "你好世界, hello world. 你好世界, hello world. 你好世界, hello world." + for k := 1; k <= 2048; k <<= 4 { + for j := 1; j <= 64; j <<= 1 { b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) { for i := 0; i < b.N; i++ { IndexAny(x[:k], cs[:j]) @@ -1804,6 +1822,34 @@ func BenchmarkIndexAnyASCII(b *testing.B) { } } +func BenchmarkLastIndexAnyASCII(b *testing.B) { + x := Repeat("#", 2048) // Never matches set + cs := "0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz" + for k := 1; k <= 2048; k <<= 4 { + for j := 1; j <= 64; j <<= 1 { + b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) { + for i := 0; i < b.N; i++ { + LastIndexAny(x[:k], cs[:j]) + } + }) + } + } +} + +func BenchmarkLastIndexAnyUTF8(b *testing.B) { + x := Repeat("#", 2048) // Never matches set + cs := "你好世界, hello world. 你好世界, hello world. 你好世界, hello world." + for k := 1; k <= 2048; k <<= 4 { + for j := 1; j <= 64; j <<= 1 { + b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) { + for i := 0; i < b.N; i++ { + LastIndexAny(x[:k], cs[:j]) + } + }) + } + } +} + func BenchmarkTrimASCII(b *testing.B) { cs := "0123456789abcdef" for k := 1; k <= 4096; k <<= 4 { |