diff options
author | Ian Lance Taylor <iant@golang.org> | 2020-07-27 22:27:54 -0700 |
---|---|---|
committer | Ian Lance Taylor <iant@golang.org> | 2020-08-01 11:21:40 -0700 |
commit | f75af8c1464e948b5e166cf5ab09ebf0d82fc253 (patch) | |
tree | 3ba3299859b504bdeb477727471216bd094a0191 /libgo/go/strings/strings.go | |
parent | 75a23e59031fe673fc3b2e60fd1fe5f4c70ecb85 (diff) | |
download | gcc-f75af8c1464e948b5e166cf5ab09ebf0d82fc253.zip gcc-f75af8c1464e948b5e166cf5ab09ebf0d82fc253.tar.gz gcc-f75af8c1464e948b5e166cf5ab09ebf0d82fc253.tar.bz2 |
libgo: update to go1.15rc1
Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/245157
Diffstat (limited to 'libgo/go/strings/strings.go')
-rw-r--r-- | libgo/go/strings/strings.go | 157 |
1 files changed, 65 insertions, 92 deletions
diff --git a/libgo/go/strings/strings.go b/libgo/go/strings/strings.go index 238d657..d6f5cea 100644 --- a/libgo/go/strings/strings.go +++ b/libgo/go/strings/strings.go @@ -36,43 +36,6 @@ func explode(s string, n int) []string { return a } -// primeRK is the prime base used in Rabin-Karp algorithm. -const primeRK = 16777619 - -// hashStr returns the hash and the appropriate multiplicative -// factor for use in Rabin-Karp algorithm. -func hashStr(sep string) (uint32, uint32) { - hash := uint32(0) - for i := 0; i < len(sep); i++ { - hash = hash*primeRK + uint32(sep[i]) - } - var pow, sq uint32 = 1, primeRK - for i := len(sep); i > 0; i >>= 1 { - if i&1 != 0 { - pow *= sq - } - sq *= sq - } - return hash, pow -} - -// hashStrRev returns the hash of the reverse of sep and the -// appropriate multiplicative factor for use in Rabin-Karp algorithm. -func hashStrRev(sep string) (uint32, uint32) { - hash := uint32(0) - for i := len(sep) - 1; i >= 0; i-- { - hash = hash*primeRK + uint32(sep[i]) - } - var pow, sq uint32 = 1, primeRK - for i := len(sep); i > 0; i >>= 1 { - if i&1 != 0 { - pow *= sq - } - sq *= sq - } - return hash, pow -} - // Count counts the number of non-overlapping instances of substr in s. // If substr is an empty string, Count returns 1 + the number of Unicode code points in s. func Count(s, substr string) int { @@ -126,17 +89,17 @@ func LastIndex(s, substr string) int { return -1 } // Rabin-Karp search from the end of the string - hashss, pow := hashStrRev(substr) + hashss, pow := bytealg.HashStrRev(substr) last := len(s) - n var h uint32 for i := len(s) - 1; i >= last; i-- { - h = h*primeRK + uint32(s[i]) + h = h*bytealg.PrimeRK + uint32(s[i]) } if h == hashss && s[last:] == substr { return last } for i := last - 1; i >= 0; i-- { - h *= primeRK + h *= bytealg.PrimeRK h += uint32(s[i]) h -= pow * uint32(s[i+n]) if h == hashss && s[i:i+n] == substr { @@ -180,6 +143,14 @@ func IndexAny(s, chars string) int { // Avoid scanning all of s. return -1 } + if len(chars) == 1 { + // Avoid scanning all of s. + r := rune(chars[0]) + if r >= utf8.RuneSelf { + r = utf8.RuneError + } + return IndexRune(s, r) + } if len(s) > 8 { if as, isASCII := makeASCIISet(chars); isASCII { for i := 0; i < len(s); i++ { @@ -191,10 +162,8 @@ func IndexAny(s, chars string) int { } } for i, c := range s { - for _, m := range chars { - if c == m { - return i - } + if IndexRune(chars, c) >= 0 { + return i } } return -1 @@ -208,6 +177,16 @@ func LastIndexAny(s, chars string) int { // Avoid scanning all of s. return -1 } + if len(s) == 1 { + rc := rune(s[0]) + if rc >= utf8.RuneSelf { + rc = utf8.RuneError + } + if IndexRune(chars, rc) >= 0 { + return 0 + } + return -1 + } if len(s) > 8 { if as, isASCII := makeASCIISet(chars); isASCII { for i := len(s) - 1; i >= 0; i-- { @@ -218,13 +197,25 @@ func LastIndexAny(s, chars string) int { return -1 } } + if len(chars) == 1 { + rc := rune(chars[0]) + if rc >= utf8.RuneSelf { + rc = utf8.RuneError + } + for i := len(s); i > 0; { + r, size := utf8.DecodeLastRuneInString(s[:i]) + i -= size + if rc == r { + return i + } + } + return -1 + } for i := len(s); i > 0; { r, size := utf8.DecodeLastRuneInString(s[:i]) i -= size - for _, c := range chars { - if r == c { - return i - } + if IndexRune(chars, r) >= 0 { + return i } } return -1 @@ -378,8 +369,9 @@ func Fields(s string) []string { // FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c) // and returns an array of slices of s. If all code points in s satisfy f(c) or the // string is empty, an empty slice is returned. -// FieldsFunc makes no guarantees about the order in which it calls f(c). -// If f does not return consistent results for a given c, FieldsFunc may crash. +// +// FieldsFunc makes no guarantees about the order in which it calls f(c) +// and assumes that f always returns the same value for a given c. func FieldsFunc(s string, f func(rune) bool) []string { // A span is used to record a slice of s of the form s[start:end]. // The start index is inclusive and the end index is exclusive. @@ -390,25 +382,29 @@ func FieldsFunc(s string, f func(rune) bool) []string { spans := make([]span, 0, 32) // Find the field start and end indices. - wasField := false - fromIndex := 0 - for i, rune := range s { + // Doing this in a separate pass (rather than slicing the string s + // and collecting the result substrings right away) is significantly + // more efficient, possibly due to cache effects. + start := -1 // valid span start if >= 0 + for end, rune := range s { if f(rune) { - if wasField { - spans = append(spans, span{start: fromIndex, end: i}) - wasField = false + if start >= 0 { + spans = append(spans, span{start, end}) + // Set start to a negative value. + // Note: using -1 here consistently and reproducibly + // slows down this code by a several percent on amd64. + start = ^start } } else { - if !wasField { - fromIndex = i - wasField = true + if start < 0 { + start = end } } } // Last field might end at EOF. - if wasField { - spans = append(spans, span{fromIndex, len(s)}) + if start >= 0 { + spans = append(spans, span{start, len(s)}) } // Create strings from recorded field indices. @@ -837,7 +833,7 @@ func makeCutsetFunc(cutset string) func(rune) bool { // Trim returns a slice of the string s with all leading and // trailing Unicode code points contained in cutset removed. -func Trim(s string, cutset string) string { +func Trim(s, cutset string) string { if s == "" || cutset == "" { return s } @@ -848,7 +844,7 @@ func Trim(s string, cutset string) string { // Unicode code points contained in cutset removed. // // To remove a prefix, use TrimPrefix instead. -func TrimLeft(s string, cutset string) string { +func TrimLeft(s, cutset string) string { if s == "" || cutset == "" { return s } @@ -859,7 +855,7 @@ func TrimLeft(s string, cutset string) string { // Unicode code points contained in cutset removed. // // To remove a suffix, use TrimSuffix instead. -func TrimRight(s string, cutset string) string { +func TrimRight(s, cutset string) string { if s == "" || cutset == "" { return s } @@ -1053,11 +1049,11 @@ func Index(s, substr string) int { if s[i] != c0 { // IndexByte is faster than bytealg.IndexString, so use it as long as // we're not getting lots of false positives. - o := IndexByte(s[i:t], c0) + o := IndexByte(s[i+1:t], c0) if o < 0 { return -1 } - i += o + i += o + 1 } if s[i+1] == c1 && s[i:i+n] == substr { return i @@ -1082,11 +1078,11 @@ func Index(s, substr string) int { fails := 0 for i < t { if s[i] != c0 { - o := IndexByte(s[i:t], c0) + o := IndexByte(s[i+1:t], c0) if o < 0 { return -1 } - i += o + i += o + 1 } if s[i+1] == c1 && s[i:i+n] == substr { return i @@ -1095,7 +1091,7 @@ func Index(s, substr string) int { fails++ if fails >= 4+i>>4 && i < t { // See comment in ../bytes/bytes.go. - j := indexRabinKarp(s[i:], substr) + j := bytealg.IndexRabinKarp(s[i:], substr) if j < 0 { return -1 } @@ -1104,26 +1100,3 @@ func Index(s, substr string) int { } return -1 } - -func indexRabinKarp(s, substr string) int { - // Rabin-Karp search - hashss, pow := hashStr(substr) - n := len(substr) - var h uint32 - for i := 0; i < n; i++ { - h = h*primeRK + uint32(s[i]) - } - if h == hashss && s[:n] == substr { - return 0 - } - for i := n; i < len(s); { - h *= primeRK - h += uint32(s[i]) - h -= pow * uint32(s[i-n]) - i++ - if h == hashss && s[i-n:i] == substr { - return i - n - } - } - return -1 -} |