aboutsummaryrefslogtreecommitdiff
path: root/libgo/go/strings/strings.go
diff options
context:
space:
mode:
authorIan Lance Taylor <iant@golang.org>2020-07-27 22:27:54 -0700
committerIan Lance Taylor <iant@golang.org>2020-08-01 11:21:40 -0700
commitf75af8c1464e948b5e166cf5ab09ebf0d82fc253 (patch)
tree3ba3299859b504bdeb477727471216bd094a0191 /libgo/go/strings/strings.go
parent75a23e59031fe673fc3b2e60fd1fe5f4c70ecb85 (diff)
downloadgcc-f75af8c1464e948b5e166cf5ab09ebf0d82fc253.zip
gcc-f75af8c1464e948b5e166cf5ab09ebf0d82fc253.tar.gz
gcc-f75af8c1464e948b5e166cf5ab09ebf0d82fc253.tar.bz2
libgo: update to go1.15rc1
Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/245157
Diffstat (limited to 'libgo/go/strings/strings.go')
-rw-r--r--libgo/go/strings/strings.go157
1 files changed, 65 insertions, 92 deletions
diff --git a/libgo/go/strings/strings.go b/libgo/go/strings/strings.go
index 238d657..d6f5cea 100644
--- a/libgo/go/strings/strings.go
+++ b/libgo/go/strings/strings.go
@@ -36,43 +36,6 @@ func explode(s string, n int) []string {
return a
}
-// primeRK is the prime base used in Rabin-Karp algorithm.
-const primeRK = 16777619
-
-// hashStr returns the hash and the appropriate multiplicative
-// factor for use in Rabin-Karp algorithm.
-func hashStr(sep string) (uint32, uint32) {
- hash := uint32(0)
- for i := 0; i < len(sep); i++ {
- hash = hash*primeRK + uint32(sep[i])
- }
- var pow, sq uint32 = 1, primeRK
- for i := len(sep); i > 0; i >>= 1 {
- if i&1 != 0 {
- pow *= sq
- }
- sq *= sq
- }
- return hash, pow
-}
-
-// hashStrRev returns the hash of the reverse of sep and the
-// appropriate multiplicative factor for use in Rabin-Karp algorithm.
-func hashStrRev(sep string) (uint32, uint32) {
- hash := uint32(0)
- for i := len(sep) - 1; i >= 0; i-- {
- hash = hash*primeRK + uint32(sep[i])
- }
- var pow, sq uint32 = 1, primeRK
- for i := len(sep); i > 0; i >>= 1 {
- if i&1 != 0 {
- pow *= sq
- }
- sq *= sq
- }
- return hash, pow
-}
-
// Count counts the number of non-overlapping instances of substr in s.
// If substr is an empty string, Count returns 1 + the number of Unicode code points in s.
func Count(s, substr string) int {
@@ -126,17 +89,17 @@ func LastIndex(s, substr string) int {
return -1
}
// Rabin-Karp search from the end of the string
- hashss, pow := hashStrRev(substr)
+ hashss, pow := bytealg.HashStrRev(substr)
last := len(s) - n
var h uint32
for i := len(s) - 1; i >= last; i-- {
- h = h*primeRK + uint32(s[i])
+ h = h*bytealg.PrimeRK + uint32(s[i])
}
if h == hashss && s[last:] == substr {
return last
}
for i := last - 1; i >= 0; i-- {
- h *= primeRK
+ h *= bytealg.PrimeRK
h += uint32(s[i])
h -= pow * uint32(s[i+n])
if h == hashss && s[i:i+n] == substr {
@@ -180,6 +143,14 @@ func IndexAny(s, chars string) int {
// Avoid scanning all of s.
return -1
}
+ if len(chars) == 1 {
+ // Avoid scanning all of s.
+ r := rune(chars[0])
+ if r >= utf8.RuneSelf {
+ r = utf8.RuneError
+ }
+ return IndexRune(s, r)
+ }
if len(s) > 8 {
if as, isASCII := makeASCIISet(chars); isASCII {
for i := 0; i < len(s); i++ {
@@ -191,10 +162,8 @@ func IndexAny(s, chars string) int {
}
}
for i, c := range s {
- for _, m := range chars {
- if c == m {
- return i
- }
+ if IndexRune(chars, c) >= 0 {
+ return i
}
}
return -1
@@ -208,6 +177,16 @@ func LastIndexAny(s, chars string) int {
// Avoid scanning all of s.
return -1
}
+ if len(s) == 1 {
+ rc := rune(s[0])
+ if rc >= utf8.RuneSelf {
+ rc = utf8.RuneError
+ }
+ if IndexRune(chars, rc) >= 0 {
+ return 0
+ }
+ return -1
+ }
if len(s) > 8 {
if as, isASCII := makeASCIISet(chars); isASCII {
for i := len(s) - 1; i >= 0; i-- {
@@ -218,13 +197,25 @@ func LastIndexAny(s, chars string) int {
return -1
}
}
+ if len(chars) == 1 {
+ rc := rune(chars[0])
+ if rc >= utf8.RuneSelf {
+ rc = utf8.RuneError
+ }
+ for i := len(s); i > 0; {
+ r, size := utf8.DecodeLastRuneInString(s[:i])
+ i -= size
+ if rc == r {
+ return i
+ }
+ }
+ return -1
+ }
for i := len(s); i > 0; {
r, size := utf8.DecodeLastRuneInString(s[:i])
i -= size
- for _, c := range chars {
- if r == c {
- return i
- }
+ if IndexRune(chars, r) >= 0 {
+ return i
}
}
return -1
@@ -378,8 +369,9 @@ func Fields(s string) []string {
// FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c)
// and returns an array of slices of s. If all code points in s satisfy f(c) or the
// string is empty, an empty slice is returned.
-// FieldsFunc makes no guarantees about the order in which it calls f(c).
-// If f does not return consistent results for a given c, FieldsFunc may crash.
+//
+// FieldsFunc makes no guarantees about the order in which it calls f(c)
+// and assumes that f always returns the same value for a given c.
func FieldsFunc(s string, f func(rune) bool) []string {
// A span is used to record a slice of s of the form s[start:end].
// The start index is inclusive and the end index is exclusive.
@@ -390,25 +382,29 @@ func FieldsFunc(s string, f func(rune) bool) []string {
spans := make([]span, 0, 32)
// Find the field start and end indices.
- wasField := false
- fromIndex := 0
- for i, rune := range s {
+ // Doing this in a separate pass (rather than slicing the string s
+ // and collecting the result substrings right away) is significantly
+ // more efficient, possibly due to cache effects.
+ start := -1 // valid span start if >= 0
+ for end, rune := range s {
if f(rune) {
- if wasField {
- spans = append(spans, span{start: fromIndex, end: i})
- wasField = false
+ if start >= 0 {
+ spans = append(spans, span{start, end})
+ // Set start to a negative value.
+ // Note: using -1 here consistently and reproducibly
+ // slows down this code by a several percent on amd64.
+ start = ^start
}
} else {
- if !wasField {
- fromIndex = i
- wasField = true
+ if start < 0 {
+ start = end
}
}
}
// Last field might end at EOF.
- if wasField {
- spans = append(spans, span{fromIndex, len(s)})
+ if start >= 0 {
+ spans = append(spans, span{start, len(s)})
}
// Create strings from recorded field indices.
@@ -837,7 +833,7 @@ func makeCutsetFunc(cutset string) func(rune) bool {
// Trim returns a slice of the string s with all leading and
// trailing Unicode code points contained in cutset removed.
-func Trim(s string, cutset string) string {
+func Trim(s, cutset string) string {
if s == "" || cutset == "" {
return s
}
@@ -848,7 +844,7 @@ func Trim(s string, cutset string) string {
// Unicode code points contained in cutset removed.
//
// To remove a prefix, use TrimPrefix instead.
-func TrimLeft(s string, cutset string) string {
+func TrimLeft(s, cutset string) string {
if s == "" || cutset == "" {
return s
}
@@ -859,7 +855,7 @@ func TrimLeft(s string, cutset string) string {
// Unicode code points contained in cutset removed.
//
// To remove a suffix, use TrimSuffix instead.
-func TrimRight(s string, cutset string) string {
+func TrimRight(s, cutset string) string {
if s == "" || cutset == "" {
return s
}
@@ -1053,11 +1049,11 @@ func Index(s, substr string) int {
if s[i] != c0 {
// IndexByte is faster than bytealg.IndexString, so use it as long as
// we're not getting lots of false positives.
- o := IndexByte(s[i:t], c0)
+ o := IndexByte(s[i+1:t], c0)
if o < 0 {
return -1
}
- i += o
+ i += o + 1
}
if s[i+1] == c1 && s[i:i+n] == substr {
return i
@@ -1082,11 +1078,11 @@ func Index(s, substr string) int {
fails := 0
for i < t {
if s[i] != c0 {
- o := IndexByte(s[i:t], c0)
+ o := IndexByte(s[i+1:t], c0)
if o < 0 {
return -1
}
- i += o
+ i += o + 1
}
if s[i+1] == c1 && s[i:i+n] == substr {
return i
@@ -1095,7 +1091,7 @@ func Index(s, substr string) int {
fails++
if fails >= 4+i>>4 && i < t {
// See comment in ../bytes/bytes.go.
- j := indexRabinKarp(s[i:], substr)
+ j := bytealg.IndexRabinKarp(s[i:], substr)
if j < 0 {
return -1
}
@@ -1104,26 +1100,3 @@ func Index(s, substr string) int {
}
return -1
}
-
-func indexRabinKarp(s, substr string) int {
- // Rabin-Karp search
- hashss, pow := hashStr(substr)
- n := len(substr)
- var h uint32
- for i := 0; i < n; i++ {
- h = h*primeRK + uint32(s[i])
- }
- if h == hashss && s[:n] == substr {
- return 0
- }
- for i := n; i < len(s); {
- h *= primeRK
- h += uint32(s[i])
- h -= pow * uint32(s[i-n])
- i++
- if h == hashss && s[i-n:i] == substr {
- return i - n
- }
- }
- return -1
-}