diff options
Diffstat (limited to 'libgo/go/strings/strings.go')
-rw-r--r-- | libgo/go/strings/strings.go | 163 |
1 files changed, 108 insertions, 55 deletions
diff --git a/libgo/go/strings/strings.go b/libgo/go/strings/strings.go index 919e8c8..60a281a 100644 --- a/libgo/go/strings/strings.go +++ b/libgo/go/strings/strings.go @@ -77,48 +77,18 @@ func hashStrRev(sep string) (uint32, uint32) { func Count(s, sep string) int { n := 0 // special cases - switch { - case len(sep) == 0: + if len(sep) == 0 { return utf8.RuneCountInString(s) + 1 - case len(sep) == 1: - // special case worth making fast - c := sep[0] - for i := 0; i < len(s); i++ { - if s[i] == c { - n++ - } - } - return n - case len(sep) > len(s): - return 0 - case len(sep) == len(s): - if sep == s { - return 1 - } - return 0 } - // Rabin-Karp search - hashsep, pow := hashStr(sep) - h := uint32(0) - for i := 0; i < len(sep); i++ { - h = h*primeRK + uint32(s[i]) - } - lastmatch := 0 - if h == hashsep && s[:len(sep)] == sep { - n++ - lastmatch = len(sep) - } - for i := len(sep); i < len(s); { - h *= primeRK - h += uint32(s[i]) - h -= pow * uint32(s[i-len(sep)]) - i++ - if h == hashsep && lastmatch <= i-len(sep) && s[i-len(sep):i] == sep { - n++ - lastmatch = i + offset := 0 + for { + i := Index(s[offset:], sep) + if i == -1 { + return n } + n++ + offset += i + len(sep) } - return n } // Contains reports whether substr is within s. @@ -175,24 +145,40 @@ func LastIndex(s, sep string) int { // IndexRune returns the index of the first instance of the Unicode code point // r, or -1 if rune is not present in s. +// If r is utf8.RuneError, it returns the first instance of any +// invalid UTF-8 byte sequence. func IndexRune(s string, r rune) int { switch { - case r < utf8.RuneSelf: + case 0 <= r && r < utf8.RuneSelf: return IndexByte(s, byte(r)) - default: - for i, c := range s { - if c == r { + case r == utf8.RuneError: + for i, r := range s { + if r == utf8.RuneError { return i } } + return -1 + case !utf8.ValidRune(r): + return -1 + default: + return Index(s, string(r)) } - return -1 } // IndexAny returns the index of the first instance of any Unicode code point // from chars in s, or -1 if no Unicode code point from chars is present in s. func IndexAny(s, chars string) int { if len(chars) > 0 { + if len(s) > 8 { + if as, isASCII := makeASCIISet(chars); isASCII { + for i := 0; i < len(s); i++ { + if as.contains(s[i]) { + return i + } + } + return -1 + } + } for i, c := range s { for _, m := range chars { if c == m { @@ -209,11 +195,21 @@ func IndexAny(s, chars string) int { // present in s. func LastIndexAny(s, chars string) int { if len(chars) > 0 { + if len(s) > 8 { + if as, isASCII := makeASCIISet(chars); isASCII { + for i := len(s) - 1; i >= 0; i-- { + if as.contains(s[i]) { + return i + } + } + return -1 + } + } for i := len(s); i > 0; { - rune, size := utf8.DecodeLastRuneInString(s[0:i]) + r, size := utf8.DecodeLastRuneInString(s[:i]) i -= size - for _, m := range chars { - if rune == m { + for _, c := range chars { + if r == c { return i } } @@ -342,11 +338,19 @@ func FieldsFunc(s string, f func(rune) bool) []string { // Join concatenates the elements of a to create a single string. The separator string // sep is placed between elements in the resulting string. func Join(a []string, sep string) string { - if len(a) == 0 { + switch len(a) { + case 0: return "" - } - if len(a) == 1 { + case 1: return a[0] + case 2: + // Special case for common small values. + // Remove if golang.org/issue/6714 is fixed + return a[0] + sep + a[1] + case 3: + // Special case for common small values. + // Remove if golang.org/issue/6714 is fixed + return a[0] + sep + a[1] + sep + a[2] } n := len(sep) * (len(a) - 1) for i := 0; i < len(a); i++ { @@ -416,7 +420,20 @@ func Map(mapping func(rune) rune, s string) string { } // Repeat returns a new string consisting of count copies of the string s. +// +// It panics if count is negative or if +// the result of (len(s) * count) overflows. func Repeat(s string, count int) string { + // Since we cannot return an error on overflow, + // we should panic if the repeat will generate + // an overflow. + // See Issue golang.org/issue/16237 + if count < 0 { + panic("strings: negative Repeat count") + } else if count > 0 && len(s)*count/count != len(s) { + panic("strings: Repeat count causes overflow") + } + b := make([]byte, len(s)*count) bp := copy(b, s) for bp < len(b) { @@ -437,20 +454,20 @@ func ToTitle(s string) string { return Map(unicode.ToTitle, s) } // ToUpperSpecial returns a copy of the string s with all Unicode letters mapped to their // upper case, giving priority to the special casing rules. -func ToUpperSpecial(_case unicode.SpecialCase, s string) string { - return Map(func(r rune) rune { return _case.ToUpper(r) }, s) +func ToUpperSpecial(c unicode.SpecialCase, s string) string { + return Map(func(r rune) rune { return c.ToUpper(r) }, s) } // ToLowerSpecial returns a copy of the string s with all Unicode letters mapped to their // lower case, giving priority to the special casing rules. -func ToLowerSpecial(_case unicode.SpecialCase, s string) string { - return Map(func(r rune) rune { return _case.ToLower(r) }, s) +func ToLowerSpecial(c unicode.SpecialCase, s string) string { + return Map(func(r rune) rune { return c.ToLower(r) }, s) } // ToTitleSpecial returns a copy of the string s with all Unicode letters mapped to their // title case, giving priority to the special casing rules. -func ToTitleSpecial(_case unicode.SpecialCase, s string) string { - return Map(func(r rune) rune { return _case.ToTitle(r) }, s) +func ToTitleSpecial(c unicode.SpecialCase, s string) string { + return Map(func(r rune) rune { return c.ToTitle(r) }, s) } // isSeparator reports whether the rune could mark a word boundary. @@ -573,7 +590,43 @@ func lastIndexFunc(s string, f func(rune) bool, truth bool) int { return -1 } +// asciiSet is a 32-byte value, where each bit represents the presence of a +// given ASCII character in the set. The 128-bits of the lower 16 bytes, +// starting with the least-significant bit of the lowest word to the +// most-significant bit of the highest word, map to the full range of all +// 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed, +// ensuring that any non-ASCII character will be reported as not in the set. +type asciiSet [8]uint32 + +// makeASCIISet creates a set of ASCII characters and reports whether all +// characters in chars are ASCII. +func makeASCIISet(chars string) (as asciiSet, ok bool) { + for i := 0; i < len(chars); i++ { + c := chars[i] + if c >= utf8.RuneSelf { + return as, false + } + as[c>>5] |= 1 << uint(c&31) + } + return as, true +} + +// contains reports whether c is inside the set. +func (as *asciiSet) contains(c byte) bool { + return (as[c>>5] & (1 << uint(c&31))) != 0 +} + func makeCutsetFunc(cutset string) func(rune) bool { + if len(cutset) == 1 && cutset[0] < utf8.RuneSelf { + return func(r rune) bool { + return r == rune(cutset[0]) + } + } + if as, isASCII := makeASCIISet(cutset); isASCII { + return func(r rune) bool { + return r < utf8.RuneSelf && as.contains(byte(r)) + } + } return func(r rune) bool { return IndexRune(cutset, r) >= 0 } } |