aboutsummaryrefslogtreecommitdiff
path: root/libgo/go/strings
diff options
context:
space:
mode:
authorIan Lance Taylor <iant@golang.org>2017-01-14 00:05:42 +0000
committerIan Lance Taylor <ian@gcc.gnu.org>2017-01-14 00:05:42 +0000
commitc2047754c300b68c05d65faa8dc2925fe67b71b4 (patch)
treee183ae81a1f48a02945cb6de463a70c5be1b06f6 /libgo/go/strings
parent829afb8f05602bb31c9c597b24df7377fed4f059 (diff)
downloadgcc-c2047754c300b68c05d65faa8dc2925fe67b71b4.zip
gcc-c2047754c300b68c05d65faa8dc2925fe67b71b4.tar.gz
gcc-c2047754c300b68c05d65faa8dc2925fe67b71b4.tar.bz2
libgo: update to Go 1.8 release candidate 1
Compiler changes: * Change map assignment to use mapassign and assign value directly. * Change string iteration to use decoderune, faster for ASCII strings. * Change makeslice to take int, and use makeslice64 for larger values. * Add new noverflow field to hmap struct used for maps. Unresolved problems, to be fixed later: * Commented out test in go/types/sizes_test.go that doesn't compile. * Commented out reflect.TestStructOf test for padding after zero-sized field. Reviewed-on: https://go-review.googlesource.com/35231 gotools/: Updates for Go 1.8rc1. * Makefile.am (go_cmd_go_files): Add bug.go. (s-zdefaultcc): Write defaultPkgConfig. * Makefile.in: Rebuild. From-SVN: r244456
Diffstat (limited to 'libgo/go/strings')
-rw-r--r--libgo/go/strings/strings.go163
-rw-r--r--libgo/go/strings/strings_amd64.go52
-rw-r--r--libgo/go/strings/strings_generic.go2
-rw-r--r--libgo/go/strings/strings_s390x.go100
-rw-r--r--libgo/go/strings/strings_test.go182
5 files changed, 425 insertions, 74 deletions
diff --git a/libgo/go/strings/strings.go b/libgo/go/strings/strings.go
index 919e8c8..60a281a 100644
--- a/libgo/go/strings/strings.go
+++ b/libgo/go/strings/strings.go
@@ -77,48 +77,18 @@ func hashStrRev(sep string) (uint32, uint32) {
func Count(s, sep string) int {
n := 0
// special cases
- switch {
- case len(sep) == 0:
+ if len(sep) == 0 {
return utf8.RuneCountInString(s) + 1
- case len(sep) == 1:
- // special case worth making fast
- c := sep[0]
- for i := 0; i < len(s); i++ {
- if s[i] == c {
- n++
- }
- }
- return n
- case len(sep) > len(s):
- return 0
- case len(sep) == len(s):
- if sep == s {
- return 1
- }
- return 0
}
- // Rabin-Karp search
- hashsep, pow := hashStr(sep)
- h := uint32(0)
- for i := 0; i < len(sep); i++ {
- h = h*primeRK + uint32(s[i])
- }
- lastmatch := 0
- if h == hashsep && s[:len(sep)] == sep {
- n++
- lastmatch = len(sep)
- }
- for i := len(sep); i < len(s); {
- h *= primeRK
- h += uint32(s[i])
- h -= pow * uint32(s[i-len(sep)])
- i++
- if h == hashsep && lastmatch <= i-len(sep) && s[i-len(sep):i] == sep {
- n++
- lastmatch = i
+ offset := 0
+ for {
+ i := Index(s[offset:], sep)
+ if i == -1 {
+ return n
}
+ n++
+ offset += i + len(sep)
}
- return n
}
// Contains reports whether substr is within s.
@@ -175,24 +145,40 @@ func LastIndex(s, sep string) int {
// IndexRune returns the index of the first instance of the Unicode code point
// r, or -1 if rune is not present in s.
+// If r is utf8.RuneError, it returns the first instance of any
+// invalid UTF-8 byte sequence.
func IndexRune(s string, r rune) int {
switch {
- case r < utf8.RuneSelf:
+ case 0 <= r && r < utf8.RuneSelf:
return IndexByte(s, byte(r))
- default:
- for i, c := range s {
- if c == r {
+ case r == utf8.RuneError:
+ for i, r := range s {
+ if r == utf8.RuneError {
return i
}
}
+ return -1
+ case !utf8.ValidRune(r):
+ return -1
+ default:
+ return Index(s, string(r))
}
- return -1
}
// IndexAny returns the index of the first instance of any Unicode code point
// from chars in s, or -1 if no Unicode code point from chars is present in s.
func IndexAny(s, chars string) int {
if len(chars) > 0 {
+ if len(s) > 8 {
+ if as, isASCII := makeASCIISet(chars); isASCII {
+ for i := 0; i < len(s); i++ {
+ if as.contains(s[i]) {
+ return i
+ }
+ }
+ return -1
+ }
+ }
for i, c := range s {
for _, m := range chars {
if c == m {
@@ -209,11 +195,21 @@ func IndexAny(s, chars string) int {
// present in s.
func LastIndexAny(s, chars string) int {
if len(chars) > 0 {
+ if len(s) > 8 {
+ if as, isASCII := makeASCIISet(chars); isASCII {
+ for i := len(s) - 1; i >= 0; i-- {
+ if as.contains(s[i]) {
+ return i
+ }
+ }
+ return -1
+ }
+ }
for i := len(s); i > 0; {
- rune, size := utf8.DecodeLastRuneInString(s[0:i])
+ r, size := utf8.DecodeLastRuneInString(s[:i])
i -= size
- for _, m := range chars {
- if rune == m {
+ for _, c := range chars {
+ if r == c {
return i
}
}
@@ -342,11 +338,19 @@ func FieldsFunc(s string, f func(rune) bool) []string {
// Join concatenates the elements of a to create a single string. The separator string
// sep is placed between elements in the resulting string.
func Join(a []string, sep string) string {
- if len(a) == 0 {
+ switch len(a) {
+ case 0:
return ""
- }
- if len(a) == 1 {
+ case 1:
return a[0]
+ case 2:
+ // Special case for common small values.
+ // Remove if golang.org/issue/6714 is fixed
+ return a[0] + sep + a[1]
+ case 3:
+ // Special case for common small values.
+ // Remove if golang.org/issue/6714 is fixed
+ return a[0] + sep + a[1] + sep + a[2]
}
n := len(sep) * (len(a) - 1)
for i := 0; i < len(a); i++ {
@@ -416,7 +420,20 @@ func Map(mapping func(rune) rune, s string) string {
}
// Repeat returns a new string consisting of count copies of the string s.
+//
+// It panics if count is negative or if
+// the result of (len(s) * count) overflows.
func Repeat(s string, count int) string {
+ // Since we cannot return an error on overflow,
+ // we should panic if the repeat will generate
+ // an overflow.
+ // See Issue golang.org/issue/16237
+ if count < 0 {
+ panic("strings: negative Repeat count")
+ } else if count > 0 && len(s)*count/count != len(s) {
+ panic("strings: Repeat count causes overflow")
+ }
+
b := make([]byte, len(s)*count)
bp := copy(b, s)
for bp < len(b) {
@@ -437,20 +454,20 @@ func ToTitle(s string) string { return Map(unicode.ToTitle, s) }
// ToUpperSpecial returns a copy of the string s with all Unicode letters mapped to their
// upper case, giving priority to the special casing rules.
-func ToUpperSpecial(_case unicode.SpecialCase, s string) string {
- return Map(func(r rune) rune { return _case.ToUpper(r) }, s)
+func ToUpperSpecial(c unicode.SpecialCase, s string) string {
+ return Map(func(r rune) rune { return c.ToUpper(r) }, s)
}
// ToLowerSpecial returns a copy of the string s with all Unicode letters mapped to their
// lower case, giving priority to the special casing rules.
-func ToLowerSpecial(_case unicode.SpecialCase, s string) string {
- return Map(func(r rune) rune { return _case.ToLower(r) }, s)
+func ToLowerSpecial(c unicode.SpecialCase, s string) string {
+ return Map(func(r rune) rune { return c.ToLower(r) }, s)
}
// ToTitleSpecial returns a copy of the string s with all Unicode letters mapped to their
// title case, giving priority to the special casing rules.
-func ToTitleSpecial(_case unicode.SpecialCase, s string) string {
- return Map(func(r rune) rune { return _case.ToTitle(r) }, s)
+func ToTitleSpecial(c unicode.SpecialCase, s string) string {
+ return Map(func(r rune) rune { return c.ToTitle(r) }, s)
}
// isSeparator reports whether the rune could mark a word boundary.
@@ -573,7 +590,43 @@ func lastIndexFunc(s string, f func(rune) bool, truth bool) int {
return -1
}
+// asciiSet is a 32-byte value, where each bit represents the presence of a
+// given ASCII character in the set. The 128-bits of the lower 16 bytes,
+// starting with the least-significant bit of the lowest word to the
+// most-significant bit of the highest word, map to the full range of all
+// 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
+// ensuring that any non-ASCII character will be reported as not in the set.
+type asciiSet [8]uint32
+
+// makeASCIISet creates a set of ASCII characters and reports whether all
+// characters in chars are ASCII.
+func makeASCIISet(chars string) (as asciiSet, ok bool) {
+ for i := 0; i < len(chars); i++ {
+ c := chars[i]
+ if c >= utf8.RuneSelf {
+ return as, false
+ }
+ as[c>>5] |= 1 << uint(c&31)
+ }
+ return as, true
+}
+
+// contains reports whether c is inside the set.
+func (as *asciiSet) contains(c byte) bool {
+ return (as[c>>5] & (1 << uint(c&31))) != 0
+}
+
func makeCutsetFunc(cutset string) func(rune) bool {
+ if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
+ return func(r rune) bool {
+ return r == rune(cutset[0])
+ }
+ }
+ if as, isASCII := makeASCIISet(cutset); isASCII {
+ return func(r rune) bool {
+ return r < utf8.RuneSelf && as.contains(byte(r))
+ }
+ }
return func(r rune) bool { return IndexRune(cutset, r) >= 0 }
}
diff --git a/libgo/go/strings/strings_amd64.go b/libgo/go/strings/strings_amd64.go
index 4b06f8a..e55afd5 100644
--- a/libgo/go/strings/strings_amd64.go
+++ b/libgo/go/strings/strings_amd64.go
@@ -6,10 +6,22 @@
package strings
+//go:noescape
+
// indexShortStr returns the index of the first instance of c in s, or -1 if c is not present in s.
// indexShortStr requires 2 <= len(c) <= shortStringLen
func indexShortStr(s, c string) int // ../runtime/asm_$GOARCH.s
-const shortStringLen = 31
+func supportAVX2() bool // ../runtime/asm_$GOARCH.s
+
+var shortStringLen int
+
+func init() {
+ if supportAVX2() {
+ shortStringLen = 63
+ } else {
+ shortStringLen = 31
+ }
+}
// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
func Index(s, sep string) int {
@@ -19,8 +31,6 @@ func Index(s, sep string) int {
return 0
case n == 1:
return IndexByte(s, sep[0])
- case n <= shortStringLen:
- return indexShortStr(s, sep)
case n == len(s):
if sep == s {
return 0
@@ -28,6 +38,42 @@ func Index(s, sep string) int {
return -1
case n > len(s):
return -1
+ case n <= shortStringLen:
+ // Use brute force when s and sep both are small
+ if len(s) <= 64 {
+ return indexShortStr(s, sep)
+ }
+ c := sep[0]
+ i := 0
+ t := s[:len(s)-n+1]
+ fails := 0
+ for i < len(t) {
+ if t[i] != c {
+ // IndexByte skips 16/32 bytes per iteration,
+ // so it's faster than indexShortStr.
+ o := IndexByte(t[i:], c)
+ if o < 0 {
+ return -1
+ }
+ i += o
+ }
+ if s[i:i+n] == sep {
+ return i
+ }
+ fails++
+ i++
+ // Switch to indexShortStr when IndexByte produces too many false positives.
+ // Too many means more that 1 error per 8 characters.
+ // Allow some errors in the beginning.
+ if fails > (i+16)/8 {
+ r := indexShortStr(s[i:], sep)
+ if r >= 0 {
+ return r + i
+ }
+ return -1
+ }
+ }
+ return -1
}
// Rabin-Karp search
hashsep, pow := hashStr(sep)
diff --git a/libgo/go/strings/strings_generic.go b/libgo/go/strings/strings_generic.go
index aef2c1e..a3ad515 100644
--- a/libgo/go/strings/strings_generic.go
+++ b/libgo/go/strings/strings_generic.go
@@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+// -build !amd64,!s390x
+
package strings
// TODO: implements short string optimization on non amd64 platforms
diff --git a/libgo/go/strings/strings_s390x.go b/libgo/go/strings/strings_s390x.go
new file mode 100644
index 0000000..b47702f
--- /dev/null
+++ b/libgo/go/strings/strings_s390x.go
@@ -0,0 +1,100 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package strings
+
+//go:noescape
+
+// indexShortStr returns the index of the first instance of sep in s,
+// or -1 if sep is not present in s.
+// indexShortStr requires 2 <= len(sep) <= shortStringLen
+func indexShortStr(s, sep string) int // ../runtime/asm_$GOARCH.s
+
+// supportsVX reports whether the vector facility is available.
+// indexShortStr must not be called if the vector facility is not
+// available.
+func supportsVX() bool // ../runtime/asm_s390x.s
+
+var shortStringLen = -1
+
+func init() {
+ if supportsVX() {
+ shortStringLen = 64
+ }
+}
+
+// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
+func Index(s, sep string) int {
+ n := len(sep)
+ switch {
+ case n == 0:
+ return 0
+ case n == 1:
+ return IndexByte(s, sep[0])
+ case n == len(s):
+ if sep == s {
+ return 0
+ }
+ return -1
+ case n > len(s):
+ return -1
+ case n <= shortStringLen:
+ // Use brute force when s and sep both are small
+ if len(s) <= 64 {
+ return indexShortStr(s, sep)
+ }
+ c := sep[0]
+ i := 0
+ t := s[:len(s)-n+1]
+ fails := 0
+ for i < len(t) {
+ if t[i] != c {
+ // IndexByte skips 16/32 bytes per iteration,
+ // so it's faster than indexShortStr.
+ o := IndexByte(t[i:], c)
+ if o < 0 {
+ return -1
+ }
+ i += o
+ }
+ if s[i:i+n] == sep {
+ return i
+ }
+ fails++
+ i++
+ // Switch to indexShortStr when IndexByte produces too many false positives.
+ // Too many means more that 1 error per 8 characters.
+ // Allow some errors in the beginning.
+ if fails > (i+16)/8 {
+ r := indexShortStr(s[i:], sep)
+ if r >= 0 {
+ return r + i
+ }
+ return -1
+ }
+ }
+ return -1
+ }
+ // Rabin-Karp search
+ hashsep, pow := hashStr(sep)
+ var h uint32
+ for i := 0; i < n; i++ {
+ h = h*primeRK + uint32(s[i])
+ }
+ if h == hashsep && s[:n] == sep {
+ return 0
+ }
+ for i := n; i < len(s); {
+ h *= primeRK
+ h += uint32(s[i])
+ h -= pow * uint32(s[i-n])
+ i++
+ if h == hashsep && s[i-n:i] == sep {
+ return i - n
+ }
+ }
+ return -1
+}
diff --git a/libgo/go/strings/strings_test.go b/libgo/go/strings/strings_test.go
index fcef761..449fb50 100644
--- a/libgo/go/strings/strings_test.go
+++ b/libgo/go/strings/strings_test.go
@@ -6,9 +6,11 @@ package strings_test
import (
"bytes"
+ "fmt"
"io"
"math/rand"
"reflect"
+ "runtime"
. "strings"
"testing"
"unicode"
@@ -86,32 +88,44 @@ var indexTests = []IndexTest{
{"32145678", "01234567", -1},
{"01234567", "01234567", 0},
{"x01234567", "01234567", 1},
+ {"x0123456x01234567", "01234567", 9},
{"xx01234567"[:9], "01234567", -1},
{"", "0123456789", -1},
{"3214567844", "0123456789", -1},
{"0123456789", "0123456789", 0},
{"x0123456789", "0123456789", 1},
+ {"x012345678x0123456789", "0123456789", 11},
{"xyz0123456789"[:12], "0123456789", -1},
{"x01234567x89", "0123456789", -1},
{"", "0123456789012345", -1},
{"3214567889012345", "0123456789012345", -1},
{"0123456789012345", "0123456789012345", 0},
{"x0123456789012345", "0123456789012345", 1},
+ {"x012345678901234x0123456789012345", "0123456789012345", 17},
{"", "01234567890123456789", -1},
{"32145678890123456789", "01234567890123456789", -1},
{"01234567890123456789", "01234567890123456789", 0},
{"x01234567890123456789", "01234567890123456789", 1},
+ {"x0123456789012345678x01234567890123456789", "01234567890123456789", 21},
{"xyz01234567890123456789"[:22], "01234567890123456789", -1},
{"", "0123456789012345678901234567890", -1},
{"321456788901234567890123456789012345678911", "0123456789012345678901234567890", -1},
{"0123456789012345678901234567890", "0123456789012345678901234567890", 0},
{"x0123456789012345678901234567890", "0123456789012345678901234567890", 1},
+ {"x012345678901234567890123456789x0123456789012345678901234567890", "0123456789012345678901234567890", 32},
{"xyz0123456789012345678901234567890"[:33], "0123456789012345678901234567890", -1},
{"", "01234567890123456789012345678901", -1},
{"32145678890123456789012345678901234567890211", "01234567890123456789012345678901", -1},
{"01234567890123456789012345678901", "01234567890123456789012345678901", 0},
{"x01234567890123456789012345678901", "01234567890123456789012345678901", 1},
+ {"x0123456789012345678901234567890x01234567890123456789012345678901", "01234567890123456789012345678901", 33},
{"xyz01234567890123456789012345678901"[:34], "01234567890123456789012345678901", -1},
+ {"xxxxxx012345678901234567890123456789012345678901234567890123456789012", "012345678901234567890123456789012345678901234567890123456789012", 6},
+ {"", "0123456789012345678901234567890123456789", -1},
+ {"xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456789", 2},
+ {"xx012345678901234567890123456789012345678901234567890123456789012"[:41], "0123456789012345678901234567890123456789", -1},
+ {"xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456xxx", -1},
+ {"xx0123456789012345678901234567890123456789012345678901234567890120123456789012345678901234567890123456xxx", "0123456789012345678901234567890123456xxx", 65},
}
var lastIndexTests = []IndexTest{
@@ -139,10 +153,15 @@ var indexAnyTests = []IndexTest{
{"aaa", "a", 0},
{"abc", "xyz", -1},
{"abc", "xcz", 2},
- {"a☺b☻c☹d", "uvw☻xyz", 2 + len("☺")},
+ {"ab☺c", "x☺yz", 2},
+ {"a☺b☻c☹d", "cx", len("a☺b☻")},
+ {"a☺b☻c☹d", "uvw☻xyz", len("a☺b")},
{"aRegExp*", ".(|)*+?^$[]", 7},
{dots + dots + dots, " ", -1},
+ {"012abcba210", "\xffb", 4},
+ {"012\x80bcb\x80210", "\xffb", 3},
}
+
var lastIndexAnyTests = []IndexTest{
{"", "", -1},
{"", "a", -1},
@@ -152,9 +171,13 @@ var lastIndexAnyTests = []IndexTest{
{"aaa", "a", 2},
{"abc", "xyz", -1},
{"abc", "ab", 1},
- {"a☺b☻c☹d", "uvw☻xyz", 2 + len("☺")},
+ {"ab☺c", "x☺yz", 2},
+ {"a☺b☻c☹d", "cx", len("a☺b☻")},
+ {"a☺b☻c☹d", "uvw☻xyz", len("a☺b")},
{"a.RegExp*", ".(|)*+?^$[]", 8},
{dots + dots + dots, " ", -1},
+ {"012abcba210", "\xffb", 6},
+ {"012\x80bcb\x80210", "\xffb", 7},
}
// Execute f on each test case. funcName should be the name of f; it's used
@@ -227,22 +250,56 @@ func TestIndexRandom(t *testing.T) {
}
}
-var indexRuneTests = []struct {
- s string
- rune rune
- out int
-}{
- {"a A x", 'A', 2},
- {"some_text=some_value", '=', 9},
- {"☺a", 'a', 3},
- {"a☻☺b", '☺', 4},
-}
-
func TestIndexRune(t *testing.T) {
- for _, test := range indexRuneTests {
- if actual := IndexRune(test.s, test.rune); actual != test.out {
- t.Errorf("IndexRune(%q,%d)= %v; want %v", test.s, test.rune, actual, test.out)
+ tests := []struct {
+ in string
+ rune rune
+ want int
+ }{
+ {"", 'a', -1},
+ {"", '☺', -1},
+ {"foo", '☹', -1},
+ {"foo", 'o', 1},
+ {"foo☺bar", '☺', 3},
+ {"foo☺☻☹bar", '☹', 9},
+ {"a A x", 'A', 2},
+ {"some_text=some_value", '=', 9},
+ {"☺a", 'a', 3},
+ {"a☻☺b", '☺', 4},
+
+ // RuneError should match any invalid UTF-8 byte sequence.
+ {"�", '�', 0},
+ {"\xff", '�', 0},
+ {"☻x�", '�', len("☻x")},
+ {"☻x\xe2\x98", '�', len("☻x")},
+ {"☻x\xe2\x98�", '�', len("☻x")},
+ {"☻x\xe2\x98x", '�', len("☻x")},
+
+ // Invalid rune values should never match.
+ {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", -1, -1},
+ {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", 0xD800, -1}, // Surrogate pair
+ {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", utf8.MaxRune + 1, -1},
+ }
+ for _, tt := range tests {
+ if got := IndexRune(tt.in, tt.rune); got != tt.want {
+ t.Errorf("IndexRune(%q, %d) = %v; want %v", tt.in, tt.rune, got, tt.want)
+ }
+ }
+
+ haystack := "test世界"
+ allocs := testing.AllocsPerRun(1000, func() {
+ if i := IndexRune(haystack, 's'); i != 2 {
+ t.Fatalf("'s' at %d; want 2", i)
+ }
+ if i := IndexRune(haystack, '世'); i != 4 {
+ t.Fatalf("'世' at %d; want 4", i)
}
+ })
+ if runtime.Compiler == "gccgo" {
+ t.Skip("skipping allocations test for gccgo until escape analysis is enabled")
+ }
+ if allocs != 0 && testing.CoverMode() == "" {
+ t.Errorf("expected no allocations, got %f", allocs)
}
}
@@ -257,6 +314,17 @@ func BenchmarkIndexRune(b *testing.B) {
}
}
+var benchmarkLongString = Repeat(" ", 100) + benchmarkString
+
+func BenchmarkIndexRuneLongString(b *testing.B) {
+ if got := IndexRune(benchmarkLongString, '☺'); got != 114 {
+ b.Fatalf("wrong index: expected 114, got=%d", got)
+ }
+ for i := 0; i < b.N; i++ {
+ IndexRune(benchmarkLongString, '☺')
+ }
+}
+
func BenchmarkIndexRuneFastPath(b *testing.B) {
if got := IndexRune(benchmarkString, 'v'); got != 17 {
b.Fatalf("wrong index: expected 17, got=%d", got)
@@ -613,6 +681,9 @@ var trimTests = []struct {
{"Trim", "* listitem", " *", "listitem"},
{"Trim", `"quote"`, `"`, "quote"},
{"Trim", "\u2C6F\u2C6F\u0250\u0250\u2C6F\u2C6F", "\u2C6F", "\u0250\u0250"},
+ {"Trim", "\x80test\xff", "\xff", "test"},
+ {"Trim", " Ġ ", " ", "Ġ"},
+ {"Trim", " Ġİ0", "0 ", "Ġİ"},
//empty string tests
{"Trim", "abba", "", "abba"},
{"Trim", "", "123", ""},
@@ -855,6 +926,54 @@ func TestRepeat(t *testing.T) {
}
}
+func repeat(s string, count int) (err error) {
+ defer func() {
+ if r := recover(); r != nil {
+ switch v := r.(type) {
+ case error:
+ err = v
+ default:
+ err = fmt.Errorf("%s", v)
+ }
+ }
+ }()
+
+ Repeat(s, count)
+
+ return
+}
+
+// See Issue golang.org/issue/16237
+func TestRepeatCatchesOverflow(t *testing.T) {
+ tests := [...]struct {
+ s string
+ count int
+ errStr string
+ }{
+ 0: {"--", -2147483647, "negative"},
+ 1: {"", int(^uint(0) >> 1), ""},
+ 2: {"-", 10, ""},
+ 3: {"gopher", 0, ""},
+ 4: {"-", -1, "negative"},
+ 5: {"--", -102, "negative"},
+ 6: {string(make([]byte, 255)), int((^uint(0))/255 + 1), "overflow"},
+ }
+
+ for i, tt := range tests {
+ err := repeat(tt.s, tt.count)
+ if tt.errStr == "" {
+ if err != nil {
+ t.Errorf("#%d panicked %v", i, err)
+ }
+ continue
+ }
+
+ if err == nil || !Contains(err.Error(), tt.errStr) {
+ t.Errorf("#%d expected %q got %q", i, tt.errStr, err)
+ }
+ }
+}
+
func runesEqual(a, b []rune) bool {
if len(a) != len(b) {
return false
@@ -1290,6 +1409,9 @@ func benchmarkCountHard(b *testing.B, sep string) {
func BenchmarkIndexHard1(b *testing.B) { benchmarkIndexHard(b, "<>") }
func BenchmarkIndexHard2(b *testing.B) { benchmarkIndexHard(b, "</pre>") }
func BenchmarkIndexHard3(b *testing.B) { benchmarkIndexHard(b, "<b>hello world</b>") }
+func BenchmarkIndexHard4(b *testing.B) {
+ benchmarkIndexHard(b, "<pre><b>hello</b><strong>world</strong></pre>")
+}
func BenchmarkLastIndexHard1(b *testing.B) { benchmarkLastIndexHard(b, "<>") }
func BenchmarkLastIndexHard2(b *testing.B) { benchmarkLastIndexHard(b, "</pre>") }
@@ -1381,3 +1503,31 @@ func BenchmarkRepeat(b *testing.B) {
Repeat("-", 80)
}
}
+
+func BenchmarkIndexAnyASCII(b *testing.B) {
+ x := Repeat("#", 4096) // Never matches set
+ cs := "0123456789abcdef"
+ for k := 1; k <= 4096; k <<= 4 {
+ for j := 1; j <= 16; j <<= 1 {
+ b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ IndexAny(x[:k], cs[:j])
+ }
+ })
+ }
+ }
+}
+
+func BenchmarkTrimASCII(b *testing.B) {
+ cs := "0123456789abcdef"
+ for k := 1; k <= 4096; k <<= 4 {
+ for j := 1; j <= 16; j <<= 1 {
+ b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
+ x := Repeat(cs[:j], k) // Always matches set
+ for i := 0; i < b.N; i++ {
+ Trim(x[:k], cs[:j])
+ }
+ })
+ }
+ }
+}