aboutsummaryrefslogtreecommitdiff
path: root/libgo/go/strings
diff options
context:
space:
mode:
Diffstat (limited to 'libgo/go/strings')
-rw-r--r--libgo/go/strings/builder.go120
-rw-r--r--libgo/go/strings/builder_test.go282
-rw-r--r--libgo/go/strings/example_test.go124
-rw-r--r--libgo/go/strings/strings.go255
-rw-r--r--libgo/go/strings/strings_amd64.go20
-rw-r--r--libgo/go/strings/strings_generic.go38
-rw-r--r--libgo/go/strings/strings_s390x.go20
-rw-r--r--libgo/go/strings/strings_test.go46
8 files changed, 711 insertions, 194 deletions
diff --git a/libgo/go/strings/builder.go b/libgo/go/strings/builder.go
new file mode 100644
index 0000000..594f3db
--- /dev/null
+++ b/libgo/go/strings/builder.go
@@ -0,0 +1,120 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package strings
+
+import (
+ "errors"
+ "io"
+ "unicode/utf8"
+ "unsafe"
+)
+
+// A Builder is used to efficiently build a string using Write methods.
+// It minimizes memory copying. The zero value is ready to use.
+type Builder struct {
+ buf []byte
+}
+
+// String returns the accumulated string.
+func (b *Builder) String() string {
+ return *(*string)(unsafe.Pointer(&b.buf))
+}
+
+// Len returns the number of accumulated bytes; b.Len() == len(b.String()).
+func (b *Builder) Len() int { return len(b.buf) }
+
+// Reset resets the Builder to be empty.
+func (b *Builder) Reset() { b.buf = nil }
+
+const maxInt = int(^uint(0) >> 1)
+
+// grow copies the buffer to a new, larger buffer so that there are at least n
+// bytes of capacity beyond len(b.buf).
+func (b *Builder) grow(n int) {
+ buf := make([]byte, len(b.buf), 2*cap(b.buf)+n)
+ copy(buf, b.buf)
+ b.buf = buf
+}
+
+// Grow grows b's capacity, if necessary, to guarantee space for
+// another n bytes. After Grow(n), at least n bytes can be written to b
+// without another allocation. If n is negative, Grow panics.
+func (b *Builder) Grow(n int) {
+ if n < 0 {
+ panic("strings.Builder.Grow: negative count")
+ }
+ if cap(b.buf)-len(b.buf) < n {
+ b.grow(n)
+ }
+}
+
+// Write appends the contents of p to b's buffer.
+// Write always returns len(p), nil.
+func (b *Builder) Write(p []byte) (int, error) {
+ b.buf = append(b.buf, p...)
+ return len(p), nil
+}
+
+// WriteByte appends the byte c to b's buffer.
+// The returned error is always nil.
+func (b *Builder) WriteByte(c byte) error {
+ b.buf = append(b.buf, c)
+ return nil
+}
+
+// WriteRune appends the UTF-8 encoding of Unicode code point r to b's buffer.
+// It returns the length of r and a nil error.
+func (b *Builder) WriteRune(r rune) (int, error) {
+ if r < utf8.RuneSelf {
+ b.buf = append(b.buf, byte(r))
+ return 1, nil
+ }
+ l := len(b.buf)
+ if cap(b.buf)-l < utf8.UTFMax {
+ b.grow(utf8.UTFMax)
+ }
+ n := utf8.EncodeRune(b.buf[l:l+utf8.UTFMax], r)
+ b.buf = b.buf[:l+n]
+ return n, nil
+}
+
+// WriteString appends the contents of s to b's buffer.
+// It returns the length of s and a nil error.
+func (b *Builder) WriteString(s string) (int, error) {
+ b.buf = append(b.buf, s...)
+ return len(s), nil
+}
+
+// minRead is the minimum slice passed to a Read call by Builder.ReadFrom.
+// It is the same as bytes.MinRead.
+const minRead = 512
+
+// errNegativeRead is the panic value if the reader passed to Builder.ReadFrom
+// returns a negative count.
+var errNegativeRead = errors.New("strings.Builder: reader returned negative count from Read")
+
+// ReadFrom reads data from r until EOF and appends it to b's buffer.
+// The return value n is the number of bytes read.
+// Any error except io.EOF encountered during the read is also returned.
+func (b *Builder) ReadFrom(r io.Reader) (n int64, err error) {
+ for {
+ l := len(b.buf)
+ if cap(b.buf)-l < minRead {
+ b.grow(minRead)
+ }
+ m, e := r.Read(b.buf[l:cap(b.buf)])
+ if m < 0 {
+ panic(errNegativeRead)
+ }
+ b.buf = b.buf[:l+m]
+ n += int64(m)
+ if e == io.EOF {
+ return n, nil
+ }
+ if e != nil {
+ return n, e
+ }
+ }
+}
diff --git a/libgo/go/strings/builder_test.go b/libgo/go/strings/builder_test.go
new file mode 100644
index 0000000..df55708
--- /dev/null
+++ b/libgo/go/strings/builder_test.go
@@ -0,0 +1,282 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package strings_test
+
+import (
+ "bytes"
+ "errors"
+ "io"
+ "runtime"
+ . "strings"
+ "testing"
+ "testing/iotest"
+)
+
+func check(t *testing.T, b *Builder, want string) {
+ t.Helper()
+ got := b.String()
+ if got != want {
+ t.Errorf("String: got %#q; want %#q", got, want)
+ return
+ }
+ if n := b.Len(); n != len(got) {
+ t.Errorf("Len: got %d; but len(String()) is %d", n, len(got))
+ }
+}
+
+func TestBuilder(t *testing.T) {
+ var b Builder
+ check(t, &b, "")
+ n, err := b.WriteString("hello")
+ if err != nil || n != 5 {
+ t.Errorf("WriteString: got %d,%s; want 5,nil", n, err)
+ }
+ check(t, &b, "hello")
+ if err = b.WriteByte(' '); err != nil {
+ t.Errorf("WriteByte: %s", err)
+ }
+ check(t, &b, "hello ")
+ n, err = b.WriteString("world")
+ if err != nil || n != 5 {
+ t.Errorf("WriteString: got %d,%s; want 5,nil", n, err)
+ }
+ check(t, &b, "hello world")
+}
+
+func TestBuilderString(t *testing.T) {
+ var b Builder
+ b.WriteString("alpha")
+ check(t, &b, "alpha")
+ s1 := b.String()
+ b.WriteString("beta")
+ check(t, &b, "alphabeta")
+ s2 := b.String()
+ b.WriteString("gamma")
+ check(t, &b, "alphabetagamma")
+ s3 := b.String()
+
+ // Check that subsequent operations didn't change the returned strings.
+ if want := "alpha"; s1 != want {
+ t.Errorf("first String result is now %q; want %q", s1, want)
+ }
+ if want := "alphabeta"; s2 != want {
+ t.Errorf("second String result is now %q; want %q", s2, want)
+ }
+ if want := "alphabetagamma"; s3 != want {
+ t.Errorf("third String result is now %q; want %q", s3, want)
+ }
+}
+
+func TestBuilderReset(t *testing.T) {
+ var b Builder
+ check(t, &b, "")
+ b.WriteString("aaa")
+ s := b.String()
+ check(t, &b, "aaa")
+ b.Reset()
+ check(t, &b, "")
+
+ // Ensure that writing after Reset doesn't alter
+ // previously returned strings.
+ b.WriteString("bbb")
+ check(t, &b, "bbb")
+ if want := "aaa"; s != want {
+ t.Errorf("previous String result changed after Reset: got %q; want %q", s, want)
+ }
+}
+
+func TestBuilderGrow(t *testing.T) {
+ for _, growLen := range []int{0, 100, 1000, 10000, 100000} {
+ var b Builder
+ b.Grow(growLen)
+ p := bytes.Repeat([]byte{'a'}, growLen)
+ allocs := numAllocs(func() { b.Write(p) })
+ if allocs > 0 {
+ t.Errorf("growLen=%d: allocation occurred during write", growLen)
+ }
+ if b.String() != string(p) {
+ t.Errorf("growLen=%d: bad data written after Grow", growLen)
+ }
+ }
+}
+
+func TestBuilderWrite2(t *testing.T) {
+ const s0 = "hello 世界"
+ for _, tt := range []struct {
+ name string
+ fn func(b *Builder) (int, error)
+ n int
+ want string
+ }{
+ {
+ "Write",
+ func(b *Builder) (int, error) { return b.Write([]byte(s0)) },
+ len(s0),
+ s0,
+ },
+ {
+ "WriteRune",
+ func(b *Builder) (int, error) { return b.WriteRune('a') },
+ 1,
+ "a",
+ },
+ {
+ "WriteRuneWide",
+ func(b *Builder) (int, error) { return b.WriteRune('世') },
+ 3,
+ "世",
+ },
+ {
+ "WriteString",
+ func(b *Builder) (int, error) { return b.WriteString(s0) },
+ len(s0),
+ s0,
+ },
+ } {
+ t.Run(tt.name, func(t *testing.T) {
+ var b Builder
+ n, err := tt.fn(&b)
+ if err != nil {
+ t.Fatalf("first call: got %s", err)
+ }
+ if n != tt.n {
+ t.Errorf("first call: got n=%d; want %d", n, tt.n)
+ }
+ check(t, &b, tt.want)
+
+ n, err = tt.fn(&b)
+ if err != nil {
+ t.Fatalf("second call: got %s", err)
+ }
+ if n != tt.n {
+ t.Errorf("second call: got n=%d; want %d", n, tt.n)
+ }
+ check(t, &b, tt.want+tt.want)
+ })
+ }
+}
+
+func TestBuilderWriteByte(t *testing.T) {
+ var b Builder
+ if err := b.WriteByte('a'); err != nil {
+ t.Error(err)
+ }
+ if err := b.WriteByte(0); err != nil {
+ t.Error(err)
+ }
+ check(t, &b, "a\x00")
+}
+
+func TestBuilderReadFrom(t *testing.T) {
+ for _, tt := range []struct {
+ name string
+ fn func(io.Reader) io.Reader
+ }{
+ {"Reader", func(r io.Reader) io.Reader { return r }},
+ {"DataErrReader", iotest.DataErrReader},
+ {"OneByteReader", iotest.OneByteReader},
+ } {
+ t.Run(tt.name, func(t *testing.T) {
+ var b Builder
+
+ r := tt.fn(NewReader("hello"))
+ n, err := b.ReadFrom(r)
+ if err != nil {
+ t.Fatalf("first call: got %s", err)
+ }
+ if n != 5 {
+ t.Errorf("first call: got n=%d; want 5", n)
+ }
+ check(t, &b, "hello")
+
+ r = tt.fn(NewReader(" world"))
+ n, err = b.ReadFrom(r)
+ if err != nil {
+ t.Fatalf("first call: got %s", err)
+ }
+ if n != 6 {
+ t.Errorf("first call: got n=%d; want 6", n)
+ }
+ check(t, &b, "hello world")
+ })
+ }
+}
+
+var errRead = errors.New("boom")
+
+// errorReader sends reads to the underlying reader
+// but returns errRead instead of io.EOF.
+type errorReader struct {
+ r io.Reader
+}
+
+func (r errorReader) Read(b []byte) (int, error) {
+ n, err := r.r.Read(b)
+ if err == io.EOF {
+ err = errRead
+ }
+ return n, err
+}
+
+func TestBuilderReadFromError(t *testing.T) {
+ var b Builder
+ r := errorReader{NewReader("hello")}
+ n, err := b.ReadFrom(r)
+ if n != 5 {
+ t.Errorf("got n=%d; want 5", n)
+ }
+ if err != errRead {
+ t.Errorf("got err=%q; want %q", err, errRead)
+ }
+ check(t, &b, "hello")
+}
+
+type negativeReader struct{}
+
+func (r negativeReader) Read([]byte) (int, error) { return -1, nil }
+
+func TestBuilderReadFromNegativeReader(t *testing.T) {
+ var b Builder
+ defer func() {
+ switch err := recover().(type) {
+ case nil:
+ t.Fatal("ReadFrom didn't panic")
+ case error:
+ wantErr := "strings.Builder: reader returned negative count from Read"
+ if err.Error() != wantErr {
+ t.Fatalf("recovered panic: got %v; want %v", err.Error(), wantErr)
+ }
+ default:
+ t.Fatalf("unexpected panic value: %#v", err)
+ }
+ }()
+
+ b.ReadFrom(negativeReader{})
+}
+
+func TestBuilderAllocs(t *testing.T) {
+ var b Builder
+ b.Grow(5)
+ var s string
+ allocs := numAllocs(func() {
+ b.WriteString("hello")
+ s = b.String()
+ })
+ if want := "hello"; s != want {
+ t.Errorf("String: got %#q; want %#q", s, want)
+ }
+ if allocs > 0 {
+ t.Fatalf("got %d alloc(s); want 0", allocs)
+ }
+}
+
+func numAllocs(fn func()) uint64 {
+ defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))
+ var m1, m2 runtime.MemStats
+ runtime.ReadMemStats(&m1)
+ fn()
+ runtime.ReadMemStats(&m2)
+ return m2.Mallocs - m1.Mallocs
+}
diff --git a/libgo/go/strings/example_test.go b/libgo/go/strings/example_test.go
index e962152..607e4a0 100644
--- a/libgo/go/strings/example_test.go
+++ b/libgo/go/strings/example_test.go
@@ -166,6 +166,26 @@ func ExampleLastIndexAny() {
// -1
}
+func ExampleLastIndexByte() {
+ fmt.Println(strings.LastIndexByte("Hello, world", 'l'))
+ fmt.Println(strings.LastIndexByte("Hello, world", 'o'))
+ fmt.Println(strings.LastIndexByte("Hello, world", 'x'))
+ // Output:
+ // 10
+ // 8
+ // -1
+}
+
+func ExampleLastIndexFunc() {
+ fmt.Println(strings.LastIndexFunc("go 123", unicode.IsNumber))
+ fmt.Println(strings.LastIndexFunc("123 go", unicode.IsNumber))
+ fmt.Println(strings.LastIndexFunc("go", unicode.IsNumber))
+ // Output:
+ // 5
+ // 2
+ // -1
+}
+
func ExampleJoin() {
s := []string{"foo", "bar", "baz"}
fmt.Println(strings.Join(s, ", "))
@@ -229,17 +249,10 @@ func ExampleToTitle() {
// ХЛЕБ
}
-func ExampleTrim() {
- fmt.Printf("[%q]", strings.Trim(" !!! Achtung! Achtung! !!! ", "! "))
- // Output: ["Achtung! Achtung"]
-}
-
-func ExampleTrimFunc() {
- f := func(c rune) bool {
- return !unicode.IsLetter(c) && !unicode.IsNumber(c)
- }
- fmt.Printf("[%q]", strings.TrimFunc(" Achtung1! Achtung2,...", f))
- // Output: ["Achtung1! Achtung2"]
+func ExampleToTitleSpecial() {
+ fmt.Println(strings.ToTitleSpecial(unicode.TurkishCase, "dünyanın ilk borsa yapısı Aizonai kabul edilir"))
+ // Output:
+ // DÜNYANIN İLK BORSA YAPISI AİZONAİ KABUL EDİLİR
}
func ExampleMap() {
@@ -256,11 +269,6 @@ func ExampleMap() {
// Output: 'Gjnf oevyyvt naq gur fyvgul tbcure...
}
-func ExampleTrimSpace() {
- fmt.Println(strings.TrimSpace(" \t\n a lone gopher \n\t\r\n"))
- // Output: a lone gopher
-}
-
func ExampleNewReplacer() {
r := strings.NewReplacer("<", "&lt;", ">", "&gt;")
fmt.Println(r.Replace("This is <b>HTML</b>!"))
@@ -272,23 +280,85 @@ func ExampleToUpper() {
// Output: GOPHER
}
+func ExampleToUpperSpecial() {
+ fmt.Println(strings.ToUpperSpecial(unicode.TurkishCase, "örnek iş"))
+ // Output: ÖRNEK İŞ
+}
+
func ExampleToLower() {
fmt.Println(strings.ToLower("Gopher"))
// Output: gopher
}
-func ExampleTrimSuffix() {
- var s = "Hello, goodbye, etc!"
- s = strings.TrimSuffix(s, "goodbye, etc!")
- s = strings.TrimSuffix(s, "planet")
- fmt.Print(s, "world!")
- // Output: Hello, world!
+func ExampleToLowerSpecial() {
+ fmt.Println(strings.ToLowerSpecial(unicode.TurkishCase, "Önnek İş"))
+ // Output: önnek iş
+}
+
+func ExampleTrim() {
+ fmt.Print(strings.Trim("¡¡¡Hello, Gophers!!!", "!¡"))
+ // Output: Hello, Gophers
+}
+
+func ExampleTrimSpace() {
+ fmt.Println(strings.TrimSpace(" \t\n Hello, Gophers \n\t\r\n"))
+ // Output: Hello, Gophers
}
func ExampleTrimPrefix() {
- var s = "Goodbye,, world!"
- s = strings.TrimPrefix(s, "Goodbye,")
- s = strings.TrimPrefix(s, "Howdy,")
- fmt.Print("Hello" + s)
- // Output: Hello, world!
+ var s = "¡¡¡Hello, Gophers!!!"
+ s = strings.TrimPrefix(s, "¡¡¡Hello, ")
+ s = strings.TrimPrefix(s, "¡¡¡Howdy, ")
+ fmt.Print(s)
+ // Output: Gophers!!!
+}
+
+func ExampleTrimSuffix() {
+ var s = "¡¡¡Hello, Gophers!!!"
+ s = strings.TrimSuffix(s, ", Gophers!!!")
+ s = strings.TrimSuffix(s, ", Marmots!!!")
+ fmt.Print(s)
+ // Output: ¡¡¡Hello
+}
+
+func ExampleTrimFunc() {
+ fmt.Print(strings.TrimFunc("¡¡¡Hello, Gophers!!!", func(r rune) bool {
+ return !unicode.IsLetter(r) && !unicode.IsNumber(r)
+ }))
+ // Output: Hello, Gophers
+}
+
+func ExampleTrimLeft() {
+ fmt.Print(strings.TrimLeft("¡¡¡Hello, Gophers!!!", "!¡"))
+ // Output: Hello, Gophers!!!
+}
+
+func ExampleTrimLeftFunc() {
+ fmt.Print(strings.TrimLeftFunc("¡¡¡Hello, Gophers!!!", func(r rune) bool {
+ return !unicode.IsLetter(r) && !unicode.IsNumber(r)
+ }))
+ // Output: Hello, Gophers!!!
+}
+
+func ExampleTrimRight() {
+ fmt.Print(strings.TrimRight("¡¡¡Hello, Gophers!!!", "!¡"))
+ // Output: ¡¡¡Hello, Gophers
+}
+
+func ExampleTrimRightFunc() {
+ fmt.Print(strings.TrimRightFunc("¡¡¡Hello, Gophers!!!", func(r rune) bool {
+ return !unicode.IsLetter(r) && !unicode.IsNumber(r)
+ }))
+ // Output: ¡¡¡Hello, Gophers
+}
+
+func ExampleBuilder() {
+ var b strings.Builder
+ for i := 3; i >= 1; i-- {
+ fmt.Fprintf(&b, "%d...", i)
+ }
+ b.WriteString("ignition")
+ fmt.Println(b.String())
+
+ // Output: 3...2...1...ignition
}
diff --git a/libgo/go/strings/strings.go b/libgo/go/strings/strings.go
index 0c836c0..02c0320 100644
--- a/libgo/go/strings/strings.go
+++ b/libgo/go/strings/strings.go
@@ -166,22 +166,24 @@ func IndexRune(s string, r rune) int {
// IndexAny returns the index of the first instance of any Unicode code point
// from chars in s, or -1 if no Unicode code point from chars is present in s.
func IndexAny(s, chars string) int {
- if len(chars) > 0 {
- if len(s) > 8 {
- if as, isASCII := makeASCIISet(chars); isASCII {
- for i := 0; i < len(s); i++ {
- if as.contains(s[i]) {
- return i
- }
+ if chars == "" {
+ // Avoid scanning all of s.
+ return -1
+ }
+ if len(s) > 8 {
+ if as, isASCII := makeASCIISet(chars); isASCII {
+ for i := 0; i < len(s); i++ {
+ if as.contains(s[i]) {
+ return i
}
- return -1
}
+ return -1
}
- for i, c := range s {
- for _, m := range chars {
- if c == m {
- return i
- }
+ }
+ for i, c := range s {
+ for _, m := range chars {
+ if c == m {
+ return i
}
}
}
@@ -192,24 +194,26 @@ func IndexAny(s, chars string) int {
// point from chars in s, or -1 if no Unicode code point from chars is
// present in s.
func LastIndexAny(s, chars string) int {
- if len(chars) > 0 {
- if len(s) > 8 {
- if as, isASCII := makeASCIISet(chars); isASCII {
- for i := len(s) - 1; i >= 0; i-- {
- if as.contains(s[i]) {
- return i
- }
+ if chars == "" {
+ // Avoid scanning all of s.
+ return -1
+ }
+ if len(s) > 8 {
+ if as, isASCII := makeASCIISet(chars); isASCII {
+ for i := len(s) - 1; i >= 0; i-- {
+ if as.contains(s[i]) {
+ return i
}
- return -1
}
+ return -1
}
- for i := len(s); i > 0; {
- r, size := utf8.DecodeLastRuneInString(s[:i])
- i -= size
- for _, c := range chars {
- if r == c {
- return i
- }
+ }
+ for i := len(s); i > 0; {
+ r, size := utf8.DecodeLastRuneInString(s[:i])
+ i -= size
+ for _, c := range chars {
+ if r == c {
+ return i
}
}
}
@@ -310,8 +314,8 @@ func SplitAfter(s, sep string) []string {
var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
// Fields splits the string s around each instance of one or more consecutive white space
-// characters, as defined by unicode.IsSpace, returning an array of substrings of s or an
-// empty list if s contains only white space.
+// characters, as defined by unicode.IsSpace, returning a slice of substrings of s or an
+// empty slice if s contains only white space.
func Fields(s string) []string {
// First count the fields.
// This is an exact count if s is ASCII, otherwise it is an approximation.
@@ -358,67 +362,7 @@ func Fields(s string) []string {
}
// Some runes in the input string are not ASCII.
- // Same general approach as in the ASCII path but
- // uses DecodeRuneInString and unicode.IsSpace if
- // a non-ASCII rune needs to be decoded and checked
- // if it corresponds to a space.
- a := make([]string, 0, n)
- fieldStart := 0
- i := 0
- // Skip spaces in the front of the input.
- for i < len(s) {
- if c := s[i]; c < utf8.RuneSelf {
- if asciiSpace[c] == 0 {
- break
- }
- i++
- } else {
- r, w := utf8.DecodeRuneInString(s[i:])
- if !unicode.IsSpace(r) {
- break
- }
- i += w
- }
- }
- fieldStart = i
- for i < len(s) {
- if c := s[i]; c < utf8.RuneSelf {
- if asciiSpace[c] == 0 {
- i++
- continue
- }
- a = append(a, s[fieldStart:i])
- i++
- } else {
- r, w := utf8.DecodeRuneInString(s[i:])
- if !unicode.IsSpace(r) {
- i += w
- continue
- }
- a = append(a, s[fieldStart:i])
- i += w
- }
- // Skip spaces in between fields.
- for i < len(s) {
- if c := s[i]; c < utf8.RuneSelf {
- if asciiSpace[c] == 0 {
- break
- }
- i++
- } else {
- r, w := utf8.DecodeRuneInString(s[i:])
- if !unicode.IsSpace(r) {
- break
- }
- i += w
- }
- }
- fieldStart = i
- }
- if fieldStart < len(s) { // Last field might end at EOF.
- a = append(a, s[fieldStart:])
- }
- return a
+ return FieldsFunc(s, unicode.IsSpace)
}
// FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c)
@@ -427,35 +371,42 @@ func Fields(s string) []string {
// FieldsFunc makes no guarantees about the order in which it calls f(c).
// If f does not return consistent results for a given c, FieldsFunc may crash.
func FieldsFunc(s string, f func(rune) bool) []string {
- // First count the fields.
- n := 0
- inField := false
- for _, rune := range s {
- wasInField := inField
- inField = !f(rune)
- if inField && !wasInField {
- n++
- }
+ // A span is used to record a slice of s of the form s[start:end].
+ // The start index is inclusive and the end index is exclusive.
+ type span struct {
+ start int
+ end int
}
+ spans := make([]span, 0, 32)
- // Now create them.
- a := make([]string, n)
- na := 0
- fieldStart := -1 // Set to -1 when looking for start of field.
+ // Find the field start and end indices.
+ wasField := false
+ fromIndex := 0
for i, rune := range s {
if f(rune) {
- if fieldStart >= 0 {
- a[na] = s[fieldStart:i]
- na++
- fieldStart = -1
+ if wasField {
+ spans = append(spans, span{start: fromIndex, end: i})
+ wasField = false
+ }
+ } else {
+ if !wasField {
+ fromIndex = i
+ wasField = true
}
- } else if fieldStart == -1 {
- fieldStart = i
}
}
- if fieldStart >= 0 { // Last field might end at EOF.
- a[na] = s[fieldStart:]
+
+ // Last field might end at EOF.
+ if wasField {
+ spans = append(spans, span{fromIndex, len(s)})
+ }
+
+ // Create strings from recorded field indices.
+ a := make([]string, len(spans))
+ for i, span := range spans {
+ a[i] = s[span.start:span.end]
}
+
return a
}
@@ -599,10 +550,62 @@ func Repeat(s string, count int) string {
}
// ToUpper returns a copy of the string s with all Unicode letters mapped to their upper case.
-func ToUpper(s string) string { return Map(unicode.ToUpper, s) }
+func ToUpper(s string) string {
+ isASCII, hasLower := true, false
+ for i := 0; i < len(s); i++ {
+ c := s[i]
+ if c >= utf8.RuneSelf {
+ isASCII = false
+ break
+ }
+ hasLower = hasLower || (c >= 'a' && c <= 'z')
+ }
+
+ if isASCII { // optimize for ASCII-only strings.
+ if !hasLower {
+ return s
+ }
+ b := make([]byte, len(s))
+ for i := 0; i < len(s); i++ {
+ c := s[i]
+ if c >= 'a' && c <= 'z' {
+ c -= 'a' - 'A'
+ }
+ b[i] = c
+ }
+ return string(b)
+ }
+ return Map(unicode.ToUpper, s)
+}
// ToLower returns a copy of the string s with all Unicode letters mapped to their lower case.
-func ToLower(s string) string { return Map(unicode.ToLower, s) }
+func ToLower(s string) string {
+ isASCII, hasUpper := true, false
+ for i := 0; i < len(s); i++ {
+ c := s[i]
+ if c >= utf8.RuneSelf {
+ isASCII = false
+ break
+ }
+ hasUpper = hasUpper || (c >= 'A' && c <= 'Z')
+ }
+
+ if isASCII { // optimize for ASCII-only strings.
+ if !hasUpper {
+ return s
+ }
+ b := make([]byte, len(s))
+ for i := 0; i < len(s); i++ {
+ c := s[i]
+ if c >= 'A' && c <= 'Z' {
+ c += 'a' - 'A'
+ }
+ b[i] = c
+ }
+ return string(b)
+ }
+ return Map(unicode.ToLower, s)
+}
// ToTitle returns a copy of the string s with all Unicode letters mapped to their title case.
func ToTitle(s string) string { return Map(unicode.ToTitle, s) }
@@ -923,3 +926,27 @@ func EqualFold(s, t string) bool {
// One string is empty. Are both?
return s == t
}
+
+func indexRabinKarp(s, substr string) int {
+ // Rabin-Karp search
+ hashss, pow := hashStr(substr)
+ n := len(substr)
+ var h uint32
+ for i := 0; i < n; i++ {
+ h = h*primeRK + uint32(s[i])
+ }
+ if h == hashss && s[:n] == substr {
+ return 0
+ }
+ for i := n; i < len(s); {
+ h *= primeRK
+ h += uint32(s[i])
+ h -= pow * uint32(s[i-n])
+ i++
+ if h == hashss && s[i-n:i] == substr {
+ return i - n
+ }
+ }
+ return -1
+
+}
diff --git a/libgo/go/strings/strings_amd64.go b/libgo/go/strings/strings_amd64.go
index 9648912..2441569 100644
--- a/libgo/go/strings/strings_amd64.go
+++ b/libgo/go/strings/strings_amd64.go
@@ -77,25 +77,7 @@ func Index(s, substr string) int {
}
return -1
}
- // Rabin-Karp search
- hashss, pow := hashStr(substr)
- var h uint32
- for i := 0; i < n; i++ {
- h = h*primeRK + uint32(s[i])
- }
- if h == hashss && s[:n] == substr {
- return 0
- }
- for i := n; i < len(s); {
- h *= primeRK
- h += uint32(s[i])
- h -= pow * uint32(s[i-n])
- i++
- if h == hashss && s[i-n:i] == substr {
- return i - n
- }
- }
- return -1
+ return indexRabinKarp(s, substr)
}
// Count counts the number of non-overlapping instances of substr in s.
diff --git a/libgo/go/strings/strings_generic.go b/libgo/go/strings/strings_generic.go
index 9844201d..60a6f78 100644
--- a/libgo/go/strings/strings_generic.go
+++ b/libgo/go/strings/strings_generic.go
@@ -25,22 +25,30 @@ func Index(s, substr string) int {
case n > len(s):
return -1
}
- // Rabin-Karp search
- hashss, pow := hashStr(substr)
- var h uint32
- for i := 0; i < n; i++ {
- h = h*primeRK + uint32(s[i])
- }
- if h == hashss && s[:n] == substr {
- return 0
- }
- for i := n; i < len(s); {
- h *= primeRK
- h += uint32(s[i])
- h -= pow * uint32(s[i-n])
+ c := substr[0]
+ i := 0
+ t := s[:len(s)-n+1]
+ fails := 0
+ for i < len(t) {
+ if t[i] != c {
+ o := IndexByte(t[i:], c)
+ if o < 0 {
+ return -1
+ }
+ i += o
+ }
+ if s[i:i+n] == substr {
+ return i
+ }
i++
- if h == hashss && s[i-n:i] == substr {
- return i - n
+ fails++
+ if fails >= 4+i>>4 && i < len(t) {
+ // See comment in ../bytes/bytes_generic.go.
+ j := indexRabinKarp(s[i:], substr)
+ if j < 0 {
+ return -1
+ }
+ return i + j
}
}
return -1
diff --git a/libgo/go/strings/strings_s390x.go b/libgo/go/strings/strings_s390x.go
index b05fb2b..e74e4cd 100644
--- a/libgo/go/strings/strings_s390x.go
+++ b/libgo/go/strings/strings_s390x.go
@@ -78,25 +78,7 @@ func Index(s, substr string) int {
}
return -1
}
- // Rabin-Karp search
- hashss, pow := hashStr(substr)
- var h uint32
- for i := 0; i < n; i++ {
- h = h*primeRK + uint32(s[i])
- }
- if h == hashss && s[:n] == substr {
- return 0
- }
- for i := n; i < len(s); {
- h *= primeRK
- h += uint32(s[i])
- h -= pow * uint32(s[i-n])
- i++
- if h == hashss && s[i-n:i] == substr {
- return i - n
- }
- }
- return -1
+ return indexRabinKarp(s, substr)
}
// Count counts the number of non-overlapping instances of substr in s.
diff --git a/libgo/go/strings/strings_test.go b/libgo/go/strings/strings_test.go
index 0fddaf0..f2399f4 100644
--- a/libgo/go/strings/strings_test.go
+++ b/libgo/go/strings/strings_test.go
@@ -126,6 +126,9 @@ var indexTests = []IndexTest{
{"xx012345678901234567890123456789012345678901234567890123456789012"[:41], "0123456789012345678901234567890123456789", -1},
{"xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456xxx", -1},
{"xx0123456789012345678901234567890123456789012345678901234567890120123456789012345678901234567890123456xxx", "0123456789012345678901234567890123456xxx", 65},
+ // test fallback to Rabin-Karp.
+ {"oxoxoxoxoxoxoxoxoxoxoxoy", "oy", 22},
+ {"oxoxoxoxoxoxoxoxoxoxoxox", "oy", -1},
}
var lastIndexTests = []IndexTest{
@@ -522,9 +525,12 @@ func runStringTests(t *testing.T, f func(string) string, funcName string, testCa
var upperTests = []StringTest{
{"", ""},
+ {"ONLYUPPER", "ONLYUPPER"},
{"abc", "ABC"},
{"AbC123", "ABC123"},
{"azAZ09_", "AZAZ09_"},
+ {"longStrinGwitHmixofsmaLLandcAps", "LONGSTRINGWITHMIXOFSMALLANDCAPS"},
+ {"long\u0250string\u0250with\u0250nonascii\u2C6Fchars", "LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS"},
{"\u0250\u0250\u0250\u0250\u0250", "\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F"}, // grows one byte per char
}
@@ -533,6 +539,8 @@ var lowerTests = []StringTest{
{"abc", "abc"},
{"AbC123", "abc123"},
{"azAZ09_", "azaz09_"},
+ {"longStrinGwitHmixofsmaLLandcAps", "longstringwithmixofsmallandcaps"},
+ {"LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS", "long\u0250string\u0250with\u0250nonascii\u0250chars"},
{"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", "\u0251\u0251\u0251\u0251\u0251"}, // shrinks one byte per char
}
@@ -652,6 +660,32 @@ func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTest
func TestToLower(t *testing.T) { runStringTests(t, ToLower, "ToLower", lowerTests) }
+func BenchmarkToUpper(b *testing.B) {
+ for _, tc := range upperTests {
+ b.Run(tc.in, func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ actual := ToUpper(tc.in)
+ if actual != tc.out {
+ b.Errorf("ToUpper(%q) = %q; want %q", tc.in, actual, tc.out)
+ }
+ }
+ })
+ }
+}
+
+func BenchmarkToLower(b *testing.B) {
+ for _, tc := range lowerTests {
+ b.Run(tc.in, func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ actual := ToLower(tc.in)
+ if actual != tc.out {
+ b.Errorf("ToLower(%q) = %q; want %q", tc.in, actual, tc.out)
+ }
+ }
+ })
+ }
+}
+
func BenchmarkMapNoChanges(b *testing.B) {
identity := func(r rune) rune {
return r
@@ -1614,3 +1648,15 @@ func BenchmarkTrimASCII(b *testing.B) {
}
}
}
+
+func BenchmarkIndexPeriodic(b *testing.B) {
+ key := "aa"
+ for _, skip := range [...]int{2, 4, 8, 16, 32, 64} {
+ b.Run(fmt.Sprintf("IndexPeriodic%d", skip), func(b *testing.B) {
+ s := Repeat("a"+Repeat(" ", skip-1), 1<<16/skip)
+ for i := 0; i < b.N; i++ {
+ Index(s, key)
+ }
+ })
+ }
+}