diff options
Diffstat (limited to 'libgo/go/strings')
-rw-r--r-- | libgo/go/strings/builder.go | 120 | ||||
-rw-r--r-- | libgo/go/strings/builder_test.go | 282 | ||||
-rw-r--r-- | libgo/go/strings/example_test.go | 124 | ||||
-rw-r--r-- | libgo/go/strings/strings.go | 255 | ||||
-rw-r--r-- | libgo/go/strings/strings_amd64.go | 20 | ||||
-rw-r--r-- | libgo/go/strings/strings_generic.go | 38 | ||||
-rw-r--r-- | libgo/go/strings/strings_s390x.go | 20 | ||||
-rw-r--r-- | libgo/go/strings/strings_test.go | 46 |
8 files changed, 711 insertions, 194 deletions
diff --git a/libgo/go/strings/builder.go b/libgo/go/strings/builder.go new file mode 100644 index 0000000..594f3db --- /dev/null +++ b/libgo/go/strings/builder.go @@ -0,0 +1,120 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package strings + +import ( + "errors" + "io" + "unicode/utf8" + "unsafe" +) + +// A Builder is used to efficiently build a string using Write methods. +// It minimizes memory copying. The zero value is ready to use. +type Builder struct { + buf []byte +} + +// String returns the accumulated string. +func (b *Builder) String() string { + return *(*string)(unsafe.Pointer(&b.buf)) +} + +// Len returns the number of accumulated bytes; b.Len() == len(b.String()). +func (b *Builder) Len() int { return len(b.buf) } + +// Reset resets the Builder to be empty. +func (b *Builder) Reset() { b.buf = nil } + +const maxInt = int(^uint(0) >> 1) + +// grow copies the buffer to a new, larger buffer so that there are at least n +// bytes of capacity beyond len(b.buf). +func (b *Builder) grow(n int) { + buf := make([]byte, len(b.buf), 2*cap(b.buf)+n) + copy(buf, b.buf) + b.buf = buf +} + +// Grow grows b's capacity, if necessary, to guarantee space for +// another n bytes. After Grow(n), at least n bytes can be written to b +// without another allocation. If n is negative, Grow panics. +func (b *Builder) Grow(n int) { + if n < 0 { + panic("strings.Builder.Grow: negative count") + } + if cap(b.buf)-len(b.buf) < n { + b.grow(n) + } +} + +// Write appends the contents of p to b's buffer. +// Write always returns len(p), nil. +func (b *Builder) Write(p []byte) (int, error) { + b.buf = append(b.buf, p...) + return len(p), nil +} + +// WriteByte appends the byte c to b's buffer. +// The returned error is always nil. +func (b *Builder) WriteByte(c byte) error { + b.buf = append(b.buf, c) + return nil +} + +// WriteRune appends the UTF-8 encoding of Unicode code point r to b's buffer. +// It returns the length of r and a nil error. +func (b *Builder) WriteRune(r rune) (int, error) { + if r < utf8.RuneSelf { + b.buf = append(b.buf, byte(r)) + return 1, nil + } + l := len(b.buf) + if cap(b.buf)-l < utf8.UTFMax { + b.grow(utf8.UTFMax) + } + n := utf8.EncodeRune(b.buf[l:l+utf8.UTFMax], r) + b.buf = b.buf[:l+n] + return n, nil +} + +// WriteString appends the contents of s to b's buffer. +// It returns the length of s and a nil error. +func (b *Builder) WriteString(s string) (int, error) { + b.buf = append(b.buf, s...) + return len(s), nil +} + +// minRead is the minimum slice passed to a Read call by Builder.ReadFrom. +// It is the same as bytes.MinRead. +const minRead = 512 + +// errNegativeRead is the panic value if the reader passed to Builder.ReadFrom +// returns a negative count. +var errNegativeRead = errors.New("strings.Builder: reader returned negative count from Read") + +// ReadFrom reads data from r until EOF and appends it to b's buffer. +// The return value n is the number of bytes read. +// Any error except io.EOF encountered during the read is also returned. +func (b *Builder) ReadFrom(r io.Reader) (n int64, err error) { + for { + l := len(b.buf) + if cap(b.buf)-l < minRead { + b.grow(minRead) + } + m, e := r.Read(b.buf[l:cap(b.buf)]) + if m < 0 { + panic(errNegativeRead) + } + b.buf = b.buf[:l+m] + n += int64(m) + if e == io.EOF { + return n, nil + } + if e != nil { + return n, e + } + } +} diff --git a/libgo/go/strings/builder_test.go b/libgo/go/strings/builder_test.go new file mode 100644 index 0000000..df55708 --- /dev/null +++ b/libgo/go/strings/builder_test.go @@ -0,0 +1,282 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package strings_test + +import ( + "bytes" + "errors" + "io" + "runtime" + . "strings" + "testing" + "testing/iotest" +) + +func check(t *testing.T, b *Builder, want string) { + t.Helper() + got := b.String() + if got != want { + t.Errorf("String: got %#q; want %#q", got, want) + return + } + if n := b.Len(); n != len(got) { + t.Errorf("Len: got %d; but len(String()) is %d", n, len(got)) + } +} + +func TestBuilder(t *testing.T) { + var b Builder + check(t, &b, "") + n, err := b.WriteString("hello") + if err != nil || n != 5 { + t.Errorf("WriteString: got %d,%s; want 5,nil", n, err) + } + check(t, &b, "hello") + if err = b.WriteByte(' '); err != nil { + t.Errorf("WriteByte: %s", err) + } + check(t, &b, "hello ") + n, err = b.WriteString("world") + if err != nil || n != 5 { + t.Errorf("WriteString: got %d,%s; want 5,nil", n, err) + } + check(t, &b, "hello world") +} + +func TestBuilderString(t *testing.T) { + var b Builder + b.WriteString("alpha") + check(t, &b, "alpha") + s1 := b.String() + b.WriteString("beta") + check(t, &b, "alphabeta") + s2 := b.String() + b.WriteString("gamma") + check(t, &b, "alphabetagamma") + s3 := b.String() + + // Check that subsequent operations didn't change the returned strings. + if want := "alpha"; s1 != want { + t.Errorf("first String result is now %q; want %q", s1, want) + } + if want := "alphabeta"; s2 != want { + t.Errorf("second String result is now %q; want %q", s2, want) + } + if want := "alphabetagamma"; s3 != want { + t.Errorf("third String result is now %q; want %q", s3, want) + } +} + +func TestBuilderReset(t *testing.T) { + var b Builder + check(t, &b, "") + b.WriteString("aaa") + s := b.String() + check(t, &b, "aaa") + b.Reset() + check(t, &b, "") + + // Ensure that writing after Reset doesn't alter + // previously returned strings. + b.WriteString("bbb") + check(t, &b, "bbb") + if want := "aaa"; s != want { + t.Errorf("previous String result changed after Reset: got %q; want %q", s, want) + } +} + +func TestBuilderGrow(t *testing.T) { + for _, growLen := range []int{0, 100, 1000, 10000, 100000} { + var b Builder + b.Grow(growLen) + p := bytes.Repeat([]byte{'a'}, growLen) + allocs := numAllocs(func() { b.Write(p) }) + if allocs > 0 { + t.Errorf("growLen=%d: allocation occurred during write", growLen) + } + if b.String() != string(p) { + t.Errorf("growLen=%d: bad data written after Grow", growLen) + } + } +} + +func TestBuilderWrite2(t *testing.T) { + const s0 = "hello 世界" + for _, tt := range []struct { + name string + fn func(b *Builder) (int, error) + n int + want string + }{ + { + "Write", + func(b *Builder) (int, error) { return b.Write([]byte(s0)) }, + len(s0), + s0, + }, + { + "WriteRune", + func(b *Builder) (int, error) { return b.WriteRune('a') }, + 1, + "a", + }, + { + "WriteRuneWide", + func(b *Builder) (int, error) { return b.WriteRune('世') }, + 3, + "世", + }, + { + "WriteString", + func(b *Builder) (int, error) { return b.WriteString(s0) }, + len(s0), + s0, + }, + } { + t.Run(tt.name, func(t *testing.T) { + var b Builder + n, err := tt.fn(&b) + if err != nil { + t.Fatalf("first call: got %s", err) + } + if n != tt.n { + t.Errorf("first call: got n=%d; want %d", n, tt.n) + } + check(t, &b, tt.want) + + n, err = tt.fn(&b) + if err != nil { + t.Fatalf("second call: got %s", err) + } + if n != tt.n { + t.Errorf("second call: got n=%d; want %d", n, tt.n) + } + check(t, &b, tt.want+tt.want) + }) + } +} + +func TestBuilderWriteByte(t *testing.T) { + var b Builder + if err := b.WriteByte('a'); err != nil { + t.Error(err) + } + if err := b.WriteByte(0); err != nil { + t.Error(err) + } + check(t, &b, "a\x00") +} + +func TestBuilderReadFrom(t *testing.T) { + for _, tt := range []struct { + name string + fn func(io.Reader) io.Reader + }{ + {"Reader", func(r io.Reader) io.Reader { return r }}, + {"DataErrReader", iotest.DataErrReader}, + {"OneByteReader", iotest.OneByteReader}, + } { + t.Run(tt.name, func(t *testing.T) { + var b Builder + + r := tt.fn(NewReader("hello")) + n, err := b.ReadFrom(r) + if err != nil { + t.Fatalf("first call: got %s", err) + } + if n != 5 { + t.Errorf("first call: got n=%d; want 5", n) + } + check(t, &b, "hello") + + r = tt.fn(NewReader(" world")) + n, err = b.ReadFrom(r) + if err != nil { + t.Fatalf("first call: got %s", err) + } + if n != 6 { + t.Errorf("first call: got n=%d; want 6", n) + } + check(t, &b, "hello world") + }) + } +} + +var errRead = errors.New("boom") + +// errorReader sends reads to the underlying reader +// but returns errRead instead of io.EOF. +type errorReader struct { + r io.Reader +} + +func (r errorReader) Read(b []byte) (int, error) { + n, err := r.r.Read(b) + if err == io.EOF { + err = errRead + } + return n, err +} + +func TestBuilderReadFromError(t *testing.T) { + var b Builder + r := errorReader{NewReader("hello")} + n, err := b.ReadFrom(r) + if n != 5 { + t.Errorf("got n=%d; want 5", n) + } + if err != errRead { + t.Errorf("got err=%q; want %q", err, errRead) + } + check(t, &b, "hello") +} + +type negativeReader struct{} + +func (r negativeReader) Read([]byte) (int, error) { return -1, nil } + +func TestBuilderReadFromNegativeReader(t *testing.T) { + var b Builder + defer func() { + switch err := recover().(type) { + case nil: + t.Fatal("ReadFrom didn't panic") + case error: + wantErr := "strings.Builder: reader returned negative count from Read" + if err.Error() != wantErr { + t.Fatalf("recovered panic: got %v; want %v", err.Error(), wantErr) + } + default: + t.Fatalf("unexpected panic value: %#v", err) + } + }() + + b.ReadFrom(negativeReader{}) +} + +func TestBuilderAllocs(t *testing.T) { + var b Builder + b.Grow(5) + var s string + allocs := numAllocs(func() { + b.WriteString("hello") + s = b.String() + }) + if want := "hello"; s != want { + t.Errorf("String: got %#q; want %#q", s, want) + } + if allocs > 0 { + t.Fatalf("got %d alloc(s); want 0", allocs) + } +} + +func numAllocs(fn func()) uint64 { + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1)) + var m1, m2 runtime.MemStats + runtime.ReadMemStats(&m1) + fn() + runtime.ReadMemStats(&m2) + return m2.Mallocs - m1.Mallocs +} diff --git a/libgo/go/strings/example_test.go b/libgo/go/strings/example_test.go index e962152..607e4a0 100644 --- a/libgo/go/strings/example_test.go +++ b/libgo/go/strings/example_test.go @@ -166,6 +166,26 @@ func ExampleLastIndexAny() { // -1 } +func ExampleLastIndexByte() { + fmt.Println(strings.LastIndexByte("Hello, world", 'l')) + fmt.Println(strings.LastIndexByte("Hello, world", 'o')) + fmt.Println(strings.LastIndexByte("Hello, world", 'x')) + // Output: + // 10 + // 8 + // -1 +} + +func ExampleLastIndexFunc() { + fmt.Println(strings.LastIndexFunc("go 123", unicode.IsNumber)) + fmt.Println(strings.LastIndexFunc("123 go", unicode.IsNumber)) + fmt.Println(strings.LastIndexFunc("go", unicode.IsNumber)) + // Output: + // 5 + // 2 + // -1 +} + func ExampleJoin() { s := []string{"foo", "bar", "baz"} fmt.Println(strings.Join(s, ", ")) @@ -229,17 +249,10 @@ func ExampleToTitle() { // ХЛЕБ } -func ExampleTrim() { - fmt.Printf("[%q]", strings.Trim(" !!! Achtung! Achtung! !!! ", "! ")) - // Output: ["Achtung! Achtung"] -} - -func ExampleTrimFunc() { - f := func(c rune) bool { - return !unicode.IsLetter(c) && !unicode.IsNumber(c) - } - fmt.Printf("[%q]", strings.TrimFunc(" Achtung1! Achtung2,...", f)) - // Output: ["Achtung1! Achtung2"] +func ExampleToTitleSpecial() { + fmt.Println(strings.ToTitleSpecial(unicode.TurkishCase, "dünyanın ilk borsa yapısı Aizonai kabul edilir")) + // Output: + // DÜNYANIN İLK BORSA YAPISI AİZONAİ KABUL EDİLİR } func ExampleMap() { @@ -256,11 +269,6 @@ func ExampleMap() { // Output: 'Gjnf oevyyvt naq gur fyvgul tbcure... } -func ExampleTrimSpace() { - fmt.Println(strings.TrimSpace(" \t\n a lone gopher \n\t\r\n")) - // Output: a lone gopher -} - func ExampleNewReplacer() { r := strings.NewReplacer("<", "<", ">", ">") fmt.Println(r.Replace("This is <b>HTML</b>!")) @@ -272,23 +280,85 @@ func ExampleToUpper() { // Output: GOPHER } +func ExampleToUpperSpecial() { + fmt.Println(strings.ToUpperSpecial(unicode.TurkishCase, "örnek iş")) + // Output: ÖRNEK İŞ +} + func ExampleToLower() { fmt.Println(strings.ToLower("Gopher")) // Output: gopher } -func ExampleTrimSuffix() { - var s = "Hello, goodbye, etc!" - s = strings.TrimSuffix(s, "goodbye, etc!") - s = strings.TrimSuffix(s, "planet") - fmt.Print(s, "world!") - // Output: Hello, world! +func ExampleToLowerSpecial() { + fmt.Println(strings.ToLowerSpecial(unicode.TurkishCase, "Önnek İş")) + // Output: önnek iş +} + +func ExampleTrim() { + fmt.Print(strings.Trim("¡¡¡Hello, Gophers!!!", "!¡")) + // Output: Hello, Gophers +} + +func ExampleTrimSpace() { + fmt.Println(strings.TrimSpace(" \t\n Hello, Gophers \n\t\r\n")) + // Output: Hello, Gophers } func ExampleTrimPrefix() { - var s = "Goodbye,, world!" - s = strings.TrimPrefix(s, "Goodbye,") - s = strings.TrimPrefix(s, "Howdy,") - fmt.Print("Hello" + s) - // Output: Hello, world! + var s = "¡¡¡Hello, Gophers!!!" + s = strings.TrimPrefix(s, "¡¡¡Hello, ") + s = strings.TrimPrefix(s, "¡¡¡Howdy, ") + fmt.Print(s) + // Output: Gophers!!! +} + +func ExampleTrimSuffix() { + var s = "¡¡¡Hello, Gophers!!!" + s = strings.TrimSuffix(s, ", Gophers!!!") + s = strings.TrimSuffix(s, ", Marmots!!!") + fmt.Print(s) + // Output: ¡¡¡Hello +} + +func ExampleTrimFunc() { + fmt.Print(strings.TrimFunc("¡¡¡Hello, Gophers!!!", func(r rune) bool { + return !unicode.IsLetter(r) && !unicode.IsNumber(r) + })) + // Output: Hello, Gophers +} + +func ExampleTrimLeft() { + fmt.Print(strings.TrimLeft("¡¡¡Hello, Gophers!!!", "!¡")) + // Output: Hello, Gophers!!! +} + +func ExampleTrimLeftFunc() { + fmt.Print(strings.TrimLeftFunc("¡¡¡Hello, Gophers!!!", func(r rune) bool { + return !unicode.IsLetter(r) && !unicode.IsNumber(r) + })) + // Output: Hello, Gophers!!! +} + +func ExampleTrimRight() { + fmt.Print(strings.TrimRight("¡¡¡Hello, Gophers!!!", "!¡")) + // Output: ¡¡¡Hello, Gophers +} + +func ExampleTrimRightFunc() { + fmt.Print(strings.TrimRightFunc("¡¡¡Hello, Gophers!!!", func(r rune) bool { + return !unicode.IsLetter(r) && !unicode.IsNumber(r) + })) + // Output: ¡¡¡Hello, Gophers +} + +func ExampleBuilder() { + var b strings.Builder + for i := 3; i >= 1; i-- { + fmt.Fprintf(&b, "%d...", i) + } + b.WriteString("ignition") + fmt.Println(b.String()) + + // Output: 3...2...1...ignition } diff --git a/libgo/go/strings/strings.go b/libgo/go/strings/strings.go index 0c836c0..02c0320 100644 --- a/libgo/go/strings/strings.go +++ b/libgo/go/strings/strings.go @@ -166,22 +166,24 @@ func IndexRune(s string, r rune) int { // IndexAny returns the index of the first instance of any Unicode code point // from chars in s, or -1 if no Unicode code point from chars is present in s. func IndexAny(s, chars string) int { - if len(chars) > 0 { - if len(s) > 8 { - if as, isASCII := makeASCIISet(chars); isASCII { - for i := 0; i < len(s); i++ { - if as.contains(s[i]) { - return i - } + if chars == "" { + // Avoid scanning all of s. + return -1 + } + if len(s) > 8 { + if as, isASCII := makeASCIISet(chars); isASCII { + for i := 0; i < len(s); i++ { + if as.contains(s[i]) { + return i } - return -1 } + return -1 } - for i, c := range s { - for _, m := range chars { - if c == m { - return i - } + } + for i, c := range s { + for _, m := range chars { + if c == m { + return i } } } @@ -192,24 +194,26 @@ func IndexAny(s, chars string) int { // point from chars in s, or -1 if no Unicode code point from chars is // present in s. func LastIndexAny(s, chars string) int { - if len(chars) > 0 { - if len(s) > 8 { - if as, isASCII := makeASCIISet(chars); isASCII { - for i := len(s) - 1; i >= 0; i-- { - if as.contains(s[i]) { - return i - } + if chars == "" { + // Avoid scanning all of s. + return -1 + } + if len(s) > 8 { + if as, isASCII := makeASCIISet(chars); isASCII { + for i := len(s) - 1; i >= 0; i-- { + if as.contains(s[i]) { + return i } - return -1 } + return -1 } - for i := len(s); i > 0; { - r, size := utf8.DecodeLastRuneInString(s[:i]) - i -= size - for _, c := range chars { - if r == c { - return i - } + } + for i := len(s); i > 0; { + r, size := utf8.DecodeLastRuneInString(s[:i]) + i -= size + for _, c := range chars { + if r == c { + return i } } } @@ -310,8 +314,8 @@ func SplitAfter(s, sep string) []string { var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1} // Fields splits the string s around each instance of one or more consecutive white space -// characters, as defined by unicode.IsSpace, returning an array of substrings of s or an -// empty list if s contains only white space. +// characters, as defined by unicode.IsSpace, returning a slice of substrings of s or an +// empty slice if s contains only white space. func Fields(s string) []string { // First count the fields. // This is an exact count if s is ASCII, otherwise it is an approximation. @@ -358,67 +362,7 @@ func Fields(s string) []string { } // Some runes in the input string are not ASCII. - // Same general approach as in the ASCII path but - // uses DecodeRuneInString and unicode.IsSpace if - // a non-ASCII rune needs to be decoded and checked - // if it corresponds to a space. - a := make([]string, 0, n) - fieldStart := 0 - i := 0 - // Skip spaces in the front of the input. - for i < len(s) { - if c := s[i]; c < utf8.RuneSelf { - if asciiSpace[c] == 0 { - break - } - i++ - } else { - r, w := utf8.DecodeRuneInString(s[i:]) - if !unicode.IsSpace(r) { - break - } - i += w - } - } - fieldStart = i - for i < len(s) { - if c := s[i]; c < utf8.RuneSelf { - if asciiSpace[c] == 0 { - i++ - continue - } - a = append(a, s[fieldStart:i]) - i++ - } else { - r, w := utf8.DecodeRuneInString(s[i:]) - if !unicode.IsSpace(r) { - i += w - continue - } - a = append(a, s[fieldStart:i]) - i += w - } - // Skip spaces in between fields. - for i < len(s) { - if c := s[i]; c < utf8.RuneSelf { - if asciiSpace[c] == 0 { - break - } - i++ - } else { - r, w := utf8.DecodeRuneInString(s[i:]) - if !unicode.IsSpace(r) { - break - } - i += w - } - } - fieldStart = i - } - if fieldStart < len(s) { // Last field might end at EOF. - a = append(a, s[fieldStart:]) - } - return a + return FieldsFunc(s, unicode.IsSpace) } // FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c) @@ -427,35 +371,42 @@ func Fields(s string) []string { // FieldsFunc makes no guarantees about the order in which it calls f(c). // If f does not return consistent results for a given c, FieldsFunc may crash. func FieldsFunc(s string, f func(rune) bool) []string { - // First count the fields. - n := 0 - inField := false - for _, rune := range s { - wasInField := inField - inField = !f(rune) - if inField && !wasInField { - n++ - } + // A span is used to record a slice of s of the form s[start:end]. + // The start index is inclusive and the end index is exclusive. + type span struct { + start int + end int } + spans := make([]span, 0, 32) - // Now create them. - a := make([]string, n) - na := 0 - fieldStart := -1 // Set to -1 when looking for start of field. + // Find the field start and end indices. + wasField := false + fromIndex := 0 for i, rune := range s { if f(rune) { - if fieldStart >= 0 { - a[na] = s[fieldStart:i] - na++ - fieldStart = -1 + if wasField { + spans = append(spans, span{start: fromIndex, end: i}) + wasField = false + } + } else { + if !wasField { + fromIndex = i + wasField = true } - } else if fieldStart == -1 { - fieldStart = i } } - if fieldStart >= 0 { // Last field might end at EOF. - a[na] = s[fieldStart:] + + // Last field might end at EOF. + if wasField { + spans = append(spans, span{fromIndex, len(s)}) + } + + // Create strings from recorded field indices. + a := make([]string, len(spans)) + for i, span := range spans { + a[i] = s[span.start:span.end] } + return a } @@ -599,10 +550,62 @@ func Repeat(s string, count int) string { } // ToUpper returns a copy of the string s with all Unicode letters mapped to their upper case. -func ToUpper(s string) string { return Map(unicode.ToUpper, s) } +func ToUpper(s string) string { + isASCII, hasLower := true, false + for i := 0; i < len(s); i++ { + c := s[i] + if c >= utf8.RuneSelf { + isASCII = false + break + } + hasLower = hasLower || (c >= 'a' && c <= 'z') + } + + if isASCII { // optimize for ASCII-only strings. + if !hasLower { + return s + } + b := make([]byte, len(s)) + for i := 0; i < len(s); i++ { + c := s[i] + if c >= 'a' && c <= 'z' { + c -= 'a' - 'A' + } + b[i] = c + } + return string(b) + } + return Map(unicode.ToUpper, s) +} // ToLower returns a copy of the string s with all Unicode letters mapped to their lower case. -func ToLower(s string) string { return Map(unicode.ToLower, s) } +func ToLower(s string) string { + isASCII, hasUpper := true, false + for i := 0; i < len(s); i++ { + c := s[i] + if c >= utf8.RuneSelf { + isASCII = false + break + } + hasUpper = hasUpper || (c >= 'A' && c <= 'Z') + } + + if isASCII { // optimize for ASCII-only strings. + if !hasUpper { + return s + } + b := make([]byte, len(s)) + for i := 0; i < len(s); i++ { + c := s[i] + if c >= 'A' && c <= 'Z' { + c += 'a' - 'A' + } + b[i] = c + } + return string(b) + } + return Map(unicode.ToLower, s) +} // ToTitle returns a copy of the string s with all Unicode letters mapped to their title case. func ToTitle(s string) string { return Map(unicode.ToTitle, s) } @@ -923,3 +926,27 @@ func EqualFold(s, t string) bool { // One string is empty. Are both? return s == t } + +func indexRabinKarp(s, substr string) int { + // Rabin-Karp search + hashss, pow := hashStr(substr) + n := len(substr) + var h uint32 + for i := 0; i < n; i++ { + h = h*primeRK + uint32(s[i]) + } + if h == hashss && s[:n] == substr { + return 0 + } + for i := n; i < len(s); { + h *= primeRK + h += uint32(s[i]) + h -= pow * uint32(s[i-n]) + i++ + if h == hashss && s[i-n:i] == substr { + return i - n + } + } + return -1 + +} diff --git a/libgo/go/strings/strings_amd64.go b/libgo/go/strings/strings_amd64.go index 9648912..2441569 100644 --- a/libgo/go/strings/strings_amd64.go +++ b/libgo/go/strings/strings_amd64.go @@ -77,25 +77,7 @@ func Index(s, substr string) int { } return -1 } - // Rabin-Karp search - hashss, pow := hashStr(substr) - var h uint32 - for i := 0; i < n; i++ { - h = h*primeRK + uint32(s[i]) - } - if h == hashss && s[:n] == substr { - return 0 - } - for i := n; i < len(s); { - h *= primeRK - h += uint32(s[i]) - h -= pow * uint32(s[i-n]) - i++ - if h == hashss && s[i-n:i] == substr { - return i - n - } - } - return -1 + return indexRabinKarp(s, substr) } // Count counts the number of non-overlapping instances of substr in s. diff --git a/libgo/go/strings/strings_generic.go b/libgo/go/strings/strings_generic.go index 9844201d..60a6f78 100644 --- a/libgo/go/strings/strings_generic.go +++ b/libgo/go/strings/strings_generic.go @@ -25,22 +25,30 @@ func Index(s, substr string) int { case n > len(s): return -1 } - // Rabin-Karp search - hashss, pow := hashStr(substr) - var h uint32 - for i := 0; i < n; i++ { - h = h*primeRK + uint32(s[i]) - } - if h == hashss && s[:n] == substr { - return 0 - } - for i := n; i < len(s); { - h *= primeRK - h += uint32(s[i]) - h -= pow * uint32(s[i-n]) + c := substr[0] + i := 0 + t := s[:len(s)-n+1] + fails := 0 + for i < len(t) { + if t[i] != c { + o := IndexByte(t[i:], c) + if o < 0 { + return -1 + } + i += o + } + if s[i:i+n] == substr { + return i + } i++ - if h == hashss && s[i-n:i] == substr { - return i - n + fails++ + if fails >= 4+i>>4 && i < len(t) { + // See comment in ../bytes/bytes_generic.go. + j := indexRabinKarp(s[i:], substr) + if j < 0 { + return -1 + } + return i + j } } return -1 diff --git a/libgo/go/strings/strings_s390x.go b/libgo/go/strings/strings_s390x.go index b05fb2b..e74e4cd 100644 --- a/libgo/go/strings/strings_s390x.go +++ b/libgo/go/strings/strings_s390x.go @@ -78,25 +78,7 @@ func Index(s, substr string) int { } return -1 } - // Rabin-Karp search - hashss, pow := hashStr(substr) - var h uint32 - for i := 0; i < n; i++ { - h = h*primeRK + uint32(s[i]) - } - if h == hashss && s[:n] == substr { - return 0 - } - for i := n; i < len(s); { - h *= primeRK - h += uint32(s[i]) - h -= pow * uint32(s[i-n]) - i++ - if h == hashss && s[i-n:i] == substr { - return i - n - } - } - return -1 + return indexRabinKarp(s, substr) } // Count counts the number of non-overlapping instances of substr in s. diff --git a/libgo/go/strings/strings_test.go b/libgo/go/strings/strings_test.go index 0fddaf0..f2399f4 100644 --- a/libgo/go/strings/strings_test.go +++ b/libgo/go/strings/strings_test.go @@ -126,6 +126,9 @@ var indexTests = []IndexTest{ {"xx012345678901234567890123456789012345678901234567890123456789012"[:41], "0123456789012345678901234567890123456789", -1}, {"xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456xxx", -1}, {"xx0123456789012345678901234567890123456789012345678901234567890120123456789012345678901234567890123456xxx", "0123456789012345678901234567890123456xxx", 65}, + // test fallback to Rabin-Karp. + {"oxoxoxoxoxoxoxoxoxoxoxoy", "oy", 22}, + {"oxoxoxoxoxoxoxoxoxoxoxox", "oy", -1}, } var lastIndexTests = []IndexTest{ @@ -522,9 +525,12 @@ func runStringTests(t *testing.T, f func(string) string, funcName string, testCa var upperTests = []StringTest{ {"", ""}, + {"ONLYUPPER", "ONLYUPPER"}, {"abc", "ABC"}, {"AbC123", "ABC123"}, {"azAZ09_", "AZAZ09_"}, + {"longStrinGwitHmixofsmaLLandcAps", "LONGSTRINGWITHMIXOFSMALLANDCAPS"}, + {"long\u0250string\u0250with\u0250nonascii\u2C6Fchars", "LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS"}, {"\u0250\u0250\u0250\u0250\u0250", "\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F"}, // grows one byte per char } @@ -533,6 +539,8 @@ var lowerTests = []StringTest{ {"abc", "abc"}, {"AbC123", "abc123"}, {"azAZ09_", "azaz09_"}, + {"longStrinGwitHmixofsmaLLandcAps", "longstringwithmixofsmallandcaps"}, + {"LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS", "long\u0250string\u0250with\u0250nonascii\u0250chars"}, {"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", "\u0251\u0251\u0251\u0251\u0251"}, // shrinks one byte per char } @@ -652,6 +660,32 @@ func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTest func TestToLower(t *testing.T) { runStringTests(t, ToLower, "ToLower", lowerTests) } +func BenchmarkToUpper(b *testing.B) { + for _, tc := range upperTests { + b.Run(tc.in, func(b *testing.B) { + for i := 0; i < b.N; i++ { + actual := ToUpper(tc.in) + if actual != tc.out { + b.Errorf("ToUpper(%q) = %q; want %q", tc.in, actual, tc.out) + } + } + }) + } +} + +func BenchmarkToLower(b *testing.B) { + for _, tc := range lowerTests { + b.Run(tc.in, func(b *testing.B) { + for i := 0; i < b.N; i++ { + actual := ToLower(tc.in) + if actual != tc.out { + b.Errorf("ToLower(%q) = %q; want %q", tc.in, actual, tc.out) + } + } + }) + } +} + func BenchmarkMapNoChanges(b *testing.B) { identity := func(r rune) rune { return r @@ -1614,3 +1648,15 @@ func BenchmarkTrimASCII(b *testing.B) { } } } + +func BenchmarkIndexPeriodic(b *testing.B) { + key := "aa" + for _, skip := range [...]int{2, 4, 8, 16, 32, 64} { + b.Run(fmt.Sprintf("IndexPeriodic%d", skip), func(b *testing.B) { + s := Repeat("a"+Repeat(" ", skip-1), 1<<16/skip) + for i := 0; i < b.N; i++ { + Index(s, key) + } + }) + } +} |