diff options
author | Ian Lance Taylor <iant@golang.org> | 2017-01-14 00:05:42 +0000 |
---|---|---|
committer | Ian Lance Taylor <ian@gcc.gnu.org> | 2017-01-14 00:05:42 +0000 |
commit | c2047754c300b68c05d65faa8dc2925fe67b71b4 (patch) | |
tree | e183ae81a1f48a02945cb6de463a70c5be1b06f6 /libgo/go/bytes | |
parent | 829afb8f05602bb31c9c597b24df7377fed4f059 (diff) | |
download | gcc-c2047754c300b68c05d65faa8dc2925fe67b71b4.zip gcc-c2047754c300b68c05d65faa8dc2925fe67b71b4.tar.gz gcc-c2047754c300b68c05d65faa8dc2925fe67b71b4.tar.bz2 |
libgo: update to Go 1.8 release candidate 1
Compiler changes:
* Change map assignment to use mapassign and assign value directly.
* Change string iteration to use decoderune, faster for ASCII strings.
* Change makeslice to take int, and use makeslice64 for larger values.
* Add new noverflow field to hmap struct used for maps.
Unresolved problems, to be fixed later:
* Commented out test in go/types/sizes_test.go that doesn't compile.
* Commented out reflect.TestStructOf test for padding after zero-sized field.
Reviewed-on: https://go-review.googlesource.com/35231
gotools/:
Updates for Go 1.8rc1.
* Makefile.am (go_cmd_go_files): Add bug.go.
(s-zdefaultcc): Write defaultPkgConfig.
* Makefile.in: Rebuild.
From-SVN: r244456
Diffstat (limited to 'libgo/go/bytes')
-rw-r--r-- | libgo/go/bytes/buffer.go | 43 | ||||
-rw-r--r-- | libgo/go/bytes/buffer_test.go | 13 | ||||
-rw-r--r-- | libgo/go/bytes/bytes.go | 146 | ||||
-rw-r--r-- | libgo/go/bytes/bytes_amd64.go | 117 | ||||
-rw-r--r-- | libgo/go/bytes/bytes_generic.go | 41 | ||||
-rw-r--r-- | libgo/go/bytes/bytes_s390x.go | 120 | ||||
-rw-r--r-- | libgo/go/bytes/bytes_test.go | 194 | ||||
-rw-r--r-- | libgo/go/bytes/example_test.go | 203 |
8 files changed, 794 insertions, 83 deletions
diff --git a/libgo/go/bytes/buffer.go b/libgo/go/bytes/buffer.go index 9154a1b..196419d 100644 --- a/libgo/go/bytes/buffer.go +++ b/libgo/go/bytes/buffer.go @@ -15,22 +15,25 @@ import ( // A Buffer is a variable-sized buffer of bytes with Read and Write methods. // The zero value for Buffer is an empty buffer ready to use. type Buffer struct { - buf []byte // contents are the bytes buf[off : len(buf)] - off int // read at &buf[off], write at &buf[len(buf)] - runeBytes [utf8.UTFMax]byte // avoid allocation of slice on each call to WriteRune - bootstrap [64]byte // memory to hold first slice; helps small buffers avoid allocation. - lastRead readOp // last read operation, so that Unread* can work correctly. + buf []byte // contents are the bytes buf[off : len(buf)] + off int // read at &buf[off], write at &buf[len(buf)] + bootstrap [64]byte // memory to hold first slice; helps small buffers avoid allocation. + lastRead readOp // last read operation, so that Unread* can work correctly. } // The readOp constants describe the last action performed on -// the buffer, so that UnreadRune and UnreadByte can -// check for invalid usage. +// the buffer, so that UnreadRune and UnreadByte can check for +// invalid usage. opReadRuneX constants are chosen such that +// converted to int they correspond to the rune size that was read. type readOp int const ( - opInvalid readOp = iota // Non-read operation. - opReadRune // Read rune. - opRead // Any other read operation. + opRead readOp = -1 // Any other read operation. + opInvalid = 0 // Non-read operation. + opReadRune1 = 1 // Read rune of size 1. + opReadRune2 = 2 // Read rune of size 2. + opReadRune3 = 3 // Read rune of size 3. + opReadRune4 = 4 // Read rune of size 4. ) // ErrTooLarge is passed to panic if memory cannot be allocated to store data in a buffer. @@ -246,8 +249,10 @@ func (b *Buffer) WriteRune(r rune) (n int, err error) { b.WriteByte(byte(r)) return 1, nil } - n = utf8.EncodeRune(b.runeBytes[0:], r) - b.Write(b.runeBytes[0:n]) + b.lastRead = opInvalid + m := b.grow(utf8.UTFMax) + n = utf8.EncodeRune(b.buf[m:m+utf8.UTFMax], r) + b.buf = b.buf[:m+n] return n, nil } @@ -318,14 +323,15 @@ func (b *Buffer) ReadRune() (r rune, size int, err error) { b.Truncate(0) return 0, 0, io.EOF } - b.lastRead = opReadRune c := b.buf[b.off] if c < utf8.RuneSelf { b.off++ + b.lastRead = opReadRune1 return rune(c), 1, nil } r, n := utf8.DecodeRune(b.buf[b.off:]) b.off += n + b.lastRead = readOp(n) return r, n, nil } @@ -335,14 +341,13 @@ func (b *Buffer) ReadRune() (r rune, size int, err error) { // it is stricter than UnreadByte, which will unread the last byte // from any read operation.) func (b *Buffer) UnreadRune() error { - if b.lastRead != opReadRune { + if b.lastRead <= opInvalid { return errors.New("bytes.Buffer: UnreadRune: previous operation was not ReadRune") } - b.lastRead = opInvalid - if b.off > 0 { - _, n := utf8.DecodeLastRune(b.buf[0:b.off]) - b.off -= n + if b.off >= int(b.lastRead) { + b.off -= int(b.lastRead) } + b.lastRead = opInvalid return nil } @@ -350,7 +355,7 @@ func (b *Buffer) UnreadRune() error { // read operation. If write has happened since the last read, UnreadByte // returns an error. func (b *Buffer) UnreadByte() error { - if b.lastRead != opReadRune && b.lastRead != opRead { + if b.lastRead == opInvalid { return errors.New("bytes.Buffer: UnreadByte: previous operation was not a read") } b.lastRead = opInvalid diff --git a/libgo/go/bytes/buffer_test.go b/libgo/go/bytes/buffer_test.go index 7de17ae..b1b85f9 100644 --- a/libgo/go/bytes/buffer_test.go +++ b/libgo/go/bytes/buffer_test.go @@ -514,6 +514,19 @@ func TestBufferGrowth(t *testing.T) { } } +func BenchmarkWriteRune(b *testing.B) { + const n = 4 << 10 + const r = '☺' + b.SetBytes(int64(n * utf8.RuneLen(r))) + buf := NewBuffer(make([]byte, n*utf8.UTFMax)) + for i := 0; i < b.N; i++ { + buf.Reset() + for i := 0; i < n; i++ { + buf.WriteRune(r) + } + } +} + // From Issue 5154. func BenchmarkBufferNotEmptyWriteRead(b *testing.B) { buf := make([]byte, 1024) diff --git a/libgo/go/bytes/bytes.go b/libgo/go/bytes/bytes.go index 305c85d..406a3825 100644 --- a/libgo/go/bytes/bytes.go +++ b/libgo/go/bytes/bytes.go @@ -93,37 +93,6 @@ func ContainsRune(b []byte, r rune) bool { return IndexRune(b, r) >= 0 } -// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s. -func Index(s, sep []byte) int { - n := len(sep) - if n == 0 { - return 0 - } - if n > len(s) { - return -1 - } - c := sep[0] - if n == 1 { - return IndexByte(s, c) - } - i := 0 - t := s[:len(s)-n+1] - for i < len(t) { - if t[i] != c { - o := IndexByte(t[i:], c) - if o < 0 { - break - } - i += o - } - if Equal(s[i:i+n], sep) { - return i - } - i++ - } - return -1 -} - func indexBytePortable(s []byte, c byte) int { for i, b := range s { if b == c { @@ -161,15 +130,28 @@ func LastIndexByte(s []byte, c byte) int { // IndexRune interprets s as a sequence of UTF-8-encoded Unicode code points. // It returns the byte index of the first occurrence in s of the given rune. // It returns -1 if rune is not present in s. +// If r is utf8.RuneError, it returns the first instance of any +// invalid UTF-8 byte sequence. func IndexRune(s []byte, r rune) int { - for i := 0; i < len(s); { - r1, size := utf8.DecodeRune(s[i:]) - if r == r1 { - return i + switch { + case 0 <= r && r < utf8.RuneSelf: + return IndexByte(s, byte(r)) + case r == utf8.RuneError: + for i := 0; i < len(s); { + r1, n := utf8.DecodeRune(s[i:]) + if r1 == utf8.RuneError { + return i + } + i += n } - i += size + return -1 + case !utf8.ValidRune(r): + return -1 + default: + var b [utf8.UTFMax]byte + n := utf8.EncodeRune(b[:], r) + return Index(s, b[:n]) } - return -1 } // IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points. @@ -178,10 +160,19 @@ func IndexRune(s []byte, r rune) int { // point in common. func IndexAny(s []byte, chars string) int { if len(chars) > 0 { - var r rune + if len(s) > 8 { + if as, isASCII := makeASCIISet(chars); isASCII { + for i, c := range s { + if as.contains(c) { + return i + } + } + return -1 + } + } var width int for i := 0; i < len(s); i += width { - r = rune(s[i]) + r := rune(s[i]) if r < utf8.RuneSelf { width = 1 } else { @@ -203,11 +194,21 @@ func IndexAny(s []byte, chars string) int { // there is no code point in common. func LastIndexAny(s []byte, chars string) int { if len(chars) > 0 { + if len(s) > 8 { + if as, isASCII := makeASCIISet(chars); isASCII { + for i := len(s) - 1; i >= 0; i-- { + if as.contains(s[i]) { + return i + } + } + return -1 + } + } for i := len(s); i > 0; { - r, size := utf8.DecodeLastRune(s[0:i]) + r, size := utf8.DecodeLastRune(s[:i]) i -= size - for _, ch := range chars { - if r == ch { + for _, c := range chars { + if r == c { return i } } @@ -398,7 +399,20 @@ func Map(mapping func(r rune) rune, s []byte) []byte { } // Repeat returns a new byte slice consisting of count copies of b. +// +// It panics if count is negative or if +// the result of (len(b) * count) overflows. func Repeat(b []byte, count int) []byte { + // Since we cannot return an error on overflow, + // we should panic if the repeat will generate + // an overflow. + // See Issue golang.org/issue/16237. + if count < 0 { + panic("bytes: negative Repeat count") + } else if count > 0 && len(b)*count/count != len(b) { + panic("bytes: Repeat count causes overflow") + } + nb := make([]byte, len(b)*count) bp := copy(nb, b) for bp < len(nb) { @@ -419,20 +433,20 @@ func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) } // ToUpperSpecial returns a copy of the byte slice s with all Unicode letters mapped to their // upper case, giving priority to the special casing rules. -func ToUpperSpecial(_case unicode.SpecialCase, s []byte) []byte { - return Map(func(r rune) rune { return _case.ToUpper(r) }, s) +func ToUpperSpecial(c unicode.SpecialCase, s []byte) []byte { + return Map(func(r rune) rune { return c.ToUpper(r) }, s) } // ToLowerSpecial returns a copy of the byte slice s with all Unicode letters mapped to their // lower case, giving priority to the special casing rules. -func ToLowerSpecial(_case unicode.SpecialCase, s []byte) []byte { - return Map(func(r rune) rune { return _case.ToLower(r) }, s) +func ToLowerSpecial(c unicode.SpecialCase, s []byte) []byte { + return Map(func(r rune) rune { return c.ToLower(r) }, s) } // ToTitleSpecial returns a copy of the byte slice s with all Unicode letters mapped to their // title case, giving priority to the special casing rules. -func ToTitleSpecial(_case unicode.SpecialCase, s []byte) []byte { - return Map(func(r rune) rune { return _case.ToTitle(r) }, s) +func ToTitleSpecial(c unicode.SpecialCase, s []byte) []byte { + return Map(func(r rune) rune { return c.ToTitle(r) }, s) } // isSeparator reports whether the rune could mark a word boundary. @@ -578,7 +592,43 @@ func lastIndexFunc(s []byte, f func(r rune) bool, truth bool) int { return -1 } +// asciiSet is a 32-byte value, where each bit represents the presence of a +// given ASCII character in the set. The 128-bits of the lower 16 bytes, +// starting with the least-significant bit of the lowest word to the +// most-significant bit of the highest word, map to the full range of all +// 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed, +// ensuring that any non-ASCII character will be reported as not in the set. +type asciiSet [8]uint32 + +// makeASCIISet creates a set of ASCII characters and reports whether all +// characters in chars are ASCII. +func makeASCIISet(chars string) (as asciiSet, ok bool) { + for i := 0; i < len(chars); i++ { + c := chars[i] + if c >= utf8.RuneSelf { + return as, false + } + as[c>>5] |= 1 << uint(c&31) + } + return as, true +} + +// contains reports whether c is inside the set. +func (as *asciiSet) contains(c byte) bool { + return (as[c>>5] & (1 << uint(c&31))) != 0 +} + func makeCutsetFunc(cutset string) func(r rune) bool { + if len(cutset) == 1 && cutset[0] < utf8.RuneSelf { + return func(r rune) bool { + return r == rune(cutset[0]) + } + } + if as, isASCII := makeASCIISet(cutset); isASCII { + return func(r rune) bool { + return r < utf8.RuneSelf && as.contains(byte(r)) + } + } return func(r rune) bool { for _, c := range cutset { if c == r { diff --git a/libgo/go/bytes/bytes_amd64.go b/libgo/go/bytes/bytes_amd64.go new file mode 100644 index 0000000..58a07ef --- /dev/null +++ b/libgo/go/bytes/bytes_amd64.go @@ -0,0 +1,117 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +package bytes + +//go:noescape + +// indexShortStr returns the index of the first instance of c in s, or -1 if c is not present in s. +// indexShortStr requires 2 <= len(c) <= shortStringLen +func indexShortStr(s, c []byte) int // ../runtime/asm_$GOARCH.s +func supportAVX2() bool // ../runtime/asm_$GOARCH.s + +var shortStringLen int + +func init() { + if supportAVX2() { + shortStringLen = 63 + } else { + shortStringLen = 31 + } +} + +// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s. +func Index(s, sep []byte) int { + n := len(sep) + switch { + case n == 0: + return 0 + case n == 1: + return IndexByte(s, sep[0]) + case n == len(s): + if Equal(sep, s) { + return 0 + } + return -1 + case n > len(s): + return -1 + case n <= shortStringLen: + // Use brute force when s and sep both are small + if len(s) <= 64 { + return indexShortStr(s, sep) + } + c := sep[0] + i := 0 + t := s[:len(s)-n+1] + fails := 0 + for i < len(t) { + if t[i] != c { + // IndexByte skips 16/32 bytes per iteration, + // so it's faster than indexShortStr. + o := IndexByte(t[i:], c) + if o < 0 { + return -1 + } + i += o + } + if Equal(s[i:i+n], sep) { + return i + } + fails++ + i++ + // Switch to indexShortStr when IndexByte produces too many false positives. + // Too many means more that 1 error per 8 characters. + // Allow some errors in the beginning. + if fails > (i+16)/8 { + r := indexShortStr(s[i:], sep) + if r >= 0 { + return r + i + } + return -1 + } + } + return -1 + } + // Rabin-Karp search + hashsep, pow := hashStr(sep) + var h uint32 + for i := 0; i < n; i++ { + h = h*primeRK + uint32(s[i]) + } + if h == hashsep && Equal(s[:n], sep) { + return 0 + } + for i := n; i < len(s); { + h *= primeRK + h += uint32(s[i]) + h -= pow * uint32(s[i-n]) + i++ + if h == hashsep && Equal(s[i-n:i], sep) { + return i - n + } + } + return -1 +} + +// primeRK is the prime base used in Rabin-Karp algorithm. +const primeRK = 16777619 + +// hashStr returns the hash and the appropriate multiplicative +// factor for use in Rabin-Karp algorithm. +func hashStr(sep []byte) (uint32, uint32) { + hash := uint32(0) + for i := 0; i < len(sep); i++ { + hash = hash*primeRK + uint32(sep[i]) + } + var pow, sq uint32 = 1, primeRK + for i := len(sep); i > 0; i >>= 1 { + if i&1 != 0 { + pow *= sq + } + sq *= sq + } + return hash, pow +} diff --git a/libgo/go/bytes/bytes_generic.go b/libgo/go/bytes/bytes_generic.go new file mode 100644 index 0000000..91baa22 --- /dev/null +++ b/libgo/go/bytes/bytes_generic.go @@ -0,0 +1,41 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// -build !amd64,!s390x + +package bytes + +// TODO: implements short string optimization on non amd64 platforms +// and get rid of bytes_amd64.go + +// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s. +func Index(s, sep []byte) int { + n := len(sep) + if n == 0 { + return 0 + } + if n > len(s) { + return -1 + } + c := sep[0] + if n == 1 { + return IndexByte(s, c) + } + i := 0 + t := s[:len(s)-n+1] + for i < len(t) { + if t[i] != c { + o := IndexByte(t[i:], c) + if o < 0 { + break + } + i += o + } + if Equal(s[i:i+n], sep) { + return i + } + i++ + } + return -1 +} diff --git a/libgo/go/bytes/bytes_s390x.go b/libgo/go/bytes/bytes_s390x.go new file mode 100644 index 0000000..a05ca47 --- /dev/null +++ b/libgo/go/bytes/bytes_s390x.go @@ -0,0 +1,120 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +package bytes + +//go:noescape + +// indexShortStr returns the index of the first instance of sep in s, +// or -1 if sep is not present in s. +// indexShortStr requires 2 <= len(sep) <= shortStringLen +func indexShortStr(s, c []byte) int // ../runtime/asm_s390x.s + +// supportsVX reports whether the vector facility is available. +// indexShortStr must not be called if the vector facility is not +// available. +func supportsVX() bool // ../runtime/asm_s390x.s + +var shortStringLen = -1 + +func init() { + if supportsVX() { + shortStringLen = 64 + } +} + +// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s. +func Index(s, sep []byte) int { + n := len(sep) + switch { + case n == 0: + return 0 + case n == 1: + return IndexByte(s, sep[0]) + case n == len(s): + if Equal(sep, s) { + return 0 + } + return -1 + case n > len(s): + return -1 + case n <= shortStringLen: + // Use brute force when s and sep both are small + if len(s) <= 64 { + return indexShortStr(s, sep) + } + c := sep[0] + i := 0 + t := s[:len(s)-n+1] + fails := 0 + for i < len(t) { + if t[i] != c { + // IndexByte skips 16/32 bytes per iteration, + // so it's faster than indexShortStr. + o := IndexByte(t[i:], c) + if o < 0 { + return -1 + } + i += o + } + if Equal(s[i:i+n], sep) { + return i + } + fails++ + i++ + // Switch to indexShortStr when IndexByte produces too many false positives. + // Too many means more that 1 error per 8 characters. + // Allow some errors in the beginning. + if fails > (i+16)/8 { + r := indexShortStr(s[i:], sep) + if r >= 0 { + return r + i + } + return -1 + } + } + return -1 + } + // Rabin-Karp search + hashsep, pow := hashStr(sep) + var h uint32 + for i := 0; i < n; i++ { + h = h*primeRK + uint32(s[i]) + } + if h == hashsep && Equal(s[:n], sep) { + return 0 + } + for i := n; i < len(s); { + h *= primeRK + h += uint32(s[i]) + h -= pow * uint32(s[i-n]) + i++ + if h == hashsep && Equal(s[i-n:i], sep) { + return i - n + } + } + return -1 +} + +// primeRK is the prime base used in Rabin-Karp algorithm. +const primeRK = 16777619 + +// hashStr returns the hash and the appropriate multiplicative +// factor for use in Rabin-Karp algorithm. +func hashStr(sep []byte) (uint32, uint32) { + hash := uint32(0) + for i := 0; i < len(sep); i++ { + hash = hash*primeRK + uint32(sep[i]) + } + var pow, sq uint32 = 1, primeRK + for i := len(sep); i > 0; i >>= 1 { + if i&1 != 0 { + pow *= sq + } + sq *= sq + } + return hash, pow +} diff --git a/libgo/go/bytes/bytes_test.go b/libgo/go/bytes/bytes_test.go index c48f662..26eac5e 100644 --- a/libgo/go/bytes/bytes_test.go +++ b/libgo/go/bytes/bytes_test.go @@ -7,8 +7,10 @@ package bytes_test import ( . "bytes" "fmt" + "internal/testenv" "math/rand" "reflect" + "strings" "testing" "unicode" "unicode/utf8" @@ -165,8 +167,12 @@ var indexAnyTests = []BinOpTest{ {"abc", "xyz", -1}, {"abc", "xcz", 2}, {"ab☺c", "x☺yz", 2}, + {"a☺b☻c☹d", "cx", len("a☺b☻")}, + {"a☺b☻c☹d", "uvw☻xyz", len("a☺b")}, {"aRegExp*", ".(|)*+?^$[]", 7}, {dots + dots + dots, " ", -1}, + {"012abcba210", "\xffb", 4}, + {"012\x80bcb\x80210", "\xffb", 3}, } var lastIndexAnyTests = []BinOpTest{ @@ -178,18 +184,13 @@ var lastIndexAnyTests = []BinOpTest{ {"aaa", "a", 2}, {"abc", "xyz", -1}, {"abc", "ab", 1}, - {"a☺b☻c☹d", "uvw☻xyz", 2 + len("☺")}, + {"ab☺c", "x☺yz", 2}, + {"a☺b☻c☹d", "cx", len("a☺b☻")}, + {"a☺b☻c☹d", "uvw☻xyz", len("a☺b")}, {"a.RegExp*", ".(|)*+?^$[]", 8}, {dots + dots + dots, " ", -1}, -} - -var indexRuneTests = []BinOpTest{ - {"", "a", -1}, - {"", "☺", -1}, - {"foo", "☹", -1}, - {"foo", "o", 1}, - {"foo☺bar", "☺", 3}, - {"foo☺☻☹bar", "☹", 9}, + {"012abcba210", "\xffb", 6}, + {"012\x80bcb\x80210", "\xffb", 7}, } // Execute f on each test case. funcName should be the name of f; it's used @@ -346,13 +347,52 @@ func TestIndexByteSmall(t *testing.T) { } func TestIndexRune(t *testing.T) { - for _, tt := range indexRuneTests { - a := []byte(tt.a) - r, _ := utf8.DecodeRuneInString(tt.b) - pos := IndexRune(a, r) - if pos != tt.i { - t.Errorf(`IndexRune(%q, '%c') = %v`, tt.a, r, pos) + tests := []struct { + in string + rune rune + want int + }{ + {"", 'a', -1}, + {"", '☺', -1}, + {"foo", '☹', -1}, + {"foo", 'o', 1}, + {"foo☺bar", '☺', 3}, + {"foo☺☻☹bar", '☹', 9}, + {"a A x", 'A', 2}, + {"some_text=some_value", '=', 9}, + {"☺a", 'a', 3}, + {"a☻☺b", '☺', 4}, + + // RuneError should match any invalid UTF-8 byte sequence. + {"�", '�', 0}, + {"\xff", '�', 0}, + {"☻x�", '�', len("☻x")}, + {"☻x\xe2\x98", '�', len("☻x")}, + {"☻x\xe2\x98�", '�', len("☻x")}, + {"☻x\xe2\x98x", '�', len("☻x")}, + + // Invalid rune values should never match. + {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", -1, -1}, + {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", 0xD800, -1}, // Surrogate pair + {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", utf8.MaxRune + 1, -1}, + } + for _, tt := range tests { + if got := IndexRune([]byte(tt.in), tt.rune); got != tt.want { + t.Errorf("IndexRune(%q, %d) = %v; want %v", tt.in, tt.rune, got, tt.want) + } + } + + haystack := []byte("test世界") + allocs := testing.AllocsPerRun(1000, func() { + if i := IndexRune(haystack, 's'); i != 2 { + t.Fatalf("'s' at %d; want 2", i) + } + if i := IndexRune(haystack, '世'); i != 4 { + t.Fatalf("'世' at %d; want 4", i) } + }) + if allocs != 0 { + t.Errorf("expected no allocations, got %f", allocs) } } @@ -370,6 +410,9 @@ func valName(x int) string { func benchBytes(b *testing.B, sizes []int, f func(b *testing.B, n int)) { for _, n := range sizes { + if isRaceBuilder && n > 4<<10 { + continue + } b.Run(valName(n), func(b *testing.B) { if len(bmbuf) < n { bmbuf = make([]byte, n) @@ -382,6 +425,8 @@ func benchBytes(b *testing.B, sizes []int, f func(b *testing.B, n int)) { var indexSizes = []int{10, 32, 4 << 10, 4 << 20, 64 << 20} +var isRaceBuilder = strings.HasSuffix(testenv.Builder(), "-race") + func BenchmarkIndexByte(b *testing.B) { benchBytes(b, indexSizes, bmIndexByte(IndexByte)) } @@ -404,6 +449,44 @@ func bmIndexByte(index func([]byte, byte) int) func(b *testing.B, n int) { } } +func BenchmarkIndexRune(b *testing.B) { + benchBytes(b, indexSizes, bmIndexRune(IndexRune)) +} + +func BenchmarkIndexRuneASCII(b *testing.B) { + benchBytes(b, indexSizes, bmIndexRuneASCII(IndexRune)) +} + +func bmIndexRuneASCII(index func([]byte, rune) int) func(b *testing.B, n int) { + return func(b *testing.B, n int) { + buf := bmbuf[0:n] + buf[n-1] = 'x' + for i := 0; i < b.N; i++ { + j := index(buf, 'x') + if j != n-1 { + b.Fatal("bad index", j) + } + } + buf[n-1] = '\x00' + } +} + +func bmIndexRune(index func([]byte, rune) int) func(b *testing.B, n int) { + return func(b *testing.B, n int) { + buf := bmbuf[0:n] + utf8.EncodeRune(buf[n-3:], '世') + for i := 0; i < b.N; i++ { + j := index(buf, '世') + if j != n-3 { + b.Fatal("bad index", j) + } + } + buf[n-3] = '\x00' + buf[n-2] = '\x00' + buf[n-1] = '\x00' + } +} + func BenchmarkEqual(b *testing.B) { b.Run("0", func(b *testing.B) { var buf [4]byte @@ -844,6 +927,54 @@ func TestRepeat(t *testing.T) { } } +func repeat(b []byte, count int) (err error) { + defer func() { + if r := recover(); r != nil { + switch v := r.(type) { + case error: + err = v + default: + err = fmt.Errorf("%s", v) + } + } + }() + + Repeat(b, count) + + return +} + +// See Issue golang.org/issue/16237 +func TestRepeatCatchesOverflow(t *testing.T) { + tests := [...]struct { + s string + count int + errStr string + }{ + 0: {"--", -2147483647, "negative"}, + 1: {"", int(^uint(0) >> 1), ""}, + 2: {"-", 10, ""}, + 3: {"gopher", 0, ""}, + 4: {"-", -1, "negative"}, + 5: {"--", -102, "negative"}, + 6: {string(make([]byte, 255)), int((^uint(0))/255 + 1), "overflow"}, + } + + for i, tt := range tests { + err := repeat([]byte(tt.s), tt.count) + if tt.errStr == "" { + if err != nil { + t.Errorf("#%d panicked %v", i, err) + } + continue + } + + if err == nil || !strings.Contains(err.Error(), tt.errStr) { + t.Errorf("#%d expected %q got %q", i, tt.errStr, err) + } + } +} + func runesEqual(a, b []rune) bool { if len(a) != len(b) { return false @@ -906,6 +1037,9 @@ var trimTests = []TrimTest{ {"Trim", "* listitem", " *", "listitem"}, {"Trim", `"quote"`, `"`, "quote"}, {"Trim", "\u2C6F\u2C6F\u0250\u0250\u2C6F\u2C6F", "\u2C6F", "\u0250\u0250"}, + {"Trim", "\x80test\xff", "\xff", "test"}, + {"Trim", " Ġ ", " ", "Ġ"}, + {"Trim", " Ġİ0", "0 ", "Ġİ"}, //empty string tests {"Trim", "abba", "", "abba"}, {"Trim", "", "123", ""}, @@ -1325,3 +1459,31 @@ func BenchmarkBytesCompare(b *testing.B) { }) } } + +func BenchmarkIndexAnyASCII(b *testing.B) { + x := Repeat([]byte{'#'}, 4096) // Never matches set + cs := "0123456789abcdef" + for k := 1; k <= 4096; k <<= 4 { + for j := 1; j <= 16; j <<= 1 { + b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) { + for i := 0; i < b.N; i++ { + IndexAny(x[:k], cs[:j]) + } + }) + } + } +} + +func BenchmarkTrimASCII(b *testing.B) { + cs := "0123456789abcdef" + for k := 1; k <= 4096; k <<= 4 { + for j := 1; j <= 16; j <<= 1 { + b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) { + x := Repeat([]byte(cs[:j]), k) // Always matches set + for i := 0; i < b.N; i++ { + Trim(x[:k], cs[:j]) + } + }) + } + } +} diff --git a/libgo/go/bytes/example_test.go b/libgo/go/bytes/example_test.go index ad2dbc6..0d35a0d 100644 --- a/libgo/go/bytes/example_test.go +++ b/libgo/go/bytes/example_test.go @@ -11,6 +11,7 @@ import ( "io" "os" "sort" + "unicode" ) func ExampleBuffer() { @@ -83,3 +84,205 @@ func ExampleTrimPrefix() { fmt.Printf("Hello%s", b) // Output: Hello, world! } + +func ExampleFields() { + fmt.Printf("Fields are: %q", bytes.Fields([]byte(" foo bar baz "))) + // Output: Fields are: ["foo" "bar" "baz"] +} + +func ExampleFieldsFunc() { + f := func(c rune) bool { + return !unicode.IsLetter(c) && !unicode.IsNumber(c) + } + fmt.Printf("Fields are: %q", bytes.FieldsFunc([]byte(" foo1;bar2,baz3..."), f)) + // Output: Fields are: ["foo1" "bar2" "baz3"] +} + +func ExampleContains() { + fmt.Println(bytes.Contains([]byte("seafood"), []byte("foo"))) + fmt.Println(bytes.Contains([]byte("seafood"), []byte("bar"))) + fmt.Println(bytes.Contains([]byte("seafood"), []byte(""))) + fmt.Println(bytes.Contains([]byte(""), []byte(""))) + // Output: + // true + // false + // true + // true +} + +func ExampleCount() { + fmt.Println(bytes.Count([]byte("cheese"), []byte("e"))) + fmt.Println(bytes.Count([]byte("five"), []byte(""))) // before & after each rune + // Output: + // 3 + // 5 +} + +func ExampleEqualFold() { + fmt.Println(bytes.EqualFold([]byte("Go"), []byte("go"))) + // Output: true +} + +func ExampleHasPrefix() { + fmt.Println(bytes.HasPrefix([]byte("Gopher"), []byte("Go"))) + fmt.Println(bytes.HasPrefix([]byte("Gopher"), []byte("C"))) + fmt.Println(bytes.HasPrefix([]byte("Gopher"), []byte(""))) + // Output: + // true + // false + // true +} + +func ExampleHasSuffix() { + fmt.Println(bytes.HasSuffix([]byte("Amigo"), []byte("go"))) + fmt.Println(bytes.HasSuffix([]byte("Amigo"), []byte("O"))) + fmt.Println(bytes.HasSuffix([]byte("Amigo"), []byte("Ami"))) + fmt.Println(bytes.HasSuffix([]byte("Amigo"), []byte(""))) + // Output: + // true + // false + // false + // true +} + +func ExampleIndex() { + fmt.Println(bytes.Index([]byte("chicken"), []byte("ken"))) + fmt.Println(bytes.Index([]byte("chicken"), []byte("dmr"))) + // Output: + // 4 + // -1 +} + +func ExampleIndexFunc() { + f := func(c rune) bool { + return unicode.Is(unicode.Han, c) + } + fmt.Println(bytes.IndexFunc([]byte("Hello, 世界"), f)) + fmt.Println(bytes.IndexFunc([]byte("Hello, world"), f)) + // Output: + // 7 + // -1 +} + +func ExampleIndexAny() { + fmt.Println(bytes.IndexAny([]byte("chicken"), "aeiouy")) + fmt.Println(bytes.IndexAny([]byte("crwth"), "aeiouy")) + // Output: + // 2 + // -1 +} + +func ExampleIndexRune() { + fmt.Println(bytes.IndexRune([]byte("chicken"), 'k')) + fmt.Println(bytes.IndexRune([]byte("chicken"), 'd')) + // Output: + // 4 + // -1 +} + +func ExampleLastIndex() { + fmt.Println(bytes.Index([]byte("go gopher"), []byte("go"))) + fmt.Println(bytes.LastIndex([]byte("go gopher"), []byte("go"))) + fmt.Println(bytes.LastIndex([]byte("go gopher"), []byte("rodent"))) + // Output: + // 0 + // 3 + // -1 +} + +func ExampleJoin() { + s := [][]byte{[]byte("foo"), []byte("bar"), []byte("baz")} + fmt.Printf("%s", bytes.Join(s, []byte(", "))) + // Output: foo, bar, baz +} + +func ExampleRepeat() { + fmt.Printf("ba%s", bytes.Repeat([]byte("na"), 2)) + // Output: banana +} + +func ExampleReplace() { + fmt.Printf("%s\n", bytes.Replace([]byte("oink oink oink"), []byte("k"), []byte("ky"), 2)) + fmt.Printf("%s\n", bytes.Replace([]byte("oink oink oink"), []byte("oink"), []byte("moo"), -1)) + // Output: + // oinky oinky oink + // moo moo moo +} + +func ExampleSplit() { + fmt.Printf("%q\n", bytes.Split([]byte("a,b,c"), []byte(","))) + fmt.Printf("%q\n", bytes.Split([]byte("a man a plan a canal panama"), []byte("a "))) + fmt.Printf("%q\n", bytes.Split([]byte(" xyz "), []byte(""))) + fmt.Printf("%q\n", bytes.Split([]byte(""), []byte("Bernardo O'Higgins"))) + // Output: + // ["a" "b" "c"] + // ["" "man " "plan " "canal panama"] + // [" " "x" "y" "z" " "] + // [""] +} + +func ExampleSplitN() { + fmt.Printf("%q\n", bytes.SplitN([]byte("a,b,c"), []byte(","), 2)) + z := bytes.SplitN([]byte("a,b,c"), []byte(","), 0) + fmt.Printf("%q (nil = %v)\n", z, z == nil) + // Output: + // ["a" "b,c"] + // [] (nil = true) +} + +func ExampleSplitAfter() { + fmt.Printf("%q\n", bytes.SplitAfter([]byte("a,b,c"), []byte(","))) + // Output: ["a," "b," "c"] +} + +func ExampleSplitAfterN() { + fmt.Printf("%q\n", bytes.SplitAfterN([]byte("a,b,c"), []byte(","), 2)) + // Output: ["a," "b,c"] +} + +func ExampleTitle() { + fmt.Printf("%s", bytes.Title([]byte("her royal highness"))) + // Output: Her Royal Highness +} + +func ExampleToTitle() { + fmt.Printf("%s\n", bytes.ToTitle([]byte("loud noises"))) + fmt.Printf("%s\n", bytes.ToTitle([]byte("хлеб"))) + // Output: + // LOUD NOISES + // ХЛЕБ +} + +func ExampleTrim() { + fmt.Printf("[%q]", bytes.Trim([]byte(" !!! Achtung! Achtung! !!! "), "! ")) + // Output: ["Achtung! Achtung"] +} + +func ExampleMap() { + rot13 := func(r rune) rune { + switch { + case r >= 'A' && r <= 'Z': + return 'A' + (r-'A'+13)%26 + case r >= 'a' && r <= 'z': + return 'a' + (r-'a'+13)%26 + } + return r + } + fmt.Printf("%s", bytes.Map(rot13, []byte("'Twas brillig and the slithy gopher..."))) + // Output: 'Gjnf oevyyvt naq gur fyvgul tbcure... +} + +func ExampleTrimSpace() { + fmt.Printf("%s", bytes.TrimSpace([]byte(" \t\n a lone gopher \n\t\r\n"))) + // Output: a lone gopher +} + +func ExampleToUpper() { + fmt.Printf("%s", bytes.ToUpper([]byte("Gopher"))) + // Output: GOPHER +} + +func ExampleToLower() { + fmt.Printf("%s", bytes.ToLower([]byte("Gopher"))) + // Output: gopher +} |