aboutsummaryrefslogtreecommitdiff
path: root/libgo/go/bytes
diff options
context:
space:
mode:
authorIan Lance Taylor <iant@golang.org>2017-01-14 00:05:42 +0000
committerIan Lance Taylor <ian@gcc.gnu.org>2017-01-14 00:05:42 +0000
commitc2047754c300b68c05d65faa8dc2925fe67b71b4 (patch)
treee183ae81a1f48a02945cb6de463a70c5be1b06f6 /libgo/go/bytes
parent829afb8f05602bb31c9c597b24df7377fed4f059 (diff)
downloadgcc-c2047754c300b68c05d65faa8dc2925fe67b71b4.zip
gcc-c2047754c300b68c05d65faa8dc2925fe67b71b4.tar.gz
gcc-c2047754c300b68c05d65faa8dc2925fe67b71b4.tar.bz2
libgo: update to Go 1.8 release candidate 1
Compiler changes: * Change map assignment to use mapassign and assign value directly. * Change string iteration to use decoderune, faster for ASCII strings. * Change makeslice to take int, and use makeslice64 for larger values. * Add new noverflow field to hmap struct used for maps. Unresolved problems, to be fixed later: * Commented out test in go/types/sizes_test.go that doesn't compile. * Commented out reflect.TestStructOf test for padding after zero-sized field. Reviewed-on: https://go-review.googlesource.com/35231 gotools/: Updates for Go 1.8rc1. * Makefile.am (go_cmd_go_files): Add bug.go. (s-zdefaultcc): Write defaultPkgConfig. * Makefile.in: Rebuild. From-SVN: r244456
Diffstat (limited to 'libgo/go/bytes')
-rw-r--r--libgo/go/bytes/buffer.go43
-rw-r--r--libgo/go/bytes/buffer_test.go13
-rw-r--r--libgo/go/bytes/bytes.go146
-rw-r--r--libgo/go/bytes/bytes_amd64.go117
-rw-r--r--libgo/go/bytes/bytes_generic.go41
-rw-r--r--libgo/go/bytes/bytes_s390x.go120
-rw-r--r--libgo/go/bytes/bytes_test.go194
-rw-r--r--libgo/go/bytes/example_test.go203
8 files changed, 794 insertions, 83 deletions
diff --git a/libgo/go/bytes/buffer.go b/libgo/go/bytes/buffer.go
index 9154a1b..196419d 100644
--- a/libgo/go/bytes/buffer.go
+++ b/libgo/go/bytes/buffer.go
@@ -15,22 +15,25 @@ import (
// A Buffer is a variable-sized buffer of bytes with Read and Write methods.
// The zero value for Buffer is an empty buffer ready to use.
type Buffer struct {
- buf []byte // contents are the bytes buf[off : len(buf)]
- off int // read at &buf[off], write at &buf[len(buf)]
- runeBytes [utf8.UTFMax]byte // avoid allocation of slice on each call to WriteRune
- bootstrap [64]byte // memory to hold first slice; helps small buffers avoid allocation.
- lastRead readOp // last read operation, so that Unread* can work correctly.
+ buf []byte // contents are the bytes buf[off : len(buf)]
+ off int // read at &buf[off], write at &buf[len(buf)]
+ bootstrap [64]byte // memory to hold first slice; helps small buffers avoid allocation.
+ lastRead readOp // last read operation, so that Unread* can work correctly.
}
// The readOp constants describe the last action performed on
-// the buffer, so that UnreadRune and UnreadByte can
-// check for invalid usage.
+// the buffer, so that UnreadRune and UnreadByte can check for
+// invalid usage. opReadRuneX constants are chosen such that
+// converted to int they correspond to the rune size that was read.
type readOp int
const (
- opInvalid readOp = iota // Non-read operation.
- opReadRune // Read rune.
- opRead // Any other read operation.
+ opRead readOp = -1 // Any other read operation.
+ opInvalid = 0 // Non-read operation.
+ opReadRune1 = 1 // Read rune of size 1.
+ opReadRune2 = 2 // Read rune of size 2.
+ opReadRune3 = 3 // Read rune of size 3.
+ opReadRune4 = 4 // Read rune of size 4.
)
// ErrTooLarge is passed to panic if memory cannot be allocated to store data in a buffer.
@@ -246,8 +249,10 @@ func (b *Buffer) WriteRune(r rune) (n int, err error) {
b.WriteByte(byte(r))
return 1, nil
}
- n = utf8.EncodeRune(b.runeBytes[0:], r)
- b.Write(b.runeBytes[0:n])
+ b.lastRead = opInvalid
+ m := b.grow(utf8.UTFMax)
+ n = utf8.EncodeRune(b.buf[m:m+utf8.UTFMax], r)
+ b.buf = b.buf[:m+n]
return n, nil
}
@@ -318,14 +323,15 @@ func (b *Buffer) ReadRune() (r rune, size int, err error) {
b.Truncate(0)
return 0, 0, io.EOF
}
- b.lastRead = opReadRune
c := b.buf[b.off]
if c < utf8.RuneSelf {
b.off++
+ b.lastRead = opReadRune1
return rune(c), 1, nil
}
r, n := utf8.DecodeRune(b.buf[b.off:])
b.off += n
+ b.lastRead = readOp(n)
return r, n, nil
}
@@ -335,14 +341,13 @@ func (b *Buffer) ReadRune() (r rune, size int, err error) {
// it is stricter than UnreadByte, which will unread the last byte
// from any read operation.)
func (b *Buffer) UnreadRune() error {
- if b.lastRead != opReadRune {
+ if b.lastRead <= opInvalid {
return errors.New("bytes.Buffer: UnreadRune: previous operation was not ReadRune")
}
- b.lastRead = opInvalid
- if b.off > 0 {
- _, n := utf8.DecodeLastRune(b.buf[0:b.off])
- b.off -= n
+ if b.off >= int(b.lastRead) {
+ b.off -= int(b.lastRead)
}
+ b.lastRead = opInvalid
return nil
}
@@ -350,7 +355,7 @@ func (b *Buffer) UnreadRune() error {
// read operation. If write has happened since the last read, UnreadByte
// returns an error.
func (b *Buffer) UnreadByte() error {
- if b.lastRead != opReadRune && b.lastRead != opRead {
+ if b.lastRead == opInvalid {
return errors.New("bytes.Buffer: UnreadByte: previous operation was not a read")
}
b.lastRead = opInvalid
diff --git a/libgo/go/bytes/buffer_test.go b/libgo/go/bytes/buffer_test.go
index 7de17ae..b1b85f9 100644
--- a/libgo/go/bytes/buffer_test.go
+++ b/libgo/go/bytes/buffer_test.go
@@ -514,6 +514,19 @@ func TestBufferGrowth(t *testing.T) {
}
}
+func BenchmarkWriteRune(b *testing.B) {
+ const n = 4 << 10
+ const r = '☺'
+ b.SetBytes(int64(n * utf8.RuneLen(r)))
+ buf := NewBuffer(make([]byte, n*utf8.UTFMax))
+ for i := 0; i < b.N; i++ {
+ buf.Reset()
+ for i := 0; i < n; i++ {
+ buf.WriteRune(r)
+ }
+ }
+}
+
// From Issue 5154.
func BenchmarkBufferNotEmptyWriteRead(b *testing.B) {
buf := make([]byte, 1024)
diff --git a/libgo/go/bytes/bytes.go b/libgo/go/bytes/bytes.go
index 305c85d..406a3825 100644
--- a/libgo/go/bytes/bytes.go
+++ b/libgo/go/bytes/bytes.go
@@ -93,37 +93,6 @@ func ContainsRune(b []byte, r rune) bool {
return IndexRune(b, r) >= 0
}
-// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
-func Index(s, sep []byte) int {
- n := len(sep)
- if n == 0 {
- return 0
- }
- if n > len(s) {
- return -1
- }
- c := sep[0]
- if n == 1 {
- return IndexByte(s, c)
- }
- i := 0
- t := s[:len(s)-n+1]
- for i < len(t) {
- if t[i] != c {
- o := IndexByte(t[i:], c)
- if o < 0 {
- break
- }
- i += o
- }
- if Equal(s[i:i+n], sep) {
- return i
- }
- i++
- }
- return -1
-}
-
func indexBytePortable(s []byte, c byte) int {
for i, b := range s {
if b == c {
@@ -161,15 +130,28 @@ func LastIndexByte(s []byte, c byte) int {
// IndexRune interprets s as a sequence of UTF-8-encoded Unicode code points.
// It returns the byte index of the first occurrence in s of the given rune.
// It returns -1 if rune is not present in s.
+// If r is utf8.RuneError, it returns the first instance of any
+// invalid UTF-8 byte sequence.
func IndexRune(s []byte, r rune) int {
- for i := 0; i < len(s); {
- r1, size := utf8.DecodeRune(s[i:])
- if r == r1 {
- return i
+ switch {
+ case 0 <= r && r < utf8.RuneSelf:
+ return IndexByte(s, byte(r))
+ case r == utf8.RuneError:
+ for i := 0; i < len(s); {
+ r1, n := utf8.DecodeRune(s[i:])
+ if r1 == utf8.RuneError {
+ return i
+ }
+ i += n
}
- i += size
+ return -1
+ case !utf8.ValidRune(r):
+ return -1
+ default:
+ var b [utf8.UTFMax]byte
+ n := utf8.EncodeRune(b[:], r)
+ return Index(s, b[:n])
}
- return -1
}
// IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points.
@@ -178,10 +160,19 @@ func IndexRune(s []byte, r rune) int {
// point in common.
func IndexAny(s []byte, chars string) int {
if len(chars) > 0 {
- var r rune
+ if len(s) > 8 {
+ if as, isASCII := makeASCIISet(chars); isASCII {
+ for i, c := range s {
+ if as.contains(c) {
+ return i
+ }
+ }
+ return -1
+ }
+ }
var width int
for i := 0; i < len(s); i += width {
- r = rune(s[i])
+ r := rune(s[i])
if r < utf8.RuneSelf {
width = 1
} else {
@@ -203,11 +194,21 @@ func IndexAny(s []byte, chars string) int {
// there is no code point in common.
func LastIndexAny(s []byte, chars string) int {
if len(chars) > 0 {
+ if len(s) > 8 {
+ if as, isASCII := makeASCIISet(chars); isASCII {
+ for i := len(s) - 1; i >= 0; i-- {
+ if as.contains(s[i]) {
+ return i
+ }
+ }
+ return -1
+ }
+ }
for i := len(s); i > 0; {
- r, size := utf8.DecodeLastRune(s[0:i])
+ r, size := utf8.DecodeLastRune(s[:i])
i -= size
- for _, ch := range chars {
- if r == ch {
+ for _, c := range chars {
+ if r == c {
return i
}
}
@@ -398,7 +399,20 @@ func Map(mapping func(r rune) rune, s []byte) []byte {
}
// Repeat returns a new byte slice consisting of count copies of b.
+//
+// It panics if count is negative or if
+// the result of (len(b) * count) overflows.
func Repeat(b []byte, count int) []byte {
+ // Since we cannot return an error on overflow,
+ // we should panic if the repeat will generate
+ // an overflow.
+ // See Issue golang.org/issue/16237.
+ if count < 0 {
+ panic("bytes: negative Repeat count")
+ } else if count > 0 && len(b)*count/count != len(b) {
+ panic("bytes: Repeat count causes overflow")
+ }
+
nb := make([]byte, len(b)*count)
bp := copy(nb, b)
for bp < len(nb) {
@@ -419,20 +433,20 @@ func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) }
// ToUpperSpecial returns a copy of the byte slice s with all Unicode letters mapped to their
// upper case, giving priority to the special casing rules.
-func ToUpperSpecial(_case unicode.SpecialCase, s []byte) []byte {
- return Map(func(r rune) rune { return _case.ToUpper(r) }, s)
+func ToUpperSpecial(c unicode.SpecialCase, s []byte) []byte {
+ return Map(func(r rune) rune { return c.ToUpper(r) }, s)
}
// ToLowerSpecial returns a copy of the byte slice s with all Unicode letters mapped to their
// lower case, giving priority to the special casing rules.
-func ToLowerSpecial(_case unicode.SpecialCase, s []byte) []byte {
- return Map(func(r rune) rune { return _case.ToLower(r) }, s)
+func ToLowerSpecial(c unicode.SpecialCase, s []byte) []byte {
+ return Map(func(r rune) rune { return c.ToLower(r) }, s)
}
// ToTitleSpecial returns a copy of the byte slice s with all Unicode letters mapped to their
// title case, giving priority to the special casing rules.
-func ToTitleSpecial(_case unicode.SpecialCase, s []byte) []byte {
- return Map(func(r rune) rune { return _case.ToTitle(r) }, s)
+func ToTitleSpecial(c unicode.SpecialCase, s []byte) []byte {
+ return Map(func(r rune) rune { return c.ToTitle(r) }, s)
}
// isSeparator reports whether the rune could mark a word boundary.
@@ -578,7 +592,43 @@ func lastIndexFunc(s []byte, f func(r rune) bool, truth bool) int {
return -1
}
+// asciiSet is a 32-byte value, where each bit represents the presence of a
+// given ASCII character in the set. The 128-bits of the lower 16 bytes,
+// starting with the least-significant bit of the lowest word to the
+// most-significant bit of the highest word, map to the full range of all
+// 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
+// ensuring that any non-ASCII character will be reported as not in the set.
+type asciiSet [8]uint32
+
+// makeASCIISet creates a set of ASCII characters and reports whether all
+// characters in chars are ASCII.
+func makeASCIISet(chars string) (as asciiSet, ok bool) {
+ for i := 0; i < len(chars); i++ {
+ c := chars[i]
+ if c >= utf8.RuneSelf {
+ return as, false
+ }
+ as[c>>5] |= 1 << uint(c&31)
+ }
+ return as, true
+}
+
+// contains reports whether c is inside the set.
+func (as *asciiSet) contains(c byte) bool {
+ return (as[c>>5] & (1 << uint(c&31))) != 0
+}
+
func makeCutsetFunc(cutset string) func(r rune) bool {
+ if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
+ return func(r rune) bool {
+ return r == rune(cutset[0])
+ }
+ }
+ if as, isASCII := makeASCIISet(cutset); isASCII {
+ return func(r rune) bool {
+ return r < utf8.RuneSelf && as.contains(byte(r))
+ }
+ }
return func(r rune) bool {
for _, c := range cutset {
if c == r {
diff --git a/libgo/go/bytes/bytes_amd64.go b/libgo/go/bytes/bytes_amd64.go
new file mode 100644
index 0000000..58a07ef
--- /dev/null
+++ b/libgo/go/bytes/bytes_amd64.go
@@ -0,0 +1,117 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package bytes
+
+//go:noescape
+
+// indexShortStr returns the index of the first instance of c in s, or -1 if c is not present in s.
+// indexShortStr requires 2 <= len(c) <= shortStringLen
+func indexShortStr(s, c []byte) int // ../runtime/asm_$GOARCH.s
+func supportAVX2() bool // ../runtime/asm_$GOARCH.s
+
+var shortStringLen int
+
+func init() {
+ if supportAVX2() {
+ shortStringLen = 63
+ } else {
+ shortStringLen = 31
+ }
+}
+
+// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
+func Index(s, sep []byte) int {
+ n := len(sep)
+ switch {
+ case n == 0:
+ return 0
+ case n == 1:
+ return IndexByte(s, sep[0])
+ case n == len(s):
+ if Equal(sep, s) {
+ return 0
+ }
+ return -1
+ case n > len(s):
+ return -1
+ case n <= shortStringLen:
+ // Use brute force when s and sep both are small
+ if len(s) <= 64 {
+ return indexShortStr(s, sep)
+ }
+ c := sep[0]
+ i := 0
+ t := s[:len(s)-n+1]
+ fails := 0
+ for i < len(t) {
+ if t[i] != c {
+ // IndexByte skips 16/32 bytes per iteration,
+ // so it's faster than indexShortStr.
+ o := IndexByte(t[i:], c)
+ if o < 0 {
+ return -1
+ }
+ i += o
+ }
+ if Equal(s[i:i+n], sep) {
+ return i
+ }
+ fails++
+ i++
+ // Switch to indexShortStr when IndexByte produces too many false positives.
+ // Too many means more that 1 error per 8 characters.
+ // Allow some errors in the beginning.
+ if fails > (i+16)/8 {
+ r := indexShortStr(s[i:], sep)
+ if r >= 0 {
+ return r + i
+ }
+ return -1
+ }
+ }
+ return -1
+ }
+ // Rabin-Karp search
+ hashsep, pow := hashStr(sep)
+ var h uint32
+ for i := 0; i < n; i++ {
+ h = h*primeRK + uint32(s[i])
+ }
+ if h == hashsep && Equal(s[:n], sep) {
+ return 0
+ }
+ for i := n; i < len(s); {
+ h *= primeRK
+ h += uint32(s[i])
+ h -= pow * uint32(s[i-n])
+ i++
+ if h == hashsep && Equal(s[i-n:i], sep) {
+ return i - n
+ }
+ }
+ return -1
+}
+
+// primeRK is the prime base used in Rabin-Karp algorithm.
+const primeRK = 16777619
+
+// hashStr returns the hash and the appropriate multiplicative
+// factor for use in Rabin-Karp algorithm.
+func hashStr(sep []byte) (uint32, uint32) {
+ hash := uint32(0)
+ for i := 0; i < len(sep); i++ {
+ hash = hash*primeRK + uint32(sep[i])
+ }
+ var pow, sq uint32 = 1, primeRK
+ for i := len(sep); i > 0; i >>= 1 {
+ if i&1 != 0 {
+ pow *= sq
+ }
+ sq *= sq
+ }
+ return hash, pow
+}
diff --git a/libgo/go/bytes/bytes_generic.go b/libgo/go/bytes/bytes_generic.go
new file mode 100644
index 0000000..91baa22
--- /dev/null
+++ b/libgo/go/bytes/bytes_generic.go
@@ -0,0 +1,41 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// -build !amd64,!s390x
+
+package bytes
+
+// TODO: implements short string optimization on non amd64 platforms
+// and get rid of bytes_amd64.go
+
+// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
+func Index(s, sep []byte) int {
+ n := len(sep)
+ if n == 0 {
+ return 0
+ }
+ if n > len(s) {
+ return -1
+ }
+ c := sep[0]
+ if n == 1 {
+ return IndexByte(s, c)
+ }
+ i := 0
+ t := s[:len(s)-n+1]
+ for i < len(t) {
+ if t[i] != c {
+ o := IndexByte(t[i:], c)
+ if o < 0 {
+ break
+ }
+ i += o
+ }
+ if Equal(s[i:i+n], sep) {
+ return i
+ }
+ i++
+ }
+ return -1
+}
diff --git a/libgo/go/bytes/bytes_s390x.go b/libgo/go/bytes/bytes_s390x.go
new file mode 100644
index 0000000..a05ca47
--- /dev/null
+++ b/libgo/go/bytes/bytes_s390x.go
@@ -0,0 +1,120 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package bytes
+
+//go:noescape
+
+// indexShortStr returns the index of the first instance of sep in s,
+// or -1 if sep is not present in s.
+// indexShortStr requires 2 <= len(sep) <= shortStringLen
+func indexShortStr(s, c []byte) int // ../runtime/asm_s390x.s
+
+// supportsVX reports whether the vector facility is available.
+// indexShortStr must not be called if the vector facility is not
+// available.
+func supportsVX() bool // ../runtime/asm_s390x.s
+
+var shortStringLen = -1
+
+func init() {
+ if supportsVX() {
+ shortStringLen = 64
+ }
+}
+
+// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
+func Index(s, sep []byte) int {
+ n := len(sep)
+ switch {
+ case n == 0:
+ return 0
+ case n == 1:
+ return IndexByte(s, sep[0])
+ case n == len(s):
+ if Equal(sep, s) {
+ return 0
+ }
+ return -1
+ case n > len(s):
+ return -1
+ case n <= shortStringLen:
+ // Use brute force when s and sep both are small
+ if len(s) <= 64 {
+ return indexShortStr(s, sep)
+ }
+ c := sep[0]
+ i := 0
+ t := s[:len(s)-n+1]
+ fails := 0
+ for i < len(t) {
+ if t[i] != c {
+ // IndexByte skips 16/32 bytes per iteration,
+ // so it's faster than indexShortStr.
+ o := IndexByte(t[i:], c)
+ if o < 0 {
+ return -1
+ }
+ i += o
+ }
+ if Equal(s[i:i+n], sep) {
+ return i
+ }
+ fails++
+ i++
+ // Switch to indexShortStr when IndexByte produces too many false positives.
+ // Too many means more that 1 error per 8 characters.
+ // Allow some errors in the beginning.
+ if fails > (i+16)/8 {
+ r := indexShortStr(s[i:], sep)
+ if r >= 0 {
+ return r + i
+ }
+ return -1
+ }
+ }
+ return -1
+ }
+ // Rabin-Karp search
+ hashsep, pow := hashStr(sep)
+ var h uint32
+ for i := 0; i < n; i++ {
+ h = h*primeRK + uint32(s[i])
+ }
+ if h == hashsep && Equal(s[:n], sep) {
+ return 0
+ }
+ for i := n; i < len(s); {
+ h *= primeRK
+ h += uint32(s[i])
+ h -= pow * uint32(s[i-n])
+ i++
+ if h == hashsep && Equal(s[i-n:i], sep) {
+ return i - n
+ }
+ }
+ return -1
+}
+
+// primeRK is the prime base used in Rabin-Karp algorithm.
+const primeRK = 16777619
+
+// hashStr returns the hash and the appropriate multiplicative
+// factor for use in Rabin-Karp algorithm.
+func hashStr(sep []byte) (uint32, uint32) {
+ hash := uint32(0)
+ for i := 0; i < len(sep); i++ {
+ hash = hash*primeRK + uint32(sep[i])
+ }
+ var pow, sq uint32 = 1, primeRK
+ for i := len(sep); i > 0; i >>= 1 {
+ if i&1 != 0 {
+ pow *= sq
+ }
+ sq *= sq
+ }
+ return hash, pow
+}
diff --git a/libgo/go/bytes/bytes_test.go b/libgo/go/bytes/bytes_test.go
index c48f662..26eac5e 100644
--- a/libgo/go/bytes/bytes_test.go
+++ b/libgo/go/bytes/bytes_test.go
@@ -7,8 +7,10 @@ package bytes_test
import (
. "bytes"
"fmt"
+ "internal/testenv"
"math/rand"
"reflect"
+ "strings"
"testing"
"unicode"
"unicode/utf8"
@@ -165,8 +167,12 @@ var indexAnyTests = []BinOpTest{
{"abc", "xyz", -1},
{"abc", "xcz", 2},
{"ab☺c", "x☺yz", 2},
+ {"a☺b☻c☹d", "cx", len("a☺b☻")},
+ {"a☺b☻c☹d", "uvw☻xyz", len("a☺b")},
{"aRegExp*", ".(|)*+?^$[]", 7},
{dots + dots + dots, " ", -1},
+ {"012abcba210", "\xffb", 4},
+ {"012\x80bcb\x80210", "\xffb", 3},
}
var lastIndexAnyTests = []BinOpTest{
@@ -178,18 +184,13 @@ var lastIndexAnyTests = []BinOpTest{
{"aaa", "a", 2},
{"abc", "xyz", -1},
{"abc", "ab", 1},
- {"a☺b☻c☹d", "uvw☻xyz", 2 + len("☺")},
+ {"ab☺c", "x☺yz", 2},
+ {"a☺b☻c☹d", "cx", len("a☺b☻")},
+ {"a☺b☻c☹d", "uvw☻xyz", len("a☺b")},
{"a.RegExp*", ".(|)*+?^$[]", 8},
{dots + dots + dots, " ", -1},
-}
-
-var indexRuneTests = []BinOpTest{
- {"", "a", -1},
- {"", "☺", -1},
- {"foo", "☹", -1},
- {"foo", "o", 1},
- {"foo☺bar", "☺", 3},
- {"foo☺☻☹bar", "☹", 9},
+ {"012abcba210", "\xffb", 6},
+ {"012\x80bcb\x80210", "\xffb", 7},
}
// Execute f on each test case. funcName should be the name of f; it's used
@@ -346,13 +347,52 @@ func TestIndexByteSmall(t *testing.T) {
}
func TestIndexRune(t *testing.T) {
- for _, tt := range indexRuneTests {
- a := []byte(tt.a)
- r, _ := utf8.DecodeRuneInString(tt.b)
- pos := IndexRune(a, r)
- if pos != tt.i {
- t.Errorf(`IndexRune(%q, '%c') = %v`, tt.a, r, pos)
+ tests := []struct {
+ in string
+ rune rune
+ want int
+ }{
+ {"", 'a', -1},
+ {"", '☺', -1},
+ {"foo", '☹', -1},
+ {"foo", 'o', 1},
+ {"foo☺bar", '☺', 3},
+ {"foo☺☻☹bar", '☹', 9},
+ {"a A x", 'A', 2},
+ {"some_text=some_value", '=', 9},
+ {"☺a", 'a', 3},
+ {"a☻☺b", '☺', 4},
+
+ // RuneError should match any invalid UTF-8 byte sequence.
+ {"�", '�', 0},
+ {"\xff", '�', 0},
+ {"☻x�", '�', len("☻x")},
+ {"☻x\xe2\x98", '�', len("☻x")},
+ {"☻x\xe2\x98�", '�', len("☻x")},
+ {"☻x\xe2\x98x", '�', len("☻x")},
+
+ // Invalid rune values should never match.
+ {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", -1, -1},
+ {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", 0xD800, -1}, // Surrogate pair
+ {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", utf8.MaxRune + 1, -1},
+ }
+ for _, tt := range tests {
+ if got := IndexRune([]byte(tt.in), tt.rune); got != tt.want {
+ t.Errorf("IndexRune(%q, %d) = %v; want %v", tt.in, tt.rune, got, tt.want)
+ }
+ }
+
+ haystack := []byte("test世界")
+ allocs := testing.AllocsPerRun(1000, func() {
+ if i := IndexRune(haystack, 's'); i != 2 {
+ t.Fatalf("'s' at %d; want 2", i)
+ }
+ if i := IndexRune(haystack, '世'); i != 4 {
+ t.Fatalf("'世' at %d; want 4", i)
}
+ })
+ if allocs != 0 {
+ t.Errorf("expected no allocations, got %f", allocs)
}
}
@@ -370,6 +410,9 @@ func valName(x int) string {
func benchBytes(b *testing.B, sizes []int, f func(b *testing.B, n int)) {
for _, n := range sizes {
+ if isRaceBuilder && n > 4<<10 {
+ continue
+ }
b.Run(valName(n), func(b *testing.B) {
if len(bmbuf) < n {
bmbuf = make([]byte, n)
@@ -382,6 +425,8 @@ func benchBytes(b *testing.B, sizes []int, f func(b *testing.B, n int)) {
var indexSizes = []int{10, 32, 4 << 10, 4 << 20, 64 << 20}
+var isRaceBuilder = strings.HasSuffix(testenv.Builder(), "-race")
+
func BenchmarkIndexByte(b *testing.B) {
benchBytes(b, indexSizes, bmIndexByte(IndexByte))
}
@@ -404,6 +449,44 @@ func bmIndexByte(index func([]byte, byte) int) func(b *testing.B, n int) {
}
}
+func BenchmarkIndexRune(b *testing.B) {
+ benchBytes(b, indexSizes, bmIndexRune(IndexRune))
+}
+
+func BenchmarkIndexRuneASCII(b *testing.B) {
+ benchBytes(b, indexSizes, bmIndexRuneASCII(IndexRune))
+}
+
+func bmIndexRuneASCII(index func([]byte, rune) int) func(b *testing.B, n int) {
+ return func(b *testing.B, n int) {
+ buf := bmbuf[0:n]
+ buf[n-1] = 'x'
+ for i := 0; i < b.N; i++ {
+ j := index(buf, 'x')
+ if j != n-1 {
+ b.Fatal("bad index", j)
+ }
+ }
+ buf[n-1] = '\x00'
+ }
+}
+
+func bmIndexRune(index func([]byte, rune) int) func(b *testing.B, n int) {
+ return func(b *testing.B, n int) {
+ buf := bmbuf[0:n]
+ utf8.EncodeRune(buf[n-3:], '世')
+ for i := 0; i < b.N; i++ {
+ j := index(buf, '世')
+ if j != n-3 {
+ b.Fatal("bad index", j)
+ }
+ }
+ buf[n-3] = '\x00'
+ buf[n-2] = '\x00'
+ buf[n-1] = '\x00'
+ }
+}
+
func BenchmarkEqual(b *testing.B) {
b.Run("0", func(b *testing.B) {
var buf [4]byte
@@ -844,6 +927,54 @@ func TestRepeat(t *testing.T) {
}
}
+func repeat(b []byte, count int) (err error) {
+ defer func() {
+ if r := recover(); r != nil {
+ switch v := r.(type) {
+ case error:
+ err = v
+ default:
+ err = fmt.Errorf("%s", v)
+ }
+ }
+ }()
+
+ Repeat(b, count)
+
+ return
+}
+
+// See Issue golang.org/issue/16237
+func TestRepeatCatchesOverflow(t *testing.T) {
+ tests := [...]struct {
+ s string
+ count int
+ errStr string
+ }{
+ 0: {"--", -2147483647, "negative"},
+ 1: {"", int(^uint(0) >> 1), ""},
+ 2: {"-", 10, ""},
+ 3: {"gopher", 0, ""},
+ 4: {"-", -1, "negative"},
+ 5: {"--", -102, "negative"},
+ 6: {string(make([]byte, 255)), int((^uint(0))/255 + 1), "overflow"},
+ }
+
+ for i, tt := range tests {
+ err := repeat([]byte(tt.s), tt.count)
+ if tt.errStr == "" {
+ if err != nil {
+ t.Errorf("#%d panicked %v", i, err)
+ }
+ continue
+ }
+
+ if err == nil || !strings.Contains(err.Error(), tt.errStr) {
+ t.Errorf("#%d expected %q got %q", i, tt.errStr, err)
+ }
+ }
+}
+
func runesEqual(a, b []rune) bool {
if len(a) != len(b) {
return false
@@ -906,6 +1037,9 @@ var trimTests = []TrimTest{
{"Trim", "* listitem", " *", "listitem"},
{"Trim", `"quote"`, `"`, "quote"},
{"Trim", "\u2C6F\u2C6F\u0250\u0250\u2C6F\u2C6F", "\u2C6F", "\u0250\u0250"},
+ {"Trim", "\x80test\xff", "\xff", "test"},
+ {"Trim", " Ġ ", " ", "Ġ"},
+ {"Trim", " Ġİ0", "0 ", "Ġİ"},
//empty string tests
{"Trim", "abba", "", "abba"},
{"Trim", "", "123", ""},
@@ -1325,3 +1459,31 @@ func BenchmarkBytesCompare(b *testing.B) {
})
}
}
+
+func BenchmarkIndexAnyASCII(b *testing.B) {
+ x := Repeat([]byte{'#'}, 4096) // Never matches set
+ cs := "0123456789abcdef"
+ for k := 1; k <= 4096; k <<= 4 {
+ for j := 1; j <= 16; j <<= 1 {
+ b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ IndexAny(x[:k], cs[:j])
+ }
+ })
+ }
+ }
+}
+
+func BenchmarkTrimASCII(b *testing.B) {
+ cs := "0123456789abcdef"
+ for k := 1; k <= 4096; k <<= 4 {
+ for j := 1; j <= 16; j <<= 1 {
+ b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
+ x := Repeat([]byte(cs[:j]), k) // Always matches set
+ for i := 0; i < b.N; i++ {
+ Trim(x[:k], cs[:j])
+ }
+ })
+ }
+ }
+}
diff --git a/libgo/go/bytes/example_test.go b/libgo/go/bytes/example_test.go
index ad2dbc6..0d35a0d 100644
--- a/libgo/go/bytes/example_test.go
+++ b/libgo/go/bytes/example_test.go
@@ -11,6 +11,7 @@ import (
"io"
"os"
"sort"
+ "unicode"
)
func ExampleBuffer() {
@@ -83,3 +84,205 @@ func ExampleTrimPrefix() {
fmt.Printf("Hello%s", b)
// Output: Hello, world!
}
+
+func ExampleFields() {
+ fmt.Printf("Fields are: %q", bytes.Fields([]byte(" foo bar baz ")))
+ // Output: Fields are: ["foo" "bar" "baz"]
+}
+
+func ExampleFieldsFunc() {
+ f := func(c rune) bool {
+ return !unicode.IsLetter(c) && !unicode.IsNumber(c)
+ }
+ fmt.Printf("Fields are: %q", bytes.FieldsFunc([]byte(" foo1;bar2,baz3..."), f))
+ // Output: Fields are: ["foo1" "bar2" "baz3"]
+}
+
+func ExampleContains() {
+ fmt.Println(bytes.Contains([]byte("seafood"), []byte("foo")))
+ fmt.Println(bytes.Contains([]byte("seafood"), []byte("bar")))
+ fmt.Println(bytes.Contains([]byte("seafood"), []byte("")))
+ fmt.Println(bytes.Contains([]byte(""), []byte("")))
+ // Output:
+ // true
+ // false
+ // true
+ // true
+}
+
+func ExampleCount() {
+ fmt.Println(bytes.Count([]byte("cheese"), []byte("e")))
+ fmt.Println(bytes.Count([]byte("five"), []byte(""))) // before & after each rune
+ // Output:
+ // 3
+ // 5
+}
+
+func ExampleEqualFold() {
+ fmt.Println(bytes.EqualFold([]byte("Go"), []byte("go")))
+ // Output: true
+}
+
+func ExampleHasPrefix() {
+ fmt.Println(bytes.HasPrefix([]byte("Gopher"), []byte("Go")))
+ fmt.Println(bytes.HasPrefix([]byte("Gopher"), []byte("C")))
+ fmt.Println(bytes.HasPrefix([]byte("Gopher"), []byte("")))
+ // Output:
+ // true
+ // false
+ // true
+}
+
+func ExampleHasSuffix() {
+ fmt.Println(bytes.HasSuffix([]byte("Amigo"), []byte("go")))
+ fmt.Println(bytes.HasSuffix([]byte("Amigo"), []byte("O")))
+ fmt.Println(bytes.HasSuffix([]byte("Amigo"), []byte("Ami")))
+ fmt.Println(bytes.HasSuffix([]byte("Amigo"), []byte("")))
+ // Output:
+ // true
+ // false
+ // false
+ // true
+}
+
+func ExampleIndex() {
+ fmt.Println(bytes.Index([]byte("chicken"), []byte("ken")))
+ fmt.Println(bytes.Index([]byte("chicken"), []byte("dmr")))
+ // Output:
+ // 4
+ // -1
+}
+
+func ExampleIndexFunc() {
+ f := func(c rune) bool {
+ return unicode.Is(unicode.Han, c)
+ }
+ fmt.Println(bytes.IndexFunc([]byte("Hello, 世界"), f))
+ fmt.Println(bytes.IndexFunc([]byte("Hello, world"), f))
+ // Output:
+ // 7
+ // -1
+}
+
+func ExampleIndexAny() {
+ fmt.Println(bytes.IndexAny([]byte("chicken"), "aeiouy"))
+ fmt.Println(bytes.IndexAny([]byte("crwth"), "aeiouy"))
+ // Output:
+ // 2
+ // -1
+}
+
+func ExampleIndexRune() {
+ fmt.Println(bytes.IndexRune([]byte("chicken"), 'k'))
+ fmt.Println(bytes.IndexRune([]byte("chicken"), 'd'))
+ // Output:
+ // 4
+ // -1
+}
+
+func ExampleLastIndex() {
+ fmt.Println(bytes.Index([]byte("go gopher"), []byte("go")))
+ fmt.Println(bytes.LastIndex([]byte("go gopher"), []byte("go")))
+ fmt.Println(bytes.LastIndex([]byte("go gopher"), []byte("rodent")))
+ // Output:
+ // 0
+ // 3
+ // -1
+}
+
+func ExampleJoin() {
+ s := [][]byte{[]byte("foo"), []byte("bar"), []byte("baz")}
+ fmt.Printf("%s", bytes.Join(s, []byte(", ")))
+ // Output: foo, bar, baz
+}
+
+func ExampleRepeat() {
+ fmt.Printf("ba%s", bytes.Repeat([]byte("na"), 2))
+ // Output: banana
+}
+
+func ExampleReplace() {
+ fmt.Printf("%s\n", bytes.Replace([]byte("oink oink oink"), []byte("k"), []byte("ky"), 2))
+ fmt.Printf("%s\n", bytes.Replace([]byte("oink oink oink"), []byte("oink"), []byte("moo"), -1))
+ // Output:
+ // oinky oinky oink
+ // moo moo moo
+}
+
+func ExampleSplit() {
+ fmt.Printf("%q\n", bytes.Split([]byte("a,b,c"), []byte(",")))
+ fmt.Printf("%q\n", bytes.Split([]byte("a man a plan a canal panama"), []byte("a ")))
+ fmt.Printf("%q\n", bytes.Split([]byte(" xyz "), []byte("")))
+ fmt.Printf("%q\n", bytes.Split([]byte(""), []byte("Bernardo O'Higgins")))
+ // Output:
+ // ["a" "b" "c"]
+ // ["" "man " "plan " "canal panama"]
+ // [" " "x" "y" "z" " "]
+ // [""]
+}
+
+func ExampleSplitN() {
+ fmt.Printf("%q\n", bytes.SplitN([]byte("a,b,c"), []byte(","), 2))
+ z := bytes.SplitN([]byte("a,b,c"), []byte(","), 0)
+ fmt.Printf("%q (nil = %v)\n", z, z == nil)
+ // Output:
+ // ["a" "b,c"]
+ // [] (nil = true)
+}
+
+func ExampleSplitAfter() {
+ fmt.Printf("%q\n", bytes.SplitAfter([]byte("a,b,c"), []byte(",")))
+ // Output: ["a," "b," "c"]
+}
+
+func ExampleSplitAfterN() {
+ fmt.Printf("%q\n", bytes.SplitAfterN([]byte("a,b,c"), []byte(","), 2))
+ // Output: ["a," "b,c"]
+}
+
+func ExampleTitle() {
+ fmt.Printf("%s", bytes.Title([]byte("her royal highness")))
+ // Output: Her Royal Highness
+}
+
+func ExampleToTitle() {
+ fmt.Printf("%s\n", bytes.ToTitle([]byte("loud noises")))
+ fmt.Printf("%s\n", bytes.ToTitle([]byte("хлеб")))
+ // Output:
+ // LOUD NOISES
+ // ХЛЕБ
+}
+
+func ExampleTrim() {
+ fmt.Printf("[%q]", bytes.Trim([]byte(" !!! Achtung! Achtung! !!! "), "! "))
+ // Output: ["Achtung! Achtung"]
+}
+
+func ExampleMap() {
+ rot13 := func(r rune) rune {
+ switch {
+ case r >= 'A' && r <= 'Z':
+ return 'A' + (r-'A'+13)%26
+ case r >= 'a' && r <= 'z':
+ return 'a' + (r-'a'+13)%26
+ }
+ return r
+ }
+ fmt.Printf("%s", bytes.Map(rot13, []byte("'Twas brillig and the slithy gopher...")))
+ // Output: 'Gjnf oevyyvt naq gur fyvgul tbcure...
+}
+
+func ExampleTrimSpace() {
+ fmt.Printf("%s", bytes.TrimSpace([]byte(" \t\n a lone gopher \n\t\r\n")))
+ // Output: a lone gopher
+}
+
+func ExampleToUpper() {
+ fmt.Printf("%s", bytes.ToUpper([]byte("Gopher")))
+ // Output: GOPHER
+}
+
+func ExampleToLower() {
+ fmt.Printf("%s", bytes.ToLower([]byte("Gopher")))
+ // Output: gopher
+}