author | Ian Lance Taylor <iant@golang.org> | 2018-01-09 01:23:08 +0000 |
---|---|---|
committer | Ian Lance Taylor <ian@gcc.gnu.org> | 2018-01-09 01:23:08 +0000 |
commit | 1a2f01efa63036a5104f203a4789e682c0e0915d |
tree | 373e15778dc8295354584e1f86915ae493b604ff /libgo/go/bytes | |
parent | 8799df67f2dab88f9fda11739c501780a85575e2 |
download | gcc-1a2f01efa63036a5104f203a4789e682c0e0915d.zip gcc-1a2f01efa63036a5104f203a4789e682c0e0915d.tar.gz gcc-1a2f01efa63036a5104f203a4789e682c0e0915d.tar.bz2 |
libgo: update to Go1.10beta1
Update the Go library to the 1.10beta1 release.
Requires a few changes to the compiler for modifications to the map
runtime code, and to handle some nowritebarrier cases in the runtime.
Reviewed-on: https://go-review.googlesource.com/86455
gotools/:
* Makefile.am (go_cmd_vet_files): New variable.
(go_cmd_buildid_files, go_cmd_test2json_files): New variables.
(s-zdefaultcc): Change from constants to functions.
(noinst_PROGRAMS): Add vet, buildid, and test2json.
(cgo$(EXEEXT)): Link against $(LIBGOTOOL).
(vet$(EXEEXT)): New target.
(buildid$(EXEEXT)): New target.
(test2json$(EXEEXT)): New target.
(install-exec-local): Install all $(noinst_PROGRAMS).
(uninstall-local): Uninstall all $(noinst_PROGRAMS).
(check-go-tool): Depend on $(noinst_PROGRAMS). Copy down
objabi.go.
(check-runtime): Depend on $(noinst_PROGRAMS).
(check-cgo-test, check-carchive-test): Likewise.
(check-vet): New target.
(check): Depend on check-vet. Look at cmd_vet-testlog.
(.PHONY): Add check-vet.
* Makefile.in: Rebuild.
From-SVN: r256365
Diffstat (limited to 'libgo/go/bytes')
-rw-r--r-- | libgo/go/bytes/boundary_test.go | 84 |
-rw-r--r-- | libgo/go/bytes/buffer.go | 94 |
-rw-r--r-- | libgo/go/bytes/buffer_test.go | 114 |
-rw-r--r-- | libgo/go/bytes/bytes.go | 292 |
-rw-r--r-- | libgo/go/bytes/bytes_amd64.go | 42 |
-rw-r--r-- | libgo/go/bytes/bytes_arm64.go | 70 |
-rw-r--r-- | libgo/go/bytes/bytes_generic.go | 40 |
-rw-r--r-- | libgo/go/bytes/bytes_s390x.go | 42 |
-rw-r--r-- | libgo/go/bytes/bytes_test.go | 133 |
-rw-r--r-- | libgo/go/bytes/equal_test.go | 47 |
-rw-r--r-- | libgo/go/bytes/example_test.go | 141 |
-rw-r--r-- | libgo/go/bytes/reader.go | 6 |
-rw-r--r-- | libgo/go/bytes/reader_test.go | 4 |
13 files changed, 769 insertions, 340 deletions
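The most structural change in the diff that follows is that the Rabin-Karp fallback previously duplicated in bytes_amd64.go and bytes_s390x.go is factored into a shared indexRabinKarp/hashStr pair in bytes.go, which the generic, amd64, s390x, and new arm64 Index implementations fall back to when the IndexByte skip loop stops paying off. Because the hunks below are dense, here is a standalone sketch of that helper. The function bodies follow the patch, but this version is written as an ordinary main package and uses the exported bytes.Equal rather than the package-internal Equal; the assumption that callers only reach the fallback with 0 < len(sep) <= len(s), as Index guarantees in the patch, is noted in the comments.

```go
package main

import (
	"bytes"
	"fmt"
)

// primeRK is the prime base used in the Rabin-Karp algorithm.
const primeRK = 16777619

// hashStr returns the hash of sep and the multiplicative factor
// primeRK**len(sep) needed to roll the hash in indexRabinKarp.
func hashStr(sep []byte) (uint32, uint32) {
	hash := uint32(0)
	for i := 0; i < len(sep); i++ {
		hash = hash*primeRK + uint32(sep[i])
	}
	var pow, sq uint32 = 1, primeRK
	for i := len(sep); i > 0; i >>= 1 {
		if i&1 != 0 {
			pow *= sq
		}
		sq *= sq
	}
	return hash, pow
}

// indexRabinKarp slides a rolling hash of len(sep) bytes across s and
// confirms candidate positions with an exact comparison.
// Assumption: callers ensure 0 < len(sep) <= len(s), as Index does in
// the patch before falling back to this routine.
func indexRabinKarp(s, sep []byte) int {
	hashsep, pow := hashStr(sep)
	n := len(sep)
	var h uint32
	for i := 0; i < n; i++ {
		h = h*primeRK + uint32(s[i])
	}
	if h == hashsep && bytes.Equal(s[:n], sep) {
		return 0
	}
	for i := n; i < len(s); {
		// Roll the window forward by one byte: add s[i], drop s[i-n].
		h *= primeRK
		h += uint32(s[i])
		h -= pow * uint32(s[i-n])
		i++
		if h == hashsep && bytes.Equal(s[i-n:i], sep) {
			return i - n
		}
	}
	return -1
}

func main() {
	// A periodic haystack is the case where the IndexByte skip loop
	// gives up and the Rabin-Karp fallback wins.
	fmt.Println(indexRabinKarp([]byte("oxoxoxoxoxoxoy"), []byte("oy"))) // 12
}
```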
diff --git a/libgo/go/bytes/boundary_test.go b/libgo/go/bytes/boundary_test.go new file mode 100644 index 0000000..ea84f1e --- /dev/null +++ b/libgo/go/bytes/boundary_test.go @@ -0,0 +1,84 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// +// +build linux + +package bytes_test + +import ( + . "bytes" + "syscall" + "testing" +) + +// This file tests the situation where byte operations are checking +// data very near to a page boundary. We want to make sure those +// operations do not read across the boundary and cause a page +// fault where they shouldn't. + +// These tests run only on linux. The code being tested is +// not OS-specific, so it does not need to be tested on all +// operating systems. + +// dangerousSlice returns a slice which is immediately +// preceded and followed by a faulting page. +func dangerousSlice(t *testing.T) []byte { + pagesize := syscall.Getpagesize() + b, err := syscall.Mmap(0, 0, 3*pagesize, syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_ANONYMOUS|syscall.MAP_PRIVATE) + if err != nil { + t.Fatalf("mmap failed %s", err) + } + err = syscall.Mprotect(b[:pagesize], syscall.PROT_NONE) + if err != nil { + t.Fatalf("mprotect low failed %s\n", err) + } + err = syscall.Mprotect(b[2*pagesize:], syscall.PROT_NONE) + if err != nil { + t.Fatalf("mprotect high failed %s\n", err) + } + return b[pagesize : 2*pagesize] +} + +func TestEqualNearPageBoundary(t *testing.T) { + t.Parallel() + b := dangerousSlice(t) + for i := range b { + b[i] = 'A' + } + for i := 0; i <= len(b); i++ { + Equal(b[:i], b[len(b)-i:]) + Equal(b[len(b)-i:], b[:i]) + } +} + +func TestIndexByteNearPageBoundary(t *testing.T) { + t.Parallel() + b := dangerousSlice(t) + for i := range b { + idx := IndexByte(b[i:], 1) + if idx != -1 { + t.Fatalf("IndexByte(b[%d:])=%d, want -1\n", i, idx) + } + } +} + +func TestIndexNearPageBoundary(t *testing.T) { + t.Parallel() + var q [64]byte + b := dangerousSlice(t) + if len(b) > 256 { + // Only worry about when we're near the end of a page. + b = b[len(b)-256:] + } + for j := 1; j < len(q); j++ { + q[j-1] = 1 // difference is only found on the last byte + for i := range b { + idx := Index(b[i:], q[:j]) + if idx != -1 { + t.Fatalf("Index(b[%d:], q[:%d])=%d, want -1\n", i, j, idx) + } + } + q[j-1] = 0 + } +} diff --git a/libgo/go/bytes/buffer.go b/libgo/go/bytes/buffer.go index 20e42bb..dc9d5e9 100644 --- a/libgo/go/bytes/buffer.go +++ b/libgo/go/bytes/buffer.go @@ -15,34 +15,37 @@ import ( // A Buffer is a variable-sized buffer of bytes with Read and Write methods. // The zero value for Buffer is an empty buffer ready to use. type Buffer struct { - buf []byte // contents are the bytes buf[off : len(buf)] - off int // read at &buf[off], write at &buf[len(buf)] - lastRead readOp // last read operation, so that Unread* can work correctly. - // FIXME: lastRead can fit in a single byte + buf []byte // contents are the bytes buf[off : len(buf)] + off int // read at &buf[off], write at &buf[len(buf)] + bootstrap [64]byte // memory to hold first slice; helps small buffers avoid allocation. + lastRead readOp // last read operation, so that Unread* can work correctly. - // memory to hold first slice; helps small buffers avoid allocation. // FIXME: it would be advisable to align Buffer to cachelines to avoid false // sharing. 
- bootstrap [64]byte } // The readOp constants describe the last action performed on // the buffer, so that UnreadRune and UnreadByte can check for // invalid usage. opReadRuneX constants are chosen such that // converted to int they correspond to the rune size that was read. -type readOp int +type readOp int8 +// Don't use iota for these, as the values need to correspond with the +// names and comments, which is easier to see when being explicit. const ( opRead readOp = -1 // Any other read operation. - opInvalid = 0 // Non-read operation. - opReadRune1 = 1 // Read rune of size 1. - opReadRune2 = 2 // Read rune of size 2. - opReadRune3 = 3 // Read rune of size 3. - opReadRune4 = 4 // Read rune of size 4. + opInvalid readOp = 0 // Non-read operation. + opReadRune1 readOp = 1 // Read rune of size 1. + opReadRune2 readOp = 2 // Read rune of size 2. + opReadRune3 readOp = 3 // Read rune of size 3. + opReadRune4 readOp = 4 // Read rune of size 4. ) // ErrTooLarge is passed to panic if memory cannot be allocated to store data in a buffer. var ErrTooLarge = errors.New("bytes.Buffer: too large") +var errNegativeRead = errors.New("bytes.Buffer: reader returned negative count from Read") + +const maxInt = int(^uint(0) >> 1) // Bytes returns a slice of length b.Len() holding the unread portion of the buffer. // The slice is valid for use only until the next buffer modification (that is, @@ -53,6 +56,8 @@ func (b *Buffer) Bytes() []byte { return b.buf[b.off:] } // String returns the contents of the unread portion of the buffer // as a string. If the Buffer is a nil pointer, it returns "<nil>". +// +// To build strings more efficiently, see the strings.Builder type. func (b *Buffer) String() string { if b == nil { // Special case, useful in debugging. @@ -61,6 +66,9 @@ func (b *Buffer) String() string { return string(b.buf[b.off:]) } +// empty returns whether the unread portion of the buffer is empty. +func (b *Buffer) empty() bool { return len(b.buf) <= b.off } + // Len returns the number of bytes of the unread portion of the buffer; // b.Len() == len(b.Bytes()). func (b *Buffer) Len() int { return len(b.buf) - b.off } @@ -81,7 +89,7 @@ func (b *Buffer) Truncate(n int) { if n < 0 || n > b.Len() { panic("bytes.Buffer: truncation out of range") } - b.buf = b.buf[0 : b.off+n] + b.buf = b.buf[:b.off+n] } // Reset resets the buffer to be empty, @@ -97,7 +105,7 @@ func (b *Buffer) Reset() { // internal buffer only needs to be resliced. // It returns the index where bytes should be written and whether it succeeded. func (b *Buffer) tryGrowByReslice(n int) (int, bool) { - if l := len(b.buf); l+n <= cap(b.buf) { + if l := len(b.buf); n <= cap(b.buf)-l { b.buf = b.buf[:l+n] return l, true } @@ -122,15 +130,18 @@ func (b *Buffer) grow(n int) int { b.buf = b.bootstrap[:n] return 0 } - if m+n <= cap(b.buf)/2 { + c := cap(b.buf) + if n <= c/2-m { // We can slide things down instead of allocating a new - // slice. We only need m+n <= cap(b.buf) to slide, but + // slice. We only need m+n <= c to slide, but // we instead let capacity get twice as large so we // don't spend all our time copying. - copy(b.buf[:], b.buf[b.off:]) + copy(b.buf, b.buf[b.off:]) + } else if c > maxInt-c-n { + panic(ErrTooLarge) } else { // Not enough space anywhere, we need to allocate. 
- buf := makeSlice(2*cap(b.buf) + n) + buf := makeSlice(2*c + n) copy(buf, b.buf[b.off:]) b.buf = buf } @@ -150,7 +161,7 @@ func (b *Buffer) Grow(n int) { panic("bytes.Buffer.Grow: negative count") } m := b.grow(n) - b.buf = b.buf[0:m] + b.buf = b.buf[:m] } // Write appends the contents of p to the buffer, growing the buffer as @@ -189,34 +200,22 @@ const MinRead = 512 // buffer becomes too large, ReadFrom will panic with ErrTooLarge. func (b *Buffer) ReadFrom(r io.Reader) (n int64, err error) { b.lastRead = opInvalid - // If buffer is empty, reset to recover space. - if b.off >= len(b.buf) { - b.Reset() - } for { - if free := cap(b.buf) - len(b.buf); free < MinRead { - // not enough space at end - newBuf := b.buf - if b.off+free < MinRead { - // not enough space using beginning of buffer; - // double buffer capacity - newBuf = makeSlice(2*cap(b.buf) + MinRead) - } - copy(newBuf, b.buf[b.off:]) - b.buf = newBuf[:len(b.buf)-b.off] - b.off = 0 + i := b.grow(MinRead) + m, e := r.Read(b.buf[i:cap(b.buf)]) + if m < 0 { + panic(errNegativeRead) } - m, e := r.Read(b.buf[len(b.buf):cap(b.buf)]) - b.buf = b.buf[0 : len(b.buf)+m] + + b.buf = b.buf[:i+m] n += int64(m) if e == io.EOF { - break + return n, nil // e is EOF, so return nil explicitly } if e != nil { return n, e } } - return n, nil // err is EOF, so return nil explicitly } // makeSlice allocates a slice of size n. If the allocation fails, it panics @@ -237,8 +236,7 @@ func makeSlice(n int) []byte { // encountered during the write is also returned. func (b *Buffer) WriteTo(w io.Writer) (n int64, err error) { b.lastRead = opInvalid - if b.off < len(b.buf) { - nBytes := b.Len() + if nBytes := b.Len(); nBytes > 0 { m, e := w.Write(b.buf[b.off:]) if m > nBytes { panic("bytes.Buffer.WriteTo: invalid Write count") @@ -256,7 +254,7 @@ func (b *Buffer) WriteTo(w io.Writer) (n int64, err error) { } // Buffer is now empty; reset. b.Reset() - return + return n, nil } // WriteByte appends the byte c to the buffer, growing the buffer as needed. @@ -298,11 +296,11 @@ func (b *Buffer) WriteRune(r rune) (n int, err error) { // otherwise it is nil. func (b *Buffer) Read(p []byte) (n int, err error) { b.lastRead = opInvalid - if b.off >= len(b.buf) { + if b.empty() { // Buffer is empty, reset to recover space. b.Reset() if len(p) == 0 { - return + return 0, nil } return 0, io.EOF } @@ -311,7 +309,7 @@ func (b *Buffer) Read(p []byte) (n int, err error) { if n > 0 { b.lastRead = opRead } - return + return n, nil } // Next returns a slice containing the next n bytes from the buffer, @@ -335,8 +333,7 @@ func (b *Buffer) Next(n int) []byte { // ReadByte reads and returns the next byte from the buffer. // If no byte is available, it returns error io.EOF. func (b *Buffer) ReadByte() (byte, error) { - b.lastRead = opInvalid - if b.off >= len(b.buf) { + if b.empty() { // Buffer is empty, reset to recover space. b.Reset() return 0, io.EOF @@ -353,8 +350,7 @@ func (b *Buffer) ReadByte() (byte, error) { // If the bytes are an erroneous UTF-8 encoding, it // consumes one byte and returns U+FFFD, 1. func (b *Buffer) ReadRune() (r rune, size int, err error) { - b.lastRead = opInvalid - if b.off >= len(b.buf) { + if b.empty() { // Buffer is empty, reset to recover space. b.Reset() return 0, 0, io.EOF @@ -413,7 +409,7 @@ func (b *Buffer) ReadBytes(delim byte) (line []byte, err error) { // return a copy of slice. The buffer's backing array may // be overwritten by later calls. line = append(line, slice...) 
- return + return line, err } // readSlice is like ReadBytes but returns a reference to internal buffer data. diff --git a/libgo/go/bytes/buffer_test.go b/libgo/go/bytes/buffer_test.go index ce2f01a..e4bbc12 100644 --- a/libgo/go/bytes/buffer_test.go +++ b/libgo/go/bytes/buffer_test.go @@ -6,25 +6,27 @@ package bytes_test import ( . "bytes" - "internal/testenv" "io" "math/rand" - "os/exec" "runtime" "testing" "unicode/utf8" ) -const N = 10000 // make this bigger for a larger (and slower) test -var data string // test data for write tests -var testBytes []byte // test data; same as data but as a slice. +const N = 10000 // make this bigger for a larger (and slower) test +var testString string // test data for write tests +var testBytes []byte // test data; same as testString but as a slice. + +type negativeReader struct{} + +func (r *negativeReader) Read([]byte) (int, error) { return -1, nil } func init() { testBytes = make([]byte, N) for i := 0; i < N; i++ { testBytes[i] = 'a' + byte(i%26) } - data = string(testBytes) + testString = string(testBytes) } // Verify that contents of buf match the string s. @@ -88,12 +90,12 @@ func fillBytes(t *testing.T, testname string, buf *Buffer, s string, n int, fub func TestNewBuffer(t *testing.T) { buf := NewBuffer(testBytes) - check(t, "NewBuffer", buf, data) + check(t, "NewBuffer", buf, testString) } func TestNewBufferString(t *testing.T) { - buf := NewBufferString(data) - check(t, "NewBufferString", buf, data) + buf := NewBufferString(testString) + check(t, "NewBufferString", buf, testString) } // Empty buf through repeated reads into fub. @@ -128,7 +130,7 @@ func TestBasicOperations(t *testing.T) { buf.Truncate(0) check(t, "TestBasicOperations (3)", &buf, "") - n, err := buf.Write([]byte(data[0:1])) + n, err := buf.Write(testBytes[0:1]) if n != 1 { t.Errorf("wrote 1 byte, but n == %d", n) } @@ -137,30 +139,30 @@ func TestBasicOperations(t *testing.T) { } check(t, "TestBasicOperations (4)", &buf, "a") - buf.WriteByte(data[1]) + buf.WriteByte(testString[1]) check(t, "TestBasicOperations (5)", &buf, "ab") - n, err = buf.Write([]byte(data[2:26])) + n, err = buf.Write(testBytes[2:26]) if n != 24 { - t.Errorf("wrote 25 bytes, but n == %d", n) + t.Errorf("wrote 24 bytes, but n == %d", n) } - check(t, "TestBasicOperations (6)", &buf, string(data[0:26])) + check(t, "TestBasicOperations (6)", &buf, testString[0:26]) buf.Truncate(26) - check(t, "TestBasicOperations (7)", &buf, string(data[0:26])) + check(t, "TestBasicOperations (7)", &buf, testString[0:26]) buf.Truncate(20) - check(t, "TestBasicOperations (8)", &buf, string(data[0:20])) + check(t, "TestBasicOperations (8)", &buf, testString[0:20]) - empty(t, "TestBasicOperations (9)", &buf, string(data[0:20]), make([]byte, 5)) + empty(t, "TestBasicOperations (9)", &buf, testString[0:20], make([]byte, 5)) empty(t, "TestBasicOperations (10)", &buf, "", make([]byte, 100)) - buf.WriteByte(data[1]) + buf.WriteByte(testString[1]) c, err := buf.ReadByte() if err != nil { t.Error("ReadByte unexpected eof") } - if c != data[1] { + if c != testString[1] { t.Errorf("ReadByte wrong value c=%v", c) } c, err = buf.ReadByte() @@ -177,8 +179,8 @@ func TestLargeStringWrites(t *testing.T) { limit = 9 } for i := 3; i < limit; i += 3 { - s := fillString(t, "TestLargeWrites (1)", &buf, "", 5, data) - empty(t, "TestLargeStringWrites (2)", &buf, s, make([]byte, len(data)/i)) + s := fillString(t, "TestLargeWrites (1)", &buf, "", 5, testString) + empty(t, "TestLargeStringWrites (2)", &buf, s, make([]byte, len(testString)/i)) } check(t, 
"TestLargeStringWrites (3)", &buf, "") } @@ -191,7 +193,7 @@ func TestLargeByteWrites(t *testing.T) { } for i := 3; i < limit; i += 3 { s := fillBytes(t, "TestLargeWrites (1)", &buf, "", 5, testBytes) - empty(t, "TestLargeByteWrites (2)", &buf, s, make([]byte, len(data)/i)) + empty(t, "TestLargeByteWrites (2)", &buf, s, make([]byte, len(testString)/i)) } check(t, "TestLargeByteWrites (3)", &buf, "") } @@ -199,8 +201,8 @@ func TestLargeByteWrites(t *testing.T) { func TestLargeStringReads(t *testing.T) { var buf Buffer for i := 3; i < 30; i += 3 { - s := fillString(t, "TestLargeReads (1)", &buf, "", 5, data[0:len(data)/i]) - empty(t, "TestLargeReads (2)", &buf, s, make([]byte, len(data))) + s := fillString(t, "TestLargeReads (1)", &buf, "", 5, testString[0:len(testString)/i]) + empty(t, "TestLargeReads (2)", &buf, s, make([]byte, len(testString))) } check(t, "TestLargeStringReads (3)", &buf, "") } @@ -209,7 +211,7 @@ func TestLargeByteReads(t *testing.T) { var buf Buffer for i := 3; i < 30; i += 3 { s := fillBytes(t, "TestLargeReads (1)", &buf, "", 5, testBytes[0:len(testBytes)/i]) - empty(t, "TestLargeReads (2)", &buf, s, make([]byte, len(data))) + empty(t, "TestLargeReads (2)", &buf, s, make([]byte, len(testString))) } check(t, "TestLargeByteReads (3)", &buf, "") } @@ -218,14 +220,14 @@ func TestMixedReadsAndWrites(t *testing.T) { var buf Buffer s := "" for i := 0; i < 50; i++ { - wlen := rand.Intn(len(data)) + wlen := rand.Intn(len(testString)) if i%2 == 0 { - s = fillString(t, "TestMixedReadsAndWrites (1)", &buf, s, 1, data[0:wlen]) + s = fillString(t, "TestMixedReadsAndWrites (1)", &buf, s, 1, testString[0:wlen]) } else { s = fillBytes(t, "TestMixedReadsAndWrites (1)", &buf, s, 1, testBytes[0:wlen]) } - rlen := rand.Intn(len(data)) + rlen := rand.Intn(len(testString)) fub := make([]byte, rlen) n, _ := buf.Read(fub) s = s[n:] @@ -263,17 +265,37 @@ func TestReadFrom(t *testing.T) { s := fillBytes(t, "TestReadFrom (1)", &buf, "", 5, testBytes[0:len(testBytes)/i]) var b Buffer b.ReadFrom(&buf) - empty(t, "TestReadFrom (2)", &b, s, make([]byte, len(data))) + empty(t, "TestReadFrom (2)", &b, s, make([]byte, len(testString))) } } +func TestReadFromNegativeReader(t *testing.T) { + var b Buffer + defer func() { + switch err := recover().(type) { + case nil: + t.Fatal("bytes.Buffer.ReadFrom didn't panic") + case error: + // this is the error string of errNegativeRead + wantError := "bytes.Buffer: reader returned negative count from Read" + if err.Error() != wantError { + t.Fatalf("recovered panic: got %v, want %v", err.Error(), wantError) + } + default: + t.Fatalf("unexpected panic value: %#v", err) + } + }() + + b.ReadFrom(new(negativeReader)) +} + func TestWriteTo(t *testing.T) { var buf Buffer for i := 3; i < 30; i += 3 { s := fillBytes(t, "TestWriteTo (1)", &buf, "", 5, testBytes[0:len(testBytes)/i]) var b Buffer buf.WriteTo(&b) - empty(t, "TestWriteTo (2)", &b, s, make([]byte, len(data))) + empty(t, "TestWriteTo (2)", &b, s, make([]byte, len(testString))) } } @@ -473,6 +495,18 @@ func TestGrow(t *testing.T) { } } +func TestGrowOverflow(t *testing.T) { + defer func() { + if err := recover(); err != ErrTooLarge { + t.Errorf("after too-large Grow, recover() = %v; want %v", err, ErrTooLarge) + } + }() + + buf := NewBuffer(make([]byte, 1)) + const maxInt = int(^uint(0) >> 1) + buf.Grow(maxInt) +} + // Was a bug: used to give EOF reading empty slice at EOF. 
func TestReadEmptyAtEOF(t *testing.T) { b := new(Buffer) @@ -548,26 +582,6 @@ func TestBufferGrowth(t *testing.T) { } } -// Test that tryGrowByReslice is inlined. -// Only execute on "linux-amd64" builder in order to avoid breakage. -func TestTryGrowByResliceInlined(t *testing.T) { - targetBuilder := "linux-amd64" - if testenv.Builder() != targetBuilder { - t.Skipf("%q gets executed on %q builder only", t.Name(), targetBuilder) - } - t.Parallel() - goBin := testenv.GoToolPath(t) - out, err := exec.Command(goBin, "tool", "nm", goBin).CombinedOutput() - if err != nil { - t.Fatalf("go tool nm: %v: %s", err, out) - } - // Verify this doesn't exist: - sym := "bytes.(*Buffer).tryGrowByReslice" - if Contains(out, []byte(sym)) { - t.Errorf("found symbol %q in cmd/go, but should be inlined", sym) - } -} - func BenchmarkWriteByte(b *testing.B) { const n = 4 << 10 b.SetBytes(n) diff --git a/libgo/go/bytes/bytes.go b/libgo/go/bytes/bytes.go index 7c878af..9af177f 100644 --- a/libgo/go/bytes/bytes.go +++ b/libgo/go/bytes/bytes.go @@ -39,7 +39,7 @@ func explode(s []byte, n int) [][]byte { break } _, size = utf8.DecodeRune(s) - a[na] = s[0:size] + a[na] = s[0:size:size] s = s[size:] na++ } @@ -68,12 +68,12 @@ func Contains(b, subslice []byte) bool { return Index(b, subslice) != -1 } -// ContainsAny reports whether any of the UTF-8-encoded Unicode code points in chars are within b. +// ContainsAny reports whether any of the UTF-8-encoded code points in chars are within b. func ContainsAny(b []byte, chars string) bool { return IndexAny(b, chars) >= 0 } -// ContainsRune reports whether the Unicode code point r is within b. +// ContainsRune reports whether the rune is contained in the UTF-8-encoded byte slice b. func ContainsRune(b []byte, r rune) bool { return IndexRune(b, r) >= 0 } @@ -112,7 +112,7 @@ func LastIndexByte(s []byte, c byte) int { return -1 } -// IndexRune interprets s as a sequence of UTF-8-encoded Unicode code points. +// IndexRune interprets s as a sequence of UTF-8-encoded code points. // It returns the byte index of the first occurrence in s of the given rune. // It returns -1 if rune is not present in s. // If r is utf8.RuneError, it returns the first instance of any @@ -144,29 +144,31 @@ func IndexRune(s []byte, r rune) int { // code points in chars. It returns -1 if chars is empty or if there is no code // point in common. func IndexAny(s []byte, chars string) int { - if len(chars) > 0 { - if len(s) > 8 { - if as, isASCII := makeASCIISet(chars); isASCII { - for i, c := range s { - if as.contains(c) { - return i - } + if chars == "" { + // Avoid scanning all of s. + return -1 + } + if len(s) > 8 { + if as, isASCII := makeASCIISet(chars); isASCII { + for i, c := range s { + if as.contains(c) { + return i } - return -1 } + return -1 } - var width int - for i := 0; i < len(s); i += width { - r := rune(s[i]) - if r < utf8.RuneSelf { - width = 1 - } else { - r, width = utf8.DecodeRune(s[i:]) - } - for _, ch := range chars { - if r == ch { - return i - } + } + var width int + for i := 0; i < len(s); i += width { + r := rune(s[i]) + if r < utf8.RuneSelf { + width = 1 + } else { + r, width = utf8.DecodeRune(s[i:]) + } + for _, ch := range chars { + if r == ch { + return i } } } @@ -178,24 +180,26 @@ func IndexAny(s []byte, chars string) int { // the Unicode code points in chars. It returns -1 if chars is empty or if // there is no code point in common. 
func LastIndexAny(s []byte, chars string) int { - if len(chars) > 0 { - if len(s) > 8 { - if as, isASCII := makeASCIISet(chars); isASCII { - for i := len(s) - 1; i >= 0; i-- { - if as.contains(s[i]) { - return i - } + if chars == "" { + // Avoid scanning all of s. + return -1 + } + if len(s) > 8 { + if as, isASCII := makeASCIISet(chars); isASCII { + for i := len(s) - 1; i >= 0; i-- { + if as.contains(s[i]) { + return i } - return -1 } + return -1 } - for i := len(s); i > 0; { - r, size := utf8.DecodeLastRune(s[:i]) - i -= size - for _, c := range chars { - if r == c { - return i - } + } + for i := len(s); i > 0; { + r, size := utf8.DecodeLastRune(s[:i]) + i -= size + for _, c := range chars { + if r == c { + return i } } } @@ -223,7 +227,7 @@ func genSplit(s, sep []byte, sepSave, n int) [][]byte { if m < 0 { break } - a[i] = s[:m+sepSave] + a[i] = s[: m+sepSave : m+sepSave] s = s[m+len(sep):] i++ } @@ -265,52 +269,112 @@ func SplitAfter(s, sep []byte) [][]byte { return genSplit(s, sep, len(sep), -1) } -// Fields splits the slice s around each instance of one or more consecutive white space -// characters, returning a slice of subslices of s or an empty list if s contains only white space. +var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1} + +// Fields interprets s as a sequence of UTF-8-encoded code points. +// It splits the slice s around each instance of one or more consecutive white space +// characters, as defined by unicode.IsSpace, returning a slice of subslices of s or an +// empty slice if s contains only white space. func Fields(s []byte) [][]byte { - return FieldsFunc(s, unicode.IsSpace) + // First count the fields. + // This is an exact count if s is ASCII, otherwise it is an approximation. + n := 0 + wasSpace := 1 + // setBits is used to track which bits are set in the bytes of s. + setBits := uint8(0) + for i := 0; i < len(s); i++ { + r := s[i] + setBits |= r + isSpace := int(asciiSpace[r]) + n += wasSpace & ^isSpace + wasSpace = isSpace + } + + if setBits >= utf8.RuneSelf { + // Some runes in the input slice are not ASCII. + return FieldsFunc(s, unicode.IsSpace) + } + + // ASCII fast path + a := make([][]byte, n) + na := 0 + fieldStart := 0 + i := 0 + // Skip spaces in the front of the input. + for i < len(s) && asciiSpace[s[i]] != 0 { + i++ + } + fieldStart = i + for i < len(s) { + if asciiSpace[s[i]] == 0 { + i++ + continue + } + a[na] = s[fieldStart:i:i] + na++ + i++ + // Skip spaces in between fields. + for i < len(s) && asciiSpace[s[i]] != 0 { + i++ + } + fieldStart = i + } + if fieldStart < len(s) { // Last field might end at EOF. + a[na] = s[fieldStart:len(s):len(s)] + } + return a } -// FieldsFunc interprets s as a sequence of UTF-8-encoded Unicode code points. +// FieldsFunc interprets s as a sequence of UTF-8-encoded code points. // It splits the slice s at each run of code points c satisfying f(c) and // returns a slice of subslices of s. If all code points in s satisfy f(c), or // len(s) == 0, an empty slice is returned. // FieldsFunc makes no guarantees about the order in which it calls f(c). // If f does not return consistent results for a given c, FieldsFunc may crash. func FieldsFunc(s []byte, f func(rune) bool) [][]byte { - n := 0 - inField := false - for i := 0; i < len(s); { - r, size := utf8.DecodeRune(s[i:]) - wasInField := inField - inField = !f(r) - if inField && !wasInField { - n++ - } - i += size + // A span is used to record a slice of s of the form s[start:end]. 
+ // The start index is inclusive and the end index is exclusive. + type span struct { + start int + end int } + spans := make([]span, 0, 32) - a := make([][]byte, n) - na := 0 - fieldStart := -1 - for i := 0; i <= len(s) && na < n; { - r, size := utf8.DecodeRune(s[i:]) - if fieldStart < 0 && size > 0 && !f(r) { - fieldStart = i - i += size - continue - } - if fieldStart >= 0 && (size == 0 || f(r)) { - a[na] = s[fieldStart:i] - na++ - fieldStart = -1 + // Find the field start and end indices. + wasField := false + fromIndex := 0 + for i := 0; i < len(s); { + size := 1 + r := rune(s[i]) + if r >= utf8.RuneSelf { + r, size = utf8.DecodeRune(s[i:]) } - if size == 0 { - break + if f(r) { + if wasField { + spans = append(spans, span{start: fromIndex, end: i}) + wasField = false + } + } else { + if !wasField { + fromIndex = i + wasField = true + } } i += size } - return a[0:na] + + // Last field might end at EOF. + if wasField { + spans = append(spans, span{fromIndex, len(s)}) + } + + // Create subslices from recorded field indices. + a := make([][]byte, len(spans)) + for i, span := range spans { + a[i] = s[span.start:span.end:span.end] + } + + return a } // Join concatenates the elements of s to create a new byte slice. The separator @@ -349,8 +413,8 @@ func HasSuffix(s, suffix []byte) bool { // Map returns a copy of the byte slice s with all its characters modified // according to the mapping function. If mapping returns a negative value, the character is -// dropped from the string with no replacement. The characters in s and the -// output are interpreted as UTF-8-encoded Unicode code points. +// dropped from the byte slice with no replacement. The characters in s and the +// output are interpreted as UTF-8-encoded code points. func Map(mapping func(r rune) rune, s []byte) []byte { // In the worst case, the slice can grow when mapped, making // things unpleasant. But it's so rare we barge in assuming it's @@ -408,28 +472,28 @@ func Repeat(b []byte, count int) []byte { return nb } -// ToUpper returns a copy of the byte slice s with all Unicode letters mapped to their upper case. +// ToUpper treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters within it mapped to their upper case. func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) } -// ToLower returns a copy of the byte slice s with all Unicode letters mapped to their lower case. +// ToLower treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their lower case. func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) } -// ToTitle returns a copy of the byte slice s with all Unicode letters mapped to their title case. +// ToTitle treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their title case. func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) } -// ToUpperSpecial returns a copy of the byte slice s with all Unicode letters mapped to their +// ToUpperSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their // upper case, giving priority to the special casing rules. func ToUpperSpecial(c unicode.SpecialCase, s []byte) []byte { return Map(func(r rune) rune { return c.ToUpper(r) }, s) } -// ToLowerSpecial returns a copy of the byte slice s with all Unicode letters mapped to their +// ToLowerSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their // lower case, giving priority to the special casing rules. 
func ToLowerSpecial(c unicode.SpecialCase, s []byte) []byte { return Map(func(r rune) rune { return c.ToLower(r) }, s) } -// ToTitleSpecial returns a copy of the byte slice s with all Unicode letters mapped to their +// ToTitleSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their // title case, giving priority to the special casing rules. func ToTitleSpecial(c unicode.SpecialCase, s []byte) []byte { return Map(func(r rune) rune { return c.ToTitle(r) }, s) @@ -460,8 +524,8 @@ func isSeparator(r rune) bool { return unicode.IsSpace(r) } -// Title returns a copy of s with all Unicode letters that begin words -// mapped to their title case. +// Title treats s as UTF-8-encoded bytes and returns a copy with all Unicode letters that begin +// words mapped to their title case. // // BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly. func Title(s []byte) []byte { @@ -481,8 +545,8 @@ func Title(s []byte) []byte { s) } -// TrimLeftFunc returns a subslice of s by slicing off all leading UTF-8-encoded -// Unicode code points c that satisfy f(c). +// TrimLeftFunc treats s as UTF-8-encoded bytes and returns a subslice of s by slicing off +// all leading UTF-8-encoded code points c that satisfy f(c). func TrimLeftFunc(s []byte, f func(r rune) bool) []byte { i := indexFunc(s, f, false) if i == -1 { @@ -491,8 +555,8 @@ func TrimLeftFunc(s []byte, f func(r rune) bool) []byte { return s[i:] } -// TrimRightFunc returns a subslice of s by slicing off all trailing UTF-8 -// encoded Unicode code points c that satisfy f(c). +// TrimRightFunc returns a subslice of s by slicing off all trailing +// UTF-8-encoded code points c that satisfy f(c). func TrimRightFunc(s []byte, f func(r rune) bool) []byte { i := lastIndexFunc(s, f, false) if i >= 0 && s[i] >= utf8.RuneSelf { @@ -505,7 +569,7 @@ func TrimRightFunc(s []byte, f func(r rune) bool) []byte { } // TrimFunc returns a subslice of s by slicing off all leading and trailing -// UTF-8-encoded Unicode code points c that satisfy f(c). +// UTF-8-encoded code points c that satisfy f(c). func TrimFunc(s []byte, f func(r rune) bool) []byte { return TrimRightFunc(TrimLeftFunc(s, f), f) } @@ -528,14 +592,14 @@ func TrimSuffix(s, suffix []byte) []byte { return s } -// IndexFunc interprets s as a sequence of UTF-8-encoded Unicode code points. +// IndexFunc interprets s as a sequence of UTF-8-encoded code points. // It returns the byte index in s of the first Unicode // code point satisfying f(c), or -1 if none do. func IndexFunc(s []byte, f func(r rune) bool) int { return indexFunc(s, f, true) } -// LastIndexFunc interprets s as a sequence of UTF-8-encoded Unicode code points. +// LastIndexFunc interprets s as a sequence of UTF-8-encoded code points. // It returns the byte index in s of the last Unicode // code point satisfying f(c), or -1 if none do. func LastIndexFunc(s []byte, f func(r rune) bool) int { @@ -626,19 +690,19 @@ func makeCutsetFunc(cutset string) func(r rune) bool { } // Trim returns a subslice of s by slicing off all leading and -// trailing UTF-8-encoded Unicode code points contained in cutset. +// trailing UTF-8-encoded code points contained in cutset. func Trim(s []byte, cutset string) []byte { return TrimFunc(s, makeCutsetFunc(cutset)) } // TrimLeft returns a subslice of s by slicing off all leading -// UTF-8-encoded Unicode code points contained in cutset. +// UTF-8-encoded code points contained in cutset. 
func TrimLeft(s []byte, cutset string) []byte { return TrimLeftFunc(s, makeCutsetFunc(cutset)) } // TrimRight returns a subslice of s by slicing off all trailing -// UTF-8-encoded Unicode code points that are contained in cutset. +// UTF-8-encoded code points that are contained in cutset. func TrimRight(s []byte, cutset string) []byte { return TrimRightFunc(s, makeCutsetFunc(cutset)) } @@ -649,7 +713,8 @@ func TrimSpace(s []byte) []byte { return TrimFunc(s, unicode.IsSpace) } -// Runes returns a slice of runes (Unicode code points) equivalent to s. +// Runes interprets s as a sequence of UTF-8-encoded code points. +// It returns a slice of runes (Unicode code points) equivalent to s. func Runes(s []byte) []rune { t := make([]rune, utf8.RuneCount(s)) i := 0 @@ -758,3 +823,46 @@ func EqualFold(s, t []byte) bool { // One string is empty. Are both? return len(s) == len(t) } + +func indexRabinKarp(s, sep []byte) int { + // Rabin-Karp search + hashsep, pow := hashStr(sep) + n := len(sep) + var h uint32 + for i := 0; i < n; i++ { + h = h*primeRK + uint32(s[i]) + } + if h == hashsep && Equal(s[:n], sep) { + return 0 + } + for i := n; i < len(s); { + h *= primeRK + h += uint32(s[i]) + h -= pow * uint32(s[i-n]) + i++ + if h == hashsep && Equal(s[i-n:i], sep) { + return i - n + } + } + return -1 +} + +// primeRK is the prime base used in Rabin-Karp algorithm. +const primeRK = 16777619 + +// hashStr returns the hash and the appropriate multiplicative +// factor for use in Rabin-Karp algorithm. +func hashStr(sep []byte) (uint32, uint32) { + hash := uint32(0) + for i := 0; i < len(sep); i++ { + hash = hash*primeRK + uint32(sep[i]) + } + var pow, sq uint32 = 1, primeRK + for i := len(sep); i > 0; i >>= 1 { + if i&1 != 0 { + pow *= sq + } + sq *= sq + } + return hash, pow +} diff --git a/libgo/go/bytes/bytes_amd64.go b/libgo/go/bytes/bytes_amd64.go index d40c744..2fbbbb0 100644 --- a/libgo/go/bytes/bytes_amd64.go +++ b/libgo/go/bytes/bytes_amd64.go @@ -77,52 +77,14 @@ func Index(s, sep []byte) int { } return -1 } - // Rabin-Karp search - hashsep, pow := hashStr(sep) - var h uint32 - for i := 0; i < n; i++ { - h = h*primeRK + uint32(s[i]) - } - if h == hashsep && Equal(s[:n], sep) { - return 0 - } - for i := n; i < len(s); { - h *= primeRK - h += uint32(s[i]) - h -= pow * uint32(s[i-n]) - i++ - if h == hashsep && Equal(s[i-n:i], sep) { - return i - n - } - } - return -1 + return indexRabinKarp(s, sep) } // Count counts the number of non-overlapping instances of sep in s. -// If sep is an empty slice, Count returns 1 + the number of Unicode code points in s. +// If sep is an empty slice, Count returns 1 + the number of UTF-8-encoded code points in s. func Count(s, sep []byte) int { if len(sep) == 1 && cpu.X86.HasPOPCNT { return countByte(s, sep[0]) } return countGeneric(s, sep) } - -// primeRK is the prime base used in Rabin-Karp algorithm. -const primeRK = 16777619 - -// hashStr returns the hash and the appropriate multiplicative -// factor for use in Rabin-Karp algorithm. -func hashStr(sep []byte) (uint32, uint32) { - hash := uint32(0) - for i := 0; i < len(sep); i++ { - hash = hash*primeRK + uint32(sep[i]) - } - var pow, sq uint32 = 1, primeRK - for i := len(sep); i > 0; i >>= 1 { - if i&1 != 0 { - pow *= sq - } - sq *= sq - } - return hash, pow -} diff --git a/libgo/go/bytes/bytes_arm64.go b/libgo/go/bytes/bytes_arm64.go new file mode 100644 index 0000000..1213b06 --- /dev/null +++ b/libgo/go/bytes/bytes_arm64.go @@ -0,0 +1,70 @@ +// Copyright 2017 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +package bytes + +func countByte(s []byte, c byte) int // bytes_arm64.s + +// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s. +func Index(s, sep []byte) int { + n := len(sep) + switch { + case n == 0: + return 0 + case n == 1: + return IndexByte(s, sep[0]) + case n == len(s): + if Equal(sep, s) { + return 0 + } + return -1 + case n > len(s): + return -1 + } + c := sep[0] + i := 0 + fails := 0 + t := s[:len(s)-n+1] + for i < len(t) { + if t[i] != c { + o := IndexByte(t[i:], c) + if o < 0 { + break + } + i += o + } + if Equal(s[i:i+n], sep) { + return i + } + i++ + fails++ + if fails >= 4+i>>4 && i < len(t) { + // Give up on IndexByte, it isn't skipping ahead + // far enough to be better than Rabin-Karp. + // Experiments (using IndexPeriodic) suggest + // the cutover is about 16 byte skips. + // TODO: if large prefixes of sep are matching + // we should cutover at even larger average skips, + // because Equal becomes that much more expensive. + // This code does not take that effect into account. + j := indexRabinKarp(s[i:], sep) + if j < 0 { + return -1 + } + return i + j + } + } + return -1 +} + +// Count counts the number of non-overlapping instances of sep in s. +// If sep is an empty slice, Count returns 1 + the number of UTF-8-encoded code points in s. +func Count(s, sep []byte) int { + if len(sep) == 1 { + return countByte(s, sep[0]) + } + return countGeneric(s, sep) +} diff --git a/libgo/go/bytes/bytes_generic.go b/libgo/go/bytes/bytes_generic.go index 75a9c36..b52d939 100644 --- a/libgo/go/bytes/bytes_generic.go +++ b/libgo/go/bytes/bytes_generic.go @@ -2,27 +2,29 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// -build !amd64,!s390x +// -build !amd64,!s390x,!arm64 package bytes -// TODO: implements short string optimization on non amd64 platforms -// and get rid of bytes_amd64.go - // Index returns the index of the first instance of sep in s, or -1 if sep is not present in s. func Index(s, sep []byte) int { n := len(sep) - if n == 0 { + switch { + case n == 0: return 0 - } - if n > len(s) { + case n == 1: + return IndexByte(s, sep[0]) + case n == len(s): + if Equal(sep, s) { + return 0 + } + return -1 + case n > len(s): return -1 } c := sep[0] - if n == 1 { - return IndexByte(s, c) - } i := 0 + fails := 0 t := s[:len(s)-n+1] for i < len(t) { if t[i] != c { @@ -36,12 +38,28 @@ func Index(s, sep []byte) int { return i } i++ + fails++ + if fails >= 4+i>>4 && i < len(t) { + // Give up on IndexByte, it isn't skipping ahead + // far enough to be better than Rabin-Karp. + // Experiments (using IndexPeriodic) suggest + // the cutover is about 16 byte skips. + // TODO: if large prefixes of sep are matching + // we should cutover at even larger average skips, + // because Equal becomes that much more expensive. + // This code does not take that effect into account. + j := indexRabinKarp(s[i:], sep) + if j < 0 { + return -1 + } + return i + j + } } return -1 } // Count counts the number of non-overlapping instances of sep in s. -// If sep is an empty slice, Count returns 1 + the number of Unicode code points in s. +// If sep is an empty slice, Count returns 1 + the number of UTF-8-encoded code points in s. 
func Count(s, sep []byte) int { return countGeneric(s, sep) } diff --git a/libgo/go/bytes/bytes_s390x.go b/libgo/go/bytes/bytes_s390x.go index 54e013e..0c22848 100644 --- a/libgo/go/bytes/bytes_s390x.go +++ b/libgo/go/bytes/bytes_s390x.go @@ -78,49 +78,11 @@ func Index(s, sep []byte) int { } return -1 } - // Rabin-Karp search - hashsep, pow := hashStr(sep) - var h uint32 - for i := 0; i < n; i++ { - h = h*primeRK + uint32(s[i]) - } - if h == hashsep && Equal(s[:n], sep) { - return 0 - } - for i := n; i < len(s); { - h *= primeRK - h += uint32(s[i]) - h -= pow * uint32(s[i-n]) - i++ - if h == hashsep && Equal(s[i-n:i], sep) { - return i - n - } - } - return -1 + return indexRabinKarp(s, sep) } // Count counts the number of non-overlapping instances of sep in s. -// If sep is an empty slice, Count returns 1 + the number of Unicode code points in s. +// If sep is an empty slice, Count returns 1 + the number of UTF-8-encoded code points in s. func Count(s, sep []byte) int { return countGeneric(s, sep) } - -// primeRK is the prime base used in Rabin-Karp algorithm. -const primeRK = 16777619 - -// hashStr returns the hash and the appropriate multiplicative -// factor for use in Rabin-Karp algorithm. -func hashStr(sep []byte) (uint32, uint32) { - hash := uint32(0) - for i := 0; i < len(sep); i++ { - hash = hash*primeRK + uint32(sep[i]) - } - var pow, sq uint32 = 1, primeRK - for i := len(sep); i > 0; i >>= 1 { - if i&1 != 0 { - pow *= sq - } - sq *= sq - } - return hash, pow -} diff --git a/libgo/go/bytes/bytes_test.go b/libgo/go/bytes/bytes_test.go index d571eb3..23fce29 100644 --- a/libgo/go/bytes/bytes_test.go +++ b/libgo/go/bytes/bytes_test.go @@ -140,6 +140,9 @@ var indexTests = []BinOpTest{ {"barfoobarfooyyyzzzyyyzzzyyyzzzyyyxxxzzzyyy", "x", 33}, {"foofyfoobarfoobar", "y", 4}, {"oooooooooooooooooooooo", "r", -1}, + // test fallback to Rabin-Karp. + {"oxoxoxoxoxoxoxoxoxoxoxoy", "oy", 22}, + {"oxoxoxoxoxoxoxoxoxoxoxox", "oy", -1}, } var lastIndexTests = []BinOpTest{ @@ -741,6 +744,13 @@ var splittests = []SplitTest{ func TestSplit(t *testing.T) { for _, tt := range splittests { a := SplitN([]byte(tt.s), []byte(tt.sep), tt.n) + + // Appending to the results should not change future results. + var x []byte + for _, v := range a { + x = append(v, 'z') + } + result := sliceOfString(a) if !eq(result, tt.a) { t.Errorf(`Split(%q, %q, %d) = %v; want %v`, tt.s, tt.sep, tt.n, result, tt.a) @@ -749,6 +759,11 @@ func TestSplit(t *testing.T) { if tt.n == 0 { continue } + + if want := tt.a[len(tt.a)-1] + "z"; string(x) != want { + t.Errorf("last appended result was %s; want %s", x, want) + } + s := Join(a, []byte(tt.sep)) if string(s) != tt.s { t.Errorf(`Join(Split(%q, %q, %d), %q) = %q`, tt.s, tt.sep, tt.n, tt.sep, s) @@ -787,11 +802,23 @@ var splitaftertests = []SplitTest{ func TestSplitAfter(t *testing.T) { for _, tt := range splitaftertests { a := SplitAfterN([]byte(tt.s), []byte(tt.sep), tt.n) + + // Appending to the results should not change future results. 
+ var x []byte + for _, v := range a { + x = append(v, 'z') + } + result := sliceOfString(a) if !eq(result, tt.a) { t.Errorf(`Split(%q, %q, %d) = %v; want %v`, tt.s, tt.sep, tt.n, result, tt.a) continue } + + if want := tt.a[len(tt.a)-1] + "z"; string(x) != want { + t.Errorf("last appended result was %s; want %s", x, want) + } + s := Join(a, nil) if string(s) != tt.s { t.Errorf(`Join(Split(%q, %q, %d), %q) = %q`, tt.s, tt.sep, tt.n, tt.sep, s) @@ -826,12 +853,29 @@ var fieldstests = []FieldsTest{ func TestFields(t *testing.T) { for _, tt := range fieldstests { - a := Fields([]byte(tt.s)) + b := []byte(tt.s) + a := Fields(b) + + // Appending to the results should not change future results. + var x []byte + for _, v := range a { + x = append(v, 'z') + } + result := sliceOfString(a) if !eq(result, tt.a) { t.Errorf("Fields(%q) = %v; want %v", tt.s, a, tt.a) continue } + + if string(b) != tt.s { + t.Errorf("slice changed to %s; want %s", string(b), tt.s) + } + if len(tt.a) > 0 { + if want := tt.a[len(tt.a)-1] + "z"; string(x) != want { + t.Errorf("last appended result was %s; want %s", x, want) + } + } } } @@ -852,11 +896,28 @@ func TestFieldsFunc(t *testing.T) { {"aXXbXXXcX", []string{"a", "b", "c"}}, } for _, tt := range fieldsFuncTests { - a := FieldsFunc([]byte(tt.s), pred) + b := []byte(tt.s) + a := FieldsFunc(b, pred) + + // Appending to the results should not change future results. + var x []byte + for _, v := range a { + x = append(v, 'z') + } + result := sliceOfString(a) if !eq(result, tt.a) { t.Errorf("FieldsFunc(%q) = %v, want %v", tt.s, a, tt.a) } + + if string(b) != tt.s { + t.Errorf("slice changed to %s; want %s", b, tt.s) + } + if len(tt.a) > 0 { + if want := tt.a[len(tt.a)-1] + "z"; string(x) != want { + t.Errorf("last appended result was %s; want %s", x, want) + } + } } } @@ -1507,19 +1568,58 @@ var makeFieldsInput = func() []byte { return x } -var fieldsInput = makeFieldsInput() +var makeFieldsInputASCII = func() []byte { + x := make([]byte, 1<<20) + // Input is ~10% space, rest ASCII non-space. 
+ for i := range x { + if rand.Intn(10) == 0 { + x[i] = ' ' + } else { + x[i] = 'x' + } + } + return x +} + +var bytesdata = []struct { + name string + data []byte +}{ + {"ASCII", makeFieldsInputASCII()}, + {"Mixed", makeFieldsInput()}, +} func BenchmarkFields(b *testing.B) { - b.SetBytes(int64(len(fieldsInput))) - for i := 0; i < b.N; i++ { - Fields(fieldsInput) + for _, sd := range bytesdata { + b.Run(sd.name, func(b *testing.B) { + for j := 1 << 4; j <= 1<<20; j <<= 4 { + b.Run(fmt.Sprintf("%d", j), func(b *testing.B) { + b.ReportAllocs() + b.SetBytes(int64(j)) + data := sd.data[:j] + for i := 0; i < b.N; i++ { + Fields(data) + } + }) + } + }) } } func BenchmarkFieldsFunc(b *testing.B) { - b.SetBytes(int64(len(fieldsInput))) - for i := 0; i < b.N; i++ { - FieldsFunc(fieldsInput, unicode.IsSpace) + for _, sd := range bytesdata { + b.Run(sd.name, func(b *testing.B) { + for j := 1 << 4; j <= 1<<20; j <<= 4 { + b.Run(fmt.Sprintf("%d", j), func(b *testing.B) { + b.ReportAllocs() + b.SetBytes(int64(j)) + data := sd.data[:j] + for i := 0; i < b.N; i++ { + FieldsFunc(data, unicode.IsSpace) + } + }) + } + }) } } @@ -1638,3 +1738,18 @@ func BenchmarkTrimASCII(b *testing.B) { } } } + +func BenchmarkIndexPeriodic(b *testing.B) { + key := []byte{1, 1} + for _, skip := range [...]int{2, 4, 8, 16, 32, 64} { + b.Run(fmt.Sprintf("IndexPeriodic%d", skip), func(b *testing.B) { + buf := make([]byte, 1<<16) + for i := 0; i < len(buf); i += skip { + buf[i] = 1 + } + for i := 0; i < b.N; i++ { + Index(buf, key) + } + }) + } +} diff --git a/libgo/go/bytes/equal_test.go b/libgo/go/bytes/equal_test.go deleted file mode 100644 index 9fdead8..0000000 --- a/libgo/go/bytes/equal_test.go +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. -// -// +build linux - -package bytes_test - -import ( - . "bytes" - "syscall" - "testing" - "unsafe" -) - -// This file tests the situation where memeq is checking -// data very near to a page boundary. We want to make sure -// equal does not read across the boundary and cause a page -// fault where it shouldn't. - -// This test runs only on linux. The code being tested is -// not OS-specific, so it does not need to be tested on all -// operating systems. 
- -func TestEqualNearPageBoundary(t *testing.T) { - pagesize := syscall.Getpagesize() - b := make([]byte, 4*pagesize) - i := pagesize - for ; uintptr(unsafe.Pointer(&b[i]))%uintptr(pagesize) != 0; i++ { - } - syscall.Mprotect(b[i-pagesize:i], 0) - syscall.Mprotect(b[i+pagesize:i+2*pagesize], 0) - defer syscall.Mprotect(b[i-pagesize:i], syscall.PROT_READ|syscall.PROT_WRITE) - defer syscall.Mprotect(b[i+pagesize:i+2*pagesize], syscall.PROT_READ|syscall.PROT_WRITE) - - // both of these should fault - //pagesize += int(b[i-1]) - //pagesize += int(b[i+pagesize]) - - for j := 0; j < pagesize; j++ { - b[i+j] = 'A' - } - for j := 0; j <= pagesize; j++ { - Equal(b[i:i+j], b[i+pagesize-j:i+pagesize]) - Equal(b[i+pagesize-j:i+pagesize], b[i:i+j]) - } -} diff --git a/libgo/go/bytes/example_test.go b/libgo/go/bytes/example_test.go index 9397277..5b7a460 100644 --- a/libgo/go/bytes/example_test.go +++ b/libgo/go/bytes/example_test.go @@ -119,6 +119,32 @@ func ExampleContains() { // true } +func ExampleContainsAny() { + fmt.Println(bytes.ContainsAny([]byte("I like seafood."), "fÄo!")) + fmt.Println(bytes.ContainsAny([]byte("I like seafood."), "去是伟大的.")) + fmt.Println(bytes.ContainsAny([]byte("I like seafood."), "")) + fmt.Println(bytes.ContainsAny([]byte(""), "")) + // Output: + // true + // true + // false + // false +} + +func ExampleContainsRune() { + fmt.Println(bytes.ContainsRune([]byte("I like seafood."), 'f')) + fmt.Println(bytes.ContainsRune([]byte("I like seafood."), 'ö')) + fmt.Println(bytes.ContainsRune([]byte("去是伟大的!"), '大')) + fmt.Println(bytes.ContainsRune([]byte("去是伟大的!"), '!')) + fmt.Println(bytes.ContainsRune([]byte(""), '@')) + // Output: + // true + // false + // true + // true + // false +} + func ExampleCount() { fmt.Println(bytes.Count([]byte("cheese"), []byte("e"))) fmt.Println(bytes.Count([]byte("five"), []byte(""))) // before & after each rune @@ -127,6 +153,14 @@ func ExampleCount() { // 5 } +func ExampleEqual() { + fmt.Println(bytes.Equal([]byte("Go"), []byte("Go"))) + fmt.Println(bytes.Equal([]byte("Go"), []byte("C++"))) + // Output: + // true + // false +} + func ExampleEqualFold() { fmt.Println(bytes.EqualFold([]byte("Go"), []byte("go"))) // Output: true @@ -162,6 +196,14 @@ func ExampleIndex() { // -1 } +func ExampleIndexByte() { + fmt.Println(bytes.IndexByte([]byte("chicken"), byte('k'))) + fmt.Println(bytes.IndexByte([]byte("chicken"), byte('g'))) + // Output: + // 4 + // -1 +} + func ExampleIndexFunc() { f := func(c rune) bool { return unicode.Is(unicode.Han, c) @@ -199,6 +241,36 @@ func ExampleLastIndex() { // -1 } +func ExampleLastIndexAny() { + fmt.Println(bytes.LastIndexAny([]byte("go gopher"), "MüQp")) + fmt.Println(bytes.LastIndexAny([]byte("go 地鼠"), "地大")) + fmt.Println(bytes.LastIndexAny([]byte("go gopher"), "z,!.")) + // Output: + // 5 + // 3 + // -1 +} + +func ExampleLastIndexByte() { + fmt.Println(bytes.LastIndexByte([]byte("go gopher"), byte('g'))) + fmt.Println(bytes.LastIndexByte([]byte("go gopher"), byte('r'))) + fmt.Println(bytes.LastIndexByte([]byte("go gopher"), byte('z'))) + // Output: + // 3 + // 8 + // -1 +} + +func ExampleLastIndexFunc() { + fmt.Println(bytes.LastIndexFunc([]byte("go gopher!"), unicode.IsLetter)) + fmt.Println(bytes.LastIndexFunc([]byte("go gopher!"), unicode.IsPunct)) + fmt.Println(bytes.LastIndexFunc([]byte("go gopher!"), unicode.IsNumber)) + // Output: + // 8 + // 9 + // -1 +} + func ExampleJoin() { s := [][]byte{[]byte("foo"), []byte("bar"), []byte("baz")} fmt.Printf("%s", bytes.Join(s, []byte(", "))) @@ -218,6 +290,23 @@ func 
ExampleReplace() { // moo moo moo } +func ExampleRunes() { + rs := bytes.Runes([]byte("go gopher")) + for _, r := range rs { + fmt.Printf("%#U\n", r) + } + // Output: + // U+0067 'g' + // U+006F 'o' + // U+0020 ' ' + // U+0067 'g' + // U+006F 'o' + // U+0070 'p' + // U+0068 'h' + // U+0065 'e' + // U+0072 'r' +} + func ExampleSplit() { fmt.Printf("%q\n", bytes.Split([]byte("a,b,c"), []byte(","))) fmt.Printf("%q\n", bytes.Split([]byte("a man a plan a canal panama"), []byte("a "))) @@ -267,6 +356,18 @@ func ExampleTrim() { // Output: ["Achtung! Achtung"] } +func ExampleTrimFunc() { + fmt.Println(string(bytes.TrimFunc([]byte("go-gopher!"), unicode.IsLetter))) + fmt.Println(string(bytes.TrimFunc([]byte("\"go-gopher!\""), unicode.IsLetter))) + fmt.Println(string(bytes.TrimFunc([]byte("go-gopher!"), unicode.IsPunct))) + fmt.Println(string(bytes.TrimFunc([]byte("1234go-gopher!567"), unicode.IsNumber))) + // Output: + // -gopher! + // "go-gopher!" + // go-gopher + // go-gopher! +} + func ExampleMap() { rot13 := func(r rune) rune { switch { @@ -281,11 +382,43 @@ func ExampleMap() { // Output: 'Gjnf oevyyvt naq gur fyvgul tbcure... } +func ExampleTrimLeft() { + fmt.Print(string(bytes.TrimLeft([]byte("453gopher8257"), "0123456789"))) + // Output: + // gopher8257 +} + +func ExampleTrimLeftFunc() { + fmt.Println(string(bytes.TrimLeftFunc([]byte("go-gopher"), unicode.IsLetter))) + fmt.Println(string(bytes.TrimLeftFunc([]byte("go-gopher!"), unicode.IsPunct))) + fmt.Println(string(bytes.TrimLeftFunc([]byte("1234go-gopher!567"), unicode.IsNumber))) + // Output: + // -gopher + // go-gopher! + // go-gopher!567 +} + func ExampleTrimSpace() { fmt.Printf("%s", bytes.TrimSpace([]byte(" \t\n a lone gopher \n\t\r\n"))) // Output: a lone gopher } +func ExampleTrimRight() { + fmt.Print(string(bytes.TrimRight([]byte("453gopher8257"), "0123456789"))) + // Output: + // 453gopher +} + +func ExampleTrimRightFunc() { + fmt.Println(string(bytes.TrimRightFunc([]byte("go-gopher"), unicode.IsLetter))) + fmt.Println(string(bytes.TrimRightFunc([]byte("go-gopher!"), unicode.IsPunct))) + fmt.Println(string(bytes.TrimRightFunc([]byte("1234go-gopher!567"), unicode.IsNumber))) + // Output: + // go- + // go-gopher + // 1234go-gopher! +} + func ExampleToUpper() { fmt.Printf("%s", bytes.ToUpper([]byte("Gopher"))) // Output: GOPHER @@ -295,3 +428,11 @@ func ExampleToLower() { fmt.Printf("%s", bytes.ToLower([]byte("Gopher"))) // Output: gopher } + +func ExampleReader_Len() { + fmt.Println(bytes.NewReader([]byte("Hi!")).Len()) + fmt.Println(bytes.NewReader([]byte("こんにちは!")).Len()) + // Output: + // 3 + // 16 +} diff --git a/libgo/go/bytes/reader.go b/libgo/go/bytes/reader.go index 28cfc7a..08464c2 100644 --- a/libgo/go/bytes/reader.go +++ b/libgo/go/bytes/reader.go @@ -35,6 +35,7 @@ func (r *Reader) Len() int { // to any other method. func (r *Reader) Size() int64 { return int64(len(r.s)) } +// Read implements the io.Reader interface. func (r *Reader) Read(b []byte) (n int, err error) { if r.i >= int64(len(r.s)) { return 0, io.EOF @@ -45,6 +46,7 @@ func (r *Reader) Read(b []byte) (n int, err error) { return } +// ReadAt implements the io.ReaderAt interface. func (r *Reader) ReadAt(b []byte, off int64) (n int, err error) { // cannot modify state - see io.ReaderAt if off < 0 { @@ -60,6 +62,7 @@ func (r *Reader) ReadAt(b []byte, off int64) (n int, err error) { return } +// ReadByte implements the io.ByteReader interface. 
func (r *Reader) ReadByte() (byte, error) { r.prevRune = -1 if r.i >= int64(len(r.s)) { @@ -70,6 +73,7 @@ func (r *Reader) ReadByte() (byte, error) { return b, nil } +// UnreadByte complements ReadByte in implementing the io.ByteScanner interface. func (r *Reader) UnreadByte() error { r.prevRune = -1 if r.i <= 0 { @@ -79,6 +83,7 @@ func (r *Reader) UnreadByte() error { return nil } +// ReadRune implements the io.RuneReader interface. func (r *Reader) ReadRune() (ch rune, size int, err error) { if r.i >= int64(len(r.s)) { r.prevRune = -1 @@ -94,6 +99,7 @@ func (r *Reader) ReadRune() (ch rune, size int, err error) { return } +// UnreadRune complements ReadRune in implementing the io.RuneScanner interface. func (r *Reader) UnreadRune() error { if r.prevRune < 0 { return errors.New("bytes.Reader.UnreadRune: previous operation was not ReadRune") diff --git a/libgo/go/bytes/reader_test.go b/libgo/go/bytes/reader_test.go index 7b3034d..8806876 100644 --- a/libgo/go/bytes/reader_test.go +++ b/libgo/go/bytes/reader_test.go @@ -140,9 +140,9 @@ func TestReaderWriteTo(t *testing.T) { for i := 0; i < 30; i += 3 { var l int if i > 0 { - l = len(data) / i + l = len(testString) / i } - s := data[:l] + s := testString[:l] r := NewReader(testBytes[:l]) var b Buffer n, err := r.WriteTo(&b) |
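Among the bytes.go changes above, Fields gains an ASCII fast path: a first pass counts fields with a branch-free update (n += wasSpace & ^isSpace) while OR-ing every byte into setBits, so a single comparison of setBits against utf8.RuneSelf afterwards decides whether the exact ASCII count can be used or the code must fall back to FieldsFunc(s, unicode.IsSpace). The small program below restates just that counting pass outside the bytes package as a sketch; countFieldsASCII is a name invented here for illustration and is not part of the patch.

```go
package main

import (
	"fmt"
	"unicode/utf8"
)

// asciiSpace mirrors the lookup table the patch adds for Fields:
// a 1 marks each ASCII white-space byte.
var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}

// countFieldsASCII restates the counting pass from the patched Fields.
// n is exact when the input is pure ASCII; setBits accumulates every
// byte, so ascii reports whether the high bit was ever seen.
func countFieldsASCII(s []byte) (n int, ascii bool) {
	wasSpace := 1 // pretend the input is preceded by a space
	setBits := uint8(0)
	for i := 0; i < len(s); i++ {
		r := s[i]
		setBits |= r
		isSpace := int(asciiSpace[r])
		// Count a new field exactly when a non-space byte follows a space.
		n += wasSpace & ^isSpace
		wasSpace = isSpace
	}
	return n, setBits < utf8.RuneSelf
}

func main() {
	n, ascii := countFieldsASCII([]byte("  foo bar  baz   "))
	fmt.Println(n, ascii) // 3 true
}
```

When ascii is false, the patched Fields discards the approximate count and delegates to FieldsFunc, so the fast path never has to be correct for multi-byte space runes such as U+00A0.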