aboutsummaryrefslogtreecommitdiff
path: root/libgo/go/bytes
diff options
context:
space:
mode:
authorIan Lance Taylor <iant@golang.org>2020-07-27 22:27:54 -0700
committerIan Lance Taylor <iant@golang.org>2020-08-01 11:21:40 -0700
commitf75af8c1464e948b5e166cf5ab09ebf0d82fc253 (patch)
tree3ba3299859b504bdeb477727471216bd094a0191 /libgo/go/bytes
parent75a23e59031fe673fc3b2e60fd1fe5f4c70ecb85 (diff)
downloadgcc-f75af8c1464e948b5e166cf5ab09ebf0d82fc253.zip
gcc-f75af8c1464e948b5e166cf5ab09ebf0d82fc253.tar.gz
gcc-f75af8c1464e948b5e166cf5ab09ebf0d82fc253.tar.bz2
libgo: update to go1.15rc1
Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/245157
Diffstat (limited to 'libgo/go/bytes')
-rw-r--r--libgo/go/bytes/buffer_test.go19
-rw-r--r--libgo/go/bytes/bytes.go199
-rw-r--r--libgo/go/bytes/bytes_test.go78
3 files changed, 193 insertions, 103 deletions
diff --git a/libgo/go/bytes/buffer_test.go b/libgo/go/bytes/buffer_test.go
index 7626d27..fec5ef8 100644
--- a/libgo/go/bytes/buffer_test.go
+++ b/libgo/go/bytes/buffer_test.go
@@ -8,7 +8,6 @@ import (
. "bytes"
"io"
"math/rand"
- "runtime"
"testing"
"unicode/utf8"
)
@@ -495,20 +494,20 @@ func TestGrow(t *testing.T) {
x := []byte{'x'}
y := []byte{'y'}
tmp := make([]byte, 72)
- for _, startLen := range []int{0, 100, 1000, 10000, 100000} {
- xBytes := Repeat(x, startLen)
- for _, growLen := range []int{0, 100, 1000, 10000, 100000} {
+ for _, growLen := range []int{0, 100, 1000, 10000, 100000} {
+ for _, startLen := range []int{0, 100, 1000, 10000, 100000} {
+ xBytes := Repeat(x, startLen)
+
buf := NewBuffer(xBytes)
// If we read, this affects buf.off, which is good to test.
readBytes, _ := buf.Read(tmp)
- buf.Grow(growLen)
yBytes := Repeat(y, growLen)
+ allocs := testing.AllocsPerRun(100, func() {
+ buf.Grow(growLen)
+ buf.Write(yBytes)
+ })
// Check no allocation occurs in write, as long as we're single-threaded.
- var m1, m2 runtime.MemStats
- runtime.ReadMemStats(&m1)
- buf.Write(yBytes)
- runtime.ReadMemStats(&m2)
- if runtime.GOMAXPROCS(-1) == 1 && m1.Mallocs != m2.Mallocs {
+ if allocs != 0 {
t.Errorf("allocation occurred during write")
}
// Check that buffer has correct data.
diff --git a/libgo/go/bytes/bytes.go b/libgo/go/bytes/bytes.go
index e872cc2..aa07b9f 100644
--- a/libgo/go/bytes/bytes.go
+++ b/libgo/go/bytes/bytes.go
@@ -117,17 +117,17 @@ func LastIndex(s, sep []byte) int {
return -1
}
// Rabin-Karp search from the end of the string
- hashss, pow := hashStrRev(sep)
+ hashss, pow := bytealg.HashStrRevBytes(sep)
last := len(s) - n
var h uint32
for i := len(s) - 1; i >= last; i-- {
- h = h*primeRK + uint32(s[i])
+ h = h*bytealg.PrimeRK + uint32(s[i])
}
if h == hashss && Equal(s[last:], sep) {
return last
}
for i := last - 1; i >= 0; i-- {
- h *= primeRK
+ h *= bytealg.PrimeRK
h += uint32(s[i])
h -= pow * uint32(s[i+n])
if h == hashss && Equal(s[i:i+n], sep) {
@@ -183,6 +183,29 @@ func IndexAny(s []byte, chars string) int {
// Avoid scanning all of s.
return -1
}
+ if len(s) == 1 {
+ r := rune(s[0])
+ if r >= utf8.RuneSelf {
+ // search utf8.RuneError.
+ for _, r = range chars {
+ if r == utf8.RuneError {
+ return 0
+ }
+ }
+ return -1
+ }
+ if bytealg.IndexByteString(chars, s[0]) >= 0 {
+ return 0
+ }
+ return -1
+ }
+ if len(chars) == 1 {
+ r := rune(chars[0])
+ if r >= utf8.RuneSelf {
+ r = utf8.RuneError
+ }
+ return IndexRune(s, r)
+ }
if len(s) > 8 {
if as, isASCII := makeASCIISet(chars); isASCII {
for i, c := range s {
@@ -197,14 +220,26 @@ func IndexAny(s []byte, chars string) int {
for i := 0; i < len(s); i += width {
r := rune(s[i])
if r < utf8.RuneSelf {
+ if bytealg.IndexByteString(chars, s[i]) >= 0 {
+ return i
+ }
width = 1
- } else {
- r, width = utf8.DecodeRune(s[i:])
+ continue
}
- for _, ch := range chars {
- if r == ch {
- return i
+ r, width = utf8.DecodeRune(s[i:])
+ if r == utf8.RuneError {
+ for _, r = range chars {
+ if r == utf8.RuneError {
+ return i
+ }
}
+ continue
+ }
+ // r is 2 to 4 bytes. Using strings.Index is more reasonable, but as the bytes
+ // package should not import the strings package, use bytealg.IndexString
+ // instead. And this does not seem to lose much performance.
+ if chars == string(r) || bytealg.IndexString(chars, string(r)) >= 0 {
+ return i
}
}
return -1
@@ -229,13 +264,59 @@ func LastIndexAny(s []byte, chars string) int {
return -1
}
}
+ if len(s) == 1 {
+ r := rune(s[0])
+ if r >= utf8.RuneSelf {
+ for _, r = range chars {
+ if r == utf8.RuneError {
+ return 0
+ }
+ }
+ return -1
+ }
+ if bytealg.IndexByteString(chars, s[0]) >= 0 {
+ return 0
+ }
+ return -1
+ }
+ if len(chars) == 1 {
+ cr := rune(chars[0])
+ if cr >= utf8.RuneSelf {
+ cr = utf8.RuneError
+ }
+ for i := len(s); i > 0; {
+ r, size := utf8.DecodeLastRune(s[:i])
+ i -= size
+ if r == cr {
+ return i
+ }
+ }
+ return -1
+ }
for i := len(s); i > 0; {
+ r := rune(s[i-1])
+ if r < utf8.RuneSelf {
+ if bytealg.IndexByteString(chars, s[i-1]) >= 0 {
+ return i - 1
+ }
+ i--
+ continue
+ }
r, size := utf8.DecodeLastRune(s[:i])
i -= size
- for _, c := range chars {
- if r == c {
- return i
+ if r == utf8.RuneError {
+ for _, r = range chars {
+ if r == utf8.RuneError {
+ return i
+ }
}
+ continue
+ }
+ // r is 2 to 4 bytes. Using strings.Index is more reasonable, but as the bytes
+ // package should not import the strings package, use bytealg.IndexString
+ // instead. And this does not seem to lose much performance.
+ if chars == string(r) || bytealg.IndexString(chars, string(r)) >= 0 {
+ return i
}
}
return -1
@@ -364,8 +445,9 @@ func Fields(s []byte) [][]byte {
// It splits the slice s at each run of code points c satisfying f(c) and
// returns a slice of subslices of s. If all code points in s satisfy f(c), or
// len(s) == 0, an empty slice is returned.
-// FieldsFunc makes no guarantees about the order in which it calls f(c).
-// If f does not return consistent results for a given c, FieldsFunc may crash.
+//
+// FieldsFunc makes no guarantees about the order in which it calls f(c)
+// and assumes that f always returns the same value for a given c.
func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
// A span is used to record a slice of s of the form s[start:end].
// The start index is inclusive and the end index is exclusive.
@@ -376,8 +458,10 @@ func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
spans := make([]span, 0, 32)
// Find the field start and end indices.
- wasField := false
- fromIndex := 0
+ // Doing this in a separate pass (rather than slicing the string s
+ // and collecting the result substrings right away) is significantly
+ // more efficient, possibly due to cache effects.
+ start := -1 // valid span start if >= 0
for i := 0; i < len(s); {
size := 1
r := rune(s[i])
@@ -385,22 +469,21 @@ func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
r, size = utf8.DecodeRune(s[i:])
}
if f(r) {
- if wasField {
- spans = append(spans, span{start: fromIndex, end: i})
- wasField = false
+ if start >= 0 {
+ spans = append(spans, span{start, i})
+ start = -1
}
} else {
- if !wasField {
- fromIndex = i
- wasField = true
+ if start < 0 {
+ start = i
}
}
i += size
}
// Last field might end at EOF.
- if wasField {
- spans = append(spans, span{fromIndex, len(s)})
+ if start >= 0 {
+ spans = append(spans, span{start, len(s)})
}
// Create subslices from recorded field indices.
@@ -1019,11 +1102,11 @@ func Index(s, sep []byte) int {
if s[i] != c0 {
// IndexByte is faster than bytealg.Index, so use it as long as
// we're not getting lots of false positives.
- o := IndexByte(s[i:t], c0)
+ o := IndexByte(s[i+1:t], c0)
if o < 0 {
return -1
}
- i += o
+ i += o + 1
}
if s[i+1] == c1 && Equal(s[i:i+n], sep) {
return i
@@ -1048,11 +1131,11 @@ func Index(s, sep []byte) int {
t := len(s) - n + 1
for i < t {
if s[i] != c0 {
- o := IndexByte(s[i:t], c0)
+ o := IndexByte(s[i+1:t], c0)
if o < 0 {
break
}
- i += o
+ i += o + 1
}
if s[i+1] == c1 && Equal(s[i:i+n], sep) {
return i
@@ -1068,7 +1151,7 @@ func Index(s, sep []byte) int {
// we should cutover at even larger average skips,
// because Equal becomes that much more expensive.
// This code does not take that effect into account.
- j := indexRabinKarp(s[i:], sep)
+ j := bytealg.IndexRabinKarpBytes(s[i:], sep)
if j < 0 {
return -1
}
@@ -1077,63 +1160,3 @@ func Index(s, sep []byte) int {
}
return -1
}
-
-func indexRabinKarp(s, sep []byte) int {
- // Rabin-Karp search
- hashsep, pow := hashStr(sep)
- n := len(sep)
- var h uint32
- for i := 0; i < n; i++ {
- h = h*primeRK + uint32(s[i])
- }
- if h == hashsep && Equal(s[:n], sep) {
- return 0
- }
- for i := n; i < len(s); {
- h *= primeRK
- h += uint32(s[i])
- h -= pow * uint32(s[i-n])
- i++
- if h == hashsep && Equal(s[i-n:i], sep) {
- return i - n
- }
- }
- return -1
-}
-
-// primeRK is the prime base used in Rabin-Karp algorithm.
-const primeRK = 16777619
-
-// hashStr returns the hash and the appropriate multiplicative
-// factor for use in Rabin-Karp algorithm.
-func hashStr(sep []byte) (uint32, uint32) {
- hash := uint32(0)
- for i := 0; i < len(sep); i++ {
- hash = hash*primeRK + uint32(sep[i])
- }
- var pow, sq uint32 = 1, primeRK
- for i := len(sep); i > 0; i >>= 1 {
- if i&1 != 0 {
- pow *= sq
- }
- sq *= sq
- }
- return hash, pow
-}
-
-// hashStrRev returns the hash of the reverse of sep and the
-// appropriate multiplicative factor for use in Rabin-Karp algorithm.
-func hashStrRev(sep []byte) (uint32, uint32) {
- hash := uint32(0)
- for i := len(sep) - 1; i >= 0; i-- {
- hash = hash*primeRK + uint32(sep[i])
- }
- var pow, sq uint32 = 1, primeRK
- for i := len(sep); i > 0; i >>= 1 {
- if i&1 != 0 {
- pow *= sq
- }
- sq *= sq
- }
- return hash, pow
-}
diff --git a/libgo/go/bytes/bytes_test.go b/libgo/go/bytes/bytes_test.go
index ebff5f0..0111d31 100644
--- a/libgo/go/bytes/bytes_test.go
+++ b/libgo/go/bytes/bytes_test.go
@@ -142,9 +142,10 @@ var indexTests = []BinOpTest{
{"barfoobarfooyyyzzzyyyzzzyyyzzzyyyxxxzzzyyy", "x", 33},
{"foofyfoobarfoobar", "y", 4},
{"oooooooooooooooooooooo", "r", -1},
- // test fallback to Rabin-Karp.
{"oxoxoxoxoxoxoxoxoxoxoxoy", "oy", 22},
{"oxoxoxoxoxoxoxoxoxoxoxox", "oy", -1},
+ // test fallback to Rabin-Karp.
+ {"000000000000000000000000000000000000000000000000000000000000000000000001", "0000000000000000000000000000000000000000000000000000000000000000001", 5},
}
var lastIndexTests = []BinOpTest{
@@ -169,6 +170,7 @@ var indexAnyTests = []BinOpTest{
{"", "abc", -1},
{"a", "", -1},
{"a", "a", 0},
+ {"\x80", "\xffb", 0},
{"aaa", "a", 0},
{"abc", "xyz", -1},
{"abc", "xcz", 2},
@@ -179,6 +181,7 @@ var indexAnyTests = []BinOpTest{
{dots + dots + dots, " ", -1},
{"012abcba210", "\xffb", 4},
{"012\x80bcb\x80210", "\xffb", 3},
+ {"0123456\xcf\x80abc", "\xcfb\x80", 10},
}
var lastIndexAnyTests = []BinOpTest{
@@ -187,6 +190,7 @@ var lastIndexAnyTests = []BinOpTest{
{"", "abc", -1},
{"a", "", -1},
{"a", "a", 0},
+ {"\x80", "\xffb", 0},
{"aaa", "a", 2},
{"abc", "xyz", -1},
{"abc", "ab", 1},
@@ -197,6 +201,7 @@ var lastIndexAnyTests = []BinOpTest{
{dots + dots + dots, " ", -1},
{"012abcba210", "\xffb", 6},
{"012\x80bcb\x80210", "\xffb", 7},
+ {"0123456\xcf\x80abc", "\xcfb\x80", 10},
}
// Execute f on each test case. funcName should be the name of f; it's used
@@ -210,6 +215,27 @@ func runIndexTests(t *testing.T, f func(s, sep []byte) int, funcName string, tes
t.Errorf("%s(%q,%q) = %v; want %v", funcName, a, b, actual, test.i)
}
}
+ var allocTests = []struct {
+ a []byte
+ b []byte
+ i int
+ }{
+ // case for function Index.
+ {[]byte("000000000000000000000000000000000000000000000000000000000000000000000001"), []byte("0000000000000000000000000000000000000000000000000000000000000000001"), 5},
+ // case for function LastIndex.
+ {[]byte("000000000000000000000000000000000000000000000000000000000000000010000"), []byte("00000000000000000000000000000000000000000000000000000000000001"), 3},
+ }
+ allocs := testing.AllocsPerRun(100, func() {
+ if i := Index(allocTests[1].a, allocTests[1].b); i != allocTests[1].i {
+ t.Errorf("Index([]byte(%q), []byte(%q)) = %v; want %v", allocTests[1].a, allocTests[1].b, i, allocTests[1].i)
+ }
+ if i := LastIndex(allocTests[0].a, allocTests[0].b); i != allocTests[0].i {
+ t.Errorf("LastIndex([]byte(%q), []byte(%q)) = %v; want %v", allocTests[0].a, allocTests[0].b, i, allocTests[0].i)
+ }
+ })
+ if allocs != 0 {
+ t.Errorf("expected no allocations, got %f", allocs)
+ }
}
func runIndexAnyTests(t *testing.T, f func(s []byte, chars string) int, funcName string, testCases []BinOpTest) {
@@ -1873,10 +1899,10 @@ func BenchmarkBytesCompare(b *testing.B) {
}
func BenchmarkIndexAnyASCII(b *testing.B) {
- x := Repeat([]byte{'#'}, 4096) // Never matches set
- cs := "0123456789abcdef"
- for k := 1; k <= 4096; k <<= 4 {
- for j := 1; j <= 16; j <<= 1 {
+ x := Repeat([]byte{'#'}, 2048) // Never matches set
+ cs := "0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz"
+ for k := 1; k <= 2048; k <<= 4 {
+ for j := 1; j <= 64; j <<= 1 {
b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
for i := 0; i < b.N; i++ {
IndexAny(x[:k], cs[:j])
@@ -1886,6 +1912,48 @@ func BenchmarkIndexAnyASCII(b *testing.B) {
}
}
+func BenchmarkIndexAnyUTF8(b *testing.B) {
+ x := Repeat([]byte{'#'}, 2048) // Never matches set
+ cs := "你好世界, hello world. 你好世界, hello world. 你好世界, hello world."
+ for k := 1; k <= 2048; k <<= 4 {
+ for j := 1; j <= 64; j <<= 1 {
+ b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ IndexAny(x[:k], cs[:j])
+ }
+ })
+ }
+ }
+}
+
+func BenchmarkLastIndexAnyASCII(b *testing.B) {
+ x := Repeat([]byte{'#'}, 2048) // Never matches set
+ cs := "0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz"
+ for k := 1; k <= 2048; k <<= 4 {
+ for j := 1; j <= 64; j <<= 1 {
+ b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ LastIndexAny(x[:k], cs[:j])
+ }
+ })
+ }
+ }
+}
+
+func BenchmarkLastIndexAnyUTF8(b *testing.B) {
+ x := Repeat([]byte{'#'}, 2048) // Never matches set
+ cs := "你好世界, hello world. 你好世界, hello world. 你好世界, hello world."
+ for k := 1; k <= 2048; k <<= 4 {
+ for j := 1; j <= 64; j <<= 1 {
+ b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ LastIndexAny(x[:k], cs[:j])
+ }
+ })
+ }
+ }
+}
+
func BenchmarkTrimASCII(b *testing.B) {
cs := "0123456789abcdef"
for k := 1; k <= 4096; k <<= 4 {