diff options
author | Ian Lance Taylor <iant@golang.org> | 2020-07-27 22:27:54 -0700 |
---|---|---|
committer | Ian Lance Taylor <iant@golang.org> | 2020-08-01 11:21:40 -0700 |
commit | f75af8c1464e948b5e166cf5ab09ebf0d82fc253 (patch) | |
tree | 3ba3299859b504bdeb477727471216bd094a0191 /libgo/go/bytes | |
parent | 75a23e59031fe673fc3b2e60fd1fe5f4c70ecb85 (diff) | |
download | gcc-f75af8c1464e948b5e166cf5ab09ebf0d82fc253.zip gcc-f75af8c1464e948b5e166cf5ab09ebf0d82fc253.tar.gz gcc-f75af8c1464e948b5e166cf5ab09ebf0d82fc253.tar.bz2 |
libgo: update to go1.15rc1
Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/245157
Diffstat (limited to 'libgo/go/bytes')
-rw-r--r-- | libgo/go/bytes/buffer_test.go | 19 | ||||
-rw-r--r-- | libgo/go/bytes/bytes.go | 199 | ||||
-rw-r--r-- | libgo/go/bytes/bytes_test.go | 78 |
3 files changed, 193 insertions, 103 deletions
diff --git a/libgo/go/bytes/buffer_test.go b/libgo/go/bytes/buffer_test.go index 7626d27..fec5ef8 100644 --- a/libgo/go/bytes/buffer_test.go +++ b/libgo/go/bytes/buffer_test.go @@ -8,7 +8,6 @@ import ( . "bytes" "io" "math/rand" - "runtime" "testing" "unicode/utf8" ) @@ -495,20 +494,20 @@ func TestGrow(t *testing.T) { x := []byte{'x'} y := []byte{'y'} tmp := make([]byte, 72) - for _, startLen := range []int{0, 100, 1000, 10000, 100000} { - xBytes := Repeat(x, startLen) - for _, growLen := range []int{0, 100, 1000, 10000, 100000} { + for _, growLen := range []int{0, 100, 1000, 10000, 100000} { + for _, startLen := range []int{0, 100, 1000, 10000, 100000} { + xBytes := Repeat(x, startLen) + buf := NewBuffer(xBytes) // If we read, this affects buf.off, which is good to test. readBytes, _ := buf.Read(tmp) - buf.Grow(growLen) yBytes := Repeat(y, growLen) + allocs := testing.AllocsPerRun(100, func() { + buf.Grow(growLen) + buf.Write(yBytes) + }) // Check no allocation occurs in write, as long as we're single-threaded. - var m1, m2 runtime.MemStats - runtime.ReadMemStats(&m1) - buf.Write(yBytes) - runtime.ReadMemStats(&m2) - if runtime.GOMAXPROCS(-1) == 1 && m1.Mallocs != m2.Mallocs { + if allocs != 0 { t.Errorf("allocation occurred during write") } // Check that buffer has correct data. diff --git a/libgo/go/bytes/bytes.go b/libgo/go/bytes/bytes.go index e872cc2..aa07b9f 100644 --- a/libgo/go/bytes/bytes.go +++ b/libgo/go/bytes/bytes.go @@ -117,17 +117,17 @@ func LastIndex(s, sep []byte) int { return -1 } // Rabin-Karp search from the end of the string - hashss, pow := hashStrRev(sep) + hashss, pow := bytealg.HashStrRevBytes(sep) last := len(s) - n var h uint32 for i := len(s) - 1; i >= last; i-- { - h = h*primeRK + uint32(s[i]) + h = h*bytealg.PrimeRK + uint32(s[i]) } if h == hashss && Equal(s[last:], sep) { return last } for i := last - 1; i >= 0; i-- { - h *= primeRK + h *= bytealg.PrimeRK h += uint32(s[i]) h -= pow * uint32(s[i+n]) if h == hashss && Equal(s[i:i+n], sep) { @@ -183,6 +183,29 @@ func IndexAny(s []byte, chars string) int { // Avoid scanning all of s. return -1 } + if len(s) == 1 { + r := rune(s[0]) + if r >= utf8.RuneSelf { + // search utf8.RuneError. + for _, r = range chars { + if r == utf8.RuneError { + return 0 + } + } + return -1 + } + if bytealg.IndexByteString(chars, s[0]) >= 0 { + return 0 + } + return -1 + } + if len(chars) == 1 { + r := rune(chars[0]) + if r >= utf8.RuneSelf { + r = utf8.RuneError + } + return IndexRune(s, r) + } if len(s) > 8 { if as, isASCII := makeASCIISet(chars); isASCII { for i, c := range s { @@ -197,14 +220,26 @@ func IndexAny(s []byte, chars string) int { for i := 0; i < len(s); i += width { r := rune(s[i]) if r < utf8.RuneSelf { + if bytealg.IndexByteString(chars, s[i]) >= 0 { + return i + } width = 1 - } else { - r, width = utf8.DecodeRune(s[i:]) + continue } - for _, ch := range chars { - if r == ch { - return i + r, width = utf8.DecodeRune(s[i:]) + if r == utf8.RuneError { + for _, r = range chars { + if r == utf8.RuneError { + return i + } } + continue + } + // r is 2 to 4 bytes. Using strings.Index is more reasonable, but as the bytes + // package should not import the strings package, use bytealg.IndexString + // instead. And this does not seem to lose much performance. + if chars == string(r) || bytealg.IndexString(chars, string(r)) >= 0 { + return i } } return -1 @@ -229,13 +264,59 @@ func LastIndexAny(s []byte, chars string) int { return -1 } } + if len(s) == 1 { + r := rune(s[0]) + if r >= utf8.RuneSelf { + for _, r = range chars { + if r == utf8.RuneError { + return 0 + } + } + return -1 + } + if bytealg.IndexByteString(chars, s[0]) >= 0 { + return 0 + } + return -1 + } + if len(chars) == 1 { + cr := rune(chars[0]) + if cr >= utf8.RuneSelf { + cr = utf8.RuneError + } + for i := len(s); i > 0; { + r, size := utf8.DecodeLastRune(s[:i]) + i -= size + if r == cr { + return i + } + } + return -1 + } for i := len(s); i > 0; { + r := rune(s[i-1]) + if r < utf8.RuneSelf { + if bytealg.IndexByteString(chars, s[i-1]) >= 0 { + return i - 1 + } + i-- + continue + } r, size := utf8.DecodeLastRune(s[:i]) i -= size - for _, c := range chars { - if r == c { - return i + if r == utf8.RuneError { + for _, r = range chars { + if r == utf8.RuneError { + return i + } } + continue + } + // r is 2 to 4 bytes. Using strings.Index is more reasonable, but as the bytes + // package should not import the strings package, use bytealg.IndexString + // instead. And this does not seem to lose much performance. + if chars == string(r) || bytealg.IndexString(chars, string(r)) >= 0 { + return i } } return -1 @@ -364,8 +445,9 @@ func Fields(s []byte) [][]byte { // It splits the slice s at each run of code points c satisfying f(c) and // returns a slice of subslices of s. If all code points in s satisfy f(c), or // len(s) == 0, an empty slice is returned. -// FieldsFunc makes no guarantees about the order in which it calls f(c). -// If f does not return consistent results for a given c, FieldsFunc may crash. +// +// FieldsFunc makes no guarantees about the order in which it calls f(c) +// and assumes that f always returns the same value for a given c. func FieldsFunc(s []byte, f func(rune) bool) [][]byte { // A span is used to record a slice of s of the form s[start:end]. // The start index is inclusive and the end index is exclusive. @@ -376,8 +458,10 @@ func FieldsFunc(s []byte, f func(rune) bool) [][]byte { spans := make([]span, 0, 32) // Find the field start and end indices. - wasField := false - fromIndex := 0 + // Doing this in a separate pass (rather than slicing the string s + // and collecting the result substrings right away) is significantly + // more efficient, possibly due to cache effects. + start := -1 // valid span start if >= 0 for i := 0; i < len(s); { size := 1 r := rune(s[i]) @@ -385,22 +469,21 @@ func FieldsFunc(s []byte, f func(rune) bool) [][]byte { r, size = utf8.DecodeRune(s[i:]) } if f(r) { - if wasField { - spans = append(spans, span{start: fromIndex, end: i}) - wasField = false + if start >= 0 { + spans = append(spans, span{start, i}) + start = -1 } } else { - if !wasField { - fromIndex = i - wasField = true + if start < 0 { + start = i } } i += size } // Last field might end at EOF. - if wasField { - spans = append(spans, span{fromIndex, len(s)}) + if start >= 0 { + spans = append(spans, span{start, len(s)}) } // Create subslices from recorded field indices. @@ -1019,11 +1102,11 @@ func Index(s, sep []byte) int { if s[i] != c0 { // IndexByte is faster than bytealg.Index, so use it as long as // we're not getting lots of false positives. - o := IndexByte(s[i:t], c0) + o := IndexByte(s[i+1:t], c0) if o < 0 { return -1 } - i += o + i += o + 1 } if s[i+1] == c1 && Equal(s[i:i+n], sep) { return i @@ -1048,11 +1131,11 @@ func Index(s, sep []byte) int { t := len(s) - n + 1 for i < t { if s[i] != c0 { - o := IndexByte(s[i:t], c0) + o := IndexByte(s[i+1:t], c0) if o < 0 { break } - i += o + i += o + 1 } if s[i+1] == c1 && Equal(s[i:i+n], sep) { return i @@ -1068,7 +1151,7 @@ func Index(s, sep []byte) int { // we should cutover at even larger average skips, // because Equal becomes that much more expensive. // This code does not take that effect into account. - j := indexRabinKarp(s[i:], sep) + j := bytealg.IndexRabinKarpBytes(s[i:], sep) if j < 0 { return -1 } @@ -1077,63 +1160,3 @@ func Index(s, sep []byte) int { } return -1 } - -func indexRabinKarp(s, sep []byte) int { - // Rabin-Karp search - hashsep, pow := hashStr(sep) - n := len(sep) - var h uint32 - for i := 0; i < n; i++ { - h = h*primeRK + uint32(s[i]) - } - if h == hashsep && Equal(s[:n], sep) { - return 0 - } - for i := n; i < len(s); { - h *= primeRK - h += uint32(s[i]) - h -= pow * uint32(s[i-n]) - i++ - if h == hashsep && Equal(s[i-n:i], sep) { - return i - n - } - } - return -1 -} - -// primeRK is the prime base used in Rabin-Karp algorithm. -const primeRK = 16777619 - -// hashStr returns the hash and the appropriate multiplicative -// factor for use in Rabin-Karp algorithm. -func hashStr(sep []byte) (uint32, uint32) { - hash := uint32(0) - for i := 0; i < len(sep); i++ { - hash = hash*primeRK + uint32(sep[i]) - } - var pow, sq uint32 = 1, primeRK - for i := len(sep); i > 0; i >>= 1 { - if i&1 != 0 { - pow *= sq - } - sq *= sq - } - return hash, pow -} - -// hashStrRev returns the hash of the reverse of sep and the -// appropriate multiplicative factor for use in Rabin-Karp algorithm. -func hashStrRev(sep []byte) (uint32, uint32) { - hash := uint32(0) - for i := len(sep) - 1; i >= 0; i-- { - hash = hash*primeRK + uint32(sep[i]) - } - var pow, sq uint32 = 1, primeRK - for i := len(sep); i > 0; i >>= 1 { - if i&1 != 0 { - pow *= sq - } - sq *= sq - } - return hash, pow -} diff --git a/libgo/go/bytes/bytes_test.go b/libgo/go/bytes/bytes_test.go index ebff5f0..0111d31 100644 --- a/libgo/go/bytes/bytes_test.go +++ b/libgo/go/bytes/bytes_test.go @@ -142,9 +142,10 @@ var indexTests = []BinOpTest{ {"barfoobarfooyyyzzzyyyzzzyyyzzzyyyxxxzzzyyy", "x", 33}, {"foofyfoobarfoobar", "y", 4}, {"oooooooooooooooooooooo", "r", -1}, - // test fallback to Rabin-Karp. {"oxoxoxoxoxoxoxoxoxoxoxoy", "oy", 22}, {"oxoxoxoxoxoxoxoxoxoxoxox", "oy", -1}, + // test fallback to Rabin-Karp. + {"000000000000000000000000000000000000000000000000000000000000000000000001", "0000000000000000000000000000000000000000000000000000000000000000001", 5}, } var lastIndexTests = []BinOpTest{ @@ -169,6 +170,7 @@ var indexAnyTests = []BinOpTest{ {"", "abc", -1}, {"a", "", -1}, {"a", "a", 0}, + {"\x80", "\xffb", 0}, {"aaa", "a", 0}, {"abc", "xyz", -1}, {"abc", "xcz", 2}, @@ -179,6 +181,7 @@ var indexAnyTests = []BinOpTest{ {dots + dots + dots, " ", -1}, {"012abcba210", "\xffb", 4}, {"012\x80bcb\x80210", "\xffb", 3}, + {"0123456\xcf\x80abc", "\xcfb\x80", 10}, } var lastIndexAnyTests = []BinOpTest{ @@ -187,6 +190,7 @@ var lastIndexAnyTests = []BinOpTest{ {"", "abc", -1}, {"a", "", -1}, {"a", "a", 0}, + {"\x80", "\xffb", 0}, {"aaa", "a", 2}, {"abc", "xyz", -1}, {"abc", "ab", 1}, @@ -197,6 +201,7 @@ var lastIndexAnyTests = []BinOpTest{ {dots + dots + dots, " ", -1}, {"012abcba210", "\xffb", 6}, {"012\x80bcb\x80210", "\xffb", 7}, + {"0123456\xcf\x80abc", "\xcfb\x80", 10}, } // Execute f on each test case. funcName should be the name of f; it's used @@ -210,6 +215,27 @@ func runIndexTests(t *testing.T, f func(s, sep []byte) int, funcName string, tes t.Errorf("%s(%q,%q) = %v; want %v", funcName, a, b, actual, test.i) } } + var allocTests = []struct { + a []byte + b []byte + i int + }{ + // case for function Index. + {[]byte("000000000000000000000000000000000000000000000000000000000000000000000001"), []byte("0000000000000000000000000000000000000000000000000000000000000000001"), 5}, + // case for function LastIndex. + {[]byte("000000000000000000000000000000000000000000000000000000000000000010000"), []byte("00000000000000000000000000000000000000000000000000000000000001"), 3}, + } + allocs := testing.AllocsPerRun(100, func() { + if i := Index(allocTests[1].a, allocTests[1].b); i != allocTests[1].i { + t.Errorf("Index([]byte(%q), []byte(%q)) = %v; want %v", allocTests[1].a, allocTests[1].b, i, allocTests[1].i) + } + if i := LastIndex(allocTests[0].a, allocTests[0].b); i != allocTests[0].i { + t.Errorf("LastIndex([]byte(%q), []byte(%q)) = %v; want %v", allocTests[0].a, allocTests[0].b, i, allocTests[0].i) + } + }) + if allocs != 0 { + t.Errorf("expected no allocations, got %f", allocs) + } } func runIndexAnyTests(t *testing.T, f func(s []byte, chars string) int, funcName string, testCases []BinOpTest) { @@ -1873,10 +1899,10 @@ func BenchmarkBytesCompare(b *testing.B) { } func BenchmarkIndexAnyASCII(b *testing.B) { - x := Repeat([]byte{'#'}, 4096) // Never matches set - cs := "0123456789abcdef" - for k := 1; k <= 4096; k <<= 4 { - for j := 1; j <= 16; j <<= 1 { + x := Repeat([]byte{'#'}, 2048) // Never matches set + cs := "0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz" + for k := 1; k <= 2048; k <<= 4 { + for j := 1; j <= 64; j <<= 1 { b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) { for i := 0; i < b.N; i++ { IndexAny(x[:k], cs[:j]) @@ -1886,6 +1912,48 @@ func BenchmarkIndexAnyASCII(b *testing.B) { } } +func BenchmarkIndexAnyUTF8(b *testing.B) { + x := Repeat([]byte{'#'}, 2048) // Never matches set + cs := "你好世界, hello world. 你好世界, hello world. 你好世界, hello world." + for k := 1; k <= 2048; k <<= 4 { + for j := 1; j <= 64; j <<= 1 { + b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) { + for i := 0; i < b.N; i++ { + IndexAny(x[:k], cs[:j]) + } + }) + } + } +} + +func BenchmarkLastIndexAnyASCII(b *testing.B) { + x := Repeat([]byte{'#'}, 2048) // Never matches set + cs := "0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz" + for k := 1; k <= 2048; k <<= 4 { + for j := 1; j <= 64; j <<= 1 { + b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) { + for i := 0; i < b.N; i++ { + LastIndexAny(x[:k], cs[:j]) + } + }) + } + } +} + +func BenchmarkLastIndexAnyUTF8(b *testing.B) { + x := Repeat([]byte{'#'}, 2048) // Never matches set + cs := "你好世界, hello world. 你好世界, hello world. 你好世界, hello world." + for k := 1; k <= 2048; k <<= 4 { + for j := 1; j <= 64; j <<= 1 { + b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) { + for i := 0; i < b.N; i++ { + LastIndexAny(x[:k], cs[:j]) + } + }) + } + } +} + func BenchmarkTrimASCII(b *testing.B) { cs := "0123456789abcdef" for k := 1; k <= 4096; k <<= 4 { |