diff options
author | Ian Lance Taylor <iant@golang.org> | 2019-09-06 18:12:46 +0000 |
---|---|---|
committer | Ian Lance Taylor <ian@gcc.gnu.org> | 2019-09-06 18:12:46 +0000 |
commit | aa8901e9bb0399d2c16f988ba2fe46eb0c0c5d13 (patch) | |
tree | 7e63b06d1eec92beec6997c9d3ab47a5d6a835be /libgo/go/bytes | |
parent | 920ea3b8ba3164b61ac9490dfdfceb6936eda6dd (diff) | |
download | gcc-aa8901e9bb0399d2c16f988ba2fe46eb0c0c5d13.zip gcc-aa8901e9bb0399d2c16f988ba2fe46eb0c0c5d13.tar.gz gcc-aa8901e9bb0399d2c16f988ba2fe46eb0c0c5d13.tar.bz2 |
libgo: update to Go 1.13beta1 release
Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/193497
From-SVN: r275473
Diffstat (limited to 'libgo/go/bytes')
-rw-r--r-- | libgo/go/bytes/buffer.go | 7 | ||||
-rw-r--r-- | libgo/go/bytes/buffer_test.go | 22 | ||||
-rw-r--r-- | libgo/go/bytes/bytes.go | 197 | ||||
-rw-r--r-- | libgo/go/bytes/bytes_test.go | 337 | ||||
-rw-r--r-- | libgo/go/bytes/example_test.go | 30 | ||||
-rw-r--r-- | libgo/go/bytes/export_test.go | 1 |
6 files changed, 464 insertions, 130 deletions
diff --git a/libgo/go/bytes/buffer.go b/libgo/go/bytes/buffer.go index aff2db5..f19a4cf 100644 --- a/libgo/go/bytes/buffer.go +++ b/libgo/go/bytes/buffer.go @@ -21,9 +21,6 @@ type Buffer struct { buf []byte // contents are the bytes buf[off : len(buf)] off int // read at &buf[off], write at &buf[len(buf)] lastRead readOp // last read operation, so that Unread* can work correctly. - - // FIXME: it would be advisable to align Buffer to cachelines to avoid false - // sharing. } // The readOp constants describe the last action performed on @@ -385,13 +382,15 @@ func (b *Buffer) UnreadRune() error { return nil } +var errUnreadByte = errors.New("bytes.Buffer: UnreadByte: previous operation was not a successful read") + // UnreadByte unreads the last byte returned by the most recent successful // read operation that read at least one byte. If a write has happened since // the last read, if the last read returned an error, or if the read read zero // bytes, UnreadByte returns an error. func (b *Buffer) UnreadByte() error { if b.lastRead == opInvalid { - return errors.New("bytes.Buffer: UnreadByte: previous operation was not a successful read") + return errUnreadByte } b.lastRead = opInvalid if b.off > 0 { diff --git a/libgo/go/bytes/buffer_test.go b/libgo/go/bytes/buffer_test.go index 6e9d695..7626d27 100644 --- a/libgo/go/bytes/buffer_test.go +++ b/libgo/go/bytes/buffer_test.go @@ -131,11 +131,8 @@ func TestBasicOperations(t *testing.T) { check(t, "TestBasicOperations (3)", &buf, "") n, err := buf.Write(testBytes[0:1]) - if n != 1 { - t.Errorf("wrote 1 byte, but n == %d", n) - } - if err != nil { - t.Errorf("err should always be nil, but err == %s", err) + if want := 1; err != nil || n != want { + t.Errorf("Write: got (%d, %v), want (%d, %v)", n, err, want, nil) } check(t, "TestBasicOperations (4)", &buf, "a") @@ -143,8 +140,8 @@ func TestBasicOperations(t *testing.T) { check(t, "TestBasicOperations (5)", &buf, "ab") n, err = buf.Write(testBytes[2:26]) - if n != 24 { - t.Errorf("wrote 24 bytes, but n == %d", n) + if want := 24; err != nil || n != want { + t.Errorf("Write: got (%d, %v), want (%d, %v)", n, err, want, nil) } check(t, "TestBasicOperations (6)", &buf, testString[0:26]) @@ -159,15 +156,12 @@ func TestBasicOperations(t *testing.T) { buf.WriteByte(testString[1]) c, err := buf.ReadByte() - if err != nil { - t.Error("ReadByte unexpected eof") - } - if c != testString[1] { - t.Errorf("ReadByte wrong value c=%v", c) + if want := testString[1]; err != nil || c != want { + t.Errorf("ReadByte: got (%q, %v), want (%q, %v)", c, err, want, nil) } c, err = buf.ReadByte() - if err == nil { - t.Error("ReadByte unexpected not eof") + if err != io.EOF { + t.Errorf("ReadByte: got (%q, %v), want (%q, %v)", c, err, byte(0), io.EOF) } } } diff --git a/libgo/go/bytes/bytes.go b/libgo/go/bytes/bytes.go index daf4a32..eb13212 100644 --- a/libgo/go/bytes/bytes.go +++ b/libgo/go/bytes/bytes.go @@ -12,23 +12,12 @@ import ( "unicode/utf8" ) -// Equal returns a boolean reporting whether a and b +// Equal reports whether a and b // are the same length and contain the same bytes. // A nil argument is equivalent to an empty slice. func Equal(a, b []byte) bool { - return bytealg.Equal(a, b) -} - -func equalPortable(a, b []byte) bool { - if len(a) != len(b) { - return false - } - for i, c := range a { - if c != b[i] { - return false - } - } - return true + // Neither cmd/compile nor gccgo allocates for these string conversions. + return string(a) == string(b) } // Compare returns an integer comparing two byte slices lexicographically. @@ -114,12 +103,34 @@ func indexBytePortable(s []byte, c byte) int { // LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s. func LastIndex(s, sep []byte) int { n := len(sep) - if n == 0 { + switch { + case n == 0: return len(s) + case n == 1: + return LastIndexByte(s, sep[0]) + case n == len(s): + if Equal(s, sep) { + return 0 + } + return -1 + case n > len(s): + return -1 } - c := sep[0] - for i := len(s) - n; i >= 0; i-- { - if s[i] == c && (n == 1 || Equal(s[i:i+n], sep)) { + // Rabin-Karp search from the end of the string + hashss, pow := hashStrRev(sep) + last := len(s) - n + var h uint32 + for i := len(s) - 1; i >= last; i-- { + h = h*primeRK + uint32(s[i]) + } + if h == hashss && Equal(s[last:], sep) { + return last + } + for i := last - 1; i >= 0; i-- { + h *= primeRK + h += uint32(s[i]) + h -= pow * uint32(s[i+n]) + if h == hashss && Equal(s[i:i+n], sep) { return i } } @@ -477,13 +488,16 @@ func Map(mapping func(r rune) rune, s []byte) []byte { // It panics if count is negative or if // the result of (len(b) * count) overflows. func Repeat(b []byte, count int) []byte { + if count == 0 { + return []byte{} + } // Since we cannot return an error on overflow, // we should panic if the repeat will generate // an overflow. // See Issue golang.org/issue/16237. if count < 0 { panic("bytes: negative Repeat count") - } else if count > 0 && len(b)*count/count != len(b) { + } else if len(b)*count/count != len(b) { panic("bytes: Repeat count causes overflow") } @@ -496,11 +510,66 @@ func Repeat(b []byte, count int) []byte { return nb } -// ToUpper treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters within it mapped to their upper case. -func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) } +// ToUpper returns a copy of the byte slice s with all Unicode letters mapped to +// their upper case. +func ToUpper(s []byte) []byte { + isASCII, hasLower := true, false + for i := 0; i < len(s); i++ { + c := s[i] + if c >= utf8.RuneSelf { + isASCII = false + break + } + hasLower = hasLower || ('a' <= c && c <= 'z') + } + + if isASCII { // optimize for ASCII-only byte slices. + if !hasLower { + // Just return a copy. + return append([]byte(""), s...) + } + b := make([]byte, len(s)) + for i := 0; i < len(s); i++ { + c := s[i] + if 'a' <= c && c <= 'z' { + c -= 'a' - 'A' + } + b[i] = c + } + return b + } + return Map(unicode.ToUpper, s) +} -// ToLower treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their lower case. -func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) } +// ToLower returns a copy of the byte slice s with all Unicode letters mapped to +// their lower case. +func ToLower(s []byte) []byte { + isASCII, hasUpper := true, false + for i := 0; i < len(s); i++ { + c := s[i] + if c >= utf8.RuneSelf { + isASCII = false + break + } + hasUpper = hasUpper || ('A' <= c && c <= 'Z') + } + + if isASCII { // optimize for ASCII-only byte slices. + if !hasUpper { + return append([]byte(""), s...) + } + b := make([]byte, len(s)) + for i := 0; i < len(s); i++ { + c := s[i] + if 'A' <= c && c <= 'Z' { + c += 'a' - 'A' + } + b[i] = c + } + return b + } + return Map(unicode.ToLower, s) +} // ToTitle treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their title case. func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) } @@ -523,6 +592,35 @@ func ToTitleSpecial(c unicode.SpecialCase, s []byte) []byte { return Map(c.ToTitle, s) } +// ToValidUTF8 treats s as UTF-8-encoded bytes and returns a copy with each run of bytes +// representing invalid UTF-8 replaced with the bytes in replacement, which may be empty. +func ToValidUTF8(s, replacement []byte) []byte { + b := make([]byte, 0, len(s)+len(replacement)) + invalid := false // previous byte was from an invalid UTF-8 sequence + for i := 0; i < len(s); { + c := s[i] + if c < utf8.RuneSelf { + i++ + invalid = false + b = append(b, byte(c)) + continue + } + _, wid := utf8.DecodeRune(s[i:]) + if wid == 1 { + i++ + if !invalid { + invalid = true + b = append(b, replacement...) + } + continue + } + invalid = false + b = append(b, s[i:i+wid]...) + i += wid + } + return b +} + // isSeparator reports whether the rune could mark a word boundary. // TODO: update when package unicode captures more of the properties. func isSeparator(r rune) bool { @@ -734,7 +832,41 @@ func TrimRight(s []byte, cutset string) []byte { // TrimSpace returns a subslice of s by slicing off all leading and // trailing white space, as defined by Unicode. func TrimSpace(s []byte) []byte { - return TrimFunc(s, unicode.IsSpace) + // Fast path for ASCII: look for the first ASCII non-space byte + start := 0 + for ; start < len(s); start++ { + c := s[start] + if c >= utf8.RuneSelf { + // If we run into a non-ASCII byte, fall back to the + // slower unicode-aware method on the remaining bytes + return TrimFunc(s[start:], unicode.IsSpace) + } + if asciiSpace[c] == 0 { + break + } + } + + // Now look for the first ASCII non-space byte from the end + stop := len(s) + for ; stop > start; stop-- { + c := s[stop-1] + if c >= utf8.RuneSelf { + return TrimFunc(s[start:stop], unicode.IsSpace) + } + if asciiSpace[c] == 0 { + break + } + } + + // At this point s[start:stop] starts and ends with an ASCII + // non-space bytes, so we're done. Non-ASCII cases have already + // been handled above. + if start == stop { + // Special case to preserve previous TrimLeftFunc behavior, + // returning nil instead of empty slice if all spaces. + return nil + } + return s[start:stop] } // Runes interprets s as a sequence of UTF-8-encoded code points. @@ -987,3 +1119,20 @@ func hashStr(sep []byte) (uint32, uint32) { } return hash, pow } + +// hashStrRev returns the hash of the reverse of sep and the +// appropriate multiplicative factor for use in Rabin-Karp algorithm. +func hashStrRev(sep []byte) (uint32, uint32) { + hash := uint32(0) + for i := len(sep) - 1; i >= 0; i-- { + hash = hash*primeRK + uint32(sep[i]) + } + var pow, sq uint32 = 1, primeRK + for i := len(sep); i > 0; i >>= 1 { + if i&1 != 0 { + pow *= sq + } + sq *= sq + } + return hash, pow +} diff --git a/libgo/go/bytes/bytes_test.go b/libgo/go/bytes/bytes_test.go index ec4ecf3..ebff5f0 100644 --- a/libgo/go/bytes/bytes_test.go +++ b/libgo/go/bytes/bytes_test.go @@ -52,15 +52,17 @@ type BinOpTest struct { } func TestEqual(t *testing.T) { - for _, tt := range compareTests { - eql := Equal(tt.a, tt.b) - if eql != (tt.i == 0) { - t.Errorf(`Equal(%q, %q) = %v`, tt.a, tt.b, eql) - } - eql = EqualPortable(tt.a, tt.b) - if eql != (tt.i == 0) { - t.Errorf(`EqualPortable(%q, %q) = %v`, tt.a, tt.b, eql) + // Run the tests and check for allocation at the same time. + allocs := testing.AllocsPerRun(10, func() { + for _, tt := range compareTests { + eql := Equal(tt.a, tt.b) + if eql != (tt.i == 0) { + t.Errorf(`Equal(%q, %q) = %v`, tt.a, tt.b, eql) + } } + }) + if allocs > 0 { + t.Errorf("Equal allocated %v times", allocs) } } @@ -577,11 +579,6 @@ func BenchmarkEqual(b *testing.B) { benchBytes(b, sizes, bmEqual(Equal)) } -func BenchmarkEqualPort(b *testing.B) { - sizes := []int{1, 6, 32, 4 << 10, 4 << 20, 64 << 20} - benchBytes(b, sizes, bmEqual(EqualPortable)) -} - func bmEqual(equal func([]byte, []byte) bool) func(b *testing.B, n int) { return func(b *testing.B, n int) { if len(bmbuf) < 2*n { @@ -682,34 +679,6 @@ func BenchmarkCountSingle(b *testing.B) { }) } -type ExplodeTest struct { - s string - n int - a []string -} - -var explodetests = []ExplodeTest{ - {"", -1, []string{}}, - {abcd, -1, []string{"a", "b", "c", "d"}}, - {faces, -1, []string{"☺", "☻", "☹"}}, - {abcd, 2, []string{"a", "bcd"}}, -} - -func TestExplode(t *testing.T) { - for _, tt := range explodetests { - a := SplitN([]byte(tt.s), nil, tt.n) - result := sliceOfString(a) - if !eq(result, tt.a) { - t.Errorf(`Explode("%s", %d) = %v; want %v`, tt.s, tt.n, result, tt.a) - continue - } - s := Join(a, []byte{}) - if string(s) != tt.s { - t.Errorf(`Join(Explode("%s", %d), "") = "%s"`, tt.s, tt.n, s) - } - } -} - type SplitTest struct { s string sep string @@ -718,7 +687,9 @@ type SplitTest struct { } var splittests = []SplitTest{ + {"", "", -1, []string{}}, {abcd, "a", 0, nil}, + {abcd, "", 2, []string{"a", "bcd"}}, {abcd, "a", -1, []string{"", "bcd"}}, {abcd, "z", -1, []string{"abcd"}}, {abcd, "", -1, []string{"a", "b", "c", "d"}}, @@ -748,7 +719,7 @@ func TestSplit(t *testing.T) { t.Errorf(`Split(%q, %q, %d) = %v; want %v`, tt.s, tt.sep, tt.n, result, tt.a) continue } - if tt.n == 0 { + if tt.n == 0 || len(a) == 0 { continue } @@ -914,54 +885,72 @@ func TestFieldsFunc(t *testing.T) { } // Test case for any function which accepts and returns a byte slice. -// For ease of creation, we write the byte slices as strings. +// For ease of creation, we write the input byte slice as a string. type StringTest struct { - in, out string + in string + out []byte } var upperTests = []StringTest{ - {"", ""}, - {"abc", "ABC"}, - {"AbC123", "ABC123"}, - {"azAZ09_", "AZAZ09_"}, - {"\u0250\u0250\u0250\u0250\u0250", "\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F"}, // grows one byte per char + {"", []byte("")}, + {"ONLYUPPER", []byte("ONLYUPPER")}, + {"abc", []byte("ABC")}, + {"AbC123", []byte("ABC123")}, + {"azAZ09_", []byte("AZAZ09_")}, + {"longStrinGwitHmixofsmaLLandcAps", []byte("LONGSTRINGWITHMIXOFSMALLANDCAPS")}, + {"long\u0250string\u0250with\u0250nonascii\u2C6Fchars", []byte("LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS")}, + {"\u0250\u0250\u0250\u0250\u0250", []byte("\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F")}, // grows one byte per char + {"a\u0080\U0010FFFF", []byte("A\u0080\U0010FFFF")}, // test utf8.RuneSelf and utf8.MaxRune } var lowerTests = []StringTest{ - {"", ""}, - {"abc", "abc"}, - {"AbC123", "abc123"}, - {"azAZ09_", "azaz09_"}, - {"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", "\u0251\u0251\u0251\u0251\u0251"}, // shrinks one byte per char + {"", []byte("")}, + {"abc", []byte("abc")}, + {"AbC123", []byte("abc123")}, + {"azAZ09_", []byte("azaz09_")}, + {"longStrinGwitHmixofsmaLLandcAps", []byte("longstringwithmixofsmallandcaps")}, + {"LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS", []byte("long\u0250string\u0250with\u0250nonascii\u0250chars")}, + {"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", []byte("\u0251\u0251\u0251\u0251\u0251")}, // shrinks one byte per char + {"A\u0080\U0010FFFF", []byte("a\u0080\U0010FFFF")}, // test utf8.RuneSelf and utf8.MaxRune } const space = "\t\v\r\f\n\u0085\u00a0\u2000\u3000" var trimSpaceTests = []StringTest{ - {"", ""}, - {"abc", "abc"}, - {space + "abc" + space, "abc"}, - {" ", ""}, - {" \t\r\n \t\t\r\r\n\n ", ""}, - {" \t\r\n x\t\t\r\r\n\n ", "x"}, - {" \u2000\t\r\n x\t\t\r\r\ny\n \u3000", "x\t\t\r\r\ny"}, - {"1 \t\r\n2", "1 \t\r\n2"}, - {" x\x80", "x\x80"}, - {" x\xc0", "x\xc0"}, - {"x \xc0\xc0 ", "x \xc0\xc0"}, - {"x \xc0", "x \xc0"}, - {"x \xc0 ", "x \xc0"}, - {"x \xc0\xc0 ", "x \xc0\xc0"}, - {"x ☺\xc0\xc0 ", "x ☺\xc0\xc0"}, - {"x ☺ ", "x ☺"}, + {"", nil}, + {" a", []byte("a")}, + {"b ", []byte("b")}, + {"abc", []byte("abc")}, + {space + "abc" + space, []byte("abc")}, + {" ", nil}, + {"\u3000 ", nil}, + {" \u3000", nil}, + {" \t\r\n \t\t\r\r\n\n ", nil}, + {" \t\r\n x\t\t\r\r\n\n ", []byte("x")}, + {" \u2000\t\r\n x\t\t\r\r\ny\n \u3000", []byte("x\t\t\r\r\ny")}, + {"1 \t\r\n2", []byte("1 \t\r\n2")}, + {" x\x80", []byte("x\x80")}, + {" x\xc0", []byte("x\xc0")}, + {"x \xc0\xc0 ", []byte("x \xc0\xc0")}, + {"x \xc0", []byte("x \xc0")}, + {"x \xc0 ", []byte("x \xc0")}, + {"x \xc0\xc0 ", []byte("x \xc0\xc0")}, + {"x ☺\xc0\xc0 ", []byte("x ☺\xc0\xc0")}, + {"x ☺ ", []byte("x ☺")}, } // Execute f on each test case. funcName should be the name of f; it's used // in failure reports. func runStringTests(t *testing.T, f func([]byte) []byte, funcName string, testCases []StringTest) { for _, tc := range testCases { - actual := string(f([]byte(tc.in))) - if actual != tc.out { + actual := f([]byte(tc.in)) + if actual == nil && tc.out != nil { + t.Errorf("%s(%q) = nil; want %q", funcName, tc.in, tc.out) + } + if actual != nil && tc.out == nil { + t.Errorf("%s(%q) = %q; want nil", funcName, tc.in, actual) + } + if !Equal(actual, tc.out) { t.Errorf("%s(%q) = %q; want %q", funcName, tc.in, actual, tc.out) } } @@ -1049,6 +1038,64 @@ func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTest func TestToLower(t *testing.T) { runStringTests(t, ToLower, "ToLower", lowerTests) } +func BenchmarkToUpper(b *testing.B) { + for _, tc := range upperTests { + tin := []byte(tc.in) + b.Run(tc.in, func(b *testing.B) { + for i := 0; i < b.N; i++ { + actual := ToUpper(tin) + if !Equal(actual, tc.out) { + b.Errorf("ToUpper(%q) = %q; want %q", tc.in, actual, tc.out) + } + } + }) + } +} + +func BenchmarkToLower(b *testing.B) { + for _, tc := range lowerTests { + tin := []byte(tc.in) + b.Run(tc.in, func(b *testing.B) { + for i := 0; i < b.N; i++ { + actual := ToLower(tin) + if !Equal(actual, tc.out) { + b.Errorf("ToLower(%q) = %q; want %q", tc.in, actual, tc.out) + } + } + }) + } +} + +var toValidUTF8Tests = []struct { + in string + repl string + out string +}{ + {"", "\uFFFD", ""}, + {"abc", "\uFFFD", "abc"}, + {"\uFDDD", "\uFFFD", "\uFDDD"}, + {"a\xffb", "\uFFFD", "a\uFFFDb"}, + {"a\xffb\uFFFD", "X", "aXb\uFFFD"}, + {"a☺\xffb☺\xC0\xAFc☺\xff", "", "a☺b☺c☺"}, + {"a☺\xffb☺\xC0\xAFc☺\xff", "日本語", "a☺日本語b☺日本語c☺日本語"}, + {"\xC0\xAF", "\uFFFD", "\uFFFD"}, + {"\xE0\x80\xAF", "\uFFFD", "\uFFFD"}, + {"\xed\xa0\x80", "abc", "abc"}, + {"\xed\xbf\xbf", "\uFFFD", "\uFFFD"}, + {"\xF0\x80\x80\xaf", "☺", "☺"}, + {"\xF8\x80\x80\x80\xAF", "\uFFFD", "\uFFFD"}, + {"\xFC\x80\x80\x80\x80\xAF", "\uFFFD", "\uFFFD"}, +} + +func TestToValidUTF8(t *testing.T) { + for _, tc := range toValidUTF8Tests { + got := ToValidUTF8([]byte(tc.in), []byte(tc.repl)) + if !Equal(got, []byte(tc.out)) { + t.Errorf("ToValidUTF8(%q, %q) = %q; want %q", tc.in, tc.repl, got, tc.out) + } + } +} + func TestTrimSpace(t *testing.T) { runStringTests(t, TrimSpace, "TrimSpace", trimSpaceTests) } type RepeatTest struct { @@ -1255,8 +1302,11 @@ var isValidRune = predicate{ } type TrimFuncTest struct { - f predicate - in, out string + f predicate + in string + trimOut []byte + leftOut []byte + rightOut []byte } func not(p predicate) predicate { @@ -1269,20 +1319,68 @@ func not(p predicate) predicate { } var trimFuncTests = []TrimFuncTest{ - {isSpace, space + " hello " + space, "hello"}, - {isDigit, "\u0e50\u0e5212hello34\u0e50\u0e51", "hello"}, - {isUpper, "\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F", "hello"}, - {not(isSpace), "hello" + space + "hello", space}, - {not(isDigit), "hello\u0e50\u0e521234\u0e50\u0e51helo", "\u0e50\u0e521234\u0e50\u0e51"}, - {isValidRune, "ab\xc0a\xc0cd", "\xc0a\xc0"}, - {not(isValidRune), "\xc0a\xc0", "a"}, + {isSpace, space + " hello " + space, + []byte("hello"), + []byte("hello " + space), + []byte(space + " hello")}, + {isDigit, "\u0e50\u0e5212hello34\u0e50\u0e51", + []byte("hello"), + []byte("hello34\u0e50\u0e51"), + []byte("\u0e50\u0e5212hello")}, + {isUpper, "\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F", + []byte("hello"), + []byte("helloEF\u2C6F\u2C6FGH\u2C6F\u2C6F"), + []byte("\u2C6F\u2C6F\u2C6F\u2C6FABCDhello")}, + {not(isSpace), "hello" + space + "hello", + []byte(space), + []byte(space + "hello"), + []byte("hello" + space)}, + {not(isDigit), "hello\u0e50\u0e521234\u0e50\u0e51helo", + []byte("\u0e50\u0e521234\u0e50\u0e51"), + []byte("\u0e50\u0e521234\u0e50\u0e51helo"), + []byte("hello\u0e50\u0e521234\u0e50\u0e51")}, + {isValidRune, "ab\xc0a\xc0cd", + []byte("\xc0a\xc0"), + []byte("\xc0a\xc0cd"), + []byte("ab\xc0a\xc0")}, + {not(isValidRune), "\xc0a\xc0", + []byte("a"), + []byte("a\xc0"), + []byte("\xc0a")}, + // The nils returned by TrimLeftFunc are odd behavior, but we need + // to preserve backwards compatibility. + {isSpace, "", + nil, + nil, + []byte("")}, + {isSpace, " ", + nil, + nil, + []byte("")}, } func TestTrimFunc(t *testing.T) { for _, tc := range trimFuncTests { - actual := string(TrimFunc([]byte(tc.in), tc.f.f)) - if actual != tc.out { - t.Errorf("TrimFunc(%q, %q) = %q; want %q", tc.in, tc.f.name, actual, tc.out) + trimmers := []struct { + name string + trim func(s []byte, f func(r rune) bool) []byte + out []byte + }{ + {"TrimFunc", TrimFunc, tc.trimOut}, + {"TrimLeftFunc", TrimLeftFunc, tc.leftOut}, + {"TrimRightFunc", TrimRightFunc, tc.rightOut}, + } + for _, trimmer := range trimmers { + actual := trimmer.trim([]byte(tc.in), tc.f.f) + if actual == nil && trimmer.out != nil { + t.Errorf("%s(%q, %q) = nil; want %q", trimmer.name, tc.in, tc.f.name, trimmer.out) + } + if actual != nil && trimmer.out == nil { + t.Errorf("%s(%q, %q) = %q; want nil", trimmer.name, tc.in, tc.f.name, actual) + } + if !Equal(actual, trimmer.out) { + t.Errorf("%s(%q, %q) = %q; want %q", trimmer.name, tc.in, tc.f.name, actual, trimmer.out) + } } } } @@ -1622,9 +1720,41 @@ func BenchmarkFieldsFunc(b *testing.B) { } func BenchmarkTrimSpace(b *testing.B) { - s := []byte(" Some text. \n") - for i := 0; i < b.N; i++ { - TrimSpace(s) + tests := []struct { + name string + input []byte + }{ + {"NoTrim", []byte("typical")}, + {"ASCII", []byte(" foo bar ")}, + {"SomeNonASCII", []byte(" \u2000\t\r\n x\t\t\r\r\ny\n \u3000 ")}, + {"JustNonASCII", []byte("\u2000\u2000\u2000☺☺☺☺\u3000\u3000\u3000")}, + } + for _, test := range tests { + b.Run(test.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + TrimSpace(test.input) + } + }) + } +} + +func BenchmarkToValidUTF8(b *testing.B) { + tests := []struct { + name string + input []byte + }{ + {"Valid", []byte("typical")}, + {"InvalidASCII", []byte("foo\xffbar")}, + {"InvalidNonASCII", []byte("日本語\xff日本語")}, + } + replacement := []byte("\uFFFD") + b.ResetTimer() + for _, test := range tests { + b.Run(test.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + ToValidUTF8(test.input, replacement) + } + }) } } @@ -1647,6 +1777,39 @@ func makeBenchInputHard() []byte { var benchInputHard = makeBenchInputHard() +func benchmarkIndexHard(b *testing.B, sep []byte) { + for i := 0; i < b.N; i++ { + Index(benchInputHard, sep) + } +} + +func benchmarkLastIndexHard(b *testing.B, sep []byte) { + for i := 0; i < b.N; i++ { + LastIndex(benchInputHard, sep) + } +} + +func benchmarkCountHard(b *testing.B, sep []byte) { + for i := 0; i < b.N; i++ { + Count(benchInputHard, sep) + } +} + +func BenchmarkIndexHard1(b *testing.B) { benchmarkIndexHard(b, []byte("<>")) } +func BenchmarkIndexHard2(b *testing.B) { benchmarkIndexHard(b, []byte("</pre>")) } +func BenchmarkIndexHard3(b *testing.B) { benchmarkIndexHard(b, []byte("<b>hello world</b>")) } +func BenchmarkIndexHard4(b *testing.B) { + benchmarkIndexHard(b, []byte("<pre><b>hello</b><strong>world</strong></pre>")) +} + +func BenchmarkLastIndexHard1(b *testing.B) { benchmarkLastIndexHard(b, []byte("<>")) } +func BenchmarkLastIndexHard2(b *testing.B) { benchmarkLastIndexHard(b, []byte("</pre>")) } +func BenchmarkLastIndexHard3(b *testing.B) { benchmarkLastIndexHard(b, []byte("<b>hello world</b>")) } + +func BenchmarkCountHard1(b *testing.B) { benchmarkCountHard(b, []byte("<>")) } +func BenchmarkCountHard2(b *testing.B) { benchmarkCountHard(b, []byte("</pre>")) } +func BenchmarkCountHard3(b *testing.B) { benchmarkCountHard(b, []byte("<b>hello world</b>")) } + func BenchmarkSplitEmptySeparator(b *testing.B) { for i := 0; i < b.N; i++ { Split(benchInputHard, nil) diff --git a/libgo/go/bytes/example_test.go b/libgo/go/bytes/example_test.go index 6d32837..5ba7077 100644 --- a/libgo/go/bytes/example_test.go +++ b/libgo/go/bytes/example_test.go @@ -365,6 +365,16 @@ func ExampleToTitle() { // ХЛЕБ } +func ExampleToTitleSpecial() { + str := []byte("ahoj vývojári golang") + totitle := bytes.ToTitleSpecial(unicode.AzeriCase, str) + fmt.Println("Original : " + string(str)) + fmt.Println("ToTitle : " + string(totitle)) + // Output: + // Original : ahoj vývojári golang + // ToTitle : AHOJ VÝVOJÁRİ GOLANG +} + func ExampleTrim() { fmt.Printf("[%q]", bytes.Trim([]byte(" !!! Achtung! Achtung! !!! "), "! ")) // Output: ["Achtung! Achtung"] @@ -438,11 +448,31 @@ func ExampleToUpper() { // Output: GOPHER } +func ExampleToUpperSpecial() { + str := []byte("ahoj vývojári golang") + totitle := bytes.ToUpperSpecial(unicode.AzeriCase, str) + fmt.Println("Original : " + string(str)) + fmt.Println("ToUpper : " + string(totitle)) + // Output: + // Original : ahoj vývojári golang + // ToUpper : AHOJ VÝVOJÁRİ GOLANG +} + func ExampleToLower() { fmt.Printf("%s", bytes.ToLower([]byte("Gopher"))) // Output: gopher } +func ExampleToLowerSpecial() { + str := []byte("AHOJ VÝVOJÁRİ GOLANG") + totitle := bytes.ToLowerSpecial(unicode.AzeriCase, str) + fmt.Println("Original : " + string(str)) + fmt.Println("ToLower : " + string(totitle)) + // Output: + // Original : AHOJ VÝVOJÁRİ GOLANG + // ToLower : ahoj vývojári golang +} + func ExampleReader_Len() { fmt.Println(bytes.NewReader([]byte("Hi!")).Len()) fmt.Println(bytes.NewReader([]byte("こんにちは!")).Len()) diff --git a/libgo/go/bytes/export_test.go b/libgo/go/bytes/export_test.go index f61523e..b65428d 100644 --- a/libgo/go/bytes/export_test.go +++ b/libgo/go/bytes/export_test.go @@ -6,4 +6,3 @@ package bytes // Export func for testing var IndexBytePortable = indexBytePortable -var EqualPortable = equalPortable |