about | summary | refs | log | tree | commit | diff
path: root/libgo/go/bytes
diff options
context:
space:
mode:
author Ian Lance Taylor <iant@golang.org> 2019-09-06 18:12:46 +0000
committer Ian Lance Taylor <ian@gcc.gnu.org> 2019-09-06 18:12:46 +0000
commit aa8901e9bb0399d2c16f988ba2fe46eb0c0c5d13 (patch)
tree 7e63b06d1eec92beec6997c9d3ab47a5d6a835be /libgo/go/bytes
parent 920ea3b8ba3164b61ac9490dfdfceb6936eda6dd (diff)
download gcc-aa8901e9bb0399d2c16f988ba2fe46eb0c0c5d13.zip
gcc-aa8901e9bb0399d2c16f988ba2fe46eb0c0c5d13.tar.gz
gcc-aa8901e9bb0399d2c16f988ba2fe46eb0c0c5d13.tar.bz2
libgo: update to Go 1.13beta1 release
Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/193497
From-SVN: r275473
Diffstat (limited to 'libgo/go/bytes')
-rw-r--r-- libgo/go/bytes/buffer.go 7
-rw-r--r-- libgo/go/bytes/buffer_test.go 22
-rw-r--r-- libgo/go/bytes/bytes.go 197
-rw-r--r-- libgo/go/bytes/bytes_test.go 337
-rw-r--r-- libgo/go/bytes/example_test.go 30
-rw-r--r-- libgo/go/bytes/export_test.go 1
6 files changed, 464 insertions, 130 deletions
diff --git a/libgo/go/bytes/buffer.go b/libgo/go/bytes/buffer.go
index aff2db5..f19a4cf 100644
--- a/libgo/go/bytes/buffer.go
+++ b/libgo/go/bytes/buffer.go
@@ -21,9 +21,6 @@ type Buffer struct {
buf []byte // contents are the bytes buf[off : len(buf)]
off int // read at &buf[off], write at &buf[len(buf)]
lastRead readOp // last read operation, so that Unread* can work correctly.
-
- // FIXME: it would be advisable to align Buffer to cachelines to avoid false
- // sharing.
}
// The readOp constants describe the last action performed on
@@ -385,13 +382,15 @@ func (b *Buffer) UnreadRune() error {
return nil
}
+var errUnreadByte = errors.New("bytes.Buffer: UnreadByte: previous operation was not a successful read")
+
// UnreadByte unreads the last byte returned by the most recent successful
// read operation that read at least one byte. If a write has happened since
// the last read, if the last read returned an error, or if the read read zero
// bytes, UnreadByte returns an error.
func (b *Buffer) UnreadByte() error {
if b.lastRead == opInvalid {
- return errors.New("bytes.Buffer: UnreadByte: previous operation was not a successful read")
+ return errUnreadByte
}
b.lastRead = opInvalid
if b.off > 0 {
diff --git a/libgo/go/bytes/buffer_test.go b/libgo/go/bytes/buffer_test.go
index 6e9d695..7626d27 100644
--- a/libgo/go/bytes/buffer_test.go
+++ b/libgo/go/bytes/buffer_test.go
@@ -131,11 +131,8 @@ func TestBasicOperations(t *testing.T) {
check(t, "TestBasicOperations (3)", &buf, "")
n, err := buf.Write(testBytes[0:1])
- if n != 1 {
- t.Errorf("wrote 1 byte, but n == %d", n)
- }
- if err != nil {
- t.Errorf("err should always be nil, but err == %s", err)
+ if want := 1; err != nil || n != want {
+ t.Errorf("Write: got (%d, %v), want (%d, %v)", n, err, want, nil)
}
check(t, "TestBasicOperations (4)", &buf, "a")
@@ -143,8 +140,8 @@ func TestBasicOperations(t *testing.T) {
check(t, "TestBasicOperations (5)", &buf, "ab")
n, err = buf.Write(testBytes[2:26])
- if n != 24 {
- t.Errorf("wrote 24 bytes, but n == %d", n)
+ if want := 24; err != nil || n != want {
+ t.Errorf("Write: got (%d, %v), want (%d, %v)", n, err, want, nil)
}
check(t, "TestBasicOperations (6)", &buf, testString[0:26])
@@ -159,15 +156,12 @@ func TestBasicOperations(t *testing.T) {
buf.WriteByte(testString[1])
c, err := buf.ReadByte()
- if err != nil {
- t.Error("ReadByte unexpected eof")
- }
- if c != testString[1] {
- t.Errorf("ReadByte wrong value c=%v", c)
+ if want := testString[1]; err != nil || c != want {
+ t.Errorf("ReadByte: got (%q, %v), want (%q, %v)", c, err, want, nil)
}
c, err = buf.ReadByte()
- if err == nil {
- t.Error("ReadByte unexpected not eof")
+ if err != io.EOF {
+ t.Errorf("ReadByte: got (%q, %v), want (%q, %v)", c, err, byte(0), io.EOF)
}
}
}
diff --git a/libgo/go/bytes/bytes.go b/libgo/go/bytes/bytes.go
index daf4a32..eb13212 100644
--- a/libgo/go/bytes/bytes.go
+++ b/libgo/go/bytes/bytes.go
@@ -12,23 +12,12 @@ import (
"unicode/utf8"
)
-// Equal returns a boolean reporting whether a and b
+// Equal reports whether a and b
// are the same length and contain the same bytes.
// A nil argument is equivalent to an empty slice.
func Equal(a, b []byte) bool {
- return bytealg.Equal(a, b)
-}
-
-func equalPortable(a, b []byte) bool {
- if len(a) != len(b) {
- return false
- }
- for i, c := range a {
- if c != b[i] {
- return false
- }
- }
- return true
+ // Neither cmd/compile nor gccgo allocates for these string conversions.
+ return string(a) == string(b)
}
// Compare returns an integer comparing two byte slices lexicographically.
@@ -114,12 +103,34 @@ func indexBytePortable(s []byte, c byte) int {
// LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s.
func LastIndex(s, sep []byte) int {
n := len(sep)
- if n == 0 {
+ switch {
+ case n == 0:
return len(s)
+ case n == 1:
+ return LastIndexByte(s, sep[0])
+ case n == len(s):
+ if Equal(s, sep) {
+ return 0
+ }
+ return -1
+ case n > len(s):
+ return -1
}
- c := sep[0]
- for i := len(s) - n; i >= 0; i-- {
- if s[i] == c && (n == 1 || Equal(s[i:i+n], sep)) {
+ // Rabin-Karp search from the end of the string
+ hashss, pow := hashStrRev(sep)
+ last := len(s) - n
+ var h uint32
+ for i := len(s) - 1; i >= last; i-- {
+ h = h*primeRK + uint32(s[i])
+ }
+ if h == hashss && Equal(s[last:], sep) {
+ return last
+ }
+ for i := last - 1; i >= 0; i-- {
+ h *= primeRK
+ h += uint32(s[i])
+ h -= pow * uint32(s[i+n])
+ if h == hashss && Equal(s[i:i+n], sep) {
return i
}
}
@@ -477,13 +488,16 @@ func Map(mapping func(r rune) rune, s []byte) []byte {
// It panics if count is negative or if
// the result of (len(b) * count) overflows.
func Repeat(b []byte, count int) []byte {
+ if count == 0 {
+ return []byte{}
+ }
// Since we cannot return an error on overflow,
// we should panic if the repeat will generate
// an overflow.
// See Issue golang.org/issue/16237.
if count < 0 {
panic("bytes: negative Repeat count")
- } else if count > 0 && len(b)*count/count != len(b) {
+ } else if len(b)*count/count != len(b) {
panic("bytes: Repeat count causes overflow")
}
@@ -496,11 +510,66 @@ func Repeat(b []byte, count int) []byte {
return nb
}
-// ToUpper treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters within it mapped to their upper case.
-func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) }
+// ToUpper returns a copy of the byte slice s with all Unicode letters mapped to
+// their upper case.
+func ToUpper(s []byte) []byte {
+ isASCII, hasLower := true, false
+ for i := 0; i < len(s); i++ {
+ c := s[i]
+ if c >= utf8.RuneSelf {
+ isASCII = false
+ break
+ }
+ hasLower = hasLower || ('a' <= c && c <= 'z')
+ }
+
+ if isASCII { // optimize for ASCII-only byte slices.
+ if !hasLower {
+ // Just return a copy.
+ return append([]byte(""), s...)
+ }
+ b := make([]byte, len(s))
+ for i := 0; i < len(s); i++ {
+ c := s[i]
+ if 'a' <= c && c <= 'z' {
+ c -= 'a' - 'A'
+ }
+ b[i] = c
+ }
+ return b
+ }
+ return Map(unicode.ToUpper, s)
+}
-// ToLower treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their lower case.
-func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) }
+// ToLower returns a copy of the byte slice s with all Unicode letters mapped to
+// their lower case.
+func ToLower(s []byte) []byte {
+ isASCII, hasUpper := true, false
+ for i := 0; i < len(s); i++ {
+ c := s[i]
+ if c >= utf8.RuneSelf {
+ isASCII = false
+ break
+ }
+ hasUpper = hasUpper || ('A' <= c && c <= 'Z')
+ }
+
+ if isASCII { // optimize for ASCII-only byte slices.
+ if !hasUpper {
+ return append([]byte(""), s...)
+ }
+ b := make([]byte, len(s))
+ for i := 0; i < len(s); i++ {
+ c := s[i]
+ if 'A' <= c && c <= 'Z' {
+ c += 'a' - 'A'
+ }
+ b[i] = c
+ }
+ return b
+ }
+ return Map(unicode.ToLower, s)
+}
// ToTitle treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their title case.
func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) }
@@ -523,6 +592,35 @@ func ToTitleSpecial(c unicode.SpecialCase, s []byte) []byte {
return Map(c.ToTitle, s)
}
+// ToValidUTF8 treats s as UTF-8-encoded bytes and returns a copy with each run of bytes
+// representing invalid UTF-8 replaced with the bytes in replacement, which may be empty.
+func ToValidUTF8(s, replacement []byte) []byte {
+ b := make([]byte, 0, len(s)+len(replacement))
+ invalid := false // previous byte was from an invalid UTF-8 sequence
+ for i := 0; i < len(s); {
+ c := s[i]
+ if c < utf8.RuneSelf {
+ i++
+ invalid = false
+ b = append(b, byte(c))
+ continue
+ }
+ _, wid := utf8.DecodeRune(s[i:])
+ if wid == 1 {
+ i++
+ if !invalid {
+ invalid = true
+ b = append(b, replacement...)
+ }
+ continue
+ }
+ invalid = false
+ b = append(b, s[i:i+wid]...)
+ i += wid
+ }
+ return b
+}
+
// isSeparator reports whether the rune could mark a word boundary.
// TODO: update when package unicode captures more of the properties.
func isSeparator(r rune) bool {
@@ -734,7 +832,41 @@ func TrimRight(s []byte, cutset string) []byte {
// TrimSpace returns a subslice of s by slicing off all leading and
// trailing white space, as defined by Unicode.
func TrimSpace(s []byte) []byte {
- return TrimFunc(s, unicode.IsSpace)
+ // Fast path for ASCII: look for the first ASCII non-space byte
+ start := 0
+ for ; start < len(s); start++ {
+ c := s[start]
+ if c >= utf8.RuneSelf {
+ // If we run into a non-ASCII byte, fall back to the
+ // slower unicode-aware method on the remaining bytes
+ return TrimFunc(s[start:], unicode.IsSpace)
+ }
+ if asciiSpace[c] == 0 {
+ break
+ }
+ }
+
+ // Now look for the first ASCII non-space byte from the end
+ stop := len(s)
+ for ; stop > start; stop-- {
+ c := s[stop-1]
+ if c >= utf8.RuneSelf {
+ return TrimFunc(s[start:stop], unicode.IsSpace)
+ }
+ if asciiSpace[c] == 0 {
+ break
+ }
+ }
+
+ // At this point s[start:stop] starts and ends with an ASCII
+ // non-space bytes, so we're done. Non-ASCII cases have already
+ // been handled above.
+ if start == stop {
+ // Special case to preserve previous TrimLeftFunc behavior,
+ // returning nil instead of empty slice if all spaces.
+ return nil
+ }
+ return s[start:stop]
}
// Runes interprets s as a sequence of UTF-8-encoded code points.
@@ -987,3 +1119,20 @@ func hashStr(sep []byte) (uint32, uint32) {
}
return hash, pow
}
+
+// hashStrRev returns the hash of the reverse of sep and the
+// appropriate multiplicative factor for use in Rabin-Karp algorithm.
+func hashStrRev(sep []byte) (uint32, uint32) {
+ hash := uint32(0)
+ for i := len(sep) - 1; i >= 0; i-- {
+ hash = hash*primeRK + uint32(sep[i])
+ }
+ var pow, sq uint32 = 1, primeRK
+ for i := len(sep); i > 0; i >>= 1 {
+ if i&1 != 0 {
+ pow *= sq
+ }
+ sq *= sq
+ }
+ return hash, pow
+}
diff --git a/libgo/go/bytes/bytes_test.go b/libgo/go/bytes/bytes_test.go
index ec4ecf3..ebff5f0 100644
--- a/libgo/go/bytes/bytes_test.go
+++ b/libgo/go/bytes/bytes_test.go
@@ -52,15 +52,17 @@ type BinOpTest struct {
}
func TestEqual(t *testing.T) {
- for _, tt := range compareTests {
- eql := Equal(tt.a, tt.b)
- if eql != (tt.i == 0) {
- t.Errorf(`Equal(%q, %q) = %v`, tt.a, tt.b, eql)
- }
- eql = EqualPortable(tt.a, tt.b)
- if eql != (tt.i == 0) {
- t.Errorf(`EqualPortable(%q, %q) = %v`, tt.a, tt.b, eql)
+ // Run the tests and check for allocation at the same time.
+ allocs := testing.AllocsPerRun(10, func() {
+ for _, tt := range compareTests {
+ eql := Equal(tt.a, tt.b)
+ if eql != (tt.i == 0) {
+ t.Errorf(`Equal(%q, %q) = %v`, tt.a, tt.b, eql)
+ }
}
+ })
+ if allocs > 0 {
+ t.Errorf("Equal allocated %v times", allocs)
}
}
@@ -577,11 +579,6 @@ func BenchmarkEqual(b *testing.B) {
benchBytes(b, sizes, bmEqual(Equal))
}
-func BenchmarkEqualPort(b *testing.B) {
- sizes := []int{1, 6, 32, 4 << 10, 4 << 20, 64 << 20}
- benchBytes(b, sizes, bmEqual(EqualPortable))
-}
-
func bmEqual(equal func([]byte, []byte) bool) func(b *testing.B, n int) {
return func(b *testing.B, n int) {
if len(bmbuf) < 2*n {
@@ -682,34 +679,6 @@ func BenchmarkCountSingle(b *testing.B) {
})
}
-type ExplodeTest struct {
- s string
- n int
- a []string
-}
-
-var explodetests = []ExplodeTest{
- {"", -1, []string{}},
- {abcd, -1, []string{"a", "b", "c", "d"}},
- {faces, -1, []string{"☺", "☻", "☹"}},
- {abcd, 2, []string{"a", "bcd"}},
-}
-
-func TestExplode(t *testing.T) {
- for _, tt := range explodetests {
- a := SplitN([]byte(tt.s), nil, tt.n)
- result := sliceOfString(a)
- if !eq(result, tt.a) {
- t.Errorf(`Explode("%s", %d) = %v; want %v`, tt.s, tt.n, result, tt.a)
- continue
- }
- s := Join(a, []byte{})
- if string(s) != tt.s {
- t.Errorf(`Join(Explode("%s", %d), "") = "%s"`, tt.s, tt.n, s)
- }
- }
-}
-
type SplitTest struct {
s string
sep string
@@ -718,7 +687,9 @@ type SplitTest struct {
}
var splittests = []SplitTest{
+ {"", "", -1, []string{}},
{abcd, "a", 0, nil},
+ {abcd, "", 2, []string{"a", "bcd"}},
{abcd, "a", -1, []string{"", "bcd"}},
{abcd, "z", -1, []string{"abcd"}},
{abcd, "", -1, []string{"a", "b", "c", "d"}},
@@ -748,7 +719,7 @@ func TestSplit(t *testing.T) {
t.Errorf(`Split(%q, %q, %d) = %v; want %v`, tt.s, tt.sep, tt.n, result, tt.a)
continue
}
- if tt.n == 0 {
+ if tt.n == 0 || len(a) == 0 {
continue
}
@@ -914,54 +885,72 @@ func TestFieldsFunc(t *testing.T) {
}
// Test case for any function which accepts and returns a byte slice.
-// For ease of creation, we write the byte slices as strings.
+// For ease of creation, we write the input byte slice as a string.
type StringTest struct {
- in, out string
+ in string
+ out []byte
}
var upperTests = []StringTest{
- {"", ""},
- {"abc", "ABC"},
- {"AbC123", "ABC123"},
- {"azAZ09_", "AZAZ09_"},
- {"\u0250\u0250\u0250\u0250\u0250", "\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F"}, // grows one byte per char
+ {"", []byte("")},
+ {"ONLYUPPER", []byte("ONLYUPPER")},
+ {"abc", []byte("ABC")},
+ {"AbC123", []byte("ABC123")},
+ {"azAZ09_", []byte("AZAZ09_")},
+ {"longStrinGwitHmixofsmaLLandcAps", []byte("LONGSTRINGWITHMIXOFSMALLANDCAPS")},
+ {"long\u0250string\u0250with\u0250nonascii\u2C6Fchars", []byte("LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS")},
+ {"\u0250\u0250\u0250\u0250\u0250", []byte("\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F")}, // grows one byte per char
+ {"a\u0080\U0010FFFF", []byte("A\u0080\U0010FFFF")}, // test utf8.RuneSelf and utf8.MaxRune
}
var lowerTests = []StringTest{
- {"", ""},
- {"abc", "abc"},
- {"AbC123", "abc123"},
- {"azAZ09_", "azaz09_"},
- {"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", "\u0251\u0251\u0251\u0251\u0251"}, // shrinks one byte per char
+ {"", []byte("")},
+ {"abc", []byte("abc")},
+ {"AbC123", []byte("abc123")},
+ {"azAZ09_", []byte("azaz09_")},
+ {"longStrinGwitHmixofsmaLLandcAps", []byte("longstringwithmixofsmallandcaps")},
+ {"LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS", []byte("long\u0250string\u0250with\u0250nonascii\u0250chars")},
+ {"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", []byte("\u0251\u0251\u0251\u0251\u0251")}, // shrinks one byte per char
+ {"A\u0080\U0010FFFF", []byte("a\u0080\U0010FFFF")}, // test utf8.RuneSelf and utf8.MaxRune
}
const space = "\t\v\r\f\n\u0085\u00a0\u2000\u3000"
var trimSpaceTests = []StringTest{
- {"", ""},
- {"abc", "abc"},
- {space + "abc" + space, "abc"},
- {" ", ""},
- {" \t\r\n \t\t\r\r\n\n ", ""},
- {" \t\r\n x\t\t\r\r\n\n ", "x"},
- {" \u2000\t\r\n x\t\t\r\r\ny\n \u3000", "x\t\t\r\r\ny"},
- {"1 \t\r\n2", "1 \t\r\n2"},
- {" x\x80", "x\x80"},
- {" x\xc0", "x\xc0"},
- {"x \xc0\xc0 ", "x \xc0\xc0"},
- {"x \xc0", "x \xc0"},
- {"x \xc0 ", "x \xc0"},
- {"x \xc0\xc0 ", "x \xc0\xc0"},
- {"x ☺\xc0\xc0 ", "x ☺\xc0\xc0"},
- {"x ☺ ", "x ☺"},
+ {"", nil},
+ {" a", []byte("a")},
+ {"b ", []byte("b")},
+ {"abc", []byte("abc")},
+ {space + "abc" + space, []byte("abc")},
+ {" ", nil},
+ {"\u3000 ", nil},
+ {" \u3000", nil},
+ {" \t\r\n \t\t\r\r\n\n ", nil},
+ {" \t\r\n x\t\t\r\r\n\n ", []byte("x")},
+ {" \u2000\t\r\n x\t\t\r\r\ny\n \u3000", []byte("x\t\t\r\r\ny")},
+ {"1 \t\r\n2", []byte("1 \t\r\n2")},
+ {" x\x80", []byte("x\x80")},
+ {" x\xc0", []byte("x\xc0")},
+ {"x \xc0\xc0 ", []byte("x \xc0\xc0")},
+ {"x \xc0", []byte("x \xc0")},
+ {"x \xc0 ", []byte("x \xc0")},
+ {"x \xc0\xc0 ", []byte("x \xc0\xc0")},
+ {"x ☺\xc0\xc0 ", []byte("x ☺\xc0\xc0")},
+ {"x ☺ ", []byte("x ☺")},
}
// Execute f on each test case. funcName should be the name of f; it's used
// in failure reports.
func runStringTests(t *testing.T, f func([]byte) []byte, funcName string, testCases []StringTest) {
for _, tc := range testCases {
- actual := string(f([]byte(tc.in)))
- if actual != tc.out {
+ actual := f([]byte(tc.in))
+ if actual == nil && tc.out != nil {
+ t.Errorf("%s(%q) = nil; want %q", funcName, tc.in, tc.out)
+ }
+ if actual != nil && tc.out == nil {
+ t.Errorf("%s(%q) = %q; want nil", funcName, tc.in, actual)
+ }
+ if !Equal(actual, tc.out) {
t.Errorf("%s(%q) = %q; want %q", funcName, tc.in, actual, tc.out)
}
}
@@ -1049,6 +1038,64 @@ func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTest
func TestToLower(t *testing.T) { runStringTests(t, ToLower, "ToLower", lowerTests) }
+func BenchmarkToUpper(b *testing.B) {
+ for _, tc := range upperTests {
+ tin := []byte(tc.in)
+ b.Run(tc.in, func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ actual := ToUpper(tin)
+ if !Equal(actual, tc.out) {
+ b.Errorf("ToUpper(%q) = %q; want %q", tc.in, actual, tc.out)
+ }
+ }
+ })
+ }
+}
+
+func BenchmarkToLower(b *testing.B) {
+ for _, tc := range lowerTests {
+ tin := []byte(tc.in)
+ b.Run(tc.in, func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ actual := ToLower(tin)
+ if !Equal(actual, tc.out) {
+ b.Errorf("ToLower(%q) = %q; want %q", tc.in, actual, tc.out)
+ }
+ }
+ })
+ }
+}
+
+var toValidUTF8Tests = []struct {
+ in string
+ repl string
+ out string
+}{
+ {"", "\uFFFD", ""},
+ {"abc", "\uFFFD", "abc"},
+ {"\uFDDD", "\uFFFD", "\uFDDD"},
+ {"a\xffb", "\uFFFD", "a\uFFFDb"},
+ {"a\xffb\uFFFD", "X", "aXb\uFFFD"},
+ {"a☺\xffb☺\xC0\xAFc☺\xff", "", "a☺b☺c☺"},
+ {"a☺\xffb☺\xC0\xAFc☺\xff", "日本語", "a☺日本語b☺日本語c☺日本語"},
+ {"\xC0\xAF", "\uFFFD", "\uFFFD"},
+ {"\xE0\x80\xAF", "\uFFFD", "\uFFFD"},
+ {"\xed\xa0\x80", "abc", "abc"},
+ {"\xed\xbf\xbf", "\uFFFD", "\uFFFD"},
+ {"\xF0\x80\x80\xaf", "☺", "☺"},
+ {"\xF8\x80\x80\x80\xAF", "\uFFFD", "\uFFFD"},
+ {"\xFC\x80\x80\x80\x80\xAF", "\uFFFD", "\uFFFD"},
+}
+
+func TestToValidUTF8(t *testing.T) {
+ for _, tc := range toValidUTF8Tests {
+ got := ToValidUTF8([]byte(tc.in), []byte(tc.repl))
+ if !Equal(got, []byte(tc.out)) {
+ t.Errorf("ToValidUTF8(%q, %q) = %q; want %q", tc.in, tc.repl, got, tc.out)
+ }
+ }
+}
+
func TestTrimSpace(t *testing.T) { runStringTests(t, TrimSpace, "TrimSpace", trimSpaceTests) }
type RepeatTest struct {
@@ -1255,8 +1302,11 @@ var isValidRune = predicate{
}
type TrimFuncTest struct {
- f predicate
- in, out string
+ f predicate
+ in string
+ trimOut []byte
+ leftOut []byte
+ rightOut []byte
}
func not(p predicate) predicate {
@@ -1269,20 +1319,68 @@ func not(p predicate) predicate {
}
var trimFuncTests = []TrimFuncTest{
- {isSpace, space + " hello " + space, "hello"},
- {isDigit, "\u0e50\u0e5212hello34\u0e50\u0e51", "hello"},
- {isUpper, "\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F", "hello"},
- {not(isSpace), "hello" + space + "hello", space},
- {not(isDigit), "hello\u0e50\u0e521234\u0e50\u0e51helo", "\u0e50\u0e521234\u0e50\u0e51"},
- {isValidRune, "ab\xc0a\xc0cd", "\xc0a\xc0"},
- {not(isValidRune), "\xc0a\xc0", "a"},
+ {isSpace, space + " hello " + space,
+ []byte("hello"),
+ []byte("hello " + space),
+ []byte(space + " hello")},
+ {isDigit, "\u0e50\u0e5212hello34\u0e50\u0e51",
+ []byte("hello"),
+ []byte("hello34\u0e50\u0e51"),
+ []byte("\u0e50\u0e5212hello")},
+ {isUpper, "\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F",
+ []byte("hello"),
+ []byte("helloEF\u2C6F\u2C6FGH\u2C6F\u2C6F"),
+ []byte("\u2C6F\u2C6F\u2C6F\u2C6FABCDhello")},
+ {not(isSpace), "hello" + space + "hello",
+ []byte(space),
+ []byte(space + "hello"),
+ []byte("hello" + space)},
+ {not(isDigit), "hello\u0e50\u0e521234\u0e50\u0e51helo",
+ []byte("\u0e50\u0e521234\u0e50\u0e51"),
+ []byte("\u0e50\u0e521234\u0e50\u0e51helo"),
+ []byte("hello\u0e50\u0e521234\u0e50\u0e51")},
+ {isValidRune, "ab\xc0a\xc0cd",
+ []byte("\xc0a\xc0"),
+ []byte("\xc0a\xc0cd"),
+ []byte("ab\xc0a\xc0")},
+ {not(isValidRune), "\xc0a\xc0",
+ []byte("a"),
+ []byte("a\xc0"),
+ []byte("\xc0a")},
+ // The nils returned by TrimLeftFunc are odd behavior, but we need
+ // to preserve backwards compatibility.
+ {isSpace, "",
+ nil,
+ nil,
+ []byte("")},
+ {isSpace, " ",
+ nil,
+ nil,
+ []byte("")},
}
func TestTrimFunc(t *testing.T) {
for _, tc := range trimFuncTests {
- actual := string(TrimFunc([]byte(tc.in), tc.f.f))
- if actual != tc.out {
- t.Errorf("TrimFunc(%q, %q) = %q; want %q", tc.in, tc.f.name, actual, tc.out)
+ trimmers := []struct {
+ name string
+ trim func(s []byte, f func(r rune) bool) []byte
+ out []byte
+ }{
+ {"TrimFunc", TrimFunc, tc.trimOut},
+ {"TrimLeftFunc", TrimLeftFunc, tc.leftOut},
+ {"TrimRightFunc", TrimRightFunc, tc.rightOut},
+ }
+ for _, trimmer := range trimmers {
+ actual := trimmer.trim([]byte(tc.in), tc.f.f)
+ if actual == nil && trimmer.out != nil {
+ t.Errorf("%s(%q, %q) = nil; want %q", trimmer.name, tc.in, tc.f.name, trimmer.out)
+ }
+ if actual != nil && trimmer.out == nil {
+ t.Errorf("%s(%q, %q) = %q; want nil", trimmer.name, tc.in, tc.f.name, actual)
+ }
+ if !Equal(actual, trimmer.out) {
+ t.Errorf("%s(%q, %q) = %q; want %q", trimmer.name, tc.in, tc.f.name, actual, trimmer.out)
+ }
}
}
}
@@ -1622,9 +1720,41 @@ func BenchmarkFieldsFunc(b *testing.B) {
}
func BenchmarkTrimSpace(b *testing.B) {
- s := []byte(" Some text. \n")
- for i := 0; i < b.N; i++ {
- TrimSpace(s)
+ tests := []struct {
+ name string
+ input []byte
+ }{
+ {"NoTrim", []byte("typical")},
+ {"ASCII", []byte(" foo bar ")},
+ {"SomeNonASCII", []byte(" \u2000\t\r\n x\t\t\r\r\ny\n \u3000 ")},
+ {"JustNonASCII", []byte("\u2000\u2000\u2000☺☺☺☺\u3000\u3000\u3000")},
+ }
+ for _, test := range tests {
+ b.Run(test.name, func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ TrimSpace(test.input)
+ }
+ })
+ }
+}
+
+func BenchmarkToValidUTF8(b *testing.B) {
+ tests := []struct {
+ name string
+ input []byte
+ }{
+ {"Valid", []byte("typical")},
+ {"InvalidASCII", []byte("foo\xffbar")},
+ {"InvalidNonASCII", []byte("日本語\xff日本語")},
+ }
+ replacement := []byte("\uFFFD")
+ b.ResetTimer()
+ for _, test := range tests {
+ b.Run(test.name, func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ ToValidUTF8(test.input, replacement)
+ }
+ })
}
}
@@ -1647,6 +1777,39 @@ func makeBenchInputHard() []byte {
var benchInputHard = makeBenchInputHard()
+func benchmarkIndexHard(b *testing.B, sep []byte) {
+ for i := 0; i < b.N; i++ {
+ Index(benchInputHard, sep)
+ }
+}
+
+func benchmarkLastIndexHard(b *testing.B, sep []byte) {
+ for i := 0; i < b.N; i++ {
+ LastIndex(benchInputHard, sep)
+ }
+}
+
+func benchmarkCountHard(b *testing.B, sep []byte) {
+ for i := 0; i < b.N; i++ {
+ Count(benchInputHard, sep)
+ }
+}
+
+func BenchmarkIndexHard1(b *testing.B) { benchmarkIndexHard(b, []byte("<>")) }
+func BenchmarkIndexHard2(b *testing.B) { benchmarkIndexHard(b, []byte("</pre>")) }
+func BenchmarkIndexHard3(b *testing.B) { benchmarkIndexHard(b, []byte("<b>hello world</b>")) }
+func BenchmarkIndexHard4(b *testing.B) {
+ benchmarkIndexHard(b, []byte("<pre><b>hello</b><strong>world</strong></pre>"))
+}
+
+func BenchmarkLastIndexHard1(b *testing.B) { benchmarkLastIndexHard(b, []byte("<>")) }
+func BenchmarkLastIndexHard2(b *testing.B) { benchmarkLastIndexHard(b, []byte("</pre>")) }
+func BenchmarkLastIndexHard3(b *testing.B) { benchmarkLastIndexHard(b, []byte("<b>hello world</b>")) }
+
+func BenchmarkCountHard1(b *testing.B) { benchmarkCountHard(b, []byte("<>")) }
+func BenchmarkCountHard2(b *testing.B) { benchmarkCountHard(b, []byte("</pre>")) }
+func BenchmarkCountHard3(b *testing.B) { benchmarkCountHard(b, []byte("<b>hello world</b>")) }
+
func BenchmarkSplitEmptySeparator(b *testing.B) {
for i := 0; i < b.N; i++ {
Split(benchInputHard, nil)
diff --git a/libgo/go/bytes/example_test.go b/libgo/go/bytes/example_test.go
index 6d32837..5ba7077 100644
--- a/libgo/go/bytes/example_test.go
+++ b/libgo/go/bytes/example_test.go
@@ -365,6 +365,16 @@ func ExampleToTitle() {
// ХЛЕБ
}
+func ExampleToTitleSpecial() {
+ str := []byte("ahoj vývojári golang")
+ totitle := bytes.ToTitleSpecial(unicode.AzeriCase, str)
+ fmt.Println("Original : " + string(str))
+ fmt.Println("ToTitle : " + string(totitle))
+ // Output:
+ // Original : ahoj vývojári golang
+ // ToTitle : AHOJ VÝVOJÁRİ GOLANG
+}
+
func ExampleTrim() {
fmt.Printf("[%q]", bytes.Trim([]byte(" !!! Achtung! Achtung! !!! "), "! "))
// Output: ["Achtung! Achtung"]
@@ -438,11 +448,31 @@ func ExampleToUpper() {
// Output: GOPHER
}
+func ExampleToUpperSpecial() {
+ str := []byte("ahoj vývojári golang")
+ totitle := bytes.ToUpperSpecial(unicode.AzeriCase, str)
+ fmt.Println("Original : " + string(str))
+ fmt.Println("ToUpper : " + string(totitle))
+ // Output:
+ // Original : ahoj vývojári golang
+ // ToUpper : AHOJ VÝVOJÁRİ GOLANG
+}
+
func ExampleToLower() {
fmt.Printf("%s", bytes.ToLower([]byte("Gopher")))
// Output: gopher
}
+func ExampleToLowerSpecial() {
+ str := []byte("AHOJ VÝVOJÁRİ GOLANG")
+ totitle := bytes.ToLowerSpecial(unicode.AzeriCase, str)
+ fmt.Println("Original : " + string(str))
+ fmt.Println("ToLower : " + string(totitle))
+ // Output:
+ // Original : AHOJ VÝVOJÁRİ GOLANG
+ // ToLower : ahoj vývojári golang
+}
+
func ExampleReader_Len() {
fmt.Println(bytes.NewReader([]byte("Hi!")).Len())
fmt.Println(bytes.NewReader([]byte("こんにちは!")).Len())
diff --git a/libgo/go/bytes/export_test.go b/libgo/go/bytes/export_test.go
index f61523e..b65428d 100644
--- a/libgo/go/bytes/export_test.go
+++ b/libgo/go/bytes/export_test.go
@@ -6,4 +6,3 @@ package bytes
// Export func for testing
var IndexBytePortable = indexBytePortable
-var EqualPortable = equalPortable