aboutsummaryrefslogtreecommitdiff
path: root/libgo/go/bytes/bytes.go
diff options
context:
space:
mode:
authorIan Lance Taylor <iant@golang.org>2022-02-11 14:53:56 -0800
committerIan Lance Taylor <iant@golang.org>2022-02-11 15:01:19 -0800
commit8dc2499aa62f768c6395c9754b8cabc1ce25c494 (patch)
tree43d7fd2bbfd7ad8c9625a718a5e8718889351994 /libgo/go/bytes/bytes.go
parent9a56779dbc4e2d9c15be8d31e36f2f59be7331a8 (diff)
downloadgcc-8dc2499aa62f768c6395c9754b8cabc1ce25c494.zip
gcc-8dc2499aa62f768c6395c9754b8cabc1ce25c494.tar.gz
gcc-8dc2499aa62f768c6395c9754b8cabc1ce25c494.tar.bz2
libgo: update to Go1.18beta2
gotools/ * Makefile.am (go_cmd_cgo_files): Add ast_go118.go (check-go-tool): Copy golang.org/x/tools directories. * Makefile.in: Regenerate. Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/384695
Diffstat (limited to 'libgo/go/bytes/bytes.go')
-rw-r--r--libgo/go/bytes/bytes.go146
1 files changed, 121 insertions, 25 deletions
diff --git a/libgo/go/bytes/bytes.go b/libgo/go/bytes/bytes.go
index ce52649f1..6fdaa49 100644
--- a/libgo/go/bytes/bytes.go
+++ b/libgo/go/bytes/bytes.go
@@ -21,7 +21,7 @@ func Equal(a, b []byte) bool {
}
// Compare returns an integer comparing two byte slices lexicographically.
-// The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
+// The result will be 0 if a == b, -1 if a < b, and +1 if a > b.
// A nil argument is equivalent to an empty slice.
func Compare(a, b []byte) int {
return bytealg.Compare(a, b)
@@ -699,7 +699,7 @@ func ToValidUTF8(s, replacement []byte) []byte {
if c < utf8.RuneSelf {
i++
invalid = false
- b = append(b, byte(c))
+ b = append(b, c)
continue
}
_, wid := utf8.DecodeRune(s[i:])
@@ -746,7 +746,8 @@ func isSeparator(r rune) bool {
// Title treats s as UTF-8-encoded bytes and returns a copy with all Unicode letters that begin
// words mapped to their title case.
//
-// BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly.
+// Deprecated: The rule Title uses for word boundaries does not handle Unicode
+// punctuation properly. Use golang.org/x/text/cases instead.
func Title(s []byte) []byte {
// Use a closure here to remember state.
// Hackish but effective. Depends on Map scanning in order and calling
@@ -867,6 +868,8 @@ func lastIndexFunc(s []byte, f func(r rune) bool, truth bool) int {
// most-significant bit of the highest word, map to the full range of all
// 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
// ensuring that any non-ASCII character will be reported as not in the set.
+// This allocates a total of 32 bytes even though the upper half
+// is unused to avoid bounds checks in asciiSet.contains.
type asciiSet [8]uint32
// makeASCIISet creates a set of ASCII characters and reports whether all
@@ -877,53 +880,133 @@ func makeASCIISet(chars string) (as asciiSet, ok bool) {
if c >= utf8.RuneSelf {
return as, false
}
- as[c>>5] |= 1 << uint(c&31)
+ as[c/32] |= 1 << (c % 32)
}
return as, true
}
// contains reports whether c is inside the set.
func (as *asciiSet) contains(c byte) bool {
- return (as[c>>5] & (1 << uint(c&31))) != 0
+ return (as[c/32] & (1 << (c % 32))) != 0
}
-func makeCutsetFunc(cutset string) func(r rune) bool {
- if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
- return func(r rune) bool {
- return r == rune(cutset[0])
- }
- }
- if as, isASCII := makeASCIISet(cutset); isASCII {
- return func(r rune) bool {
- return r < utf8.RuneSelf && as.contains(byte(r))
- }
- }
- return func(r rune) bool {
- for _, c := range cutset {
- if c == r {
- return true
- }
+// containsRune is a simplified version of strings.ContainsRune
+// to avoid importing the strings package.
+// We avoid bytes.ContainsRune to avoid allocating a temporary copy of s.
+func containsRune(s string, r rune) bool {
+ for _, c := range s {
+ if c == r {
+ return true
}
- return false
}
+ return false
}
// Trim returns a subslice of s by slicing off all leading and
// trailing UTF-8-encoded code points contained in cutset.
func Trim(s []byte, cutset string) []byte {
- return TrimFunc(s, makeCutsetFunc(cutset))
+ if len(s) == 0 || cutset == "" {
+ return s
+ }
+ if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
+ return trimLeftByte(trimRightByte(s, cutset[0]), cutset[0])
+ }
+ if as, ok := makeASCIISet(cutset); ok {
+ return trimLeftASCII(trimRightASCII(s, &as), &as)
+ }
+ return trimLeftUnicode(trimRightUnicode(s, cutset), cutset)
}
// TrimLeft returns a subslice of s by slicing off all leading
// UTF-8-encoded code points contained in cutset.
func TrimLeft(s []byte, cutset string) []byte {
- return TrimLeftFunc(s, makeCutsetFunc(cutset))
+ if len(s) == 0 || cutset == "" {
+ return s
+ }
+ if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
+ return trimLeftByte(s, cutset[0])
+ }
+ if as, ok := makeASCIISet(cutset); ok {
+ return trimLeftASCII(s, &as)
+ }
+ return trimLeftUnicode(s, cutset)
+}
+
+func trimLeftByte(s []byte, c byte) []byte {
+ for len(s) > 0 && s[0] == c {
+ s = s[1:]
+ }
+ return s
+}
+
+func trimLeftASCII(s []byte, as *asciiSet) []byte {
+ for len(s) > 0 {
+ if !as.contains(s[0]) {
+ break
+ }
+ s = s[1:]
+ }
+ return s
+}
+
+func trimLeftUnicode(s []byte, cutset string) []byte {
+ for len(s) > 0 {
+ r, n := rune(s[0]), 1
+ if r >= utf8.RuneSelf {
+ r, n = utf8.DecodeRune(s)
+ }
+ if !containsRune(cutset, r) {
+ break
+ }
+ s = s[n:]
+ }
+ return s
}
// TrimRight returns a subslice of s by slicing off all trailing
// UTF-8-encoded code points that are contained in cutset.
func TrimRight(s []byte, cutset string) []byte {
- return TrimRightFunc(s, makeCutsetFunc(cutset))
+ if len(s) == 0 || cutset == "" {
+ return s
+ }
+ if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
+ return trimRightByte(s, cutset[0])
+ }
+ if as, ok := makeASCIISet(cutset); ok {
+ return trimRightASCII(s, &as)
+ }
+ return trimRightUnicode(s, cutset)
+}
+
+func trimRightByte(s []byte, c byte) []byte {
+ for len(s) > 0 && s[len(s)-1] == c {
+ s = s[:len(s)-1]
+ }
+ return s
+}
+
+func trimRightASCII(s []byte, as *asciiSet) []byte {
+ for len(s) > 0 {
+ if !as.contains(s[len(s)-1]) {
+ break
+ }
+ s = s[:len(s)-1]
+ }
+ return s
+}
+
+func trimRightUnicode(s []byte, cutset string) []byte {
+ for len(s) > 0 {
+ r, n := rune(s[len(s)-1]), 1
+ if r >= utf8.RuneSelf {
+ r, n = utf8.DecodeLastRune(s)
+ }
+ if !containsRune(cutset, r) {
+ break
+ }
+ s = s[:len(s)-n]
+ }
+ return s
}
// TrimSpace returns a subslice of s by slicing off all leading and
@@ -1174,3 +1257,16 @@ func Index(s, sep []byte) int {
}
return -1
}
+
+// Cut slices s around the first instance of sep,
+// returning the text before and after sep.
+// The found result reports whether sep appears in s.
+// If sep does not appear in s, cut returns s, nil, false.
+//
+// Cut returns slices of the original slice s, not copies.
+func Cut(s, sep []byte) (before, after []byte, found bool) {
+ if i := Index(s, sep); i >= 0 {
+ return s[:i], s[i+len(sep):], true
+ }
+ return s, nil, false
+}