From 8dc2499aa62f768c6395c9754b8cabc1ce25c494 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Fri, 11 Feb 2022 14:53:56 -0800 Subject: libgo: update to Go1.18beta2 gotools/ * Makefile.am (go_cmd_cgo_files): Add ast_go118.go (check-go-tool): Copy golang.org/x/tools directories. * Makefile.in: Regenerate. Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/384695 --- libgo/go/strings/strings.go | 120 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 100 insertions(+), 20 deletions(-) (limited to 'libgo/go/strings/strings.go') diff --git a/libgo/go/strings/strings.go b/libgo/go/strings/strings.go index b429735..c5a29e9 100644 --- a/libgo/go/strings/strings.go +++ b/libgo/go/strings/strings.go @@ -706,7 +706,8 @@ func isSeparator(r rune) bool { // Title returns a copy of the string s with all Unicode letters that begin words // mapped to their Unicode title case. // -// BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly. +// Deprecated: The rule Title uses for word boundaries does not handle Unicode +// punctuation properly. Use golang.org/x/text/cases instead. func Title(s string) string { // Use a closure here to remember state. // Hackish but effective. Depends on Map scanning in order and calling @@ -797,6 +798,8 @@ func lastIndexFunc(s string, f func(rune) bool, truth bool) int { // most-significant bit of the highest word, map to the full range of all // 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed, // ensuring that any non-ASCII character will be reported as not in the set. +// This allocates a total of 32 bytes even though the upper half +// is unused to avoid bounds checks in asciiSet.contains. type asciiSet [8]uint32 // makeASCIISet creates a set of ASCII characters and reports whether all @@ -807,28 +810,14 @@ func makeASCIISet(chars string) (as asciiSet, ok bool) { if c >= utf8.RuneSelf { return as, false } - as[c>>5] |= 1 << uint(c&31) + as[c/32] |= 1 << (c % 32) } return as, true } // contains reports whether c is inside the set. func (as *asciiSet) contains(c byte) bool { - return (as[c>>5] & (1 << uint(c&31))) != 0 -} - -func makeCutsetFunc(cutset string) func(rune) bool { - if len(cutset) == 1 && cutset[0] < utf8.RuneSelf { - return func(r rune) bool { - return r == rune(cutset[0]) - } - } - if as, isASCII := makeASCIISet(cutset); isASCII { - return func(r rune) bool { - return r < utf8.RuneSelf && as.contains(byte(r)) - } - } - return func(r rune) bool { return IndexRune(cutset, r) >= 0 } + return (as[c/32] & (1 << (c % 32))) != 0 } // Trim returns a slice of the string s with all leading and @@ -837,7 +826,13 @@ func Trim(s, cutset string) string { if s == "" || cutset == "" { return s } - return TrimFunc(s, makeCutsetFunc(cutset)) + if len(cutset) == 1 && cutset[0] < utf8.RuneSelf { + return trimLeftByte(trimRightByte(s, cutset[0]), cutset[0]) + } + if as, ok := makeASCIISet(cutset); ok { + return trimLeftASCII(trimRightASCII(s, &as), &as) + } + return trimLeftUnicode(trimRightUnicode(s, cutset), cutset) } // TrimLeft returns a slice of the string s with all leading @@ -848,7 +843,44 @@ func TrimLeft(s, cutset string) string { if s == "" || cutset == "" { return s } - return TrimLeftFunc(s, makeCutsetFunc(cutset)) + if len(cutset) == 1 && cutset[0] < utf8.RuneSelf { + return trimLeftByte(s, cutset[0]) + } + if as, ok := makeASCIISet(cutset); ok { + return trimLeftASCII(s, &as) + } + return trimLeftUnicode(s, cutset) +} + +func trimLeftByte(s string, c byte) string { + for len(s) > 0 && s[0] == c { + s = s[1:] + } + return s +} + +func trimLeftASCII(s string, as *asciiSet) string { + for len(s) > 0 { + if !as.contains(s[0]) { + break + } + s = s[1:] + } + return s +} + +func trimLeftUnicode(s, cutset string) string { + for len(s) > 0 { + r, n := rune(s[0]), 1 + if r >= utf8.RuneSelf { + r, n = utf8.DecodeRuneInString(s) + } + if !ContainsRune(cutset, r) { + break + } + s = s[n:] + } + return s } // TrimRight returns a slice of the string s, with all trailing @@ -859,7 +891,44 @@ func TrimRight(s, cutset string) string { if s == "" || cutset == "" { return s } - return TrimRightFunc(s, makeCutsetFunc(cutset)) + if len(cutset) == 1 && cutset[0] < utf8.RuneSelf { + return trimRightByte(s, cutset[0]) + } + if as, ok := makeASCIISet(cutset); ok { + return trimRightASCII(s, &as) + } + return trimRightUnicode(s, cutset) +} + +func trimRightByte(s string, c byte) string { + for len(s) > 0 && s[len(s)-1] == c { + s = s[:len(s)-1] + } + return s +} + +func trimRightASCII(s string, as *asciiSet) string { + for len(s) > 0 { + if !as.contains(s[len(s)-1]) { + break + } + s = s[:len(s)-1] + } + return s +} + +func trimRightUnicode(s, cutset string) string { + for len(s) > 0 { + r, n := rune(s[len(s)-1]), 1 + if r >= utf8.RuneSelf { + r, n = utf8.DecodeLastRuneInString(s) + } + if !ContainsRune(cutset, r) { + break + } + s = s[:len(s)-n] + } + return s } // TrimSpace returns a slice of the string s, with all leading @@ -1100,3 +1169,14 @@ func Index(s, substr string) int { } return -1 } + +// Cut slices s around the first instance of sep, +// returning the text before and after sep. +// The found result reports whether sep appears in s. +// If sep does not appear in s, cut returns s, "", false. +func Cut(s, sep string) (before, after string, found bool) { + if i := Index(s, sep); i >= 0 { + return s[:i], s[i+len(sep):], true + } + return s, "", false +} -- cgit v1.1