about | summary | refs | log | tree | commit | diff
path: root/libgo/go/strings
diff options
context:
space:
mode:
Diffstat (limited to 'libgo/go/strings')
-rw-r--r-- libgo/go/strings/example_test.go 6
-rw-r--r-- libgo/go/strings/replace.go 2
-rw-r--r-- libgo/go/strings/strings.go 147
-rw-r--r-- libgo/go/strings/strings_test.go 145
4 files changed, 253 insertions, 47 deletions
diff --git a/libgo/go/strings/example_test.go b/libgo/go/strings/example_test.go
index e31054a..4f3a1ce 100644
--- a/libgo/go/strings/example_test.go
+++ b/libgo/go/strings/example_test.go
@@ -47,12 +47,16 @@ func ExampleContains() {
func ExampleContainsAny() {
fmt.Println(strings.ContainsAny("team", "i"))
- fmt.Println(strings.ContainsAny("failure", "u & i"))
+ fmt.Println(strings.ContainsAny("fail", "ui"))
+ fmt.Println(strings.ContainsAny("ure", "ui"))
+ fmt.Println(strings.ContainsAny("failure", "ui"))
fmt.Println(strings.ContainsAny("foo", ""))
fmt.Println(strings.ContainsAny("", ""))
// Output:
// false
// true
+ // true
+ // true
// false
// false
}
diff --git a/libgo/go/strings/replace.go b/libgo/go/strings/replace.go
index ace0b8d..ccab1fb 100644
--- a/libgo/go/strings/replace.go
+++ b/libgo/go/strings/replace.go
@@ -26,6 +26,8 @@ type replacer interface {
// NewReplacer returns a new Replacer from a list of old, new string
// pairs. Replacements are performed in the order they appear in the
// target string, without overlapping matches.
+//
+// NewReplacer panics if given an odd number of arguments.
func NewReplacer(oldnew ...string) *Replacer {
if len(oldnew)%2 == 1 {
panic("strings.NewReplacer: odd argument count")
diff --git a/libgo/go/strings/strings.go b/libgo/go/strings/strings.go
index a98f5d8..7337481 100644
--- a/libgo/go/strings/strings.go
+++ b/libgo/go/strings/strings.go
@@ -341,38 +341,38 @@ func Fields(s string) []string {
wasSpace = isSpace
}
- if setBits < utf8.RuneSelf { // ASCII fast path
- a := make([]string, n)
- na := 0
- fieldStart := 0
- i := 0
- // Skip spaces in the front of the input.
- for i < len(s) && asciiSpace[s[i]] != 0 {
+ if setBits >= utf8.RuneSelf {
+ // Some runes in the input string are not ASCII.
+ return FieldsFunc(s, unicode.IsSpace)
+ }
+ // ASCII fast path
+ a := make([]string, n)
+ na := 0
+ fieldStart := 0
+ i := 0
+ // Skip spaces in the front of the input.
+ for i < len(s) && asciiSpace[s[i]] != 0 {
+ i++
+ }
+ fieldStart = i
+ for i < len(s) {
+ if asciiSpace[s[i]] == 0 {
i++
+ continue
}
- fieldStart = i
- for i < len(s) {
- if asciiSpace[s[i]] == 0 {
- i++
- continue
- }
- a[na] = s[fieldStart:i]
- na++
+ a[na] = s[fieldStart:i]
+ na++
+ i++
+ // Skip spaces in between fields.
+ for i < len(s) && asciiSpace[s[i]] != 0 {
i++
- // Skip spaces in between fields.
- for i < len(s) && asciiSpace[s[i]] != 0 {
- i++
- }
- fieldStart = i
}
- if fieldStart < len(s) { // Last field might end at EOF.
- a[na] = s[fieldStart:]
- }
- return a
+ fieldStart = i
}
-
- // Some runes in the input string are not ASCII.
- return FieldsFunc(s, unicode.IsSpace)
+ if fieldStart < len(s) { // Last field might end at EOF.
+ a[na] = s[fieldStart:]
+ }
+ return a
}
// FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c)
@@ -550,7 +550,7 @@ func Repeat(s string, count int) string {
return b.String()
}
-// ToUpper returns a copy of the string s with all Unicode letters mapped to their upper case.
+// ToUpper returns s with all Unicode letters mapped to their upper case.
func ToUpper(s string) string {
isASCII, hasLower := true, false
for i := 0; i < len(s); i++ {
@@ -559,7 +559,7 @@ func ToUpper(s string) string {
isASCII = false
break
}
- hasLower = hasLower || (c >= 'a' && c <= 'z')
+ hasLower = hasLower || ('a' <= c && c <= 'z')
}
if isASCII { // optimize for ASCII-only strings.
@@ -570,7 +570,7 @@ func ToUpper(s string) string {
b.Grow(len(s))
for i := 0; i < len(s); i++ {
c := s[i]
- if c >= 'a' && c <= 'z' {
+ if 'a' <= c && c <= 'z' {
c -= 'a' - 'A'
}
b.WriteByte(c)
@@ -580,7 +580,7 @@ func ToUpper(s string) string {
return Map(unicode.ToUpper, s)
}
-// ToLower returns a copy of the string s with all Unicode letters mapped to their lower case.
+// ToLower returns s with all Unicode letters mapped to their lower case.
func ToLower(s string) string {
isASCII, hasUpper := true, false
for i := 0; i < len(s); i++ {
@@ -589,7 +589,7 @@ func ToLower(s string) string {
isASCII = false
break
}
- hasUpper = hasUpper || (c >= 'A' && c <= 'Z')
+ hasUpper = hasUpper || ('A' <= c && c <= 'Z')
}
if isASCII { // optimize for ASCII-only strings.
@@ -600,7 +600,7 @@ func ToLower(s string) string {
b.Grow(len(s))
for i := 0; i < len(s); i++ {
c := s[i]
- if c >= 'A' && c <= 'Z' {
+ if 'A' <= c && c <= 'Z' {
c += 'a' - 'A'
}
b.WriteByte(c)
@@ -631,6 +631,56 @@ func ToTitleSpecial(c unicode.SpecialCase, s string) string {
return Map(c.ToTitle, s)
}
+// ToValidUTF8 returns a copy of the string s with each run of invalid UTF-8 byte sequences
+// replaced by the replacement string, which may be empty.
+func ToValidUTF8(s, replacement string) string {
+ var b Builder
+
+ for i, c := range s {
+ if c != utf8.RuneError {
+ continue
+ }
+
+ _, wid := utf8.DecodeRuneInString(s[i:])
+ if wid == 1 {
+ b.Grow(len(s) + len(replacement))
+ b.WriteString(s[:i])
+ s = s[i:]
+ break
+ }
+ }
+
+ // Fast path for unchanged input
+ if b.Cap() == 0 { // didn't call b.Grow above
+ return s
+ }
+
+ invalid := false // previous byte was from an invalid UTF-8 sequence
+ for i := 0; i < len(s); {
+ c := s[i]
+ if c < utf8.RuneSelf {
+ i++
+ invalid = false
+ b.WriteByte(c)
+ continue
+ }
+ _, wid := utf8.DecodeRuneInString(s[i:])
+ if wid == 1 {
+ i++
+ if !invalid {
+ invalid = true
+ b.WriteString(replacement)
+ }
+ continue
+ }
+ invalid = false
+ b.WriteString(s[i : i+wid])
+ i += wid
+ }
+
+ return b.String()
+}
+
// isSeparator reports whether the rune could mark a word boundary.
// TODO: update when package unicode captures more of the properties.
func isSeparator(r rune) bool {
@@ -818,7 +868,36 @@ func TrimRight(s string, cutset string) string {
// TrimSpace returns a slice of the string s, with all leading
// and trailing white space removed, as defined by Unicode.
func TrimSpace(s string) string {
- return TrimFunc(s, unicode.IsSpace)
+ // Fast path for ASCII: look for the first ASCII non-space byte
+ start := 0
+ for ; start < len(s); start++ {
+ c := s[start]
+ if c >= utf8.RuneSelf {
+ // If we run into a non-ASCII byte, fall back to the
+ // slower unicode-aware method on the remaining bytes
+ return TrimFunc(s[start:], unicode.IsSpace)
+ }
+ if asciiSpace[c] == 0 {
+ break
+ }
+ }
+
+ // Now look for the first ASCII non-space byte from the end
+ stop := len(s)
+ for ; stop > start; stop-- {
+ c := s[stop-1]
+ if c >= utf8.RuneSelf {
+ return TrimFunc(s[start:stop], unicode.IsSpace)
+ }
+ if asciiSpace[c] == 0 {
+ break
+ }
+ }
+
+ // At this point s[start:stop] is empty or starts and ends with
+ // an ASCII non-space byte, so we're done. Non-ASCII cases have
+ // already been handled above.
+ return s[start:stop]
}
// TrimPrefix returns s without the provided leading prefix string.
diff --git a/libgo/go/strings/strings_test.go b/libgo/go/strings/strings_test.go
index ae17eba..76d827b 100644
--- a/libgo/go/strings/strings_test.go
+++ b/libgo/go/strings/strings_test.go
@@ -200,6 +200,18 @@ func TestLastIndex(t *testing.T) { runIndexTests(t, LastIndex, "LastIndex", l
func TestIndexAny(t *testing.T) { runIndexTests(t, IndexAny, "IndexAny", indexAnyTests) }
func TestLastIndexAny(t *testing.T) { runIndexTests(t, LastIndexAny, "LastIndexAny", lastIndexAnyTests) }
+func TestIndexByte(t *testing.T) {
+ for _, tt := range indexTests {
+ if len(tt.sep) != 1 {
+ continue
+ }
+ pos := IndexByte(tt.s, tt.sep[0])
+ if pos != tt.out {
+ t.Errorf(`IndexByte(%q, %q) = %v; want %v`, tt.s, tt.sep[0], pos, tt.out)
+ }
+ }
+}
+
func TestLastIndexByte(t *testing.T) {
testCases := []IndexTest{
{"", "q", -1},
@@ -697,6 +709,36 @@ func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTest
func TestToLower(t *testing.T) { runStringTests(t, ToLower, "ToLower", lowerTests) }
+var toValidUTF8Tests = []struct {
+ in string
+ repl string
+ out string
+}{
+ {"", "\uFFFD", ""},
+ {"abc", "\uFFFD", "abc"},
+ {"\uFDDD", "\uFFFD", "\uFDDD"},
+ {"a\xffb", "\uFFFD", "a\uFFFDb"},
+ {"a\xffb\uFFFD", "X", "aXb\uFFFD"},
+ {"a☺\xffb☺\xC0\xAFc☺\xff", "", "a☺b☺c☺"},
+ {"a☺\xffb☺\xC0\xAFc☺\xff", "日本語", "a☺日本語b☺日本語c☺日本語"},
+ {"\xC0\xAF", "\uFFFD", "\uFFFD"},
+ {"\xE0\x80\xAF", "\uFFFD", "\uFFFD"},
+ {"\xed\xa0\x80", "abc", "abc"},
+ {"\xed\xbf\xbf", "\uFFFD", "\uFFFD"},
+ {"\xF0\x80\x80\xaf", "☺", "☺"},
+ {"\xF8\x80\x80\x80\xAF", "\uFFFD", "\uFFFD"},
+ {"\xFC\x80\x80\x80\x80\xAF", "\uFFFD", "\uFFFD"},
+}
+
+func TestToValidUTF8(t *testing.T) {
+ for _, tc := range toValidUTF8Tests {
+ got := ToValidUTF8(tc.in, tc.repl)
+ if got != tc.out {
+ t.Errorf("ToValidUTF8(%q, %q) = %q; want %q", tc.in, tc.repl, got, tc.out)
+ }
+ }
+}
+
func BenchmarkToUpper(b *testing.B) {
for _, tc := range upperTests {
b.Run(tc.in, func(b *testing.B) {
@@ -843,6 +885,26 @@ func BenchmarkTrim(b *testing.B) {
}
}
+func BenchmarkToValidUTF8(b *testing.B) {
+ tests := []struct {
+ name string
+ input string
+ }{
+ {"Valid", "typical"},
+ {"InvalidASCII", "foo\xffbar"},
+ {"InvalidNonASCII", "日本語\xff日本語"},
+ }
+ replacement := "\uFFFD"
+ b.ResetTimer()
+ for _, test := range tests {
+ b.Run(test.name, func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ ToValidUTF8(test.input, replacement)
+ }
+ })
+ }
+}
+
type predicate struct {
f func(rune) bool
name string
@@ -868,23 +930,66 @@ func not(p predicate) predicate {
}
var trimFuncTests = []struct {
- f predicate
- in, out string
+ f predicate
+ in string
+ trimOut string
+ leftOut string
+ rightOut string
}{
- {isSpace, space + " hello " + space, "hello"},
- {isDigit, "\u0e50\u0e5212hello34\u0e50\u0e51", "hello"},
- {isUpper, "\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F", "hello"},
- {not(isSpace), "hello" + space + "hello", space},
- {not(isDigit), "hello\u0e50\u0e521234\u0e50\u0e51helo", "\u0e50\u0e521234\u0e50\u0e51"},
- {isValidRune, "ab\xc0a\xc0cd", "\xc0a\xc0"},
- {not(isValidRune), "\xc0a\xc0", "a"},
+ {isSpace, space + " hello " + space,
+ "hello",
+ "hello " + space,
+ space + " hello"},
+ {isDigit, "\u0e50\u0e5212hello34\u0e50\u0e51",
+ "hello",
+ "hello34\u0e50\u0e51",
+ "\u0e50\u0e5212hello"},
+ {isUpper, "\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F",
+ "hello",
+ "helloEF\u2C6F\u2C6FGH\u2C6F\u2C6F",
+ "\u2C6F\u2C6F\u2C6F\u2C6FABCDhello"},
+ {not(isSpace), "hello" + space + "hello",
+ space,
+ space + "hello",
+ "hello" + space},
+ {not(isDigit), "hello\u0e50\u0e521234\u0e50\u0e51helo",
+ "\u0e50\u0e521234\u0e50\u0e51",
+ "\u0e50\u0e521234\u0e50\u0e51helo",
+ "hello\u0e50\u0e521234\u0e50\u0e51"},
+ {isValidRune, "ab\xc0a\xc0cd",
+ "\xc0a\xc0",
+ "\xc0a\xc0cd",
+ "ab\xc0a\xc0"},
+ {not(isValidRune), "\xc0a\xc0",
+ "a",
+ "a\xc0",
+ "\xc0a"},
+ {isSpace, "",
+ "",
+ "",
+ ""},
+ {isSpace, " ",
+ "",
+ "",
+ ""},
}
func TestTrimFunc(t *testing.T) {
for _, tc := range trimFuncTests {
- actual := TrimFunc(tc.in, tc.f.f)
- if actual != tc.out {
- t.Errorf("TrimFunc(%q, %q) = %q; want %q", tc.in, tc.f.name, actual, tc.out)
+ trimmers := []struct {
+ name string
+ trim func(s string, f func(r rune) bool) string
+ out string
+ }{
+ {"TrimFunc", TrimFunc, tc.trimOut},
+ {"TrimLeftFunc", TrimLeftFunc, tc.leftOut},
+ {"TrimRightFunc", TrimRightFunc, tc.rightOut},
+ }
+ for _, trimmer := range trimmers {
+ actual := trimmer.trim(tc.in, tc.f.f)
+ if actual != trimmer.out {
+ t.Errorf("%s(%q, %q) = %q; want %q", trimmer.name, tc.in, tc.f.name, actual, trimmer.out)
+ }
}
}
}
@@ -1735,3 +1840,19 @@ func BenchmarkJoin(b *testing.B) {
})
}
}
+
+func BenchmarkTrimSpace(b *testing.B) {
+ tests := []struct{ name, input string }{
+ {"NoTrim", "typical"},
+ {"ASCII", " foo bar "},
+ {"SomeNonASCII", " \u2000\t\r\n x\t\t\r\r\ny\n \u3000 "},
+ {"JustNonASCII", "\u2000\u2000\u2000☺☺☺☺\u3000\u3000\u3000"},
+ }
+ for _, test := range tests {
+ b.Run(test.name, func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ TrimSpace(test.input)
+ }
+ })
+ }
+}