diff options
author | Ian Lance Taylor <ian@gcc.gnu.org> | 2011-10-26 23:57:58 +0000 |
---|---|---|
committer | Ian Lance Taylor <ian@gcc.gnu.org> | 2011-10-26 23:57:58 +0000 |
commit | d8f412571f8768df2d3239e72392dfeabbad1559 (patch) | |
tree | 19d182df05ead7ff8ba7ee00a7d57555e1383fdf /libgo/go/strings | |
parent | e0c39d66d4f0607177b1cf8995dda56a667e07b3 (diff) | |
download | gcc-d8f412571f8768df2d3239e72392dfeabbad1559.zip gcc-d8f412571f8768df2d3239e72392dfeabbad1559.tar.gz gcc-d8f412571f8768df2d3239e72392dfeabbad1559.tar.bz2 |
Update Go library to last weekly.
From-SVN: r180552
Diffstat (limited to 'libgo/go/strings')
-rw-r--r-- | libgo/go/strings/export_test.go | 9 | ||||
-rw-r--r-- | libgo/go/strings/replace.go | 315 | ||||
-rw-r--r-- | libgo/go/strings/replace_test.go | 174 | ||||
-rw-r--r-- | libgo/go/strings/strings.go | 55 | ||||
-rw-r--r-- | libgo/go/strings/strings_test.go | 107 |
5 files changed, 611 insertions, 49 deletions
diff --git a/libgo/go/strings/export_test.go b/libgo/go/strings/export_test.go new file mode 100644 index 0000000..dcfec51 --- /dev/null +++ b/libgo/go/strings/export_test.go @@ -0,0 +1,9 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package strings + +func (r *Replacer) Replacer() interface{} { + return r.r +} diff --git a/libgo/go/strings/replace.go b/libgo/go/strings/replace.go new file mode 100644 index 0000000..64a7f20 --- /dev/null +++ b/libgo/go/strings/replace.go @@ -0,0 +1,315 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package strings + +import ( + "io" + "os" +) + +// A Replacer replaces a list of strings with replacements. +type Replacer struct { + r replacer +} + +// replacer is the interface that a replacement algorithm needs to implement. +type replacer interface { + Replace(s string) string + WriteString(w io.Writer, s string) (n int, err os.Error) +} + +// byteBitmap represents bytes which are sought for replacement. +// byteBitmap is 256 bits wide, with a bit set for each old byte to be +// replaced. +type byteBitmap [256 / 32]uint32 + +func (m *byteBitmap) set(b byte) { + m[b>>5] |= uint32(1 << (b & 31)) +} + +// NewReplacer returns a new Replacer from a list of old, new string pairs. +// Replacements are performed in order, without overlapping matches. +func NewReplacer(oldnew ...string) *Replacer { + if len(oldnew)%2 == 1 { + panic("strings.NewReplacer: odd argument count") + } + + // Possible implementations. + var ( + bb byteReplacer + bs byteStringReplacer + gen genericReplacer + ) + + allOldBytes, allNewBytes := true, true + for len(oldnew) > 0 { + old, new := oldnew[0], oldnew[1] + oldnew = oldnew[2:] + if len(old) != 1 { + allOldBytes = false + } + if len(new) != 1 { + allNewBytes = false + } + + // generic + gen.p = append(gen.p, pair{old, new}) + + // byte -> string + if allOldBytes { + bs.old.set(old[0]) + bs.new[old[0]] = []byte(new) + } + + // byte -> byte + if allOldBytes && allNewBytes { + bb.old.set(old[0]) + bb.new[old[0]] = new[0] + } + } + + if allOldBytes && allNewBytes { + return &Replacer{r: &bb} + } + if allOldBytes { + return &Replacer{r: &bs} + } + return &Replacer{r: &gen} +} + +// Replace returns a copy of s with all replacements performed. +func (r *Replacer) Replace(s string) string { + return r.r.Replace(s) +} + +// WriteString writes s to w with all replacements performed. +func (r *Replacer) WriteString(w io.Writer, s string) (n int, err os.Error) { + return r.r.WriteString(w, s) +} + +// genericReplacer is the fully generic (and least optimized) algorithm. +// It's used as a fallback when nothing faster can be used. +type genericReplacer struct { + p []pair +} + +type pair struct{ old, new string } + +type appendSliceWriter struct { + b []byte +} + +func (w *appendSliceWriter) Write(p []byte) (int, os.Error) { + w.b = append(w.b, p...) + return len(p), nil +} + +func (r *genericReplacer) Replace(s string) string { + // TODO(bradfitz): optimized version + n, _ := r.WriteString(discard, s) + w := appendSliceWriter{make([]byte, 0, n)} + r.WriteString(&w, s) + return string(w.b) +} + +func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err os.Error) { + lastEmpty := false // the last replacement was of the empty string +Input: + // TODO(bradfitz): optimized version + for i := 0; i < len(s); { + for _, p := range r.p { + if p.old == "" && lastEmpty { + // Don't let old match twice in a row. + // (it doesn't advance the input and + // would otherwise loop forever) + continue + } + if HasPrefix(s[i:], p.old) { + if p.new != "" { + wn, err := w.Write([]byte(p.new)) + n += wn + if err != nil { + return n, err + } + } + i += len(p.old) + lastEmpty = p.old == "" + continue Input + } + } + wn, err := w.Write([]byte{s[i]}) + n += wn + if err != nil { + return n, err + } + i++ + } + + // Final empty match at end. + for _, p := range r.p { + if p.old == "" { + if p.new != "" { + wn, err := w.Write([]byte(p.new)) + n += wn + if err != nil { + return n, err + } + } + break + } + } + + return n, nil +} + +// byteReplacer is the implementation that's used when all the "old" +// and "new" values are single ASCII bytes. +type byteReplacer struct { + // old has a bit set for each old byte that should be replaced. + old byteBitmap + + // replacement byte, indexed by old byte. only valid if + // corresponding old bit is set. + new [256]byte +} + +func (r *byteReplacer) Replace(s string) string { + var buf []byte // lazily allocated + for i := 0; i < len(s); i++ { + b := s[i] + if r.old[b>>5]&uint32(1<<(b&31)) != 0 { + if buf == nil { + buf = []byte(s) + } + buf[i] = r.new[b] + } + } + if buf == nil { + return s + } + return string(buf) +} + +func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err os.Error) { + // TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation. + bufsize := 32 << 10 + if len(s) < bufsize { + bufsize = len(s) + } + buf := make([]byte, bufsize) + + for len(s) > 0 { + ncopy := copy(buf, s[:]) + s = s[ncopy:] + for i, b := range buf[:ncopy] { + if r.old[b>>5]&uint32(1<<(b&31)) != 0 { + buf[i] = r.new[b] + } + } + wn, err := w.Write(buf[:ncopy]) + n += wn + if err != nil { + return n, err + } + } + return n, nil +} + +// byteStringReplacer is the implementation that's used when all the +// "old" values are single ASCII bytes but the "new" values vary in +// size. +type byteStringReplacer struct { + // old has a bit set for each old byte that should be replaced. + old byteBitmap + + // replacement string, indexed by old byte. only valid if + // corresponding old bit is set. + new [256][]byte +} + +func (r *byteStringReplacer) Replace(s string) string { + newSize := 0 + anyChanges := false + for i := 0; i < len(s); i++ { + b := s[i] + if r.old[b>>5]&uint32(1<<(b&31)) != 0 { + anyChanges = true + newSize += len(r.new[b]) + } else { + newSize++ + } + } + if !anyChanges { + return s + } + buf := make([]byte, newSize) + bi := buf + for i := 0; i < len(s); i++ { + b := s[i] + if r.old[b>>5]&uint32(1<<(b&31)) != 0 { + n := copy(bi[:], r.new[b]) + bi = bi[n:] + } else { + bi[0] = b + bi = bi[1:] + } + } + return string(buf) +} + +// WriteString maintains one buffer that's at most 32KB. The bytes in +// s are enumerated and the buffer is filled. If it reaches its +// capacity or a byte has a replacement, the buffer is flushed to w. +func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err os.Error) { + // TODO(bradfitz): use io.WriteString with slices of s instead. + bufsize := 32 << 10 + if len(s) < bufsize { + bufsize = len(s) + } + buf := make([]byte, bufsize) + bi := buf[:0] + + for i := 0; i < len(s); i++ { + b := s[i] + var new []byte + if r.old[b>>5]&uint32(1<<(b&31)) != 0 { + new = r.new[b] + } else { + bi = append(bi, b) + } + if len(bi) == cap(bi) || (len(bi) > 0 && len(new) > 0) { + nw, err := w.Write(bi) + n += nw + if err != nil { + return n, err + } + bi = buf[:0] + } + if len(new) > 0 { + nw, err := w.Write(new) + n += nw + if err != nil { + return n, err + } + } + } + if len(bi) > 0 { + nw, err := w.Write(bi) + n += nw + if err != nil { + return n, err + } + } + return n, nil +} + +// strings is too low-level to import io/ioutil +var discard io.Writer = devNull(0) + +type devNull int + +func (devNull) Write(p []byte) (int, os.Error) { + return len(p), nil +} diff --git a/libgo/go/strings/replace_test.go b/libgo/go/strings/replace_test.go new file mode 100644 index 0000000..e337856 --- /dev/null +++ b/libgo/go/strings/replace_test.go @@ -0,0 +1,174 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package strings_test + +import ( + "bytes" + "fmt" + "log" + . "strings" + "testing" +) + +var _ = log.Printf + +type ReplacerTest struct { + r *Replacer + in string + out string +} + +var htmlEscaper = NewReplacer("&", "&", "<", "<", ">", ">", "\"", """) + +// The http package's old HTML escaping function. +func oldhtmlEscape(s string) string { + s = Replace(s, "&", "&", -1) + s = Replace(s, "<", "<", -1) + s = Replace(s, ">", ">", -1) + s = Replace(s, "\"", """, -1) + s = Replace(s, "'", "'", -1) + return s +} + +var replacer = NewReplacer("aaa", "3[aaa]", "aa", "2[aa]", "a", "1[a]", "i", "i", + "longerst", "most long", "longer", "medium", "long", "short", + "X", "Y", "Y", "Z") + +var capitalLetters = NewReplacer("a", "A", "b", "B") + +var blankToXReplacer = NewReplacer("", "X", "o", "O") + +var ReplacerTests = []ReplacerTest{ + // byte->string + {htmlEscaper, "No changes", "No changes"}, + {htmlEscaper, "I <3 escaping & stuff", "I <3 escaping & stuff"}, + {htmlEscaper, "&&&", "&&&"}, + + // generic + {replacer, "fooaaabar", "foo3[aaa]b1[a]r"}, + {replacer, "long, longerst, longer", "short, most long, medium"}, + {replacer, "XiX", "YiY"}, + + // byte->byte + {capitalLetters, "brad", "BrAd"}, + {capitalLetters, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)}, + + // hitting "" special case + {blankToXReplacer, "oo", "XOXOX"}, +} + +func TestReplacer(t *testing.T) { + for i, tt := range ReplacerTests { + if s := tt.r.Replace(tt.in); s != tt.out { + t.Errorf("%d. Replace(%q) = %q, want %q", i, tt.in, s, tt.out) + } + var buf bytes.Buffer + n, err := tt.r.WriteString(&buf, tt.in) + if err != nil { + t.Errorf("%d. WriteString: %v", i, err) + continue + } + got := buf.String() + if got != tt.out { + t.Errorf("%d. WriteString(%q) wrote %q, want %q", i, tt.in, got, tt.out) + continue + } + if n != len(tt.out) { + t.Errorf("%d. WriteString(%q) wrote correct string but reported %d bytes; want %d (%q)", + i, tt.in, n, len(tt.out), tt.out) + } + } +} + +// pickAlgorithmTest is a test that verifies that given input for a +// Replacer that we pick the correct algorithm. +type pickAlgorithmTest struct { + r *Replacer + want string // name of algorithm +} + +var pickAlgorithmTests = []pickAlgorithmTest{ + {capitalLetters, "*strings.byteReplacer"}, + {NewReplacer("12", "123"), "*strings.genericReplacer"}, + {NewReplacer("1", "12"), "*strings.byteStringReplacer"}, + {htmlEscaper, "*strings.byteStringReplacer"}, +} + +func TestPickAlgorithm(t *testing.T) { + for i, tt := range pickAlgorithmTests { + got := fmt.Sprintf("%T", tt.r.Replacer()) + if got != tt.want { + t.Errorf("%d. algorithm = %s, want %s", i, got, tt.want) + } + } +} + +func BenchmarkGenericMatch(b *testing.B) { + str := Repeat("A", 100) + Repeat("B", 100) + generic := NewReplacer("a", "A", "b", "B", "12", "123") // varying lengths forces generic + for i := 0; i < b.N; i++ { + generic.Replace(str) + } +} + +func BenchmarkByteByteNoMatch(b *testing.B) { + str := Repeat("A", 100) + Repeat("B", 100) + for i := 0; i < b.N; i++ { + capitalLetters.Replace(str) + } +} + +func BenchmarkByteByteMatch(b *testing.B) { + str := Repeat("a", 100) + Repeat("b", 100) + for i := 0; i < b.N; i++ { + capitalLetters.Replace(str) + } +} + +func BenchmarkByteStringMatch(b *testing.B) { + str := "<" + Repeat("a", 99) + Repeat("b", 99) + ">" + for i := 0; i < b.N; i++ { + htmlEscaper.Replace(str) + } +} + +func BenchmarkHTMLEscapeNew(b *testing.B) { + str := "I <3 to escape HTML & other text too." + for i := 0; i < b.N; i++ { + htmlEscaper.Replace(str) + } +} + +func BenchmarkHTMLEscapeOld(b *testing.B) { + str := "I <3 to escape HTML & other text too." + for i := 0; i < b.N; i++ { + oldhtmlEscape(str) + } +} + +// BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces. +func BenchmarkByteByteReplaces(b *testing.B) { + str := Repeat("a", 100) + Repeat("b", 100) + for i := 0; i < b.N; i++ { + Replace(Replace(str, "a", "A", -1), "b", "B", -1) + } +} + +// BenchmarkByteByteMap compares byteByteImpl against Map. +func BenchmarkByteByteMap(b *testing.B) { + str := Repeat("a", 100) + Repeat("b", 100) + fn := func(r int) int { + switch r { + case 'a': + return int('A') + case 'b': + return int('B') + } + return r + } + for i := 0; i < b.N; i++ { + Map(fn, str) + } +} diff --git a/libgo/go/strings/strings.go b/libgo/go/strings/strings.go index c547297..58301fe 100644 --- a/libgo/go/strings/strings.go +++ b/libgo/go/strings/strings.go @@ -583,3 +583,58 @@ func Replace(s, old, new string, n int) string { w += copy(t[w:], s[start:]) return string(t[0:w]) } + +// EqualFold reports whether s and t, interpreted as UTF-8 strings, +// are equal under Unicode case-folding. +func EqualFold(s, t string) bool { + for s != "" && t != "" { + // Extract first rune from each string. + var sr, tr int + if s[0] < utf8.RuneSelf { + sr, s = int(s[0]), s[1:] + } else { + r, size := utf8.DecodeRuneInString(s) + sr, s = r, s[size:] + } + if t[0] < utf8.RuneSelf { + tr, t = int(t[0]), t[1:] + } else { + r, size := utf8.DecodeRuneInString(t) + tr, t = r, t[size:] + } + + // If they match, keep going; if not, return false. + + // Easy case. + if tr == sr { + continue + } + + // Make sr < tr to simplify what follows. + if tr < sr { + tr, sr = sr, tr + } + // Fast check for ASCII. + if tr < utf8.RuneSelf && 'A' <= sr && sr <= 'Z' { + // ASCII, and sr is upper case. tr must be lower case. + if tr == sr+'a'-'A' { + continue + } + return false + } + + // General case. SimpleFold(x) returns the next equivalent rune > x + // or wraps around to smaller values. + r := unicode.SimpleFold(sr) + for r != sr && r < tr { + r = unicode.SimpleFold(r) + } + if r == tr { + continue + } + return false + } + + // One string is empty. Are both? + return s == t +} diff --git a/libgo/go/strings/strings_test.go b/libgo/go/strings/strings_test.go index 409d4da..0859ddd 100644 --- a/libgo/go/strings/strings_test.go +++ b/libgo/go/strings/strings_test.go @@ -120,13 +120,11 @@ func TestLastIndex(t *testing.T) { runIndexTests(t, LastIndex, "LastIndex", l func TestIndexAny(t *testing.T) { runIndexTests(t, IndexAny, "IndexAny", indexAnyTests) } func TestLastIndexAny(t *testing.T) { runIndexTests(t, LastIndexAny, "LastIndexAny", lastIndexAnyTests) } -type IndexRuneTest struct { +var indexRuneTests = []struct { s string rune int out int -} - -var indexRuneTests = []IndexRuneTest{ +}{ {"a A x", 'A', 2}, {"some_text=some_value", '=', 9}, {"☺a", 'a', 3}, @@ -170,13 +168,11 @@ func BenchmarkIndex(b *testing.B) { } } -type ExplodeTest struct { +var explodetests = []struct { s string n int a []string -} - -var explodetests = []ExplodeTest{ +}{ {"", -1, []string{}}, {abcd, 4, []string{"a", "b", "c", "d"}}, {faces, 3, []string{"☺", "☻", "☹"}}, @@ -308,15 +304,16 @@ func TestFields(t *testing.T) { } } +var FieldsFuncTests = []FieldsTest{ + {"", []string{}}, + {"XX", []string{}}, + {"XXhiXXX", []string{"hi"}}, + {"aXXbXXXcX", []string{"a", "b", "c"}}, +} + func TestFieldsFunc(t *testing.T) { pred := func(c int) bool { return c == 'X' } - var fieldsFuncTests = []FieldsTest{ - {"", []string{}}, - {"XX", []string{}}, - {"XXhiXXX", []string{"hi"}}, - {"aXXbXXXcX", []string{"a", "b", "c"}}, - } - for _, tt := range fieldsFuncTests { + for _, tt := range FieldsFuncTests { a := FieldsFunc(tt.s, pred) if !eq(a, tt.a) { t.Errorf("FieldsFunc(%q) = %v, want %v", tt.s, a, tt.a) @@ -491,12 +488,10 @@ func TestSpecialCase(t *testing.T) { func TestTrimSpace(t *testing.T) { runStringTests(t, TrimSpace, "TrimSpace", trimSpaceTests) } -type TrimTest struct { +var trimTests = []struct { f func(string, string) string in, cutset, out string -} - -var trimTests = []TrimTest{ +}{ {Trim, "abba", "a", "bb"}, {Trim, "abba", "ab", ""}, {TrimLeft, "abba", "ab", ""}, @@ -555,11 +550,6 @@ var isValidRune = predicate{ "IsValidRune", } -type TrimFuncTest struct { - f predicate - in, out string -} - func not(p predicate) predicate { return predicate{ func(r int) bool { @@ -569,7 +559,10 @@ func not(p predicate) predicate { } } -var trimFuncTests = []TrimFuncTest{ +var trimFuncTests = []struct { + f predicate + in, out string +}{ {isSpace, space + " hello " + space, "hello"}, {isDigit, "\u0e50\u0e5212hello34\u0e50\u0e51", "hello"}, {isUpper, "\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F", "hello"}, @@ -588,13 +581,11 @@ func TestTrimFunc(t *testing.T) { } } -type IndexFuncTest struct { +var indexFuncTests = []struct { in string f predicate first, last int -} - -var indexFuncTests = []IndexFuncTest{ +}{ {"", isValidRune, -1, -1}, {"abc", isDigit, -1, -1}, {"0123", isDigit, 0, 3}, @@ -692,12 +683,10 @@ func TestCaseConsistency(t *testing.T) { */ } -type RepeatTest struct { +var RepeatTests = []struct { in, out string count int -} - -var RepeatTests = []RepeatTest{ +}{ {"", "", 0}, {"", "", 1}, {"", "", 2}, @@ -729,13 +718,11 @@ func runesEqual(a, b []int) bool { return true } -type RunesTest struct { +var RunesTests = []struct { in string out []int lossy bool -} - -var RunesTests = []RunesTest{ +}{ {"", []int{}, false}, {" ", []int{32}, false}, {"ABC", []int{65, 66, 67}, false}, @@ -846,14 +833,12 @@ func TestReadRune(t *testing.T) { } } -type ReplaceTest struct { +var ReplaceTests = []struct { in string old, new string n int out string -} - -var ReplaceTests = []ReplaceTest{ +}{ {"hello", "l", "L", 0, "hello"}, {"hello", "l", "L", -1, "heLLo"}, {"hello", "x", "X", -1, "hello"}, @@ -883,11 +868,9 @@ func TestReplace(t *testing.T) { } } -type TitleTest struct { +var TitleTests = []struct { in, out string -} - -var TitleTests = []TitleTest{ +}{ {"", ""}, {"a", "A"}, {" aaa aaa aaa ", " Aaa Aaa Aaa "}, @@ -905,12 +888,10 @@ func TestTitle(t *testing.T) { } } -type ContainsTest struct { +var ContainsTests = []struct { str, substr string expected bool -} - -var ContainsTests = []ContainsTest{ +}{ {"abc", "bc", true}, {"abc", "bcd", false}, {"abc", "", true}, @@ -925,3 +906,31 @@ func TestContains(t *testing.T) { } } } + +var EqualFoldTests = []struct { + s, t string + out bool +}{ + {"abc", "abc", true}, + {"ABcd", "ABcd", true}, + {"123abc", "123ABC", true}, + {"αβδ", "ΑΒΔ", true}, + {"abc", "xyz", false}, + {"abc", "XYZ", false}, + {"abcdefghijk", "abcdefghijX", false}, + {"abcdefghijk", "abcdefghij\u212A", true}, + {"abcdefghijK", "abcdefghij\u212A", true}, + {"abcdefghijkz", "abcdefghij\u212Ay", false}, + {"abcdefghijKz", "abcdefghij\u212Ay", false}, +} + +func TestEqualFold(t *testing.T) { + for _, tt := range EqualFoldTests { + if out := EqualFold(tt.s, tt.t); out != tt.out { + t.Errorf("EqualFold(%#q, %#q) = %v, want %v", tt.s, tt.t, out, tt.out) + } + if out := EqualFold(tt.t, tt.s); out != tt.out { + t.Errorf("EqualFold(%#q, %#q) = %v, want %v", tt.t, tt.s, out, tt.out) + } + } +} |