diff options
Diffstat (limited to 'libgo/go/strings/replace.go')
-rw-r--r-- | libgo/go/strings/replace.go | 77 |
1 files changed, 56 insertions, 21 deletions
diff --git a/libgo/go/strings/replace.go b/libgo/go/strings/replace.go index 4752641..58a11a6 100644 --- a/libgo/go/strings/replace.go +++ b/libgo/go/strings/replace.go @@ -18,8 +18,9 @@ type replacer interface { WriteString(w io.Writer, s string) (n int, err error) } -// NewReplacer returns a new Replacer from a list of old, new string pairs. -// Replacements are performed in order, without overlapping matches. +// NewReplacer returns a new Replacer from a list of old, new string +// pairs. Replacements are performed in the order they appear in the +// target string, without overlapping matches. func NewReplacer(oldnew ...string) *Replacer { if len(oldnew)%2 == 1 { panic("strings.NewReplacer: odd argument count") @@ -54,13 +55,21 @@ func NewReplacer(oldnew ...string) *Replacer { return &Replacer{r: &r} } - r := byteStringReplacer{} + r := byteStringReplacer{toReplace: make([]string, 0, len(oldnew)/2)} // The first occurrence of old->new map takes precedence // over the others with the same old string. for i := len(oldnew) - 2; i >= 0; i -= 2 { o := oldnew[i][0] n := oldnew[i+1] - r[o] = []byte(n) + // To avoid counting repetitions multiple times. + if r.replacements[o] == nil { + // We need to use string([]byte{o}) instead of string(o), + // to avoid utf8 encoding of o. + // E. g. byte(150) produces string of length 2. + r.toReplace = append(r.toReplace, string([]byte{o})) + } + r.replacements[o] = []byte(n) + } return &Replacer{r: &r} } @@ -454,34 +463,60 @@ func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) { // byteStringReplacer is the implementation that's used when all the // "old" values are single ASCII bytes but the "new" values vary in size. -// The array contains replacement byte slices indexed by old byte. -// A nil []byte means that the old byte should not be replaced. -type byteStringReplacer [256][]byte +type byteStringReplacer struct { + // replacements contains replacement byte slices indexed by old byte. + // A nil []byte means that the old byte should not be replaced. + replacements [256][]byte + // toReplace keeps a list of bytes to replace. Depending on length of toReplace + // and length of target string it may be faster to use Count, or a plain loop. + // We store single byte as a string, because Count takes a string. + toReplace []string +} + +// countCutOff controls the ratio of a string length to a number of replacements +// at which (*byteStringReplacer).Replace switches algorithms. +// For strings with higher ration of length to replacements than that value, +// we call Count, for each replacement from toReplace. +// For strings, with a lower ratio we use simple loop, because of Count overhead. +// countCutOff is an empirically determined overhead multiplier. +// TODO(tocarip) revisit once we have register-based abi/mid-stack inlining. +const countCutOff = 8 func (r *byteStringReplacer) Replace(s string) string { newSize := len(s) anyChanges := false - for i := 0; i < len(s); i++ { - b := s[i] - if r[b] != nil { - anyChanges = true - // The -1 is because we are replacing 1 byte with len(r[b]) bytes. - newSize += len(r[b]) - 1 + // Is it faster to use Count? + if len(r.toReplace)*countCutOff <= len(s) { + for _, x := range r.toReplace { + if c := Count(s, x); c != 0 { + // The -1 is because we are replacing 1 byte with len(replacements[b]) bytes. + newSize += c * (len(r.replacements[x[0]]) - 1) + anyChanges = true + } + + } + } else { + for i := 0; i < len(s); i++ { + b := s[i] + if r.replacements[b] != nil { + // See above for explanation of -1 + newSize += len(r.replacements[b]) - 1 + anyChanges = true + } } } if !anyChanges { return s } buf := make([]byte, newSize) - bi := buf + j := 0 for i := 0; i < len(s); i++ { b := s[i] - if r[b] != nil { - n := copy(bi, r[b]) - bi = bi[n:] + if r.replacements[b] != nil { + j += copy(buf[j:], r.replacements[b]) } else { - bi[0] = b - bi = bi[1:] + buf[j] = b + j++ } } return string(buf) @@ -492,7 +527,7 @@ func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err erro last := 0 for i := 0; i < len(s); i++ { b := s[i] - if r[b] == nil { + if r.replacements[b] == nil { continue } if last != i { @@ -503,7 +538,7 @@ func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err erro } } last = i + 1 - nw, err := w.Write(r[b]) + nw, err := w.Write(r.replacements[b]) n += nw if err != nil { return n, err |