diff options
author | Ian Lance Taylor <iant@golang.org> | 2018-01-09 01:23:08 +0000 |
---|---|---|
committer | Ian Lance Taylor <ian@gcc.gnu.org> | 2018-01-09 01:23:08 +0000 |
commit | 1a2f01efa63036a5104f203a4789e682c0e0915d (patch) | |
tree | 373e15778dc8295354584e1f86915ae493b604ff /libgo/go/bytes/bytes.go | |
parent | 8799df67f2dab88f9fda11739c501780a85575e2 (diff) | |
download | gcc-1a2f01efa63036a5104f203a4789e682c0e0915d.zip gcc-1a2f01efa63036a5104f203a4789e682c0e0915d.tar.gz gcc-1a2f01efa63036a5104f203a4789e682c0e0915d.tar.bz2 |
libgo: update to Go1.10beta1
Update the Go library to the 1.10beta1 release.
Requires a few changes to the compiler for modifications to the map
runtime code, and to handle some nowritebarrier cases in the runtime.
Reviewed-on: https://go-review.googlesource.com/86455
gotools/:
* Makefile.am (go_cmd_vet_files): New variable.
(go_cmd_buildid_files, go_cmd_test2json_files): New variables.
(s-zdefaultcc): Change from constants to functions.
(noinst_PROGRAMS): Add vet, buildid, and test2json.
(cgo$(EXEEXT)): Link against $(LIBGOTOOL).
(vet$(EXEEXT)): New target.
(buildid$(EXEEXT)): New target.
(test2json$(EXEEXT)): New target.
(install-exec-local): Install all $(noinst_PROGRAMS).
(uninstall-local): Uninstasll all $(noinst_PROGRAMS).
(check-go-tool): Depend on $(noinst_PROGRAMS). Copy down
objabi.go.
(check-runtime): Depend on $(noinst_PROGRAMS).
(check-cgo-test, check-carchive-test): Likewise.
(check-vet): New target.
(check): Depend on check-vet. Look at cmd_vet-testlog.
(.PHONY): Add check-vet.
* Makefile.in: Rebuild.
From-SVN: r256365
Diffstat (limited to 'libgo/go/bytes/bytes.go')
-rw-r--r-- | libgo/go/bytes/bytes.go | 292 |
1 files changed, 200 insertions, 92 deletions
diff --git a/libgo/go/bytes/bytes.go b/libgo/go/bytes/bytes.go index 7c878af..9af177f 100644 --- a/libgo/go/bytes/bytes.go +++ b/libgo/go/bytes/bytes.go @@ -39,7 +39,7 @@ func explode(s []byte, n int) [][]byte { break } _, size = utf8.DecodeRune(s) - a[na] = s[0:size] + a[na] = s[0:size:size] s = s[size:] na++ } @@ -68,12 +68,12 @@ func Contains(b, subslice []byte) bool { return Index(b, subslice) != -1 } -// ContainsAny reports whether any of the UTF-8-encoded Unicode code points in chars are within b. +// ContainsAny reports whether any of the UTF-8-encoded code points in chars are within b. func ContainsAny(b []byte, chars string) bool { return IndexAny(b, chars) >= 0 } -// ContainsRune reports whether the Unicode code point r is within b. +// ContainsRune reports whether the rune is contained in the UTF-8-encoded byte slice b. func ContainsRune(b []byte, r rune) bool { return IndexRune(b, r) >= 0 } @@ -112,7 +112,7 @@ func LastIndexByte(s []byte, c byte) int { return -1 } -// IndexRune interprets s as a sequence of UTF-8-encoded Unicode code points. +// IndexRune interprets s as a sequence of UTF-8-encoded code points. // It returns the byte index of the first occurrence in s of the given rune. // It returns -1 if rune is not present in s. // If r is utf8.RuneError, it returns the first instance of any @@ -144,29 +144,31 @@ func IndexRune(s []byte, r rune) int { // code points in chars. It returns -1 if chars is empty or if there is no code // point in common. func IndexAny(s []byte, chars string) int { - if len(chars) > 0 { - if len(s) > 8 { - if as, isASCII := makeASCIISet(chars); isASCII { - for i, c := range s { - if as.contains(c) { - return i - } + if chars == "" { + // Avoid scanning all of s. + return -1 + } + if len(s) > 8 { + if as, isASCII := makeASCIISet(chars); isASCII { + for i, c := range s { + if as.contains(c) { + return i } - return -1 } + return -1 } - var width int - for i := 0; i < len(s); i += width { - r := rune(s[i]) - if r < utf8.RuneSelf { - width = 1 - } else { - r, width = utf8.DecodeRune(s[i:]) - } - for _, ch := range chars { - if r == ch { - return i - } + } + var width int + for i := 0; i < len(s); i += width { + r := rune(s[i]) + if r < utf8.RuneSelf { + width = 1 + } else { + r, width = utf8.DecodeRune(s[i:]) + } + for _, ch := range chars { + if r == ch { + return i } } } @@ -178,24 +180,26 @@ func IndexAny(s []byte, chars string) int { // the Unicode code points in chars. It returns -1 if chars is empty or if // there is no code point in common. func LastIndexAny(s []byte, chars string) int { - if len(chars) > 0 { - if len(s) > 8 { - if as, isASCII := makeASCIISet(chars); isASCII { - for i := len(s) - 1; i >= 0; i-- { - if as.contains(s[i]) { - return i - } + if chars == "" { + // Avoid scanning all of s. + return -1 + } + if len(s) > 8 { + if as, isASCII := makeASCIISet(chars); isASCII { + for i := len(s) - 1; i >= 0; i-- { + if as.contains(s[i]) { + return i } - return -1 } + return -1 } - for i := len(s); i > 0; { - r, size := utf8.DecodeLastRune(s[:i]) - i -= size - for _, c := range chars { - if r == c { - return i - } + } + for i := len(s); i > 0; { + r, size := utf8.DecodeLastRune(s[:i]) + i -= size + for _, c := range chars { + if r == c { + return i } } } @@ -223,7 +227,7 @@ func genSplit(s, sep []byte, sepSave, n int) [][]byte { if m < 0 { break } - a[i] = s[:m+sepSave] + a[i] = s[: m+sepSave : m+sepSave] s = s[m+len(sep):] i++ } @@ -265,52 +269,112 @@ func SplitAfter(s, sep []byte) [][]byte { return genSplit(s, sep, len(sep), -1) } -// Fields splits the slice s around each instance of one or more consecutive white space -// characters, returning a slice of subslices of s or an empty list if s contains only white space. +var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1} + +// Fields interprets s as a sequence of UTF-8-encoded code points. +// It splits the slice s around each instance of one or more consecutive white space +// characters, as defined by unicode.IsSpace, returning a slice of subslices of s or an +// empty slice if s contains only white space. func Fields(s []byte) [][]byte { - return FieldsFunc(s, unicode.IsSpace) + // First count the fields. + // This is an exact count if s is ASCII, otherwise it is an approximation. + n := 0 + wasSpace := 1 + // setBits is used to track which bits are set in the bytes of s. + setBits := uint8(0) + for i := 0; i < len(s); i++ { + r := s[i] + setBits |= r + isSpace := int(asciiSpace[r]) + n += wasSpace & ^isSpace + wasSpace = isSpace + } + + if setBits >= utf8.RuneSelf { + // Some runes in the input slice are not ASCII. + return FieldsFunc(s, unicode.IsSpace) + } + + // ASCII fast path + a := make([][]byte, n) + na := 0 + fieldStart := 0 + i := 0 + // Skip spaces in the front of the input. + for i < len(s) && asciiSpace[s[i]] != 0 { + i++ + } + fieldStart = i + for i < len(s) { + if asciiSpace[s[i]] == 0 { + i++ + continue + } + a[na] = s[fieldStart:i:i] + na++ + i++ + // Skip spaces in between fields. + for i < len(s) && asciiSpace[s[i]] != 0 { + i++ + } + fieldStart = i + } + if fieldStart < len(s) { // Last field might end at EOF. + a[na] = s[fieldStart:len(s):len(s)] + } + return a } -// FieldsFunc interprets s as a sequence of UTF-8-encoded Unicode code points. +// FieldsFunc interprets s as a sequence of UTF-8-encoded code points. // It splits the slice s at each run of code points c satisfying f(c) and // returns a slice of subslices of s. If all code points in s satisfy f(c), or // len(s) == 0, an empty slice is returned. // FieldsFunc makes no guarantees about the order in which it calls f(c). // If f does not return consistent results for a given c, FieldsFunc may crash. func FieldsFunc(s []byte, f func(rune) bool) [][]byte { - n := 0 - inField := false - for i := 0; i < len(s); { - r, size := utf8.DecodeRune(s[i:]) - wasInField := inField - inField = !f(r) - if inField && !wasInField { - n++ - } - i += size + // A span is used to record a slice of s of the form s[start:end]. + // The start index is inclusive and the end index is exclusive. + type span struct { + start int + end int } + spans := make([]span, 0, 32) - a := make([][]byte, n) - na := 0 - fieldStart := -1 - for i := 0; i <= len(s) && na < n; { - r, size := utf8.DecodeRune(s[i:]) - if fieldStart < 0 && size > 0 && !f(r) { - fieldStart = i - i += size - continue - } - if fieldStart >= 0 && (size == 0 || f(r)) { - a[na] = s[fieldStart:i] - na++ - fieldStart = -1 + // Find the field start and end indices. + wasField := false + fromIndex := 0 + for i := 0; i < len(s); { + size := 1 + r := rune(s[i]) + if r >= utf8.RuneSelf { + r, size = utf8.DecodeRune(s[i:]) } - if size == 0 { - break + if f(r) { + if wasField { + spans = append(spans, span{start: fromIndex, end: i}) + wasField = false + } + } else { + if !wasField { + fromIndex = i + wasField = true + } } i += size } - return a[0:na] + + // Last field might end at EOF. + if wasField { + spans = append(spans, span{fromIndex, len(s)}) + } + + // Create subslices from recorded field indices. + a := make([][]byte, len(spans)) + for i, span := range spans { + a[i] = s[span.start:span.end:span.end] + } + + return a } // Join concatenates the elements of s to create a new byte slice. The separator @@ -349,8 +413,8 @@ func HasSuffix(s, suffix []byte) bool { // Map returns a copy of the byte slice s with all its characters modified // according to the mapping function. If mapping returns a negative value, the character is -// dropped from the string with no replacement. The characters in s and the -// output are interpreted as UTF-8-encoded Unicode code points. +// dropped from the byte slice with no replacement. The characters in s and the +// output are interpreted as UTF-8-encoded code points. func Map(mapping func(r rune) rune, s []byte) []byte { // In the worst case, the slice can grow when mapped, making // things unpleasant. But it's so rare we barge in assuming it's @@ -408,28 +472,28 @@ func Repeat(b []byte, count int) []byte { return nb } -// ToUpper returns a copy of the byte slice s with all Unicode letters mapped to their upper case. +// ToUpper treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters within it mapped to their upper case. func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) } -// ToLower returns a copy of the byte slice s with all Unicode letters mapped to their lower case. +// ToLower treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their lower case. func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) } -// ToTitle returns a copy of the byte slice s with all Unicode letters mapped to their title case. +// ToTitle treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their title case. func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) } -// ToUpperSpecial returns a copy of the byte slice s with all Unicode letters mapped to their +// ToUpperSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their // upper case, giving priority to the special casing rules. func ToUpperSpecial(c unicode.SpecialCase, s []byte) []byte { return Map(func(r rune) rune { return c.ToUpper(r) }, s) } -// ToLowerSpecial returns a copy of the byte slice s with all Unicode letters mapped to their +// ToLowerSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their // lower case, giving priority to the special casing rules. func ToLowerSpecial(c unicode.SpecialCase, s []byte) []byte { return Map(func(r rune) rune { return c.ToLower(r) }, s) } -// ToTitleSpecial returns a copy of the byte slice s with all Unicode letters mapped to their +// ToTitleSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their // title case, giving priority to the special casing rules. func ToTitleSpecial(c unicode.SpecialCase, s []byte) []byte { return Map(func(r rune) rune { return c.ToTitle(r) }, s) @@ -460,8 +524,8 @@ func isSeparator(r rune) bool { return unicode.IsSpace(r) } -// Title returns a copy of s with all Unicode letters that begin words -// mapped to their title case. +// Title treats s as UTF-8-encoded bytes and returns a copy with all Unicode letters that begin +// words mapped to their title case. // // BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly. func Title(s []byte) []byte { @@ -481,8 +545,8 @@ func Title(s []byte) []byte { s) } -// TrimLeftFunc returns a subslice of s by slicing off all leading UTF-8-encoded -// Unicode code points c that satisfy f(c). +// TrimLeftFunc treats s as UTF-8-encoded bytes and returns a subslice of s by slicing off +// all leading UTF-8-encoded code points c that satisfy f(c). func TrimLeftFunc(s []byte, f func(r rune) bool) []byte { i := indexFunc(s, f, false) if i == -1 { @@ -491,8 +555,8 @@ func TrimLeftFunc(s []byte, f func(r rune) bool) []byte { return s[i:] } -// TrimRightFunc returns a subslice of s by slicing off all trailing UTF-8 -// encoded Unicode code points c that satisfy f(c). +// TrimRightFunc returns a subslice of s by slicing off all trailing +// UTF-8-encoded code points c that satisfy f(c). func TrimRightFunc(s []byte, f func(r rune) bool) []byte { i := lastIndexFunc(s, f, false) if i >= 0 && s[i] >= utf8.RuneSelf { @@ -505,7 +569,7 @@ func TrimRightFunc(s []byte, f func(r rune) bool) []byte { } // TrimFunc returns a subslice of s by slicing off all leading and trailing -// UTF-8-encoded Unicode code points c that satisfy f(c). +// UTF-8-encoded code points c that satisfy f(c). func TrimFunc(s []byte, f func(r rune) bool) []byte { return TrimRightFunc(TrimLeftFunc(s, f), f) } @@ -528,14 +592,14 @@ func TrimSuffix(s, suffix []byte) []byte { return s } -// IndexFunc interprets s as a sequence of UTF-8-encoded Unicode code points. +// IndexFunc interprets s as a sequence of UTF-8-encoded code points. // It returns the byte index in s of the first Unicode // code point satisfying f(c), or -1 if none do. func IndexFunc(s []byte, f func(r rune) bool) int { return indexFunc(s, f, true) } -// LastIndexFunc interprets s as a sequence of UTF-8-encoded Unicode code points. +// LastIndexFunc interprets s as a sequence of UTF-8-encoded code points. // It returns the byte index in s of the last Unicode // code point satisfying f(c), or -1 if none do. func LastIndexFunc(s []byte, f func(r rune) bool) int { @@ -626,19 +690,19 @@ func makeCutsetFunc(cutset string) func(r rune) bool { } // Trim returns a subslice of s by slicing off all leading and -// trailing UTF-8-encoded Unicode code points contained in cutset. +// trailing UTF-8-encoded code points contained in cutset. func Trim(s []byte, cutset string) []byte { return TrimFunc(s, makeCutsetFunc(cutset)) } // TrimLeft returns a subslice of s by slicing off all leading -// UTF-8-encoded Unicode code points contained in cutset. +// UTF-8-encoded code points contained in cutset. func TrimLeft(s []byte, cutset string) []byte { return TrimLeftFunc(s, makeCutsetFunc(cutset)) } // TrimRight returns a subslice of s by slicing off all trailing -// UTF-8-encoded Unicode code points that are contained in cutset. +// UTF-8-encoded code points that are contained in cutset. func TrimRight(s []byte, cutset string) []byte { return TrimRightFunc(s, makeCutsetFunc(cutset)) } @@ -649,7 +713,8 @@ func TrimSpace(s []byte) []byte { return TrimFunc(s, unicode.IsSpace) } -// Runes returns a slice of runes (Unicode code points) equivalent to s. +// Runes interprets s as a sequence of UTF-8-encoded code points. +// It returns a slice of runes (Unicode code points) equivalent to s. func Runes(s []byte) []rune { t := make([]rune, utf8.RuneCount(s)) i := 0 @@ -758,3 +823,46 @@ func EqualFold(s, t []byte) bool { // One string is empty. Are both? return len(s) == len(t) } + +func indexRabinKarp(s, sep []byte) int { + // Rabin-Karp search + hashsep, pow := hashStr(sep) + n := len(sep) + var h uint32 + for i := 0; i < n; i++ { + h = h*primeRK + uint32(s[i]) + } + if h == hashsep && Equal(s[:n], sep) { + return 0 + } + for i := n; i < len(s); { + h *= primeRK + h += uint32(s[i]) + h -= pow * uint32(s[i-n]) + i++ + if h == hashsep && Equal(s[i-n:i], sep) { + return i - n + } + } + return -1 +} + +// primeRK is the prime base used in Rabin-Karp algorithm. +const primeRK = 16777619 + +// hashStr returns the hash and the appropriate multiplicative +// factor for use in Rabin-Karp algorithm. +func hashStr(sep []byte) (uint32, uint32) { + hash := uint32(0) + for i := 0; i < len(sep); i++ { + hash = hash*primeRK + uint32(sep[i]) + } + var pow, sq uint32 = 1, primeRK + for i := len(sep); i > 0; i >>= 1 { + if i&1 != 0 { + pow *= sq + } + sq *= sq + } + return hash, pow +} |