diff options
Diffstat (limited to 'libgo/go/unicode/utf8/utf8.go')
-rw-r--r-- | libgo/go/unicode/utf8/utf8.go | 28 |
1 files changed, 28 insertions, 0 deletions
diff --git a/libgo/go/unicode/utf8/utf8.go b/libgo/go/unicode/utf8/utf8.go index b8368fc..ef0d740 100644 --- a/libgo/go/unicode/utf8/utf8.go +++ b/libgo/go/unicode/utf8/utf8.go @@ -448,6 +448,20 @@ func RuneStart(b byte) bool { return b&0xC0 != 0x80 } // Valid reports whether p consists entirely of valid UTF-8-encoded runes. func Valid(p []byte) bool { + // Fast path. Check for and skip 8 bytes of ASCII characters per iteration. + for len(p) >= 8 { + // Combining two 32 bit loads allows the same code to be used + // for 32 and 64 bit platforms. + // The compiler can generate a 32bit load for first32 and second32 + // on many platforms. See test/codegen/memcombine.go. + first32 := uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24 + second32 := uint32(p[4]) | uint32(p[5])<<8 | uint32(p[6])<<16 | uint32(p[7])<<24 + if (first32|second32)&0x80808080 != 0 { + // Found a non ASCII byte (>= RuneSelf). + break + } + p = p[8:] + } n := len(p) for i := 0; i < n; { pi := p[i] @@ -480,6 +494,20 @@ func Valid(p []byte) bool { // ValidString reports whether s consists entirely of valid UTF-8-encoded runes. func ValidString(s string) bool { + // Fast path. Check for and skip 8 bytes of ASCII characters per iteration. + for len(s) >= 8 { + // Combining two 32 bit loads allows the same code to be used + // for 32 and 64 bit platforms. + // The compiler can generate a 32bit load for first32 and second32 + // on many platforms. See test/codegen/memcombine.go. + first32 := uint32(s[0]) | uint32(s[1])<<8 | uint32(s[2])<<16 | uint32(s[3])<<24 + second32 := uint32(s[4]) | uint32(s[5])<<8 | uint32(s[6])<<16 | uint32(s[7])<<24 + if (first32|second32)&0x80808080 != 0 { + // Found a non ASCII byte (>= RuneSelf). + break + } + s = s[8:] + } n := len(s) for i := 0; i < n; { si := s[i] |