diff options
author | Giuliano Belinassi <giuliano.belinassi@usp.br> | 2020-08-22 17:43:43 -0300 |
---|---|---|
committer | Giuliano Belinassi <giuliano.belinassi@usp.br> | 2020-08-22 17:43:43 -0300 |
commit | a926878ddbd5a98b272c22171ce58663fc04c3e0 (patch) | |
tree | 86af256e5d9a9c06263c00adc90e5fe348008c43 /libgo/go/unicode/utf8/utf8.go | |
parent | 542730f087133690b47e036dfd43eb0db8a650ce (diff) | |
parent | 07cbaed8ba7d1b6e4ab3a9f44175502a4e1ecdb1 (diff) | |
download | gcc-devel/autopar_devel.zip gcc-devel/autopar_devel.tar.gz gcc-devel/autopar_devel.tar.bz2 |
Merge branch 'autopar_rebase2' into autopar_develdevel/autopar_devel
Quickly commit changes in the rebase branch.
Diffstat (limited to 'libgo/go/unicode/utf8/utf8.go')
-rw-r--r-- | libgo/go/unicode/utf8/utf8.go | 28 |
1 files changed, 28 insertions, 0 deletions
diff --git a/libgo/go/unicode/utf8/utf8.go b/libgo/go/unicode/utf8/utf8.go index b8368fc..ef0d740 100644 --- a/libgo/go/unicode/utf8/utf8.go +++ b/libgo/go/unicode/utf8/utf8.go @@ -448,6 +448,20 @@ func RuneStart(b byte) bool { return b&0xC0 != 0x80 } // Valid reports whether p consists entirely of valid UTF-8-encoded runes. func Valid(p []byte) bool { + // Fast path. Check for and skip 8 bytes of ASCII characters per iteration. + for len(p) >= 8 { + // Combining two 32 bit loads allows the same code to be used + // for 32 and 64 bit platforms. + // The compiler can generate a 32bit load for first32 and second32 + // on many platforms. See test/codegen/memcombine.go. + first32 := uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24 + second32 := uint32(p[4]) | uint32(p[5])<<8 | uint32(p[6])<<16 | uint32(p[7])<<24 + if (first32|second32)&0x80808080 != 0 { + // Found a non ASCII byte (>= RuneSelf). + break + } + p = p[8:] + } n := len(p) for i := 0; i < n; { pi := p[i] @@ -480,6 +494,20 @@ func Valid(p []byte) bool { // ValidString reports whether s consists entirely of valid UTF-8-encoded runes. func ValidString(s string) bool { + // Fast path. Check for and skip 8 bytes of ASCII characters per iteration. + for len(s) >= 8 { + // Combining two 32 bit loads allows the same code to be used + // for 32 and 64 bit platforms. + // The compiler can generate a 32bit load for first32 and second32 + // on many platforms. See test/codegen/memcombine.go. + first32 := uint32(s[0]) | uint32(s[1])<<8 | uint32(s[2])<<16 | uint32(s[3])<<24 + second32 := uint32(s[4]) | uint32(s[5])<<8 | uint32(s[6])<<16 | uint32(s[7])<<24 + if (first32|second32)&0x80808080 != 0 { + // Found a non ASCII byte (>= RuneSelf). + break + } + s = s[8:] + } n := len(s) for i := 0; i < n; { si := s[i] |