aboutsummaryrefslogtreecommitdiff
path: root/libgo/go/unicode/utf8
diff options
context:
space:
mode:
authorGiuliano Belinassi <giuliano.belinassi@usp.br>2020-08-22 17:43:43 -0300
committerGiuliano Belinassi <giuliano.belinassi@usp.br>2020-08-22 17:43:43 -0300
commita926878ddbd5a98b272c22171ce58663fc04c3e0 (patch)
tree86af256e5d9a9c06263c00adc90e5fe348008c43 /libgo/go/unicode/utf8
parent542730f087133690b47e036dfd43eb0db8a650ce (diff)
parent07cbaed8ba7d1b6e4ab3a9f44175502a4e1ecdb1 (diff)
downloadgcc-devel/autopar_devel.zip
gcc-devel/autopar_devel.tar.gz
gcc-devel/autopar_devel.tar.bz2
Merge branch 'autopar_rebase2' into autopar_develdevel/autopar_devel
Quickly commit changes in the rebase branch.
Diffstat (limited to 'libgo/go/unicode/utf8')
-rw-r--r--libgo/go/unicode/utf8/utf8.go28
1 files changed, 28 insertions, 0 deletions
diff --git a/libgo/go/unicode/utf8/utf8.go b/libgo/go/unicode/utf8/utf8.go
index b8368fc..ef0d740 100644
--- a/libgo/go/unicode/utf8/utf8.go
+++ b/libgo/go/unicode/utf8/utf8.go
@@ -448,6 +448,20 @@ func RuneStart(b byte) bool { return b&0xC0 != 0x80 }
// Valid reports whether p consists entirely of valid UTF-8-encoded runes.
func Valid(p []byte) bool {
+ // Fast path. Check for and skip 8 bytes of ASCII characters per iteration.
+ for len(p) >= 8 {
+ // Combining two 32 bit loads allows the same code to be used
+ // for 32 and 64 bit platforms.
+ // The compiler can generate a 32bit load for first32 and second32
+ // on many platforms. See test/codegen/memcombine.go.
+ first32 := uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24
+ second32 := uint32(p[4]) | uint32(p[5])<<8 | uint32(p[6])<<16 | uint32(p[7])<<24
+ if (first32|second32)&0x80808080 != 0 {
+ // Found a non ASCII byte (>= RuneSelf).
+ break
+ }
+ p = p[8:]
+ }
n := len(p)
for i := 0; i < n; {
pi := p[i]
@@ -480,6 +494,20 @@ func Valid(p []byte) bool {
// ValidString reports whether s consists entirely of valid UTF-8-encoded runes.
func ValidString(s string) bool {
+ // Fast path. Check for and skip 8 bytes of ASCII characters per iteration.
+ for len(s) >= 8 {
+ // Combining two 32 bit loads allows the same code to be used
+ // for 32 and 64 bit platforms.
+ // The compiler can generate a 32bit load for first32 and second32
+ // on many platforms. See test/codegen/memcombine.go.
+ first32 := uint32(s[0]) | uint32(s[1])<<8 | uint32(s[2])<<16 | uint32(s[3])<<24
+ second32 := uint32(s[4]) | uint32(s[5])<<8 | uint32(s[6])<<16 | uint32(s[7])<<24
+ if (first32|second32)&0x80808080 != 0 {
+ // Found a non ASCII byte (>= RuneSelf).
+ break
+ }
+ s = s[8:]
+ }
n := len(s)
for i := 0; i < n; {
si := s[i]