about summary refs log tree commit diff
path: root/libgo
diff options
context:
space:
mode:
author Ian Lance Taylor <ian@gcc.gnu.org> 2016-12-08 16:37:54 +0000
committer Ian Lance Taylor <ian@gcc.gnu.org> 2016-12-08 16:37:54 +0000
commit 453060a9062959ceb1522b8b99adeb01b2a3f7b7 (patch)
tree 34a4ee9d0eb2d5e59c8a8ffc86e75b6e7b9a572c /libgo
parent b2264b0964560e724010aac2faf4f6a3ec2729f7 (diff)
download gcc-453060a9062959ceb1522b8b99adeb01b2a3f7b7.zip
gcc-453060a9062959ceb1522b8b99adeb01b2a3f7b7.tar.gz
gcc-453060a9062959ceb1522b8b99adeb01b2a3f7b7.tar.bz2
runtime: copy memory hash code from Go 1.7
Rewrite the AES hashing code from gc assembler to C code using intrinsics. The resulting code generates the same hash code for the same input as the gc code--that doesn't matter as such, but testing it ensures that the C code does something useful.

Also change mips64pe32le to mips64p32le in configure script--noticed during CL review.

Reviewed-on: https://go-review.googlesource.com/34022

From-SVN: r243445
Diffstat (limited to 'libgo')
-rw-r--r-- libgo/Makefile.am 1
-rw-r--r-- libgo/Makefile.in 11
-rwxr-xr-x libgo/configure 2
-rw-r--r-- libgo/configure.ac 2
-rw-r--r-- libgo/go/runtime/alg.go 45
-rw-r--r-- libgo/go/runtime/hash32.go 94
-rw-r--r-- libgo/go/runtime/hash64.go 94
-rw-r--r-- libgo/go/runtime/os_gccgo.go 23
-rw-r--r-- libgo/go/runtime/runtime2.go 12
-rw-r--r-- libgo/go/runtime/stubs.go 6
-rw-r--r-- libgo/go/runtime/unaligned1.go 17
-rw-r--r-- libgo/go/runtime/unaligned2.go 20
-rw-r--r-- libgo/runtime/aeshash.c 583
-rw-r--r-- libgo/runtime/go-libmain.c 1
-rw-r--r-- libgo/runtime/go-main.c 1
-rw-r--r-- libgo/runtime/go-type-identity.c 40
-rw-r--r-- libgo/runtime/go-type.h 1
-rw-r--r-- libgo/runtime/proc.c 3
-rw-r--r-- libgo/runtime/runtime.h 6
-rw-r--r-- libgo/runtime/runtime_c.c 19
20 files changed, 935 insertions, 46 deletions
diff --git a/libgo/Makefile.am b/libgo/Makefile.am
index 7165dfd..b9aee9d 100644
--- a/libgo/Makefile.am
+++ b/libgo/Makefile.am
@@ -422,6 +422,7 @@ endif
endif
runtime_files = \
+ runtime/aeshash.c \
runtime/go-assert.c \
runtime/go-breakpoint.c \
runtime/go-caller.c \
diff --git a/libgo/Makefile.in b/libgo/Makefile.in
index 9b87db0..86d7aa8 100644
--- a/libgo/Makefile.in
+++ b/libgo/Makefile.in
@@ -189,7 +189,7 @@ libgo_llgo_la_DEPENDENCIES = $(am__DEPENDENCIES_4)
@LIBGO_IS_DARWIN_TRUE@@LIBGO_IS_LINUX_FALSE@am__objects_4 = \
@LIBGO_IS_DARWIN_TRUE@@LIBGO_IS_LINUX_FALSE@ getncpu-bsd.lo
@LIBGO_IS_LINUX_TRUE@am__objects_4 = getncpu-linux.lo
-am__objects_5 = go-assert.lo go-breakpoint.lo go-caller.lo \
+am__objects_5 = aeshash.lo go-assert.lo go-breakpoint.lo go-caller.lo \
go-callers.lo go-cdiv.lo go-cgo.lo go-construct-map.lo \
go-ffi.lo go-fieldtrack.lo go-matherr.lo go-memclr.lo \
go-memcmp.lo go-memequal.lo go-memmove.lo go-nanotime.lo \
@@ -767,6 +767,7 @@ toolexeclibgounicode_DATA = \
@LIBGO_IS_DARWIN_TRUE@@LIBGO_IS_LINUX_FALSE@runtime_getncpu_file = runtime/getncpu-bsd.c
@LIBGO_IS_LINUX_TRUE@runtime_getncpu_file = runtime/getncpu-linux.c
runtime_files = \
+ runtime/aeshash.c \
runtime/go-assert.c \
runtime/go-breakpoint.c \
runtime/go-caller.c \
@@ -1446,6 +1447,7 @@ mostlyclean-compile:
distclean-compile:
-rm -f *.tab.c
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/aeshash.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/env_posix.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/getncpu-bsd.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/getncpu-irix.Plo@am__quote@
@@ -1573,6 +1575,13 @@ libgolibbegin_a-go-libmain.obj: runtime/go-libmain.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgolibbegin_a_CFLAGS) $(CFLAGS) -c -o libgolibbegin_a-go-libmain.obj `if test -f 'runtime/go-libmain.c'; then $(CYGPATH_W) 'runtime/go-libmain.c'; else $(CYGPATH_W) '$(srcdir)/runtime/go-libmain.c'; fi`
+aeshash.lo: runtime/aeshash.c
+@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT aeshash.lo -MD -MP -MF $(DEPDIR)/aeshash.Tpo -c -o aeshash.lo `test -f 'runtime/aeshash.c' || echo '$(srcdir)/'`runtime/aeshash.c
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/aeshash.Tpo $(DEPDIR)/aeshash.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='runtime/aeshash.c' object='aeshash.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o aeshash.lo `test -f 'runtime/aeshash.c' || echo '$(srcdir)/'`runtime/aeshash.c
+
go-assert.lo: runtime/go-assert.c
@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT go-assert.lo -MD -MP -MF $(DEPDIR)/go-assert.Tpo -c -o go-assert.lo `test -f 'runtime/go-assert.c' || echo '$(srcdir)/'`runtime/go-assert.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/go-assert.Tpo $(DEPDIR)/go-assert.Plo
diff --git a/libgo/configure b/libgo/configure
index 9eac5c0..7789c120 100755
--- a/libgo/configure
+++ b/libgo/configure
@@ -13624,7 +13624,7 @@ esac
# supported by the gofrontend and all architectures supported by the
# gc toolchain.
# N.B. Keep in sync with gcc/testsuite/go.test/go-test.exp (go-set-goarch).
-ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mipso32 mipsn32 mipso64 mipsn64 mips mipsle mips64 mips64le mips64p32 mips64pe32le ppc ppc64 ppc64le s390 s390x sparc sparc64"
+ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mipso32 mipsn32 mipso64 mipsn64 mips mipsle mips64 mips64le mips64p32 mips64p32le ppc ppc64 ppc64le s390 s390x sparc sparc64"
# All known GOARCH_FAMILY values.
ALLGOARCHFAMILY="I386 ALPHA AMD64 ARM ARM64 IA64 M68K MIPS MIPS64 PPC PPC64 S390 S390X SPARC SPARC64"
diff --git a/libgo/configure.ac b/libgo/configure.ac
index 9e76540..77a744e 100644
--- a/libgo/configure.ac
+++ b/libgo/configure.ac
@@ -197,7 +197,7 @@ AC_SUBST(USE_DEJAGNU)
# supported by the gofrontend and all architectures supported by the
# gc toolchain.
# N.B. Keep in sync with gcc/testsuite/go.test/go-test.exp (go-set-goarch).
-ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mipso32 mipsn32 mipso64 mipsn64 mips mipsle mips64 mips64le mips64p32 mips64pe32le ppc ppc64 ppc64le s390 s390x sparc sparc64"
+ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mipso32 mipsn32 mipso64 mipsn64 mips mipsle mips64 mips64le mips64p32 mips64p32le ppc ppc64 ppc64le s390 s390x sparc sparc64"
# All known GOARCH_FAMILY values.
ALLGOARCHFAMILY="I386 ALPHA AMD64 ARM ARM64 IA64 M68K MIPS MIPS64 PPC PPC64 S390 S390X SPARC SPARC64"
diff --git a/libgo/go/runtime/alg.go b/libgo/go/runtime/alg.go
index 8f7c3c0..5ec19d0 100644
--- a/libgo/go/runtime/alg.go
+++ b/libgo/go/runtime/alg.go
@@ -23,12 +23,29 @@ import (
//go:linkname efacevaleq runtime.efacevaleq
//go:linkname eqstring runtime.eqstring
//go:linkname cmpstring runtime.cmpstring
+//
+// Temporary to be called from C code.
+//go:linkname alginit runtime.alginit
const (
c0 = uintptr((8-sys.PtrSize)/4*2860486313 + (sys.PtrSize-4)/4*33054211828000289)
c1 = uintptr((8-sys.PtrSize)/4*3267000013 + (sys.PtrSize-4)/4*23344194077549503)
)
+var useAeshash bool
+
+// in C code
+func aeshashbody(p unsafe.Pointer, h, s uintptr, sched []byte) uintptr
+
+func aeshash(p unsafe.Pointer, h, s uintptr) uintptr {
+ return aeshashbody(p, h, s, aeskeysched[:])
+}
+
+func aeshashstr(p unsafe.Pointer, h uintptr) uintptr {
+ ps := (*stringStruct)(p)
+ return aeshashbody(unsafe.Pointer(ps.str), h, uintptr(ps.len), aeskeysched[:])
+}
+
func interhash(p unsafe.Pointer, h uintptr, size uintptr) uintptr {
a := (*iface)(p)
tab := a.tab
@@ -198,7 +215,35 @@ func cmpstring(x, y string) int {
// Force the creation of function descriptors for equality and hash
// functions. These will be referenced directly by the compiler.
+var _ = memhash
var _ = interhash
var _ = interequal
var _ = nilinterhash
var _ = nilinterequal
+
+const hashRandomBytes = sys.PtrSize / 4 * 64
+
+// used in aeshash.c (passed to aeshashbody) to seed the hash function
+var aeskeysched [hashRandomBytes]byte
+
+// used in hash{32,64}.go to seed the hash function
+var hashkey [4]uintptr
+
+func alginit() {
+ // Install aes hash algorithm if we have the instructions we need
+ if (GOARCH == "386" || GOARCH == "amd64") &&
+ GOOS != "nacl" &&
+ cpuid_ecx&(1<<25) != 0 && // aes (aesenc)
+ cpuid_ecx&(1<<9) != 0 && // ssse3 (pshufb)
+ cpuid_ecx&(1<<19) != 0 { // sse4.1 (pinsr{d,q})
+ useAeshash = true
+ // Initialize with random data so hash collisions will be hard to engineer.
+ getRandomData(aeskeysched[:])
+ return
+ }
+ getRandomData((*[len(hashkey) * sys.PtrSize]byte)(unsafe.Pointer(&hashkey))[:])
+ hashkey[0] |= 1 // make sure these numbers are odd
+ hashkey[1] |= 1
+ hashkey[2] |= 1
+ hashkey[3] |= 1
+}
diff --git a/libgo/go/runtime/hash32.go b/libgo/go/runtime/hash32.go
new file mode 100644
index 0000000..cfb3a58
--- /dev/null
+++ b/libgo/go/runtime/hash32.go
@@ -0,0 +1,94 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Hashing algorithm inspired by
+// xxhash: https://code.google.com/p/xxhash/
+// cityhash: https://code.google.com/p/cityhash/
+
+// +build 386 arm armbe m68k mipso32 mipsn32 mips mipsle ppc s390 sparc
+
+package runtime
+
+import "unsafe"
+
+// For gccgo, use go:linkname to rename compiler-called functions to
+// themselves, so that the compiler will export them.
+//
+//go:linkname memhash runtime.memhash
+
+const (
+ // Constants for multiplication: four random odd 32-bit numbers.
+ m1 = 3168982561
+ m2 = 3339683297
+ m3 = 832293441
+ m4 = 2336365089
+)
+
+func memhash(p unsafe.Pointer, seed, s uintptr) uintptr {
+ if GOARCH == "386" && GOOS != "nacl" && useAeshash {
+ return aeshash(p, seed, s)
+ }
+ h := uint32(seed + s*hashkey[0])
+tail:
+ switch {
+ case s == 0:
+ case s < 4:
+ h ^= uint32(*(*byte)(p))
+ h ^= uint32(*(*byte)(add(p, s>>1))) << 8
+ h ^= uint32(*(*byte)(add(p, s-1))) << 16
+ h = rotl_15(h*m1) * m2
+ case s == 4:
+ h ^= readUnaligned32(p)
+ h = rotl_15(h*m1) * m2
+ case s <= 8:
+ h ^= readUnaligned32(p)
+ h = rotl_15(h*m1) * m2
+ h ^= readUnaligned32(add(p, s-4))
+ h = rotl_15(h*m1) * m2
+ case s <= 16:
+ h ^= readUnaligned32(p)
+ h = rotl_15(h*m1) * m2
+ h ^= readUnaligned32(add(p, 4))
+ h = rotl_15(h*m1) * m2
+ h ^= readUnaligned32(add(p, s-8))
+ h = rotl_15(h*m1) * m2
+ h ^= readUnaligned32(add(p, s-4))
+ h = rotl_15(h*m1) * m2
+ default:
+ v1 := h
+ v2 := uint32(seed * hashkey[1])
+ v3 := uint32(seed * hashkey[2])
+ v4 := uint32(seed * hashkey[3])
+ for s >= 16 {
+ v1 ^= readUnaligned32(p)
+ v1 = rotl_15(v1*m1) * m2
+ p = add(p, 4)
+ v2 ^= readUnaligned32(p)
+ v2 = rotl_15(v2*m2) * m3
+ p = add(p, 4)
+ v3 ^= readUnaligned32(p)
+ v3 = rotl_15(v3*m3) * m4
+ p = add(p, 4)
+ v4 ^= readUnaligned32(p)
+ v4 = rotl_15(v4*m4) * m1
+ p = add(p, 4)
+ s -= 16
+ }
+ h = v1 ^ v2 ^ v3 ^ v4
+ goto tail
+ }
+ h ^= h >> 17
+ h *= m3
+ h ^= h >> 13
+ h *= m4
+ h ^= h >> 16
+ return uintptr(h)
+}
+
+// Note: in order to get the compiler to issue rotl instructions, we
+// need to constant fold the shift amount by hand.
+// TODO: convince the compiler to issue rotl instructions after inlining.
+func rotl_15(x uint32) uint32 {
+ return (x << 15) | (x >> (32 - 15))
+}
diff --git a/libgo/go/runtime/hash64.go b/libgo/go/runtime/hash64.go
new file mode 100644
index 0000000..551d5b5
--- /dev/null
+++ b/libgo/go/runtime/hash64.go
@@ -0,0 +1,94 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Hashing algorithm inspired by
+// xxhash: https://code.google.com/p/xxhash/
+// cityhash: https://code.google.com/p/cityhash/
+
+// +build amd64 amd64p32 arm64 mips64 mips64le ppc64 ppc64le s390x alpha arm64be ia64 mipso64 mipsn64 mips64p32 mips64p32le sparc64
+
+package runtime
+
+import "unsafe"
+
+// For gccgo, use go:linkname to rename compiler-called functions to
+// themselves, so that the compiler will export them.
+//
+//go:linkname memhash runtime.memhash
+
+const (
+ // Constants for multiplication: four random odd 64-bit numbers.
+ m1 = 16877499708836156737
+ m2 = 2820277070424839065
+ m3 = 9497967016996688599
+ m4 = 15839092249703872147
+)
+
+func memhash(p unsafe.Pointer, seed, s uintptr) uintptr {
+ if GOARCH == "amd64" && GOOS != "nacl" && useAeshash {
+ return aeshash(p, seed, s)
+ }
+ h := uint64(seed + s*hashkey[0])
+tail:
+ switch {
+ case s == 0:
+ case s < 4:
+ h ^= uint64(*(*byte)(p))
+ h ^= uint64(*(*byte)(add(p, s>>1))) << 8
+ h ^= uint64(*(*byte)(add(p, s-1))) << 16
+ h = rotl_31(h*m1) * m2
+ case s <= 8:
+ h ^= uint64(readUnaligned32(p))
+ h ^= uint64(readUnaligned32(add(p, s-4))) << 32
+ h = rotl_31(h*m1) * m2
+ case s <= 16:
+ h ^= readUnaligned64(p)
+ h = rotl_31(h*m1) * m2
+ h ^= readUnaligned64(add(p, s-8))
+ h = rotl_31(h*m1) * m2
+ case s <= 32:
+ h ^= readUnaligned64(p)
+ h = rotl_31(h*m1) * m2
+ h ^= readUnaligned64(add(p, 8))
+ h = rotl_31(h*m1) * m2
+ h ^= readUnaligned64(add(p, s-16))
+ h = rotl_31(h*m1) * m2
+ h ^= readUnaligned64(add(p, s-8))
+ h = rotl_31(h*m1) * m2
+ default:
+ v1 := h
+ v2 := uint64(seed * hashkey[1])
+ v3 := uint64(seed * hashkey[2])
+ v4 := uint64(seed * hashkey[3])
+ for s >= 32 {
+ v1 ^= readUnaligned64(p)
+ v1 = rotl_31(v1*m1) * m2
+ p = add(p, 8)
+ v2 ^= readUnaligned64(p)
+ v2 = rotl_31(v2*m2) * m3
+ p = add(p, 8)
+ v3 ^= readUnaligned64(p)
+ v3 = rotl_31(v3*m3) * m4
+ p = add(p, 8)
+ v4 ^= readUnaligned64(p)
+ v4 = rotl_31(v4*m4) * m1
+ p = add(p, 8)
+ s -= 32
+ }
+ h = v1 ^ v2 ^ v3 ^ v4
+ goto tail
+ }
+
+ h ^= h >> 29
+ h *= m3
+ h ^= h >> 32
+ return uintptr(h)
+}
+
+// Note: in order to get the compiler to issue rotl instructions, we
+// need to constant fold the shift amount by hand.
+// TODO: convince the compiler to issue rotl instructions after inlining.
+func rotl_31(x uint64) uint64 {
+ return (x << 31) | (x >> (64 - 31))
+}
diff --git a/libgo/go/runtime/os_gccgo.go b/libgo/go/runtime/os_gccgo.go
new file mode 100644
index 0000000..4609432
--- /dev/null
+++ b/libgo/go/runtime/os_gccgo.go
@@ -0,0 +1,23 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+ "unsafe"
+)
+
+var urandom_dev = []byte("/dev/urandom\x00")
+
+func getRandomData(r []byte) {
+ if startupRandomData != nil {
+ n := copy(r, startupRandomData)
+ extendRandom(r, n)
+ return
+ }
+ fd := open(&urandom_dev[0], 0 /* O_RDONLY */, 0)
+ n := read(fd, unsafe.Pointer(&r[0]), int32(len(r)))
+ closefd(fd)
+ extendRandom(r, int(n))
+}
diff --git a/libgo/go/runtime/runtime2.go b/libgo/go/runtime/runtime2.go
index c8db7ad..4712318 100644
--- a/libgo/go/runtime/runtime2.go
+++ b/libgo/go/runtime/runtime2.go
@@ -5,6 +5,7 @@
package runtime
import (
+ "runtime/internal/sys"
"unsafe"
)
@@ -668,7 +669,6 @@ type forcegcstate struct {
// the ELF AT_RANDOM auxiliary vector (vdso_linux_amd64.go or os_linux_386.go).
var startupRandomData []byte
-/*
// extendRandom extends the random numbers in r[:n] to the whole slice r.
// Treats n<0 as n==0.
func extendRandom(r []byte, n int) {
@@ -689,7 +689,6 @@ func extendRandom(r []byte, n int) {
}
}
}
-*/
// deferred subroutine calls
// This is the gccgo version.
@@ -770,11 +769,12 @@ var (
sched schedt
-// newprocs int32
+ // newprocs int32
+
+ // Information about what cpu features are available.
+ // Set on startup.
+ cpuid_ecx uint32
-// Information about what cpu features are available.
-// Set on startup in asm_{x86,amd64}.s.
-// cpuid_ecx uint32
// cpuid_edx uint32
// cpuid_ebx7 uint32
// lfenceBeforeRdtsc bool
diff --git a/libgo/go/runtime/stubs.go b/libgo/go/runtime/stubs.go
index b2f1829..b4fee6b 100644
--- a/libgo/go/runtime/stubs.go
+++ b/libgo/go/runtime/stubs.go
@@ -248,6 +248,12 @@ func funcPC(f interface{}) uintptr {
return **(**uintptr)(i.data)
}
+// For gccgo, to communicate from the C code to the Go code.
+//go:linkname setCpuidECX runtime.setCpuidECX
+func setCpuidECX(v uint32) {
+ cpuid_ecx = v
+}
+
// typedmemmove copies a typed value.
// For gccgo for now.
//go:nosplit
diff --git a/libgo/go/runtime/unaligned1.go b/libgo/go/runtime/unaligned1.go
new file mode 100644
index 0000000..c94f19e
--- /dev/null
+++ b/libgo/go/runtime/unaligned1.go
@@ -0,0 +1,17 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build 386 amd64 amd64p32 arm64 ppc64 ppc64le s390x ppc s390 arm64be
+
+package runtime
+
+import "unsafe"
+
+func readUnaligned32(p unsafe.Pointer) uint32 {
+ return *(*uint32)(p)
+}
+
+func readUnaligned64(p unsafe.Pointer) uint64 {
+ return *(*uint64)(p)
+}
diff --git a/libgo/go/runtime/unaligned2.go b/libgo/go/runtime/unaligned2.go
new file mode 100644
index 0000000..e52d6ce
--- /dev/null
+++ b/libgo/go/runtime/unaligned2.go
@@ -0,0 +1,20 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build arm mips64 mips64le armbe m68k mipso32 mipsn32 mips mipsle sparc alpha ia64 mipso64 mipsn64 mips64p32 mips64p32le sparc64
+
+package runtime
+
+import "unsafe"
+
+// Note: These routines perform the read with an unspecified endianness.
+func readUnaligned32(p unsafe.Pointer) uint32 {
+ q := (*[4]byte)(p)
+ return uint32(q[0]) + uint32(q[1])<<8 + uint32(q[2])<<16 + uint32(q[3])<<24
+}
+
+func readUnaligned64(p unsafe.Pointer) uint64 {
+ q := (*[8]byte)(p)
+ return uint64(q[0]) + uint64(q[1])<<8 + uint64(q[2])<<16 + uint64(q[3])<<24 + uint64(q[4])<<32 + uint64(q[5])<<40 + uint64(q[6])<<48 + uint64(q[7])<<56
+}
diff --git a/libgo/runtime/aeshash.c b/libgo/runtime/aeshash.c
new file mode 100644
index 0000000..faa90e0
--- /dev/null
+++ b/libgo/runtime/aeshash.c
@@ -0,0 +1,583 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Hash code using AES intrinsics.
+
+#include "runtime.h"
+
+uintptr aeshashbody(void*, uintptr, uintptr, Slice)
+ __asm__(GOSYM_PREFIX "runtime.aeshashbody");
+
+uintptr aeshashbody(void*, uintptr, uintptr, Slice)
+ __attribute__((no_split_stack));
+
+#if defined(__i386__) || defined(__x86_64__)
+
+#include <emmintrin.h>
+#include <tmmintrin.h>
+#include <wmmintrin.h>
+
+// Force appropriate CPU level. We won't call here unless the CPU
+// supports it.
+
+#pragma GCC target("ssse3", "aes")
+
+#ifdef __x86_64__
+
+// aeshashbody implements a hash function using AES instructions
+// available in recent x86 processors. Note this is not encryption,
+// just hashing.
+//
+// This is written to produce exactly the same results as the gc
+// implementation, not because that matters, but just to ensure that
+// this does something reasonable.
+uintptr aeshashbody(void* p, uintptr seed, uintptr size, Slice aeskeysched) {
+ __m128i mseed, mseed2, mseed3, mseed4, mseed5, mseed6, mseed7, mseed8;
+ __m128i mval, mval2, mval3, mval4, mval5, mval6, mval7, mval8;
+
+ // Start with hash seed.
+ mseed = _mm_cvtsi64_si128(seed);
+ // Get 16 bits of length.
+ mseed = _mm_insert_epi16(mseed, size, 4);
+ // Repeat length 4 times total.
+ mseed = _mm_shufflehi_epi16(mseed, 0);
+ // Save unscrambled seed.
+ mseed2 = mseed;
+ // XOR in per-process seed.
+ mseed ^= _mm_loadu_si128(aeskeysched.__values);
+ // Scramble seed.
+ mseed = _mm_aesenc_si128(mseed, mseed);
+
+ if (size <= 16) {
+ if (size == 0) {
+ // Return scrambled input seed.
+ return _mm_cvtsi128_si64(_mm_aesenc_si128(mseed, mseed));
+ } else if (size < 16) {
+ if ((((uintptr)(p) + 16) & 0xff0) != 0) {
+ static const uint64 masks[32]
+ __attribute__ ((aligned(16))) =
+ {
+ 0x0000000000000000, 0x0000000000000000,
+ 0x00000000000000ff, 0x0000000000000000,
+ 0x000000000000ffff, 0x0000000000000000,
+ 0x0000000000ffffff, 0x0000000000000000,
+ 0x00000000ffffffff, 0x0000000000000000,
+ 0x000000ffffffffff, 0x0000000000000000,
+ 0x0000ffffffffffff, 0x0000000000000000,
+ 0x00ffffffffffffff, 0x0000000000000000,
+ 0xffffffffffffffff, 0x0000000000000000,
+ 0xffffffffffffffff, 0x00000000000000ff,
+ 0xffffffffffffffff, 0x000000000000ffff,
+ 0xffffffffffffffff, 0x0000000000ffffff,
+ 0xffffffffffffffff, 0x00000000ffffffff,
+ 0xffffffffffffffff, 0x000000ffffffffff,
+ 0xffffffffffffffff, 0x0000ffffffffffff,
+ 0xffffffffffffffff, 0x00ffffffffffffff
+ };
+
+ // 16 bytes loaded at p won't cross a page
+ // boundary, so we can load directly.
+ mval = _mm_loadu_si128(p);
+ mval &= *(const __m128i*)(&masks[size*2]);
+ } else {
+ static const uint64 shifts[32]
+ __attribute__ ((aligned(16))) =
+ {
+ 0x0000000000000000, 0x0000000000000000,
+ 0xffffffffffffff0f, 0xffffffffffffffff,
+ 0xffffffffffff0f0e, 0xffffffffffffffff,
+ 0xffffffffff0f0e0d, 0xffffffffffffffff,
+ 0xffffffff0f0e0d0c, 0xffffffffffffffff,
+ 0xffffff0f0e0d0c0b, 0xffffffffffffffff,
+ 0xffff0f0e0d0c0b0a, 0xffffffffffffffff,
+ 0xff0f0e0d0c0b0a09, 0xffffffffffffffff,
+ 0x0f0e0d0c0b0a0908, 0xffffffffffffffff,
+ 0x0e0d0c0b0a090807, 0xffffffffffffff0f,
+ 0x0d0c0b0a09080706, 0xffffffffffff0f0e,
+ 0x0c0b0a0908070605, 0xffffffffff0f0e0d,
+ 0x0b0a090807060504, 0xffffffff0f0e0d0c,
+ 0x0a09080706050403, 0xffffff0f0e0d0c0b,
+ 0x0908070605040302, 0xffff0f0e0d0c0b0a,
+ 0x0807060504030201, 0xff0f0e0d0c0b0a09,
+ };
+
+ // Page offset of p is 0xff0-0xfff, so a
+ // 16-byte load at p could cross a 4K page
+ // boundary. Load ending at last byte, then
+ // shift bytes down using pshufb.
+ mval = _mm_loadu_si128((void*)((char*)p - 16 + size));
+ mval = _mm_shuffle_epi8(mval, *(const __m128i*)(&shifts[size*2]));
+ }
+ } else {
+ mval = _mm_loadu_si128(p);
+ }
+
+ // XOR data with seed.
+ mval ^= mseed;
+ // Scramble combo 3 times.
+ mval = _mm_aesenc_si128(mval, mval);
+ mval = _mm_aesenc_si128(mval, mval);
+ mval = _mm_aesenc_si128(mval, mval);
+ return _mm_cvtsi128_si64(mval);
+ } else if (size <= 32) {
+ // Make second starting seed.
+ mseed2 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 16));
+ mseed2 = _mm_aesenc_si128(mseed2, mseed2);
+ // Load data to be hashed.
+ mval = _mm_loadu_si128(p);
+ mval2 = _mm_loadu_si128((void*)((char*)p + size - 16));
+ // XOR with seed.
+ mval ^= mseed;
+ mval2 ^= mseed2;
+ // Scramble 3 times.
+ mval = _mm_aesenc_si128(mval, mval);
+ mval2 = _mm_aesenc_si128(mval2, mval2);
+ mval = _mm_aesenc_si128(mval, mval);
+ mval2 = _mm_aesenc_si128(mval2, mval2);
+ mval = _mm_aesenc_si128(mval, mval);
+ mval2 = _mm_aesenc_si128(mval2, mval2);
+ // Combine results.
+ mval ^= mval2;
+ return _mm_cvtsi128_si64(mval);
+ } else if (size <= 64) {
+ // Make 3 more starting seeds.
+ mseed3 = mseed2;
+ mseed4 = mseed2;
+ mseed2 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 16));
+ mseed3 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 32));
+ mseed4 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 48));
+ mseed2 = _mm_aesenc_si128(mseed2, mseed2);
+ mseed3 = _mm_aesenc_si128(mseed3, mseed3);
+ mseed4 = _mm_aesenc_si128(mseed4, mseed4);
+
+ mval = _mm_loadu_si128(p);
+ mval2 = _mm_loadu_si128((void*)((char*)p + 16));
+ mval3 = _mm_loadu_si128((void*)((char*)p + size - 32));
+ mval4 = _mm_loadu_si128((void*)((char*)p + size - 16));
+
+ mval ^= mseed;
+ mval2 ^= mseed2;
+ mval3 ^= mseed3;
+ mval4 ^= mseed4;
+
+ mval = _mm_aesenc_si128(mval, mval);
+ mval2 = _mm_aesenc_si128(mval2, mval2);
+ mval3 = _mm_aesenc_si128(mval3, mval3);
+ mval4 = _mm_aesenc_si128(mval4, mval4);
+
+ mval = _mm_aesenc_si128(mval, mval);
+ mval2 = _mm_aesenc_si128(mval2, mval2);
+ mval3 = _mm_aesenc_si128(mval3, mval3);
+ mval4 = _mm_aesenc_si128(mval4, mval4);
+
+ mval = _mm_aesenc_si128(mval, mval);
+ mval2 = _mm_aesenc_si128(mval2, mval2);
+ mval3 = _mm_aesenc_si128(mval3, mval3);
+ mval4 = _mm_aesenc_si128(mval4, mval4);
+
+ mval ^= mval3;
+ mval2 ^= mval4;
+ mval ^= mval2;
+ return _mm_cvtsi128_si64(mval);
+ } else if (size <= 128) {
+ // Make 7 more starting seeds.
+ mseed3 = mseed2;
+ mseed4 = mseed2;
+ mseed5 = mseed2;
+ mseed6 = mseed2;
+ mseed7 = mseed2;
+ mseed8 = mseed2;
+ mseed2 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 16));
+ mseed3 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 32));
+ mseed4 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 48));
+ mseed5 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 64));
+ mseed6 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 80));
+ mseed7 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 96));
+ mseed8 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 112));
+ mseed2 = _mm_aesenc_si128(mseed2, mseed2);
+ mseed3 = _mm_aesenc_si128(mseed3, mseed3);
+ mseed4 = _mm_aesenc_si128(mseed4, mseed4);
+ mseed5 = _mm_aesenc_si128(mseed5, mseed5);
+ mseed6 = _mm_aesenc_si128(mseed6, mseed6);
+ mseed7 = _mm_aesenc_si128(mseed7, mseed7);
+ mseed8 = _mm_aesenc_si128(mseed8, mseed8);
+
+ // Load data.
+ mval = _mm_loadu_si128(p);
+ mval2 = _mm_loadu_si128((void*)((char*)p + 16));
+ mval3 = _mm_loadu_si128((void*)((char*)p + 32));
+ mval4 = _mm_loadu_si128((void*)((char*)p + 48));
+ mval5 = _mm_loadu_si128((void*)((char*)p + size - 64));
+ mval6 = _mm_loadu_si128((void*)((char*)p + size - 48));
+ mval7 = _mm_loadu_si128((void*)((char*)p + size - 32));
+ mval8 = _mm_loadu_si128((void*)((char*)p + size - 16));
+
+ // XOR with seed.
+ mval ^= mseed;
+ mval2 ^= mseed2;
+ mval3 ^= mseed3;
+ mval4 ^= mseed4;
+ mval5 ^= mseed5;
+ mval6 ^= mseed6;
+ mval7 ^= mseed7;
+ mval8 ^= mseed8;
+
+ // Scramble 3 times.
+ mval = _mm_aesenc_si128(mval, mval);
+ mval2 = _mm_aesenc_si128(mval2, mval2);
+ mval3 = _mm_aesenc_si128(mval3, mval3);
+ mval4 = _mm_aesenc_si128(mval4, mval4);
+ mval5 = _mm_aesenc_si128(mval5, mval5);
+ mval6 = _mm_aesenc_si128(mval6, mval6);
+ mval7 = _mm_aesenc_si128(mval7, mval7);
+ mval8 = _mm_aesenc_si128(mval8, mval8);
+
+ mval = _mm_aesenc_si128(mval, mval);
+ mval2 = _mm_aesenc_si128(mval2, mval2);
+ mval3 = _mm_aesenc_si128(mval3, mval3);
+ mval4 = _mm_aesenc_si128(mval4, mval4);
+ mval5 = _mm_aesenc_si128(mval5, mval5);
+ mval6 = _mm_aesenc_si128(mval6, mval6);
+ mval7 = _mm_aesenc_si128(mval7, mval7);
+ mval8 = _mm_aesenc_si128(mval8, mval8);
+
+ mval = _mm_aesenc_si128(mval, mval);
+ mval2 = _mm_aesenc_si128(mval2, mval2);
+ mval3 = _mm_aesenc_si128(mval3, mval3);
+ mval4 = _mm_aesenc_si128(mval4, mval4);
+ mval5 = _mm_aesenc_si128(mval5, mval5);
+ mval6 = _mm_aesenc_si128(mval6, mval6);
+ mval7 = _mm_aesenc_si128(mval7, mval7);
+ mval8 = _mm_aesenc_si128(mval8, mval8);
+
+ // Combine results.
+ mval ^= mval5;
+ mval2 ^= mval6;
+ mval3 ^= mval7;
+ mval4 ^= mval8;
+ mval ^= mval3;
+ mval2 ^= mval4;
+ mval ^= mval2;
+ return _mm_cvtsi128_si64(mval);
+ } else {
+ // Make 7 more starting seeds.
+ mseed3 = mseed2;
+ mseed4 = mseed2;
+ mseed5 = mseed2;
+ mseed6 = mseed2;
+ mseed7 = mseed2;
+ mseed8 = mseed2;
+ mseed2 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 16));
+ mseed3 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 32));
+ mseed4 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 48));
+ mseed5 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 64));
+ mseed6 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 80));
+ mseed7 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 96));
+ mseed8 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 112));
+ mseed2 = _mm_aesenc_si128(mseed2, mseed2);
+ mseed3 = _mm_aesenc_si128(mseed3, mseed3);
+ mseed4 = _mm_aesenc_si128(mseed4, mseed4);
+ mseed5 = _mm_aesenc_si128(mseed5, mseed5);
+ mseed6 = _mm_aesenc_si128(mseed6, mseed6);
+ mseed7 = _mm_aesenc_si128(mseed7, mseed7);
+ mseed8 = _mm_aesenc_si128(mseed8, mseed8);
+
+ // Start with last (possibly overlapping) block.
+ mval = _mm_loadu_si128((void*)((char*)p + size - 128));
+ mval2 = _mm_loadu_si128((void*)((char*)p + size - 112));
+ mval3 = _mm_loadu_si128((void*)((char*)p + size - 96));
+ mval4 = _mm_loadu_si128((void*)((char*)p + size - 80));
+ mval5 = _mm_loadu_si128((void*)((char*)p + size - 64));
+ mval6 = _mm_loadu_si128((void*)((char*)p + size - 48));
+ mval7 = _mm_loadu_si128((void*)((char*)p + size - 32));
+ mval8 = _mm_loadu_si128((void*)((char*)p + size - 16));
+
+ // XOR in seed.
+ mval ^= mseed;
+ mval2 ^= mseed2;
+ mval3 ^= mseed3;
+ mval4 ^= mseed4;
+ mval5 ^= mseed5;
+ mval6 ^= mseed6;
+ mval7 ^= mseed7;
+ mval8 ^= mseed8;
+
+ // Compute number of remaining 128-byte blocks.
+ size--;
+ size >>= 7;
+ do {
+ // Scramble state.
+ mval = _mm_aesenc_si128(mval, mval);
+ mval2 = _mm_aesenc_si128(mval2, mval2);
+ mval3 = _mm_aesenc_si128(mval3, mval3);
+ mval4 = _mm_aesenc_si128(mval4, mval4);
+ mval5 = _mm_aesenc_si128(mval5, mval5);
+ mval6 = _mm_aesenc_si128(mval6, mval6);
+ mval7 = _mm_aesenc_si128(mval7, mval7);
+ mval8 = _mm_aesenc_si128(mval8, mval8);
+
+ // Scramble state, XOR in a block.
+ mval = _mm_aesenc_si128(mval, _mm_loadu_si128(p));
+ mval2 = _mm_aesenc_si128(mval2, _mm_loadu_si128((void*)((char*)p + 16)));
+ mval3 = _mm_aesenc_si128(mval3, _mm_loadu_si128((void*)((char*)p + 32)));
+ mval4 = _mm_aesenc_si128(mval4, _mm_loadu_si128((void*)((char*)p + 48)));
+ mval5 = _mm_aesenc_si128(mval5, _mm_loadu_si128((void*)((char*)p + 64)));
+ mval6 = _mm_aesenc_si128(mval6, _mm_loadu_si128((void*)((char*)p + 80)));
+ mval7 = _mm_aesenc_si128(mval7, _mm_loadu_si128((void*)((char*)p + 96)));
+ mval8 = _mm_aesenc_si128(mval8, _mm_loadu_si128((void*)((char*)p + 112)));
+
+ p = (void*)((char*)p + 128);
+ } while (--size > 0);
+
+ // 3 more scrambles to finish.
+ mval = _mm_aesenc_si128(mval, mval);
+ mval2 = _mm_aesenc_si128(mval2, mval2);
+ mval3 = _mm_aesenc_si128(mval3, mval3);
+ mval4 = _mm_aesenc_si128(mval4, mval4);
+ mval5 = _mm_aesenc_si128(mval5, mval5);
+ mval6 = _mm_aesenc_si128(mval6, mval6);
+ mval7 = _mm_aesenc_si128(mval7, mval7);
+ mval8 = _mm_aesenc_si128(mval8, mval8);
+ mval = _mm_aesenc_si128(mval, mval);
+ mval2 = _mm_aesenc_si128(mval2, mval2);
+ mval3 = _mm_aesenc_si128(mval3, mval3);
+ mval4 = _mm_aesenc_si128(mval4, mval4);
+ mval5 = _mm_aesenc_si128(mval5, mval5);
+ mval6 = _mm_aesenc_si128(mval6, mval6);
+ mval7 = _mm_aesenc_si128(mval7, mval7);
+ mval8 = _mm_aesenc_si128(mval8, mval8);
+ mval = _mm_aesenc_si128(mval, mval);
+ mval2 = _mm_aesenc_si128(mval2, mval2);
+ mval3 = _mm_aesenc_si128(mval3, mval3);
+ mval4 = _mm_aesenc_si128(mval4, mval4);
+ mval5 = _mm_aesenc_si128(mval5, mval5);
+ mval6 = _mm_aesenc_si128(mval6, mval6);
+ mval7 = _mm_aesenc_si128(mval7, mval7);
+ mval8 = _mm_aesenc_si128(mval8, mval8);
+
+ mval ^= mval5;
+ mval2 ^= mval6;
+ mval3 ^= mval7;
+ mval4 ^= mval8;
+ mval ^= mval3;
+ mval2 ^= mval4;
+ mval ^= mval2;
+ return _mm_cvtsi128_si64(mval);
+ }
+}
+
+#else // !defined(__x86_64__)
+
+// The 32-bit version of aeshashbody.  Hashes the size bytes at p with AES rounds, mixing in
+// seed and up to 64 bytes read from aeskeysched; returns the low 32 bits of the final state.
+uintptr aeshashbody(void* p, uintptr seed, uintptr size, Slice aeskeysched) {
+ __m128i mseed, mseed2, mseed3, mseed4;
+ __m128i mval, mval2, mval3, mval4;
+
+ // Start with hash seed.
+ mseed = _mm_cvtsi32_si128(seed);
+ // Get 16 bits of length.
+ mseed = _mm_insert_epi16(mseed, size, 4);
+ // Replace size with its low 2 bytes repeated 4 times.
+ mseed = _mm_shufflehi_epi16(mseed, 0);
+ // Save unscrambled seed.
+ mseed2 = mseed;
+ // XOR in per-process seed.
+ mseed ^= _mm_loadu_si128(aeskeysched.__values); // Bytes 0-15 of the key schedule.
+ // Scramble seed.
+ mseed = _mm_aesenc_si128(mseed, mseed);
+
+ if (size <= 16) { // Short input: at most one 16-byte block.
+ if (size == 0) {
+ // Return scrambled input seed.
+ return _mm_cvtsi128_si32(_mm_aesenc_si128(mseed, mseed));
+ } else if (size < 16) {
+ if ((((uintptr)(p) + 16) & 0xff0) != 0) { // True unless p lies in the last 16 bytes of a 4096-byte-aligned block.
+ static const uint64 masks[32] // masks[2*n], masks[2*n+1]: 128-bit mask with only the low n bytes set.
+ __attribute__ ((aligned(16))) = // 16-byte aligned: entries are read through a __m128i pointer.
+ {
+ 0x0000000000000000, 0x0000000000000000,
+ 0x00000000000000ff, 0x0000000000000000,
+ 0x000000000000ffff, 0x0000000000000000,
+ 0x0000000000ffffff, 0x0000000000000000,
+ 0x00000000ffffffff, 0x0000000000000000,
+ 0x000000ffffffffff, 0x0000000000000000,
+ 0x0000ffffffffffff, 0x0000000000000000,
+ 0x00ffffffffffffff, 0x0000000000000000,
+ 0xffffffffffffffff, 0x0000000000000000,
+ 0xffffffffffffffff, 0x00000000000000ff,
+ 0xffffffffffffffff, 0x000000000000ffff,
+ 0xffffffffffffffff, 0x0000000000ffffff,
+ 0xffffffffffffffff, 0x00000000ffffffff,
+ 0xffffffffffffffff, 0x000000ffffffffff,
+ 0xffffffffffffffff, 0x0000ffffffffffff,
+ 0xffffffffffffffff, 0x00ffffffffffffff
+ };
+
+ // 16 bytes loaded at p won't cross a page
+ // boundary, so we can load it directly.
+ mval = _mm_loadu_si128(p);
+ mval &= *(const __m128i*)(&masks[size*2]); // Zero the bytes at and beyond offset size.
+ } else {
+ static const uint64 shifts[32] // shifts[2*n], shifts[2*n+1]: pshufb control moving the top n bytes to the bottom.
+ __attribute__ ((aligned(16))) = // 16-byte aligned: entries are read through a __m128i pointer.
+ {
+ 0x0000000000000000, 0x0000000000000000,
+ 0xffffffffffffff0f, 0xffffffffffffffff,
+ 0xffffffffffff0f0e, 0xffffffffffffffff,
+ 0xffffffffff0f0e0d, 0xffffffffffffffff,
+ 0xffffffff0f0e0d0c, 0xffffffffffffffff,
+ 0xffffff0f0e0d0c0b, 0xffffffffffffffff,
+ 0xffff0f0e0d0c0b0a, 0xffffffffffffffff,
+ 0xff0f0e0d0c0b0a09, 0xffffffffffffffff,
+ 0x0f0e0d0c0b0a0908, 0xffffffffffffffff,
+ 0x0e0d0c0b0a090807, 0xffffffffffffff0f,
+ 0x0d0c0b0a09080706, 0xffffffffffff0f0e,
+ 0x0c0b0a0908070605, 0xffffffffff0f0e0d,
+ 0x0b0a090807060504, 0xffffffff0f0e0d0c,
+ 0x0a09080706050403, 0xffffff0f0e0d0c0b,
+ 0x0908070605040302, 0xffff0f0e0d0c0b0a,
+ 0x0807060504030201, 0xff0f0e0d0c0b0a09,
+ };
+
+ // address ends in 1111xxxx. Might be
+ // up against a page boundary, so load
+ // ending at last byte. Then shift
+ // bytes down using pshufb.
+ mval = _mm_loadu_si128((void*)((char*)p - 16 + size));
+ mval = _mm_shuffle_epi8(mval, *(const __m128i*)(&shifts[size*2])); // Control bytes of 0xff produce zero bytes.
+ }
+ } else {
+ mval = _mm_loadu_si128(p); // size == 16: exactly one full block.
+ }
+
+ // Scramble input, XOR in seed.
+ mval = _mm_aesenc_si128(mval, mseed);
+ mval = _mm_aesenc_si128(mval, mval);
+ mval = _mm_aesenc_si128(mval, mval);
+ return _mm_cvtsi128_si32(mval);
+ } else if (size <= 32) { // 17 to 32 bytes: two 16-byte lanes.
+ // Make second starting seed.
+ mseed2 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 16));
+ mseed2 = _mm_aesenc_si128(mseed2, mseed2);
+ // Load data to be hashed.
+ mval = _mm_loadu_si128(p);
+ mval2 = _mm_loadu_si128((void*)((char*)p + size - 16)); // Last 16 bytes; overlaps mval when size < 32.
+
+ // Scramble 3 times.
+ mval = _mm_aesenc_si128(mval, mseed);
+ mval2 = _mm_aesenc_si128(mval2, mseed2);
+ mval = _mm_aesenc_si128(mval, mval);
+ mval2 = _mm_aesenc_si128(mval2, mval2);
+ mval = _mm_aesenc_si128(mval, mval);
+ mval2 = _mm_aesenc_si128(mval2, mval2);
+
+ // Combine results.
+ mval ^= mval2;
+ return _mm_cvtsi128_si32(mval);
+ } else if (size <= 64) { // 33 to 64 bytes: four 16-byte lanes.
+ // Make 3 more starting seeds.
+ mseed3 = mseed2; // mseed2 still holds the unscrambled seed saved above.
+ mseed4 = mseed2;
+ mseed2 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 16));
+ mseed3 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 32));
+ mseed4 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 48));
+ mseed2 = _mm_aesenc_si128(mseed2, mseed2);
+ mseed3 = _mm_aesenc_si128(mseed3, mseed3);
+ mseed4 = _mm_aesenc_si128(mseed4, mseed4);
+
+ mval = _mm_loadu_si128(p);
+ mval2 = _mm_loadu_si128((void*)((char*)p + 16));
+ mval3 = _mm_loadu_si128((void*)((char*)p + size - 32)); // Last 32 bytes; overlap the first 32 when size < 64.
+ mval4 = _mm_loadu_si128((void*)((char*)p + size - 16));
+
+ mval = _mm_aesenc_si128(mval, mseed);
+ mval2 = _mm_aesenc_si128(mval2, mseed2);
+ mval3 = _mm_aesenc_si128(mval3, mseed3);
+ mval4 = _mm_aesenc_si128(mval4, mseed4);
+
+ mval = _mm_aesenc_si128(mval, mval);
+ mval2 = _mm_aesenc_si128(mval2, mval2);
+ mval3 = _mm_aesenc_si128(mval3, mval3);
+ mval4 = _mm_aesenc_si128(mval4, mval4);
+
+ mval = _mm_aesenc_si128(mval, mval);
+ mval2 = _mm_aesenc_si128(mval2, mval2);
+ mval3 = _mm_aesenc_si128(mval3, mval3);
+ mval4 = _mm_aesenc_si128(mval4, mval4);
+
+ mval ^= mval3; // Fold the four lanes into one.
+ mval2 ^= mval4;
+ mval ^= mval2;
+ return _mm_cvtsi128_si32(mval);
+ } else { // More than 64 bytes: four lanes, 64 bytes consumed per loop iteration.
+ // Make 3 more starting seeds.
+ mseed3 = mseed2; // mseed2 still holds the unscrambled seed saved above.
+ mseed4 = mseed2;
+ mseed2 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 16));
+ mseed3 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 32));
+ mseed4 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 48));
+ mseed2 = _mm_aesenc_si128(mseed2, mseed2);
+ mseed3 = _mm_aesenc_si128(mseed3, mseed3);
+ mseed4 = _mm_aesenc_si128(mseed4, mseed4);
+
+ // Start with last (possibly overlapping) block.
+ mval = _mm_loadu_si128((void*)((char*)p + size - 64));
+ mval2 = _mm_loadu_si128((void*)((char*)p + size - 48));
+ mval3 = _mm_loadu_si128((void*)((char*)p + size - 32));
+ mval4 = _mm_loadu_si128((void*)((char*)p + size - 16));
+
+ // Scramble state once.
+ mval = _mm_aesenc_si128(mval, mseed);
+ mval2 = _mm_aesenc_si128(mval2, mseed2);
+ mval3 = _mm_aesenc_si128(mval3, mseed3);
+ mval4 = _mm_aesenc_si128(mval4, mseed4);
+
+ // Compute number of remaining 64-byte blocks.
+ size--;
+ size >>= 6; // size is now (size - 1) / 64, which is at least 1 because size > 64 here.
+ do {
+ // Scramble state, XOR in a block.
+ mval = _mm_aesenc_si128(mval, _mm_loadu_si128(p));
+ mval2 = _mm_aesenc_si128(mval2, _mm_loadu_si128((void*)((char*)p + 16)));
+ mval3 = _mm_aesenc_si128(mval3, _mm_loadu_si128((void*)((char*)p + 32)));
+ mval4 = _mm_aesenc_si128(mval4, _mm_loadu_si128((void*)((char*)p + 48)));
+
+ // Scramble state.
+ mval = _mm_aesenc_si128(mval, mval);
+ mval2 = _mm_aesenc_si128(mval2, mval2);
+ mval3 = _mm_aesenc_si128(mval3, mval3);
+ mval4 = _mm_aesenc_si128(mval4, mval4);
+
+ p = (void*)((char*)p + 64); // Advance to the next 64-byte block.
+ } while (--size > 0);
+
+ // 2 more scrambles to finish.
+ mval = _mm_aesenc_si128(mval, mval);
+ mval2 = _mm_aesenc_si128(mval2, mval2);
+ mval3 = _mm_aesenc_si128(mval3, mval3);
+ mval4 = _mm_aesenc_si128(mval4, mval4);
+
+ mval = _mm_aesenc_si128(mval, mval);
+ mval2 = _mm_aesenc_si128(mval2, mval2);
+ mval3 = _mm_aesenc_si128(mval3, mval3);
+ mval4 = _mm_aesenc_si128(mval4, mval4);
+
+ mval ^= mval3; // Fold the four lanes into one.
+ mval2 ^= mval4;
+ mval ^= mval2;
+ return _mm_cvtsi128_si32(mval);
+ }
+}
+
+#endif // !defined(__x86_64__)
+
+#else // !defined(__i386__) && !defined(__x86_64__)
+
+// Stub version of aeshashbody for targets that are neither i386 nor
+// x86-64.  It only reports a fatal error through runtime_throw.
+// Every parameter is marked unused so that the stub does not trigger
+// -Wunused-parameter diagnostics on those targets.
+uintptr aeshashbody(void* p __attribute__((unused)),
+		    uintptr seed __attribute__((unused)),
+		    uintptr size __attribute__((unused)),
+		    Slice aeskeysched __attribute__((unused))) {
+	// We should never get here on a non-x86 system.
+	runtime_throw("impossible call to aeshashbody");
+}
+
+#endif // !defined(__i386__) && !defined(__x86_64__)
diff --git a/libgo/runtime/go-libmain.c b/libgo/runtime/go-libmain.c
index 6884f3a..c62ad93 100644
--- a/libgo/runtime/go-libmain.c
+++ b/libgo/runtime/go-libmain.c
@@ -61,6 +61,7 @@ initfn (int argc, char **argv, char** env __attribute__ ((unused)))
runtime_isarchive = true;
+ runtime_cpuinit ();
runtime_initsig(true);
a = (struct args *) malloc (sizeof *a);
diff --git a/libgo/runtime/go-main.c b/libgo/runtime/go-main.c
index ff2958c..622a77d 100644
--- a/libgo/runtime/go-main.c
+++ b/libgo/runtime/go-main.c
@@ -47,6 +47,7 @@ main (int argc, char **argv)
runtime_isstarted = true;
__go_end = (uintptr)_end;
+ runtime_cpuinit ();
runtime_check ();
runtime_args (argc, (byte **) argv);
runtime_osinit ();
diff --git a/libgo/runtime/go-type-identity.c b/libgo/runtime/go-type-identity.c
index d58aa75..842fa24 100644
--- a/libgo/runtime/go-type-identity.c
+++ b/libgo/runtime/go-type-identity.c
@@ -9,44 +9,14 @@
#include "runtime.h"
#include "go-type.h"
-/* An identity hash function for a type. This is used for types where
- we can simply use the type value itself as a hash code. This is
- true of, e.g., integers and pointers. */
+/* The hash function for types that can compare as identity is
+ written in Go. */
-uintptr_t
-__go_type_hash_identity (const void *key, uintptr_t seed, uintptr_t key_size)
-{
- uintptr_t ret;
- uintptr_t i;
- const unsigned char *p;
-
- if (key_size <= 8)
- {
- union
- {
- uint64 v;
- unsigned char a[8];
- } u;
- u.v = 0;
-#ifdef WORDS_BIGENDIAN
- __builtin_memcpy (&u.a[8 - key_size], key, key_size);
-#else
- __builtin_memcpy (&u.a[0], key, key_size);
-#endif
- if (sizeof (uintptr_t) >= 8)
- return (uintptr_t) u.v ^ seed;
- else
- return (uintptr_t) ((u.v >> 32) ^ (u.v & 0xffffffff)) ^ seed;
- }
-
- ret = seed;
- for (i = 0, p = (const unsigned char *) key; i < key_size; i++, p++)
- ret = ret * 33 + *p;
- return ret;
-}
+extern uintptr runtime_memhash(void *, uintptr, uintptr)
+ __asm__ (GOSYM_PREFIX "runtime.memhash");
const FuncVal __go_type_hash_identity_descriptor =
- { (void *) __go_type_hash_identity };
+ { (void *) runtime_memhash };
/* An identity equality function for a type. This is used for types
where we can check for equality by checking that the values have
diff --git a/libgo/runtime/go-type.h b/libgo/runtime/go-type.h
index 7c3149b..2d5965c 100644
--- a/libgo/runtime/go-type.h
+++ b/libgo/runtime/go-type.h
@@ -362,7 +362,6 @@ extern _Bool
__go_type_descriptors_equal(const struct __go_type_descriptor*,
const struct __go_type_descriptor*);
-extern uintptr_t __go_type_hash_identity (const void *, uintptr_t, uintptr_t);
extern const FuncVal __go_type_hash_identity_descriptor;
extern _Bool __go_type_equal_identity (const void *, const void *, uintptr_t);
extern const FuncVal __go_type_equal_identity_descriptor;
diff --git a/libgo/runtime/proc.c b/libgo/runtime/proc.c
index dd5562b..be7e083 100644
--- a/libgo/runtime/proc.c
+++ b/libgo/runtime/proc.c
@@ -455,7 +455,8 @@ runtime_schedinit(void)
// runtime_symtabinit();
runtime_mallocinit();
mcommoninit(m);
-
+ runtime_alginit(); // maps must not be used before this call
+
// Initialize the itable value for newErrorCString,
// so that the next time it gets called, possibly
// in a fault during a garbage collection, it will not
diff --git a/libgo/runtime/runtime.h b/libgo/runtime/runtime.h
index f793fea..424b429 100644
--- a/libgo/runtime/runtime.h
+++ b/libgo/runtime/runtime.h
@@ -265,6 +265,8 @@ struct __go_func_type;
void runtime_args(int32, byte**)
__asm__ (GOSYM_PREFIX "runtime.args");
void runtime_osinit();
+void runtime_alginit(void)
+ __asm__ (GOSYM_PREFIX "runtime.alginit");
void runtime_goargs(void)
__asm__ (GOSYM_PREFIX "runtime.goargs");
void runtime_goenvs(void);
@@ -592,3 +594,7 @@ extern void *getitab(const struct __go_type_descriptor *,
const struct __go_type_descriptor *,
_Bool)
__asm__ (GOSYM_PREFIX "runtime.getitab");
+
+extern void runtime_cpuinit(void);
+extern void setCpuidECX(uint32)
+ __asm__ (GOSYM_PREFIX "runtime.setCpuidECX");
diff --git a/libgo/runtime/runtime_c.c b/libgo/runtime/runtime_c.c
index 16be089..3387401 100644
--- a/libgo/runtime/runtime_c.c
+++ b/libgo/runtime/runtime_c.c
@@ -6,6 +6,10 @@
#include <signal.h>
#include <unistd.h>
+#if defined(__i386__) || defined(__x86_64__)
+#include <cpuid.h>
+#endif
+
#include "config.h"
#include "runtime.h"
@@ -204,3 +208,18 @@ go_errno()
{
return (intgo)errno;
}
+
+// CPU-specific initialization.
+// On i386 and x86-64, query CPUID leaf 1 and pass the resulting ECX
+// value to setCpuidECX.  If the query reports failure, or on any
+// other architecture, this function does nothing.
+
+void
+runtime_cpuinit(void)
+{
+#if defined(__i386__) || defined(__x86_64__)
+	unsigned int eax, ebx, ecx, edx;
+
+	// __get_cpuid returns zero when the requested leaf is unsupported;
+	// in that case the output registers are not meaningful.
+	if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
+		setCpuidECX(ecx);
+	}
+#endif
+}