From b7d7c92f24721a10f5b780bb6927c863cc5a45c6 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Tue, 7 Aug 2018 17:29:50 +0000 Subject: runtime: use poll rather than pollset for netpoll on AIX Updates golang/go#26634 Reviewed-on: https://go-review.googlesource.com/126857 From-SVN: r263364 --- libgo/go/runtime/netpoll.go | 4 +- libgo/go/runtime/netpoll_aix.go | 200 +++++++++++++++++++--------------------- 2 files changed, 95 insertions(+), 109 deletions(-) (limited to 'libgo/go/runtime') diff --git a/libgo/go/runtime/netpoll.go b/libgo/go/runtime/netpoll.go index e9bbfec..3aeb1f6 100644 --- a/libgo/go/runtime/netpoll.go +++ b/libgo/go/runtime/netpoll.go @@ -169,8 +169,8 @@ func poll_runtime_pollWait(pd *pollDesc, mode int) int { if err != 0 { return err } - // As for now only Solaris uses level-triggered IO. - if GOOS == "solaris" { + // As for now only Solaris and AIX use level-triggered IO. + if GOOS == "solaris" || GOOS == "aix" { netpollarm(pd, mode) } for !netpollblock(pd, int32(mode), false) { diff --git a/libgo/go/runtime/netpoll_aix.go b/libgo/go/runtime/netpoll_aix.go index da59f93..cbeb8c9 100644 --- a/libgo/go/runtime/netpoll_aix.go +++ b/libgo/go/runtime/netpoll_aix.go @@ -7,9 +7,7 @@ package runtime import "unsafe" // This is based on the former libgo/runtime/netpoll_select.c implementation -// except that it uses AIX pollset_poll instead of select and is written in Go. - -type pollset_t int32 +// except that it uses poll instead of select and is written in Go. type pollfd struct { fd int32 @@ -22,25 +20,9 @@ const _POLLOUT = 0x0002 const _POLLHUP = 0x2000 const _POLLERR = 0x4000 -type poll_ctl struct { - cmd int16 - events int16 - fd int32 -} - -const _PS_ADD = 0x0 -const _PS_DELETE = 0x2 - -//extern pollset_create -func pollset_create(maxfd int32) pollset_t - -//go:noescape -//extern pollset_ctl -func pollset_ctl(ps pollset_t, pollctl_array *poll_ctl, array_length int32) int32 - //go:noescape -//extern pollset_poll -func pollset_poll(ps pollset_t, polldata_array *pollfd, array_length int32, timeout int32) int32 +//extern poll +func libc_poll(pfds *pollfd, npfds uintptr, timeout uintptr) int32 //go:noescape //extern pipe @@ -55,9 +37,10 @@ func fcntl(fd, cmd int32, arg uintptr) uintptr { } var ( - ps pollset_t = -1 - mpfds map[int32]*pollDesc - pmtx mutex + pfds []pollfd + pds []*pollDesc + mtxpoll mutex + mtxset mutex rdwake int32 wrwake int32 needsUpdate bool @@ -66,13 +49,7 @@ var ( func netpollinit() { var p [2]int32 - if ps = pollset_create(-1); ps < 0 { - throw("runtime: netpollinit failed to create pollset") - } - // It is not possible to add or remove descriptors from - // the pollset while pollset_poll is active. - // We use a pipe to wakeup pollset_poll when the pollset - // needs to be updated. + // Create the pipe we use to wakeup poll. if err := libc_pipe(&p[0]); err < 0 { throw("runtime: netpollinit failed to create pipe") } @@ -84,127 +61,136 @@ func netpollinit() { fcntl(rdwake, _F_SETFD, _FD_CLOEXEC) fl = fcntl(wrwake, _F_GETFL, 0) - fcntl(wrwake, _F_SETFL, fl|_O_NONBLOCK) fcntl(wrwake, _F_SETFD, _FD_CLOEXEC) - // Add the read side of the pipe to the pollset. - var pctl poll_ctl - pctl.cmd = _PS_ADD - pctl.fd = rdwake - pctl.events = _POLLIN - if pollset_ctl(ps, &pctl, 1) != 0 { - throw("runtime: netpollinit failed to register pipe") - } + // Pre-allocate array of pollfd structures for poll. + pfds = make([]pollfd, 1, 128) + // Poll the read side of the pipe. 
+ pfds[0].fd = rdwake + pfds[0].events = _POLLIN - mpfds = make(map[int32]*pollDesc) + // Allocate index to pd array + pds = make([]*pollDesc, 1, 128) + pds[0] = nil } func netpolldescriptor() uintptr { - // ps is not a real file descriptor. return ^uintptr(0) } -func netpollopen(fd uintptr, pd *pollDesc) int32 { - // pollset_ctl will block if pollset_poll is active - // so wakeup pollset_poll first. - lock(&pmtx) - needsUpdate = true - unlock(&pmtx) - b := [1]byte{0} - write(uintptr(wrwake), unsafe.Pointer(&b[0]), 1) - - var pctl poll_ctl - pctl.cmd = _PS_ADD - pctl.fd = int32(fd) - pctl.events = _POLLIN | _POLLOUT - if pollset_ctl(ps, &pctl, 1) != 0 { - return int32(errno()) +func netpollwakeup() { + if !needsUpdate { + needsUpdate = true + b := [1]byte{0} + write(uintptr(wrwake), unsafe.Pointer(&b[0]), 1) } - lock(&pmtx) - mpfds[int32(fd)] = pd - needsUpdate = false - unlock(&pmtx) +} +func netpollopen(fd uintptr, pd *pollDesc) int32 { + lock(&mtxpoll) + netpollwakeup() + + lock(&mtxset) + unlock(&mtxpoll) + + pd.user = uint32(len(pfds)) + var pfd pollfd + pfd.fd = int32(fd) + pfds = append(pfds, pfd) + pds = append(pds, pd) + unlock(&mtxset) return 0 } func netpollclose(fd uintptr) int32 { - // pollset_ctl will block if pollset_poll is active - // so wakeup pollset_poll first. - lock(&pmtx) - needsUpdate = true - unlock(&pmtx) - b := [1]byte{0} - write(uintptr(wrwake), unsafe.Pointer(&b[0]), 1) - - var pctl poll_ctl - pctl.cmd = _PS_DELETE - pctl.fd = int32(fd) - if pollset_ctl(ps, &pctl, 1) != 0 { - return int32(errno()) - } - lock(&pmtx) - delete(mpfds, int32(fd)) - needsUpdate = false - unlock(&pmtx) + lock(&mtxpoll) + netpollwakeup() + + lock(&mtxset) + unlock(&mtxpoll) + for i := 0; i < len(pfds); i++ { + if pfds[i].fd == int32(fd) { + pfds[i] = pfds[len(pfds)-1] + pfds = pfds[:len(pfds)-1] + + pds[i] = pds[len(pds)-1] + pds[i].user = uint32(i) + pds = pds[:len(pds)-1] + break + } + } + unlock(&mtxset) return 0 } func netpollarm(pd *pollDesc, mode int) { - throw("runtime: unused") + lock(&mtxpoll) + netpollwakeup() + + lock(&mtxset) + unlock(&mtxpoll) + + switch mode { + case 'r': + pfds[pd.user].events |= _POLLIN + case 'w': + pfds[pd.user].events |= _POLLOUT + } + unlock(&mtxset) } +//go:nowritebarrierrec func netpoll(block bool) *g { - if ps == -1 { - return nil - } - timeout := int32(-1) + timeout := ^uintptr(0) if !block { timeout = 0 + return nil } - var pfds [128]pollfd retry: - lock(&pmtx) - if needsUpdate { - unlock(&pmtx) - osyield() - goto retry - } - unlock(&pmtx) - nfound := pollset_poll(ps, &pfds[0], int32(len(pfds)), timeout) - if nfound < 0 { + lock(&mtxpoll) + lock(&mtxset) + needsUpdate = false + unlock(&mtxpoll) + + n := libc_poll(&pfds[0], uintptr(len(pfds)), timeout) + if n < 0 { e := errno() if e != _EINTR { - throw("runtime: pollset_poll failed") + throw("runtime: poll failed") + } + unlock(&mtxset) + goto retry + } + // Check if some descriptors need to be changed + if n != 0 && pfds[0].revents&(_POLLIN|_POLLHUP|_POLLERR) != 0 { + var b [1]byte + for read(rdwake, unsafe.Pointer(&b[0]), 1) == 1 { } + // Do not look at the other fds in this case as the mode may have changed + // XXX only additions of flags are made, so maybe it is ok + unlock(&mtxset) goto retry } var gp guintptr - for i := int32(0); i < nfound; i++ { + for i := 0; i < len(pfds) && n > 0; i++ { pfd := &pfds[i] var mode int32 if pfd.revents&(_POLLIN|_POLLHUP|_POLLERR) != 0 { - if pfd.fd == rdwake { - var b [1]byte - read(pfd.fd, unsafe.Pointer(&b[0]), 1) - continue - } mode += 'r' + pfd.events 
&= ^_POLLIN } if pfd.revents&(_POLLOUT|_POLLHUP|_POLLERR) != 0 { mode += 'w' + pfd.events &= ^_POLLOUT } if mode != 0 { - lock(&pmtx) - pd := mpfds[pfd.fd] - unlock(&pmtx) - if pd != nil { - netpollready(&gp, pd, mode) - } + netpollready(&gp, pds[i], mode) + n-- } } + unlock(&mtxset) if block && gp == 0 { goto retry } -- cgit v1.1 From 1d29bb0408506480e13afeca2ec6b8611e6a0ce7 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Fri, 24 Aug 2018 18:15:04 +0000 Subject: runtime: remove the dummy arg of getcallersp This is a port of https://golang.org/cl/109596 to the gofrontend, in preparation for updating libgo to 1.11. Original CL description: getcallersp is intrinsified, and so the dummy arg is no longer needed. Remove it, as well as a few dummy args that are solely to feed getcallersp. Reviewed-on: https://go-review.googlesource.com/131116 From-SVN: r263840 --- libgo/go/runtime/cgo_gccgo.go | 10 +++++----- libgo/go/runtime/lock_futex.go | 4 ++-- libgo/go/runtime/lock_sema.go | 4 ++-- libgo/go/runtime/malloc.go | 6 +++--- libgo/go/runtime/norace_test.go | 4 ++-- libgo/go/runtime/proc.go | 8 ++++---- libgo/go/runtime/stubs.go | 16 +++++++--------- 7 files changed, 25 insertions(+), 27 deletions(-) (limited to 'libgo/go/runtime') diff --git a/libgo/go/runtime/cgo_gccgo.go b/libgo/go/runtime/cgo_gccgo.go index 05be496..e689b0e 100644 --- a/libgo/go/runtime/cgo_gccgo.go +++ b/libgo/go/runtime/cgo_gccgo.go @@ -47,7 +47,7 @@ func Cgocall() { mp := getg().m mp.ncgocall++ mp.ncgo++ - entersyscall(0) + entersyscall() mp.incgo = true } @@ -63,7 +63,7 @@ func CgocallDone() { // If we are invoked because the C function called _cgo_panic, // then _cgo_panic will already have exited syscall mode. if readgstatus(gp)&^_Gscan == _Gsyscall { - exitsyscall(0) + exitsyscall() } } @@ -84,7 +84,7 @@ func CgocallBack() { lockOSThread() - exitsyscall(0) + exitsyscall() gp.m.incgo = false if gp.m.ncgo == 0 { @@ -134,7 +134,7 @@ func CgocallBackDone() { } gp.m.incgo = true - entersyscall(0) + entersyscall() if drop { mp.dropextram = false @@ -144,7 +144,7 @@ func CgocallBackDone() { // _cgo_panic may be called by SWIG code to panic. func _cgo_panic(p *byte) { - exitsyscall(0) + exitsyscall() panic(gostringnocopy(p)) } diff --git a/libgo/go/runtime/lock_futex.go b/libgo/go/runtime/lock_futex.go index 7ddd378..b2c9ccb 100644 --- a/libgo/go/runtime/lock_futex.go +++ b/libgo/go/runtime/lock_futex.go @@ -236,8 +236,8 @@ func notetsleepg(n *note, ns int64) bool { throw("notetsleepg on g0") } - entersyscallblock(0) + entersyscallblock() ok := notetsleep_internal(n, ns) - exitsyscall(0) + exitsyscall() return ok } diff --git a/libgo/go/runtime/lock_sema.go b/libgo/go/runtime/lock_sema.go index d000b11..b5cce6a 100644 --- a/libgo/go/runtime/lock_sema.go +++ b/libgo/go/runtime/lock_sema.go @@ -289,8 +289,8 @@ func notetsleepg(n *note, ns int64) bool { throw("notetsleepg on g0") } semacreate(gp.m) - entersyscallblock(0) + entersyscallblock() ok := notetsleep_internal(n, ns, nil, 0) - exitsyscall(0) + exitsyscall() return ok } diff --git a/libgo/go/runtime/malloc.go b/libgo/go/runtime/malloc.go index c8d5284..523989e 100644 --- a/libgo/go/runtime/malloc.go +++ b/libgo/go/runtime/malloc.go @@ -621,7 +621,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { // callback. 
incallback := false if gomcache() == nil && getg().m.ncgo > 0 { - exitsyscall(0) + exitsyscall() incallback = true } @@ -709,7 +709,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { mp.mallocing = 0 releasem(mp) if incallback { - entersyscall(0) + entersyscall() } return x } @@ -835,7 +835,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { } if incallback { - entersyscall(0) + entersyscall() } return x diff --git a/libgo/go/runtime/norace_test.go b/libgo/go/runtime/norace_test.go index e9b39b2..e90128b 100644 --- a/libgo/go/runtime/norace_test.go +++ b/libgo/go/runtime/norace_test.go @@ -34,12 +34,12 @@ func benchmarkSyscall(b *testing.B, work, excess int) { b.RunParallel(func(pb *testing.PB) { foo := 42 for pb.Next() { - runtime.Entersyscall(0) + runtime.Entersyscall() for i := 0; i < work; i++ { foo *= 2 foo /= 2 } - runtime.Exitsyscall(0) + runtime.Exitsyscall() } _ = foo }) diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go index 5826958..4c217cc 100644 --- a/libgo/go/runtime/proc.go +++ b/libgo/go/runtime/proc.go @@ -1168,7 +1168,7 @@ func kickoff() { goexit1() } -func mstart1(dummy int32) { +func mstart1() { _g_ := getg() if _g_ != _g_.m.g0 { @@ -2774,7 +2774,7 @@ func entersyscallblock_handoff() { // //go:nosplit //go:nowritebarrierrec -func exitsyscall(dummy int32) { +func exitsyscall() { _g_ := getg() _g_.m.locks++ // see comment in entersyscall @@ -2984,13 +2984,13 @@ func exitsyscallclear(gp *g) { //go:linkname syscall_entersyscall syscall.Entersyscall //go:nosplit func syscall_entersyscall() { - entersyscall(0) + entersyscall() } //go:linkname syscall_exitsyscall syscall.Exitsyscall //go:nosplit func syscall_exitsyscall() { - exitsyscall(0) + exitsyscall() } func beforefork() { diff --git a/libgo/go/runtime/stubs.go b/libgo/go/runtime/stubs.go index bda2c69..1d21445 100644 --- a/libgo/go/runtime/stubs.go +++ b/libgo/go/runtime/stubs.go @@ -199,16 +199,14 @@ func publicationBarrier() // getcallerpc returns the program counter (PC) of its caller's caller. // getcallersp returns the stack pointer (SP) of its caller's caller. // argp must be a pointer to the caller's first function argument. -// The implementation may or may not use argp, depending on -// the architecture. The implementation may be a compiler -// intrinsic; there is not necessarily code implementing this -// on every platform. +// The implementation may be a compiler intrinsic; there is not +// necessarily code implementing this on every platform. // // For example: // // func f(arg1, arg2, arg3 int) { // pc := getcallerpc() -// sp := getcallersp(unsafe.Pointer(&arg1)) +// sp := getcallersp() // } // // These two lines find the PC and SP immediately following @@ -230,7 +228,7 @@ func publicationBarrier() func getcallerpc() uintptr //go:noescape -func getcallersp(argp unsafe.Pointer) uintptr +func getcallersp() uintptr func asmcgocall(fn, arg unsafe.Pointer) int32 { throw("asmcgocall") @@ -309,9 +307,9 @@ func setSupportAES(v bool) { // Here for gccgo. func errno() int -// Temporary for gccgo until we port proc.go. -func entersyscall(int32) -func entersyscallblock(int32) +// For gccgo these are written in C. +func entersyscall() +func entersyscallblock() // For gccgo to call from C code, so that the C code and the Go code // can share the memstats variable for now. 
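
A minimal sketch of the call-site pattern this change leaves behind (illustrative only: blockingWork is a placeholder, and entersyscall/exitsyscall are runtime-internal helpers, not user API):

	func wrapped() {
		entersyscall()  // was entersyscall(0)
		blockingWork()  // any operation that may block outside the Go scheduler
		exitsyscall()   // was exitsyscall(0)
	}
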
-- cgit v1.1 From 347462bfeecb8db177df72dc5b3c6f2eb263c452 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Wed, 29 Aug 2018 00:20:25 +0000 Subject: compiler, runtime: remove hmap field from maptypes This is the gofrontend version of https://golang.org/cl/91796. This is part of that CL, just the compiler change and required runtime changes, in preparation for updating libgo to 1.11. Relevant part of original CL description: The hmap field in the maptype is only used by the runtime to check the sizes of the hmap structure created by the compiler and runtime agree. Comments are already present about the hmap structure definitions in the compiler and runtime needing to be in sync. Reviewed-on: https://go-review.googlesource.com/130976 From-SVN: r263941 --- libgo/go/runtime/hashmap.go | 14 +------------- libgo/go/runtime/type.go | 1 - 2 files changed, 1 insertion(+), 14 deletions(-) (limited to 'libgo/go/runtime') diff --git a/libgo/go/runtime/hashmap.go b/libgo/go/runtime/hashmap.go index aba9abd..53b05b1 100644 --- a/libgo/go/runtime/hashmap.go +++ b/libgo/go/runtime/hashmap.go @@ -311,20 +311,13 @@ func makemap_small() *hmap { // If h != nil, the map can be created directly in h. // If h.buckets != nil, bucket pointed to can be used as the first bucket. func makemap(t *maptype, hint int, h *hmap) *hmap { - // The size of hmap should be 48 bytes on 64 bit - // and 28 bytes on 32 bit platforms. - if sz := unsafe.Sizeof(hmap{}); sz != 8+5*sys.PtrSize { - println("runtime: sizeof(hmap) =", sz, ", t.hmap.size =", t.hmap.size) - throw("bad hmap size") - } - if hint < 0 || hint > int(maxSliceCap(t.bucket.size)) { hint = 0 } // initialize Hmap if h == nil { - h = (*hmap)(newobject(t.hmap)) + h = new(hmap) } h.hash0 = fastrand() @@ -1210,11 +1203,6 @@ func ismapkey(t *_type) bool { //go:linkname reflect_makemap reflect.makemap func reflect_makemap(t *maptype, cap int) *hmap { - // Check invariants and reflects math. - if sz := unsafe.Sizeof(hmap{}); sz != t.hmap.size { - println("runtime: sizeof(hmap) =", sz, ", t.hmap.size =", t.hmap.size) - throw("bad hmap size") - } if !ismapkey(t.key) { throw("runtime.reflect_makemap: unsupported map key type") } diff --git a/libgo/go/runtime/type.go b/libgo/go/runtime/type.go index 0ec0da4..3c08f7e 100644 --- a/libgo/go/runtime/type.go +++ b/libgo/go/runtime/type.go @@ -72,7 +72,6 @@ type maptype struct { key *_type elem *_type bucket *_type // internal type representing a hash bucket - hmap *_type // internal type representing a hmap keysize uint8 // size of key slot indirectkey bool // store ptr to key instead of key itself valuesize uint8 // size of value slot -- cgit v1.1 From 2919ad1ee3bf475c8f3aae44c2aec694a9843c4d Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Thu, 13 Sep 2018 16:44:43 +0000 Subject: libgo: build roots index to speed up bulkBarrierPreWrite To reduce the amount of time spent in write barrier processing (specifically runtime.bulkBarrierPreWrite), add support for building a 'GC roots index', basically a sorted list of all roots, so as to allow more efficient lookups of gcdata structures for globals. The previous implementation worked on the raw (unsorted) roots list itself, which did not scale well. 
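
A rough, self-contained sketch of the sorted-index lookup idea described above (the type and function names here are illustrative, not the runtime's own; the real index holds *gcRoot entries, is allocated off-heap via persistentalloc, and is built with a radix sort, as the diff below shows):

	package main

	import (
		"fmt"
		"sort"
	)

	// root loosely mirrors the per-root information the runtime keeps:
	// a starting address and a size.
	type root struct {
		addr, size uintptr
	}

	// find returns the root whose [addr, addr+size) range contains p by
	// binary-searching an index sorted by starting address.
	func find(index []root, p uintptr) (root, bool) {
		i := sort.Search(len(index), func(i int) bool {
			return index[i].addr+index[i].size > p
		})
		if i < len(index) && index[i].addr <= p {
			return index[i], true
		}
		return root{}, false
	}

	func main() {
		index := []root{{0x1000, 0x100}, {0x2000, 0x40}, {0x5000, 0x800}}
		// The index must be kept sorted by starting address.
		sort.Slice(index, func(i, j int) bool { return index[i].addr < index[j].addr })
		fmt.Println(find(index, 0x2010)) // {8192 64} true
		fmt.Println(find(index, 0x3000)) // {0 0} false
	}
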
Reviewed-on: https://go-review.googlesource.com/132595 From-SVN: r264276 --- libgo/go/runtime/cgocall.go | 24 ++++++----- libgo/go/runtime/mbitmap.go | 26 ++++++------ libgo/go/runtime/mgc_gccgo.go | 92 +++++++++++++++++++++++++++++++++++++++++++ libgo/go/runtime/proc.go | 1 + 4 files changed, 122 insertions(+), 21 deletions(-) (limited to 'libgo/go/runtime') diff --git a/libgo/go/runtime/cgocall.go b/libgo/go/runtime/cgocall.go index d5794bc..67b2bce 100644 --- a/libgo/go/runtime/cgocall.go +++ b/libgo/go/runtime/cgocall.go @@ -243,17 +243,21 @@ func cgoCheckUnknownPointer(p unsafe.Pointer, msg string) (base, i uintptr) { return } - roots := gcRoots - for roots != nil { - for j := 0; j < roots.count; j++ { - pr := roots.roots[j] - addr := uintptr(pr.decl) - if cgoInRange(p, addr, addr+pr.size) { - cgoCheckBits(pr.decl, pr.gcdata, 0, pr.ptrdata) - return - } + lo := 0 + hi := len(gcRootsIndex) + for lo < hi { + m := lo + (hi-lo)/2 + pr := gcRootsIndex[m] + addr := uintptr(pr.decl) + if cgoInRange(p, addr, addr+pr.size) { + cgoCheckBits(pr.decl, pr.gcdata, 0, pr.ptrdata) + return + } + if uintptr(p) < addr { + hi = m + } else { + lo = m + 1 } - roots = roots.next } return diff --git a/libgo/go/runtime/mbitmap.go b/libgo/go/runtime/mbitmap.go index 191239a..c6c8e6a 100644 --- a/libgo/go/runtime/mbitmap.go +++ b/libgo/go/runtime/mbitmap.go @@ -575,19 +575,23 @@ func bulkBarrierPreWrite(dst, src, size uintptr) { if !inheap(dst) { // If dst is a global, use the data or BSS bitmaps to // execute write barriers. - roots := gcRoots - for roots != nil { - for i := 0; i < roots.count; i++ { - pr := roots.roots[i] - addr := uintptr(pr.decl) - if addr <= dst && dst < addr+pr.size { - if dst < addr+pr.ptrdata { - bulkBarrierBitmap(dst, src, size, dst-addr, pr.gcdata) - } - return + lo := 0 + hi := len(gcRootsIndex) + for lo < hi { + m := lo + (hi-lo)/2 + pr := gcRootsIndex[m] + addr := uintptr(pr.decl) + if addr <= dst && dst < addr+pr.size { + if dst < addr+pr.ptrdata { + bulkBarrierBitmap(dst, src, size, dst-addr, pr.gcdata) } + return + } + if dst < addr { + hi = m + } else { + lo = m + 1 } - roots = roots.next } return } diff --git a/libgo/go/runtime/mgc_gccgo.go b/libgo/go/runtime/mgc_gccgo.go index 107a70a..cf7780c 100644 --- a/libgo/go/runtime/mgc_gccgo.go +++ b/libgo/go/runtime/mgc_gccgo.go @@ -12,6 +12,7 @@ import ( ) // gcRoot is a single GC root: a variable plus a ptrmask. +//go:notinheap type gcRoot struct { decl unsafe.Pointer // Pointer to variable. size uintptr // Size of variable. @@ -32,6 +33,97 @@ type gcRootList struct { // The compiler keeps this variable itself off the list. var gcRoots *gcRootList +// Slice containing pointers to all reachable gcRoot's sorted by +// starting address (generated at init time from 'gcRoots'). +// The compiler also keeps this variable itself off the list. +// The storage backing this slice is allocated via persistentalloc(), the +// idea being that we don't want to treat the slice itself as a global +// variable, since it points to things that don't need to be scanned +// themselves. +var gcRootsIndex []*gcRoot + +// rootradixsort performs an in-place radix sort of the 'arr' rootptr slice. +// Note: not a stable sort, however we expect it to be called only on slices +// with no duplicate entries, so this should not matter. +func rootradixsort(arr []*gcRoot, lo, hi int, bit uint) { + // Partition the array into two bins based on the values at the + // specified bit position: 0's bin (grown from the left) and and + // 1's bin (grown from the right). 
We keep two boundary markers, + // the 0's boundary "zbb" (which grows to the right) and the 1's + // boundary "obb" (which grows to the left). At each step we + // examine the bit for the right-of-ZBB element: if it is zero, we + // leave it in place and move the ZBB to the right. If the bit is + // not zero, then we swap the ZBB and OBB elements and move the + // OBB to the left. When this is done, the two partitions are then + // sorted using the next lower bit. + + // 0's bin boundary, initially set to before the first element + zbb := lo - 1 + // 1's bin boundary, set to just beyond the last element + obb := hi + 1 + // mask to pick up bit of interest + bmask := uintptr(1) << bit + + for obb-zbb > 1 { + zbbval := uintptr(arr[zbb+1].decl) & bmask + if zbbval == 0 { + // Move zbb one to the right + zbb++ + } else { + // Move obb one to the left and swap + arr[obb-1], arr[zbb+1] = arr[zbb+1], arr[obb-1] + obb-- + } + } + + if bit != 0 { + // NB: in most cases there is just a single partition to visit + // so if we wanted to reduce stack space we could check for this + // and insert a goto back up to the top. + if zbb-lo > 0 { + rootradixsort(arr, lo, zbb, bit-1) + } + if hi-obb > 0 { + rootradixsort(arr, obb, hi, bit-1) + } + } +} + +//go:nowritebarrier +func createGcRootsIndex() { + // Count roots + nroots := 0 + gcr := gcRoots + for gcr != nil { + nroots += gcr.count + gcr = gcr.next + } + + // Construct the gcRootsIndex slice. Use non-heap storage for the array + // backing the slice. + sp := (*notInHeapSlice)(unsafe.Pointer(&gcRootsIndex)) + sp.array = (*notInHeap)(persistentalloc1(sys.PtrSize*uintptr(nroots), sys.PtrSize, &memstats.other_sys)) + if sp.array == nil { + throw("runtime: cannot allocate memory") + } + sp.len = nroots + sp.cap = nroots + + // Populate the roots index slice + gcr = gcRoots + k := 0 + for gcr != nil { + for i := 0; i < gcr.count; i++ { + gcRootsIndex[k] = &gcr.roots[i] + k++ + } + gcr = gcr.next + } + + // Sort it by starting address. + rootradixsort(gcRootsIndex, 0, nroots-1, sys.PtrSize*8-1) +} + // registerGCRoots is called by compiler-generated code. //go:linkname registerGCRoots runtime.registerGCRoots diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go index 4c217cc..74325e3 100644 --- a/libgo/go/runtime/proc.go +++ b/libgo/go/runtime/proc.go @@ -207,6 +207,7 @@ func main() { fn := main_init // make an indirect call, as the linker doesn't know the address of the main package when laying down the runtime fn() + createGcRootsIndex() close(main_init_done) needUnlock = false -- cgit v1.1 From f0d89c7759e7be18895868e0c4e7f9e120f7890f Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Thu, 13 Sep 2018 17:30:00 +0000 Subject: runtime: avoid write barriers with traceback info Unlike the gc runtime, libgo stores traceback information in location structs, which contain strings. Therefore, copying location structs around appears to require write barriers, although in fact write barriers are never important because the strings are never allocated in Go memory. They come from libbacktrace. Some of the generated write barriers come at times when write barriers are not permitted. For example, exitsyscall, marked nowritebarrierrec, calls exitsyscallfast which calls traceGoSysExit which calls traceEvent which calls traceStackID which calls trace.stackTab.put which copies location values into memory allocated by tab.newStack. This write barrier can be invoked when there is no p, causing a crash. 
This change fixes the problem by ensuring that location values are copied around in the tracing code with no write barriers. This was found by fixing the compiler to fully implement //go:nowritebarrierrec; CL to follow. Reviewed-on: https://go-review.googlesource.com/134226 From-SVN: r264282 --- libgo/go/runtime/proc.go | 4 ++-- libgo/go/runtime/runtime2.go | 2 +- libgo/go/runtime/trace.go | 4 +++- libgo/go/runtime/traceback_gccgo.go | 4 ++-- 4 files changed, 8 insertions(+), 6 deletions(-) (limited to 'libgo/go/runtime') diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go index 74325e3..12d7071 100644 --- a/libgo/go/runtime/proc.go +++ b/libgo/go/runtime/proc.go @@ -1140,7 +1140,7 @@ func startTheWorldWithSema(emitTraceEvent bool) int64 { func kickoff() { gp := getg() - if gp.traceback != nil { + if gp.traceback != 0 { gtraceback(gp) } @@ -3097,7 +3097,7 @@ func newproc(fn uintptr, arg unsafe.Pointer) *g { } else { resetNewG(newg, &sp, &spsize) } - newg.traceback = nil + newg.traceback = 0 if readgstatus(newg) != _Gdead { throw("newproc1: new g is not Gdead") diff --git a/libgo/go/runtime/runtime2.go b/libgo/go/runtime/runtime2.go index 0299d5a..2de1cc8 100644 --- a/libgo/go/runtime/runtime2.go +++ b/libgo/go/runtime/runtime2.go @@ -431,7 +431,7 @@ type g struct { isSystemGoroutine bool // whether goroutine is a "system" goroutine - traceback *tracebackg // stack traceback buffer + traceback uintptr // stack traceback buffer context g_ucontext_t // saved context for setcontext stackcontext [10]uintptr // split-stack context diff --git a/libgo/go/runtime/trace.go b/libgo/go/runtime/trace.go index 8427e76..e2bbb5d 100644 --- a/libgo/go/runtime/trace.go +++ b/libgo/go/runtime/trace.go @@ -135,6 +135,7 @@ var trace struct { } // traceBufHeader is per-P tracing buffer. +//go:notinheap type traceBufHeader struct { link traceBufPtr // in trace.empty/full lastTicks uint64 // when we wrote the last event @@ -747,7 +748,8 @@ func (tab *traceStackTable) put(pcs []location) uint32 { stk.n = len(pcs) stkpc := stk.stack() for i, pc := range pcs { - stkpc[i] = pc + // Use memmove to avoid write barrier. + memmove(unsafe.Pointer(&stkpc[i]), unsafe.Pointer(&pc), unsafe.Sizeof(pc)) } part := int(hash % uintptr(len(tab.tab))) stk.link = tab.tab[part] diff --git a/libgo/go/runtime/traceback_gccgo.go b/libgo/go/runtime/traceback_gccgo.go index 8551ec1..9456b1f 100644 --- a/libgo/go/runtime/traceback_gccgo.go +++ b/libgo/go/runtime/traceback_gccgo.go @@ -186,7 +186,7 @@ func tracebackothers(me *g) { if gp != nil && gp != me { print("\n") goroutineheader(gp) - gp.traceback = (*tracebackg)(noescape(unsafe.Pointer(&tb))) + gp.traceback = (uintptr)(noescape(unsafe.Pointer(&tb))) getTraceback(me, gp) printtrace(tb.locbuf[:tb.c], nil) printcreatedby(gp) @@ -220,7 +220,7 @@ func tracebackothers(me *g) { print("\tgoroutine in C code; stack unavailable\n") printcreatedby(gp) } else { - gp.traceback = (*tracebackg)(noescape(unsafe.Pointer(&tb))) + gp.traceback = (uintptr)(noescape(unsafe.Pointer(&tb))) getTraceback(me, gp) printtrace(tb.locbuf[:tb.c], nil) printcreatedby(gp) -- cgit v1.1 From 84cdf51de42f823b4ed0d65ef20ab0142607917b Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Thu, 13 Sep 2018 21:32:24 +0000 Subject: compiler, runtime: open code select This is the gofrontend version of https://golang.org/cl/37933, https://golang.org/cl/37934, and https://golang.org/cl/37935. Open code the initialization of select cases. This is a step toward updating libgo to the 1.11 release. 
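
For orientation, here is a small user-level select, with comments sketching the shape of the lowering (a conceptual description, not the literal compiler output):

	package main

	import "fmt"

	func main() {
		c1 := make(chan int, 1)
		c2 := make(chan string, 1)
		c1 <- 42

		// The compiler now initializes an on-stack [2]scase array and a
		// [4]uint16 order array itself and calls
		// runtime.selectgo(&cases[0], &order[0], 2), which returns the
		// chosen case index and, for receives, whether a value was
		// received -- much like reflect_rselect in the diff below.
		select {
		case v := <-c1:
			fmt.Println("received from c1:", v)
		case s := <-c2:
			fmt.Println("received from c2:", s)
		}
	}
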
Reviewed-on: https://go-review.googlesource.com/135000 From-SVN: r264290 --- libgo/go/runtime/select.go | 266 +++++++++++---------------------------------- 1 file changed, 61 insertions(+), 205 deletions(-) (limited to 'libgo/go/runtime') diff --git a/libgo/go/runtime/select.go b/libgo/go/runtime/select.go index 1c5124b..9dab052 100644 --- a/libgo/go/runtime/select.go +++ b/libgo/go/runtime/select.go @@ -7,146 +7,36 @@ package runtime // This file contains the implementation of Go select statements. import ( - "runtime/internal/sys" "unsafe" ) // For gccgo, use go:linkname to rename compiler-called functions to // themselves, so that the compiler will export them. // -//go:linkname newselect runtime.newselect -//go:linkname selectdefault runtime.selectdefault -//go:linkname selectsend runtime.selectsend -//go:linkname selectrecv runtime.selectrecv //go:linkname selectgo runtime.selectgo const debugSelect = false +// scase.kind values. +// Known to compiler. +// Changes here must also be made in src/cmd/compile/internal/gc/select.go's walkselect. const ( - // scase.kind caseNil = iota caseRecv caseSend caseDefault ) -// Select statement header. -// Known to compiler. -// Changes here must also be made in src/cmd/internal/gc/select.go's selecttype. -type hselect struct { - tcase uint16 // total count of scase[] - ncase uint16 // currently filled scase[] - pollorder *uint16 // case poll order - lockorder *uint16 // channel lock order - scase [1]scase // one per case (in order of appearance) -} - // Select case descriptor. // Known to compiler. -// Changes here must also be made in src/cmd/internal/gc/select.go's selecttype. +// Changes here must also be made in src/cmd/internal/gc/select.go's scasetype. type scase struct { - elem unsafe.Pointer // data element c *hchan // chan - pc uintptr // return pc (for race detector / msan) + elem unsafe.Pointer // data element kind uint16 - receivedp *bool // pointer to received bool, if any releasetime int64 } -var ( - chansendpc = funcPC(chansend) - chanrecvpc = funcPC(chanrecv) -) - -func selectsize(size uintptr) uintptr { - selsize := unsafe.Sizeof(hselect{}) + - (size-1)*unsafe.Sizeof(hselect{}.scase[0]) + - size*unsafe.Sizeof(*hselect{}.lockorder) + - size*unsafe.Sizeof(*hselect{}.pollorder) - return round(selsize, sys.Int64Align) -} - -func newselect(sel *hselect, selsize int64, size int32) { - if selsize != int64(selectsize(uintptr(size))) { - print("runtime: bad select size ", selsize, ", want ", selectsize(uintptr(size)), "\n") - throw("bad select size") - } - if size != int32(uint16(size)) { - throw("select size too large") - } - sel.tcase = uint16(size) - sel.ncase = 0 - sel.lockorder = (*uint16)(add(unsafe.Pointer(&sel.scase), uintptr(size)*unsafe.Sizeof(hselect{}.scase[0]))) - sel.pollorder = (*uint16)(add(unsafe.Pointer(sel.lockorder), uintptr(size)*unsafe.Sizeof(*hselect{}.lockorder))) - - // For gccgo the temporary variable will not have been zeroed. 
- memclrNoHeapPointers(unsafe.Pointer(&sel.scase), uintptr(size)*unsafe.Sizeof(hselect{}.scase[0])+uintptr(size)*unsafe.Sizeof(*hselect{}.lockorder)+uintptr(size)*unsafe.Sizeof(*hselect{}.pollorder)) - - if debugSelect { - print("newselect s=", sel, " size=", size, "\n") - } -} - -func selectsend(sel *hselect, c *hchan, elem unsafe.Pointer) { - pc := getcallerpc() - i := sel.ncase - if i >= sel.tcase { - throw("selectsend: too many cases") - } - sel.ncase = i + 1 - if c == nil { - return - } - cas := (*scase)(add(unsafe.Pointer(&sel.scase), uintptr(i)*unsafe.Sizeof(sel.scase[0]))) - cas.pc = pc - cas.c = c - cas.kind = caseSend - cas.elem = elem - - if debugSelect { - print("selectsend s=", sel, " pc=", hex(cas.pc), " chan=", cas.c, "\n") - } -} - -func selectrecv(sel *hselect, c *hchan, elem unsafe.Pointer, received *bool) { - pc := getcallerpc() - i := sel.ncase - if i >= sel.tcase { - throw("selectrecv: too many cases") - } - sel.ncase = i + 1 - if c == nil { - return - } - cas := (*scase)(add(unsafe.Pointer(&sel.scase), uintptr(i)*unsafe.Sizeof(sel.scase[0]))) - cas.pc = pc - cas.c = c - cas.kind = caseRecv - cas.elem = elem - cas.receivedp = received - - if debugSelect { - print("selectrecv s=", sel, " pc=", hex(cas.pc), " chan=", cas.c, "\n") - } -} - -func selectdefault(sel *hselect) { - pc := getcallerpc() - i := sel.ncase - if i >= sel.tcase { - throw("selectdefault: too many cases") - } - sel.ncase = i + 1 - cas := (*scase)(add(unsafe.Pointer(&sel.scase), uintptr(i)*unsafe.Sizeof(sel.scase[0]))) - cas.pc = pc - cas.c = nil - cas.kind = caseDefault - - if debugSelect { - print("selectdefault s=", sel, " pc=", hex(cas.pc), "\n") - } -} - func sellock(scases []scase, lockorder []uint16) { var c *hchan for _, o := range lockorder { @@ -209,26 +99,39 @@ func block() { // selectgo implements the select statement. // -// *sel is on the current goroutine's stack (regardless of any -// escaping in selectgo). +// cas0 points to an array of type [ncases]scase, and order0 points to +// an array of type [2*ncases]uint16. Both reside on the goroutine's +// stack (regardless of any escaping in selectgo). // // selectgo returns the index of the chosen scase, which matches the // ordinal position of its respective select{recv,send,default} call. -func selectgo(sel *hselect) int { +// Also, if the chosen scase was a receive operation, it returns whether +// a value was received. +func selectgo(cas0 *scase, order0 *uint16, ncases int) (int, bool) { if debugSelect { - print("select: sel=", sel, "\n") - } - if sel.ncase != sel.tcase { - throw("selectgo: case count mismatch") + print("select: cas0=", cas0, "\n") } - scaseslice := slice{unsafe.Pointer(&sel.scase), int(sel.ncase), int(sel.ncase)} - scases := *(*[]scase)(unsafe.Pointer(&scaseslice)) + cas1 := (*[1 << 16]scase)(unsafe.Pointer(cas0)) + order1 := (*[1 << 17]uint16)(unsafe.Pointer(order0)) + + scases := cas1[:ncases:ncases] + pollorder := order1[:ncases:ncases] + lockorder := order1[ncases:][:ncases:ncases] + + // Replace send/receive cases involving nil channels with + // caseNil so logic below can assume non-nil channel. 
+ for i := range scases { + cas := &scases[i] + if cas.c == nil && cas.kind != caseDefault { + *cas = scase{} + } + } var t0 int64 if blockprofilerate > 0 { t0 = cputicks() - for i := 0; i < int(sel.ncase); i++ { + for i := 0; i < ncases; i++ { scases[i].releasetime = -1 } } @@ -241,10 +144,13 @@ func selectgo(sel *hselect) int { // cases correctly, and they are rare enough not to bother // optimizing (and needing to test). + // needed for gccgo, which doesn't zero pollorder + if ncases > 0 { + pollorder[0] = 0 + } + // generate permuted order - pollslice := slice{unsafe.Pointer(sel.pollorder), int(sel.ncase), int(sel.ncase)} - pollorder := *(*[]uint16)(unsafe.Pointer(&pollslice)) - for i := 1; i < int(sel.ncase); i++ { + for i := 1; i < ncases; i++ { j := fastrandn(uint32(i + 1)) pollorder[i] = pollorder[j] pollorder[j] = uint16(i) @@ -252,9 +158,7 @@ func selectgo(sel *hselect) int { // sort the cases by Hchan address to get the locking order. // simple heap sort, to guarantee n log n time and constant stack footprint. - lockslice := slice{unsafe.Pointer(sel.lockorder), int(sel.ncase), int(sel.ncase)} - lockorder := *(*[]uint16)(unsafe.Pointer(&lockslice)) - for i := 0; i < int(sel.ncase); i++ { + for i := 0; i < ncases; i++ { j := i // Start with the pollorder to permute cases on the same channel. c := scases[pollorder[i]].c @@ -265,7 +169,7 @@ func selectgo(sel *hselect) int { } lockorder[j] = pollorder[i] } - for i := int(sel.ncase) - 1; i >= 0; i-- { + for i := ncases - 1; i >= 0; i-- { o := lockorder[i] c := scases[o].c lockorder[i] = lockorder[0] @@ -287,14 +191,15 @@ func selectgo(sel *hselect) int { } lockorder[j] = o } - /* - for i := 0; i+1 < int(sel.ncase); i++ { + + if debugSelect { + for i := 0; i+1 < ncases; i++ { if scases[lockorder[i]].c.sortkey() > scases[lockorder[i+1]].c.sortkey() { print("i=", i, " x=", lockorder[i], " y=", lockorder[i+1], "\n") throw("select: broken sort") } } - */ + } // lock all the channels involved in the select sellock(scases, lockorder) @@ -316,7 +221,8 @@ loop: var dfl *scase var casi int var cas *scase - for i := 0; i < int(sel.ncase); i++ { + var recvOK bool + for i := 0; i < ncases; i++ { casi = int(pollorder[i]) cas = &scases[casi] c = cas.c @@ -338,9 +244,6 @@ loop: } case caseSend: - if raceenabled { - racereadpc(unsafe.Pointer(c), cas.pc, chansendpc) - } if c.closed != 0 { goto sclose } @@ -469,26 +372,11 @@ loop: c = cas.c if debugSelect { - print("wait-return: sel=", sel, " c=", c, " cas=", cas, " kind=", cas.kind, "\n") + print("wait-return: cas0=", cas0, " c=", c, " cas=", cas, " kind=", cas.kind, "\n") } - if cas.kind == caseRecv && cas.receivedp != nil { - *cas.receivedp = true - } - - if raceenabled { - if cas.kind == caseRecv && cas.elem != nil { - raceWriteObjectPC(c.elemtype, cas.elem, cas.pc, chanrecvpc) - } else if cas.kind == caseSend { - raceReadObjectPC(c.elemtype, cas.elem, cas.pc, chansendpc) - } - } - if msanenabled { - if cas.kind == caseRecv && cas.elem != nil { - msanwrite(cas.elem, c.elemtype.size) - } else if cas.kind == caseSend { - msanread(cas.elem, c.elemtype.size) - } + if cas.kind == caseRecv { + recvOK = true } selunlock(scases, lockorder) @@ -496,19 +384,7 @@ loop: bufrecv: // can receive from buffer - if raceenabled { - if cas.elem != nil { - raceWriteObjectPC(c.elemtype, cas.elem, cas.pc, chanrecvpc) - } - raceacquire(chanbuf(c, c.recvx)) - racerelease(chanbuf(c, c.recvx)) - } - if msanenabled && cas.elem != nil { - msanwrite(cas.elem, c.elemtype.size) - } - if cas.receivedp != nil { - *cas.receivedp = 
true - } + recvOK = true qp = chanbuf(c, c.recvx) if cas.elem != nil { typedmemmove(c.elemtype, cas.elem, qp) @@ -524,14 +400,6 @@ bufrecv: bufsend: // can send to buffer - if raceenabled { - raceacquire(chanbuf(c, c.sendx)) - racerelease(chanbuf(c, c.sendx)) - raceReadObjectPC(c.elemtype, cas.elem, cas.pc, chansendpc) - } - if msanenabled { - msanread(cas.elem, c.elemtype.size) - } typedmemmove(c.elemtype, chanbuf(c, c.sendx), cas.elem) c.sendx++ if c.sendx == c.dataqsiz { @@ -545,19 +413,15 @@ recv: // can receive from sleeping sender (sg) recv(c, sg, cas.elem, func() { selunlock(scases, lockorder) }, 2) if debugSelect { - print("syncrecv: sel=", sel, " c=", c, "\n") - } - if cas.receivedp != nil { - *cas.receivedp = true + print("syncrecv: cas0=", cas0, " c=", c, "\n") } + recvOK = true goto retc rclose: // read at end of closed channel selunlock(scases, lockorder) - if cas.receivedp != nil { - *cas.receivedp = false - } + recvOK = false if cas.elem != nil { typedmemclr(c.elemtype, cas.elem) } @@ -568,15 +432,9 @@ rclose: send: // can send to a sleeping receiver (sg) - if raceenabled { - raceReadObjectPC(c.elemtype, cas.elem, cas.pc, chansendpc) - } - if msanenabled { - msanread(cas.elem, c.elemtype.size) - } send(c, sg, cas.elem, func() { selunlock(scases, lockorder) }, 2) if debugSelect { - print("syncsend: sel=", sel, " c=", c, "\n") + print("syncsend: cas0=", cas0, " c=", c, "\n") } goto retc @@ -591,7 +449,7 @@ retc: checkPreempt() } - return casi + return casi, recvOK sclose: // send on closed channel @@ -625,27 +483,25 @@ const ( ) //go:linkname reflect_rselect reflect.rselect -func reflect_rselect(cases []runtimeSelect) (chosen int, recvOK bool) { - // flagNoScan is safe here, because all objects are also referenced from cases. - size := selectsize(uintptr(len(cases))) - sel := (*hselect)(mallocgc(size, nil, true)) - newselect(sel, int64(size), int32(len(cases))) - r := new(bool) +func reflect_rselect(cases []runtimeSelect) (int, bool) { + if len(cases) == 0 { + block() + } + sel := make([]scase, len(cases)) + order := make([]uint16, 2*len(cases)) for i := range cases { rc := &cases[i] switch rc.dir { case selectDefault: - selectdefault(sel) + sel[i] = scase{kind: caseDefault} case selectSend: - selectsend(sel, rc.ch, rc.val) + sel[i] = scase{kind: caseSend, c: rc.ch, elem: rc.val} case selectRecv: - selectrecv(sel, rc.ch, rc.val, r) + sel[i] = scase{kind: caseRecv, c: rc.ch, elem: rc.val} } } - chosen = selectgo(sel) - recvOK = *r - return + return selectgo(&sel[0], &order[0], len(cases)) } func (q *waitq) dequeueSudoG(sgp *sudog) { -- cgit v1.1 From 38fab7369d19fd545eb8510ec198e73949a2c75d Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Thu, 13 Sep 2018 22:06:16 +0000 Subject: runtime: correct counters in sweep In the sweep code we can sometimes see incorrect counts when conservative stack scanning causes us to grey an object that we earlier decided could be freed. We already ignored this check, but adjust this case to maintain correct span counts when it happens. This gives us slightly more correct numbers in MemStats, and helps avoid a rare failure in TestReadMemStats. Also fix the free index, and cope with finding a full span when allocating a new one. 
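
As a concrete, illustrative example of the counter fix: if a span's allocCount was previously 5 but conservative stack scanning keeps two of the "freed" objects alive, the fresh mark bits give nalloc = 7; the sweep now computes nfreedSigned = 5 - 7 = -2 and folds that signed value into the per-size-class free statistics, rather than letting the old unsigned nfreed = allocCount - nalloc wrap around to a huge bogus free count.
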
Reviewed-on: https://go-review.googlesource.com/134216 From-SVN: r264294 --- libgo/go/runtime/mcentral.go | 9 +++++++++ libgo/go/runtime/mgcsweep.go | 26 ++++++++++++++++++-------- 2 files changed, 27 insertions(+), 8 deletions(-) (limited to 'libgo/go/runtime') diff --git a/libgo/go/runtime/mcentral.go b/libgo/go/runtime/mcentral.go index eaabcb9..150f4fd 100644 --- a/libgo/go/runtime/mcentral.go +++ b/libgo/go/runtime/mcentral.go @@ -56,6 +56,15 @@ retry: c.empty.insertBack(s) unlock(&c.lock) s.sweep(true) + + // With gccgo's conservative GC, the returned span may + // now be full. See the comments in mspan.sweep. + if uintptr(s.allocCount) == s.nelems { + s.freeindex = s.nelems + lock(&c.lock) + goto retry + } + goto havespan } if s.sweepgen == sg-1 { diff --git a/libgo/go/runtime/mgcsweep.go b/libgo/go/runtime/mgcsweep.go index c60214c..d6be349 100644 --- a/libgo/go/runtime/mgcsweep.go +++ b/libgo/go/runtime/mgcsweep.go @@ -296,7 +296,7 @@ func (s *mspan) sweep(preserve bool) bool { } nfreed := s.allocCount - nalloc - // This test is not reliable with gccgo, because of + // This check is not reliable with gccgo, because of // conservative stack scanning. The test boils down to // checking that no new bits have been set in gcmarkBits since // the span was added to the sweep count. New bits are set by @@ -309,16 +309,23 @@ func (s *mspan) sweep(preserve bool) bool { // check to be inaccurate, and it will keep an object live // unnecessarily, but provided the pointer is not really live // it is not otherwise a problem. So we disable the test for gccgo. - if false && nalloc > s.allocCount { - print("runtime: nelems=", s.nelems, " nalloc=", nalloc, " previous allocCount=", s.allocCount, " nfreed=", nfreed, "\n") - throw("sweep increased allocation count") + nfreedSigned := int(nfreed) + if nalloc > s.allocCount { + // print("runtime: nelems=", s.nelems, " nalloc=", nalloc, " previous allocCount=", s.allocCount, " nfreed=", nfreed, "\n") + // throw("sweep increased allocation count") + + // For gccgo, adjust the freed count as a signed number. + nfreedSigned = int(s.allocCount) - int(nalloc) + if uintptr(nalloc) == s.nelems { + s.freeindex = s.nelems + } } s.allocCount = nalloc wasempty := s.nextFreeIndex() == s.nelems s.freeindex = 0 // reset allocation index to start of span. if trace.enabled { - getg().m.p.ptr().traceReclaimed += uintptr(nfreed) * s.elemsize + getg().m.p.ptr().traceReclaimed += uintptr(nfreedSigned) * s.elemsize } // gcmarkBits becomes the allocBits. @@ -334,7 +341,7 @@ func (s *mspan) sweep(preserve bool) bool { // But we need to set it before we make the span available for allocation // (return it to heap or mcentral), because allocation code assumes that a // span is already swept if available for allocation. - if freeToHeap || nfreed == 0 { + if freeToHeap || nfreedSigned <= 0 { // The span must be in our exclusive ownership until we update sweepgen, // check for potential races. 
if s.state != mSpanInUse || s.sweepgen != sweepgen-1 { @@ -347,8 +354,11 @@ func (s *mspan) sweep(preserve bool) bool { atomic.Store(&s.sweepgen, sweepgen) } - if nfreed > 0 && spc.sizeclass() != 0 { - c.local_nsmallfree[spc.sizeclass()] += uintptr(nfreed) + if spc.sizeclass() != 0 { + c.local_nsmallfree[spc.sizeclass()] += uintptr(nfreedSigned) + } + + if nfreedSigned > 0 && spc.sizeclass() != 0 { res = mheap_.central[spc].mcentral.freeSpan(s, preserve, wasempty) // MCentral_FreeSpan updates sweepgen } else if freeToHeap { -- cgit v1.1 From cec07c4759e8af44ca77a2beb9312e4e30d1cc7a Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Thu, 13 Sep 2018 22:25:58 +0000 Subject: compiler, runtime: call gcWriteBarrier instead of writebarrierptr In 1.11 writebarrierptr is going away, so change the compiler to call gcWriteBarrier instead. We weren't using gcWriteBarrier before; adjust the implementation to use the putFast method. This revealed a problem in the kickoff function. When using cgo, kickoff can be called on the g0 of an m allocated by newExtraM. In that case the m will generally have a p, but systemstack may be called by wbBufFlush as part of flushing the write barrier buffer. At that point the buffer is full, so we can not do a write barrier. So adjust the existing code in kickoff so that in the case where we are g0, don't do any write barrier at all. Reviewed-on: https://go-review.googlesource.com/131395 From-SVN: r264295 --- libgo/go/runtime/mgc_gccgo.go | 12 ++++++------ libgo/go/runtime/proc.go | 35 ++++++++++++++++++++--------------- 2 files changed, 26 insertions(+), 21 deletions(-) (limited to 'libgo/go/runtime') diff --git a/libgo/go/runtime/mgc_gccgo.go b/libgo/go/runtime/mgc_gccgo.go index cf7780c..b396d35 100644 --- a/libgo/go/runtime/mgc_gccgo.go +++ b/libgo/go/runtime/mgc_gccgo.go @@ -11,6 +11,11 @@ import ( "unsafe" ) +// For gccgo, use go:linkname to rename compiler-called functions to +// themselves, so that the compiler will export them. +// +//go:linkname gcWriteBarrier runtime.gcWriteBarrier + // gcRoot is a single GC root: a variable plus a ptrmask. //go:notinheap type gcRoot struct { @@ -188,12 +193,7 @@ func checkPreempt() { //go:nowritebarrier func gcWriteBarrier(dst *uintptr, src uintptr) { buf := &getg().m.p.ptr().wbBuf - next := buf.next - np := next + 2*sys.PtrSize - buf.next = np - *(*uintptr)(unsafe.Pointer(next)) = src - *(*uintptr)(unsafe.Pointer(next + sys.PtrSize)) = *dst - if np >= buf.end { + if !buf.putFast(src, *dst) { wbBufFlush(dst, src) } *dst = src diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go index 12d7071..4fc45dd 100644 --- a/libgo/go/runtime/proc.go +++ b/libgo/go/runtime/proc.go @@ -1146,24 +1146,29 @@ func kickoff() { fv := gp.entry param := gp.param - gp.entry = nil // When running on the g0 stack we can wind up here without a p, - // for example from mcall(exitsyscall0) in exitsyscall. - // Setting gp.param = nil will call a write barrier, and if - // there is no p that write barrier will crash. When called from - // mcall the gp.param value will be a *g, which we don't need to - // shade since we know it will be kept alive elsewhere. In that - // case clear the field using uintptr so that the write barrier - // does nothing. - if gp.m.p == 0 { - if gp == gp.m.g0 && gp.param == unsafe.Pointer(gp.m.curg) { - *(*uintptr)(unsafe.Pointer(&gp.param)) = 0 - } else { - throw("no p in kickoff") - } + // for example from mcall(exitsyscall0) in exitsyscall, in + // which case we can not run a write barrier. 
+ // It is also possible for us to get here from the systemstack + // call in wbBufFlush, at which point the write barrier buffer + // is full and we can not run a write barrier. + // Setting gp.entry = nil or gp.param = nil will try to run a + // write barrier, so if we are on the g0 stack due to mcall + // (systemstack calls mcall) then clear the field using uintptr. + // This is OK when gp.param is gp.m.curg, as curg will be kept + // alive elsewhere, and gp.entry always points into g, or + // to a statically allocated value, or (in the case of mcall) + // to the stack. + if gp == gp.m.g0 && gp.param == unsafe.Pointer(gp.m.curg) { + *(*uintptr)(unsafe.Pointer(&gp.entry)) = 0 + *(*uintptr)(unsafe.Pointer(&gp.param)) = 0 + } else if gp.m.p == 0 { + throw("no p in kickoff") + } else { + gp.entry = nil + gp.param = nil } - gp.param = nil fv(param) goexit1() -- cgit v1.1 From dd931d9b48647e898dc80927c532ae93cc09e192 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Mon, 24 Sep 2018 21:46:21 +0000 Subject: libgo: update to Go 1.11 Reviewed-on: https://go-review.googlesource.com/136435 gotools/: * Makefile.am (mostlyclean-local): Run chmod on check-go-dir to make sure it is writable. (check-go-tools): Likewise. (check-vet): Copy internal/objabi to check-vet-dir. * Makefile.in: Rebuild. From-SVN: r264546 --- libgo/go/runtime/alg.go | 39 +- libgo/go/runtime/append_test.go | 338 ----- libgo/go/runtime/atomic_pointer.go | 31 +- libgo/go/runtime/auxv_none.go | 15 + libgo/go/runtime/cgocall.go | 15 +- libgo/go/runtime/cgocheck.go | 12 +- libgo/go/runtime/chan.go | 12 +- libgo/go/runtime/chanbarrier_test.go | 2 +- libgo/go/runtime/cputicks.go | 8 + libgo/go/runtime/crash_cgo_test.go | 41 +- libgo/go/runtime/crash_gccgo_test.go | 4 +- libgo/go/runtime/crash_nonunix_test.go | 2 +- libgo/go/runtime/crash_test.go | 109 ++ libgo/go/runtime/debug.go | 4 + libgo/go/runtime/debug/heapdump_test.go | 8 +- libgo/go/runtime/debug_test.go | 207 +++ libgo/go/runtime/env_posix.go | 2 +- libgo/go/runtime/error.go | 61 +- libgo/go/runtime/export_debug_test.go | 169 +++ libgo/go/runtime/export_linux_test.go | 10 +- libgo/go/runtime/export_test.go | 61 +- libgo/go/runtime/extern.go | 6 + libgo/go/runtime/gc_test.go | 38 +- libgo/go/runtime/gcinfo_test.go | 6 +- libgo/go/runtime/hash64.go | 5 +- libgo/go/runtime/hash_test.go | 15 + libgo/go/runtime/hashmap.go | 1298 ------------------- libgo/go/runtime/hashmap_fast.go | 1237 ------------------ libgo/go/runtime/heapdump.go | 33 +- libgo/go/runtime/iface.go | 50 +- libgo/go/runtime/internal/atomic/atomic_test.go | 6 +- libgo/go/runtime/internal/atomic/bench_test.go | 36 + libgo/go/runtime/internal/atomic/stubs.go | 2 + libgo/go/runtime/internal/sys/intrinsics.go | 24 + libgo/go/runtime/lfstack.go | 10 + libgo/go/runtime/lfstack_64bit.go | 22 +- libgo/go/runtime/lock_futex.go | 6 + libgo/go/runtime/lock_js.go | 172 +++ libgo/go/runtime/lock_sema.go | 6 + libgo/go/runtime/malloc.go | 699 ++++++---- libgo/go/runtime/malloc_test.go | 76 +- libgo/go/runtime/map.go | 1363 ++++++++++++++++++++ libgo/go/runtime/map_benchmark_test.go | 372 ++++++ libgo/go/runtime/map_fast32.go | 413 ++++++ libgo/go/runtime/map_fast64.go | 419 ++++++ libgo/go/runtime/map_faststr.go | 430 ++++++ libgo/go/runtime/map_test.go | 294 ++++- libgo/go/runtime/mapspeed_test.go | 343 ----- libgo/go/runtime/mbarrier.go | 129 +- libgo/go/runtime/mbitmap.go | 415 ++++-- libgo/go/runtime/mcache.go | 1 - libgo/go/runtime/mcentral.go | 2 +- libgo/go/runtime/mem_gccgo.go | 100 +- 
libgo/go/runtime/memmove_test.go | 7 + libgo/go/runtime/mfinal.go | 62 +- libgo/go/runtime/mfixalloc.go | 2 +- libgo/go/runtime/mgc.go | 71 +- libgo/go/runtime/mgclarge.go | 6 +- libgo/go/runtime/mgcmark.go | 84 +- libgo/go/runtime/mgcsweep.go | 4 +- libgo/go/runtime/mgcwork.go | 1 + libgo/go/runtime/mheap.go | 515 ++++---- libgo/go/runtime/mprof.go | 4 +- libgo/go/runtime/msan/msan.go | 3 +- libgo/go/runtime/mstats.go | 10 +- libgo/go/runtime/mwbbuf.go | 38 +- libgo/go/runtime/netpoll.go | 4 +- libgo/go/runtime/netpoll_fake.go | 32 + libgo/go/runtime/netpoll_nacl.go | 30 - libgo/go/runtime/os_darwin.go | 332 +---- libgo/go/runtime/os_dragonfly.go | 9 +- libgo/go/runtime/os_freebsd.go | 4 +- libgo/go/runtime/os_js.go | 145 +++ libgo/go/runtime/os_linux.go | 23 +- libgo/go/runtime/os_linux_arm.go | 60 + libgo/go/runtime/os_linux_arm64.go | 29 + libgo/go/runtime/os_linux_mips64x.go | 21 + libgo/go/runtime/os_linux_mipsx.go | 21 + libgo/go/runtime/os_linux_noauxv.go | 11 + libgo/go/runtime/os_linux_ppc64x.go | 20 +- libgo/go/runtime/os_linux_s390x.go | 19 + libgo/go/runtime/os_netbsd.go | 1 + libgo/go/runtime/os_openbsd.go | 1 + libgo/go/runtime/panic.go | 167 ++- libgo/go/runtime/pprof/internal/profile/encode.go | 12 + libgo/go/runtime/pprof/internal/profile/profile.go | 20 +- libgo/go/runtime/pprof/pprof.go | 34 +- libgo/go/runtime/pprof/pprof_test.go | 14 +- libgo/go/runtime/pprof/proto.go | 106 +- libgo/go/runtime/pprof/proto_test.go | 138 +- libgo/go/runtime/pprof/protomem.go | 5 +- libgo/go/runtime/pprof/protomem_test.go | 32 +- .../go/runtime/pprof/testdata/mappingtest/main.go | 105 ++ libgo/go/runtime/proc.go | 176 ++- libgo/go/runtime/proc_test.go | 34 +- libgo/go/runtime/rand_test.go | 2 +- libgo/go/runtime/runtime-lldb_test.go | 4 +- libgo/go/runtime/runtime1.go | 30 +- libgo/go/runtime/runtime2.go | 134 +- libgo/go/runtime/runtime_test.go | 5 +- libgo/go/runtime/rwmutex_test.go | 3 + libgo/go/runtime/select.go | 4 +- libgo/go/runtime/sema.go | 8 +- libgo/go/runtime/signal_sighandler.go | 18 +- libgo/go/runtime/signal_unix.go | 16 +- libgo/go/runtime/sigqueue.go | 11 + libgo/go/runtime/sizeof_test.go | 43 + libgo/go/runtime/slice.go | 77 +- libgo/go/runtime/slice_test.go | 374 ++++++ libgo/go/runtime/string.go | 81 +- libgo/go/runtime/string_test.go | 42 +- libgo/go/runtime/stubs.go | 42 +- libgo/go/runtime/stubs2.go | 3 +- libgo/go/runtime/stubs3.go | 14 + libgo/go/runtime/symtab.go | 5 +- libgo/go/runtime/sys_darwin.go | 374 ++++++ libgo/go/runtime/sys_wasm.go | 42 + libgo/go/runtime/testdata/testprog/abort.go | 23 + .../go/runtime/testdata/testprog/numcpu_freebsd.go | 13 +- libgo/go/runtime/testdata/testprog/timeprof.go | 46 + .../testdata/testprog/traceback_ancestors.go | 53 + .../testdata/testprogcgo/bigstack_windows.go | 27 + libgo/go/runtime/testdata/testprogcgo/raceprof.go | 2 +- libgo/go/runtime/testdata/testprogcgo/racesig.go | 2 +- libgo/go/runtime/time.go | 78 +- libgo/go/runtime/timeasm.go | 2 +- libgo/go/runtime/timestub.go | 3 - libgo/go/runtime/timestub2.go | 11 + libgo/go/runtime/trace.go | 147 ++- libgo/go/runtime/trace/annotation.go | 196 +++ libgo/go/runtime/trace/annotation_test.go | 152 +++ libgo/go/runtime/trace/trace.go | 104 +- libgo/go/runtime/trace/trace_stack_test.go | 148 ++- libgo/go/runtime/trace/trace_test.go | 27 + libgo/go/runtime/traceback_gccgo.go | 4 +- libgo/go/runtime/type.go | 24 +- libgo/go/runtime/unaligned1.go | 2 +- libgo/go/runtime/utf8.go | 9 + libgo/go/runtime/vdso_none.go | 11 - 139 files changed, 8889 insertions(+), 5117 deletions(-) 
delete mode 100644 libgo/go/runtime/append_test.go create mode 100644 libgo/go/runtime/auxv_none.go create mode 100644 libgo/go/runtime/debug_test.go create mode 100644 libgo/go/runtime/export_debug_test.go delete mode 100644 libgo/go/runtime/hashmap.go delete mode 100644 libgo/go/runtime/hashmap_fast.go create mode 100644 libgo/go/runtime/lock_js.go create mode 100644 libgo/go/runtime/map.go create mode 100644 libgo/go/runtime/map_benchmark_test.go create mode 100644 libgo/go/runtime/map_fast32.go create mode 100644 libgo/go/runtime/map_fast64.go create mode 100644 libgo/go/runtime/map_faststr.go delete mode 100644 libgo/go/runtime/mapspeed_test.go create mode 100644 libgo/go/runtime/netpoll_fake.go delete mode 100644 libgo/go/runtime/netpoll_nacl.go create mode 100644 libgo/go/runtime/os_js.go create mode 100644 libgo/go/runtime/os_linux_arm.go create mode 100644 libgo/go/runtime/os_linux_arm64.go create mode 100644 libgo/go/runtime/os_linux_mips64x.go create mode 100644 libgo/go/runtime/os_linux_mipsx.go create mode 100644 libgo/go/runtime/os_linux_noauxv.go create mode 100644 libgo/go/runtime/os_linux_s390x.go create mode 100644 libgo/go/runtime/pprof/testdata/mappingtest/main.go create mode 100644 libgo/go/runtime/sizeof_test.go create mode 100644 libgo/go/runtime/slice_test.go create mode 100644 libgo/go/runtime/stubs3.go create mode 100644 libgo/go/runtime/sys_darwin.go create mode 100644 libgo/go/runtime/sys_wasm.go create mode 100644 libgo/go/runtime/testdata/testprog/abort.go create mode 100644 libgo/go/runtime/testdata/testprog/timeprof.go create mode 100644 libgo/go/runtime/testdata/testprog/traceback_ancestors.go create mode 100644 libgo/go/runtime/testdata/testprogcgo/bigstack_windows.go create mode 100644 libgo/go/runtime/timestub2.go create mode 100644 libgo/go/runtime/trace/annotation.go create mode 100644 libgo/go/runtime/trace/annotation_test.go delete mode 100644 libgo/go/runtime/vdso_none.go (limited to 'libgo/go/runtime') diff --git a/libgo/go/runtime/alg.go b/libgo/go/runtime/alg.go index 7c98f1b..c6bc6b6 100644 --- a/libgo/go/runtime/alg.go +++ b/libgo/go/runtime/alg.go @@ -5,6 +5,7 @@ package runtime import ( + "internal/cpu" "runtime/internal/sys" "unsafe" ) @@ -137,7 +138,7 @@ func interhash(p unsafe.Pointer, h uintptr) uintptr { t := *(**_type)(tab) fn := t.hashfn if fn == nil { - panic(errorString("hash of unhashable type " + *t.string)) + panic(errorString("hash of unhashable type " + t.string())) } if isDirectIface(t) { return c1 * fn(unsafe.Pointer(&a.data), h^c0) @@ -154,7 +155,7 @@ func nilinterhash(p unsafe.Pointer, h uintptr) uintptr { } fn := t.hashfn if fn == nil { - panic(errorString("hash of unhashable type " + *t.string)) + panic(errorString("hash of unhashable type " + t.string())) } if isDirectIface(t) { return c1 * fn(unsafe.Pointer(&a.data), h^c0) @@ -212,7 +213,7 @@ func efaceeq(x, y eface) bool { } eq := t.equalfn if eq == nil { - panic(errorString("comparing uncomparable type " + *t.string)) + panic(errorString("comparing uncomparable type " + t.string())) } if isDirectIface(t) { return x.data == y.data @@ -233,7 +234,7 @@ func ifaceeq(x, y iface) bool { } eq := t.equalfn if eq == nil { - panic(errorString("comparing uncomparable type " + *t.string)) + panic(errorString("comparing uncomparable type " + t.string())) } if isDirectIface(t) { return x.data == y.data @@ -251,7 +252,7 @@ func ifacevaleq(x iface, t *_type, p unsafe.Pointer) bool { } eq := t.equalfn if eq == nil { - panic(errorString("comparing uncomparable type " + *t.string)) + 
panic(errorString("comparing uncomparable type " + t.string())) } if isDirectIface(t) { return x.data == p @@ -272,7 +273,7 @@ func ifaceefaceeq(x iface, y eface) bool { } eq := xt.equalfn if eq == nil { - panic(errorString("comparing uncomparable type " + *xt.string)) + panic(errorString("comparing uncomparable type " + xt.string())) } if isDirectIface(xt) { return x.data == y.data @@ -289,7 +290,7 @@ func efacevaleq(x eface, t *_type, p unsafe.Pointer) bool { } eq := t.equalfn if eq == nil { - panic(errorString("comparing uncomparable type " + *t.string)) + panic(errorString("comparing uncomparable type " + t.string())) } if isDirectIface(t) { return x.data == p @@ -388,23 +389,25 @@ func ifaceHash(i interface { const hashRandomBytes = sys.PtrSize / 4 * 64 -// used in asm_{386,amd64}.s to seed the hash function +// used in asm_{386,amd64,arm64}.s to seed the hash function var aeskeysched [hashRandomBytes]byte // used in hash{32,64}.go to seed the hash function var hashkey [4]uintptr func alginit() { - // Install aes hash algorithm if we have the instructions we need + // Install AES hash algorithms if the instructions needed are present. if (GOARCH == "386" || GOARCH == "amd64") && GOOS != "nacl" && support_aes && - cpuid_ecx&(1<<25) != 0 && // aes (aesenc) - cpuid_ecx&(1<<9) != 0 && // sse3 (pshufb) - cpuid_ecx&(1<<19) != 0 { // sse4.1 (pinsr{d,q}) - useAeshash = true - // Initialize with random data so hash collisions will be hard to engineer. - getRandomData(aeskeysched[:]) + cpu.X86.HasAES && // AESENC + cpu.X86.HasSSSE3 && // PSHUFB + cpu.X86.HasSSE41 { // PINSR{D,Q} + initAlgAES() + return + } + if GOARCH == "arm64" && cpu.ARM64.HasAES { + initAlgAES() return } getRandomData((*[len(hashkey) * sys.PtrSize]byte)(unsafe.Pointer(&hashkey))[:]) @@ -413,3 +416,9 @@ func alginit() { hashkey[2] |= 1 hashkey[3] |= 1 } + +func initAlgAES() { + useAeshash = true + // Initialize with random data so hash collisions will be hard to engineer. + getRandomData(aeskeysched[:]) +} diff --git a/libgo/go/runtime/append_test.go b/libgo/go/runtime/append_test.go deleted file mode 100644 index ef1e812..0000000 --- a/libgo/go/runtime/append_test.go +++ /dev/null @@ -1,338 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. -package runtime_test - -import ( - "fmt" - "testing" -) - -const N = 20 - -func BenchmarkMakeSlice(b *testing.B) { - var x []byte - for i := 0; i < b.N; i++ { - x = make([]byte, 32) - _ = x - } -} - -type ( - struct24 struct{ a, b, c int64 } - struct32 struct{ a, b, c, d int64 } - struct40 struct{ a, b, c, d, e int64 } -) - -func BenchmarkGrowSlice(b *testing.B) { - b.Run("Byte", func(b *testing.B) { - x := make([]byte, 9) - for i := 0; i < b.N; i++ { - _ = append([]byte(nil), x...) - } - }) - b.Run("Int", func(b *testing.B) { - x := make([]int, 9) - for i := 0; i < b.N; i++ { - _ = append([]int(nil), x...) - } - }) - b.Run("Ptr", func(b *testing.B) { - x := make([]*byte, 9) - for i := 0; i < b.N; i++ { - _ = append([]*byte(nil), x...) - } - }) - b.Run("Struct", func(b *testing.B) { - b.Run("24", func(b *testing.B) { - x := make([]struct24, 9) - for i := 0; i < b.N; i++ { - _ = append([]struct24(nil), x...) - } - }) - b.Run("32", func(b *testing.B) { - x := make([]struct32, 9) - for i := 0; i < b.N; i++ { - _ = append([]struct32(nil), x...) 
- } - }) - b.Run("40", func(b *testing.B) { - x := make([]struct40, 9) - for i := 0; i < b.N; i++ { - _ = append([]struct40(nil), x...) - } - }) - - }) -} - -func BenchmarkAppend(b *testing.B) { - b.StopTimer() - x := make([]int, 0, N) - b.StartTimer() - for i := 0; i < b.N; i++ { - x = x[0:0] - for j := 0; j < N; j++ { - x = append(x, j) - } - } -} - -func BenchmarkAppendGrowByte(b *testing.B) { - for i := 0; i < b.N; i++ { - var x []byte - for j := 0; j < 1<<20; j++ { - x = append(x, byte(j)) - } - } -} - -func BenchmarkAppendGrowString(b *testing.B) { - var s string - for i := 0; i < b.N; i++ { - var x []string - for j := 0; j < 1<<20; j++ { - x = append(x, s) - } - } -} - -func BenchmarkAppendSlice(b *testing.B) { - for _, length := range []int{1, 4, 7, 8, 15, 16, 32} { - b.Run(fmt.Sprint(length, "Bytes"), func(b *testing.B) { - x := make([]byte, 0, N) - y := make([]byte, length) - for i := 0; i < b.N; i++ { - x = x[0:0] - x = append(x, y...) - } - }) - } -} - -var ( - blackhole []byte -) - -func BenchmarkAppendSliceLarge(b *testing.B) { - for _, length := range []int{1 << 10, 4 << 10, 16 << 10, 64 << 10, 256 << 10, 1024 << 10} { - y := make([]byte, length) - b.Run(fmt.Sprint(length, "Bytes"), func(b *testing.B) { - for i := 0; i < b.N; i++ { - blackhole = nil - blackhole = append(blackhole, y...) - } - }) - } -} - -func BenchmarkAppendStr(b *testing.B) { - for _, str := range []string{ - "1", - "1234", - "12345678", - "1234567890123456", - "12345678901234567890123456789012", - } { - b.Run(fmt.Sprint(len(str), "Bytes"), func(b *testing.B) { - x := make([]byte, 0, N) - for i := 0; i < b.N; i++ { - x = x[0:0] - x = append(x, str...) - } - }) - } -} - -func BenchmarkAppendSpecialCase(b *testing.B) { - b.StopTimer() - x := make([]int, 0, N) - b.StartTimer() - for i := 0; i < b.N; i++ { - x = x[0:0] - for j := 0; j < N; j++ { - if len(x) < cap(x) { - x = x[:len(x)+1] - x[len(x)-1] = j - } else { - x = append(x, j) - } - } - } -} - -var x []int - -func f() int { - x[:1][0] = 3 - return 2 -} - -func TestSideEffectOrder(t *testing.T) { - x = make([]int, 0, 10) - x = append(x, 1, f()) - if x[0] != 1 || x[1] != 2 { - t.Error("append failed: ", x[0], x[1]) - } -} - -func TestAppendOverlap(t *testing.T) { - x := []byte("1234") - x = append(x[1:], x...) // p > q in runtime·appendslice. - got := string(x) - want := "2341234" - if got != want { - t.Errorf("overlap failed: got %q want %q", got, want) - } -} - -func BenchmarkCopy(b *testing.B) { - for _, l := range []int{1, 2, 4, 8, 12, 16, 32, 128, 1024} { - buf := make([]byte, 4096) - b.Run(fmt.Sprint(l, "Byte"), func(b *testing.B) { - s := make([]byte, l) - var n int - for i := 0; i < b.N; i++ { - n = copy(buf, s) - } - b.SetBytes(int64(n)) - }) - b.Run(fmt.Sprint(l, "String"), func(b *testing.B) { - s := string(make([]byte, l)) - var n int - for i := 0; i < b.N; i++ { - n = copy(buf, s) - } - b.SetBytes(int64(n)) - }) - } -} - -var ( - sByte []byte - s1Ptr []uintptr - s2Ptr [][2]uintptr - s3Ptr [][3]uintptr - s4Ptr [][4]uintptr -) - -// BenchmarkAppendInPlace tests the performance of append -// when the result is being written back to the same slice. -// In order for the in-place optimization to occur, -// the slice must be referred to by address; -// using a global is an easy way to trigger that. -// We test the "grow" and "no grow" paths separately, -// but not the "normal" (occasionally grow) path, -// because it is a blend of the other two. 
-// We use small numbers and small sizes in an attempt -// to avoid benchmarking memory allocation and copying. -// We use scalars instead of pointers in an attempt -// to avoid benchmarking the write barriers. -// We benchmark four common sizes (byte, pointer, string/interface, slice), -// and one larger size. -func BenchmarkAppendInPlace(b *testing.B) { - b.Run("NoGrow", func(b *testing.B) { - const C = 128 - - b.Run("Byte", func(b *testing.B) { - for i := 0; i < b.N; i++ { - sByte = make([]byte, C) - for j := 0; j < C; j++ { - sByte = append(sByte, 0x77) - } - } - }) - - b.Run("1Ptr", func(b *testing.B) { - for i := 0; i < b.N; i++ { - s1Ptr = make([]uintptr, C) - for j := 0; j < C; j++ { - s1Ptr = append(s1Ptr, 0x77) - } - } - }) - - b.Run("2Ptr", func(b *testing.B) { - for i := 0; i < b.N; i++ { - s2Ptr = make([][2]uintptr, C) - for j := 0; j < C; j++ { - s2Ptr = append(s2Ptr, [2]uintptr{0x77, 0x88}) - } - } - }) - - b.Run("3Ptr", func(b *testing.B) { - for i := 0; i < b.N; i++ { - s3Ptr = make([][3]uintptr, C) - for j := 0; j < C; j++ { - s3Ptr = append(s3Ptr, [3]uintptr{0x77, 0x88, 0x99}) - } - } - }) - - b.Run("4Ptr", func(b *testing.B) { - for i := 0; i < b.N; i++ { - s4Ptr = make([][4]uintptr, C) - for j := 0; j < C; j++ { - s4Ptr = append(s4Ptr, [4]uintptr{0x77, 0x88, 0x99, 0xAA}) - } - } - }) - - }) - - b.Run("Grow", func(b *testing.B) { - const C = 5 - - b.Run("Byte", func(b *testing.B) { - for i := 0; i < b.N; i++ { - sByte = make([]byte, 0) - for j := 0; j < C; j++ { - sByte = append(sByte, 0x77) - sByte = sByte[:cap(sByte)] - } - } - }) - - b.Run("1Ptr", func(b *testing.B) { - for i := 0; i < b.N; i++ { - s1Ptr = make([]uintptr, 0) - for j := 0; j < C; j++ { - s1Ptr = append(s1Ptr, 0x77) - s1Ptr = s1Ptr[:cap(s1Ptr)] - } - } - }) - - b.Run("2Ptr", func(b *testing.B) { - for i := 0; i < b.N; i++ { - s2Ptr = make([][2]uintptr, 0) - for j := 0; j < C; j++ { - s2Ptr = append(s2Ptr, [2]uintptr{0x77, 0x88}) - s2Ptr = s2Ptr[:cap(s2Ptr)] - } - } - }) - - b.Run("3Ptr", func(b *testing.B) { - for i := 0; i < b.N; i++ { - s3Ptr = make([][3]uintptr, 0) - for j := 0; j < C; j++ { - s3Ptr = append(s3Ptr, [3]uintptr{0x77, 0x88, 0x99}) - s3Ptr = s3Ptr[:cap(s3Ptr)] - } - } - }) - - b.Run("4Ptr", func(b *testing.B) { - for i := 0; i < b.N; i++ { - s4Ptr = make([][4]uintptr, 0) - for j := 0; j < C; j++ { - s4Ptr = append(s4Ptr, [4]uintptr{0x77, 0x88, 0x99, 0xAA}) - s4Ptr = s4Ptr[:cap(s4Ptr)] - } - } - }) - - }) -} diff --git a/libgo/go/runtime/atomic_pointer.go b/libgo/go/runtime/atomic_pointer.go index b66ef58..2d023d3 100644 --- a/libgo/go/runtime/atomic_pointer.go +++ b/libgo/go/runtime/atomic_pointer.go @@ -16,11 +16,24 @@ import ( // Instead, these are wrappers around the actual atomics (casp1 and so on) // that use noescape to convey which arguments do not escape. +// atomicwb performs a write barrier before an atomic pointer write. +// The caller should guard the call with "if writeBarrier.enabled". +// +//go:nosplit +func atomicwb(ptr *unsafe.Pointer, new unsafe.Pointer) { + slot := (*uintptr)(unsafe.Pointer(ptr)) + if !getg().m.p.ptr().wbBuf.putFast(*slot, uintptr(new)) { + wbBufFlush(slot, uintptr(new)) + } +} + // atomicstorep performs *ptr = new atomically and invokes a write barrier. 
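For context on the atomic_pointer.go hunk above: the explicit writebarrierptr_prewrite calls become the new atomicwb helper, guarded by writeBarrier.enabled, and the wrappers that follow (atomicstorep, casp and the sync_atomic_* functions) are what sync/atomic's pointer operations are linknamed to. A minimal sketch of the user-facing side those wrappers serve, ordinary application code with no runtime internals:

    package main

    import (
        "fmt"
        "sync/atomic"
        "unsafe"
    )

    type config struct{ name string }

    // current is published and read atomically; the runtime wrappers take
    // care of the write barrier before each pointer store becomes visible.
    var current unsafe.Pointer // effectively a *config

    func main() {
        atomic.StorePointer(&current, unsafe.Pointer(&config{name: "v1"}))

        old := atomic.LoadPointer(&current)
        swapped := atomic.CompareAndSwapPointer(&current, old, unsafe.Pointer(&config{name: "v2"}))

        fmt.Println("swapped:", swapped, "now:", (*config)(atomic.LoadPointer(&current)).name)
    }
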
// //go:nosplit func atomicstorep(ptr unsafe.Pointer, new unsafe.Pointer) { - writebarrierptr_prewrite((*uintptr)(ptr), uintptr(new)) + if writeBarrier.enabled { + atomicwb((*unsafe.Pointer)(ptr), new) + } atomic.StorepNoWB(noescape(ptr), new) } @@ -29,7 +42,9 @@ func casp(ptr *unsafe.Pointer, old, new unsafe.Pointer) bool { // The write barrier is only necessary if the CAS succeeds, // but since it needs to happen before the write becomes // public, we have to do it conservatively all the time. - writebarrierptr_prewrite((*uintptr)(unsafe.Pointer(ptr)), uintptr(new)) + if writeBarrier.enabled { + atomicwb(ptr, new) + } return atomic.Casp1((*unsafe.Pointer)(noescape(unsafe.Pointer(ptr))), noescape(old), new) } @@ -43,7 +58,9 @@ func sync_atomic_StoreUintptr(ptr *uintptr, new uintptr) //go:linkname sync_atomic_StorePointer sync_atomic.StorePointer //go:nosplit func sync_atomic_StorePointer(ptr *unsafe.Pointer, new unsafe.Pointer) { - writebarrierptr_prewrite((*uintptr)(unsafe.Pointer(ptr)), uintptr(new)) + if writeBarrier.enabled { + atomicwb(ptr, new) + } sync_atomic_StoreUintptr((*uintptr)(unsafe.Pointer(ptr)), uintptr(new)) } @@ -53,7 +70,9 @@ func sync_atomic_SwapUintptr(ptr *uintptr, new uintptr) uintptr //go:linkname sync_atomic_SwapPointer sync_atomic.SwapPointer //go:nosplit func sync_atomic_SwapPointer(ptr *unsafe.Pointer, new unsafe.Pointer) unsafe.Pointer { - writebarrierptr_prewrite((*uintptr)(unsafe.Pointer(ptr)), uintptr(new)) + if writeBarrier.enabled { + atomicwb(ptr, new) + } old := unsafe.Pointer(sync_atomic_SwapUintptr((*uintptr)(noescape(unsafe.Pointer(ptr))), uintptr(new))) return old } @@ -64,6 +83,8 @@ func sync_atomic_CompareAndSwapUintptr(ptr *uintptr, old, new uintptr) bool //go:linkname sync_atomic_CompareAndSwapPointer sync_atomic.CompareAndSwapPointer //go:nosplit func sync_atomic_CompareAndSwapPointer(ptr *unsafe.Pointer, old, new unsafe.Pointer) bool { - writebarrierptr_prewrite((*uintptr)(unsafe.Pointer(ptr)), uintptr(new)) + if writeBarrier.enabled { + atomicwb(ptr, new) + } return sync_atomic_CompareAndSwapUintptr((*uintptr)(noescape(unsafe.Pointer(ptr))), uintptr(old), uintptr(new)) } diff --git a/libgo/go/runtime/auxv_none.go b/libgo/go/runtime/auxv_none.go new file mode 100644 index 0000000..3ca617b --- /dev/null +++ b/libgo/go/runtime/auxv_none.go @@ -0,0 +1,15 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !linux +// +build !darwin +// +build !dragonfly +// +build !freebsd +// +build !netbsd +// +build !solaris + +package runtime + +func sysargs(argc int32, argv **byte) { +} diff --git a/libgo/go/runtime/cgocall.go b/libgo/go/runtime/cgocall.go index 67b2bce..24bf749 100644 --- a/libgo/go/runtime/cgocall.go +++ b/libgo/go/runtime/cgocall.go @@ -212,22 +212,13 @@ func cgoCheckArg(t *_type, p unsafe.Pointer, indir, top bool, msg string) { // pointer into Go memory. If it does, we panic. // The return values are unused but useful to see in panic tracebacks. func cgoCheckUnknownPointer(p unsafe.Pointer, msg string) (base, i uintptr) { - if cgoInRange(p, mheap_.arena_start, mheap_.arena_used) { - if !inheap(uintptr(p)) { - // On 32-bit systems it is possible for C's allocated memory - // to have addresses between arena_start and arena_used. - // Either this pointer is a stack or an unused span or it's - // a C allocation. 
Escape analysis should prevent the first, - // garbage collection should prevent the second, - // and the third is completely OK. - return - } - - b, hbits, span, _ := heapBitsForObject(uintptr(p), 0, 0, false) + if inheap(uintptr(p)) { + b, span, _ := findObject(uintptr(p), 0, 0, false) base = b if base == 0 { return } + hbits := heapBitsForAddr(base) n := span.elemsize for i = uintptr(0); i < n; i += sys.PtrSize { if i != 1*sys.PtrSize && !hbits.morePointers() { diff --git a/libgo/go/runtime/cgocheck.go b/libgo/go/runtime/cgocheck.go index b85b519..d896fb7 100644 --- a/libgo/go/runtime/cgocheck.go +++ b/libgo/go/runtime/cgocheck.go @@ -126,9 +126,7 @@ func cgoCheckTypedBlock(typ *_type, src unsafe.Pointer, off, size uintptr) { roots = roots.next } - aoff := uintptr(src) - mheap_.arena_start - idx := aoff >> _PageShift - s := mheap_.spans[idx] + s := spanOfUnchecked(uintptr(src)) if s.state == _MSpanManual { // There are no heap bits for value stored on the stack. // For a channel receive src might be on the stack of some @@ -151,9 +149,7 @@ func cgoCheckTypedBlock(typ *_type, src unsafe.Pointer, off, size uintptr) { if i >= off && bits&bitPointer != 0 { v := *(*unsafe.Pointer)(add(src, i)) if cgoIsGoPointer(v) { - systemstack(func() { - throw(cgoWriteBarrierFail) - }) + throw(cgoWriteBarrierFail) } } hbits = hbits.next() @@ -186,9 +182,7 @@ func cgoCheckBits(src unsafe.Pointer, gcbits *byte, off, size uintptr) { if bits&1 != 0 { v := *(*unsafe.Pointer)(add(src, i)) if cgoIsGoPointer(v) { - systemstack(func() { - throw(cgoWriteBarrierFail) - }) + throw(cgoWriteBarrierFail) } } } diff --git a/libgo/go/runtime/chan.go b/libgo/go/runtime/chan.go index 87f7879..88a8944 100644 --- a/libgo/go/runtime/chan.go +++ b/libgo/go/runtime/chan.go @@ -88,7 +88,7 @@ func makechan(t *chantype, size int) *hchan { throw("makechan: bad alignment") } - if size < 0 || uintptr(size) > maxSliceCap(elem.size) || uintptr(size)*elem.size > _MaxMem-hchanSize { + if size < 0 || uintptr(size) > maxSliceCap(elem.size) || uintptr(size)*elem.size > maxAlloc-hchanSize { panic(plainError("makechan: size out of range")) } @@ -157,7 +157,7 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool { if !block { return false } - gopark(nil, nil, "chan send (nil chan)", traceEvGoStop, 2) + gopark(nil, nil, waitReasonChanSendNilChan, traceEvGoStop, 2) throw("unreachable") } @@ -246,7 +246,7 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool { gp.waiting = mysg gp.param = nil c.sendq.enqueue(mysg) - goparkunlock(&c.lock, "chan send", traceEvGoBlockSend, 3) + goparkunlock(&c.lock, waitReasonChanSend, traceEvGoBlockSend, 3) // someone woke us up. if mysg != gp.waiting { @@ -325,6 +325,8 @@ func sendDirect(t *_type, sg *sudog, src unsafe.Pointer) { // So make sure that no preemption points can happen between read & use. dst := sg.elem typeBitsBulkBarrier(t, uintptr(dst), uintptr(src), t.size) + // No need for cgo write barrier checks because dst is always + // Go memory. 
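Aside on the makechan hunk above (_MaxMem becoming maxAlloc): a buffered channel whose element storage cannot possibly be allocated is still rejected at run time with the same panic. A minimal sketch, assuming a 64-bit platform and a non-constant capacity so the check is exercised at run time rather than rejected by the compiler:

    package main

    import "fmt"

    func main() {
        defer func() {
            // Expect a runtime error along the lines of
            // "makechan: size out of range".
            fmt.Println("recovered:", recover())
        }()

        n := 1 << 59 // far more int64 slots than maxAlloc permits
        _ = make(chan int64, n)
    }
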
memmove(dst, src, t.size) } @@ -444,7 +446,7 @@ func chanrecv(c *hchan, ep unsafe.Pointer, block bool) (selected, received bool) if !block { return } - gopark(nil, nil, "chan receive (nil chan)", traceEvGoStop, 2) + gopark(nil, nil, waitReasonChanReceiveNilChan, traceEvGoStop, 2) throw("unreachable") } @@ -535,7 +537,7 @@ func chanrecv(c *hchan, ep unsafe.Pointer, block bool) (selected, received bool) mysg.c = c gp.param = nil c.recvq.enqueue(mysg) - goparkunlock(&c.lock, "chan receive", traceEvGoBlockRecv, 3) + goparkunlock(&c.lock, waitReasonChanReceive, traceEvGoBlockRecv, 3) // someone woke us up if mysg != gp.waiting { diff --git a/libgo/go/runtime/chanbarrier_test.go b/libgo/go/runtime/chanbarrier_test.go index b6029fb..d479574 100644 --- a/libgo/go/runtime/chanbarrier_test.go +++ b/libgo/go/runtime/chanbarrier_test.go @@ -57,7 +57,7 @@ func testChanSendBarrier(useSelect bool) { var globalMu sync.Mutex outer := 100 inner := 100000 - if testing.Short() { + if testing.Short() || runtime.GOARCH == "wasm" { outer = 10 inner = 1000 } diff --git a/libgo/go/runtime/cputicks.go b/libgo/go/runtime/cputicks.go index 7e62dc1..c41a58b 100644 --- a/libgo/go/runtime/cputicks.go +++ b/libgo/go/runtime/cputicks.go @@ -2,6 +2,14 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +// // +build !arm +// // +build !arm64 +// // +build !mips64 +// // +build !mips64le +// // +build !mips +// // +build !mipsle +// // +build !wasm + package runtime // careful: cputicks is not guaranteed to be monotonic! In particular, we have diff --git a/libgo/go/runtime/crash_cgo_test.go b/libgo/go/runtime/crash_cgo_test.go index 6688b3c..770f85e 100644 --- a/libgo/go/runtime/crash_cgo_test.go +++ b/libgo/go/runtime/crash_cgo_test.go @@ -89,19 +89,6 @@ func TestCgoExternalThreadSIGPROF(t *testing.T) { switch runtime.GOOS { case "plan9", "windows": t.Skipf("no pthreads on %s", runtime.GOOS) - case "darwin": - if runtime.GOARCH != "arm" && runtime.GOARCH != "arm64" { - // static constructor needs external linking, but we don't support - // external linking on OS X 10.6. 
- out, err := exec.Command("uname", "-r").Output() - if err != nil { - t.Fatalf("uname -r failed: %v", err) - } - // OS X 10.6 == Darwin 10.x - if strings.HasPrefix(string(out), "10.") { - t.Skipf("no external linking on OS X 10.6") - } - } } if runtime.GOARCH == "ppc64" { // TODO(austin) External linking not implemented on @@ -252,8 +239,12 @@ func TestCgoCCodeSIGPROF(t *testing.T) { func TestCgoCrashTraceback(t *testing.T) { t.Parallel() - if runtime.GOOS != "linux" || (runtime.GOARCH != "amd64" && runtime.GOARCH != "ppc64le") { - t.Skipf("not yet supported on %s/%s", runtime.GOOS, runtime.GOARCH) + switch platform := runtime.GOOS + "/" + runtime.GOARCH; platform { + case "darwin/amd64": + case "linux/amd64": + case "linux/ppc64le": + default: + t.Skipf("not yet supported on %s", platform) } if runtime.Compiler == "gccgo" { t.Skip("gccgo does not have SetCgoTraceback") @@ -352,7 +343,7 @@ func TestCgoPprofThreadNoTraceback(t *testing.T) { } func TestRaceProf(t *testing.T) { - if runtime.GOOS != "linux" || runtime.GOARCH != "amd64" { + if (runtime.GOOS != "linux" && runtime.GOOS != "freebsd") || runtime.GOARCH != "amd64" { t.Skipf("not yet supported on %s/%s", runtime.GOOS, runtime.GOARCH) } if runtime.Compiler == "gccgo" { @@ -384,7 +375,7 @@ func TestRaceProf(t *testing.T) { func TestRaceSignal(t *testing.T) { t.Parallel() - if runtime.GOOS != "linux" || runtime.GOARCH != "amd64" { + if (runtime.GOOS != "linux" && runtime.GOOS != "freebsd") || runtime.GOARCH != "amd64" { t.Skipf("not yet supported on %s/%s", runtime.GOOS, runtime.GOARCH) } @@ -514,3 +505,19 @@ func TestCgoTracebackSigpanic(t *testing.T) { t.Fatalf("failure incorrectly contains %q. output:\n%s\n", nowant, got) } } + +// Test that C code called via cgo can use large Windows thread stacks +// and call back in to Go without crashing. See issue #20975. +// +// See also TestBigStackCallbackSyscall. +func TestBigStackCallbackCgo(t *testing.T) { + if runtime.GOOS != "windows" { + t.Skip("skipping windows specific test") + } + t.Parallel() + got := runTestProg(t, "testprogcgo", "BigStack") + want := "OK\n" + if got != want { + t.Errorf("expected %q got %v", want, got) + } +} diff --git a/libgo/go/runtime/crash_gccgo_test.go b/libgo/go/runtime/crash_gccgo_test.go index c216e54..d4a826e 100644 --- a/libgo/go/runtime/crash_gccgo_test.go +++ b/libgo/go/runtime/crash_gccgo_test.go @@ -38,8 +38,8 @@ func TestGccgoCrashTracebackNodebug(t *testing.T) { } cc := strings.Fields(os.Getenv("CC")) - cc = append(cc, "-x", "c++", "-") - out, _ := exec.Command(cc[0], cc[1:]...).CombinedOutput() + cc = append(cc, "-o", os.DevNull, "-x", "c++", "-") + out, _ := testenv.CleanCmdEnv(exec.Command(cc[0], cc[1:]...)).CombinedOutput() if bytes.Contains(out, []byte("error trying to exec 'cc1plus'")) { t.Skip("no C++ compiler") } diff --git a/libgo/go/runtime/crash_nonunix_test.go b/libgo/go/runtime/crash_nonunix_test.go index 2ce995c..bf349a5 100644 --- a/libgo/go/runtime/crash_nonunix_test.go +++ b/libgo/go/runtime/crash_nonunix_test.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
-// +build windows plan9 nacl +// +build windows plan9 nacl js,wasm package runtime_test diff --git a/libgo/go/runtime/crash_test.go b/libgo/go/runtime/crash_test.go index 602630d..91a5c16 100644 --- a/libgo/go/runtime/crash_test.go +++ b/libgo/go/runtime/crash_test.go @@ -652,3 +652,112 @@ func TestBadTraceback(t *testing.T) { } } } + +func TestTimePprof(t *testing.T) { + if runtime.Compiler == "gccgo" { + t.Skip("gccgo may not have the pprof tool") + } + fn := runTestProg(t, "testprog", "TimeProf") + fn = strings.TrimSpace(fn) + defer os.Remove(fn) + + cmd := testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), "tool", "pprof", "-top", "-nodecount=1", fn)) + cmd.Env = append(cmd.Env, "PPROF_TMPDIR="+os.TempDir()) + top, err := cmd.CombinedOutput() + t.Logf("%s", top) + if err != nil { + t.Error(err) + } else if bytes.Contains(top, []byte("ExternalCode")) { + t.Error("profiler refers to ExternalCode") + } +} + +// Test that runtime.abort does so. +func TestAbort(t *testing.T) { + // Pass GOTRACEBACK to ensure we get runtime frames. + output := runTestProg(t, "testprog", "Abort", "GOTRACEBACK=system") + if want := "runtime.abort"; !strings.Contains(output, want) { + t.Errorf("output does not contain %q:\n%s", want, output) + } + if strings.Contains(output, "BAD") { + t.Errorf("output contains BAD:\n%s", output) + } + // Check that it's a signal traceback. + want := "PC=" + // For systems that use a breakpoint, check specifically for that. + if runtime.Compiler == "gc" { + switch runtime.GOARCH { + case "386", "amd64": + switch runtime.GOOS { + case "plan9": + want = "sys: breakpoint" + case "windows": + want = "Exception 0x80000003" + default: + want = "SIGTRAP" + } + } + } + if !strings.Contains(output, want) { + t.Errorf("output does not contain %q:\n%s", want, output) + } +} + +// For TestRuntimePanic: test a panic in the runtime package without +// involving the testing harness. +func init() { + if os.Getenv("GO_TEST_RUNTIME_PANIC") == "1" { + defer func() { + if r := recover(); r != nil { + // We expect to crash, so exit 0 + // to indicate failure. + os.Exit(0) + } + }() + runtime.PanicForTesting(nil, 1) + // We expect to crash, so exit 0 to indicate failure. + os.Exit(0) + } +} + +func TestRuntimePanic(t *testing.T) { + testenv.MustHaveExec(t) + cmd := testenv.CleanCmdEnv(exec.Command(os.Args[0], "-test.run=TestRuntimePanic")) + cmd.Env = append(cmd.Env, "GO_TEST_RUNTIME_PANIC=1") + out, err := cmd.CombinedOutput() + t.Logf("%s", out) + if err == nil { + t.Error("child process did not fail") + } else if want := "runtime.unexportedPanicForTesting"; !bytes.Contains(out, []byte(want)) { + t.Errorf("output did not contain expected string %q", want) + } +} + +// Test that g0 stack overflows are handled gracefully. +func TestG0StackOverflow(t *testing.T) { + testenv.MustHaveExec(t) + + switch runtime.GOOS { + case "darwin", "dragonfly", "freebsd", "linux", "netbsd", "openbsd", "android": + t.Skipf("g0 stack is wrong on pthread platforms (see golang.org/issue/26061)") + } + + if os.Getenv("TEST_G0_STACK_OVERFLOW") != "1" { + cmd := testenv.CleanCmdEnv(exec.Command(os.Args[0], "-test.run=TestG0StackOverflow", "-test.v")) + cmd.Env = append(cmd.Env, "TEST_G0_STACK_OVERFLOW=1") + out, err := cmd.CombinedOutput() + // Don't check err since it's expected to crash. + if n := strings.Count(string(out), "morestack on g0\n"); n != 1 { + t.Fatalf("%s\n(exit status %v)", out, err) + } + // Check that it's a signal-style traceback. 
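The new TestRuntimePanic and TestG0StackOverflow above both use the re-exec pattern: the test binary re-runs itself with an environment variable set, so the code that is expected to crash runs in a child process and the parent can inspect its output. A generic sketch of the pattern; the names TestMyCrasher and BE_CRASHER are placeholders, not anything from the patch:

    package mypkg_test

    import (
        "os"
        "os/exec"
        "strings"
        "testing"
    )

    func TestMyCrasher(t *testing.T) {
        if os.Getenv("BE_CRASHER") == "1" {
            // Child process: run the code that is expected to crash.
            panic("boom")
        }
        // Parent process: re-exec this test binary, selecting only this test.
        cmd := exec.Command(os.Args[0], "-test.run=TestMyCrasher")
        cmd.Env = append(os.Environ(), "BE_CRASHER=1")
        out, err := cmd.CombinedOutput()
        if err == nil {
            t.Fatalf("child did not crash; output:\n%s", out)
        }
        if !strings.Contains(string(out), "boom") {
            t.Errorf("child output does not mention the panic:\n%s", out)
        }
    }
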
+ if runtime.GOOS != "windows" { + if want := "PC="; !strings.Contains(string(out), want) { + t.Errorf("output does not contain %q:\n%s", want, out) + } + } + return + } + + runtime.G0StackOverflow() +} diff --git a/libgo/go/runtime/debug.go b/libgo/go/runtime/debug.go index 7cddd29..5be2ec4 100644 --- a/libgo/go/runtime/debug.go +++ b/libgo/go/runtime/debug.go @@ -15,6 +15,10 @@ import ( // The number of logical CPUs on the local machine can be queried with NumCPU. // This call will go away when the scheduler improves. func GOMAXPROCS(n int) int { + if GOARCH == "wasm" && n > 1 { + n = 1 // WebAssembly has no threads yet, so only one CPU is possible. + } + lock(&sched.lock) ret := int(gomaxprocs) unlock(&sched.lock) diff --git a/libgo/go/runtime/debug/heapdump_test.go b/libgo/go/runtime/debug/heapdump_test.go index 7d5b950..c986efc 100644 --- a/libgo/go/runtime/debug/heapdump_test.go +++ b/libgo/go/runtime/debug/heapdump_test.go @@ -13,8 +13,8 @@ import ( ) func TestWriteHeapDumpNonempty(t *testing.T) { - if runtime.GOOS == "nacl" { - t.Skip("WriteHeapDump is not available on NaCl.") + if runtime.GOOS == "nacl" || runtime.GOOS == "js" { + t.Skipf("WriteHeapDump is not available on %s.", runtime.GOOS) } f, err := ioutil.TempFile("", "heapdumptest") if err != nil { @@ -42,8 +42,8 @@ func objfin(x *Obj) { } func TestWriteHeapDumpFinalizers(t *testing.T) { - if runtime.GOOS == "nacl" { - t.Skip("WriteHeapDump is not available on NaCl.") + if runtime.GOOS == "nacl" || runtime.GOOS == "js" { + t.Skipf("WriteHeapDump is not available on %s.", runtime.GOOS) } f, err := ioutil.TempFile("", "heapdumptest") if err != nil { diff --git a/libgo/go/runtime/debug_test.go b/libgo/go/runtime/debug_test.go new file mode 100644 index 0000000..38c764f --- /dev/null +++ b/libgo/go/runtime/debug_test.go @@ -0,0 +1,207 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// TODO: This test could be implemented on all (most?) UNIXes if we +// added syscall.Tgkill more widely. + +// We skip all of these tests under race mode because our test thread +// spends all of its time in the race runtime, which isn't a safe +// point. + +// +build ignore_for_gccgo +// +build amd64 +// +build linux +// +build !race + +package runtime_test + +import ( + "fmt" + "runtime" + "runtime/debug" + "sync/atomic" + "syscall" + "testing" +) + +func startDebugCallWorker(t *testing.T) (g *runtime.G, after func()) { + // This can deadlock if there aren't enough threads or if a GC + // tries to interrupt an atomic loop (see issue #10958). + ogomaxprocs := runtime.GOMAXPROCS(2) + ogcpercent := debug.SetGCPercent(-1) + + ready := make(chan *runtime.G) + var stop uint32 + done := make(chan error) + go debugCallWorker(ready, &stop, done) + g = <-ready + return g, func() { + atomic.StoreUint32(&stop, 1) + err := <-done + if err != nil { + t.Fatal(err) + } + runtime.GOMAXPROCS(ogomaxprocs) + debug.SetGCPercent(ogcpercent) + } +} + +func debugCallWorker(ready chan<- *runtime.G, stop *uint32, done chan<- error) { + runtime.LockOSThread() + defer runtime.UnlockOSThread() + + ready <- runtime.Getg() + + x := 2 + debugCallWorker2(stop, &x) + if x != 1 { + done <- fmt.Errorf("want x = 2, got %d; register pointer not adjusted?", x) + } + close(done) +} + +func debugCallWorker2(stop *uint32, x *int) { + for atomic.LoadUint32(stop) == 0 { + // Strongly encourage x to live in a register so we + // can test pointer register adjustment. 
+ *x++ + } + *x = 1 +} + +func debugCallTKill(tid int) error { + return syscall.Tgkill(syscall.Getpid(), tid, syscall.SIGTRAP) +} + +func TestDebugCall(t *testing.T) { + g, after := startDebugCallWorker(t) + defer after() + + // Inject a call into the debugCallWorker goroutine and test + // basic argument and result passing. + var args struct { + x int + yRet int + } + fn := func(x int) (yRet int) { + return x + 1 + } + args.x = 42 + if _, err := runtime.InjectDebugCall(g, fn, &args, debugCallTKill); err != nil { + t.Fatal(err) + } + if args.yRet != 43 { + t.Fatalf("want 43, got %d", args.yRet) + } +} + +func TestDebugCallLarge(t *testing.T) { + g, after := startDebugCallWorker(t) + defer after() + + // Inject a call with a large call frame. + const N = 128 + var args struct { + in [N]int + out [N]int + } + fn := func(in [N]int) (out [N]int) { + for i := range in { + out[i] = in[i] + 1 + } + return + } + var want [N]int + for i := range args.in { + args.in[i] = i + want[i] = i + 1 + } + if _, err := runtime.InjectDebugCall(g, fn, &args, debugCallTKill); err != nil { + t.Fatal(err) + } + if want != args.out { + t.Fatalf("want %v, got %v", want, args.out) + } +} + +func TestDebugCallGC(t *testing.T) { + g, after := startDebugCallWorker(t) + defer after() + + // Inject a call that performs a GC. + if _, err := runtime.InjectDebugCall(g, runtime.GC, nil, debugCallTKill); err != nil { + t.Fatal(err) + } +} + +func TestDebugCallGrowStack(t *testing.T) { + g, after := startDebugCallWorker(t) + defer after() + + // Inject a call that grows the stack. debugCallWorker checks + // for stack pointer breakage. + if _, err := runtime.InjectDebugCall(g, func() { growStack(nil) }, nil, debugCallTKill); err != nil { + t.Fatal(err) + } +} + +//go:nosplit +func debugCallUnsafePointWorker(gpp **runtime.G, ready, stop *uint32) { + // The nosplit causes this function to not contain safe-points + // except at calls. + runtime.LockOSThread() + defer runtime.UnlockOSThread() + + *gpp = runtime.Getg() + + for atomic.LoadUint32(stop) == 0 { + atomic.StoreUint32(ready, 1) + } +} + +func TestDebugCallUnsafePoint(t *testing.T) { + // This can deadlock if there aren't enough threads or if a GC + // tries to interrupt an atomic loop (see issue #10958). + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(2)) + defer debug.SetGCPercent(debug.SetGCPercent(-1)) + + // Test that the runtime refuses call injection at unsafe points. + var g *runtime.G + var ready, stop uint32 + defer atomic.StoreUint32(&stop, 1) + go debugCallUnsafePointWorker(&g, &ready, &stop) + for atomic.LoadUint32(&ready) == 0 { + runtime.Gosched() + } + + _, err := runtime.InjectDebugCall(g, func() {}, nil, debugCallTKill) + if msg := "call not at safe point"; err == nil || err.Error() != msg { + t.Fatalf("want %q, got %s", msg, err) + } +} + +func TestDebugCallPanic(t *testing.T) { + // This can deadlock if there aren't enough threads. 
+ defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(2)) + + ready := make(chan *runtime.G) + var stop uint32 + defer atomic.StoreUint32(&stop, 1) + go func() { + runtime.LockOSThread() + defer runtime.UnlockOSThread() + ready <- runtime.Getg() + for atomic.LoadUint32(&stop) == 0 { + } + }() + g := <-ready + + p, err := runtime.InjectDebugCall(g, func() { panic("test") }, nil, debugCallTKill) + if err != nil { + t.Fatal(err) + } + if ps, ok := p.(string); !ok || ps != "test" { + t.Fatalf("wanted panic %v, got %v", "test", p) + } +} diff --git a/libgo/go/runtime/env_posix.go b/libgo/go/runtime/env_posix.go index ddf3c02..399e88f 100644 --- a/libgo/go/runtime/env_posix.go +++ b/libgo/go/runtime/env_posix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows +// +build aix darwin dragonfly freebsd js,wasm linux nacl netbsd openbsd solaris windows package runtime diff --git a/libgo/go/runtime/error.go b/libgo/go/runtime/error.go index 1a038cf..b1a3f68 100644 --- a/libgo/go/runtime/error.go +++ b/libgo/go/runtime/error.go @@ -19,55 +19,40 @@ type Error interface { // A TypeAssertionError explains a failed type assertion. type TypeAssertionError struct { - interfaceString string - concreteString string - assertedString string - missingMethod string // one method needed by Interface, missing from Concrete + _interface *_type + concrete *_type + asserted *_type + missingMethod string // one method needed by Interface, missing from Concrete } func (*TypeAssertionError) RuntimeError() {} func (e *TypeAssertionError) Error() string { - inter := e.interfaceString - if inter == "" { - inter = "interface" + inter := "interface" + if e._interface != nil { + inter = e._interface.string() } - if e.concreteString == "" { - return "interface conversion: " + inter + " is nil, not " + e.assertedString + as := e.asserted.string() + if e.concrete == nil { + return "interface conversion: " + inter + " is nil, not " + as } + cs := e.concrete.string() if e.missingMethod == "" { - return "interface conversion: " + inter + " is " + e.concreteString + - ", not " + e.assertedString + msg := "interface conversion: " + inter + " is " + cs + ", not " + as + if cs == as { + // provide slightly clearer error message + if e.concrete.pkgpath() != e.asserted.pkgpath() { + msg += " (types from different packages)" + } else { + msg += " (types from different scopes)" + } + } + return msg } - return "interface conversion: " + e.concreteString + " is not " + e.assertedString + + return "interface conversion: " + cs + " is not " + as + ": missing method " + e.missingMethod } -// For calling from C. -func NewTypeAssertionError(ps1, ps2, ps3 *string, pmeth *string, ret *interface{}) { - var s1, s2, s3, meth string - - if ps1 != nil { - s1 = *ps1 - } - if ps2 != nil { - s2 = *ps2 - } - if ps3 != nil { - s3 = *ps3 - } - if pmeth != nil { - meth = *pmeth - } - - // For gccgo, strip out quoted strings. - s1 = unquote(s1) - s2 = unquote(s2) - s3 = unquote(s3) - - *ret = &TypeAssertionError{s1, s2, s3, meth} -} - // Remove quoted strings from gccgo reflection strings. func unquote(s string) string { ls := len(s) @@ -135,7 +120,7 @@ type stringer interface { func typestring(x interface{}) string { e := efaceOf(&x) - return *e._type.string + return e._type.string() } // printany prints an argument passed to panic. 
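The error.go rework above keeps the *_type values instead of pre-rendered strings and appends the "(types from different packages)" or "(types from different scopes)" hint when the two printed names collide. For reference, these messages surface through ordinary failed type assertions, as in this small example:

    package main

    import "fmt"

    func main() {
        var v interface{} = 42

        // The two-result form never panics.
        s, ok := v.(string)
        fmt.Println(s, ok) // "" false

        // The single-result form panics with a *runtime.TypeAssertionError:
        //   interface conversion: interface {} is int, not string
        defer func() { fmt.Println("recovered:", recover()) }()
        _ = v.(string)
    }
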
diff --git a/libgo/go/runtime/export_debug_test.go b/libgo/go/runtime/export_debug_test.go new file mode 100644 index 0000000..2d2d535 --- /dev/null +++ b/libgo/go/runtime/export_debug_test.go @@ -0,0 +1,169 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore_for_gccgo +// +build amd64 +// +build linux + +package runtime + +import ( + "runtime/internal/sys" + "unsafe" +) + +// InjectDebugCall injects a debugger call to fn into g. args must be +// a pointer to a valid call frame (including arguments and return +// space) for fn, or nil. tkill must be a function that will send +// SIGTRAP to thread ID tid. gp must be locked to its OS thread and +// running. +// +// On success, InjectDebugCall returns the panic value of fn or nil. +// If fn did not panic, its results will be available in args. +func InjectDebugCall(gp *g, fn, args interface{}, tkill func(tid int) error) (interface{}, error) { + if gp.lockedm == 0 { + return nil, plainError("goroutine not locked to thread") + } + + tid := int(gp.lockedm.ptr().procid) + if tid == 0 { + return nil, plainError("missing tid") + } + + f := efaceOf(&fn) + if f._type == nil || f._type.kind&kindMask != kindFunc { + return nil, plainError("fn must be a function") + } + fv := (*funcval)(f.data) + + a := efaceOf(&args) + if a._type != nil && a._type.kind&kindMask != kindPtr { + return nil, plainError("args must be a pointer or nil") + } + argp := a.data + var argSize uintptr + if argp != nil { + argSize = (*ptrtype)(unsafe.Pointer(a._type)).elem.size + } + + h := new(debugCallHandler) + h.gp = gp + h.fv, h.argp, h.argSize = fv, argp, argSize + h.handleF = h.handle // Avoid allocating closure during signal + noteclear(&h.done) + + defer func() { testSigtrap = nil }() + testSigtrap = h.inject + if err := tkill(tid); err != nil { + return nil, err + } + // Wait for completion. + notetsleepg(&h.done, -1) + if len(h.err) != 0 { + return nil, h.err + } + return h.panic, nil +} + +type debugCallHandler struct { + gp *g + fv *funcval + argp unsafe.Pointer + argSize uintptr + panic interface{} + + handleF func(info *siginfo, ctxt *sigctxt, gp2 *g) bool + + err plainError + done note + savedRegs sigcontext + savedFP fpstate1 +} + +func (h *debugCallHandler) inject(info *siginfo, ctxt *sigctxt, gp2 *g) bool { + switch h.gp.atomicstatus { + case _Grunning: + if getg().m != h.gp.m { + println("trap on wrong M", getg().m, h.gp.m) + return false + } + // Push current PC on the stack. + rsp := ctxt.rsp() - sys.PtrSize + *(*uint64)(unsafe.Pointer(uintptr(rsp))) = ctxt.rip() + ctxt.set_rsp(rsp) + // Write the argument frame size. + *(*uintptr)(unsafe.Pointer(uintptr(rsp - 16))) = h.argSize + // Save current registers. + h.savedRegs = *ctxt.regs() + h.savedFP = *h.savedRegs.fpstate + h.savedRegs.fpstate = nil + // Set PC to debugCallV1. + ctxt.set_rip(uint64(funcPC(debugCallV1))) + default: + h.err = plainError("goroutine in unexpected state at call inject") + return true + } + // Switch to the debugCall protocol and resume execution. + testSigtrap = h.handleF + return true +} + +func (h *debugCallHandler) handle(info *siginfo, ctxt *sigctxt, gp2 *g) bool { + // Sanity check. 
+ if getg().m != h.gp.m { + println("trap on wrong M", getg().m, h.gp.m) + return false + } + f := findfunc(uintptr(ctxt.rip())) + if !(hasprefix(funcname(f), "runtime.debugCall") || hasprefix(funcname(f), "debugCall")) { + println("trap in unknown function", funcname(f)) + return false + } + if *(*byte)(unsafe.Pointer(uintptr(ctxt.rip() - 1))) != 0xcc { + println("trap at non-INT3 instruction pc =", hex(ctxt.rip())) + return false + } + + switch status := ctxt.rax(); status { + case 0: + // Frame is ready. Copy the arguments to the frame. + sp := ctxt.rsp() + memmove(unsafe.Pointer(uintptr(sp)), h.argp, h.argSize) + // Push return PC. + sp -= sys.PtrSize + ctxt.set_rsp(sp) + *(*uint64)(unsafe.Pointer(uintptr(sp))) = ctxt.rip() + // Set PC to call and context register. + ctxt.set_rip(uint64(h.fv.fn)) + ctxt.regs().rcx = uint64(uintptr(unsafe.Pointer(h.fv))) + case 1: + // Function returned. Copy frame back out. + sp := ctxt.rsp() + memmove(h.argp, unsafe.Pointer(uintptr(sp)), h.argSize) + case 2: + // Function panicked. Copy panic out. + sp := ctxt.rsp() + memmove(unsafe.Pointer(&h.panic), unsafe.Pointer(uintptr(sp)), 2*sys.PtrSize) + case 8: + // Call isn't safe. Get the reason. + sp := ctxt.rsp() + reason := *(*string)(unsafe.Pointer(uintptr(sp))) + h.err = plainError(reason) + case 16: + // Restore all registers except RIP and RSP. + rip, rsp := ctxt.rip(), ctxt.rsp() + fp := ctxt.regs().fpstate + *ctxt.regs() = h.savedRegs + ctxt.regs().fpstate = fp + *fp = h.savedFP + ctxt.set_rip(rip) + ctxt.set_rsp(rsp) + // Done + notewakeup(&h.done) + default: + h.err = plainError("unexpected debugCallV1 status") + } + // Resume execution. + return true +} diff --git a/libgo/go/runtime/export_linux_test.go b/libgo/go/runtime/export_linux_test.go index 183a6ee..96ff1c7 100644 --- a/libgo/go/runtime/export_linux_test.go +++ b/libgo/go/runtime/export_linux_test.go @@ -6,5 +6,11 @@ package runtime -//var NewOSProc0 = newosproc0 -//var Mincore = mincore +import "unsafe" + +// var NewOSProc0 = newosproc0 +// var Mincore = mincore + +func Epollctl(epfd, op, fd int32, ev unsafe.Pointer) int32 { + return epollctl(epfd, op, fd, (*epollevent)(ev)) +} diff --git a/libgo/go/runtime/export_test.go b/libgo/go/runtime/export_test.go index 5e798e3..7f4811c 100644 --- a/libgo/go/runtime/export_test.go +++ b/libgo/go/runtime/export_test.go @@ -21,7 +21,6 @@ import ( //var Fcmp64 = fcmp64 //var Fintto64 = fintto64 //var F64toint = f64toint -//var Sqrt = sqrt var Entersyscall = entersyscall var Exitsyscall = exitsyscall @@ -372,6 +371,8 @@ func (rw *RWMutex) Unlock() { rw.rw.unlock() } +const RuntimeHmapSize = unsafe.Sizeof(hmap{}) + func MapBucketsCount(m map[int]int) int { h := *(**hmap)(unsafe.Pointer(&m)) return 1 << h.B @@ -395,3 +396,61 @@ func LockOSCounts() (external, internal uint32) { } return g.m.lockedExt, g.m.lockedInt } + +func KeepNArenaHints(n int) { + hint := mheap_.arenaHints + for i := 1; i < n; i++ { + hint = hint.next + if hint == nil { + return + } + } + hint.next = nil +} + +// MapNextArenaHint reserves a page at the next arena growth hint, +// preventing the arena from growing there, and returns the range of +// addresses that are no longer viable. 
+func MapNextArenaHint() (start, end uintptr) { + hint := mheap_.arenaHints + addr := hint.addr + if hint.down { + start, end = addr-heapArenaBytes, addr + addr -= physPageSize + } else { + start, end = addr, addr+heapArenaBytes + } + sysReserve(unsafe.Pointer(addr), physPageSize) + return +} + +func GetNextArenaHint() uintptr { + return mheap_.arenaHints.addr +} + +type G = g + +func Getg() *G { + return getg() +} + +//go:noinline +func PanicForTesting(b []byte, i int) byte { + return unexportedPanicForTesting(b, i) +} + +//go:noinline +func unexportedPanicForTesting(b []byte, i int) byte { + return b[i] +} + +func G0StackOverflow() { + systemstack(func() { + stackOverflow(nil) + }) +} + +func stackOverflow(x *byte) { + var buf [256]byte + stackOverflow(&buf[0]) +} diff --git a/libgo/go/runtime/extern.go b/libgo/go/runtime/extern.go index b3afd10..c9d10f1 100644 --- a/libgo/go/runtime/extern.go +++ b/libgo/go/runtime/extern.go @@ -116,6 +116,12 @@ It is a comma-separated list of name=val pairs setting these named variables: schedtrace: setting schedtrace=X causes the scheduler to emit a single line to standard error every X milliseconds, summarizing the scheduler state. + tracebackancestors: setting tracebackancestors=N extends tracebacks with the stacks at + which goroutines were created, where N limits the number of ancestor goroutines to + report. This also extends the information returned by runtime.Stack. Ancestor's goroutine + IDs will refer to the ID of the goroutine at the time of creation; it's possible for this + ID to be reused for another goroutine. Setting N to 0 will report no ancestry information. + The net and net/http packages also refer to debugging variables in GODEBUG. See the documentation for those packages for details. diff --git a/libgo/go/runtime/gc_test.go b/libgo/go/runtime/gc_test.go index a8c52d2..180919b 100644 --- a/libgo/go/runtime/gc_test.go +++ b/libgo/go/runtime/gc_test.go @@ -10,6 +10,7 @@ import ( "reflect" "runtime" "runtime/debug" + "sync" "sync/atomic" "testing" "time" @@ -44,7 +45,7 @@ func TestGcDeepNesting(t *testing.T) { } } -func TestGcHashmapIndirection(t *testing.T) { +func TestGcMapIndirection(t *testing.T) { defer debug.SetGCPercent(debug.SetGCPercent(1)) runtime.GC() type T struct { @@ -157,6 +158,10 @@ func TestHugeGCInfo(t *testing.T) { /* func TestPeriodicGC(t *testing.T) { + if runtime.GOARCH == "wasm" { + t.Skip("no sysmon on wasm yet") + } + // Make sure we're not in the middle of a GC. 
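The extern.go addition above documents the new GODEBUG=tracebackancestors=N knob. A tiny program to try it with; run it as, say, GODEBUG=tracebackancestors=5 go run crash.go (the file name is arbitrary) against a runtime that includes this change, and the fatal traceback will also show the stacks at which the ancestor goroutines were created:

    package main

    func level2() { panic("boom") }

    func level1() { go level2() }

    func main() {
        go level1()
        select {} // block so the spawned goroutines get to run and crash
    }
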
runtime.GC() @@ -642,3 +647,34 @@ func BenchmarkBulkWriteBarrier(b *testing.B) { runtime.KeepAlive(ptrs) } + +func BenchmarkScanStackNoLocals(b *testing.B) { + var ready sync.WaitGroup + teardown := make(chan bool) + for j := 0; j < 10; j++ { + ready.Add(1) + go func() { + x := 100000 + countpwg(&x, &ready, teardown) + }() + } + ready.Wait() + b.ResetTimer() + for i := 0; i < b.N; i++ { + b.StartTimer() + runtime.GC() + runtime.GC() + b.StopTimer() + } + close(teardown) +} + +func countpwg(n *int, ready *sync.WaitGroup, teardown chan bool) { + if *n == 0 { + ready.Done() + <-teardown + return + } + *n-- + countpwg(n, ready, teardown) +} diff --git a/libgo/go/runtime/gcinfo_test.go b/libgo/go/runtime/gcinfo_test.go index 4ac67dc..ca012bb 100644 --- a/libgo/go/runtime/gcinfo_test.go +++ b/libgo/go/runtime/gcinfo_test.go @@ -133,7 +133,7 @@ func infoBigStruct() []byte { typeScalar, typeScalar, typeScalar, typeScalar, // t int; y uint16; u uint64 typePointer, typeScalar, // i string } - case "arm64", "amd64", "mips64", "mips64le", "ppc64", "ppc64le", "s390x": + case "arm64", "amd64", "mips64", "mips64le", "ppc64", "ppc64le", "s390x", "wasm": return []byte{ typePointer, // q *int typeScalar, typeScalar, typeScalar, // w byte; e [17]byte @@ -186,6 +186,6 @@ var ( infoString = []byte{typePointer, typeScalar} infoSlice = []byte{typePointer, typeScalar, typeScalar} - infoEface = []byte{typePointer, typePointer} - infoIface = []byte{typePointer, typePointer} + infoEface = []byte{typeScalar, typePointer} + infoIface = []byte{typeScalar, typePointer} ) diff --git a/libgo/go/runtime/hash64.go b/libgo/go/runtime/hash64.go index 74775a8..7c6513e 100644 --- a/libgo/go/runtime/hash64.go +++ b/libgo/go/runtime/hash64.go @@ -6,7 +6,7 @@ // xxhash: https://code.google.com/p/xxhash/ // cityhash: https://code.google.com/p/cityhash/ -// +build amd64 amd64p32 arm64 mips64 mips64le ppc64 ppc64le s390x alpha arm64be ia64 mips64p32 mips64p32le sparc64 riscv64 +// +build amd64 amd64p32 arm64 mips64 mips64le ppc64 ppc64le s390x wasm alpha arm64be ia64 mips64p32 mips64p32le sparc64 riscv64 package runtime @@ -26,7 +26,8 @@ const ( ) func memhash(p unsafe.Pointer, seed, s uintptr) uintptr { - if GOARCH == "amd64" && GOOS != "nacl" && useAeshash { + if (GOARCH == "amd64" || GOARCH == "arm64") && + GOOS != "nacl" && useAeshash { return aeshash(p, seed, s) } h := uint64(seed + s*hashkey[0]) diff --git a/libgo/go/runtime/hash_test.go b/libgo/go/runtime/hash_test.go index 54c9160..070edb6 100644 --- a/libgo/go/runtime/hash_test.go +++ b/libgo/go/runtime/hash_test.go @@ -161,6 +161,9 @@ func TestSmhasherZeros(t *testing.T) { // Strings with up to two nonzero bytes all have distinct hashes. func TestSmhasherTwoNonzero(t *testing.T) { + if GOARCH == "wasm" { + t.Skip("Too slow on wasm") + } if testing.Short() { t.Skip("Skipping in short mode") } @@ -229,6 +232,9 @@ func TestSmhasherCyclic(t *testing.T) { // Test strings with only a few bits set func TestSmhasherSparse(t *testing.T) { + if GOARCH == "wasm" { + t.Skip("Too slow on wasm") + } if testing.Short() { t.Skip("Skipping in short mode") } @@ -264,6 +270,9 @@ func setbits(h *HashSet, b []byte, i int, k int) { // Test all possible combinations of n blocks from the set s. // "permutation" is a bad name here, but it is what Smhasher uses. 
func TestSmhasherPermutation(t *testing.T) { + if GOARCH == "wasm" { + t.Skip("Too slow on wasm") + } if testing.Short() { t.Skip("Skipping in short mode") } @@ -433,6 +442,9 @@ func (k *IfaceKey) name() string { // Flipping a single bit of a key should flip each output bit with 50% probability. func TestSmhasherAvalanche(t *testing.T) { + if GOARCH == "wasm" { + t.Skip("Too slow on wasm") + } if testing.Short() { t.Skip("Skipping in short mode") } @@ -508,6 +520,9 @@ func TestSmhasherWindowed(t *testing.T) { windowed(t, &BytesKey{make([]byte, 128)}) } func windowed(t *testing.T, k Key) { + if GOARCH == "wasm" { + t.Skip("Too slow on wasm") + } if testing.Short() { t.Skip("Skipping in short mode") } diff --git a/libgo/go/runtime/hashmap.go b/libgo/go/runtime/hashmap.go deleted file mode 100644 index 53b05b1..0000000 --- a/libgo/go/runtime/hashmap.go +++ /dev/null @@ -1,1298 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package runtime - -// This file contains the implementation of Go's map type. -// -// A map is just a hash table. The data is arranged -// into an array of buckets. Each bucket contains up to -// 8 key/value pairs. The low-order bits of the hash are -// used to select a bucket. Each bucket contains a few -// high-order bits of each hash to distinguish the entries -// within a single bucket. -// -// If more than 8 keys hash to a bucket, we chain on -// extra buckets. -// -// When the hashtable grows, we allocate a new array -// of buckets twice as big. Buckets are incrementally -// copied from the old bucket array to the new bucket array. -// -// Map iterators walk through the array of buckets and -// return the keys in walk order (bucket #, then overflow -// chain order, then bucket index). To maintain iteration -// semantics, we never move keys within their bucket (if -// we did, keys might be returned 0 or 2 times). When -// growing the table, iterators remain iterating through the -// old table and must check the new table if the bucket -// they are iterating through has been moved ("evacuated") -// to the new table. - -// Picking loadFactor: too large and we have lots of overflow -// buckets, too small and we waste a lot of space. I wrote -// a simple program to check some stats for different loads: -// (64-bit, 8 byte keys and values) -// loadFactor %overflow bytes/entry hitprobe missprobe -// 4.00 2.13 20.77 3.00 4.00 -// 4.50 4.05 17.30 3.25 4.50 -// 5.00 6.85 14.77 3.50 5.00 -// 5.50 10.55 12.94 3.75 5.50 -// 6.00 15.27 11.67 4.00 6.00 -// 6.50 20.90 10.79 4.25 6.50 -// 7.00 27.14 10.15 4.50 7.00 -// 7.50 34.03 9.73 4.75 7.50 -// 8.00 41.10 9.40 5.00 8.00 -// -// %overflow = percentage of buckets which have an overflow bucket -// bytes/entry = overhead bytes used per key/value pair -// hitprobe = # of entries to check when looking up a present key -// missprobe = # of entries to check when looking up an absent key -// -// Keep in mind this data is for maximally loaded tables, i.e. just -// before the table grows. Typical tables will be somewhat less loaded. - -import ( - "runtime/internal/atomic" - "runtime/internal/sys" - "unsafe" -) - -// For gccgo, use go:linkname to rename compiler-called functions to -// themselves, so that the compiler will export them. 
-// -//go:linkname makemap runtime.makemap -//go:linkname makemap64 runtime.makemap64 -//go:linkname makemap_small runtime.makemap_small -//go:linkname mapaccess1 runtime.mapaccess1 -//go:linkname mapaccess2 runtime.mapaccess2 -//go:linkname mapaccess1_fat runtime.mapaccess1_fat -//go:linkname mapaccess2_fat runtime.mapaccess2_fat -//go:linkname mapassign runtime.mapassign -//go:linkname mapdelete runtime.mapdelete -//go:linkname mapiterinit runtime.mapiterinit -//go:linkname mapiternext runtime.mapiternext - -const ( - // Maximum number of key/value pairs a bucket can hold. - bucketCntBits = 3 - bucketCnt = 1 << bucketCntBits - - // Maximum average load of a bucket that triggers growth is 6.5. - // Represent as loadFactorNum/loadFactDen, to allow integer math. - loadFactorNum = 13 - loadFactorDen = 2 - - // Maximum key or value size to keep inline (instead of mallocing per element). - // Must fit in a uint8. - // Fast versions cannot handle big values - the cutoff size for - // fast versions in ../../cmd/internal/gc/walk.go must be at most this value. - maxKeySize = 128 - maxValueSize = 128 - - // data offset should be the size of the bmap struct, but needs to be - // aligned correctly. For amd64p32 this means 64-bit alignment - // even though pointers are 32 bit. - dataOffset = unsafe.Offsetof(struct { - b bmap - v int64 - }{}.v) - - // Possible tophash values. We reserve a few possibilities for special marks. - // Each bucket (including its overflow buckets, if any) will have either all or none of its - // entries in the evacuated* states (except during the evacuate() method, which only happens - // during map writes and thus no one else can observe the map during that time). - empty = 0 // cell is empty - evacuatedEmpty = 1 // cell is empty, bucket is evacuated. - evacuatedX = 2 // key/value is valid. Entry has been evacuated to first half of larger table. - evacuatedY = 3 // same as above, but evacuated to second half of larger table. - minTopHash = 4 // minimum tophash for a normal filled cell. - - // flags - iterator = 1 // there may be an iterator using buckets - oldIterator = 2 // there may be an iterator using oldbuckets - hashWriting = 4 // a goroutine is writing to the map - sameSizeGrow = 8 // the current map growth is to a new map of the same size - - // sentinel bucket ID for iterator checks - noCheck = 1<<(8*sys.PtrSize) - 1 -) - -// A header for a Go map. -type hmap struct { - // Note: the format of the Hmap is encoded in ../../cmd/internal/gc/reflect.go and - // ../reflect/type.go. Don't change this structure without also changing that code! - count int // # live cells == size of map. Must be first (used by len() builtin) - flags uint8 - B uint8 // log_2 of # of buckets (can hold up to loadFactor * 2^B items) - noverflow uint16 // approximate number of overflow buckets; see incrnoverflow for details - hash0 uint32 // hash seed - - buckets unsafe.Pointer // array of 2^B Buckets. may be nil if count==0. - oldbuckets unsafe.Pointer // previous bucket array of half the size, non-nil only when growing - nevacuate uintptr // progress counter for evacuation (buckets less than this have been evacuated) - - extra *mapextra // optional fields -} - -// mapextra holds fields that are not present on all maps. -type mapextra struct { - // If both key and value do not contain pointers and are inline, then we mark bucket - // type as containing no pointers. This avoids scanning such maps. - // However, bmap.overflow is a pointer. 
In order to keep overflow buckets - // alive, we store pointers to all overflow buckets in hmap.overflow and h.map.oldoverflow. - // overflow and oldoverflow are only used if key and value do not contain pointers. - // overflow contains overflow buckets for hmap.buckets. - // oldoverflow contains overflow buckets for hmap.oldbuckets. - // The indirection allows to store a pointer to the slice in hiter. - overflow *[]*bmap - oldoverflow *[]*bmap - - // nextOverflow holds a pointer to a free overflow bucket. - nextOverflow *bmap -} - -// A bucket for a Go map. -type bmap struct { - // tophash generally contains the top byte of the hash value - // for each key in this bucket. If tophash[0] < minTopHash, - // tophash[0] is a bucket evacuation state instead. - tophash [bucketCnt]uint8 - // Followed by bucketCnt keys and then bucketCnt values. - // NOTE: packing all the keys together and then all the values together makes the - // code a bit more complicated than alternating key/value/key/value/... but it allows - // us to eliminate padding which would be needed for, e.g., map[int64]int8. - // Followed by an overflow pointer. -} - -// A hash iteration structure. -// If you modify hiter, also change cmd/internal/gc/reflect.go to indicate -// the layout of this structure. -type hiter struct { - key unsafe.Pointer // Must be in first position. Write nil to indicate iteration end (see cmd/internal/gc/range.go). - value unsafe.Pointer // Must be in second position (see cmd/internal/gc/range.go). - t *maptype - h *hmap - buckets unsafe.Pointer // bucket ptr at hash_iter initialization time - bptr *bmap // current bucket - overflow *[]*bmap // keeps overflow buckets of hmap.buckets alive - oldoverflow *[]*bmap // keeps overflow buckets of hmap.oldbuckets alive - startBucket uintptr // bucket iteration started at - offset uint8 // intra-bucket offset to start from during iteration (should be big enough to hold bucketCnt-1) - wrapped bool // already wrapped around from end of bucket array to beginning - B uint8 - i uint8 - bucket uintptr - checkBucket uintptr -} - -// bucketShift returns 1<> (sys.PtrSize*8 - 8)) - if top < minTopHash { - top += minTopHash - } - return top -} - -func evacuated(b *bmap) bool { - h := b.tophash[0] - return h > empty && h < minTopHash -} - -func (b *bmap) overflow(t *maptype) *bmap { - return *(**bmap)(add(unsafe.Pointer(b), uintptr(t.bucketsize)-sys.PtrSize)) -} - -func (b *bmap) setoverflow(t *maptype, ovf *bmap) { - *(**bmap)(add(unsafe.Pointer(b), uintptr(t.bucketsize)-sys.PtrSize)) = ovf -} - -func (b *bmap) keys() unsafe.Pointer { - return add(unsafe.Pointer(b), dataOffset) -} - -// incrnoverflow increments h.noverflow. -// noverflow counts the number of overflow buckets. -// This is used to trigger same-size map growth. -// See also tooManyOverflowBuckets. -// To keep hmap small, noverflow is a uint16. -// When there are few buckets, noverflow is an exact count. -// When there are many buckets, noverflow is an approximate count. -func (h *hmap) incrnoverflow() { - // We trigger same-size map growth if there are - // as many overflow buckets as buckets. 
- // We need to be able to count to 1< int(maxSliceCap(t.bucket.size)) { - hint = 0 - } - - // initialize Hmap - if h == nil { - h = new(hmap) - } - h.hash0 = fastrand() - - // find size parameter which will hold the requested # of elements - B := uint8(0) - for overLoadFactor(hint, B) { - B++ - } - h.B = B - - // allocate initial hash table - // if B == 0, the buckets field is allocated lazily later (in mapassign) - // If hint is large zeroing this memory could take a while. - if h.B != 0 { - var nextOverflow *bmap - h.buckets, nextOverflow = makeBucketArray(t, h.B) - if nextOverflow != nil { - h.extra = new(mapextra) - h.extra.nextOverflow = nextOverflow - } - } - - return h -} - -// mapaccess1 returns a pointer to h[key]. Never returns nil, instead -// it will return a reference to the zero object for the value type if -// the key is not in the map. -// NOTE: The returned pointer may keep the whole map live, so don't -// hold onto it for very long. -func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { - // Check preemption, since unlike gc we don't check on every call. - if getg().preempt { - checkPreempt() - } - - if raceenabled && h != nil { - callerpc := getcallerpc() - pc := funcPC(mapaccess1) - racereadpc(unsafe.Pointer(h), callerpc, pc) - raceReadObjectPC(t.key, key, callerpc, pc) - } - if msanenabled && h != nil { - msanread(key, t.key.size) - } - if h == nil || h.count == 0 { - return unsafe.Pointer(&zeroVal[0]) - } - if h.flags&hashWriting != 0 { - throw("concurrent map read and map write") - } - hashfn := t.key.hashfn - equalfn := t.key.equalfn - hash := hashfn(key, uintptr(h.hash0)) - m := bucketMask(h.B) - b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) - if c := h.oldbuckets; c != nil { - if !h.sameSizeGrow() { - // There used to be half as many buckets; mask down one more power of two. - m >>= 1 - } - oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) - if !evacuated(oldb) { - b = oldb - } - } - top := tophash(hash) - for ; b != nil; b = b.overflow(t) { - for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] != top { - continue - } - k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize)) - if t.indirectkey { - k = *((*unsafe.Pointer)(k)) - } - if equalfn(key, k) { - v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) - if t.indirectvalue { - v = *((*unsafe.Pointer)(v)) - } - return v - } - } - } - return unsafe.Pointer(&zeroVal[0]) -} - -func mapaccess2(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, bool) { - // Check preemption, since unlike gc we don't check on every call. - if getg().preempt { - checkPreempt() - } - - if raceenabled && h != nil { - callerpc := getcallerpc() - pc := funcPC(mapaccess2) - racereadpc(unsafe.Pointer(h), callerpc, pc) - raceReadObjectPC(t.key, key, callerpc, pc) - } - if msanenabled && h != nil { - msanread(key, t.key.size) - } - if h == nil || h.count == 0 { - return unsafe.Pointer(&zeroVal[0]), false - } - if h.flags&hashWriting != 0 { - throw("concurrent map read and map write") - } - hashfn := t.key.hashfn - equalfn := t.key.equalfn - hash := hashfn(key, uintptr(h.hash0)) - m := bucketMask(h.B) - b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + (hash&m)*uintptr(t.bucketsize))) - if c := h.oldbuckets; c != nil { - if !h.sameSizeGrow() { - // There used to be half as many buckets; mask down one more power of two. 
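As a quick illustration of this step, here is a standalone Go sketch with made-up values (B, the hash, and the variable names are illustrative, not from this file) showing why masking with one fewer bit recovers the bucket index in the old, half-sized array:

package main

import "fmt"

func main() {
	const B = 5                 // the grown table has 1<<5 = 32 buckets
	newMask := uint64(1)<<B - 1 // 31
	oldMask := newMask >> 1     // 15: the previous table had only 16 buckets

	hash := uint64(0x2f4a7c15) // arbitrary example hash
	fmt.Println("new bucket index:", hash&newMask)
	// If that old bucket has not been evacuated yet, the key is still found here.
	fmt.Println("old bucket index:", hash&oldMask)
}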
- m >>= 1 - } - oldb := (*bmap)(unsafe.Pointer(uintptr(c) + (hash&m)*uintptr(t.bucketsize))) - if !evacuated(oldb) { - b = oldb - } - } - top := tophash(hash) - for ; b != nil; b = b.overflow(t) { - for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] != top { - continue - } - k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize)) - if t.indirectkey { - k = *((*unsafe.Pointer)(k)) - } - if equalfn(key, k) { - v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) - if t.indirectvalue { - v = *((*unsafe.Pointer)(v)) - } - return v, true - } - } - } - return unsafe.Pointer(&zeroVal[0]), false -} - -// returns both key and value. Used by map iterator -func mapaccessK(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, unsafe.Pointer) { - // Check preemption, since unlike gc we don't check on every call. - if getg().preempt { - checkPreempt() - } - - if h == nil || h.count == 0 { - return nil, nil - } - hashfn := t.key.hashfn - equalfn := t.key.equalfn - hash := hashfn(key, uintptr(h.hash0)) - m := bucketMask(h.B) - b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + (hash&m)*uintptr(t.bucketsize))) - if c := h.oldbuckets; c != nil { - if !h.sameSizeGrow() { - // There used to be half as many buckets; mask down one more power of two. - m >>= 1 - } - oldb := (*bmap)(unsafe.Pointer(uintptr(c) + (hash&m)*uintptr(t.bucketsize))) - if !evacuated(oldb) { - b = oldb - } - } - top := tophash(hash) - for ; b != nil; b = b.overflow(t) { - for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] != top { - continue - } - k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize)) - if t.indirectkey { - k = *((*unsafe.Pointer)(k)) - } - if equalfn(key, k) { - v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) - if t.indirectvalue { - v = *((*unsafe.Pointer)(v)) - } - return k, v - } - } - } - return nil, nil -} - -func mapaccess1_fat(t *maptype, h *hmap, key, zero unsafe.Pointer) unsafe.Pointer { - v := mapaccess1(t, h, key) - if v == unsafe.Pointer(&zeroVal[0]) { - return zero - } - return v -} - -func mapaccess2_fat(t *maptype, h *hmap, key, zero unsafe.Pointer) (unsafe.Pointer, bool) { - v := mapaccess1(t, h, key) - if v == unsafe.Pointer(&zeroVal[0]) { - return zero, false - } - return v, true -} - -// Like mapaccess, but allocates a slot for the key if it is not present in the map. -func mapassign(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { - // Check preemption, since unlike gc we don't check on every call. - if getg().preempt { - checkPreempt() - } - - if h == nil { - panic(plainError("assignment to entry in nil map")) - } - if raceenabled { - callerpc := getcallerpc() - pc := funcPC(mapassign) - racewritepc(unsafe.Pointer(h), callerpc, pc) - raceReadObjectPC(t.key, key, callerpc, pc) - } - if msanenabled { - msanread(key, t.key.size) - } - if h.flags&hashWriting != 0 { - throw("concurrent map writes") - } - hashfn := t.key.hashfn - equalfn := t.key.equalfn - hash := hashfn(key, uintptr(h.hash0)) - - // Set hashWriting after calling alg.hash, since alg.hash may panic, - // in which case we have not actually done a write. 
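The hashWriting bit set just below is what turns unsynchronized map access into an immediate throw instead of silent corruption. A minimal standalone sketch of the same best-effort scheme (the tinyMap type and its fields are invented for illustration; only the flag value matches the deleted hashmap.go):

package main

import "fmt"

const hashWriting = 4 // same flag bit value as in the deleted hashmap.go

type tinyMap struct {
	flags uint8
	data  map[string]int
}

func (m *tinyMap) assign(k string, v int) {
	if m.flags&hashWriting != 0 {
		panic("concurrent map writes") // mirrors the runtime's throw
	}
	m.flags |= hashWriting // announce the write
	m.data[k] = v
	if m.flags&hashWriting == 0 {
		panic("concurrent map writes") // another writer cleared the bit under us
	}
	m.flags &^= hashWriting // retract the announcement
}

func main() {
	m := &tinyMap{data: map[string]int{}}
	m.assign("a", 1)
	fmt.Println(m.data)
}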
- h.flags |= hashWriting - - if h.buckets == nil { - h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) - } - -again: - bucket := hash & bucketMask(h.B) - if h.growing() { - growWork(t, h, bucket) - } - b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - top := tophash(hash) - - var inserti *uint8 - var insertk unsafe.Pointer - var val unsafe.Pointer - for { - for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] != top { - if b.tophash[i] == empty && inserti == nil { - inserti = &b.tophash[i] - insertk = add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize)) - val = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) - } - continue - } - k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize)) - if t.indirectkey { - k = *((*unsafe.Pointer)(k)) - } - if !equalfn(key, k) { - continue - } - // already have a mapping for key. Update it. - if t.needkeyupdate { - typedmemmove(t.key, k, key) - } - val = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) - goto done - } - ovf := b.overflow(t) - if ovf == nil { - break - } - b = ovf - } - - // Did not find mapping for key. Allocate new cell & add entry. - - // If we hit the max load factor or we have too many overflow buckets, - // and we're not already in the middle of growing, start growing. - if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { - hashGrow(t, h) - goto again // Growing the table invalidates everything, so try again - } - - if inserti == nil { - // all current buckets are full, allocate a new one. - newb := h.newoverflow(t, b) - inserti = &newb.tophash[0] - insertk = add(unsafe.Pointer(newb), dataOffset) - val = add(insertk, bucketCnt*uintptr(t.keysize)) - } - - // store new key/value at insert position - if t.indirectkey { - kmem := newobject(t.key) - *(*unsafe.Pointer)(insertk) = kmem - insertk = kmem - } - if t.indirectvalue { - vmem := newobject(t.elem) - *(*unsafe.Pointer)(val) = vmem - } - typedmemmove(t.key, insertk, key) - *inserti = top - h.count++ - -done: - if h.flags&hashWriting == 0 { - throw("concurrent map writes") - } - h.flags &^= hashWriting - if t.indirectvalue { - val = *((*unsafe.Pointer)(val)) - } - return val -} - -func mapdelete(t *maptype, h *hmap, key unsafe.Pointer) { - if raceenabled && h != nil { - callerpc := getcallerpc() - pc := funcPC(mapdelete) - racewritepc(unsafe.Pointer(h), callerpc, pc) - raceReadObjectPC(t.key, key, callerpc, pc) - } - if msanenabled && h != nil { - msanread(key, t.key.size) - } - if h == nil || h.count == 0 { - return - } - if h.flags&hashWriting != 0 { - throw("concurrent map writes") - } - - hashfn := t.key.hashfn - equalfn := t.key.equalfn - hash := hashfn(key, uintptr(h.hash0)) - - // Set hashWriting after calling alg.hash, since alg.hash may panic, - // in which case we have not actually done a write (delete). - h.flags |= hashWriting - - bucket := hash & bucketMask(h.B) - if h.growing() { - growWork(t, h, bucket) - } - b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) - top := tophash(hash) -search: - for ; b != nil; b = b.overflow(t) { - for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] != top { - continue - } - k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize)) - k2 := k - if t.indirectkey { - k2 = *((*unsafe.Pointer)(k2)) - } - if !equalfn(key, k2) { - continue - } - // Only clear key if there are pointers in it. 
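Clearing the deleted cell matters because the bucket array itself stays allocated; if a pointer-holding key or value were left in place, the garbage collector would keep the pointed-to memory alive indefinitely. A small standalone sketch of the idea (the slot type is invented for illustration):

package main

import "fmt"

// A cell that holds a pointer-typed value. Marking it unused is not enough;
// the pointer must also be cleared, which is what memclrHasPointers does for
// the real bucket cells.
type slot struct {
	used bool
	val  *[1 << 20]byte
}

func del(s *slot) {
	s.used = false
	s.val = nil // lets the GC reclaim the 1 MiB block
}

func main() {
	s := slot{used: true, val: new([1 << 20]byte)}
	del(&s)
	fmt.Println("value cleared:", s.val == nil)
}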
- if t.indirectkey { - *(*unsafe.Pointer)(k) = nil - } else if t.key.kind&kindNoPointers == 0 { - memclrHasPointers(k, t.key.size) - } - // Only clear value if there are pointers in it. - if t.indirectvalue || t.elem.kind&kindNoPointers == 0 { - v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) - if t.indirectvalue { - *(*unsafe.Pointer)(v) = nil - } else { - memclrHasPointers(v, t.elem.size) - } - } - b.tophash[i] = empty - h.count-- - break search - } - } - - if h.flags&hashWriting == 0 { - throw("concurrent map writes") - } - h.flags &^= hashWriting -} - -// mapiterinit initializes the hiter struct used for ranging over maps. -// The hiter struct pointed to by 'it' is allocated on the stack -// by the compilers order pass or on the heap by reflect_mapiterinit. -// Both need to have zeroed hiter since the struct contains pointers. -// Gccgo-specific: *it need not be zeroed by the compiler, -// and it's cheaper to zero it here. -func mapiterinit(t *maptype, h *hmap, it *hiter) { - it.key = nil - it.value = nil - it.t = nil - it.h = nil - it.buckets = nil - it.bptr = nil - it.overflow = nil - it.oldoverflow = nil - it.wrapped = false - it.i = 0 - it.checkBucket = 0 - - if raceenabled && h != nil { - callerpc := getcallerpc() - racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapiterinit)) - } - - if h == nil || h.count == 0 { - return - } - - if unsafe.Sizeof(hiter{})/sys.PtrSize != 12 { - throw("hash_iter size incorrect") // see ../../cmd/internal/gc/reflect.go - } - it.t = t - it.h = h - - // grab snapshot of bucket state - it.B = h.B - it.buckets = h.buckets - if t.bucket.kind&kindNoPointers != 0 { - // Allocate the current slice and remember pointers to both current and old. - // This preserves all relevant overflow buckets alive even if - // the table grows and/or overflow buckets are added to the table - // while we are iterating. - h.createOverflow() - it.overflow = h.extra.overflow - it.oldoverflow = h.extra.oldoverflow - } - - // decide where to start - r := uintptr(fastrand()) - if h.B > 31-bucketCntBits { - r += uintptr(fastrand()) << 31 - } - it.startBucket = r & bucketMask(h.B) - it.offset = uint8(r >> h.B & (bucketCnt - 1)) - - // iterator state - it.bucket = it.startBucket - - // Remember we have an iterator. - // Can run concurrently with another mapiterinit(). - if old := h.flags; old&(iterator|oldIterator) != iterator|oldIterator { - atomic.Or8(&h.flags, iterator|oldIterator) - } - - mapiternext(it) -} - -func mapiternext(it *hiter) { - // Check preemption, since unlike gc we don't check on every call. - if getg().preempt { - checkPreempt() - } - - h := it.h - if raceenabled { - callerpc := getcallerpc() - racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapiternext)) - } - if h.flags&hashWriting != 0 { - throw("concurrent map iteration and map write") - } - t := it.t - bucket := it.bucket - b := it.bptr - i := it.i - checkBucket := it.checkBucket - hashfn := t.key.hashfn - equalfn := t.key.equalfn - -next: - if b == nil { - if bucket == it.startBucket && it.wrapped { - // end of iteration - it.key = nil - it.value = nil - return - } - if h.growing() && it.B == h.B { - // Iterator was started in the middle of a grow, and the grow isn't done yet. - // If the bucket we're looking at hasn't been filled in yet (i.e. the old - // bucket hasn't been evacuated) then we need to iterate through the old - // bucket and only return the ones that will be migrated to this bucket. 
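Only some entries of the old bucket are returned because, when the table doubles, each old bucket's entries split between two new buckets according to the hash bit that the larger mask newly exposes. A standalone sketch with made-up hash values:

package main

import "fmt"

func main() {
	const oldCount = 4 // buckets before the grow; the grown table has 8
	// Four hashes that all land in old bucket 1 (hash & 3 == 1).
	hashes := []uint64{0x11, 0x25, 0x39, 0x4d}

	for _, h := range hashes {
		// After the grow they split between new bucket 1 and new bucket 1+4.
		fmt.Printf("hash %#x: old bucket %d -> new bucket %d\n",
			h, h&(oldCount-1), h&(2*oldCount-1))
	}
}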
- oldbucket := bucket & it.h.oldbucketmask() - b = (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) - if !evacuated(b) { - checkBucket = bucket - } else { - b = (*bmap)(add(it.buckets, bucket*uintptr(t.bucketsize))) - checkBucket = noCheck - } - } else { - b = (*bmap)(add(it.buckets, bucket*uintptr(t.bucketsize))) - checkBucket = noCheck - } - bucket++ - if bucket == bucketShift(it.B) { - bucket = 0 - it.wrapped = true - } - i = 0 - } - for ; i < bucketCnt; i++ { - offi := (i + it.offset) & (bucketCnt - 1) - if b.tophash[offi] == empty || b.tophash[offi] == evacuatedEmpty { - continue - } - k := add(unsafe.Pointer(b), dataOffset+uintptr(offi)*uintptr(t.keysize)) - if t.indirectkey { - k = *((*unsafe.Pointer)(k)) - } - v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+uintptr(offi)*uintptr(t.valuesize)) - if checkBucket != noCheck && !h.sameSizeGrow() { - // Special case: iterator was started during a grow to a larger size - // and the grow is not done yet. We're working on a bucket whose - // oldbucket has not been evacuated yet. Or at least, it wasn't - // evacuated when we started the bucket. So we're iterating - // through the oldbucket, skipping any keys that will go - // to the other new bucket (each oldbucket expands to two - // buckets during a grow). - if t.reflexivekey || equalfn(k, k) { - // If the item in the oldbucket is not destined for - // the current new bucket in the iteration, skip it. - hash := hashfn(k, uintptr(h.hash0)) - if hash&bucketMask(it.B) != checkBucket { - continue - } - } else { - // Hash isn't repeatable if k != k (NaNs). We need a - // repeatable and randomish choice of which direction - // to send NaNs during evacuation. We'll use the low - // bit of tophash to decide which way NaNs go. - // NOTE: this case is why we need two evacuate tophash - // values, evacuatedX and evacuatedY, that differ in - // their low bit. - if checkBucket>>(it.B-1) != uintptr(b.tophash[offi]&1) { - continue - } - } - } - if (b.tophash[offi] != evacuatedX && b.tophash[offi] != evacuatedY) || - !(t.reflexivekey || equalfn(k, k)) { - // This is the golden data, we can return it. - // OR - // key!=key, so the entry can't be deleted or updated, so we can just return it. - // That's lucky for us because when key!=key we can't look it up successfully. - it.key = k - if t.indirectvalue { - v = *((*unsafe.Pointer)(v)) - } - it.value = v - } else { - // The hash table has grown since the iterator was started. - // The golden data for this key is now somewhere else. - // Check the current hash table for the data. - // This code handles the case where the key - // has been deleted, updated, or deleted and reinserted. - // NOTE: we need to regrab the key as it has potentially been - // updated to an equal() but not identical key (e.g. +0.0 vs -0.0). - rk, rv := mapaccessK(t, h, k) - if rk == nil { - continue // key has been deleted - } - it.key = rk - it.value = rv - } - it.bucket = bucket - if it.bptr != b { // avoid unnecessary write barrier; see issue 14921 - it.bptr = b - } - it.i = i + 1 - it.checkBucket = checkBucket - return - } - b = b.overflow(t) - i = 0 - goto next -} - -func makeBucketArray(t *maptype, b uint8) (buckets unsafe.Pointer, nextOverflow *bmap) { - base := bucketShift(b) - nbuckets := base - // For small b, overflow buckets are unlikely. - // Avoid the overhead of the calculation. - if b >= 4 { - // Add on the estimated number of overflow buckets - // required to insert the median number of elements - // used with this value of b. 
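The sizing below first adds an estimate of needed overflow buckets, then rounds the allocation up so it fills its size class. A standalone sketch with made-up numbers (bucketSize and the 1 KiB rounding stand in for the real t.bucket.size and roundupsize):

package main

import "fmt"

func main() {
	const bucketSize = 128 // pretend bytes per bucket
	B := uint8(6)

	base := uintptr(1) << B
	nbuckets := base + base/16 // estimated overflow buckets, as in bucketShift(b-4) below

	sz := bucketSize * nbuckets
	up := (sz + 1023) &^ 1023 // stand-in for roundupsize: round to a 1 KiB size class
	if up != sz {
		nbuckets = up / bucketSize // use the space the allocator hands back anyway
	}
	fmt.Println("base buckets:", base, "allocated buckets:", nbuckets)
}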
- nbuckets += bucketShift(b - 4) - sz := t.bucket.size * nbuckets - up := roundupsize(sz) - if up != sz { - nbuckets = up / t.bucket.size - } - } - buckets = newarray(t.bucket, int(nbuckets)) - if base != nbuckets { - // We preallocated some overflow buckets. - // To keep the overhead of tracking these overflow buckets to a minimum, - // we use the convention that if a preallocated overflow bucket's overflow - // pointer is nil, then there are more available by bumping the pointer. - // We need a safe non-nil pointer for the last overflow bucket; just use buckets. - nextOverflow = (*bmap)(add(buckets, base*uintptr(t.bucketsize))) - last := (*bmap)(add(buckets, (nbuckets-1)*uintptr(t.bucketsize))) - last.setoverflow(t, (*bmap)(buckets)) - } - return buckets, nextOverflow -} - -func hashGrow(t *maptype, h *hmap) { - // If we've hit the load factor, get bigger. - // Otherwise, there are too many overflow buckets, - // so keep the same number of buckets and "grow" laterally. - bigger := uint8(1) - if !overLoadFactor(h.count+1, h.B) { - bigger = 0 - h.flags |= sameSizeGrow - } - oldbuckets := h.buckets - newbuckets, nextOverflow := makeBucketArray(t, h.B+bigger) - - flags := h.flags &^ (iterator | oldIterator) - if h.flags&iterator != 0 { - flags |= oldIterator - } - // commit the grow (atomic wrt gc) - h.B += bigger - h.flags = flags - h.oldbuckets = oldbuckets - h.buckets = newbuckets - h.nevacuate = 0 - h.noverflow = 0 - - if h.extra != nil && h.extra.overflow != nil { - // Promote current overflow buckets to the old generation. - if h.extra.oldoverflow != nil { - throw("oldoverflow is not nil") - } - h.extra.oldoverflow = h.extra.overflow - h.extra.overflow = nil - } - if nextOverflow != nil { - if h.extra == nil { - h.extra = new(mapextra) - } - h.extra.nextOverflow = nextOverflow - } - - // the actual copying of the hash table data is done incrementally - // by growWork() and evacuate(). -} - -// overLoadFactor reports whether count items placed in 1< bucketCnt && uintptr(count) > loadFactorNum*(bucketShift(B)/loadFactorDen) -} - -// tooManyOverflowBuckets reports whether noverflow buckets is too many for a map with 1< 15 { - B = 15 - } - // The compiler doesn't see here that B < 16; mask B to generate shorter shift code. - return noverflow >= uint16(1)<<(B&15) -} - -// growing reports whether h is growing. The growth may be to the same size or bigger. -func (h *hmap) growing() bool { - return h.oldbuckets != nil -} - -// sameSizeGrow reports whether the current growth is to a map of the same size. -func (h *hmap) sameSizeGrow() bool { - return h.flags&sameSizeGrow != 0 -} - -// noldbuckets calculates the number of buckets prior to the current map growth. -func (h *hmap) noldbuckets() uintptr { - oldB := h.B - if !h.sameSizeGrow() { - oldB-- - } - return bucketShift(oldB) -} - -// oldbucketmask provides a mask that can be applied to calculate n % noldbuckets(). -func (h *hmap) oldbucketmask() uintptr { - return h.noldbuckets() - 1 -} - -func growWork(t *maptype, h *hmap, bucket uintptr) { - // make sure we evacuate the oldbucket corresponding - // to the bucket we're about to use - evacuate(t, h, bucket&h.oldbucketmask()) - - // evacuate one more oldbucket to make progress on growing - if h.growing() { - evacuate(t, h, h.nevacuate) - } -} - -func bucketEvacuated(t *maptype, h *hmap, bucket uintptr) bool { - b := (*bmap)(add(h.oldbuckets, bucket*uintptr(t.bucketsize))) - return evacuated(b) -} - -// evacDst is an evacuation destination. 
-type evacDst struct { - b *bmap // current destination bucket - i int // key/val index into b - k unsafe.Pointer // pointer to current key storage - v unsafe.Pointer // pointer to current value storage -} - -func evacuate(t *maptype, h *hmap, oldbucket uintptr) { - b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) - newbit := h.noldbuckets() - hashfn := t.key.hashfn - if !evacuated(b) { - // TODO: reuse overflow buckets instead of using new ones, if there - // is no iterator using the old buckets. (If !oldIterator.) - - // xy contains the x and y (low and high) evacuation destinations. - var xy [2]evacDst - x := &xy[0] - x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) - x.k = add(unsafe.Pointer(x.b), dataOffset) - x.v = add(x.k, bucketCnt*uintptr(t.keysize)) - - if !h.sameSizeGrow() { - // Only calculate y pointers if we're growing bigger. - // Otherwise GC can see bad pointers. - y := &xy[1] - y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) - y.k = add(unsafe.Pointer(y.b), dataOffset) - y.v = add(y.k, bucketCnt*uintptr(t.keysize)) - } - - for ; b != nil; b = b.overflow(t) { - k := add(unsafe.Pointer(b), dataOffset) - v := add(k, bucketCnt*uintptr(t.keysize)) - for i := 0; i < bucketCnt; i, k, v = i+1, add(k, uintptr(t.keysize)), add(v, uintptr(t.valuesize)) { - top := b.tophash[i] - if top == empty { - b.tophash[i] = evacuatedEmpty - continue - } - if top < minTopHash { - throw("bad map state") - } - k2 := k - if t.indirectkey { - k2 = *((*unsafe.Pointer)(k2)) - } - var useY uint8 - if !h.sameSizeGrow() { - // Compute hash to make our evacuation decision (whether we need - // to send this key/value to bucket x or bucket y). - hash := hashfn(k2, uintptr(h.hash0)) - if h.flags&iterator != 0 && !t.reflexivekey && !t.key.equalfn(k2, k2) { - // If key != key (NaNs), then the hash could be (and probably - // will be) entirely different from the old hash. Moreover, - // it isn't reproducible. Reproducibility is required in the - // presence of iterators, as our evacuation decision must - // match whatever decision the iterator made. - // Fortunately, we have the freedom to send these keys either - // way. Also, tophash is meaningless for these kinds of keys. - // We let the low bit of tophash drive the evacuation decision. - // We recompute a new random tophash for the next level so - // these keys will get evenly distributed across all buckets - // after multiple grows. - useY = top & 1 - top = tophash(hash) - } else { - if hash&newbit != 0 { - useY = 1 - } - } - } - - if evacuatedX+1 != evacuatedY { - throw("bad evacuatedN") - } - - b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY - dst := &xy[useY] // evacuation destination - - if dst.i == bucketCnt { - dst.b = h.newoverflow(t, dst.b) - dst.i = 0 - dst.k = add(unsafe.Pointer(dst.b), dataOffset) - dst.v = add(dst.k, bucketCnt*uintptr(t.keysize)) - } - dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check - if t.indirectkey { - *(*unsafe.Pointer)(dst.k) = k2 // copy pointer - } else { - typedmemmove(t.key, dst.k, k) // copy value - } - if t.indirectvalue { - *(*unsafe.Pointer)(dst.v) = *(*unsafe.Pointer)(v) - } else { - typedmemmove(t.elem, dst.v, v) - } - dst.i++ - // These updates might push these pointers past the end of the - // key or value arrays. That's ok, as we have the overflow pointer - // at the end of the bucket to protect against pointing past the - // end of the bucket. 
- dst.k = add(dst.k, uintptr(t.keysize)) - dst.v = add(dst.v, uintptr(t.valuesize)) - } - } - // Unlink the overflow buckets & clear key/value to help GC. - if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { - b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) - // Preserve b.tophash because the evacuation - // state is maintained there. - ptr := add(b, dataOffset) - n := uintptr(t.bucketsize) - dataOffset - memclrHasPointers(ptr, n) - } - } - - if oldbucket == h.nevacuate { - advanceEvacuationMark(h, t, newbit) - } -} - -func advanceEvacuationMark(h *hmap, t *maptype, newbit uintptr) { - h.nevacuate++ - // Experiments suggest that 1024 is overkill by at least an order of magnitude. - // Put it in there as a safeguard anyway, to ensure O(1) behavior. - stop := h.nevacuate + 1024 - if stop > newbit { - stop = newbit - } - for h.nevacuate != stop && bucketEvacuated(t, h, h.nevacuate) { - h.nevacuate++ - } - if h.nevacuate == newbit { // newbit == # of oldbuckets - // Growing is all done. Free old main bucket array. - h.oldbuckets = nil - // Can discard old overflow buckets as well. - // If they are still referenced by an iterator, - // then the iterator holds a pointers to the slice. - if h.extra != nil { - h.extra.oldoverflow = nil - } - h.flags &^= sameSizeGrow - } -} - -func ismapkey(t *_type) bool { - return t.hashfn != nil -} - -// Reflect stubs. Called from ../reflect/asm_*.s - -//go:linkname reflect_makemap reflect.makemap -func reflect_makemap(t *maptype, cap int) *hmap { - if !ismapkey(t.key) { - throw("runtime.reflect_makemap: unsupported map key type") - } - if t.key.size > maxKeySize && (!t.indirectkey || t.keysize != uint8(sys.PtrSize)) || - t.key.size <= maxKeySize && (t.indirectkey || t.keysize != uint8(t.key.size)) { - throw("key size wrong") - } - if t.elem.size > maxValueSize && (!t.indirectvalue || t.valuesize != uint8(sys.PtrSize)) || - t.elem.size <= maxValueSize && (t.indirectvalue || t.valuesize != uint8(t.elem.size)) { - throw("value size wrong") - } - if t.key.align > bucketCnt { - throw("key align too big") - } - if t.elem.align > bucketCnt { - throw("value align too big") - } - if t.key.size%uintptr(t.key.align) != 0 { - throw("key size not a multiple of key align") - } - if t.elem.size%uintptr(t.elem.align) != 0 { - throw("value size not a multiple of value align") - } - if bucketCnt < 8 { - throw("bucketsize too small for proper alignment") - } - if dataOffset%uintptr(t.key.align) != 0 { - throw("need padding in bucket (key)") - } - if dataOffset%uintptr(t.elem.align) != 0 { - throw("need padding in bucket (value)") - } - - return makemap(t, cap, nil) -} - -//go:linkname reflect_mapaccess reflect.mapaccess -func reflect_mapaccess(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { - val, ok := mapaccess2(t, h, key) - if !ok { - // reflect wants nil for a missing element - val = nil - } - return val -} - -//go:linkname reflect_mapassign reflect.mapassign -func reflect_mapassign(t *maptype, h *hmap, key unsafe.Pointer, val unsafe.Pointer) { - p := mapassign(t, h, key) - typedmemmove(t.elem, p, val) -} - -//go:linkname reflect_mapdelete reflect.mapdelete -func reflect_mapdelete(t *maptype, h *hmap, key unsafe.Pointer) { - mapdelete(t, h, key) -} - -//go:linkname reflect_mapiterinit reflect.mapiterinit -func reflect_mapiterinit(t *maptype, h *hmap) *hiter { - it := new(hiter) - mapiterinit(t, h, it) - return it -} - -//go:linkname reflect_mapiternext reflect.mapiternext -func reflect_mapiternext(it *hiter) { - mapiternext(it) -} - 
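These go:linkname stubs are the runtime side of package reflect's map operations. For orientation, an ordinary user-level example whose calls should bottom out in them (standard reflect API only, nothing specific to this patch):

package main

import (
	"fmt"
	"reflect"
)

func main() {
	m := map[string]int{"a": 1}
	v := reflect.ValueOf(m)

	// SetMapIndex ends up in reflect.mapassign, MapIndex in reflect.mapaccess.
	v.SetMapIndex(reflect.ValueOf("b"), reflect.ValueOf(2))
	fmt.Println(v.MapIndex(reflect.ValueOf("a")).Interface())

	// MapKeys drives the reflect.mapiterinit / mapiternext pair.
	for _, k := range v.MapKeys() {
		fmt.Println(k.Interface(), v.MapIndex(k).Interface())
	}
}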
-//go:linkname reflect_mapiterkey reflect.mapiterkey -func reflect_mapiterkey(it *hiter) unsafe.Pointer { - return it.key -} - -//go:linkname reflect_maplen reflect.maplen -func reflect_maplen(h *hmap) int { - if h == nil { - return 0 - } - if raceenabled { - callerpc := getcallerpc() - racereadpc(unsafe.Pointer(h), callerpc, funcPC(reflect_maplen)) - } - return h.count -} - -//go:linkname reflect_ismapkey reflect.ismapkey -func reflect_ismapkey(t *_type) bool { - return ismapkey(t) -} - -const maxZero = 1024 // must match value in ../cmd/compile/internal/gc/walk.go -var zeroVal [maxZero]byte diff --git a/libgo/go/runtime/hashmap_fast.go b/libgo/go/runtime/hashmap_fast.go deleted file mode 100644 index e0fc981..0000000 --- a/libgo/go/runtime/hashmap_fast.go +++ /dev/null @@ -1,1237 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package runtime - -import ( - "runtime/internal/sys" - "unsafe" -) - -func mapaccess1_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer { - if raceenabled && h != nil { - callerpc := getcallerpc() - racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_fast32)) - } - if h == nil || h.count == 0 { - return unsafe.Pointer(&zeroVal[0]) - } - if h.flags&hashWriting != 0 { - throw("concurrent map read and map write") - } - var b *bmap - if h.B == 0 { - // One-bucket table. No need to hash. - b = (*bmap)(h.buckets) - } else { - hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - m := bucketMask(h.B) - b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) - if c := h.oldbuckets; c != nil { - if !h.sameSizeGrow() { - // There used to be half as many buckets; mask down one more power of two. - m >>= 1 - } - oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) - if !evacuated(oldb) { - b = oldb - } - } - } - for ; b != nil; b = b.overflow(t) { - for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) { - if *(*uint32)(k) == key && b.tophash[i] != empty { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)) - } - } - } - return unsafe.Pointer(&zeroVal[0]) -} - -func mapaccess2_fast32(t *maptype, h *hmap, key uint32) (unsafe.Pointer, bool) { - if raceenabled && h != nil { - callerpc := getcallerpc() - racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_fast32)) - } - if h == nil || h.count == 0 { - return unsafe.Pointer(&zeroVal[0]), false - } - if h.flags&hashWriting != 0 { - throw("concurrent map read and map write") - } - var b *bmap - if h.B == 0 { - // One-bucket table. No need to hash. - b = (*bmap)(h.buckets) - } else { - hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - m := bucketMask(h.B) - b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) - if c := h.oldbuckets; c != nil { - if !h.sameSizeGrow() { - // There used to be half as many buckets; mask down one more power of two. 
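The surrounding *_fast32 and *_fast64 lookups exist because the compiler can route maps with 4- and 8-byte keys to them; knowing the key type lets the loop compare raw words instead of calling the generic key-equality function. A standalone sketch of the difference (the helper names are invented):

package main

import "fmt"

// Generic shape: every key comparison goes through a function value,
// analogous to the equalfn indirection in the generic map code.
func lookupGeneric(keys []interface{}, eq func(a, b interface{}) bool, want interface{}) int {
	for i, k := range keys {
		if eq(k, want) {
			return i
		}
	}
	return -1
}

// Specialized shape, as in the *_fast32 variants: the key is known to be a
// 4-byte integer, so the comparison is a single machine compare.
func lookupFast32(keys []uint32, want uint32) int {
	for i, k := range keys {
		if k == want {
			return i
		}
	}
	return -1
}

func main() {
	fmt.Println(lookupFast32([]uint32{7, 42, 9}, 42))
	eq := func(a, b interface{}) bool { return a == b }
	fmt.Println(lookupGeneric([]interface{}{7, 42, 9}, eq, 42))
}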
- m >>= 1 - } - oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) - if !evacuated(oldb) { - b = oldb - } - } - } - for ; b != nil; b = b.overflow(t) { - for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) { - if *(*uint32)(k) == key && b.tophash[i] != empty { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)), true - } - } - } - return unsafe.Pointer(&zeroVal[0]), false -} - -func mapaccess1_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer { - if raceenabled && h != nil { - callerpc := getcallerpc() - racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_fast64)) - } - if h == nil || h.count == 0 { - return unsafe.Pointer(&zeroVal[0]) - } - if h.flags&hashWriting != 0 { - throw("concurrent map read and map write") - } - var b *bmap - if h.B == 0 { - // One-bucket table. No need to hash. - b = (*bmap)(h.buckets) - } else { - hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - m := bucketMask(h.B) - b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) - if c := h.oldbuckets; c != nil { - if !h.sameSizeGrow() { - // There used to be half as many buckets; mask down one more power of two. - m >>= 1 - } - oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) - if !evacuated(oldb) { - b = oldb - } - } - } - for ; b != nil; b = b.overflow(t) { - for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) { - if *(*uint64)(k) == key && b.tophash[i] != empty { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)) - } - } - } - return unsafe.Pointer(&zeroVal[0]) -} - -func mapaccess2_fast64(t *maptype, h *hmap, key uint64) (unsafe.Pointer, bool) { - if raceenabled && h != nil { - callerpc := getcallerpc() - racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_fast64)) - } - if h == nil || h.count == 0 { - return unsafe.Pointer(&zeroVal[0]), false - } - if h.flags&hashWriting != 0 { - throw("concurrent map read and map write") - } - var b *bmap - if h.B == 0 { - // One-bucket table. No need to hash. - b = (*bmap)(h.buckets) - } else { - hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - m := bucketMask(h.B) - b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) - if c := h.oldbuckets; c != nil { - if !h.sameSizeGrow() { - // There used to be half as many buckets; mask down one more power of two. - m >>= 1 - } - oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) - if !evacuated(oldb) { - b = oldb - } - } - } - for ; b != nil; b = b.overflow(t) { - for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) { - if *(*uint64)(k) == key && b.tophash[i] != empty { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)), true - } - } - } - return unsafe.Pointer(&zeroVal[0]), false -} - -func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer { - if raceenabled && h != nil { - callerpc := getcallerpc() - racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_faststr)) - } - if h == nil || h.count == 0 { - return unsafe.Pointer(&zeroVal[0]) - } - if h.flags&hashWriting != 0 { - throw("concurrent map read and map write") - } - key := stringStructOf(&ky) - if h.B == 0 { - // One-bucket table. 
- b := (*bmap)(h.buckets) - if key.len < 32 { - // short key, doing lots of comparisons is ok - for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { - k := (*stringStruct)(kptr) - if k.len != key.len || b.tophash[i] == empty { - continue - } - if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) - } - } - return unsafe.Pointer(&zeroVal[0]) - } - // long key, try not to do more comparisons than necessary - keymaybe := uintptr(bucketCnt) - for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { - k := (*stringStruct)(kptr) - if k.len != key.len || b.tophash[i] == empty { - continue - } - if k.str == key.str { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) - } - // check first 4 bytes - if *((*[4]byte)(key.str)) != *((*[4]byte)(k.str)) { - continue - } - // check last 4 bytes - if *((*[4]byte)(add(key.str, uintptr(key.len)-4))) != *((*[4]byte)(add(k.str, uintptr(key.len)-4))) { - continue - } - if keymaybe != bucketCnt { - // Two keys are potential matches. Use hash to distinguish them. - goto dohash - } - keymaybe = i - } - if keymaybe != bucketCnt { - k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+keymaybe*2*sys.PtrSize)) - if memequal(k.str, key.str, uintptr(key.len)) { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+keymaybe*uintptr(t.valuesize)) - } - } - return unsafe.Pointer(&zeroVal[0]) - } -dohash: - hash := t.key.hashfn(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0)) - m := bucketMask(h.B) - b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) - if c := h.oldbuckets; c != nil { - if !h.sameSizeGrow() { - // There used to be half as many buckets; mask down one more power of two. - m >>= 1 - } - oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) - if !evacuated(oldb) { - b = oldb - } - } - top := tophash(hash) - for ; b != nil; b = b.overflow(t) { - for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { - k := (*stringStruct)(kptr) - if k.len != key.len || b.tophash[i] != top { - continue - } - if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) - } - } - } - return unsafe.Pointer(&zeroVal[0]) -} - -func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) { - if raceenabled && h != nil { - callerpc := getcallerpc() - racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_faststr)) - } - if h == nil || h.count == 0 { - return unsafe.Pointer(&zeroVal[0]), false - } - if h.flags&hashWriting != 0 { - throw("concurrent map read and map write") - } - key := stringStructOf(&ky) - if h.B == 0 { - // One-bucket table. 
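The string lookups above avoid full comparisons where they can: mismatched lengths are rejected immediately, long keys get a first-and-last-four-bytes check, and only then is a byte-wise compare done (or, in the one-bucket case, deferred via keymaybe until a single candidate remains). A standalone sketch of that quick-reject order on plain strings (function name invented):

package main

import "fmt"

func equalWithQuickReject(a, b string) bool {
	if len(a) != len(b) {
		return false // cheapest reject
	}
	if len(a) >= 4 {
		// Check the ends before paying for a full comparison.
		if a[:4] != b[:4] || a[len(a)-4:] != b[len(b)-4:] {
			return false
		}
	}
	return a == b // full comparison, rarely reached for mismatches
}

func main() {
	fmt.Println(equalWithQuickReject("hello, world", "hello, there"))
	fmt.Println(equalWithQuickReject("hello, world", "hello, world"))
}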
- b := (*bmap)(h.buckets) - if key.len < 32 { - // short key, doing lots of comparisons is ok - for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { - k := (*stringStruct)(kptr) - if k.len != key.len || b.tophash[i] == empty { - continue - } - if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)), true - } - } - return unsafe.Pointer(&zeroVal[0]), false - } - // long key, try not to do more comparisons than necessary - keymaybe := uintptr(bucketCnt) - for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { - k := (*stringStruct)(kptr) - if k.len != key.len || b.tophash[i] == empty { - continue - } - if k.str == key.str { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)), true - } - // check first 4 bytes - if *((*[4]byte)(key.str)) != *((*[4]byte)(k.str)) { - continue - } - // check last 4 bytes - if *((*[4]byte)(add(key.str, uintptr(key.len)-4))) != *((*[4]byte)(add(k.str, uintptr(key.len)-4))) { - continue - } - if keymaybe != bucketCnt { - // Two keys are potential matches. Use hash to distinguish them. - goto dohash - } - keymaybe = i - } - if keymaybe != bucketCnt { - k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+keymaybe*2*sys.PtrSize)) - if memequal(k.str, key.str, uintptr(key.len)) { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+keymaybe*uintptr(t.valuesize)), true - } - } - return unsafe.Pointer(&zeroVal[0]), false - } -dohash: - hash := t.key.hashfn(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0)) - m := bucketMask(h.B) - b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) - if c := h.oldbuckets; c != nil { - if !h.sameSizeGrow() { - // There used to be half as many buckets; mask down one more power of two. - m >>= 1 - } - oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) - if !evacuated(oldb) { - b = oldb - } - } - top := tophash(hash) - for ; b != nil; b = b.overflow(t) { - for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { - k := (*stringStruct)(kptr) - if k.len != key.len || b.tophash[i] != top { - continue - } - if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)), true - } - } - } - return unsafe.Pointer(&zeroVal[0]), false -} - -func mapassign_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer { - if h == nil { - panic(plainError("assignment to entry in nil map")) - } - if raceenabled { - callerpc := getcallerpc() - racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast32)) - } - if h.flags&hashWriting != 0 { - throw("concurrent map writes") - } - hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - - // Set hashWriting after calling alg.hash for consistency with mapassign. 
- h.flags |= hashWriting - - if h.buckets == nil { - h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) - } - -again: - bucket := hash & bucketMask(h.B) - if h.growing() { - growWork_fast32(t, h, bucket) - } - b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - - var insertb *bmap - var inserti uintptr - var insertk unsafe.Pointer - - for { - for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] == empty { - if insertb == nil { - inserti = i - insertb = b - } - continue - } - k := *((*uint32)(add(unsafe.Pointer(b), dataOffset+i*4))) - if k != key { - continue - } - inserti = i - insertb = b - goto done - } - ovf := b.overflow(t) - if ovf == nil { - break - } - b = ovf - } - - // Did not find mapping for key. Allocate new cell & add entry. - - // If we hit the max load factor or we have too many overflow buckets, - // and we're not already in the middle of growing, start growing. - if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { - hashGrow(t, h) - goto again // Growing the table invalidates everything, so try again - } - - if insertb == nil { - // all current buckets are full, allocate a new one. - insertb = h.newoverflow(t, b) - inserti = 0 // not necessary, but avoids needlessly spilling inserti - } - insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks - - insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*4) - // store new key at insert position - *(*uint32)(insertk) = key - - h.count++ - -done: - val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*4+inserti*uintptr(t.valuesize)) - if h.flags&hashWriting == 0 { - throw("concurrent map writes") - } - h.flags &^= hashWriting - return val -} - -func mapassign_fast32ptr(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { - if h == nil { - panic(plainError("assignment to entry in nil map")) - } - if raceenabled { - callerpc := getcallerpc() - racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast32)) - } - if h.flags&hashWriting != 0 { - throw("concurrent map writes") - } - hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - - // Set hashWriting after calling alg.hash for consistency with mapassign. - h.flags |= hashWriting - - if h.buckets == nil { - h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) - } - -again: - bucket := hash & bucketMask(h.B) - if h.growing() { - growWork_fast32(t, h, bucket) - } - b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - - var insertb *bmap - var inserti uintptr - var insertk unsafe.Pointer - - for { - for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] == empty { - if insertb == nil { - inserti = i - insertb = b - } - continue - } - k := *((*unsafe.Pointer)(add(unsafe.Pointer(b), dataOffset+i*4))) - if k != key { - continue - } - inserti = i - insertb = b - goto done - } - ovf := b.overflow(t) - if ovf == nil { - break - } - b = ovf - } - - // Did not find mapping for key. Allocate new cell & add entry. - - // If we hit the max load factor or we have too many overflow buckets, - // and we're not already in the middle of growing, start growing. - if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { - hashGrow(t, h) - goto again // Growing the table invalidates everything, so try again - } - - if insertb == nil { - // all current buckets are full, allocate a new one. 
- insertb = h.newoverflow(t, b) - inserti = 0 // not necessary, but avoids needlessly spilling inserti - } - insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks - - insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*4) - // store new key at insert position - *(*unsafe.Pointer)(insertk) = key - - h.count++ - -done: - val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*4+inserti*uintptr(t.valuesize)) - if h.flags&hashWriting == 0 { - throw("concurrent map writes") - } - h.flags &^= hashWriting - return val -} - -func mapassign_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer { - if h == nil { - panic(plainError("assignment to entry in nil map")) - } - if raceenabled { - callerpc := getcallerpc() - racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast64)) - } - if h.flags&hashWriting != 0 { - throw("concurrent map writes") - } - hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - - // Set hashWriting after calling alg.hash for consistency with mapassign. - h.flags |= hashWriting - - if h.buckets == nil { - h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) - } - -again: - bucket := hash & bucketMask(h.B) - if h.growing() { - growWork_fast64(t, h, bucket) - } - b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - - var insertb *bmap - var inserti uintptr - var insertk unsafe.Pointer - - for { - for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] == empty { - if insertb == nil { - insertb = b - inserti = i - } - continue - } - k := *((*uint64)(add(unsafe.Pointer(b), dataOffset+i*8))) - if k != key { - continue - } - insertb = b - inserti = i - goto done - } - ovf := b.overflow(t) - if ovf == nil { - break - } - b = ovf - } - - // Did not find mapping for key. Allocate new cell & add entry. - - // If we hit the max load factor or we have too many overflow buckets, - // and we're not already in the middle of growing, start growing. - if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { - hashGrow(t, h) - goto again // Growing the table invalidates everything, so try again - } - - if insertb == nil { - // all current buckets are full, allocate a new one. - insertb = h.newoverflow(t, b) - inserti = 0 // not necessary, but avoids needlessly spilling inserti - } - insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks - - insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*8) - // store new key at insert position - *(*uint64)(insertk) = key - - h.count++ - -done: - val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*8+inserti*uintptr(t.valuesize)) - if h.flags&hashWriting == 0 { - throw("concurrent map writes") - } - h.flags &^= hashWriting - return val -} - -func mapassign_fast64ptr(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { - if h == nil { - panic(plainError("assignment to entry in nil map")) - } - if raceenabled { - callerpc := getcallerpc() - racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast64)) - } - if h.flags&hashWriting != 0 { - throw("concurrent map writes") - } - hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - - // Set hashWriting after calling alg.hash for consistency with mapassign. 
- h.flags |= hashWriting - - if h.buckets == nil { - h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) - } - -again: - bucket := hash & bucketMask(h.B) - if h.growing() { - growWork_fast64(t, h, bucket) - } - b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - - var insertb *bmap - var inserti uintptr - var insertk unsafe.Pointer - - for { - for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] == empty { - if insertb == nil { - insertb = b - inserti = i - } - continue - } - k := *((*unsafe.Pointer)(add(unsafe.Pointer(b), dataOffset+i*8))) - if k != key { - continue - } - insertb = b - inserti = i - goto done - } - ovf := b.overflow(t) - if ovf == nil { - break - } - b = ovf - } - - // Did not find mapping for key. Allocate new cell & add entry. - - // If we hit the max load factor or we have too many overflow buckets, - // and we're not already in the middle of growing, start growing. - if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { - hashGrow(t, h) - goto again // Growing the table invalidates everything, so try again - } - - if insertb == nil { - // all current buckets are full, allocate a new one. - insertb = h.newoverflow(t, b) - inserti = 0 // not necessary, but avoids needlessly spilling inserti - } - insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks - - insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*8) - // store new key at insert position - *(*unsafe.Pointer)(insertk) = key - - h.count++ - -done: - val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*8+inserti*uintptr(t.valuesize)) - if h.flags&hashWriting == 0 { - throw("concurrent map writes") - } - h.flags &^= hashWriting - return val -} - -func mapassign_faststr(t *maptype, h *hmap, s string) unsafe.Pointer { - if h == nil { - panic(plainError("assignment to entry in nil map")) - } - if raceenabled { - callerpc := getcallerpc() - racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_faststr)) - } - if h.flags&hashWriting != 0 { - throw("concurrent map writes") - } - key := stringStructOf(&s) - hash := t.key.hashfn(noescape(unsafe.Pointer(&s)), uintptr(h.hash0)) - - // Set hashWriting after calling alg.hash for consistency with mapassign. - h.flags |= hashWriting - - if h.buckets == nil { - h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) - } - -again: - bucket := hash & bucketMask(h.B) - if h.growing() { - growWork_faststr(t, h, bucket) - } - b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - top := tophash(hash) - - var insertb *bmap - var inserti uintptr - var insertk unsafe.Pointer - - for { - for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] != top { - if b.tophash[i] == empty && insertb == nil { - insertb = b - inserti = i - } - continue - } - k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize)) - if k.len != key.len { - continue - } - if k.str != key.str && !memequal(k.str, key.str, uintptr(key.len)) { - continue - } - // already have a mapping for key. Update it. - inserti = i - insertb = b - goto done - } - ovf := b.overflow(t) - if ovf == nil { - break - } - b = ovf - } - - // Did not find mapping for key. Allocate new cell & add entry. - - // If we hit the max load factor or we have too many overflow buckets, - // and we're not already in the middle of growing, start growing. 
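Both growth triggers checked just below are simple integer tests. A standalone sketch that mirrors the arithmetic of overLoadFactor and tooManyOverflowBuckets from the deleted hashmap.go (the example counts are arbitrary):

package main

import "fmt"

const bucketCnt = 8

// Grow once the average load would exceed 6.5 entries per bucket,
// computed as 13/2 in integer math.
func overLoadFactor(count int, B uint8) bool {
	return count > bucketCnt && uintptr(count) > 13*((uintptr(1)<<B)/2)
}

// Roughly: "as many overflow buckets as regular buckets", with the
// threshold capped at 1<<15 for very large tables.
func tooManyOverflowBuckets(noverflow uint16, B uint8) bool {
	if B > 15 {
		B = 15
	}
	return noverflow >= uint16(1)<<(B&15)
}

func main() {
	fmt.Println(overLoadFactor(53, 3))        // 53 > 6.5*8, so a doubling grow starts
	fmt.Println(overLoadFactor(52, 3))        // exactly at the limit, no grow yet
	fmt.Println(tooManyOverflowBuckets(8, 3)) // 8 overflow buckets for 8 buckets: same-size grow
}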
- if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { - hashGrow(t, h) - goto again // Growing the table invalidates everything, so try again - } - - if insertb == nil { - // all current buckets are full, allocate a new one. - insertb = h.newoverflow(t, b) - inserti = 0 // not necessary, but avoids needlessly spilling inserti - } - insertb.tophash[inserti&(bucketCnt-1)] = top // mask inserti to avoid bounds checks - - insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*2*sys.PtrSize) - // store new key at insert position - *((*stringStruct)(insertk)) = *key - h.count++ - -done: - val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*2*sys.PtrSize+inserti*uintptr(t.valuesize)) - if h.flags&hashWriting == 0 { - throw("concurrent map writes") - } - h.flags &^= hashWriting - return val -} - -func mapdelete_fast32(t *maptype, h *hmap, key uint32) { - if raceenabled && h != nil { - callerpc := getcallerpc() - racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_fast32)) - } - if h == nil || h.count == 0 { - return - } - if h.flags&hashWriting != 0 { - throw("concurrent map writes") - } - - hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - - // Set hashWriting after calling alg.hash for consistency with mapdelete - h.flags |= hashWriting - - bucket := hash & bucketMask(h.B) - if h.growing() { - growWork_fast32(t, h, bucket) - } - b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) -search: - for ; b != nil; b = b.overflow(t) { - for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) { - if key != *(*uint32)(k) || b.tophash[i] == empty { - continue - } - // Only clear key if there are pointers in it. - if t.key.kind&kindNoPointers == 0 { - memclrHasPointers(k, t.key.size) - } - // Only clear value if there are pointers in it. - if t.elem.kind&kindNoPointers == 0 { - v := add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)) - memclrHasPointers(v, t.elem.size) - } - b.tophash[i] = empty - h.count-- - break search - } - } - - if h.flags&hashWriting == 0 { - throw("concurrent map writes") - } - h.flags &^= hashWriting -} - -func mapdelete_fast64(t *maptype, h *hmap, key uint64) { - if raceenabled && h != nil { - callerpc := getcallerpc() - racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_fast64)) - } - if h == nil || h.count == 0 { - return - } - if h.flags&hashWriting != 0 { - throw("concurrent map writes") - } - - hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - - // Set hashWriting after calling alg.hash for consistency with mapdelete - h.flags |= hashWriting - - bucket := hash & bucketMask(h.B) - if h.growing() { - growWork_fast64(t, h, bucket) - } - b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) -search: - for ; b != nil; b = b.overflow(t) { - for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) { - if key != *(*uint64)(k) || b.tophash[i] == empty { - continue - } - // Only clear key if there are pointers in it. - if t.key.kind&kindNoPointers == 0 { - memclrHasPointers(k, t.key.size) - } - // Only clear value if there are pointers in it. 
- if t.elem.kind&kindNoPointers == 0 { - v := add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)) - memclrHasPointers(v, t.elem.size) - } - b.tophash[i] = empty - h.count-- - break search - } - } - - if h.flags&hashWriting == 0 { - throw("concurrent map writes") - } - h.flags &^= hashWriting -} - -func mapdelete_faststr(t *maptype, h *hmap, ky string) { - if raceenabled && h != nil { - callerpc := getcallerpc() - racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_faststr)) - } - if h == nil || h.count == 0 { - return - } - if h.flags&hashWriting != 0 { - throw("concurrent map writes") - } - - key := stringStructOf(&ky) - hash := t.key.hashfn(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0)) - - // Set hashWriting after calling alg.hash for consistency with mapdelete - h.flags |= hashWriting - - bucket := hash & bucketMask(h.B) - if h.growing() { - growWork_faststr(t, h, bucket) - } - b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) - top := tophash(hash) -search: - for ; b != nil; b = b.overflow(t) { - for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { - k := (*stringStruct)(kptr) - if k.len != key.len || b.tophash[i] != top { - continue - } - if k.str != key.str && !memequal(k.str, key.str, uintptr(key.len)) { - continue - } - // Clear key's pointer. - k.str = nil - // Only clear value if there are pointers in it. - if t.elem.kind&kindNoPointers == 0 { - v := add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) - memclrHasPointers(v, t.elem.size) - } - b.tophash[i] = empty - h.count-- - break search - } - } - - if h.flags&hashWriting == 0 { - throw("concurrent map writes") - } - h.flags &^= hashWriting -} - -func growWork_fast32(t *maptype, h *hmap, bucket uintptr) { - // make sure we evacuate the oldbucket corresponding - // to the bucket we're about to use - evacuate_fast32(t, h, bucket&h.oldbucketmask()) - - // evacuate one more oldbucket to make progress on growing - if h.growing() { - evacuate_fast32(t, h, h.nevacuate) - } -} - -func evacuate_fast32(t *maptype, h *hmap, oldbucket uintptr) { - b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) - newbit := h.noldbuckets() - if !evacuated(b) { - // TODO: reuse overflow buckets instead of using new ones, if there - // is no iterator using the old buckets. (If !oldIterator.) - - // xy contains the x and y (low and high) evacuation destinations. - var xy [2]evacDst - x := &xy[0] - x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) - x.k = add(unsafe.Pointer(x.b), dataOffset) - x.v = add(x.k, bucketCnt*4) - - if !h.sameSizeGrow() { - // Only calculate y pointers if we're growing bigger. - // Otherwise GC can see bad pointers. - y := &xy[1] - y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) - y.k = add(unsafe.Pointer(y.b), dataOffset) - y.v = add(y.k, bucketCnt*4) - } - - for ; b != nil; b = b.overflow(t) { - k := add(unsafe.Pointer(b), dataOffset) - v := add(k, bucketCnt*4) - for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 4), add(v, uintptr(t.valuesize)) { - top := b.tophash[i] - if top == empty { - b.tophash[i] = evacuatedEmpty - continue - } - if top < minTopHash { - throw("bad map state") - } - var useY uint8 - if !h.sameSizeGrow() { - // Compute hash to make our evacuation decision (whether we need - // to send this key/value to bucket x or bucket y). 
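The x/y choice made right after this comment depends only on the single hash bit that the doubled table newly consults. A toy standalone example with made-up numbers:

package main

import "fmt"

func main() {
	const newbit = 8 // the old table had 8 buckets
	for _, hash := range []uint64{0x13, 0x1b} { // both sit in old bucket 3
		useY := 0
		if hash&newbit != 0 {
			useY = 1
		}
		// x keeps the old index, y moves it up by the old table size.
		fmt.Printf("hash %#x -> destination bucket %d (useY=%d)\n",
			hash, (hash&7)+uint64(useY)*newbit, useY)
	}
}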
- hash := t.key.hashfn(k, uintptr(h.hash0)) - if hash&newbit != 0 { - useY = 1 - } - } - - b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap - dst := &xy[useY] // evacuation destination - - if dst.i == bucketCnt { - dst.b = h.newoverflow(t, dst.b) - dst.i = 0 - dst.k = add(unsafe.Pointer(dst.b), dataOffset) - dst.v = add(dst.k, bucketCnt*4) - } - dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check - - // Copy key. - if sys.PtrSize == 4 && t.key.kind&kindNoPointers == 0 && writeBarrier.enabled { - writebarrierptr((*uintptr)(dst.k), *(*uintptr)(k)) - } else { - *(*uint32)(dst.k) = *(*uint32)(k) - } - - typedmemmove(t.elem, dst.v, v) - dst.i++ - // These updates might push these pointers past the end of the - // key or value arrays. That's ok, as we have the overflow pointer - // at the end of the bucket to protect against pointing past the - // end of the bucket. - dst.k = add(dst.k, 4) - dst.v = add(dst.v, uintptr(t.valuesize)) - } - } - // Unlink the overflow buckets & clear key/value to help GC. - if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { - b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) - // Preserve b.tophash because the evacuation - // state is maintained there. - ptr := add(b, dataOffset) - n := uintptr(t.bucketsize) - dataOffset - memclrHasPointers(ptr, n) - } - } - - if oldbucket == h.nevacuate { - advanceEvacuationMark(h, t, newbit) - } -} - -func growWork_fast64(t *maptype, h *hmap, bucket uintptr) { - // make sure we evacuate the oldbucket corresponding - // to the bucket we're about to use - evacuate_fast64(t, h, bucket&h.oldbucketmask()) - - // evacuate one more oldbucket to make progress on growing - if h.growing() { - evacuate_fast64(t, h, h.nevacuate) - } -} - -func evacuate_fast64(t *maptype, h *hmap, oldbucket uintptr) { - b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) - newbit := h.noldbuckets() - if !evacuated(b) { - // TODO: reuse overflow buckets instead of using new ones, if there - // is no iterator using the old buckets. (If !oldIterator.) - - // xy contains the x and y (low and high) evacuation destinations. - var xy [2]evacDst - x := &xy[0] - x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) - x.k = add(unsafe.Pointer(x.b), dataOffset) - x.v = add(x.k, bucketCnt*8) - - if !h.sameSizeGrow() { - // Only calculate y pointers if we're growing bigger. - // Otherwise GC can see bad pointers. - y := &xy[1] - y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) - y.k = add(unsafe.Pointer(y.b), dataOffset) - y.v = add(y.k, bucketCnt*8) - } - - for ; b != nil; b = b.overflow(t) { - k := add(unsafe.Pointer(b), dataOffset) - v := add(k, bucketCnt*8) - for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 8), add(v, uintptr(t.valuesize)) { - top := b.tophash[i] - if top == empty { - b.tophash[i] = evacuatedEmpty - continue - } - if top < minTopHash { - throw("bad map state") - } - var useY uint8 - if !h.sameSizeGrow() { - // Compute hash to make our evacuation decision (whether we need - // to send this key/value to bucket x or bucket y). 
- hash := t.key.hashfn(k, uintptr(h.hash0)) - if hash&newbit != 0 { - useY = 1 - } - } - - b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap - dst := &xy[useY] // evacuation destination - - if dst.i == bucketCnt { - dst.b = h.newoverflow(t, dst.b) - dst.i = 0 - dst.k = add(unsafe.Pointer(dst.b), dataOffset) - dst.v = add(dst.k, bucketCnt*8) - } - dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check - - // Copy key. - if t.key.kind&kindNoPointers == 0 && writeBarrier.enabled { - if sys.PtrSize == 8 { - writebarrierptr((*uintptr)(dst.k), *(*uintptr)(k)) - } else { - // There are three ways to squeeze at least one 32 bit pointer into 64 bits. - // Give up and call typedmemmove. - typedmemmove(t.key, dst.k, k) - } - } else { - *(*uint64)(dst.k) = *(*uint64)(k) - } - - typedmemmove(t.elem, dst.v, v) - dst.i++ - // These updates might push these pointers past the end of the - // key or value arrays. That's ok, as we have the overflow pointer - // at the end of the bucket to protect against pointing past the - // end of the bucket. - dst.k = add(dst.k, 8) - dst.v = add(dst.v, uintptr(t.valuesize)) - } - } - // Unlink the overflow buckets & clear key/value to help GC. - if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { - b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) - // Preserve b.tophash because the evacuation - // state is maintained there. - ptr := add(b, dataOffset) - n := uintptr(t.bucketsize) - dataOffset - memclrHasPointers(ptr, n) - } - } - - if oldbucket == h.nevacuate { - advanceEvacuationMark(h, t, newbit) - } -} - -func growWork_faststr(t *maptype, h *hmap, bucket uintptr) { - // make sure we evacuate the oldbucket corresponding - // to the bucket we're about to use - evacuate_faststr(t, h, bucket&h.oldbucketmask()) - - // evacuate one more oldbucket to make progress on growing - if h.growing() { - evacuate_faststr(t, h, h.nevacuate) - } -} - -func evacuate_faststr(t *maptype, h *hmap, oldbucket uintptr) { - b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) - newbit := h.noldbuckets() - if !evacuated(b) { - // TODO: reuse overflow buckets instead of using new ones, if there - // is no iterator using the old buckets. (If !oldIterator.) - - // xy contains the x and y (low and high) evacuation destinations. - var xy [2]evacDst - x := &xy[0] - x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) - x.k = add(unsafe.Pointer(x.b), dataOffset) - x.v = add(x.k, bucketCnt*2*sys.PtrSize) - - if !h.sameSizeGrow() { - // Only calculate y pointers if we're growing bigger. - // Otherwise GC can see bad pointers. - y := &xy[1] - y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) - y.k = add(unsafe.Pointer(y.b), dataOffset) - y.v = add(y.k, bucketCnt*2*sys.PtrSize) - } - - for ; b != nil; b = b.overflow(t) { - k := add(unsafe.Pointer(b), dataOffset) - v := add(k, bucketCnt*2*sys.PtrSize) - for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 2*sys.PtrSize), add(v, uintptr(t.valuesize)) { - top := b.tophash[i] - if top == empty { - b.tophash[i] = evacuatedEmpty - continue - } - if top < minTopHash { - throw("bad map state") - } - var useY uint8 - if !h.sameSizeGrow() { - // Compute hash to make our evacuation decision (whether we need - // to send this key/value to bucket x or bucket y). 
- hash := t.key.hashfn(k, uintptr(h.hash0)) - if hash&newbit != 0 { - useY = 1 - } - } - - b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap - dst := &xy[useY] // evacuation destination - - if dst.i == bucketCnt { - dst.b = h.newoverflow(t, dst.b) - dst.i = 0 - dst.k = add(unsafe.Pointer(dst.b), dataOffset) - dst.v = add(dst.k, bucketCnt*2*sys.PtrSize) - } - dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check - - // Copy key. - *(*string)(dst.k) = *(*string)(k) - - typedmemmove(t.elem, dst.v, v) - dst.i++ - // These updates might push these pointers past the end of the - // key or value arrays. That's ok, as we have the overflow pointer - // at the end of the bucket to protect against pointing past the - // end of the bucket. - dst.k = add(dst.k, 2*sys.PtrSize) - dst.v = add(dst.v, uintptr(t.valuesize)) - } - } - // Unlink the overflow buckets & clear key/value to help GC. - // Unlink the overflow buckets & clear key/value to help GC. - if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { - b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) - // Preserve b.tophash because the evacuation - // state is maintained there. - ptr := add(b, dataOffset) - n := uintptr(t.bucketsize) - dataOffset - memclrHasPointers(ptr, n) - } - } - - if oldbucket == h.nevacuate { - advanceEvacuationMark(h, t, newbit) - } -} diff --git a/libgo/go/runtime/heapdump.go b/libgo/go/runtime/heapdump.go index a4b168d..e92ea39 100644 --- a/libgo/go/runtime/heapdump.go +++ b/libgo/go/runtime/heapdump.go @@ -184,7 +184,7 @@ func dumptype(t *_type) { dumpint(uint64(uintptr(unsafe.Pointer(t)))) dumpint(uint64(t.size)) if x := t.uncommontype; x == nil || t.pkgPath == nil || *t.pkgPath == "" { - dumpstr(*t.string) + dumpstr(t.string()) } else { pkgpathstr := *t.pkgPath pkgpath := stringStructOf(&pkgpathstr) @@ -233,9 +233,8 @@ type childInfo struct { // dump kinds & offsets of interesting fields in bv func dumpbv(cbv *bitvector, offset uintptr) { - bv := gobv(*cbv) - for i := uintptr(0); i < bv.n; i++ { - if bv.bytedata[i/8]>>(i%8)&1 == 1 { + for i := uintptr(0); i < uintptr(cbv.n); i++ { + if cbv.ptrbit(i) == 1 { dumpint(fieldKindPtr) dumpint(uint64(offset + i*sys.PtrSize)) } @@ -254,7 +253,7 @@ func dumpgoroutine(gp *g) { dumpbool(isSystemGoroutine(gp)) dumpbool(false) // isbackground dumpint(uint64(gp.waitsince)) - dumpstr(gp.waitreason) + dumpstr(gp.waitreason.String()) dumpint(0) dumpint(uint64(uintptr(unsafe.Pointer(gp.m)))) dumpint(uint64(uintptr(unsafe.Pointer(gp._defer)))) @@ -372,8 +371,26 @@ func dumpparams() { dumpbool(true) // big-endian ptrs } dumpint(sys.PtrSize) - dumpint(uint64(mheap_.arena_start)) - dumpint(uint64(mheap_.arena_used)) + var arenaStart, arenaEnd uintptr + for i1 := range mheap_.arenas { + if mheap_.arenas[i1] == nil { + continue + } + for i, ha := range mheap_.arenas[i1] { + if ha == nil { + continue + } + base := arenaBase(arenaIdx(i1)< arenaEnd { + arenaEnd = base + heapArenaBytes + } + } + } + dumpint(uint64(arenaStart)) + dumpint(uint64(arenaEnd)) dumpstr(sys.GOARCH) dumpstr(sys.Goexperiment) dumpint(uint64(ncpu)) @@ -509,7 +526,7 @@ func mdump() { func writeheapdump_m(fd uintptr) { _g_ := getg() casgstatus(_g_.m.curg, _Grunning, _Gwaiting) - _g_.waitreason = "dumping heap" + _g_.waitreason = waitReasonDumpingHeap // Update stats so we can dump them. 
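The dumpbv rewrite above replaces the explicit bit extraction bv.bytedata[i/8]>>(i%8)&1 with the ptrbit helper; the arithmetic is the same. A small self-contained illustration of that extraction:

package main

import "fmt"

// ptrbit returns bit i of a bitmap stored least-significant-bit first in a
// byte slice, matching the expression the old dumpbv loop spelled out.
func ptrbit(bytedata []byte, i uint) byte {
	return bytedata[i/8] >> (i % 8) & 1
}

func main() {
	bm := []byte{0x05} // bits 0 and 2 set
	for i := uint(0); i < 8; i++ {
		fmt.Print(ptrbit(bm, i), " ")
	}
	fmt.Println() // 1 0 1 0 0 0 0 0
}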
// As a side effect, flushes all the MCaches so the MSpan.freelist diff --git a/libgo/go/runtime/iface.go b/libgo/go/runtime/iface.go index 62d47ce..8ed67c1 100644 --- a/libgo/go/runtime/iface.go +++ b/libgo/go/runtime/iface.go @@ -94,7 +94,7 @@ func getitab(lhs, rhs *_type, canfail bool) unsafe.Pointer { if canfail { return nil } - panic(&TypeAssertionError{"", *rhs.string, *lhs.string, *lhsi.methods[0].name}) + panic(&TypeAssertionError{nil, rhs, lhs, *lhsi.methods[0].name}) } methods := make([]unsafe.Pointer, len(lhsi.methods)+1) @@ -110,7 +110,7 @@ func getitab(lhs, rhs *_type, canfail bool) unsafe.Pointer { if canfail { return nil } - panic(&TypeAssertionError{"", *rhs.string, *lhs.string, *lhsMethod.name}) + panic(&TypeAssertionError{nil, rhs, lhs, *lhsMethod.name}) } rhsMethod = &rhs.methods[ri] @@ -126,7 +126,7 @@ func getitab(lhs, rhs *_type, canfail bool) unsafe.Pointer { if canfail { return nil } - panic(&TypeAssertionError{"", *rhs.string, *lhs.string, *lhsMethod.name}) + panic(&TypeAssertionError{nil, rhs, lhs, *lhsMethod.name}) } methods[li+1] = unsafe.Pointer(rhsMethod.tfn) @@ -147,7 +147,7 @@ func requireitab(lhs, rhs *_type) unsafe.Pointer { // impossible or if the rhs type is nil. func assertitab(lhs, rhs *_type) unsafe.Pointer { if rhs == nil { - panic(&TypeAssertionError{"", "", *lhs.string, ""}) + panic(&TypeAssertionError{nil, nil, lhs, ""}) } if lhs.kind&kindMask != kindInterface { @@ -167,10 +167,10 @@ func assertitab(lhs, rhs *_type) unsafe.Pointer { // type, panicing if not. func assertI2T(lhs, rhs, inter *_type) { if rhs == nil { - panic(&TypeAssertionError{"", "", *lhs.string, ""}) + panic(&TypeAssertionError{nil, nil, lhs, ""}) } if !eqtype(lhs, rhs) { - panic(&TypeAssertionError{*inter.string, *rhs.string, *lhs.string, ""}) + panic(&TypeAssertionError{inter, rhs, lhs, ""}) } } @@ -327,8 +327,44 @@ func ifaceT2Ip(to, from *_type) bool { func reflect_ifaceE2I(inter *interfacetype, e eface, dst *iface) { t := e._type if t == nil { - panic(TypeAssertionError{"", "", *inter.typ.string, ""}) + panic(TypeAssertionError{nil, nil, &inter.typ, ""}) } dst.tab = requireitab((*_type)(unsafe.Pointer(inter)), t) dst.data = e.data } + +// staticbytes is used to avoid convT2E for byte-sized values. 
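The staticbytes table declared just below lets the runtime hand out a pointer into a static array instead of allocating when a single byte is placed into an interface. A user-level analogue of the same idea (this is not the runtime mechanism itself, just an illustration of why a pre-built table avoids per-conversion allocation):

package main

import "fmt"

// smallInts pre-boxes every uint8 value once; box then reuses the stored
// interface value, so no allocation happens per call.
var smallInts [256]interface{}

func init() {
	for i := range smallInts {
		smallInts[i] = uint8(i)
	}
}

func box(b uint8) interface{} {
	return smallInts[b]
}

func main() {
	fmt.Println(box(42)) // 42
}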
+var staticbytes = [...]byte{ + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, +} diff --git a/libgo/go/runtime/internal/atomic/atomic_test.go b/libgo/go/runtime/internal/atomic/atomic_test.go index b697aa8..25ece43 100644 --- a/libgo/go/runtime/internal/atomic/atomic_test.go +++ b/libgo/go/runtime/internal/atomic/atomic_test.go @@ -93,8 +93,10 @@ func TestUnaligned64(t *testing.T) { } x := make([]uint32, 4) - up64 := (*uint64)(unsafe.Pointer(&x[1])) // misaligned - p64 := (*int64)(unsafe.Pointer(&x[1])) // misaligned + u := unsafe.Pointer(uintptr(unsafe.Pointer(&x[0])) | 4) // force alignment to 4 + + up64 := (*uint64)(u) // misaligned + p64 := (*int64)(u) // misaligned shouldPanic(t, "Load64", func() { atomic.Load64(up64) }) shouldPanic(t, "Loadint64", func() { atomic.Loadint64(p64) }) diff --git a/libgo/go/runtime/internal/atomic/bench_test.go b/libgo/go/runtime/internal/atomic/bench_test.go index 47010e3..083a75c 100644 --- a/libgo/go/runtime/internal/atomic/bench_test.go +++ b/libgo/go/runtime/internal/atomic/bench_test.go @@ -26,3 +26,39 @@ func BenchmarkAtomicStore64(b *testing.B) { atomic.Store64(&x, 0) } } + +func BenchmarkAtomicLoad(b *testing.B) { + var x uint32 + sink = &x + for i := 0; i < b.N; i++ { + _ = atomic.Load(&x) + } +} + +func BenchmarkAtomicStore(b *testing.B) { + var x uint32 + sink = &x + for i := 0; i < b.N; i++ { + atomic.Store(&x, 0) + } +} + +func BenchmarkXadd(b *testing.B) { + var x uint32 + ptr := &x + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + atomic.Xadd(ptr, 1) + } + }) +} + +func BenchmarkXadd64(b *testing.B) { + var x uint64 + ptr := &x + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + atomic.Xadd64(ptr, 1) + } + }) +} diff --git a/libgo/go/runtime/internal/atomic/stubs.go b/libgo/go/runtime/internal/atomic/stubs.go index 497b980..62e30d1 100644 --- a/libgo/go/runtime/internal/atomic/stubs.go +++ b/libgo/go/runtime/internal/atomic/stubs.go @@ -2,6 +2,8 @@ // Use of this source code is governed by a BSD-style 
// license that can be found in the LICENSE file. +// +build !wasm + package atomic import "unsafe" diff --git a/libgo/go/runtime/internal/sys/intrinsics.go b/libgo/go/runtime/internal/sys/intrinsics.go index 2928280..6906938 100644 --- a/libgo/go/runtime/internal/sys/intrinsics.go +++ b/libgo/go/runtime/internal/sys/intrinsics.go @@ -32,6 +32,30 @@ func Ctz32(x uint32) int { return int(builtinCtz32(x)) } +// Ctz8 returns the number of trailing zero bits in x; the result is 8 for x == 0. +func Ctz8(x uint8) int { + return int(ntz8tab[x]) +} + +var ntz8tab = [256]uint8{ + 0x08, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x07, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, +} + //extern __builtin_bswap64 func bswap64(uint64) uint64 diff --git a/libgo/go/runtime/lfstack.go b/libgo/go/runtime/lfstack.go index 4787c5b..406561a 100644 --- a/libgo/go/runtime/lfstack.go +++ b/libgo/go/runtime/lfstack.go @@ -55,3 +55,13 @@ func (head *lfstack) pop() unsafe.Pointer { func (head *lfstack) empty() bool { return atomic.Load64((*uint64)(head)) == 0 } + +// lfnodeValidate panics if node is not a valid address for use with +// lfstack.push. This only needs to be called when node is allocated. +func lfnodeValidate(node *lfnode) { + if lfstackUnpack(lfstackPack(node, ^uintptr(0))) != node { + printlock() + println("runtime: bad lfnode address", hex(uintptr(unsafe.Pointer(node)))) + throw("bad lfnode address") + } +} diff --git a/libgo/go/runtime/lfstack_64bit.go b/libgo/go/runtime/lfstack_64bit.go index dca1718..401f83d 100644 --- a/libgo/go/runtime/lfstack_64bit.go +++ b/libgo/go/runtime/lfstack_64bit.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build amd64 arm64 mips64 mips64le ppc64 ppc64le s390x arm64be alpha sparc64 ia64 riscv64 +// +build amd64 arm64 mips64 mips64le ppc64 ppc64le s390x wasm arm64be alpha sparc64 ia64 riscv64 package runtime @@ -11,21 +11,17 @@ import "unsafe" const ( // addrBits is the number of bits needed to represent a virtual address. 
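Ordinary Go code would use math/bits for a trailing-zero count; the new Ctz8 keeps its own ntz8tab, presumably to avoid the dependency and an extra call from the runtime. A quick cross-check of the values Ctz8 is documented to return, including 8 for a zero input:

package main

import (
	"fmt"
	"math/bits"
)

func main() {
	for _, x := range []uint8{0, 1, 0x10, 0xa0} {
		fmt.Println(x, bits.TrailingZeros8(x)) // 0 8, 1 0, 16 4, 160 5
	}
}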
// - // In Linux the user address space for each architecture is limited as - // follows (taken from the processor.h file for the architecture): + // See heapAddrBits for a table of address space sizes on + // various architectures. 48 bits is enough for all + // architectures except s390x. // - // Architecture Name Maximum Value (exclusive) - // --------------------------------------------------------------------- - // arm64 TASK_SIZE_64 Depends on configuration. - // ppc64{,le} TASK_SIZE_USER64 0x400000000000UL (46 bit addresses) - // mips64{,le} TASK_SIZE64 0x010000000000UL (40 bit addresses) - // s390x TASK_SIZE 0x020000000000UL (41 bit addresses) - // - // These values may increase over time. - // - // On AMD64, virtual addresses are 48-bit numbers sign extended to 64. + // On AMD64, virtual addresses are 48-bit (or 57-bit) numbers sign extended to 64. // We shift the address left 16 to eliminate the sign extended part and make // room in the bottom for the count. + // + // On s390x, virtual addresses are 64-bit. There's not much we + // can do about this, so we just hope that the kernel doesn't + // get to really high addresses and panic if it does. addrBits = 48 // In addition to the 16 bits taken from the top, we can take 3 from the diff --git a/libgo/go/runtime/lock_futex.go b/libgo/go/runtime/lock_futex.go index b2c9ccb..f7ca1f0 100644 --- a/libgo/go/runtime/lock_futex.go +++ b/libgo/go/runtime/lock_futex.go @@ -241,3 +241,9 @@ func notetsleepg(n *note, ns int64) bool { exitsyscall() return ok } + +func pauseSchedulerUntilCallback() bool { + return false +} + +func checkTimeouts() {} diff --git a/libgo/go/runtime/lock_js.go b/libgo/go/runtime/lock_js.go new file mode 100644 index 0000000..df321e5 --- /dev/null +++ b/libgo/go/runtime/lock_js.go @@ -0,0 +1,172 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build js,wasm + +package runtime + +import ( + _ "unsafe" +) + +// js/wasm has no support for threads yet. There is no preemption. +// Waiting for a mutex is implemented by allowing other goroutines +// to run until the mutex gets unlocked. + +const ( + mutex_unlocked = 0 + mutex_locked = 1 + + note_cleared = 0 + note_woken = 1 + note_timeout = 2 + + active_spin = 4 + active_spin_cnt = 30 + passive_spin = 1 +) + +func lock(l *mutex) { + for l.key == mutex_locked { + mcall(gosched_m) + } + l.key = mutex_locked +} + +func unlock(l *mutex) { + if l.key == mutex_unlocked { + throw("unlock of unlocked lock") + } + l.key = mutex_unlocked +} + +// One-time notifications. 
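The lfstack_64bit.go comment above describes packing a 48-bit address and a 19-bit ABA counter into one uint64: shift the pointer left 16 and reclaim 3 more bits from 8-byte alignment. A hedged standalone sketch of that packing using plain integers instead of unsafe pointers:

package main

import "fmt"

const (
	addrBits = 48
	cntBits  = 64 - addrBits + 3 // 16 bits from the top plus 3 from alignment
)

func pack(addr uint64, cnt uint64) uint64 {
	return addr<<(64-addrBits) | cnt&(1<<cntBits-1)
}

func unpack(val uint64) uint64 {
	// Arithmetic shift restores the sign-extended address; <<3 re-adds the
	// alignment bits that were folded into the counter space.
	return uint64(int64(val) >> cntBits << 3)
}

func main() {
	addr := uint64(0x00007f1234567890) // 8-byte aligned
	v := pack(addr, 12345)
	fmt.Printf("%#x -> %#x -> %#x\n", addr, v, unpack(v))
}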
+ +type noteWithTimeout struct { + gp *g + deadline int64 +} + +var ( + notes = make(map[*note]*g) + notesWithTimeout = make(map[*note]noteWithTimeout) +) + +func noteclear(n *note) { + n.key = note_cleared +} + +func notewakeup(n *note) { + // gp := getg() + if n.key == note_woken { + throw("notewakeup - double wakeup") + } + cleared := n.key == note_cleared + n.key = note_woken + if cleared { + goready(notes[n], 1) + } +} + +func notesleep(n *note) { + throw("notesleep not supported by js") +} + +func notetsleep(n *note, ns int64) bool { + throw("notetsleep not supported by js") + return false +} + +// same as runtime·notetsleep, but called on user g (not g0) +func notetsleepg(n *note, ns int64) bool { + gp := getg() + if gp == gp.m.g0 { + throw("notetsleepg on g0") + } + + if ns >= 0 { + deadline := nanotime() + ns + delay := ns/1000000 + 1 // round up + if delay > 1<<31-1 { + delay = 1<<31 - 1 // cap to max int32 + } + + id := scheduleCallback(delay) + mp := acquirem() + notes[n] = gp + notesWithTimeout[n] = noteWithTimeout{gp: gp, deadline: deadline} + releasem(mp) + + gopark(nil, nil, waitReasonSleep, traceEvNone, 1) + + clearScheduledCallback(id) // note might have woken early, clear timeout + mp = acquirem() + delete(notes, n) + delete(notesWithTimeout, n) + releasem(mp) + + return n.key == note_woken + } + + for n.key != note_woken { + mp := acquirem() + notes[n] = gp + releasem(mp) + + gopark(nil, nil, waitReasonZero, traceEvNone, 1) + + mp = acquirem() + delete(notes, n) + releasem(mp) + } + return true +} + +// checkTimeouts resumes goroutines that are waiting on a note which has reached its deadline. +func checkTimeouts() { + now := nanotime() + for n, nt := range notesWithTimeout { + if n.key == note_cleared && now > nt.deadline { + n.key = note_timeout + goready(nt.gp, 1) + } + } +} + +var waitingForCallback *g + +// sleepUntilCallback puts the current goroutine to sleep until a callback is triggered. +// It is currently only used by the callback routine of the syscall/js package. +//go:linkname sleepUntilCallback syscall/js.sleepUntilCallback +func sleepUntilCallback() { + waitingForCallback = getg() + gopark(nil, nil, waitReasonZero, traceEvNone, 1) + waitingForCallback = nil +} + +// pauseSchedulerUntilCallback gets called from the scheduler and pauses the execution +// of Go's WebAssembly code until a callback is triggered. Then it checks for note timeouts +// and resumes goroutines that are waiting for a callback. +func pauseSchedulerUntilCallback() bool { + if waitingForCallback == nil && len(notesWithTimeout) == 0 { + return false + } + + pause() + checkTimeouts() + if waitingForCallback != nil { + goready(waitingForCallback, 1) + } + return true +} + +// pause pauses the execution of Go's WebAssembly code until a callback is triggered. +func pause() + +// scheduleCallback tells the WebAssembly environment to trigger a callback after ms milliseconds. +// It returns a timer id that can be used with clearScheduledCallback. +func scheduleCallback(ms int64) int32 + +// clearScheduledCallback clears a callback scheduled by scheduleCallback. 
+func clearScheduledCallback(id int32) diff --git a/libgo/go/runtime/lock_sema.go b/libgo/go/runtime/lock_sema.go index b5cce6a..237513c 100644 --- a/libgo/go/runtime/lock_sema.go +++ b/libgo/go/runtime/lock_sema.go @@ -294,3 +294,9 @@ func notetsleepg(n *note, ns int64) bool { exitsyscall() return ok } + +func pauseSchedulerUntilCallback() bool { + return false +} + +func checkTimeouts() {} diff --git a/libgo/go/runtime/malloc.go b/libgo/go/runtime/malloc.go index 523989e..ac4759f 100644 --- a/libgo/go/runtime/malloc.go +++ b/libgo/go/runtime/malloc.go @@ -78,9 +78,34 @@ // // 3. We don't zero pages that never get reused. +// Virtual memory layout +// +// The heap consists of a set of arenas, which are 64MB on 64-bit and +// 4MB on 32-bit (heapArenaBytes). Each arena's start address is also +// aligned to the arena size. +// +// Each arena has an associated heapArena object that stores the +// metadata for that arena: the heap bitmap for all words in the arena +// and the span map for all pages in the arena. heapArena objects are +// themselves allocated off-heap. +// +// Since arenas are aligned, the address space can be viewed as a +// series of arena frames. The arena map (mheap_.arenas) maps from +// arena frame number to *heapArena, or nil for parts of the address +// space not backed by the Go heap. The arena map is structured as a +// two-level array consisting of a "L1" arena map and many "L2" arena +// maps; however, since arenas are large, on many architectures, the +// arena map consists of a single, large L2 map. +// +// The arena map covers the entire possible address space, allowing +// the Go heap to use any part of the address space. The allocator +// attempts to keep arenas contiguous so that large spans (and hence +// large objects) can cross arenas. + package runtime import ( + "runtime/internal/atomic" "runtime/internal/sys" "unsafe" ) @@ -124,9 +149,8 @@ const ( _TinySize = 16 _TinySizeClass = int8(2) - _FixAllocChunk = 16 << 10 // Chunk size for FixAlloc - _MaxMHeapList = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap. - _HeapAllocChunk = 1 << 20 // Chunk size for heap growth + _FixAllocChunk = 16 << 10 // Chunk size for FixAlloc + _MaxMHeapList = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap. // Per-P, per order stack segment cache size. _StackCacheSize = 32 * 1024 @@ -145,25 +169,144 @@ const ( // plan9 | 4KB | 3 _NumStackOrders = 4 - sys.PtrSize/4*sys.GoosWindows - 1*sys.GoosPlan9 - // Number of bits in page to span calculations (4k pages). - // On Windows 64-bit we limit the arena to 32GB or 35 bits. - // Windows counts memory used by page table into committed memory - // of the process, so we can't reserve too much memory. - // See https://golang.org/issue/5402 and https://golang.org/issue/5236. - // On other 64-bit platforms, we limit the arena to 512GB, or 39 bits. - // On 32-bit, we don't bother limiting anything, so we use the full 32-bit address. - // The only exception is mips32 which only has access to low 2GB of virtual memory. - // On Darwin/arm64, we cannot reserve more than ~5GB of virtual memory, - // but as most devices have less than 4GB of physical memory anyway, we - // try to be conservative here, and only ask for a 2GB heap. 
- _MHeapMap_TotalBits = (_64bit*sys.GoosWindows)*35 + (_64bit*(1-sys.GoosWindows)*(1-sys.GoosDarwin*sys.GoarchArm64))*39 + sys.GoosDarwin*sys.GoarchArm64*31 + (1-_64bit)*(32-(sys.GoarchMips+sys.GoarchMipsle)) - _MHeapMap_Bits = _MHeapMap_TotalBits - _PageShift - - // _MaxMem is the maximum heap arena size minus 1. + // heapAddrBits is the number of bits in a heap address. On + // amd64, addresses are sign-extended beyond heapAddrBits. On + // other arches, they are zero-extended. + // + // On 64-bit platforms, we limit this to 48 bits based on a + // combination of hardware and OS limitations. + // + // amd64 hardware limits addresses to 48 bits, sign-extended + // to 64 bits. Addresses where the top 16 bits are not either + // all 0 or all 1 are "non-canonical" and invalid. Because of + // these "negative" addresses, we offset addresses by 1<<47 + // (arenaBaseOffset) on amd64 before computing indexes into + // the heap arenas index. In 2017, amd64 hardware added + // support for 57 bit addresses; however, currently only Linux + // supports this extension and the kernel will never choose an + // address above 1<<47 unless mmap is called with a hint + // address above 1<<47 (which we never do). + // + // arm64 hardware (as of ARMv8) limits user addresses to 48 + // bits, in the range [0, 1<<48). + // + // ppc64, mips64, and s390x support arbitrary 64 bit addresses + // in hardware. However, since Go only supports Linux on + // these, we lean on OS limits. Based on Linux's processor.h, + // the user address space is limited as follows on 64-bit + // architectures: + // + // Architecture Name Maximum Value (exclusive) + // --------------------------------------------------------------------- + // amd64 TASK_SIZE_MAX 0x007ffffffff000 (47 bit addresses) + // arm64 TASK_SIZE_64 0x01000000000000 (48 bit addresses) + // ppc64{,le} TASK_SIZE_USER64 0x00400000000000 (46 bit addresses) + // mips64{,le} TASK_SIZE64 0x00010000000000 (40 bit addresses) + // s390x TASK_SIZE 1<<64 (64 bit addresses) + // + // These limits may increase over time, but are currently at + // most 48 bits except on s390x. On all architectures, Linux + // starts placing mmap'd regions at addresses that are + // significantly below 48 bits, so even if it's possible to + // exceed Go's 48 bit limit, it's extremely unlikely in + // practice. + // + // On 32-bit platforms, we accept the full 32-bit address + // space because doing so is cheap. + // mips32 only has access to the low 2GB of virtual memory, so + // we further limit it to 31 bits. + // + // WebAssembly currently has a limit of 4GB linear memory. + heapAddrBits = (_64bit*(1-sys.GoarchWasm))*48 + (1-_64bit+sys.GoarchWasm)*(32-(sys.GoarchMips+sys.GoarchMipsle)) + + // maxAlloc is the maximum size of an allocation. On 64-bit, + // it's theoretically possible to allocate 1<= 0; i-- { + var p uintptr switch { case GOARCH == "arm64" && GOOS == "darwin": p = uintptr(i)<<40 | uintptrMask&(0x0013<<28) @@ -312,225 +447,283 @@ func mallocinit() { } else { p = uintptr(i)<<42 | uintptrMask&(0x70<<52) } + case raceenabled: + // The TSAN runtime requires the heap + // to be in the range [0x00c000000000, + // 0x00e000000000). 
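For the 64-bit hint loop that follows, it may help to see the concrete addresses it produces in the default case: i<<40 | 0x00c0<<32 for descending i, spaced 1TB apart so a failed reservation at one hint does not disturb the others. A worked example, assuming a 64-bit uintptr:

package main

import "fmt"

func main() {
	for i := 0x7f; i >= 0x7d; i-- {
		p := uintptr(i)<<40 | uintptr(0x00c0)<<32
		fmt.Printf("%#x\n", p) // 0x7fc000000000, 0x7ec000000000, 0x7dc000000000
	}
}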
+ p = uintptr(i)<<32 | uintptrMask&(0x00c0<<32) + if p >= uintptrMask&0x00e000000000 { + continue + } default: p = uintptr(i)<<40 | uintptrMask&(0x00c0<<32) } - p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved)) - if p != 0 { - break - } + hint := (*arenaHint)(mheap_.arenaHintAlloc.alloc()) + hint.addr = p + hint.next, mheap_.arenaHints = mheap_.arenaHints, hint } - } + } else { + // On a 32-bit machine, we're much more concerned + // about keeping the usable heap contiguous. + // Hence: + // + // 1. We reserve space for all heapArenas up front so + // they don't get interleaved with the heap. They're + // ~258MB, so this isn't too bad. (We could reserve a + // smaller amount of space up front if this is a + // problem.) + // + // 2. We hint the heap to start right above the end of + // the binary so we have the best chance of keeping it + // contiguous. + // + // 3. We try to stake out a reasonably large initial + // heap reservation. - if p == 0 { - // On a 32-bit machine, we can't typically get away - // with a giant virtual address space reservation. - // Instead we map the memory information bitmap - // immediately after the data segment, large enough - // to handle the entire 4GB address space (256 MB), - // along with a reservation for an initial arena. - // When that gets used up, we'll start asking the kernel - // for any memory anywhere. + const arenaMetaSize = unsafe.Sizeof([1 << arenaBits]heapArena{}) + meta := uintptr(sysReserve(nil, arenaMetaSize)) + if meta != 0 { + mheap_.heapArenaAlloc.init(meta, arenaMetaSize) + } // We want to start the arena low, but if we're linked // against C code, it's possible global constructors // have called malloc and adjusted the process' brk. // Query the brk so we can avoid trying to map the - // arena over it (which will cause the kernel to put - // the arena somewhere else, likely at a high + // region over it (which will cause the kernel to put + // the region somewhere else, likely at a high // address). procBrk := sbrk0() - // If we fail to allocate, try again with a smaller arena. - // This is necessary on Android L where we share a process - // with ART, which reserves virtual memory aggressively. - // In the worst case, fall back to a 0-sized initial arena, - // in the hope that subsequent reservations will succeed. + // If we ask for the end of the data segment but the + // operating system requires a little more space + // before we can start allocating, it will give out a + // slightly higher pointer. Except QEMU, which is + // buggy, as usual: it won't adjust the pointer + // upward. So adjust it upward a little bit ourselves: + // 1/4 MB to get away from the running binary image. + p := getEnd() + if p < procBrk { + p = procBrk + } + if mheap_.heapArenaAlloc.next <= p && p < mheap_.heapArenaAlloc.end { + p = mheap_.heapArenaAlloc.end + } + p = round(p+(256<<10), heapArenaBytes) + // Because we're worried about fragmentation on + // 32-bit, we try to make a large initial reservation. arenaSizes := [...]uintptr{ 512 << 20, 256 << 20, 128 << 20, - 0, } - for _, arenaSize := range &arenaSizes { - // SysReserve treats the address we ask for, end, as a hint, - // not as an absolute requirement. If we ask for the end - // of the data segment but the operating system requires - // a little more space before we can start allocating, it will - // give out a slightly higher pointer. Except QEMU, which - // is buggy, as usual: it won't adjust the pointer upward. 
- // So adjust it upward a little bit ourselves: 1/4 MB to get - // away from the running binary image and then round up - // to a MB boundary. - p = round(getEnd()+(1<<18), 1<<20) - pSize = bitmapSize + spansSize + arenaSize + _PageSize - if p <= procBrk && procBrk < p+pSize { - // Move the start above the brk, - // leaving some room for future brk - // expansion. - p = round(procBrk+(1<<20), 1<<20) - } - p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved)) - if p != 0 { + a, size := sysReserveAligned(unsafe.Pointer(p), arenaSize, heapArenaBytes) + if a != nil { + mheap_.arena.init(uintptr(a), size) + p = uintptr(a) + size // For hint below break } } - if p == 0 { - throw("runtime: cannot reserve arena virtual address space") - } - } - - // PageSize can be larger than OS definition of page size, - // so SysReserve can give us a PageSize-unaligned pointer. - // To overcome this we ask for PageSize more and round up the pointer. - p1 := round(p, _PageSize) - pSize -= p1 - p - - spansStart := p1 - p1 += spansSize - mheap_.bitmap = p1 + bitmapSize - p1 += bitmapSize - if sys.PtrSize == 4 { - // Set arena_start such that we can accept memory - // reservations located anywhere in the 4GB virtual space. - mheap_.arena_start = 0 - } else { - mheap_.arena_start = p1 + hint := (*arenaHint)(mheap_.arenaHintAlloc.alloc()) + hint.addr = p + hint.next, mheap_.arenaHints = mheap_.arenaHints, hint } - mheap_.arena_end = p + pSize - mheap_.arena_used = p1 - mheap_.arena_alloc = p1 - mheap_.arena_reserved = reserved - - if mheap_.arena_start&(_PageSize-1) != 0 { - println("bad pagesize", hex(p), hex(p1), hex(spansSize), hex(bitmapSize), hex(_PageSize), "start", hex(mheap_.arena_start)) - throw("misrounded allocation in mallocinit") - } - - // Initialize the rest of the allocator. - mheap_.init(spansStart, spansSize) - _g_ := getg() - _g_.m.mcache = allocmcache() } -// sysAlloc allocates the next n bytes from the heap arena. The -// returned pointer is always _PageSize aligned and between -// h.arena_start and h.arena_end. sysAlloc returns nil on failure. +// sysAlloc allocates heap arena space for at least n bytes. The +// returned pointer is always heapArenaBytes-aligned and backed by +// h.arenas metadata. The returned size is always a multiple of +// heapArenaBytes. sysAlloc returns nil on failure. // There is no corresponding free function. -func (h *mheap) sysAlloc(n uintptr) unsafe.Pointer { - // strandLimit is the maximum number of bytes to strand from - // the current arena block. If we would need to strand more - // than this, we fall back to sysAlloc'ing just enough for - // this allocation. - const strandLimit = 16 << 20 - - if n > h.arena_end-h.arena_alloc { - // If we haven't grown the arena to _MaxMem yet, try - // to reserve some more address space. - p_size := round(n+_PageSize, 256<<20) - new_end := h.arena_end + p_size // Careful: can overflow - if h.arena_end <= new_end && new_end-h.arena_start-1 <= _MaxMem { - // TODO: It would be bad if part of the arena - // is reserved and part is not. - var reserved bool - p := uintptr(sysReserve(unsafe.Pointer(h.arena_end), p_size, &reserved)) - if p == 0 { - // TODO: Try smaller reservation - // growths in case we're in a crowded - // 32-bit address space. - goto reservationFailed - } - // p can be just about anywhere in the address - // space, including before arena_end. - if p == h.arena_end { - // The new block is contiguous with - // the current block. Extend the - // current arena block. 
- h.arena_end = new_end - h.arena_reserved = reserved - } else if h.arena_start <= p && p+p_size-h.arena_start-1 <= _MaxMem && h.arena_end-h.arena_alloc < strandLimit { - // We were able to reserve more memory - // within the arena space, but it's - // not contiguous with our previous - // reservation. It could be before or - // after our current arena_used. - // - // Keep everything page-aligned. - // Our pages are bigger than hardware pages. - h.arena_end = p + p_size - p = round(p, _PageSize) - h.arena_alloc = p - h.arena_reserved = reserved - } else { - // We got a mapping, but either - // - // 1) It's not in the arena, so we - // can't use it. (This should never - // happen on 32-bit.) - // - // 2) We would need to discard too - // much of our current arena block to - // use it. - // - // We haven't added this allocation to - // the stats, so subtract it from a - // fake stat (but avoid underflow). - // - // We'll fall back to a small sysAlloc. - stat := uint64(p_size) - sysFree(unsafe.Pointer(p), p_size, &stat) +// +// h must be locked. +func (h *mheap) sysAlloc(n uintptr) (v unsafe.Pointer, size uintptr) { + n = round(n, heapArenaBytes) + + // First, try the arena pre-reservation. + v = h.arena.alloc(n, heapArenaBytes, &memstats.heap_sys) + if v != nil { + size = n + goto mapped + } + + // Try to grow the heap at a hint address. + for h.arenaHints != nil { + hint := h.arenaHints + p := hint.addr + if hint.down { + p -= n + } + if p+n < p { + // We can't use this, so don't ask. + v = nil + } else if arenaIndex(p+n-1) >= 1< h.arena_used { - h.setArenaUsed(h.arena_alloc, true) + if size == 0 { + if raceenabled { + // The race detector assumes the heap lives in + // [0x00c000000000, 0x00e000000000), but we + // just ran out of hints in this region. Give + // a nice failure. + throw("too many address space collisions for -race mode") } - if p&(_PageSize-1) != 0 { - throw("misrounded allocation in MHeap_SysAlloc") + // All of the hints failed, so we'll take any + // (sufficiently aligned) address the kernel will give + // us. + v, size = sysReserveAligned(nil, n, heapArenaBytes) + if v == nil { + return nil, 0 } - return unsafe.Pointer(p) + + // Create new hints for extending this region. + hint := (*arenaHint)(h.arenaHintAlloc.alloc()) + hint.addr, hint.down = uintptr(v), true + hint.next, mheap_.arenaHints = mheap_.arenaHints, hint + hint = (*arenaHint)(h.arenaHintAlloc.alloc()) + hint.addr = uintptr(v) + size + hint.next, mheap_.arenaHints = mheap_.arenaHints, hint } -reservationFailed: - // If using 64-bit, our reservation is all we have. - if sys.PtrSize != 4 { - return nil + // Check for bad pointers or pointers we can't use. + { + var bad string + p := uintptr(v) + if p+size < p { + bad = "region exceeds uintptr range" + } else if arenaIndex(p) >= 1<= 1< _MaxMem { - // This shouldn't be possible because _MaxMem is the - // whole address space on 32-bit. - top := uint64(h.arena_start) + _MaxMem - print("runtime: memory allocated by OS (", hex(p), ") not in usable range [", hex(h.arena_start), ",", hex(top), ")\n") - sysFree(unsafe.Pointer(p), p_size, &memstats.heap_sys) - return nil + // Back the reservation. + sysMap(v, size, &memstats.heap_sys) + +mapped: + // Create arena metadata. + for ri := arenaIndex(uintptr(v)); ri <= arenaIndex(uintptr(v)+size-1); ri++ { + l2 := h.arenas[ri.l1()] + if l2 == nil { + // Allocate an L2 arena map. 
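The arena-metadata loop around this point allocates L2 arena maps lazily, only when the first arena in an L1 slot's range is mapped. A hedged sketch of that two-level pattern with toy sizes and ordinary allocation in place of persistentalloc:

package main

import "fmt"

const l2Bits = 4 // toy size; the real arenaL2Bits is chosen per GOARCH

type heapArena struct{ id int }

// arenas is the L1 array; each entry lazily points to an L2 array.
var arenas [4]*[1 << l2Bits]*heapArena

func setArena(i int, ha *heapArena) {
	l1, l2 := i>>l2Bits, i&(1<<l2Bits-1)
	if arenas[l1] == nil {
		arenas[l1] = new([1 << l2Bits]*heapArena) // allocate the L2 map on first use
	}
	arenas[l1][l2] = ha
}

func main() {
	setArena(19, &heapArena{id: 19})               // L1 slot 1, L2 slot 3
	fmt.Println(arenas[0] == nil, arenas[1][3].id) // true 19
}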
+ l2 = (*[1 << arenaL2Bits]*heapArena)(persistentalloc(unsafe.Sizeof(*l2), sys.PtrSize, nil)) + if l2 == nil { + throw("out of memory allocating heap arena map") + } + atomic.StorepNoWB(unsafe.Pointer(&h.arenas[ri.l1()]), unsafe.Pointer(l2)) + } + + if l2[ri.l2()] != nil { + throw("arena already initialized") + } + var r *heapArena + r = (*heapArena)(h.heapArenaAlloc.alloc(unsafe.Sizeof(*r), sys.PtrSize, &memstats.gc_sys)) + if r == nil { + r = (*heapArena)(persistentalloc(unsafe.Sizeof(*r), sys.PtrSize, &memstats.gc_sys)) + if r == nil { + throw("out of memory allocating heap arena metadata") + } + } + + // Store atomically just in case an object from the + // new heap arena becomes visible before the heap lock + // is released (which shouldn't happen, but there's + // little downside to this). + atomic.StorepNoWB(unsafe.Pointer(&l2[ri.l2()]), unsafe.Pointer(r)) } - p += -p & (_PageSize - 1) - if p+n > h.arena_used { - h.setArenaUsed(p+n, true) + // Tell the race detector about the new heap memory. + if raceenabled { + racemapshadow(v, size) } - if p&(_PageSize-1) != 0 { - throw("misrounded allocation in MHeap_SysAlloc") + return +} + +// sysReserveAligned is like sysReserve, but the returned pointer is +// aligned to align bytes. It may reserve either n or n+align bytes, +// so it returns the size that was reserved. +func sysReserveAligned(v unsafe.Pointer, size, align uintptr) (unsafe.Pointer, uintptr) { + // Since the alignment is rather large in uses of this + // function, we're not likely to get it by chance, so we ask + // for a larger region and remove the parts we don't need. + retries := 0 +retry: + p := uintptr(sysReserve(v, size+align)) + switch { + case p == 0: + return nil, 0 + case p&(align-1) == 0: + // We got lucky and got an aligned region, so we can + // use the whole thing. + return unsafe.Pointer(p), size + align + case GOOS == "windows": + // On Windows we can't release pieces of a + // reservation, so we release the whole thing and + // re-reserve the aligned sub-region. This may race, + // so we may have to try again. + sysFree(unsafe.Pointer(p), size+align, nil) + p = round(p, align) + p2 := sysReserve(unsafe.Pointer(p), size) + if p != uintptr(p2) { + // Must have raced. Try again. + sysFree(p2, size, nil) + if retries++; retries == 100 { + throw("failed to allocate aligned heap memory; too many retries") + } + goto retry + } + // Success. + return p2, size + default: + // Trim off the unaligned parts. + pAligned := round(p, align) + sysFree(unsafe.Pointer(p), pAligned-p, nil) + end := pAligned + size + endLen := (p + size + align) - end + if endLen > 0 { + sysFree(unsafe.Pointer(end), endLen, nil) + } + return unsafe.Pointer(pAligned), size } - return unsafe.Pointer(p) } // base address for all 0-byte allocations @@ -862,7 +1055,7 @@ func largeAlloc(size uintptr, needzero bool, noscan bool) *mspan { throw("out of memory") } s.limit = s.base() + size - heapBitsForSpan(s.base()).initSpan(s) + heapBitsForAddr(s.base()).initSpan(s) return s } @@ -875,7 +1068,7 @@ func newobject(typ *_type) unsafe.Pointer { //go:linkname reflect_unsafe_New reflect.unsafe_New func reflect_unsafe_New(typ *_type) unsafe.Pointer { - return newobject(typ) + return mallocgc(typ.size, typ, true) } // newarray allocates an array of n elements of type typ. @@ -1046,6 +1239,34 @@ func persistentalloc1(size, align uintptr, sysStat *uint64) *notInHeap { return p } +// linearAlloc is a simple linear allocator that pre-reserves a region +// of memory and then maps that region as needed. 
The caller is +// responsible for locking. +type linearAlloc struct { + next uintptr // next free byte + mapped uintptr // one byte past end of mapped space + end uintptr // end of reserved space +} + +func (l *linearAlloc) init(base, size uintptr) { + l.next, l.mapped = base, base + l.end = base + size +} + +func (l *linearAlloc) alloc(size, align uintptr, sysStat *uint64) unsafe.Pointer { + p := round(l.next, align) + if p+size > l.end { + return nil + } + l.next = p + size + if pEnd := round(l.next-1, physPageSize); pEnd > l.mapped { + // We need to map more of the reserved space. + sysMap(unsafe.Pointer(l.mapped), pEnd-l.mapped, sysStat) + l.mapped = pEnd + } + return unsafe.Pointer(p) +} + // notInHeap is off-heap memory allocated by a lower-level allocator // like sysAlloc or persistentAlloc. // diff --git a/libgo/go/runtime/malloc_test.go b/libgo/go/runtime/malloc_test.go index ab580f8..30a7d84 100644 --- a/libgo/go/runtime/malloc_test.go +++ b/libgo/go/runtime/malloc_test.go @@ -7,16 +7,25 @@ package runtime_test import ( "flag" "fmt" + "internal/race" + "internal/testenv" + "os" + "os/exec" "reflect" . "runtime" + "strings" "testing" "time" "unsafe" ) +var testMemStatsCount int + func TestMemStats(t *testing.T) { t.Skip("skipping test with gccgo") + testMemStatsCount++ + // Make sure there's at least one forced GC. GC() @@ -32,6 +41,13 @@ func TestMemStats(t *testing.T) { } le := func(thresh float64) func(interface{}) error { return func(x interface{}) error { + // These sanity tests aren't necessarily valid + // with high -test.count values, so only run + // them once. + if testMemStatsCount > 1 { + return nil + } + if reflect.ValueOf(x).Convert(reflect.TypeOf(thresh)).Float() < thresh { return nil } @@ -50,7 +66,7 @@ func TestMemStats(t *testing.T) { // PauseTotalNs can be 0 if timer resolution is poor. fields := map[string][]func(interface{}) error{ "Alloc": {nz, le(1e10)}, "TotalAlloc": {nz, le(1e11)}, "Sys": {nz, le(1e10)}, - "Lookups": {nz, le(1e10)}, "Mallocs": {nz, le(1e10)}, "Frees": {nz, le(1e10)}, + "Lookups": {eq(uint64(0))}, "Mallocs": {nz, le(1e10)}, "Frees": {nz, le(1e10)}, "HeapAlloc": {nz, le(1e10)}, "HeapSys": {nz, le(1e10)}, "HeapIdle": {le(1e10)}, "HeapInuse": {nz, le(1e10)}, "HeapReleased": {le(1e10)}, "HeapObjects": {nz, le(1e10)}, "StackInuse": {nz, le(1e10)}, "StackSys": {nz, le(1e10)}, @@ -154,6 +170,64 @@ func TestTinyAlloc(t *testing.T) { } } +type acLink struct { + x [1 << 20]byte +} + +var arenaCollisionSink []*acLink + +func TestArenaCollision(t *testing.T) { + testenv.MustHaveExec(t) + + // Test that mheap.sysAlloc handles collisions with other + // memory mappings. + if os.Getenv("TEST_ARENA_COLLISION") != "1" { + cmd := testenv.CleanCmdEnv(exec.Command(os.Args[0], "-test.run=TestArenaCollision", "-test.v")) + cmd.Env = append(cmd.Env, "TEST_ARENA_COLLISION=1") + out, err := cmd.CombinedOutput() + if race.Enabled { + // This test runs the runtime out of hint + // addresses, so it will start mapping the + // heap wherever it can. The race detector + // doesn't support this, so look for the + // expected failure. + if want := "too many address space collisions"; !strings.Contains(string(out), want) { + t.Fatalf("want %q, got:\n%s", want, string(out)) + } + } else if !strings.Contains(string(out), "PASS\n") || err != nil { + t.Fatalf("%s\n(exit status %v)", string(out), err) + } + return + } + disallowed := [][2]uintptr{} + // Drop all but the next 3 hints. 64-bit has a lot of hints, + // so it would take a lot of memory to go through all of them. 
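linearAlloc above is a bump allocator over a pre-reserved region, mapping pages only as the cursor crosses them. The same shape, minus the sysMap step and backed by an ordinary byte slice, looks like this (illustrative only; alignment must be a power of two):

package main

import "fmt"

type bumpAlloc struct {
	buf  []byte
	next int // next free byte
}

func (l *bumpAlloc) alloc(size, align int) []byte {
	p := (l.next + align - 1) &^ (align - 1) // round up to a power-of-two alignment
	if p+size > len(l.buf) {
		return nil // reserved space exhausted
	}
	l.next = p + size
	return l.buf[p : p+size]
}

func main() {
	l := &bumpAlloc{buf: make([]byte, 1<<10)}
	a, b := l.alloc(10, 8), l.alloc(10, 8)
	fmt.Println(len(a), len(b), l.next) // 10 10 26
}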
+ KeepNArenaHints(3) + // Consume these 3 hints and force the runtime to find some + // fallback hints. + for i := 0; i < 5; i++ { + // Reserve memory at the next hint so it can't be used + // for the heap. + start, end := MapNextArenaHint() + disallowed = append(disallowed, [2]uintptr{start, end}) + // Allocate until the runtime tries to use the hint we + // just mapped over. + hint := GetNextArenaHint() + for GetNextArenaHint() == hint { + ac := new(acLink) + arenaCollisionSink = append(arenaCollisionSink, ac) + // The allocation must not have fallen into + // one of the reserved regions. + p := uintptr(unsafe.Pointer(ac)) + for _, d := range disallowed { + if d[0] <= p && p < d[1] { + t.Fatalf("allocation %#x in reserved region [%#x, %#x)", p, d[0], d[1]) + } + } + } + } +} + var mallocSink uintptr func BenchmarkMalloc8(b *testing.B) { diff --git a/libgo/go/runtime/map.go b/libgo/go/runtime/map.go new file mode 100644 index 0000000..8e97bc5 --- /dev/null +++ b/libgo/go/runtime/map.go @@ -0,0 +1,1363 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +// This file contains the implementation of Go's map type. +// +// A map is just a hash table. The data is arranged +// into an array of buckets. Each bucket contains up to +// 8 key/value pairs. The low-order bits of the hash are +// used to select a bucket. Each bucket contains a few +// high-order bits of each hash to distinguish the entries +// within a single bucket. +// +// If more than 8 keys hash to a bucket, we chain on +// extra buckets. +// +// When the hashtable grows, we allocate a new array +// of buckets twice as big. Buckets are incrementally +// copied from the old bucket array to the new bucket array. +// +// Map iterators walk through the array of buckets and +// return the keys in walk order (bucket #, then overflow +// chain order, then bucket index). To maintain iteration +// semantics, we never move keys within their bucket (if +// we did, keys might be returned 0 or 2 times). When +// growing the table, iterators remain iterating through the +// old table and must check the new table if the bucket +// they are iterating through has been moved ("evacuated") +// to the new table. + +// Picking loadFactor: too large and we have lots of overflow +// buckets, too small and we waste a lot of space. I wrote +// a simple program to check some stats for different loads: +// (64-bit, 8 byte keys and values) +// loadFactor %overflow bytes/entry hitprobe missprobe +// 4.00 2.13 20.77 3.00 4.00 +// 4.50 4.05 17.30 3.25 4.50 +// 5.00 6.85 14.77 3.50 5.00 +// 5.50 10.55 12.94 3.75 5.50 +// 6.00 15.27 11.67 4.00 6.00 +// 6.50 20.90 10.79 4.25 6.50 +// 7.00 27.14 10.15 4.50 7.00 +// 7.50 34.03 9.73 4.75 7.50 +// 8.00 41.10 9.40 5.00 8.00 +// +// %overflow = percentage of buckets which have an overflow bucket +// bytes/entry = overhead bytes used per key/value pair +// hitprobe = # of entries to check when looking up a present key +// missprobe = # of entries to check when looking up an absent key +// +// Keep in mind this data is for maximally loaded tables, i.e. just +// before the table grows. Typical tables will be somewhat less loaded. + +import ( + "runtime/internal/atomic" + "runtime/internal/sys" + "unsafe" +) + +// For gccgo, use go:linkname to rename compiler-called functions to +// themselves, so that the compiler will export them. 
+// +//go:linkname makemap runtime.makemap +//go:linkname makemap64 runtime.makemap64 +//go:linkname makemap_small runtime.makemap_small +//go:linkname mapaccess1 runtime.mapaccess1 +//go:linkname mapaccess2 runtime.mapaccess2 +//go:linkname mapaccess1_fat runtime.mapaccess1_fat +//go:linkname mapaccess2_fat runtime.mapaccess2_fat +//go:linkname mapassign runtime.mapassign +//go:linkname mapdelete runtime.mapdelete +//go:linkname mapiterinit runtime.mapiterinit +//go:linkname mapiternext runtime.mapiternext + +const ( + // Maximum number of key/value pairs a bucket can hold. + bucketCntBits = 3 + bucketCnt = 1 << bucketCntBits + + // Maximum average load of a bucket that triggers growth is 6.5. + // Represent as loadFactorNum/loadFactDen, to allow integer math. + loadFactorNum = 13 + loadFactorDen = 2 + + // Maximum key or value size to keep inline (instead of mallocing per element). + // Must fit in a uint8. + // Fast versions cannot handle big values - the cutoff size for + // fast versions in cmd/compile/internal/gc/walk.go must be at most this value. + maxKeySize = 128 + maxValueSize = 128 + + // data offset should be the size of the bmap struct, but needs to be + // aligned correctly. For amd64p32 this means 64-bit alignment + // even though pointers are 32 bit. + dataOffset = unsafe.Offsetof(struct { + b bmap + v int64 + }{}.v) + + // Possible tophash values. We reserve a few possibilities for special marks. + // Each bucket (including its overflow buckets, if any) will have either all or none of its + // entries in the evacuated* states (except during the evacuate() method, which only happens + // during map writes and thus no one else can observe the map during that time). + empty = 0 // cell is empty + evacuatedEmpty = 1 // cell is empty, bucket is evacuated. + evacuatedX = 2 // key/value is valid. Entry has been evacuated to first half of larger table. + evacuatedY = 3 // same as above, but evacuated to second half of larger table. + minTopHash = 4 // minimum tophash for a normal filled cell. + + // flags + iterator = 1 // there may be an iterator using buckets + oldIterator = 2 // there may be an iterator using oldbuckets + hashWriting = 4 // a goroutine is writing to the map + sameSizeGrow = 8 // the current map growth is to a new map of the same size + + // sentinel bucket ID for iterator checks + noCheck = 1<<(8*sys.PtrSize) - 1 +) + +// A header for a Go map. +type hmap struct { + // Note: the format of the hmap is also encoded in cmd/compile/internal/gc/reflect.go. + // Make sure this stays in sync with the compiler's definition. + count int // # live cells == size of map. Must be first (used by len() builtin) + flags uint8 + B uint8 // log_2 of # of buckets (can hold up to loadFactor * 2^B items) + noverflow uint16 // approximate number of overflow buckets; see incrnoverflow for details + hash0 uint32 // hash seed + + buckets unsafe.Pointer // array of 2^B Buckets. may be nil if count==0. + oldbuckets unsafe.Pointer // previous bucket array of half the size, non-nil only when growing + nevacuate uintptr // progress counter for evacuation (buckets less than this have been evacuated) + + extra *mapextra // optional fields +} + +// mapextra holds fields that are not present on all maps. +type mapextra struct { + // If both key and value do not contain pointers and are inline, then we mark bucket + // type as containing no pointers. This avoids scanning such maps. + // However, bmap.overflow is a pointer. 
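The loadFactorNum/loadFactorDen constants above encode the 6.5 average-load threshold with integer math. A hedged restatement of the growth check they feed (the runtime's version may order the integer operations slightly differently, but the 13/2 ratio is the point):

package main

import "fmt"

const (
	bucketCnt     = 8
	loadFactorNum = 13
	loadFactorDen = 2
)

// overLoad reports whether count items over 2^B buckets exceed an average
// load of loadFactorNum/loadFactorDen = 6.5 per bucket.
func overLoad(count int, B uint8) bool {
	return count > bucketCnt && uintptr(count) > loadFactorNum*(uintptr(1)<<B)/loadFactorDen
}

func main() {
	fmt.Println(overLoad(13, 1)) // false: 13 == 6.5*2
	fmt.Println(overLoad(14, 1)) // true
}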
In order to keep overflow buckets + // alive, we store pointers to all overflow buckets in hmap.extra.overflow and hmap.extra.oldoverflow. + // overflow and oldoverflow are only used if key and value do not contain pointers. + // overflow contains overflow buckets for hmap.buckets. + // oldoverflow contains overflow buckets for hmap.oldbuckets. + // The indirection allows to store a pointer to the slice in hiter. + overflow *[]*bmap + oldoverflow *[]*bmap + + // nextOverflow holds a pointer to a free overflow bucket. + nextOverflow *bmap +} + +// A bucket for a Go map. +type bmap struct { + // tophash generally contains the top byte of the hash value + // for each key in this bucket. If tophash[0] < minTopHash, + // tophash[0] is a bucket evacuation state instead. + tophash [bucketCnt]uint8 + // Followed by bucketCnt keys and then bucketCnt values. + // NOTE: packing all the keys together and then all the values together makes the + // code a bit more complicated than alternating key/value/key/value/... but it allows + // us to eliminate padding which would be needed for, e.g., map[int64]int8. + // Followed by an overflow pointer. +} + +// A hash iteration structure. +// If you modify hiter, also change cmd/compile/internal/gc/reflect.go to indicate +// the layout of this structure. +type hiter struct { + key unsafe.Pointer // Must be in first position. Write nil to indicate iteration end (see cmd/internal/gc/range.go). + value unsafe.Pointer // Must be in second position (see cmd/internal/gc/range.go). + t *maptype + h *hmap + buckets unsafe.Pointer // bucket ptr at hash_iter initialization time + bptr *bmap // current bucket + overflow *[]*bmap // keeps overflow buckets of hmap.buckets alive + oldoverflow *[]*bmap // keeps overflow buckets of hmap.oldbuckets alive + startBucket uintptr // bucket iteration started at + offset uint8 // intra-bucket offset to start from during iteration (should be big enough to hold bucketCnt-1) + wrapped bool // already wrapped around from end of bucket array to beginning + B uint8 + i uint8 + bucket uintptr + checkBucket uintptr +} + +// bucketShift returns 1<> (sys.PtrSize*8 - 8)) + if top < minTopHash { + top += minTopHash + } + return top +} + +func evacuated(b *bmap) bool { + h := b.tophash[0] + return h > empty && h < minTopHash +} + +func (b *bmap) overflow(t *maptype) *bmap { + return *(**bmap)(add(unsafe.Pointer(b), uintptr(t.bucketsize)-sys.PtrSize)) +} + +func (b *bmap) setoverflow(t *maptype, ovf *bmap) { + *(**bmap)(add(unsafe.Pointer(b), uintptr(t.bucketsize)-sys.PtrSize)) = ovf +} + +func (b *bmap) keys() unsafe.Pointer { + return add(unsafe.Pointer(b), dataOffset) +} + +// incrnoverflow increments h.noverflow. +// noverflow counts the number of overflow buckets. +// This is used to trigger same-size map growth. +// See also tooManyOverflowBuckets. +// To keep hmap small, noverflow is a uint16. +// When there are few buckets, noverflow is an exact count. +// When there are many buckets, noverflow is an approximate count. +func (h *hmap) incrnoverflow() { + // We trigger same-size map growth if there are + // as many overflow buckets as buckets. 
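The tophash helper above stores the top byte of each key's hash per bucket slot, bumping values below minTopHash so normal entries never collide with the reserved evacuation markers. A standalone sketch, hard-coding the 64-bit shift for brevity:

package main

import "fmt"

const minTopHash = 4 // smaller values are reserved bucket/cell states

func tophash(hash uint64) uint8 {
	top := uint8(hash >> 56) // top byte; the runtime shifts by sys.PtrSize*8-8
	if top < minTopHash {
		top += minTopHash
	}
	return top
}

func main() {
	fmt.Println(tophash(0xab12345678901234)) // 171 (0xab)
	fmt.Println(tophash(0x0212345678901234)) // 6 (2, bumped past the markers)
}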
+ // We need to be able to count to 1< int(maxSliceCap(t.bucket.size)) { + hint = 0 + } + + // initialize Hmap + if h == nil { + h = new(hmap) + } + h.hash0 = fastrand() + + // find size parameter which will hold the requested # of elements + B := uint8(0) + for overLoadFactor(hint, B) { + B++ + } + h.B = B + + // allocate initial hash table + // if B == 0, the buckets field is allocated lazily later (in mapassign) + // If hint is large zeroing this memory could take a while. + if h.B != 0 { + var nextOverflow *bmap + h.buckets, nextOverflow = makeBucketArray(t, h.B, nil) + if nextOverflow != nil { + h.extra = new(mapextra) + h.extra.nextOverflow = nextOverflow + } + } + + return h +} + +// makeBucketArray initializes a backing array for map buckets. +// 1<= 4 { + // Add on the estimated number of overflow buckets + // required to insert the median number of elements + // used with this value of b. + nbuckets += bucketShift(b - 4) + sz := t.bucket.size * nbuckets + up := roundupsize(sz) + if up != sz { + nbuckets = up / t.bucket.size + } + } + + if dirtyalloc == nil { + buckets = newarray(t.bucket, int(nbuckets)) + } else { + // dirtyalloc was previously generated by + // the above newarray(t.bucket, int(nbuckets)) + // but may not be empty. + buckets = dirtyalloc + size := t.bucket.size * nbuckets + if t.bucket.kind&kindNoPointers == 0 { + memclrHasPointers(buckets, size) + } else { + memclrNoHeapPointers(buckets, size) + } + } + + if base != nbuckets { + // We preallocated some overflow buckets. + // To keep the overhead of tracking these overflow buckets to a minimum, + // we use the convention that if a preallocated overflow bucket's overflow + // pointer is nil, then there are more available by bumping the pointer. + // We need a safe non-nil pointer for the last overflow bucket; just use buckets. + nextOverflow = (*bmap)(add(buckets, base*uintptr(t.bucketsize))) + last := (*bmap)(add(buckets, (nbuckets-1)*uintptr(t.bucketsize))) + last.setoverflow(t, (*bmap)(buckets)) + } + return buckets, nextOverflow +} + +// mapaccess1 returns a pointer to h[key]. Never returns nil, instead +// it will return a reference to the zero object for the value type if +// the key is not in the map. +// NOTE: The returned pointer may keep the whole map live, so don't +// hold onto it for very long. +func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { + // Check preemption, since unlike gc we don't check on every call. + if getg().preempt { + checkPreempt() + } + + if raceenabled && h != nil { + callerpc := getcallerpc() + pc := funcPC(mapaccess1) + racereadpc(unsafe.Pointer(h), callerpc, pc) + raceReadObjectPC(t.key, key, callerpc, pc) + } + if msanenabled && h != nil { + msanread(key, t.key.size) + } + if h == nil || h.count == 0 { + return unsafe.Pointer(&zeroVal[0]) + } + if h.flags&hashWriting != 0 { + throw("concurrent map read and map write") + } + hashfn := t.key.hashfn + equalfn := t.key.equalfn + hash := hashfn(key, uintptr(h.hash0)) + m := bucketMask(h.B) + b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) + if c := h.oldbuckets; c != nil { + if !h.sameSizeGrow() { + // There used to be half as many buckets; mask down one more power of two. 
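The m >>= 1 step that follows implements the comment just above it: while the map is doubling, a key that has not been evacuated yet still lives in the old half-size table, whose bucket index is the new index with one more high bit masked off. A worked example with illustrative numbers:

package main

import "fmt"

func main() {
	const newB = 4            // new table: 16 buckets
	m := uintptr(1)<<newB - 1 // new mask: 15
	hash := uintptr(0x5d)
	newIdx := hash & m        // 13: bucket once evacuation is done
	oldIdx := hash & (m >> 1) // 5: bucket to check while still in the old table
	fmt.Println(newIdx, oldIdx)
}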
+ m >>= 1 + } + oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) + if !evacuated(oldb) { + b = oldb + } + } + top := tophash(hash) + for ; b != nil; b = b.overflow(t) { + for i := uintptr(0); i < bucketCnt; i++ { + if b.tophash[i] != top { + continue + } + k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize)) + if t.indirectkey { + k = *((*unsafe.Pointer)(k)) + } + if equalfn(key, k) { + v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) + if t.indirectvalue { + v = *((*unsafe.Pointer)(v)) + } + return v + } + } + } + return unsafe.Pointer(&zeroVal[0]) +} + +func mapaccess2(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, bool) { + // Check preemption, since unlike gc we don't check on every call. + if getg().preempt { + checkPreempt() + } + + if raceenabled && h != nil { + callerpc := getcallerpc() + pc := funcPC(mapaccess2) + racereadpc(unsafe.Pointer(h), callerpc, pc) + raceReadObjectPC(t.key, key, callerpc, pc) + } + if msanenabled && h != nil { + msanread(key, t.key.size) + } + if h == nil || h.count == 0 { + return unsafe.Pointer(&zeroVal[0]), false + } + if h.flags&hashWriting != 0 { + throw("concurrent map read and map write") + } + hashfn := t.key.hashfn + equalfn := t.key.equalfn + hash := hashfn(key, uintptr(h.hash0)) + m := bucketMask(h.B) + b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + (hash&m)*uintptr(t.bucketsize))) + if c := h.oldbuckets; c != nil { + if !h.sameSizeGrow() { + // There used to be half as many buckets; mask down one more power of two. + m >>= 1 + } + oldb := (*bmap)(unsafe.Pointer(uintptr(c) + (hash&m)*uintptr(t.bucketsize))) + if !evacuated(oldb) { + b = oldb + } + } + top := tophash(hash) + for ; b != nil; b = b.overflow(t) { + for i := uintptr(0); i < bucketCnt; i++ { + if b.tophash[i] != top { + continue + } + k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize)) + if t.indirectkey { + k = *((*unsafe.Pointer)(k)) + } + if equalfn(key, k) { + v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) + if t.indirectvalue { + v = *((*unsafe.Pointer)(v)) + } + return v, true + } + } + } + return unsafe.Pointer(&zeroVal[0]), false +} + +// returns both key and value. Used by map iterator +func mapaccessK(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, unsafe.Pointer) { + // Check preemption, since unlike gc we don't check on every call. + if getg().preempt { + checkPreempt() + } + + if h == nil || h.count == 0 { + return nil, nil + } + hashfn := t.key.hashfn + equalfn := t.key.equalfn + hash := hashfn(key, uintptr(h.hash0)) + m := bucketMask(h.B) + b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + (hash&m)*uintptr(t.bucketsize))) + if c := h.oldbuckets; c != nil { + if !h.sameSizeGrow() { + // There used to be half as many buckets; mask down one more power of two. 
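// Editor's aside, not part of the patch: how the two access routines surface
// at the language level. A plain index expression compiles to mapaccess1 and
// the comma-ok form to mapaccess2; a missing key yields the element type's
// zero value, never a nil reference.
func lookupForms(m map[string]int, k string) (int, bool) {
	v1 := m[k]     // mapaccess1: zero value if k is absent
	v2, ok := m[k] // mapaccess2: ok reports presence
	_ = v1
	return v2, ok
}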
+ m >>= 1 + } + oldb := (*bmap)(unsafe.Pointer(uintptr(c) + (hash&m)*uintptr(t.bucketsize))) + if !evacuated(oldb) { + b = oldb + } + } + top := tophash(hash) + for ; b != nil; b = b.overflow(t) { + for i := uintptr(0); i < bucketCnt; i++ { + if b.tophash[i] != top { + continue + } + k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize)) + if t.indirectkey { + k = *((*unsafe.Pointer)(k)) + } + if equalfn(key, k) { + v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) + if t.indirectvalue { + v = *((*unsafe.Pointer)(v)) + } + return k, v + } + } + } + return nil, nil +} + +func mapaccess1_fat(t *maptype, h *hmap, key, zero unsafe.Pointer) unsafe.Pointer { + v := mapaccess1(t, h, key) + if v == unsafe.Pointer(&zeroVal[0]) { + return zero + } + return v +} + +func mapaccess2_fat(t *maptype, h *hmap, key, zero unsafe.Pointer) (unsafe.Pointer, bool) { + v := mapaccess1(t, h, key) + if v == unsafe.Pointer(&zeroVal[0]) { + return zero, false + } + return v, true +} + +// Like mapaccess, but allocates a slot for the key if it is not present in the map. +func mapassign(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { + // Check preemption, since unlike gc we don't check on every call. + if getg().preempt { + checkPreempt() + } + + if h == nil { + panic(plainError("assignment to entry in nil map")) + } + if raceenabled { + callerpc := getcallerpc() + pc := funcPC(mapassign) + racewritepc(unsafe.Pointer(h), callerpc, pc) + raceReadObjectPC(t.key, key, callerpc, pc) + } + if msanenabled { + msanread(key, t.key.size) + } + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + hashfn := t.key.hashfn + equalfn := t.key.equalfn + hash := hashfn(key, uintptr(h.hash0)) + + // Set hashWriting after calling alg.hash, since alg.hash may panic, + // in which case we have not actually done a write. + h.flags |= hashWriting + + if h.buckets == nil { + h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) + } + +again: + bucket := hash & bucketMask(h.B) + if h.growing() { + growWork(t, h, bucket) + } + b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) + top := tophash(hash) + + var inserti *uint8 + var insertk unsafe.Pointer + var val unsafe.Pointer + for { + for i := uintptr(0); i < bucketCnt; i++ { + if b.tophash[i] != top { + if b.tophash[i] == empty && inserti == nil { + inserti = &b.tophash[i] + insertk = add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize)) + val = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) + } + continue + } + k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize)) + if t.indirectkey { + k = *((*unsafe.Pointer)(k)) + } + if !equalfn(key, k) { + continue + } + // already have a mapping for key. Update it. + if t.needkeyupdate { + typedmemmove(t.key, k, key) + } + val = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) + goto done + } + ovf := b.overflow(t) + if ovf == nil { + break + } + b = ovf + } + + // Did not find mapping for key. Allocate new cell & add entry. + + // If we hit the max load factor or we have too many overflow buckets, + // and we're not already in the middle of growing, start growing. + if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { + hashGrow(t, h) + goto again // Growing the table invalidates everything, so try again + } + + if inserti == nil { + // all current buckets are full, allocate a new one. 
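// Editor's sketch, not part of the patch: the shape of the slot search above.
// While scanning a bucket, mapassign remembers the first empty cell it passes
// but keeps looking for an existing key, so updates reuse the old slot and
// inserts reuse the earliest hole. pickSlot is a simplified, hypothetical
// stand-in (0 plays the role of an empty tophash; -1 means "no free cell").
func pickSlot(tophashes []uint8, keys []uint64, top uint8, key uint64) (idx int, existing bool) {
	insert := -1
	for i := range tophashes {
		if tophashes[i] != top {
			if tophashes[i] == 0 && insert < 0 {
				insert = i
			}
			continue
		}
		if keys[i] == key {
			return i, true // update in place
		}
	}
	return insert, false // insert into the first empty cell, if any
}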
+ newb := h.newoverflow(t, b) + inserti = &newb.tophash[0] + insertk = add(unsafe.Pointer(newb), dataOffset) + val = add(insertk, bucketCnt*uintptr(t.keysize)) + } + + // store new key/value at insert position + if t.indirectkey { + kmem := newobject(t.key) + *(*unsafe.Pointer)(insertk) = kmem + insertk = kmem + } + if t.indirectvalue { + vmem := newobject(t.elem) + *(*unsafe.Pointer)(val) = vmem + } + typedmemmove(t.key, insertk, key) + *inserti = top + h.count++ + +done: + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting + if t.indirectvalue { + val = *((*unsafe.Pointer)(val)) + } + return val +} + +func mapdelete(t *maptype, h *hmap, key unsafe.Pointer) { + if raceenabled && h != nil { + callerpc := getcallerpc() + pc := funcPC(mapdelete) + racewritepc(unsafe.Pointer(h), callerpc, pc) + raceReadObjectPC(t.key, key, callerpc, pc) + } + if msanenabled && h != nil { + msanread(key, t.key.size) + } + if h == nil || h.count == 0 { + return + } + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + + hashfn := t.key.hashfn + equalfn := t.key.equalfn + hash := hashfn(key, uintptr(h.hash0)) + + // Set hashWriting after calling alg.hash, since alg.hash may panic, + // in which case we have not actually done a write (delete). + h.flags |= hashWriting + + bucket := hash & bucketMask(h.B) + if h.growing() { + growWork(t, h, bucket) + } + b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) + top := tophash(hash) +search: + for ; b != nil; b = b.overflow(t) { + for i := uintptr(0); i < bucketCnt; i++ { + if b.tophash[i] != top { + continue + } + k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize)) + k2 := k + if t.indirectkey { + k2 = *((*unsafe.Pointer)(k2)) + } + if !equalfn(key, k2) { + continue + } + // Only clear key if there are pointers in it. + if t.indirectkey { + *(*unsafe.Pointer)(k) = nil + } else if t.key.kind&kindNoPointers == 0 { + memclrHasPointers(k, t.key.size) + } + v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) + if t.indirectvalue { + *(*unsafe.Pointer)(v) = nil + } else if t.elem.kind&kindNoPointers == 0 { + memclrHasPointers(v, t.elem.size) + } else { + memclrNoHeapPointers(v, t.elem.size) + } + b.tophash[i] = empty + h.count-- + break search + } + } + + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting +} + +// mapiterinit initializes the hiter struct used for ranging over maps. +// The hiter struct pointed to by 'it' is allocated on the stack +// by the compilers order pass or on the heap by reflect_mapiterinit. +// Both need to have zeroed hiter since the struct contains pointers. +// Gccgo-specific: *it need not be zeroed by the compiler, +// and it's cheaper to zero it here. 
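// Editor's illustration, not part of the patch: the random startBucket and
// offset chosen below are why iteration order over a Go map is unspecified;
// two passes over the same map may visit keys in different orders.
// rangeOrder is a hypothetical helper that records one pass.
func rangeOrder(m map[int]string) []int {
	order := make([]int, 0, len(m))
	for k := range m {
		order = append(order, k) // visit order depends on the random start
	}
	return order
}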
+func mapiterinit(t *maptype, h *hmap, it *hiter) { + it.key = nil + it.value = nil + it.t = nil + it.h = nil + it.buckets = nil + it.bptr = nil + it.overflow = nil + it.oldoverflow = nil + it.wrapped = false + it.i = 0 + it.checkBucket = 0 + + if raceenabled && h != nil { + callerpc := getcallerpc() + racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapiterinit)) + } + + if h == nil || h.count == 0 { + return + } + + if unsafe.Sizeof(hiter{})/sys.PtrSize != 12 { + throw("hash_iter size incorrect") // see cmd/compile/internal/gc/reflect.go + } + it.t = t + it.h = h + + // grab snapshot of bucket state + it.B = h.B + it.buckets = h.buckets + if t.bucket.kind&kindNoPointers != 0 { + // Allocate the current slice and remember pointers to both current and old. + // This preserves all relevant overflow buckets alive even if + // the table grows and/or overflow buckets are added to the table + // while we are iterating. + h.createOverflow() + it.overflow = h.extra.overflow + it.oldoverflow = h.extra.oldoverflow + } + + // decide where to start + r := uintptr(fastrand()) + if h.B > 31-bucketCntBits { + r += uintptr(fastrand()) << 31 + } + it.startBucket = r & bucketMask(h.B) + it.offset = uint8(r >> h.B & (bucketCnt - 1)) + + // iterator state + it.bucket = it.startBucket + + // Remember we have an iterator. + // Can run concurrently with another mapiterinit(). + if old := h.flags; old&(iterator|oldIterator) != iterator|oldIterator { + atomic.Or8(&h.flags, iterator|oldIterator) + } + + mapiternext(it) +} + +func mapiternext(it *hiter) { + // Check preemption, since unlike gc we don't check on every call. + if getg().preempt { + checkPreempt() + } + + h := it.h + if raceenabled { + callerpc := getcallerpc() + racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapiternext)) + } + if h.flags&hashWriting != 0 { + throw("concurrent map iteration and map write") + } + t := it.t + bucket := it.bucket + b := it.bptr + i := it.i + checkBucket := it.checkBucket + hashfn := t.key.hashfn + equalfn := t.key.equalfn + +next: + if b == nil { + if bucket == it.startBucket && it.wrapped { + // end of iteration + it.key = nil + it.value = nil + return + } + if h.growing() && it.B == h.B { + // Iterator was started in the middle of a grow, and the grow isn't done yet. + // If the bucket we're looking at hasn't been filled in yet (i.e. the old + // bucket hasn't been evacuated) then we need to iterate through the old + // bucket and only return the ones that will be migrated to this bucket. + oldbucket := bucket & it.h.oldbucketmask() + b = (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) + if !evacuated(b) { + checkBucket = bucket + } else { + b = (*bmap)(add(it.buckets, bucket*uintptr(t.bucketsize))) + checkBucket = noCheck + } + } else { + b = (*bmap)(add(it.buckets, bucket*uintptr(t.bucketsize))) + checkBucket = noCheck + } + bucket++ + if bucket == bucketShift(it.B) { + bucket = 0 + it.wrapped = true + } + i = 0 + } + for ; i < bucketCnt; i++ { + offi := (i + it.offset) & (bucketCnt - 1) + if b.tophash[offi] == empty || b.tophash[offi] == evacuatedEmpty { + continue + } + k := add(unsafe.Pointer(b), dataOffset+uintptr(offi)*uintptr(t.keysize)) + if t.indirectkey { + k = *((*unsafe.Pointer)(k)) + } + v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+uintptr(offi)*uintptr(t.valuesize)) + if checkBucket != noCheck && !h.sameSizeGrow() { + // Special case: iterator was started during a grow to a larger size + // and the grow is not done yet. 
We're working on a bucket whose + // oldbucket has not been evacuated yet. Or at least, it wasn't + // evacuated when we started the bucket. So we're iterating + // through the oldbucket, skipping any keys that will go + // to the other new bucket (each oldbucket expands to two + // buckets during a grow). + if t.reflexivekey || equalfn(k, k) { + // If the item in the oldbucket is not destined for + // the current new bucket in the iteration, skip it. + hash := hashfn(k, uintptr(h.hash0)) + if hash&bucketMask(it.B) != checkBucket { + continue + } + } else { + // Hash isn't repeatable if k != k (NaNs). We need a + // repeatable and randomish choice of which direction + // to send NaNs during evacuation. We'll use the low + // bit of tophash to decide which way NaNs go. + // NOTE: this case is why we need two evacuate tophash + // values, evacuatedX and evacuatedY, that differ in + // their low bit. + if checkBucket>>(it.B-1) != uintptr(b.tophash[offi]&1) { + continue + } + } + } + if (b.tophash[offi] != evacuatedX && b.tophash[offi] != evacuatedY) || + !(t.reflexivekey || equalfn(k, k)) { + // This is the golden data, we can return it. + // OR + // key!=key, so the entry can't be deleted or updated, so we can just return it. + // That's lucky for us because when key!=key we can't look it up successfully. + it.key = k + if t.indirectvalue { + v = *((*unsafe.Pointer)(v)) + } + it.value = v + } else { + // The hash table has grown since the iterator was started. + // The golden data for this key is now somewhere else. + // Check the current hash table for the data. + // This code handles the case where the key + // has been deleted, updated, or deleted and reinserted. + // NOTE: we need to regrab the key as it has potentially been + // updated to an equal() but not identical key (e.g. +0.0 vs -0.0). + rk, rv := mapaccessK(t, h, k) + if rk == nil { + continue // key has been deleted + } + it.key = rk + it.value = rv + } + it.bucket = bucket + if it.bptr != b { // avoid unnecessary write barrier; see issue 14921 + it.bptr = b + } + it.i = i + 1 + it.checkBucket = checkBucket + return + } + b = b.overflow(t) + i = 0 + goto next +} + +// mapclear deletes all keys from a map. +func mapclear(t *maptype, h *hmap) { + if raceenabled && h != nil { + callerpc := getcallerpc() + pc := funcPC(mapclear) + racewritepc(unsafe.Pointer(h), callerpc, pc) + } + + if h == nil || h.count == 0 { + return + } + + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + + h.flags |= hashWriting + + h.flags &^= sameSizeGrow + h.oldbuckets = nil + h.nevacuate = 0 + h.noverflow = 0 + h.count = 0 + + // Keep the mapextra allocation but clear any extra information. + if h.extra != nil { + *h.extra = mapextra{} + } + + // makeBucketArray clears the memory pointed to by h.buckets + // and recovers any overflow buckets by generating them + // as if h.buckets was newly alloced. + _, nextOverflow := makeBucketArray(t, h.B, h.buckets) + if nextOverflow != nil { + // If overflow buckets are created then h.extra + // will have been allocated during initial bucket creation. + h.extra.nextOverflow = nextOverflow + } + + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting +} + +func hashGrow(t *maptype, h *hmap) { + // If we've hit the load factor, get bigger. + // Otherwise, there are too many overflow buckets, + // so keep the same number of buckets and "grow" laterally. 
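// Editor's sketch, not part of the patch: the decision hashGrow is about to
// make, pulled out on its own. The constants mirror the runtime's 6.5
// average-load threshold (loadFactorNum/loadFactorDen = 13/2, bucketCnt = 8)
// but are copied here only for illustration; growsBigger is hypothetical.
const (
	sketchBucketCnt     = 8
	sketchLoadFactorNum = 13
	sketchLoadFactorDen = 2
)

func growsBigger(count int, B uint8, tooManyOverflow bool) bool {
	overloaded := count > sketchBucketCnt &&
		uintptr(count) > sketchLoadFactorNum*((uintptr(1)<<B)/sketchLoadFactorDen)
	if overloaded {
		return true // double the bucket array
	}
	// Otherwise the caller only grows "laterally": same size, fresh buckets,
	// which sheds the accumulated overflow buckets.
	_ = tooManyOverflow
	return false
}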
+	bigger := uint8(1)
+	if !overLoadFactor(h.count+1, h.B) {
+		bigger = 0
+		h.flags |= sameSizeGrow
+	}
+	oldbuckets := h.buckets
+	newbuckets, nextOverflow := makeBucketArray(t, h.B+bigger, nil)
+
+	flags := h.flags &^ (iterator | oldIterator)
+	if h.flags&iterator != 0 {
+		flags |= oldIterator
+	}
+	// commit the grow (atomic wrt gc)
+	h.B += bigger
+	h.flags = flags
+	h.oldbuckets = oldbuckets
+	h.buckets = newbuckets
+	h.nevacuate = 0
+	h.noverflow = 0
+
+	if h.extra != nil && h.extra.overflow != nil {
+		// Promote current overflow buckets to the old generation.
+		if h.extra.oldoverflow != nil {
+			throw("oldoverflow is not nil")
+		}
+		h.extra.oldoverflow = h.extra.overflow
+		h.extra.overflow = nil
+	}
+	if nextOverflow != nil {
+		if h.extra == nil {
+			h.extra = new(mapextra)
+		}
+		h.extra.nextOverflow = nextOverflow
+	}
+
+	// the actual copying of the hash table data is done incrementally
+	// by growWork() and evacuate().
+}
+
+// overLoadFactor reports whether count items placed in 1<<B buckets is over the load factor.
+func overLoadFactor(count int, B uint8) bool {
+	return count > bucketCnt && uintptr(count) > loadFactorNum*(bucketShift(B)/loadFactorDen)
+}
+
+// tooManyOverflowBuckets reports whether noverflow buckets is too many for a map with 1<<B buckets.
+// Note that most of these overflow buckets must be in sparse use;
+// if use was dense, then we'd have already triggered regular map growth.
+func tooManyOverflowBuckets(noverflow uint16, B uint8) bool {
+	// If the threshold is too low, we do extraneous work.
+	// If the threshold is too high, maps that grow and shrink can hold on to lots of unused memory.
+	// "too many" means (approximately) as many overflow buckets as regular buckets.
+	// See incrnoverflow for more details.
+	if B > 15 {
+		B = 15
+	}
+	// The compiler doesn't see here that B < 16; mask B to generate shorter shift code.
+	return noverflow >= uint16(1)<<(B&15)
+}
+
+// growing reports whether h is growing. The growth may be to the same size or bigger.
+func (h *hmap) growing() bool {
+	return h.oldbuckets != nil
+}
+
+// sameSizeGrow reports whether the current growth is to a map of the same size.
+func (h *hmap) sameSizeGrow() bool {
+	return h.flags&sameSizeGrow != 0
+}
+
+// noldbuckets calculates the number of buckets prior to the current map growth.
+func (h *hmap) noldbuckets() uintptr {
+	oldB := h.B
+	if !h.sameSizeGrow() {
+		oldB--
+	}
+	return bucketShift(oldB)
+}
+
+// oldbucketmask provides a mask that can be applied to calculate n % noldbuckets().
+func (h *hmap) oldbucketmask() uintptr {
+	return h.noldbuckets() - 1
+}
+
+func growWork(t *maptype, h *hmap, bucket uintptr) {
+	// make sure we evacuate the oldbucket corresponding
+	// to the bucket we're about to use
+	evacuate(t, h, bucket&h.oldbucketmask())
+
+	// evacuate one more oldbucket to make progress on growing
+	if h.growing() {
+		evacuate(t, h, h.nevacuate)
+	}
+}
+
+func bucketEvacuated(t *maptype, h *hmap, bucket uintptr) bool {
+	b := (*bmap)(add(h.oldbuckets, bucket*uintptr(t.bucketsize)))
+	return evacuated(b)
+}
+
+// evacDst is an evacuation destination.
+type evacDst struct {
+	b *bmap          // current destination bucket
+	i int            // key/val index into b
+	k unsafe.Pointer // pointer to current key storage
+	v unsafe.Pointer // pointer to current value storage
+}
+
+func evacuate(t *maptype, h *hmap, oldbucket uintptr) {
+	b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)))
+	newbit := h.noldbuckets()
+	if !evacuated(b) {
+		// TODO: reuse overflow buckets instead of using new ones, if there
+		// is no iterator using the old buckets. (If !oldIterator.)
+
+		// xy contains the x and y (low and high) evacuation destinations.
+		var xy [2]evacDst
+		x := &xy[0]
+		x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize)))
+		x.k = add(unsafe.Pointer(x.b), dataOffset)
+		x.v = add(x.k, bucketCnt*uintptr(t.keysize))
+
+		if !h.sameSizeGrow() {
+			// Only calculate y pointers if we're growing bigger.
+			// Otherwise GC can see bad pointers.
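// Editor's illustration, not part of the patch: the x/y split used by
// evacuate. When the table doubles, every entry of old bucket i lands either
// in new bucket i (the "x" half) or in new bucket i+newbit (the "y" half),
// decided by one extra hash bit. Concrete numbers: growing from 8 to 16
// buckets, newbit == 8, so a key in old bucket 3 whose hash has bit 3 (value
// 8) set moves to new bucket 11. evacuationTarget is a hypothetical helper.
func evacuationTarget(hash, oldbucket, newbit uintptr) uintptr {
	if hash&newbit != 0 {
		return oldbucket + newbit // y destination
	}
	return oldbucket // x destination
}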
+ y := &xy[1] + y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) + y.k = add(unsafe.Pointer(y.b), dataOffset) + y.v = add(y.k, bucketCnt*uintptr(t.keysize)) + } + + for ; b != nil; b = b.overflow(t) { + k := add(unsafe.Pointer(b), dataOffset) + v := add(k, bucketCnt*uintptr(t.keysize)) + for i := 0; i < bucketCnt; i, k, v = i+1, add(k, uintptr(t.keysize)), add(v, uintptr(t.valuesize)) { + top := b.tophash[i] + if top == empty { + b.tophash[i] = evacuatedEmpty + continue + } + if top < minTopHash { + throw("bad map state") + } + k2 := k + if t.indirectkey { + k2 = *((*unsafe.Pointer)(k2)) + } + var useY uint8 + if !h.sameSizeGrow() { + // Compute hash to make our evacuation decision (whether we need + // to send this key/value to bucket x or bucket y). + hash := t.key.hashfn(k2, uintptr(h.hash0)) + if h.flags&iterator != 0 && !t.reflexivekey && !t.key.equalfn(k2, k2) { + // If key != key (NaNs), then the hash could be (and probably + // will be) entirely different from the old hash. Moreover, + // it isn't reproducible. Reproducibility is required in the + // presence of iterators, as our evacuation decision must + // match whatever decision the iterator made. + // Fortunately, we have the freedom to send these keys either + // way. Also, tophash is meaningless for these kinds of keys. + // We let the low bit of tophash drive the evacuation decision. + // We recompute a new random tophash for the next level so + // these keys will get evenly distributed across all buckets + // after multiple grows. + useY = top & 1 + top = tophash(hash) + } else { + if hash&newbit != 0 { + useY = 1 + } + } + } + + if evacuatedX+1 != evacuatedY { + throw("bad evacuatedN") + } + + b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY + dst := &xy[useY] // evacuation destination + + if dst.i == bucketCnt { + dst.b = h.newoverflow(t, dst.b) + dst.i = 0 + dst.k = add(unsafe.Pointer(dst.b), dataOffset) + dst.v = add(dst.k, bucketCnt*uintptr(t.keysize)) + } + dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check + if t.indirectkey { + *(*unsafe.Pointer)(dst.k) = k2 // copy pointer + } else { + typedmemmove(t.key, dst.k, k) // copy value + } + if t.indirectvalue { + *(*unsafe.Pointer)(dst.v) = *(*unsafe.Pointer)(v) + } else { + typedmemmove(t.elem, dst.v, v) + } + dst.i++ + // These updates might push these pointers past the end of the + // key or value arrays. That's ok, as we have the overflow pointer + // at the end of the bucket to protect against pointing past the + // end of the bucket. + dst.k = add(dst.k, uintptr(t.keysize)) + dst.v = add(dst.v, uintptr(t.valuesize)) + } + } + // Unlink the overflow buckets & clear key/value to help GC. + if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { + b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) + // Preserve b.tophash because the evacuation + // state is maintained there. + ptr := add(b, dataOffset) + n := uintptr(t.bucketsize) - dataOffset + memclrHasPointers(ptr, n) + } + } + + if oldbucket == h.nevacuate { + advanceEvacuationMark(h, t, newbit) + } +} + +func advanceEvacuationMark(h *hmap, t *maptype, newbit uintptr) { + h.nevacuate++ + // Experiments suggest that 1024 is overkill by at least an order of magnitude. + // Put it in there as a safeguard anyway, to ensure O(1) behavior. 
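// Editor's sketch, not part of the patch: the bounded watermark advance used
// below, in isolation. Scanning at most a fixed budget of already-evacuated
// buckets per call keeps each map operation O(1) even while a grow is in
// flight. advanceMark is a hypothetical stand-in for advanceEvacuationMark.
func advanceMark(done []bool, mark, budget int) int {
	stop := mark + budget
	if stop > len(done) {
		stop = len(done)
	}
	for mark < stop && done[mark] {
		mark++
	}
	return mark
}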
+ stop := h.nevacuate + 1024 + if stop > newbit { + stop = newbit + } + for h.nevacuate != stop && bucketEvacuated(t, h, h.nevacuate) { + h.nevacuate++ + } + if h.nevacuate == newbit { // newbit == # of oldbuckets + // Growing is all done. Free old main bucket array. + h.oldbuckets = nil + // Can discard old overflow buckets as well. + // If they are still referenced by an iterator, + // then the iterator holds a pointers to the slice. + if h.extra != nil { + h.extra.oldoverflow = nil + } + h.flags &^= sameSizeGrow + } +} + +func ismapkey(t *_type) bool { + return t.hashfn != nil +} + +// Reflect stubs. Called from ../reflect/asm_*.s + +//go:linkname reflect_makemap reflect.makemap +func reflect_makemap(t *maptype, cap int) *hmap { + // Check invariants and reflects math. + if !ismapkey(t.key) { + throw("runtime.reflect_makemap: unsupported map key type") + } + if t.key.size > maxKeySize && (!t.indirectkey || t.keysize != uint8(sys.PtrSize)) || + t.key.size <= maxKeySize && (t.indirectkey || t.keysize != uint8(t.key.size)) { + throw("key size wrong") + } + if t.elem.size > maxValueSize && (!t.indirectvalue || t.valuesize != uint8(sys.PtrSize)) || + t.elem.size <= maxValueSize && (t.indirectvalue || t.valuesize != uint8(t.elem.size)) { + throw("value size wrong") + } + if t.key.align > bucketCnt { + throw("key align too big") + } + if t.elem.align > bucketCnt { + throw("value align too big") + } + if t.key.size%uintptr(t.key.align) != 0 { + throw("key size not a multiple of key align") + } + if t.elem.size%uintptr(t.elem.align) != 0 { + throw("value size not a multiple of value align") + } + if bucketCnt < 8 { + throw("bucketsize too small for proper alignment") + } + if dataOffset%uintptr(t.key.align) != 0 { + throw("need padding in bucket (key)") + } + if dataOffset%uintptr(t.elem.align) != 0 { + throw("need padding in bucket (value)") + } + + return makemap(t, cap, nil) +} + +//go:linkname reflect_mapaccess reflect.mapaccess +func reflect_mapaccess(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { + val, ok := mapaccess2(t, h, key) + if !ok { + // reflect wants nil for a missing element + val = nil + } + return val +} + +//go:linkname reflect_mapassign reflect.mapassign +func reflect_mapassign(t *maptype, h *hmap, key unsafe.Pointer, val unsafe.Pointer) { + p := mapassign(t, h, key) + typedmemmove(t.elem, p, val) +} + +//go:linkname reflect_mapdelete reflect.mapdelete +func reflect_mapdelete(t *maptype, h *hmap, key unsafe.Pointer) { + mapdelete(t, h, key) +} + +//go:linkname reflect_mapiterinit reflect.mapiterinit +func reflect_mapiterinit(t *maptype, h *hmap) *hiter { + it := new(hiter) + mapiterinit(t, h, it) + return it +} + +//go:linkname reflect_mapiternext reflect.mapiternext +func reflect_mapiternext(it *hiter) { + mapiternext(it) +} + +//go:linkname reflect_mapiterkey reflect.mapiterkey +func reflect_mapiterkey(it *hiter) unsafe.Pointer { + return it.key +} + +//go:linkname reflect_maplen reflect.maplen +func reflect_maplen(h *hmap) int { + if h == nil { + return 0 + } + if raceenabled { + callerpc := getcallerpc() + racereadpc(unsafe.Pointer(h), callerpc, funcPC(reflect_maplen)) + } + return h.count +} + +//go:linkname reflect_ismapkey reflect.ismapkey +func reflect_ismapkey(t *_type) bool { + return ismapkey(t) +} + +const maxZero = 1024 // must match value in cmd/compile/internal/gc/walk.go +var zeroVal [maxZero]byte diff --git a/libgo/go/runtime/map_benchmark_test.go b/libgo/go/runtime/map_benchmark_test.go new file mode 100644 index 0000000..025c039 --- /dev/null 
+++ b/libgo/go/runtime/map_benchmark_test.go @@ -0,0 +1,372 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +package runtime_test + +import ( + "fmt" + "strconv" + "strings" + "testing" +) + +const size = 10 + +func BenchmarkHashStringSpeed(b *testing.B) { + strings := make([]string, size) + for i := 0; i < size; i++ { + strings[i] = fmt.Sprintf("string#%d", i) + } + sum := 0 + m := make(map[string]int, size) + for i := 0; i < size; i++ { + m[strings[i]] = 0 + } + idx := 0 + b.ResetTimer() + for i := 0; i < b.N; i++ { + sum += m[strings[idx]] + idx++ + if idx == size { + idx = 0 + } + } +} + +type chunk [17]byte + +func BenchmarkHashBytesSpeed(b *testing.B) { + // a bunch of chunks, each with a different alignment mod 16 + var chunks [size]chunk + // initialize each to a different value + for i := 0; i < size; i++ { + chunks[i][0] = byte(i) + } + // put into a map + m := make(map[chunk]int, size) + for i, c := range chunks { + m[c] = i + } + idx := 0 + b.ResetTimer() + for i := 0; i < b.N; i++ { + if m[chunks[idx]] != idx { + b.Error("bad map entry for chunk") + } + idx++ + if idx == size { + idx = 0 + } + } +} + +func BenchmarkHashInt32Speed(b *testing.B) { + ints := make([]int32, size) + for i := 0; i < size; i++ { + ints[i] = int32(i) + } + sum := 0 + m := make(map[int32]int, size) + for i := 0; i < size; i++ { + m[ints[i]] = 0 + } + idx := 0 + b.ResetTimer() + for i := 0; i < b.N; i++ { + sum += m[ints[idx]] + idx++ + if idx == size { + idx = 0 + } + } +} + +func BenchmarkHashInt64Speed(b *testing.B) { + ints := make([]int64, size) + for i := 0; i < size; i++ { + ints[i] = int64(i) + } + sum := 0 + m := make(map[int64]int, size) + for i := 0; i < size; i++ { + m[ints[i]] = 0 + } + idx := 0 + b.ResetTimer() + for i := 0; i < b.N; i++ { + sum += m[ints[idx]] + idx++ + if idx == size { + idx = 0 + } + } +} +func BenchmarkHashStringArraySpeed(b *testing.B) { + stringpairs := make([][2]string, size) + for i := 0; i < size; i++ { + for j := 0; j < 2; j++ { + stringpairs[i][j] = fmt.Sprintf("string#%d/%d", i, j) + } + } + sum := 0 + m := make(map[[2]string]int, size) + for i := 0; i < size; i++ { + m[stringpairs[i]] = 0 + } + idx := 0 + b.ResetTimer() + for i := 0; i < b.N; i++ { + sum += m[stringpairs[idx]] + idx++ + if idx == size { + idx = 0 + } + } +} + +func BenchmarkMegMap(b *testing.B) { + m := make(map[string]bool) + for suffix := 'A'; suffix <= 'G'; suffix++ { + m[strings.Repeat("X", 1<<20-1)+fmt.Sprint(suffix)] = true + } + key := strings.Repeat("X", 1<<20-1) + "k" + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = m[key] + } +} + +func BenchmarkMegOneMap(b *testing.B) { + m := make(map[string]bool) + m[strings.Repeat("X", 1<<20)] = true + key := strings.Repeat("Y", 1<<20) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = m[key] + } +} + +func BenchmarkMegEqMap(b *testing.B) { + m := make(map[string]bool) + key1 := strings.Repeat("X", 1<<20) + key2 := strings.Repeat("X", 1<<20) // equal but different instance + m[key1] = true + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = m[key2] + } +} + +func BenchmarkMegEmptyMap(b *testing.B) { + m := make(map[string]bool) + key := strings.Repeat("X", 1<<20) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = m[key] + } +} + +func BenchmarkSmallStrMap(b *testing.B) { + m := make(map[string]bool) + for suffix := 'A'; suffix <= 'G'; suffix++ { + m[fmt.Sprint(suffix)] = true + } + key := "k" + b.ResetTimer() + for 
i := 0; i < b.N; i++ { + _, _ = m[key] + } +} + +func BenchmarkMapStringKeysEight_16(b *testing.B) { benchmarkMapStringKeysEight(b, 16) } +func BenchmarkMapStringKeysEight_32(b *testing.B) { benchmarkMapStringKeysEight(b, 32) } +func BenchmarkMapStringKeysEight_64(b *testing.B) { benchmarkMapStringKeysEight(b, 64) } +func BenchmarkMapStringKeysEight_1M(b *testing.B) { benchmarkMapStringKeysEight(b, 1<<20) } + +func benchmarkMapStringKeysEight(b *testing.B, keySize int) { + m := make(map[string]bool) + for i := 0; i < 8; i++ { + m[strings.Repeat("K", i+1)] = true + } + key := strings.Repeat("K", keySize) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = m[key] + } +} + +func BenchmarkIntMap(b *testing.B) { + m := make(map[int]bool) + for i := 0; i < 8; i++ { + m[i] = true + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = m[7] + } +} + +// Accessing the same keys in a row. +func benchmarkRepeatedLookup(b *testing.B, lookupKeySize int) { + m := make(map[string]bool) + // At least bigger than a single bucket: + for i := 0; i < 64; i++ { + m[fmt.Sprintf("some key %d", i)] = true + } + base := strings.Repeat("x", lookupKeySize-1) + key1 := base + "1" + key2 := base + "2" + b.ResetTimer() + for i := 0; i < b.N/4; i++ { + _ = m[key1] + _ = m[key1] + _ = m[key2] + _ = m[key2] + } +} + +func BenchmarkRepeatedLookupStrMapKey32(b *testing.B) { benchmarkRepeatedLookup(b, 32) } +func BenchmarkRepeatedLookupStrMapKey1M(b *testing.B) { benchmarkRepeatedLookup(b, 1<<20) } + +func BenchmarkNewEmptyMap(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + _ = make(map[int]int) + } +} + +func BenchmarkNewSmallMap(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + m := make(map[int]int) + m[0] = 0 + m[1] = 1 + } +} + +func BenchmarkMapIter(b *testing.B) { + m := make(map[int]bool) + for i := 0; i < 8; i++ { + m[i] = true + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + for range m { + } + } +} + +func BenchmarkMapIterEmpty(b *testing.B) { + m := make(map[int]bool) + b.ResetTimer() + for i := 0; i < b.N; i++ { + for range m { + } + } +} + +func BenchmarkSameLengthMap(b *testing.B) { + // long strings, same length, differ in first few + // and last few bytes. 
+ m := make(map[string]bool) + s1 := "foo" + strings.Repeat("-", 100) + "bar" + s2 := "goo" + strings.Repeat("-", 100) + "ber" + m[s1] = true + m[s2] = true + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = m[s1] + } +} + +type BigKey [3]int64 + +func BenchmarkBigKeyMap(b *testing.B) { + m := make(map[BigKey]bool) + k := BigKey{3, 4, 5} + m[k] = true + for i := 0; i < b.N; i++ { + _ = m[k] + } +} + +type BigVal [3]int64 + +func BenchmarkBigValMap(b *testing.B) { + m := make(map[BigKey]BigVal) + k := BigKey{3, 4, 5} + m[k] = BigVal{6, 7, 8} + for i := 0; i < b.N; i++ { + _ = m[k] + } +} + +func BenchmarkSmallKeyMap(b *testing.B) { + m := make(map[int16]bool) + m[5] = true + for i := 0; i < b.N; i++ { + _ = m[5] + } +} + +func BenchmarkMapPopulate(b *testing.B) { + for size := 1; size < 1000000; size *= 10 { + b.Run(strconv.Itoa(size), func(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + m := make(map[int]bool) + for j := 0; j < size; j++ { + m[j] = true + } + } + }) + } +} + +type ComplexAlgKey struct { + a, b, c int64 + _ int + d int32 + _ int + e string + _ int + f, g, h int64 +} + +func BenchmarkComplexAlgMap(b *testing.B) { + m := make(map[ComplexAlgKey]bool) + var k ComplexAlgKey + m[k] = true + for i := 0; i < b.N; i++ { + _ = m[k] + } +} + +func BenchmarkGoMapClear(b *testing.B) { + b.Run("Reflexive", func(b *testing.B) { + for size := 1; size < 100000; size *= 10 { + b.Run(strconv.Itoa(size), func(b *testing.B) { + m := make(map[int]int, size) + for i := 0; i < b.N; i++ { + m[0] = size // Add one element so len(m) != 0 avoiding fast paths. + for k := range m { + delete(m, k) + } + } + }) + } + }) + b.Run("NonReflexive", func(b *testing.B) { + for size := 1; size < 100000; size *= 10 { + b.Run(strconv.Itoa(size), func(b *testing.B) { + m := make(map[float64]int, size) + for i := 0; i < b.N; i++ { + m[1.0] = size // Add one element so len(m) != 0 avoiding fast paths. + for k := range m { + delete(m, k) + } + } + }) + } + }) +} diff --git a/libgo/go/runtime/map_fast32.go b/libgo/go/runtime/map_fast32.go new file mode 100644 index 0000000..a9a06a8 --- /dev/null +++ b/libgo/go/runtime/map_fast32.go @@ -0,0 +1,413 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import ( + "runtime/internal/sys" + "unsafe" +) + +func mapaccess1_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer { + if raceenabled && h != nil { + callerpc := getcallerpc() + racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_fast32)) + } + if h == nil || h.count == 0 { + return unsafe.Pointer(&zeroVal[0]) + } + if h.flags&hashWriting != 0 { + throw("concurrent map read and map write") + } + var b *bmap + if h.B == 0 { + // One-bucket table. No need to hash. + b = (*bmap)(h.buckets) + } else { + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + m := bucketMask(h.B) + b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) + if c := h.oldbuckets; c != nil { + if !h.sameSizeGrow() { + // There used to be half as many buckets; mask down one more power of two. 
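// Editor's aside, not part of the patch: these map_fast32.go routines (and
// the fast64/faststr files added below) back maps whose key is a 4-byte
// scalar, an 8-byte scalar, or a string; the compiler is expected to route
// such maps here instead of through the generic mapaccess/mapassign entry
// points. The variables below are illustrative only.
var (
	byID   = map[uint32]string{} // candidate for the fast32 path
	byHash = map[uint64][]byte{} // candidate for the fast64 path
	byName = map[string]int{}    // candidate for the faststr path
)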
+ m >>= 1 + } + oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) + if !evacuated(oldb) { + b = oldb + } + } + } + for ; b != nil; b = b.overflow(t) { + for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) { + if *(*uint32)(k) == key && b.tophash[i] != empty { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)) + } + } + } + return unsafe.Pointer(&zeroVal[0]) +} + +func mapaccess2_fast32(t *maptype, h *hmap, key uint32) (unsafe.Pointer, bool) { + if raceenabled && h != nil { + callerpc := getcallerpc() + racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_fast32)) + } + if h == nil || h.count == 0 { + return unsafe.Pointer(&zeroVal[0]), false + } + if h.flags&hashWriting != 0 { + throw("concurrent map read and map write") + } + var b *bmap + if h.B == 0 { + // One-bucket table. No need to hash. + b = (*bmap)(h.buckets) + } else { + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + m := bucketMask(h.B) + b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) + if c := h.oldbuckets; c != nil { + if !h.sameSizeGrow() { + // There used to be half as many buckets; mask down one more power of two. + m >>= 1 + } + oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) + if !evacuated(oldb) { + b = oldb + } + } + } + for ; b != nil; b = b.overflow(t) { + for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) { + if *(*uint32)(k) == key && b.tophash[i] != empty { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)), true + } + } + } + return unsafe.Pointer(&zeroVal[0]), false +} + +func mapassign_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer { + if h == nil { + panic(plainError("assignment to entry in nil map")) + } + if raceenabled { + callerpc := getcallerpc() + racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast32)) + } + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + + // Set hashWriting after calling alg.hash for consistency with mapassign. + h.flags |= hashWriting + + if h.buckets == nil { + h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) + } + +again: + bucket := hash & bucketMask(h.B) + if h.growing() { + growWork_fast32(t, h, bucket) + } + b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) + + var insertb *bmap + var inserti uintptr + var insertk unsafe.Pointer + + for { + for i := uintptr(0); i < bucketCnt; i++ { + if b.tophash[i] == empty { + if insertb == nil { + inserti = i + insertb = b + } + continue + } + k := *((*uint32)(add(unsafe.Pointer(b), dataOffset+i*4))) + if k != key { + continue + } + inserti = i + insertb = b + goto done + } + ovf := b.overflow(t) + if ovf == nil { + break + } + b = ovf + } + + // Did not find mapping for key. Allocate new cell & add entry. + + // If we hit the max load factor or we have too many overflow buckets, + // and we're not already in the middle of growing, start growing. + if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { + hashGrow(t, h) + goto again // Growing the table invalidates everything, so try again + } + + if insertb == nil { + // all current buckets are full, allocate a new one. 
+ insertb = h.newoverflow(t, b) + inserti = 0 // not necessary, but avoids needlessly spilling inserti + } + insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks + + insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*4) + // store new key at insert position + *(*uint32)(insertk) = key + + h.count++ + +done: + val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*4+inserti*uintptr(t.valuesize)) + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting + return val +} + +func mapassign_fast32ptr(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { + if h == nil { + panic(plainError("assignment to entry in nil map")) + } + if raceenabled { + callerpc := getcallerpc() + racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast32)) + } + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + + // Set hashWriting after calling alg.hash for consistency with mapassign. + h.flags |= hashWriting + + if h.buckets == nil { + h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) + } + +again: + bucket := hash & bucketMask(h.B) + if h.growing() { + growWork_fast32(t, h, bucket) + } + b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) + + var insertb *bmap + var inserti uintptr + var insertk unsafe.Pointer + + for { + for i := uintptr(0); i < bucketCnt; i++ { + if b.tophash[i] == empty { + if insertb == nil { + inserti = i + insertb = b + } + continue + } + k := *((*unsafe.Pointer)(add(unsafe.Pointer(b), dataOffset+i*4))) + if k != key { + continue + } + inserti = i + insertb = b + goto done + } + ovf := b.overflow(t) + if ovf == nil { + break + } + b = ovf + } + + // Did not find mapping for key. Allocate new cell & add entry. + + // If we hit the max load factor or we have too many overflow buckets, + // and we're not already in the middle of growing, start growing. + if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { + hashGrow(t, h) + goto again // Growing the table invalidates everything, so try again + } + + if insertb == nil { + // all current buckets are full, allocate a new one. 
+ insertb = h.newoverflow(t, b) + inserti = 0 // not necessary, but avoids needlessly spilling inserti + } + insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks + + insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*4) + // store new key at insert position + *(*unsafe.Pointer)(insertk) = key + + h.count++ + +done: + val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*4+inserti*uintptr(t.valuesize)) + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting + return val +} + +func mapdelete_fast32(t *maptype, h *hmap, key uint32) { + if raceenabled && h != nil { + callerpc := getcallerpc() + racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_fast32)) + } + if h == nil || h.count == 0 { + return + } + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + + // Set hashWriting after calling alg.hash for consistency with mapdelete + h.flags |= hashWriting + + bucket := hash & bucketMask(h.B) + if h.growing() { + growWork_fast32(t, h, bucket) + } + b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) +search: + for ; b != nil; b = b.overflow(t) { + for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) { + if key != *(*uint32)(k) || b.tophash[i] == empty { + continue + } + // Only clear key if there are pointers in it. + if t.key.kind&kindNoPointers == 0 { + memclrHasPointers(k, t.key.size) + } + v := add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)) + if t.elem.kind&kindNoPointers == 0 { + memclrHasPointers(v, t.elem.size) + } else { + memclrNoHeapPointers(v, t.elem.size) + } + b.tophash[i] = empty + h.count-- + break search + } + } + + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting +} + +func growWork_fast32(t *maptype, h *hmap, bucket uintptr) { + // make sure we evacuate the oldbucket corresponding + // to the bucket we're about to use + evacuate_fast32(t, h, bucket&h.oldbucketmask()) + + // evacuate one more oldbucket to make progress on growing + if h.growing() { + evacuate_fast32(t, h, h.nevacuate) + } +} + +func evacuate_fast32(t *maptype, h *hmap, oldbucket uintptr) { + b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) + newbit := h.noldbuckets() + if !evacuated(b) { + // TODO: reuse overflow buckets instead of using new ones, if there + // is no iterator using the old buckets. (If !oldIterator.) + + // xy contains the x and y (low and high) evacuation destinations. + var xy [2]evacDst + x := &xy[0] + x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) + x.k = add(unsafe.Pointer(x.b), dataOffset) + x.v = add(x.k, bucketCnt*4) + + if !h.sameSizeGrow() { + // Only calculate y pointers if we're growing bigger. + // Otherwise GC can see bad pointers. 
+ y := &xy[1] + y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) + y.k = add(unsafe.Pointer(y.b), dataOffset) + y.v = add(y.k, bucketCnt*4) + } + + for ; b != nil; b = b.overflow(t) { + k := add(unsafe.Pointer(b), dataOffset) + v := add(k, bucketCnt*4) + for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 4), add(v, uintptr(t.valuesize)) { + top := b.tophash[i] + if top == empty { + b.tophash[i] = evacuatedEmpty + continue + } + if top < minTopHash { + throw("bad map state") + } + var useY uint8 + if !h.sameSizeGrow() { + // Compute hash to make our evacuation decision (whether we need + // to send this key/value to bucket x or bucket y). + hash := t.key.hashfn(k, uintptr(h.hash0)) + if hash&newbit != 0 { + useY = 1 + } + } + + b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap + dst := &xy[useY] // evacuation destination + + if dst.i == bucketCnt { + dst.b = h.newoverflow(t, dst.b) + dst.i = 0 + dst.k = add(unsafe.Pointer(dst.b), dataOffset) + dst.v = add(dst.k, bucketCnt*4) + } + dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check + + // Copy key. + if sys.PtrSize == 4 && t.key.kind&kindNoPointers == 0 && writeBarrier.enabled { + // Write with a write barrier. + *(*unsafe.Pointer)(dst.k) = *(*unsafe.Pointer)(k) + } else { + *(*uint32)(dst.k) = *(*uint32)(k) + } + + typedmemmove(t.elem, dst.v, v) + dst.i++ + // These updates might push these pointers past the end of the + // key or value arrays. That's ok, as we have the overflow pointer + // at the end of the bucket to protect against pointing past the + // end of the bucket. + dst.k = add(dst.k, 4) + dst.v = add(dst.v, uintptr(t.valuesize)) + } + } + // Unlink the overflow buckets & clear key/value to help GC. + if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { + b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) + // Preserve b.tophash because the evacuation + // state is maintained there. + ptr := add(b, dataOffset) + n := uintptr(t.bucketsize) - dataOffset + memclrHasPointers(ptr, n) + } + } + + if oldbucket == h.nevacuate { + advanceEvacuationMark(h, t, newbit) + } +} diff --git a/libgo/go/runtime/map_fast64.go b/libgo/go/runtime/map_fast64.go new file mode 100644 index 0000000..a2a51fc --- /dev/null +++ b/libgo/go/runtime/map_fast64.go @@ -0,0 +1,419 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import ( + "runtime/internal/sys" + "unsafe" +) + +func mapaccess1_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer { + if raceenabled && h != nil { + callerpc := getcallerpc() + racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_fast64)) + } + if h == nil || h.count == 0 { + return unsafe.Pointer(&zeroVal[0]) + } + if h.flags&hashWriting != 0 { + throw("concurrent map read and map write") + } + var b *bmap + if h.B == 0 { + // One-bucket table. No need to hash. + b = (*bmap)(h.buckets) + } else { + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + m := bucketMask(h.B) + b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) + if c := h.oldbuckets; c != nil { + if !h.sameSizeGrow() { + // There used to be half as many buckets; mask down one more power of two. 
+ m >>= 1 + } + oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) + if !evacuated(oldb) { + b = oldb + } + } + } + for ; b != nil; b = b.overflow(t) { + for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) { + if *(*uint64)(k) == key && b.tophash[i] != empty { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)) + } + } + } + return unsafe.Pointer(&zeroVal[0]) +} + +func mapaccess2_fast64(t *maptype, h *hmap, key uint64) (unsafe.Pointer, bool) { + if raceenabled && h != nil { + callerpc := getcallerpc() + racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_fast64)) + } + if h == nil || h.count == 0 { + return unsafe.Pointer(&zeroVal[0]), false + } + if h.flags&hashWriting != 0 { + throw("concurrent map read and map write") + } + var b *bmap + if h.B == 0 { + // One-bucket table. No need to hash. + b = (*bmap)(h.buckets) + } else { + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + m := bucketMask(h.B) + b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) + if c := h.oldbuckets; c != nil { + if !h.sameSizeGrow() { + // There used to be half as many buckets; mask down one more power of two. + m >>= 1 + } + oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) + if !evacuated(oldb) { + b = oldb + } + } + } + for ; b != nil; b = b.overflow(t) { + for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) { + if *(*uint64)(k) == key && b.tophash[i] != empty { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)), true + } + } + } + return unsafe.Pointer(&zeroVal[0]), false +} + +func mapassign_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer { + if h == nil { + panic(plainError("assignment to entry in nil map")) + } + if raceenabled { + callerpc := getcallerpc() + racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast64)) + } + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + + // Set hashWriting after calling alg.hash for consistency with mapassign. + h.flags |= hashWriting + + if h.buckets == nil { + h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) + } + +again: + bucket := hash & bucketMask(h.B) + if h.growing() { + growWork_fast64(t, h, bucket) + } + b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) + + var insertb *bmap + var inserti uintptr + var insertk unsafe.Pointer + + for { + for i := uintptr(0); i < bucketCnt; i++ { + if b.tophash[i] == empty { + if insertb == nil { + insertb = b + inserti = i + } + continue + } + k := *((*uint64)(add(unsafe.Pointer(b), dataOffset+i*8))) + if k != key { + continue + } + insertb = b + inserti = i + goto done + } + ovf := b.overflow(t) + if ovf == nil { + break + } + b = ovf + } + + // Did not find mapping for key. Allocate new cell & add entry. + + // If we hit the max load factor or we have too many overflow buckets, + // and we're not already in the middle of growing, start growing. + if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { + hashGrow(t, h) + goto again // Growing the table invalidates everything, so try again + } + + if insertb == nil { + // all current buckets are full, allocate a new one. 
+ insertb = h.newoverflow(t, b) + inserti = 0 // not necessary, but avoids needlessly spilling inserti + } + insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks + + insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*8) + // store new key at insert position + *(*uint64)(insertk) = key + + h.count++ + +done: + val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*8+inserti*uintptr(t.valuesize)) + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting + return val +} + +func mapassign_fast64ptr(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { + if h == nil { + panic(plainError("assignment to entry in nil map")) + } + if raceenabled { + callerpc := getcallerpc() + racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast64)) + } + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + + // Set hashWriting after calling alg.hash for consistency with mapassign. + h.flags |= hashWriting + + if h.buckets == nil { + h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) + } + +again: + bucket := hash & bucketMask(h.B) + if h.growing() { + growWork_fast64(t, h, bucket) + } + b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) + + var insertb *bmap + var inserti uintptr + var insertk unsafe.Pointer + + for { + for i := uintptr(0); i < bucketCnt; i++ { + if b.tophash[i] == empty { + if insertb == nil { + insertb = b + inserti = i + } + continue + } + k := *((*unsafe.Pointer)(add(unsafe.Pointer(b), dataOffset+i*8))) + if k != key { + continue + } + insertb = b + inserti = i + goto done + } + ovf := b.overflow(t) + if ovf == nil { + break + } + b = ovf + } + + // Did not find mapping for key. Allocate new cell & add entry. + + // If we hit the max load factor or we have too many overflow buckets, + // and we're not already in the middle of growing, start growing. + if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { + hashGrow(t, h) + goto again // Growing the table invalidates everything, so try again + } + + if insertb == nil { + // all current buckets are full, allocate a new one. 
+ insertb = h.newoverflow(t, b) + inserti = 0 // not necessary, but avoids needlessly spilling inserti + } + insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks + + insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*8) + // store new key at insert position + *(*unsafe.Pointer)(insertk) = key + + h.count++ + +done: + val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*8+inserti*uintptr(t.valuesize)) + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting + return val +} + +func mapdelete_fast64(t *maptype, h *hmap, key uint64) { + if raceenabled && h != nil { + callerpc := getcallerpc() + racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_fast64)) + } + if h == nil || h.count == 0 { + return + } + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + + // Set hashWriting after calling alg.hash for consistency with mapdelete + h.flags |= hashWriting + + bucket := hash & bucketMask(h.B) + if h.growing() { + growWork_fast64(t, h, bucket) + } + b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) +search: + for ; b != nil; b = b.overflow(t) { + for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) { + if key != *(*uint64)(k) || b.tophash[i] == empty { + continue + } + // Only clear key if there are pointers in it. + if t.key.kind&kindNoPointers == 0 { + memclrHasPointers(k, t.key.size) + } + v := add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)) + if t.elem.kind&kindNoPointers == 0 { + memclrHasPointers(v, t.elem.size) + } else { + memclrNoHeapPointers(v, t.elem.size) + } + b.tophash[i] = empty + h.count-- + break search + } + } + + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting +} + +func growWork_fast64(t *maptype, h *hmap, bucket uintptr) { + // make sure we evacuate the oldbucket corresponding + // to the bucket we're about to use + evacuate_fast64(t, h, bucket&h.oldbucketmask()) + + // evacuate one more oldbucket to make progress on growing + if h.growing() { + evacuate_fast64(t, h, h.nevacuate) + } +} + +func evacuate_fast64(t *maptype, h *hmap, oldbucket uintptr) { + b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) + newbit := h.noldbuckets() + if !evacuated(b) { + // TODO: reuse overflow buckets instead of using new ones, if there + // is no iterator using the old buckets. (If !oldIterator.) + + // xy contains the x and y (low and high) evacuation destinations. + var xy [2]evacDst + x := &xy[0] + x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) + x.k = add(unsafe.Pointer(x.b), dataOffset) + x.v = add(x.k, bucketCnt*8) + + if !h.sameSizeGrow() { + // Only calculate y pointers if we're growing bigger. + // Otherwise GC can see bad pointers. 
+ y := &xy[1] + y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) + y.k = add(unsafe.Pointer(y.b), dataOffset) + y.v = add(y.k, bucketCnt*8) + } + + for ; b != nil; b = b.overflow(t) { + k := add(unsafe.Pointer(b), dataOffset) + v := add(k, bucketCnt*8) + for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 8), add(v, uintptr(t.valuesize)) { + top := b.tophash[i] + if top == empty { + b.tophash[i] = evacuatedEmpty + continue + } + if top < minTopHash { + throw("bad map state") + } + var useY uint8 + if !h.sameSizeGrow() { + // Compute hash to make our evacuation decision (whether we need + // to send this key/value to bucket x or bucket y). + hash := t.key.hashfn(k, uintptr(h.hash0)) + if hash&newbit != 0 { + useY = 1 + } + } + + b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap + dst := &xy[useY] // evacuation destination + + if dst.i == bucketCnt { + dst.b = h.newoverflow(t, dst.b) + dst.i = 0 + dst.k = add(unsafe.Pointer(dst.b), dataOffset) + dst.v = add(dst.k, bucketCnt*8) + } + dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check + + // Copy key. + if t.key.kind&kindNoPointers == 0 && writeBarrier.enabled { + if sys.PtrSize == 8 { + // Write with a write barrier. + *(*unsafe.Pointer)(dst.k) = *(*unsafe.Pointer)(k) + } else { + // There are three ways to squeeze at least one 32 bit pointer into 64 bits. + // Give up and call typedmemmove. + typedmemmove(t.key, dst.k, k) + } + } else { + *(*uint64)(dst.k) = *(*uint64)(k) + } + + typedmemmove(t.elem, dst.v, v) + dst.i++ + // These updates might push these pointers past the end of the + // key or value arrays. That's ok, as we have the overflow pointer + // at the end of the bucket to protect against pointing past the + // end of the bucket. + dst.k = add(dst.k, 8) + dst.v = add(dst.v, uintptr(t.valuesize)) + } + } + // Unlink the overflow buckets & clear key/value to help GC. + if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { + b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) + // Preserve b.tophash because the evacuation + // state is maintained there. + ptr := add(b, dataOffset) + n := uintptr(t.bucketsize) - dataOffset + memclrHasPointers(ptr, n) + } + } + + if oldbucket == h.nevacuate { + advanceEvacuationMark(h, t, newbit) + } +} diff --git a/libgo/go/runtime/map_faststr.go b/libgo/go/runtime/map_faststr.go new file mode 100644 index 0000000..5812b3f --- /dev/null +++ b/libgo/go/runtime/map_faststr.go @@ -0,0 +1,430 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import ( + "runtime/internal/sys" + "unsafe" +) + +func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer { + if raceenabled && h != nil { + callerpc := getcallerpc() + racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_faststr)) + } + if h == nil || h.count == 0 { + return unsafe.Pointer(&zeroVal[0]) + } + if h.flags&hashWriting != 0 { + throw("concurrent map read and map write") + } + key := stringStructOf(&ky) + if h.B == 0 { + // One-bucket table. 
+ b := (*bmap)(h.buckets) + if key.len < 32 { + // short key, doing lots of comparisons is ok + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] == empty { + continue + } + if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) + } + } + return unsafe.Pointer(&zeroVal[0]) + } + // long key, try not to do more comparisons than necessary + keymaybe := uintptr(bucketCnt) + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] == empty { + continue + } + if k.str == key.str { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) + } + // check first 4 bytes + if *((*[4]byte)(key.str)) != *((*[4]byte)(k.str)) { + continue + } + // check last 4 bytes + if *((*[4]byte)(add(key.str, uintptr(key.len)-4))) != *((*[4]byte)(add(k.str, uintptr(key.len)-4))) { + continue + } + if keymaybe != bucketCnt { + // Two keys are potential matches. Use hash to distinguish them. + goto dohash + } + keymaybe = i + } + if keymaybe != bucketCnt { + k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+keymaybe*2*sys.PtrSize)) + if memequal(k.str, key.str, uintptr(key.len)) { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+keymaybe*uintptr(t.valuesize)) + } + } + return unsafe.Pointer(&zeroVal[0]) + } +dohash: + hash := t.key.hashfn(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0)) + m := bucketMask(h.B) + b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) + if c := h.oldbuckets; c != nil { + if !h.sameSizeGrow() { + // There used to be half as many buckets; mask down one more power of two. + m >>= 1 + } + oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) + if !evacuated(oldb) { + b = oldb + } + } + top := tophash(hash) + for ; b != nil; b = b.overflow(t) { + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] != top { + continue + } + if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) + } + } + } + return unsafe.Pointer(&zeroVal[0]) +} + +func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) { + if raceenabled && h != nil { + callerpc := getcallerpc() + racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_faststr)) + } + if h == nil || h.count == 0 { + return unsafe.Pointer(&zeroVal[0]), false + } + if h.flags&hashWriting != 0 { + throw("concurrent map read and map write") + } + key := stringStructOf(&ky) + if h.B == 0 { + // One-bucket table. 
+ b := (*bmap)(h.buckets) + if key.len < 32 { + // short key, doing lots of comparisons is ok + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] == empty { + continue + } + if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)), true + } + } + return unsafe.Pointer(&zeroVal[0]), false + } + // long key, try not to do more comparisons than necessary + keymaybe := uintptr(bucketCnt) + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] == empty { + continue + } + if k.str == key.str { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)), true + } + // check first 4 bytes + if *((*[4]byte)(key.str)) != *((*[4]byte)(k.str)) { + continue + } + // check last 4 bytes + if *((*[4]byte)(add(key.str, uintptr(key.len)-4))) != *((*[4]byte)(add(k.str, uintptr(key.len)-4))) { + continue + } + if keymaybe != bucketCnt { + // Two keys are potential matches. Use hash to distinguish them. + goto dohash + } + keymaybe = i + } + if keymaybe != bucketCnt { + k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+keymaybe*2*sys.PtrSize)) + if memequal(k.str, key.str, uintptr(key.len)) { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+keymaybe*uintptr(t.valuesize)), true + } + } + return unsafe.Pointer(&zeroVal[0]), false + } +dohash: + hash := t.key.hashfn(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0)) + m := bucketMask(h.B) + b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) + if c := h.oldbuckets; c != nil { + if !h.sameSizeGrow() { + // There used to be half as many buckets; mask down one more power of two. + m >>= 1 + } + oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) + if !evacuated(oldb) { + b = oldb + } + } + top := tophash(hash) + for ; b != nil; b = b.overflow(t) { + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] != top { + continue + } + if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)), true + } + } + } + return unsafe.Pointer(&zeroVal[0]), false +} + +func mapassign_faststr(t *maptype, h *hmap, s string) unsafe.Pointer { + if h == nil { + panic(plainError("assignment to entry in nil map")) + } + if raceenabled { + callerpc := getcallerpc() + racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_faststr)) + } + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + key := stringStructOf(&s) + hash := t.key.hashfn(noescape(unsafe.Pointer(&s)), uintptr(h.hash0)) + + // Set hashWriting after calling alg.hash for consistency with mapassign. 
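The faststr lookups above lean on cheap rejections before calling memequal: key lengths must match, and for long keys (32 bytes or more) in a one-bucket map only the first and last four bytes are compared before at most one full comparison or a fall back to the hashed path. A standalone sketch of that filtering idea (plain Go using only the standard library; not the runtime code):

package main

import "fmt"

// cheaplyDistinct applies the same cheap filters the faststr lookups use
// before paying for a full comparison: equal length, then equal first and
// last four bytes for long keys. A false result still needs a full
// comparison (or the hashed path) to confirm equality.
func cheaplyDistinct(a, b string) bool {
	if len(a) != len(b) {
		return true
	}
	if len(a) < 4 {
		return a != b // very short keys are just compared directly
	}
	return a[:4] != b[:4] || a[len(a)-4:] != b[len(b)-4:]
}

func main() {
	long := "prefix----------------------suffix"
	fmt.Println(cheaplyDistinct(long, long+"!"))                              // true: lengths differ
	fmt.Println(cheaplyDistinct(long, "Prefix----------------------suffix")) // true: first bytes differ
	fmt.Println(cheaplyDistinct(long, long))                                  // false: full compare still needed
}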
+ h.flags |= hashWriting + + if h.buckets == nil { + h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) + } + +again: + bucket := hash & bucketMask(h.B) + if h.growing() { + growWork_faststr(t, h, bucket) + } + b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) + top := tophash(hash) + + var insertb *bmap + var inserti uintptr + var insertk unsafe.Pointer + + for { + for i := uintptr(0); i < bucketCnt; i++ { + if b.tophash[i] != top { + if b.tophash[i] == empty && insertb == nil { + insertb = b + inserti = i + } + continue + } + k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize)) + if k.len != key.len { + continue + } + if k.str != key.str && !memequal(k.str, key.str, uintptr(key.len)) { + continue + } + // already have a mapping for key. Update it. + inserti = i + insertb = b + goto done + } + ovf := b.overflow(t) + if ovf == nil { + break + } + b = ovf + } + + // Did not find mapping for key. Allocate new cell & add entry. + + // If we hit the max load factor or we have too many overflow buckets, + // and we're not already in the middle of growing, start growing. + if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { + hashGrow(t, h) + goto again // Growing the table invalidates everything, so try again + } + + if insertb == nil { + // all current buckets are full, allocate a new one. + insertb = h.newoverflow(t, b) + inserti = 0 // not necessary, but avoids needlessly spilling inserti + } + insertb.tophash[inserti&(bucketCnt-1)] = top // mask inserti to avoid bounds checks + + insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*2*sys.PtrSize) + // store new key at insert position + *((*stringStruct)(insertk)) = *key + h.count++ + +done: + val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*2*sys.PtrSize+inserti*uintptr(t.valuesize)) + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting + return val +} + +func mapdelete_faststr(t *maptype, h *hmap, ky string) { + if raceenabled && h != nil { + callerpc := getcallerpc() + racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_faststr)) + } + if h == nil || h.count == 0 { + return + } + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + + key := stringStructOf(&ky) + hash := t.key.hashfn(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0)) + + // Set hashWriting after calling alg.hash for consistency with mapdelete + h.flags |= hashWriting + + bucket := hash & bucketMask(h.B) + if h.growing() { + growWork_faststr(t, h, bucket) + } + b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) + top := tophash(hash) +search: + for ; b != nil; b = b.overflow(t) { + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] != top { + continue + } + if k.str != key.str && !memequal(k.str, key.str, uintptr(key.len)) { + continue + } + // Clear key's pointer. 
+ k.str = nil + v := add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) + if t.elem.kind&kindNoPointers == 0 { + memclrHasPointers(v, t.elem.size) + } else { + memclrNoHeapPointers(v, t.elem.size) + } + b.tophash[i] = empty + h.count-- + break search + } + } + + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting +} + +func growWork_faststr(t *maptype, h *hmap, bucket uintptr) { + // make sure we evacuate the oldbucket corresponding + // to the bucket we're about to use + evacuate_faststr(t, h, bucket&h.oldbucketmask()) + + // evacuate one more oldbucket to make progress on growing + if h.growing() { + evacuate_faststr(t, h, h.nevacuate) + } +} + +func evacuate_faststr(t *maptype, h *hmap, oldbucket uintptr) { + b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) + newbit := h.noldbuckets() + if !evacuated(b) { + // TODO: reuse overflow buckets instead of using new ones, if there + // is no iterator using the old buckets. (If !oldIterator.) + + // xy contains the x and y (low and high) evacuation destinations. + var xy [2]evacDst + x := &xy[0] + x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) + x.k = add(unsafe.Pointer(x.b), dataOffset) + x.v = add(x.k, bucketCnt*2*sys.PtrSize) + + if !h.sameSizeGrow() { + // Only calculate y pointers if we're growing bigger. + // Otherwise GC can see bad pointers. + y := &xy[1] + y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) + y.k = add(unsafe.Pointer(y.b), dataOffset) + y.v = add(y.k, bucketCnt*2*sys.PtrSize) + } + + for ; b != nil; b = b.overflow(t) { + k := add(unsafe.Pointer(b), dataOffset) + v := add(k, bucketCnt*2*sys.PtrSize) + for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 2*sys.PtrSize), add(v, uintptr(t.valuesize)) { + top := b.tophash[i] + if top == empty { + b.tophash[i] = evacuatedEmpty + continue + } + if top < minTopHash { + throw("bad map state") + } + var useY uint8 + if !h.sameSizeGrow() { + // Compute hash to make our evacuation decision (whether we need + // to send this key/value to bucket x or bucket y). + hash := t.key.hashfn(k, uintptr(h.hash0)) + if hash&newbit != 0 { + useY = 1 + } + } + + b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap + dst := &xy[useY] // evacuation destination + + if dst.i == bucketCnt { + dst.b = h.newoverflow(t, dst.b) + dst.i = 0 + dst.k = add(unsafe.Pointer(dst.b), dataOffset) + dst.v = add(dst.k, bucketCnt*2*sys.PtrSize) + } + dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check + + // Copy key. + *(*string)(dst.k) = *(*string)(k) + + typedmemmove(t.elem, dst.v, v) + dst.i++ + // These updates might push these pointers past the end of the + // key or value arrays. That's ok, as we have the overflow pointer + // at the end of the bucket to protect against pointing past the + // end of the bucket. + dst.k = add(dst.k, 2*sys.PtrSize) + dst.v = add(dst.v, uintptr(t.valuesize)) + } + } + // Unlink the overflow buckets & clear key/value to help GC. + // Unlink the overflow buckets & clear key/value to help GC. + if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { + b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) + // Preserve b.tophash because the evacuation + // state is maintained there. 
+ ptr := add(b, dataOffset) + n := uintptr(t.bucketsize) - dataOffset + memclrHasPointers(ptr, n) + } + } + + if oldbucket == h.nevacuate { + advanceEvacuationMark(h, t, newbit) + } +} diff --git a/libgo/go/runtime/map_test.go b/libgo/go/runtime/map_test.go index 6d7097e..13f1d2e 100644 --- a/libgo/go/runtime/map_test.go +++ b/libgo/go/runtime/map_test.go @@ -9,6 +9,7 @@ import ( "math" "reflect" "runtime" + "runtime/internal/sys" "sort" "strconv" "strings" @@ -16,6 +17,17 @@ import ( "testing" ) +func TestHmapSize(t *testing.T) { + // The structure of hmap is defined in runtime/map.go + // and in cmd/compile/internal/gc/reflect.go and must be in sync. + // The size of hmap should be 48 bytes on 64 bit and 28 bytes on 32 bit platforms. + var hmapSize = uintptr(8 + 5*sys.PtrSize) + if runtime.RuntimeHmapSize != hmapSize { + t.Errorf("sizeof(runtime.hmap{})==%d, want %d", runtime.RuntimeHmapSize, hmapSize) + } + +} + // negative zero is a good test because: // 1) 0 and -0 are equal, yet have distinct representations. // 2) 0 is represented as all zeros, -0 isn't. @@ -52,14 +64,7 @@ func TestNegativeZero(t *testing.T) { } } -// nan is a good test because nan != nan, and nan has -// a randomized hash value. -func TestNan(t *testing.T) { - m := make(map[float64]int, 0) - nan := math.NaN() - m[nan] = 1 - m[nan] = 2 - m[nan] = 4 +func testMapNan(t *testing.T, m map[float64]int) { if len(m) != 3 { t.Error("length wrong") } @@ -78,6 +83,67 @@ func TestNan(t *testing.T) { } } +// nan is a good test because nan != nan, and nan has +// a randomized hash value. +func TestMapAssignmentNan(t *testing.T) { + m := make(map[float64]int, 0) + nan := math.NaN() + + // Test assignment. + m[nan] = 1 + m[nan] = 2 + m[nan] = 4 + testMapNan(t, m) +} + +// nan is a good test because nan != nan, and nan has +// a randomized hash value. +func TestMapOperatorAssignmentNan(t *testing.T) { + m := make(map[float64]int, 0) + nan := math.NaN() + + // Test assignment operations. + m[nan] += 1 + m[nan] += 2 + m[nan] += 4 + testMapNan(t, m) +} + +func TestMapOperatorAssignment(t *testing.T) { + m := make(map[int]int, 0) + + // "m[k] op= x" is rewritten into "m[k] = m[k] op x" + // differently when op is / or % than when it isn't. + // Simple test to make sure they all work as expected. + m[0] = 12345 + m[0] += 67890 + m[0] /= 123 + m[0] %= 456 + + const want = (12345 + 67890) / 123 % 456 + if got := m[0]; got != want { + t.Errorf("got %d, want %d", got, want) + } +} + +var sinkAppend bool + +func TestMapAppendAssignment(t *testing.T) { + m := make(map[int][]int, 0) + + m[0] = nil + m[0] = append(m[0], 12345) + m[0] = append(m[0], 67890) + sinkAppend, m[0] = !sinkAppend, append(m[0], 123, 456) + a := []int{7, 8, 9, 0} + m[0] = append(m[0], a...) + + want := []int{12345, 67890, 123, 456, 7, 8, 9, 0} + if got := m[0]; !reflect.DeepEqual(got, want) { + t.Errorf("got %v, want %v", got, want) + } +} + // Maps aren't actually copied on assignment. func TestAlias(t *testing.T) { m := make(map[int]int, 0) @@ -92,18 +158,25 @@ func TestAlias(t *testing.T) { func TestGrowWithNaN(t *testing.T) { m := make(map[float64]int, 4) nan := math.NaN() + + // Use both assignment and assignment operations as they may + // behave differently. 
m[nan] = 1 m[nan] = 2 - m[nan] = 4 + m[nan] += 4 + cnt := 0 s := 0 growflag := true for k, v := range m { if growflag { // force a hashtable resize - for i := 0; i < 100; i++ { + for i := 0; i < 50; i++ { m[float64(i)] = i } + for i := 50; i < 100; i++ { + m[float64(i)] += i + } growflag = false } if k != k { @@ -128,8 +201,8 @@ func TestGrowWithNegativeZero(t *testing.T) { negzero := math.Copysign(0.0, -1.0) m := make(map[FloatInt]int, 4) m[FloatInt{0.0, 0}] = 1 - m[FloatInt{0.0, 1}] = 2 - m[FloatInt{0.0, 2}] = 4 + m[FloatInt{0.0, 1}] += 2 + m[FloatInt{0.0, 2}] += 4 m[FloatInt{0.0, 3}] = 8 growflag := true s := 0 @@ -211,9 +284,12 @@ func TestIterGrowAndDelete(t *testing.T) { // an iterator is still using them. func TestIterGrowWithGC(t *testing.T) { m := make(map[int]int, 4) - for i := 0; i < 16; i++ { + for i := 0; i < 8; i++ { m[i] = i } + for i := 8; i < 16; i++ { + m[i] += i + } growflag := true bitmask := 0 for k := range m { @@ -364,11 +440,11 @@ func TestEmptyKeyAndValue(t *testing.T) { // ("quick keys") as well as long keys. func TestSingleBucketMapStringKeys_DupLen(t *testing.T) { testMapLookups(t, map[string]string{ - "x": "x1val", - "xx": "x2val", - "foo": "fooval", - "bar": "barval", // same key length as "foo" - "xxxx": "x4val", + "x": "x1val", + "xx": "x2val", + "foo": "fooval", + "bar": "barval", // same key length as "foo" + "xxxx": "x4val", strings.Repeat("x", 128): "longval1", strings.Repeat("y", 128): "longval2", }) @@ -627,7 +703,7 @@ func TestMapBuckets(t *testing.T) { // have a nil bucket pointer due to starting with preallocated buckets // on the stack. Escaping maps start with a non-nil bucket pointer if // hint size is above bucketCnt and thereby have more than one bucket. - // These tests depend on bucketCnt and loadFactor* in hashmap.go. + // These tests depend on bucketCnt and loadFactor* in map.go. 
t.Run("mapliteral", func(t *testing.T) { for _, tt := range mapBucketTests { localMap := map[int]int{} @@ -802,6 +878,23 @@ func benchmarkMapAssignInt32(b *testing.B, n int) { } } +func benchmarkMapOperatorAssignInt32(b *testing.B, n int) { + a := make(map[int32]int) + for i := 0; i < b.N; i++ { + a[int32(i&(n-1))] += i + } +} + +func benchmarkMapAppendAssignInt32(b *testing.B, n int) { + a := make(map[int32][]int) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + key := int32(i & (n - 1)) + a[key] = append(a[key], i) + } +} + func benchmarkMapDeleteInt32(b *testing.B, n int) { a := make(map[int32]int, n) b.ResetTimer() @@ -824,6 +917,23 @@ func benchmarkMapAssignInt64(b *testing.B, n int) { } } +func benchmarkMapOperatorAssignInt64(b *testing.B, n int) { + a := make(map[int64]int) + for i := 0; i < b.N; i++ { + a[int64(i&(n-1))] += i + } +} + +func benchmarkMapAppendAssignInt64(b *testing.B, n int) { + a := make(map[int64][]int) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + key := int64(i & (n - 1)) + a[key] = append(a[key], i) + } +} + func benchmarkMapDeleteInt64(b *testing.B, n int) { a := make(map[int64]int, n) b.ResetTimer() @@ -851,6 +961,33 @@ func benchmarkMapAssignStr(b *testing.B, n int) { } } +func benchmarkMapOperatorAssignStr(b *testing.B, n int) { + k := make([]string, n) + for i := 0; i < len(k); i++ { + k[i] = strconv.Itoa(i) + } + b.ResetTimer() + a := make(map[string]string) + for i := 0; i < b.N; i++ { + key := k[i&(n-1)] + a[key] += key + } +} + +func benchmarkMapAppendAssignStr(b *testing.B, n int) { + k := make([]string, n) + for i := 0; i < len(k); i++ { + k[i] = strconv.Itoa(i) + } + a := make(map[string][]string) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + key := k[i&(n-1)] + a[key] = append(a[key], key) + } +} + func benchmarkMapDeleteStr(b *testing.B, n int) { i2s := make([]string, n) for i := 0; i < n; i++ { @@ -886,8 +1023,127 @@ func BenchmarkMapAssign(b *testing.B) { b.Run("Str", runWith(benchmarkMapAssignStr, 1<<8, 1<<16)) } +func BenchmarkMapOperatorAssign(b *testing.B) { + b.Run("Int32", runWith(benchmarkMapOperatorAssignInt32, 1<<8, 1<<16)) + b.Run("Int64", runWith(benchmarkMapOperatorAssignInt64, 1<<8, 1<<16)) + b.Run("Str", runWith(benchmarkMapOperatorAssignStr, 1<<8, 1<<16)) +} + +func BenchmarkMapAppendAssign(b *testing.B) { + b.Run("Int32", runWith(benchmarkMapAppendAssignInt32, 1<<8, 1<<16)) + b.Run("Int64", runWith(benchmarkMapAppendAssignInt64, 1<<8, 1<<16)) + b.Run("Str", runWith(benchmarkMapAppendAssignStr, 1<<8, 1<<16)) +} + func BenchmarkMapDelete(b *testing.B) { b.Run("Int32", runWith(benchmarkMapDeleteInt32, 100, 1000, 10000)) b.Run("Int64", runWith(benchmarkMapDeleteInt64, 100, 1000, 10000)) b.Run("Str", runWith(benchmarkMapDeleteStr, 100, 1000, 10000)) } + +func TestDeferDeleteSlow(t *testing.T) { + ks := []complex128{0, 1, 2, 3} + + m := make(map[interface{}]int) + for i, k := range ks { + m[k] = i + } + if len(m) != len(ks) { + t.Errorf("want %d elements, got %d", len(ks), len(m)) + } + + func() { + for _, k := range ks { + defer delete(m, k) + } + }() + if len(m) != 0 { + t.Errorf("want 0 elements, got %d", len(m)) + } +} + +// TestIncrementAfterDeleteValueInt and other test Issue 25936. +// Value types int, int32, int64 are affected. Value type string +// works as expected. 
+func TestIncrementAfterDeleteValueInt(t *testing.T) { + const key1 = 12 + const key2 = 13 + + m := make(map[int]int) + m[key1] = 99 + delete(m, key1) + m[key2]++ + if n2 := m[key2]; n2 != 1 { + t.Errorf("incremented 0 to %d", n2) + } +} + +func TestIncrementAfterDeleteValueInt32(t *testing.T) { + const key1 = 12 + const key2 = 13 + + m := make(map[int]int32) + m[key1] = 99 + delete(m, key1) + m[key2]++ + if n2 := m[key2]; n2 != 1 { + t.Errorf("incremented 0 to %d", n2) + } +} + +func TestIncrementAfterDeleteValueInt64(t *testing.T) { + const key1 = 12 + const key2 = 13 + + m := make(map[int]int64) + m[key1] = 99 + delete(m, key1) + m[key2]++ + if n2 := m[key2]; n2 != 1 { + t.Errorf("incremented 0 to %d", n2) + } +} + +func TestIncrementAfterDeleteKeyStringValueInt(t *testing.T) { + const key1 = "" + const key2 = "x" + + m := make(map[string]int) + m[key1] = 99 + delete(m, key1) + m[key2] += 1 + if n2 := m[key2]; n2 != 1 { + t.Errorf("incremented 0 to %d", n2) + } +} + +func TestIncrementAfterDeleteKeyValueString(t *testing.T) { + const key1 = "" + const key2 = "x" + + m := make(map[string]string) + m[key1] = "99" + delete(m, key1) + m[key2] += "1" + if n2 := m[key2]; n2 != "1" { + t.Errorf("appended '1' to empty (nil) string, got %s", n2) + } +} + +// TestIncrementAfterBulkClearKeyStringValueInt tests that map bulk +// deletion (mapclear) still works as expected. Note that it was not +// affected by Issue 25936. +func TestIncrementAfterBulkClearKeyStringValueInt(t *testing.T) { + const key1 = "" + const key2 = "x" + + m := make(map[string]int) + m[key1] = 99 + for k := range m { + delete(m, k) + } + m[key2]++ + if n2 := m[key2]; n2 != 1 { + t.Errorf("incremented 0 to %d", n2) + } +} diff --git a/libgo/go/runtime/mapspeed_test.go b/libgo/go/runtime/mapspeed_test.go deleted file mode 100644 index aec0c51..0000000 --- a/libgo/go/runtime/mapspeed_test.go +++ /dev/null @@ -1,343 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
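The TestIncrementAfter* cases added above are regression tests for golang/go#25936: after deleting one key, an increment of a different, missing key must still store its result for int-like value types. Restated as a standalone program for trying the behaviour by hand (a sketch, not part of the test suite):

package main

import "fmt"

func main() {
	m := make(map[int]int)
	m[12] = 99
	delete(m, 12)

	// m[13]++ compiles to a map assignment operation; with the issue
	// 25936 bug the stored value could be lost for int-like value
	// types, so the expected output here is 1.
	m[13]++
	fmt.Println(m[13])

	// String values were reported to behave correctly, which the
	// TestIncrementAfterDeleteKeyValueString test above also checks.
	s := make(map[string]string)
	s[""] = "99"
	delete(s, "")
	s["x"] += "1"
	fmt.Println(s["x"]) // "1"
}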
-package runtime_test - -import ( - "fmt" - "strconv" - "strings" - "testing" -) - -const size = 10 - -func BenchmarkHashStringSpeed(b *testing.B) { - strings := make([]string, size) - for i := 0; i < size; i++ { - strings[i] = fmt.Sprintf("string#%d", i) - } - sum := 0 - m := make(map[string]int, size) - for i := 0; i < size; i++ { - m[strings[i]] = 0 - } - idx := 0 - b.ResetTimer() - for i := 0; i < b.N; i++ { - sum += m[strings[idx]] - idx++ - if idx == size { - idx = 0 - } - } -} - -type chunk [17]byte - -func BenchmarkHashBytesSpeed(b *testing.B) { - // a bunch of chunks, each with a different alignment mod 16 - var chunks [size]chunk - // initialize each to a different value - for i := 0; i < size; i++ { - chunks[i][0] = byte(i) - } - // put into a map - m := make(map[chunk]int, size) - for i, c := range chunks { - m[c] = i - } - idx := 0 - b.ResetTimer() - for i := 0; i < b.N; i++ { - if m[chunks[idx]] != idx { - b.Error("bad map entry for chunk") - } - idx++ - if idx == size { - idx = 0 - } - } -} - -func BenchmarkHashInt32Speed(b *testing.B) { - ints := make([]int32, size) - for i := 0; i < size; i++ { - ints[i] = int32(i) - } - sum := 0 - m := make(map[int32]int, size) - for i := 0; i < size; i++ { - m[ints[i]] = 0 - } - idx := 0 - b.ResetTimer() - for i := 0; i < b.N; i++ { - sum += m[ints[idx]] - idx++ - if idx == size { - idx = 0 - } - } -} - -func BenchmarkHashInt64Speed(b *testing.B) { - ints := make([]int64, size) - for i := 0; i < size; i++ { - ints[i] = int64(i) - } - sum := 0 - m := make(map[int64]int, size) - for i := 0; i < size; i++ { - m[ints[i]] = 0 - } - idx := 0 - b.ResetTimer() - for i := 0; i < b.N; i++ { - sum += m[ints[idx]] - idx++ - if idx == size { - idx = 0 - } - } -} -func BenchmarkHashStringArraySpeed(b *testing.B) { - stringpairs := make([][2]string, size) - for i := 0; i < size; i++ { - for j := 0; j < 2; j++ { - stringpairs[i][j] = fmt.Sprintf("string#%d/%d", i, j) - } - } - sum := 0 - m := make(map[[2]string]int, size) - for i := 0; i < size; i++ { - m[stringpairs[i]] = 0 - } - idx := 0 - b.ResetTimer() - for i := 0; i < b.N; i++ { - sum += m[stringpairs[idx]] - idx++ - if idx == size { - idx = 0 - } - } -} - -func BenchmarkMegMap(b *testing.B) { - m := make(map[string]bool) - for suffix := 'A'; suffix <= 'G'; suffix++ { - m[strings.Repeat("X", 1<<20-1)+fmt.Sprint(suffix)] = true - } - key := strings.Repeat("X", 1<<20-1) + "k" - b.ResetTimer() - for i := 0; i < b.N; i++ { - _, _ = m[key] - } -} - -func BenchmarkMegOneMap(b *testing.B) { - m := make(map[string]bool) - m[strings.Repeat("X", 1<<20)] = true - key := strings.Repeat("Y", 1<<20) - b.ResetTimer() - for i := 0; i < b.N; i++ { - _, _ = m[key] - } -} - -func BenchmarkMegEqMap(b *testing.B) { - m := make(map[string]bool) - key1 := strings.Repeat("X", 1<<20) - key2 := strings.Repeat("X", 1<<20) // equal but different instance - m[key1] = true - b.ResetTimer() - for i := 0; i < b.N; i++ { - _, _ = m[key2] - } -} - -func BenchmarkMegEmptyMap(b *testing.B) { - m := make(map[string]bool) - key := strings.Repeat("X", 1<<20) - b.ResetTimer() - for i := 0; i < b.N; i++ { - _, _ = m[key] - } -} - -func BenchmarkSmallStrMap(b *testing.B) { - m := make(map[string]bool) - for suffix := 'A'; suffix <= 'G'; suffix++ { - m[fmt.Sprint(suffix)] = true - } - key := "k" - b.ResetTimer() - for i := 0; i < b.N; i++ { - _, _ = m[key] - } -} - -func BenchmarkMapStringKeysEight_16(b *testing.B) { benchmarkMapStringKeysEight(b, 16) } -func BenchmarkMapStringKeysEight_32(b *testing.B) { benchmarkMapStringKeysEight(b, 32) 
} -func BenchmarkMapStringKeysEight_64(b *testing.B) { benchmarkMapStringKeysEight(b, 64) } -func BenchmarkMapStringKeysEight_1M(b *testing.B) { benchmarkMapStringKeysEight(b, 1<<20) } - -func benchmarkMapStringKeysEight(b *testing.B, keySize int) { - m := make(map[string]bool) - for i := 0; i < 8; i++ { - m[strings.Repeat("K", i+1)] = true - } - key := strings.Repeat("K", keySize) - b.ResetTimer() - for i := 0; i < b.N; i++ { - _ = m[key] - } -} - -func BenchmarkIntMap(b *testing.B) { - m := make(map[int]bool) - for i := 0; i < 8; i++ { - m[i] = true - } - b.ResetTimer() - for i := 0; i < b.N; i++ { - _, _ = m[7] - } -} - -// Accessing the same keys in a row. -func benchmarkRepeatedLookup(b *testing.B, lookupKeySize int) { - m := make(map[string]bool) - // At least bigger than a single bucket: - for i := 0; i < 64; i++ { - m[fmt.Sprintf("some key %d", i)] = true - } - base := strings.Repeat("x", lookupKeySize-1) - key1 := base + "1" - key2 := base + "2" - b.ResetTimer() - for i := 0; i < b.N/4; i++ { - _ = m[key1] - _ = m[key1] - _ = m[key2] - _ = m[key2] - } -} - -func BenchmarkRepeatedLookupStrMapKey32(b *testing.B) { benchmarkRepeatedLookup(b, 32) } -func BenchmarkRepeatedLookupStrMapKey1M(b *testing.B) { benchmarkRepeatedLookup(b, 1<<20) } - -func BenchmarkNewEmptyMap(b *testing.B) { - b.ReportAllocs() - for i := 0; i < b.N; i++ { - _ = make(map[int]int) - } -} - -func BenchmarkNewSmallMap(b *testing.B) { - b.ReportAllocs() - for i := 0; i < b.N; i++ { - m := make(map[int]int) - m[0] = 0 - m[1] = 1 - } -} - -func BenchmarkMapIter(b *testing.B) { - m := make(map[int]bool) - for i := 0; i < 8; i++ { - m[i] = true - } - b.ResetTimer() - for i := 0; i < b.N; i++ { - for range m { - } - } -} - -func BenchmarkMapIterEmpty(b *testing.B) { - m := make(map[int]bool) - b.ResetTimer() - for i := 0; i < b.N; i++ { - for range m { - } - } -} - -func BenchmarkSameLengthMap(b *testing.B) { - // long strings, same length, differ in first few - // and last few bytes. 
- m := make(map[string]bool) - s1 := "foo" + strings.Repeat("-", 100) + "bar" - s2 := "goo" + strings.Repeat("-", 100) + "ber" - m[s1] = true - m[s2] = true - b.ResetTimer() - for i := 0; i < b.N; i++ { - _ = m[s1] - } -} - -type BigKey [3]int64 - -func BenchmarkBigKeyMap(b *testing.B) { - m := make(map[BigKey]bool) - k := BigKey{3, 4, 5} - m[k] = true - for i := 0; i < b.N; i++ { - _ = m[k] - } -} - -type BigVal [3]int64 - -func BenchmarkBigValMap(b *testing.B) { - m := make(map[BigKey]BigVal) - k := BigKey{3, 4, 5} - m[k] = BigVal{6, 7, 8} - for i := 0; i < b.N; i++ { - _ = m[k] - } -} - -func BenchmarkSmallKeyMap(b *testing.B) { - m := make(map[int16]bool) - m[5] = true - for i := 0; i < b.N; i++ { - _ = m[5] - } -} - -func BenchmarkMapPopulate(b *testing.B) { - for size := 1; size < 1000000; size *= 10 { - b.Run(strconv.Itoa(size), func(b *testing.B) { - b.ReportAllocs() - for i := 0; i < b.N; i++ { - m := make(map[int]bool) - for j := 0; j < size; j++ { - m[j] = true - } - } - }) - } -} - -type ComplexAlgKey struct { - a, b, c int64 - _ int - d int32 - _ int - e string - _ int - f, g, h int64 -} - -func BenchmarkComplexAlgMap(b *testing.B) { - m := make(map[ComplexAlgKey]bool) - var k ComplexAlgKey - m[k] = true - for i := 0; i < b.N; i++ { - _ = m[k] - } -} diff --git a/libgo/go/runtime/mbarrier.go b/libgo/go/runtime/mbarrier.go index 3b8f714..24e5865 100644 --- a/libgo/go/runtime/mbarrier.go +++ b/libgo/go/runtime/mbarrier.go @@ -6,10 +6,10 @@ // // For the concurrent garbage collector, the Go compiler implements // updates to pointer-valued fields that may be in heap objects by -// emitting calls to write barriers. This file contains the actual write barrier -// implementation, gcmarkwb_m, and the various wrappers called by the -// compiler to implement pointer assignment, slice assignment, -// typed memmove, and so on. +// emitting calls to write barriers. The main write barrier for +// individual pointer writes is gcWriteBarrier and is implemented in +// assembly. This file contains write barrier entry points for bulk +// operations. See also mwbbuf.go. package runtime @@ -21,14 +21,10 @@ import ( // For gccgo, use go:linkname to rename compiler-called functions to // themselves, so that the compiler will export them. // -//go:linkname writebarrierptr runtime.writebarrierptr //go:linkname typedmemmove runtime.typedmemmove //go:linkname typedslicecopy runtime.typedslicecopy -// gcmarkwb_m is the mark-phase write barrier, the only barrier we have. -// The rest of this file exists only to make calls to this function. -// -// This is a hybrid barrier that combines a Yuasa-style deletion +// Go uses a hybrid barrier that combines a Yuasa-style deletion // barrier—which shades the object whose reference is being // overwritten—with Dijkstra insertion barrier—which shades the object // whose reference is being written. The insertion part of the barrier @@ -144,105 +140,17 @@ import ( // reachable by some goroutine that currently cannot reach it. // // -//go:nowritebarrierrec -//go:systemstack -func gcmarkwb_m(slot *uintptr, ptr uintptr) { - if writeBarrier.needed { - // Note: This turns bad pointer writes into bad - // pointer reads, which could be confusing. We avoid - // reading from obviously bad pointers, which should - // take care of the vast majority of these. We could - // patch this up in the signal handler, or use XCHG to - // combine the read and the write. Checking inheap is - // insufficient since we need to track changes to - // roots outside the heap. 
- // - // Note: profbuf.go omits a barrier during signal handler - // profile logging; that's safe only because this deletion barrier exists. - // If we remove the deletion barrier, we'll have to work out - // a new way to handle the profile logging. - if slot1 := uintptr(unsafe.Pointer(slot)); slot1 >= minPhysPageSize { - if optr := *slot; optr != 0 { - shade(optr) - } - } - // TODO: Make this conditional on the caller's stack color. - if ptr != 0 && inheap(ptr) { - shade(ptr) - } - } -} - -// writebarrierptr_prewrite1 invokes a write barrier for *dst = src -// prior to the write happening. -// -// Write barrier calls must not happen during critical GC and scheduler -// related operations. In particular there are times when the GC assumes -// that the world is stopped but scheduler related code is still being -// executed, dealing with syscalls, dealing with putting gs on runnable -// queues and so forth. This code cannot execute write barriers because -// the GC might drop them on the floor. Stopping the world involves removing -// the p associated with an m. We use the fact that m.p == nil to indicate -// that we are in one these critical section and throw if the write is of -// a pointer to a heap object. -//go:nosplit -func writebarrierptr_prewrite1(dst *uintptr, src uintptr) { - mp := acquirem() - if mp.inwb || mp.dying > 0 { - // We explicitly allow write barriers in startpanic_m, - // since we're going down anyway. Ignore them here. - releasem(mp) - return - } - systemstack(func() { - if mp.p == 0 && memstats.enablegc && !mp.inwb && inheap(src) { - throw("writebarrierptr_prewrite1 called with mp.p == nil") - } - mp.inwb = true - gcmarkwb_m(dst, src) - }) - mp.inwb = false - releasem(mp) -} - -// NOTE: Really dst *unsafe.Pointer, src unsafe.Pointer, -// but if we do that, Go inserts a write barrier on *dst = src. -//go:nosplit -func writebarrierptr(dst *uintptr, src uintptr) { - if writeBarrier.cgo { - cgoCheckWriteBarrier(dst, src) - } - if !writeBarrier.needed { - *dst = src - return - } - if src != 0 && src < minPhysPageSize { - systemstack(func() { - print("runtime: writebarrierptr *", dst, " = ", hex(src), "\n") - throw("bad pointer in write barrier") - }) - } - writebarrierptr_prewrite1(dst, src) - *dst = src -} - -// writebarrierptr_prewrite is like writebarrierptr, but the store -// will be performed by the caller after this call. The caller must -// not allow preemption between this call and the write. +// Signal handler pointer writes: // -//go:nosplit -func writebarrierptr_prewrite(dst *uintptr, src uintptr) { - if writeBarrier.cgo { - cgoCheckWriteBarrier(dst, src) - } - if !writeBarrier.needed { - return - } - if src != 0 && src < minPhysPageSize { - systemstack(func() { throw("bad pointer in write barrier") }) - } - writebarrierptr_prewrite1(dst, src) -} +// In general, the signal handler cannot safely invoke the write +// barrier because it may run without a P or even during the write +// barrier. +// +// There is exactly one exception: profbuf.go omits a barrier during +// signal handler profile logging. That's safe only because of the +// deletion barrier. See profbuf.go for a detailed argument. If we +// remove the deletion barrier, we'll have to work out a new way to +// handle the profile logging. // typedmemmove copies a value of type t to dst from src. // Must be nosplit, see #16026. 
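The rewritten comment above describes the hybrid barrier in prose. As a hedged sketch (the names and the shade stub are illustrative; the real barrier is the gcWriteBarrier assembly stub plus the per-P buffer in mwbbuf.go), a pointer store under the hybrid barrier looks like this:

package main

import (
	"fmt"
	"unsafe"
)

// shade is a stand-in for greying an object on the GC work queue.
func shade(p unsafe.Pointer) {
	if p != nil {
		fmt.Printf("shade %p\n", p)
	}
}

// writePointer performs *slot = ptr with the hybrid barrier:
// shade the old referent (Yuasa-style deletion barrier) and the new
// referent (Dijkstra-style insertion barrier), then store. The design
// only requires the second shade while the goroutine's stack is grey;
// as the removed gcmarkwb_m noted, the implementation conservatively
// shades the new pointer whenever it is a heap pointer.
func writePointer(slot *unsafe.Pointer, ptr unsafe.Pointer) {
	shade(*slot)
	shade(ptr)
	*slot = ptr
}

func main() {
	a, b := new(int), new(int)
	slot := unsafe.Pointer(a)
	writePointer(&slot, unsafe.Pointer(b))
}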
@@ -252,6 +160,9 @@ func writebarrierptr_prewrite(dst *uintptr, src uintptr) { // //go:nosplit func typedmemmove(typ *_type, dst, src unsafe.Pointer) { + if dst == src { + return + } if typ.kind&kindNoPointers == 0 { bulkBarrierPreWrite(uintptr(dst), uintptr(src), typ.size) } @@ -335,6 +246,10 @@ func typedslicecopy(typ *_type, dst, src slice) int { cgoCheckSliceCopy(typ, dst, src, n) } + if dstp == srcp { + return n + } + // Note: No point in checking typ.kind&kindNoPointers here: // compiler only emits calls to typedslicecopy for types with pointers, // and growslice and reflect_typedslicecopy check for pointers diff --git a/libgo/go/runtime/mbitmap.go b/libgo/go/runtime/mbitmap.go index c6c8e6a..42c2015 100644 --- a/libgo/go/runtime/mbitmap.go +++ b/libgo/go/runtime/mbitmap.go @@ -13,12 +13,11 @@ // // Heap bitmap // -// The allocated heap comes from a subset of the memory in the range [start, used), -// where start == mheap_.arena_start and used == mheap_.arena_used. -// The heap bitmap comprises 2 bits for each pointer-sized word in that range, -// stored in bytes indexed backward in memory from start. -// That is, the byte at address start-1 holds the 2-bit entries for the four words -// start through start+3*ptrSize, the byte at start-2 holds the entries for +// The heap bitmap comprises 2 bits for each pointer-sized word in the heap, +// stored in the heapArena metadata backing each heap arena. +// That is, if ha is the heapArena for the arena starting a start, +// then ha.bitmap[0] holds the 2-bit entries for the four words start +// through start+3*ptrSize, ha.bitmap[1] holds the entries for // start+4*ptrSize through start+7*ptrSize, and so on. // // In each 2-bit entry, the lower bit holds the same information as in the 1-bit @@ -85,8 +84,8 @@ const ( bitPointer = 1 << 0 bitScan = 1 << 4 - heapBitsShift = 1 // shift offset between successive bitPointer or bitScan entries - heapBitmapScale = sys.PtrSize * (8 / 2) // number of data bytes described by one heap bitmap byte + heapBitsShift = 1 // shift offset between successive bitPointer or bitScan entries + wordsPerBitmapByte = 8 / 2 // heap words described by one bitmap byte // all scan/pointer bits in a byte bitScanAll = bitScan | bitScan<= n { - return - } - - sysMap(unsafe.Pointer(h.bitmap-n), n-h.bitmap_mapped, h.arena_reserved, &memstats.gc_sys) - h.bitmap_mapped = n -} - // heapBits provides access to the bitmap bits for a single heap word. // The methods on heapBits take value receivers so that the compiler // can more easily inline calls to those methods and registerize the @@ -168,8 +141,14 @@ func (h *mheap) mapBits(arena_used uintptr) { type heapBits struct { bitp *uint8 shift uint32 + arena uint32 // Index of heap arena containing bitp + last *uint8 // Last byte arena's bitmap } +// Make the compiler check that heapBits.arena is large enough to hold +// the maximum arena frame number. +var _ = heapBits{arena: (1<> s.divShift } return uintptr(((uint64(byteOffset) >> s.divShift) * uint64(s.divMul)) >> s.divShift2) @@ -329,9 +308,6 @@ func (m markBits) clearMarked() { // markBitsForSpan returns the markBits for the span base address base. func markBitsForSpan(base uintptr) (mbits markBits) { - if base < mheap_.arena_start || base >= mheap_.arena_used { - throw("markBitsForSpan: base out of range") - } mbits = markBitsForAddr(base) if mbits.mask != 1 { throw("markBitsForSpan: unaligned start") @@ -351,31 +327,36 @@ func (m *markBits) advance() { } // heapBitsForAddr returns the heapBits for the address addr. 
-// The caller must have already checked that addr is in the range [mheap_.arena_start, mheap_.arena_used). +// The caller must ensure addr is in an allocated span. +// In particular, be careful not to point past the end of an object. // // nosplit because it is used during write barriers and must not be preempted. //go:nosplit -func heapBitsForAddr(addr uintptr) heapBits { - // 2 bits per work, 4 pairs per byte, and a mask is hard coded. - off := (addr - mheap_.arena_start) / sys.PtrSize - return heapBits{(*uint8)(unsafe.Pointer(mheap_.bitmap - off/4 - 1)), uint32(off & 3)} -} - -// heapBitsForSpan returns the heapBits for the span base address base. -func heapBitsForSpan(base uintptr) (hbits heapBits) { - if base < mheap_.arena_start || base >= mheap_.arena_used { - print("runtime: base ", hex(base), " not in range [", hex(mheap_.arena_start), ",", hex(mheap_.arena_used), ")\n") - throw("heapBitsForSpan: base out of range") +func heapBitsForAddr(addr uintptr) (h heapBits) { + // 2 bits per word, 4 pairs per byte, and a mask is hard coded. + arena := arenaIndex(addr) + ha := mheap_.arenas[arena.l1()][arena.l2()] + // The compiler uses a load for nil checking ha, but in this + // case we'll almost never hit that cache line again, so it + // makes more sense to do a value check. + if ha == nil { + // addr is not in the heap. Return nil heapBits, which + // we expect to crash in the caller. + return } - return heapBitsForAddr(base) + h.bitp = &ha.bitmap[(addr/(sys.PtrSize*4))%heapArenaBitmapBytes] + h.shift = uint32((addr / sys.PtrSize) & 3) + h.arena = uint32(arena) + h.last = &ha.bitmap[len(ha.bitmap)-1] + return } -// heapBitsForObject returns the base address for the heap object -// containing the address p, the heapBits for base, -// the object's span, and of the index of the object in s. -// If p does not point into a heap object, -// return base == 0 -// otherwise return the base of the object. +// findObject returns the base address for the heap object containing +// the address p, the object's span, and the index of the object in s. +// If p does not point into a heap object, it returns base == 0. +// +// If p points is an invalid heap pointer and debug.invalidptr != 0, +// findObject panics. // // For gccgo, the forStack parameter is true if the value came from the stack. // The stack is collected conservatively and may contain invalid pointers. @@ -383,16 +364,9 @@ func heapBitsForSpan(base uintptr) (hbits heapBits) { // refBase and refOff optionally give the base address of the object // in which the pointer p was found and the byte offset at which it // was found. These are used for error reporting. -func heapBitsForObject(p, refBase, refOff uintptr, forStack bool) (base uintptr, hbits heapBits, s *mspan, objIndex uintptr) { - arenaStart := mheap_.arena_start - if p < arenaStart || p >= mheap_.arena_used { - return - } - off := p - arenaStart - idx := off >> _PageShift - // p points into the heap, but possibly to the middle of an object. - // Consult the span table to find the block beginning. - s = mheap_.spans[idx] +func findObject(p, refBase, refOff uintptr, forStack bool) (base uintptr, s *mspan, objIndex uintptr) { + s = spanOf(p) + // If p is a bad pointer, it may not be in s's bounds. if s == nil || p < s.base() || p >= s.limit || s.state != mSpanInUse { if s == nil || s.state == _MSpanManual || forStack { // If s is nil, the virtual address has never been part of the heap. 
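heapBitsForAddr now finds the two bits for a heap word with plain index arithmetic on the owning heapArena's bitmap (the modulo form in the code relies on arenas being aligned). A self-contained toy of that arithmetic, in which the names and the arenaBase parameter are illustrative rather than the runtime's API:

package main

import "fmt"

const ptrSize = 8 // 64-bit words, matching sys.PtrSize on 64-bit targets

// bitmapPos mirrors the index arithmetic in heapBitsForAddr: two bits
// per heap word means four words share one bitmap byte. Within that
// byte, the word's pointer bit is 1<<shift and its scan bit is
// 1<<(shift+4), matching bitPointer = 1<<0 and bitScan = 1<<4.
// arenaBase stands in for the start of the arena that owns addr.
func bitmapPos(addr, arenaBase uintptr) (byteIdx uintptr, shift uint) {
	word := (addr - arenaBase) / ptrSize
	return word / 4, uint(word % 4)
}

func main() {
	base := uintptr(0x40000000)
	for _, off := range []uintptr{0, 8, 16, 24, 32} {
		b, s := bitmapPos(base+off, base)
		fmt.Printf("word at +%-2d -> bitmap[%d], pointer bit 1<<%d, scan bit 1<<%d\n",
			off, b, s, s+4)
	}
}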
@@ -419,7 +393,7 @@ func heapBitsForObject(p, refBase, refOff uintptr, forStack bool) (base uintptr, } else { print(" to unused region of span") } - print(" idx=", hex(idx), " span.base()=", hex(s.base()), " span.limit=", hex(s.limit), " span.state=", s.state, "\n") + print(" span.base()=", hex(s.base()), " span.limit=", hex(s.limit), " span.state=", s.state, "\n") if refBase != 0 { print("runtime: found in object at *(", hex(refBase), "+", hex(refOff), ")\n") gcDumpObject("object", refBase, refOff) @@ -458,8 +432,6 @@ func heapBitsForObject(p, refBase, refOff uintptr, forStack bool) (base uintptr, base += objIndex * s.elemsize } } - // Now that we know the actual base, compute heapBits to return to caller. - hbits = heapBitsForAddr(base) return } @@ -471,9 +443,42 @@ func heapBitsForObject(p, refBase, refOff uintptr, forStack bool) (base uintptr, //go:nosplit func (h heapBits) next() heapBits { if h.shift < 3*heapBitsShift { - return heapBits{h.bitp, h.shift + heapBitsShift} + h.shift += heapBitsShift + } else if h.bitp != h.last { + h.bitp, h.shift = add1(h.bitp), 0 + } else { + // Move to the next arena. + return h.nextArena() } - return heapBits{subtract1(h.bitp), 0} + return h +} + +// nextArena advances h to the beginning of the next heap arena. +// +// This is a slow-path helper to next. gc's inliner knows that +// heapBits.next can be inlined even though it calls this. This is +// marked noinline so it doesn't get inlined into next and cause next +// to be too big to inline. +// +//go:nosplit +//go:noinline +func (h heapBits) nextArena() heapBits { + h.arena++ + ai := arenaIdx(h.arena) + l2 := mheap_.arenas[ai.l1()] + if l2 == nil { + // We just passed the end of the object, which + // was also the end of the heap. Poison h. It + // should never be dereferenced at this point. + return heapBits{} + } + ha := l2[ai.l2()] + if ha == nil { + return heapBits{} + } + h.bitp, h.shift = &ha.bitmap[0], 0 + h.last = &ha.bitmap[len(ha.bitmap)-1] + return h } // forward returns the heapBits describing n pointer-sized words ahead of h in memory. @@ -481,9 +486,39 @@ func (h heapBits) next() heapBits { // h.forward(1) is equivalent to h.next(), just slower. // Note that forward does not modify h. The caller must record the result. // bits returns the heap bits for the current word. +//go:nosplit func (h heapBits) forward(n uintptr) heapBits { n += uintptr(h.shift) / heapBitsShift - return heapBits{subtractb(h.bitp, n/4), uint32(n%4) * heapBitsShift} + nbitp := uintptr(unsafe.Pointer(h.bitp)) + n/4 + h.shift = uint32(n%4) * heapBitsShift + if nbitp <= uintptr(unsafe.Pointer(h.last)) { + h.bitp = (*uint8)(unsafe.Pointer(nbitp)) + return h + } + + // We're in a new heap arena. + past := nbitp - (uintptr(unsafe.Pointer(h.last)) + 1) + h.arena += 1 + uint32(past/heapArenaBitmapBytes) + ai := arenaIdx(h.arena) + if l2 := mheap_.arenas[ai.l1()]; l2 != nil && l2[ai.l2()] != nil { + a := l2[ai.l2()] + h.bitp = &a.bitmap[past%heapArenaBitmapBytes] + h.last = &a.bitmap[len(a.bitmap)-1] + } else { + h.bitp, h.last = nil, nil + } + return h +} + +// forwardOrBoundary is like forward, but stops at boundaries between +// contiguous sections of the bitmap. It returns the number of words +// advanced over, which will be <= n. 
+func (h heapBits) forwardOrBoundary(n uintptr) (heapBits, uintptr) { + maxn := 4 * ((uintptr(unsafe.Pointer(h.last)) + 1) - uintptr(unsafe.Pointer(h.bitp))) + if n > maxn { + n = maxn + } + return h.forward(n), n } // The caller can test morePointers and isPointer by &-ing with bitScan and bitPointer. @@ -564,6 +599,8 @@ func (h heapBits) setCheckmarked(size uintptr) { // make sure the underlying allocation contains pointers, usually // by checking typ.kind&kindNoPointers. // +// Callers must perform cgo checks if writeBarrier.cgo. +// //go:nosplit func bulkBarrierPreWrite(dst, src, size uintptr) { if (dst|src|size)&(sys.PtrSize-1) != 0 { @@ -572,7 +609,7 @@ func bulkBarrierPreWrite(dst, src, size uintptr) { if !writeBarrier.needed { return } - if !inheap(dst) { + if s := spanOf(dst); s == nil { // If dst is a global, use the data or BSS bitmaps to // execute write barriers. lo := 0 @@ -594,6 +631,14 @@ func bulkBarrierPreWrite(dst, src, size uintptr) { } } return + } else if s.state != _MSpanInUse || dst < s.base() || s.limit <= dst { + // dst was heap memory at some point, but isn't now. + // It can't be a global. It must be either our stack, + // or in the case of direct channel sends, it could be + // another stack. Either way, no need for barriers. + // This will also catch if dst is in a freed span, + // though that should never have. + return } buf := &getg().m.p.ptr().wbBuf @@ -663,7 +708,7 @@ func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) { } } -// typeBitsBulkBarrier executes writebarrierptr_prewrite for every +// typeBitsBulkBarrier executes a write barrier for every // pointer that would be copied from [src, src+size) to [dst, // dst+size) by a memmove using the type bitmap to locate those // pointer slots. @@ -677,23 +722,26 @@ func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) { // Must not be preempted because it typically runs right before memmove, // and the GC must observe them as an atomic action. // +// Callers must perform cgo checks if writeBarrier.cgo. +// //go:nosplit func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) { if typ == nil { throw("runtime: typeBitsBulkBarrier without type") } if typ.size != size { - println("runtime: typeBitsBulkBarrier with type ", *typ.string, " of size ", typ.size, " but memory size", size) + println("runtime: typeBitsBulkBarrier with type ", typ.string(), " of size ", typ.size, " but memory size", size) throw("runtime: invalid typeBitsBulkBarrier") } if typ.kind&kindGCProg != 0 { - println("runtime: typeBitsBulkBarrier with type ", *typ.string, " with GC prog") + println("runtime: typeBitsBulkBarrier with type ", typ.string(), " with GC prog") throw("runtime: invalid typeBitsBulkBarrier") } if !writeBarrier.needed { return } ptrmask := typ.gcdata + buf := &getg().m.p.ptr().wbBuf var bits uint32 for i := uintptr(0); i < typ.ptrdata; i += sys.PtrSize { if i&(sys.PtrSize*8-1) == 0 { @@ -705,7 +753,9 @@ func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) { if bits&1 != 0 { dstx := (*uintptr)(unsafe.Pointer(dst + i)) srcx := (*uintptr)(unsafe.Pointer(src + i)) - writebarrierptr_prewrite(dstx, *srcx) + if !buf.putFast(*dstx, *srcx) { + wbBufFlush(nil, 0) + } } } } @@ -736,23 +786,28 @@ func (h heapBits) initSpan(s *mspan) { s.allocBits = newAllocBits(s.nelems) // Clear bits corresponding to objects. 
- if total%heapBitmapScale != 0 { + nw := total / sys.PtrSize + if nw%wordsPerBitmapByte != 0 { throw("initSpan: unaligned length") } - nbyte := total / heapBitmapScale - if sys.PtrSize == 8 && size == sys.PtrSize { - end := h.bitp - bitp := subtractb(end, nbyte-1) - for { - *bitp = bitPointerAll | bitScanAll - if bitp == end { - break + if h.shift != 0 { + throw("initSpan: unaligned base") + } + for nw > 0 { + hNext, anw := h.forwardOrBoundary(nw) + nbyte := anw / wordsPerBitmapByte + if sys.PtrSize == 8 && size == sys.PtrSize { + bitp := h.bitp + for i := uintptr(0); i < nbyte; i++ { + *bitp = bitPointerAll | bitScanAll + bitp = add1(bitp) } - bitp = add1(bitp) + } else { + memclrNoHeapPointers(unsafe.Pointer(h.bitp), nbyte) } - return + h = hNext + nw -= anw } - memclrNoHeapPointers(unsafe.Pointer(subtractb(h.bitp, nbyte-1)), nbyte) } // initCheckmarkSpan initializes a span for being checkmarked. @@ -764,10 +819,9 @@ func (h heapBits) initCheckmarkSpan(size, n, total uintptr) { // Only possible on 64-bit system, since minimum size is 8. // Must clear type bit (checkmark bit) of every word. // The type bit is the lower of every two-bit pair. - bitp := h.bitp - for i := uintptr(0); i < n; i += 4 { - *bitp &^= bitPointerAll - bitp = subtract1(bitp) + for i := uintptr(0); i < n; i += wordsPerBitmapByte { + *h.bitp &^= bitPointerAll + h = h.forward(wordsPerBitmapByte) } return } @@ -788,10 +842,9 @@ func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) { // Only possible on 64-bit system, since minimum size is 8. // Must clear type bit (checkmark bit) of every word. // The type bit is the lower of every two-bit pair. - bitp := h.bitp - for i := uintptr(0); i < n; i += 4 { - *bitp |= bitPointerAll - bitp = subtract1(bitp) + for i := uintptr(0); i < n; i += wordsPerBitmapByte { + *h.bitp |= bitPointerAll + h = h.forward(wordsPerBitmapByte) } } } @@ -958,6 +1011,19 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { // This is a lot of lines of code, but it compiles into relatively few // machine instructions. + outOfPlace := false + if arenaIndex(x+size-1) != arenaIdx(h.arena) || (doubleCheck && fastrand()%2 == 0) { + // This object spans heap arenas, so the bitmap may be + // discontiguous. Unroll it into the object instead + // and then copy it out. + // + // In doubleCheck mode, we randomly do this anyway to + // stress test the bitmap copying path. + outOfPlace = true + h.bitp = (*uint8)(unsafe.Pointer(x)) + h.last = nil + } + var ( // Ptrmask input. p *byte // last ptrmask byte read @@ -996,9 +1062,8 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { } ptrmask = debugPtrmask.data runGCProg(addb(typ.gcdata, 4), nil, ptrmask, 1) - goto Phase4 } - return + goto Phase4 } // Note about sizes: @@ -1106,7 +1171,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { } if nw == 0 { // No pointers! Caller was supposed to check. - println("runtime: invalid type ", *typ.string) + println("runtime: invalid type ", typ.string()) throw("heapBitsSetType: called with non-pointer type") return } @@ -1116,7 +1181,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { nw = 2 } - // Phase 1: Special case for leading byte (shift==0) or half-byte (shift==4). + // Phase 1: Special case for leading byte (shift==0) or half-byte (shift==2). // The leading byte is special because it contains the bits for word 1, // which does not have the scan bit set. 
// The leading half-byte is special because it's a half a byte, @@ -1146,7 +1211,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { goto Phase3 } *hbitp = uint8(hb) - hbitp = subtract1(hbitp) + hbitp = add1(hbitp) b >>= 4 nb -= 4 @@ -1167,7 +1232,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { // the checkmark. *hbitp &^= uint8((bitPointer | bitScan | (bitPointer << heapBitsShift)) << (2 * heapBitsShift)) *hbitp |= uint8(hb) - hbitp = subtract1(hbitp) + hbitp = add1(hbitp) if w += 2; w >= nw { // We know that there is more data, because we handled 2-word objects above. // This must be at least a 6-word object. If we're out of pointer words, @@ -1197,7 +1262,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { break } *hbitp = uint8(hb) - hbitp = subtract1(hbitp) + hbitp = add1(hbitp) b >>= 4 // Load more bits. b has nb right now. @@ -1245,7 +1310,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { break } *hbitp = uint8(hb) - hbitp = subtract1(hbitp) + hbitp = add1(hbitp) b >>= 4 } @@ -1266,11 +1331,11 @@ Phase3: // The first is hb, the rest are zero. if w <= nw { *hbitp = uint8(hb) - hbitp = subtract1(hbitp) + hbitp = add1(hbitp) hb = 0 // for possible final half-byte below for w += 4; w <= nw; w += 4 { *hbitp = 0 - hbitp = subtract1(hbitp) + hbitp = add1(hbitp) } } @@ -1287,11 +1352,91 @@ Phase3: } Phase4: - // Phase 4: all done, but perhaps double check. + // Phase 4: Copy unrolled bitmap to per-arena bitmaps, if necessary. + if outOfPlace { + // TODO: We could probably make this faster by + // handling [x+dataSize, x+size) specially. + h := heapBitsForAddr(x) + // cnw is the number of heap words, or bit pairs + // remaining (like nw above). + cnw := size / sys.PtrSize + src := (*uint8)(unsafe.Pointer(x)) + // We know the first and last byte of the bitmap are + // not the same, but it's still possible for small + // objects span arenas, so it may share bitmap bytes + // with neighboring objects. + // + // Handle the first byte specially if it's shared. See + // Phase 1 for why this is the only special case we need. + if doubleCheck { + if !(h.shift == 0 || (sys.PtrSize == 8 && h.shift == 2)) { + print("x=", x, " size=", size, " cnw=", h.shift, "\n") + throw("bad start shift") + } + } + if sys.PtrSize == 8 && h.shift == 2 { + *h.bitp = *h.bitp&^((bitPointer|bitScan|(bitPointer|bitScan)<= 4 { + // This loop processes four words at a time, + // so round cnw down accordingly. + hNext, words := h.forwardOrBoundary(cnw / 4 * 4) + + // n is the number of bitmap bytes to copy. + n := words / 4 + memmove(unsafe.Pointer(h.bitp), unsafe.Pointer(src), n) + cnw -= words + h = hNext + src = addb(src, n) + } + if doubleCheck && h.shift != 0 { + print("cnw=", cnw, " h.shift=", h.shift, "\n") + throw("bad shift after block copy") + } + // Handle the last byte if it's shared. + if cnw == 2 { + *h.bitp = *h.bitp&^(bitPointer|bitScan|(bitPointer|bitScan)< x+size { + throw("copy exceeded object size") + } + if !(cnw == 0 || cnw == 2) { + print("x=", x, " size=", size, " cnw=", cnw, "\n") + throw("bad number of remaining words") + } + // Set up hbitp so doubleCheck code below can check it. + hbitp = h.bitp + } + // Zero the object where we wrote the bitmap. + memclrNoHeapPointers(unsafe.Pointer(x), uintptr(unsafe.Pointer(src))-x) + } + + // Double check the whole bitmap. if doubleCheck { - end := heapBitsForAddr(x + size) + // x+size may not point to the heap, so back up one + // word and then call next(). 
+ end := heapBitsForAddr(x + size - sys.PtrSize).next() + endAI := arenaIdx(end.arena) + if !outOfPlace && (end.bitp == nil || (end.shift == 0 && end.bitp == &mheap_.arenas[endAI.l1()][endAI.l2()].bitmap[0])) { + // The unrolling code above walks hbitp just + // past the bitmap without moving to the next + // arena. Synthesize this for end.bitp. + end.arena-- + endAI = arenaIdx(end.arena) + end.bitp = addb(&mheap_.arenas[endAI.l1()][endAI.l2()].bitmap[0], heapArenaBitmapBytes) + end.last = nil + } if typ.kind&kindGCProg == 0 && (hbitp != end.bitp || (w == nw+2) != (end.shift == 2)) { - println("ended at wrong bitmap byte for", *typ.string, "x", dataSize/typ.size) + println("ended at wrong bitmap byte for", typ.string(), "x", dataSize/typ.size) print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n") print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n") h0 := heapBitsForAddr(x) @@ -1327,15 +1472,15 @@ Phase4: } } if have != want { - println("mismatch writing bits for", *typ.string, "x", dataSize/typ.size) + println("mismatch writing bits for", typ.string(), "x", dataSize/typ.size) print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n") - print("kindGCProg=", typ.kind&kindGCProg != 0, "\n") + print("kindGCProg=", typ.kind&kindGCProg != 0, " outOfPlace=", outOfPlace, "\n") print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n") h0 := heapBitsForAddr(x) print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n") print("current bits h.bitp=", h.bitp, " h.shift=", h.shift, " *h.bitp=", hex(*h.bitp), "\n") print("ptrmask=", ptrmask, " p=", p, " endp=", endp, " endnb=", endnb, " pbits=", hex(pbits), " b=", hex(b), " nb=", nb, "\n") - println("at word", i, "offset", i*sys.PtrSize, "have", have, "want", want) + println("at word", i, "offset", i*sys.PtrSize, "have", hex(have), "want", hex(want)) if typ.kind&kindGCProg != 0 { println("GC program:") dumpGCProg(addb(typ.gcdata, 4)) @@ -1436,9 +1581,9 @@ func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize u // so that scanobject can stop early in the final element. totalBits = (elemSize*(count-1) + progSize) / sys.PtrSize } - endProg := unsafe.Pointer(subtractb(h.bitp, (totalBits+3)/4)) - endAlloc := unsafe.Pointer(subtractb(h.bitp, allocSize/heapBitmapScale)) - memclrNoHeapPointers(add(endAlloc, 1), uintptr(endProg)-uintptr(endAlloc)) + endProg := unsafe.Pointer(addb(h.bitp, (totalBits+3)/4)) + endAlloc := unsafe.Pointer(addb(h.bitp, allocSize/sys.PtrSize/wordsPerBitmapByte)) + memclrNoHeapPointers(endProg, uintptr(endAlloc)-uintptr(endProg)) } // progToPointerMask returns the 1-bit pointer mask output by the GC program prog. 
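With the bitmap now laid out forward, heapBitsSetTypeGCProg clears its tail by simple byte arithmetic: (totalBits+3)/4 bytes are covered by the unrolled GC program, allocSize/ptrSize/4 bytes belong to the whole allocation, and the gap between the two is zeroed. A small worked example of that arithmetic (sample sizes only):

package main

import "fmt"

const ptrSize = 8

// trailingClear returns how many bitmap bytes the GC program describes
// (the endProg offset) and how many the allocation reserves (the
// endAlloc offset); the difference is what memclrNoHeapPointers wipes.
func trailingClear(totalBits, allocSize uintptr) (progBytes, allocBytes uintptr) {
	return (totalBits + 3) / 4, allocSize / ptrSize / 4
}

func main() {
	// For example, a GC program covering 100 pointer-sized words
	// inside a 1024-byte allocation:
	progBytes, allocBytes := trailingClear(100, 1024)
	fmt.Println(progBytes, allocBytes, allocBytes-progBytes) // 25 32 7
}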
@@ -1497,11 +1642,11 @@ Run: } else { v := bits&bitPointerAll | bitScanAll *dst = uint8(v) - dst = subtract1(dst) + dst = add1(dst) bits >>= 4 v = bits&bitPointerAll | bitScanAll *dst = uint8(v) - dst = subtract1(dst) + dst = add1(dst) bits >>= 4 } } @@ -1535,11 +1680,11 @@ Run: } else { v := bits&0xf | bitScanAll *dst = uint8(v) - dst = subtract1(dst) + dst = add1(dst) bits >>= 4 v = bits&0xf | bitScanAll *dst = uint8(v) - dst = subtract1(dst) + dst = add1(dst) bits >>= 4 } } @@ -1599,11 +1744,11 @@ Run: npattern += 8 } } else { - src = add1(src) + src = subtract1(src) for npattern < n { pattern <<= 4 pattern |= uintptr(*src) & 0xf - src = add1(src) + src = subtract1(src) npattern += 4 } } @@ -1665,7 +1810,7 @@ Run: } else { for nbits >= 4 { *dst = uint8(bits&0xf | bitScanAll) - dst = subtract1(dst) + dst = add1(dst) bits >>= 4 nbits -= 4 } @@ -1710,10 +1855,10 @@ Run: } } else { // Leading src fragment. - src = addb(src, (off+3)/4) + src = subtractb(src, (off+3)/4) if frag := off & 3; frag != 0 { bits |= (uintptr(*src) & 0xf) >> (4 - frag) << nbits - src = subtract1(src) + src = add1(src) nbits += frag c -= frag } @@ -1721,9 +1866,9 @@ Run: // The bits are rotating through the bit buffer. for i := c / 4; i > 0; i-- { bits |= (uintptr(*src) & 0xf) << nbits - src = subtract1(src) + src = add1(src) *dst = uint8(bits&0xf | bitScanAll) - dst = subtract1(dst) + dst = add1(dst) bits >>= 4 } // Final src fragment. @@ -1745,12 +1890,12 @@ Run: bits >>= 8 } } else { - totalBits = (uintptr(unsafe.Pointer(dstStart))-uintptr(unsafe.Pointer(dst)))*4 + nbits + totalBits = (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*4 + nbits nbits += -nbits & 3 for ; nbits > 0; nbits -= 4 { v := bits&0xf | bitScanAll *dst = uint8(v) - dst = subtract1(dst) + dst = add1(dst) bits >>= 4 } } @@ -1839,12 +1984,11 @@ func getgcmask(ep interface{}) (mask []byte) { } // heap - var n uintptr - var base uintptr - if mlookup(uintptr(p), &base, &n, nil) != 0 { + if base, s, _ := findObject(uintptr(p), 0, 0, false); base != 0 { + hbits := heapBitsForAddr(base) + n := s.elemsize mask = make([]byte, n/sys.PtrSize) for i := uintptr(0); i < n; i += sys.PtrSize { - hbits := heapBitsForAddr(base + i) if hbits.isPointer() { mask[i/sys.PtrSize] = 1 } @@ -1852,6 +1996,7 @@ func getgcmask(ep interface{}) (mask []byte) { mask = mask[:i/sys.PtrSize] break } + hbits = hbits.next() } return } diff --git a/libgo/go/runtime/mcache.go b/libgo/go/runtime/mcache.go index 766cfd1..3dacf96 100644 --- a/libgo/go/runtime/mcache.go +++ b/libgo/go/runtime/mcache.go @@ -37,7 +37,6 @@ type mcache struct { alloc [numSpanClasses]*mspan // spans to allocate from, indexed by spanClass // Local allocator stats, flushed during GC. 
- local_nlookup uintptr // number of pointer lookups local_largefree uintptr // bytes freed for large objects (>maxsmallsize) local_nlargefree uintptr // number of frees for large objects (>maxsmallsize) local_nsmallfree [_NumSizeClasses]uintptr // number of frees for small objects (<=maxsmallsize) diff --git a/libgo/go/runtime/mcentral.go b/libgo/go/runtime/mcentral.go index 150f4fd..50a4791 100644 --- a/libgo/go/runtime/mcentral.go +++ b/libgo/go/runtime/mcentral.go @@ -246,6 +246,6 @@ func (c *mcentral) grow() *mspan { p := s.base() s.limit = p + size*n - heapBitsForSpan(s.base()).initSpan(s) + heapBitsForAddr(s.base()).initSpan(s) return s } diff --git a/libgo/go/runtime/mem_gccgo.go b/libgo/go/runtime/mem_gccgo.go index a087945..44f4648 100644 --- a/libgo/go/runtime/mem_gccgo.go +++ b/libgo/go/runtime/mem_gccgo.go @@ -21,9 +21,6 @@ func sysMmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uintptr) //extern munmap func munmap(addr unsafe.Pointer, length uintptr) int32 -//extern mincore -func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32 - //extern madvise func madvise(addr unsafe.Pointer, n uintptr, flags int32) int32 @@ -49,54 +46,6 @@ func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uintptr) (u return p, 0 } -// NOTE: vec must be just 1 byte long here. -// Mincore returns ENOMEM if any of the pages are unmapped, -// but we want to know that all of the pages are unmapped. -// To make these the same, we can only ask about one page -// at a time. See golang.org/issue/7476. -var addrspace_vec [1]byte - -func addrspace_free(v unsafe.Pointer, n uintptr) bool { - for off := uintptr(0); off < n; off += physPageSize { - // Use a length of 1 byte, which the kernel will round - // up to one physical page regardless of the true - // physical page size. - errval := 0 - if mincore(unsafe.Pointer(uintptr(v)+off), 1, &addrspace_vec[0]) < 0 { - errval = errno() - } - if errval == _ENOSYS { - // mincore is not available on this system. - // Assume the address is available. - return true - } - if errval == _EINVAL { - // Address is not a multiple of the physical - // page size. Shouldn't happen, but just ignore it. - continue - } - // ENOMEM means unmapped, which is what we want. - // Anything else we assume means the pages are mapped. - if errval != _ENOMEM { - return false - } - } - return true -} - -func mmap_fixed(v unsafe.Pointer, n uintptr, prot, flags, fd int32, offset uintptr) (unsafe.Pointer, int) { - p, err := mmap(v, n, prot, flags, fd, offset) - // On some systems, mmap ignores v without - // MAP_FIXED, so retry if the address space is free. - if p != v && addrspace_free(v, n) { - if err == 0 { - munmap(p, n) - } - p, err = mmap(v, n, prot, flags|_MAP_FIXED, fd, offset) - } - return p, err -} - // Don't split the stack as this method may be invoked without a valid G, which // prevents us from allocating more stack. //go:nosplit @@ -227,62 +176,17 @@ func sysFault(v unsafe.Pointer, n uintptr) { mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE|_MAP_FIXED, mmapFD, 0) } -func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer { - // On 64-bit, people with ulimit -v set complain if we reserve too - // much address space. Instead, assume that the reservation is okay - // if we can reserve at least 64K and check the assumption in SysMap. - // Only user-mode Linux (UML) rejects these requests. 
- if sys.PtrSize == 8 && uint64(n) > 1<<32 { - p, err := mmap_fixed(v, 64<<10, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, mmapFD, 0) - if p != v || err != 0 { - if err == 0 { - munmap(p, 64<<10) - } - return nil - } - munmap(p, 64<<10) - *reserved = false - return v - } - +func sysReserve(v unsafe.Pointer, n uintptr) unsafe.Pointer { p, err := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, mmapFD, 0) if err != 0 { return nil } - *reserved = true return p } -func sysMap(v unsafe.Pointer, n uintptr, reserved bool, sysStat *uint64) { +func sysMap(v unsafe.Pointer, n uintptr, sysStat *uint64) { mSysStatInc(sysStat, n) - // On 64-bit, we don't actually have v reserved, so tread carefully. - if !reserved { - flags := int32(_MAP_ANON | _MAP_PRIVATE) - if GOOS == "dragonfly" { - // TODO(jsing): For some reason DragonFly seems to return - // memory at a different address than we requested, even when - // there should be no reason for it to do so. This can be - // avoided by using MAP_FIXED, but I'm not sure we should need - // to do this - we do not on other platforms. - flags |= _MAP_FIXED - } - p, err := mmap_fixed(v, n, _PROT_READ|_PROT_WRITE, flags, mmapFD, 0) - if err == _ENOMEM { - throw("runtime: out of memory") - } - if p != v || err != 0 { - print("runtime: address space conflict: map(", v, ") = ", p, " (err ", err, ")\n") - throw("runtime: address space conflict") - } - return - } - - if GOOS == "aix" { - // AIX does not allow mapping a range that is already mapped. - // So always unmap first even if it is already unmapped. - munmap(v, n) - } p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, mmapFD, 0) if err == _ENOMEM { throw("runtime: out of memory") diff --git a/libgo/go/runtime/memmove_test.go b/libgo/go/runtime/memmove_test.go index 62de604..b490cd8 100644 --- a/libgo/go/runtime/memmove_test.go +++ b/libgo/go/runtime/memmove_test.go @@ -450,6 +450,13 @@ func BenchmarkCopyFat512(b *testing.B) { _ = y } } +func BenchmarkCopyFat520(b *testing.B) { + var x [520 / 4]uint32 + for i := 0; i < b.N; i++ { + y := x + _ = y + } +} func BenchmarkCopyFat1024(b *testing.B) { var x [1024 / 4]uint32 for i := 0; i < b.N; i++ { diff --git a/libgo/go/runtime/mfinal.go b/libgo/go/runtime/mfinal.go index 19573d8..1a7792c 100644 --- a/libgo/go/runtime/mfinal.go +++ b/libgo/go/runtime/mfinal.go @@ -142,7 +142,7 @@ func runfinq() { if fb == nil { fing = gp fingwait = true - goparkunlock(&finlock, "finalizer wait", traceEvGoBlock, 1) + goparkunlock(&finlock, waitReasonFinalizerWait, traceEvGoBlock, 1) continue } unlock(&finlock) @@ -233,8 +233,8 @@ func runfinq() { // is not guaranteed to run, because there is no ordering that // respects the dependencies. // -// The finalizer for obj is scheduled to run at some arbitrary time after -// obj becomes unreachable. +// The finalizer is scheduled to run at some arbitrary time after the +// program can no longer reach the object to which obj points. // There is no guarantee that finalizers will run before a program exits, // so typically they are useful only for releasing non-memory resources // associated with an object during a long-running program. 
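Several hunks in this patch (getgcmask above, SetFinalizer just below, and the mgcmark.go scanning code further down) replace the old mfinal.go findObject and mheap.go mlookup helpers with a single runtime-internal findObject that deals in plain uintptrs. The call shape findObject(p, 0, 0, false) and the (base, span, objIndex) results are taken from the patch itself; the helper name and the interpretation of the middle arguments below are my own labels, so treat this as a hedged usage sketch rather than the patch's code:

	// objectBounds resolves an arbitrary pointer to the heap object containing
	// it, the way getgcmask and SetFinalizer now do. The second and third
	// arguments appear to describe the referencing slot, and the final flag
	// marks conservatively scanned stack words; 0, 0, false is the common
	// case throughout this patch.
	func objectBounds(p uintptr) (base, size uintptr, ok bool) {
		base, span, _ := findObject(p, 0, 0, false)
		if base == 0 {
			// Not a heap pointer: the new API reports this as base == 0
			// rather than the old nil *mspan / nil unsafe.Pointer pair.
			return 0, 0, false
		}
		return base, span.elemsize, true
	}
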
@@ -284,7 +284,7 @@ func SetFinalizer(obj interface{}, finalizer interface{}) { throw("runtime.SetFinalizer: first argument is nil") } if etyp.kind&kindMask != kindPtr { - throw("runtime.SetFinalizer: first argument is " + *etyp.string + ", not pointer") + throw("runtime.SetFinalizer: first argument is " + etyp.string() + ", not pointer") } ot := (*ptrtype)(unsafe.Pointer(etyp)) if ot.elem == nil { @@ -292,9 +292,9 @@ func SetFinalizer(obj interface{}, finalizer interface{}) { } // find the containing object - _, base, _ := findObject(e.data) + base, _, _ := findObject(uintptr(e.data), 0, 0, false) - if base == nil { + if base == 0 { // 0-length objects are okay. if e.data == unsafe.Pointer(&zerobase) { return @@ -314,7 +314,7 @@ func SetFinalizer(obj interface{}, finalizer interface{}) { return } - if e.data != base { + if uintptr(e.data) != base { // As an implementation detail we allow to set finalizers for an inner byte // of an object if it could come from tiny alloc (see mallocgc for details). if ot.elem == nil || ot.elem.kind&kindNoPointers == 0 || ot.elem.size >= maxTinySize { @@ -333,14 +333,14 @@ func SetFinalizer(obj interface{}, finalizer interface{}) { } if ftyp.kind&kindMask != kindFunc { - throw("runtime.SetFinalizer: second argument is " + *ftyp.string + ", not a function") + throw("runtime.SetFinalizer: second argument is " + ftyp.string() + ", not a function") } ft := (*functype)(unsafe.Pointer(ftyp)) if ft.dotdotdot { - throw("runtime.SetFinalizer: cannot pass " + *etyp.string + " to finalizer " + *ftyp.string + " because dotdotdot") + throw("runtime.SetFinalizer: cannot pass " + etyp.string() + " to finalizer " + ftyp.string() + " because dotdotdot") } if len(ft.in) != 1 { - throw("runtime.SetFinalizer: cannot pass " + *etyp.string + " to finalizer " + *ftyp.string) + throw("runtime.SetFinalizer: cannot pass " + etyp.string() + " to finalizer " + ftyp.string()) } fint := ft.in[0] switch { @@ -363,7 +363,7 @@ func SetFinalizer(obj interface{}, finalizer interface{}) { goto okarg } } - throw("runtime.SetFinalizer: cannot pass " + *etyp.string + " to finalizer " + *ftyp.string) + throw("runtime.SetFinalizer: cannot pass " + etyp.string() + " to finalizer " + ftyp.string()) okarg: // make sure we have a finalizer goroutine createfing() @@ -379,46 +379,6 @@ okarg: }) } -// Look up pointer v in heap. Return the span containing the object, -// the start of the object, and the size of the object. If the object -// does not exist, return nil, nil, 0. -func findObject(v unsafe.Pointer) (s *mspan, x unsafe.Pointer, n uintptr) { - c := gomcache() - c.local_nlookup++ - if sys.PtrSize == 4 && c.local_nlookup >= 1<<30 { - // purge cache stats to prevent overflow - lock(&mheap_.lock) - purgecachedstats(c) - unlock(&mheap_.lock) - } - - // find span - arena_start := mheap_.arena_start - arena_used := mheap_.arena_used - if uintptr(v) < arena_start || uintptr(v) >= arena_used { - return - } - p := uintptr(v) >> pageShift - q := p - arena_start>>pageShift - s = mheap_.spans[q] - if s == nil { - return - } - x = unsafe.Pointer(s.base()) - - if uintptr(v) < uintptr(x) || uintptr(v) >= uintptr(unsafe.Pointer(s.limit)) || s.state != mSpanInUse { - s = nil - x = nil - return - } - - n = s.elemsize - if s.spanclass.sizeclass() != 0 { - x = add(x, (uintptr(v)-uintptr(x))/n*n) - } - return -} - // Mark KeepAlive as noinline so that it is easily detectable as an intrinsic. 
//go:noinline diff --git a/libgo/go/runtime/mfixalloc.go b/libgo/go/runtime/mfixalloc.go index 7496671..1febe78 100644 --- a/libgo/go/runtime/mfixalloc.go +++ b/libgo/go/runtime/mfixalloc.go @@ -11,7 +11,7 @@ package runtime import "unsafe" // FixAlloc is a simple free-list allocator for fixed size objects. -// Malloc uses a FixAlloc wrapped around sysAlloc to manages its +// Malloc uses a FixAlloc wrapped around sysAlloc to manage its // MCache and MSpan objects. // // Memory returned by fixalloc.alloc is zeroed by default, but the diff --git a/libgo/go/runtime/mgc.go b/libgo/go/runtime/mgc.go index 626f088..4ef982d 100644 --- a/libgo/go/runtime/mgc.go +++ b/libgo/go/runtime/mgc.go @@ -232,21 +232,10 @@ func setGCPercent(in int32) (out int32) { gcSetTriggerRatio(memstats.triggerRatio) unlock(&mheap_.lock) - // If we just disabled GC, wait for any concurrent GC to + // If we just disabled GC, wait for any concurrent GC mark to // finish so we always return with no GC running. if in < 0 { - // Disable phase transitions. - lock(&work.sweepWaiters.lock) - if gcphase == _GCmark { - // GC is active. Wait until we reach sweeping. - gp := getg() - gp.schedlink = work.sweepWaiters.head - work.sweepWaiters.head.set(gp) - goparkunlock(&work.sweepWaiters.lock, "wait for GC cycle", traceEvGoBlock, 1) - } else { - // GC isn't active. - unlock(&work.sweepWaiters.lock) - } + gcWaitOnMark(atomic.Load(&work.cycles)) } return out @@ -1091,21 +1080,10 @@ func GC() { // GC may move ahead on its own. For example, when we block // until mark termination N, we may wake up in cycle N+2. - gp := getg() - - // Prevent the GC phase or cycle count from changing. - lock(&work.sweepWaiters.lock) + // Wait until the current sweep termination, mark, and mark + // termination complete. n := atomic.Load(&work.cycles) - if gcphase == _GCmark { - // Wait until sweep termination, mark, and mark - // termination of cycle N complete. - gp.schedlink = work.sweepWaiters.head - work.sweepWaiters.head.set(gp) - goparkunlock(&work.sweepWaiters.lock, "wait for GC cycle", traceEvGoBlock, 1) - } else { - // We're in sweep N already. - unlock(&work.sweepWaiters.lock) - } + gcWaitOnMark(n) // We're now in sweep N or later. Trigger GC cycle N+1, which // will first finish sweep N if necessary and then enter sweep @@ -1113,14 +1091,7 @@ func GC() { gcStart(gcBackgroundMode, gcTrigger{kind: gcTriggerCycle, n: n + 1}) // Wait for mark termination N+1 to complete. - lock(&work.sweepWaiters.lock) - if gcphase == _GCmark && atomic.Load(&work.cycles) == n+1 { - gp.schedlink = work.sweepWaiters.head - work.sweepWaiters.head.set(gp) - goparkunlock(&work.sweepWaiters.lock, "wait for GC cycle", traceEvGoBlock, 1) - } else { - unlock(&work.sweepWaiters.lock) - } + gcWaitOnMark(n + 1) // Finish sweep N+1 before returning. We do this both to // complete the cycle and because runtime.GC() is often used @@ -1157,6 +1128,32 @@ func GC() { releasem(mp) } +// gcWaitOnMark blocks until GC finishes the Nth mark phase. If GC has +// already completed this mark phase, it returns immediately. +func gcWaitOnMark(n uint32) { + for { + // Disable phase transitions. + lock(&work.sweepWaiters.lock) + nMarks := atomic.Load(&work.cycles) + if gcphase != _GCmark { + // We've already completed this cycle's mark. + nMarks++ + } + if nMarks > n { + // We're done. + unlock(&work.sweepWaiters.lock) + return + } + + // Wait until sweep termination, mark, and mark + // termination of cycle N complete. 
+ gp := getg() + gp.schedlink = work.sweepWaiters.head + work.sweepWaiters.head.set(gp) + goparkunlock(&work.sweepWaiters.lock, waitReasonWaitForGCCycle, traceEvGoBlock, 1) + } +} + // gcMode indicates how concurrent a GC cycle should be. type gcMode int @@ -1531,7 +1528,7 @@ func gcMarkTermination(nextTriggerRatio float64) { _g_.m.traceback = 2 gp := _g_.m.curg casgstatus(gp, _Grunning, _Gwaiting) - gp.waitreason = "garbage collection" + gp.waitreason = waitReasonGarbageCollection // Run gc on the g0 stack. We do this so that the g stack // we're currently running on will no longer change. Cuts @@ -1800,7 +1797,7 @@ func gcBgMarkWorker(_p_ *p) { } } return true - }, unsafe.Pointer(park), "GC worker (idle)", traceEvGoBlock, 0) + }, unsafe.Pointer(park), waitReasonGCWorkerIdle, traceEvGoBlock, 0) // Loop until the P dies and disassociates this // worker (the P may later be reused, in which case diff --git a/libgo/go/runtime/mgclarge.go b/libgo/go/runtime/mgclarge.go index fe437bf..e7fa831 100644 --- a/libgo/go/runtime/mgclarge.go +++ b/libgo/go/runtime/mgclarge.go @@ -9,7 +9,7 @@ // Large spans are the subject of this file. Spans consisting of less than // _MaxMHeapLists are held in lists of like sized spans. Larger spans // are held in a treap. See https://en.wikipedia.org/wiki/Treap or -// http://faculty.washington.edu/aragon/pubs/rst89.pdf for an overview. +// https://faculty.washington.edu/aragon/pubs/rst89.pdf for an overview. // sema.go also holds an implementation of a treap. // // Each treapNode holds a single span. The treap is sorted by page size @@ -43,7 +43,7 @@ type treapNode struct { parent *treapNode // direct parent of this node, nil if root npagesKey uintptr // number of pages in spanKey, used as primary sort key spanKey *mspan // span of size npagesKey, used as secondary sort key - priority uint32 // random number used by treap algorithm keep tree probablistically balanced + priority uint32 // random number used by treap algorithm to keep tree probabilistically balanced } func (t *treapNode) init() { @@ -137,7 +137,7 @@ func (root *mTreap) insert(span *mspan) { // npagesKeys, it is kept balanced on average by maintaining a heap ordering // on the priority: s.priority <= both s.right.priority and s.right.priority. // https://en.wikipedia.org/wiki/Treap - // http://faculty.washington.edu/aragon/pubs/rst89.pdf + // https://faculty.washington.edu/aragon/pubs/rst89.pdf t := (*treapNode)(mheap_.treapalloc.alloc()) t.init() diff --git a/libgo/go/runtime/mgcmark.go b/libgo/go/runtime/mgcmark.go index 7297fcb..88cae41 100644 --- a/libgo/go/runtime/mgcmark.go +++ b/libgo/go/runtime/mgcmark.go @@ -232,7 +232,7 @@ func markroot(gcw *gcWork, i uint32) { selfScan := gp == userG && readgstatus(userG) == _Grunning if selfScan { casgstatus(userG, _Grunning, _Gwaiting) - userG.waitreason = "garbage collection scan" + userG.waitreason = waitReasonGarbageCollectionScan } // TODO: scang blocks until gp's stack has @@ -467,7 +467,7 @@ func gcAssistAlloc1(gp *g, scanWork int64) { // store that clears it but an atomic check in every malloc // would be a performance hit. // Instead we recheck it here on the non-preemptable system - // stack to determine if we should preform an assist. + // stack to determine if we should perform an assist. // GC is done, so ignore any remaining debt. gp.gcAssistBytes = 0 @@ -486,7 +486,7 @@ func gcAssistAlloc1(gp *g, scanWork int64) { // gcDrainN requires the caller to be preemptible. 
casgstatus(gp, _Grunning, _Gwaiting) - gp.waitreason = "GC assist marking" + gp.waitreason = waitReasonGCAssistMarking // drain own cached work first in the hopes that it // will be more cache friendly. @@ -585,7 +585,7 @@ func gcParkAssist() bool { return false } // Park. - goparkunlock(&work.assistQueue.lock, "GC assist wait", traceEvGoBlockGC, 2) + goparkunlock(&work.assistQueue.lock, waitReasonGCAssistWait, traceEvGoBlockGC, 2) return true } @@ -934,9 +934,6 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) { b := b0 n := n0 - arena_start := mheap_.arena_start - arena_used := mheap_.arena_used - for i := uintptr(0); i < n; { // Find bits for the next word. bits := uint32(*addb(ptrmask, i/(sys.PtrSize*8))) @@ -948,9 +945,9 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) { if bits&1 != 0 { // Same work as in scanobject; see comments there. obj := *(*uintptr)(unsafe.Pointer(b + i)) - if obj != 0 && arena_start <= obj && obj < arena_used { - if obj, hbits, span, objIndex := heapBitsForObject(obj, b, i, false); obj != 0 { - greyobject(obj, b, i, hbits, span, gcw, objIndex, false) + if obj != 0 { + if obj, span, objIndex := findObject(obj, b, i, false); obj != 0 { + greyobject(obj, b, i, span, gcw, objIndex, false) } } } @@ -967,18 +964,6 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) { // //go:nowritebarrier func scanobject(b uintptr, gcw *gcWork) { - // Note that arena_used may change concurrently during - // scanobject and hence scanobject may encounter a pointer to - // a newly allocated heap object that is *not* in - // [start,used). It will not mark this object; however, we - // know that it was just installed by a mutator, which means - // that mutator will execute a write barrier and take care of - // marking it. This is even more pronounced on relaxed memory - // architectures since we access arena_used without barriers - // or synchronization, but the same logic applies. - arena_start := mheap_.arena_start - arena_used := mheap_.arena_used - // Find the bits for b and the size of the object at b. // // b is either the beginning of an object, in which case this @@ -1052,11 +1037,19 @@ func scanobject(b uintptr, gcw *gcWork) { obj := *(*uintptr)(unsafe.Pointer(b + i)) // At this point we have extracted the next potential pointer. - // Check if it points into heap and not back at the current object. - if obj != 0 && arena_start <= obj && obj < arena_used && obj-b >= n { - // Mark the object. - if obj, hbits, span, objIndex := heapBitsForObject(obj, b, i, false); obj != 0 { - greyobject(obj, b, i, hbits, span, gcw, objIndex, false) + // Quickly filter out nil and pointers back to the current object. + if obj != 0 && obj-b >= n { + // Test if obj points into the Go heap and, if so, + // mark the object. + // + // Note that it's possible for findObject to + // fail if obj points to a just-allocated heap + // object because of a race with growing the + // heap. In this case, we know the object was + // just allocated and hence will be marked by + // allocation itself. + if obj, span, objIndex := findObject(obj, b, i, false); obj != 0 { + greyobject(obj, b, i, span, gcw, objIndex, false) } } } @@ -1071,16 +1064,11 @@ func scanobject(b uintptr, gcw *gcWork) { // scanblock, but we scan the stack conservatively, so there is no // bitmask of pointers. 
func scanstackblock(b, n uintptr, gcw *gcWork) { - arena_start := mheap_.arena_start - arena_used := mheap_.arena_used - for i := uintptr(0); i < n; i += sys.PtrSize { // Same work as in scanobject; see comments there. obj := *(*uintptr)(unsafe.Pointer(b + i)) - if obj != 0 && arena_start <= obj && obj < arena_used { - if obj, hbits, span, objIndex := heapBitsForObject(obj, b, i, true); obj != 0 { - greyobject(obj, b, i, hbits, span, gcw, objIndex, true) - } + if obj, span, objIndex := findObject(obj, b, i, true); obj != 0 { + greyobject(obj, b, i, span, gcw, objIndex, true) } } } @@ -1090,11 +1078,9 @@ func scanstackblock(b, n uintptr, gcw *gcWork) { // Preemption must be disabled. //go:nowritebarrier func shade(b uintptr) { - // shade can be called to shade a pointer found on the stack, - // so pass forStack as true to heapBitsForObject and greyobject. - if obj, hbits, span, objIndex := heapBitsForObject(b, 0, 0, true); obj != 0 { + if obj, span, objIndex := findObject(b, 0, 0, true); obj != 0 { gcw := &getg().m.p.ptr().gcw - greyobject(obj, 0, 0, hbits, span, gcw, objIndex, true) + greyobject(obj, 0, 0, span, gcw, objIndex, true) if gcphase == _GCmarktermination || gcBlackenPromptly { // Ps aren't allowed to cache work during mark // termination. @@ -1110,7 +1096,7 @@ func shade(b uintptr) { // See also wbBufFlush1, which partially duplicates this logic. // //go:nowritebarrierrec -func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork, objIndex uintptr, forStack bool) { +func greyobject(obj, base, off uintptr, span *mspan, gcw *gcWork, objIndex uintptr, forStack bool) { // obj should be start of allocation, and so must be at least pointer-aligned. if obj&(sys.PtrSize-1) != 0 { throw("greyobject: obj not pointer-aligned") @@ -1139,6 +1125,7 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork getg().m.traceback = 2 throw("checkmark found unmarked object") } + hbits := heapBitsForAddr(obj) if hbits.isCheckmarked(span.elemsize) { return } @@ -1190,15 +1177,8 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork // gcDumpObject dumps the contents of obj for debugging and marks the // field at byte offset off in obj. 
func gcDumpObject(label string, obj, off uintptr) { - if obj < mheap_.arena_start || obj >= mheap_.arena_used { - print(label, "=", hex(obj), " is not in the Go heap\n") - return - } - k := obj >> _PageShift - x := k - x -= mheap_.arena_start >> _PageShift - s := mheap_.spans[x] - print(label, "=", hex(obj), " k=", hex(k)) + s := spanOf(obj) + print(label, "=", hex(obj)) if s == nil { print(" s=nil\n") return @@ -1272,9 +1252,9 @@ func gcMarkTinyAllocs() { if c == nil || c.tiny == 0 { continue } - _, hbits, span, objIndex := heapBitsForObject(c.tiny, 0, 0, false) + _, span, objIndex := findObject(c.tiny, 0, 0, false) gcw := &p.gcw - greyobject(c.tiny, 0, 0, hbits, span, gcw, objIndex, false) + greyobject(c.tiny, 0, 0, span, gcw, objIndex, false) if gcBlackenPromptly { gcw.dispose() } @@ -1309,7 +1289,7 @@ func initCheckmarks() { useCheckmark = true for _, s := range mheap_.allspans { if s.state == _MSpanInUse { - heapBitsForSpan(s.base()).initCheckmarkSpan(s.layout()) + heapBitsForAddr(s.base()).initCheckmarkSpan(s.layout()) } } } @@ -1318,7 +1298,7 @@ func clearCheckmarks() { useCheckmark = false for _, s := range mheap_.allspans { if s.state == _MSpanInUse { - heapBitsForSpan(s.base()).clearCheckmarkSpan(s.layout()) + heapBitsForAddr(s.base()).clearCheckmarkSpan(s.layout()) } } } diff --git a/libgo/go/runtime/mgcsweep.go b/libgo/go/runtime/mgcsweep.go index d6be349..39dd54e 100644 --- a/libgo/go/runtime/mgcsweep.go +++ b/libgo/go/runtime/mgcsweep.go @@ -51,7 +51,7 @@ func bgsweep(c chan int) { lock(&sweep.lock) sweep.parked = true c <- 1 - goparkunlock(&sweep.lock, "GC sweep wait", traceEvGoBlock, 1) + goparkunlock(&sweep.lock, waitReasonGCSweepWait, traceEvGoBlock, 1) for { for gosweepone() != ^uintptr(0) { @@ -70,7 +70,7 @@ func bgsweep(c chan int) { continue } sweep.parked = true - goparkunlock(&sweep.lock, "GC sweep wait", traceEvGoBlock, 1) + goparkunlock(&sweep.lock, waitReasonGCSweepWait, traceEvGoBlock, 1) } } diff --git a/libgo/go/runtime/mgcwork.go b/libgo/go/runtime/mgcwork.go index c6634fc..99771e2 100644 --- a/libgo/go/runtime/mgcwork.go +++ b/libgo/go/runtime/mgcwork.go @@ -400,6 +400,7 @@ func getempty() *workbuf { for i := uintptr(0); i+_WorkbufSize <= workbufAlloc; i += _WorkbufSize { newb := (*workbuf)(unsafe.Pointer(s.base() + i)) newb.nobj = 0 + lfnodeValidate(&newb.node) if i == 0 { b = newb } else { diff --git a/libgo/go/runtime/mheap.go b/libgo/go/runtime/mheap.go index d971bfe..65622f4 100644 --- a/libgo/go/runtime/mheap.go +++ b/libgo/go/runtime/mheap.go @@ -50,23 +50,6 @@ type mheap struct { // access (since that may free the backing store). allspans []*mspan // all spans out there - // spans is a lookup table to map virtual address page IDs to *mspan. - // For allocated spans, their pages map to the span itself. - // For free spans, only the lowest and highest pages map to the span itself. - // Internal pages map to an arbitrary span. - // For pages that have never been allocated, spans entries are nil. - // - // Modifications are protected by mheap.lock. Reads can be - // performed without locking, but ONLY from indexes that are - // known to contain in-use or stack spans. This means there - // must not be a safe-point between establishing that an - // address is live and looking it up in the spans array. - // - // This is backed by a reserved region of the address space so - // it can grow without moving. The memory up to len(spans) is - // mapped. cap(spans) indicates the total reserved memory. 
- spans []*mspan - // sweepSpans contains two mspan stacks: one of swept in-use // spans, and one of unswept in-use spans. These two trade // roles on each GC cycle. Since the sweepgen increases by 2 @@ -78,7 +61,7 @@ type mheap struct { // on the swept stack. sweepSpans [2]gcSweepBuf - _ uint32 // align uint64 fields on 32-bit for atomics + //_ uint32 // align uint64 fields on 32-bit for atomics // Proportional sweep // @@ -113,36 +96,44 @@ type mheap struct { nlargefree uint64 // number of frees for large objects (>maxsmallsize) nsmallfree [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize) - // range of addresses we might see in the heap - bitmap uintptr // Points to one byte past the end of the bitmap - bitmap_mapped uintptr - - // The arena_* fields indicate the addresses of the Go heap. + // arenas is the heap arena map. It points to the metadata for + // the heap for every arena frame of the entire usable virtual + // address space. + // + // Use arenaIndex to compute indexes into this array. // - // The maximum range of the Go heap is - // [arena_start, arena_start+_MaxMem+1). + // For regions of the address space that are not backed by the + // Go heap, the arena map contains nil. // - // The range of the current Go heap is - // [arena_start, arena_used). Parts of this range may not be - // mapped, but the metadata structures are always mapped for - // the full range. - arena_start uintptr - arena_used uintptr // Set with setArenaUsed. - - // The heap is grown using a linear allocator that allocates - // from the block [arena_alloc, arena_end). arena_alloc is - // often, but *not always* equal to arena_used. - arena_alloc uintptr - arena_end uintptr - - // arena_reserved indicates that the memory [arena_alloc, - // arena_end) is reserved (e.g., mapped PROT_NONE). If this is - // false, we have to be careful not to clobber existing - // mappings here. If this is true, then we own the mapping - // here and *must* clobber it to use it. - arena_reserved bool - - _ uint32 // ensure 64-bit alignment + // Modifications are protected by mheap_.lock. Reads can be + // performed without locking; however, a given entry can + // transition from nil to non-nil at any time when the lock + // isn't held. (Entries never transitions back to nil.) + // + // In general, this is a two-level mapping consisting of an L1 + // map and possibly many L2 maps. This saves space when there + // are a huge number of arena frames. However, on many + // platforms (even 64-bit), arenaL1Bits is 0, making this + // effectively a single-level map. In this case, arenas[0] + // will never be nil. + arenas [1 << arenaL1Bits]*[1 << arenaL2Bits]*heapArena + + // heapArenaAlloc is pre-reserved space for allocating heapArena + // objects. This is only used on 32-bit, where we pre-reserve + // this space to avoid interleaving it with the heap itself. + heapArenaAlloc linearAlloc + + // arenaHints is a list of addresses at which to attempt to + // add more heap arenas. This is initially populated with a + // set of general hint addresses, and grown with the bounds of + // actual heap arena ranges. + arenaHints *arenaHint + + // arena is a pre-reserved space for allocating heap arenas + // (the actual arenas). This is only used on 32-bit. + arena linearAlloc + + //_ uint32 // ensure 64-bit alignment of central // central free lists for small size classes. 
// the padding makes sure that the MCentrals are @@ -160,12 +151,51 @@ type mheap struct { specialfinalizeralloc fixalloc // allocator for specialfinalizer* specialprofilealloc fixalloc // allocator for specialprofile* speciallock mutex // lock for special record allocators. + arenaHintAlloc fixalloc // allocator for arenaHints unused *specialfinalizer // never set, just here to force the specialfinalizer type into DWARF } var mheap_ mheap +// A heapArena stores metadata for a heap arena. heapArenas are stored +// outside of the Go heap and accessed via the mheap_.arenas index. +// +// This gets allocated directly from the OS, so ideally it should be a +// multiple of the system page size. For example, avoid adding small +// fields. +// +//go:notinheap +type heapArena struct { + // bitmap stores the pointer/scalar bitmap for the words in + // this arena. See mbitmap.go for a description. Use the + // heapBits type to access this. + bitmap [heapArenaBitmapBytes]byte + + // spans maps from virtual address page ID within this arena to *mspan. + // For allocated spans, their pages map to the span itself. + // For free spans, only the lowest and highest pages map to the span itself. + // Internal pages map to an arbitrary span. + // For pages that have never been allocated, spans entries are nil. + // + // Modifications are protected by mheap.lock. Reads can be + // performed without locking, but ONLY from indexes that are + // known to contain in-use or stack spans. This means there + // must not be a safe-point between establishing that an + // address is live and looking it up in the spans array. + spans [pagesPerArena]*mspan +} + +// arenaHint is a hint for where to grow the heap arenas. See +// mheap_.arenaHints. +// +//go:notinheap +type arenaHint struct { + addr uintptr + down bool + next *arenaHint +} + // An MSpan is a run of pages. // // When a MSpan is in the heap free list, state == MSpanFree @@ -384,21 +414,55 @@ func (sc spanClass) noscan() bool { return sc&1 != 0 } +// arenaIndex returns the index into mheap_.arenas of the arena +// containing metadata for p. This index combines of an index into the +// L1 map and an index into the L2 map and should be used as +// mheap_.arenas[ai.l1()][ai.l2()]. +// +// If p is outside the range of valid heap addresses, either l1() or +// l2() will be out of bounds. +// +// It is nosplit because it's called by spanOf and several other +// nosplit functions. +// +//go:nosplit +func arenaIndex(p uintptr) arenaIdx { + return arenaIdx((p + arenaBaseOffset) / heapArenaBytes) +} + +// arenaBase returns the low address of the region covered by heap +// arena i. +func arenaBase(i arenaIdx) uintptr { + return uintptr(i)*heapArenaBytes - arenaBaseOffset +} + +type arenaIdx uint + +func (i arenaIdx) l1() uint { + if arenaL1Bits == 0 { + // Let the compiler optimize this away if there's no + // L1 map. + return 0 + } else { + return uint(i) >> arenaL1Shift + } +} + +func (i arenaIdx) l2() uint { + if arenaL1Bits == 0 { + return uint(i) + } else { + return uint(i) & (1<= mheap_.arena_used { - return false - } - // Not a beginning of a block, consult span table to find the block beginning. 
- s := mheap_.spans[(b-mheap_.arena_start)>>_PageShift] - if s == nil || b < s.base() || b >= s.limit || s.state != mSpanInUse { - return false - } - return true + return spanOfHeap(b) != nil } // inHeapOrStack is a variant of inheap that returns true for pointers @@ -407,11 +471,7 @@ func inheap(b uintptr) bool { //go:nowritebarrier //go:nosplit func inHeapOrStack(b uintptr) bool { - if b == 0 || b < mheap_.arena_start || b >= mheap_.arena_used { - return false - } - // Not a beginning of a block, consult span table to find the block beginning. - s := mheap_.spans[(b-mheap_.arena_start)>>_PageShift] + s := spanOf(b) if s == nil || b < s.base() { return false } @@ -423,81 +483,81 @@ func inHeapOrStack(b uintptr) bool { } } -// TODO: spanOf and spanOfUnchecked are open-coded in a lot of places. -// Use the functions instead. - -// spanOf returns the span of p. If p does not point into the heap or -// no span contains p, spanOf returns nil. +// spanOf returns the span of p. If p does not point into the heap +// arena or no span has ever contained p, spanOf returns nil. +// +// If p does not point to allocated memory, this may return a non-nil +// span that does *not* contain p. If this is a possibility, the +// caller should either call spanOfHeap or check the span bounds +// explicitly. +// +// Must be nosplit because it has callers that are nosplit. +// +//go:nosplit func spanOf(p uintptr) *mspan { - if p == 0 || p < mheap_.arena_start || p >= mheap_.arena_used { + // This function looks big, but we use a lot of constant + // folding around arenaL1Bits to get it under the inlining + // budget. Also, many of the checks here are safety checks + // that Go needs to do anyway, so the generated code is quite + // short. + ri := arenaIndex(p) + if arenaL1Bits == 0 { + // If there's no L1, then ri.l1() can't be out of bounds but ri.l2() can. + if ri.l2() >= uint(len(mheap_.arenas[0])) { + return nil + } + } else { + // If there's an L1, then ri.l1() can be out of bounds but ri.l2() can't. + if ri.l1() >= uint(len(mheap_.arenas)) { + return nil + } + } + l2 := mheap_.arenas[ri.l1()] + if arenaL1Bits != 0 && l2 == nil { // Should never happen if there's no L1. return nil } - return spanOfUnchecked(p) + ha := l2[ri.l2()] + if ha == nil { + return nil + } + return ha.spans[(p/pageSize)%pagesPerArena] } // spanOfUnchecked is equivalent to spanOf, but the caller must ensure -// that p points into the heap (that is, mheap_.arena_start <= p < -// mheap_.arena_used). +// that p points into an allocated heap arena. +// +// Must be nosplit because it has callers that are nosplit. +// +//go:nosplit func spanOfUnchecked(p uintptr) *mspan { - return mheap_.spans[(p-mheap_.arena_start)>>_PageShift] + ai := arenaIndex(p) + return mheap_.arenas[ai.l1()][ai.l2()].spans[(p/pageSize)%pagesPerArena] } -func mlookup(v uintptr, base *uintptr, size *uintptr, sp **mspan) int32 { - _g_ := getg() - - _g_.m.mcache.local_nlookup++ - if sys.PtrSize == 4 && _g_.m.mcache.local_nlookup >= 1<<30 { - // purge cache stats to prevent overflow - lock(&mheap_.lock) - purgecachedstats(_g_.m.mcache) - unlock(&mheap_.lock) - } - - s := mheap_.lookupMaybe(unsafe.Pointer(v)) - if sp != nil { - *sp = s - } - if s == nil { - if base != nil { - *base = 0 - } - if size != nil { - *size = 0 - } - return 0 - } - - p := s.base() - if s.spanclass.sizeclass() == 0 { - // Large object. 
- if base != nil { - *base = p - } - if size != nil { - *size = s.npages << _PageShift - } - return 1 - } - - n := s.elemsize - if base != nil { - i := (v - p) / n - *base = p + i*n - } - if size != nil { - *size = n +// spanOfHeap is like spanOf, but returns nil if p does not point to a +// heap object. +// +// Must be nosplit because it has callers that are nosplit. +// +//go:nosplit +func spanOfHeap(p uintptr) *mspan { + s := spanOf(p) + // If p is not allocated, it may point to a stale span, so we + // have to check the span's bounds and state. + if s == nil || p < s.base() || p >= s.limit || s.state != mSpanInUse { + return nil } - - return 1 + return s } // Initialize the heap. -func (h *mheap) init(spansStart, spansBytes uintptr) { +func (h *mheap) init() { h.treapalloc.init(unsafe.Sizeof(treapNode{}), nil, nil, &memstats.other_sys) h.spanalloc.init(unsafe.Sizeof(mspan{}), recordspan, unsafe.Pointer(h), &memstats.mspan_sys) h.cachealloc.init(unsafe.Sizeof(mcache{}), nil, nil, &memstats.mcache_sys) h.specialfinalizeralloc.init(unsafe.Sizeof(specialfinalizer{}), nil, nil, &memstats.other_sys) h.specialprofilealloc.init(unsafe.Sizeof(specialprofile{}), nil, nil, &memstats.other_sys) + h.arenaHintAlloc.init(unsafe.Sizeof(arenaHint{}), nil, nil, &memstats.other_sys) // Don't zero mspan allocations. Background sweeping can // inspect a span concurrently with allocating it, so it's @@ -518,60 +578,6 @@ func (h *mheap) init(spansStart, spansBytes uintptr) { for i := range h.central { h.central[i].mcentral.init(spanClass(i)) } - - sp := (*slice)(unsafe.Pointer(&h.spans)) - sp.array = unsafe.Pointer(spansStart) - sp.len = 0 - sp.cap = int(spansBytes / sys.PtrSize) - - // Map metadata structures. But don't map race detector memory - // since we're not actually growing the arena here (and TSAN - // gets mad if you map 0 bytes). - h.setArenaUsed(h.arena_used, false) -} - -// setArenaUsed extends the usable arena to address arena_used and -// maps auxiliary VM regions for any newly usable arena space. -// -// racemap indicates that this memory should be managed by the race -// detector. racemap should be true unless this is covering a VM hole. -func (h *mheap) setArenaUsed(arena_used uintptr, racemap bool) { - // Map auxiliary structures *before* h.arena_used is updated. - // Waiting to update arena_used until after the memory has been mapped - // avoids faults when other threads try access these regions immediately - // after observing the change to arena_used. - - // Map the bitmap. - h.mapBits(arena_used) - - // Map spans array. - h.mapSpans(arena_used) - - // Tell the race detector about the new heap memory. - if racemap && raceenabled { - racemapshadow(unsafe.Pointer(h.arena_used), arena_used-h.arena_used) - } - - h.arena_used = arena_used -} - -// mapSpans makes sure that the spans are mapped -// up to the new value of arena_used. -// -// Don't call this directly. Call mheap.setArenaUsed. -func (h *mheap) mapSpans(arena_used uintptr) { - // Map spans array, PageSize at a time. - n := arena_used - n -= h.arena_start - n = n / _PageSize * sys.PtrSize - n = round(n, physPageSize) - need := n / unsafe.Sizeof(h.spans[0]) - have := uintptr(len(h.spans)) - if have >= need { - return - } - h.spans = h.spans[:need] - sysMap(unsafe.Pointer(&h.spans[have]), (need-have)*unsafe.Sizeof(h.spans[0]), h.arena_reserved, &memstats.other_sys) } // Sweeps spans in list until reclaims at least npages into heap. 
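With the span map now stored per arena in heapArena.spans, mheap.init no longer needs the reserved spans slice, and setArenaUsed/mapSpans disappear entirely. The spanOf lookup above and the setSpan/setSpans helpers added below reduce to two small index computations. A worked sketch, assuming the upstream linux/amd64 constants (8 KiB pages, 64 MiB arena frames) and taking arenaBaseOffset as zero to keep the arithmetic simple:

	const (
		pageSize       = 8 << 10                   // assumed heap page size
		heapArenaBytes = 64 << 20                  // assumed arena frame size
		pagesPerArena  = heapArenaBytes / pageSize // 8192 span slots per arena
	)

	// spanMapIndexes mirrors the lookup in spanOf and setSpan: the *mspan
	// covering p lives at mheap_.arenas[ai.l1()][ai.l2()].spans[pageIdx],
	// where ai is the arena index (split into l1/l2 only when an L1 level
	// exists on the platform).
	func spanMapIndexes(p uintptr) (arena, pageIdx uintptr) {
		arena = p / heapArenaBytes // arenaIndex(p), ignoring arenaBaseOffset
		pageIdx = (p / pageSize) % pagesPerArena
		return
	}

Two addresses one page apart land in the same arena but in adjacent spans slots, which is why setSpans below only reloads the heapArena pointer when the page index wraps back to zero.
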
@@ -598,7 +604,7 @@ retry: goto retry } if s.sweepgen == sg-1 { - // the span is being sweept by background sweeper, skip + // the span is being swept by background sweeper, skip continue } // already swept empty span, @@ -785,7 +791,7 @@ func (h *mheap) allocManual(npage uintptr, stat *uint64) *mspan { s.nelems = 0 s.elemsize = 0 s.limit = s.base() + s.npages<<_PageShift - // Manually manged memory doesn't count toward heap_sys. + // Manually managed memory doesn't count toward heap_sys. memstats.heap_sys -= uint64(s.npages << _PageShift) } @@ -795,6 +801,28 @@ func (h *mheap) allocManual(npage uintptr, stat *uint64) *mspan { return s } +// setSpan modifies the span map so spanOf(base) is s. +func (h *mheap) setSpan(base uintptr, s *mspan) { + ai := arenaIndex(base) + h.arenas[ai.l1()][ai.l2()].spans[(base/pageSize)%pagesPerArena] = s +} + +// setSpans modifies the span map so [spanOf(base), spanOf(base+npage*pageSize)) +// is s. +func (h *mheap) setSpans(base, npage uintptr, s *mspan) { + p := base / pageSize + ai := arenaIndex(base) + ha := h.arenas[ai.l1()][ai.l2()] + for n := uintptr(0); n < npage; n++ { + i := (p + n) % pagesPerArena + if i == 0 { + ai = arenaIndex(base + n*pageSize) + ha = h.arenas[ai.l1()][ai.l2()] + } + ha.spans[i] = s + } +} + // Allocates a span of the given size. h must be locked. // The returned span has been removed from the // free list, but its state is still MSpanFree. @@ -842,12 +870,9 @@ HaveSpan: t := (*mspan)(h.spanalloc.alloc()) t.init(s.base()+npage<<_PageShift, s.npages-npage) s.npages = npage - p := (t.base() - h.arena_start) >> _PageShift - if p > 0 { - h.spans[p-1] = s - } - h.spans[p] = t - h.spans[p+t.npages-1] = t + h.setSpan(t.base()-1, s) + h.setSpan(t.base(), t) + h.setSpan(t.base()+t.npages*pageSize-1, t) t.needzero = s.needzero s.state = _MSpanManual // prevent coalescing with s t.state = _MSpanManual @@ -856,10 +881,7 @@ HaveSpan: } s.unusedsince = 0 - p := (s.base() - h.arena_start) >> _PageShift - for n := uintptr(0); n < npage; n++ { - h.spans[p+n] = s - } + h.setSpans(s.base(), npage, s) *stat += uint64(npage << _PageShift) memstats.heap_idle -= uint64(npage << _PageShift) @@ -891,36 +913,18 @@ func (h *mheap) allocLarge(npage uintptr) *mspan { // // h must be locked. func (h *mheap) grow(npage uintptr) bool { - // Ask for a big chunk, to reduce the number of mappings - // the operating system needs to track; also amortizes - // the overhead of an operating system mapping. - // Allocate a multiple of 64kB. - npage = round(npage, (64<<10)/_PageSize) ask := npage << _PageShift - if ask < _HeapAllocChunk { - ask = _HeapAllocChunk - } - - v := h.sysAlloc(ask) + v, size := h.sysAlloc(ask) if v == nil { - if ask > npage<<_PageShift { - ask = npage << _PageShift - v = h.sysAlloc(ask) - } - if v == nil { - print("runtime: out of memory: cannot allocate ", ask, "-byte block (", memstats.heap_sys, " in use)\n") - return false - } + print("runtime: out of memory: cannot allocate ", ask, "-byte block (", memstats.heap_sys, " in use)\n") + return false } // Create a fake "in use" span and free it, so that the // right coalescing happens. 
s := (*mspan)(h.spanalloc.alloc()) - s.init(uintptr(v), ask>>_PageShift) - p := (s.base() - h.arena_start) >> _PageShift - for i := p; i < p+s.npages; i++ { - h.spans[i] = s - } + s.init(uintptr(v), size/pageSize) + h.setSpans(s.base(), s.npages, s) atomic.Store(&s.sweepgen, h.sweepgen) s.state = _MSpanInUse h.pagesInUse += uint64(s.npages) @@ -928,33 +932,6 @@ func (h *mheap) grow(npage uintptr) bool { return true } -// Look up the span at the given address. -// Address is guaranteed to be in map -// and is guaranteed to be start or end of span. -func (h *mheap) lookup(v unsafe.Pointer) *mspan { - p := uintptr(v) - p -= h.arena_start - return h.spans[p>>_PageShift] -} - -// Look up the span at the given address. -// Address is *not* guaranteed to be in map -// and may be anywhere in the span. -// Map entries for the middle of a span are only -// valid for allocated spans. Free spans may have -// other garbage in their middles, so we have to -// check for that. -func (h *mheap) lookupMaybe(v unsafe.Pointer) *mspan { - if uintptr(v) < h.arena_start || uintptr(v) >= h.arena_used { - return nil - } - s := h.spans[(uintptr(v)-h.arena_start)>>_PageShift] - if s == nil || uintptr(v) < s.base() || uintptr(v) >= uintptr(unsafe.Pointer(s.limit)) || s.state != _MSpanInUse { - return nil - } - return s -} - // Free the span back into the heap. func (h *mheap) freeSpan(s *mspan, acct int32) { systemstack(func() { @@ -1039,46 +1016,38 @@ func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince i s.npreleased = 0 // Coalesce with earlier, later spans. - p := (s.base() - h.arena_start) >> _PageShift - if p > 0 { - before := h.spans[p-1] - if before != nil && before.state == _MSpanFree { - // Now adjust s. - s.startAddr = before.startAddr - s.npages += before.npages - s.npreleased = before.npreleased // absorb released pages - s.needzero |= before.needzero - p -= before.npages - h.spans[p] = s - // The size is potentially changing so the treap needs to delete adjacent nodes and - // insert back as a combined node. - if h.isLargeSpan(before.npages) { - // We have a t, it is large so it has to be in the treap so we can remove it. - h.freelarge.removeSpan(before) - } else { - h.freeList(before.npages).remove(before) - } - before.state = _MSpanDead - h.spanalloc.free(unsafe.Pointer(before)) + if before := spanOf(s.base() - 1); before != nil && before.state == _MSpanFree { + // Now adjust s. + s.startAddr = before.startAddr + s.npages += before.npages + s.npreleased = before.npreleased // absorb released pages + s.needzero |= before.needzero + h.setSpan(before.base(), s) + // The size is potentially changing so the treap needs to delete adjacent nodes and + // insert back as a combined node. + if h.isLargeSpan(before.npages) { + // We have a t, it is large so it has to be in the treap so we can remove it. + h.freelarge.removeSpan(before) + } else { + h.freeList(before.npages).remove(before) } + before.state = _MSpanDead + h.spanalloc.free(unsafe.Pointer(before)) } // Now check to see if next (greater addresses) span is free and can be coalesced. 
- if (p + s.npages) < uintptr(len(h.spans)) { - after := h.spans[p+s.npages] - if after != nil && after.state == _MSpanFree { - s.npages += after.npages - s.npreleased += after.npreleased - s.needzero |= after.needzero - h.spans[p+s.npages-1] = s - if h.isLargeSpan(after.npages) { - h.freelarge.removeSpan(after) - } else { - h.freeList(after.npages).remove(after) - } - after.state = _MSpanDead - h.spanalloc.free(unsafe.Pointer(after)) + if after := spanOf(s.base() + s.npages*pageSize); after != nil && after.state == _MSpanFree { + s.npages += after.npages + s.npreleased += after.npreleased + s.needzero |= after.needzero + h.setSpan(s.base()+s.npages*pageSize-1, s) + if h.isLargeSpan(after.npages) { + h.freelarge.removeSpan(after) + } else { + h.freeList(after.npages).remove(after) } + after.state = _MSpanDead + h.spanalloc.free(unsafe.Pointer(after)) } // Insert s into appropriate list or treap. @@ -1343,7 +1312,7 @@ type special struct { // (The add will fail only if a record with the same p and s->kind // already exists.) func addspecial(p unsafe.Pointer, s *special) bool { - span := mheap_.lookupMaybe(p) + span := spanOfHeap(uintptr(p)) if span == nil { throw("addspecial on invalid pointer") } @@ -1391,7 +1360,7 @@ func addspecial(p unsafe.Pointer, s *special) bool { // Returns the record if the record existed, nil otherwise. // The caller must FixAlloc_Free the result. func removespecial(p unsafe.Pointer, kind uint8) *special { - span := mheap_.lookupMaybe(p) + span := spanOfHeap(uintptr(p)) if span == nil { throw("removespecial on invalid pointer") } @@ -1454,12 +1423,12 @@ func addfinalizer(p unsafe.Pointer, f *funcval, ft *functype, ot *ptrtype) bool // situation where it's possible that markrootSpans // has already run but mark termination hasn't yet. if gcphase != _GCoff { - _, base, _ := findObject(p) + base, _, _ := findObject(uintptr(p), 0, 0, false) mp := acquirem() gcw := &mp.p.ptr().gcw // Mark everything reachable from the object // so it's retained for the finalizer. - scanobject(uintptr(base), gcw) + scanobject(base, gcw) // Mark the finalizer itself, since the // special isn't part of the GC'd heap. scanblock(uintptr(unsafe.Pointer(&s.fn)), sys.PtrSize, &oneptrmask[0], gcw) @@ -1643,7 +1612,7 @@ func newMarkBits(nelems uintptr) *gcBits { // to be used for this span's alloc bits. // newAllocBits is used to provide newly initialized spans // allocation bits. For spans not being initialized the -// the mark bits are repurposed as allocation bits when +// mark bits are repurposed as allocation bits when // the span is swept. func newAllocBits(nelems uintptr) *gcBits { return newMarkBits(nelems) diff --git a/libgo/go/runtime/mprof.go b/libgo/go/runtime/mprof.go index f31c88c..2bbf37a 100644 --- a/libgo/go/runtime/mprof.go +++ b/libgo/go/runtime/mprof.go @@ -436,7 +436,7 @@ var mutexprofilerate uint64 // fraction sampled // reported. The previous rate is returned. // // To turn off profiling entirely, pass rate 0. -// To just read the current rate, pass rate -1. +// To just read the current rate, pass rate < 0. // (For n>1 the details of sampling may change.) 
func SetMutexProfileFraction(rate int) int { if rate < 0 { @@ -833,7 +833,7 @@ func tracealloc(p unsafe.Pointer, size uintptr, typ *_type) { if typ == nil { print("tracealloc(", p, ", ", hex(size), ")\n") } else { - print("tracealloc(", p, ", ", hex(size), ", ", *typ.string, ")\n") + print("tracealloc(", p, ", ", hex(size), ", ", typ.string(), ")\n") } if gp.m.curg == nil || gp == gp.m.curg { goroutineheader(gp) diff --git a/libgo/go/runtime/msan/msan.go b/libgo/go/runtime/msan/msan.go index b6ea3f0..c81577d 100644 --- a/libgo/go/runtime/msan/msan.go +++ b/libgo/go/runtime/msan/msan.go @@ -2,7 +2,8 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build msan,linux,amd64 +// +build msan,linux +// +build amd64 arm64 package msan diff --git a/libgo/go/runtime/mstats.go b/libgo/go/runtime/mstats.go index 095a0de..f54ce9d 100644 --- a/libgo/go/runtime/mstats.go +++ b/libgo/go/runtime/mstats.go @@ -26,7 +26,7 @@ type mstats struct { alloc uint64 // bytes allocated and not yet freed total_alloc uint64 // bytes allocated (even if freed) sys uint64 // bytes obtained from system (should be sum of xxx_sys below, no locking, approximate) - nlookup uint64 // number of pointer lookups + nlookup uint64 // number of pointer lookups (unused) nmalloc uint64 // number of mallocs nfree uint64 // number of frees @@ -637,8 +637,6 @@ func purgecachedstats(c *mcache) { c.local_scan = 0 memstats.tinyallocs += uint64(c.local_tinyallocs) c.local_tinyallocs = 0 - memstats.nlookup += uint64(c.local_nlookup) - c.local_nlookup = 0 h.largefree += uint64(c.local_largefree) c.local_largefree = 0 h.nlargefree += uint64(c.local_nlargefree) @@ -663,6 +661,9 @@ func purgecachedstats(c *mcache) { // overflow errors. //go:nosplit func mSysStatInc(sysStat *uint64, n uintptr) { + if sysStat == nil { + return + } if sys.BigEndian { atomic.Xadd64(sysStat, int64(n)) return @@ -677,6 +678,9 @@ func mSysStatInc(sysStat *uint64, n uintptr) { // mSysStatInc apply. //go:nosplit func mSysStatDec(sysStat *uint64, n uintptr) { + if sysStat == nil { + return + } if sys.BigEndian { atomic.Xadd64(sysStat, -int64(n)) return diff --git a/libgo/go/runtime/mwbbuf.go b/libgo/go/runtime/mwbbuf.go index 7e88463..39d1370 100644 --- a/libgo/go/runtime/mwbbuf.go +++ b/libgo/go/runtime/mwbbuf.go @@ -5,6 +5,9 @@ // This implements the write barrier buffer. The write barrier itself // is gcWriteBarrier and is implemented in assembly. // +// See mbarrier.go for algorithmic details on the write barrier. This +// file deals only with the buffer. +// // The write barrier has a fast path and a slow path. The fast path // simply enqueues to a per-P write barrier buffer. It's written in // assembly and doesn't clobber any general purpose registers, so it @@ -111,16 +114,21 @@ func (b *wbBuf) discard() { // if !buf.putFast(old, new) { // wbBufFlush(...) // } +// ... actual memory write ... // // The arguments to wbBufFlush depend on whether the caller is doing // its own cgo pointer checks. If it is, then this can be // wbBufFlush(nil, 0). Otherwise, it must pass the slot address and // new. // -// Since buf is a per-P resource, the caller must ensure there are no -// preemption points while buf is in use. +// The caller must ensure there are no preemption points during the +// above sequence. There must be no preemption points while buf is in +// use because it is a per-P resource. 
There must be no preemption +// points between the buffer put and the write to memory because this +// could allow a GC phase change, which could result in missed write +// barriers. // -// It must be nowritebarrierrec to because write barriers here would +// putFast must be nowritebarrierrec to because write barriers here would // corrupt the write barrier buffer. It (and everything it calls, if // it called anything) has to be nosplit to avoid scheduling on to a // different P and a different buffer. @@ -155,6 +163,13 @@ func wbBufFlush(dst *uintptr, src uintptr) { // Note: Every possible return from this function must reset // the buffer's next pointer to prevent buffer overflow. + // This *must not* modify its arguments because this + // function's argument slots do double duty in gcWriteBarrier + // as register spill slots. Currently, not modifying the + // arguments is sufficient to keep the spill slots unmodified + // (which seems unlikely to change since it costs little and + // helps with debugging). + if getg().m.dying > 0 { // We're going down. Not much point in write barriers // and this way we can allow write barriers in the @@ -214,11 +229,18 @@ func wbBufFlush1(_p_ *p) { // // TODO: Should scanobject/scanblock just stuff pointers into // the wbBuf? Then this would become the sole greying path. + // + // TODO: We could avoid shading any of the "new" pointers in + // the buffer if the stack has been shaded, or even avoid + // putting them in the buffer at all (which would double its + // capacity). This is slightly complicated with the buffer; we + // could track whether any un-shaded goroutine has used the + // buffer, or just track globally whether there are any + // un-shaded stacks and flush after each stack scan. gcw := &_p_.gcw pos := 0 - arenaStart := mheap_.arena_start for _, ptr := range ptrs { - if ptr < arenaStart { + if ptr < minLegalPointer { // nil pointers are very common, especially // for the "old" values. Filter out these and // other "obvious" non-heap pointers ASAP. @@ -227,11 +249,7 @@ func wbBufFlush1(_p_ *p) { // path to reduce the rate of flushes? continue } - // TODO: This doesn't use hbits, so calling - // heapBitsForObject seems a little silly. We could - // easily separate this out since heapBitsForObject - // just calls heapBitsForAddr(obj) to get hbits. - obj, _, span, objIndex := heapBitsForObject(ptr, 0, 0, false) + obj, span, objIndex := findObject(ptr, 0, 0, false) if obj == 0 { continue } diff --git a/libgo/go/runtime/netpoll.go b/libgo/go/runtime/netpoll.go index 3aeb1f6..ab3d14d 100644 --- a/libgo/go/runtime/netpoll.go +++ b/libgo/go/runtime/netpoll.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
-// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows +// +build aix darwin dragonfly freebsd js,wasm linux nacl netbsd openbsd solaris windows package runtime @@ -366,7 +366,7 @@ func netpollblock(pd *pollDesc, mode int32, waitio bool) bool { // this is necessary because runtime_pollUnblock/runtime_pollSetDeadline/deadlineimpl // do the opposite: store to closing/rd/wd, membarrier, load of rg/wg if waitio || netpollcheckerr(pd, mode) == 0 { - gopark(netpollblockcommit, unsafe.Pointer(gpp), "IO wait", traceEvGoBlockNet, 5) + gopark(netpollblockcommit, unsafe.Pointer(gpp), waitReasonIOWait, traceEvGoBlockNet, 5) } // be careful to not lose concurrent READY notification old := atomic.Xchguintptr(gpp, 0) diff --git a/libgo/go/runtime/netpoll_fake.go b/libgo/go/runtime/netpoll_fake.go new file mode 100644 index 0000000..aab18dc --- /dev/null +++ b/libgo/go/runtime/netpoll_fake.go @@ -0,0 +1,32 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Fake network poller for NaCl and wasm/js. +// Should never be used, because NaCl and wasm/js network connections do not honor "SetNonblock". + +// +build nacl js,wasm + +package runtime + +func netpollinit() { +} + +func netpolldescriptor() uintptr { + return ^uintptr(0) +} + +func netpollopen(fd uintptr, pd *pollDesc) int32 { + return 0 +} + +func netpollclose(fd uintptr) int32 { + return 0 +} + +func netpollarm(pd *pollDesc, mode int) { +} + +func netpoll(block bool) *g { + return nil +} diff --git a/libgo/go/runtime/netpoll_nacl.go b/libgo/go/runtime/netpoll_nacl.go deleted file mode 100644 index dc5a55e..0000000 --- a/libgo/go/runtime/netpoll_nacl.go +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Fake network poller for NaCl. -// Should never be used, because NaCl network connections do not honor "SetNonblock". - -package runtime - -func netpollinit() { -} - -func netpolldescriptor() uintptr { - return ^uintptr(0) -} - -func netpollopen(fd uintptr, pd *pollDesc) int32 { - return 0 -} - -func netpollclose(fd uintptr) int32 { - return 0 -} - -func netpollarm(pd *pollDesc, mode int) { -} - -func netpoll(block bool) *g { - return nil -} diff --git a/libgo/go/runtime/os_darwin.go b/libgo/go/runtime/os_darwin.go index ec92370..9597633 100644 --- a/libgo/go/runtime/os_darwin.go +++ b/libgo/go/runtime/os_darwin.go @@ -7,323 +7,61 @@ package runtime import "unsafe" type mOS struct { - machport uint32 // return address for mach ipc - waitsema uint32 // semaphore for parking on locks + initialized bool + mutex pthreadmutex + cond pthreadcond + count int } -//go:noescape -//extern mach_msg_trap -func mach_msg_trap(h unsafe.Pointer, op int32, send_size, rcv_size, rcv_name, timeout, notify uint32) int32 - -//extern mach_reply_port -func mach_reply_port() uint32 - -//extern mach_task_self -func mach_task_self() uint32 - func unimplemented(name string) { println(name, "not implemented") *(*int)(unsafe.Pointer(uintptr(1231))) = 1231 } //go:nosplit -func semawakeup(mp *m) { - mach_semrelease(mp.mos.waitsema) -} - -//go:nosplit func semacreate(mp *m) { - if mp.mos.waitsema != 0 { + if mp.initialized { return } - systemstack(func() { - mp.mos.waitsema = mach_semcreate() - }) -} - -// Mach IPC, to get at semaphores -// Definitions are in /usr/include/mach on a Mac. 
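A note on the build constraints used by the new files in this patch (netpoll_fake.go above, and the os_linux_*.go and os_js.go files later on): in the old "+build" syntax a space separates alternatives (OR) while a comma joins requirements (AND), so "nacl js,wasm" selects NaCl, or js and wasm together. A minimal header sketch; the //go:build line is the newer equivalent spelling, shown only for comparison, and the package name is hypothetical:

//go:build nacl || (js && wasm)
// +build nacl js,wasm

// In "+build" lines a space means OR and a comma means AND, so the
// constraint above matches NaCl, or the js/wasm pair. The //go:build
// form is not used by this patch and is included only for comparison.
package fakepoll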
- -func macherror(r int32, fn string) { - print("mach error ", fn, ": ", r, "\n") - throw("mach error") -} - -const _DebugMach = false - -var zerondr machndr - -func mach_msgh_bits(a, b uint32) uint32 { - return a | b<<8 -} - -func mach_msg(h *machheader, op int32, send_size, rcv_size, rcv_name, timeout, notify uint32) int32 { - // TODO: Loop on interrupt. - return mach_msg_trap(unsafe.Pointer(h), op, send_size, rcv_size, rcv_name, timeout, notify) -} - -// Mach RPC (MIG) -const ( - _MinMachMsg = 48 - _MachReply = 100 -) - -type codemsg struct { - h machheader - ndr machndr - code int32 -} - -func machcall(h *machheader, maxsize int32, rxsize int32) int32 { - _g_ := getg() - port := _g_.m.mos.machport - if port == 0 { - port = mach_reply_port() - _g_.m.mos.machport = port - } - - h.msgh_bits |= mach_msgh_bits(_MACH_MSG_TYPE_COPY_SEND, _MACH_MSG_TYPE_MAKE_SEND_ONCE) - h.msgh_local_port = port - h.msgh_reserved = 0 - id := h.msgh_id - - if _DebugMach { - p := (*[10000]unsafe.Pointer)(unsafe.Pointer(h)) - print("send:\t") - var i uint32 - for i = 0; i < h.msgh_size/uint32(unsafe.Sizeof(p[0])); i++ { - print(" ", p[i]) - if i%8 == 7 { - print("\n\t") - } - } - if i%8 != 0 { - print("\n") - } - } - ret := mach_msg(h, _MACH_SEND_MSG|_MACH_RCV_MSG, h.msgh_size, uint32(maxsize), port, 0, 0) - if ret != 0 { - if _DebugMach { - print("mach_msg error ", ret, "\n") - } - return ret - } - if _DebugMach { - p := (*[10000]unsafe.Pointer)(unsafe.Pointer(h)) - var i uint32 - for i = 0; i < h.msgh_size/uint32(unsafe.Sizeof(p[0])); i++ { - print(" ", p[i]) - if i%8 == 7 { - print("\n\t") - } - } - if i%8 != 0 { - print("\n") - } - } - if h.msgh_id != id+_MachReply { - if _DebugMach { - print("mach_msg _MachReply id mismatch ", h.msgh_id, " != ", id+_MachReply, "\n") - } - return -303 // MIG_REPLY_MISMATCH - } - // Look for a response giving the return value. - // Any call can send this back with an error, - // and some calls only have return values so they - // send it back on success too. I don't quite see how - // you know it's one of these and not the full response - // format, so just look if the message is right. - c := (*codemsg)(unsafe.Pointer(h)) - if uintptr(h.msgh_size) == unsafe.Sizeof(*c) && h.msgh_bits&_MACH_MSGH_BITS_COMPLEX == 0 { - if _DebugMach { - print("mig result ", c.code, "\n") - } - return c.code - } - if h.msgh_size != uint32(rxsize) { - if _DebugMach { - print("mach_msg _MachReply size mismatch ", h.msgh_size, " != ", rxsize, "\n") - } - return -307 // MIG_ARRAY_TOO_LARGE - } - return 0 -} - -// Semaphores! 
- -const ( - tmach_semcreate = 3418 - rmach_semcreate = tmach_semcreate + _MachReply - - tmach_semdestroy = 3419 - rmach_semdestroy = tmach_semdestroy + _MachReply - - _KERN_ABORTED = 14 - _KERN_OPERATION_TIMED_OUT = 49 -) - -type tmach_semcreatemsg struct { - h machheader - ndr machndr - policy int32 - value int32 -} - -type rmach_semcreatemsg struct { - h machheader - body machbody - semaphore machport -} - -type tmach_semdestroymsg struct { - h machheader - body machbody - semaphore machport -} - -func mach_semcreate() uint32 { - var m [256]uint8 - tx := (*tmach_semcreatemsg)(unsafe.Pointer(&m)) - rx := (*rmach_semcreatemsg)(unsafe.Pointer(&m)) - - tx.h.msgh_bits = 0 - tx.h.msgh_size = uint32(unsafe.Sizeof(*tx)) - tx.h.msgh_remote_port = mach_task_self() - tx.h.msgh_id = tmach_semcreate - tx.ndr = zerondr - - tx.policy = 0 // 0 = SYNC_POLICY_FIFO - tx.value = 0 - - for { - r := machcall(&tx.h, int32(unsafe.Sizeof(m)), int32(unsafe.Sizeof(*rx))) - if r == 0 { - break - } - if r == _KERN_ABORTED { // interrupted - continue - } - macherror(r, "semaphore_create") - } - if rx.body.msgh_descriptor_count != 1 { - unimplemented("mach_semcreate desc count") - } - return rx.semaphore.name -} - -func mach_semdestroy(sem uint32) { - var m [256]uint8 - tx := (*tmach_semdestroymsg)(unsafe.Pointer(&m)) - - tx.h.msgh_bits = _MACH_MSGH_BITS_COMPLEX - tx.h.msgh_size = uint32(unsafe.Sizeof(*tx)) - tx.h.msgh_remote_port = mach_task_self() - tx.h.msgh_id = tmach_semdestroy - tx.body.msgh_descriptor_count = 1 - tx.semaphore.name = sem - tx.semaphore.disposition = _MACH_MSG_TYPE_MOVE_SEND - tx.semaphore._type = 0 - - for { - r := machcall(&tx.h, int32(unsafe.Sizeof(m)), 0) - if r == 0 { - break - } - if r == _KERN_ABORTED { // interrupted - continue - } - macherror(r, "semaphore_destroy") - } -} - -//extern semaphore_wait -func mach_semaphore_wait(sema uint32) int32 - -//extern semaphore_timedwait -func mach_semaphore_timedwait(sema, sec, nsec uint32) int32 - -//extern semaphore_signal -func mach_semaphore_signal(sema uint32) int32 - -//extern semaphore_signal_all -func mach_semaphore_signal_all(sema uint32) int32 - -func semasleep1(ns int64) int32 { - _g_ := getg() - - if ns >= 0 { - var nsecs int32 - secs := timediv(ns, 1000000000, &nsecs) - r := mach_semaphore_timedwait(_g_.m.mos.waitsema, uint32(secs), uint32(nsecs)) - if r == _KERN_ABORTED || r == _KERN_OPERATION_TIMED_OUT { - return -1 - } - if r != 0 { - macherror(r, "semaphore_wait") - } - return 0 + mp.initialized = true + if err := pthread_mutex_init(&mp.mutex, nil); err != 0 { + throw("pthread_mutex_init") } - - for { - r := mach_semaphore_wait(_g_.m.mos.waitsema) - if r == 0 { - break - } - // Note: We don't know how this call (with no timeout) can get _KERN_OPERATION_TIMED_OUT, - // but it does reliably, though at a very low rate, on OS X 10.8, 10.9, 10.10, and 10.11. - // See golang.org/issue/17161. 
- if r == _KERN_ABORTED || r == _KERN_OPERATION_TIMED_OUT { // interrupted - continue - } - macherror(r, "semaphore_wait") + if err := pthread_cond_init(&mp.cond, nil); err != 0 { + throw("pthread_cond_init") } - return 0 } //go:nosplit func semasleep(ns int64) int32 { - var r int32 - systemstack(func() { - r = semasleep1(ns) - }) - return r -} - -//go:nosplit -func mach_semrelease(sem uint32) { + mp := getg().m + pthread_mutex_lock(&mp.mutex) for { - r := mach_semaphore_signal(sem) - if r == 0 { - break - } - if r == _KERN_ABORTED { // interrupted - continue + if mp.count > 0 { + mp.count-- + pthread_mutex_unlock(&mp.mutex) + return 0 + } + if ns >= 0 { + var t timespec + t.set_nsec(ns) + err := pthread_cond_timedwait_relative_np(&mp.cond, &mp.mutex, &t) + if err == _ETIMEDOUT { + pthread_mutex_unlock(&mp.mutex) + return -1 + } + } else { + pthread_cond_wait(&mp.cond, &mp.mutex) } - - // mach_semrelease must be completely nosplit, - // because it is called from Go code. - // If we're going to die, start that process on the system stack - // to avoid a Go stack split. - systemstack(func() { macherror(r, "semaphore_signal") }) } } -type machheader struct { - msgh_bits uint32 - msgh_size uint32 - msgh_remote_port uint32 - msgh_local_port uint32 - msgh_reserved uint32 - msgh_id int32 -} - -type machndr struct { - mig_vers uint8 - if_vers uint8 - reserved1 uint8 - mig_encoding uint8 - int_rep uint8 - char_rep uint8 - float_rep uint8 - reserved2 uint8 +//go:nosplit +func semawakeup(mp *m) { + pthread_mutex_lock(&mp.mutex) + mp.count++ + if mp.count > 0 { + pthread_cond_signal(&mp.cond) + } + pthread_mutex_unlock(&mp.mutex) } diff --git a/libgo/go/runtime/os_dragonfly.go b/libgo/go/runtime/os_dragonfly.go index 6452984..abcad72 100644 --- a/libgo/go/runtime/os_dragonfly.go +++ b/libgo/go/runtime/os_dragonfly.go @@ -4,11 +4,12 @@ package runtime -import "unsafe" +import ( + "runtime/internal/sys" + "unsafe" +) -type mOS struct { - unused byte -} +type mOS struct{} //go:noescape //extern umtx_sleep diff --git a/libgo/go/runtime/os_freebsd.go b/libgo/go/runtime/os_freebsd.go index 8c3535b..34939c5 100644 --- a/libgo/go/runtime/os_freebsd.go +++ b/libgo/go/runtime/os_freebsd.go @@ -8,9 +8,7 @@ import ( "unsafe" ) -type mOS struct { - unused byte -} +type mOS struct{} //go:noescape //extern _umtx_op diff --git a/libgo/go/runtime/os_js.go b/libgo/go/runtime/os_js.go new file mode 100644 index 0000000..ad6db18 --- /dev/null +++ b/libgo/go/runtime/os_js.go @@ -0,0 +1,145 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build js,wasm + +package runtime + +import ( + "unsafe" +) + +func exit(code int32) + +func write(fd uintptr, p unsafe.Pointer, n int32) int32 { + if fd > 2 { + throw("runtime.write to fd > 2 is unsupported") + } + wasmWrite(fd, p, n) + return n +} + +// Stubs so tests can link correctly. These should never be called. 
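The Darwin changes above drop the Mach-message semaphore machinery in favor of a per-M pthread mutex, condition variable, and wakeup counter. The runtime cannot use package sync itself, but the same counting-semaphore pattern can be sketched in portable Go with sync.Mutex and sync.Cond (a hypothetical illustration, not code from the patch):

package main

import (
	"fmt"
	"sync"
)

// sema mirrors the shape of the new mOS fields: a mutex, a condition
// variable, and a count of pending wakeups.
type sema struct {
	mu    sync.Mutex
	cond  *sync.Cond
	count int
}

func newSema() *sema {
	s := &sema{}
	s.cond = sync.NewCond(&s.mu)
	return s
}

// sleep blocks until a wakeup has been posted and consumes it,
// like semasleep with a negative (infinite) timeout.
func (s *sema) sleep() {
	s.mu.Lock()
	for s.count == 0 {
		s.cond.Wait()
	}
	s.count--
	s.mu.Unlock()
}

// wake posts one wakeup and signals a waiter, like semawakeup.
func (s *sema) wake() {
	s.mu.Lock()
	s.count++
	s.cond.Signal()
	s.mu.Unlock()
}

func main() {
	s := newSema()
	done := make(chan struct{})
	go func() {
		s.sleep()
		fmt.Println("woken")
		close(done)
	}()
	s.wake()
	<-done
}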
+func open(name *byte, mode, perm int32) int32 { panic("not implemented") } +func closefd(fd int32) int32 { panic("not implemented") } +func read(fd int32, p unsafe.Pointer, n int32) int32 { panic("not implemented") } + +//go:noescape +func wasmWrite(fd uintptr, p unsafe.Pointer, n int32) + +func usleep(usec uint32) + +func exitThread(wait *uint32) + +type mOS struct{} + +func osyield() + +const _SIGSEGV = 0xb + +func sigpanic() { + g := getg() + if !canpanic(g) { + throw("unexpected signal during runtime execution") + } + + // js only invokes the exception handler for memory faults. + g.sig = _SIGSEGV + panicmem() +} + +type sigset struct{} + +// Called to initialize a new m (including the bootstrap m). +// Called on the parent thread (main thread in case of bootstrap), can allocate memory. +func mpreinit(mp *m) { + mp.gsignal = malg(32 * 1024) + mp.gsignal.m = mp +} + +//go:nosplit +func msigsave(mp *m) { +} + +//go:nosplit +func msigrestore(sigmask sigset) { +} + +//go:nosplit +//go:nowritebarrierrec +func clearSignalHandlers() { +} + +//go:nosplit +func sigblock() { +} + +// Called to initialize a new m (including the bootstrap m). +// Called on the new thread, cannot allocate memory. +func minit() { +} + +// Called from dropm to undo the effect of an minit. +func unminit() { +} + +func osinit() { + ncpu = 1 + getg().m.procid = 2 + physPageSize = 64 * 1024 +} + +// wasm has no signals +const _NSIG = 0 + +func signame(sig uint32) string { + return "" +} + +func crash() { + *(*int32)(nil) = 0 +} + +func getRandomData(r []byte) + +func goenvs() { + goenvs_unix() +} + +func initsig(preinit bool) { +} + +// May run with m.p==nil, so write barriers are not allowed. +//go:nowritebarrier +func newosproc(mp *m) { + panic("newosproc: not implemented") +} + +func setProcessCPUProfiler(hz int32) {} +func setThreadCPUProfiler(hz int32) {} +func sigdisable(uint32) {} +func sigenable(uint32) {} +func sigignore(uint32) {} + +//go:linkname os_sigpipe os.sigpipe +func os_sigpipe() { + throw("too many writes on closed pipe") +} + +//go:nosplit +func cputicks() int64 { + // Currently cputicks() is used in blocking profiler and to seed runtime·fastrand(). + // runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler. + // TODO: need more entropy to better seed fastrand. + return nanotime() +} + +//go:linkname syscall_now syscall.now +func syscall_now() (sec int64, nsec int32) { + sec, nsec, _ = time_now() + return +} + +// gsignalStack is unused on js. +type gsignalStack struct{} diff --git a/libgo/go/runtime/os_linux.go b/libgo/go/runtime/os_linux.go index 816327e..04314bd 100644 --- a/libgo/go/runtime/os_linux.go +++ b/libgo/go/runtime/os_linux.go @@ -27,8 +27,9 @@ func futex(addr unsafe.Pointer, op int32, val uint32, ts, addr2 unsafe.Pointer, // Futexsleep is allowed to wake up spuriously. const ( - _FUTEX_WAIT = 0 - _FUTEX_WAKE = 1 + _FUTEX_PRIVATE_FLAG = 128 + _FUTEX_WAIT_PRIVATE = 0 | _FUTEX_PRIVATE_FLAG + _FUTEX_WAKE_PRIVATE = 1 | _FUTEX_PRIVATE_FLAG ) // Atomically, @@ -45,7 +46,7 @@ func futexsleep(addr *uint32, val uint32, ns int64) { // here, and so can we: as it says a few lines up, // spurious wakeups are allowed. 
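The os_linux.go hunk that follows switches the runtime's futex calls from FUTEX_WAIT/FUTEX_WAKE to their _PRIVATE variants, which tell the kernel the futex word is never shared across processes and so avoid the more expensive shared-memory matching. A Linux-only user-space sketch of the same operations via the raw syscall (the constants and the futexWait/futexWake helpers are mine, not from the patch):

package main

import (
	"fmt"
	"sync/atomic"
	"syscall"
	"time"
	"unsafe"
)

const (
	_FUTEX_PRIVATE_FLAG = 128
	_FUTEX_WAIT_PRIVATE = 0 | _FUTEX_PRIVATE_FLAG
	_FUTEX_WAKE_PRIVATE = 1 | _FUTEX_PRIVATE_FLAG
)

// futexWait blocks if *addr still equals val, like futexsleep with no
// timeout. Spurious wakeups are possible, so callers recheck in a loop.
func futexWait(addr *uint32, val uint32) {
	syscall.Syscall6(syscall.SYS_FUTEX,
		uintptr(unsafe.Pointer(addr)), _FUTEX_WAIT_PRIVATE, uintptr(val), 0, 0, 0)
}

// futexWake wakes up to cnt waiters sleeping on addr, like futexwakeup.
func futexWake(addr *uint32, cnt uint32) {
	syscall.Syscall6(syscall.SYS_FUTEX,
		uintptr(unsafe.Pointer(addr)), _FUTEX_WAKE_PRIVATE, uintptr(cnt), 0, 0, 0)
}

func main() {
	var word uint32 // 0 = not yet posted, 1 = posted
	go func() {
		time.Sleep(100 * time.Millisecond)
		atomic.StoreUint32(&word, 1)
		futexWake(&word, 1)
	}()
	for atomic.LoadUint32(&word) == 0 {
		futexWait(&word, 0) // may return spuriously; loop rechecks
	}
	fmt.Println("woken via FUTEX_WAKE_PRIVATE")
}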
if ns < 0 { - futex(unsafe.Pointer(addr), _FUTEX_WAIT, val, nil, nil, 0) + futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, nil, nil, 0) return } @@ -62,13 +63,13 @@ func futexsleep(addr *uint32, val uint32, ns int64) { ts.tv_nsec = 0 ts.set_sec(int64(timediv(ns, 1000000000, (*int32)(unsafe.Pointer(&ts.tv_nsec))))) } - futex(unsafe.Pointer(addr), _FUTEX_WAIT, val, unsafe.Pointer(&ts), nil, 0) + futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, unsafe.Pointer(&ts), nil, 0) } // If any procs are sleeping on addr, wake up at most cnt. //go:nosplit func futexwakeup(addr *uint32, cnt uint32) { - ret := futex(unsafe.Pointer(addr), _FUTEX_WAKE, cnt, nil, nil, 0) + ret := futex(unsafe.Pointer(addr), _FUTEX_WAKE_PRIVATE, cnt, nil, nil, 0) if ret >= 0 { return } @@ -93,6 +94,11 @@ const ( var procAuxv = []byte("/proc/self/auxv\x00") +var addrspace_vec [1]byte + +//extern mincore +func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32 + func sysargs(argc int32, argv **byte) { n := argc + 1 @@ -158,12 +164,17 @@ func sysauxv(auxv []uintptr) int { // worth of random data. startupRandomData = (*[16]byte)(unsafe.Pointer(val))[:] + setRandomNumber(uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 | + uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24) + case _AT_PAGESZ: physPageSize = val } + archauxv(tag, val) + // Commented out for gccgo for now. - // archauxv(tag, val) + // vdsoauxv(tag, val) } return i / 2 } diff --git a/libgo/go/runtime/os_linux_arm.go b/libgo/go/runtime/os_linux_arm.go new file mode 100644 index 0000000..42c2839 --- /dev/null +++ b/libgo/go/runtime/os_linux_arm.go @@ -0,0 +1,60 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import "unsafe" + +const ( + _AT_PLATFORM = 15 // introduced in at least 2.6.11 + + _HWCAP_VFP = 1 << 6 // introduced in at least 2.6.11 + _HWCAP_VFPv3 = 1 << 13 // introduced in 2.6.30 + _HWCAP_IDIVA = 1 << 17 +) + +var randomNumber uint32 +var armArch uint8 = 6 // we default to ARMv6 +var hwcap uint32 // set by archauxv +var hardDiv bool // set if a hardware divider is available + +func checkgoarm() { + // On Android, /proc/self/auxv might be unreadable and hwcap won't + // reflect the CPU capabilities. Assume that every Android arm device + // has the necessary floating point hardware available. + if GOOS == "android" { + return + } + if goarm > 5 && hwcap&_HWCAP_VFP == 0 { + print("runtime: this CPU has no floating point hardware, so it cannot run\n") + print("this GOARM=", goarm, " binary. Recompile using GOARM=5.\n") + exit(1) + } + if goarm > 6 && hwcap&_HWCAP_VFPv3 == 0 { + print("runtime: this CPU has no VFPv3 floating point hardware, so it cannot run\n") + print("this GOARM=", goarm, " binary. Recompile using GOARM=5 or GOARM=6.\n") + exit(1) + } +} + +func archauxv(tag, val uintptr) { + switch tag { + case _AT_RANDOM: + // sysargs filled in startupRandomData, but that + // pointer may not be word aligned, so we must treat + // it as a byte array. 
+ randomNumber = uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 | + uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24 + + case _AT_PLATFORM: // v5l, v6l, v7l + t := *(*uint8)(unsafe.Pointer(val + 1)) + if '5' <= t && t <= '7' { + armArch = t - '0' + } + + case _AT_HWCAP: // CPU capability bit flags + hwcap = uint32(val) + hardDiv = (hwcap & _HWCAP_IDIVA) != 0 + } +} diff --git a/libgo/go/runtime/os_linux_arm64.go b/libgo/go/runtime/os_linux_arm64.go new file mode 100644 index 0000000..013e7ae --- /dev/null +++ b/libgo/go/runtime/os_linux_arm64.go @@ -0,0 +1,29 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build arm64 + +package runtime + +import "internal/cpu" + +var randomNumber uint32 + +func archauxv(tag, val uintptr) { + switch tag { + case _AT_RANDOM: + // sysargs filled in startupRandomData, but that + // pointer may not be word aligned, so we must treat + // it as a byte array. + randomNumber = uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 | + uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24 + + case _AT_HWCAP: + // arm64 doesn't have a 'cpuid' instruction equivalent and relies on + // HWCAP/HWCAP2 bits for hardware capabilities. + cpu.HWCap = uint(val) + case _AT_HWCAP2: + cpu.HWCap2 = uint(val) + } +} diff --git a/libgo/go/runtime/os_linux_mips64x.go b/libgo/go/runtime/os_linux_mips64x.go new file mode 100644 index 0000000..b7f737f --- /dev/null +++ b/libgo/go/runtime/os_linux_mips64x.go @@ -0,0 +1,21 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build linux +// +build mips64 mips64le + +package runtime + +var randomNumber uint32 + +func archauxv(tag, val uintptr) { + switch tag { + case _AT_RANDOM: + // sysargs filled in startupRandomData, but that + // pointer may not be word aligned, so we must treat + // it as a byte array. + randomNumber = uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 | + uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24 + } +} diff --git a/libgo/go/runtime/os_linux_mipsx.go b/libgo/go/runtime/os_linux_mipsx.go new file mode 100644 index 0000000..a2696de --- /dev/null +++ b/libgo/go/runtime/os_linux_mipsx.go @@ -0,0 +1,21 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build linux +// +build mips mipsle + +package runtime + +var randomNumber uint32 + +func archauxv(tag, val uintptr) { + switch tag { + case _AT_RANDOM: + // sysargs filled in startupRandomData, but that + // pointer may not be word aligned, so we must treat + // it as a byte array. + randomNumber = uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 | + uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24 + } +} diff --git a/libgo/go/runtime/os_linux_noauxv.go b/libgo/go/runtime/os_linux_noauxv.go new file mode 100644 index 0000000..895b4cd --- /dev/null +++ b/libgo/go/runtime/os_linux_noauxv.go @@ -0,0 +1,11 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
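The archauxv implementations above consume AT_HWCAP/AT_HWCAP2 and AT_RANDOM entries that sysauxv reads from the kernel-supplied auxiliary vector. Outside the runtime the same information is available from /proc/self/auxv; a small Linux-only sketch (hypothetical helper, not part of the patch; a 64-bit little-endian host is assumed):

package main

import (
	"encoding/binary"
	"fmt"
	"io/ioutil"
)

const (
	_AT_NULL   = 0  // terminates the vector
	_AT_HWCAP  = 16 // CPU capability bit flags
	_AT_HWCAP2 = 26 // additional capability bits
)

func main() {
	buf, err := ioutil.ReadFile("/proc/self/auxv")
	if err != nil {
		fmt.Println("auxv not readable:", err)
		return
	}
	// On 64-bit Linux each entry is a (tag, value) pair of 8-byte words
	// in native byte order; little-endian is assumed in this sketch.
	for i := 0; i+16 <= len(buf); i += 16 {
		tag := binary.LittleEndian.Uint64(buf[i:])
		val := binary.LittleEndian.Uint64(buf[i+8:])
		if tag == _AT_NULL {
			break
		}
		switch tag {
		case _AT_HWCAP:
			fmt.Printf("AT_HWCAP  = %#x\n", val)
		case _AT_HWCAP2:
			fmt.Printf("AT_HWCAP2 = %#x\n", val)
		}
	}
}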
+ +// +build linux +// +build !arm,!arm64,!mips,!mipsle,!mips64,!mips64le,!s390x,!ppc64,!ppc64le + +package runtime + +func archauxv(tag, val uintptr) { +} diff --git a/libgo/go/runtime/os_linux_ppc64x.go b/libgo/go/runtime/os_linux_ppc64x.go index d27902d..cc79cc4 100644 --- a/libgo/go/runtime/os_linux_ppc64x.go +++ b/libgo/go/runtime/os_linux_ppc64x.go @@ -2,27 +2,21 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build ignore_for_gccgo +// +build linux // +build ppc64 ppc64le package runtime -// For go:linkname -import _ "unsafe" - -// ppc64x doesn't have a 'cpuid' instruction equivalent and relies on -// HWCAP/HWCAP2 bits for hardware capabilities. - -//go:linkname cpu_hwcap internal/cpu.ppc64x_hwcap -//go:linkname cpu_hwcap2 internal/cpu.ppc64x_hwcap2 -var cpu_hwcap uint -var cpu_hwcap2 uint +import "internal/cpu" func archauxv(tag, val uintptr) { switch tag { case _AT_HWCAP: - cpu_hwcap = uint(val) + // ppc64x doesn't have a 'cpuid' instruction + // equivalent and relies on HWCAP/HWCAP2 bits for + // hardware capabilities. + cpu.HWCap = uint(val) case _AT_HWCAP2: - cpu_hwcap2 = uint(val) + cpu.HWCap2 = uint(val) } } diff --git a/libgo/go/runtime/os_linux_s390x.go b/libgo/go/runtime/os_linux_s390x.go new file mode 100644 index 0000000..55d35c7 --- /dev/null +++ b/libgo/go/runtime/os_linux_s390x.go @@ -0,0 +1,19 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import "internal/cpu" + +const ( + // bit masks taken from bits/hwcap.h + _HWCAP_S390_VX = 2048 // vector facility +) + +func archauxv(tag, val uintptr) { + switch tag { + case _AT_HWCAP: // CPU capability bit flags + cpu.S390X.HasVX = val&_HWCAP_S390_VX != 0 + } +} diff --git a/libgo/go/runtime/os_netbsd.go b/libgo/go/runtime/os_netbsd.go index 81ebe76..ea47e5c 100644 --- a/libgo/go/runtime/os_netbsd.go +++ b/libgo/go/runtime/os_netbsd.go @@ -6,6 +6,7 @@ package runtime import ( "runtime/internal/atomic" + "runtime/internal/sys" "unsafe" ) diff --git a/libgo/go/runtime/os_openbsd.go b/libgo/go/runtime/os_openbsd.go index b64d3af..4f05665 100644 --- a/libgo/go/runtime/os_openbsd.go +++ b/libgo/go/runtime/os_openbsd.go @@ -6,6 +6,7 @@ package runtime import ( "runtime/internal/atomic" + "runtime/internal/sys" "unsafe" ) diff --git a/libgo/go/runtime/panic.go b/libgo/go/runtime/panic.go index 6b490b7..752bf71 100644 --- a/libgo/go/runtime/panic.go +++ b/libgo/go/runtime/panic.go @@ -39,7 +39,24 @@ func panicCheckMalloc(err error) { var indexError = error(errorString("index out of range")) +// The panicindex, panicslice, and panicdivide functions are called by +// code generated by the compiler for out of bounds index expressions, +// out of bounds slice expressions, and division by zero. The +// panicdivide (again), panicoverflow, panicfloat, and panicmem +// functions are called by the signal handler when a signal occurs +// indicating the respective problem. +// +// Since panicindex and panicslice are never called directly, and +// since the runtime package should never have an out of bounds slice +// or array reference, if we see those functions called from the +// runtime package we turn the panic into a throw. That will dump the +// entire runtime stack for easier debugging. 
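The comment block above explains that panicindex and panicslice become throws only when the faulting code lives inside the runtime package itself; ordinary user code still gets an ordinary, recoverable runtime.Error. A short illustration of the user-facing behavior (not part of the patch):

package main

import (
	"fmt"
	"runtime"
)

func main() {
	defer func() {
		if r := recover(); r != nil {
			// The value panicindex passes to panic implements runtime.Error.
			if _, ok := r.(runtime.Error); ok {
				fmt.Println("recovered:", r)
			}
		}
	}()
	s := []int{1, 2, 3}
	i := len(s) + 2 // computed at run time, so the compiler emits a bounds check
	_ = s[i]        // calls panicindex: "index out of range", caught above
}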
+ func panicindex() { + name, _, _ := funcfileline(getcallerpc(), -1) + if hasprefix(name, "runtime.") { + throw(string(indexError.(errorString))) + } panicCheckMalloc(indexError) panic(indexError) } @@ -47,6 +64,10 @@ func panicindex() { var sliceError = error(errorString("slice bounds out of range")) func panicslice() { + name, _, _ := funcfileline(getcallerpc(), -1) + if hasprefix(name, "runtime.") { + throw(string(sliceError.(errorString))) + } panicCheckMalloc(sliceError) panic(sliceError) } @@ -144,6 +165,12 @@ func newdefer() *_defer { // //go:nosplit func freedefer(d *_defer) { + if d._panic != nil { + freedeferpanic() + } + if d.pfn != 0 { + freedeferfn() + } pp := getg().m.p.ptr() if len(pp.deferpool) == cap(pp.deferpool) { // Transfer half of local cache to the central cache. @@ -176,15 +203,28 @@ func freedefer(d *_defer) { d.link = nil d.frame = nil d.panicStack = nil - d._panic = nil - d.pfn = 0 d.arg = nil d.retaddr = 0 d.makefunccanrecover = false + // d._panic and d.pfn must be nil already. + // If not, we would have called freedeferpanic or freedeferfn above, + // both of which throw. pp.deferpool = append(pp.deferpool, d) } +// Separate function so that it can split stack. +// Windows otherwise runs out of stack space. +func freedeferpanic() { + // _panic must be cleared before d is unlinked from gp. + throw("freedefer with d._panic != nil") +} + +func freedeferfn() { + // fn must be cleared before d is unlinked from gp. + throw("freedefer with d.fn != nil") +} + // deferreturn is called to undefer the stack. // The compiler inserts a call to this function as a finally clause // wrapped around the body of any function that calls defer. @@ -544,15 +584,9 @@ func gopanic(e interface{}) { // the world, we call preprintpanics to invoke all necessary Error // and String methods to prepare the panic strings before startpanic. preprintpanics(gp._panic) - startpanic() - - // startpanic set panicking, which will block main from exiting, - // so now OK to decrement runningPanicDefers. - atomic.Xadd(&runningPanicDefers, -1) - printpanics(gp._panic) - dopanic(0) // should not return - *(*int)(nil) = 0 // not reached + fatalpanic(gp._panic) // should not return + *(*int)(nil) = 0 // not reached } // currentDefer returns the top of the defer stack if it can be recovered. @@ -810,13 +844,16 @@ func sync_throw(s string) { //go:nosplit func throw(s string) { - print("fatal error: ", s, "\n") + // Everything throw does should be recursively nosplit so it + // can be called even when it's unsafe to grow the stack. + systemstack(func() { + print("fatal error: ", s, "\n") + }) gp := getg() if gp.m.throwing == 0 { gp.m.throwing = 1 } - startpanic() - dopanic(0) + fatalthrow() *(*int)(nil) = 0 // not reached } @@ -833,13 +870,76 @@ var panicking uint32 // so that two concurrent panics don't overlap their output. var paniclk mutex +// fatalthrow implements an unrecoverable runtime throw. It freezes the +// system, prints stack traces starting from its caller, and terminates the +// process. +// +//go:nosplit +func fatalthrow() { + pc := getcallerpc() + sp := getcallersp() + gp := getg() + + startpanic_m() + + if dopanic_m(gp, pc, sp) { + crash() + } + + exit(2) + + *(*int)(nil) = 0 // not reached +} + +// fatalpanic implements an unrecoverable panic. It is like fatalthrow, except +// that if msgs != nil, fatalpanic also prints panic messages and decrements +// runningPanicDefers once main is blocked from exiting. 
+// +//go:nosplit +func fatalpanic(msgs *_panic) { + pc := getcallerpc() + sp := getcallersp() + gp := getg() + var docrash bool + + if startpanic_m() && msgs != nil { + // There were panic messages and startpanic_m + // says it's okay to try to print them. + + // startpanic_m set panicking, which will + // block main from exiting, so now OK to + // decrement runningPanicDefers. + atomic.Xadd(&runningPanicDefers, -1) + + printpanics(msgs) + } + + docrash = dopanic_m(gp, pc, sp) + + if docrash { + // By crashing outside the above systemstack call, debuggers + // will not be confused when generating a backtrace. + // Function crash is marked nosplit to avoid stack growth. + crash() + } + + systemstack(func() { + exit(2) + }) + + *(*int)(nil) = 0 // not reached +} + // startpanic_m prepares for an unrecoverable panic. // +// It returns true if panic messages should be printed, or false if +// the runtime is in bad shape and should just print stacks. +// // It can have write barriers because the write barrier explicitly // ignores writes once dying > 0. // //go:yeswritebarrierrec -func startpanic() { +func startpanic_m() bool { _g_ := getg() if mheap_.cachealloc.size == 0 { // very early print("runtime: panic before malloc heap initialized\n") @@ -850,6 +950,12 @@ func startpanic() { // happen (even if we're not in one of these situations). _g_.m.mallocing++ + // If we're dying because of a bad lock count, set it to a + // good lock count so we don't recursively panic below. + if _g_.m.locks < 0 { + _g_.m.locks = 1 + } + switch _g_.m.dying { case 0: _g_.m.dying = 1 @@ -860,15 +966,13 @@ func startpanic() { schedtrace(true) } freezetheworld() - return + return true case 1: - // Something failed while panicking, probably the print of the - // argument to panic(). Just print a stack trace and exit. + // Something failed while panicking. + // Just print a stack trace and exit. _g_.m.dying = 2 print("panic during panic\n") - dopanic(0) - exit(3) - fallthrough + return false case 2: // This is a genuine bug in the runtime, we couldn't even // print the stack trace successfully. @@ -879,14 +983,14 @@ func startpanic() { default: // Can't even print! Just exit. exit(5) + return false // Need to return something. } } var didothers bool var deadlock mutex -func dopanic(unused int) { - gp := getg() +func dopanic_m(gp *g, pc, sp uintptr) bool { if gp.sig != 0 { signame := signame(gp.sig) if signame != "" { @@ -927,11 +1031,7 @@ func dopanic(unused int) { lock(&deadlock) } - if docrash { - crash() - } - - exit(2) + return docrash } // canpanic returns false if a signal should throw instead of @@ -951,7 +1051,7 @@ func canpanic(gp *g) bool { if gp == nil || gp != _m_.curg { return false } - if _m_.locks-_m_.softfloat != 0 || _m_.mallocing != 0 || _m_.throwing != 0 || _m_.preemptoff != "" || _m_.dying != 0 { + if _m_.locks != 0 || _m_.mallocing != 0 || _m_.throwing != 0 || _m_.preemptoff != "" || _m_.dying != 0 { return false } status := readgstatus(gp) @@ -960,3 +1060,14 @@ func canpanic(gp *g) bool { } return true } + +// isAbortPC returns true if pc is the program counter at which +// runtime.abort raises a signal. +// +// It is nosplit because it's part of the isgoexception +// implementation. 
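fatalthrow above handles unrecoverable runtime throws: it freezes the world, prints stack traces, and exits without unwinding, so deferred functions and recover never run; fatalpanic differs only in first printing the panic values. One way to observe a throw from ordinary code is unlocking an unlocked sync.Mutex, which reaches throw through the runtime's sync_throw hook (illustration only, not part of the patch):

package main

import (
	"fmt"
	"sync"
)

func main() {
	defer func() {
		// Never reached: a throw does not unwind the stack or run
		// deferred functions; the process prints stacks and exits with 2.
		if r := recover(); r != nil {
			fmt.Println("recovered:", r)
		}
	}()
	var mu sync.Mutex
	mu.Unlock() // fatal error: sync: unlock of unlocked mutex
}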
+// +//go:nosplit +func isAbortPC(pc uintptr) bool { + return false +} diff --git a/libgo/go/runtime/pprof/internal/profile/encode.go b/libgo/go/runtime/pprof/internal/profile/encode.go index 6b879a8..af31933 100644 --- a/libgo/go/runtime/pprof/internal/profile/encode.go +++ b/libgo/go/runtime/pprof/internal/profile/encode.go @@ -197,6 +197,10 @@ var profileDecoder = []decoder{ }, // repeated int64 period = 12 func(b *buffer, m message) error { return decodeInt64(b, &m.(*Profile).Period) }, + // repeated int64 comment = 13 + func(b *buffer, m message) error { return decodeInt64s(b, &m.(*Profile).commentX) }, + // int64 defaultSampleType = 14 + func(b *buffer, m message) error { return decodeInt64(b, &m.(*Profile).defaultSampleTypeX) }, } // postDecode takes the unexported fields populated by decode (with @@ -278,6 +282,14 @@ func (p *Profile) postDecode() error { pt.Type, err = getString(p.stringTable, &pt.typeX, err) pt.Unit, err = getString(p.stringTable, &pt.unitX, err) } + for _, i := range p.commentX { + var c string + c, err = getString(p.stringTable, &i, err) + p.Comments = append(p.Comments, c) + } + + p.commentX = nil + p.DefaultSampleType, err = getString(p.stringTable, &p.defaultSampleTypeX, err) p.stringTable = nil return nil } diff --git a/libgo/go/runtime/pprof/internal/profile/profile.go b/libgo/go/runtime/pprof/internal/profile/profile.go index 9b6a6f9..64c3e3f 100644 --- a/libgo/go/runtime/pprof/internal/profile/profile.go +++ b/libgo/go/runtime/pprof/internal/profile/profile.go @@ -22,11 +22,13 @@ import ( // Profile is an in-memory representation of profile.proto. type Profile struct { - SampleType []*ValueType - Sample []*Sample - Mapping []*Mapping - Location []*Location - Function []*Function + SampleType []*ValueType + DefaultSampleType string + Sample []*Sample + Mapping []*Mapping + Location []*Location + Function []*Function + Comments []string DropFrames string KeepFrames string @@ -36,9 +38,11 @@ type Profile struct { PeriodType *ValueType Period int64 - dropFramesX int64 - keepFramesX int64 - stringTable []string + commentX []int64 + dropFramesX int64 + keepFramesX int64 + stringTable []string + defaultSampleTypeX int64 } // ValueType corresponds to Profile.ValueType diff --git a/libgo/go/runtime/pprof/pprof.go b/libgo/go/runtime/pprof/pprof.go index be4e869..5128c22 100644 --- a/libgo/go/runtime/pprof/pprof.go +++ b/libgo/go/runtime/pprof/pprof.go @@ -68,7 +68,7 @@ // all pprof commands. // // For more information about pprof, see -// https://github.com/google/pprof/blob/master/doc/pprof.md. +// https://github.com/google/pprof/blob/master/doc/README.md. package pprof import ( @@ -99,7 +99,8 @@ import ( // Each Profile has a unique name. A few profiles are predefined: // // goroutine - stack traces of all current goroutines -// heap - a sampling of all heap allocations +// heap - a sampling of memory allocations of live objects +// allocs - a sampling of all past memory allocations // threadcreate - stack traces that led to the creation of new OS threads // block - stack traces that led to blocking on synchronization primitives // mutex - stack traces of holders of contended mutexes @@ -114,6 +115,16 @@ import ( // all known allocations. This exception helps mainly in programs running // without garbage collection enabled, usually for debugging purposes. // +// The heap profile tracks both the allocation sites for all live objects in +// the application memory and for all objects allocated since the program start. 
+// Pprof's -inuse_space, -inuse_objects, -alloc_space, and -alloc_objects +// flags select which to display, defaulting to -inuse_space (live objects, +// scaled by size). +// +// The allocs profile is the same as the heap profile but changes the default +// pprof display to -alloc_space, the total number of bytes allocated since +// the program began (including garbage-collected bytes). +// // The CPU profile is not available as a Profile. It has a special API, // the StartCPUProfile and StopCPUProfile functions, because it streams // output to a writer during profiling. @@ -150,6 +161,12 @@ var heapProfile = &Profile{ write: writeHeap, } +var allocsProfile = &Profile{ + name: "allocs", + count: countHeap, // identical to heap profile + write: writeAlloc, +} + var blockProfile = &Profile{ name: "block", count: countBlock, @@ -170,6 +187,7 @@ func lockProfiles() { "goroutine": goroutineProfile, "threadcreate": threadcreateProfile, "heap": heapProfile, + "allocs": allocsProfile, "block": blockProfile, "mutex": mutexProfile, } @@ -525,6 +543,16 @@ func countHeap() int { // writeHeap writes the current runtime heap profile to w. func writeHeap(w io.Writer, debug int) error { + return writeHeapInternal(w, debug, "") +} + +// writeAlloc writes the current runtime heap profile to w +// with the total allocation space as the default sample type. +func writeAlloc(w io.Writer, debug int) error { + return writeHeapInternal(w, debug, "alloc_space") +} + +func writeHeapInternal(w io.Writer, debug int, defaultSampleType string) error { var memStats *runtime.MemStats if debug != 0 { // Read mem stats first, so that our other allocations @@ -555,7 +583,7 @@ func writeHeap(w io.Writer, debug int) error { } if debug == 0 { - return writeHeapProto(w, p, int64(runtime.MemProfileRate)) + return writeHeapProto(w, p, int64(runtime.MemProfileRate), defaultSampleType) } sort.Slice(p, func(i, j int) bool { return p[i].InUseBytes() > p[j].InUseBytes() }) diff --git a/libgo/go/runtime/pprof/pprof_test.go b/libgo/go/runtime/pprof/pprof_test.go index 02d99f5..74a7777 100644 --- a/libgo/go/runtime/pprof/pprof_test.go +++ b/libgo/go/runtime/pprof/pprof_test.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build !nacl +// +build !nacl,!js package pprof @@ -732,7 +732,7 @@ func TestMutexProfile(t *testing.T) { return } // checking that the line is like "35258904 1 @ 0x48288d 0x47cd28 0x458931" - r2 := `^\d+ 1 @(?: 0x[[:xdigit:]]+)+` + r2 := `^\d+ \d+ @(?: 0x[[:xdigit:]]+)+` //r2 := "^[0-9]+ 1 @ 0x[0-9a-f x]+$" if ok, err := regexp.MatchString(r2, lines[3]); err != nil || !ok { t.Errorf("%q didn't match %q", lines[3], r2) @@ -862,16 +862,22 @@ func containsCounts(prof *profile.Profile, counts []int64) bool { return true } +var emptyCallStackTestRun int64 + // Issue 18836. 
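The new "allocs" profile registered above is backed by the same data as "heap" but records alloc_space as the default sample type in the emitted proto. A short usage sketch from application code (the output file name is arbitrary; not part of the patch):

package main

import (
	"log"
	"os"
	"runtime/pprof"
)

func main() {
	f, err := os.Create("allocs.pb.gz")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// Same samples as Lookup("heap"), but pprof will default to
	// -alloc_space (total bytes allocated since program start).
	if err := pprof.Lookup("allocs").WriteTo(f, 0); err != nil {
		log.Fatal(err)
	}
}

Opening the result with go tool pprof then behaves as if -alloc_space had been passed, which is the point of the DefaultSampleType plumbing added in this patch.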
func TestEmptyCallStack(t *testing.T) { + name := fmt.Sprintf("test18836_%d", emptyCallStackTestRun) + emptyCallStackTestRun++ + t.Parallel() var buf bytes.Buffer - p := NewProfile("test18836") + p := NewProfile(name) + p.Add("foo", 47674) p.WriteTo(&buf, 1) p.Remove("foo") got := buf.String() - prefix := "test18836 profile: total 1\n" + prefix := name + " profile: total 1\n" if !strings.HasPrefix(got, prefix) { t.Fatalf("got:\n\t%q\nwant prefix:\n\t%q\n", got, prefix) } diff --git a/libgo/go/runtime/pprof/proto.go b/libgo/go/runtime/pprof/proto.go index 793be44..d8456be 100644 --- a/libgo/go/runtime/pprof/proto.go +++ b/libgo/go/runtime/pprof/proto.go @@ -11,7 +11,6 @@ import ( "io" "io/ioutil" "runtime" - "sort" "strconv" "time" "unsafe" @@ -53,24 +52,43 @@ type profileBuilder struct { } type memMap struct { - start uintptr - end uintptr + // initialized as reading mapping + start uintptr + end uintptr + offset uint64 + file, buildID string + + funcs symbolizeFlag + fake bool // map entry was faked; /proc/self/maps wasn't available } +// symbolizeFlag keeps track of symbolization result. +// 0 : no symbol lookup was performed +// 1<<0 (lookupTried) : symbol lookup was performed +// 1<<1 (lookupFailed): symbol lookup was performed but failed +type symbolizeFlag uint8 + +const ( + lookupTried symbolizeFlag = 1 << iota + lookupFailed symbolizeFlag = 1 << iota +) + const ( // message Profile - tagProfile_SampleType = 1 // repeated ValueType - tagProfile_Sample = 2 // repeated Sample - tagProfile_Mapping = 3 // repeated Mapping - tagProfile_Location = 4 // repeated Location - tagProfile_Function = 5 // repeated Function - tagProfile_StringTable = 6 // repeated string - tagProfile_DropFrames = 7 // int64 (string table index) - tagProfile_KeepFrames = 8 // int64 (string table index) - tagProfile_TimeNanos = 9 // int64 - tagProfile_DurationNanos = 10 // int64 - tagProfile_PeriodType = 11 // ValueType (really optional string???) - tagProfile_Period = 12 // int64 + tagProfile_SampleType = 1 // repeated ValueType + tagProfile_Sample = 2 // repeated Sample + tagProfile_Mapping = 3 // repeated Mapping + tagProfile_Location = 4 // repeated Location + tagProfile_Function = 5 // repeated Function + tagProfile_StringTable = 6 // repeated string + tagProfile_DropFrames = 7 // int64 (string table index) + tagProfile_KeepFrames = 8 // int64 (string table index) + tagProfile_TimeNanos = 9 // int64 + tagProfile_DurationNanos = 10 // int64 + tagProfile_PeriodType = 11 // ValueType (really optional string???) + tagProfile_Period = 12 // int64 + tagProfile_Comment = 13 // repeated int64 + tagProfile_DefaultSampleType = 14 // int64 // message ValueType tagValueType_Type = 1 // int64 (string table index) @@ -174,7 +192,7 @@ func (b *profileBuilder) pbLine(tag int, funcID uint64, line int64) { } // pbMapping encodes a Mapping message to b.pb. -func (b *profileBuilder) pbMapping(tag int, id, base, limit, offset uint64, file, buildID string) { +func (b *profileBuilder) pbMapping(tag int, id, base, limit, offset uint64, file, buildID string, hasFuncs bool) { start := b.pb.startMessage() b.pb.uint64Opt(tagMapping_ID, id) b.pb.uint64Opt(tagMapping_Start, base) @@ -182,8 +200,15 @@ func (b *profileBuilder) pbMapping(tag int, id, base, limit, offset uint64, file b.pb.uint64Opt(tagMapping_Offset, offset) b.pb.int64Opt(tagMapping_Filename, b.stringIndex(file)) b.pb.int64Opt(tagMapping_BuildID, b.stringIndex(buildID)) - // TODO: Set any of HasInlineFrames, HasFunctions, HasFilenames, HasLineNumbers? 
- // It seems like they should all be true, but they've never been set. + // TODO: we set HasFunctions if all symbols from samples were symbolized (hasFuncs). + // Decide what to do about HasInlineFrames and HasLineNumbers. + // Also, another approach to handle the mapping entry with + // incomplete symbolization results is to dupliace the mapping + // entry (but with different Has* fields values) and use + // different entries for symbolized locations and unsymbolized locations. + if hasFuncs { + b.pb.bool(tagMapping_HasFunctions, true) + } b.pb.endMessage(tag, start) } @@ -208,6 +233,11 @@ func (b *profileBuilder) locForPC(addr uintptr) uint64 { return 0 } + symbolizeResult := lookupTried + if frame.PC == 0 || frame.Function == "" || frame.File == "" || frame.Line == 0 { + symbolizeResult |= lookupFailed + } + if frame.PC == 0 { // If we failed to resolve the frame, at least make up // a reasonable call PC. This mostly happens in tests. @@ -242,12 +272,14 @@ func (b *profileBuilder) locForPC(addr uintptr) uint64 { } frame, more = frames.Next() } - if len(b.mem) > 0 { - i := sort.Search(len(b.mem), func(i int) bool { - return b.mem[i].end > addr - }) - if i < len(b.mem) && b.mem[i].start <= addr && addr < b.mem[i].end { + for i := range b.mem { + if b.mem[i].start <= addr && addr < b.mem[i].end || b.mem[i].fake { b.pb.uint64Opt(tagLocation_MappingID, uint64(i+1)) + + m := b.mem[i] + m.funcs |= symbolizeResult + b.mem[i] = m + break } } b.pb.endMessage(tagProfile_Location, start) @@ -348,7 +380,7 @@ func (b *profileBuilder) addCPUData(data []uint64, tags []unsafe.Pointer) error } // build completes and returns the constructed profile. -func (b *profileBuilder) build() error { +func (b *profileBuilder) build() { b.end = time.Now() b.pb.int64Opt(tagProfile_TimeNanos, b.start.UnixNano()) @@ -395,13 +427,17 @@ func (b *profileBuilder) build() error { b.pbSample(values, locs, labels) } + for i, m := range b.mem { + hasFunctions := m.funcs == lookupTried // lookupTried but not lookupFailed + b.pbMapping(tagProfile_Mapping, uint64(i+1), uint64(m.start), uint64(m.end), m.offset, m.file, m.buildID, hasFunctions) + } + // TODO: Anything for tagProfile_DropFrames? // TODO: Anything for tagProfile_KeepFrames? b.pb.strings(tagProfile_StringTable, b.strings) b.zw.Write(b.pb.data) b.zw.Close() - return nil } // readMapping reads /proc/self/maps and writes mappings to b.pb. @@ -410,6 +446,12 @@ func (b *profileBuilder) build() error { func (b *profileBuilder) readMapping() { data, _ := ioutil.ReadFile("/proc/self/maps") parseProcSelfMaps(data, b.addMapping) + if len(b.mem) == 0 { // pprof expects a map entry, so fake one. + b.addMappingEntry(0, 0, 0, "", "", true) + // TODO(hyangah): make addMapping return *memMap or + // take a memMap struct, and get rid of addMappingEntry + // that takes a bunch of positional arguments. 
+ } } func parseProcSelfMaps(data []byte, addMapping func(lo, hi, offset uint64, file, buildID string)) { @@ -510,6 +552,16 @@ func parseProcSelfMaps(data []byte, addMapping func(lo, hi, offset uint64, file, } func (b *profileBuilder) addMapping(lo, hi, offset uint64, file, buildID string) { - b.mem = append(b.mem, memMap{uintptr(lo), uintptr(hi)}) - b.pbMapping(tagProfile_Mapping, uint64(len(b.mem)), lo, hi, offset, file, buildID) + b.addMappingEntry(lo, hi, offset, file, buildID, false) +} + +func (b *profileBuilder) addMappingEntry(lo, hi, offset uint64, file, buildID string, fake bool) { + b.mem = append(b.mem, memMap{ + start: uintptr(lo), + end: uintptr(hi), + offset: offset, + file: file, + buildID: buildID, + fake: fake, + }) } diff --git a/libgo/go/runtime/pprof/proto_test.go b/libgo/go/runtime/pprof/proto_test.go index a268c3a..604628c 100644 --- a/libgo/go/runtime/pprof/proto_test.go +++ b/libgo/go/runtime/pprof/proto_test.go @@ -8,7 +8,10 @@ import ( "bytes" "encoding/json" "fmt" + "internal/testenv" "io/ioutil" + "os" + "os/exec" "reflect" "runtime" "runtime/pprof/internal/profile" @@ -63,7 +66,7 @@ func TestConvertCPUProfileEmpty(t *testing.T) { {Type: "cpu", Unit: "nanoseconds"}, } - checkProfile(t, p, 2000*1000, periodType, sampleType, nil) + checkProfile(t, p, 2000*1000, periodType, sampleType, nil, "") } // For gccgo make these functions different so that gccgo doesn't @@ -96,9 +99,16 @@ func testPCs(t *testing.T) (addr1, addr2 uint64, map1, map2 *profile.Mapping) { addr2 = mprof.Mapping[1].Start map2 = mprof.Mapping[1] map2.BuildID, _ = elfBuildID(map2.File) + case "js": + addr1 = uint64(funcPC(f1)) + addr2 = uint64(funcPC(f2)) default: addr1 = uint64(funcPC(f1)) addr2 = uint64(funcPC(f2)) + // Fake mapping - HasFunctions will be true because two PCs from Go + // will be fully symbolized. + fake := &profile.Mapping{ID: 1, HasFunctions: true} + map1, map2 = fake, fake } return } @@ -132,18 +142,23 @@ func TestConvertCPUProfile(t *testing.T) { {ID: 4, Mapping: map2, Address: addr2 + 1}, }}, } - checkProfile(t, p, period, periodType, sampleType, samples) + checkProfile(t, p, period, periodType, sampleType, samples, "") } -func checkProfile(t *testing.T, p *profile.Profile, period int64, periodType *profile.ValueType, sampleType []*profile.ValueType, samples []*profile.Sample) { +func checkProfile(t *testing.T, p *profile.Profile, period int64, periodType *profile.ValueType, sampleType []*profile.ValueType, samples []*profile.Sample, defaultSampleType string) { + t.Helper() + if p.Period != period { - t.Fatalf("p.Period = %d, want %d", p.Period, period) + t.Errorf("p.Period = %d, want %d", p.Period, period) } if !reflect.DeepEqual(p.PeriodType, periodType) { - t.Fatalf("p.PeriodType = %v\nwant = %v", fmtJSON(p.PeriodType), fmtJSON(periodType)) + t.Errorf("p.PeriodType = %v\nwant = %v", fmtJSON(p.PeriodType), fmtJSON(periodType)) } if !reflect.DeepEqual(p.SampleType, sampleType) { - t.Fatalf("p.SampleType = %v\nwant = %v", fmtJSON(p.SampleType), fmtJSON(sampleType)) + t.Errorf("p.SampleType = %v\nwant = %v", fmtJSON(p.SampleType), fmtJSON(sampleType)) + } + if defaultSampleType != p.DefaultSampleType { + t.Errorf("p.DefaultSampleType = %v\nwant = %v", p.DefaultSampleType, defaultSampleType) } // Clear line info since it is not in the expected samples. // If we used f1 and f2 above, then the samples will have line info. 
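The mapping bookkeeping above records, per memMap entry, whether every symbol lookup succeeded, and pbMapping emits HasFunctions accordingly (plus a fake entry when /proc/self/maps is unavailable). The internal profile package used by these tests mirrors the public github.com/google/pprof/profile package, so the result can be inspected the same way outside the runtime; a sketch under that assumption:

package main

import (
	"bytes"
	"fmt"
	"log"
	"runtime/pprof"

	"github.com/google/pprof/profile"
)

func main() {
	var buf bytes.Buffer
	if err := pprof.Lookup("heap").WriteTo(&buf, 0); err != nil {
		log.Fatal(err)
	}
	p, err := profile.Parse(&buf)
	if err != nil {
		log.Fatal(err)
	}
	// Even without /proc/self/maps there is at least one (possibly fake)
	// mapping entry; HasFunctions reports whether its locations symbolized.
	for _, m := range p.Mapping {
		fmt.Printf("mapping %q: HasFunctions=%v\n", m.File, m.HasFunctions)
	}
}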
@@ -222,3 +237,114 @@ func TestProcSelfMaps(t *testing.T) { } } } + +// TestMapping checkes the mapping section of CPU profiles +// has the HasFunctions field set correctly. If all PCs included +// in the samples are successfully symbolized, the corresponding +// mapping entry (in this test case, only one entry) should have +// its HasFunctions field set true. +// The test generates a CPU profile that includes PCs from C side +// that the runtime can't symbolize. See ./testdata/mappingtest. +func TestMapping(t *testing.T) { + testenv.MustHaveGoRun(t) + testenv.MustHaveCGO(t) + + prog := "./testdata/mappingtest/main.go" + + // GoOnly includes only Go symbols that runtime will symbolize. + // Go+C includes C symbols that runtime will not symbolize. + for _, traceback := range []string{"GoOnly", "Go+C"} { + t.Run("traceback"+traceback, func(t *testing.T) { + cmd := exec.Command(testenv.GoToolPath(t), "run", prog) + if traceback != "GoOnly" { + cmd.Env = append(os.Environ(), "SETCGOTRACEBACK=1") + } + cmd.Stderr = new(bytes.Buffer) + + out, err := cmd.Output() + if err != nil { + t.Fatalf("failed to run the test program %q: %v\n%v", prog, err, cmd.Stderr) + } + + prof, err := profile.Parse(bytes.NewReader(out)) + if err != nil { + t.Fatalf("failed to parse the generated profile data: %v", err) + } + t.Logf("Profile: %s", prof) + + hit := make(map[*profile.Mapping]bool) + miss := make(map[*profile.Mapping]bool) + for _, loc := range prof.Location { + if symbolized(loc) { + hit[loc.Mapping] = true + } else { + miss[loc.Mapping] = true + } + } + if len(miss) == 0 { + t.Log("no location with missing symbol info was sampled") + } + + for _, m := range prof.Mapping { + if miss[m] && m.HasFunctions { + t.Errorf("mapping %+v has HasFunctions=true, but contains locations with failed symbolization", m) + continue + } + if !miss[m] && hit[m] && !m.HasFunctions { + t.Errorf("mapping %+v has HasFunctions=false, but all referenced locations from this lapping were symbolized successfully", m) + continue + } + } + }) + } +} + +func symbolized(loc *profile.Location) bool { + if len(loc.Line) == 0 { + return false + } + l := loc.Line[0] + f := l.Function + if l.Line == 0 || f == nil || f.Name == "" || f.Filename == "" { + return false + } + return true +} + +// TestFakeMapping tests if at least one mapping exists +// (including a fake mapping), and their HasFunctions bits +// are set correctly. 
+func TestFakeMapping(t *testing.T) { + var buf bytes.Buffer + if err := Lookup("heap").WriteTo(&buf, 0); err != nil { + t.Fatalf("failed to write heap profile: %v", err) + } + prof, err := profile.Parse(&buf) + if err != nil { + t.Fatalf("failed to parse the generated profile data: %v", err) + } + t.Logf("Profile: %s", prof) + if len(prof.Mapping) == 0 { + t.Fatal("want profile with at least one mapping entry, got 0 mapping") + } + + hit := make(map[*profile.Mapping]bool) + miss := make(map[*profile.Mapping]bool) + for _, loc := range prof.Location { + if symbolized(loc) { + hit[loc.Mapping] = true + } else { + miss[loc.Mapping] = true + } + } + for _, m := range prof.Mapping { + if miss[m] && m.HasFunctions { + t.Errorf("mapping %+v has HasFunctions=true, but contains locations with failed symbolization", m) + continue + } + if !miss[m] && hit[m] && !m.HasFunctions { + t.Errorf("mapping %+v has HasFunctions=false, but all referenced locations from this lapping were symbolized successfully", m) + continue + } + } +} diff --git a/libgo/go/runtime/pprof/protomem.go b/libgo/go/runtime/pprof/protomem.go index 2756cfd..82565d5 100644 --- a/libgo/go/runtime/pprof/protomem.go +++ b/libgo/go/runtime/pprof/protomem.go @@ -12,7 +12,7 @@ import ( ) // writeHeapProto writes the current heap profile in protobuf format to w. -func writeHeapProto(w io.Writer, p []runtime.MemProfileRecord, rate int64) error { +func writeHeapProto(w io.Writer, p []runtime.MemProfileRecord, rate int64, defaultSampleType string) error { b := newProfileBuilder(w) b.pbValueType(tagProfile_PeriodType, "space", "bytes") b.pb.int64Opt(tagProfile_Period, rate) @@ -20,6 +20,9 @@ func writeHeapProto(w io.Writer, p []runtime.MemProfileRecord, rate int64) error b.pbValueType(tagProfile_SampleType, "alloc_space", "bytes") b.pbValueType(tagProfile_SampleType, "inuse_objects", "count") b.pbValueType(tagProfile_SampleType, "inuse_space", "bytes") + if defaultSampleType != "" { + b.pb.int64Opt(tagProfile_DefaultSampleType, b.stringIndex(defaultSampleType)) + } values := []int64{0, 0, 0, 0} var locs []uint64 diff --git a/libgo/go/runtime/pprof/protomem_test.go b/libgo/go/runtime/pprof/protomem_test.go index 1e30ed9..315d5f0 100644 --- a/libgo/go/runtime/pprof/protomem_test.go +++ b/libgo/go/runtime/pprof/protomem_test.go @@ -14,7 +14,6 @@ import ( func TestConvertMemProfile(t *testing.T) { addr1, addr2, map1, map2 := testPCs(t) - var buf bytes.Buffer // MemProfileRecord stacks are return PCs, so add one to the // addresses recorded in the "profile". 
The proto profile // locations are call PCs, so conversion will subtract one @@ -27,15 +26,6 @@ func TestConvertMemProfile(t *testing.T) { {AllocBytes: 512 * 1024, FreeBytes: 512 * 1024, AllocObjects: 1, FreeObjects: 1, Stack0: [32]uintptr{a1 + 1, a1 + 2, a2 + 3}}, } - if err := writeHeapProto(&buf, rec, rate); err != nil { - t.Fatalf("writing profile: %v", err) - } - - p, err := profile.Parse(&buf) - if err != nil { - t.Fatalf("profile.Parse: %v", err) - } - periodType := &profile.ValueType{Type: "space", Unit: "bytes"} sampleType := []*profile.ValueType{ {Type: "alloc_objects", Unit: "count"}, @@ -70,5 +60,25 @@ func TestConvertMemProfile(t *testing.T) { NumLabel: map[string][]int64{"bytes": {829411}}, }, } - checkProfile(t, p, rate, periodType, sampleType, samples) + for _, tc := range []struct { + name string + defaultSampleType string + }{ + {"heap", ""}, + {"allocs", "alloc_space"}, + } { + t.Run(tc.name, func(t *testing.T) { + var buf bytes.Buffer + if err := writeHeapProto(&buf, rec, rate, tc.defaultSampleType); err != nil { + t.Fatalf("writing profile: %v", err) + } + + p, err := profile.Parse(&buf) + if err != nil { + t.Fatalf("profile.Parse: %v", err) + } + + checkProfile(t, p, rate, periodType, sampleType, samples, tc.defaultSampleType) + }) + } } diff --git a/libgo/go/runtime/pprof/testdata/mappingtest/main.go b/libgo/go/runtime/pprof/testdata/mappingtest/main.go new file mode 100644 index 0000000..7850faa --- /dev/null +++ b/libgo/go/runtime/pprof/testdata/mappingtest/main.go @@ -0,0 +1,105 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This program outputs a CPU profile that includes +// both Go and Cgo stacks. This is used by the mapping info +// tests in runtime/pprof. +// +// If SETCGOTRACEBACK=1 is set, the CPU profile will includes +// PCs from C side but they will not be symbolized. +package main + +/* +#include +#include + +int cpuHogCSalt1 = 0; +int cpuHogCSalt2 = 0; + +void CPUHogCFunction() { + int foo = cpuHogCSalt1; + int i; + for (i = 0; i < 100000; i++) { + if (foo > 0) { + foo *= foo; + } else { + foo *= foo + 1; + } + cpuHogCSalt2 = foo; + } +} + +struct CgoTracebackArg { + uintptr_t context; + uintptr_t sigContext; + uintptr_t *buf; + uintptr_t max; +}; + +void CollectCgoTraceback(void* parg) { + struct CgoTracebackArg* arg = (struct CgoTracebackArg*)(parg); + arg->buf[0] = (uintptr_t)(CPUHogCFunction); + arg->buf[1] = 0; +}; +*/ +import "C" + +import ( + "log" + "os" + "runtime" + "runtime/pprof" + "time" + "unsafe" +) + +func init() { + if v := os.Getenv("SETCGOTRACEBACK"); v == "1" { + // Collect some PCs from C-side, but don't symbolize. + runtime.SetCgoTraceback(0, unsafe.Pointer(C.CollectCgoTraceback), nil, nil) + } +} + +func main() { + go cpuHogGoFunction() + go cpuHogCFunction() + runtime.Gosched() + + if err := pprof.StartCPUProfile(os.Stdout); err != nil { + log.Fatal("can't start CPU profile: ", err) + } + time.Sleep(1 * time.Second) + pprof.StopCPUProfile() + + if err := os.Stdout.Close(); err != nil { + log.Fatal("can't write CPU profile: ", err) + } +} + +var salt1 int +var salt2 int + +func cpuHogGoFunction() { + // Generates CPU profile samples including a Go call path. + for { + foo := salt1 + for i := 0; i < 1e5; i++ { + if foo > 0 { + foo *= foo + } else { + foo *= foo + 1 + } + salt2 = foo + } + runtime.Gosched() + } +} + +func cpuHogCFunction() { + // Generates CPU profile samples including a Cgo call path. 
+ for { + C.CPUHogCFunction() + runtime.Gosched() + } +} diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go index 4fc45dd..77d379b 100644 --- a/libgo/go/runtime/proc.go +++ b/libgo/go/runtime/proc.go @@ -5,6 +5,7 @@ package runtime import ( + "internal/cpu" "runtime/internal/atomic" "runtime/internal/sys" "unsafe" @@ -169,9 +170,11 @@ func main() { // Allow newproc to start new Ms. mainStarted = true - systemstack(func() { - newm(sysmon, nil) - }) + if GOARCH != "wasm" { // no threads on wasm yet, so no sysmon + systemstack(func() { + newm(sysmon, nil) + }) + } // Lock the main goroutine onto this, the main OS thread, // during initialization. Most programs won't care, but a few @@ -242,7 +245,7 @@ func main() { } } if atomic.Load(&panicking) != 0 { - gopark(nil, nil, "panicwait", traceEvGoStop, 1) + gopark(nil, nil, waitReasonPanicWait, traceEvGoStop, 1) } exit(0) @@ -276,7 +279,7 @@ func forcegchelper() { throw("forcegc: phase error") } atomic.Store(&forcegc.idle, 1) - goparkunlock(&forcegc.lock, "force gc (idle)", traceEvGoBlock, 1) + goparkunlock(&forcegc.lock, waitReasonForceGGIdle, traceEvGoBlock, 1) // this goroutine is explicitly resumed by sysmon if debug.gctrace > 0 { println("GC forced") @@ -291,6 +294,7 @@ func forcegchelper() { // Gosched yields the processor, allowing other goroutines to run. It does not // suspend the current goroutine, so execution resumes automatically. func Gosched() { + checkTimeouts() mcall(gosched_m) } @@ -305,7 +309,14 @@ func goschedguarded() { // If unlockf returns false, the goroutine is resumed. // unlockf must not access this G's stack, as it may be moved between // the call to gopark and the call to unlockf. -func gopark(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason string, traceEv byte, traceskip int) { +// Reason explains why the goroutine has been parked. +// It is displayed in stack traces and heap dumps. +// Reasons should be unique and descriptive. +// Do not re-use reasons, add new ones. +func gopark(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason waitReason, traceEv byte, traceskip int) { + if reason != waitReasonSleep { + checkTimeouts() // timeouts may expire while two goroutines keep the scheduler busy + } mp := acquirem() gp := mp.curg status := readgstatus(gp) @@ -324,7 +335,7 @@ func gopark(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason s // Puts the current goroutine into a waiting state and unlocks the lock. // The goroutine can be made runnable again by calling goready(gp). -func goparkunlock(lock *mutex, reason string, traceEv byte, traceskip int) { +func goparkunlock(lock *mutex, reason waitReason, traceEv byte, traceskip int) { gopark(parkunlock_c, unsafe.Pointer(lock), reason, traceEv, traceskip) } @@ -468,6 +479,37 @@ const ( _GoidCacheBatch = 16 ) +// cpuinit extracts the environment variable GODEBUGCPU from the environment on +// Linux and Darwin if the GOEXPERIMENT debugcpu was set and calls internal/cpu.Initialize. +func cpuinit() { + const prefix = "GODEBUGCPU=" + var env string + + if haveexperiment("debugcpu") && (GOOS == "linux" || GOOS == "darwin") { + cpu.DebugOptions = true + + // Similar to goenv_unix but extracts the environment value for + // GODEBUGCPU directly. 
+ // TODO(moehrmann): remove when general goenvs() can be called before cpuinit() + n := int32(0) + for argv_index(argv, argc+1+n) != nil { + n++ + } + + for i := int32(0); i < n; i++ { + p := argv_index(argv, argc+1+i) + s := *(*string)(unsafe.Pointer(&stringStruct{unsafe.Pointer(p), findnull(p)})) + + if hasprefix(s, prefix) { + env = gostring(p)[len(prefix):] + break + } + } + } + + cpu.Initialize(env) +} + // The bootstrap sequence is: // // call osinit @@ -488,6 +530,7 @@ func schedinit() { mallocinit() mcommoninit(_g_.m) + cpuinit() // must run before alginit alginit() // maps must not be used before this call msigsave(_g_.m) @@ -778,7 +821,7 @@ func casgstatus(gp *g, oldval, newval uint32) { }) } - // See http://golang.org/cl/21503 for justification of the yield delay. + // See https://golang.org/cl/21503 for justification of the yield delay. const yieldDelay = 5 * 1000 var nextYield int64 @@ -786,9 +829,7 @@ func casgstatus(gp *g, oldval, newval uint32) { // GC time to finish and change the state to oldval. for i := 0; !atomic.Cas(&gp.atomicstatus, oldval, newval); i++ { if oldval == _Gwaiting && gp.atomicstatus == _Grunnable { - systemstack(func() { - throw("casgstatus: waiting for Gwaiting but is Grunnable") - }) + throw("casgstatus: waiting for Gwaiting but is Grunnable") } // Help GC if needed. // if gp.preemptscan && !gp.gcworkdone && (oldval == _Grunning || oldval == _Gsyscall) { @@ -826,7 +867,7 @@ func scang(gp *g, gcw *gcWork) { gp.gcscandone = false - // See http://golang.org/cl/21503 for justification of the yield delay. + // See https://golang.org/cl/21503 for justification of the yield delay. const yieldDelay = 10 * 1000 var nextYield int64 @@ -1212,7 +1253,9 @@ func mstart1() { //go:yeswritebarrierrec func mstartm0() { // Create an extra M for callbacks on threads not created by Go. - if iscgo && !cgoHasExtraM { + // An extra M is also needed on Windows for callbacks created by + // syscall.NewCallback. See issue #6751 for details. + if (iscgo || GOOS == "windows") && !cgoHasExtraM { cgoHasExtraM = true newextram() } @@ -1517,8 +1560,12 @@ func allocm(_p_ *p, fn func(), allocatestack bool) (mp *m, g0Stack unsafe.Pointe // put the m back on the list. //go:nosplit func needm(x byte) { - if iscgo && !cgoHasExtraM { + if (iscgo || GOOS == "windows") && !cgoHasExtraM { // Can happen if C/C++ code calls Go from a global ctor. + // Can also happen on Windows if a global ctor uses a + // callback created by syscall.NewCallback. See issue #6751 + // for details. + // // Can not throw, because scheduler is not initialized yet. write(2, unsafe.Pointer(&earlycgocallback[0]), int32(len(earlycgocallback))) exit(1) @@ -1814,13 +1861,16 @@ func newm1(mp *m) { // // The calling thread must itself be in a known-good state. func startTemplateThread() { + if GOARCH == "wasm" { // no threads on wasm yet + return + } if !atomic.Cas(&newmHandoff.haveTemplateThread, 0, 1) { return } newm(templateThread, nil) } -// tmeplateThread is a thread in a known-good state that exists solely +// templateThread is a thread in a known-good state that exists solely // to start new threads in known-good states when the calling thread // may not be a a good state. // @@ -2232,6 +2282,14 @@ stop: return gp, false } + // wasm only: + // Check if a goroutine is waiting for a callback from the WebAssembly host. + // If yes, pause the execution until a callback was triggered. + if pauseSchedulerUntilCallback() { + // A callback was triggered and caused at least one goroutine to wake up. 
+ goto top + } + // Before we drop our P, make a snapshot of the allp slice, // which can change underfoot once we no longer block // safe-points. We don't need to snapshot the contents because @@ -2616,7 +2674,7 @@ func goexit0(gp *g) { gp._defer = nil // should be true already but just in case. gp._panic = nil // non-nil for Goexit during panic. points at stack-allocated data. gp.writebuf = nil - gp.waitreason = "" + gp.waitreason = 0 gp.param = nil gp.labels = nil gp.timer = nil @@ -2635,6 +2693,11 @@ func goexit0(gp *g) { gp.gcscanvalid = true dropg() + if GOARCH == "wasm" { // no threads yet on wasm + gfput(_g_.m.p.ptr(), gp) + schedule() // never returns + } + if _g_.m.lockedInt != 0 { print("invalid m->lockedInt = ", _g_.m.lockedInt, "\n") throw("internal lockOSThread error") @@ -2743,8 +2806,6 @@ func entersyscall_gcwait() { unlock(&sched.lock) } -// The same as reentersyscall(), but with a hint that the syscall is blocking. -//go:nosplit func reentersyscallblock(pc, sp uintptr) { _g_ := getg() @@ -2789,9 +2850,7 @@ func exitsyscall() { oldp := _g_.m.p.ptr() if exitsyscallfast() { if _g_.m.mcache == nil { - systemstack(func() { - throw("lost mcache") - }) + throw("lost mcache") } if trace.enabled { if oldp != _g_.m.p.ptr() || _g_.m.syscalltick != _g_.m.p.ptr().syscalltick { @@ -2836,9 +2895,7 @@ func exitsyscall() { mcall(exitsyscall0) if _g_.m.mcache == nil { - systemstack(func() { - throw("lost mcache") - }) + throw("lost mcache") } // Scheduler returned, so we're allowed to run now. @@ -3188,6 +3245,42 @@ func setSystemGoroutine() { atomic.Xadd(&expectedSystemGoroutines, -1) } +// saveAncestors copies previous ancestors of the given caller g and +// includes infor for the current caller into a new set of tracebacks for +// a g being created. +func saveAncestors(callergp *g) *[]ancestorInfo { + // Copy all prior info, except for the root goroutine (goid 0). + if debug.tracebackancestors <= 0 || callergp.goid == 0 { + return nil + } + var callerAncestors []ancestorInfo + if callergp.ancestors != nil { + callerAncestors = *callergp.ancestors + } + n := int32(len(callerAncestors)) + 1 + if n > debug.tracebackancestors { + n = debug.tracebackancestors + } + ancestors := make([]ancestorInfo, n) + copy(ancestors[1:], callerAncestors) + + var pcs [_TracebackMaxFrames]uintptr + // FIXME: This should get a traceback of callergp. + // npcs := gcallers(callergp, 0, pcs[:]) + npcs := 0 + ipcs := make([]uintptr, npcs) + copy(ipcs, pcs[:]) + ancestors[0] = ancestorInfo{ + pcs: ipcs, + goid: callergp.goid, + gopc: callergp.gopc, + } + + ancestorsp := new([]ancestorInfo) + *ancestorsp = ancestors + return ancestorsp +} + // Put on gfree list. // If local list is too long, transfer a batch to the global list. func gfput(_p_ *p, gp *g) { @@ -3265,6 +3358,9 @@ func Breakpoint() { // or else the m might be different in this function than in the caller. //go:nosplit func dolockOSThread() { + if GOARCH == "wasm" { + return // no threads on wasm yet + } _g_ := getg() _g_.m.lockedg.set(_g_) _g_.lockedm.set(_g_.m) @@ -3280,6 +3376,10 @@ func dolockOSThread() { // If the calling goroutine exits without unlocking the thread, // the thread will be terminated. // +// All init functions are run on the startup thread. Calling LockOSThread +// from an init function will cause the main function to be invoked on +// that thread. +// // A goroutine should call LockOSThread before calling OS services or // non-Go library functions that depend on per-thread state. 
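
The LockOSThread documentation added above notes that calling it from an init function causes main to run on the startup thread. A minimal sketch of that usage, independent of this patch (the comments name a hypothetical thread-affine C library as the motivation):

// Sketch: pinning main to the startup OS thread, per the LockOSThread
// doc comment above. Some C libraries (for example many GUI toolkits)
// require that all calls happen on the thread that initialized them.
package main

import "runtime"

func init() {
	// Runs before main; per the new doc comment, main will then be
	// invoked on this same OS thread.
	runtime.LockOSThread()
}

func main() {
	// Thread-affine C APIs may be called here (assumption: the library
	// only needs a consistent thread, nothing more).
}
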
func LockOSThread() { @@ -3309,6 +3409,9 @@ func lockOSThread() { // or else the m might be in different in this function than in the caller. //go:nosplit func dounlockOSThread() { + if GOARCH == "wasm" { + return // no threads on wasm yet + } _g_ := getg() if _g_.m.lockedInt != 0 || _g_.m.lockedExt != 0 { return @@ -3382,6 +3485,7 @@ func _ExternalCode() { _ExternalCode() } func _LostExternalCode() { _LostExternalCode() } func _GC() { _GC() } func _LostSIGPROFDuringAtomic64() { _LostSIGPROFDuringAtomic64() } +func _VDSO() { _VDSO() } // Counts SIGPROFs received while in atomic64 critical section, on mips{,le} var lostAtomic64Count uint64 @@ -3470,7 +3574,7 @@ func sigprof(pc uintptr, gp *g, mp *m) { } if prof.hz != 0 { - if (GOARCH == "mips" || GOARCH == "mipsle") && lostAtomic64Count > 0 { + if (GOARCH == "mips" || GOARCH == "mipsle" || GOARCH == "arm") && lostAtomic64Count > 0 { cpuprof.addLostAtomic64(lostAtomic64Count) lostAtomic64Count = 0 } @@ -3818,8 +3922,17 @@ func checkdead() { return } + // If we are not running under cgo, but we have an extra M then account + // for it. (It is possible to have an extra M on Windows without cgo to + // accommodate callbacks created by syscall.NewCallback. See issue #6751 + // for details.) + var run0 int32 + if !iscgo && cgoHasExtraM { + run0 = 1 + } + run := mcount() - sched.nmidle - sched.nmidlelocked - sched.nmsys - if run > 0 { + if run > run0 { return } if run < 0 { @@ -4215,7 +4328,7 @@ func schedtrace(detailed bool) { if lockedm != nil { id2 = lockedm.id } - print(" G", gp.goid, ": status=", readgstatus(gp), "(", gp.waitreason, ") m=", id1, " lockedm=", id2, "\n") + print(" G", gp.goid, ": status=", readgstatus(gp), "(", gp.waitreason.String(), ") m=", id1, " lockedm=", id2, "\n") } unlock(&allglock) unlock(&sched.lock) @@ -4375,7 +4488,7 @@ func runqempty(_p_ *p) bool { const randomizeScheduler = raceenabled // runqput tries to put g on the local runnable queue. -// If next if false, runqput adds g to the tail of the runnable queue. +// If next is false, runqput adds g to the tail of the runnable queue. // If next is true, runqput puts g in the _p_.runnext slot. // If the run queue is full, runnext puts g on the global queue. // Executed only by the owner P. @@ -4571,6 +4684,11 @@ func setMaxThreads(in int) (out int) { return } +func haveexperiment(name string) bool { + // The gofrontend does not support experiments. + return false +} + //go:nosplit func procPin() int { _g_ := getg() @@ -4618,7 +4736,7 @@ func sync_runtime_canSpin(i int) bool { // Spin only few times and only if running on a multicore machine and // GOMAXPROCS>1 and there is at least one other running P and local runq is empty. // As opposed to runtime mutex we don't do passive spinning here, - // because there can be work on global runq on on other Ps. + // because there can be work on global runq or on other Ps. 
if i >= active_spin || ncpu <= 1 || gomaxprocs <= int32(sched.npidle+sched.nmspinning)+1 { return false } diff --git a/libgo/go/runtime/proc_test.go b/libgo/go/runtime/proc_test.go index 672e1fa..82a2fe4 100644 --- a/libgo/go/runtime/proc_test.go +++ b/libgo/go/runtime/proc_test.go @@ -28,6 +28,9 @@ func perpetuumMobile() { } func TestStopTheWorldDeadlock(t *testing.T) { + if runtime.GOARCH == "wasm" { + t.Skip("no preemption on wasm yet") + } if testing.Short() { t.Skip("skipping during short test") } @@ -230,6 +233,10 @@ func TestBlockLocked(t *testing.T) { } func TestTimerFairness(t *testing.T) { + if runtime.GOARCH == "wasm" { + t.Skip("no preemption on wasm yet") + } + done := make(chan bool) c := make(chan bool) for i := 0; i < 2; i++ { @@ -256,6 +263,10 @@ func TestTimerFairness(t *testing.T) { } func TestTimerFairness2(t *testing.T) { + if runtime.GOARCH == "wasm" { + t.Skip("no preemption on wasm yet") + } + done := make(chan bool) c := make(chan bool) for i := 0; i < 2; i++ { @@ -290,7 +301,13 @@ var preempt = func() int { } func TestPreemption(t *testing.T) { - t.Skip("gccgo does not implement preemption") + if runtime.Compiler == "gccgo" { + t.Skip("gccgo does not implement preemption") + } + if runtime.GOARCH == "wasm" { + t.Skip("no preemption on wasm yet") + } + // Test that goroutines are preempted at function calls. N := 5 if testing.Short() { @@ -314,7 +331,13 @@ func TestPreemption(t *testing.T) { } func TestPreemptionGC(t *testing.T) { - t.Skip("gccgo does not implement preemption") + if runtime.Compiler == "gccgo" { + t.Skip("gccgo does not implement preemption") + } + if runtime.GOARCH == "wasm" { + t.Skip("no preemption on wasm yet") + } + // Test that pending GC preempts running goroutines. P := 5 N := 10 @@ -387,6 +410,9 @@ func TestNumGoroutine(t *testing.T) { } func TestPingPongHog(t *testing.T) { + if runtime.GOARCH == "wasm" { + t.Skip("no preemption on wasm yet") + } if testing.Short() { t.Skip("skipping in -short mode") } @@ -837,6 +863,10 @@ func TestStealOrder(t *testing.T) { } func TestLockOSThreadNesting(t *testing.T) { + if runtime.GOARCH == "wasm" { + t.Skip("no threads on wasm yet") + } + go func() { e, i := runtime.LockOSCounts() if e != 0 || i != 0 { diff --git a/libgo/go/runtime/rand_test.go b/libgo/go/runtime/rand_test.go index f8831b0..1b84c79 100644 --- a/libgo/go/runtime/rand_test.go +++ b/libgo/go/runtime/rand_test.go @@ -25,7 +25,7 @@ func BenchmarkFastrandHashiter(b *testing.B) { } b.RunParallel(func(pb *testing.PB) { for pb.Next() { - for _ = range m { + for range m { break } } diff --git a/libgo/go/runtime/runtime-lldb_test.go b/libgo/go/runtime/runtime-lldb_test.go index 9a28705..fe3a0eb 100644 --- a/libgo/go/runtime/runtime-lldb_test.go +++ b/libgo/go/runtime/runtime-lldb_test.go @@ -154,7 +154,9 @@ func TestLldbPython(t *testing.T) { t.Fatalf("failed to create file: %v", err) } - cmd := exec.Command(testenv.GoToolPath(t), "build", "-gcflags=all=-N -l", "-o", "a.exe") + // As of 2018-07-17, lldb doesn't support compressed DWARF, so + // disable it for this test. + cmd := exec.Command(testenv.GoToolPath(t), "build", "-gcflags=all=-N -l", "-ldflags=-compressdwarf=false", "-o", "a.exe") cmd.Dir = dir out, err := cmd.CombinedOutput() if err != nil { diff --git a/libgo/go/runtime/runtime1.go b/libgo/go/runtime/runtime1.go index b617f85..8b1b0a0 100644 --- a/libgo/go/runtime/runtime1.go +++ b/libgo/go/runtime/runtime1.go @@ -326,20 +326,21 @@ type dbgVar struct { // existing int var for that value, which may // already have an initial value. 
var debug struct { - allocfreetrace int32 - cgocheck int32 - efence int32 - gccheckmark int32 - gcpacertrace int32 - gcshrinkstackoff int32 - gcrescanstacks int32 - gcstoptheworld int32 - gctrace int32 - invalidptr int32 - sbrk int32 - scavenge int32 - scheddetail int32 - schedtrace int32 + allocfreetrace int32 + cgocheck int32 + efence int32 + gccheckmark int32 + gcpacertrace int32 + gcshrinkstackoff int32 + gcrescanstacks int32 + gcstoptheworld int32 + gctrace int32 + invalidptr int32 + sbrk int32 + scavenge int32 + scheddetail int32 + schedtrace int32 + tracebackancestors int32 } var dbgvars = []dbgVar{ @@ -357,6 +358,7 @@ var dbgvars = []dbgVar{ {"scavenge", &debug.scavenge}, {"scheddetail", &debug.scheddetail}, {"schedtrace", &debug.schedtrace}, + {"tracebackancestors", &debug.tracebackancestors}, } func parsedebugvars() { diff --git a/libgo/go/runtime/runtime2.go b/libgo/go/runtime/runtime2.go index 2de1cc8..e12e832 100644 --- a/libgo/go/runtime/runtime2.go +++ b/libgo/go/runtime/runtime2.go @@ -353,28 +353,29 @@ type g struct { atomicstatus uint32 // Not for gccgo: stackLock uint32 // sigprof/scang lock; TODO: fold in to atomicstatus goid int64 - waitsince int64 // approx time when the g become blocked - waitreason string // if status==Gwaiting schedlink guintptr - preempt bool // preemption signal, duplicates stackguard0 = stackpreempt - paniconfault bool // panic (instead of crash) on unexpected fault address - preemptscan bool // preempted g does scan for gc - gcscandone bool // g has scanned stack; protected by _Gscan bit in status - gcscanvalid bool // false at start of gc cycle, true if G has not run since last scan; TODO: remove? - throwsplit bool // must not split stack - raceignore int8 // ignore race detection events - sysblocktraced bool // StartTrace has emitted EvGoInSyscall about this goroutine - sysexitticks int64 // cputicks when syscall has returned (for tracing) - traceseq uint64 // trace event sequencer - tracelastp puintptr // last P emitted an event for this goroutine + waitsince int64 // approx time when the g become blocked + waitreason waitReason // if status==Gwaiting + preempt bool // preemption signal, duplicates stackguard0 = stackpreempt + paniconfault bool // panic (instead of crash) on unexpected fault address + preemptscan bool // preempted g does scan for gc + gcscandone bool // g has scanned stack; protected by _Gscan bit in status + gcscanvalid bool // false at start of gc cycle, true if G has not run since last scan; TODO: remove? 
+ throwsplit bool // must not split stack + raceignore int8 // ignore race detection events + sysblocktraced bool // StartTrace has emitted EvGoInSyscall about this goroutine + sysexitticks int64 // cputicks when syscall has returned (for tracing) + traceseq uint64 // trace event sequencer + tracelastp puintptr // last P emitted an event for this goroutine lockedm muintptr sig uint32 writebuf []byte sigcode0 uintptr sigcode1 uintptr sigpc uintptr - gopc uintptr // pc of go statement that created this goroutine - startpc uintptr // pc of goroutine function + gopc uintptr // pc of go statement that created this goroutine + ancestors *[]ancestorInfo // ancestor information goroutine(s) that created this goroutine (only used if debug.tracebackancestors) + startpc uintptr // pc of goroutine function // Not for gccgo: racectx uintptr waiting *sudog // sudog structures this g is waiting on (that have a valid elem ptr); in lock order // Not for gccgo: cgoCtxt []uintptr // cgo traceback context @@ -476,15 +477,12 @@ type m struct { ncgo int32 // number of cgo calls currently in progress // Not for gccgo: cgoCallersUse uint32 // if non-zero, cgoCallers in use temporarily // Not for gccgo: cgoCallers *cgoCallers // cgo traceback if crashing in cgo call - park note - alllink *m // on allm - schedlink muintptr - mcache *mcache - lockedg guintptr - createstack [32]location // stack that created this thread. - // Not for gccgo: freglo [16]uint32 // d[i] lsb and f[i] - // Not for gccgo: freghi [16]uint32 // d[i] msb and f[i+16] - // Not for gccgo: fflag uint32 // floating point compare flags + park note + alllink *m // on allm + schedlink muintptr + mcache *mcache + lockedg guintptr + createstack [32]location // stack that created this thread. lockedExt uint32 // tracking for external LockOSThread lockedInt uint32 // tracking for internal lockOSThread nextwaitm muintptr // next m waiting for lock @@ -773,6 +771,13 @@ type _panic struct { aborted bool } +// ancestorInfo records details of where a goroutine was started. +type ancestorInfo struct { + pcs []uintptr // pcs from the stack of this goroutine + goid int64 // goroutine id of this goroutine; original goroutine possibly dead + gopc uintptr // pc of go statement that created this goroutine +} + const ( _TraceRuntimeFrames = 1 << iota // include frames for internal runtime functions. _TraceTrap // the initial PC, SP are from a trap, not a return PC from a call @@ -782,6 +787,71 @@ const ( // The maximum number of frames we print for a traceback const _TracebackMaxFrames = 100 +// A waitReason explains why a goroutine has been stopped. +// See gopark. Do not re-use waitReasons, add new ones. 
+type waitReason uint8 + +const ( + waitReasonZero waitReason = iota // "" + waitReasonGCAssistMarking // "GC assist marking" + waitReasonIOWait // "IO wait" + waitReasonChanReceiveNilChan // "chan receive (nil chan)" + waitReasonChanSendNilChan // "chan send (nil chan)" + waitReasonDumpingHeap // "dumping heap" + waitReasonGarbageCollection // "garbage collection" + waitReasonGarbageCollectionScan // "garbage collection scan" + waitReasonPanicWait // "panicwait" + waitReasonSelect // "select" + waitReasonSelectNoCases // "select (no cases)" + waitReasonGCAssistWait // "GC assist wait" + waitReasonGCSweepWait // "GC sweep wait" + waitReasonChanReceive // "chan receive" + waitReasonChanSend // "chan send" + waitReasonFinalizerWait // "finalizer wait" + waitReasonForceGGIdle // "force gc (idle)" + waitReasonSemacquire // "semacquire" + waitReasonSleep // "sleep" + waitReasonSyncCondWait // "sync.Cond.Wait" + waitReasonTimerGoroutineIdle // "timer goroutine (idle)" + waitReasonTraceReaderBlocked // "trace reader (blocked)" + waitReasonWaitForGCCycle // "wait for GC cycle" + waitReasonGCWorkerIdle // "GC worker (idle)" +) + +var waitReasonStrings = [...]string{ + waitReasonZero: "", + waitReasonGCAssistMarking: "GC assist marking", + waitReasonIOWait: "IO wait", + waitReasonChanReceiveNilChan: "chan receive (nil chan)", + waitReasonChanSendNilChan: "chan send (nil chan)", + waitReasonDumpingHeap: "dumping heap", + waitReasonGarbageCollection: "garbage collection", + waitReasonGarbageCollectionScan: "garbage collection scan", + waitReasonPanicWait: "panicwait", + waitReasonSelect: "select", + waitReasonSelectNoCases: "select (no cases)", + waitReasonGCAssistWait: "GC assist wait", + waitReasonGCSweepWait: "GC sweep wait", + waitReasonChanReceive: "chan receive", + waitReasonChanSend: "chan send", + waitReasonFinalizerWait: "finalizer wait", + waitReasonForceGGIdle: "force gc (idle)", + waitReasonSemacquire: "semacquire", + waitReasonSleep: "sleep", + waitReasonSyncCondWait: "sync.Cond.Wait", + waitReasonTimerGoroutineIdle: "timer goroutine (idle)", + waitReasonTraceReaderBlocked: "trace reader (blocked)", + waitReasonWaitForGCCycle: "wait for GC cycle", + waitReasonGCWorkerIdle: "GC worker (idle)", +} + +func (w waitReason) String() string { + if w < 0 || w >= waitReason(len(waitReasonStrings)) { + return "unknown wait reason" + } + return waitReasonStrings[w] +} + var ( allglen uintptr allm *m @@ -793,23 +863,7 @@ var ( sched schedt newprocs int32 - // Information about what cpu features are available. - // Set on startup in asm_{x86,amd64}.s. - // Packages outside the runtime should not use these - // as they are not an external api. - cpuid_ecx uint32 support_aes bool - - // cpuid_edx uint32 - // cpuid_ebx7 uint32 - // lfenceBeforeRdtsc bool - // support_avx bool - // support_avx2 bool - // support_bmi1 bool - // support_bmi2 bool - -// goarm uint8 // set by cmd/link on arm systems -// framepointer_enabled bool // set by cmd/link ) // Set by the linker so the runtime can determine the buildmode. 
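
The waitReason strings defined above are what appear in goroutine dumps and schedtrace output in place of the old free-form reason strings. For example, a goroutine parked on a receive from a nil channel uses waitReasonChanReceiveNilChan; a deliberately deadlocked program makes the string easy to observe. A small illustrative program, not part of this patch (exact report wording depends on the Go implementation and version):

// Sketch: observing a waitReason string in a deadlock traceback.
// Receiving from a nil channel blocks forever, so the runtime's
// deadlock check fires and the traceback shows something like
// "goroutine 1 [chan receive (nil chan)]:".
package main

func main() {
	var c chan int // nil channel: a receive blocks forever
	<-c            // parked with waitReasonChanReceiveNilChan
}
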
diff --git a/libgo/go/runtime/runtime_test.go b/libgo/go/runtime/runtime_test.go index 0231043..995ce25 100644 --- a/libgo/go/runtime/runtime_test.go +++ b/libgo/go/runtime/runtime_test.go @@ -169,6 +169,9 @@ func testSetPanicOnFault(t *testing.T, addr uintptr, nfault *int) { if GOOS == "nacl" { t.Skip("nacl doesn't seem to fault on high addresses") } + if GOOS == "js" { + t.Skip("js does not support catching faults") + } defer func() { if err := recover(); err != nil { @@ -266,7 +269,7 @@ func TestTrailingZero(t *testing.T) { */ func TestBadOpen(t *testing.T) { - if GOOS == "windows" || GOOS == "nacl" { + if GOOS == "windows" || GOOS == "nacl" || GOOS == "js" { t.Skip("skipping OS that doesn't have open/read/write/close") } // make sure we get the correct error code if open fails. Same for diff --git a/libgo/go/runtime/rwmutex_test.go b/libgo/go/runtime/rwmutex_test.go index 872b3b0..291a32e 100644 --- a/libgo/go/runtime/rwmutex_test.go +++ b/libgo/go/runtime/rwmutex_test.go @@ -47,6 +47,9 @@ func doTestParallelReaders(numReaders int) { } func TestParallelRWMutexReaders(t *testing.T) { + if GOARCH == "wasm" { + t.Skip("wasm has no threads yet") + } defer GOMAXPROCS(GOMAXPROCS(-1)) // If runtime triggers a forced GC during this test then it will deadlock, // since the goroutines can't be stopped/preempted. diff --git a/libgo/go/runtime/select.go b/libgo/go/runtime/select.go index 9dab052..39c12da 100644 --- a/libgo/go/runtime/select.go +++ b/libgo/go/runtime/select.go @@ -94,7 +94,7 @@ func selparkcommit(gp *g, _ unsafe.Pointer) bool { } func block() { - gopark(nil, nil, "select (no cases)", traceEvGoStop, 1) // forever + gopark(nil, nil, waitReasonSelectNoCases, traceEvGoStop, 1) // forever } // selectgo implements the select statement. @@ -307,7 +307,7 @@ loop: // wait for someone to wake us up gp.param = nil - gopark(selparkcommit, nil, "select", traceEvGoBlockSelect, 1) + gopark(selparkcommit, nil, waitReasonSelect, traceEvGoBlockSelect, 1) sellock(scases, lockorder) diff --git a/libgo/go/runtime/sema.go b/libgo/go/runtime/sema.go index 6e2beec..cb7d3cd 100644 --- a/libgo/go/runtime/sema.go +++ b/libgo/go/runtime/sema.go @@ -15,7 +15,7 @@ // even if, due to races, the wakeup happens before the sleep. // // See Mullender and Cox, ``Semaphores in Plan 9,'' -// http://swtch.com/semaphore.pdf +// https://swtch.com/semaphore.pdf package runtime @@ -141,7 +141,7 @@ func semacquire1(addr *uint32, lifo bool, profile semaProfileFlags) { // Any semrelease after the cansemacquire knows we're waiting // (we set nwait above), so go to sleep. root.queue(addr, s, lifo) - goparkunlock(&root.lock, "semacquire", traceEvGoBlockSync, 4) + goparkunlock(&root.lock, waitReasonSemacquire, traceEvGoBlockSync, 4) if s.ticket != 0 || cansemacquire(addr) { break } @@ -274,7 +274,7 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) { // addresses, it is kept balanced on average by maintaining a heap ordering // on the ticket: s.ticket <= both s.prev.ticket and s.next.ticket. // https://en.wikipedia.org/wiki/Treap - // http://faculty.washington.edu/aragon/pubs/rst89.pdf + // https://faculty.washington.edu/aragon/pubs/rst89.pdf // // s.ticket compared with zero in couple of places, therefore set lowest bit. // It will not affect treap's quality noticeably. 
@@ -507,7 +507,7 @@ func notifyListWait(l *notifyList, t uint32) { l.tail.next = s } l.tail = s - goparkunlock(&l.lock, "semacquire", traceEvGoBlockCond, 3) + goparkunlock(&l.lock, waitReasonSyncCondWait, traceEvGoBlockCond, 3) if t0 != 0 { blockevent(s.releasetime-t0, 2) } diff --git a/libgo/go/runtime/signal_sighandler.go b/libgo/go/runtime/signal_sighandler.go index 698629d..e4bf7bc 100644 --- a/libgo/go/runtime/signal_sighandler.go +++ b/libgo/go/runtime/signal_sighandler.go @@ -14,6 +14,11 @@ import ( // GOTRACEBACK=crash when a signal is received. var crashing int32 +// testSigtrap is used by the runtime tests. If non-nil, it is called +// on SIGTRAP. If it returns true, the normal behavior on SIGTRAP is +// suppressed. +var testSigtrap func(info *_siginfo_t, ctxt *sigctxt, gp *g) bool + // sighandler is invoked when a signal occurs. The global g will be // set to a gsignal goroutine and we will be running on the alternate // signal stack. The parameter g will be the value of the global g @@ -27,7 +32,7 @@ var crashing int32 //go:nowritebarrierrec func sighandler(sig uint32, info *_siginfo_t, ctxt unsafe.Pointer, gp *g) { _g_ := getg() - c := sigctxt{info, ctxt} + c := &sigctxt{info, ctxt} sigfault, sigpc := getSiginfo(info, ctxt) @@ -36,6 +41,10 @@ func sighandler(sig uint32, info *_siginfo_t, ctxt unsafe.Pointer, gp *g) { return } + if sig == _SIGTRAP && testSigtrap != nil && testSigtrap(info, (*sigctxt)(noescape(unsafe.Pointer(c))), gp) { + return + } + flags := int32(_SigThrow) if sig < uint32(len(sigtable)) { flags = sigtable[sig].flags @@ -45,6 +54,11 @@ func sighandler(sig uint32, info *_siginfo_t, ctxt unsafe.Pointer, gp *g) { // stack. Abort in the signal handler instead. flags = (flags &^ _SigPanic) | _SigThrow } + if isAbortPC(sigpc) { + // On many architectures, the abort function just + // causes a memory fault. Don't turn that into a panic. + flags = _SigThrow + } if c.sigcode() != _SI_USER && flags&_SigPanic != 0 { // Emulate gc by passing arguments out of band, // although we don't really have to. @@ -87,7 +101,7 @@ func sighandler(sig uint32, info *_siginfo_t, ctxt unsafe.Pointer, gp *g) { _g_.m.caughtsig.set(gp) if crashing == 0 { - startpanic() + startpanic_m() } if sig < uint32(len(sigtable)) { diff --git a/libgo/go/runtime/signal_unix.go b/libgo/go/runtime/signal_unix.go index a8f77fa..84623d3 100644 --- a/libgo/go/runtime/signal_unix.go +++ b/libgo/go/runtime/signal_unix.go @@ -112,6 +112,8 @@ func initsig(preinit bool) { // set SA_ONSTACK if necessary. if fwdSig[i] != _SIG_DFL && fwdSig[i] != _SIG_IGN { setsigstack(i) + } else if fwdSig[i] == _SIG_IGN { + sigInitIgnored(i) } continue } @@ -398,14 +400,6 @@ func dieFromSignal(sig uint32) { osyield() osyield() - // On Darwin we may still fail to die, because raise sends the - // signal to the whole process rather than just the current thread, - // and osyield just sleeps briefly rather than letting all other - // threads run. See issue 20315. Sleep longer. - if GOOS == "darwin" { - usleep(100) - } - // If we are still somehow running, just exit with the wrong status. exit(2) } @@ -444,7 +438,10 @@ func raisebadsignal(sig uint32, c *sigctxt) { // re-installing sighandler. At this point we can just // return and the signal will be re-raised and caught by // the default handler with the correct context. - if (isarchive || islibrary) && handler == _SIG_DFL && c.sigcode() != _SI_USER { + // + // On FreeBSD, the libthr sigaction code prevents + // this from working so we fall through to raise. 
+ if GOOS != "freebsd" && (isarchive || islibrary) && handler == _SIG_DFL && c.sigcode() != _SI_USER { return } @@ -464,6 +461,7 @@ func raisebadsignal(sig uint32, c *sigctxt) { setsig(sig, getSigtramp()) } +//go:nosplit func crash() { if GOOS == "darwin" { // OS X core dumps are linear dumps of the mapped memory, diff --git a/libgo/go/runtime/sigqueue.go b/libgo/go/runtime/sigqueue.go index b108c39..cf926a9 100644 --- a/libgo/go/runtime/sigqueue.go +++ b/libgo/go/runtime/sigqueue.go @@ -237,7 +237,18 @@ func signal_ignore(s uint32) { atomic.Store(&sig.ignored[s/32], i) } +// sigInitIgnored marks the signal as already ignored. This is called at +// program start by initsig. In a shared library initsig is called by +// libpreinit, so the runtime may not be initialized yet. +//go:nosplit +func sigInitIgnored(s uint32) { + i := sig.ignored[s/32] + i |= 1 << (s & 31) + atomic.Store(&sig.ignored[s/32], i) +} + // Checked by signal handlers. +//go:linkname signal_ignored os_signal.signal_ignored func signal_ignored(s uint32) bool { i := atomic.Load(&sig.ignored[s/32]) return i&(1<<(s&31)) != 0 diff --git a/libgo/go/runtime/sizeof_test.go b/libgo/go/runtime/sizeof_test.go new file mode 100644 index 0000000..ecda82a --- /dev/null +++ b/libgo/go/runtime/sizeof_test.go @@ -0,0 +1,43 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !nacl + +package runtime_test + +import ( + "reflect" + "runtime" + "testing" + "unsafe" +) + +// Assert that the size of important structures do not change unexpectedly. + +func TestSizeof(t *testing.T) { + if runtime.Compiler != "gc" { + t.Skip("skipping size test; specific to gc compiler") + } + + const _64bit = unsafe.Sizeof(uintptr(0)) == 8 + + var tests = []struct { + val interface{} // type as a value + _32bit uintptr // size on 32bit platforms + _64bit uintptr // size on 64bit platforms + }{ + {runtime.G{}, 216, 376}, // g, but exported for testing + } + + for _, tt := range tests { + want := tt._32bit + if _64bit { + want = tt._64bit + } + got := reflect.TypeOf(tt.val).Size() + if want != got { + t.Errorf("unsafe.Sizeof(%T) = %d, want %d", tt.val, got, want) + } + } +} diff --git a/libgo/go/runtime/slice.go b/libgo/go/runtime/slice.go index ec5aa64..2e874cc 100644 --- a/libgo/go/runtime/slice.go +++ b/libgo/go/runtime/slice.go @@ -5,6 +5,7 @@ package runtime import ( + "runtime/internal/sys" "unsafe" ) @@ -34,14 +35,14 @@ type notInHeapSlice struct { // The index is the size of the slice element. 
var maxElems = [...]uintptr{ ^uintptr(0), - _MaxMem / 1, _MaxMem / 2, _MaxMem / 3, _MaxMem / 4, - _MaxMem / 5, _MaxMem / 6, _MaxMem / 7, _MaxMem / 8, - _MaxMem / 9, _MaxMem / 10, _MaxMem / 11, _MaxMem / 12, - _MaxMem / 13, _MaxMem / 14, _MaxMem / 15, _MaxMem / 16, - _MaxMem / 17, _MaxMem / 18, _MaxMem / 19, _MaxMem / 20, - _MaxMem / 21, _MaxMem / 22, _MaxMem / 23, _MaxMem / 24, - _MaxMem / 25, _MaxMem / 26, _MaxMem / 27, _MaxMem / 28, - _MaxMem / 29, _MaxMem / 30, _MaxMem / 31, _MaxMem / 32, + maxAlloc / 1, maxAlloc / 2, maxAlloc / 3, maxAlloc / 4, + maxAlloc / 5, maxAlloc / 6, maxAlloc / 7, maxAlloc / 8, + maxAlloc / 9, maxAlloc / 10, maxAlloc / 11, maxAlloc / 12, + maxAlloc / 13, maxAlloc / 14, maxAlloc / 15, maxAlloc / 16, + maxAlloc / 17, maxAlloc / 18, maxAlloc / 19, maxAlloc / 20, + maxAlloc / 21, maxAlloc / 22, maxAlloc / 23, maxAlloc / 24, + maxAlloc / 25, maxAlloc / 26, maxAlloc / 27, maxAlloc / 28, + maxAlloc / 29, maxAlloc / 30, maxAlloc / 31, maxAlloc / 32, } // maxSliceCap returns the maximum capacity for a slice. @@ -49,7 +50,15 @@ func maxSliceCap(elemsize uintptr) uintptr { if elemsize < uintptr(len(maxElems)) { return maxElems[elemsize] } - return _MaxMem / elemsize + return maxAlloc / elemsize +} + +func panicmakeslicelen() { + panic(errorString("makeslice: len out of range")) +} + +func panicmakeslicecap() { + panic(errorString("makeslice: cap out of range")) } func makeslice(et *_type, len, cap int) slice { @@ -60,11 +69,11 @@ func makeslice(et *_type, len, cap int) slice { // See issue 4085. maxElements := maxSliceCap(et.size) if len < 0 || uintptr(len) > maxElements { - panic(errorString("makeslice: len out of range")) + panicmakeslicelen() } if cap < len || uintptr(cap) > maxElements { - panic(errorString("makeslice: cap out of range")) + panicmakeslicecap() } p := mallocgc(et.size*uintptr(cap), et, true) @@ -74,12 +83,12 @@ func makeslice(et *_type, len, cap int) slice { func makeslice64(et *_type, len64, cap64 int64) slice { len := int(len64) if int64(len) != len64 { - panic(errorString("makeslice: len out of range")) + panicmakeslicelen() } cap := int(cap64) if int64(cap) != cap64 { - panic(errorString("makeslice: cap out of range")) + panicmakeslicecap() } return makeslice(et, len, cap) @@ -131,20 +140,36 @@ func growslice(et *_type, old slice, cap int) slice { var overflow bool var lenmem, newlenmem, capmem uintptr - const ptrSize = unsafe.Sizeof((*byte)(nil)) - switch et.size { - case 1: + // Specialize for common values of et.size. + // For 1 we don't need any division/multiplication. + // For sys.PtrSize, compiler will optimize division/multiplication into a shift by a constant. + // For powers of 2, use a variable shift. + switch { + case et.size == 1: lenmem = uintptr(old.len) newlenmem = uintptr(cap) capmem = roundupsize(uintptr(newcap)) - overflow = uintptr(newcap) > _MaxMem + overflow = uintptr(newcap) > maxAlloc newcap = int(capmem) - case ptrSize: - lenmem = uintptr(old.len) * ptrSize - newlenmem = uintptr(cap) * ptrSize - capmem = roundupsize(uintptr(newcap) * ptrSize) - overflow = uintptr(newcap) > _MaxMem/ptrSize - newcap = int(capmem / ptrSize) + case et.size == sys.PtrSize: + lenmem = uintptr(old.len) * sys.PtrSize + newlenmem = uintptr(cap) * sys.PtrSize + capmem = roundupsize(uintptr(newcap) * sys.PtrSize) + overflow = uintptr(newcap) > maxAlloc/sys.PtrSize + newcap = int(capmem / sys.PtrSize) + case isPowerOfTwo(et.size): + var shift uintptr + if sys.PtrSize == 8 { + // Mask shift for better code generation. 
+ shift = uintptr(sys.Ctz64(uint64(et.size))) & 63 + } else { + shift = uintptr(sys.Ctz32(uint32(et.size))) & 31 + } + lenmem = uintptr(old.len) << shift + newlenmem = uintptr(cap) << shift + capmem = roundupsize(uintptr(newcap) << shift) + overflow = uintptr(newcap) > (maxAlloc >> shift) + newcap = int(capmem >> shift) default: lenmem = uintptr(old.len) * et.size newlenmem = uintptr(cap) * et.size @@ -167,7 +192,7 @@ func growslice(et *_type, old slice, cap int) slice { // s = append(s, d, d, d, d) // print(len(s), "\n") // } - if cap < old.cap || overflow || capmem > _MaxMem { + if cap < old.cap || overflow || capmem > maxAlloc { panic(errorString("growslice: cap out of range")) } @@ -193,6 +218,10 @@ func growslice(et *_type, old slice, cap int) slice { return slice{p, cap, newcap} } +func isPowerOfTwo(x uintptr) bool { + return x&(x-1) == 0 +} + func slicecopy(to, fm slice, width uintptr) int { if fm.len == 0 || to.len == 0 { return 0 diff --git a/libgo/go/runtime/slice_test.go b/libgo/go/runtime/slice_test.go new file mode 100644 index 0000000..c2dfb7a --- /dev/null +++ b/libgo/go/runtime/slice_test.go @@ -0,0 +1,374 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +package runtime_test + +import ( + "fmt" + "testing" +) + +const N = 20 + +func BenchmarkMakeSlice(b *testing.B) { + var x []byte + for i := 0; i < b.N; i++ { + x = make([]byte, 32) + _ = x + } +} + +type ( + struct24 struct{ a, b, c int64 } + struct32 struct{ a, b, c, d int64 } + struct40 struct{ a, b, c, d, e int64 } +) + +func BenchmarkGrowSlice(b *testing.B) { + b.Run("Byte", func(b *testing.B) { + x := make([]byte, 9) + for i := 0; i < b.N; i++ { + _ = append([]byte(nil), x...) + } + }) + b.Run("Int16", func(b *testing.B) { + x := make([]int16, 9) + for i := 0; i < b.N; i++ { + _ = append([]int16(nil), x...) + } + }) + b.Run("Int", func(b *testing.B) { + x := make([]int, 9) + for i := 0; i < b.N; i++ { + _ = append([]int(nil), x...) + } + }) + b.Run("Ptr", func(b *testing.B) { + x := make([]*byte, 9) + for i := 0; i < b.N; i++ { + _ = append([]*byte(nil), x...) + } + }) + b.Run("Struct", func(b *testing.B) { + b.Run("24", func(b *testing.B) { + x := make([]struct24, 9) + for i := 0; i < b.N; i++ { + _ = append([]struct24(nil), x...) + } + }) + b.Run("32", func(b *testing.B) { + x := make([]struct32, 9) + for i := 0; i < b.N; i++ { + _ = append([]struct32(nil), x...) + } + }) + b.Run("40", func(b *testing.B) { + x := make([]struct40, 9) + for i := 0; i < b.N; i++ { + _ = append([]struct40(nil), x...) + } + }) + + }) +} + +var ( + SinkIntSlice []int + SinkIntPointerSlice []*int +) + +func BenchmarkExtendSlice(b *testing.B) { + var length = 4 // Use a variable to prevent stack allocation of slices. + b.Run("IntSlice", func(b *testing.B) { + s := make([]int, 0, length) + for i := 0; i < b.N; i++ { + s = append(s[:0:length/2], make([]int, length)...) + } + SinkIntSlice = s + }) + b.Run("PointerSlice", func(b *testing.B) { + s := make([]*int, 0, length) + for i := 0; i < b.N; i++ { + s = append(s[:0:length/2], make([]*int, length)...) + } + SinkIntPointerSlice = s + }) + b.Run("NoGrow", func(b *testing.B) { + s := make([]int, 0, length) + for i := 0; i < b.N; i++ { + s = append(s[:0:length], make([]int, length)...) 
+ } + SinkIntSlice = s + }) +} + +func BenchmarkAppend(b *testing.B) { + b.StopTimer() + x := make([]int, 0, N) + b.StartTimer() + for i := 0; i < b.N; i++ { + x = x[0:0] + for j := 0; j < N; j++ { + x = append(x, j) + } + } +} + +func BenchmarkAppendGrowByte(b *testing.B) { + for i := 0; i < b.N; i++ { + var x []byte + for j := 0; j < 1<<20; j++ { + x = append(x, byte(j)) + } + } +} + +func BenchmarkAppendGrowString(b *testing.B) { + var s string + for i := 0; i < b.N; i++ { + var x []string + for j := 0; j < 1<<20; j++ { + x = append(x, s) + } + } +} + +func BenchmarkAppendSlice(b *testing.B) { + for _, length := range []int{1, 4, 7, 8, 15, 16, 32} { + b.Run(fmt.Sprint(length, "Bytes"), func(b *testing.B) { + x := make([]byte, 0, N) + y := make([]byte, length) + for i := 0; i < b.N; i++ { + x = x[0:0] + x = append(x, y...) + } + }) + } +} + +var ( + blackhole []byte +) + +func BenchmarkAppendSliceLarge(b *testing.B) { + for _, length := range []int{1 << 10, 4 << 10, 16 << 10, 64 << 10, 256 << 10, 1024 << 10} { + y := make([]byte, length) + b.Run(fmt.Sprint(length, "Bytes"), func(b *testing.B) { + for i := 0; i < b.N; i++ { + blackhole = nil + blackhole = append(blackhole, y...) + } + }) + } +} + +func BenchmarkAppendStr(b *testing.B) { + for _, str := range []string{ + "1", + "1234", + "12345678", + "1234567890123456", + "12345678901234567890123456789012", + } { + b.Run(fmt.Sprint(len(str), "Bytes"), func(b *testing.B) { + x := make([]byte, 0, N) + for i := 0; i < b.N; i++ { + x = x[0:0] + x = append(x, str...) + } + }) + } +} + +func BenchmarkAppendSpecialCase(b *testing.B) { + b.StopTimer() + x := make([]int, 0, N) + b.StartTimer() + for i := 0; i < b.N; i++ { + x = x[0:0] + for j := 0; j < N; j++ { + if len(x) < cap(x) { + x = x[:len(x)+1] + x[len(x)-1] = j + } else { + x = append(x, j) + } + } + } +} + +var x []int + +func f() int { + x[:1][0] = 3 + return 2 +} + +func TestSideEffectOrder(t *testing.T) { + x = make([]int, 0, 10) + x = append(x, 1, f()) + if x[0] != 1 || x[1] != 2 { + t.Error("append failed: ", x[0], x[1]) + } +} + +func TestAppendOverlap(t *testing.T) { + x := []byte("1234") + x = append(x[1:], x...) // p > q in runtime·appendslice. + got := string(x) + want := "2341234" + if got != want { + t.Errorf("overlap failed: got %q want %q", got, want) + } +} + +func BenchmarkCopy(b *testing.B) { + for _, l := range []int{1, 2, 4, 8, 12, 16, 32, 128, 1024} { + buf := make([]byte, 4096) + b.Run(fmt.Sprint(l, "Byte"), func(b *testing.B) { + s := make([]byte, l) + var n int + for i := 0; i < b.N; i++ { + n = copy(buf, s) + } + b.SetBytes(int64(n)) + }) + b.Run(fmt.Sprint(l, "String"), func(b *testing.B) { + s := string(make([]byte, l)) + var n int + for i := 0; i < b.N; i++ { + n = copy(buf, s) + } + b.SetBytes(int64(n)) + }) + } +} + +var ( + sByte []byte + s1Ptr []uintptr + s2Ptr [][2]uintptr + s3Ptr [][3]uintptr + s4Ptr [][4]uintptr +) + +// BenchmarkAppendInPlace tests the performance of append +// when the result is being written back to the same slice. +// In order for the in-place optimization to occur, +// the slice must be referred to by address; +// using a global is an easy way to trigger that. +// We test the "grow" and "no grow" paths separately, +// but not the "normal" (occasionally grow) path, +// because it is a blend of the other two. +// We use small numbers and small sizes in an attempt +// to avoid benchmarking memory allocation and copying. +// We use scalars instead of pointers in an attempt +// to avoid benchmarking the write barriers. 
+// We benchmark four common sizes (byte, pointer, string/interface, slice), +// and one larger size. +func BenchmarkAppendInPlace(b *testing.B) { + b.Run("NoGrow", func(b *testing.B) { + const C = 128 + + b.Run("Byte", func(b *testing.B) { + for i := 0; i < b.N; i++ { + sByte = make([]byte, C) + for j := 0; j < C; j++ { + sByte = append(sByte, 0x77) + } + } + }) + + b.Run("1Ptr", func(b *testing.B) { + for i := 0; i < b.N; i++ { + s1Ptr = make([]uintptr, C) + for j := 0; j < C; j++ { + s1Ptr = append(s1Ptr, 0x77) + } + } + }) + + b.Run("2Ptr", func(b *testing.B) { + for i := 0; i < b.N; i++ { + s2Ptr = make([][2]uintptr, C) + for j := 0; j < C; j++ { + s2Ptr = append(s2Ptr, [2]uintptr{0x77, 0x88}) + } + } + }) + + b.Run("3Ptr", func(b *testing.B) { + for i := 0; i < b.N; i++ { + s3Ptr = make([][3]uintptr, C) + for j := 0; j < C; j++ { + s3Ptr = append(s3Ptr, [3]uintptr{0x77, 0x88, 0x99}) + } + } + }) + + b.Run("4Ptr", func(b *testing.B) { + for i := 0; i < b.N; i++ { + s4Ptr = make([][4]uintptr, C) + for j := 0; j < C; j++ { + s4Ptr = append(s4Ptr, [4]uintptr{0x77, 0x88, 0x99, 0xAA}) + } + } + }) + + }) + + b.Run("Grow", func(b *testing.B) { + const C = 5 + + b.Run("Byte", func(b *testing.B) { + for i := 0; i < b.N; i++ { + sByte = make([]byte, 0) + for j := 0; j < C; j++ { + sByte = append(sByte, 0x77) + sByte = sByte[:cap(sByte)] + } + } + }) + + b.Run("1Ptr", func(b *testing.B) { + for i := 0; i < b.N; i++ { + s1Ptr = make([]uintptr, 0) + for j := 0; j < C; j++ { + s1Ptr = append(s1Ptr, 0x77) + s1Ptr = s1Ptr[:cap(s1Ptr)] + } + } + }) + + b.Run("2Ptr", func(b *testing.B) { + for i := 0; i < b.N; i++ { + s2Ptr = make([][2]uintptr, 0) + for j := 0; j < C; j++ { + s2Ptr = append(s2Ptr, [2]uintptr{0x77, 0x88}) + s2Ptr = s2Ptr[:cap(s2Ptr)] + } + } + }) + + b.Run("3Ptr", func(b *testing.B) { + for i := 0; i < b.N; i++ { + s3Ptr = make([][3]uintptr, 0) + for j := 0; j < C; j++ { + s3Ptr = append(s3Ptr, [3]uintptr{0x77, 0x88, 0x99}) + s3Ptr = s3Ptr[:cap(s3Ptr)] + } + } + }) + + b.Run("4Ptr", func(b *testing.B) { + for i := 0; i < b.N; i++ { + s4Ptr = make([][4]uintptr, 0) + for j := 0; j < C; j++ { + s4Ptr = append(s4Ptr, [4]uintptr{0x77, 0x88, 0x99, 0xAA}) + s4Ptr = s4Ptr[:cap(s4Ptr)] + } + } + }) + + }) +} diff --git a/libgo/go/runtime/string.go b/libgo/go/runtime/string.go index e8df9a6..5296ebd 100644 --- a/libgo/go/runtime/string.go +++ b/libgo/go/runtime/string.go @@ -4,7 +4,10 @@ package runtime -import "unsafe" +import ( + "internal/bytealg" + "unsafe" +) // For gccgo, use go:linkname to rename compiler-called functions to // themselves, so that the compiler will export them. @@ -105,6 +108,11 @@ func slicebytetostring(buf *tmpBuf, b []byte) (str string) { if msanenabled { msanread(unsafe.Pointer(&b[0]), uintptr(l)) } + if l == 1 { + stringStructOf(&str).str = unsafe.Pointer(&staticbytes[b[0]]) + stringStructOf(&str).len = 1 + return + } var p unsafe.Pointer if buf != nil && len(b) <= len(buf) { @@ -232,8 +240,13 @@ func stringStructOf(sp *string) *stringStruct { return (*stringStruct)(unsafe.Pointer(sp)) } -func intstring(buf *[4]byte, v int64) string { - var s string +func intstring(buf *[4]byte, v int64) (s string) { + if v >= 0 && v < runeSelf { + stringStructOf(&s).str = unsafe.Pointer(&staticbytes[v]) + stringStructOf(&s).len = 1 + return + } + var b []byte if buf != nil { b = buf[:] @@ -277,7 +290,7 @@ func rawbyteslice(size int) (b []byte) { // rawruneslice allocates a new rune slice. The rune slice is not zeroed. 
func rawruneslice(size int) (b []rune) { - if uintptr(size) > _MaxMem/4 { + if uintptr(size) > maxAlloc/4 { throw("out of memory") } mem := roundupsize(uintptr(size) * 4) @@ -291,13 +304,20 @@ func rawruneslice(size int) (b []rune) { } // used by cmd/cgo -func gobytes(p *byte, n int) []byte { +func gobytes(p *byte, n int) (b []byte) { if n == 0 { return make([]byte, 0) } - x := make([]byte, n) - memmove(unsafe.Pointer(&x[0]), unsafe.Pointer(p), uintptr(n)) - return x + + if n < 0 || uintptr(n) > maxAlloc { + panic(errorString("gobytes: length out of range")) + } + + bp := mallocgc(uintptr(n), nil, false) + memmove(bp, unsafe.Pointer(p), uintptr(n)) + + *(*slice)(unsafe.Pointer(&b)) = slice{bp, n, n} + return } func gostring(p *byte) string { @@ -406,19 +426,50 @@ func findnull(s *byte) int { if s == nil { return 0 } - p := (*[_MaxMem/2 - 1]byte)(unsafe.Pointer(s)) - l := 0 - for p[l] != 0 { - l++ + + // Avoid IndexByteString on Plan 9 because it uses SSE instructions + // on x86 machines, and those are classified as floating point instructions, + // which are illegal in a note handler. + if GOOS == "plan9" { + p := (*[maxAlloc/2 - 1]byte)(unsafe.Pointer(s)) + l := 0 + for p[l] != 0 { + l++ + } + return l + } + + // pageSize is the unit we scan at a time looking for NULL. + // It must be the minimum page size for any architecture Go + // runs on. It's okay (just a minor performance loss) if the + // actual system page size is larger than this value. + const pageSize = 4096 + + offset := 0 + ptr := unsafe.Pointer(s) + // IndexByteString uses wide reads, so we need to be careful + // with page boundaries. Call IndexByteString on + // [ptr, endOfPage) interval. + safeLen := int(pageSize - uintptr(ptr)%pageSize) + + for { + t := *(*string)(unsafe.Pointer(&stringStruct{ptr, safeLen})) + // Check one page at a time. + if i := bytealg.IndexByteString(t, 0); i != -1 { + return offset + i + } + // Move to next page + ptr = unsafe.Pointer(uintptr(ptr) + uintptr(safeLen)) + offset += safeLen + safeLen = pageSize } - return l } func findnullw(s *uint16) int { if s == nil { return 0 } - p := (*[_MaxMem/2/2 - 1]uint16)(unsafe.Pointer(s)) + p := (*[maxAlloc/2/2 - 1]uint16)(unsafe.Pointer(s)) l := 0 for p[l] != 0 { l++ @@ -435,7 +486,7 @@ func gostringnocopy(str *byte) string { func gostringw(strw *uint16) string { var buf [8]byte - str := (*[_MaxMem/2/2 - 1]uint16)(unsafe.Pointer(strw)) + str := (*[maxAlloc/2/2 - 1]uint16)(unsafe.Pointer(strw)) n1 := 0 for i := 0; str[i] != 0; i++ { n1 += encoderune(buf[:], rune(str[i])) diff --git a/libgo/go/runtime/string_test.go b/libgo/go/runtime/string_test.go index 555a7fc..03327bb 100644 --- a/libgo/go/runtime/string_test.go +++ b/libgo/go/runtime/string_test.go @@ -9,6 +9,7 @@ import ( "strconv" "strings" "testing" + "unicode/utf8" ) // Strings and slices that don't escape and fit into tmpBuf are stack allocated, @@ -110,6 +111,43 @@ var stringdata = []struct{ name, data string }{ {"MixedLength", "$Ѐࠀက퀀𐀀\U00040000\U0010FFFF"}, } +var sinkInt int + +func BenchmarkRuneCount(b *testing.B) { + // Each sub-benchmark counts the runes in a string in a different way. 
+ b.Run("lenruneslice", func(b *testing.B) { + for _, sd := range stringdata { + b.Run(sd.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + sinkInt += len([]rune(sd.data)) + } + }) + } + }) + b.Run("rangeloop", func(b *testing.B) { + for _, sd := range stringdata { + b.Run(sd.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + n := 0 + for range sd.data { + n++ + } + sinkInt += n + } + }) + } + }) + b.Run("utf8.RuneCountInString", func(b *testing.B) { + for _, sd := range stringdata { + b.Run(sd.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + sinkInt += utf8.RuneCountInString(sd.data) + } + }) + } + }) +} + func BenchmarkRuneIterate(b *testing.B) { b.Run("range", func(b *testing.B) { for _, sd := range stringdata { @@ -125,7 +163,7 @@ func BenchmarkRuneIterate(b *testing.B) { for _, sd := range stringdata { b.Run(sd.name, func(b *testing.B) { for i := 0; i < b.N; i++ { - for _ = range sd.data { + for range sd.data { } } }) @@ -135,7 +173,7 @@ func BenchmarkRuneIterate(b *testing.B) { for _, sd := range stringdata { b.Run(sd.name, func(b *testing.B) { for i := 0; i < b.N; i++ { - for _, _ = range sd.data { + for range sd.data { } } }) diff --git a/libgo/go/runtime/stubs.go b/libgo/go/runtime/stubs.go index 1d21445..1aae4f3 100644 --- a/libgo/go/runtime/stubs.go +++ b/libgo/go/runtime/stubs.go @@ -69,8 +69,13 @@ func systemstack(fn func()) { } } +var badsystemstackMsg = "fatal: systemstack called from unexpected goroutine" + +//go:nosplit +//go:nowritebarrierrec func badsystemstack() { - throw("systemstack called from unexpected goroutine") + sp := stringStructOf(&badsystemstackMsg) + write(2, sp.str, int32(sp.len)) } // memclrNoHeapPointers clears n bytes starting at ptr. @@ -127,7 +132,7 @@ func fastrand() uint32 { //go:nosplit func fastrandn(n uint32) uint32 { // This is similar to fastrand() % n, but faster. - // See http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ + // See https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ return uint32(uint64(fastrand()) * uint64(n) >> 32) } @@ -198,7 +203,6 @@ func publicationBarrier() // getcallerpc returns the program counter (PC) of its caller's caller. // getcallersp returns the stack pointer (SP) of its caller's caller. -// argp must be a pointer to the caller's first function argument. // The implementation may be a compiler intrinsic; there is not // necessarily code implementing this on every platform. // @@ -213,10 +217,7 @@ func publicationBarrier() // the call to f (where f will return). // // The call to getcallerpc and getcallersp must be done in the -// frame being asked about. It would not be correct for f to pass &arg1 -// to another function g and let g call getcallerpc/getcallersp. -// The call inside g might return information about g's caller or -// information about f's caller or complete garbage. +// frame being asked about. // // The result of getcallersp is correct at the time of the return, // but it may be invalidated by any subsequent call to a function @@ -228,7 +229,7 @@ func publicationBarrier() func getcallerpc() uintptr //go:noescape -func getcallersp() uintptr +func getcallersp() uintptr // implemented as an intrinsic on all platforms func asmcgocall(fn, arg unsafe.Pointer) int32 { throw("asmcgocall") @@ -293,12 +294,6 @@ func setIsCgo() { } // For gccgo, to communicate from the C code to the Go code. 
-//go:linkname setCpuidECX runtime.setCpuidECX -func setCpuidECX(v uint32) { - cpuid_ecx = v -} - -// For gccgo, to communicate from the C code to the Go code. //go:linkname setSupportAES runtime.setSupportAES func setSupportAES(v bool) { support_aes = v @@ -336,6 +331,9 @@ func getSiginfo(*_siginfo_t, unsafe.Pointer) (sigaddr uintptr, sigpc uintptr) // Implemented in C for gccgo. func dumpregs(*_siginfo_t, unsafe.Pointer) +// Implemented in C for gccgo. +func setRandomNumber(uint32) + // Temporary for gccgo until we port proc.go. //go:linkname getsched runtime.getsched func getsched() *schedt { @@ -426,6 +424,15 @@ type bitvector struct { bytedata *uint8 } +// ptrbit returns the i'th bit in bv. +// ptrbit is less efficient than iterating directly over bitvector bits, +// and should only be used in non-performance-critical code. +// See adjustpointers for an example of a high-efficiency walk of a bitvector. +func (bv *bitvector) ptrbit(i uintptr) uint8 { + b := *(addb(bv.bytedata, i/8)) + return (b >> (i % 8)) & 1 +} + // bool2int returns 0 if x is false or 1 if x is true. func bool2int(x bool) int { if x { @@ -433,3 +440,10 @@ func bool2int(x bool) int { } return 0 } + +// abort crashes the runtime in situations where even throw might not +// work. In general it should do something a debugger will recognize +// (e.g., an INT3 on x86). A crash in abort is recognized by the +// signal handler, which will attempt to tear down the runtime +// immediately. +func abort() diff --git a/libgo/go/runtime/stubs2.go b/libgo/go/runtime/stubs2.go index e305b16..1cb910c 100644 --- a/libgo/go/runtime/stubs2.go +++ b/libgo/go/runtime/stubs2.go @@ -5,6 +5,8 @@ // +build !plan9 // +build !windows // +build !nacl +// +build !js +// +build !darwin package runtime @@ -16,7 +18,6 @@ func closefd(fd int32) int32 //extern exit func exit(code int32) -func nanotime() int64 func usleep(usec uint32) //go:noescape diff --git a/libgo/go/runtime/stubs3.go b/libgo/go/runtime/stubs3.go new file mode 100644 index 0000000..5c0786e --- /dev/null +++ b/libgo/go/runtime/stubs3.go @@ -0,0 +1,14 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !plan9 +// +build !solaris +// +build !windows +// +build !nacl +// +build !freebsd +// +build !darwin + +package runtime + +func nanotime() int64 diff --git a/libgo/go/runtime/symtab.go b/libgo/go/runtime/symtab.go index 12dc672..861921c 100644 --- a/libgo/go/runtime/symtab.go +++ b/libgo/go/runtime/symtab.go @@ -124,6 +124,7 @@ type funcID uint32 const ( funcID_normal funcID = iota // not a special function + funcID_runtime_main funcID_goexit funcID_jmpdefer funcID_mcall @@ -133,15 +134,13 @@ const ( funcID_asmcgocall funcID_sigpanic funcID_runfinq - funcID_bgsweep - funcID_forcegchelper - funcID_timerproc funcID_gcBgMarkWorker funcID_systemstack_switch funcID_systemstack funcID_cgocallback_gofunc funcID_gogo funcID_externalthreadhandler + funcID_debugCallV1 ) // FuncForPC returns a *Func describing the function that contains the diff --git a/libgo/go/runtime/sys_darwin.go b/libgo/go/runtime/sys_darwin.go new file mode 100644 index 0000000..7efbef7 --- /dev/null +++ b/libgo/go/runtime/sys_darwin.go @@ -0,0 +1,374 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import "unsafe" + +// Call fn with arg as its argument. 
Return what fn returns. +// fn is the raw pc value of the entry point of the desired function. +// Switches to the system stack, if not already there. +// Preserves the calling point as the location where a profiler traceback will begin. +//go:nosplit +func libcCall(fn, arg unsafe.Pointer) int32 { + // Leave caller's PC/SP/G around for traceback. + gp := getg() + var mp *m + if gp != nil { + mp = gp.m + } + if mp != nil && mp.libcallsp == 0 { + mp.libcallg.set(gp) + mp.libcallpc = getcallerpc() + // sp must be the last, because once async cpu profiler finds + // all three values to be non-zero, it will use them + mp.libcallsp = getcallersp() + } else { + // Make sure we don't reset libcallsp. This makes + // libcCall reentrant; We remember the g/pc/sp for the + // first call on an M, until that libcCall instance + // returns. Reentrance only matters for signals, as + // libc never calls back into Go. The tricky case is + // where we call libcX from an M and record g/pc/sp. + // Before that call returns, a signal arrives on the + // same M and the signal handling code calls another + // libc function. We don't want that second libcCall + // from within the handler to be recorded, and we + // don't want that call's completion to zero + // libcallsp. + // We don't need to set libcall* while we're in a sighandler + // (even if we're not currently in libc) because we block all + // signals while we're handling a signal. That includes the + // profile signal, which is the one that uses the libcall* info. + mp = nil + } + res := asmcgocall(fn, arg) + if mp != nil { + mp.libcallsp = 0 + } + return res +} + +// The *_trampoline functions convert from the Go calling convention to the C calling convention +// and then call the underlying libc function. They are defined in sys_darwin_$ARCH.s. 
+ +//go:nosplit +//go:cgo_unsafe_args +func pthread_attr_init(attr *pthreadattr) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_attr_init_trampoline)), unsafe.Pointer(&attr)) +} +func pthread_attr_init_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_attr_setstacksize(attr *pthreadattr, size uintptr) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_attr_setstacksize_trampoline)), unsafe.Pointer(&attr)) +} +func pthread_attr_setstacksize_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_attr_setdetachstate(attr *pthreadattr, state int) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_attr_setdetachstate_trampoline)), unsafe.Pointer(&attr)) +} +func pthread_attr_setdetachstate_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_create(attr *pthreadattr, start uintptr, arg unsafe.Pointer) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_create_trampoline)), unsafe.Pointer(&attr)) +} +func pthread_create_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func raise(sig uint32) { + libcCall(unsafe.Pointer(funcPC(raise_trampoline)), unsafe.Pointer(&sig)) +} +func raise_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_self() (t pthread) { + libcCall(unsafe.Pointer(funcPC(pthread_self_trampoline)), unsafe.Pointer(&t)) + return +} +func pthread_self_trampoline() + +func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) (unsafe.Pointer, int) { + args := struct { + addr unsafe.Pointer + n uintptr + prot, flags, fd int32 + off uint32 + ret1 unsafe.Pointer + ret2 int + }{addr, n, prot, flags, fd, off, nil, 0} + libcCall(unsafe.Pointer(funcPC(mmap_trampoline)), unsafe.Pointer(&args)) + return args.ret1, args.ret2 +} +func mmap_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func munmap(addr unsafe.Pointer, n uintptr) { + libcCall(unsafe.Pointer(funcPC(munmap_trampoline)), unsafe.Pointer(&addr)) +} +func munmap_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func madvise(addr unsafe.Pointer, n uintptr, flags int32) { + libcCall(unsafe.Pointer(funcPC(madvise_trampoline)), unsafe.Pointer(&addr)) +} +func madvise_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func read(fd int32, p unsafe.Pointer, n int32) int32 { + return libcCall(unsafe.Pointer(funcPC(read_trampoline)), unsafe.Pointer(&fd)) +} +func read_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func closefd(fd int32) int32 { + return libcCall(unsafe.Pointer(funcPC(close_trampoline)), unsafe.Pointer(&fd)) +} +func close_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func exit(code int32) { + libcCall(unsafe.Pointer(funcPC(exit_trampoline)), unsafe.Pointer(&code)) +} +func exit_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func usleep(usec uint32) { + libcCall(unsafe.Pointer(funcPC(usleep_trampoline)), unsafe.Pointer(&usec)) +} +func usleep_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func write(fd uintptr, p unsafe.Pointer, n int32) int32 { + return libcCall(unsafe.Pointer(funcPC(write_trampoline)), unsafe.Pointer(&fd)) +} +func write_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func open(name *byte, mode, perm int32) (ret int32) { + return libcCall(unsafe.Pointer(funcPC(open_trampoline)), unsafe.Pointer(&name)) +} +func open_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func nanotime() int64 { + var r struct { + t int64 // raw timer + numer, denom uint32 // conversion factors. nanoseconds = t * numer / denom. 
+ } + libcCall(unsafe.Pointer(funcPC(nanotime_trampoline)), unsafe.Pointer(&r)) + // Note: Apple seems unconcerned about overflow here. See + // https://developer.apple.com/library/content/qa/qa1398/_index.html + // Note also, numer == denom == 1 is common. + t := r.t + if r.numer != 1 { + t *= int64(r.numer) + } + if r.denom != 1 { + t /= int64(r.denom) + } + return t +} +func nanotime_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func walltime() (int64, int32) { + var t timeval + libcCall(unsafe.Pointer(funcPC(walltime_trampoline)), unsafe.Pointer(&t)) + return int64(t.tv_sec), 1000 * t.tv_usec +} +func walltime_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func sigaction(sig uint32, new *usigactiont, old *usigactiont) { + libcCall(unsafe.Pointer(funcPC(sigaction_trampoline)), unsafe.Pointer(&sig)) +} +func sigaction_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func sigprocmask(how uint32, new *sigset, old *sigset) { + libcCall(unsafe.Pointer(funcPC(sigprocmask_trampoline)), unsafe.Pointer(&how)) +} +func sigprocmask_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func sigaltstack(new *stackt, old *stackt) { + if new != nil && new.ss_flags&_SS_DISABLE != 0 && new.ss_size == 0 { + // Despite the fact that Darwin's sigaltstack man page says it ignores the size + // when SS_DISABLE is set, it doesn't. sigaltstack returns ENOMEM + // if we don't give it a reasonable size. + // ref: http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20140421/214296.html + new.ss_size = 32768 + } + libcCall(unsafe.Pointer(funcPC(sigaltstack_trampoline)), unsafe.Pointer(&new)) +} +func sigaltstack_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func raiseproc(sig uint32) { + libcCall(unsafe.Pointer(funcPC(raiseproc_trampoline)), unsafe.Pointer(&sig)) +} +func raiseproc_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func setitimer(mode int32, new, old *itimerval) { + libcCall(unsafe.Pointer(funcPC(setitimer_trampoline)), unsafe.Pointer(&mode)) +} +func setitimer_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, ndst uintptr) int32 { + return libcCall(unsafe.Pointer(funcPC(sysctl_trampoline)), unsafe.Pointer(&mib)) +} +func sysctl_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func fcntl(fd, cmd, arg int32) int32 { + return libcCall(unsafe.Pointer(funcPC(fcntl_trampoline)), unsafe.Pointer(&fd)) +} +func fcntl_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func kqueue() int32 { + v := libcCall(unsafe.Pointer(funcPC(kqueue_trampoline)), nil) + return v +} +func kqueue_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func kevent(kq int32, ch *keventt, nch int32, ev *keventt, nev int32, ts *timespec) int32 { + return libcCall(unsafe.Pointer(funcPC(kevent_trampoline)), unsafe.Pointer(&kq)) +} +func kevent_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_mutex_init(m *pthreadmutex, attr *pthreadmutexattr) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_mutex_init_trampoline)), unsafe.Pointer(&m)) +} +func pthread_mutex_init_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_mutex_lock(m *pthreadmutex) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_mutex_lock_trampoline)), unsafe.Pointer(&m)) +} +func pthread_mutex_lock_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_mutex_unlock(m *pthreadmutex) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_mutex_unlock_trampoline)), unsafe.Pointer(&m)) +} +func 
pthread_mutex_unlock_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_cond_init(c *pthreadcond, attr *pthreadcondattr) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_cond_init_trampoline)), unsafe.Pointer(&c)) +} +func pthread_cond_init_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_cond_wait(c *pthreadcond, m *pthreadmutex) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_cond_wait_trampoline)), unsafe.Pointer(&c)) +} +func pthread_cond_wait_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_cond_timedwait_relative_np(c *pthreadcond, m *pthreadmutex, t *timespec) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_cond_timedwait_relative_np_trampoline)), unsafe.Pointer(&c)) +} +func pthread_cond_timedwait_relative_np_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_cond_signal(c *pthreadcond) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_cond_signal_trampoline)), unsafe.Pointer(&c)) +} +func pthread_cond_signal_trampoline() + +// Not used on Darwin, but must be defined. +func exitThread(wait *uint32) { +} + +//go:nosplit +func closeonexec(fd int32) { + fcntl(fd, _F_SETFD, _FD_CLOEXEC) +} + +// Tell the linker that the libc_* functions are to be found +// in a system library, with the libc_ prefix missing. + +//go:cgo_import_dynamic libc_pthread_attr_init pthread_attr_init "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_pthread_attr_setstacksize pthread_attr_setstacksize "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_pthread_attr_setdetachstate pthread_attr_setdetachstate "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_pthread_create pthread_create "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_exit exit "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_raise raise "/usr/lib/libSystem.B.dylib" + +//go:cgo_import_dynamic libc_open open "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_close close "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_read read "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_write write "/usr/lib/libSystem.B.dylib" + +//go:cgo_import_dynamic libc_mmap mmap "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_munmap munmap "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_madvise madvise "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_error __error "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_usleep usleep "/usr/lib/libSystem.B.dylib" + +//go:cgo_import_dynamic libc_mach_timebase_info mach_timebase_info "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_mach_absolute_time mach_absolute_time "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_gettimeofday gettimeofday "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_sigaction sigaction "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_pthread_sigmask pthread_sigmask "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_sigaltstack sigaltstack "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_getpid getpid "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_kill kill "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_setitimer setitimer "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_sysctl sysctl "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_fcntl fcntl "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_kqueue kqueue "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_kevent kevent 
"/usr/lib/libSystem.B.dylib" + +//go:cgo_import_dynamic libc_pthread_mutex_init pthread_mutex_init "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_pthread_mutex_lock pthread_mutex_lock "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_pthread_mutex_unlock pthread_mutex_unlock "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_pthread_cond_init pthread_cond_init "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_pthread_cond_wait pthread_cond_wait "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_pthread_cond_timedwait_relative_np pthread_cond_timedwait_relative_np "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_pthread_cond_signal pthread_cond_signal "/usr/lib/libSystem.B.dylib" + +// Magic incantation to get libSystem actually dynamically linked. +// TODO: Why does the code require this? See cmd/compile/internal/ld/go.go:210 +//go:cgo_import_dynamic _ _ "/usr/lib/libSystem.B.dylib" diff --git a/libgo/go/runtime/sys_wasm.go b/libgo/go/runtime/sys_wasm.go new file mode 100644 index 0000000..9bf710b --- /dev/null +++ b/libgo/go/runtime/sys_wasm.go @@ -0,0 +1,42 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import ( + "runtime/internal/sys" + "unsafe" +) + +type m0Stack struct { + _ [8192 * sys.StackGuardMultiplier]byte +} + +var wasmStack m0Stack + +func wasmMove() + +func wasmZero() + +func wasmDiv() + +func wasmTruncS() +func wasmTruncU() + +func wasmExit(code int32) + +// adjust Gobuf as it if executed a call to fn with context ctxt +// and then did an immediate gosave. +func gostartcall(buf *gobuf, fn, ctxt unsafe.Pointer) { + sp := buf.sp + if sys.RegSize > sys.PtrSize { + sp -= sys.PtrSize + *(*uintptr)(unsafe.Pointer(sp)) = 0 + } + sp -= sys.PtrSize + *(*uintptr)(unsafe.Pointer(sp)) = buf.pc + buf.sp = sp + buf.pc = uintptr(fn) + buf.ctxt = ctxt +} diff --git a/libgo/go/runtime/testdata/testprog/abort.go b/libgo/go/runtime/testdata/testprog/abort.go new file mode 100644 index 0000000..9e79d4d --- /dev/null +++ b/libgo/go/runtime/testdata/testprog/abort.go @@ -0,0 +1,23 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import _ "unsafe" // for go:linkname + +func init() { + register("Abort", Abort) +} + +//go:linkname runtimeAbort runtime.abort +func runtimeAbort() + +func Abort() { + defer func() { + recover() + panic("BAD: recovered from abort") + }() + runtimeAbort() + println("BAD: after abort") +} diff --git a/libgo/go/runtime/testdata/testprog/numcpu_freebsd.go b/libgo/go/runtime/testdata/testprog/numcpu_freebsd.go index 035c534..42ee154 100644 --- a/libgo/go/runtime/testdata/testprog/numcpu_freebsd.go +++ b/libgo/go/runtime/testdata/testprog/numcpu_freebsd.go @@ -9,12 +9,17 @@ import ( "fmt" "os" "os/exec" + "regexp" "runtime" "strconv" "strings" "syscall" ) +var ( + cpuSetRE = regexp.MustCompile(`(\d,?)+`) +) + func init() { register("FreeBSDNumCPU", FreeBSDNumCPU) register("FreeBSDNumCPUHelper", FreeBSDNumCPUHelper) @@ -105,8 +110,12 @@ func checkNCPU(list []string) error { return fmt.Errorf("could not check against an empty CPU list") } + cListString := cpuSetRE.FindString(listString) + if len(cListString) == 0 { + return fmt.Errorf("invalid cpuset output '%s'", listString) + } // Launch FreeBSDNumCPUHelper() with specified CPUs list. 
- cmd := exec.Command("cpuset", "-l", listString, os.Args[0], "FreeBSDNumCPUHelper") + cmd := exec.Command("cpuset", "-l", cListString, os.Args[0], "FreeBSDNumCPUHelper") cmdline := strings.Join(cmd.Args, " ") output, err := cmd.CombinedOutput() if err != nil { @@ -120,7 +129,7 @@ func checkNCPU(list []string) error { return fmt.Errorf("fail to parse output from child '%s', error: %s, output: %s", cmdline, err, output) } if n != len(list) { - return fmt.Errorf("runtime.NumCPU() expected to %d, got %d when run with CPU list %s", len(list), n, listString) + return fmt.Errorf("runtime.NumCPU() expected to %d, got %d when run with CPU list %s", len(list), n, cListString) } return nil } diff --git a/libgo/go/runtime/testdata/testprog/timeprof.go b/libgo/go/runtime/testdata/testprog/timeprof.go new file mode 100644 index 0000000..0702885 --- /dev/null +++ b/libgo/go/runtime/testdata/testprog/timeprof.go @@ -0,0 +1,46 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "fmt" + "io/ioutil" + "os" + "runtime/pprof" + "time" +) + +func init() { + register("TimeProf", TimeProf) +} + +func TimeProf() { + f, err := ioutil.TempFile("", "timeprof") + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(2) + } + + if err := pprof.StartCPUProfile(f); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(2) + } + + t0 := time.Now() + // We should get a profiling signal 100 times a second, + // so running for 1/10 second should be sufficient. + for time.Since(t0) < time.Second/10 { + } + + pprof.StopCPUProfile() + + name := f.Name() + if err := f.Close(); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(2) + } + + fmt.Println(name) +} diff --git a/libgo/go/runtime/testdata/testprog/traceback_ancestors.go b/libgo/go/runtime/testdata/testprog/traceback_ancestors.go new file mode 100644 index 0000000..fe57c1c --- /dev/null +++ b/libgo/go/runtime/testdata/testprog/traceback_ancestors.go @@ -0,0 +1,53 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "fmt" + "runtime" +) + +func init() { + register("TracebackAncestors", TracebackAncestors) +} + +const numGoroutines = 3 +const numFrames = 2 + +func TracebackAncestors() { + w := make(chan struct{}) + recurseThenCallGo(w, numGoroutines, numFrames) + <-w + printStack() + close(w) +} + +func printStack() { + buf := make([]byte, 1024) + for { + n := runtime.Stack(buf, true) + if n < len(buf) { + fmt.Print(string(buf[:n])) + return + } + buf = make([]byte, 2*len(buf)) + } +} + +func recurseThenCallGo(w chan struct{}, frames int, goroutines int) { + if frames == 0 { + // Signal to TracebackAncestors that we are done recursing and starting goroutines. + w <- struct{}{} + <-w + return + } + if goroutines == 0 { + // Start the next goroutine now that there are no more recursions left + // for this current goroutine. + go recurseThenCallGo(w, frames-1, numFrames) + return + } + recurseThenCallGo(w, frames, goroutines-1) +} diff --git a/libgo/go/runtime/testdata/testprogcgo/bigstack_windows.go b/libgo/go/runtime/testdata/testprogcgo/bigstack_windows.go new file mode 100644 index 0000000..f58fcf9 --- /dev/null +++ b/libgo/go/runtime/testdata/testprogcgo/bigstack_windows.go @@ -0,0 +1,27 @@ +// Copyright 2018 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +/* +typedef void callback(char*); +extern void goBigStack1(char*); +extern void bigStack(callback*); +*/ +import "C" + +func init() { + register("BigStack", BigStack) +} + +func BigStack() { + // Create a large thread stack and call back into Go to test + // if Go correctly determines the stack bounds. + C.bigStack((*C.callback)(C.goBigStack1)) +} + +//export goBigStack1 +func goBigStack1(x *C.char) { + println("OK") +} diff --git a/libgo/go/runtime/testdata/testprogcgo/raceprof.go b/libgo/go/runtime/testdata/testprogcgo/raceprof.go index 466a367..0750ec1 100644 --- a/libgo/go/runtime/testdata/testprogcgo/raceprof.go +++ b/libgo/go/runtime/testdata/testprogcgo/raceprof.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build linux,amd64 +// +build linux,amd64 freebsd,amd64 // +build !gccgo package main diff --git a/libgo/go/runtime/testdata/testprogcgo/racesig.go b/libgo/go/runtime/testdata/testprogcgo/racesig.go index d0c1c3c..a079b3f 100644 --- a/libgo/go/runtime/testdata/testprogcgo/racesig.go +++ b/libgo/go/runtime/testdata/testprogcgo/racesig.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build linux,amd64 +// +build linux,amd64 freebsd,amd64 package main diff --git a/libgo/go/runtime/time.go b/libgo/go/runtime/time.go index b707590..a95d95b 100644 --- a/libgo/go/runtime/time.go +++ b/libgo/go/runtime/time.go @@ -98,8 +98,11 @@ func timeSleep(ns int64) { t.arg = gp tb := t.assignBucket() lock(&tb.lock) - tb.addtimerLocked(t) - goparkunlock(&tb.lock, "sleep", traceEvGoSleep, 2) + if !tb.addtimerLocked(t) { + unlock(&tb.lock) + badTimer() + } + goparkunlock(&tb.lock, waitReasonSleep, traceEvGoSleep, 2) } // startTimer adds t to the timer heap. @@ -128,14 +131,19 @@ func goroutineReady(arg interface{}, seq uintptr) { func addtimer(t *timer) { tb := t.assignBucket() lock(&tb.lock) - tb.addtimerLocked(t) + ok := tb.addtimerLocked(t) unlock(&tb.lock) + if !ok { + badTimer() + } } // Add a timer to the heap and start or kick timerproc if the new timer is // earlier than any of the others. // Timers are locked. -func (tb *timersBucket) addtimerLocked(t *timer) { +// Returns whether all is well: false if the data structure is corrupt +// due to user-level races. +func (tb *timersBucket) addtimerLocked(t *timer) bool { // when must never be negative; otherwise timerproc will overflow // during its delta calculation and never expire other runtime timers. if t.when < 0 { @@ -143,7 +151,9 @@ func (tb *timersBucket) addtimerLocked(t *timer) { } t.i = len(tb.t) tb.t = append(tb.t, t) - siftupTimer(tb.t, t.i) + if !siftupTimer(tb.t, t.i) { + return false + } if t.i == 0 { // siftup moved to top: new earliest deadline. if tb.sleeping { @@ -160,6 +170,7 @@ func (tb *timersBucket) addtimerLocked(t *timer) { expectSystemGoroutine() go timerproc(tb) } + return true } // Delete timer t from the heap. 
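The badTimer calls added above target programs that use a single timer from several goroutines with no synchronization (issue #25686, referenced in the badTimer comment below). A minimal illustrative sketch of that kind of misuse, not taken from the issue itself: it races Reset on a shared timer, which can corrupt the timer heap and, with this change, is reported as a clear "racy use of timers" panic instead of a crash inside the runtime while locks are held.

package main

import "time"

func main() {
	t := time.NewTimer(time.Millisecond)
	for i := 0; i < 4; i++ {
		go func() {
			for {
				// Unsynchronized, concurrent Reset on a shared timer is a
				// misuse of the time package; it is the class of race the
				// runtime's badTimer check is meant to surface.
				t.Reset(time.Millisecond)
			}
		}()
	}
	time.Sleep(2 * time.Second)
}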
@@ -192,11 +203,19 @@ func deltimer(t *timer) bool { } tb.t[last] = nil tb.t = tb.t[:last] + ok := true if i != last { - siftupTimer(tb.t, i) - siftdownTimer(tb.t, i) + if !siftupTimer(tb.t, i) { + ok = false + } + if !siftdownTimer(tb.t, i) { + ok = false + } } unlock(&tb.lock) + if !ok { + badTimer() + } return true } @@ -222,10 +241,13 @@ func timerproc(tb *timersBucket) { if delta > 0 { break } + ok := true if t.period > 0 { // leave in heap but adjust next time to fire t.when += t.period * (1 + -delta/t.period) - siftdownTimer(tb.t, 0) + if !siftdownTimer(tb.t, 0) { + ok = false + } } else { // remove from heap last := len(tb.t) - 1 @@ -236,7 +258,9 @@ func timerproc(tb *timersBucket) { tb.t[last] = nil tb.t = tb.t[:last] if last > 0 { - siftdownTimer(tb.t, 0) + if !siftdownTimer(tb.t, 0) { + ok = false + } } t.i = -1 // mark as removed } @@ -244,6 +268,9 @@ func timerproc(tb *timersBucket) { arg := t.arg seq := t.seq unlock(&tb.lock) + if !ok { + badTimer() + } if raceenabled { raceacquire(unsafe.Pointer(t)) } @@ -253,7 +280,7 @@ func timerproc(tb *timersBucket) { if delta < 0 || faketime > 0 { // No timers left - put goroutine to sleep. tb.rescheduling = true - goparkunlock(&tb.lock, "timer goroutine (idle)", traceEvGoBlock, 1) + goparkunlock(&tb.lock, waitReasonTimerGoroutineIdle, traceEvGoBlock, 1) continue } // At least one timer pending. Sleep until then. @@ -329,8 +356,20 @@ func timeSleepUntil() int64 { } // Heap maintenance algorithms. - -func siftupTimer(t []*timer, i int) { +// These algorithms check for slice index errors manually. +// Slice index error can happen if the program is using racy +// access to timers. We don't want to panic here, because +// it will cause the program to crash with a mysterious +// "panic holding locks" message. Instead, we panic while not +// holding a lock. +// The races can occur despite the bucket locks because assignBucket +// itself is called without locks, so racy calls can cause a timer to +// change buckets while executing these functions. + +func siftupTimer(t []*timer, i int) bool { + if i >= len(t) { + return false + } when := t[i].when tmp := t[i] for i > 0 { @@ -346,10 +385,14 @@ func siftupTimer(t []*timer, i int) { t[i] = tmp t[i].i = i } + return true } -func siftdownTimer(t []*timer, i int) { +func siftdownTimer(t []*timer, i int) bool { n := len(t) + if i >= n { + return false + } when := t[i].when tmp := t[i] for { @@ -385,6 +428,15 @@ func siftdownTimer(t []*timer, i int) { t[i] = tmp t[i].i = i } + return true +} + +// badTimer is called if the timer data structures have been corrupted, +// presumably due to racy use by the program. We panic here rather than +// panicing due to invalid slice access while holding locks. +// See issue #25686. +func badTimer() { + panic(errorString("racy use of timers")) } // Entry points for net, time to call nanotime. diff --git a/libgo/go/runtime/timeasm.go b/libgo/go/runtime/timeasm.go index d5f5ea3..55b0d07 100644 --- a/libgo/go/runtime/timeasm.go +++ b/libgo/go/runtime/timeasm.go @@ -7,7 +7,7 @@ // so that time.now and nanotime return the same monotonic clock readings. // +build ignore -// +build darwin,amd64 darwin,386 windows +// +build windows package runtime diff --git a/libgo/go/runtime/timestub.go b/libgo/go/runtime/timestub.go index 033734e..9f1d111 100644 --- a/libgo/go/runtime/timestub.go +++ b/libgo/go/runtime/timestub.go @@ -5,15 +5,12 @@ // Declarations for operating systems implementing time.now // indirectly, in terms of walltime and nanotime assembly. 
-// -build !darwin !amd64,!386 // -build !windows package runtime import _ "unsafe" // for go:linkname -func walltime() (sec int64, nsec int32) - //go:linkname time_now time.now func time_now() (sec int64, nsec int32, mono int64) { sec, nsec = walltime() diff --git a/libgo/go/runtime/timestub2.go b/libgo/go/runtime/timestub2.go new file mode 100644 index 0000000..9ddc6fe --- /dev/null +++ b/libgo/go/runtime/timestub2.go @@ -0,0 +1,11 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !darwin +// +build !windows +// +build !freebsd + +package runtime + +func walltime() (sec int64, nsec int32) diff --git a/libgo/go/runtime/trace.go b/libgo/go/runtime/trace.go index e2bbb5d..7aed9a9 100644 --- a/libgo/go/runtime/trace.go +++ b/libgo/go/runtime/trace.go @@ -64,7 +64,14 @@ const ( traceEvGoBlockGC = 42 // goroutine blocks on GC assist [timestamp, stack] traceEvGCMarkAssistStart = 43 // GC mark assist start [timestamp, stack] traceEvGCMarkAssistDone = 44 // GC mark assist done [timestamp] - traceEvCount = 45 + traceEvUserTaskCreate = 45 // trace.NewContext [timestamp, internal task id, internal parent task id, stack, name string] + traceEvUserTaskEnd = 46 // end of a task [timestamp, internal task id, stack] + traceEvUserRegion = 47 // trace.WithRegion [timestamp, internal task id, mode(0:start, 1:end), stack, name string] + traceEvUserLog = 48 // trace.Log [timestamp, internal task id, key string id, stack, value string] + traceEvCount = 49 + // Byte is used but only 6 bits are available for event type. + // The remaining 2 bits are used to specify the number of arguments. + // That means, the max event type value is 63. ) const ( @@ -121,11 +128,13 @@ var trace struct { // Dictionary for traceEvString. // - // Currently this is used only at trace setup and for - // func/file:line info after tracing session, so we assume - // single-threaded access. - strings map[string]uint64 - stringSeq uint64 + // TODO: central lock to access the map is not ideal. + // option: pre-assign ids to all user annotation region names and tags + // option: per-P cache + // option: sync.Map like data structure + stringsLock mutex + strings map[string]uint64 + stringSeq uint64 // markWorkerLabels maps gcMarkWorkerMode to string ID. markWorkerLabels [len(gcMarkWorkerModeStrings)]uint64 @@ -379,12 +388,12 @@ func ReadTrace() []byte { trace.headerWritten = true trace.lockOwner = nil unlock(&trace.lock) - return []byte("go 1.10 trace\x00\x00\x00") + return []byte("go 1.11 trace\x00\x00\x00") } // Wait for new data. if trace.fullHead == 0 && !trace.shutdown { trace.reader.set(getg()) - goparkunlock(&trace.lock, "trace reader (blocked)", traceEvGoBlock, 2) + goparkunlock(&trace.lock, waitReasonTraceReaderBlocked, traceEvGoBlock, 2) lock(&trace.lock) } // Write a buffer. @@ -507,12 +516,26 @@ func traceEvent(ev byte, skip int, args ...uint64) { // so if we see trace.enabled == true now, we know it's true for the rest of the function. // Exitsyscall can run even during stopTheWorld. The race with StartTrace/StopTrace // during tracing in exitsyscall is resolved by locking trace.bufLock in traceLockBuffer. + // + // Note trace_userTaskCreate runs the same check. if !trace.enabled && !mp.startingtrace { traceReleaseBuffer(pid) return } + + if skip > 0 { + if getg() == mp.curg { + skip++ // +1 because stack is captured in traceEventLocked. + } + } + traceEventLocked(0, mp, pid, bufp, ev, skip, args...) 
+ traceReleaseBuffer(pid) +} + +func traceEventLocked(extraBytes int, mp *m, pid int32, bufp *traceBufPtr, ev byte, skip int, args ...uint64) { buf := (*bufp).ptr() - const maxSize = 2 + 5*traceBytesPerNumber // event type, length, sequence, timestamp, stack id and two add params + // TODO: test on non-zero extraBytes param. + maxSize := 2 + 5*traceBytesPerNumber + extraBytes // event type, length, sequence, timestamp, stack id and two add params if buf == nil || len(buf.arr)-buf.pos < maxSize { buf = traceFlush(traceBufPtrOf(buf), pid).ptr() (*bufp).set(buf) @@ -555,7 +578,6 @@ func traceEvent(ev byte, skip int, args ...uint64) { // Fill in actual length. *lenp = byte(evSize - 2) } - traceReleaseBuffer(pid) } func traceStackID(mp *m, buf []location, skip int) uint64 { @@ -636,7 +658,20 @@ func traceString(bufp *traceBufPtr, pid int32, s string) (uint64, *traceBufPtr) if s == "" { return 0, bufp } + + lock(&trace.stringsLock) + if raceenabled { + // raceacquire is necessary because the map access + // below is race annotated. + raceacquire(unsafe.Pointer(&trace.stringsLock)) + } + if id, ok := trace.strings[s]; ok { + if raceenabled { + racerelease(unsafe.Pointer(&trace.stringsLock)) + } + unlock(&trace.stringsLock) + return id, bufp } @@ -644,6 +679,11 @@ func traceString(bufp *traceBufPtr, pid int32, s string) (uint64, *traceBufPtr) id := trace.stringSeq trace.strings[s] = id + if raceenabled { + racerelease(unsafe.Pointer(&trace.stringsLock)) + } + unlock(&trace.stringsLock) + // memory allocation in above may trigger tracing and // cause *bufp changes. Following code now works with *bufp, // so there must be no memory allocation or any activities @@ -657,8 +697,16 @@ func traceString(bufp *traceBufPtr, pid int32, s string) (uint64, *traceBufPtr) } buf.byte(traceEvString) buf.varint(id) - buf.varint(uint64(len(s))) - buf.pos += copy(buf.arr[buf.pos:], s) + + // double-check the string and the length can fit. + // Otherwise, truncate the string. + slen := len(s) + if room := len(buf.arr) - buf.pos; room < slen+traceBytesPerNumber { + slen = room + } + + buf.varint(uint64(slen)) + buf.pos += copy(buf.arr[buf.pos:], s[:slen]) (*bufp).set(buf) return id, bufp @@ -1091,3 +1139,78 @@ func traceNextGC() { traceEvent(traceEvNextGC, -1, memstats.next_gc) } } + +// To access runtime functions from runtime/trace. +// See runtime/trace/annotation.go + +//go:linkname trace_userTaskCreate runtime_trace.userTaskCreate +func trace_userTaskCreate(id, parentID uint64, taskType string) { + if !trace.enabled { + return + } + + // Same as in traceEvent. 
+ mp, pid, bufp := traceAcquireBuffer() + if !trace.enabled && !mp.startingtrace { + traceReleaseBuffer(pid) + return + } + + typeStringID, bufp := traceString(bufp, pid, taskType) + traceEventLocked(0, mp, pid, bufp, traceEvUserTaskCreate, 3, id, parentID, typeStringID) + traceReleaseBuffer(pid) +} + +//go:linkname trace_userTaskEnd runtime_trace.userTaskEnd +func trace_userTaskEnd(id uint64) { + traceEvent(traceEvUserTaskEnd, 2, id) +} + +//go:linkname trace_userRegion runtime_trace.userRegion +func trace_userRegion(id, mode uint64, name string) { + if !trace.enabled { + return + } + + mp, pid, bufp := traceAcquireBuffer() + if !trace.enabled && !mp.startingtrace { + traceReleaseBuffer(pid) + return + } + + nameStringID, bufp := traceString(bufp, pid, name) + traceEventLocked(0, mp, pid, bufp, traceEvUserRegion, 3, id, mode, nameStringID) + traceReleaseBuffer(pid) +} + +//go:linkname trace_userLog runtime_trace.userLog +func trace_userLog(id uint64, category, message string) { + if !trace.enabled { + return + } + + mp, pid, bufp := traceAcquireBuffer() + if !trace.enabled && !mp.startingtrace { + traceReleaseBuffer(pid) + return + } + + categoryID, bufp := traceString(bufp, pid, category) + + extraSpace := traceBytesPerNumber + len(message) // extraSpace for the value string + traceEventLocked(extraSpace, mp, pid, bufp, traceEvUserLog, 3, id, categoryID) + // traceEventLocked reserved extra space for val and len(val) + // in buf, so buf now has room for the following. + buf := (*bufp).ptr() + + // double-check the message and its length can fit. + // Otherwise, truncate the message. + slen := len(message) + if room := len(buf.arr) - buf.pos; room < slen+traceBytesPerNumber { + slen = room + } + buf.varint(uint64(slen)) + buf.pos += copy(buf.arr[buf.pos:], message[:slen]) + + traceReleaseBuffer(pid) +} diff --git a/libgo/go/runtime/trace/annotation.go b/libgo/go/runtime/trace/annotation.go new file mode 100644 index 0000000..3545ef3 --- /dev/null +++ b/libgo/go/runtime/trace/annotation.go @@ -0,0 +1,196 @@ +package trace + +import ( + "context" + "fmt" + "sync/atomic" + _ "unsafe" +) + +type traceContextKey struct{} + +// NewTask creates a task instance with the type taskType and returns +// it along with a Context that carries the task. +// If the input context contains a task, the new task is its subtask. +// +// The taskType is used to classify task instances. Analysis tools +// like the Go execution tracer may assume there are only a bounded +// number of unique task types in the system. +// +// The returned end function is used to mark the task's end. +// The trace tool measures task latency as the time between task creation +// and when the end function is called, and provides the latency +// distribution per task type. +// If the end function is called multiple times, only the first +// call is used in the latency measurement. +// +// ctx, task := trace.NewTask(ctx, "awesome task") +// trace.WithRegion(ctx, prepWork) +// // preparation of the task +// go func() { // continue processing the task in a separate goroutine. 
+// defer task.End() +// trace.WithRegion(ctx, remainingWork) +// } +func NewTask(pctx context.Context, taskType string) (ctx context.Context, task *Task) { + pid := fromContext(pctx).id + id := newID() + userTaskCreate(id, pid, taskType) + s := &Task{id: id} + return context.WithValue(pctx, traceContextKey{}, s), s + + // We allocate a new task and the end function even when + // the tracing is disabled because the context and the detach + // function can be used across trace enable/disable boundaries, + // which complicates the problem. + // + // For example, consider the following scenario: + // - trace is enabled. + // - trace.WithRegion is called, so a new context ctx + // with a new region is created. + // - trace is disabled. + // - trace is enabled again. + // - trace APIs with the ctx is called. Is the ID in the task + // a valid one to use? + // + // TODO(hyangah): reduce the overhead at least when + // tracing is disabled. Maybe the id can embed a tracing + // round number and ignore ids generated from previous + // tracing round. +} + +func fromContext(ctx context.Context) *Task { + if s, ok := ctx.Value(traceContextKey{}).(*Task); ok { + return s + } + return &bgTask +} + +// Task is a data type for tracing a user-defined, logical operation. +type Task struct { + id uint64 + // TODO(hyangah): record parent id? +} + +// End marks the end of the operation represented by the Task. +func (t *Task) End() { + userTaskEnd(t.id) +} + +var lastTaskID uint64 = 0 // task id issued last time + +func newID() uint64 { + // TODO(hyangah): use per-P cache + return atomic.AddUint64(&lastTaskID, 1) +} + +var bgTask = Task{id: uint64(0)} + +// Log emits a one-off event with the given category and message. +// Category can be empty and the API assumes there are only a handful of +// unique categories in the system. +func Log(ctx context.Context, category, message string) { + id := fromContext(ctx).id + userLog(id, category, message) +} + +// Logf is like Log, but the value is formatted using the specified format spec. +func Logf(ctx context.Context, category, format string, args ...interface{}) { + if IsEnabled() { + // Ideally this should be just Log, but that will + // add one more frame in the stack trace. + id := fromContext(ctx).id + userLog(id, category, fmt.Sprintf(format, args...)) + } +} + +const ( + regionStartCode = uint64(0) + regionEndCode = uint64(1) +) + +// WithRegion starts a region associated with its calling goroutine, runs fn, +// and then ends the region. If the context carries a task, the region is +// associated with the task. Otherwise, the region is attached to the background +// task. +// +// The regionType is used to classify regions, so there should be only a +// handful of unique region types. +func WithRegion(ctx context.Context, regionType string, fn func()) { + // NOTE: + // WithRegion helps avoiding misuse of the API but in practice, + // this is very restrictive: + // - Use of WithRegion makes the stack traces captured from + // region start and end are identical. + // - Refactoring the existing code to use WithRegion is sometimes + // hard and makes the code less readable. + // e.g. code block nested deep in the loop with various + // exit point with return values + // - Refactoring the code to use this API with closure can + // cause different GC behavior such as retaining some parameters + // longer. + // This causes more churns in code than I hoped, and sometimes + // makes the code less readable. 
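For orientation, a minimal self-contained sketch of how the annotation entry points declared in this file (NewTask, WithRegion, Log) combine with Start and Stop from package trace; the output file name and the task and region names are illustrative only:

package main

import (
	"context"
	"log"
	"os"
	"runtime/trace"
)

func main() {
	f, err := os.Create("trace.out") // illustrative output path
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	if err := trace.Start(f); err != nil {
		log.Fatal(err)
	}
	defer trace.Stop()

	ctx, task := trace.NewTask(context.Background(), "makeRequest")
	defer task.End()

	trace.WithRegion(ctx, "prepare", func() {
		trace.Log(ctx, "step", "inputs ready")
	})
	trace.WithRegion(ctx, "process", func() {
		// the work being measured goes here
	})
}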
+ + id := fromContext(ctx).id + userRegion(id, regionStartCode, regionType) + defer userRegion(id, regionEndCode, regionType) + fn() +} + +// StartRegion starts a region and returns a function for marking the +// end of the region. The returned Region's End function must be called +// from the same goroutine where the region was started. +// Within each goroutine, regions must nest. That is, regions started +// after this region must be ended before this region can be ended. +// Recommended usage is +// +// defer trace.StartRegion(ctx, "myTracedRegion").End() +// +func StartRegion(ctx context.Context, regionType string) *Region { + if !IsEnabled() { + return noopRegion + } + id := fromContext(ctx).id + userRegion(id, regionStartCode, regionType) + return &Region{id, regionType} +} + +// Region is a region of code whose execution time interval is traced. +type Region struct { + id uint64 + regionType string +} + +var noopRegion = &Region{} + +// End marks the end of the traced code region. +func (r *Region) End() { + if r == noopRegion { + return + } + userRegion(r.id, regionEndCode, r.regionType) +} + +// IsEnabled returns whether tracing is enabled. +// The information is advisory only. The tracing status +// may have changed by the time this function returns. +func IsEnabled() bool { + enabled := atomic.LoadInt32(&tracing.enabled) + return enabled == 1 +} + +// +// Function bodies are defined in runtime/trace.go +// + +// emits UserTaskCreate event. +func userTaskCreate(id, parentID uint64, taskType string) + +// emits UserTaskEnd event. +func userTaskEnd(id uint64) + +// emits UserRegion event. +func userRegion(id, mode uint64, regionType string) + +// emits UserLog event. +func userLog(id uint64, category, message string) diff --git a/libgo/go/runtime/trace/annotation_test.go b/libgo/go/runtime/trace/annotation_test.go new file mode 100644 index 0000000..71abbfc --- /dev/null +++ b/libgo/go/runtime/trace/annotation_test.go @@ -0,0 +1,152 @@ +package trace_test + +import ( + "bytes" + "context" + "fmt" + "internal/trace" + "reflect" + . 
"runtime/trace" + "strings" + "sync" + "testing" +) + +func BenchmarkStartRegion(b *testing.B) { + b.ReportAllocs() + ctx, task := NewTask(context.Background(), "benchmark") + defer task.End() + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + StartRegion(ctx, "region").End() + } + }) +} + +func BenchmarkNewTask(b *testing.B) { + b.ReportAllocs() + pctx, task := NewTask(context.Background(), "benchmark") + defer task.End() + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + _, task := NewTask(pctx, "task") + task.End() + } + }) +} + +func TestUserTaskRegion(t *testing.T) { + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } + bgctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + preExistingRegion := StartRegion(bgctx, "pre-existing region") + + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + + // Beginning of traced execution + var wg sync.WaitGroup + ctx, task := NewTask(bgctx, "task0") // EvUserTaskCreate("task0") + wg.Add(1) + go func() { + defer wg.Done() + defer task.End() // EvUserTaskEnd("task0") + + WithRegion(ctx, "region0", func() { + // EvUserRegionCreate("region0", start) + WithRegion(ctx, "region1", func() { + Log(ctx, "key0", "0123456789abcdef") // EvUserLog("task0", "key0", "0....f") + }) + // EvUserRegion("region0", end) + }) + }() + + wg.Wait() + + preExistingRegion.End() + postExistingRegion := StartRegion(bgctx, "post-existing region") + + // End of traced execution + Stop() + + postExistingRegion.End() + + saveTrace(t, buf, "TestUserTaskRegion") + res, err := trace.Parse(buf, "") + if err == trace.ErrTimeOrder { + // golang.org/issues/16755 + t.Skipf("skipping trace: %v", err) + } + if err != nil { + t.Fatalf("Parse failed: %v", err) + } + + // Check whether we see all user annotation related records in order + type testData struct { + typ byte + strs []string + args []uint64 + setLink bool + } + + var got []testData + tasks := map[uint64]string{} + for _, e := range res.Events { + t.Logf("%s", e) + switch e.Type { + case trace.EvUserTaskCreate: + taskName := e.SArgs[0] + got = append(got, testData{trace.EvUserTaskCreate, []string{taskName}, nil, e.Link != nil}) + if e.Link != nil && e.Link.Type != trace.EvUserTaskEnd { + t.Errorf("Unexpected linked event %q->%q", e, e.Link) + } + tasks[e.Args[0]] = taskName + case trace.EvUserLog: + key, val := e.SArgs[0], e.SArgs[1] + taskName := tasks[e.Args[0]] + got = append(got, testData{trace.EvUserLog, []string{taskName, key, val}, nil, e.Link != nil}) + case trace.EvUserTaskEnd: + taskName := tasks[e.Args[0]] + got = append(got, testData{trace.EvUserTaskEnd, []string{taskName}, nil, e.Link != nil}) + if e.Link != nil && e.Link.Type != trace.EvUserTaskCreate { + t.Errorf("Unexpected linked event %q->%q", e, e.Link) + } + case trace.EvUserRegion: + taskName := tasks[e.Args[0]] + regionName := e.SArgs[0] + got = append(got, testData{trace.EvUserRegion, []string{taskName, regionName}, []uint64{e.Args[1]}, e.Link != nil}) + if e.Link != nil && (e.Link.Type != trace.EvUserRegion || e.Link.SArgs[0] != regionName) { + t.Errorf("Unexpected linked event %q->%q", e, e.Link) + } + } + } + want := []testData{ + {trace.EvUserTaskCreate, []string{"task0"}, nil, true}, + {trace.EvUserRegion, []string{"task0", "region0"}, []uint64{0}, true}, + {trace.EvUserRegion, []string{"task0", "region1"}, []uint64{0}, true}, + {trace.EvUserLog, []string{"task0", "key0", "0123456789abcdef"}, nil, false}, + {trace.EvUserRegion, 
[]string{"task0", "region1"}, []uint64{1}, false}, + {trace.EvUserRegion, []string{"task0", "region0"}, []uint64{1}, false}, + {trace.EvUserTaskEnd, []string{"task0"}, nil, false}, + // Currently, pre-existing region is not recorded to avoid allocations. + // {trace.EvUserRegion, []string{"", "pre-existing region"}, []uint64{1}, false}, + {trace.EvUserRegion, []string{"", "post-existing region"}, []uint64{0}, false}, + } + if !reflect.DeepEqual(got, want) { + pretty := func(data []testData) string { + var s strings.Builder + for _, d := range data { + s.WriteString(fmt.Sprintf("\t%+v\n", d)) + } + return s.String() + } + t.Errorf("Got user region related events\n%+v\nwant:\n%+v", pretty(got), pretty(want)) + } +} diff --git a/libgo/go/runtime/trace/trace.go b/libgo/go/runtime/trace/trace.go index 439f998..7f9d72a 100644 --- a/libgo/go/runtime/trace/trace.go +++ b/libgo/go/runtime/trace/trace.go @@ -2,8 +2,10 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Package trace contains facilities for programs to generate trace -// for Go execution tracer. +// Package trace contains facilities for programs to generate traces +// for the Go execution tracer. +// +// Tracing runtime activities // // The execution trace captures a wide range of execution events such as // goroutine creation/blocking/unblocking, syscall enter/exit/block, @@ -12,8 +14,6 @@ // captured for most events. The generated trace can be interpreted // using `go tool trace`. // -// Tracing a Go program -// // Support for tracing tests and benchmarks built with the standard // testing package is built into `go test`. For example, the following // command runs the test in the current directory and writes the trace @@ -25,24 +25,102 @@ // support to a standalone program. See the Example that demonstrates // how to use this API to enable tracing. // -// There is also a standard HTTP interface to profiling data. Adding the -// following line will install handlers under the /debug/pprof/trace URL -// to download live profiles: +// There is also a standard HTTP interface to trace data. Adding the +// following line will install a handler under the /debug/pprof/trace URL +// to download a live trace: // // import _ "net/http/pprof" // -// See the net/http/pprof package for more details. +// See the net/http/pprof package for more details about all of the +// debug endpoints installed by this import. +// +// User annotation +// +// Package trace provides user annotation APIs that can be used to +// log interesting events during execution. +// +// There are three types of user annotations: log messages, regions, +// and tasks. +// +// Log emits a timestamped message to the execution trace along with +// additional information such as the category of the message and +// which goroutine called Log. The execution tracer provides UIs to filter +// and group goroutines using the log category and the message supplied +// in Log. +// +// A region is for logging a time interval during a goroutine's execution. +// By definition, a region starts and ends in the same goroutine. +// Regions can be nested to represent subintervals. +// For example, the following code records four regions in the execution +// trace to trace the durations of sequential steps in a cappuccino making +// operation. +// +// trace.WithRegion(ctx, "makeCappuccino", func() { +// +// // orderID allows to identify a specific order +// // among many cappuccino order region records. 
+// trace.Log(ctx, "orderID", orderID) +// +// trace.WithRegion(ctx, "steamMilk", steamMilk) +// trace.WithRegion(ctx, "extractCoffee", extractCoffee) +// trace.WithRegion(ctx, "mixMilkCoffee", mixMilkCoffee) +// }) +// +// A task is a higher-level component that aids tracing of logical +// operations such as an RPC request, an HTTP request, or an +// interesting local operation which may require multiple goroutines +// working together. Since tasks can involve multiple goroutines, +// they are tracked via a context.Context object. NewTask creates +// a new task and embeds it in the returned context.Context object. +// Log messages and regions are attached to the task, if any, in the +// Context passed to Log and WithRegion. +// +// For example, assume that we decided to froth milk, extract coffee, +// and mix milk and coffee in separate goroutines. With a task, +// the trace tool can identify the goroutines involved in a specific +// cappuccino order. +// +// ctx, task := trace.NewTask(ctx, "makeCappuccino") +// trace.Log(ctx, "orderID", orderID) +// +// milk := make(chan bool) +// espresso := make(chan bool) +// +// go func() { +// trace.WithRegion(ctx, "steamMilk", steamMilk) +// milk <- true +// }() +// go func() { +// trace.WithRegion(ctx, "extractCoffee", extractCoffee) +// espresso <- true +// }() +// go func() { +// defer task.End() // When assemble is done, the order is complete. +// <-espresso +// <-milk +// trace.WithRegion(ctx, "mixMilkCoffee", mixMilkCoffee) +// }() +// +// +// The trace tool computes the latency of a task by measuring the +// time between the task creation and the task end and provides +// latency distributions for each task type found in the trace. package trace import ( "io" "runtime" + "sync" + "sync/atomic" ) // Start enables tracing for the current program. // While tracing, the trace will be buffered and written to w. // Start returns an error if tracing is already enabled. func Start(w io.Writer) error { + tracing.Lock() + defer tracing.Unlock() + if err := runtime.StartTrace(); err != nil { return err } @@ -55,11 +133,21 @@ func Start(w io.Writer) error { w.Write(data) } }() + atomic.StoreInt32(&tracing.enabled, 1) return nil } // Stop stops the current tracing, if any. // Stop only returns after all the writes for the trace have completed. func Stop() { + tracing.Lock() + defer tracing.Unlock() + atomic.StoreInt32(&tracing.enabled, 0) + runtime.StopTrace() } + +var tracing struct { + sync.Mutex // gate mutators (Start, Stop) + enabled int32 // accessed via atomic +} diff --git a/libgo/go/runtime/trace/trace_stack_test.go b/libgo/go/runtime/trace/trace_stack_test.go index 274cdf7..62c06e6 100644 --- a/libgo/go/runtime/trace/trace_stack_test.go +++ b/libgo/go/runtime/trace/trace_stack_test.go @@ -6,14 +6,17 @@ package trace_test import ( "bytes" + "fmt" "internal/testenv" "internal/trace" "net" "os" "runtime" . "runtime/trace" + "strings" "sync" "testing" + "text/tabwriter" "time" ) @@ -21,7 +24,7 @@ import ( // In particular that we strip bottom uninteresting frames like goexit, // top uninteresting frames (runtime guts). func TestTraceSymbolize(t *testing.T) { - testenv.MustHaveGoBuild(t) + skipTraceSymbolizeTestIfNecessary(t) buf := new(bytes.Buffer) if err := Start(buf); err != nil { @@ -34,28 +37,28 @@ func TestTraceSymbolize(t *testing.T) { // on a channel, in a select or otherwise. So we kick off goroutines // that need to block first in the hope that while we are executing // the rest of the test, they will block. 
- go func() { + go func() { // func1 select {} }() - go func() { + go func() { // func2 var c chan int c <- 0 }() - go func() { + go func() { // func3 var c chan int <-c }() done1 := make(chan bool) - go func() { + go func() { // func4 <-done1 }() done2 := make(chan bool) - go func() { + go func() { // func5 done2 <- true }() c1 := make(chan int) c2 := make(chan int) - go func() { + go func() { // func6 select { case <-c1: case <-c2: @@ -63,17 +66,17 @@ func TestTraceSymbolize(t *testing.T) { }() var mu sync.Mutex mu.Lock() - go func() { + go func() { // func7 mu.Lock() mu.Unlock() }() var wg sync.WaitGroup wg.Add(1) - go func() { + go func() { // func8 wg.Wait() }() cv := sync.NewCond(&sync.Mutex{}) - go func() { + go func() { // func9 cv.L.Lock() cv.Wait() cv.L.Unlock() @@ -82,7 +85,7 @@ func TestTraceSymbolize(t *testing.T) { if err != nil { t.Fatalf("failed to listen: %v", err) } - go func() { + go func() { // func10 c, err := ln.Accept() if err != nil { t.Errorf("failed to accept: %v", err) @@ -97,7 +100,7 @@ func TestTraceSymbolize(t *testing.T) { defer rp.Close() defer wp.Close() pipeReadDone := make(chan bool) - go func() { + go func() { // func11 var data [1]byte rp.Read(data[:]) pipeReadDone <- true @@ -125,14 +128,16 @@ func TestTraceSymbolize(t *testing.T) { wp.Write(data[:]) <-pipeReadDone + oldGoMaxProcs := runtime.GOMAXPROCS(0) + runtime.GOMAXPROCS(oldGoMaxProcs + 1) + Stop() + + runtime.GOMAXPROCS(oldGoMaxProcs) + events, _ := parseTrace(t, buf) // Now check that the stacks are correct. - type frame struct { - Fn string - Line int - } type eventDesc struct { Type byte Stk []frame @@ -140,90 +145,96 @@ func TestTraceSymbolize(t *testing.T) { want := []eventDesc{ {trace.EvGCStart, []frame{ {"runtime.GC", 0}, - {"runtime/trace_test.TestTraceSymbolize", 107}, + {"runtime/trace_test.TestTraceSymbolize", 0}, {"testing.tRunner", 0}, }}, {trace.EvGoStart, []frame{ - {"runtime/trace_test.TestTraceSymbolize.func1", 37}, + {"runtime/trace_test.TestTraceSymbolize.func1", 0}, }}, {trace.EvGoSched, []frame{ - {"runtime/trace_test.TestTraceSymbolize", 108}, + {"runtime/trace_test.TestTraceSymbolize", 111}, {"testing.tRunner", 0}, }}, {trace.EvGoCreate, []frame{ - {"runtime/trace_test.TestTraceSymbolize", 37}, + {"runtime/trace_test.TestTraceSymbolize", 40}, {"testing.tRunner", 0}, }}, {trace.EvGoStop, []frame{ {"runtime.block", 0}, - {"runtime/trace_test.TestTraceSymbolize.func1", 38}, + {"runtime/trace_test.TestTraceSymbolize.func1", 0}, }}, {trace.EvGoStop, []frame{ {"runtime.chansend1", 0}, - {"runtime/trace_test.TestTraceSymbolize.func2", 42}, + {"runtime/trace_test.TestTraceSymbolize.func2", 0}, }}, {trace.EvGoStop, []frame{ {"runtime.chanrecv1", 0}, - {"runtime/trace_test.TestTraceSymbolize.func3", 46}, + {"runtime/trace_test.TestTraceSymbolize.func3", 0}, }}, {trace.EvGoBlockRecv, []frame{ {"runtime.chanrecv1", 0}, - {"runtime/trace_test.TestTraceSymbolize.func4", 50}, + {"runtime/trace_test.TestTraceSymbolize.func4", 0}, }}, {trace.EvGoUnblock, []frame{ {"runtime.chansend1", 0}, - {"runtime/trace_test.TestTraceSymbolize", 110}, + {"runtime/trace_test.TestTraceSymbolize", 113}, {"testing.tRunner", 0}, }}, {trace.EvGoBlockSend, []frame{ {"runtime.chansend1", 0}, - {"runtime/trace_test.TestTraceSymbolize.func5", 54}, + {"runtime/trace_test.TestTraceSymbolize.func5", 0}, }}, {trace.EvGoUnblock, []frame{ {"runtime.chanrecv1", 0}, - {"runtime/trace_test.TestTraceSymbolize", 111}, + {"runtime/trace_test.TestTraceSymbolize", 114}, {"testing.tRunner", 0}, }}, {trace.EvGoBlockSelect, 
[]frame{ {"runtime.selectgo", 0}, - {"runtime/trace_test.TestTraceSymbolize.func6", 59}, + {"runtime/trace_test.TestTraceSymbolize.func6", 0}, }}, {trace.EvGoUnblock, []frame{ {"runtime.selectgo", 0}, - {"runtime/trace_test.TestTraceSymbolize", 112}, + {"runtime/trace_test.TestTraceSymbolize", 115}, {"testing.tRunner", 0}, }}, {trace.EvGoBlockSync, []frame{ {"sync.(*Mutex).Lock", 0}, - {"runtime/trace_test.TestTraceSymbolize.func7", 67}, + {"runtime/trace_test.TestTraceSymbolize.func7", 0}, }}, {trace.EvGoUnblock, []frame{ {"sync.(*Mutex).Unlock", 0}, - {"runtime/trace_test.TestTraceSymbolize", 116}, + {"runtime/trace_test.TestTraceSymbolize", 0}, {"testing.tRunner", 0}, }}, {trace.EvGoBlockSync, []frame{ {"sync.(*WaitGroup).Wait", 0}, - {"runtime/trace_test.TestTraceSymbolize.func8", 73}, + {"runtime/trace_test.TestTraceSymbolize.func8", 0}, }}, {trace.EvGoUnblock, []frame{ {"sync.(*WaitGroup).Add", 0}, {"sync.(*WaitGroup).Done", 0}, - {"runtime/trace_test.TestTraceSymbolize", 117}, + {"runtime/trace_test.TestTraceSymbolize", 120}, {"testing.tRunner", 0}, }}, {trace.EvGoBlockCond, []frame{ {"sync.(*Cond).Wait", 0}, - {"runtime/trace_test.TestTraceSymbolize.func9", 78}, + {"runtime/trace_test.TestTraceSymbolize.func9", 0}, }}, {trace.EvGoUnblock, []frame{ {"sync.(*Cond).Signal", 0}, - {"runtime/trace_test.TestTraceSymbolize", 118}, + {"runtime/trace_test.TestTraceSymbolize", 0}, {"testing.tRunner", 0}, }}, {trace.EvGoSleep, []frame{ {"time.Sleep", 0}, - {"runtime/trace_test.TestTraceSymbolize", 109}, + {"runtime/trace_test.TestTraceSymbolize", 0}, + {"testing.tRunner", 0}, + }}, + {trace.EvGomaxprocs, []frame{ + {"runtime.startTheWorld", 0}, // this is when the current gomaxprocs is logged. + {"runtime.GOMAXPROCS", 0}, + {"runtime/trace_test.TestTraceSymbolize", 0}, {"testing.tRunner", 0}, }}, } @@ -235,7 +246,7 @@ func TestTraceSymbolize(t *testing.T) { {"net.(*netFD).accept", 0}, {"net.(*TCPListener).accept", 0}, {"net.(*TCPListener).Accept", 0}, - {"runtime/trace_test.TestTraceSymbolize.func10", 86}, + {"runtime/trace_test.TestTraceSymbolize.func10", 0}, }}, {trace.EvGoSysCall, []frame{ {"syscall.read", 0}, @@ -243,7 +254,7 @@ func TestTraceSymbolize(t *testing.T) { {"internal/poll.(*FD).Read", 0}, {"os.(*File).read", 0}, {"os.(*File).Read", 0}, - {"runtime/trace_test.TestTraceSymbolize.func11", 102}, + {"runtime/trace_test.TestTraceSymbolize.func11", 0}, }}, }...) 
} @@ -264,22 +275,57 @@ func TestTraceSymbolize(t *testing.T) { matched[i] = true } } - for i, m := range matched { - if m { + for i, w := range want { + if matched[i] { continue } - w := want[i] - t.Errorf("did not match event %v at %v:%v", trace.EventDescriptions[w.Type].Name, w.Stk[0].Fn, w.Stk[0].Line) - t.Errorf("seen the following events of this type:") - for _, ev := range events { - if ev.Type != w.Type { - continue - } - for _, f := range ev.Stk { - t.Logf(" %v :: %s:%v", f.Fn, f.File, f.Line) + seen, n := dumpEventStacks(w.Type, events) + t.Errorf("Did not match event %v with stack\n%s\nSeen %d events of the type\n%s", + trace.EventDescriptions[w.Type].Name, dumpFrames(w.Stk), n, seen) + } +} + +func skipTraceSymbolizeTestIfNecessary(t *testing.T) { + testenv.MustHaveGoBuild(t) + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } +} + +func dumpEventStacks(typ byte, events []*trace.Event) ([]byte, int) { + matched := 0 + o := new(bytes.Buffer) + tw := tabwriter.NewWriter(o, 0, 8, 0, '\t', 0) + for _, ev := range events { + if ev.Type != typ { + continue + } + matched++ + fmt.Fprintf(tw, "Offset %d\n", ev.Off) + for _, f := range ev.Stk { + fname := f.File + if idx := strings.Index(fname, "/go/src/"); idx > 0 { + fname = fname[idx:] } - t.Logf("---") + fmt.Fprintf(tw, " %v\t%s:%d\n", f.Fn, fname, f.Line) } - t.Logf("======") } + tw.Flush() + return o.Bytes(), matched +} + +type frame struct { + Fn string + Line int +} + +func dumpFrames(frames []frame) []byte { + o := new(bytes.Buffer) + tw := tabwriter.NewWriter(o, 0, 8, 0, '\t', 0) + + for _, f := range frames { + fmt.Fprintf(tw, " %v\t :%d\n", f.Fn, f.Line) + } + tw.Flush() + return o.Bytes() } diff --git a/libgo/go/runtime/trace/trace_test.go b/libgo/go/runtime/trace/trace_test.go index 997d486..fc81abc 100644 --- a/libgo/go/runtime/trace/trace_test.go +++ b/libgo/go/runtime/trace/trace_test.go @@ -31,6 +31,9 @@ func TestEventBatch(t *testing.T) { if race.Enabled { t.Skip("skipping in race mode") } + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } if testing.Short() { t.Skip("skipping in short mode") } @@ -81,6 +84,9 @@ func TestEventBatch(t *testing.T) { } func TestTraceStartStop(t *testing.T) { + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } buf := new(bytes.Buffer) if err := Start(buf); err != nil { t.Fatalf("failed to start tracing: %v", err) @@ -98,6 +104,9 @@ func TestTraceStartStop(t *testing.T) { } func TestTraceDoubleStart(t *testing.T) { + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } Stop() buf := new(bytes.Buffer) if err := Start(buf); err != nil { @@ -111,6 +120,9 @@ func TestTraceDoubleStart(t *testing.T) { } func TestTrace(t *testing.T) { + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } buf := new(bytes.Buffer) if err := Start(buf); err != nil { t.Fatalf("failed to start tracing: %v", err) @@ -168,6 +180,12 @@ func testBrokenTimestamps(t *testing.T, data []byte) { } func TestTraceStress(t *testing.T) { + if runtime.GOOS == "js" { + t.Skip("no os.Pipe on js") + } + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } var wg sync.WaitGroup done := make(chan bool) @@ -307,6 +325,12 @@ func TestTraceStress(t *testing.T) { // Do a bunch of various stuff (timers, GC, network, etc) in a separate goroutine. // And concurrently with all that start/stop trace 3 times. 
 func TestTraceStressStartStop(t *testing.T) {
+	if runtime.GOOS == "js" {
+		t.Skip("no os.Pipe on js")
+	}
+	if IsEnabled() {
+		t.Skip("skipping because -test.trace is set")
+	}
 	defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(8))
 	outerDone := make(chan bool)
 
@@ -454,6 +478,9 @@ func TestTraceStressStartStop(t *testing.T) {
 }
 
 func TestTraceFutileWakeup(t *testing.T) {
+	if IsEnabled() {
+		t.Skip("skipping because -test.trace is set")
+	}
 	buf := new(bytes.Buffer)
 	if err := Start(buf); err != nil {
 		t.Fatalf("failed to start tracing: %v", err)
diff --git a/libgo/go/runtime/traceback_gccgo.go b/libgo/go/runtime/traceback_gccgo.go
index 9456b1f..e97071e 100644
--- a/libgo/go/runtime/traceback_gccgo.go
+++ b/libgo/go/runtime/traceback_gccgo.go
@@ -141,8 +141,8 @@ func goroutineheader(gp *g) {
 	}
 
 	// Override.
-	if gpstatus == _Gwaiting && gp.waitreason != "" {
-		status = gp.waitreason
+	if gpstatus == _Gwaiting && gp.waitreason != waitReasonZero {
+		status = gp.waitreason.String()
 	}
 
 	// approx time the G is blocked, in minutes
diff --git a/libgo/go/runtime/type.go b/libgo/go/runtime/type.go
index 3c08f7e..8fd38c3 100644
--- a/libgo/go/runtime/type.go
+++ b/libgo/go/runtime/type.go
@@ -20,12 +20,30 @@ type _type struct {
 	hashfn  func(unsafe.Pointer, uintptr) uintptr
 	equalfn func(unsafe.Pointer, unsafe.Pointer) bool
 
-	gcdata *byte
-	string *string
+	gcdata  *byte
+	_string *string
 	*uncommontype
 	ptrToThis *_type
 }
 
+func (t *_type) string() string {
+	return *t._string
+}
+
+// pkgpath returns the path of the package where t was defined, if
+// available. This is not the same as the reflect package's PkgPath
+// method, in that it returns the package path for struct and interface
+// types, not just named types.
+func (t *_type) pkgpath() string {
+	if u := t.uncommontype; u != nil {
+		if u.pkgPath == nil {
+			return ""
+		}
+		return *u.pkgPath
+	}
+	return ""
+}
+
 // Return whether two type descriptors are equal.
 // This is gccgo-specific, as gccgo, unlike gc, permits multiple
 // independent descriptors for a single type.
@@ -38,7 +56,7 @@ func eqtype(t1, t2 *_type) bool {
 	case t1.kind != t2.kind || t1.hash != t2.hash:
 		return false
 	default:
-		return *t1.string == *t2.string
+		return t1.string() == t2.string()
 	}
 }
 
diff --git a/libgo/go/runtime/unaligned1.go b/libgo/go/runtime/unaligned1.go
index 2f5b63a..86e0df0 100644
--- a/libgo/go/runtime/unaligned1.go
+++ b/libgo/go/runtime/unaligned1.go
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// +build 386 amd64 amd64p32 arm64 ppc64 ppc64le s390x ppc s390 arm64be riscv64
+// +build 386 amd64 amd64p32 arm64 ppc64 ppc64le s390x wasm ppc s390 arm64be riscv64
 
 package runtime
 
diff --git a/libgo/go/runtime/utf8.go b/libgo/go/runtime/utf8.go
index e845451..0ba0dad 100644
--- a/libgo/go/runtime/utf8.go
+++ b/libgo/go/runtime/utf8.go
@@ -46,6 +46,15 @@ const (
 	hicb = 0xBF // 1011 1111
 )
 
+// countrunes returns the number of runes in s.
+func countrunes(s string) int {
+	n := 0
+	for range s {
+		n++
+	}
+	return n
+}
+
 // decoderune returns the non-ASCII rune at the start of
 // s[k:] and the index after the rune in s.
 //
diff --git a/libgo/go/runtime/vdso_none.go b/libgo/go/runtime/vdso_none.go
deleted file mode 100644
index fc21240..0000000
--- a/libgo/go/runtime/vdso_none.go
+++ /dev/null
@@ -1,11 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !linux
-// +build !darwin
-
-package runtime
-
-func sysargs(argc int32, argv **byte) {
-}
--
cgit v1.1
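
For context on the `IsEnabled()` guards added throughout the trace tests in this patch: `runtime/trace.Start` reports an error when a trace is already in progress (for example when the test binary itself was invoked with `-test.trace`), so each test skips itself instead of failing spuriously. Below is a minimal standalone sketch of that pattern, not part of the patch, written against the public `runtime/trace` API (which is, as far as I can tell, the same `IsEnabled` the tests call via their import of `runtime/trace`):

```go
package main

import (
	"bytes"
	"fmt"
	"runtime/trace"
)

func main() {
	// Mirror the guard the patched tests use: if tracing is already
	// active in this process, starting a second trace would fail,
	// so bail out early instead.
	if trace.IsEnabled() {
		fmt.Println("tracing already enabled; skipping")
		return
	}

	var buf bytes.Buffer
	if err := trace.Start(&buf); err != nil {
		fmt.Println("failed to start tracing:", err)
		return
	}
	// ... exercise the code to be traced here ...
	trace.Stop()

	fmt.Printf("collected %d bytes of trace data\n", buf.Len())
}
```

Checking up front rather than inspecting the error from `Start` keeps the skip decision explicit and avoids tearing down a trace some outer harness owns.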