author     Ian Lance Taylor <iant@golang.org>        2018-09-24 21:46:21 +0000
committer  Ian Lance Taylor <ian@gcc.gnu.org>        2018-09-24 21:46:21 +0000
commit     dd931d9b48647e898dc80927c532ae93cc09e192 (patch)
tree       71be2295cd79b8a182f6130611658db8628772d5 /libgo/go/runtime
parent     779d8a5ad09b01428726ea5a0e6c87bd9ac3c0e4 (diff)
libgo: update to Go 1.11
Reviewed-on: https://go-review.googlesource.com/136435
gotools/:
* Makefile.am (mostlyclean-local): Run chmod on check-go-dir to
make sure it is writable.
(check-go-tools): Likewise.
(check-vet): Copy internal/objabi to check-vet-dir.
* Makefile.in: Rebuild.
From-SVN: r264546
Diffstat (limited to 'libgo/go/runtime')
134 files changed, 6916 insertions, 3144 deletions
diff --git a/libgo/go/runtime/alg.go b/libgo/go/runtime/alg.go index 7c98f1b..c6bc6b6 100644 --- a/libgo/go/runtime/alg.go +++ b/libgo/go/runtime/alg.go @@ -5,6 +5,7 @@ package runtime import ( + "internal/cpu" "runtime/internal/sys" "unsafe" ) @@ -137,7 +138,7 @@ func interhash(p unsafe.Pointer, h uintptr) uintptr { t := *(**_type)(tab) fn := t.hashfn if fn == nil { - panic(errorString("hash of unhashable type " + *t.string)) + panic(errorString("hash of unhashable type " + t.string())) } if isDirectIface(t) { return c1 * fn(unsafe.Pointer(&a.data), h^c0) @@ -154,7 +155,7 @@ func nilinterhash(p unsafe.Pointer, h uintptr) uintptr { } fn := t.hashfn if fn == nil { - panic(errorString("hash of unhashable type " + *t.string)) + panic(errorString("hash of unhashable type " + t.string())) } if isDirectIface(t) { return c1 * fn(unsafe.Pointer(&a.data), h^c0) @@ -212,7 +213,7 @@ func efaceeq(x, y eface) bool { } eq := t.equalfn if eq == nil { - panic(errorString("comparing uncomparable type " + *t.string)) + panic(errorString("comparing uncomparable type " + t.string())) } if isDirectIface(t) { return x.data == y.data @@ -233,7 +234,7 @@ func ifaceeq(x, y iface) bool { } eq := t.equalfn if eq == nil { - panic(errorString("comparing uncomparable type " + *t.string)) + panic(errorString("comparing uncomparable type " + t.string())) } if isDirectIface(t) { return x.data == y.data @@ -251,7 +252,7 @@ func ifacevaleq(x iface, t *_type, p unsafe.Pointer) bool { } eq := t.equalfn if eq == nil { - panic(errorString("comparing uncomparable type " + *t.string)) + panic(errorString("comparing uncomparable type " + t.string())) } if isDirectIface(t) { return x.data == p @@ -272,7 +273,7 @@ func ifaceefaceeq(x iface, y eface) bool { } eq := xt.equalfn if eq == nil { - panic(errorString("comparing uncomparable type " + *xt.string)) + panic(errorString("comparing uncomparable type " + xt.string())) } if isDirectIface(xt) { return x.data == y.data @@ -289,7 +290,7 @@ func efacevaleq(x eface, t *_type, p unsafe.Pointer) bool { } eq := t.equalfn if eq == nil { - panic(errorString("comparing uncomparable type " + *t.string)) + panic(errorString("comparing uncomparable type " + t.string())) } if isDirectIface(t) { return x.data == p @@ -388,23 +389,25 @@ func ifaceHash(i interface { const hashRandomBytes = sys.PtrSize / 4 * 64 -// used in asm_{386,amd64}.s to seed the hash function +// used in asm_{386,amd64,arm64}.s to seed the hash function var aeskeysched [hashRandomBytes]byte // used in hash{32,64}.go to seed the hash function var hashkey [4]uintptr func alginit() { - // Install aes hash algorithm if we have the instructions we need + // Install AES hash algorithms if the instructions needed are present. if (GOARCH == "386" || GOARCH == "amd64") && GOOS != "nacl" && support_aes && - cpuid_ecx&(1<<25) != 0 && // aes (aesenc) - cpuid_ecx&(1<<9) != 0 && // sse3 (pshufb) - cpuid_ecx&(1<<19) != 0 { // sse4.1 (pinsr{d,q}) - useAeshash = true - // Initialize with random data so hash collisions will be hard to engineer. - getRandomData(aeskeysched[:]) + cpu.X86.HasAES && // AESENC + cpu.X86.HasSSSE3 && // PSHUFB + cpu.X86.HasSSE41 { // PINSR{D,Q} + initAlgAES() + return + } + if GOARCH == "arm64" && cpu.ARM64.HasAES { + initAlgAES() return } getRandomData((*[len(hashkey) * sys.PtrSize]byte)(unsafe.Pointer(&hashkey))[:]) @@ -413,3 +416,9 @@ func alginit() { hashkey[2] |= 1 hashkey[3] |= 1 } + +func initAlgAES() { + useAeshash = true + // Initialize with random data so hash collisions will be hard to engineer. 
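(Illustrative aside, not part of the patch.) The alginit change above switches the AES-hash gate from raw CPUID bit tests to the feature flags in internal/cpu. That package is not importable by ordinary programs, but golang.org/x/sys/cpu exposes equivalent flags; a minimal sketch of the same check in user code follows, with hasAESHashSupport being a name invented here:

package main

import (
	"fmt"

	"golang.org/x/sys/cpu"
)

// hasAESHashSupport mirrors the conditions alginit now checks before enabling
// the AES-based hash: AESENC, PSHUFB and PINSR{D,Q} on x86, or the AES
// instructions on arm64. On other architectures all of these flags are false.
func hasAESHashSupport() bool {
	if cpu.X86.HasAES && cpu.X86.HasSSSE3 && cpu.X86.HasSSE41 {
		return true
	}
	return cpu.ARM64.HasAES
}

func main() {
	fmt.Println("AES-based hashing possible:", hasAESHashSupport())
}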
+ getRandomData(aeskeysched[:]) +} diff --git a/libgo/go/runtime/atomic_pointer.go b/libgo/go/runtime/atomic_pointer.go index b66ef58..2d023d3 100644 --- a/libgo/go/runtime/atomic_pointer.go +++ b/libgo/go/runtime/atomic_pointer.go @@ -16,11 +16,24 @@ import ( // Instead, these are wrappers around the actual atomics (casp1 and so on) // that use noescape to convey which arguments do not escape. +// atomicwb performs a write barrier before an atomic pointer write. +// The caller should guard the call with "if writeBarrier.enabled". +// +//go:nosplit +func atomicwb(ptr *unsafe.Pointer, new unsafe.Pointer) { + slot := (*uintptr)(unsafe.Pointer(ptr)) + if !getg().m.p.ptr().wbBuf.putFast(*slot, uintptr(new)) { + wbBufFlush(slot, uintptr(new)) + } +} + // atomicstorep performs *ptr = new atomically and invokes a write barrier. // //go:nosplit func atomicstorep(ptr unsafe.Pointer, new unsafe.Pointer) { - writebarrierptr_prewrite((*uintptr)(ptr), uintptr(new)) + if writeBarrier.enabled { + atomicwb((*unsafe.Pointer)(ptr), new) + } atomic.StorepNoWB(noescape(ptr), new) } @@ -29,7 +42,9 @@ func casp(ptr *unsafe.Pointer, old, new unsafe.Pointer) bool { // The write barrier is only necessary if the CAS succeeds, // but since it needs to happen before the write becomes // public, we have to do it conservatively all the time. - writebarrierptr_prewrite((*uintptr)(unsafe.Pointer(ptr)), uintptr(new)) + if writeBarrier.enabled { + atomicwb(ptr, new) + } return atomic.Casp1((*unsafe.Pointer)(noescape(unsafe.Pointer(ptr))), noescape(old), new) } @@ -43,7 +58,9 @@ func sync_atomic_StoreUintptr(ptr *uintptr, new uintptr) //go:linkname sync_atomic_StorePointer sync_atomic.StorePointer //go:nosplit func sync_atomic_StorePointer(ptr *unsafe.Pointer, new unsafe.Pointer) { - writebarrierptr_prewrite((*uintptr)(unsafe.Pointer(ptr)), uintptr(new)) + if writeBarrier.enabled { + atomicwb(ptr, new) + } sync_atomic_StoreUintptr((*uintptr)(unsafe.Pointer(ptr)), uintptr(new)) } @@ -53,7 +70,9 @@ func sync_atomic_SwapUintptr(ptr *uintptr, new uintptr) uintptr //go:linkname sync_atomic_SwapPointer sync_atomic.SwapPointer //go:nosplit func sync_atomic_SwapPointer(ptr *unsafe.Pointer, new unsafe.Pointer) unsafe.Pointer { - writebarrierptr_prewrite((*uintptr)(unsafe.Pointer(ptr)), uintptr(new)) + if writeBarrier.enabled { + atomicwb(ptr, new) + } old := unsafe.Pointer(sync_atomic_SwapUintptr((*uintptr)(noescape(unsafe.Pointer(ptr))), uintptr(new))) return old } @@ -64,6 +83,8 @@ func sync_atomic_CompareAndSwapUintptr(ptr *uintptr, old, new uintptr) bool //go:linkname sync_atomic_CompareAndSwapPointer sync_atomic.CompareAndSwapPointer //go:nosplit func sync_atomic_CompareAndSwapPointer(ptr *unsafe.Pointer, old, new unsafe.Pointer) bool { - writebarrierptr_prewrite((*uintptr)(unsafe.Pointer(ptr)), uintptr(new)) + if writeBarrier.enabled { + atomicwb(ptr, new) + } return sync_atomic_CompareAndSwapUintptr((*uintptr)(noescape(unsafe.Pointer(ptr))), uintptr(old), uintptr(new)) } diff --git a/libgo/go/runtime/vdso_none.go b/libgo/go/runtime/auxv_none.go index fc21240..3ca617b 100644 --- a/libgo/go/runtime/vdso_none.go +++ b/libgo/go/runtime/auxv_none.go @@ -4,6 +4,10 @@ // +build !linux // +build !darwin +// +build !dragonfly +// +build !freebsd +// +build !netbsd +// +build !solaris package runtime diff --git a/libgo/go/runtime/cgocall.go b/libgo/go/runtime/cgocall.go index 67b2bce..24bf749 100644 --- a/libgo/go/runtime/cgocall.go +++ b/libgo/go/runtime/cgocall.go @@ -212,22 +212,13 @@ func cgoCheckArg(t *_type, p unsafe.Pointer, 
indir, top bool, msg string) { // pointer into Go memory. If it does, we panic. // The return values are unused but useful to see in panic tracebacks. func cgoCheckUnknownPointer(p unsafe.Pointer, msg string) (base, i uintptr) { - if cgoInRange(p, mheap_.arena_start, mheap_.arena_used) { - if !inheap(uintptr(p)) { - // On 32-bit systems it is possible for C's allocated memory - // to have addresses between arena_start and arena_used. - // Either this pointer is a stack or an unused span or it's - // a C allocation. Escape analysis should prevent the first, - // garbage collection should prevent the second, - // and the third is completely OK. - return - } - - b, hbits, span, _ := heapBitsForObject(uintptr(p), 0, 0, false) + if inheap(uintptr(p)) { + b, span, _ := findObject(uintptr(p), 0, 0, false) base = b if base == 0 { return } + hbits := heapBitsForAddr(base) n := span.elemsize for i = uintptr(0); i < n; i += sys.PtrSize { if i != 1*sys.PtrSize && !hbits.morePointers() { diff --git a/libgo/go/runtime/cgocheck.go b/libgo/go/runtime/cgocheck.go index b85b519..d896fb7 100644 --- a/libgo/go/runtime/cgocheck.go +++ b/libgo/go/runtime/cgocheck.go @@ -126,9 +126,7 @@ func cgoCheckTypedBlock(typ *_type, src unsafe.Pointer, off, size uintptr) { roots = roots.next } - aoff := uintptr(src) - mheap_.arena_start - idx := aoff >> _PageShift - s := mheap_.spans[idx] + s := spanOfUnchecked(uintptr(src)) if s.state == _MSpanManual { // There are no heap bits for value stored on the stack. // For a channel receive src might be on the stack of some @@ -151,9 +149,7 @@ func cgoCheckTypedBlock(typ *_type, src unsafe.Pointer, off, size uintptr) { if i >= off && bits&bitPointer != 0 { v := *(*unsafe.Pointer)(add(src, i)) if cgoIsGoPointer(v) { - systemstack(func() { - throw(cgoWriteBarrierFail) - }) + throw(cgoWriteBarrierFail) } } hbits = hbits.next() @@ -186,9 +182,7 @@ func cgoCheckBits(src unsafe.Pointer, gcbits *byte, off, size uintptr) { if bits&1 != 0 { v := *(*unsafe.Pointer)(add(src, i)) if cgoIsGoPointer(v) { - systemstack(func() { - throw(cgoWriteBarrierFail) - }) + throw(cgoWriteBarrierFail) } } } diff --git a/libgo/go/runtime/chan.go b/libgo/go/runtime/chan.go index 87f7879..88a8944 100644 --- a/libgo/go/runtime/chan.go +++ b/libgo/go/runtime/chan.go @@ -88,7 +88,7 @@ func makechan(t *chantype, size int) *hchan { throw("makechan: bad alignment") } - if size < 0 || uintptr(size) > maxSliceCap(elem.size) || uintptr(size)*elem.size > _MaxMem-hchanSize { + if size < 0 || uintptr(size) > maxSliceCap(elem.size) || uintptr(size)*elem.size > maxAlloc-hchanSize { panic(plainError("makechan: size out of range")) } @@ -157,7 +157,7 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool { if !block { return false } - gopark(nil, nil, "chan send (nil chan)", traceEvGoStop, 2) + gopark(nil, nil, waitReasonChanSendNilChan, traceEvGoStop, 2) throw("unreachable") } @@ -246,7 +246,7 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool { gp.waiting = mysg gp.param = nil c.sendq.enqueue(mysg) - goparkunlock(&c.lock, "chan send", traceEvGoBlockSend, 3) + goparkunlock(&c.lock, waitReasonChanSend, traceEvGoBlockSend, 3) // someone woke us up. if mysg != gp.waiting { @@ -325,6 +325,8 @@ func sendDirect(t *_type, sg *sudog, src unsafe.Pointer) { // So make sure that no preemption points can happen between read & use. 
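(Illustrative aside, not part of the patch.) The atomic_pointer.go hunk earlier in this diff changes the runtime halves of the exported sync/atomic pointer operations: each now checks writeBarrier.enabled and calls atomicwb instead of writebarrierptr_prewrite. From user code the API is unchanged; a minimal sketch of the calls whose implementations moved, with node and sharedHead invented for the example:

package main

import (
	"fmt"
	"sync/atomic"
	"unsafe"
)

type node struct{ value int }

// sharedHead is updated with the sync/atomic pointer operations that are
// backed by atomicstorep / casp in the runtime hunk above.
var sharedHead unsafe.Pointer // effectively a *node

func main() {
	first := &node{value: 1}
	atomic.StorePointer(&sharedHead, unsafe.Pointer(first))

	second := &node{value: 2}
	if atomic.CompareAndSwapPointer(&sharedHead, unsafe.Pointer(first), unsafe.Pointer(second)) {
		fmt.Println("swapped to", (*node)(atomic.LoadPointer(&sharedHead)).value)
	}
}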
dst := sg.elem typeBitsBulkBarrier(t, uintptr(dst), uintptr(src), t.size) + // No need for cgo write barrier checks because dst is always + // Go memory. memmove(dst, src, t.size) } @@ -444,7 +446,7 @@ func chanrecv(c *hchan, ep unsafe.Pointer, block bool) (selected, received bool) if !block { return } - gopark(nil, nil, "chan receive (nil chan)", traceEvGoStop, 2) + gopark(nil, nil, waitReasonChanReceiveNilChan, traceEvGoStop, 2) throw("unreachable") } @@ -535,7 +537,7 @@ func chanrecv(c *hchan, ep unsafe.Pointer, block bool) (selected, received bool) mysg.c = c gp.param = nil c.recvq.enqueue(mysg) - goparkunlock(&c.lock, "chan receive", traceEvGoBlockRecv, 3) + goparkunlock(&c.lock, waitReasonChanReceive, traceEvGoBlockRecv, 3) // someone woke us up if mysg != gp.waiting { diff --git a/libgo/go/runtime/chanbarrier_test.go b/libgo/go/runtime/chanbarrier_test.go index b6029fb..d479574 100644 --- a/libgo/go/runtime/chanbarrier_test.go +++ b/libgo/go/runtime/chanbarrier_test.go @@ -57,7 +57,7 @@ func testChanSendBarrier(useSelect bool) { var globalMu sync.Mutex outer := 100 inner := 100000 - if testing.Short() { + if testing.Short() || runtime.GOARCH == "wasm" { outer = 10 inner = 1000 } diff --git a/libgo/go/runtime/cputicks.go b/libgo/go/runtime/cputicks.go index 7e62dc1..c41a58b 100644 --- a/libgo/go/runtime/cputicks.go +++ b/libgo/go/runtime/cputicks.go @@ -2,6 +2,14 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +// // +build !arm +// // +build !arm64 +// // +build !mips64 +// // +build !mips64le +// // +build !mips +// // +build !mipsle +// // +build !wasm + package runtime // careful: cputicks is not guaranteed to be monotonic! In particular, we have diff --git a/libgo/go/runtime/crash_cgo_test.go b/libgo/go/runtime/crash_cgo_test.go index 6688b3c..770f85e 100644 --- a/libgo/go/runtime/crash_cgo_test.go +++ b/libgo/go/runtime/crash_cgo_test.go @@ -89,19 +89,6 @@ func TestCgoExternalThreadSIGPROF(t *testing.T) { switch runtime.GOOS { case "plan9", "windows": t.Skipf("no pthreads on %s", runtime.GOOS) - case "darwin": - if runtime.GOARCH != "arm" && runtime.GOARCH != "arm64" { - // static constructor needs external linking, but we don't support - // external linking on OS X 10.6. 
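(Illustrative aside, not part of the patch.) The chan.go hunks above replace the string arguments to gopark/goparkunlock with typed waitReason constants; the text a user sees in goroutine dumps stays the same. A small sketch that makes the blocked state visible:

package main

import (
	"fmt"
	"runtime"
	"time"
)

func main() {
	ch := make(chan int)
	go func() {
		<-ch // parks in chanrecv; the dump below should show it as [chan receive]
	}()
	time.Sleep(100 * time.Millisecond) // give the goroutine time to block

	buf := make([]byte, 1<<16)
	n := runtime.Stack(buf, true) // dump all goroutines, including the blocked one
	fmt.Printf("%s\n", buf[:n])
}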
- out, err := exec.Command("uname", "-r").Output() - if err != nil { - t.Fatalf("uname -r failed: %v", err) - } - // OS X 10.6 == Darwin 10.x - if strings.HasPrefix(string(out), "10.") { - t.Skipf("no external linking on OS X 10.6") - } - } } if runtime.GOARCH == "ppc64" { // TODO(austin) External linking not implemented on @@ -252,8 +239,12 @@ func TestCgoCCodeSIGPROF(t *testing.T) { func TestCgoCrashTraceback(t *testing.T) { t.Parallel() - if runtime.GOOS != "linux" || (runtime.GOARCH != "amd64" && runtime.GOARCH != "ppc64le") { - t.Skipf("not yet supported on %s/%s", runtime.GOOS, runtime.GOARCH) + switch platform := runtime.GOOS + "/" + runtime.GOARCH; platform { + case "darwin/amd64": + case "linux/amd64": + case "linux/ppc64le": + default: + t.Skipf("not yet supported on %s", platform) } if runtime.Compiler == "gccgo" { t.Skip("gccgo does not have SetCgoTraceback") @@ -352,7 +343,7 @@ func TestCgoPprofThreadNoTraceback(t *testing.T) { } func TestRaceProf(t *testing.T) { - if runtime.GOOS != "linux" || runtime.GOARCH != "amd64" { + if (runtime.GOOS != "linux" && runtime.GOOS != "freebsd") || runtime.GOARCH != "amd64" { t.Skipf("not yet supported on %s/%s", runtime.GOOS, runtime.GOARCH) } if runtime.Compiler == "gccgo" { @@ -384,7 +375,7 @@ func TestRaceProf(t *testing.T) { func TestRaceSignal(t *testing.T) { t.Parallel() - if runtime.GOOS != "linux" || runtime.GOARCH != "amd64" { + if (runtime.GOOS != "linux" && runtime.GOOS != "freebsd") || runtime.GOARCH != "amd64" { t.Skipf("not yet supported on %s/%s", runtime.GOOS, runtime.GOARCH) } @@ -514,3 +505,19 @@ func TestCgoTracebackSigpanic(t *testing.T) { t.Fatalf("failure incorrectly contains %q. output:\n%s\n", nowant, got) } } + +// Test that C code called via cgo can use large Windows thread stacks +// and call back in to Go without crashing. See issue #20975. +// +// See also TestBigStackCallbackSyscall. +func TestBigStackCallbackCgo(t *testing.T) { + if runtime.GOOS != "windows" { + t.Skip("skipping windows specific test") + } + t.Parallel() + got := runTestProg(t, "testprogcgo", "BigStack") + want := "OK\n" + if got != want { + t.Errorf("expected %q got %v", want, got) + } +} diff --git a/libgo/go/runtime/crash_gccgo_test.go b/libgo/go/runtime/crash_gccgo_test.go index c216e54..d4a826e 100644 --- a/libgo/go/runtime/crash_gccgo_test.go +++ b/libgo/go/runtime/crash_gccgo_test.go @@ -38,8 +38,8 @@ func TestGccgoCrashTracebackNodebug(t *testing.T) { } cc := strings.Fields(os.Getenv("CC")) - cc = append(cc, "-x", "c++", "-") - out, _ := exec.Command(cc[0], cc[1:]...).CombinedOutput() + cc = append(cc, "-o", os.DevNull, "-x", "c++", "-") + out, _ := testenv.CleanCmdEnv(exec.Command(cc[0], cc[1:]...)).CombinedOutput() if bytes.Contains(out, []byte("error trying to exec 'cc1plus'")) { t.Skip("no C++ compiler") } diff --git a/libgo/go/runtime/crash_nonunix_test.go b/libgo/go/runtime/crash_nonunix_test.go index 2ce995c..bf349a5 100644 --- a/libgo/go/runtime/crash_nonunix_test.go +++ b/libgo/go/runtime/crash_nonunix_test.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
-// +build windows plan9 nacl +// +build windows plan9 nacl js,wasm package runtime_test diff --git a/libgo/go/runtime/crash_test.go b/libgo/go/runtime/crash_test.go index 602630d..91a5c16 100644 --- a/libgo/go/runtime/crash_test.go +++ b/libgo/go/runtime/crash_test.go @@ -652,3 +652,112 @@ func TestBadTraceback(t *testing.T) { } } } + +func TestTimePprof(t *testing.T) { + if runtime.Compiler == "gccgo" { + t.Skip("gccgo may not have the pprof tool") + } + fn := runTestProg(t, "testprog", "TimeProf") + fn = strings.TrimSpace(fn) + defer os.Remove(fn) + + cmd := testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), "tool", "pprof", "-top", "-nodecount=1", fn)) + cmd.Env = append(cmd.Env, "PPROF_TMPDIR="+os.TempDir()) + top, err := cmd.CombinedOutput() + t.Logf("%s", top) + if err != nil { + t.Error(err) + } else if bytes.Contains(top, []byte("ExternalCode")) { + t.Error("profiler refers to ExternalCode") + } +} + +// Test that runtime.abort does so. +func TestAbort(t *testing.T) { + // Pass GOTRACEBACK to ensure we get runtime frames. + output := runTestProg(t, "testprog", "Abort", "GOTRACEBACK=system") + if want := "runtime.abort"; !strings.Contains(output, want) { + t.Errorf("output does not contain %q:\n%s", want, output) + } + if strings.Contains(output, "BAD") { + t.Errorf("output contains BAD:\n%s", output) + } + // Check that it's a signal traceback. + want := "PC=" + // For systems that use a breakpoint, check specifically for that. + if runtime.Compiler == "gc" { + switch runtime.GOARCH { + case "386", "amd64": + switch runtime.GOOS { + case "plan9": + want = "sys: breakpoint" + case "windows": + want = "Exception 0x80000003" + default: + want = "SIGTRAP" + } + } + } + if !strings.Contains(output, want) { + t.Errorf("output does not contain %q:\n%s", want, output) + } +} + +// For TestRuntimePanic: test a panic in the runtime package without +// involving the testing harness. +func init() { + if os.Getenv("GO_TEST_RUNTIME_PANIC") == "1" { + defer func() { + if r := recover(); r != nil { + // We expect to crash, so exit 0 + // to indicate failure. + os.Exit(0) + } + }() + runtime.PanicForTesting(nil, 1) + // We expect to crash, so exit 0 to indicate failure. + os.Exit(0) + } +} + +func TestRuntimePanic(t *testing.T) { + testenv.MustHaveExec(t) + cmd := testenv.CleanCmdEnv(exec.Command(os.Args[0], "-test.run=TestRuntimePanic")) + cmd.Env = append(cmd.Env, "GO_TEST_RUNTIME_PANIC=1") + out, err := cmd.CombinedOutput() + t.Logf("%s", out) + if err == nil { + t.Error("child process did not fail") + } else if want := "runtime.unexportedPanicForTesting"; !bytes.Contains(out, []byte(want)) { + t.Errorf("output did not contain expected string %q", want) + } +} + +// Test that g0 stack overflows are handled gracefully. +func TestG0StackOverflow(t *testing.T) { + testenv.MustHaveExec(t) + + switch runtime.GOOS { + case "darwin", "dragonfly", "freebsd", "linux", "netbsd", "openbsd", "android": + t.Skipf("g0 stack is wrong on pthread platforms (see golang.org/issue/26061)") + } + + if os.Getenv("TEST_G0_STACK_OVERFLOW") != "1" { + cmd := testenv.CleanCmdEnv(exec.Command(os.Args[0], "-test.run=TestG0StackOverflow", "-test.v")) + cmd.Env = append(cmd.Env, "TEST_G0_STACK_OVERFLOW=1") + out, err := cmd.CombinedOutput() + // Don't check err since it's expected to crash. + if n := strings.Count(string(out), "morestack on g0\n"); n != 1 { + t.Fatalf("%s\n(exit status %v)", out, err) + } + // Check that it's a signal-style traceback. 
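(Illustrative aside, not part of the patch.) TestRuntimePanic above uses a common pattern for crash tests: the test binary re-executes itself (os.Args[0]) with a marker environment variable so the crash happens in a child process. A stripped-down sketch of the same technique, using TestMain and a made-up CRASH_CHILD variable instead of the init hook the runtime test uses:

package mypkg_test

import (
	"os"
	"os/exec"
	"strings"
	"testing"
)

// TestMain crashes on purpose when the child marker is set, so the panic
// happens in a subprocess rather than in the test harness itself.
func TestMain(m *testing.M) {
	if os.Getenv("CRASH_CHILD") == "1" {
		panic("boom")
	}
	os.Exit(m.Run())
}

func TestCrashInChild(t *testing.T) {
	cmd := exec.Command(os.Args[0], "-test.run=TestCrashInChild")
	cmd.Env = append(os.Environ(), "CRASH_CHILD=1")
	out, err := cmd.CombinedOutput()
	if err == nil {
		t.Fatal("child process did not fail")
	}
	if !strings.Contains(string(out), "boom") {
		t.Errorf("child output missing panic message:\n%s", out)
	}
}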
+ if runtime.GOOS != "windows" { + if want := "PC="; !strings.Contains(string(out), want) { + t.Errorf("output does not contain %q:\n%s", want, out) + } + } + return + } + + runtime.G0StackOverflow() +} diff --git a/libgo/go/runtime/debug.go b/libgo/go/runtime/debug.go index 7cddd29..5be2ec4 100644 --- a/libgo/go/runtime/debug.go +++ b/libgo/go/runtime/debug.go @@ -15,6 +15,10 @@ import ( // The number of logical CPUs on the local machine can be queried with NumCPU. // This call will go away when the scheduler improves. func GOMAXPROCS(n int) int { + if GOARCH == "wasm" && n > 1 { + n = 1 // WebAssembly has no threads yet, so only one CPU is possible. + } + lock(&sched.lock) ret := int(gomaxprocs) unlock(&sched.lock) diff --git a/libgo/go/runtime/debug/heapdump_test.go b/libgo/go/runtime/debug/heapdump_test.go index 7d5b950..c986efc 100644 --- a/libgo/go/runtime/debug/heapdump_test.go +++ b/libgo/go/runtime/debug/heapdump_test.go @@ -13,8 +13,8 @@ import ( ) func TestWriteHeapDumpNonempty(t *testing.T) { - if runtime.GOOS == "nacl" { - t.Skip("WriteHeapDump is not available on NaCl.") + if runtime.GOOS == "nacl" || runtime.GOOS == "js" { + t.Skipf("WriteHeapDump is not available on %s.", runtime.GOOS) } f, err := ioutil.TempFile("", "heapdumptest") if err != nil { @@ -42,8 +42,8 @@ func objfin(x *Obj) { } func TestWriteHeapDumpFinalizers(t *testing.T) { - if runtime.GOOS == "nacl" { - t.Skip("WriteHeapDump is not available on NaCl.") + if runtime.GOOS == "nacl" || runtime.GOOS == "js" { + t.Skipf("WriteHeapDump is not available on %s.", runtime.GOOS) } f, err := ioutil.TempFile("", "heapdumptest") if err != nil { diff --git a/libgo/go/runtime/debug_test.go b/libgo/go/runtime/debug_test.go new file mode 100644 index 0000000..38c764f --- /dev/null +++ b/libgo/go/runtime/debug_test.go @@ -0,0 +1,207 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// TODO: This test could be implemented on all (most?) UNIXes if we +// added syscall.Tgkill more widely. + +// We skip all of these tests under race mode because our test thread +// spends all of its time in the race runtime, which isn't a safe +// point. + +// +build ignore_for_gccgo +// +build amd64 +// +build linux +// +build !race + +package runtime_test + +import ( + "fmt" + "runtime" + "runtime/debug" + "sync/atomic" + "syscall" + "testing" +) + +func startDebugCallWorker(t *testing.T) (g *runtime.G, after func()) { + // This can deadlock if there aren't enough threads or if a GC + // tries to interrupt an atomic loop (see issue #10958). + ogomaxprocs := runtime.GOMAXPROCS(2) + ogcpercent := debug.SetGCPercent(-1) + + ready := make(chan *runtime.G) + var stop uint32 + done := make(chan error) + go debugCallWorker(ready, &stop, done) + g = <-ready + return g, func() { + atomic.StoreUint32(&stop, 1) + err := <-done + if err != nil { + t.Fatal(err) + } + runtime.GOMAXPROCS(ogomaxprocs) + debug.SetGCPercent(ogcpercent) + } +} + +func debugCallWorker(ready chan<- *runtime.G, stop *uint32, done chan<- error) { + runtime.LockOSThread() + defer runtime.UnlockOSThread() + + ready <- runtime.Getg() + + x := 2 + debugCallWorker2(stop, &x) + if x != 1 { + done <- fmt.Errorf("want x = 2, got %d; register pointer not adjusted?", x) + } + close(done) +} + +func debugCallWorker2(stop *uint32, x *int) { + for atomic.LoadUint32(stop) == 0 { + // Strongly encourage x to live in a register so we + // can test pointer register adjustment. 
+ *x++ + } + *x = 1 +} + +func debugCallTKill(tid int) error { + return syscall.Tgkill(syscall.Getpid(), tid, syscall.SIGTRAP) +} + +func TestDebugCall(t *testing.T) { + g, after := startDebugCallWorker(t) + defer after() + + // Inject a call into the debugCallWorker goroutine and test + // basic argument and result passing. + var args struct { + x int + yRet int + } + fn := func(x int) (yRet int) { + return x + 1 + } + args.x = 42 + if _, err := runtime.InjectDebugCall(g, fn, &args, debugCallTKill); err != nil { + t.Fatal(err) + } + if args.yRet != 43 { + t.Fatalf("want 43, got %d", args.yRet) + } +} + +func TestDebugCallLarge(t *testing.T) { + g, after := startDebugCallWorker(t) + defer after() + + // Inject a call with a large call frame. + const N = 128 + var args struct { + in [N]int + out [N]int + } + fn := func(in [N]int) (out [N]int) { + for i := range in { + out[i] = in[i] + 1 + } + return + } + var want [N]int + for i := range args.in { + args.in[i] = i + want[i] = i + 1 + } + if _, err := runtime.InjectDebugCall(g, fn, &args, debugCallTKill); err != nil { + t.Fatal(err) + } + if want != args.out { + t.Fatalf("want %v, got %v", want, args.out) + } +} + +func TestDebugCallGC(t *testing.T) { + g, after := startDebugCallWorker(t) + defer after() + + // Inject a call that performs a GC. + if _, err := runtime.InjectDebugCall(g, runtime.GC, nil, debugCallTKill); err != nil { + t.Fatal(err) + } +} + +func TestDebugCallGrowStack(t *testing.T) { + g, after := startDebugCallWorker(t) + defer after() + + // Inject a call that grows the stack. debugCallWorker checks + // for stack pointer breakage. + if _, err := runtime.InjectDebugCall(g, func() { growStack(nil) }, nil, debugCallTKill); err != nil { + t.Fatal(err) + } +} + +//go:nosplit +func debugCallUnsafePointWorker(gpp **runtime.G, ready, stop *uint32) { + // The nosplit causes this function to not contain safe-points + // except at calls. + runtime.LockOSThread() + defer runtime.UnlockOSThread() + + *gpp = runtime.Getg() + + for atomic.LoadUint32(stop) == 0 { + atomic.StoreUint32(ready, 1) + } +} + +func TestDebugCallUnsafePoint(t *testing.T) { + // This can deadlock if there aren't enough threads or if a GC + // tries to interrupt an atomic loop (see issue #10958). + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(2)) + defer debug.SetGCPercent(debug.SetGCPercent(-1)) + + // Test that the runtime refuses call injection at unsafe points. + var g *runtime.G + var ready, stop uint32 + defer atomic.StoreUint32(&stop, 1) + go debugCallUnsafePointWorker(&g, &ready, &stop) + for atomic.LoadUint32(&ready) == 0 { + runtime.Gosched() + } + + _, err := runtime.InjectDebugCall(g, func() {}, nil, debugCallTKill) + if msg := "call not at safe point"; err == nil || err.Error() != msg { + t.Fatalf("want %q, got %s", msg, err) + } +} + +func TestDebugCallPanic(t *testing.T) { + // This can deadlock if there aren't enough threads. 
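(Illustrative aside, not part of the patch.) The debug-call tests above keep a worker goroutine locked to one OS thread and spinning until an atomic flag flips, so the harness knows exactly which thread to send SIGTRAP to. A standalone sketch of just that control pattern:

package main

import (
	"fmt"
	"runtime"
	"sync/atomic"
	"time"
)

func worker(stop *uint32, done chan<- int) {
	runtime.LockOSThread() // stay on a single OS thread, as debugCallWorker does
	defer runtime.UnlockOSThread()

	n := 0
	for atomic.LoadUint32(stop) == 0 {
		n++ // busy loop until asked to stop
	}
	done <- n
}

func main() {
	var stop uint32
	done := make(chan int)
	go worker(&stop, done)

	time.Sleep(50 * time.Millisecond)
	atomic.StoreUint32(&stop, 1)
	fmt.Println("iterations:", <-done)
}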
+ defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(2)) + + ready := make(chan *runtime.G) + var stop uint32 + defer atomic.StoreUint32(&stop, 1) + go func() { + runtime.LockOSThread() + defer runtime.UnlockOSThread() + ready <- runtime.Getg() + for atomic.LoadUint32(&stop) == 0 { + } + }() + g := <-ready + + p, err := runtime.InjectDebugCall(g, func() { panic("test") }, nil, debugCallTKill) + if err != nil { + t.Fatal(err) + } + if ps, ok := p.(string); !ok || ps != "test" { + t.Fatalf("wanted panic %v, got %v", "test", p) + } +} diff --git a/libgo/go/runtime/env_posix.go b/libgo/go/runtime/env_posix.go index ddf3c02..399e88f 100644 --- a/libgo/go/runtime/env_posix.go +++ b/libgo/go/runtime/env_posix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows +// +build aix darwin dragonfly freebsd js,wasm linux nacl netbsd openbsd solaris windows package runtime diff --git a/libgo/go/runtime/error.go b/libgo/go/runtime/error.go index 1a038cf..b1a3f68 100644 --- a/libgo/go/runtime/error.go +++ b/libgo/go/runtime/error.go @@ -19,55 +19,40 @@ type Error interface { // A TypeAssertionError explains a failed type assertion. type TypeAssertionError struct { - interfaceString string - concreteString string - assertedString string - missingMethod string // one method needed by Interface, missing from Concrete + _interface *_type + concrete *_type + asserted *_type + missingMethod string // one method needed by Interface, missing from Concrete } func (*TypeAssertionError) RuntimeError() {} func (e *TypeAssertionError) Error() string { - inter := e.interfaceString - if inter == "" { - inter = "interface" + inter := "interface" + if e._interface != nil { + inter = e._interface.string() } - if e.concreteString == "" { - return "interface conversion: " + inter + " is nil, not " + e.assertedString + as := e.asserted.string() + if e.concrete == nil { + return "interface conversion: " + inter + " is nil, not " + as } + cs := e.concrete.string() if e.missingMethod == "" { - return "interface conversion: " + inter + " is " + e.concreteString + - ", not " + e.assertedString + msg := "interface conversion: " + inter + " is " + cs + ", not " + as + if cs == as { + // provide slightly clearer error message + if e.concrete.pkgpath() != e.asserted.pkgpath() { + msg += " (types from different packages)" + } else { + msg += " (types from different scopes)" + } + } + return msg } - return "interface conversion: " + e.concreteString + " is not " + e.assertedString + + return "interface conversion: " + cs + " is not " + as + ": missing method " + e.missingMethod } -// For calling from C. -func NewTypeAssertionError(ps1, ps2, ps3 *string, pmeth *string, ret *interface{}) { - var s1, s2, s3, meth string - - if ps1 != nil { - s1 = *ps1 - } - if ps2 != nil { - s2 = *ps2 - } - if ps3 != nil { - s3 = *ps3 - } - if pmeth != nil { - meth = *pmeth - } - - // For gccgo, strip out quoted strings. - s1 = unquote(s1) - s2 = unquote(s2) - s3 = unquote(s3) - - *ret = &TypeAssertionError{s1, s2, s3, meth} -} - // Remove quoted strings from gccgo reflection strings. func unquote(s string) string { ls := len(s) @@ -135,7 +120,7 @@ type stringer interface { func typestring(x interface{}) string { e := efaceOf(&x) - return *e._type.string + return e._type.string() } // printany prints an argument passed to panic. 
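(Illustrative aside, not part of the patch.) The error.go rewrite above makes TypeAssertionError carry *_type values and build its message on demand; what a program observes is unchanged. A small example of triggering and inspecting that runtime error:

package main

import (
	"fmt"
	"runtime"
)

func main() {
	defer func() {
		if r := recover(); r != nil {
			// A failed type assertion panics with a *runtime.TypeAssertionError,
			// which implements the runtime.Error interface.
			if re, ok := r.(runtime.Error); ok {
				fmt.Println("runtime error:", re.Error())
			}
		}
	}()

	var i interface{} = "hello"
	_ = i.(int) // interface conversion: interface {} is string, not int
}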
diff --git a/libgo/go/runtime/export_debug_test.go b/libgo/go/runtime/export_debug_test.go new file mode 100644 index 0000000..2d2d535 --- /dev/null +++ b/libgo/go/runtime/export_debug_test.go @@ -0,0 +1,169 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore_for_gccgo +// +build amd64 +// +build linux + +package runtime + +import ( + "runtime/internal/sys" + "unsafe" +) + +// InjectDebugCall injects a debugger call to fn into g. args must be +// a pointer to a valid call frame (including arguments and return +// space) for fn, or nil. tkill must be a function that will send +// SIGTRAP to thread ID tid. gp must be locked to its OS thread and +// running. +// +// On success, InjectDebugCall returns the panic value of fn or nil. +// If fn did not panic, its results will be available in args. +func InjectDebugCall(gp *g, fn, args interface{}, tkill func(tid int) error) (interface{}, error) { + if gp.lockedm == 0 { + return nil, plainError("goroutine not locked to thread") + } + + tid := int(gp.lockedm.ptr().procid) + if tid == 0 { + return nil, plainError("missing tid") + } + + f := efaceOf(&fn) + if f._type == nil || f._type.kind&kindMask != kindFunc { + return nil, plainError("fn must be a function") + } + fv := (*funcval)(f.data) + + a := efaceOf(&args) + if a._type != nil && a._type.kind&kindMask != kindPtr { + return nil, plainError("args must be a pointer or nil") + } + argp := a.data + var argSize uintptr + if argp != nil { + argSize = (*ptrtype)(unsafe.Pointer(a._type)).elem.size + } + + h := new(debugCallHandler) + h.gp = gp + h.fv, h.argp, h.argSize = fv, argp, argSize + h.handleF = h.handle // Avoid allocating closure during signal + noteclear(&h.done) + + defer func() { testSigtrap = nil }() + testSigtrap = h.inject + if err := tkill(tid); err != nil { + return nil, err + } + // Wait for completion. + notetsleepg(&h.done, -1) + if len(h.err) != 0 { + return nil, h.err + } + return h.panic, nil +} + +type debugCallHandler struct { + gp *g + fv *funcval + argp unsafe.Pointer + argSize uintptr + panic interface{} + + handleF func(info *siginfo, ctxt *sigctxt, gp2 *g) bool + + err plainError + done note + savedRegs sigcontext + savedFP fpstate1 +} + +func (h *debugCallHandler) inject(info *siginfo, ctxt *sigctxt, gp2 *g) bool { + switch h.gp.atomicstatus { + case _Grunning: + if getg().m != h.gp.m { + println("trap on wrong M", getg().m, h.gp.m) + return false + } + // Push current PC on the stack. + rsp := ctxt.rsp() - sys.PtrSize + *(*uint64)(unsafe.Pointer(uintptr(rsp))) = ctxt.rip() + ctxt.set_rsp(rsp) + // Write the argument frame size. + *(*uintptr)(unsafe.Pointer(uintptr(rsp - 16))) = h.argSize + // Save current registers. + h.savedRegs = *ctxt.regs() + h.savedFP = *h.savedRegs.fpstate + h.savedRegs.fpstate = nil + // Set PC to debugCallV1. + ctxt.set_rip(uint64(funcPC(debugCallV1))) + default: + h.err = plainError("goroutine in unexpected state at call inject") + return true + } + // Switch to the debugCall protocol and resume execution. + testSigtrap = h.handleF + return true +} + +func (h *debugCallHandler) handle(info *siginfo, ctxt *sigctxt, gp2 *g) bool { + // Sanity check. 
+ if getg().m != h.gp.m { + println("trap on wrong M", getg().m, h.gp.m) + return false + } + f := findfunc(uintptr(ctxt.rip())) + if !(hasprefix(funcname(f), "runtime.debugCall") || hasprefix(funcname(f), "debugCall")) { + println("trap in unknown function", funcname(f)) + return false + } + if *(*byte)(unsafe.Pointer(uintptr(ctxt.rip() - 1))) != 0xcc { + println("trap at non-INT3 instruction pc =", hex(ctxt.rip())) + return false + } + + switch status := ctxt.rax(); status { + case 0: + // Frame is ready. Copy the arguments to the frame. + sp := ctxt.rsp() + memmove(unsafe.Pointer(uintptr(sp)), h.argp, h.argSize) + // Push return PC. + sp -= sys.PtrSize + ctxt.set_rsp(sp) + *(*uint64)(unsafe.Pointer(uintptr(sp))) = ctxt.rip() + // Set PC to call and context register. + ctxt.set_rip(uint64(h.fv.fn)) + ctxt.regs().rcx = uint64(uintptr(unsafe.Pointer(h.fv))) + case 1: + // Function returned. Copy frame back out. + sp := ctxt.rsp() + memmove(h.argp, unsafe.Pointer(uintptr(sp)), h.argSize) + case 2: + // Function panicked. Copy panic out. + sp := ctxt.rsp() + memmove(unsafe.Pointer(&h.panic), unsafe.Pointer(uintptr(sp)), 2*sys.PtrSize) + case 8: + // Call isn't safe. Get the reason. + sp := ctxt.rsp() + reason := *(*string)(unsafe.Pointer(uintptr(sp))) + h.err = plainError(reason) + case 16: + // Restore all registers except RIP and RSP. + rip, rsp := ctxt.rip(), ctxt.rsp() + fp := ctxt.regs().fpstate + *ctxt.regs() = h.savedRegs + ctxt.regs().fpstate = fp + *fp = h.savedFP + ctxt.set_rip(rip) + ctxt.set_rsp(rsp) + // Done + notewakeup(&h.done) + default: + h.err = plainError("unexpected debugCallV1 status") + } + // Resume execution. + return true +} diff --git a/libgo/go/runtime/export_linux_test.go b/libgo/go/runtime/export_linux_test.go index 183a6ee..96ff1c7 100644 --- a/libgo/go/runtime/export_linux_test.go +++ b/libgo/go/runtime/export_linux_test.go @@ -6,5 +6,11 @@ package runtime -//var NewOSProc0 = newosproc0 -//var Mincore = mincore +import "unsafe" + +// var NewOSProc0 = newosproc0 +// var Mincore = mincore + +func Epollctl(epfd, op, fd int32, ev unsafe.Pointer) int32 { + return epollctl(epfd, op, fd, (*epollevent)(ev)) +} diff --git a/libgo/go/runtime/export_test.go b/libgo/go/runtime/export_test.go index 5e798e3..7f4811c 100644 --- a/libgo/go/runtime/export_test.go +++ b/libgo/go/runtime/export_test.go @@ -21,7 +21,6 @@ import ( //var Fcmp64 = fcmp64 //var Fintto64 = fintto64 //var F64toint = f64toint -//var Sqrt = sqrt var Entersyscall = entersyscall var Exitsyscall = exitsyscall @@ -372,6 +371,8 @@ func (rw *RWMutex) Unlock() { rw.rw.unlock() } +const RuntimeHmapSize = unsafe.Sizeof(hmap{}) + func MapBucketsCount(m map[int]int) int { h := *(**hmap)(unsafe.Pointer(&m)) return 1 << h.B @@ -395,3 +396,61 @@ func LockOSCounts() (external, internal uint32) { } return g.m.lockedExt, g.m.lockedInt } + +func KeepNArenaHints(n int) { + hint := mheap_.arenaHints + for i := 1; i < n; i++ { + hint = hint.next + if hint == nil { + return + } + } + hint.next = nil +} + +// MapNextArenaHint reserves a page at the next arena growth hint, +// preventing the arena from growing there, and returns the range of +// addresses that are no longer viable. 
+func MapNextArenaHint() (start, end uintptr) { + hint := mheap_.arenaHints + addr := hint.addr + if hint.down { + start, end = addr-heapArenaBytes, addr + addr -= physPageSize + } else { + start, end = addr, addr+heapArenaBytes + } + sysReserve(unsafe.Pointer(addr), physPageSize) + return +} + +func GetNextArenaHint() uintptr { + return mheap_.arenaHints.addr +} + +type G = g + +func Getg() *G { + return getg() +} + +//go:noinline +func PanicForTesting(b []byte, i int) byte { + return unexportedPanicForTesting(b, i) +} + +//go:noinline +func unexportedPanicForTesting(b []byte, i int) byte { + return b[i] +} + +func G0StackOverflow() { + systemstack(func() { + stackOverflow(nil) + }) +} + +func stackOverflow(x *byte) { + var buf [256]byte + stackOverflow(&buf[0]) +} diff --git a/libgo/go/runtime/extern.go b/libgo/go/runtime/extern.go index b3afd10..c9d10f1 100644 --- a/libgo/go/runtime/extern.go +++ b/libgo/go/runtime/extern.go @@ -116,6 +116,12 @@ It is a comma-separated list of name=val pairs setting these named variables: schedtrace: setting schedtrace=X causes the scheduler to emit a single line to standard error every X milliseconds, summarizing the scheduler state. + tracebackancestors: setting tracebackancestors=N extends tracebacks with the stacks at + which goroutines were created, where N limits the number of ancestor goroutines to + report. This also extends the information returned by runtime.Stack. Ancestor's goroutine + IDs will refer to the ID of the goroutine at the time of creation; it's possible for this + ID to be reused for another goroutine. Setting N to 0 will report no ancestry information. + The net and net/http packages also refer to debugging variables in GODEBUG. See the documentation for those packages for details. diff --git a/libgo/go/runtime/gc_test.go b/libgo/go/runtime/gc_test.go index a8c52d2..180919b 100644 --- a/libgo/go/runtime/gc_test.go +++ b/libgo/go/runtime/gc_test.go @@ -10,6 +10,7 @@ import ( "reflect" "runtime" "runtime/debug" + "sync" "sync/atomic" "testing" "time" @@ -44,7 +45,7 @@ func TestGcDeepNesting(t *testing.T) { } } -func TestGcHashmapIndirection(t *testing.T) { +func TestGcMapIndirection(t *testing.T) { defer debug.SetGCPercent(debug.SetGCPercent(1)) runtime.GC() type T struct { @@ -157,6 +158,10 @@ func TestHugeGCInfo(t *testing.T) { /* func TestPeriodicGC(t *testing.T) { + if runtime.GOARCH == "wasm" { + t.Skip("no sysmon on wasm yet") + } + // Make sure we're not in the middle of a GC. 
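(Illustrative aside, not part of the patch.) The GODEBUG documentation added to extern.go above introduces tracebackancestors, which also extends what runtime.Stack reports. A tiny program to experiment with, run for example as GODEBUG=tracebackancestors=5 go run main.go:

package main

import (
	"fmt"
	"runtime"
)

func main() {
	done := make(chan struct{})
	go func() { // ancestor goroutine
		go func() { // child: with tracebackancestors set, its creators' stacks should be reported too
			buf := make([]byte, 1<<16)
			n := runtime.Stack(buf, true) // full dump of all goroutines
			fmt.Printf("%s\n", buf[:n])
			close(done)
		}()
	}()
	<-done
}

Without the GODEBUG setting, the dump stops at the usual "created by" line for each goroutine.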
runtime.GC() @@ -642,3 +647,34 @@ func BenchmarkBulkWriteBarrier(b *testing.B) { runtime.KeepAlive(ptrs) } + +func BenchmarkScanStackNoLocals(b *testing.B) { + var ready sync.WaitGroup + teardown := make(chan bool) + for j := 0; j < 10; j++ { + ready.Add(1) + go func() { + x := 100000 + countpwg(&x, &ready, teardown) + }() + } + ready.Wait() + b.ResetTimer() + for i := 0; i < b.N; i++ { + b.StartTimer() + runtime.GC() + runtime.GC() + b.StopTimer() + } + close(teardown) +} + +func countpwg(n *int, ready *sync.WaitGroup, teardown chan bool) { + if *n == 0 { + ready.Done() + <-teardown + return + } + *n-- + countpwg(n, ready, teardown) +} diff --git a/libgo/go/runtime/gcinfo_test.go b/libgo/go/runtime/gcinfo_test.go index 4ac67dc..ca012bb 100644 --- a/libgo/go/runtime/gcinfo_test.go +++ b/libgo/go/runtime/gcinfo_test.go @@ -133,7 +133,7 @@ func infoBigStruct() []byte { typeScalar, typeScalar, typeScalar, typeScalar, // t int; y uint16; u uint64 typePointer, typeScalar, // i string } - case "arm64", "amd64", "mips64", "mips64le", "ppc64", "ppc64le", "s390x": + case "arm64", "amd64", "mips64", "mips64le", "ppc64", "ppc64le", "s390x", "wasm": return []byte{ typePointer, // q *int typeScalar, typeScalar, typeScalar, // w byte; e [17]byte @@ -186,6 +186,6 @@ var ( infoString = []byte{typePointer, typeScalar} infoSlice = []byte{typePointer, typeScalar, typeScalar} - infoEface = []byte{typePointer, typePointer} - infoIface = []byte{typePointer, typePointer} + infoEface = []byte{typeScalar, typePointer} + infoIface = []byte{typeScalar, typePointer} ) diff --git a/libgo/go/runtime/hash64.go b/libgo/go/runtime/hash64.go index 74775a8..7c6513e 100644 --- a/libgo/go/runtime/hash64.go +++ b/libgo/go/runtime/hash64.go @@ -6,7 +6,7 @@ // xxhash: https://code.google.com/p/xxhash/ // cityhash: https://code.google.com/p/cityhash/ -// +build amd64 amd64p32 arm64 mips64 mips64le ppc64 ppc64le s390x alpha arm64be ia64 mips64p32 mips64p32le sparc64 riscv64 +// +build amd64 amd64p32 arm64 mips64 mips64le ppc64 ppc64le s390x wasm alpha arm64be ia64 mips64p32 mips64p32le sparc64 riscv64 package runtime @@ -26,7 +26,8 @@ const ( ) func memhash(p unsafe.Pointer, seed, s uintptr) uintptr { - if GOARCH == "amd64" && GOOS != "nacl" && useAeshash { + if (GOARCH == "amd64" || GOARCH == "arm64") && + GOOS != "nacl" && useAeshash { return aeshash(p, seed, s) } h := uint64(seed + s*hashkey[0]) diff --git a/libgo/go/runtime/hash_test.go b/libgo/go/runtime/hash_test.go index 54c9160..070edb6 100644 --- a/libgo/go/runtime/hash_test.go +++ b/libgo/go/runtime/hash_test.go @@ -161,6 +161,9 @@ func TestSmhasherZeros(t *testing.T) { // Strings with up to two nonzero bytes all have distinct hashes. func TestSmhasherTwoNonzero(t *testing.T) { + if GOARCH == "wasm" { + t.Skip("Too slow on wasm") + } if testing.Short() { t.Skip("Skipping in short mode") } @@ -229,6 +232,9 @@ func TestSmhasherCyclic(t *testing.T) { // Test strings with only a few bits set func TestSmhasherSparse(t *testing.T) { + if GOARCH == "wasm" { + t.Skip("Too slow on wasm") + } if testing.Short() { t.Skip("Skipping in short mode") } @@ -264,6 +270,9 @@ func setbits(h *HashSet, b []byte, i int, k int) { // Test all possible combinations of n blocks from the set s. // "permutation" is a bad name here, but it is what Smhasher uses. 
func TestSmhasherPermutation(t *testing.T) { + if GOARCH == "wasm" { + t.Skip("Too slow on wasm") + } if testing.Short() { t.Skip("Skipping in short mode") } @@ -433,6 +442,9 @@ func (k *IfaceKey) name() string { // Flipping a single bit of a key should flip each output bit with 50% probability. func TestSmhasherAvalanche(t *testing.T) { + if GOARCH == "wasm" { + t.Skip("Too slow on wasm") + } if testing.Short() { t.Skip("Skipping in short mode") } @@ -508,6 +520,9 @@ func TestSmhasherWindowed(t *testing.T) { windowed(t, &BytesKey{make([]byte, 128)}) } func windowed(t *testing.T, k Key) { + if GOARCH == "wasm" { + t.Skip("Too slow on wasm") + } if testing.Short() { t.Skip("Skipping in short mode") } diff --git a/libgo/go/runtime/hashmap_fast.go b/libgo/go/runtime/hashmap_fast.go deleted file mode 100644 index e0fc981..0000000 --- a/libgo/go/runtime/hashmap_fast.go +++ /dev/null @@ -1,1237 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package runtime - -import ( - "runtime/internal/sys" - "unsafe" -) - -func mapaccess1_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer { - if raceenabled && h != nil { - callerpc := getcallerpc() - racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_fast32)) - } - if h == nil || h.count == 0 { - return unsafe.Pointer(&zeroVal[0]) - } - if h.flags&hashWriting != 0 { - throw("concurrent map read and map write") - } - var b *bmap - if h.B == 0 { - // One-bucket table. No need to hash. - b = (*bmap)(h.buckets) - } else { - hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - m := bucketMask(h.B) - b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) - if c := h.oldbuckets; c != nil { - if !h.sameSizeGrow() { - // There used to be half as many buckets; mask down one more power of two. - m >>= 1 - } - oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) - if !evacuated(oldb) { - b = oldb - } - } - } - for ; b != nil; b = b.overflow(t) { - for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) { - if *(*uint32)(k) == key && b.tophash[i] != empty { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)) - } - } - } - return unsafe.Pointer(&zeroVal[0]) -} - -func mapaccess2_fast32(t *maptype, h *hmap, key uint32) (unsafe.Pointer, bool) { - if raceenabled && h != nil { - callerpc := getcallerpc() - racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_fast32)) - } - if h == nil || h.count == 0 { - return unsafe.Pointer(&zeroVal[0]), false - } - if h.flags&hashWriting != 0 { - throw("concurrent map read and map write") - } - var b *bmap - if h.B == 0 { - // One-bucket table. No need to hash. - b = (*bmap)(h.buckets) - } else { - hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - m := bucketMask(h.B) - b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) - if c := h.oldbuckets; c != nil { - if !h.sameSizeGrow() { - // There used to be half as many buckets; mask down one more power of two. 
- m >>= 1 - } - oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) - if !evacuated(oldb) { - b = oldb - } - } - } - for ; b != nil; b = b.overflow(t) { - for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) { - if *(*uint32)(k) == key && b.tophash[i] != empty { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)), true - } - } - } - return unsafe.Pointer(&zeroVal[0]), false -} - -func mapaccess1_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer { - if raceenabled && h != nil { - callerpc := getcallerpc() - racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_fast64)) - } - if h == nil || h.count == 0 { - return unsafe.Pointer(&zeroVal[0]) - } - if h.flags&hashWriting != 0 { - throw("concurrent map read and map write") - } - var b *bmap - if h.B == 0 { - // One-bucket table. No need to hash. - b = (*bmap)(h.buckets) - } else { - hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - m := bucketMask(h.B) - b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) - if c := h.oldbuckets; c != nil { - if !h.sameSizeGrow() { - // There used to be half as many buckets; mask down one more power of two. - m >>= 1 - } - oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) - if !evacuated(oldb) { - b = oldb - } - } - } - for ; b != nil; b = b.overflow(t) { - for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) { - if *(*uint64)(k) == key && b.tophash[i] != empty { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)) - } - } - } - return unsafe.Pointer(&zeroVal[0]) -} - -func mapaccess2_fast64(t *maptype, h *hmap, key uint64) (unsafe.Pointer, bool) { - if raceenabled && h != nil { - callerpc := getcallerpc() - racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_fast64)) - } - if h == nil || h.count == 0 { - return unsafe.Pointer(&zeroVal[0]), false - } - if h.flags&hashWriting != 0 { - throw("concurrent map read and map write") - } - var b *bmap - if h.B == 0 { - // One-bucket table. No need to hash. - b = (*bmap)(h.buckets) - } else { - hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - m := bucketMask(h.B) - b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) - if c := h.oldbuckets; c != nil { - if !h.sameSizeGrow() { - // There used to be half as many buckets; mask down one more power of two. - m >>= 1 - } - oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) - if !evacuated(oldb) { - b = oldb - } - } - } - for ; b != nil; b = b.overflow(t) { - for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) { - if *(*uint64)(k) == key && b.tophash[i] != empty { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)), true - } - } - } - return unsafe.Pointer(&zeroVal[0]), false -} - -func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer { - if raceenabled && h != nil { - callerpc := getcallerpc() - racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_faststr)) - } - if h == nil || h.count == 0 { - return unsafe.Pointer(&zeroVal[0]) - } - if h.flags&hashWriting != 0 { - throw("concurrent map read and map write") - } - key := stringStructOf(&ky) - if h.B == 0 { - // One-bucket table. 
- b := (*bmap)(h.buckets) - if key.len < 32 { - // short key, doing lots of comparisons is ok - for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { - k := (*stringStruct)(kptr) - if k.len != key.len || b.tophash[i] == empty { - continue - } - if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) - } - } - return unsafe.Pointer(&zeroVal[0]) - } - // long key, try not to do more comparisons than necessary - keymaybe := uintptr(bucketCnt) - for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { - k := (*stringStruct)(kptr) - if k.len != key.len || b.tophash[i] == empty { - continue - } - if k.str == key.str { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) - } - // check first 4 bytes - if *((*[4]byte)(key.str)) != *((*[4]byte)(k.str)) { - continue - } - // check last 4 bytes - if *((*[4]byte)(add(key.str, uintptr(key.len)-4))) != *((*[4]byte)(add(k.str, uintptr(key.len)-4))) { - continue - } - if keymaybe != bucketCnt { - // Two keys are potential matches. Use hash to distinguish them. - goto dohash - } - keymaybe = i - } - if keymaybe != bucketCnt { - k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+keymaybe*2*sys.PtrSize)) - if memequal(k.str, key.str, uintptr(key.len)) { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+keymaybe*uintptr(t.valuesize)) - } - } - return unsafe.Pointer(&zeroVal[0]) - } -dohash: - hash := t.key.hashfn(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0)) - m := bucketMask(h.B) - b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) - if c := h.oldbuckets; c != nil { - if !h.sameSizeGrow() { - // There used to be half as many buckets; mask down one more power of two. - m >>= 1 - } - oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) - if !evacuated(oldb) { - b = oldb - } - } - top := tophash(hash) - for ; b != nil; b = b.overflow(t) { - for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { - k := (*stringStruct)(kptr) - if k.len != key.len || b.tophash[i] != top { - continue - } - if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) - } - } - } - return unsafe.Pointer(&zeroVal[0]) -} - -func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) { - if raceenabled && h != nil { - callerpc := getcallerpc() - racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_faststr)) - } - if h == nil || h.count == 0 { - return unsafe.Pointer(&zeroVal[0]), false - } - if h.flags&hashWriting != 0 { - throw("concurrent map read and map write") - } - key := stringStructOf(&ky) - if h.B == 0 { - // One-bucket table. 
- b := (*bmap)(h.buckets) - if key.len < 32 { - // short key, doing lots of comparisons is ok - for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { - k := (*stringStruct)(kptr) - if k.len != key.len || b.tophash[i] == empty { - continue - } - if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)), true - } - } - return unsafe.Pointer(&zeroVal[0]), false - } - // long key, try not to do more comparisons than necessary - keymaybe := uintptr(bucketCnt) - for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { - k := (*stringStruct)(kptr) - if k.len != key.len || b.tophash[i] == empty { - continue - } - if k.str == key.str { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)), true - } - // check first 4 bytes - if *((*[4]byte)(key.str)) != *((*[4]byte)(k.str)) { - continue - } - // check last 4 bytes - if *((*[4]byte)(add(key.str, uintptr(key.len)-4))) != *((*[4]byte)(add(k.str, uintptr(key.len)-4))) { - continue - } - if keymaybe != bucketCnt { - // Two keys are potential matches. Use hash to distinguish them. - goto dohash - } - keymaybe = i - } - if keymaybe != bucketCnt { - k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+keymaybe*2*sys.PtrSize)) - if memequal(k.str, key.str, uintptr(key.len)) { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+keymaybe*uintptr(t.valuesize)), true - } - } - return unsafe.Pointer(&zeroVal[0]), false - } -dohash: - hash := t.key.hashfn(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0)) - m := bucketMask(h.B) - b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) - if c := h.oldbuckets; c != nil { - if !h.sameSizeGrow() { - // There used to be half as many buckets; mask down one more power of two. - m >>= 1 - } - oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) - if !evacuated(oldb) { - b = oldb - } - } - top := tophash(hash) - for ; b != nil; b = b.overflow(t) { - for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { - k := (*stringStruct)(kptr) - if k.len != key.len || b.tophash[i] != top { - continue - } - if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)), true - } - } - } - return unsafe.Pointer(&zeroVal[0]), false -} - -func mapassign_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer { - if h == nil { - panic(plainError("assignment to entry in nil map")) - } - if raceenabled { - callerpc := getcallerpc() - racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast32)) - } - if h.flags&hashWriting != 0 { - throw("concurrent map writes") - } - hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - - // Set hashWriting after calling alg.hash for consistency with mapassign. 
- h.flags |= hashWriting - - if h.buckets == nil { - h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) - } - -again: - bucket := hash & bucketMask(h.B) - if h.growing() { - growWork_fast32(t, h, bucket) - } - b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - - var insertb *bmap - var inserti uintptr - var insertk unsafe.Pointer - - for { - for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] == empty { - if insertb == nil { - inserti = i - insertb = b - } - continue - } - k := *((*uint32)(add(unsafe.Pointer(b), dataOffset+i*4))) - if k != key { - continue - } - inserti = i - insertb = b - goto done - } - ovf := b.overflow(t) - if ovf == nil { - break - } - b = ovf - } - - // Did not find mapping for key. Allocate new cell & add entry. - - // If we hit the max load factor or we have too many overflow buckets, - // and we're not already in the middle of growing, start growing. - if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { - hashGrow(t, h) - goto again // Growing the table invalidates everything, so try again - } - - if insertb == nil { - // all current buckets are full, allocate a new one. - insertb = h.newoverflow(t, b) - inserti = 0 // not necessary, but avoids needlessly spilling inserti - } - insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks - - insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*4) - // store new key at insert position - *(*uint32)(insertk) = key - - h.count++ - -done: - val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*4+inserti*uintptr(t.valuesize)) - if h.flags&hashWriting == 0 { - throw("concurrent map writes") - } - h.flags &^= hashWriting - return val -} - -func mapassign_fast32ptr(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { - if h == nil { - panic(plainError("assignment to entry in nil map")) - } - if raceenabled { - callerpc := getcallerpc() - racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast32)) - } - if h.flags&hashWriting != 0 { - throw("concurrent map writes") - } - hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - - // Set hashWriting after calling alg.hash for consistency with mapassign. - h.flags |= hashWriting - - if h.buckets == nil { - h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) - } - -again: - bucket := hash & bucketMask(h.B) - if h.growing() { - growWork_fast32(t, h, bucket) - } - b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - - var insertb *bmap - var inserti uintptr - var insertk unsafe.Pointer - - for { - for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] == empty { - if insertb == nil { - inserti = i - insertb = b - } - continue - } - k := *((*unsafe.Pointer)(add(unsafe.Pointer(b), dataOffset+i*4))) - if k != key { - continue - } - inserti = i - insertb = b - goto done - } - ovf := b.overflow(t) - if ovf == nil { - break - } - b = ovf - } - - // Did not find mapping for key. Allocate new cell & add entry. - - // If we hit the max load factor or we have too many overflow buckets, - // and we're not already in the middle of growing, start growing. - if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { - hashGrow(t, h) - goto again // Growing the table invalidates everything, so try again - } - - if insertb == nil { - // all current buckets are full, allocate a new one. 
- insertb = h.newoverflow(t, b) - inserti = 0 // not necessary, but avoids needlessly spilling inserti - } - insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks - - insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*4) - // store new key at insert position - *(*unsafe.Pointer)(insertk) = key - - h.count++ - -done: - val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*4+inserti*uintptr(t.valuesize)) - if h.flags&hashWriting == 0 { - throw("concurrent map writes") - } - h.flags &^= hashWriting - return val -} - -func mapassign_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer { - if h == nil { - panic(plainError("assignment to entry in nil map")) - } - if raceenabled { - callerpc := getcallerpc() - racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast64)) - } - if h.flags&hashWriting != 0 { - throw("concurrent map writes") - } - hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - - // Set hashWriting after calling alg.hash for consistency with mapassign. - h.flags |= hashWriting - - if h.buckets == nil { - h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) - } - -again: - bucket := hash & bucketMask(h.B) - if h.growing() { - growWork_fast64(t, h, bucket) - } - b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - - var insertb *bmap - var inserti uintptr - var insertk unsafe.Pointer - - for { - for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] == empty { - if insertb == nil { - insertb = b - inserti = i - } - continue - } - k := *((*uint64)(add(unsafe.Pointer(b), dataOffset+i*8))) - if k != key { - continue - } - insertb = b - inserti = i - goto done - } - ovf := b.overflow(t) - if ovf == nil { - break - } - b = ovf - } - - // Did not find mapping for key. Allocate new cell & add entry. - - // If we hit the max load factor or we have too many overflow buckets, - // and we're not already in the middle of growing, start growing. - if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { - hashGrow(t, h) - goto again // Growing the table invalidates everything, so try again - } - - if insertb == nil { - // all current buckets are full, allocate a new one. - insertb = h.newoverflow(t, b) - inserti = 0 // not necessary, but avoids needlessly spilling inserti - } - insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks - - insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*8) - // store new key at insert position - *(*uint64)(insertk) = key - - h.count++ - -done: - val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*8+inserti*uintptr(t.valuesize)) - if h.flags&hashWriting == 0 { - throw("concurrent map writes") - } - h.flags &^= hashWriting - return val -} - -func mapassign_fast64ptr(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { - if h == nil { - panic(plainError("assignment to entry in nil map")) - } - if raceenabled { - callerpc := getcallerpc() - racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast64)) - } - if h.flags&hashWriting != 0 { - throw("concurrent map writes") - } - hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - - // Set hashWriting after calling alg.hash for consistency with mapassign. 
- h.flags |= hashWriting - - if h.buckets == nil { - h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) - } - -again: - bucket := hash & bucketMask(h.B) - if h.growing() { - growWork_fast64(t, h, bucket) - } - b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - - var insertb *bmap - var inserti uintptr - var insertk unsafe.Pointer - - for { - for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] == empty { - if insertb == nil { - insertb = b - inserti = i - } - continue - } - k := *((*unsafe.Pointer)(add(unsafe.Pointer(b), dataOffset+i*8))) - if k != key { - continue - } - insertb = b - inserti = i - goto done - } - ovf := b.overflow(t) - if ovf == nil { - break - } - b = ovf - } - - // Did not find mapping for key. Allocate new cell & add entry. - - // If we hit the max load factor or we have too many overflow buckets, - // and we're not already in the middle of growing, start growing. - if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { - hashGrow(t, h) - goto again // Growing the table invalidates everything, so try again - } - - if insertb == nil { - // all current buckets are full, allocate a new one. - insertb = h.newoverflow(t, b) - inserti = 0 // not necessary, but avoids needlessly spilling inserti - } - insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks - - insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*8) - // store new key at insert position - *(*unsafe.Pointer)(insertk) = key - - h.count++ - -done: - val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*8+inserti*uintptr(t.valuesize)) - if h.flags&hashWriting == 0 { - throw("concurrent map writes") - } - h.flags &^= hashWriting - return val -} - -func mapassign_faststr(t *maptype, h *hmap, s string) unsafe.Pointer { - if h == nil { - panic(plainError("assignment to entry in nil map")) - } - if raceenabled { - callerpc := getcallerpc() - racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_faststr)) - } - if h.flags&hashWriting != 0 { - throw("concurrent map writes") - } - key := stringStructOf(&s) - hash := t.key.hashfn(noescape(unsafe.Pointer(&s)), uintptr(h.hash0)) - - // Set hashWriting after calling alg.hash for consistency with mapassign. - h.flags |= hashWriting - - if h.buckets == nil { - h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) - } - -again: - bucket := hash & bucketMask(h.B) - if h.growing() { - growWork_faststr(t, h, bucket) - } - b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - top := tophash(hash) - - var insertb *bmap - var inserti uintptr - var insertk unsafe.Pointer - - for { - for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] != top { - if b.tophash[i] == empty && insertb == nil { - insertb = b - inserti = i - } - continue - } - k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize)) - if k.len != key.len { - continue - } - if k.str != key.str && !memequal(k.str, key.str, uintptr(key.len)) { - continue - } - // already have a mapping for key. Update it. - inserti = i - insertb = b - goto done - } - ovf := b.overflow(t) - if ovf == nil { - break - } - b = ovf - } - - // Did not find mapping for key. Allocate new cell & add entry. - - // If we hit the max load factor or we have too many overflow buckets, - // and we're not already in the middle of growing, start growing. 
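Every assign path in this deleted file triggers growth with the same test: grow when one more entry would push the map past the load factor, or when too many overflow buckets have piled up. A hedged sketch of the load-factor half, assuming the 6.5 entries-per-bucket threshold the runtime is known to use (the constants and helper here are illustrative, not copied from this patch):

    package main

    import "fmt"

    const (
        bucketCnt     = 8  // entries per bucket
        loadFactorNum = 13 // 13/2 = 6.5 average entries per bucket (assumed threshold)
        loadFactorDen = 2
    )

    // overLoadFactor reports whether a map holding count entries in 2^B buckets
    // is past the growth threshold, mirroring the check the assign paths make
    // with h.count+1 before calling hashGrow.
    func overLoadFactor(count int, B uint8) bool {
        return count > bucketCnt && uint64(count) > loadFactorNum*(uint64(1)<<B)/loadFactorDen
    }

    func main() {
        // With B=3 there are 8 buckets, so growth starts past 8*6.5 = 52 entries.
        fmt.Println(overLoadFactor(52, 3)) // false
        fmt.Println(overLoadFactor(53, 3)) // true
    }

The overflow-bucket half of the test (tooManyOverflowBuckets) guards against the degenerate case where entries keep landing in overflow chains even though the count stays low.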
- if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { - hashGrow(t, h) - goto again // Growing the table invalidates everything, so try again - } - - if insertb == nil { - // all current buckets are full, allocate a new one. - insertb = h.newoverflow(t, b) - inserti = 0 // not necessary, but avoids needlessly spilling inserti - } - insertb.tophash[inserti&(bucketCnt-1)] = top // mask inserti to avoid bounds checks - - insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*2*sys.PtrSize) - // store new key at insert position - *((*stringStruct)(insertk)) = *key - h.count++ - -done: - val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*2*sys.PtrSize+inserti*uintptr(t.valuesize)) - if h.flags&hashWriting == 0 { - throw("concurrent map writes") - } - h.flags &^= hashWriting - return val -} - -func mapdelete_fast32(t *maptype, h *hmap, key uint32) { - if raceenabled && h != nil { - callerpc := getcallerpc() - racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_fast32)) - } - if h == nil || h.count == 0 { - return - } - if h.flags&hashWriting != 0 { - throw("concurrent map writes") - } - - hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - - // Set hashWriting after calling alg.hash for consistency with mapdelete - h.flags |= hashWriting - - bucket := hash & bucketMask(h.B) - if h.growing() { - growWork_fast32(t, h, bucket) - } - b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) -search: - for ; b != nil; b = b.overflow(t) { - for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) { - if key != *(*uint32)(k) || b.tophash[i] == empty { - continue - } - // Only clear key if there are pointers in it. - if t.key.kind&kindNoPointers == 0 { - memclrHasPointers(k, t.key.size) - } - // Only clear value if there are pointers in it. - if t.elem.kind&kindNoPointers == 0 { - v := add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)) - memclrHasPointers(v, t.elem.size) - } - b.tophash[i] = empty - h.count-- - break search - } - } - - if h.flags&hashWriting == 0 { - throw("concurrent map writes") - } - h.flags &^= hashWriting -} - -func mapdelete_fast64(t *maptype, h *hmap, key uint64) { - if raceenabled && h != nil { - callerpc := getcallerpc() - racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_fast64)) - } - if h == nil || h.count == 0 { - return - } - if h.flags&hashWriting != 0 { - throw("concurrent map writes") - } - - hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - - // Set hashWriting after calling alg.hash for consistency with mapdelete - h.flags |= hashWriting - - bucket := hash & bucketMask(h.B) - if h.growing() { - growWork_fast64(t, h, bucket) - } - b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) -search: - for ; b != nil; b = b.overflow(t) { - for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) { - if key != *(*uint64)(k) || b.tophash[i] == empty { - continue - } - // Only clear key if there are pointers in it. - if t.key.kind&kindNoPointers == 0 { - memclrHasPointers(k, t.key.size) - } - // Only clear value if there are pointers in it. 
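The lookup and delete paths above reject most cells by comparing b.tophash[i] against tophash(hash) before ever touching the key: each cell caches the top byte of its key's hash, with the small values reserved as markers such as empty and the evacuated states. A sketch of how such a top byte can be derived, assuming a 64-bit uintptr and the conventional value of minTopHash (neither is spelled out in this hunk):

    package main

    import "fmt"

    const (
        empty      = 0 // cell is unused
        minTopHash = 4 // smallest tophash of a real entry (assumed value)
    )

    // tophash reduces a full hash to one byte per cell, bumping values that
    // would collide with the reserved markers above.
    func tophash(hash uintptr) uint8 {
        top := uint8(hash >> (8*8 - 8)) // top byte of a 64-bit hash
        if top < minTopHash {
            top += minTopHash
        }
        return top
    }

    func main() {
        fmt.Printf("%#x\n", tophash(0xdeadbeefcafebabe)) // 0xde
        fmt.Printf("%#x\n", tophash(0x01))               // 0x4: bumped above the markers
    }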
- if t.elem.kind&kindNoPointers == 0 { - v := add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)) - memclrHasPointers(v, t.elem.size) - } - b.tophash[i] = empty - h.count-- - break search - } - } - - if h.flags&hashWriting == 0 { - throw("concurrent map writes") - } - h.flags &^= hashWriting -} - -func mapdelete_faststr(t *maptype, h *hmap, ky string) { - if raceenabled && h != nil { - callerpc := getcallerpc() - racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_faststr)) - } - if h == nil || h.count == 0 { - return - } - if h.flags&hashWriting != 0 { - throw("concurrent map writes") - } - - key := stringStructOf(&ky) - hash := t.key.hashfn(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0)) - - // Set hashWriting after calling alg.hash for consistency with mapdelete - h.flags |= hashWriting - - bucket := hash & bucketMask(h.B) - if h.growing() { - growWork_faststr(t, h, bucket) - } - b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) - top := tophash(hash) -search: - for ; b != nil; b = b.overflow(t) { - for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { - k := (*stringStruct)(kptr) - if k.len != key.len || b.tophash[i] != top { - continue - } - if k.str != key.str && !memequal(k.str, key.str, uintptr(key.len)) { - continue - } - // Clear key's pointer. - k.str = nil - // Only clear value if there are pointers in it. - if t.elem.kind&kindNoPointers == 0 { - v := add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) - memclrHasPointers(v, t.elem.size) - } - b.tophash[i] = empty - h.count-- - break search - } - } - - if h.flags&hashWriting == 0 { - throw("concurrent map writes") - } - h.flags &^= hashWriting -} - -func growWork_fast32(t *maptype, h *hmap, bucket uintptr) { - // make sure we evacuate the oldbucket corresponding - // to the bucket we're about to use - evacuate_fast32(t, h, bucket&h.oldbucketmask()) - - // evacuate one more oldbucket to make progress on growing - if h.growing() { - evacuate_fast32(t, h, h.nevacuate) - } -} - -func evacuate_fast32(t *maptype, h *hmap, oldbucket uintptr) { - b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) - newbit := h.noldbuckets() - if !evacuated(b) { - // TODO: reuse overflow buckets instead of using new ones, if there - // is no iterator using the old buckets. (If !oldIterator.) - - // xy contains the x and y (low and high) evacuation destinations. - var xy [2]evacDst - x := &xy[0] - x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) - x.k = add(unsafe.Pointer(x.b), dataOffset) - x.v = add(x.k, bucketCnt*4) - - if !h.sameSizeGrow() { - // Only calculate y pointers if we're growing bigger. - // Otherwise GC can see bad pointers. - y := &xy[1] - y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) - y.k = add(unsafe.Pointer(y.b), dataOffset) - y.v = add(y.k, bucketCnt*4) - } - - for ; b != nil; b = b.overflow(t) { - k := add(unsafe.Pointer(b), dataOffset) - v := add(k, bucketCnt*4) - for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 4), add(v, uintptr(t.valuesize)) { - top := b.tophash[i] - if top == empty { - b.tophash[i] = evacuatedEmpty - continue - } - if top < minTopHash { - throw("bad map state") - } - var useY uint8 - if !h.sameSizeGrow() { - // Compute hash to make our evacuation decision (whether we need - // to send this key/value to bucket x or bucket y). 
- hash := t.key.hashfn(k, uintptr(h.hash0)) - if hash&newbit != 0 { - useY = 1 - } - } - - b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap - dst := &xy[useY] // evacuation destination - - if dst.i == bucketCnt { - dst.b = h.newoverflow(t, dst.b) - dst.i = 0 - dst.k = add(unsafe.Pointer(dst.b), dataOffset) - dst.v = add(dst.k, bucketCnt*4) - } - dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check - - // Copy key. - if sys.PtrSize == 4 && t.key.kind&kindNoPointers == 0 && writeBarrier.enabled { - writebarrierptr((*uintptr)(dst.k), *(*uintptr)(k)) - } else { - *(*uint32)(dst.k) = *(*uint32)(k) - } - - typedmemmove(t.elem, dst.v, v) - dst.i++ - // These updates might push these pointers past the end of the - // key or value arrays. That's ok, as we have the overflow pointer - // at the end of the bucket to protect against pointing past the - // end of the bucket. - dst.k = add(dst.k, 4) - dst.v = add(dst.v, uintptr(t.valuesize)) - } - } - // Unlink the overflow buckets & clear key/value to help GC. - if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { - b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) - // Preserve b.tophash because the evacuation - // state is maintained there. - ptr := add(b, dataOffset) - n := uintptr(t.bucketsize) - dataOffset - memclrHasPointers(ptr, n) - } - } - - if oldbucket == h.nevacuate { - advanceEvacuationMark(h, t, newbit) - } -} - -func growWork_fast64(t *maptype, h *hmap, bucket uintptr) { - // make sure we evacuate the oldbucket corresponding - // to the bucket we're about to use - evacuate_fast64(t, h, bucket&h.oldbucketmask()) - - // evacuate one more oldbucket to make progress on growing - if h.growing() { - evacuate_fast64(t, h, h.nevacuate) - } -} - -func evacuate_fast64(t *maptype, h *hmap, oldbucket uintptr) { - b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) - newbit := h.noldbuckets() - if !evacuated(b) { - // TODO: reuse overflow buckets instead of using new ones, if there - // is no iterator using the old buckets. (If !oldIterator.) - - // xy contains the x and y (low and high) evacuation destinations. - var xy [2]evacDst - x := &xy[0] - x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) - x.k = add(unsafe.Pointer(x.b), dataOffset) - x.v = add(x.k, bucketCnt*8) - - if !h.sameSizeGrow() { - // Only calculate y pointers if we're growing bigger. - // Otherwise GC can see bad pointers. - y := &xy[1] - y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) - y.k = add(unsafe.Pointer(y.b), dataOffset) - y.v = add(y.k, bucketCnt*8) - } - - for ; b != nil; b = b.overflow(t) { - k := add(unsafe.Pointer(b), dataOffset) - v := add(k, bucketCnt*8) - for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 8), add(v, uintptr(t.valuesize)) { - top := b.tophash[i] - if top == empty { - b.tophash[i] = evacuatedEmpty - continue - } - if top < minTopHash { - throw("bad map state") - } - var useY uint8 - if !h.sameSizeGrow() { - // Compute hash to make our evacuation decision (whether we need - // to send this key/value to bucket x or bucket y). 
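The evacuation code above splits each old bucket between two destinations when the table doubles: bucket x keeps the old index, bucket y is the old index plus the old bucket count, and the choice is made by the single hash bit that the larger mask newly exposes (hash & newbit). A small illustration with an invented splitTarget helper:

    package main

    import "fmt"

    // splitTarget reports which bucket index an entry moves to when a table of
    // oldBuckets buckets doubles: the same index ("x") or index+oldBuckets
    // ("y"), decided by the hash bit the larger mask newly exposes.
    func splitTarget(hash, oldIndex, oldBuckets uintptr) uintptr {
        newbit := oldBuckets // the next power-of-two bit above the old mask
        if hash&newbit != 0 {
            return oldIndex + oldBuckets // destination "y"
        }
        return oldIndex // destination "x"
    }

    func main() {
        // Growing from 8 to 16 buckets: bit 3 of the hash picks x or y.
        fmt.Println(splitTarget(0x5, 0x5, 8)) // 5:  bit 3 clear, stays at x
        fmt.Println(splitTarget(0xd, 0x5, 8)) // 13: bit 3 set, moves to y
    }

Because only entries whose hash has that bit set move, iteration order and the old tophash markers stay consistent while the grow proceeds incrementally.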
- hash := t.key.hashfn(k, uintptr(h.hash0)) - if hash&newbit != 0 { - useY = 1 - } - } - - b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap - dst := &xy[useY] // evacuation destination - - if dst.i == bucketCnt { - dst.b = h.newoverflow(t, dst.b) - dst.i = 0 - dst.k = add(unsafe.Pointer(dst.b), dataOffset) - dst.v = add(dst.k, bucketCnt*8) - } - dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check - - // Copy key. - if t.key.kind&kindNoPointers == 0 && writeBarrier.enabled { - if sys.PtrSize == 8 { - writebarrierptr((*uintptr)(dst.k), *(*uintptr)(k)) - } else { - // There are three ways to squeeze at least one 32 bit pointer into 64 bits. - // Give up and call typedmemmove. - typedmemmove(t.key, dst.k, k) - } - } else { - *(*uint64)(dst.k) = *(*uint64)(k) - } - - typedmemmove(t.elem, dst.v, v) - dst.i++ - // These updates might push these pointers past the end of the - // key or value arrays. That's ok, as we have the overflow pointer - // at the end of the bucket to protect against pointing past the - // end of the bucket. - dst.k = add(dst.k, 8) - dst.v = add(dst.v, uintptr(t.valuesize)) - } - } - // Unlink the overflow buckets & clear key/value to help GC. - if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { - b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) - // Preserve b.tophash because the evacuation - // state is maintained there. - ptr := add(b, dataOffset) - n := uintptr(t.bucketsize) - dataOffset - memclrHasPointers(ptr, n) - } - } - - if oldbucket == h.nevacuate { - advanceEvacuationMark(h, t, newbit) - } -} - -func growWork_faststr(t *maptype, h *hmap, bucket uintptr) { - // make sure we evacuate the oldbucket corresponding - // to the bucket we're about to use - evacuate_faststr(t, h, bucket&h.oldbucketmask()) - - // evacuate one more oldbucket to make progress on growing - if h.growing() { - evacuate_faststr(t, h, h.nevacuate) - } -} - -func evacuate_faststr(t *maptype, h *hmap, oldbucket uintptr) { - b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) - newbit := h.noldbuckets() - if !evacuated(b) { - // TODO: reuse overflow buckets instead of using new ones, if there - // is no iterator using the old buckets. (If !oldIterator.) - - // xy contains the x and y (low and high) evacuation destinations. - var xy [2]evacDst - x := &xy[0] - x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) - x.k = add(unsafe.Pointer(x.b), dataOffset) - x.v = add(x.k, bucketCnt*2*sys.PtrSize) - - if !h.sameSizeGrow() { - // Only calculate y pointers if we're growing bigger. - // Otherwise GC can see bad pointers. - y := &xy[1] - y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) - y.k = add(unsafe.Pointer(y.b), dataOffset) - y.v = add(y.k, bucketCnt*2*sys.PtrSize) - } - - for ; b != nil; b = b.overflow(t) { - k := add(unsafe.Pointer(b), dataOffset) - v := add(k, bucketCnt*2*sys.PtrSize) - for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 2*sys.PtrSize), add(v, uintptr(t.valuesize)) { - top := b.tophash[i] - if top == empty { - b.tophash[i] = evacuatedEmpty - continue - } - if top < minTopHash { - throw("bad map state") - } - var useY uint8 - if !h.sameSizeGrow() { - // Compute hash to make our evacuation decision (whether we need - // to send this key/value to bucket x or bucket y). 
- hash := t.key.hashfn(k, uintptr(h.hash0)) - if hash&newbit != 0 { - useY = 1 - } - } - - b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap - dst := &xy[useY] // evacuation destination - - if dst.i == bucketCnt { - dst.b = h.newoverflow(t, dst.b) - dst.i = 0 - dst.k = add(unsafe.Pointer(dst.b), dataOffset) - dst.v = add(dst.k, bucketCnt*2*sys.PtrSize) - } - dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check - - // Copy key. - *(*string)(dst.k) = *(*string)(k) - - typedmemmove(t.elem, dst.v, v) - dst.i++ - // These updates might push these pointers past the end of the - // key or value arrays. That's ok, as we have the overflow pointer - // at the end of the bucket to protect against pointing past the - // end of the bucket. - dst.k = add(dst.k, 2*sys.PtrSize) - dst.v = add(dst.v, uintptr(t.valuesize)) - } - } - // Unlink the overflow buckets & clear key/value to help GC. - // Unlink the overflow buckets & clear key/value to help GC. - if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { - b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) - // Preserve b.tophash because the evacuation - // state is maintained there. - ptr := add(b, dataOffset) - n := uintptr(t.bucketsize) - dataOffset - memclrHasPointers(ptr, n) - } - } - - if oldbucket == h.nevacuate { - advanceEvacuationMark(h, t, newbit) - } -} diff --git a/libgo/go/runtime/heapdump.go b/libgo/go/runtime/heapdump.go index a4b168d..e92ea39 100644 --- a/libgo/go/runtime/heapdump.go +++ b/libgo/go/runtime/heapdump.go @@ -184,7 +184,7 @@ func dumptype(t *_type) { dumpint(uint64(uintptr(unsafe.Pointer(t)))) dumpint(uint64(t.size)) if x := t.uncommontype; x == nil || t.pkgPath == nil || *t.pkgPath == "" { - dumpstr(*t.string) + dumpstr(t.string()) } else { pkgpathstr := *t.pkgPath pkgpath := stringStructOf(&pkgpathstr) @@ -233,9 +233,8 @@ type childInfo struct { // dump kinds & offsets of interesting fields in bv func dumpbv(cbv *bitvector, offset uintptr) { - bv := gobv(*cbv) - for i := uintptr(0); i < bv.n; i++ { - if bv.bytedata[i/8]>>(i%8)&1 == 1 { + for i := uintptr(0); i < uintptr(cbv.n); i++ { + if cbv.ptrbit(i) == 1 { dumpint(fieldKindPtr) dumpint(uint64(offset + i*sys.PtrSize)) } @@ -254,7 +253,7 @@ func dumpgoroutine(gp *g) { dumpbool(isSystemGoroutine(gp)) dumpbool(false) // isbackground dumpint(uint64(gp.waitsince)) - dumpstr(gp.waitreason) + dumpstr(gp.waitreason.String()) dumpint(0) dumpint(uint64(uintptr(unsafe.Pointer(gp.m)))) dumpint(uint64(uintptr(unsafe.Pointer(gp._defer)))) @@ -372,8 +371,26 @@ func dumpparams() { dumpbool(true) // big-endian ptrs } dumpint(sys.PtrSize) - dumpint(uint64(mheap_.arena_start)) - dumpint(uint64(mheap_.arena_used)) + var arenaStart, arenaEnd uintptr + for i1 := range mheap_.arenas { + if mheap_.arenas[i1] == nil { + continue + } + for i, ha := range mheap_.arenas[i1] { + if ha == nil { + continue + } + base := arenaBase(arenaIdx(i1)<<arenaL1Shift | arenaIdx(i)) + if arenaStart == 0 || base < arenaStart { + arenaStart = base + } + if base+heapArenaBytes > arenaEnd { + arenaEnd = base + heapArenaBytes + } + } + } + dumpint(uint64(arenaStart)) + dumpint(uint64(arenaEnd)) dumpstr(sys.GOARCH) dumpstr(sys.Goexperiment) dumpint(uint64(ncpu)) @@ -509,7 +526,7 @@ func mdump() { func writeheapdump_m(fd uintptr) { _g_ := getg() casgstatus(_g_.m.curg, _Grunning, _Gwaiting) - _g_.waitreason = "dumping heap" + _g_.waitreason = waitReasonDumpingHeap // Update stats so we can dump them. 
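The dumpbv change in the heapdump.go hunk above swaps the explicit bv.bytedata[i/8]>>(i%8)&1 indexing for a cbv.ptrbit(i) helper; the bit extraction itself is unchanged. Spelled out over a plain byte slice (ptrbit here is a free function for illustration only):

    package main

    import "fmt"

    // ptrbit returns bit i of a little-endian packed bitmap: the
    // bytedata[i/8]>>(i%8)&1 computation from the deleted dumpbv loop.
    func ptrbit(bytedata []byte, i uintptr) uint8 {
        return bytedata[i/8] >> (i % 8) & 1
    }

    func main() {
        bm := []byte{0x05, 0x80} // bits 0, 2 and 15 set
        for i := uintptr(0); i < 16; i++ {
            fmt.Print(ptrbit(bm, i))
        }
        fmt.Println() // 1010000000000001
    }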
// As a side effect, flushes all the MCaches so the MSpan.freelist diff --git a/libgo/go/runtime/iface.go b/libgo/go/runtime/iface.go index 62d47ce..8ed67c1 100644 --- a/libgo/go/runtime/iface.go +++ b/libgo/go/runtime/iface.go @@ -94,7 +94,7 @@ func getitab(lhs, rhs *_type, canfail bool) unsafe.Pointer { if canfail { return nil } - panic(&TypeAssertionError{"", *rhs.string, *lhs.string, *lhsi.methods[0].name}) + panic(&TypeAssertionError{nil, rhs, lhs, *lhsi.methods[0].name}) } methods := make([]unsafe.Pointer, len(lhsi.methods)+1) @@ -110,7 +110,7 @@ func getitab(lhs, rhs *_type, canfail bool) unsafe.Pointer { if canfail { return nil } - panic(&TypeAssertionError{"", *rhs.string, *lhs.string, *lhsMethod.name}) + panic(&TypeAssertionError{nil, rhs, lhs, *lhsMethod.name}) } rhsMethod = &rhs.methods[ri] @@ -126,7 +126,7 @@ func getitab(lhs, rhs *_type, canfail bool) unsafe.Pointer { if canfail { return nil } - panic(&TypeAssertionError{"", *rhs.string, *lhs.string, *lhsMethod.name}) + panic(&TypeAssertionError{nil, rhs, lhs, *lhsMethod.name}) } methods[li+1] = unsafe.Pointer(rhsMethod.tfn) @@ -147,7 +147,7 @@ func requireitab(lhs, rhs *_type) unsafe.Pointer { // impossible or if the rhs type is nil. func assertitab(lhs, rhs *_type) unsafe.Pointer { if rhs == nil { - panic(&TypeAssertionError{"", "", *lhs.string, ""}) + panic(&TypeAssertionError{nil, nil, lhs, ""}) } if lhs.kind&kindMask != kindInterface { @@ -167,10 +167,10 @@ func assertitab(lhs, rhs *_type) unsafe.Pointer { // type, panicing if not. func assertI2T(lhs, rhs, inter *_type) { if rhs == nil { - panic(&TypeAssertionError{"", "", *lhs.string, ""}) + panic(&TypeAssertionError{nil, nil, lhs, ""}) } if !eqtype(lhs, rhs) { - panic(&TypeAssertionError{*inter.string, *rhs.string, *lhs.string, ""}) + panic(&TypeAssertionError{inter, rhs, lhs, ""}) } } @@ -327,8 +327,44 @@ func ifaceT2Ip(to, from *_type) bool { func reflect_ifaceE2I(inter *interfacetype, e eface, dst *iface) { t := e._type if t == nil { - panic(TypeAssertionError{"", "", *inter.typ.string, ""}) + panic(TypeAssertionError{nil, nil, &inter.typ, ""}) } dst.tab = requireitab((*_type)(unsafe.Pointer(inter)), t) dst.data = e.data } + +// staticbytes is used to avoid convT2E for byte-sized values. 
+var staticbytes = [...]byte{ + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, +} diff --git a/libgo/go/runtime/internal/atomic/atomic_test.go b/libgo/go/runtime/internal/atomic/atomic_test.go index b697aa8..25ece43 100644 --- a/libgo/go/runtime/internal/atomic/atomic_test.go +++ b/libgo/go/runtime/internal/atomic/atomic_test.go @@ -93,8 +93,10 @@ func TestUnaligned64(t *testing.T) { } x := make([]uint32, 4) - up64 := (*uint64)(unsafe.Pointer(&x[1])) // misaligned - p64 := (*int64)(unsafe.Pointer(&x[1])) // misaligned + u := unsafe.Pointer(uintptr(unsafe.Pointer(&x[0])) | 4) // force alignment to 4 + + up64 := (*uint64)(u) // misaligned + p64 := (*int64)(u) // misaligned shouldPanic(t, "Load64", func() { atomic.Load64(up64) }) shouldPanic(t, "Loadint64", func() { atomic.Loadint64(p64) }) diff --git a/libgo/go/runtime/internal/atomic/bench_test.go b/libgo/go/runtime/internal/atomic/bench_test.go index 47010e3..083a75c 100644 --- a/libgo/go/runtime/internal/atomic/bench_test.go +++ b/libgo/go/runtime/internal/atomic/bench_test.go @@ -26,3 +26,39 @@ func BenchmarkAtomicStore64(b *testing.B) { atomic.Store64(&x, 0) } } + +func BenchmarkAtomicLoad(b *testing.B) { + var x uint32 + sink = &x + for i := 0; i < b.N; i++ { + _ = atomic.Load(&x) + } +} + +func BenchmarkAtomicStore(b *testing.B) { + var x uint32 + sink = &x + for i := 0; i < b.N; i++ { + atomic.Store(&x, 0) + } +} + +func BenchmarkXadd(b *testing.B) { + var x uint32 + ptr := &x + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + atomic.Xadd(ptr, 1) + } + }) +} + +func BenchmarkXadd64(b *testing.B) { + var x uint64 + ptr := &x + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + atomic.Xadd64(ptr, 1) + } + }) +} diff --git a/libgo/go/runtime/internal/atomic/stubs.go b/libgo/go/runtime/internal/atomic/stubs.go index 497b980..62e30d1 100644 --- a/libgo/go/runtime/internal/atomic/stubs.go +++ b/libgo/go/runtime/internal/atomic/stubs.go @@ -2,6 +2,8 @@ // Use of this source code is governed by a BSD-style 
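The new staticbytes table gives the runtime a preallocated, addressable copy of every possible byte value, so boxing a single byte into an interface can reuse a table entry instead of heap-allocating a one-byte object. A user-level sketch of the same trade, not the runtime's convT2* plumbing (boxedBytes and boxByte are invented names):

    package main

    import "fmt"

    // boxedBytes plays the role of staticbytes: every possible byte value is
    // boxed exactly once, up front, so handing one out later allocates nothing.
    // The runtime instead points the interface's data word at &staticbytes[v];
    // this sketch just caches the boxed values directly.
    var boxedBytes [256]interface{}

    func init() {
        for i := range boxedBytes {
            boxedBytes[i] = byte(i)
        }
    }

    func boxByte(b byte) interface{} { return boxedBytes[b] }

    func main() {
        fmt.Println(boxByte(0x41)) // 65, with no per-call allocation
    }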
// license that can be found in the LICENSE file. +// +build !wasm + package atomic import "unsafe" diff --git a/libgo/go/runtime/internal/sys/intrinsics.go b/libgo/go/runtime/internal/sys/intrinsics.go index 2928280..6906938 100644 --- a/libgo/go/runtime/internal/sys/intrinsics.go +++ b/libgo/go/runtime/internal/sys/intrinsics.go @@ -32,6 +32,30 @@ func Ctz32(x uint32) int { return int(builtinCtz32(x)) } +// Ctz8 returns the number of trailing zero bits in x; the result is 8 for x == 0. +func Ctz8(x uint8) int { + return int(ntz8tab[x]) +} + +var ntz8tab = [256]uint8{ + 0x08, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x07, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, +} + //extern __builtin_bswap64 func bswap64(uint64) uint64 diff --git a/libgo/go/runtime/lfstack.go b/libgo/go/runtime/lfstack.go index 4787c5b..406561a 100644 --- a/libgo/go/runtime/lfstack.go +++ b/libgo/go/runtime/lfstack.go @@ -55,3 +55,13 @@ func (head *lfstack) pop() unsafe.Pointer { func (head *lfstack) empty() bool { return atomic.Load64((*uint64)(head)) == 0 } + +// lfnodeValidate panics if node is not a valid address for use with +// lfstack.push. This only needs to be called when node is allocated. +func lfnodeValidate(node *lfnode) { + if lfstackUnpack(lfstackPack(node, ^uintptr(0))) != node { + printlock() + println("runtime: bad lfnode address", hex(uintptr(unsafe.Pointer(node)))) + throw("bad lfnode address") + } +} diff --git a/libgo/go/runtime/lfstack_64bit.go b/libgo/go/runtime/lfstack_64bit.go index dca1718..401f83d 100644 --- a/libgo/go/runtime/lfstack_64bit.go +++ b/libgo/go/runtime/lfstack_64bit.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build amd64 arm64 mips64 mips64le ppc64 ppc64le s390x arm64be alpha sparc64 ia64 riscv64 +// +build amd64 arm64 mips64 mips64le ppc64 ppc64le s390x wasm arm64be alpha sparc64 ia64 riscv64 package runtime @@ -11,21 +11,17 @@ import "unsafe" const ( // addrBits is the number of bits needed to represent a virtual address. 
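Ctz8 above answers trailing-zero-count queries for a byte with a 256-entry table rather than a hardware instruction. The table encodes the same function as math/bits.TrailingZeros8, which makes it easy to cross-check; a quick sketch:

    package main

    import (
        "fmt"
        "math/bits"
    )

    // ctz8Table mirrors the shape of the new ntz8tab: entry x holds the number
    // of trailing zero bits in x, with 8 for x == 0.
    var ctz8Table [256]uint8

    func init() {
        for x := 0; x < 256; x++ {
            ctz8Table[x] = uint8(bits.TrailingZeros8(uint8(x)))
        }
    }

    func main() {
        fmt.Println(ctz8Table[0], ctz8Table[0x01], ctz8Table[0x28], ctz8Table[0x80]) // 8 0 3 7
    }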
// - // In Linux the user address space for each architecture is limited as - // follows (taken from the processor.h file for the architecture): + // See heapAddrBits for a table of address space sizes on + // various architectures. 48 bits is enough for all + // architectures except s390x. // - // Architecture Name Maximum Value (exclusive) - // --------------------------------------------------------------------- - // arm64 TASK_SIZE_64 Depends on configuration. - // ppc64{,le} TASK_SIZE_USER64 0x400000000000UL (46 bit addresses) - // mips64{,le} TASK_SIZE64 0x010000000000UL (40 bit addresses) - // s390x TASK_SIZE 0x020000000000UL (41 bit addresses) - // - // These values may increase over time. - // - // On AMD64, virtual addresses are 48-bit numbers sign extended to 64. + // On AMD64, virtual addresses are 48-bit (or 57-bit) numbers sign extended to 64. // We shift the address left 16 to eliminate the sign extended part and make // room in the bottom for the count. + // + // On s390x, virtual addresses are 64-bit. There's not much we + // can do about this, so we just hope that the kernel doesn't + // get to really high addresses and panic if it does. addrBits = 48 // In addition to the 16 bits taken from the top, we can take 3 from the diff --git a/libgo/go/runtime/lock_futex.go b/libgo/go/runtime/lock_futex.go index b2c9ccb..f7ca1f0 100644 --- a/libgo/go/runtime/lock_futex.go +++ b/libgo/go/runtime/lock_futex.go @@ -241,3 +241,9 @@ func notetsleepg(n *note, ns int64) bool { exitsyscall() return ok } + +func pauseSchedulerUntilCallback() bool { + return false +} + +func checkTimeouts() {} diff --git a/libgo/go/runtime/lock_js.go b/libgo/go/runtime/lock_js.go new file mode 100644 index 0000000..df321e5 --- /dev/null +++ b/libgo/go/runtime/lock_js.go @@ -0,0 +1,172 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build js,wasm + +package runtime + +import ( + _ "unsafe" +) + +// js/wasm has no support for threads yet. There is no preemption. +// Waiting for a mutex is implemented by allowing other goroutines +// to run until the mutex gets unlocked. + +const ( + mutex_unlocked = 0 + mutex_locked = 1 + + note_cleared = 0 + note_woken = 1 + note_timeout = 2 + + active_spin = 4 + active_spin_cnt = 30 + passive_spin = 1 +) + +func lock(l *mutex) { + for l.key == mutex_locked { + mcall(gosched_m) + } + l.key = mutex_locked +} + +func unlock(l *mutex) { + if l.key == mutex_unlocked { + throw("unlock of unlocked lock") + } + l.key = mutex_unlocked +} + +// One-time notifications. 
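The new lock_js.go works cooperatively: with no threads or preemption on js/wasm, lock() simply reschedules other goroutines until the mutex is free, and the note code that follows parks the waiting goroutine and relies on scheduled callbacks and deadlines to wake it. The runtime-internal note API is not reachable from user code, but its one-shot clear/wake/timed-wait semantics are close to a buffered channel used once; a loose user-level analogy (oneShot, newNote, notify and wait are invented names, with channels standing in for gopark/goready):

    package main

    import (
        "fmt"
        "time"
    )

    // oneShot is an analogy for the runtime note on js/wasm: created cleared,
    // woken at most once, and a waiter may give up after a deadline. In the
    // runtime the "sleep" is a gopark and the wakeup comes from a callback.
    type oneShot chan struct{}

    func newNote() oneShot { return make(oneShot, 1) }

    func notify(n oneShot) { n <- struct{}{} }

    // wait blocks until the note is notified or d elapses, reporting which.
    func wait(n oneShot, d time.Duration) bool {
        select {
        case <-n:
            return true
        case <-time.After(d):
            return false
        }
    }

    func main() {
        n := newNote()
        go func() {
            time.Sleep(10 * time.Millisecond)
            notify(n)
        }()
        fmt.Println(wait(n, time.Second)) // true: woken before the deadline
    }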
+ +type noteWithTimeout struct { + gp *g + deadline int64 +} + +var ( + notes = make(map[*note]*g) + notesWithTimeout = make(map[*note]noteWithTimeout) +) + +func noteclear(n *note) { + n.key = note_cleared +} + +func notewakeup(n *note) { + // gp := getg() + if n.key == note_woken { + throw("notewakeup - double wakeup") + } + cleared := n.key == note_cleared + n.key = note_woken + if cleared { + goready(notes[n], 1) + } +} + +func notesleep(n *note) { + throw("notesleep not supported by js") +} + +func notetsleep(n *note, ns int64) bool { + throw("notetsleep not supported by js") + return false +} + +// same as runtime·notetsleep, but called on user g (not g0) +func notetsleepg(n *note, ns int64) bool { + gp := getg() + if gp == gp.m.g0 { + throw("notetsleepg on g0") + } + + if ns >= 0 { + deadline := nanotime() + ns + delay := ns/1000000 + 1 // round up + if delay > 1<<31-1 { + delay = 1<<31 - 1 // cap to max int32 + } + + id := scheduleCallback(delay) + mp := acquirem() + notes[n] = gp + notesWithTimeout[n] = noteWithTimeout{gp: gp, deadline: deadline} + releasem(mp) + + gopark(nil, nil, waitReasonSleep, traceEvNone, 1) + + clearScheduledCallback(id) // note might have woken early, clear timeout + mp = acquirem() + delete(notes, n) + delete(notesWithTimeout, n) + releasem(mp) + + return n.key == note_woken + } + + for n.key != note_woken { + mp := acquirem() + notes[n] = gp + releasem(mp) + + gopark(nil, nil, waitReasonZero, traceEvNone, 1) + + mp = acquirem() + delete(notes, n) + releasem(mp) + } + return true +} + +// checkTimeouts resumes goroutines that are waiting on a note which has reached its deadline. +func checkTimeouts() { + now := nanotime() + for n, nt := range notesWithTimeout { + if n.key == note_cleared && now > nt.deadline { + n.key = note_timeout + goready(nt.gp, 1) + } + } +} + +var waitingForCallback *g + +// sleepUntilCallback puts the current goroutine to sleep until a callback is triggered. +// It is currently only used by the callback routine of the syscall/js package. +//go:linkname sleepUntilCallback syscall/js.sleepUntilCallback +func sleepUntilCallback() { + waitingForCallback = getg() + gopark(nil, nil, waitReasonZero, traceEvNone, 1) + waitingForCallback = nil +} + +// pauseSchedulerUntilCallback gets called from the scheduler and pauses the execution +// of Go's WebAssembly code until a callback is triggered. Then it checks for note timeouts +// and resumes goroutines that are waiting for a callback. +func pauseSchedulerUntilCallback() bool { + if waitingForCallback == nil && len(notesWithTimeout) == 0 { + return false + } + + pause() + checkTimeouts() + if waitingForCallback != nil { + goready(waitingForCallback, 1) + } + return true +} + +// pause pauses the execution of Go's WebAssembly code until a callback is triggered. +func pause() + +// scheduleCallback tells the WebAssembly environment to trigger a callback after ms milliseconds. +// It returns a timer id that can be used with clearScheduledCallback. +func scheduleCallback(ms int64) int32 + +// clearScheduledCallback clears a callback scheduled by scheduleCallback. 
+func clearScheduledCallback(id int32) diff --git a/libgo/go/runtime/lock_sema.go b/libgo/go/runtime/lock_sema.go index b5cce6a..237513c 100644 --- a/libgo/go/runtime/lock_sema.go +++ b/libgo/go/runtime/lock_sema.go @@ -294,3 +294,9 @@ func notetsleepg(n *note, ns int64) bool { exitsyscall() return ok } + +func pauseSchedulerUntilCallback() bool { + return false +} + +func checkTimeouts() {} diff --git a/libgo/go/runtime/malloc.go b/libgo/go/runtime/malloc.go index 523989e..ac4759f 100644 --- a/libgo/go/runtime/malloc.go +++ b/libgo/go/runtime/malloc.go @@ -78,9 +78,34 @@ // // 3. We don't zero pages that never get reused. +// Virtual memory layout +// +// The heap consists of a set of arenas, which are 64MB on 64-bit and +// 4MB on 32-bit (heapArenaBytes). Each arena's start address is also +// aligned to the arena size. +// +// Each arena has an associated heapArena object that stores the +// metadata for that arena: the heap bitmap for all words in the arena +// and the span map for all pages in the arena. heapArena objects are +// themselves allocated off-heap. +// +// Since arenas are aligned, the address space can be viewed as a +// series of arena frames. The arena map (mheap_.arenas) maps from +// arena frame number to *heapArena, or nil for parts of the address +// space not backed by the Go heap. The arena map is structured as a +// two-level array consisting of a "L1" arena map and many "L2" arena +// maps; however, since arenas are large, on many architectures, the +// arena map consists of a single, large L2 map. +// +// The arena map covers the entire possible address space, allowing +// the Go heap to use any part of the address space. The allocator +// attempts to keep arenas contiguous so that large spans (and hence +// large objects) can cross arenas. + package runtime import ( + "runtime/internal/atomic" "runtime/internal/sys" "unsafe" ) @@ -124,9 +149,8 @@ const ( _TinySize = 16 _TinySizeClass = int8(2) - _FixAllocChunk = 16 << 10 // Chunk size for FixAlloc - _MaxMHeapList = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap. - _HeapAllocChunk = 1 << 20 // Chunk size for heap growth + _FixAllocChunk = 16 << 10 // Chunk size for FixAlloc + _MaxMHeapList = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap. // Per-P, per order stack segment cache size. _StackCacheSize = 32 * 1024 @@ -145,25 +169,144 @@ const ( // plan9 | 4KB | 3 _NumStackOrders = 4 - sys.PtrSize/4*sys.GoosWindows - 1*sys.GoosPlan9 - // Number of bits in page to span calculations (4k pages). - // On Windows 64-bit we limit the arena to 32GB or 35 bits. - // Windows counts memory used by page table into committed memory - // of the process, so we can't reserve too much memory. - // See https://golang.org/issue/5402 and https://golang.org/issue/5236. - // On other 64-bit platforms, we limit the arena to 512GB, or 39 bits. - // On 32-bit, we don't bother limiting anything, so we use the full 32-bit address. - // The only exception is mips32 which only has access to low 2GB of virtual memory. - // On Darwin/arm64, we cannot reserve more than ~5GB of virtual memory, - // but as most devices have less than 4GB of physical memory anyway, we - // try to be conservative here, and only ask for a 2GB heap. 
- _MHeapMap_TotalBits = (_64bit*sys.GoosWindows)*35 + (_64bit*(1-sys.GoosWindows)*(1-sys.GoosDarwin*sys.GoarchArm64))*39 + sys.GoosDarwin*sys.GoarchArm64*31 + (1-_64bit)*(32-(sys.GoarchMips+sys.GoarchMipsle)) - _MHeapMap_Bits = _MHeapMap_TotalBits - _PageShift - - // _MaxMem is the maximum heap arena size minus 1. + // heapAddrBits is the number of bits in a heap address. On + // amd64, addresses are sign-extended beyond heapAddrBits. On + // other arches, they are zero-extended. + // + // On 64-bit platforms, we limit this to 48 bits based on a + // combination of hardware and OS limitations. + // + // amd64 hardware limits addresses to 48 bits, sign-extended + // to 64 bits. Addresses where the top 16 bits are not either + // all 0 or all 1 are "non-canonical" and invalid. Because of + // these "negative" addresses, we offset addresses by 1<<47 + // (arenaBaseOffset) on amd64 before computing indexes into + // the heap arenas index. In 2017, amd64 hardware added + // support for 57 bit addresses; however, currently only Linux + // supports this extension and the kernel will never choose an + // address above 1<<47 unless mmap is called with a hint + // address above 1<<47 (which we never do). + // + // arm64 hardware (as of ARMv8) limits user addresses to 48 + // bits, in the range [0, 1<<48). + // + // ppc64, mips64, and s390x support arbitrary 64 bit addresses + // in hardware. However, since Go only supports Linux on + // these, we lean on OS limits. Based on Linux's processor.h, + // the user address space is limited as follows on 64-bit + // architectures: + // + // Architecture Name Maximum Value (exclusive) + // --------------------------------------------------------------------- + // amd64 TASK_SIZE_MAX 0x007ffffffff000 (47 bit addresses) + // arm64 TASK_SIZE_64 0x01000000000000 (48 bit addresses) + // ppc64{,le} TASK_SIZE_USER64 0x00400000000000 (46 bit addresses) + // mips64{,le} TASK_SIZE64 0x00010000000000 (40 bit addresses) + // s390x TASK_SIZE 1<<64 (64 bit addresses) + // + // These limits may increase over time, but are currently at + // most 48 bits except on s390x. On all architectures, Linux + // starts placing mmap'd regions at addresses that are + // significantly below 48 bits, so even if it's possible to + // exceed Go's 48 bit limit, it's extremely unlikely in + // practice. + // + // On 32-bit platforms, we accept the full 32-bit address + // space because doing so is cheap. + // mips32 only has access to the low 2GB of virtual memory, so + // we further limit it to 31 bits. + // + // WebAssembly currently has a limit of 4GB linear memory. + heapAddrBits = (_64bit*(1-sys.GoarchWasm))*48 + (1-_64bit+sys.GoarchWasm)*(32-(sys.GoarchMips+sys.GoarchMipsle)) + + // maxAlloc is the maximum size of an allocation. On 64-bit, + // it's theoretically possible to allocate 1<<heapAddrBits bytes. On + // 32-bit, however, this is one less than 1<<32 because the + // number of bytes in the address space doesn't actually fit + // in a uintptr. + maxAlloc = (1 << heapAddrBits) - (1-_64bit)*1 + + // The number of bits in a heap address, the size of heap + // arenas, and the L1 and L2 arena map sizes are related by // - // On 32-bit, this is also the maximum heap pointer value, - // since the arena starts at address 0. 
- _MaxMem = 1<<_MHeapMap_TotalBits - 1 + // (1 << addrBits) = arenaBytes * L1entries * L2entries + // + // Currently, we balance these as follows: + // + // Platform Addr bits Arena size L1 entries L2 size + // -------------- --------- ---------- ---------- ------- + // */64-bit 48 64MB 1 32MB + // windows/64-bit 48 4MB 64 8MB + // */32-bit 32 4MB 1 4KB + // */mips(le) 31 4MB 1 2KB + + // heapArenaBytes is the size of a heap arena. The heap + // consists of mappings of size heapArenaBytes, aligned to + // heapArenaBytes. The initial heap mapping is one arena. + // + // This is currently 64MB on 64-bit non-Windows and 4MB on + // 32-bit and on Windows. We use smaller arenas on Windows + // because all committed memory is charged to the process, + // even if it's not touched. Hence, for processes with small + // heaps, the mapped arena space needs to be commensurate. + // This is particularly important with the race detector, + // since it significantly amplifies the cost of committed + // memory. + heapArenaBytes = 1 << logHeapArenaBytes + + // logHeapArenaBytes is log_2 of heapArenaBytes. For clarity, + // prefer using heapArenaBytes where possible (we need the + // constant to compute some other constants). + logHeapArenaBytes = (6+20)*(_64bit*(1-sys.GoosWindows)) + (2+20)*(_64bit*sys.GoosWindows) + (2+20)*(1-_64bit) + + // heapArenaBitmapBytes is the size of each heap arena's bitmap. + heapArenaBitmapBytes = heapArenaBytes / (sys.PtrSize * 8 / 2) + + pagesPerArena = heapArenaBytes / pageSize + + // arenaL1Bits is the number of bits of the arena number + // covered by the first level arena map. + // + // This number should be small, since the first level arena + // map requires PtrSize*(1<<arenaL1Bits) of space in the + // binary's BSS. It can be zero, in which case the first level + // index is effectively unused. There is a performance benefit + // to this, since the generated code can be more efficient, + // but comes at the cost of having a large L2 mapping. + // + // We use the L1 map on 64-bit Windows because the arena size + // is small, but the address space is still 48 bits, and + // there's a high cost to having a large L2. + arenaL1Bits = 6 * (_64bit * sys.GoosWindows) + + // arenaL2Bits is the number of bits of the arena number + // covered by the second level arena index. + // + // The size of each arena map allocation is proportional to + // 1<<arenaL2Bits, so it's important that this not be too + // large. 48 bits leads to 32MB arena index allocations, which + // is about the practical threshold. + arenaL2Bits = heapAddrBits - logHeapArenaBytes - arenaL1Bits + + // arenaL1Shift is the number of bits to shift an arena frame + // number by to compute an index into the first level arena map. + arenaL1Shift = arenaL2Bits + + // arenaBits is the total bits in a combined arena map index. + // This is split between the index into the L1 arena map and + // the L2 arena map. + arenaBits = arenaL1Bits + arenaL2Bits + + // arenaBaseOffset is the pointer value that corresponds to + // index 0 in the heap arena map. + // + // On amd64, the address space is 48 bits, sign extended to 64 + // bits. This offset lets us handle "negative" addresses (or + // high addresses if viewed as unsigned). + // + // On other platforms, the user address space is contiguous + // and starts at 0, so no offset is necessary. + arenaBaseOffset uintptr = sys.GoarchAmd64 * (1 << 47) // Max number of threads to run garbage collection. 
// 2, 3, and 4 are all plausible maximums depending @@ -209,18 +352,12 @@ var physPageSize uintptr // SysReserve reserves address space without allocating memory. // If the pointer passed to it is non-nil, the caller wants the // reservation there, but SysReserve can still choose another -// location if that one is unavailable. On some systems and in some -// cases SysReserve will simply check that the address space is -// available and not actually reserve it. If SysReserve returns -// non-nil, it sets *reserved to true if the address space is -// reserved, false if it has merely been checked. +// location if that one is unavailable. // NOTE: SysReserve returns OS-aligned memory, but the heap allocator // may use larger alignment, so the caller must be careful to realign the // memory obtained by sysAlloc. // // SysMap maps previously reserved address space for use. -// The reserved argument is true if the address space was really -// reserved, not merely checked. // // SysFault marks a (already sysAlloc'd) region to fault // if accessed. Used only for debugging the runtime. @@ -233,6 +370,12 @@ func mallocinit() { // Not used for gccgo. // testdefersizes() + if heapArenaBitmapBytes&(heapArenaBitmapBytes-1) != 0 { + // heapBits expects modular arithmetic on bitmap + // addresses to work. + throw("heapArenaBitmapBytes not a power of 2") + } + // Copy class sizes out for statistics table. for i := range class_to_size { memstats.by_size[i].size = uint32(class_to_size[i]) @@ -252,55 +395,47 @@ func mallocinit() { throw("bad system page size") } - // The auxiliary regions start at p and are laid out in the - // following order: spans, bitmap, arena. - var p, pSize uintptr - var reserved bool - - // The spans array holds one *mspan per _PageSize of arena. - var spansSize uintptr = (_MaxMem + 1) / _PageSize * sys.PtrSize - spansSize = round(spansSize, _PageSize) - // The bitmap holds 2 bits per word of arena. - var bitmapSize uintptr = (_MaxMem + 1) / (sys.PtrSize * 8 / 2) - bitmapSize = round(bitmapSize, _PageSize) - - // Set up the allocation arena, a contiguous area of memory where - // allocated data will be found. - if sys.PtrSize == 8 { - // On a 64-bit machine, allocate from a single contiguous reservation. - // 512 GB (MaxMem) should be big enough for now. + // Initialize the heap. + mheap_.init() + _g_ := getg() + _g_.m.mcache = allocmcache() + + // Create initial arena growth hints. + if sys.PtrSize == 8 && GOARCH != "wasm" { + // On a 64-bit machine, we pick the following hints + // because: + // + // 1. Starting from the middle of the address space + // makes it easier to grow out a contiguous range + // without running in to some other mapping. // - // The code will work with the reservation at any address, but ask - // SysReserve to use 0x0000XXc000000000 if possible (XX=00...7f). - // Allocating a 512 GB region takes away 39 bits, and the amd64 - // doesn't let us choose the top 17 bits, so that leaves the 9 bits - // in the middle of 0x00c0 for us to choose. Choosing 0x00c0 means - // that the valid memory addresses will begin 0x00c0, 0x00c1, ..., 0x00df. - // In little-endian, that's c0 00, c1 00, ..., df 00. None of those are valid + // 2. This makes Go heap addresses more easily + // recognizable when debugging. + // + // 3. Stack scanning in gccgo is still conservative, + // so it's important that addresses be distinguishable + // from other data. + // + // Starting at 0x00c0 means that the valid memory addresses + // will begin 0x00c0, 0x00c1, ... 
+ // In little-endian, that's c0 00, c1 00, ... None of those are valid // UTF-8 sequences, and they are otherwise as far away from // ff (likely a common byte) as possible. If that fails, we try other 0xXXc0 // addresses. An earlier attempt to use 0x11f8 caused out of memory errors // on OS X during thread allocations. 0x00c0 causes conflicts with // AddressSanitizer which reserves all memory up to 0x0100. - // These choices are both for debuggability and to reduce the - // odds of a conservative garbage collector (as is still used in gccgo) + // These choices reduce the odds of a conservative garbage collector // not collecting memory because some non-pointer block of memory // had a bit pattern that matched a memory address. // - // Actually we reserve 544 GB (because the bitmap ends up being 32 GB) - // but it hardly matters: e0 00 is not valid UTF-8 either. - // - // If this fails we fall back to the 32 bit memory mechanism - // // However, on arm64, we ignore all this advice above and slam the // allocation at 0x40 << 32 because when using 4k pages with 3-level // translation buffers, the user address space is limited to 39 bits // On darwin/arm64, the address space is even smaller. // On AIX, mmap adresses range starts at 0x0700000000000000 for 64-bit // processes. The new address space allocator starts at 0x0A00000000000000. - arenaSize := round(_MaxMem, _PageSize) - pSize = bitmapSize + spansSize + arenaSize + _PageSize - for i := 0; i <= 0x7f; i++ { + for i := 0x7f; i >= 0; i-- { + var p uintptr switch { case GOARCH == "arm64" && GOOS == "darwin": p = uintptr(i)<<40 | uintptrMask&(0x0013<<28) @@ -312,225 +447,283 @@ func mallocinit() { } else { p = uintptr(i)<<42 | uintptrMask&(0x70<<52) } + case raceenabled: + // The TSAN runtime requires the heap + // to be in the range [0x00c000000000, + // 0x00e000000000). + p = uintptr(i)<<32 | uintptrMask&(0x00c0<<32) + if p >= uintptrMask&0x00e000000000 { + continue + } default: p = uintptr(i)<<40 | uintptrMask&(0x00c0<<32) } - p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved)) - if p != 0 { - break - } + hint := (*arenaHint)(mheap_.arenaHintAlloc.alloc()) + hint.addr = p + hint.next, mheap_.arenaHints = mheap_.arenaHints, hint } - } + } else { + // On a 32-bit machine, we're much more concerned + // about keeping the usable heap contiguous. + // Hence: + // + // 1. We reserve space for all heapArenas up front so + // they don't get interleaved with the heap. They're + // ~258MB, so this isn't too bad. (We could reserve a + // smaller amount of space up front if this is a + // problem.) + // + // 2. We hint the heap to start right above the end of + // the binary so we have the best chance of keeping it + // contiguous. + // + // 3. We try to stake out a reasonably large initial + // heap reservation. - if p == 0 { - // On a 32-bit machine, we can't typically get away - // with a giant virtual address space reservation. - // Instead we map the memory information bitmap - // immediately after the data segment, large enough - // to handle the entire 4GB address space (256 MB), - // along with a reservation for an initial arena. - // When that gets used up, we'll start asking the kernel - // for any memory anywhere. 
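The new allocator constants and comments in the malloc.go hunks above reduce to simple index arithmetic: offset the address by arenaBaseOffset, divide by heapArenaBytes, then split the arena number into L1 and L2 indexes. A hedged sketch using the */64-bit non-Windows numbers from the table above (48-bit addresses, 64MB arenas, a single L1 entry), assuming a 64-bit platform; arenaIndexes is an invented helper, not the runtime's accessor:

    package main

    import "fmt"

    const (
        logHeapArenaBytes = 26 // 64MB arenas on */64-bit
        heapArenaBytes    = 1 << logHeapArenaBytes
        arenaBaseOffset   = uintptr(1) << 47 // amd64: fold "negative" addresses into the index
        arenaL1Bits       = 0                // a single, flat L2 map on this configuration
        arenaL2Bits       = 48 - logHeapArenaBytes - arenaL1Bits
        arenaL1Shift      = arenaL2Bits
    )

    // arenaIndexes returns the (L1, L2) position of the arena containing p,
    // following the two-level layout described in the comments above.
    func arenaIndexes(p uintptr) (l1, l2 uintptr) {
        ai := (p + arenaBaseOffset) / heapArenaBytes
        return ai >> arenaL1Shift, ai & (1<<arenaL2Bits - 1)
    }

    func main() {
        fmt.Println(arenaIndexes(0x00c000000000)) // a typical Go heap hint address
        fmt.Println(arenaIndexes(0x00c000000000 + heapArenaBytes))
    }

With arenaL1Bits = 0 the L1 index is always zero, which is exactly why the comment above notes that most architectures use a single large L2 map.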
+ const arenaMetaSize = unsafe.Sizeof([1 << arenaBits]heapArena{}) + meta := uintptr(sysReserve(nil, arenaMetaSize)) + if meta != 0 { + mheap_.heapArenaAlloc.init(meta, arenaMetaSize) + } // We want to start the arena low, but if we're linked // against C code, it's possible global constructors // have called malloc and adjusted the process' brk. // Query the brk so we can avoid trying to map the - // arena over it (which will cause the kernel to put - // the arena somewhere else, likely at a high + // region over it (which will cause the kernel to put + // the region somewhere else, likely at a high // address). procBrk := sbrk0() - // If we fail to allocate, try again with a smaller arena. - // This is necessary on Android L where we share a process - // with ART, which reserves virtual memory aggressively. - // In the worst case, fall back to a 0-sized initial arena, - // in the hope that subsequent reservations will succeed. + // If we ask for the end of the data segment but the + // operating system requires a little more space + // before we can start allocating, it will give out a + // slightly higher pointer. Except QEMU, which is + // buggy, as usual: it won't adjust the pointer + // upward. So adjust it upward a little bit ourselves: + // 1/4 MB to get away from the running binary image. + p := getEnd() + if p < procBrk { + p = procBrk + } + if mheap_.heapArenaAlloc.next <= p && p < mheap_.heapArenaAlloc.end { + p = mheap_.heapArenaAlloc.end + } + p = round(p+(256<<10), heapArenaBytes) + // Because we're worried about fragmentation on + // 32-bit, we try to make a large initial reservation. arenaSizes := [...]uintptr{ 512 << 20, 256 << 20, 128 << 20, - 0, } - for _, arenaSize := range &arenaSizes { - // SysReserve treats the address we ask for, end, as a hint, - // not as an absolute requirement. If we ask for the end - // of the data segment but the operating system requires - // a little more space before we can start allocating, it will - // give out a slightly higher pointer. Except QEMU, which - // is buggy, as usual: it won't adjust the pointer upward. - // So adjust it upward a little bit ourselves: 1/4 MB to get - // away from the running binary image and then round up - // to a MB boundary. - p = round(getEnd()+(1<<18), 1<<20) - pSize = bitmapSize + spansSize + arenaSize + _PageSize - if p <= procBrk && procBrk < p+pSize { - // Move the start above the brk, - // leaving some room for future brk - // expansion. - p = round(procBrk+(1<<20), 1<<20) - } - p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved)) - if p != 0 { + a, size := sysReserveAligned(unsafe.Pointer(p), arenaSize, heapArenaBytes) + if a != nil { + mheap_.arena.init(uintptr(a), size) + p = uintptr(a) + size // For hint below break } } - if p == 0 { - throw("runtime: cannot reserve arena virtual address space") - } - } - - // PageSize can be larger than OS definition of page size, - // so SysReserve can give us a PageSize-unaligned pointer. - // To overcome this we ask for PageSize more and round up the pointer. - p1 := round(p, _PageSize) - pSize -= p1 - p - - spansStart := p1 - p1 += spansSize - mheap_.bitmap = p1 + bitmapSize - p1 += bitmapSize - if sys.PtrSize == 4 { - // Set arena_start such that we can accept memory - // reservations located anywhere in the 4GB virtual space. 
- mheap_.arena_start = 0 - } else { - mheap_.arena_start = p1 + hint := (*arenaHint)(mheap_.arenaHintAlloc.alloc()) + hint.addr = p + hint.next, mheap_.arenaHints = mheap_.arenaHints, hint } - mheap_.arena_end = p + pSize - mheap_.arena_used = p1 - mheap_.arena_alloc = p1 - mheap_.arena_reserved = reserved - - if mheap_.arena_start&(_PageSize-1) != 0 { - println("bad pagesize", hex(p), hex(p1), hex(spansSize), hex(bitmapSize), hex(_PageSize), "start", hex(mheap_.arena_start)) - throw("misrounded allocation in mallocinit") - } - - // Initialize the rest of the allocator. - mheap_.init(spansStart, spansSize) - _g_ := getg() - _g_.m.mcache = allocmcache() } -// sysAlloc allocates the next n bytes from the heap arena. The -// returned pointer is always _PageSize aligned and between -// h.arena_start and h.arena_end. sysAlloc returns nil on failure. +// sysAlloc allocates heap arena space for at least n bytes. The +// returned pointer is always heapArenaBytes-aligned and backed by +// h.arenas metadata. The returned size is always a multiple of +// heapArenaBytes. sysAlloc returns nil on failure. // There is no corresponding free function. -func (h *mheap) sysAlloc(n uintptr) unsafe.Pointer { - // strandLimit is the maximum number of bytes to strand from - // the current arena block. If we would need to strand more - // than this, we fall back to sysAlloc'ing just enough for - // this allocation. - const strandLimit = 16 << 20 - - if n > h.arena_end-h.arena_alloc { - // If we haven't grown the arena to _MaxMem yet, try - // to reserve some more address space. - p_size := round(n+_PageSize, 256<<20) - new_end := h.arena_end + p_size // Careful: can overflow - if h.arena_end <= new_end && new_end-h.arena_start-1 <= _MaxMem { - // TODO: It would be bad if part of the arena - // is reserved and part is not. - var reserved bool - p := uintptr(sysReserve(unsafe.Pointer(h.arena_end), p_size, &reserved)) - if p == 0 { - // TODO: Try smaller reservation - // growths in case we're in a crowded - // 32-bit address space. - goto reservationFailed - } - // p can be just about anywhere in the address - // space, including before arena_end. - if p == h.arena_end { - // The new block is contiguous with - // the current block. Extend the - // current arena block. - h.arena_end = new_end - h.arena_reserved = reserved - } else if h.arena_start <= p && p+p_size-h.arena_start-1 <= _MaxMem && h.arena_end-h.arena_alloc < strandLimit { - // We were able to reserve more memory - // within the arena space, but it's - // not contiguous with our previous - // reservation. It could be before or - // after our current arena_used. - // - // Keep everything page-aligned. - // Our pages are bigger than hardware pages. - h.arena_end = p + p_size - p = round(p, _PageSize) - h.arena_alloc = p - h.arena_reserved = reserved - } else { - // We got a mapping, but either - // - // 1) It's not in the arena, so we - // can't use it. (This should never - // happen on 32-bit.) - // - // 2) We would need to discard too - // much of our current arena block to - // use it. - // - // We haven't added this allocation to - // the stats, so subtract it from a - // fake stat (but avoid underflow). - // - // We'll fall back to a small sysAlloc. - stat := uint64(p_size) - sysFree(unsafe.Pointer(p), p_size, &stat) +// +// h must be locked. +func (h *mheap) sysAlloc(n uintptr) (v unsafe.Pointer, size uintptr) { + n = round(n, heapArenaBytes) + + // First, try the arena pre-reservation. 
+ v = h.arena.alloc(n, heapArenaBytes, &memstats.heap_sys) + if v != nil { + size = n + goto mapped + } + + // Try to grow the heap at a hint address. + for h.arenaHints != nil { + hint := h.arenaHints + p := hint.addr + if hint.down { + p -= n + } + if p+n < p { + // We can't use this, so don't ask. + v = nil + } else if arenaIndex(p+n-1) >= 1<<arenaBits { + // Outside addressable heap. Can't use. + v = nil + } else { + v = sysReserve(unsafe.Pointer(p), n) + } + if p == uintptr(v) { + // Success. Update the hint. + if !hint.down { + p += n } + hint.addr = p + size = n + break } + // Failed. Discard this hint and try the next. + // + // TODO: This would be cleaner if sysReserve could be + // told to only return the requested address. In + // particular, this is already how Windows behaves, so + // it would simply things there. + if v != nil { + sysFree(v, n, nil) + } + h.arenaHints = hint.next + h.arenaHintAlloc.free(unsafe.Pointer(hint)) } - if n <= h.arena_end-h.arena_alloc { - // Keep taking from our reservation. - p := h.arena_alloc - sysMap(unsafe.Pointer(p), n, h.arena_reserved, &memstats.heap_sys) - h.arena_alloc += n - if h.arena_alloc > h.arena_used { - h.setArenaUsed(h.arena_alloc, true) + if size == 0 { + if raceenabled { + // The race detector assumes the heap lives in + // [0x00c000000000, 0x00e000000000), but we + // just ran out of hints in this region. Give + // a nice failure. + throw("too many address space collisions for -race mode") } - if p&(_PageSize-1) != 0 { - throw("misrounded allocation in MHeap_SysAlloc") + // All of the hints failed, so we'll take any + // (sufficiently aligned) address the kernel will give + // us. + v, size = sysReserveAligned(nil, n, heapArenaBytes) + if v == nil { + return nil, 0 } - return unsafe.Pointer(p) + + // Create new hints for extending this region. + hint := (*arenaHint)(h.arenaHintAlloc.alloc()) + hint.addr, hint.down = uintptr(v), true + hint.next, mheap_.arenaHints = mheap_.arenaHints, hint + hint = (*arenaHint)(h.arenaHintAlloc.alloc()) + hint.addr = uintptr(v) + size + hint.next, mheap_.arenaHints = mheap_.arenaHints, hint } -reservationFailed: - // If using 64-bit, our reservation is all we have. - if sys.PtrSize != 4 { - return nil + // Check for bad pointers or pointers we can't use. + { + var bad string + p := uintptr(v) + if p+size < p { + bad = "region exceeds uintptr range" + } else if arenaIndex(p) >= 1<<arenaBits { + bad = "base outside usable address space" + } else if arenaIndex(p+size-1) >= 1<<arenaBits { + bad = "end outside usable address space" + } + if bad != "" { + // This should be impossible on most architectures, + // but it would be really confusing to debug. + print("runtime: memory allocated by OS [", hex(p), ", ", hex(p+size), ") not in usable address space: ", bad, "\n") + throw("memory reservation exceeds address space limit") + } } - // On 32-bit, once the reservation is gone we can - // try to get memory at a location chosen by the OS. - p_size := round(n, _PageSize) + _PageSize - p := uintptr(sysAlloc(p_size, &memstats.heap_sys)) - if p == 0 { - return nil + if uintptr(v)&(heapArenaBytes-1) != 0 { + throw("misrounded allocation in sysAlloc") } - if p < h.arena_start || p+p_size-h.arena_start > _MaxMem { - // This shouldn't be possible because _MaxMem is the - // whole address space on 32-bit. 
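The hint loop above consumes one hint per successful grow and advances it past the new region; "down" hints grow the heap toward lower addresses. The toy model below shows only that bookkeeping, assuming a 64-bit uintptr: arenaHint and takeFromHint are stand-ins, and no memory is actually reserved.

package main

import "fmt"

// arenaHint here is a stand-in for the runtime's hint records.
type arenaHint struct {
	addr uintptr
	down bool
}

// takeFromHint mimics a successful reservation of n bytes at a hint:
// a downward hint hands out the n bytes just below hint.addr, an upward
// hint the n bytes starting at it, and the hint is moved past the region.
func takeFromHint(h *arenaHint, n uintptr) uintptr {
	p := h.addr
	if h.down {
		p -= n
		h.addr = p
	} else {
		h.addr = p + n
	}
	return p
}

func main() {
	const n = 64 << 20
	up := &arenaHint{addr: 0xc000000000}
	base := takeFromHint(up, n)
	fmt.Printf("upward hint:   got [%#x, %#x), next hint %#x\n", base, base+n, up.addr)
	down := &arenaHint{addr: 0xc000000000, down: true}
	base = takeFromHint(down, n)
	fmt.Printf("downward hint: got [%#x, %#x), next hint %#x\n", base, base+n, down.addr)
}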
- top := uint64(h.arena_start) + _MaxMem - print("runtime: memory allocated by OS (", hex(p), ") not in usable range [", hex(h.arena_start), ",", hex(top), ")\n") - sysFree(unsafe.Pointer(p), p_size, &memstats.heap_sys) - return nil + // Back the reservation. + sysMap(v, size, &memstats.heap_sys) + +mapped: + // Create arena metadata. + for ri := arenaIndex(uintptr(v)); ri <= arenaIndex(uintptr(v)+size-1); ri++ { + l2 := h.arenas[ri.l1()] + if l2 == nil { + // Allocate an L2 arena map. + l2 = (*[1 << arenaL2Bits]*heapArena)(persistentalloc(unsafe.Sizeof(*l2), sys.PtrSize, nil)) + if l2 == nil { + throw("out of memory allocating heap arena map") + } + atomic.StorepNoWB(unsafe.Pointer(&h.arenas[ri.l1()]), unsafe.Pointer(l2)) + } + + if l2[ri.l2()] != nil { + throw("arena already initialized") + } + var r *heapArena + r = (*heapArena)(h.heapArenaAlloc.alloc(unsafe.Sizeof(*r), sys.PtrSize, &memstats.gc_sys)) + if r == nil { + r = (*heapArena)(persistentalloc(unsafe.Sizeof(*r), sys.PtrSize, &memstats.gc_sys)) + if r == nil { + throw("out of memory allocating heap arena metadata") + } + } + + // Store atomically just in case an object from the + // new heap arena becomes visible before the heap lock + // is released (which shouldn't happen, but there's + // little downside to this). + atomic.StorepNoWB(unsafe.Pointer(&l2[ri.l2()]), unsafe.Pointer(r)) } - p += -p & (_PageSize - 1) - if p+n > h.arena_used { - h.setArenaUsed(p+n, true) + // Tell the race detector about the new heap memory. + if raceenabled { + racemapshadow(v, size) } - if p&(_PageSize-1) != 0 { - throw("misrounded allocation in MHeap_SysAlloc") + return +} + +// sysReserveAligned is like sysReserve, but the returned pointer is +// aligned to align bytes. It may reserve either n or n+align bytes, +// so it returns the size that was reserved. +func sysReserveAligned(v unsafe.Pointer, size, align uintptr) (unsafe.Pointer, uintptr) { + // Since the alignment is rather large in uses of this + // function, we're not likely to get it by chance, so we ask + // for a larger region and remove the parts we don't need. + retries := 0 +retry: + p := uintptr(sysReserve(v, size+align)) + switch { + case p == 0: + return nil, 0 + case p&(align-1) == 0: + // We got lucky and got an aligned region, so we can + // use the whole thing. + return unsafe.Pointer(p), size + align + case GOOS == "windows": + // On Windows we can't release pieces of a + // reservation, so we release the whole thing and + // re-reserve the aligned sub-region. This may race, + // so we may have to try again. + sysFree(unsafe.Pointer(p), size+align, nil) + p = round(p, align) + p2 := sysReserve(unsafe.Pointer(p), size) + if p != uintptr(p2) { + // Must have raced. Try again. + sysFree(p2, size, nil) + if retries++; retries == 100 { + throw("failed to allocate aligned heap memory; too many retries") + } + goto retry + } + // Success. + return p2, size + default: + // Trim off the unaligned parts. 
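The metadata loop above indexes h.arenas with a two-level scheme: the arena index is derived from the address, and its high bits pick an L1 slot while the low bits pick an L2 slot. The sketch below is a simplified model of that split; the constants are illustrative (the real values differ per platform, and the real arenaIndex may also apply a base offset).

package main

import "fmt"

const (
	heapArenaBytes = 64 << 20
	arenaL1Bits    = 6
	arenaL2Bits    = 16
	arenaBits      = arenaL1Bits + arenaL2Bits
)

type arenaIdx uint

// arenaIndex maps an address to its arena number (no base offset here).
func arenaIndex(p uintptr) arenaIdx { return arenaIdx(p / heapArenaBytes) }

func (i arenaIdx) l1() uint { return uint(i) >> arenaL2Bits }
func (i arenaIdx) l2() uint { return uint(i) & (1<<arenaL2Bits - 1) }

func main() {
	p := uintptr(0x00c000000000)
	ri := arenaIndex(p)
	fmt.Printf("p=%#x -> arena index %d (l1=%d, l2=%d)\n", p, ri, ri.l1(), ri.l2())
}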
+ pAligned := round(p, align) + sysFree(unsafe.Pointer(p), pAligned-p, nil) + end := pAligned + size + endLen := (p + size + align) - end + if endLen > 0 { + sysFree(unsafe.Pointer(end), endLen, nil) + } + return unsafe.Pointer(pAligned), size } - return unsafe.Pointer(p) } // base address for all 0-byte allocations @@ -862,7 +1055,7 @@ func largeAlloc(size uintptr, needzero bool, noscan bool) *mspan { throw("out of memory") } s.limit = s.base() + size - heapBitsForSpan(s.base()).initSpan(s) + heapBitsForAddr(s.base()).initSpan(s) return s } @@ -875,7 +1068,7 @@ func newobject(typ *_type) unsafe.Pointer { //go:linkname reflect_unsafe_New reflect.unsafe_New func reflect_unsafe_New(typ *_type) unsafe.Pointer { - return newobject(typ) + return mallocgc(typ.size, typ, true) } // newarray allocates an array of n elements of type typ. @@ -1046,6 +1239,34 @@ func persistentalloc1(size, align uintptr, sysStat *uint64) *notInHeap { return p } +// linearAlloc is a simple linear allocator that pre-reserves a region +// of memory and then maps that region as needed. The caller is +// responsible for locking. +type linearAlloc struct { + next uintptr // next free byte + mapped uintptr // one byte past end of mapped space + end uintptr // end of reserved space +} + +func (l *linearAlloc) init(base, size uintptr) { + l.next, l.mapped = base, base + l.end = base + size +} + +func (l *linearAlloc) alloc(size, align uintptr, sysStat *uint64) unsafe.Pointer { + p := round(l.next, align) + if p+size > l.end { + return nil + } + l.next = p + size + if pEnd := round(l.next-1, physPageSize); pEnd > l.mapped { + // We need to map more of the reserved space. + sysMap(unsafe.Pointer(l.mapped), pEnd-l.mapped, sysStat) + l.mapped = pEnd + } + return unsafe.Pointer(p) +} + // notInHeap is off-heap memory allocated by a lower-level allocator // like sysAlloc or persistentAlloc. // diff --git a/libgo/go/runtime/malloc_test.go b/libgo/go/runtime/malloc_test.go index ab580f8..30a7d84 100644 --- a/libgo/go/runtime/malloc_test.go +++ b/libgo/go/runtime/malloc_test.go @@ -7,16 +7,25 @@ package runtime_test import ( "flag" "fmt" + "internal/race" + "internal/testenv" + "os" + "os/exec" "reflect" . "runtime" + "strings" "testing" "time" "unsafe" ) +var testMemStatsCount int + func TestMemStats(t *testing.T) { t.Skip("skipping test with gccgo") + testMemStatsCount++ + // Make sure there's at least one forced GC. GC() @@ -32,6 +41,13 @@ func TestMemStats(t *testing.T) { } le := func(thresh float64) func(interface{}) error { return func(x interface{}) error { + // These sanity tests aren't necessarily valid + // with high -test.count values, so only run + // them once. + if testMemStatsCount > 1 { + return nil + } + if reflect.ValueOf(x).Convert(reflect.TypeOf(thresh)).Float() < thresh { return nil } @@ -50,7 +66,7 @@ func TestMemStats(t *testing.T) { // PauseTotalNs can be 0 if timer resolution is poor. 
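The default case of sysReserveAligned above over-reserves by the alignment and then gives back the unaligned head and tail. The sketch below reproduces just that arithmetic with plain numbers standing in for the real sysReserve/sysFree calls; trimAligned and the sample addresses are illustrative.

package main

import "fmt"

// trimAligned: given a reservation of size+align bytes starting at p,
// compute the aligned sub-region to keep and the head/tail byte counts
// that would be released.
func trimAligned(p, size, align uintptr) (head, base, end, tailLen uintptr) {
	base = (p + align - 1) &^ (align - 1) // round p up to align
	head = base - p                       // unaligned prefix to release
	end = base + size
	tailLen = (p + size + align) - end // unaligned suffix to release
	return
}

func main() {
	const align = 64 << 20 // e.g. an arena-sized alignment
	head, base, end, tail := trimAligned(0x7f0123000, 256<<20, align)
	fmt.Printf("free %#x head bytes, keep [%#x, %#x), free %#x tail bytes\n",
		head, base, end, tail)
}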
fields := map[string][]func(interface{}) error{ "Alloc": {nz, le(1e10)}, "TotalAlloc": {nz, le(1e11)}, "Sys": {nz, le(1e10)}, - "Lookups": {nz, le(1e10)}, "Mallocs": {nz, le(1e10)}, "Frees": {nz, le(1e10)}, + "Lookups": {eq(uint64(0))}, "Mallocs": {nz, le(1e10)}, "Frees": {nz, le(1e10)}, "HeapAlloc": {nz, le(1e10)}, "HeapSys": {nz, le(1e10)}, "HeapIdle": {le(1e10)}, "HeapInuse": {nz, le(1e10)}, "HeapReleased": {le(1e10)}, "HeapObjects": {nz, le(1e10)}, "StackInuse": {nz, le(1e10)}, "StackSys": {nz, le(1e10)}, @@ -154,6 +170,64 @@ func TestTinyAlloc(t *testing.T) { } } +type acLink struct { + x [1 << 20]byte +} + +var arenaCollisionSink []*acLink + +func TestArenaCollision(t *testing.T) { + testenv.MustHaveExec(t) + + // Test that mheap.sysAlloc handles collisions with other + // memory mappings. + if os.Getenv("TEST_ARENA_COLLISION") != "1" { + cmd := testenv.CleanCmdEnv(exec.Command(os.Args[0], "-test.run=TestArenaCollision", "-test.v")) + cmd.Env = append(cmd.Env, "TEST_ARENA_COLLISION=1") + out, err := cmd.CombinedOutput() + if race.Enabled { + // This test runs the runtime out of hint + // addresses, so it will start mapping the + // heap wherever it can. The race detector + // doesn't support this, so look for the + // expected failure. + if want := "too many address space collisions"; !strings.Contains(string(out), want) { + t.Fatalf("want %q, got:\n%s", want, string(out)) + } + } else if !strings.Contains(string(out), "PASS\n") || err != nil { + t.Fatalf("%s\n(exit status %v)", string(out), err) + } + return + } + disallowed := [][2]uintptr{} + // Drop all but the next 3 hints. 64-bit has a lot of hints, + // so it would take a lot of memory to go through all of them. + KeepNArenaHints(3) + // Consume these 3 hints and force the runtime to find some + // fallback hints. + for i := 0; i < 5; i++ { + // Reserve memory at the next hint so it can't be used + // for the heap. + start, end := MapNextArenaHint() + disallowed = append(disallowed, [2]uintptr{start, end}) + // Allocate until the runtime tries to use the hint we + // just mapped over. + hint := GetNextArenaHint() + for GetNextArenaHint() == hint { + ac := new(acLink) + arenaCollisionSink = append(arenaCollisionSink, ac) + // The allocation must not have fallen into + // one of the reserved regions. + p := uintptr(unsafe.Pointer(ac)) + for _, d := range disallowed { + if d[0] <= p && p < d[1] { + t.Fatalf("allocation %#x in reserved region [%#x, %#x)", p, d[0], d[1]) + } + } + } + } +} + var mallocSink uintptr func BenchmarkMalloc8(b *testing.B) { diff --git a/libgo/go/runtime/hashmap.go b/libgo/go/runtime/map.go index 53b05b1..8e97bc5 100644 --- a/libgo/go/runtime/hashmap.go +++ b/libgo/go/runtime/map.go @@ -87,7 +87,7 @@ const ( // Maximum key or value size to keep inline (instead of mallocing per element). // Must fit in a uint8. // Fast versions cannot handle big values - the cutoff size for - // fast versions in ../../cmd/internal/gc/walk.go must be at most this value. + // fast versions in cmd/compile/internal/gc/walk.go must be at most this value. maxKeySize = 128 maxValueSize = 128 @@ -121,8 +121,8 @@ const ( // A header for a Go map. type hmap struct { - // Note: the format of the Hmap is encoded in ../../cmd/internal/gc/reflect.go and - // ../reflect/type.go. Don't change this structure without also changing that code! + // Note: the format of the hmap is also encoded in cmd/compile/internal/gc/reflect.go. + // Make sure this stays in sync with the compiler's definition. count int // # live cells == size of map. 
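TestArenaCollision above uses the common re-exec pattern: the test re-runs its own binary with an environment variable set so the disruptive part runs in a fresh process, and the parent only inspects the child's output. A minimal sketch of that pattern follows; the package name, environment variable, and test name are made up for illustration.

package reexec_test

import (
	"os"
	"os/exec"
	"strings"
	"testing"
)

func TestInChild(t *testing.T) {
	if os.Getenv("BE_CHILD") == "1" {
		// Child half: this is where the interesting work would go.
		return
	}
	// Parent half: run ourselves again with the marker variable set.
	cmd := exec.Command(os.Args[0], "-test.run=TestInChild", "-test.v")
	cmd.Env = append(os.Environ(), "BE_CHILD=1")
	out, err := cmd.CombinedOutput()
	if err != nil || !strings.Contains(string(out), "PASS") {
		t.Fatalf("child run failed: %v\n%s", err, out)
	}
}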
Must be first (used by len() builtin) flags uint8 B uint8 // log_2 of # of buckets (can hold up to loadFactor * 2^B items) @@ -141,7 +141,7 @@ type mapextra struct { // If both key and value do not contain pointers and are inline, then we mark bucket // type as containing no pointers. This avoids scanning such maps. // However, bmap.overflow is a pointer. In order to keep overflow buckets - // alive, we store pointers to all overflow buckets in hmap.overflow and h.map.oldoverflow. + // alive, we store pointers to all overflow buckets in hmap.extra.overflow and hmap.extra.oldoverflow. // overflow and oldoverflow are only used if key and value do not contain pointers. // overflow contains overflow buckets for hmap.buckets. // oldoverflow contains overflow buckets for hmap.oldbuckets. @@ -167,7 +167,7 @@ type bmap struct { } // A hash iteration structure. -// If you modify hiter, also change cmd/internal/gc/reflect.go to indicate +// If you modify hiter, also change cmd/compile/internal/gc/reflect.go to indicate // the layout of this structure. type hiter struct { key unsafe.Pointer // Must be in first position. Write nil to indicate iteration end (see cmd/internal/gc/range.go). @@ -333,7 +333,7 @@ func makemap(t *maptype, hint int, h *hmap) *hmap { // If hint is large zeroing this memory could take a while. if h.B != 0 { var nextOverflow *bmap - h.buckets, nextOverflow = makeBucketArray(t, h.B) + h.buckets, nextOverflow = makeBucketArray(t, h.B, nil) if nextOverflow != nil { h.extra = new(mapextra) h.extra.nextOverflow = nextOverflow @@ -343,6 +343,57 @@ func makemap(t *maptype, hint int, h *hmap) *hmap { return h } +// makeBucketArray initializes a backing array for map buckets. +// 1<<b is the minimum number of buckets to allocate. +// dirtyalloc should either be nil or a bucket array previously +// allocated by makeBucketArray with the same t and b parameters. +// If dirtyalloc is nil a new backing array will be alloced and +// otherwise dirtyalloc will be cleared and reused as backing array. +func makeBucketArray(t *maptype, b uint8, dirtyalloc unsafe.Pointer) (buckets unsafe.Pointer, nextOverflow *bmap) { + base := bucketShift(b) + nbuckets := base + // For small b, overflow buckets are unlikely. + // Avoid the overhead of the calculation. + if b >= 4 { + // Add on the estimated number of overflow buckets + // required to insert the median number of elements + // used with this value of b. + nbuckets += bucketShift(b - 4) + sz := t.bucket.size * nbuckets + up := roundupsize(sz) + if up != sz { + nbuckets = up / t.bucket.size + } + } + + if dirtyalloc == nil { + buckets = newarray(t.bucket, int(nbuckets)) + } else { + // dirtyalloc was previously generated by + // the above newarray(t.bucket, int(nbuckets)) + // but may not be empty. + buckets = dirtyalloc + size := t.bucket.size * nbuckets + if t.bucket.kind&kindNoPointers == 0 { + memclrHasPointers(buckets, size) + } else { + memclrNoHeapPointers(buckets, size) + } + } + + if base != nbuckets { + // We preallocated some overflow buckets. + // To keep the overhead of tracking these overflow buckets to a minimum, + // we use the convention that if a preallocated overflow bucket's overflow + // pointer is nil, then there are more available by bumping the pointer. + // We need a safe non-nil pointer for the last overflow bucket; just use buckets. 
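For the bucket-count estimate used by makeBucketArray above: once B reaches 4, roughly one extra overflow bucket is planned per 16 regular buckets. The sketch below shows only that estimate and omits the roundupsize adjustment to the allocator's size classes, so the numbers are slightly conservative; estimateBuckets is an illustrative name.

package main

import "fmt"

// estimateBuckets returns the base bucket count for 2^b buckets plus the
// preallocated overflow buckets, mirroring the b >= 4 heuristic above.
func estimateBuckets(b uint8) uintptr {
	base := uintptr(1) << b
	n := base
	if b >= 4 {
		n += uintptr(1) << (b - 4)
	}
	return n
}

func main() {
	for _, b := range []uint8{3, 4, 8} {
		fmt.Printf("B=%d: %d buckets allocated (%d base)\n", b, estimateBuckets(b), uintptr(1)<<b)
	}
}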
+ nextOverflow = (*bmap)(add(buckets, base*uintptr(t.bucketsize))) + last := (*bmap)(add(buckets, (nbuckets-1)*uintptr(t.bucketsize))) + last.setoverflow(t, (*bmap)(buckets)) + } + return buckets, nextOverflow +} + // mapaccess1 returns a pointer to h[key]. Never returns nil, instead // it will return a reference to the zero object for the value type if // the key is not in the map. @@ -696,14 +747,13 @@ search: } else if t.key.kind&kindNoPointers == 0 { memclrHasPointers(k, t.key.size) } - // Only clear value if there are pointers in it. - if t.indirectvalue || t.elem.kind&kindNoPointers == 0 { - v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) - if t.indirectvalue { - *(*unsafe.Pointer)(v) = nil - } else { - memclrHasPointers(v, t.elem.size) - } + v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) + if t.indirectvalue { + *(*unsafe.Pointer)(v) = nil + } else if t.elem.kind&kindNoPointers == 0 { + memclrHasPointers(v, t.elem.size) + } else { + memclrNoHeapPointers(v, t.elem.size) } b.tophash[i] = empty h.count-- @@ -746,7 +796,7 @@ func mapiterinit(t *maptype, h *hmap, it *hiter) { } if unsafe.Sizeof(hiter{})/sys.PtrSize != 12 { - throw("hash_iter size incorrect") // see ../../cmd/internal/gc/reflect.go + throw("hash_iter size incorrect") // see cmd/compile/internal/gc/reflect.go } it.t = t it.h = h @@ -915,34 +965,49 @@ next: goto next } -func makeBucketArray(t *maptype, b uint8) (buckets unsafe.Pointer, nextOverflow *bmap) { - base := bucketShift(b) - nbuckets := base - // For small b, overflow buckets are unlikely. - // Avoid the overhead of the calculation. - if b >= 4 { - // Add on the estimated number of overflow buckets - // required to insert the median number of elements - // used with this value of b. - nbuckets += bucketShift(b - 4) - sz := t.bucket.size * nbuckets - up := roundupsize(sz) - if up != sz { - nbuckets = up / t.bucket.size - } +// mapclear deletes all keys from a map. +func mapclear(t *maptype, h *hmap) { + if raceenabled && h != nil { + callerpc := getcallerpc() + pc := funcPC(mapclear) + racewritepc(unsafe.Pointer(h), callerpc, pc) } - buckets = newarray(t.bucket, int(nbuckets)) - if base != nbuckets { - // We preallocated some overflow buckets. - // To keep the overhead of tracking these overflow buckets to a minimum, - // we use the convention that if a preallocated overflow bucket's overflow - // pointer is nil, then there are more available by bumping the pointer. - // We need a safe non-nil pointer for the last overflow bucket; just use buckets. - nextOverflow = (*bmap)(add(buckets, base*uintptr(t.bucketsize))) - last := (*bmap)(add(buckets, (nbuckets-1)*uintptr(t.bucketsize))) - last.setoverflow(t, (*bmap)(buckets)) + + if h == nil || h.count == 0 { + return } - return buckets, nextOverflow + + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + + h.flags |= hashWriting + + h.flags &^= sameSizeGrow + h.oldbuckets = nil + h.nevacuate = 0 + h.noverflow = 0 + h.count = 0 + + // Keep the mapextra allocation but clear any extra information. + if h.extra != nil { + *h.extra = mapextra{} + } + + // makeBucketArray clears the memory pointed to by h.buckets + // and recovers any overflow buckets by generating them + // as if h.buckets was newly alloced. + _, nextOverflow := makeBucketArray(t, h.B, h.buckets) + if nextOverflow != nil { + // If overflow buckets are created then h.extra + // will have been allocated during initial bucket creation. 
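The new mapclear above is what the compiler targets when it recognizes the map-clearing idiom: ranging over a map and deleting every key is lowered to a single runtime call instead of per-key deletes (the benchmark added further down exercises the same shape). A small user-level example of the idiom:

package main

import "fmt"

func main() {
	m := map[string]int{"a": 1, "b": 2, "c": 3}
	// This exact loop shape is recognized and compiled to a mapclear call.
	for k := range m {
		delete(m, k)
	}
	fmt.Println(len(m)) // 0
}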
+ h.extra.nextOverflow = nextOverflow + } + + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting } func hashGrow(t *maptype, h *hmap) { @@ -955,7 +1020,7 @@ func hashGrow(t *maptype, h *hmap) { h.flags |= sameSizeGrow } oldbuckets := h.buckets - newbuckets, nextOverflow := makeBucketArray(t, h.B+bigger) + newbuckets, nextOverflow := makeBucketArray(t, h.B+bigger, nil) flags := h.flags &^ (iterator | oldIterator) if h.flags&iterator != 0 { @@ -1059,7 +1124,6 @@ type evacDst struct { func evacuate(t *maptype, h *hmap, oldbucket uintptr) { b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) newbit := h.noldbuckets() - hashfn := t.key.hashfn if !evacuated(b) { // TODO: reuse overflow buckets instead of using new ones, if there // is no iterator using the old buckets. (If !oldIterator.) @@ -1100,7 +1164,7 @@ func evacuate(t *maptype, h *hmap, oldbucket uintptr) { if !h.sameSizeGrow() { // Compute hash to make our evacuation decision (whether we need // to send this key/value to bucket x or bucket y). - hash := hashfn(k2, uintptr(h.hash0)) + hash := t.key.hashfn(k2, uintptr(h.hash0)) if h.flags&iterator != 0 && !t.reflexivekey && !t.key.equalfn(k2, k2) { // If key != key (NaNs), then the hash could be (and probably // will be) entirely different from the old hash. Moreover, @@ -1203,6 +1267,7 @@ func ismapkey(t *_type) bool { //go:linkname reflect_makemap reflect.makemap func reflect_makemap(t *maptype, cap int) *hmap { + // Check invariants and reflects math. if !ismapkey(t.key) { throw("runtime.reflect_makemap: unsupported map key type") } @@ -1294,5 +1359,5 @@ func reflect_ismapkey(t *_type) bool { return ismapkey(t) } -const maxZero = 1024 // must match value in ../cmd/compile/internal/gc/walk.go +const maxZero = 1024 // must match value in cmd/compile/internal/gc/walk.go var zeroVal [maxZero]byte diff --git a/libgo/go/runtime/mapspeed_test.go b/libgo/go/runtime/map_benchmark_test.go index aec0c51..025c039 100644 --- a/libgo/go/runtime/mapspeed_test.go +++ b/libgo/go/runtime/map_benchmark_test.go @@ -341,3 +341,32 @@ func BenchmarkComplexAlgMap(b *testing.B) { _ = m[k] } } + +func BenchmarkGoMapClear(b *testing.B) { + b.Run("Reflexive", func(b *testing.B) { + for size := 1; size < 100000; size *= 10 { + b.Run(strconv.Itoa(size), func(b *testing.B) { + m := make(map[int]int, size) + for i := 0; i < b.N; i++ { + m[0] = size // Add one element so len(m) != 0 avoiding fast paths. + for k := range m { + delete(m, k) + } + } + }) + } + }) + b.Run("NonReflexive", func(b *testing.B) { + for size := 1; size < 100000; size *= 10 { + b.Run(strconv.Itoa(size), func(b *testing.B) { + m := make(map[float64]int, size) + for i := 0; i < b.N; i++ { + m[1.0] = size // Add one element so len(m) != 0 avoiding fast paths. + for k := range m { + delete(m, k) + } + } + }) + } + }) +} diff --git a/libgo/go/runtime/map_fast32.go b/libgo/go/runtime/map_fast32.go new file mode 100644 index 0000000..a9a06a8 --- /dev/null +++ b/libgo/go/runtime/map_fast32.go @@ -0,0 +1,413 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package runtime + +import ( + "runtime/internal/sys" + "unsafe" +) + +func mapaccess1_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer { + if raceenabled && h != nil { + callerpc := getcallerpc() + racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_fast32)) + } + if h == nil || h.count == 0 { + return unsafe.Pointer(&zeroVal[0]) + } + if h.flags&hashWriting != 0 { + throw("concurrent map read and map write") + } + var b *bmap + if h.B == 0 { + // One-bucket table. No need to hash. + b = (*bmap)(h.buckets) + } else { + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + m := bucketMask(h.B) + b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) + if c := h.oldbuckets; c != nil { + if !h.sameSizeGrow() { + // There used to be half as many buckets; mask down one more power of two. + m >>= 1 + } + oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) + if !evacuated(oldb) { + b = oldb + } + } + } + for ; b != nil; b = b.overflow(t) { + for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) { + if *(*uint32)(k) == key && b.tophash[i] != empty { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)) + } + } + } + return unsafe.Pointer(&zeroVal[0]) +} + +func mapaccess2_fast32(t *maptype, h *hmap, key uint32) (unsafe.Pointer, bool) { + if raceenabled && h != nil { + callerpc := getcallerpc() + racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_fast32)) + } + if h == nil || h.count == 0 { + return unsafe.Pointer(&zeroVal[0]), false + } + if h.flags&hashWriting != 0 { + throw("concurrent map read and map write") + } + var b *bmap + if h.B == 0 { + // One-bucket table. No need to hash. + b = (*bmap)(h.buckets) + } else { + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + m := bucketMask(h.B) + b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) + if c := h.oldbuckets; c != nil { + if !h.sameSizeGrow() { + // There used to be half as many buckets; mask down one more power of two. + m >>= 1 + } + oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) + if !evacuated(oldb) { + b = oldb + } + } + } + for ; b != nil; b = b.overflow(t) { + for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) { + if *(*uint32)(k) == key && b.tophash[i] != empty { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)), true + } + } + } + return unsafe.Pointer(&zeroVal[0]), false +} + +func mapassign_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer { + if h == nil { + panic(plainError("assignment to entry in nil map")) + } + if raceenabled { + callerpc := getcallerpc() + racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast32)) + } + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + + // Set hashWriting after calling alg.hash for consistency with mapassign. 
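The fast accessors above pick a bucket from the low B bits of the hash, and while the map is growing they mask with one fewer bit to find the corresponding bucket in the old, half-sized table. The sketch below shows that index arithmetic only; the hash value and B are arbitrary.

package main

import "fmt"

// bucketMask returns the mask for a table of 2^b buckets.
func bucketMask(b uint8) uintptr { return uintptr(1)<<b - 1 }

func main() {
	const B = 5 // 32 buckets
	hash := uintptr(0x7f4a7c15)
	m := bucketMask(B)
	fmt.Printf("bucket in the new table: %d of %d\n", hash&m, 1<<B)
	fmt.Printf("bucket in the old table: %d of %d\n", hash&(m>>1), 1<<(B-1))
}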
+ h.flags |= hashWriting + + if h.buckets == nil { + h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) + } + +again: + bucket := hash & bucketMask(h.B) + if h.growing() { + growWork_fast32(t, h, bucket) + } + b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) + + var insertb *bmap + var inserti uintptr + var insertk unsafe.Pointer + + for { + for i := uintptr(0); i < bucketCnt; i++ { + if b.tophash[i] == empty { + if insertb == nil { + inserti = i + insertb = b + } + continue + } + k := *((*uint32)(add(unsafe.Pointer(b), dataOffset+i*4))) + if k != key { + continue + } + inserti = i + insertb = b + goto done + } + ovf := b.overflow(t) + if ovf == nil { + break + } + b = ovf + } + + // Did not find mapping for key. Allocate new cell & add entry. + + // If we hit the max load factor or we have too many overflow buckets, + // and we're not already in the middle of growing, start growing. + if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { + hashGrow(t, h) + goto again // Growing the table invalidates everything, so try again + } + + if insertb == nil { + // all current buckets are full, allocate a new one. + insertb = h.newoverflow(t, b) + inserti = 0 // not necessary, but avoids needlessly spilling inserti + } + insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks + + insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*4) + // store new key at insert position + *(*uint32)(insertk) = key + + h.count++ + +done: + val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*4+inserti*uintptr(t.valuesize)) + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting + return val +} + +func mapassign_fast32ptr(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { + if h == nil { + panic(plainError("assignment to entry in nil map")) + } + if raceenabled { + callerpc := getcallerpc() + racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast32)) + } + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + + // Set hashWriting after calling alg.hash for consistency with mapassign. + h.flags |= hashWriting + + if h.buckets == nil { + h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) + } + +again: + bucket := hash & bucketMask(h.B) + if h.growing() { + growWork_fast32(t, h, bucket) + } + b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) + + var insertb *bmap + var inserti uintptr + var insertk unsafe.Pointer + + for { + for i := uintptr(0); i < bucketCnt; i++ { + if b.tophash[i] == empty { + if insertb == nil { + inserti = i + insertb = b + } + continue + } + k := *((*unsafe.Pointer)(add(unsafe.Pointer(b), dataOffset+i*4))) + if k != key { + continue + } + inserti = i + insertb = b + goto done + } + ovf := b.overflow(t) + if ovf == nil { + break + } + b = ovf + } + + // Did not find mapping for key. Allocate new cell & add entry. + + // If we hit the max load factor or we have too many overflow buckets, + // and we're not already in the middle of growing, start growing. + if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { + hashGrow(t, h) + goto again // Growing the table invalidates everything, so try again + } + + if insertb == nil { + // all current buckets are full, allocate a new one. 
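The tophash(hash) values stored by the assign paths above are the top 8 bits of the hash, bumped above a small reserved range so they never collide with the empty and evacuation markers kept in the same array. The sketch below assumes a 64-bit platform; treat ptrBits and minTopHash as illustrative constants rather than the runtime's definitions.

package main

import "fmt"

const (
	ptrBits    = 64 // assumes a 64-bit platform
	minTopHash = 4  // values below this are reserved for bucket-state markers
)

func tophash(hash uintptr) uint8 {
	top := uint8(hash >> (ptrBits - 8))
	if top < minTopHash {
		top += minTopHash
	}
	return top
}

func main() {
	fmt.Println(tophash(0xa7<<56 | 0xabcd)) // 167: the raw top byte is used as-is
	fmt.Println(tophash(0x01<<56 | 0xabcd)) // 5: raw top byte 1 is reserved, so it is bumped
}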
+ insertb = h.newoverflow(t, b) + inserti = 0 // not necessary, but avoids needlessly spilling inserti + } + insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks + + insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*4) + // store new key at insert position + *(*unsafe.Pointer)(insertk) = key + + h.count++ + +done: + val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*4+inserti*uintptr(t.valuesize)) + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting + return val +} + +func mapdelete_fast32(t *maptype, h *hmap, key uint32) { + if raceenabled && h != nil { + callerpc := getcallerpc() + racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_fast32)) + } + if h == nil || h.count == 0 { + return + } + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + + // Set hashWriting after calling alg.hash for consistency with mapdelete + h.flags |= hashWriting + + bucket := hash & bucketMask(h.B) + if h.growing() { + growWork_fast32(t, h, bucket) + } + b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) +search: + for ; b != nil; b = b.overflow(t) { + for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) { + if key != *(*uint32)(k) || b.tophash[i] == empty { + continue + } + // Only clear key if there are pointers in it. + if t.key.kind&kindNoPointers == 0 { + memclrHasPointers(k, t.key.size) + } + v := add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)) + if t.elem.kind&kindNoPointers == 0 { + memclrHasPointers(v, t.elem.size) + } else { + memclrNoHeapPointers(v, t.elem.size) + } + b.tophash[i] = empty + h.count-- + break search + } + } + + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting +} + +func growWork_fast32(t *maptype, h *hmap, bucket uintptr) { + // make sure we evacuate the oldbucket corresponding + // to the bucket we're about to use + evacuate_fast32(t, h, bucket&h.oldbucketmask()) + + // evacuate one more oldbucket to make progress on growing + if h.growing() { + evacuate_fast32(t, h, h.nevacuate) + } +} + +func evacuate_fast32(t *maptype, h *hmap, oldbucket uintptr) { + b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) + newbit := h.noldbuckets() + if !evacuated(b) { + // TODO: reuse overflow buckets instead of using new ones, if there + // is no iterator using the old buckets. (If !oldIterator.) + + // xy contains the x and y (low and high) evacuation destinations. + var xy [2]evacDst + x := &xy[0] + x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) + x.k = add(unsafe.Pointer(x.b), dataOffset) + x.v = add(x.k, bucketCnt*4) + + if !h.sameSizeGrow() { + // Only calculate y pointers if we're growing bigger. + // Otherwise GC can see bad pointers. 
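On the "max load factor" trigger mentioned in the assign paths above: a map grows once it averages more than 6.5 entries per bucket (and has outgrown the single-bucket case). The sketch below mirrors that check with the 13/2 split written out; overLoadFactor here is a standalone copy for illustration, not the runtime's function.

package main

import "fmt"

const bucketCnt = 8

// overLoadFactor reports whether count elements in 2^B buckets exceed the
// 6.5-per-bucket threshold.
func overLoadFactor(count int, B uint8) bool {
	return count > bucketCnt && uintptr(count) > 13*((uintptr(1)<<B)/2)
}

func main() {
	for _, c := range []int{8, 52, 53} {
		fmt.Printf("count=%d with 8 buckets: grow=%v\n", c, overLoadFactor(c, 3))
	}
}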
+ y := &xy[1] + y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) + y.k = add(unsafe.Pointer(y.b), dataOffset) + y.v = add(y.k, bucketCnt*4) + } + + for ; b != nil; b = b.overflow(t) { + k := add(unsafe.Pointer(b), dataOffset) + v := add(k, bucketCnt*4) + for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 4), add(v, uintptr(t.valuesize)) { + top := b.tophash[i] + if top == empty { + b.tophash[i] = evacuatedEmpty + continue + } + if top < minTopHash { + throw("bad map state") + } + var useY uint8 + if !h.sameSizeGrow() { + // Compute hash to make our evacuation decision (whether we need + // to send this key/value to bucket x or bucket y). + hash := t.key.hashfn(k, uintptr(h.hash0)) + if hash&newbit != 0 { + useY = 1 + } + } + + b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap + dst := &xy[useY] // evacuation destination + + if dst.i == bucketCnt { + dst.b = h.newoverflow(t, dst.b) + dst.i = 0 + dst.k = add(unsafe.Pointer(dst.b), dataOffset) + dst.v = add(dst.k, bucketCnt*4) + } + dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check + + // Copy key. + if sys.PtrSize == 4 && t.key.kind&kindNoPointers == 0 && writeBarrier.enabled { + // Write with a write barrier. + *(*unsafe.Pointer)(dst.k) = *(*unsafe.Pointer)(k) + } else { + *(*uint32)(dst.k) = *(*uint32)(k) + } + + typedmemmove(t.elem, dst.v, v) + dst.i++ + // These updates might push these pointers past the end of the + // key or value arrays. That's ok, as we have the overflow pointer + // at the end of the bucket to protect against pointing past the + // end of the bucket. + dst.k = add(dst.k, 4) + dst.v = add(dst.v, uintptr(t.valuesize)) + } + } + // Unlink the overflow buckets & clear key/value to help GC. + if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { + b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) + // Preserve b.tophash because the evacuation + // state is maintained there. + ptr := add(b, dataOffset) + n := uintptr(t.bucketsize) - dataOffset + memclrHasPointers(ptr, n) + } + } + + if oldbucket == h.nevacuate { + advanceEvacuationMark(h, t, newbit) + } +} diff --git a/libgo/go/runtime/map_fast64.go b/libgo/go/runtime/map_fast64.go new file mode 100644 index 0000000..a2a51fc --- /dev/null +++ b/libgo/go/runtime/map_fast64.go @@ -0,0 +1,419 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import ( + "runtime/internal/sys" + "unsafe" +) + +func mapaccess1_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer { + if raceenabled && h != nil { + callerpc := getcallerpc() + racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_fast64)) + } + if h == nil || h.count == 0 { + return unsafe.Pointer(&zeroVal[0]) + } + if h.flags&hashWriting != 0 { + throw("concurrent map read and map write") + } + var b *bmap + if h.B == 0 { + // One-bucket table. No need to hash. + b = (*bmap)(h.buckets) + } else { + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + m := bucketMask(h.B) + b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) + if c := h.oldbuckets; c != nil { + if !h.sameSizeGrow() { + // There used to be half as many buckets; mask down one more power of two. 
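The evacuate_* functions above split each old bucket into an "x" and a "y" destination when the table doubles: the hash bit that was previously masked off (newbit) decides which half each key lands in. The sketch below demonstrates just that decision with arbitrary hashes that both fall in the same old bucket.

package main

import "fmt"

func main() {
	const oldB = 3
	newbit := uintptr(1) << oldB // number of old buckets
	oldbucket := uintptr(5)
	for _, hash := range []uintptr{0x25, 0x2d} { // both hashes land in old bucket 5
		dst := oldbucket
		if hash&newbit != 0 {
			dst += newbit // "y" destination in the doubled table
		}
		fmt.Printf("hash %#x: old bucket %d -> new bucket %d\n", hash, oldbucket, dst)
	}
}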
+ m >>= 1 + } + oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) + if !evacuated(oldb) { + b = oldb + } + } + } + for ; b != nil; b = b.overflow(t) { + for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) { + if *(*uint64)(k) == key && b.tophash[i] != empty { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)) + } + } + } + return unsafe.Pointer(&zeroVal[0]) +} + +func mapaccess2_fast64(t *maptype, h *hmap, key uint64) (unsafe.Pointer, bool) { + if raceenabled && h != nil { + callerpc := getcallerpc() + racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_fast64)) + } + if h == nil || h.count == 0 { + return unsafe.Pointer(&zeroVal[0]), false + } + if h.flags&hashWriting != 0 { + throw("concurrent map read and map write") + } + var b *bmap + if h.B == 0 { + // One-bucket table. No need to hash. + b = (*bmap)(h.buckets) + } else { + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + m := bucketMask(h.B) + b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) + if c := h.oldbuckets; c != nil { + if !h.sameSizeGrow() { + // There used to be half as many buckets; mask down one more power of two. + m >>= 1 + } + oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) + if !evacuated(oldb) { + b = oldb + } + } + } + for ; b != nil; b = b.overflow(t) { + for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) { + if *(*uint64)(k) == key && b.tophash[i] != empty { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)), true + } + } + } + return unsafe.Pointer(&zeroVal[0]), false +} + +func mapassign_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer { + if h == nil { + panic(plainError("assignment to entry in nil map")) + } + if raceenabled { + callerpc := getcallerpc() + racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast64)) + } + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + + // Set hashWriting after calling alg.hash for consistency with mapassign. + h.flags |= hashWriting + + if h.buckets == nil { + h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) + } + +again: + bucket := hash & bucketMask(h.B) + if h.growing() { + growWork_fast64(t, h, bucket) + } + b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) + + var insertb *bmap + var inserti uintptr + var insertk unsafe.Pointer + + for { + for i := uintptr(0); i < bucketCnt; i++ { + if b.tophash[i] == empty { + if insertb == nil { + insertb = b + inserti = i + } + continue + } + k := *((*uint64)(add(unsafe.Pointer(b), dataOffset+i*8))) + if k != key { + continue + } + insertb = b + inserti = i + goto done + } + ovf := b.overflow(t) + if ovf == nil { + break + } + b = ovf + } + + // Did not find mapping for key. Allocate new cell & add entry. + + // If we hit the max load factor or we have too many overflow buckets, + // and we're not already in the middle of growing, start growing. + if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { + hashGrow(t, h) + goto again // Growing the table invalidates everything, so try again + } + + if insertb == nil { + // all current buckets are full, allocate a new one. 
+ insertb = h.newoverflow(t, b) + inserti = 0 // not necessary, but avoids needlessly spilling inserti + } + insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks + + insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*8) + // store new key at insert position + *(*uint64)(insertk) = key + + h.count++ + +done: + val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*8+inserti*uintptr(t.valuesize)) + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting + return val +} + +func mapassign_fast64ptr(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { + if h == nil { + panic(plainError("assignment to entry in nil map")) + } + if raceenabled { + callerpc := getcallerpc() + racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast64)) + } + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + + // Set hashWriting after calling alg.hash for consistency with mapassign. + h.flags |= hashWriting + + if h.buckets == nil { + h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) + } + +again: + bucket := hash & bucketMask(h.B) + if h.growing() { + growWork_fast64(t, h, bucket) + } + b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) + + var insertb *bmap + var inserti uintptr + var insertk unsafe.Pointer + + for { + for i := uintptr(0); i < bucketCnt; i++ { + if b.tophash[i] == empty { + if insertb == nil { + insertb = b + inserti = i + } + continue + } + k := *((*unsafe.Pointer)(add(unsafe.Pointer(b), dataOffset+i*8))) + if k != key { + continue + } + insertb = b + inserti = i + goto done + } + ovf := b.overflow(t) + if ovf == nil { + break + } + b = ovf + } + + // Did not find mapping for key. Allocate new cell & add entry. + + // If we hit the max load factor or we have too many overflow buckets, + // and we're not already in the middle of growing, start growing. + if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { + hashGrow(t, h) + goto again // Growing the table invalidates everything, so try again + } + + if insertb == nil { + // all current buckets are full, allocate a new one. 
+ insertb = h.newoverflow(t, b) + inserti = 0 // not necessary, but avoids needlessly spilling inserti + } + insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks + + insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*8) + // store new key at insert position + *(*unsafe.Pointer)(insertk) = key + + h.count++ + +done: + val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*8+inserti*uintptr(t.valuesize)) + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting + return val +} + +func mapdelete_fast64(t *maptype, h *hmap, key uint64) { + if raceenabled && h != nil { + callerpc := getcallerpc() + racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_fast64)) + } + if h == nil || h.count == 0 { + return + } + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + + // Set hashWriting after calling alg.hash for consistency with mapdelete + h.flags |= hashWriting + + bucket := hash & bucketMask(h.B) + if h.growing() { + growWork_fast64(t, h, bucket) + } + b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) +search: + for ; b != nil; b = b.overflow(t) { + for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) { + if key != *(*uint64)(k) || b.tophash[i] == empty { + continue + } + // Only clear key if there are pointers in it. + if t.key.kind&kindNoPointers == 0 { + memclrHasPointers(k, t.key.size) + } + v := add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)) + if t.elem.kind&kindNoPointers == 0 { + memclrHasPointers(v, t.elem.size) + } else { + memclrNoHeapPointers(v, t.elem.size) + } + b.tophash[i] = empty + h.count-- + break search + } + } + + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting +} + +func growWork_fast64(t *maptype, h *hmap, bucket uintptr) { + // make sure we evacuate the oldbucket corresponding + // to the bucket we're about to use + evacuate_fast64(t, h, bucket&h.oldbucketmask()) + + // evacuate one more oldbucket to make progress on growing + if h.growing() { + evacuate_fast64(t, h, h.nevacuate) + } +} + +func evacuate_fast64(t *maptype, h *hmap, oldbucket uintptr) { + b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) + newbit := h.noldbuckets() + if !evacuated(b) { + // TODO: reuse overflow buckets instead of using new ones, if there + // is no iterator using the old buckets. (If !oldIterator.) + + // xy contains the x and y (low and high) evacuation destinations. + var xy [2]evacDst + x := &xy[0] + x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) + x.k = add(unsafe.Pointer(x.b), dataOffset) + x.v = add(x.k, bucketCnt*8) + + if !h.sameSizeGrow() { + // Only calculate y pointers if we're growing bigger. + // Otherwise GC can see bad pointers. 
+ y := &xy[1] + y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) + y.k = add(unsafe.Pointer(y.b), dataOffset) + y.v = add(y.k, bucketCnt*8) + } + + for ; b != nil; b = b.overflow(t) { + k := add(unsafe.Pointer(b), dataOffset) + v := add(k, bucketCnt*8) + for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 8), add(v, uintptr(t.valuesize)) { + top := b.tophash[i] + if top == empty { + b.tophash[i] = evacuatedEmpty + continue + } + if top < minTopHash { + throw("bad map state") + } + var useY uint8 + if !h.sameSizeGrow() { + // Compute hash to make our evacuation decision (whether we need + // to send this key/value to bucket x or bucket y). + hash := t.key.hashfn(k, uintptr(h.hash0)) + if hash&newbit != 0 { + useY = 1 + } + } + + b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap + dst := &xy[useY] // evacuation destination + + if dst.i == bucketCnt { + dst.b = h.newoverflow(t, dst.b) + dst.i = 0 + dst.k = add(unsafe.Pointer(dst.b), dataOffset) + dst.v = add(dst.k, bucketCnt*8) + } + dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check + + // Copy key. + if t.key.kind&kindNoPointers == 0 && writeBarrier.enabled { + if sys.PtrSize == 8 { + // Write with a write barrier. + *(*unsafe.Pointer)(dst.k) = *(*unsafe.Pointer)(k) + } else { + // There are three ways to squeeze at least one 32 bit pointer into 64 bits. + // Give up and call typedmemmove. + typedmemmove(t.key, dst.k, k) + } + } else { + *(*uint64)(dst.k) = *(*uint64)(k) + } + + typedmemmove(t.elem, dst.v, v) + dst.i++ + // These updates might push these pointers past the end of the + // key or value arrays. That's ok, as we have the overflow pointer + // at the end of the bucket to protect against pointing past the + // end of the bucket. + dst.k = add(dst.k, 8) + dst.v = add(dst.v, uintptr(t.valuesize)) + } + } + // Unlink the overflow buckets & clear key/value to help GC. + if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { + b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) + // Preserve b.tophash because the evacuation + // state is maintained there. + ptr := add(b, dataOffset) + n := uintptr(t.bucketsize) - dataOffset + memclrHasPointers(ptr, n) + } + } + + if oldbucket == h.nevacuate { + advanceEvacuationMark(h, t, newbit) + } +} diff --git a/libgo/go/runtime/map_faststr.go b/libgo/go/runtime/map_faststr.go new file mode 100644 index 0000000..5812b3f --- /dev/null +++ b/libgo/go/runtime/map_faststr.go @@ -0,0 +1,430 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import ( + "runtime/internal/sys" + "unsafe" +) + +func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer { + if raceenabled && h != nil { + callerpc := getcallerpc() + racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_faststr)) + } + if h == nil || h.count == 0 { + return unsafe.Pointer(&zeroVal[0]) + } + if h.flags&hashWriting != 0 { + throw("concurrent map read and map write") + } + key := stringStructOf(&ky) + if h.B == 0 { + // One-bucket table. 
+ b := (*bmap)(h.buckets) + if key.len < 32 { + // short key, doing lots of comparisons is ok + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] == empty { + continue + } + if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) + } + } + return unsafe.Pointer(&zeroVal[0]) + } + // long key, try not to do more comparisons than necessary + keymaybe := uintptr(bucketCnt) + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] == empty { + continue + } + if k.str == key.str { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) + } + // check first 4 bytes + if *((*[4]byte)(key.str)) != *((*[4]byte)(k.str)) { + continue + } + // check last 4 bytes + if *((*[4]byte)(add(key.str, uintptr(key.len)-4))) != *((*[4]byte)(add(k.str, uintptr(key.len)-4))) { + continue + } + if keymaybe != bucketCnt { + // Two keys are potential matches. Use hash to distinguish them. + goto dohash + } + keymaybe = i + } + if keymaybe != bucketCnt { + k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+keymaybe*2*sys.PtrSize)) + if memequal(k.str, key.str, uintptr(key.len)) { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+keymaybe*uintptr(t.valuesize)) + } + } + return unsafe.Pointer(&zeroVal[0]) + } +dohash: + hash := t.key.hashfn(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0)) + m := bucketMask(h.B) + b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) + if c := h.oldbuckets; c != nil { + if !h.sameSizeGrow() { + // There used to be half as many buckets; mask down one more power of two. + m >>= 1 + } + oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) + if !evacuated(oldb) { + b = oldb + } + } + top := tophash(hash) + for ; b != nil; b = b.overflow(t) { + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] != top { + continue + } + if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) + } + } + } + return unsafe.Pointer(&zeroVal[0]) +} + +func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) { + if raceenabled && h != nil { + callerpc := getcallerpc() + racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_faststr)) + } + if h == nil || h.count == 0 { + return unsafe.Pointer(&zeroVal[0]), false + } + if h.flags&hashWriting != 0 { + throw("concurrent map read and map write") + } + key := stringStructOf(&ky) + if h.B == 0 { + // One-bucket table. 
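For string keys of 32 bytes or more, the single-bucket path above screens candidates cheaply (length, then the first and last 4 bytes) before paying for a full comparison. The sketch below is a simplified, user-level version of that screening; likelyEqual is an illustrative name and omits the tophash and pointer-equality shortcuts the real code also uses.

package main

import "fmt"

// likelyEqual applies the cheap checks: equal length, equal first 4 bytes,
// equal last 4 bytes. A true result would still need a full comparison.
func likelyEqual(a, b string) bool {
	if len(a) != len(b) {
		return false
	}
	if len(a) < 4 {
		return a == b
	}
	return a[:4] == b[:4] && a[len(a)-4:] == b[len(b)-4:]
}

func main() {
	k := "observability-pipeline-production-east"
	fmt.Println(likelyEqual(k, "observability-pipeline-production-west")) // false: last 4 bytes differ
	fmt.Println(likelyEqual(k, k))                                        // true: full compare would follow
}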
+ b := (*bmap)(h.buckets) + if key.len < 32 { + // short key, doing lots of comparisons is ok + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] == empty { + continue + } + if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)), true + } + } + return unsafe.Pointer(&zeroVal[0]), false + } + // long key, try not to do more comparisons than necessary + keymaybe := uintptr(bucketCnt) + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] == empty { + continue + } + if k.str == key.str { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)), true + } + // check first 4 bytes + if *((*[4]byte)(key.str)) != *((*[4]byte)(k.str)) { + continue + } + // check last 4 bytes + if *((*[4]byte)(add(key.str, uintptr(key.len)-4))) != *((*[4]byte)(add(k.str, uintptr(key.len)-4))) { + continue + } + if keymaybe != bucketCnt { + // Two keys are potential matches. Use hash to distinguish them. + goto dohash + } + keymaybe = i + } + if keymaybe != bucketCnt { + k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+keymaybe*2*sys.PtrSize)) + if memequal(k.str, key.str, uintptr(key.len)) { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+keymaybe*uintptr(t.valuesize)), true + } + } + return unsafe.Pointer(&zeroVal[0]), false + } +dohash: + hash := t.key.hashfn(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0)) + m := bucketMask(h.B) + b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) + if c := h.oldbuckets; c != nil { + if !h.sameSizeGrow() { + // There used to be half as many buckets; mask down one more power of two. + m >>= 1 + } + oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) + if !evacuated(oldb) { + b = oldb + } + } + top := tophash(hash) + for ; b != nil; b = b.overflow(t) { + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] != top { + continue + } + if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)), true + } + } + } + return unsafe.Pointer(&zeroVal[0]), false +} + +func mapassign_faststr(t *maptype, h *hmap, s string) unsafe.Pointer { + if h == nil { + panic(plainError("assignment to entry in nil map")) + } + if raceenabled { + callerpc := getcallerpc() + racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_faststr)) + } + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + key := stringStructOf(&s) + hash := t.key.hashfn(noescape(unsafe.Pointer(&s)), uintptr(h.hash0)) + + // Set hashWriting after calling alg.hash for consistency with mapassign. 
+ h.flags |= hashWriting + + if h.buckets == nil { + h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) + } + +again: + bucket := hash & bucketMask(h.B) + if h.growing() { + growWork_faststr(t, h, bucket) + } + b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) + top := tophash(hash) + + var insertb *bmap + var inserti uintptr + var insertk unsafe.Pointer + + for { + for i := uintptr(0); i < bucketCnt; i++ { + if b.tophash[i] != top { + if b.tophash[i] == empty && insertb == nil { + insertb = b + inserti = i + } + continue + } + k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize)) + if k.len != key.len { + continue + } + if k.str != key.str && !memequal(k.str, key.str, uintptr(key.len)) { + continue + } + // already have a mapping for key. Update it. + inserti = i + insertb = b + goto done + } + ovf := b.overflow(t) + if ovf == nil { + break + } + b = ovf + } + + // Did not find mapping for key. Allocate new cell & add entry. + + // If we hit the max load factor or we have too many overflow buckets, + // and we're not already in the middle of growing, start growing. + if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { + hashGrow(t, h) + goto again // Growing the table invalidates everything, so try again + } + + if insertb == nil { + // all current buckets are full, allocate a new one. + insertb = h.newoverflow(t, b) + inserti = 0 // not necessary, but avoids needlessly spilling inserti + } + insertb.tophash[inserti&(bucketCnt-1)] = top // mask inserti to avoid bounds checks + + insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*2*sys.PtrSize) + // store new key at insert position + *((*stringStruct)(insertk)) = *key + h.count++ + +done: + val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*2*sys.PtrSize+inserti*uintptr(t.valuesize)) + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting + return val +} + +func mapdelete_faststr(t *maptype, h *hmap, ky string) { + if raceenabled && h != nil { + callerpc := getcallerpc() + racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_faststr)) + } + if h == nil || h.count == 0 { + return + } + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + + key := stringStructOf(&ky) + hash := t.key.hashfn(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0)) + + // Set hashWriting after calling alg.hash for consistency with mapdelete + h.flags |= hashWriting + + bucket := hash & bucketMask(h.B) + if h.growing() { + growWork_faststr(t, h, bucket) + } + b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) + top := tophash(hash) +search: + for ; b != nil; b = b.overflow(t) { + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] != top { + continue + } + if k.str != key.str && !memequal(k.str, key.str, uintptr(key.len)) { + continue + } + // Clear key's pointer. 
+ k.str = nil + v := add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) + if t.elem.kind&kindNoPointers == 0 { + memclrHasPointers(v, t.elem.size) + } else { + memclrNoHeapPointers(v, t.elem.size) + } + b.tophash[i] = empty + h.count-- + break search + } + } + + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting +} + +func growWork_faststr(t *maptype, h *hmap, bucket uintptr) { + // make sure we evacuate the oldbucket corresponding + // to the bucket we're about to use + evacuate_faststr(t, h, bucket&h.oldbucketmask()) + + // evacuate one more oldbucket to make progress on growing + if h.growing() { + evacuate_faststr(t, h, h.nevacuate) + } +} + +func evacuate_faststr(t *maptype, h *hmap, oldbucket uintptr) { + b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) + newbit := h.noldbuckets() + if !evacuated(b) { + // TODO: reuse overflow buckets instead of using new ones, if there + // is no iterator using the old buckets. (If !oldIterator.) + + // xy contains the x and y (low and high) evacuation destinations. + var xy [2]evacDst + x := &xy[0] + x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) + x.k = add(unsafe.Pointer(x.b), dataOffset) + x.v = add(x.k, bucketCnt*2*sys.PtrSize) + + if !h.sameSizeGrow() { + // Only calculate y pointers if we're growing bigger. + // Otherwise GC can see bad pointers. + y := &xy[1] + y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) + y.k = add(unsafe.Pointer(y.b), dataOffset) + y.v = add(y.k, bucketCnt*2*sys.PtrSize) + } + + for ; b != nil; b = b.overflow(t) { + k := add(unsafe.Pointer(b), dataOffset) + v := add(k, bucketCnt*2*sys.PtrSize) + for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 2*sys.PtrSize), add(v, uintptr(t.valuesize)) { + top := b.tophash[i] + if top == empty { + b.tophash[i] = evacuatedEmpty + continue + } + if top < minTopHash { + throw("bad map state") + } + var useY uint8 + if !h.sameSizeGrow() { + // Compute hash to make our evacuation decision (whether we need + // to send this key/value to bucket x or bucket y). + hash := t.key.hashfn(k, uintptr(h.hash0)) + if hash&newbit != 0 { + useY = 1 + } + } + + b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap + dst := &xy[useY] // evacuation destination + + if dst.i == bucketCnt { + dst.b = h.newoverflow(t, dst.b) + dst.i = 0 + dst.k = add(unsafe.Pointer(dst.b), dataOffset) + dst.v = add(dst.k, bucketCnt*2*sys.PtrSize) + } + dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check + + // Copy key. + *(*string)(dst.k) = *(*string)(k) + + typedmemmove(t.elem, dst.v, v) + dst.i++ + // These updates might push these pointers past the end of the + // key or value arrays. That's ok, as we have the overflow pointer + // at the end of the bucket to protect against pointing past the + // end of the bucket. + dst.k = add(dst.k, 2*sys.PtrSize) + dst.v = add(dst.v, uintptr(t.valuesize)) + } + } + // Unlink the overflow buckets & clear key/value to help GC. + // Unlink the overflow buckets & clear key/value to help GC. + if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { + b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) + // Preserve b.tophash because the evacuation + // state is maintained there. 
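// Editor's sketch of the X/Y split decided in evacuate_faststr above: when
// the table doubles from 2^B to 2^(B+1) buckets, an entry in old bucket i
// either stays at index i ("X") or moves to i+2^B ("Y"), chosen by a single
// extra hash bit. Hypothetical helper, not part of this change.
func evacDestSketch(hash, oldbucket, newbit uintptr) uintptr {
	if hash&newbit != 0 {
		return oldbucket + newbit // Y: upper half of the grown table
	}
	return oldbucket // X: same index as before
}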
+ ptr := add(b, dataOffset) + n := uintptr(t.bucketsize) - dataOffset + memclrHasPointers(ptr, n) + } + } + + if oldbucket == h.nevacuate { + advanceEvacuationMark(h, t, newbit) + } +} diff --git a/libgo/go/runtime/map_test.go b/libgo/go/runtime/map_test.go index 6d7097e..13f1d2e 100644 --- a/libgo/go/runtime/map_test.go +++ b/libgo/go/runtime/map_test.go @@ -9,6 +9,7 @@ import ( "math" "reflect" "runtime" + "runtime/internal/sys" "sort" "strconv" "strings" @@ -16,6 +17,17 @@ import ( "testing" ) +func TestHmapSize(t *testing.T) { + // The structure of hmap is defined in runtime/map.go + // and in cmd/compile/internal/gc/reflect.go and must be in sync. + // The size of hmap should be 48 bytes on 64 bit and 28 bytes on 32 bit platforms. + var hmapSize = uintptr(8 + 5*sys.PtrSize) + if runtime.RuntimeHmapSize != hmapSize { + t.Errorf("sizeof(runtime.hmap{})==%d, want %d", runtime.RuntimeHmapSize, hmapSize) + } + +} + // negative zero is a good test because: // 1) 0 and -0 are equal, yet have distinct representations. // 2) 0 is represented as all zeros, -0 isn't. @@ -52,14 +64,7 @@ func TestNegativeZero(t *testing.T) { } } -// nan is a good test because nan != nan, and nan has -// a randomized hash value. -func TestNan(t *testing.T) { - m := make(map[float64]int, 0) - nan := math.NaN() - m[nan] = 1 - m[nan] = 2 - m[nan] = 4 +func testMapNan(t *testing.T, m map[float64]int) { if len(m) != 3 { t.Error("length wrong") } @@ -78,6 +83,67 @@ func TestNan(t *testing.T) { } } +// nan is a good test because nan != nan, and nan has +// a randomized hash value. +func TestMapAssignmentNan(t *testing.T) { + m := make(map[float64]int, 0) + nan := math.NaN() + + // Test assignment. + m[nan] = 1 + m[nan] = 2 + m[nan] = 4 + testMapNan(t, m) +} + +// nan is a good test because nan != nan, and nan has +// a randomized hash value. +func TestMapOperatorAssignmentNan(t *testing.T) { + m := make(map[float64]int, 0) + nan := math.NaN() + + // Test assignment operations. + m[nan] += 1 + m[nan] += 2 + m[nan] += 4 + testMapNan(t, m) +} + +func TestMapOperatorAssignment(t *testing.T) { + m := make(map[int]int, 0) + + // "m[k] op= x" is rewritten into "m[k] = m[k] op x" + // differently when op is / or % than when it isn't. + // Simple test to make sure they all work as expected. + m[0] = 12345 + m[0] += 67890 + m[0] /= 123 + m[0] %= 456 + + const want = (12345 + 67890) / 123 % 456 + if got := m[0]; got != want { + t.Errorf("got %d, want %d", got, want) + } +} + +var sinkAppend bool + +func TestMapAppendAssignment(t *testing.T) { + m := make(map[int][]int, 0) + + m[0] = nil + m[0] = append(m[0], 12345) + m[0] = append(m[0], 67890) + sinkAppend, m[0] = !sinkAppend, append(m[0], 123, 456) + a := []int{7, 8, 9, 0} + m[0] = append(m[0], a...) + + want := []int{12345, 67890, 123, 456, 7, 8, 9, 0} + if got := m[0]; !reflect.DeepEqual(got, want) { + t.Errorf("got %v, want %v", got, want) + } +} + // Maps aren't actually copied on assignment. func TestAlias(t *testing.T) { m := make(map[int]int, 0) @@ -92,18 +158,25 @@ func TestAlias(t *testing.T) { func TestGrowWithNaN(t *testing.T) { m := make(map[float64]int, 4) nan := math.NaN() + + // Use both assignment and assignment operations as they may + // behave differently. 
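// Editor's sketch of why NaN keys exercise interesting paths in the tests
// around here (assumes import "math"; not part of this change): NaN != NaN
// and NaN hashes randomly, so every store under a NaN key creates a fresh
// entry that can only ever be reached by iteration, never by lookup.
func nanKeySketch() (entries int, lookupHits bool) {
	m := make(map[float64]int)
	nan := math.NaN()
	m[nan] = 1
	m[nan] = 2
	m[nan] = 3
	_, lookupHits = m[nan]    // always false
	return len(m), lookupHits // 3, false
}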
m[nan] = 1 m[nan] = 2 - m[nan] = 4 + m[nan] += 4 + cnt := 0 s := 0 growflag := true for k, v := range m { if growflag { // force a hashtable resize - for i := 0; i < 100; i++ { + for i := 0; i < 50; i++ { m[float64(i)] = i } + for i := 50; i < 100; i++ { + m[float64(i)] += i + } growflag = false } if k != k { @@ -128,8 +201,8 @@ func TestGrowWithNegativeZero(t *testing.T) { negzero := math.Copysign(0.0, -1.0) m := make(map[FloatInt]int, 4) m[FloatInt{0.0, 0}] = 1 - m[FloatInt{0.0, 1}] = 2 - m[FloatInt{0.0, 2}] = 4 + m[FloatInt{0.0, 1}] += 2 + m[FloatInt{0.0, 2}] += 4 m[FloatInt{0.0, 3}] = 8 growflag := true s := 0 @@ -211,9 +284,12 @@ func TestIterGrowAndDelete(t *testing.T) { // an iterator is still using them. func TestIterGrowWithGC(t *testing.T) { m := make(map[int]int, 4) - for i := 0; i < 16; i++ { + for i := 0; i < 8; i++ { m[i] = i } + for i := 8; i < 16; i++ { + m[i] += i + } growflag := true bitmask := 0 for k := range m { @@ -364,11 +440,11 @@ func TestEmptyKeyAndValue(t *testing.T) { // ("quick keys") as well as long keys. func TestSingleBucketMapStringKeys_DupLen(t *testing.T) { testMapLookups(t, map[string]string{ - "x": "x1val", - "xx": "x2val", - "foo": "fooval", - "bar": "barval", // same key length as "foo" - "xxxx": "x4val", + "x": "x1val", + "xx": "x2val", + "foo": "fooval", + "bar": "barval", // same key length as "foo" + "xxxx": "x4val", strings.Repeat("x", 128): "longval1", strings.Repeat("y", 128): "longval2", }) @@ -627,7 +703,7 @@ func TestMapBuckets(t *testing.T) { // have a nil bucket pointer due to starting with preallocated buckets // on the stack. Escaping maps start with a non-nil bucket pointer if // hint size is above bucketCnt and thereby have more than one bucket. - // These tests depend on bucketCnt and loadFactor* in hashmap.go. + // These tests depend on bucketCnt and loadFactor* in map.go. 
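// Editor's sketch of the relationship the bucket-count tests around here
// probe (a rough approximation of makemap's sizing loop; bucketCnt = 8 and
// the 6.5 load factor are assumed to match map.go, and the helper itself is
// hypothetical):
func bucketsForHintSketch(hint int) int {
	B := uint(0)
	for hint > 8 && uint64(hint) > 13*(uint64(1)<<B)/2 {
		B++
	}
	return 1 << B // e.g. hints 0..8 -> 1 bucket, 9..13 -> 2, 14..26 -> 4
}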
t.Run("mapliteral", func(t *testing.T) { for _, tt := range mapBucketTests { localMap := map[int]int{} @@ -802,6 +878,23 @@ func benchmarkMapAssignInt32(b *testing.B, n int) { } } +func benchmarkMapOperatorAssignInt32(b *testing.B, n int) { + a := make(map[int32]int) + for i := 0; i < b.N; i++ { + a[int32(i&(n-1))] += i + } +} + +func benchmarkMapAppendAssignInt32(b *testing.B, n int) { + a := make(map[int32][]int) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + key := int32(i & (n - 1)) + a[key] = append(a[key], i) + } +} + func benchmarkMapDeleteInt32(b *testing.B, n int) { a := make(map[int32]int, n) b.ResetTimer() @@ -824,6 +917,23 @@ func benchmarkMapAssignInt64(b *testing.B, n int) { } } +func benchmarkMapOperatorAssignInt64(b *testing.B, n int) { + a := make(map[int64]int) + for i := 0; i < b.N; i++ { + a[int64(i&(n-1))] += i + } +} + +func benchmarkMapAppendAssignInt64(b *testing.B, n int) { + a := make(map[int64][]int) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + key := int64(i & (n - 1)) + a[key] = append(a[key], i) + } +} + func benchmarkMapDeleteInt64(b *testing.B, n int) { a := make(map[int64]int, n) b.ResetTimer() @@ -851,6 +961,33 @@ func benchmarkMapAssignStr(b *testing.B, n int) { } } +func benchmarkMapOperatorAssignStr(b *testing.B, n int) { + k := make([]string, n) + for i := 0; i < len(k); i++ { + k[i] = strconv.Itoa(i) + } + b.ResetTimer() + a := make(map[string]string) + for i := 0; i < b.N; i++ { + key := k[i&(n-1)] + a[key] += key + } +} + +func benchmarkMapAppendAssignStr(b *testing.B, n int) { + k := make([]string, n) + for i := 0; i < len(k); i++ { + k[i] = strconv.Itoa(i) + } + a := make(map[string][]string) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + key := k[i&(n-1)] + a[key] = append(a[key], key) + } +} + func benchmarkMapDeleteStr(b *testing.B, n int) { i2s := make([]string, n) for i := 0; i < n; i++ { @@ -886,8 +1023,127 @@ func BenchmarkMapAssign(b *testing.B) { b.Run("Str", runWith(benchmarkMapAssignStr, 1<<8, 1<<16)) } +func BenchmarkMapOperatorAssign(b *testing.B) { + b.Run("Int32", runWith(benchmarkMapOperatorAssignInt32, 1<<8, 1<<16)) + b.Run("Int64", runWith(benchmarkMapOperatorAssignInt64, 1<<8, 1<<16)) + b.Run("Str", runWith(benchmarkMapOperatorAssignStr, 1<<8, 1<<16)) +} + +func BenchmarkMapAppendAssign(b *testing.B) { + b.Run("Int32", runWith(benchmarkMapAppendAssignInt32, 1<<8, 1<<16)) + b.Run("Int64", runWith(benchmarkMapAppendAssignInt64, 1<<8, 1<<16)) + b.Run("Str", runWith(benchmarkMapAppendAssignStr, 1<<8, 1<<16)) +} + func BenchmarkMapDelete(b *testing.B) { b.Run("Int32", runWith(benchmarkMapDeleteInt32, 100, 1000, 10000)) b.Run("Int64", runWith(benchmarkMapDeleteInt64, 100, 1000, 10000)) b.Run("Str", runWith(benchmarkMapDeleteStr, 100, 1000, 10000)) } + +func TestDeferDeleteSlow(t *testing.T) { + ks := []complex128{0, 1, 2, 3} + + m := make(map[interface{}]int) + for i, k := range ks { + m[k] = i + } + if len(m) != len(ks) { + t.Errorf("want %d elements, got %d", len(ks), len(m)) + } + + func() { + for _, k := range ks { + defer delete(m, k) + } + }() + if len(m) != 0 { + t.Errorf("want 0 elements, got %d", len(m)) + } +} + +// TestIncrementAfterDeleteValueInt and other test Issue 25936. +// Value types int, int32, int64 are affected. Value type string +// works as expected. 
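// Editor's sketch of the user-visible property the Issue 25936 tests below
// pin down (not part of this change): once a key has been deleted, an
// increment of a key that reuses the freed slot must start from the zero
// value rather than from stale data left behind in the bucket.
func incrementAfterDeleteSketch() int {
	m := map[string]int{"gone": 99}
	delete(m, "gone")
	m["x"]++      // must behave as m["x"] = m["x"] + 1 with m["x"] == 0
	return m["x"] // 1
}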
+func TestIncrementAfterDeleteValueInt(t *testing.T) { + const key1 = 12 + const key2 = 13 + + m := make(map[int]int) + m[key1] = 99 + delete(m, key1) + m[key2]++ + if n2 := m[key2]; n2 != 1 { + t.Errorf("incremented 0 to %d", n2) + } +} + +func TestIncrementAfterDeleteValueInt32(t *testing.T) { + const key1 = 12 + const key2 = 13 + + m := make(map[int]int32) + m[key1] = 99 + delete(m, key1) + m[key2]++ + if n2 := m[key2]; n2 != 1 { + t.Errorf("incremented 0 to %d", n2) + } +} + +func TestIncrementAfterDeleteValueInt64(t *testing.T) { + const key1 = 12 + const key2 = 13 + + m := make(map[int]int64) + m[key1] = 99 + delete(m, key1) + m[key2]++ + if n2 := m[key2]; n2 != 1 { + t.Errorf("incremented 0 to %d", n2) + } +} + +func TestIncrementAfterDeleteKeyStringValueInt(t *testing.T) { + const key1 = "" + const key2 = "x" + + m := make(map[string]int) + m[key1] = 99 + delete(m, key1) + m[key2] += 1 + if n2 := m[key2]; n2 != 1 { + t.Errorf("incremented 0 to %d", n2) + } +} + +func TestIncrementAfterDeleteKeyValueString(t *testing.T) { + const key1 = "" + const key2 = "x" + + m := make(map[string]string) + m[key1] = "99" + delete(m, key1) + m[key2] += "1" + if n2 := m[key2]; n2 != "1" { + t.Errorf("appended '1' to empty (nil) string, got %s", n2) + } +} + +// TestIncrementAfterBulkClearKeyStringValueInt tests that map bulk +// deletion (mapclear) still works as expected. Note that it was not +// affected by Issue 25936. +func TestIncrementAfterBulkClearKeyStringValueInt(t *testing.T) { + const key1 = "" + const key2 = "x" + + m := make(map[string]int) + m[key1] = 99 + for k := range m { + delete(m, k) + } + m[key2]++ + if n2 := m[key2]; n2 != 1 { + t.Errorf("incremented 0 to %d", n2) + } +} diff --git a/libgo/go/runtime/mbarrier.go b/libgo/go/runtime/mbarrier.go index 3b8f714..24e5865 100644 --- a/libgo/go/runtime/mbarrier.go +++ b/libgo/go/runtime/mbarrier.go @@ -6,10 +6,10 @@ // // For the concurrent garbage collector, the Go compiler implements // updates to pointer-valued fields that may be in heap objects by -// emitting calls to write barriers. This file contains the actual write barrier -// implementation, gcmarkwb_m, and the various wrappers called by the -// compiler to implement pointer assignment, slice assignment, -// typed memmove, and so on. +// emitting calls to write barriers. The main write barrier for +// individual pointer writes is gcWriteBarrier and is implemented in +// assembly. This file contains write barrier entry points for bulk +// operations. See also mwbbuf.go. package runtime @@ -21,14 +21,10 @@ import ( // For gccgo, use go:linkname to rename compiler-called functions to // themselves, so that the compiler will export them. // -//go:linkname writebarrierptr runtime.writebarrierptr //go:linkname typedmemmove runtime.typedmemmove //go:linkname typedslicecopy runtime.typedslicecopy -// gcmarkwb_m is the mark-phase write barrier, the only barrier we have. -// The rest of this file exists only to make calls to this function. -// -// This is a hybrid barrier that combines a Yuasa-style deletion +// Go uses a hybrid barrier that combines a Yuasa-style deletion // barrier—which shades the object whose reference is being // overwritten—with Dijkstra insertion barrier—which shades the object // whose reference is being written. The insertion part of the barrier @@ -144,105 +140,17 @@ import ( // reachable by some goroutine that currently cannot reach it. 
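// Editor's aside, summarizing the hybrid barrier described above in the
// pseudocode form used by the Go 1.8 hybrid-barrier design document (the
// per-pointer barrier itself now lives in assembly as gcWriteBarrier):
//
//	writePointer(slot, ptr):
//		shade(*slot)                  // Yuasa-style deletion barrier
//		if current stack is grey:
//			shade(ptr)            // Dijkstra-style insertion barrier
//		*slot = ptr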
// // -//go:nowritebarrierrec -//go:systemstack -func gcmarkwb_m(slot *uintptr, ptr uintptr) { - if writeBarrier.needed { - // Note: This turns bad pointer writes into bad - // pointer reads, which could be confusing. We avoid - // reading from obviously bad pointers, which should - // take care of the vast majority of these. We could - // patch this up in the signal handler, or use XCHG to - // combine the read and the write. Checking inheap is - // insufficient since we need to track changes to - // roots outside the heap. - // - // Note: profbuf.go omits a barrier during signal handler - // profile logging; that's safe only because this deletion barrier exists. - // If we remove the deletion barrier, we'll have to work out - // a new way to handle the profile logging. - if slot1 := uintptr(unsafe.Pointer(slot)); slot1 >= minPhysPageSize { - if optr := *slot; optr != 0 { - shade(optr) - } - } - // TODO: Make this conditional on the caller's stack color. - if ptr != 0 && inheap(ptr) { - shade(ptr) - } - } -} - -// writebarrierptr_prewrite1 invokes a write barrier for *dst = src -// prior to the write happening. -// -// Write barrier calls must not happen during critical GC and scheduler -// related operations. In particular there are times when the GC assumes -// that the world is stopped but scheduler related code is still being -// executed, dealing with syscalls, dealing with putting gs on runnable -// queues and so forth. This code cannot execute write barriers because -// the GC might drop them on the floor. Stopping the world involves removing -// the p associated with an m. We use the fact that m.p == nil to indicate -// that we are in one these critical section and throw if the write is of -// a pointer to a heap object. -//go:nosplit -func writebarrierptr_prewrite1(dst *uintptr, src uintptr) { - mp := acquirem() - if mp.inwb || mp.dying > 0 { - // We explicitly allow write barriers in startpanic_m, - // since we're going down anyway. Ignore them here. - releasem(mp) - return - } - systemstack(func() { - if mp.p == 0 && memstats.enablegc && !mp.inwb && inheap(src) { - throw("writebarrierptr_prewrite1 called with mp.p == nil") - } - mp.inwb = true - gcmarkwb_m(dst, src) - }) - mp.inwb = false - releasem(mp) -} - -// NOTE: Really dst *unsafe.Pointer, src unsafe.Pointer, -// but if we do that, Go inserts a write barrier on *dst = src. -//go:nosplit -func writebarrierptr(dst *uintptr, src uintptr) { - if writeBarrier.cgo { - cgoCheckWriteBarrier(dst, src) - } - if !writeBarrier.needed { - *dst = src - return - } - if src != 0 && src < minPhysPageSize { - systemstack(func() { - print("runtime: writebarrierptr *", dst, " = ", hex(src), "\n") - throw("bad pointer in write barrier") - }) - } - writebarrierptr_prewrite1(dst, src) - *dst = src -} - -// writebarrierptr_prewrite is like writebarrierptr, but the store -// will be performed by the caller after this call. The caller must -// not allow preemption between this call and the write. +// Signal handler pointer writes: // -//go:nosplit -func writebarrierptr_prewrite(dst *uintptr, src uintptr) { - if writeBarrier.cgo { - cgoCheckWriteBarrier(dst, src) - } - if !writeBarrier.needed { - return - } - if src != 0 && src < minPhysPageSize { - systemstack(func() { throw("bad pointer in write barrier") }) - } - writebarrierptr_prewrite1(dst, src) -} +// In general, the signal handler cannot safely invoke the write +// barrier because it may run without a P or even during the write +// barrier. 
+// +// There is exactly one exception: profbuf.go omits a barrier during +// signal handler profile logging. That's safe only because of the +// deletion barrier. See profbuf.go for a detailed argument. If we +// remove the deletion barrier, we'll have to work out a new way to +// handle the profile logging. // typedmemmove copies a value of type t to dst from src. // Must be nosplit, see #16026. @@ -252,6 +160,9 @@ func writebarrierptr_prewrite(dst *uintptr, src uintptr) { // //go:nosplit func typedmemmove(typ *_type, dst, src unsafe.Pointer) { + if dst == src { + return + } if typ.kind&kindNoPointers == 0 { bulkBarrierPreWrite(uintptr(dst), uintptr(src), typ.size) } @@ -335,6 +246,10 @@ func typedslicecopy(typ *_type, dst, src slice) int { cgoCheckSliceCopy(typ, dst, src, n) } + if dstp == srcp { + return n + } + // Note: No point in checking typ.kind&kindNoPointers here: // compiler only emits calls to typedslicecopy for types with pointers, // and growslice and reflect_typedslicecopy check for pointers diff --git a/libgo/go/runtime/mbitmap.go b/libgo/go/runtime/mbitmap.go index c6c8e6a..42c2015 100644 --- a/libgo/go/runtime/mbitmap.go +++ b/libgo/go/runtime/mbitmap.go @@ -13,12 +13,11 @@ // // Heap bitmap // -// The allocated heap comes from a subset of the memory in the range [start, used), -// where start == mheap_.arena_start and used == mheap_.arena_used. -// The heap bitmap comprises 2 bits for each pointer-sized word in that range, -// stored in bytes indexed backward in memory from start. -// That is, the byte at address start-1 holds the 2-bit entries for the four words -// start through start+3*ptrSize, the byte at start-2 holds the entries for +// The heap bitmap comprises 2 bits for each pointer-sized word in the heap, +// stored in the heapArena metadata backing each heap arena. +// That is, if ha is the heapArena for the arena starting a start, +// then ha.bitmap[0] holds the 2-bit entries for the four words start +// through start+3*ptrSize, ha.bitmap[1] holds the entries for // start+4*ptrSize through start+7*ptrSize, and so on. // // In each 2-bit entry, the lower bit holds the same information as in the 1-bit @@ -85,8 +84,8 @@ const ( bitPointer = 1 << 0 bitScan = 1 << 4 - heapBitsShift = 1 // shift offset between successive bitPointer or bitScan entries - heapBitmapScale = sys.PtrSize * (8 / 2) // number of data bytes described by one heap bitmap byte + heapBitsShift = 1 // shift offset between successive bitPointer or bitScan entries + wordsPerBitmapByte = 8 / 2 // heap words described by one bitmap byte // all scan/pointer bits in a byte bitScanAll = bitScan | bitScan<<heapBitsShift | bitScan<<(2*heapBitsShift) | bitScan<<(3*heapBitsShift) @@ -104,8 +103,6 @@ func addb(p *byte, n uintptr) *byte { } // subtractb returns the byte pointer p-n. -// subtractb is typically used when traversing the pointer tables referred to by hbits -// which are arranged in reverse order. //go:nowritebarrier //go:nosplit func subtractb(p *byte, n uintptr) *byte { @@ -126,8 +123,6 @@ func add1(p *byte) *byte { } // subtract1 returns the byte pointer p-1. -// subtract1 is typically used when traversing the pointer tables referred to by hbits -// which are arranged in reverse order. //go:nowritebarrier // // nosplit because it is used during write barriers and must not be preempted. @@ -139,28 +134,6 @@ func subtract1(p *byte) *byte { return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - 1)) } -// mapBits maps any additional bitmap memory needed for the new arena memory. 
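// Editor's sketch of the per-arena bitmap addressing described in the new
// heap bitmap comment above, assuming a 64-bit target (ptrSize = 8). The
// helper is hypothetical; heapBitsForAddr below is the real thing.
func bitmapIndexSketch(addr, arenaStart uintptr) (byteIndex uintptr, shift uint) {
	const ptrSize = 8
	word := (addr - arenaStart) / ptrSize // heap word within the arena
	return word / 4, uint(word % 4)       // 4 two-bit entries per bitmap byte
}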
-// -// Don't call this directly. Call mheap.setArenaUsed. -// -//go:nowritebarrier -func (h *mheap) mapBits(arena_used uintptr) { - // Caller has added extra mappings to the arena. - // Add extra mappings of bitmap words as needed. - // We allocate extra bitmap pieces in chunks of bitmapChunk. - const bitmapChunk = 8192 - - n := (arena_used - mheap_.arena_start) / heapBitmapScale - n = round(n, bitmapChunk) - n = round(n, physPageSize) - if h.bitmap_mapped >= n { - return - } - - sysMap(unsafe.Pointer(h.bitmap-n), n-h.bitmap_mapped, h.arena_reserved, &memstats.gc_sys) - h.bitmap_mapped = n -} - // heapBits provides access to the bitmap bits for a single heap word. // The methods on heapBits take value receivers so that the compiler // can more easily inline calls to those methods and registerize the @@ -168,8 +141,14 @@ func (h *mheap) mapBits(arena_used uintptr) { type heapBits struct { bitp *uint8 shift uint32 + arena uint32 // Index of heap arena containing bitp + last *uint8 // Last byte arena's bitmap } +// Make the compiler check that heapBits.arena is large enough to hold +// the maximum arena frame number. +var _ = heapBits{arena: (1<<heapAddrBits)/heapArenaBytes - 1} + // markBits provides access to the mark bit for an object in the heap. // bytep points to the byte holding the mark bit. // mask is a byte with a single bit set that can be &ed with *bytep @@ -191,7 +170,7 @@ func (s *mspan) allocBitsForIndex(allocBitIndex uintptr) markBits { return markBits{bytep, mask, allocBitIndex} } -// refillaCache takes 8 bytes s.allocBits starting at whichByte +// refillAllocCache takes 8 bytes s.allocBits starting at whichByte // and negates them so that ctz (count trailing zeros) instructions // can be used. It then places these 8 bytes into the cached 64 bit // s.allocCache. @@ -278,7 +257,7 @@ func (s *mspan) objIndex(p uintptr) uintptr { return 0 } if s.baseMask != 0 { - // s.baseMask is 0, elemsize is a power of two, so shift by s.divShift + // s.baseMask is non-0, elemsize is a power of two, so shift by s.divShift return byteOffset >> s.divShift } return uintptr(((uint64(byteOffset) >> s.divShift) * uint64(s.divMul)) >> s.divShift2) @@ -329,9 +308,6 @@ func (m markBits) clearMarked() { // markBitsForSpan returns the markBits for the span base address base. func markBitsForSpan(base uintptr) (mbits markBits) { - if base < mheap_.arena_start || base >= mheap_.arena_used { - throw("markBitsForSpan: base out of range") - } mbits = markBitsForAddr(base) if mbits.mask != 1 { throw("markBitsForSpan: unaligned start") @@ -351,31 +327,36 @@ func (m *markBits) advance() { } // heapBitsForAddr returns the heapBits for the address addr. -// The caller must have already checked that addr is in the range [mheap_.arena_start, mheap_.arena_used). +// The caller must ensure addr is in an allocated span. +// In particular, be careful not to point past the end of an object. // // nosplit because it is used during write barriers and must not be preempted. //go:nosplit -func heapBitsForAddr(addr uintptr) heapBits { - // 2 bits per work, 4 pairs per byte, and a mask is hard coded. - off := (addr - mheap_.arena_start) / sys.PtrSize - return heapBits{(*uint8)(unsafe.Pointer(mheap_.bitmap - off/4 - 1)), uint32(off & 3)} -} - -// heapBitsForSpan returns the heapBits for the span base address base. 
-func heapBitsForSpan(base uintptr) (hbits heapBits) { - if base < mheap_.arena_start || base >= mheap_.arena_used { - print("runtime: base ", hex(base), " not in range [", hex(mheap_.arena_start), ",", hex(mheap_.arena_used), ")\n") - throw("heapBitsForSpan: base out of range") +func heapBitsForAddr(addr uintptr) (h heapBits) { + // 2 bits per word, 4 pairs per byte, and a mask is hard coded. + arena := arenaIndex(addr) + ha := mheap_.arenas[arena.l1()][arena.l2()] + // The compiler uses a load for nil checking ha, but in this + // case we'll almost never hit that cache line again, so it + // makes more sense to do a value check. + if ha == nil { + // addr is not in the heap. Return nil heapBits, which + // we expect to crash in the caller. + return } - return heapBitsForAddr(base) + h.bitp = &ha.bitmap[(addr/(sys.PtrSize*4))%heapArenaBitmapBytes] + h.shift = uint32((addr / sys.PtrSize) & 3) + h.arena = uint32(arena) + h.last = &ha.bitmap[len(ha.bitmap)-1] + return } -// heapBitsForObject returns the base address for the heap object -// containing the address p, the heapBits for base, -// the object's span, and of the index of the object in s. -// If p does not point into a heap object, -// return base == 0 -// otherwise return the base of the object. +// findObject returns the base address for the heap object containing +// the address p, the object's span, and the index of the object in s. +// If p does not point into a heap object, it returns base == 0. +// +// If p points is an invalid heap pointer and debug.invalidptr != 0, +// findObject panics. // // For gccgo, the forStack parameter is true if the value came from the stack. // The stack is collected conservatively and may contain invalid pointers. @@ -383,16 +364,9 @@ func heapBitsForSpan(base uintptr) (hbits heapBits) { // refBase and refOff optionally give the base address of the object // in which the pointer p was found and the byte offset at which it // was found. These are used for error reporting. -func heapBitsForObject(p, refBase, refOff uintptr, forStack bool) (base uintptr, hbits heapBits, s *mspan, objIndex uintptr) { - arenaStart := mheap_.arena_start - if p < arenaStart || p >= mheap_.arena_used { - return - } - off := p - arenaStart - idx := off >> _PageShift - // p points into the heap, but possibly to the middle of an object. - // Consult the span table to find the block beginning. - s = mheap_.spans[idx] +func findObject(p, refBase, refOff uintptr, forStack bool) (base uintptr, s *mspan, objIndex uintptr) { + s = spanOf(p) + // If p is a bad pointer, it may not be in s's bounds. if s == nil || p < s.base() || p >= s.limit || s.state != mSpanInUse { if s == nil || s.state == _MSpanManual || forStack { // If s is nil, the virtual address has never been part of the heap. @@ -419,7 +393,7 @@ func heapBitsForObject(p, refBase, refOff uintptr, forStack bool) (base uintptr, } else { print(" to unused region of span") } - print(" idx=", hex(idx), " span.base()=", hex(s.base()), " span.limit=", hex(s.limit), " span.state=", s.state, "\n") + print(" span.base()=", hex(s.base()), " span.limit=", hex(s.limit), " span.state=", s.state, "\n") if refBase != 0 { print("runtime: found in object at *(", hex(refBase), "+", hex(refOff), ")\n") gcDumpObject("object", refBase, refOff) @@ -458,8 +432,6 @@ func heapBitsForObject(p, refBase, refOff uintptr, forStack bool) (base uintptr, base += objIndex * s.elemsize } } - // Now that we know the actual base, compute heapBits to return to caller. 
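// Editor's sketch of how a caller uses the new findObject API shown above
// (hypothetical helper; the heapBits that heapBitsForObject used to return
// are now obtained separately via heapBitsForAddr):
func objectBaseSketch(p uintptr) (base, size uintptr) {
	base, s, _ := findObject(p, 0, 0, false)
	if base == 0 {
		return 0, 0 // p does not point into the heap
	}
	return base, s.elemsize
}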
- hbits = heapBitsForAddr(base) return } @@ -471,9 +443,42 @@ func heapBitsForObject(p, refBase, refOff uintptr, forStack bool) (base uintptr, //go:nosplit func (h heapBits) next() heapBits { if h.shift < 3*heapBitsShift { - return heapBits{h.bitp, h.shift + heapBitsShift} + h.shift += heapBitsShift + } else if h.bitp != h.last { + h.bitp, h.shift = add1(h.bitp), 0 + } else { + // Move to the next arena. + return h.nextArena() } - return heapBits{subtract1(h.bitp), 0} + return h +} + +// nextArena advances h to the beginning of the next heap arena. +// +// This is a slow-path helper to next. gc's inliner knows that +// heapBits.next can be inlined even though it calls this. This is +// marked noinline so it doesn't get inlined into next and cause next +// to be too big to inline. +// +//go:nosplit +//go:noinline +func (h heapBits) nextArena() heapBits { + h.arena++ + ai := arenaIdx(h.arena) + l2 := mheap_.arenas[ai.l1()] + if l2 == nil { + // We just passed the end of the object, which + // was also the end of the heap. Poison h. It + // should never be dereferenced at this point. + return heapBits{} + } + ha := l2[ai.l2()] + if ha == nil { + return heapBits{} + } + h.bitp, h.shift = &ha.bitmap[0], 0 + h.last = &ha.bitmap[len(ha.bitmap)-1] + return h } // forward returns the heapBits describing n pointer-sized words ahead of h in memory. @@ -481,9 +486,39 @@ func (h heapBits) next() heapBits { // h.forward(1) is equivalent to h.next(), just slower. // Note that forward does not modify h. The caller must record the result. // bits returns the heap bits for the current word. +//go:nosplit func (h heapBits) forward(n uintptr) heapBits { n += uintptr(h.shift) / heapBitsShift - return heapBits{subtractb(h.bitp, n/4), uint32(n%4) * heapBitsShift} + nbitp := uintptr(unsafe.Pointer(h.bitp)) + n/4 + h.shift = uint32(n%4) * heapBitsShift + if nbitp <= uintptr(unsafe.Pointer(h.last)) { + h.bitp = (*uint8)(unsafe.Pointer(nbitp)) + return h + } + + // We're in a new heap arena. + past := nbitp - (uintptr(unsafe.Pointer(h.last)) + 1) + h.arena += 1 + uint32(past/heapArenaBitmapBytes) + ai := arenaIdx(h.arena) + if l2 := mheap_.arenas[ai.l1()]; l2 != nil && l2[ai.l2()] != nil { + a := l2[ai.l2()] + h.bitp = &a.bitmap[past%heapArenaBitmapBytes] + h.last = &a.bitmap[len(a.bitmap)-1] + } else { + h.bitp, h.last = nil, nil + } + return h +} + +// forwardOrBoundary is like forward, but stops at boundaries between +// contiguous sections of the bitmap. It returns the number of words +// advanced over, which will be <= n. +func (h heapBits) forwardOrBoundary(n uintptr) (heapBits, uintptr) { + maxn := 4 * ((uintptr(unsafe.Pointer(h.last)) + 1) - uintptr(unsafe.Pointer(h.bitp))) + if n > maxn { + n = maxn + } + return h.forward(n), n } // The caller can test morePointers and isPointer by &-ing with bitScan and bitPointer. @@ -564,6 +599,8 @@ func (h heapBits) setCheckmarked(size uintptr) { // make sure the underlying allocation contains pointers, usually // by checking typ.kind&kindNoPointers. // +// Callers must perform cgo checks if writeBarrier.cgo. +// //go:nosplit func bulkBarrierPreWrite(dst, src, size uintptr) { if (dst|src|size)&(sys.PtrSize-1) != 0 { @@ -572,7 +609,7 @@ func bulkBarrierPreWrite(dst, src, size uintptr) { if !writeBarrier.needed { return } - if !inheap(dst) { + if s := spanOf(dst); s == nil { // If dst is a global, use the data or BSS bitmaps to // execute write barriers. 
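// Editor's sketch of the bitmap arithmetic behind heapBits.forward above:
// advancing n heap words moves n/4 bitmap bytes plus a 2-bit entry within a
// byte (heapBitsShift == 1, four entries per byte). Hypothetical helper; the
// real code must additionally hop across arena boundaries.
func forwardSketch(byteIndex uintptr, shift uint32, n uintptr) (uintptr, uint32) {
	n += uintptr(shift)
	return byteIndex + n/4, uint32(n % 4)
}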
lo := 0 @@ -594,6 +631,14 @@ func bulkBarrierPreWrite(dst, src, size uintptr) { } } return + } else if s.state != _MSpanInUse || dst < s.base() || s.limit <= dst { + // dst was heap memory at some point, but isn't now. + // It can't be a global. It must be either our stack, + // or in the case of direct channel sends, it could be + // another stack. Either way, no need for barriers. + // This will also catch if dst is in a freed span, + // though that should never have. + return } buf := &getg().m.p.ptr().wbBuf @@ -663,7 +708,7 @@ func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) { } } -// typeBitsBulkBarrier executes writebarrierptr_prewrite for every +// typeBitsBulkBarrier executes a write barrier for every // pointer that would be copied from [src, src+size) to [dst, // dst+size) by a memmove using the type bitmap to locate those // pointer slots. @@ -677,23 +722,26 @@ func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) { // Must not be preempted because it typically runs right before memmove, // and the GC must observe them as an atomic action. // +// Callers must perform cgo checks if writeBarrier.cgo. +// //go:nosplit func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) { if typ == nil { throw("runtime: typeBitsBulkBarrier without type") } if typ.size != size { - println("runtime: typeBitsBulkBarrier with type ", *typ.string, " of size ", typ.size, " but memory size", size) + println("runtime: typeBitsBulkBarrier with type ", typ.string(), " of size ", typ.size, " but memory size", size) throw("runtime: invalid typeBitsBulkBarrier") } if typ.kind&kindGCProg != 0 { - println("runtime: typeBitsBulkBarrier with type ", *typ.string, " with GC prog") + println("runtime: typeBitsBulkBarrier with type ", typ.string(), " with GC prog") throw("runtime: invalid typeBitsBulkBarrier") } if !writeBarrier.needed { return } ptrmask := typ.gcdata + buf := &getg().m.p.ptr().wbBuf var bits uint32 for i := uintptr(0); i < typ.ptrdata; i += sys.PtrSize { if i&(sys.PtrSize*8-1) == 0 { @@ -705,7 +753,9 @@ func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) { if bits&1 != 0 { dstx := (*uintptr)(unsafe.Pointer(dst + i)) srcx := (*uintptr)(unsafe.Pointer(src + i)) - writebarrierptr_prewrite(dstx, *srcx) + if !buf.putFast(*dstx, *srcx) { + wbBufFlush(nil, 0) + } } } } @@ -736,23 +786,28 @@ func (h heapBits) initSpan(s *mspan) { s.allocBits = newAllocBits(s.nelems) // Clear bits corresponding to objects. - if total%heapBitmapScale != 0 { + nw := total / sys.PtrSize + if nw%wordsPerBitmapByte != 0 { throw("initSpan: unaligned length") } - nbyte := total / heapBitmapScale - if sys.PtrSize == 8 && size == sys.PtrSize { - end := h.bitp - bitp := subtractb(end, nbyte-1) - for { - *bitp = bitPointerAll | bitScanAll - if bitp == end { - break + if h.shift != 0 { + throw("initSpan: unaligned base") + } + for nw > 0 { + hNext, anw := h.forwardOrBoundary(nw) + nbyte := anw / wordsPerBitmapByte + if sys.PtrSize == 8 && size == sys.PtrSize { + bitp := h.bitp + for i := uintptr(0); i < nbyte; i++ { + *bitp = bitPointerAll | bitScanAll + bitp = add1(bitp) } - bitp = add1(bitp) + } else { + memclrNoHeapPointers(unsafe.Pointer(h.bitp), nbyte) } - return + h = hNext + nw -= anw } - memclrNoHeapPointers(unsafe.Pointer(subtractb(h.bitp, nbyte-1)), nbyte) } // initCheckmarkSpan initializes a span for being checkmarked. @@ -764,10 +819,9 @@ func (h heapBits) initCheckmarkSpan(size, n, total uintptr) { // Only possible on 64-bit system, since minimum size is 8. 
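// Editor's aside on the buffered barrier now used by typeBitsBulkBarrier
// above (see mwbbuf.go): pointer pairs are appended to the per-P write
// barrier buffer, and the slow path only drains the buffer into the GC work
// queues once it fills up. The pattern, as it appears in the new code:
//
//	buf := &getg().m.p.ptr().wbBuf
//	if !buf.putFast(*dstx, *srcx) { // records the pair; false means the buffer is now full
//		wbBufFlush(nil, 0)      // drain to the GC work queues
//	}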
// Must clear type bit (checkmark bit) of every word. // The type bit is the lower of every two-bit pair. - bitp := h.bitp - for i := uintptr(0); i < n; i += 4 { - *bitp &^= bitPointerAll - bitp = subtract1(bitp) + for i := uintptr(0); i < n; i += wordsPerBitmapByte { + *h.bitp &^= bitPointerAll + h = h.forward(wordsPerBitmapByte) } return } @@ -788,10 +842,9 @@ func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) { // Only possible on 64-bit system, since minimum size is 8. // Must clear type bit (checkmark bit) of every word. // The type bit is the lower of every two-bit pair. - bitp := h.bitp - for i := uintptr(0); i < n; i += 4 { - *bitp |= bitPointerAll - bitp = subtract1(bitp) + for i := uintptr(0); i < n; i += wordsPerBitmapByte { + *h.bitp |= bitPointerAll + h = h.forward(wordsPerBitmapByte) } } } @@ -958,6 +1011,19 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { // This is a lot of lines of code, but it compiles into relatively few // machine instructions. + outOfPlace := false + if arenaIndex(x+size-1) != arenaIdx(h.arena) || (doubleCheck && fastrand()%2 == 0) { + // This object spans heap arenas, so the bitmap may be + // discontiguous. Unroll it into the object instead + // and then copy it out. + // + // In doubleCheck mode, we randomly do this anyway to + // stress test the bitmap copying path. + outOfPlace = true + h.bitp = (*uint8)(unsafe.Pointer(x)) + h.last = nil + } + var ( // Ptrmask input. p *byte // last ptrmask byte read @@ -996,9 +1062,8 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { } ptrmask = debugPtrmask.data runGCProg(addb(typ.gcdata, 4), nil, ptrmask, 1) - goto Phase4 } - return + goto Phase4 } // Note about sizes: @@ -1106,7 +1171,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { } if nw == 0 { // No pointers! Caller was supposed to check. - println("runtime: invalid type ", *typ.string) + println("runtime: invalid type ", typ.string()) throw("heapBitsSetType: called with non-pointer type") return } @@ -1116,7 +1181,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { nw = 2 } - // Phase 1: Special case for leading byte (shift==0) or half-byte (shift==4). + // Phase 1: Special case for leading byte (shift==0) or half-byte (shift==2). // The leading byte is special because it contains the bits for word 1, // which does not have the scan bit set. // The leading half-byte is special because it's a half a byte, @@ -1146,7 +1211,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { goto Phase3 } *hbitp = uint8(hb) - hbitp = subtract1(hbitp) + hbitp = add1(hbitp) b >>= 4 nb -= 4 @@ -1167,7 +1232,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { // the checkmark. *hbitp &^= uint8((bitPointer | bitScan | (bitPointer << heapBitsShift)) << (2 * heapBitsShift)) *hbitp |= uint8(hb) - hbitp = subtract1(hbitp) + hbitp = add1(hbitp) if w += 2; w >= nw { // We know that there is more data, because we handled 2-word objects above. // This must be at least a 6-word object. If we're out of pointer words, @@ -1197,7 +1262,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { break } *hbitp = uint8(hb) - hbitp = subtract1(hbitp) + hbitp = add1(hbitp) b >>= 4 // Load more bits. b has nb right now. @@ -1245,7 +1310,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { break } *hbitp = uint8(hb) - hbitp = subtract1(hbitp) + hbitp = add1(hbitp) b >>= 4 } @@ -1266,11 +1331,11 @@ Phase3: // The first is hb, the rest are zero. 
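// Editor's sketch of the 2-bit encoding that heapBitsSetType writes: within
// one bitmap byte, entry i (0..3) keeps its pointer bit at bit i and its
// scan bit at bit i+4. The lower bit says whether the word holds a live
// pointer; the upper bit's meaning depends on the word's position in the
// object (checkmark for the second word, "description continues" elsewhere).
// Hypothetical helper, not part of this change.
func heapBitsDecodeSketch(b uint8, i uint) (isPointer, scan bool) {
	const bitPointer, bitScan = uint8(1) << 0, uint8(1) << 4
	return b>>i&bitPointer != 0, b>>i&bitScan != 0
}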
if w <= nw { *hbitp = uint8(hb) - hbitp = subtract1(hbitp) + hbitp = add1(hbitp) hb = 0 // for possible final half-byte below for w += 4; w <= nw; w += 4 { *hbitp = 0 - hbitp = subtract1(hbitp) + hbitp = add1(hbitp) } } @@ -1287,11 +1352,91 @@ Phase3: } Phase4: - // Phase 4: all done, but perhaps double check. + // Phase 4: Copy unrolled bitmap to per-arena bitmaps, if necessary. + if outOfPlace { + // TODO: We could probably make this faster by + // handling [x+dataSize, x+size) specially. + h := heapBitsForAddr(x) + // cnw is the number of heap words, or bit pairs + // remaining (like nw above). + cnw := size / sys.PtrSize + src := (*uint8)(unsafe.Pointer(x)) + // We know the first and last byte of the bitmap are + // not the same, but it's still possible for small + // objects span arenas, so it may share bitmap bytes + // with neighboring objects. + // + // Handle the first byte specially if it's shared. See + // Phase 1 for why this is the only special case we need. + if doubleCheck { + if !(h.shift == 0 || (sys.PtrSize == 8 && h.shift == 2)) { + print("x=", x, " size=", size, " cnw=", h.shift, "\n") + throw("bad start shift") + } + } + if sys.PtrSize == 8 && h.shift == 2 { + *h.bitp = *h.bitp&^((bitPointer|bitScan|(bitPointer|bitScan)<<heapBitsShift)<<(2*heapBitsShift)) | *src + h = h.next().next() + cnw -= 2 + src = addb(src, 1) + } + // We're now byte aligned. Copy out to per-arena + // bitmaps until the last byte (which may again be + // partial). + for cnw >= 4 { + // This loop processes four words at a time, + // so round cnw down accordingly. + hNext, words := h.forwardOrBoundary(cnw / 4 * 4) + + // n is the number of bitmap bytes to copy. + n := words / 4 + memmove(unsafe.Pointer(h.bitp), unsafe.Pointer(src), n) + cnw -= words + h = hNext + src = addb(src, n) + } + if doubleCheck && h.shift != 0 { + print("cnw=", cnw, " h.shift=", h.shift, "\n") + throw("bad shift after block copy") + } + // Handle the last byte if it's shared. + if cnw == 2 { + *h.bitp = *h.bitp&^(bitPointer|bitScan|(bitPointer|bitScan)<<heapBitsShift) | *src + src = addb(src, 1) + h = h.next().next() + } + if doubleCheck { + if uintptr(unsafe.Pointer(src)) > x+size { + throw("copy exceeded object size") + } + if !(cnw == 0 || cnw == 2) { + print("x=", x, " size=", size, " cnw=", cnw, "\n") + throw("bad number of remaining words") + } + // Set up hbitp so doubleCheck code below can check it. + hbitp = h.bitp + } + // Zero the object where we wrote the bitmap. + memclrNoHeapPointers(unsafe.Pointer(x), uintptr(unsafe.Pointer(src))-x) + } + + // Double check the whole bitmap. if doubleCheck { - end := heapBitsForAddr(x + size) + // x+size may not point to the heap, so back up one + // word and then call next(). + end := heapBitsForAddr(x + size - sys.PtrSize).next() + endAI := arenaIdx(end.arena) + if !outOfPlace && (end.bitp == nil || (end.shift == 0 && end.bitp == &mheap_.arenas[endAI.l1()][endAI.l2()].bitmap[0])) { + // The unrolling code above walks hbitp just + // past the bitmap without moving to the next + // arena. Synthesize this for end.bitp. 
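// Editor's sketch of the masked merge used by the out-of-place copy above:
// when an object's first or last bitmap byte is shared with a neighbouring
// object, only this object's 2-bit entries may be overwritten, so the copy
// keeps the neighbour's bits and ORs in the freshly unrolled ones.
// Hypothetical helper generalizing the two in-line expressions.
func mergeSharedBitmapByteSketch(dst, src, ownMask uint8) uint8 {
	return dst&^ownMask | src&ownMask
}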
+ end.arena-- + endAI = arenaIdx(end.arena) + end.bitp = addb(&mheap_.arenas[endAI.l1()][endAI.l2()].bitmap[0], heapArenaBitmapBytes) + end.last = nil + } if typ.kind&kindGCProg == 0 && (hbitp != end.bitp || (w == nw+2) != (end.shift == 2)) { - println("ended at wrong bitmap byte for", *typ.string, "x", dataSize/typ.size) + println("ended at wrong bitmap byte for", typ.string(), "x", dataSize/typ.size) print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n") print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n") h0 := heapBitsForAddr(x) @@ -1327,15 +1472,15 @@ Phase4: } } if have != want { - println("mismatch writing bits for", *typ.string, "x", dataSize/typ.size) + println("mismatch writing bits for", typ.string(), "x", dataSize/typ.size) print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n") - print("kindGCProg=", typ.kind&kindGCProg != 0, "\n") + print("kindGCProg=", typ.kind&kindGCProg != 0, " outOfPlace=", outOfPlace, "\n") print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n") h0 := heapBitsForAddr(x) print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n") print("current bits h.bitp=", h.bitp, " h.shift=", h.shift, " *h.bitp=", hex(*h.bitp), "\n") print("ptrmask=", ptrmask, " p=", p, " endp=", endp, " endnb=", endnb, " pbits=", hex(pbits), " b=", hex(b), " nb=", nb, "\n") - println("at word", i, "offset", i*sys.PtrSize, "have", have, "want", want) + println("at word", i, "offset", i*sys.PtrSize, "have", hex(have), "want", hex(want)) if typ.kind&kindGCProg != 0 { println("GC program:") dumpGCProg(addb(typ.gcdata, 4)) @@ -1436,9 +1581,9 @@ func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize u // so that scanobject can stop early in the final element. totalBits = (elemSize*(count-1) + progSize) / sys.PtrSize } - endProg := unsafe.Pointer(subtractb(h.bitp, (totalBits+3)/4)) - endAlloc := unsafe.Pointer(subtractb(h.bitp, allocSize/heapBitmapScale)) - memclrNoHeapPointers(add(endAlloc, 1), uintptr(endProg)-uintptr(endAlloc)) + endProg := unsafe.Pointer(addb(h.bitp, (totalBits+3)/4)) + endAlloc := unsafe.Pointer(addb(h.bitp, allocSize/sys.PtrSize/wordsPerBitmapByte)) + memclrNoHeapPointers(endProg, uintptr(endAlloc)-uintptr(endProg)) } // progToPointerMask returns the 1-bit pointer mask output by the GC program prog. @@ -1497,11 +1642,11 @@ Run: } else { v := bits&bitPointerAll | bitScanAll *dst = uint8(v) - dst = subtract1(dst) + dst = add1(dst) bits >>= 4 v = bits&bitPointerAll | bitScanAll *dst = uint8(v) - dst = subtract1(dst) + dst = add1(dst) bits >>= 4 } } @@ -1535,11 +1680,11 @@ Run: } else { v := bits&0xf | bitScanAll *dst = uint8(v) - dst = subtract1(dst) + dst = add1(dst) bits >>= 4 v = bits&0xf | bitScanAll *dst = uint8(v) - dst = subtract1(dst) + dst = add1(dst) bits >>= 4 } } @@ -1599,11 +1744,11 @@ Run: npattern += 8 } } else { - src = add1(src) + src = subtract1(src) for npattern < n { pattern <<= 4 pattern |= uintptr(*src) & 0xf - src = add1(src) + src = subtract1(src) npattern += 4 } } @@ -1665,7 +1810,7 @@ Run: } else { for nbits >= 4 { *dst = uint8(bits&0xf | bitScanAll) - dst = subtract1(dst) + dst = add1(dst) bits >>= 4 nbits -= 4 } @@ -1710,10 +1855,10 @@ Run: } } else { // Leading src fragment. 
- src = addb(src, (off+3)/4) + src = subtractb(src, (off+3)/4) if frag := off & 3; frag != 0 { bits |= (uintptr(*src) & 0xf) >> (4 - frag) << nbits - src = subtract1(src) + src = add1(src) nbits += frag c -= frag } @@ -1721,9 +1866,9 @@ Run: // The bits are rotating through the bit buffer. for i := c / 4; i > 0; i-- { bits |= (uintptr(*src) & 0xf) << nbits - src = subtract1(src) + src = add1(src) *dst = uint8(bits&0xf | bitScanAll) - dst = subtract1(dst) + dst = add1(dst) bits >>= 4 } // Final src fragment. @@ -1745,12 +1890,12 @@ Run: bits >>= 8 } } else { - totalBits = (uintptr(unsafe.Pointer(dstStart))-uintptr(unsafe.Pointer(dst)))*4 + nbits + totalBits = (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*4 + nbits nbits += -nbits & 3 for ; nbits > 0; nbits -= 4 { v := bits&0xf | bitScanAll *dst = uint8(v) - dst = subtract1(dst) + dst = add1(dst) bits >>= 4 } } @@ -1839,12 +1984,11 @@ func getgcmask(ep interface{}) (mask []byte) { } // heap - var n uintptr - var base uintptr - if mlookup(uintptr(p), &base, &n, nil) != 0 { + if base, s, _ := findObject(uintptr(p), 0, 0, false); base != 0 { + hbits := heapBitsForAddr(base) + n := s.elemsize mask = make([]byte, n/sys.PtrSize) for i := uintptr(0); i < n; i += sys.PtrSize { - hbits := heapBitsForAddr(base + i) if hbits.isPointer() { mask[i/sys.PtrSize] = 1 } @@ -1852,6 +1996,7 @@ func getgcmask(ep interface{}) (mask []byte) { mask = mask[:i/sys.PtrSize] break } + hbits = hbits.next() } return } diff --git a/libgo/go/runtime/mcache.go b/libgo/go/runtime/mcache.go index 766cfd1..3dacf96 100644 --- a/libgo/go/runtime/mcache.go +++ b/libgo/go/runtime/mcache.go @@ -37,7 +37,6 @@ type mcache struct { alloc [numSpanClasses]*mspan // spans to allocate from, indexed by spanClass // Local allocator stats, flushed during GC. - local_nlookup uintptr // number of pointer lookups local_largefree uintptr // bytes freed for large objects (>maxsmallsize) local_nlargefree uintptr // number of frees for large objects (>maxsmallsize) local_nsmallfree [_NumSizeClasses]uintptr // number of frees for small objects (<=maxsmallsize) diff --git a/libgo/go/runtime/mcentral.go b/libgo/go/runtime/mcentral.go index 150f4fd..50a4791 100644 --- a/libgo/go/runtime/mcentral.go +++ b/libgo/go/runtime/mcentral.go @@ -246,6 +246,6 @@ func (c *mcentral) grow() *mspan { p := s.base() s.limit = p + size*n - heapBitsForSpan(s.base()).initSpan(s) + heapBitsForAddr(s.base()).initSpan(s) return s } diff --git a/libgo/go/runtime/mem_gccgo.go b/libgo/go/runtime/mem_gccgo.go index a087945..44f4648 100644 --- a/libgo/go/runtime/mem_gccgo.go +++ b/libgo/go/runtime/mem_gccgo.go @@ -21,9 +21,6 @@ func sysMmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uintptr) //extern munmap func munmap(addr unsafe.Pointer, length uintptr) int32 -//extern mincore -func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32 - //extern madvise func madvise(addr unsafe.Pointer, n uintptr, flags int32) int32 @@ -49,54 +46,6 @@ func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uintptr) (u return p, 0 } -// NOTE: vec must be just 1 byte long here. -// Mincore returns ENOMEM if any of the pages are unmapped, -// but we want to know that all of the pages are unmapped. -// To make these the same, we can only ask about one page -// at a time. See golang.org/issue/7476. 
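// Editor's aside on the deletions that follow: with the Go 1.11 arena
// allocator, heap address space is reserved explicitly, so the
// mincore/addrspace_free probing and the mmap_fixed retry dance are no
// longer needed. The simplified flow, using sysReserve/sysMap as revised
// below (sketch; POSIX mmap semantics assumed):
//
//	p := sysReserve(v, n)             // mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE)
//	sysMap(p, n, &memstats.heap_sys)  // mmap(p, n, PROT_READ|PROT_WRITE, ...|MAP_FIXED)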
-var addrspace_vec [1]byte - -func addrspace_free(v unsafe.Pointer, n uintptr) bool { - for off := uintptr(0); off < n; off += physPageSize { - // Use a length of 1 byte, which the kernel will round - // up to one physical page regardless of the true - // physical page size. - errval := 0 - if mincore(unsafe.Pointer(uintptr(v)+off), 1, &addrspace_vec[0]) < 0 { - errval = errno() - } - if errval == _ENOSYS { - // mincore is not available on this system. - // Assume the address is available. - return true - } - if errval == _EINVAL { - // Address is not a multiple of the physical - // page size. Shouldn't happen, but just ignore it. - continue - } - // ENOMEM means unmapped, which is what we want. - // Anything else we assume means the pages are mapped. - if errval != _ENOMEM { - return false - } - } - return true -} - -func mmap_fixed(v unsafe.Pointer, n uintptr, prot, flags, fd int32, offset uintptr) (unsafe.Pointer, int) { - p, err := mmap(v, n, prot, flags, fd, offset) - // On some systems, mmap ignores v without - // MAP_FIXED, so retry if the address space is free. - if p != v && addrspace_free(v, n) { - if err == 0 { - munmap(p, n) - } - p, err = mmap(v, n, prot, flags|_MAP_FIXED, fd, offset) - } - return p, err -} - // Don't split the stack as this method may be invoked without a valid G, which // prevents us from allocating more stack. //go:nosplit @@ -227,62 +176,17 @@ func sysFault(v unsafe.Pointer, n uintptr) { mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE|_MAP_FIXED, mmapFD, 0) } -func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer { - // On 64-bit, people with ulimit -v set complain if we reserve too - // much address space. Instead, assume that the reservation is okay - // if we can reserve at least 64K and check the assumption in SysMap. - // Only user-mode Linux (UML) rejects these requests. - if sys.PtrSize == 8 && uint64(n) > 1<<32 { - p, err := mmap_fixed(v, 64<<10, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, mmapFD, 0) - if p != v || err != 0 { - if err == 0 { - munmap(p, 64<<10) - } - return nil - } - munmap(p, 64<<10) - *reserved = false - return v - } - +func sysReserve(v unsafe.Pointer, n uintptr) unsafe.Pointer { p, err := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, mmapFD, 0) if err != 0 { return nil } - *reserved = true return p } -func sysMap(v unsafe.Pointer, n uintptr, reserved bool, sysStat *uint64) { +func sysMap(v unsafe.Pointer, n uintptr, sysStat *uint64) { mSysStatInc(sysStat, n) - // On 64-bit, we don't actually have v reserved, so tread carefully. - if !reserved { - flags := int32(_MAP_ANON | _MAP_PRIVATE) - if GOOS == "dragonfly" { - // TODO(jsing): For some reason DragonFly seems to return - // memory at a different address than we requested, even when - // there should be no reason for it to do so. This can be - // avoided by using MAP_FIXED, but I'm not sure we should need - // to do this - we do not on other platforms. - flags |= _MAP_FIXED - } - p, err := mmap_fixed(v, n, _PROT_READ|_PROT_WRITE, flags, mmapFD, 0) - if err == _ENOMEM { - throw("runtime: out of memory") - } - if p != v || err != 0 { - print("runtime: address space conflict: map(", v, ") = ", p, " (err ", err, ")\n") - throw("runtime: address space conflict") - } - return - } - - if GOOS == "aix" { - // AIX does not allow mapping a range that is already mapped. - // So always unmap first even if it is already unmapped. 
- munmap(v, n) - } p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, mmapFD, 0) if err == _ENOMEM { throw("runtime: out of memory") diff --git a/libgo/go/runtime/memmove_test.go b/libgo/go/runtime/memmove_test.go index 62de604..b490cd8 100644 --- a/libgo/go/runtime/memmove_test.go +++ b/libgo/go/runtime/memmove_test.go @@ -450,6 +450,13 @@ func BenchmarkCopyFat512(b *testing.B) { _ = y } } +func BenchmarkCopyFat520(b *testing.B) { + var x [520 / 4]uint32 + for i := 0; i < b.N; i++ { + y := x + _ = y + } +} func BenchmarkCopyFat1024(b *testing.B) { var x [1024 / 4]uint32 for i := 0; i < b.N; i++ { diff --git a/libgo/go/runtime/mfinal.go b/libgo/go/runtime/mfinal.go index 19573d8..1a7792c 100644 --- a/libgo/go/runtime/mfinal.go +++ b/libgo/go/runtime/mfinal.go @@ -142,7 +142,7 @@ func runfinq() { if fb == nil { fing = gp fingwait = true - goparkunlock(&finlock, "finalizer wait", traceEvGoBlock, 1) + goparkunlock(&finlock, waitReasonFinalizerWait, traceEvGoBlock, 1) continue } unlock(&finlock) @@ -233,8 +233,8 @@ func runfinq() { // is not guaranteed to run, because there is no ordering that // respects the dependencies. // -// The finalizer for obj is scheduled to run at some arbitrary time after -// obj becomes unreachable. +// The finalizer is scheduled to run at some arbitrary time after the +// program can no longer reach the object to which obj points. // There is no guarantee that finalizers will run before a program exits, // so typically they are useful only for releasing non-memory resources // associated with an object during a long-running program. @@ -284,7 +284,7 @@ func SetFinalizer(obj interface{}, finalizer interface{}) { throw("runtime.SetFinalizer: first argument is nil") } if etyp.kind&kindMask != kindPtr { - throw("runtime.SetFinalizer: first argument is " + *etyp.string + ", not pointer") + throw("runtime.SetFinalizer: first argument is " + etyp.string() + ", not pointer") } ot := (*ptrtype)(unsafe.Pointer(etyp)) if ot.elem == nil { @@ -292,9 +292,9 @@ func SetFinalizer(obj interface{}, finalizer interface{}) { } // find the containing object - _, base, _ := findObject(e.data) + base, _, _ := findObject(uintptr(e.data), 0, 0, false) - if base == nil { + if base == 0 { // 0-length objects are okay. if e.data == unsafe.Pointer(&zerobase) { return @@ -314,7 +314,7 @@ func SetFinalizer(obj interface{}, finalizer interface{}) { return } - if e.data != base { + if uintptr(e.data) != base { // As an implementation detail we allow to set finalizers for an inner byte // of an object if it could come from tiny alloc (see mallocgc for details). 
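// Editor's sketch of the usage these SetFinalizer checks guard (illustrative
// only; openFD and closeFD are hypothetical, and the example assumes
// import "runtime" on the caller's side):
//
//	type resource struct{ fd int }
//
//	r := &resource{fd: openFD()}
//	runtime.SetFinalizer(r, func(r *resource) { closeFD(r.fd) })
//	// The finalizer runs at some arbitrary time after the program can no
//	// longer reach the object r points to, and is not guaranteed to run
//	// before the program exits.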
if ot.elem == nil || ot.elem.kind&kindNoPointers == 0 || ot.elem.size >= maxTinySize { @@ -333,14 +333,14 @@ func SetFinalizer(obj interface{}, finalizer interface{}) { } if ftyp.kind&kindMask != kindFunc { - throw("runtime.SetFinalizer: second argument is " + *ftyp.string + ", not a function") + throw("runtime.SetFinalizer: second argument is " + ftyp.string() + ", not a function") } ft := (*functype)(unsafe.Pointer(ftyp)) if ft.dotdotdot { - throw("runtime.SetFinalizer: cannot pass " + *etyp.string + " to finalizer " + *ftyp.string + " because dotdotdot") + throw("runtime.SetFinalizer: cannot pass " + etyp.string() + " to finalizer " + ftyp.string() + " because dotdotdot") } if len(ft.in) != 1 { - throw("runtime.SetFinalizer: cannot pass " + *etyp.string + " to finalizer " + *ftyp.string) + throw("runtime.SetFinalizer: cannot pass " + etyp.string() + " to finalizer " + ftyp.string()) } fint := ft.in[0] switch { @@ -363,7 +363,7 @@ func SetFinalizer(obj interface{}, finalizer interface{}) { goto okarg } } - throw("runtime.SetFinalizer: cannot pass " + *etyp.string + " to finalizer " + *ftyp.string) + throw("runtime.SetFinalizer: cannot pass " + etyp.string() + " to finalizer " + ftyp.string()) okarg: // make sure we have a finalizer goroutine createfing() @@ -379,46 +379,6 @@ okarg: }) } -// Look up pointer v in heap. Return the span containing the object, -// the start of the object, and the size of the object. If the object -// does not exist, return nil, nil, 0. -func findObject(v unsafe.Pointer) (s *mspan, x unsafe.Pointer, n uintptr) { - c := gomcache() - c.local_nlookup++ - if sys.PtrSize == 4 && c.local_nlookup >= 1<<30 { - // purge cache stats to prevent overflow - lock(&mheap_.lock) - purgecachedstats(c) - unlock(&mheap_.lock) - } - - // find span - arena_start := mheap_.arena_start - arena_used := mheap_.arena_used - if uintptr(v) < arena_start || uintptr(v) >= arena_used { - return - } - p := uintptr(v) >> pageShift - q := p - arena_start>>pageShift - s = mheap_.spans[q] - if s == nil { - return - } - x = unsafe.Pointer(s.base()) - - if uintptr(v) < uintptr(x) || uintptr(v) >= uintptr(unsafe.Pointer(s.limit)) || s.state != mSpanInUse { - s = nil - x = nil - return - } - - n = s.elemsize - if s.spanclass.sizeclass() != 0 { - x = add(x, (uintptr(v)-uintptr(x))/n*n) - } - return -} - // Mark KeepAlive as noinline so that it is easily detectable as an intrinsic. //go:noinline diff --git a/libgo/go/runtime/mfixalloc.go b/libgo/go/runtime/mfixalloc.go index 7496671..1febe78 100644 --- a/libgo/go/runtime/mfixalloc.go +++ b/libgo/go/runtime/mfixalloc.go @@ -11,7 +11,7 @@ package runtime import "unsafe" // FixAlloc is a simple free-list allocator for fixed size objects. -// Malloc uses a FixAlloc wrapped around sysAlloc to manages its +// Malloc uses a FixAlloc wrapped around sysAlloc to manage its // MCache and MSpan objects. // // Memory returned by fixalloc.alloc is zeroed by default, but the diff --git a/libgo/go/runtime/mgc.go b/libgo/go/runtime/mgc.go index 626f088..4ef982d 100644 --- a/libgo/go/runtime/mgc.go +++ b/libgo/go/runtime/mgc.go @@ -232,21 +232,10 @@ func setGCPercent(in int32) (out int32) { gcSetTriggerRatio(memstats.triggerRatio) unlock(&mheap_.lock) - // If we just disabled GC, wait for any concurrent GC to + // If we just disabled GC, wait for any concurrent GC mark to // finish so we always return with no GC running. if in < 0 { - // Disable phase transitions. - lock(&work.sweepWaiters.lock) - if gcphase == _GCmark { - // GC is active. Wait until we reach sweeping. 
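// Editor's aside on the change above (assumes import "runtime/debug" on the
// caller's side): the contract being preserved is that disabling GC returns
// only once no collection is running, now implemented by waiting on the
// current mark phase via gcWaitOnMark below.
//
//	old := debug.SetGCPercent(-1) // returns with no GC in flight
//	defer debug.SetGCPercent(old)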
- gp := getg() - gp.schedlink = work.sweepWaiters.head - work.sweepWaiters.head.set(gp) - goparkunlock(&work.sweepWaiters.lock, "wait for GC cycle", traceEvGoBlock, 1) - } else { - // GC isn't active. - unlock(&work.sweepWaiters.lock) - } + gcWaitOnMark(atomic.Load(&work.cycles)) } return out @@ -1091,21 +1080,10 @@ func GC() { // GC may move ahead on its own. For example, when we block // until mark termination N, we may wake up in cycle N+2. - gp := getg() - - // Prevent the GC phase or cycle count from changing. - lock(&work.sweepWaiters.lock) + // Wait until the current sweep termination, mark, and mark + // termination complete. n := atomic.Load(&work.cycles) - if gcphase == _GCmark { - // Wait until sweep termination, mark, and mark - // termination of cycle N complete. - gp.schedlink = work.sweepWaiters.head - work.sweepWaiters.head.set(gp) - goparkunlock(&work.sweepWaiters.lock, "wait for GC cycle", traceEvGoBlock, 1) - } else { - // We're in sweep N already. - unlock(&work.sweepWaiters.lock) - } + gcWaitOnMark(n) // We're now in sweep N or later. Trigger GC cycle N+1, which // will first finish sweep N if necessary and then enter sweep @@ -1113,14 +1091,7 @@ func GC() { gcStart(gcBackgroundMode, gcTrigger{kind: gcTriggerCycle, n: n + 1}) // Wait for mark termination N+1 to complete. - lock(&work.sweepWaiters.lock) - if gcphase == _GCmark && atomic.Load(&work.cycles) == n+1 { - gp.schedlink = work.sweepWaiters.head - work.sweepWaiters.head.set(gp) - goparkunlock(&work.sweepWaiters.lock, "wait for GC cycle", traceEvGoBlock, 1) - } else { - unlock(&work.sweepWaiters.lock) - } + gcWaitOnMark(n + 1) // Finish sweep N+1 before returning. We do this both to // complete the cycle and because runtime.GC() is often used @@ -1157,6 +1128,32 @@ func GC() { releasem(mp) } +// gcWaitOnMark blocks until GC finishes the Nth mark phase. If GC has +// already completed this mark phase, it returns immediately. +func gcWaitOnMark(n uint32) { + for { + // Disable phase transitions. + lock(&work.sweepWaiters.lock) + nMarks := atomic.Load(&work.cycles) + if gcphase != _GCmark { + // We've already completed this cycle's mark. + nMarks++ + } + if nMarks > n { + // We're done. + unlock(&work.sweepWaiters.lock) + return + } + + // Wait until sweep termination, mark, and mark + // termination of cycle N complete. + gp := getg() + gp.schedlink = work.sweepWaiters.head + work.sweepWaiters.head.set(gp) + goparkunlock(&work.sweepWaiters.lock, waitReasonWaitForGCCycle, traceEvGoBlock, 1) + } +} + // gcMode indicates how concurrent a GC cycle should be. type gcMode int @@ -1531,7 +1528,7 @@ func gcMarkTermination(nextTriggerRatio float64) { _g_.m.traceback = 2 gp := _g_.m.curg casgstatus(gp, _Grunning, _Gwaiting) - gp.waitreason = "garbage collection" + gp.waitreason = waitReasonGarbageCollection // Run gc on the g0 stack. We do this so that the g stack // we're currently running on will no longer change. Cuts @@ -1800,7 +1797,7 @@ func gcBgMarkWorker(_p_ *p) { } } return true - }, unsafe.Pointer(park), "GC worker (idle)", traceEvGoBlock, 0) + }, unsafe.Pointer(park), waitReasonGCWorkerIdle, traceEvGoBlock, 0) // Loop until the P dies and disassociates this // worker (the P may later be reused, in which case diff --git a/libgo/go/runtime/mgclarge.go b/libgo/go/runtime/mgclarge.go index fe437bf..e7fa831 100644 --- a/libgo/go/runtime/mgclarge.go +++ b/libgo/go/runtime/mgclarge.go @@ -9,7 +9,7 @@ // Large spans are the subject of this file. 
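The new gcWaitOnMark helper in the mgc.go hunks above centralizes the check/park/recheck idiom that setGCPercent and GC used to open-code: take the sweepWaiters lock, see whether the mark phase of the requested cycle has already finished, and otherwise park on the waiter list and recheck after being woken. A minimal, self-contained analogue of that loop, using sync.Cond in place of goparkunlock and the waiter list, and ignoring gcWaitOnMark's adjustment for whether a mark is currently in progress (an illustrative sketch only, not runtime code):

package main

import (
	"fmt"
	"sync"
	"time"
)

// cycleTracker stands in for work.cycles plus the sweepWaiters list:
// a monotonically increasing count of completed mark phases and a way
// to block until a given cycle's mark is done.
type cycleTracker struct {
	mu     sync.Mutex
	cond   *sync.Cond
	cycles uint32 // completed mark phases
}

func newCycleTracker() *cycleTracker {
	t := &cycleTracker{}
	t.cond = sync.NewCond(&t.mu)
	return t
}

// waitOnMark blocks until more than n mark phases have completed,
// rechecking the count after every wakeup, just as gcWaitOnMark
// re-takes the lock and re-reads work.cycles after each park.
func (t *cycleTracker) waitOnMark(n uint32) {
	t.mu.Lock()
	for t.cycles <= n {
		t.cond.Wait()
	}
	t.mu.Unlock()
}

// finishMark plays the role of mark termination waking the waiters.
func (t *cycleTracker) finishMark() {
	t.mu.Lock()
	t.cycles++
	t.cond.Broadcast()
	t.mu.Unlock()
}

func main() {
	t := newCycleTracker()
	go func() {
		for i := 0; i < 3; i++ {
			time.Sleep(10 * time.Millisecond)
			t.finishMark()
		}
	}()
	t.waitOnMark(2) // returns once the third mark phase has completed
	fmt.Println("cycle 2 marked")
}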
Spans consisting of less than // _MaxMHeapLists are held in lists of like sized spans. Larger spans // are held in a treap. See https://en.wikipedia.org/wiki/Treap or -// http://faculty.washington.edu/aragon/pubs/rst89.pdf for an overview. +// https://faculty.washington.edu/aragon/pubs/rst89.pdf for an overview. // sema.go also holds an implementation of a treap. // // Each treapNode holds a single span. The treap is sorted by page size @@ -43,7 +43,7 @@ type treapNode struct { parent *treapNode // direct parent of this node, nil if root npagesKey uintptr // number of pages in spanKey, used as primary sort key spanKey *mspan // span of size npagesKey, used as secondary sort key - priority uint32 // random number used by treap algorithm keep tree probablistically balanced + priority uint32 // random number used by treap algorithm to keep tree probabilistically balanced } func (t *treapNode) init() { @@ -137,7 +137,7 @@ func (root *mTreap) insert(span *mspan) { // npagesKeys, it is kept balanced on average by maintaining a heap ordering // on the priority: s.priority <= both s.right.priority and s.right.priority. // https://en.wikipedia.org/wiki/Treap - // http://faculty.washington.edu/aragon/pubs/rst89.pdf + // https://faculty.washington.edu/aragon/pubs/rst89.pdf t := (*treapNode)(mheap_.treapalloc.alloc()) t.init() diff --git a/libgo/go/runtime/mgcmark.go b/libgo/go/runtime/mgcmark.go index 7297fcb..88cae41 100644 --- a/libgo/go/runtime/mgcmark.go +++ b/libgo/go/runtime/mgcmark.go @@ -232,7 +232,7 @@ func markroot(gcw *gcWork, i uint32) { selfScan := gp == userG && readgstatus(userG) == _Grunning if selfScan { casgstatus(userG, _Grunning, _Gwaiting) - userG.waitreason = "garbage collection scan" + userG.waitreason = waitReasonGarbageCollectionScan } // TODO: scang blocks until gp's stack has @@ -467,7 +467,7 @@ func gcAssistAlloc1(gp *g, scanWork int64) { // store that clears it but an atomic check in every malloc // would be a performance hit. // Instead we recheck it here on the non-preemptable system - // stack to determine if we should preform an assist. + // stack to determine if we should perform an assist. // GC is done, so ignore any remaining debt. gp.gcAssistBytes = 0 @@ -486,7 +486,7 @@ func gcAssistAlloc1(gp *g, scanWork int64) { // gcDrainN requires the caller to be preemptible. casgstatus(gp, _Grunning, _Gwaiting) - gp.waitreason = "GC assist marking" + gp.waitreason = waitReasonGCAssistMarking // drain own cached work first in the hopes that it // will be more cache friendly. @@ -585,7 +585,7 @@ func gcParkAssist() bool { return false } // Park. - goparkunlock(&work.assistQueue.lock, "GC assist wait", traceEvGoBlockGC, 2) + goparkunlock(&work.assistQueue.lock, waitReasonGCAssistWait, traceEvGoBlockGC, 2) return true } @@ -934,9 +934,6 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) { b := b0 n := n0 - arena_start := mheap_.arena_start - arena_used := mheap_.arena_used - for i := uintptr(0); i < n; { // Find bits for the next word. bits := uint32(*addb(ptrmask, i/(sys.PtrSize*8))) @@ -948,9 +945,9 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) { if bits&1 != 0 { // Same work as in scanobject; see comments there. 
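The mgclarge.go comments above describe the large-span treap: nodes are kept in symmetric order by page count and in min-heap order by a random priority, which keeps the tree balanced in expectation. A compact stand-alone treap with the same two invariants, using plain integer keys instead of *mspan and recursion instead of the runtime's parent pointers (an illustrative sketch, not the runtime's implementation):

package main

import (
	"fmt"
	"math/rand"
)

type node struct {
	npages   uintptr // sort key, like treapNode.npagesKey
	priority uint32  // random, heap-ordered, like treapNode.priority
	left     *node
	right    *node
}

// insert preserves symmetric order on npages and min-heap order on
// priority by rotating the new node up while its priority is smaller
// than its parent's.
func insert(t *node, npages uintptr) *node {
	if t == nil {
		return &node{npages: npages, priority: rand.Uint32()}
	}
	if npages < t.npages {
		t.left = insert(t.left, npages)
		if t.left.priority < t.priority {
			t = rotateRight(t)
		}
	} else {
		t.right = insert(t.right, npages)
		if t.right.priority < t.priority {
			t = rotateLeft(t)
		}
	}
	return t
}

func rotateRight(t *node) *node {
	l := t.left
	t.left, l.right = l.right, t
	return l
}

func rotateLeft(t *node) *node {
	r := t.right
	t.right, r.left = r.left, t
	return r
}

func walk(t *node, visit func(uintptr)) {
	if t == nil {
		return
	}
	walk(t.left, visit)
	visit(t.npages)
	walk(t.right, visit)
}

func main() {
	var root *node
	for _, n := range []uintptr{129, 200, 130, 512, 384} {
		root = insert(root, n)
	}
	// An in-order walk yields the page counts in sorted order.
	walk(root, func(n uintptr) { fmt.Println(n) })
}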
obj := *(*uintptr)(unsafe.Pointer(b + i)) - if obj != 0 && arena_start <= obj && obj < arena_used { - if obj, hbits, span, objIndex := heapBitsForObject(obj, b, i, false); obj != 0 { - greyobject(obj, b, i, hbits, span, gcw, objIndex, false) + if obj != 0 { + if obj, span, objIndex := findObject(obj, b, i, false); obj != 0 { + greyobject(obj, b, i, span, gcw, objIndex, false) } } } @@ -967,18 +964,6 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) { // //go:nowritebarrier func scanobject(b uintptr, gcw *gcWork) { - // Note that arena_used may change concurrently during - // scanobject and hence scanobject may encounter a pointer to - // a newly allocated heap object that is *not* in - // [start,used). It will not mark this object; however, we - // know that it was just installed by a mutator, which means - // that mutator will execute a write barrier and take care of - // marking it. This is even more pronounced on relaxed memory - // architectures since we access arena_used without barriers - // or synchronization, but the same logic applies. - arena_start := mheap_.arena_start - arena_used := mheap_.arena_used - // Find the bits for b and the size of the object at b. // // b is either the beginning of an object, in which case this @@ -1052,11 +1037,19 @@ func scanobject(b uintptr, gcw *gcWork) { obj := *(*uintptr)(unsafe.Pointer(b + i)) // At this point we have extracted the next potential pointer. - // Check if it points into heap and not back at the current object. - if obj != 0 && arena_start <= obj && obj < arena_used && obj-b >= n { - // Mark the object. - if obj, hbits, span, objIndex := heapBitsForObject(obj, b, i, false); obj != 0 { - greyobject(obj, b, i, hbits, span, gcw, objIndex, false) + // Quickly filter out nil and pointers back to the current object. + if obj != 0 && obj-b >= n { + // Test if obj points into the Go heap and, if so, + // mark the object. + // + // Note that it's possible for findObject to + // fail if obj points to a just-allocated heap + // object because of a race with growing the + // heap. In this case, we know the object was + // just allocated and hence will be marked by + // allocation itself. + if obj, span, objIndex := findObject(obj, b, i, false); obj != 0 { + greyobject(obj, b, i, span, gcw, objIndex, false) } } } @@ -1071,16 +1064,11 @@ func scanobject(b uintptr, gcw *gcWork) { // scanblock, but we scan the stack conservatively, so there is no // bitmask of pointers. func scanstackblock(b, n uintptr, gcw *gcWork) { - arena_start := mheap_.arena_start - arena_used := mheap_.arena_used - for i := uintptr(0); i < n; i += sys.PtrSize { // Same work as in scanobject; see comments there. obj := *(*uintptr)(unsafe.Pointer(b + i)) - if obj != 0 && arena_start <= obj && obj < arena_used { - if obj, hbits, span, objIndex := heapBitsForObject(obj, b, i, true); obj != 0 { - greyobject(obj, b, i, hbits, span, gcw, objIndex, true) - } + if obj, span, objIndex := findObject(obj, b, i, true); obj != 0 { + greyobject(obj, b, i, span, gcw, objIndex, true) } } } @@ -1090,11 +1078,9 @@ func scanstackblock(b, n uintptr, gcw *gcWork) { // Preemption must be disabled. //go:nowritebarrier func shade(b uintptr) { - // shade can be called to shade a pointer found on the stack, - // so pass forStack as true to heapBitsForObject and greyobject. 
- if obj, hbits, span, objIndex := heapBitsForObject(b, 0, 0, true); obj != 0 { + if obj, span, objIndex := findObject(b, 0, 0, true); obj != 0 { gcw := &getg().m.p.ptr().gcw - greyobject(obj, 0, 0, hbits, span, gcw, objIndex, true) + greyobject(obj, 0, 0, span, gcw, objIndex, true) if gcphase == _GCmarktermination || gcBlackenPromptly { // Ps aren't allowed to cache work during mark // termination. @@ -1110,7 +1096,7 @@ func shade(b uintptr) { // See also wbBufFlush1, which partially duplicates this logic. // //go:nowritebarrierrec -func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork, objIndex uintptr, forStack bool) { +func greyobject(obj, base, off uintptr, span *mspan, gcw *gcWork, objIndex uintptr, forStack bool) { // obj should be start of allocation, and so must be at least pointer-aligned. if obj&(sys.PtrSize-1) != 0 { throw("greyobject: obj not pointer-aligned") @@ -1139,6 +1125,7 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork getg().m.traceback = 2 throw("checkmark found unmarked object") } + hbits := heapBitsForAddr(obj) if hbits.isCheckmarked(span.elemsize) { return } @@ -1190,15 +1177,8 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork // gcDumpObject dumps the contents of obj for debugging and marks the // field at byte offset off in obj. func gcDumpObject(label string, obj, off uintptr) { - if obj < mheap_.arena_start || obj >= mheap_.arena_used { - print(label, "=", hex(obj), " is not in the Go heap\n") - return - } - k := obj >> _PageShift - x := k - x -= mheap_.arena_start >> _PageShift - s := mheap_.spans[x] - print(label, "=", hex(obj), " k=", hex(k)) + s := spanOf(obj) + print(label, "=", hex(obj)) if s == nil { print(" s=nil\n") return @@ -1272,9 +1252,9 @@ func gcMarkTinyAllocs() { if c == nil || c.tiny == 0 { continue } - _, hbits, span, objIndex := heapBitsForObject(c.tiny, 0, 0, false) + _, span, objIndex := findObject(c.tiny, 0, 0, false) gcw := &p.gcw - greyobject(c.tiny, 0, 0, hbits, span, gcw, objIndex, false) + greyobject(c.tiny, 0, 0, span, gcw, objIndex, false) if gcBlackenPromptly { gcw.dispose() } @@ -1309,7 +1289,7 @@ func initCheckmarks() { useCheckmark = true for _, s := range mheap_.allspans { if s.state == _MSpanInUse { - heapBitsForSpan(s.base()).initCheckmarkSpan(s.layout()) + heapBitsForAddr(s.base()).initCheckmarkSpan(s.layout()) } } } @@ -1318,7 +1298,7 @@ func clearCheckmarks() { useCheckmark = false for _, s := range mheap_.allspans { if s.state == _MSpanInUse { - heapBitsForSpan(s.base()).clearCheckmarkSpan(s.layout()) + heapBitsForAddr(s.base()).clearCheckmarkSpan(s.layout()) } } } diff --git a/libgo/go/runtime/mgcsweep.go b/libgo/go/runtime/mgcsweep.go index d6be349..39dd54e 100644 --- a/libgo/go/runtime/mgcsweep.go +++ b/libgo/go/runtime/mgcsweep.go @@ -51,7 +51,7 @@ func bgsweep(c chan int) { lock(&sweep.lock) sweep.parked = true c <- 1 - goparkunlock(&sweep.lock, "GC sweep wait", traceEvGoBlock, 1) + goparkunlock(&sweep.lock, waitReasonGCSweepWait, traceEvGoBlock, 1) for { for gosweepone() != ^uintptr(0) { @@ -70,7 +70,7 @@ func bgsweep(c chan int) { continue } sweep.parked = true - goparkunlock(&sweep.lock, "GC sweep wait", traceEvGoBlock, 1) + goparkunlock(&sweep.lock, waitReasonGCSweepWait, traceEvGoBlock, 1) } } diff --git a/libgo/go/runtime/mgcwork.go b/libgo/go/runtime/mgcwork.go index c6634fc..99771e2 100644 --- a/libgo/go/runtime/mgcwork.go +++ b/libgo/go/runtime/mgcwork.go @@ -400,6 +400,7 @@ func getempty() *workbuf { for i := 
uintptr(0); i+_WorkbufSize <= workbufAlloc; i += _WorkbufSize { newb := (*workbuf)(unsafe.Pointer(s.base() + i)) newb.nobj = 0 + lfnodeValidate(&newb.node) if i == 0 { b = newb } else { diff --git a/libgo/go/runtime/mheap.go b/libgo/go/runtime/mheap.go index d971bfe..65622f4 100644 --- a/libgo/go/runtime/mheap.go +++ b/libgo/go/runtime/mheap.go @@ -50,23 +50,6 @@ type mheap struct { // access (since that may free the backing store). allspans []*mspan // all spans out there - // spans is a lookup table to map virtual address page IDs to *mspan. - // For allocated spans, their pages map to the span itself. - // For free spans, only the lowest and highest pages map to the span itself. - // Internal pages map to an arbitrary span. - // For pages that have never been allocated, spans entries are nil. - // - // Modifications are protected by mheap.lock. Reads can be - // performed without locking, but ONLY from indexes that are - // known to contain in-use or stack spans. This means there - // must not be a safe-point between establishing that an - // address is live and looking it up in the spans array. - // - // This is backed by a reserved region of the address space so - // it can grow without moving. The memory up to len(spans) is - // mapped. cap(spans) indicates the total reserved memory. - spans []*mspan - // sweepSpans contains two mspan stacks: one of swept in-use // spans, and one of unswept in-use spans. These two trade // roles on each GC cycle. Since the sweepgen increases by 2 @@ -78,7 +61,7 @@ type mheap struct { // on the swept stack. sweepSpans [2]gcSweepBuf - _ uint32 // align uint64 fields on 32-bit for atomics + //_ uint32 // align uint64 fields on 32-bit for atomics // Proportional sweep // @@ -113,36 +96,44 @@ type mheap struct { nlargefree uint64 // number of frees for large objects (>maxsmallsize) nsmallfree [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize) - // range of addresses we might see in the heap - bitmap uintptr // Points to one byte past the end of the bitmap - bitmap_mapped uintptr - - // The arena_* fields indicate the addresses of the Go heap. + // arenas is the heap arena map. It points to the metadata for + // the heap for every arena frame of the entire usable virtual + // address space. + // + // Use arenaIndex to compute indexes into this array. // - // The maximum range of the Go heap is - // [arena_start, arena_start+_MaxMem+1). + // For regions of the address space that are not backed by the + // Go heap, the arena map contains nil. // - // The range of the current Go heap is - // [arena_start, arena_used). Parts of this range may not be - // mapped, but the metadata structures are always mapped for - // the full range. - arena_start uintptr - arena_used uintptr // Set with setArenaUsed. - - // The heap is grown using a linear allocator that allocates - // from the block [arena_alloc, arena_end). arena_alloc is - // often, but *not always* equal to arena_used. - arena_alloc uintptr - arena_end uintptr - - // arena_reserved indicates that the memory [arena_alloc, - // arena_end) is reserved (e.g., mapped PROT_NONE). If this is - // false, we have to be careful not to clobber existing - // mappings here. If this is true, then we own the mapping - // here and *must* clobber it to use it. - arena_reserved bool - - _ uint32 // ensure 64-bit alignment + // Modifications are protected by mheap_.lock. 
Reads can be + // performed without locking; however, a given entry can + // transition from nil to non-nil at any time when the lock + // isn't held. (Entries never transitions back to nil.) + // + // In general, this is a two-level mapping consisting of an L1 + // map and possibly many L2 maps. This saves space when there + // are a huge number of arena frames. However, on many + // platforms (even 64-bit), arenaL1Bits is 0, making this + // effectively a single-level map. In this case, arenas[0] + // will never be nil. + arenas [1 << arenaL1Bits]*[1 << arenaL2Bits]*heapArena + + // heapArenaAlloc is pre-reserved space for allocating heapArena + // objects. This is only used on 32-bit, where we pre-reserve + // this space to avoid interleaving it with the heap itself. + heapArenaAlloc linearAlloc + + // arenaHints is a list of addresses at which to attempt to + // add more heap arenas. This is initially populated with a + // set of general hint addresses, and grown with the bounds of + // actual heap arena ranges. + arenaHints *arenaHint + + // arena is a pre-reserved space for allocating heap arenas + // (the actual arenas). This is only used on 32-bit. + arena linearAlloc + + //_ uint32 // ensure 64-bit alignment of central // central free lists for small size classes. // the padding makes sure that the MCentrals are @@ -160,12 +151,51 @@ type mheap struct { specialfinalizeralloc fixalloc // allocator for specialfinalizer* specialprofilealloc fixalloc // allocator for specialprofile* speciallock mutex // lock for special record allocators. + arenaHintAlloc fixalloc // allocator for arenaHints unused *specialfinalizer // never set, just here to force the specialfinalizer type into DWARF } var mheap_ mheap +// A heapArena stores metadata for a heap arena. heapArenas are stored +// outside of the Go heap and accessed via the mheap_.arenas index. +// +// This gets allocated directly from the OS, so ideally it should be a +// multiple of the system page size. For example, avoid adding small +// fields. +// +//go:notinheap +type heapArena struct { + // bitmap stores the pointer/scalar bitmap for the words in + // this arena. See mbitmap.go for a description. Use the + // heapBits type to access this. + bitmap [heapArenaBitmapBytes]byte + + // spans maps from virtual address page ID within this arena to *mspan. + // For allocated spans, their pages map to the span itself. + // For free spans, only the lowest and highest pages map to the span itself. + // Internal pages map to an arbitrary span. + // For pages that have never been allocated, spans entries are nil. + // + // Modifications are protected by mheap.lock. Reads can be + // performed without locking, but ONLY from indexes that are + // known to contain in-use or stack spans. This means there + // must not be a safe-point between establishing that an + // address is live and looking it up in the spans array. + spans [pagesPerArena]*mspan +} + +// arenaHint is a hint for where to grow the heap arenas. See +// mheap_.arenaHints. +// +//go:notinheap +type arenaHint struct { + addr uintptr + down bool + next *arenaHint +} + // An MSpan is a run of pages. // // When a MSpan is in the heap free list, state == MSpanFree @@ -384,21 +414,55 @@ func (sc spanClass) noscan() bool { return sc&1 != 0 } +// arenaIndex returns the index into mheap_.arenas of the arena +// containing metadata for p. This index combines of an index into the +// L1 map and an index into the L2 map and should be used as +// mheap_.arenas[ai.l1()][ai.l2()]. 
+// +// If p is outside the range of valid heap addresses, either l1() or +// l2() will be out of bounds. +// +// It is nosplit because it's called by spanOf and several other +// nosplit functions. +// +//go:nosplit +func arenaIndex(p uintptr) arenaIdx { + return arenaIdx((p + arenaBaseOffset) / heapArenaBytes) +} + +// arenaBase returns the low address of the region covered by heap +// arena i. +func arenaBase(i arenaIdx) uintptr { + return uintptr(i)*heapArenaBytes - arenaBaseOffset +} + +type arenaIdx uint + +func (i arenaIdx) l1() uint { + if arenaL1Bits == 0 { + // Let the compiler optimize this away if there's no + // L1 map. + return 0 + } else { + return uint(i) >> arenaL1Shift + } +} + +func (i arenaIdx) l2() uint { + if arenaL1Bits == 0 { + return uint(i) + } else { + return uint(i) & (1<<arenaL2Bits - 1) + } +} + // inheap reports whether b is a pointer into a (potentially dead) heap object. // It returns false for pointers into _MSpanManual spans. // Non-preemptible because it is used by write barriers. //go:nowritebarrier //go:nosplit func inheap(b uintptr) bool { - if b == 0 || b < mheap_.arena_start || b >= mheap_.arena_used { - return false - } - // Not a beginning of a block, consult span table to find the block beginning. - s := mheap_.spans[(b-mheap_.arena_start)>>_PageShift] - if s == nil || b < s.base() || b >= s.limit || s.state != mSpanInUse { - return false - } - return true + return spanOfHeap(b) != nil } // inHeapOrStack is a variant of inheap that returns true for pointers @@ -407,11 +471,7 @@ func inheap(b uintptr) bool { //go:nowritebarrier //go:nosplit func inHeapOrStack(b uintptr) bool { - if b == 0 || b < mheap_.arena_start || b >= mheap_.arena_used { - return false - } - // Not a beginning of a block, consult span table to find the block beginning. - s := mheap_.spans[(b-mheap_.arena_start)>>_PageShift] + s := spanOf(b) if s == nil || b < s.base() { return false } @@ -423,81 +483,81 @@ func inHeapOrStack(b uintptr) bool { } } -// TODO: spanOf and spanOfUnchecked are open-coded in a lot of places. -// Use the functions instead. - -// spanOf returns the span of p. If p does not point into the heap or -// no span contains p, spanOf returns nil. +// spanOf returns the span of p. If p does not point into the heap +// arena or no span has ever contained p, spanOf returns nil. +// +// If p does not point to allocated memory, this may return a non-nil +// span that does *not* contain p. If this is a possibility, the +// caller should either call spanOfHeap or check the span bounds +// explicitly. +// +// Must be nosplit because it has callers that are nosplit. +// +//go:nosplit func spanOf(p uintptr) *mspan { - if p == 0 || p < mheap_.arena_start || p >= mheap_.arena_used { + // This function looks big, but we use a lot of constant + // folding around arenaL1Bits to get it under the inlining + // budget. Also, many of the checks here are safety checks + // that Go needs to do anyway, so the generated code is quite + // short. + ri := arenaIndex(p) + if arenaL1Bits == 0 { + // If there's no L1, then ri.l1() can't be out of bounds but ri.l2() can. + if ri.l2() >= uint(len(mheap_.arenas[0])) { + return nil + } + } else { + // If there's an L1, then ri.l1() can be out of bounds but ri.l2() can't. + if ri.l1() >= uint(len(mheap_.arenas)) { + return nil + } + } + l2 := mheap_.arenas[ri.l1()] + if arenaL1Bits != 0 && l2 == nil { // Should never happen if there's no L1. 
return nil } - return spanOfUnchecked(p) + ha := l2[ri.l2()] + if ha == nil { + return nil + } + return ha.spans[(p/pageSize)%pagesPerArena] } // spanOfUnchecked is equivalent to spanOf, but the caller must ensure -// that p points into the heap (that is, mheap_.arena_start <= p < -// mheap_.arena_used). +// that p points into an allocated heap arena. +// +// Must be nosplit because it has callers that are nosplit. +// +//go:nosplit func spanOfUnchecked(p uintptr) *mspan { - return mheap_.spans[(p-mheap_.arena_start)>>_PageShift] + ai := arenaIndex(p) + return mheap_.arenas[ai.l1()][ai.l2()].spans[(p/pageSize)%pagesPerArena] } -func mlookup(v uintptr, base *uintptr, size *uintptr, sp **mspan) int32 { - _g_ := getg() - - _g_.m.mcache.local_nlookup++ - if sys.PtrSize == 4 && _g_.m.mcache.local_nlookup >= 1<<30 { - // purge cache stats to prevent overflow - lock(&mheap_.lock) - purgecachedstats(_g_.m.mcache) - unlock(&mheap_.lock) - } - - s := mheap_.lookupMaybe(unsafe.Pointer(v)) - if sp != nil { - *sp = s - } - if s == nil { - if base != nil { - *base = 0 - } - if size != nil { - *size = 0 - } - return 0 - } - - p := s.base() - if s.spanclass.sizeclass() == 0 { - // Large object. - if base != nil { - *base = p - } - if size != nil { - *size = s.npages << _PageShift - } - return 1 - } - - n := s.elemsize - if base != nil { - i := (v - p) / n - *base = p + i*n - } - if size != nil { - *size = n +// spanOfHeap is like spanOf, but returns nil if p does not point to a +// heap object. +// +// Must be nosplit because it has callers that are nosplit. +// +//go:nosplit +func spanOfHeap(p uintptr) *mspan { + s := spanOf(p) + // If p is not allocated, it may point to a stale span, so we + // have to check the span's bounds and state. + if s == nil || p < s.base() || p >= s.limit || s.state != mSpanInUse { + return nil } - - return 1 + return s } // Initialize the heap. -func (h *mheap) init(spansStart, spansBytes uintptr) { +func (h *mheap) init() { h.treapalloc.init(unsafe.Sizeof(treapNode{}), nil, nil, &memstats.other_sys) h.spanalloc.init(unsafe.Sizeof(mspan{}), recordspan, unsafe.Pointer(h), &memstats.mspan_sys) h.cachealloc.init(unsafe.Sizeof(mcache{}), nil, nil, &memstats.mcache_sys) h.specialfinalizeralloc.init(unsafe.Sizeof(specialfinalizer{}), nil, nil, &memstats.other_sys) h.specialprofilealloc.init(unsafe.Sizeof(specialprofile{}), nil, nil, &memstats.other_sys) + h.arenaHintAlloc.init(unsafe.Sizeof(arenaHint{}), nil, nil, &memstats.other_sys) // Don't zero mspan allocations. Background sweeping can // inspect a span concurrently with allocating it, so it's @@ -518,60 +578,6 @@ func (h *mheap) init(spansStart, spansBytes uintptr) { for i := range h.central { h.central[i].mcentral.init(spanClass(i)) } - - sp := (*slice)(unsafe.Pointer(&h.spans)) - sp.array = unsafe.Pointer(spansStart) - sp.len = 0 - sp.cap = int(spansBytes / sys.PtrSize) - - // Map metadata structures. But don't map race detector memory - // since we're not actually growing the arena here (and TSAN - // gets mad if you map 0 bytes). - h.setArenaUsed(h.arena_used, false) -} - -// setArenaUsed extends the usable arena to address arena_used and -// maps auxiliary VM regions for any newly usable arena space. -// -// racemap indicates that this memory should be managed by the race -// detector. racemap should be true unless this is covering a VM hole. -func (h *mheap) setArenaUsed(arena_used uintptr, racemap bool) { - // Map auxiliary structures *before* h.arena_used is updated. 
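The spanOf, spanOfUnchecked and spanOfHeap lookups above all reduce to the same arithmetic: offset the address, divide by the arena size to get an arena index, split that index into L1/L2 parts, and finally index the per-arena spans array by page. A stand-alone illustration of that arithmetic with made-up constants (the real heapArenaBytes, arenaL2Bits, arenaBaseOffset and friends are platform-dependent and defined in malloc.go, outside this diff; on most 64-bit targets arenaL1Bits is 0 and l1() is always 0):

package main

import "fmt"

// Illustrative constants only; they do not match any real platform.
const (
	pageSize        = 8192
	heapArenaBytes  = 1 << 22 // 4 MiB arenas for the example
	pagesPerArena   = heapArenaBytes / pageSize
	arenaL2Bits     = 8
	arenaBaseOffset = 0
)

type arenaIdx uint

// arenaIndex mirrors the function of the same name above.
func arenaIndex(p uintptr) arenaIdx {
	return arenaIdx((p + arenaBaseOffset) / heapArenaBytes)
}

// l1/l2 split the arena index into the two map levels (the runtime
// names this shift arenaL1Shift).
func (i arenaIdx) l1() uint { return uint(i) >> arenaL2Bits }
func (i arenaIdx) l2() uint { return uint(i) & (1<<arenaL2Bits - 1) }

func main() {
	p := uintptr(49*heapArenaBytes + 3*pageSize + 100)
	ai := arenaIndex(p)
	fmt.Printf("arena index %d -> L1 %d, L2 %d, page-in-arena %d\n",
		uint(ai), ai.l1(), ai.l2(), (p/pageSize)%pagesPerArena)
	// Output: arena index 49 -> L1 0, L2 49, page-in-arena 3
}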
- // Waiting to update arena_used until after the memory has been mapped - // avoids faults when other threads try access these regions immediately - // after observing the change to arena_used. - - // Map the bitmap. - h.mapBits(arena_used) - - // Map spans array. - h.mapSpans(arena_used) - - // Tell the race detector about the new heap memory. - if racemap && raceenabled { - racemapshadow(unsafe.Pointer(h.arena_used), arena_used-h.arena_used) - } - - h.arena_used = arena_used -} - -// mapSpans makes sure that the spans are mapped -// up to the new value of arena_used. -// -// Don't call this directly. Call mheap.setArenaUsed. -func (h *mheap) mapSpans(arena_used uintptr) { - // Map spans array, PageSize at a time. - n := arena_used - n -= h.arena_start - n = n / _PageSize * sys.PtrSize - n = round(n, physPageSize) - need := n / unsafe.Sizeof(h.spans[0]) - have := uintptr(len(h.spans)) - if have >= need { - return - } - h.spans = h.spans[:need] - sysMap(unsafe.Pointer(&h.spans[have]), (need-have)*unsafe.Sizeof(h.spans[0]), h.arena_reserved, &memstats.other_sys) } // Sweeps spans in list until reclaims at least npages into heap. @@ -598,7 +604,7 @@ retry: goto retry } if s.sweepgen == sg-1 { - // the span is being sweept by background sweeper, skip + // the span is being swept by background sweeper, skip continue } // already swept empty span, @@ -785,7 +791,7 @@ func (h *mheap) allocManual(npage uintptr, stat *uint64) *mspan { s.nelems = 0 s.elemsize = 0 s.limit = s.base() + s.npages<<_PageShift - // Manually manged memory doesn't count toward heap_sys. + // Manually managed memory doesn't count toward heap_sys. memstats.heap_sys -= uint64(s.npages << _PageShift) } @@ -795,6 +801,28 @@ func (h *mheap) allocManual(npage uintptr, stat *uint64) *mspan { return s } +// setSpan modifies the span map so spanOf(base) is s. +func (h *mheap) setSpan(base uintptr, s *mspan) { + ai := arenaIndex(base) + h.arenas[ai.l1()][ai.l2()].spans[(base/pageSize)%pagesPerArena] = s +} + +// setSpans modifies the span map so [spanOf(base), spanOf(base+npage*pageSize)) +// is s. +func (h *mheap) setSpans(base, npage uintptr, s *mspan) { + p := base / pageSize + ai := arenaIndex(base) + ha := h.arenas[ai.l1()][ai.l2()] + for n := uintptr(0); n < npage; n++ { + i := (p + n) % pagesPerArena + if i == 0 { + ai = arenaIndex(base + n*pageSize) + ha = h.arenas[ai.l1()][ai.l2()] + } + ha.spans[i] = s + } +} + // Allocates a span of the given size. h must be locked. // The returned span has been removed from the // free list, but its state is still MSpanFree. @@ -842,12 +870,9 @@ HaveSpan: t := (*mspan)(h.spanalloc.alloc()) t.init(s.base()+npage<<_PageShift, s.npages-npage) s.npages = npage - p := (t.base() - h.arena_start) >> _PageShift - if p > 0 { - h.spans[p-1] = s - } - h.spans[p] = t - h.spans[p+t.npages-1] = t + h.setSpan(t.base()-1, s) + h.setSpan(t.base(), t) + h.setSpan(t.base()+t.npages*pageSize-1, t) t.needzero = s.needzero s.state = _MSpanManual // prevent coalescing with s t.state = _MSpanManual @@ -856,10 +881,7 @@ HaveSpan: } s.unusedsince = 0 - p := (s.base() - h.arena_start) >> _PageShift - for n := uintptr(0); n < npage; n++ { - h.spans[p+n] = s - } + h.setSpans(s.base(), npage, s) *stat += uint64(npage << _PageShift) memstats.heap_idle -= uint64(npage << _PageShift) @@ -891,36 +913,18 @@ func (h *mheap) allocLarge(npage uintptr) *mspan { // // h must be locked. 
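One subtlety in the new setSpans above: a multi-page span can straddle an arena boundary, so the loop re-fetches the heapArena whenever the per-arena page index wraps to zero. A toy version of that loop over a map of miniature arenas (names and sizes invented for the illustration):

package main

import "fmt"

const (
	pageSize      = 8192
	pagesPerArena = 4 // tiny on purpose, to force a boundary crossing
)

type span struct{ id int }

// arena holds the per-arena page-to-span table, like heapArena.spans.
type arena struct {
	spans [pagesPerArena]*span
}

// arenas stands in for the runtime's two-level mheap_.arenas table.
var arenas = map[uintptr]*arena{}

func arenaFor(i uintptr) *arena {
	a := arenas[i]
	if a == nil {
		a = new(arena)
		arenas[i] = a
	}
	return a
}

// setSpans mirrors (*mheap).setSpans: point every page in
// [base, base+npage*pageSize) at s, switching arenas whenever the
// in-arena index wraps around.
func setSpans(base, npage uintptr, s *span) {
	p := base / pageSize
	ha := arenaFor(p / pagesPerArena)
	for n := uintptr(0); n < npage; n++ {
		i := (p + n) % pagesPerArena
		if i == 0 {
			ha = arenaFor((p + n) / pagesPerArena)
		}
		ha.spans[i] = s
	}
}

func main() {
	// Six pages starting at page 2 cross from arena 0 into arena 1.
	setSpans(2*pageSize, 6, &span{id: 7})
	for ai := uintptr(0); ai < 2; ai++ {
		for page, sp := range arenas[ai].spans {
			if sp != nil {
				fmt.Printf("arena %d page %d -> span %d\n", ai, page, sp.id)
			}
		}
	}
}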
func (h *mheap) grow(npage uintptr) bool { - // Ask for a big chunk, to reduce the number of mappings - // the operating system needs to track; also amortizes - // the overhead of an operating system mapping. - // Allocate a multiple of 64kB. - npage = round(npage, (64<<10)/_PageSize) ask := npage << _PageShift - if ask < _HeapAllocChunk { - ask = _HeapAllocChunk - } - - v := h.sysAlloc(ask) + v, size := h.sysAlloc(ask) if v == nil { - if ask > npage<<_PageShift { - ask = npage << _PageShift - v = h.sysAlloc(ask) - } - if v == nil { - print("runtime: out of memory: cannot allocate ", ask, "-byte block (", memstats.heap_sys, " in use)\n") - return false - } + print("runtime: out of memory: cannot allocate ", ask, "-byte block (", memstats.heap_sys, " in use)\n") + return false } // Create a fake "in use" span and free it, so that the // right coalescing happens. s := (*mspan)(h.spanalloc.alloc()) - s.init(uintptr(v), ask>>_PageShift) - p := (s.base() - h.arena_start) >> _PageShift - for i := p; i < p+s.npages; i++ { - h.spans[i] = s - } + s.init(uintptr(v), size/pageSize) + h.setSpans(s.base(), s.npages, s) atomic.Store(&s.sweepgen, h.sweepgen) s.state = _MSpanInUse h.pagesInUse += uint64(s.npages) @@ -928,33 +932,6 @@ func (h *mheap) grow(npage uintptr) bool { return true } -// Look up the span at the given address. -// Address is guaranteed to be in map -// and is guaranteed to be start or end of span. -func (h *mheap) lookup(v unsafe.Pointer) *mspan { - p := uintptr(v) - p -= h.arena_start - return h.spans[p>>_PageShift] -} - -// Look up the span at the given address. -// Address is *not* guaranteed to be in map -// and may be anywhere in the span. -// Map entries for the middle of a span are only -// valid for allocated spans. Free spans may have -// other garbage in their middles, so we have to -// check for that. -func (h *mheap) lookupMaybe(v unsafe.Pointer) *mspan { - if uintptr(v) < h.arena_start || uintptr(v) >= h.arena_used { - return nil - } - s := h.spans[(uintptr(v)-h.arena_start)>>_PageShift] - if s == nil || uintptr(v) < s.base() || uintptr(v) >= uintptr(unsafe.Pointer(s.limit)) || s.state != _MSpanInUse { - return nil - } - return s -} - // Free the span back into the heap. func (h *mheap) freeSpan(s *mspan, acct int32) { systemstack(func() { @@ -1039,46 +1016,38 @@ func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince i s.npreleased = 0 // Coalesce with earlier, later spans. - p := (s.base() - h.arena_start) >> _PageShift - if p > 0 { - before := h.spans[p-1] - if before != nil && before.state == _MSpanFree { - // Now adjust s. - s.startAddr = before.startAddr - s.npages += before.npages - s.npreleased = before.npreleased // absorb released pages - s.needzero |= before.needzero - p -= before.npages - h.spans[p] = s - // The size is potentially changing so the treap needs to delete adjacent nodes and - // insert back as a combined node. - if h.isLargeSpan(before.npages) { - // We have a t, it is large so it has to be in the treap so we can remove it. - h.freelarge.removeSpan(before) - } else { - h.freeList(before.npages).remove(before) - } - before.state = _MSpanDead - h.spanalloc.free(unsafe.Pointer(before)) + if before := spanOf(s.base() - 1); before != nil && before.state == _MSpanFree { + // Now adjust s. 
+ s.startAddr = before.startAddr + s.npages += before.npages + s.npreleased = before.npreleased // absorb released pages + s.needzero |= before.needzero + h.setSpan(before.base(), s) + // The size is potentially changing so the treap needs to delete adjacent nodes and + // insert back as a combined node. + if h.isLargeSpan(before.npages) { + // We have a t, it is large so it has to be in the treap so we can remove it. + h.freelarge.removeSpan(before) + } else { + h.freeList(before.npages).remove(before) } + before.state = _MSpanDead + h.spanalloc.free(unsafe.Pointer(before)) } // Now check to see if next (greater addresses) span is free and can be coalesced. - if (p + s.npages) < uintptr(len(h.spans)) { - after := h.spans[p+s.npages] - if after != nil && after.state == _MSpanFree { - s.npages += after.npages - s.npreleased += after.npreleased - s.needzero |= after.needzero - h.spans[p+s.npages-1] = s - if h.isLargeSpan(after.npages) { - h.freelarge.removeSpan(after) - } else { - h.freeList(after.npages).remove(after) - } - after.state = _MSpanDead - h.spanalloc.free(unsafe.Pointer(after)) + if after := spanOf(s.base() + s.npages*pageSize); after != nil && after.state == _MSpanFree { + s.npages += after.npages + s.npreleased += after.npreleased + s.needzero |= after.needzero + h.setSpan(s.base()+s.npages*pageSize-1, s) + if h.isLargeSpan(after.npages) { + h.freelarge.removeSpan(after) + } else { + h.freeList(after.npages).remove(after) } + after.state = _MSpanDead + h.spanalloc.free(unsafe.Pointer(after)) } // Insert s into appropriate list or treap. @@ -1343,7 +1312,7 @@ type special struct { // (The add will fail only if a record with the same p and s->kind // already exists.) func addspecial(p unsafe.Pointer, s *special) bool { - span := mheap_.lookupMaybe(p) + span := spanOfHeap(uintptr(p)) if span == nil { throw("addspecial on invalid pointer") } @@ -1391,7 +1360,7 @@ func addspecial(p unsafe.Pointer, s *special) bool { // Returns the record if the record existed, nil otherwise. // The caller must FixAlloc_Free the result. func removespecial(p unsafe.Pointer, kind uint8) *special { - span := mheap_.lookupMaybe(p) + span := spanOfHeap(uintptr(p)) if span == nil { throw("removespecial on invalid pointer") } @@ -1454,12 +1423,12 @@ func addfinalizer(p unsafe.Pointer, f *funcval, ft *functype, ot *ptrtype) bool // situation where it's possible that markrootSpans // has already run but mark termination hasn't yet. if gcphase != _GCoff { - _, base, _ := findObject(p) + base, _, _ := findObject(uintptr(p), 0, 0, false) mp := acquirem() gcw := &mp.p.ptr().gcw // Mark everything reachable from the object // so it's retained for the finalizer. - scanobject(uintptr(base), gcw) + scanobject(base, gcw) // Mark the finalizer itself, since the // special isn't part of the GC'd heap. scanblock(uintptr(unsafe.Pointer(&s.fn)), sys.PtrSize, &oneptrmask[0], gcw) @@ -1643,7 +1612,7 @@ func newMarkBits(nelems uintptr) *gcBits { // to be used for this span's alloc bits. // newAllocBits is used to provide newly initialized spans // allocation bits. For spans not being initialized the -// the mark bits are repurposed as allocation bits when +// mark bits are repurposed as allocation bits when // the span is swept. 
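In the freeSpanLocked hunks above, the span being freed now locates its neighbors with spanOf(s.base()-1) and spanOf(s.base()+s.npages*pageSize) instead of indexing the old global spans slice; the merge itself is ordinary boundary-tag coalescing. A toy version over page ranges, with a flat page-to-block map standing in for the arena span tables (names invented; the real code also moves the absorbed span between free lists and releases its mspan):

package main

import "fmt"

// block is a run of pages, identified by its start page and length.
type block struct {
	start, npages int
	free          bool
}

// byPage maps the first and last page of each block to the block,
// the way the arena span maps do for free spans; entries for interior
// pages may be stale, which is why only the boundaries are consulted.
var byPage = map[int]*block{}

func register(b *block) {
	byPage[b.start] = b
	byPage[b.start+b.npages-1] = b
}

// release marks b free and coalesces it with free neighbors, mirroring
// the before/after merging in freeSpanLocked.
func release(b *block) {
	b.free = true
	if before := byPage[b.start-1]; before != nil && before.free {
		b.start = before.start
		b.npages += before.npages
	}
	if after := byPage[b.start+b.npages]; after != nil && after.free {
		b.npages += after.npages
	}
	register(b) // re-point both boundaries at the merged block
}

func main() {
	left := &block{start: 0, npages: 4}
	mid := &block{start: 4, npages: 2}
	right := &block{start: 6, npages: 8}
	for _, b := range []*block{left, mid, right} {
		register(b)
	}
	release(left)
	release(right)
	release(mid) // merges with both neighbors
	merged := byPage[0]
	fmt.Println(merged.start, merged.npages, merged.free) // 0 14 true
}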
func newAllocBits(nelems uintptr) *gcBits { return newMarkBits(nelems) diff --git a/libgo/go/runtime/mprof.go b/libgo/go/runtime/mprof.go index f31c88c..2bbf37a 100644 --- a/libgo/go/runtime/mprof.go +++ b/libgo/go/runtime/mprof.go @@ -436,7 +436,7 @@ var mutexprofilerate uint64 // fraction sampled // reported. The previous rate is returned. // // To turn off profiling entirely, pass rate 0. -// To just read the current rate, pass rate -1. +// To just read the current rate, pass rate < 0. // (For n>1 the details of sampling may change.) func SetMutexProfileFraction(rate int) int { if rate < 0 { @@ -833,7 +833,7 @@ func tracealloc(p unsafe.Pointer, size uintptr, typ *_type) { if typ == nil { print("tracealloc(", p, ", ", hex(size), ")\n") } else { - print("tracealloc(", p, ", ", hex(size), ", ", *typ.string, ")\n") + print("tracealloc(", p, ", ", hex(size), ", ", typ.string(), ")\n") } if gp.m.curg == nil || gp == gp.m.curg { goroutineheader(gp) diff --git a/libgo/go/runtime/msan/msan.go b/libgo/go/runtime/msan/msan.go index b6ea3f0..c81577d 100644 --- a/libgo/go/runtime/msan/msan.go +++ b/libgo/go/runtime/msan/msan.go @@ -2,7 +2,8 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build msan,linux,amd64 +// +build msan,linux +// +build amd64 arm64 package msan diff --git a/libgo/go/runtime/mstats.go b/libgo/go/runtime/mstats.go index 095a0de..f54ce9d 100644 --- a/libgo/go/runtime/mstats.go +++ b/libgo/go/runtime/mstats.go @@ -26,7 +26,7 @@ type mstats struct { alloc uint64 // bytes allocated and not yet freed total_alloc uint64 // bytes allocated (even if freed) sys uint64 // bytes obtained from system (should be sum of xxx_sys below, no locking, approximate) - nlookup uint64 // number of pointer lookups + nlookup uint64 // number of pointer lookups (unused) nmalloc uint64 // number of mallocs nfree uint64 // number of frees @@ -637,8 +637,6 @@ func purgecachedstats(c *mcache) { c.local_scan = 0 memstats.tinyallocs += uint64(c.local_tinyallocs) c.local_tinyallocs = 0 - memstats.nlookup += uint64(c.local_nlookup) - c.local_nlookup = 0 h.largefree += uint64(c.local_largefree) c.local_largefree = 0 h.nlargefree += uint64(c.local_nlargefree) @@ -663,6 +661,9 @@ func purgecachedstats(c *mcache) { // overflow errors. //go:nosplit func mSysStatInc(sysStat *uint64, n uintptr) { + if sysStat == nil { + return + } if sys.BigEndian { atomic.Xadd64(sysStat, int64(n)) return @@ -677,6 +678,9 @@ func mSysStatInc(sysStat *uint64, n uintptr) { // mSysStatInc apply. //go:nosplit func mSysStatDec(sysStat *uint64, n uintptr) { + if sysStat == nil { + return + } if sys.BigEndian { atomic.Xadd64(sysStat, -int64(n)) return diff --git a/libgo/go/runtime/mwbbuf.go b/libgo/go/runtime/mwbbuf.go index 7e88463..39d1370 100644 --- a/libgo/go/runtime/mwbbuf.go +++ b/libgo/go/runtime/mwbbuf.go @@ -5,6 +5,9 @@ // This implements the write barrier buffer. The write barrier itself // is gcWriteBarrier and is implemented in assembly. // +// See mbarrier.go for algorithmic details on the write barrier. This +// file deals only with the buffer. +// // The write barrier has a fast path and a slow path. The fast path // simply enqueues to a per-P write barrier buffer. It's written in // assembly and doesn't clobber any general purpose registers, so it @@ -111,16 +114,21 @@ func (b *wbBuf) discard() { // if !buf.putFast(old, new) { // wbBufFlush(...) // } +// ... actual memory write ... 
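The usage sequence quoted in the comment above — put into the per-P buffer with putFast, flush when it reports the buffer is full, then perform the actual memory write — is the whole fast-path contract of the write barrier buffer. A self-contained sketch of a bounded "put fast or flush" buffer in that style (illustrative only; the real wbBuf records old/new pointer pairs, is flushed on the system stack, and feeds the flushed pointers to findObject/greyobject as shown in wbBufFlush1 below):

package main

import "fmt"

// wbBuf is a toy version of the per-P write barrier buffer: a small
// fixed array plus a next index.
type wbBuf struct {
	next int
	buf  [8]uintptr // tiny capacity so the demo flushes often
}

// putFast records the old and new pointer values and reports whether
// the buffer still has room, mirroring the
// "if !buf.putFast(old, new) { wbBufFlush(...) }" sequence above.
func (b *wbBuf) putFast(old, new uintptr) bool {
	b.buf[b.next] = old
	b.buf[b.next+1] = new
	b.next += 2
	return b.next < len(b.buf)
}

// flush drains the buffer through process and resets it.
func (b *wbBuf) flush(process func(uintptr)) {
	for _, p := range b.buf[:b.next] {
		process(p)
	}
	b.next = 0
}

func main() {
	var b wbBuf
	greyed := 0
	grey := func(p uintptr) {
		if p != 0 { // like the minLegalPointer filter in wbBufFlush1
			greyed++ // stand-in for findObject + greyobject
		}
	}
	for i := uintptr(1); i <= 10; i++ {
		if !b.putFast(0, i) { // old value elided as 0 in the demo
			b.flush(grey)
		}
	}
	b.flush(grey)
	fmt.Println("greyed", greyed, "pointers")
}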
// // The arguments to wbBufFlush depend on whether the caller is doing // its own cgo pointer checks. If it is, then this can be // wbBufFlush(nil, 0). Otherwise, it must pass the slot address and // new. // -// Since buf is a per-P resource, the caller must ensure there are no -// preemption points while buf is in use. +// The caller must ensure there are no preemption points during the +// above sequence. There must be no preemption points while buf is in +// use because it is a per-P resource. There must be no preemption +// points between the buffer put and the write to memory because this +// could allow a GC phase change, which could result in missed write +// barriers. // -// It must be nowritebarrierrec to because write barriers here would +// putFast must be nowritebarrierrec to because write barriers here would // corrupt the write barrier buffer. It (and everything it calls, if // it called anything) has to be nosplit to avoid scheduling on to a // different P and a different buffer. @@ -155,6 +163,13 @@ func wbBufFlush(dst *uintptr, src uintptr) { // Note: Every possible return from this function must reset // the buffer's next pointer to prevent buffer overflow. + // This *must not* modify its arguments because this + // function's argument slots do double duty in gcWriteBarrier + // as register spill slots. Currently, not modifying the + // arguments is sufficient to keep the spill slots unmodified + // (which seems unlikely to change since it costs little and + // helps with debugging). + if getg().m.dying > 0 { // We're going down. Not much point in write barriers // and this way we can allow write barriers in the @@ -214,11 +229,18 @@ func wbBufFlush1(_p_ *p) { // // TODO: Should scanobject/scanblock just stuff pointers into // the wbBuf? Then this would become the sole greying path. + // + // TODO: We could avoid shading any of the "new" pointers in + // the buffer if the stack has been shaded, or even avoid + // putting them in the buffer at all (which would double its + // capacity). This is slightly complicated with the buffer; we + // could track whether any un-shaded goroutine has used the + // buffer, or just track globally whether there are any + // un-shaded stacks and flush after each stack scan. gcw := &_p_.gcw pos := 0 - arenaStart := mheap_.arena_start for _, ptr := range ptrs { - if ptr < arenaStart { + if ptr < minLegalPointer { // nil pointers are very common, especially // for the "old" values. Filter out these and // other "obvious" non-heap pointers ASAP. @@ -227,11 +249,7 @@ func wbBufFlush1(_p_ *p) { // path to reduce the rate of flushes? continue } - // TODO: This doesn't use hbits, so calling - // heapBitsForObject seems a little silly. We could - // easily separate this out since heapBitsForObject - // just calls heapBitsForAddr(obj) to get hbits. - obj, _, span, objIndex := heapBitsForObject(ptr, 0, 0, false) + obj, span, objIndex := findObject(ptr, 0, 0, false) if obj == 0 { continue } diff --git a/libgo/go/runtime/netpoll.go b/libgo/go/runtime/netpoll.go index 3aeb1f6..ab3d14d 100644 --- a/libgo/go/runtime/netpoll.go +++ b/libgo/go/runtime/netpoll.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
-// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows +// +build aix darwin dragonfly freebsd js,wasm linux nacl netbsd openbsd solaris windows package runtime @@ -366,7 +366,7 @@ func netpollblock(pd *pollDesc, mode int32, waitio bool) bool { // this is necessary because runtime_pollUnblock/runtime_pollSetDeadline/deadlineimpl // do the opposite: store to closing/rd/wd, membarrier, load of rg/wg if waitio || netpollcheckerr(pd, mode) == 0 { - gopark(netpollblockcommit, unsafe.Pointer(gpp), "IO wait", traceEvGoBlockNet, 5) + gopark(netpollblockcommit, unsafe.Pointer(gpp), waitReasonIOWait, traceEvGoBlockNet, 5) } // be careful to not lose concurrent READY notification old := atomic.Xchguintptr(gpp, 0) diff --git a/libgo/go/runtime/netpoll_nacl.go b/libgo/go/runtime/netpoll_fake.go index dc5a55e..aab18dc 100644 --- a/libgo/go/runtime/netpoll_nacl.go +++ b/libgo/go/runtime/netpoll_fake.go @@ -2,8 +2,10 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Fake network poller for NaCl. -// Should never be used, because NaCl network connections do not honor "SetNonblock". +// Fake network poller for NaCl and wasm/js. +// Should never be used, because NaCl and wasm/js network connections do not honor "SetNonblock". + +// +build nacl js,wasm package runtime diff --git a/libgo/go/runtime/os_darwin.go b/libgo/go/runtime/os_darwin.go index ec92370..9597633 100644 --- a/libgo/go/runtime/os_darwin.go +++ b/libgo/go/runtime/os_darwin.go @@ -7,323 +7,61 @@ package runtime import "unsafe" type mOS struct { - machport uint32 // return address for mach ipc - waitsema uint32 // semaphore for parking on locks + initialized bool + mutex pthreadmutex + cond pthreadcond + count int } -//go:noescape -//extern mach_msg_trap -func mach_msg_trap(h unsafe.Pointer, op int32, send_size, rcv_size, rcv_name, timeout, notify uint32) int32 - -//extern mach_reply_port -func mach_reply_port() uint32 - -//extern mach_task_self -func mach_task_self() uint32 - func unimplemented(name string) { println(name, "not implemented") *(*int)(unsafe.Pointer(uintptr(1231))) = 1231 } //go:nosplit -func semawakeup(mp *m) { - mach_semrelease(mp.mos.waitsema) -} - -//go:nosplit func semacreate(mp *m) { - if mp.mos.waitsema != 0 { + if mp.initialized { return } - systemstack(func() { - mp.mos.waitsema = mach_semcreate() - }) -} - -// Mach IPC, to get at semaphores -// Definitions are in /usr/include/mach on a Mac. - -func macherror(r int32, fn string) { - print("mach error ", fn, ": ", r, "\n") - throw("mach error") -} - -const _DebugMach = false - -var zerondr machndr - -func mach_msgh_bits(a, b uint32) uint32 { - return a | b<<8 -} - -func mach_msg(h *machheader, op int32, send_size, rcv_size, rcv_name, timeout, notify uint32) int32 { - // TODO: Loop on interrupt. 
- return mach_msg_trap(unsafe.Pointer(h), op, send_size, rcv_size, rcv_name, timeout, notify) -} - -// Mach RPC (MIG) -const ( - _MinMachMsg = 48 - _MachReply = 100 -) - -type codemsg struct { - h machheader - ndr machndr - code int32 -} - -func machcall(h *machheader, maxsize int32, rxsize int32) int32 { - _g_ := getg() - port := _g_.m.mos.machport - if port == 0 { - port = mach_reply_port() - _g_.m.mos.machport = port - } - - h.msgh_bits |= mach_msgh_bits(_MACH_MSG_TYPE_COPY_SEND, _MACH_MSG_TYPE_MAKE_SEND_ONCE) - h.msgh_local_port = port - h.msgh_reserved = 0 - id := h.msgh_id - - if _DebugMach { - p := (*[10000]unsafe.Pointer)(unsafe.Pointer(h)) - print("send:\t") - var i uint32 - for i = 0; i < h.msgh_size/uint32(unsafe.Sizeof(p[0])); i++ { - print(" ", p[i]) - if i%8 == 7 { - print("\n\t") - } - } - if i%8 != 0 { - print("\n") - } - } - ret := mach_msg(h, _MACH_SEND_MSG|_MACH_RCV_MSG, h.msgh_size, uint32(maxsize), port, 0, 0) - if ret != 0 { - if _DebugMach { - print("mach_msg error ", ret, "\n") - } - return ret - } - if _DebugMach { - p := (*[10000]unsafe.Pointer)(unsafe.Pointer(h)) - var i uint32 - for i = 0; i < h.msgh_size/uint32(unsafe.Sizeof(p[0])); i++ { - print(" ", p[i]) - if i%8 == 7 { - print("\n\t") - } - } - if i%8 != 0 { - print("\n") - } - } - if h.msgh_id != id+_MachReply { - if _DebugMach { - print("mach_msg _MachReply id mismatch ", h.msgh_id, " != ", id+_MachReply, "\n") - } - return -303 // MIG_REPLY_MISMATCH - } - // Look for a response giving the return value. - // Any call can send this back with an error, - // and some calls only have return values so they - // send it back on success too. I don't quite see how - // you know it's one of these and not the full response - // format, so just look if the message is right. - c := (*codemsg)(unsafe.Pointer(h)) - if uintptr(h.msgh_size) == unsafe.Sizeof(*c) && h.msgh_bits&_MACH_MSGH_BITS_COMPLEX == 0 { - if _DebugMach { - print("mig result ", c.code, "\n") - } - return c.code - } - if h.msgh_size != uint32(rxsize) { - if _DebugMach { - print("mach_msg _MachReply size mismatch ", h.msgh_size, " != ", rxsize, "\n") - } - return -307 // MIG_ARRAY_TOO_LARGE - } - return 0 -} - -// Semaphores! 
- -const ( - tmach_semcreate = 3418 - rmach_semcreate = tmach_semcreate + _MachReply - - tmach_semdestroy = 3419 - rmach_semdestroy = tmach_semdestroy + _MachReply - - _KERN_ABORTED = 14 - _KERN_OPERATION_TIMED_OUT = 49 -) - -type tmach_semcreatemsg struct { - h machheader - ndr machndr - policy int32 - value int32 -} - -type rmach_semcreatemsg struct { - h machheader - body machbody - semaphore machport -} - -type tmach_semdestroymsg struct { - h machheader - body machbody - semaphore machport -} - -func mach_semcreate() uint32 { - var m [256]uint8 - tx := (*tmach_semcreatemsg)(unsafe.Pointer(&m)) - rx := (*rmach_semcreatemsg)(unsafe.Pointer(&m)) - - tx.h.msgh_bits = 0 - tx.h.msgh_size = uint32(unsafe.Sizeof(*tx)) - tx.h.msgh_remote_port = mach_task_self() - tx.h.msgh_id = tmach_semcreate - tx.ndr = zerondr - - tx.policy = 0 // 0 = SYNC_POLICY_FIFO - tx.value = 0 - - for { - r := machcall(&tx.h, int32(unsafe.Sizeof(m)), int32(unsafe.Sizeof(*rx))) - if r == 0 { - break - } - if r == _KERN_ABORTED { // interrupted - continue - } - macherror(r, "semaphore_create") - } - if rx.body.msgh_descriptor_count != 1 { - unimplemented("mach_semcreate desc count") - } - return rx.semaphore.name -} - -func mach_semdestroy(sem uint32) { - var m [256]uint8 - tx := (*tmach_semdestroymsg)(unsafe.Pointer(&m)) - - tx.h.msgh_bits = _MACH_MSGH_BITS_COMPLEX - tx.h.msgh_size = uint32(unsafe.Sizeof(*tx)) - tx.h.msgh_remote_port = mach_task_self() - tx.h.msgh_id = tmach_semdestroy - tx.body.msgh_descriptor_count = 1 - tx.semaphore.name = sem - tx.semaphore.disposition = _MACH_MSG_TYPE_MOVE_SEND - tx.semaphore._type = 0 - - for { - r := machcall(&tx.h, int32(unsafe.Sizeof(m)), 0) - if r == 0 { - break - } - if r == _KERN_ABORTED { // interrupted - continue - } - macherror(r, "semaphore_destroy") - } -} - -//extern semaphore_wait -func mach_semaphore_wait(sema uint32) int32 - -//extern semaphore_timedwait -func mach_semaphore_timedwait(sema, sec, nsec uint32) int32 - -//extern semaphore_signal -func mach_semaphore_signal(sema uint32) int32 - -//extern semaphore_signal_all -func mach_semaphore_signal_all(sema uint32) int32 - -func semasleep1(ns int64) int32 { - _g_ := getg() - - if ns >= 0 { - var nsecs int32 - secs := timediv(ns, 1000000000, &nsecs) - r := mach_semaphore_timedwait(_g_.m.mos.waitsema, uint32(secs), uint32(nsecs)) - if r == _KERN_ABORTED || r == _KERN_OPERATION_TIMED_OUT { - return -1 - } - if r != 0 { - macherror(r, "semaphore_wait") - } - return 0 + mp.initialized = true + if err := pthread_mutex_init(&mp.mutex, nil); err != 0 { + throw("pthread_mutex_init") } - - for { - r := mach_semaphore_wait(_g_.m.mos.waitsema) - if r == 0 { - break - } - // Note: We don't know how this call (with no timeout) can get _KERN_OPERATION_TIMED_OUT, - // but it does reliably, though at a very low rate, on OS X 10.8, 10.9, 10.10, and 10.11. - // See golang.org/issue/17161. 
- if r == _KERN_ABORTED || r == _KERN_OPERATION_TIMED_OUT { // interrupted - continue - } - macherror(r, "semaphore_wait") + if err := pthread_cond_init(&mp.cond, nil); err != 0 { + throw("pthread_cond_init") } - return 0 } //go:nosplit func semasleep(ns int64) int32 { - var r int32 - systemstack(func() { - r = semasleep1(ns) - }) - return r -} - -//go:nosplit -func mach_semrelease(sem uint32) { + mp := getg().m + pthread_mutex_lock(&mp.mutex) for { - r := mach_semaphore_signal(sem) - if r == 0 { - break - } - if r == _KERN_ABORTED { // interrupted - continue + if mp.count > 0 { + mp.count-- + pthread_mutex_unlock(&mp.mutex) + return 0 + } + if ns >= 0 { + var t timespec + t.set_nsec(ns) + err := pthread_cond_timedwait_relative_np(&mp.cond, &mp.mutex, &t) + if err == _ETIMEDOUT { + pthread_mutex_unlock(&mp.mutex) + return -1 + } + } else { + pthread_cond_wait(&mp.cond, &mp.mutex) } - - // mach_semrelease must be completely nosplit, - // because it is called from Go code. - // If we're going to die, start that process on the system stack - // to avoid a Go stack split. - systemstack(func() { macherror(r, "semaphore_signal") }) } } -type machheader struct { - msgh_bits uint32 - msgh_size uint32 - msgh_remote_port uint32 - msgh_local_port uint32 - msgh_reserved uint32 - msgh_id int32 -} - -type machndr struct { - mig_vers uint8 - if_vers uint8 - reserved1 uint8 - mig_encoding uint8 - int_rep uint8 - char_rep uint8 - float_rep uint8 - reserved2 uint8 +//go:nosplit +func semawakeup(mp *m) { + pthread_mutex_lock(&mp.mutex) + mp.count++ + if mp.count > 0 { + pthread_cond_signal(&mp.cond) + } + pthread_mutex_unlock(&mp.mutex) } diff --git a/libgo/go/runtime/os_dragonfly.go b/libgo/go/runtime/os_dragonfly.go index 6452984..abcad72 100644 --- a/libgo/go/runtime/os_dragonfly.go +++ b/libgo/go/runtime/os_dragonfly.go @@ -4,11 +4,12 @@ package runtime -import "unsafe" +import ( + "runtime/internal/sys" + "unsafe" +) -type mOS struct { - unused byte -} +type mOS struct{} //go:noescape //extern umtx_sleep diff --git a/libgo/go/runtime/os_freebsd.go b/libgo/go/runtime/os_freebsd.go index 8c3535b..34939c5 100644 --- a/libgo/go/runtime/os_freebsd.go +++ b/libgo/go/runtime/os_freebsd.go @@ -8,9 +8,7 @@ import ( "unsafe" ) -type mOS struct { - unused byte -} +type mOS struct{} //go:noescape //extern _umtx_op diff --git a/libgo/go/runtime/os_js.go b/libgo/go/runtime/os_js.go new file mode 100644 index 0000000..ad6db18 --- /dev/null +++ b/libgo/go/runtime/os_js.go @@ -0,0 +1,145 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build js,wasm + +package runtime + +import ( + "unsafe" +) + +func exit(code int32) + +func write(fd uintptr, p unsafe.Pointer, n int32) int32 { + if fd > 2 { + throw("runtime.write to fd > 2 is unsupported") + } + wasmWrite(fd, p, n) + return n +} + +// Stubs so tests can link correctly. These should never be called. 
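The rewritten semasleep/semawakeup in the os_darwin.go hunks above build a counting semaphore out of a pthread mutex, a condition variable, and a counter in mOS. The same shape in portable Go, using sync.Cond (illustration only: the runtime cannot depend on package sync, and the real semasleep also supports a relative timeout via pthread_cond_timedwait_relative_np):

package main

import (
	"fmt"
	"sync"
	"time"
)

// sema mirrors the new mOS fields: a mutex, a condition variable and
// a count of pending wakeups.
type sema struct {
	mu    sync.Mutex
	cond  *sync.Cond
	count int
}

func newSema() *sema {
	s := &sema{}
	s.cond = sync.NewCond(&s.mu)
	return s
}

// sleep blocks until a wakeup is available, like semasleep with a
// negative (infinite) timeout: re-check the count after every wait.
func (s *sema) sleep() {
	s.mu.Lock()
	for s.count == 0 {
		s.cond.Wait()
	}
	s.count--
	s.mu.Unlock()
}

// wakeup makes one wakeup available and signals a sleeper, like
// semawakeup.
func (s *sema) wakeup() {
	s.mu.Lock()
	s.count++
	s.cond.Signal()
	s.mu.Unlock()
}

func main() {
	s := newSema()
	go func() {
		time.Sleep(10 * time.Millisecond)
		s.wakeup()
	}()
	s.sleep()
	fmt.Println("woke up")
}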
+func open(name *byte, mode, perm int32) int32 { panic("not implemented") } +func closefd(fd int32) int32 { panic("not implemented") } +func read(fd int32, p unsafe.Pointer, n int32) int32 { panic("not implemented") } + +//go:noescape +func wasmWrite(fd uintptr, p unsafe.Pointer, n int32) + +func usleep(usec uint32) + +func exitThread(wait *uint32) + +type mOS struct{} + +func osyield() + +const _SIGSEGV = 0xb + +func sigpanic() { + g := getg() + if !canpanic(g) { + throw("unexpected signal during runtime execution") + } + + // js only invokes the exception handler for memory faults. + g.sig = _SIGSEGV + panicmem() +} + +type sigset struct{} + +// Called to initialize a new m (including the bootstrap m). +// Called on the parent thread (main thread in case of bootstrap), can allocate memory. +func mpreinit(mp *m) { + mp.gsignal = malg(32 * 1024) + mp.gsignal.m = mp +} + +//go:nosplit +func msigsave(mp *m) { +} + +//go:nosplit +func msigrestore(sigmask sigset) { +} + +//go:nosplit +//go:nowritebarrierrec +func clearSignalHandlers() { +} + +//go:nosplit +func sigblock() { +} + +// Called to initialize a new m (including the bootstrap m). +// Called on the new thread, cannot allocate memory. +func minit() { +} + +// Called from dropm to undo the effect of an minit. +func unminit() { +} + +func osinit() { + ncpu = 1 + getg().m.procid = 2 + physPageSize = 64 * 1024 +} + +// wasm has no signals +const _NSIG = 0 + +func signame(sig uint32) string { + return "" +} + +func crash() { + *(*int32)(nil) = 0 +} + +func getRandomData(r []byte) + +func goenvs() { + goenvs_unix() +} + +func initsig(preinit bool) { +} + +// May run with m.p==nil, so write barriers are not allowed. +//go:nowritebarrier +func newosproc(mp *m) { + panic("newosproc: not implemented") +} + +func setProcessCPUProfiler(hz int32) {} +func setThreadCPUProfiler(hz int32) {} +func sigdisable(uint32) {} +func sigenable(uint32) {} +func sigignore(uint32) {} + +//go:linkname os_sigpipe os.sigpipe +func os_sigpipe() { + throw("too many writes on closed pipe") +} + +//go:nosplit +func cputicks() int64 { + // Currently cputicks() is used in blocking profiler and to seed runtime·fastrand(). + // runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler. + // TODO: need more entropy to better seed fastrand. + return nanotime() +} + +//go:linkname syscall_now syscall.now +func syscall_now() (sec int64, nsec int32) { + sec, nsec, _ = time_now() + return +} + +// gsignalStack is unused on js. +type gsignalStack struct{} diff --git a/libgo/go/runtime/os_linux.go b/libgo/go/runtime/os_linux.go index 816327e..04314bd 100644 --- a/libgo/go/runtime/os_linux.go +++ b/libgo/go/runtime/os_linux.go @@ -27,8 +27,9 @@ func futex(addr unsafe.Pointer, op int32, val uint32, ts, addr2 unsafe.Pointer, // Futexsleep is allowed to wake up spuriously. const ( - _FUTEX_WAIT = 0 - _FUTEX_WAKE = 1 + _FUTEX_PRIVATE_FLAG = 128 + _FUTEX_WAIT_PRIVATE = 0 | _FUTEX_PRIVATE_FLAG + _FUTEX_WAKE_PRIVATE = 1 | _FUTEX_PRIVATE_FLAG ) // Atomically, @@ -45,7 +46,7 @@ func futexsleep(addr *uint32, val uint32, ns int64) { // here, and so can we: as it says a few lines up, // spurious wakeups are allowed. 
if ns < 0 { - futex(unsafe.Pointer(addr), _FUTEX_WAIT, val, nil, nil, 0) + futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, nil, nil, 0) return } @@ -62,13 +63,13 @@ func futexsleep(addr *uint32, val uint32, ns int64) { ts.tv_nsec = 0 ts.set_sec(int64(timediv(ns, 1000000000, (*int32)(unsafe.Pointer(&ts.tv_nsec))))) } - futex(unsafe.Pointer(addr), _FUTEX_WAIT, val, unsafe.Pointer(&ts), nil, 0) + futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, unsafe.Pointer(&ts), nil, 0) } // If any procs are sleeping on addr, wake up at most cnt. //go:nosplit func futexwakeup(addr *uint32, cnt uint32) { - ret := futex(unsafe.Pointer(addr), _FUTEX_WAKE, cnt, nil, nil, 0) + ret := futex(unsafe.Pointer(addr), _FUTEX_WAKE_PRIVATE, cnt, nil, nil, 0) if ret >= 0 { return } @@ -93,6 +94,11 @@ const ( var procAuxv = []byte("/proc/self/auxv\x00") +var addrspace_vec [1]byte + +//extern mincore +func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32 + func sysargs(argc int32, argv **byte) { n := argc + 1 @@ -158,12 +164,17 @@ func sysauxv(auxv []uintptr) int { // worth of random data. startupRandomData = (*[16]byte)(unsafe.Pointer(val))[:] + setRandomNumber(uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 | + uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24) + case _AT_PAGESZ: physPageSize = val } + archauxv(tag, val) + // Commented out for gccgo for now. - // archauxv(tag, val) + // vdsoauxv(tag, val) } return i / 2 } diff --git a/libgo/go/runtime/os_linux_arm.go b/libgo/go/runtime/os_linux_arm.go new file mode 100644 index 0000000..42c2839 --- /dev/null +++ b/libgo/go/runtime/os_linux_arm.go @@ -0,0 +1,60 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import "unsafe" + +const ( + _AT_PLATFORM = 15 // introduced in at least 2.6.11 + + _HWCAP_VFP = 1 << 6 // introduced in at least 2.6.11 + _HWCAP_VFPv3 = 1 << 13 // introduced in 2.6.30 + _HWCAP_IDIVA = 1 << 17 +) + +var randomNumber uint32 +var armArch uint8 = 6 // we default to ARMv6 +var hwcap uint32 // set by archauxv +var hardDiv bool // set if a hardware divider is available + +func checkgoarm() { + // On Android, /proc/self/auxv might be unreadable and hwcap won't + // reflect the CPU capabilities. Assume that every Android arm device + // has the necessary floating point hardware available. + if GOOS == "android" { + return + } + if goarm > 5 && hwcap&_HWCAP_VFP == 0 { + print("runtime: this CPU has no floating point hardware, so it cannot run\n") + print("this GOARM=", goarm, " binary. Recompile using GOARM=5.\n") + exit(1) + } + if goarm > 6 && hwcap&_HWCAP_VFPv3 == 0 { + print("runtime: this CPU has no VFPv3 floating point hardware, so it cannot run\n") + print("this GOARM=", goarm, " binary. Recompile using GOARM=5 or GOARM=6.\n") + exit(1) + } +} + +func archauxv(tag, val uintptr) { + switch tag { + case _AT_RANDOM: + // sysargs filled in startupRandomData, but that + // pointer may not be word aligned, so we must treat + // it as a byte array. 
+ randomNumber = uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 | + uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24 + + case _AT_PLATFORM: // v5l, v6l, v7l + t := *(*uint8)(unsafe.Pointer(val + 1)) + if '5' <= t && t <= '7' { + armArch = t - '0' + } + + case _AT_HWCAP: // CPU capability bit flags + hwcap = uint32(val) + hardDiv = (hwcap & _HWCAP_IDIVA) != 0 + } +} diff --git a/libgo/go/runtime/os_linux_arm64.go b/libgo/go/runtime/os_linux_arm64.go new file mode 100644 index 0000000..013e7ae --- /dev/null +++ b/libgo/go/runtime/os_linux_arm64.go @@ -0,0 +1,29 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build arm64 + +package runtime + +import "internal/cpu" + +var randomNumber uint32 + +func archauxv(tag, val uintptr) { + switch tag { + case _AT_RANDOM: + // sysargs filled in startupRandomData, but that + // pointer may not be word aligned, so we must treat + // it as a byte array. + randomNumber = uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 | + uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24 + + case _AT_HWCAP: + // arm64 doesn't have a 'cpuid' instruction equivalent and relies on + // HWCAP/HWCAP2 bits for hardware capabilities. + cpu.HWCap = uint(val) + case _AT_HWCAP2: + cpu.HWCap2 = uint(val) + } +} diff --git a/libgo/go/runtime/os_linux_mips64x.go b/libgo/go/runtime/os_linux_mips64x.go new file mode 100644 index 0000000..b7f737f --- /dev/null +++ b/libgo/go/runtime/os_linux_mips64x.go @@ -0,0 +1,21 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build linux +// +build mips64 mips64le + +package runtime + +var randomNumber uint32 + +func archauxv(tag, val uintptr) { + switch tag { + case _AT_RANDOM: + // sysargs filled in startupRandomData, but that + // pointer may not be word aligned, so we must treat + // it as a byte array. + randomNumber = uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 | + uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24 + } +} diff --git a/libgo/go/runtime/os_linux_mipsx.go b/libgo/go/runtime/os_linux_mipsx.go new file mode 100644 index 0000000..a2696de --- /dev/null +++ b/libgo/go/runtime/os_linux_mipsx.go @@ -0,0 +1,21 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build linux +// +build mips mipsle + +package runtime + +var randomNumber uint32 + +func archauxv(tag, val uintptr) { + switch tag { + case _AT_RANDOM: + // sysargs filled in startupRandomData, but that + // pointer may not be word aligned, so we must treat + // it as a byte array. + randomNumber = uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 | + uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24 + } +} diff --git a/libgo/go/runtime/os_linux_noauxv.go b/libgo/go/runtime/os_linux_noauxv.go new file mode 100644 index 0000000..895b4cd --- /dev/null +++ b/libgo/go/runtime/os_linux_noauxv.go @@ -0,0 +1,11 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
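Before the noauxv fallback's build tags below, a note on the archauxv hooks added above: they consume (tag, value) machine-word pairs that sysauxv reads from the kernel's auxiliary vector. The standalone sketch below decodes /proc/self/auxv directly to show that layout; it is not runtime code, it assumes a little-endian Linux machine, and the tag constants are the standard Linux values.

```go
package main

import (
	"encoding/binary"
	"fmt"
	"io/ioutil"
	"unsafe"
)

const (
	_AT_PAGESZ = 6  // system page size
	_AT_HWCAP  = 16 // CPU capability bit flags
)

func main() {
	data, err := ioutil.ReadFile("/proc/self/auxv")
	if err != nil {
		fmt.Println("auxv not readable:", err)
		return
	}
	wordSize := int(unsafe.Sizeof(uintptr(0)))
	word := func(b []byte) uint64 { // little-endian assumption
		if wordSize == 8 {
			return binary.LittleEndian.Uint64(b)
		}
		return uint64(binary.LittleEndian.Uint32(b))
	}
	// The vector is a flat list of (tag, value) pairs, terminated by AT_NULL.
	for i := 0; i+2*wordSize <= len(data); i += 2 * wordSize {
		tag, val := word(data[i:]), word(data[i+wordSize:])
		switch tag {
		case 0: // AT_NULL
			return
		case _AT_HWCAP:
			fmt.Printf("AT_HWCAP  = %#x\n", val)
		case _AT_PAGESZ:
			fmt.Printf("AT_PAGESZ = %d\n", val)
		}
	}
}
```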
+ +// +build linux +// +build !arm,!arm64,!mips,!mipsle,!mips64,!mips64le,!s390x,!ppc64,!ppc64le + +package runtime + +func archauxv(tag, val uintptr) { +} diff --git a/libgo/go/runtime/os_linux_ppc64x.go b/libgo/go/runtime/os_linux_ppc64x.go index d27902d..cc79cc4 100644 --- a/libgo/go/runtime/os_linux_ppc64x.go +++ b/libgo/go/runtime/os_linux_ppc64x.go @@ -2,27 +2,21 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build ignore_for_gccgo +// +build linux // +build ppc64 ppc64le package runtime -// For go:linkname -import _ "unsafe" - -// ppc64x doesn't have a 'cpuid' instruction equivalent and relies on -// HWCAP/HWCAP2 bits for hardware capabilities. - -//go:linkname cpu_hwcap internal/cpu.ppc64x_hwcap -//go:linkname cpu_hwcap2 internal/cpu.ppc64x_hwcap2 -var cpu_hwcap uint -var cpu_hwcap2 uint +import "internal/cpu" func archauxv(tag, val uintptr) { switch tag { case _AT_HWCAP: - cpu_hwcap = uint(val) + // ppc64x doesn't have a 'cpuid' instruction + // equivalent and relies on HWCAP/HWCAP2 bits for + // hardware capabilities. + cpu.HWCap = uint(val) case _AT_HWCAP2: - cpu_hwcap2 = uint(val) + cpu.HWCap2 = uint(val) } } diff --git a/libgo/go/runtime/os_linux_s390x.go b/libgo/go/runtime/os_linux_s390x.go new file mode 100644 index 0000000..55d35c7 --- /dev/null +++ b/libgo/go/runtime/os_linux_s390x.go @@ -0,0 +1,19 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import "internal/cpu" + +const ( + // bit masks taken from bits/hwcap.h + _HWCAP_S390_VX = 2048 // vector facility +) + +func archauxv(tag, val uintptr) { + switch tag { + case _AT_HWCAP: // CPU capability bit flags + cpu.S390X.HasVX = val&_HWCAP_S390_VX != 0 + } +} diff --git a/libgo/go/runtime/os_netbsd.go b/libgo/go/runtime/os_netbsd.go index 81ebe76..ea47e5c 100644 --- a/libgo/go/runtime/os_netbsd.go +++ b/libgo/go/runtime/os_netbsd.go @@ -6,6 +6,7 @@ package runtime import ( "runtime/internal/atomic" + "runtime/internal/sys" "unsafe" ) diff --git a/libgo/go/runtime/os_openbsd.go b/libgo/go/runtime/os_openbsd.go index b64d3af..4f05665 100644 --- a/libgo/go/runtime/os_openbsd.go +++ b/libgo/go/runtime/os_openbsd.go @@ -6,6 +6,7 @@ package runtime import ( "runtime/internal/atomic" + "runtime/internal/sys" "unsafe" ) diff --git a/libgo/go/runtime/panic.go b/libgo/go/runtime/panic.go index 6b490b7..752bf71 100644 --- a/libgo/go/runtime/panic.go +++ b/libgo/go/runtime/panic.go @@ -39,7 +39,24 @@ func panicCheckMalloc(err error) { var indexError = error(errorString("index out of range")) +// The panicindex, panicslice, and panicdivide functions are called by +// code generated by the compiler for out of bounds index expressions, +// out of bounds slice expressions, and division by zero. The +// panicdivide (again), panicoverflow, panicfloat, and panicmem +// functions are called by the signal handler when a signal occurs +// indicating the respective problem. +// +// Since panicindex and panicslice are never called directly, and +// since the runtime package should never have an out of bounds slice +// or array reference, if we see those functions called from the +// runtime package we turn the panic into a throw. That will dump the +// entire runtime stack for easier debugging. 
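The comment above explains that, inside the runtime package itself, out-of-range panics are escalated to throws so the whole runtime stack gets dumped. The sketch below imitates that caller-sensitive escalation in ordinary user code; it is only an analogue of the idea (the real functions use the gccgo-specific funcfileline and throw), and names such as trustedLookup are made up for illustration.

```go
package main

import (
	"fmt"
	"os"
	"runtime"
	"strings"
)

var errIndex = fmt.Errorf("index out of range")

// panicIndex mimics the pattern described above: if the immediate caller
// is inside a "trusted" prefix (a stand-in for the runtime package), the
// error is escalated to an unrecoverable failure that dumps all goroutine
// stacks, instead of a normal panic that a deferred recover could swallow.
func panicIndex() {
	pc, _, _, _ := runtime.Caller(1)
	name := runtime.FuncForPC(pc).Name()
	if strings.HasPrefix(name, "main.trusted") {
		buf := make([]byte, 1<<16)
		n := runtime.Stack(buf, true) // all goroutines, like throw
		fmt.Fprintf(os.Stderr, "fatal error: %v\n\n%s", errIndex, buf[:n])
		os.Exit(2)
	}
	panic(errIndex)
}

func trustedLookup() { panicIndex() }

func untrustedLookup() { panicIndex() }

func main() {
	// The ordinary path panics and can be recovered.
	func() {
		defer func() { fmt.Println("recovered:", recover()) }()
		untrustedLookup()
	}()

	// The "trusted" path escalates: stacks are dumped and the process exits.
	trustedLookup()
}
```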
+ func panicindex() { + name, _, _ := funcfileline(getcallerpc(), -1) + if hasprefix(name, "runtime.") { + throw(string(indexError.(errorString))) + } panicCheckMalloc(indexError) panic(indexError) } @@ -47,6 +64,10 @@ func panicindex() { var sliceError = error(errorString("slice bounds out of range")) func panicslice() { + name, _, _ := funcfileline(getcallerpc(), -1) + if hasprefix(name, "runtime.") { + throw(string(sliceError.(errorString))) + } panicCheckMalloc(sliceError) panic(sliceError) } @@ -144,6 +165,12 @@ func newdefer() *_defer { // //go:nosplit func freedefer(d *_defer) { + if d._panic != nil { + freedeferpanic() + } + if d.pfn != 0 { + freedeferfn() + } pp := getg().m.p.ptr() if len(pp.deferpool) == cap(pp.deferpool) { // Transfer half of local cache to the central cache. @@ -176,15 +203,28 @@ func freedefer(d *_defer) { d.link = nil d.frame = nil d.panicStack = nil - d._panic = nil - d.pfn = 0 d.arg = nil d.retaddr = 0 d.makefunccanrecover = false + // d._panic and d.pfn must be nil already. + // If not, we would have called freedeferpanic or freedeferfn above, + // both of which throw. pp.deferpool = append(pp.deferpool, d) } +// Separate function so that it can split stack. +// Windows otherwise runs out of stack space. +func freedeferpanic() { + // _panic must be cleared before d is unlinked from gp. + throw("freedefer with d._panic != nil") +} + +func freedeferfn() { + // fn must be cleared before d is unlinked from gp. + throw("freedefer with d.fn != nil") +} + // deferreturn is called to undefer the stack. // The compiler inserts a call to this function as a finally clause // wrapped around the body of any function that calls defer. @@ -544,15 +584,9 @@ func gopanic(e interface{}) { // the world, we call preprintpanics to invoke all necessary Error // and String methods to prepare the panic strings before startpanic. preprintpanics(gp._panic) - startpanic() - - // startpanic set panicking, which will block main from exiting, - // so now OK to decrement runningPanicDefers. - atomic.Xadd(&runningPanicDefers, -1) - printpanics(gp._panic) - dopanic(0) // should not return - *(*int)(nil) = 0 // not reached + fatalpanic(gp._panic) // should not return + *(*int)(nil) = 0 // not reached } // currentDefer returns the top of the defer stack if it can be recovered. @@ -810,13 +844,16 @@ func sync_throw(s string) { //go:nosplit func throw(s string) { - print("fatal error: ", s, "\n") + // Everything throw does should be recursively nosplit so it + // can be called even when it's unsafe to grow the stack. + systemstack(func() { + print("fatal error: ", s, "\n") + }) gp := getg() if gp.m.throwing == 0 { gp.m.throwing = 1 } - startpanic() - dopanic(0) + fatalthrow() *(*int)(nil) = 0 // not reached } @@ -833,13 +870,76 @@ var panicking uint32 // so that two concurrent panics don't overlap their output. var paniclk mutex +// fatalthrow implements an unrecoverable runtime throw. It freezes the +// system, prints stack traces starting from its caller, and terminates the +// process. +// +//go:nosplit +func fatalthrow() { + pc := getcallerpc() + sp := getcallersp() + gp := getg() + + startpanic_m() + + if dopanic_m(gp, pc, sp) { + crash() + } + + exit(2) + + *(*int)(nil) = 0 // not reached +} + +// fatalpanic implements an unrecoverable panic. It is like fatalthrow, except +// that if msgs != nil, fatalpanic also prints panic messages and decrements +// runningPanicDefers once main is blocked from exiting. 
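The fatalthrow/fatalpanic split above centralizes how the runtime dies. From a user's point of view the practical difference is that a panic unwinds and can be recovered, while a throw cannot. The deliberately crashing program below demonstrates that, using the sync_throw path visible in this hunk (an unlock of an unlocked sync.Mutex); run it expecting a "fatal error" and a non-zero exit.

```go
package main

import (
	"fmt"
	"sync"
)

func main() {
	// An ordinary panic unwinds the stack and can be caught by recover.
	func() {
		defer func() { fmt.Println("recovered:", recover()) }()
		panic("ordinary panic")
	}()

	// A runtime throw cannot be caught: no deferred functions run, the
	// runtime prints "fatal error: ..." plus goroutine stacks, and the
	// process normally exits with status 2 (the fatalthrow path above).
	defer func() { fmt.Println("never printed:", recover()) }()
	var mu sync.Mutex
	mu.Unlock() // provokes throw("sync: unlock of unlocked mutex")
}
```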
+// +//go:nosplit +func fatalpanic(msgs *_panic) { + pc := getcallerpc() + sp := getcallersp() + gp := getg() + var docrash bool + + if startpanic_m() && msgs != nil { + // There were panic messages and startpanic_m + // says it's okay to try to print them. + + // startpanic_m set panicking, which will + // block main from exiting, so now OK to + // decrement runningPanicDefers. + atomic.Xadd(&runningPanicDefers, -1) + + printpanics(msgs) + } + + docrash = dopanic_m(gp, pc, sp) + + if docrash { + // By crashing outside the above systemstack call, debuggers + // will not be confused when generating a backtrace. + // Function crash is marked nosplit to avoid stack growth. + crash() + } + + systemstack(func() { + exit(2) + }) + + *(*int)(nil) = 0 // not reached +} + // startpanic_m prepares for an unrecoverable panic. // +// It returns true if panic messages should be printed, or false if +// the runtime is in bad shape and should just print stacks. +// // It can have write barriers because the write barrier explicitly // ignores writes once dying > 0. // //go:yeswritebarrierrec -func startpanic() { +func startpanic_m() bool { _g_ := getg() if mheap_.cachealloc.size == 0 { // very early print("runtime: panic before malloc heap initialized\n") @@ -850,6 +950,12 @@ func startpanic() { // happen (even if we're not in one of these situations). _g_.m.mallocing++ + // If we're dying because of a bad lock count, set it to a + // good lock count so we don't recursively panic below. + if _g_.m.locks < 0 { + _g_.m.locks = 1 + } + switch _g_.m.dying { case 0: _g_.m.dying = 1 @@ -860,15 +966,13 @@ func startpanic() { schedtrace(true) } freezetheworld() - return + return true case 1: - // Something failed while panicking, probably the print of the - // argument to panic(). Just print a stack trace and exit. + // Something failed while panicking. + // Just print a stack trace and exit. _g_.m.dying = 2 print("panic during panic\n") - dopanic(0) - exit(3) - fallthrough + return false case 2: // This is a genuine bug in the runtime, we couldn't even // print the stack trace successfully. @@ -879,14 +983,14 @@ func startpanic() { default: // Can't even print! Just exit. exit(5) + return false // Need to return something. } } var didothers bool var deadlock mutex -func dopanic(unused int) { - gp := getg() +func dopanic_m(gp *g, pc, sp uintptr) bool { if gp.sig != 0 { signame := signame(gp.sig) if signame != "" { @@ -927,11 +1031,7 @@ func dopanic(unused int) { lock(&deadlock) } - if docrash { - crash() - } - - exit(2) + return docrash } // canpanic returns false if a signal should throw instead of @@ -951,7 +1051,7 @@ func canpanic(gp *g) bool { if gp == nil || gp != _m_.curg { return false } - if _m_.locks-_m_.softfloat != 0 || _m_.mallocing != 0 || _m_.throwing != 0 || _m_.preemptoff != "" || _m_.dying != 0 { + if _m_.locks != 0 || _m_.mallocing != 0 || _m_.throwing != 0 || _m_.preemptoff != "" || _m_.dying != 0 { return false } status := readgstatus(gp) @@ -960,3 +1060,14 @@ func canpanic(gp *g) bool { } return true } + +// isAbortPC returns true if pc is the program counter at which +// runtime.abort raises a signal. +// +// It is nosplit because it's part of the isgoexception +// implementation. 
+// +//go:nosplit +func isAbortPC(pc uintptr) bool { + return false +} diff --git a/libgo/go/runtime/pprof/internal/profile/encode.go b/libgo/go/runtime/pprof/internal/profile/encode.go index 6b879a8..af31933 100644 --- a/libgo/go/runtime/pprof/internal/profile/encode.go +++ b/libgo/go/runtime/pprof/internal/profile/encode.go @@ -197,6 +197,10 @@ var profileDecoder = []decoder{ }, // repeated int64 period = 12 func(b *buffer, m message) error { return decodeInt64(b, &m.(*Profile).Period) }, + // repeated int64 comment = 13 + func(b *buffer, m message) error { return decodeInt64s(b, &m.(*Profile).commentX) }, + // int64 defaultSampleType = 14 + func(b *buffer, m message) error { return decodeInt64(b, &m.(*Profile).defaultSampleTypeX) }, } // postDecode takes the unexported fields populated by decode (with @@ -278,6 +282,14 @@ func (p *Profile) postDecode() error { pt.Type, err = getString(p.stringTable, &pt.typeX, err) pt.Unit, err = getString(p.stringTable, &pt.unitX, err) } + for _, i := range p.commentX { + var c string + c, err = getString(p.stringTable, &i, err) + p.Comments = append(p.Comments, c) + } + + p.commentX = nil + p.DefaultSampleType, err = getString(p.stringTable, &p.defaultSampleTypeX, err) p.stringTable = nil return nil } diff --git a/libgo/go/runtime/pprof/internal/profile/profile.go b/libgo/go/runtime/pprof/internal/profile/profile.go index 9b6a6f9..64c3e3f 100644 --- a/libgo/go/runtime/pprof/internal/profile/profile.go +++ b/libgo/go/runtime/pprof/internal/profile/profile.go @@ -22,11 +22,13 @@ import ( // Profile is an in-memory representation of profile.proto. type Profile struct { - SampleType []*ValueType - Sample []*Sample - Mapping []*Mapping - Location []*Location - Function []*Function + SampleType []*ValueType + DefaultSampleType string + Sample []*Sample + Mapping []*Mapping + Location []*Location + Function []*Function + Comments []string DropFrames string KeepFrames string @@ -36,9 +38,11 @@ type Profile struct { PeriodType *ValueType Period int64 - dropFramesX int64 - keepFramesX int64 - stringTable []string + commentX []int64 + dropFramesX int64 + keepFramesX int64 + stringTable []string + defaultSampleTypeX int64 } // ValueType corresponds to Profile.ValueType diff --git a/libgo/go/runtime/pprof/pprof.go b/libgo/go/runtime/pprof/pprof.go index be4e869..5128c22 100644 --- a/libgo/go/runtime/pprof/pprof.go +++ b/libgo/go/runtime/pprof/pprof.go @@ -68,7 +68,7 @@ // all pprof commands. // // For more information about pprof, see -// https://github.com/google/pprof/blob/master/doc/pprof.md. +// https://github.com/google/pprof/blob/master/doc/README.md. package pprof import ( @@ -99,7 +99,8 @@ import ( // Each Profile has a unique name. A few profiles are predefined: // // goroutine - stack traces of all current goroutines -// heap - a sampling of all heap allocations +// heap - a sampling of memory allocations of live objects +// allocs - a sampling of all past memory allocations // threadcreate - stack traces that led to the creation of new OS threads // block - stack traces that led to blocking on synchronization primitives // mutex - stack traces of holders of contended mutexes @@ -114,6 +115,16 @@ import ( // all known allocations. This exception helps mainly in programs running // without garbage collection enabled, usually for debugging purposes. // +// The heap profile tracks both the allocation sites for all live objects in +// the application memory and for all objects allocated since the program start. 
+// Pprof's -inuse_space, -inuse_objects, -alloc_space, and -alloc_objects +// flags select which to display, defaulting to -inuse_space (live objects, +// scaled by size). +// +// The allocs profile is the same as the heap profile but changes the default +// pprof display to -alloc_space, the total number of bytes allocated since +// the program began (including garbage-collected bytes). +// // The CPU profile is not available as a Profile. It has a special API, // the StartCPUProfile and StopCPUProfile functions, because it streams // output to a writer during profiling. @@ -150,6 +161,12 @@ var heapProfile = &Profile{ write: writeHeap, } +var allocsProfile = &Profile{ + name: "allocs", + count: countHeap, // identical to heap profile + write: writeAlloc, +} + var blockProfile = &Profile{ name: "block", count: countBlock, @@ -170,6 +187,7 @@ func lockProfiles() { "goroutine": goroutineProfile, "threadcreate": threadcreateProfile, "heap": heapProfile, + "allocs": allocsProfile, "block": blockProfile, "mutex": mutexProfile, } @@ -525,6 +543,16 @@ func countHeap() int { // writeHeap writes the current runtime heap profile to w. func writeHeap(w io.Writer, debug int) error { + return writeHeapInternal(w, debug, "") +} + +// writeAlloc writes the current runtime heap profile to w +// with the total allocation space as the default sample type. +func writeAlloc(w io.Writer, debug int) error { + return writeHeapInternal(w, debug, "alloc_space") +} + +func writeHeapInternal(w io.Writer, debug int, defaultSampleType string) error { var memStats *runtime.MemStats if debug != 0 { // Read mem stats first, so that our other allocations @@ -555,7 +583,7 @@ func writeHeap(w io.Writer, debug int) error { } if debug == 0 { - return writeHeapProto(w, p, int64(runtime.MemProfileRate)) + return writeHeapProto(w, p, int64(runtime.MemProfileRate), defaultSampleType) } sort.Slice(p, func(i, j int) bool { return p[i].InUseBytes() > p[j].InUseBytes() }) diff --git a/libgo/go/runtime/pprof/pprof_test.go b/libgo/go/runtime/pprof/pprof_test.go index 02d99f5..74a7777 100644 --- a/libgo/go/runtime/pprof/pprof_test.go +++ b/libgo/go/runtime/pprof/pprof_test.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build !nacl +// +build !nacl,!js package pprof @@ -732,7 +732,7 @@ func TestMutexProfile(t *testing.T) { return } // checking that the line is like "35258904 1 @ 0x48288d 0x47cd28 0x458931" - r2 := `^\d+ 1 @(?: 0x[[:xdigit:]]+)+` + r2 := `^\d+ \d+ @(?: 0x[[:xdigit:]]+)+` //r2 := "^[0-9]+ 1 @ 0x[0-9a-f x]+$" if ok, err := regexp.MatchString(r2, lines[3]); err != nil || !ok { t.Errorf("%q didn't match %q", lines[3], r2) @@ -862,16 +862,22 @@ func containsCounts(prof *profile.Profile, counts []int64) bool { return true } +var emptyCallStackTestRun int64 + // Issue 18836. 
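Returning to the new "allocs" profile registered above: it shares writeHeapInternal with "heap" and differs only in the default sample type pprof displays. A short usage sketch, assuming a runtime that includes this change (the file name and error handling are arbitrary):

```go
package main

import (
	"log"
	"os"
	"runtime/pprof"
)

func main() {
	f, err := os.Create("allocs.pb.gz")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// Lookup returns nil for unknown profile names, so this doubles as a
	// check that the runtime actually provides the new "allocs" profile.
	p := pprof.Lookup("allocs")
	if p == nil {
		log.Fatal("allocs profile not available")
	}
	// debug=0 writes the gzipped protobuf form that pprof consumes; the
	// embedded default sample type will be alloc_space.
	if err := p.WriteTo(f, 0); err != nil {
		log.Fatal(err)
	}
}
```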
func TestEmptyCallStack(t *testing.T) { + name := fmt.Sprintf("test18836_%d", emptyCallStackTestRun) + emptyCallStackTestRun++ + t.Parallel() var buf bytes.Buffer - p := NewProfile("test18836") + p := NewProfile(name) + p.Add("foo", 47674) p.WriteTo(&buf, 1) p.Remove("foo") got := buf.String() - prefix := "test18836 profile: total 1\n" + prefix := name + " profile: total 1\n" if !strings.HasPrefix(got, prefix) { t.Fatalf("got:\n\t%q\nwant prefix:\n\t%q\n", got, prefix) } diff --git a/libgo/go/runtime/pprof/proto.go b/libgo/go/runtime/pprof/proto.go index 793be44..d8456be 100644 --- a/libgo/go/runtime/pprof/proto.go +++ b/libgo/go/runtime/pprof/proto.go @@ -11,7 +11,6 @@ import ( "io" "io/ioutil" "runtime" - "sort" "strconv" "time" "unsafe" @@ -53,24 +52,43 @@ type profileBuilder struct { } type memMap struct { - start uintptr - end uintptr + // initialized as reading mapping + start uintptr + end uintptr + offset uint64 + file, buildID string + + funcs symbolizeFlag + fake bool // map entry was faked; /proc/self/maps wasn't available } +// symbolizeFlag keeps track of symbolization result. +// 0 : no symbol lookup was performed +// 1<<0 (lookupTried) : symbol lookup was performed +// 1<<1 (lookupFailed): symbol lookup was performed but failed +type symbolizeFlag uint8 + +const ( + lookupTried symbolizeFlag = 1 << iota + lookupFailed symbolizeFlag = 1 << iota +) + const ( // message Profile - tagProfile_SampleType = 1 // repeated ValueType - tagProfile_Sample = 2 // repeated Sample - tagProfile_Mapping = 3 // repeated Mapping - tagProfile_Location = 4 // repeated Location - tagProfile_Function = 5 // repeated Function - tagProfile_StringTable = 6 // repeated string - tagProfile_DropFrames = 7 // int64 (string table index) - tagProfile_KeepFrames = 8 // int64 (string table index) - tagProfile_TimeNanos = 9 // int64 - tagProfile_DurationNanos = 10 // int64 - tagProfile_PeriodType = 11 // ValueType (really optional string???) - tagProfile_Period = 12 // int64 + tagProfile_SampleType = 1 // repeated ValueType + tagProfile_Sample = 2 // repeated Sample + tagProfile_Mapping = 3 // repeated Mapping + tagProfile_Location = 4 // repeated Location + tagProfile_Function = 5 // repeated Function + tagProfile_StringTable = 6 // repeated string + tagProfile_DropFrames = 7 // int64 (string table index) + tagProfile_KeepFrames = 8 // int64 (string table index) + tagProfile_TimeNanos = 9 // int64 + tagProfile_DurationNanos = 10 // int64 + tagProfile_PeriodType = 11 // ValueType (really optional string???) + tagProfile_Period = 12 // int64 + tagProfile_Comment = 13 // repeated int64 + tagProfile_DefaultSampleType = 14 // int64 // message ValueType tagValueType_Type = 1 // int64 (string table index) @@ -174,7 +192,7 @@ func (b *profileBuilder) pbLine(tag int, funcID uint64, line int64) { } // pbMapping encodes a Mapping message to b.pb. -func (b *profileBuilder) pbMapping(tag int, id, base, limit, offset uint64, file, buildID string) { +func (b *profileBuilder) pbMapping(tag int, id, base, limit, offset uint64, file, buildID string, hasFuncs bool) { start := b.pb.startMessage() b.pb.uint64Opt(tagMapping_ID, id) b.pb.uint64Opt(tagMapping_Start, base) @@ -182,8 +200,15 @@ func (b *profileBuilder) pbMapping(tag int, id, base, limit, offset uint64, file b.pb.uint64Opt(tagMapping_Offset, offset) b.pb.int64Opt(tagMapping_Filename, b.stringIndex(file)) b.pb.int64Opt(tagMapping_BuildID, b.stringIndex(buildID)) - // TODO: Set any of HasInlineFrames, HasFunctions, HasFilenames, HasLineNumbers? 
- // It seems like they should all be true, but they've never been set. + // TODO: we set HasFunctions if all symbols from samples were symbolized (hasFuncs). + // Decide what to do about HasInlineFrames and HasLineNumbers. + // Also, another approach to handle the mapping entry with + // incomplete symbolization results is to dupliace the mapping + // entry (but with different Has* fields values) and use + // different entries for symbolized locations and unsymbolized locations. + if hasFuncs { + b.pb.bool(tagMapping_HasFunctions, true) + } b.pb.endMessage(tag, start) } @@ -208,6 +233,11 @@ func (b *profileBuilder) locForPC(addr uintptr) uint64 { return 0 } + symbolizeResult := lookupTried + if frame.PC == 0 || frame.Function == "" || frame.File == "" || frame.Line == 0 { + symbolizeResult |= lookupFailed + } + if frame.PC == 0 { // If we failed to resolve the frame, at least make up // a reasonable call PC. This mostly happens in tests. @@ -242,12 +272,14 @@ func (b *profileBuilder) locForPC(addr uintptr) uint64 { } frame, more = frames.Next() } - if len(b.mem) > 0 { - i := sort.Search(len(b.mem), func(i int) bool { - return b.mem[i].end > addr - }) - if i < len(b.mem) && b.mem[i].start <= addr && addr < b.mem[i].end { + for i := range b.mem { + if b.mem[i].start <= addr && addr < b.mem[i].end || b.mem[i].fake { b.pb.uint64Opt(tagLocation_MappingID, uint64(i+1)) + + m := b.mem[i] + m.funcs |= symbolizeResult + b.mem[i] = m + break } } b.pb.endMessage(tagProfile_Location, start) @@ -348,7 +380,7 @@ func (b *profileBuilder) addCPUData(data []uint64, tags []unsafe.Pointer) error } // build completes and returns the constructed profile. -func (b *profileBuilder) build() error { +func (b *profileBuilder) build() { b.end = time.Now() b.pb.int64Opt(tagProfile_TimeNanos, b.start.UnixNano()) @@ -395,13 +427,17 @@ func (b *profileBuilder) build() error { b.pbSample(values, locs, labels) } + for i, m := range b.mem { + hasFunctions := m.funcs == lookupTried // lookupTried but not lookupFailed + b.pbMapping(tagProfile_Mapping, uint64(i+1), uint64(m.start), uint64(m.end), m.offset, m.file, m.buildID, hasFunctions) + } + // TODO: Anything for tagProfile_DropFrames? // TODO: Anything for tagProfile_KeepFrames? b.pb.strings(tagProfile_StringTable, b.strings) b.zw.Write(b.pb.data) b.zw.Close() - return nil } // readMapping reads /proc/self/maps and writes mappings to b.pb. @@ -410,6 +446,12 @@ func (b *profileBuilder) build() error { func (b *profileBuilder) readMapping() { data, _ := ioutil.ReadFile("/proc/self/maps") parseProcSelfMaps(data, b.addMapping) + if len(b.mem) == 0 { // pprof expects a map entry, so fake one. + b.addMappingEntry(0, 0, 0, "", "", true) + // TODO(hyangah): make addMapping return *memMap or + // take a memMap struct, and get rid of addMappingEntry + // that takes a bunch of positional arguments. 
+ } } func parseProcSelfMaps(data []byte, addMapping func(lo, hi, offset uint64, file, buildID string)) { @@ -510,6 +552,16 @@ func parseProcSelfMaps(data []byte, addMapping func(lo, hi, offset uint64, file, } func (b *profileBuilder) addMapping(lo, hi, offset uint64, file, buildID string) { - b.mem = append(b.mem, memMap{uintptr(lo), uintptr(hi)}) - b.pbMapping(tagProfile_Mapping, uint64(len(b.mem)), lo, hi, offset, file, buildID) + b.addMappingEntry(lo, hi, offset, file, buildID, false) +} + +func (b *profileBuilder) addMappingEntry(lo, hi, offset uint64, file, buildID string, fake bool) { + b.mem = append(b.mem, memMap{ + start: uintptr(lo), + end: uintptr(hi), + offset: offset, + file: file, + buildID: buildID, + fake: fake, + }) } diff --git a/libgo/go/runtime/pprof/proto_test.go b/libgo/go/runtime/pprof/proto_test.go index a268c3a..604628c 100644 --- a/libgo/go/runtime/pprof/proto_test.go +++ b/libgo/go/runtime/pprof/proto_test.go @@ -8,7 +8,10 @@ import ( "bytes" "encoding/json" "fmt" + "internal/testenv" "io/ioutil" + "os" + "os/exec" "reflect" "runtime" "runtime/pprof/internal/profile" @@ -63,7 +66,7 @@ func TestConvertCPUProfileEmpty(t *testing.T) { {Type: "cpu", Unit: "nanoseconds"}, } - checkProfile(t, p, 2000*1000, periodType, sampleType, nil) + checkProfile(t, p, 2000*1000, periodType, sampleType, nil, "") } // For gccgo make these functions different so that gccgo doesn't @@ -96,9 +99,16 @@ func testPCs(t *testing.T) (addr1, addr2 uint64, map1, map2 *profile.Mapping) { addr2 = mprof.Mapping[1].Start map2 = mprof.Mapping[1] map2.BuildID, _ = elfBuildID(map2.File) + case "js": + addr1 = uint64(funcPC(f1)) + addr2 = uint64(funcPC(f2)) default: addr1 = uint64(funcPC(f1)) addr2 = uint64(funcPC(f2)) + // Fake mapping - HasFunctions will be true because two PCs from Go + // will be fully symbolized. + fake := &profile.Mapping{ID: 1, HasFunctions: true} + map1, map2 = fake, fake } return } @@ -132,18 +142,23 @@ func TestConvertCPUProfile(t *testing.T) { {ID: 4, Mapping: map2, Address: addr2 + 1}, }}, } - checkProfile(t, p, period, periodType, sampleType, samples) + checkProfile(t, p, period, periodType, sampleType, samples, "") } -func checkProfile(t *testing.T, p *profile.Profile, period int64, periodType *profile.ValueType, sampleType []*profile.ValueType, samples []*profile.Sample) { +func checkProfile(t *testing.T, p *profile.Profile, period int64, periodType *profile.ValueType, sampleType []*profile.ValueType, samples []*profile.Sample, defaultSampleType string) { + t.Helper() + if p.Period != period { - t.Fatalf("p.Period = %d, want %d", p.Period, period) + t.Errorf("p.Period = %d, want %d", p.Period, period) } if !reflect.DeepEqual(p.PeriodType, periodType) { - t.Fatalf("p.PeriodType = %v\nwant = %v", fmtJSON(p.PeriodType), fmtJSON(periodType)) + t.Errorf("p.PeriodType = %v\nwant = %v", fmtJSON(p.PeriodType), fmtJSON(periodType)) } if !reflect.DeepEqual(p.SampleType, sampleType) { - t.Fatalf("p.SampleType = %v\nwant = %v", fmtJSON(p.SampleType), fmtJSON(sampleType)) + t.Errorf("p.SampleType = %v\nwant = %v", fmtJSON(p.SampleType), fmtJSON(sampleType)) + } + if defaultSampleType != p.DefaultSampleType { + t.Errorf("p.DefaultSampleType = %v\nwant = %v", p.DefaultSampleType, defaultSampleType) } // Clear line info since it is not in the expected samples. // If we used f1 and f2 above, then the samples will have line info. 
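The mapping bookkeeping above folds every location's symbolization outcome into per-mapping symbolizeFlag bits, and HasFunctions is emitted only when lookups were attempted and none failed. A minimal standalone illustration of that aggregation (the sample data is invented):

```go
package main

import "fmt"

type symbolizeFlag uint8

const (
	lookupTried  symbolizeFlag = 1 << iota // at least one lookup was attempted
	lookupFailed                           // at least one lookup failed
)

func main() {
	var funcs symbolizeFlag

	// Three locations fall in the same mapping; one fails to symbolize.
	for _, ok := range []bool{true, false, true} {
		res := lookupTried
		if !ok {
			res |= lookupFailed
		}
		funcs |= res
	}

	hasFunctions := funcs == lookupTried       // tried, and nothing failed
	fmt.Println("HasFunctions:", hasFunctions) // false here: one lookup failed
}
```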
@@ -222,3 +237,114 @@ func TestProcSelfMaps(t *testing.T) { } } } + +// TestMapping checkes the mapping section of CPU profiles +// has the HasFunctions field set correctly. If all PCs included +// in the samples are successfully symbolized, the corresponding +// mapping entry (in this test case, only one entry) should have +// its HasFunctions field set true. +// The test generates a CPU profile that includes PCs from C side +// that the runtime can't symbolize. See ./testdata/mappingtest. +func TestMapping(t *testing.T) { + testenv.MustHaveGoRun(t) + testenv.MustHaveCGO(t) + + prog := "./testdata/mappingtest/main.go" + + // GoOnly includes only Go symbols that runtime will symbolize. + // Go+C includes C symbols that runtime will not symbolize. + for _, traceback := range []string{"GoOnly", "Go+C"} { + t.Run("traceback"+traceback, func(t *testing.T) { + cmd := exec.Command(testenv.GoToolPath(t), "run", prog) + if traceback != "GoOnly" { + cmd.Env = append(os.Environ(), "SETCGOTRACEBACK=1") + } + cmd.Stderr = new(bytes.Buffer) + + out, err := cmd.Output() + if err != nil { + t.Fatalf("failed to run the test program %q: %v\n%v", prog, err, cmd.Stderr) + } + + prof, err := profile.Parse(bytes.NewReader(out)) + if err != nil { + t.Fatalf("failed to parse the generated profile data: %v", err) + } + t.Logf("Profile: %s", prof) + + hit := make(map[*profile.Mapping]bool) + miss := make(map[*profile.Mapping]bool) + for _, loc := range prof.Location { + if symbolized(loc) { + hit[loc.Mapping] = true + } else { + miss[loc.Mapping] = true + } + } + if len(miss) == 0 { + t.Log("no location with missing symbol info was sampled") + } + + for _, m := range prof.Mapping { + if miss[m] && m.HasFunctions { + t.Errorf("mapping %+v has HasFunctions=true, but contains locations with failed symbolization", m) + continue + } + if !miss[m] && hit[m] && !m.HasFunctions { + t.Errorf("mapping %+v has HasFunctions=false, but all referenced locations from this lapping were symbolized successfully", m) + continue + } + } + }) + } +} + +func symbolized(loc *profile.Location) bool { + if len(loc.Line) == 0 { + return false + } + l := loc.Line[0] + f := l.Function + if l.Line == 0 || f == nil || f.Name == "" || f.Filename == "" { + return false + } + return true +} + +// TestFakeMapping tests if at least one mapping exists +// (including a fake mapping), and their HasFunctions bits +// are set correctly. 
+func TestFakeMapping(t *testing.T) { + var buf bytes.Buffer + if err := Lookup("heap").WriteTo(&buf, 0); err != nil { + t.Fatalf("failed to write heap profile: %v", err) + } + prof, err := profile.Parse(&buf) + if err != nil { + t.Fatalf("failed to parse the generated profile data: %v", err) + } + t.Logf("Profile: %s", prof) + if len(prof.Mapping) == 0 { + t.Fatal("want profile with at least one mapping entry, got 0 mapping") + } + + hit := make(map[*profile.Mapping]bool) + miss := make(map[*profile.Mapping]bool) + for _, loc := range prof.Location { + if symbolized(loc) { + hit[loc.Mapping] = true + } else { + miss[loc.Mapping] = true + } + } + for _, m := range prof.Mapping { + if miss[m] && m.HasFunctions { + t.Errorf("mapping %+v has HasFunctions=true, but contains locations with failed symbolization", m) + continue + } + if !miss[m] && hit[m] && !m.HasFunctions { + t.Errorf("mapping %+v has HasFunctions=false, but all referenced locations from this lapping were symbolized successfully", m) + continue + } + } +} diff --git a/libgo/go/runtime/pprof/protomem.go b/libgo/go/runtime/pprof/protomem.go index 2756cfd..82565d5 100644 --- a/libgo/go/runtime/pprof/protomem.go +++ b/libgo/go/runtime/pprof/protomem.go @@ -12,7 +12,7 @@ import ( ) // writeHeapProto writes the current heap profile in protobuf format to w. -func writeHeapProto(w io.Writer, p []runtime.MemProfileRecord, rate int64) error { +func writeHeapProto(w io.Writer, p []runtime.MemProfileRecord, rate int64, defaultSampleType string) error { b := newProfileBuilder(w) b.pbValueType(tagProfile_PeriodType, "space", "bytes") b.pb.int64Opt(tagProfile_Period, rate) @@ -20,6 +20,9 @@ func writeHeapProto(w io.Writer, p []runtime.MemProfileRecord, rate int64) error b.pbValueType(tagProfile_SampleType, "alloc_space", "bytes") b.pbValueType(tagProfile_SampleType, "inuse_objects", "count") b.pbValueType(tagProfile_SampleType, "inuse_space", "bytes") + if defaultSampleType != "" { + b.pb.int64Opt(tagProfile_DefaultSampleType, b.stringIndex(defaultSampleType)) + } values := []int64{0, 0, 0, 0} var locs []uint64 diff --git a/libgo/go/runtime/pprof/protomem_test.go b/libgo/go/runtime/pprof/protomem_test.go index 1e30ed9..315d5f0 100644 --- a/libgo/go/runtime/pprof/protomem_test.go +++ b/libgo/go/runtime/pprof/protomem_test.go @@ -14,7 +14,6 @@ import ( func TestConvertMemProfile(t *testing.T) { addr1, addr2, map1, map2 := testPCs(t) - var buf bytes.Buffer // MemProfileRecord stacks are return PCs, so add one to the // addresses recorded in the "profile". 
The proto profile // locations are call PCs, so conversion will subtract one @@ -27,15 +26,6 @@ func TestConvertMemProfile(t *testing.T) { {AllocBytes: 512 * 1024, FreeBytes: 512 * 1024, AllocObjects: 1, FreeObjects: 1, Stack0: [32]uintptr{a1 + 1, a1 + 2, a2 + 3}}, } - if err := writeHeapProto(&buf, rec, rate); err != nil { - t.Fatalf("writing profile: %v", err) - } - - p, err := profile.Parse(&buf) - if err != nil { - t.Fatalf("profile.Parse: %v", err) - } - periodType := &profile.ValueType{Type: "space", Unit: "bytes"} sampleType := []*profile.ValueType{ {Type: "alloc_objects", Unit: "count"}, @@ -70,5 +60,25 @@ func TestConvertMemProfile(t *testing.T) { NumLabel: map[string][]int64{"bytes": {829411}}, }, } - checkProfile(t, p, rate, periodType, sampleType, samples) + for _, tc := range []struct { + name string + defaultSampleType string + }{ + {"heap", ""}, + {"allocs", "alloc_space"}, + } { + t.Run(tc.name, func(t *testing.T) { + var buf bytes.Buffer + if err := writeHeapProto(&buf, rec, rate, tc.defaultSampleType); err != nil { + t.Fatalf("writing profile: %v", err) + } + + p, err := profile.Parse(&buf) + if err != nil { + t.Fatalf("profile.Parse: %v", err) + } + + checkProfile(t, p, rate, periodType, sampleType, samples, tc.defaultSampleType) + }) + } } diff --git a/libgo/go/runtime/pprof/testdata/mappingtest/main.go b/libgo/go/runtime/pprof/testdata/mappingtest/main.go new file mode 100644 index 0000000..7850faa --- /dev/null +++ b/libgo/go/runtime/pprof/testdata/mappingtest/main.go @@ -0,0 +1,105 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This program outputs a CPU profile that includes +// both Go and Cgo stacks. This is used by the mapping info +// tests in runtime/pprof. +// +// If SETCGOTRACEBACK=1 is set, the CPU profile will includes +// PCs from C side but they will not be symbolized. +package main + +/* +#include <stdint.h> +#include <stdlib.h> + +int cpuHogCSalt1 = 0; +int cpuHogCSalt2 = 0; + +void CPUHogCFunction() { + int foo = cpuHogCSalt1; + int i; + for (i = 0; i < 100000; i++) { + if (foo > 0) { + foo *= foo; + } else { + foo *= foo + 1; + } + cpuHogCSalt2 = foo; + } +} + +struct CgoTracebackArg { + uintptr_t context; + uintptr_t sigContext; + uintptr_t *buf; + uintptr_t max; +}; + +void CollectCgoTraceback(void* parg) { + struct CgoTracebackArg* arg = (struct CgoTracebackArg*)(parg); + arg->buf[0] = (uintptr_t)(CPUHogCFunction); + arg->buf[1] = 0; +}; +*/ +import "C" + +import ( + "log" + "os" + "runtime" + "runtime/pprof" + "time" + "unsafe" +) + +func init() { + if v := os.Getenv("SETCGOTRACEBACK"); v == "1" { + // Collect some PCs from C-side, but don't symbolize. + runtime.SetCgoTraceback(0, unsafe.Pointer(C.CollectCgoTraceback), nil, nil) + } +} + +func main() { + go cpuHogGoFunction() + go cpuHogCFunction() + runtime.Gosched() + + if err := pprof.StartCPUProfile(os.Stdout); err != nil { + log.Fatal("can't start CPU profile: ", err) + } + time.Sleep(1 * time.Second) + pprof.StopCPUProfile() + + if err := os.Stdout.Close(); err != nil { + log.Fatal("can't write CPU profile: ", err) + } +} + +var salt1 int +var salt2 int + +func cpuHogGoFunction() { + // Generates CPU profile samples including a Go call path. 
+ for { + foo := salt1 + for i := 0; i < 1e5; i++ { + if foo > 0 { + foo *= foo + } else { + foo *= foo + 1 + } + salt2 = foo + } + runtime.Gosched() + } +} + +func cpuHogCFunction() { + // Generates CPU profile samples including a Cgo call path. + for { + C.CPUHogCFunction() + runtime.Gosched() + } +} diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go index 4fc45dd..77d379b 100644 --- a/libgo/go/runtime/proc.go +++ b/libgo/go/runtime/proc.go @@ -5,6 +5,7 @@ package runtime import ( + "internal/cpu" "runtime/internal/atomic" "runtime/internal/sys" "unsafe" @@ -169,9 +170,11 @@ func main() { // Allow newproc to start new Ms. mainStarted = true - systemstack(func() { - newm(sysmon, nil) - }) + if GOARCH != "wasm" { // no threads on wasm yet, so no sysmon + systemstack(func() { + newm(sysmon, nil) + }) + } // Lock the main goroutine onto this, the main OS thread, // during initialization. Most programs won't care, but a few @@ -242,7 +245,7 @@ func main() { } } if atomic.Load(&panicking) != 0 { - gopark(nil, nil, "panicwait", traceEvGoStop, 1) + gopark(nil, nil, waitReasonPanicWait, traceEvGoStop, 1) } exit(0) @@ -276,7 +279,7 @@ func forcegchelper() { throw("forcegc: phase error") } atomic.Store(&forcegc.idle, 1) - goparkunlock(&forcegc.lock, "force gc (idle)", traceEvGoBlock, 1) + goparkunlock(&forcegc.lock, waitReasonForceGGIdle, traceEvGoBlock, 1) // this goroutine is explicitly resumed by sysmon if debug.gctrace > 0 { println("GC forced") @@ -291,6 +294,7 @@ func forcegchelper() { // Gosched yields the processor, allowing other goroutines to run. It does not // suspend the current goroutine, so execution resumes automatically. func Gosched() { + checkTimeouts() mcall(gosched_m) } @@ -305,7 +309,14 @@ func goschedguarded() { // If unlockf returns false, the goroutine is resumed. // unlockf must not access this G's stack, as it may be moved between // the call to gopark and the call to unlockf. -func gopark(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason string, traceEv byte, traceskip int) { +// Reason explains why the goroutine has been parked. +// It is displayed in stack traces and heap dumps. +// Reasons should be unique and descriptive. +// Do not re-use reasons, add new ones. +func gopark(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason waitReason, traceEv byte, traceskip int) { + if reason != waitReasonSleep { + checkTimeouts() // timeouts may expire while two goroutines keep the scheduler busy + } mp := acquirem() gp := mp.curg status := readgstatus(gp) @@ -324,7 +335,7 @@ func gopark(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason s // Puts the current goroutine into a waiting state and unlocks the lock. // The goroutine can be made runnable again by calling goready(gp). -func goparkunlock(lock *mutex, reason string, traceEv byte, traceskip int) { +func goparkunlock(lock *mutex, reason waitReason, traceEv byte, traceskip int) { gopark(parkunlock_c, unsafe.Pointer(lock), reason, traceEv, traceskip) } @@ -468,6 +479,37 @@ const ( _GoidCacheBatch = 16 ) +// cpuinit extracts the environment variable GODEBUGCPU from the environment on +// Linux and Darwin if the GOEXPERIMENT debugcpu was set and calls internal/cpu.Initialize. +func cpuinit() { + const prefix = "GODEBUGCPU=" + var env string + + if haveexperiment("debugcpu") && (GOOS == "linux" || GOOS == "darwin") { + cpu.DebugOptions = true + + // Similar to goenv_unix but extracts the environment value for + // GODEBUGCPU directly. 
+ // TODO(moehrmann): remove when general goenvs() can be called before cpuinit() + n := int32(0) + for argv_index(argv, argc+1+n) != nil { + n++ + } + + for i := int32(0); i < n; i++ { + p := argv_index(argv, argc+1+i) + s := *(*string)(unsafe.Pointer(&stringStruct{unsafe.Pointer(p), findnull(p)})) + + if hasprefix(s, prefix) { + env = gostring(p)[len(prefix):] + break + } + } + } + + cpu.Initialize(env) +} + // The bootstrap sequence is: // // call osinit @@ -488,6 +530,7 @@ func schedinit() { mallocinit() mcommoninit(_g_.m) + cpuinit() // must run before alginit alginit() // maps must not be used before this call msigsave(_g_.m) @@ -778,7 +821,7 @@ func casgstatus(gp *g, oldval, newval uint32) { }) } - // See http://golang.org/cl/21503 for justification of the yield delay. + // See https://golang.org/cl/21503 for justification of the yield delay. const yieldDelay = 5 * 1000 var nextYield int64 @@ -786,9 +829,7 @@ func casgstatus(gp *g, oldval, newval uint32) { // GC time to finish and change the state to oldval. for i := 0; !atomic.Cas(&gp.atomicstatus, oldval, newval); i++ { if oldval == _Gwaiting && gp.atomicstatus == _Grunnable { - systemstack(func() { - throw("casgstatus: waiting for Gwaiting but is Grunnable") - }) + throw("casgstatus: waiting for Gwaiting but is Grunnable") } // Help GC if needed. // if gp.preemptscan && !gp.gcworkdone && (oldval == _Grunning || oldval == _Gsyscall) { @@ -826,7 +867,7 @@ func scang(gp *g, gcw *gcWork) { gp.gcscandone = false - // See http://golang.org/cl/21503 for justification of the yield delay. + // See https://golang.org/cl/21503 for justification of the yield delay. const yieldDelay = 10 * 1000 var nextYield int64 @@ -1212,7 +1253,9 @@ func mstart1() { //go:yeswritebarrierrec func mstartm0() { // Create an extra M for callbacks on threads not created by Go. - if iscgo && !cgoHasExtraM { + // An extra M is also needed on Windows for callbacks created by + // syscall.NewCallback. See issue #6751 for details. + if (iscgo || GOOS == "windows") && !cgoHasExtraM { cgoHasExtraM = true newextram() } @@ -1517,8 +1560,12 @@ func allocm(_p_ *p, fn func(), allocatestack bool) (mp *m, g0Stack unsafe.Pointe // put the m back on the list. //go:nosplit func needm(x byte) { - if iscgo && !cgoHasExtraM { + if (iscgo || GOOS == "windows") && !cgoHasExtraM { // Can happen if C/C++ code calls Go from a global ctor. + // Can also happen on Windows if a global ctor uses a + // callback created by syscall.NewCallback. See issue #6751 + // for details. + // // Can not throw, because scheduler is not initialized yet. write(2, unsafe.Pointer(&earlycgocallback[0]), int32(len(earlycgocallback))) exit(1) @@ -1814,13 +1861,16 @@ func newm1(mp *m) { // // The calling thread must itself be in a known-good state. func startTemplateThread() { + if GOARCH == "wasm" { // no threads on wasm yet + return + } if !atomic.Cas(&newmHandoff.haveTemplateThread, 0, 1) { return } newm(templateThread, nil) } -// tmeplateThread is a thread in a known-good state that exists solely +// templateThread is a thread in a known-good state that exists solely // to start new threads in known-good states when the calling thread // may not be a a good state. // @@ -2232,6 +2282,14 @@ stop: return gp, false } + // wasm only: + // Check if a goroutine is waiting for a callback from the WebAssembly host. + // If yes, pause the execution until a callback was triggered. + if pauseSchedulerUntilCallback() { + // A callback was triggered and caused at least one goroutine to wake up. 
+ goto top + } + // Before we drop our P, make a snapshot of the allp slice, // which can change underfoot once we no longer block // safe-points. We don't need to snapshot the contents because @@ -2616,7 +2674,7 @@ func goexit0(gp *g) { gp._defer = nil // should be true already but just in case. gp._panic = nil // non-nil for Goexit during panic. points at stack-allocated data. gp.writebuf = nil - gp.waitreason = "" + gp.waitreason = 0 gp.param = nil gp.labels = nil gp.timer = nil @@ -2635,6 +2693,11 @@ func goexit0(gp *g) { gp.gcscanvalid = true dropg() + if GOARCH == "wasm" { // no threads yet on wasm + gfput(_g_.m.p.ptr(), gp) + schedule() // never returns + } + if _g_.m.lockedInt != 0 { print("invalid m->lockedInt = ", _g_.m.lockedInt, "\n") throw("internal lockOSThread error") @@ -2743,8 +2806,6 @@ func entersyscall_gcwait() { unlock(&sched.lock) } -// The same as reentersyscall(), but with a hint that the syscall is blocking. -//go:nosplit func reentersyscallblock(pc, sp uintptr) { _g_ := getg() @@ -2789,9 +2850,7 @@ func exitsyscall() { oldp := _g_.m.p.ptr() if exitsyscallfast() { if _g_.m.mcache == nil { - systemstack(func() { - throw("lost mcache") - }) + throw("lost mcache") } if trace.enabled { if oldp != _g_.m.p.ptr() || _g_.m.syscalltick != _g_.m.p.ptr().syscalltick { @@ -2836,9 +2895,7 @@ func exitsyscall() { mcall(exitsyscall0) if _g_.m.mcache == nil { - systemstack(func() { - throw("lost mcache") - }) + throw("lost mcache") } // Scheduler returned, so we're allowed to run now. @@ -3188,6 +3245,42 @@ func setSystemGoroutine() { atomic.Xadd(&expectedSystemGoroutines, -1) } +// saveAncestors copies previous ancestors of the given caller g and +// includes infor for the current caller into a new set of tracebacks for +// a g being created. +func saveAncestors(callergp *g) *[]ancestorInfo { + // Copy all prior info, except for the root goroutine (goid 0). + if debug.tracebackancestors <= 0 || callergp.goid == 0 { + return nil + } + var callerAncestors []ancestorInfo + if callergp.ancestors != nil { + callerAncestors = *callergp.ancestors + } + n := int32(len(callerAncestors)) + 1 + if n > debug.tracebackancestors { + n = debug.tracebackancestors + } + ancestors := make([]ancestorInfo, n) + copy(ancestors[1:], callerAncestors) + + var pcs [_TracebackMaxFrames]uintptr + // FIXME: This should get a traceback of callergp. + // npcs := gcallers(callergp, 0, pcs[:]) + npcs := 0 + ipcs := make([]uintptr, npcs) + copy(ipcs, pcs[:]) + ancestors[0] = ancestorInfo{ + pcs: ipcs, + goid: callergp.goid, + gopc: callergp.gopc, + } + + ancestorsp := new([]ancestorInfo) + *ancestorsp = ancestors + return ancestorsp +} + // Put on gfree list. // If local list is too long, transfer a batch to the global list. func gfput(_p_ *p, gp *g) { @@ -3265,6 +3358,9 @@ func Breakpoint() { // or else the m might be different in this function than in the caller. //go:nosplit func dolockOSThread() { + if GOARCH == "wasm" { + return // no threads on wasm yet + } _g_ := getg() _g_.m.lockedg.set(_g_) _g_.lockedm.set(_g_.m) @@ -3280,6 +3376,10 @@ func dolockOSThread() { // If the calling goroutine exits without unlocking the thread, // the thread will be terminated. // +// All init functions are run on the startup thread. Calling LockOSThread +// from an init function will cause the main function to be invoked on +// that thread. +// // A goroutine should call LockOSThread before calling OS services or // non-Go library functions that depend on per-thread state. 
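The expanded LockOSThread documentation above spells out a useful guarantee: locking the OS thread from an init function pins main to the startup thread. A small usage sketch (the GUI-toolkit motivation is just an example):

```go
package main

import (
	"fmt"
	"runtime"
)

func init() {
	// Lock before main runs; per the documentation above, main will then
	// be invoked on this same startup thread, which some C libraries
	// (GUI toolkits, for instance) require.
	runtime.LockOSThread()
}

func main() {
	defer runtime.UnlockOSThread()
	fmt.Println("main is pinned to the startup OS thread")
}
```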
func LockOSThread() { @@ -3309,6 +3409,9 @@ func lockOSThread() { // or else the m might be in different in this function than in the caller. //go:nosplit func dounlockOSThread() { + if GOARCH == "wasm" { + return // no threads on wasm yet + } _g_ := getg() if _g_.m.lockedInt != 0 || _g_.m.lockedExt != 0 { return @@ -3382,6 +3485,7 @@ func _ExternalCode() { _ExternalCode() } func _LostExternalCode() { _LostExternalCode() } func _GC() { _GC() } func _LostSIGPROFDuringAtomic64() { _LostSIGPROFDuringAtomic64() } +func _VDSO() { _VDSO() } // Counts SIGPROFs received while in atomic64 critical section, on mips{,le} var lostAtomic64Count uint64 @@ -3470,7 +3574,7 @@ func sigprof(pc uintptr, gp *g, mp *m) { } if prof.hz != 0 { - if (GOARCH == "mips" || GOARCH == "mipsle") && lostAtomic64Count > 0 { + if (GOARCH == "mips" || GOARCH == "mipsle" || GOARCH == "arm") && lostAtomic64Count > 0 { cpuprof.addLostAtomic64(lostAtomic64Count) lostAtomic64Count = 0 } @@ -3818,8 +3922,17 @@ func checkdead() { return } + // If we are not running under cgo, but we have an extra M then account + // for it. (It is possible to have an extra M on Windows without cgo to + // accommodate callbacks created by syscall.NewCallback. See issue #6751 + // for details.) + var run0 int32 + if !iscgo && cgoHasExtraM { + run0 = 1 + } + run := mcount() - sched.nmidle - sched.nmidlelocked - sched.nmsys - if run > 0 { + if run > run0 { return } if run < 0 { @@ -4215,7 +4328,7 @@ func schedtrace(detailed bool) { if lockedm != nil { id2 = lockedm.id } - print(" G", gp.goid, ": status=", readgstatus(gp), "(", gp.waitreason, ") m=", id1, " lockedm=", id2, "\n") + print(" G", gp.goid, ": status=", readgstatus(gp), "(", gp.waitreason.String(), ") m=", id1, " lockedm=", id2, "\n") } unlock(&allglock) unlock(&sched.lock) @@ -4375,7 +4488,7 @@ func runqempty(_p_ *p) bool { const randomizeScheduler = raceenabled // runqput tries to put g on the local runnable queue. -// If next if false, runqput adds g to the tail of the runnable queue. +// If next is false, runqput adds g to the tail of the runnable queue. // If next is true, runqput puts g in the _p_.runnext slot. // If the run queue is full, runnext puts g on the global queue. // Executed only by the owner P. @@ -4571,6 +4684,11 @@ func setMaxThreads(in int) (out int) { return } +func haveexperiment(name string) bool { + // The gofrontend does not support experiments. + return false +} + //go:nosplit func procPin() int { _g_ := getg() @@ -4618,7 +4736,7 @@ func sync_runtime_canSpin(i int) bool { // Spin only few times and only if running on a multicore machine and // GOMAXPROCS>1 and there is at least one other running P and local runq is empty. // As opposed to runtime mutex we don't do passive spinning here, - // because there can be work on global runq on on other Ps. + // because there can be work on global runq or on other Ps. 
if i >= active_spin || ncpu <= 1 || gomaxprocs <= int32(sched.npidle+sched.nmspinning)+1 { return false } diff --git a/libgo/go/runtime/proc_test.go b/libgo/go/runtime/proc_test.go index 672e1fa..82a2fe4 100644 --- a/libgo/go/runtime/proc_test.go +++ b/libgo/go/runtime/proc_test.go @@ -28,6 +28,9 @@ func perpetuumMobile() { } func TestStopTheWorldDeadlock(t *testing.T) { + if runtime.GOARCH == "wasm" { + t.Skip("no preemption on wasm yet") + } if testing.Short() { t.Skip("skipping during short test") } @@ -230,6 +233,10 @@ func TestBlockLocked(t *testing.T) { } func TestTimerFairness(t *testing.T) { + if runtime.GOARCH == "wasm" { + t.Skip("no preemption on wasm yet") + } + done := make(chan bool) c := make(chan bool) for i := 0; i < 2; i++ { @@ -256,6 +263,10 @@ func TestTimerFairness(t *testing.T) { } func TestTimerFairness2(t *testing.T) { + if runtime.GOARCH == "wasm" { + t.Skip("no preemption on wasm yet") + } + done := make(chan bool) c := make(chan bool) for i := 0; i < 2; i++ { @@ -290,7 +301,13 @@ var preempt = func() int { } func TestPreemption(t *testing.T) { - t.Skip("gccgo does not implement preemption") + if runtime.Compiler == "gccgo" { + t.Skip("gccgo does not implement preemption") + } + if runtime.GOARCH == "wasm" { + t.Skip("no preemption on wasm yet") + } + // Test that goroutines are preempted at function calls. N := 5 if testing.Short() { @@ -314,7 +331,13 @@ func TestPreemption(t *testing.T) { } func TestPreemptionGC(t *testing.T) { - t.Skip("gccgo does not implement preemption") + if runtime.Compiler == "gccgo" { + t.Skip("gccgo does not implement preemption") + } + if runtime.GOARCH == "wasm" { + t.Skip("no preemption on wasm yet") + } + // Test that pending GC preempts running goroutines. P := 5 N := 10 @@ -387,6 +410,9 @@ func TestNumGoroutine(t *testing.T) { } func TestPingPongHog(t *testing.T) { + if runtime.GOARCH == "wasm" { + t.Skip("no preemption on wasm yet") + } if testing.Short() { t.Skip("skipping in -short mode") } @@ -837,6 +863,10 @@ func TestStealOrder(t *testing.T) { } func TestLockOSThreadNesting(t *testing.T) { + if runtime.GOARCH == "wasm" { + t.Skip("no threads on wasm yet") + } + go func() { e, i := runtime.LockOSCounts() if e != 0 || i != 0 { diff --git a/libgo/go/runtime/rand_test.go b/libgo/go/runtime/rand_test.go index f8831b0..1b84c79 100644 --- a/libgo/go/runtime/rand_test.go +++ b/libgo/go/runtime/rand_test.go @@ -25,7 +25,7 @@ func BenchmarkFastrandHashiter(b *testing.B) { } b.RunParallel(func(pb *testing.PB) { for pb.Next() { - for _ = range m { + for range m { break } } diff --git a/libgo/go/runtime/runtime-lldb_test.go b/libgo/go/runtime/runtime-lldb_test.go index 9a28705..fe3a0eb 100644 --- a/libgo/go/runtime/runtime-lldb_test.go +++ b/libgo/go/runtime/runtime-lldb_test.go @@ -154,7 +154,9 @@ func TestLldbPython(t *testing.T) { t.Fatalf("failed to create file: %v", err) } - cmd := exec.Command(testenv.GoToolPath(t), "build", "-gcflags=all=-N -l", "-o", "a.exe") + // As of 2018-07-17, lldb doesn't support compressed DWARF, so + // disable it for this test. + cmd := exec.Command(testenv.GoToolPath(t), "build", "-gcflags=all=-N -l", "-ldflags=-compressdwarf=false", "-o", "a.exe") cmd.Dir = dir out, err := cmd.CombinedOutput() if err != nil { diff --git a/libgo/go/runtime/runtime1.go b/libgo/go/runtime/runtime1.go index b617f85..8b1b0a0 100644 --- a/libgo/go/runtime/runtime1.go +++ b/libgo/go/runtime/runtime1.go @@ -326,20 +326,21 @@ type dbgVar struct { // existing int var for that value, which may // already have an initial value. 
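saveAncestors above records, when debug.tracebackancestors is set, which goroutines created the current one; the runtime1.go hunk just below wires that up as a GODEBUG option. To try it, build a program like the sketch below and run it with GODEBUG=tracebackancestors=5. With the gc toolchain the crash traceback gains "[originating from goroutine N]:" sections; how much detail this gccgo port prints is limited by the FIXME in saveAncestors (creator stacks are not yet captured). The binary name is a placeholder.

```go
// Run with: GODEBUG=tracebackancestors=5 ./ancestors
package main

func crash() { panic("boom") }

func startWorker() {
	go crash() // the goroutine that will panic
	select {}  // park; the child's panic terminates the process
}

func main() {
	go startWorker() // an extra ancestor level for the traceback
	select {}
}
```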
var debug struct { - allocfreetrace int32 - cgocheck int32 - efence int32 - gccheckmark int32 - gcpacertrace int32 - gcshrinkstackoff int32 - gcrescanstacks int32 - gcstoptheworld int32 - gctrace int32 - invalidptr int32 - sbrk int32 - scavenge int32 - scheddetail int32 - schedtrace int32 + allocfreetrace int32 + cgocheck int32 + efence int32 + gccheckmark int32 + gcpacertrace int32 + gcshrinkstackoff int32 + gcrescanstacks int32 + gcstoptheworld int32 + gctrace int32 + invalidptr int32 + sbrk int32 + scavenge int32 + scheddetail int32 + schedtrace int32 + tracebackancestors int32 } var dbgvars = []dbgVar{ @@ -357,6 +358,7 @@ var dbgvars = []dbgVar{ {"scavenge", &debug.scavenge}, {"scheddetail", &debug.scheddetail}, {"schedtrace", &debug.schedtrace}, + {"tracebackancestors", &debug.tracebackancestors}, } func parsedebugvars() { diff --git a/libgo/go/runtime/runtime2.go b/libgo/go/runtime/runtime2.go index 2de1cc8..e12e832 100644 --- a/libgo/go/runtime/runtime2.go +++ b/libgo/go/runtime/runtime2.go @@ -353,28 +353,29 @@ type g struct { atomicstatus uint32 // Not for gccgo: stackLock uint32 // sigprof/scang lock; TODO: fold in to atomicstatus goid int64 - waitsince int64 // approx time when the g become blocked - waitreason string // if status==Gwaiting schedlink guintptr - preempt bool // preemption signal, duplicates stackguard0 = stackpreempt - paniconfault bool // panic (instead of crash) on unexpected fault address - preemptscan bool // preempted g does scan for gc - gcscandone bool // g has scanned stack; protected by _Gscan bit in status - gcscanvalid bool // false at start of gc cycle, true if G has not run since last scan; TODO: remove? - throwsplit bool // must not split stack - raceignore int8 // ignore race detection events - sysblocktraced bool // StartTrace has emitted EvGoInSyscall about this goroutine - sysexitticks int64 // cputicks when syscall has returned (for tracing) - traceseq uint64 // trace event sequencer - tracelastp puintptr // last P emitted an event for this goroutine + waitsince int64 // approx time when the g become blocked + waitreason waitReason // if status==Gwaiting + preempt bool // preemption signal, duplicates stackguard0 = stackpreempt + paniconfault bool // panic (instead of crash) on unexpected fault address + preemptscan bool // preempted g does scan for gc + gcscandone bool // g has scanned stack; protected by _Gscan bit in status + gcscanvalid bool // false at start of gc cycle, true if G has not run since last scan; TODO: remove? 
+ throwsplit bool // must not split stack + raceignore int8 // ignore race detection events + sysblocktraced bool // StartTrace has emitted EvGoInSyscall about this goroutine + sysexitticks int64 // cputicks when syscall has returned (for tracing) + traceseq uint64 // trace event sequencer + tracelastp puintptr // last P emitted an event for this goroutine lockedm muintptr sig uint32 writebuf []byte sigcode0 uintptr sigcode1 uintptr sigpc uintptr - gopc uintptr // pc of go statement that created this goroutine - startpc uintptr // pc of goroutine function + gopc uintptr // pc of go statement that created this goroutine + ancestors *[]ancestorInfo // ancestor information goroutine(s) that created this goroutine (only used if debug.tracebackancestors) + startpc uintptr // pc of goroutine function // Not for gccgo: racectx uintptr waiting *sudog // sudog structures this g is waiting on (that have a valid elem ptr); in lock order // Not for gccgo: cgoCtxt []uintptr // cgo traceback context @@ -476,15 +477,12 @@ type m struct { ncgo int32 // number of cgo calls currently in progress // Not for gccgo: cgoCallersUse uint32 // if non-zero, cgoCallers in use temporarily // Not for gccgo: cgoCallers *cgoCallers // cgo traceback if crashing in cgo call - park note - alllink *m // on allm - schedlink muintptr - mcache *mcache - lockedg guintptr - createstack [32]location // stack that created this thread. - // Not for gccgo: freglo [16]uint32 // d[i] lsb and f[i] - // Not for gccgo: freghi [16]uint32 // d[i] msb and f[i+16] - // Not for gccgo: fflag uint32 // floating point compare flags + park note + alllink *m // on allm + schedlink muintptr + mcache *mcache + lockedg guintptr + createstack [32]location // stack that created this thread. lockedExt uint32 // tracking for external LockOSThread lockedInt uint32 // tracking for internal lockOSThread nextwaitm muintptr // next m waiting for lock @@ -773,6 +771,13 @@ type _panic struct { aborted bool } +// ancestorInfo records details of where a goroutine was started. +type ancestorInfo struct { + pcs []uintptr // pcs from the stack of this goroutine + goid int64 // goroutine id of this goroutine; original goroutine possibly dead + gopc uintptr // pc of go statement that created this goroutine +} + const ( _TraceRuntimeFrames = 1 << iota // include frames for internal runtime functions. _TraceTrap // the initial PC, SP are from a trap, not a return PC from a call @@ -782,6 +787,71 @@ const ( // The maximum number of frames we print for a traceback const _TracebackMaxFrames = 100 +// A waitReason explains why a goroutine has been stopped. +// See gopark. Do not re-use waitReasons, add new ones. 
+type waitReason uint8 + +const ( + waitReasonZero waitReason = iota // "" + waitReasonGCAssistMarking // "GC assist marking" + waitReasonIOWait // "IO wait" + waitReasonChanReceiveNilChan // "chan receive (nil chan)" + waitReasonChanSendNilChan // "chan send (nil chan)" + waitReasonDumpingHeap // "dumping heap" + waitReasonGarbageCollection // "garbage collection" + waitReasonGarbageCollectionScan // "garbage collection scan" + waitReasonPanicWait // "panicwait" + waitReasonSelect // "select" + waitReasonSelectNoCases // "select (no cases)" + waitReasonGCAssistWait // "GC assist wait" + waitReasonGCSweepWait // "GC sweep wait" + waitReasonChanReceive // "chan receive" + waitReasonChanSend // "chan send" + waitReasonFinalizerWait // "finalizer wait" + waitReasonForceGGIdle // "force gc (idle)" + waitReasonSemacquire // "semacquire" + waitReasonSleep // "sleep" + waitReasonSyncCondWait // "sync.Cond.Wait" + waitReasonTimerGoroutineIdle // "timer goroutine (idle)" + waitReasonTraceReaderBlocked // "trace reader (blocked)" + waitReasonWaitForGCCycle // "wait for GC cycle" + waitReasonGCWorkerIdle // "GC worker (idle)" +) + +var waitReasonStrings = [...]string{ + waitReasonZero: "", + waitReasonGCAssistMarking: "GC assist marking", + waitReasonIOWait: "IO wait", + waitReasonChanReceiveNilChan: "chan receive (nil chan)", + waitReasonChanSendNilChan: "chan send (nil chan)", + waitReasonDumpingHeap: "dumping heap", + waitReasonGarbageCollection: "garbage collection", + waitReasonGarbageCollectionScan: "garbage collection scan", + waitReasonPanicWait: "panicwait", + waitReasonSelect: "select", + waitReasonSelectNoCases: "select (no cases)", + waitReasonGCAssistWait: "GC assist wait", + waitReasonGCSweepWait: "GC sweep wait", + waitReasonChanReceive: "chan receive", + waitReasonChanSend: "chan send", + waitReasonFinalizerWait: "finalizer wait", + waitReasonForceGGIdle: "force gc (idle)", + waitReasonSemacquire: "semacquire", + waitReasonSleep: "sleep", + waitReasonSyncCondWait: "sync.Cond.Wait", + waitReasonTimerGoroutineIdle: "timer goroutine (idle)", + waitReasonTraceReaderBlocked: "trace reader (blocked)", + waitReasonWaitForGCCycle: "wait for GC cycle", + waitReasonGCWorkerIdle: "GC worker (idle)", +} + +func (w waitReason) String() string { + if w < 0 || w >= waitReason(len(waitReasonStrings)) { + return "unknown wait reason" + } + return waitReasonStrings[w] +} + var ( allglen uintptr allm *m @@ -793,23 +863,7 @@ var ( sched schedt newprocs int32 - // Information about what cpu features are available. - // Set on startup in asm_{x86,amd64}.s. - // Packages outside the runtime should not use these - // as they are not an external api. - cpuid_ecx uint32 support_aes bool - - // cpuid_edx uint32 - // cpuid_ebx7 uint32 - // lfenceBeforeRdtsc bool - // support_avx bool - // support_avx2 bool - // support_bmi1 bool - // support_bmi2 bool - -// goarm uint8 // set by cmd/link on arm systems -// framepointer_enabled bool // set by cmd/link ) // Set by the linker so the runtime can determine the buildmode. 
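
The runtime2.go hunk above replaces the free-form waitreason string with a one-byte waitReason enum backed by a parallel string table. A minimal standalone sketch of the same pattern, with invented names and values:

package main

import "fmt"

// waitKind follows the same pattern as waitReason above: a compact
// unsigned enum plus a parallel string table indexed by the enum value.
type waitKind uint8

const (
	waitNone waitKind = iota // ""
	waitChanReceive          // "chan receive"
	waitChanSend             // "chan send"
	waitSleep                // "sleep"
)

var waitKindStrings = [...]string{
	waitNone:        "",
	waitChanReceive: "chan receive",
	waitChanSend:    "chan send",
	waitSleep:       "sleep",
}

func (w waitKind) String() string {
	if w >= waitKind(len(waitKindStrings)) {
		return "unknown wait reason"
	}
	return waitKindStrings[w]
}

func main() {
	fmt.Println(waitChanSend)  // chan send
	fmt.Println(waitKind(200)) // unknown wait reason
}

Since the underlying type is unsigned, only the upper-bound check in String can ever fire; the `w < 0` half of the runtime's version is vacuously false.
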
diff --git a/libgo/go/runtime/runtime_test.go b/libgo/go/runtime/runtime_test.go index 0231043..995ce25 100644 --- a/libgo/go/runtime/runtime_test.go +++ b/libgo/go/runtime/runtime_test.go @@ -169,6 +169,9 @@ func testSetPanicOnFault(t *testing.T, addr uintptr, nfault *int) { if GOOS == "nacl" { t.Skip("nacl doesn't seem to fault on high addresses") } + if GOOS == "js" { + t.Skip("js does not support catching faults") + } defer func() { if err := recover(); err != nil { @@ -266,7 +269,7 @@ func TestTrailingZero(t *testing.T) { */ func TestBadOpen(t *testing.T) { - if GOOS == "windows" || GOOS == "nacl" { + if GOOS == "windows" || GOOS == "nacl" || GOOS == "js" { t.Skip("skipping OS that doesn't have open/read/write/close") } // make sure we get the correct error code if open fails. Same for diff --git a/libgo/go/runtime/rwmutex_test.go b/libgo/go/runtime/rwmutex_test.go index 872b3b0..291a32e 100644 --- a/libgo/go/runtime/rwmutex_test.go +++ b/libgo/go/runtime/rwmutex_test.go @@ -47,6 +47,9 @@ func doTestParallelReaders(numReaders int) { } func TestParallelRWMutexReaders(t *testing.T) { + if GOARCH == "wasm" { + t.Skip("wasm has no threads yet") + } defer GOMAXPROCS(GOMAXPROCS(-1)) // If runtime triggers a forced GC during this test then it will deadlock, // since the goroutines can't be stopped/preempted. diff --git a/libgo/go/runtime/select.go b/libgo/go/runtime/select.go index 9dab052..39c12da 100644 --- a/libgo/go/runtime/select.go +++ b/libgo/go/runtime/select.go @@ -94,7 +94,7 @@ func selparkcommit(gp *g, _ unsafe.Pointer) bool { } func block() { - gopark(nil, nil, "select (no cases)", traceEvGoStop, 1) // forever + gopark(nil, nil, waitReasonSelectNoCases, traceEvGoStop, 1) // forever } // selectgo implements the select statement. @@ -307,7 +307,7 @@ loop: // wait for someone to wake us up gp.param = nil - gopark(selparkcommit, nil, "select", traceEvGoBlockSelect, 1) + gopark(selparkcommit, nil, waitReasonSelect, traceEvGoBlockSelect, 1) sellock(scases, lockorder) diff --git a/libgo/go/runtime/sema.go b/libgo/go/runtime/sema.go index 6e2beec..cb7d3cd 100644 --- a/libgo/go/runtime/sema.go +++ b/libgo/go/runtime/sema.go @@ -15,7 +15,7 @@ // even if, due to races, the wakeup happens before the sleep. // // See Mullender and Cox, ``Semaphores in Plan 9,'' -// http://swtch.com/semaphore.pdf +// https://swtch.com/semaphore.pdf package runtime @@ -141,7 +141,7 @@ func semacquire1(addr *uint32, lifo bool, profile semaProfileFlags) { // Any semrelease after the cansemacquire knows we're waiting // (we set nwait above), so go to sleep. root.queue(addr, s, lifo) - goparkunlock(&root.lock, "semacquire", traceEvGoBlockSync, 4) + goparkunlock(&root.lock, waitReasonSemacquire, traceEvGoBlockSync, 4) if s.ticket != 0 || cansemacquire(addr) { break } @@ -274,7 +274,7 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) { // addresses, it is kept balanced on average by maintaining a heap ordering // on the ticket: s.ticket <= both s.prev.ticket and s.next.ticket. // https://en.wikipedia.org/wiki/Treap - // http://faculty.washington.edu/aragon/pubs/rst89.pdf + // https://faculty.washington.edu/aragon/pubs/rst89.pdf // // s.ticket compared with zero in couple of places, therefore set lowest bit. // It will not affect treap's quality noticeably. 
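
Many of the test changes above share one guard shape: consult runtime.GOOS/GOARCH first and skip with a message explaining the missing capability. A sketch of that pattern in an ordinary test (the test name and body are invented):

package example

import (
	"runtime"
	"testing"
)

// TestNeedsThreadsAndPreemption shows the guard style used by the
// runtime tests in this patch: skip early on platforms that lack a
// required capability, with a message saying why.
func TestNeedsThreadsAndPreemption(t *testing.T) {
	if runtime.GOOS == "js" || runtime.GOARCH == "wasm" {
		t.Skip("js/wasm has no threads or preemption yet")
	}
	if testing.Short() {
		t.Skip("skipping in -short mode")
	}
	// ... the real body would exercise preemption or OS threads ...
}
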
@@ -507,7 +507,7 @@ func notifyListWait(l *notifyList, t uint32) { l.tail.next = s } l.tail = s - goparkunlock(&l.lock, "semacquire", traceEvGoBlockCond, 3) + goparkunlock(&l.lock, waitReasonSyncCondWait, traceEvGoBlockCond, 3) if t0 != 0 { blockevent(s.releasetime-t0, 2) } diff --git a/libgo/go/runtime/signal_sighandler.go b/libgo/go/runtime/signal_sighandler.go index 698629d..e4bf7bc 100644 --- a/libgo/go/runtime/signal_sighandler.go +++ b/libgo/go/runtime/signal_sighandler.go @@ -14,6 +14,11 @@ import ( // GOTRACEBACK=crash when a signal is received. var crashing int32 +// testSigtrap is used by the runtime tests. If non-nil, it is called +// on SIGTRAP. If it returns true, the normal behavior on SIGTRAP is +// suppressed. +var testSigtrap func(info *_siginfo_t, ctxt *sigctxt, gp *g) bool + // sighandler is invoked when a signal occurs. The global g will be // set to a gsignal goroutine and we will be running on the alternate // signal stack. The parameter g will be the value of the global g @@ -27,7 +32,7 @@ var crashing int32 //go:nowritebarrierrec func sighandler(sig uint32, info *_siginfo_t, ctxt unsafe.Pointer, gp *g) { _g_ := getg() - c := sigctxt{info, ctxt} + c := &sigctxt{info, ctxt} sigfault, sigpc := getSiginfo(info, ctxt) @@ -36,6 +41,10 @@ func sighandler(sig uint32, info *_siginfo_t, ctxt unsafe.Pointer, gp *g) { return } + if sig == _SIGTRAP && testSigtrap != nil && testSigtrap(info, (*sigctxt)(noescape(unsafe.Pointer(c))), gp) { + return + } + flags := int32(_SigThrow) if sig < uint32(len(sigtable)) { flags = sigtable[sig].flags @@ -45,6 +54,11 @@ func sighandler(sig uint32, info *_siginfo_t, ctxt unsafe.Pointer, gp *g) { // stack. Abort in the signal handler instead. flags = (flags &^ _SigPanic) | _SigThrow } + if isAbortPC(sigpc) { + // On many architectures, the abort function just + // causes a memory fault. Don't turn that into a panic. + flags = _SigThrow + } if c.sigcode() != _SI_USER && flags&_SigPanic != 0 { // Emulate gc by passing arguments out of band, // although we don't really have to. @@ -87,7 +101,7 @@ func sighandler(sig uint32, info *_siginfo_t, ctxt unsafe.Pointer, gp *g) { _g_.m.caughtsig.set(gp) if crashing == 0 { - startpanic() + startpanic_m() } if sig < uint32(len(sigtable)) { diff --git a/libgo/go/runtime/signal_unix.go b/libgo/go/runtime/signal_unix.go index a8f77fa..84623d3 100644 --- a/libgo/go/runtime/signal_unix.go +++ b/libgo/go/runtime/signal_unix.go @@ -112,6 +112,8 @@ func initsig(preinit bool) { // set SA_ONSTACK if necessary. if fwdSig[i] != _SIG_DFL && fwdSig[i] != _SIG_IGN { setsigstack(i) + } else if fwdSig[i] == _SIG_IGN { + sigInitIgnored(i) } continue } @@ -398,14 +400,6 @@ func dieFromSignal(sig uint32) { osyield() osyield() - // On Darwin we may still fail to die, because raise sends the - // signal to the whole process rather than just the current thread, - // and osyield just sleeps briefly rather than letting all other - // threads run. See issue 20315. Sleep longer. - if GOOS == "darwin" { - usleep(100) - } - // If we are still somehow running, just exit with the wrong status. exit(2) } @@ -444,7 +438,10 @@ func raisebadsignal(sig uint32, c *sigctxt) { // re-installing sighandler. At this point we can just // return and the signal will be re-raised and caught by // the default handler with the correct context. - if (isarchive || islibrary) && handler == _SIG_DFL && c.sigcode() != _SI_USER { + // + // On FreeBSD, the libthr sigaction code prevents + // this from working so we fall through to raise. 
+ if GOOS != "freebsd" && (isarchive || islibrary) && handler == _SIG_DFL && c.sigcode() != _SI_USER { return } @@ -464,6 +461,7 @@ func raisebadsignal(sig uint32, c *sigctxt) { setsig(sig, getSigtramp()) } +//go:nosplit func crash() { if GOOS == "darwin" { // OS X core dumps are linear dumps of the mapped memory, diff --git a/libgo/go/runtime/sigqueue.go b/libgo/go/runtime/sigqueue.go index b108c39..cf926a9 100644 --- a/libgo/go/runtime/sigqueue.go +++ b/libgo/go/runtime/sigqueue.go @@ -237,7 +237,18 @@ func signal_ignore(s uint32) { atomic.Store(&sig.ignored[s/32], i) } +// sigInitIgnored marks the signal as already ignored. This is called at +// program start by initsig. In a shared library initsig is called by +// libpreinit, so the runtime may not be initialized yet. +//go:nosplit +func sigInitIgnored(s uint32) { + i := sig.ignored[s/32] + i |= 1 << (s & 31) + atomic.Store(&sig.ignored[s/32], i) +} + // Checked by signal handlers. +//go:linkname signal_ignored os_signal.signal_ignored func signal_ignored(s uint32) bool { i := atomic.Load(&sig.ignored[s/32]) return i&(1<<(s&31)) != 0 diff --git a/libgo/go/runtime/sizeof_test.go b/libgo/go/runtime/sizeof_test.go new file mode 100644 index 0000000..ecda82a --- /dev/null +++ b/libgo/go/runtime/sizeof_test.go @@ -0,0 +1,43 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !nacl + +package runtime_test + +import ( + "reflect" + "runtime" + "testing" + "unsafe" +) + +// Assert that the size of important structures do not change unexpectedly. + +func TestSizeof(t *testing.T) { + if runtime.Compiler != "gc" { + t.Skip("skipping size test; specific to gc compiler") + } + + const _64bit = unsafe.Sizeof(uintptr(0)) == 8 + + var tests = []struct { + val interface{} // type as a value + _32bit uintptr // size on 32bit platforms + _64bit uintptr // size on 64bit platforms + }{ + {runtime.G{}, 216, 376}, // g, but exported for testing + } + + for _, tt := range tests { + want := tt._32bit + if _64bit { + want = tt._64bit + } + got := reflect.TypeOf(tt.val).Size() + if want != got { + t.Errorf("unsafe.Sizeof(%T) = %d, want %d", tt.val, got, want) + } + } +} diff --git a/libgo/go/runtime/slice.go b/libgo/go/runtime/slice.go index ec5aa64..2e874cc 100644 --- a/libgo/go/runtime/slice.go +++ b/libgo/go/runtime/slice.go @@ -5,6 +5,7 @@ package runtime import ( + "runtime/internal/sys" "unsafe" ) @@ -34,14 +35,14 @@ type notInHeapSlice struct { // The index is the size of the slice element. 
var maxElems = [...]uintptr{ ^uintptr(0), - _MaxMem / 1, _MaxMem / 2, _MaxMem / 3, _MaxMem / 4, - _MaxMem / 5, _MaxMem / 6, _MaxMem / 7, _MaxMem / 8, - _MaxMem / 9, _MaxMem / 10, _MaxMem / 11, _MaxMem / 12, - _MaxMem / 13, _MaxMem / 14, _MaxMem / 15, _MaxMem / 16, - _MaxMem / 17, _MaxMem / 18, _MaxMem / 19, _MaxMem / 20, - _MaxMem / 21, _MaxMem / 22, _MaxMem / 23, _MaxMem / 24, - _MaxMem / 25, _MaxMem / 26, _MaxMem / 27, _MaxMem / 28, - _MaxMem / 29, _MaxMem / 30, _MaxMem / 31, _MaxMem / 32, + maxAlloc / 1, maxAlloc / 2, maxAlloc / 3, maxAlloc / 4, + maxAlloc / 5, maxAlloc / 6, maxAlloc / 7, maxAlloc / 8, + maxAlloc / 9, maxAlloc / 10, maxAlloc / 11, maxAlloc / 12, + maxAlloc / 13, maxAlloc / 14, maxAlloc / 15, maxAlloc / 16, + maxAlloc / 17, maxAlloc / 18, maxAlloc / 19, maxAlloc / 20, + maxAlloc / 21, maxAlloc / 22, maxAlloc / 23, maxAlloc / 24, + maxAlloc / 25, maxAlloc / 26, maxAlloc / 27, maxAlloc / 28, + maxAlloc / 29, maxAlloc / 30, maxAlloc / 31, maxAlloc / 32, } // maxSliceCap returns the maximum capacity for a slice. @@ -49,7 +50,15 @@ func maxSliceCap(elemsize uintptr) uintptr { if elemsize < uintptr(len(maxElems)) { return maxElems[elemsize] } - return _MaxMem / elemsize + return maxAlloc / elemsize +} + +func panicmakeslicelen() { + panic(errorString("makeslice: len out of range")) +} + +func panicmakeslicecap() { + panic(errorString("makeslice: cap out of range")) } func makeslice(et *_type, len, cap int) slice { @@ -60,11 +69,11 @@ func makeslice(et *_type, len, cap int) slice { // See issue 4085. maxElements := maxSliceCap(et.size) if len < 0 || uintptr(len) > maxElements { - panic(errorString("makeslice: len out of range")) + panicmakeslicelen() } if cap < len || uintptr(cap) > maxElements { - panic(errorString("makeslice: cap out of range")) + panicmakeslicecap() } p := mallocgc(et.size*uintptr(cap), et, true) @@ -74,12 +83,12 @@ func makeslice(et *_type, len, cap int) slice { func makeslice64(et *_type, len64, cap64 int64) slice { len := int(len64) if int64(len) != len64 { - panic(errorString("makeslice: len out of range")) + panicmakeslicelen() } cap := int(cap64) if int64(cap) != cap64 { - panic(errorString("makeslice: cap out of range")) + panicmakeslicecap() } return makeslice(et, len, cap) @@ -131,20 +140,36 @@ func growslice(et *_type, old slice, cap int) slice { var overflow bool var lenmem, newlenmem, capmem uintptr - const ptrSize = unsafe.Sizeof((*byte)(nil)) - switch et.size { - case 1: + // Specialize for common values of et.size. + // For 1 we don't need any division/multiplication. + // For sys.PtrSize, compiler will optimize division/multiplication into a shift by a constant. + // For powers of 2, use a variable shift. + switch { + case et.size == 1: lenmem = uintptr(old.len) newlenmem = uintptr(cap) capmem = roundupsize(uintptr(newcap)) - overflow = uintptr(newcap) > _MaxMem + overflow = uintptr(newcap) > maxAlloc newcap = int(capmem) - case ptrSize: - lenmem = uintptr(old.len) * ptrSize - newlenmem = uintptr(cap) * ptrSize - capmem = roundupsize(uintptr(newcap) * ptrSize) - overflow = uintptr(newcap) > _MaxMem/ptrSize - newcap = int(capmem / ptrSize) + case et.size == sys.PtrSize: + lenmem = uintptr(old.len) * sys.PtrSize + newlenmem = uintptr(cap) * sys.PtrSize + capmem = roundupsize(uintptr(newcap) * sys.PtrSize) + overflow = uintptr(newcap) > maxAlloc/sys.PtrSize + newcap = int(capmem / sys.PtrSize) + case isPowerOfTwo(et.size): + var shift uintptr + if sys.PtrSize == 8 { + // Mask shift for better code generation. 
+ shift = uintptr(sys.Ctz64(uint64(et.size))) & 63 + } else { + shift = uintptr(sys.Ctz32(uint32(et.size))) & 31 + } + lenmem = uintptr(old.len) << shift + newlenmem = uintptr(cap) << shift + capmem = roundupsize(uintptr(newcap) << shift) + overflow = uintptr(newcap) > (maxAlloc >> shift) + newcap = int(capmem >> shift) default: lenmem = uintptr(old.len) * et.size newlenmem = uintptr(cap) * et.size @@ -167,7 +192,7 @@ func growslice(et *_type, old slice, cap int) slice { // s = append(s, d, d, d, d) // print(len(s), "\n") // } - if cap < old.cap || overflow || capmem > _MaxMem { + if cap < old.cap || overflow || capmem > maxAlloc { panic(errorString("growslice: cap out of range")) } @@ -193,6 +218,10 @@ func growslice(et *_type, old slice, cap int) slice { return slice{p, cap, newcap} } +func isPowerOfTwo(x uintptr) bool { + return x&(x-1) == 0 +} + func slicecopy(to, fm slice, width uintptr) int { if fm.len == 0 || to.len == 0 { return 0 diff --git a/libgo/go/runtime/append_test.go b/libgo/go/runtime/slice_test.go index ef1e812..c2dfb7a 100644 --- a/libgo/go/runtime/append_test.go +++ b/libgo/go/runtime/slice_test.go @@ -31,6 +31,12 @@ func BenchmarkGrowSlice(b *testing.B) { _ = append([]byte(nil), x...) } }) + b.Run("Int16", func(b *testing.B) { + x := make([]int16, 9) + for i := 0; i < b.N; i++ { + _ = append([]int16(nil), x...) + } + }) b.Run("Int", func(b *testing.B) { x := make([]int, 9) for i := 0; i < b.N; i++ { @@ -66,6 +72,36 @@ func BenchmarkGrowSlice(b *testing.B) { }) } +var ( + SinkIntSlice []int + SinkIntPointerSlice []*int +) + +func BenchmarkExtendSlice(b *testing.B) { + var length = 4 // Use a variable to prevent stack allocation of slices. + b.Run("IntSlice", func(b *testing.B) { + s := make([]int, 0, length) + for i := 0; i < b.N; i++ { + s = append(s[:0:length/2], make([]int, length)...) + } + SinkIntSlice = s + }) + b.Run("PointerSlice", func(b *testing.B) { + s := make([]*int, 0, length) + for i := 0; i < b.N; i++ { + s = append(s[:0:length/2], make([]*int, length)...) + } + SinkIntPointerSlice = s + }) + b.Run("NoGrow", func(b *testing.B) { + s := make([]int, 0, length) + for i := 0; i < b.N; i++ { + s = append(s[:0:length], make([]int, length)...) + } + SinkIntSlice = s + }) +} + func BenchmarkAppend(b *testing.B) { b.StopTimer() x := make([]int, 0, N) diff --git a/libgo/go/runtime/string.go b/libgo/go/runtime/string.go index e8df9a6..5296ebd 100644 --- a/libgo/go/runtime/string.go +++ b/libgo/go/runtime/string.go @@ -4,7 +4,10 @@ package runtime -import "unsafe" +import ( + "internal/bytealg" + "unsafe" +) // For gccgo, use go:linkname to rename compiler-called functions to // themselves, so that the compiler will export them. @@ -105,6 +108,11 @@ func slicebytetostring(buf *tmpBuf, b []byte) (str string) { if msanenabled { msanread(unsafe.Pointer(&b[0]), uintptr(l)) } + if l == 1 { + stringStructOf(&str).str = unsafe.Pointer(&staticbytes[b[0]]) + stringStructOf(&str).len = 1 + return + } var p unsafe.Pointer if buf != nil && len(b) <= len(buf) { @@ -232,8 +240,13 @@ func stringStructOf(sp *string) *stringStruct { return (*stringStruct)(unsafe.Pointer(sp)) } -func intstring(buf *[4]byte, v int64) string { - var s string +func intstring(buf *[4]byte, v int64) (s string) { + if v >= 0 && v < runeSelf { + stringStructOf(&s).str = unsafe.Pointer(&staticbytes[v]) + stringStructOf(&s).len = 1 + return + } + var b []byte if buf != nil { b = buf[:] @@ -277,7 +290,7 @@ func rawbyteslice(size int) (b []byte) { // rawruneslice allocates a new rune slice. 
The rune slice is not zeroed. func rawruneslice(size int) (b []rune) { - if uintptr(size) > _MaxMem/4 { + if uintptr(size) > maxAlloc/4 { throw("out of memory") } mem := roundupsize(uintptr(size) * 4) @@ -291,13 +304,20 @@ func rawruneslice(size int) (b []rune) { } // used by cmd/cgo -func gobytes(p *byte, n int) []byte { +func gobytes(p *byte, n int) (b []byte) { if n == 0 { return make([]byte, 0) } - x := make([]byte, n) - memmove(unsafe.Pointer(&x[0]), unsafe.Pointer(p), uintptr(n)) - return x + + if n < 0 || uintptr(n) > maxAlloc { + panic(errorString("gobytes: length out of range")) + } + + bp := mallocgc(uintptr(n), nil, false) + memmove(bp, unsafe.Pointer(p), uintptr(n)) + + *(*slice)(unsafe.Pointer(&b)) = slice{bp, n, n} + return } func gostring(p *byte) string { @@ -406,19 +426,50 @@ func findnull(s *byte) int { if s == nil { return 0 } - p := (*[_MaxMem/2 - 1]byte)(unsafe.Pointer(s)) - l := 0 - for p[l] != 0 { - l++ + + // Avoid IndexByteString on Plan 9 because it uses SSE instructions + // on x86 machines, and those are classified as floating point instructions, + // which are illegal in a note handler. + if GOOS == "plan9" { + p := (*[maxAlloc/2 - 1]byte)(unsafe.Pointer(s)) + l := 0 + for p[l] != 0 { + l++ + } + return l + } + + // pageSize is the unit we scan at a time looking for NULL. + // It must be the minimum page size for any architecture Go + // runs on. It's okay (just a minor performance loss) if the + // actual system page size is larger than this value. + const pageSize = 4096 + + offset := 0 + ptr := unsafe.Pointer(s) + // IndexByteString uses wide reads, so we need to be careful + // with page boundaries. Call IndexByteString on + // [ptr, endOfPage) interval. + safeLen := int(pageSize - uintptr(ptr)%pageSize) + + for { + t := *(*string)(unsafe.Pointer(&stringStruct{ptr, safeLen})) + // Check one page at a time. + if i := bytealg.IndexByteString(t, 0); i != -1 { + return offset + i + } + // Move to next page + ptr = unsafe.Pointer(uintptr(ptr) + uintptr(safeLen)) + offset += safeLen + safeLen = pageSize } - return l } func findnullw(s *uint16) int { if s == nil { return 0 } - p := (*[_MaxMem/2/2 - 1]uint16)(unsafe.Pointer(s)) + p := (*[maxAlloc/2/2 - 1]uint16)(unsafe.Pointer(s)) l := 0 for p[l] != 0 { l++ @@ -435,7 +486,7 @@ func gostringnocopy(str *byte) string { func gostringw(strw *uint16) string { var buf [8]byte - str := (*[_MaxMem/2/2 - 1]uint16)(unsafe.Pointer(strw)) + str := (*[maxAlloc/2/2 - 1]uint16)(unsafe.Pointer(strw)) n1 := 0 for i := 0; str[i] != 0; i++ { n1 += encoderune(buf[:], rune(str[i])) diff --git a/libgo/go/runtime/string_test.go b/libgo/go/runtime/string_test.go index 555a7fc..03327bb 100644 --- a/libgo/go/runtime/string_test.go +++ b/libgo/go/runtime/string_test.go @@ -9,6 +9,7 @@ import ( "strconv" "strings" "testing" + "unicode/utf8" ) // Strings and slices that don't escape and fit into tmpBuf are stack allocated, @@ -110,6 +111,43 @@ var stringdata = []struct{ name, data string }{ {"MixedLength", "$Ѐࠀက퀀𐀀\U00040000\U0010FFFF"}, } +var sinkInt int + +func BenchmarkRuneCount(b *testing.B) { + // Each sub-benchmark counts the runes in a string in a different way. 
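
The findnull rewrite above only ever hands IndexByteString one page worth of bytes so that its wide reads cannot cross a page boundary. A standalone analogue over an ordinary byte slice, shown just for the loop structure (with a plain slice the page arithmetic is not actually needed):

package main

import (
	"bytes"
	"fmt"
)

// pageSize mirrors the conservative 4096-byte unit findnull scans at a
// time so that a wide IndexByte never reads across a page boundary.
const pageSize = 4096

// findNUL scans p for the first zero byte, at most one "page" per
// IndexByte call. The real code works on unmanaged C memory; startAddr
// here only demonstrates how the first chunk is shortened.
func findNUL(p []byte, startAddr uintptr) int {
	offset := 0
	safeLen := int(pageSize - startAddr%pageSize) // room left on the first page
	for offset < len(p) {
		end := offset + safeLen
		if end > len(p) {
			end = len(p)
		}
		if i := bytes.IndexByte(p[offset:end], 0); i != -1 {
			return offset + i
		}
		offset = end
		safeLen = pageSize
	}
	return -1
}

func main() {
	buf := append(bytes.Repeat([]byte{'x'}, 5000), 0)
	fmt.Println(findNUL(buf, 123)) // 5000
}
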
+ b.Run("lenruneslice", func(b *testing.B) { + for _, sd := range stringdata { + b.Run(sd.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + sinkInt += len([]rune(sd.data)) + } + }) + } + }) + b.Run("rangeloop", func(b *testing.B) { + for _, sd := range stringdata { + b.Run(sd.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + n := 0 + for range sd.data { + n++ + } + sinkInt += n + } + }) + } + }) + b.Run("utf8.RuneCountInString", func(b *testing.B) { + for _, sd := range stringdata { + b.Run(sd.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + sinkInt += utf8.RuneCountInString(sd.data) + } + }) + } + }) +} + func BenchmarkRuneIterate(b *testing.B) { b.Run("range", func(b *testing.B) { for _, sd := range stringdata { @@ -125,7 +163,7 @@ func BenchmarkRuneIterate(b *testing.B) { for _, sd := range stringdata { b.Run(sd.name, func(b *testing.B) { for i := 0; i < b.N; i++ { - for _ = range sd.data { + for range sd.data { } } }) @@ -135,7 +173,7 @@ func BenchmarkRuneIterate(b *testing.B) { for _, sd := range stringdata { b.Run(sd.name, func(b *testing.B) { for i := 0; i < b.N; i++ { - for _, _ = range sd.data { + for range sd.data { } } }) diff --git a/libgo/go/runtime/stubs.go b/libgo/go/runtime/stubs.go index 1d21445..1aae4f3 100644 --- a/libgo/go/runtime/stubs.go +++ b/libgo/go/runtime/stubs.go @@ -69,8 +69,13 @@ func systemstack(fn func()) { } } +var badsystemstackMsg = "fatal: systemstack called from unexpected goroutine" + +//go:nosplit +//go:nowritebarrierrec func badsystemstack() { - throw("systemstack called from unexpected goroutine") + sp := stringStructOf(&badsystemstackMsg) + write(2, sp.str, int32(sp.len)) } // memclrNoHeapPointers clears n bytes starting at ptr. @@ -127,7 +132,7 @@ func fastrand() uint32 { //go:nosplit func fastrandn(n uint32) uint32 { // This is similar to fastrand() % n, but faster. - // See http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ + // See https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ return uint32(uint64(fastrand()) * uint64(n) >> 32) } @@ -198,7 +203,6 @@ func publicationBarrier() // getcallerpc returns the program counter (PC) of its caller's caller. // getcallersp returns the stack pointer (SP) of its caller's caller. -// argp must be a pointer to the caller's first function argument. // The implementation may be a compiler intrinsic; there is not // necessarily code implementing this on every platform. // @@ -213,10 +217,7 @@ func publicationBarrier() // the call to f (where f will return). // // The call to getcallerpc and getcallersp must be done in the -// frame being asked about. It would not be correct for f to pass &arg1 -// to another function g and let g call getcallerpc/getcallersp. -// The call inside g might return information about g's caller or -// information about f's caller or complete garbage. +// frame being asked about. // // The result of getcallersp is correct at the time of the return, // but it may be invalidated by any subsequent call to a function @@ -228,7 +229,7 @@ func publicationBarrier() func getcallerpc() uintptr //go:noescape -func getcallersp() uintptr +func getcallersp() uintptr // implemented as an intrinsic on all platforms func asmcgocall(fn, arg unsafe.Pointer) int32 { throw("asmcgocall") @@ -293,12 +294,6 @@ func setIsCgo() { } // For gccgo, to communicate from the C code to the Go code. 
-//go:linkname setCpuidECX runtime.setCpuidECX -func setCpuidECX(v uint32) { - cpuid_ecx = v -} - -// For gccgo, to communicate from the C code to the Go code. //go:linkname setSupportAES runtime.setSupportAES func setSupportAES(v bool) { support_aes = v @@ -336,6 +331,9 @@ func getSiginfo(*_siginfo_t, unsafe.Pointer) (sigaddr uintptr, sigpc uintptr) // Implemented in C for gccgo. func dumpregs(*_siginfo_t, unsafe.Pointer) +// Implemented in C for gccgo. +func setRandomNumber(uint32) + // Temporary for gccgo until we port proc.go. //go:linkname getsched runtime.getsched func getsched() *schedt { @@ -426,6 +424,15 @@ type bitvector struct { bytedata *uint8 } +// ptrbit returns the i'th bit in bv. +// ptrbit is less efficient than iterating directly over bitvector bits, +// and should only be used in non-performance-critical code. +// See adjustpointers for an example of a high-efficiency walk of a bitvector. +func (bv *bitvector) ptrbit(i uintptr) uint8 { + b := *(addb(bv.bytedata, i/8)) + return (b >> (i % 8)) & 1 +} + // bool2int returns 0 if x is false or 1 if x is true. func bool2int(x bool) int { if x { @@ -433,3 +440,10 @@ func bool2int(x bool) int { } return 0 } + +// abort crashes the runtime in situations where even throw might not +// work. In general it should do something a debugger will recognize +// (e.g., an INT3 on x86). A crash in abort is recognized by the +// signal handler, which will attempt to tear down the runtime +// immediately. +func abort() diff --git a/libgo/go/runtime/stubs2.go b/libgo/go/runtime/stubs2.go index e305b16..1cb910c 100644 --- a/libgo/go/runtime/stubs2.go +++ b/libgo/go/runtime/stubs2.go @@ -5,6 +5,8 @@ // +build !plan9 // +build !windows // +build !nacl +// +build !js +// +build !darwin package runtime @@ -16,7 +18,6 @@ func closefd(fd int32) int32 //extern exit func exit(code int32) -func nanotime() int64 func usleep(usec uint32) //go:noescape diff --git a/libgo/go/runtime/stubs3.go b/libgo/go/runtime/stubs3.go new file mode 100644 index 0000000..5c0786e --- /dev/null +++ b/libgo/go/runtime/stubs3.go @@ -0,0 +1,14 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !plan9 +// +build !solaris +// +build !windows +// +build !nacl +// +build !freebsd +// +build !darwin + +package runtime + +func nanotime() int64 diff --git a/libgo/go/runtime/symtab.go b/libgo/go/runtime/symtab.go index 12dc672..861921c 100644 --- a/libgo/go/runtime/symtab.go +++ b/libgo/go/runtime/symtab.go @@ -124,6 +124,7 @@ type funcID uint32 const ( funcID_normal funcID = iota // not a special function + funcID_runtime_main funcID_goexit funcID_jmpdefer funcID_mcall @@ -133,15 +134,13 @@ const ( funcID_asmcgocall funcID_sigpanic funcID_runfinq - funcID_bgsweep - funcID_forcegchelper - funcID_timerproc funcID_gcBgMarkWorker funcID_systemstack_switch funcID_systemstack funcID_cgocallback_gofunc funcID_gogo funcID_externalthreadhandler + funcID_debugCallV1 ) // FuncForPC returns a *Func describing the function that contains the diff --git a/libgo/go/runtime/sys_darwin.go b/libgo/go/runtime/sys_darwin.go new file mode 100644 index 0000000..7efbef7 --- /dev/null +++ b/libgo/go/runtime/sys_darwin.go @@ -0,0 +1,374 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import "unsafe" + +// Call fn with arg as its argument. 
Return what fn returns. +// fn is the raw pc value of the entry point of the desired function. +// Switches to the system stack, if not already there. +// Preserves the calling point as the location where a profiler traceback will begin. +//go:nosplit +func libcCall(fn, arg unsafe.Pointer) int32 { + // Leave caller's PC/SP/G around for traceback. + gp := getg() + var mp *m + if gp != nil { + mp = gp.m + } + if mp != nil && mp.libcallsp == 0 { + mp.libcallg.set(gp) + mp.libcallpc = getcallerpc() + // sp must be the last, because once async cpu profiler finds + // all three values to be non-zero, it will use them + mp.libcallsp = getcallersp() + } else { + // Make sure we don't reset libcallsp. This makes + // libcCall reentrant; We remember the g/pc/sp for the + // first call on an M, until that libcCall instance + // returns. Reentrance only matters for signals, as + // libc never calls back into Go. The tricky case is + // where we call libcX from an M and record g/pc/sp. + // Before that call returns, a signal arrives on the + // same M and the signal handling code calls another + // libc function. We don't want that second libcCall + // from within the handler to be recorded, and we + // don't want that call's completion to zero + // libcallsp. + // We don't need to set libcall* while we're in a sighandler + // (even if we're not currently in libc) because we block all + // signals while we're handling a signal. That includes the + // profile signal, which is the one that uses the libcall* info. + mp = nil + } + res := asmcgocall(fn, arg) + if mp != nil { + mp.libcallsp = 0 + } + return res +} + +// The *_trampoline functions convert from the Go calling convention to the C calling convention +// and then call the underlying libc function. They are defined in sys_darwin_$ARCH.s. 
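
The wrappers that follow all funnel through libcCall with a single unsafe.Pointer to their first argument; calls with several inputs and outputs, like mmap, pack everything into one struct. The sketch below simulates that convention in pure Go (fakeTrampoline is invented and merely echoes a value; the real stubs are assembly that invokes libc):

package main

import (
	"fmt"
	"unsafe"
)

// mmapArgs mirrors the packing convention used with libcCall: all
// inputs and outputs sit in one struct, and only a single pointer
// crosses into the assembly trampoline.
type mmapArgs struct {
	addr            unsafe.Pointer
	n               uintptr
	prot, flags, fd int32
	off             uint32
	ret1            unsafe.Pointer
	ret2            int
}

// fakeTrampoline stands in for the real assembly stub: it receives one
// pointer and recovers every argument from the struct behind it. A real
// trampoline would move these into the C calling convention and call
// libc mmap.
func fakeTrampoline(p unsafe.Pointer) {
	a := (*mmapArgs)(p)
	a.ret1 = nil
	a.ret2 = int(a.n) // placeholder result, not a real mmap
}

func main() {
	args := mmapArgs{n: 4096, prot: 3, flags: 0x1002, fd: -1}
	fakeTrampoline(unsafe.Pointer(&args))
	fmt.Println(args.ret2) // 4096
}
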
+ +//go:nosplit +//go:cgo_unsafe_args +func pthread_attr_init(attr *pthreadattr) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_attr_init_trampoline)), unsafe.Pointer(&attr)) +} +func pthread_attr_init_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_attr_setstacksize(attr *pthreadattr, size uintptr) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_attr_setstacksize_trampoline)), unsafe.Pointer(&attr)) +} +func pthread_attr_setstacksize_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_attr_setdetachstate(attr *pthreadattr, state int) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_attr_setdetachstate_trampoline)), unsafe.Pointer(&attr)) +} +func pthread_attr_setdetachstate_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_create(attr *pthreadattr, start uintptr, arg unsafe.Pointer) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_create_trampoline)), unsafe.Pointer(&attr)) +} +func pthread_create_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func raise(sig uint32) { + libcCall(unsafe.Pointer(funcPC(raise_trampoline)), unsafe.Pointer(&sig)) +} +func raise_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_self() (t pthread) { + libcCall(unsafe.Pointer(funcPC(pthread_self_trampoline)), unsafe.Pointer(&t)) + return +} +func pthread_self_trampoline() + +func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) (unsafe.Pointer, int) { + args := struct { + addr unsafe.Pointer + n uintptr + prot, flags, fd int32 + off uint32 + ret1 unsafe.Pointer + ret2 int + }{addr, n, prot, flags, fd, off, nil, 0} + libcCall(unsafe.Pointer(funcPC(mmap_trampoline)), unsafe.Pointer(&args)) + return args.ret1, args.ret2 +} +func mmap_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func munmap(addr unsafe.Pointer, n uintptr) { + libcCall(unsafe.Pointer(funcPC(munmap_trampoline)), unsafe.Pointer(&addr)) +} +func munmap_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func madvise(addr unsafe.Pointer, n uintptr, flags int32) { + libcCall(unsafe.Pointer(funcPC(madvise_trampoline)), unsafe.Pointer(&addr)) +} +func madvise_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func read(fd int32, p unsafe.Pointer, n int32) int32 { + return libcCall(unsafe.Pointer(funcPC(read_trampoline)), unsafe.Pointer(&fd)) +} +func read_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func closefd(fd int32) int32 { + return libcCall(unsafe.Pointer(funcPC(close_trampoline)), unsafe.Pointer(&fd)) +} +func close_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func exit(code int32) { + libcCall(unsafe.Pointer(funcPC(exit_trampoline)), unsafe.Pointer(&code)) +} +func exit_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func usleep(usec uint32) { + libcCall(unsafe.Pointer(funcPC(usleep_trampoline)), unsafe.Pointer(&usec)) +} +func usleep_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func write(fd uintptr, p unsafe.Pointer, n int32) int32 { + return libcCall(unsafe.Pointer(funcPC(write_trampoline)), unsafe.Pointer(&fd)) +} +func write_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func open(name *byte, mode, perm int32) (ret int32) { + return libcCall(unsafe.Pointer(funcPC(open_trampoline)), unsafe.Pointer(&name)) +} +func open_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func nanotime() int64 { + var r struct { + t int64 // raw timer + numer, denom uint32 // conversion factors. nanoseconds = t * numer / denom. 
+ } + libcCall(unsafe.Pointer(funcPC(nanotime_trampoline)), unsafe.Pointer(&r)) + // Note: Apple seems unconcerned about overflow here. See + // https://developer.apple.com/library/content/qa/qa1398/_index.html + // Note also, numer == denom == 1 is common. + t := r.t + if r.numer != 1 { + t *= int64(r.numer) + } + if r.denom != 1 { + t /= int64(r.denom) + } + return t +} +func nanotime_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func walltime() (int64, int32) { + var t timeval + libcCall(unsafe.Pointer(funcPC(walltime_trampoline)), unsafe.Pointer(&t)) + return int64(t.tv_sec), 1000 * t.tv_usec +} +func walltime_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func sigaction(sig uint32, new *usigactiont, old *usigactiont) { + libcCall(unsafe.Pointer(funcPC(sigaction_trampoline)), unsafe.Pointer(&sig)) +} +func sigaction_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func sigprocmask(how uint32, new *sigset, old *sigset) { + libcCall(unsafe.Pointer(funcPC(sigprocmask_trampoline)), unsafe.Pointer(&how)) +} +func sigprocmask_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func sigaltstack(new *stackt, old *stackt) { + if new != nil && new.ss_flags&_SS_DISABLE != 0 && new.ss_size == 0 { + // Despite the fact that Darwin's sigaltstack man page says it ignores the size + // when SS_DISABLE is set, it doesn't. sigaltstack returns ENOMEM + // if we don't give it a reasonable size. + // ref: http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20140421/214296.html + new.ss_size = 32768 + } + libcCall(unsafe.Pointer(funcPC(sigaltstack_trampoline)), unsafe.Pointer(&new)) +} +func sigaltstack_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func raiseproc(sig uint32) { + libcCall(unsafe.Pointer(funcPC(raiseproc_trampoline)), unsafe.Pointer(&sig)) +} +func raiseproc_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func setitimer(mode int32, new, old *itimerval) { + libcCall(unsafe.Pointer(funcPC(setitimer_trampoline)), unsafe.Pointer(&mode)) +} +func setitimer_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, ndst uintptr) int32 { + return libcCall(unsafe.Pointer(funcPC(sysctl_trampoline)), unsafe.Pointer(&mib)) +} +func sysctl_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func fcntl(fd, cmd, arg int32) int32 { + return libcCall(unsafe.Pointer(funcPC(fcntl_trampoline)), unsafe.Pointer(&fd)) +} +func fcntl_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func kqueue() int32 { + v := libcCall(unsafe.Pointer(funcPC(kqueue_trampoline)), nil) + return v +} +func kqueue_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func kevent(kq int32, ch *keventt, nch int32, ev *keventt, nev int32, ts *timespec) int32 { + return libcCall(unsafe.Pointer(funcPC(kevent_trampoline)), unsafe.Pointer(&kq)) +} +func kevent_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_mutex_init(m *pthreadmutex, attr *pthreadmutexattr) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_mutex_init_trampoline)), unsafe.Pointer(&m)) +} +func pthread_mutex_init_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_mutex_lock(m *pthreadmutex) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_mutex_lock_trampoline)), unsafe.Pointer(&m)) +} +func pthread_mutex_lock_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_mutex_unlock(m *pthreadmutex) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_mutex_unlock_trampoline)), unsafe.Pointer(&m)) +} +func 
pthread_mutex_unlock_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_cond_init(c *pthreadcond, attr *pthreadcondattr) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_cond_init_trampoline)), unsafe.Pointer(&c)) +} +func pthread_cond_init_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_cond_wait(c *pthreadcond, m *pthreadmutex) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_cond_wait_trampoline)), unsafe.Pointer(&c)) +} +func pthread_cond_wait_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_cond_timedwait_relative_np(c *pthreadcond, m *pthreadmutex, t *timespec) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_cond_timedwait_relative_np_trampoline)), unsafe.Pointer(&c)) +} +func pthread_cond_timedwait_relative_np_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_cond_signal(c *pthreadcond) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_cond_signal_trampoline)), unsafe.Pointer(&c)) +} +func pthread_cond_signal_trampoline() + +// Not used on Darwin, but must be defined. +func exitThread(wait *uint32) { +} + +//go:nosplit +func closeonexec(fd int32) { + fcntl(fd, _F_SETFD, _FD_CLOEXEC) +} + +// Tell the linker that the libc_* functions are to be found +// in a system library, with the libc_ prefix missing. + +//go:cgo_import_dynamic libc_pthread_attr_init pthread_attr_init "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_pthread_attr_setstacksize pthread_attr_setstacksize "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_pthread_attr_setdetachstate pthread_attr_setdetachstate "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_pthread_create pthread_create "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_exit exit "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_raise raise "/usr/lib/libSystem.B.dylib" + +//go:cgo_import_dynamic libc_open open "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_close close "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_read read "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_write write "/usr/lib/libSystem.B.dylib" + +//go:cgo_import_dynamic libc_mmap mmap "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_munmap munmap "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_madvise madvise "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_error __error "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_usleep usleep "/usr/lib/libSystem.B.dylib" + +//go:cgo_import_dynamic libc_mach_timebase_info mach_timebase_info "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_mach_absolute_time mach_absolute_time "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_gettimeofday gettimeofday "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_sigaction sigaction "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_pthread_sigmask pthread_sigmask "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_sigaltstack sigaltstack "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_getpid getpid "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_kill kill "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_setitimer setitimer "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_sysctl sysctl "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_fcntl fcntl "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_kqueue kqueue "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_kevent kevent 
"/usr/lib/libSystem.B.dylib" + +//go:cgo_import_dynamic libc_pthread_mutex_init pthread_mutex_init "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_pthread_mutex_lock pthread_mutex_lock "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_pthread_mutex_unlock pthread_mutex_unlock "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_pthread_cond_init pthread_cond_init "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_pthread_cond_wait pthread_cond_wait "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_pthread_cond_timedwait_relative_np pthread_cond_timedwait_relative_np "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_pthread_cond_signal pthread_cond_signal "/usr/lib/libSystem.B.dylib" + +// Magic incantation to get libSystem actually dynamically linked. +// TODO: Why does the code require this? See cmd/compile/internal/ld/go.go:210 +//go:cgo_import_dynamic _ _ "/usr/lib/libSystem.B.dylib" diff --git a/libgo/go/runtime/sys_wasm.go b/libgo/go/runtime/sys_wasm.go new file mode 100644 index 0000000..9bf710b --- /dev/null +++ b/libgo/go/runtime/sys_wasm.go @@ -0,0 +1,42 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import ( + "runtime/internal/sys" + "unsafe" +) + +type m0Stack struct { + _ [8192 * sys.StackGuardMultiplier]byte +} + +var wasmStack m0Stack + +func wasmMove() + +func wasmZero() + +func wasmDiv() + +func wasmTruncS() +func wasmTruncU() + +func wasmExit(code int32) + +// adjust Gobuf as it if executed a call to fn with context ctxt +// and then did an immediate gosave. +func gostartcall(buf *gobuf, fn, ctxt unsafe.Pointer) { + sp := buf.sp + if sys.RegSize > sys.PtrSize { + sp -= sys.PtrSize + *(*uintptr)(unsafe.Pointer(sp)) = 0 + } + sp -= sys.PtrSize + *(*uintptr)(unsafe.Pointer(sp)) = buf.pc + buf.sp = sp + buf.pc = uintptr(fn) + buf.ctxt = ctxt +} diff --git a/libgo/go/runtime/testdata/testprog/abort.go b/libgo/go/runtime/testdata/testprog/abort.go new file mode 100644 index 0000000..9e79d4d --- /dev/null +++ b/libgo/go/runtime/testdata/testprog/abort.go @@ -0,0 +1,23 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import _ "unsafe" // for go:linkname + +func init() { + register("Abort", Abort) +} + +//go:linkname runtimeAbort runtime.abort +func runtimeAbort() + +func Abort() { + defer func() { + recover() + panic("BAD: recovered from abort") + }() + runtimeAbort() + println("BAD: after abort") +} diff --git a/libgo/go/runtime/testdata/testprog/numcpu_freebsd.go b/libgo/go/runtime/testdata/testprog/numcpu_freebsd.go index 035c534..42ee154 100644 --- a/libgo/go/runtime/testdata/testprog/numcpu_freebsd.go +++ b/libgo/go/runtime/testdata/testprog/numcpu_freebsd.go @@ -9,12 +9,17 @@ import ( "fmt" "os" "os/exec" + "regexp" "runtime" "strconv" "strings" "syscall" ) +var ( + cpuSetRE = regexp.MustCompile(`(\d,?)+`) +) + func init() { register("FreeBSDNumCPU", FreeBSDNumCPU) register("FreeBSDNumCPUHelper", FreeBSDNumCPUHelper) @@ -105,8 +110,12 @@ func checkNCPU(list []string) error { return fmt.Errorf("could not check against an empty CPU list") } + cListString := cpuSetRE.FindString(listString) + if len(cListString) == 0 { + return fmt.Errorf("invalid cpuset output '%s'", listString) + } // Launch FreeBSDNumCPUHelper() with specified CPUs list. 
- cmd := exec.Command("cpuset", "-l", listString, os.Args[0], "FreeBSDNumCPUHelper") + cmd := exec.Command("cpuset", "-l", cListString, os.Args[0], "FreeBSDNumCPUHelper") cmdline := strings.Join(cmd.Args, " ") output, err := cmd.CombinedOutput() if err != nil { @@ -120,7 +129,7 @@ func checkNCPU(list []string) error { return fmt.Errorf("fail to parse output from child '%s', error: %s, output: %s", cmdline, err, output) } if n != len(list) { - return fmt.Errorf("runtime.NumCPU() expected to %d, got %d when run with CPU list %s", len(list), n, listString) + return fmt.Errorf("runtime.NumCPU() expected to %d, got %d when run with CPU list %s", len(list), n, cListString) } return nil } diff --git a/libgo/go/runtime/testdata/testprog/timeprof.go b/libgo/go/runtime/testdata/testprog/timeprof.go new file mode 100644 index 0000000..0702885 --- /dev/null +++ b/libgo/go/runtime/testdata/testprog/timeprof.go @@ -0,0 +1,46 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "fmt" + "io/ioutil" + "os" + "runtime/pprof" + "time" +) + +func init() { + register("TimeProf", TimeProf) +} + +func TimeProf() { + f, err := ioutil.TempFile("", "timeprof") + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(2) + } + + if err := pprof.StartCPUProfile(f); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(2) + } + + t0 := time.Now() + // We should get a profiling signal 100 times a second, + // so running for 1/10 second should be sufficient. + for time.Since(t0) < time.Second/10 { + } + + pprof.StopCPUProfile() + + name := f.Name() + if err := f.Close(); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(2) + } + + fmt.Println(name) +} diff --git a/libgo/go/runtime/testdata/testprog/traceback_ancestors.go b/libgo/go/runtime/testdata/testprog/traceback_ancestors.go new file mode 100644 index 0000000..fe57c1c --- /dev/null +++ b/libgo/go/runtime/testdata/testprog/traceback_ancestors.go @@ -0,0 +1,53 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "fmt" + "runtime" +) + +func init() { + register("TracebackAncestors", TracebackAncestors) +} + +const numGoroutines = 3 +const numFrames = 2 + +func TracebackAncestors() { + w := make(chan struct{}) + recurseThenCallGo(w, numGoroutines, numFrames) + <-w + printStack() + close(w) +} + +func printStack() { + buf := make([]byte, 1024) + for { + n := runtime.Stack(buf, true) + if n < len(buf) { + fmt.Print(string(buf[:n])) + return + } + buf = make([]byte, 2*len(buf)) + } +} + +func recurseThenCallGo(w chan struct{}, frames int, goroutines int) { + if frames == 0 { + // Signal to TracebackAncestors that we are done recursing and starting goroutines. + w <- struct{}{} + <-w + return + } + if goroutines == 0 { + // Start the next goroutine now that there are no more recursions left + // for this current goroutine. + go recurseThenCallGo(w, frames-1, numFrames) + return + } + recurseThenCallGo(w, frames, goroutines-1) +} diff --git a/libgo/go/runtime/testdata/testprogcgo/bigstack_windows.go b/libgo/go/runtime/testdata/testprogcgo/bigstack_windows.go new file mode 100644 index 0000000..f58fcf9 --- /dev/null +++ b/libgo/go/runtime/testdata/testprogcgo/bigstack_windows.go @@ -0,0 +1,27 @@ +// Copyright 2018 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +/* +typedef void callback(char*); +extern void goBigStack1(char*); +extern void bigStack(callback*); +*/ +import "C" + +func init() { + register("BigStack", BigStack) +} + +func BigStack() { + // Create a large thread stack and call back into Go to test + // if Go correctly determines the stack bounds. + C.bigStack((*C.callback)(C.goBigStack1)) +} + +//export goBigStack1 +func goBigStack1(x *C.char) { + println("OK") +} diff --git a/libgo/go/runtime/testdata/testprogcgo/raceprof.go b/libgo/go/runtime/testdata/testprogcgo/raceprof.go index 466a367..0750ec1 100644 --- a/libgo/go/runtime/testdata/testprogcgo/raceprof.go +++ b/libgo/go/runtime/testdata/testprogcgo/raceprof.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build linux,amd64 +// +build linux,amd64 freebsd,amd64 // +build !gccgo package main diff --git a/libgo/go/runtime/testdata/testprogcgo/racesig.go b/libgo/go/runtime/testdata/testprogcgo/racesig.go index d0c1c3c..a079b3f 100644 --- a/libgo/go/runtime/testdata/testprogcgo/racesig.go +++ b/libgo/go/runtime/testdata/testprogcgo/racesig.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build linux,amd64 +// +build linux,amd64 freebsd,amd64 package main diff --git a/libgo/go/runtime/time.go b/libgo/go/runtime/time.go index b707590..a95d95b 100644 --- a/libgo/go/runtime/time.go +++ b/libgo/go/runtime/time.go @@ -98,8 +98,11 @@ func timeSleep(ns int64) { t.arg = gp tb := t.assignBucket() lock(&tb.lock) - tb.addtimerLocked(t) - goparkunlock(&tb.lock, "sleep", traceEvGoSleep, 2) + if !tb.addtimerLocked(t) { + unlock(&tb.lock) + badTimer() + } + goparkunlock(&tb.lock, waitReasonSleep, traceEvGoSleep, 2) } // startTimer adds t to the timer heap. @@ -128,14 +131,19 @@ func goroutineReady(arg interface{}, seq uintptr) { func addtimer(t *timer) { tb := t.assignBucket() lock(&tb.lock) - tb.addtimerLocked(t) + ok := tb.addtimerLocked(t) unlock(&tb.lock) + if !ok { + badTimer() + } } // Add a timer to the heap and start or kick timerproc if the new timer is // earlier than any of the others. // Timers are locked. -func (tb *timersBucket) addtimerLocked(t *timer) { +// Returns whether all is well: false if the data structure is corrupt +// due to user-level races. +func (tb *timersBucket) addtimerLocked(t *timer) bool { // when must never be negative; otherwise timerproc will overflow // during its delta calculation and never expire other runtime timers. if t.when < 0 { @@ -143,7 +151,9 @@ func (tb *timersBucket) addtimerLocked(t *timer) { } t.i = len(tb.t) tb.t = append(tb.t, t) - siftupTimer(tb.t, t.i) + if !siftupTimer(tb.t, t.i) { + return false + } if t.i == 0 { // siftup moved to top: new earliest deadline. if tb.sleeping { @@ -160,6 +170,7 @@ func (tb *timersBucket) addtimerLocked(t *timer) { expectSystemGoroutine() go timerproc(tb) } + return true } // Delete timer t from the heap. 
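
The time.go changes above and below make the heap operations report corruption with a bool so that badTimer can panic only after the bucket lock is released. A hedged standalone sketch of that detect-under-lock, panic-after-unlock shape (the bucket type and its ordering check are invented):

package main

import (
	"errors"
	"fmt"
	"sync"
)

type bucket struct {
	mu    sync.Mutex
	items []int
}

var errRacyUse = errors.New("racy use of timers")

// addLocked must be called with mu held; it returns false if the
// structure looks corrupted (here: entries no longer sorted).
func (b *bucket) addLocked(v int) bool {
	if n := len(b.items); n > 0 && b.items[n-1] > v {
		return false
	}
	b.items = append(b.items, v)
	return true
}

func (b *bucket) add(v int) {
	b.mu.Lock()
	ok := b.addLocked(v)
	b.mu.Unlock()
	if !ok {
		panic(errRacyUse) // raised only after the unlock
	}
}

func main() {
	var b bucket
	b.add(1)
	b.add(2)
	fmt.Println(b.items) // [1 2]
}
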
@@ -192,11 +203,19 @@ func deltimer(t *timer) bool { } tb.t[last] = nil tb.t = tb.t[:last] + ok := true if i != last { - siftupTimer(tb.t, i) - siftdownTimer(tb.t, i) + if !siftupTimer(tb.t, i) { + ok = false + } + if !siftdownTimer(tb.t, i) { + ok = false + } } unlock(&tb.lock) + if !ok { + badTimer() + } return true } @@ -222,10 +241,13 @@ func timerproc(tb *timersBucket) { if delta > 0 { break } + ok := true if t.period > 0 { // leave in heap but adjust next time to fire t.when += t.period * (1 + -delta/t.period) - siftdownTimer(tb.t, 0) + if !siftdownTimer(tb.t, 0) { + ok = false + } } else { // remove from heap last := len(tb.t) - 1 @@ -236,7 +258,9 @@ func timerproc(tb *timersBucket) { tb.t[last] = nil tb.t = tb.t[:last] if last > 0 { - siftdownTimer(tb.t, 0) + if !siftdownTimer(tb.t, 0) { + ok = false + } } t.i = -1 // mark as removed } @@ -244,6 +268,9 @@ func timerproc(tb *timersBucket) { arg := t.arg seq := t.seq unlock(&tb.lock) + if !ok { + badTimer() + } if raceenabled { raceacquire(unsafe.Pointer(t)) } @@ -253,7 +280,7 @@ func timerproc(tb *timersBucket) { if delta < 0 || faketime > 0 { // No timers left - put goroutine to sleep. tb.rescheduling = true - goparkunlock(&tb.lock, "timer goroutine (idle)", traceEvGoBlock, 1) + goparkunlock(&tb.lock, waitReasonTimerGoroutineIdle, traceEvGoBlock, 1) continue } // At least one timer pending. Sleep until then. @@ -329,8 +356,20 @@ func timeSleepUntil() int64 { } // Heap maintenance algorithms. - -func siftupTimer(t []*timer, i int) { +// These algorithms check for slice index errors manually. +// Slice index error can happen if the program is using racy +// access to timers. We don't want to panic here, because +// it will cause the program to crash with a mysterious +// "panic holding locks" message. Instead, we panic while not +// holding a lock. +// The races can occur despite the bucket locks because assignBucket +// itself is called without locks, so racy calls can cause a timer to +// change buckets while executing these functions. + +func siftupTimer(t []*timer, i int) bool { + if i >= len(t) { + return false + } when := t[i].when tmp := t[i] for i > 0 { @@ -346,10 +385,14 @@ func siftupTimer(t []*timer, i int) { t[i] = tmp t[i].i = i } + return true } -func siftdownTimer(t []*timer, i int) { +func siftdownTimer(t []*timer, i int) bool { n := len(t) + if i >= n { + return false + } when := t[i].when tmp := t[i] for { @@ -385,6 +428,15 @@ func siftdownTimer(t []*timer, i int) { t[i] = tmp t[i].i = i } + return true +} + +// badTimer is called if the timer data structures have been corrupted, +// presumably due to racy use by the program. We panic here rather than +// panicing due to invalid slice access while holding locks. +// See issue #25686. +func badTimer() { + panic(errorString("racy use of timers")) } // Entry points for net, time to call nanotime. diff --git a/libgo/go/runtime/timeasm.go b/libgo/go/runtime/timeasm.go index d5f5ea3..55b0d07 100644 --- a/libgo/go/runtime/timeasm.go +++ b/libgo/go/runtime/timeasm.go @@ -7,7 +7,7 @@ // so that time.now and nanotime return the same monotonic clock readings. // +build ignore -// +build darwin,amd64 darwin,386 windows +// +build windows package runtime diff --git a/libgo/go/runtime/timestub.go b/libgo/go/runtime/timestub.go index 033734e..9f1d111 100644 --- a/libgo/go/runtime/timestub.go +++ b/libgo/go/runtime/timestub.go @@ -5,15 +5,12 @@ // Declarations for operating systems implementing time.now // indirectly, in terms of walltime and nanotime assembly. 
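Editor's note on the heap maintenance changes above: siftupTimer and siftdownTimer now validate the index and return false so the caller can unlock and report corruption, instead of a slice-bounds panic firing under the bucket lock. A simplified sketch on an int min-heap (not the runtime's *timer code; the 4-ary parent arithmetic matches siftupTimer):

    package main

    import "fmt"

    // siftup is a bounds-checked heap fix-up in the shape of siftupTimer.
    func siftup(t []int, i int) bool {
        if i >= len(t) {
            return false // corrupt index: let the caller unlock and report it
        }
        v := t[i]
        for i > 0 {
            p := (i - 1) / 4 // parent in a 4-ary heap, as in siftupTimer
            if v >= t[p] {
                break
            }
            t[i] = t[p]
            i = p
        }
        t[i] = v
        return true
    }

    func main() {
        h := []int{1, 5, 7, 9, 11, 0}
        fmt.Println(siftup(h, len(h)-1), h) // true [0 1 7 9 11 5]
        fmt.Println(siftup(h, 42))          // false: out-of-range index detected
    }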
-// -build !darwin !amd64,!386 // -build !windows package runtime import _ "unsafe" // for go:linkname -func walltime() (sec int64, nsec int32) - //go:linkname time_now time.now func time_now() (sec int64, nsec int32, mono int64) { sec, nsec = walltime() diff --git a/libgo/go/runtime/timestub2.go b/libgo/go/runtime/timestub2.go new file mode 100644 index 0000000..9ddc6fe --- /dev/null +++ b/libgo/go/runtime/timestub2.go @@ -0,0 +1,11 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !darwin +// +build !windows +// +build !freebsd + +package runtime + +func walltime() (sec int64, nsec int32) diff --git a/libgo/go/runtime/trace.go b/libgo/go/runtime/trace.go index e2bbb5d..7aed9a9 100644 --- a/libgo/go/runtime/trace.go +++ b/libgo/go/runtime/trace.go @@ -64,7 +64,14 @@ const ( traceEvGoBlockGC = 42 // goroutine blocks on GC assist [timestamp, stack] traceEvGCMarkAssistStart = 43 // GC mark assist start [timestamp, stack] traceEvGCMarkAssistDone = 44 // GC mark assist done [timestamp] - traceEvCount = 45 + traceEvUserTaskCreate = 45 // trace.NewContext [timestamp, internal task id, internal parent task id, stack, name string] + traceEvUserTaskEnd = 46 // end of a task [timestamp, internal task id, stack] + traceEvUserRegion = 47 // trace.WithRegion [timestamp, internal task id, mode(0:start, 1:end), stack, name string] + traceEvUserLog = 48 // trace.Log [timestamp, internal task id, key string id, stack, value string] + traceEvCount = 49 + // Byte is used but only 6 bits are available for event type. + // The remaining 2 bits are used to specify the number of arguments. + // That means, the max event type value is 63. ) const ( @@ -121,11 +128,13 @@ var trace struct { // Dictionary for traceEvString. // - // Currently this is used only at trace setup and for - // func/file:line info after tracing session, so we assume - // single-threaded access. - strings map[string]uint64 - stringSeq uint64 + // TODO: central lock to access the map is not ideal. + // option: pre-assign ids to all user annotation region names and tags + // option: per-P cache + // option: sync.Map like data structure + stringsLock mutex + strings map[string]uint64 + stringSeq uint64 // markWorkerLabels maps gcMarkWorkerMode to string ID. markWorkerLabels [len(gcMarkWorkerModeStrings)]uint64 @@ -379,12 +388,12 @@ func ReadTrace() []byte { trace.headerWritten = true trace.lockOwner = nil unlock(&trace.lock) - return []byte("go 1.10 trace\x00\x00\x00") + return []byte("go 1.11 trace\x00\x00\x00") } // Wait for new data. if trace.fullHead == 0 && !trace.shutdown { trace.reader.set(getg()) - goparkunlock(&trace.lock, "trace reader (blocked)", traceEvGoBlock, 2) + goparkunlock(&trace.lock, waitReasonTraceReaderBlocked, traceEvGoBlock, 2) lock(&trace.lock) } // Write a buffer. @@ -507,12 +516,26 @@ func traceEvent(ev byte, skip int, args ...uint64) { // so if we see trace.enabled == true now, we know it's true for the rest of the function. // Exitsyscall can run even during stopTheWorld. The race with StartTrace/StopTrace // during tracing in exitsyscall is resolved by locking trace.bufLock in traceLockBuffer. + // + // Note trace_userTaskCreate runs the same check. if !trace.enabled && !mp.startingtrace { traceReleaseBuffer(pid) return } + + if skip > 0 { + if getg() == mp.curg { + skip++ // +1 because stack is captured in traceEventLocked. + } + } + traceEventLocked(0, mp, pid, bufp, ev, skip, args...) 
+ traceReleaseBuffer(pid) +} + +func traceEventLocked(extraBytes int, mp *m, pid int32, bufp *traceBufPtr, ev byte, skip int, args ...uint64) { buf := (*bufp).ptr() - const maxSize = 2 + 5*traceBytesPerNumber // event type, length, sequence, timestamp, stack id and two add params + // TODO: test on non-zero extraBytes param. + maxSize := 2 + 5*traceBytesPerNumber + extraBytes // event type, length, sequence, timestamp, stack id and two add params if buf == nil || len(buf.arr)-buf.pos < maxSize { buf = traceFlush(traceBufPtrOf(buf), pid).ptr() (*bufp).set(buf) @@ -555,7 +578,6 @@ func traceEvent(ev byte, skip int, args ...uint64) { // Fill in actual length. *lenp = byte(evSize - 2) } - traceReleaseBuffer(pid) } func traceStackID(mp *m, buf []location, skip int) uint64 { @@ -636,7 +658,20 @@ func traceString(bufp *traceBufPtr, pid int32, s string) (uint64, *traceBufPtr) if s == "" { return 0, bufp } + + lock(&trace.stringsLock) + if raceenabled { + // raceacquire is necessary because the map access + // below is race annotated. + raceacquire(unsafe.Pointer(&trace.stringsLock)) + } + if id, ok := trace.strings[s]; ok { + if raceenabled { + racerelease(unsafe.Pointer(&trace.stringsLock)) + } + unlock(&trace.stringsLock) + return id, bufp } @@ -644,6 +679,11 @@ func traceString(bufp *traceBufPtr, pid int32, s string) (uint64, *traceBufPtr) id := trace.stringSeq trace.strings[s] = id + if raceenabled { + racerelease(unsafe.Pointer(&trace.stringsLock)) + } + unlock(&trace.stringsLock) + // memory allocation in above may trigger tracing and // cause *bufp changes. Following code now works with *bufp, // so there must be no memory allocation or any activities @@ -657,8 +697,16 @@ func traceString(bufp *traceBufPtr, pid int32, s string) (uint64, *traceBufPtr) } buf.byte(traceEvString) buf.varint(id) - buf.varint(uint64(len(s))) - buf.pos += copy(buf.arr[buf.pos:], s) + + // double-check the string and the length can fit. + // Otherwise, truncate the string. + slen := len(s) + if room := len(buf.arr) - buf.pos; room < slen+traceBytesPerNumber { + slen = room + } + + buf.varint(uint64(slen)) + buf.pos += copy(buf.arr[buf.pos:], s[:slen]) (*bufp).set(buf) return id, bufp @@ -1091,3 +1139,78 @@ func traceNextGC() { traceEvent(traceEvNextGC, -1, memstats.next_gc) } } + +// To access runtime functions from runtime/trace. +// See runtime/trace/annotation.go + +//go:linkname trace_userTaskCreate runtime_trace.userTaskCreate +func trace_userTaskCreate(id, parentID uint64, taskType string) { + if !trace.enabled { + return + } + + // Same as in traceEvent. 
+ mp, pid, bufp := traceAcquireBuffer() + if !trace.enabled && !mp.startingtrace { + traceReleaseBuffer(pid) + return + } + + typeStringID, bufp := traceString(bufp, pid, taskType) + traceEventLocked(0, mp, pid, bufp, traceEvUserTaskCreate, 3, id, parentID, typeStringID) + traceReleaseBuffer(pid) +} + +//go:linkname trace_userTaskEnd runtime_trace.userTaskEnd +func trace_userTaskEnd(id uint64) { + traceEvent(traceEvUserTaskEnd, 2, id) +} + +//go:linkname trace_userRegion runtime_trace.userRegion +func trace_userRegion(id, mode uint64, name string) { + if !trace.enabled { + return + } + + mp, pid, bufp := traceAcquireBuffer() + if !trace.enabled && !mp.startingtrace { + traceReleaseBuffer(pid) + return + } + + nameStringID, bufp := traceString(bufp, pid, name) + traceEventLocked(0, mp, pid, bufp, traceEvUserRegion, 3, id, mode, nameStringID) + traceReleaseBuffer(pid) +} + +//go:linkname trace_userLog runtime_trace.userLog +func trace_userLog(id uint64, category, message string) { + if !trace.enabled { + return + } + + mp, pid, bufp := traceAcquireBuffer() + if !trace.enabled && !mp.startingtrace { + traceReleaseBuffer(pid) + return + } + + categoryID, bufp := traceString(bufp, pid, category) + + extraSpace := traceBytesPerNumber + len(message) // extraSpace for the value string + traceEventLocked(extraSpace, mp, pid, bufp, traceEvUserLog, 3, id, categoryID) + // traceEventLocked reserved extra space for val and len(val) + // in buf, so buf now has room for the following. + buf := (*bufp).ptr() + + // double-check the message and its length can fit. + // Otherwise, truncate the message. + slen := len(message) + if room := len(buf.arr) - buf.pos; room < slen+traceBytesPerNumber { + slen = room + } + buf.varint(uint64(slen)) + buf.pos += copy(buf.arr[buf.pos:], message[:slen]) + + traceReleaseBuffer(pid) +} diff --git a/libgo/go/runtime/trace/annotation.go b/libgo/go/runtime/trace/annotation.go new file mode 100644 index 0000000..3545ef3 --- /dev/null +++ b/libgo/go/runtime/trace/annotation.go @@ -0,0 +1,196 @@ +package trace + +import ( + "context" + "fmt" + "sync/atomic" + _ "unsafe" +) + +type traceContextKey struct{} + +// NewTask creates a task instance with the type taskType and returns +// it along with a Context that carries the task. +// If the input context contains a task, the new task is its subtask. +// +// The taskType is used to classify task instances. Analysis tools +// like the Go execution tracer may assume there are only a bounded +// number of unique task types in the system. +// +// The returned end function is used to mark the task's end. +// The trace tool measures task latency as the time between task creation +// and when the end function is called, and provides the latency +// distribution per task type. +// If the end function is called multiple times, only the first +// call is used in the latency measurement. +// +// ctx, task := trace.NewTask(ctx, "awesome task") +// trace.WithRegion(ctx, prepWork) +// // preparation of the task +// go func() { // continue processing the task in a separate goroutine. 
+// defer task.End() +// trace.WithRegion(ctx, remainingWork) +// } +func NewTask(pctx context.Context, taskType string) (ctx context.Context, task *Task) { + pid := fromContext(pctx).id + id := newID() + userTaskCreate(id, pid, taskType) + s := &Task{id: id} + return context.WithValue(pctx, traceContextKey{}, s), s + + // We allocate a new task and the end function even when + // the tracing is disabled because the context and the detach + // function can be used across trace enable/disable boundaries, + // which complicates the problem. + // + // For example, consider the following scenario: + // - trace is enabled. + // - trace.WithRegion is called, so a new context ctx + // with a new region is created. + // - trace is disabled. + // - trace is enabled again. + // - trace APIs with the ctx is called. Is the ID in the task + // a valid one to use? + // + // TODO(hyangah): reduce the overhead at least when + // tracing is disabled. Maybe the id can embed a tracing + // round number and ignore ids generated from previous + // tracing round. +} + +func fromContext(ctx context.Context) *Task { + if s, ok := ctx.Value(traceContextKey{}).(*Task); ok { + return s + } + return &bgTask +} + +// Task is a data type for tracing a user-defined, logical operation. +type Task struct { + id uint64 + // TODO(hyangah): record parent id? +} + +// End marks the end of the operation represented by the Task. +func (t *Task) End() { + userTaskEnd(t.id) +} + +var lastTaskID uint64 = 0 // task id issued last time + +func newID() uint64 { + // TODO(hyangah): use per-P cache + return atomic.AddUint64(&lastTaskID, 1) +} + +var bgTask = Task{id: uint64(0)} + +// Log emits a one-off event with the given category and message. +// Category can be empty and the API assumes there are only a handful of +// unique categories in the system. +func Log(ctx context.Context, category, message string) { + id := fromContext(ctx).id + userLog(id, category, message) +} + +// Logf is like Log, but the value is formatted using the specified format spec. +func Logf(ctx context.Context, category, format string, args ...interface{}) { + if IsEnabled() { + // Ideally this should be just Log, but that will + // add one more frame in the stack trace. + id := fromContext(ctx).id + userLog(id, category, fmt.Sprintf(format, args...)) + } +} + +const ( + regionStartCode = uint64(0) + regionEndCode = uint64(1) +) + +// WithRegion starts a region associated with its calling goroutine, runs fn, +// and then ends the region. If the context carries a task, the region is +// associated with the task. Otherwise, the region is attached to the background +// task. +// +// The regionType is used to classify regions, so there should be only a +// handful of unique region types. +func WithRegion(ctx context.Context, regionType string, fn func()) { + // NOTE: + // WithRegion helps avoiding misuse of the API but in practice, + // this is very restrictive: + // - Use of WithRegion makes the stack traces captured from + // region start and end are identical. + // - Refactoring the existing code to use WithRegion is sometimes + // hard and makes the code less readable. + // e.g. code block nested deep in the loop with various + // exit point with return values + // - Refactoring the code to use this API with closure can + // cause different GC behavior such as retaining some parameters + // longer. + // This causes more churns in code than I hoped, and sometimes + // makes the code less readable. 
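Editor's note: a minimal usage sketch of the annotation API added in this file, built only from the entry points visible in the diff (NewTask, WithRegion, Log) plus trace.Start/Stop; the output file name and the task, region, and log values are made up:

    package main

    import (
        "context"
        "os"
        "runtime/trace"
    )

    func main() {
        f, err := os.Create("trace.out") // hypothetical output file
        if err != nil {
            panic(err)
        }
        defer f.Close()

        if err := trace.Start(f); err != nil {
            panic(err)
        }
        defer trace.Stop()

        // NewTask attaches a task to the context; End marks its completion,
        // which the trace tool uses to compute per-task latency.
        ctx, task := trace.NewTask(context.Background(), "makeCappuccino")
        defer task.End()

        // Regions and log messages are associated with the task carried by ctx.
        trace.WithRegion(ctx, "steamMilk", func() {
            trace.Log(ctx, "orderID", "42")
        })
    }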
+ + id := fromContext(ctx).id + userRegion(id, regionStartCode, regionType) + defer userRegion(id, regionEndCode, regionType) + fn() +} + +// StartRegion starts a region and returns a function for marking the +// end of the region. The returned Region's End function must be called +// from the same goroutine where the region was started. +// Within each goroutine, regions must nest. That is, regions started +// after this region must be ended before this region can be ended. +// Recommended usage is +// +// defer trace.StartRegion(ctx, "myTracedRegion").End() +// +func StartRegion(ctx context.Context, regionType string) *Region { + if !IsEnabled() { + return noopRegion + } + id := fromContext(ctx).id + userRegion(id, regionStartCode, regionType) + return &Region{id, regionType} +} + +// Region is a region of code whose execution time interval is traced. +type Region struct { + id uint64 + regionType string +} + +var noopRegion = &Region{} + +// End marks the end of the traced code region. +func (r *Region) End() { + if r == noopRegion { + return + } + userRegion(r.id, regionEndCode, r.regionType) +} + +// IsEnabled returns whether tracing is enabled. +// The information is advisory only. The tracing status +// may have changed by the time this function returns. +func IsEnabled() bool { + enabled := atomic.LoadInt32(&tracing.enabled) + return enabled == 1 +} + +// +// Function bodies are defined in runtime/trace.go +// + +// emits UserTaskCreate event. +func userTaskCreate(id, parentID uint64, taskType string) + +// emits UserTaskEnd event. +func userTaskEnd(id uint64) + +// emits UserRegion event. +func userRegion(id, mode uint64, regionType string) + +// emits UserLog event. +func userLog(id uint64, category, message string) diff --git a/libgo/go/runtime/trace/annotation_test.go b/libgo/go/runtime/trace/annotation_test.go new file mode 100644 index 0000000..71abbfc --- /dev/null +++ b/libgo/go/runtime/trace/annotation_test.go @@ -0,0 +1,152 @@ +package trace_test + +import ( + "bytes" + "context" + "fmt" + "internal/trace" + "reflect" + . 
"runtime/trace" + "strings" + "sync" + "testing" +) + +func BenchmarkStartRegion(b *testing.B) { + b.ReportAllocs() + ctx, task := NewTask(context.Background(), "benchmark") + defer task.End() + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + StartRegion(ctx, "region").End() + } + }) +} + +func BenchmarkNewTask(b *testing.B) { + b.ReportAllocs() + pctx, task := NewTask(context.Background(), "benchmark") + defer task.End() + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + _, task := NewTask(pctx, "task") + task.End() + } + }) +} + +func TestUserTaskRegion(t *testing.T) { + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } + bgctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + preExistingRegion := StartRegion(bgctx, "pre-existing region") + + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + + // Beginning of traced execution + var wg sync.WaitGroup + ctx, task := NewTask(bgctx, "task0") // EvUserTaskCreate("task0") + wg.Add(1) + go func() { + defer wg.Done() + defer task.End() // EvUserTaskEnd("task0") + + WithRegion(ctx, "region0", func() { + // EvUserRegionCreate("region0", start) + WithRegion(ctx, "region1", func() { + Log(ctx, "key0", "0123456789abcdef") // EvUserLog("task0", "key0", "0....f") + }) + // EvUserRegion("region0", end) + }) + }() + + wg.Wait() + + preExistingRegion.End() + postExistingRegion := StartRegion(bgctx, "post-existing region") + + // End of traced execution + Stop() + + postExistingRegion.End() + + saveTrace(t, buf, "TestUserTaskRegion") + res, err := trace.Parse(buf, "") + if err == trace.ErrTimeOrder { + // golang.org/issues/16755 + t.Skipf("skipping trace: %v", err) + } + if err != nil { + t.Fatalf("Parse failed: %v", err) + } + + // Check whether we see all user annotation related records in order + type testData struct { + typ byte + strs []string + args []uint64 + setLink bool + } + + var got []testData + tasks := map[uint64]string{} + for _, e := range res.Events { + t.Logf("%s", e) + switch e.Type { + case trace.EvUserTaskCreate: + taskName := e.SArgs[0] + got = append(got, testData{trace.EvUserTaskCreate, []string{taskName}, nil, e.Link != nil}) + if e.Link != nil && e.Link.Type != trace.EvUserTaskEnd { + t.Errorf("Unexpected linked event %q->%q", e, e.Link) + } + tasks[e.Args[0]] = taskName + case trace.EvUserLog: + key, val := e.SArgs[0], e.SArgs[1] + taskName := tasks[e.Args[0]] + got = append(got, testData{trace.EvUserLog, []string{taskName, key, val}, nil, e.Link != nil}) + case trace.EvUserTaskEnd: + taskName := tasks[e.Args[0]] + got = append(got, testData{trace.EvUserTaskEnd, []string{taskName}, nil, e.Link != nil}) + if e.Link != nil && e.Link.Type != trace.EvUserTaskCreate { + t.Errorf("Unexpected linked event %q->%q", e, e.Link) + } + case trace.EvUserRegion: + taskName := tasks[e.Args[0]] + regionName := e.SArgs[0] + got = append(got, testData{trace.EvUserRegion, []string{taskName, regionName}, []uint64{e.Args[1]}, e.Link != nil}) + if e.Link != nil && (e.Link.Type != trace.EvUserRegion || e.Link.SArgs[0] != regionName) { + t.Errorf("Unexpected linked event %q->%q", e, e.Link) + } + } + } + want := []testData{ + {trace.EvUserTaskCreate, []string{"task0"}, nil, true}, + {trace.EvUserRegion, []string{"task0", "region0"}, []uint64{0}, true}, + {trace.EvUserRegion, []string{"task0", "region1"}, []uint64{0}, true}, + {trace.EvUserLog, []string{"task0", "key0", "0123456789abcdef"}, nil, false}, + {trace.EvUserRegion, 
[]string{"task0", "region1"}, []uint64{1}, false}, + {trace.EvUserRegion, []string{"task0", "region0"}, []uint64{1}, false}, + {trace.EvUserTaskEnd, []string{"task0"}, nil, false}, + // Currently, pre-existing region is not recorded to avoid allocations. + // {trace.EvUserRegion, []string{"", "pre-existing region"}, []uint64{1}, false}, + {trace.EvUserRegion, []string{"", "post-existing region"}, []uint64{0}, false}, + } + if !reflect.DeepEqual(got, want) { + pretty := func(data []testData) string { + var s strings.Builder + for _, d := range data { + s.WriteString(fmt.Sprintf("\t%+v\n", d)) + } + return s.String() + } + t.Errorf("Got user region related events\n%+v\nwant:\n%+v", pretty(got), pretty(want)) + } +} diff --git a/libgo/go/runtime/trace/trace.go b/libgo/go/runtime/trace/trace.go index 439f998..7f9d72a 100644 --- a/libgo/go/runtime/trace/trace.go +++ b/libgo/go/runtime/trace/trace.go @@ -2,8 +2,10 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Package trace contains facilities for programs to generate trace -// for Go execution tracer. +// Package trace contains facilities for programs to generate traces +// for the Go execution tracer. +// +// Tracing runtime activities // // The execution trace captures a wide range of execution events such as // goroutine creation/blocking/unblocking, syscall enter/exit/block, @@ -12,8 +14,6 @@ // captured for most events. The generated trace can be interpreted // using `go tool trace`. // -// Tracing a Go program -// // Support for tracing tests and benchmarks built with the standard // testing package is built into `go test`. For example, the following // command runs the test in the current directory and writes the trace @@ -25,24 +25,102 @@ // support to a standalone program. See the Example that demonstrates // how to use this API to enable tracing. // -// There is also a standard HTTP interface to profiling data. Adding the -// following line will install handlers under the /debug/pprof/trace URL -// to download live profiles: +// There is also a standard HTTP interface to trace data. Adding the +// following line will install a handler under the /debug/pprof/trace URL +// to download a live trace: // // import _ "net/http/pprof" // -// See the net/http/pprof package for more details. +// See the net/http/pprof package for more details about all of the +// debug endpoints installed by this import. +// +// User annotation +// +// Package trace provides user annotation APIs that can be used to +// log interesting events during execution. +// +// There are three types of user annotations: log messages, regions, +// and tasks. +// +// Log emits a timestamped message to the execution trace along with +// additional information such as the category of the message and +// which goroutine called Log. The execution tracer provides UIs to filter +// and group goroutines using the log category and the message supplied +// in Log. +// +// A region is for logging a time interval during a goroutine's execution. +// By definition, a region starts and ends in the same goroutine. +// Regions can be nested to represent subintervals. +// For example, the following code records four regions in the execution +// trace to trace the durations of sequential steps in a cappuccino making +// operation. +// +// trace.WithRegion(ctx, "makeCappuccino", func() { +// +// // orderID allows to identify a specific order +// // among many cappuccino order region records. 
+// trace.Log(ctx, "orderID", orderID) +// +// trace.WithRegion(ctx, "steamMilk", steamMilk) +// trace.WithRegion(ctx, "extractCoffee", extractCoffee) +// trace.WithRegion(ctx, "mixMilkCoffee", mixMilkCoffee) +// }) +// +// A task is a higher-level component that aids tracing of logical +// operations such as an RPC request, an HTTP request, or an +// interesting local operation which may require multiple goroutines +// working together. Since tasks can involve multiple goroutines, +// they are tracked via a context.Context object. NewTask creates +// a new task and embeds it in the returned context.Context object. +// Log messages and regions are attached to the task, if any, in the +// Context passed to Log and WithRegion. +// +// For example, assume that we decided to froth milk, extract coffee, +// and mix milk and coffee in separate goroutines. With a task, +// the trace tool can identify the goroutines involved in a specific +// cappuccino order. +// +// ctx, task := trace.NewTask(ctx, "makeCappuccino") +// trace.Log(ctx, "orderID", orderID) +// +// milk := make(chan bool) +// espresso := make(chan bool) +// +// go func() { +// trace.WithRegion(ctx, "steamMilk", steamMilk) +// milk <- true +// }() +// go func() { +// trace.WithRegion(ctx, "extractCoffee", extractCoffee) +// espresso <- true +// }() +// go func() { +// defer task.End() // When assemble is done, the order is complete. +// <-espresso +// <-milk +// trace.WithRegion(ctx, "mixMilkCoffee", mixMilkCoffee) +// }() +// +// +// The trace tool computes the latency of a task by measuring the +// time between the task creation and the task end and provides +// latency distributions for each task type found in the trace. package trace import ( "io" "runtime" + "sync" + "sync/atomic" ) // Start enables tracing for the current program. // While tracing, the trace will be buffered and written to w. // Start returns an error if tracing is already enabled. func Start(w io.Writer) error { + tracing.Lock() + defer tracing.Unlock() + if err := runtime.StartTrace(); err != nil { return err } @@ -55,11 +133,21 @@ func Start(w io.Writer) error { w.Write(data) } }() + atomic.StoreInt32(&tracing.enabled, 1) return nil } // Stop stops the current tracing, if any. // Stop only returns after all the writes for the trace have completed. func Stop() { + tracing.Lock() + defer tracing.Unlock() + atomic.StoreInt32(&tracing.enabled, 0) + runtime.StopTrace() } + +var tracing struct { + sync.Mutex // gate mutators (Start, Stop) + enabled int32 // accessed via atomic +} diff --git a/libgo/go/runtime/trace/trace_stack_test.go b/libgo/go/runtime/trace/trace_stack_test.go index 274cdf7..62c06e6 100644 --- a/libgo/go/runtime/trace/trace_stack_test.go +++ b/libgo/go/runtime/trace/trace_stack_test.go @@ -6,14 +6,17 @@ package trace_test import ( "bytes" + "fmt" "internal/testenv" "internal/trace" "net" "os" "runtime" . "runtime/trace" + "strings" "sync" "testing" + "text/tabwriter" "time" ) @@ -21,7 +24,7 @@ import ( // In particular that we strip bottom uninteresting frames like goexit, // top uninteresting frames (runtime guts). func TestTraceSymbolize(t *testing.T) { - testenv.MustHaveGoBuild(t) + skipTraceSymbolizeTestIfNecessary(t) buf := new(bytes.Buffer) if err := Start(buf); err != nil { @@ -34,28 +37,28 @@ func TestTraceSymbolize(t *testing.T) { // on a channel, in a select or otherwise. So we kick off goroutines // that need to block first in the hope that while we are executing // the rest of the test, they will block. 
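Editor's note on the Start/Stop changes above: state transitions are serialized with a package mutex while the on/off state is published through an atomic int32, so IsEnabled and the annotation fast paths can read it without locking. A stand-alone sketch of that gate pattern (the names here are invented):

    package main

    import (
        "fmt"
        "sync"
        "sync/atomic"
    )

    var gate struct {
        sync.Mutex       // serializes enable/disable, like tracing.Lock in the diff
        enabled    int32 // read atomically on the fast path, like tracing.enabled
    }

    func enable() {
        gate.Lock()
        defer gate.Unlock()
        atomic.StoreInt32(&gate.enabled, 1)
    }

    func disable() {
        gate.Lock()
        defer gate.Unlock()
        atomic.StoreInt32(&gate.enabled, 0)
    }

    // isEnabled is advisory only: the state may change right after it returns,
    // which is the same caveat IsEnabled documents.
    func isEnabled() bool { return atomic.LoadInt32(&gate.enabled) == 1 }

    func main() {
        enable()
        fmt.Println(isEnabled()) // true
        disable()
        fmt.Println(isEnabled()) // false
    }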
- go func() { + go func() { // func1 select {} }() - go func() { + go func() { // func2 var c chan int c <- 0 }() - go func() { + go func() { // func3 var c chan int <-c }() done1 := make(chan bool) - go func() { + go func() { // func4 <-done1 }() done2 := make(chan bool) - go func() { + go func() { // func5 done2 <- true }() c1 := make(chan int) c2 := make(chan int) - go func() { + go func() { // func6 select { case <-c1: case <-c2: @@ -63,17 +66,17 @@ func TestTraceSymbolize(t *testing.T) { }() var mu sync.Mutex mu.Lock() - go func() { + go func() { // func7 mu.Lock() mu.Unlock() }() var wg sync.WaitGroup wg.Add(1) - go func() { + go func() { // func8 wg.Wait() }() cv := sync.NewCond(&sync.Mutex{}) - go func() { + go func() { // func9 cv.L.Lock() cv.Wait() cv.L.Unlock() @@ -82,7 +85,7 @@ func TestTraceSymbolize(t *testing.T) { if err != nil { t.Fatalf("failed to listen: %v", err) } - go func() { + go func() { // func10 c, err := ln.Accept() if err != nil { t.Errorf("failed to accept: %v", err) @@ -97,7 +100,7 @@ func TestTraceSymbolize(t *testing.T) { defer rp.Close() defer wp.Close() pipeReadDone := make(chan bool) - go func() { + go func() { // func11 var data [1]byte rp.Read(data[:]) pipeReadDone <- true @@ -125,14 +128,16 @@ func TestTraceSymbolize(t *testing.T) { wp.Write(data[:]) <-pipeReadDone + oldGoMaxProcs := runtime.GOMAXPROCS(0) + runtime.GOMAXPROCS(oldGoMaxProcs + 1) + Stop() + + runtime.GOMAXPROCS(oldGoMaxProcs) + events, _ := parseTrace(t, buf) // Now check that the stacks are correct. - type frame struct { - Fn string - Line int - } type eventDesc struct { Type byte Stk []frame @@ -140,90 +145,96 @@ func TestTraceSymbolize(t *testing.T) { want := []eventDesc{ {trace.EvGCStart, []frame{ {"runtime.GC", 0}, - {"runtime/trace_test.TestTraceSymbolize", 107}, + {"runtime/trace_test.TestTraceSymbolize", 0}, {"testing.tRunner", 0}, }}, {trace.EvGoStart, []frame{ - {"runtime/trace_test.TestTraceSymbolize.func1", 37}, + {"runtime/trace_test.TestTraceSymbolize.func1", 0}, }}, {trace.EvGoSched, []frame{ - {"runtime/trace_test.TestTraceSymbolize", 108}, + {"runtime/trace_test.TestTraceSymbolize", 111}, {"testing.tRunner", 0}, }}, {trace.EvGoCreate, []frame{ - {"runtime/trace_test.TestTraceSymbolize", 37}, + {"runtime/trace_test.TestTraceSymbolize", 40}, {"testing.tRunner", 0}, }}, {trace.EvGoStop, []frame{ {"runtime.block", 0}, - {"runtime/trace_test.TestTraceSymbolize.func1", 38}, + {"runtime/trace_test.TestTraceSymbolize.func1", 0}, }}, {trace.EvGoStop, []frame{ {"runtime.chansend1", 0}, - {"runtime/trace_test.TestTraceSymbolize.func2", 42}, + {"runtime/trace_test.TestTraceSymbolize.func2", 0}, }}, {trace.EvGoStop, []frame{ {"runtime.chanrecv1", 0}, - {"runtime/trace_test.TestTraceSymbolize.func3", 46}, + {"runtime/trace_test.TestTraceSymbolize.func3", 0}, }}, {trace.EvGoBlockRecv, []frame{ {"runtime.chanrecv1", 0}, - {"runtime/trace_test.TestTraceSymbolize.func4", 50}, + {"runtime/trace_test.TestTraceSymbolize.func4", 0}, }}, {trace.EvGoUnblock, []frame{ {"runtime.chansend1", 0}, - {"runtime/trace_test.TestTraceSymbolize", 110}, + {"runtime/trace_test.TestTraceSymbolize", 113}, {"testing.tRunner", 0}, }}, {trace.EvGoBlockSend, []frame{ {"runtime.chansend1", 0}, - {"runtime/trace_test.TestTraceSymbolize.func5", 54}, + {"runtime/trace_test.TestTraceSymbolize.func5", 0}, }}, {trace.EvGoUnblock, []frame{ {"runtime.chanrecv1", 0}, - {"runtime/trace_test.TestTraceSymbolize", 111}, + {"runtime/trace_test.TestTraceSymbolize", 114}, {"testing.tRunner", 0}, }}, {trace.EvGoBlockSelect, 
[]frame{ {"runtime.selectgo", 0}, - {"runtime/trace_test.TestTraceSymbolize.func6", 59}, + {"runtime/trace_test.TestTraceSymbolize.func6", 0}, }}, {trace.EvGoUnblock, []frame{ {"runtime.selectgo", 0}, - {"runtime/trace_test.TestTraceSymbolize", 112}, + {"runtime/trace_test.TestTraceSymbolize", 115}, {"testing.tRunner", 0}, }}, {trace.EvGoBlockSync, []frame{ {"sync.(*Mutex).Lock", 0}, - {"runtime/trace_test.TestTraceSymbolize.func7", 67}, + {"runtime/trace_test.TestTraceSymbolize.func7", 0}, }}, {trace.EvGoUnblock, []frame{ {"sync.(*Mutex).Unlock", 0}, - {"runtime/trace_test.TestTraceSymbolize", 116}, + {"runtime/trace_test.TestTraceSymbolize", 0}, {"testing.tRunner", 0}, }}, {trace.EvGoBlockSync, []frame{ {"sync.(*WaitGroup).Wait", 0}, - {"runtime/trace_test.TestTraceSymbolize.func8", 73}, + {"runtime/trace_test.TestTraceSymbolize.func8", 0}, }}, {trace.EvGoUnblock, []frame{ {"sync.(*WaitGroup).Add", 0}, {"sync.(*WaitGroup).Done", 0}, - {"runtime/trace_test.TestTraceSymbolize", 117}, + {"runtime/trace_test.TestTraceSymbolize", 120}, {"testing.tRunner", 0}, }}, {trace.EvGoBlockCond, []frame{ {"sync.(*Cond).Wait", 0}, - {"runtime/trace_test.TestTraceSymbolize.func9", 78}, + {"runtime/trace_test.TestTraceSymbolize.func9", 0}, }}, {trace.EvGoUnblock, []frame{ {"sync.(*Cond).Signal", 0}, - {"runtime/trace_test.TestTraceSymbolize", 118}, + {"runtime/trace_test.TestTraceSymbolize", 0}, {"testing.tRunner", 0}, }}, {trace.EvGoSleep, []frame{ {"time.Sleep", 0}, - {"runtime/trace_test.TestTraceSymbolize", 109}, + {"runtime/trace_test.TestTraceSymbolize", 0}, + {"testing.tRunner", 0}, + }}, + {trace.EvGomaxprocs, []frame{ + {"runtime.startTheWorld", 0}, // this is when the current gomaxprocs is logged. + {"runtime.GOMAXPROCS", 0}, + {"runtime/trace_test.TestTraceSymbolize", 0}, {"testing.tRunner", 0}, }}, } @@ -235,7 +246,7 @@ func TestTraceSymbolize(t *testing.T) { {"net.(*netFD).accept", 0}, {"net.(*TCPListener).accept", 0}, {"net.(*TCPListener).Accept", 0}, - {"runtime/trace_test.TestTraceSymbolize.func10", 86}, + {"runtime/trace_test.TestTraceSymbolize.func10", 0}, }}, {trace.EvGoSysCall, []frame{ {"syscall.read", 0}, @@ -243,7 +254,7 @@ func TestTraceSymbolize(t *testing.T) { {"internal/poll.(*FD).Read", 0}, {"os.(*File).read", 0}, {"os.(*File).Read", 0}, - {"runtime/trace_test.TestTraceSymbolize.func11", 102}, + {"runtime/trace_test.TestTraceSymbolize.func11", 0}, }}, }...) 
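Editor's note: the test above now bumps GOMAXPROCS by one before calling Stop so that a Gomaxprocs event, with a stack through runtime.GOMAXPROCS, lands in the trace and matches the new expected entry at the end of the want list. A stand-alone sketch of that save/bump/restore dance (the buffer and values are made up):

    package main

    import (
        "bytes"
        "runtime"
        "runtime/trace"
    )

    func main() {
        var buf bytes.Buffer
        if err := trace.Start(&buf); err != nil {
            panic(err)
        }

        old := runtime.GOMAXPROCS(0) // read the current value without changing it
        runtime.GOMAXPROCS(old + 1)  // force a Gomaxprocs event into the trace
        trace.Stop()
        runtime.GOMAXPROCS(old) // restore the original setting
    }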
} @@ -264,22 +275,57 @@ func TestTraceSymbolize(t *testing.T) { matched[i] = true } } - for i, m := range matched { - if m { + for i, w := range want { + if matched[i] { continue } - w := want[i] - t.Errorf("did not match event %v at %v:%v", trace.EventDescriptions[w.Type].Name, w.Stk[0].Fn, w.Stk[0].Line) - t.Errorf("seen the following events of this type:") - for _, ev := range events { - if ev.Type != w.Type { - continue - } - for _, f := range ev.Stk { - t.Logf(" %v :: %s:%v", f.Fn, f.File, f.Line) + seen, n := dumpEventStacks(w.Type, events) + t.Errorf("Did not match event %v with stack\n%s\nSeen %d events of the type\n%s", + trace.EventDescriptions[w.Type].Name, dumpFrames(w.Stk), n, seen) + } +} + +func skipTraceSymbolizeTestIfNecessary(t *testing.T) { + testenv.MustHaveGoBuild(t) + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } +} + +func dumpEventStacks(typ byte, events []*trace.Event) ([]byte, int) { + matched := 0 + o := new(bytes.Buffer) + tw := tabwriter.NewWriter(o, 0, 8, 0, '\t', 0) + for _, ev := range events { + if ev.Type != typ { + continue + } + matched++ + fmt.Fprintf(tw, "Offset %d\n", ev.Off) + for _, f := range ev.Stk { + fname := f.File + if idx := strings.Index(fname, "/go/src/"); idx > 0 { + fname = fname[idx:] } - t.Logf("---") + fmt.Fprintf(tw, " %v\t%s:%d\n", f.Fn, fname, f.Line) } - t.Logf("======") } + tw.Flush() + return o.Bytes(), matched +} + +type frame struct { + Fn string + Line int +} + +func dumpFrames(frames []frame) []byte { + o := new(bytes.Buffer) + tw := tabwriter.NewWriter(o, 0, 8, 0, '\t', 0) + + for _, f := range frames { + fmt.Fprintf(tw, " %v\t :%d\n", f.Fn, f.Line) + } + tw.Flush() + return o.Bytes() } diff --git a/libgo/go/runtime/trace/trace_test.go b/libgo/go/runtime/trace/trace_test.go index 997d486..fc81abc 100644 --- a/libgo/go/runtime/trace/trace_test.go +++ b/libgo/go/runtime/trace/trace_test.go @@ -31,6 +31,9 @@ func TestEventBatch(t *testing.T) { if race.Enabled { t.Skip("skipping in race mode") } + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } if testing.Short() { t.Skip("skipping in short mode") } @@ -81,6 +84,9 @@ func TestEventBatch(t *testing.T) { } func TestTraceStartStop(t *testing.T) { + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } buf := new(bytes.Buffer) if err := Start(buf); err != nil { t.Fatalf("failed to start tracing: %v", err) @@ -98,6 +104,9 @@ func TestTraceStartStop(t *testing.T) { } func TestTraceDoubleStart(t *testing.T) { + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } Stop() buf := new(bytes.Buffer) if err := Start(buf); err != nil { @@ -111,6 +120,9 @@ func TestTraceDoubleStart(t *testing.T) { } func TestTrace(t *testing.T) { + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } buf := new(bytes.Buffer) if err := Start(buf); err != nil { t.Fatalf("failed to start tracing: %v", err) @@ -168,6 +180,12 @@ func testBrokenTimestamps(t *testing.T, data []byte) { } func TestTraceStress(t *testing.T) { + if runtime.GOOS == "js" { + t.Skip("no os.Pipe on js") + } + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } var wg sync.WaitGroup done := make(chan bool) @@ -307,6 +325,12 @@ func TestTraceStress(t *testing.T) { // Do a bunch of various stuff (timers, GC, network, etc) in a separate goroutine. // And concurrently with all that start/stop trace 3 times. 
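Editor's note: dumpEventStacks and dumpFrames above use text/tabwriter to keep the "function, file:line" columns aligned in the failure message. A small self-contained sketch of that formatting pattern with made-up frame data:

    package main

    import (
        "fmt"
        "os"
        "text/tabwriter"
    )

    func main() {
        // Same tabwriter settings as in the diff: minwidth 0, tabwidth 8,
        // padding 0, pad with tabs, no flags.
        tw := tabwriter.NewWriter(os.Stdout, 0, 8, 0, '\t', 0)
        frames := []struct {
            fn   string
            file string
            line int
        }{
            {"runtime.GC", "/go/src/runtime/mgc.go", 1},          // hypothetical values
            {"testing.tRunner", "/go/src/testing/testing.go", 2}, // hypothetical values
        }
        for _, f := range frames {
            fmt.Fprintf(tw, "  %v\t%s:%d\n", f.fn, f.file, f.line)
        }
        tw.Flush()
    }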
func TestTraceStressStartStop(t *testing.T) { + if runtime.GOOS == "js" { + t.Skip("no os.Pipe on js") + } + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(8)) outerDone := make(chan bool) @@ -454,6 +478,9 @@ func TestTraceStressStartStop(t *testing.T) { } func TestTraceFutileWakeup(t *testing.T) { + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } buf := new(bytes.Buffer) if err := Start(buf); err != nil { t.Fatalf("failed to start tracing: %v", err) diff --git a/libgo/go/runtime/traceback_gccgo.go b/libgo/go/runtime/traceback_gccgo.go index 9456b1f..e97071e 100644 --- a/libgo/go/runtime/traceback_gccgo.go +++ b/libgo/go/runtime/traceback_gccgo.go @@ -141,8 +141,8 @@ func goroutineheader(gp *g) { } // Override. - if gpstatus == _Gwaiting && gp.waitreason != "" { - status = gp.waitreason + if gpstatus == _Gwaiting && gp.waitreason != waitReasonZero { + status = gp.waitreason.String() } // approx time the G is blocked, in minutes diff --git a/libgo/go/runtime/type.go b/libgo/go/runtime/type.go index 3c08f7e..8fd38c3 100644 --- a/libgo/go/runtime/type.go +++ b/libgo/go/runtime/type.go @@ -20,12 +20,30 @@ type _type struct { hashfn func(unsafe.Pointer, uintptr) uintptr equalfn func(unsafe.Pointer, unsafe.Pointer) bool - gcdata *byte - string *string + gcdata *byte + _string *string *uncommontype ptrToThis *_type } +func (t *_type) string() string { + return *t._string +} + +// pkgpath returns the path of the package where t was defined, if +// available. This is not the same as the reflect package's PkgPath +// method, in that it returns the package path for struct and interface +// types, not just named types. +func (t *_type) pkgpath() string { + if u := t.uncommontype; u != nil { + if u.pkgPath == nil { + return "" + } + return *u.pkgPath + } + return "" +} + // Return whether two type descriptors are equal. // This is gccgo-specific, as gccgo, unlike gc, permits multiple // independent descriptors for a single type. @@ -38,7 +56,7 @@ func eqtype(t1, t2 *_type) bool { case t1.kind != t2.kind || t1.hash != t2.hash: return false default: - return *t1.string == *t2.string + return t1.string() == t2.string() } } diff --git a/libgo/go/runtime/unaligned1.go b/libgo/go/runtime/unaligned1.go index 2f5b63a..86e0df0 100644 --- a/libgo/go/runtime/unaligned1.go +++ b/libgo/go/runtime/unaligned1.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build 386 amd64 amd64p32 arm64 ppc64 ppc64le s390x ppc s390 arm64be riscv64 +// +build 386 amd64 amd64p32 arm64 ppc64 ppc64le s390x wasm ppc s390 arm64be riscv64 package runtime diff --git a/libgo/go/runtime/utf8.go b/libgo/go/runtime/utf8.go index e845451..0ba0dad 100644 --- a/libgo/go/runtime/utf8.go +++ b/libgo/go/runtime/utf8.go @@ -46,6 +46,15 @@ const ( hicb = 0xBF // 1011 1111 ) +// countrunes returns the number of runes in s. +func countrunes(s string) int { + n := 0 + for range s { + n++ + } + return n +} + // decoderune returns the non-ASCII rune at the start of // s[k:] and the index after the rune in s. // |
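Editor's note: countrunes, added to utf8.go at the end of this diff, counts runes rather than bytes by relying on the fact that a range loop over a string decodes one rune per iteration. A quick stand-alone illustration of the difference from len:

    package main

    import "fmt"

    // countrunes returns the number of runes in s, as in the diff above.
    func countrunes(s string) int {
        n := 0
        for range s {
            n++
        }
        return n
    }

    func main() {
        s := "héllo"                       // 5 runes, 6 bytes (é is 2 bytes in UTF-8)
        fmt.Println(countrunes(s), len(s)) // prints: 5 6
    }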