author     Ian Lance Taylor <iant@golang.org>   2017-01-14 00:05:42 +0000
committer  Ian Lance Taylor <ian@gcc.gnu.org>   2017-01-14 00:05:42 +0000
commit     c2047754c300b68c05d65faa8dc2925fe67b71b4 (patch)
tree       e183ae81a1f48a02945cb6de463a70c5be1b06f6 /libgo/go/runtime
parent     829afb8f05602bb31c9c597b24df7377fed4f059 (diff)
libgo: update to Go 1.8 release candidate 1
Compiler changes:
* Change map assignment to use mapassign, which returns a pointer to the value slot so the compiled code assigns the value directly (see the first sketch after this list).
* Change string iteration to use decoderune, which is faster for ASCII strings.
* Change makeslice to take int, and use makeslice64 for larger values.
* Add a new noverflow field to the hmap struct used for maps (see the second sketch after this list).
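The mapassign change is visible in the hashmap.go hunks below: the old mapassign1(t, h, key, val) copied the value inside the runtime, while the new mapassign(t, h, key) returns a pointer to the value slot and the caller stores into it (reflect_mapassign, for example, becomes p := mapassign(t, h, key) followed by typedmemmove(t.elem, p, val)). The first sketch is a minimal toy of that calling shape using an ordinary Go map of pointers; toyAssign is a made-up name, not a runtime function, and the representation is not the runtime's bucket layout.

```go
package main

import "fmt"

// toyAssign mimics the shape of the new runtime.mapassign: instead of
// copying the value inside the call (as the old mapassign1 did), it
// returns a pointer to the value slot for the key and lets the caller
// store the value itself. The map-of-pointers representation is purely
// illustrative; it is not the runtime's bucket layout.
func toyAssign(m map[string]*int, key string) *int {
	p, ok := m[key]
	if !ok {
		p = new(int)
		m[key] = p
	}
	return p
}

func main() {
	m := make(map[string]*int)
	*toyAssign(m, "answer") = 42 // the caller writes directly into the slot
	fmt.Println(*m["answer"])    // prints 42
}
```

The noverflow field feeds the new same-size growth path: once a map has roughly as many overflow buckets as regular buckets, hashGrow keeps h.B unchanged and reallocates instead of doubling. To keep hmap small, noverflow is a uint16 that is exact for small maps and a probabilistic estimate for large ones. The second sketch restates the incrnoverflow/tooManyOverflowBuckets logic from the diff as a standalone program; toyMap and the math/rand-based fastrand stand-in are illustrative, not the runtime's types.

```go
package main

import (
	"fmt"
	"math/rand"
)

// fastrand stands in for the runtime's cheap random source.
func fastrand() uint32 { return rand.Uint32() }

// toyMap holds just the two hmap fields involved in overflow counting.
type toyMap struct {
	B         uint8  // log_2 of the number of buckets
	noverflow uint16 // approximate number of overflow buckets
}

// incrNoverflow mirrors hmap.incrnoverflow: count exactly while 1<<B
// fits in the uint16, probabilistically once the map has 2^16+ buckets.
func (h *toyMap) incrNoverflow() {
	if h.B < 16 {
		h.noverflow++
		return
	}
	// Increment with probability 1/(1<<(B-15)), so the counter reaches
	// about 1<<15 - 1 when there are roughly as many overflow buckets
	// as regular buckets.
	mask := uint32(1)<<(h.B-15) - 1
	if fastrand()&mask == 0 {
		h.noverflow++
	}
}

// tooManyOverflowBuckets mirrors the runtime's threshold: "too many"
// means approximately as many overflow buckets as regular buckets.
func (h *toyMap) tooManyOverflowBuckets() bool {
	if h.B < 16 {
		return h.noverflow >= uint16(1)<<h.B
	}
	return h.noverflow >= 1<<15
}

func main() {
	h := &toyMap{B: 18} // 2^18 buckets, so counting is probabilistic
	// Simulate allocating one overflow bucket per regular bucket.
	for i := 0; i < 1<<18; i++ {
		h.incrNoverflow()
	}
	// The estimate lands near 1<<15, i.e. right around the threshold
	// that makes hashGrow choose a same-size grow.
	fmt.Println(h.noverflow, h.tooManyOverflowBuckets())
}
```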
Unresolved problems, to be fixed later:
* Commented out test in go/types/sizes_test.go that doesn't compile.
* Commented out reflect.TestStructOf test for padding after zero-sized field.
Reviewed-on: https://go-review.googlesource.com/35231
gotools/:
Updates for Go 1.8rc1.
* Makefile.am (go_cmd_go_files): Add bug.go.
(s-zdefaultcc): Write defaultPkgConfig.
* Makefile.in: Rebuild.
From-SVN: r244456
Diffstat (limited to 'libgo/go/runtime')
95 files changed, 5141 insertions, 1845 deletions
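Many of the hashmap.go hunks in the patch below replace the hard-coded old-bucket lookup hash&(m>>1) with the new noldbuckets/oldbucketmask helpers, because under a same-size grow the old table has as many buckets as the new one. The following is a standalone sketch of that index arithmetic under those assumptions; the hash constant and B value are arbitrary examples, not runtime data.

```go
package main

import "fmt"

// noldbuckets mirrors the helper added in the diff: during a grow, the
// old table is half the current size only when the map is growing to a
// bigger size; for a same-size grow it has the same number of buckets.
func noldbuckets(B uint8, sameSizeGrow bool) uintptr {
	oldB := B
	if !sameSizeGrow {
		oldB--
	}
	return uintptr(1) << oldB
}

// oldbucketmask gives the mask used to find a key's bucket in the old table.
func oldbucketmask(B uint8, sameSizeGrow bool) uintptr {
	return noldbuckets(B, sameSizeGrow) - 1
}

func main() {
	hash := uintptr(0x7f4a7c15) // an arbitrary example hash
	B := uint8(5)               // 32 buckets after the grow
	m := uintptr(1)<<B - 1

	fmt.Println("new bucket:              ", hash&m)
	fmt.Println("old bucket (doubling):   ", hash&oldbucketmask(B, false))
	fmt.Println("old bucket (same-size):  ", hash&oldbucketmask(B, true))
}
```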
diff --git a/libgo/go/runtime/alg.go b/libgo/go/runtime/alg.go index 426b7f6..4946269 100644 --- a/libgo/go/runtime/alg.go +++ b/libgo/go/runtime/alg.go @@ -103,7 +103,7 @@ func f32hash(p unsafe.Pointer, h uintptr) uintptr { case f == 0: return c1 * (c0 ^ h) // +0, -0 case f != f: - return c1 * (c0 ^ h ^ uintptr(fastrand1())) // any kind of NaN + return c1 * (c0 ^ h ^ uintptr(fastrand())) // any kind of NaN default: return memhash(p, h, 4) } @@ -115,7 +115,7 @@ func f64hash(p unsafe.Pointer, h uintptr) uintptr { case f == 0: return c1 * (c0 ^ h) // +0, -0 case f != f: - return c1 * (c0 ^ h ^ uintptr(fastrand1())) // any kind of NaN + return c1 * (c0 ^ h ^ uintptr(fastrand())) // any kind of NaN default: return memhash(p, h, 8) } @@ -300,15 +300,6 @@ func efacevaleq(x eface, t *_type, p unsafe.Pointer) bool { return eq(x.data, p) } -func eqstring(x, y string) bool { - a := stringStructOf(&x) - b := stringStructOf(&y) - if a.len != b.len { - return false - } - return memcmp(unsafe.Pointer(a.str), unsafe.Pointer(b.str), uintptr(a.len)) == 0 -} - func cmpstring(x, y string) int { a := stringStructOf(&x) b := stringStructOf(&y) diff --git a/libgo/go/runtime/append_test.go b/libgo/go/runtime/append_test.go index 6b8968e..6bd8f3b 100644 --- a/libgo/go/runtime/append_test.go +++ b/libgo/go/runtime/append_test.go @@ -100,6 +100,22 @@ func BenchmarkAppendSlice(b *testing.B) { } } +var ( + blackhole []byte +) + +func BenchmarkAppendSliceLarge(b *testing.B) { + for _, length := range []int{1 << 10, 4 << 10, 16 << 10, 64 << 10, 256 << 10, 1024 << 10} { + y := make([]byte, length) + b.Run(fmt.Sprint(length, "Bytes"), func(b *testing.B) { + for i := 0; i < b.N; i++ { + blackhole = nil + blackhole = append(blackhole, y...) + } + }) + } +} + func BenchmarkAppendStr(b *testing.B) { for _, str := range []string{ "1", diff --git a/libgo/go/runtime/cgocheck.go b/libgo/go/runtime/cgocheck.go index 7e38d9c..fec3646 100644 --- a/libgo/go/runtime/cgocheck.go +++ b/libgo/go/runtime/cgocheck.go @@ -110,7 +110,7 @@ func cgoCheckTypedBlock(typ *_type, src unsafe.Pointer, off, size uintptr) { } // The type has a GC program. Try to find GC bits somewhere else. - for datap := &firstmoduledata; datap != nil; datap = datap.next { + for _, datap := range activeModules() { if cgoInRange(src, datap.data, datap.edata) { doff := uintptr(src) - datap.data cgoCheckBits(add(src, -doff), datap.gcdatamask.bytedata, off+doff, size) @@ -125,7 +125,7 @@ func cgoCheckTypedBlock(typ *_type, src unsafe.Pointer, off, size uintptr) { aoff := uintptr(src) - mheap_.arena_start idx := aoff >> _PageShift - s := h_spans[idx] + s := mheap_.spans[idx] if s.state == _MSpanStack { // There are no heap bits for value stored on the stack. // For a channel receive src might be on the stack of some diff --git a/libgo/go/runtime/chan.go b/libgo/go/runtime/chan.go index eb2cad6..a9574dd 100644 --- a/libgo/go/runtime/chan.go +++ b/libgo/go/runtime/chan.go @@ -7,10 +7,16 @@ package runtime // This file contains the implementation of Go channels. // Invariants: -// At least one of c.sendq and c.recvq is empty. +// At least one of c.sendq and c.recvq is empty, +// except for the case of an unbuffered channel with a single goroutine +// blocked on it for both sending and receiving using a select statement, +// in which case the length of c.sendq and c.recvq is limited only by the +// size of the select statement. +// // For buffered channels, also: // c.qcount > 0 implies that c.recvq is empty. // c.qcount < c.dataqsiz implies that c.sendq is empty. 
+ import ( "runtime/internal/atomic" "unsafe" @@ -290,23 +296,34 @@ func send(c *hchan, sg *sudog, ep unsafe.Pointer, unlockf func()) { goready(gp, 4) } +// Sends and receives on unbuffered or empty-buffered channels are the +// only operations where one running goroutine writes to the stack of +// another running goroutine. The GC assumes that stack writes only +// happen when the goroutine is running and are only done by that +// goroutine. Using a write barrier is sufficient to make up for +// violating that assumption, but the write barrier has to work. +// typedmemmove will call bulkBarrierPreWrite, but the target bytes +// are not in the heap, so that will not help. We arrange to call +// memmove and typeBitsBulkBarrier instead. + func sendDirect(t *_type, sg *sudog, src unsafe.Pointer) { - // Send on an unbuffered or empty-buffered channel is the only operation - // in the entire runtime where one goroutine - // writes to the stack of another goroutine. The GC assumes that - // stack writes only happen when the goroutine is running and are - // only done by that goroutine. Using a write barrier is sufficient to - // make up for violating that assumption, but the write barrier has to work. - // typedmemmove will call heapBitsBulkBarrier, but the target bytes - // are not in the heap, so that will not help. We arrange to call - // memmove and typeBitsBulkBarrier instead. + // src is on our stack, dst is a slot on another stack. // Once we read sg.elem out of sg, it will no longer // be updated if the destination's stack gets copied (shrunk). // So make sure that no preemption points can happen between read & use. dst := sg.elem + typeBitsBulkBarrier(t, uintptr(dst), uintptr(src), t.size) + memmove(dst, src, t.size) +} + +func recvDirect(t *_type, sg *sudog, dst unsafe.Pointer) { + // dst is on our stack or the heap, src is on another stack. + // The channel is locked, so src will not move during this + // operation. + src := sg.elem + typeBitsBulkBarrier(t, uintptr(dst), uintptr(src), t.size) memmove(dst, src, t.size) - typeBitsBulkBarrier(t, uintptr(dst), t.size) } func closechan(c *hchan) { @@ -337,7 +354,7 @@ func closechan(c *hchan) { break } if sg.elem != nil { - memclr(sg.elem, uintptr(c.elemsize)) + typedmemclr(c.elemtype, sg.elem) sg.elem = nil } if sg.releasetime != 0 { @@ -446,7 +463,7 @@ func chanrecv(t *chantype, c *hchan, ep unsafe.Pointer, block bool) (selected, r } unlock(&c.lock) if ep != nil { - memclr(ep, uintptr(c.elemsize)) + typedmemclr(c.elemtype, ep) } return true, false } @@ -470,7 +487,7 @@ func chanrecv(t *chantype, c *hchan, ep unsafe.Pointer, block bool) (selected, r if ep != nil { typedmemmove(c.elemtype, ep, qp) } - memclr(qp, uintptr(c.elemsize)) + typedmemclr(c.elemtype, qp) c.recvx++ if c.recvx == c.dataqsiz { c.recvx = 0 @@ -539,9 +556,7 @@ func recv(c *hchan, sg *sudog, ep unsafe.Pointer, unlockf func()) { } if ep != nil { // copy data from sender - // ep points to our own stack or heap, so nothing - // special (ala sendDirect) needed here. - typedmemmove(c.elemtype, ep, sg.elem) + recvDirect(c.elemtype, sg, ep) } } else { // Queue is full. 
Take the item at the diff --git a/libgo/go/runtime/chan_test.go b/libgo/go/runtime/chan_test.go index 4bd061d..b96af8a 100644 --- a/libgo/go/runtime/chan_test.go +++ b/libgo/go/runtime/chan_test.go @@ -215,11 +215,14 @@ func TestNonblockRecvRace(t *testing.T) { select { case <-c: default: - t.Fatal("chan is not ready") + t.Error("chan is not ready") } }() close(c) <-c + if t.Failed() { + return + } } } @@ -316,14 +319,16 @@ func TestSelfSelect(t *testing.T) { case c <- p: case v := <-c: if chanCap == 0 && v == p { - t.Fatalf("self receive") + t.Errorf("self receive") + return } } } else { select { case v := <-c: if chanCap == 0 && v == p { - t.Fatalf("self receive") + t.Errorf("self receive") + return } case c <- p: } diff --git a/libgo/go/runtime/cpuprof.go b/libgo/go/runtime/cpuprof.go index 873276f..e1206f9 100644 --- a/libgo/go/runtime/cpuprof.go +++ b/libgo/go/runtime/cpuprof.go @@ -4,7 +4,7 @@ // CPU profiling. // Based on algorithms and data structures used in -// http://code.google.com/p/google-perftools/. +// https://github.com/google/pprof. // // The main difference between this code and the google-perftools // code is that this code is written to allow copying the profile data @@ -68,6 +68,7 @@ type cpuprofEntry struct { stack [maxCPUProfStack]uintptr } +//go:notinheap type cpuProfile struct { on bool // profiling is on wait note // goroutine waits here diff --git a/libgo/go/runtime/crash_cgo_test.go b/libgo/go/runtime/crash_cgo_test.go index 2504bd0..347b820 100644 --- a/libgo/go/runtime/crash_cgo_test.go +++ b/libgo/go/runtime/crash_cgo_test.go @@ -19,10 +19,12 @@ import ( ) func TestCgoCrashHandler(t *testing.T) { + t.Parallel() testCrashHandler(t, true) } func TestCgoSignalDeadlock(t *testing.T) { + t.Parallel() if testing.Short() && runtime.GOOS == "windows" { t.Skip("Skipping in short mode") // takes up to 64 seconds } @@ -34,6 +36,7 @@ func TestCgoSignalDeadlock(t *testing.T) { } func TestCgoTraceback(t *testing.T) { + t.Parallel() got := runTestProg(t, "testprogcgo", "CgoTraceback") want := "OK\n" if got != want { @@ -42,7 +45,9 @@ func TestCgoTraceback(t *testing.T) { } func TestCgoCallbackGC(t *testing.T) { - if runtime.GOOS == "plan9" || runtime.GOOS == "windows" { + t.Parallel() + switch runtime.GOOS { + case "plan9", "windows": t.Skipf("no pthreads on %s", runtime.GOOS) } if testing.Short() { @@ -63,6 +68,7 @@ func TestCgoCallbackGC(t *testing.T) { } func TestCgoExternalThreadPanic(t *testing.T) { + t.Parallel() if runtime.GOOS == "plan9" { t.Skipf("no pthreads on %s", runtime.GOOS) } @@ -74,6 +80,7 @@ func TestCgoExternalThreadPanic(t *testing.T) { } func TestCgoExternalThreadSIGPROF(t *testing.T) { + t.Parallel() // issue 9456. 
switch runtime.GOOS { case "plan9", "windows": @@ -97,22 +104,42 @@ func TestCgoExternalThreadSIGPROF(t *testing.T) { // ppc64 (issue #8912) t.Skipf("no external linking on ppc64") } - got := runTestProg(t, "testprogcgo", "CgoExternalThreadSIGPROF") - want := "OK\n" - if got != want { + + exe, err := buildTestProg(t, "testprogcgo", "-tags=threadprof") + if err != nil { + t.Fatal(err) + } + + got, err := testEnv(exec.Command(exe, "CgoExternalThreadSIGPROF")).CombinedOutput() + if err != nil { + t.Fatalf("exit status: %v\n%s", err, got) + } + + if want := "OK\n"; string(got) != want { t.Fatalf("expected %q, but got:\n%s", want, got) } } func TestCgoExternalThreadSignal(t *testing.T) { + t.Parallel() // issue 10139 switch runtime.GOOS { case "plan9", "windows": t.Skipf("no pthreads on %s", runtime.GOOS) } - got := runTestProg(t, "testprogcgo", "CgoExternalThreadSignal") - want := "OK\n" - if got != want { + + exe, err := buildTestProg(t, "testprogcgo", "-tags=threadprof") + if err != nil { + t.Fatal(err) + } + + got, err := testEnv(exec.Command(exe, "CgoExternalThreadSIGPROF")).CombinedOutput() + if err != nil { + t.Fatalf("exit status: %v\n%s", err, got) + } + + want := []byte("OK\n") + if !bytes.Equal(got, want) { t.Fatalf("expected %q, but got:\n%s", want, got) } } @@ -130,6 +157,7 @@ func TestCgoDLLImports(t *testing.T) { } func TestCgoExecSignalMask(t *testing.T) { + t.Parallel() // Test issue 13164. switch runtime.GOOS { case "windows", "plan9": @@ -143,6 +171,7 @@ func TestCgoExecSignalMask(t *testing.T) { } func TestEnsureDropM(t *testing.T) { + t.Parallel() // Test for issue 13881. switch runtime.GOOS { case "windows", "plan9": @@ -159,6 +188,7 @@ func TestEnsureDropM(t *testing.T) { // Test that the program that doesn't need any cgo pointer checking // takes about the same amount of time with it as without it. func TestCgoCheckBytes(t *testing.T) { + t.Parallel() // Make sure we don't count the build time as part of the run time. 
testenv.MustHaveGoBuild(t) exe, err := buildTestProg(t, "testprogcgo") @@ -198,6 +228,7 @@ func TestCgoCheckBytes(t *testing.T) { } func TestCgoPanicDeadlock(t *testing.T) { + t.Parallel() // test issue 14432 got := runTestProg(t, "testprogcgo", "CgoPanicDeadlock") want := "panic: cgo error\n\n" @@ -207,6 +238,7 @@ func TestCgoPanicDeadlock(t *testing.T) { } func TestCgoCCodeSIGPROF(t *testing.T) { + t.Parallel() got := runTestProg(t, "testprogcgo", "CgoCCodeSIGPROF") want := "OK\n" if got != want { @@ -215,6 +247,7 @@ func TestCgoCCodeSIGPROF(t *testing.T) { } func TestCgoCrashTraceback(t *testing.T) { + t.Parallel() if runtime.GOOS != "linux" || runtime.GOARCH != "amd64" { t.Skipf("not yet supported on %s/%s", runtime.GOOS, runtime.GOARCH) } @@ -227,6 +260,7 @@ func TestCgoCrashTraceback(t *testing.T) { } func TestCgoTracebackContext(t *testing.T) { + t.Parallel() got := runTestProg(t, "testprogcgo", "TracebackContext") want := "OK\n" if got != want { @@ -235,6 +269,7 @@ func TestCgoTracebackContext(t *testing.T) { } func testCgoPprof(t *testing.T, buildArg, runArg string) { + t.Parallel() if runtime.GOOS != "linux" || runtime.GOARCH != "amd64" { t.Skipf("not yet supported on %s/%s", runtime.GOOS, runtime.GOARCH) } @@ -252,7 +287,7 @@ func testCgoPprof(t *testing.T, buildArg, runArg string) { fn := strings.TrimSpace(string(got)) defer os.Remove(fn) - cmd := testEnv(exec.Command("go", "tool", "pprof", "-top", "-nodecount=1", exe, fn)) + cmd := testEnv(exec.Command(testenv.GoToolPath(t), "tool", "pprof", "-top", "-nodecount=1", exe, fn)) found := false for i, e := range cmd.Env { @@ -288,3 +323,65 @@ func TestCgoPprofPIE(t *testing.T) { func TestCgoPprofThread(t *testing.T) { testCgoPprof(t, "", "CgoPprofThread") } + +func TestCgoPprofThreadNoTraceback(t *testing.T) { + testCgoPprof(t, "", "CgoPprofThreadNoTraceback") +} + +func TestRaceProf(t *testing.T) { + if runtime.GOOS != "linux" || runtime.GOARCH != "amd64" { + t.Skipf("not yet supported on %s/%s", runtime.GOOS, runtime.GOARCH) + } + + testenv.MustHaveGoRun(t) + + // This test requires building various packages with -race, so + // it's somewhat slow. + if testing.Short() { + t.Skip("skipping test in -short mode") + } + + exe, err := buildTestProg(t, "testprogcgo", "-race") + if err != nil { + t.Fatal(err) + } + + got, err := testEnv(exec.Command(exe, "CgoRaceprof")).CombinedOutput() + if err != nil { + t.Fatal(err) + } + want := "OK\n" + if string(got) != want { + t.Errorf("expected %q got %s", want, got) + } +} + +func TestRaceSignal(t *testing.T) { + t.Parallel() + if runtime.GOOS != "linux" || runtime.GOARCH != "amd64" { + t.Skipf("not yet supported on %s/%s", runtime.GOOS, runtime.GOARCH) + } + + testenv.MustHaveGoRun(t) + + // This test requires building various packages with -race, so + // it's somewhat slow. 
+ if testing.Short() { + t.Skip("skipping test in -short mode") + } + + exe, err := buildTestProg(t, "testprogcgo", "-race") + if err != nil { + t.Fatal(err) + } + + got, err := testEnv(exec.Command(exe, "CgoRaceSignal")).CombinedOutput() + if err != nil { + t.Logf("%s\n", got) + t.Fatal(err) + } + want := "OK\n" + if string(got) != want { + t.Errorf("expected %q got %s", want, got) + } +} diff --git a/libgo/go/runtime/crash_test.go b/libgo/go/runtime/crash_test.go index a2f7ff7..9ec0ae4 100644 --- a/libgo/go/runtime/crash_test.go +++ b/libgo/go/runtime/crash_test.go @@ -6,6 +6,7 @@ package runtime_test import ( "bytes" + "flag" "fmt" "internal/testenv" "io/ioutil" @@ -136,11 +137,10 @@ func buildTestProg(t *testing.T, binary string, flags ...string) (string, error) } exe := filepath.Join(testprog.dir, name+".exe") - cmd := exec.Command("go", append([]string{"build", "-o", exe}, flags...)...) + cmd := exec.Command(testenv.GoToolPath(t), append([]string{"build", "-o", exe}, flags...)...) cmd.Dir = "testdata/" + binary out, err := testEnv(cmd).CombinedOutput() if err != nil { - exe = "" target.err = fmt.Errorf("building %s %v: %v\n%s", binary, flags, err, out) testprog.target[name] = target return "", target.err @@ -158,7 +158,7 @@ var ( func checkStaleRuntime(t *testing.T) { staleRuntimeOnce.Do(func() { // 'go run' uses the installed copy of runtime.a, which may be out of date. - out, err := testEnv(exec.Command("go", "list", "-f", "{{.Stale}}", "runtime")).CombinedOutput() + out, err := testEnv(exec.Command(testenv.GoToolPath(t), "list", "-f", "{{.Stale}}", "runtime")).CombinedOutput() if err != nil { staleRuntimeErr = fmt.Errorf("failed to execute 'go list': %v\n%v", err, string(out)) return @@ -401,6 +401,7 @@ func TestRecoverBeforePanicAfterGoexit(t *testing.T) { } func TestNetpollDeadlock(t *testing.T) { + t.Parallel() output := runTestProg(t, "testprognet", "NetpollDeadlock") want := "done\n" if !strings.HasSuffix(output, want) { @@ -409,6 +410,7 @@ func TestNetpollDeadlock(t *testing.T) { } func TestPanicTraceback(t *testing.T) { + t.Parallel() output := runTestProg(t, "testprog", "PanicTraceback") want := "panic: hello" if !strings.HasPrefix(output, want) { @@ -416,7 +418,7 @@ func TestPanicTraceback(t *testing.T) { } // Check functions in the traceback. 
- fns := []string{"panic", "main.pt1.func1", "panic", "main.pt2.func1", "panic", "main.pt2", "main.pt1"} + fns := []string{"main.pt1.func1", "panic", "main.pt2.func1", "panic", "main.pt2", "main.pt1"} for _, fn := range fns { re := regexp.MustCompile(`(?m)^` + regexp.QuoteMeta(fn) + `\(.*\n`) idx := re.FindStringIndex(output) @@ -443,6 +445,13 @@ func TestPanicDeadlockSyscall(t *testing.T) { testPanicDeadlock(t, "SyscallInPanic", "1\n2\npanic: 3\n\n") } +func TestPanicLoop(t *testing.T) { + output := runTestProg(t, "testprog", "PanicLoop") + if want := "panic while printing panic value"; !strings.Contains(output, want) { + t.Errorf("output does not contain %q:\n%s", want, output) + } +} + func TestMemPprof(t *testing.T) { testenv.MustHaveGoRun(t) @@ -458,7 +467,7 @@ func TestMemPprof(t *testing.T) { fn := strings.TrimSpace(string(got)) defer os.Remove(fn) - cmd := testEnv(exec.Command("go", "tool", "pprof", "-alloc_space", "-top", exe, fn)) + cmd := testEnv(exec.Command(testenv.GoToolPath(t), "tool", "pprof", "-alloc_space", "-top", exe, fn)) found := false for i, e := range cmd.Env { @@ -482,3 +491,39 @@ func TestMemPprof(t *testing.T) { t.Error("missing MemProf in pprof output") } } + +var concurrentMapTest = flag.Bool("run_concurrent_map_tests", false, "also run flaky concurrent map tests") + +func TestConcurrentMapWrites(t *testing.T) { + if !*concurrentMapTest { + t.Skip("skipping without -run_concurrent_map_tests") + } + testenv.MustHaveGoRun(t) + output := runTestProg(t, "testprog", "concurrentMapWrites") + want := "fatal error: concurrent map writes" + if !strings.HasPrefix(output, want) { + t.Fatalf("output does not start with %q:\n%s", want, output) + } +} +func TestConcurrentMapReadWrite(t *testing.T) { + if !*concurrentMapTest { + t.Skip("skipping without -run_concurrent_map_tests") + } + testenv.MustHaveGoRun(t) + output := runTestProg(t, "testprog", "concurrentMapReadWrite") + want := "fatal error: concurrent map read and map write" + if !strings.HasPrefix(output, want) { + t.Fatalf("output does not start with %q:\n%s", want, output) + } +} +func TestConcurrentMapIterateWrite(t *testing.T) { + if !*concurrentMapTest { + t.Skip("skipping without -run_concurrent_map_tests") + } + testenv.MustHaveGoRun(t) + output := runTestProg(t, "testprog", "concurrentMapIterateWrite") + want := "fatal error: concurrent map iteration and map write" + if !strings.HasPrefix(output, want) { + t.Fatalf("output does not start with %q:\n%s", want, output) + } +} diff --git a/libgo/go/runtime/crash_unix_test.go b/libgo/go/runtime/crash_unix_test.go index 6e4d04b..97deed8 100644 --- a/libgo/go/runtime/crash_unix_test.go +++ b/libgo/go/runtime/crash_unix_test.go @@ -37,6 +37,8 @@ func TestCrashDumpsAllThreads(t *testing.T) { checkStaleRuntime(t) + t.Parallel() + dir, err := ioutil.TempDir("", "go-build") if err != nil { t.Fatalf("failed to create temp directory: %v", err) @@ -47,7 +49,7 @@ func TestCrashDumpsAllThreads(t *testing.T) { t.Fatalf("failed to create Go file: %v", err) } - cmd := exec.Command("go", "build", "-o", "a.exe") + cmd := exec.Command(testenv.GoToolPath(t), "build", "-o", "a.exe") cmd.Dir = dir out, err := testEnv(cmd).CombinedOutput() if err != nil { diff --git a/libgo/go/runtime/debug/garbage.go b/libgo/go/runtime/debug/garbage.go index 81444971..c82c024 100644 --- a/libgo/go/runtime/debug/garbage.go +++ b/libgo/go/runtime/debug/garbage.go @@ -71,7 +71,7 @@ func ReadGCStats(stats *GCStats) { // See the allocation at the top of the function. 
sorted := stats.Pause[n : n+n] copy(sorted, stats.Pause) - sort.Sort(byDuration(sorted)) + sort.Slice(sorted, func(i, j int) bool { return sorted[i] < sorted[j] }) nq := len(stats.PauseQuantiles) - 1 for i := 0; i < nq; i++ { stats.PauseQuantiles[i] = sorted[len(sorted)*i/nq] @@ -81,12 +81,6 @@ func ReadGCStats(stats *GCStats) { } } -type byDuration []time.Duration - -func (x byDuration) Len() int { return len(x) } -func (x byDuration) Swap(i, j int) { x[i], x[j] = x[j], x[i] } -func (x byDuration) Less(i, j int) bool { return x[i] < x[j] } - // SetGCPercent sets the garbage collection target percentage: // a collection is triggered when the ratio of freshly allocated data // to live data remaining after the previous collection reaches this percentage. diff --git a/libgo/go/runtime/debug/garbage_test.go b/libgo/go/runtime/debug/garbage_test.go index 6ec94aa..04e954b 100644 --- a/libgo/go/runtime/debug/garbage_test.go +++ b/libgo/go/runtime/debug/garbage_test.go @@ -80,7 +80,7 @@ func TestReadGCStats(t *testing.T) { for i := 0; i < n; i++ { dt := stats.PauseEnd[i] if dt.UnixNano() != int64(mstats.PauseEnd[off]) { - t.Errorf("stats.PauseEnd[%d] = %d, want %d", i, dt, mstats.PauseEnd[off]) + t.Errorf("stats.PauseEnd[%d] = %d, want %d", i, dt.UnixNano(), mstats.PauseEnd[off]) } off = (off + len(mstats.PauseEnd) - 1) % len(mstats.PauseEnd) } @@ -89,10 +89,6 @@ func TestReadGCStats(t *testing.T) { var big = make([]byte, 1<<20) func TestFreeOSMemory(t *testing.T) { - if runtime.GOARCH == "arm64" || runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "mips64" || runtime.GOARCH == "mips64le" || - runtime.GOOS == "nacl" { - t.Skip("issue 9993; scavenger temporarily disabled on systems with physical pages larger than logical pages") - } var ms1, ms2 runtime.MemStats if big == nil { @@ -118,3 +114,16 @@ func TestSetGCPercent(t *testing.T) { t.Errorf("SetGCPercent(123); SetGCPercent(x) = %d, want 123", new) } } + +func TestSetMaxThreadsOvf(t *testing.T) { + // Verify that a big threads count will not overflow the int32 + // maxmcount variable, causing a panic (see Issue 16076). + // + // This can only happen when ints are 64 bits, since on platforms + // with 32 bit ints SetMaxThreads (which takes an int parameter) + // cannot be given anything that will overflow an int32. + // + // Call SetMaxThreads with 1<<31, but only on 64 bit systems. + nt := SetMaxThreads(1 << (30 + ^uint(0)>>63)) + SetMaxThreads(nt) // restore previous value +} diff --git a/libgo/go/runtime/export_mmap_test.go b/libgo/go/runtime/export_mmap_test.go index 7bde44d..6e05bb9 100644 --- a/libgo/go/runtime/export_mmap_test.go +++ b/libgo/go/runtime/export_mmap_test.go @@ -2,14 +2,19 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build ignore - // Export guts for testing. 
package runtime var Mmap = mmap +var Munmap = munmap +var Errno = errno const ENOMEM = _ENOMEM const MAP_ANON = _MAP_ANON const MAP_PRIVATE = _MAP_PRIVATE +const MAP_FIXED = _MAP_FIXED + +func GetPhysPageSize() uintptr { + return physPageSize +} diff --git a/libgo/go/runtime/export_test.go b/libgo/go/runtime/export_test.go index 77b5900..fcc1e61 100644 --- a/libgo/go/runtime/export_test.go +++ b/libgo/go/runtime/export_test.go @@ -8,6 +8,7 @@ package runtime import ( "runtime/internal/atomic" + "runtime/internal/sys" "unsafe" ) @@ -30,6 +31,9 @@ var LockedOSThread = lockedOSThread // var FuncPC = funcPC +var Atoi = atoi +var Atoi32 = atoi32 + type LFNode struct { Next uint64 Pushcnt uintptr @@ -150,7 +154,11 @@ func RunSchedLocalQueueEmptyTest(iters int) { //var Int64Hash = int64Hash //var EfaceHash = efaceHash //var IfaceHash = ifaceHash -//var MemclrBytes = memclrBytes + +func MemclrBytes(b []byte) { + s := (*slice)(unsafe.Pointer(&b)) + memclrNoHeapPointers(s.array, uintptr(s.len)) +} var HashLoad = &hashLoad @@ -160,10 +168,7 @@ var HashLoad = &hashLoad // return //} -//var Gostringnocopy = gostringnocopy -//var Maxstring = &maxstring - -//type Uintreg uintreg +type Uintreg sys.Uintreg var Open = open var Close = closefd @@ -207,9 +212,6 @@ func BenchSetType(n int, x interface{}) { const PtrSize = sys.PtrSize -var TestingAssertE2I2GC = &testingAssertE2I2GC -var TestingAssertE2T2GC = &testingAssertE2T2GC - var ForceGCPeriod = &forcegcperiod */ @@ -230,7 +232,7 @@ func CountPagesInUse() (pagesInUse, counted uintptr) { pagesInUse = uintptr(mheap_.pagesInUse) - for _, s := range h_allspans { + for _, s := range mheap_.allspans { if s.state == mSpanInUse { counted += s.npages } diff --git a/libgo/go/runtime/extern.go b/libgo/go/runtime/extern.go index c221b1d..5aa76ac 100644 --- a/libgo/go/runtime/extern.go +++ b/libgo/go/runtime/extern.go @@ -57,6 +57,11 @@ It is a comma-separated list of name=val pairs setting these named variables: gcstackbarrierall: setting gcstackbarrierall=1 installs stack barriers in every stack frame, rather than in exponentially-spaced frames. + gcrescanstacks: setting gcrescanstacks=1 enables stack + re-scanning during the STW mark termination phase. This is + helpful for debugging if objects are being prematurely + garbage collected. + gcstoptheworld: setting gcstoptheworld=1 disables concurrent garbage collection, making every garbage collection a stop-the-world event. Setting gcstoptheworld=2 also disables concurrent sweeping after the garbage collection finishes. diff --git a/libgo/go/runtime/fastlog2.go b/libgo/go/runtime/fastlog2.go index 1029de2..6fbe572f4 100644 --- a/libgo/go/runtime/fastlog2.go +++ b/libgo/go/runtime/fastlog2.go @@ -10,7 +10,7 @@ import "unsafe" // fastlog2 implements a fast approximation to the base 2 log of a // float64. This is used to compute a geometric distribution for heap -// sampling, without introducing dependences into package math. This +// sampling, without introducing dependencies into package math. This // uses a very rough approximation using the float64 exponent and the // first 25 bits of the mantissa. The top 5 bits of the mantissa are // used to load limits from a table of constants and the rest are used @@ -31,5 +31,5 @@ func fastlog2(x float64) float64 { } // float64bits returns the IEEE 754 binary representation of f. -// Taken from math.Float64bits to avoid dependences into package math. +// Taken from math.Float64bits to avoid dependencies into package math. 
func float64bits(f float64) uint64 { return *(*uint64)(unsafe.Pointer(&f)) } diff --git a/libgo/go/runtime/gc_test.go b/libgo/go/runtime/gc_test.go index 11035c3..2a6acf0 100644 --- a/libgo/go/runtime/gc_test.go +++ b/libgo/go/runtime/gc_test.go @@ -400,39 +400,6 @@ func TestPrintGC(t *testing.T) { close(done) } -/* - -// The implicit y, ok := x.(error) for the case error -// in testTypeSwitch used to not initialize the result y -// before passing &y to assertE2I2GC. -// Catch this by making assertE2I2 call runtime.GC, -// which will force a stack scan and failure if there are -// bad pointers, and then fill the stack with bad pointers -// and run the type switch. -func TestAssertE2I2Liveness(t *testing.T) { - // Note that this flag is defined in export_test.go - // and is not available to ordinary imports of runtime. - *runtime.TestingAssertE2I2GC = true - defer func() { - *runtime.TestingAssertE2I2GC = false - }() - - poisonStack() - testTypeSwitch(io.EOF) - poisonStack() - testAssert(io.EOF) - poisonStack() - testAssertVar(io.EOF) -} - -func poisonStack() uintptr { - var x [1000]uintptr - for i := range x { - x[i] = 0xff - } - return x[123] -} - func testTypeSwitch(x interface{}) error { switch y := x.(type) { case nil: @@ -458,16 +425,6 @@ func testAssertVar(x interface{}) error { return nil } -func TestAssertE2T2Liveness(t *testing.T) { - *runtime.TestingAssertE2T2GC = true - defer func() { - *runtime.TestingAssertE2T2GC = false - }() - - poisonStack() - testIfaceEqual(io.EOF) -} - var a bool //go:noinline @@ -477,6 +434,8 @@ func testIfaceEqual(x interface{}) { } } +/* + func TestPageAccounting(t *testing.T) { // Grow the heap in small increments. This used to drop the // pages-in-use count below zero because of a rounding diff --git a/libgo/go/runtime/gcinfo_test.go b/libgo/go/runtime/gcinfo_test.go index 2253d93..4ac67dc 100644 --- a/libgo/go/runtime/gcinfo_test.go +++ b/libgo/go/runtime/gcinfo_test.go @@ -125,7 +125,7 @@ type BigStruct struct { func infoBigStruct() []byte { switch runtime.GOARCH { - case "386", "arm": + case "386", "arm", "mips", "mipsle": return []byte{ typePointer, // q *int typeScalar, typeScalar, typeScalar, typeScalar, typeScalar, // w byte; e [17]byte diff --git a/libgo/go/runtime/hashmap.go b/libgo/go/runtime/hashmap.go index 77b33f3..5b191d4 100644 --- a/libgo/go/runtime/hashmap.go +++ b/libgo/go/runtime/hashmap.go @@ -67,7 +67,7 @@ import ( //go:linkname mapaccess2 runtime.mapaccess2 //go:linkname mapaccess1_fat runtime.mapaccess1_fat //go:linkname mapaccess2_fat runtime.mapaccess2_fat -//go:linkname mapassign1 runtime.mapassign1 +//go:linkname mapassign runtime.mapassign //go:linkname mapdelete runtime.mapdelete //go:linkname mapiterinit runtime.mapiterinit //go:linkname mapiternext runtime.mapiternext @@ -106,9 +106,10 @@ const ( minTopHash = 4 // minimum tophash for a normal filled cell. // flags - iterator = 1 // there may be an iterator using buckets - oldIterator = 2 // there may be an iterator using oldbuckets - hashWriting = 4 // a goroutine is writing to the map + iterator = 1 // there may be an iterator using buckets + oldIterator = 2 // there may be an iterator using oldbuckets + hashWriting = 4 // a goroutine is writing to the map + sameSizeGrow = 8 // the current map growth is to a new map of the same size // sentinel bucket ID for iterator checks noCheck = 1<<(8*sys.PtrSize) - 1 @@ -118,10 +119,11 @@ const ( type hmap struct { // Note: the format of the Hmap is encoded in ../../cmd/internal/gc/reflect.go and // ../reflect/type.go. 
Don't change this structure without also changing that code! - count int // # live cells == size of map. Must be first (used by len() builtin) - flags uint8 - B uint8 // log_2 of # of buckets (can hold up to loadFactor * 2^B items) - hash0 uint32 // hash seed + count int // # live cells == size of map. Must be first (used by len() builtin) + flags uint8 + B uint8 // log_2 of # of buckets (can hold up to loadFactor * 2^B items) + noverflow uint16 // approximate number of overflow buckets; see incrnoverflow for details + hash0 uint32 // hash seed buckets unsafe.Pointer // array of 2^B Buckets. may be nil if count==0. oldbuckets unsafe.Pointer // previous bucket array of half the size, non-nil only when growing @@ -141,6 +143,9 @@ type hmap struct { // A bucket for a Go map. type bmap struct { + // tophash generally contains the top byte of the hash value + // for each key in this bucket. If tophash[0] < minTopHash, + // tophash[0] is a bucket evacuation state instead. tophash [bucketCnt]uint8 // Followed by bucketCnt keys and then bucketCnt values. // NOTE: packing all the keys together and then all the values together makes the @@ -178,7 +183,34 @@ func (b *bmap) overflow(t *maptype) *bmap { return *(**bmap)(add(unsafe.Pointer(b), uintptr(t.bucketsize)-sys.PtrSize)) } +// incrnoverflow increments h.noverflow. +// noverflow counts the number of overflow buckets. +// This is used to trigger same-size map growth. +// See also tooManyOverflowBuckets. +// To keep hmap small, noverflow is a uint16. +// When there are few buckets, noverflow is an exact count. +// When there are many buckets, noverflow is an approximate count. +func (h *hmap) incrnoverflow() { + // We trigger same-size map growth if there are + // as many overflow buckets as buckets. + // We need to be able to count to 1<<h.B. + if h.B < 16 { + h.noverflow++ + return + } + // Increment with probability 1/(1<<(h.B-15)). + // When we reach 1<<15 - 1, we will have approximately + // as many overflow buckets as buckets. + mask := uint32(1)<<(h.B-15) - 1 + // Example: if h.B == 18, then mask == 7, + // and fastrand & 7 == 0 with probability 1/8. + if fastrand()&mask == 0 { + h.noverflow++ + } +} + func (h *hmap) setoverflow(t *maptype, b, ovf *bmap) { + h.incrnoverflow() if t.bucket.kind&kindNoPointers != 0 { h.createOverflow() *h.overflow[0] = append(*h.overflow[0], ovf) @@ -251,7 +283,7 @@ func makemap(t *maptype, hint int64, h *hmap, bucket unsafe.Pointer) *hmap { // find size parameter which will hold the requested # of elements B := uint8(0) - for ; hint > bucketCnt && float32(hint) > loadFactor*float32(uintptr(1)<<B); B++ { + for ; overLoadFactor(hint, B); B++ { } // allocate initial hash table @@ -269,10 +301,11 @@ func makemap(t *maptype, hint int64, h *hmap, bucket unsafe.Pointer) *hmap { h.count = 0 h.B = B h.flags = 0 - h.hash0 = fastrand1() + h.hash0 = fastrand() h.buckets = buckets h.oldbuckets = nil h.nevacuate = 0 + h.noverflow = 0 return h } @@ -304,7 +337,11 @@ func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { m := uintptr(1)<<h.B - 1 b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) if c := h.oldbuckets; c != nil { - oldb := (*bmap)(add(c, (hash&(m>>1))*uintptr(t.bucketsize))) + if !h.sameSizeGrow() { + // There used to be half as many buckets; mask down one more power of two. 
+ m >>= 1 + } + oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) if !evacuated(oldb) { b = oldb } @@ -359,7 +396,11 @@ func mapaccess2(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, bool) m := uintptr(1)<<h.B - 1 b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + (hash&m)*uintptr(t.bucketsize))) if c := h.oldbuckets; c != nil { - oldb := (*bmap)(unsafe.Pointer(uintptr(c) + (hash&(m>>1))*uintptr(t.bucketsize))) + if !h.sameSizeGrow() { + // There used to be half as many buckets; mask down one more power of two. + m >>= 1 + } + oldb := (*bmap)(unsafe.Pointer(uintptr(c) + (hash&m)*uintptr(t.bucketsize))) if !evacuated(oldb) { b = oldb } @@ -397,16 +438,17 @@ func mapaccessK(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, unsafe if h == nil || h.count == 0 { return nil, nil } - if h.flags&hashWriting != 0 { - throw("concurrent map read and map write") - } hashfn := t.key.hashfn equalfn := t.key.equalfn hash := hashfn(key, uintptr(h.hash0)) m := uintptr(1)<<h.B - 1 b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + (hash&m)*uintptr(t.bucketsize))) if c := h.oldbuckets; c != nil { - oldb := (*bmap)(unsafe.Pointer(uintptr(c) + (hash&(m>>1))*uintptr(t.bucketsize))) + if !h.sameSizeGrow() { + // There used to be half as many buckets; mask down one more power of two. + m >>= 1 + } + oldb := (*bmap)(unsafe.Pointer(uintptr(c) + (hash&m)*uintptr(t.bucketsize))) if !evacuated(oldb) { b = oldb } @@ -455,20 +497,19 @@ func mapaccess2_fat(t *maptype, h *hmap, key, zero unsafe.Pointer) (unsafe.Point return v, true } -func mapassign1(t *maptype, h *hmap, key unsafe.Pointer, val unsafe.Pointer) { +// Like mapaccess, but allocates a slot for the key if it is not present in the map. +func mapassign(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { if h == nil { panic(plainError("assignment to entry in nil map")) } if raceenabled { callerpc := getcallerpc(unsafe.Pointer( /* &t */ nil)) - pc := funcPC(mapassign1) + pc := funcPC(mapassign) racewritepc(unsafe.Pointer(h), callerpc, pc) raceReadObjectPC(t.key, key, callerpc, pc) - raceReadObjectPC(t.elem, val, callerpc, pc) } if msanenabled { msanread(key, t.key.size) - msanread(val, t.elem.size) } if h.flags&hashWriting != 0 { throw("concurrent map writes") @@ -485,7 +526,7 @@ func mapassign1(t *maptype, h *hmap, key unsafe.Pointer, val unsafe.Pointer) { again: bucket := hash & (uintptr(1)<<h.B - 1) - if h.oldbuckets != nil { + if h.growing() { growWork(t, h, bucket) } b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) @@ -496,35 +537,29 @@ again: var inserti *uint8 var insertk unsafe.Pointer - var insertv unsafe.Pointer + var val unsafe.Pointer for { for i := uintptr(0); i < bucketCnt; i++ { if b.tophash[i] != top { if b.tophash[i] == empty && inserti == nil { inserti = &b.tophash[i] insertk = add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize)) - insertv = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) + val = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) } continue } k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize)) - k2 := k if t.indirectkey { - k2 = *((*unsafe.Pointer)(k2)) + k = *((*unsafe.Pointer)(k)) } - if !equalfn(key, k2) { + if !equalfn(key, k) { continue } // already have a mapping for key. Update it. 
if t.needkeyupdate { - typedmemmove(t.key, k2, key) + typedmemmove(t.key, k, key) } - v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) - v2 := v - if t.indirectvalue { - v2 = *((*unsafe.Pointer)(v2)) - } - typedmemmove(t.elem, v2, val) + val = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) goto done } ovf := b.overflow(t) @@ -534,8 +569,11 @@ again: b = ovf } - // did not find mapping for key. Allocate new cell & add entry. - if float32(h.count) >= loadFactor*float32((uintptr(1)<<h.B)) && h.count >= bucketCnt { + // Did not find mapping for key. Allocate new cell & add entry. + + // If we hit the max load factor or we have too many overflow buckets, + // and we're not already in the middle of growing, start growing. + if !h.growing() && (overLoadFactor(int64(h.count), h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { hashGrow(t, h) goto again // Growing the table invalidates everything, so try again } @@ -546,7 +584,7 @@ again: h.setoverflow(t, b, newb) inserti = &newb.tophash[0] insertk = add(unsafe.Pointer(newb), dataOffset) - insertv = add(insertk, bucketCnt*uintptr(t.keysize)) + val = add(insertk, bucketCnt*uintptr(t.keysize)) } // store new key/value at insert position @@ -557,11 +595,9 @@ again: } if t.indirectvalue { vmem := newobject(t.elem) - *(*unsafe.Pointer)(insertv) = vmem - insertv = vmem + *(*unsafe.Pointer)(val) = vmem } typedmemmove(t.key, insertk, key) - typedmemmove(t.elem, insertv, val) *inserti = top h.count++ @@ -570,6 +606,10 @@ done: throw("concurrent map writes") } h.flags &^= hashWriting + if t.indirectvalue { + val = *((*unsafe.Pointer)(val)) + } + return val } func mapdelete(t *maptype, h *hmap, key unsafe.Pointer) { @@ -594,7 +634,7 @@ func mapdelete(t *maptype, h *hmap, key unsafe.Pointer) { equalfn := t.key.equalfn hash := hashfn(key, uintptr(h.hash0)) bucket := hash & (uintptr(1)<<h.B - 1) - if h.oldbuckets != nil { + if h.growing() { growWork(t, h, bucket) } b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) @@ -615,9 +655,17 @@ func mapdelete(t *maptype, h *hmap, key unsafe.Pointer) { if !equalfn(key, k2) { continue } - memclr(k, uintptr(t.keysize)) + if t.indirectkey { + *(*unsafe.Pointer)(k) = nil + } else { + typedmemclr(t.key, k) + } v := unsafe.Pointer(uintptr(unsafe.Pointer(b)) + dataOffset + bucketCnt*uintptr(t.keysize) + i*uintptr(t.valuesize)) - memclr(v, uintptr(t.valuesize)) + if t.indirectvalue { + *(*unsafe.Pointer)(v) = nil + } else { + typedmemclr(t.elem, v) + } b.tophash[i] = empty h.count-- goto done @@ -676,9 +724,9 @@ func mapiterinit(t *maptype, h *hmap, it *hiter) { } // decide where to start - r := uintptr(fastrand1()) + r := uintptr(fastrand()) if h.B > 31-bucketCntBits { - r += uintptr(fastrand1()) << 31 + r += uintptr(fastrand()) << 31 } it.startBucket = r & (uintptr(1)<<h.B - 1) it.offset = uint8(r >> h.B & (bucketCnt - 1)) @@ -703,6 +751,9 @@ func mapiternext(it *hiter) { callerpc := getcallerpc(unsafe.Pointer( /* &it */ nil)) racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapiternext)) } + if h.flags&hashWriting != 0 { + throw("concurrent map iteration and map write") + } t := it.t bucket := it.bucket b := it.bptr @@ -719,12 +770,12 @@ next: it.value = nil return } - if h.oldbuckets != nil && it.B == h.B { + if h.growing() && it.B == h.B { // Iterator was started in the middle of a grow, and the grow isn't done yet. // If the bucket we're looking at hasn't been filled in yet (i.e. 
the old // bucket hasn't been evacuated) then we need to iterate through the old // bucket and only return the ones that will be migrated to this bucket. - oldbucket := bucket & (uintptr(1)<<(it.B-1) - 1) + oldbucket := bucket & it.h.oldbucketmask() b = (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) if !evacuated(b) { checkBucket = bucket @@ -748,9 +799,9 @@ next: k := add(unsafe.Pointer(b), dataOffset+uintptr(offi)*uintptr(t.keysize)) v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+uintptr(offi)*uintptr(t.valuesize)) if b.tophash[offi] != empty && b.tophash[offi] != evacuatedEmpty { - if checkBucket != noCheck { - // Special case: iterator was started during a grow and the - // grow is not done yet. We're working on a bucket whose + if checkBucket != noCheck && !h.sameSizeGrow() { + // Special case: iterator was started during a grow to a larger size + // and the grow is not done yet. We're working on a bucket whose // oldbucket has not been evacuated yet. Or at least, it wasn't // evacuated when we started the bucket. So we're iterating // through the oldbucket, skipping any keys that will go @@ -836,21 +887,27 @@ next: } func hashGrow(t *maptype, h *hmap) { - if h.oldbuckets != nil { - throw("evacuation not done in time") + // If we've hit the load factor, get bigger. + // Otherwise, there are too many overflow buckets, + // so keep the same number of buckets and "grow" laterally. + bigger := uint8(1) + if !overLoadFactor(int64(h.count), h.B) { + bigger = 0 + h.flags |= sameSizeGrow } oldbuckets := h.buckets - newbuckets := newarray(t.bucket, 1<<(h.B+1)) + newbuckets := newarray(t.bucket, 1<<(h.B+bigger)) flags := h.flags &^ (iterator | oldIterator) if h.flags&iterator != 0 { flags |= oldIterator } // commit the grow (atomic wrt gc) - h.B++ + h.B += bigger h.flags = flags h.oldbuckets = oldbuckets h.buckets = newbuckets h.nevacuate = 0 + h.noverflow = 0 if h.overflow != nil { // Promote current overflow buckets to the old generation. @@ -865,36 +922,88 @@ func hashGrow(t *maptype, h *hmap) { // by growWork() and evacuate(). } -func growWork(t *maptype, h *hmap, bucket uintptr) { - noldbuckets := uintptr(1) << (h.B - 1) +// overLoadFactor reports whether count items placed in 1<<B buckets is over loadFactor. +func overLoadFactor(count int64, B uint8) bool { + // TODO: rewrite to use integer math and comparison? + return count >= bucketCnt && float32(count) >= loadFactor*float32((uintptr(1)<<B)) +} + +// tooManyOverflowBuckets reports whether noverflow buckets is too many for a map with 1<<B buckets. +// Note that most of these overflow buckets must be in sparse use; +// if use was dense, then we'd have already triggered regular map growth. +func tooManyOverflowBuckets(noverflow uint16, B uint8) bool { + // If the threshold is too low, we do extraneous work. + // If the threshold is too high, maps that grow and shrink can hold on to lots of unused memory. + // "too many" means (approximately) as many overflow buckets as regular buckets. + // See incrnoverflow for more details. + if B < 16 { + return noverflow >= uint16(1)<<B + } + return noverflow >= 1<<15 +} + +// growing reports whether h is growing. The growth may be to the same size or bigger. +func (h *hmap) growing() bool { + return h.oldbuckets != nil +} + +// sameSizeGrow reports whether the current growth is to a map of the same size. +func (h *hmap) sameSizeGrow() bool { + return h.flags&sameSizeGrow != 0 +} + +// noldbuckets calculates the number of buckets prior to the current map growth. 
+func (h *hmap) noldbuckets() uintptr { + oldB := h.B + if !h.sameSizeGrow() { + oldB-- + } + return uintptr(1) << oldB +} + +// oldbucketmask provides a mask that can be applied to calculate n % noldbuckets(). +func (h *hmap) oldbucketmask() uintptr { + return h.noldbuckets() - 1 +} +func growWork(t *maptype, h *hmap, bucket uintptr) { // make sure we evacuate the oldbucket corresponding // to the bucket we're about to use - evacuate(t, h, bucket&(noldbuckets-1)) + evacuate(t, h, bucket&h.oldbucketmask()) // evacuate one more oldbucket to make progress on growing - if h.oldbuckets != nil { + if h.growing() { evacuate(t, h, h.nevacuate) } } func evacuate(t *maptype, h *hmap, oldbucket uintptr) { b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) - newbit := uintptr(1) << (h.B - 1) + newbit := h.noldbuckets() hashfn := t.key.hashfn equalfn := t.key.equalfn if !evacuated(b) { // TODO: reuse overflow buckets instead of using new ones, if there // is no iterator using the old buckets. (If !oldIterator.) - x := (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) - y := (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) - xi := 0 - yi := 0 - xk := add(unsafe.Pointer(x), dataOffset) - yk := add(unsafe.Pointer(y), dataOffset) - xv := add(xk, bucketCnt*uintptr(t.keysize)) - yv := add(yk, bucketCnt*uintptr(t.keysize)) + var ( + x, y *bmap // current low/high buckets in new map + xi, yi int // key/val indices into x and y + xk, yk unsafe.Pointer // pointers to current x and y key storage + xv, yv unsafe.Pointer // pointers to current x and y value storage + ) + x = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) + xi = 0 + xk = add(unsafe.Pointer(x), dataOffset) + xv = add(xk, bucketCnt*uintptr(t.keysize)) + if !h.sameSizeGrow() { + // Only calculate y pointers if we're growing bigger. + // Otherwise GC can see bad pointers. + y = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) + yi = 0 + yk = add(unsafe.Pointer(y), dataOffset) + yv = add(yk, bucketCnt*uintptr(t.keysize)) + } for ; b != nil; b = b.overflow(t) { k := add(unsafe.Pointer(b), dataOffset) v := add(k, bucketCnt*uintptr(t.keysize)) @@ -911,34 +1020,38 @@ func evacuate(t *maptype, h *hmap, oldbucket uintptr) { if t.indirectkey { k2 = *((*unsafe.Pointer)(k2)) } - // Compute hash to make our evacuation decision (whether we need - // to send this key/value to bucket x or bucket y). - hash := hashfn(k2, uintptr(h.hash0)) - if h.flags&iterator != 0 { - if !t.reflexivekey && !equalfn(k2, k2) { - // If key != key (NaNs), then the hash could be (and probably - // will be) entirely different from the old hash. Moreover, - // it isn't reproducible. Reproducibility is required in the - // presence of iterators, as our evacuation decision must - // match whatever decision the iterator made. - // Fortunately, we have the freedom to send these keys either - // way. Also, tophash is meaningless for these kinds of keys. - // We let the low bit of tophash drive the evacuation decision. - // We recompute a new random tophash for the next level so - // these keys will get evenly distributed across all buckets - // after multiple grows. - if (top & 1) != 0 { - hash |= newbit - } else { - hash &^= newbit - } - top = uint8(hash >> (sys.PtrSize*8 - 8)) - if top < minTopHash { - top += minTopHash + useX := true + if !h.sameSizeGrow() { + // Compute hash to make our evacuation decision (whether we need + // to send this key/value to bucket x or bucket y). 
+ hash := hashfn(k2, uintptr(h.hash0)) + if h.flags&iterator != 0 { + if !t.reflexivekey && !equalfn(k2, k2) { + // If key != key (NaNs), then the hash could be (and probably + // will be) entirely different from the old hash. Moreover, + // it isn't reproducible. Reproducibility is required in the + // presence of iterators, as our evacuation decision must + // match whatever decision the iterator made. + // Fortunately, we have the freedom to send these keys either + // way. Also, tophash is meaningless for these kinds of keys. + // We let the low bit of tophash drive the evacuation decision. + // We recompute a new random tophash for the next level so + // these keys will get evenly distributed across all buckets + // after multiple grows. + if top&1 != 0 { + hash |= newbit + } else { + hash &^= newbit + } + top = uint8(hash >> (sys.PtrSize*8 - 8)) + if top < minTopHash { + top += minTopHash + } } } + useX = hash&newbit == 0 } - if (hash & newbit) == 0 { + if useX { b.tophash[i] = evacuatedX if xi == bucketCnt { newx := (*bmap)(newobject(t.bucket)) @@ -992,7 +1105,13 @@ func evacuate(t *maptype, h *hmap, oldbucket uintptr) { // Unlink the overflow buckets & clear key/value to help GC. if h.flags&oldIterator == 0 { b = (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) - memclr(add(unsafe.Pointer(b), dataOffset), uintptr(t.bucketsize)-dataOffset) + // Preserve b.tophash because the evacuation + // state is maintained there. + if t.bucket.kind&kindNoPointers == 0 { + memclrHasPointers(add(unsafe.Pointer(b), dataOffset), uintptr(t.bucketsize)-dataOffset) + } else { + memclrNoHeapPointers(add(unsafe.Pointer(b), dataOffset), uintptr(t.bucketsize)-dataOffset) + } } } @@ -1008,6 +1127,7 @@ func evacuate(t *maptype, h *hmap, oldbucket uintptr) { if h.overflow != nil { h.overflow[1] = nil } + h.flags &^= sameSizeGrow } } } @@ -1035,7 +1155,8 @@ func reflect_mapaccess(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { //go:linkname reflect_mapassign reflect.mapassign func reflect_mapassign(t *maptype, h *hmap, key unsafe.Pointer, val unsafe.Pointer) { - mapassign1(t, h, key, val) + p := mapassign(t, h, key) + typedmemmove(t.elem, p, val) } //go:linkname reflect_mapdelete reflect.mapdelete diff --git a/libgo/go/runtime/hashmap_fast.go b/libgo/go/runtime/hashmap_fast.go index c6cad9d..853da70 100644 --- a/libgo/go/runtime/hashmap_fast.go +++ b/libgo/go/runtime/hashmap_fast.go @@ -29,7 +29,11 @@ func mapaccess1_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer { m := uintptr(1)<<h.B - 1 b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) if c := h.oldbuckets; c != nil { - oldb := (*bmap)(add(c, (hash&(m>>1))*uintptr(t.bucketsize))) + if !h.sameSizeGrow() { + // There used to be half as many buckets; mask down one more power of two. + m >>= 1 + } + oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) if !evacuated(oldb) { b = oldb } @@ -74,7 +78,11 @@ func mapaccess2_fast32(t *maptype, h *hmap, key uint32) (unsafe.Pointer, bool) { m := uintptr(1)<<h.B - 1 b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) if c := h.oldbuckets; c != nil { - oldb := (*bmap)(add(c, (hash&(m>>1))*uintptr(t.bucketsize))) + if !h.sameSizeGrow() { + // There used to be half as many buckets; mask down one more power of two. 
+ m >>= 1 + } + oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) if !evacuated(oldb) { b = oldb } @@ -119,7 +127,11 @@ func mapaccess1_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer { m := uintptr(1)<<h.B - 1 b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) if c := h.oldbuckets; c != nil { - oldb := (*bmap)(add(c, (hash&(m>>1))*uintptr(t.bucketsize))) + if !h.sameSizeGrow() { + // There used to be half as many buckets; mask down one more power of two. + m >>= 1 + } + oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) if !evacuated(oldb) { b = oldb } @@ -164,7 +176,11 @@ func mapaccess2_fast64(t *maptype, h *hmap, key uint64) (unsafe.Pointer, bool) { m := uintptr(1)<<h.B - 1 b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) if c := h.oldbuckets; c != nil { - oldb := (*bmap)(add(c, (hash&(m>>1))*uintptr(t.bucketsize))) + if !h.sameSizeGrow() { + // There used to be half as many buckets; mask down one more power of two. + m >>= 1 + } + oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) if !evacuated(oldb) { b = oldb } @@ -264,7 +280,11 @@ dohash: m := uintptr(1)<<h.B - 1 b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) if c := h.oldbuckets; c != nil { - oldb := (*bmap)(add(c, (hash&(m>>1))*uintptr(t.bucketsize))) + if !h.sameSizeGrow() { + // There used to be half as many buckets; mask down one more power of two. + m >>= 1 + } + oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) if !evacuated(oldb) { b = oldb } @@ -367,7 +387,11 @@ dohash: m := uintptr(1)<<h.B - 1 b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) if c := h.oldbuckets; c != nil { - oldb := (*bmap)(add(c, (hash&(m>>1))*uintptr(t.bucketsize))) + if !h.sameSizeGrow() { + // There used to be half as many buckets; mask down one more power of two. + m >>= 1 + } + oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) if !evacuated(oldb) { b = oldb } diff --git a/libgo/go/runtime/iface.go b/libgo/go/runtime/iface.go index 5274734..62d47ce 100644 --- a/libgo/go/runtime/iface.go +++ b/libgo/go/runtime/iface.go @@ -261,7 +261,7 @@ func ifaceI2T2P(t *_type, i iface) (unsafe.Pointer, bool) { // Convert an empty interface to a non-pointer non-interface type. func ifaceE2T2(t *_type, e eface, ret unsafe.Pointer) bool { if !eqtype(t, e._type) { - memclr(ret, t.size) + typedmemclr(t, ret) return false } else { typedmemmove(t, ret, e.data) @@ -272,7 +272,7 @@ func ifaceE2T2(t *_type, e eface, ret unsafe.Pointer) bool { // Convert a non-empty interface to a non-pointer non-interface type. 
func ifaceI2T2(t *_type, i iface, ret unsafe.Pointer) bool { if i.tab == nil || !eqtype(t, *(**_type)(i.tab)) { - memclr(ret, t.size) + typedmemclr(t, ret) return false } else { typedmemmove(t, ret, i.data) diff --git a/libgo/go/runtime/internal/atomic/atomic.c b/libgo/go/runtime/internal/atomic/atomic.c index 3393fbe..b584656 100644 --- a/libgo/go/runtime/internal/atomic/atomic.c +++ b/libgo/go/runtime/internal/atomic/atomic.c @@ -33,6 +33,8 @@ uint64_t Load64 (uint64_t *ptr) uint64_t Load64 (uint64_t *ptr) { + if (((uintptr_t) ptr & 7) != 0) + ptr = NULL; return __atomic_load_n (ptr, __ATOMIC_ACQUIRE); } @@ -63,6 +65,8 @@ int64_t Loadint64 (int64_t *ptr) int64_t Loadint64 (int64_t *ptr) { + if (((uintptr_t) ptr & 7) != 0) + ptr = NULL; return __atomic_load_n (ptr, __ATOMIC_ACQUIRE); } @@ -83,6 +87,8 @@ uint64_t Xadd64 (uint64_t *ptr, int64_t delta) uint64_t Xadd64 (uint64_t *ptr, int64_t delta) { + if (((uintptr_t) ptr & 7) != 0) + ptr = NULL; return __atomic_add_fetch (ptr, (uint64_t) delta, __ATOMIC_SEQ_CST); } @@ -103,6 +109,8 @@ int64_t Xaddint64 (int64_t *ptr, int64_t delta) int64_t Xaddint64 (int64_t *ptr, int64_t delta) { + if (((uintptr_t) ptr & 7) != 0) + ptr = NULL; return __atomic_add_fetch (ptr, delta, __ATOMIC_SEQ_CST); } @@ -123,6 +131,8 @@ uint64_t Xchg64 (uint64_t *ptr, uint64_t new) uint64_t Xchg64 (uint64_t *ptr, uint64_t new) { + if (((uintptr_t) ptr & 7) != 0) + ptr = NULL; return __atomic_exchange_n (ptr, new, __ATOMIC_SEQ_CST); } @@ -173,6 +183,8 @@ _Bool Cas64 (uint64_t *ptr, uint64_t old, uint64_t new) _Bool Cas64 (uint64_t *ptr, uint64_t old, uint64_t new) { + if (((uintptr_t) ptr & 7) != 0) + ptr = NULL; return __atomic_compare_exchange_n (ptr, &old, new, false, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED); } @@ -213,6 +225,8 @@ void Store64 (uint64_t *ptr, uint64_t val) void Store64 (uint64_t *ptr, uint64_t val) { + if (((uintptr_t) ptr & 7) != 0) + ptr = NULL; __atomic_store_n (ptr, val, __ATOMIC_SEQ_CST); } diff --git a/libgo/go/runtime/internal/atomic/atomic_test.go b/libgo/go/runtime/internal/atomic/atomic_test.go index d5dc552..879a82f 100644 --- a/libgo/go/runtime/internal/atomic/atomic_test.go +++ b/libgo/go/runtime/internal/atomic/atomic_test.go @@ -7,6 +7,7 @@ package atomic_test import ( "runtime" "runtime/internal/atomic" + "runtime/internal/sys" "testing" "unsafe" ) @@ -51,13 +52,13 @@ func TestXadduintptr(t *testing.T) { // Tests that xadduintptr correctly updates 64-bit values. The place where // we actually do so is mstats.go, functions mSysStat{Inc,Dec}. func TestXadduintptrOnUint64(t *testing.T) { - /* if runtime.BigEndian != 0 { + if sys.BigEndian != 0 { // On big endian architectures, we never use xadduintptr to update // 64-bit values and hence we skip the test. (Note that functions // mSysStat{Inc,Dec} in mstats.go have explicit checks for // big-endianness.) - return - }*/ + t.Skip("skip xadduintptr on big endian architecture") + } const inc = 100 val := uint64(0) atomic.Xadduintptr((*uintptr)(unsafe.Pointer(&val)), inc) @@ -65,3 +66,40 @@ func TestXadduintptrOnUint64(t *testing.T) { t.Fatalf("xadduintptr should increase lower-order bits, want %d, got %d", inc, val) } } + +func shouldPanic(t *testing.T, name string, f func()) { + defer func() { + if recover() == nil { + t.Errorf("%s did not panic", name) + } + }() + f() +} + +// Variant of sync/atomic's TestUnaligned64: +func TestUnaligned64(t *testing.T) { + // Unaligned 64-bit atomics on 32-bit systems are + // a continual source of pain. 
Test that on 32-bit systems they crash + // instead of failing silently. + + switch runtime.GOARCH { + default: + if unsafe.Sizeof(int(0)) != 4 { + t.Skip("test only runs on 32-bit systems") + } + case "amd64p32": + // amd64p32 can handle unaligned atomics. + t.Skipf("test not needed on %v", runtime.GOARCH) + } + + x := make([]uint32, 4) + up64 := (*uint64)(unsafe.Pointer(&x[1])) // misaligned + p64 := (*int64)(unsafe.Pointer(&x[1])) // misaligned + + shouldPanic(t, "Load64", func() { atomic.Load64(up64) }) + shouldPanic(t, "Loadint64", func() { atomic.Loadint64(p64) }) + shouldPanic(t, "Store64", func() { atomic.Store64(up64, 0) }) + shouldPanic(t, "Xadd64", func() { atomic.Xadd64(up64, 1) }) + shouldPanic(t, "Xchg64", func() { atomic.Xchg64(up64, 1) }) + shouldPanic(t, "Cas64", func() { atomic.Cas64(up64, 1, 2) }) +} diff --git a/libgo/go/runtime/internal/atomic/bench_test.go b/libgo/go/runtime/internal/atomic/bench_test.go new file mode 100644 index 0000000..47010e3 --- /dev/null +++ b/libgo/go/runtime/internal/atomic/bench_test.go @@ -0,0 +1,28 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package atomic_test + +import ( + "runtime/internal/atomic" + "testing" +) + +var sink interface{} + +func BenchmarkAtomicLoad64(b *testing.B) { + var x uint64 + sink = &x + for i := 0; i < b.N; i++ { + _ = atomic.Load64(&x) + } +} + +func BenchmarkAtomicStore64(b *testing.B) { + var x uint64 + sink = &x + for i := 0; i < b.N; i++ { + atomic.Store64(&x, 0) + } +} diff --git a/libgo/go/runtime/internal/sys/intrinsics.go b/libgo/go/runtime/internal/sys/intrinsics.go index f33209a..43acf34 100644 --- a/libgo/go/runtime/internal/sys/intrinsics.go +++ b/libgo/go/runtime/internal/sys/intrinsics.go @@ -32,28 +32,6 @@ func Ctz32(x uint32) uint32 { return uint32(builtinCtz32(x)) } -//go:nosplit - -// Ctz16 counts trailing (low-order) zeroes, -// and if all are zero, then 16. -func Ctz16(x uint16) uint16 { - if x == 0 { - return 16 - } - return uint16(builtinCtz32(uint32(x))) -} - -//go:nosplit - -// Ctz8 counts trailing (low-order) zeroes, -// and if all are zero, then 8. -func Ctz8(x uint8) uint8 { - if x == 0 { - return 8 - } - return uint8(builtinCtz32(uint32(x))) -} - //extern __builtin_bswap64 func bswap64(uint64) uint64 diff --git a/libgo/go/runtime/internal/sys/intrinsics_test.go b/libgo/go/runtime/internal/sys/intrinsics_test.go index 097631b..1f2c8da 100644 --- a/libgo/go/runtime/internal/sys/intrinsics_test.go +++ b/libgo/go/runtime/internal/sys/intrinsics_test.go @@ -21,22 +21,6 @@ func TestCtz32(t *testing.T) { } } } -func TestCtz16(t *testing.T) { - for i := uint(0); i <= 16; i++ { - x := uint16(5) << i - if got := sys.Ctz16(x); got != uint16(i) { - t.Errorf("Ctz16(%d)=%d, want %d", x, got, i) - } - } -} -func TestCtz8(t *testing.T) { - for i := uint(0); i <= 8; i++ { - x := uint8(5) << i - if got := sys.Ctz8(x); got != uint8(i) { - t.Errorf("Ctz8(%d)=%d, want %d", x, got, i) - } - } -} func TestBswap64(t *testing.T) { x := uint64(0x1122334455667788) diff --git a/libgo/go/runtime/malloc_test.go b/libgo/go/runtime/malloc_test.go index 4f92627..bc5530c 100644 --- a/libgo/go/runtime/malloc_test.go +++ b/libgo/go/runtime/malloc_test.go @@ -14,6 +14,10 @@ import ( func TestMemStats(t *testing.T) { t.Skip("skipping test with gccgo") + + // Make sure there's at least one forced GC. + GC() + // Test that MemStats has sane values. 
st := new(MemStats) ReadMemStats(st) @@ -25,7 +29,7 @@ func TestMemStats(t *testing.T) { st.HeapInuse == 0 || st.HeapObjects == 0 || st.StackInuse == 0 || st.StackSys == 0 || st.MSpanInuse == 0 || st.MSpanSys == 0 || st.MCacheInuse == 0 || st.MCacheSys == 0 || st.BuckHashSys == 0 || st.GCSys == 0 || st.OtherSys == 0 || - st.NextGC == 0 { + st.NextGC == 0 || st.NumForcedGC == 0 { t.Fatalf("Zero value: %+v", *st) } @@ -34,7 +38,7 @@ func TestMemStats(t *testing.T) { st.HeapIdle > 1e10 || st.HeapInuse > 1e10 || st.HeapObjects > 1e10 || st.StackInuse > 1e10 || st.StackSys > 1e10 || st.MSpanInuse > 1e10 || st.MSpanSys > 1e10 || st.MCacheInuse > 1e10 || st.MCacheSys > 1e10 || st.BuckHashSys > 1e10 || st.GCSys > 1e10 || st.OtherSys > 1e10 || - st.NextGC > 1e10 || st.NumGC > 1e9 || st.PauseTotalNs > 1e11 { + st.NextGC > 1e10 || st.NumGC > 1e9 || st.NumForcedGC > 1e9 || st.PauseTotalNs > 1e11 { t.Fatalf("Insanely high value (overflow?): %+v", *st) } if st.Sys != st.HeapSys+st.StackSys+st.MSpanSys+st.MCacheSys+ @@ -72,6 +76,10 @@ func TestMemStats(t *testing.T) { t.Fatalf("PauseTotalNs(%d) < sum PauseNs(%d)", st.PauseTotalNs, pauseTotal) } } + + if st.NumForcedGC > st.NumGC { + t.Fatalf("NumForcedGC(%d) > NumGC(%d)", st.NumForcedGC, st.NumGC) + } } func TestStringConcatenationAllocs(t *testing.T) { diff --git a/libgo/go/runtime/map_test.go b/libgo/go/runtime/map_test.go index 77affdf..9b5b051 100644 --- a/libgo/go/runtime/map_test.go +++ b/libgo/go/runtime/map_test.go @@ -235,6 +235,7 @@ func TestIterGrowWithGC(t *testing.T) { } func testConcurrentReadsAfterGrowth(t *testing.T, useReflect bool) { + t.Parallel() if runtime.GOMAXPROCS(-1) == 1 { if runtime.GOARCH == "s390" { // Test uses too much address space on 31-bit S390. diff --git a/libgo/go/runtime/mcache.go b/libgo/go/runtime/mcache.go index e383e0d..b65dd37 100644 --- a/libgo/go/runtime/mcache.go +++ b/libgo/go/runtime/mcache.go @@ -9,15 +9,6 @@ package runtime import "unsafe" -const ( - // Computed constant. The definition of MaxSmallSize and the - // algorithm in msize.go produces some number of different allocation - // size classes. NumSizeClasses is that number. It's needed here - // because there are static arrays of this length; when msize runs its - // size choosing algorithm it double-checks that NumSizeClasses agrees. - _NumSizeClasses = 67 -) - type mcachelist struct { list *mlink nlist uint32 @@ -28,6 +19,8 @@ type mcachelist struct { // // mcaches are allocated from non-GC'd memory, so any heap pointers // must be specially handled. +// +//go:notinheap type mcache struct { // The following members are accessed on every malloc, // so they are grouped here for better caching. diff --git a/libgo/go/runtime/mksizeclasses.go b/libgo/go/runtime/mksizeclasses.go new file mode 100644 index 0000000..0f897ba --- /dev/null +++ b/libgo/go/runtime/mksizeclasses.go @@ -0,0 +1,325 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +// Generate tables for small malloc size classes. +// +// See malloc.go for overview. +// +// The size classes are chosen so that rounding an allocation +// request up to the next size class wastes at most 12.5% (1.125x). +// +// Each size class has its own page count that gets allocated +// and chopped up when new objects of the size class are needed. 
+// That page count is chosen so that chopping up the run of +// pages into objects of the given size wastes at most 12.5% (1.125x) +// of the memory. It is not necessary that the cutoff here be +// the same as above. +// +// The two sources of waste multiply, so the worst possible case +// for the above constraints would be that allocations of some +// size might have a 26.6% (1.266x) overhead. +// In practice, only one of the wastes comes into play for a +// given size (sizes < 512 waste mainly on the round-up, +// sizes > 512 waste mainly on the page chopping). +// +// TODO(rsc): Compute max waste for any given size. + +package main + +import ( + "bytes" + "flag" + "fmt" + "go/format" + "io" + "io/ioutil" + "log" + "os" +) + +// Generate msize.go + +var stdout = flag.Bool("stdout", false, "write to stdout instead of sizeclasses.go") + +func main() { + flag.Parse() + + var b bytes.Buffer + fmt.Fprintln(&b, "// AUTO-GENERATED by mksizeclasses.go; DO NOT EDIT") + fmt.Fprintln(&b, "//go:generate go run mksizeclasses.go") + fmt.Fprintln(&b) + fmt.Fprintln(&b, "package runtime") + classes := makeClasses() + + printComment(&b, classes) + + printClasses(&b, classes) + + out, err := format.Source(b.Bytes()) + if err != nil { + log.Fatal(err) + } + if *stdout { + _, err = os.Stdout.Write(out) + } else { + err = ioutil.WriteFile("sizeclasses.go", out, 0666) + } + if err != nil { + log.Fatal(err) + } +} + +const ( + // Constants that we use and will transfer to the runtime. + maxSmallSize = 32 << 10 + smallSizeDiv = 8 + smallSizeMax = 1024 + largeSizeDiv = 128 + pageShift = 13 + + // Derived constants. + pageSize = 1 << pageShift +) + +type class struct { + size int // max size + npages int // number of pages + + mul int + shift uint + shift2 uint + mask int +} + +func powerOfTwo(x int) bool { + return x != 0 && x&(x-1) == 0 +} + +func makeClasses() []class { + var classes []class + + classes = append(classes, class{}) // class #0 is a dummy entry + + align := 8 + for size := align; size <= maxSmallSize; size += align { + if powerOfTwo(size) { // bump alignment once in a while + if size >= 2048 { + align = 256 + } else if size >= 128 { + align = size / 8 + } else if size >= 16 { + align = 16 // required for x86 SSE instructions, if we want to use them + } + } + if !powerOfTwo(align) { + panic("incorrect alignment") + } + + // Make the allocnpages big enough that + // the leftover is less than 1/8 of the total, + // so wasted space is at most 12.5%. + allocsize := pageSize + for allocsize%size > allocsize/8 { + allocsize += pageSize + } + npages := allocsize / pageSize + + // If the previous sizeclass chose the same + // allocation size and fit the same number of + // objects into the page, we might as well + // use just this size instead of having two + // different sizes. + if len(classes) > 1 && npages == classes[len(classes)-1].npages && allocsize/size == allocsize/classes[len(classes)-1].size { + classes[len(classes)-1].size = size + continue + } + classes = append(classes, class{size: size, npages: npages}) + } + + // Increase object sizes if we can fit the same number of larger objects + // into the same number of pages. For example, we choose size 8448 above + // with 6 objects in 7 pages. But we can well use object size 9472, + // which is also 6 objects in 7 pages but +1024 bytes (+12.12%). + // We need to preserve at least largeSizeDiv alignment otherwise + // sizeToClass won't work. 
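The 8448-to-9472 example in the comment above can be reproduced directly; a small standalone check of that arithmetic:

package main

import "fmt"

func main() {
	// Class size 8448 uses 7 pages of 8 KiB and fits 6 objects per span.
	const pageSize = 8192
	const largeSizeDiv = 128
	psize := 7 * pageSize // 57344 bytes per span
	objects := psize / 8448
	// Grow the object size as far as possible without changing the object
	// count, keeping largeSizeDiv alignment.
	newSize := (psize / objects) &^ (largeSizeDiv - 1)
	fmt.Println(objects, newSize, newSize-8448) // 6 9472 1024 (+12.12%)
}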
+ for i := range classes { + if i == 0 { + continue + } + c := &classes[i] + psize := c.npages * pageSize + new_size := (psize / (psize / c.size)) &^ (largeSizeDiv - 1) + if new_size > c.size { + c.size = new_size + } + } + + if len(classes) != 67 { + panic("number of size classes has changed") + } + + for i := range classes { + computeDivMagic(&classes[i]) + } + + return classes +} + +// computeDivMagic computes some magic constants to implement +// the division required to compute object number from span offset. +// n / c.size is implemented as n >> c.shift * c.mul >> c.shift2 +// for all 0 <= n < c.npages * pageSize +func computeDivMagic(c *class) { + // divisor + d := c.size + if d == 0 { + return + } + + // maximum input value for which the formula needs to work. + max := c.npages*pageSize - 1 + + if powerOfTwo(d) { + // If the size is a power of two, heapBitsForObject can divide even faster by masking. + // Compute this mask. + if max >= 1<<16 { + panic("max too big for power of two size") + } + c.mask = 1<<16 - d + } + + // Compute pre-shift by factoring power of 2 out of d. + for d%2 == 0 { + c.shift++ + d >>= 1 + max >>= 1 + } + + // Find the smallest k that works. + // A small k allows us to fit the math required into 32 bits + // so we can use 32-bit multiplies and shifts on 32-bit platforms. +nextk: + for k := uint(0); ; k++ { + mul := (int(1)<<k + d - 1) / d // ⌈2^k / d⌉ + + // Test to see if mul works. + for n := 0; n <= max; n++ { + if n*mul>>k != n/d { + continue nextk + } + } + if mul >= 1<<16 { + panic("mul too big") + } + if uint64(mul)*uint64(max) >= 1<<32 { + panic("mul*max too big") + } + c.mul = mul + c.shift2 = k + break + } + + // double-check. + for n := 0; n <= max; n++ { + if n*c.mul>>c.shift2 != n/d { + fmt.Printf("d=%d max=%d mul=%d shift2=%d n=%d\n", d, max, c.mul, c.shift2, n) + panic("bad multiply magic") + } + // Also check the exact computations that will be done by the runtime, + // for both 32 and 64 bit operations. 
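The shift/mul/shift2 scheme that computeDivMagic verifies can be checked in isolation. A short sketch that redoes the same search for one divisor; the 48-byte size and single 8 KiB page here are illustrative inputs, not the generated tables:

package main

import "fmt"

// findMagic reproduces, in miniature, the search in computeDivMagic above:
// for divisor d and inputs 0..max, find shift/mul/shift2 so that
// n/d == n>>shift*mul>>shift2 using only multiplies and shifts.
func findMagic(d, max int) (shift, shift2 uint, mul int) {
	for d%2 == 0 { // factor powers of two out of the divisor first
		shift++
		d >>= 1
		max >>= 1
	}
search:
	for k := uint(0); ; k++ {
		m := (1<<k + d - 1) / d // ceil(2^k / d)
		for n := 0; n <= max; n++ {
			if n*m>>k != n/d {
				continue search
			}
		}
		return shift, k, m
	}
}

func main() {
	shift, shift2, mul := findMagic(48, 8191)
	fmt.Println(shift, shift2, mul)
	// Spot check: object index of byte offset 5000 within the span.
	n := 5000
	fmt.Println(n/48, n>>shift*mul>>shift2) // both print 104
}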
+ if uint32(n)*uint32(c.mul)>>uint8(c.shift2) != uint32(n/d) { + fmt.Printf("d=%d max=%d mul=%d shift2=%d n=%d\n", d, max, c.mul, c.shift2, n) + panic("bad 32-bit multiply magic") + } + if uint64(n)*uint64(c.mul)>>uint8(c.shift2) != uint64(n/d) { + fmt.Printf("d=%d max=%d mul=%d shift2=%d n=%d\n", d, max, c.mul, c.shift2, n) + panic("bad 64-bit multiply magic") + } + } +} + +func printComment(w io.Writer, classes []class) { + fmt.Fprintf(w, "// %-5s %-9s %-10s %-7s %-11s\n", "class", "bytes/obj", "bytes/span", "objects", "waste bytes") + for i, c := range classes { + if i == 0 { + continue + } + spanSize := c.npages * pageSize + objects := spanSize / c.size + waste := spanSize - c.size*(spanSize/c.size) + fmt.Fprintf(w, "// %5d %9d %10d %7d %11d\n", i, c.size, spanSize, objects, waste) + } + fmt.Fprintf(w, "\n") +} + +func printClasses(w io.Writer, classes []class) { + fmt.Fprintln(w, "const (") + fmt.Fprintf(w, "_MaxSmallSize = %d\n", maxSmallSize) + fmt.Fprintf(w, "smallSizeDiv = %d\n", smallSizeDiv) + fmt.Fprintf(w, "smallSizeMax = %d\n", smallSizeMax) + fmt.Fprintf(w, "largeSizeDiv = %d\n", largeSizeDiv) + fmt.Fprintf(w, "_NumSizeClasses = %d\n", len(classes)) + fmt.Fprintf(w, "_PageShift = %d\n", pageShift) + fmt.Fprintln(w, ")") + + fmt.Fprint(w, "var class_to_size = [_NumSizeClasses]uint16 {") + for _, c := range classes { + fmt.Fprintf(w, "%d,", c.size) + } + fmt.Fprintln(w, "}") + + fmt.Fprint(w, "var class_to_allocnpages = [_NumSizeClasses]uint8 {") + for _, c := range classes { + fmt.Fprintf(w, "%d,", c.npages) + } + fmt.Fprintln(w, "}") + + fmt.Fprintln(w, "type divMagic struct {") + fmt.Fprintln(w, " shift uint8") + fmt.Fprintln(w, " shift2 uint8") + fmt.Fprintln(w, " mul uint16") + fmt.Fprintln(w, " baseMask uint16") + fmt.Fprintln(w, "}") + fmt.Fprint(w, "var class_to_divmagic = [_NumSizeClasses]divMagic {") + for _, c := range classes { + fmt.Fprintf(w, "{%d,%d,%d,%d},", c.shift, c.shift2, c.mul, c.mask) + } + fmt.Fprintln(w, "}") + + // map from size to size class, for small sizes. + sc := make([]int, smallSizeMax/smallSizeDiv+1) + for i := range sc { + size := i * smallSizeDiv + for j, c := range classes { + if c.size >= size { + sc[i] = j + break + } + } + } + fmt.Fprint(w, "var size_to_class8 = [smallSizeMax/smallSizeDiv+1]uint8 {") + for _, v := range sc { + fmt.Fprintf(w, "%d,", v) + } + fmt.Fprintln(w, "}") + + // map from size to size class, for large sizes. + sc = make([]int, (maxSmallSize-smallSizeMax)/largeSizeDiv+1) + for i := range sc { + size := smallSizeMax + i*largeSizeDiv + for j, c := range classes { + if c.size >= size { + sc[i] = j + break + } + } + } + fmt.Fprint(w, "var size_to_class128 = [(_MaxSmallSize-smallSizeMax)/largeSizeDiv+1]uint8 {") + for _, v := range sc { + fmt.Fprintf(w, "%d,", v) + } + fmt.Fprintln(w, "}") +} diff --git a/libgo/go/runtime/mmap.go b/libgo/go/runtime/mmap.go deleted file mode 100644 index 02aafdd..0000000 --- a/libgo/go/runtime/mmap.go +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -// +build !plan9 -// +build !solaris -// +build !windows -// +build !nacl -// +build !linux !amd64 - -package runtime - -import "unsafe" - -// mmap calls the mmap system call. It is implemented in assembly. -// We only pass the lower 32 bits of file offset to the -// assembly routine; the higher bits (if required), should be provided -// by the assembly routine as 0. 
-func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) unsafe.Pointer diff --git a/libgo/go/runtime/mprof.go b/libgo/go/runtime/mprof.go index e190017..1bfdc39 100644 --- a/libgo/go/runtime/mprof.go +++ b/libgo/go/runtime/mprof.go @@ -31,6 +31,7 @@ const ( // profile types memProfile bucketType = 1 + iota blockProfile + mutexProfile // size of bucket hash table buckHashSize = 179999 @@ -49,10 +50,14 @@ type bucketType int // // Per-call-stack profiling information. // Lookup by hashing call stack into a linked-list hash table. +// +// No heap pointers. +// +//go:notinheap type bucket struct { next *bucket allnext *bucket - typ bucketType // memBucket or blockBucket + typ bucketType // memBucket or blockBucket (includes mutexProfile) hash uintptr size uintptr nstk uintptr @@ -92,7 +97,7 @@ type memRecord struct { } // A blockRecord is the bucket data for a bucket of type blockProfile, -// part of the blocking profile. +// which is used in blocking and mutex profiles. type blockRecord struct { count int64 cycles int64 @@ -101,6 +106,7 @@ type blockRecord struct { var ( mbuckets *bucket // memory profile buckets bbuckets *bucket // blocking profile buckets + xbuckets *bucket // mutex profile buckets buckhash *[179999]*bucket bucketmem uintptr ) @@ -113,7 +119,7 @@ func newBucket(typ bucketType, nstk int) *bucket { throw("invalid profile bucket type") case memProfile: size += unsafe.Sizeof(memRecord{}) - case blockProfile: + case blockProfile, mutexProfile: size += unsafe.Sizeof(blockRecord{}) } @@ -141,7 +147,7 @@ func (b *bucket) mp() *memRecord { // bp returns the blockRecord associated with the blockProfile bucket b. func (b *bucket) bp() *blockRecord { - if b.typ != blockProfile { + if b.typ != blockProfile && b.typ != mutexProfile { throw("bad use of bucket.bp") } data := add(unsafe.Pointer(b), unsafe.Sizeof(*b)+b.nstk*unsafe.Sizeof(location{})) @@ -193,6 +199,9 @@ func stkbucket(typ bucketType, size uintptr, stk []location, alloc bool) *bucket if typ == memProfile { b.allnext = mbuckets mbuckets = b + } else if typ == mutexProfile { + b.allnext = xbuckets + xbuckets = b } else { b.allnext = bbuckets bbuckets = b @@ -297,10 +306,20 @@ func blockevent(cycles int64, skip int) { if cycles <= 0 { cycles = 1 } + if blocksampled(cycles) { + saveblockevent(cycles, skip+1, blockProfile, &blockprofilerate) + } +} + +func blocksampled(cycles int64) bool { rate := int64(atomic.Load64(&blockprofilerate)) - if rate <= 0 || (rate > cycles && int64(fastrand1())%rate > cycles) { - return + if rate <= 0 || (rate > cycles && int64(fastrand())%rate > cycles) { + return false } + return true +} + +func saveblockevent(cycles int64, skip int, which bucketType, ratep *uint64) { gp := getg() var nstk int var stk [maxStack]location @@ -312,12 +331,43 @@ func blockevent(cycles int64, skip int) { nstk = callers(skip, stk[:]) } lock(&proflock) - b := stkbucket(blockProfile, 0, stk[:nstk], true) + b := stkbucket(which, 0, stk[:nstk], true) b.bp().count++ b.bp().cycles += cycles unlock(&proflock) } +var mutexprofilerate uint64 // fraction sampled + +// SetMutexProfileFraction controls the fraction of mutex contention events +// that are reported in the mutex profile. On average 1/rate events are +// reported. The previous rate is returned. +// +// To turn off profiling entirely, pass rate 0. +// To just read the current rate, pass rate -1. +// (For n>1 the details of sampling may change.) 
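SetMutexProfileFraction and MutexProfile (further down in this hunk) are the new public entry points for the mutex profile buckets added above. A minimal usage sketch from ordinary user code, assuming some artificial contention:

package main

import (
	"fmt"
	"runtime"
	"sync"
)

func main() {
	// Report roughly one in five contention events, and restore the old rate on exit.
	prev := runtime.SetMutexProfileFraction(5)
	defer runtime.SetMutexProfileFraction(prev)

	// Generate some contention on a single mutex.
	var mu sync.Mutex
	var wg sync.WaitGroup
	for i := 0; i < 8; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for j := 0; j < 1000; j++ {
				mu.Lock()
				mu.Unlock()
			}
		}()
	}
	wg.Wait()

	// Read the accumulated records: call once to size the slice, then again to fill it.
	n, _ := runtime.MutexProfile(nil)
	records := make([]runtime.BlockProfileRecord, n+10)
	n, ok := runtime.MutexProfile(records)
	if ok {
		fmt.Println("mutex profile records:", n)
	}
}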
+func SetMutexProfileFraction(rate int) int { + if rate < 0 { + return int(mutexprofilerate) + } + old := mutexprofilerate + atomic.Store64(&mutexprofilerate, uint64(rate)) + return int(old) +} + +//go:linkname mutexevent sync.event +func mutexevent(cycles int64, skip int) { + if cycles < 0 { + cycles = 0 + } + rate := int64(atomic.Load64(&mutexprofilerate)) + // TODO(pjw): measure impact of always calling fastrand vs using something + // like malloc.go:nextSample() + if rate > 0 && int64(fastrand())%rate == 0 { + saveblockevent(cycles, skip+1, mutexProfile, &mutexprofilerate) + } +} + // Go interface to profile data. // A StackRecord describes a single execution stack. @@ -514,6 +564,42 @@ func BlockProfile(p []BlockProfileRecord) (n int, ok bool) { return } +// MutexProfile returns n, the number of records in the current mutex profile. +// If len(p) >= n, MutexProfile copies the profile into p and returns n, true. +// Otherwise, MutexProfile does not change p, and returns n, false. +// +// Most clients should use the runtime/pprof package +// instead of calling MutexProfile directly. +func MutexProfile(p []BlockProfileRecord) (n int, ok bool) { + lock(&proflock) + for b := xbuckets; b != nil; b = b.allnext { + n++ + } + if n <= len(p) { + ok = true + for b := xbuckets; b != nil; b = b.allnext { + bp := b.bp() + r := &p[0] + r.Count = int64(bp.count) + r.Cycles = bp.cycles + i := 0 + var loc location + for i, loc = range b.stk() { + if i >= len(r.Stack0) { + break + } + r.Stack0[i] = loc.pc + } + for ; i < len(r.Stack0); i++ { + r.Stack0[i] = 0 + } + p = p[1:] + } + } + unlock(&proflock) + return +} + // ThreadCreateProfile returns n, the number of records in the thread creation profile. // If len(p) >= n, ThreadCreateProfile copies the profile into p and returns n, true. // If len(p) < n, ThreadCreateProfile does not change p and returns n, false. diff --git a/libgo/go/runtime/mstats.go b/libgo/go/runtime/mstats.go index 6ec268d..178c32c 100644 --- a/libgo/go/runtime/mstats.go +++ b/libgo/go/runtime/mstats.go @@ -14,6 +14,13 @@ import ( // Statistics. // If you edit this structure, also edit type MemStats below. +// Their layouts must match exactly. +// +// For detailed descriptions see the documentation for MemStats. +// Fields that differ from MemStats are further documented here. +// +// Many of these fields are updated on the fly, while others are only +// updated when updatememstats is called. type mstats struct { // General statistics. alloc uint64 // bytes allocated and not yet freed @@ -24,18 +31,36 @@ type mstats struct { nfree uint64 // number of frees // Statistics about malloc heap. - // protected by mheap.lock + // Protected by mheap.lock + // + // In mstats, heap_sys and heap_inuse includes stack memory, + // while in MemStats stack memory is separated out from the + // heap stats. heap_alloc uint64 // bytes allocated and not yet freed (same as alloc above) - heap_sys uint64 // bytes obtained from system + heap_sys uint64 // virtual address space obtained from system heap_idle uint64 // bytes in idle spans heap_inuse uint64 // bytes in non-idle spans heap_released uint64 // bytes released to the os heap_objects uint64 // total number of allocated objects + // TODO(austin): heap_released is both useless and inaccurate + // in its current form. It's useless because, from the user's + // and OS's perspectives, there's no difference between a page + // that has not yet been faulted in and a page that has been + // released back to the OS. 
We could fix this by considering + // newly mapped spans to be "released". It's inaccurate + // because when we split a large span for allocation, we + // "unrelease" all pages in the large span and not just the + // ones we split off for use. This is trickier to fix because + // we currently don't know which pages of a span we've + // released. We could fix it by separating "free" and + // "released" spans, but then we have to allocate from runs of + // free and released spans. + // Statistics about allocation of low-level fixed-size structures. // Protected by FixAlloc locks. - stacks_inuse uint64 // this number is included in heap_inuse above - stacks_sys uint64 // always 0 in mstats + stacks_inuse uint64 // this number is included in heap_inuse above; differs from MemStats.StackInuse + stacks_sys uint64 // only counts newosproc0 stack in mstats; differs from MemStats.StackSys mspan_inuse uint64 // mspan structures mspan_sys uint64 mcache_inuse uint64 // mcache structures @@ -46,12 +71,13 @@ type mstats struct { // Statistics about garbage collector. // Protected by mheap or stopping the world during GC. - next_gc uint64 // next gc (in heap_live time) + next_gc uint64 // goal heap_live for when next GC ends; ^0 if disabled last_gc uint64 // last gc (in absolute time) pause_total_ns uint64 pause_ns [256]uint64 // circular buffer of recent gc pause lengths pause_end [256]uint64 // circular buffer of recent gc end times (nanoseconds since 1970) numgc uint32 + numforcedgc uint32 // number of user-forced GCs gc_cpu_fraction float64 // fraction of CPU time used by GC enablegc bool debuggc bool @@ -64,10 +90,17 @@ type mstats struct { nfree uint64 } - // Statistics below here are not exported to Go directly. + // Statistics below here are not exported to MemStats directly. tinyallocs uint64 // number of tiny allocations that didn't cause actual allocation; not exported to go directly + // gc_trigger is the heap size that triggers marking. + // + // When heap_live ≥ gc_trigger, the mark phase will start. + // This is also the heap size by which proportional sweeping + // must be complete. + gc_trigger uint64 + // heap_live is the number of bytes considered live by the GC. // That is: retained by the most recent GC plus allocated // since then. heap_live <= heap_alloc, since heap_alloc @@ -104,10 +137,6 @@ type mstats struct { // unlike heap_live, heap_marked does not change until the // next mark termination. heap_marked uint64 - - // heap_reachable is an estimate of the reachable heap bytes - // at the end of the previous GC. - heap_reachable uint64 } var memstats mstats @@ -115,58 +144,281 @@ var memstats mstats // A MemStats records statistics about the memory allocator. type MemStats struct { // General statistics. - Alloc uint64 // bytes allocated and not yet freed - TotalAlloc uint64 // bytes allocated (even if freed) - Sys uint64 // bytes obtained from system (sum of XxxSys below) - Lookups uint64 // number of pointer lookups - Mallocs uint64 // number of mallocs - Frees uint64 // number of frees - - // Main allocation heap statistics. - HeapAlloc uint64 // bytes allocated and not yet freed (same as Alloc above) - HeapSys uint64 // bytes obtained from system - HeapIdle uint64 // bytes in idle spans - HeapInuse uint64 // bytes in non-idle span - HeapReleased uint64 // bytes released to the OS - HeapObjects uint64 // total number of allocated objects - - // Low-level fixed-size structure allocator statistics. - // Inuse is bytes used now. - // Sys is bytes obtained from system. 
- StackInuse uint64 // bytes used by stack allocator - StackSys uint64 - MSpanInuse uint64 // mspan structures - MSpanSys uint64 - MCacheInuse uint64 // mcache structures - MCacheSys uint64 - BuckHashSys uint64 // profiling bucket hash table - GCSys uint64 // GC metadata - OtherSys uint64 // other system allocations + + // Alloc is bytes of allocated heap objects. + // + // This is the same as HeapAlloc (see below). + Alloc uint64 + + // TotalAlloc is cumulative bytes allocated for heap objects. + // + // TotalAlloc increases as heap objects are allocated, but + // unlike Alloc and HeapAlloc, it does not decrease when + // objects are freed. + TotalAlloc uint64 + + // Sys is the total bytes of memory obtained from the OS. + // + // Sys is the sum of the XSys fields below. Sys measures the + // virtual address space reserved by the Go runtime for the + // heap, stacks, and other internal data structures. It's + // likely that not all of the virtual address space is backed + // by physical memory at any given moment, though in general + // it all was at some point. + Sys uint64 + + // Lookups is the number of pointer lookups performed by the + // runtime. + // + // This is primarily useful for debugging runtime internals. + Lookups uint64 + + // Mallocs is the cumulative count of heap objects allocated. + // The number of live objects is Mallocs - Frees. + Mallocs uint64 + + // Frees is the cumulative count of heap objects freed. + Frees uint64 + + // Heap memory statistics. + // + // Interpreting the heap statistics requires some knowledge of + // how Go organizes memory. Go divides the virtual address + // space of the heap into "spans", which are contiguous + // regions of memory 8K or larger. A span may be in one of + // three states: + // + // An "idle" span contains no objects or other data. The + // physical memory backing an idle span can be released back + // to the OS (but the virtual address space never is), or it + // can be converted into an "in use" or "stack" span. + // + // An "in use" span contains at least one heap object and may + // have free space available to allocate more heap objects. + // + // A "stack" span is used for goroutine stacks. Stack spans + // are not considered part of the heap. A span can change + // between heap and stack memory; it is never used for both + // simultaneously. + + // HeapAlloc is bytes of allocated heap objects. + // + // "Allocated" heap objects include all reachable objects, as + // well as unreachable objects that the garbage collector has + // not yet freed. Specifically, HeapAlloc increases as heap + // objects are allocated and decreases as the heap is swept + // and unreachable objects are freed. Sweeping occurs + // incrementally between GC cycles, so these two processes + // occur simultaneously, and as a result HeapAlloc tends to + // change smoothly (in contrast with the sawtooth that is + // typical of stop-the-world garbage collectors). + HeapAlloc uint64 + + // HeapSys is bytes of heap memory obtained from the OS. + // + // HeapSys measures the amount of virtual address space + // reserved for the heap. This includes virtual address space + // that has been reserved but not yet used, which consumes no + // physical memory, but tends to be small, as well as virtual + // address space for which the physical memory has been + // returned to the OS after it became unused (see HeapReleased + // for a measure of the latter). + // + // HeapSys estimates the largest size the heap has had. 
+ HeapSys uint64 + + // HeapIdle is bytes in idle (unused) spans. + // + // Idle spans have no objects in them. These spans could be + // (and may already have been) returned to the OS, or they can + // be reused for heap allocations, or they can be reused as + // stack memory. + // + // HeapIdle minus HeapReleased estimates the amount of memory + // that could be returned to the OS, but is being retained by + // the runtime so it can grow the heap without requesting more + // memory from the OS. If this difference is significantly + // larger than the heap size, it indicates there was a recent + // transient spike in live heap size. + HeapIdle uint64 + + // HeapInuse is bytes in in-use spans. + // + // In-use spans have at least one object in them. These spans + // can only be used for other objects of roughly the same + // size. + // + // HeapInuse minus HeapAlloc esimates the amount of memory + // that has been dedicated to particular size classes, but is + // not currently being used. This is an upper bound on + // fragmentation, but in general this memory can be reused + // efficiently. + HeapInuse uint64 + + // HeapReleased is bytes of physical memory returned to the OS. + // + // This counts heap memory from idle spans that was returned + // to the OS and has not yet been reacquired for the heap. + HeapReleased uint64 + + // HeapObjects is the number of allocated heap objects. + // + // Like HeapAlloc, this increases as objects are allocated and + // decreases as the heap is swept and unreachable objects are + // freed. + HeapObjects uint64 + + // Stack memory statistics. + // + // Stacks are not considered part of the heap, but the runtime + // can reuse a span of heap memory for stack memory, and + // vice-versa. + + // StackInuse is bytes in stack spans. + // + // In-use stack spans have at least one stack in them. These + // spans can only be used for other stacks of the same size. + // + // There is no StackIdle because unused stack spans are + // returned to the heap (and hence counted toward HeapIdle). + StackInuse uint64 + + // StackSys is bytes of stack memory obtained from the OS. + // + // StackSys is StackInuse, plus any memory obtained directly + // from the OS for OS thread stacks (which should be minimal). + StackSys uint64 + + // Off-heap memory statistics. + // + // The following statistics measure runtime-internal + // structures that are not allocated from heap memory (usually + // because they are part of implementing the heap). Unlike + // heap or stack memory, any memory allocated to these + // structures is dedicated to these structures. + // + // These are primarily useful for debugging runtime memory + // overheads. + + // MSpanInuse is bytes of allocated mspan structures. + MSpanInuse uint64 + + // MSpanSys is bytes of memory obtained from the OS for mspan + // structures. + MSpanSys uint64 + + // MCacheInuse is bytes of allocated mcache structures. + MCacheInuse uint64 + + // MCacheSys is bytes of memory obtained from the OS for + // mcache structures. + MCacheSys uint64 + + // BuckHashSys is bytes of memory in profiling bucket hash tables. + BuckHashSys uint64 + + // GCSys is bytes of memory in garbage collection metadata. + GCSys uint64 + + // OtherSys is bytes of memory in miscellaneous off-heap + // runtime allocations. + OtherSys uint64 // Garbage collector statistics. 
- NextGC uint64 // next collection will happen when HeapAlloc ≥ this amount - LastGC uint64 // end time of last collection (nanoseconds since 1970) - PauseTotalNs uint64 - PauseNs [256]uint64 // circular buffer of recent GC pause durations, most recent at [(NumGC+255)%256] - PauseEnd [256]uint64 // circular buffer of recent GC pause end times - NumGC uint32 - GCCPUFraction float64 // fraction of CPU time used by GC - EnableGC bool - DebugGC bool - - // Per-size allocation statistics. - // 61 is NumSizeClasses in the C code. + + // NextGC is the target heap size of the next GC cycle. + // + // The garbage collector's goal is to keep HeapAlloc ≤ NextGC. + // At the end of each GC cycle, the target for the next cycle + // is computed based on the amount of reachable data and the + // value of GOGC. + NextGC uint64 + + // LastGC is the time the last garbage collection finished, as + // nanoseconds since 1970 (the UNIX epoch). + LastGC uint64 + + // PauseTotalNs is the cumulative nanoseconds in GC + // stop-the-world pauses since the program started. + // + // During a stop-the-world pause, all goroutines are paused + // and only the garbage collector can run. + PauseTotalNs uint64 + + // PauseNs is a circular buffer of recent GC stop-the-world + // pause times in nanoseconds. + // + // The most recent pause is at PauseNs[(NumGC+255)%256]. In + // general, PauseNs[N%256] records the time paused in the most + // recent N%256th GC cycle. There may be multiple pauses per + // GC cycle; this is the sum of all pauses during a cycle. + PauseNs [256]uint64 + + // PauseEnd is a circular buffer of recent GC pause end times, + // as nanoseconds since 1970 (the UNIX epoch). + // + // This buffer is filled the same way as PauseNs. There may be + // multiple pauses per GC cycle; this records the end of the + // last pause in a cycle. + PauseEnd [256]uint64 + + // NumGC is the number of completed GC cycles. + NumGC uint32 + + // NumForcedGC is the number of GC cycles that were forced by + // the application calling the GC function. + NumForcedGC uint32 + + // GCCPUFraction is the fraction of this program's available + // CPU time used by the GC since the program started. + // + // GCCPUFraction is expressed as a number between 0 and 1, + // where 0 means GC has consumed none of this program's CPU. A + // program's available CPU time is defined as the integral of + // GOMAXPROCS since the program started. That is, if + // GOMAXPROCS is 2 and a program has been running for 10 + // seconds, its "available CPU" is 20 seconds. GCCPUFraction + // does not include CPU time used for write barrier activity. + // + // This is the same as the fraction of CPU reported by + // GODEBUG=gctrace=1. + GCCPUFraction float64 + + // EnableGC indicates that GC is enabled. It is always true, + // even if GOGC=off. + EnableGC bool + + // DebugGC is currently unused. + DebugGC bool + + // BySize reports per-size class allocation statistics. + // + // BySize[N] gives statistics for allocations of size S where + // BySize[N-1].Size < S ≤ BySize[N].Size. + // + // This does not report allocations larger than BySize[60].Size. BySize [61]struct { - Size uint32 + // Size is the maximum byte size of an object in this + // size class. + Size uint32 + + // Mallocs is the cumulative count of heap objects + // allocated in this size class. The cumulative bytes + // of allocation is Size*Mallocs. The number of live + // objects in this size class is Mallocs - Frees. 
Mallocs uint64 - Frees uint64 + + // Frees is the cumulative count of heap objects freed + // in this size class. + Frees uint64 } } -// Size of the trailing by_size array differs between Go and C, +// Size of the trailing by_size array differs between mstats and MemStats, // and all data after by_size is local to runtime, not exported. -// NumSizeClasses was changed, but we cannot change Go struct because of backward compatibility. -// sizeof_C_MStats is what C thinks about size of Go struct. +// NumSizeClasses was changed, but we cannot change MemStats because of backward compatibility. +// sizeof_C_MStats is the size of the prefix of mstats that +// corresponds to MemStats. It should match Sizeof(MemStats{}). var sizeof_C_MStats = unsafe.Offsetof(memstats.by_size) + 61*unsafe.Sizeof(memstats.by_size[0]) func init() { @@ -175,9 +427,19 @@ func init() { println(sizeof_C_MStats, unsafe.Sizeof(memStats)) throw("MStats vs MemStatsType size mismatch") } + + if unsafe.Offsetof(memstats.heap_live)%8 != 0 { + println(unsafe.Offsetof(memstats.heap_live)) + throw("memstats.heap_live not aligned to 8 bytes") + } } // ReadMemStats populates m with memory allocator statistics. +// +// The returned memory allocator statistics are up to date as of the +// call to ReadMemStats. This is in contrast with a heap profile, +// which is a snapshot as of the most recently completed garbage +// collection cycle. func ReadMemStats(m *MemStats) { stopTheWorld("read mem stats") @@ -191,8 +453,9 @@ func ReadMemStats(m *MemStats) { func readmemstats_m(stats *MemStats) { updatememstats(nil) - // Size of the trailing by_size array differs between Go and C, - // NumSizeClasses was changed, but we cannot change Go struct because of backward compatibility. + // The size of the trailing by_size array differs between + // mstats and MemStats. NumSizeClasses was changed, but we + // cannot change MemStats because of backward compatibility. memmove(unsafe.Pointer(stats), unsafe.Pointer(&memstats), sizeof_C_MStats) // Stack numbers are part of the heap numbers, separate those out for user consumption @@ -292,8 +555,7 @@ func updatememstats(stats *gcstats) { // Scan all spans and count number of alive objects. lock(&mheap_.lock) - for i := uint32(0); i < mheap_.nspan; i++ { - s := h_allspans[i] + for _, s := range mheap_.allspans { if s.state != mSpanInUse { continue } @@ -341,19 +603,32 @@ func cachestats() { } } +// flushmcache flushes the mcache of allp[i]. +// +// The world must be stopped. +// +//go:nowritebarrier +func flushmcache(i int) { + p := allp[i] + if p == nil { + return + } + c := p.mcache + if c == nil { + return + } + c.releaseAll() + stackcache_clear(c) +} + +// flushallmcaches flushes the mcaches of all Ps. +// +// The world must be stopped. +// //go:nowritebarrier func flushallmcaches() { - for i := 0; ; i++ { - p := allp[i] - if p == nil { - break - } - c := p.mcache - if c == nil { - continue - } - c.releaseAll() - stackcache_clear(c) + for i := 0; i < int(gomaxprocs); i++ { + flushmcache(i) } } diff --git a/libgo/go/runtime/mstkbar.go b/libgo/go/runtime/mstkbar.go index 6d0b5ac..616c220 100644 --- a/libgo/go/runtime/mstkbar.go +++ b/libgo/go/runtime/mstkbar.go @@ -150,6 +150,10 @@ var firstStackBarrierOffset = 1024 // gcMaxStackBarriers returns the maximum number of stack barriers // that can be installed in a stack of stackSize bytes. 
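The rewritten MemStats documentation above calls out several derived quantities (HeapIdle minus HeapReleased, HeapInuse minus HeapAlloc, the new NumForcedGC counter). A small sketch that reads them from user code:

package main

import (
	"fmt"
	"runtime"
)

func main() {
	var m runtime.MemStats
	runtime.GC() // force one cycle so NumForcedGC and the GC stats are non-trivial
	runtime.ReadMemStats(&m)

	// Derived quantities described in the field documentation above.
	fmt.Printf("heap in use:         %d bytes\n", m.HeapInuse)
	fmt.Printf("retained for reuse:  %d bytes (HeapIdle - HeapReleased)\n", m.HeapIdle-m.HeapReleased)
	fmt.Printf("fragmentation bound: %d bytes (HeapInuse - HeapAlloc)\n", m.HeapInuse-m.HeapAlloc)
	fmt.Printf("GC cycles: %d total, %d forced, next target %d bytes\n", m.NumGC, m.NumForcedGC, m.NextGC)
}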
func gcMaxStackBarriers(stackSize int) (n int) { + if debug.gcstackbarrieroff > 0 { + return 0 + } + if firstStackBarrierOffset == 0 { // Special debugging case for inserting stack barriers // at every frame. Steal half of the stack for the diff --git a/libgo/go/runtime/net_plan9.go b/libgo/go/runtime/net_plan9.go new file mode 100644 index 0000000..10fd089 --- /dev/null +++ b/libgo/go/runtime/net_plan9.go @@ -0,0 +1,29 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import ( + _ "unsafe" +) + +//go:linkname runtime_ignoreHangup net.runtime_ignoreHangup +func runtime_ignoreHangup() { + getg().m.ignoreHangup = true +} + +//go:linkname runtime_unignoreHangup net.runtime_unignoreHangup +func runtime_unignoreHangup(sig string) { + getg().m.ignoreHangup = false +} + +func ignoredNote(note *byte) bool { + if note == nil { + return false + } + if gostringnocopy(note) != "hangup" { + return false + } + return getg().m.ignoreHangup +} diff --git a/libgo/go/runtime/netpoll.go b/libgo/go/runtime/netpoll.go index 729b597..876eaea 100644 --- a/libgo/go/runtime/netpoll.go +++ b/libgo/go/runtime/netpoll.go @@ -42,6 +42,10 @@ const ( const pollBlockSize = 4 * 1024 // Network poller descriptor. +// +// No heap pointers. +// +//go:notinheap type pollDesc struct { link *pollDesc // in pollcache, protected by pollcache.lock diff --git a/libgo/go/runtime/os_darwin.go b/libgo/go/runtime/os_darwin.go index db403e2..ec92370 100644 --- a/libgo/go/runtime/os_darwin.go +++ b/libgo/go/runtime/os_darwin.go @@ -269,7 +269,10 @@ func semasleep1(ns int64) int32 { if r == 0 { break } - if r == _KERN_ABORTED { // interrupted + // Note: We don't know how this call (with no timeout) can get _KERN_OPERATION_TIMED_OUT, + // but it does reliably, though at a very low rate, on OS X 10.8, 10.9, 10.10, and 10.11. + // See golang.org/issue/17161. + if r == _KERN_ABORTED || r == _KERN_OPERATION_TIMED_OUT { // interrupted continue } macherror(r, "semaphore_wait") diff --git a/libgo/go/runtime/os_freebsd.go b/libgo/go/runtime/os_freebsd.go index 4512e76..a4d2886 100644 --- a/libgo/go/runtime/os_freebsd.go +++ b/libgo/go/runtime/os_freebsd.go @@ -14,7 +14,7 @@ type mOS struct { //go:noescape //extern _umtx_op -func sys_umtx_op(addr *uint32, mode int32, val uint32, ptr2, ts *timespec) int32 +func sys_umtx_op(addr *uint32, mode int32, val uint32, uaddr1 uinptr, ts *umtx_time) int32 // FreeBSD's umtx_op syscall is effectively the same as Linux's futex, and // thus the code is largely similar. 
See Linux implementation @@ -28,14 +28,14 @@ func futexsleep(addr *uint32, val uint32, ns int64) { } func futexsleep1(addr *uint32, val uint32, ns int64) { - var tsp *timespec + var utp *umtx_time if ns >= 0 { - var ts timespec - ts.tv_nsec = 0 - ts.set_sec(int64(timediv(ns, 1000000000, (*int32)(unsafe.Pointer(&ts.tv_nsec))))) - tsp = &ts + var ut umtx_time + ut._clockid = _CLOCK_MONOTONIC + ut._timeout.set_sec(int64(timediv(ns, 1000000000, (*int32)(unsafe.Pointer(&ut._timeout.tv_nsec))))) + utp = &ut } - ret := sys_umtx_op(addr, _UMTX_OP_WAIT_UINT_PRIVATE, val, nil, tsp) + ret := sys_umtx_op(addr, _UMTX_OP_WAIT_UINT_PRIVATE, val, unsafe.Sizeof(*utp), utp) if ret >= 0 || ret == -_EINTR { return } @@ -45,7 +45,7 @@ func futexsleep1(addr *uint32, val uint32, ns int64) { //go:nosplit func futexwakeup(addr *uint32, cnt uint32) { - ret := sys_umtx_op(addr, _UMTX_OP_WAKE_PRIVATE, cnt, nil, nil) + ret := sys_umtx_op(addr, _UMTX_OP_WAKE_PRIVATE, cnt, 0, nil) if ret >= 0 { return } diff --git a/libgo/go/runtime/os_gccgo.go b/libgo/go/runtime/os_gccgo.go index 1bdef7d..a8f05a4 100644 --- a/libgo/go/runtime/os_gccgo.go +++ b/libgo/go/runtime/os_gccgo.go @@ -21,36 +21,15 @@ func mpreinit(mp *m) { // minit is called to initialize a new m (including the bootstrap m). // Called on the new thread, cannot allocate memory. func minit() { - // Initialize signal handling. - _g_ := getg() - - var st _stack_t - sigaltstack(nil, &st) - if st.ss_flags&_SS_DISABLE != 0 { - signalstack(_g_.m.gsignalstack, _g_.m.gsignalstacksize) - _g_.m.newSigstack = true - } else { - _g_.m.newSigstack = false - } + minitSignals() // FIXME: We should set _g_.m.procid here. - - // restore signal mask from m.sigmask and unblock essential signals - nmask := _g_.m.sigmask - for i := range sigtable { - if sigtable[i].flags&_SigUnblock != 0 { - sigdelset(&nmask, int32(i)) - } - } - sigprocmask(_SIG_SETMASK, &nmask, nil) } // Called from dropm to undo the effect of an minit. //go:nosplit func unminit() { - if getg().m.newSigstack { - signalstack(nil, 0) - } + unminitSignals() } var urandom_dev = []byte("/dev/urandom\x00") diff --git a/libgo/go/runtime/os_linux.go b/libgo/go/runtime/os_linux.go index 04c690b..ad33486 100644 --- a/libgo/go/runtime/os_linux.go +++ b/libgo/go/runtime/os_linux.go @@ -86,9 +86,13 @@ func futexwakeup(addr *uint32, cnt uint32) { const ( _AT_NULL = 0 // End of vector _AT_PAGESZ = 6 // System physical page size + _AT_HWCAP = 16 // hardware capability bit vector _AT_RANDOM = 25 // introduced in 2.6.29 + _AT_HWCAP2 = 26 // hardware capability bit vector 2 ) +var procAuxv = []byte("/proc/self/auxv\x00") + func sysargs(argc int32, argv **byte) { n := argc + 1 @@ -102,7 +106,50 @@ func sysargs(argc int32, argv **byte) { // now argv+n is auxv auxv := (*[1 << 28]uintptr)(add(unsafe.Pointer(argv), uintptr(n)*sys.PtrSize)) - for i := 0; auxv[i] != _AT_NULL; i += 2 { + if sysauxv(auxv[:]) == 0 { + // In some situations we don't get a loader-provided + // auxv, such as when loaded as a library on Android. + // Fall back to /proc/self/auxv. + fd := open(&procAuxv[0], 0 /* O_RDONLY */, 0) + if fd < 0 { + // On Android, /proc/self/auxv might be unreadable (issue 9229), so we fallback to + // try using mincore to detect the physical page size. + // mincore should return EINVAL when address is not a multiple of system page size. 
+ const size = 256 << 10 // size of memory region to allocate + p := mmap(nil, size, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0) + if uintptr(p) < 4096 { + return + } + var n uintptr + for n = 4 << 10; n < size; n <<= 1 { + err := mincore(unsafe.Pointer(uintptr(p)+n), 1, &addrspace_vec[0]) + if err == 0 { + physPageSize = n + break + } + } + if physPageSize == 0 { + physPageSize = size + } + munmap(p, size) + return + } + var buf [128]uintptr + n := read(fd, noescape(unsafe.Pointer(&buf[0])), int32(unsafe.Sizeof(buf))) + closefd(fd) + if n < 0 { + return + } + // Make sure buf is terminated, even if we didn't read + // the whole file. + buf[len(buf)-2] = _AT_NULL + sysauxv(buf[:]) + } +} + +func sysauxv(auxv []uintptr) int { + var i int + for ; auxv[i] != _AT_NULL; i += 2 { tag, val := auxv[i], auxv[i+1] switch tag { case _AT_RANDOM: @@ -111,20 +158,14 @@ func sysargs(argc int32, argv **byte) { startupRandomData = (*[16]byte)(unsafe.Pointer(val))[:] case _AT_PAGESZ: - // Check that the true physical page size is - // compatible with the runtime's assumed - // physical page size. - if sys.PhysPageSize < val { - print("runtime: kernel page size (", val, ") is larger than runtime page size (", sys.PhysPageSize, ")\n") - exit(1) - } - if sys.PhysPageSize%val != 0 { - print("runtime: runtime page size (", sys.PhysPageSize, ") is not a multiple of kernel page size (", val, ")\n") - exit(1) - } + physPageSize = val } // Commented out for gccgo for now. // archauxv(tag, val) } + return i / 2 } + +// Temporary for gccgo until we port mem_GOOS.go. +var addrspace_vec [1]byte diff --git a/libgo/go/runtime/os_linux_ppc64x.go b/libgo/go/runtime/os_linux_ppc64x.go new file mode 100644 index 0000000..b324344 --- /dev/null +++ b/libgo/go/runtime/os_linux_ppc64x.go @@ -0,0 +1,61 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore_for_gccgo +// +build ppc64 ppc64le + +package runtime + +import ( + "runtime/internal/sys" +) + +const ( + // ISA level + // Go currently requires POWER5 as a minimum for ppc64, so we need + // to check for ISA 2.03 and beyond. + _PPC_FEATURE_POWER5_PLUS = 0x00020000 // ISA 2.03 (POWER5+) + _PPC_FEATURE_ARCH_2_05 = 0x00001000 // ISA 2.05 (POWER6) + _PPC_FEATURE_POWER6_EXT = 0x00000200 // mffgpr/mftgpr extension (POWER6x) + _PPC_FEATURE_ARCH_2_06 = 0x00000100 // ISA 2.06 (POWER7) + _PPC_FEATURE2_ARCH_2_07 = 0x80000000 // ISA 2.07 (POWER8) + + // Standalone capabilities + _PPC_FEATURE_HAS_ALTIVEC = 0x10000000 // SIMD/Vector unit + _PPC_FEATURE_HAS_VSX = 0x00000080 // Vector scalar unit +) + +type facilities struct { + _ [sys.CacheLineSize]byte + isPOWER5x bool // ISA 2.03 + isPOWER6 bool // ISA 2.05 + isPOWER6x bool // ISA 2.05 + mffgpr/mftgpr extension + isPOWER7 bool // ISA 2.06 + isPOWER8 bool // ISA 2.07 + hasVMX bool // Vector unit + hasVSX bool // Vector scalar unit + _ [sys.CacheLineSize]byte +} + +// cpu can be tested at runtime in go assembler code to check for +// a certain ISA level or hardware capability, for example: +// ·cpu+facilities_hasVSX(SB) for checking the availability of VSX +// or +// ·cpu+facilities_isPOWER7(SB) for checking if the processor implements +// ISA 2.06 instructions. 
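For reference, the auxiliary vector that sysauxv walks above is a flat array of (tag, value) machine words terminated by AT_NULL. A user-space sketch reading the same file; it assumes a 64-bit little-endian Linux system and is not part of the runtime:

package main

import (
	"encoding/binary"
	"fmt"
	"os"
)

const (
	atNull   = 0
	atPagesz = 6
	atHwcap  = 16
)

func main() {
	buf, err := os.ReadFile("/proc/self/auxv")
	if err != nil {
		fmt.Println("auxv unavailable:", err) // e.g. on Android, see the mincore fallback above
		return
	}
	for i := 0; i+16 <= len(buf); i += 16 {
		tag := binary.LittleEndian.Uint64(buf[i:])
		val := binary.LittleEndian.Uint64(buf[i+8:])
		if tag == atNull {
			break
		}
		switch tag {
		case atPagesz:
			fmt.Println("page size:", val)
		case atHwcap:
			fmt.Printf("hwcap bits: %#x\n", val)
		}
	}
}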
+var cpu facilities + +func archauxv(tag, val uintptr) { + switch tag { + case _AT_HWCAP: + cpu.isPOWER5x = val&_PPC_FEATURE_POWER5_PLUS != 0 + cpu.isPOWER6 = val&_PPC_FEATURE_ARCH_2_05 != 0 + cpu.isPOWER6x = val&_PPC_FEATURE_POWER6_EXT != 0 + cpu.isPOWER7 = val&_PPC_FEATURE_ARCH_2_06 != 0 + cpu.hasVMX = val&_PPC_FEATURE_HAS_ALTIVEC != 0 + cpu.hasVSX = val&_PPC_FEATURE_HAS_VSX != 0 + case _AT_HWCAP2: + cpu.isPOWER8 = val&_PPC_FEATURE2_ARCH_2_07 != 0 + } +} diff --git a/libgo/go/runtime/panic.go b/libgo/go/runtime/panic.go index f7e5efe..b76bb21 100644 --- a/libgo/go/runtime/panic.go +++ b/libgo/go/runtime/panic.go @@ -78,10 +78,6 @@ func panicmem() { panic(memoryError) } -func throwreturn() { - throw("no return at end of a typed function - compiler is broken") -} - func throwinit() { throw("recursive call during initialization - linker skew") } @@ -108,17 +104,19 @@ func deferproc(frame *bool, pfn uintptr, arg unsafe.Pointer) { // Each defer must be released with freedefer. func newdefer() *_defer { var d *_defer - mp := acquirem() - pp := mp.p.ptr() + gp := getg() + pp := gp.m.p.ptr() if len(pp.deferpool) == 0 && sched.deferpool != nil { - lock(&sched.deferlock) - for len(pp.deferpool) < cap(pp.deferpool)/2 && sched.deferpool != nil { - d := sched.deferpool - sched.deferpool = d.link - d.link = nil - pp.deferpool = append(pp.deferpool, d) - } - unlock(&sched.deferlock) + systemstack(func() { + lock(&sched.deferlock) + for len(pp.deferpool) < cap(pp.deferpool)/2 && sched.deferpool != nil { + d := sched.deferpool + sched.deferpool = d.link + d.link = nil + pp.deferpool = append(pp.deferpool, d) + } + unlock(&sched.deferlock) + }) } if n := len(pp.deferpool); n > 0 { d = pp.deferpool[n-1] @@ -126,17 +124,22 @@ func newdefer() *_defer { pp.deferpool = pp.deferpool[:n-1] } if d == nil { - d = new(_defer) + systemstack(func() { + d = new(_defer) + }) } - gp := mp.curg d.link = gp._defer gp._defer = d - releasem(mp) return d } // Free the given defer. // The defer cannot be used after this call. +// +// This must not grow the stack because there may be a frame without a +// stack map when this is called. +// +//go:nosplit func freedefer(d *_defer) { if d.special { return @@ -150,31 +153,34 @@ func freedefer(d *_defer) { return } - mp := acquirem() - pp := mp.p.ptr() + pp := getg().m.p.ptr() if len(pp.deferpool) == cap(pp.deferpool) { // Transfer half of local cache to the central cache. - var first, last *_defer - for len(pp.deferpool) > cap(pp.deferpool)/2 { - n := len(pp.deferpool) - d := pp.deferpool[n-1] - pp.deferpool[n-1] = nil - pp.deferpool = pp.deferpool[:n-1] - if first == nil { - first = d - } else { - last.link = d + // + // Take this slow path on the system stack so + // we don't grow freedefer's stack. + systemstack(func() { + var first, last *_defer + for len(pp.deferpool) > cap(pp.deferpool)/2 { + n := len(pp.deferpool) + d := pp.deferpool[n-1] + pp.deferpool[n-1] = nil + pp.deferpool = pp.deferpool[:n-1] + if first == nil { + first = d + } else { + last.link = d + } + last = d } - last = d - } - lock(&sched.deferlock) - last.link = sched.deferpool - sched.deferpool = first - unlock(&sched.deferlock) + lock(&sched.deferlock) + last.link = sched.deferpool + sched.deferpool = first + unlock(&sched.deferlock) + }) } *d = _defer{} pp.deferpool = append(pp.deferpool, d) - releasem(mp) } // deferreturn is called to undefer the stack. @@ -358,6 +364,11 @@ func Goexit() { // Used when crashing with panicking. // This must match types handled by printany. 
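The defer/recover guard added to preprintpanics just below exists because pre-formatting the panic value calls its Error or String method, which can itself panic. A tiny program that exercises that path:

package main

// badErr's Error method panics, which is exactly the situation the new
// guard turns into a clean "panic while printing panic value" throw
// instead of a recursive panic while panicking.
type badErr struct{}

func (badErr) Error() string { panic("Error method failed") }

func main() {
	panic(badErr{})
}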
func preprintpanics(p *_panic) { + defer func() { + if recover() != nil { + throw("panic while printing panic value") + } + }() for p != nil { switch v := p.arg.(type) { case error: @@ -731,6 +742,11 @@ func deferredrecover() interface{} { return gorecover() } +//go:linkname sync_throw sync.throw +func sync_throw(s string) { + throw(s) +} + //go:nosplit func throw(s string) { print("fatal error: ", s, "\n") @@ -769,7 +785,7 @@ func startpanic() { freezetheworld() return case 1: - // Something failed while panicing, probably the print of the + // Something failed while panicking, probably the print of the // argument to panic(). Just print a stack trace and exit. _g_.m.dying = 2 print("panic during panic\n") diff --git a/libgo/go/runtime/pprof/internal/protopprof/protomemprofile.go b/libgo/go/runtime/pprof/internal/protopprof/protomemprofile.go new file mode 100644 index 0000000..c2ab5b5 --- /dev/null +++ b/libgo/go/runtime/pprof/internal/protopprof/protomemprofile.go @@ -0,0 +1,83 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package protopprof + +import ( + "internal/pprof/profile" + "math" + "runtime" + "time" +) + +// EncodeMemProfile converts MemProfileRecords to a Profile. +func EncodeMemProfile(mr []runtime.MemProfileRecord, rate int64, t time.Time) *profile.Profile { + p := &profile.Profile{ + Period: rate, + PeriodType: &profile.ValueType{Type: "space", Unit: "bytes"}, + SampleType: []*profile.ValueType{ + {Type: "alloc_objects", Unit: "count"}, + {Type: "alloc_space", Unit: "bytes"}, + {Type: "inuse_objects", Unit: "count"}, + {Type: "inuse_space", Unit: "bytes"}, + }, + TimeNanos: int64(t.UnixNano()), + } + + locs := make(map[uintptr]*profile.Location) + for _, r := range mr { + stack := r.Stack() + sloc := make([]*profile.Location, len(stack)) + for i, addr := range stack { + loc := locs[addr] + if loc == nil { + loc = &profile.Location{ + ID: uint64(len(p.Location) + 1), + Address: uint64(addr), + } + locs[addr] = loc + p.Location = append(p.Location, loc) + } + sloc[i] = loc + } + + ao, ab := scaleHeapSample(r.AllocObjects, r.AllocBytes, rate) + uo, ub := scaleHeapSample(r.InUseObjects(), r.InUseBytes(), rate) + + p.Sample = append(p.Sample, &profile.Sample{ + Value: []int64{ao, ab, uo, ub}, + Location: sloc, + }) + } + if runtime.GOOS == "linux" { + addMappings(p) + } + return p +} + +// scaleHeapSample adjusts the data from a heap Sample to +// account for its probability of appearing in the collected +// data. heap profiles are a sampling of the memory allocations +// requests in a program. We estimate the unsampled value by dividing +// each collected sample by its probability of appearing in the +// profile. heap profiles rely on a poisson process to determine +// which samples to collect, based on the desired average collection +// rate R. The probability of a sample of size S to appear in that +// profile is 1-exp(-S/R). +func scaleHeapSample(count, size, rate int64) (int64, int64) { + if count == 0 || size == 0 { + return 0, 0 + } + + if rate <= 1 { + // if rate==1 all samples were collected so no adjustment is needed. + // if rate<1 treat as unknown and skip scaling. 
+ return count, size + } + + avgSize := float64(size) / float64(count) + scale := 1 / (1 - math.Exp(-avgSize/float64(rate))) + + return int64(float64(count) * scale), int64(float64(size) * scale) +} diff --git a/libgo/go/runtime/pprof/internal/protopprof/protomemprofile_test.go b/libgo/go/runtime/pprof/internal/protopprof/protomemprofile_test.go new file mode 100644 index 0000000..a10fe77 --- /dev/null +++ b/libgo/go/runtime/pprof/internal/protopprof/protomemprofile_test.go @@ -0,0 +1,104 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package protopprof + +import ( + "bytes" + "internal/pprof/profile" + "io/ioutil" + "reflect" + "runtime" + "testing" + "time" +) + +// TestSampledHeapAllocProfile tests encoding of a memory profile from +// runtime.MemProfileRecord data. +func TestSampledHeapAllocProfile(t *testing.T) { + if runtime.GOOS != "linux" { + t.Skip("Test requires a system with /proc/self/maps") + } + + // Figure out two addresses from /proc/self/maps. + mmap, err := ioutil.ReadFile("/proc/self/maps") + if err != nil { + t.Fatal("Cannot read /proc/self/maps") + } + rd := bytes.NewReader(mmap) + mprof := &profile.Profile{} + if err = mprof.ParseMemoryMap(rd); err != nil { + t.Fatalf("Cannot parse /proc/self/maps") + } + if len(mprof.Mapping) < 2 { + // It is possible for a binary to only have 1 executable + // region of memory. + t.Skipf("need 2 or more mappings, got %v", len(mprof.Mapping)) + } + address1 := mprof.Mapping[0].Start + address2 := mprof.Mapping[1].Start + + var buf bytes.Buffer + + rec, rate := testMemRecords(address1, address2) + p := EncodeMemProfile(rec, rate, time.Now()) + if err := p.Write(&buf); err != nil { + t.Fatalf("Failed to write profile: %v", err) + } + + p, err = profile.Parse(&buf) + if err != nil { + t.Fatalf("Could not parse Profile profile: %v", err) + } + + // Expected PeriodType, SampleType and Sample. + expectedPeriodType := &profile.ValueType{Type: "space", Unit: "bytes"} + expectedSampleType := []*profile.ValueType{ + {Type: "alloc_objects", Unit: "count"}, + {Type: "alloc_space", Unit: "bytes"}, + {Type: "inuse_objects", Unit: "count"}, + {Type: "inuse_space", Unit: "bytes"}, + } + // Expected samples, with values unsampled according to the profiling rate. 
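The first expected sample below can be reproduced by hand from testMemRecords: the record {4096, 1024, 4, 1, ...} allocates 4 objects totalling 4096 bytes at a 512 KiB sampling rate, and the in-use pair is what remains after one free. A short check of that arithmetic using the same formula as scaleHeapSample:

package main

import (
	"fmt"
	"math"
)

func main() {
	count, size, rate := 4.0, 4096.0, 512.0*1024
	avg := size / count                    // 1024 bytes per object
	scale := 1 / (1 - math.Exp(-avg/rate)) // about 512.5
	fmt.Println(int64(count*scale), int64(size*scale)) // 2050 2099200
	// The in-use pair (3 objects, 3072 bytes) has the same average size,
	// so the same scale yields the 1537 and 1574400 seen in expectedSample below.
}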
+ expectedSample := []*profile.Sample{ + {Value: []int64{2050, 2099200, 1537, 1574400}, Location: []*profile.Location{ + {ID: 1, Mapping: mprof.Mapping[0], Address: address1}, + {ID: 2, Mapping: mprof.Mapping[1], Address: address2}, + }}, + {Value: []int64{1, 829411, 1, 829411}, Location: []*profile.Location{ + {ID: 3, Mapping: mprof.Mapping[1], Address: address2 + 1}, + {ID: 4, Mapping: mprof.Mapping[1], Address: address2 + 2}, + }}, + {Value: []int64{1, 829411, 0, 0}, Location: []*profile.Location{ + {ID: 5, Mapping: mprof.Mapping[0], Address: address1 + 1}, + {ID: 6, Mapping: mprof.Mapping[0], Address: address1 + 2}, + {ID: 7, Mapping: mprof.Mapping[1], Address: address2 + 3}, + }}, + } + + if p.Period != 512*1024 { + t.Fatalf("Sampling periods do not match") + } + if !reflect.DeepEqual(p.PeriodType, expectedPeriodType) { + t.Fatalf("Period types do not match") + } + if !reflect.DeepEqual(p.SampleType, expectedSampleType) { + t.Fatalf("Sample types do not match") + } + if !reflect.DeepEqual(p.Sample, expectedSample) { + t.Fatalf("Samples do not match: Expected: %v, Got:%v", getSampleAsString(expectedSample), + getSampleAsString(p.Sample)) + } +} + +func testMemRecords(a1, a2 uint64) ([]runtime.MemProfileRecord, int64) { + addr1, addr2 := uintptr(a1), uintptr(a2) + rate := int64(512 * 1024) + rec := []runtime.MemProfileRecord{ + {4096, 1024, 4, 1, [32]uintptr{addr1, addr2}}, + {512 * 1024, 0, 1, 0, [32]uintptr{addr2 + 1, addr2 + 2}}, + {512 * 1024, 512 * 1024, 1, 1, [32]uintptr{addr1 + 1, addr1 + 2, addr2 + 3}}, + } + return rec, rate +} diff --git a/libgo/go/runtime/pprof/internal/protopprof/protopprof.go b/libgo/go/runtime/pprof/internal/protopprof/protopprof.go new file mode 100644 index 0000000..5d269c4 --- /dev/null +++ b/libgo/go/runtime/pprof/internal/protopprof/protopprof.go @@ -0,0 +1,105 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package protopprof converts the runtime's raw profile logs +// to Profile structs containing a representation of the pprof +// protocol buffer profile format. +package protopprof + +import ( + "fmt" + "os" + "runtime" + "time" + "unsafe" + + "internal/pprof/profile" +) + +// TranslateCPUProfile parses binary CPU profiling stack trace data +// generated by runtime.CPUProfile() into a profile struct. +func TranslateCPUProfile(b []byte, startTime time.Time) (*profile.Profile, error) { + const wordSize = unsafe.Sizeof(uintptr(0)) + const minRawProfile = 5 * wordSize // Need a minimum of 5 words. + if uintptr(len(b)) < minRawProfile { + return nil, fmt.Errorf("truncated profile") + } + n := int(uintptr(len(b)) / wordSize) + data := ((*[1 << 28]uintptr)(unsafe.Pointer(&b[0])))[:n:n] + period := data[3] + data = data[5:] // skip header + + // profile initialization taken from pprof tool + p := &profile.Profile{ + Period: int64(period) * 1000, + PeriodType: &profile.ValueType{Type: "cpu", Unit: "nanoseconds"}, + SampleType: []*profile.ValueType{ + {Type: "samples", Unit: "count"}, + {Type: "cpu", Unit: "nanoseconds"}, + }, + TimeNanos: int64(startTime.UnixNano()), + DurationNanos: time.Since(startTime).Nanoseconds(), + } + // Parse CPU samples from the profile. 
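
For orientation while reading the parsing loop that follows, this is the legacy word layout TranslateCPUProfile consumes, pieced together from the header handling above and the test helpers later in this file; the concrete numbers below are made up:

package main

import "fmt"

func main() {
	// Header: five words; word[1] is the format tag 3, word[3] the sampling
	// period in microseconds. Each record is count, nstk, then nstk PCs.
	// A record of count=0, nstk=1, pc=0 marks end of data.
	raw := []uintptr{
		0, 3, 0, 100, 0, // header with a (hypothetical) 100µs period
		2, 1, 0x4010a0, // one record: 2 hits of a one-frame stack (hypothetical PC)
		0, 1, 0, // end-of-data marker
	}
	fmt.Println("profile words:", len(raw))
}
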
+ locs := make(map[uint64]*profile.Location) + for len(b) > 0 { + if len(data) < 2 || uintptr(len(data)) < 2+data[1] { + return nil, fmt.Errorf("truncated profile") + } + count := data[0] + nstk := data[1] + if uintptr(len(data)) < 2+nstk { + return nil, fmt.Errorf("truncated profile") + } + stk := data[2 : 2+nstk] + data = data[2+nstk:] + + if count == 0 && nstk == 1 && stk[0] == 0 { + // end of data marker + break + } + + sloc := make([]*profile.Location, len(stk)) + for i, addr := range stk { + addr := uint64(addr) + // Addresses from stack traces point to the next instruction after + // each call. Adjust by -1 to land somewhere on the actual call + // (except for the leaf, which is not a call). + if i > 0 { + addr-- + } + loc := locs[addr] + if loc == nil { + loc = &profile.Location{ + ID: uint64(len(p.Location) + 1), + Address: addr, + } + locs[addr] = loc + p.Location = append(p.Location, loc) + } + sloc[i] = loc + } + p.Sample = append(p.Sample, &profile.Sample{ + Value: []int64{int64(count), int64(count) * int64(p.Period)}, + Location: sloc, + }) + } + + if runtime.GOOS == "linux" { + if err := addMappings(p); err != nil { + return nil, err + } + } + return p, nil +} + +func addMappings(p *profile.Profile) error { + // Parse memory map from /proc/self/maps + f, err := os.Open("/proc/self/maps") + if err != nil { + return err + } + defer f.Close() + return p.ParseMemoryMap(f) +} diff --git a/libgo/go/runtime/pprof/internal/protopprof/protopprof_test.go b/libgo/go/runtime/pprof/internal/protopprof/protopprof_test.go new file mode 100644 index 0000000..f1937b5 --- /dev/null +++ b/libgo/go/runtime/pprof/internal/protopprof/protopprof_test.go @@ -0,0 +1,171 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package protopprof + +import ( + "bytes" + "fmt" + "internal/pprof/profile" + "io/ioutil" + "reflect" + "runtime" + "testing" + "time" + "unsafe" +) + +// Helper function to initialize empty cpu profile with sampling period provided. +func createEmptyProfileWithPeriod(t *testing.T, periodMs uint64) bytes.Buffer { + // Mock the sample header produced by cpu profiler. Write a sample + // period of 2000 microseconds, followed by no samples. + buf := new(bytes.Buffer) + // Profile header is as follows: + // The first, third and fifth words are 0. The second word is 3. + // The fourth word is the period. + // EOD marker: + // The sixth word -- count is initialized to 0 above. + // The code below sets the seventh word -- nstk to 1 + // The eighth word -- addr is initialized to 0 above. + words := []int{0, 3, 0, int(periodMs), 0, 0, 1, 0} + n := int(unsafe.Sizeof(0)) * len(words) + data := ((*[1 << 29]byte)(unsafe.Pointer(&words[0])))[:n:n] + if _, err := buf.Write(data); err != nil { + t.Fatalf("createEmptyProfileWithPeriod failed: %v", err) + } + return *buf +} + +// Helper function to initialize cpu profile with two sample values. +func createProfileWithTwoSamples(t *testing.T, periodMs uintptr, count1 uintptr, count2 uintptr, + address1 uintptr, address2 uintptr) bytes.Buffer { + // Mock the sample header produced by cpu profiler. Write a sample + // period of 2000 microseconds, followed by no samples. 
+ buf := new(bytes.Buffer) + words := []uintptr{0, 3, 0, uintptr(periodMs), 0, uintptr(count1), 2, + uintptr(address1), uintptr(address1 + 2), + uintptr(count2), 2, uintptr(address2), uintptr(address2 + 2), + 0, 1, 0} + for _, n := range words { + var err error + switch unsafe.Sizeof(int(0)) { + case 8: + _, err = buf.Write((*[8]byte)(unsafe.Pointer(&n))[:8:8]) + case 4: + _, err = buf.Write((*[4]byte)(unsafe.Pointer(&n))[:4:4]) + } + if err != nil { + t.Fatalf("createProfileWithTwoSamples failed: %v", err) + } + } + return *buf +} + +// Tests TranslateCPUProfile parses correct sampling period in an otherwise empty cpu profile. +func TestTranlateCPUProfileSamplingPeriod(t *testing.T) { + // A test server with mock cpu profile data. + var buf bytes.Buffer + + startTime := time.Now() + b := createEmptyProfileWithPeriod(t, 2000) + p, err := TranslateCPUProfile(b.Bytes(), startTime) + if err != nil { + t.Fatalf("translate failed: %v", err) + } + if err := p.Write(&buf); err != nil { + t.Fatalf("write failed: %v", err) + } + + p, err = profile.Parse(&buf) + if err != nil { + t.Fatalf("Could not parse Profile profile: %v", err) + } + + // Expected PeriodType and SampleType. + expectedPeriodType := &profile.ValueType{Type: "cpu", Unit: "nanoseconds"} + expectedSampleType := []*profile.ValueType{ + {Type: "samples", Unit: "count"}, + {Type: "cpu", Unit: "nanoseconds"}, + } + if p.Period != 2000*1000 || !reflect.DeepEqual(p.PeriodType, expectedPeriodType) || + !reflect.DeepEqual(p.SampleType, expectedSampleType) || p.Sample != nil { + t.Fatalf("Unexpected Profile fields") + } +} + +func getSampleAsString(sample []*profile.Sample) string { + var str string + for _, x := range sample { + for _, y := range x.Location { + if y.Mapping != nil { + str += fmt.Sprintf("Mapping:%v\n", *y.Mapping) + } + str += fmt.Sprintf("Location:%v\n", y) + } + str += fmt.Sprintf("Sample:%v\n", *x) + } + return str +} + +// Tests TranslateCPUProfile parses a cpu profile with sample values present. +func TestTranslateCPUProfileWithSamples(t *testing.T) { + if runtime.GOOS != "linux" { + t.Skip("test requires a system with /proc/self/maps") + } + // Figure out two addresses from /proc/self/maps. + mmap, err := ioutil.ReadFile("/proc/self/maps") + if err != nil { + t.Fatal("Cannot read /proc/self/maps") + } + rd := bytes.NewReader(mmap) + mprof := &profile.Profile{} + if err = mprof.ParseMemoryMap(rd); err != nil { + t.Fatalf("Cannot parse /proc/self/maps") + } + if len(mprof.Mapping) < 2 { + // It is possible for a binary to only have 1 executable + // region of memory. + t.Skipf("need 2 or more mappings, got %v", len(mprof.Mapping)) + } + address1 := mprof.Mapping[0].Start + address2 := mprof.Mapping[1].Start + // A test server with mock cpu profile data. + + startTime := time.Now() + b := createProfileWithTwoSamples(t, 2000, 20, 40, uintptr(address1), uintptr(address2)) + p, err := TranslateCPUProfile(b.Bytes(), startTime) + + if err != nil { + t.Fatalf("Could not parse Profile profile: %v", err) + } + // Expected PeriodType, SampleType and Sample. 
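
A short derivation of the expectations that follow, based on the helper above and the address adjustment in TranslateCPUProfile: the helper writes a 2000µs period, so p.Period becomes 2000*1000 ns; each sample's second value is count*Period, giving 40,000,000 ns for count 20 and 80,000,000 ns for count 40; and because the translator subtracts one from every non-leaf PC, stacks written as {addr, addr+2} come back as locations at addr and addr+1.
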
+ expectedPeriodType := &profile.ValueType{Type: "cpu", Unit: "nanoseconds"} + expectedSampleType := []*profile.ValueType{ + {Type: "samples", Unit: "count"}, + {Type: "cpu", Unit: "nanoseconds"}, + } + expectedSample := []*profile.Sample{ + {Value: []int64{20, 20 * 2000 * 1000}, Location: []*profile.Location{ + {ID: 1, Mapping: mprof.Mapping[0], Address: address1}, + {ID: 2, Mapping: mprof.Mapping[0], Address: address1 + 1}, + }}, + {Value: []int64{40, 40 * 2000 * 1000}, Location: []*profile.Location{ + {ID: 3, Mapping: mprof.Mapping[1], Address: address2}, + {ID: 4, Mapping: mprof.Mapping[1], Address: address2 + 1}, + }}, + } + if p.Period != 2000*1000 { + t.Fatalf("Sampling periods do not match") + } + if !reflect.DeepEqual(p.PeriodType, expectedPeriodType) { + t.Fatalf("Period types do not match") + } + if !reflect.DeepEqual(p.SampleType, expectedSampleType) { + t.Fatalf("Sample types do not match") + } + if !reflect.DeepEqual(p.Sample, expectedSample) { + t.Fatalf("Samples do not match: Expected: %v, Got:%v", getSampleAsString(expectedSample), + getSampleAsString(p.Sample)) + } +} diff --git a/libgo/go/runtime/pprof/mprof_test.go b/libgo/go/runtime/pprof/mprof_test.go index 54daefa..079af15 100644 --- a/libgo/go/runtime/pprof/mprof_test.go +++ b/libgo/go/runtime/pprof/mprof_test.go @@ -7,6 +7,7 @@ package pprof_test import ( "bytes" "fmt" + "reflect" "regexp" "runtime" . "runtime/pprof" @@ -42,6 +43,17 @@ func allocatePersistent1K() { } } +// Allocate transient memory using reflect.Call. + +func allocateReflectTransient() { + memSink = make([]byte, 2<<20) +} + +func allocateReflect() { + rv := reflect.ValueOf(allocateReflectTransient) + rv.Call(nil) +} + var memoryProfilerRun = 0 func TestMemoryProfiler(t *testing.T) { @@ -61,6 +73,7 @@ func TestMemoryProfiler(t *testing.T) { allocateTransient1M() allocateTransient2M() allocatePersistent1K() + allocateReflect() memSink = nil runtime.GC() // materialize stats @@ -74,21 +87,25 @@ func TestMemoryProfiler(t *testing.T) { tests := []string{ fmt.Sprintf(`%v: %v \[%v: %v\] @ 0x[0-9,a-f x]+ -# 0x[0-9,a-f]+ pprof_test\.allocatePersistent1K\+0x[0-9,a-f]+ .*/mprof_test\.go:40 -# 0x[0-9,a-f]+ runtime_pprof_test\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/mprof_test\.go:63 +# 0x[0-9,a-f]+ pprof_test\.allocatePersistent1K\+0x[0-9,a-f]+ .*/mprof_test\.go:41 +# 0x[0-9,a-f]+ runtime_pprof_test\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/mprof_test\.go:75 `, 32*memoryProfilerRun, 1024*memoryProfilerRun, 32*memoryProfilerRun, 1024*memoryProfilerRun), fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f x]+ -# 0x[0-9,a-f]+ pprof_test\.allocateTransient1M\+0x[0-9,a-f]+ .*/mprof_test.go:21 -# 0x[0-9,a-f]+ runtime_pprof_test\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/mprof_test.go:61 +# 0x[0-9,a-f]+ pprof_test\.allocateTransient1M\+0x[0-9,a-f]+ .*/mprof_test.go:22 +# 0x[0-9,a-f]+ runtime_pprof_test\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/mprof_test.go:73 `, (1<<10)*memoryProfilerRun, (1<<20)*memoryProfilerRun), // This should start with "0: 0" but gccgo's imprecise // GC means that sometimes the value is not collected. 
fmt.Sprintf(`(0|%v): (0|%v) \[%v: %v\] @ 0x[0-9,a-f x]+ -# 0x[0-9,a-f]+ pprof_test\.allocateTransient2M\+0x[0-9,a-f]+ .*/mprof_test.go:27 -# 0x[0-9,a-f]+ runtime_pprof_test\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/mprof_test.go:62 +# 0x[0-9,a-f]+ pprof_test\.allocateTransient2M\+0x[0-9,a-f]+ .*/mprof_test.go:28 +# 0x[0-9,a-f]+ runtime_pprof_test\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/mprof_test.go:74 `, memoryProfilerRun, (2<<20)*memoryProfilerRun, memoryProfilerRun, (2<<20)*memoryProfilerRun), + + fmt.Sprintf(`0: 0 \[%v: %v\] @( 0x[0-9,a-f]+)+ +# 0x[0-9,a-f]+ pprof_test\.allocateReflectTransient\+0x[0-9,a-f]+ .*/mprof_test.go:49 +`, memoryProfilerRun, (2<<20)*memoryProfilerRun), } for _, test := range tests { diff --git a/libgo/go/runtime/pprof/pprof.go b/libgo/go/runtime/pprof/pprof.go index 0a58baf..0db1ded 100644 --- a/libgo/go/runtime/pprof/pprof.go +++ b/libgo/go/runtime/pprof/pprof.go @@ -73,13 +73,15 @@ import ( "bufio" "bytes" "fmt" + "internal/pprof/profile" "io" - "os" "runtime" + "runtime/pprof/internal/protopprof" "sort" "strings" "sync" "text/tabwriter" + "time" ) // BUG(rsc): Profiles are only as good as the kernel support used to generate them. @@ -99,6 +101,7 @@ import ( // heap - a sampling of all heap allocations // threadcreate - stack traces that led to the creation of new OS threads // block - stack traces that led to blocking on synchronization primitives +// mutex - stack traces of holders of contended mutexes // // These predefined profiles maintain themselves and panic on an explicit // Add or Remove method call. @@ -152,6 +155,12 @@ var blockProfile = &Profile{ write: writeBlock, } +var mutexProfile = &Profile{ + name: "mutex", + count: countMutex, + write: writeMutex, +} + func lockProfiles() { profiles.mu.Lock() if profiles.m == nil { @@ -161,6 +170,7 @@ func lockProfiles() { "threadcreate": threadcreateProfile, "heap": heapProfile, "block": blockProfile, + "mutex": mutexProfile, } } } @@ -202,21 +212,15 @@ func Profiles() []*Profile { lockProfiles() defer unlockProfiles() - var all []*Profile + all := make([]*Profile, 0, len(profiles.m)) for _, p := range profiles.m { all = append(all, p) } - sort.Sort(byName(all)) + sort.Slice(all, func(i, j int) bool { return all[i].name < all[j].name }) return all } -type byName []*Profile - -func (x byName) Len() int { return len(x) } -func (x byName) Swap(i, j int) { x[i], x[j] = x[j], x[i] } -func (x byName) Less(i, j int) bool { return x[i].name < x[j].name } - // Name returns this profile's name, which can be passed to Lookup to reobtain the profile. func (p *Profile) Name() string { return p.name @@ -299,7 +303,7 @@ func (p *Profile) WriteTo(w io.Writer, debug int) error { } // Obtain consistent snapshot under lock; then process without lock. - var all [][]uintptr + all := make([][]uintptr, 0, len(p.m)) p.mu.Lock() for _, stk := range p.m { all = append(all, stk) @@ -337,17 +341,8 @@ type countProfile interface { } // printCountProfile prints a countProfile at the specified debug level. +// The profile will be in compressed proto format unless debug is nonzero. func printCountProfile(w io.Writer, debug int, name string, p countProfile) error { - b := bufio.NewWriter(w) - var tw *tabwriter.Writer - w = b - if debug > 0 { - tw = tabwriter.NewWriter(w, 1, 8, 1, '\t', 0) - w = tw - } - - fmt.Fprintf(w, "%s profile: total %d\n", name, p.Len()) - // Build count of each stack. 
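
Per the comment just added to printCountProfile, a count profile such as "goroutine" is now written as a compressed proto when debug is 0 and in the legacy text form otherwise. A minimal usage sketch (not from this patch; the file name is arbitrary):

package main

import (
	"os"
	"runtime/pprof"
)

func main() {
	p := pprof.Lookup("goroutine")

	// debug > 0: human-readable text with symbolized stacks.
	p.WriteTo(os.Stderr, 1)

	// debug == 0: compressed proto output, suitable for go tool pprof.
	f, err := os.Create("goroutine.pb.gz")
	if err != nil {
		panic(err)
	}
	defer f.Close()
	p.WriteTo(f, 0)
}
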
var buf bytes.Buffer key := func(stk []uintptr) string { @@ -373,17 +368,47 @@ func printCountProfile(w io.Writer, debug int, name string, p countProfile) erro sort.Sort(&keysByCount{keys, count}) - for _, k := range keys { - fmt.Fprintf(w, "%d %s\n", count[k], k) - if debug > 0 { - printStackRecord(w, p.Stack(index[k]), false) + if debug > 0 { + // Print debug profile in legacy format + tw := tabwriter.NewWriter(w, 1, 8, 1, '\t', 0) + fmt.Fprintf(tw, "%s profile: total %d\n", name, p.Len()) + for _, k := range keys { + fmt.Fprintf(tw, "%d %s\n", count[k], k) + printStackRecord(tw, p.Stack(index[k]), false) } + return tw.Flush() } - if tw != nil { - tw.Flush() + // Output profile in protobuf form. + prof := &profile.Profile{ + PeriodType: &profile.ValueType{Type: name, Unit: "count"}, + Period: 1, + Sample: make([]*profile.Sample, 0, len(keys)), + SampleType: []*profile.ValueType{{Type: name, Unit: "count"}}, } - return b.Flush() + locMap := make(map[uintptr]*profile.Location) + for _, k := range keys { + stk := p.Stack(index[k]) + c := count[k] + locs := make([]*profile.Location, len(stk)) + for i, addr := range stk { + loc := locMap[addr] + if loc == nil { + loc = &profile.Location{ + ID: uint64(len(locMap) + 1), + Address: uint64(addr - 1), + } + prof.Location = append(prof.Location, loc) + locMap[addr] = loc + } + locs[i] = loc + } + prof.Sample = append(prof.Sample, &profile.Sample{ + Location: locs, + Value: []int64{int64(c)}, + }) + } + return prof.Write(w) } // keysByCount sorts keys with higher counts first, breaking ties by key string order. @@ -449,12 +474,6 @@ func printStackRecord(w io.Writer, stk []uintptr, allFrames bool) { // Interface to system profiles. -type byInUseBytes []runtime.MemProfileRecord - -func (x byInUseBytes) Len() int { return len(x) } -func (x byInUseBytes) Swap(i, j int) { x[i], x[j] = x[j], x[i] } -func (x byInUseBytes) Less(i, j int) bool { return x[i].InUseBytes() > x[j].InUseBytes() } - // WriteHeapProfile is shorthand for Lookup("heap").WriteTo(w, 0). // It is preserved for backwards compatibility. func WriteHeapProfile(w io.Writer) error { @@ -490,15 +509,16 @@ func writeHeap(w io.Writer, debug int) error { // Profile grew; try again. } - sort.Sort(byInUseBytes(p)) + if debug == 0 { + pp := protopprof.EncodeMemProfile(p, int64(runtime.MemProfileRate), time.Now()) + return pp.Write(w) + } + + sort.Slice(p, func(i, j int) bool { return p[i].InUseBytes() > p[j].InUseBytes() }) b := bufio.NewWriter(w) - var tw *tabwriter.Writer - w = b - if debug > 0 { - tw = tabwriter.NewWriter(w, 1, 8, 1, '\t', 0) - w = tw - } + tw := tabwriter.NewWriter(b, 1, 8, 1, '\t', 0) + w = tw var total runtime.MemProfileRecord for i := range p { @@ -526,9 +546,7 @@ func writeHeap(w io.Writer, debug int) error { fmt.Fprintf(w, " %#x", pc) } fmt.Fprintf(w, "\n") - if debug > 0 { - printStackRecord(w, r.Stack(), false) - } + printStackRecord(w, r.Stack(), false) } // Print memstats information too. 
@@ -554,15 +572,15 @@ func writeHeap(w io.Writer, debug int) error { fmt.Fprintf(w, "# MSpan = %d / %d\n", s.MSpanInuse, s.MSpanSys) fmt.Fprintf(w, "# MCache = %d / %d\n", s.MCacheInuse, s.MCacheSys) fmt.Fprintf(w, "# BuckHashSys = %d\n", s.BuckHashSys) + fmt.Fprintf(w, "# GCSys = %d\n", s.GCSys) + fmt.Fprintf(w, "# OtherSys = %d\n", s.OtherSys) fmt.Fprintf(w, "# NextGC = %d\n", s.NextGC) fmt.Fprintf(w, "# PauseNs = %d\n", s.PauseNs) fmt.Fprintf(w, "# NumGC = %d\n", s.NumGC) fmt.Fprintf(w, "# DebugGC = %v\n", s.DebugGC) - if tw != nil { - tw.Flush() - } + tw.Flush() return b.Flush() } @@ -686,49 +704,29 @@ func StartCPUProfile(w io.Writer) error { } func profileWriter(w io.Writer) { + startTime := time.Now() + // This will buffer the entire profile into buf and then + // translate it into a profile.Profile structure. This will + // create two copies of all the data in the profile in memory. + // TODO(matloob): Convert each chunk of the proto output and + // stream it out instead of converting the entire profile. + var buf bytes.Buffer for { data := runtime.CPUProfile() if data == nil { break } - w.Write(data) - } - - // We are emitting the legacy profiling format, which permits - // a memory map following the CPU samples. The memory map is - // simply a copy of the GNU/Linux /proc/self/maps file. The - // profiler uses the memory map to map PC values in shared - // libraries to a shared library in the filesystem, in order - // to report the correct function and, if the shared library - // has debug info, file/line. This is particularly useful for - // PIE (position independent executables) as on ELF systems a - // PIE is simply an executable shared library. - // - // Because the profiling format expects the memory map in - // GNU/Linux format, we only do this on GNU/Linux for now. To - // add support for profiling PIE on other ELF-based systems, - // it may be necessary to map the system-specific mapping - // information to the GNU/Linux format. For a reasonably - // portable C++ version, see the FillProcSelfMaps function in - // https://github.com/gperftools/gperftools/blob/master/src/base/sysinfo.cc - // - // The code that parses this mapping for the pprof tool is - // ParseMemoryMap in cmd/internal/pprof/legacy_profile.go, but - // don't change that code, as similar code exists in other - // (non-Go) pprof readers. Change this code so that that code works. - // - // We ignore errors reading or copying the memory map; the - // profile is likely usable without it, and we have no good way - // to report errors. - if runtime.GOOS == "linux" { - f, err := os.Open("/proc/self/maps") - if err == nil { - io.WriteString(w, "\nMAPPED_LIBRARIES:\n") - io.Copy(w, f) - f.Close() - } + buf.Write(data) + } + + profile, err := protopprof.TranslateCPUProfile(buf.Bytes(), startTime) + if err != nil { + // The runtime should never produce an invalid or truncated profile. + // It drops records that can't fit into its log buffers. + panic(fmt.Errorf("could not translate binary profile to proto format: %v", err)) } + profile.Write(w) cpu.done <- true } @@ -747,18 +745,18 @@ func StopCPUProfile() { <-cpu.done } -type byCycles []runtime.BlockProfileRecord - -func (x byCycles) Len() int { return len(x) } -func (x byCycles) Swap(i, j int) { x[i], x[j] = x[j], x[i] } -func (x byCycles) Less(i, j int) bool { return x[i].Cycles > x[j].Cycles } - // countBlock returns the number of records in the blocking profile. 
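
The mutex profile registered earlier in this file is driven by runtime.SetMutexProfileFraction and rendered by writeMutex just below. A minimal end-to-end sketch (not from this patch):

package main

import (
	"os"
	"runtime"
	"runtime/pprof"
	"sync"
	"time"
)

func main() {
	runtime.SetMutexProfileFraction(1) // sample every contention event
	defer runtime.SetMutexProfileFraction(0)

	var mu sync.Mutex
	mu.Lock()
	go func() {
		time.Sleep(100 * time.Millisecond)
		mu.Unlock()
	}()
	mu.Lock() // blocks, so the contention delay is eligible for sampling
	mu.Unlock()

	pprof.Lookup("mutex").WriteTo(os.Stdout, 1) // debug=1: legacy text form
}
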
func countBlock() int { n, _ := runtime.BlockProfile(nil) return n } +// countMutex returns the number of records in the mutex profile. +func countMutex() int { + n, _ := runtime.MutexProfile(nil) + return n +} + // writeBlock writes the current blocking profile to w. func writeBlock(w io.Writer, debug int) error { var p []runtime.BlockProfileRecord @@ -772,7 +770,7 @@ func writeBlock(w io.Writer, debug int) error { } } - sort.Sort(byCycles(p)) + sort.Slice(p, func(i, j int) bool { return p[i].Cycles > p[j].Cycles }) b := bufio.NewWriter(w) var tw *tabwriter.Writer @@ -802,4 +800,49 @@ func writeBlock(w io.Writer, debug int) error { return b.Flush() } +// writeMutex writes the current mutex profile to w. +func writeMutex(w io.Writer, debug int) error { + // TODO(pjw): too much common code with writeBlock. FIX! + var p []runtime.BlockProfileRecord + n, ok := runtime.MutexProfile(nil) + for { + p = make([]runtime.BlockProfileRecord, n+50) + n, ok = runtime.MutexProfile(p) + if ok { + p = p[:n] + break + } + } + + sort.Slice(p, func(i, j int) bool { return p[i].Cycles > p[j].Cycles }) + + b := bufio.NewWriter(w) + var tw *tabwriter.Writer + w = b + if debug > 0 { + tw = tabwriter.NewWriter(w, 1, 8, 1, '\t', 0) + w = tw + } + + fmt.Fprintf(w, "--- mutex:\n") + fmt.Fprintf(w, "cycles/second=%v\n", runtime_cyclesPerSecond()) + fmt.Fprintf(w, "sampling period=%d\n", runtime.SetMutexProfileFraction(-1)) + for i := range p { + r := &p[i] + fmt.Fprintf(w, "%v %v @", r.Cycles, r.Count) + for _, pc := range r.Stack() { + fmt.Fprintf(w, " %#x", pc) + } + fmt.Fprint(w, "\n") + if debug > 0 { + printStackRecord(w, r.Stack(), true) + } + } + + if tw != nil { + tw.Flush() + } + return b.Flush() +} + func runtime_cyclesPerSecond() int64 diff --git a/libgo/go/runtime/pprof/pprof_test.go b/libgo/go/runtime/pprof/pprof_test.go index 1692d42..6034058 100644 --- a/libgo/go/runtime/pprof/pprof_test.go +++ b/libgo/go/runtime/pprof/pprof_test.go @@ -8,8 +8,12 @@ package pprof_test import ( "bytes" + "compress/gzip" "fmt" + "internal/pprof/profile" "internal/testenv" + "io" + "io/ioutil" "math/big" "os" "os/exec" @@ -20,7 +24,6 @@ import ( "sync" "testing" "time" - "unsafe" ) func cpuHogger(f func(), dur time.Duration) { @@ -87,40 +90,17 @@ func TestCPUProfileMultithreaded(t *testing.T) { } func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []uintptr)) { - // Convert []byte to []uintptr. - l := len(valBytes) - if i := bytes.Index(valBytes, []byte("\nMAPPED_LIBRARIES:\n")); i >= 0 { - l = i - } - l /= int(unsafe.Sizeof(uintptr(0))) - val := *(*[]uintptr)(unsafe.Pointer(&valBytes)) - val = val[:l] - - // 5 for the header, 3 for the trailer. - if l < 5+3 { - t.Logf("profile too short: %#x", val) - if badOS[runtime.GOOS] { - t.Skipf("ignoring failure on %s; see golang.org/issue/13841", runtime.GOOS) - return - } - t.FailNow() - } - - hd, val, tl := val[:5], val[5:l-3], val[l-3:] - if hd[0] != 0 || hd[1] != 3 || hd[2] != 0 || hd[3] != 1e6/100 || hd[4] != 0 { - t.Fatalf("unexpected header %#x", hd) - } - - if tl[0] != 0 || tl[1] != 1 || tl[2] != 0 { - t.Fatalf("malformed end-of-data marker %#x", tl) + p, err := profile.Parse(bytes.NewReader(valBytes)) + if err != nil { + t.Fatal(err) } - - for len(val) > 0 { - if len(val) < 2 || val[0] < 1 || val[1] < 1 || uintptr(len(val)) < 2+val[1] { - t.Fatalf("malformed profile. 
leftover: %#x", val) + for _, sample := range p.Sample { + count := uintptr(sample.Value[0]) + stk := make([]uintptr, len(sample.Location)) + for i := range sample.Location { + stk[i] = uintptr(sample.Location[i].Address) } - f(val[0], val[2:2+val[1]]) - val = val[2+val[1]:] + f(count, stk) } } @@ -225,7 +205,11 @@ func profileOk(t *testing.T, need []string, prof bytes.Buffer, duration time.Dur } // Check that we got a reasonable number of samples. - if ideal := uintptr(duration * 100 / time.Second); samples == 0 || samples < ideal/4 { + // We used to always require at least ideal/4 samples, + // but that is too hard to guarantee on a loaded system. + // Now we accept 10 or more samples, which we take to be + // enough to show that at least some profiling is occurring. + if ideal := uintptr(duration * 100 / time.Second); samples == 0 || (samples < ideal/4 && samples < 10) { t.Logf("too few samples; got %d, want at least %d, ideally %d", samples, ideal/4, ideal) ok = false } @@ -367,8 +351,49 @@ func TestMathBigDivide(t *testing.T) { }) } +func slurpString(r io.Reader) string { + slurp, _ := ioutil.ReadAll(r) + return string(slurp) +} + +func getLinuxKernelConfig() string { + if f, err := os.Open("/proc/config"); err == nil { + defer f.Close() + return slurpString(f) + } + if f, err := os.Open("/proc/config.gz"); err == nil { + defer f.Close() + r, err := gzip.NewReader(f) + if err != nil { + return "" + } + return slurpString(r) + } + if f, err := os.Open("/boot/config"); err == nil { + defer f.Close() + return slurpString(f) + } + uname, _ := exec.Command("uname", "-r").Output() + if len(uname) > 0 { + if f, err := os.Open("/boot/config-" + strings.TrimSpace(string(uname))); err == nil { + defer f.Close() + return slurpString(f) + } + } + return "" +} + +func haveLinuxHiresTimers() bool { + config := getLinuxKernelConfig() + return strings.Contains(config, "CONFIG_HIGH_RES_TIMERS=y") +} + func TestStackBarrierProfiling(t *testing.T) { - if (runtime.GOOS == "linux" && runtime.GOARCH == "arm") || runtime.GOOS == "openbsd" || runtime.GOOS == "solaris" || runtime.GOOS == "dragonfly" || runtime.GOOS == "freebsd" { + if (runtime.GOOS == "linux" && runtime.GOARCH == "arm") || + runtime.GOOS == "openbsd" || + runtime.GOOS == "solaris" || + runtime.GOOS == "dragonfly" || + runtime.GOOS == "freebsd" { // This test currently triggers a large number of // usleep(100)s. These kernels/arches have poor // resolution timers, so this gives up a whole @@ -381,6 +406,12 @@ func TestStackBarrierProfiling(t *testing.T) { return } + if runtime.GOOS == "linux" && strings.HasPrefix(runtime.GOARCH, "mips") { + if !haveLinuxHiresTimers() { + t.Skipf("low resolution timers inhibit profiling signals (golang.org/issue/13405, golang.org/issue/17936)") + } + } + if !strings.Contains(os.Getenv("GODEBUG"), "gcstackbarrierall=1") { // Re-execute this test with constant GC and stack // barriers at every frame. @@ -594,6 +625,50 @@ func blockCond() { mu.Unlock() } +func TestMutexProfile(t *testing.T) { + old := runtime.SetMutexProfileFraction(1) + defer runtime.SetMutexProfileFraction(old) + if old != 0 { + t.Fatalf("need MutexProfileRate 0, got %d", old) + } + + blockMutex() + + var w bytes.Buffer + Lookup("mutex").WriteTo(&w, 1) + prof := w.String() + + if !strings.HasPrefix(prof, "--- mutex:\ncycles/second=") { + t.Errorf("Bad profile header:\n%v", prof) + } + prof = strings.Trim(prof, "\n") + lines := strings.Split(prof, "\n") + // gccgo adds an extra line in the stack trace, not sure why. 
+ if len(lines) < 6 { + t.Errorf("expected 6 lines, got %d %q\n%s", len(lines), prof, prof) + } + if len(lines) < 6 { + return + } + // checking that the line is like "35258904 1 @ 0x48288d 0x47cd28 0x458931" + r2 := `^\d+ 1 @(?: 0x[[:xdigit:]]+)+` + //r2 := "^[0-9]+ 1 @ 0x[0-9a-f x]+$" + if ok, err := regexp.MatchString(r2, lines[3]); err != nil || !ok { + t.Errorf("%q didn't match %q", lines[3], r2) + } + r3 := "^#.*pprof_test.\\$nested.*$" + match := false + for _, i := range []int{5, 6} { + if ok, _ := regexp.MatchString(r3, lines[i]); ok { + match = true + break + } + } + if !match { + t.Errorf("neither %q nor %q matched %q", lines[5], lines[6], r3) + } +} + func func1(c chan int) { <-c } func func2(c chan int) { <-c } func func3(c chan int) { <-c } @@ -621,13 +696,31 @@ func TestGoroutineCounts(t *testing.T) { time.Sleep(10 * time.Millisecond) // let goroutines block on channel var w bytes.Buffer - Lookup("goroutine").WriteTo(&w, 1) + goroutineProf := Lookup("goroutine") + + // Check debug profile + goroutineProf.WriteTo(&w, 1) prof := w.String() if !containsInOrder(prof, "\n50 @ ", "\n40 @", "\n10 @", "\n1 @") { t.Errorf("expected sorted goroutine counts:\n%s", prof) } + // Check proto profile + w.Reset() + goroutineProf.WriteTo(&w, 0) + p, err := profile.Parse(&w) + if err != nil { + t.Errorf("error parsing protobuf profile: %v", err) + } + if err := p.CheckValid(); err != nil { + t.Errorf("protobuf profile is invalid: %v", err) + } + if !containsCounts(p, []int64{50, 40, 10, 1}) { + t.Errorf("expected count profile to contain goroutines with counts %v, got %v", + []int64{50, 40, 10, 1}, p) + } + close(c) time.Sleep(10 * time.Millisecond) // let goroutines exit @@ -643,3 +736,23 @@ func containsInOrder(s string, all ...string) bool { } return true } + +func containsCounts(prof *profile.Profile, counts []int64) bool { + m := make(map[int64]int) + for _, c := range counts { + m[c]++ + } + for _, s := range prof.Sample { + // The count is the single value in the sample + if len(s.Value) != 1 { + return false + } + m[s.Value[0]]-- + } + for _, n := range m { + if n > 0 { + return false + } + } + return true +} diff --git a/libgo/go/runtime/print.go b/libgo/go/runtime/print.go index 371cec5..4db726a 100644 --- a/libgo/go/runtime/print.go +++ b/libgo/go/runtime/print.go @@ -4,7 +4,10 @@ package runtime -import "unsafe" +import ( + "runtime/internal/atomic" + "unsafe" +) // For gccgo, use go:linkname to rename compiler-called functions to // themselves, so that the compiler will export them. @@ -41,6 +44,36 @@ func bytes(s string) (ret []byte) { return } +var ( + // printBacklog is a circular buffer of messages written with the builtin + // print* functions, for use in postmortem analysis of core dumps. + printBacklog [512]byte + printBacklogIndex int +) + +// recordForPanic maintains a circular buffer of messages written by the +// runtime leading up to a process crash, allowing the messages to be +// extracted from a core dump. +// +// The text written during a process crash (following "panic" or "fatal +// error") is not saved, since the goroutine stacks will generally be readable +// from the runtime datastructures in the core file. +func recordForPanic(b []byte) { + printlock() + + if atomic.Load(&panicking) == 0 { + // Not actively crashing: maintain circular buffer of print output. 
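
The loop that follows copies the message into printBacklog in at most two chunks, so that text longer than the space left at the end of the buffer wraps around to the front. A standalone sketch of the same idiom (buffer size and names are made up):

package main

import "fmt"

func main() {
	var ring [8]byte
	idx := 0

	record := func(b []byte) {
		for i := 0; i < len(b); {
			n := copy(ring[idx:], b[i:]) // fill up to the end of the buffer...
			i += n
			idx = (idx + n) % len(ring) // ...then wrap to the front
		}
	}

	record([]byte("hello "))
	record([]byte("world"))
	fmt.Printf("%q\n", ring[:]) // "rldlo wo": the oldest bytes were overwritten
}
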
+ for i := 0; i < len(b); { + n := copy(printBacklog[printBacklogIndex:], b[i:]) + i += n + printBacklogIndex += n + printBacklogIndex %= len(printBacklog) + } + } + + printunlock() +} + var debuglock mutex // The compiler emits calls to printlock and printunlock around @@ -75,6 +108,7 @@ func gwrite(b []byte) { if len(b) == 0 { return } + recordForPanic(b) gp := getg() if gp == nil || gp.writebuf == nil { writeErr(b) diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go index ef863c8..958b56e 100644 --- a/libgo/go/runtime/proc.go +++ b/libgo/go/runtime/proc.go @@ -266,7 +266,7 @@ func ready(gp *g, traceskip int, next bool) { // status is Gwaiting or Gscanwaiting, make Grunnable and put on runq casgstatus(gp, _Gwaiting, _Grunnable) runqput(_g_.m.p.ptr(), gp, next) - if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 { // TODO: fast atomic + if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 { wakep() } _g_.m.locks-- @@ -329,10 +329,15 @@ func helpgc(nproc int32) { // sched.stopwait to in order to request that all Gs permanently stop. const freezeStopWait = 0x7fffffff +// freezing is set to non-zero if the runtime is trying to freeze the +// world. +var freezing uint32 + // Similar to stopTheWorld but best-effort and can be called several times. // There is no reverse operation, used during crashing. // This function must not lock any mutexes. func freezetheworld() { + atomic.Store(&freezing, 1) // stopwait and preemption requests can be lost // due to races with concurrently executing threads, // so try several times @@ -498,7 +503,7 @@ func casgstatus(gp *g, oldval, newval uint32) { // in panic or being exited, this may not reliably stop all // goroutines. func stopTheWorld(reason string) { - semacquire(&worldsema, false) + semacquire(&worldsema, 0) getg().m.preemptoff = reason systemstack(stopTheWorldWithSema) } @@ -521,7 +526,7 @@ var worldsema uint32 = 1 // preemption first and then should stopTheWorldWithSema on the system // stack: // -// semacquire(&worldsema, false) +// semacquire(&worldsema, 0) // m.preemptoff = "reason" // systemstack(stopTheWorldWithSema) // @@ -590,15 +595,30 @@ func stopTheWorldWithSema() { preemptall() } } + + // sanity checks + bad := "" if sched.stopwait != 0 { - throw("stopTheWorld: not stopped") - } - for i := 0; i < int(gomaxprocs); i++ { - p := allp[i] - if p.status != _Pgcstop { - throw("stopTheWorld: not stopped") + bad = "stopTheWorld: not stopped (stopwait != 0)" + } else { + for i := 0; i < int(gomaxprocs); i++ { + p := allp[i] + if p.status != _Pgcstop { + bad = "stopTheWorld: not stopped (status != _Pgcstop)" + } } } + if atomic.Load(&freezing) != 0 { + // Some other thread is panicking. This can cause the + // sanity checks above to fail if the panic happens in + // the signal handler on a stopped thread. Either way, + // we should halt this thread. + lock(&deadlock) + lock(&deadlock) + } + if bad != "" { + throw(bad) + } } func mhelpgc() { @@ -897,6 +917,7 @@ func oneNewExtraM() { mp := allocm(nil, true, &g0SP, &g0SPSize) gp := malg(true, false, nil, nil) gp.gcscanvalid = true // fresh G, so no dequeueRescan necessary + gp.gcscandone = true gp.gcRescan = -1 // malg returns status as Gidle, change to Gdead before adding to allg @@ -1061,7 +1082,7 @@ retry: // Hands off P from syscall or locked M. // Always runs without a P, so write barriers are not allowed. 
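
A few hunks up, stopTheWorldWithSema halts a thread that lost the race with a panicking freeze by taking the same runtime lock twice in a row; the second acquisition can never succeed, so the thread parks permanently. The same idiom at user level, as a sketch (do not call this anywhere that needs to return):

package main

import (
	"fmt"
	"sync"
	"time"
)

var halt sync.Mutex

// parkForever blocks its caller for good: nothing ever releases the first
// Lock, so the second Lock waits forever.
func parkForever() {
	halt.Lock()
	halt.Lock()
}

func main() {
	go parkForever()                  // this goroutine never runs again
	time.Sleep(50 * time.Millisecond) // give it a moment, then exit normally
	fmt.Println("main exits; the parked goroutine stays parked")
}
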
-//go:nowritebarrier +//go:nowritebarrierrec func handoffp(_p_ *p) { // handoffp must start an M in any situation where // findrunnable would return a G to run on _p_. @@ -1154,7 +1175,7 @@ func stoplockedm() { // Schedules the locked m to run the locked gp. // May run during STW, so write barriers are not allowed. -//go:nowritebarrier +//go:nowritebarrierrec func startlockedm(gp *g) { _g_ := getg() @@ -1204,6 +1225,11 @@ func gcstopm() { // If inheritTime is true, gp inherits the remaining time in the // current time slice. Otherwise, it starts a new time slice. // Never returns. +// +// Write barriers are allowed because this is called immediately after +// acquiring a P in several places. +// +//go:yeswritebarrierrec func execute(gp *g, inheritTime bool) { _g_ := getg() @@ -1302,7 +1328,7 @@ top: // If number of spinning M's >= number of busy P's, block. // This is necessary to prevent excessive CPU consumption // when GOMAXPROCS>>1 but the program parallelism is low. - if !_g_.m.spinning && 2*atomic.Load(&sched.nmspinning) >= procs-atomic.Load(&sched.npidle) { // TODO: fast atomic + if !_g_.m.spinning && 2*atomic.Load(&sched.nmspinning) >= procs-atomic.Load(&sched.npidle) { goto stop } if !_g_.m.spinning { @@ -1310,7 +1336,7 @@ top: atomic.Xadd(&sched.nmspinning, 1) } for i := 0; i < 4; i++ { - for enum := stealOrder.start(fastrand1()); !enum.done(); enum.next() { + for enum := stealOrder.start(fastrand()); !enum.done(); enum.next() { if sched.gcwaiting != 0 { goto top } @@ -1393,6 +1419,26 @@ stop: } } + // Check for idle-priority GC work again. + if gcBlackenEnabled != 0 && gcMarkWorkAvailable(nil) { + lock(&sched.lock) + _p_ = pidleget() + if _p_ != nil && _p_.gcBgMarkWorker == 0 { + pidleput(_p_) + _p_ = nil + } + unlock(&sched.lock) + if _p_ != nil { + acquirep(_p_) + if wasSpinning { + _g_.m.spinning = true + atomic.Xadd(&sched.nmspinning, 1) + } + // Go back to idle GC check. + goto stop + } + } + // poll network if netpollinited() && atomic.Xchg64(&sched.lastpoll, 0) != 0 { if _g_.m.p != 0 { @@ -1423,6 +1469,27 @@ stop: goto top } +// pollWork returns true if there is non-background work this P could +// be doing. This is a fairly lightweight check to be used for +// background work loops, like idle GC. It checks a subset of the +// conditions checked by the actual scheduler. +func pollWork() bool { + if sched.runqsize != 0 { + return true + } + p := getg().m.p.ptr() + if !runqempty(p) { + return true + } + if netpollinited() && sched.lastpoll != 0 { + if gp := netpoll(false); gp != nil { + injectglist(gp) + return true + } + } + return false +} + func resetspinning() { _g_ := getg() if !_g_.m.spinning { @@ -1562,8 +1629,8 @@ top: func dropg() { _g_ := getg() - _g_.m.curg.m = nil - _g_.m.curg = nil + setMNoWB(&_g_.m.curg.m, nil) + setGNoWB(&_g_.m.curg, nil) } func beforefork() { @@ -1887,7 +1954,13 @@ func procresize(nprocs int32) *p { } // Associate p and the current m. +// +// This function is allowed to have write barriers even if the caller +// isn't because it immediately acquires _p_. +// +//go:yeswritebarrierrec func acquirep(_p_ *p) { + // Do the part that isn't allowed to have write barriers. acquirep1(_p_) // have p; write barriers now allowed @@ -1899,8 +1972,11 @@ func acquirep(_p_ *p) { } } -// May run during STW, so write barriers are not allowed. -//go:nowritebarrier +// acquirep1 is the first step of acquirep, which actually acquires +// _p_. This is broken out so we can disallow write barriers for this +// part, since we don't yet have a P. 
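
Several hunks in this file swap //go:nowritebarrier for //go:nowritebarrierrec, or add //go:yeswritebarrierrec, so a brief informal summary of how these directives are conventionally used in the runtime may help (background, not part of the patch): //go:nowritebarrier makes the compiler reject write barriers in that one function; //go:nowritebarrierrec extends the check recursively to everything the function calls; and //go:yeswritebarrierrec marks a point, such as code that runs only after a P has been acquired, where that recursive check may stop.
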
+// +//go:nowritebarrierrec func acquirep1(_p_ *p) { _g_ := getg() @@ -2064,7 +2140,7 @@ func sysmon() { delay = 10 * 1000 } usleep(delay) - if debug.schedtrace <= 0 && (sched.gcwaiting != 0 || atomic.Load(&sched.npidle) == uint32(gomaxprocs)) { // TODO: fast atomic + if debug.schedtrace <= 0 && (sched.gcwaiting != 0 || atomic.Load(&sched.npidle) == uint32(gomaxprocs)) { lock(&sched.lock) if atomic.Load(&sched.gcwaiting) != 0 || atomic.Load(&sched.npidle) == uint32(gomaxprocs) { atomic.Store(&sched.sysmonwait, 1) @@ -2347,7 +2423,7 @@ func schedtrace(detailed bool) { // Put mp on midle list. // Sched must be locked. // May run during STW, so write barriers are not allowed. -//go:nowritebarrier +//go:nowritebarrierrec func mput(mp *m) { mp.schedlink = sched.midle sched.midle.set(mp) @@ -2358,7 +2434,7 @@ func mput(mp *m) { // Try to get an m from midle list. // Sched must be locked. // May run during STW, so write barriers are not allowed. -//go:nowritebarrier +//go:nowritebarrierrec func mget() *m { mp := sched.midle.ptr() if mp != nil { @@ -2371,7 +2447,7 @@ func mget() *m { // Put gp on the global runnable queue. // Sched must be locked. // May run during STW, so write barriers are not allowed. -//go:nowritebarrier +//go:nowritebarrierrec func globrunqput(gp *g) { gp.schedlink = 0 if sched.runqtail != 0 { @@ -2386,7 +2462,7 @@ func globrunqput(gp *g) { // Put gp at the head of the global runnable queue. // Sched must be locked. // May run during STW, so write barriers are not allowed. -//go:nowritebarrier +//go:nowritebarrierrec func globrunqputhead(gp *g) { gp.schedlink = sched.runqhead sched.runqhead.set(gp) @@ -2446,7 +2522,7 @@ func globrunqget(_p_ *p, max int32) *g { // Put p to on _Pidle list. // Sched must be locked. // May run during STW, so write barriers are not allowed. -//go:nowritebarrier +//go:nowritebarrierrec func pidleput(_p_ *p) { if !runqempty(_p_) { throw("pidleput: P has non-empty run queue") @@ -2459,7 +2535,7 @@ func pidleput(_p_ *p) { // Try get a p from _Pidle list. // Sched must be locked. // May run during STW, so write barriers are not allowed. -//go:nowritebarrier +//go:nowritebarrierrec func pidleget() *p { _p_ := sched.pidle.ptr() if _p_ != nil { @@ -2503,7 +2579,7 @@ const randomizeScheduler = raceenabled // If the run queue is full, runnext puts g on the global queue. // Executed only by the owner P. func runqput(_p_ *p, gp *g, next bool) { - if randomizeScheduler && next && fastrand1()%2 == 0 { + if randomizeScheduler && next && fastrand()%2 == 0 { next = false } @@ -2556,7 +2632,7 @@ func runqputslow(_p_ *p, gp *g, h, t uint32) bool { if randomizeScheduler { for i := uint32(1); i <= n; i++ { - j := fastrand1() % (i + 1) + j := fastrand() % (i + 1) batch[i], batch[j] = batch[j], batch[i] } } @@ -2681,7 +2757,11 @@ func runqsteal(_p_, p2 *p, stealRunNextG bool) *g { func setMaxThreads(in int) (out int) { lock(&sched.lock) out = int(sched.maxmcount) - sched.maxmcount = int32(in) + if in > 0x7fffffff { // MaxInt32 + sched.maxmcount = 0x7fffffff + } else { + sched.maxmcount = int32(in) + } checkmcount() unlock(&sched.lock) return diff --git a/libgo/go/runtime/race/race_linux_test.go b/libgo/go/runtime/race/race_linux_test.go deleted file mode 100644 index c00ce4d..0000000 --- a/libgo/go/runtime/race/race_linux_test.go +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -// +build linux,race - -package race_test - -import ( - "sync/atomic" - "syscall" - "testing" - "unsafe" -) - -func TestAtomicMmap(t *testing.T) { - // Test that atomic operations work on "external" memory. Previously they crashed (#16206). - // Also do a sanity correctness check: under race detector atomic operations - // are implemented inside of race runtime. - mem, err := syscall.Mmap(-1, 0, 1<<20, syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_ANON|syscall.MAP_PRIVATE) - if err != nil { - t.Fatalf("mmap failed: %v", err) - } - defer syscall.Munmap(mem) - a := (*uint64)(unsafe.Pointer(&mem[0])) - if *a != 0 { - t.Fatalf("bad atomic value: %v, want 0", *a) - } - atomic.AddUint64(a, 1) - if *a != 1 { - t.Fatalf("bad atomic value: %v, want 1", *a) - } - atomic.AddUint64(a, 1) - if *a != 2 { - t.Fatalf("bad atomic value: %v, want 2", *a) - } -} diff --git a/libgo/go/runtime/race/race_windows_test.go b/libgo/go/runtime/race/race_windows_test.go deleted file mode 100644 index 307a1ea..0000000 --- a/libgo/go/runtime/race/race_windows_test.go +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build windows,race - -package race_test - -import ( - "sync/atomic" - "syscall" - "testing" - "unsafe" -) - -func TestAtomicMmap(t *testing.T) { - // Test that atomic operations work on "external" memory. Previously they crashed (#16206). - // Also do a sanity correctness check: under race detector atomic operations - // are implemented inside of race runtime. - kernel32 := syscall.NewLazyDLL("kernel32.dll") - VirtualAlloc := kernel32.NewProc("VirtualAlloc") - VirtualFree := kernel32.NewProc("VirtualFree") - const ( - MEM_COMMIT = 0x00001000 - MEM_RESERVE = 0x00002000 - MEM_RELEASE = 0x8000 - PAGE_READWRITE = 0x04 - ) - mem, _, err := syscall.Syscall6(VirtualAlloc.Addr(), 4, 0, 1<<20, MEM_COMMIT|MEM_RESERVE, PAGE_READWRITE, 0, 0) - if err != 0 { - t.Fatalf("VirtualAlloc failed: %v", err) - } - defer syscall.Syscall(VirtualFree.Addr(), 3, mem, 1<<20, MEM_RELEASE) - a := (*uint64)(unsafe.Pointer(mem)) - if *a != 0 { - t.Fatalf("bad atomic value: %v, want 0", *a) - } - atomic.AddUint64(a, 1) - if *a != 1 { - t.Fatalf("bad atomic value: %v, want 1", *a) - } - atomic.AddUint64(a, 1) - if *a != 2 { - t.Fatalf("bad atomic value: %v, want 2", *a) - } -} diff --git a/libgo/go/runtime/race/testdata/issue12225_test.go b/libgo/go/runtime/race/testdata/issue12225_test.go deleted file mode 100644 index 0494493..0000000 --- a/libgo/go/runtime/race/testdata/issue12225_test.go +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package race_test - -import "unsafe" - -// golang.org/issue/12225 -// The test is that this compiles at all. - -//go:noinline -func convert(s string) []byte { - return []byte(s) -} - -func issue12225() { - println(*(*int)(unsafe.Pointer(&convert("")[0]))) - println(*(*int)(unsafe.Pointer(&[]byte("")[0]))) -} diff --git a/libgo/go/runtime/race/testdata/issue12664_test.go b/libgo/go/runtime/race/testdata/issue12664_test.go deleted file mode 100644 index c9f790e..0000000 --- a/libgo/go/runtime/race/testdata/issue12664_test.go +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. 
-// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package race_test - -import ( - "fmt" - "testing" -) - -var issue12664 = "hi" - -func TestRaceIssue12664(t *testing.T) { - c := make(chan struct{}) - go func() { - issue12664 = "bye" - close(c) - }() - fmt.Println(issue12664) - <-c -} - -type MyI interface { - foo() -} - -type MyT int - -func (MyT) foo() { -} - -var issue12664_2 MyT = 0 - -func TestRaceIssue12664_2(t *testing.T) { - c := make(chan struct{}) - go func() { - issue12664_2 = 1 - close(c) - }() - func(x MyI) { - // Never true, but prevents inlining. - if x.(MyT) == -1 { - close(c) - } - }(issue12664_2) - <-c -} - -var issue12664_3 MyT = 0 - -func TestRaceIssue12664_3(t *testing.T) { - c := make(chan struct{}) - go func() { - issue12664_3 = 1 - close(c) - }() - var r MyT - var i interface{} = r - issue12664_3 = i.(MyT) - <-c -} - -var issue12664_4 MyT = 0 - -func TestRaceIssue12664_4(t *testing.T) { - c := make(chan struct{}) - go func() { - issue12664_4 = 1 - close(c) - }() - var r MyT - var i MyI = r - issue12664_4 = i.(MyT) - <-c -} diff --git a/libgo/go/runtime/race/testdata/issue13264_test.go b/libgo/go/runtime/race/testdata/issue13264_test.go deleted file mode 100644 index d42290d..0000000 --- a/libgo/go/runtime/race/testdata/issue13264_test.go +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package race_test - -// golang.org/issue/13264 -// The test is that this compiles at all. - -func issue13264() { - for ; ; []map[int]int{}[0][0] = 0 { - } -} diff --git a/libgo/go/runtime/rune.go b/libgo/go/runtime/rune.go deleted file mode 100644 index 99c38e0..0000000 --- a/libgo/go/runtime/rune.go +++ /dev/null @@ -1,219 +0,0 @@ -/* - * The authors of this software are Rob Pike and Ken Thompson. - * Copyright (c) 2002 by Lucent Technologies. - * Portions Copyright 2009 The Go Authors. All rights reserved. - * Permission to use, copy, modify, and distribute this software for any - * purpose without fee is hereby granted, provided that this entire notice - * is included in all copies of any software which is or includes a copy - * or modification of this software and in all copies of the supporting - * documentation for such software. - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY - * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY - * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. 
- */ - -/* - * This code is copied, with slight editing due to type differences, - * from a subset of ../lib9/utf/rune.c [which no longer exists] - */ - -package runtime - -const ( - bit1 = 7 - bitx = 6 - bit2 = 5 - bit3 = 4 - bit4 = 3 - bit5 = 2 - - t1 = ((1 << (bit1 + 1)) - 1) ^ 0xFF /* 0000 0000 */ - tx = ((1 << (bitx + 1)) - 1) ^ 0xFF /* 1000 0000 */ - t2 = ((1 << (bit2 + 1)) - 1) ^ 0xFF /* 1100 0000 */ - t3 = ((1 << (bit3 + 1)) - 1) ^ 0xFF /* 1110 0000 */ - t4 = ((1 << (bit4 + 1)) - 1) ^ 0xFF /* 1111 0000 */ - t5 = ((1 << (bit5 + 1)) - 1) ^ 0xFF /* 1111 1000 */ - - rune1 = (1 << (bit1 + 0*bitx)) - 1 /* 0000 0000 0111 1111 */ - rune2 = (1 << (bit2 + 1*bitx)) - 1 /* 0000 0111 1111 1111 */ - rune3 = (1 << (bit3 + 2*bitx)) - 1 /* 1111 1111 1111 1111 */ - rune4 = (1 << (bit4 + 3*bitx)) - 1 /* 0001 1111 1111 1111 1111 1111 */ - - maskx = (1 << bitx) - 1 /* 0011 1111 */ - testx = maskx ^ 0xFF /* 1100 0000 */ - - runeerror = 0xFFFD - runeself = 0x80 - - surrogateMin = 0xD800 - surrogateMax = 0xDFFF - - bad = runeerror - - runemax = 0x10FFFF /* maximum rune value */ -) - -/* - * Modified by Wei-Hwa Huang, Google Inc., on 2004-09-24 - * This is a slower but "safe" version of the old chartorune - * that works on strings that are not necessarily null-terminated. - * - * If you know for sure that your string is null-terminated, - * chartorune will be a bit faster. - * - * It is guaranteed not to attempt to access "length" - * past the incoming pointer. This is to avoid - * possible access violations. If the string appears to be - * well-formed but incomplete (i.e., to get the whole Rune - * we'd need to read past str+length) then we'll set the Rune - * to Bad and return 0. - * - * Note that if we have decoding problems for other - * reasons, we return 1 instead of 0. - */ -func charntorune(s string) (rune, int) { - /* When we're not allowed to read anything */ - if len(s) <= 0 { - return bad, 1 - } - - /* - * one character sequence (7-bit value) - * 00000-0007F => T1 - */ - c := s[0] - if c < tx { - return rune(c), 1 - } - - // If we can't read more than one character we must stop - if len(s) <= 1 { - return bad, 1 - } - - /* - * two character sequence (11-bit value) - * 0080-07FF => t2 tx - */ - c1 := s[1] ^ tx - if (c1 & testx) != 0 { - return bad, 1 - } - if c < t3 { - if c < t2 { - return bad, 1 - } - l := ((rune(c) << bitx) | rune(c1)) & rune2 - if l <= rune1 { - return bad, 1 - } - return l, 2 - } - - // If we can't read more than two characters we must stop - if len(s) <= 2 { - return bad, 1 - } - - /* - * three character sequence (16-bit value) - * 0800-FFFF => t3 tx tx - */ - c2 := s[2] ^ tx - if (c2 & testx) != 0 { - return bad, 1 - } - if c < t4 { - l := ((((rune(c) << bitx) | rune(c1)) << bitx) | rune(c2)) & rune3 - if l <= rune2 { - return bad, 1 - } - if surrogateMin <= l && l <= surrogateMax { - return bad, 1 - } - return l, 3 - } - - if len(s) <= 3 { - return bad, 1 - } - - /* - * four character sequence (21-bit value) - * 10000-1FFFFF => t4 tx tx tx - */ - c3 := s[3] ^ tx - if (c3 & testx) != 0 { - return bad, 1 - } - if c < t5 { - l := ((((((rune(c) << bitx) | rune(c1)) << bitx) | rune(c2)) << bitx) | rune(c3)) & rune4 - if l <= rune3 || l > runemax { - return bad, 1 - } - return l, 4 - } - - // Support for 5-byte or longer UTF-8 would go here, but - // since we don't have that, we'll just return bad. - return bad, 1 -} - -// runetochar converts r to bytes and writes the result to str. -// returns the number of bytes generated. 
-func runetochar(str []byte, r rune) int { - /* runes are signed, so convert to unsigned for range check. */ - c := uint32(r) - /* - * one character sequence - * 00000-0007F => 00-7F - */ - if c <= rune1 { - str[0] = byte(c) - return 1 - } - /* - * two character sequence - * 0080-07FF => t2 tx - */ - if c <= rune2 { - str[0] = byte(t2 | (c >> (1 * bitx))) - str[1] = byte(tx | (c & maskx)) - return 2 - } - - /* - * If the rune is out of range or a surrogate half, convert it to the error rune. - * Do this test here because the error rune encodes to three bytes. - * Doing it earlier would duplicate work, since an out of range - * rune wouldn't have fit in one or two bytes. - */ - if c > runemax { - c = runeerror - } - if surrogateMin <= c && c <= surrogateMax { - c = runeerror - } - - /* - * three character sequence - * 0800-FFFF => t3 tx tx - */ - if c <= rune3 { - str[0] = byte(t3 | (c >> (2 * bitx))) - str[1] = byte(tx | ((c >> (1 * bitx)) & maskx)) - str[2] = byte(tx | (c & maskx)) - return 3 - } - - /* - * four character sequence (21-bit value) - * 10000-1FFFFF => t4 tx tx tx - */ - str[0] = byte(t4 | (c >> (3 * bitx))) - str[1] = byte(tx | ((c >> (2 * bitx)) & maskx)) - str[2] = byte(tx | ((c >> (1 * bitx)) & maskx)) - str[3] = byte(tx | (c & maskx)) - return 4 -} diff --git a/libgo/go/runtime/runtime-lldb_test.go b/libgo/go/runtime/runtime-lldb_test.go index 4c379b9..98bc906 100644 --- a/libgo/go/runtime/runtime-lldb_test.go +++ b/libgo/go/runtime/runtime-lldb_test.go @@ -158,7 +158,7 @@ func TestLldbPython(t *testing.T) { t.Fatalf("failed to create file: %v", err) } - cmd := exec.Command("go", "build", "-gcflags", "-N -l", "-o", "a.exe") + cmd := exec.Command(testenv.GoToolPath(t), "build", "-gcflags", "-N -l", "-o", "a.exe") cmd.Dir = dir out, err := cmd.CombinedOutput() if err != nil { @@ -198,7 +198,7 @@ func TestDwarfAranges(t *testing.T) { t.Fatalf("failed to create file: %v", err) } - cmd := exec.Command("go", "build", "-o", "a.exe") + cmd := exec.Command(testenv.GoToolPath(t), "build", "-o", "a.exe") cmd.Dir = dir out, err := cmd.CombinedOutput() if err != nil { diff --git a/libgo/go/runtime/runtime.go b/libgo/go/runtime/runtime.go index 23601e1..e63130b 100644 --- a/libgo/go/runtime/runtime.go +++ b/libgo/go/runtime/runtime.go @@ -58,6 +58,9 @@ var argslice []string //go:linkname syscall_runtime_envs syscall.runtime_envs func syscall_runtime_envs() []string { return append([]string{}, envs...) } +//go:linkname syscall_Getpagesize syscall.Getpagesize +func syscall_Getpagesize() int { return int(physPageSize) } + //go:linkname os_runtime_args os.runtime_args func os_runtime_args() []string { return append([]string{}, argslice...) 
} diff --git a/libgo/go/runtime/runtime1.go b/libgo/go/runtime/runtime1.go index dea19da..a41cfc8 100644 --- a/libgo/go/runtime/runtime1.go +++ b/libgo/go/runtime/runtime1.go @@ -80,7 +80,6 @@ func goargs() { if GOOS == "windows" { return } - argslice = make([]string, argc) for i := int32(0); i < argc; i++ { argslice[i] = gostringnocopy(argv_index(argv, i)) @@ -345,6 +344,7 @@ type debugVars struct { gcshrinkstackoff int32 gcstackbarrieroff int32 gcstackbarrierall int32 + gcrescanstacks int32 gcstoptheworld int32 gctrace int32 invalidptr int32 @@ -370,6 +370,7 @@ var dbgvars = []dbgVar{ {"gcshrinkstackoff", &debug.gcshrinkstackoff}, {"gcstackbarrieroff", &debug.gcstackbarrieroff}, {"gcstackbarrierall", &debug.gcstackbarrierall}, + {"gcrescanstacks", &debug.gcrescanstacks}, {"gcstoptheworld", &debug.gcstoptheworld}, {"gctrace", &debug.gctrace}, {"invalidptr", &debug.invalidptr}, @@ -403,11 +404,15 @@ func parsedebugvars() { // is int, not int32, and should only be updated // if specified in GODEBUG. if key == "memprofilerate" { - MemProfileRate = atoi(value) + if n, ok := atoi(value); ok { + MemProfileRate = n + } } else { for _, v := range dbgvars { if v.name == key { - *v.value = int32(atoi(value)) + if n, ok := atoi32(value); ok { + *v.value = n + } } } } @@ -416,6 +421,13 @@ func parsedebugvars() { setTraceback(gogetenv("GOTRACEBACK")) traceback_env = traceback_cache + if debug.gcrescanstacks == 0 { + // Without rescanning, there's no need for stack + // barriers. + debug.gcstackbarrieroff = 1 + debug.gcstackbarrierall = 0 + } + // if debug.gcstackbarrierall > 0 { // firstStackBarrierOffset = 0 // } @@ -446,7 +458,10 @@ func setTraceback(level string) { case "crash": t = 2<<tracebackShift | tracebackAll | tracebackCrash default: - t = uint32(atoi(level))<<tracebackShift | tracebackAll + t = tracebackAll + if n, ok := atoi(level); ok && n == int(uint32(n)) { + t |= uint32(n) << tracebackShift + } } // when C owns the process, simply exit'ing the process on fatal errors // and panics is surprising. Be louder and abort instead. diff --git a/libgo/go/runtime/runtime2.go b/libgo/go/runtime/runtime2.go index 7dc2743..195d65b 100644 --- a/libgo/go/runtime/runtime2.go +++ b/libgo/go/runtime/runtime2.go @@ -209,6 +209,14 @@ func (gp *guintptr) cas(old, new guintptr) bool { return atomic.Casuintptr((*uintptr)(unsafe.Pointer(gp)), uintptr(old), uintptr(new)) } +// setGNoWB performs *gp = new without a write barrier. +// For times when it's impractical to use a guintptr. +//go:nosplit +//go:nowritebarrier +func setGNoWB(gp **g, new *g) { + (*guintptr)(unsafe.Pointer(gp)).set(new) +} + type puintptr uintptr //go:nosplit @@ -225,6 +233,14 @@ func (mp muintptr) ptr() *m { return (*m)(unsafe.Pointer(mp)) } //go:nosplit func (mp *muintptr) set(m *m) { *mp = muintptr(unsafe.Pointer(m)) } +// setMNoWB performs *mp = new without a write barrier. +// For times when it's impractical to use an muintptr. +//go:nosplit +//go:nowritebarrier +func setMNoWB(mp **m, new *m) { + (*muintptr)(unsafe.Pointer(mp)).set(new) +} + // sudog represents a g in a wait list, such as for sending/receiving // on a channel. // @@ -249,6 +265,7 @@ type sudog struct { // The following fields are never accessed concurrently. // waitlink is only accessed by g. 
+ acquiretime int64 releasetime int64 ticket uint32 waitlink *sudog // g.waiting list @@ -538,7 +555,7 @@ type p struct { runSafePointFn uint32 // if 1, run sched.safePointFn at next safe point - pad [64]byte + pad [sys.CacheLineSize]byte } const ( @@ -626,29 +643,6 @@ const ( _SigUnblock // unblocked in minit ) -/* -gccgo does not use this. - -// Layout of in-memory per-function information prepared by linker -// See https://golang.org/s/go12symtab. -// Keep in sync with linker -// and with package debug/gosym and with symtab.go in package runtime. -type _func struct { - entry uintptr // start pc - nameoff int32 // function name - - args int32 // in/out args size - _ int32 // previously legacy frame size; kept for layout compatibility - - pcsp int32 - pcfile int32 - pcln int32 - npcdata int32 - nfuncdata int32 -} - -*/ - // Lock-free stack node. // // Also known to export_test.go. type lfnode struct { @@ -766,15 +760,17 @@ var ( newprocs int32 // Information about what cpu features are available. - // Set on startup. + // Set on startup in asm_{x86,amd64}.s. cpuid_ecx uint32 support_aes bool -// cpuid_edx uint32 -// cpuid_ebx7 uint32 -// lfenceBeforeRdtsc bool -// support_avx bool -// support_avx2 bool + // cpuid_edx uint32 + // cpuid_ebx7 uint32 + // lfenceBeforeRdtsc bool + // support_avx bool + // support_avx2 bool + // support_bmi1 bool + // support_bmi2 bool // goarm uint8 // set by cmd/link on arm systems // framepointer_enabled bool // set by cmd/link diff --git a/libgo/go/runtime/runtime_mmap_test.go b/libgo/go/runtime/runtime_mmap_test.go index 97b44e2..0141e81 100644 --- a/libgo/go/runtime/runtime_mmap_test.go +++ b/libgo/go/runtime/runtime_mmap_test.go @@ -2,21 +2,19 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build ignore - package runtime_test import ( "runtime" - "runtime/internal/sys" "testing" + "unsafe" ) // Test that the error value returned by mmap is positive, as that is // what the code in mem_bsd.go, mem_darwin.go, and mem_linux.go expects. // See the uses of ENOMEM in sysMap in those files. func TestMmapErrorSign(t *testing.T) { - p := runtime.Mmap(nil, ^uintptr(0)&^(sys.PhysPageSize-1), 0, runtime.MAP_ANON|runtime.MAP_PRIVATE, -1, 0) + p := runtime.Mmap(nil, ^uintptr(0)&^(runtime.GetPhysPageSize()-1), 0, runtime.MAP_ANON|runtime.MAP_PRIVATE, -1, 0) // The runtime.mmap function is nosplit, but t.Errorf is not. // Reset the pointer so that we don't get an "invalid stack @@ -24,7 +22,32 @@ func TestMmapErrorSign(t *testing.T) { v := uintptr(p) p = nil - if v != runtime.ENOMEM { - t.Errorf("mmap = %v, want %v", v, runtime.ENOMEM) + err := runtime.Errno() + if v != ^uintptr(0) || err != runtime.ENOMEM { + t.Errorf("mmap = %v, %v, want %v", v, err, runtime.ENOMEM) + } +} + +func TestPhysPageSize(t *testing.T) { + // Mmap fails if the address is not page aligned, so we can + // use this to test if the page size is the true page size. + ps := runtime.GetPhysPageSize() + + // Get a region of memory to play with. This should be page-aligned. + b := uintptr(runtime.Mmap(nil, 2*ps, 0, runtime.MAP_ANON|runtime.MAP_PRIVATE, -1, 0)) + if b == ^uintptr(0) { + t.Fatalf("Mmap: %v %v", b, runtime.Errno()) + } + + // Mmap should fail at a half page into the buffer. 
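
The runtime.go hunk earlier in the patch exposes physPageSize as syscall.Getpagesize, and the TestPhysPageSize test that continues below cross-checks the value against mmap's alignment rules. From ordinary code the same number is visible through the os and syscall packages; a small sanity check, for illustration only:

package main

import (
	"fmt"
	"os"
	"syscall"
)

func main() {
	ps := os.Getpagesize()
	fmt.Println("page size:", ps, "(syscall:", syscall.Getpagesize(), ")")
	if ps&(ps-1) != 0 {
		fmt.Println("unexpected: page size is not a power of two")
	}
}
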
+ err := uintptr(runtime.Mmap(unsafe.Pointer(uintptr(b)+ps/2), ps, 0, runtime.MAP_ANON|runtime.MAP_PRIVATE|runtime.MAP_FIXED, -1, 0)) + if err != ^uintptr(0) { + t.Errorf("Mmap should have failed with half-page alignment %d, but succeeded: %v", ps/2, err) + } + + // Mmap should succeed at a full page into the buffer. + err = uintptr(runtime.Mmap(unsafe.Pointer(uintptr(b)+ps), ps, 0, runtime.MAP_ANON|runtime.MAP_PRIVATE|runtime.MAP_FIXED, -1, 0)) + if err == ^uintptr(0) { + t.Errorf("Mmap at full-page alignment %d failed: %v %v", ps, err, runtime.Errno()) } } diff --git a/libgo/go/runtime/runtime_test.go b/libgo/go/runtime/runtime_test.go index 2ea2dc2..1f403a1 100644 --- a/libgo/go/runtime/runtime_test.go +++ b/libgo/go/runtime/runtime_test.go @@ -8,6 +8,7 @@ import ( "io" . "runtime" "runtime/debug" + "strings" "testing" "unsafe" ) @@ -331,3 +332,11 @@ func TestGoroutineProfileTrivial(t *testing.T) { } } } + +func TestVersion(t *testing.T) { + // Test that version does not contain \r or \n. + vers := Version() + if strings.Contains(vers, "\r") || strings.Contains(vers, "\n") { + t.Fatalf("cr/nl in version: %q", vers) + } +} diff --git a/libgo/go/runtime/select.go b/libgo/go/runtime/select.go index 08446a1..62c4049 100644 --- a/libgo/go/runtime/select.go +++ b/libgo/go/runtime/select.go @@ -81,7 +81,7 @@ func newselect(sel *hselect, selsize int64, size int32) { sel.pollorder = (*uint16)(add(unsafe.Pointer(sel.lockorder), uintptr(size)*unsafe.Sizeof(*hselect{}.lockorder))) // For gccgo the temporary variable will not have been zeroed. - memclr(unsafe.Pointer(&sel.scase), uintptr(size)*unsafe.Sizeof(hselect{}.scase[0])+uintptr(size)*unsafe.Sizeof(*hselect{}.lockorder)+uintptr(size)*unsafe.Sizeof(*hselect{}.pollorder)) + memclrNoHeapPointers(unsafe.Pointer(&sel.scase), uintptr(size)*unsafe.Sizeof(hselect{}.scase[0])+uintptr(size)*unsafe.Sizeof(*hselect{}.lockorder)+uintptr(size)*unsafe.Sizeof(*hselect{}.pollorder)) if debugSelect { print("newselect s=", sel, " size=", size, "\n") @@ -279,7 +279,7 @@ func selectgoImpl(sel *hselect) (uintptr, uint16) { pollslice := slice{unsafe.Pointer(sel.pollorder), int(sel.ncase), int(sel.ncase)} pollorder := *(*[]uint16)(unsafe.Pointer(&pollslice)) for i := 1; i < int(sel.ncase); i++ { - j := int(fastrand1()) % (i + 1) + j := int(fastrand()) % (i + 1) pollorder[i] = pollorder[j] pollorder[j] = uint16(i) } @@ -431,8 +431,62 @@ loop: gp.param = nil gopark(selparkcommit, nil, "select", traceEvGoBlockSelect, 2) - // someone woke us up - sellock(scases, lockorder) + // While we were asleep, some goroutine came along and completed + // one of the cases in the select and woke us up (called ready). + // As part of that process, the goroutine did a cas on done above + // (aka *sg.selectdone for all queued sg) to win the right to + // complete the select. Now done = 1. + // + // If we copy (grow) our own stack, we will update the + // selectdone pointers inside the gp.waiting sudog list to point + // at the new stack. Another goroutine attempting to + // complete one of our (still linked in) select cases might + // see the new selectdone pointer (pointing at the new stack) + // before the new stack has real data; if the new stack has done = 0 + // (before the old values are copied over), the goroutine might + // do a cas via sg.selectdone and incorrectly believe that it has + // won the right to complete the select, executing a second + // communication and attempting to wake us (call ready) again. + // + // Then things break. 
+ // + // The best break is that the goroutine doing ready sees the + // _Gcopystack status and throws, as in #17007. + // A worse break would be for us to continue on, start running real code, + // block in a semaphore acquisition (sema.go), and have the other + // goroutine wake us up without having really acquired the semaphore. + // That would result in the goroutine spuriously running and then + // queue up another spurious wakeup when the semaphore really is ready. + // In general the situation can cascade until something notices the + // problem and causes a crash. + // + // A stack shrink does not have this problem, because it locks + // all the channels that are involved first, blocking out the + // possibility of a cas on selectdone. + // + // A stack growth before gopark above does not have this + // problem, because we hold those channel locks (released by + // selparkcommit). + // + // A stack growth after sellock below does not have this + // problem, because again we hold those channel locks. + // + // The only problem is a stack growth during sellock. + // To keep that from happening, run sellock on the system stack. + // + // It might be that we could avoid this if copystack copied the + // stack before calling adjustsudogs. In that case, + // syncadjustsudogs would need to recopy the tiny part that + // it copies today, resulting in a little bit of extra copying. + // + // An even better fix, not for the week before a release candidate, + // would be to put space in every sudog and make selectdone + // point at (say) the space in the first sudog. + + systemstack(func() { + sellock(scases, lockorder) + }) + sg = (*sudog)(gp.param) gp.param = nil @@ -473,8 +527,15 @@ loop: } if cas == nil { - // This can happen if we were woken up by a close(). - // TODO: figure that out explicitly so we don't need this loop. + // We can wake up with gp.param == nil (so cas == nil) + // when a channel involved in the select has been closed. + // It is easiest to loop and re-run the operation; + // we'll see that it's now closed. + // Maybe some day we can signal the close explicitly, + // but we'd have to distinguish close-on-reader from close-on-writer. + // It's easiest not to duplicate the code and just recheck above. + // We know that something closed, and things never un-close, + // so we won't block again. 
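
The comment above ends at the key fact behind the cas == nil case: close() wakes a blocked select without handing it a specific case, and re-running the select is safe because a closed channel never becomes un-ready. The user-visible behavior, as a small runnable example:

package main

import (
	"fmt"
	"time"
)

func main() {
	c := make(chan int)
	go func() {
		time.Sleep(10 * time.Millisecond)
		close(c) // wakes the select below even though no value is ever sent
	}()

	select {
	case v, ok := <-c:
		// A receive from a closed channel completes immediately with the
		// zero value and ok == false, which is why the runtime can simply
		// loop and retry the operation after being woken by a close.
		fmt.Println("received", v, "ok =", ok)
	}
}
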
goto loop } @@ -527,7 +588,7 @@ bufrecv: if cas.elem != nil { typedmemmove(c.elemtype, cas.elem, qp) } - memclr(qp, uintptr(c.elemsize)) + typedmemclr(c.elemtype, qp) c.recvx++ if c.recvx == c.dataqsiz { c.recvx = 0 @@ -573,7 +634,7 @@ rclose: *cas.receivedp = false } if cas.elem != nil { - memclr(cas.elem, uintptr(c.elemsize)) + typedmemclr(c.elemtype, cas.elem) } if raceenabled { raceacquire(unsafe.Pointer(c)) diff --git a/libgo/go/runtime/sema.go b/libgo/go/runtime/sema.go index 855d73e..576a1fb 100644 --- a/libgo/go/runtime/sema.go +++ b/libgo/go/runtime/sema.go @@ -19,10 +19,6 @@ package runtime -// Export temporarily for gccgo's C code to call: -//go:linkname semacquire runtime.semacquire -//go:linkname semrelease runtime.semrelease - import ( "runtime/internal/atomic" "runtime/internal/sys" @@ -48,12 +44,12 @@ var semtable [semTabSize]struct { //go:linkname sync_runtime_Semacquire sync.runtime_Semacquire func sync_runtime_Semacquire(addr *uint32) { - semacquire(addr, true) + semacquire(addr, semaBlockProfile) } //go:linkname net_runtime_Semacquire net.runtime_Semacquire func net_runtime_Semacquire(addr *uint32) { - semacquire(addr, true) + semacquire(addr, semaBlockProfile) } //go:linkname sync_runtime_Semrelease sync.runtime_Semrelease @@ -61,6 +57,11 @@ func sync_runtime_Semrelease(addr *uint32) { semrelease(addr) } +//go:linkname sync_runtime_SemacquireMutex sync.runtime_SemacquireMutex +func sync_runtime_SemacquireMutex(addr *uint32) { + semacquire(addr, semaBlockProfile|semaMutexProfile) +} + //go:linkname net_runtime_Semrelease net.runtime_Semrelease func net_runtime_Semrelease(addr *uint32) { semrelease(addr) @@ -73,8 +74,15 @@ func readyWithTime(s *sudog, traceskip int) { goready(s.g, traceskip) } +type semaProfileFlags int + +const ( + semaBlockProfile semaProfileFlags = 1 << iota + semaMutexProfile +) + // Called from runtime. -func semacquire(addr *uint32, profile bool) { +func semacquire(addr *uint32, profile semaProfileFlags) { gp := getg() if gp != gp.m.curg { throw("semacquire not on the G stack") @@ -95,10 +103,17 @@ func semacquire(addr *uint32, profile bool) { root := semroot(addr) t0 := int64(0) s.releasetime = 0 - if profile && blockprofilerate > 0 { + s.acquiretime = 0 + if profile&semaBlockProfile != 0 && blockprofilerate > 0 { t0 = cputicks() s.releasetime = -1 } + if profile&semaMutexProfile != 0 && mutexprofilerate > 0 { + if t0 == 0 { + t0 = cputicks() + } + s.acquiretime = t0 + } for { lock(&root.lock) // Add ourselves to nwait to disable "easy case" in semrelease. @@ -150,8 +165,19 @@ func semrelease(addr *uint32) { break } } - unlock(&root.lock) if s != nil { + if s.acquiretime != 0 { + t0 := cputicks() + for x := root.head; x != nil; x = x.next { + if x.elem == unsafe.Pointer(addr) { + x.acquiretime = t0 + } + } + mutexevent(t0-s.acquiretime, 3) + } + } + unlock(&root.lock) + if s != nil { // May be slow, so unlock first readyWithTime(s, 5) } } diff --git a/libgo/go/runtime/signal1_unix.go b/libgo/go/runtime/signal1_unix.go deleted file mode 100644 index 181aebe..0000000 --- a/libgo/go/runtime/signal1_unix.go +++ /dev/null @@ -1,337 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build darwin dragonfly freebsd linux netbsd openbsd solaris - -package runtime - -import ( - _ "unsafe" // For go:linkname. 
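
The sema.go hunk above adds the semaMutexProfile flag and the sync.runtime_SemacquireMutex entry point that feed Go 1.8's new mutex contention profile. A minimal sketch of the public side of that feature (the rate, goroutine count and sleep are chosen only to force some contention):

package main

import (
	"os"
	"runtime"
	"runtime/pprof"
	"sync"
	"time"
)

func main() {
	runtime.SetMutexProfileFraction(1) // record every contention event

	var mu sync.Mutex
	var wg sync.WaitGroup
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for j := 0; j < 50; j++ {
				mu.Lock()
				time.Sleep(time.Millisecond) // hold the lock so others block
				mu.Unlock()
			}
		}()
	}
	wg.Wait()

	// The wait times recorded via mutexevent in semrelease show up here.
	pprof.Lookup("mutex").WriteTo(os.Stdout, 1)
}
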
-) - -// Temporary for gccgo's C code to call: -//go:linkname initsig runtime.initsig -//go:linkname crash runtime.crash -//go:linkname resetcpuprofiler runtime.resetcpuprofiler - -//extern setitimer -func setitimer(which int32, new *_itimerval, old *_itimerval) int32 - -type sigTabT struct { - flags int32 - name string -} - -const ( - _SIG_DFL uintptr = 0 - _SIG_IGN uintptr = 1 -) - -// Stores the signal handlers registered before Go installed its own. -// These signal handlers will be invoked in cases where Go doesn't want to -// handle a particular signal (e.g., signal occurred on a non-Go thread). -// See sigfwdgo() for more information on when the signals are forwarded. -// -// Signal forwarding is currently available only on Darwin and Linux. -var fwdSig [_NSIG]uintptr - -// sigmask represents a general signal mask compatible with the GOOS -// specific sigset types: the signal numbered x is represented by bit x-1 -// to match the representation expected by sigprocmask. -type sigmask [(_NSIG + 31) / 32]uint32 - -// channels for synchronizing signal mask updates with the signal mask -// thread -var ( - disableSigChan chan uint32 - enableSigChan chan uint32 - maskUpdatedChan chan struct{} -) - -func init() { - // _NSIG is the number of signals on this operating system. - // sigtable should describe what to do for all the possible signals. - if len(sigtable) != _NSIG { - print("runtime: len(sigtable)=", len(sigtable), " _NSIG=", _NSIG, "\n") - throw("bad sigtable len") - } -} - -var signalsOK bool - -// Initialize signals. -// Called by libpreinit so runtime may not be initialized. -//go:nosplit -//go:nowritebarrierrec -func initsig(preinit bool) { - if preinit { - // preinit is only passed as true if isarchive should be true. - isarchive = true - } - - if !preinit { - // It's now OK for signal handlers to run. - signalsOK = true - } - - // For c-archive/c-shared this is called by libpreinit with - // preinit == true. - if (isarchive || islibrary) && !preinit { - return - } - - for i := int32(0); i < _NSIG; i++ { - t := &sigtable[i] - if t.flags == 0 || t.flags&_SigDefault != 0 { - continue - } - fwdSig[i] = getsig(i) - - if !sigInstallGoHandler(i) { - // Even if we are not installing a signal handler, - // set SA_ONSTACK if necessary. - if fwdSig[i] != _SIG_DFL && fwdSig[i] != _SIG_IGN { - setsigstack(i) - } - continue - } - - t.flags |= _SigHandling - setsig(i, getSigtramp(), true) - } -} - -//go:nosplit -//go:nowritebarrierrec -func sigInstallGoHandler(sig int32) bool { - // For some signals, we respect an inherited SIG_IGN handler - // rather than insist on installing our own default handler. - // Even these signals can be fetched using the os/signal package. - switch sig { - case _SIGHUP, _SIGINT: - if fwdSig[sig] == _SIG_IGN { - return false - } - } - - t := &sigtable[sig] - if t.flags&_SigSetStack != 0 { - return false - } - - // When built using c-archive or c-shared, only install signal - // handlers for synchronous signals. 
- if (isarchive || islibrary) && t.flags&_SigPanic == 0 { - return false - } - - return true -} - -func sigenable(sig uint32) { - if sig >= uint32(len(sigtable)) { - return - } - - t := &sigtable[sig] - if t.flags&_SigNotify != 0 { - ensureSigM() - enableSigChan <- sig - <-maskUpdatedChan - if t.flags&_SigHandling == 0 { - t.flags |= _SigHandling - fwdSig[sig] = getsig(int32(sig)) - setsig(int32(sig), getSigtramp(), true) - } - } -} - -func sigdisable(sig uint32) { - if sig >= uint32(len(sigtable)) { - return - } - - t := &sigtable[sig] - if t.flags&_SigNotify != 0 { - ensureSigM() - disableSigChan <- sig - <-maskUpdatedChan - - // If initsig does not install a signal handler for a - // signal, then to go back to the state before Notify - // we should remove the one we installed. - if !sigInstallGoHandler(int32(sig)) { - t.flags &^= _SigHandling - setsig(int32(sig), fwdSig[sig], true) - } - } -} - -func sigignore(sig uint32) { - if sig >= uint32(len(sigtable)) { - return - } - - t := &sigtable[sig] - if t.flags&_SigNotify != 0 { - t.flags &^= _SigHandling - setsig(int32(sig), _SIG_IGN, true) - } -} - -func resetcpuprofiler(hz int32) { - var it _itimerval - if hz == 0 { - setitimer(_ITIMER_PROF, &it, nil) - } else { - it.it_interval.tv_sec = 0 - it.it_interval.set_usec(1000000 / hz) - it.it_value = it.it_interval - setitimer(_ITIMER_PROF, &it, nil) - } - _g_ := getg() - _g_.m.profilehz = hz -} - -func sigpipe() { - if sigsend(_SIGPIPE) { - return - } - dieFromSignal(_SIGPIPE) -} - -// dieFromSignal kills the program with a signal. -// This provides the expected exit status for the shell. -// This is only called with fatal signals expected to kill the process. -//go:nosplit -//go:nowritebarrierrec -func dieFromSignal(sig int32) { - setsig(sig, _SIG_DFL, false) - updatesigmask(sigmask{}) - raise(sig) - - // That should have killed us. On some systems, though, raise - // sends the signal to the whole process rather than to just - // the current thread, which means that the signal may not yet - // have been delivered. Give other threads a chance to run and - // pick up the signal. - osyield() - osyield() - osyield() - - // If we are still somehow running, just exit with the wrong status. - exit(2) -} - -// raisebadsignal is called when a signal is received on a non-Go -// thread, and the Go program does not want to handle it (that is, the -// program has not called os/signal.Notify for the signal). -func raisebadsignal(sig int32, c *sigctxt) { - if sig == _SIGPROF { - // Ignore profiling signals that arrive on non-Go threads. - return - } - - var handler uintptr - if sig >= _NSIG { - handler = _SIG_DFL - } else { - handler = fwdSig[sig] - } - - // Reset the signal handler and raise the signal. - // We are currently running inside a signal handler, so the - // signal is blocked. We need to unblock it before raising the - // signal, or the signal we raise will be ignored until we return - // from the signal handler. We know that the signal was unblocked - // before entering the handler, or else we would not have received - // it. That means that we don't have to worry about blocking it - // again. - unblocksig(sig) - setsig(sig, handler, false) - - // If we're linked into a non-Go program we want to try to - // avoid modifying the original context in which the signal - // was raised. If the handler is the default, we know it - // is non-recoverable, so we don't have to worry about - // re-installing sighandler. 
At this point we can just - // return and the signal will be re-raised and caught by - // the default handler with the correct context. - if (isarchive || islibrary) && handler == _SIG_DFL && c.sigcode() != _SI_USER { - return - } - - raise(sig) - - // If the signal didn't cause the program to exit, restore the - // Go signal handler and carry on. - // - // We may receive another instance of the signal before we - // restore the Go handler, but that is not so bad: we know - // that the Go program has been ignoring the signal. - setsig(sig, getSigtramp(), true) -} - -func crash() { - dieFromSignal(_SIGABRT) -} - -// ensureSigM starts one global, sleeping thread to make sure at least one thread -// is available to catch signals enabled for os/signal. -func ensureSigM() { - if maskUpdatedChan != nil { - return - } - maskUpdatedChan = make(chan struct{}) - disableSigChan = make(chan uint32) - enableSigChan = make(chan uint32) - go func() { - // Signal masks are per-thread, so make sure this goroutine stays on one - // thread. - LockOSThread() - defer UnlockOSThread() - // The sigBlocked mask contains the signals not active for os/signal, - // initially all signals except the essential. When signal.Notify()/Stop is called, - // sigenable/sigdisable in turn notify this thread to update its signal - // mask accordingly. - var sigBlocked sigmask - for i := range sigBlocked { - sigBlocked[i] = ^uint32(0) - } - for i := range sigtable { - if sigtable[i].flags&_SigUnblock != 0 { - sigBlocked[(i-1)/32] &^= 1 << ((uint32(i) - 1) & 31) - } - } - updatesigmask(sigBlocked) - for { - select { - case sig := <-enableSigChan: - if b := sig - 1; sig > 0 { - sigBlocked[b/32] &^= (1 << (b & 31)) - } - case sig := <-disableSigChan: - if b := sig - 1; sig > 0 { - sigBlocked[b/32] |= (1 << (b & 31)) - } - } - updatesigmask(sigBlocked) - maskUpdatedChan <- struct{}{} - } - }() -} - -// This runs on a foreign stack, without an m or a g. No stack split. -//go:nosplit -//go:norace -//go:nowritebarrierrec -func badsignal(sig uintptr, c *sigctxt) { - needm(0) - if !sigsend(uint32(sig)) { - // A foreign thread received the signal sig, and the - // Go code does not want to handle it. - raisebadsignal(int32(sig), c) - } - dropm() -} diff --git a/libgo/go/runtime/signal2_unix.go b/libgo/go/runtime/signal2_unix.go deleted file mode 100644 index 2a39eac..0000000 --- a/libgo/go/runtime/signal2_unix.go +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build darwin dragonfly freebsd linux netbsd openbsd solaris - -package runtime - -import "unsafe" - -// Determines if the signal should be handled by Go and if not, forwards the -// signal to the handler that was installed before Go's. Returns whether the -// signal was forwarded. -// This is called by the signal handler, and the world may be stopped. -//go:nosplit -//go:nowritebarrierrec -func sigfwdgo(sig uint32, info *_siginfo_t, ctx unsafe.Pointer) bool { - if sig >= uint32(len(sigtable)) { - return false - } - fwdFn := fwdSig[sig] - - if !signalsOK { - // The only way we can get here is if we are in a - // library or archive, we installed a signal handler - // at program startup, but the Go runtime has not yet - // been initialized. 
- if fwdFn == _SIG_DFL { - dieFromSignal(int32(sig)) - } else { - sigfwd(fwdFn, sig, info, ctx) - } - return true - } - - flags := sigtable[sig].flags - - // If there is no handler to forward to, no need to forward. - if fwdFn == _SIG_DFL { - return false - } - - // If we aren't handling the signal, forward it. - if flags&_SigHandling == 0 { - sigfwd(fwdFn, sig, info, ctx) - return true - } - - // Only forward synchronous signals. - c := sigctxt{info, ctx} - if c.sigcode() == _SI_USER || flags&_SigPanic == 0 { - return false - } - // Determine if the signal occurred inside Go code. We test that: - // (1) we were in a goroutine (i.e., m.curg != nil), and - // (2) we weren't in CGO (i.e., m.curg.syscallsp == 0). - g := getg() - if g != nil && g.m != nil && g.m.curg != nil && g.m.curg.syscallsp == 0 { - return false - } - // Signal not handled by Go, forward it. - if fwdFn != _SIG_IGN { - sigfwd(fwdFn, sig, info, ctx) - } - return true -} diff --git a/libgo/go/runtime/signal_gccgo.go b/libgo/go/runtime/signal_gccgo.go index 4e5044f..62fe458 100644 --- a/libgo/go/runtime/signal_gccgo.go +++ b/libgo/go/runtime/signal_gccgo.go @@ -14,7 +14,7 @@ import ( // these are written in OS-specific files and in assembler. //extern sigaction -func sigaction(signum int32, act *_sigaction, oact *_sigaction) int32 +func sigaction(signum uint32, act *_sigaction, oact *_sigaction) int32 //extern sigprocmask func sigprocmask(how int32, set *sigset, oldset *sigset) int32 @@ -26,25 +26,35 @@ func sigfillset(set *sigset) int32 func sigemptyset(set *sigset) int32 //extern sigaddset -func sigaddset(set *sigset, signum int32) int32 +func c_sigaddset(set *sigset, signum uint32) int32 //extern sigdelset -func sigdelset(set *sigset, signum int32) int32 +func c_sigdelset(set *sigset, signum uint32) int32 //extern sigaltstack func sigaltstack(ss *_stack_t, oss *_stack_t) int32 //extern raise -func raise(sig int32) int32 +func raise(sig uint32) int32 //extern getpid func getpid() _pid_t //extern kill -func kill(pid _pid_t, sig int32) int32 +func kill(pid _pid_t, sig uint32) int32 + +//extern setitimer +func setitimer(which int32, new *_itimerval, old *_itimerval) int32 + +type siginfo _siginfo_t + +type sigTabT struct { + flags int32 + name string +} type sigctxt struct { - info *_siginfo_t + info *siginfo ctxt unsafe.Pointer } @@ -58,27 +68,10 @@ func (c *sigctxt) sigcode() uint64 { } //go:nosplit -func msigsave(mp *m) { - sigprocmask(_SIG_SETMASK, nil, &mp.sigmask) -} - -//go:nosplit -func msigrestore(sigmask sigset) { - sigprocmask(_SIG_SETMASK, &sigmask, nil) -} - -//go:nosplit -func sigblock() { - var set sigset - sigfillset(&set) - sigprocmask(_SIG_SETMASK, &set, nil) -} - -//go:nosplit //go:nowritebarrierrec -func setsig(i int32, fn uintptr, restart bool) { +func setsig(i uint32, fn uintptr) { var sa _sigaction - sa.sa_flags = _SA_SIGINFO + sa.sa_flags = _SA_SIGINFO | _SA_RESTART // For gccgo we do not set SA_ONSTACK for a signal that can // cause a panic. 
Instead, we trust that the split stack has @@ -89,9 +82,6 @@ func setsig(i int32, fn uintptr, restart bool) { sa.sa_flags |= _SA_ONSTACK } - if restart { - sa.sa_flags |= _SA_RESTART - } sigfillset((*sigset)(unsafe.Pointer(&sa.sa_mask))) setSigactionHandler(&sa, fn) sigaction(i, &sa, nil) @@ -99,7 +89,7 @@ func setsig(i int32, fn uintptr, restart bool) { //go:nosplit //go:nowritebarrierrec -func setsigstack(i int32) { +func setsigstack(i uint32) { var sa _sigaction sigaction(i, nil, &sa) handler := getSigactionHandler(&sa) @@ -115,7 +105,7 @@ func setsigstack(i int32) { //go:nosplit //go:nowritebarrierrec -func getsig(i int32) uintptr { +func getsig(i uint32) uintptr { var sa _sigaction if sigaction(i, nil, &sa) < 0 { // On GNU/Linux glibc rejects attempts to call @@ -132,34 +122,24 @@ func signalstack(p unsafe.Pointer, n uintptr) //go:nosplit //go:nowritebarrierrec -func updatesigmask(m sigmask) { - var mask sigset - sigemptyset(&mask) - for i := int32(0); i < _NSIG; i++ { - if m[(i-1)/32]&(1<<((uint(i)-1)&31)) != 0 { - sigaddset(&mask, i) - } - } - sigprocmask(_SIG_SETMASK, &mask, nil) -} - -func unblocksig(sig int32) { - var mask sigset - sigemptyset(&mask) - sigaddset(&mask, sig) - sigprocmask(_SIG_UNBLOCK, &mask, nil) +func raiseproc(sig uint32) { + kill(getpid(), sig) } //go:nosplit //go:nowritebarrierrec -func raiseproc(sig int32) { - kill(getpid(), sig) +func sigfwd(fn uintptr, sig uint32, info *siginfo, ctx unsafe.Pointer) { + f1 := &[1]uintptr{fn} + f2 := *(*func(uint32, *siginfo, unsafe.Pointer))(unsafe.Pointer(&f1)) + f2(sig, info, ctx) } //go:nosplit //go:nowritebarrierrec -func sigfwd(fn uintptr, sig uint32, info *_siginfo_t, ctx unsafe.Pointer) { - f1 := &[1]uintptr{fn} - f2 := *(*func(uint32, *_siginfo_t, unsafe.Pointer))(unsafe.Pointer(&f1)) - f2(sig, info, ctx) +func sigaddset(mask *sigset, i int) { + c_sigaddset(mask, uint32(i)) +} + +func sigdelset(mask *sigset, i int) { + c_sigdelset(mask, uint32(i)) } diff --git a/libgo/go/runtime/signal_sighandler.go b/libgo/go/runtime/signal_sighandler.go index 766bb7d..a057df9 100644 --- a/libgo/go/runtime/signal_sighandler.go +++ b/libgo/go/runtime/signal_sighandler.go @@ -25,7 +25,7 @@ var crashing int32 // are not allowed. // //go:nowritebarrierrec -func sighandler(sig uint32, info *_siginfo_t, ctxt unsafe.Pointer, gp *g) { +func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) { _g_ := getg() c := sigctxt{info, ctxt} @@ -71,7 +71,7 @@ func sighandler(sig uint32, info *_siginfo_t, ctxt unsafe.Pointer, gp *g) { } if flags&_SigKill != 0 { - dieFromSignal(int32(sig)) + dieFromSignal(sig) } if flags&_SigThrow == 0 { @@ -91,10 +91,7 @@ func sighandler(sig uint32, info *_siginfo_t, ctxt unsafe.Pointer, gp *g) { print("Signal ", sig, "\n") } - if sigpc != 0 { - print("PC=", hex(sigpc), " ") - } - print("m=", _g_.m.id, " sigcode=", c.sigcode(), "\n") + print("PC=", hex(sigpc), " m=", _g_.m.id, " sigcode=", c.sigcode(), "\n") if _g_.m.lockedg != nil && _g_.m.ncgo > 0 && gp == _g_.m.g0 { print("signal arrived during cgo execution\n") gp = _g_.m.lockedg @@ -114,7 +111,7 @@ func sighandler(sig uint32, info *_siginfo_t, ctxt unsafe.Pointer, gp *g) { if docrash { crashing++ - if crashing < mcount() { + if crashing < sched.mcount { // There are other m's that need to dump their stacks. // Relay SIGQUIT to the next m by sending it to the current process. 
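
The _SigThrow path in sighandler above is what turns an unexpected SIGQUIT into a traceback, relaying the signal to the remaining m's so each can dump its stack. The effect is easy to see from user code by sending a program SIGQUIT; a small Unix-only illustration (it terminates by design):

package main

import (
	"syscall"
	"time"
)

func main() {
	go func() { time.Sleep(time.Hour) }() // an extra goroutine for the dump

	// SIGQUIT is in the runtime's throw set when os/signal has not claimed
	// it: the handler prints "SIGQUIT: quit" plus a traceback and exits
	// with a non-zero status.
	syscall.Kill(syscall.Getpid(), syscall.SIGQUIT)
	time.Sleep(time.Second) // give the signal time to be delivered
}
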
// All m's that have already received SIGQUIT have signal masks blocking diff --git a/libgo/go/runtime/signal_sigtramp.go b/libgo/go/runtime/signal_sigtramp.go deleted file mode 100644 index 667d5fe..0000000 --- a/libgo/go/runtime/signal_sigtramp.go +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build darwin dragonfly freebsd linux netbsd openbsd solaris - -package runtime - -import "unsafe" - -// For gccgo, use go:linkname so the C signal handler can call this one. -//go:linkname sigtrampgo runtime.sigtrampgo - -// Continuation of the (assembly) sigtramp() logic. -// This may be called with the world stopped. -//go:nosplit -//go:nowritebarrierrec -func sigtrampgo(sig uint32, info *_siginfo_t, ctx unsafe.Pointer) { - if sigfwdgo(sig, info, ctx) { - return - } - g := getg() - if g == nil { - if sig == _SIGPROF { - // Ignore profiling signals that arrive on - // non-Go threads. On some systems they will - // be handled directly by the signal handler, - // by calling sigprofNonGo, in which case we won't - // get here anyhow. - return - } - c := sigctxt{info, ctx} - badsignal(uintptr(sig), &c) - return - } - - setg(g.m.gsignal) - sighandler(sig, info, ctx, g) - setg(g) -} diff --git a/libgo/go/runtime/signal_unix.go b/libgo/go/runtime/signal_unix.go index f59c9b9..4324753 100644 --- a/libgo/go/runtime/signal_unix.go +++ b/libgo/go/runtime/signal_unix.go @@ -6,7 +6,16 @@ package runtime -import _ "unsafe" // for go:linkname +import ( + "runtime/internal/sys" + "unsafe" +) + +// For gccgo's C code to call: +//go:linkname initsig runtime.initsig +//go:linkname crash runtime.crash +//go:linkname resetcpuprofiler runtime.resetcpuprofiler +//go:linkname sigtrampgo runtime.sigtrampgo //go:linkname os_sigpipe os.sigpipe func os_sigpipe() { @@ -19,3 +28,574 @@ func signame(sig uint32) string { } return sigtable[sig].name } + +const ( + _SIG_DFL uintptr = 0 + _SIG_IGN uintptr = 1 +) + +// Stores the signal handlers registered before Go installed its own. +// These signal handlers will be invoked in cases where Go doesn't want to +// handle a particular signal (e.g., signal occurred on a non-Go thread). +// See sigfwdgo() for more information on when the signals are forwarded. +// +// Signal forwarding is currently available only on Darwin and Linux. +var fwdSig [_NSIG]uintptr + +// channels for synchronizing signal mask updates with the signal mask +// thread +var ( + disableSigChan chan uint32 + enableSigChan chan uint32 + maskUpdatedChan chan struct{} +) + +func init() { + // _NSIG is the number of signals on this operating system. + // sigtable should describe what to do for all the possible signals. + if len(sigtable) != _NSIG { + print("runtime: len(sigtable)=", len(sigtable), " _NSIG=", _NSIG, "\n") + throw("bad sigtable len") + } +} + +var signalsOK bool + +// Initialize signals. +// Called by libpreinit so runtime may not be initialized. +//go:nosplit +//go:nowritebarrierrec +func initsig(preinit bool) { + if !preinit { + // It's now OK for signal handlers to run. + signalsOK = true + } + + // For c-archive/c-shared this is called by libpreinit with + // preinit == true. 
+ if (isarchive || islibrary) && !preinit { + return + } + + for i := uint32(0); i < _NSIG; i++ { + t := &sigtable[i] + if t.flags == 0 || t.flags&_SigDefault != 0 { + continue + } + fwdSig[i] = getsig(i) + + if !sigInstallGoHandler(i) { + // Even if we are not installing a signal handler, + // set SA_ONSTACK if necessary. + if fwdSig[i] != _SIG_DFL && fwdSig[i] != _SIG_IGN { + setsigstack(i) + } + continue + } + + t.flags |= _SigHandling + setsig(i, getSigtramp()) + } +} + +//go:nosplit +//go:nowritebarrierrec +func sigInstallGoHandler(sig uint32) bool { + // For some signals, we respect an inherited SIG_IGN handler + // rather than insist on installing our own default handler. + // Even these signals can be fetched using the os/signal package. + switch sig { + case _SIGHUP, _SIGINT: + if fwdSig[sig] == _SIG_IGN { + return false + } + } + + t := &sigtable[sig] + if t.flags&_SigSetStack != 0 { + return false + } + + // When built using c-archive or c-shared, only install signal + // handlers for synchronous signals. + if (isarchive || islibrary) && t.flags&_SigPanic == 0 { + return false + } + + return true +} + +func sigenable(sig uint32) { + if sig >= uint32(len(sigtable)) { + return + } + + t := &sigtable[sig] + if t.flags&_SigNotify != 0 { + ensureSigM() + enableSigChan <- sig + <-maskUpdatedChan + if t.flags&_SigHandling == 0 { + t.flags |= _SigHandling + fwdSig[sig] = getsig(sig) + setsig(sig, getSigtramp()) + } + } +} + +func sigdisable(sig uint32) { + if sig >= uint32(len(sigtable)) { + return + } + + t := &sigtable[sig] + if t.flags&_SigNotify != 0 { + ensureSigM() + disableSigChan <- sig + <-maskUpdatedChan + + // If initsig does not install a signal handler for a + // signal, then to go back to the state before Notify + // we should remove the one we installed. + if !sigInstallGoHandler(sig) { + t.flags &^= _SigHandling + setsig(sig, fwdSig[sig]) + } + } +} + +func sigignore(sig uint32) { + if sig >= uint32(len(sigtable)) { + return + } + + t := &sigtable[sig] + if t.flags&_SigNotify != 0 { + t.flags &^= _SigHandling + setsig(sig, _SIG_IGN) + } +} + +func resetcpuprofiler(hz int32) { + var it _itimerval + if hz == 0 { + setitimer(_ITIMER_PROF, &it, nil) + } else { + it.it_interval.tv_sec = 0 + it.it_interval.set_usec(1000000 / hz) + it.it_value = it.it_interval + setitimer(_ITIMER_PROF, &it, nil) + } + _g_ := getg() + _g_.m.profilehz = hz +} + +func sigpipe() { + if sigsend(_SIGPIPE) { + return + } + dieFromSignal(_SIGPIPE) +} + +// sigtrampgo is called from the signal handler function, sigtramp, +// written in assembly code. +// This is called by the signal handler, and the world may be stopped. +//go:nosplit +//go:nowritebarrierrec +func sigtrampgo(sig uint32, info *siginfo, ctx unsafe.Pointer) { + if sigfwdgo(sig, info, ctx) { + return + } + g := getg() + if g == nil { + c := sigctxt{info, ctx} + if sig == _SIGPROF { + _, pc := getSiginfo(info, ctx) + sigprofNonGoPC(pc) + return + } + badsignal(uintptr(sig), &c) + return + } + + setg(g.m.gsignal) + sighandler(sig, info, ctx, g) + setg(g) +} + +// sigpanic turns a synchronous signal into a run-time panic. +// If the signal handler sees a synchronous panic, it arranges the +// stack to look like the function where the signal occurred called +// sigpanic, sets the signal's PC value to sigpanic, and returns from +// the signal handler. The effect is that the program will act as +// though the function that got the signal simply called sigpanic +// instead. 
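
sigenable, sigdisable and sigignore above are the runtime halves of os/signal's Notify, Stop/Reset and Ignore. A minimal user-level sketch (SIGINT is an arbitrary choice, and the program signals itself rather than waiting for Ctrl-C):

package main

import (
	"fmt"
	"os"
	"os/signal"
	"syscall"
)

func main() {
	ch := make(chan os.Signal, 1)
	signal.Notify(ch, syscall.SIGINT) // runtime side: sigenable

	syscall.Kill(syscall.Getpid(), syscall.SIGINT)
	fmt.Println("got", <-ch)

	signal.Reset(syscall.SIGINT) // runtime side: sigdisable, restoring fwdSig's handler
}
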
+func sigpanic() { + g := getg() + if !canpanic(g) { + throw("unexpected signal during runtime execution") + } + + switch g.sig { + case _SIGBUS: + if g.sigcode0 == _BUS_ADRERR && g.sigcode1 < 0x1000 { + panicmem() + } + // Support runtime/debug.SetPanicOnFault. + if g.paniconfault { + panicmem() + } + print("unexpected fault address ", hex(g.sigcode1), "\n") + throw("fault") + case _SIGSEGV: + if (g.sigcode0 == 0 || g.sigcode0 == _SEGV_MAPERR || g.sigcode0 == _SEGV_ACCERR) && g.sigcode1 < 0x1000 { + panicmem() + } + // Support runtime/debug.SetPanicOnFault. + if g.paniconfault { + panicmem() + } + print("unexpected fault address ", hex(g.sigcode1), "\n") + throw("fault") + case _SIGFPE: + switch g.sigcode0 { + case _FPE_INTDIV: + panicdivide() + case _FPE_INTOVF: + panicoverflow() + } + panicfloat() + } + + if g.sig >= uint32(len(sigtable)) { + // can't happen: we looked up g.sig in sigtable to decide to call sigpanic + throw("unexpected signal value") + } + panic(errorString(sigtable[g.sig].name)) +} + +// dieFromSignal kills the program with a signal. +// This provides the expected exit status for the shell. +// This is only called with fatal signals expected to kill the process. +//go:nosplit +//go:nowritebarrierrec +func dieFromSignal(sig uint32) { + setsig(sig, _SIG_DFL) + unblocksig(sig) + raise(sig) + + // That should have killed us. On some systems, though, raise + // sends the signal to the whole process rather than to just + // the current thread, which means that the signal may not yet + // have been delivered. Give other threads a chance to run and + // pick up the signal. + osyield() + osyield() + osyield() + + // If we are still somehow running, just exit with the wrong status. + exit(2) +} + +// raisebadsignal is called when a signal is received on a non-Go +// thread, and the Go program does not want to handle it (that is, the +// program has not called os/signal.Notify for the signal). +func raisebadsignal(sig uint32, c *sigctxt) { + if sig == _SIGPROF { + // Ignore profiling signals that arrive on non-Go threads. + return + } + + var handler uintptr + if sig >= _NSIG { + handler = _SIG_DFL + } else { + handler = fwdSig[sig] + } + + // Reset the signal handler and raise the signal. + // We are currently running inside a signal handler, so the + // signal is blocked. We need to unblock it before raising the + // signal, or the signal we raise will be ignored until we return + // from the signal handler. We know that the signal was unblocked + // before entering the handler, or else we would not have received + // it. That means that we don't have to worry about blocking it + // again. + unblocksig(sig) + setsig(sig, handler) + + // If we're linked into a non-Go program we want to try to + // avoid modifying the original context in which the signal + // was raised. If the handler is the default, we know it + // is non-recoverable, so we don't have to worry about + // re-installing sighandler. At this point we can just + // return and the signal will be re-raised and caught by + // the default handler with the correct context. + if (isarchive || islibrary) && handler == _SIG_DFL && c.sigcode() != _SI_USER { + return + } + + raise(sig) + + // Give the signal a chance to be delivered. + // In almost all real cases the program is about to crash, + // so sleeping here is not a waste of time. + usleep(1000) + + // If the signal didn't cause the program to exit, restore the + // Go signal handler and carry on. 
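
sigpanic above is what lets a synchronous fault such as a nil dereference surface as an ordinary Go panic that deferred code can recover:

package main

import "fmt"

func main() {
	defer func() {
		// The fault arrives as a runtime.Error panic raised by sigpanic.
		fmt.Println("recovered:", recover())
	}()

	var p *int
	fmt.Println(*p) // nil dereference -> SIGSEGV -> sigpanic -> panic
}
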
+ // + // We may receive another instance of the signal before we + // restore the Go handler, but that is not so bad: we know + // that the Go program has been ignoring the signal. + setsig(sig, getSigtramp()) +} + +func crash() { + if GOOS == "darwin" { + // OS X core dumps are linear dumps of the mapped memory, + // from the first virtual byte to the last, with zeros in the gaps. + // Because of the way we arrange the address space on 64-bit systems, + // this means the OS X core file will be >128 GB and even on a zippy + // workstation can take OS X well over an hour to write (uninterruptible). + // Save users from making that mistake. + if sys.PtrSize == 8 { + return + } + } + + dieFromSignal(_SIGABRT) +} + +// ensureSigM starts one global, sleeping thread to make sure at least one thread +// is available to catch signals enabled for os/signal. +func ensureSigM() { + if maskUpdatedChan != nil { + return + } + maskUpdatedChan = make(chan struct{}) + disableSigChan = make(chan uint32) + enableSigChan = make(chan uint32) + go func() { + // Signal masks are per-thread, so make sure this goroutine stays on one + // thread. + LockOSThread() + defer UnlockOSThread() + // The sigBlocked mask contains the signals not active for os/signal, + // initially all signals except the essential. When signal.Notify()/Stop is called, + // sigenable/sigdisable in turn notify this thread to update its signal + // mask accordingly. + var sigBlocked sigset + sigfillset(&sigBlocked) + for i := range sigtable { + if sigtable[i].flags&_SigUnblock != 0 { + sigdelset(&sigBlocked, i) + } + } + sigprocmask(_SIG_SETMASK, &sigBlocked, nil) + for { + select { + case sig := <-enableSigChan: + if sig > 0 { + sigdelset(&sigBlocked, int(sig)) + } + case sig := <-disableSigChan: + if sig > 0 { + sigaddset(&sigBlocked, int(sig)) + } + } + sigprocmask(_SIG_SETMASK, &sigBlocked, nil) + maskUpdatedChan <- struct{}{} + } + }() +} + +// This is called when we receive a signal when there is no signal stack. +// This can only happen if non-Go code calls sigaltstack to disable the +// signal stack. +func noSignalStack(sig uint32) { + println("signal", sig, "received on thread with no signal stack") + throw("non-Go code disabled sigaltstack") +} + +// This is called if we receive a signal when there is a signal stack +// but we are not on it. This can only happen if non-Go code called +// sigaction without setting the SS_ONSTACK flag. +func sigNotOnStack(sig uint32) { + println("signal", sig, "received but handler not on signal stack") + throw("non-Go code set up signal handler without SA_ONSTACK flag") +} + +// This runs on a foreign stack, without an m or a g. No stack split. +//go:nosplit +//go:norace +//go:nowritebarrierrec +func badsignal(sig uintptr, c *sigctxt) { + needm(0) + if !sigsend(uint32(sig)) { + // A foreign thread received the signal sig, and the + // Go code does not want to handle it. + raisebadsignal(uint32(sig), c) + } + dropm() +} + +// Determines if the signal should be handled by Go and if not, forwards the +// signal to the handler that was installed before Go's. Returns whether the +// signal was forwarded. +// This is called by the signal handler, and the world may be stopped. 
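
crash above, which raises SIGABRT (skipping the enormous core files on 64-bit Darwin), is reachable from user code through the "crash" traceback level. A small illustration; note that it aborts the process on purpose:

package main

import "runtime/debug"

func main() {
	// Same effect as running with GOTRACEBACK=crash: after printing the
	// traceback for an unrecovered panic, the runtime calls crash(), i.e.
	// dies on SIGABRT so the OS may write a core file.
	debug.SetTraceback("crash")
	panic("boom")
}
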
+//go:nosplit +//go:nowritebarrierrec +func sigfwdgo(sig uint32, info *siginfo, ctx unsafe.Pointer) bool { + if sig >= uint32(len(sigtable)) { + return false + } + fwdFn := fwdSig[sig] + + if !signalsOK { + // The only way we can get here is if we are in a + // library or archive, we installed a signal handler + // at program startup, but the Go runtime has not yet + // been initialized. + if fwdFn == _SIG_DFL { + dieFromSignal(sig) + } else { + sigfwd(fwdFn, sig, info, ctx) + } + return true + } + + flags := sigtable[sig].flags + + // If there is no handler to forward to, no need to forward. + if fwdFn == _SIG_DFL { + return false + } + + // If we aren't handling the signal, forward it. + if flags&_SigHandling == 0 { + sigfwd(fwdFn, sig, info, ctx) + return true + } + + // Only forward synchronous signals. + c := sigctxt{info, ctx} + if c.sigcode() == _SI_USER || flags&_SigPanic == 0 { + return false + } + // Determine if the signal occurred inside Go code. We test that: + // (1) we were in a goroutine (i.e., m.curg != nil), and + // (2) we weren't in CGO (i.e., m.curg.syscallsp == 0). + g := getg() + if g != nil && g.m != nil && g.m.curg != nil && g.m.curg.syscallsp == 0 { + return false + } + // Signal not handled by Go, forward it. + if fwdFn != _SIG_IGN { + sigfwd(fwdFn, sig, info, ctx) + } + return true +} + +// msigsave saves the current thread's signal mask into mp.sigmask. +// This is used to preserve the non-Go signal mask when a non-Go +// thread calls a Go function. +// This is nosplit and nowritebarrierrec because it is called by needm +// which may be called on a non-Go thread with no g available. +//go:nosplit +//go:nowritebarrierrec +func msigsave(mp *m) { + sigprocmask(_SIG_SETMASK, nil, &mp.sigmask) +} + +// msigrestore sets the current thread's signal mask to sigmask. +// This is used to restore the non-Go signal mask when a non-Go thread +// calls a Go function. +// This is nosplit and nowritebarrierrec because it is called by dropm +// after g has been cleared. +//go:nosplit +//go:nowritebarrierrec +func msigrestore(sigmask sigset) { + sigprocmask(_SIG_SETMASK, &sigmask, nil) +} + +// sigblock blocks all signals in the current thread's signal mask. +// This is used to block signals while setting up and tearing down g +// when a non-Go thread calls a Go function. +// The OS-specific code is expected to define sigset_all. +// This is nosplit and nowritebarrierrec because it is called by needm +// which may be called on a non-Go thread with no g available. +//go:nosplit +//go:nowritebarrierrec +func sigblock() { + var set sigset + sigfillset(&set) + sigprocmask(_SIG_SETMASK, &set, nil) +} + +// unblocksig removes sig from the current thread's signal mask. +// This is nosplit and nowritebarrierrec because it is called from +// dieFromSignal, which can be called by sigfwdgo while running in the +// signal handler, on the signal stack, with no g available. +//go:nosplit +//go:nowritebarrierrec +func unblocksig(sig uint32) { + var set sigset + sigemptyset(&set) + sigaddset(&set, int(sig)) + sigprocmask(_SIG_UNBLOCK, &set, nil) +} + +// minitSignals is called when initializing a new m to set the +// thread's alternate signal stack and signal mask. +func minitSignals() { + minitSignalStack() + minitSignalMask() +} + +// minitSignalStack is called when initializing a new m to set the +// alternate signal stack. If the alternate signal stack is not set +// for the thread (the normal case) then set the alternate signal +// stack to the gsignal stack. 
If the alternate signal stack is set +// for the thread (the case when a non-Go thread sets the alternate +// signal stack and then calls a Go function) then set the gsignal +// stack to the alternate signal stack. Record which choice was made +// in newSigstack, so that it can be undone in unminit. +func minitSignalStack() { + _g_ := getg() + var st _stack_t + sigaltstack(nil, &st) + if st.ss_flags&_SS_DISABLE != 0 { + signalstack(_g_.m.gsignalstack, _g_.m.gsignalstacksize) + _g_.m.newSigstack = true + } else { + _g_.m.newSigstack = false + } +} + +// minitSignalMask is called when initializing a new m to set the +// thread's signal mask. When this is called all signals have been +// blocked for the thread. This starts with m.sigmask, which was set +// either from initSigmask for a newly created thread or by calling +// msigsave if this is a non-Go thread calling a Go function. It +// removes all essential signals from the mask, thus causing those +// signals to not be blocked. Then it sets the thread's signal mask. +// After this is called the thread can receive signals. +func minitSignalMask() { + nmask := getg().m.sigmask + for i := range sigtable { + if sigtable[i].flags&_SigUnblock != 0 { + sigdelset(&nmask, i) + } + } + sigprocmask(_SIG_SETMASK, &nmask, nil) +} + +// unminitSignals is called from dropm, via unminit, to undo the +// effect of calling minit on a non-Go thread. +//go:nosplit +func unminitSignals() { + if getg().m.newSigstack { + signalstack(nil, 0) + } +} diff --git a/libgo/go/runtime/sigpanic_unix.go b/libgo/go/runtime/sigpanic_unix.go deleted file mode 100644 index 00ad090..0000000 --- a/libgo/go/runtime/sigpanic_unix.go +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build darwin dragonfly freebsd linux netbsd openbsd solaris - -package runtime - -import _ "unsafe" // For go:linkname. - -// For gccgo, C code has to call sigpanic, so we have to export it. 
-//go:linkname sigpanic runtime.sigpanic - -func sigpanic() { - g := getg() - if !canpanic(g) { - throw("unexpected signal during runtime execution") - } - - switch g.sig { - case _SIGBUS: - if g.sigcode0 == _BUS_ADRERR && g.sigcode1 < 0x1000 || g.paniconfault { - panicmem() - } - print("unexpected fault address ", hex(g.sigcode1), "\n") - throw("fault") - case _SIGSEGV: - if (g.sigcode0 == 0 || g.sigcode0 == _SEGV_MAPERR || g.sigcode0 == _SEGV_ACCERR) && g.sigcode1 < 0x1000 || g.paniconfault { - panicmem() - } - print("unexpected fault address ", hex(g.sigcode1), "\n") - throw("fault") - case _SIGFPE: - switch g.sigcode0 { - case _FPE_INTDIV: - panicdivide() - case _FPE_INTOVF: - panicoverflow() - } - panicfloat() - } - - if g.sig >= uint32(len(sigtable)) { - // can't happen: we looked up g.sig in sigtable to decide to call sigpanic - throw("unexpected signal value") - } - panic(errorString(sigtable[g.sig].name)) -} diff --git a/libgo/go/runtime/sizeclasses.go b/libgo/go/runtime/sizeclasses.go new file mode 100644 index 0000000..e616e951 --- /dev/null +++ b/libgo/go/runtime/sizeclasses.go @@ -0,0 +1,95 @@ +// AUTO-GENERATED by mksizeclasses.go; DO NOT EDIT +//go:generate go run mksizeclasses.go + +package runtime + +// class bytes/obj bytes/span objects waste bytes +// 1 8 8192 1024 0 +// 2 16 8192 512 0 +// 3 32 8192 256 0 +// 4 48 8192 170 32 +// 5 64 8192 128 0 +// 6 80 8192 102 32 +// 7 96 8192 85 32 +// 8 112 8192 73 16 +// 9 128 8192 64 0 +// 10 144 8192 56 128 +// 11 160 8192 51 32 +// 12 176 8192 46 96 +// 13 192 8192 42 128 +// 14 208 8192 39 80 +// 15 224 8192 36 128 +// 16 240 8192 34 32 +// 17 256 8192 32 0 +// 18 288 8192 28 128 +// 19 320 8192 25 192 +// 20 352 8192 23 96 +// 21 384 8192 21 128 +// 22 416 8192 19 288 +// 23 448 8192 18 128 +// 24 480 8192 17 32 +// 25 512 8192 16 0 +// 26 576 8192 14 128 +// 27 640 8192 12 512 +// 28 704 8192 11 448 +// 29 768 8192 10 512 +// 30 896 8192 9 128 +// 31 1024 8192 8 0 +// 32 1152 8192 7 128 +// 33 1280 8192 6 512 +// 34 1408 16384 11 896 +// 35 1536 8192 5 512 +// 36 1792 16384 9 256 +// 37 2048 8192 4 0 +// 38 2304 16384 7 256 +// 39 2688 8192 3 128 +// 40 3072 24576 8 0 +// 41 3200 16384 5 384 +// 42 3456 24576 7 384 +// 43 4096 8192 2 0 +// 44 4864 24576 5 256 +// 45 5376 16384 3 256 +// 46 6144 24576 4 0 +// 47 6528 32768 5 128 +// 48 6784 40960 6 256 +// 49 6912 49152 7 768 +// 50 8192 8192 1 0 +// 51 9472 57344 6 512 +// 52 9728 49152 5 512 +// 53 10240 40960 4 0 +// 54 10880 32768 3 128 +// 55 12288 24576 2 0 +// 56 13568 40960 3 256 +// 57 14336 57344 4 0 +// 58 16384 16384 1 0 +// 59 18432 73728 4 0 +// 60 19072 57344 3 128 +// 61 20480 40960 2 0 +// 62 21760 65536 3 256 +// 63 24576 24576 1 0 +// 64 27264 81920 3 128 +// 65 28672 57344 2 0 +// 66 32768 32768 1 0 + +const ( + _MaxSmallSize = 32768 + smallSizeDiv = 8 + smallSizeMax = 1024 + largeSizeDiv = 128 + _NumSizeClasses = 67 + _PageShift = 13 +) + +var class_to_size = [_NumSizeClasses]uint16{0, 8, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, 256, 288, 320, 352, 384, 416, 448, 480, 512, 576, 640, 704, 768, 896, 1024, 1152, 1280, 1408, 1536, 1792, 2048, 2304, 2688, 3072, 3200, 3456, 4096, 4864, 5376, 6144, 6528, 6784, 6912, 8192, 9472, 9728, 10240, 10880, 12288, 13568, 14336, 16384, 18432, 19072, 20480, 21760, 24576, 27264, 28672, 32768} +var class_to_allocnpages = [_NumSizeClasses]uint8{0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 3, 2, 3, 1, 3, 2, 3, 4, 5, 6, 1, 7, 
6, 5, 4, 3, 5, 7, 2, 9, 7, 5, 8, 3, 10, 7, 4} + +type divMagic struct { + shift uint8 + shift2 uint8 + mul uint16 + baseMask uint16 +} + +var class_to_divmagic = [_NumSizeClasses]divMagic{{0, 0, 0, 0}, {3, 0, 1, 65528}, {4, 0, 1, 65520}, {5, 0, 1, 65504}, {4, 9, 171, 0}, {6, 0, 1, 65472}, {4, 10, 205, 0}, {5, 9, 171, 0}, {4, 11, 293, 0}, {7, 0, 1, 65408}, {4, 9, 57, 0}, {5, 10, 205, 0}, {4, 12, 373, 0}, {6, 7, 43, 0}, {4, 13, 631, 0}, {5, 11, 293, 0}, {4, 13, 547, 0}, {8, 0, 1, 65280}, {5, 9, 57, 0}, {6, 9, 103, 0}, {5, 12, 373, 0}, {7, 7, 43, 0}, {5, 10, 79, 0}, {6, 10, 147, 0}, {5, 11, 137, 0}, {9, 0, 1, 65024}, {6, 9, 57, 0}, {7, 6, 13, 0}, {6, 11, 187, 0}, {8, 5, 11, 0}, {7, 8, 37, 0}, {10, 0, 1, 64512}, {7, 9, 57, 0}, {8, 6, 13, 0}, {7, 11, 187, 0}, {9, 5, 11, 0}, {8, 8, 37, 0}, {11, 0, 1, 63488}, {8, 9, 57, 0}, {7, 10, 49, 0}, {10, 5, 11, 0}, {7, 10, 41, 0}, {7, 9, 19, 0}, {12, 0, 1, 61440}, {8, 9, 27, 0}, {8, 10, 49, 0}, {11, 5, 11, 0}, {7, 13, 161, 0}, {7, 13, 155, 0}, {8, 9, 19, 0}, {13, 0, 1, 57344}, {8, 12, 111, 0}, {9, 9, 27, 0}, {11, 6, 13, 0}, {7, 14, 193, 0}, {12, 3, 3, 0}, {8, 13, 155, 0}, {11, 8, 37, 0}, {14, 0, 1, 49152}, {11, 8, 29, 0}, {7, 13, 55, 0}, {12, 5, 7, 0}, {8, 14, 193, 0}, {13, 3, 3, 0}, {7, 14, 77, 0}, {12, 7, 19, 0}, {15, 0, 1, 32768}} +var size_to_class8 = [smallSizeMax/smallSizeDiv + 1]uint8{0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31} +var size_to_class128 = [(_MaxSmallSize-smallSizeMax)/largeSizeDiv + 1]uint8{31, 32, 33, 34, 35, 36, 36, 37, 37, 38, 38, 39, 39, 39, 40, 40, 40, 41, 42, 42, 43, 43, 43, 43, 43, 44, 44, 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 46, 46, 47, 47, 47, 48, 48, 49, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 54, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 57, 57, 57, 57, 57, 57, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66} diff --git a/libgo/go/runtime/slice.go b/libgo/go/runtime/slice.go index 4548a5b..55f4454 100644 --- a/libgo/go/runtime/slice.go +++ b/libgo/go/runtime/slice.go @@ -12,6 +12,7 @@ import ( // themselves, so that the compiler will export them. // //go:linkname makeslice runtime.makeslice +//go:linkname makeslice64 runtime.makeslice64 //go:linkname growslice runtime.growslice //go:linkname slicecopy runtime.slicecopy //go:linkname slicestringcopy runtime.slicestringcopy @@ -44,21 +45,18 @@ func maxSliceCap(elemsize uintptr) uintptr { return _MaxMem / elemsize } -// TODO: take uintptrs instead of int64s? 
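
The sizeclasses.go table above is observable from ordinary code: for small objects, append's capacity growth (growslice, in the next hunk) rounds each new backing array up to one of these classes. A quick way to watch that on a typical Go 1.8 toolchain, for illustration:

package main

import "fmt"

func main() {
	var b []byte
	last := -1
	for i := 0; i < 2000; i++ {
		b = append(b, 0)
		if cap(b) != last {
			last = cap(b)
			// Prints 8, 16, 32, 64, 128, ...; each capacity is a size class.
			fmt.Println("len", len(b), "-> cap", cap(b))
		}
	}
}
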
-func makeslice(et *_type, len64, cap64 int64) slice { +func makeslice(et *_type, len, cap int) slice { // NOTE: The len > maxElements check here is not strictly necessary, // but it produces a 'len out of range' error instead of a 'cap out of range' error // when someone does make([]T, bignumber). 'cap out of range' is true too, // but since the cap is only being supplied implicitly, saying len is clearer. // See issue 4085. maxElements := maxSliceCap(et.size) - len := int(len64) - if len64 < 0 || int64(len) != len64 || uintptr(len) > maxElements { + if len < 0 || uintptr(len) > maxElements { panic(errorString("makeslice: len out of range")) } - cap := int(cap64) - if cap < len || int64(cap) != cap64 || uintptr(cap) > maxElements { + if cap < len || uintptr(cap) > maxElements { panic(errorString("makeslice: cap out of range")) } @@ -69,6 +67,20 @@ func makeslice(et *_type, len64, cap64 int64) slice { return slice{p, len, cap} } +func makeslice64(et *_type, len64, cap64 int64) slice { + len := int(len64) + if int64(len) != len64 { + panic(errorString("makeslice: len out of range")) + } + + cap := int(cap64) + if int64(cap) != cap64 { + panic(errorString("makeslice: cap out of range")) + } + + return makeslice(et, len, cap) +} + // growslice handles slice growth during append. // It is passed the slice element type, the old slice, and the desired new minimum capacity, // and it returns a new slice with at least that capacity, with the old data @@ -106,19 +118,22 @@ func growslice(et *_type, old slice, cap int) slice { } } - var lenmem, capmem uintptr + var lenmem, newlenmem, capmem uintptr const ptrSize = unsafe.Sizeof((*byte)(nil)) switch et.size { case 1: lenmem = uintptr(old.len) + newlenmem = uintptr(cap) capmem = roundupsize(uintptr(newcap)) newcap = int(capmem) case ptrSize: lenmem = uintptr(old.len) * ptrSize + newlenmem = uintptr(cap) * ptrSize capmem = roundupsize(uintptr(newcap) * ptrSize) newcap = int(capmem / ptrSize) default: lenmem = uintptr(old.len) * et.size + newlenmem = uintptr(cap) * et.size capmem = roundupsize(uintptr(newcap) * et.size) newcap = int(capmem / et.size) } @@ -136,7 +151,10 @@ func growslice(et *_type, old slice, cap int) slice { // the newarray function will zero the memory. // Calling memclr is also wrong since we allocated // newcap*et.size bytes, which is not the same as capmem. - // memclr(add(p, lenmem), capmem-lenmem) + // The append() that calls growslice is going to overwrite from old.len to cap (which will be the new length). + // Only clear the part that will not be overwritten. + // memclrNoHeapPointers(add(p, newlenmem), capmem-newlenmem) + _ = newlenmem } else { // Note: can't use rawmem (which avoids zeroing of memory), because then GC can scan uninitialized memory. // gccgo's current GC requires newarray, not mallocgc. diff --git a/libgo/go/runtime/stack.go b/libgo/go/runtime/stack.go index 708a4c2..fd99e4d 100644 --- a/libgo/go/runtime/stack.go +++ b/libgo/go/runtime/stack.go @@ -92,7 +92,7 @@ const ( // The stack guard is a pointer this many bytes above the // bottom of the stack. - _StackGuard = 720*sys.StackGuardMultiplier + _StackSystem + _StackGuard = 880*sys.StackGuardMultiplier + _StackSystem // After a stack split check the SP is allowed to be this // many bytes below the stack guard. This saves an instruction @@ -125,6 +125,9 @@ const ( stackPoisonCopy = 0 // fill stack that should not be accessed with garbage, to detect bad dereferences during copy stackCache = 1 + + // check the BP links during traceback. 
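
The makeslice/makeslice64 split above keeps the common path on int while still rejecting lengths and capacities that are negative, inconsistent, or too large to allocate; from user code those checks surface as recoverable run-time panics. An illustrative sketch:

package main

import "fmt"

func tryMake(n, c int) {
	defer func() {
		if r := recover(); r != nil {
			fmt.Println("panic:", r)
		}
	}()
	s := make([]uint64, n, c)
	fmt.Println("ok: len", len(s), "cap", cap(s))
}

func main() {
	tryMake(3, 8)  // ok: len 3 cap 8
	tryMake(-1, 8) // panics: len out of range
	tryMake(8, 3)  // panics: cap out of range
}
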
+ debugCheckBP = false ) const ( @@ -337,6 +340,7 @@ func stackalloc(n uint32) (stack, []stkbar) { // Compute the size of stack barrier array. maxstkbar := gcMaxStackBarriers(int(n)) nstkbar := unsafe.Sizeof(stkbar{}) * uintptr(maxstkbar) + var stkbarSlice slice if debug.efence != 0 || stackFromSystem != 0 { v := sysAlloc(round(uintptr(n), _PageSize), &memstats.stacks_sys) @@ -344,7 +348,9 @@ func stackalloc(n uint32) (stack, []stkbar) { throw("out of memory (stackalloc)") } top := uintptr(n) - nstkbar - stkbarSlice := slice{add(v, top), 0, maxstkbar} + if maxstkbar != 0 { + stkbarSlice = slice{add(v, top), 0, maxstkbar} + } return stack{uintptr(v), uintptr(v) + top}, *(*[]stkbar)(unsafe.Pointer(&stkbarSlice)) } @@ -412,7 +418,9 @@ func stackalloc(n uint32) (stack, []stkbar) { print(" allocated ", v, "\n") } top := uintptr(n) - nstkbar - stkbarSlice := slice{add(v, top), 0, maxstkbar} + if maxstkbar != 0 { + stkbarSlice = slice{add(v, top), 0, maxstkbar} + } return stack{uintptr(v), uintptr(v) + top}, *(*[]stkbar)(unsafe.Pointer(&stkbarSlice)) } @@ -433,7 +441,7 @@ func stackfree(stk stack, n uintptr) { } if stackDebug >= 1 { println("stackfree", v, n) - memclr(v, n) // for testing, clobber stack data + memclrNoHeapPointers(v, n) // for testing, clobber stack data } if debug.efence != 0 || stackFromSystem != 0 { if debug.efence != 0 || stackFaultOnFree != 0 { @@ -595,16 +603,16 @@ func adjustpointers(scanp unsafe.Pointer, cbv *bitvector, adjinfo *adjustinfo, f pp := (*uintptr)(add(scanp, i*sys.PtrSize)) retry: p := *pp - if f != nil && 0 < p && p < _PageSize && debug.invalidptr != 0 { + if f != nil && 0 < p && p < minLegalPointer && debug.invalidptr != 0 { // Looks like a junk value in a pointer slot. // Live analysis wrong? getg().m.traceback = 2 print("runtime: bad pointer in frame ", funcname(f), " at ", pp, ": ", hex(p), "\n") - throw("invalid stack pointer") + throw("invalid pointer found on stack") } if minp <= p && p < maxp { if stackDebug >= 3 { - print("adjust ptr ", p, " ", funcname(f), "\n") + print("adjust ptr ", hex(p), " ", funcname(f), "\n") } if useCAS { ppu := (*unsafe.Pointer)(unsafe.Pointer(pp)) @@ -685,6 +693,16 @@ func adjustframe(frame *stkframe, arg unsafe.Pointer) bool { if stackDebug >= 3 { print(" saved bp\n") } + if debugCheckBP { + // Frame pointers should always point to the next higher frame on + // the Go stack (or be nil, for the top frame on the stack). + bp := *(*uintptr)(unsafe.Pointer(frame.varp)) + if bp != 0 && (bp < adjinfo.old.lo || bp >= adjinfo.old.hi) { + println("runtime: found invalid frame pointer") + print("bp=", hex(bp), " min=", hex(adjinfo.old.lo), " max=", hex(adjinfo.old.hi), "\n") + throw("bad frame pointer") + } + } adjustpointer(adjinfo, unsafe.Pointer(frame.varp)) } @@ -716,6 +734,18 @@ func adjustframe(frame *stkframe, arg unsafe.Pointer) bool { func adjustctxt(gp *g, adjinfo *adjustinfo) { adjustpointer(adjinfo, unsafe.Pointer(&gp.sched.ctxt)) + if !framepointer_enabled { + return + } + if debugCheckBP { + bp := gp.sched.bp + if bp != 0 && (bp < adjinfo.old.lo || bp >= adjinfo.old.hi) { + println("runtime: found invalid top frame pointer") + print("bp=", hex(bp), " min=", hex(adjinfo.old.lo), " max=", hex(adjinfo.old.hi), "\n") + throw("bad top frame pointer") + } + } + adjustpointer(adjinfo, unsafe.Pointer(&gp.sched.bp)) } func adjustdefers(gp *g, adjinfo *adjustinfo) { @@ -927,7 +957,10 @@ func round2(x int32) int32 { // // g->atomicstatus will be Grunning or Gscanrunning upon entry. 
// If the GC is trying to stop this g then it will set preemptscan to true. -func newstack() { +// +// ctxt is the value of the context register on morestack. newstack +// will write it to g.sched.ctxt. +func newstack(ctxt unsafe.Pointer) { thisg := getg() // TODO: double check all gp. shouldn't be getg(). if thisg.m.morebuf.g.ptr().stackguard0 == stackFork { @@ -939,8 +972,13 @@ func newstack() { traceback(morebuf.pc, morebuf.sp, morebuf.lr, morebuf.g.ptr()) throw("runtime: wrong goroutine in newstack") } + + gp := thisg.m.curg + // Write ctxt to gp.sched. We do this here instead of in + // morestack so it has the necessary write barrier. + gp.sched.ctxt = ctxt + if thisg.m.curg.throwsplit { - gp := thisg.m.curg // Update syscallsp, syscallpc in case traceback uses them. morebuf := thisg.m.morebuf gp.syscallsp = morebuf.sp @@ -953,13 +991,11 @@ func newstack() { throw("runtime: stack split at bad time") } - gp := thisg.m.curg morebuf := thisg.m.morebuf thisg.m.morebuf.pc = 0 thisg.m.morebuf.lr = 0 thisg.m.morebuf.sp = 0 thisg.m.morebuf.g = 0 - rewindmorestack(&gp.sched) // NOTE: stackguard0 may change underfoot, if another thread // is about to try to preempt gp. Read it just once and use that same @@ -1006,14 +1042,6 @@ func newstack() { throw("runtime: split stack overflow") } - if gp.sched.ctxt != nil { - // morestack wrote sched.ctxt on its way in here, - // without a write barrier. Run the write barrier now. - // It is not possible to be preempted between then - // and now, so it's okay. - writebarrierptr_nostore((*uintptr)(unsafe.Pointer(&gp.sched.ctxt)), uintptr(gp.sched.ctxt)) - } - if preempt { if gp == thisg.m.g0 { throw("runtime: preempt g0") @@ -1121,6 +1149,11 @@ func shrinkstack(gp *g) { if debug.gcshrinkstackoff > 0 { return } + if gp.startpc == gcBgMarkWorkerPC { + // We're not allowed to shrink the gcBgMarkWorker + // stack (see gcBgMarkWorker for explanation). + return + } oldsize := gp.stackAlloc newsize := oldsize / 2 diff --git a/libgo/go/runtime/string.go b/libgo/go/runtime/string.go index bf5791e..9cc2873 100644 --- a/libgo/go/runtime/string.go +++ b/libgo/go/runtime/string.go @@ -4,9 +4,7 @@ package runtime -import ( - "unsafe" -) +import "unsafe" // For gccgo, use go:linkname to rename compiler-called functions to // themselves, so that the compiler will export them. @@ -19,12 +17,9 @@ import ( //go:linkname slicebytetostring runtime.slicebytetostring //go:linkname slicebytetostringtmp runtime.slicebytetostringtmp //go:linkname stringtoslicebyte runtime.stringtoslicebyte -//go:linkname stringtoslicebytetmp runtime.stringtoslicebytetmp //go:linkname stringtoslicerune runtime.stringtoslicerune //go:linkname slicerunetostring runtime.slicerunetostring //go:linkname intstring runtime.intstring -//go:linkname stringiter runtime.stringiter -//go:linkname stringiter2 runtime.stringiter2 // Temporary for C code to call: //go:linkname gostringnocopy runtime.gostringnocopy //go:linkname findnull runtime.findnull @@ -68,10 +63,9 @@ func concatstrings(buf *tmpBuf, a []string) string { // return a[idx] // } s, b := rawstringtmp(buf, l) - l = 0 for _, x := range a { - copy(b[l:], x) - l += len(x) + copy(b, x) + b = b[len(x):] } return s } @@ -126,17 +120,20 @@ func rawstringtmp(buf *tmpBuf, l int) (s string, b []byte) { return } +// slicebytetostringtmp returns a "string" referring to the actual []byte bytes. 
+// +// Callers need to ensure that the returned string will not be used after +// the calling goroutine modifies the original slice or synchronizes with +// another goroutine. +// +// The function is only called when instrumenting +// and otherwise intrinsified by the compiler. +// +// Some internal compiler optimizations use this function. +// - Used for m[string(k)] lookup where m is a string-keyed map and k is a []byte. +// - Used for "<"+string(b)+">" concatenation where b is []byte. +// - Used for string(b)=="foo" comparison where b is []byte. func slicebytetostringtmp(b []byte) string { - // Return a "string" referring to the actual []byte bytes. - // This is only for use by internal compiler optimizations - // that know that the string form will be discarded before - // the calling goroutine could possibly modify the original - // slice or synchronize with another goroutine. - // First such case is a m[string(k)] lookup where - // m is a string-keyed map and k is a []byte. - // Second such case is "<"+string(b)+">" concatenation where b is []byte. - // Third such case is string(b)=="foo" comparison where b is []byte. - if raceenabled && len(b) > 0 { racereadrangepc(unsafe.Pointer(&b[0]), uintptr(len(b)), @@ -161,28 +158,14 @@ func stringtoslicebyte(buf *tmpBuf, s string) []byte { return b } -func stringtoslicebytetmp(s string) []byte { - // Return a slice referring to the actual string bytes. - // This is only for use by internal compiler optimizations - // that know that the slice won't be mutated. - // The only such case today is: - // for i, c := range []byte(str) - - str := stringStructOf(&s) - ret := slice{array: str.str, len: str.len, cap: str.len} - return *(*[]byte)(unsafe.Pointer(&ret)) -} - func stringtoslicerune(buf *[tmpStringBufSize]rune, s string) []rune { // two passes. // unlike slicerunetostring, no race because strings are immutable. n := 0 - t := s - for len(s) > 0 { - _, k := charntorune(s) - s = s[k:] + for range s { n++ } + var a []rune if buf != nil && n <= len(buf) { *buf = [tmpStringBufSize]rune{} @@ -190,10 +173,9 @@ func stringtoslicerune(buf *[tmpStringBufSize]rune, s string) []rune { } else { a = rawruneslice(n) } + n = 0 - for len(t) > 0 { - r, k := charntorune(t) - t = t[k:] + for _, r := range s { a[n] = r n++ } @@ -213,7 +195,7 @@ func slicerunetostring(buf *tmpBuf, a []rune) string { var dum [4]byte size1 := 0 for _, r := range a { - size1 += runetochar(dum[:], r) + size1 += encoderune(dum[:], r) } s, b := rawstringtmp(buf, size1+3) size2 := 0 @@ -222,7 +204,7 @@ func slicerunetostring(buf *tmpBuf, a []rune) string { if size2 >= size1 { break } - size2 += runetochar(b[size2:], r) + size2 += encoderune(b[size2:], r) } return s[:size2] } @@ -252,48 +234,12 @@ func intstring(buf *[4]byte, v int64) string { s, b = rawstring(4) } if int64(rune(v)) != v { - v = runeerror + v = runeError } - n := runetochar(b, rune(v)) + n := encoderune(b, rune(v)) return s[:n] } -// stringiter returns the index of the next -// rune after the rune that starts at s[k]. -func stringiter(s string, k int) int { - if k >= len(s) { - // 0 is end of iteration - return 0 - } - - c := s[k] - if c < runeself { - return k + 1 - } - - // multi-char rune - _, n := charntorune(s[k:]) - return k + n -} - -// stringiter2 returns the rune that starts at s[k] -// and the index where the next rune starts. 
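// Editor's note (sketch, not part of the patch): stringiter and stringiter2
// are removed in this hunk because ranging over a string no longer calls
// them; ASCII bytes are handled inline and only multi-byte sequences go
// through decoderune (added in utf8.go later in this change), which returns
// the rune and the index just past it. Roughly, and ignoring details of the
// actual compiler lowering, a range loop behaves like this hypothetical helper:
func rangeStringSketch(s string, body func(start int, r rune)) {
	for i := 0; i < len(s); {
		start := i
		var r rune
		if c := s[i]; c < runeSelf {
			r, i = rune(c), i+1
		} else {
			r, i = decoderune(s, i)
		}
		body(start, r)
	}
}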
-func stringiter2(s string, k int) (int, rune) { - if k >= len(s) { - // 0 is end of iteration - return 0, 0 - } - - c := s[k] - if c < runeself { - return k + 1, rune(c) - } - - // multi-char rune - r, n := charntorune(s[k:]) - return k + n, r -} - // rawstring allocates storage for a new string. The returned // string and byte slice both refer to the same storage. // The storage is not zeroed. Callers should use @@ -305,6 +251,7 @@ func rawstring(size int) (s string, b []byte) { stringStructOf(&s).len = size *(*slice)(unsafe.Pointer(&b)) = slice{p, size, size} + return } @@ -313,7 +260,7 @@ func rawbyteslice(size int) (b []byte) { cap := roundupsize(uintptr(size)) p := mallocgc(cap, nil, false) if cap != uintptr(size) { - memclr(add(p, uintptr(size)), cap-uintptr(size)) + memclrNoHeapPointers(add(p, uintptr(size)), cap-uintptr(size)) } *(*slice)(unsafe.Pointer(&b)) = slice{p, size, int(cap)} @@ -328,7 +275,7 @@ func rawruneslice(size int) (b []rune) { mem := roundupsize(uintptr(size) * 4) p := mallocgc(mem, nil, false) if mem != uintptr(size)*4 { - memclr(add(p, uintptr(size)*4), mem-uintptr(size)*4) + memclrNoHeapPointers(add(p, uintptr(size)*4), mem-uintptr(size)*4) } *(*slice)(unsafe.Pointer(&b)) = slice{p, size, int(mem / 4)} @@ -384,13 +331,66 @@ func hasprefix(s, t string) bool { return len(s) >= len(t) && s[:len(t)] == t } -func atoi(s string) int { - n := 0 - for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { - n = n*10 + int(s[0]) - '0' +const ( + maxUint = ^uint(0) + maxInt = int(maxUint >> 1) +) + +// atoi parses an int from a string s. +// The bool result reports whether s is a number +// representable by a value of type int. +func atoi(s string) (int, bool) { + if s == "" { + return 0, false + } + + neg := false + if s[0] == '-' { + neg = true s = s[1:] } - return n + + un := uint(0) + for i := 0; i < len(s); i++ { + c := s[i] + if c < '0' || c > '9' { + return 0, false + } + if un > maxUint/10 { + // overflow + return 0, false + } + un *= 10 + un1 := un + uint(c) - '0' + if un1 < un { + // overflow + return 0, false + } + un = un1 + } + + if !neg && un > uint(maxInt) { + return 0, false + } + if neg && un > uint(maxInt)+1 { + return 0, false + } + + n := int(un) + if neg { + n = -n + } + + return n, true +} + +// atoi32 is like atoi but for integers +// that fit into an int32. 
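// Editor's note (not part of the patch): the two guards in the new atoi above
// reject overflow before it can happen: un > maxUint/10 means un*10 would
// wrap, and un1 < un means adding the next digit wrapped. On a 64-bit target,
// parsing "20496382327982653440" reaches un = 2049638232798265344 with one
// digit left, which already exceeds maxUint/10 = 1844674407370955161, so atoi
// reports (0, false), matching the test table added to string_test.go below.
// The same checks, distilled into a hypothetical helper:
func digitWouldOverflow(un uint, c byte) bool {
	return un > maxUint/10 || un*10+uint(c-'0') < un*10
}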
+func atoi32(s string) (int32, bool) { + if n, ok := atoi(s); n == int(int32(n)) { + return int32(n), ok + } + return 0, false } //go:nosplit @@ -430,7 +430,7 @@ func gostringw(strw *uint16) string { str := (*[_MaxMem/2/2 - 1]uint16)(unsafe.Pointer(strw)) n1 := 0 for i := 0; str[i] != 0; i++ { - n1 += runetochar(buf[:], rune(str[i])) + n1 += encoderune(buf[:], rune(str[i])) } s, b := rawstring(n1 + 4) n2 := 0 @@ -439,7 +439,7 @@ func gostringw(strw *uint16) string { if n2 >= n1 { break } - n2 += runetochar(b[n2:], rune(str[i])) + n2 += encoderune(b[n2:], rune(str[i])) } b[n2] = 0 // for luck return s[:n2] diff --git a/libgo/go/runtime/string_test.go b/libgo/go/runtime/string_test.go index 11fa454..ee30699 100644 --- a/libgo/go/runtime/string_test.go +++ b/libgo/go/runtime/string_test.go @@ -5,6 +5,7 @@ package runtime_test import ( + "runtime" "strings" "testing" ) @@ -81,28 +82,50 @@ func BenchmarkCompareStringBig(b *testing.B) { b.SetBytes(int64(len(s1))) } -func BenchmarkRuneIterate(b *testing.B) { - bytes := make([]byte, 100) - for i := range bytes { - bytes[i] = byte('A') - } - s := string(bytes) +func BenchmarkConcatStringAndBytes(b *testing.B) { + s1 := []byte("Gophers!") for i := 0; i < b.N; i++ { - for range s { - } + _ = "Hello " + string(s1) } } -func BenchmarkRuneIterate2(b *testing.B) { - bytes := make([]byte, 100) - for i := range bytes { - bytes[i] = byte('A') - } - s := string(bytes) - for i := 0; i < b.N; i++ { - for range s { +var stringdata = []struct{ name, data string }{ + {"ASCII", "01234567890"}, + {"Japanese", "日本語日本語日本語"}, + {"MixedLength", "$Ѐࠀက퀀𐀀\U00040000\U0010FFFF"}, +} + +func BenchmarkRuneIterate(b *testing.B) { + b.Run("range", func(b *testing.B) { + for _, sd := range stringdata { + b.Run(sd.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + for range sd.data { + } + } + }) } - } + }) + b.Run("range1", func(b *testing.B) { + for _, sd := range stringdata { + b.Run(sd.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + for _ = range sd.data { + } + } + }) + } + }) + b.Run("range2", func(b *testing.B) { + for _, sd := range stringdata { + b.Run(sd.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + for _, _ = range sd.data { + } + } + }) + } + }) } func BenchmarkArrayEqual(b *testing.B) { @@ -149,21 +172,6 @@ func TestLargeStringConcat(t *testing.T) { } } -/* -func TestGostringnocopy(t *testing.T) { - max := *runtime.Maxstring - b := make([]byte, max+10) - for i := uintptr(0); i < max+9; i++ { - b[i] = 'a' - } - _ = runtime.Gostringnocopy(&b[0]) - newmax := *runtime.Maxstring - if newmax != max+9 { - t.Errorf("want %d, got %d", max+9, newmax) - } -} -*/ - func TestCompareTempString(t *testing.T) { s := strings.Repeat("x", sizeNoStack) b := []byte(s) @@ -277,3 +285,97 @@ func TestString2Slice(t *testing.T) { t.Errorf("extra runes not zeroed") } } + +const intSize = 32 << (^uint(0) >> 63) + +type atoi64Test struct { + in string + out int64 + ok bool +} + +var atoi64tests = []atoi64Test{ + {"", 0, false}, + {"0", 0, true}, + {"-0", 0, true}, + {"1", 1, true}, + {"-1", -1, true}, + {"12345", 12345, true}, + {"-12345", -12345, true}, + {"012345", 12345, true}, + {"-012345", -12345, true}, + {"12345x", 0, false}, + {"-12345x", 0, false}, + {"98765432100", 98765432100, true}, + {"-98765432100", -98765432100, true}, + {"20496382327982653440", 0, false}, + {"-20496382327982653440", 0, false}, + {"9223372036854775807", 1<<63 - 1, true}, + {"-9223372036854775807", -(1<<63 - 1), true}, + {"9223372036854775808", 0, false}, + {"-9223372036854775808", -1 
<< 63, true}, + {"9223372036854775809", 0, false}, + {"-9223372036854775809", 0, false}, +} + +func TestAtoi(t *testing.T) { + switch intSize { + case 32: + for i := range atoi32tests { + test := &atoi32tests[i] + out, ok := runtime.Atoi(test.in) + if test.out != int32(out) || test.ok != ok { + t.Errorf("atoi(%q) = (%v, %v) want (%v, %v)", + test.in, out, ok, test.out, test.ok) + } + } + case 64: + for i := range atoi64tests { + test := &atoi64tests[i] + out, ok := runtime.Atoi(test.in) + if test.out != int64(out) || test.ok != ok { + t.Errorf("atoi(%q) = (%v, %v) want (%v, %v)", + test.in, out, ok, test.out, test.ok) + } + } + } +} + +type atoi32Test struct { + in string + out int32 + ok bool +} + +var atoi32tests = []atoi32Test{ + {"", 0, false}, + {"0", 0, true}, + {"-0", 0, true}, + {"1", 1, true}, + {"-1", -1, true}, + {"12345", 12345, true}, + {"-12345", -12345, true}, + {"012345", 12345, true}, + {"-012345", -12345, true}, + {"12345x", 0, false}, + {"-12345x", 0, false}, + {"987654321", 987654321, true}, + {"-987654321", -987654321, true}, + {"2147483647", 1<<31 - 1, true}, + {"-2147483647", -(1<<31 - 1), true}, + {"2147483648", 0, false}, + {"-2147483648", -1 << 31, true}, + {"2147483649", 0, false}, + {"-2147483649", 0, false}, +} + +func TestAtoi32(t *testing.T) { + for i := range atoi32tests { + test := &atoi32tests[i] + out, ok := runtime.Atoi32(test.in) + if test.out != out || test.ok != ok { + t.Errorf("atoi32(%q) = (%v, %v) want (%v, %v)", + test.in, out, ok, test.out, test.ok) + } + } +} diff --git a/libgo/go/runtime/stubs.go b/libgo/go/runtime/stubs.go index a5f0470..c51ccc6 100644 --- a/libgo/go/runtime/stubs.go +++ b/libgo/go/runtime/stubs.go @@ -62,14 +62,24 @@ func badsystemstack() { throw("systemstack called from unexpected goroutine") } -// memclr clears n bytes starting at ptr. +// memclrNoHeapPointers clears n bytes starting at ptr. +// +// Usually you should use typedmemclr. memclrNoHeapPointers should be +// used only when the caller knows that *ptr contains no heap pointers +// because either: +// +// 1. *ptr is initialized memory and its type is pointer-free. +// +// 2. *ptr is uninitialized memory (e.g., memory that's being reused +// for a new allocation) and hence contains only "junk". +// // in memclr_*.s //go:noescape -func memclr(ptr unsafe.Pointer, n uintptr) +func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr) -//go:linkname reflect_memclr reflect.memclr -func reflect_memclr(ptr unsafe.Pointer, n uintptr) { - memclr(ptr, n) +//go:linkname reflect_memclrNoHeapPointers reflect.memclrNoHeapPointers +func reflect_memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr) { + memclrNoHeapPointers(ptr, n) } // memmove copies n bytes from "from" to "to". @@ -89,7 +99,10 @@ func memcmp(a, b unsafe.Pointer, size uintptr) int32 var hashLoad = loadFactor // in asm_*.s -func fastrand1() uint32 +func fastrand() uint32 + +//go:linkname sync_fastrand sync.fastrand +func sync_fastrand() uint32 { return fastrand() } // in asm_*.s //go:noescape @@ -98,7 +111,7 @@ func memequal(a, b unsafe.Pointer, size uintptr) bool // noescape hides a pointer from escape analysis. noescape is // the identity function but escape analysis doesn't think the // output depends on the input. noescape is inlined and currently -// compiles down to a single xor instruction. +// compiles down to zero instructions. // USE CAREFULLY! 
//go:nosplit func noescape(p unsafe.Pointer) unsafe.Pointer { @@ -106,6 +119,7 @@ func noescape(p unsafe.Pointer) unsafe.Pointer { return unsafe.Pointer(x ^ 0) } +//extern mincore func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32 //go:noescape @@ -219,6 +233,18 @@ func checkASM() bool { return true } +func eqstring(x, y string) bool { + a := stringStructOf(&x) + b := stringStructOf(&y) + if a.len != b.len { + return false + } + if a.str == b.str { + return true + } + return memequal(a.str, b.str, uintptr(a.len)) +} + // For gccgo this is in the C code. func osyield() @@ -266,12 +292,31 @@ func setSupportAES(v bool) { // typedmemmove copies a typed value. // For gccgo for now. +//go:linkname typedmemmove runtime.typedmemmove //go:nosplit func typedmemmove(typ *_type, dst, src unsafe.Pointer) { memmove(dst, src, typ.size) } // Temporary for gccgo until we port mbarrier.go. +//go:linkname reflect_typedmemmove reflect.typedmemmove +func reflect_typedmemmove(typ *_type, dst, src unsafe.Pointer) { + typedmemmove(typ, dst, src) +} + +// Temporary for gccgo until we port mbarrier.go. +//go:nosplit +func typedmemclr(typ *_type, ptr unsafe.Pointer) { + memclrNoHeapPointers(ptr, typ.size) +} + +// Temporary for gccgo until we port mbarrier.go. +//go:nosplit +func memclrHasPointers(ptr unsafe.Pointer, n uintptr) { + memclrNoHeapPointers(ptr, n) +} + +// Temporary for gccgo until we port mbarrier.go. //go:linkname typedslicecopy runtime.typedslicecopy func typedslicecopy(typ *_type, dst, src slice) int { n := dst.len @@ -285,6 +330,12 @@ func typedslicecopy(typ *_type, dst, src slice) int { return n } +// Temporary for gccgo until we port mbarrier.go. +//go:linkname reflect_typedslicecopy reflect.typedslicecopy +func reflect_typedslicecopy(elemType *_type, dst, src slice) int { + return typedslicecopy(elemType, dst, src) +} + // Here for gccgo until we port malloc.go. const ( _64bit = 1 << (^uintptr(0) >> 63) / 2 @@ -339,7 +390,7 @@ func gopark(func(*g, unsafe.Pointer) bool, unsafe.Pointer, string, byte, int) func goparkunlock(*mutex, string, byte, int) // Temporary hack for gccgo until we port the garbage collector. -func typeBitsBulkBarrier(typ *_type, p, size uintptr) {} +func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) {} // Here for gccgo until we port msize.go. func roundupsize(uintptr) uintptr @@ -350,7 +401,7 @@ func GC() // For gccgo to call from C code. //go:linkname acquireWorldsema runtime.acquireWorldsema func acquireWorldsema() { - semacquire(&worldsema, false) + semacquire(&worldsema, 0) } // For gccgo to call from C code. @@ -434,10 +485,10 @@ func setSigactionHandler(*_sigaction, uintptr) // Retrieve fields from the siginfo_t and ucontext_t pointers passed // to a signal handler using C, as they are often hidden in a union. // Returns and, if available, PC where signal occurred. -func getSiginfo(*_siginfo_t, unsafe.Pointer) (sigaddr uintptr, sigpc uintptr) +func getSiginfo(*siginfo, unsafe.Pointer) (sigaddr uintptr, sigpc uintptr) // Implemented in C for gccgo. -func dumpregs(*_siginfo_t, unsafe.Pointer) +func dumpregs(*siginfo, unsafe.Pointer) // Temporary for gccgo until we port proc.go. //go:linkname getsched runtime.getsched @@ -565,3 +616,27 @@ func (h *mheap) scavenge(k int32, now, limit uint64) { func setncpu(n int32) { ncpu = n } + +// Temporary for gccgo until we port malloc.go. +var physPageSize uintptr + +// Temporary for gccgo until we reliably initialize physPageSize in Go. 
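// Editor's note (illustration, not part of the patch): the eqstring added
// earlier in this file compares lengths first, then the data pointers, and
// only calls memequal when the pointers differ. Two slices of one string can
// therefore compare equal without their bytes being read:
func eqstringFastPath() bool {
	s := "gccgo"
	a, b := s[:3], s[:3] // same backing pointer, same length
	return a == b        // for gccgo this can be answered by the pointer check alone
}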
+//go:linkname setpagesize runtime.setpagesize +func setpagesize(s uintptr) { + if physPageSize == 0 { + physPageSize = s + } +} + +// Temporary for gccgo until we port more of proc.go. +func sigprofNonGoPC(pc uintptr) { +} + +// Temporary for gccgo until we port mgc.go. +// gcMarkWorkerModeStrings are the strings labels of gcMarkWorkerModes +// to use in execution traces. +var gcMarkWorkerModeStrings = [...]string{ + "GC (dedicated)", + "GC (fractional)", + "GC (idle)", +} diff --git a/libgo/go/runtime/stubs2.go b/libgo/go/runtime/stubs2.go index d960226..e891fe5 100644 --- a/libgo/go/runtime/stubs2.go +++ b/libgo/go/runtime/stubs2.go @@ -18,7 +18,11 @@ func exit(code int32) func nanotime() int64 func usleep(usec uint32) -func munmap(addr unsafe.Pointer, n uintptr) +//extern mmap +func mmap(addr unsafe.Pointer, length uintptr, prot, flags, fd int32, offset uintptr) unsafe.Pointer + +//extern munmap +func munmap(addr unsafe.Pointer, n uintptr) int32 //go:noescape func write(fd uintptr, p unsafe.Pointer, n int32) int32 diff --git a/libgo/go/runtime/testdata/testprog/deadlock.go b/libgo/go/runtime/testdata/testprog/deadlock.go index c938fcf..ca2be57 100644 --- a/libgo/go/runtime/testdata/testprog/deadlock.go +++ b/libgo/go/runtime/testdata/testprog/deadlock.go @@ -32,6 +32,7 @@ func init() { register("PanicTraceback", PanicTraceback) register("GoschedInPanic", GoschedInPanic) register("SyscallInPanic", SyscallInPanic) + register("PanicLoop", PanicLoop) } func SimpleDeadlock() { @@ -214,3 +215,13 @@ func pt2() { }() panic("hello") } + +type panicError struct{} + +func (*panicError) Error() string { + panic("double error") +} + +func PanicLoop() { + panic(&panicError{}) +} diff --git a/libgo/go/runtime/testdata/testprog/gc.go b/libgo/go/runtime/testdata/testprog/gc.go index a0c1f82..744b610 100644 --- a/libgo/go/runtime/testdata/testprog/gc.go +++ b/libgo/go/runtime/testdata/testprog/gc.go @@ -98,11 +98,25 @@ func GCFairness2() { // If the scheduling rules change, this may not be enough time // to let all goroutines run, but for now we cycle through // them rapidly. + // + // OpenBSD's scheduler makes every usleep() take at least + // 20ms, so we need a long time to ensure all goroutines have + // run. If they haven't run after 30ms, give it another 1000ms + // and check again. time.Sleep(30 * time.Millisecond) + var fail bool for i := range count { if atomic.LoadInt64(&count[i]) == 0 { - fmt.Printf("goroutine %d did not run\n", i) - return + fail = true + } + } + if fail { + time.Sleep(1 * time.Second) + for i := range count { + if atomic.LoadInt64(&count[i]) == 0 { + fmt.Printf("goroutine %d did not run\n", i) + return + } } } fmt.Println("OK") diff --git a/libgo/go/runtime/testdata/testprog/map.go b/libgo/go/runtime/testdata/testprog/map.go new file mode 100644 index 0000000..5524289 --- /dev/null +++ b/libgo/go/runtime/testdata/testprog/map.go @@ -0,0 +1,77 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package main + +import "runtime" + +func init() { + register("concurrentMapWrites", concurrentMapWrites) + register("concurrentMapReadWrite", concurrentMapReadWrite) + register("concurrentMapIterateWrite", concurrentMapIterateWrite) +} + +func concurrentMapWrites() { + m := map[int]int{} + c := make(chan struct{}) + go func() { + for i := 0; i < 10000; i++ { + m[5] = 0 + runtime.Gosched() + } + c <- struct{}{} + }() + go func() { + for i := 0; i < 10000; i++ { + m[6] = 0 + runtime.Gosched() + } + c <- struct{}{} + }() + <-c + <-c +} + +func concurrentMapReadWrite() { + m := map[int]int{} + c := make(chan struct{}) + go func() { + for i := 0; i < 10000; i++ { + m[5] = 0 + runtime.Gosched() + } + c <- struct{}{} + }() + go func() { + for i := 0; i < 10000; i++ { + _ = m[6] + runtime.Gosched() + } + c <- struct{}{} + }() + <-c + <-c +} + +func concurrentMapIterateWrite() { + m := map[int]int{} + c := make(chan struct{}) + go func() { + for i := 0; i < 10000; i++ { + m[5] = 0 + runtime.Gosched() + } + c <- struct{}{} + }() + go func() { + for i := 0; i < 10000; i++ { + for range m { + } + runtime.Gosched() + } + c <- struct{}{} + }() + <-c + <-c +} diff --git a/libgo/go/runtime/testdata/testprogcgo/pprof.go b/libgo/go/runtime/testdata/testprogcgo/pprof.go index cb30ec5..4460b93 100644 --- a/libgo/go/runtime/testdata/testprogcgo/pprof.go +++ b/libgo/go/runtime/testdata/testprogcgo/pprof.go @@ -1,4 +1,4 @@ -// Copyright 2016 The Go Authors. All rights reserved. +// Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. diff --git a/libgo/go/runtime/testdata/testprogcgo/raceprof.go b/libgo/go/runtime/testdata/testprogcgo/raceprof.go new file mode 100644 index 0000000..fe624c5 --- /dev/null +++ b/libgo/go/runtime/testdata/testprogcgo/raceprof.go @@ -0,0 +1,78 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build linux,amd64 + +package main + +// Test that we can collect a lot of colliding profiling signals from +// an external C thread. This used to fail when built with the race +// detector, because a call of the predeclared function copy was +// turned into a call to runtime.slicecopy, which is not marked nosplit. + +/* +#include <signal.h> +#include <stdint.h> +#include <pthread.h> +#include <sched.h> + +struct cgoTracebackArg { + uintptr_t context; + uintptr_t sigContext; + uintptr_t* buf; + uintptr_t max; +}; + +static int raceprofCount; + +// We want a bunch of different profile stacks that collide in the +// hash table maintained in runtime/cpuprof.go. This code knows the +// size of the hash table (1 << 10) and knows that the hash function +// is simply multiplicative. 
+void raceprofTraceback(void* parg) { + struct cgoTracebackArg* arg = (struct cgoTracebackArg*)(parg); + raceprofCount++; + arg->buf[0] = raceprofCount * (1 << 10); + arg->buf[1] = 0; +} + +static void* raceprofThread(void* p) { + int i; + + for (i = 0; i < 100; i++) { + pthread_kill(pthread_self(), SIGPROF); + sched_yield(); + } + return 0; +} + +void runRaceprofThread() { + pthread_t tid; + pthread_create(&tid, 0, raceprofThread, 0); + pthread_join(tid, 0); +} +*/ +import "C" + +import ( + "bytes" + "fmt" + "runtime" + "runtime/pprof" + "unsafe" +) + +func init() { + register("CgoRaceprof", CgoRaceprof) +} + +func CgoRaceprof() { + runtime.SetCgoTraceback(0, unsafe.Pointer(C.raceprofTraceback), nil, nil) + + var buf bytes.Buffer + pprof.StartCPUProfile(&buf) + + C.runRaceprofThread() + fmt.Println("OK") +} diff --git a/libgo/go/runtime/testdata/testprogcgo/racesig.go b/libgo/go/runtime/testdata/testprogcgo/racesig.go new file mode 100644 index 0000000..d0c1c3c --- /dev/null +++ b/libgo/go/runtime/testdata/testprogcgo/racesig.go @@ -0,0 +1,102 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build linux,amd64 + +package main + +// Test that an external C thread that is calling malloc can be hit +// with SIGCHLD signals. This used to fail when built with the race +// detector, because in that case the signal handler would indirectly +// call the C malloc function. + +/* +#include <errno.h> +#include <signal.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <pthread.h> +#include <sched.h> +#include <unistd.h> + +#define ALLOCERS 100 +#define SIGNALERS 10 + +static void* signalThread(void* p) { + pthread_t* pt = (pthread_t*)(p); + int i, j; + + for (i = 0; i < 100; i++) { + for (j = 0; j < ALLOCERS; j++) { + if (pthread_kill(pt[j], SIGCHLD) < 0) { + return NULL; + } + } + usleep(1); + } + return NULL; +} + +#define CALLS 100 + +static void* mallocThread(void* p) { + int i; + void *a[CALLS]; + + for (i = 0; i < ALLOCERS; i++) { + sched_yield(); + } + for (i = 0; i < CALLS; i++) { + a[i] = malloc(i); + } + for (i = 0; i < CALLS; i++) { + free(a[i]); + } + return NULL; +} + +void runRaceSignalThread() { + int i; + pthread_t m[ALLOCERS]; + pthread_t s[SIGNALERS]; + + for (i = 0; i < ALLOCERS; i++) { + pthread_create(&m[i], NULL, mallocThread, NULL); + } + for (i = 0; i < SIGNALERS; i++) { + pthread_create(&s[i], NULL, signalThread, &m[0]); + } + for (i = 0; i < SIGNALERS; i++) { + pthread_join(s[i], NULL); + } + for (i = 0; i < ALLOCERS; i++) { + pthread_join(m[i], NULL); + } +} +*/ +import "C" + +import ( + "fmt" + "os" + "time" +) + +func init() { + register("CgoRaceSignal", CgoRaceSignal) +} + +func CgoRaceSignal() { + // The failure symptom is that the program hangs because of a + // deadlock in malloc, so set an alarm. + go func() { + time.Sleep(5 * time.Second) + fmt.Println("Hung for 5 seconds") + os.Exit(1) + }() + + C.runRaceSignalThread() + fmt.Println("OK") +} diff --git a/libgo/go/runtime/testdata/testprogcgo/threadpprof.go b/libgo/go/runtime/testdata/testprogcgo/threadpprof.go index fdeee69..44afb91 100644 --- a/libgo/go/runtime/testdata/testprogcgo/threadpprof.go +++ b/libgo/go/runtime/testdata/testprogcgo/threadpprof.go @@ -1,4 +1,4 @@ -// Copyright 2016 The Go Authors. All rights reserved. +// Copyright 2016 The Go Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. @@ -39,17 +39,6 @@ struct cgoTracebackArg { uintptr_t max; }; -static void *pprofThread(void* p) { - time_t start; - - (void)p; - start = time(NULL); - while (__sync_add_and_fetch(&cpuHogThreadCount, 0) < 2 && time(NULL) - start < 2) { - cpuHogThread(); - } -} - - // pprofCgoThreadTraceback is passed to runtime.SetCgoTraceback. // For testing purposes it pretends that all CPU hits in C code are in cpuHog. void pprofCgoThreadTraceback(void* parg) { @@ -64,6 +53,18 @@ void pprofCgoThreadTraceback(void* parg) { int getCPUHogThreadCount() { return __sync_add_and_fetch(&cpuHogThreadCount, 0); } + +static void* cpuHogDriver(void* arg __attribute__ ((unused))) { + while (1) { + cpuHogThread(); + } + return 0; +} + +void runCPUHogThread() { + pthread_t tid; + pthread_create(&tid, 0, cpuHogDriver, 0); +} */ import "C" @@ -79,11 +80,19 @@ import ( func init() { register("CgoPprofThread", CgoPprofThread) + register("CgoPprofThreadNoTraceback", CgoPprofThreadNoTraceback) } func CgoPprofThread() { runtime.SetCgoTraceback(0, unsafe.Pointer(C.pprofCgoThreadTraceback), nil, nil) + pprofThread() +} + +func CgoPprofThreadNoTraceback() { + pprofThread() +} +func pprofThread() { f, err := ioutil.TempFile("", "prof") if err != nil { fmt.Fprintln(os.Stderr, err) @@ -95,6 +104,8 @@ func CgoPprofThread() { os.Exit(2) } + C.runCPUHogThread() + t0 := time.Now() for C.getCPUHogThreadCount() < 2 && time.Since(t0) < time.Second { time.Sleep(100 * time.Millisecond) diff --git a/libgo/go/runtime/testdata/testprogcgo/threadprof.go b/libgo/go/runtime/testdata/testprogcgo/threadprof.go index a77479d..2d4c103 100644 --- a/libgo/go/runtime/testdata/testprogcgo/threadprof.go +++ b/libgo/go/runtime/testdata/testprogcgo/threadprof.go @@ -2,7 +2,11 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +// We only build this file with the tag "threadprof", since it starts +// a thread running a busy loop at constructor time. + // +build !plan9,!windows +// +build threadprof package main @@ -21,6 +25,7 @@ static void *thread1(void *p) { spinlock = 0; return NULL; } + __attribute__((constructor)) void issue9456() { pthread_t tid; pthread_create(&tid, 0, thread1, NULL); @@ -84,8 +89,8 @@ func CgoExternalThreadSignal() { out, err := exec.Command(os.Args[0], "CgoExternalThreadSignal", "crash").CombinedOutput() if err == nil { - fmt.Println("C signal did not crash as expected\n") - fmt.Printf("%s\n", out) + fmt.Println("C signal did not crash as expected") + fmt.Printf("\n%s\n", out) os.Exit(1) } diff --git a/libgo/go/runtime/time.go b/libgo/go/runtime/time.go index 8df185d..604ccde 100644 --- a/libgo/go/runtime/time.go +++ b/libgo/go/runtime/time.go @@ -88,12 +88,12 @@ func addtimer(t *timer) { unlock(&timers.lock) } -// Add a timer to the heap and start or kick the timer proc. -// If the new timer is earlier than any of the others. +// Add a timer to the heap and start or kick timerproc if the new timer is +// earlier than any of the others. // Timers are locked. func addtimerLocked(t *timer) { // when must never be negative; otherwise timerproc will overflow - // during its delta calculation and never expire other runtime·timers. + // during its delta calculation and never expire other runtime timers. if t.when < 0 { t.when = 1<<63 - 1 } @@ -150,7 +150,7 @@ func deltimer(t *timer) bool { // Timerproc runs the time-driven events. 
// It sleeps until the next event in the timers heap. -// If addtimer inserts a new earlier event, addtimer1 wakes timerproc early. +// If addtimer inserts a new earlier event, it wakes timerproc early. func timerproc() { timers.gp = getg() for { diff --git a/libgo/go/runtime/trace.go b/libgo/go/runtime/trace.go index 09a150f..61cfa8e 100644 --- a/libgo/go/runtime/trace.go +++ b/libgo/go/runtime/trace.go @@ -28,8 +28,8 @@ const ( traceEvProcStop = 6 // stop of P [timestamp] traceEvGCStart = 7 // GC start [timestamp, seq, stack id] traceEvGCDone = 8 // GC done [timestamp] - traceEvGCScanStart = 9 // GC scan start [timestamp] - traceEvGCScanDone = 10 // GC scan done [timestamp] + traceEvGCScanStart = 9 // GC mark termination start [timestamp] + traceEvGCScanDone = 10 // GC mark termination done [timestamp] traceEvGCSweepStart = 11 // GC sweep start [timestamp, stack id] traceEvGCSweepDone = 12 // GC sweep done [timestamp] traceEvGoCreate = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id] @@ -60,7 +60,9 @@ const ( traceEvGoStartLocal = 38 // goroutine starts running on the same P as the last event [timestamp, goroutine id] traceEvGoUnblockLocal = 39 // goroutine is unblocked on the same P as the last event [timestamp, goroutine id, stack] traceEvGoSysExitLocal = 40 // syscall exit on the same P as the last event [timestamp, goroutine id, real timestamp] - traceEvCount = 41 + traceEvGoStartLabel = 41 // goroutine starts running with label [timestamp, goroutine id, seq, label string id] + traceEvGoBlockGC = 42 // goroutine blocks on GC assist [timestamp, stack] + traceEvCount = 43 ) const ( @@ -112,15 +114,20 @@ var trace struct { empty traceBufPtr // stack of empty buffers fullHead traceBufPtr // queue of full buffers fullTail traceBufPtr - reader *g // goroutine that called ReadTrace, or nil + reader guintptr // goroutine that called ReadTrace, or nil stackTab traceStackTable // maps stack traces to unique ids // Dictionary for traceEvString. - // Currently this is used only for func/file:line info after tracing session, - // so we assume single-threaded access. + // + // Currently this is used only at trace setup and for + // func/file:line info after tracing session, so we assume + // single-threaded access. strings map[string]uint64 stringSeq uint64 + // markWorkerLabels maps gcMarkWorkerMode to string ID. + markWorkerLabels [len(gcMarkWorkerModeStrings)]uint64 + bufLock mutex // protects buf buf traceBufPtr // global trace buffer, used when running without a p } @@ -134,6 +141,8 @@ type traceBufHeader struct { } // traceBuf is per-P tracing buffer. +// +//go:notinheap type traceBuf struct { traceBufHeader arr [64<<10 - unsafe.Sizeof(traceBufHeader{})]byte // underlying buffer for traceBufHeader.buf @@ -144,6 +153,8 @@ type traceBuf struct { // allocated from the GC'd heap, so this is safe, and are often // manipulated in contexts where write barriers are not allowed, so // this is necessary. +// +// TODO: Since traceBuf is now go:notinheap, this isn't necessary. type traceBufPtr uintptr func (tp traceBufPtr) ptr() *traceBuf { return (*traceBuf)(unsafe.Pointer(tp)) } @@ -184,10 +195,21 @@ func StartTrace() error { // trace.enabled is set afterwards once we have emitted all preliminary events. _g_ := getg() _g_.m.startingtrace = true + + // Obtain current stack ID to use in all traceEvGoCreate events below. 
+ mp := acquirem() + stkBuf := make([]location, traceStackSize) + stackID := traceStackID(mp, stkBuf, 2) + releasem(mp) + for _, gp := range allgs { status := readgstatus(gp) if status != _Gdead { - traceGoCreate(gp, gp.startpc) // also resets gp.traceseq/tracelastp + gp.traceseq = 0 + gp.tracelastp = getg().m.p + // +PCQuantum because traceFrameForPC expects return PCs and subtracts PCQuantum. + id := trace.stackTab.put([]location{location{pc: gp.startpc + sys.PCQuantum}}) + traceEvent(traceEvGoCreate, -1, uint64(gp.goid), uint64(id), stackID) } if status == _Gwaiting { // traceEvGoWaiting is implied to have seq=1. @@ -217,6 +239,18 @@ func StartTrace() error { _g_.m.startingtrace = false trace.enabled = true + // Register runtime goroutine labels. + _, pid, bufp := traceAcquireBuffer() + buf := (*bufp).ptr() + if buf == nil { + buf = traceFlush(0).ptr() + (*bufp).set(buf) + } + for i, label := range gcMarkWorkerModeStrings[:] { + trace.markWorkerLabels[i], buf = traceString(buf, label) + } + traceReleaseBuffer(pid) + unlock(&trace.bufLock) startTheWorld() @@ -251,10 +285,12 @@ func StopTrace() { p.tracebuf = 0 } } - if trace.buf != 0 && trace.buf.ptr().pos != 0 { + if trace.buf != 0 { buf := trace.buf trace.buf = 0 - traceFullQueue(buf) + if buf.ptr().pos != 0 { + traceFullQueue(buf) + } } for { @@ -275,7 +311,7 @@ func StopTrace() { // The world is started but we've set trace.shutdown, so new tracing can't start. // Wait for the trace reader to flush pending buffers and stop. - semacquire(&trace.shutdownSema, false) + semacquire(&trace.shutdownSema, 0) if raceenabled { raceacquire(unsafe.Pointer(&trace.shutdownSema)) } @@ -296,7 +332,7 @@ func StopTrace() { if trace.fullHead != 0 || trace.fullTail != 0 { throw("trace: non-empty full trace buffer") } - if trace.reading != 0 || trace.reader != nil { + if trace.reading != 0 || trace.reader != 0 { throw("trace: reading after shutdown") } for trace.empty != 0 { @@ -324,7 +360,7 @@ func ReadTrace() []byte { lock(&trace.lock) trace.lockOwner = getg() - if trace.reader != nil { + if trace.reader != 0 { // More than one goroutine reads trace. This is bad. // But we rather do not crash the program because of tracing, // because tracing can be enabled at runtime on prod servers. @@ -344,11 +380,11 @@ func ReadTrace() []byte { trace.headerWritten = true trace.lockOwner = nil unlock(&trace.lock) - return []byte("go 1.7 trace\x00\x00\x00\x00") + return []byte("go 1.8 trace\x00\x00\x00\x00") } // Wait for new data. if trace.fullHead == 0 && !trace.shutdown { - trace.reader = getg() + trace.reader.set(getg()) goparkunlock(&trace.lock, "trace reader (blocked)", traceEvGoBlock, 2) lock(&trace.lock) } @@ -402,16 +438,16 @@ func ReadTrace() []byte { // traceReader returns the trace reader that should be woken up, if any. 
func traceReader() *g { - if trace.reader == nil || (trace.fullHead == 0 && !trace.shutdown) { + if trace.reader == 0 || (trace.fullHead == 0 && !trace.shutdown) { return nil } lock(&trace.lock) - if trace.reader == nil || (trace.fullHead == 0 && !trace.shutdown) { + if trace.reader == 0 || (trace.fullHead == 0 && !trace.shutdown) { unlock(&trace.lock) return nil } - gp := trace.reader - trace.reader = nil + gp := trace.reader.ptr() + trace.reader.set(nil) unlock(&trace.lock) return gp } @@ -513,22 +549,7 @@ func traceEvent(ev byte, skip int, args ...uint64) { if skip == 0 { buf.varint(0) } else if skip > 0 { - _g_ := getg() - gp := mp.curg - var nstk int - if gp == _g_ { - nstk = callers(skip, buf.stk[:]) - } else if gp != nil { - // FIXME: get stack trace of different goroutine. - } - if nstk > 0 { - nstk-- // skip runtime.goexit - } - if nstk > 0 && gp.goid == 1 { - nstk-- // skip runtime.main - } - id := trace.stackTab.put(buf.stk[:nstk]) - buf.varint(uint64(id)) + buf.varint(traceStackID(mp, buf.stk[:], skip)) } evSize := buf.pos - startPos if evSize > maxSize { @@ -541,6 +562,25 @@ func traceEvent(ev byte, skip int, args ...uint64) { traceReleaseBuffer(pid) } +func traceStackID(mp *m, buf []location, skip int) uint64 { + _g_ := getg() + gp := mp.curg + var nstk int + if gp == _g_ { + nstk = callers(skip+1, buf[:]) + } else if gp != nil { + // FIXME: get stack trace of different goroutine. + } + if nstk > 0 { + nstk-- // skip runtime.goexit + } + if nstk > 0 && gp.goid == 1 { + nstk-- // skip runtime.main + } + id := trace.stackTab.put(buf[:nstk]) + return uint64(id) +} + // traceAcquireBuffer returns trace buffer to use and, if necessary, locks it. func traceAcquireBuffer() (mp *m, pid int32, bufp *traceBufPtr) { mp = acquirem() @@ -799,11 +839,14 @@ type traceAlloc struct { // traceAllocBlock is allocated from non-GC'd memory, so it must not // contain heap pointers. Writes to pointers to traceAllocBlocks do // not need write barriers. +// +//go:notinheap type traceAllocBlock struct { next traceAllocBlockPtr data [64<<10 - sys.PtrSize]byte } +// TODO: Since traceAllocBlock is now go:notinheap, this isn't necessary. type traceAllocBlockPtr uintptr func (p traceAllocBlockPtr) ptr() *traceAllocBlock { return (*traceAllocBlock)(unsafe.Pointer(p)) } @@ -898,7 +941,9 @@ func traceGoStart() { _g_ := getg().m.curg _p_ := _g_.m.p _g_.traceseq++ - if _g_.tracelastp == _p_ { + if _g_ == _p_.ptr().gcBgMarkWorker.ptr() { + traceEvent(traceEvGoStartLabel, -1, uint64(_g_.goid), _g_.traceseq, trace.markWorkerLabels[_p_.ptr().gcMarkWorkerMode]) + } else if _g_.tracelastp == _p_ { traceEvent(traceEvGoStartLocal, -1, uint64(_g_.goid)) } else { _g_.tracelastp = _p_ @@ -979,5 +1024,10 @@ func traceHeapAlloc() { } func traceNextGC() { - traceEvent(traceEvNextGC, -1, memstats.next_gc) + if memstats.next_gc == ^uint64(0) { + // Heap-based triggering is disabled. + traceEvent(traceEvNextGC, -1, 0) + } else { + traceEvent(traceEvNextGC, -1, memstats.next_gc) + } } diff --git a/libgo/go/runtime/trace/trace.go b/libgo/go/runtime/trace/trace.go new file mode 100644 index 0000000..7cbb8a6 --- /dev/null +++ b/libgo/go/runtime/trace/trace.go @@ -0,0 +1,42 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Go execution tracer. 
+// The tracer captures a wide range of execution events like goroutine +// creation/blocking/unblocking, syscall enter/exit/block, GC-related events, +// changes of heap size, processor start/stop, etc and writes them to an io.Writer +// in a compact form. A precise nanosecond-precision timestamp and a stack +// trace is captured for most events. A trace can be analyzed later with +// 'go tool trace' command. +package trace + +import ( + "io" + "runtime" +) + +// Start enables tracing for the current program. +// While tracing, the trace will be buffered and written to w. +// Start returns an error if tracing is already enabled. +func Start(w io.Writer) error { + if err := runtime.StartTrace(); err != nil { + return err + } + go func() { + for { + data := runtime.ReadTrace() + if data == nil { + break + } + w.Write(data) + } + }() + return nil +} + +// Stop stops the current tracing, if any. +// Stop only returns after all the writes for the trace have completed. +func Stop() { + runtime.StopTrace() +} diff --git a/libgo/go/runtime/trace/trace_stack_test.go b/libgo/go/runtime/trace/trace_stack_test.go new file mode 100644 index 0000000..c37b33d --- /dev/null +++ b/libgo/go/runtime/trace/trace_stack_test.go @@ -0,0 +1,282 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package trace_test + +import ( + "bytes" + "internal/testenv" + "internal/trace" + "net" + "os" + "runtime" + . "runtime/trace" + "sync" + "testing" + "time" +) + +// TestTraceSymbolize tests symbolization and that events has proper stacks. +// In particular that we strip bottom uninteresting frames like goexit, +// top uninteresting frames (runtime guts). +func TestTraceSymbolize(t *testing.T) { + testenv.MustHaveGoBuild(t) + + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + defer Stop() // in case of early return + + // Now we will do a bunch of things for which we verify stacks later. + // It is impossible to ensure that a goroutine has actually blocked + // on a channel, in a select or otherwise. So we kick off goroutines + // that need to block first in the hope that while we are executing + // the rest of the test, they will block. 
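// Editor's note (usage sketch, not part of the patch): the new runtime/trace
// package above is the public way to drive runtime.StartTrace/ReadTrace/
// StopTrace. A minimal program writes the trace to a file, which can then be
// inspected with 'go tool trace'; the file name below is arbitrary:
package main

import (
	"log"
	"os"
	"runtime/trace"
)

func main() {
	f, err := os.Create("prog.trace")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()
	if err := trace.Start(f); err != nil {
		log.Fatal(err)
	}
	defer trace.Stop()
	// ... traced work goes here ...
}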
+ go func() { + select {} + }() + go func() { + var c chan int + c <- 0 + }() + go func() { + var c chan int + <-c + }() + done1 := make(chan bool) + go func() { + <-done1 + }() + done2 := make(chan bool) + go func() { + done2 <- true + }() + c1 := make(chan int) + c2 := make(chan int) + go func() { + select { + case <-c1: + case <-c2: + } + }() + var mu sync.Mutex + mu.Lock() + go func() { + mu.Lock() + mu.Unlock() + }() + var wg sync.WaitGroup + wg.Add(1) + go func() { + wg.Wait() + }() + cv := sync.NewCond(&sync.Mutex{}) + go func() { + cv.L.Lock() + cv.Wait() + cv.L.Unlock() + }() + ln, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatalf("failed to listen: %v", err) + } + go func() { + c, err := ln.Accept() + if err != nil { + t.Errorf("failed to accept: %v", err) + return + } + c.Close() + }() + rp, wp, err := os.Pipe() + if err != nil { + t.Fatalf("failed to create a pipe: %v", err) + } + defer rp.Close() + defer wp.Close() + pipeReadDone := make(chan bool) + go func() { + var data [1]byte + rp.Read(data[:]) + pipeReadDone <- true + }() + + time.Sleep(100 * time.Millisecond) + runtime.GC() + runtime.Gosched() + time.Sleep(100 * time.Millisecond) // the last chance for the goroutines above to block + done1 <- true + <-done2 + select { + case c1 <- 0: + case c2 <- 0: + } + mu.Unlock() + wg.Done() + cv.Signal() + c, err := net.Dial("tcp", ln.Addr().String()) + if err != nil { + t.Fatalf("failed to dial: %v", err) + } + c.Close() + var data [1]byte + wp.Write(data[:]) + <-pipeReadDone + + Stop() + events, _ := parseTrace(t, buf) + + // Now check that the stacks are correct. + type frame struct { + Fn string + Line int + } + type eventDesc struct { + Type byte + Stk []frame + } + want := []eventDesc{ + {trace.EvGCStart, []frame{ + {"runtime.GC", 0}, + {"runtime/trace_test.TestTraceSymbolize", 107}, + {"testing.tRunner", 0}, + }}, + {trace.EvGoStart, []frame{ + {"runtime/trace_test.TestTraceSymbolize.func1", 37}, + }}, + {trace.EvGoSched, []frame{ + {"runtime/trace_test.TestTraceSymbolize", 108}, + {"testing.tRunner", 0}, + }}, + {trace.EvGoCreate, []frame{ + {"runtime/trace_test.TestTraceSymbolize", 39}, + {"testing.tRunner", 0}, + }}, + {trace.EvGoStop, []frame{ + {"runtime.block", 0}, + {"runtime/trace_test.TestTraceSymbolize.func1", 38}, + }}, + {trace.EvGoStop, []frame{ + {"runtime.chansend1", 0}, + {"runtime/trace_test.TestTraceSymbolize.func2", 42}, + }}, + {trace.EvGoStop, []frame{ + {"runtime.chanrecv1", 0}, + {"runtime/trace_test.TestTraceSymbolize.func3", 46}, + }}, + {trace.EvGoBlockRecv, []frame{ + {"runtime.chanrecv1", 0}, + {"runtime/trace_test.TestTraceSymbolize.func4", 50}, + }}, + {trace.EvGoUnblock, []frame{ + {"runtime.chansend1", 0}, + {"runtime/trace_test.TestTraceSymbolize", 110}, + {"testing.tRunner", 0}, + }}, + {trace.EvGoBlockSend, []frame{ + {"runtime.chansend1", 0}, + {"runtime/trace_test.TestTraceSymbolize.func5", 54}, + }}, + {trace.EvGoUnblock, []frame{ + {"runtime.chanrecv1", 0}, + {"runtime/trace_test.TestTraceSymbolize", 111}, + {"testing.tRunner", 0}, + }}, + {trace.EvGoBlockSelect, []frame{ + {"runtime.selectgo", 0}, + {"runtime/trace_test.TestTraceSymbolize.func6", 59}, + }}, + {trace.EvGoUnblock, []frame{ + {"runtime.selectgo", 0}, + {"runtime/trace_test.TestTraceSymbolize", 112}, + {"testing.tRunner", 0}, + }}, + {trace.EvGoBlockSync, []frame{ + {"sync.(*Mutex).Lock", 0}, + {"runtime/trace_test.TestTraceSymbolize.func7", 67}, + }}, + {trace.EvGoUnblock, []frame{ + {"sync.(*Mutex).Unlock", 0}, + 
{"runtime/trace_test.TestTraceSymbolize", 116}, + {"testing.tRunner", 0}, + }}, + {trace.EvGoBlockSync, []frame{ + {"sync.(*WaitGroup).Wait", 0}, + {"runtime/trace_test.TestTraceSymbolize.func8", 73}, + }}, + {trace.EvGoUnblock, []frame{ + {"sync.(*WaitGroup).Add", 0}, + {"sync.(*WaitGroup).Done", 0}, + {"runtime/trace_test.TestTraceSymbolize", 117}, + {"testing.tRunner", 0}, + }}, + {trace.EvGoBlockCond, []frame{ + {"sync.(*Cond).Wait", 0}, + {"runtime/trace_test.TestTraceSymbolize.func9", 78}, + }}, + {trace.EvGoUnblock, []frame{ + {"sync.(*Cond).Signal", 0}, + {"runtime/trace_test.TestTraceSymbolize", 118}, + {"testing.tRunner", 0}, + }}, + {trace.EvGoSleep, []frame{ + {"time.Sleep", 0}, + {"runtime/trace_test.TestTraceSymbolize", 109}, + {"testing.tRunner", 0}, + }}, + } + // Stacks for the following events are OS-dependent due to OS-specific code in net package. + if runtime.GOOS != "windows" && runtime.GOOS != "plan9" { + want = append(want, []eventDesc{ + {trace.EvGoBlockNet, []frame{ + {"net.(*netFD).accept", 0}, + {"net.(*TCPListener).accept", 0}, + {"net.(*TCPListener).Accept", 0}, + {"runtime/trace_test.TestTraceSymbolize.func10", 86}, + }}, + {trace.EvGoSysCall, []frame{ + {"syscall.read", 0}, + {"syscall.Read", 0}, + {"os.(*File).read", 0}, + {"os.(*File).Read", 0}, + {"runtime/trace_test.TestTraceSymbolize.func11", 102}, + }}, + }...) + } + matched := make([]bool, len(want)) + for _, ev := range events { + wantLoop: + for i, w := range want { + if matched[i] || w.Type != ev.Type || len(w.Stk) != len(ev.Stk) { + continue + } + + for fi, f := range ev.Stk { + wf := w.Stk[fi] + if wf.Fn != f.Fn || wf.Line != 0 && wf.Line != f.Line { + continue wantLoop + } + } + matched[i] = true + } + } + for i, m := range matched { + if m { + continue + } + w := want[i] + t.Errorf("did not match event %v at %v:%v", trace.EventDescriptions[w.Type].Name, w.Stk[0].Fn, w.Stk[0].Line) + t.Errorf("seen the following events of this type:") + for _, ev := range events { + if ev.Type != w.Type { + continue + } + for _, f := range ev.Stk { + t.Logf(" %v:%v", f.Fn, f.Line) + } + t.Logf("---") + } + } +} diff --git a/libgo/go/runtime/trace/trace_test.go b/libgo/go/runtime/trace/trace_test.go new file mode 100644 index 0000000..c5f64fc --- /dev/null +++ b/libgo/go/runtime/trace/trace_test.go @@ -0,0 +1,489 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package trace_test + +import ( + "bytes" + "flag" + "internal/trace" + "io" + "io/ioutil" + "net" + "os" + "runtime" + . 
"runtime/trace" + "sync" + "testing" + "time" +) + +var ( + saveTraces = flag.Bool("savetraces", false, "save traces collected by tests") +) + +func TestTraceStartStop(t *testing.T) { + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + Stop() + size := buf.Len() + if size == 0 { + t.Fatalf("trace is empty") + } + time.Sleep(100 * time.Millisecond) + if size != buf.Len() { + t.Fatalf("trace writes after stop: %v -> %v", size, buf.Len()) + } + saveTrace(t, buf, "TestTraceStartStop") +} + +func TestTraceDoubleStart(t *testing.T) { + Stop() + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + if err := Start(buf); err == nil { + t.Fatalf("succeed to start tracing second time") + } + Stop() + Stop() +} + +func TestTrace(t *testing.T) { + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + Stop() + saveTrace(t, buf, "TestTrace") + _, err := trace.Parse(buf, "") + if err == trace.ErrTimeOrder { + t.Skipf("skipping trace: %v", err) + } + if err != nil { + t.Fatalf("failed to parse trace: %v", err) + } +} + +func parseTrace(t *testing.T, r io.Reader) ([]*trace.Event, map[uint64]*trace.GDesc) { + events, err := trace.Parse(r, "") + if err == trace.ErrTimeOrder { + t.Skipf("skipping trace: %v", err) + } + if err != nil { + t.Fatalf("failed to parse trace: %v", err) + } + gs := trace.GoroutineStats(events) + for goid := range gs { + // We don't do any particular checks on the result at the moment. + // But still check that RelatedGoroutines does not crash, hang, etc. + _ = trace.RelatedGoroutines(events, goid) + } + return events, gs +} + +func testBrokenTimestamps(t *testing.T, data []byte) { + // On some processors cputicks (used to generate trace timestamps) + // produce non-monotonic timestamps. It is important that the parser + // distinguishes logically inconsistent traces (e.g. missing, excessive + // or misordered events) from broken timestamps. The former is a bug + // in tracer, the latter is a machine issue. + // So now that we have a consistent trace, test that (1) parser does + // not return a logical error in case of broken timestamps + // and (2) broken timestamps are eventually detected and reported. + trace.BreakTimestampsForTesting = true + defer func() { + trace.BreakTimestampsForTesting = false + }() + for i := 0; i < 1e4; i++ { + _, err := trace.Parse(bytes.NewReader(data), "") + if err == trace.ErrTimeOrder { + return + } + if err != nil { + t.Fatalf("failed to parse trace: %v", err) + } + } +} + +func TestTraceStress(t *testing.T) { + var wg sync.WaitGroup + done := make(chan bool) + + // Create a goroutine blocked before tracing. + wg.Add(1) + go func() { + <-done + wg.Done() + }() + + // Create a goroutine blocked in syscall before tracing. 
+ rp, wp, err := os.Pipe() + if err != nil { + t.Fatalf("failed to create pipe: %v", err) + } + defer func() { + rp.Close() + wp.Close() + }() + wg.Add(1) + go func() { + var tmp [1]byte + rp.Read(tmp[:]) + <-done + wg.Done() + }() + time.Sleep(time.Millisecond) // give the goroutine above time to block + + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + + procs := runtime.GOMAXPROCS(10) + time.Sleep(50 * time.Millisecond) // test proc stop/start events + + go func() { + runtime.LockOSThread() + for { + select { + case <-done: + return + default: + runtime.Gosched() + } + } + }() + + runtime.GC() + // Trigger GC from malloc. + n := int(1e3) + if runtime.GOOS == "openbsd" && runtime.GOARCH == "arm" { + // Reduce allocation to avoid running out of + // memory on the builder - see issue/12032. + n = 512 + } + for i := 0; i < n; i++ { + _ = make([]byte, 1<<20) + } + + // Create a bunch of busy goroutines to load all Ps. + for p := 0; p < 10; p++ { + wg.Add(1) + go func() { + // Do something useful. + tmp := make([]byte, 1<<16) + for i := range tmp { + tmp[i]++ + } + _ = tmp + <-done + wg.Done() + }() + } + + // Block in syscall. + wg.Add(1) + go func() { + var tmp [1]byte + rp.Read(tmp[:]) + <-done + wg.Done() + }() + + // Test timers. + timerDone := make(chan bool) + go func() { + time.Sleep(time.Millisecond) + timerDone <- true + }() + <-timerDone + + // A bit of network. + ln, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatalf("listen failed: %v", err) + } + defer ln.Close() + go func() { + c, err := ln.Accept() + if err != nil { + return + } + time.Sleep(time.Millisecond) + var buf [1]byte + c.Write(buf[:]) + c.Close() + }() + c, err := net.Dial("tcp", ln.Addr().String()) + if err != nil { + t.Fatalf("dial failed: %v", err) + } + var tmp [1]byte + c.Read(tmp[:]) + c.Close() + + go func() { + runtime.Gosched() + select {} + }() + + // Unblock helper goroutines and wait them to finish. + wp.Write(tmp[:]) + wp.Write(tmp[:]) + close(done) + wg.Wait() + + runtime.GOMAXPROCS(procs) + + Stop() + saveTrace(t, buf, "TestTraceStress") + trace := buf.Bytes() + parseTrace(t, buf) + testBrokenTimestamps(t, trace) +} + +// Do a bunch of various stuff (timers, GC, network, etc) in a separate goroutine. +// And concurrently with all that start/stop trace 3 times. +func TestTraceStressStartStop(t *testing.T) { + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(8)) + outerDone := make(chan bool) + + go func() { + defer func() { + outerDone <- true + }() + + var wg sync.WaitGroup + done := make(chan bool) + + wg.Add(1) + go func() { + <-done + wg.Done() + }() + + rp, wp, err := os.Pipe() + if err != nil { + t.Errorf("failed to create pipe: %v", err) + return + } + defer func() { + rp.Close() + wp.Close() + }() + wg.Add(1) + go func() { + var tmp [1]byte + rp.Read(tmp[:]) + <-done + wg.Done() + }() + time.Sleep(time.Millisecond) + + go func() { + runtime.LockOSThread() + for { + select { + case <-done: + return + default: + runtime.Gosched() + } + } + }() + + runtime.GC() + // Trigger GC from malloc. + n := int(1e3) + if runtime.GOOS == "openbsd" && runtime.GOARCH == "arm" { + // Reduce allocation to avoid running out of + // memory on the builder - see issue/12032. + n = 512 + } + for i := 0; i < n; i++ { + _ = make([]byte, 1<<20) + } + + // Create a bunch of busy goroutines to load all Ps. + for p := 0; p < 10; p++ { + wg.Add(1) + go func() { + // Do something useful. 
+ tmp := make([]byte, 1<<16) + for i := range tmp { + tmp[i]++ + } + _ = tmp + <-done + wg.Done() + }() + } + + // Block in syscall. + wg.Add(1) + go func() { + var tmp [1]byte + rp.Read(tmp[:]) + <-done + wg.Done() + }() + + runtime.GOMAXPROCS(runtime.GOMAXPROCS(1)) + + // Test timers. + timerDone := make(chan bool) + go func() { + time.Sleep(time.Millisecond) + timerDone <- true + }() + <-timerDone + + // A bit of network. + ln, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Errorf("listen failed: %v", err) + return + } + defer ln.Close() + go func() { + c, err := ln.Accept() + if err != nil { + return + } + time.Sleep(time.Millisecond) + var buf [1]byte + c.Write(buf[:]) + c.Close() + }() + c, err := net.Dial("tcp", ln.Addr().String()) + if err != nil { + t.Errorf("dial failed: %v", err) + return + } + var tmp [1]byte + c.Read(tmp[:]) + c.Close() + + go func() { + runtime.Gosched() + select {} + }() + + // Unblock helper goroutines and wait them to finish. + wp.Write(tmp[:]) + wp.Write(tmp[:]) + close(done) + wg.Wait() + }() + + for i := 0; i < 3; i++ { + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + time.Sleep(time.Millisecond) + Stop() + saveTrace(t, buf, "TestTraceStressStartStop") + trace := buf.Bytes() + parseTrace(t, buf) + testBrokenTimestamps(t, trace) + } + <-outerDone +} + +func TestTraceFutileWakeup(t *testing.T) { + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(8)) + c0 := make(chan int, 1) + c1 := make(chan int, 1) + c2 := make(chan int, 1) + const procs = 2 + var done sync.WaitGroup + done.Add(4 * procs) + for p := 0; p < procs; p++ { + const iters = 1e3 + go func() { + for i := 0; i < iters; i++ { + runtime.Gosched() + c0 <- 0 + } + done.Done() + }() + go func() { + for i := 0; i < iters; i++ { + runtime.Gosched() + <-c0 + } + done.Done() + }() + go func() { + for i := 0; i < iters; i++ { + runtime.Gosched() + select { + case c1 <- 0: + case c2 <- 0: + } + } + done.Done() + }() + go func() { + for i := 0; i < iters; i++ { + runtime.Gosched() + select { + case <-c1: + case <-c2: + } + } + done.Done() + }() + } + done.Wait() + + Stop() + saveTrace(t, buf, "TestTraceFutileWakeup") + events, _ := parseTrace(t, buf) + // Check that (1) trace does not contain EvFutileWakeup events and + // (2) there are no consecutive EvGoBlock/EvGCStart/EvGoBlock events + // (we call runtime.Gosched between all operations, so these would be futile wakeups). 
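+ // gs tracks a per-goroutine state: 1 means the last event was a block, 2 means the goroutine just started after blocking; a second block in either state fails the test (a futile wakeup or an inconsistent trace).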
+ gs := make(map[uint64]int) + for _, ev := range events { + switch ev.Type { + case trace.EvFutileWakeup: + t.Fatalf("found EvFutileWakeup event") + case trace.EvGoBlockSend, trace.EvGoBlockRecv, trace.EvGoBlockSelect: + if gs[ev.G] == 2 { + t.Fatalf("goroutine %v blocked on %v at %v right after start", + ev.G, trace.EventDescriptions[ev.Type].Name, ev.Ts) + } + if gs[ev.G] == 1 { + t.Fatalf("goroutine %v blocked on %v at %v while blocked", + ev.G, trace.EventDescriptions[ev.Type].Name, ev.Ts) + } + gs[ev.G] = 1 + case trace.EvGoStart: + if gs[ev.G] == 1 { + gs[ev.G] = 2 + } + default: + delete(gs, ev.G) + } + } +} + +func saveTrace(t *testing.T, buf *bytes.Buffer, name string) { + if !*saveTraces { + return + } + if err := ioutil.WriteFile(name+".trace", buf.Bytes(), 0600); err != nil { + t.Errorf("failed to write trace file: %s", err) + } +} diff --git a/libgo/go/runtime/unaligned2.go b/libgo/go/runtime/unaligned2.go index e52d6ce..b8aefb9 100644 --- a/libgo/go/runtime/unaligned2.go +++ b/libgo/go/runtime/unaligned2.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build arm mips64 mips64le armbe m68k mipso32 mipsn32 mips mipsle sparc alpha ia64 mipso64 mipsn64 mips64p32 mips64p32le sparc64 +// +build arm mips mipsle mips64 mips64le armbe m68k mipso32 mipsn32 sparc alpha ia64 mipso64 mipsn64 mips64p32 mips64p32le sparc64 package runtime diff --git a/libgo/go/runtime/utf8.go b/libgo/go/runtime/utf8.go new file mode 100644 index 0000000..e845451 --- /dev/null +++ b/libgo/go/runtime/utf8.go @@ -0,0 +1,130 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import _ "unsafe" // For go:linkname. + +// For gccgo, use go:linkname to rename compiler-called functions to +// themselves, so that the compiler will export them. +// +//go:linkname decoderune runtime.decoderune + +// Numbers fundamental to the encoding. +const ( + runeError = '\uFFFD' // the "error" Rune or "Unicode replacement character" + runeSelf = 0x80 // characters below Runeself are represented as themselves in a single byte. + maxRune = '\U0010FFFF' // Maximum valid Unicode code point. +) + +// Code points in the surrogate range are not valid for UTF-8. +const ( + surrogateMin = 0xD800 + surrogateMax = 0xDFFF +) + +const ( + t1 = 0x00 // 0000 0000 + tx = 0x80 // 1000 0000 + t2 = 0xC0 // 1100 0000 + t3 = 0xE0 // 1110 0000 + t4 = 0xF0 // 1111 0000 + t5 = 0xF8 // 1111 1000 + + maskx = 0x3F // 0011 1111 + mask2 = 0x1F // 0001 1111 + mask3 = 0x0F // 0000 1111 + mask4 = 0x07 // 0000 0111 + + rune1Max = 1<<7 - 1 + rune2Max = 1<<11 - 1 + rune3Max = 1<<16 - 1 + + // The default lowest and highest continuation byte. + locb = 0x80 // 1000 0000 + hicb = 0xBF // 1011 1111 +) + +// decoderune returns the non-ASCII rune at the start of +// s[k:] and the index after the rune in s. +// +// decoderune assumes that caller has checked that +// the to be decoded rune is a non-ASCII rune. +// +// If the string appears to be incomplete or decoding problems +// are encountered (runeerror, k + 1) is returned to ensure +// progress when decoderune is used to iterate over a string. 
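+//
+// For example, for s = "héllo" (bytes 68 C3 A9 6C 6C 6F), decoderune(s, 1)
+// decodes the two-byte sequence C3 A9 and returns ('é', 3).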
+func decoderune(s string, k int) (r rune, pos int) { + pos = k + + if k >= len(s) { + return runeError, k + 1 + } + + s = s[k:] + + switch { + case t2 <= s[0] && s[0] < t3: + // 0080-07FF two byte sequence + if len(s) > 1 && (locb <= s[1] && s[1] <= hicb) { + r = rune(s[0]&mask2)<<6 | rune(s[1]&maskx) + pos += 2 + if rune1Max < r { + return + } + } + case t3 <= s[0] && s[0] < t4: + // 0800-FFFF three byte sequence + if len(s) > 2 && (locb <= s[1] && s[1] <= hicb) && (locb <= s[2] && s[2] <= hicb) { + r = rune(s[0]&mask3)<<12 | rune(s[1]&maskx)<<6 | rune(s[2]&maskx) + pos += 3 + if rune2Max < r && !(surrogateMin <= r && r <= surrogateMax) { + return + } + } + case t4 <= s[0] && s[0] < t5: + // 10000-1FFFFF four byte sequence + if len(s) > 3 && (locb <= s[1] && s[1] <= hicb) && (locb <= s[2] && s[2] <= hicb) && (locb <= s[3] && s[3] <= hicb) { + r = rune(s[0]&mask4)<<18 | rune(s[1]&maskx)<<12 | rune(s[2]&maskx)<<6 | rune(s[3]&maskx) + pos += 4 + if rune3Max < r && r <= maxRune { + return + } + } + } + + return runeError, k + 1 +} + +// encoderune writes into p (which must be large enough) the UTF-8 encoding of the rune. +// It returns the number of bytes written. +func encoderune(p []byte, r rune) int { + // Negative values are erroneous. Making it unsigned addresses the problem. + switch i := uint32(r); { + case i <= rune1Max: + p[0] = byte(r) + return 1 + case i <= rune2Max: + _ = p[1] // eliminate bounds checks + p[0] = t2 | byte(r>>6) + p[1] = tx | byte(r)&maskx + return 2 + case i > maxRune, surrogateMin <= i && i <= surrogateMax: + r = runeError + fallthrough + case i <= rune3Max: + _ = p[2] // eliminate bounds checks + p[0] = t3 | byte(r>>12) + p[1] = tx | byte(r>>6)&maskx + p[2] = tx | byte(r)&maskx + return 3 + default: + _ = p[3] // eliminate bounds checks + p[0] = t4 | byte(r>>18) + p[1] = tx | byte(r>>12)&maskx + p[2] = tx | byte(r>>6)&maskx + p[3] = tx | byte(r)&maskx + return 4 + } +} diff --git a/libgo/go/runtime/vdso_none.go b/libgo/go/runtime/vdso_none.go index efae23f..fc21240 100644 --- a/libgo/go/runtime/vdso_none.go +++ b/libgo/go/runtime/vdso_none.go @@ -3,6 +3,7 @@ // license that can be found in the LICENSE file. // +build !linux +// +build !darwin package runtime diff --git a/libgo/go/runtime/write_err_android.go b/libgo/go/runtime/write_err_android.go index 4411a14..748dec6 100644 --- a/libgo/go/runtime/write_err_android.go +++ b/libgo/go/runtime/write_err_android.go @@ -75,7 +75,9 @@ func writeErr(b []byte) { if v == '\n' || writePos == len(dst)-1 { dst[writePos] = 0 write(writeFD, unsafe.Pointer(&writeBuf[0]), int32(hlen+writePos)) - memclrBytes(dst) + for i := range dst { + dst[i] = 0 + } writePos = 0 } } |
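The new utf8.go above builds runes from the classic UTF-8 leading-byte tags (t2, t3, t4) and the six-bit continuation mask (tx, maskx). As a quick sanity check of that bit arithmetic, here is a standalone sketch, not part of this patch (encoderune and decoderune are unexported runtime functions, so the helper names encode2 and encode3 are made up for the illustration), that re-derives the two- and three-byte encodings with the same masks and compares them against unicode/utf8:

// Standalone sketch (not from the patch): re-derive two- and three-byte
// UTF-8 encodings with the runtime's masks and cross-check unicode/utf8.
package main

import (
	"fmt"
	"unicode/utf8"
)

const (
	tx    = 0x80 // 10xxxxxx: every continuation byte starts with these two bits
	t2    = 0xC0 // 110xxxxx: leading byte of a two-byte sequence
	t3    = 0xE0 // 1110xxxx: leading byte of a three-byte sequence
	maskx = 0x3F // the low six payload bits carried by each continuation byte
)

// encode2 packs a code point in the range 0080-07FF into two bytes,
// mirroring the two-byte branch of the runtime's encoderune.
func encode2(r rune) []byte {
	return []byte{t2 | byte(r>>6), tx | byte(r)&maskx}
}

// encode3 packs a code point in the range 0800-FFFF (surrogates excluded)
// into three bytes, mirroring the three-byte branch.
func encode3(r rune) []byte {
	return []byte{t3 | byte(r>>12), tx | byte(r>>6)&maskx, tx | byte(r)&maskx}
}

func main() {
	for _, r := range []rune{'é', 'π', '世'} {
		var got []byte
		if r <= 0x7FF {
			got = encode2(r)
		} else {
			got = encode3(r)
		}
		buf := make([]byte, utf8.UTFMax)
		n := utf8.EncodeRune(buf, r)
		fmt.Printf("%U  masks: % X  utf8.EncodeRune: % X\n", r, got, buf[:n])
	}
}

Both paths print the same bytes, for instance U+00E9 encodes to C3 A9 and U+4E16 to E4 B8 96, which is exactly what the t2 and t3 branches of encoderune produce.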