author    Ian Lance Taylor <iant@golang.org>   2022-02-11 14:53:56 -0800
committer Ian Lance Taylor <iant@golang.org>   2022-02-11 15:01:19 -0800
commit    8dc2499aa62f768c6395c9754b8cabc1ce25c494 (patch)
tree      43d7fd2bbfd7ad8c9625a718a5e8718889351994 /libgo/go/runtime
parent    9a56779dbc4e2d9c15be8d31e36f2f59be7331a8 (diff)
libgo: update to Go1.18beta2
gotools/
	* Makefile.am (go_cmd_cgo_files): Add ast_go118.go.
	(check-go-tool): Copy golang.org/x/tools directories.
	* Makefile.in: Regenerate.

Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/384695
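A large share of the mechanical churn in this update replaces interface{} with any throughout the runtime sources. Go 1.18 declares any as a plain alias (type any = interface{}), so the two spellings are interchangeable; a minimal sketch (not from the patch) showing an equivalent signature in ordinary code:

package main

import "fmt"

// describe accepts any value; writing the parameter as `any` is identical
// to writing it as `interface{}` because any is an alias, not a new type.
func describe(v any) string {
	return fmt.Sprintf("%T: %v", v, v)
}

func main() {
	fmt.Println(describe(42))           // int: 42
	fmt.Println(describe("gofrontend")) // string: gofrontend
}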
Diffstat (limited to 'libgo/go/runtime')
-rw-r--r--  libgo/go/runtime/abi_test.go | 3
-rw-r--r--  libgo/go/runtime/alg.go | 16
-rw-r--r--  libgo/go/runtime/asan/asan.go | 38
-rw-r--r--  libgo/go/runtime/asan0.go | 22
-rw-r--r--  libgo/go/runtime/auxv_none.go | 1
-rw-r--r--  libgo/go/runtime/cgo/handle.go | 39
-rw-r--r--  libgo/go/runtime/cgo/handle_test.go | 6
-rw-r--r--  libgo/go/runtime/cgocall.go | 10
-rw-r--r--  libgo/go/runtime/cgocheck.go | 14
-rw-r--r--  libgo/go/runtime/chan.go | 5
-rw-r--r--  libgo/go/runtime/chan_test.go | 6
-rw-r--r--  libgo/go/runtime/checkptr_test.go | 50
-rw-r--r--  libgo/go/runtime/cpuprof.go | 22
-rw-r--r--  libgo/go/runtime/cputicks.go | 1
-rw-r--r--  libgo/go/runtime/crash_cgo_test.go | 138
-rw-r--r--  libgo/go/runtime/crash_nonunix_test.go | 14
-rw-r--r--  libgo/go/runtime/crash_test.go | 112
-rw-r--r--  libgo/go/runtime/crash_unix_test.go | 18
-rw-r--r--  libgo/go/runtime/debug/garbage_test.go | 73
-rw-r--r--  libgo/go/runtime/debug/mod.go | 188
-rw-r--r--  libgo/go/runtime/debug/panic_test.go | 1
-rw-r--r--  libgo/go/runtime/debuglog.go | 2
-rw-r--r--  libgo/go/runtime/debuglog_off.go | 1
-rw-r--r--  libgo/go/runtime/debuglog_on.go | 1
-rw-r--r--  libgo/go/runtime/defer_test.go | 80
-rw-r--r--  libgo/go/runtime/env_posix.go | 1
-rw-r--r--  libgo/go/runtime/error.go | 4
-rw-r--r--  libgo/go/runtime/export_debuglog_test.go | 18
-rw-r--r--  libgo/go/runtime/export_linux_test.go | 11
-rw-r--r--  libgo/go/runtime/export_mmap_test.go | 1
-rw-r--r--  libgo/go/runtime/export_pipe2_test.go | 1
-rw-r--r--  libgo/go/runtime/export_pipe_test.go | 1
-rw-r--r--  libgo/go/runtime/export_test.go | 211
-rw-r--r--  libgo/go/runtime/export_unix_test.go | 1
-rw-r--r--  libgo/go/runtime/export_windows_test.go | 2
-rw-r--r--  libgo/go/runtime/extern.go | 17
-rw-r--r--  libgo/go/runtime/float_test.go | 25
-rw-r--r--  libgo/go/runtime/gc_test.go | 21
-rw-r--r--  libgo/go/runtime/gcinfo_test.go | 29
-rw-r--r--  libgo/go/runtime/hash32.go | 1
-rw-r--r--  libgo/go/runtime/hash64.go | 1
-rw-r--r--  libgo/go/runtime/hash_test.go | 2
-rw-r--r--  libgo/go/runtime/heapdump.go | 10
-rw-r--r--  libgo/go/runtime/iface.go | 12
-rw-r--r--  libgo/go/runtime/iface_test.go | 16
-rw-r--r--  libgo/go/runtime/internal/atomic/atomic_test.go | 4
-rw-r--r--  libgo/go/runtime/internal/atomic/bench_test.go | 2
-rw-r--r--  libgo/go/runtime/internal/atomic/doc.go | 18
-rw-r--r--  libgo/go/runtime/internal/atomic/stubs.go | 1
-rw-r--r--  libgo/go/runtime/internal/atomic/types.go | 395
-rw-r--r--  libgo/go/runtime/internal/atomic/types_64bit.go | 29
-rw-r--r--  libgo/go/runtime/internal/math/math.go | 4
-rw-r--r--  libgo/go/runtime/internal/sys/consts.go (renamed from libgo/go/runtime/internal/sys/arch.go) | 27
-rw-r--r--  libgo/go/runtime/internal/sys/intrinsics_common.go | 15
-rw-r--r--  libgo/go/runtime/internal/sys/sys.go | 8
-rw-r--r--  libgo/go/runtime/lfstack_64bit.go | 1
-rw-r--r--  libgo/go/runtime/lfstack_test.go | 2
-rw-r--r--  libgo/go/runtime/libfuzzer.go | 1
-rw-r--r--  libgo/go/runtime/lock_futex.go | 1
-rw-r--r--  libgo/go/runtime/lock_js.go | 1
-rw-r--r--  libgo/go/runtime/lock_sema.go | 1
-rw-r--r--  libgo/go/runtime/lockrank.go | 41
-rw-r--r--  libgo/go/runtime/lockrank_off.go | 1
-rw-r--r--  libgo/go/runtime/lockrank_on.go | 1
-rw-r--r--  libgo/go/runtime/malloc.go | 130
-rw-r--r--  libgo/go/runtime/malloc_test.go | 19
-rw-r--r--  libgo/go/runtime/map.go | 80
-rw-r--r--  libgo/go/runtime/map_benchmark_test.go | 6
-rw-r--r--  libgo/go/runtime/map_fast32.go | 17
-rw-r--r--  libgo/go/runtime/map_fast64.go | 17
-rw-r--r--  libgo/go/runtime/map_faststr.go | 65
-rw-r--r--  libgo/go/runtime/map_test.go | 12
-rw-r--r--  libgo/go/runtime/mbarrier.go | 23
-rw-r--r--  libgo/go/runtime/mbitmap.go | 138
-rw-r--r--  libgo/go/runtime/mcache.go | 52
-rw-r--r--  libgo/go/runtime/mcentral.go | 85
-rw-r--r--  libgo/go/runtime/mcheckmark.go | 4
-rw-r--r--  libgo/go/runtime/mfinal.go | 10
-rw-r--r--  libgo/go/runtime/mfinal_test.go | 16
-rw-r--r--  libgo/go/runtime/mfixalloc.go | 15
-rw-r--r--  libgo/go/runtime/mgc.go | 95
-rw-r--r--  libgo/go/runtime/mgc_gccgo.go | 6
-rw-r--r--  libgo/go/runtime/mgcmark.go | 183
-rw-r--r--  libgo/go/runtime/mgcpacer.go | 694
-rw-r--r--  libgo/go/runtime/mgcpacer_test.go | 717
-rw-r--r--  libgo/go/runtime/mgcscavenge.go | 395
-rw-r--r--  libgo/go/runtime/mgcscavenge_test.go | 15
-rw-r--r--  libgo/go/runtime/mgcsweep.go | 278
-rw-r--r--  libgo/go/runtime/mgcwork.go | 15
-rw-r--r--  libgo/go/runtime/mheap.go | 149
-rw-r--r--  libgo/go/runtime/mkfastlog2table.go | 1
-rw-r--r--  libgo/go/runtime/mkpreempt.go | 25
-rw-r--r--  libgo/go/runtime/mksizeclasses.go | 1
-rw-r--r--  libgo/go/runtime/mpagealloc.go | 30
-rw-r--r--  libgo/go/runtime/mpagealloc_32bit.go | 7
-rw-r--r--  libgo/go/runtime/mpagealloc_64bit.go | 5
-rw-r--r--  libgo/go/runtime/mpagealloc_test.go | 9
-rw-r--r--  libgo/go/runtime/mpagecache.go | 12
-rw-r--r--  libgo/go/runtime/mpagecache_test.go | 73
-rw-r--r--  libgo/go/runtime/mpallocbits.go | 18
-rw-r--r--  libgo/go/runtime/mprof.go | 2
-rw-r--r--  libgo/go/runtime/mranges.go | 8
-rw-r--r--  libgo/go/runtime/msan/msan.go | 3
-rw-r--r--  libgo/go/runtime/msan0.go | 1
-rw-r--r--  libgo/go/runtime/mspanset.go | 14
-rw-r--r--  libgo/go/runtime/mstats.go | 20
-rw-r--r--  libgo/go/runtime/mwbbuf.go | 4
-rw-r--r--  libgo/go/runtime/nbpipe_pipe.go | 1
-rw-r--r--  libgo/go/runtime/nbpipe_pipe2.go | 3
-rw-r--r--  libgo/go/runtime/nbpipe_test.go | 1
-rw-r--r--  libgo/go/runtime/netpoll.go | 187
-rw-r--r--  libgo/go/runtime/netpoll_aix.go | 5
-rw-r--r--  libgo/go/runtime/netpoll_epoll.go | 6
-rw-r--r--  libgo/go/runtime/netpoll_fake.go | 1
-rw-r--r--  libgo/go/runtime/netpoll_kqueue.go | 6
-rw-r--r--  libgo/go/runtime/netpoll_solaris.go | 2
-rw-r--r--  libgo/go/runtime/netpoll_stub.go | 1
-rw-r--r--  libgo/go/runtime/norace_test.go | 1
-rw-r--r--  libgo/go/runtime/os3_solaris.go | 5
-rw-r--r--  libgo/go/runtime/os_aix.go | 23
-rw-r--r--  libgo/go/runtime/os_darwin.go | 5
-rw-r--r--  libgo/go/runtime/os_dragonfly.go | 3
-rw-r--r--  libgo/go/runtime/os_freebsd.go | 9
-rw-r--r--  libgo/go/runtime/os_js.go | 1
-rw-r--r--  libgo/go/runtime/os_linux.go | 159
-rw-r--r--  libgo/go/runtime/os_linux_arm64.go | 1
-rw-r--r--  libgo/go/runtime/os_linux_mips64x.go | 2
-rw-r--r--  libgo/go/runtime/os_linux_mipsx.go | 2
-rw-r--r--  libgo/go/runtime/os_linux_noauxv.go | 1
-rw-r--r--  libgo/go/runtime/os_linux_ppc64x.go | 2
-rw-r--r--  libgo/go/runtime/os_netbsd.go | 13
-rw-r--r--  libgo/go/runtime/os_netbsd_arm64.go | 7
-rw-r--r--  libgo/go/runtime/os_only_solaris.go | 1
-rw-r--r--  libgo/go/runtime/os_openbsd.go | 1
-rw-r--r--  libgo/go/runtime/panic.go | 65
-rw-r--r--  libgo/go/runtime/panic32.go | 1
-rw-r--r--  libgo/go/runtime/pprof/mprof_test.go | 32
-rw-r--r--  libgo/go/runtime/pprof/pprof.go | 11
-rw-r--r--  libgo/go/runtime/pprof/pprof_norusage.go | 1
-rw-r--r--  libgo/go/runtime/pprof/pprof_rusage.go | 1
-rw-r--r--  libgo/go/runtime/pprof/pprof_test.go | 478
-rw-r--r--  libgo/go/runtime/pprof/proto.go | 76
-rw-r--r--  libgo/go/runtime/pprof/proto_test.go | 30
-rw-r--r--  libgo/go/runtime/pprof/rusage_test.go | 39
-rw-r--r--  libgo/go/runtime/pprof/uname_linux_test.go | 61
-rw-r--r--  libgo/go/runtime/pprof/uname_other_test.go | 15
-rw-r--r--  libgo/go/runtime/preempt_nonwindows.go | 1
-rw-r--r--  libgo/go/runtime/print.go | 4
-rw-r--r--  libgo/go/runtime/proc.go | 171
-rw-r--r--  libgo/go/runtime/proc_test.go | 18
-rw-r--r--  libgo/go/runtime/race0.go | 1
-rw-r--r--  libgo/go/runtime/relax_stub.go | 1
-rw-r--r--  libgo/go/runtime/runtime1.go | 10
-rw-r--r--  libgo/go/runtime/runtime2.go | 23
-rw-r--r--  libgo/go/runtime/runtime_linux_test.go | 41
-rw-r--r--  libgo/go/runtime/runtime_mmap_test.go | 1
-rw-r--r--  libgo/go/runtime/runtime_test.go | 80
-rw-r--r--  libgo/go/runtime/runtime_unix_test.go | 1
-rw-r--r--  libgo/go/runtime/rwmutex_test.go | 5
-rw-r--r--  libgo/go/runtime/semasleep_test.go | 64
-rw-r--r--  libgo/go/runtime/signal_unix.go | 87
-rw-r--r--  libgo/go/runtime/signal_windows_test.go | 1
-rw-r--r--  libgo/go/runtime/sigqueue.go | 1
-rw-r--r--  libgo/go/runtime/sigqueue_note.go | 1
-rw-r--r--  libgo/go/runtime/sizeof_test.go | 6
-rw-r--r--  libgo/go/runtime/slice.go | 53
-rw-r--r--  libgo/go/runtime/stack_test.go | 114
-rw-r--r--  libgo/go/runtime/string.go | 20
-rw-r--r--  libgo/go/runtime/stubs.go | 27
-rw-r--r--  libgo/go/runtime/stubs2.go | 3
-rw-r--r--  libgo/go/runtime/stubs3.go | 1
-rw-r--r--  libgo/go/runtime/stubs_linux.go | 1
-rw-r--r--  libgo/go/runtime/stubs_nonlinux.go | 1
-rw-r--r--  libgo/go/runtime/symtab_test.go | 32
-rw-r--r--  libgo/go/runtime/sys_wasm.go | 3
-rw-r--r--  libgo/go/runtime/testdata/testprog/badtraceback.go | 3
-rw-r--r--  libgo/go/runtime/testdata/testprog/checkptr.go | 8
-rw-r--r--  libgo/go/runtime/testdata/testprog/gc.go | 143
-rw-r--r--  libgo/go/runtime/testdata/testprog/numcpu_freebsd.go | 11
-rw-r--r--  libgo/go/runtime/testdata/testprog/preempt.go | 4
-rw-r--r--  libgo/go/runtime/testdata/testprog/signal.go | 1
-rw-r--r--  libgo/go/runtime/testdata/testprog/sleep.go | 7
-rw-r--r--  libgo/go/runtime/testdata/testprog/syscalls_none.go | 1
-rw-r--r--  libgo/go/runtime/testdata/testprog/traceback_ancestors.go | 38
-rw-r--r--  libgo/go/runtime/testdata/testprogcgo/callback.go | 3
-rw-r--r--  libgo/go/runtime/testdata/testprogcgo/catchpanic.go | 1
-rw-r--r--  libgo/go/runtime/testdata/testprogcgo/dropm.go | 1
-rw-r--r--  libgo/go/runtime/testdata/testprogcgo/eintr.go | 1
-rw-r--r--  libgo/go/runtime/testdata/testprogcgo/exec.go | 1
-rw-r--r--  libgo/go/runtime/testdata/testprogcgo/gprof.go | 49
-rw-r--r--  libgo/go/runtime/testdata/testprogcgo/gprof_c.c | 33
-rw-r--r--  libgo/go/runtime/testdata/testprogcgo/lockosthread.go | 1
-rw-r--r--  libgo/go/runtime/testdata/testprogcgo/needmdeadlock.go | 1
-rw-r--r--  libgo/go/runtime/testdata/testprogcgo/numgoroutine.go | 1
-rw-r--r--  libgo/go/runtime/testdata/testprogcgo/panic.c | 9
-rw-r--r--  libgo/go/runtime/testdata/testprogcgo/panic.go | 23
-rw-r--r--  libgo/go/runtime/testdata/testprogcgo/pprof.go | 11
-rw-r--r--  libgo/go/runtime/testdata/testprogcgo/raceprof.go | 2
-rw-r--r--  libgo/go/runtime/testdata/testprogcgo/racesig.go | 1
-rw-r--r--  libgo/go/runtime/testdata/testprogcgo/segv.go | 1
-rw-r--r--  libgo/go/runtime/testdata/testprogcgo/sigstack.go | 1
-rw-r--r--  libgo/go/runtime/testdata/testprogcgo/sigthrow.go | 20
-rw-r--r--  libgo/go/runtime/testdata/testprogcgo/threadpanic.go | 1
-rw-r--r--  libgo/go/runtime/testdata/testprogcgo/threadpprof.go | 25
-rw-r--r--  libgo/go/runtime/testdata/testprogcgo/threadprof.go | 17
-rw-r--r--  libgo/go/runtime/testdata/testprognet/signal.go | 1
-rw-r--r--  libgo/go/runtime/testdata/testprognet/signalexec.go | 1
-rw-r--r--  libgo/go/runtime/testdata/testwinlib/main.c | 5
-rw-r--r--  libgo/go/runtime/testdata/testwinlib/main.go | 1
-rw-r--r--  libgo/go/runtime/time.go | 40
-rw-r--r--  libgo/go/runtime/time_fake.go | 5
-rw-r--r--  libgo/go/runtime/time_nofake.go | 6
-rw-r--r--  libgo/go/runtime/timeasm.go | 3
-rw-r--r--  libgo/go/runtime/timestub.go | 3
-rw-r--r--  libgo/go/runtime/timestub2.go | 7
-rw-r--r--  libgo/go/runtime/tls_stub.go | 1
-rw-r--r--  libgo/go/runtime/trace.go | 20
-rw-r--r--  libgo/go/runtime/trace/annotation.go | 2
-rw-r--r--  libgo/go/runtime/traceback_test.go | 130
-rw-r--r--  libgo/go/runtime/type.go | 4
-rw-r--r--  libgo/go/runtime/write_err.go | 1
221 files changed, 5803 insertions, 2145 deletions
diff --git a/libgo/go/runtime/abi_test.go b/libgo/go/runtime/abi_test.go
index b27b012..2d9d0f6 100644
--- a/libgo/go/runtime/abi_test.go
+++ b/libgo/go/runtime/abi_test.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build goexperiment.regabireflect
-// +build goexperiment.regabireflect
// This file contains tests specific to making sure the register ABI
// works in a bunch of contexts in the runtime.
@@ -78,7 +77,7 @@ func TestFinalizerRegisterABI(t *testing.T) {
tests := []struct {
name string
- fin interface{}
+ fin any
confirmValue int
}{
{"Pointer", regFinalizerPointer, -1},
diff --git a/libgo/go/runtime/alg.go b/libgo/go/runtime/alg.go
index 4408eab..93f9ec4 100644
--- a/libgo/go/runtime/alg.go
+++ b/libgo/go/runtime/alg.go
@@ -6,7 +6,7 @@ package runtime
import (
"internal/cpu"
- "runtime/internal/sys"
+ "internal/goarch"
"unsafe"
)
@@ -49,8 +49,8 @@ import (
//go:linkname alginit
const (
- c0 = uintptr((8-sys.PtrSize)/4*2860486313 + (sys.PtrSize-4)/4*33054211828000289)
- c1 = uintptr((8-sys.PtrSize)/4*3267000013 + (sys.PtrSize-4)/4*23344194077549503)
+ c0 = uintptr((8-goarch.PtrSize)/4*2860486313 + (goarch.PtrSize-4)/4*33054211828000289)
+ c1 = uintptr((8-goarch.PtrSize)/4*3267000013 + (goarch.PtrSize-4)/4*23344194077549503)
)
func memhash0(p unsafe.Pointer, h uintptr) uintptr {
@@ -452,7 +452,7 @@ func int64Hash(i uint64, seed uintptr) uintptr {
return memhash64(noescape(unsafe.Pointer(&i)), seed)
}
-func efaceHash(i interface{}, seed uintptr) uintptr {
+func efaceHash(i any, seed uintptr) uintptr {
return nilinterhash(noescape(unsafe.Pointer(&i)), seed)
}
@@ -462,7 +462,7 @@ func ifaceHash(i interface {
return interhash(noescape(unsafe.Pointer(&i)), seed)
}
-const hashRandomBytes = sys.PtrSize / 4 * 64
+const hashRandomBytes = goarch.PtrSize / 4 * 64
// used in asm_{386,amd64,arm64}.s to seed the hash function
var aeskeysched [hashRandomBytes]byte
@@ -484,7 +484,7 @@ func alginit() {
initAlgAES()
return
}
- getRandomData((*[len(hashkey) * sys.PtrSize]byte)(unsafe.Pointer(&hashkey))[:])
+ getRandomData((*[len(hashkey) * goarch.PtrSize]byte)(unsafe.Pointer(&hashkey))[:])
hashkey[0] |= 1 // make sure these numbers are odd
hashkey[1] |= 1
hashkey[2] |= 1
@@ -500,7 +500,7 @@ func initAlgAES() {
// Note: These routines perform the read with a native endianness.
func readUnaligned32(p unsafe.Pointer) uint32 {
q := (*[4]byte)(p)
- if sys.BigEndian {
+ if goarch.BigEndian {
return uint32(q[3]) | uint32(q[2])<<8 | uint32(q[1])<<16 | uint32(q[0])<<24
}
return uint32(q[0]) | uint32(q[1])<<8 | uint32(q[2])<<16 | uint32(q[3])<<24
@@ -508,7 +508,7 @@ func readUnaligned32(p unsafe.Pointer) uint32 {
func readUnaligned64(p unsafe.Pointer) uint64 {
q := (*[8]byte)(p)
- if sys.BigEndian {
+ if goarch.BigEndian {
return uint64(q[7]) | uint64(q[6])<<8 | uint64(q[5])<<16 | uint64(q[4])<<24 |
uint64(q[3])<<32 | uint64(q[2])<<40 | uint64(q[1])<<48 | uint64(q[0])<<56
}
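The readUnaligned32/readUnaligned64 hunks above only swap sys.BigEndian for goarch.BigEndian; the byte-assembly logic itself is unchanged. As a standalone illustration of what the helper computes, here is a user-space sketch hard-coded to the little-endian branch (the host byte order is assumed; this is not part of the patch):

package main

import (
	"fmt"
	"unsafe"
)

// readUnaligned32LE assembles a uint32 from four consecutive bytes at p,
// mirroring the little-endian branch of the runtime helper shown above.
func readUnaligned32LE(p unsafe.Pointer) uint32 {
	q := (*[4]byte)(p)
	return uint32(q[0]) | uint32(q[1])<<8 | uint32(q[2])<<16 | uint32(q[3])<<24
}

func main() {
	b := [4]byte{0x78, 0x56, 0x34, 0x12}
	fmt.Printf("%#x\n", readUnaligned32LE(unsafe.Pointer(&b[0]))) // 0x12345678
}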
diff --git a/libgo/go/runtime/asan/asan.go b/libgo/go/runtime/asan/asan.go
new file mode 100644
index 0000000..bab2362
--- /dev/null
+++ b/libgo/go/runtime/asan/asan.go
@@ -0,0 +1,38 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build asan && linux && (arm64 || amd64)
+
+package asan
+
+/*
+#cgo CFLAGS: -fsanitize=address
+#cgo LDFLAGS: -fsanitize=address
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <sanitizer/asan_interface.h>
+
+void __asan_read_go(void *addr, uintptr_t sz, void *sp, void *pc) {
+ if (__asan_region_is_poisoned(addr, sz)) {
+ __asan_report_error(pc, 0, sp, addr, false, sz);
+ }
+}
+
+void __asan_write_go(void *addr, uintptr_t sz, void *sp, void *pc) {
+ if (__asan_region_is_poisoned(addr, sz)) {
+ __asan_report_error(pc, 0, sp, addr, true, sz);
+ }
+}
+
+void __asan_unpoison_go(void *addr, uintptr_t sz) {
+ __asan_unpoison_memory_region(addr, sz);
+}
+
+void __asan_poison_go(void *addr, uintptr_t sz) {
+ __asan_poison_memory_region(addr, sz);
+}
+
+*/
+import "C"
diff --git a/libgo/go/runtime/asan0.go b/libgo/go/runtime/asan0.go
new file mode 100644
index 0000000..d5478d6
--- /dev/null
+++ b/libgo/go/runtime/asan0.go
@@ -0,0 +1,22 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !asan
+
+// Dummy ASan support API, used when not built with -asan.
+
+package runtime
+
+import (
+ "unsafe"
+)
+
+const asanenabled = false
+
+// Because asanenabled is false, none of these functions should be called.
+
+func asanread(addr unsafe.Pointer, sz uintptr) { throw("asan") }
+func asanwrite(addr unsafe.Pointer, sz uintptr) { throw("asan") }
+func asanunpoison(addr unsafe.Pointer, sz uintptr) { throw("asan") }
+func asanpoison(addr unsafe.Pointer, sz uintptr) { throw("asan") }
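The new asan0.go pairs with asan/asan.go in the runtime's usual stub pattern: one file carries the real implementation behind a build tag, the other carries no-op definitions behind the tag's negation, so call sites never need their own conditionals. A hedged sketch of the same pattern in an ordinary two-file package (the file names and the `trace` tag are illustrative, not taken from the source):

// trace_on.go
//go:build trace

package feature

import "log"

// With the tag enabled, tracing goes to the standard logger.
func tracef(format string, args ...any) { log.Printf("trace: "+format, args...) }

// trace_off.go
//go:build !trace

package feature

// With the tag disabled, this compiles to a no-op, so callers stay unconditional.
func tracef(format string, args ...any) {}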
diff --git a/libgo/go/runtime/auxv_none.go b/libgo/go/runtime/auxv_none.go
index 3178f1a..5d473ca 100644
--- a/libgo/go/runtime/auxv_none.go
+++ b/libgo/go/runtime/auxv_none.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build !linux && !darwin && !dragonfly && !freebsd && !netbsd && !solaris
-// +build !linux,!darwin,!dragonfly,!freebsd,!netbsd,!solaris
package runtime
diff --git a/libgo/go/runtime/cgo/handle.go b/libgo/go/runtime/cgo/handle.go
index 720acca..d711900 100644
--- a/libgo/go/runtime/cgo/handle.go
+++ b/libgo/go/runtime/cgo/handle.go
@@ -59,6 +59,41 @@ import (
// void myprint(uintptr_t handle) {
// MyGoPrint(handle);
// }
+//
+// Some C functions accept a void* argument that points to an arbitrary
+// data value supplied by the caller. It is not safe to coerce a cgo.Handle
+// (an integer) to a Go unsafe.Pointer, but instead we can pass the address
+// of the cgo.Handle to the void* parameter, as in this variant of the
+// previous example:
+//
+// package main
+//
+// /*
+// extern void MyGoPrint(void *context);
+// static inline void myprint(void *context) {
+// MyGoPrint(context);
+// }
+// */
+// import "C"
+// import (
+// "runtime/cgo"
+// "unsafe"
+// )
+//
+// //export MyGoPrint
+// func MyGoPrint(context unsafe.Pointer) {
+// h := *(*cgo.Handle)(context)
+// val := h.Value().(string)
+// println(val)
+// h.Delete()
+// }
+//
+// func main() {
+// val := "hello Go"
+// h := cgo.NewHandle(val)
+// C.myprint(unsafe.Pointer(&h))
+// // Output: hello Go
+// }
type Handle uintptr
// NewHandle returns a handle for a given value.
@@ -70,7 +105,7 @@ type Handle uintptr
//
// The intended use is to pass the returned handle to C code, which
// passes it back to Go, which calls Value.
-func NewHandle(v interface{}) Handle {
+func NewHandle(v any) Handle {
h := atomic.AddUintptr(&handleIdx, 1)
if h == 0 {
panic("runtime/cgo: ran out of handle space")
@@ -83,7 +118,7 @@ func NewHandle(v interface{}) Handle {
// Value returns the associated Go value for a valid handle.
//
// The method panics if the handle is invalid.
-func (h Handle) Value() interface{} {
+func (h Handle) Value() any {
v, ok := handles.Load(uintptr(h))
if !ok {
panic("runtime/cgo: misuse of an invalid Handle")
diff --git a/libgo/go/runtime/cgo/handle_test.go b/libgo/go/runtime/cgo/handle_test.go
index 738051a..b341c8e 100644
--- a/libgo/go/runtime/cgo/handle_test.go
+++ b/libgo/go/runtime/cgo/handle_test.go
@@ -13,8 +13,8 @@ func TestHandle(t *testing.T) {
v := 42
tests := []struct {
- v1 interface{}
- v2 interface{}
+ v1 any
+ v2 any
}{
{v1: v, v2: v},
{v1: &v, v2: &v},
@@ -44,7 +44,7 @@ func TestHandle(t *testing.T) {
}
siz := 0
- handles.Range(func(k, v interface{}) bool {
+ handles.Range(func(k, v any) bool {
siz++
return true
})
diff --git a/libgo/go/runtime/cgocall.go b/libgo/go/runtime/cgocall.go
index 0495d67..39a3bf2 100644
--- a/libgo/go/runtime/cgocall.go
+++ b/libgo/go/runtime/cgocall.go
@@ -7,7 +7,7 @@
package runtime
import (
- "runtime/internal/sys"
+ "internal/goarch"
"unsafe"
)
@@ -49,7 +49,7 @@ var ncgocall uint64 // number of cgo calls in total for dead m
// cgoCheckPointer checks if the argument contains a Go pointer that
// points to a Go pointer, and panics if it does.
-func cgoCheckPointer(ptr interface{}, arg interface{}) {
+func cgoCheckPointer(ptr any, arg any) {
if debug.cgocheck == 0 {
return
}
@@ -148,7 +148,7 @@ func cgoCheckArg(t *_type, p unsafe.Pointer, indir, top bool, msg string) {
if inheap(uintptr(unsafe.Pointer(it))) {
panic(errorString(msg))
}
- p = *(*unsafe.Pointer)(add(p, sys.PtrSize))
+ p = *(*unsafe.Pointer)(add(p, goarch.PtrSize))
if !cgoIsGoPointer(p) {
return
}
@@ -228,7 +228,7 @@ func cgoCheckUnknownPointer(p unsafe.Pointer, msg string) (base, i uintptr) {
}
hbits := heapBitsForAddr(base)
n := span.elemsize
- for i = uintptr(0); i < n; i += sys.PtrSize {
+ for i = uintptr(0); i < n; i += goarch.PtrSize {
if !hbits.morePointers() {
// No more possible pointers.
break
@@ -301,7 +301,7 @@ func cgoInRange(p unsafe.Pointer, start, end uintptr) bool {
// cgoCheckResult is called to check the result parameter of an
// exported Go function. It panics if the result is or contains a Go
// pointer.
-func cgoCheckResult(val interface{}) {
+func cgoCheckResult(val any) {
if debug.cgocheck == 0 {
return
}
diff --git a/libgo/go/runtime/cgocheck.go b/libgo/go/runtime/cgocheck.go
index 42fdfe8..9e5c862 100644
--- a/libgo/go/runtime/cgocheck.go
+++ b/libgo/go/runtime/cgocheck.go
@@ -8,7 +8,7 @@
package runtime
import (
- "runtime/internal/sys"
+ "internal/goarch"
"unsafe"
)
@@ -152,7 +152,7 @@ func cgoCheckTypedBlock(typ *_type, src unsafe.Pointer, off, size uintptr) {
// src must be in the regular heap.
hbits := heapBitsForAddr(uintptr(src))
- for i := uintptr(0); i < off+size; i += sys.PtrSize {
+ for i := uintptr(0); i < off+size; i += goarch.PtrSize {
bits := hbits.bits()
if i >= off && bits&bitPointer != 0 {
v := *(*unsafe.Pointer)(add(src, i))
@@ -170,22 +170,22 @@ func cgoCheckTypedBlock(typ *_type, src unsafe.Pointer, off, size uintptr) {
//go:nosplit
//go:nowritebarrier
func cgoCheckBits(src unsafe.Pointer, gcbits *byte, off, size uintptr) {
- skipMask := off / sys.PtrSize / 8
- skipBytes := skipMask * sys.PtrSize * 8
+ skipMask := off / goarch.PtrSize / 8
+ skipBytes := skipMask * goarch.PtrSize * 8
ptrmask := addb(gcbits, skipMask)
src = add(src, skipBytes)
off -= skipBytes
size += off
var bits uint32
- for i := uintptr(0); i < size; i += sys.PtrSize {
- if i&(sys.PtrSize*8-1) == 0 {
+ for i := uintptr(0); i < size; i += goarch.PtrSize {
+ if i&(goarch.PtrSize*8-1) == 0 {
bits = uint32(*ptrmask)
ptrmask = addb(ptrmask, 1)
} else {
bits >>= 1
}
if off > 0 {
- off -= sys.PtrSize
+ off -= goarch.PtrSize
} else {
if bits&1 != 0 {
v := *(*unsafe.Pointer)(add(src, i))
diff --git a/libgo/go/runtime/chan.go b/libgo/go/runtime/chan.go
index e3d0ad5..06e1a86 100644
--- a/libgo/go/runtime/chan.go
+++ b/libgo/go/runtime/chan.go
@@ -18,6 +18,7 @@ package runtime
// c.qcount < c.dataqsiz implies that c.sendq is empty.
import (
+ "internal/abi"
"runtime/internal/atomic"
"runtime/internal/math"
"unsafe"
@@ -185,7 +186,7 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool {
}
if raceenabled {
- racereadpc(c.raceaddr(), callerpc, funcPC(chansend))
+ racereadpc(c.raceaddr(), callerpc, abi.FuncPCABIInternal(chansend))
}
// Fast path: check for failed non-blocking operation without acquiring the lock.
@@ -381,7 +382,7 @@ func closechan(c *hchan) {
if raceenabled {
callerpc := getcallerpc()
- racewritepc(c.raceaddr(), callerpc, funcPC(closechan))
+ racewritepc(c.raceaddr(), callerpc, abi.FuncPCABIInternal(closechan))
racerelease(c.raceaddr())
}
diff --git a/libgo/go/runtime/chan_test.go b/libgo/go/runtime/chan_test.go
index 4aae89f..9905d4d 100644
--- a/libgo/go/runtime/chan_test.go
+++ b/libgo/go/runtime/chan_test.go
@@ -499,7 +499,7 @@ func TestSelectFairness(t *testing.T) {
func TestChanSendInterface(t *testing.T) {
type mt struct{}
m := &mt{}
- c := make(chan interface{}, 1)
+ c := make(chan any, 1)
c <- m
select {
case c <- m:
@@ -629,6 +629,10 @@ func TestShrinkStackDuringBlockedSend(t *testing.T) {
}
func TestNoShrinkStackWhileParking(t *testing.T) {
+ if runtime.GOOS == "netbsd" && runtime.GOARCH == "arm64" {
+ testenv.SkipFlaky(t, 49382)
+ }
+
// The goal of this test is to trigger a "racy sudog adjustment"
// throw. Basically, there's a window between when a goroutine
// becomes available for preemption for stack scanning (and thus,
diff --git a/libgo/go/runtime/checkptr_test.go b/libgo/go/runtime/checkptr_test.go
index 3e42469..5849139 100644
--- a/libgo/go/runtime/checkptr_test.go
+++ b/libgo/go/runtime/checkptr_test.go
@@ -16,6 +16,13 @@ func TestCheckPtr(t *testing.T) {
if runtime.Compiler == "gccgo" {
t.Skip("gccgo does not have -d=checkptr")
}
+
+ // This test requires rebuilding packages with -d=checkptr=1,
+ // so it's somewhat slow.
+ if testing.Short() {
+ t.Skip("skipping test in -short mode")
+ }
+
t.Parallel()
testenv.MustHaveGoRun(t)
@@ -59,3 +66,46 @@ func TestCheckPtr(t *testing.T) {
})
}
}
+
+func TestCheckPtr2(t *testing.T) {
+ // This test requires rebuilding packages with -d=checkptr=2,
+ // so it's somewhat slow.
+ if testing.Short() {
+ t.Skip("skipping test in -short mode")
+ }
+
+ t.Parallel()
+ testenv.MustHaveGoRun(t)
+
+ exe, err := buildTestProg(t, "testprog", "-gcflags=all=-d=checkptr=2")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ testCases := []struct {
+ cmd string
+ want string
+ }{
+ {"CheckPtrAlignmentNested", "fatal error: checkptr: converted pointer straddles multiple allocations\n"},
+ }
+
+ for _, tc := range testCases {
+ tc := tc
+ t.Run(tc.cmd, func(t *testing.T) {
+ t.Parallel()
+ got, err := testenv.CleanCmdEnv(exec.Command(exe, tc.cmd)).CombinedOutput()
+ if err != nil {
+ t.Log(err)
+ }
+ if tc.want == "" {
+ if len(got) > 0 {
+ t.Errorf("output:\n%s\nwant no output", got)
+ }
+ return
+ }
+ if !strings.HasPrefix(string(got), tc.want) {
+ t.Errorf("output:\n%s\n\nwant output starting with: %s", got, tc.want)
+ }
+ })
+ }
+}
diff --git a/libgo/go/runtime/cpuprof.go b/libgo/go/runtime/cpuprof.go
index 55ad0d1..2df48cf 100644
--- a/libgo/go/runtime/cpuprof.go
+++ b/libgo/go/runtime/cpuprof.go
@@ -13,6 +13,7 @@
package runtime
import (
+ "internal/abi"
"runtime/internal/atomic"
"runtime/internal/sys"
"unsafe"
@@ -88,7 +89,7 @@ func SetCPUProfileRate(hz int) {
// held at the time of the signal, nor can it use substantial amounts
// of stack.
//go:nowritebarrierrec
-func (p *cpuProfile) add(gp *g, stk []uintptr) {
+func (p *cpuProfile) add(tagPtr *unsafe.Pointer, stk []uintptr) {
// Simple cas-lock to coordinate with setcpuprofilerate.
for !atomic.Cas(&prof.signalLock, 0, 1) {
osyield()
@@ -103,15 +104,6 @@ func (p *cpuProfile) add(gp *g, stk []uintptr) {
// because otherwise its write barrier behavior may not
// be correct. See the long comment there before
// changing the argument here.
- //
- // Note: it can happen on Windows, where we are calling
- // p.add with a gp that is not the current g, that gp is nil,
- // meaning we interrupted a system thread with no g.
- // Avoid faulting in that case.
- var tagPtr *unsafe.Pointer
- if gp != nil {
- tagPtr = &gp.labels
- }
cpuprof.log.write(tagPtr, nanotime(), hdr[:], stk)
}
@@ -166,8 +158,8 @@ func (p *cpuProfile) addExtra() {
if p.lostExtra > 0 {
hdr := [1]uint64{p.lostExtra}
lostStk := [2]uintptr{
- _LostExternalCodePC + sys.PCQuantum,
- _ExternalCodePC + sys.PCQuantum,
+ abi.FuncPCABIInternal(_LostExternalCode) + sys.PCQuantum,
+ abi.FuncPCABIInternal(_ExternalCode) + sys.PCQuantum,
}
p.log.write(nil, 0, hdr[:], lostStk[:])
p.lostExtra = 0
@@ -176,8 +168,8 @@ func (p *cpuProfile) addExtra() {
if p.lostAtomic > 0 {
hdr := [1]uint64{p.lostAtomic}
lostStk := [2]uintptr{
- _LostSIGPROFDuringAtomic64PC + sys.PCQuantum,
- _SystemPC + sys.PCQuantum,
+ abi.FuncPCABIInternal(_LostSIGPROFDuringAtomic64) + sys.PCQuantum,
+ abi.FuncPCABIInternal(_System) + sys.PCQuantum,
}
p.log.write(nil, 0, hdr[:], lostStk[:])
p.lostAtomic = 0
@@ -208,6 +200,8 @@ func runtime_pprof_runtime_cyclesPerSecond() int64 {
// If profiling is turned off and all the profile data accumulated while it was
// on has been returned, readProfile returns eof=true.
// The caller must save the returned data and tags before calling readProfile again.
+// The returned data contains a whole number of records, and tags contains
+// exactly one entry per record.
//
//go:linkname runtime_pprof_readProfile runtime_1pprof.readProfile
func runtime_pprof_readProfile() ([]uint64, []unsafe.Pointer, bool) {
diff --git a/libgo/go/runtime/cputicks.go b/libgo/go/runtime/cputicks.go
index 84e3ce3..8a87d4a 100644
--- a/libgo/go/runtime/cputicks.go
+++ b/libgo/go/runtime/cputicks.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//-go:build !arm && !arm64 && !mips64 && !mips64le && !mips && !mipsle && !wasm
-// -build !arm,!arm64,!mips64,!mips64le,!mips,!mipsle,!wasm
package runtime
diff --git a/libgo/go/runtime/crash_cgo_test.go b/libgo/go/runtime/crash_cgo_test.go
index 6ad42b2..615892d 100644
--- a/libgo/go/runtime/crash_cgo_test.go
+++ b/libgo/go/runtime/crash_cgo_test.go
@@ -3,12 +3,10 @@
// license that can be found in the LICENSE file.
//go:build cgo
-// +build cgo
package runtime_test
import (
- "bytes"
"fmt"
"internal/testenv"
"os"
@@ -65,6 +63,10 @@ func TestCgoCallbackGC(t *testing.T) {
t.Skip("too slow for mips64x builders")
}
}
+ if testenv.Builder() == "darwin-amd64-10_14" {
+ // TODO(#23011): When the 10.14 builders are gone, remove this skip.
+ t.Skip("skipping due to platform bug on macOS 10.14; see https://golang.org/issue/43926")
+ }
got := runTestProg(t, "testprogcgo", "CgoCallbackGC")
want := "OK\n"
if got != want {
@@ -91,23 +93,9 @@ func TestCgoExternalThreadSIGPROF(t *testing.T) {
case "plan9", "windows":
t.Skipf("no pthreads on %s", runtime.GOOS)
}
- if runtime.GOARCH == "ppc64" && runtime.GOOS == "linux" {
- // TODO(austin) External linking not implemented on
- // linux/ppc64 (issue #8912)
- t.Skipf("no external linking on ppc64")
- }
-
- exe, err := buildTestProg(t, "testprogcgo", "-tags=threadprof")
- if err != nil {
- t.Fatal(err)
- }
-
- got, err := testenv.CleanCmdEnv(exec.Command(exe, "CgoExternalThreadSIGPROF")).CombinedOutput()
- if err != nil {
- t.Fatalf("exit status: %v\n%s", err, got)
- }
- if want := "OK\n"; string(got) != want {
+ got := runTestProg(t, "testprogcgo", "CgoExternalThreadSIGPROF", "GO_START_SIGPROF_THREAD=1")
+ if want := "OK\n"; got != want {
t.Fatalf("expected %q, but got:\n%s", want, got)
}
}
@@ -120,18 +108,8 @@ func TestCgoExternalThreadSignal(t *testing.T) {
t.Skipf("no pthreads on %s", runtime.GOOS)
}
- exe, err := buildTestProg(t, "testprogcgo", "-tags=threadprof")
- if err != nil {
- t.Fatal(err)
- }
-
- got, err := testenv.CleanCmdEnv(exec.Command(exe, "CgoExternalThreadSIGPROF")).CombinedOutput()
- if err != nil {
- t.Fatalf("exit status: %v\n%s", err, got)
- }
-
- want := []byte("OK\n")
- if !bytes.Equal(got, want) {
+ got := runTestProg(t, "testprogcgo", "CgoExternalThreadSignal")
+ if want := "OK\n"; got != want {
t.Fatalf("expected %q, but got:\n%s", want, got)
}
}
@@ -318,12 +296,7 @@ func testCgoPprof(t *testing.T, buildArg, runArg, top, bottom string) {
t.Fatal(err)
}
- // pprofCgoTraceback is called whenever CGO code is executing and a signal
- // is received. Disable signal preemption to increase the likelihood at
- // least one SIGPROF signal fired to capture a sample. See issue #37201.
cmd := testenv.CleanCmdEnv(exec.Command(exe, runArg))
- cmd.Env = append(cmd.Env, "GODEBUG=asyncpreemptoff=1")
-
got, err := cmd.CombinedOutput()
if err != nil {
if testenv.Builder() == "linux-amd64-alpine" {
@@ -336,7 +309,7 @@ func testCgoPprof(t *testing.T, buildArg, runArg, top, bottom string) {
defer os.Remove(fn)
for try := 0; try < 2; try++ {
- cmd := testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), "tool", "pprof", "-traces"))
+ cmd := testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), "tool", "pprof", "-tagignore=ignore", "-traces"))
// Check that pprof works both with and without explicit executable on command line.
if try == 0 {
cmd.Args = append(cmd.Args, exe, fn)
@@ -544,13 +517,38 @@ func TestCgoTracebackSigpanic(t *testing.T) {
}
t.Parallel()
got := runTestProg(t, "testprogcgo", "TracebackSigpanic")
+ t.Log(got)
want := "runtime.sigpanic"
if !strings.Contains(got, want) {
- t.Fatalf("want failure containing %q. output:\n%s\n", want, got)
+ t.Errorf("did not see %q in output", want)
}
- nowant := "unexpected return pc"
+ // No runtime errors like "runtime: unexpected return pc".
+ nowant := "runtime: "
if strings.Contains(got, nowant) {
- t.Fatalf("failure incorrectly contains %q. output:\n%s\n", nowant, got)
+ t.Errorf("unexpectedly saw %q in output", nowant)
+ }
+}
+
+func TestCgoPanicCallback(t *testing.T) {
+ t.Parallel()
+ got := runTestProg(t, "testprogcgo", "PanicCallback")
+ t.Log(got)
+ want := "panic: runtime error: invalid memory address or nil pointer dereference"
+ if !strings.Contains(got, want) {
+ t.Errorf("did not see %q in output", want)
+ }
+ want = "panic_callback"
+ if !strings.Contains(got, want) {
+ t.Errorf("did not see %q in output", want)
+ }
+ want = "PanicCallback"
+ if !strings.Contains(got, want) {
+ t.Errorf("did not see %q in output", want)
+ }
+ // No runtime errors like "runtime: unexpected return pc".
+ nowant := "runtime: "
+ if strings.Contains(got, nowant) {
+ t.Errorf("did not see %q in output", want)
}
}
@@ -609,17 +607,64 @@ func TestSegv(t *testing.T) {
}
for _, test := range []string{"Segv", "SegvInCgo"} {
+ test := test
t.Run(test, func(t *testing.T) {
t.Parallel()
got := runTestProg(t, "testprogcgo", test)
t.Log(got)
- if !strings.Contains(got, "SIGSEGV") {
- t.Errorf("expected crash from signal")
+ want := "SIGSEGV"
+ if !strings.Contains(got, want) {
+ t.Errorf("did not see %q in output", want)
+ }
+
+ // No runtime errors like "runtime: unknown pc".
+ switch runtime.GOOS {
+ case "darwin", "illumos", "solaris":
+ // Runtime sometimes throws when generating the traceback.
+ testenv.SkipFlaky(t, 49182)
+ case "linux":
+ if runtime.GOARCH == "386" {
+ // Runtime throws when generating a traceback from
+ // a VDSO call via asmcgocall.
+ testenv.SkipFlaky(t, 50504)
+ }
+ if testenv.Builder() == "linux-mips64le-mengzhuo" && strings.Contains(got, "runtime: unknown pc") {
+ // Runtime sometimes throw "unknown pc" when generating the traceback.
+ // Curiously, that doesn't seem to happen on the linux-mips64le-rtrk
+ // builder.
+ testenv.SkipFlaky(t, 50605)
+ }
+ }
+ nowant := "runtime: "
+ if strings.Contains(got, nowant) {
+ t.Errorf("unexpectedly saw %q in output", nowant)
}
})
}
}
+func TestAbortInCgo(t *testing.T) {
+ switch runtime.GOOS {
+ case "plan9", "windows":
+ // N.B. On Windows, C abort() causes the program to exit
+ // without going through the runtime at all.
+ t.Skipf("no signals on %s", runtime.GOOS)
+ }
+
+ t.Parallel()
+ got := runTestProg(t, "testprogcgo", "Abort")
+ t.Log(got)
+ want := "SIGABRT"
+ if !strings.Contains(got, want) {
+ t.Errorf("did not see %q in output", want)
+ }
+ // No runtime errors like "runtime: unknown pc".
+ nowant := "runtime: "
+ if strings.Contains(got, nowant) {
+ t.Errorf("did not see %q in output", want)
+ }
+}
+
// TestEINTR tests that we handle EINTR correctly.
// See issue #20400 and friends.
func TestEINTR(t *testing.T) {
@@ -659,3 +704,14 @@ func TestNeedmDeadlock(t *testing.T) {
t.Fatalf("want %s, got %s\n", want, output)
}
}
+
+func TestCgoTracebackGoroutineProfile(t *testing.T) {
+ if runtime.Compiler == "gccgo" {
+ t.Skip("gccgo does not have SetCgoTraceback")
+ }
+ output := runTestProg(t, "testprogcgo", "GoroutineProfile")
+ want := "OK\n"
+ if output != want {
+ t.Fatalf("want %s, got %s\n", want, output)
+ }
+}
diff --git a/libgo/go/runtime/crash_nonunix_test.go b/libgo/go/runtime/crash_nonunix_test.go
deleted file mode 100644
index 5f61476..0000000
--- a/libgo/go/runtime/crash_nonunix_test.go
+++ /dev/null
@@ -1,14 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build windows || plan9 || (js && wasm)
-// +build windows plan9 js,wasm
-
-package runtime_test
-
-import "os"
-
-// sigquit is the signal to send to kill a hanging testdata program.
-// On Unix we send SIGQUIT, but on non-Unix we only have os.Kill.
-var sigquit = os.Kill
diff --git a/libgo/go/runtime/crash_test.go b/libgo/go/runtime/crash_test.go
index e3633af..6e79449 100644
--- a/libgo/go/runtime/crash_test.go
+++ b/libgo/go/runtime/crash_test.go
@@ -6,6 +6,7 @@ package runtime_test
import (
"bytes"
+ "errors"
"flag"
"fmt"
"internal/testenv"
@@ -14,11 +15,9 @@ import (
"path/filepath"
"regexp"
"runtime"
- "strconv"
"strings"
"sync"
"testing"
- "time"
)
var toRemove []string
@@ -34,12 +33,13 @@ func TestMain(m *testing.M) {
var testprog struct {
sync.Mutex
dir string
- target map[string]buildexe
+ target map[string]*buildexe
}
type buildexe struct {
- exe string
- err error
+ once sync.Once
+ exe string
+ err error
}
func runTestProg(t *testing.T, binary, name string, env ...string) string {
@@ -69,52 +69,19 @@ func runBuiltTestProg(t *testing.T, exe, name string, env ...string) string {
if testing.Short() {
cmd.Env = append(cmd.Env, "RUNTIME_TEST_SHORT=1")
}
- var b bytes.Buffer
- cmd.Stdout = &b
- cmd.Stderr = &b
- if err := cmd.Start(); err != nil {
- t.Fatalf("starting %s %s: %v", exe, name, err)
- }
-
- // If the process doesn't complete within 1 minute,
- // assume it is hanging and kill it to get a stack trace.
- p := cmd.Process
- done := make(chan bool)
- go func() {
- scale := 1
- // This GOARCH/GOOS test is copied from cmd/dist/test.go.
- // TODO(iant): Have cmd/dist update the environment variable.
- if runtime.GOARCH == "arm" || runtime.GOOS == "windows" {
- scale = 2
- }
- if s := os.Getenv("GO_TEST_TIMEOUT_SCALE"); s != "" {
- if sc, err := strconv.Atoi(s); err == nil {
- scale = sc
- }
- }
-
- select {
- case <-done:
- case <-time.After(time.Duration(scale) * time.Minute):
- p.Signal(sigquit)
- }
- }()
-
- if err := cmd.Wait(); err != nil {
- t.Logf("%s %s exit status: %v", exe, name, err)
- }
- close(done)
-
- return b.String()
+ out, _ := testenv.RunWithTimeout(t, cmd)
+ return string(out)
}
+var serializeBuild = make(chan bool, 2)
+
func buildTestProg(t *testing.T, binary string, flags ...string) (string, error) {
if *flagQuick {
t.Skip("-quick")
}
+ testenv.MustHaveGoBuild(t)
testprog.Lock()
- defer testprog.Unlock()
if testprog.dir == "" {
dir, err := os.MkdirTemp("", "go-build")
if err != nil {
@@ -125,29 +92,48 @@ func buildTestProg(t *testing.T, binary string, flags ...string) (string, error)
}
if testprog.target == nil {
- testprog.target = make(map[string]buildexe)
+ testprog.target = make(map[string]*buildexe)
}
name := binary
if len(flags) > 0 {
name += "_" + strings.Join(flags, "_")
}
target, ok := testprog.target[name]
- if ok {
- return target.exe, target.err
- }
-
- exe := filepath.Join(testprog.dir, name+".exe")
- cmd := exec.Command(testenv.GoToolPath(t), append([]string{"build", "-o", exe}, flags...)...)
- cmd.Dir = "testdata/" + binary
- out, err := testenv.CleanCmdEnv(cmd).CombinedOutput()
- if err != nil {
- target.err = fmt.Errorf("building %s %v: %v\n%s", binary, flags, err, out)
+ if !ok {
+ target = &buildexe{}
testprog.target[name] = target
- return "", target.err
}
- target.exe = exe
- testprog.target[name] = target
- return exe, nil
+
+ dir := testprog.dir
+
+ // Unlock testprog while actually building, so that other
+ // tests can look up executables that were already built.
+ testprog.Unlock()
+
+ target.once.Do(func() {
+ // Only do two "go build"'s at a time,
+ // to keep load from getting too high.
+ serializeBuild <- true
+ defer func() { <-serializeBuild }()
+
+ // Don't get confused if testenv.GoToolPath calls t.Skip.
+ target.err = errors.New("building test called t.Skip")
+
+ exe := filepath.Join(dir, name+".exe")
+
+ t.Logf("running go build -o %s %s", exe, strings.Join(flags, " "))
+ cmd := exec.Command(testenv.GoToolPath(t), append([]string{"build", "-o", exe}, flags...)...)
+ cmd.Dir = "testdata/" + binary
+ out, err := testenv.CleanCmdEnv(cmd).CombinedOutput()
+ if err != nil {
+ target.err = fmt.Errorf("building %s %v: %v\n%s", binary, flags, err, out)
+ } else {
+ target.exe = exe
+ target.err = nil
+ }
+ })
+
+ return target.exe, target.err
}
func TestVDSO(t *testing.T) {
@@ -421,7 +407,7 @@ func TestRuntimePanicWithRuntimeError(t *testing.T) {
}
}
-func panicValue(fn func()) (recovered interface{}) {
+func panicValue(fn func()) (recovered any) {
defer func() {
recovered = recover()
}()
@@ -722,9 +708,13 @@ func TestTimePprof(t *testing.T) {
if runtime.Compiler == "gccgo" {
t.Skip("gccgo may not have the pprof tool")
}
- if runtime.GOOS == "aix" {
- t.Skip("pprof not yet available on AIX (see golang.org/issue/28555)")
+ // This test is unreliable on any system in which nanotime
+ // calls into libc.
+ switch runtime.GOOS {
+ case "aix", "darwin", "illumos", "openbsd", "solaris":
+ t.Skipf("skipping on %s because nanotime calls libc", runtime.GOOS)
}
+
// Pass GOTRACEBACK for issue #41120 to try to get more
// information on timeout.
fn := runTestProg(t, "testprog", "TimeProf", "GOTRACEBACK=crash")
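The rewritten buildTestProg above caches one build per binary/flags key behind a sync.Once and drops the map lock while the build runs, so concurrent tests wait only on the build they need instead of serializing on the lock. A stripped-down sketch of that once-per-key cache shape (names are illustrative, not the test helper itself):

package main

import (
	"fmt"
	"sync"
)

// entry is filled in at most once, even if many goroutines request the same key.
type entry struct {
	once sync.Once
	val  string
	err  error
}

var (
	mu    sync.Mutex
	cache = map[string]*entry{}
)

func get(key string, build func() (string, error)) (string, error) {
	mu.Lock()
	e, ok := cache[key]
	if !ok {
		e = &entry{}
		cache[key] = e
	}
	mu.Unlock() // release the lock while building so other keys make progress

	e.once.Do(func() { e.val, e.err = build() })
	return e.val, e.err
}

func main() {
	v, err := get("testprog", func() (string, error) { return "/tmp/testprog.exe", nil })
	fmt.Println(v, err)
}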
diff --git a/libgo/go/runtime/crash_unix_test.go b/libgo/go/runtime/crash_unix_test.go
index 9dac167..b7ca811 100644
--- a/libgo/go/runtime/crash_unix_test.go
+++ b/libgo/go/runtime/crash_unix_test.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build aix || darwin || dragonfly || freebsd || hurd || linux || netbsd || openbsd || solaris
-// +build aix darwin dragonfly freebsd hurd linux netbsd openbsd solaris
package runtime_test
@@ -14,7 +13,7 @@ import (
"os"
"os/exec"
"runtime"
- "strings"
+ "runtime/debug"
"sync"
"syscall"
"testing"
@@ -22,16 +21,12 @@ import (
"unsafe"
)
-// sigquit is the signal to send to kill a hanging testdata program.
-// Send SIGQUIT to get a stack trace.
-var sigquit = syscall.SIGQUIT
-
func init() {
if runtime.Sigisblocked(int(syscall.SIGQUIT)) {
// We can't use SIGQUIT to kill subprocesses because
// it's blocked. Use SIGKILL instead. See issue
// #19196 for an example of when this happens.
- sigquit = syscall.SIGKILL
+ testenv.Sigquit = syscall.SIGKILL
}
}
@@ -212,6 +207,11 @@ func TestPanicSystemstack(t *testing.T) {
func init() {
if len(os.Args) >= 2 && os.Args[1] == "testPanicSystemstackInternal" {
+ // Complete any in-flight GCs and disable future ones. We're going to
+ // block goroutines on runtime locks, which aren't ever preemptible for the
+ // GC to scan them.
+ runtime.GC()
+ debug.SetGCPercent(-1)
// Get two threads running on the system stack with
// something recognizable in the stack trace.
runtime.GOMAXPROCS(2)
@@ -245,9 +245,7 @@ func TestSignalExitStatus(t *testing.T) {
func TestSignalIgnoreSIGTRAP(t *testing.T) {
if runtime.GOOS == "openbsd" {
- if bn := testenv.Builder(); strings.HasSuffix(bn, "-62") || strings.HasSuffix(bn, "-64") {
- testenv.SkipFlaky(t, 17496)
- }
+ testenv.SkipFlaky(t, 49725)
}
output := runTestProg(t, "testprognet", "SignalIgnoreSIGTRAP")
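The init hook added above runs runtime.GC() and then debug.SetGCPercent(-1) so that no collection starts while goroutines sit on runtime locks. Outside the runtime's own tests, the same two calls are how a program temporarily disables automatic collection; a small sketch:

package main

import (
	"fmt"
	"runtime"
	"runtime/debug"
)

func main() {
	runtime.GC()                  // let any in-flight collection finish first
	old := debug.SetGCPercent(-1) // disable automatic GC; the previous setting is returned
	defer debug.SetGCPercent(old) // restore it on the way out

	fmt.Println("automatic GC disabled; previous GOGC was", old)
}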
diff --git a/libgo/go/runtime/debug/garbage_test.go b/libgo/go/runtime/debug/garbage_test.go
index 62eeb2c..8986606 100644
--- a/libgo/go/runtime/debug/garbage_test.go
+++ b/libgo/go/runtime/debug/garbage_test.go
@@ -6,6 +6,7 @@ package debug_test
import (
"internal/testenv"
+ "os"
"runtime"
. "runtime/debug"
"testing"
@@ -87,27 +88,75 @@ func TestReadGCStats(t *testing.T) {
}
}
-var big = make([]byte, 1<<20)
+var big []byte
func TestFreeOSMemory(t *testing.T) {
- var ms1, ms2 runtime.MemStats
-
- if big == nil {
- t.Skip("test is not reliable when run multiple times")
+ if runtime.Compiler == "gccgo" {
+ t.Skip("conservative GC")
}
- big = nil
+
+ // Tests FreeOSMemory by making big susceptible to collection
+ // and checking that at least that much memory is returned to
+ // the OS after.
+
+ const bigBytes = 32 << 20
+ big = make([]byte, bigBytes)
+
+ // Make sure any in-progress GCs are complete.
runtime.GC()
- runtime.ReadMemStats(&ms1)
+
+ var before runtime.MemStats
+ runtime.ReadMemStats(&before)
+
+ // Clear the last reference to the big allocation, making it
+ // susceptible to collection.
+ big = nil
+
+ // FreeOSMemory runs a GC cycle before releasing memory,
+ // so it's fine to skip a GC here.
+ //
+ // It's possible the background scavenger runs concurrently
+ // with this function and does most of the work for it.
+ // If that happens, it's OK. What we want is a test that fails
+ // often if FreeOSMemory does not work correctly, and a test
+ // that passes every time if it does.
FreeOSMemory()
- runtime.ReadMemStats(&ms2)
- if ms1.HeapReleased >= ms2.HeapReleased {
- t.Errorf("released before=%d; released after=%d; did not go up", ms1.HeapReleased, ms2.HeapReleased)
+
+ var after runtime.MemStats
+ runtime.ReadMemStats(&after)
+
+ // Check to make sure that the big allocation (now freed)
+ // had its memory shift into HeapReleased as a result of that
+ // FreeOSMemory.
+ if after.HeapReleased <= before.HeapReleased {
+ t.Fatalf("no memory released: %d -> %d", before.HeapReleased, after.HeapReleased)
+ }
+
+ // Check to make sure bigBytes was released, plus some slack. Pages may get
+ // allocated in between the two measurements above for a variety for reasons,
+ // most commonly for GC work bufs. Since this can get fairly high, depending
+ // on scheduling and what GOMAXPROCS is, give a lot of slack up-front.
+ //
+ // Add a little more slack too if the page size is bigger than the runtime page size.
+ // "big" could end up unaligned on its ends, forcing the scavenger to skip at worst
+ // 2x pages.
+ slack := uint64(bigBytes / 2)
+ pageSize := uint64(os.Getpagesize())
+ if pageSize > 8<<10 {
+ slack += pageSize * 2
+ }
+ if slack > bigBytes {
+ // We basically already checked this.
+ return
+ }
+ if after.HeapReleased-before.HeapReleased < bigBytes-slack {
+ t.Fatalf("less than %d released: %d -> %d", bigBytes, before.HeapReleased, after.HeapReleased)
}
}
var (
- setGCPercentBallast interface{}
- setGCPercentSink interface{}
+ setGCPercentBallast any
+ setGCPercentSink any
)
func TestSetGCPercent(t *testing.T) {
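The rewritten TestFreeOSMemory above measures debug.FreeOSMemory through the HeapReleased field of runtime.MemStats. A standalone sketch of the same before/after measurement (the 32 MiB size is just the value the test uses; the exact numbers printed will vary by platform):

package main

import (
	"fmt"
	"runtime"
	"runtime/debug"
)

var big []byte

func main() {
	big = make([]byte, 32<<20) // a large allocation we can later release
	runtime.GC()               // make sure any in-progress GC is complete

	var before runtime.MemStats
	runtime.ReadMemStats(&before)

	big = nil            // drop the last reference so the bytes are collectable
	debug.FreeOSMemory() // force a GC and return as much memory as possible to the OS

	var after runtime.MemStats
	runtime.ReadMemStats(&after)
	fmt.Printf("HeapReleased: %d -> %d\n", before.HeapReleased, after.HeapReleased)
}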
diff --git a/libgo/go/runtime/debug/mod.go b/libgo/go/runtime/debug/mod.go
index feac168..61b15ad 100644
--- a/libgo/go/runtime/debug/mod.go
+++ b/libgo/go/runtime/debug/mod.go
@@ -5,6 +5,9 @@
package debug
import (
+ "bytes"
+ "fmt"
+ "runtime"
"strings"
_ "unsafe" // for go:linkname
)
@@ -16,15 +19,32 @@ func modinfo() string
// in the running binary. The information is available only
// in binaries built with module support.
func ReadBuildInfo() (info *BuildInfo, ok bool) {
- return readBuildInfo(modinfo())
+ data := modinfo()
+ if len(data) < 32 {
+ return nil, false
+ }
+ data = data[16 : len(data)-16]
+ bi := &BuildInfo{}
+ if err := bi.UnmarshalText([]byte(data)); err != nil {
+ return nil, false
+ }
+
+ // The go version is stored separately from other build info, mostly for
+ // historical reasons. It is not part of the modinfo() string, and
+ // ParseBuildInfo does not recognize it. We inject it here to hide this
+ // awkwardness from the user.
+ bi.GoVersion = runtime.Version()
+
+ return bi, true
}
-// BuildInfo represents the build information read from
-// the running binary.
+// BuildInfo represents the build information read from a Go binary.
type BuildInfo struct {
- Path string // The main package path
- Main Module // The module containing the main package
- Deps []*Module // Module dependencies
+ GoVersion string // Version of Go that produced this binary.
+ Path string // The main package path
+ Main Module // The module containing the main package
+ Deps []*Module // Module dependencies
+ Settings []BuildSetting // Other information about the build.
}
// Module represents a module.
@@ -35,81 +55,151 @@ type Module struct {
Replace *Module // replaced by this module
}
-func readBuildInfo(data string) (*BuildInfo, bool) {
- if len(data) < 32 {
- return nil, false
+// BuildSetting describes a setting that may be used to understand how the
+// binary was built. For example, VCS commit and dirty status is stored here.
+type BuildSetting struct {
+ // Key and Value describe the build setting.
+ // Key must not contain an equals sign, space, tab, or newline.
+ // Value must not contain newlines ('\n').
+ Key, Value string
+}
+
+func (bi *BuildInfo) MarshalText() ([]byte, error) {
+ buf := &bytes.Buffer{}
+ if bi.GoVersion != "" {
+ fmt.Fprintf(buf, "go\t%s\n", bi.GoVersion)
}
- data = data[16 : len(data)-16]
+ if bi.Path != "" {
+ fmt.Fprintf(buf, "path\t%s\n", bi.Path)
+ }
+ var formatMod func(string, Module)
+ formatMod = func(word string, m Module) {
+ buf.WriteString(word)
+ buf.WriteByte('\t')
+ buf.WriteString(m.Path)
+ mv := m.Version
+ if mv == "" {
+ mv = "(devel)"
+ }
+ buf.WriteByte('\t')
+ buf.WriteString(mv)
+ if m.Replace == nil {
+ buf.WriteByte('\t')
+ buf.WriteString(m.Sum)
+ } else {
+ buf.WriteByte('\n')
+ formatMod("=>", *m.Replace)
+ }
+ buf.WriteByte('\n')
+ }
+ if bi.Main.Path != "" {
+ formatMod("mod", bi.Main)
+ }
+ for _, dep := range bi.Deps {
+ formatMod("dep", *dep)
+ }
+ for _, s := range bi.Settings {
+ if strings.ContainsAny(s.Key, "= \t\n") {
+ return nil, fmt.Errorf("invalid build setting key %q", s.Key)
+ }
+ if strings.Contains(s.Value, "\n") {
+ return nil, fmt.Errorf("invalid build setting value for key %q: contains newline", s.Value)
+ }
+ fmt.Fprintf(buf, "build\t%s=%s\n", s.Key, s.Value)
+ }
+
+ return buf.Bytes(), nil
+}
- const (
- pathLine = "path\t"
- modLine = "mod\t"
- depLine = "dep\t"
- repLine = "=>\t"
+func (bi *BuildInfo) UnmarshalText(data []byte) (err error) {
+ *bi = BuildInfo{}
+ lineNum := 1
+ defer func() {
+ if err != nil {
+ err = fmt.Errorf("could not parse Go build info: line %d: %w", lineNum, err)
+ }
+ }()
+
+ var (
+ pathLine = []byte("path\t")
+ modLine = []byte("mod\t")
+ depLine = []byte("dep\t")
+ repLine = []byte("=>\t")
+ buildLine = []byte("build\t")
+ newline = []byte("\n")
+ tab = []byte("\t")
)
- readEntryFirstLine := func(elem []string) (Module, bool) {
+ readModuleLine := func(elem [][]byte) (Module, error) {
if len(elem) != 2 && len(elem) != 3 {
- return Module{}, false
+ return Module{}, fmt.Errorf("expected 2 or 3 columns; got %d", len(elem))
}
sum := ""
if len(elem) == 3 {
- sum = elem[2]
+ sum = string(elem[2])
}
return Module{
- Path: elem[0],
- Version: elem[1],
+ Path: string(elem[0]),
+ Version: string(elem[1]),
Sum: sum,
- }, true
+ }, nil
}
var (
- info = &BuildInfo{}
last *Module
- line string
+ line []byte
ok bool
)
- // Reverse of cmd/go/internal/modload.PackageBuildInfo
+ // Reverse of BuildInfo.String(), except for go version.
for len(data) > 0 {
- i := strings.IndexByte(data, '\n')
- if i < 0 {
+ line, data, ok = bytes.Cut(data, newline)
+ if !ok {
break
}
- line, data = data[:i], data[i+1:]
switch {
- case strings.HasPrefix(line, pathLine):
+ case bytes.HasPrefix(line, pathLine):
elem := line[len(pathLine):]
- info.Path = elem
- case strings.HasPrefix(line, modLine):
- elem := strings.Split(line[len(modLine):], "\t")
- last = &info.Main
- *last, ok = readEntryFirstLine(elem)
- if !ok {
- return nil, false
+ bi.Path = string(elem)
+ case bytes.HasPrefix(line, modLine):
+ elem := bytes.Split(line[len(modLine):], tab)
+ last = &bi.Main
+ *last, err = readModuleLine(elem)
+ if err != nil {
+ return err
}
- case strings.HasPrefix(line, depLine):
- elem := strings.Split(line[len(depLine):], "\t")
+ case bytes.HasPrefix(line, depLine):
+ elem := bytes.Split(line[len(depLine):], tab)
last = new(Module)
- info.Deps = append(info.Deps, last)
- *last, ok = readEntryFirstLine(elem)
- if !ok {
- return nil, false
+ bi.Deps = append(bi.Deps, last)
+ *last, err = readModuleLine(elem)
+ if err != nil {
+ return err
}
- case strings.HasPrefix(line, repLine):
- elem := strings.Split(line[len(repLine):], "\t")
+ case bytes.HasPrefix(line, repLine):
+ elem := bytes.Split(line[len(repLine):], tab)
if len(elem) != 3 {
- return nil, false
+ return fmt.Errorf("expected 3 columns for replacement; got %d", len(elem))
}
if last == nil {
- return nil, false
+ return fmt.Errorf("replacement with no module on previous line")
}
last.Replace = &Module{
- Path: elem[0],
- Version: elem[1],
- Sum: elem[2],
+ Path: string(elem[0]),
+ Version: string(elem[1]),
+ Sum: string(elem[2]),
}
last = nil
+ case bytes.HasPrefix(line, buildLine):
+ key, val, ok := strings.Cut(string(line[len(buildLine):]), "=")
+ if !ok {
+ return fmt.Errorf("invalid build line")
+ }
+ if key == "" {
+ return fmt.Errorf("empty key")
+ }
+ bi.Settings = append(bi.Settings, BuildSetting{Key: key, Value: val})
}
+ lineNum++
}
- return info, true
+ return nil
}
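With the new GoVersion and Settings fields, a module-built binary can report its own build metadata at run time. A hedged usage sketch of the public API (which settings appear depends on how the binary was built):

package main

import (
	"fmt"
	"runtime/debug"
)

func main() {
	info, ok := debug.ReadBuildInfo()
	if !ok {
		fmt.Println("binary built without module support")
		return
	}
	fmt.Println("go version:", info.GoVersion)
	fmt.Println("main module:", info.Main.Path, info.Main.Version)
	for _, s := range info.Settings {
		// e.g. vcs.revision, vcs.time, CGO_ENABLED
		fmt.Printf("%s=%s\n", s.Key, s.Value)
	}
}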
diff --git a/libgo/go/runtime/debug/panic_test.go b/libgo/go/runtime/debug/panic_test.go
index 65f9555..ec5294c 100644
--- a/libgo/go/runtime/debug/panic_test.go
+++ b/libgo/go/runtime/debug/panic_test.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd
-// +build aix darwin dragonfly freebsd linux netbsd openbsd
// TODO: test on Windows?
diff --git a/libgo/go/runtime/debuglog.go b/libgo/go/runtime/debuglog.go
index d8c87c7..f63f298 100644
--- a/libgo/go/runtime/debuglog.go
+++ b/libgo/go/runtime/debuglog.go
@@ -266,7 +266,7 @@ func (l *dlogger) hex(x uint64) *dlogger {
}
//go:nosplit
-func (l *dlogger) p(x interface{}) *dlogger {
+func (l *dlogger) p(x any) *dlogger {
if !dlogEnabled {
return l
}
diff --git a/libgo/go/runtime/debuglog_off.go b/libgo/go/runtime/debuglog_off.go
index dd38156..fa3be39 100644
--- a/libgo/go/runtime/debuglog_off.go
+++ b/libgo/go/runtime/debuglog_off.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build !debuglog
-// +build !debuglog
package runtime
diff --git a/libgo/go/runtime/debuglog_on.go b/libgo/go/runtime/debuglog_on.go
index 2fcdbe7..b815020 100644
--- a/libgo/go/runtime/debuglog_on.go
+++ b/libgo/go/runtime/debuglog_on.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build debuglog
-// +build debuglog
package runtime
diff --git a/libgo/go/runtime/defer_test.go b/libgo/go/runtime/defer_test.go
index fc96144..3a54951 100644
--- a/libgo/go/runtime/defer_test.go
+++ b/libgo/go/runtime/defer_test.go
@@ -433,8 +433,86 @@ func TestIssue43921(t *testing.T) {
}()
}
-func expect(t *testing.T, n int, err interface{}) {
+func expect(t *testing.T, n int, err any) {
if n != err {
t.Fatalf("have %v, want %v", err, n)
}
}
+
+func TestIssue43920(t *testing.T) {
+ var steps int
+
+ defer func() {
+ expect(t, 1, recover())
+ }()
+ defer func() {
+ defer func() {
+ defer func() {
+ expect(t, 5, recover())
+ }()
+ defer panic(5)
+ func() {
+ panic(4)
+ }()
+ }()
+ defer func() {
+ expect(t, 3, recover())
+ }()
+ defer panic(3)
+ }()
+ func() {
+ defer step(t, &steps, 1)
+ panic(1)
+ }()
+}
+
+func step(t *testing.T, steps *int, want int) {
+ *steps++
+ if *steps != want {
+ t.Fatalf("have %v, want %v", *steps, want)
+ }
+}
+
+func TestIssue43941(t *testing.T) {
+ var steps int = 7
+ defer func() {
+ step(t, &steps, 14)
+ expect(t, 4, recover())
+ }()
+ func() {
+ func() {
+ defer func() {
+ defer func() {
+ expect(t, 3, recover())
+ }()
+ defer panic(3)
+ panic(2)
+ }()
+ defer func() {
+ expect(t, 1, recover())
+ }()
+ defer panic(1)
+ }()
+ defer func() {}()
+ defer func() {}()
+ defer step(t, &steps, 10)
+ defer step(t, &steps, 9)
+ step(t, &steps, 8)
+ }()
+ func() {
+ defer step(t, &steps, 13)
+ defer step(t, &steps, 12)
+ func() {
+ defer step(t, &steps, 11)
+ panic(4)
+ }()
+
+ // Code below isn't executed,
+ // but removing it breaks the test case.
+ defer func() {}()
+ defer panic(-1)
+ defer step(t, &steps, -1)
+ defer step(t, &steps, -1)
+ defer func() {}()
+ }()
+}
diff --git a/libgo/go/runtime/env_posix.go b/libgo/go/runtime/env_posix.go
index 8893c4a..9bb34a8 100644
--- a/libgo/go/runtime/env_posix.go
+++ b/libgo/go/runtime/env_posix.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build aix || darwin || dragonfly || freebsd || hurd || (js && wasm) || linux || netbsd || openbsd || solaris || windows || plan9
-// +build aix darwin dragonfly freebsd hurd js,wasm linux netbsd openbsd solaris windows plan9
package runtime
diff --git a/libgo/go/runtime/error.go b/libgo/go/runtime/error.go
index a8c82bb..c7e2385 100644
--- a/libgo/go/runtime/error.go
+++ b/libgo/go/runtime/error.go
@@ -251,7 +251,7 @@ type stringer interface {
// printany prints an argument passed to panic.
// If panic is called with a value that has a String or Error method,
// it has already been converted into a string by preprintpanics.
-func printany(i interface{}) {
+func printany(i any) {
switch v := i.(type) {
case nil:
print("nil")
@@ -294,7 +294,7 @@ func printany(i interface{}) {
}
}
-func printanycustomtype(i interface{}) {
+func printanycustomtype(i any) {
eface := efaceOf(&i)
typestring := eface._type.string()
diff --git a/libgo/go/runtime/export_debuglog_test.go b/libgo/go/runtime/export_debuglog_test.go
index 8cd943b..1a9074e 100644
--- a/libgo/go/runtime/export_debuglog_test.go
+++ b/libgo/go/runtime/export_debuglog_test.go
@@ -14,15 +14,15 @@ const DebugLogStringLimit = debugLogStringLimit
var Dlog = dlog
-func (l *dlogger) End() { l.end() }
-func (l *dlogger) B(x bool) *dlogger { return l.b(x) }
-func (l *dlogger) I(x int) *dlogger { return l.i(x) }
-func (l *dlogger) I16(x int16) *dlogger { return l.i16(x) }
-func (l *dlogger) U64(x uint64) *dlogger { return l.u64(x) }
-func (l *dlogger) Hex(x uint64) *dlogger { return l.hex(x) }
-func (l *dlogger) P(x interface{}) *dlogger { return l.p(x) }
-func (l *dlogger) S(x string) *dlogger { return l.s(x) }
-func (l *dlogger) PC(x uintptr) *dlogger { return l.pc(x) }
+func (l *dlogger) End() { l.end() }
+func (l *dlogger) B(x bool) *dlogger { return l.b(x) }
+func (l *dlogger) I(x int) *dlogger { return l.i(x) }
+func (l *dlogger) I16(x int16) *dlogger { return l.i16(x) }
+func (l *dlogger) U64(x uint64) *dlogger { return l.u64(x) }
+func (l *dlogger) Hex(x uint64) *dlogger { return l.hex(x) }
+func (l *dlogger) P(x any) *dlogger { return l.p(x) }
+func (l *dlogger) S(x string) *dlogger { return l.s(x) }
+func (l *dlogger) PC(x uintptr) *dlogger { return l.pc(x) }
func DumpDebugLog() string {
g := getg()
diff --git a/libgo/go/runtime/export_linux_test.go b/libgo/go/runtime/export_linux_test.go
index 1f8e633..f7803c5 100644
--- a/libgo/go/runtime/export_linux_test.go
+++ b/libgo/go/runtime/export_linux_test.go
@@ -6,14 +6,9 @@
package runtime
-import "unsafe"
+// const SiginfoMaxSize = _si_max_size
+// const SigeventMaxSize = _sigev_max_size
// var NewOSProc0 = newosproc0
// var Mincore = mincore
-var Add = add
-
-type EpollEvent epollevent
-
-func Epollctl(epfd, op, fd int32, ev unsafe.Pointer) int32 {
- return epollctl(epfd, op, fd, (*epollevent)(ev))
-}
+// var Add = add
diff --git a/libgo/go/runtime/export_mmap_test.go b/libgo/go/runtime/export_mmap_test.go
index aa498bb..920c7fd 100644
--- a/libgo/go/runtime/export_mmap_test.go
+++ b/libgo/go/runtime/export_mmap_test.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build aix || darwin || dragonfly || freebsd || hurd || linux || netbsd || openbsd || solaris
-// +build aix darwin dragonfly freebsd hurd linux netbsd openbsd solaris
// Export guts for testing.
diff --git a/libgo/go/runtime/export_pipe2_test.go b/libgo/go/runtime/export_pipe2_test.go
index 22ac4b7..c06b63b 100644
--- a/libgo/go/runtime/export_pipe2_test.go
+++ b/libgo/go/runtime/export_pipe2_test.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build dragonfly || freebsd || hurd || linux || netbsd || openbsd || solaris
-// +build dragonfly freebsd hurd linux netbsd openbsd solaris
package runtime
diff --git a/libgo/go/runtime/export_pipe_test.go b/libgo/go/runtime/export_pipe_test.go
index a0c6c04..0583039 100644
--- a/libgo/go/runtime/export_pipe_test.go
+++ b/libgo/go/runtime/export_pipe_test.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build aix || darwin
-// +build aix darwin
package runtime
diff --git a/libgo/go/runtime/export_test.go b/libgo/go/runtime/export_test.go
index 5255a70..468e3a1 100644
--- a/libgo/go/runtime/export_test.go
+++ b/libgo/go/runtime/export_test.go
@@ -7,6 +7,8 @@
package runtime
import (
+ "internal/goarch"
+ "internal/goos"
"runtime/internal/atomic"
"runtime/internal/sys"
"unsafe"
@@ -27,8 +29,6 @@ var Exitsyscall = exitsyscall
var LockedOSThread = lockedOSThread
var Xadduintptr = atomic.Xadduintptr
-var FuncPC = funcPC
-
var Fastlog2 = fastlog2
var Atoi = atoi
@@ -75,7 +75,7 @@ func Netpoll(delta int64) {
})
}
-func GCMask(x interface{}) (ret []byte) {
+func GCMask(x any) (ret []byte) {
return nil
}
@@ -144,48 +144,43 @@ func RunSchedLocalQueueStealTest() {
}
}
-// Temporary to enable register ABI bringup.
-// TODO(register args): convert back to local variables in RunSchedLocalQueueEmptyTest that
-// get passed to the "go" stmts there.
-var RunSchedLocalQueueEmptyState struct {
- done chan bool
- ready *uint32
- p *p
-}
-
func RunSchedLocalQueueEmptyTest(iters int) {
- // Test that runq is not spuriously reported as empty.
- // Runq emptiness affects scheduling decisions and spurious emptiness
- // can lead to underutilization (both runnable Gs and idle Ps coexist
- // for arbitrary long time).
- done := make(chan bool, 1)
- RunSchedLocalQueueEmptyState.done = done
- p := new(p)
- RunSchedLocalQueueEmptyState.p = p
- gs := make([]g, 2)
- ready := new(uint32)
- RunSchedLocalQueueEmptyState.ready = ready
- for i := 0; i < iters; i++ {
- *ready = 0
- next0 := (i & 1) == 0
- next1 := (i & 2) == 0
- runqput(p, &gs[0], next0)
- go func() {
- for atomic.Xadd(RunSchedLocalQueueEmptyState.ready, 1); atomic.Load(RunSchedLocalQueueEmptyState.ready) != 2; {
- }
- if runqempty(RunSchedLocalQueueEmptyState.p) {
- //println("next:", next0, next1)
- throw("queue is empty")
- }
- RunSchedLocalQueueEmptyState.done <- true
- }()
- for atomic.Xadd(ready, 1); atomic.Load(ready) != 2; {
- }
- runqput(p, &gs[1], next1)
- runqget(p)
- <-done
- runqget(p)
- }
+
+ /* FIXME: gofrontend fails to compile this code because
+ p, next0, next1, and done escape to the heap, which is
+ not permitted in the runtime package.
+
+ // Test that runq is not spuriously reported as empty.
+ // Runq emptiness affects scheduling decisions and spurious emptiness
+ // can lead to underutilization (both runnable Gs and idle Ps coexist
+ // for arbitrary long time).
+ done := make(chan bool, 1)
+ p := new(p)
+ gs := make([]g, 2)
+ ready := new(uint32)
+ for i := 0; i < iters; i++ {
+ *ready = 0
+ next0 := (i & 1) == 0
+ next1 := (i & 2) == 0
+ runqput(p, &gs[0], next0)
+ go func() {
+ for atomic.Xadd(ready, 1); atomic.Load(ready) != 2; {
+ }
+ if runqempty(p) {
+ println("next:", next0, next1)
+ throw("queue is empty")
+ }
+ done <- true
+ }()
+ for atomic.Xadd(ready, 1); atomic.Load(ready) != 2; {
+ }
+ runqput(p, &gs[1], next1)
+ runqget(p)
+ <-done
+ runqget(p)
+ }
+
+ */
}
var (
@@ -207,7 +202,7 @@ func MemclrBytes(b []byte) {
memclrNoHeapPointers(s.array, uintptr(s.len))
}
-var HashLoad = &hashLoad
+const HashLoad = hashLoad
// entry point for testing
//func GostringW(w []uint16) (s string) {
@@ -223,11 +218,9 @@ var Write = write
func Envs() []string { return envs }
func SetEnvs(e []string) { envs = e }
-//var BigEndian = sys.BigEndian
-
// For benchmarking.
-func BenchSetType(n int, x interface{}) {
+func BenchSetType(n int, x any) {
e := *efaceOf(&x)
t := e._type
var size uintptr
@@ -254,7 +247,7 @@ func BenchSetType(n int, x interface{}) {
})
}
-const PtrSize = sys.PtrSize
+const PtrSize = goarch.PtrSize
var ForceGCPeriod = &forcegcperiod
@@ -272,7 +265,7 @@ var ReadUnaligned64 = readUnaligned64
func CountPagesInUse() (pagesInUse, counted uintptr) {
stopTheWorld("CountPagesInUse")
- pagesInUse = uintptr(mheap_.pagesInUse)
+ pagesInUse = uintptr(mheap_.pagesInUse.Load())
for _, s := range mheap_.allspans {
if s.state.get() == mSpanInUse {
@@ -554,7 +547,7 @@ func MapTombstoneCheck(m map[int]int) {
// We should have a series of filled and emptyOne cells, followed by
// a series of emptyRest cells.
h := *(**hmap)(unsafe.Pointer(&m))
- i := interface{}(m)
+ i := any(m)
t := *(**maptype)(unsafe.Pointer(&i))
for x := 0; x < 1<<h.B; x++ {
@@ -804,21 +797,17 @@ func (p *PageAlloc) Free(base, npages uintptr) {
// None of the tests need any higher-level locking, so we just
// take the lock internally.
lock(pp.mheapLock)
- pp.free(base, npages)
+ pp.free(base, npages, true)
unlock(pp.mheapLock)
})
}
func (p *PageAlloc) Bounds() (ChunkIdx, ChunkIdx) {
return ChunkIdx((*pageAlloc)(p).start), ChunkIdx((*pageAlloc)(p).end)
}
-func (p *PageAlloc) Scavenge(nbytes uintptr, mayUnlock bool) (r uintptr) {
+func (p *PageAlloc) Scavenge(nbytes uintptr) (r uintptr) {
pp := (*pageAlloc)(p)
systemstack(func() {
- // None of the tests need any higher-level locking, so we just
- // take the lock internally.
- lock(pp.mheapLock)
- r = pp.scavenge(nbytes, mayUnlock)
- unlock(pp.mheapLock)
+ r = pp.scavenge(nbytes)
})
return
}
@@ -1060,7 +1049,19 @@ func FreePageAlloc(pp *PageAlloc) {
//
// This should not be higher than 0x100*pallocChunkBytes to support
// mips and mipsle, which only have 31-bit address spaces.
-var BaseChunkIdx = ChunkIdx(chunkIndex(((0xc000*pageAlloc64Bit + 0x100*pageAlloc32Bit) * pallocChunkBytes) + arenaBaseOffset*sys.GoosAix*sys.GoarchPpc64))
+var BaseChunkIdx = func() ChunkIdx {
+ var prefix uintptr
+ if pageAlloc64Bit != 0 {
+ prefix = 0xc000
+ } else {
+ prefix = 0x100
+ }
+ baseAddr := prefix * pallocChunkBytes
+ if goos.IsAix != 0 {
+ baseAddr += arenaBaseOffset
+ }
+ return ChunkIdx(chunkIndex(baseAddr))
+}()
// PageBase returns an address given a chunk index and a page index
// relative to that chunk.
@@ -1225,3 +1226,95 @@ func GCTestPointerClass(p unsafe.Pointer) string {
}
const Raceenabled = raceenabled
+
+const (
+ GCBackgroundUtilization = gcBackgroundUtilization
+ GCGoalUtilization = gcGoalUtilization
+)
+
+type GCController struct {
+ gcControllerState
+}
+
+func NewGCController(gcPercent int) *GCController {
+ // Force the controller to escape. We're going to
+ // do 64-bit atomics on it, and if it gets stack-allocated
+ // on a 32-bit architecture, it may get allocated unaligned
+ // space.
+ g := escape(new(GCController)).(*GCController)
+ g.gcControllerState.test = true // Mark it as a test copy.
+ g.init(int32(gcPercent))
+ return g
+}
+
+func (c *GCController) StartCycle(stackSize, globalsSize uint64, scannableFrac float64, gomaxprocs int) {
+ c.scannableStackSize = stackSize
+ c.globalsScan = globalsSize
+ c.heapLive = c.trigger
+ c.heapScan += uint64(float64(c.trigger-c.heapMarked) * scannableFrac)
+ c.startCycle(0, gomaxprocs)
+}
+
+func (c *GCController) AssistWorkPerByte() float64 {
+ return c.assistWorkPerByte.Load()
+}
+
+func (c *GCController) HeapGoal() uint64 {
+ return c.heapGoal
+}
+
+func (c *GCController) HeapLive() uint64 {
+ return c.heapLive
+}
+
+func (c *GCController) HeapMarked() uint64 {
+ return c.heapMarked
+}
+
+func (c *GCController) Trigger() uint64 {
+ return c.trigger
+}
+
+type GCControllerReviseDelta struct {
+ HeapLive int64
+ HeapScan int64
+ HeapScanWork int64
+ StackScanWork int64
+ GlobalsScanWork int64
+}
+
+func (c *GCController) Revise(d GCControllerReviseDelta) {
+ c.heapLive += uint64(d.HeapLive)
+ c.heapScan += uint64(d.HeapScan)
+ c.heapScanWork.Add(d.HeapScanWork)
+ c.stackScanWork.Add(d.StackScanWork)
+ c.globalsScanWork.Add(d.GlobalsScanWork)
+ c.revise()
+}
+
+func (c *GCController) EndCycle(bytesMarked uint64, assistTime, elapsed int64, gomaxprocs int) {
+ c.assistTime = assistTime
+ triggerRatio := c.endCycle(elapsed, gomaxprocs, false)
+ c.resetLive(bytesMarked)
+ c.commit(triggerRatio)
+}
+
+var escapeSink any
+
+//go:noinline
+func escape(x any) any {
+ escapeSink = x
+ escapeSink = nil
+ return x
+}
+
+// Acquirem blocks preemption.
+func Acquirem() {
+ acquirem()
+}
+
+func Releasem() {
+ releasem(getg().m)
+}
+
+var Timediv = timediv
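The new escape helper and escapeSink variable implement a common trick: storing the argument in a package-level sink behind //go:noinline defeats escape analysis, guaranteeing a heap allocation (used here so the test GCController gets properly aligned storage). A user-level sketch of the same idiom, outside the runtime:

package main

import "fmt"

var sink any

// escape stores x in a package-level sink so escape analysis must treat it
// as escaping; //go:noinline keeps the stores from being optimized away.
//
//go:noinline
func escape(x any) any {
	sink = x
	sink = nil
	return x
}

func main() {
	p := escape(new(int)).(*int) // p now points to a heap-allocated int
	*p = 7
	fmt.Println(*p)
}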
diff --git a/libgo/go/runtime/export_unix_test.go b/libgo/go/runtime/export_unix_test.go
index 180af01..158c590 100644
--- a/libgo/go/runtime/export_unix_test.go
+++ b/libgo/go/runtime/export_unix_test.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build aix || darwin || dragonfly || freebsd || hurd || linux || netbsd || openbsd || solaris
-// +build aix darwin dragonfly freebsd hurd linux netbsd openbsd solaris
package runtime
diff --git a/libgo/go/runtime/export_windows_test.go b/libgo/go/runtime/export_windows_test.go
index 536b398..d9cf753 100644
--- a/libgo/go/runtime/export_windows_test.go
+++ b/libgo/go/runtime/export_windows_test.go
@@ -8,6 +8,8 @@ package runtime
import "unsafe"
+const MaxArgs = maxArgs
+
var (
TestingWER = &testingWER
OsYield = osyield
diff --git a/libgo/go/runtime/extern.go b/libgo/go/runtime/extern.go
index 6bd612f..a371d9c 100644
--- a/libgo/go/runtime/extern.go
+++ b/libgo/go/runtime/extern.go
@@ -78,6 +78,11 @@ It is a comma-separated list of name=val pairs setting these named variables:
If the line ends with "(forced)", this GC was forced by a
runtime.GC() call.
+ harddecommit: setting harddecommit=1 causes memory that is returned to the OS to
+ also have protections removed on it. This is the only mode of operation on Windows,
+ but is helpful in debugging scavenger-related issues on other platforms. It is
+ currently supported only on Linux.
+
inittrace: setting inittrace=1 causes the runtime to emit a single line to standard
error for each package with init work, summarizing the execution time and memory
allocation. No information is printed for inits executed as part of plugin loading
@@ -144,7 +149,7 @@ It is a comma-separated list of name=val pairs setting these named variables:
because it also disables the conservative stack scanning used
for asynchronously preempted goroutines.
-The net, net/http, and crypto/tls packages also refer to debugging variables in GODEBUG.
+The net and net/http packages also refer to debugging variables in GODEBUG.
See the documentation for those packages for details.
The GOMAXPROCS variable limits the number of operating system threads that
@@ -186,7 +191,11 @@ of the run-time system.
*/
package runtime
-import "runtime/internal/sys"
+import (
+ "internal/goarch"
+ "internal/goos"
+ "runtime/internal/sys"
+)
// Caller reports file and line number information about function invocations on
// the calling goroutine's stack. The argument skip is the number of stack frames
@@ -242,11 +251,11 @@ func Version() string {
// GOOS is the running program's operating system target:
// one of darwin, freebsd, linux, and so on.
// To view possible combinations of GOOS and GOARCH, run "go tool dist list".
-const GOOS string = sys.GOOS
+const GOOS string = goos.GOOS
// GOARCH is the running program's architecture target:
// one of 386, amd64, arm, s390x, and so on.
-const GOARCH string = sys.GOARCH
+const GOARCH string = goarch.GOARCH
// GCCGOTOOLDIR is the Tool Dir for the gccgo build
const GCCGOTOOLDIR string = sys.GccgoToolDir
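For illustration only (not from the patch): the exported runtime constants keep their values, only their backing packages move, and GODEBUG settings such as the new harddecommit flag are read from the environment at process start.

package main

import (
	"fmt"
	"os"
	"runtime"
)

func main() {
	// Unchanged public API; the definitions now come from internal/goarch
	// and internal/goos instead of runtime/internal/sys.
	fmt.Println(runtime.GOOS, runtime.GOARCH)

	// GODEBUG options, including harddecommit documented above, are read
	// from the environment at startup, e.g. GODEBUG=harddecommit=1 ./prog
	fmt.Println(os.Getenv("GODEBUG"))
}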
diff --git a/libgo/go/runtime/float_test.go b/libgo/go/runtime/float_test.go
new file mode 100644
index 0000000..b2aa43d
--- /dev/null
+++ b/libgo/go/runtime/float_test.go
@@ -0,0 +1,25 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "testing"
+)
+
+func TestIssue48807(t *testing.T) {
+ for _, i := range []uint64{
+ 0x8234508000000001, // from issue48807
+ 1<<56 + 1<<32 + 1,
+ } {
+ got := float32(i)
+ dontwant := float32(float64(i))
+ if got == dontwant {
+ // The test cases above should be uint64s such that
+ // this equality doesn't hold. These examples trigger
+ // the case where using an intermediate float64 doesn't work.
+ t.Errorf("direct float32 conversion doesn't work: arg=%x got=%x dontwant=%x", i, got, dontwant)
+ }
+ }
+}
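The values in TestIssue48807 are chosen so that converting through an intermediate float64 double-rounds. A worked sketch (not part of the patch) for the second test value, 1<<56 + 1<<32 + 1:

package main

import "fmt"

func main() {
	i := uint64(1<<56 + 1<<32 + 1)

	// Single rounding: the remainder above 2^56 is 2^32+1, which is more
	// than half of the float32 quantum 2^33, so the result rounds up to
	// 2^56 + 2^33.
	direct := float32(i)

	// Double rounding: float64 first drops the +1 (remainder 1 is below
	// half of the float64 quantum 16), leaving 2^56 + 2^32 -- an exact tie
	// for float32 -- which then rounds to even, i.e. down to 2^56.
	viaF64 := float32(float64(i))

	fmt.Println(direct == viaF64) // false: the intermediate float64 changes the answer
}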
diff --git a/libgo/go/runtime/gc_test.go b/libgo/go/runtime/gc_test.go
index bb5e1ae..b160c9f 100644
--- a/libgo/go/runtime/gc_test.go
+++ b/libgo/go/runtime/gc_test.go
@@ -21,7 +21,7 @@ import (
)
func TestGcSys(t *testing.T) {
- t.Skip("does not test anything; https://golang.org/issue/23343")
+ t.Skip("skipping known-flaky test; golang.org/issue/37331")
if os.Getenv("GOGC") == "off" {
t.Skip("skipping test; GOGC=off in environment")
}
@@ -136,7 +136,7 @@ func TestGcLastTime(t *testing.T) {
}
}
-var hugeSink interface{}
+var hugeSink any
func TestHugeGCInfo(t *testing.T) {
// The test ensures that compiler can chew these huge types even on weakest machines.
@@ -201,7 +201,10 @@ func TestGcZombieReporting(t *testing.T) {
t.Skip("gccgo uses partially conservative GC")
}
// This test is somewhat sensitive to how the allocator works.
- got := runTestProg(t, "testprog", "GCZombie")
+ // Pointers in the zombies slice may cross spans, so we add
+ // invalidptr=0 to avoid the badPointer check.
+ // See issue https://golang.org/issues/49613/
+ got := runTestProg(t, "testprog", "GCZombie", "GODEBUG=invalidptr=0")
want := "found pointer to free object"
if !strings.Contains(got, want) {
t.Fatalf("expected %q in output, but got %q", want, got)
@@ -477,11 +480,11 @@ func BenchmarkSetTypeNode1024Slice(b *testing.B) {
benchSetType(b, make([]Node1024, 32))
}
-func benchSetType(b *testing.B, x interface{}) {
+func benchSetType(b *testing.B, x any) {
v := reflect.ValueOf(x)
t := v.Type()
switch t.Kind() {
- case reflect.Ptr:
+ case reflect.Pointer:
b.SetBytes(int64(t.Elem().Size()))
case reflect.Slice:
b.SetBytes(int64(t.Elem().Size()) * int64(v.Len()))
@@ -543,7 +546,7 @@ func TestPrintGC(t *testing.T) {
close(done)
}
-func testTypeSwitch(x interface{}) error {
+func testTypeSwitch(x any) error {
switch y := x.(type) {
case nil:
// ok
@@ -553,14 +556,14 @@ func testTypeSwitch(x interface{}) error {
return nil
}
-func testAssert(x interface{}) error {
+func testAssert(x any) error {
if y, ok := x.(error); ok {
return y
}
return nil
}
-func testAssertVar(x interface{}) error {
+func testAssertVar(x any) error {
var y, ok = x.(error)
if ok {
return y
@@ -571,7 +574,7 @@ func testAssertVar(x interface{}) error {
var a bool
//go:noinline
-func testIfaceEqual(x interface{}) {
+func testIfaceEqual(x any) {
if x == "abc" {
a = true
}
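The reflect.Ptr -> reflect.Pointer change in benchSetType is another Go 1.18 rename: Pointer and Ptr are the same Kind constant, so behavior is unchanged. Illustrative check (not from the patch):

package main

import (
	"fmt"
	"reflect"
)

func main() {
	k := reflect.TypeOf(new(int)).Kind()
	// Pointer and Ptr are the same Kind value, so existing switches keep working.
	fmt.Println(k == reflect.Pointer, reflect.Pointer == reflect.Ptr) // true true
}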
diff --git a/libgo/go/runtime/gcinfo_test.go b/libgo/go/runtime/gcinfo_test.go
index d329e32..5936a87 100644
--- a/libgo/go/runtime/gcinfo_test.go
+++ b/libgo/go/runtime/gcinfo_test.go
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+//go:build ignore
// +build ignore
package runtime_test
@@ -52,7 +53,7 @@ func TestGCInfo(t *testing.T) {
runtime.KeepAlive(x)
}
{
- var x interface{}
+ var x any
verifyGCInfo(t, "stack eface", &x, infoEface)
runtime.KeepAlive(x)
}
@@ -70,12 +71,12 @@ func TestGCInfo(t *testing.T) {
verifyGCInfo(t, "heap PtrScalar", escape(new(PtrScalar)), trimDead(infoPtrScalar))
verifyGCInfo(t, "heap BigStruct", escape(new(BigStruct)), trimDead(infoBigStruct()))
verifyGCInfo(t, "heap string", escape(new(string)), trimDead(infoString))
- verifyGCInfo(t, "heap eface", escape(new(interface{})), trimDead(infoEface))
+ verifyGCInfo(t, "heap eface", escape(new(any)), trimDead(infoEface))
verifyGCInfo(t, "heap iface", escape(new(Iface)), trimDead(infoIface))
}
}
-func verifyGCInfo(t *testing.T, name string, p interface{}, mask0 []byte) {
+func verifyGCInfo(t *testing.T, name string, p any, mask0 []byte) {
mask := runtime.GCMask(p)
if !bytes.Equal(mask, mask0) {
t.Errorf("bad GC program for %v:\nwant %+v\ngot %+v", name, mask0, mask)
@@ -90,9 +91,9 @@ func trimDead(mask []byte) []byte {
return mask
}
-var gcinfoSink interface{}
+var gcinfoSink any
-func escape(p interface{}) interface{} {
+func escape(p any) any {
gcinfoSink = p
return p
}
@@ -180,18 +181,18 @@ var (
bssBigStruct BigStruct
bssString string
bssSlice []string
- bssEface interface{}
+ bssEface any
bssIface Iface
// DATA
- dataPtr = Ptr{new(byte)}
- dataScalarPtr = ScalarPtr{q: 1}
- dataPtrScalar = PtrScalar{w: 1}
- dataBigStruct = BigStruct{w: 1}
- dataString = "foo"
- dataSlice = []string{"foo"}
- dataEface interface{} = 42
- dataIface Iface = IfaceImpl(42)
+ dataPtr = Ptr{new(byte)}
+ dataScalarPtr = ScalarPtr{q: 1}
+ dataPtrScalar = PtrScalar{w: 1}
+ dataBigStruct = BigStruct{w: 1}
+ dataString = "foo"
+ dataSlice = []string{"foo"}
+ dataEface any = 42
+ dataIface Iface = IfaceImpl(42)
infoString = []byte{typePointer, typeScalar}
infoSlice = []byte{typePointer, typeScalar, typeScalar}
diff --git a/libgo/go/runtime/hash32.go b/libgo/go/runtime/hash32.go
index 0df7303..52977ee 100644
--- a/libgo/go/runtime/hash32.go
+++ b/libgo/go/runtime/hash32.go
@@ -6,7 +6,6 @@
// wyhash: https://github.com/wangyi-fudan/wyhash/blob/ceb019b530e2c1c14d70b79bfa2bc49de7d95bc1/Modern%20Non-Cryptographic%20Hash%20Function%20and%20Pseudorandom%20Number%20Generator.pdf
//go:build 386 || arm || mips || mipsle || amd64p32 || armbe || m68k || mips64p32 || mips64p32le || nios2 || ppc || riscv || s390 || sh || shbe || sparc
-// +build 386 arm mips mipsle amd64p32 armbe m68k mips64p32 mips64p32le nios2 ppc riscv s390 sh shbe sparc
package runtime
diff --git a/libgo/go/runtime/hash64.go b/libgo/go/runtime/hash64.go
index 96ed90b..a1d2529 100644
--- a/libgo/go/runtime/hash64.go
+++ b/libgo/go/runtime/hash64.go
@@ -6,7 +6,6 @@
// wyhash: https://github.com/wangyi-fudan/wyhash
//go:build amd64 || arm64 || mips64 || mips64le || ppc64 || ppc64le || riscv64 || s390x || wasm || alpha || arm64be || ia64 || sparc64
-// +build amd64 arm64 mips64 mips64le ppc64 ppc64le riscv64 s390x wasm alpha arm64be ia64 sparc64
package runtime
diff --git a/libgo/go/runtime/hash_test.go b/libgo/go/runtime/hash_test.go
index 753b129..e8e8d50 100644
--- a/libgo/go/runtime/hash_test.go
+++ b/libgo/go/runtime/hash_test.go
@@ -382,7 +382,7 @@ func (k *Int64Key) name() string {
}
type EfaceKey struct {
- i interface{}
+ i any
}
func (k *EfaceKey) clear() {
diff --git a/libgo/go/runtime/heapdump.go b/libgo/go/runtime/heapdump.go
index 171acb8..32cd0bf 100644
--- a/libgo/go/runtime/heapdump.go
+++ b/libgo/go/runtime/heapdump.go
@@ -12,7 +12,7 @@
package runtime
import (
- "runtime/internal/sys"
+ "internal/goarch"
"unsafe"
)
@@ -247,7 +247,7 @@ func dumpbv(cbv *bitvector, offset uintptr) {
for i := uintptr(0); i < uintptr(cbv.n); i++ {
if cbv.ptrbit(i) == 1 {
dumpint(fieldKindPtr)
- dumpint(uint64(offset + i*sys.PtrSize))
+ dumpint(uint64(offset + i*goarch.PtrSize))
}
}
}
@@ -388,7 +388,7 @@ func dumpparams() {
} else {
dumpbool(true) // big-endian ptrs
}
- dumpint(sys.PtrSize)
+ dumpint(goarch.PtrSize)
var arenaStart, arenaEnd uintptr
for i1 := range mheap_.arenas {
if mheap_.arenas[i1] == nil {
@@ -409,7 +409,7 @@ func dumpparams() {
}
dumpint(uint64(arenaStart))
dumpint(uint64(arenaEnd))
- dumpstr(sys.GOARCH)
+ dumpstr(goarch.GOARCH)
dumpstr(buildVersion)
dumpint(uint64(ncpu))
}
@@ -586,7 +586,7 @@ func dumpfields(bv bitvector) {
func makeheapobjbv(p uintptr, size uintptr) bitvector {
// Extend the temp buffer if necessary.
- nptr := size / sys.PtrSize
+ nptr := size / goarch.PtrSize
if uintptr(len(tmpbuf)) < nptr/8+1 {
if tmpbuf != nil {
sysFree(unsafe.Pointer(&tmpbuf[0]), uintptr(len(tmpbuf)), &memstats.other_sys)
diff --git a/libgo/go/runtime/iface.go b/libgo/go/runtime/iface.go
index f9df1e0..f702132 100644
--- a/libgo/go/runtime/iface.go
+++ b/libgo/go/runtime/iface.go
@@ -5,8 +5,8 @@
package runtime
import (
+ "internal/goarch"
"runtime/internal/atomic"
- "runtime/internal/sys"
"unsafe"
)
@@ -117,7 +117,7 @@ func (t *itabTableType) find(inter *interfacetype, typ *_type) *itab {
mask := t.size - 1
h := itabHashFunc(inter, typ) & mask
for i := uintptr(1); ; i++ {
- p := (**itab)(add(unsafe.Pointer(&t.entries), h*sys.PtrSize))
+ p := (**itab)(add(unsafe.Pointer(&t.entries), h*goarch.PtrSize))
// Use atomic read here so if we see m != nil, we also see
// the initializations of the fields of m.
// m := *p
@@ -150,7 +150,7 @@ func itabAdd(m *itab) {
// t2 = new(itabTableType) + some additional entries
// We lie and tell malloc we want pointer-free memory because
// all the pointed-to values are not in the heap.
- t2 := (*itabTableType)(mallocgc((2+2*t.size)*sys.PtrSize, nil, true))
+ t2 := (*itabTableType)(mallocgc((2+2*t.size)*goarch.PtrSize, nil, true))
t2.size = t.size * 2
// Copy over entries.
@@ -178,7 +178,7 @@ func (t *itabTableType) add(m *itab) {
mask := t.size - 1
h := itabHashFunc(m.inter, m._type()) & mask
for i := uintptr(1); ; i++ {
- p := (**itab)(add(unsafe.Pointer(&t.entries), h*sys.PtrSize))
+ p := (**itab)(add(unsafe.Pointer(&t.entries), h*goarch.PtrSize))
m2 := *p
if m2 == m {
// A given itab may be used in more than one module
@@ -253,7 +253,7 @@ func iterate_itabs(fn func(*itab)) {
// so no other locks/atomics needed.
t := itabTable
for i := uintptr(0); i < t.size; i++ {
- m := *(**itab)(add(unsafe.Pointer(&t.entries), i*sys.PtrSize))
+ m := *(**itab)(add(unsafe.Pointer(&t.entries), i*goarch.PtrSize))
if m != nil {
fn(m)
}
@@ -304,7 +304,7 @@ func getitab(lhs, rhs *_type, canfail bool) unsafe.Pointer {
}
// Entry doesn't exist yet. Make a new entry & add it.
- m = (*itab)(persistentalloc(unsafe.Sizeof(itab{})+uintptr(len(lhsi.methods)-1)*sys.PtrSize, 0, &memstats.other_sys))
+ m = (*itab)(persistentalloc(unsafe.Sizeof(itab{})+uintptr(len(lhsi.methods)-1)*goarch.PtrSize, 0, &memstats.other_sys))
m.inter = lhsi
m.methods[0] = unsafe.Pointer(rhs)
m.init()
diff --git a/libgo/go/runtime/iface_test.go b/libgo/go/runtime/iface_test.go
index 43d3698..98d7164 100644
--- a/libgo/go/runtime/iface_test.go
+++ b/libgo/go/runtime/iface_test.go
@@ -44,8 +44,8 @@ func (Tstr) Method1() {}
func (Tslice) Method1() {}
var (
- e interface{}
- e_ interface{}
+ e any
+ e_ any
i1 I1
i2 I2
ts TS
@@ -196,7 +196,7 @@ func BenchmarkAssertI2I(b *testing.B) {
func BenchmarkAssertI2E(b *testing.B) {
i1 = tm
for i := 0; i < b.N; i++ {
- e = i1.(interface{})
+ e = i1.(any)
}
}
@@ -224,28 +224,28 @@ func BenchmarkAssertE2T2Blank(b *testing.B) {
func BenchmarkAssertI2E2(b *testing.B) {
i1 = tm
for i := 0; i < b.N; i++ {
- e, ok = i1.(interface{})
+ e, ok = i1.(any)
}
}
func BenchmarkAssertI2E2Blank(b *testing.B) {
i1 = tm
for i := 0; i < b.N; i++ {
- _, ok = i1.(interface{})
+ _, ok = i1.(any)
}
}
func BenchmarkAssertE2E2(b *testing.B) {
e = tm
for i := 0; i < b.N; i++ {
- e_, ok = e.(interface{})
+ e_, ok = e.(any)
}
}
func BenchmarkAssertE2E2Blank(b *testing.B) {
e = tm
for i := 0; i < b.N; i++ {
- _, ok = e.(interface{})
+ _, ok = e.(any)
}
}
@@ -254,7 +254,7 @@ func TestNonEscapingConvT2E(t *testing.T) {
t.Skip("does not work on gccgo without better escape analysis")
}
- m := make(map[interface{}]bool)
+ m := make(map[any]bool)
m[42] = true
if !m[42] {
t.Fatalf("42 is not present in the map")
diff --git a/libgo/go/runtime/internal/atomic/atomic_test.go b/libgo/go/runtime/internal/atomic/atomic_test.go
index c9c2eba..2ae60b8 100644
--- a/libgo/go/runtime/internal/atomic/atomic_test.go
+++ b/libgo/go/runtime/internal/atomic/atomic_test.go
@@ -5,9 +5,9 @@
package atomic_test
import (
+ "internal/goarch"
"runtime"
"runtime/internal/atomic"
- "runtime/internal/sys"
"testing"
"unsafe"
)
@@ -56,7 +56,7 @@ func TestXadduintptr(t *testing.T) {
// Tests that xadduintptr correctly updates 64-bit values. The place where
// we actually do so is mstats.go, functions mSysStat{Inc,Dec}.
func TestXadduintptrOnUint64(t *testing.T) {
- if sys.BigEndian {
+ if goarch.BigEndian {
// On big endian architectures, we never use xadduintptr to update
// 64-bit values and hence we skip the test. (Note that functions
// mSysStat{Inc,Dec} in mstats.go have explicit checks for
diff --git a/libgo/go/runtime/internal/atomic/bench_test.go b/libgo/go/runtime/internal/atomic/bench_test.go
index 2476c06..efc0531a 100644
--- a/libgo/go/runtime/internal/atomic/bench_test.go
+++ b/libgo/go/runtime/internal/atomic/bench_test.go
@@ -9,7 +9,7 @@ import (
"testing"
)
-var sink interface{}
+var sink any
func BenchmarkAtomicLoad64(b *testing.B) {
var x uint64
diff --git a/libgo/go/runtime/internal/atomic/doc.go b/libgo/go/runtime/internal/atomic/doc.go
new file mode 100644
index 0000000..08e6b6c
--- /dev/null
+++ b/libgo/go/runtime/internal/atomic/doc.go
@@ -0,0 +1,18 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+Package atomic provides atomic operations, independent of sync/atomic,
+to the runtime.
+
+On most platforms, the compiler is aware of the functions defined
+in this package, and they're replaced with platform-specific intrinsics.
+On other platforms, generic implementations are made available.
+
+Unless otherwise noted, operations defined in this package are sequentially
+consistent across threads with respect to the values they manipulate. More
+specifically, operations that happen in a specific order on one thread
+will always be observed to happen in exactly that order by another thread.
+*/
+package atomic
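Illustrative only: runtime/internal/atomic cannot be imported outside the runtime, but the public sync/atomic package gives the same sequentially consistent behavior for the message-passing pattern the doc comment describes.

package main

import (
	"fmt"
	"sync/atomic"
)

func main() {
	var data, ready int32

	go func() {
		atomic.StoreInt32(&data, 42) // the data store...
		atomic.StoreInt32(&ready, 1) // ...is ordered before the flag store
	}()

	for atomic.LoadInt32(&ready) == 0 {
		// spin until the flag is published
	}
	fmt.Println(atomic.LoadInt32(&data)) // a reader that sees ready==1 always sees 42
}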
diff --git a/libgo/go/runtime/internal/atomic/stubs.go b/libgo/go/runtime/internal/atomic/stubs.go
index e7544ba..7df8d9c 100644
--- a/libgo/go/runtime/internal/atomic/stubs.go
+++ b/libgo/go/runtime/internal/atomic/stubs.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build !wasm
-// +build !wasm
package atomic
diff --git a/libgo/go/runtime/internal/atomic/types.go b/libgo/go/runtime/internal/atomic/types.go
new file mode 100644
index 0000000..1a240d7
--- /dev/null
+++ b/libgo/go/runtime/internal/atomic/types.go
@@ -0,0 +1,395 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package atomic
+
+import "unsafe"
+
+// Int32 is an atomically accessed int32 value.
+//
+// An Int32 must not be copied.
+type Int32 struct {
+ noCopy noCopy
+ value int32
+}
+
+// Load accesses and returns the value atomically.
+func (i *Int32) Load() int32 {
+ return Loadint32(&i.value)
+}
+
+// Store updates the value atomically.
+func (i *Int32) Store(value int32) {
+ Storeint32(&i.value, value)
+}
+
+// CompareAndSwap atomically compares i's value with old,
+// and if they're equal, swaps i's value with new.
+//
+// Returns true if the operation succeeded.
+func (i *Int32) CompareAndSwap(old, new int32) bool {
+ return Casint32(&i.value, old, new)
+}
+
+// Swap replaces i's value with new, returning
+// i's value before the replacement.
+func (i *Int32) Swap(new int32) int32 {
+ return Xchgint32(&i.value, new)
+}
+
+// Add adds delta to i atomically, returning
+// the new updated value.
+//
+// This operation wraps around in the usual
+// two's-complement way.
+func (i *Int32) Add(delta int32) int32 {
+ return Xaddint32(&i.value, delta)
+}
+
+// Int64 is an atomically accessed int64 value.
+//
+// An Int64 must not be copied.
+type Int64 struct {
+ noCopy noCopy
+ value int64
+}
+
+// Load accesses and returns the value atomically.
+func (i *Int64) Load() int64 {
+ return Loadint64(&i.value)
+}
+
+// Store updates the value atomically.
+func (i *Int64) Store(value int64) {
+ Storeint64(&i.value, value)
+}
+
+// CompareAndSwap atomically compares i's value with old,
+// and if they're equal, swaps i's value with new.
+//
+// Returns true if the operation succeeded.
+func (i *Int64) CompareAndSwap(old, new int64) bool {
+ return Casint64(&i.value, old, new)
+}
+
+// Swap replaces i's value with new, returning
+// i's value before the replacement.
+func (i *Int64) Swap(new int64) int64 {
+ return Xchgint64(&i.value, new)
+}
+
+// Add adds delta to i atomically, returning
+// the new updated value.
+//
+// This operation wraps around in the usual
+// two's-complement way.
+func (i *Int64) Add(delta int64) int64 {
+ return Xaddint64(&i.value, delta)
+}
+
+// Uint8 is an atomically accessed uint8 value.
+//
+// A Uint8 must not be copied.
+type Uint8 struct {
+ noCopy noCopy
+ value uint8
+}
+
+// Load accesses and returns the value atomically.
+func (u *Uint8) Load() uint8 {
+ return Load8(&u.value)
+}
+
+// Store updates the value atomically.
+func (u *Uint8) Store(value uint8) {
+ Store8(&u.value, value)
+}
+
+// And takes value and performs a bit-wise
+// "and" operation with the value of u, storing
+// the result into u.
+//
+// The full process is performed atomically.
+func (u *Uint8) And(value uint8) {
+ And8(&u.value, value)
+}
+
+// Or takes value and performs a bit-wise
+// "or" operation with the value of u, storing
+// the result into u.
+//
+// The full process is performed atomically.
+func (u *Uint8) Or(value uint8) {
+ Or8(&u.value, value)
+}
+
+// Uint32 is an atomically accessed uint32 value.
+//
+// A Uint32 must not be copied.
+type Uint32 struct {
+ noCopy noCopy
+ value uint32
+}
+
+// Load accesses and returns the value atomically.
+func (u *Uint32) Load() uint32 {
+ return Load(&u.value)
+}
+
+// LoadAcquire is a partially unsynchronized version
+// of Load that relaxes ordering constraints. Other threads
+// may observe operations that precede this operation to
+// occur after it, but no operation that occurs after it
+// on this thread can be observed to occur before it.
+//
+// WARNING: Use sparingly and with great care.
+func (u *Uint32) LoadAcquire() uint32 {
+ return LoadAcq(&u.value)
+}
+
+// Store updates the value atomically.
+func (u *Uint32) Store(value uint32) {
+ Store(&u.value, value)
+}
+
+// StoreRelease is a partially unsynchronized version
+// of Store that relaxes ordering constraints. Other threads
+// may observe operations that occur after this operation to
+// precede it, but no operation that precedes it
+// on this thread can be observed to occur after it.
+//
+// WARNING: Use sparingly and with great care.
+func (u *Uint32) StoreRelease(value uint32) {
+ StoreRel(&u.value, value)
+}
+
+// CompareAndSwap atomically compares u's value with old,
+// and if they're equal, swaps u's value with new.
+//
+// Returns true if the operation succeeded.
+func (u *Uint32) CompareAndSwap(old, new uint32) bool {
+ return Cas(&u.value, old, new)
+}
+
+// CompareAndSwapRelease is a partially unsynchronized version
+// of Cas that relaxes ordering constraints. Other threads
+// may observe operations that occur after this operation to
+// precede it, but no operation that precedes it
+// on this thread can be observed to occur after it.
+//
+// Returns true if the operation succeeded.
+//
+// WARNING: Use sparingly and with great care.
+func (u *Uint32) CompareAndSwapRelease(old, new uint32) bool {
+ return CasRel(&u.value, old, new)
+}
+
+// Swap replaces u's value with new, returning
+// u's value before the replacement.
+func (u *Uint32) Swap(value uint32) uint32 {
+ return Xchg(&u.value, value)
+}
+
+// And takes value and performs a bit-wise
+// "and" operation with the value of u, storing
+// the result into u.
+//
+// The full process is performed atomically.
+func (u *Uint32) And(value uint32) {
+ And(&u.value, value)
+}
+
+// Or takes value and performs a bit-wise
+// "or" operation with the value of u, storing
+// the result into u.
+//
+// The full process is performed atomically.
+func (u *Uint32) Or(value uint32) {
+ Or(&u.value, value)
+}
+
+// Add adds delta to u atomically, returning
+// the new updated value.
+//
+// This operation wraps around in the usual
+// two's-complement way.
+func (u *Uint32) Add(delta int32) uint32 {
+ return Xadd(&u.value, delta)
+}
+
+// Uint64 is an atomically accessed uint64 value.
+//
+// A Uint64 must not be copied.
+type Uint64 struct {
+ noCopy noCopy
+ value uint64
+}
+
+// Load accesses and returns the value atomically.
+func (u *Uint64) Load() uint64 {
+ return Load64(&u.value)
+}
+
+// Store updates the value atomically.
+func (u *Uint64) Store(value uint64) {
+ Store64(&u.value, value)
+}
+
+// CompareAndSwap atomically compares u's value with old,
+// and if they're equal, swaps u's value with new.
+//
+// Returns true if the operation succeeded.
+func (u *Uint64) CompareAndSwap(old, new uint64) bool {
+ return Cas64(&u.value, old, new)
+}
+
+// Swap replaces u's value with new, returning
+// u's value before the replacement.
+func (u *Uint64) Swap(value uint64) uint64 {
+ return Xchg64(&u.value, value)
+}
+
+// Add adds delta to u atomically, returning
+// the new updated value.
+//
+// This operation wraps around in the usual
+// two's-complement way.
+func (u *Uint64) Add(delta int64) uint64 {
+ return Xadd64(&u.value, delta)
+}
+
+// Uintptr is an atomically accessed uintptr value.
+//
+// A Uintptr must not be copied.
+type Uintptr struct {
+ noCopy noCopy
+ value uintptr
+}
+
+// Load accesses and returns the value atomically.
+func (u *Uintptr) Load() uintptr {
+ return Loaduintptr(&u.value)
+}
+
+// LoadAcquire is a partially unsynchronized version
+// of Load that relaxes ordering constraints. Other threads
+// may observe operations that precede this operation to
+// occur after it, but no operation that occurs after it
+// on this thread can be observed to occur before it.
+//
+// WARNING: Use sparingly and with great care.
+func (u *Uintptr) LoadAcquire() uintptr {
+ return LoadAcquintptr(&u.value)
+}
+
+// Store updates the value atomically.
+func (u *Uintptr) Store(value uintptr) {
+ Storeuintptr(&u.value, value)
+}
+
+// StoreRelease is a partially unsynchronized version
+// of Store that relaxes ordering constraints. Other threads
+// may observe operations that occur after this operation to
+// precede it, but no operation that precedes it
+// on this thread can be observed to occur after it.
+//
+// WARNING: Use sparingly and with great care.
+func (u *Uintptr) StoreRelease(value uintptr) {
+ StoreReluintptr(&u.value, value)
+}
+
+// CompareAndSwap atomically compares u's value with old,
+// and if they're equal, swaps u's value with new.
+//
+// Returns true if the operation succeeded.
+func (u *Uintptr) CompareAndSwap(old, new uintptr) bool {
+ return Casuintptr(&u.value, old, new)
+}
+
+// Swap replaces u's value with new, returning
+// u's value before the replacement.
+func (u *Uintptr) Swap(value uintptr) uintptr {
+ return Xchguintptr(&u.value, value)
+}
+
+// Add adds delta to u atomically, returning
+// the new updated value.
+//
+// This operation wraps around in the usual
+// two's-complement way.
+func (u *Uintptr) Add(delta uintptr) uintptr {
+ return Xadduintptr(&u.value, delta)
+}
+
+// Float64 is an atomically accessed float64 value.
+//
+// A Float64 must not be copied.
+type Float64 struct {
+ u Uint64
+}
+
+// Load accesses and returns the value atomically.
+func (f *Float64) Load() float64 {
+ r := f.u.Load()
+ return *(*float64)(unsafe.Pointer(&r))
+}
+
+// Store updates the value atomically.
+func (f *Float64) Store(value float64) {
+ f.u.Store(*(*uint64)(unsafe.Pointer(&value)))
+}
+
+// UnsafePointer is an atomically accessed unsafe.Pointer value.
+//
+// Note that because of the atomicity guarantees, stores to values
+// of this type never trigger a write barrier, and the relevant
+// methods are suffixed with "NoWB" to indicate that explicitly.
+// As a result, this type should be used carefully, and sparingly,
+// mostly with values that do not live in the Go heap anyway.
+//
+// An UnsafePointer must not be copied.
+type UnsafePointer struct {
+ noCopy noCopy
+ value unsafe.Pointer
+}
+
+// Load accesses and returns the value atomically.
+func (u *UnsafePointer) Load() unsafe.Pointer {
+ return Loadp(unsafe.Pointer(&u.value))
+}
+
+// StoreNoWB updates the value atomically.
+//
+// WARNING: As the name implies this operation does *not*
+// perform a write barrier on value, and so this operation may
+// hide pointers from the GC. Use with care and sparingly.
+// It is safe to use with values not found in the Go heap.
+func (u *UnsafePointer) StoreNoWB(value unsafe.Pointer) {
+ StorepNoWB(unsafe.Pointer(&u.value), value)
+}
+
+// CompareAndSwapNoWB atomically (with respect to other methods)
+// compares u's value with old, and if they're equal,
+// swaps u's value with new.
+//
+// Returns true if the operation succeeded.
+//
+// WARNING: As the name implies this operation does *not*
+// perform a write barrier on value, and so this operation may
+// hide pointers from the GC. Use with care and sparingly.
+// It is safe to use with values not found in the Go heap.
+func (u *UnsafePointer) CompareAndSwapNoWB(old, new unsafe.Pointer) bool {
+ return Casp1(&u.value, old, new)
+}
+
+// noCopy may be embedded into structs which must not be copied
+// after the first use.
+//
+// See https://golang.org/issues/8005#issuecomment-190753527
+// for details.
+type noCopy struct{}
+
+// Lock is a no-op used by -copylocks checker from `go vet`.
+func (*noCopy) Lock() {}
+func (*noCopy) Unlock() {}
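A sketch of the wrapper pattern introduced above, reimplemented on top of sync/atomic so it runs outside the runtime (the type and method names mirror the internal ones but this is not the runtime's code). The embedded noCopy field is what lets `go vet -copylocks` flag accidental copies.

package main

import (
	"fmt"
	"sync/atomic"
)

type noCopy struct{}

func (*noCopy) Lock()   {}
func (*noCopy) Unlock() {}

// Uint32 mirrors runtime/internal/atomic.Uint32: all access goes through
// atomic operations on the single value field.
type Uint32 struct {
	noCopy noCopy
	value  uint32
}

func (u *Uint32) Load() uint32           { return atomic.LoadUint32(&u.value) }
func (u *Uint32) Store(v uint32)         { atomic.StoreUint32(&u.value, v) }
func (u *Uint32) Add(delta int32) uint32 { return atomic.AddUint32(&u.value, uint32(delta)) }
func (u *Uint32) CompareAndSwap(old, new uint32) bool {
	return atomic.CompareAndSwapUint32(&u.value, old, new)
}

func main() {
	var n Uint32
	n.Store(1)
	n.Add(2)
	fmt.Println(n.Load(), n.CompareAndSwap(3, 10), n.Load()) // 3 true 10
}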
diff --git a/libgo/go/runtime/internal/atomic/types_64bit.go b/libgo/go/runtime/internal/atomic/types_64bit.go
new file mode 100644
index 0000000..43c1ba2
--- /dev/null
+++ b/libgo/go/runtime/internal/atomic/types_64bit.go
@@ -0,0 +1,29 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build amd64 || arm64 || mips64 || mips64le || ppc64 || ppc64le || riscv64 || s390x || wasm
+
+package atomic
+
+// LoadAcquire is a partially unsynchronized version
+// of Load that relaxes ordering constraints. Other threads
+// may observe operations that precede this operation to
+// occur after it, but no operation that occurs after it
+// on this thread can be observed to occur before it.
+//
+// WARNING: Use sparingly and with great care.
+func (u *Uint64) LoadAcquire() uint64 {
+ return LoadAcq64(&u.value)
+}
+
+// StoreRelease is a partially unsynchronized version
+// of Store that relaxes ordering constraints. Other threads
+// may observe operations that occur after this operation to
+// precede it, but no operation that precedes it
+// on this thread can be observed to occur after it.
+//
+// WARNING: Use sparingly and with great care.
+func (u *Uint64) StoreRelease(value uint64) {
+ StoreRel64(&u.value, value)
+}
diff --git a/libgo/go/runtime/internal/math/math.go b/libgo/go/runtime/internal/math/math.go
index b6bd12d..c3fac36 100644
--- a/libgo/go/runtime/internal/math/math.go
+++ b/libgo/go/runtime/internal/math/math.go
@@ -4,14 +4,14 @@
package math
-import "runtime/internal/sys"
+import "internal/goarch"
const MaxUintptr = ^uintptr(0)
// MulUintptr returns a * b and whether the multiplication overflowed.
// On supported platforms this is an intrinsic lowered by the compiler.
func MulUintptr(a, b uintptr) (uintptr, bool) {
- if a|b < 1<<(4*sys.PtrSize) || a == 0 {
+ if a|b < 1<<(4*goarch.PtrSize) || a == 0 {
return a * b, false
}
overflow := b > MaxUintptr/a
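The fast path in MulUintptr works because if both operands fit in half the word size, their product cannot overflow. A portable sketch of the same shape using math/bits (assumed names, not the runtime's code):

package main

import (
	"fmt"
	"math/bits"
)

// mulOverflows mirrors MulUintptr: if a|b is below 2^(word/2), skip the
// division-based overflow check entirely.
func mulOverflows(a, b uint) (uint, bool) {
	if a|b < 1<<(bits.UintSize/2) || a == 0 {
		return a * b, false
	}
	const maxUint = ^uint(0)
	return a * b, b > maxUint/a
}

func main() {
	fmt.Println(mulOverflows(1<<20, 1<<20))             // 1099511627776 false (fast path)
	fmt.Println(mulOverflows(uint(1)<<40, uint(1)<<40)) // 0 true on 64-bit platforms
}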
diff --git a/libgo/go/runtime/internal/sys/arch.go b/libgo/go/runtime/internal/sys/consts.go
index 7e09c10..fffcf81 100644
--- a/libgo/go/runtime/internal/sys/arch.go
+++ b/libgo/go/runtime/internal/sys/consts.go
@@ -4,38 +4,31 @@
package sys
-type ArchFamilyType int
-
-// PtrSize is the size of a pointer in bytes - unsafe.Sizeof(uintptr(0)) but as an ideal constant.
-// It is also the size of the machine's native word size (that is, 4 on 32-bit systems, 8 on 64-bit).
-const PtrSize = 4 << (^uintptr(0) >> 63)
+import (
+ "internal/goarch"
+ "internal/goos"
+)
// AIX requires a larger stack for syscalls.
-const StackGuardMultiplier = StackGuardMultiplierDefault*(1-GoosAix) + 2*GoosAix
-
-// ArchFamily is the architecture family (AMD64, ARM, ...)
-const ArchFamily ArchFamilyType = _ArchFamily
-
-// BigEndian reports whether the architecture is big-endian.
-const BigEndian = _BigEndian
+const StackGuardMultiplier = StackGuardMultiplierDefault*(1-goos.IsAix) + 2*goos.IsAix
// DefaultPhysPageSize is the default physical page size.
-const DefaultPhysPageSize = _DefaultPhysPageSize
+const DefaultPhysPageSize = goarch.DefaultPhysPageSize
// PCQuantum is the minimal unit for a program counter (1 on x86, 4 on most other systems).
// The various PC tables record PC deltas pre-divided by PCQuantum.
-const PCQuantum = _PCQuantum
+const PCQuantum = goarch.PCQuantum
// Int64Align is the required alignment for a 64-bit integer (4 on 32-bit systems, 8 on 64-bit).
-const Int64Align = _Int64Align
+const Int64Align = goarch.PtrSize
// MinFrameSize is the size of the system-reserved words at the bottom
// of a frame (just above the architectural stack pointer).
// It is zero on x86 and PtrSize on most non-x86 (LR-based) systems.
// On PowerPC it is larger, to cover three more reserved words:
// the compiler word, the link editor word, and the TOC save word.
-const MinFrameSize = _MinFrameSize
+const MinFrameSize = goarch.MinFrameSize
// StackAlign is the required alignment of the SP register.
// The stack must be at least word aligned, but some architectures require more.
-const StackAlign = _StackAlign
+const StackAlign = goarch.StackAlign
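The StackGuardMultiplier definition uses internal/goos's 0/1 constants to select a value arithmetically instead of with build tags. A sketch with stand-in values (StackGuardMultiplierDefault's real value comes from the build; 1 is used here only for illustration):

package main

import "fmt"

// Stand-in values: internal/goos defines IsAix as 1 when GOOS=aix and 0
// otherwise; multiplying by it selects between the two cases at compile time.
const (
	isAix                       = 0 // pretend this is not an AIX build
	stackGuardMultiplierDefault = 1
	stackGuardMultiplier        = stackGuardMultiplierDefault*(1-isAix) + 2*isAix
)

func main() {
	fmt.Println(stackGuardMultiplier) // 1 here; 2 when isAix is 1
}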
diff --git a/libgo/go/runtime/internal/sys/intrinsics_common.go b/libgo/go/runtime/internal/sys/intrinsics_common.go
index 818d75e..48d9759 100644
--- a/libgo/go/runtime/internal/sys/intrinsics_common.go
+++ b/libgo/go/runtime/internal/sys/intrinsics_common.go
@@ -141,3 +141,18 @@ func TrailingZeros8(x uint8) int {
func Len8(x uint8) int {
return int(len8tab[x])
}
+
+// Prefetch prefetches data from memory addr into the cache.
+//
+// AMD64: Produce PREFETCHT0 instruction
+//
+// ARM64: Produce PRFM instruction with PLDL1KEEP option
+func Prefetch(addr uintptr) {}
+
+// PrefetchStreamed prefetches data from memory addr, with a hint that this data is being streamed.
+// That is, it is likely to be accessed very soon, but only once. If possible, this will avoid polluting the cache.
+//
+// AMD64: Produce PREFETCHNTA instruction
+//
+// ARM64: Produce PRFM instruction with PLDL1STRM option
+func PrefetchStreamed(addr uintptr) {}
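The empty bodies above are the gccgo fallbacks; when intrinsified, the calls become the prefetch instructions named in the comments. A self-contained sketch of how such a hint is typically used (local stand-in function, not the runtime's):

package main

import (
	"fmt"
	"unsafe"
)

// prefetch stands in for runtime/internal/sys.Prefetch: a no-op here, an
// architecture-specific prefetch instruction when the compiler intrinsifies it.
func prefetch(addr uintptr) {}

func sum(xs []uint64) uint64 {
	var s uint64
	for i := range xs {
		if i+64 < len(xs) {
			prefetch(uintptr(unsafe.Pointer(&xs[i+64]))) // hint: pull a later element into cache early
		}
		s += xs[i]
	}
	return s
}

func main() {
	xs := make([]uint64, 1024)
	for i := range xs {
		xs[i] = uint64(i)
	}
	fmt.Println(sum(xs)) // 523776
}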
diff --git a/libgo/go/runtime/internal/sys/sys.go b/libgo/go/runtime/internal/sys/sys.go
index 9d9ac45..694101d 100644
--- a/libgo/go/runtime/internal/sys/sys.go
+++ b/libgo/go/runtime/internal/sys/sys.go
@@ -5,11 +5,3 @@
// package sys contains system- and configuration- and architecture-specific
// constants used by the runtime.
package sys
-
-// The next line makes 'go generate' write the zgo*.go files with
-// per-OS and per-arch information, including constants
-// named Goos$GOOS and Goarch$GOARCH for every
-// known GOOS and GOARCH. The constant is 1 on the
-// current system, 0 otherwise; multiplying by them is
-// useful for defining GOOS- or GOARCH-specific constants.
-//go:generate go run gengoos.go
diff --git a/libgo/go/runtime/lfstack_64bit.go b/libgo/go/runtime/lfstack_64bit.go
index 44e6812..8e08830 100644
--- a/libgo/go/runtime/lfstack_64bit.go
+++ b/libgo/go/runtime/lfstack_64bit.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build amd64 || arm64 || mips64 || mips64le || ppc64 || ppc64le || riscv64 || s390x || wasm || arm64be || alpha || sparc64 || ia64
-// +build amd64 arm64 mips64 mips64le ppc64 ppc64le riscv64 s390x wasm arm64be alpha sparc64 ia64
package runtime
diff --git a/libgo/go/runtime/lfstack_test.go b/libgo/go/runtime/lfstack_test.go
index fb4b459..d0a1b6b 100644
--- a/libgo/go/runtime/lfstack_test.go
+++ b/libgo/go/runtime/lfstack_test.go
@@ -24,7 +24,7 @@ func toMyNode(node *LFNode) *MyNode {
return (*MyNode)(unsafe.Pointer(node))
}
-var global interface{}
+var global any
func TestLFStack(t *testing.T) {
stack := new(uint64)
diff --git a/libgo/go/runtime/libfuzzer.go b/libgo/go/runtime/libfuzzer.go
index 578bce0..e7b3cdc 100644
--- a/libgo/go/runtime/libfuzzer.go
+++ b/libgo/go/runtime/libfuzzer.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build libfuzzer
-// +build libfuzzer
package runtime
diff --git a/libgo/go/runtime/lock_futex.go b/libgo/go/runtime/lock_futex.go
index 74f6428..446c054 100644
--- a/libgo/go/runtime/lock_futex.go
+++ b/libgo/go/runtime/lock_futex.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build dragonfly || freebsd || linux
-// +build dragonfly freebsd linux
package runtime
diff --git a/libgo/go/runtime/lock_js.go b/libgo/go/runtime/lock_js.go
index 0ca3512..80ee50d 100644
--- a/libgo/go/runtime/lock_js.go
+++ b/libgo/go/runtime/lock_js.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build js && wasm
-// +build js,wasm
package runtime
diff --git a/libgo/go/runtime/lock_sema.go b/libgo/go/runtime/lock_sema.go
index bcd8a86..b63f1b0 100644
--- a/libgo/go/runtime/lock_sema.go
+++ b/libgo/go/runtime/lock_sema.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build aix || darwin || hurd || netbsd || openbsd || plan9 || solaris || windows
-// +build aix darwin hurd netbsd openbsd plan9 solaris windows
package runtime
diff --git a/libgo/go/runtime/lockrank.go b/libgo/go/runtime/lockrank.go
index dde9f7c..4a16bc0 100644
--- a/libgo/go/runtime/lockrank.go
+++ b/libgo/go/runtime/lockrank.go
@@ -51,9 +51,9 @@ const (
lockRankItab
lockRankReflectOffs
lockRankHchan // Multiple hchans acquired in lock order in syncadjustsudogs()
+ lockRankTraceBuf
lockRankFin
lockRankNotifyList
- lockRankTraceBuf
lockRankTraceStrings
lockRankMspanSpecial
lockRankProf
@@ -80,6 +80,7 @@ const (
// Memory-related leaf locks
lockRankGlobalAlloc
+ lockRankPageAllocScav
// Other leaf locks
lockRankGFree
@@ -131,9 +132,9 @@ var lockNames = []string{
lockRankReflectOffs: "reflectOffs",
lockRankHchan: "hchan",
+ lockRankTraceBuf: "traceBuf",
lockRankFin: "fin",
lockRankNotifyList: "notifyList",
- lockRankTraceBuf: "traceBuf",
lockRankTraceStrings: "traceStrings",
lockRankMspanSpecial: "mspanSpecial",
lockRankProf: "prof",
@@ -157,7 +158,8 @@ var lockNames = []string{
lockRankMheap: "mheap",
lockRankMheapSpecial: "mheapSpecial",
- lockRankGlobalAlloc: "globalAlloc.mutex",
+ lockRankGlobalAlloc: "globalAlloc.mutex",
+ lockRankPageAllocScav: "pageAlloc.scav.lock",
lockRankGFree: "gFree",
lockRankHchanLeaf: "hchanLeaf",
@@ -208,31 +210,32 @@ var lockPartialOrder [][]lockRank = [][]lockRank{
lockRankItab: {},
lockRankReflectOffs: {lockRankItab},
lockRankHchan: {lockRankScavenge, lockRankSweep, lockRankHchan},
- lockRankFin: {lockRankSysmon, lockRankScavenge, lockRankSched, lockRankAllg, lockRankTimers, lockRankHchan},
- lockRankNotifyList: {},
lockRankTraceBuf: {lockRankSysmon, lockRankScavenge},
+ lockRankFin: {lockRankSysmon, lockRankScavenge, lockRankSched, lockRankAllg, lockRankTimers, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf},
+ lockRankNotifyList: {},
lockRankTraceStrings: {lockRankTraceBuf},
- lockRankMspanSpecial: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings},
- lockRankProf: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings},
- lockRankGcBitsArenas: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings},
+ lockRankMspanSpecial: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankNotifyList, lockRankTraceStrings},
+ lockRankProf: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankNotifyList, lockRankTraceStrings},
+ lockRankGcBitsArenas: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSched, lockRankAllg, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankNotifyList, lockRankTraceStrings},
lockRankRoot: {},
lockRankTrace: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankAssistQueue, lockRankSweep, lockRankSched, lockRankHchan, lockRankTraceBuf, lockRankTraceStrings, lockRankRoot},
- lockRankTraceStackTab: {lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankSched, lockRankAllg, lockRankTimers, lockRankHchan, lockRankFin, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankRoot, lockRankTrace},
+ lockRankTraceStackTab: {lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankSched, lockRankAllg, lockRankTimers, lockRankHchan, lockRankTraceBuf, lockRankFin, lockRankNotifyList, lockRankTraceStrings, lockRankRoot, lockRankTrace},
lockRankNetpollInit: {lockRankTimers},
lockRankRwmutexW: {},
lockRankRwmutexR: {lockRankSysmon, lockRankRwmutexW},
- lockRankSpanSetSpine: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings},
- lockRankGscan: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankSpanSetSpine},
- lockRankStackpool: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankRwmutexR, lockRankSpanSetSpine, lockRankGscan},
- lockRankStackLarge: {lockRankSysmon, lockRankAssistQueue, lockRankSched, lockRankItab, lockRankHchan, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankSpanSetSpine, lockRankGscan},
- lockRankDefer: {},
- lockRankSudog: {lockRankHchan, lockRankNotifyList},
- lockRankWbufSpans: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankAllg, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankNotifyList, lockRankTraceStrings, lockRankMspanSpecial, lockRankProf, lockRankRoot, lockRankGscan, lockRankDefer, lockRankSudog},
- lockRankMheap: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankMspanSpecial, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankSpanSetSpine, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankDefer, lockRankSudog, lockRankWbufSpans},
- lockRankMheapSpecial: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings},
- lockRankGlobalAlloc: {lockRankProf, lockRankSpanSetSpine, lockRankMheap, lockRankMheapSpecial},
+ lockRankSpanSetSpine: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankNotifyList, lockRankTraceStrings},
+ lockRankGscan: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankFin, lockRankNotifyList, lockRankTraceStrings, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankSpanSetSpine},
+ lockRankStackpool: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankFin, lockRankNotifyList, lockRankTraceStrings, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankTrace, lockRankTraceStackTab, lockRankNetpollInit, lockRankRwmutexR, lockRankSpanSetSpine, lockRankGscan},
+ lockRankStackLarge: {lockRankSysmon, lockRankAssistQueue, lockRankSched, lockRankItab, lockRankHchan, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankSpanSetSpine, lockRankGscan},
+ lockRankDefer: {},
+ lockRankSudog: {lockRankHchan, lockRankNotifyList},
+ lockRankWbufSpans: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankAllg, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankNotifyList, lockRankTraceStrings, lockRankMspanSpecial, lockRankProf, lockRankRoot, lockRankGscan, lockRankDefer, lockRankSudog},
+ lockRankMheap: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankFin, lockRankNotifyList, lockRankTraceStrings, lockRankMspanSpecial, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankSpanSetSpine, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankDefer, lockRankSudog, lockRankWbufSpans},
+ lockRankMheapSpecial: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankTraceBuf, lockRankNotifyList, lockRankTraceStrings},
+ lockRankGlobalAlloc: {lockRankProf, lockRankSpanSetSpine, lockRankMheap, lockRankMheapSpecial},
+ lockRankPageAllocScav: {lockRankMheap},
lockRankGFree: {lockRankSched},
lockRankHchanLeaf: {lockRankGscan, lockRankHchanLeaf},
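The lists above encode the static lock-rank partial order checked when the goexperiment.staticlockranking build is enabled: for each rank, the set names the ranks that may already be held when a lock of that rank is acquired. The runtime's real checker lives in lockrank_on.go below; the following is only a simplified, standalone sketch of the same idea, with illustrative rank names and a checkRank helper that are not part of the runtime's API.

package main

import "fmt"

type lockRank int

const (
	rankSched lockRank = iota
	rankAllg
	rankMheap
)

// allowed[r] lists the ranks that may already be held when a lock of rank r
// is acquired, mirroring the shape of the lockPartialOrder table above.
var allowed = map[lockRank][]lockRank{
	rankSched: {},
	rankAllg:  {rankSched},
	rankMheap: {rankSched, rankAllg},
}

// checkRank reports whether acquiring a lock of rank r while holding the
// given ranks respects the partial order.
func checkRank(held []lockRank, r lockRank) bool {
	for _, h := range held {
		ok := false
		for _, a := range allowed[r] {
			if h == a {
				ok = true
				break
			}
		}
		if !ok {
			return false
		}
	}
	return true
}

func main() {
	fmt.Println(checkRank([]lockRank{rankSched}, rankMheap)) // true: sched may be held before mheap
	fmt.Println(checkRank([]lockRank{rankMheap}, rankSched)) // false: ordering violation
}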
diff --git a/libgo/go/runtime/lockrank_off.go b/libgo/go/runtime/lockrank_off.go
index f3d2c00..daa45b5 100644
--- a/libgo/go/runtime/lockrank_off.go
+++ b/libgo/go/runtime/lockrank_off.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build !goexperiment.staticlockranking
-// +build !goexperiment.staticlockranking
package runtime
diff --git a/libgo/go/runtime/lockrank_on.go b/libgo/go/runtime/lockrank_on.go
index fc8d2dc..3c8c367 100644
--- a/libgo/go/runtime/lockrank_on.go
+++ b/libgo/go/runtime/lockrank_on.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build goexperiment.staticlockranking
-// +build goexperiment.staticlockranking
package runtime
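Both lockrank files drop the legacy "// +build" line and keep only the "//go:build" form, which has been the canonical constraint syntax since the Go 1.17 toolchain. For reference, a constrained file header now needs just the single line (the file shown is only the shape of such a header, not new code in this patch):

//go:build goexperiment.staticlockranking

package runtime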
diff --git a/libgo/go/runtime/malloc.go b/libgo/go/runtime/malloc.go
index 7005e53..e5ab8de 100644
--- a/libgo/go/runtime/malloc.go
+++ b/libgo/go/runtime/malloc.go
@@ -101,6 +101,8 @@
package runtime
import (
+ "internal/goarch"
+ "internal/goos"
"runtime/internal/atomic"
"runtime/internal/math"
"runtime/internal/sys"
@@ -160,7 +162,7 @@ const (
// windows/32 | 4KB | 3
// windows/64 | 8KB | 2
// plan9 | 4KB | 3
- _NumStackOrders = 4 - sys.PtrSize/4*sys.GoosWindows - 1*sys.GoosPlan9
+ _NumStackOrders = 4 - goarch.PtrSize/4*goos.IsWindows - 1*goos.IsPlan9
// heapAddrBits is the number of bits in a heap address. On
// amd64, addresses are sign-extended beyond heapAddrBits. On
@@ -209,15 +211,21 @@ const (
// we further limit it to 31 bits.
//
// On ios/arm64, although 64-bit pointers are presumably
- // available, pointers are truncated to 33 bits. Furthermore,
- // only the top 4 GiB of the address space are actually available
- // to the application, but we allow the whole 33 bits anyway for
- // simplicity.
- // TODO(mknyszek): Consider limiting it to 32 bits and using
- // arenaBaseOffset to offset into the top 4 GiB.
+ // available, pointers are truncated to 33 bits in iOS <14.
+ // Furthermore, only the top 4 GiB of the address space are
+ // actually available to the application. In iOS >=14, more
+ // of the address space is available, and the OS can now
+ // provide addresses outside of those 33 bits. Pick 40 bits
+ // as a reasonable balance between address space usage by the
+ // page allocator, and flexibility for what mmap'd regions
+ // we'll accept for the heap. We can't just move to the full
+ // 48 bits because this uses too much address space for older
+ // iOS versions.
+ // TODO(mknyszek): Once iOS <14 is deprecated, promote ios/arm64
+ // to a 48-bit address space like every other arm64 platform.
//
// WebAssembly currently has a limit of 4GB linear memory.
- heapAddrBits = (_64bit*(1-sys.GoarchWasm)*(1-sys.GoosIos*sys.GoarchArm64))*48 + (1-_64bit+sys.GoarchWasm)*(32-(sys.GoarchMips+sys.GoarchMipsle)) + 33*sys.GoosIos*sys.GoarchArm64
+ heapAddrBits = (_64bit*(1-goarch.IsWasm)*(1-goos.IsIos*goarch.IsArm64))*48 + (1-_64bit+goarch.IsWasm)*(32-(goarch.IsMips+goarch.IsMipsle)) + 40*goos.IsIos*goarch.IsArm64
// maxAlloc is the maximum size of an allocation. On 64-bit,
// it's theoretically possible to allocate 1<<heapAddrBits bytes. On
@@ -258,10 +266,10 @@ const (
// logHeapArenaBytes is log_2 of heapArenaBytes. For clarity,
// prefer using heapArenaBytes where possible (we need the
// constant to compute some other constants).
- logHeapArenaBytes = (6+20)*(_64bit*(1-sys.GoosWindows)*(1-sys.GoarchWasm)*(1-sys.GoosIos*sys.GoarchArm64)) + (2+20)*(_64bit*sys.GoosWindows) + (2+20)*(1-_64bit) + (2+20)*sys.GoarchWasm + (2+20)*sys.GoosIos*sys.GoarchArm64
+ logHeapArenaBytes = (6+20)*(_64bit*(1-goos.IsWindows)*(1-goarch.IsWasm)*(1-goos.IsIos*goarch.IsArm64)) + (2+20)*(_64bit*goos.IsWindows) + (2+20)*(1-_64bit) + (2+20)*goarch.IsWasm + (2+20)*goos.IsIos*goarch.IsArm64
// heapArenaBitmapBytes is the size of each heap arena's bitmap.
- heapArenaBitmapBytes = heapArenaBytes / (sys.PtrSize * 8 / 2)
+ heapArenaBitmapBytes = heapArenaBytes / (goarch.PtrSize * 8 / 2)
pagesPerArena = heapArenaBytes / pageSize
@@ -278,7 +286,7 @@ const (
// We use the L1 map on 64-bit Windows because the arena size
// is small, but the address space is still 48 bits, and
// there's a high cost to having a large L2.
- arenaL1Bits = 6 * (_64bit * sys.GoosWindows)
+ arenaL1Bits = 6 * (_64bit * goos.IsWindows)
// arenaL2Bits is the number of bits of the arena number
// covered by the second level arena index.
@@ -313,7 +321,7 @@ const (
//
// On other platforms, the user address space is contiguous
// and starts at 0, so no offset is necessary.
- arenaBaseOffset = 0xffff800000000000*sys.GoarchAmd64 + 0x0a00000000000000*sys.GoosAix*sys.GoarchPpc64
+ arenaBaseOffset = 0xffff800000000000*goarch.IsAmd64 + 0x0a00000000000000*goos.IsAix
// A typed version of this constant that will make it into DWARF (for viewcore).
arenaBaseOffsetUintptr = uintptr(arenaBaseOffset)
@@ -430,9 +438,6 @@ func mallocinit() {
throw("bad TinySizeClass")
}
- // Not used for gccgo.
- // testdefersizes()
-
if heapArenaBitmapBytes&(heapArenaBitmapBytes-1) != 0 {
// heapBits expects modular arithmetic on bitmap
// addresses to work.
@@ -496,7 +501,7 @@ func mallocinit() {
lockInit(&globalAlloc.mutex, lockRankGlobalAlloc)
// Create initial arena growth hints.
- if sys.PtrSize == 8 {
+ if goarch.PtrSize == 8 {
// On a 64-bit machine, we pick the following hints
// because:
//
@@ -743,7 +748,7 @@ mapped:
l2 := h.arenas[ri.l1()]
if l2 == nil {
// Allocate an L2 arena map.
- l2 = (*[1 << arenaL2Bits]*heapArena)(persistentalloc(unsafe.Sizeof(*l2), sys.PtrSize, nil))
+ l2 = (*[1 << arenaL2Bits]*heapArena)(persistentalloc(unsafe.Sizeof(*l2), goarch.PtrSize, nil))
if l2 == nil {
throw("out of memory allocating heap arena map")
}
@@ -754,9 +759,9 @@ mapped:
throw("arena already initialized")
}
var r *heapArena
- r = (*heapArena)(h.heapArenaAlloc.alloc(unsafe.Sizeof(*r), sys.PtrSize, &memstats.gcMiscSys))
+ r = (*heapArena)(h.heapArenaAlloc.alloc(unsafe.Sizeof(*r), goarch.PtrSize, &memstats.gcMiscSys))
if r == nil {
- r = (*heapArena)(persistentalloc(unsafe.Sizeof(*r), sys.PtrSize, &memstats.gcMiscSys))
+ r = (*heapArena)(persistentalloc(unsafe.Sizeof(*r), goarch.PtrSize, &memstats.gcMiscSys))
if r == nil {
throw("out of memory allocating heap arena metadata")
}
@@ -764,16 +769,16 @@ mapped:
// Add the arena to the arenas list.
if len(h.allArenas) == cap(h.allArenas) {
- size := 2 * uintptr(cap(h.allArenas)) * sys.PtrSize
+ size := 2 * uintptr(cap(h.allArenas)) * goarch.PtrSize
if size == 0 {
size = physPageSize
}
- newArray := (*notInHeap)(persistentalloc(size, sys.PtrSize, &memstats.gcMiscSys))
+ newArray := (*notInHeap)(persistentalloc(size, goarch.PtrSize, &memstats.gcMiscSys))
if newArray == nil {
throw("out of memory allocating allArenas")
}
oldSlice := h.allArenas
- *(*notInHeapSlice)(unsafe.Pointer(&h.allArenas)) = notInHeapSlice{newArray, len(h.allArenas), int(size / sys.PtrSize)}
+ *(*notInHeapSlice)(unsafe.Pointer(&h.allArenas)) = notInHeapSlice{newArray, len(h.allArenas), int(size / goarch.PtrSize)}
copy(h.allArenas, oldSlice)
// Do not free the old backing array because
// there may be concurrent readers. Since we
@@ -919,6 +924,14 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
if size == 0 {
return unsafe.Pointer(&zerobase)
}
+ userSize := size
+ if asanenabled {
+ // In the ASan runtime library, malloc() allocates extra memory, the redzone,
+ // around the user-requested memory region, and the redzones are marked as
+ // unaddressable. We perform the same operation in Go to detect overflows and
+ // underflows.
+ size += computeRZlog(size)
+ }
if debug.malloc {
if debug.sbrk != 0 {
@@ -993,8 +1006,8 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
mp.mallocing = 1
shouldhelpgc := false
- dataSize := size
- c := getMCache()
+ dataSize := userSize
+ c := getMCache(mp)
if c == nil {
throw("mallocgc called without a P or outside bootstrapping")
}
@@ -1002,8 +1015,8 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
var x unsafe.Pointer
noscan := typ == nil || typ.ptrdata == 0
// In some cases block zeroing can profitably (for latency reduction purposes)
- // be delayed till preemption is possible; isZeroed tracks that state.
- isZeroed := true
+ // be delayed till preemption is possible; delayedZeroing tracks that state.
+ delayedZeroing := false
if size <= maxSmallSize {
if noscan && size < maxTinySize {
// Tiny allocator.
@@ -1039,7 +1052,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
// Align tiny pointer for required (conservative) alignment.
if size&7 == 0 {
off = alignUp(off, 8)
- } else if sys.PtrSize == 4 && size == 12 {
+ } else if goarch.PtrSize == 4 && size == 12 {
// Conservatively align 12-byte objects to 8 bytes on 32-bit
// systems so that objects whose first field is a 64-bit
// value is aligned to 8 bytes and does not cause a fault on
@@ -1104,11 +1117,23 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
shouldhelpgc = true
// For large allocations, keep track of zeroed state so that
// bulk zeroing can happen later in a preemptible context; isZeroed tracks that state.
- span, isZeroed = c.allocLarge(size, needzero && !noscan, noscan)
+ span = c.allocLarge(size, noscan)
span.freeindex = 1
span.allocCount = 1
- x = unsafe.Pointer(span.base())
size = span.elemsize
+ x = unsafe.Pointer(span.base())
+ if needzero && span.needzero != 0 {
+ if noscan {
+ delayedZeroing = true
+ } else {
+ memclrNoHeapPointers(x, size)
+ // We've in theory cleared almost the whole span here,
+ // and could take the extra step of actually clearing
+ // the whole thing. However, don't. Any GC bits for the
+ // uncleared parts will be zero, and it's just going to
+ // be needzero = 1 once freed anyway.
+ }
+ }
}
var scanSize uintptr
@@ -1151,6 +1176,17 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
msanmalloc(x, size)
}
+ if asanenabled {
+ // Only the memory the user asked for should be read or written.
+ // The rest of the allocated memory should be poisoned, so that we can report
+ // errors when accessing poisoned memory.
+ // The allocated memory is larger than the requested userSize; it also includes
+ // the redzone and some other padding bytes.
+ rzBeg := unsafe.Add(x, userSize)
+ asanpoison(rzBeg, size-userSize)
+ asanunpoison(x, userSize)
+ }
+
if rate := MemProfileRate; rate > 0 {
// Note cache c only valid while m acquired; see #47302
if rate != 1 && size < c.nextSample {
@@ -1164,7 +1200,10 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
// Pointerfree data can be zeroed late in a context where preemption can occur.
// x will keep the memory alive.
- if !isZeroed && needzero {
+ if delayedZeroing {
+ if !noscan {
+ throw("delayed zeroing on data that may contain pointers")
+ }
memclrNoHeapPointersChunked(size, x) // This is a possible preemption point: see #47302
}
@@ -1281,7 +1320,7 @@ func reflect_unsafe_NewArray(typ *_type, n int) unsafe.Pointer {
}
func profilealloc(mp *m, x unsafe.Pointer, size uintptr) {
- c := getMCache()
+ c := getMCache(mp)
if c == nil {
throw("profilealloc called without a P or outside bootstrapping")
}
@@ -1335,7 +1374,7 @@ func fastexprand(mean int) int32 {
// x = -log_e(q) * mean
// x = log_2(q) * (-log_e(2)) * mean ; Using log_2 for efficiency
const randomBitCount = 26
- q := fastrand()%(1<<randomBitCount) + 1
+ q := fastrandn(1<<randomBitCount) + 1
qlog := fastlog2(float64(q)) - randomBitCount
if qlog > 0 {
qlog = 0
@@ -1353,7 +1392,7 @@ func nextSampleNoFP() uintptr {
rate = 0x3fffffff
}
if rate != 0 {
- return uintptr(fastrand() % uint32(2*rate))
+ return uintptr(fastrandn(uint32(2 * rate)))
}
return 0
}
@@ -1444,7 +1483,7 @@ func persistentalloc1(size, align uintptr, sysStat *sysMemStat) *notInHeap {
break
}
}
- persistent.off = alignUp(sys.PtrSize, align)
+ persistent.off = alignUp(goarch.PtrSize, align)
}
p := persistent.base.add(persistent.off)
persistent.off += size
@@ -1533,3 +1572,26 @@ type notInHeap struct{}
func (p *notInHeap) add(bytes uintptr) *notInHeap {
return (*notInHeap)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + bytes))
}
+
+// computeRZlog computes the size of the redzone.
+// It mirrors the implementation in compiler-rt.
+func computeRZlog(userSize uintptr) uintptr {
+ switch {
+ case userSize <= (64 - 16):
+ return 16 << 0
+ case userSize <= (128 - 32):
+ return 16 << 1
+ case userSize <= (512 - 64):
+ return 16 << 2
+ case userSize <= (4096 - 128):
+ return 16 << 3
+ case userSize <= (1<<14)-256:
+ return 16 << 4
+ case userSize <= (1<<15)-512:
+ return 16 << 5
+ case userSize <= (1<<16)-1024:
+ return 16 << 6
+ default:
+ return 16 << 7
+ }
+}
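Despite the "log" in its name, computeRZlog returns the redzone size in bytes, following compiler-rt's sizing table: the requested size is bumped so the allocation can carry a poisoned redzone after the user data, which mallocgc then poisons while unpoisoning the user-visible prefix. A minimal standalone sketch of the resulting layout, where redzoneSize is an illustrative stand-in for computeRZlog and the poison/unpoison steps are described only in comments because asanpoison/asanunpoison are not exported:

package main

import "fmt"

// redzoneSize mirrors the thresholds in computeRZlog above.
func redzoneSize(userSize uintptr) uintptr {
	switch {
	case userSize <= 64-16:
		return 16
	case userSize <= 128-32:
		return 32
	case userSize <= 512-64:
		return 64
	case userSize <= 4096-128:
		return 128
	case userSize <= (1<<14)-256:
		return 256
	case userSize <= (1<<15)-512:
		return 512
	case userSize <= (1<<16)-1024:
		return 1024
	default:
		return 2048
	}
}

func main() {
	for _, n := range []uintptr{8, 100, 5000} {
		rz := redzoneSize(n)
		// mallocgc asks the allocator for n+rz bytes (before size-class
		// rounding), unpoisons the first n and poisons the trailing rz so
		// out-of-bounds accesses are reported.
		fmt.Printf("user=%d redzone=%d total=%d\n", n, rz, n+rz)
	}
}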
diff --git a/libgo/go/runtime/malloc_test.go b/libgo/go/runtime/malloc_test.go
index 7e83255..0bdc831 100644
--- a/libgo/go/runtime/malloc_test.go
+++ b/libgo/go/runtime/malloc_test.go
@@ -35,14 +35,14 @@ func TestMemStats(t *testing.T) {
st := new(MemStats)
ReadMemStats(st)
- nz := func(x interface{}) error {
+ nz := func(x any) error {
if x != reflect.Zero(reflect.TypeOf(x)).Interface() {
return nil
}
return fmt.Errorf("zero value")
}
- le := func(thresh float64) func(interface{}) error {
- return func(x interface{}) error {
+ le := func(thresh float64) func(any) error {
+ return func(x any) error {
// These sanity tests aren't necessarily valid
// with high -test.count values, so only run
// them once.
@@ -56,8 +56,8 @@ func TestMemStats(t *testing.T) {
return fmt.Errorf("insanely high value (overflow?); want <= %v", thresh)
}
}
- eq := func(x interface{}) func(interface{}) error {
- return func(y interface{}) error {
+ eq := func(x any) func(any) error {
+ return func(y any) error {
if x == y {
return nil
}
@@ -66,7 +66,7 @@ func TestMemStats(t *testing.T) {
}
// Of the uint fields, HeapReleased, HeapIdle can be 0.
// PauseTotalNs can be 0 if timer resolution is poor.
- fields := map[string][]func(interface{}) error{
+ fields := map[string][]func(any) error{
"Alloc": {nz, le(1e10)}, "TotalAlloc": {nz, le(1e11)}, "Sys": {nz, le(1e10)},
"Lookups": {eq(uint64(0))}, "Mallocs": {nz, le(1e10)}, "Frees": {nz, le(1e10)},
"HeapAlloc": {nz, le(1e10)}, "HeapSys": {nz, le(1e10)}, "HeapIdle": {le(1e10)},
@@ -200,6 +200,10 @@ func TestTinyAllocIssue37262(t *testing.T) {
runtime.GC()
runtime.GC()
+ // Disable preemption so we stay on one P's tiny allocator and
+ // nothing else allocates from it.
+ runtime.Acquirem()
+
// Make 1-byte allocations until we get a fresh tiny slot.
aligned := false
for i := 0; i < 16; i++ {
@@ -210,6 +214,7 @@ func TestTinyAllocIssue37262(t *testing.T) {
}
}
if !aligned {
+ runtime.Releasem()
t.Fatal("unable to get a fresh tiny slot")
}
@@ -231,6 +236,8 @@ func TestTinyAllocIssue37262(t *testing.T) {
tinyByteSink = nil
tinyUint32Sink = nil
tinyObj12Sink = nil
+
+ runtime.Releasem()
}
func TestPageCacheLeak(t *testing.T) {
diff --git a/libgo/go/runtime/map.go b/libgo/go/runtime/map.go
index c96d2c7..e5844ac 100644
--- a/libgo/go/runtime/map.go
+++ b/libgo/go/runtime/map.go
@@ -54,9 +54,10 @@ package runtime
// before the table grows. Typical tables will be somewhat less loaded.
import (
+ "internal/abi"
+ "internal/goarch"
"runtime/internal/atomic"
"runtime/internal/math"
- "runtime/internal/sys"
"unsafe"
)
@@ -118,7 +119,7 @@ const (
sameSizeGrow = 8 // the current map growth is to a new map of the same size
// sentinel bucket ID for iterator checks
- noCheck = 1<<(8*sys.PtrSize) - 1
+ noCheck = 1<<(8*goarch.PtrSize) - 1
)
// isEmpty reports whether the given tophash array entry represents an empty bucket entry.
@@ -174,8 +175,8 @@ type bmap struct {
}
// A hash iteration structure.
-// If you modify hiter, also change cmd/compile/internal/reflectdata/reflect.go to indicate
-// the layout of this structure.
+// If you modify hiter, also change cmd/compile/internal/reflectdata/reflect.go
+// and reflect/value.go to match the layout of this structure.
type hiter struct {
key unsafe.Pointer // Must be in first position. Write nil to indicate iteration end (see cmd/compile/internal/walk/range.go).
elem unsafe.Pointer // Must be in second position (see cmd/compile/internal/walk/range.go).
@@ -197,7 +198,7 @@ type hiter struct {
// bucketShift returns 1<<b, optimized for code generation.
func bucketShift(b uint8) uintptr {
// Masking the shift amount allows overflow checks to be elided.
- return uintptr(1) << (b & (sys.PtrSize*8 - 1))
+ return uintptr(1) << (b & (goarch.PtrSize*8 - 1))
}
// bucketMask returns 1<<b - 1, optimized for code generation.
@@ -207,7 +208,7 @@ func bucketMask(b uint8) uintptr {
// tophash calculates the tophash value for hash.
func tophash(hash uintptr) uint8 {
- top := uint8(hash >> (sys.PtrSize*8 - 8))
+ top := uint8(hash >> (goarch.PtrSize*8 - 8))
if top < minTopHash {
top += minTopHash
}
@@ -220,11 +221,11 @@ func evacuated(b *bmap) bool {
}
func (b *bmap) overflow(t *maptype) *bmap {
- return *(**bmap)(add(unsafe.Pointer(b), uintptr(t.bucketsize)-sys.PtrSize))
+ return *(**bmap)(add(unsafe.Pointer(b), uintptr(t.bucketsize)-goarch.PtrSize))
}
func (b *bmap) setoverflow(t *maptype, ovf *bmap) {
- *(**bmap)(add(unsafe.Pointer(b), uintptr(t.bucketsize)-sys.PtrSize)) = ovf
+ *(**bmap)(add(unsafe.Pointer(b), uintptr(t.bucketsize)-goarch.PtrSize)) = ovf
}
func (b *bmap) keys() unsafe.Pointer {
@@ -414,13 +415,16 @@ func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
if raceenabled && h != nil {
callerpc := getcallerpc()
- pc := funcPC(mapaccess1)
+ pc := abi.FuncPCABIInternal(mapaccess1)
racereadpc(unsafe.Pointer(h), callerpc, pc)
raceReadObjectPC(t.key, key, callerpc, pc)
}
if msanenabled && h != nil {
msanread(key, t.key.size)
}
+ if asanenabled && h != nil {
+ asanread(key, t.key.size)
+ }
if h == nil || h.count == 0 {
if t.hashMightPanic() {
t.hasher(key, 0) // see issue 23734
@@ -477,13 +481,16 @@ func mapaccess2(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, bool)
if raceenabled && h != nil {
callerpc := getcallerpc()
- pc := funcPC(mapaccess2)
+ pc := abi.FuncPCABIInternal(mapaccess2)
racereadpc(unsafe.Pointer(h), callerpc, pc)
raceReadObjectPC(t.key, key, callerpc, pc)
}
if msanenabled && h != nil {
msanread(key, t.key.size)
}
+ if asanenabled && h != nil {
+ asanread(key, t.key.size)
+ }
if h == nil || h.count == 0 {
if t.hashMightPanic() {
t.hasher(key, 0) // see issue 23734
@@ -609,13 +616,16 @@ func mapassign(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
}
if raceenabled {
callerpc := getcallerpc()
- pc := funcPC(mapassign)
+ pc := abi.FuncPCABIInternal(mapassign)
racewritepc(unsafe.Pointer(h), callerpc, pc)
raceReadObjectPC(t.key, key, callerpc, pc)
}
if msanenabled {
msanread(key, t.key.size)
}
+ if asanenabled {
+ asanread(key, t.key.size)
+ }
if h.flags&hashWriting != 0 {
throw("concurrent map writes")
}
@@ -720,13 +730,16 @@ done:
func mapdelete(t *maptype, h *hmap, key unsafe.Pointer) {
if raceenabled && h != nil {
callerpc := getcallerpc()
- pc := funcPC(mapdelete)
+ pc := abi.FuncPCABIInternal(mapdelete)
racewritepc(unsafe.Pointer(h), callerpc, pc)
raceReadObjectPC(t.key, key, callerpc, pc)
}
if msanenabled && h != nil {
msanread(key, t.key.size)
}
+ if asanenabled && h != nil {
+ asanread(key, t.key.size)
+ }
if h == nil || h.count == 0 {
if t.hashMightPanic() {
t.hasher(key, 0) // see issue 23734
@@ -855,17 +868,17 @@ func mapiterinit(t *maptype, h *hmap, it *hiter) {
if raceenabled && h != nil {
callerpc := getcallerpc()
- racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapiterinit))
+ racereadpc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapiterinit))
}
+ it.t = t
if h == nil || h.count == 0 {
return
}
- if unsafe.Sizeof(hiter{})/sys.PtrSize != 12 {
+ if unsafe.Sizeof(hiter{})/goarch.PtrSize != 12 {
throw("hash_iter size incorrect") // see cmd/compile/internal/reflectdata/reflect.go
}
- it.t = t
it.h = h
// grab snapshot of bucket state
@@ -910,7 +923,7 @@ func mapiternext(it *hiter) {
h := it.h
if raceenabled {
callerpc := getcallerpc()
- racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapiternext))
+ racereadpc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapiternext))
}
if h.flags&hashWriting != 0 {
throw("concurrent map iteration and map write")
@@ -1036,7 +1049,7 @@ next:
func mapclear(t *maptype, h *hmap) {
if raceenabled && h != nil {
callerpc := getcallerpc()
- pc := funcPC(mapclear)
+ pc := abi.FuncPCABIInternal(mapclear)
racewritepc(unsafe.Pointer(h), callerpc, pc)
}
@@ -1338,11 +1351,11 @@ func reflect_makemap(t *maptype, cap int) *hmap {
if t.key.equal == nil {
throw("runtime.reflect_makemap: unsupported map key type")
}
- if t.key.size > maxKeySize && (!t.indirectkey() || t.keysize != uint8(sys.PtrSize)) ||
+ if t.key.size > maxKeySize && (!t.indirectkey() || t.keysize != uint8(goarch.PtrSize)) ||
t.key.size <= maxKeySize && (t.indirectkey() || t.keysize != uint8(t.key.size)) {
throw("key size wrong")
}
- if t.elem.size > maxElemSize && (!t.indirectelem() || t.elemsize != uint8(sys.PtrSize)) ||
+ if t.elem.size > maxElemSize && (!t.indirectelem() || t.elemsize != uint8(goarch.PtrSize)) ||
t.elem.size <= maxElemSize && (t.indirectelem() || t.elemsize != uint8(t.elem.size)) {
throw("elem size wrong")
}
@@ -1381,22 +1394,41 @@ func reflect_mapaccess(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
return elem
}
+//go:linkname reflect_mapaccess_faststr reflect.mapaccess__faststr
+func reflect_mapaccess_faststr(t *maptype, h *hmap, key string) unsafe.Pointer {
+ elem, ok := mapaccess2_faststr(t, h, key)
+ if !ok {
+ // reflect wants nil for a missing element
+ elem = nil
+ }
+ return elem
+}
+
//go:linkname reflect_mapassign reflect.mapassign
func reflect_mapassign(t *maptype, h *hmap, key unsafe.Pointer, elem unsafe.Pointer) {
p := mapassign(t, h, key)
typedmemmove(t.elem, p, elem)
}
+//go:linkname reflect_mapassign_faststr reflect.mapassign__faststr
+func reflect_mapassign_faststr(t *maptype, h *hmap, key string, elem unsafe.Pointer) {
+ p := mapassign_faststr(t, h, key)
+ typedmemmove(t.elem, p, elem)
+}
+
//go:linkname reflect_mapdelete reflect.mapdelete
func reflect_mapdelete(t *maptype, h *hmap, key unsafe.Pointer) {
mapdelete(t, h, key)
}
+//go:linkname reflect_mapdelete_faststr reflect.mapdelete__faststr
+func reflect_mapdelete_faststr(t *maptype, h *hmap, key string) {
+ mapdelete_faststr(t, h, key)
+}
+
//go:linkname reflect_mapiterinit reflect.mapiterinit
-func reflect_mapiterinit(t *maptype, h *hmap) *hiter {
- it := new(hiter)
+func reflect_mapiterinit(t *maptype, h *hmap, it *hiter) {
mapiterinit(t, h, it)
- return it
}
//go:linkname reflect_mapiternext reflect.mapiternext
@@ -1421,7 +1453,7 @@ func reflect_maplen(h *hmap) int {
}
if raceenabled {
callerpc := getcallerpc()
- racereadpc(unsafe.Pointer(h), callerpc, funcPC(reflect_maplen))
+ racereadpc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(reflect_maplen))
}
return h.count
}
@@ -1433,7 +1465,7 @@ func reflectlite_maplen(h *hmap) int {
}
if raceenabled {
callerpc := getcallerpc()
- racereadpc(unsafe.Pointer(h), callerpc, funcPC(reflect_maplen))
+ racereadpc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(reflect_maplen))
}
return h.count
}
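The new reflect_mapaccess_faststr, reflect_mapassign_faststr, and reflect_mapdelete_faststr linknames back reflect's Go 1.18 fast paths for maps with string keys, and reflect_mapiterinit now fills in a caller-allocated hiter instead of returning one. From user code this is only visible as ordinary reflect map operations; the example below shows the public calls that can exercise these paths (whether a given call actually takes the fast path is an internal detail, so treat that as an assumption):

package main

import (
	"fmt"
	"reflect"
)

func main() {
	m := map[string]int{"a": 1}
	v := reflect.ValueOf(m)

	key := reflect.ValueOf("b")
	v.SetMapIndex(key, reflect.ValueOf(2)) // assign: may route through mapassign_faststr
	fmt.Println(v.MapIndex(key).Int())     // access: may route through mapaccess2_faststr
	v.SetMapIndex(key, reflect.Value{})    // delete: may route through mapdelete_faststr

	for it := v.MapRange(); it.Next(); { // iteration: mapiterinit/mapiternext
		fmt.Println(it.Key().String(), it.Value().Int())
	}
}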
diff --git a/libgo/go/runtime/map_benchmark_test.go b/libgo/go/runtime/map_benchmark_test.go
index d0becc9..b46d2a4 100644
--- a/libgo/go/runtime/map_benchmark_test.go
+++ b/libgo/go/runtime/map_benchmark_test.go
@@ -488,20 +488,20 @@ func BenchmarkMapStringConversion(b *testing.B) {
var BoolSink bool
func BenchmarkMapInterfaceString(b *testing.B) {
- m := map[interface{}]bool{}
+ m := map[any]bool{}
for i := 0; i < 100; i++ {
m[fmt.Sprintf("%d", i)] = true
}
- key := (interface{})("A")
+ key := (any)("A")
b.ResetTimer()
for i := 0; i < b.N; i++ {
BoolSink = m[key]
}
}
func BenchmarkMapInterfacePtr(b *testing.B) {
- m := map[interface{}]bool{}
+ m := map[any]bool{}
for i := 0; i < 100; i++ {
i := i
diff --git a/libgo/go/runtime/map_fast32.go b/libgo/go/runtime/map_fast32.go
index 74f1d61a..a4fc5d5 100644
--- a/libgo/go/runtime/map_fast32.go
+++ b/libgo/go/runtime/map_fast32.go
@@ -5,7 +5,8 @@
package runtime
import (
- "runtime/internal/sys"
+ "internal/abi"
+ "internal/goarch"
"unsafe"
)
@@ -20,7 +21,7 @@ import (
func mapaccess1_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer {
if raceenabled && h != nil {
callerpc := getcallerpc()
- racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_fast32))
+ racereadpc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapaccess1_fast32))
}
if h == nil || h.count == 0 {
return unsafe.Pointer(&zeroVal[0])
@@ -60,7 +61,7 @@ func mapaccess1_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer {
func mapaccess2_fast32(t *maptype, h *hmap, key uint32) (unsafe.Pointer, bool) {
if raceenabled && h != nil {
callerpc := getcallerpc()
- racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_fast32))
+ racereadpc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapaccess2_fast32))
}
if h == nil || h.count == 0 {
return unsafe.Pointer(&zeroVal[0]), false
@@ -103,7 +104,7 @@ func mapassign_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer {
}
if raceenabled {
callerpc := getcallerpc()
- racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast32))
+ racewritepc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapassign_fast32))
}
if h.flags&hashWriting != 0 {
throw("concurrent map writes")
@@ -193,7 +194,7 @@ func mapassign_fast32ptr(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer
}
if raceenabled {
callerpc := getcallerpc()
- racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast32))
+ racewritepc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapassign_fast32))
}
if h.flags&hashWriting != 0 {
throw("concurrent map writes")
@@ -280,7 +281,7 @@ done:
func mapdelete_fast32(t *maptype, h *hmap, key uint32) {
if raceenabled && h != nil {
callerpc := getcallerpc()
- racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_fast32))
+ racewritepc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapdelete_fast32))
}
if h == nil || h.count == 0 {
return
@@ -309,7 +310,7 @@ search:
// Only clear key if there are pointers in it.
// This can only happen if pointers are 32 bit
// wide as 64 bit pointers do not fit into a 32 bit key.
- if sys.PtrSize == 4 && t.key.ptrdata != 0 {
+ if goarch.PtrSize == 4 && t.key.ptrdata != 0 {
// The key must be a pointer as we checked pointers are
// 32 bits wide and the key is 32 bits wide also.
*(*unsafe.Pointer)(k) = nil
@@ -435,7 +436,7 @@ func evacuate_fast32(t *maptype, h *hmap, oldbucket uintptr) {
dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check
// Copy key.
- if sys.PtrSize == 4 && t.key.ptrdata != 0 && writeBarrier.enabled {
+ if goarch.PtrSize == 4 && t.key.ptrdata != 0 && writeBarrier.enabled {
// Write with a write barrier.
*(*unsafe.Pointer)(dst.k) = *(*unsafe.Pointer)(k)
} else {
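Throughout these map files, the old funcPC helper is replaced by abi.FuncPCABIInternal, which the race-detector hooks use to report which runtime function performed the access. internal/abi is not importable from user code, but the notion of a function's entry PC can be observed with public APIs; a small, purely illustrative example:

package main

import (
	"fmt"
	"reflect"
	"runtime"
)

func target() {}

func main() {
	// Entry PC of target via public APIs only; the runtime's internal
	// abi.FuncPCABIInternal helper is not available here.
	pc := reflect.ValueOf(target).Pointer()
	f := runtime.FuncForPC(pc)
	fmt.Printf("%s entry=%#x\n", f.Name(), f.Entry())
}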
diff --git a/libgo/go/runtime/map_fast64.go b/libgo/go/runtime/map_fast64.go
index 14bdcee..379637a 100644
--- a/libgo/go/runtime/map_fast64.go
+++ b/libgo/go/runtime/map_fast64.go
@@ -5,7 +5,8 @@
package runtime
import (
- "runtime/internal/sys"
+ "internal/abi"
+ "internal/goarch"
"unsafe"
)
@@ -20,7 +21,7 @@ import (
func mapaccess1_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer {
if raceenabled && h != nil {
callerpc := getcallerpc()
- racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_fast64))
+ racereadpc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapaccess1_fast64))
}
if h == nil || h.count == 0 {
return unsafe.Pointer(&zeroVal[0])
@@ -60,7 +61,7 @@ func mapaccess1_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer {
func mapaccess2_fast64(t *maptype, h *hmap, key uint64) (unsafe.Pointer, bool) {
if raceenabled && h != nil {
callerpc := getcallerpc()
- racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_fast64))
+ racereadpc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapaccess2_fast64))
}
if h == nil || h.count == 0 {
return unsafe.Pointer(&zeroVal[0]), false
@@ -103,7 +104,7 @@ func mapassign_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer {
}
if raceenabled {
callerpc := getcallerpc()
- racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast64))
+ racewritepc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapassign_fast64))
}
if h.flags&hashWriting != 0 {
throw("concurrent map writes")
@@ -193,7 +194,7 @@ func mapassign_fast64ptr(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer
}
if raceenabled {
callerpc := getcallerpc()
- racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast64))
+ racewritepc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapassign_fast64))
}
if h.flags&hashWriting != 0 {
throw("concurrent map writes")
@@ -280,7 +281,7 @@ done:
func mapdelete_fast64(t *maptype, h *hmap, key uint64) {
if raceenabled && h != nil {
callerpc := getcallerpc()
- racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_fast64))
+ racewritepc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapdelete_fast64))
}
if h == nil || h.count == 0 {
return
@@ -308,7 +309,7 @@ search:
}
// Only clear key if there are pointers in it.
if t.key.ptrdata != 0 {
- if sys.PtrSize == 8 {
+ if goarch.PtrSize == 8 {
*(*unsafe.Pointer)(k) = nil
} else {
// There are three ways to squeeze one or more 32 bit pointers into 64 bits.
@@ -438,7 +439,7 @@ func evacuate_fast64(t *maptype, h *hmap, oldbucket uintptr) {
// Copy key.
if t.key.ptrdata != 0 && writeBarrier.enabled {
- if sys.PtrSize == 8 {
+ if goarch.PtrSize == 8 {
// Write with a write barrier.
*(*unsafe.Pointer)(dst.k) = *(*unsafe.Pointer)(k)
} else {
diff --git a/libgo/go/runtime/map_faststr.go b/libgo/go/runtime/map_faststr.go
index 647978b..af4f46f 100644
--- a/libgo/go/runtime/map_faststr.go
+++ b/libgo/go/runtime/map_faststr.go
@@ -5,7 +5,8 @@
package runtime
import (
- "runtime/internal/sys"
+ "internal/abi"
+ "internal/goarch"
"unsafe"
)
@@ -19,7 +20,7 @@ import (
func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer {
if raceenabled && h != nil {
callerpc := getcallerpc()
- racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_faststr))
+ racereadpc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapaccess1_faststr))
}
if h == nil || h.count == 0 {
return unsafe.Pointer(&zeroVal[0])
@@ -33,7 +34,7 @@ func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer {
b := (*bmap)(h.buckets)
if key.len < 32 {
// short key, doing lots of comparisons is ok
- for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) {
+ for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*goarch.PtrSize) {
k := (*stringStruct)(kptr)
if k.len != key.len || isEmpty(b.tophash[i]) {
if b.tophash[i] == emptyRest {
@@ -42,14 +43,14 @@ func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer {
continue
}
if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) {
- return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.elemsize))
+ return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*goarch.PtrSize+i*uintptr(t.elemsize))
}
}
return unsafe.Pointer(&zeroVal[0])
}
// long key, try not to do more comparisons than necessary
keymaybe := uintptr(bucketCnt)
- for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) {
+ for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*goarch.PtrSize) {
k := (*stringStruct)(kptr)
if k.len != key.len || isEmpty(b.tophash[i]) {
if b.tophash[i] == emptyRest {
@@ -58,7 +59,7 @@ func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer {
continue
}
if k.str == key.str {
- return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.elemsize))
+ return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*goarch.PtrSize+i*uintptr(t.elemsize))
}
// check first 4 bytes
if *((*[4]byte)(key.str)) != *((*[4]byte)(k.str)) {
@@ -75,9 +76,9 @@ func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer {
keymaybe = i
}
if keymaybe != bucketCnt {
- k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+keymaybe*2*sys.PtrSize))
+ k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+keymaybe*2*goarch.PtrSize))
if memequal(k.str, key.str, uintptr(key.len)) {
- return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+keymaybe*uintptr(t.elemsize))
+ return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*goarch.PtrSize+keymaybe*uintptr(t.elemsize))
}
}
return unsafe.Pointer(&zeroVal[0])
@@ -98,13 +99,13 @@ dohash:
}
top := tophash(hash)
for ; b != nil; b = b.overflow(t) {
- for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) {
+ for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*goarch.PtrSize) {
k := (*stringStruct)(kptr)
if k.len != key.len || b.tophash[i] != top {
continue
}
if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) {
- return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.elemsize))
+ return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*goarch.PtrSize+i*uintptr(t.elemsize))
}
}
}
@@ -114,7 +115,7 @@ dohash:
func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) {
if raceenabled && h != nil {
callerpc := getcallerpc()
- racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_faststr))
+ racereadpc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapaccess2_faststr))
}
if h == nil || h.count == 0 {
return unsafe.Pointer(&zeroVal[0]), false
@@ -128,7 +129,7 @@ func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) {
b := (*bmap)(h.buckets)
if key.len < 32 {
// short key, doing lots of comparisons is ok
- for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) {
+ for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*goarch.PtrSize) {
k := (*stringStruct)(kptr)
if k.len != key.len || isEmpty(b.tophash[i]) {
if b.tophash[i] == emptyRest {
@@ -137,14 +138,14 @@ func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) {
continue
}
if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) {
- return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.elemsize)), true
+ return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*goarch.PtrSize+i*uintptr(t.elemsize)), true
}
}
return unsafe.Pointer(&zeroVal[0]), false
}
// long key, try not to do more comparisons than necessary
keymaybe := uintptr(bucketCnt)
- for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) {
+ for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*goarch.PtrSize) {
k := (*stringStruct)(kptr)
if k.len != key.len || isEmpty(b.tophash[i]) {
if b.tophash[i] == emptyRest {
@@ -153,7 +154,7 @@ func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) {
continue
}
if k.str == key.str {
- return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.elemsize)), true
+ return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*goarch.PtrSize+i*uintptr(t.elemsize)), true
}
// check first 4 bytes
if *((*[4]byte)(key.str)) != *((*[4]byte)(k.str)) {
@@ -170,9 +171,9 @@ func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) {
keymaybe = i
}
if keymaybe != bucketCnt {
- k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+keymaybe*2*sys.PtrSize))
+ k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+keymaybe*2*goarch.PtrSize))
if memequal(k.str, key.str, uintptr(key.len)) {
- return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+keymaybe*uintptr(t.elemsize)), true
+ return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*goarch.PtrSize+keymaybe*uintptr(t.elemsize)), true
}
}
return unsafe.Pointer(&zeroVal[0]), false
@@ -193,13 +194,13 @@ dohash:
}
top := tophash(hash)
for ; b != nil; b = b.overflow(t) {
- for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) {
+ for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*goarch.PtrSize) {
k := (*stringStruct)(kptr)
if k.len != key.len || b.tophash[i] != top {
continue
}
if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) {
- return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.elemsize)), true
+ return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*goarch.PtrSize+i*uintptr(t.elemsize)), true
}
}
}
@@ -212,7 +213,7 @@ func mapassign_faststr(t *maptype, h *hmap, s string) unsafe.Pointer {
}
if raceenabled {
callerpc := getcallerpc()
- racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_faststr))
+ racewritepc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapassign_faststr))
}
if h.flags&hashWriting != 0 {
throw("concurrent map writes")
@@ -252,7 +253,7 @@ bucketloop:
}
continue
}
- k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize))
+ k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*goarch.PtrSize))
if k.len != key.len {
continue
}
@@ -290,13 +291,13 @@ bucketloop:
}
insertb.tophash[inserti&(bucketCnt-1)] = top // mask inserti to avoid bounds checks
- insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*2*sys.PtrSize)
+ insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*2*goarch.PtrSize)
// store new key at insert position
*((*stringStruct)(insertk)) = *key
h.count++
done:
- elem := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*2*sys.PtrSize+inserti*uintptr(t.elemsize))
+ elem := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*2*goarch.PtrSize+inserti*uintptr(t.elemsize))
if h.flags&hashWriting == 0 {
throw("concurrent map writes")
}
@@ -307,7 +308,7 @@ done:
func mapdelete_faststr(t *maptype, h *hmap, ky string) {
if raceenabled && h != nil {
callerpc := getcallerpc()
- racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_faststr))
+ racewritepc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapdelete_faststr))
}
if h == nil || h.count == 0 {
return
@@ -331,7 +332,7 @@ func mapdelete_faststr(t *maptype, h *hmap, ky string) {
top := tophash(hash)
search:
for ; b != nil; b = b.overflow(t) {
- for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) {
+ for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*goarch.PtrSize) {
k := (*stringStruct)(kptr)
if k.len != key.len || b.tophash[i] != top {
continue
@@ -341,7 +342,7 @@ search:
}
// Clear key's pointer.
k.str = nil
- e := add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.elemsize))
+ e := add(unsafe.Pointer(b), dataOffset+bucketCnt*2*goarch.PtrSize+i*uintptr(t.elemsize))
if t.elem.ptrdata != 0 {
memclrHasPointers(e, t.elem.size)
} else {
@@ -417,7 +418,7 @@ func evacuate_faststr(t *maptype, h *hmap, oldbucket uintptr) {
x := &xy[0]
x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize)))
x.k = add(unsafe.Pointer(x.b), dataOffset)
- x.e = add(x.k, bucketCnt*2*sys.PtrSize)
+ x.e = add(x.k, bucketCnt*2*goarch.PtrSize)
if !h.sameSizeGrow() {
// Only calculate y pointers if we're growing bigger.
@@ -425,13 +426,13 @@ func evacuate_faststr(t *maptype, h *hmap, oldbucket uintptr) {
y := &xy[1]
y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize)))
y.k = add(unsafe.Pointer(y.b), dataOffset)
- y.e = add(y.k, bucketCnt*2*sys.PtrSize)
+ y.e = add(y.k, bucketCnt*2*goarch.PtrSize)
}
for ; b != nil; b = b.overflow(t) {
k := add(unsafe.Pointer(b), dataOffset)
- e := add(k, bucketCnt*2*sys.PtrSize)
- for i := 0; i < bucketCnt; i, k, e = i+1, add(k, 2*sys.PtrSize), add(e, uintptr(t.elemsize)) {
+ e := add(k, bucketCnt*2*goarch.PtrSize)
+ for i := 0; i < bucketCnt; i, k, e = i+1, add(k, 2*goarch.PtrSize), add(e, uintptr(t.elemsize)) {
top := b.tophash[i]
if isEmpty(top) {
b.tophash[i] = evacuatedEmpty
@@ -457,7 +458,7 @@ func evacuate_faststr(t *maptype, h *hmap, oldbucket uintptr) {
dst.b = h.newoverflow(t, dst.b)
dst.i = 0
dst.k = add(unsafe.Pointer(dst.b), dataOffset)
- dst.e = add(dst.k, bucketCnt*2*sys.PtrSize)
+ dst.e = add(dst.k, bucketCnt*2*goarch.PtrSize)
}
dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check
@@ -470,7 +471,7 @@ func evacuate_faststr(t *maptype, h *hmap, oldbucket uintptr) {
// key or elem arrays. That's ok, as we have the overflow pointer
// at the end of the bucket to protect against pointing past the
// end of the bucket.
- dst.k = add(dst.k, 2*sys.PtrSize)
+ dst.k = add(dst.k, 2*goarch.PtrSize)
dst.e = add(dst.e, uintptr(t.elemsize))
}
}
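map_faststr steps through bucket keys in units of 2*goarch.PtrSize because each key is a string header, a data pointer plus a length, two machine words in total. That invariant is visible from ordinary user code:

package main

import (
	"fmt"
	"unsafe"
)

func main() {
	var s string
	var p uintptr
	// A string header is a data pointer plus a length: two machine words.
	fmt.Println(unsafe.Sizeof(s) == 2*unsafe.Sizeof(p)) // true
}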
diff --git a/libgo/go/runtime/map_test.go b/libgo/go/runtime/map_test.go
index b9a3457..2da4643 100644
--- a/libgo/go/runtime/map_test.go
+++ b/libgo/go/runtime/map_test.go
@@ -6,10 +6,10 @@ package runtime_test
import (
"fmt"
+ "internal/goarch"
"math"
"reflect"
"runtime"
- "runtime/internal/sys"
"sort"
"strconv"
"strings"
@@ -21,7 +21,7 @@ func TestHmapSize(t *testing.T) {
// The structure of hmap is defined in runtime/map.go
// and in cmd/compile/internal/gc/reflect.go and must be in sync.
// The size of hmap should be 48 bytes on 64 bit and 28 bytes on 32 bit platforms.
- var hmapSize = uintptr(8 + 5*sys.PtrSize)
+ var hmapSize = uintptr(8 + 5*goarch.PtrSize)
if runtime.RuntimeHmapSize != hmapSize {
t.Errorf("sizeof(runtime.hmap{})==%d, want %d", runtime.RuntimeHmapSize, hmapSize)
}
@@ -478,7 +478,7 @@ func TestMapNanGrowIterator(t *testing.T) {
nan := math.NaN()
const nBuckets = 16
// To fill nBuckets buckets takes LOAD * nBuckets keys.
- nKeys := int(nBuckets * *runtime.HashLoad)
+ nKeys := int(nBuckets * runtime.HashLoad)
// Get map to full point with nan keys.
for i := 0; i < nKeys; i++ {
@@ -1066,7 +1066,7 @@ func BenchmarkMapDelete(b *testing.B) {
func TestDeferDeleteSlow(t *testing.T) {
ks := []complex128{0, 1, 2, 3}
- m := make(map[interface{}]int)
+ m := make(map[any]int)
for i, k := range ks {
m[k] = i
}
@@ -1209,14 +1209,14 @@ func TestMapInterfaceKey(t *testing.T) {
c64 complex64
c128 complex128
s string
- i0 interface{}
+ i0 any
i1 interface {
String() string
}
a [4]string
}
- m := map[interface{}]bool{}
+ m := map[any]bool{}
// Put a bunch of data in m, so that a bad hash is likely to
// lead to a bad bucket, which will lead to a missed lookup.
for i := 0; i < 1000; i++ {
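The test changes in this file replace interface{} with any throughout; in Go 1.18, any is a predeclared alias for interface{}, so the two spell exactly the same type. A minimal illustration:

package main

import "fmt"

func main() {
	var x any = "hello"        // identical to: var x interface{} = "hello"
	m := map[any]bool{1: true} // same type as map[interface{}]bool
	_, isString := x.(string)
	fmt.Println(isString, m[1]) // true true
}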
diff --git a/libgo/go/runtime/mbarrier.go b/libgo/go/runtime/mbarrier.go
index 3399033..4982fd4 100644
--- a/libgo/go/runtime/mbarrier.go
+++ b/libgo/go/runtime/mbarrier.go
@@ -14,7 +14,8 @@
package runtime
import (
- "runtime/internal/sys"
+ "internal/abi"
+ "internal/goarch"
"unsafe"
)
@@ -182,13 +183,17 @@ func typedmemmove(typ *_type, dst, src unsafe.Pointer) {
//go:linkname reflect_typedmemmove reflect.typedmemmove
func reflect_typedmemmove(typ *_type, dst, src unsafe.Pointer) {
if raceenabled {
- raceWriteObjectPC(typ, dst, getcallerpc(), funcPC(reflect_typedmemmove))
- raceReadObjectPC(typ, src, getcallerpc(), funcPC(reflect_typedmemmove))
+ raceWriteObjectPC(typ, dst, getcallerpc(), abi.FuncPCABIInternal(reflect_typedmemmove))
+ raceReadObjectPC(typ, src, getcallerpc(), abi.FuncPCABIInternal(reflect_typedmemmove))
}
if msanenabled {
msanwrite(dst, typ.size)
msanread(src, typ.size)
}
+ if asanenabled {
+ asanwrite(dst, typ.size)
+ asanread(src, typ.size)
+ }
typedmemmove(typ, dst, src)
}
@@ -202,11 +207,11 @@ func reflectlite_typedmemmove(typ *_type, dst, src unsafe.Pointer) {
// off must be a multiple of sys.PtrSize.
//go:linkname reflect_typedmemmovepartial reflect.typedmemmovepartial
func reflect_typedmemmovepartial(typ *_type, dst, src unsafe.Pointer, off, size uintptr) {
- if writeBarrier.needed && typ.ptrdata > off && size >= sys.PtrSize {
- if off&(sys.PtrSize-1) != 0 {
+ if writeBarrier.needed && typ.ptrdata > off && size >= goarch.PtrSize {
+ if off&(goarch.PtrSize-1) != 0 {
panic("reflect: internal error: misaligned offset")
}
- pwsize := alignDown(size, sys.PtrSize)
+ pwsize := alignDown(size, goarch.PtrSize)
if poff := typ.ptrdata - off; pwsize > poff {
pwsize = poff
}
@@ -235,7 +240,7 @@ func typedslicecopy(typ *_type, dstPtr unsafe.Pointer, dstLen int, srcPtr unsafe
// code and needs its own instrumentation.
if raceenabled {
callerpc := getcallerpc()
- pc := funcPC(slicecopy)
+ pc := abi.FuncPCABIInternal(slicecopy)
racewriterangepc(dstPtr, uintptr(n)*typ.size, callerpc, pc)
racereadrangepc(srcPtr, uintptr(n)*typ.size, callerpc, pc)
}
@@ -243,6 +248,10 @@ func typedslicecopy(typ *_type, dstPtr unsafe.Pointer, dstLen int, srcPtr unsafe
msanwrite(dstPtr, uintptr(n)*typ.size)
msanread(srcPtr, uintptr(n)*typ.size)
}
+ if asanenabled {
+ asanwrite(dstPtr, uintptr(n)*typ.size)
+ asanread(srcPtr, uintptr(n)*typ.size)
+ }
if writeBarrier.cgo {
cgoCheckSliceCopy(typ, dstPtr, srcPtr, n)
diff --git a/libgo/go/runtime/mbitmap.go b/libgo/go/runtime/mbitmap.go
index 72cd9f3..019ca34 100644
--- a/libgo/go/runtime/mbitmap.go
+++ b/libgo/go/runtime/mbitmap.go
@@ -46,6 +46,7 @@
package runtime
import (
+ "internal/goarch"
"runtime/internal/atomic"
"runtime/internal/sys"
"unsafe"
@@ -326,8 +327,8 @@ func heapBitsForAddr(addr uintptr) (h heapBits) {
// we expect to crash in the caller.
return
}
- h.bitp = &ha.bitmap[(addr/(sys.PtrSize*4))%heapArenaBitmapBytes]
- h.shift = uint32((addr / sys.PtrSize) & 3)
+ h.bitp = &ha.bitmap[(addr/(goarch.PtrSize*4))%heapArenaBitmapBytes]
+ h.shift = uint32((addr / goarch.PtrSize) & 3)
h.arena = uint32(arena)
h.last = &ha.bitmap[len(ha.bitmap)-1]
return
@@ -389,10 +390,10 @@ func findObject(p, refBase, refOff uintptr, forStack bool) (base uintptr, s *msp
// If s is nil, the virtual address has never been part of the heap.
// This pointer may be to some mmap'd region, so we allow it.
if s == nil {
- if GOARCH == "amd64" && p == clobberdeadPtr && debug.invalidptr != 0 {
- // Crash if clobberdeadPtr is seen. Only on AMD64 for now, as
- // it is the only platform where compiler's clobberdead mode is
- // implemented. On AMD64 clobberdeadPtr cannot be a valid address.
+ if (GOARCH == "amd64" || GOARCH == "arm64") && p == clobberdeadPtr && debug.invalidptr != 0 {
+ // Crash if clobberdeadPtr is seen. Only on AMD64 and ARM64 for now,
+ // as they are the only platforms where the compiler's clobberdead mode is
+ // implemented. On these platforms clobberdeadPtr cannot be a valid address.
badPointer(s, p, refBase, refOff)
}
return
@@ -429,6 +430,15 @@ func findObject(p, refBase, refOff uintptr, forStack bool) (base uintptr, s *msp
return
}
+// verifyNotInHeapPtr reports whether converting the not-in-heap pointer into an unsafe.Pointer is ok.
+//go:linkname reflect_verifyNotInHeapPtr reflect.verifyNotInHeapPtr
+func reflect_verifyNotInHeapPtr(p uintptr) bool {
+ // Conversion to a pointer is ok as long as findObject above does not call badPointer.
+ // Since we're already promised that p doesn't point into the heap, just disallow heap
+ // pointers and the special clobbered pointer.
+ return spanOf(p) == nil && p != clobberdeadPtr
+}
+
// next returns the heapBits describing the next pointer-sized word in memory.
// That is, if h describes address p, h.next() describes p+ptrSize.
// Note that next does not modify h. The caller must record the result.
@@ -570,7 +580,7 @@ func (h heapBits) isPointer() bool {
//
//go:nosplit
func bulkBarrierPreWrite(dst, src, size uintptr) {
- if (dst|src|size)&(sys.PtrSize-1) != 0 {
+ if (dst|src|size)&(goarch.PtrSize-1) != 0 {
throw("bulkBarrierPreWrite: unaligned arguments")
}
if !writeBarrier.needed {
@@ -611,7 +621,7 @@ func bulkBarrierPreWrite(dst, src, size uintptr) {
buf := &getg().m.p.ptr().wbBuf
h := heapBitsForAddr(dst)
if src == 0 {
- for i := uintptr(0); i < size; i += sys.PtrSize {
+ for i := uintptr(0); i < size; i += goarch.PtrSize {
if h.isPointer() {
dstx := (*uintptr)(unsafe.Pointer(dst + i))
if !buf.putFast(*dstx, 0) {
@@ -621,7 +631,7 @@ func bulkBarrierPreWrite(dst, src, size uintptr) {
h = h.next()
}
} else {
- for i := uintptr(0); i < size; i += sys.PtrSize {
+ for i := uintptr(0); i < size; i += goarch.PtrSize {
if h.isPointer() {
dstx := (*uintptr)(unsafe.Pointer(dst + i))
srcx := (*uintptr)(unsafe.Pointer(src + i))
@@ -644,7 +654,7 @@ func bulkBarrierPreWrite(dst, src, size uintptr) {
// created and zeroed with malloc.
//go:nosplit
func bulkBarrierPreWriteSrcOnly(dst, src, size uintptr) {
- if (dst|src|size)&(sys.PtrSize-1) != 0 {
+ if (dst|src|size)&(goarch.PtrSize-1) != 0 {
throw("bulkBarrierPreWrite: unaligned arguments")
}
if !writeBarrier.needed {
@@ -652,7 +662,7 @@ func bulkBarrierPreWriteSrcOnly(dst, src, size uintptr) {
}
buf := &getg().m.p.ptr().wbBuf
h := heapBitsForAddr(dst)
- for i := uintptr(0); i < size; i += sys.PtrSize {
+ for i := uintptr(0); i < size; i += goarch.PtrSize {
if h.isPointer() {
srcx := (*uintptr)(unsafe.Pointer(src + i))
if !buf.putFast(0, *srcx) {
@@ -672,17 +682,17 @@ func bulkBarrierPreWriteSrcOnly(dst, src, size uintptr) {
//
//go:nosplit
func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) {
- word := maskOffset / sys.PtrSize
+ word := maskOffset / goarch.PtrSize
bits = addb(bits, word/8)
mask := uint8(1) << (word % 8)
buf := &getg().m.p.ptr().wbBuf
- for i := uintptr(0); i < size; i += sys.PtrSize {
+ for i := uintptr(0); i < size; i += goarch.PtrSize {
if mask == 0 {
bits = addb(bits, 1)
if *bits == 0 {
// Skip 8 words.
- i += 7 * sys.PtrSize
+ i += 7 * goarch.PtrSize
continue
}
mask = 1
@@ -739,8 +749,8 @@ func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) {
ptrmask := typ.gcdata
buf := &getg().m.p.ptr().wbBuf
var bits uint32
- for i := uintptr(0); i < typ.ptrdata; i += sys.PtrSize {
- if i&(sys.PtrSize*8-1) == 0 {
+ for i := uintptr(0); i < typ.ptrdata; i += goarch.PtrSize {
+ if i&(goarch.PtrSize*8-1) == 0 {
bits = uint32(*ptrmask)
ptrmask = addb(ptrmask, 1)
} else {
@@ -770,14 +780,14 @@ func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) {
// Otherwise, it initializes all words to scalar/dead.
func (h heapBits) initSpan(s *mspan) {
// Clear bits corresponding to objects.
- nw := (s.npages << _PageShift) / sys.PtrSize
+ nw := (s.npages << _PageShift) / goarch.PtrSize
if nw%wordsPerBitmapByte != 0 {
throw("initSpan: unaligned length")
}
if h.shift != 0 {
throw("initSpan: unaligned base")
}
- isPtrs := sys.PtrSize == 8 && s.elemsize == sys.PtrSize
+ isPtrs := goarch.PtrSize == 8 && s.elemsize == goarch.PtrSize
for nw > 0 {
hNext, anw := h.forwardOrBoundary(nw)
nbyte := anw / wordsPerBitmapByte
@@ -855,7 +865,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
// The checks for size == sys.PtrSize and size == 2*sys.PtrSize can therefore
// assume that dataSize == size without checking it explicitly.
- if sys.PtrSize == 8 && size == sys.PtrSize {
+ if goarch.PtrSize == 8 && size == goarch.PtrSize {
// It's one word and it has pointers, it must be a pointer.
// Since all allocated one-word objects are pointers
// (non-pointers are aggregated into tinySize allocations),
@@ -881,8 +891,8 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
// objects are at least 4 words long and that their bitmaps start either at the beginning
// of a bitmap byte, or half-way in (h.shift of 0 and 2 respectively).
- if size == 2*sys.PtrSize {
- if typ.size == sys.PtrSize {
+ if size == 2*goarch.PtrSize {
+ if typ.size == goarch.PtrSize {
// We're allocating a block big enough to hold two pointers.
// On 64-bit, that means the actual object must be two pointers,
// or else we'd have used the one-pointer-sized block.
@@ -891,7 +901,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
// just the smallest block available. Distinguish by checking dataSize.
// (In general the number of instances of typ being allocated is
// dataSize/typ.size.)
- if sys.PtrSize == 4 && dataSize == sys.PtrSize {
+ if goarch.PtrSize == 4 && dataSize == goarch.PtrSize {
// 1 pointer object. On 32-bit machines clear the bit for the
// unused second word.
*h.bitp &^= (bitPointer | bitScan | (bitPointer|bitScan)<<heapBitsShift) << h.shift
@@ -905,38 +915,38 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
// Otherwise typ.size must be 2*sys.PtrSize,
// and typ.kind&kindGCProg == 0.
if doubleCheck {
- if typ.size != 2*sys.PtrSize || typ.kind&kindGCProg != 0 {
+ if typ.size != 2*goarch.PtrSize || typ.kind&kindGCProg != 0 {
print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, " gcprog=", typ.kind&kindGCProg != 0, "\n")
throw("heapBitsSetType")
}
}
b := uint32(*ptrmask)
hb := b & 3
- hb |= bitScanAll & ((bitScan << (typ.ptrdata / sys.PtrSize)) - 1)
+ hb |= bitScanAll & ((bitScan << (typ.ptrdata / goarch.PtrSize)) - 1)
// Clear the bits for this object so we can set the
// appropriate ones.
*h.bitp &^= (bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << h.shift
*h.bitp |= uint8(hb << h.shift)
return
- } else if size == 3*sys.PtrSize {
+ } else if size == 3*goarch.PtrSize {
b := uint8(*ptrmask)
if doubleCheck {
if b == 0 {
println("runtime: invalid type ", typ.string())
throw("heapBitsSetType: called with non-pointer type")
}
- if sys.PtrSize != 8 {
+ if goarch.PtrSize != 8 {
throw("heapBitsSetType: unexpected 3 pointer wide size class on 32 bit")
}
if typ.kind&kindGCProg != 0 {
throw("heapBitsSetType: unexpected GC prog for 3 pointer wide size class")
}
- if typ.size == 2*sys.PtrSize {
+ if typ.size == 2*goarch.PtrSize {
print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, "\n")
throw("heapBitsSetType: inconsistent object sizes")
}
}
- if typ.size == sys.PtrSize {
+ if typ.size == goarch.PtrSize {
// The type contains a pointer otherwise heapBitsSetType wouldn't have been called.
// Since the type is only 1 pointer wide and contains a pointer, its gcdata must be exactly 1.
if doubleCheck && *typ.gcdata != 1 {
@@ -992,7 +1002,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
// machine instructions.
outOfPlace := false
- if arenaIndex(x+size-1) != arenaIdx(h.arena) || (doubleCheck && fastrand()%2 == 0) {
+ if arenaIndex(x+size-1) != arenaIdx(h.arena) || (doubleCheck && fastrandn(2) == 0) {
// This object spans heap arenas, so the bitmap may be
// discontiguous. Unroll it into the object instead
// and then copy it out.
@@ -1082,8 +1092,8 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
// Filling in bits for an array of typ.
// Set up for repetition of ptrmask during main loop.
// Note that ptrmask describes only a prefix of
- const maxBits = sys.PtrSize*8 - 7
- if typ.ptrdata/sys.PtrSize <= maxBits {
+ const maxBits = goarch.PtrSize*8 - 7
+ if typ.ptrdata/goarch.PtrSize <= maxBits {
// Entire ptrmask fits in uintptr with room for a byte fragment.
// Load into pbits and never read from ptrmask again.
// This is especially important when the ptrmask has
@@ -1094,12 +1104,12 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
// Accumulate ptrmask into b.
// ptrmask is sized to describe only typ.ptrdata, but we record
// it as describing typ.size bytes, since all the high bits are zero.
- nb = typ.ptrdata / sys.PtrSize
+ nb = typ.ptrdata / goarch.PtrSize
for i := uintptr(0); i < nb; i += 8 {
b |= uintptr(*p) << i
p = add1(p)
}
- nb = typ.size / sys.PtrSize
+ nb = typ.size / goarch.PtrSize
// Replicate ptrmask to fill entire pbits uintptr.
// Doubling and truncating is fewer steps than
@@ -1110,7 +1120,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
pbits = b
endnb = nb
if nb+nb <= maxBits {
- for endnb <= sys.PtrSize*8 {
+ for endnb <= goarch.PtrSize*8 {
pbits |= pbits << endnb
endnb += endnb
}
@@ -1129,9 +1139,9 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
endp = nil
} else {
// Ptrmask is larger. Read it multiple times.
- n := (typ.ptrdata/sys.PtrSize+7)/8 - 1
+ n := (typ.ptrdata/goarch.PtrSize+7)/8 - 1
endp = addb(ptrmask, n)
- endnb = typ.size/sys.PtrSize - n*8
+ endnb = typ.size/goarch.PtrSize - n*8
}
}
if p != nil {
@@ -1142,12 +1152,12 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
if typ.size == dataSize {
// Single entry: can stop once we reach the non-pointer data.
- nw = typ.ptrdata / sys.PtrSize
+ nw = typ.ptrdata / goarch.PtrSize
} else {
// Repeated instances of typ in an array.
// Have to process first N-1 entries in full, but can stop
// once we reach the non-pointer data in the final entry.
- nw = ((dataSize/typ.size-1)*typ.size + typ.ptrdata) / sys.PtrSize
+ nw = ((dataSize/typ.size-1)*typ.size + typ.ptrdata) / goarch.PtrSize
}
if nw == 0 {
// No pointers! Caller was supposed to check.
@@ -1310,7 +1320,7 @@ Phase3:
}
// Change nw from counting possibly-pointer words to total words in allocation.
- nw = size / sys.PtrSize
+ nw = size / goarch.PtrSize
// Write whole bitmap bytes.
// The first is hb, the rest are zero.
@@ -1344,7 +1354,7 @@ Phase4:
h := heapBitsForAddr(x)
// cnw is the number of heap words, or bit pairs
// remaining (like nw above).
- cnw := size / sys.PtrSize
+ cnw := size / goarch.PtrSize
src := (*uint8)(unsafe.Pointer(x))
// We know the first and last byte of the bitmap are
// not the same, but it's still possible for small
@@ -1409,7 +1419,7 @@ Phase4:
if doubleCheck {
// x+size may not point to the heap, so back up one
// word and then advance it the way we do above.
- end := heapBitsForAddr(x + size - sys.PtrSize)
+ end := heapBitsForAddr(x + size - goarch.PtrSize)
if outOfPlace {
// In out-of-place copying, we just advance
// using next.
@@ -1436,11 +1446,11 @@ Phase4:
// Double-check that bits to be written were written correctly.
// Does not check that other bits were not written, unfortunately.
h := heapBitsForAddr(x)
- nptr := typ.ptrdata / sys.PtrSize
- ndata := typ.size / sys.PtrSize
+ nptr := typ.ptrdata / goarch.PtrSize
+ ndata := typ.size / goarch.PtrSize
count := dataSize / typ.size
- totalptr := ((count-1)*typ.size + typ.ptrdata) / sys.PtrSize
- for i := uintptr(0); i < size/sys.PtrSize; i++ {
+ totalptr := ((count-1)*typ.size + typ.ptrdata) / goarch.PtrSize
+ for i := uintptr(0); i < size/goarch.PtrSize; i++ {
j := i % ndata
var have, want uint8
have = (*h.bitp >> h.shift) & (bitPointer | bitScan)
@@ -1465,7 +1475,7 @@ Phase4:
print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n")
print("current bits h.bitp=", h.bitp, " h.shift=", h.shift, " *h.bitp=", hex(*h.bitp), "\n")
print("ptrmask=", ptrmask, " p=", p, " endp=", endp, " endnb=", endnb, " pbits=", hex(pbits), " b=", hex(b), " nb=", nb, "\n")
- println("at word", i, "offset", i*sys.PtrSize, "have", hex(have), "want", hex(want))
+ println("at word", i, "offset", i*goarch.PtrSize, "have", hex(have), "want", hex(want))
if typ.kind&kindGCProg != 0 {
println("GC program:")
dumpGCProg(addb(typ.gcdata, 4))
@@ -1496,14 +1506,14 @@ var debugPtrmask struct {
// so that the relevant bitmap bytes are not shared with surrounding
// objects.
func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize uintptr, prog *byte) {
- if sys.PtrSize == 8 && allocSize%(4*sys.PtrSize) != 0 {
+ if goarch.PtrSize == 8 && allocSize%(4*goarch.PtrSize) != 0 {
// Alignment will be wrong.
throw("heapBitsSetTypeGCProg: small allocation")
}
var totalBits uintptr
if elemSize == dataSize {
totalBits = runGCProg(prog, nil, h.bitp, 2)
- if totalBits*sys.PtrSize != progSize {
+ if totalBits*goarch.PtrSize != progSize {
println("runtime: heapBitsSetTypeGCProg: total bits", totalBits, "but progSize", progSize)
throw("heapBitsSetTypeGCProg: unexpected bit count")
}
@@ -1518,7 +1528,7 @@ func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize u
// repeats that first element to fill the array.
var trailer [40]byte // 3 varints (max 10 each) + some bytes
i := 0
- if n := elemSize/sys.PtrSize - progSize/sys.PtrSize; n > 0 {
+ if n := elemSize/goarch.PtrSize - progSize/goarch.PtrSize; n > 0 {
// literal(0)
trailer[i] = 0x01
i++
@@ -1540,7 +1550,7 @@ func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize u
// repeat(elemSize/ptrSize, count-1)
trailer[i] = 0x80
i++
- n := elemSize / sys.PtrSize
+ n := elemSize / goarch.PtrSize
for ; n >= 0x80; n >>= 7 {
trailer[i] = byte(n | 0x80)
i++
@@ -1564,10 +1574,10 @@ func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize u
// last element. This will cause the code below to
// memclr the dead section of the final array element,
// so that scanobject can stop early in the final element.
- totalBits = (elemSize*(count-1) + progSize) / sys.PtrSize
+ totalBits = (elemSize*(count-1) + progSize) / goarch.PtrSize
}
endProg := unsafe.Pointer(addb(h.bitp, (totalBits+3)/4))
- endAlloc := unsafe.Pointer(addb(h.bitp, allocSize/sys.PtrSize/wordsPerBitmapByte))
+ endAlloc := unsafe.Pointer(addb(h.bitp, allocSize/goarch.PtrSize/wordsPerBitmapByte))
memclrNoHeapPointers(endProg, uintptr(endAlloc)-uintptr(endProg))
}
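// Sketch (illustration only, not runtime code): the trailer loop above that
// emits the repeat count is the standard base-128 "uvarint" encoding, the
// same scheme as encoding/binary.PutUvarint: low 7 bits per byte, high bit
// set on every byte except the last. For example, 300 encodes as 0xac 0x02.
package main

import (
	"encoding/binary"
	"fmt"
)

func putUvarint(dst []byte, n uint64) int {
	i := 0
	for ; n >= 0x80; n >>= 7 {
		dst[i] = byte(n | 0x80) // low 7 bits plus a continuation bit
		i++
	}
	dst[i] = byte(n) // final byte, continuation bit clear
	return i + 1
}

func main() {
	var a, b [binary.MaxVarintLen64]byte
	fmt.Printf("% x\n", a[:putUvarint(a[:], 300)])        // ac 02
	fmt.Printf("% x\n", b[:binary.PutUvarint(b[:], 300)]) // ac 02
}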
@@ -1575,7 +1585,7 @@ func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize u
// size the size of the region described by prog, in bytes.
// The resulting bitvector will have no more than size/sys.PtrSize bits.
func progToPointerMask(prog *byte, size uintptr) bitvector {
- n := (size/sys.PtrSize + 7) / 8
+ n := (size/goarch.PtrSize + 7) / 8
x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1]
x[len(x)-1] = 0xa1 // overflow check sentinel
n = runGCProg(prog, nil, &x[0], 1)
@@ -1710,7 +1720,7 @@ Run:
// the pattern to a bit buffer holding at most 7 bits (a partial byte)
// it will not overflow.
src := dst
- const maxBits = sys.PtrSize*8 - 7
+ const maxBits = goarch.PtrSize*8 - 7
if n <= maxBits {
// Start with bits in output buffer.
pattern := bits
@@ -1763,7 +1773,7 @@ Run:
nb := npattern
if nb+nb <= maxBits {
// Double pattern until the whole uintptr is filled.
- for nb <= sys.PtrSize*8 {
+ for nb <= goarch.PtrSize*8 {
b |= b << nb
nb += nb
}
@@ -1891,7 +1901,7 @@ Run:
// The result must be deallocated with dematerializeGCProg.
func materializeGCProg(ptrdata uintptr, prog *byte) *mspan {
// Each word of ptrdata needs one bit in the bitmap.
- bitmapBytes := divRoundUp(ptrdata, 8*sys.PtrSize)
+ bitmapBytes := divRoundUp(ptrdata, 8*goarch.PtrSize)
// Compute the number of pages needed for bitmapBytes.
pages := divRoundUp(bitmapBytes, pageSize)
s := mheap_.allocManual(pages, spanAllocPtrScalarBits)
@@ -1952,10 +1962,10 @@ func dumpGCProg(p *byte) {
// gcbits returns the GC type info for x, for testing.
// The result is the bitmap entries (0 or 1), one entry per byte.
//go:linkname reflect_gcbits reflect.gcbits
-func reflect_gcbits(x interface{}) []byte {
+func reflect_gcbits(x any) []byte {
ret := getgcmask(x)
typ := (*ptrtype)(unsafe.Pointer(efaceOf(&x)._type)).elem
- nptr := typ.ptrdata / sys.PtrSize
+ nptr := typ.ptrdata / goarch.PtrSize
for uintptr(len(ret)) > nptr && ret[len(ret)-1] == 0 {
ret = ret[:len(ret)-1]
}
@@ -1965,7 +1975,7 @@ func reflect_gcbits(x interface{}) []byte {
// Returns GC type info for the pointer stored in ep for testing.
// If ep points to the stack, only static live information will be returned
// (i.e. not for objects which are only dynamically live stack objects).
-func getgcmask(ep interface{}) (mask []byte) {
+func getgcmask(ep any) (mask []byte) {
e := *efaceOf(&ep)
p := e.data
t := e._type
@@ -1977,7 +1987,7 @@ func getgcmask(ep interface{}) (mask []byte) {
addr := uintptr(pr.decl)
if addr <= uintptr(p) && uintptr(p) < addr+pr.size {
n := (*ptrtype)(unsafe.Pointer(t)).elem.size
- mask = make([]byte, n/sys.PtrSize)
+ mask = make([]byte, n/goarch.PtrSize)
copy(mask, (*[1 << 29]uint8)(unsafe.Pointer(pr.gcdata))[:pr.ptrdata])
}
return
@@ -1989,13 +1999,13 @@ func getgcmask(ep interface{}) (mask []byte) {
if base, s, _ := findObject(uintptr(p), 0, 0, false); base != 0 {
hbits := heapBitsForAddr(base)
n := s.elemsize
- mask = make([]byte, n/sys.PtrSize)
- for i := uintptr(0); i < n; i += sys.PtrSize {
+ mask = make([]byte, n/goarch.PtrSize)
+ for i := uintptr(0); i < n; i += goarch.PtrSize {
if hbits.isPointer() {
- mask[i/sys.PtrSize] = 1
+ mask[i/goarch.PtrSize] = 1
}
if !hbits.morePointers() {
- mask = mask[:i/sys.PtrSize]
+ mask = mask[:i/goarch.PtrSize]
break
}
hbits = hbits.next()
diff --git a/libgo/go/runtime/mcache.go b/libgo/go/runtime/mcache.go
index 531f3be..c7487dc 100644
--- a/libgo/go/runtime/mcache.go
+++ b/libgo/go/runtime/mcache.go
@@ -114,9 +114,9 @@ func freemcache(c *mcache) {
//
// Returns nil if we're not bootstrapping or we don't have a P. The caller's
// P must not change, so we must be in a non-preemptible state.
-func getMCache() *mcache {
+func getMCache(mp *m) *mcache {
// Grab the mcache, since that's where stats live.
- pp := getg().m.p.ptr()
+ pp := mp.p.ptr()
var c *mcache
if pp == nil {
// We will be called without a P while bootstrapping,
@@ -176,32 +176,18 @@ func (c *mcache) refill(spc spanClass) {
}
memstats.heapStats.release()
- // Update gcController.heapLive with the same assumption.
- usedBytes := uintptr(s.allocCount) * s.elemsize
- atomic.Xadd64(&gcController.heapLive, int64(s.npages*pageSize)-int64(usedBytes))
-
+ // Update heapLive with the same assumption.
// While we're here, flush scanAlloc, since we have to call
// revise anyway.
- atomic.Xadd64(&gcController.heapScan, int64(c.scanAlloc))
+ usedBytes := uintptr(s.allocCount) * s.elemsize
+ gcController.update(int64(s.npages*pageSize)-int64(usedBytes), int64(c.scanAlloc))
c.scanAlloc = 0
- if trace.enabled {
- // gcController.heapLive changed.
- traceHeapAlloc()
- }
- if gcBlackenEnabled != 0 {
- // gcController.heapLive and heapScan changed.
- gcController.revise()
- }
-
c.alloc[spc] = s
}
// allocLarge allocates a span for a large object.
-// The boolean result indicates whether the span is known-zeroed.
-// If it did not need to be zeroed, it may not have been zeroed;
-// but if it came directly from the OS, it is already zeroed.
-func (c *mcache) allocLarge(size uintptr, needzero bool, noscan bool) (*mspan, bool) {
+func (c *mcache) allocLarge(size uintptr, noscan bool) *mspan {
if size+_PageSize < size {
throw("out of memory")
}
@@ -216,7 +202,7 @@ func (c *mcache) allocLarge(size uintptr, needzero bool, noscan bool) (*mspan, b
deductSweepCredit(npages*_PageSize, npages)
spc := makeSpanClass(0, noscan)
- s, isZeroed := mheap_.alloc(npages, spc, needzero)
+ s := mheap_.alloc(npages, spc)
if s == nil {
throw("out of memory")
}
@@ -225,30 +211,24 @@ func (c *mcache) allocLarge(size uintptr, needzero bool, noscan bool) (*mspan, b
atomic.Xadduintptr(&stats.largeAllocCount, 1)
memstats.heapStats.release()
- // Update gcController.heapLive and revise pacing if needed.
- atomic.Xadd64(&gcController.heapLive, int64(npages*pageSize))
- if trace.enabled {
- // Trace that a heap alloc occurred because gcController.heapLive changed.
- traceHeapAlloc()
- }
- if gcBlackenEnabled != 0 {
- gcController.revise()
- }
+ // Update heapLive.
+ gcController.update(int64(s.npages*pageSize), 0)
// Put the large span in the mcentral swept list so that it's
// visible to the background sweeper.
mheap_.central[spc].mcentral.fullSwept(mheap_.sweepgen).push(s)
s.limit = s.base() + size
heapBitsForAddr(s.base()).initSpan(s)
- return s, isZeroed
+ return s
}
func (c *mcache) releaseAll() {
// Take this opportunity to flush scanAlloc.
- atomic.Xadd64(&gcController.heapScan, int64(c.scanAlloc))
+ scanAlloc := int64(c.scanAlloc)
c.scanAlloc = 0
sg := mheap_.sweepgen
+ dHeapLive := int64(0)
for i := range c.alloc {
s := c.alloc[i]
if s != &emptymspan {
@@ -265,7 +245,7 @@ func (c *mcache) releaseAll() {
// gcController.heapLive was totally recomputed since
// caching this span, so we don't do this for
// stale spans.
- atomic.Xadd64(&gcController.heapLive, -int64(n)*int64(s.elemsize))
+ dHeapLive -= int64(n) * int64(s.elemsize)
}
// Release the span to the mcentral.
mheap_.central[i].mcentral.uncacheSpan(s)
@@ -282,10 +262,8 @@ func (c *mcache) releaseAll() {
c.tinyAllocs = 0
memstats.heapStats.release()
- // Updated heapScan and possible gcController.heapLive.
- if gcBlackenEnabled != 0 {
- gcController.revise()
- }
+ // Updated heapScan and heapLive.
+ gcController.update(dHeapLive, scanAlloc)
}
// prepareForSweep flushes c if the system has entered a new sweep phase
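// Sketch (illustration only): the refill/releaseAll changes above batch
// their pacing updates by accumulating deltas locally and publishing them
// with a single gcController.update(dHeapLive, dHeapScan) call instead of
// separate atomic adds and a manual revise. The type below is a hypothetical
// stand-in for that shape, not the runtime's gcControllerState.
package main

import (
	"fmt"
	"sync/atomic"
)

type pacer struct {
	heapLive int64
	heapScan int64
}

// update applies both deltas at one point, so pacing sees them together.
func (p *pacer) update(dHeapLive, dHeapScan int64) {
	atomic.AddInt64(&p.heapLive, dHeapLive)
	atomic.AddInt64(&p.heapScan, dHeapScan)
}

func main() {
	var p pacer
	dHeapLive, scanAlloc := int64(0), int64(4096)
	dHeapLive -= 3 * 512 // e.g. three uncached 512-byte slots returned
	p.update(dHeapLive, scanAlloc)
	fmt.Println(p.heapLive, p.heapScan) // -1536 4096
}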
diff --git a/libgo/go/runtime/mcentral.go b/libgo/go/runtime/mcentral.go
index 6013c94..e4bdf35 100644
--- a/libgo/go/runtime/mcentral.go
+++ b/libgo/go/runtime/mcentral.go
@@ -102,56 +102,59 @@ func (c *mcentral) cacheSpan() *mspan {
spanBudget := 100
var s *mspan
- sl := newSweepLocker()
- sg := sl.sweepGen
+ var sl sweepLocker
// Try partial swept spans first.
+ sg := mheap_.sweepgen
if s = c.partialSwept(sg).pop(); s != nil {
goto havespan
}
- // Now try partial unswept spans.
- for ; spanBudget >= 0; spanBudget-- {
- s = c.partialUnswept(sg).pop()
- if s == nil {
- break
- }
- if s, ok := sl.tryAcquire(s); ok {
- // We got ownership of the span, so let's sweep it and use it.
- s.sweep(true)
- sl.dispose()
- goto havespan
- }
- // We failed to get ownership of the span, which means it's being or
- // has been swept by an asynchronous sweeper that just couldn't remove it
- // from the unswept list. That sweeper took ownership of the span and
- // responsibility for either freeing it to the heap or putting it on the
- // right swept list. Either way, we should just ignore it (and it's unsafe
- // for us to do anything else).
- }
- // Now try full unswept spans, sweeping them and putting them into the
- // right list if we fail to get a span.
- for ; spanBudget >= 0; spanBudget-- {
- s = c.fullUnswept(sg).pop()
- if s == nil {
- break
- }
- if s, ok := sl.tryAcquire(s); ok {
- // We got ownership of the span, so let's sweep it.
- s.sweep(true)
- // Check if there's any free space.
- freeIndex := s.nextFreeIndex()
- if freeIndex != s.nelems {
- s.freeindex = freeIndex
- sl.dispose()
+ sl = sweep.active.begin()
+ if sl.valid {
+ // Now try partial unswept spans.
+ for ; spanBudget >= 0; spanBudget-- {
+ s = c.partialUnswept(sg).pop()
+ if s == nil {
+ break
+ }
+ if s, ok := sl.tryAcquire(s); ok {
+ // We got ownership of the span, so let's sweep it and use it.
+ s.sweep(true)
+ sweep.active.end(sl)
goto havespan
}
- // Add it to the swept list, because sweeping didn't give us any free space.
- c.fullSwept(sg).push(s.mspan)
+ // We failed to get ownership of the span, which means it's being or
+ // has been swept by an asynchronous sweeper that just couldn't remove it
+ // from the unswept list. That sweeper took ownership of the span and
+ // responsibility for either freeing it to the heap or putting it on the
+ // right swept list. Either way, we should just ignore it (and it's unsafe
+ // for us to do anything else).
+ }
+ // Now try full unswept spans, sweeping them and putting them into the
+ // right list if we fail to get a span.
+ for ; spanBudget >= 0; spanBudget-- {
+ s = c.fullUnswept(sg).pop()
+ if s == nil {
+ break
+ }
+ if s, ok := sl.tryAcquire(s); ok {
+ // We got ownership of the span, so let's sweep it.
+ s.sweep(true)
+ // Check if there's any free space.
+ freeIndex := s.nextFreeIndex()
+ if freeIndex != s.nelems {
+ s.freeindex = freeIndex
+ sweep.active.end(sl)
+ goto havespan
+ }
+ // Add it to the swept list, because sweeping didn't give us any free space.
+ c.fullSwept(sg).push(s.mspan)
+ }
+ // See comment for partial unswept spans.
}
- // See comment for partial unswept spans.
+ sweep.active.end(sl)
}
- sl.dispose()
if trace.enabled {
traceGCSweepDone()
traceDone = true
@@ -238,7 +241,7 @@ func (c *mcentral) grow() *mspan {
npages := uintptr(class_to_allocnpages[c.spanclass.sizeclass()])
size := uintptr(class_to_size[c.spanclass.sizeclass()])
- s, _ := mheap_.alloc(npages, c.spanclass, true)
+ s := mheap_.alloc(npages, c.spanclass)
if s == nil {
return nil
}
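// Sketch (illustration only): cacheSpan now brackets sweeping with
// sweep.active.begin()/end() and must check the returned locker's valid
// flag, since sweeping may already be drained for the cycle. The types
// below are stand-ins for the real activeSweep/sweepLocker in mgcsweep.go,
// showing only the acquire/guard/release shape.
package main

import "fmt"

type sweepLocker struct{ valid bool }

type activeSweep struct{ drained bool }

func (a *activeSweep) begin() sweepLocker { return sweepLocker{valid: !a.drained} }
func (a *activeSweep) end(sweepLocker)    {}

func trySweep(a *activeSweep) {
	sl := a.begin()
	if !sl.valid {
		return // sweeping already finished for this cycle; nothing to acquire
	}
	// ... acquire and sweep spans here ...
	a.end(sl)
	fmt.Println("swept under the sweep barrier")
}

func main() { trySweep(&activeSweep{}) }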
diff --git a/libgo/go/runtime/mcheckmark.go b/libgo/go/runtime/mcheckmark.go
index e4acb79..a058a57 100644
--- a/libgo/go/runtime/mcheckmark.go
+++ b/libgo/go/runtime/mcheckmark.go
@@ -13,8 +13,8 @@
package runtime
import (
+ "internal/goarch"
"runtime/internal/atomic"
- "runtime/internal/sys"
"unsafe"
)
@@ -24,7 +24,7 @@ import (
// allocation.
//
//go:notinheap
-type checkmarksMap [heapArenaBytes / sys.PtrSize / 8]uint8
+type checkmarksMap [heapArenaBytes / goarch.PtrSize / 8]uint8
// If useCheckmark is true, marking of an object uses the checkmark
// bits instead of the standard mark bits.
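// Worked sizing note (illustration only): the array above holds one
// checkmark bit per pointer-sized heap word. Assuming 64 MiB arenas and
// 8-byte pointers (the common 64-bit configuration; an assumption, not
// stated in this patch), that is 1 MiB of checkmark state per arena:
package sketch

const (
	arenaBytes     = 64 << 20
	ptrSize        = 8
	checkmarkBytes = arenaBytes / ptrSize / 8 // 1 << 20 bytes per arena
)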
diff --git a/libgo/go/runtime/mfinal.go b/libgo/go/runtime/mfinal.go
index b4afaee..09fb064 100644
--- a/libgo/go/runtime/mfinal.go
+++ b/libgo/go/runtime/mfinal.go
@@ -7,8 +7,8 @@
package runtime
import (
+ "internal/goarch"
"runtime/internal/atomic"
- "runtime/internal/sys"
"unsafe"
)
@@ -25,14 +25,14 @@ type finblock struct {
next *finblock
cnt uint32
_ int32
- fin [(_FinBlockSize - 2*sys.PtrSize - 2*4) / unsafe.Sizeof(finalizer{})]finalizer
+ fin [(_FinBlockSize - 2*goarch.PtrSize - 2*4) / unsafe.Sizeof(finalizer{})]finalizer
}
var finlock mutex // protects the following variables
var fing *g // goroutine that runs finalizers
var finq *finblock // list of finalizers that are to be executed
var finc *finblock // cache of free blocks
-var finptrmask [_FinBlockSize / sys.PtrSize / 8]byte
+var finptrmask [_FinBlockSize / goarch.PtrSize / 8]byte
var fingwait bool
var fingwake bool
var allfin *finblock // list of all blocks
@@ -273,7 +273,7 @@ func runfinq() {
// A single goroutine runs all finalizers for a program, sequentially.
// If a finalizer must run for a long time, it should do so by starting
// a new goroutine.
-func SetFinalizer(obj interface{}, finalizer interface{}) {
+func SetFinalizer(obj any, finalizer any) {
if debug.sbrk != 0 {
// debug.sbrk never frees memory, so no finalizers run
// (and we don't have the data structures to record them).
@@ -406,7 +406,7 @@ okarg:
// Note: KeepAlive should only be used to prevent finalizers from
// running prematurely. In particular, when used with unsafe.Pointer,
// the rules for valid uses of unsafe.Pointer still apply.
-func KeepAlive(x interface{}) {
+func KeepAlive(x any) {
// Introduce a use of x that the compiler can't eliminate.
// This makes sure x is alive on entry. We need x to be alive
// on entry for "defer runtime.KeepAlive(x)"; see issue 21402.
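// The interface{} -> any change above is purely a spelling change (any is an
// alias for interface{}), so callers are unaffected. A minimal usage example
// of both public APIs; note that a finalizer runs asynchronously and may not
// run at all before the program exits.
package main

import (
	"fmt"
	"runtime"
	"time"
)

type resource struct{ name string }

func main() {
	r := &resource{name: "demo"}
	runtime.SetFinalizer(r, func(r *resource) {
		fmt.Println("finalizing", r.name)
	})

	// ... use r ...
	runtime.KeepAlive(r) // r must be considered live at least until here

	runtime.GC()
	time.Sleep(10 * time.Millisecond) // give the finalizer goroutine a chance
}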
diff --git a/libgo/go/runtime/mfinal_test.go b/libgo/go/runtime/mfinal_test.go
index 2086e42..81c924f 100644
--- a/libgo/go/runtime/mfinal_test.go
+++ b/libgo/go/runtime/mfinal_test.go
@@ -37,14 +37,14 @@ func TestFinalizerType(t *testing.T) {
}
var finalizerTests = []struct {
- convert func(*int) interface{}
- finalizer interface{}
+ convert func(*int) any
+ finalizer any
}{
- {func(x *int) interface{} { return x }, func(v *int) { finalize(v) }},
- {func(x *int) interface{} { return Tintptr(x) }, func(v Tintptr) { finalize(v) }},
- {func(x *int) interface{} { return Tintptr(x) }, func(v *int) { finalize(v) }},
- {func(x *int) interface{} { return (*Tint)(x) }, func(v *Tint) { finalize((*int)(v)) }},
- {func(x *int) interface{} { return (*Tint)(x) }, func(v Tinter) { finalize((*int)(v.(*Tint))) }},
+ {func(x *int) any { return x }, func(v *int) { finalize(v) }},
+ {func(x *int) any { return Tintptr(x) }, func(v Tintptr) { finalize(v) }},
+ {func(x *int) any { return Tintptr(x) }, func(v *int) { finalize(v) }},
+ {func(x *int) any { return (*Tint)(x) }, func(v *Tint) { finalize((*int)(v)) }},
+ {func(x *int) any { return (*Tint)(x) }, func(v Tinter) { finalize((*int)(v.(*Tint))) }},
}
for i, tt := range finalizerTests {
@@ -91,7 +91,7 @@ func TestFinalizerInterfaceBig(t *testing.T) {
go func() {
v := &bigValue{0xDEADBEEFDEADBEEF, true, "It matters not how strait the gate"}
old := *v
- runtime.SetFinalizer(v, func(v interface{}) {
+ runtime.SetFinalizer(v, func(v any) {
i, ok := v.(*bigValue)
if !ok {
t.Errorf("finalizer called with type %T, want *bigValue", v)
diff --git a/libgo/go/runtime/mfixalloc.go b/libgo/go/runtime/mfixalloc.go
index 293c16b..b701a09b 100644
--- a/libgo/go/runtime/mfixalloc.go
+++ b/libgo/go/runtime/mfixalloc.go
@@ -30,7 +30,8 @@ type fixalloc struct {
arg unsafe.Pointer
list *mlink
chunk uintptr // use uintptr instead of unsafe.Pointer to avoid write barriers
- nchunk uint32
+ nchunk uint32 // bytes remaining in current chunk
+ nalloc uint32 // size of new chunks in bytes
inuse uintptr // in-use bytes now
stat *sysMemStat
zero bool // zero allocations
@@ -50,12 +51,20 @@ type mlink struct {
// Initialize f to allocate objects of the given size,
// using the allocator to obtain chunks of memory.
func (f *fixalloc) init(size uintptr, first func(arg, p unsafe.Pointer), arg unsafe.Pointer, stat *sysMemStat) {
+ if size > _FixAllocChunk {
+ throw("runtime: fixalloc size too large")
+ }
+ if min := unsafe.Sizeof(mlink{}); size < min {
+ size = min
+ }
+
f.size = size
f.first = first
f.arg = arg
f.list = nil
f.chunk = 0
f.nchunk = 0
+ f.nalloc = uint32(_FixAllocChunk / size * size) // Round _FixAllocChunk down to an exact multiple of size to eliminate tail waste
f.inuse = 0
f.stat = stat
f.zero = true
@@ -77,8 +86,8 @@ func (f *fixalloc) alloc() unsafe.Pointer {
return v
}
if uintptr(f.nchunk) < f.size {
- f.chunk = uintptr(persistentalloc(_FixAllocChunk, 0, f.stat))
- f.nchunk = _FixAllocChunk
+ f.chunk = uintptr(persistentalloc(uintptr(f.nalloc), 0, f.stat))
+ f.nchunk = f.nalloc
}
v := unsafe.Pointer(f.chunk)
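// Worked example (illustration only) of the rounding above: with a 16 KiB
// chunk (the assumed value of _FixAllocChunk) and a 96-byte object, rounding
// the chunk size down to a whole multiple of the object size turns 64 bytes
// of per-chunk tail waste into zero.
package sketch

const (
	chunk  = 16 << 10
	size   = 96
	nalloc = chunk / size * size // 16320 bytes: 170 whole objects
	tail   = chunk - nalloc      // 64 bytes previously left unusable per chunk
)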
diff --git a/libgo/go/runtime/mgc.go b/libgo/go/runtime/mgc.go
index efb8012..65cbcdb 100644
--- a/libgo/go/runtime/mgc.go
+++ b/libgo/go/runtime/mgc.go
@@ -154,7 +154,7 @@ func gcinit() {
throw("size of Workbuf is suboptimal")
}
// No sweep on the first cycle.
- mheap_.sweepDrained = 1
+ sweep.active.state.Store(sweepDrainedMask)
// Initialize GC pacer state.
// Use the environment variable GOGC for the initial gcPercent value.
@@ -167,24 +167,19 @@ func gcinit() {
lockInit(&work.wbufSpans.lock, lockRankWbufSpans)
}
-// Temporary in order to enable register ABI work.
-// TODO(register args): convert back to local chan in gcenabled, passed to "go" stmts.
-var gcenable_setup chan int
-
// gcenable is called after the bulk of the runtime initialization,
// just before we're about to start letting user code run.
// It kicks off the background sweeper goroutine, the background
// scavenger goroutine, and enables GC.
func gcenable() {
// Kick off sweeping and scavenging.
- gcenable_setup = make(chan int, 2)
+ c := make(chan int, 2)
expectSystemGoroutine()
- go bgsweep()
+ go bgsweep(c)
expectSystemGoroutine()
- go bgscavenge()
- <-gcenable_setup
- <-gcenable_setup
- gcenable_setup = nil
+ go bgscavenge(c)
+ <-c
+ <-c
memstats.enablegc = true // now that runtime is initialized, GC is okay
}
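// Sketch (illustration only): the change above replaces the package-level
// gcenable_setup channel with a local channel handed to each background
// goroutine, which signals once it has finished initializing; the starter
// waits for both signals. Function names below are placeholders, not the
// runtime's bgsweep/bgscavenge.
package main

import "fmt"

func worker(name string, ready chan<- int) {
	// ... set up internal state ...
	ready <- 1 // tell the starter we are initialized
	fmt.Println(name, "running")
	select {} // park forever, like a background runtime goroutine
}

func main() {
	c := make(chan int, 2)
	go worker("sweeper", c)
	go worker("scavenger", c)
	<-c
	<-c
	fmt.Println("both background workers initialized")
}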
@@ -332,6 +327,12 @@ var work struct {
// Base indexes of each root type. Set by gcMarkRootPrepare.
baseData, baseSpans, baseStacks, baseEnd uint32
+ // stackRoots is a snapshot of all of the Gs that existed
+ // before the beginning of concurrent marking. The backing
+ // store of this must not be modified because it might be
+ // shared with allgs.
+ stackRoots []*g
+
// Each type of GC state transition is protected by a lock.
// Since multiple threads can simultaneously detect the state
// transition condition, any thread that detects a transition
@@ -552,7 +553,7 @@ func (t gcTrigger) test() bool {
// own write.
return gcController.heapLive >= gcController.trigger
case gcTriggerTime:
- if gcController.gcPercent < 0 {
+ if gcController.gcPercent.Load() < 0 {
return false
}
lastgc := int64(atomic.Load64(&memstats.last_gc_nanotime))
@@ -668,7 +669,9 @@ func gcStart(trigger gcTrigger) {
work.cycles++
- gcController.startCycle()
+ // Assists and workers can start the moment we start
+ // the world.
+ gcController.startCycle(now, int(gomaxprocs))
work.heapGoal = gcController.heapGoal
// In STW mode, disable scheduling of user Gs. This may also
@@ -711,10 +714,6 @@ func gcStart(trigger gcTrigger) {
// mutators.
atomic.Store(&gcBlackenEnabled, 1)
- // Assists and workers can start the moment we start
- // the world.
- gcController.markStartTime = now
-
// In STW mode, we could block the instant systemstack
// returns, so make sure we're not preemptible.
mp = acquirem()
@@ -898,7 +897,7 @@ top:
// endCycle depends on all gcWork cache stats being flushed.
// The termination algorithm above ensured that up to
// allocations since the ragged barrier.
- nextTriggerRatio := gcController.endCycle(work.userForced)
+ nextTriggerRatio := gcController.endCycle(now, int(gomaxprocs), work.userForced)
// Perform mark termination. This will restart the world.
gcMarkTermination(nextTriggerRatio)
@@ -972,12 +971,13 @@ func gcMarkTermination(nextTriggerRatio float64) {
throw("gc done but gcphase != _GCoff")
}
- // Record heapGoal and heap_inuse for scavenger.
- gcController.lastHeapGoal = gcController.heapGoal
+ // Record heap_inuse for scavenger.
memstats.last_heap_inuse = memstats.heap_inuse
// Update GC trigger and pacing for the next cycle.
gcController.commit(nextTriggerRatio)
+ gcPaceSweeper(gcController.trigger)
+ gcPaceScavenger(gcController.heapGoal, gcController.lastHeapGoal)
// Update timing memstats
now := nanotime()
@@ -1028,8 +1028,10 @@ func gcMarkTermination(nextTriggerRatio float64) {
// Those aren't tracked in any sweep lists, so we need to
// count them against sweep completion until we ensure all
// those spans have been forced out.
- sl := newSweepLocker()
- sl.blockCompletion()
+ sl := sweep.active.begin()
+ if !sl.valid {
+ throw("failed to set sweep barrier")
+ }
systemstack(func() { startTheWorldWithSema(true) })
@@ -1053,7 +1055,7 @@ func gcMarkTermination(nextTriggerRatio float64) {
})
// Now that we've swept stale spans in mcaches, they don't
// count against unswept spans.
- sl.dispose()
+ sweep.active.end(sl)
// Print gctrace before dropping worldsema. As soon as we drop
// worldsema another cycle could start and smash the stats
@@ -1087,6 +1089,8 @@ func gcMarkTermination(nextTriggerRatio float64) {
print(" ms cpu, ",
work.heap0>>20, "->", work.heap1>>20, "->", work.heap2>>20, " MB, ",
work.heapGoal>>20, " MB goal, ",
+ gcController.stackScan>>20, " MB stacks, ",
+ gcController.globalsScan>>20, " MB globals, ",
work.maxprocs, " P")
if work.userForced {
print(" (forced)")
@@ -1294,15 +1298,9 @@ func gcBgMarkWorker() {
// Account for time.
duration := nanotime() - startTime
- switch pp.gcMarkWorkerMode {
- case gcMarkWorkerDedicatedMode:
- atomic.Xaddint64(&gcController.dedicatedMarkTime, duration)
- atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 1)
- case gcMarkWorkerFractionalMode:
- atomic.Xaddint64(&gcController.fractionalMarkTime, duration)
+ gcController.logWorkTime(pp.gcMarkWorkerMode, duration)
+ if pp.gcMarkWorkerMode == gcMarkWorkerFractionalMode {
atomic.Xaddint64(&pp.gcFractionalMarkTime, duration)
- case gcMarkWorkerIdleMode:
- atomic.Xaddint64(&gcController.idleMarkTime, duration)
}
// Was this the last worker and did we run out
@@ -1324,7 +1322,7 @@ func gcBgMarkWorker() {
// point, signal the main GC goroutine.
if incnwait == work.nproc && !gcMarkWorkAvailable(nil) {
// We don't need the P-local buffers here, allow
- // preemption becuse we may schedule like a regular
+ // preemption because we may schedule like a regular
// goroutine in gcMarkDone (block on locks, etc).
releasem(node.m.ptr())
node.m.set(nil)
@@ -1378,6 +1376,11 @@ func gcMark(startTime int64) {
throw("work.full != 0")
}
+ // Drop allg snapshot. allgs may have grown, in which case
+ // this is the only reference to the old backing store and
+ // there's no need to keep it around.
+ work.stackRoots = nil
+
// Clear out buffers and double-check that all gcWork caches
// are empty. This should be ensured by gcMarkDone before we
// enter mark termination.
@@ -1422,30 +1425,22 @@ func gcMark(startTime int64) {
gcw.dispose()
}
- // Update the marked heap stat.
- gcController.heapMarked = work.bytesMarked
-
// Flush scanAlloc from each mcache since we're about to modify
// heapScan directly. If we were to flush this later, then scanAlloc
// might have incorrect information.
+ //
+ // Note that it's not important to retain this information; we know
+ // exactly what heapScan is at this point via scanWork.
for _, p := range allp {
c := p.mcache
if c == nil {
continue
}
- gcController.heapScan += uint64(c.scanAlloc)
c.scanAlloc = 0
}
- // Update other GC heap size stats. This must happen after
- // cachestats (which flushes local statistics to these) and
- // flushallmcaches (which modifies gcController.heapLive).
- gcController.heapLive = work.bytesMarked
- gcController.heapScan = uint64(gcController.scanWork)
-
- if trace.enabled {
- traceHeapAlloc()
- }
+ // Reset controller state.
+ gcController.resetLive(work.bytesMarked)
}
// gcSweep must be called on the system stack because it acquires the heap
@@ -1463,11 +1458,11 @@ func gcSweep(mode gcMode) {
lock(&mheap_.lock)
mheap_.sweepgen += 2
- mheap_.sweepDrained = 0
- mheap_.pagesSwept = 0
+ sweep.active.reset()
+ mheap_.pagesSwept.Store(0)
mheap_.sweepArenas = mheap_.allArenas
- mheap_.reclaimIndex = 0
- mheap_.reclaimCredit = 0
+ mheap_.reclaimIndex.Store(0)
+ mheap_.reclaimCredit.Store(0)
unlock(&mheap_.lock)
sweep.centralIndex.clear()
@@ -1565,7 +1560,7 @@ func clearpools() {
sched.sudogcache = nil
unlock(&sched.sudoglock)
- // Clear central defer pools.
+ // Clear central defer pool.
// Leave per-P pools alone, they have strictly bounded size.
lock(&sched.deferlock)
// disconnect cached list before dropping it on the floor,
diff --git a/libgo/go/runtime/mgc_gccgo.go b/libgo/go/runtime/mgc_gccgo.go
index 21539eb..826f51e 100644
--- a/libgo/go/runtime/mgc_gccgo.go
+++ b/libgo/go/runtime/mgc_gccgo.go
@@ -7,7 +7,7 @@
package runtime
import (
- "runtime/internal/sys"
+ "internal/goarch"
"unsafe"
)
@@ -106,7 +106,7 @@ func createGcRootsIndex() {
// Construct the gcRootsIndex slice. Use non-heap storage for the array
// backing the slice.
sp := (*notInHeapSlice)(unsafe.Pointer(&gcRootsIndex))
- sp.array = (*notInHeap)(persistentalloc1(sys.PtrSize*uintptr(nroots), sys.PtrSize, &memstats.other_sys))
+ sp.array = (*notInHeap)(persistentalloc1(goarch.PtrSize*uintptr(nroots), goarch.PtrSize, &memstats.other_sys))
if sp.array == nil {
throw("runtime: cannot allocate memory")
}
@@ -125,7 +125,7 @@ func createGcRootsIndex() {
}
// Sort it by starting address.
- rootradixsort(gcRootsIndex, 0, nroots-1, sys.PtrSize*8-1)
+ rootradixsort(gcRootsIndex, 0, nroots-1, goarch.PtrSize*8-1)
}
// registerGCRoots is called by compiler-generated code.
diff --git a/libgo/go/runtime/mgcmark.go b/libgo/go/runtime/mgcmark.go
index f6e1a14..6bc7094 100644
--- a/libgo/go/runtime/mgcmark.go
+++ b/libgo/go/runtime/mgcmark.go
@@ -7,6 +7,8 @@
package runtime
import (
+ "internal/goarch"
+ "internal/goexperiment"
"runtime/internal/atomic"
"runtime/internal/sys"
"unsafe"
@@ -86,7 +88,8 @@ func gcMarkRootPrepare() {
// ignore them because they begin life without any roots, so
// there's nothing to scan, and any roots they create during
// the concurrent phase will be caught by the write barrier.
- work.nStackRoots = int(atomic.Loaduintptr(&allglen))
+ work.stackRoots = allGsSnapshot()
+ work.nStackRoots = len(work.stackRoots)
work.markrootNext = 0
work.markrootJobs = uint32(fixedRootCount + work.nDataRoots + work.nSpanRoots + work.nStackRoots)
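// Sketch (illustration only) of the snapshot idiom behind work.stackRoots:
// a full slice expression (len == cap) means later appends to the live list
// reallocate instead of writing past the snapshot's end, while existing
// elements remain shared, which is why the snapshot's backing store must be
// treated as read-only. Hypothetical element type; not a copy of
// allGsSnapshot.
package main

import "fmt"

type g struct{ id int64 }

func main() {
	allgs := []*g{{1}, {2}}
	stackRoots := allgs[:len(allgs):len(allgs)] // snapshot; cap == len

	allgs = append(allgs, &g{3}) // grows into a new backing array

	fmt.Println(len(stackRoots), len(allgs)) // 2 3
}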
@@ -135,18 +138,25 @@ var oneptrmask = [...]uint8{1}
//
// Preemption must be disabled (because this uses a gcWork).
//
+// Returns the amount of GC work credit produced by the operation.
+// If flushBgCredit is true, then that credit is also flushed
+// to the background credit pool.
+//
// nowritebarrier is only advisory here.
//
//go:nowritebarrier
-func markroot(gcw *gcWork, i uint32) {
+func markroot(gcw *gcWork, i uint32, flushBgCredit bool) int64 {
// Note: if you add a case here, please also update heapdump.go:dumproots.
+ var workDone int64
+ var workCounter *atomic.Int64
switch {
case work.baseData <= i && i < work.baseSpans:
+ workCounter = &gcController.globalsScanWork
roots := gcRoots
c := work.baseData
for roots != nil {
if i == c {
- markrootBlock(roots, gcw)
+ workDone += markrootBlock(roots, gcw)
break
}
roots = roots.next
@@ -168,15 +178,13 @@ func markroot(gcw *gcWork, i uint32) {
default:
// the rest is scanning goroutine stacks
- var gp *g
- if work.baseStacks <= i && i < work.baseEnd {
- // N.B. Atomic read of allglen in gcMarkRootPrepare
- // acts as a barrier to ensure that allgs must be large
- // enough to contain all relevant Gs.
- gp = allgs[i-work.baseStacks]
- } else {
+ workCounter = &gcController.stackScanWork
+ if i < work.baseStacks || work.baseEnd <= i {
+ printlock()
+ print("runtime: markroot index ", i, " not in stack roots range [", work.baseStacks, ", ", work.baseEnd, ")\n")
throw("markroot: bad index")
}
+ gp := work.stackRoots[i-work.baseStacks]
// remember when we've first observed the G blocked
// needed only to output in traceback
@@ -214,7 +222,7 @@ func markroot(gcw *gcWork, i uint32) {
if gp.gcscandone {
throw("g already scanned")
}
- scanstack(gp, gcw)
+ workDone += scanstack(gp, gcw)
gp.gcscandone = true
resumeG(stopped)
@@ -223,16 +231,30 @@ func markroot(gcw *gcWork, i uint32) {
}
})
}
+ if goexperiment.PacerRedesign {
+ if workCounter != nil && workDone != 0 {
+ workCounter.Add(workDone)
+ if flushBgCredit {
+ gcFlushBgCredit(workDone)
+ }
+ }
+ }
+ return workDone
}
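// Sketch (illustration only) of the accounting shape markroot now follows:
// report how much root-scanning work was done, add it to the per-kind
// counter, and, if asked, flush the same amount to a background credit pool
// that mutator assists can draw on. All names below are stand-ins, not the
// runtime's.
package main

import (
	"fmt"
	"sync/atomic"
)

var (
	globalsScanWork int64 // bytes of globals scanned this cycle
	bgScanCredit    int64 // credit available to mutator assists
)

func scanRoot(rootBytes int64, flushBgCredit bool) int64 {
	workDone := rootBytes // pretend scanning produced this much work
	if workDone != 0 {
		atomic.AddInt64(&globalsScanWork, workDone)
		if flushBgCredit {
			atomic.AddInt64(&bgScanCredit, workDone)
		}
	}
	return workDone
}

func main() {
	scanRoot(4096, true)
	fmt.Println(atomic.LoadInt64(&globalsScanWork), atomic.LoadInt64(&bgScanCredit)) // 4096 4096
}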
// markrootBlock scans one element of the list of GC roots.
//
+// Returns the amount of work done.
+//
//go:nowritebarrier
-func markrootBlock(roots *gcRootList, gcw *gcWork) {
+func markrootBlock(roots *gcRootList, gcw *gcWork) int64 {
+ var ret int64
for i := 0; i < roots.count; i++ {
r := &roots.roots[i]
scanblock(uintptr(r.decl), r.ptrdata, r.gcdata, gcw)
+ ret += int64(r.ptrdata)
}
+ return ret
}
// markrootSpans marks roots for one shard of markArenas.
@@ -308,7 +330,7 @@ func markrootSpans(gcw *gcWork, shard int) {
scanobject(p, gcw)
// The special itself is a root.
- scanblock(uintptr(unsafe.Pointer(&spf.fn)), sys.PtrSize, &oneptrmask[0], gcw)
+ scanblock(uintptr(unsafe.Pointer(&spf.fn)), goarch.PtrSize, &oneptrmask[0], gcw)
}
unlock(&s.speciallock)
}
@@ -335,8 +357,8 @@ retry:
// balance positive. When the required amount of work is low,
// we over-assist to build up credit for future allocations
// and amortize the cost of assisting.
- assistWorkPerByte := float64frombits(atomic.Load64(&gcController.assistWorkPerByte))
- assistBytesPerWork := float64frombits(atomic.Load64(&gcController.assistBytesPerWork))
+ assistWorkPerByte := gcController.assistWorkPerByte.Load()
+ assistBytesPerWork := gcController.assistBytesPerWork.Load()
debtBytes := -gp.gcAssistBytes
scanWork := int64(assistWorkPerByte * float64(debtBytes))
if scanWork < gcOverAssistWork {
@@ -480,7 +502,7 @@ func gcAssistAlloc1(gp *g, scanWork int64) {
// this scan work counts for. The "1+" is a poor man's
// round-up, to ensure this adds credit even if
// assistBytesPerWork is very low.
- assistBytesPerWork := float64frombits(atomic.Load64(&gcController.assistBytesPerWork))
+ assistBytesPerWork := gcController.assistBytesPerWork.Load()
gp.gcAssistBytes += 1 + int64(assistBytesPerWork*float64(workDone))
// If this is the last worker and we ran out of work,
@@ -522,8 +544,6 @@ func gcWakeAllAssists() {
//
// gcParkAssist reports whether the assist is now satisfied. If it
// returns false, the caller must retry the assist.
-//
-//go:nowritebarrier
func gcParkAssist() bool {
lock(&work.assistQueue.lock)
// If the GC cycle finished while we were getting the lock,
@@ -575,7 +595,7 @@ func gcFlushBgCredit(scanWork int64) {
return
}
- assistBytesPerWork := float64frombits(atomic.Load64(&gcController.assistBytesPerWork))
+ assistBytesPerWork := gcController.assistBytesPerWork.Load()
scanBytes := int64(float64(scanWork) * assistBytesPerWork)
lock(&work.assistQueue.lock)
@@ -609,7 +629,7 @@ func gcFlushBgCredit(scanWork int64) {
if scanBytes > 0 {
// Convert from scan bytes back to work.
- assistWorkPerByte := float64frombits(atomic.Load64(&gcController.assistWorkPerByte))
+ assistWorkPerByte := gcController.assistWorkPerByte.Load()
scanWork = int64(float64(scanBytes) * assistWorkPerByte)
atomic.Xaddint64(&gcController.bgScanCredit, scanWork)
}
@@ -624,6 +644,13 @@ func doscanstackswitch(*g, *g)
// scanstack scans gp's stack, greying all pointers found on the stack.
//
+// For goexperiment.PacerRedesign:
+// Returns the amount of scan work performed, but doesn't update
+// gcController.stackScanWork or flush any credit. Any background credit produced
+// by this function should be flushed by its caller. scanstack itself can't
+// safely flush because it may result in trying to wake up a goroutine that
+// was just scanned, resulting in a self-deadlock.
+//
// scanstack will also shrink the stack if it is safe to do so. If it
// is not, it schedules a stack shrink for the next synchronous safe
// point.
@@ -633,7 +660,7 @@ func doscanstackswitch(*g, *g)
//
//go:nowritebarrier
//go:systemstack
-func scanstack(gp *g, gcw *gcWork) {
+func scanstack(gp *g, gcw *gcWork) int64 {
if readgstatus(gp)&_Gscan == 0 {
print("runtime:scanstack: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", hex(readgstatus(gp)), "\n")
throw("scanstack - bad status")
@@ -644,7 +671,7 @@ func scanstack(gp *g, gcw *gcWork) {
print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
throw("mark - bad status")
case _Gdead:
- return
+ return 0
case _Grunning:
print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
throw("scanstack: goroutine not stopped")
@@ -679,6 +706,8 @@ func scanstack(gp *g, gcw *gcWork) {
// This is necessary as it uses stack objects (a.k.a. stack tracing).
// We don't (yet) do stack objects, and regular stack/heap scan
// will take care of defer records just fine.
+
+ return 0
}
// scanstackswitch scans gp's stack by switching (gogo) to gp and
@@ -789,7 +818,7 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) {
flushBgCredit := flags&gcDrainFlushBgCredit != 0
idle := flags&gcDrainIdle != 0
- initScanWork := gcw.scanWork
+ initScanWork := gcw.heapScanWork
// checkWork is the scan work before performing the next
// self-preempt check.
@@ -812,7 +841,7 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) {
if job >= work.markrootJobs {
break
}
- markroot(gcw, job)
+ markroot(gcw, job, flushBgCredit)
if check != nil && check() {
goto done
}
@@ -851,14 +880,14 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) {
// Flush background scan work credit to the global
// account if we've accumulated enough locally so
// mutator assists can draw on it.
- if gcw.scanWork >= gcCreditSlack {
- atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
+ if gcw.heapScanWork >= gcCreditSlack {
+ gcController.heapScanWork.Add(gcw.heapScanWork)
if flushBgCredit {
- gcFlushBgCredit(gcw.scanWork - initScanWork)
+ gcFlushBgCredit(gcw.heapScanWork - initScanWork)
initScanWork = 0
}
- checkWork -= gcw.scanWork
- gcw.scanWork = 0
+ checkWork -= gcw.heapScanWork
+ gcw.heapScanWork = 0
if checkWork <= 0 {
checkWork += drainCheckThreshold
@@ -871,12 +900,12 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) {
done:
// Flush remaining scan work credit.
- if gcw.scanWork > 0 {
- atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
+ if gcw.heapScanWork > 0 {
+ gcController.heapScanWork.Add(gcw.heapScanWork)
if flushBgCredit {
- gcFlushBgCredit(gcw.scanWork - initScanWork)
+ gcFlushBgCredit(gcw.heapScanWork - initScanWork)
}
- gcw.scanWork = 0
+ gcw.heapScanWork = 0
}
}
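// Sketch (illustration only) of the credit-slack batching above: scan work
// accrues in a per-worker counter and is flushed to the shared counter only
// once it exceeds a slack threshold, with any remainder flushed at the end,
// bounding both atomic traffic and how stale the global view can get.
// Hypothetical names and an example threshold throughout.
package main

import (
	"fmt"
	"sync/atomic"
)

var heapScanWork int64 // shared counter, flushed to in batches

const creditSlack = 2000 // example threshold; plays the role of gcCreditSlack

func drain(chunks []int64) {
	var local int64
	for _, c := range chunks {
		local += c // cheap, uncontended accumulation
		if local >= creditSlack {
			atomic.AddInt64(&heapScanWork, local) // flush a batch
			local = 0
		}
	}
	if local > 0 {
		atomic.AddInt64(&heapScanWork, local) // flush the remainder
	}
}

func main() {
	drain([]int64{800, 800, 800, 100})
	fmt.Println(atomic.LoadInt64(&heapScanWork)) // 2500
}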
@@ -900,20 +929,15 @@ func gcDrainN(gcw *gcWork, scanWork int64) int64 {
// There may already be scan work on the gcw, which we don't
// want to claim was done by this call.
- workFlushed := -gcw.scanWork
+ workFlushed := -gcw.heapScanWork
gp := getg().m.curg
- for !gp.preempt && workFlushed+gcw.scanWork < scanWork {
+ for !gp.preempt && workFlushed+gcw.heapScanWork < scanWork {
// See gcDrain comment.
if work.full == 0 {
gcw.balance()
}
- // This might be a good place to add prefetch code...
- // if(wbuf.nobj > 4) {
- // PREFETCH(wbuf->obj[wbuf.nobj - 3];
- // }
- //
b := gcw.tryGetFast()
if b == 0 {
b = gcw.tryGet()
@@ -927,26 +951,27 @@ func gcDrainN(gcw *gcWork, scanWork int64) int64 {
if b == 0 {
// Try to do a root job.
- //
- // TODO: Assists should get credit for this
- // work.
if work.markrootNext < work.markrootJobs {
job := atomic.Xadd(&work.markrootNext, +1) - 1
if job < work.markrootJobs {
- markroot(gcw, job)
+ work := markroot(gcw, job, false)
+ if goexperiment.PacerRedesign {
+ workFlushed += work
+ }
continue
}
}
// No heap or root jobs.
break
}
+
scanobject(b, gcw)
// Flush background scan work credit.
- if gcw.scanWork >= gcCreditSlack {
- atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
- workFlushed += gcw.scanWork
- gcw.scanWork = 0
+ if gcw.heapScanWork >= gcCreditSlack {
+ gcController.heapScanWork.Add(gcw.heapScanWork)
+ workFlushed += gcw.heapScanWork
+ gcw.heapScanWork = 0
}
}
@@ -954,14 +979,14 @@ func gcDrainN(gcw *gcWork, scanWork int64) int64 {
// here because this never flushes to bgScanCredit and
// gcw.dispose will flush any remaining work to scanWork.
- return workFlushed + gcw.scanWork
+ return workFlushed + gcw.heapScanWork
}
// scanblock scans b as scanobject would, but using an explicit
// pointer bitmap instead of the heap bitmap.
//
// This is used to scan non-heap roots, so it does not update
-// gcw.bytesMarked or gcw.scanWork.
+// gcw.bytesMarked or gcw.heapScanWork.
//
//go:nowritebarrier
func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) {
@@ -973,9 +998,9 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) {
for i := uintptr(0); i < n; {
// Find bits for the next word.
- bits := uint32(*addb(ptrmask, i/(sys.PtrSize*8)))
+ bits := uint32(*addb(ptrmask, i/(goarch.PtrSize*8)))
if bits == 0 {
- i += sys.PtrSize * 8
+ i += goarch.PtrSize * 8
continue
}
for j := 0; j < 8 && i < n; j++ {
@@ -989,7 +1014,7 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) {
}
}
bits >>= 1
- i += sys.PtrSize
+ i += goarch.PtrSize
}
}
}
@@ -1001,6 +1026,12 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) {
//
//go:nowritebarrier
func scanobject(b uintptr, gcw *gcWork) {
+ // Prefetch object before we scan it.
+ //
+ // This will overlap fetching the beginning of the object with initial
+ // setup before we start scanning the object.
+ sys.Prefetch(b)
+
// Find the bits for b and the size of the object at b.
//
// b is either the beginning of an object, in which case this
@@ -1050,7 +1081,7 @@ func scanobject(b uintptr, gcw *gcWork) {
}
var i uintptr
- for i = 0; i < n; i, hbits = i+sys.PtrSize, hbits.next() {
+ for i = 0; i < n; i, hbits = i+goarch.PtrSize, hbits.next() {
// Load bits once. See CL 22712 and issue 16973 for discussion.
bits := hbits.bits()
if bits&bitScan == 0 {
@@ -1082,7 +1113,7 @@ func scanobject(b uintptr, gcw *gcWork) {
}
}
gcw.bytesMarked += uint64(n)
- gcw.scanWork += int64(i)
+ gcw.heapScanWork += int64(i)
}
//go:linkname scanstackblock
@@ -1096,7 +1127,7 @@ func scanstackblock(b, n uintptr, gcw *gcWork) {
throw("scanstackblock: conservative scan but stack map is used")
}
- for i := uintptr(0); i < n; i += sys.PtrSize {
+ for i := uintptr(0); i < n; i += goarch.PtrSize {
// Same work as in scanobject; see comments there.
obj := *(*uintptr)(unsafe.Pointer(b + i))
if obj, span, objIndex := findObject(obj, b, i, true); obj != 0 {
@@ -1118,9 +1149,9 @@ func scanstackblockwithmap(pc, b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) {
for i := uintptr(0); i < n; {
// Find bits for the next word.
- bits := uint32(*addb(ptrmask, i/(sys.PtrSize*8)))
+ bits := uint32(*addb(ptrmask, i/(goarch.PtrSize*8)))
if bits == 0 {
- i += sys.PtrSize * 8
+ i += goarch.PtrSize * 8
continue
}
for j := 0; j < 8 && i < n; j++ {
@@ -1144,7 +1175,7 @@ func scanstackblockwithmap(pc, b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) {
}
}
bits >>= 1
- i += sys.PtrSize
+ i += goarch.PtrSize
}
}
}
@@ -1169,7 +1200,7 @@ func shade(b uintptr) {
//go:nowritebarrierrec
func greyobject(obj, base, off uintptr, span *mspan, gcw *gcWork, objIndex uintptr, forStack bool) {
// obj should be start of allocation, and so must be at least pointer-aligned.
- if obj&(sys.PtrSize-1) != 0 {
+ if obj&(goarch.PtrSize-1) != 0 {
throw("greyobject: obj not pointer-aligned")
}
mbits := span.markBitsForIndex(objIndex)
@@ -1215,12 +1246,12 @@ func greyobject(obj, base, off uintptr, span *mspan, gcw *gcWork, objIndex uintp
}
}
- // Queue the obj for scanning. The PREFETCH(obj) logic has been removed but
- // seems like a nice optimization that can be added back in.
- // There needs to be time between the PREFETCH and the use.
- // Previously we put the obj in an 8 element buffer that is drained at a rate
- // to give the PREFETCH time to do its work.
- // Use of PREFETCHNTA might be more appropriate than PREFETCH
+ // We're adding obj to P's local workbuf, so it's likely
+ // this object will be processed soon by the same P.
+ // Even if the workbuf gets flushed, there will likely still be
+ // some benefit on platforms with inclusive shared caches.
+ sys.Prefetch(obj)
+ // Queue the obj for scanning.
if !gcw.putFast(obj) {
gcw.put(obj)
}
@@ -1248,13 +1279,13 @@ func gcDumpObject(label string, obj, off uintptr) {
// We're printing something from a stack frame. We
// don't know how big it is, so just show up to and
// including off.
- size = off + sys.PtrSize
+ size = off + goarch.PtrSize
}
- for i := uintptr(0); i < size; i += sys.PtrSize {
+ for i := uintptr(0); i < size; i += goarch.PtrSize {
// For big objects, just print the beginning (because
// that usually hints at the object's type) and the
// fields around off.
- if !(i < 128*sys.PtrSize || off-16*sys.PtrSize < i && i < off+16*sys.PtrSize) {
+ if !(i < 128*goarch.PtrSize || off-16*goarch.PtrSize < i && i < off+16*goarch.PtrSize) {
skipped = true
continue
}
@@ -1297,7 +1328,19 @@ func gcmarknewobject(span *mspan, obj, size, scanSize uintptr) {
gcw := &getg().m.p.ptr().gcw
gcw.bytesMarked += uint64(size)
- gcw.scanWork += int64(scanSize)
+ if !goexperiment.PacerRedesign {
+ // The old pacer counts newly allocated memory toward
+ // heapScanWork because heapScan is continuously updated
+ // throughout the GC cycle with newly allocated memory. However,
+ // these objects are never actually scanned, so we need
+ // to account for them in heapScanWork here, "faking" their work.
+ // Otherwise the pacer will think it's always behind, potentially
+ // by a large margin.
+ //
+ // The new pacer doesn't care about this because it ceases to update
+ // heapScan once a GC cycle starts, effectively snapshotting it.
+ gcw.heapScanWork += int64(scanSize)
+ }
}
// gcMarkTinyAllocs greys all active tiny alloc blocks.
diff --git a/libgo/go/runtime/mgcpacer.go b/libgo/go/runtime/mgcpacer.go
index 8a66920..5e940b0 100644
--- a/libgo/go/runtime/mgcpacer.go
+++ b/libgo/go/runtime/mgcpacer.go
@@ -6,6 +6,7 @@ package runtime
import (
"internal/cpu"
+ "internal/goexperiment"
"runtime/internal/atomic"
"unsafe"
)
@@ -13,7 +14,8 @@ import (
const (
// gcGoalUtilization is the goal CPU utilization for
// marking as a fraction of GOMAXPROCS.
- gcGoalUtilization = 0.30
+ gcGoalUtilization = goexperiment.PacerRedesignInt*gcBackgroundUtilization +
+ (1-goexperiment.PacerRedesignInt)*(gcBackgroundUtilization+0.05)
// gcBackgroundUtilization is the fixed CPU utilization for background
// marking. It must be <= gcGoalUtilization. The difference between
@@ -26,10 +28,15 @@ const (
// better control CPU and heap growth. However, the larger the gap,
// the more mutator assists are expected to happen, which impact
// mutator latency.
+ //
+ // If goexperiment.PacerRedesign, the trigger feedback controller
+ // is replaced with an estimate of the mark/cons ratio that doesn't
+ // have the same saturation issues, so this is set equal to
+ // gcGoalUtilization.
gcBackgroundUtilization = 0.25
// gcCreditSlack is the amount of scan work credit that can
- // accumulate locally before updating gcController.scanWork and,
+ // accumulate locally before updating gcController.heapScanWork and,
// optionally, gcController.bgScanCredit. Lower values give a more
// accurate assist ratio and make it more likely that assists will
// successfully steal background credit. Higher values reduce memory
@@ -46,7 +53,12 @@ const (
gcOverAssistWork = 64 << 10
// defaultHeapMinimum is the value of heapMinimum for GOGC==100.
- defaultHeapMinimum = 4 << 20
+ defaultHeapMinimum = (goexperiment.HeapMinimum512KiBInt)*(512<<10) +
+ (1-goexperiment.HeapMinimum512KiBInt)*(4<<20)
+
+ // scannableStackSizeSlack is the bytes of stack space allocated or freed
+ // that can accumulate on a P before updating gcController.stackSize.
+ scannableStackSizeSlack = 8 << 10
)
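// Worked evaluation (illustration only) of the experiment-gated constants
// above, using the fact that the goexperiment.*Int constants are 1 when the
// experiment is enabled and 0 when it is not:
package sketch

const bgUtil = 0.25 // gcBackgroundUtilization

// gcGoalUtilization:
//   PacerRedesign on:  1*0.25 + 0*(0.25+0.05) = 0.25 (goal equals background utilization)
//   PacerRedesign off: 0*0.25 + 1*(0.25+0.05) = 0.30 (the old 30% goal)
const (
	goalOn  = 1*bgUtil + 0*(bgUtil+0.05)
	goalOff = 0*bgUtil + 1*(bgUtil+0.05)
)

// defaultHeapMinimum:
//   HeapMinimum512KiB on:  1*(512<<10) + 0*(4<<20) = 512 KiB
//   HeapMinimum512KiB off: 0*(512<<10) + 1*(4<<20) = 4 MiB
const (
	heapMinOn  = 1*(512<<10) + 0*(4<<20)
	heapMinOff = 0*(512<<10) + 1*(4<<20)
)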
func init() {
@@ -72,8 +84,9 @@ func init() {
var gcController gcControllerState
type gcControllerState struct {
- // Initialized from $GOGC. GOGC=off means no GC.
- gcPercent int32
+
+ // Initialized from GOGC. GOGC=off means no GC.
+ gcPercent atomic.Int32
_ uint32 // padding so following 64-bit values are 8-byte aligned
@@ -100,6 +113,8 @@ type gcControllerState struct {
// during mark termination for the next cycle's trigger.
//
// Protected by mheap_.lock or a STW.
+ //
+ // Used if !goexperiment.PacerRedesign.
triggerRatio float64
// trigger is the heap size that triggers marking.
@@ -114,6 +129,31 @@ type gcControllerState struct {
// Protected by mheap_.lock or a STW.
trigger uint64
+ // consMark is the estimated per-CPU consMark ratio for the application.
+ //
+ // It represents the ratio between the application's allocation
+ // rate, as bytes allocated per CPU-time, and the GC's scan rate,
+ // as bytes scanned per CPU-time.
+ // The units of this ratio are (B / cpu-ns) / (B / cpu-ns).
+ //
+ // At a high level, this value is computed as the bytes of memory
+ // allocated (cons) per unit of scan work completed (mark) in a GC
+ // cycle, divided by the CPU time spent on each activity.
+ //
+ // Updated at the end of each GC cycle, in endCycle.
+ //
+ // For goexperiment.PacerRedesign.
+ consMark float64
+
+ // consMarkController holds the state for the mark-cons ratio
+ // estimation over time.
+ //
+ // Its purpose is to smooth out noisiness in the computation of
+ // consMark; see consMark for details.
+ //
+ // For goexperiment.PacerRedesign.
+ consMarkController piController
+
// heapGoal is the goal heapLive for when next GC ends.
// Set to ^uint64(0) if disabled.
//
@@ -156,28 +196,73 @@ type gcControllerState struct {
// is the live heap (as counted by heapLive), but omitting
// no-scan objects and no-scan tails of objects.
//
- // Whenever this is updated, call this gcControllerState's
- // revise() method.
+ // For !goexperiment.PacerRedesign: Whenever this is updated,
+ // call this gcControllerState's revise() method. It is read
+ // and written atomically or with the world stopped.
//
- // Read and written atomically or with the world stopped.
+ // For goexperiment.PacerRedesign: This value is fixed at the
+ // start of a GC cycle, so during a GC cycle it is safe to
+ // read without atomics, and it represents the maximum scannable
+ // heap.
heapScan uint64
+ // lastHeapScan is the number of bytes of heap that were scanned
+ // last GC cycle. It is the same as heapMarked, but only
+ // includes the "scannable" parts of objects.
+ //
+ // Updated when the world is stopped.
+ lastHeapScan uint64
+
+ // stackScan is a snapshot of scannableStackSize taken at each GC
+ // STW pause and is used in pacing decisions.
+ //
+ // Updated only while the world is stopped.
+ stackScan uint64
+
+ // scannableStackSize is the amount of allocated goroutine stack space in
+ // use by goroutines.
+ //
+ // This number tracks allocated goroutine stack space rather than used
+ // goroutine stack space (i.e. what is actually scanned) because used
+ // goroutine stack space is much harder to measure cheaply. By using
+ // allocated space, we make an overestimate; this is OK, it's better
+ // to conservatively overcount than undercount.
+ //
+ // Read and updated atomically.
+ scannableStackSize uint64
+
+ // globalsScan is the total amount of global variable space
+ // that is scannable.
+ //
+ // Read and updated atomically.
+ globalsScan uint64
+
// heapMarked is the number of bytes marked by the previous
// GC. After mark termination, heapLive == heapMarked, but
// unlike heapLive, heapMarked does not change until the
// next mark termination.
heapMarked uint64
- // scanWork is the total scan work performed this cycle. This
- // is updated atomically during the cycle. Updates occur in
- // bounded batches, since it is both written and read
- // throughout the cycle. At the end of the cycle, this is how
+ // heapScanWork is the total heap scan work performed this cycle.
+ // stackScanWork is the total stack scan work performed this cycle.
+ // globalsScanWork is the total globals scan work performed this cycle.
+ //
+ // These are updated atomically during the cycle. Updates occur in
+ // bounded batches, since they are both written and read
+ // throughout the cycle. At the end of the cycle, heapScanWork is how
// much of the retained heap is scannable.
//
- // Currently this is the bytes of heap scanned. For most uses,
- // this is an opaque unit of work, but for estimation the
- // definition is important.
- scanWork int64
+ // Currently these are measured in bytes. For most uses, this is an
+ // opaque unit of work, but for estimation the definition is important.
+ //
+ // Note that stackScanWork includes all allocated space, not just the
+ // size of the stack itself, mirroring stackSize.
+ //
+ // For !goexperiment.PacerRedesign, stackScanWork and globalsScanWork
+ // are always zero.
+ heapScanWork atomic.Int64
+ stackScanWork atomic.Int64
+ globalsScanWork atomic.Int64
// bgScanCredit is the scan work credit accumulated by the
// concurrent background scan. This credit is accumulated by
@@ -222,24 +307,14 @@ type gcControllerState struct {
// bytes that should be performed by mutator assists. This is
// computed at the beginning of each cycle and updated every
// time heapScan is updated.
- //
- // Stored as a uint64, but it's actually a float64. Use
- // float64frombits to get the value.
- //
- // Read and written atomically.
- assistWorkPerByte uint64
+ assistWorkPerByte atomic.Float64
// assistBytesPerWork is 1/assistWorkPerByte.
//
- // Stored as a uint64, but it's actually a float64. Use
- // float64frombits to get the value.
- //
- // Read and written atomically.
- //
// Note that because this is read and written independently
// from assistWorkPerByte users may notice a skew between
// the two values, and such a state should be safe.
- assistBytesPerWork uint64
+ assistBytesPerWork atomic.Float64
// fractionalUtilizationGoal is the fraction of wall clock
// time that should be spent in the fractional mark worker on
@@ -253,19 +328,45 @@ type gcControllerState struct {
// If this is zero, no fractional workers are needed.
fractionalUtilizationGoal float64
+ // test indicates that this is a test-only copy of gcControllerState.
+ test bool
+
_ cpu.CacheLinePad
}
func (c *gcControllerState) init(gcPercent int32) {
c.heapMinimum = defaultHeapMinimum
- // Set a reasonable initial GC trigger.
- c.triggerRatio = 7 / 8.0
+ if goexperiment.PacerRedesign {
+ c.consMarkController = piController{
+ // Tuned first via the Ziegler-Nichols process in simulation,
+ // then the integral time was manually tuned against real-world
+ // applications to deal with noisiness in the measured cons/mark
+ // ratio.
+ kp: 0.9,
+ ti: 4.0,
+
+ // Set a high reset time in GC cycles.
+ // This is inversely proportional to the rate at which we
+ // accumulate error from clipping. By making this very high
+ // we make the accumulation slow. In general, clipping is
+ // OK in our situation, hence the choice.
+ //
+ // Tune this if we get unintended effects from clipping for
+ // a long time.
+ tt: 1000,
+ min: -1000,
+ max: 1000,
+ }
+ } else {
+ // Set a reasonable initial GC trigger.
+ c.triggerRatio = 7 / 8.0
- // Fake a heapMarked value so it looks like a trigger at
- // heapMinimum is the appropriate growth from heapMarked.
- // This will go into computing the initial GC goal.
- c.heapMarked = uint64(float64(c.heapMinimum) / (1 + c.triggerRatio))
+ // Fake a heapMarked value so it looks like a trigger at
+ // heapMinimum is the appropriate growth from heapMarked.
+ // This will go into computing the initial GC goal.
+ c.heapMarked = uint64(float64(c.heapMinimum) / (1 + c.triggerRatio))
+ }
// This will also compute and set the GC trigger and goal.
c.setGCPercent(gcPercent)
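// Sketch (illustration only): a textbook proportional-integral controller
// with output clamping, the general shape of the consMarkController
// configured above. The runtime's piController in mgcpacer.go differs in
// details (e.g. the tt anti-windup reset time is not modeled here); this is
// not its implementation.
package main

import "fmt"

type piSketch struct {
	kp, ti   float64 // proportional gain and integral time
	min, max float64 // output clamp
	integral float64
}

func (c *piSketch) next(input, setpoint, period float64) float64 {
	err := setpoint - input
	c.integral += err * period / c.ti
	out := c.kp * (err + c.integral)
	if out < c.min {
		out = c.min
	} else if out > c.max {
		out = c.max
	}
	return out
}

func main() {
	c := &piSketch{kp: 0.9, ti: 4.0, min: -1000, max: 1000}
	// Drive a noisy measurement toward a setpoint of 1.0 over a few cycles.
	for _, measured := range []float64{0.4, 0.7, 0.9, 1.1} {
		fmt.Printf("%.3f\n", c.next(measured, 1.0, 1.0))
	}
}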
@@ -274,13 +375,17 @@ func (c *gcControllerState) init(gcPercent int32) {
// startCycle resets the GC controller's state and computes estimates
// for a new GC cycle. The caller must hold worldsema and the world
// must be stopped.
-func (c *gcControllerState) startCycle() {
- c.scanWork = 0
+func (c *gcControllerState) startCycle(markStartTime int64, procs int) {
+ c.heapScanWork.Store(0)
+ c.stackScanWork.Store(0)
+ c.globalsScanWork.Store(0)
c.bgScanCredit = 0
c.assistTime = 0
c.dedicatedMarkTime = 0
c.fractionalMarkTime = 0
c.idleMarkTime = 0
+ c.markStartTime = markStartTime
+ c.stackScan = atomic.Load64(&c.scannableStackSize)
// Ensure that the heap goal is at least a little larger than
// the current live heap size. This may not be the case if GC
@@ -289,8 +394,14 @@ func (c *gcControllerState) startCycle() {
// GOGC. Assist is proportional to this distance, so enforce a
// minimum distance, even if it means going over the GOGC goal
// by a tiny bit.
- if c.heapGoal < c.heapLive+1024*1024 {
- c.heapGoal = c.heapLive + 1024*1024
+ if goexperiment.PacerRedesign {
+ if c.heapGoal < c.heapLive+64<<10 {
+ c.heapGoal = c.heapLive + 64<<10
+ }
+ } else {
+ if c.heapGoal < c.heapLive+1<<20 {
+ c.heapGoal = c.heapLive + 1<<20
+ }
}
// Compute the background mark utilization goal. In general,
@@ -298,7 +409,7 @@ func (c *gcControllerState) startCycle() {
// dedicated workers so that the utilization is closest to
// 25%. For small GOMAXPROCS, this would introduce too much
// error, so we add fractional workers in that case.
- totalUtilizationGoal := float64(gomaxprocs) * gcBackgroundUtilization
+ totalUtilizationGoal := float64(procs) * gcBackgroundUtilization
c.dedicatedMarkWorkersNeeded = int64(totalUtilizationGoal + 0.5)
utilError := float64(c.dedicatedMarkWorkersNeeded)/totalUtilizationGoal - 1
const maxUtilError = 0.3
@@ -311,14 +422,14 @@ func (c *gcControllerState) startCycle() {
// Too many dedicated workers.
c.dedicatedMarkWorkersNeeded--
}
- c.fractionalUtilizationGoal = (totalUtilizationGoal - float64(c.dedicatedMarkWorkersNeeded)) / float64(gomaxprocs)
+ c.fractionalUtilizationGoal = (totalUtilizationGoal - float64(c.dedicatedMarkWorkersNeeded)) / float64(procs)
} else {
c.fractionalUtilizationGoal = 0
}
// In STW mode, we just want dedicated workers.
if debug.gcstoptheworld > 0 {
- c.dedicatedMarkWorkersNeeded = int64(gomaxprocs)
+ c.dedicatedMarkWorkersNeeded = int64(procs)
c.fractionalUtilizationGoal = 0
}
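A standalone arithmetic sketch of the worker-goal logic above, for a few assumed GOMAXPROCS values: rounding the 25% background utilization target to whole dedicated workers is too coarse at small proc counts, so the remainder is expressed as a fractional utilization goal instead.

package main

import "fmt"

func main() {
	const (
		gcBackgroundUtilization = 0.25 // target fraction of CPU for background marking
		maxUtilError            = 0.3  // tolerated rounding error before using a fractional worker
	)
	for _, procs := range []int{1, 2, 4, 8} {
		totalGoal := float64(procs) * gcBackgroundUtilization
		dedicated := int64(totalGoal + 0.5) // round to the nearest whole worker
		utilError := float64(dedicated)/totalGoal - 1
		fractional := 0.0
		if utilError < -maxUtilError || utilError > maxUtilError {
			if float64(dedicated) > totalGoal {
				dedicated-- // too many dedicated workers; shed one
			}
			fractional = (totalGoal - float64(dedicated)) / float64(procs)
		}
		fmt.Printf("procs=%d dedicated=%d fractionalGoal=%.3f\n", procs, dedicated, fractional)
	}
}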
@@ -333,7 +444,7 @@ func (c *gcControllerState) startCycle() {
c.revise()
if debug.gcpacertrace > 0 {
- assistRatio := float64frombits(atomic.Load64(&c.assistWorkPerByte))
+ assistRatio := c.assistWorkPerByte.Load()
print("pacer: assist ratio=", assistRatio,
" (scan ", gcController.heapScan>>20, " MB in ",
work.initialHeapLive>>20, "->",
@@ -365,7 +476,7 @@ func (c *gcControllerState) startCycle() {
// is when assists are enabled and the necessary statistics are
// available).
func (c *gcControllerState) revise() {
- gcPercent := c.gcPercent
+ gcPercent := c.gcPercent.Load()
if gcPercent < 0 {
// If GC is disabled but we're running a forced GC,
// act like GOGC is huge for the below calculations.
@@ -373,32 +484,80 @@ func (c *gcControllerState) revise() {
}
live := atomic.Load64(&c.heapLive)
scan := atomic.Load64(&c.heapScan)
- work := atomic.Loadint64(&c.scanWork)
+ work := c.heapScanWork.Load() + c.stackScanWork.Load() + c.globalsScanWork.Load()
// Assume we're under the soft goal. Pace GC to complete at
// heapGoal assuming the heap is in steady-state.
heapGoal := int64(atomic.Load64(&c.heapGoal))
- // Compute the expected scan work remaining.
- //
- // This is estimated based on the expected
- // steady-state scannable heap. For example, with
- // GOGC=100, only half of the scannable heap is
- // expected to be live, so that's what we target.
- //
- // (This is a float calculation to avoid overflowing on
- // 100*heapScan.)
- scanWorkExpected := int64(float64(scan) * 100 / float64(100+gcPercent))
-
- if int64(live) > heapGoal || work > scanWorkExpected {
- // We're past the soft goal, or we've already done more scan
- // work than we expected. Pace GC so that in the worst case it
- // will complete by the hard goal.
- const maxOvershoot = 1.1
- heapGoal = int64(float64(heapGoal) * maxOvershoot)
-
- // Compute the upper bound on the scan work remaining.
- scanWorkExpected = int64(scan)
+ var scanWorkExpected int64
+ if goexperiment.PacerRedesign {
+		// The expected scan work is computed as the number of bytes scanned in the
+		// last GC cycle, plus our estimate of stacks and globals work for this cycle.
+ scanWorkExpected = int64(c.lastHeapScan + c.stackScan + c.globalsScan)
+
+ // maxScanWork is a worst-case estimate of the amount of scan work that
+ // needs to be performed in this GC cycle. Specifically, it represents
+ // the case where *all* scannable memory turns out to be live.
+ maxScanWork := int64(scan + c.stackScan + c.globalsScan)
+ if work > scanWorkExpected {
+ // We've already done more scan work than expected. Because our expectation
+ // is based on a steady-state scannable heap size, we assume this means our
+ // heap is growing. Compute a new heap goal that takes our existing runway
+ // computed for scanWorkExpected and extrapolates it to maxScanWork, the worst-case
+ // scan work. This keeps our assist ratio stable if the heap continues to grow.
+ //
+ // The effect of this mechanism is that assists stay flat in the face of heap
+ // growths. It's OK to use more memory this cycle to scan all the live heap,
+ // because the next GC cycle is inevitably going to use *at least* that much
+ // memory anyway.
+ extHeapGoal := int64(float64(heapGoal-int64(c.trigger))/float64(scanWorkExpected)*float64(maxScanWork)) + int64(c.trigger)
+ scanWorkExpected = maxScanWork
+
+ // hardGoal is a hard limit on the amount that we're willing to push back the
+ // heap goal, and that's twice the heap goal (i.e. if GOGC=100 and the heap and/or
+ // stacks and/or globals grow to twice their size, this limits the current GC cycle's
+ // growth to 4x the original live heap's size).
+ //
+ // This maintains the invariant that we use no more memory than the next GC cycle
+ // will anyway.
+ hardGoal := int64((1.0 + float64(gcPercent)/100.0) * float64(heapGoal))
+ if extHeapGoal > hardGoal {
+ extHeapGoal = hardGoal
+ }
+ heapGoal = extHeapGoal
+ }
+ if int64(live) > heapGoal {
+ // We're already past our heap goal, even the extrapolated one.
+ // Leave ourselves some extra runway, so in the worst case we
+ // finish by that point.
+ const maxOvershoot = 1.1
+ heapGoal = int64(float64(heapGoal) * maxOvershoot)
+
+ // Compute the upper bound on the scan work remaining.
+ scanWorkExpected = maxScanWork
+ }
+ } else {
+ // Compute the expected scan work remaining.
+ //
+ // This is estimated based on the expected
+ // steady-state scannable heap. For example, with
+ // GOGC=100, only half of the scannable heap is
+ // expected to be live, so that's what we target.
+ //
+ // (This is a float calculation to avoid overflowing on
+ // 100*heapScan.)
+ scanWorkExpected = int64(float64(scan) * 100 / float64(100+gcPercent))
+ if int64(live) > heapGoal || work > scanWorkExpected {
+ // We're past the soft goal, or we've already done more scan
+ // work than we expected. Pace GC so that in the worst case it
+ // will complete by the hard goal.
+ const maxOvershoot = 1.1
+ heapGoal = int64(float64(heapGoal) * maxOvershoot)
+
+ // Compute the upper bound on the scan work remaining.
+ scanWorkExpected = int64(scan)
+ }
}
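A worked sketch of the PacerRedesign branch above, with assumed byte counts rather than values from a real trace: once measured scan work exceeds scanWorkExpected, the trigger-to-goal runway is rescaled to the worst-case maxScanWork and then clamped to the hard goal of one extra GOGC's worth of growth.

package main

import "fmt"

func main() {
	// Assumed example values, in bytes.
	var (
		trigger          = int64(90 << 20)  // GC started at a 90 MiB heap
		heapGoal         = int64(100 << 20) // original goal: 100 MiB
		scanWorkExpected = int64(40 << 20)  // steady-state estimate of scan work
		maxScanWork      = int64(60 << 20)  // worst case: all scannable memory is live
		gcPercent        = int64(100)
	)
	// Extrapolate the runway from the expected scan work to the worst case.
	extHeapGoal := int64(float64(heapGoal-trigger)/float64(scanWorkExpected)*float64(maxScanWork)) + trigger
	// Never push the goal out by more than GOGC's worth of extra growth.
	hardGoal := int64((1.0 + float64(gcPercent)/100.0) * float64(heapGoal))
	if extHeapGoal > hardGoal {
		extHeapGoal = hardGoal
	}
	fmt.Printf("extrapolated goal: %d MiB (hard cap %d MiB)\n", extHeapGoal>>20, hardGoal>>20)
}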
// Compute the remaining scan work estimate.
@@ -439,14 +598,97 @@ func (c *gcControllerState) revise() {
// cycle.
assistWorkPerByte := float64(scanWorkRemaining) / float64(heapRemaining)
assistBytesPerWork := float64(heapRemaining) / float64(scanWorkRemaining)
- atomic.Store64(&c.assistWorkPerByte, float64bits(assistWorkPerByte))
- atomic.Store64(&c.assistBytesPerWork, float64bits(assistBytesPerWork))
+ c.assistWorkPerByte.Store(assistWorkPerByte)
+ c.assistBytesPerWork.Store(assistBytesPerWork)
}
-// endCycle computes the trigger ratio for the next cycle.
+// endCycle computes the trigger ratio (!goexperiment.PacerRedesign)
+// or the consMark estimate (goexperiment.PacerRedesign) for the next cycle.
+// Returns the trigger ratio if applicable, or 0 (goexperiment.PacerRedesign).
// userForced indicates whether the current GC cycle was forced
// by the application.
-func (c *gcControllerState) endCycle(userForced bool) float64 {
+func (c *gcControllerState) endCycle(now int64, procs int, userForced bool) float64 {
+ // Record last heap goal for the scavenger.
+ // We'll be updating the heap goal soon.
+ gcController.lastHeapGoal = gcController.heapGoal
+
+ // Compute the duration of time for which assists were turned on.
+ assistDuration := now - c.markStartTime
+
+ // Assume background mark hit its utilization goal.
+ utilization := gcBackgroundUtilization
+ // Add assist utilization; avoid divide by zero.
+ if assistDuration > 0 {
+ utilization += float64(c.assistTime) / float64(assistDuration*int64(procs))
+ }
+
+ if goexperiment.PacerRedesign {
+ if c.heapLive <= c.trigger {
+ // Shouldn't happen, but let's be very safe about this in case the
+ // GC is somehow extremely short.
+ //
+ // In this case though, the only reasonable value for c.heapLive-c.trigger
+ // would be 0, which isn't really all that useful, i.e. the GC was so short
+ // that it didn't matter.
+ //
+ // Ignore this case and don't update anything.
+ return 0
+ }
+ idleUtilization := 0.0
+ if assistDuration > 0 {
+ idleUtilization = float64(c.idleMarkTime) / float64(assistDuration*int64(procs))
+ }
+ // Determine the cons/mark ratio.
+ //
+ // The units we want for the numerator and denominator are both B / cpu-ns.
+ // We get this by taking the bytes allocated or scanned, and divide by the amount of
+ // CPU time it took for those operations. For allocations, that CPU time is
+ //
+ // assistDuration * procs * (1 - utilization)
+ //
+ // Where utilization includes just background GC workers and assists. It does *not*
+ // include idle GC work time, because in theory the mutator is free to take that at
+ // any point.
+ //
+ // For scanning, that CPU time is
+ //
+ // assistDuration * procs * (utilization + idleUtilization)
+ //
+	// In this case, we *include* idle utilization, because that is additional CPU time
+	// that the GC had available to it.
+	//
+	// In effect, idle GC time is sort of double-counted here, but it's very weird compared
+	// to other kinds of GC work because of how fluid it is: the mutator is *always* free
+	// to take it.
+ //
+ // So this calculation is really:
+ // (heapLive-trigger) / (assistDuration * procs * (1-utilization)) /
+	//         (scanWork) / (assistDuration * procs * (utilization+idleUtilization))
+ //
+ // Note that because we only care about the ratio, assistDuration and procs cancel out.
+ scanWork := c.heapScanWork.Load() + c.stackScanWork.Load() + c.globalsScanWork.Load()
+ currentConsMark := (float64(c.heapLive-c.trigger) * (utilization + idleUtilization)) /
+ (float64(scanWork) * (1 - utilization))
+
+ // Update cons/mark controller.
+ // Period for this is 1 GC cycle.
+ oldConsMark := c.consMark
+ c.consMark = c.consMarkController.next(c.consMark, currentConsMark, 1.0)
+
+ if debug.gcpacertrace > 0 {
+ printlock()
+ goal := gcGoalUtilization * 100
+ print("pacer: ", int(utilization*100), "% CPU (", int(goal), " exp.) for ")
+ print(c.heapScanWork.Load(), "+", c.stackScanWork.Load(), "+", c.globalsScanWork.Load(), " B work (", c.lastHeapScan+c.stackScan+c.globalsScan, " B exp.) ")
+ print("in ", c.trigger, " B -> ", c.heapLive, " B (∆goal ", int64(c.heapLive)-int64(c.heapGoal), ", cons/mark ", oldConsMark, ")")
+ println()
+ printunlock()
+ }
+ return 0
+ }
+
+ // !goexperiment.PacerRedesign below.
+
if userForced {
// Forced GC means this cycle didn't start at the
// trigger, so where it finished isn't good
@@ -473,15 +715,6 @@ func (c *gcControllerState) endCycle(userForced bool) float64 {
// heap growth is the error.
goalGrowthRatio := c.effectiveGrowthRatio()
actualGrowthRatio := float64(c.heapLive)/float64(c.heapMarked) - 1
- assistDuration := nanotime() - c.markStartTime
-
- // Assume background mark hit its utilization goal.
- utilization := gcBackgroundUtilization
- // Add assist utilization; avoid divide by zero.
- if assistDuration > 0 {
- utilization += float64(c.assistTime) / float64(assistDuration*int64(gomaxprocs))
- }
-
triggerError := goalGrowthRatio - c.triggerRatio - utilization/gcGoalUtilization*(actualGrowthRatio-c.triggerRatio)
// Finally, we adjust the trigger for next time by this error,
@@ -500,7 +733,7 @@ func (c *gcControllerState) endCycle(userForced bool) float64 {
H_g := int64(float64(H_m_prev) * (1 + h_g))
u_a := utilization
u_g := gcGoalUtilization
- W_a := c.scanWork
+ W_a := c.heapScanWork.Load()
print("pacer: H_m_prev=", H_m_prev,
" h_t=", h_t, " H_T=", H_T,
" h_a=", h_a, " H_a=", H_a,
@@ -636,9 +869,82 @@ func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g {
return gp
}
-// commit sets the trigger ratio and updates everything
-// derived from it: the absolute trigger, the heap goal, mark pacing,
-// and sweep pacing.
+// resetLive sets up the controller state for the next mark phase after the end
+// of the previous one. Must be called after endCycle and before commit, before
+// the world is started.
+//
+// The world must be stopped.
+func (c *gcControllerState) resetLive(bytesMarked uint64) {
+ c.heapMarked = bytesMarked
+ c.heapLive = bytesMarked
+ c.heapScan = uint64(c.heapScanWork.Load())
+ c.lastHeapScan = uint64(c.heapScanWork.Load())
+
+ // heapLive was updated, so emit a trace event.
+ if trace.enabled {
+ traceHeapAlloc()
+ }
+}
+
+// logWorkTime updates mark work accounting in the controller by a duration of
+// work in nanoseconds.
+//
+// Safe to execute at any time.
+func (c *gcControllerState) logWorkTime(mode gcMarkWorkerMode, duration int64) {
+ switch mode {
+ case gcMarkWorkerDedicatedMode:
+ atomic.Xaddint64(&c.dedicatedMarkTime, duration)
+ atomic.Xaddint64(&c.dedicatedMarkWorkersNeeded, 1)
+ case gcMarkWorkerFractionalMode:
+ atomic.Xaddint64(&c.fractionalMarkTime, duration)
+ case gcMarkWorkerIdleMode:
+ atomic.Xaddint64(&c.idleMarkTime, duration)
+ default:
+ throw("logWorkTime: unknown mark worker mode")
+ }
+}
+
+func (c *gcControllerState) update(dHeapLive, dHeapScan int64) {
+ if dHeapLive != 0 {
+ atomic.Xadd64(&gcController.heapLive, dHeapLive)
+ if trace.enabled {
+ // gcController.heapLive changed.
+ traceHeapAlloc()
+ }
+ }
+ // Only update heapScan in the new pacer redesign if we're not
+ // currently in a GC.
+ if !goexperiment.PacerRedesign || gcBlackenEnabled == 0 {
+ if dHeapScan != 0 {
+ atomic.Xadd64(&gcController.heapScan, dHeapScan)
+ }
+ }
+ if gcBlackenEnabled != 0 {
+ // gcController.heapLive and heapScan changed.
+ c.revise()
+ }
+}
+
+func (c *gcControllerState) addScannableStack(pp *p, amount int64) {
+ if pp == nil {
+ atomic.Xadd64(&c.scannableStackSize, amount)
+ return
+ }
+ pp.scannableStackSizeDelta += amount
+ if pp.scannableStackSizeDelta >= scannableStackSizeSlack || pp.scannableStackSizeDelta <= -scannableStackSizeSlack {
+ atomic.Xadd64(&c.scannableStackSize, pp.scannableStackSizeDelta)
+ pp.scannableStackSizeDelta = 0
+ }
+}
+
+func (c *gcControllerState) addGlobals(amount int64) {
+ atomic.Xadd64(&c.globalsScan, amount)
+}
+
+// commit recomputes all pacing parameters from scratch, namely the
+// absolute trigger, the heap goal, mark pacing, and sweep pacing.
+//
+// If goexperiment.PacerRedesign is true, triggerRatio is ignored.
//
// This can be called any time. If GC is in the middle of a
// concurrent phase, it will adjust the pacing of that phase.
@@ -648,19 +954,147 @@ func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g {
//
// mheap_.lock must be held or the world must be stopped.
func (c *gcControllerState) commit(triggerRatio float64) {
- assertWorldStoppedOrLockHeld(&mheap_.lock)
+ if !c.test {
+ assertWorldStoppedOrLockHeld(&mheap_.lock)
+ }
+
+ if !goexperiment.PacerRedesign {
+ c.oldCommit(triggerRatio)
+ return
+ }
+
+ // Compute the next GC goal, which is when the allocated heap
+ // has grown by GOGC/100 over where it started the last cycle,
+ // plus additional runway for non-heap sources of GC work.
+ goal := ^uint64(0)
+ if gcPercent := c.gcPercent.Load(); gcPercent >= 0 {
+ goal = c.heapMarked + (c.heapMarked+atomic.Load64(&c.stackScan)+atomic.Load64(&c.globalsScan))*uint64(gcPercent)/100
+ }
+
+ // Don't trigger below the minimum heap size.
+ minTrigger := c.heapMinimum
+ if !isSweepDone() {
+ // Concurrent sweep happens in the heap growth
+ // from gcController.heapLive to trigger, so ensure
+ // that concurrent sweep has some heap growth
+ // in which to perform sweeping before we
+ // start the next GC cycle.
+ sweepMin := atomic.Load64(&c.heapLive) + sweepMinHeapDistance
+ if sweepMin > minTrigger {
+ minTrigger = sweepMin
+ }
+ }
+
+ // If we let the trigger go too low, then if the application
+ // is allocating very rapidly we might end up in a situation
+ // where we're allocating black during a nearly always-on GC.
+ // The result of this is a growing heap and ultimately an
+ // increase in RSS. By capping us at a point >0, we're essentially
+ // saying that we're OK using more CPU during the GC to prevent
+ // this growth in RSS.
+ //
+ // The current constant was chosen empirically: given a sufficiently
+ // fast/scalable allocator with 48 Ps that could drive the trigger ratio
+ // to <0.05, this constant causes applications to retain the same peak
+ // RSS compared to not having this allocator.
+ if triggerBound := uint64(0.7*float64(goal-c.heapMarked)) + c.heapMarked; minTrigger < triggerBound {
+ minTrigger = triggerBound
+ }
+
+ // For small heaps, set the max trigger point at 95% of the heap goal.
+ // This ensures we always have *some* headroom when the GC actually starts.
+ // For larger heaps, set the max trigger point at the goal, minus the
+ // minimum heap size.
+ // This choice follows from the fact that the minimum heap size is chosen
+ // to reflect the costs of a GC with no work to do. With a large heap but
+ // very little scan work to perform, this gives us exactly as much runway
+ // as we would need, in the worst case.
+ maxRunway := uint64(0.95 * float64(goal-c.heapMarked))
+ if largeHeapMaxRunway := goal - c.heapMinimum; goal > c.heapMinimum && maxRunway < largeHeapMaxRunway {
+ maxRunway = largeHeapMaxRunway
+ }
+ maxTrigger := maxRunway + c.heapMarked
+ if maxTrigger < minTrigger {
+ maxTrigger = minTrigger
+ }
+
+ // Compute the trigger by using our estimate of the cons/mark ratio.
+ //
+ // The idea is to take our expected scan work, and multiply it by
+ // the cons/mark ratio to determine how long it'll take to complete
+ // that scan work in terms of bytes allocated. This gives us our GC's
+ // runway.
+ //
+ // However, the cons/mark ratio is a ratio of rates per CPU-second, but
+ // here we care about the relative rates for some division of CPU
+ // resources among the mutator and the GC.
+ //
+ // To summarize, we have B / cpu-ns, and we want B / ns. We get that
+ // by multiplying by our desired division of CPU resources. We choose
+	// to express CPU resources as GOMAXPROCS*fraction. Note that because
+ // we're working with a ratio here, we can omit the number of CPU cores,
+ // because they'll appear in the numerator and denominator and cancel out.
+ // As a result, this is basically just "weighing" the cons/mark ratio by
+ // our desired division of resources.
+ //
+ // Furthermore, by setting the trigger so that CPU resources are divided
+ // this way, assuming that the cons/mark ratio is correct, we make that
+ // division a reality.
+ var trigger uint64
+ runway := uint64((c.consMark * (1 - gcGoalUtilization) / (gcGoalUtilization)) * float64(c.lastHeapScan+c.stackScan+c.globalsScan))
+ if runway > goal {
+ trigger = minTrigger
+ } else {
+ trigger = goal - runway
+ }
+ if trigger < minTrigger {
+ trigger = minTrigger
+ }
+ if trigger > maxTrigger {
+ trigger = maxTrigger
+ }
+ if trigger > goal {
+ goal = trigger
+ }
+
+ // Commit to the trigger and goal.
+ c.trigger = trigger
+ atomic.Store64(&c.heapGoal, goal)
+ if trace.enabled {
+ traceHeapGoal()
+ }
+
+ // Update mark pacing.
+ if gcphase != _GCoff {
+ c.revise()
+ }
+}
+
+// oldCommit sets the trigger ratio and updates everything
+// derived from it: the absolute trigger, the heap goal, mark pacing,
+// and sweep pacing.
+//
+// This can be called any time. If GC is in the middle of a
+// concurrent phase, it will adjust the pacing of that phase.
+//
+// This depends on gcPercent, gcController.heapMarked, and
+// gcController.heapLive. These must be up to date.
+//
+// For !goexperiment.PacerRedesign.
+func (c *gcControllerState) oldCommit(triggerRatio float64) {
+ gcPercent := c.gcPercent.Load()
// Compute the next GC goal, which is when the allocated heap
// has grown by GOGC/100 over the heap marked by the last
// cycle.
goal := ^uint64(0)
- if c.gcPercent >= 0 {
- goal = c.heapMarked + c.heapMarked*uint64(c.gcPercent)/100
+ if gcPercent >= 0 {
+ goal = c.heapMarked + c.heapMarked*uint64(gcPercent)/100
}
// Set the trigger ratio, capped to reasonable bounds.
- if c.gcPercent >= 0 {
- scalingFactor := float64(c.gcPercent) / 100
+ if gcPercent >= 0 {
+ scalingFactor := float64(gcPercent) / 100
// Ensure there's always a little margin so that the
// mutator assist ratio isn't infinity.
maxTriggerRatio := 0.95 * scalingFactor
@@ -700,7 +1134,7 @@ func (c *gcControllerState) commit(triggerRatio float64) {
// We trigger the next GC cycle when the allocated heap has
// grown by the trigger ratio over the marked heap size.
trigger := ^uint64(0)
- if c.gcPercent >= 0 {
+ if gcPercent >= 0 {
trigger = uint64(float64(c.heapMarked) * (1 + triggerRatio))
// Don't trigger below the minimum heap size.
minTrigger := c.heapMinimum
@@ -741,42 +1175,6 @@ func (c *gcControllerState) commit(triggerRatio float64) {
if gcphase != _GCoff {
c.revise()
}
-
- // Update sweep pacing.
- if isSweepDone() {
- mheap_.sweepPagesPerByte = 0
- } else {
- // Concurrent sweep needs to sweep all of the in-use
- // pages by the time the allocated heap reaches the GC
- // trigger. Compute the ratio of in-use pages to sweep
- // per byte allocated, accounting for the fact that
- // some might already be swept.
- heapLiveBasis := atomic.Load64(&c.heapLive)
- heapDistance := int64(trigger) - int64(heapLiveBasis)
- // Add a little margin so rounding errors and
- // concurrent sweep are less likely to leave pages
- // unswept when GC starts.
- heapDistance -= 1024 * 1024
- if heapDistance < _PageSize {
- // Avoid setting the sweep ratio extremely high
- heapDistance = _PageSize
- }
- pagesSwept := atomic.Load64(&mheap_.pagesSwept)
- pagesInUse := atomic.Load64(&mheap_.pagesInUse)
- sweepDistancePages := int64(pagesInUse) - int64(pagesSwept)
- if sweepDistancePages <= 0 {
- mheap_.sweepPagesPerByte = 0
- } else {
- mheap_.sweepPagesPerByte = float64(sweepDistancePages) / float64(heapDistance)
- mheap_.sweepHeapLiveBasis = heapLiveBasis
- // Write pagesSweptBasis last, since this
- // signals concurrent sweeps to recompute
- // their debt.
- atomic.Store64(&mheap_.pagesSweptBasis, pagesSwept)
- }
- }
-
- gcPaceScavenger()
}
// effectiveGrowthRatio returns the current effective heap growth
@@ -789,7 +1187,9 @@ func (c *gcControllerState) commit(triggerRatio float64) {
//
// mheap_.lock must be held or the world must be stopped.
func (c *gcControllerState) effectiveGrowthRatio() float64 {
- assertWorldStoppedOrLockHeld(&mheap_.lock)
+ if !c.test {
+ assertWorldStoppedOrLockHeld(&mheap_.lock)
+ }
egogc := float64(atomic.Load64(&c.heapGoal)-c.heapMarked) / float64(c.heapMarked)
if egogc < 0 {
@@ -802,16 +1202,20 @@ func (c *gcControllerState) effectiveGrowthRatio() float64 {
// setGCPercent updates gcPercent and all related pacer state.
// Returns the old value of gcPercent.
//
+// Calls gcControllerState.commit.
+//
// The world must be stopped, or mheap_.lock must be held.
func (c *gcControllerState) setGCPercent(in int32) int32 {
- assertWorldStoppedOrLockHeld(&mheap_.lock)
+ if !c.test {
+ assertWorldStoppedOrLockHeld(&mheap_.lock)
+ }
- out := c.gcPercent
+ out := c.gcPercent.Load()
if in < 0 {
in = -1
}
- c.gcPercent = in
- c.heapMinimum = defaultHeapMinimum * uint64(c.gcPercent) / 100
+ c.heapMinimum = defaultHeapMinimum * uint64(in) / 100
+ c.gcPercent.Store(in)
// Update pacing in response to gcPercent change.
c.commit(c.triggerRatio)
@@ -824,6 +1228,8 @@ func setGCPercent(in int32) (out int32) {
systemstack(func() {
lock(&mheap_.lock)
out = gcController.setGCPercent(in)
+ gcPaceSweeper(gcController.trigger)
+ gcPaceScavenger(gcController.heapGoal, gcController.lastHeapGoal)
unlock(&mheap_.lock)
})
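For reference, the exported knob that ends up funneling into setGCPercent here is runtime/debug.SetGCPercent; a minimal usage sketch (the 200 value is purely illustrative):

package main

import (
	"fmt"
	"runtime/debug"
)

func main() {
	// Let the heap grow 200% over the live set before the next GC, then restore.
	old := debug.SetGCPercent(200)
	fmt.Println("previous GOGC:", old)
	debug.SetGCPercent(old)
}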
@@ -846,3 +1252,35 @@ func readGOGC() int32 {
}
return 100
}
+
+type piController struct {
+ kp float64 // Proportional constant.
+ ti float64 // Integral time constant.
+ tt float64 // Reset time.
+
+ min, max float64 // Output boundaries.
+
+ // PI controller state.
+
+ errIntegral float64 // Integral of the error from t=0 to now.
+}
+
+func (c *piController) next(input, setpoint, period float64) float64 {
+ // Compute the raw output value.
+ prop := c.kp * (setpoint - input)
+ rawOutput := prop + c.errIntegral
+
+ // Clamp rawOutput into output.
+ output := rawOutput
+ if output < c.min {
+ output = c.min
+ } else if output > c.max {
+ output = c.max
+ }
+
+ // Update the controller's state.
+ if c.ti != 0 && c.tt != 0 {
+ c.errIntegral += (c.kp*period/c.ti)*(setpoint-input) + (period/c.tt)*(output-rawOutput)
+ }
+ return output
+}
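The piController added above is a plain proportional-integral controller with back-calculation anti-windup on the clamped output. The sketch below duplicates the type for illustration and drives it with the cons/mark gains from init, feeding a constant measurement once per simulated cycle the way endCycle does; the 2.0 measurement and the 30-cycle loop are made-up values, not runtime behavior.

package main

import "fmt"

type piController struct {
	kp, ti, tt  float64 // proportional gain, integral time, reset time
	min, max    float64 // output clamp
	errIntegral float64
}

func (c *piController) next(input, setpoint, period float64) float64 {
	prop := c.kp * (setpoint - input)
	rawOutput := prop + c.errIntegral
	output := rawOutput
	if output < c.min {
		output = c.min
	} else if output > c.max {
		output = c.max
	}
	// The (output - rawOutput) term is nonzero only when clamping occurred;
	// it bleeds the accumulated error back down (anti-windup).
	if c.ti != 0 && c.tt != 0 {
		c.errIntegral += (c.kp*period/c.ti)*(setpoint-input) + (period/c.tt)*(output-rawOutput)
	}
	return output
}

func main() {
	// Gains used for the cons/mark estimate in gcControllerState.init.
	c := piController{kp: 0.9, ti: 4.0, tt: 1000, min: -1000, max: 1000}
	estimate := 0.0
	for cycle := 1; cycle <= 30; cycle++ {
		// Pretend every cycle measures a cons/mark ratio of 2.0.
		estimate = c.next(estimate, 2.0, 1.0)
		if cycle%10 == 0 {
			fmt.Printf("after %d cycles: estimate %.3f\n", cycle, estimate)
		}
	}
	// The estimate oscillates at first and settles toward the measured ratio.
}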
diff --git a/libgo/go/runtime/mgcpacer_test.go b/libgo/go/runtime/mgcpacer_test.go
new file mode 100644
index 0000000..9ec0e51
--- /dev/null
+++ b/libgo/go/runtime/mgcpacer_test.go
@@ -0,0 +1,717 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "fmt"
+ "internal/goexperiment"
+ "math"
+ "math/rand"
+ . "runtime"
+ "testing"
+ "time"
+)
+
+func TestGcPacer(t *testing.T) {
+ t.Parallel()
+
+ const initialHeapBytes = 256 << 10
+ for _, e := range []*gcExecTest{
+ {
+ // The most basic test case: a steady-state heap.
+ // Growth to an O(MiB) heap, then constant heap size, alloc/scan rates.
+ name: "Steady",
+ gcPercent: 100,
+ globalsBytes: 32 << 10,
+ nCores: 8,
+ allocRate: constant(33.0),
+ scanRate: constant(1024.0),
+ growthRate: constant(2.0).sum(ramp(-1.0, 12)),
+ scannableFrac: constant(1.0),
+ stackBytes: constant(8192),
+ length: 50,
+ checker: func(t *testing.T, c []gcCycleResult) {
+ n := len(c)
+ if n >= 25 {
+ if goexperiment.PacerRedesign {
+ // For the pacer redesign, assert something even stronger: at this alloc/scan rate,
+ // it should be extremely close to the goal utilization.
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, GCGoalUtilization, 0.005)
+ }
+
+ // Make sure the pacer settles into a non-degenerate state in at least 25 GC cycles.
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[n-2].gcUtilization, 0.005)
+ assertInRange(t, "goal ratio", c[n-1].goalRatio(), 0.95, 1.05)
+ }
+ },
+ },
+ {
+ // Same as the steady-state case, but lots of stacks to scan relative to the heap size.
+ name: "SteadyBigStacks",
+ gcPercent: 100,
+ globalsBytes: 32 << 10,
+ nCores: 8,
+ allocRate: constant(132.0),
+ scanRate: constant(1024.0),
+ growthRate: constant(2.0).sum(ramp(-1.0, 12)),
+ scannableFrac: constant(1.0),
+ stackBytes: constant(2048).sum(ramp(128<<20, 8)),
+ length: 50,
+ checker: func(t *testing.T, c []gcCycleResult) {
+ // Check the same conditions as the steady-state case, except the old pacer can't
+ // really handle this well, so don't check the goal ratio for it.
+ n := len(c)
+ if n >= 25 {
+ if goexperiment.PacerRedesign {
+ // For the pacer redesign, assert something even stronger: at this alloc/scan rate,
+ // it should be extremely close to the goal utilization.
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, GCGoalUtilization, 0.005)
+ assertInRange(t, "goal ratio", c[n-1].goalRatio(), 0.95, 1.05)
+ }
+
+ // Make sure the pacer settles into a non-degenerate state in at least 25 GC cycles.
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[n-2].gcUtilization, 0.005)
+ }
+ },
+ },
+ {
+ // Same as the steady-state case, but lots of globals to scan relative to the heap size.
+ name: "SteadyBigGlobals",
+ gcPercent: 100,
+ globalsBytes: 128 << 20,
+ nCores: 8,
+ allocRate: constant(132.0),
+ scanRate: constant(1024.0),
+ growthRate: constant(2.0).sum(ramp(-1.0, 12)),
+ scannableFrac: constant(1.0),
+ stackBytes: constant(8192),
+ length: 50,
+ checker: func(t *testing.T, c []gcCycleResult) {
+ // Check the same conditions as the steady-state case, except the old pacer can't
+ // really handle this well, so don't check the goal ratio for it.
+ n := len(c)
+ if n >= 25 {
+ if goexperiment.PacerRedesign {
+ // For the pacer redesign, assert something even stronger: at this alloc/scan rate,
+ // it should be extremely close to the goal utilization.
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, GCGoalUtilization, 0.005)
+ assertInRange(t, "goal ratio", c[n-1].goalRatio(), 0.95, 1.05)
+ }
+
+ // Make sure the pacer settles into a non-degenerate state in at least 25 GC cycles.
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[n-2].gcUtilization, 0.005)
+ }
+ },
+ },
+ {
+ // This tests the GC pacer's response to a small change in allocation rate.
+ name: "StepAlloc",
+ gcPercent: 100,
+ globalsBytes: 32 << 10,
+ nCores: 8,
+ allocRate: constant(33.0).sum(ramp(66.0, 1).delay(50)),
+ scanRate: constant(1024.0),
+ growthRate: constant(2.0).sum(ramp(-1.0, 12)),
+ scannableFrac: constant(1.0),
+ stackBytes: constant(8192),
+ length: 100,
+ checker: func(t *testing.T, c []gcCycleResult) {
+ n := len(c)
+ if (n >= 25 && n < 50) || n >= 75 {
+ // Make sure the pacer settles into a non-degenerate state in at least 25 GC cycles
+ // and then is able to settle again after a significant jump in allocation rate.
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[n-2].gcUtilization, 0.005)
+ assertInRange(t, "goal ratio", c[n-1].goalRatio(), 0.95, 1.05)
+ }
+ },
+ },
+ {
+ // This tests the GC pacer's response to a large change in allocation rate.
+ name: "HeavyStepAlloc",
+ gcPercent: 100,
+ globalsBytes: 32 << 10,
+ nCores: 8,
+ allocRate: constant(33).sum(ramp(330, 1).delay(50)),
+ scanRate: constant(1024.0),
+ growthRate: constant(2.0).sum(ramp(-1.0, 12)),
+ scannableFrac: constant(1.0),
+ stackBytes: constant(8192),
+ length: 100,
+ checker: func(t *testing.T, c []gcCycleResult) {
+ n := len(c)
+ if (n >= 25 && n < 50) || n >= 75 {
+ // Make sure the pacer settles into a non-degenerate state in at least 25 GC cycles
+ // and then is able to settle again after a significant jump in allocation rate.
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[n-2].gcUtilization, 0.005)
+ assertInRange(t, "goal ratio", c[n-1].goalRatio(), 0.95, 1.05)
+ }
+ },
+ },
+ {
+ // This tests the GC pacer's response to a change in the fraction of the scannable heap.
+ name: "StepScannableFrac",
+ gcPercent: 100,
+ globalsBytes: 32 << 10,
+ nCores: 8,
+ allocRate: constant(128.0),
+ scanRate: constant(1024.0),
+ growthRate: constant(2.0).sum(ramp(-1.0, 12)),
+ scannableFrac: constant(0.2).sum(unit(0.5).delay(50)),
+ stackBytes: constant(8192),
+ length: 100,
+ checker: func(t *testing.T, c []gcCycleResult) {
+ n := len(c)
+ if (n >= 25 && n < 50) || n >= 75 {
+ // Make sure the pacer settles into a non-degenerate state in at least 25 GC cycles
+ // and then is able to settle again after a significant jump in allocation rate.
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[n-2].gcUtilization, 0.005)
+ assertInRange(t, "goal ratio", c[n-1].goalRatio(), 0.95, 1.05)
+ }
+ },
+ },
+ {
+ // Tests the pacer for a high GOGC value with a large heap growth happening
+ // in the middle. The purpose of the large heap growth is to check if GC
+			// utilization ends up sensitive to that sudden change in live heap size.
+ name: "HighGOGC",
+ gcPercent: 1500,
+ globalsBytes: 32 << 10,
+ nCores: 8,
+ allocRate: random(7, 0x53).offset(165),
+ scanRate: constant(1024.0),
+ growthRate: constant(2.0).sum(ramp(-1.0, 12), random(0.01, 0x1), unit(14).delay(25)),
+ scannableFrac: constant(1.0),
+ stackBytes: constant(8192),
+ length: 50,
+ checker: func(t *testing.T, c []gcCycleResult) {
+ n := len(c)
+ if goexperiment.PacerRedesign && n > 12 {
+ if n == 26 {
+ // In the 26th cycle there's a heap growth. Overshoot is expected to maintain
+ // a stable utilization, but we should *never* overshoot more than GOGC of
+ // the next cycle.
+ assertInRange(t, "goal ratio", c[n-1].goalRatio(), 0.90, 15)
+ } else {
+ // Give a wider goal range here. With such a high GOGC value we're going to be
+ // forced to undershoot.
+ //
+ // TODO(mknyszek): Instead of placing a 0.95 limit on the trigger, make the limit
+ // based on absolute bytes, that's based somewhat in how the minimum heap size
+ // is determined.
+ assertInRange(t, "goal ratio", c[n-1].goalRatio(), 0.90, 1.05)
+ }
+
+ // Ensure utilization remains stable despite a growth in live heap size
+ // at GC #25. This test fails prior to the GC pacer redesign.
+ //
+ // Because GOGC is so large, we should also be really close to the goal utilization.
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, GCGoalUtilization, GCGoalUtilization+0.03)
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[n-2].gcUtilization, 0.03)
+ }
+ },
+ },
+ {
+ // This test makes sure that in the face of a varying (in this case, oscillating) allocation
+ // rate, the pacer does a reasonably good job of staying abreast of the changes.
+ name: "OscAlloc",
+ gcPercent: 100,
+ globalsBytes: 32 << 10,
+ nCores: 8,
+ allocRate: oscillate(13, 0, 8).offset(67),
+ scanRate: constant(1024.0),
+ growthRate: constant(2.0).sum(ramp(-1.0, 12)),
+ scannableFrac: constant(1.0),
+ stackBytes: constant(8192),
+ length: 50,
+ checker: func(t *testing.T, c []gcCycleResult) {
+ n := len(c)
+ if n > 12 {
+ // After the 12th GC, the heap will stop growing. Now, just make sure that:
+ // 1. Utilization isn't varying _too_ much, and
+ // 2. The pacer is mostly keeping up with the goal.
+ assertInRange(t, "goal ratio", c[n-1].goalRatio(), 0.95, 1.05)
+ if goexperiment.PacerRedesign {
+ assertInRange(t, "GC utilization", c[n-1].gcUtilization, 0.25, 0.3)
+ } else {
+ // The old pacer is messier here, and needs a lot more tolerance.
+ assertInRange(t, "GC utilization", c[n-1].gcUtilization, 0.25, 0.4)
+ }
+ }
+ },
+ },
+ {
+ // This test is the same as OscAlloc, but instead of oscillating, the allocation rate is jittery.
+ name: "JitterAlloc",
+ gcPercent: 100,
+ globalsBytes: 32 << 10,
+ nCores: 8,
+ allocRate: random(13, 0xf).offset(132),
+ scanRate: constant(1024.0),
+ growthRate: constant(2.0).sum(ramp(-1.0, 12), random(0.01, 0xe)),
+ scannableFrac: constant(1.0),
+ stackBytes: constant(8192),
+ length: 50,
+ checker: func(t *testing.T, c []gcCycleResult) {
+ n := len(c)
+ if n > 12 {
+ // After the 12th GC, the heap will stop growing. Now, just make sure that:
+ // 1. Utilization isn't varying _too_ much, and
+ // 2. The pacer is mostly keeping up with the goal.
+ assertInRange(t, "goal ratio", c[n-1].goalRatio(), 0.95, 1.05)
+ if goexperiment.PacerRedesign {
+ assertInRange(t, "GC utilization", c[n-1].gcUtilization, 0.25, 0.3)
+ } else {
+ // The old pacer is messier here, and needs a lot more tolerance.
+ assertInRange(t, "GC utilization", c[n-1].gcUtilization, 0.25, 0.4)
+ }
+ }
+ },
+ },
+ {
+ // This test is the same as JitterAlloc, but with a much higher allocation rate.
+ // The jitter is proportionally the same.
+ name: "HeavyJitterAlloc",
+ gcPercent: 100,
+ globalsBytes: 32 << 10,
+ nCores: 8,
+ allocRate: random(33.0, 0x0).offset(330),
+ scanRate: constant(1024.0),
+ growthRate: constant(2.0).sum(ramp(-1.0, 12), random(0.01, 0x152)),
+ scannableFrac: constant(1.0),
+ stackBytes: constant(8192),
+ length: 50,
+ checker: func(t *testing.T, c []gcCycleResult) {
+ n := len(c)
+ if n > 13 {
+ // After the 12th GC, the heap will stop growing. Now, just make sure that:
+ // 1. Utilization isn't varying _too_ much, and
+ // 2. The pacer is mostly keeping up with the goal.
+ // We start at the 13th here because we want to use the 12th as a reference.
+ assertInRange(t, "goal ratio", c[n-1].goalRatio(), 0.95, 1.05)
+ // Unlike the other tests, GC utilization here will vary more and tend higher.
+ // Just make sure it's not going too crazy.
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[n-2].gcUtilization, 0.05)
+ if goexperiment.PacerRedesign {
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[11].gcUtilization, 0.05)
+ } else {
+ // The old pacer is messier here, and needs a little more tolerance.
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[11].gcUtilization, 0.07)
+ }
+ }
+ },
+ },
+ // TODO(mknyszek): Write a test that exercises the pacer's hard goal.
+ // This is difficult in the idealized model this testing framework places
+ // the pacer in, because the calculated overshoot is directly proportional
+ // to the runway for the case of the expected work.
+ // However, it is still possible to trigger this case if something exceptional
+ // happens between calls to revise; the framework just doesn't support this yet.
+ } {
+ e := e
+ t.Run(e.name, func(t *testing.T) {
+ t.Parallel()
+
+ c := NewGCController(e.gcPercent)
+ var bytesAllocatedBlackLast int64
+ results := make([]gcCycleResult, 0, e.length)
+ for i := 0; i < e.length; i++ {
+ cycle := e.next()
+ c.StartCycle(cycle.stackBytes, e.globalsBytes, cycle.scannableFrac, e.nCores)
+
+ // Update pacer incrementally as we complete scan work.
+ const (
+ revisePeriod = 500 * time.Microsecond
+ rateConv = 1024 * float64(revisePeriod) / float64(time.Millisecond)
+ )
+ var nextHeapMarked int64
+ if i == 0 {
+ nextHeapMarked = initialHeapBytes
+ } else {
+ nextHeapMarked = int64(float64(int64(c.HeapMarked())-bytesAllocatedBlackLast) * cycle.growthRate)
+ }
+ globalsScanWorkLeft := int64(e.globalsBytes)
+ stackScanWorkLeft := int64(cycle.stackBytes)
+ heapScanWorkLeft := int64(float64(nextHeapMarked) * cycle.scannableFrac)
+ doWork := func(work int64) (int64, int64, int64) {
+ var deltas [3]int64
+
+ // Do globals work first, then stacks, then heap.
+ for i, workLeft := range []*int64{&globalsScanWorkLeft, &stackScanWorkLeft, &heapScanWorkLeft} {
+ if *workLeft == 0 {
+ continue
+ }
+ if *workLeft > work {
+ deltas[i] += work
+ *workLeft -= work
+ work = 0
+ break
+ } else {
+ deltas[i] += *workLeft
+ work -= *workLeft
+ *workLeft = 0
+ }
+ }
+ return deltas[0], deltas[1], deltas[2]
+ }
+ var (
+ gcDuration int64
+ assistTime int64
+ bytesAllocatedBlack int64
+ )
+ for heapScanWorkLeft+stackScanWorkLeft+globalsScanWorkLeft > 0 {
+ // Simulate GC assist pacing.
+ //
+ // Note that this is an idealized view of the GC assist pacing
+ // mechanism.
+
+ // From the assist ratio and the alloc and scan rates, we can idealize what
+ // the GC CPU utilization looks like.
+ //
+ // We start with assistRatio = (bytes of scan work) / (bytes of runway) (by definition).
+ //
+ // Over revisePeriod, we can also calculate how many bytes are scanned and
+ // allocated, given some GC CPU utilization u:
+ //
+ // bytesScanned = scanRate * rateConv * nCores * u
+ // bytesAllocated = allocRate * rateConv * nCores * (1 - u)
+ //
+ // During revisePeriod, assistRatio is kept constant, and GC assists kick in to
+ // maintain it. Specifically, they act to prevent too many bytes being allocated
+ // compared to how many bytes are scanned. It directly defines the ratio of
+ // bytesScanned to bytesAllocated over this period, hence:
+ //
+ // assistRatio = bytesScanned / bytesAllocated
+ //
+ // From this, we can solve for utilization, because everything else has already
+ // been determined:
+ //
+ // assistRatio = (scanRate * rateConv * nCores * u) / (allocRate * rateConv * nCores * (1 - u))
+ // assistRatio = (scanRate * u) / (allocRate * (1 - u))
+ // assistRatio * allocRate * (1-u) = scanRate * u
+ // assistRatio * allocRate - assistRatio * allocRate * u = scanRate * u
+ // assistRatio * allocRate = assistRatio * allocRate * u + scanRate * u
+ // assistRatio * allocRate = (assistRatio * allocRate + scanRate) * u
+ // u = (assistRatio * allocRate) / (assistRatio * allocRate + scanRate)
+ //
+ // Note that this may give a utilization that is _less_ than GCBackgroundUtilization,
+ // which isn't possible in practice because of dedicated workers. Thus, this case
+ // must be interpreted as GC assists not kicking in at all, and just round up. All
+ // downstream values will then have this accounted for.
+ assistRatio := c.AssistWorkPerByte()
+ utilization := assistRatio * cycle.allocRate / (assistRatio*cycle.allocRate + cycle.scanRate)
+ if utilization < GCBackgroundUtilization {
+ utilization = GCBackgroundUtilization
+ }
+
+ // Knowing the utilization, calculate bytesScanned and bytesAllocated.
+ bytesScanned := int64(cycle.scanRate * rateConv * float64(e.nCores) * utilization)
+ bytesAllocated := int64(cycle.allocRate * rateConv * float64(e.nCores) * (1 - utilization))
+
+ // Subtract work from our model.
+ globalsScanned, stackScanned, heapScanned := doWork(bytesScanned)
+
+ // doWork may not use all of bytesScanned.
+ // In this case, the GC actually ends sometime in this period.
+ // Let's figure out when, exactly, and adjust bytesAllocated too.
+ actualElapsed := revisePeriod
+ actualAllocated := bytesAllocated
+ if actualScanned := globalsScanned + stackScanned + heapScanned; actualScanned < bytesScanned {
+ // actualScanned = scanRate * rateConv * (t / revisePeriod) * nCores * u
+ // => t = actualScanned * revisePeriod / (scanRate * rateConv * nCores * u)
+ actualElapsed = time.Duration(float64(actualScanned) * float64(revisePeriod) / (cycle.scanRate * rateConv * float64(e.nCores) * utilization))
+ actualAllocated = int64(cycle.allocRate * rateConv * float64(actualElapsed) / float64(revisePeriod) * float64(e.nCores) * (1 - utilization))
+ }
+
+ // Ask the pacer to revise.
+ c.Revise(GCControllerReviseDelta{
+ HeapLive: actualAllocated,
+ HeapScan: int64(float64(actualAllocated) * cycle.scannableFrac),
+ HeapScanWork: heapScanned,
+ StackScanWork: stackScanned,
+ GlobalsScanWork: globalsScanned,
+ })
+
+ // Accumulate variables.
+ assistTime += int64(float64(actualElapsed) * float64(e.nCores) * (utilization - GCBackgroundUtilization))
+ gcDuration += int64(actualElapsed)
+ bytesAllocatedBlack += actualAllocated
+ }
+
+ // Put together the results, log them, and concatenate them.
+ result := gcCycleResult{
+ cycle: i + 1,
+ heapLive: c.HeapMarked(),
+ heapScannable: int64(float64(int64(c.HeapMarked())-bytesAllocatedBlackLast) * cycle.scannableFrac),
+ heapTrigger: c.Trigger(),
+ heapPeak: c.HeapLive(),
+ heapGoal: c.HeapGoal(),
+ gcUtilization: float64(assistTime)/(float64(gcDuration)*float64(e.nCores)) + GCBackgroundUtilization,
+ }
+ t.Log("GC", result.String())
+ results = append(results, result)
+
+ // Run the checker for this test.
+ e.check(t, results)
+
+ c.EndCycle(uint64(nextHeapMarked+bytesAllocatedBlack), assistTime, gcDuration, e.nCores)
+
+ bytesAllocatedBlackLast = bytesAllocatedBlack
+ }
+ })
+ }
+}
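A standalone sketch of the closed-form utilization derived in the simulation loop above, u = assistRatio*allocRate / (assistRatio*allocRate + scanRate), including the round-up to the background utilization floor. The alloc and scan rates and the candidate assist ratios are assumed sample values in the test table's units (KiB / cpu-ms).

package main

import "fmt"

func main() {
	const (
		allocRate               = 132.0  // assumed, KiB / cpu-ms
		scanRate                = 1024.0 // assumed, KiB / cpu-ms
		gcBackgroundUtilization = 0.25
	)
	for _, assistRatio := range []float64{0.1, 2.0, 8.0} {
		u := assistRatio * allocRate / (assistRatio*allocRate + scanRate)
		if u < gcBackgroundUtilization {
			// Assists never kick in; dedicated workers set the utilization floor.
			u = gcBackgroundUtilization
		}
		fmt.Printf("assistRatio %.1f -> idealized GC utilization %.3f\n", assistRatio, u)
	}
}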
+
+type gcExecTest struct {
+ name string
+
+ gcPercent int
+ globalsBytes uint64
+ nCores int
+
+ allocRate float64Stream // > 0, KiB / cpu-ms
+ scanRate float64Stream // > 0, KiB / cpu-ms
+ growthRate float64Stream // > 0
+ scannableFrac float64Stream // Clamped to [0, 1]
+ stackBytes float64Stream // Multiple of 2048.
+ length int
+
+ checker func(*testing.T, []gcCycleResult)
+}
+
+// minRate is an arbitrary minimum for allocRate, scanRate, and growthRate.
+// These values just cannot be zero.
+const minRate = 0.0001
+
+func (e *gcExecTest) next() gcCycle {
+ return gcCycle{
+ allocRate: e.allocRate.min(minRate)(),
+ scanRate: e.scanRate.min(minRate)(),
+ growthRate: e.growthRate.min(minRate)(),
+ scannableFrac: e.scannableFrac.limit(0, 1)(),
+ stackBytes: uint64(e.stackBytes.quantize(2048).min(0)()),
+ }
+}
+
+func (e *gcExecTest) check(t *testing.T, results []gcCycleResult) {
+ t.Helper()
+
+ // Do some basic general checks first.
+ n := len(results)
+ switch n {
+ case 0:
+ t.Fatal("no results passed to check")
+ return
+ case 1:
+ if results[0].cycle != 1 {
+ t.Error("first cycle has incorrect number")
+ }
+ default:
+ if results[n-1].cycle != results[n-2].cycle+1 {
+ t.Error("cycle numbers out of order")
+ }
+ }
+ if u := results[n-1].gcUtilization; u < 0 || u > 1 {
+ t.Fatal("GC utilization not within acceptable bounds")
+ }
+ if s := results[n-1].heapScannable; s < 0 {
+ t.Fatal("heapScannable is negative")
+ }
+ if e.checker == nil {
+ t.Fatal("test-specific checker is missing")
+ }
+
+ // Run the test-specific checker.
+ e.checker(t, results)
+}
+
+type gcCycle struct {
+ allocRate float64
+ scanRate float64
+ growthRate float64
+ scannableFrac float64
+ stackBytes uint64
+}
+
+type gcCycleResult struct {
+ cycle int
+
+ // These come directly from the pacer, so uint64.
+ heapLive uint64
+ heapTrigger uint64
+ heapGoal uint64
+ heapPeak uint64
+
+ // These are produced by the simulation, so int64 and
+ // float64 are more appropriate, so that we can check for
+ // bad states in the simulation.
+ heapScannable int64
+ gcUtilization float64
+}
+
+func (r *gcCycleResult) goalRatio() float64 {
+ return float64(r.heapPeak) / float64(r.heapGoal)
+}
+
+func (r *gcCycleResult) String() string {
+ return fmt.Sprintf("%d %2.1f%% %d->%d->%d (goal: %d)", r.cycle, r.gcUtilization*100, r.heapLive, r.heapTrigger, r.heapPeak, r.heapGoal)
+}
+
+func assertInEpsilon(t *testing.T, name string, a, b, epsilon float64) {
+ t.Helper()
+ assertInRange(t, name, a, b-epsilon, b+epsilon)
+}
+
+func assertInRange(t *testing.T, name string, a, min, max float64) {
+ t.Helper()
+ if a < min || a > max {
+ t.Errorf("%s not in range (%f, %f): %f", name, min, max, a)
+ }
+}
+
+// float64Stream is a function that generates an infinite stream of
+// float64 values when called repeatedly.
+type float64Stream func() float64
+
+// constant returns a stream that generates the value c.
+func constant(c float64) float64Stream {
+ return func() float64 {
+ return c
+ }
+}
+
+// unit returns a stream that generates a single peak with
+// amplitude amp, followed by zeroes.
+//
+// In another manner of speaking, this is the Kronecker delta.
+func unit(amp float64) float64Stream {
+ dropped := false
+ return func() float64 {
+ if dropped {
+ return 0
+ }
+ dropped = true
+ return amp
+ }
+}
+
+// oscillate returns a stream that oscillates sinusoidally
+// with the given amplitude, phase, and period.
+func oscillate(amp, phase float64, period int) float64Stream {
+ var cycle int
+ return func() float64 {
+ p := float64(cycle)/float64(period)*2*math.Pi + phase
+ cycle++
+ if cycle == period {
+ cycle = 0
+ }
+ return math.Sin(p) * amp
+ }
+}
+
+// ramp returns a stream that moves from zero to height
+// over the course of length steps.
+func ramp(height float64, length int) float64Stream {
+ var cycle int
+ return func() float64 {
+ h := height * float64(cycle) / float64(length)
+ if cycle < length {
+ cycle++
+ }
+ return h
+ }
+}
+
+// random returns a stream that generates random numbers
+// between -amp and amp.
+func random(amp float64, seed int64) float64Stream {
+ r := rand.New(rand.NewSource(seed))
+ return func() float64 {
+ return ((r.Float64() - 0.5) * 2) * amp
+ }
+}
+
+// delay returns a new stream which is a buffered version
+// of f: it returns zero for cycles steps, followed by f.
+func (f float64Stream) delay(cycles int) float64Stream {
+ zeroes := 0
+ return func() float64 {
+ if zeroes < cycles {
+ zeroes++
+ return 0
+ }
+ return f()
+ }
+}
+
+// scale returns a new stream that is f, but attenuated by a
+// constant factor.
+func (f float64Stream) scale(amt float64) float64Stream {
+ return func() float64 {
+ return f() * amt
+ }
+}
+
+// offset returns a new stream that is f but offset by amt
+// at each step.
+func (f float64Stream) offset(amt float64) float64Stream {
+ return func() float64 {
+ old := f()
+ return old + amt
+ }
+}
+
+// sum returns a new stream that is the sum of all input streams
+// at each step.
+func (f float64Stream) sum(fs ...float64Stream) float64Stream {
+ return func() float64 {
+ sum := f()
+ for _, s := range fs {
+ sum += s()
+ }
+ return sum
+ }
+}
+
+// quantize returns a new stream that rounds f to a multiple
+// of mult at each step.
+func (f float64Stream) quantize(mult float64) float64Stream {
+ return func() float64 {
+ r := f() / mult
+ if r < 0 {
+ return math.Ceil(r) * mult
+ }
+ return math.Floor(r) * mult
+ }
+}
+
+// min returns a new stream that replaces all values produced
+// by f lower than min with min.
+func (f float64Stream) min(min float64) float64Stream {
+ return func() float64 {
+ return math.Max(min, f())
+ }
+}
+
+// max returns a new stream that replaces all values produced
+// by f higher than max with max.
+func (f float64Stream) max(max float64) float64Stream {
+ return func() float64 {
+ return math.Min(max, f())
+ }
+}
+
+// limit returns a new stream that replaces all values produced
+// by f lower than min with min and higher than max with max.
+func (f float64Stream) limit(min, max float64) float64Stream {
+ return func() float64 {
+ v := f()
+ if v < min {
+ v = min
+ } else if v > max {
+ v = max
+ }
+ return v
+ }
+}
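The float64Stream helpers above are closure combinators. The sketch below copies just the pieces needed to show how a table entry such as constant(2.0).sum(ramp(-1.0, 12)) behaves, which is why several checkers above can assume the heap stops growing after roughly the 12th GC.

package main

import "fmt"

type float64Stream func() float64

func constant(c float64) float64Stream { return func() float64 { return c } }

// ramp moves from zero to height over length steps, then holds at height.
func ramp(height float64, length int) float64Stream {
	var cycle int
	return func() float64 {
		h := height * float64(cycle) / float64(length)
		if cycle < length {
			cycle++
		}
		return h
	}
}

func (f float64Stream) sum(fs ...float64Stream) float64Stream {
	return func() float64 {
		s := f()
		for _, g := range fs {
			s += g()
		}
		return s
	}
}

func main() {
	// Growth factor used throughout the test table: 2x at first,
	// decaying to 1x (no growth) over the first dozen cycles.
	growthRate := constant(2.0).sum(ramp(-1.0, 12))
	for i := 1; i <= 14; i++ {
		fmt.Printf("cycle %2d: growth %.3f\n", i, growthRate())
	}
}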
diff --git a/libgo/go/runtime/mgcscavenge.go b/libgo/go/runtime/mgcscavenge.go
index 3fa1c46..adf2b05 100644
--- a/libgo/go/runtime/mgcscavenge.go
+++ b/libgo/go/runtime/mgcscavenge.go
@@ -56,6 +56,7 @@
package runtime
import (
+ "internal/goos"
"runtime/internal/atomic"
"runtime/internal/sys"
"unsafe"
@@ -90,7 +91,7 @@ const (
//
// This ratio is used as part of multiplicative factor to help the scavenger account
// for the additional costs of using scavenged memory in its pacing.
- scavengeCostRatio = 0.7 * (sys.GoosDarwin + sys.GoosIos)
+ scavengeCostRatio = 0.7 * (goos.IsDarwin + goos.IsIos)
// scavengeReservationShards determines the amount of memory the scavenger
// should reserve for scavenging at a time. Specifically, the amount of
@@ -104,7 +105,8 @@ func heapRetained() uint64 {
}
// gcPaceScavenger updates the scavenger's pacing, particularly
-// its rate and RSS goal.
+// its rate and RSS goal. For this, it requires the current heapGoal,
+// and the heapGoal for the previous GC cycle.
//
// The RSS goal is based on the current heap goal with a small overhead
// to accommodate non-determinism in the allocator.
@@ -112,18 +114,22 @@ func heapRetained() uint64 {
// The pacing is based on scavengePageRate, which applies to both regular and
// huge pages. See that constant for more information.
//
+// Must be called whenever GC pacing is updated.
+//
// mheap_.lock must be held or the world must be stopped.
-func gcPaceScavenger() {
+func gcPaceScavenger(heapGoal, lastHeapGoal uint64) {
+ assertWorldStoppedOrLockHeld(&mheap_.lock)
+
// If we're called before the first GC completed, disable scavenging.
// We never scavenge before the 2nd GC cycle anyway (we don't have enough
// information about the heap yet) so this is fine, and avoids a fault
// or garbage data later.
- if gcController.lastHeapGoal == 0 {
- mheap_.scavengeGoal = ^uint64(0)
+ if lastHeapGoal == 0 {
+ atomic.Store64(&mheap_.scavengeGoal, ^uint64(0))
return
}
// Compute our scavenging goal.
- goalRatio := float64(atomic.Load64(&gcController.heapGoal)) / float64(gcController.lastHeapGoal)
+ goalRatio := float64(heapGoal) / float64(lastHeapGoal)
retainedGoal := uint64(float64(memstats.last_heap_inuse) * goalRatio)
// Add retainExtraPercent overhead to retainedGoal. This calculation
// looks strange but the purpose is to arrive at an integer division
@@ -151,10 +157,10 @@ func gcPaceScavenger() {
// the background scavenger. We disable the background scavenger if there's
// less than one physical page of work to do because it's not worth it.
if retainedNow <= retainedGoal || retainedNow-retainedGoal < uint64(physPageSize) {
- mheap_.scavengeGoal = ^uint64(0)
+ atomic.Store64(&mheap_.scavengeGoal, ^uint64(0))
return
}
- mheap_.scavengeGoal = retainedGoal
+ atomic.Store64(&mheap_.scavengeGoal, retainedGoal)
}
// Sleep/wait state of the background scavenger.
@@ -249,7 +255,7 @@ func scavengeSleep(ns int64) int64 {
// The background scavenger maintains the RSS of the application below
// the line described by the proportional scavenging statistics in
// the mheap struct.
-func bgscavenge() {
+func bgscavenge(c chan int) {
setSystemGoroutine()
scavenge.g = getg()
@@ -259,56 +265,93 @@ func bgscavenge() {
scavenge.parked = true
scavenge.timer = new(timer)
- scavenge.timer.f = func(_ interface{}, _ uintptr) {
+ scavenge.timer.f = func(_ any, _ uintptr) {
wakeScavenger()
}
- gcenable_setup <- 1
+ c <- 1
goparkunlock(&scavenge.lock, waitReasonGCScavengeWait, traceEvGoBlock, 1)
- // Exponentially-weighted moving average of the fraction of time this
- // goroutine spends scavenging (that is, percent of a single CPU).
- // It represents a measure of scheduling overheads which might extend
- // the sleep or the critical time beyond what's expected. Assume no
- // overhead to begin with.
- //
- // TODO(mknyszek): Consider making this based on total CPU time of the
- // application (i.e. scavengePercent * GOMAXPROCS). This isn't really
- // feasible now because the scavenger acquires the heap lock over the
- // scavenging operation, which means scavenging effectively blocks
- // allocators and isn't scalable. However, given a scalable allocator,
- // it makes sense to also make the scavenger scale with it; if you're
- // allocating more frequently, then presumably you're also generating
- // more work for the scavenger.
- const idealFraction = scavengePercent / 100.0
- scavengeEWMA := float64(idealFraction)
+ // idealFraction is the ideal % of overall application CPU time that we
+ // spend scavenging.
+ idealFraction := float64(scavengePercent) / 100.0
+ // Input: fraction of CPU time used.
+ // Setpoint: idealFraction.
+ // Output: ratio of critical time to sleep time (determines sleep time).
+ //
+	// The output of this controller is only indirectly related to what we actually
+	// want to achieve: how much time to sleep for. The reason for this definition
+ // is to ensure that the controller's outputs have a direct relationship with
+ // its inputs (as opposed to an inverse relationship), making it somewhat
+ // easier to reason about for tuning purposes.
+ critSleepController := piController{
+ // Tuned loosely via Ziegler-Nichols process.
+ kp: 0.3375,
+ ti: 3.2e6,
+ tt: 1e9, // 1 second reset time.
+
+ // These ranges seem wide, but we want to give the controller plenty of
+ // room to hunt for the optimal value.
+ min: 0.001, // 1:1000
+ max: 1000.0, // 1000:1
+ }
+ // It doesn't really matter what value we start at, but we can't be zero, because
+ // that'll cause divide-by-zero issues.
+ critSleepRatio := 0.001
for {
released := uintptr(0)
-
- // Time in scavenging critical section.
crit := float64(0)
- // Run on the system stack since we grab the heap lock,
- // and a stack growth with the heap lock means a deadlock.
- systemstack(func() {
- lock(&mheap_.lock)
-
+ // Spend at least 1 ms scavenging, otherwise the corresponding
+ // sleep time to maintain our desired utilization is too low to
+ // be reliable.
+ const minCritTime = 1e6
+ for crit < minCritTime {
// If background scavenging is disabled or if there's no work to do just park.
- retained, goal := heapRetained(), mheap_.scavengeGoal
+ retained, goal := heapRetained(), atomic.Load64(&mheap_.scavengeGoal)
if retained <= goal {
- unlock(&mheap_.lock)
- return
+ break
}
- // Scavenge one page, and measure the amount of time spent scavenging.
+ // scavengeQuantum is the amount of memory we try to scavenge
+ // in one go. A smaller value means the scavenger is more responsive
+ // to the scheduler in case of e.g. preemption. A larger value means
+ // that the overheads of scavenging are better amortized, so better
+ // scavenging throughput.
+ //
+ // The current value is chosen assuming a cost of ~10µs/physical page
+ // (this is somewhat pessimistic), which implies a worst-case latency of
+ // about 160µs for 4 KiB physical pages. The current value is biased
+ // toward latency over throughput.
+ const scavengeQuantum = 64 << 10
+
+ // Accumulate the amount of time spent scavenging.
start := nanotime()
- released = mheap_.pages.scavenge(physPageSize, true)
- mheap_.pages.scav.released += released
- crit = float64(nanotime() - start)
+ r := mheap_.pages.scavenge(scavengeQuantum)
+ atomic.Xadduintptr(&mheap_.pages.scav.released, r)
+ end := nanotime()
- unlock(&mheap_.lock)
- })
+			// On some platforms we may see end <= start if the time it takes to scavenge
+ // memory is less than the minimum granularity of its clock (e.g. Windows) or
+ // due to clock bugs.
+ //
+ // In this case, just assume scavenging takes 10 µs per regular physical page
+ // (determined empirically), and conservatively ignore the impact of huge pages
+ // on timing.
+ const approxCritNSPerPhysicalPage = 10e3
+ if end <= start {
+ crit += approxCritNSPerPhysicalPage * float64(r/physPageSize)
+ } else {
+ crit += float64(end - start)
+ }
+ released += r
+
+ // When using fake time just do one loop.
+ if faketime != 0 {
+ break
+ }
+ }
if released == 0 {
lock(&scavenge.lock)
@@ -325,18 +368,13 @@ func bgscavenge() {
throw("released less than one physical page of memory")
}
- // On some platforms we may see crit as zero if the time it takes to scavenge
- // memory is less than the minimum granularity of its clock (e.g. Windows).
- // In this case, just assume scavenging takes 10 µs per regular physical page
- // (determined empirically), and conservatively ignore the impact of huge pages
- // on timing.
- //
- // We shouldn't ever see a crit value less than zero unless there's a bug of
- // some kind, either on our side or in the platform we're running on, but be
- // defensive in that case as well.
- const approxCritNSPerPhysicalPage = 10e3
- if crit <= 0 {
- crit = approxCritNSPerPhysicalPage * float64(released/physPageSize)
+ if crit < minCritTime {
+ // This means there wasn't enough work to actually fill up minCritTime.
+ // That's fine; we shouldn't try to do anything with this information
+ // because it's going to result in a short enough sleep request that things
+ // will get messy. Just assume we did at least this much work.
+ // All this means is that we'll sleep longer than we otherwise would have.
+ crit = minCritTime
}
// Multiply the critical time by 1 + the ratio of the costs of using
@@ -347,41 +385,19 @@ func bgscavenge() {
// because of the additional overheads of using scavenged memory.
crit *= 1 + scavengeCostRatio
- // If we spent more than 10 ms (for example, if the OS scheduled us away, or someone
- // put their machine to sleep) in the critical section, bound the time we use to
- // calculate at 10 ms to avoid letting the sleep time get arbitrarily high.
- const maxCrit = 10e6
- if crit > maxCrit {
- crit = maxCrit
- }
+ // Go to sleep for our current sleepNS.
+ slept := scavengeSleep(int64(crit / critSleepRatio))
- // Compute the amount of time to sleep, assuming we want to use at most
- // scavengePercent of CPU time. Take into account scheduling overheads
- // that may extend the length of our sleep by multiplying by how far
- // off we are from the ideal ratio. For example, if we're sleeping too
- // much, then scavengeEMWA < idealFraction, so we'll adjust the sleep time
- // down.
- adjust := scavengeEWMA / idealFraction
- sleepTime := int64(adjust * crit / (scavengePercent / 100.0))
-
- // Go to sleep.
- slept := scavengeSleep(sleepTime)
-
- // Compute the new ratio.
- fraction := crit / (crit + float64(slept))
-
- // Set a lower bound on the fraction.
- // Due to OS-related anomalies we may "sleep" for an inordinate amount
- // of time. Let's avoid letting the ratio get out of hand by bounding
- // the sleep time we use in our EWMA.
- const minFraction = 1.0 / 1000.0
- if fraction < minFraction {
- fraction = minFraction
- }
+ // Calculate the CPU time spent.
+ //
+ // This may be slightly inaccurate with respect to GOMAXPROCS, but we're
+ // recomputing this often enough relative to GOMAXPROCS changes in general
+ // (it only changes when the world is stopped, and not during a GC) that
+ // that small inaccuracy is in the noise.
+ cpuFraction := float64(crit) / ((float64(slept) + crit) * float64(gomaxprocs))
- // Update scavengeEWMA by merging in the new crit/slept ratio.
- const alpha = 0.5
- scavengeEWMA = alpha*fraction + (1-alpha)*scavengeEWMA
+ // Update the critSleepRatio, adjusting until we reach our ideal fraction.
+ critSleepRatio = critSleepController.next(cpuFraction, idealFraction, float64(slept)+crit)
}
}
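The loop above pairs the measured CPU fraction with a PI controller whose output is the ratio of critical time to sleep time, so the next sleep is crit / critSleepRatio. Below is a minimal, self-contained sketch of that feedback shape; the controller here is an illustrative stand-in rather than the runtime's piController, and the gains plus the 1% setpoint (assuming scavengePercent is 1) are taken from the hunk above as assumptions.

package main

import "fmt"

// piSketch is an illustrative proportional-integral controller with output
// clamping; it mimics the shape of the controller configured above but is
// not the runtime's implementation.
type piSketch struct {
	kp, ti, min, max float64
	errIntegral      float64
}

// next takes the measured input, the setpoint, and the sampling period,
// and returns the new clamped output.
func (c *piSketch) next(input, setpoint, period float64) float64 {
	err := setpoint - input
	c.errIntegral += err * period / c.ti
	out := c.kp*err + c.errIntegral
	if out < c.min { // clamp and drop the last integration step (crude anti-windup)
		c.errIntegral -= err * period / c.ti
		out = c.min
	} else if out > c.max {
		c.errIntegral -= err * period / c.ti
		out = c.max
	}
	return out
}

func main() {
	ctl := piSketch{kp: 0.3375, ti: 3.2e6, min: 0.001, max: 1000}
	critSleepRatio := 0.001
	const idealFraction = 0.01 // scavengePercent / 100, assuming scavengePercent == 1
	const crit = 2e6           // pretend every iteration does 2 ms of scavenging (ns)
	for i := 0; i < 4; i++ {
		slept := crit / critSleepRatio // sleep time implied by the current ratio
		cpuFraction := crit / (crit + slept)
		critSleepRatio = ctl.next(cpuFraction, idealFraction, crit+slept)
		fmt.Printf("iter %d: cpuFraction=%.4f critSleepRatio=%.4f\n", i, cpuFraction, critSleepRatio)
	}
}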
@@ -391,16 +407,7 @@ func bgscavenge() {
// back to the top of the heap.
//
// Returns the amount of memory scavenged in bytes.
-//
-// p.mheapLock must be held, but may be temporarily released if
-// mayUnlock == true.
-//
-// Must run on the system stack because p.mheapLock must be held.
-//
-//go:systemstack
-func (p *pageAlloc) scavenge(nbytes uintptr, mayUnlock bool) uintptr {
- assertLockHeld(p.mheapLock)
-
+func (p *pageAlloc) scavenge(nbytes uintptr) uintptr {
var (
addrs addrRange
gen uint32
@@ -412,9 +419,11 @@ func (p *pageAlloc) scavenge(nbytes uintptr, mayUnlock bool) uintptr {
break
}
}
- r, a := p.scavengeOne(addrs, nbytes-released, mayUnlock)
- released += r
- addrs = a
+ systemstack(func() {
+ r, a := p.scavengeOne(addrs, nbytes-released)
+ released += r
+ addrs = a
+ })
}
// Only unreserve the space which hasn't been scavenged or searched
// to ensure we always make progress.
@@ -452,8 +461,9 @@ func printScavTrace(gen uint32, released uintptr, forced bool) {
func (p *pageAlloc) scavengeStartGen() {
assertLockHeld(p.mheapLock)
+ lock(&p.scav.lock)
if debug.scavtrace > 0 {
- printScavTrace(p.scav.gen, p.scav.released, false)
+ printScavTrace(p.scav.gen, atomic.Loaduintptr(&p.scav.released), false)
}
p.inUse.cloneInto(&p.scav.inUse)
@@ -483,9 +493,10 @@ func (p *pageAlloc) scavengeStartGen() {
// arena in size, so virtually every heap has the scavenger on.
p.scav.reservationBytes = alignUp(p.inUse.totalBytes, pallocChunkBytes) / scavengeReservationShards
p.scav.gen++
- p.scav.released = 0
+ atomic.Storeuintptr(&p.scav.released, 0)
p.scav.freeHWM = minOffAddr
p.scav.scavLWM = maxOffAddr
+ unlock(&p.scav.lock)
}
// scavengeReserve reserves a contiguous range of the address space
@@ -494,14 +505,9 @@ func (p *pageAlloc) scavengeStartGen() {
// first.
//
// Returns the reserved range and the scavenge generation number for it.
-//
-// p.mheapLock must be held.
-//
-// Must run on the system stack because p.mheapLock must be held.
-//
-//go:systemstack
func (p *pageAlloc) scavengeReserve() (addrRange, uint32) {
- assertLockHeld(p.mheapLock)
+ lock(&p.scav.lock)
+ gen := p.scav.gen
// Start by reserving the minimum.
r := p.scav.inUse.removeLast(p.scav.reservationBytes)
@@ -509,7 +515,8 @@ func (p *pageAlloc) scavengeReserve() (addrRange, uint32) {
// Return early if the size is zero; we don't want to use
// the bogus address below.
if r.size() == 0 {
- return r, p.scav.gen
+ unlock(&p.scav.lock)
+ return r, gen
}
// The scavenger requires that base be aligned to a
@@ -520,28 +527,26 @@ func (p *pageAlloc) scavengeReserve() (addrRange, uint32) {
// Remove from inUse however much extra we just pulled out.
p.scav.inUse.removeGreaterEqual(newBase)
+ unlock(&p.scav.lock)
+
r.base = offAddr{newBase}
- return r, p.scav.gen
+ return r, gen
}
// scavengeUnreserve returns an unscavenged portion of a range that was
// previously reserved with scavengeReserve.
-//
-// p.mheapLock must be held.
-//
-// Must run on the system stack because p.mheapLock must be held.
-//
-//go:systemstack
func (p *pageAlloc) scavengeUnreserve(r addrRange, gen uint32) {
- assertLockHeld(p.mheapLock)
-
- if r.size() == 0 || gen != p.scav.gen {
+ if r.size() == 0 {
return
}
if r.base.addr()%pallocChunkBytes != 0 {
throw("unreserving unaligned region")
}
- p.scav.inUse.add(r)
+ lock(&p.scav.lock)
+ if gen == p.scav.gen {
+ p.scav.inUse.add(r)
+ }
+ unlock(&p.scav.lock)
}
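The reserve/unreserve pair above replaces "hold the heap lock across the whole operation" with a generation check: a leftover range is only handed back if the scavenge generation has not advanced in the meantime. A small hedged sketch of that pattern with generic, hypothetical names (none of these types are the runtime's):

package main

import (
	"fmt"
	"sync"
)

// genReserver hands out work items and only accepts returns that match the
// generation they were reserved under; a stale return is silently dropped,
// mirroring the gen check in scavengeUnreserve above.
type genReserver struct {
	mu    sync.Mutex
	gen   uint32
	items []int // pending work
}

func (r *genReserver) reserve() (item int, gen uint32, ok bool) {
	r.mu.Lock()
	defer r.mu.Unlock()
	if len(r.items) == 0 {
		return 0, r.gen, false
	}
	item = r.items[len(r.items)-1]
	r.items = r.items[:len(r.items)-1]
	return item, r.gen, true
}

func (r *genReserver) unreserve(item int, gen uint32) {
	r.mu.Lock()
	defer r.mu.Unlock()
	if gen == r.gen { // only put it back if the generation hasn't rolled over
		r.items = append(r.items, item)
	}
}

// startGen begins a new generation and resets the pending work.
func (r *genReserver) startGen(items []int) {
	r.mu.Lock()
	defer r.mu.Unlock()
	r.gen++
	r.items = append(r.items[:0], items...)
}

func main() {
	r := &genReserver{}
	r.startGen([]int{1, 2, 3})
	item, gen, _ := r.reserve()
	r.startGen([]int{4, 5})          // generation rolls over while the item is out
	r.unreserve(item, gen)           // dropped: gen is stale
	fmt.Println(len(r.items), r.gen) // 2 2
}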
// scavengeOne walks over address range work until it finds
@@ -555,15 +560,10 @@ func (p *pageAlloc) scavengeUnreserve(r addrRange, gen uint32) {
//
// work's base address must be aligned to pallocChunkBytes.
//
-// p.mheapLock must be held, but may be temporarily released if
-// mayUnlock == true.
-//
-// Must run on the system stack because p.mheapLock must be held.
+// Must run on the systemstack because it acquires p.mheapLock.
//
//go:systemstack
-func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (uintptr, addrRange) {
- assertLockHeld(p.mheapLock)
-
+func (p *pageAlloc) scavengeOne(work addrRange, max uintptr) (uintptr, addrRange) {
// Defensively check if we've received an empty address range.
// If so, just return.
if work.size() == 0 {
@@ -595,40 +595,12 @@ func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui
minPages = 1
}
- // Helpers for locking and unlocking only if mayUnlock == true.
- lockHeap := func() {
- if mayUnlock {
- lock(p.mheapLock)
- }
- }
- unlockHeap := func() {
- if mayUnlock {
- unlock(p.mheapLock)
- }
- }
-
- // Fast path: check the chunk containing the top-most address in work,
- // starting at that address's page index in the chunk.
- //
- // Note that work.end() is exclusive, so get the chunk we care about
- // by subtracting 1.
- maxAddr := work.limit.addr() - 1
- maxChunk := chunkIndex(maxAddr)
- if p.summary[len(p.summary)-1][maxChunk].max() >= uint(minPages) {
- // We only bother looking for a candidate if there at least
- // minPages free pages at all.
- base, npages := p.chunkOf(maxChunk).findScavengeCandidate(chunkPageIndex(maxAddr), minPages, maxPages)
-
- // If we found something, scavenge it and return!
- if npages != 0 {
- work.limit = offAddr{p.scavengeRangeLocked(maxChunk, base, npages)}
-
- assertLockHeld(p.mheapLock) // Must be locked on return.
- return uintptr(npages) * pageSize, work
- }
+ // Fast path: check the chunk containing the top-most address in work.
+ if r, w := p.scavengeOneFast(work, minPages, maxPages); r != 0 {
+ return r, w
+ } else {
+ work = w
}
- // Update the limit to reflect the fact that we checked maxChunk already.
- work.limit = offAddr{chunkBase(maxChunk)}
// findCandidate finds the next scavenge candidate in work optimistically.
//
@@ -667,37 +639,61 @@ func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui
// looking for any free and unscavenged page. If we think we see something,
// lock and verify it!
for work.size() != 0 {
- unlockHeap()
// Search for the candidate.
candidateChunkIdx, ok := findCandidate(work)
-
- // Lock the heap. We need to do this now if we found a candidate or not.
- // If we did, we'll verify it. If not, we need to lock before returning
- // anyway.
- lockHeap()
-
if !ok {
// We didn't find a candidate, so we're done.
work.limit = work.base
break
}
+ // Lock, so we can verify what we found.
+ lock(p.mheapLock)
+
// Find, verify, and scavenge if we can.
chunk := p.chunkOf(candidateChunkIdx)
base, npages := chunk.findScavengeCandidate(pallocChunkPages-1, minPages, maxPages)
if npages > 0 {
work.limit = offAddr{p.scavengeRangeLocked(candidateChunkIdx, base, npages)}
-
- assertLockHeld(p.mheapLock) // Must be locked on return.
+ unlock(p.mheapLock)
return uintptr(npages) * pageSize, work
}
+ unlock(p.mheapLock)
// We were fooled, so let's continue from where we left off.
work.limit = offAddr{chunkBase(candidateChunkIdx)}
}
+ return 0, work
+}
- assertLockHeld(p.mheapLock) // Must be locked on return.
+// scavengeOneFast is the fast path for scavengeOne, which just checks the top
+// chunk of work for some pages to scavenge.
+//
+// Must run on the system stack because it acquires the heap lock.
+//
+//go:systemstack
+func (p *pageAlloc) scavengeOneFast(work addrRange, minPages, maxPages uintptr) (uintptr, addrRange) {
+ maxAddr := work.limit.addr() - 1
+ maxChunk := chunkIndex(maxAddr)
+
+ lock(p.mheapLock)
+ if p.summary[len(p.summary)-1][maxChunk].max() >= uint(minPages) {
+ // We only bother looking for a candidate if there are at least
+ // minPages free pages at all.
+ base, npages := p.chunkOf(maxChunk).findScavengeCandidate(chunkPageIndex(maxAddr), minPages, maxPages)
+
+ // If we found something, scavenge it and return!
+ if npages != 0 {
+ work.limit = offAddr{p.scavengeRangeLocked(maxChunk, base, npages)}
+ unlock(p.mheapLock)
+ return uintptr(npages) * pageSize, work
+ }
+ }
+ unlock(p.mheapLock)
+
+ // Update the limit to reflect the fact that we checked maxChunk already.
+ work.limit = offAddr{chunkBase(maxChunk)}
return 0, work
}
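As the comments note, scavengeOne now scans for a candidate chunk without the heap lock and only takes the lock to verify and act, retrying when it was "fooled" by a stale read. A minimal hedged sketch of that optimistic-scan / locked-verify loop, using hypothetical names and plain uint32 flags rather than the runtime's chunk summaries:

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// claimFirstReady scans hints optimistically (no lock), then locks to verify
// and claim, continuing past false positives, in the spirit of the
// find-then-verify loop in scavengeOne above.
func claimFirstReady(mu *sync.Mutex, hints []uint32, claimed []bool) (int, bool) {
	for i := range hints {
		if atomic.LoadUint32(&hints[i]) == 0 { // optimistic, possibly stale read
			continue
		}
		mu.Lock()
		if atomic.LoadUint32(&hints[i]) != 0 && !claimed[i] { // verify under the lock
			claimed[i] = true
			mu.Unlock()
			return i, true
		}
		mu.Unlock() // we were fooled; keep scanning
	}
	return 0, false
}

func main() {
	var mu sync.Mutex
	hints := make([]uint32, 4)
	claimed := make([]bool, 4)
	atomic.StoreUint32(&hints[2], 1)
	i, ok := claimFirstReady(&mu, hints, claimed)
	fmt.Println(i, ok) // 2 true
}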
@@ -708,38 +704,57 @@ func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui
//
// Returns the base address of the scavenged region.
//
-// p.mheapLock must be held.
+// p.mheapLock must be held. Unlocks p.mheapLock but reacquires
+// it before returning. Must be run on the systemstack as a result.
+//
+//go:systemstack
func (p *pageAlloc) scavengeRangeLocked(ci chunkIdx, base, npages uint) uintptr {
assertLockHeld(p.mheapLock)
- p.chunkOf(ci).scavenged.setRange(base, npages)
-
// Compute the full address for the start of the range.
addr := chunkBase(ci) + uintptr(base)*pageSize
+ // Mark the range we're about to scavenge as allocated, because
+ // we don't want any allocating goroutines to grab it while
+ // the scavenging is in progress.
+ if scav := p.allocRange(addr, uintptr(npages)); scav != 0 {
+ throw("double scavenge")
+ }
+
+ // With that done, it's safe to unlock.
+ unlock(p.mheapLock)
+
// Update the scavenge low watermark.
+ lock(&p.scav.lock)
if oAddr := (offAddr{addr}); oAddr.lessThan(p.scav.scavLWM) {
p.scav.scavLWM = oAddr
}
+ unlock(&p.scav.lock)
- // Only perform the actual scavenging if we're not in a test.
- // It's dangerous to do so otherwise.
- if p.test {
- return addr
- }
- sysUnused(unsafe.Pointer(addr), uintptr(npages)*pageSize)
+ if !p.test {
+ // Only perform the actual scavenging if we're not in a test.
+ // It's dangerous to do so otherwise.
+ sysUnused(unsafe.Pointer(addr), uintptr(npages)*pageSize)
- // Update global accounting only when not in test, otherwise
- // the runtime's accounting will be wrong.
- nbytes := int64(npages) * pageSize
- atomic.Xadd64(&memstats.heap_released, nbytes)
+ // Update global accounting only when not in test, otherwise
+ // the runtime's accounting will be wrong.
+ nbytes := int64(npages) * pageSize
+ atomic.Xadd64(&memstats.heap_released, nbytes)
- // Update consistent accounting too.
- stats := memstats.heapStats.acquire()
- atomic.Xaddint64(&stats.committed, -nbytes)
- atomic.Xaddint64(&stats.released, nbytes)
- memstats.heapStats.release()
+ // Update consistent accounting too.
+ stats := memstats.heapStats.acquire()
+ atomic.Xaddint64(&stats.committed, -nbytes)
+ atomic.Xaddint64(&stats.released, nbytes)
+ memstats.heapStats.release()
+ }
+
+ // Relock the heap, because now we need to make these pages
+ // available for allocation. Free them back to the page allocator.
+ lock(p.mheapLock)
+ p.free(addr, uintptr(npages), true)
+ // Mark the range as scavenged.
+ p.chunkOf(ci).scavenged.setRange(base, npages)
return addr
}
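scavengeRangeLocked now marks the range as allocated, drops the heap lock for the expensive return-to-OS call, and reacquires the lock to free the pages back, so allocators cannot touch the pages while the slow step runs. A hedged sketch of that "claim under lock, do slow work unlocked, publish under lock" ordering, with hypothetical names standing in for the runtime API:

package main

import (
	"fmt"
	"sync"
	"time"
)

type region struct {
	mu       sync.Mutex
	free     map[int]bool // page id -> free?
	released int
}

// releaseRange claims the pages under the lock, does the slow "return to the
// OS" step unlocked, then frees the pages back under the lock — the same
// ordering scavengeRangeLocked uses above so allocators never observe the
// pages while the slow step is in flight.
func (r *region) releaseRange(pages []int) {
	r.mu.Lock()
	for _, p := range pages {
		if !r.free[p] {
			panic("double scavenge") // claimed something already in use
		}
		r.free[p] = false // claim: temporarily "allocated"
	}
	r.mu.Unlock()

	// Slow work without the lock held (stand-in for sysUnused).
	time.Sleep(time.Millisecond)

	r.mu.Lock()
	for _, p := range pages {
		r.free[p] = true // hand the pages back to the allocator
	}
	r.released += len(pages)
	r.mu.Unlock()
}

func main() {
	r := &region{free: map[int]bool{0: true, 1: true, 2: true}}
	r.releaseRange([]int{0, 1})
	fmt.Println(r.released) // 2
}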
diff --git a/libgo/go/runtime/mgcscavenge_test.go b/libgo/go/runtime/mgcscavenge_test.go
index 3b12a2e..0659293 100644
--- a/libgo/go/runtime/mgcscavenge_test.go
+++ b/libgo/go/runtime/mgcscavenge_test.go
@@ -6,6 +6,7 @@ package runtime_test
import (
"fmt"
+ "internal/goos"
"math/rand"
. "runtime"
"testing"
@@ -408,7 +409,9 @@ func TestPageAllocScavenge(t *testing.T) {
},
},
}
- if PageAlloc64Bit != 0 {
+ // Disable these tests on iOS since we have a small address space.
+ // See #46860.
+ if PageAlloc64Bit != 0 && goos.IsIos == 0 {
tests["ScavAllVeryDiscontiguous"] = setup{
beforeAlloc: map[ChunkIdx][]BitRange{
BaseChunkIdx: {},
@@ -430,12 +433,12 @@ func TestPageAllocScavenge(t *testing.T) {
}
for name, v := range tests {
v := v
- runTest := func(t *testing.T, mayUnlock bool) {
+ t.Run(name, func(t *testing.T) {
b := NewPageAlloc(v.beforeAlloc, v.beforeScav)
defer FreePageAlloc(b)
for iter, h := range v.expect {
- if got := b.Scavenge(h.request, mayUnlock); got != h.expect {
+ if got := b.Scavenge(h.request); got != h.expect {
t.Fatalf("bad scavenge #%d: want %d, got %d", iter+1, h.expect, got)
}
}
@@ -443,12 +446,6 @@ func TestPageAllocScavenge(t *testing.T) {
defer FreePageAlloc(want)
checkPageAlloc(t, want, b)
- }
- t.Run(name, func(t *testing.T) {
- runTest(t, false)
- })
- t.Run(name+"MayUnlock", func(t *testing.T) {
- runTest(t, true)
})
}
}
diff --git a/libgo/go/runtime/mgcsweep.go b/libgo/go/runtime/mgcsweep.go
index 746e7f0..482731e 100644
--- a/libgo/go/runtime/mgcsweep.go
+++ b/libgo/go/runtime/mgcsweep.go
@@ -41,6 +41,10 @@ type sweepdata struct {
nbgsweep uint32
npausesweep uint32
+ // active tracks outstanding sweepers and the sweep
+ // termination condition.
+ active activeSweep
+
// centralIndex is the current unswept span class.
// It represents an index into the mcentral span
// sets. Accessed and updated via its load and
@@ -116,6 +120,108 @@ func (h *mheap) nextSpanForSweep() *mspan {
return nil
}
+const sweepDrainedMask = 1 << 31
+
+// activeSweep is a type that captures whether sweeping
+// is done, and whether there are any outstanding sweepers.
+//
+// Every potential sweeper must call begin() before they look
+// for work, and end() after they've finished sweeping.
+type activeSweep struct {
+ // state is divided into two parts.
+ //
+ // The top bit (masked by sweepDrainedMask) is a boolean
+ // value indicating whether all the sweep work has been
+ // drained from the queue.
+ //
+ // The rest of the bits are a counter, indicating the
+ // number of outstanding concurrent sweepers.
+ state atomic.Uint32
+}
+
+// begin registers a new sweeper. Returns a sweepLocker
+// for acquiring spans for sweeping. Any outstanding sweeper blocks
+// sweep termination.
+//
+// If the sweepLocker is invalid, the caller can be sure that all
+// outstanding sweep work has been drained, so there is nothing left
+// to sweep. Note that there may be sweepers currently running, so
+// this does not indicate that all sweeping has completed.
+//
+// Even if the sweepLocker is invalid, its sweepGen is always valid.
+func (a *activeSweep) begin() sweepLocker {
+ for {
+ state := a.state.Load()
+ if state&sweepDrainedMask != 0 {
+ return sweepLocker{mheap_.sweepgen, false}
+ }
+ if a.state.CompareAndSwap(state, state+1) {
+ return sweepLocker{mheap_.sweepgen, true}
+ }
+ }
+}
+
+// end deregisters a sweeper. Must be called once for each time
+// begin is called if the sweepLocker is valid.
+func (a *activeSweep) end(sl sweepLocker) {
+ if sl.sweepGen != mheap_.sweepgen {
+ throw("sweeper left outstanding across sweep generations")
+ }
+ for {
+ state := a.state.Load()
+ if (state&^sweepDrainedMask)-1 >= sweepDrainedMask {
+ throw("mismatched begin/end of activeSweep")
+ }
+ if a.state.CompareAndSwap(state, state-1) {
+ if state != sweepDrainedMask {
+ return
+ }
+ if debug.gcpacertrace > 0 {
+ print("pacer: sweep done at heap size ", gcController.heapLive>>20, "MB; allocated ", (gcController.heapLive-mheap_.sweepHeapLiveBasis)>>20, "MB during sweep; swept ", mheap_.pagesSwept.Load(), " pages at ", mheap_.sweepPagesPerByte, " pages/byte\n")
+ }
+ return
+ }
+ }
+}
+
+// markDrained marks the active sweep cycle as having drained
+// all remaining work. This is safe to be called concurrently
+// with all other methods of activeSweep, though may race.
+//
+// Returns true if this call was the one that actually performed
+// the mark.
+func (a *activeSweep) markDrained() bool {
+ for {
+ state := a.state.Load()
+ if state&sweepDrainedMask != 0 {
+ return false
+ }
+ if a.state.CompareAndSwap(state, state|sweepDrainedMask) {
+ return true
+ }
+ }
+}
+
+// sweepers returns the current number of active sweepers.
+func (a *activeSweep) sweepers() uint32 {
+ return a.state.Load() &^ sweepDrainedMask
+}
+
+// isDone returns true if all sweep work has been drained and no more
+// outstanding sweepers exist. That is, when the sweep phase is
+// completely done.
+func (a *activeSweep) isDone() bool {
+ return a.state.Load() == sweepDrainedMask
+}
+
+// reset sets up the activeSweep for the next sweep cycle.
+//
+// The world must be stopped.
+func (a *activeSweep) reset() {
+ assertWorldStopped()
+ a.state.Store(0)
+}
+
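activeSweep packs a "work drained" flag into the top bit of a uint32 and keeps the sweeper count in the remaining bits, updating both with CAS loops. The following standalone sketch shows the same packing using sync/atomic; it mirrors the shape of activeSweep but not its exact semantics, and all names here are illustrative.

package main

import (
	"fmt"
	"sync/atomic"
)

const drainedMask = 1 << 31

// workers packs "no more work to hand out" into the top bit of state and the
// number of active workers into the low 31 bits, like activeSweep above.
type workers struct {
	state uint32
}

// begin registers a worker unless the work has already been drained.
func (w *workers) begin() bool {
	for {
		s := atomic.LoadUint32(&w.state)
		if s&drainedMask != 0 {
			return false
		}
		if atomic.CompareAndSwapUint32(&w.state, s, s+1) {
			return true
		}
	}
}

// end deregisters a worker and reports whether it was the last one to finish
// after the drained flag was set.
func (w *workers) end() (last bool) {
	for {
		s := atomic.LoadUint32(&w.state)
		if s&^drainedMask == 0 {
			panic("mismatched begin/end")
		}
		if atomic.CompareAndSwapUint32(&w.state, s, s-1) {
			return s == drainedMask+1
		}
	}
}

// markDrained sets the drained flag; counted workers may still be running.
func (w *workers) markDrained() {
	for {
		s := atomic.LoadUint32(&w.state)
		if s&drainedMask != 0 || atomic.CompareAndSwapUint32(&w.state, s, s|drainedMask) {
			return
		}
	}
}

func (w *workers) done() bool { return atomic.LoadUint32(&w.state) == drainedMask }

func main() {
	var w workers
	w.begin()
	w.markDrained()
	fmt.Println(w.done(), w.end(), w.done()) // false true true
}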
// finishsweep_m ensures that all spans are swept.
//
// The world must be stopped. This ensures there are no sweeps in
@@ -134,6 +240,15 @@ func finishsweep_m() {
sweep.npausesweep++
}
+ // Make sure there aren't any outstanding sweepers left.
+ // At this point, with the world stopped, it means one of two
+ // things. Either we were able to preempt a sweeper, or that
+ // a sweeper didn't call sweep.active.end when it should have.
+ // Both cases indicate a bug, so throw.
+ if sweep.active.sweepers() != 0 {
+ throw("active sweepers found at start of mark phase")
+ }
+
// Reset all the unswept buffers, which should be empty.
// Do this in sweep termination as opposed to mark termination
// so that we can catch unswept spans and reclaim blocks as
@@ -153,7 +268,7 @@ func finishsweep_m() {
nextMarkBitArenaEpoch()
}
-func bgsweep() {
+func bgsweep(c chan int) {
setSystemGoroutine()
sweep.g = getg()
@@ -161,7 +276,7 @@ func bgsweep() {
lockInit(&sweep.lock, lockRankSweep)
lock(&sweep.lock)
sweep.parked = true
- gcenable_setup <- 1
+ c <- 1
goparkunlock(&sweep.lock, waitReasonGCSweepWait, traceEvGoBlock, 1)
for {
@@ -189,15 +304,11 @@ func bgsweep() {
}
}
-// sweepLocker acquires sweep ownership of spans and blocks sweep
-// completion.
+// sweepLocker acquires sweep ownership of spans.
type sweepLocker struct {
// sweepGen is the sweep generation of the heap.
sweepGen uint32
- // blocking indicates that this tracker is blocking sweep
- // completion, usually as a result of acquiring sweep
- // ownership of at least one span.
- blocking bool
+ valid bool
}
// sweepLocked represents sweep ownership of a span.
@@ -205,22 +316,16 @@ type sweepLocked struct {
*mspan
}
-func newSweepLocker() sweepLocker {
- return sweepLocker{
- sweepGen: mheap_.sweepgen,
- }
-}
-
// tryAcquire attempts to acquire sweep ownership of span s. If it
// successfully acquires ownership, it blocks sweep completion.
func (l *sweepLocker) tryAcquire(s *mspan) (sweepLocked, bool) {
+ if !l.valid {
+ throw("use of invalid sweepLocker")
+ }
// Check before attempting to CAS.
if atomic.Load(&s.sweepgen) != l.sweepGen-2 {
return sweepLocked{}, false
}
- // Add ourselves to sweepers before potentially taking
- // ownership.
- l.blockCompletion()
// Attempt to acquire sweep ownership of s.
if !atomic.Cas(&s.sweepgen, l.sweepGen-2, l.sweepGen-1) {
return sweepLocked{}, false
@@ -228,48 +333,22 @@ func (l *sweepLocker) tryAcquire(s *mspan) (sweepLocked, bool) {
return sweepLocked{s}, true
}
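tryAcquire relies on the span's sweepgen trailing the heap's by 2 ("needs sweeping"), advancing it to sweepgen-1 ("being swept") with a CAS, and later to sweepgen ("swept"). A hedged sketch of that small state machine on a plain uint32; the helper names are illustrative, not the runtime's:

package main

import (
	"fmt"
	"sync/atomic"
)

// Span sweep states are encoded relative to the heap's sweep generation h:
//   h-2: needs sweeping, h-1: being swept, h: swept.
type span struct {
	sweepgen uint32
}

// tryAcquire attempts to take sweep ownership of s for heap generation h.
func tryAcquire(s *span, h uint32) bool {
	if atomic.LoadUint32(&s.sweepgen) != h-2 { // cheap check before the CAS
		return false
	}
	return atomic.CompareAndSwapUint32(&s.sweepgen, h-2, h-1)
}

// release marks s as swept for generation h.
func release(s *span, h uint32) {
	atomic.StoreUint32(&s.sweepgen, h)
}

func main() {
	const h = 6
	s := &span{sweepgen: h - 2}
	fmt.Println(tryAcquire(s, h)) // true: we own the sweep
	fmt.Println(tryAcquire(s, h)) // false: already taken this generation
	release(s, h)
	fmt.Println(s.sweepgen == h) // true: swept for this generation
}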
-// blockCompletion blocks sweep completion without acquiring any
-// specific spans.
-func (l *sweepLocker) blockCompletion() {
- if !l.blocking {
- atomic.Xadd(&mheap_.sweepers, +1)
- l.blocking = true
- }
-}
-
-func (l *sweepLocker) dispose() {
- if !l.blocking {
- return
- }
- // Decrement the number of active sweepers and if this is the
- // last one, mark sweep as complete.
- l.blocking = false
- if atomic.Xadd(&mheap_.sweepers, -1) == 0 && atomic.Load(&mheap_.sweepDrained) != 0 {
- l.sweepIsDone()
- }
-}
-
-func (l *sweepLocker) sweepIsDone() {
- if debug.gcpacertrace > 0 {
- print("pacer: sweep done at heap size ", gcController.heapLive>>20, "MB; allocated ", (gcController.heapLive-mheap_.sweepHeapLiveBasis)>>20, "MB during sweep; swept ", mheap_.pagesSwept, " pages at ", mheap_.sweepPagesPerByte, " pages/byte\n")
- }
-}
-
// sweepone sweeps some unswept heap span and returns the number of pages returned
// to the heap, or ^uintptr(0) if there was nothing to sweep.
func sweepone() uintptr {
- _g_ := getg()
+ gp := getg()
- // increment locks to ensure that the goroutine is not preempted
+ // Increment locks to ensure that the goroutine is not preempted
	// in the middle of sweep, thus leaving the span in an inconsistent state for the next GC
- _g_.m.locks++
- if atomic.Load(&mheap_.sweepDrained) != 0 {
- _g_.m.locks--
- return ^uintptr(0)
- }
+ gp.m.locks++
+
// TODO(austin): sweepone is almost always called in a loop;
// lift the sweepLocker into its callers.
- sl := newSweepLocker()
+ sl := sweep.active.begin()
+ if !sl.valid {
+ gp.m.locks--
+ return ^uintptr(0)
+ }
// Find a span to sweep.
npages := ^uintptr(0)
@@ -277,7 +356,7 @@ func sweepone() uintptr {
for {
s := mheap_.nextSpanForSweep()
if s == nil {
- noMoreWork = atomic.Cas(&mheap_.sweepDrained, 0, 1)
+ noMoreWork = sweep.active.markDrained()
break
}
if state := s.state.get(); state != mSpanInUse {
@@ -297,7 +376,7 @@ func sweepone() uintptr {
// Whole span was freed. Count it toward the
// page reclaimer credit since these pages can
// now be used for span allocation.
- atomic.Xadduintptr(&mheap_.reclaimCredit, npages)
+ mheap_.reclaimCredit.Add(npages)
} else {
// Span is still in-use, so this returned no
// pages to the heap and the span needs to
@@ -307,8 +386,7 @@ func sweepone() uintptr {
break
}
}
-
- sl.dispose()
+ sweep.active.end(sl)
if noMoreWork {
// The sweep list is empty. There may still be
@@ -321,7 +399,7 @@ func sweepone() uintptr {
// The scavenger is signaled by the last sweeper because once
// sweeping is done, we will definitely have useful work for
// the scavenger to do, since the scavenger only runs over the
- // heap once per GC cyle. This update is not done during sweep
+ // heap once per GC cycle. This update is not done during sweep
// termination because in some cases there may be a long delay
// between sweep done and sweep termination (e.g. not enough
// allocations to trigger a GC) which would be nice to fill in
@@ -337,7 +415,7 @@ func sweepone() uintptr {
readyForScavenger()
}
- _g_.m.locks--
+ gp.m.locks--
return npages
}
@@ -348,10 +426,7 @@ func sweepone() uintptr {
// GC runs; to prevent that the caller must be non-preemptible or must
// somehow block GC progress.
func isSweepDone() bool {
- // Check that all spans have at least begun sweeping and there
- // are no active sweepers. If both are true, then all spans
- // have finished sweeping.
- return atomic.Load(&mheap_.sweepDrained) != 0 && atomic.Load(&mheap_.sweepers) == 0
+ return sweep.active.isDone()
}
// Returns only when span s has been swept.
@@ -365,16 +440,23 @@ func (s *mspan) ensureSwept() {
throw("mspan.ensureSwept: m is not locked")
}
- sl := newSweepLocker()
- // The caller must be sure that the span is a mSpanInUse span.
- if s, ok := sl.tryAcquire(s); ok {
- s.sweep(false)
- sl.dispose()
- return
+ // If this operation fails, then that means that there are
+ // no more spans to be swept. In this case, either s has already
+ // been swept, or is about to be acquired for sweeping and swept.
+ sl := sweep.active.begin()
+ if sl.valid {
+ // The caller must be sure that the span is a mSpanInUse span.
+ if s, ok := sl.tryAcquire(s); ok {
+ s.sweep(false)
+ sweep.active.end(sl)
+ return
+ }
+ sweep.active.end(sl)
}
- sl.dispose()
- // unfortunate condition, and we don't have efficient means to wait
+ // Unfortunately we can't sweep the span ourselves. Somebody else
+ // got to it first. We don't have efficient means to wait, but that's
+ // OK; it will be swept fairly soon.
for {
spangen := atomic.Load(&s.sweepgen)
if spangen == sl.sweepGen || spangen == sl.sweepGen+3 {
@@ -414,7 +496,7 @@ func (sl *sweepLocked) sweep(preserve bool) bool {
traceGCSweepSpan(s.npages * _PageSize)
}
- atomic.Xadd64(&mheap_.pagesSwept, int64(s.npages))
+ mheap_.pagesSwept.Add(int64(s.npages))
spc := s.spanclass
size := s.elemsize
@@ -487,7 +569,7 @@ func (sl *sweepLocked) sweep(preserve bool) bool {
spanHasNoSpecials(s)
}
- if debug.allocfreetrace != 0 || debug.clobberfree != 0 || raceenabled || msanenabled {
+ if debug.allocfreetrace != 0 || debug.clobberfree != 0 || raceenabled || msanenabled || asanenabled {
// Find all newly freed objects. This doesn't have to
		// be efficient; allocfreetrace has massive overhead.
mbits := s.markBitsForBase()
@@ -507,6 +589,9 @@ func (sl *sweepLocked) sweep(preserve bool) bool {
if msanenabled {
msanfree(unsafe.Pointer(x), size)
}
+ if asanenabled {
+ asanpoison(unsafe.Pointer(x), size)
+ }
}
mbits.advance()
abits.advance()
@@ -725,17 +810,17 @@ func deductSweepCredit(spanBytes uintptr, callerSweepPages uintptr) {
}
retry:
- sweptBasis := atomic.Load64(&mheap_.pagesSweptBasis)
+ sweptBasis := mheap_.pagesSweptBasis.Load()
// Fix debt if necessary.
newHeapLive := uintptr(atomic.Load64(&gcController.heapLive)-mheap_.sweepHeapLiveBasis) + spanBytes
pagesTarget := int64(mheap_.sweepPagesPerByte*float64(newHeapLive)) - int64(callerSweepPages)
- for pagesTarget > int64(atomic.Load64(&mheap_.pagesSwept)-sweptBasis) {
+ for pagesTarget > int64(mheap_.pagesSwept.Load()-sweptBasis) {
if sweepone() == ^uintptr(0) {
mheap_.sweepPagesPerByte = 0
break
}
- if atomic.Load64(&mheap_.pagesSweptBasis) != sweptBasis {
+ if mheap_.pagesSweptBasis.Load() != sweptBasis {
// Sweep pacing changed. Recompute debt.
goto retry
}
@@ -754,3 +839,46 @@ func clobberfree(x unsafe.Pointer, size uintptr) {
*(*uint32)(add(x, i)) = 0xdeadbeef
}
}
+
+// gcPaceSweeper updates the sweeper's pacing parameters.
+//
+// Must be called whenever the GC's pacing is updated.
+//
+// The world must be stopped, or mheap_.lock must be held.
+func gcPaceSweeper(trigger uint64) {
+ assertWorldStoppedOrLockHeld(&mheap_.lock)
+
+ // Update sweep pacing.
+ if isSweepDone() {
+ mheap_.sweepPagesPerByte = 0
+ } else {
+ // Concurrent sweep needs to sweep all of the in-use
+ // pages by the time the allocated heap reaches the GC
+ // trigger. Compute the ratio of in-use pages to sweep
+ // per byte allocated, accounting for the fact that
+ // some might already be swept.
+ heapLiveBasis := atomic.Load64(&gcController.heapLive)
+ heapDistance := int64(trigger) - int64(heapLiveBasis)
+ // Add a little margin so rounding errors and
+ // concurrent sweep are less likely to leave pages
+ // unswept when GC starts.
+ heapDistance -= 1024 * 1024
+ if heapDistance < _PageSize {
+ // Avoid setting the sweep ratio extremely high
+ heapDistance = _PageSize
+ }
+ pagesSwept := mheap_.pagesSwept.Load()
+ pagesInUse := mheap_.pagesInUse.Load()
+ sweepDistancePages := int64(pagesInUse) - int64(pagesSwept)
+ if sweepDistancePages <= 0 {
+ mheap_.sweepPagesPerByte = 0
+ } else {
+ mheap_.sweepPagesPerByte = float64(sweepDistancePages) / float64(heapDistance)
+ mheap_.sweepHeapLiveBasis = heapLiveBasis
+ // Write pagesSweptBasis last, since this
+ // signals concurrent sweeps to recompute
+ // their debt.
+ mheap_.pagesSweptBasis.Store(pagesSwept)
+ }
+ }
+}
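gcPaceSweeper sets sweepPagesPerByte so the remaining unswept pages are finished by the time allocation reaches the trigger, minus a 1 MiB margin. A worked, hedged sketch of that arithmetic with made-up numbers (the 8 KiB page size is an assumption for the example):

package main

import "fmt"

const pageSize = 8192 // assumed page size for the example

// sweepPagesPerByte computes the proportional sweep ratio the same way the
// hunk above does, for illustrative inputs.
func sweepPagesPerByte(trigger, heapLive uint64, pagesInUse, pagesSwept int64) float64 {
	heapDistance := int64(trigger) - int64(heapLive)
	heapDistance -= 1024 * 1024 // margin for rounding errors and concurrent sweep
	if heapDistance < pageSize {
		heapDistance = pageSize // avoid an extreme ratio
	}
	sweepDistancePages := pagesInUse - pagesSwept
	if sweepDistancePages <= 0 {
		return 0
	}
	return float64(sweepDistancePages) / float64(heapDistance)
}

func main() {
	// Hypothetical: 64 MiB trigger, 40 MiB live, 6000 in-use pages, 1000 swept.
	r := sweepPagesPerByte(64<<20, 40<<20, 6000, 1000)
	fmt.Printf("%.6f pages/byte\n", r) // ~5000 pages over ~23 MiB of allocation headroom
}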
diff --git a/libgo/go/runtime/mgcwork.go b/libgo/go/runtime/mgcwork.go
index 667c7af..9c3f7fd 100644
--- a/libgo/go/runtime/mgcwork.go
+++ b/libgo/go/runtime/mgcwork.go
@@ -5,8 +5,8 @@
package runtime
import (
+ "internal/goarch"
"runtime/internal/atomic"
- "runtime/internal/sys"
"unsafe"
)
@@ -77,9 +77,10 @@ type gcWork struct {
// into work.bytesMarked by dispose.
bytesMarked uint64
- // Scan work performed on this gcWork. This is aggregated into
+ // Heap scan work performed on this gcWork. This is aggregated into
// gcController by dispose and may also be flushed by callers.
- scanWork int64
+ // Other types of scan work are flushed immediately.
+ heapScanWork int64
// flushedWork indicates that a non-empty work buffer was
// flushed to the global work list since the last gcMarkDone
@@ -274,9 +275,9 @@ func (w *gcWork) dispose() {
atomic.Xadd64(&work.bytesMarked, int64(w.bytesMarked))
w.bytesMarked = 0
}
- if w.scanWork != 0 {
- atomic.Xaddint64(&gcController.scanWork, w.scanWork)
- w.scanWork = 0
+ if w.heapScanWork != 0 {
+ gcController.heapScanWork.Add(w.heapScanWork)
+ w.heapScanWork = 0
}
}
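gcWork now accumulates heapScanWork locally and flushes it into the global controller with a single atomic add in dispose, keeping the hot path free of shared-memory traffic. A tiny hedged sketch of that local-accumulate, flush-on-dispose pattern with illustrative names:

package main

import (
	"fmt"
	"sync/atomic"
)

var globalScanWork int64 // shared total, updated only on flush

// worker batches its scan work locally and publishes it in one atomic add,
// the same shape as gcWork.heapScanWork being flushed in dispose above.
type worker struct {
	heapScanWork int64
}

func (w *worker) scanned(n int64) { w.heapScanWork += n } // hot path: no atomics

func (w *worker) dispose() {
	if w.heapScanWork != 0 {
		atomic.AddInt64(&globalScanWork, w.heapScanWork)
		w.heapScanWork = 0
	}
}

func main() {
	var w worker
	w.scanned(128)
	w.scanned(64)
	w.dispose()
	fmt.Println(atomic.LoadInt64(&globalScanWork)) // 192
}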
@@ -322,7 +323,7 @@ type workbufhdr struct {
type workbuf struct {
workbufhdr
// account for the above fields
- obj [(_WorkbufSize - unsafe.Sizeof(workbufhdr{})) / sys.PtrSize]uintptr
+ obj [(_WorkbufSize - unsafe.Sizeof(workbufhdr{})) / goarch.PtrSize]uintptr
}
// workbuf factory routines. These funcs are used to manage the
diff --git a/libgo/go/runtime/mheap.go b/libgo/go/runtime/mheap.go
index 8e346c8..8b4b685 100644
--- a/libgo/go/runtime/mheap.go
+++ b/libgo/go/runtime/mheap.go
@@ -10,8 +10,8 @@ package runtime
import (
"internal/cpu"
+ "internal/goarch"
"runtime/internal/atomic"
- "runtime/internal/sys"
"unsafe"
)
@@ -65,9 +65,7 @@ type mheap struct {
lock mutex
pages pageAlloc // page allocation data structure
- sweepgen uint32 // sweep generation, see comment in mspan; written during STW
- sweepDrained uint32 // all spans are swept or are being swept
- sweepers uint32 // number of active sweepone calls
+ sweepgen uint32 // sweep generation, see comment in mspan; written during STW
// allspans is a slice of all mspans ever created. Each mspan
// appears exactly once.
@@ -82,7 +80,7 @@ type mheap struct {
// access (since that may free the backing store).
allspans []*mspan // all spans out there
- _ uint32 // align uint64 fields on 32-bit for atomics
+ // _ uint32 // align uint64 fields on 32-bit for atomics
// Proportional sweep
//
@@ -96,23 +94,25 @@ type mheap struct {
// any given time, the system is at (gcController.heapLive,
// pagesSwept) in this space.
//
- // It's important that the line pass through a point we
- // control rather than simply starting at a (0,0) origin
+ // It is important that the line pass through a point we
+ // control rather than simply starting at a 0,0 origin
// because that lets us adjust sweep pacing at any time while
// accounting for current progress. If we could only adjust
// the slope, it would create a discontinuity in debt if any
// progress has already been made.
- pagesInUse uint64 // pages of spans in stats mSpanInUse; updated atomically
- pagesSwept uint64 // pages swept this cycle; updated atomically
- pagesSweptBasis uint64 // pagesSwept to use as the origin of the sweep ratio; updated atomically
- sweepHeapLiveBasis uint64 // value of gcController.heapLive to use as the origin of sweep ratio; written with lock, read without
- sweepPagesPerByte float64 // proportional sweep ratio; written with lock, read without
+ pagesInUse atomic.Uint64 // pages of spans in stats mSpanInUse
+ pagesSwept atomic.Uint64 // pages swept this cycle
+ pagesSweptBasis atomic.Uint64 // pagesSwept to use as the origin of the sweep ratio
+ sweepHeapLiveBasis uint64 // value of gcController.heapLive to use as the origin of sweep ratio; written with lock, read without
+ sweepPagesPerByte float64 // proportional sweep ratio; written with lock, read without
// TODO(austin): pagesInUse should be a uintptr, but the 386
// compiler can't 8-byte align fields.
// scavengeGoal is the amount of total retained heap memory (measured by
// heapRetained) that the runtime will try to maintain by returning memory
// to the OS.
+ //
+ // Accessed atomically.
scavengeGoal uint64
// Page reclaimer state
@@ -123,16 +123,13 @@ type mheap struct {
//
// If this is >= 1<<63, the page reclaimer is done scanning
// the page marks.
- //
- // This is accessed atomically.
- reclaimIndex uint64
+ reclaimIndex atomic.Uint64
+
// reclaimCredit is spare credit for extra pages swept. Since
// the page reclaimer works in large chunks, it may reclaim
// more than requested. Any spare pages released go to this
// credit pool.
- //
- // This is accessed atomically.
- reclaimCredit uintptr
+ reclaimCredit atomic.Uintptr
// arenas is the heap arena map. It points to the metadata for
// the heap for every arena frame of the entire usable virtual
@@ -497,13 +494,13 @@ func recordspan(vh unsafe.Pointer, p unsafe.Pointer) {
assertLockHeld(&h.lock)
if len(h.allspans) >= cap(h.allspans) {
- n := 64 * 1024 / sys.PtrSize
+ n := 64 * 1024 / goarch.PtrSize
if n < cap(h.allspans)*3/2 {
n = cap(h.allspans) * 3 / 2
}
var new []*mspan
sp := (*notInHeapSlice)(unsafe.Pointer(&new))
- sp.array = (*notInHeap)(sysAlloc(uintptr(n)*sys.PtrSize, &memstats.other_sys))
+ sp.array = (*notInHeap)(sysAlloc(uintptr(n)*goarch.PtrSize, &memstats.other_sys))
if sp.array == nil {
throw("runtime: cannot allocate memory")
}
@@ -739,7 +736,7 @@ func (h *mheap) reclaim(npage uintptr) {
// batching heap frees.
// Bail early if there's no more reclaim work.
- if atomic.Load64(&h.reclaimIndex) >= 1<<63 {
+ if h.reclaimIndex.Load() >= 1<<63 {
return
}
@@ -756,23 +753,23 @@ func (h *mheap) reclaim(npage uintptr) {
locked := false
for npage > 0 {
// Pull from accumulated credit first.
- if credit := atomic.Loaduintptr(&h.reclaimCredit); credit > 0 {
+ if credit := h.reclaimCredit.Load(); credit > 0 {
take := credit
if take > npage {
// Take only what we need.
take = npage
}
- if atomic.Casuintptr(&h.reclaimCredit, credit, credit-take) {
+ if h.reclaimCredit.CompareAndSwap(credit, credit-take) {
npage -= take
}
continue
}
// Claim a chunk of work.
- idx := uintptr(atomic.Xadd64(&h.reclaimIndex, pagesPerReclaimerChunk) - pagesPerReclaimerChunk)
+ idx := uintptr(h.reclaimIndex.Add(pagesPerReclaimerChunk) - pagesPerReclaimerChunk)
if idx/pagesPerArena >= uintptr(len(arenas)) {
// Page reclaiming is done.
- atomic.Store64(&h.reclaimIndex, 1<<63)
+ h.reclaimIndex.Store(1 << 63)
break
}
@@ -788,7 +785,7 @@ func (h *mheap) reclaim(npage uintptr) {
npage -= nfound
} else {
// Put spare pages toward global credit.
- atomic.Xadduintptr(&h.reclaimCredit, nfound-npage)
+ h.reclaimCredit.Add(nfound - npage)
npage = 0
}
}
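The reclaimer first tries to satisfy its page debt from a shared credit pool via CAS, otherwise claims a chunk of work by atomically bumping a shared index, and banks any surplus back into the credit pool. A hedged sketch of that work-distribution scheme using sync/atomic; the names and the chunk size are assumptions for illustration:

package main

import (
	"fmt"
	"sync/atomic"
)

var (
	reclaimCredit uint64 // spare pages reclaimed by someone else
	reclaimIndex  uint64 // next chunk of work to claim
)

const chunkPages = 512 // pages reclaimed per claimed chunk (assumed)

// reclaim satisfies a debt of npages, drawing on shared credit first and
// claiming chunks of work otherwise, mirroring the loop in mheap.reclaim.
func reclaim(npages, totalChunks uint64, doChunk func(idx uint64) uint64) {
	for npages > 0 {
		// Pull from accumulated credit first.
		if credit := atomic.LoadUint64(&reclaimCredit); credit > 0 {
			take := credit
			if take > npages {
				take = npages
			}
			if atomic.CompareAndSwapUint64(&reclaimCredit, credit, credit-take) {
				npages -= take
			}
			continue
		}
		// Claim a chunk of work.
		idx := atomic.AddUint64(&reclaimIndex, 1) - 1
		if idx >= totalChunks {
			return // nothing left to reclaim
		}
		found := doChunk(idx)
		if found <= npages {
			npages -= found
		} else {
			atomic.AddUint64(&reclaimCredit, found-npages) // bank the surplus
			npages = 0
		}
	}
}

func main() {
	reclaim(1000, 8, func(idx uint64) uint64 { return chunkPages })
	fmt.Println(atomic.LoadUint64(&reclaimCredit)) // 24: two chunks found 1024, only 1000 needed
}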
@@ -818,7 +815,10 @@ func (h *mheap) reclaimChunk(arenas []arenaIdx, pageIdx, n uintptr) uintptr {
n0 := n
var nFreed uintptr
- sl := newSweepLocker()
+ sl := sweep.active.begin()
+ if !sl.valid {
+ return 0
+ }
for n > 0 {
ai := arenas[pageIdx/pagesPerArena]
ha := h.arenas[ai.l1()][ai.l2()]
@@ -864,7 +864,7 @@ func (h *mheap) reclaimChunk(arenas []arenaIdx, pageIdx, n uintptr) uintptr {
pageIdx += uintptr(len(inUse) * 8)
n -= uintptr(len(inUse) * 8)
}
- sl.dispose()
+ sweep.active.end(sl)
if trace.enabled {
unlock(&h.lock)
// Account for pages scanned but not reclaimed.
@@ -896,10 +896,9 @@ func (s spanAllocType) manual() bool {
//
// spanclass indicates the span's size class and scannability.
//
-// If needzero is true, the memory for the returned span will be zeroed.
-// The boolean returned indicates whether the returned span contains zeroes,
-// either because this was requested, or because it was already zeroed.
-func (h *mheap) alloc(npages uintptr, spanclass spanClass, needzero bool) (*mspan, bool) {
+// Returns a span that has been fully initialized. span.needzero indicates
+// whether the span has been zeroed. Note that it may not be.
+func (h *mheap) alloc(npages uintptr, spanclass spanClass) *mspan {
// Don't do any operations that lock the heap on the G stack.
// It might trigger stack growth, and the stack growth code needs
// to be able to allocate heap.
@@ -912,17 +911,7 @@ func (h *mheap) alloc(npages uintptr, spanclass spanClass, needzero bool) (*mspa
}
s = h.allocSpan(npages, spanAllocHeap, spanclass)
})
-
- if s == nil {
- return nil, false
- }
- isZeroed := s.needzero == 0
- if needzero && !isZeroed {
- memclrNoHeapPointers(unsafe.Pointer(s.base()), s.npages<<_PageShift)
- isZeroed = true
- }
- s.needzero = 0
- return s, isZeroed
+ return s
}
// allocManual allocates a manually-managed span of npage pages.
@@ -1011,7 +1000,7 @@ func (h *mheap) allocNeedsZero(base, npage uintptr) (needZero bool) {
break
}
zeroedBase = atomic.Loaduintptr(&ha.zeroedBase)
- // Sanity check zeroedBase.
+ // Double check basic conditions of zeroedBase.
if zeroedBase <= arenaLimit && zeroedBase > arenaBase {
// The zeroedBase moved into the space we were trying to
// claim. That's very bad, and indicates someone allocated
@@ -1131,6 +1120,7 @@ func (h *mheap) allocSpan(npages uintptr, typ spanAllocType, spanclass spanClass
// Function-global state.
gp := getg()
base, scav := uintptr(0), uintptr(0)
+ growth := uintptr(0)
// On some platforms we need to provide physical page aligned stack
// allocations. Where the page size is less than the physical page
@@ -1176,7 +1166,9 @@ func (h *mheap) allocSpan(npages uintptr, typ spanAllocType, spanclass spanClass
// Try to acquire a base address.
base, scav = h.pages.alloc(npages)
if base == 0 {
- if !h.grow(npages) {
+ var ok bool
+ growth, ok = h.grow(npages)
+ if !ok {
unlock(&h.lock)
return nil
}
@@ -1200,16 +1192,35 @@ func (h *mheap) allocSpan(npages uintptr, typ spanAllocType, spanclass spanClass
// Return memory around the aligned allocation.
spaceBefore := base - allocBase
if spaceBefore > 0 {
- h.pages.free(allocBase, spaceBefore/pageSize)
+ h.pages.free(allocBase, spaceBefore/pageSize, false)
}
spaceAfter := (allocPages-npages)*pageSize - spaceBefore
if spaceAfter > 0 {
- h.pages.free(base+npages*pageSize, spaceAfter/pageSize)
+ h.pages.free(base+npages*pageSize, spaceAfter/pageSize, false)
}
}
unlock(&h.lock)
+ if growth > 0 {
+ // We just caused a heap growth, so scavenge down what will soon be used.
+ // By scavenging inline we deal with the failure to allocate out of
+ // memory fragments by scavenging the memory fragments that are least
+ // likely to be re-used.
+ scavengeGoal := atomic.Load64(&h.scavengeGoal)
+ if retained := heapRetained(); retained+uint64(growth) > scavengeGoal {
+ // The scavenging algorithm requires the heap lock to be dropped so it
+ // can acquire it only sparingly. This is a potentially expensive operation
+ // so it frees up other goroutines to allocate in the meanwhile. In fact,
+ // they can make use of the growth we just created.
+ todo := growth
+ if overage := uintptr(retained + uint64(growth) - scavengeGoal); todo > overage {
+ todo = overage
+ }
+ h.pages.scavenge(todo)
+ }
+ }
+
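After a heap growth, allocSpan now scavenges only the overage beyond the scavenge goal, capped at the amount just grown. A tiny worked sketch of that todo/overage computation with hypothetical numbers:

package main

import "fmt"

// scavengeAfterGrow returns how many bytes to scavenge after growing the
// heap by growth bytes, given the currently retained bytes and the goal —
// the same min(growth, overage) computation as the hunk above.
func scavengeAfterGrow(retained, growth, goal uint64) uint64 {
	if retained+growth <= goal {
		return 0 // still under the goal; nothing to do
	}
	todo := growth
	if overage := retained + growth - goal; todo > overage {
		todo = overage
	}
	return todo
}

func main() {
	// Hypothetical: 96 MiB retained, 16 MiB growth, 100 MiB goal -> scavenge 12 MiB.
	fmt.Println(scavengeAfterGrow(96<<20, 16<<20, 100<<20) >> 20)
}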
HaveSpan:
// At this point, both s != nil and base != 0, and the heap
// lock is no longer held. Initialize the span.
@@ -1311,7 +1322,7 @@ HaveSpan:
atomic.Or8(&arena.pageInUse[pageIdx], pageMask)
// Update related page sweeper stats.
- atomic.Xadd64(&h.pagesInUse, int64(npages))
+ h.pagesInUse.Add(int64(npages))
}
// Make sure the newly allocated span will be observed
@@ -1322,10 +1333,10 @@ HaveSpan:
}
// Try to add at least npage pages of memory to the heap,
-// returning whether it worked.
+// returning how much the heap grew by and whether it worked.
//
// h.lock must be held.
-func (h *mheap) grow(npage uintptr) bool {
+func (h *mheap) grow(npage uintptr) (uintptr, bool) {
assertLockHeld(&h.lock)
// We must grow the heap in whole palloc chunks.
@@ -1347,7 +1358,7 @@ func (h *mheap) grow(npage uintptr) bool {
av, asize := h.sysAlloc(ask)
if av == nil {
print("runtime: out of memory: cannot allocate ", ask, "-byte block (", memstats.heap_sys, " in use)\n")
- return false
+ return 0, false
}
if uintptr(av) == h.curArena.end {
@@ -1407,19 +1418,7 @@ func (h *mheap) grow(npage uintptr) bool {
// space ready for allocation.
h.pages.grow(v, nBase-v)
totalGrowth += nBase - v
-
- // We just caused a heap growth, so scavenge down what will soon be used.
- // By scavenging inline we deal with the failure to allocate out of
- // memory fragments by scavenging the memory fragments that are least
- // likely to be re-used.
- if retained := heapRetained(); retained+uint64(totalGrowth) > h.scavengeGoal {
- todo := totalGrowth
- if overage := uintptr(retained + uint64(totalGrowth) - h.scavengeGoal); todo > overage {
- todo = overage
- }
- h.pages.scavenge(todo, false)
- }
- return true
+ return totalGrowth, true
}
// Free the span back into the heap.
@@ -1432,6 +1431,12 @@ func (h *mheap) freeSpan(s *mspan) {
bytes := s.npages << _PageShift
msanfree(base, bytes)
}
+ if asanenabled {
+ // Tell asan that this entire span is no longer in use.
+ base := unsafe.Pointer(s.base())
+ bytes := s.npages << _PageShift
+ asanpoison(base, bytes)
+ }
h.freeSpanLocked(s, spanAllocHeap)
unlock(&h.lock)
})
@@ -1468,7 +1473,7 @@ func (h *mheap) freeSpanLocked(s *mspan, typ spanAllocType) {
print("mheap.freeSpanLocked - span ", s, " ptr ", hex(s.base()), " allocCount ", s.allocCount, " sweepgen ", s.sweepgen, "/", h.sweepgen, "\n")
throw("mheap.freeSpanLocked - invalid free")
}
- atomic.Xadd64(&h.pagesInUse, -int64(s.npages))
+ h.pagesInUse.Add(-int64(s.npages))
// Clear in-use bit in arena page bitmap.
arena, pageIdx, pageMask := pageIndexOf(s.base())
@@ -1503,7 +1508,7 @@ func (h *mheap) freeSpanLocked(s *mspan, typ spanAllocType) {
memstats.heapStats.release()
// Mark the space as free.
- h.pages.free(s.base(), s.npages)
+ h.pages.free(s.base(), s.npages, false)
// Free the span structure. We no longer have a use for it.
s.state.set(mSpanDead)
@@ -1519,13 +1524,19 @@ func (h *mheap) scavengeAll() {
// the mheap API.
gp := getg()
gp.m.mallocing++
+
lock(&h.lock)
// Start a new scavenge generation so we have a chance to walk
// over the whole heap.
h.pages.scavengeStartGen()
- released := h.pages.scavenge(^uintptr(0), false)
- gen := h.pages.scav.gen
unlock(&h.lock)
+
+ released := h.pages.scavenge(^uintptr(0))
+
+ lock(&h.pages.scav.lock)
+ gen := h.pages.scav.gen
+ unlock(&h.pages.scav.lock)
+
gp.m.mallocing--
if debug.scavtrace > 0 {
@@ -1820,7 +1831,7 @@ func addfinalizer(p unsafe.Pointer, f *funcval, ft *functype, ot *ptrtype) bool
scanobject(base, gcw)
// Mark the finalizer itself, since the
// special isn't part of the GC'd heap.
- scanblock(uintptr(unsafe.Pointer(&s.fn)), sys.PtrSize, &oneptrmask[0], gcw)
+ scanblock(uintptr(unsafe.Pointer(&s.fn)), goarch.PtrSize, &oneptrmask[0], gcw)
releasem(mp)
}
return true
diff --git a/libgo/go/runtime/mkfastlog2table.go b/libgo/go/runtime/mkfastlog2table.go
index 8d78a39..a55f547 100644
--- a/libgo/go/runtime/mkfastlog2table.go
+++ b/libgo/go/runtime/mkfastlog2table.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build ignore
-// +build ignore
// fastlog2Table contains log2 approximations for 5 binary digits.
// This is used to implement fastlog2, which is used for heap sampling.
diff --git a/libgo/go/runtime/mkpreempt.go b/libgo/go/runtime/mkpreempt.go
index f82df93..47488c5 100644
--- a/libgo/go/runtime/mkpreempt.go
+++ b/libgo/go/runtime/mkpreempt.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build ignore
-// +build ignore
// mkpreempt generates the asyncPreempt functions for each
// architecture.
@@ -125,15 +124,13 @@ func header(arch string) {
if beLe[arch] {
base := arch[:len(arch)-1]
fmt.Fprintf(out, "//go:build %s || %sle\n", base, base)
- fmt.Fprintf(out, "// +build %s %sle\n\n", base, base)
}
fmt.Fprintf(out, "#include \"go_asm.h\"\n")
fmt.Fprintf(out, "#include \"textflag.h\"\n\n")
- fmt.Fprintf(out, "// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally.\n")
- fmt.Fprintf(out, "TEXT ·asyncPreempt<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-0\n")
+ fmt.Fprintf(out, "TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0\n")
}
-func p(f string, args ...interface{}) {
+func p(f string, args ...any) {
fmted := fmt.Sprintf(f, args...)
fmt.Fprintf(out, "\t%s\n", strings.ReplaceAll(fmted, "\n", "\n\t"))
}
@@ -202,6 +199,8 @@ func gen386() {
l.add("MOVL", reg, 4)
}
+ softfloat := "GO386_softfloat"
+
// Save SSE state only if supported.
lSSE := layout{stack: l.stack, sp: "SP"}
for i := 0; i < 8; i++ {
@@ -211,13 +210,13 @@ func gen386() {
p("ADJSP $%d", lSSE.stack)
p("NOP SP")
l.save()
- p("CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1\nJNE nosse")
+ p("#ifndef %s", softfloat)
lSSE.save()
- label("nosse:")
+ p("#endif")
p("CALL ·asyncPreempt2(SB)")
- p("CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1\nJNE nosse2")
+ p("#ifndef %s", softfloat)
lSSE.restore()
- label("nosse2:")
+ p("#endif")
l.restore()
p("ADJSP $%d", -lSSE.stack)
@@ -511,12 +510,12 @@ func genRISCV() {
}
func genRISCV64() {
- // X0 (zero), X1 (LR), X2 (SP), X4 (TP), X27 (g), X31 (TMP) are special.
+ // X0 (zero), X1 (LR), X2 (SP), X3 (GP), X4 (TP), X27 (g), X31 (TMP) are special.
var l = layout{sp: "X2", stack: 8}
- // Add integer registers (X3, X5-X26, X28-30).
- for i := 3; i < 31; i++ {
- if i == 4 || i == 27 {
+ // Add integer registers (X5-X26, X28-30).
+ for i := 5; i < 31; i++ {
+ if i == 27 {
continue
}
reg := fmt.Sprintf("X%d", i)
diff --git a/libgo/go/runtime/mksizeclasses.go b/libgo/go/runtime/mksizeclasses.go
index b1b10e9..64ed844 100644
--- a/libgo/go/runtime/mksizeclasses.go
+++ b/libgo/go/runtime/mksizeclasses.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build ignore
-// +build ignore
// Generate tables for small malloc size classes.
//
diff --git a/libgo/go/runtime/mpagealloc.go b/libgo/go/runtime/mpagealloc.go
index 071f1fc27..2725e3b 100644
--- a/libgo/go/runtime/mpagealloc.go
+++ b/libgo/go/runtime/mpagealloc.go
@@ -155,7 +155,7 @@ func addrsToSummaryRange(level int, base, limit uintptr) (lo int, hi int) {
// upper-bound. Note that the exclusive upper bound may be within a
// summary at this level, meaning if we just do the obvious computation
// hi will end up being an inclusive upper bound. Unfortunately, just
- // adding 1 to that is too broad since we might be on the very edge of
+ // adding 1 to that is too broad since we might be on the very edge
// of a summary's max page count boundary for this level
// (1 << levelLogPages[level]). So, make limit an inclusive upper bound
// then shift, then add 1, so we get an exclusive upper bound at the end.
@@ -226,6 +226,8 @@ type pageAlloc struct {
// are currently available. Otherwise one might iterate over unused
// ranges.
//
+ // Protected by mheapLock.
+ //
// TODO(mknyszek): Consider changing the definition of the bitmap
// such that 1 means free and 0 means in-use so that summaries and
// the bitmaps align better on zero-values.
@@ -261,29 +263,41 @@ type pageAlloc struct {
inUse addrRanges
// scav stores the scavenger state.
- //
- // All fields are protected by mheapLock.
scav struct {
+ lock mutex
+
// inUse is a slice of ranges of address space which have not
// yet been looked at by the scavenger.
+ //
+ // Protected by lock.
inUse addrRanges
// gen is the scavenge generation number.
+ //
+ // Protected by lock.
gen uint32
// reservationBytes is how large of a reservation should be made
// in bytes of address space for each scavenge iteration.
+ //
+ // Protected by lock.
reservationBytes uintptr
// released is the amount of memory released this generation.
+ //
+ // Updated atomically.
released uintptr
// scavLWM is the lowest (offset) address that the scavenger reached this
// scavenge generation.
+ //
+ // Protected by lock.
scavLWM offAddr
// freeHWM is the highest (offset) address of a page that was freed to
// the page allocator this scavenge generation.
+ //
+ // Protected by mheapLock.
freeHWM offAddr
}
@@ -864,17 +878,19 @@ Found:
// Must run on the system stack because p.mheapLock must be held.
//
//go:systemstack
-func (p *pageAlloc) free(base, npages uintptr) {
+func (p *pageAlloc) free(base, npages uintptr, scavenged bool) {
assertLockHeld(p.mheapLock)
// If we're freeing pages below the p.searchAddr, update searchAddr.
if b := (offAddr{base}); b.lessThan(p.searchAddr) {
p.searchAddr = b
}
- // Update the free high watermark for the scavenger.
limit := base + npages*pageSize - 1
- if offLimit := (offAddr{limit}); p.scav.freeHWM.lessThan(offLimit) {
- p.scav.freeHWM = offLimit
+ if !scavenged {
+ // Update the free high watermark for the scavenger.
+ if offLimit := (offAddr{limit}); p.scav.freeHWM.lessThan(offLimit) {
+ p.scav.freeHWM = offLimit
+ }
}
if npages == 1 {
// Fast path: we're clearing a single bit, and we know exactly
diff --git a/libgo/go/runtime/mpagealloc_32bit.go b/libgo/go/runtime/mpagealloc_32bit.go
index 0741546..0979e00 100644
--- a/libgo/go/runtime/mpagealloc_32bit.go
+++ b/libgo/go/runtime/mpagealloc_32bit.go
@@ -3,19 +3,12 @@
// license that can be found in the LICENSE file.
//go:build 386 || arm || mips || mipsle || wasm || (ios && arm64) || amd64p32 || armbe || m68k || mips64p32 || mips64p32le || nios2 || ppc || riscv || s390 || sh || shbe || sparc
-// +build 386 arm mips mipsle wasm ios,arm64 amd64p32 armbe m68k mips64p32 mips64p32le nios2 ppc riscv s390 sh shbe sparc
// wasm is treated as a 32-bit architecture for the purposes of the page
// allocator, even though it has 64-bit pointers. This is because any wasm
// pointer always has its top 32 bits as zero, so the effective heap address
// space is only 2^32 bytes in size (see heapAddrBits).
-// ios/arm64 is treated as a 32-bit architecture for the purposes of the
-// page allocator, even though it has 64-bit pointers and a 33-bit address
-// space (see heapAddrBits). The 33 bit address space cannot be rounded up
-// to 64 bits because there are too many summary levels to fit in just 33
-// bits.
-
package runtime
import "unsafe"
diff --git a/libgo/go/runtime/mpagealloc_64bit.go b/libgo/go/runtime/mpagealloc_64bit.go
index 729a4eb..3d0d4c6 100644
--- a/libgo/go/runtime/mpagealloc_64bit.go
+++ b/libgo/go/runtime/mpagealloc_64bit.go
@@ -2,10 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build amd64 || (!ios && arm64) || mips64 || mips64le || ppc64 || ppc64le || riscv64 || s390x || arm64be || alpha || sparc64 || ia64
-// +build amd64 !ios,arm64 mips64 mips64le ppc64 ppc64le riscv64 s390x arm64be alpha sparc64 ia64
-
-// See mpagealloc_32bit.go for why ios/arm64 is excluded here.
+//go:build amd64 || arm64 || mips64 || mips64le || ppc64 || ppc64le || riscv64 || s390x || arm64be || alpha || sparc64 || ia64
package runtime
diff --git a/libgo/go/runtime/mpagealloc_test.go b/libgo/go/runtime/mpagealloc_test.go
index 5d979fa..f2b82e3 100644
--- a/libgo/go/runtime/mpagealloc_test.go
+++ b/libgo/go/runtime/mpagealloc_test.go
@@ -6,6 +6,7 @@ package runtime_test
import (
"fmt"
+ "internal/goos"
. "runtime"
"testing"
)
@@ -165,7 +166,9 @@ func TestPageAllocGrow(t *testing.T) {
},
},
}
- if PageAlloc64Bit != 0 {
+ // Disable these tests on iOS since we have a small address space.
+ // See #46860.
+ if PageAlloc64Bit != 0 && goos.IsIos == 0 {
tests["ExtremelyDiscontiguous"] = test{
chunks: []ChunkIdx{
BaseChunkIdx,
@@ -571,7 +574,9 @@ func TestPageAllocAlloc(t *testing.T) {
},
},
}
- if PageAlloc64Bit != 0 {
+ // Disable these tests on iOS since we have a small address space.
+ // See #46860.
+ if PageAlloc64Bit != 0 && goos.IsIos == 0 {
const chunkIdxBigJump = 0x100000 // chunk index offset which translates to O(TiB)
// This test attempts to trigger a bug wherein we look at unmapped summary
diff --git a/libgo/go/runtime/mpagecache.go b/libgo/go/runtime/mpagecache.go
index 4b5c66d..7206e2d 100644
--- a/libgo/go/runtime/mpagecache.go
+++ b/libgo/go/runtime/mpagecache.go
@@ -123,9 +123,10 @@ func (p *pageAlloc) allocToCache() pageCache {
}
c := pageCache{}
ci := chunkIndex(p.searchAddr.addr()) // chunk index
+ var chunk *pallocData
if p.summary[len(p.summary)-1][ci] != 0 {
// Fast path: there's free pages at or near the searchAddr address.
- chunk := p.chunkOf(ci)
+ chunk = p.chunkOf(ci)
j, _ := chunk.find(1, chunkPageIndex(p.searchAddr.addr()))
if j == ^uint(0) {
throw("bad summary data")
@@ -146,7 +147,7 @@ func (p *pageAlloc) allocToCache() pageCache {
return pageCache{}
}
ci := chunkIndex(addr)
- chunk := p.chunkOf(ci)
+ chunk = p.chunkOf(ci)
c = pageCache{
base: alignDown(addr, 64*pageSize),
cache: ^chunk.pages64(chunkPageIndex(addr)),
@@ -154,8 +155,11 @@ func (p *pageAlloc) allocToCache() pageCache {
}
}
- // Set the bits as allocated and clear the scavenged bits.
- p.allocRange(c.base, pageCachePages)
+ // Set the page bits as allocated and clear the scavenged bits, but
+ // be careful to only set and clear the relevant bits.
+ cpi := chunkPageIndex(c.base)
+ chunk.allocPages64(cpi, c.cache)
+ chunk.scavenged.clearBlock64(cpi, c.cache&c.scav /* free and scavenged */)
// Update as an allocation, but note that it's not contiguous.
p.update(c.base, pageCachePages, false, true)
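allocToCache hands out a 64-page cache: a base aligned down to 64 pages, a bitmap of free pages (inverted from the chunk's alloc bits), and a matching scavenged bitmap. A hedged sketch of allocating single pages out of such a cache with math/bits; the type is modeled on pageCache but is not identical to it, and the page size is an assumption:

package main

import (
	"fmt"
	"math/bits"
)

const pageSize = 8192 // assumed page size for the example

// pageCacheSketch models a 64-page allocation cache: a 1 bit in cache means
// the corresponding page is free, matching the inverted chunk bits used in
// allocToCache above.
type pageCacheSketch struct {
	base  uintptr // base address, aligned to 64*pageSize
	cache uint64  // bitmap of free pages
	scav  uint64  // bitmap of scavenged pages
}

// alloc takes one free page from the cache, reporting whether it was scavenged.
func (c *pageCacheSketch) alloc() (addr uintptr, scavenged, ok bool) {
	if c.cache == 0 {
		return 0, false, false
	}
	i := uint(bits.TrailingZeros64(c.cache)) // lowest free page
	c.cache &^= 1 << i                       // mark it allocated
	scavenged = c.scav&(1<<i) != 0
	c.scav &^= 1 << i
	return c.base + uintptr(i)*pageSize, scavenged, true
}

func main() {
	c := pageCacheSketch{base: 0x100000, cache: 0b1010, scav: 0b1000}
	for {
		addr, scavd, ok := c.alloc()
		if !ok {
			break
		}
		fmt.Printf("page at %#x scavenged=%v\n", addr, scavd)
	}
}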
diff --git a/libgo/go/runtime/mpagecache_test.go b/libgo/go/runtime/mpagecache_test.go
index 2ed0c0a..6cb0620 100644
--- a/libgo/go/runtime/mpagecache_test.go
+++ b/libgo/go/runtime/mpagecache_test.go
@@ -5,6 +5,7 @@
package runtime_test
import (
+ "internal/goos"
"math/rand"
. "runtime"
"testing"
@@ -261,17 +262,18 @@ func TestPageAllocAllocToCache(t *testing.T) {
t.Skip("skipping because virtual memory is limited; see #36210")
}
type test struct {
- before map[ChunkIdx][]BitRange
- scav map[ChunkIdx][]BitRange
- hits []PageCache // expected base addresses and patterns
- after map[ChunkIdx][]BitRange
+ beforeAlloc map[ChunkIdx][]BitRange
+ beforeScav map[ChunkIdx][]BitRange
+ hits []PageCache // expected base addresses and patterns
+ afterAlloc map[ChunkIdx][]BitRange
+ afterScav map[ChunkIdx][]BitRange
}
tests := map[string]test{
"AllFree": {
- before: map[ChunkIdx][]BitRange{
+ beforeAlloc: map[ChunkIdx][]BitRange{
BaseChunkIdx: {},
},
- scav: map[ChunkIdx][]BitRange{
+ beforeScav: map[ChunkIdx][]BitRange{
BaseChunkIdx: {{1, 1}, {64, 64}},
},
hits: []PageCache{
@@ -280,17 +282,17 @@ func TestPageAllocAllocToCache(t *testing.T) {
NewPageCache(PageBase(BaseChunkIdx, 128), ^uint64(0), 0),
NewPageCache(PageBase(BaseChunkIdx, 192), ^uint64(0), 0),
},
- after: map[ChunkIdx][]BitRange{
+ afterAlloc: map[ChunkIdx][]BitRange{
BaseChunkIdx: {{0, 256}},
},
},
"ManyArena": {
- before: map[ChunkIdx][]BitRange{
+ beforeAlloc: map[ChunkIdx][]BitRange{
BaseChunkIdx: {{0, PallocChunkPages}},
BaseChunkIdx + 1: {{0, PallocChunkPages}},
BaseChunkIdx + 2: {{0, PallocChunkPages - 64}},
},
- scav: map[ChunkIdx][]BitRange{
+ beforeScav: map[ChunkIdx][]BitRange{
BaseChunkIdx: {{0, PallocChunkPages}},
BaseChunkIdx + 1: {{0, PallocChunkPages}},
BaseChunkIdx + 2: {},
@@ -298,46 +300,50 @@ func TestPageAllocAllocToCache(t *testing.T) {
hits: []PageCache{
NewPageCache(PageBase(BaseChunkIdx+2, PallocChunkPages-64), ^uint64(0), 0),
},
- after: map[ChunkIdx][]BitRange{
+ afterAlloc: map[ChunkIdx][]BitRange{
BaseChunkIdx: {{0, PallocChunkPages}},
BaseChunkIdx + 1: {{0, PallocChunkPages}},
BaseChunkIdx + 2: {{0, PallocChunkPages}},
},
},
"NotContiguous": {
- before: map[ChunkIdx][]BitRange{
+ beforeAlloc: map[ChunkIdx][]BitRange{
BaseChunkIdx: {{0, PallocChunkPages}},
BaseChunkIdx + 0xff: {{0, 0}},
},
- scav: map[ChunkIdx][]BitRange{
+ beforeScav: map[ChunkIdx][]BitRange{
BaseChunkIdx: {{0, PallocChunkPages}},
BaseChunkIdx + 0xff: {{31, 67}},
},
hits: []PageCache{
NewPageCache(PageBase(BaseChunkIdx+0xff, 0), ^uint64(0), ((uint64(1)<<33)-1)<<31),
},
- after: map[ChunkIdx][]BitRange{
+ afterAlloc: map[ChunkIdx][]BitRange{
BaseChunkIdx: {{0, PallocChunkPages}},
BaseChunkIdx + 0xff: {{0, 64}},
},
+ afterScav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 0xff: {{64, 34}},
+ },
},
"First": {
- before: map[ChunkIdx][]BitRange{
+ beforeAlloc: map[ChunkIdx][]BitRange{
BaseChunkIdx: {{0, 32}, {33, 31}, {96, 32}},
},
- scav: map[ChunkIdx][]BitRange{
+ beforeScav: map[ChunkIdx][]BitRange{
BaseChunkIdx: {{1, 4}, {31, 5}, {66, 2}},
},
hits: []PageCache{
NewPageCache(PageBase(BaseChunkIdx, 0), 1<<32, 1<<32),
NewPageCache(PageBase(BaseChunkIdx, 64), (uint64(1)<<32)-1, 0x3<<2),
},
- after: map[ChunkIdx][]BitRange{
+ afterAlloc: map[ChunkIdx][]BitRange{
BaseChunkIdx: {{0, 128}},
},
},
"Fail": {
- before: map[ChunkIdx][]BitRange{
+ beforeAlloc: map[ChunkIdx][]BitRange{
BaseChunkIdx: {{0, PallocChunkPages}},
},
hits: []PageCache{
@@ -345,12 +351,31 @@ func TestPageAllocAllocToCache(t *testing.T) {
NewPageCache(0, 0, 0),
NewPageCache(0, 0, 0),
},
- after: map[ChunkIdx][]BitRange{
+ afterAlloc: map[ChunkIdx][]BitRange{
BaseChunkIdx: {{0, PallocChunkPages}},
},
},
+ "RetainScavBits": {
+ beforeAlloc: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 1}, {10, 2}},
+ },
+ beforeScav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 4}, {11, 1}},
+ },
+ hits: []PageCache{
+ NewPageCache(PageBase(BaseChunkIdx, 0), ^uint64(0x1|(0x3<<10)), 0x7<<1),
+ },
+ afterAlloc: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 64}},
+ },
+ afterScav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 1}, {11, 1}},
+ },
+ },
}
- if PageAlloc64Bit != 0 {
+ // Disable these tests on iOS since we have a small address space.
+ // See #46860.
+ if PageAlloc64Bit != 0 && goos.IsIos == 0 {
const chunkIdxBigJump = 0x100000 // chunk index offset which translates to O(TiB)
// This test is similar to the one with the same name for
@@ -359,11 +384,11 @@ func TestPageAllocAllocToCache(t *testing.T) {
sumsPerPhysPage := ChunkIdx(PhysPageSize / PallocSumBytes)
baseChunkIdx := BaseChunkIdx &^ (sumsPerPhysPage - 1)
tests["DiscontiguousMappedSumBoundary"] = test{
- before: map[ChunkIdx][]BitRange{
+ beforeAlloc: map[ChunkIdx][]BitRange{
baseChunkIdx + sumsPerPhysPage - 1: {{0, PallocChunkPages - 1}},
baseChunkIdx + chunkIdxBigJump: {{1, PallocChunkPages - 1}},
},
- scav: map[ChunkIdx][]BitRange{
+ beforeScav: map[ChunkIdx][]BitRange{
baseChunkIdx + sumsPerPhysPage - 1: {},
baseChunkIdx + chunkIdxBigJump: {},
},
@@ -372,7 +397,7 @@ func TestPageAllocAllocToCache(t *testing.T) {
NewPageCache(PageBase(baseChunkIdx+chunkIdxBigJump, 0), 1, 0),
NewPageCache(0, 0, 0),
},
- after: map[ChunkIdx][]BitRange{
+ afterAlloc: map[ChunkIdx][]BitRange{
baseChunkIdx + sumsPerPhysPage - 1: {{0, PallocChunkPages}},
baseChunkIdx + chunkIdxBigJump: {{0, PallocChunkPages}},
},
@@ -381,7 +406,7 @@ func TestPageAllocAllocToCache(t *testing.T) {
for name, v := range tests {
v := v
t.Run(name, func(t *testing.T) {
- b := NewPageAlloc(v.before, v.scav)
+ b := NewPageAlloc(v.beforeAlloc, v.beforeScav)
defer FreePageAlloc(b)
for _, expect := range v.hits {
@@ -390,7 +415,7 @@ func TestPageAllocAllocToCache(t *testing.T) {
return
}
}
- want := NewPageAlloc(v.after, v.scav)
+ want := NewPageAlloc(v.afterAlloc, v.afterScav)
defer FreePageAlloc(want)
checkPageAlloc(t, want, b)

diff --git a/libgo/go/runtime/mpallocbits.go b/libgo/go/runtime/mpallocbits.go
index ff11230..f63164b 100644
--- a/libgo/go/runtime/mpallocbits.go
+++ b/libgo/go/runtime/mpallocbits.go
@@ -57,6 +57,12 @@ func (b *pageBits) setAll() {
}
}
+// setBlock64 sets, within the 64-bit aligned block of bits containing the i'th
+// bit, those bits of the block that are set in v.
+func (b *pageBits) setBlock64(i uint, v uint64) {
+ b[i/64] |= v
+}
+
// clear clears bit i of pageBits.
func (b *pageBits) clear(i uint) {
b[i/64] &^= 1 << (i % 64)
@@ -93,6 +99,12 @@ func (b *pageBits) clearAll() {
}
}
+// clearBlock64 clears, within the 64-bit aligned block of bits containing the i'th
+// bit, those bits of the block that are set in v.
+func (b *pageBits) clearBlock64(i uint, v uint64) {
+ b[i/64] &^= v
+}
+
// popcntRange counts the number of set bits in the
// range [i, i+n).
func (b *pageBits) popcntRange(i, n uint) (s uint) {
@@ -367,6 +379,12 @@ func (b *pallocBits) pages64(i uint) uint64 {
return (*pageBits)(b).block64(i)
}
+// allocPages64 marks as allocated, in the 64-page aligned block containing the
+// i'th page, the pages whose bits are set in alloc.
+func (b *pallocBits) allocPages64(i uint, alloc uint64) {
+ (*pageBits)(b).setBlock64(i, alloc)
+}
+
// findBitRange64 returns the bit index of the first set of
// n consecutive 1 bits. If no consecutive set of 1 bits of
// size n may be found in c, then it returns an integer >= 64.
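
Unlike set and clear, which touch a single bit, the new setBlock64/clearBlock64 helpers operate on the whole 64-bit word containing bit i; the caller is expected to have already positioned v within that word, so no shift by i%64 happens. A tiny illustration with a plain array standing in for pageBits (illustrative only):

    package main

    import "fmt"

    func main() {
        var b [4]uint64 // stands in for pageBits

        i := uint(70)       // bit 70 lives in word 70/64 == 1
        v := uint64(0b1011) // bits to set within that 64-bit block

        b[i/64] |= v              // setBlock64
        fmt.Printf("%#x\n", b[1]) // 0xb

        b[i/64] &^= uint64(0b0010) // clearBlock64 with a different mask
        fmt.Printf("%#x\n", b[1])  // 0x9
    }
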
diff --git a/libgo/go/runtime/mprof.go b/libgo/go/runtime/mprof.go
index 9b11597..2c34e70 100644
--- a/libgo/go/runtime/mprof.go
+++ b/libgo/go/runtime/mprof.go
@@ -152,7 +152,7 @@ var (
xbuckets *bucket // mutex profile buckets
sbuckets *bucket // pre-symbolization profile buckets (stacks fixed up)
freebuckets *bucket // freelist of unused fixed up profile buckets
- buckhash *[179999]*bucket
+ buckhash *[buckHashSize]*bucket
bucketmem uintptr
mProf struct {
diff --git a/libgo/go/runtime/mranges.go b/libgo/go/runtime/mranges.go
index 84a2c06..e0be1e1 100644
--- a/libgo/go/runtime/mranges.go
+++ b/libgo/go/runtime/mranges.go
@@ -10,7 +10,7 @@
package runtime
import (
- "runtime/internal/sys"
+ "internal/goarch"
"unsafe"
)
@@ -167,7 +167,7 @@ func (a *addrRanges) init(sysStat *sysMemStat) {
ranges := (*notInHeapSlice)(unsafe.Pointer(&a.ranges))
ranges.len = 0
ranges.cap = 16
- ranges.array = (*notInHeap)(persistentalloc(unsafe.Sizeof(addrRange{})*uintptr(ranges.cap), sys.PtrSize, sysStat))
+ ranges.array = (*notInHeap)(persistentalloc(unsafe.Sizeof(addrRange{})*uintptr(ranges.cap), goarch.PtrSize, sysStat))
a.sysStat = sysStat
a.totalBytes = 0
}
@@ -294,7 +294,7 @@ func (a *addrRanges) add(r addrRange) {
ranges := (*notInHeapSlice)(unsafe.Pointer(&a.ranges))
ranges.len = len(oldRanges) + 1
ranges.cap = cap(oldRanges) * 2
- ranges.array = (*notInHeap)(persistentalloc(unsafe.Sizeof(addrRange{})*uintptr(ranges.cap), sys.PtrSize, a.sysStat))
+ ranges.array = (*notInHeap)(persistentalloc(unsafe.Sizeof(addrRange{})*uintptr(ranges.cap), goarch.PtrSize, a.sysStat))
// Copy in the old array, but make space for the new range.
copy(a.ranges[:i], oldRanges[:i])
@@ -364,7 +364,7 @@ func (a *addrRanges) cloneInto(b *addrRanges) {
ranges := (*notInHeapSlice)(unsafe.Pointer(&b.ranges))
ranges.len = 0
ranges.cap = cap(a.ranges)
- ranges.array = (*notInHeap)(persistentalloc(unsafe.Sizeof(addrRange{})*uintptr(ranges.cap), sys.PtrSize, b.sysStat))
+ ranges.array = (*notInHeap)(persistentalloc(unsafe.Sizeof(addrRange{})*uintptr(ranges.cap), goarch.PtrSize, b.sysStat))
}
b.ranges = b.ranges[:len(a.ranges)]
b.totalBytes = a.totalBytes
diff --git a/libgo/go/runtime/msan/msan.go b/libgo/go/runtime/msan/msan.go
index c81577d..f1bf4e1 100644
--- a/libgo/go/runtime/msan/msan.go
+++ b/libgo/go/runtime/msan/msan.go
@@ -2,8 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build msan,linux
-// +build amd64 arm64
+//go:build msan && linux && (amd64 || arm64)
package msan
diff --git a/libgo/go/runtime/msan0.go b/libgo/go/runtime/msan0.go
index b1096a6..2f5fd2d 100644
--- a/libgo/go/runtime/msan0.go
+++ b/libgo/go/runtime/msan0.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build !msan
-// +build !msan
// Dummy MSan support API, used when not built with -msan.
diff --git a/libgo/go/runtime/mspanset.go b/libgo/go/runtime/mspanset.go
index 04ec800..893019a 100644
--- a/libgo/go/runtime/mspanset.go
+++ b/libgo/go/runtime/mspanset.go
@@ -6,8 +6,8 @@ package runtime
import (
"internal/cpu"
+ "internal/goarch"
"runtime/internal/atomic"
- "runtime/internal/sys"
"unsafe"
)
@@ -82,7 +82,7 @@ func (b *spanSet) push(s *mspan) {
retry:
if top < spineLen {
spine := atomic.Loadp(unsafe.Pointer(&b.spine))
- blockp := add(spine, sys.PtrSize*top)
+ blockp := add(spine, goarch.PtrSize*top)
block = (*spanSetBlock)(atomic.Loadp(blockp))
} else {
// Add a new block to the spine, potentially growing
@@ -102,11 +102,11 @@ retry:
if newCap == 0 {
newCap = spanSetInitSpineCap
}
- newSpine := persistentalloc(newCap*sys.PtrSize, cpu.CacheLineSize, &memstats.gcMiscSys)
+ newSpine := persistentalloc(newCap*goarch.PtrSize, cpu.CacheLineSize, &memstats.gcMiscSys)
if b.spineCap != 0 {
// Blocks are allocated off-heap, so
// no write barriers.
- memmove(newSpine, b.spine, b.spineCap*sys.PtrSize)
+ memmove(newSpine, b.spine, b.spineCap*goarch.PtrSize)
}
// Spine is allocated off-heap, so no write barrier.
atomic.StorepNoWB(unsafe.Pointer(&b.spine), newSpine)
@@ -124,7 +124,7 @@ retry:
block = spanSetBlockPool.alloc()
// Add it to the spine.
- blockp := add(b.spine, sys.PtrSize*top)
+ blockp := add(b.spine, goarch.PtrSize*top)
// Blocks are allocated off-heap, so no write barrier.
atomic.StorepNoWB(blockp, unsafe.Pointer(block))
atomic.Storeuintptr(&b.spineLen, spineLen+1)
@@ -181,7 +181,7 @@ claimLoop:
// grows monotonically and we've already verified it, we'll definitely
// be reading from a valid block.
spine := atomic.Loadp(unsafe.Pointer(&b.spine))
- blockp := add(spine, sys.PtrSize*uintptr(top))
+ blockp := add(spine, goarch.PtrSize*uintptr(top))
// Given that the spine length is correct, we know we will never
// see a nil block here, since the length is always updated after
@@ -241,7 +241,7 @@ func (b *spanSet) reset() {
// since it may be pushed into again. In order to avoid leaking
// memory since we're going to reset the head and tail, clean
// up such a block now, if it exists.
- blockp := (**spanSetBlock)(add(b.spine, sys.PtrSize*uintptr(top)))
+ blockp := (**spanSetBlock)(add(b.spine, goarch.PtrSize*uintptr(top)))
block := *blockp
if block != nil {
// Sanity check the popped value.
diff --git a/libgo/go/runtime/mstats.go b/libgo/go/runtime/mstats.go
index 535b78a1..adfe302 100644
--- a/libgo/go/runtime/mstats.go
+++ b/libgo/go/runtime/mstats.go
@@ -7,8 +7,8 @@
package runtime
import (
+ "internal/goarch"
"runtime/internal/atomic"
- "runtime/internal/sys"
"unsafe"
)
@@ -712,7 +712,7 @@ type heapStatsDelta struct {
// Add a uint32 to ensure this struct is a multiple of 8 bytes in size.
// Only necessary on 32-bit platforms.
- _ [(sys.PtrSize / 4) % 2]uint32
+ _ [(goarch.PtrSize / 4) % 2]uint32
}
// merge adds in the deltas from b into a.
@@ -789,7 +789,15 @@ type consistentHeapStats struct {
//
// The caller's P must not change between acquire and
// release. This also means that the caller should not
-// acquire a P or release its P in between.
+// acquire a P or release its P in between. A P also must
+// not acquire a given consistentHeapStats if it hasn't
+// yet released it.
+//
+// nosplit because a stack growth in this function could
+// lead to a stack allocation that could reenter the
+// function.
+//
+//go:nosplit
func (m *consistentHeapStats) acquire() *heapStatsDelta {
if pp := getg().m.p.ptr(); pp != nil {
seq := atomic.Xadd(&pp.statsSeq, 1)
@@ -813,6 +821,12 @@ func (m *consistentHeapStats) acquire() *heapStatsDelta {
// The caller's P must not change between acquire and
// release. This also means that the caller should not
// acquire a P or release its P in between.
+//
+// nosplit because a stack growth in this function could
+// lead to a stack allocation that causes another acquire
+// before this operation has completed.
+//
+//go:nosplit
func (m *consistentHeapStats) release() {
if pp := getg().m.p.ptr(); pp != nil {
seq := atomic.Xadd(&pp.statsSeq, 1)
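
The acquire/release pair visible here bumps the per-P statsSeq with atomic.Xadd on both sides, which reads like a seqlock-style protocol: the counter is odd while a P is mutating its delta and even otherwise, and the //go:nosplit annotations keep a stack growth from re-entering the critical section. A toy, self-contained analogue of that even/odd discipline (the reading of the protocol is an inference from the code shown here, not something this patch states):

    package main

    import (
        "fmt"
        "sync/atomic"
    )

    func main() {
        var seq uint32
        var value int64

        // Writer critical section.
        atomic.AddUint32(&seq, 1) // now odd: update in progress
        atomic.AddInt64(&value, 42)
        atomic.AddUint32(&seq, 1) // now even: update published

        // Reader: only trust a snapshot taken while seq was even and unchanged.
        for {
            s1 := atomic.LoadUint32(&seq)
            if s1%2 != 0 {
                continue // writer active, retry
            }
            v := atomic.LoadInt64(&value)
            if atomic.LoadUint32(&seq) == s1 {
                fmt.Println(v)
                break
            }
        }
    }
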
diff --git a/libgo/go/runtime/mwbbuf.go b/libgo/go/runtime/mwbbuf.go
index b8d4fc2..a92b545 100644
--- a/libgo/go/runtime/mwbbuf.go
+++ b/libgo/go/runtime/mwbbuf.go
@@ -23,8 +23,8 @@
package runtime
import (
+ "internal/goarch"
"runtime/internal/atomic"
- "runtime/internal/sys"
"unsafe"
)
@@ -145,7 +145,7 @@ func (b *wbBuf) putFast(old, new uintptr) bool {
p := (*[2]uintptr)(unsafe.Pointer(b.next))
p[0] = old
p[1] = new
- b.next += 2 * sys.PtrSize
+ b.next += 2 * goarch.PtrSize
return b.next != b.end
}
diff --git a/libgo/go/runtime/nbpipe_pipe.go b/libgo/go/runtime/nbpipe_pipe.go
index b17257e..408e1ec 100644
--- a/libgo/go/runtime/nbpipe_pipe.go
+++ b/libgo/go/runtime/nbpipe_pipe.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build aix || darwin
-// +build aix darwin
package runtime
diff --git a/libgo/go/runtime/nbpipe_pipe2.go b/libgo/go/runtime/nbpipe_pipe2.go
index 34ead01..5805cab 100644
--- a/libgo/go/runtime/nbpipe_pipe2.go
+++ b/libgo/go/runtime/nbpipe_pipe2.go
@@ -2,8 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build freebsd || hurd || linux || netbsd || openbsd || solaris
-// +build freebsd hurd linux netbsd openbsd solaris
+//go:build dragonfly || freebsd || hurd || linux || netbsd || openbsd || solaris
package runtime
diff --git a/libgo/go/runtime/nbpipe_test.go b/libgo/go/runtime/nbpipe_test.go
index 22d41cd..c15cb14 100644
--- a/libgo/go/runtime/nbpipe_test.go
+++ b/libgo/go/runtime/nbpipe_test.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build aix || darwin || dragonfly || freebsd || hurd || linux || netbsd || openbsd || solaris
-// +build aix darwin dragonfly freebsd hurd linux netbsd openbsd solaris
package runtime_test
diff --git a/libgo/go/runtime/netpoll.go b/libgo/go/runtime/netpoll.go
index 82380f6..dc0511d 100644
--- a/libgo/go/runtime/netpoll.go
+++ b/libgo/go/runtime/netpoll.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build aix || darwin || dragonfly || freebsd || hurd || (js && wasm) || linux || netbsd || openbsd || solaris || windows
-// +build aix darwin dragonfly freebsd hurd js,wasm linux netbsd openbsd solaris windows
package runtime
@@ -75,30 +74,99 @@ const pollBlockSize = 4 * 1024
//go:notinheap
type pollDesc struct {
link *pollDesc // in pollcache, protected by pollcache.lock
+ fd uintptr // constant for pollDesc usage lifetime
+
+ // atomicInfo holds bits from closing, rd, and wd,
+ // which are only ever written while holding the lock,
+ // summarized for use by netpollcheckerr,
+ // which cannot acquire the lock.
+ // After writing these fields under lock in a way that
+ // might change the summary, code must call publishInfo
+ // before releasing the lock.
+ // Code that changes fields and then calls netpollunblock
+ // (while still holding the lock) must call publishInfo
+ // before calling netpollunblock, because publishInfo is what
+ // stops netpollblock from blocking anew
+ // (by changing the result of netpollcheckerr).
+ // atomicInfo also holds the eventErr bit,
+ // recording whether a poll event on the fd got an error;
+ // atomicInfo is the only source of truth for that bit.
+ atomicInfo atomic.Uint32 // atomic pollInfo
+
+ // rg, wg are accessed atomically and hold g pointers.
+ // (Using atomic.Uintptr here is similar to using guintptr elsewhere.)
+ rg atomic.Uintptr // pdReady, pdWait, G waiting for read or nil
+ wg atomic.Uintptr // pdReady, pdWait, G waiting for write or nil
- // The lock protects pollOpen, pollSetDeadline, pollUnblock and deadlineimpl operations.
- // This fully covers seq, rt and wt variables. fd is constant throughout the PollDesc lifetime.
- // pollReset, pollWait, pollWaitCanceled and runtime·netpollready (IO readiness notification)
- // proceed w/o taking the lock. So closing, everr, rg, rd, wg and wd are manipulated
- // in a lock-free way by all operations.
- // NOTE(dvyukov): the following code uses uintptr to store *g (rg/wg),
- // that will blow up when GC starts moving objects.
lock mutex // protects the following fields
- fd uintptr
closing bool
- everr bool // marks event scanning error happened
user uint32 // user settable cookie
rseq uintptr // protects from stale read timers
- rg uintptr // pdReady, pdWait, G waiting for read or nil
rt timer // read deadline timer (set if rt.f != nil)
- rd int64 // read deadline
+ rd int64 // read deadline (a nanotime in the future, -1 when expired)
wseq uintptr // protects from stale write timers
- wg uintptr // pdReady, pdWait, G waiting for write or nil
wt timer // write deadline timer
- wd int64 // write deadline
+ wd int64 // write deadline (a nanotime in the future, -1 when expired)
self *pollDesc // storage for indirect interface. See (*pollDesc).makeArg.
}
+// pollInfo is the bits needed by netpollcheckerr, stored atomically,
+// mostly duplicating state that is manipulated under lock in pollDesc.
+// The one exception is the pollEventErr bit, which is maintained only
+// in the pollInfo.
+type pollInfo uint32
+
+const (
+ pollClosing = 1 << iota
+ pollEventErr
+ pollExpiredReadDeadline
+ pollExpiredWriteDeadline
+)
+
+func (i pollInfo) closing() bool { return i&pollClosing != 0 }
+func (i pollInfo) eventErr() bool { return i&pollEventErr != 0 }
+func (i pollInfo) expiredReadDeadline() bool { return i&pollExpiredReadDeadline != 0 }
+func (i pollInfo) expiredWriteDeadline() bool { return i&pollExpiredWriteDeadline != 0 }
+
+// info returns the pollInfo corresponding to pd.
+func (pd *pollDesc) info() pollInfo {
+ return pollInfo(pd.atomicInfo.Load())
+}
+
+// publishInfo updates pd.atomicInfo (returned by pd.info)
+// using the other values in pd.
+// It must be called while holding pd.lock,
+// and it must be called after changing anything
+// that might affect the info bits.
+// In practice this means after changing closing
+// or changing rd or wd from < 0 to >= 0.
+func (pd *pollDesc) publishInfo() {
+ var info uint32
+ if pd.closing {
+ info |= pollClosing
+ }
+ if pd.rd < 0 {
+ info |= pollExpiredReadDeadline
+ }
+ if pd.wd < 0 {
+ info |= pollExpiredWriteDeadline
+ }
+
+ // Set all of x except the pollEventErr bit.
+ x := pd.atomicInfo.Load()
+ for !pd.atomicInfo.CompareAndSwap(x, (x&pollEventErr)|info) {
+ x = pd.atomicInfo.Load()
+ }
+}
+
+// setEventErr sets the result of pd.info().eventErr() to b.
+func (pd *pollDesc) setEventErr(b bool) {
+ x := pd.atomicInfo.Load()
+ for (x&pollEventErr != 0) != b && !pd.atomicInfo.CompareAndSwap(x, x^pollEventErr) {
+ x = pd.atomicInfo.Load()
+ }
+}
+
type pollCache struct {
lock mutex
first *pollDesc
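
publishInfo owns every bit of atomicInfo except pollEventErr, which is written independently by setEventErr, so its CAS loop folds the old event-error bit into the freshly computed summary instead of overwriting it. A concrete walk-through of one update (the scenario is invented; the constants reuse the definitions above):

    package main

    import "fmt"

    const (
        pollClosing = 1 << iota
        pollEventErr
        pollExpiredReadDeadline
        pollExpiredWriteDeadline
    )

    func main() {
        // Currently published word: the fd is closing and a poll event error
        // has been recorded.
        old := uint32(pollClosing | pollEventErr) // 0b0011

        // publishInfo recomputes every bit except pollEventErr from the fields
        // guarded by pd.lock; suppose the read deadline has now expired too.
        info := uint32(pollClosing | pollExpiredReadDeadline) // 0b0101

        // The CAS installs the fresh bits while carrying the event-error bit over.
        updated := (old & pollEventErr) | info
        fmt.Printf("%#b\n", updated) // 0b111: closing, eventErr, expiredReadDeadline
    }
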
@@ -150,22 +218,25 @@ func poll_runtime_isPollServerDescriptor(fd uintptr) bool {
func poll_runtime_pollOpen(fd uintptr) (uintptr, int) {
pd := pollcache.alloc()
lock(&pd.lock)
- if pd.wg != 0 && pd.wg != pdReady {
+ wg := pd.wg.Load()
+ if wg != 0 && wg != pdReady {
throw("runtime: blocked write on free polldesc")
}
- if pd.rg != 0 && pd.rg != pdReady {
+ rg := pd.rg.Load()
+ if rg != 0 && rg != pdReady {
throw("runtime: blocked read on free polldesc")
}
pd.fd = fd
pd.closing = false
- pd.everr = false
+ pd.setEventErr(false)
pd.rseq++
- pd.rg = 0
+ pd.rg.Store(0)
pd.rd = 0
pd.wseq++
- pd.wg = 0
+ pd.wg.Store(0)
pd.wd = 0
pd.self = pd
+ pd.publishInfo()
unlock(&pd.lock)
errno := netpollopen(fd, pd)
@@ -182,10 +253,12 @@ func poll_runtime_pollClose(ctx uintptr) {
if !pd.closing {
throw("runtime: close polldesc w/o unblock")
}
- if pd.wg != 0 && pd.wg != pdReady {
+ wg := pd.wg.Load()
+ if wg != 0 && wg != pdReady {
throw("runtime: blocked write on closing polldesc")
}
- if pd.rg != 0 && pd.rg != pdReady {
+ rg := pd.rg.Load()
+ if rg != 0 && rg != pdReady {
throw("runtime: blocked read on closing polldesc")
}
netpollclose(pd.fd)
@@ -210,9 +283,9 @@ func poll_runtime_pollReset(ctx uintptr, mode int) int {
return errcode
}
if mode == 'r' {
- pd.rg = 0
+ pd.rg.Store(0)
} else if mode == 'w' {
- pd.wg = 0
+ pd.wg.Store(0)
}
return pollNoError
}
@@ -277,6 +350,7 @@ func poll_runtime_pollSetDeadline(ctx uintptr, d int64, mode int) {
if mode == 'w' || mode == 'r'+'w' {
pd.wd = d
}
+ pd.publishInfo()
combo := pd.rd > 0 && pd.rd == pd.wd
rtf := netpollReadDeadline
if combo {
@@ -318,15 +392,13 @@ func poll_runtime_pollSetDeadline(ctx uintptr, d int64, mode int) {
}
}
// If we set the new deadline in the past, unblock currently pending IO if any.
+ // Note that pd.publishInfo has already been called, above, immediately after modifying rd and wd.
var rg, wg *g
- if pd.rd < 0 || pd.wd < 0 {
- atomic.StorepNoWB(noescape(unsafe.Pointer(&wg)), nil) // full memory barrier between stores to rd/wd and load of rg/wg in netpollunblock
- if pd.rd < 0 {
- rg = netpollunblock(pd, 'r', false)
- }
- if pd.wd < 0 {
- wg = netpollunblock(pd, 'w', false)
- }
+ if pd.rd < 0 {
+ rg = netpollunblock(pd, 'r', false)
+ }
+ if pd.wd < 0 {
+ wg = netpollunblock(pd, 'w', false)
}
unlock(&pd.lock)
if rg != nil {
@@ -348,7 +420,7 @@ func poll_runtime_pollUnblock(ctx uintptr) {
pd.rseq++
pd.wseq++
var rg, wg *g
- atomic.StorepNoWB(noescape(unsafe.Pointer(&rg)), nil) // full memory barrier between store to closing and read of rg/wg in netpollunblock
+ pd.publishInfo()
rg = netpollunblock(pd, 'r', false)
wg = netpollunblock(pd, 'w', false)
if pd.rt.f != nil {
@@ -393,16 +465,17 @@ func netpollready(toRun *gList, pd *pollDesc, mode int32) {
}
func netpollcheckerr(pd *pollDesc, mode int32) int {
- if pd.closing {
+ info := pd.info()
+ if info.closing() {
return pollErrClosing
}
- if (mode == 'r' && pd.rd < 0) || (mode == 'w' && pd.wd < 0) {
+ if (mode == 'r' && info.expiredReadDeadline()) || (mode == 'w' && info.expiredWriteDeadline()) {
return pollErrTimeout
}
// Report an event scanning error only on a read event.
// An error on a write event will be captured in a subsequent
// write call that is able to report a more specific error.
- if mode == 'r' && pd.everr {
+ if mode == 'r' && info.eventErr() {
return pollErrNotPollable
}
return pollNoError
@@ -426,6 +499,8 @@ func netpollgoready(gp *g, traceskip int) {
// returns true if IO is ready, or false if timedout or closed
// waitio - wait only for completed IO, ignore errors
+// Concurrent calls to netpollblock in the same mode are forbidden, as pollDesc
+// can hold only a single waiting goroutine for each mode.
func netpollblock(pd *pollDesc, mode int32, waitio bool) bool {
gpp := &pd.rg
if mode == 'w' {
@@ -434,27 +509,29 @@ func netpollblock(pd *pollDesc, mode int32, waitio bool) bool {
// set the gpp semaphore to pdWait
for {
- old := *gpp
- if old == pdReady {
- *gpp = 0
+ // Consume notification if already ready.
+ if gpp.CompareAndSwap(pdReady, 0) {
return true
}
- if old != 0 {
- throw("runtime: double wait")
- }
- if atomic.Casuintptr(gpp, 0, pdWait) {
+ if gpp.CompareAndSwap(0, pdWait) {
break
}
+
+ // Double check that this isn't corrupt; otherwise we'd loop
+ // forever.
+ if v := gpp.Load(); v != pdReady && v != 0 {
+ throw("runtime: double wait")
+ }
}
// need to recheck error states after setting gpp to pdWait
// this is necessary because runtime_pollUnblock/runtime_pollSetDeadline/deadlineimpl
- // do the opposite: store to closing/rd/wd, membarrier, load of rg/wg
- if waitio || netpollcheckerr(pd, mode) == 0 {
+ // do the opposite: store to closing/rd/wd, publishInfo, load of rg/wg
+ if waitio || netpollcheckerr(pd, mode) == pollNoError {
gopark(netpollblockcommit, unsafe.Pointer(gpp), waitReasonIOWait, traceEvGoBlockNet, 5)
}
// be careful to not lose concurrent pdReady notification
- old := atomic.Xchguintptr(gpp, 0)
+ old := gpp.Swap(0)
if old > pdWait {
throw("runtime: corrupted polldesc")
}
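
The rg/wg word that netpollblock and netpollunblock manipulate is a small state machine: 0 (no waiter), pdReady (a notification is pending), pdWait (a goroutine is about to park), or a *g (a goroutine is parked). A self-contained toy of the two common transitions; the pdReady/pdWait values mirror the runtime's, everything else is illustrative:

    package main

    import (
        "fmt"
        "sync/atomic"
    )

    const (
        pdReady uintptr = 1
        pdWait  uintptr = 2
    )

    func main() {
        var gpp uintptr // stands in for pd.rg; 0 means "no waiter"

        // netpollblock: no pending pdReady notification to consume, so
        // announce the intent to wait before parking.
        if !atomic.CompareAndSwapUintptr(&gpp, pdReady, 0) {
            atomic.CompareAndSwapUintptr(&gpp, 0, pdWait)
        }
        fmt.Println(atomic.LoadUintptr(&gpp) == pdWait) // true: would now gopark

        // netpollunblock on I/O readiness: publish pdReady and recover whatever
        // was stored (pdWait here; in the runtime it may be a parked *g).
        for {
            old := atomic.LoadUintptr(&gpp)
            if atomic.CompareAndSwapUintptr(&gpp, old, pdReady) {
                fmt.Println(old == pdWait) // true: the runtime would ready that g
                break
            }
        }
    }
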
@@ -468,7 +545,7 @@ func netpollunblock(pd *pollDesc, mode int32, ioready bool) *g {
}
for {
- old := *gpp
+ old := gpp.Load()
if old == pdReady {
return nil
}
@@ -481,7 +558,7 @@ func netpollunblock(pd *pollDesc, mode int32, ioready bool) *g {
if ioready {
new = pdReady
}
- if atomic.Casuintptr(gpp, old, new) {
+ if gpp.CompareAndSwap(old, new) {
if old == pdWait {
old = 0
}
@@ -509,7 +586,7 @@ func netpolldeadlineimpl(pd *pollDesc, seq uintptr, read, write bool) {
throw("runtime: inconsistent read deadline")
}
pd.rd = -1
- atomic.StorepNoWB(unsafe.Pointer(&pd.rt.f), nil) // full memory barrier between store to rd and load of rg in netpollunblock
+ pd.publishInfo()
rg = netpollunblock(pd, 'r', false)
}
var wg *g
@@ -518,7 +595,7 @@ func netpolldeadlineimpl(pd *pollDesc, seq uintptr, read, write bool) {
throw("runtime: inconsistent write deadline")
}
pd.wd = -1
- atomic.StorepNoWB(unsafe.Pointer(&pd.wt.f), nil) // full memory barrier between store to wd and load of wg in netpollunblock
+ pd.publishInfo()
wg = netpollunblock(pd, 'w', false)
}
unlock(&pd.lock)
@@ -530,15 +607,15 @@ func netpolldeadlineimpl(pd *pollDesc, seq uintptr, read, write bool) {
}
}
-func netpollDeadline(arg interface{}, seq uintptr) {
+func netpollDeadline(arg any, seq uintptr) {
netpolldeadlineimpl(arg.(*pollDesc), seq, true, true)
}
-func netpollReadDeadline(arg interface{}, seq uintptr) {
+func netpollReadDeadline(arg any, seq uintptr) {
netpolldeadlineimpl(arg.(*pollDesc), seq, true, false)
}
-func netpollWriteDeadline(arg interface{}, seq uintptr) {
+func netpollWriteDeadline(arg any, seq uintptr) {
netpolldeadlineimpl(arg.(*pollDesc), seq, false, true)
}
@@ -571,7 +648,7 @@ func (c *pollCache) alloc() *pollDesc {
// a conversion requires an allocation because pointers to
// go:notinheap types (which pollDesc is) must be stored
// in interfaces indirectly. See issue 42076.
-func (pd *pollDesc) makeArg() (i interface{}) {
+func (pd *pollDesc) makeArg() (i any) {
x := (*eface)(unsafe.Pointer(&i))
x._type = pdType
x.data = unsafe.Pointer(&pd.self)
@@ -579,6 +656,6 @@ func (pd *pollDesc) makeArg() (i interface{}) {
}
var (
- pdEface interface{} = (*pollDesc)(nil)
- pdType *_type = efaceOf(&pdEface)._type
+ pdEface any = (*pollDesc)(nil)
+ pdType *_type = efaceOf(&pdEface)._type
)
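
The interface{} → any rewrites scattered through this patch are purely cosmetic: Go 1.18 predeclares any as an alias for the empty interface, so the two spellings are interchangeable in every position, for example:

    package main

    import "fmt"

    func describe(v any) string { // identical to describe(v interface{})
        return fmt.Sprintf("%T", v)
    }

    func main() {
        fmt.Println(describe(42), describe("x")) // int string
    }
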
diff --git a/libgo/go/runtime/netpoll_aix.go b/libgo/go/runtime/netpoll_aix.go
index bb6ce37..a3d263c 100644
--- a/libgo/go/runtime/netpoll_aix.go
+++ b/libgo/go/runtime/netpoll_aix.go
@@ -206,10 +206,7 @@ retry:
pfd.events &= ^_POLLOUT
}
if mode != 0 {
- pds[i].everr = false
- if pfd.revents == _POLLERR {
- pds[i].everr = true
- }
+ pds[i].setEventErr(pfd.revents == _POLLERR)
netpollready(&toRun, pds[i], mode)
n--
}
diff --git a/libgo/go/runtime/netpoll_epoll.go b/libgo/go/runtime/netpoll_epoll.go
index 60a1dc0..f87e153 100644
--- a/libgo/go/runtime/netpoll_epoll.go
+++ b/libgo/go/runtime/netpoll_epoll.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build linux
-// +build linux
package runtime
@@ -182,10 +181,7 @@ retry:
}
if mode != 0 {
pd := *(**pollDesc)(unsafe.Pointer(&ev.data))
- pd.everr = false
- if ev.events == _EPOLLERR {
- pd.everr = true
- }
+ pd.setEventErr(ev.events == _EPOLLERR)
netpollready(&toRun, pd, mode)
}
}
diff --git a/libgo/go/runtime/netpoll_fake.go b/libgo/go/runtime/netpoll_fake.go
index 8366f28..de1dcae 100644
--- a/libgo/go/runtime/netpoll_fake.go
+++ b/libgo/go/runtime/netpoll_fake.go
@@ -6,7 +6,6 @@
// Should never be used, because wasm/js network connections do not honor "SetNonblock".
//go:build js && wasm
-// +build js,wasm
package runtime
diff --git a/libgo/go/runtime/netpoll_kqueue.go b/libgo/go/runtime/netpoll_kqueue.go
index 1e1b110..5e45bbf 100644
--- a/libgo/go/runtime/netpoll_kqueue.go
+++ b/libgo/go/runtime/netpoll_kqueue.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build darwin || dragonfly || freebsd || netbsd || openbsd
-// +build darwin dragonfly freebsd netbsd openbsd
package runtime
@@ -188,10 +187,7 @@ retry:
}
if mode != 0 {
pd := (*pollDesc)(unsafe.Pointer(ev.udata))
- pd.everr = false
- if ev.flags == _EV_ERROR {
- pd.everr = true
- }
+ pd.setEventErr(ev.flags == _EV_ERROR)
netpollready(&toRun, pd, mode)
}
}
diff --git a/libgo/go/runtime/netpoll_solaris.go b/libgo/go/runtime/netpoll_solaris.go
index d5302b5..d9f1853 100644
--- a/libgo/go/runtime/netpoll_solaris.go
+++ b/libgo/go/runtime/netpoll_solaris.go
@@ -135,7 +135,7 @@ func netpollclose(fd uintptr) int32 {
// this call, port_getn will return one and only one event for that
// particular descriptor, so this function needs to be called again.
func netpollupdate(pd *pollDesc, set, clear uint32) {
- if pd.closing {
+ if pd.info().closing() {
return
}
diff --git a/libgo/go/runtime/netpoll_stub.go b/libgo/go/runtime/netpoll_stub.go
index 33ab8eb..d0a63bc 100644
--- a/libgo/go/runtime/netpoll_stub.go
+++ b/libgo/go/runtime/netpoll_stub.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build plan9
-// +build plan9
package runtime
diff --git a/libgo/go/runtime/norace_test.go b/libgo/go/runtime/norace_test.go
index 9ad5dde..d49f2ec 100644
--- a/libgo/go/runtime/norace_test.go
+++ b/libgo/go/runtime/norace_test.go
@@ -4,7 +4,6 @@
// The file contains tests that cannot run under race detector for some reason.
//go:build !race
-// +build !race
package runtime_test
diff --git a/libgo/go/runtime/os3_solaris.go b/libgo/go/runtime/os3_solaris.go
index b2fa1d3..ec23ce2 100644
--- a/libgo/go/runtime/os3_solaris.go
+++ b/libgo/go/runtime/os3_solaris.go
@@ -35,3 +35,8 @@ func sysargs(argc int32, argv **byte) {
func solarisExecutablePath() string {
return executablePath
}
+
+//go:nosplit
+func validSIGPROF(mp *m, c *sigctxt) bool {
+ return true
+}
diff --git a/libgo/go/runtime/os_aix.go b/libgo/go/runtime/os_aix.go
index 390fefa..d43765a 100644
--- a/libgo/go/runtime/os_aix.go
+++ b/libgo/go/runtime/os_aix.go
@@ -3,11 +3,11 @@
// license that can be found in the LICENSE file.
//go:build aix
-// +build aix
package runtime
import (
+ "internal/abi"
"unsafe"
)
@@ -65,7 +65,7 @@ func semacreate(mp *m) {
//go:nosplit
func semasleep(ns int64) int32 {
- _m_ := getg().m
+ mp := getg().m
if ns >= 0 {
var ts timespec
@@ -86,18 +86,18 @@ func semasleep(ns int64) int32 {
ts.tv_sec = timespec_sec_t(sec)
ts.tv_nsec = timespec_nsec_t(nsec)
- if sem_timedwait((*semt)(unsafe.Pointer(_m_.waitsema)), &ts) != 0 {
+ if sem_timedwait((*semt)(unsafe.Pointer(mp.waitsema)), &ts) != 0 {
err := errno()
if err == _ETIMEDOUT || err == _EAGAIN || err == _EINTR {
return -1
}
- println("sem_timedwait err ", err, " ts.tv_sec ", ts.tv_sec, " ts.tv_nsec ", ts.tv_nsec, " ns ", ns, " id ", _m_.id)
+ println("sem_timedwait err ", err, " ts.tv_sec ", ts.tv_sec, " ts.tv_nsec ", ts.tv_nsec, " ns ", ns, " id ", mp.id)
throw("sem_timedwait")
}
return 0
}
for {
- r1 := sem_wait((*semt)(unsafe.Pointer(_m_.waitsema)))
+ r1 := sem_wait((*semt)(unsafe.Pointer(mp.waitsema)))
if r1 == 0 {
break
}
@@ -121,6 +121,19 @@ func osinit() {
physPageSize = uintptr(sysconf(__SC_PAGE_SIZE))
}
+func setProcessCPUProfiler(hz int32) {
+ setProcessCPUProfilerTimer(hz)
+}
+
+func setThreadCPUProfiler(hz int32) {
+ setThreadCPUProfilerHz(hz)
+}
+
+//go:nosplit
+func validSIGPROF(mp *m, c *sigctxt) bool {
+ return true
+}
+
const (
_CLOCK_REALTIME = 9
_CLOCK_MONOTONIC = 10
diff --git a/libgo/go/runtime/os_darwin.go b/libgo/go/runtime/os_darwin.go
index 0ae716c..10904fd 100644
--- a/libgo/go/runtime/os_darwin.go
+++ b/libgo/go/runtime/os_darwin.go
@@ -136,3 +136,8 @@ func osinit() {
ncpu = getncpu()
physPageSize = getPageSize()
}
+
+//go:nosplit
+func validSIGPROF(mp *m, c *sigctxt) bool {
+ return true
+}
diff --git a/libgo/go/runtime/os_dragonfly.go b/libgo/go/runtime/os_dragonfly.go
index d214f7f..bb85097 100644
--- a/libgo/go/runtime/os_dragonfly.go
+++ b/libgo/go/runtime/os_dragonfly.go
@@ -5,7 +5,8 @@
package runtime
import (
- "runtime/internal/sys"
+ "internal/abi"
+ "internal/goarch"
"unsafe"
)
diff --git a/libgo/go/runtime/os_freebsd.go b/libgo/go/runtime/os_freebsd.go
index 9c68366..c3fc102 100644
--- a/libgo/go/runtime/os_freebsd.go
+++ b/libgo/go/runtime/os_freebsd.go
@@ -5,6 +5,7 @@
package runtime
import (
+ "internal/goarch"
"unsafe"
)
@@ -87,9 +88,9 @@ func getncpu() int32 {
return 1
}
- maskSize := uintptr(int(maxcpus+7) / 8)
- if maskSize < sys.PtrSize {
- maskSize = sys.PtrSize
+ maskSize := int(maxcpus+7) / 8
+ if maskSize < goarch.PtrSize {
+ maskSize = goarch.PtrSize
}
if maskSize > uintptr(len(mask)) {
maskSize = uintptr(len(mask))
@@ -181,7 +182,7 @@ func sysargs(argc int32, argv **byte) {
n++
// now argv+n is auxv
- auxv := (*[1 << 28]uintptr)(add(unsafe.Pointer(argv), uintptr(n)*sys.PtrSize))
+ auxv := (*[1 << 28]uintptr)(add(unsafe.Pointer(argv), uintptr(n)*goarch.PtrSize))
sysauxv(auxv[:])
}
diff --git a/libgo/go/runtime/os_js.go b/libgo/go/runtime/os_js.go
index 52b64e7..9ed9167 100644
--- a/libgo/go/runtime/os_js.go
+++ b/libgo/go/runtime/os_js.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build js && wasm
-// +build js,wasm
package runtime
diff --git a/libgo/go/runtime/os_linux.go b/libgo/go/runtime/os_linux.go
index 627b6d6..6ffd898 100644
--- a/libgo/go/runtime/os_linux.go
+++ b/libgo/go/runtime/os_linux.go
@@ -5,12 +5,21 @@
package runtime
import (
- "runtime/internal/sys"
+ "internal/goarch"
+ "runtime/internal/atomic"
"unsafe"
)
type mOS struct {
- unused byte
+ // profileTimer holds the ID of the POSIX interval timer for profiling CPU
+ // usage on this thread.
+ //
+ // It is valid when the profileTimerValid field is non-zero. A thread
+ // creates and manages its own timer, and these fields are read and written
+ // only by this thread. But because some of the reads on profileTimerValid
+ // are in signal handling code, access to that field uses atomic operations.
+ profileTimer uintptr
+ profileTimerValid uint32
}
func getProcID() uint64 {
@@ -120,7 +129,7 @@ var procAuxv = []byte("/proc/self/auxv\x00")
var addrspace_vec [1]byte
-//extern mincore
+//extern-sysinfo mincore
func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32
func sysargs(argc int32, argv **byte) {
@@ -135,7 +144,7 @@ func sysargs(argc int32, argv **byte) {
n++
// now argv+n is auxv
- auxv := (*[1 << 28]uintptr)(add(unsafe.Pointer(argv), uintptr(n)*sys.PtrSize))
+ auxv := (*[1 << 28]uintptr)(add(unsafe.Pointer(argv), uintptr(n)*goarch.PtrSize))
if sysauxv(auxv[:]) != 0 {
return
}
@@ -233,3 +242,145 @@ func osinit() {
ncpu = getproccount()
physHugePageSize = getHugePageSize()
}
+
+//go:noescape
+//extern-sysinfo timer_create
+func timer_create(clockid int32, sevp *_sigevent, timerid *uintptr) int32
+
+//go:noescape
+//extern-sysinfo timer_settime
+func timer_settime(timerid uintptr, flags int32, new, old *_itimerspec) int32
+
+//extern-sysinfo timer_delete
+func timer_delete(timerid uintptr) int32
+
+// go118UseTimerCreateProfiler enables the per-thread CPU profiler.
+const go118UseTimerCreateProfiler = true
+
+// validSIGPROF compares this signal delivery's code against the signal sources
+// that the profiler uses, returning whether the delivery should be processed.
+// To be processed, a signal delivery from a known profiling mechanism should
+// correspond to the best profiling mechanism available to this thread. Signals
+// from other sources are always considered valid.
+//
+//go:nosplit
+func validSIGPROF(mp *m, c *sigctxt) bool {
+ code := int32(c.sigcode())
+ setitimer := code == _SI_KERNEL
+ timer_create := code == _SI_TIMER
+
+ if !(setitimer || timer_create) {
+ // The signal doesn't correspond to a profiling mechanism that the
+ // runtime enables itself. There's no reason to process it, but there's
+ // no reason to ignore it either.
+ return true
+ }
+
+ if mp == nil {
+ // Since we don't have an M, we can't check if there's an active
+ // per-thread timer for this thread. We don't know how long this thread
+ // has been around, and if it happened to interact with the Go scheduler
+ // at a time when profiling was active (causing it to have a per-thread
+ // timer). But it may have never interacted with the Go scheduler, or
+ // never while profiling was active. To avoid double-counting, process
+ // only signals from setitimer.
+ //
+ // When a custom cgo traceback function has been registered (on
+ // platforms that support runtime.SetCgoTraceback), SIGPROF signals
+ // delivered to a thread that cannot find a matching M do this check in
+ // the assembly implementations of runtime.cgoSigtramp.
+ return setitimer
+ }
+
+ // Having an M means the thread interacts with the Go scheduler, and we can
+ // check whether there's an active per-thread timer for this thread.
+ if atomic.Load(&mp.profileTimerValid) != 0 {
+ // If this M has its own per-thread CPU profiling interval timer, we
+ // should track the SIGPROF signals that come from that timer (for
+ // accurate reporting of its CPU usage; see issue 35057) and ignore any
+ // that it gets from the process-wide setitimer (to not over-count its
+ // CPU consumption).
+ return timer_create
+ }
+
+ // No active per-thread timer means the only valid profiler is setitimer.
+ return setitimer
+}
+
+func setProcessCPUProfiler(hz int32) {
+ setProcessCPUProfilerTimer(hz)
+}
+
+func setThreadCPUProfiler(hz int32) {
+ mp := getg().m
+ mp.profilehz = hz
+
+ if !go118UseTimerCreateProfiler {
+ return
+ }
+
+ // destroy any active timer
+ if atomic.Load(&mp.profileTimerValid) != 0 {
+ timerid := mp.profileTimer
+ atomic.Store(&mp.profileTimerValid, 0)
+ mp.profileTimer = 0
+
+ ret := timer_delete(timerid)
+ if ret != 0 {
+ print("runtime: failed to disable profiling timer; timer_delete(", timerid, ") errno=", -ret, "\n")
+ throw("timer_delete")
+ }
+ }
+
+ if hz == 0 {
+ // If the goal was to disable profiling for this thread, then the job's done.
+ return
+ }
+
+ // The period of the timer should be 1/Hz. For every "1/Hz" of additional
+ // work, the user should expect one additional sample in the profile.
+ //
+ // But to scale down to very small amounts of application work, to observe
+ // even CPU usage of "one tenth" of the requested period, set the initial
+ // timing delay in a different way: So that "one tenth" of a period of CPU
+ // spend shows up as a 10% chance of one sample (for an expected value of
+ // 0.1 samples), and so that "two and six tenths" periods of CPU spend show
+ // up as a 60% chance of 3 samples and a 40% chance of 2 samples (for an
+ // expected value of 2.6). Set the initial delay to a value in the uniform
+ // random distribution between 0 and the desired period. And because "0"
+ // means "disable timer", add 1 so the half-open interval [0,period) turns
+ // into (0,period].
+ //
+ // Otherwise, this would show up as a bias away from short-lived threads and
+ // from threads that are only occasionally active: for example, when the
+ // garbage collector runs on a mostly-idle system, the additional threads it
+ // activates may do a couple milliseconds of GC-related work and nothing
+ // else in the few seconds that the profiler observes.
+ spec := new(_itimerspec)
+ spec.it_value.setNsec(1 + int64(fastrandn(uint32(1e9/hz))))
+ spec.it_interval.setNsec(1e9 / int64(hz))
+
+ var timerid uintptr
+ var sevp _sigevent
+ sevp.sigev_notify = _SIGEV_THREAD_ID
+ sevp.sigev_signo = _SIGPROF
+ *((*int32)(unsafe.Pointer(&sevp._sigev_un))) = int32(mp.procid)
+ ret := timer_create(_CLOCK_THREAD_CPUTIME_ID, &sevp, &timerid)
+ if ret != 0 {
+ // If we cannot create a timer for this M, leave profileTimerValid false
+ // to fall back to the process-wide setitimer profiler.
+ return
+ }
+
+ ret = timer_settime(timerid, 0, spec, nil)
+ if ret != 0 {
+ print("runtime: failed to configure profiling timer; timer_settime(", timerid,
+ ", 0, {interval: {",
+ spec.it_interval.tv_sec, "s + ", spec.it_interval.tv_nsec, "ns} value: {",
+ spec.it_value.tv_sec, "s + ", spec.it_value.tv_nsec, "ns}}, nil) errno=", -ret, "\n")
+ throw("timer_settime")
+ }
+
+ mp.profileTimer = timerid
+ atomic.Store(&mp.profileTimerValid, 1)
+}
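
The per-thread timer fires every 1/hz seconds of CPU time, and the first expiry is drawn uniformly from (0, period] so that a thread doing only a fraction of a period's work still has a proportional chance of being sampled, while avoiding the zero value that would disarm the timer. The same arithmetic outside the runtime might look like this (math/rand stands in for the runtime's fastrandn; the names are illustrative):

    package main

    import (
        "fmt"
        "math/rand"
        "time"
    )

    func main() {
        hz := int64(100) // profiling rate: 100 samples per second of CPU time

        period := time.Duration(1e9/hz) * time.Nanosecond // 10ms

        // First expiry: uniform in [0, period) nanoseconds, plus 1ns because a
        // zero it_value would mean "disarm the timer".
        first := time.Duration(1+rand.Int63n(1e9/hz)) * time.Nanosecond

        fmt.Printf("interval=%v first=%v\n", period, first)
    }
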
diff --git a/libgo/go/runtime/os_linux_arm64.go b/libgo/go/runtime/os_linux_arm64.go
index 2b51a2a..2c2b2a1 100644
--- a/libgo/go/runtime/os_linux_arm64.go
+++ b/libgo/go/runtime/os_linux_arm64.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build arm64
-// +build arm64
package runtime
diff --git a/libgo/go/runtime/os_linux_mips64x.go b/libgo/go/runtime/os_linux_mips64x.go
index bc85ab3..bc22a0d 100644
--- a/libgo/go/runtime/os_linux_mips64x.go
+++ b/libgo/go/runtime/os_linux_mips64x.go
@@ -3,8 +3,6 @@
// license that can be found in the LICENSE file.
//go:build linux && (mips64 || mips64le)
-// +build linux
-// +build mips64 mips64le
package runtime
diff --git a/libgo/go/runtime/os_linux_mipsx.go b/libgo/go/runtime/os_linux_mipsx.go
index be31f19..b538130 100644
--- a/libgo/go/runtime/os_linux_mipsx.go
+++ b/libgo/go/runtime/os_linux_mipsx.go
@@ -3,8 +3,6 @@
// license that can be found in the LICENSE file.
//go:build linux && (mips || mipsle)
-// +build linux
-// +build mips mipsle
package runtime
diff --git a/libgo/go/runtime/os_linux_noauxv.go b/libgo/go/runtime/os_linux_noauxv.go
index 59b5aac..7b84f71 100644
--- a/libgo/go/runtime/os_linux_noauxv.go
+++ b/libgo/go/runtime/os_linux_noauxv.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build linux && !arm && !arm64 && !mips && !mipsle && !mips64 && !mips64le && !s390x && !ppc64 && !ppc64le
-// +build linux,!arm,!arm64,!mips,!mipsle,!mips64,!mips64le,!s390x,!ppc64,!ppc64le
package runtime
diff --git a/libgo/go/runtime/os_linux_ppc64x.go b/libgo/go/runtime/os_linux_ppc64x.go
index 43a75c7..9abc9ec 100644
--- a/libgo/go/runtime/os_linux_ppc64x.go
+++ b/libgo/go/runtime/os_linux_ppc64x.go
@@ -3,8 +3,6 @@
// license that can be found in the LICENSE file.
//go:build linux && (ppc64 || ppc64le)
-// +build linux
-// +build ppc64 ppc64le
package runtime
diff --git a/libgo/go/runtime/os_netbsd.go b/libgo/go/runtime/os_netbsd.go
index b2c815e..11871bc 100644
--- a/libgo/go/runtime/os_netbsd.go
+++ b/libgo/go/runtime/os_netbsd.go
@@ -5,8 +5,9 @@
package runtime
import (
+ "internal/abi"
+ "internal/goarch"
"runtime/internal/atomic"
- "runtime/internal/sys"
"unsafe"
)
@@ -64,6 +65,13 @@ func getPageSize() uintptr {
return 0
}
+func getOSRev() int {
+ if osrev, ok := sysctlInt([]uint32{_CTL_KERN, _KERN_OSREV}); ok {
+ return int(osrev)
+ }
+ return 0
+}
+
//go:nosplit
func semacreate(mp *m) {
}
@@ -123,6 +131,7 @@ func osinit() {
if physPageSize == 0 {
physPageSize = getPageSize()
}
+ needSysmonWorkaround = getOSRev() < 902000000 // NetBSD 9.2
}
func sysargs(argc int32, argv **byte) {
@@ -137,7 +146,7 @@ func sysargs(argc int32, argv **byte) {
n++
// now argv+n is auxv
- auxv := (*[1 << 28]uintptr)(add(unsafe.Pointer(argv), uintptr(n)*sys.PtrSize))
+ auxv := (*[1 << 28]uintptr)(add(unsafe.Pointer(argv), uintptr(n)*goarch.PtrSize))
sysauxv(auxv[:])
}
diff --git a/libgo/go/runtime/os_netbsd_arm64.go b/libgo/go/runtime/os_netbsd_arm64.go
index 8d21b0a..2dda9c9 100644
--- a/libgo/go/runtime/os_netbsd_arm64.go
+++ b/libgo/go/runtime/os_netbsd_arm64.go
@@ -4,11 +4,14 @@
package runtime
-import "unsafe"
+import (
+ "internal/abi"
+ "unsafe"
+)
func lwp_mcontext_init(mc *mcontextt, stk unsafe.Pointer, mp *m, gp *g, fn uintptr) {
// Machine dependent mcontext initialisation for LWP.
- mc.__gregs[_REG_ELR] = uint64(funcPC(lwp_tramp))
+ mc.__gregs[_REG_ELR] = uint64(abi.FuncPCABI0(lwp_tramp))
mc.__gregs[_REG_X31] = uint64(uintptr(stk))
mc.__gregs[_REG_X0] = uint64(uintptr(unsafe.Pointer(mp)))
mc.__gregs[_REG_X1] = uint64(uintptr(unsafe.Pointer(mp.g0)))
diff --git a/libgo/go/runtime/os_only_solaris.go b/libgo/go/runtime/os_only_solaris.go
index 3829683c..0c72500 100644
--- a/libgo/go/runtime/os_only_solaris.go
+++ b/libgo/go/runtime/os_only_solaris.go
@@ -5,7 +5,6 @@
// Solaris code that doesn't also apply to illumos.
//go:build !illumos
-// +build !illumos
package runtime
diff --git a/libgo/go/runtime/os_openbsd.go b/libgo/go/runtime/os_openbsd.go
index 50f0480..6a9b951 100644
--- a/libgo/go/runtime/os_openbsd.go
+++ b/libgo/go/runtime/os_openbsd.go
@@ -5,6 +5,7 @@
package runtime
import (
+ "internal/abi"
"runtime/internal/atomic"
"unsafe"
)
diff --git a/libgo/go/runtime/panic.go b/libgo/go/runtime/panic.go
index a4b9a83..49c5f5e 100644
--- a/libgo/go/runtime/panic.go
+++ b/libgo/go/runtime/panic.go
@@ -281,29 +281,29 @@ func deferprocStack(d *_defer, frame *bool, pfn uintptr, arg unsafe.Pointer) {
// Each defer must be released with freedefer.
func newdefer() *_defer {
var d *_defer
- gp := getg()
- pp := gp.m.p.ptr()
+ mp := acquirem()
+ pp := mp.p.ptr()
if len(pp.deferpool) == 0 && sched.deferpool != nil {
- systemstack(func() {
- lock(&sched.deferlock)
- for len(pp.deferpool) < cap(pp.deferpool)/2 && sched.deferpool != nil {
- d := sched.deferpool
- sched.deferpool = d.link
- d.link = nil
- pp.deferpool = append(pp.deferpool, d)
- }
- unlock(&sched.deferlock)
- })
+ lock(&sched.deferlock)
+ for len(pp.deferpool) < cap(pp.deferpool)/2 && sched.deferpool != nil {
+ d := sched.deferpool
+ sched.deferpool = d.link
+ d.link = nil
+ pp.deferpool = append(pp.deferpool, d)
+ }
+ unlock(&sched.deferlock)
}
if n := len(pp.deferpool); n > 0 {
d = pp.deferpool[n-1]
pp.deferpool[n-1] = nil
pp.deferpool = pp.deferpool[:n-1]
}
+ releasem(mp)
+ mp, pp = nil, nil
+
if d == nil {
- systemstack(func() {
- d = new(_defer)
- })
+ // Allocate new defer.
+ d = new(_defer)
}
d.heap = true
return d
@@ -312,11 +312,16 @@ func newdefer() *_defer {
// Free the given defer.
// The defer cannot be used after this call.
//
-// This must not grow the stack because there may be a frame without a
-// stack map when this is called.
+// This is nosplit because the incoming defer is in a perilous state.
+// It's not on any defer list, so stack copying won't adjust stack
+// pointers in it (namely, d.link). Hence, if we were to copy the
+// stack, d could then contain a stale pointer.
//
//go:nosplit
func freedefer(d *_defer) {
+ d.link = nil
+ // After this point we can copy the stack.
+
if d._panic != nil {
freedeferpanic()
}
@@ -326,7 +331,8 @@ func freedefer(d *_defer) {
if !d.heap {
return
}
- pp := getg().m.p.ptr()
+ mp := acquirem()
+ pp := mp.p.ptr()
if len(pp.deferpool) == cap(pp.deferpool) {
// Transfer half of local cache to the central cache.
//
@@ -353,19 +359,12 @@ func freedefer(d *_defer) {
})
}
- // These lines used to be simply `*d = _defer{}` but that
- // started causing a nosplit stack overflow via typedmemmove.
- d.link = nil
- d.frame = nil
- d.panicStack = nil
- d.arg = nil
- d.retaddr = 0
- d.makefunccanrecover = false
- // d._panic and d.pfn must be nil already.
- // If not, we would have called freedeferpanic or freedeferfn above,
- // both of which throw.
+ *d = _defer{}
pp.deferpool = append(pp.deferpool, d)
+
+ releasem(mp)
+ mp, pp = nil, nil
}
// Separate function so that it can split stack.
@@ -636,7 +635,7 @@ func printpanics(p *_panic) {
}
// The implementation of the predeclared function panic.
-func gopanic(e interface{}) {
+func gopanic(e any) {
gp := getg()
if gp.m.curg != gp {
print("panic: ")
@@ -1237,15 +1236,15 @@ func canpanic(gp *g) bool {
// Note also that g->m can change at preemption, so m can go stale
// if this function ever makes a function call.
_g_ := getg()
- _m_ := _g_.m
+ mp := _g_.m
// Is it okay for gp to panic instead of crashing the program?
// Yes, as long as it is running Go code, not runtime code,
// and not stuck in a system call.
- if gp == nil || gp != _m_.curg {
+ if gp == nil || gp != mp.curg {
return false
}
- if _m_.locks != 0 || _m_.mallocing != 0 || _m_.throwing != 0 || _m_.preemptoff != "" || _m_.dying != 0 {
+ if mp.locks != 0 || mp.mallocing != 0 || mp.throwing != 0 || mp.preemptoff != "" || mp.dying != 0 {
return false
}
status := readgstatus(gp)
diff --git a/libgo/go/runtime/panic32.go b/libgo/go/runtime/panic32.go
index 11d2a84..df4afae 100644
--- a/libgo/go/runtime/panic32.go
+++ b/libgo/go/runtime/panic32.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build 386 || amd64p32 || arm || mips || mipsle || armbe || m68k || nios2 || ppc || riscv || s390 || sh || shbe || sparc
-// +build 386 amd64p32 arm mips mipsle armbe m68k nios2 ppc riscv s390 sh shbe sparc
package runtime
diff --git a/libgo/go/runtime/pprof/mprof_test.go b/libgo/go/runtime/pprof/mprof_test.go
index 6a448a7..3abf5df 100644
--- a/libgo/go/runtime/pprof/mprof_test.go
+++ b/libgo/go/runtime/pprof/mprof_test.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build !js
-// +build !js
package pprof
@@ -18,7 +17,7 @@ import (
"unsafe"
)
-var memSink interface{}
+var memSink any
func allocateTransient1M() {
for i := 0; i < 1024; i++ {
@@ -86,17 +85,6 @@ func TestMemoryProfiler(t *testing.T) {
runtime.GC() // materialize stats
- // TODO(mknyszek): Fix #45315 and remove this extra call.
- //
- // Unfortunately, it's possible for the sweep termination condition
- // to flap, so with just one runtime.GC call, a freed object could be
- // missed, leading this test to fail. A second call reduces the chance
- // of this happening to zero, because sweeping actually has to finish
- // to move on to the next GC, during which nothing will happen.
- //
- // See #46500 for more details.
- runtime.GC()
-
memoryProfilerRun++
tests := []struct {
@@ -105,33 +93,33 @@ func TestMemoryProfiler(t *testing.T) {
}{{
stk: []string{"runtime/pprof.allocatePersistent1K", "runtime/pprof.TestMemoryProfiler"},
legacy: fmt.Sprintf(`%v: %v \[%v: %v\] @ 0x[0-9,a-f x]+
-# 0x[0-9,a-f]+ runtime/pprof\.allocatePersistent1K\+0x[0-9,a-f]+ .*/mprof_test\.go:48
-# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/mprof_test\.go:83
+# 0x[0-9,a-f]+ runtime/pprof\.allocatePersistent1K\+0x[0-9,a-f]+ .*/mprof_test\.go:47
+# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/mprof_test\.go:82
`, 32*memoryProfilerRun, 1024*memoryProfilerRun, 32*memoryProfilerRun, 1024*memoryProfilerRun),
}, {
stk: []string{"runtime/pprof.allocateTransient1M", "runtime/pprof.TestMemoryProfiler"},
legacy: fmt.Sprintf(`(0|%v): (0|%v) \[%v: %v\] @ 0x[0-9,a-f x]+
-# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient1M\+0x[0-9,a-f]+ .*/mprof_test.go:25
-# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/mprof_test.go:80
+# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient1M\+0x[0-9,a-f]+ .*/mprof_test.go:24
+# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/mprof_test.go:79
`, (1<<10)*memoryProfilerRun, (1<<20)*memoryProfilerRun, (1<<10)*memoryProfilerRun, (1<<20)*memoryProfilerRun),
}, {
stk: []string{"runtime/pprof.allocateTransient2M", "runtime/pprof.TestMemoryProfiler"},
// This should start with "0: 0" but gccgo's imprecise
// GC means that sometimes the value is not collected.
legacy: fmt.Sprintf(`(0|%v): (0|%v) \[%v: %v\] @ 0x[0-9,a-f x]+
-# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient2M\+0x[0-9,a-f]+ .*/mprof_test.go:31
-# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/mprof_test.go:81
+# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient2M\+0x[0-9,a-f]+ .*/mprof_test.go:30
+# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/mprof_test.go:80
`, memoryProfilerRun, (2<<20)*memoryProfilerRun, memoryProfilerRun, (2<<20)*memoryProfilerRun),
}, {
stk: []string{"runtime/pprof.allocateTransient2MInline", "runtime/pprof.TestMemoryProfiler"},
legacy: fmt.Sprintf(`(0|%v): (0|%v) \[%v: %v\] @ 0x[0-9,a-f x]+
-# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient2MInline\+0x[0-9,a-f]+ .*/mprof_test.go:35
-# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/mprof_test.go:82
+# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient2MInline\+0x[0-9,a-f]+ .*/mprof_test.go:34
+# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/mprof_test.go:81
`, memoryProfilerRun, (4<<20)*memoryProfilerRun, memoryProfilerRun, (4<<20)*memoryProfilerRun),
}, {
stk: []string{"runtime/pprof.allocateReflectTransient"},
legacy: fmt.Sprintf(`(0|%v): (0|%v) \[%v: %v\] @( 0x[0-9,a-f]+)+
-# 0x[0-9,a-f]+ runtime/pprof\.allocateReflectTransient\+0x[0-9,a-f]+ .*/mprof_test.go:56
+# 0x[0-9,a-f]+ runtime/pprof\.allocateReflectTransient\+0x[0-9,a-f]+ .*/mprof_test.go:55
`, memoryProfilerRun, (3<<20)*memoryProfilerRun, memoryProfilerRun, (3<<20)*memoryProfilerRun),
}}
diff --git a/libgo/go/runtime/pprof/pprof.go b/libgo/go/runtime/pprof/pprof.go
index 54838fc..e90cf97 100644
--- a/libgo/go/runtime/pprof/pprof.go
+++ b/libgo/go/runtime/pprof/pprof.go
@@ -76,6 +76,7 @@ import (
"bufio"
"bytes"
"fmt"
+ "internal/abi"
"io"
"runtime"
"sort"
@@ -133,7 +134,7 @@ import (
type Profile struct {
name string
mu sync.Mutex
- m map[interface{}][]uintptr
+ m map[any][]uintptr
count func() int
write func(io.Writer, int) error
}
@@ -216,7 +217,7 @@ func NewProfile(name string) *Profile {
}
p := &Profile{
name: name,
- m: map[interface{}][]uintptr{},
+ m: map[any][]uintptr{},
}
profiles.m[name] = p
return p
@@ -276,7 +277,7 @@ func (p *Profile) Count() int {
// Passing skip=0 begins the stack trace at the call to Add inside rpc.NewClient.
// Passing skip=1 begins the stack trace at the call to NewClient inside mypkg.Run.
//
-func (p *Profile) Add(value interface{}, skip int) {
+func (p *Profile) Add(value any, skip int) {
if p.name == "" {
panic("pprof: use of uninitialized Profile")
}
@@ -289,7 +290,7 @@ func (p *Profile) Add(value interface{}, skip int) {
stk = stk[:n]
if len(stk) == 0 {
// The value for skip is too large, and there's no stack trace to record.
- stk = []uintptr{funcPC(lostProfileEvent) + 1}
+ stk = []uintptr{abi.FuncPCABIInternal(lostProfileEvent) + 1}
}
p.mu.Lock()
@@ -302,7 +303,7 @@ func (p *Profile) Add(value interface{}, skip int) {
// Remove removes the execution stack associated with value from the profile.
// It is a no-op if the value is not in the profile.
-func (p *Profile) Remove(value interface{}) {
+func (p *Profile) Remove(value any) {
p.mu.Lock()
defer p.mu.Unlock()
delete(p.m, value)
diff --git a/libgo/go/runtime/pprof/pprof_norusage.go b/libgo/go/runtime/pprof/pprof_norusage.go
index e175dd3..cbc5176 100644
--- a/libgo/go/runtime/pprof/pprof_norusage.go
+++ b/libgo/go/runtime/pprof/pprof_norusage.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build !darwin && !linux
-// +build !darwin,!linux
package pprof
diff --git a/libgo/go/runtime/pprof/pprof_rusage.go b/libgo/go/runtime/pprof/pprof_rusage.go
index 269f21b..46263fe 100644
--- a/libgo/go/runtime/pprof/pprof_rusage.go
+++ b/libgo/go/runtime/pprof/pprof_rusage.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build darwin || linux
-// +build darwin linux
package pprof
diff --git a/libgo/go/runtime/pprof/pprof_test.go b/libgo/go/runtime/pprof/pprof_test.go
index ab96b0c..def49c1 100644
--- a/libgo/go/runtime/pprof/pprof_test.go
+++ b/libgo/go/runtime/pprof/pprof_test.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build !js
-// +build !js
package pprof
@@ -11,6 +10,7 @@ import (
"bytes"
"context"
"fmt"
+ "internal/abi"
"internal/profile"
"internal/testenv"
"io"
@@ -20,6 +20,7 @@ import (
"os/exec"
"regexp"
"runtime"
+ "runtime/debug"
"strings"
"sync"
"sync/atomic"
@@ -88,14 +89,16 @@ func avoidFunctions() []string {
}
func TestCPUProfile(t *testing.T) {
- testCPUProfile(t, stackContains, []string{"pprof.cpuHog1"}, avoidFunctions(), func(dur time.Duration) {
+ matches := matchAndAvoidStacks(stackContains, []string{"pprof.cpuHog1"}, avoidFunctions())
+ testCPUProfile(t, matches, func(dur time.Duration) {
cpuHogger(cpuHog1, &salt1, dur)
})
}
func TestCPUProfileMultithreaded(t *testing.T) {
defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(2))
- testCPUProfile(t, stackContains, []string{"pprof.cpuHog1", "pprof.cpuHog2"}, avoidFunctions(), func(dur time.Duration) {
+ matches := matchAndAvoidStacks(stackContains, []string{"pprof.cpuHog1", "pprof.cpuHog2"}, avoidFunctions())
+ testCPUProfile(t, matches, func(dur time.Duration) {
c := make(chan int)
go func() {
cpuHogger(cpuHog1, &salt1, dur)
@@ -106,17 +109,139 @@ func TestCPUProfileMultithreaded(t *testing.T) {
})
}
+func TestCPUProfileMultithreadMagnitude(t *testing.T) {
+ if runtime.GOOS != "linux" {
+ t.Skip("issue 35057 is only confirmed on Linux")
+ }
+
+ // Linux [5.9,5.16) has a kernel bug that can break CPU timers on newly
+ // created threads, breaking our CPU accounting.
+ major, minor, patch, err := linuxKernelVersion()
+ if err != nil {
+ t.Errorf("Error determining kernel version: %v", err)
+ }
+ t.Logf("Running on Linux %d.%d.%d", major, minor, patch)
+ defer func() {
+ if t.Failed() {
+ t.Logf("Failure of this test may indicate that your system suffers from a known Linux kernel bug fixed on newer kernels. See https://golang.org/issue/49065.")
+ }
+ }()
+
+ // Disable on affected builders to avoid flakiness, but otherwise keep
+ // it enabled to potentially warn users that they are on a broken
+ // kernel.
+ if testenv.Builder() != "" && (runtime.GOARCH == "386" || runtime.GOARCH == "amd64") {
+ have59 := major > 5 || (major == 5 && minor >= 9)
+ have516 := major > 5 || (major == 5 && minor >= 16)
+ if have59 && !have516 {
+ testenv.SkipFlaky(t, 49065)
+ }
+ }
+
+ // Run a workload in a single goroutine, then run copies of the same
+ // workload in several goroutines. For both the serial and parallel cases,
+ // the CPU time the process measures with its own profiler should match the
+ // total CPU usage that the OS reports.
+ //
+ // We could also check that increases in parallelism (GOMAXPROCS) lead to a
+ // linear increase in the CPU usage reported by both the OS and the
+ // profiler, but without a guarantee of exclusive access to CPU resources
+ // that is likely to be a flaky test.
+
+ // Require the smaller value to be within 10%, or 40% in short mode.
+ maxDiff := 0.10
+ if testing.Short() {
+ maxDiff = 0.40
+ }
+
+ compare := func(a, b time.Duration, maxDiff float64) error {
+ if a <= 0 || b <= 0 {
+ return fmt.Errorf("Expected both time reports to be positive")
+ }
+
+ if a < b {
+ a, b = b, a
+ }
+
+ diff := float64(a-b) / float64(a)
+ if diff > maxDiff {
+ return fmt.Errorf("CPU usage reports are too different (limit -%.1f%%, got -%.1f%%)", maxDiff*100, diff*100)
+ }
+
+ return nil
+ }
+
+ for _, tc := range []struct {
+ name string
+ workers int
+ }{
+ {
+ name: "serial",
+ workers: 1,
+ },
+ {
+ name: "parallel",
+ workers: runtime.GOMAXPROCS(0),
+ },
+ } {
+ // check that the OS's perspective matches what the Go runtime measures.
+ t.Run(tc.name, func(t *testing.T) {
+ t.Logf("Running with %d workers", tc.workers)
+
+ var cpuTime time.Duration
+ matches := matchAndAvoidStacks(stackContains, []string{"runtime/pprof.cpuHog1"}, avoidFunctions())
+ p := testCPUProfile(t, matches, func(dur time.Duration) {
+ cpuTime = diffCPUTime(t, func() {
+ var wg sync.WaitGroup
+ var once sync.Once
+ for i := 0; i < tc.workers; i++ {
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ var salt = 0
+ cpuHogger(cpuHog1, &salt, dur)
+ once.Do(func() { salt1 = salt })
+ }()
+ }
+ wg.Wait()
+ })
+ })
+
+ for i, unit := range []string{"count", "nanoseconds"} {
+ if have, want := p.SampleType[i].Unit, unit; have != want {
+ t.Errorf("pN SampleType[%d]; %q != %q", i, have, want)
+ }
+ }
+
+ // cpuHog1 called above is the primary source of CPU
+ // load, but there may be some background work by the
+ // runtime. Since the OS rusage measurement will
+ // include all work done by the process, also compare
+ // against all samples in our profile.
+ var value time.Duration
+ for _, sample := range p.Sample {
+ value += time.Duration(sample.Value[1]) * time.Nanosecond
+ }
+
+ t.Logf("compare %s vs %s", cpuTime, value)
+ if err := compare(cpuTime, value, maxDiff); err != nil {
+ t.Errorf("compare got %v want nil", err)
+ }
+ })
+ }
+}
+
// containsInlinedCall reports whether the function body for the function f is
// known to contain an inlined function call within the first maxBytes bytes.
-func containsInlinedCall(f interface{}, maxBytes int) bool {
+func containsInlinedCall(f any, maxBytes int) bool {
_, found := findInlinedCall(f, maxBytes)
return found
}
// findInlinedCall returns the PC of an inlined function call within
// the function body for the function f if any.
-func findInlinedCall(f interface{}, maxBytes int) (pc uint64, found bool) {
- fFunc := runtime.FuncForPC(uintptr(funcPC(f)))
+func findInlinedCall(f any, maxBytes int) (pc uint64, found bool) {
+ fFunc := runtime.FuncForPC(uintptr(abi.FuncPCABIInternal(f)))
if fFunc == nil || fFunc.Entry() == 0 {
panic("failed to locate function entry")
}
@@ -148,7 +273,8 @@ func TestCPUProfileInlining(t *testing.T) {
t.Skip("Can't determine whether inlinedCallee was inlined into inlinedCaller.")
}
- p := testCPUProfile(t, stackContains, []string{"pprof.inlinedCallee", "pprof.inlinedCaller"}, avoidFunctions(), func(dur time.Duration) {
+ matches := matchAndAvoidStacks(stackContains, []string{"pprof.inlinedCallee", "pprof.inlinedCaller"}, avoidFunctions())
+ p := testCPUProfile(t, matches, func(dur time.Duration) {
cpuHogger(inlinedCaller, &salt1, dur)
})
@@ -198,7 +324,8 @@ func inlinedCalleeDump(pcs []uintptr) {
}
func TestCPUProfileRecursion(t *testing.T) {
- p := testCPUProfile(t, stackContains, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.recursionCallee", "runtime/pprof.recursionCaller"}, avoidFunctions(), func(dur time.Duration) {
+ matches := matchAndAvoidStacks(stackContains, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.recursionCallee", "runtime/pprof.recursionCaller"}, avoidFunctions())
+ p := testCPUProfile(t, matches, func(dur time.Duration) {
cpuHogger(recursionCaller, &salt1, dur)
})
@@ -283,7 +410,7 @@ func cpuProfilingBroken() bool {
// testCPUProfile runs f under the CPU profiler, checking for some conditions specified by need,
// as interpreted by matches, and returns the parsed profile.
-func testCPUProfile(t *testing.T, matches matchFunc, need []string, avoid []string, f func(dur time.Duration)) *profile.Profile {
+func testCPUProfile(t *testing.T, matches profileMatchFunc, f func(dur time.Duration)) *profile.Profile {
switch runtime.GOOS {
case "darwin":
out, err := exec.Command("uname", "-a").CombinedOutput()
@@ -324,7 +451,7 @@ func testCPUProfile(t *testing.T, matches matchFunc, need []string, avoid []stri
f(duration)
StopCPUProfile()
- if p, ok := profileOk(t, matches, need, avoid, prof, duration); ok {
+ if p, ok := profileOk(t, matches, prof, duration); ok {
return p
}
@@ -349,6 +476,16 @@ func testCPUProfile(t *testing.T, matches matchFunc, need []string, avoid []stri
return nil
}
+var diffCPUTimeImpl func(f func()) time.Duration
+
+func diffCPUTime(t *testing.T, f func()) time.Duration {
+ if fn := diffCPUTimeImpl; fn != nil {
+ return fn(f)
+ }
+ t.Fatalf("cannot measure CPU time on GOOS=%s GOARCH=%s", runtime.GOOS, runtime.GOARCH)
+ return 0
+}
+
func contains(slice []string, s string) bool {
for i := range slice {
if slice[i] == s {
@@ -370,44 +507,18 @@ func stackContains(spec string, count uintptr, stk []*profile.Location, labels m
return false
}
-type matchFunc func(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool
+type sampleMatchFunc func(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool
-func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, prof bytes.Buffer, duration time.Duration) (_ *profile.Profile, ok bool) {
+func profileOk(t *testing.T, matches profileMatchFunc, prof bytes.Buffer, duration time.Duration) (_ *profile.Profile, ok bool) {
ok = true
- // Check that profile is well formed, contains 'need', and does not contain
- // anything from 'avoid'.
- have := make([]uintptr, len(need))
- avoidSamples := make([]uintptr, len(avoid))
var samples uintptr
var buf bytes.Buffer
p := parseProfile(t, prof.Bytes(), func(count uintptr, stk []*profile.Location, labels map[string][]string) {
fmt.Fprintf(&buf, "%d:", count)
fprintStack(&buf, stk)
+ fmt.Fprintf(&buf, " labels: %v\n", labels)
samples += count
- for i, spec := range need {
- if matches(spec, count, stk, labels) {
- have[i] += count
- }
- }
- for i, name := range avoid {
- for _, loc := range stk {
- for _, line := range loc.Line {
- if strings.Contains(line.Function.Name, name) {
- avoidSamples[i] += count
- }
- }
- }
- }
fmt.Fprintf(&buf, "\n")
})
t.Logf("total %d CPU profile samples collected:\n%s", samples, buf.String())
@@ -430,39 +541,77 @@ func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, p
ok = false
}
- for i, name := range avoid {
- bad := avoidSamples[i]
- if bad != 0 {
- t.Logf("found %d samples in avoid-function %s\n", bad, name)
- ok = false
- }
+ if matches != nil && !matches(t, p) {
+ ok = false
}
- if len(need) == 0 {
- return p, ok
- }
+ return p, ok
+}
- var total uintptr
- for i, name := range need {
- total += have[i]
- t.Logf("%s: %d\n", name, have[i])
- }
- if total == 0 {
- t.Logf("no samples in expected functions")
- ok = false
- }
- // We'd like to check a reasonable minimum, like
- // total / len(have) / smallconstant, but this test is
- // pretty flaky (see bug 7095). So we'll just test to
- // make sure we got at least one sample.
- min := uintptr(1)
- for i, name := range need {
- if have[i] < min {
- t.Logf("%s has %d samples out of %d, want at least %d, ideally %d", name, have[i], total, min, total/uintptr(len(have)))
+type profileMatchFunc func(*testing.T, *profile.Profile) bool
+
+func matchAndAvoidStacks(matches sampleMatchFunc, need []string, avoid []string) profileMatchFunc {
+ return func(t *testing.T, p *profile.Profile) (ok bool) {
+ ok = true
+
+ // Check that profile is well formed, contains 'need', and does not contain
+ // anything from 'avoid'.
+ have := make([]uintptr, len(need))
+ avoidSamples := make([]uintptr, len(avoid))
+
+ for _, sample := range p.Sample {
+ count := uintptr(sample.Value[0])
+ for i, spec := range need {
+ if matches(spec, count, sample.Location, sample.Label) {
+ have[i] += count
+ }
+ }
+ for i, name := range avoid {
+ for _, loc := range sample.Location {
+ for _, line := range loc.Line {
+ if strings.Contains(line.Function.Name, name) {
+ avoidSamples[i] += count
+ }
+ }
+ }
+ }
+ }
+
+ for i, name := range avoid {
+ bad := avoidSamples[i]
+ if bad != 0 {
+ t.Logf("found %d samples in avoid-function %s\n", bad, name)
+ ok = false
+ }
+ }
+
+ if len(need) == 0 {
+ return
+ }
+
+ var total uintptr
+ for i, name := range need {
+ total += have[i]
+ t.Logf("%s: %d\n", name, have[i])
+ }
+ if total == 0 {
+ t.Logf("no samples in expected functions")
ok = false
}
+
+ // We'd like to check a reasonable minimum, like
+ // total / len(have) / smallconstant, but this test is
+ // pretty flaky (see bug 7095). So we'll just test to
+ // make sure we got at least one sample.
+ min := uintptr(1)
+ for i, name := range need {
+ if have[i] < min {
+ t.Logf("%s has %d samples out of %d, want at least %d, ideally %d", name, have[i], total, min, total/uintptr(len(have)))
+ ok = false
+ }
+ }
+ return
}
- return p, ok
}
// Fork can hang if preempted with signals frequently enough (see issue 5517).
@@ -574,12 +723,11 @@ func fprintStack(w io.Writer, stk []*profile.Location) {
}
fmt.Fprintf(w, ")")
}
- fmt.Fprintf(w, "\n")
}
// Test that profiling of division operations is okay, especially on ARM. See issue 6681.
func TestMathBigDivide(t *testing.T) {
- testCPUProfile(t, nil, nil, nil, func(duration time.Duration) {
+ testCPUProfile(t, nil, func(duration time.Duration) {
t := time.After(duration)
pi := new(big.Int)
for {
@@ -611,7 +759,8 @@ func TestMorestack(t *testing.T) {
if runtime.Compiler == "gccgo" {
t.Skip("no runtime.newstack in gccgo")
}
- testCPUProfile(t, stackContainsAll, []string{"runtime.newstack,runtime/pprof.growstack"}, avoidFunctions(), func(duration time.Duration) {
+ matches := matchAndAvoidStacks(stackContainsAll, []string{"runtime.newstack,runtime/pprof.growstack"}, avoidFunctions())
+ testCPUProfile(t, matches, func(duration time.Duration) {
t := time.After(duration)
c := make(chan bool)
for {
@@ -1152,11 +1301,10 @@ func TestGoroutineCounts(t *testing.T) {
func containsInOrder(s string, all ...string) bool {
for _, t := range all {
- i := strings.Index(s, t)
- if i < 0 {
+ var ok bool
+ if _, s, ok = strings.Cut(s, t); !ok {
return false
}
- s = s[i+len(t):]
}
return true
}
@@ -1236,22 +1384,23 @@ func TestEmptyCallStack(t *testing.T) {
// stackContainsLabeled takes a spec like funcname;key=value and matches if the stack has that key
// and value and has funcname somewhere in the stack.
func stackContainsLabeled(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool {
- semi := strings.Index(spec, ";")
- if semi == -1 {
+ base, kv, ok := strings.Cut(spec, ";")
+ if !ok {
panic("no semicolon in key/value spec")
}
- kv := strings.SplitN(spec[semi+1:], "=", 2)
- if len(kv) != 2 {
+ k, v, ok := strings.Cut(kv, "=")
+ if !ok {
panic("missing = in key/value spec")
}
- if !contains(labels[kv[0]], kv[1]) {
+ if !contains(labels[k], v) {
return false
}
- return stackContains(spec[:semi], count, stk, labels)
+ return stackContains(base, count, stk, labels)
}
func TestCPUProfileLabel(t *testing.T) {
- testCPUProfile(t, stackContainsLabeled, []string{"pprof.cpuHogger;key=value"}, avoidFunctions(), func(dur time.Duration) {
+ matches := matchAndAvoidStacks(stackContainsLabeled, []string{"pprof.cpuHogger;key=value"}, avoidFunctions())
+ testCPUProfile(t, matches, func(dur time.Duration) {
Do(context.Background(), Labels("key", "value"), func(context.Context) {
cpuHogger(cpuHog1, &salt1, dur)
})
@@ -1262,7 +1411,8 @@ func TestLabelRace(t *testing.T) {
// Test the race detector annotations for synchronization
// between settings labels and consuming them from the
// profile.
- testCPUProfile(t, stackContainsLabeled, []string{"pprof.cpuHogger;key=value"}, nil, func(dur time.Duration) {
+ matches := matchAndAvoidStacks(stackContainsLabeled, []string{"pprof.cpuHogger;key=value"}, nil)
+ testCPUProfile(t, matches, func(dur time.Duration) {
start := time.Now()
var wg sync.WaitGroup
for time.Since(start) < dur {
@@ -1281,6 +1431,126 @@ func TestLabelRace(t *testing.T) {
})
}
+// TestLabelSystemstack makes sure CPU profiler samples of goroutines running
+// on systemstack include the correct pprof labels. See issue #48577
+func TestLabelSystemstack(t *testing.T) {
+ // Grab and re-set the initial value before continuing to ensure
+ // GOGC doesn't actually change following the test.
+ gogc := debug.SetGCPercent(100)
+ debug.SetGCPercent(gogc)
+
+ matches := matchAndAvoidStacks(stackContainsLabeled, []string{"runtime.systemstack;key=value"}, avoidFunctions())
+ p := testCPUProfile(t, matches, func(dur time.Duration) {
+ Do(context.Background(), Labels("key", "value"), func(ctx context.Context) {
+ parallelLabelHog(ctx, dur, gogc)
+ })
+ })
+
+ // Two conditions to check:
+ // * labelHog should always be labeled.
+ // * The label should _only_ appear on labelHog and the Do call above.
+ for _, s := range p.Sample {
+ isLabeled := s.Label != nil && contains(s.Label["key"], "value")
+ var (
+ mayBeLabeled bool
+ mustBeLabeled bool
+ mustNotBeLabeled bool
+ )
+ for _, loc := range s.Location {
+ for _, l := range loc.Line {
+ switch l.Function.Name {
+ case "runtime/pprof.labelHog", "runtime/pprof.parallelLabelHog", "runtime/pprof.parallelLabelHog.func1":
+ mustBeLabeled = true
+ case "runtime/pprof.Do":
+ // Do sets the labels, so samples may
+ // or may not be labeled depending on
+ // which part of the function they are
+ // at.
+ mayBeLabeled = true
+ case "runtime.bgsweep", "runtime.bgscavenge", "runtime.forcegchelper", "runtime.gcBgMarkWorker", "runtime.runfinq", "runtime.sysmon":
+ // Runtime system goroutines or threads
+ // (such as those identified by
+ // runtime.isSystemGoroutine). These
+ // should never be labeled.
+ mustNotBeLabeled = true
+ case "gogo", "gosave_systemstack_switch", "racecall":
+ // These are context-switch/race-critical
+ // functions that we can't do a full
+ // traceback from. Typically this would
+ // be covered by the runtime check
+ // below, but these symbols don't have
+ // the package name.
+ mayBeLabeled = true
+ }
+
+ if l.Function.Name == "" || strings.HasPrefix(l.Function.Name, "runtime.") || strings.HasPrefix(l.Function.Name, "runtime_") {
+ // There are many places in the runtime
+ // where we can't do a full traceback.
+ // Ideally we'd list them all, but
+ // barring that allow anything in the
+ // runtime, unless explicitly excluded
+ // above.
+ mayBeLabeled = true
+ }
+ }
+ }
+ if mustNotBeLabeled {
+ // If this must not be labeled, then mayBeLabeled hints
+ // are not relevant.
+ mayBeLabeled = false
+ }
+ if mustBeLabeled && !isLabeled {
+ var buf bytes.Buffer
+ fprintStack(&buf, s.Location)
+ t.Errorf("Sample labeled got false want true: %s", buf.String())
+ }
+ if mustNotBeLabeled && isLabeled {
+ var buf bytes.Buffer
+ fprintStack(&buf, s.Location)
+ t.Errorf("Sample labeled got true want false: %s", buf.String())
+ }
+ if isLabeled && !(mayBeLabeled || mustBeLabeled) {
+ var buf bytes.Buffer
+ fprintStack(&buf, s.Location)
+ t.Errorf("Sample labeled got true want false: %s", buf.String())
+ }
+ }
+}
+
+// labelHog is designed to burn CPU time in a way that causes a high number
+// of CPU samples to end up running on systemstack.
+func labelHog(stop chan struct{}, gogc int) {
+ // Regression test for issue 50032. We must give GC an opportunity to
+ // be initially triggered by a labelled goroutine.
+ runtime.GC()
+
+ for i := 0; ; i++ {
+ select {
+ case <-stop:
+ return
+ default:
+ debug.SetGCPercent(gogc)
+ }
+ }
+}
+
+// parallelLabelHog runs GOMAXPROCS goroutines running labelHog.
+func parallelLabelHog(ctx context.Context, dur time.Duration, gogc int) {
+ var wg sync.WaitGroup
+ stop := make(chan struct{})
+ for i := 0; i < runtime.GOMAXPROCS(0); i++ {
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ labelHog(stop, gogc)
+ }()
+ }
+
+ time.Sleep(dur)
+ close(stop)
+ wg.Wait()
+}
+
// Check that there is no deadlock when the program receives SIGPROF while in
// 64bit atomics' critical section. Used to happen on mips{,le}. See #20146.
func TestAtomicLoadStore64(t *testing.T) {
@@ -1387,6 +1657,7 @@ func TestTryAdd(t *testing.T) {
testCases := []struct {
name string
input []uint64 // following the input format assumed by profileBuilder.addCPUData.
+ count int // number of records in input.
wantLocs [][]string // ordered location entries with function names.
wantSamples []*profile.Sample // ordered samples, we care only about Value and the profile location IDs.
}{{
@@ -1396,6 +1667,7 @@ func TestTryAdd(t *testing.T) {
3, 0, 500, // hz = 500. Must match the period.
5, 0, 50, inlinedCallerStack[0], inlinedCallerStack[1],
},
+ count: 2,
wantLocs: [][]string{
{"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"},
},
@@ -1412,6 +1684,7 @@ func TestTryAdd(t *testing.T) {
7, 0, 10, inlinedCallerStack[0], inlinedCallerStack[1], inlinedCallerStack[0], inlinedCallerStack[1],
5, 0, 20, inlinedCallerStack[0], inlinedCallerStack[1],
},
+ count: 3,
wantLocs: [][]string{{"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"}},
wantSamples: []*profile.Sample{
{Value: []int64{10, 10 * period}, Location: []*profile.Location{{ID: 1}, {ID: 1}}},
@@ -1425,6 +1698,7 @@ func TestTryAdd(t *testing.T) {
// entry. The "stk" entry is actually the count.
4, 0, 0, 4242,
},
+ count: 2,
wantLocs: [][]string{{"runtime/pprof.lostProfileEvent"}},
wantSamples: []*profile.Sample{
{Value: []int64{4242, 4242 * period}, Location: []*profile.Location{{ID: 1}}},
@@ -1443,6 +1717,7 @@ func TestTryAdd(t *testing.T) {
5, 0, 30, inlinedCallerStack[0], inlinedCallerStack[0],
4, 0, 40, inlinedCallerStack[0],
},
+ count: 3,
// inlinedCallerDump shows up here because
// runtime_expandFinalInlineFrame adds it to the stack frame.
wantLocs: [][]string{{"runtime/pprof.inlinedCalleeDump"}, {"runtime/pprof.inlinedCallerDump"}},
@@ -1456,6 +1731,7 @@ func TestTryAdd(t *testing.T) {
3, 0, 500, // hz = 500. Must match the period.
9, 0, 10, recursionStack[0], recursionStack[1], recursionStack[2], recursionStack[3], recursionStack[4], recursionStack[5],
},
+ count: 2,
wantLocs: [][]string{
{"runtime/pprof.recursionChainBottom"},
{
@@ -1479,6 +1755,7 @@ func TestTryAdd(t *testing.T) {
5, 0, 50, inlinedCallerStack[0], inlinedCallerStack[1],
4, 0, 60, inlinedCallerStack[0],
},
+ count: 3,
wantLocs: [][]string{{"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"}},
wantSamples: []*profile.Sample{
{Value: []int64{50, 50 * period}, Location: []*profile.Location{{ID: 1}}},
@@ -1491,6 +1768,7 @@ func TestTryAdd(t *testing.T) {
4, 0, 70, inlinedCallerStack[0],
5, 0, 80, inlinedCallerStack[0], inlinedCallerStack[1],
},
+ count: 3,
wantLocs: [][]string{{"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"}},
wantSamples: []*profile.Sample{
{Value: []int64{70, 70 * period}, Location: []*profile.Location{{ID: 1}}},
@@ -1503,6 +1781,7 @@ func TestTryAdd(t *testing.T) {
3, 0, 500, // hz = 500. Must match the period.
4, 0, 70, inlinedCallerStack[0],
},
+ count: 2,
wantLocs: [][]string{{"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"}},
wantSamples: []*profile.Sample{
{Value: []int64{70, 70 * period}, Location: []*profile.Location{{ID: 1}}},
@@ -1518,6 +1797,7 @@ func TestTryAdd(t *testing.T) {
// from getting merged into above.
5, 0, 80, inlinedCallerStack[1], inlinedCallerStack[0],
},
+ count: 3,
wantLocs: [][]string{
{"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"},
{"runtime/pprof.inlinedCallerDump"},
@@ -1530,7 +1810,7 @@ func TestTryAdd(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
- p, err := translateCPUProfile(tc.input)
+ p, err := translateCPUProfile(tc.input, tc.count)
if err != nil {
t.Fatalf("translating profile: %v", err)
}
@@ -1563,3 +1843,39 @@ func TestTryAdd(t *testing.T) {
})
}
}
+
+func TestTimeVDSO(t *testing.T) {
+ // Test that time functions have the right stack trace. In particular,
+ // the stack trace shouldn't be recursive.
+
+ if runtime.GOOS == "android" {
+ // Flaky on Android, issue 48655. VDSO may not be enabled.
+ testenv.SkipFlaky(t, 48655)
+ }
+
+ matches := matchAndAvoidStacks(stackContains, []string{"time.now"}, avoidFunctions())
+ p := testCPUProfile(t, matches, func(dur time.Duration) {
+ t0 := time.Now()
+ for {
+ t := time.Now()
+ if t.Sub(t0) >= dur {
+ return
+ }
+ }
+ })
+
+ // Check for recursive time.now sample.
+ for _, sample := range p.Sample {
+ var seenNow bool
+ for _, loc := range sample.Location {
+ for _, line := range loc.Line {
+ if line.Function.Name == "time.now" {
+ if seenNow {
+ t.Fatalf("unexpected recursive time.now")
+ }
+ seenNow = true
+ }
+ }
+ }
+ }
+}
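
The containsInOrder and stackContainsLabeled hunks above replace strings.Index plus manual slicing with strings.Cut, new in Go 1.18. A small standalone sketch of the before/after shape of that refactor (function names here are illustrative, not from this patch):

package main

import (
	"fmt"
	"strings"
)

// containsInOrderOld is the pre-1.18 shape: find the index, then slice past it.
func containsInOrderOld(s string, all ...string) bool {
	for _, t := range all {
		i := strings.Index(s, t)
		if i < 0 {
			return false
		}
		s = s[i+len(t):]
	}
	return true
}

// containsInOrderNew uses strings.Cut, which returns the text before and
// after the first occurrence of the separator plus a found flag.
func containsInOrderNew(s string, all ...string) bool {
	for _, t := range all {
		var ok bool
		if _, s, ok = strings.Cut(s, t); !ok {
			return false
		}
	}
	return true
}

func main() {
	fmt.Println(containsInOrderOld("a,b,c", "a", "c")) // true
	fmt.Println(containsInOrderNew("a,b,c", "c", "a")) // false
}
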
diff --git a/libgo/go/runtime/pprof/proto.go b/libgo/go/runtime/pprof/proto.go
index 6c5dd31..39a6f58 100644
--- a/libgo/go/runtime/pprof/proto.go
+++ b/libgo/go/runtime/pprof/proto.go
@@ -8,11 +8,12 @@ import (
"bytes"
"compress/gzip"
"fmt"
- internalcpu "internal/cpu"
+ "internal/abi"
"io"
"os"
"runtime"
"strconv"
+ "strings"
"time"
"unsafe"
)
@@ -22,23 +23,6 @@ import (
// (The name shows up in the pprof graphs.)
func lostProfileEvent() { lostProfileEvent() }
-// funcPC returns the PC for the func value f.
-func funcPC(f interface{}) uintptr {
- type iface struct {
- tab unsafe.Pointer
- data unsafe.Pointer
- }
- i := (*iface)(unsafe.Pointer(&f))
- r := *(*uintptr)(i.data)
- if internalcpu.FunctionDescriptors {
- // With PPC64 ELF ABI v1 function descriptors the
- // function address is a pointer to a struct whose
- // first field is the actual PC.
- r = *(*uintptr)(unsafe.Pointer(r))
- }
- return r
-}
-
// A profileBuilder writes a profile incrementally from a
// stream of profile samples delivered by the runtime.
type profileBuilder struct {
@@ -282,8 +266,9 @@ func newProfileBuilder(w io.Writer) *profileBuilder {
}
// addCPUData adds the CPU profiling data to the profile.
-// The data must be a whole number of records,
-// as delivered by the runtime.
+//
+// The data must be a whole number of records, as delivered by the runtime.
+// len(tags) must be equal to the number of records in data.
func (b *profileBuilder) addCPUData(data []uint64, tags []unsafe.Pointer) error {
if !b.havePeriod {
// first record is period
@@ -298,6 +283,9 @@ func (b *profileBuilder) addCPUData(data []uint64, tags []unsafe.Pointer) error
b.period = 1e9 / int64(data[2])
b.havePeriod = true
data = data[3:]
+ // Consume tag slot. Note that there isn't a meaningful tag
+ // value for this record.
+ tags = tags[1:]
}
// Parse CPU samples from the profile.
@@ -322,14 +310,14 @@ func (b *profileBuilder) addCPUData(data []uint64, tags []unsafe.Pointer) error
if data[0] < 3 || tags != nil && len(tags) < 1 {
return fmt.Errorf("malformed profile")
}
+ if len(tags) < 1 {
+ return fmt.Errorf("mismatched profile records and tags")
+ }
count := data[2]
stk := data[3:data[0]]
data = data[data[0]:]
- var tag unsafe.Pointer
- if tags != nil {
- tag = tags[0]
- tags = tags[1:]
- }
+ tag := tags[0]
+ tags = tags[1:]
if count == 0 && len(stk) == 1 {
// overflow record
@@ -338,11 +326,15 @@ func (b *profileBuilder) addCPUData(data []uint64, tags []unsafe.Pointer) error
// gentraceback guarantees that PCs in the
// stack can be unconditionally decremented and
// still be valid, so we must do the same.
- uint64(funcPC(lostProfileEvent) + 1),
+ uint64(abi.FuncPCABIInternal(lostProfileEvent) + 1),
}
}
b.m.lookup(stk, tag).count += int64(count)
}
+
+ if len(tags) != 0 {
+ return fmt.Errorf("mismatched profile records and tags")
+ }
return nil
}
@@ -598,6 +590,9 @@ func (b *profileBuilder) readMapping() {
}
}
+var space = []byte(" ")
+var newline = []byte("\n")
+
func parseProcSelfMaps(data []byte, addMapping func(lo, hi, offset uint64, file, buildID string)) {
// $ cat /proc/self/maps
// 00400000-0040b000 r-xp 00000000 fc:01 787766 /bin/cat
@@ -624,37 +619,24 @@ func parseProcSelfMaps(data []byte, addMapping func(lo, hi, offset uint64, file,
// next removes and returns the next field in the line.
// It also removes from line any spaces following the field.
next := func() []byte {
- j := bytes.IndexByte(line, ' ')
- if j < 0 {
- f := line
- line = nil
- return f
- }
- f := line[:j]
- line = line[j+1:]
- for len(line) > 0 && line[0] == ' ' {
- line = line[1:]
- }
+ var f []byte
+ f, line, _ = bytes.Cut(line, space)
+ line = bytes.TrimLeft(line, " ")
return f
}
for len(data) > 0 {
- i := bytes.IndexByte(data, '\n')
- if i < 0 {
- line, data = data, nil
- } else {
- line, data = data[:i], data[i+1:]
- }
+ line, data, _ = bytes.Cut(data, newline)
addr := next()
- i = bytes.IndexByte(addr, '-')
- if i < 0 {
+ loStr, hiStr, ok := strings.Cut(string(addr), "-")
+ if !ok {
continue
}
- lo, err := strconv.ParseUint(string(addr[:i]), 16, 64)
+ lo, err := strconv.ParseUint(loStr, 16, 64)
if err != nil {
continue
}
- hi, err := strconv.ParseUint(string(addr[i+1:]), 16, 64)
+ hi, err := strconv.ParseUint(hiStr, 16, 64)
if err != nil {
continue
}
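
The parseProcSelfMaps hunk above swaps hand-rolled bytes.IndexByte scanning for bytes.Cut, also new in Go 1.18. A self-contained sketch of the same field-splitting pattern over a maps-style line (the sample line and variable names are illustrative):

package main

import (
	"bytes"
	"fmt"
)

func main() {
	line := []byte("00400000-0040b000 r-xp 00000000 fc:01 787766  /bin/cat")

	// next removes and returns the next space-separated field,
	// mirroring the closure used by parseProcSelfMaps.
	next := func() []byte {
		var f []byte
		f, line, _ = bytes.Cut(line, []byte(" "))
		line = bytes.TrimLeft(line, " ")
		return f
	}

	addr := next()
	perm := next()
	fmt.Printf("addr=%s perm=%s rest=%q\n", addr, perm, line)
}
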
diff --git a/libgo/go/runtime/pprof/proto_test.go b/libgo/go/runtime/pprof/proto_test.go
index 9290210..339b85c 100644
--- a/libgo/go/runtime/pprof/proto_test.go
+++ b/libgo/go/runtime/pprof/proto_test.go
@@ -8,6 +8,7 @@ import (
"bytes"
"encoding/json"
"fmt"
+ "internal/abi"
"internal/profile"
"internal/testenv"
"os"
@@ -16,16 +17,20 @@ import (
"runtime"
"strings"
"testing"
+ "unsafe"
)
// translateCPUProfile parses binary CPU profiling stack trace data
// generated by runtime.CPUProfile() into a profile struct.
// This is only used for testing. Real conversions stream the
// data into the profileBuilder as it becomes available.
-func translateCPUProfile(data []uint64) (*profile.Profile, error) {
+//
+// count is the number of records in data.
+func translateCPUProfile(data []uint64, count int) (*profile.Profile, error) {
var buf bytes.Buffer
b := newProfileBuilder(&buf)
- if err := b.addCPUData(data, nil); err != nil {
+ tags := make([]unsafe.Pointer, count)
+ if err := b.addCPUData(data, tags); err != nil {
return nil, err
}
b.build()
@@ -35,7 +40,7 @@ func translateCPUProfile(data []uint64) (*profile.Profile, error) {
// fmtJSON returns a pretty-printed JSON form for x.
// It works reasonably well for printing protocol-buffer
// data structures like profile.Profile.
-func fmtJSON(x interface{}) string {
+func fmtJSON(x any) string {
js, _ := json.MarshalIndent(x, "", "\t")
return string(js)
}
@@ -45,7 +50,7 @@ func TestConvertCPUProfileEmpty(t *testing.T) {
var buf bytes.Buffer
b := []uint64{3, 0, 500} // empty profile at 500 Hz (2ms sample period)
- p, err := translateCPUProfile(b)
+ p, err := translateCPUProfile(b, 1)
if err != nil {
t.Fatalf("translateCPUProfile: %v", err)
}
@@ -99,11 +104,11 @@ func testPCs(t *testing.T) (addr1, addr2 uint64, map1, map2 *profile.Mapping) {
map2 = mprof.Mapping[1]
map2.BuildID, _ = elfBuildID(map2.File)
case "js":
- addr1 = uint64(funcPC(f1))
- addr2 = uint64(funcPC(f2))
+ addr1 = uint64(abi.FuncPCABIInternal(f1))
+ addr2 = uint64(abi.FuncPCABIInternal(f2))
default:
- addr1 = uint64(funcPC(f1))
- addr2 = uint64(funcPC(f2))
+ addr1 = uint64(abi.FuncPCABIInternal(f1))
+ addr2 = uint64(abi.FuncPCABIInternal(f2))
// Fake mapping - HasFunctions will be true because two PCs from Go
// will be fully symbolized.
fake := &profile.Mapping{ID: 1, HasFunctions: true}
@@ -121,7 +126,7 @@ func TestConvertCPUProfile(t *testing.T) {
5, 0, 40, uint64(addr2 + 1), uint64(addr2 + 2), // 40 samples in addr2
5, 0, 10, uint64(addr1 + 1), uint64(addr1 + 2), // 10 samples in addr1
}
- p, err := translateCPUProfile(b)
+ p, err := translateCPUProfile(b, 4)
if err != nil {
t.Fatalf("translating profile: %v", err)
}
@@ -275,11 +280,10 @@ func TestProcSelfMaps(t *testing.T) {
f := func(t *testing.T, input string) {
for tx, tt := range strings.Split(input, "\n\n") {
- i := strings.Index(tt, "->\n")
- if i < 0 {
+ in, out, ok := strings.Cut(tt, "->\n")
+ if !ok {
t.Fatal("malformed test case")
}
- in, out := tt[:i], tt[i+len("->\n"):]
if len(out) > 0 && out[len(out)-1] != '\n' {
out += "\n"
}
@@ -431,7 +435,7 @@ func TestEmptyStack(t *testing.T) {
3, 0, 500, // hz = 500
3, 0, 10, // 10 samples with an empty stack trace
}
- _, err := translateCPUProfile(b)
+ _, err := translateCPUProfile(b, 2)
if err != nil {
t.Fatalf("translating profile: %v", err)
}
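
translateCPUProfile now takes an explicit record count so the tests can hand addCPUData a tags slice of matching length. The counts hard-coded above follow from the record layout visible in addCPUData, where each record begins with its own length in words; a hedged sketch of deriving such a count under that assumption:

package main

import "fmt"

// countRecords walks raw CPU-profile words and counts records, assuming the
// layout used by addCPUData: each record starts with its total length in
// uint64 words (so the 3-word header "3, 0, hz" is itself one record).
func countRecords(data []uint64) (int, error) {
	n := 0
	for len(data) > 0 {
		l := int(data[0])
		if l < 3 || l > len(data) {
			return 0, fmt.Errorf("malformed record at index %d", n)
		}
		data = data[l:]
		n++
	}
	return n, nil
}

func main() {
	b := []uint64{
		3, 0, 500, // header: hz = 500
		5, 0, 10, 0x1234, 0x5678, // one sample with a 2-frame stack
	}
	fmt.Println(countRecords(b)) // 2 <nil>
}
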
diff --git a/libgo/go/runtime/pprof/rusage_test.go b/libgo/go/runtime/pprof/rusage_test.go
new file mode 100644
index 0000000..b0d651e
--- /dev/null
+++ b/libgo/go/runtime/pprof/rusage_test.go
@@ -0,0 +1,39 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build darwin || freebsd || linux || netbsd || openbsd
+
+package pprof
+
+import (
+ "syscall"
+ "time"
+)
+
+func init() {
+ diffCPUTimeImpl = diffCPUTimeRUsage
+}
+
+func diffCPUTimeRUsage(f func()) time.Duration {
+ ok := true
+ var before, after syscall.Rusage
+
+ err := syscall.Getrusage(syscall.RUSAGE_SELF, &before)
+ if err != nil {
+ ok = false
+ }
+
+ f()
+
+ err = syscall.Getrusage(syscall.RUSAGE_SELF, &after)
+ if err != nil {
+ ok = false
+ }
+
+ if !ok {
+ return 0
+ }
+
+ return time.Duration((after.Utime.Nano() + after.Stime.Nano()) - (before.Utime.Nano() + before.Stime.Nano()))
+}
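
TestCPUProfileMultithreadMagnitude combines this rusage measurement with the summed profile samples and requires the two to agree within a relative tolerance (10%, or 40% in short mode). A minimal standalone sketch of that comparison:

package main

import (
	"fmt"
	"time"
)

// compareCPUTimes reports an error if the smaller of two positive durations
// differs from the larger by more than maxDiff (a fraction of the larger).
func compareCPUTimes(a, b time.Duration, maxDiff float64) error {
	if a <= 0 || b <= 0 {
		return fmt.Errorf("expected both time reports to be positive")
	}
	if a < b {
		a, b = b, a
	}
	diff := float64(a-b) / float64(a)
	if diff > maxDiff {
		return fmt.Errorf("CPU usage reports are too different (limit -%.1f%%, got -%.1f%%)", maxDiff*100, diff*100)
	}
	return nil
}

func main() {
	osTime := 950 * time.Millisecond    // e.g. from getrusage
	profTime := 1000 * time.Millisecond // e.g. summed profile samples
	fmt.Println(compareCPUTimes(osTime, profTime, 0.10)) // <nil>: within 10%
	fmt.Println(compareCPUTimes(osTime, profTime, 0.01)) // error: outside 1%
}
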
diff --git a/libgo/go/runtime/pprof/uname_linux_test.go b/libgo/go/runtime/pprof/uname_linux_test.go
new file mode 100644
index 0000000..8374c83
--- /dev/null
+++ b/libgo/go/runtime/pprof/uname_linux_test.go
@@ -0,0 +1,61 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build linux
+
+package pprof
+
+import (
+ "fmt"
+ "regexp"
+ "strconv"
+ "syscall"
+)
+
+var versionRe = regexp.MustCompile(`^(\d+)(?:\.(\d+)(?:\.(\d+))).*$`)
+
+func linuxKernelVersion() (major, minor, patch int, err error) {
+ var uname syscall.Utsname
+ if err := syscall.Uname(&uname); err != nil {
+ return 0, 0, 0, err
+ }
+
+ buf := make([]byte, 0, len(uname.Release))
+ for _, b := range uname.Release {
+ if b == 0 {
+ break
+ }
+ buf = append(buf, byte(b))
+ }
+ rl := string(buf)
+
+ m := versionRe.FindStringSubmatch(rl)
+ if m == nil {
+ return 0, 0, 0, fmt.Errorf("error matching version number in %q", rl)
+ }
+
+ v, err := strconv.ParseInt(m[1], 10, 64)
+ if err != nil {
+ return 0, 0, 0, fmt.Errorf("error parsing major version %q in %s: %w", m[1], rl, err)
+ }
+ major = int(v)
+
+ if len(m) >= 3 {
+ v, err := strconv.ParseInt(m[2], 10, 64)
+ if err != nil {
+ return 0, 0, 0, fmt.Errorf("error parsing minor version %q in %s: %w", m[2], rl, err)
+ }
+ minor = int(v)
+ }
+
+ if len(m) >= 4 {
+ v, err := strconv.ParseInt(m[3], 10, 64)
+ if err != nil {
+ return 0, 0, 0, fmt.Errorf("error parsing patch version %q in %s: %w", m[3], rl, err)
+ }
+ patch = int(v)
+ }
+
+ return
+}
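
The version parsed here feeds the [5.9, 5.16) kernel gate at the top of TestCPUProfileMultithreadMagnitude (golang.org/issue/49065). A small sketch of that half-open range check, with the parsed major/minor treated as given:

package main

import "fmt"

// affectedKernel reports whether major.minor falls in [5.9, 5.16), the range
// of Linux releases with the CPU-timer bug tracked in golang.org/issue/49065.
func affectedKernel(major, minor int) bool {
	have59 := major > 5 || (major == 5 && minor >= 9)
	have516 := major > 5 || (major == 5 && minor >= 16)
	return have59 && !have516
}

func main() {
	fmt.Println(affectedKernel(5, 8))  // false: before the affected range
	fmt.Println(affectedKernel(5, 11)) // true: affected range
	fmt.Println(affectedKernel(5, 16)) // false: fixed
	fmt.Println(affectedKernel(6, 1))  // false: fixed
}
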
diff --git a/libgo/go/runtime/pprof/uname_other_test.go b/libgo/go/runtime/pprof/uname_other_test.go
new file mode 100644
index 0000000..3276407
--- /dev/null
+++ b/libgo/go/runtime/pprof/uname_other_test.go
@@ -0,0 +1,15 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !linux
+
+package pprof
+
+import (
+ "errors"
+)
+
+func linuxKernelVersion() (major, minor, patch int, err error) {
+ return 0, 0, 0, errors.New("not running on linux")
+}
diff --git a/libgo/go/runtime/preempt_nonwindows.go b/libgo/go/runtime/preempt_nonwindows.go
index 365e86a..d6a2408 100644
--- a/libgo/go/runtime/preempt_nonwindows.go
+++ b/libgo/go/runtime/preempt_nonwindows.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build !windows
-// +build !windows
package runtime
diff --git a/libgo/go/runtime/print.go b/libgo/go/runtime/print.go
index d141061..8dd8bf3 100644
--- a/libgo/go/runtime/print.go
+++ b/libgo/go/runtime/print.go
@@ -5,8 +5,8 @@
package runtime
import (
+ "internal/goarch"
"runtime/internal/atomic"
- "runtime/internal/sys"
"unsafe"
)
@@ -293,7 +293,7 @@ func hexdumpWords(p, end uintptr, mark func(uintptr) byte) {
var markbuf [1]byte
markbuf[0] = ' '
minhexdigits = int(unsafe.Sizeof(uintptr(0)) * 2)
- for i := uintptr(0); p+i < end; i += sys.PtrSize {
+ for i := uintptr(0); p+i < end; i += goarch.PtrSize {
if i%16 == 0 {
if i != 0 {
println()
diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go
index 343f13b..db1e2b4 100644
--- a/libgo/go/runtime/proc.go
+++ b/libgo/go/runtime/proc.go
@@ -7,6 +7,7 @@ package runtime
import (
"internal/abi"
"internal/cpu"
+ "internal/goarch"
"runtime/internal/atomic"
"runtime/internal/sys"
"unsafe"
@@ -195,7 +196,7 @@ func main(unsafe.Pointer) {
// Max stack size is 1 GB on 64-bit, 250 MB on 32-bit.
// Using decimal instead of binary GB and MB because
// they look nicer in the stack overflow failure message.
- if sys.PtrSize == 8 {
+ if goarch.PtrSize == 8 {
maxstacksize = 1000000000
} else {
maxstacksize = 250000000
@@ -503,32 +504,6 @@ func releaseSudog(s *sudog) {
releasem(mp)
}
-// funcPC returns the entry PC of the function f.
-// It assumes that f is a func value. Otherwise the behavior is undefined.
-// CAREFUL: In programs with plugins, funcPC can return different values
-// for the same function (because there are actually multiple copies of
-// the same function in the address space). To be safe, don't use the
-// results of this function in any == expression. It is only safe to
-// use the result as an address at which to start executing code.
-//
-// For gccgo note that this differs from the gc implementation; the gc
-// implementation adds sys.PtrSize to the address of the interface
-// value, but GCC's alias analysis decides that that can not be a
-// reference to the second field of the interface, and in some cases
-// it drops the initialization of the second field as a dead store.
-//go:nosplit
-func funcPC(f interface{}) uintptr {
- i := (*iface)(unsafe.Pointer(&f))
- r := *(*uintptr)(i.data)
- if cpu.FunctionDescriptors {
- // With PPC64 ELF ABI v1 function descriptors the
- // function address is a pointer to a struct whose
- // first field is the actual PC.
- r = *(*uintptr)(unsafe.Pointer(r))
- }
- return r
-}
-
func lockedOSThread() bool {
gp := getg()
return gp.lockedm != 0 && gp.m.lockedg != 0
@@ -574,6 +549,20 @@ func allgadd(gp *g) {
unlock(&allglock)
}
+// allGsSnapshot returns a snapshot of the slice of all Gs.
+//
+// The world must be stopped or allglock must be held.
+func allGsSnapshot() []*g {
+ assertWorldStoppedOrLockHeld(&allglock)
+
+ // Because the world is stopped or allglock is held, allgadd
+ // cannot happen concurrently with this. allgs grows
+ // monotonically and existing entries never change, so we can
+ // simply return a copy of the slice header. For added safety,
+ // we trim everything past len because that can still change.
+ return allgs[:len(allgs):len(allgs)]
+}
+
// atomicAllG returns &allgs[0] and len(allgs) for use with atomicAllGIndex.
func atomicAllG() (**g, uintptr) {
length := atomic.Loaduintptr(&allglen)
@@ -583,7 +572,7 @@ func atomicAllG() (**g, uintptr) {
// atomicAllGIndex returns ptr[i] with the allgptr returned from atomicAllG.
func atomicAllGIndex(ptr **g, i uintptr) *g {
- return *(**g)(add(unsafe.Pointer(ptr), i*sys.PtrSize))
+ return *(**g)(add(unsafe.Pointer(ptr), i*goarch.PtrSize))
}
// forEachG calls fn on every G from allgs.
@@ -695,10 +684,10 @@ func schedinit() {
worldStopped()
mallocinit()
+ cpuinit() // must run before alginit
+ alginit() // maps, hash, fastrand must not be used before this call
fastrandinit() // must run before mcommoninit
mcommoninit(_g_.m, -1)
- cpuinit() // must run before alginit
- alginit() // maps must not be used before this call
sigsave(&_g_.m.sigmask)
initSigmask = _g_.m.sigmask
@@ -720,7 +709,7 @@ func schedinit() {
// In 32-bit mode, we can burn a lot of memory on thread stacks.
// Try to avoid this by limiting the number of threads we run
// by default.
- if sys.PtrSize == 4 && procs > 32 {
+ if goarch.PtrSize == 4 && procs > 32 {
procs = 32
}
@@ -807,11 +796,12 @@ func mcommoninit(mp *m, id int64) {
mp.id = mReserveID()
}
- mp.fastrand[0] = uint32(int64Hash(uint64(mp.id), fastrandseed))
- mp.fastrand[1] = uint32(int64Hash(uint64(cputicks()), ^fastrandseed))
- if mp.fastrand[0]|mp.fastrand[1] == 0 {
- mp.fastrand[1] = 1
+ lo := uint32(int64Hash(uint64(mp.id), fastrandseed))
+ hi := uint32(int64Hash(uint64(cputicks()), ^fastrandseed))
+ if lo|hi == 0 {
+ hi = 1
}
+ mp.fastrand = uint64(hi)<<32 | uint64(lo)
mpreinit(mp)
@@ -994,17 +984,18 @@ func casgstatus(gp *g, oldval, newval uint32) {
gp.trackingSeq++
}
if gp.tracking {
- now := nanotime()
if oldval == _Grunnable {
// We transitioned out of runnable, so measure how much
// time we spent in this state and add it to
// runnableTime.
+ now := nanotime()
gp.runnableTime += now - gp.runnableStamp
gp.runnableStamp = 0
}
if newval == _Grunnable {
// We just transitioned into runnable, so record what
// time that happened.
+ now := nanotime()
gp.runnableStamp = now
} else if newval == _Grunning {
// We're transitioning into running, so turn off
@@ -3382,8 +3373,10 @@ func goexit1() {
// goexit continuation on g0.
func goexit0(gp *g) {
_g_ := getg()
+ _p_ := _g_.m.p.ptr()
casgstatus(gp, _Grunning, _Gdead)
+ // gcController.addScannableStack(_p_, -int64(gp.stack.hi-gp.stack.lo))
if isSystemGoroutine(gp, false) {
atomic.Xadd(&sched.ngsys, -1)
gp.isSystemGoroutine = false
@@ -3407,7 +3400,7 @@ func goexit0(gp *g) {
// Flush assist credit to the global pool. This gives
// better information to pacing if the application is
// rapidly creating and exiting goroutines.
- assistWorkPerByte := float64frombits(atomic.Load64(&gcController.assistWorkPerByte))
+ assistWorkPerByte := gcController.assistWorkPerByte.Load()
scanCredit := int64(assistWorkPerByte * float64(gp.gcAssistBytes))
atomic.Xaddint64(&gcController.bgScanCredit, scanCredit)
gp.gcAssistBytes = 0
@@ -3416,7 +3409,7 @@ func goexit0(gp *g) {
dropg()
if GOARCH == "wasm" { // no threads yet on wasm
- gfput(_g_.m.p.ptr(), gp)
+ gfput(_p_, gp)
schedule() // never returns
}
@@ -3424,7 +3417,7 @@ func goexit0(gp *g) {
print("invalid m->lockedInt = ", _g_.m.lockedInt, "\n")
throw("internal lockOSThread error")
}
- gfput(_g_.m.p.ptr(), gp)
+ gfput(_p_, gp)
if locked {
// The goroutine may have locked this thread because
// it put it in an unusual kernel state. Kill it
@@ -3927,6 +3920,11 @@ func newproc(fn uintptr, arg unsafe.Pointer) *g {
}
if isSystemGoroutine(newg, false) {
atomic.Xadd(&sched.ngsys, +1)
+ } else {
+ // Only user goroutines inherit pprof labels.
+ if _g_.m.curg != nil {
+ newg.labels = _g_.m.curg.labels
+ }
}
// Track initial transition?
newg.trackingSeq = uint8(fastrand())
@@ -3934,6 +3932,7 @@ func newproc(fn uintptr, arg unsafe.Pointer) *g {
newg.tracking = true
}
casgstatus(newg, _Gdead, _Grunnable)
+ // gcController.addScannableStack(_p_, int64(newg.stack.hi-newg.stack.lo))
if _p_.goidcache == _p_.goidcacheend {
// Sched.goidgen is the last allocated id,
@@ -4244,12 +4243,6 @@ func _GC() { _GC() }
func _LostSIGPROFDuringAtomic64() { _LostSIGPROFDuringAtomic64() }
func _VDSO() { _VDSO() }
-var _SystemPC = abi.FuncPCABIInternal(_System)
-var _ExternalCodePC = abi.FuncPCABIInternal(_ExternalCode)
-var _LostExternalCodePC = abi.FuncPCABIInternal(_LostExternalCode)
-var _GCPC = abi.FuncPCABIInternal(_GC)
-var _LostSIGPROFDuringAtomic64PC = abi.FuncPCABIInternal(_LostSIGPROFDuringAtomic64)
-
// Called if we receive a SIGPROF signal.
// Called by the signal handler, may run during STW.
//go:nowritebarrierrec
@@ -4286,7 +4279,7 @@ func sigprof(pc uintptr, gp *g, mp *m) {
n := 0
if traceback {
var stklocs [maxCPUProfStack]location
- n = callers(0, stklocs[:])
+ n = callers(1, stklocs[:])
// Issue 26595: the stack trace we've just collected is going
// to include frames that we don't want to report in the CPU
@@ -4333,61 +4326,23 @@ func sigprof(pc uintptr, gp *g, mp *m) {
n = 2
stk[0] = pc
if mp.preemptoff != "" {
- stk[1] = _GCPC + sys.PCQuantum
+ stk[1] = abi.FuncPCABIInternal(_GC) + sys.PCQuantum
} else {
- stk[1] = _SystemPC + sys.PCQuantum
- }
- }
-
- if prof.hz != 0 {
- cpuprof.add(gp, stk[:n])
- }
- getg().m.mallocing--
-}
-
-// Use global arrays rather than using up lots of stack space in the
-// signal handler. This is safe since while we are executing a SIGPROF
-// signal other SIGPROF signals are blocked.
-var nonprofGoStklocs [maxCPUProfStack]location
-var nonprofGoStk [maxCPUProfStack]uintptr
-
-// sigprofNonGo is called if we receive a SIGPROF signal on a non-Go thread,
-// and the signal handler collected a stack trace in sigprofCallers.
-// When this is called, sigprofCallersUse will be non-zero.
-// g is nil, and what we can do is very limited.
-//go:nosplit
-//go:nowritebarrierrec
-func sigprofNonGo(pc uintptr) {
- if prof.hz != 0 {
- n := callers(0, nonprofGoStklocs[:])
-
- for i := 0; i < n; i++ {
- nonprofGoStk[i] = nonprofGoStklocs[i].pc
- }
-
- if n <= 0 {
- n = 2
- nonprofGoStk[0] = pc
- nonprofGoStk[1] = _ExternalCodePC + sys.PCQuantum
+ stk[1] = abi.FuncPCABIInternal(_System) + sys.PCQuantum
}
-
- cpuprof.addNonGo(nonprofGoStk[:n])
}
-}
-// sigprofNonGoPC is called when a profiling signal arrived on a
-// non-Go thread and we have a single PC value, not a stack trace.
-// g is nil, and what we can do is very limited.
-//go:nosplit
-//go:nowritebarrierrec
-func sigprofNonGoPC(pc uintptr) {
if prof.hz != 0 {
- stk := []uintptr{
- pc,
- _ExternalCodePC + sys.PCQuantum,
+ // Note: it can happen on Windows that we interrupted a system thread
+ // with no g, so gp could be nil. The other nil checks are done out of
+ // caution, but not expected to be nil in practice.
+ var tagPtr *unsafe.Pointer
+ if gp != nil && gp.m != nil && gp.m.curg != nil {
+ tagPtr = &gp.m.curg.labels
}
- cpuprof.addNonGo(stk)
+ cpuprof.add(tagPtr, stk[:n])
}
+ getg().m.mallocing--
}
// setcpuprofilerate sets the CPU profiling rate to hz times per second.
@@ -4511,8 +4466,8 @@ func (pp *p) destroy() {
pp.sudogbuf[i] = nil
}
pp.sudogcache = pp.sudogbuf[:0]
- for i := range pp.deferpoolbuf {
- pp.deferpoolbuf[i] = nil
+ for j := range pp.deferpoolbuf {
+ pp.deferpoolbuf[j] = nil
}
pp.deferpool = pp.deferpoolbuf[:0]
systemstack(func() {
@@ -4858,6 +4813,10 @@ func checkdead() {
// This is a variable for testing purposes. It normally doesn't change.
var forcegcperiod int64 = 2 * 60 * 1e9
+// needSysmonWorkaround is true if the workaround for
+// golang.org/issue/42515 is needed on NetBSD.
+var needSysmonWorkaround bool = false
+
// Always runs without a P, so write barriers are not allowed.
//
//go:nowritebarrierrec
@@ -4966,7 +4925,7 @@ func sysmon() {
}
}
mDoFixup()
- if GOOS == "netbsd" {
+ if GOOS == "netbsd" && needSysmonWorkaround {
// netpoll is responsible for waiting for timer
// expiration, so we typically don't have to worry
// about starting an M to service timers. (Note that
@@ -5510,7 +5469,7 @@ const randomizeScheduler = raceenabled
// If the run queue is full, runnext puts g on the global queue.
// Executed only by the owner P.
func runqput(_p_ *p, gp *g, next bool) {
- if randomizeScheduler && next && fastrand()%2 == 0 {
+ if randomizeScheduler && next && fastrandn(2) == 0 {
next = false
}
@@ -5623,14 +5582,12 @@ func runqputbatch(pp *p, q *gQueue, qsize int) {
// Executed only by the owner P.
func runqget(_p_ *p) (gp *g, inheritTime bool) {
// If there's a runnext, it's the next G to run.
- for {
- next := _p_.runnext
- if next == 0 {
- break
- }
- if _p_.runnext.cas(next, 0) {
- return next.ptr(), true
- }
+ next := _p_.runnext
+ // If the runnext is non-0 and the CAS fails, it could only have been stolen by another P,
+ // because other Ps can race to set runnext to 0, but only the current P can set it to non-0.
+ // Hence, there's no need to retry this CAS if it fails.
+ if next != 0 && _p_.runnext.cas(next, 0) {
+ return next.ptr(), true
}
for {
@@ -5795,7 +5752,7 @@ func (q *gQueue) pushBack(gp *g) {
q.tail.set(gp)
}
-// pushBackAll adds all Gs in l2 to the tail of q. After this q2 must
+// pushBackAll adds all Gs in q2 to the tail of q. After this q2 must
// not be used.
func (q *gQueue) pushBackAll(q2 gQueue) {
if q2.tail == 0 {
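
allGsSnapshot above returns allgs[:len(allgs):len(allgs)], a full slice expression that caps capacity at length so later appends to allgs cannot write through the snapshot. A small sketch of why the capacity cap matters, using an ordinary slice in place of allgs:

package main

import "fmt"

func main() {
	// A growing, append-only slice, standing in for runtime.allgs.
	all := make([]int, 3, 8)
	all[0], all[1], all[2] = 1, 2, 3

	plain := all[:len(all)]           // shares spare capacity with all
	capped := all[:len(all):len(all)] // full slice expression: capacity == length

	// Appending through the uncapped snapshot writes into all's spare
	// capacity; appending through the capped snapshot must reallocate.
	plain = append(plain, 99)
	capped = append(capped, 99)

	fmt.Println(&plain[0] == &all[0])  // true: same backing array as all
	fmt.Println(&capped[0] == &all[0]) // false: the append forced a copy
}
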
diff --git a/libgo/go/runtime/proc_test.go b/libgo/go/runtime/proc_test.go
index d7549a8..41548a2 100644
--- a/libgo/go/runtime/proc_test.go
+++ b/libgo/go/runtime/proc_test.go
@@ -119,6 +119,10 @@ func TestGoroutineParallelism(t *testing.T) {
// since the goroutines can't be stopped/preempted.
// Disable GC for this test (see issue #10958).
defer debug.SetGCPercent(debug.SetGCPercent(-1))
+ // SetGCPercent waits until the mark phase is over, but the runtime
+ // also preempts at the start of the sweep phase, so make sure that's
+ // done too. See #45867.
+ runtime.GC()
for try := 0; try < N; try++ {
done := make(chan bool)
x := uint32(0)
@@ -163,6 +167,10 @@ func testGoroutineParallelism2(t *testing.T, load, netpoll bool) {
// since the goroutines can't be stopped/preempted.
// Disable GC for this test (see issue #10958).
defer debug.SetGCPercent(debug.SetGCPercent(-1))
+ // SetGCPercent waits until the mark phase is over, but the runtime
+ // also preempts at the start of the sweep phase, so make sure that's
+ // done too. See #45867.
+ runtime.GC()
for try := 0; try < N; try++ {
if load {
// Create P goroutines and wait until they all run.
@@ -630,6 +638,10 @@ func TestSchedLocalQueueEmpty(t *testing.T) {
// If runtime triggers a forced GC during this test then it will deadlock,
// since the goroutines can't be stopped/preempted during spin wait.
defer debug.SetGCPercent(debug.SetGCPercent(-1))
+ // SetGCPercent waits until the mark phase is over, but the runtime
+ // also preempts at the start of the sweep phase, so make sure that's
+ // done too. See #45867.
+ runtime.GC()
iters := int(1e5)
if testing.Short() {
@@ -1039,7 +1051,7 @@ func testPreemptionAfterSyscall(t *testing.T, syscallDuration time.Duration) {
interations = 1
}
const (
- maxDuration = 3 * time.Second
+ maxDuration = 5 * time.Second
nroutines = 8
)
@@ -1075,6 +1087,10 @@ func testPreemptionAfterSyscall(t *testing.T, syscallDuration time.Duration) {
}
func TestPreemptionAfterSyscall(t *testing.T) {
+ if runtime.GOOS == "plan9" {
+ testenv.SkipFlaky(t, 41015)
+ }
+
for _, i := range []time.Duration{10, 100, 1000} {
d := i * time.Microsecond
t.Run(fmt.Sprint(d), func(t *testing.T) {
diff --git a/libgo/go/runtime/race0.go b/libgo/go/runtime/race0.go
index 0e431b8..f36d438 100644
--- a/libgo/go/runtime/race0.go
+++ b/libgo/go/runtime/race0.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build !race
-// +build !race
// Dummy race detection API, used when not built with -race.
diff --git a/libgo/go/runtime/relax_stub.go b/libgo/go/runtime/relax_stub.go
index 5b92879c2..e507702 100644
--- a/libgo/go/runtime/relax_stub.go
+++ b/libgo/go/runtime/relax_stub.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build !windows
-// +build !windows
package runtime
diff --git a/libgo/go/runtime/runtime1.go b/libgo/go/runtime/runtime1.go
index 68e7b9e..679bde6 100644
--- a/libgo/go/runtime/runtime1.go
+++ b/libgo/go/runtime/runtime1.go
@@ -6,8 +6,8 @@ package runtime
import (
"internal/bytealg"
+ "internal/goarch"
"runtime/internal/atomic"
- "runtime/internal/sys"
"unsafe"
)
@@ -66,7 +66,7 @@ var (
// nosplit for use in linux startup sysargs
//go:nosplit
func argv_index(argv **byte, i int32) *byte {
- return *(**byte)(add(unsafe.Pointer(argv), uintptr(i)*sys.PtrSize))
+ return *(**byte)(add(unsafe.Pointer(argv), uintptr(i)*goarch.PtrSize))
}
func args(c int32, v **byte) {
@@ -201,10 +201,10 @@ func check() {
if unsafe.Sizeof(j) != 8 {
throw("bad j")
}
- if unsafe.Sizeof(k) != sys.PtrSize {
+ if unsafe.Sizeof(k) != goarch.PtrSize {
throw("bad k")
}
- if unsafe.Sizeof(l) != sys.PtrSize {
+ if unsafe.Sizeof(l) != goarch.PtrSize {
throw("bad l")
}
if unsafe.Sizeof(x1) != 1 {
@@ -326,6 +326,7 @@ var debug struct {
schedtrace int32
tracebackancestors int32
asyncpreemptoff int32
+ harddecommit int32
// debug.malloc is used as a combined debug check
// in the malloc function and should be set
@@ -355,6 +356,7 @@ var dbgvars = []dbgVar{
{"tracebackancestors", &debug.tracebackancestors},
{"asyncpreemptoff", &debug.asyncpreemptoff},
{"inittrace", &debug.inittrace},
+ {"harddecommit", &debug.harddecommit},
}
func parsedebugvars() {
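
The runtime1.go hunk registers a new harddecommit knob in dbgvars, the table parsedebugvars consults when it splits GODEBUG. A rough sketch of that name=value parsing scheme, written with ordinary standard-library helpers rather than the runtime's internal ones:

package main

import (
	"fmt"
	"strconv"
	"strings"
)

// dbgVar mirrors the runtime's table entry: a knob name and the int32 it sets.
type dbgVar struct {
	name  string
	value *int32
}

var debug struct {
	gctrace      int32
	harddecommit int32
}

var dbgvars = []dbgVar{
	{"gctrace", &debug.gctrace},
	{"harddecommit", &debug.harddecommit},
}

// parseDebugVars applies a GODEBUG-style "name=value,name=value" string to
// the registered knobs, ignoring anything it cannot parse.
func parseDebugVars(s string) {
	for _, field := range strings.Split(s, ",") {
		name, val, ok := strings.Cut(field, "=")
		if !ok {
			continue
		}
		n, err := strconv.Atoi(val)
		if err != nil {
			continue
		}
		for _, v := range dbgvars {
			if v.name == name {
				*v.value = int32(n)
			}
		}
	}
}

func main() {
	parseDebugVars("gctrace=1,harddecommit=1")
	fmt.Println(debug.gctrace, debug.harddecommit) // 1 1
}
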
diff --git a/libgo/go/runtime/runtime2.go b/libgo/go/runtime/runtime2.go
index 81a40d5..3c0348f 100644
--- a/libgo/go/runtime/runtime2.go
+++ b/libgo/go/runtime/runtime2.go
@@ -5,8 +5,8 @@
package runtime
import (
+ "internal/goarch"
"runtime/internal/atomic"
- "runtime/internal/sys"
"unsafe"
)
@@ -219,7 +219,7 @@ type eface struct {
data unsafe.Pointer
}
-func efaceOf(ep *interface{}) *eface {
+func efaceOf(ep *any) *eface {
return (*eface)(unsafe.Pointer(ep))
}
@@ -265,6 +265,8 @@ func efaceOf(ep *interface{}) *eface {
// so I can't see them ever moving. If we did want to start moving data
// in the GC, we'd need to allocate the goroutine structs from an
// alternate arena. Using guintptr doesn't make that problem any worse.
+// Note that pollDesc.rg, pollDesc.wg also store g in uintptr form,
+// so they would need to be updated too if g's start moving.
type guintptr uintptr
//go:nosplit
@@ -551,7 +553,7 @@ const (
// tlsSlots is the number of pointer-sized slots reserved for TLS on some platforms,
// like Windows.
tlsSlots = 6
- tlsSize = tlsSlots * sys.PtrSize
+ tlsSize = tlsSlots * goarch.PtrSize
)
type m struct {
@@ -576,7 +578,6 @@ type m struct {
throwing int32
preemptoff string // if != "", keep curg running on this m
locks int32
- softfloat int32
dying int32
profilehz int32
spinning bool // m is out of work and is actively looking for work
@@ -585,7 +586,7 @@ type m struct {
printlock int8
incgo bool // m is executing a cgo call
freeWait uint32 // if == 0, safe to free g0 and delete m (atomic)
- fastrand [2]uint32
+ fastrand uint64
needextram bool
traceback uint8
ncgocall uint64 // number of cgo calls in total
@@ -667,8 +668,7 @@ type p struct {
pcache pageCache
raceprocctx uintptr
- // gccgo has only one size of defer.
- deferpool []*_defer
+ deferpool []*_defer // pool of available defer structs (see panic.go)
deferpoolbuf [32]*_defer
// Cache of goroutine ids, amortizes accesses to runtime·sched.goidgen.
@@ -789,6 +789,12 @@ type p struct {
// Race context used while executing timer functions.
// Not for gccgo: timerRaceCtx uintptr
+ // scannableStackSizeDelta accumulates the amount of stack space held by
+ // live goroutines (i.e. those eligible for stack scanning).
+ // Flushed to gcController.scannableStackSize once scannableStackSizeSlack
+ // or -scannableStackSizeSlack is reached.
+ scannableStackSizeDelta int64
+
// preempt is set to indicate that this P should enter the
// scheduler ASAP (regardless of what G is running on it).
preempt bool
@@ -935,7 +941,7 @@ func extendRandom(r []byte, n int) {
w = 16
}
h := memhash(unsafe.Pointer(&r[n-w]), uintptr(nanotime()), uintptr(w))
- for i := 0; i < sys.PtrSize && n < len(r); i++ {
+ for i := 0; i < goarch.PtrSize && n < len(r); i++ {
r[n] = byte(h)
n++
h >>= 8
@@ -944,7 +950,6 @@ func extendRandom(r []byte, n int) {
}
// A _defer holds an entry on the list of deferred calls.
-// If you add a field here, add code to clear it in freedefer.
// This struct must match the code in Defer_statement::defer_struct_type
// in the compiler.
// Some defers will be allocated on the stack and some on the heap.
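
m.fastrand shrinks from [2]uint32 to a single uint64 here, seeded earlier in mcommoninit as uint64(hi)<<32 | uint64(lo). A tiny sketch of that pack/unpack arithmetic, including the guard against an all-zero seed:

package main

import "fmt"

// pack combines two 32-bit seed halves into one word, as mcommoninit now
// does for m.fastrand; unpack recovers them.
func pack(lo, hi uint32) uint64 { return uint64(hi)<<32 | uint64(lo) }

func unpack(v uint64) (lo, hi uint32) { return uint32(v), uint32(v >> 32) }

func main() {
	lo, hi := uint32(0xdeadbeef), uint32(0x01234567)
	if lo|hi == 0 {
		hi = 1 // the runtime avoids an all-zero seed the same way
	}
	v := pack(lo, hi)
	gotLo, gotHi := unpack(v)
	fmt.Printf("%#x -> lo=%#x hi=%#x\n", v, gotLo, gotHi)
}
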
diff --git a/libgo/go/runtime/runtime_linux_test.go b/libgo/go/runtime/runtime_linux_test.go
new file mode 100644
index 0000000..dc7770d
--- /dev/null
+++ b/libgo/go/runtime/runtime_linux_test.go
@@ -0,0 +1,41 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ . "runtime"
+ "syscall"
+ "testing"
+ "time"
+)
+
+var pid, tid int
+
+func init() {
+ // Record pid and tid of init thread for use during test.
+ // The call to LockOSThread is just to exercise it;
+ // we can't test that it does anything.
+ // Instead we're testing that the conditions are good
+ // for how it is used in init (must be on main thread).
+ pid, tid = syscall.Getpid(), syscall.Gettid()
+ LockOSThread()
+
+ sysNanosleep = func(d time.Duration) {
+ // Invoke a blocking syscall directly; calling time.Sleep()
+ // would deschedule the goroutine instead.
+ ts := syscall.NsecToTimespec(d.Nanoseconds())
+ for {
+ if err := syscall.Nanosleep(&ts, &ts); err != syscall.EINTR {
+ return
+ }
+ }
+ }
+}
+
+func TestLockOSThread(t *testing.T) {
+ if pid != tid {
+ t.Fatalf("pid=%d but tid=%d", pid, tid)
+ }
+}
diff --git a/libgo/go/runtime/runtime_mmap_test.go b/libgo/go/runtime/runtime_mmap_test.go
index 8f72daa..5e97f13 100644
--- a/libgo/go/runtime/runtime_mmap_test.go
+++ b/libgo/go/runtime/runtime_mmap_test.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build aix || darwin || dragonfly || freebsd || hurd || linux || netbsd || openbsd || solaris
-// +build aix darwin dragonfly freebsd hurd linux netbsd openbsd solaris
package runtime_test
diff --git a/libgo/go/runtime/runtime_test.go b/libgo/go/runtime/runtime_test.go
index bda09cf..583c922 100644
--- a/libgo/go/runtime/runtime_test.go
+++ b/libgo/go/runtime/runtime_test.go
@@ -6,6 +6,7 @@ package runtime_test
import (
"flag"
+ "fmt"
"io"
. "runtime"
"runtime/debug"
@@ -53,8 +54,8 @@ func BenchmarkIfaceCmpNil100(b *testing.B) {
}
}
-var efaceCmp1 interface{}
-var efaceCmp2 interface{}
+var efaceCmp1 any
+var efaceCmp2 any
func BenchmarkEfaceCmpDiff(b *testing.B) {
x := 5
@@ -367,3 +368,78 @@ func TestVersion(t *testing.T) {
t.Fatalf("cr/nl in version: %q", vers)
}
}
+
+func TestTimediv(t *testing.T) {
+ for _, tc := range []struct {
+ num int64
+ div int32
+ ret int32
+ rem int32
+ }{
+ {
+ num: 8,
+ div: 2,
+ ret: 4,
+ rem: 0,
+ },
+ {
+ num: 9,
+ div: 2,
+ ret: 4,
+ rem: 1,
+ },
+ {
+ // Used by runtime.check.
+ num: 12345*1000000000 + 54321,
+ div: 1000000000,
+ ret: 12345,
+ rem: 54321,
+ },
+ {
+ num: 1<<32 - 1,
+ div: 2,
+ ret: 1<<31 - 1, // no overflow.
+ rem: 1,
+ },
+ {
+ num: 1 << 32,
+ div: 2,
+ ret: 1<<31 - 1, // overflow.
+ rem: 0,
+ },
+ {
+ num: 1 << 40,
+ div: 2,
+ ret: 1<<31 - 1, // overflow.
+ rem: 0,
+ },
+ {
+ num: 1<<40 + 1,
+ div: 1 << 10,
+ ret: 1 << 30,
+ rem: 1,
+ },
+ } {
+ name := fmt.Sprintf("%d div %d", tc.num, tc.div)
+ t.Run(name, func(t *testing.T) {
+ // Double check that the inputs make sense using
+ // standard 64-bit division.
+ ret64 := tc.num / int64(tc.div)
+ rem64 := tc.num % int64(tc.div)
+ if ret64 != int64(int32(ret64)) {
+ // Simulate timediv overflow value.
+ ret64 = 1<<31 - 1
+ rem64 = 0
+ }
+ if ret64 != int64(tc.ret) {
+ t.Errorf("%d / %d got ret %d rem %d want ret %d rem %d", tc.num, tc.div, ret64, rem64, tc.ret, tc.rem)
+ }
+
+ var rem int32
+ ret := Timediv(tc.num, tc.div, &rem)
+ if ret != tc.ret || rem != tc.rem {
+ t.Errorf("timediv %d / %d got ret %d rem %d want ret %d rem %d", tc.num, tc.div, ret, rem, tc.ret, tc.rem)
+ }
+ })
+ }
+}
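
The TestTimediv cases above pin down the contract of runtime.timediv: a 64-bit numerator divided by a 32-bit divisor, with the quotient saturating at 1<<31-1 and the remainder forced to 0 on overflow. A reference sketch of that contract (not the runtime's actual shift-based implementation):

package main

import "fmt"

// timedivRef divides num by div, returning a 32-bit quotient and remainder.
// On quotient overflow it saturates to 1<<31-1 with remainder 0, matching
// the behavior exercised by TestTimediv.
func timedivRef(num int64, div int32, rem *int32) int32 {
	q := num / int64(div)
	r := num % int64(div)
	if q != int64(int32(q)) {
		q = 1<<31 - 1
		r = 0
	}
	if rem != nil {
		*rem = int32(r)
	}
	return int32(q)
}

func main() {
	var rem int32
	fmt.Println(timedivRef(12345*1000000000+54321, 1000000000, &rem), rem) // 12345 54321
	fmt.Println(timedivRef(1<<40, 2, &rem), rem)                           // 2147483647 0 (overflow)
}
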
diff --git a/libgo/go/runtime/runtime_unix_test.go b/libgo/go/runtime/runtime_unix_test.go
index 54e14c9..781594c 100644
--- a/libgo/go/runtime/runtime_unix_test.go
+++ b/libgo/go/runtime/runtime_unix_test.go
@@ -7,7 +7,6 @@
// and Close(-1) is nearly universally fast.
//go:build aix || darwin || dragonfly || freebsd || hurd || linux || netbsd || openbsd || plan9
-// +build aix darwin dragonfly freebsd hurd linux netbsd openbsd plan9
package runtime_test
diff --git a/libgo/go/runtime/rwmutex_test.go b/libgo/go/runtime/rwmutex_test.go
index 291a32e..f15d367 100644
--- a/libgo/go/runtime/rwmutex_test.go
+++ b/libgo/go/runtime/rwmutex_test.go
@@ -55,6 +55,11 @@ func TestParallelRWMutexReaders(t *testing.T) {
// since the goroutines can't be stopped/preempted.
// Disable GC for this test (see issue #10958).
defer debug.SetGCPercent(debug.SetGCPercent(-1))
+ // SetGCPercent waits until the mark phase is over, but the runtime
+ // also preempts at the start of the sweep phase, so make sure that's
+ // done too.
+ GC()
+
doTestParallelReaders(1)
doTestParallelReaders(3)
doTestParallelReaders(4)
diff --git a/libgo/go/runtime/semasleep_test.go b/libgo/go/runtime/semasleep_test.go
index 905e932..d56733c 100644
--- a/libgo/go/runtime/semasleep_test.go
+++ b/libgo/go/runtime/semasleep_test.go
@@ -3,11 +3,11 @@
// license that can be found in the LICENSE file.
//go:build !plan9 && !windows && !js
-// +build !plan9,!windows,!js
package runtime_test
import (
+ "io"
"os/exec"
"syscall"
"testing"
@@ -21,43 +21,83 @@ func TestSpuriousWakeupsNeverHangSemasleep(t *testing.T) {
if *flagQuick {
t.Skip("-quick")
}
+ t.Parallel() // Waits for a program to sleep for 1s.
exe, err := buildTestProg(t, "testprog")
if err != nil {
t.Fatal(err)
}
- start := time.Now()
cmd := exec.Command(exe, "After1")
+ stdout, err := cmd.StdoutPipe()
+ if err != nil {
+ t.Fatalf("StdoutPipe: %v", err)
+ }
+ beforeStart := time.Now()
if err := cmd.Start(); err != nil {
t.Fatalf("Failed to start command: %v", err)
}
doneCh := make(chan error, 1)
go func() {
doneCh <- cmd.Wait()
+ close(doneCh)
}()
+ t.Cleanup(func() {
+ cmd.Process.Kill()
+ <-doneCh
+ })
+
+ // Wait for After1 to close its stdout so that we know the runtime's SIGIO
+ // handler is registered.
+ b, err := io.ReadAll(stdout)
+ if len(b) > 0 {
+ t.Logf("read from testprog stdout: %s", b)
+ }
+ if err != nil {
+ t.Fatalf("error reading from testprog: %v", err)
+ }
+
+ // Wait for an arbitrary timeout longer than one second. The subprocess itself
+ // attempts to sleep for one second, but if the machine running the test is
+ // heavily loaded that subprocess may not schedule very quickly even if the
+ // bug remains fixed. (This is fine, because if the bug really is unfixed we
+ // can keep the process hung indefinitely, as long as we signal it often
+ // enough.)
+ timeout := 10 * time.Second
+
+ // The subprocess begins sleeping for 1s after it writes to stdout, so measure
+ // the timeout from here (not from when we started creating the process).
+ // That should reduce noise from process startup overhead.
+ ready := time.Now()
// With the repro running, we can continuously send to it
- // a non-terminal signal such as SIGIO, to spuriously
- // wakeup pthread_cond_timedwait_relative_np.
- unfixedTimer := time.NewTimer(2 * time.Second)
+ // a signal that the runtime considers non-terminal,
+ // such as SIGIO, to spuriously wake up
+ // pthread_cond_timedwait_relative_np.
+ ticker := time.NewTicker(200 * time.Millisecond)
+ defer ticker.Stop()
for {
select {
- case <-time.After(200 * time.Millisecond):
+ case now := <-ticker.C:
+ if now.Sub(ready) > timeout {
+ t.Error("Program failed to return on time and has to be killed, issue #27520 still exists")
+ // Send SIGQUIT to get a goroutine dump.
+ // Stop sending SIGIO so that the program can clean up and actually terminate.
+ cmd.Process.Signal(syscall.SIGQUIT)
+ return
+ }
+
// Send the pesky signal that toggles spinning
// indefinitely if #27520 is not fixed.
cmd.Process.Signal(syscall.SIGIO)
- case <-unfixedTimer.C:
- t.Error("Program failed to return on time and has to be killed, issue #27520 still exists")
- cmd.Process.Signal(syscall.SIGKILL)
- return
-
case err := <-doneCh:
if err != nil {
t.Fatalf("The program returned but unfortunately with an error: %v", err)
}
- if time.Since(start) < 100*time.Millisecond {
+ if time.Since(beforeStart) < 1*time.Second {
+ // The program was supposed to sleep for a full (monotonic) second;
+ // it should not return before that has elapsed.
t.Fatalf("The program stopped too quickly.")
}
return
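
The synchronization trick above is generic: the child closes its stdout once its setup is done, so reading to EOF in the parent doubles as a readiness barrier (the matching child-side change to After1 appears later in this patch, in testdata/testprog/sleep.go). A stripped-down sketch of the parent side, with a hypothetical child path standing in for the test binary:

    package main

    import (
        "io"
        "log"
        "os/exec"
    )

    func main() {
        // The child (hypothetical path) writes to stdout and closes it once
        // its setup is complete; io.ReadAll unblocks only at that point.
        cmd := exec.Command("/path/to/child")
        stdout, err := cmd.StdoutPipe()
        if err != nil {
            log.Fatal(err)
        }
        if err := cmd.Start(); err != nil {
            log.Fatal(err)
        }
        if _, err := io.ReadAll(stdout); err != nil {
            log.Fatal(err)
        }
        // The child has finished setup; it is now safe to signal it or to
        // start measuring timeouts.
        _ = cmd.Wait()
    }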
diff --git a/libgo/go/runtime/signal_unix.go b/libgo/go/runtime/signal_unix.go
index a291d2b..188598d 100644
--- a/libgo/go/runtime/signal_unix.go
+++ b/libgo/go/runtime/signal_unix.go
@@ -3,12 +3,13 @@
// license that can be found in the LICENSE file.
//go:build aix || darwin || dragonfly || freebsd || hurd || linux || netbsd || openbsd || solaris
-// +build aix darwin dragonfly freebsd hurd linux netbsd openbsd solaris
package runtime
import (
+ "internal/abi"
"runtime/internal/atomic"
+ "runtime/internal/sys"
"unsafe"
)
@@ -175,8 +176,8 @@ func sigInstallGoHandler(sig uint32) bool {
}
// When built using c-archive or c-shared, only install signal
- // handlers for synchronous signals, SIGPIPE, and SIGURG.
- if (isarchive || islibrary) && t.flags&_SigPanic == 0 && sig != _SIGPIPE && sig != _SIGURG {
+ // handlers for synchronous signals and SIGPIPE and sigPreempt.
+ if (isarchive || islibrary) && t.flags&_SigPanic == 0 && sig != _SIGPIPE && sig != sigPreempt {
return false
}
@@ -271,15 +272,32 @@ func clearSignalHandlers() {
}
}
-// setProcessCPUProfiler is called when the profiling timer changes.
-// It is called with prof.lock held. hz is the new timer, and is 0 if
+// setProcessCPUProfilerTimer is called when the profiling timer changes.
+// It is called with prof.signalLock held. hz is the new timer, and is 0 if
// profiling is being disabled. Enable or disable the signal as
// required for -buildmode=c-archive.
-func setProcessCPUProfiler(hz int32) {
+func setProcessCPUProfilerTimer(hz int32) {
if hz != 0 {
// Enable the Go signal handler if not enabled.
if atomic.Cas(&handlingSig[_SIGPROF], 0, 1) {
- atomic.Storeuintptr(&fwdSig[_SIGPROF], getsig(_SIGPROF))
+ h := getsig(_SIGPROF)
+ // If no signal handler was installed before, then we record
+ // _SIG_IGN here. When we turn off profiling (below) we'll start
+ // ignoring SIGPROF signals. We do this, rather than change
+ // to SIG_DFL, because there may be a pending SIGPROF
+ // signal that has not yet been delivered to some other thread.
+ // If we change to SIG_DFL when turning off profiling, the
+ // program will crash when that SIGPROF is delivered. We assume
+ // that programs that use profiling don't want to crash on a
+ // stray SIGPROF. See issue 19320.
+ // We do the change here instead of when turning off profiling,
+ // because there we may race with a signal handler running
+ // concurrently, in particular, sigfwdgo may observe _SIG_DFL and
+ // die. See issue 43828.
+ if h == _SIG_DFL {
+ h = _SIG_IGN
+ }
+ atomic.Storeuintptr(&fwdSig[_SIGPROF], h)
setsig(_SIGPROF, getSigtramp())
}
@@ -296,31 +314,19 @@ func setProcessCPUProfiler(hz int32) {
// when we enabled profiling. We don't try to handle the case
// of a program that changes the SIGPROF handler while Go
// profiling is enabled.
- //
- // If no signal handler was installed before, then start
- // ignoring SIGPROF signals. We do this, rather than change
- // to SIG_DFL, because there may be a pending SIGPROF
- // signal that has not yet been delivered to some other thread.
- // If we change to SIG_DFL here, the program will crash
- // when that SIGPROF is delivered. We assume that programs
- // that use profiling don't want to crash on a stray SIGPROF.
- // See issue 19320.
if !sigInstallGoHandler(_SIGPROF) {
if atomic.Cas(&handlingSig[_SIGPROF], 1, 0) {
h := atomic.Loaduintptr(&fwdSig[_SIGPROF])
- if h == _SIG_DFL {
- h = _SIG_IGN
- }
setsig(_SIGPROF, h)
}
}
}
}
-// setThreadCPUProfiler makes any thread-specific changes required to
+// setThreadCPUProfilerHz makes any thread-specific changes required to
// implement profiling at a rate of hz.
-// No changes required on Unix systems.
-func setThreadCPUProfiler(hz int32) {
+// No changes required on Unix systems when using setitimer.
+func setThreadCPUProfilerHz(hz int32) {
getg().m.profilehz = hz
}
@@ -338,7 +344,7 @@ func doSigPreempt(gp *g, ctxt *sigctxt, sigpc uintptr) {
if wantAsyncPreempt(gp) {
if ok, newpc := isAsyncSafePoint(gp, sigpc); ok {
// Adjust the PC and inject a call to asyncPreempt.
- // ctxt.pushCall(funcPC(asyncPreempt), newpc)
+ // ctxt.pushCall(abi.FuncPCABI0(asyncPreempt), newpc)
throw("pushCall not implemented")
_ = newpc
}
@@ -407,8 +413,12 @@ func sigtrampgo(sig uint32, info *_siginfo_t, ctx unsafe.Pointer) {
if g == nil {
c := sigctxt{info, ctx}
if sig == _SIGPROF {
- _, pc := getSiginfo(info, ctx)
- sigprofNonGo(pc)
+ // Some platforms (Linux) have per-thread timers, which we use in
+ // combination with the process-wide timer. Avoid double-counting.
+ if validSIGPROF(nil, &c) {
+ _, pc := getSiginfo(info, ctx)
+ sigprofNonGoPC(pc)
+ }
return
}
if sig == sigPreempt && preemptMSupported && debug.asyncpreemptoff == 0 {
@@ -433,6 +443,21 @@ func sigtrampgo(sig uint32, info *_siginfo_t, ctx unsafe.Pointer) {
setg(g)
}
+// sigprofNonGoPC is called when a profiling signal arrived on a
+// non-Go thread and we have a single PC value, not a stack trace.
+// g is nil, and what we can do is very limited.
+//go:nosplit
+//go:nowritebarrierrec
+func sigprofNonGoPC(pc uintptr) {
+ if prof.hz != 0 {
+ stk := []uintptr{
+ pc,
+ abi.FuncPCABIInternal(_ExternalCode) + sys.PCQuantum,
+ }
+ cpuprof.addNonGo(stk)
+ }
+}
+
// crashing is the number of m's we have waited for when implementing
// GOTRACEBACK=crash when a signal is received.
var crashing int32
@@ -482,7 +507,11 @@ func sighandler(sig uint32, info *_siginfo_t, ctxt unsafe.Pointer, gp *g) {
}
if sig == _SIGPROF {
- sigprof(sigpc, gp, _g_.m)
+ // Some platforms (Linux) have per-thread timers, which we use in
+ // combination with the process-wide timer. Avoid double-counting.
+ if validSIGPROF(_g_.m, c) {
+ sigprof(sigpc, gp, _g_.m)
+ }
return
}
@@ -572,9 +601,11 @@ func sighandler(sig uint32, info *_siginfo_t, ctxt unsafe.Pointer, gp *g) {
}
print("PC=", hex(sigpc), " m=", _g_.m.id, " sigcode=", c.sigcode(), "\n")
- if _g_.m.lockedg != 0 && _g_.m.ncgo > 0 && gp == _g_.m.g0 {
+ if _g_.m.incgo && gp == _g_.m.g0 && _g_.m.curg != nil {
print("signal arrived during cgo execution\n")
- gp = _g_.m.lockedg.ptr()
+ // Switch to curg so that we get a traceback of the Go code
+ // leading up to the cgocall, which switched from curg to g0.
+ gp = _g_.m.curg
}
if sig == _SIGILL || sig == _SIGFPE {
// It would be nice to know how long the instruction is.
diff --git a/libgo/go/runtime/signal_windows_test.go b/libgo/go/runtime/signal_windows_test.go
index 1b7cb9d..7c88ab5 100644
--- a/libgo/go/runtime/signal_windows_test.go
+++ b/libgo/go/runtime/signal_windows_test.go
@@ -1,5 +1,4 @@
//go:build windows
-// +build windows
package runtime_test
diff --git a/libgo/go/runtime/sigqueue.go b/libgo/go/runtime/sigqueue.go
index d5539f1..da2491d 100644
--- a/libgo/go/runtime/sigqueue.go
+++ b/libgo/go/runtime/sigqueue.go
@@ -29,7 +29,6 @@
// nor deadlocks.
//go:build !plan9
-// +build !plan9
package runtime
diff --git a/libgo/go/runtime/sigqueue_note.go b/libgo/go/runtime/sigqueue_note.go
index e23446b..fb1a517 100644
--- a/libgo/go/runtime/sigqueue_note.go
+++ b/libgo/go/runtime/sigqueue_note.go
@@ -8,7 +8,6 @@
// those functions. These functions will never be called.
//go:build !darwin && !plan9
-// +build !darwin,!plan9
package runtime
diff --git a/libgo/go/runtime/sizeof_test.go b/libgo/go/runtime/sizeof_test.go
index f510eeb..df798a5 100644
--- a/libgo/go/runtime/sizeof_test.go
+++ b/libgo/go/runtime/sizeof_test.go
@@ -21,9 +21,9 @@ func TestSizeof(t *testing.T) {
const _64bit = unsafe.Sizeof(uintptr(0)) == 8
var tests = []struct {
- val interface{} // type as a value
- _32bit uintptr // size on 32bit platforms
- _64bit uintptr // size on 64bit platforms
+ val any // type as a value
+ _32bit uintptr // size on 32bit platforms
+ _64bit uintptr // size on 64bit platforms
}{
{runtime.G{}, 236, 392}, // g, but exported for testing
{runtime.Sudog{}, 56, 88}, // sudog, but exported for testing
diff --git a/libgo/go/runtime/slice.go b/libgo/go/runtime/slice.go
index 8654aba..7c8b458 100644
--- a/libgo/go/runtime/slice.go
+++ b/libgo/go/runtime/slice.go
@@ -5,6 +5,8 @@
package runtime
import (
+ "internal/abi"
+ "internal/goarch"
"runtime/internal/math"
"runtime/internal/sys"
"unsafe"
@@ -79,12 +81,15 @@ func makeslicecopy(et *_type, tolen int, fromlen int, from unsafe.Pointer) unsaf
if raceenabled {
callerpc := getcallerpc()
- pc := funcPC(makeslicecopy)
+ pc := abi.FuncPCABIInternal(makeslicecopy)
racereadrangepc(from, copymem, callerpc, pc)
}
if msanenabled {
msanread(from, copymem)
}
+ if asanenabled {
+ asanread(from, copymem)
+ }
memmove(to, from, copymem)
@@ -130,16 +135,15 @@ func makeslice64(et *_type, len64, cap64 int64) unsafe.Pointer {
}
func unsafeslice(et *_type, ptr unsafe.Pointer, len int) {
- if len == 0 {
- return
- }
-
- if ptr == nil {
- panic(errorString("unsafe.Slice: ptr is nil and len is not zero"))
+ if len < 0 {
+ panicunsafeslicelen()
}
mem, overflow := math.MulUintptr(et.size, uintptr(len))
- if overflow || mem > maxAlloc || len < 0 {
+ if overflow || mem > -uintptr(ptr) {
+ if ptr == nil {
+ panic(errorString("unsafe.Slice: ptr is nil and len is not zero"))
+ }
panicunsafeslicelen()
}
}
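
The rewritten check folds the overflow, address-space, and nil cases into one comparison: -uintptr(ptr) is the number of bytes between ptr and the top of the address space (and 0 for a nil pointer), so mem > -uintptr(ptr) rejects any slice that could not exist at that address. An illustrative sketch of the same arithmetic outside the runtime (wouldWrap is a made-up helper, not a runtime API):

    package main

    import (
        "fmt"
        "unsafe"
    )

    // wouldWrap mirrors the shape of the check above: it reports whether a
    // slice of n elements of elemSize bytes starting at p would overflow the
    // size computation or run past the top of the address space.
    func wouldWrap(p unsafe.Pointer, elemSize, n uintptr) bool {
        if elemSize != 0 && n > ^uintptr(0)/elemSize {
            return true // elemSize*n overflows uintptr
        }
        mem := elemSize * n
        // -uintptr(p) is the distance from p to the top of the address
        // space; for a nil pointer it is 0, so any non-zero mem is rejected.
        return mem > -uintptr(p)
    }

    func main() {
        var x [4]int64
        p := unsafe.Pointer(&x[0])
        fmt.Println(wouldWrap(p, 8, 4))             // false: fits
        fmt.Println(wouldWrap(nil, 8, 1))           // true: nil ptr, non-zero len
        fmt.Println(wouldWrap(p, 8, ^uintptr(0)/4)) // true: size overflows
    }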
@@ -176,11 +180,14 @@ func panicunsafeslicelen() {
func growslice(et *_type, oldarray unsafe.Pointer, oldlen, oldcap, cap int) slice {
if raceenabled {
callerpc := getcallerpc()
- racereadrangepc(oldarray, uintptr(oldlen*int(et.size)), callerpc, funcPC(growslice))
+ racereadrangepc(oldarray, uintptr(oldlen*int(et.size)), callerpc, abi.FuncPCABIInternal(growslice))
}
if msanenabled {
msanread(oldarray, uintptr(oldlen*int(et.size)))
}
+ if asanenabled {
+ asanread(oldarray, uintptr(oldlen*int(et.size)))
+ }
if cap < oldcap {
panic(errorString("growslice: cap out of range"))
@@ -197,13 +204,17 @@ func growslice(et *_type, oldarray unsafe.Pointer, oldlen, oldcap, cap int) slic
if cap > doublecap {
newcap = cap
} else {
- if oldcap < 1024 {
+ const threshold = 256
+ if oldcap < threshold {
newcap = doublecap
} else {
// Check 0 < newcap to detect overflow
// and prevent an infinite loop.
for 0 < newcap && newcap < cap {
- newcap += newcap / 4
+ // Transition from growing 2x for small slices
+ // to growing 1.25x for large slices. This formula
+ // gives a smooth-ish transition between the two.
+ newcap += (newcap + 3*threshold) / 4
}
// Set newcap to the requested cap when
// the newcap calculation overflowed.
@@ -226,15 +237,15 @@ func growslice(et *_type, oldarray unsafe.Pointer, oldlen, oldcap, cap int) slic
capmem = roundupsize(uintptr(newcap))
overflow = uintptr(newcap) > maxAlloc
newcap = int(capmem)
- case et.size == sys.PtrSize:
- lenmem = uintptr(oldlen) * sys.PtrSize
- newlenmem = uintptr(cap) * sys.PtrSize
- capmem = roundupsize(uintptr(newcap) * sys.PtrSize)
- overflow = uintptr(newcap) > maxAlloc/sys.PtrSize
- newcap = int(capmem / sys.PtrSize)
+ case et.size == goarch.PtrSize:
+ lenmem = uintptr(oldlen) * goarch.PtrSize
+ newlenmem = uintptr(cap) * goarch.PtrSize
+ capmem = roundupsize(uintptr(newcap) * goarch.PtrSize)
+ overflow = uintptr(newcap) > maxAlloc/goarch.PtrSize
+ newcap = int(capmem / goarch.PtrSize)
case isPowerOfTwo(et.size):
var shift uintptr
- if sys.PtrSize == 8 {
+ if goarch.PtrSize == 8 {
// Mask shift for better code generation.
shift = uintptr(sys.Ctz64(uint64(et.size))) & 63
} else {
@@ -312,7 +323,7 @@ func slicecopy(toPtr unsafe.Pointer, toLen int, fromPtr unsafe.Pointer, fromLen
size := uintptr(n) * width
if raceenabled {
callerpc := getcallerpc()
- pc := funcPC(slicecopy)
+ pc := abi.FuncPCABIInternal(slicecopy)
racereadrangepc(fromPtr, size, callerpc, pc)
racewriterangepc(toPtr, size, callerpc, pc)
}
@@ -320,6 +331,10 @@ func slicecopy(toPtr unsafe.Pointer, toLen int, fromPtr unsafe.Pointer, fromLen
msanread(fromPtr, size)
msanwrite(toPtr, size)
}
+ if asanenabled {
+ asanread(fromPtr, size)
+ asanwrite(toPtr, size)
+ }
if size == 1 { // common case worth about 2x to do here
// TODO: is this still worth it with new memmove impl?
diff --git a/libgo/go/runtime/stack_test.go b/libgo/go/runtime/stack_test.go
index d4ee52c..03d17a5 100644
--- a/libgo/go/runtime/stack_test.go
+++ b/libgo/go/runtime/stack_test.go
@@ -7,11 +7,9 @@ package runtime_test
import (
"bytes"
"fmt"
- "os"
"reflect"
"regexp"
. "runtime"
- "strconv"
"strings"
"sync"
"sync/atomic"
@@ -83,12 +81,7 @@ func TestStackGrowth(t *testing.T) {
t.Skip("-quick")
}
- if GOARCH == "wasm" {
- t.Skip("fails on wasm (too slow?)")
- }
-
- // Don't make this test parallel as this makes the 20 second
- // timeout unreliable on slow builders. (See issue #19381.)
+ t.Parallel()
var wg sync.WaitGroup
@@ -102,6 +95,7 @@ func TestStackGrowth(t *testing.T) {
growDuration = time.Since(start)
}()
wg.Wait()
+ t.Log("first growStack took", growDuration)
// in locked goroutine
wg.Add(1)
@@ -114,55 +108,43 @@ func TestStackGrowth(t *testing.T) {
wg.Wait()
// in finalizer
+ if Compiler == "gccgo" && !*Pusestackmaps {
+ // This test is flaky for gccgo's
+ // conservative stack scanning.
+ return
+ }
+ var finalizerStart time.Time
+ var started, progress uint32
wg.Add(1)
- go func() {
+ s := new(string) // Must be of a type that avoids the tiny allocator, or else the finalizer might not run.
+ SetFinalizer(s, func(ss *string) {
defer wg.Done()
-
- if Compiler == "gccgo" && !*Pusestackmaps {
- // This test is flaky for gccgo's
- // conservative stack scanning.
- return
- }
-
- done := make(chan bool)
- var startTime time.Time
- var started, progress uint32
- go func() {
- s := new(string)
- SetFinalizer(s, func(ss *string) {
- startTime = time.Now()
- atomic.StoreUint32(&started, 1)
- growStack(&progress)
- done <- true
- })
- s = nil
- done <- true
- }()
- <-done
- GC()
-
- timeout := 20 * time.Second
- if s := os.Getenv("GO_TEST_TIMEOUT_SCALE"); s != "" {
- scale, err := strconv.Atoi(s)
- if err == nil {
- timeout *= time.Duration(scale)
- }
- }
-
- select {
- case <-done:
- case <-time.After(timeout):
+ finalizerStart = time.Now()
+ atomic.StoreUint32(&started, 1)
+ growStack(&progress)
+ })
+ setFinalizerTime := time.Now()
+ s = nil
+
+ if d, ok := t.Deadline(); ok {
+ // Pad the timeout by an arbitrary 5% to give the AfterFunc time to run.
+ timeout := time.Until(d) * 19 / 20
+ timer := time.AfterFunc(timeout, func() {
+ // Panic — instead of calling t.Error and returning from the test — so
+ // that we get a useful goroutine dump if the test times out, especially
+ // if GOTRACEBACK=system or GOTRACEBACK=crash is set.
if atomic.LoadUint32(&started) == 0 {
- t.Log("finalizer did not start")
+ panic("finalizer did not start")
} else {
- t.Logf("finalizer started %s ago and finished %d iterations", time.Since(startTime), atomic.LoadUint32(&progress))
+ panic(fmt.Sprintf("finalizer started %s ago (%s after registration) and ran %d iterations, but did not return", time.Since(finalizerStart), finalizerStart.Sub(setFinalizerTime), atomic.LoadUint32(&progress)))
}
- t.Log("first growStack took", growDuration)
- t.Error("finalizer did not run")
- return
- }
- }()
+ })
+ defer timer.Stop()
+ }
+
+ GC()
wg.Wait()
+ t.Logf("finalizer started after %s and ran %d iterations in %v", finalizerStart.Sub(setFinalizerTime), atomic.LoadUint32(&progress), time.Since(finalizerStart))
}
// ... and in init
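
The finalizer timeout above is now derived from the test's own deadline rather than a fixed 20s scaled by GO_TEST_TIMEOUT_SCALE: pad t.Deadline() slightly, arm a time.AfterFunc that panics with diagnostic state, and stop the timer on normal completion. A minimal sketch of that pattern in an ordinary test (TestSomethingSlow and doSlowWork are invented names):

    package example_test

    import (
        "testing"
        "time"
    )

    func doSlowWork() { time.Sleep(10 * time.Millisecond) } // stand-in for the real work

    func TestSomethingSlow(t *testing.T) {
        if d, ok := t.Deadline(); ok {
            // Leave ~5% of the budget so the panic (and its goroutine dump)
            // fires before 'go test' kills the binary from outside.
            timer := time.AfterFunc(time.Until(d)*19/20, func() {
                panic("TestSomethingSlow: still running near the deadline")
            })
            defer timer.Stop()
        }
        doSlowWork()
    }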
@@ -592,6 +574,34 @@ func count21(n int) int { return 1 + count22(n-1) }
func count22(n int) int { return 1 + count23(n-1) }
func count23(n int) int { return 1 + count1(n-1) }
+type stkobjT struct {
+ p *stkobjT
+ x int64
+ y [20]int // consume some stack
+}
+
+// Sum creates a linked list of stkobjTs.
+func Sum(n int64, p *stkobjT) {
+ if n == 0 {
+ return
+ }
+ s := stkobjT{p: p, x: n}
+ Sum(n-1, &s)
+ p.x += s.x
+}
+
+func BenchmarkStackCopyWithStkobj(b *testing.B) {
+ c := make(chan bool)
+ for i := 0; i < b.N; i++ {
+ go func() {
+ var s stkobjT
+ Sum(100000, &s)
+ c <- true
+ }()
+ <-c
+ }
+}
+
type structWithMethod struct{}
func (s structWithMethod) caller() string {
@@ -871,7 +881,7 @@ func deferHeapAndStack(n int) (r int) {
}
// Pass a value to escapeMe to force it to escape.
-var escapeMe = func(x interface{}) {}
+var escapeMe = func(x any) {}
// Test that when F -> G is inlined and F is excluded from stack
// traces, G still appears.
diff --git a/libgo/go/runtime/string.go b/libgo/go/runtime/string.go
index 44b9314..a5f50ba 100644
--- a/libgo/go/runtime/string.go
+++ b/libgo/go/runtime/string.go
@@ -5,8 +5,9 @@
package runtime
import (
+ "internal/abi"
"internal/bytealg"
- "runtime/internal/sys"
+ "internal/goarch"
"unsafe"
)
@@ -88,14 +89,17 @@ func slicebytetostring(buf *tmpBuf, ptr *byte, n int) (str string) {
racereadrangepc(unsafe.Pointer(ptr),
uintptr(n),
getcallerpc(),
- funcPC(slicebytetostring))
+ abi.FuncPCABIInternal(slicebytetostring))
}
if msanenabled {
msanread(unsafe.Pointer(ptr), uintptr(n))
}
+ if asanenabled {
+ asanread(unsafe.Pointer(ptr), uintptr(n))
+ }
if n == 1 {
p := unsafe.Pointer(&staticuint64s[*ptr])
- if sys.BigEndian {
+ if goarch.BigEndian {
p = add(p, 7)
}
stringStructOf(&str).str = p
@@ -144,11 +148,14 @@ func slicebytetostringtmp(ptr *byte, n int) (str string) {
racereadrangepc(unsafe.Pointer(ptr),
uintptr(n),
getcallerpc(),
- funcPC(slicebytetostringtmp))
+ abi.FuncPCABIInternal(slicebytetostringtmp))
}
if msanenabled && n > 0 {
msanread(unsafe.Pointer(ptr), uintptr(n))
}
+ if asanenabled && n > 0 {
+ asanread(unsafe.Pointer(ptr), uintptr(n))
+ }
stringStructOf(&str).str = unsafe.Pointer(ptr)
stringStructOf(&str).len = n
return
@@ -195,11 +202,14 @@ func slicerunetostring(buf *tmpBuf, a []rune) string {
racereadrangepc(unsafe.Pointer(&a[0]),
uintptr(len(a))*unsafe.Sizeof(a[0]),
getcallerpc(),
- funcPC(slicerunetostring))
+ abi.FuncPCABIInternal(slicerunetostring))
}
if msanenabled && len(a) > 0 {
msanread(unsafe.Pointer(&a[0]), uintptr(len(a))*unsafe.Sizeof(a[0]))
}
+ if asanenabled && len(a) > 0 {
+ asanread(unsafe.Pointer(&a[0]), uintptr(len(a))*unsafe.Sizeof(a[0]))
+ }
var dum [4]byte
size1 := 0
for _, r := range a {
diff --git a/libgo/go/runtime/stubs.go b/libgo/go/runtime/stubs.go
index e24313c..268ef5e 100644
--- a/libgo/go/runtime/stubs.go
+++ b/libgo/go/runtime/stubs.go
@@ -5,7 +5,8 @@
package runtime
import (
- "runtime/internal/sys"
+ "internal/goarch"
+ "runtime/internal/math"
"unsafe"
)
@@ -119,20 +120,32 @@ func reflect_memmove(to, from unsafe.Pointer, n uintptr) {
func memcmp(a, b unsafe.Pointer, size uintptr) int32
// exported value for testing
-var hashLoad = float32(loadFactorNum) / float32(loadFactorDen)
+const hashLoad = float32(loadFactorNum) / float32(loadFactorDen)
//go:nosplit
func fastrand() uint32 {
mp := getg().m
+ // Implement wyrand: https://github.com/wangyi-fudan/wyhash
+ // Only platforms where math.Mul64 can be lowered
+ // by the compiler should be in this list.
+ if goarch.IsAmd64|goarch.IsArm64|goarch.IsPpc64|
+ goarch.IsPpc64le|goarch.IsMips64|goarch.IsMips64le|
+ goarch.IsS390x|goarch.IsRiscv64 == 1 {
+ mp.fastrand += 0xa0761d6478bd642f
+ hi, lo := math.Mul64(mp.fastrand, mp.fastrand^0xe7037ed1a0b428db)
+ return uint32(hi ^ lo)
+ }
+
// Implement xorshift64+: 2 32-bit xorshift sequences added together.
// Shift triplet [17,7,16] was calculated as indicated in Marsaglia's
// Xorshift paper: https://www.jstatsoft.org/article/view/v008i14/xorshift.pdf
// This generator passes the SmallCrush suite, part of TestU01 framework:
// http://simul.iro.umontreal.ca/testu01/tu01.html
- s1, s0 := mp.fastrand[0], mp.fastrand[1]
+ t := (*[2]uint32)(unsafe.Pointer(&mp.fastrand))
+ s1, s0 := t[0], t[1]
s1 ^= s1 << 17
s1 = s1 ^ s0 ^ s1>>7 ^ s0>>16
- mp.fastrand[0], mp.fastrand[1] = s0, s1
+ t[0], t[1] = s0, s1
return s0 + s1
}
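
For context: the per-m fastrand state is now a single 64-bit word (reinterpreted as two uint32s on the xorshift fallback path), and on the listed 64-bit platforms the generator is wyrand, whose whole step is an add and a 64x64-to-128-bit multiply. Stripped of the runtime plumbing, the step looks roughly like this (a sketch using math/bits in ordinary Go; the runtime uses runtime/internal/math.Mul64 instead):

    package main

    import (
        "fmt"
        "math/bits"
    )

    // wyrandStep advances a wyrand state and returns a 32-bit output,
    // mirroring the fast path added to fastrand above.
    func wyrandStep(state *uint64) uint32 {
        *state += 0xa0761d6478bd642f
        hi, lo := bits.Mul64(*state, *state^0xe7037ed1a0b428db)
        return uint32(hi ^ lo)
    }

    func main() {
        var s uint64 = 1
        for i := 0; i < 4; i++ {
            fmt.Printf("%08x\n", wyrandStep(&s))
        }
    }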
@@ -143,8 +156,8 @@ func fastrandn(n uint32) uint32 {
return uint32(uint64(fastrand()) * uint64(n) >> 32)
}
-//go:linkname sync_fastrand sync.fastrand
-func sync_fastrand() uint32 { return fastrand() }
+//go:linkname sync_fastrandn sync.fastrandn
+func sync_fastrandn(n uint32) uint32 { return fastrandn(n) }
//go:linkname net_fastrand net.fastrand
func net_fastrand() uint32 { return fastrand() }
@@ -335,7 +348,7 @@ func rethrowException()
// used by the stack unwinder.
func unwindExceptionSize() uintptr
-const uintptrMask = 1<<(8*sys.PtrSize) - 1
+const uintptrMask = 1<<(8*goarch.PtrSize) - 1
type bitvector struct {
n int32 // # of bits
diff --git a/libgo/go/runtime/stubs2.go b/libgo/go/runtime/stubs2.go
index c9e5f3d..0b9e605 100644
--- a/libgo/go/runtime/stubs2.go
+++ b/libgo/go/runtime/stubs2.go
@@ -2,8 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build !js && !plan9 && !windows
-// +build !js,!plan9,!windows
+//go:build !aix && !darwin && !js && !openbsd && !plan9 && !solaris && !windows
package runtime
diff --git a/libgo/go/runtime/stubs3.go b/libgo/go/runtime/stubs3.go
index cb900b2..3d7ed0c 100644
--- a/libgo/go/runtime/stubs3.go
+++ b/libgo/go/runtime/stubs3.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//-go:build !aix && !darwin && !freebsd && !openbsd && !plan9 && !solaris
-// -build !aix,!darwin,!freebsd,!openbsd,!plan9,!solaris
package runtime
diff --git a/libgo/go/runtime/stubs_linux.go b/libgo/go/runtime/stubs_linux.go
index b8c9579..3e50f3b 100644
--- a/libgo/go/runtime/stubs_linux.go
+++ b/libgo/go/runtime/stubs_linux.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build linux
-// +build linux
package runtime
diff --git a/libgo/go/runtime/stubs_nonlinux.go b/libgo/go/runtime/stubs_nonlinux.go
index 4f081d5..a283958 100644
--- a/libgo/go/runtime/stubs_nonlinux.go
+++ b/libgo/go/runtime/stubs_nonlinux.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build !linux
-// +build !linux
package runtime
diff --git a/libgo/go/runtime/symtab_test.go b/libgo/go/runtime/symtab_test.go
index ddf64f6..920db09 100644
--- a/libgo/go/runtime/symtab_test.go
+++ b/libgo/go/runtime/symtab_test.go
@@ -256,3 +256,35 @@ func TestFunctionAlignmentTraceback(t *testing.T) {
t.Errorf("frames.Next() got %+v want %+v", frame.Func, f)
}
}
+
+func BenchmarkFunc(b *testing.B) {
+ pc, _, _, ok := runtime.Caller(0)
+ if !ok {
+ b.Fatal("failed to look up PC")
+ }
+ f := runtime.FuncForPC(pc)
+ b.Run("Name", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ name := f.Name()
+ if name != "runtime_test.BenchmarkFunc" {
+ b.Fatalf("unexpected name %q", name)
+ }
+ }
+ })
+ b.Run("Entry", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ pc := f.Entry()
+ if pc == 0 {
+ b.Fatal("zero PC")
+ }
+ }
+ })
+ b.Run("FileLine", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ file, line := f.FileLine(pc)
+ if !strings.HasSuffix(file, "symtab_test.go") || line == 0 {
+ b.Fatalf("unexpected file/line %q:%d", file, line)
+ }
+ }
+ })
+}
diff --git a/libgo/go/runtime/sys_wasm.go b/libgo/go/runtime/sys_wasm.go
index 057ed4c..e6e7f47 100644
--- a/libgo/go/runtime/sys_wasm.go
+++ b/libgo/go/runtime/sys_wasm.go
@@ -5,6 +5,7 @@
package runtime
import (
+ "internal/goarch"
"runtime/internal/sys"
"unsafe"
)
@@ -30,7 +31,7 @@ func wasmExit(code int32)
// and then stopped before the first instruction in fn.
func gostartcall(buf *gobuf, fn, ctxt unsafe.Pointer) {
sp := buf.sp
- sp -= sys.PtrSize
+ sp -= goarch.PtrSize
*(*uintptr)(unsafe.Pointer(sp)) = buf.pc
buf.sp = sp
buf.pc = uintptr(fn)
diff --git a/libgo/go/runtime/testdata/testprog/badtraceback.go b/libgo/go/runtime/testdata/testprog/badtraceback.go
index d558adc..09aa2b8 100644
--- a/libgo/go/runtime/testdata/testprog/badtraceback.go
+++ b/libgo/go/runtime/testdata/testprog/badtraceback.go
@@ -17,6 +17,9 @@ func init() {
func BadTraceback() {
// Disable GC to prevent traceback at unexpected time.
debug.SetGCPercent(-1)
+ // Out of an abundance of caution, also make sure that there are
+ // no GCs actively in progress.
+ runtime.GC()
// Run badLR1 on its own stack to minimize the stack size and
// exercise the stack bounds logic in the hex dump.
diff --git a/libgo/go/runtime/testdata/testprog/checkptr.go b/libgo/go/runtime/testdata/testprog/checkptr.go
index 9c55613..b27e5f7 100644
--- a/libgo/go/runtime/testdata/testprog/checkptr.go
+++ b/libgo/go/runtime/testdata/testprog/checkptr.go
@@ -20,6 +20,7 @@ func init() {
register("CheckPtrSmall", CheckPtrSmall)
register("CheckPtrSliceOK", CheckPtrSliceOK)
register("CheckPtrSliceFail", CheckPtrSliceFail)
+ register("CheckPtrAlignmentNested", CheckPtrAlignmentNested)
}
func CheckPtrAlignmentNoPtr() {
@@ -96,3 +97,10 @@ func CheckPtrSliceFail() {
sink2 = p
sink2 = unsafe.Slice(p, 100)
}
+
+func CheckPtrAlignmentNested() {
+ s := make([]int8, 100)
+ p := unsafe.Pointer(&s[0])
+ n := 9
+ _ = ((*[10]int8)(unsafe.Pointer((*[10]int64)(unsafe.Pointer(&p)))))[:n:n]
+}
diff --git a/libgo/go/runtime/testdata/testprog/gc.go b/libgo/go/runtime/testdata/testprog/gc.go
index 74732cd..215228e 100644
--- a/libgo/go/runtime/testdata/testprog/gc.go
+++ b/libgo/go/runtime/testdata/testprog/gc.go
@@ -90,7 +90,7 @@ func GCFairness2() {
runtime.GOMAXPROCS(1)
debug.SetGCPercent(1)
var count [3]int64
- var sink [3]interface{}
+ var sink [3]any
for i := range count {
go func(i int) {
for {
@@ -132,81 +132,88 @@ func GCFairness2() {
func GCPhys() {
// This test ensures that heap-growth scavenging is working as intended.
//
- // It sets up a specific scenario: it allocates two pairs of objects whose
- // sizes sum to size. One object in each pair is "small" (though must be
- // large enough to be considered a large object by the runtime) and one is
- // large. The small objects are kept while the large objects are freed,
- // creating two large unscavenged holes in the heap. The heap goal should
- // also be small as a result (so size must be at least as large as the
- // minimum heap size). We then allocate one large object, bigger than both
- // pairs of objects combined. This allocation, because it will tip
- // HeapSys-HeapReleased well above the heap goal, should trigger heap-growth
- // scavenging and scavenge most, if not all, of the large holes we created
- // earlier.
+ // It attempts to construct a sizeable "swiss cheese" heap, with many
+ // allocChunk-sized holes. Then, it triggers a heap growth by trying to
+ // allocate as much memory as would fit in those holes.
+ //
+ // The heap growth should cause a large number of those holes to be
+ // returned to the OS.
+
const (
- // Size must be also large enough to be considered a large
- // object (not in any size-segregated span).
- size = 4 << 20
- split = 64 << 10
- objects = 2
+ // The total amount of memory we're willing to allocate.
+ allocTotal = 32 << 20
// The page cache could hide 64 8-KiB pages from the scavenger today.
maxPageCache = (8 << 10) * 64
-
- // Reduce GOMAXPROCS down to 4 if it's greater. We need to bound the amount
- // of memory held in the page cache because the scavenger can't reach it.
- // The page cache will hold at most maxPageCache of memory per-P, so this
- // bounds the amount of memory hidden from the scavenger to 4*maxPageCache
- // at most.
- maxProcs = 4
)
- // Set GOGC so that this test operates under consistent assumptions.
- debug.SetGCPercent(100)
- procs := runtime.GOMAXPROCS(-1)
- if procs > maxProcs {
- defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(maxProcs))
- procs = runtime.GOMAXPROCS(-1)
+
+ // How big the allocations are needs to depend on the page size.
+ // If the page size is too big and the allocations are too small,
+ // they might not be aligned to the physical page size, so the scavenger
+ // will gloss over them.
+ pageSize := os.Getpagesize()
+ var allocChunk int
+ if pageSize <= 8<<10 {
+ allocChunk = 64 << 10
+ } else {
+ allocChunk = 512 << 10
}
- // Save objects which we want to survive, and condemn objects which we don't.
- // Note that we condemn objects in this way and release them all at once in
- // order to avoid having the GC start freeing up these objects while the loop
- // is still running and filling in the holes we intend to make.
- saved := make([][]byte, 0, objects+1)
- condemned := make([][]byte, 0, objects)
- for i := 0; i < 2*objects; i++ {
+ allocs := allocTotal / allocChunk
+
+ // Set GC percent just so this test is a little more consistent in the
+ // face of varying environments.
+ debug.SetGCPercent(100)
+
+ // Set GOMAXPROCS to 1 to minimize the amount of memory held in the page cache,
+ // and to reduce the chance that the background scavenger gets scheduled.
+ defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))
+
+ // Allocate allocTotal bytes of memory in allocChunk byte chunks.
+ // Alternate between whether the chunk will be held live or will be
+ // condemned to GC to create holes in the heap.
+ saved := make([][]byte, allocs/2+1)
+ condemned := make([][]byte, allocs/2)
+ for i := 0; i < allocs; i++ {
+ b := make([]byte, allocChunk)
if i%2 == 0 {
- saved = append(saved, make([]byte, split))
+ saved = append(saved, b)
} else {
- condemned = append(condemned, make([]byte, size-split))
+ condemned = append(condemned, b)
}
}
- condemned = nil
- // Clean up the heap. This will free up every other object created above
- // (i.e. everything in condemned) creating holes in the heap.
- // Also, if the condemned objects are still being swept, its possible that
- // the scavenging that happens as a result of the next allocation won't see
- // the holes at all. We call runtime.GC() twice here so that when we allocate
- // our large object there's no race with sweeping.
- runtime.GC()
+
+ // Run a GC cycle just so we're at a consistent state.
runtime.GC()
- // Perform one big allocation which should also scavenge any holes.
- //
- // The heap goal will rise after this object is allocated, so it's very
- // important that we try to do all the scavenging in a single allocation
- // that exceeds the heap goal. Otherwise the rising heap goal could foil our
- // test.
- saved = append(saved, make([]byte, objects*size))
- // Clean up the heap again just to put it in a known state.
+
+ // Drop the only reference to all the condemned memory.
+ condemned = nil
+
+ // Clear the condemned memory.
runtime.GC()
+
+ // At this point, the background scavenger is likely running
+ // and could pick up the work, so the next line of code doesn't
+ // end up doing anything. That's fine. What's important is that
+ // this test fails somewhat regularly if the runtime doesn't
+ // scavenge on heap growth, and doesn't fail at all otherwise.
+
+ // Make a large allocation that in theory could fit, but won't
+ // because we turned the heap into swiss cheese.
+ saved = append(saved, make([]byte, allocTotal/2))
+
// heapBacked is an estimate of the amount of physical memory used by
// this test. HeapSys is an estimate of the size of the mapped virtual
// address space (which may or may not be backed by physical pages)
// whereas HeapReleased is an estimate of the amount of bytes returned
// to the OS. Their difference then roughly corresponds to the amount
// of virtual address space that is backed by physical pages.
+ //
+ // heapBacked also subtracts out maxPageCache bytes of memory because
+ // this is memory that may be hidden from the scavenger per-P. Since
+ // GOMAXPROCS=1 here, subtracting it out once is fine.
var stats runtime.MemStats
runtime.ReadMemStats(&stats)
- heapBacked := stats.HeapSys - stats.HeapReleased
+ heapBacked := stats.HeapSys - stats.HeapReleased - maxPageCache
// If heapBacked does not exceed the heap goal by more than retainExtraPercent
// then the scavenger is working as expected; the newly-created holes have been
// scavenged immediately as part of the allocations which cannot fit in the holes.
@@ -216,19 +223,14 @@ func GCPhys() {
// to other allocations that happen during this test we may still see some physical
// memory over-use.
overuse := (float64(heapBacked) - float64(stats.HeapAlloc)) / float64(stats.HeapAlloc)
- // Compute the threshold.
+ // Check against our overuse threshold, which is what the scavenger always reserves
+ // to encourage allocation of memory that doesn't need to be faulted in.
//
- // In theory, this threshold should just be zero, but that's not possible in practice.
- // Firstly, the runtime's page cache can hide up to maxPageCache of free memory from the
- // scavenger per P. To account for this, we increase the threshold by the ratio between the
- // total amount the runtime could hide from the scavenger to the amount of memory we expect
- // to be able to scavenge here, which is (size-split)*objects. This computation is the crux
- // GOMAXPROCS above; if GOMAXPROCS is too high the threshold just becomes 100%+ since the
- // amount of memory being allocated is fixed. Then we add 5% to account for noise, such as
- // other allocations this test may have performed that we don't explicitly account for The
- // baseline threshold here is around 11% for GOMAXPROCS=1, capping out at around 30% for
- // GOMAXPROCS=4.
- threshold := 0.05 + float64(procs)*maxPageCache/float64((size-split)*objects)
+ // Add additional slack in case the page size is large and the scavenger
+ // can't reach that memory because it doesn't constitute a complete aligned
+ // physical page. Assume the worst case: a full physical page out of each
+ // allocation.
+ threshold := 0.1 + float64(pageSize)/float64(allocChunk)
if overuse <= threshold {
fmt.Println("OK")
return
@@ -243,6 +245,7 @@ func GCPhys() {
"(alloc: %d, goal: %d, sys: %d, rel: %d, objs: %d)\n", threshold*100, overuse*100,
stats.HeapAlloc, stats.NextGC, stats.HeapSys, stats.HeapReleased, len(saved))
runtime.KeepAlive(saved)
+ runtime.KeepAlive(condemned)
}
// Test that defer closure is correctly scanned when the stack is scanned.
@@ -263,9 +266,9 @@ func DeferLiveness() {
}
//go:noinline
-func escape(x interface{}) { sink2 = x; sink2 = nil }
+func escape(x any) { sink2 = x; sink2 = nil }
-var sink2 interface{}
+var sink2 any
// Test zombie object detection and reporting.
func GCZombie() {
diff --git a/libgo/go/runtime/testdata/testprog/numcpu_freebsd.go b/libgo/go/runtime/testdata/testprog/numcpu_freebsd.go
index aff36ec..7209f67 100644
--- a/libgo/go/runtime/testdata/testprog/numcpu_freebsd.go
+++ b/libgo/go/runtime/testdata/testprog/numcpu_freebsd.go
@@ -85,19 +85,18 @@ func getList() ([]string, error) {
if err != nil {
return nil, fmt.Errorf("fail to execute '%s': %s", cmdline, err)
}
- pos := bytes.IndexRune(output, '\n')
- if pos == -1 {
+ output, _, ok := bytes.Cut(output, []byte("\n"))
+ if !ok {
return nil, fmt.Errorf("invalid output from '%s', '\\n' not found: %s", cmdline, output)
}
- output = output[0:pos]
- pos = bytes.IndexRune(output, ':')
- if pos == -1 {
+ _, cpus, ok := bytes.Cut(output, []byte(":"))
+ if !ok {
return nil, fmt.Errorf("invalid output from '%s', ':' not found: %s", cmdline, output)
}
var list []string
- for _, val := range bytes.Split(output[pos+1:], []byte(",")) {
+ for _, val := range bytes.Split(cpus, []byte(",")) {
index := string(bytes.TrimSpace(val))
if len(index) == 0 {
continue
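
bytes.Cut (and strings.Cut, used the same way in traceback_ancestors.go below) is new in Go 1.18: it returns the text before the first separator, the text after it, and whether the separator was found, replacing the Index-plus-slicing pairs removed above. A quick illustration of the semantics relied on here, with made-up input:

    package main

    import (
        "bytes"
        "fmt"
    )

    func main() {
        out := []byte("FLAGS: cpu0, cpu1\nsecond line")

        line, _, ok := bytes.Cut(out, []byte("\n")) // keep only the first line
        fmt.Printf("%q %v\n", line, ok)             // "FLAGS: cpu0, cpu1" true

        _, cpus, ok := bytes.Cut(line, []byte(":")) // keep what follows the colon
        fmt.Printf("%q %v\n", cpus, ok)             // " cpu0, cpu1" true

        _, _, ok = bytes.Cut(line, []byte(";")) // separator absent: ok is false
        fmt.Println(ok)                         // false
    }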
diff --git a/libgo/go/runtime/testdata/testprog/preempt.go b/libgo/go/runtime/testdata/testprog/preempt.go
index 1c74d0e..fb6755a 100644
--- a/libgo/go/runtime/testdata/testprog/preempt.go
+++ b/libgo/go/runtime/testdata/testprog/preempt.go
@@ -20,6 +20,10 @@ func AsyncPreempt() {
runtime.GOMAXPROCS(1)
// Disable GC so we have complete control of what we're testing.
debug.SetGCPercent(-1)
+ // Out of an abundance of caution, also make sure that there are
+ // no GCs actively in progress. The sweep phase of a GC cycle
+ // for instance tries to preempt Ps at the very beginning.
+ runtime.GC()
// Start a goroutine with no sync safe-points.
var ready, ready2 uint32
diff --git a/libgo/go/runtime/testdata/testprog/signal.go b/libgo/go/runtime/testdata/testprog/signal.go
index 417e105..cc5ac8a 100644
--- a/libgo/go/runtime/testdata/testprog/signal.go
+++ b/libgo/go/runtime/testdata/testprog/signal.go
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+//go:build !windows && !plan9
// +build !windows,!plan9
package main
diff --git a/libgo/go/runtime/testdata/testprog/sleep.go b/libgo/go/runtime/testdata/testprog/sleep.go
index 86e2f6c..b230e60 100644
--- a/libgo/go/runtime/testdata/testprog/sleep.go
+++ b/libgo/go/runtime/testdata/testprog/sleep.go
@@ -4,7 +4,10 @@
package main
-import "time"
+import (
+ "os"
+ "time"
+)
// for golang.org/issue/27250
@@ -13,5 +16,7 @@ func init() {
}
func After1() {
+ os.Stdout.WriteString("ready\n")
+ os.Stdout.Close()
<-time.After(1 * time.Second)
}
diff --git a/libgo/go/runtime/testdata/testprog/syscalls_none.go b/libgo/go/runtime/testdata/testprog/syscalls_none.go
index 7f8ded3..068bb59 100644
--- a/libgo/go/runtime/testdata/testprog/syscalls_none.go
+++ b/libgo/go/runtime/testdata/testprog/syscalls_none.go
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+//go:build !linux
// +build !linux
package main
diff --git a/libgo/go/runtime/testdata/testprog/traceback_ancestors.go b/libgo/go/runtime/testdata/testprog/traceback_ancestors.go
index 0ee402c..1d0d00b 100644
--- a/libgo/go/runtime/testdata/testprog/traceback_ancestors.go
+++ b/libgo/go/runtime/testdata/testprog/traceback_ancestors.go
@@ -33,30 +33,27 @@ func printStack() {
for {
n := runtime.Stack(buf, true)
if n < len(buf) {
- tb := string(buf[:n])
+ all := string(buf[:n])
+ var saved string
// Delete any ignored goroutines, if present.
- pos := 0
- for pos < len(tb) {
- next := pos + strings.Index(tb[pos:], "\n\n")
- if next < pos {
- next = len(tb)
- } else {
- next += len("\n\n")
- }
+ for all != "" {
+ var g string
+ g, all, _ = strings.Cut(all, "\n\n")
- if strings.HasPrefix(tb[pos:], "goroutine ") {
- id := tb[pos+len("goroutine "):]
- id = id[:strings.IndexByte(id, ' ')]
+ if strings.HasPrefix(g, "goroutine ") {
+ id, _, _ := strings.Cut(strings.TrimPrefix(g, "goroutine "), " ")
if ignoreGoroutines[id] {
- tb = tb[:pos] + tb[next:]
- next = pos
+ continue
}
}
- pos = next
+ if saved != "" {
+ saved += "\n\n"
+ }
+ saved += g
}
- fmt.Print(tb)
+ fmt.Print(saved)
return
}
buf = make([]byte, 2*len(buf))
@@ -89,11 +86,10 @@ func recurseThenCallGo(w chan struct{}, frames int, goroutines int, main bool) {
func goroutineID() string {
buf := make([]byte, 128)
runtime.Stack(buf, false)
- const prefix = "goroutine "
- if !bytes.HasPrefix(buf, []byte(prefix)) {
+ prefix := []byte("goroutine ")
+ if !bytes.HasPrefix(buf, prefix) {
panic(fmt.Sprintf("expected %q at beginning of traceback:\n%s", prefix, buf))
}
- buf = buf[len(prefix):]
- n := bytes.IndexByte(buf, ' ')
- return string(buf[:n])
+ id, _, _ := bytes.Cut(bytes.TrimPrefix(buf, prefix), []byte(" "))
+ return string(id)
}
diff --git a/libgo/go/runtime/testdata/testprogcgo/callback.go b/libgo/go/runtime/testdata/testprogcgo/callback.go
index 2f7568c..45baeb1 100644
--- a/libgo/go/runtime/testdata/testprogcgo/callback.go
+++ b/libgo/go/runtime/testdata/testprogcgo/callback.go
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+//go:build !plan9 && !windows
// +build !plan9,!windows
package main
@@ -70,7 +71,7 @@ func grow1(x, sum *int) int {
func CgoCallbackGC() {
P := 100
- if os.Getenv("RUNTIME_TESTING_SHORT") != "" {
+ if os.Getenv("RUNTIME_TEST_SHORT") != "" {
P = 10
}
done := make(chan bool)
diff --git a/libgo/go/runtime/testdata/testprogcgo/catchpanic.go b/libgo/go/runtime/testdata/testprogcgo/catchpanic.go
index 55a606d..c722d40 100644
--- a/libgo/go/runtime/testdata/testprogcgo/catchpanic.go
+++ b/libgo/go/runtime/testdata/testprogcgo/catchpanic.go
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+//go:build !plan9 && !windows
// +build !plan9,!windows
package main
diff --git a/libgo/go/runtime/testdata/testprogcgo/dropm.go b/libgo/go/runtime/testdata/testprogcgo/dropm.go
index 9e782f5..700b7fa 100644
--- a/libgo/go/runtime/testdata/testprogcgo/dropm.go
+++ b/libgo/go/runtime/testdata/testprogcgo/dropm.go
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+//go:build !plan9 && !windows
// +build !plan9,!windows
// Test that a sequence of callbacks from C to Go get the same m.
diff --git a/libgo/go/runtime/testdata/testprogcgo/eintr.go b/libgo/go/runtime/testdata/testprogcgo/eintr.go
index 1722a75..b35b280 100644
--- a/libgo/go/runtime/testdata/testprogcgo/eintr.go
+++ b/libgo/go/runtime/testdata/testprogcgo/eintr.go
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+//go:build !plan9 && !windows
// +build !plan9,!windows
package main
diff --git a/libgo/go/runtime/testdata/testprogcgo/exec.go b/libgo/go/runtime/testdata/testprogcgo/exec.go
index 15723c7..c268bcd 100644
--- a/libgo/go/runtime/testdata/testprogcgo/exec.go
+++ b/libgo/go/runtime/testdata/testprogcgo/exec.go
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+//go:build !plan9 && !windows
// +build !plan9,!windows
package main
diff --git a/libgo/go/runtime/testdata/testprogcgo/gprof.go b/libgo/go/runtime/testdata/testprogcgo/gprof.go
new file mode 100644
index 0000000..85986cc
--- /dev/null
+++ b/libgo/go/runtime/testdata/testprogcgo/gprof.go
@@ -0,0 +1,49 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !gccgo
+// +build !gccgo
+
+package main
+
+// Test taking a goroutine profile with C traceback.
+
+/*
+// Defined in gprof_c.c.
+void CallGoSleep(void);
+void gprofCgoTraceback(void* parg);
+void gprofCgoContext(void* parg);
+*/
+import "C"
+
+import (
+ "fmt"
+ "io"
+ "runtime"
+ "runtime/pprof"
+ "time"
+ "unsafe"
+)
+
+func init() {
+ register("GoroutineProfile", GoroutineProfile)
+}
+
+func GoroutineProfile() {
+ runtime.SetCgoTraceback(0, unsafe.Pointer(C.gprofCgoTraceback), unsafe.Pointer(C.gprofCgoContext), nil)
+
+ go C.CallGoSleep()
+ go C.CallGoSleep()
+ go C.CallGoSleep()
+ time.Sleep(1 * time.Second)
+
+ prof := pprof.Lookup("goroutine")
+ prof.WriteTo(io.Discard, 1)
+ fmt.Println("OK")
+}
+
+//export GoSleep
+func GoSleep() {
+ time.Sleep(time.Hour)
+}
diff --git a/libgo/go/runtime/testdata/testprogcgo/gprof_c.c b/libgo/go/runtime/testdata/testprogcgo/gprof_c.c
new file mode 100644
index 0000000..11b0649
--- /dev/null
+++ b/libgo/go/runtime/testdata/testprogcgo/gprof_c.c
@@ -0,0 +1,33 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !gccgo
+// +build !gccgo
+
+// The C definitions for gprof.go. That file uses //export so
+// it can't put function definitions in the "C" import comment.
+
+#include <stdint.h>
+#include <stdlib.h>
+
+// Functions exported from Go.
+extern void GoSleep();
+
+struct cgoContextArg {
+ uintptr_t context;
+};
+
+void gprofCgoContext(void *arg) {
+ ((struct cgoContextArg*)arg)->context = 1;
+}
+
+void gprofCgoTraceback(void *arg) {
+ // spend some time here so the P is more likely to be retaken.
+ volatile int i;
+ for (i = 0; i < 123456789; i++);
+}
+
+void CallGoSleep() {
+ GoSleep();
+}
diff --git a/libgo/go/runtime/testdata/testprogcgo/lockosthread.go b/libgo/go/runtime/testdata/testprogcgo/lockosthread.go
index 36423d9..8fcea35 100644
--- a/libgo/go/runtime/testdata/testprogcgo/lockosthread.go
+++ b/libgo/go/runtime/testdata/testprogcgo/lockosthread.go
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+//go:build !plan9 && !windows
// +build !plan9,!windows
package main
diff --git a/libgo/go/runtime/testdata/testprogcgo/needmdeadlock.go b/libgo/go/runtime/testdata/testprogcgo/needmdeadlock.go
index 5a9c359..b95ec77 100644
--- a/libgo/go/runtime/testdata/testprogcgo/needmdeadlock.go
+++ b/libgo/go/runtime/testdata/testprogcgo/needmdeadlock.go
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+//go:build !plan9 && !windows
// +build !plan9,!windows
package main
diff --git a/libgo/go/runtime/testdata/testprogcgo/numgoroutine.go b/libgo/go/runtime/testdata/testprogcgo/numgoroutine.go
index b7134a4..e09706d 100644
--- a/libgo/go/runtime/testdata/testprogcgo/numgoroutine.go
+++ b/libgo/go/runtime/testdata/testprogcgo/numgoroutine.go
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+//go:build !plan9 && !windows
// +build !plan9,!windows
package main
diff --git a/libgo/go/runtime/testdata/testprogcgo/panic.c b/libgo/go/runtime/testdata/testprogcgo/panic.c
new file mode 100644
index 0000000..deb5ed5
--- /dev/null
+++ b/libgo/go/runtime/testdata/testprogcgo/panic.c
@@ -0,0 +1,9 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+extern void panic_callback();
+
+void call_callback(void) {
+ panic_callback();
+}
diff --git a/libgo/go/runtime/testdata/testprogcgo/panic.go b/libgo/go/runtime/testdata/testprogcgo/panic.go
new file mode 100644
index 0000000..57ac895
--- /dev/null
+++ b/libgo/go/runtime/testdata/testprogcgo/panic.go
@@ -0,0 +1,23 @@
+package main
+
+// This program will crash.
+// We want to test unwinding from a cgo callback.
+
+/*
+void call_callback(void);
+*/
+import "C"
+
+func init() {
+ register("PanicCallback", PanicCallback)
+}
+
+//export panic_callback
+func panic_callback() {
+ var i *int
+ *i = 42
+}
+
+func PanicCallback() {
+ C.call_callback()
+}
diff --git a/libgo/go/runtime/testdata/testprogcgo/pprof.go b/libgo/go/runtime/testdata/testprogcgo/pprof.go
index 668d4e0..bbf93c4 100644
--- a/libgo/go/runtime/testdata/testprogcgo/pprof.go
+++ b/libgo/go/runtime/testdata/testprogcgo/pprof.go
@@ -32,8 +32,6 @@ void cpuHog() {
void cpuHog2() {
}
-static int cpuHogCount;
-
struct cgoTracebackArg {
uintptr_t context;
uintptr_t sigContext;
@@ -50,13 +48,6 @@ void pprofCgoTraceback(void* parg) {
arg->buf[0] = (uintptr_t)(cpuHog) + 0x10;
arg->buf[1] = (uintptr_t)(cpuHog2) + 0x4;
arg->buf[2] = 0;
- ++cpuHogCount;
-}
-
-// getCpuHogCount fetches the number of times we've seen cpuHog in the
-// traceback.
-int getCpuHogCount() {
- return cpuHogCount;
}
*/
import "C"
@@ -89,7 +80,7 @@ func CgoPprof() {
}
t0 := time.Now()
- for C.getCpuHogCount() < 2 && time.Since(t0) < time.Second {
+ for time.Since(t0) < time.Second {
C.cpuHog()
}
diff --git a/libgo/go/runtime/testdata/testprogcgo/raceprof.go b/libgo/go/runtime/testdata/testprogcgo/raceprof.go
index c17bb39..3329139 100644
--- a/libgo/go/runtime/testdata/testprogcgo/raceprof.go
+++ b/libgo/go/runtime/testdata/testprogcgo/raceprof.go
@@ -3,8 +3,6 @@
// license that can be found in the LICENSE file.
//go:build ((linux && amd64) || (freebsd && amd64)) && !gccgo
-// +build linux,amd64 freebsd,amd64
-// +build !gccgo
package main
diff --git a/libgo/go/runtime/testdata/testprogcgo/racesig.go b/libgo/go/runtime/testdata/testprogcgo/racesig.go
index a079b3f..9352679 100644
--- a/libgo/go/runtime/testdata/testprogcgo/racesig.go
+++ b/libgo/go/runtime/testdata/testprogcgo/racesig.go
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+//go:build (linux && amd64) || (freebsd && amd64)
// +build linux,amd64 freebsd,amd64
package main
diff --git a/libgo/go/runtime/testdata/testprogcgo/segv.go b/libgo/go/runtime/testdata/testprogcgo/segv.go
index 3237a8c..0632475 100644
--- a/libgo/go/runtime/testdata/testprogcgo/segv.go
+++ b/libgo/go/runtime/testdata/testprogcgo/segv.go
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+//go:build !plan9 && !windows
// +build !plan9,!windows
package main
diff --git a/libgo/go/runtime/testdata/testprogcgo/sigstack.go b/libgo/go/runtime/testdata/testprogcgo/sigstack.go
index 21b668d..12ca661 100644
--- a/libgo/go/runtime/testdata/testprogcgo/sigstack.go
+++ b/libgo/go/runtime/testdata/testprogcgo/sigstack.go
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+//go:build !plan9 && !windows
// +build !plan9,!windows
// Test handling of Go-allocated signal stacks when calling from
diff --git a/libgo/go/runtime/testdata/testprogcgo/sigthrow.go b/libgo/go/runtime/testdata/testprogcgo/sigthrow.go
new file mode 100644
index 0000000..665e3b0
--- /dev/null
+++ b/libgo/go/runtime/testdata/testprogcgo/sigthrow.go
@@ -0,0 +1,20 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+// This program will abort.
+
+/*
+#include <stdlib.h>
+*/
+import "C"
+
+func init() {
+ register("Abort", Abort)
+}
+
+func Abort() {
+ C.abort()
+}
diff --git a/libgo/go/runtime/testdata/testprogcgo/threadpanic.go b/libgo/go/runtime/testdata/testprogcgo/threadpanic.go
index f9b48a9..2d24fe6 100644
--- a/libgo/go/runtime/testdata/testprogcgo/threadpanic.go
+++ b/libgo/go/runtime/testdata/testprogcgo/threadpanic.go
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+//go:build !plan9
// +build !plan9
package main
diff --git a/libgo/go/runtime/testdata/testprogcgo/threadpprof.go b/libgo/go/runtime/testdata/testprogcgo/threadpprof.go
index ed3faed..ec83d2d 100644
--- a/libgo/go/runtime/testdata/testprogcgo/threadpprof.go
+++ b/libgo/go/runtime/testdata/testprogcgo/threadpprof.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build !plan9 && !windows && !gccgo
-// +build !plan9,!windows,!gccgo
package main
@@ -34,8 +33,6 @@ void cpuHogThread() {
void cpuHogThread2() {
}
-static int cpuHogThreadCount;
-
struct cgoTracebackArg {
uintptr_t context;
uintptr_t sigContext;
@@ -50,13 +47,6 @@ void pprofCgoThreadTraceback(void* parg) {
arg->buf[0] = (uintptr_t)(cpuHogThread) + 0x10;
arg->buf[1] = (uintptr_t)(cpuHogThread2) + 0x4;
arg->buf[2] = 0;
- __sync_add_and_fetch(&cpuHogThreadCount, 1);
-}
-
-// getCPUHogThreadCount fetches the number of times we've seen cpuHogThread
-// in the traceback.
-int getCPUHogThreadCount() {
- return __sync_add_and_fetch(&cpuHogThreadCount, 0);
}
static void* cpuHogDriver(void* arg __attribute__ ((unused))) {
@@ -74,6 +64,7 @@ void runCPUHogThread(void) {
import "C"
import (
+ "context"
"fmt"
"os"
"runtime"
@@ -108,12 +99,16 @@ func pprofThread() {
os.Exit(2)
}
- C.runCPUHogThread()
+ // This goroutine may receive a profiling signal while creating the C-owned
+ // thread. If it does, the SetCgoTraceback handler will make the leaf end of
+ // the stack look almost (but not exactly) like the stacks the test case is
+ // trying to find. Attach a profiler label so the test can filter out those
+ // confusing samples.
+ pprof.Do(context.Background(), pprof.Labels("ignore", "ignore"), func(ctx context.Context) {
+ C.runCPUHogThread()
+ })
- t0 := time.Now()
- for C.getCPUHogThreadCount() < 2 && time.Since(t0) < time.Second {
- time.Sleep(100 * time.Millisecond)
- }
+ time.Sleep(1 * time.Second)
pprof.StopCPUProfile()
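
pprof.Do attaches profiler labels to everything executed inside the closure, which is what lets the test later discard samples taken while the C-owned thread is being created. A small sketch of the labeling half of that pattern (the "ignore"/"ignore" values follow the patch; the printout is only for illustration):

    package main

    import (
        "context"
        "fmt"
        "runtime/pprof"
    )

    func main() {
        // CPU profile samples taken inside the closure carry the label and
        // can be filtered out by whatever inspects the resulting profile.
        pprof.Do(context.Background(), pprof.Labels("ignore", "ignore"), func(ctx context.Context) {
            // ... work whose samples should be ignorable goes here ...
            if v, ok := pprof.Label(ctx, "ignore"); ok {
                fmt.Println("running with ignore =", v)
            }
        })
    }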
diff --git a/libgo/go/runtime/testdata/testprogcgo/threadprof.go b/libgo/go/runtime/testdata/testprogcgo/threadprof.go
index 2d4c103..d62d4b4 100644
--- a/libgo/go/runtime/testdata/testprogcgo/threadprof.go
+++ b/libgo/go/runtime/testdata/testprogcgo/threadprof.go
@@ -2,21 +2,22 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// We only build this file with the tag "threadprof", since it starts
-// a thread running a busy loop at constructor time.
-
+//go:build !plan9 && !windows
// +build !plan9,!windows
-// +build threadprof
package main
/*
#include <stdint.h>
+#include <stdlib.h>
#include <signal.h>
#include <pthread.h>
volatile int32_t spinlock;
+// Note that this thread is only started if GO_START_SIGPROF_THREAD
+// is set in the environment, which is only done when running the
+// CgoExternalThreadSIGPROF test.
static void *thread1(void *p) {
(void)p;
while (spinlock == 0)
@@ -26,9 +27,13 @@ static void *thread1(void *p) {
return NULL;
}
+// This constructor function is run when the program starts.
+// It is used for the CgoExternalThreadSIGPROF test.
__attribute__((constructor)) void issue9456() {
- pthread_t tid;
- pthread_create(&tid, 0, thread1, NULL);
+ if (getenv("GO_START_SIGPROF_THREAD") != NULL) {
+ pthread_t tid;
+ pthread_create(&tid, 0, thread1, NULL);
+ }
}
void **nullptr;
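
With the constructor above gated on GO_START_SIGPROF_THREAD, only a caller that sets the variable gets the busy-loop thread. A hedged sketch of how a driver might opt in (the binary path and argument are hypothetical; the real test harness is not part of this hunk):

package main

import (
	"fmt"
	"log"
	"os"
	"os/exec"
)

func main() {
	// Hypothetical: run the cgo test program with the opt-in variable set,
	// so its C constructor starts the SIGPROF busy-loop thread.
	cmd := exec.Command("./testprogcgo", "CgoExternalThreadSIGPROF")
	cmd.Env = append(os.Environ(), "GO_START_SIGPROF_THREAD=1")
	out, err := cmd.CombinedOutput()
	if err != nil {
		log.Fatalf("run failed: %v\n%s", err, out)
	}
	fmt.Printf("%s", out)
}
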
diff --git a/libgo/go/runtime/testdata/testprognet/signal.go b/libgo/go/runtime/testdata/testprognet/signal.go
index 4d2de79..dfa2e10 100644
--- a/libgo/go/runtime/testdata/testprognet/signal.go
+++ b/libgo/go/runtime/testdata/testprognet/signal.go
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+//go:build !windows && !plan9
// +build !windows,!plan9
// This is in testprognet instead of testprog because testprog
diff --git a/libgo/go/runtime/testdata/testprognet/signalexec.go b/libgo/go/runtime/testdata/testprognet/signalexec.go
index 4a988ef..62ebce7 100644
--- a/libgo/go/runtime/testdata/testprognet/signalexec.go
+++ b/libgo/go/runtime/testdata/testprognet/signalexec.go
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+//go:build darwin || dragonfly || freebsd || linux || netbsd || openbsd
// +build darwin dragonfly freebsd linux netbsd openbsd
// This is in testprognet instead of testprog because testprog
diff --git a/libgo/go/runtime/testdata/testwinlib/main.c b/libgo/go/runtime/testdata/testwinlib/main.c
index e84a32f..c3fe3cb 100644
--- a/libgo/go/runtime/testdata/testwinlib/main.c
+++ b/libgo/go/runtime/testdata/testwinlib/main.c
@@ -41,17 +41,20 @@ int main()
if (NULL == exceptionHandlerHandle)
{
printf("cannot add vectored exception handler\n");
+ fflush(stdout);
return 2;
}
void *continueHandlerHandle = AddVectoredContinueHandler(0, customContinueHandlder);
if (NULL == continueHandlerHandle)
{
printf("cannot add vectored continue handler\n");
+ fflush(stdout);
return 2;
}
CallMeBack(throwFromC);
RemoveVectoredContinueHandler(continueHandlerHandle);
RemoveVectoredExceptionHandler(exceptionHandlerHandle);
printf("exceptionCount: %d\ncontinueCount: %d\n", exceptionCount, continueCount);
+ fflush(stdout);
return 0;
-}
\ No newline at end of file
+}
diff --git a/libgo/go/runtime/testdata/testwinlib/main.go b/libgo/go/runtime/testdata/testwinlib/main.go
index 400eaa1..025ef91 100644
--- a/libgo/go/runtime/testdata/testwinlib/main.go
+++ b/libgo/go/runtime/testdata/testwinlib/main.go
@@ -1,3 +1,4 @@
+//go:build windows && cgo
// +build windows,cgo
package main
diff --git a/libgo/go/runtime/time.go b/libgo/go/runtime/time.go
index 7cc9156..8e5a36e 100644
--- a/libgo/go/runtime/time.go
+++ b/libgo/go/runtime/time.go
@@ -26,8 +26,8 @@ type timer struct {
// when must be positive on an active timer.
when int64
period int64
- f func(interface{}, uintptr)
- arg interface{}
+ f func(any, uintptr)
+ arg any
seq uintptr
// What to set the when field to in timerModifiedXX status.
@@ -230,14 +230,14 @@ func resetTimer(t *timer, when int64) bool {
// modTimer modifies an existing timer.
//go:linkname modTimer time.modTimer
-func modTimer(t *timer, when, period int64, f func(interface{}, uintptr), arg interface{}, seq uintptr) {
+func modTimer(t *timer, when, period int64, f func(any, uintptr), arg any, seq uintptr) {
modtimer(t, when, period, f, arg, seq)
}
// Go runtime.
// Ready the goroutine arg.
-func goroutineReady(arg interface{}, seq uintptr) {
+func goroutineReady(arg any, seq uintptr) {
goready(arg.(*g), 0)
}
@@ -365,9 +365,9 @@ func deltimer(t *timer) bool {
// dodeltimer removes timer i from the current P's heap.
// We are locked on the P when this is called.
-// It reports whether it saw no problems due to races.
+// It returns the smallest changed index in pp.timers.
// The caller must have locked the timers for pp.
-func dodeltimer(pp *p, i int) {
+func dodeltimer(pp *p, i int) int {
if t := pp.timers[i]; t.pp.ptr() != pp {
throw("dodeltimer: wrong P")
} else {
@@ -379,16 +379,18 @@ func dodeltimer(pp *p, i int) {
}
pp.timers[last] = nil
pp.timers = pp.timers[:last]
+ smallestChanged := i
if i != last {
// Moving to i may have moved the last timer to a new parent,
// so sift up to preserve the heap guarantee.
- siftupTimer(pp.timers, i)
+ smallestChanged = siftupTimer(pp.timers, i)
siftdownTimer(pp.timers, i)
}
if i == 0 {
updateTimer0When(pp)
}
atomic.Xadd(&pp.numTimers, -1)
+ return smallestChanged
}
// dodeltimer0 removes timer 0 from the current P's heap.
@@ -417,7 +419,7 @@ func dodeltimer0(pp *p) {
// modtimer modifies an existing timer.
// This is called by the netpoll code or time.Ticker.Reset or time.Timer.Reset.
// Reports whether the timer was modified before it was run.
-func modtimer(t *timer, when, period int64, f func(interface{}, uintptr), arg interface{}, seq uintptr) bool {
+func modtimer(t *timer, when, period int64, f func(any, uintptr), arg any, seq uintptr) bool {
if when <= 0 {
throw("timer when must be positive")
}
@@ -673,13 +675,14 @@ func adjusttimers(pp *p, now int64) {
switch s := atomic.Load(&t.status); s {
case timerDeleted:
if atomic.Cas(&t.status, s, timerRemoving) {
- dodeltimer(pp, i)
+ changed := dodeltimer(pp, i)
if !atomic.Cas(&t.status, timerRemoving, timerRemoved) {
badTimer()
}
atomic.Xadd(&pp.deletedTimers, -1)
- // Look at this heap position again.
- i--
+ // Go back to the earliest changed heap entry.
+ // "- 1" because the loop will add 1.
+ i = changed - 1
}
case timerModifiedEarlier, timerModifiedLater:
if atomic.Cas(&t.status, s, timerMoving) {
@@ -689,10 +692,11 @@ func adjusttimers(pp *p, now int64) {
// We don't add it back yet because the
// heap manipulation could cause our
// loop to skip some other timer.
- dodeltimer(pp, i)
+ changed := dodeltimer(pp, i)
moved = append(moved, t)
- // Look at this heap position again.
- i--
+ // Go back to the earliest changed heap entry.
+ // "- 1" because the loop will add 1.
+ i = changed - 1
}
case timerNoStatus, timerRunning, timerRemoving, timerRemoved, timerMoving:
badTimer()
@@ -1020,7 +1024,10 @@ func timeSleepUntil() (int64, *p) {
// "panic holding locks" message. Instead, we panic while not
// holding a lock.
-func siftupTimer(t []*timer, i int) {
+// siftupTimer puts the timer at position i in the right place
+// in the heap by moving it up toward the top of the heap.
+// It returns the smallest changed index.
+func siftupTimer(t []*timer, i int) int {
if i >= len(t) {
badTimer()
}
@@ -1040,8 +1047,11 @@ func siftupTimer(t []*timer, i int) {
if tmp != t[i] {
t[i] = tmp
}
+ return i
}
+// siftdownTimer puts the timer at position i in the right place
+// in the heap by moving it down toward the bottom of the heap.
func siftdownTimer(t []*timer, i int) {
n := len(t)
if i >= n {
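
dodeltimer above now reports the smallest heap index it touched, because filling the hole with the last timer can sift that timer up past slots the adjusttimers loop has already visited, and the loop must back up to the earliest changed slot. A standalone sketch of the same invariant on a plain binary min-heap of ints (illustration only; the runtime's timer heap is not this code):

package main

import "fmt"

// siftUp moves h[i] toward the root while it is smaller than its parent
// and returns the smallest index it ended up changing.
func siftUp(h []int, i int) int {
	for i > 0 {
		p := (i - 1) / 2
		if h[p] <= h[i] {
			break
		}
		h[p], h[i] = h[i], h[p]
		i = p
	}
	return i
}

// siftDown restores the heap property below index i.
func siftDown(h []int, i int) {
	for {
		c := 2*i + 1
		if c >= len(h) {
			break
		}
		if c+1 < len(h) && h[c+1] < h[c] {
			c++
		}
		if h[i] <= h[c] {
			break
		}
		h[i], h[c] = h[c], h[i]
		i = c
	}
}

// deleteAt removes h[i] by moving the last element into slot i, then
// restores the heap. It returns the new slice and the smallest changed
// index, which a loop scanning the heap must revisit.
func deleteAt(h []int, i int) ([]int, int) {
	last := len(h) - 1
	h[i] = h[last]
	h = h[:last]
	smallest := i
	if i < len(h) {
		smallest = siftUp(h, i)
		siftDown(h, i)
	}
	return h, smallest
}

func main() {
	h := []int{1, 5, 2, 9, 6, 3}
	h, changed := deleteAt(h, 4) // the moved element sifts up to index 1
	fmt.Println(h, "earliest changed index:", changed)
}
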
diff --git a/libgo/go/runtime/time_fake.go b/libgo/go/runtime/time_fake.go
index c790fab..b5e0463 100644
--- a/libgo/go/runtime/time_fake.go
+++ b/libgo/go/runtime/time_fake.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build faketime && !windows
-// +build faketime,!windows
// Faketime isn't currently supported on Windows. This would require
// modifying syscall.Write to call syscall.faketimeWrite,
@@ -42,6 +41,10 @@ func time_now() (sec int64, nsec int32, mono int64) {
return faketime / 1e9, int32(faketime % 1e9), faketime
}
+// write is like the Unix write system call.
+// We have to avoid write barriers to avoid potential deadlock
+// on write calls.
+//go:nowritebarrierrec
func write(fd uintptr, p unsafe.Pointer, n int32) int32 {
if !(fd == 1 || fd == 2) {
// Do an ordinary write.
diff --git a/libgo/go/runtime/time_nofake.go b/libgo/go/runtime/time_nofake.go
index 5a4ceaf..70a2102 100644
--- a/libgo/go/runtime/time_nofake.go
+++ b/libgo/go/runtime/time_nofake.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build !faketime
-// +build !faketime
package runtime
@@ -20,9 +19,14 @@ func nanotime() int64 {
return nanotime1()
}
+var overrideWrite func(fd uintptr, p unsafe.Pointer, n int32) int32
+
// write must be nosplit on Windows (see write1)
//
//go:nosplit
func write(fd uintptr, p unsafe.Pointer, n int32) int32 {
+ if overrideWrite != nil {
+ return overrideWrite(fd, noescape(p), n)
+ }
return write1(fd, p, n)
}
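
The overrideWrite hook above lets another runtime file redirect console writes without changing the default path. A sketch of the same late-binding pattern in ordinary Go (names and the default path are illustrative, not the runtime's internals):

package main

import (
	"fmt"
	"os"
	"unsafe"
)

// overrideWrite, when non-nil, intercepts writes; otherwise the default
// path is used. This mirrors the hook shape in the hunk above.
var overrideWrite func(fd uintptr, p unsafe.Pointer, n int32) int32

func write(fd uintptr, p unsafe.Pointer, n int32) int32 {
	if overrideWrite != nil {
		return overrideWrite(fd, p, n)
	}
	// Default path: a plain file-descriptor write via the os package.
	b := unsafe.Slice((*byte)(p), int(n))
	m, _ := os.NewFile(fd, "fd").Write(b)
	return int32(m)
}

func main() {
	msg := []byte("via default path\n")
	write(1, unsafe.Pointer(&msg[0]), int32(len(msg)))

	// Install the override and write again; the hook sees the same bytes.
	overrideWrite = func(fd uintptr, p unsafe.Pointer, n int32) int32 {
		fmt.Printf("override got %d bytes for fd %d\n", n, fd)
		return n
	}
	write(1, unsafe.Pointer(&msg[0]), int32(len(msg)))
}
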
diff --git a/libgo/go/runtime/timeasm.go b/libgo/go/runtime/timeasm.go
index 2c06dcb..1579e9d 100644
--- a/libgo/go/runtime/timeasm.go
+++ b/libgo/go/runtime/timeasm.go
@@ -5,9 +5,6 @@
// Declarations for operating systems implementing time.now directly in assembly.
//go:build ignore && !faketime && (windows || (linux && amd64))
-// +build ignore
-// +build !faketime
-// +build windows linux,amd64
package runtime
diff --git a/libgo/go/runtime/timestub.go b/libgo/go/runtime/timestub.go
index 76de76f..0b5d2fc 100644
--- a/libgo/go/runtime/timestub.go
+++ b/libgo/go/runtime/timestub.go
@@ -6,9 +6,6 @@
// indirectly, in terms of walltime and nanotime assembly.
//go:build !faketime && !windows && !(linux && amd64)
-// +build !faketime
-// +build !windows
-// +build !linux !amd64
package runtime
diff --git a/libgo/go/runtime/timestub2.go b/libgo/go/runtime/timestub2.go
index d9b4ee0..f6f50c7 100644
--- a/libgo/go/runtime/timestub2.go
+++ b/libgo/go/runtime/timestub2.go
@@ -3,13 +3,6 @@
// license that can be found in the LICENSE file.
//go:build !aix && !darwin && !freebsd && !openbsd && !solaris && !windows && !(linux && amd64)
-// +build !aix
-// +build !darwin
-// +build !freebsd
-// +build !openbsd
-// +build !solaris
-// +build !windows
-// +build !linux !amd64
package runtime
diff --git a/libgo/go/runtime/tls_stub.go b/libgo/go/runtime/tls_stub.go
index 95dafd0..7bdfc6b 100644
--- a/libgo/go/runtime/tls_stub.go
+++ b/libgo/go/runtime/tls_stub.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build (windows && !amd64) || !windows
-// +build windows,!amd64 !windows
package runtime
diff --git a/libgo/go/runtime/trace.go b/libgo/go/runtime/trace.go
index a7c36ba..5d7eb75 100644
--- a/libgo/go/runtime/trace.go
+++ b/libgo/go/runtime/trace.go
@@ -13,6 +13,7 @@
package runtime
import (
+ "internal/goarch"
"runtime/internal/atomic"
"runtime/internal/sys"
"unsafe"
@@ -85,7 +86,7 @@ const (
// and ppc64le.
// Tracing won't work reliably for architectures where cputicks is emulated
// by nanotime, so the value doesn't matter for those architectures.
- traceTickDiv = 16 + 48*(sys.Goarch386|sys.GoarchAmd64)
+ traceTickDiv = 16 + 48*(goarch.Is386|goarch.IsAmd64)
// Maximum number of PCs in a single stack trace.
// Since events contain only stack id rather than whole stack trace,
// we can allow quite large values here.
@@ -426,6 +427,9 @@ func ReadTrace() []byte {
trace.footerWritten = true
// Use float64 because (trace.ticksEnd - trace.ticksStart) * 1e9 can overflow int64.
freq := float64(trace.ticksEnd-trace.ticksStart) * 1e9 / float64(trace.timeEnd-trace.timeStart) / traceTickDiv
+ if freq <= 0 {
+ throw("trace: ReadTrace got invalid frequency")
+ }
trace.lockOwner = nil
unlock(&trace.lock)
var data []byte
@@ -551,8 +555,15 @@ func traceEventLocked(extraBytes int, mp *m, pid int32, bufp *traceBufPtr, ev by
bufp.set(buf)
}
+ // NOTE: ticks might be the same after tick division, even though the
+ // underlying cputicks value keeps increasing.
ticks := uint64(cputicks()) / traceTickDiv
tickDiff := ticks - buf.lastTicks
+ if tickDiff == 0 {
+ ticks = buf.lastTicks + 1
+ tickDiff = 1
+ }
+
buf.lastTicks = ticks
narg := byte(len(args))
if skip >= 0 {
@@ -652,6 +663,9 @@ func traceFlush(buf traceBufPtr, pid int32) traceBufPtr {
// initialize the buffer for a new batch
ticks := uint64(cputicks()) / traceTickDiv
+ if ticks == bufp.lastTicks {
+ ticks = bufp.lastTicks + 1
+ }
bufp.lastTicks = ticks
bufp.byte(traceEvBatch | 1<<traceArgCountShift)
bufp.varint(uint64(pid))
@@ -922,7 +936,7 @@ type traceAlloc struct {
//go:notinheap
type traceAllocBlock struct {
next traceAllocBlockPtr
- data [64<<10 - sys.PtrSize]byte
+ data [64<<10 - goarch.PtrSize]byte
}
// TODO: Since traceAllocBlock is now go:notinheap, this isn't necessary.
@@ -933,7 +947,7 @@ func (p *traceAllocBlockPtr) set(x *traceAllocBlock) { *p = traceAllocBlockPtr(u
// alloc allocates n-byte block.
func (a *traceAlloc) alloc(n uintptr) unsafe.Pointer {
- n = alignUp(n, sys.PtrSize)
+ n = alignUp(n, goarch.PtrSize)
if a.head == 0 || a.off+n > uintptr(len(a.head.ptr().data)) {
if n > uintptr(len(a.head.ptr().data)) {
throw("trace: alloc too large")
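
The two trace hunks above guard against cputicks readings that collapse to the same value once divided by traceTickDiv, which would otherwise yield a zero delta inside a batch. A small worked example of the bump (divisor and readings are illustrative):

package main

import "fmt"

func main() {
	const traceTickDiv = 64 // illustrative divisor, not the runtime's exact value
	var lastTicks uint64

	for _, cputicks := range []uint64{100000, 100010, 100700} {
		ticks := cputicks / traceTickDiv
		tickDiff := ticks - lastTicks
		if tickDiff == 0 {
			// Two nearby readings collapsed to the same divided value;
			// force the timestamp forward so deltas stay positive.
			ticks = lastTicks + 1
			tickDiff = 1
		}
		lastTicks = ticks
		fmt.Println("ticks:", ticks, "delta:", tickDiff)
	}
}
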
diff --git a/libgo/go/runtime/trace/annotation.go b/libgo/go/runtime/trace/annotation.go
index 6e18bfb..d05b5e2 100644
--- a/libgo/go/runtime/trace/annotation.go
+++ b/libgo/go/runtime/trace/annotation.go
@@ -98,7 +98,7 @@ func Log(ctx context.Context, category, message string) {
}
// Logf is like Log, but the value is formatted using the specified format spec.
-func Logf(ctx context.Context, category, format string, args ...interface{}) {
+func Logf(ctx context.Context, category, format string, args ...any) {
if IsEnabled() {
// Ideally this should be just Log, but that will
// add one more frame in the stack trace.
diff --git a/libgo/go/runtime/traceback_test.go b/libgo/go/runtime/traceback_test.go
index 152db6c..2597731 100644
--- a/libgo/go/runtime/traceback_test.go
+++ b/libgo/go/runtime/traceback_test.go
@@ -6,6 +6,8 @@ package runtime_test
import (
"bytes"
+ "internal/goexperiment"
+ "internal/testenv"
"runtime"
"strings"
"testing"
@@ -14,6 +16,19 @@ import (
var testTracebackArgsBuf [1000]byte
func TestTracebackArgs(t *testing.T) {
+ if *flagQuick {
+ t.Skip("-quick")
+ }
+ optimized := !strings.HasSuffix(testenv.Builder(), "-noopt")
+ abiSel := func(x, y string) string {
+ // select expected output based on ABI
+ // In noopt build we always spill arguments so the output is the same as stack ABI.
+ if optimized && goexperiment.RegabiArgs {
+ return x
+ }
+ return y
+ }
+
tests := []struct {
fn func() int
expect string
@@ -106,6 +121,52 @@ func TestTracebackArgs(t *testing.T) {
func() int { return testTracebackArgs8d(testArgsType8d{1, 2, 3, 4, 5, 6, 7, 8, [3]int{9, 10, 11}, 12}) },
"testTracebackArgs8d({0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, {0x9, 0xa, ...}, ...})",
},
+
+ // Register argument liveness.
+ // 1, 3 are used and live, 2, 4 are dead (in register ABI).
+ // Address-taken (7) and stack ({5, 6}) args are always live.
+ {
+ func() int {
+ poisonStack() // poison arg area to make output deterministic
+ return testTracebackArgs9(1, 2, 3, 4, [2]int{5, 6}, 7)
+ },
+ abiSel(
+ "testTracebackArgs9(0x1, 0xffffffff?, 0x3, 0xff?, {0x5, 0x6}, 0x7)",
+ "testTracebackArgs9(0x1, 0x2, 0x3, 0x4, {0x5, 0x6}, 0x7)"),
+ },
+ // No args are live.
+ // (Note: this assumes at least 5 int registers if the register ABI is used.)
+ {
+ func() int {
+ poisonStack() // poison arg area to make output deterministic
+ return testTracebackArgs10(1, 2, 3, 4, 5)
+ },
+ abiSel(
+ "testTracebackArgs10(0xffffffff?, 0xffffffff?, 0xffffffff?, 0xffffffff?, 0xffffffff?)",
+ "testTracebackArgs10(0x1, 0x2, 0x3, 0x4, 0x5)"),
+ },
+ // Conditional spills.
+ // Spill in conditional, not executed.
+ {
+ func() int {
+ poisonStack() // poison arg area to make output deterministic
+ return testTracebackArgs11a(1, 2, 3)
+ },
+ abiSel(
+ "testTracebackArgs11a(0xffffffff?, 0xffffffff?, 0xffffffff?)",
+ "testTracebackArgs11a(0x1, 0x2, 0x3)"),
+ },
+ // Arg 2 spills in a conditional that is not executed; arg 3 spills in a
+ // conditional that is executed, but that is not statically known.
+ // So arg 3 is printed as 0x3?.
+ {
+ func() int {
+ poisonStack() // poison arg area to make output deterministic
+ return testTracebackArgs11b(1, 2, 3, 4)
+ },
+ abiSel(
+ "testTracebackArgs11b(0xffffffff?, 0xffffffff?, 0x3?, 0x4)",
+ "testTracebackArgs11b(0x1, 0x2, 0x3, 0x4)"),
+ },
}
for _, test := range tests {
n := test.fn()
@@ -295,3 +356,72 @@ func testTracebackArgs8d(a testArgsType8d) int {
}
return n
}
+
+// nosplit to avoid preemption or morestack spilling registers.
+//
+//go:nosplit
+//go:noinline
+func testTracebackArgs9(a int64, b int32, c int16, d int8, x [2]int, y int) int {
+ if a < 0 {
+ println(&y) // take address, make y live, even if no longer used at traceback
+ }
+ n := runtime.Stack(testTracebackArgsBuf[:], false)
+ if a < 0 {
+ // use half of in-reg args to keep them alive, the other half are dead
+ return int(a) + int(c)
+ }
+ return n
+}
+
+// nosplit to avoid preemption or morestack spilling registers.
+//
+//go:nosplit
+//go:noinline
+func testTracebackArgs10(a, b, c, d, e int32) int {
+ // no use of any args
+ return runtime.Stack(testTracebackArgsBuf[:], false)
+}
+
+// norace to avoid race instrumentation changing spill locations.
+// nosplit to avoid preemption or morestack spilling registers.
+//
+//go:norace
+//go:nosplit
+//go:noinline
+func testTracebackArgs11a(a, b, c int32) int {
+ if a < 0 {
+ println(a, b, c) // spill in a conditional, may not execute
+ }
+ if b < 0 {
+ return int(a + b + c)
+ }
+ return runtime.Stack(testTracebackArgsBuf[:], false)
+}
+
+// norace to avoid race instrumentation changing spill locations.
+// nosplit to avoid preemption or morestack spilling registers.
+//
+//go:norace
+//go:nosplit
+//go:noinline
+func testTracebackArgs11b(a, b, c, d int32) int {
+ var x int32
+ if a < 0 {
+ print() // spill b in a conditional
+ x = b
+ } else {
+ print() // spill c in a conditional
+ x = c
+ }
+ if d < 0 { // d is always needed
+ return int(x + d)
+ }
+ return runtime.Stack(testTracebackArgsBuf[:], false)
+}
+
+// Poison the arg area with deterministic values.
+//
+//go:noinline
+func poisonStack() [20]int {
+ return [20]int{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}
+}
diff --git a/libgo/go/runtime/type.go b/libgo/go/runtime/type.go
index c788342..9a418ea 100644
--- a/libgo/go/runtime/type.go
+++ b/libgo/go/runtime/type.go
@@ -7,8 +7,8 @@
package runtime
import (
+ "internal/goarch"
"runtime/internal/atomic"
- "runtime/internal/sys"
"unsafe"
)
@@ -237,7 +237,7 @@ func reflect_lookupType(s string) *_type {
typelist.types = make(map[string]uintptr, n)
for _, list := range typelist.lists {
for i := 0; i < list.count; i++ {
- typ := *(**_type)(add(unsafe.Pointer(&list.types), uintptr(i)*sys.PtrSize))
+ typ := *(**_type)(add(unsafe.Pointer(&list.types), uintptr(i)*goarch.PtrSize))
typelist.types[typ.string()] = uintptr(unsafe.Pointer(typ))
}
}
diff --git a/libgo/go/runtime/write_err.go b/libgo/go/runtime/write_err.go
index a4656fd..81ae872 100644
--- a/libgo/go/runtime/write_err.go
+++ b/libgo/go/runtime/write_err.go
@@ -3,7 +3,6 @@
// license that can be found in the LICENSE file.
//go:build !android
-// +build !android
package runtime