From 34489eb2af3bbb7be101bc838615cf4a4dc6828d Mon Sep 17 00:00:00 2001
From: Ian Lance Taylor
Date: Thu, 25 Oct 2018 22:18:08 +0000
Subject: compiler: improve name mangling for packpaths

The current implementation of Gogo::pkgpath_for_symbol was written in
a way that allowed two distinct package paths to map to the same
symbol, which could cause collisions at link-time or compile-time.
Switch to a better mangling scheme to ensure that we get a unique
packagepath symbol for each package.

In the new scheme, instead of having separate mangling schemes for
identifiers and package paths, the main identifier mangler
("go_encode_id") now handles mangling of both packagepath characters
and identifier characters. The new mangling scheme is more intrusive:
"foo/bar.Baz" is mangled as "foo..z2fbar.Baz" instead of
"foo_bar.Baz". To mitigate this, this patch also adds a demangling
capability so that function names returned from runtime.CallersFrames
are converted back to their original unmangled form.

Changing the pkgpath_for_symbol scheme requires updating a number of
//go:linkname directives and C "__asm__" directives to match the new
scheme, as well as updating the 'gotest' driver (which makes
assumptions about the correct mapping from pkgpath symbol to package
name).

Fixes golang/go#27534.

Reviewed-on: https://go-review.googlesource.com/c/135455

From-SVN: r265510
---
 libgo/go/runtime/proc.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'libgo/go/runtime/proc.go')

diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go
index 77d379b..bb16924 100644
--- a/libgo/go/runtime/proc.go
+++ b/libgo/go/runtime/proc.go
@@ -4670,7 +4670,7 @@ func runqsteal(_p_, p2 *p, stealRunNextG bool) *g {
 	return gp
 }
 
-//go:linkname setMaxThreads runtime_debug.setMaxThreads
+//go:linkname setMaxThreads runtime..z2fdebug.setMaxThreads
 func setMaxThreads(in int) (out int) {
 	lock(&sched.lock)
 	out = int(sched.maxmcount)
@@ -4716,13 +4716,13 @@ func sync_runtime_procUnpin() {
 	procUnpin()
 }
 
-//go:linkname sync_atomic_runtime_procPin sync_atomic.runtime_procPin
+//go:linkname sync_atomic_runtime_procPin sync..z2fatomic.runtime_procPin
 //go:nosplit
 func sync_atomic_runtime_procPin() int {
 	return procPin()
 }
 
-//go:linkname sync_atomic_runtime_procUnpin sync_atomic.runtime_procUnpin
+//go:linkname sync_atomic_runtime_procUnpin sync..z2fatomic.runtime_procUnpin
 //go:nosplit
 func sync_atomic_runtime_procUnpin() {
 	procUnpin()
--
cgit v1.1


From c43137e800bb9ca2ecda0a6b6189e0eb5c22f0d7 Mon Sep 17 00:00:00 2001
From: Ian Lance Taylor
Date: Wed, 5 Dec 2018 23:09:51 +0000
Subject: runtime: add precise stack scan support

This CL adds support for precise stack scanning, using stack maps, to
the runtime. The stack maps are generated by the compiler (if
supported). Each safepoint is associated with a (real or dummy)
landing pad, and its "type info" in the exception table is a pointer
to the stack map. When a stack is scanned, the stack map is found by
the stack unwinding code by inspecting the exception table (LSDA).

For precise stack scan we need to unwind the stack. There are three
cases:

- If a goroutine is scanning its own stack, it can unwind the stack
  and scan the frames.

- If a goroutine is scanning another, stopped, goroutine, it cannot
  directly unwind the target stack. We handle this by switching
  (runtime.gogo) to the target g, letting it unwind and scan the
  stack, and switch back.
- If we are scanning a goroutine that is blocked in a syscall, we
  send a signal to the target goroutine's thread, and let the signal
  handler unwind and scan the stack. Extra care is needed as this
  races with enter/exit syscall.

Currently this is only implemented on Linux.

Reviewed-on: https://go-review.googlesource.com/c/140518

From-SVN: r266832
---
 libgo/go/runtime/proc.go | 51 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 50 insertions(+), 1 deletion(-)

(limited to 'libgo/go/runtime/proc.go')

diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go
index bb16924..ef166cb 100644
--- a/libgo/go/runtime/proc.go
+++ b/libgo/go/runtime/proc.go
@@ -528,6 +528,8 @@ func schedinit() {
 
 	sched.maxmcount = 10000
 
+	usestackmaps = probestackmaps()
+
 	mallocinit()
 	mcommoninit(_g_.m)
 	cpuinit() // must run before alginit
@@ -891,7 +893,49 @@ loop:
 	case _Gcopystack:
 	// Stack being switched. Go around again.
 
-	case _Grunnable, _Gsyscall, _Gwaiting:
+	case _Gsyscall:
+		if usestackmaps {
+			// Claim goroutine by setting scan bit.
+			// Racing with execution or readying of gp.
+			// The scan bit keeps them from running
+			// the goroutine until we're done.
+			if castogscanstatus(gp, s, s|_Gscan) {
+				if gp.scanningself {
+					// Don't try to scan the stack
+					// if the goroutine is going to do
+					// it itself.
+					// FIXME: can this happen?
+					restartg(gp)
+					break
+				}
+				if !gp.gcscandone {
+					// Send a signal to let the goroutine scan
+					// itself. This races with enter/exitsyscall.
+					// If the goroutine is not stopped at a safepoint,
+					// it will not scan the stack and we'll try again.
+					mp := gp.m
+					noteclear(&mp.scannote)
+					gp.scangcw = uintptr(unsafe.Pointer(gcw))
+					tgkill(getpid(), _pid_t(mp.procid), _SIGURG)
+
+					// Wait for gp to scan its own stack.
+					notesleep(&mp.scannote)
+
+					if !gp.gcscandone {
+						// The signal delivered at a bad time.
+						// Try again.
+						restartg(gp)
+						break
+					}
+				}
+				restartg(gp)
+				break loop
+			}
+			break
+		}
+		fallthrough
+
+	case _Grunnable, _Gwaiting:
 		// Claim goroutine by setting scan bit.
 		// Racing with execution or readying of gp.
 		// The scan bit keeps them from running
@@ -954,6 +998,11 @@ loop:
 
 // The GC requests that this routine be moved from a scanmumble state to a mumble state.
 func restartg(gp *g) {
+	if gp.scang != 0 || gp.scangcw != 0 {
+		print("g ", gp.goid, "is being scanned scang=", gp.scang, " scangcw=", gp.scangcw, "\n")
+		throw("restartg: being scanned")
+	}
+
 	s := readgstatus(gp)
 	switch s {
 	default:
--
cgit v1.1


From f41bf58736b95ec17b642f4cb9c802facfc1b7bc Mon Sep 17 00:00:00 2001
From: Ian Lance Taylor
Date: Thu, 17 Jan 2019 02:14:28 +0000
Subject: runtime: dropg before CAS g status to _Grunnable/_Gwaiting

Currently, we dropg (which clears gp.m) after we CAS the g status to
_Grunnable or _Gwaiting. Immediately after CASing the g status,
another thread may CAS it to _Gscan status and scan its stack. With
precise stack scan, it accesses gp.m in order to switch to g and back
(in doscanstackswitch). This races with dropg. If doscanstackswitch
reads gp.m and dropg then runs, the m we restore at the end of the
scan will be set to a stale value. Worse, if dropg runs after
doscanstackswitch sets the new m, gp will be running with a nil m.

To fix this, we do dropg before CASing the g status to _Grunnable or
_Gwaiting. We can do this safely if we are CASing from _Grunning, as
we own the g when it is in _Grunning. There is one case where we CAS
from _Gsyscall to _Grunnable.
It is not safe to dropg when it is in _Gsyscall, as precise stack
scan needs to read gp.m in order to signal the m. So we need to
introduce a transient state, _Gexitingsyscall, between _Gsyscall and
_Grunnable, where the GC should not scan its stack.

It is a little unfortunate that we have to add another g status. We
could reuse an existing one (e.g. _Gcopystack), but it is clearer and
safer to just use a new one, as Austin suggested.

Reviewed-on: https://go-review.googlesource.com/c/158157

From-SVN: r268001
---
 libgo/go/runtime/proc.go | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'libgo/go/runtime/proc.go')

diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go
index ef166cb..655d0a9 100644
--- a/libgo/go/runtime/proc.go
+++ b/libgo/go/runtime/proc.go
@@ -956,6 +956,10 @@ loop:
 			break loop
 		}
 
+	case _Gexitingsyscall:
+		// This is a transient state during which we should not scan its stack.
+		// Try again.
+
 	case _Gscanwaiting:
 		// newstack is doing a scan for us right now. Wait.
 
@@ -2635,8 +2639,8 @@ func park_m(gp *g) {
 		traceGoPark(_g_.m.waittraceev, _g_.m.waittraceskip)
 	}
 
-	casgstatus(gp, _Grunning, _Gwaiting)
 	dropg()
+	casgstatus(gp, _Grunning, _Gwaiting)
 
 	if _g_.m.waitunlockf != nil {
 		fn := *(*func(*g, unsafe.Pointer) bool)(unsafe.Pointer(&_g_.m.waitunlockf))
@@ -2660,8 +2664,8 @@ func goschedImpl(gp *g) {
 		dumpgstatus(gp)
 		throw("bad g status")
 	}
-	casgstatus(gp, _Grunning, _Grunnable)
 	dropg()
+	casgstatus(gp, _Grunning, _Grunnable)
 	lock(&sched.lock)
 	globrunqput(gp)
 	unlock(&sched.lock)
@@ -3054,8 +3058,9 @@ func exitsyscallfast_pidle() bool {
 func exitsyscall0(gp *g) {
 	_g_ := getg()
 
-	casgstatus(gp, _Gsyscall, _Grunnable)
+	casgstatus(gp, _Gsyscall, _Gexitingsyscall)
 	dropg()
+	casgstatus(gp, _Gexitingsyscall, _Grunnable)
 	lock(&sched.lock)
 	_p_ := pidleget()
 	if _p_ == nil {
--
cgit v1.1


From aee6ed4a2cbed1ac8be35332ee2391206e2101a2 Mon Sep 17 00:00:00 2001
From: Ian Lance Taylor
Date: Fri, 18 Jan 2019 03:29:38 +0000
Subject: re PR go/88202 (FAIL: runtime/pprof)

	PR go/88202
	runtime: in sigprof, skip to sigtrampgo if we don't find sigtramp

	Fixes https://gcc.gnu.org/PR88202

	Reviewed-on: https://go-review.googlesource.com/c/158218

From-SVN: r268057
---
 libgo/go/runtime/proc.go | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'libgo/go/runtime/proc.go')

diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go
index 655d0a9..80b04ab 100644
--- a/libgo/go/runtime/proc.go
+++ b/libgo/go/runtime/proc.go
@@ -3600,10 +3600,17 @@ func sigprof(pc uintptr, gp *g, mp *m) {
 	// To ensure a sane profile, walk through the frames in
 	// "stklocs" until we find the "runtime.sigtramp" frame, then
 	// report only those frames below the frame one down from
-	// that. If for some reason "runtime.sigtramp" is not present,
-	// don't make any changes.
+	// that. On systems that don't split stack, "sigtramp" can
+	// do a sibling call to "sigtrampgo", so use "sigtrampgo"
+	// if we don't find "sigtramp". If for some reason
+	// neither "runtime.sigtramp" nor "runtime.sigtrampgo" is
+	// present, don't make any changes.
framesToDiscard := 0 for i := 0; i < n; i++ { + if stklocs[i].function == "runtime.sigtrampgo" && i+2 < n { + framesToDiscard = i + 2 + n -= framesToDiscard + } if stklocs[i].function == "runtime.sigtramp" && i+2 < n { framesToDiscard = i + 2 n -= framesToDiscard -- cgit v1.1 From 4f4a855d82a889cebcfca150a7a43909bcb6a346 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Fri, 18 Jan 2019 19:04:36 +0000 Subject: libgo: update to Go1.12beta2 Reviewed-on: https://go-review.googlesource.com/c/158019 gotools/: * Makefile.am (go_cmd_vet_files): Update for Go1.12beta2 release. (GOTOOLS_TEST_TIMEOUT): Increase to 600. (check-runtime): Export LD_LIBRARY_PATH before computing GOARCH and GOOS. (check-vet): Copy golang.org/x/tools into check-vet-dir. * Makefile.in: Regenerate. gcc/testsuite/: * go.go-torture/execute/names-1.go: Stop using debug/xcoff, which is no longer externally visible. From-SVN: r268084 --- libgo/go/runtime/proc.go | 540 ++++++++++++++++++++++++++--------------------- 1 file changed, 299 insertions(+), 241 deletions(-) (limited to 'libgo/go/runtime/proc.go') diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go index 80b04ab..05dd53d 100644 --- a/libgo/go/runtime/proc.go +++ b/libgo/go/runtime/proc.go @@ -20,7 +20,6 @@ import ( //go:linkname incidlelocked runtime.incidlelocked //go:linkname schedinit runtime.schedinit //go:linkname ready runtime.ready -//go:linkname gcprocs runtime.gcprocs //go:linkname stopm runtime.stopm //go:linkname handoffp runtime.handoffp //go:linkname wakep runtime.wakep @@ -32,7 +31,6 @@ import ( //go:linkname reentersyscallblock runtime.reentersyscallblock //go:linkname exitsyscall runtime.exitsyscall //go:linkname gfget runtime.gfget -//go:linkname helpgc runtime.helpgc //go:linkname kickoff runtime.kickoff //go:linkname mstart1 runtime.mstart1 //go:linkname mexit runtime.mexit @@ -196,8 +194,7 @@ func main() { } }() - // Record when the world started. Must be after runtime_init - // because nanotime on some platforms depends on startNano. + // Record when the world started. runtimeInitTime = nanotime() main_init_done = make(chan bool) @@ -285,7 +282,7 @@ func forcegchelper() { println("GC forced") } // Time-triggered, fully concurrent. - gcStart(gcBackgroundMode, gcTrigger{kind: gcTriggerTime, now: nanotime()}) + gcStart(gcTrigger{kind: gcTriggerTime, now: nanotime()}) } } @@ -479,17 +476,18 @@ const ( _GoidCacheBatch = 16 ) -// cpuinit extracts the environment variable GODEBUGCPU from the environment on -// Linux and Darwin if the GOEXPERIMENT debugcpu was set and calls internal/cpu.Initialize. +// cpuinit extracts the environment variable GODEBUG from the environment on +// Unix-like operating systems and calls internal/cpu.Initialize. func cpuinit() { - const prefix = "GODEBUGCPU=" + const prefix = "GODEBUG=" var env string - if haveexperiment("debugcpu") && (GOOS == "linux" || GOOS == "darwin") { + switch GOOS { + case "aix", "darwin", "dragonfly", "freebsd", "netbsd", "openbsd", "solaris", "linux": cpu.DebugOptions = true // Similar to goenv_unix but extracts the environment value for - // GODEBUGCPU directly. + // GODEBUG directly. 
// TODO(moehrmann): remove when general goenvs() can be called before cpuinit() n := int32(0) for argv_index(argv, argc+1+n) != nil { @@ -500,7 +498,7 @@ func cpuinit() { p := argv_index(argv, argc+1+i) s := *(*string)(unsafe.Pointer(&stringStruct{unsafe.Pointer(p), findnull(p)})) - if hasprefix(s, prefix) { + if hasPrefix(s, prefix) { env = gostring(p)[len(prefix):] break } @@ -643,59 +641,6 @@ func ready(gp *g, traceskip int, next bool) { _g_.m.locks-- } -func gcprocs() int32 { - // Figure out how many CPUs to use during GC. - // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc. - lock(&sched.lock) - n := gomaxprocs - if n > ncpu { - n = ncpu - } - if n > _MaxGcproc { - n = _MaxGcproc - } - if n > sched.nmidle+1 { // one M is currently running - n = sched.nmidle + 1 - } - unlock(&sched.lock) - return n -} - -func needaddgcproc() bool { - lock(&sched.lock) - n := gomaxprocs - if n > ncpu { - n = ncpu - } - if n > _MaxGcproc { - n = _MaxGcproc - } - n -= sched.nmidle + 1 // one M is currently running - unlock(&sched.lock) - return n > 0 -} - -func helpgc(nproc int32) { - _g_ := getg() - lock(&sched.lock) - pos := 0 - for n := int32(1); n < nproc; n++ { // one M is currently running - if allp[pos].mcache == _g_.m.mcache { - pos++ - } - mp := mget() - if mp == nil { - throw("gcprocs inconsistency") - } - mp.helpgc = n - mp.p.set(allp[pos]) - mp.mcache = allp[pos].mcache - pos++ - notewakeup(&mp.park) - } - unlock(&sched.lock) -} - // freezeStopWait is a large value that freezetheworld sets // sched.stopwait to in order to request that all Gs permanently stop. const freezeStopWait = 0x7fffffff @@ -1154,20 +1099,14 @@ func stopTheWorldWithSema() { } } -func mhelpgc() { - _g_ := getg() - _g_.m.helpgc = -1 -} - func startTheWorldWithSema(emitTraceEvent bool) int64 { _g_ := getg() _g_.m.locks++ // disable preemption because it can be holding p in a local var if netpollinited() { - gp := netpoll(false) // non-blocking - injectglist(gp) + list := netpoll(false) // non-blocking + injectglist(&list) } - add := needaddgcproc() lock(&sched.lock) procs := gomaxprocs @@ -1197,7 +1136,6 @@ func startTheWorldWithSema(emitTraceEvent bool) int64 { } else { // Start M to run P. Do not start another M below. newm(nil, p) - add = false } } @@ -1214,16 +1152,6 @@ func startTheWorldWithSema(emitTraceEvent bool) int64 { wakep() } - if add { - // If GC could have used another helper proc, start one now, - // in the hope that it will be available next time. - // It would have been even better to start it before the collection, - // but doing so requires allocating memory, so it's tricky to - // coordinate. This lazy approach works out in practice: - // we don't mind if the first couple gc rounds don't have quite - // the maximum number of procs. - newm(mhelpgc, nil) - } _g_.m.locks-- return startTime @@ -1288,10 +1216,7 @@ func mstart1() { fn() } - if _g_.m.helpgc != 0 { - _g_.m.helpgc = 0 - stopm() - } else if _g_.m != &m0 { + if _g_.m != &m0 { acquirep(_g_.m.nextp.ptr()) _g_.m.nextp = 0 } @@ -1591,7 +1516,7 @@ func allocm(_p_ *p, fn func(), allocatestack bool) (mp *m, g0Stack unsafe.Pointe // the following strategy: there is a stack of available m's // that can be stolen. Using compare-and-swap // to pop from the stack has ABA races, so we simulate -// a lock by doing an exchange (via casp) to steal the stack +// a lock by doing an exchange (via Casuintptr) to steal the stack // head and replace the top pointer with MLOCKED (1). // This serves as a simple spin lock that we can use even // without an m. 
The thread that locks the stack in this way @@ -1925,7 +1850,7 @@ func startTemplateThread() { // templateThread is a thread in a known-good state that exists solely // to start new threads in known-good states when the calling thread -// may not be a a good state. +// may not be in a good state. // // Many programs never need this, so templateThread is started lazily // when we first enter a state that might lead to running on a thread @@ -1977,21 +1902,11 @@ func stopm() { throw("stopm spinning") } -retry: lock(&sched.lock) mput(_g_.m) unlock(&sched.lock) notesleep(&_g_.m.park) noteclear(&_g_.m.park) - if _g_.m.helpgc != 0 { - // helpgc() set _g_.m.p and _g_.m.mcache, so we have a P. - gchelper() - // Undo the effects of helpgc(). - _g_.m.helpgc = 0 - _g_.m.mcache = nil - _g_.m.p = 0 - goto retry - } acquirep(_g_.m.nextp.ptr()) _g_.m.nextp = 0 } @@ -2279,9 +2194,9 @@ top: // not set lastpoll yet), this thread will do blocking netpoll below // anyway. if netpollinited() && atomic.Load(&netpollWaiters) > 0 && atomic.Load64(&sched.lastpoll) != 0 { - if gp := netpoll(false); gp != nil { // non-blocking - // netpoll returns list of goroutines linked by schedlink. - injectglist(gp.schedlink.ptr()) + if list := netpoll(false); !list.empty() { // non-blocking + gp := list.pop() + injectglist(&list) casgstatus(gp, _Gwaiting, _Grunnable) if trace.enabled { traceGoUnpark(gp, 0) @@ -2336,10 +2251,10 @@ stop: } // wasm only: - // Check if a goroutine is waiting for a callback from the WebAssembly host. - // If yes, pause the execution until a callback was triggered. - if pauseSchedulerUntilCallback() { - // A callback was triggered and caused at least one goroutine to wake up. + // If a callback returned and no other goroutine is awake, + // then pause execution until a callback was triggered. + if beforeIdle() { + // At least one goroutine got woken. goto top } @@ -2433,29 +2348,30 @@ stop: if _g_.m.spinning { throw("findrunnable: netpoll with spinning") } - gp := netpoll(true) // block until new work is available + list := netpoll(true) // block until new work is available atomic.Store64(&sched.lastpoll, uint64(nanotime())) - if gp != nil { + if !list.empty() { lock(&sched.lock) _p_ = pidleget() unlock(&sched.lock) if _p_ != nil { acquirep(_p_) - injectglist(gp.schedlink.ptr()) + gp := list.pop() + injectglist(&list) casgstatus(gp, _Gwaiting, _Grunnable) if trace.enabled { traceGoUnpark(gp, 0) } return gp, false } - injectglist(gp) + injectglist(&list) } } stopm() goto top } -// pollWork returns true if there is non-background work this P could +// pollWork reports whether there is non-background work this P could // be doing. This is a fairly lightweight check to be used for // background work loops, like idle GC. It checks a subset of the // conditions checked by the actual scheduler. @@ -2468,8 +2384,8 @@ func pollWork() bool { return true } if netpollinited() && atomic.Load(&netpollWaiters) > 0 && sched.lastpoll != 0 { - if gp := netpoll(false); gp != nil { - injectglist(gp) + if list := netpoll(false); !list.empty() { + injectglist(&list) return true } } @@ -2494,22 +2410,21 @@ func resetspinning() { } } -// Injects the list of runnable G's into the scheduler. +// Injects the list of runnable G's into the scheduler and clears glist. // Can run concurrently with GC. 
-func injectglist(glist *g) { - if glist == nil { +func injectglist(glist *gList) { + if glist.empty() { return } if trace.enabled { - for gp := glist; gp != nil; gp = gp.schedlink.ptr() { + for gp := glist.head.ptr(); gp != nil; gp = gp.schedlink.ptr() { traceGoUnpark(gp, 0) } } lock(&sched.lock) var n int - for n = 0; glist != nil; n++ { - gp := glist - glist = gp.schedlink.ptr() + for n = 0; !glist.empty(); n++ { + gp := glist.pop() casgstatus(gp, _Gwaiting, _Grunnable) globrunqput(gp) } @@ -2517,6 +2432,7 @@ func injectglist(glist *g) { for ; n != 0 && sched.npidle != 0; n-- { startm(nil, false) } + *glist = gList{} } // One round of scheduler: find a runnable goroutine and execute it. @@ -2602,6 +2518,23 @@ top: resetspinning() } + if sched.disable.user && !schedEnabled(gp) { + // Scheduling of this goroutine is disabled. Put it on + // the list of pending runnable goroutines for when we + // re-enable user scheduling and look again. + lock(&sched.lock) + if schedEnabled(gp) { + // Something re-enabled scheduling while we + // were acquiring the lock. + unlock(&sched.lock) + } else { + sched.disable.runnable.pushBack(gp) + sched.disable.n++ + unlock(&sched.lock) + goto top + } + } + if gp.lockedm != 0 { // Hands off own p to the locked m, // then blocks waiting for a new p. @@ -2714,7 +2647,7 @@ func goexit0(gp *g) { _g_ := getg() casgstatus(gp, _Grunning, _Gdead) - if isSystemGoroutine(gp) { + if isSystemGoroutine(gp, false) { atomic.Xadd(&sched.ngsys, -1) gp.isSystemGoroutine = false } @@ -2755,7 +2688,6 @@ func goexit0(gp *g) { print("invalid m->lockedInt = ", _g_.m.lockedInt, "\n") throw("internal lockOSThread error") } - _g_.m.lockedExt = 0 gfput(_g_.m.p.ptr(), gp) if locked { // The goroutine may have locked this thread because @@ -2767,6 +2699,10 @@ func goexit0(gp *g) { if GOOS != "plan9" { // See golang.org/issue/22227. _g_.m.exiting = true gogo(_g_.m.g0) + } else { + // Clear lockedExt on plan9 since we may end up re-using + // this thread. + _g_.m.lockedExt = 0 } } schedule() @@ -2823,8 +2759,11 @@ func reentersyscall(pc, sp uintptr) { _g_.m.syscalltick = _g_.m.p.ptr().syscalltick _g_.sysblocktraced = true _g_.m.mcache = nil - _g_.m.p.ptr().m = 0 - atomic.Store(&_g_.m.p.ptr().status, _Psyscall) + pp := _g_.m.p.ptr() + pp.m = 0 + _g_.m.oldp.set(pp) + _g_.m.p = 0 + atomic.Store(&pp.status, _Psyscall) if sched.gcwaiting != 0 { systemstack(entersyscall_gcwait) } @@ -2843,7 +2782,7 @@ func entersyscall_sysmon() { func entersyscall_gcwait() { _g_ := getg() - _p_ := _g_.m.p.ptr() + _p_ := _g_.m.oldp.ptr() lock(&sched.lock) if sched.stopwait > 0 && atomic.Cas(&_p_.status, _Psyscall, _Pgcstop) { @@ -2900,8 +2839,9 @@ func exitsyscall() { _g_.m.locks++ // see comment in entersyscall _g_.waitsince = 0 - oldp := _g_.m.p.ptr() - if exitsyscallfast() { + oldp := _g_.m.oldp.ptr() + _g_.m.oldp = 0 + if exitsyscallfast(oldp) { if _g_.m.mcache == nil { throw("lost mcache") } @@ -2924,6 +2864,12 @@ func exitsyscall() { if getg().preempt { checkPreempt() } + _g_.throwsplit = false + + if sched.disable.user && !schedEnabled(_g_) { + // Scheduling of this goroutine is disabled. + Gosched() + } return } @@ -2964,27 +2910,23 @@ func exitsyscall() { } //go:nosplit -func exitsyscallfast() bool { +func exitsyscallfast(oldp *p) bool { _g_ := getg() // Freezetheworld sets stopwait but does not retake P's. if sched.stopwait == freezeStopWait { - _g_.m.mcache = nil - _g_.m.p = 0 return false } // Try to re-acquire the last P. 
- if _g_.m.p != 0 && _g_.m.p.ptr().status == _Psyscall && atomic.Cas(&_g_.m.p.ptr().status, _Psyscall, _Prunning) { + if oldp != nil && oldp.status == _Psyscall && atomic.Cas(&oldp.status, _Psyscall, _Pidle) { // There's a cpu for us, so we can run. + wirep(oldp) exitsyscallfast_reacquired() return true } // Try to get any other idle P. - oldp := _g_.m.p.ptr() - _g_.m.mcache = nil - _g_.m.p = 0 if sched.pidle != 0 { var ok bool systemstack(func() { @@ -3011,15 +2953,9 @@ func exitsyscallfast() bool { // has successfully reacquired the P it was running on before the // syscall. // -// This function is allowed to have write barriers because exitsyscall -// has acquired a P at this point. -// -//go:yeswritebarrierrec //go:nosplit func exitsyscallfast_reacquired() { _g_ := getg() - _g_.m.mcache = _g_.m.p.ptr().mcache - _g_.m.p.ptr().m.set(_g_.m) if _g_.m.syscalltick != _g_.m.p.ptr().syscalltick { if trace.enabled { // The p was retaken and then enter into syscall again (since _g_.m.syscalltick has changed). @@ -3062,7 +2998,10 @@ func exitsyscall0(gp *g) { dropg() casgstatus(gp, _Gexitingsyscall, _Grunnable) lock(&sched.lock) - _p_ := pidleget() + var _p_ *p + if schedEnabled(_g_) { + _p_ = pidleget() + } if _p_ == nil { globrunqput(gp) } else if atomic.Load(&sched.sysmonwait) != 0 { @@ -3229,11 +3168,12 @@ func newproc(fn uintptr, arg unsafe.Pointer) *g { newg.param = arg newg.gopc = getcallerpc() + newg.ancestors = saveAncestors(_g_) newg.startpc = fn if _g_.m.curg != nil { newg.labels = _g_.m.curg.labels } - if isSystemGoroutine(newg) { + if isSystemGoroutine(newg, false) { atomic.Xadd(&sched.ngsys, +1) } newg.gcscanvalid = false @@ -3342,20 +3282,17 @@ func gfput(_p_ *p, gp *g) { throw("gfput: bad status (not Gdead)") } - gp.schedlink.set(_p_.gfree) - _p_.gfree = gp - _p_.gfreecnt++ - if _p_.gfreecnt >= 64 { - lock(&sched.gflock) - for _p_.gfreecnt >= 32 { - _p_.gfreecnt-- - gp = _p_.gfree - _p_.gfree = gp.schedlink.ptr() - gp.schedlink.set(sched.gfree) - sched.gfree = gp - sched.ngfree++ + _p_.gFree.push(gp) + _p_.gFree.n++ + if _p_.gFree.n >= 64 { + lock(&sched.gFree.lock) + for _p_.gFree.n >= 32 { + _p_.gFree.n-- + gp = _p_.gFree.pop() + sched.gFree.list.push(gp) + sched.gFree.n++ } - unlock(&sched.gflock) + unlock(&sched.gFree.lock) } } @@ -3363,43 +3300,39 @@ func gfput(_p_ *p, gp *g) { // If local list is empty, grab a batch from global list. func gfget(_p_ *p) *g { retry: - gp := _p_.gfree - if gp == nil && sched.gfree != nil { - lock(&sched.gflock) - for _p_.gfreecnt < 32 { - if sched.gfree != nil { - gp = sched.gfree - sched.gfree = gp.schedlink.ptr() - } else { + if _p_.gFree.empty() && !sched.gFree.list.empty() { + lock(&sched.gFree.lock) + // Move a batch of free Gs to the P. + for _p_.gFree.n < 32 { + gp := sched.gFree.list.pop() + if gp == nil { break } - _p_.gfreecnt++ - sched.ngfree-- - gp.schedlink.set(_p_.gfree) - _p_.gfree = gp + sched.gFree.n-- + _p_.gFree.push(gp) + _p_.gFree.n++ } - unlock(&sched.gflock) + unlock(&sched.gFree.lock) goto retry } - if gp != nil { - _p_.gfree = gp.schedlink.ptr() - _p_.gfreecnt-- + gp := _p_.gFree.pop() + if gp == nil { + return nil } + _p_.gFree.n-- return gp } // Purge all cached G's from gfree list to the global list. 
func gfpurge(_p_ *p) { - lock(&sched.gflock) - for _p_.gfreecnt != 0 { - _p_.gfreecnt-- - gp := _p_.gfree - _p_.gfree = gp.schedlink.ptr() - gp.schedlink.set(sched.gfree) - sched.gfree = gp - sched.ngfree++ + lock(&sched.gFree.lock) + for !_p_.gFree.empty() { + gp := _p_.gFree.pop() + _p_.gFree.n-- + sched.gFree.list.push(gp) + sched.gFree.n++ } - unlock(&sched.gflock) + unlock(&sched.gFree.lock) } // Breakpoint executes a breakpoint trap. @@ -3512,9 +3445,9 @@ func badunlockosthread() { } func gcount() int32 { - n := int32(allglen) - sched.ngfree - int32(atomic.Load(&sched.ngsys)) + n := int32(allglen) - sched.gFree.n - int32(atomic.Load(&sched.ngsys)) for _, _p_ := range allp { - n -= _p_.gfreecnt + n -= _p_.gFree.n } // All these variables can be changed concurrently, so the result can be inconsistent. @@ -3627,7 +3560,7 @@ func sigprof(pc uintptr, gp *g, mp *m) { // Account it against abstract "System" or "GC". n = 2 stk[0] = pc - if mp.preemptoff != "" || mp.helpgc != 0 { + if mp.preemptoff != "" { stk[1] = _GCPC + sys.PCQuantum } else { stk[1] = _SystemPC + sys.PCQuantum @@ -3854,6 +3787,7 @@ func procresize(nprocs int32) *p { if _g_.m.p != 0 && _g_.m.p.ptr().id < nprocs { // continue to use the current P _g_.m.p.ptr().status = _Prunning + _g_.m.p.ptr().mcache.prepareForSweep() } else { // release the current P and acquire allp[0] if _g_.m.p != 0 { @@ -3898,36 +3832,40 @@ func procresize(nprocs int32) *p { //go:yeswritebarrierrec func acquirep(_p_ *p) { // Do the part that isn't allowed to have write barriers. - acquirep1(_p_) + wirep(_p_) - // have p; write barriers now allowed - _g_ := getg() - _g_.m.mcache = _p_.mcache + // Have p; write barriers now allowed. + + // Perform deferred mcache flush before this P can allocate + // from a potentially stale mcache. + _p_.mcache.prepareForSweep() if trace.enabled { traceProcStart() } } -// acquirep1 is the first step of acquirep, which actually acquires -// _p_. This is broken out so we can disallow write barriers for this -// part, since we don't yet have a P. +// wirep is the first step of acquirep, which actually associates the +// current M to _p_. This is broken out so we can disallow write +// barriers for this part, since we don't yet have a P. // //go:nowritebarrierrec -func acquirep1(_p_ *p) { +//go:nosplit +func wirep(_p_ *p) { _g_ := getg() if _g_.m.p != 0 || _g_.m.mcache != nil { - throw("acquirep: already in go") + throw("wirep: already in go") } if _p_.m != 0 || _p_.status != _Pidle { id := int64(0) if _p_.m != 0 { id = _p_.m.ptr().id } - print("acquirep: p->m=", _p_.m, "(", id, ") p->status=", _p_.status, "\n") - throw("acquirep: invalid p state") + print("wirep: p->m=", _p_.m, "(", id, ") p->status=", _p_.status, "\n") + throw("wirep: invalid p state") } + _g_.m.mcache = _p_.mcache _g_.m.p.set(_p_) _p_.m.set(_g_.m) _p_.status = _Prunning @@ -4005,7 +3943,7 @@ func checkdead() { lock(&allglock) for i := 0; i < len(allgs); i++ { gp := allgs[i] - if isSystemGoroutine(gp) { + if isSystemGoroutine(gp, false) { continue } s := readgstatus(gp) @@ -4134,8 +4072,8 @@ func sysmon() { now := nanotime() if netpollinited() && lastpoll != 0 && lastpoll+10*1000*1000 < now { atomic.Cas64(&sched.lastpoll, uint64(lastpoll), uint64(now)) - gp := netpoll(false) // non-blocking - returns list of goroutines - if gp != nil { + list := netpoll(false) // non-blocking - returns list of goroutines + if !list.empty() { // Need to decrement number of idle locked M's // (pretending that one more is running) before injectglist. 
// Otherwise it can lead to the following situation: @@ -4144,7 +4082,7 @@ func sysmon() { // observes that there is no work to do and no other running M's // and reports deadlock. incidlelocked(-1) - injectglist(gp) + injectglist(&list) incidlelocked(1) } } @@ -4159,8 +4097,9 @@ func sysmon() { if t := (gcTrigger{kind: gcTriggerTime, now: now}); t.test() && atomic.Load(&forcegc.idle) != 0 { lock(&forcegc.lock) forcegc.idle = 0 - forcegc.g.schedlink = 0 - injectglist(forcegc.g) + var list gList + list.push(forcegc.g) + injectglist(&list) unlock(&forcegc.lock) } // scavenge heap once in a while @@ -4337,7 +4276,7 @@ func schedtrace(detailed bool) { if mp != nil { id = mp.id } - print(" P", i, ": status=", _p_.status, " schedtick=", _p_.schedtick, " syscalltick=", _p_.syscalltick, " m=", id, " runqsize=", t-h, " gfreecnt=", _p_.gfreecnt, "\n") + print(" P", i, ": status=", _p_.status, " schedtick=", _p_.schedtick, " syscalltick=", _p_.syscalltick, " m=", id, " runqsize=", t-h, " gfreecnt=", _p_.gFree.n, "\n") } else { // In non-detailed mode format lengths of per-P run queues as: // [len1 len2 len3 len4] @@ -4373,7 +4312,7 @@ func schedtrace(detailed bool) { if lockedg != nil { id3 = lockedg.goid } - print(" M", mp.id, ": p=", id1, " curg=", id2, " mallocing=", mp.mallocing, " throwing=", mp.throwing, " preemptoff=", mp.preemptoff, ""+" locks=", mp.locks, " dying=", mp.dying, " helpgc=", mp.helpgc, " spinning=", mp.spinning, " blocked=", mp.blocked, " lockedg=", id3, "\n") + print(" M", mp.id, ": p=", id1, " curg=", id2, " mallocing=", mp.mallocing, " throwing=", mp.throwing, " preemptoff=", mp.preemptoff, ""+" locks=", mp.locks, " dying=", mp.dying, " spinning=", mp.spinning, " blocked=", mp.blocked, " lockedg=", id3, "\n") } lock(&allglock) @@ -4395,6 +4334,40 @@ func schedtrace(detailed bool) { unlock(&sched.lock) } +// schedEnableUser enables or disables the scheduling of user +// goroutines. +// +// This does not stop already running user goroutines, so the caller +// should first stop the world when disabling user goroutines. +func schedEnableUser(enable bool) { + lock(&sched.lock) + if sched.disable.user == !enable { + unlock(&sched.lock) + return + } + sched.disable.user = !enable + if enable { + n := sched.disable.n + sched.disable.n = 0 + globrunqputbatch(&sched.disable.runnable, n) + unlock(&sched.lock) + for ; n != 0 && sched.npidle != 0; n-- { + startm(nil, false) + } + } else { + unlock(&sched.lock) + } +} + +// schedEnabled reports whether gp should be scheduled. It returns +// false is scheduling of gp is disabled. +func schedEnabled(gp *g) bool { + if sched.disable.user { + return isSystemGoroutine(gp, true) + } + return true +} + // Put mp on midle list. // Sched must be locked. // May run during STW, so write barriers are not allowed. @@ -4424,13 +4397,7 @@ func mget() *m { // May run during STW, so write barriers are not allowed. //go:nowritebarrierrec func globrunqput(gp *g) { - gp.schedlink = 0 - if sched.runqtail != 0 { - sched.runqtail.ptr().schedlink.set(gp) - } else { - sched.runqhead.set(gp) - } - sched.runqtail.set(gp) + sched.runq.pushBack(gp) sched.runqsize++ } @@ -4439,25 +4406,17 @@ func globrunqput(gp *g) { // May run during STW, so write barriers are not allowed. //go:nowritebarrierrec func globrunqputhead(gp *g) { - gp.schedlink = sched.runqhead - sched.runqhead.set(gp) - if sched.runqtail == 0 { - sched.runqtail.set(gp) - } + sched.runq.push(gp) sched.runqsize++ } // Put a batch of runnable goroutines on the global runnable queue. +// This clears *batch. 
// Sched must be locked. -func globrunqputbatch(ghead *g, gtail *g, n int32) { - gtail.schedlink = 0 - if sched.runqtail != 0 { - sched.runqtail.ptr().schedlink.set(ghead) - } else { - sched.runqhead.set(ghead) - } - sched.runqtail.set(gtail) +func globrunqputbatch(batch *gQueue, n int32) { + sched.runq.pushBackAll(*batch) sched.runqsize += n + *batch = gQueue{} } // Try get a batch of G's from the global runnable queue. @@ -4479,16 +4438,11 @@ func globrunqget(_p_ *p, max int32) *g { } sched.runqsize -= n - if sched.runqsize == 0 { - sched.runqtail = 0 - } - gp := sched.runqhead.ptr() - sched.runqhead = gp.schedlink + gp := sched.runq.pop() n-- for ; n > 0; n-- { - gp1 := sched.runqhead.ptr() - sched.runqhead = gp1.schedlink + gp1 := sched.runq.pop() runqput(_p_, gp1, false) } return gp @@ -4520,7 +4474,7 @@ func pidleget() *p { return _p_ } -// runqempty returns true if _p_ has no Gs on its local run queue. +// runqempty reports whether _p_ has no Gs on its local run queue. // It never returns true spuriously. func runqempty(_p_ *p) bool { // Defend against a race where 1) _p_ has G1 in runqnext but runqhead == runqtail, @@ -4572,11 +4526,11 @@ func runqput(_p_ *p, gp *g, next bool) { } retry: - h := atomic.Load(&_p_.runqhead) // load-acquire, synchronize with consumers + h := atomic.LoadAcq(&_p_.runqhead) // load-acquire, synchronize with consumers t := _p_.runqtail if t-h < uint32(len(_p_.runq)) { _p_.runq[t%uint32(len(_p_.runq))].set(gp) - atomic.Store(&_p_.runqtail, t+1) // store-release, makes the item available for consumption + atomic.StoreRel(&_p_.runqtail, t+1) // store-release, makes the item available for consumption return } if runqputslow(_p_, gp, h, t) { @@ -4600,7 +4554,7 @@ func runqputslow(_p_ *p, gp *g, h, t uint32) bool { for i := uint32(0); i < n; i++ { batch[i] = _p_.runq[(h+i)%uint32(len(_p_.runq))].ptr() } - if !atomic.Cas(&_p_.runqhead, h, h+n) { // cas-release, commits consume + if !atomic.CasRel(&_p_.runqhead, h, h+n) { // cas-release, commits consume return false } batch[n] = gp @@ -4616,10 +4570,13 @@ func runqputslow(_p_ *p, gp *g, h, t uint32) bool { for i := uint32(0); i < n; i++ { batch[i].schedlink.set(batch[i+1]) } + var q gQueue + q.head.set(batch[0]) + q.tail.set(batch[n]) // Now put the batch on global queue. lock(&sched.lock) - globrunqputbatch(batch[0], batch[n], int32(n+1)) + globrunqputbatch(&q, int32(n+1)) unlock(&sched.lock) return true } @@ -4641,13 +4598,13 @@ func runqget(_p_ *p) (gp *g, inheritTime bool) { } for { - h := atomic.Load(&_p_.runqhead) // load-acquire, synchronize with other consumers + h := atomic.LoadAcq(&_p_.runqhead) // load-acquire, synchronize with other consumers t := _p_.runqtail if t == h { return nil, false } gp := _p_.runq[h%uint32(len(_p_.runq))].ptr() - if atomic.Cas(&_p_.runqhead, h, h+1) { // cas-release, commits consume + if atomic.CasRel(&_p_.runqhead, h, h+1) { // cas-release, commits consume return gp, false } } @@ -4659,8 +4616,8 @@ func runqget(_p_ *p) (gp *g, inheritTime bool) { // Can be executed by any P. 
func runqgrab(_p_ *p, batch *[256]guintptr, batchHead uint32, stealRunNextG bool) uint32 { for { - h := atomic.Load(&_p_.runqhead) // load-acquire, synchronize with other consumers - t := atomic.Load(&_p_.runqtail) // load-acquire, synchronize with the producer + h := atomic.LoadAcq(&_p_.runqhead) // load-acquire, synchronize with other consumers + t := atomic.LoadAcq(&_p_.runqtail) // load-acquire, synchronize with the producer n := t - h n = n - n/2 if n == 0 { @@ -4703,7 +4660,7 @@ func runqgrab(_p_ *p, batch *[256]guintptr, batchHead uint32, stealRunNextG bool g := _p_.runq[(h+i)%uint32(len(_p_.runq))] batch[(batchHead+i)%uint32(len(batch))] = g } - if atomic.Cas(&_p_.runqhead, h, h+n) { // cas-release, commits consume + if atomic.CasRel(&_p_.runqhead, h, h+n) { // cas-release, commits consume return n } } @@ -4723,11 +4680,112 @@ func runqsteal(_p_, p2 *p, stealRunNextG bool) *g { if n == 0 { return gp } - h := atomic.Load(&_p_.runqhead) // load-acquire, synchronize with consumers + h := atomic.LoadAcq(&_p_.runqhead) // load-acquire, synchronize with consumers if t-h+n >= uint32(len(_p_.runq)) { throw("runqsteal: runq overflow") } - atomic.Store(&_p_.runqtail, t+n) // store-release, makes the item available for consumption + atomic.StoreRel(&_p_.runqtail, t+n) // store-release, makes the item available for consumption + return gp +} + +// A gQueue is a dequeue of Gs linked through g.schedlink. A G can only +// be on one gQueue or gList at a time. +type gQueue struct { + head guintptr + tail guintptr +} + +// empty reports whether q is empty. +func (q *gQueue) empty() bool { + return q.head == 0 +} + +// push adds gp to the head of q. +func (q *gQueue) push(gp *g) { + gp.schedlink = q.head + q.head.set(gp) + if q.tail == 0 { + q.tail.set(gp) + } +} + +// pushBack adds gp to the tail of q. +func (q *gQueue) pushBack(gp *g) { + gp.schedlink = 0 + if q.tail != 0 { + q.tail.ptr().schedlink.set(gp) + } else { + q.head.set(gp) + } + q.tail.set(gp) +} + +// pushBackAll adds all Gs in l2 to the tail of q. After this q2 must +// not be used. +func (q *gQueue) pushBackAll(q2 gQueue) { + if q2.tail == 0 { + return + } + q2.tail.ptr().schedlink = 0 + if q.tail != 0 { + q.tail.ptr().schedlink = q2.head + } else { + q.head = q2.head + } + q.tail = q2.tail +} + +// pop removes and returns the head of queue q. It returns nil if +// q is empty. +func (q *gQueue) pop() *g { + gp := q.head.ptr() + if gp != nil { + q.head = gp.schedlink + if q.head == 0 { + q.tail = 0 + } + } + return gp +} + +// popList takes all Gs in q and returns them as a gList. +func (q *gQueue) popList() gList { + stack := gList{q.head} + *q = gQueue{} + return stack +} + +// A gList is a list of Gs linked through g.schedlink. A G can only be +// on one gQueue or gList at a time. +type gList struct { + head guintptr +} + +// empty reports whether l is empty. +func (l *gList) empty() bool { + return l.head == 0 +} + +// push adds gp to the head of l. +func (l *gList) push(gp *g) { + gp.schedlink = l.head + l.head.set(gp) +} + +// pushAll prepends all Gs in q to l. +func (l *gList) pushAll(q gQueue) { + if !q.empty() { + q.tail.ptr().schedlink = l.head + l.head = q.head + } +} + +// pop removes and returns the head of l. If l is empty, it returns nil. 
+func (l *gList) pop() *g { + gp := l.head.ptr() + if gp != nil { + l.head = gp.schedlink + } return gp } -- cgit v1.1 From 6065f1c5889159c9fa1b6c74726d04fc9abea9fe Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Tue, 29 Jan 2019 15:31:10 +0000 Subject: runtime: fix sigprof frame counting If sigtramp and sigtrampgo are both on stack, n -= framesToDiscard is executed twice, which should actually run only once. Reviewed-on: https://go-review.googlesource.com/c/159238 From-SVN: r268366 --- libgo/go/runtime/proc.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'libgo/go/runtime/proc.go') diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go index 05dd53d..b4fa88f 100644 --- a/libgo/go/runtime/proc.go +++ b/libgo/go/runtime/proc.go @@ -3542,14 +3542,13 @@ func sigprof(pc uintptr, gp *g, mp *m) { for i := 0; i < n; i++ { if stklocs[i].function == "runtime.sigtrampgo" && i+2 < n { framesToDiscard = i + 2 - n -= framesToDiscard } if stklocs[i].function == "runtime.sigtramp" && i+2 < n { framesToDiscard = i + 2 - n -= framesToDiscard break } } + n -= framesToDiscard for i := 0; i < n; i++ { stk[i] = stklocs[i+framesToDiscard].pc } -- cgit v1.1 From 8a9f2a6bbd6bdf164ca987edac34ac72447881a5 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Fri, 15 Feb 2019 01:57:51 +0000 Subject: compiler, runtime: harmonize types referenced by both C and Go Compiling with LTO revealed a number of cases in the runtime and standard library where C and Go disagreed about the type of an object or function (or where Go and code generated by the compiler disagreed). In all cases the underlying representation was the same (e.g., uintptr vs. void*), so this wasn't causing actual problems, but it did result in a number of annoying warnings when compiling with LTO. Reviewed-on: https://go-review.googlesource.com/c/160700 From-SVN: r268923 --- libgo/go/runtime/proc.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'libgo/go/runtime/proc.go') diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go index b4fa88f..e937563 100644 --- a/libgo/go/runtime/proc.go +++ b/libgo/go/runtime/proc.go @@ -153,7 +153,7 @@ var runtimeInitTime int64 var initSigmask sigset // The main goroutine. -func main() { +func main(unsafe.Pointer) { g := getg() // Max stack size is 1 GB on 64-bit, 250 MB on 32-bit. -- cgit v1.1 From 6bd37418a378bec4916ad70403375af00df91938 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Tue, 19 Feb 2019 15:32:34 +0000 Subject: runtime: abort stack scan in cases that we cannot unwind the stack In signal-triggered stack scan, if the signal is delivered at certain bad time (e.g. in vdso, or in the middle of setcontext?), the unwinder may not be able to unwind the whole stack, while it still reports _URC_END_OF_STACK. So we cannot rely on _URC_END_OF_STACK to tell if it successfully scanned the stack. Instead, we check the last Go frame to see it actually reached the end of the stack. For Go-created stack, this is runtime.kickoff. For C-created stack, we need to record the outermost Go frame when it enters the Go side. Also we cannot unwind the stack if the signal is delivered in the middle of runtime.gogo, halfway through a goroutine switch, where the g and the stack don't match. Give up in this case as well. 
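To make the end-of-stack check concrete, here is a rough,
self-contained sketch of the idea (an editorial illustration with
invented names, not the actual libgo code): a scan is trusted only
when the outermost frame the unwinder reached lines up with the entry
point recorded for that stack -- runtime.kickoff for Go-created
stacks, the C-to-Go transition otherwise.

package main

import "fmt"

// scanOutcome models one signal-triggered scan attempt. entrySP is
// the stack pointer recorded when the goroutine's stack was entered;
// lastSP is the SP of the outermost frame the unwinder actually
// reached before reporting end-of-stack.
type scanOutcome struct {
	entrySP uintptr
	lastSP  uintptr
}

// complete reports whether the unwinder really walked back to the
// entry frame. Stacks grow down, so an aborted unwind (signal in the
// vdso, in setcontext, or mid-gogo) stops at an address below the
// entry SP, and the scan must be retried rather than trusted.
func (s scanOutcome) complete() bool {
	return s.lastSP >= s.entrySP
}

func main() {
	good := scanOutcome{entrySP: 0x7ffc0000, lastSP: 0x7ffc0000}
	bad := scanOutcome{entrySP: 0x7ffc0000, lastSP: 0x7ffb8000}
	fmt.Println(good.complete(), bad.complete()) // true false
}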
Reviewed-on: https://go-review.googlesource.com/c/159098 From-SVN: r269018 --- libgo/go/runtime/proc.go | 3 +++ 1 file changed, 3 insertions(+) (limited to 'libgo/go/runtime/proc.go') diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go index e937563..1c944d6 100644 --- a/libgo/go/runtime/proc.go +++ b/libgo/go/runtime/proc.go @@ -1192,6 +1192,9 @@ func kickoff() { gp.param = nil } + // Record the entry SP to help stack scan. + gp.entrysp = getsp() + fv(param) goexit1() } -- cgit v1.1 From cba8a572c208078c1c6eb9845b54f960526c53c0 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Wed, 27 Feb 2019 22:35:10 +0000 Subject: re PR go/89172 (FAIL: runtime/pprof) PR go/89172 internal/cpu, runtime, runtime/pprof: handle function descriptors When using PPC64 ELF ABI v1 a function address is not a PC, but is the address of a function descriptor. The first field in the function descriptor is the actual PC (see http://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.html#FUNC-DES). The libbacktrace library knows about this, and libgo uses actual PC values consistently except for the helper function funcPC that appears in both runtime and runtime/pprof. This patch fixes funcPC by recording, in the internal/cpu package, whether function descriptors are being used. We have to check for function descriptors using a C compiler check, because GCC can be configured using --with-abi to select the ELF ABI to use. Fixes https://gcc.gnu.org/PR89172 Reviewed-on: https://go-review.googlesource.com/c/162978 From-SVN: r269266 --- libgo/go/runtime/proc.go | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'libgo/go/runtime/proc.go') diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go index 1c944d6..0e6c9e1 100644 --- a/libgo/go/runtime/proc.go +++ b/libgo/go/runtime/proc.go @@ -446,7 +446,14 @@ func releaseSudog(s *sudog) { //go:nosplit func funcPC(f interface{}) uintptr { i := (*iface)(unsafe.Pointer(&f)) - return **(**uintptr)(i.data) + r := **(**uintptr)(i.data) + if cpu.FunctionDescriptors { + // With PPC64 ELF ABI v1 function descriptors the + // function address is a pointer to a struct whose + // first field is the actual PC. + r = *(*uintptr)(unsafe.Pointer(r)) + } + return r } func lockedOSThread() bool { -- cgit v1.1
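A closing note on the funcPC helper patched above: the double
dereference works because a Go func value is represented as a pointer
to a funcval whose first word is the code address. The sketch below
demonstrates the non-descriptor case in standalone form; it depends on
that unexported representation, so treat it as an illustration rather
than supported API (funcPC and target here are local stand-ins). Under
PPC64 ELF ABI v1, the word recovered this way is instead the address
of a function descriptor, and the extra dereference guarded by
cpu.FunctionDescriptors in the patch yields the real PC.

package main

import (
	"fmt"
	"unsafe"
)

// funcPC extracts the first word of a func value: &fn points at the
// func value (a *funcval); one dereference yields the funcval
// pointer, a second yields its first word, which is the entry PC on
// most platforms and a function-descriptor address on PPC64 ELF ABI
// v1 (where one more *(*uintptr) dereference would reach the PC).
func funcPC(fn func()) uintptr {
	return **(**uintptr)(unsafe.Pointer(&fn))
}

func target() {}

func main() {
	fmt.Printf("entry word of target: %#x\n", funcPC(target))
}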