Diffstat (limited to 'libgo/go/runtime/proc.go')
-rw-r--r--  libgo/go/runtime/proc.go  794
1 file changed, 484 insertions, 310 deletions
diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go
index 037e779..0534290 100644
--- a/libgo/go/runtime/proc.go
+++ b/libgo/go/runtime/proc.go
@@ -68,8 +68,6 @@ func main_init()
//extern main.main
func main_main()
-var buildVersion = sys.TheVersion
-
// set using cmd/go/internal/modload.ModInfoProg
var modinfo string
@@ -108,33 +106,64 @@ var modinfo string
// any work to do.
//
// The current approach:
-// We unpark an additional thread when we ready a goroutine if (1) there is an
-// idle P and there are no "spinning" worker threads. A worker thread is considered
-// spinning if it is out of local work and did not find work in global run queue/
-// netpoller; the spinning state is denoted in m.spinning and in sched.nmspinning.
-// Threads unparked this way are also considered spinning; we don't do goroutine
-// handoff so such threads are out of work initially. Spinning threads do some
-// spinning looking for work in per-P run queues before parking. If a spinning
+//
+// This approach applies to three primary sources of potential work: readying a
+// goroutine, new/modified-earlier timers, and idle-priority GC. See below for
+// additional details.
+//
+// We unpark an additional thread when we submit work if (this is wakep()):
+// 1. There is an idle P, and
+// 2. There are no "spinning" worker threads.
+//
+// A worker thread is considered spinning if it is out of local work and did
+// not find work in the global run queue or netpoller; the spinning state is
+// denoted in m.spinning and in sched.nmspinning. Threads unparked this way are
+// also considered spinning; we don't do goroutine handoff so such threads are
+// out of work initially. Spinning threads spin on looking for work in per-P
+// run queues and timer heaps or from the GC before parking. If a spinning
// thread finds work it takes itself out of the spinning state and proceeds to
-// execution. If it does not find work it takes itself out of the spinning state
-// and then parks.
-// If there is at least one spinning thread (sched.nmspinning>1), we don't unpark
-// new threads when readying goroutines. To compensate for that, if the last spinning
-// thread finds work and stops spinning, it must unpark a new spinning thread.
-// This approach smooths out unjustified spikes of thread unparking,
-// but at the same time guarantees eventual maximal CPU parallelism utilization.
+// execution. If it does not find work it takes itself out of the spinning
+// state and then parks.
+//
+// If there is at least one spinning thread (sched.nmspinning>1), we don't
+// unpark new threads when submitting work. To compensate for that, if the last
+// spinning thread finds work and stops spinning, it must unpark a new spinning
+// thread. This approach smooths out unjustified spikes of thread unparking,
+// but at the same time guarantees eventual maximal CPU parallelism
+// utilization.
+//
+// The main implementation complication is that we need to be very careful
+// during spinning->non-spinning thread transition. This transition can race
+// with submission of new work, and either one part or another needs to unpark
+// another worker thread. If they both fail to do that, we can end up with
+// semi-persistent CPU underutilization.
//
-// The main implementation complication is that we need to be very careful during
-// spinning->non-spinning thread transition. This transition can race with submission
-// of a new goroutine, and either one part or another needs to unpark another worker
-// thread. If they both fail to do that, we can end up with semi-persistent CPU
-// underutilization. The general pattern for goroutine readying is: submit a goroutine
-// to local work queue, #StoreLoad-style memory barrier, check sched.nmspinning.
-// The general pattern for spinning->non-spinning transition is: decrement nmspinning,
-// #StoreLoad-style memory barrier, check all per-P work queues for new work.
-// Note that all this complexity does not apply to global run queue as we are not
-// sloppy about thread unparking when submitting to global queue. Also see comments
-// for nmspinning manipulation.
+// The general pattern for submission is:
+// 1. Submit work to the local run queue, timer heap, or GC state.
+// 2. #StoreLoad-style memory barrier.
+// 3. Check sched.nmspinning.
+//
+// The general pattern for spinning->non-spinning transition is:
+// 1. Decrement nmspinning.
+// 2. #StoreLoad-style memory barrier.
+// 3. Check all per-P work queues and GC for new work.
+//
+// Note that all this complexity does not apply to global run queue as we are
+// not sloppy about thread unparking when submitting to global queue. Also see
+// comments for nmspinning manipulation.
+//
+// How these different sources of work behave varies, though it doesn't affect
+// the synchronization approach:
+// * Ready goroutine: this is an obvious source of work; the goroutine is
+// immediately ready and must run on some thread eventually.
+// * New/modified-earlier timer: The current timer implementation (see time.go)
+// uses netpoll in a thread with no work available to wait for the soonest
+// timer. If there is no thread waiting, we want a new spinning thread to go
+// wait.
+// * Idle-priority GC: The GC wakes a stopped idle thread to contribute to
+// background GC work (note: currently disabled per golang.org/issue/19112).
+// Also see golang.org/issue/44313, as this should be extended to all GC
+// workers.
var (
m0 m
@@ -514,8 +543,8 @@ var (
allglock mutex
allgs []*g
- // allglen and allgptr are atomic variables that contain len(allg) and
- // &allg[0] respectively. Proper ordering depends on totally-ordered
+ // allglen and allgptr are atomic variables that contain len(allgs) and
+ // &allgs[0] respectively. Proper ordering depends on totally-ordered
// loads and stores. Writes are protected by allglock.
//
// allgptr is updated before allglen. Readers should read allglen
@@ -556,6 +585,30 @@ func atomicAllGIndex(ptr **g, i uintptr) *g {
return *(**g)(add(unsafe.Pointer(ptr), i*sys.PtrSize))
}
+// forEachG calls fn on every G from allgs.
+//
+// forEachG takes a lock to exclude concurrent addition of new Gs.
+func forEachG(fn func(gp *g)) {
+ lock(&allglock)
+ for _, gp := range allgs {
+ fn(gp)
+ }
+ unlock(&allglock)
+}
+
+// forEachGRace calls fn on every G from allgs.
+//
+// forEachGRace avoids locking, but does not exclude addition of new Gs during
+// execution, which may be missed.
+func forEachGRace(fn func(gp *g)) {
+ ptr, length := atomicAllG()
+ for i := uintptr(0); i < length; i++ {
+ gp := atomicAllGIndex(ptr, i)
+ fn(gp)
+ }
+ return
+}
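
As a quick illustration of when each helper fits, consider the hypothetical in-runtime callers below; they are not part of this patch (the patch itself converts checkdead and schedtrace to forEachG further down, since those must not miss any G).

// Hypothetical examples for contrast; these functions are not in the patch.

// Needs an exact view of all Gs, so it takes allglock via forEachG.
func countGs() int {
	n := 0
	forEachG(func(gp *g) { n++ })
	return n
}

// Best-effort reporting can tolerate missing Gs created concurrently,
// so it can use the lock-free forEachGRace.
func reportGs(report func(*g)) {
	forEachGRace(report)
}
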
+
const (
// Number of goroutine ids to grab from sched.goidgen to local per-P cache at once.
// 16 seems to provide enough amortization, but other than that it's mostly arbitrary number.
@@ -649,6 +702,11 @@ func schedinit() {
sigsave(&_g_.m.sigmask)
initSigmask = _g_.m.sigmask
+ if offset := unsafe.Offsetof(sched.timeToRun); offset%8 != 0 {
+ println(offset)
+ throw("sched.timeToRun not aligned to 8 bytes")
+ }
+
goargs()
goenvs()
parsedebugvars()
@@ -925,6 +983,37 @@ func casgstatus(gp *g, oldval, newval uint32) {
nextYield = nanotime() + yieldDelay/2
}
}
+
+ // Handle tracking for scheduling latencies.
+ if oldval == _Grunning {
+ // Track every 8th time a goroutine transitions out of running.
+ if gp.trackingSeq%gTrackingPeriod == 0 {
+ gp.tracking = true
+ }
+ gp.trackingSeq++
+ }
+ if gp.tracking {
+ now := nanotime()
+ if oldval == _Grunnable {
+ // We transitioned out of runnable, so measure how much
+ // time we spent in this state and add it to
+ // runnableTime.
+ gp.runnableTime += now - gp.runnableStamp
+ gp.runnableStamp = 0
+ }
+ if newval == _Grunnable {
+ // We just transitioned into runnable, so record what
+ // time that happened.
+ gp.runnableStamp = now
+ } else if newval == _Grunning {
+ // We're transitioning into running, so turn off
+ // tracking and record how much time we spent in
+ // runnable.
+ gp.tracking = false
+ sched.timeToRun.record(gp.runnableTime)
+ gp.runnableTime = 0
+ }
+ }
}
// casGToPreemptScan transitions gp from _Grunning to _Gscan|_Gpreempted.
@@ -1235,6 +1324,9 @@ func kickoff() {
goexit1()
}
+// The go:noinline is to guarantee the getcallerpc/getcallersp below are safe,
+// so that we can set up g0.sched to return to the call of mstart1 above.
+//go:noinline
func mstart1() {
_g_ := getg()
@@ -1370,6 +1462,8 @@ found:
}
unlock(&sched.lock)
+ atomic.Xadd64(&ncgocall, int64(m.ncgocall))
+
// Release the P.
handoffp(releasep())
// After this point we must not have write barriers.
@@ -1661,6 +1755,10 @@ func needm() {
// Store the original signal mask for use by minit.
mp.sigmask = sigmask
+ // Install TLS on some platforms (previously setg
+ // would do this if necessary).
+ osSetupTLS(mp)
+
// Install g (= m->curg).
setg(mp.curg)
@@ -1822,7 +1920,7 @@ func lockextra(nilokay bool) *m {
for {
old := atomic.Loaduintptr(&extram)
if old == locked {
- osyield()
+ osyield_no_g()
continue
}
if old == 0 && !nilokay {
@@ -1833,13 +1931,13 @@ func lockextra(nilokay bool) *m {
atomic.Xadd(&extraMWaiters, 1)
incr = true
}
- usleep(1)
+ usleep_no_g(1)
continue
}
if atomic.Casuintptr(&extram, old, locked) {
return (*m)(unsafe.Pointer(old))
}
- osyield()
+ osyield_no_g()
continue
}
}
@@ -2417,85 +2515,40 @@ top:
}
}
- // Steal work from other P's.
+ // Spinning Ms: steal work from other Ps.
+ //
+ // Limit the number of spinning Ms to half the number of busy Ps.
+ // This is necessary to prevent excessive CPU consumption when
+ // GOMAXPROCS>>1 but the program parallelism is low.
procs := uint32(gomaxprocs)
- ranTimer := false
- // If number of spinning M's >= number of busy P's, block.
- // This is necessary to prevent excessive CPU consumption
- // when GOMAXPROCS>>1 but the program parallelism is low.
- if !_g_.m.spinning && 2*atomic.Load(&sched.nmspinning) >= procs-atomic.Load(&sched.npidle) {
- goto stop
- }
- if !_g_.m.spinning {
- _g_.m.spinning = true
- atomic.Xadd(&sched.nmspinning, 1)
- }
- const stealTries = 4
- for i := 0; i < stealTries; i++ {
- stealTimersOrRunNextG := i == stealTries-1
-
- for enum := stealOrder.start(fastrand()); !enum.done(); enum.next() {
- if sched.gcwaiting != 0 {
- goto top
- }
- p2 := allp[enum.position()]
- if _p_ == p2 {
- continue
- }
-
- // Steal timers from p2. This call to checkTimers is the only place
- // where we might hold a lock on a different P's timers. We do this
- // once on the last pass before checking runnext because stealing
- // from the other P's runnext should be the last resort, so if there
- // are timers to steal do that first.
- //
- // We only check timers on one of the stealing iterations because
- // the time stored in now doesn't change in this loop and checking
- // the timers for each P more than once with the same value of now
- // is probably a waste of time.
- //
- // timerpMask tells us whether the P may have timers at all. If it
- // can't, no need to check at all.
- if stealTimersOrRunNextG && timerpMask.read(enum.position()) {
- tnow, w, ran := checkTimers(p2, now)
- now = tnow
- if w != 0 && (pollUntil == 0 || w < pollUntil) {
- pollUntil = w
- }
- if ran {
- // Running the timers may have
- // made an arbitrary number of G's
- // ready and added them to this P's
- // local run queue. That invalidates
- // the assumption of runqsteal
- // that is always has room to add
- // stolen G's. So check now if there
- // is a local G to run.
- if gp, inheritTime := runqget(_p_); gp != nil {
- return gp, inheritTime
- }
- ranTimer = true
- }
- }
+ if _g_.m.spinning || 2*atomic.Load(&sched.nmspinning) < procs-atomic.Load(&sched.npidle) {
+ if !_g_.m.spinning {
+ _g_.m.spinning = true
+ atomic.Xadd(&sched.nmspinning, 1)
+ }
- // Don't bother to attempt to steal if p2 is idle.
- if !idlepMask.read(enum.position()) {
- if gp := runqsteal(_p_, p2, stealTimersOrRunNextG); gp != nil {
- return gp, false
- }
- }
+ gp, inheritTime, tnow, w, newWork := stealWork(now)
+ now = tnow
+ if gp != nil {
+ // Successfully stole.
+ return gp, inheritTime
+ }
+ if newWork {
+ // There may be new timer or GC work; restart to
+ // discover.
+ goto top
+ }
+ if w != 0 && (pollUntil == 0 || w < pollUntil) {
+ // Earlier timer to wait for.
+ pollUntil = w
}
- }
- if ranTimer {
- // Running a timer may have made some goroutine ready.
- goto top
}
-stop:
-
- // We have nothing to do. If we're in the GC mark phase, can
- // safely scan and blacken objects, and have work to do, run
- // idle-time marking rather than give up the P.
+ // We have nothing to do.
+ //
+ // If we're in the GC mark phase, can safely scan and blacken objects,
+ // and have work to do, run idle-time marking rather than give up the
+ // P.
if gcBlackenEnabled != 0 && gcMarkWorkAvailable(_p_) {
node := (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop())
if node != nil {
@@ -2509,17 +2562,11 @@ stop:
}
}
- delta := int64(-1)
- if pollUntil != 0 {
- // checkTimers ensures that polluntil > now.
- delta = pollUntil - now
- }
-
// wasm only:
// If a callback returned and no other goroutine is awake,
// then wake event handler goroutine which pauses execution
// until a callback was triggered.
- gp, otherReady := beforeIdle(delta)
+ gp, otherReady := beforeIdle(now, pollUntil)
if gp != nil {
casgstatus(gp, _Gwaiting, _Grunnable)
if trace.enabled {
@@ -2558,18 +2605,25 @@ stop:
pidleput(_p_)
unlock(&sched.lock)
- // Delicate dance: thread transitions from spinning to non-spinning state,
- // potentially concurrently with submission of new goroutines. We must
- // drop nmspinning first and then check all per-P queues again (with
- // #StoreLoad memory barrier in between). If we do it the other way around,
- // another thread can submit a goroutine after we've checked all run queues
- // but before we drop nmspinning; as a result nobody will unpark a thread
- // to run the goroutine.
+ // Delicate dance: thread transitions from spinning to non-spinning
+ // state, potentially concurrently with submission of new work. We must
+ // drop nmspinning first and then check all sources again (with
+ // #StoreLoad memory barrier in between). If we do it the other way
+ // around, another thread can submit work after we've checked all
+ // sources but before we drop nmspinning; as a result nobody will
+ // unpark a thread to run the work.
+ //
+ // This applies to the following sources of work:
+ //
+ // * Goroutines added to a per-P run queue.
+ // * New/modified-earlier timers on a per-P timer heap.
+ // * Idle-priority GC work (barring golang.org/issue/19112).
+ //
// If we discover new work below, we need to restore m.spinning as a signal
// for resetspinning to unpark a new worker thread (because there can be more
// than one starving goroutine). However, if after discovering new work
- // we also observe no idle Ps, it is OK to just park the current thread:
- // the system is fully loaded so no spinning threads are required.
+ // we also observe no idle Ps it is OK to skip unparking a new worker
+ // thread: the system is fully loaded so no spinning threads are required.
// Also see "Worker thread parking/unparking" comment at the top of the file.
wasSpinning := _g_.m.spinning
if _g_.m.spinning {
@@ -2577,97 +2631,48 @@ stop:
if int32(atomic.Xadd(&sched.nmspinning, -1)) < 0 {
throw("findrunnable: negative nmspinning")
}
- }
- // check all runqueues once again
- for id, _p_ := range allpSnapshot {
- if !idlepMaskSnapshot.read(uint32(id)) && !runqempty(_p_) {
- lock(&sched.lock)
- _p_ = pidleget()
- unlock(&sched.lock)
- if _p_ != nil {
- acquirep(_p_)
- if wasSpinning {
- _g_.m.spinning = true
- atomic.Xadd(&sched.nmspinning, 1)
- }
- goto top
- }
- break
- }
- }
-
- // Similar to above, check for timer creation or expiry concurrently with
- // transitioning from spinning to non-spinning. Note that we cannot use
- // checkTimers here because it calls adjusttimers which may need to allocate
- // memory, and that isn't allowed when we don't have an active P.
- for id, _p_ := range allpSnapshot {
- if timerpMaskSnapshot.read(uint32(id)) {
- w := nobarrierWakeTime(_p_)
- if w != 0 && (pollUntil == 0 || w < pollUntil) {
- pollUntil = w
- }
- }
- }
- if pollUntil != 0 {
- if now == 0 {
- now = nanotime()
- }
- delta = pollUntil - now
- if delta < 0 {
- delta = 0
- }
- }
+ // Note that for correctness, only the last M transitioning from
+ // spinning to non-spinning must perform these rechecks to
+ // ensure no missed work. We are performing it on every M that
+ // transitions as a conservative change to monitor effects on
+ // latency. See golang.org/issue/43997.
- // Check for idle-priority GC work again.
- //
- // N.B. Since we have no P, gcBlackenEnabled may change at any time; we
- // must check again after acquiring a P.
- if atomic.Load(&gcBlackenEnabled) != 0 && gcMarkWorkAvailable(nil) {
- // Work is available; we can start an idle GC worker only if
- // there is an available P and available worker G.
- //
- // We can attempt to acquire these in either order. Workers are
- // almost always available (see comment in findRunnableGCWorker
- // for the one case there may be none). Since we're slightly
- // less likely to find a P, check for that first.
- lock(&sched.lock)
- var node *gcBgMarkWorkerNode
- _p_ = pidleget()
+ // Check all runqueues once again.
+ _p_ = checkRunqsNoP(allpSnapshot, idlepMaskSnapshot)
if _p_ != nil {
- // Now that we own a P, gcBlackenEnabled can't change
- // (as it requires STW).
- if gcBlackenEnabled != 0 {
- node = (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop())
- if node == nil {
- pidleput(_p_)
- _p_ = nil
- }
- } else {
- pidleput(_p_)
- _p_ = nil
- }
+ acquirep(_p_)
+ _g_.m.spinning = true
+ atomic.Xadd(&sched.nmspinning, 1)
+ goto top
}
- unlock(&sched.lock)
+
+ // Check for idle-priority GC work again.
+ _p_, gp = checkIdleGCNoP()
if _p_ != nil {
acquirep(_p_)
- if wasSpinning {
- _g_.m.spinning = true
- atomic.Xadd(&sched.nmspinning, 1)
- }
+ _g_.m.spinning = true
+ atomic.Xadd(&sched.nmspinning, 1)
// Run the idle worker.
_p_.gcMarkWorkerMode = gcMarkWorkerIdleMode
- gp := node.gp.ptr()
casgstatus(gp, _Gwaiting, _Grunnable)
if trace.enabled {
traceGoUnpark(gp, 0)
}
return gp, false
}
+
+ // Finally, check for timer creation or expiry concurrently with
+ // transitioning from spinning to non-spinning.
+ //
+ // Note that we cannot use checkTimers here because it calls
+ // adjusttimers which may need to allocate memory, and that isn't
+ // allowed when we don't have an active P.
+ pollUntil = checkTimersNoP(allpSnapshot, timerpMaskSnapshot, pollUntil)
}
- // poll network
+ // Poll network until next timer.
if netpollinited() && (atomic.Load(&netpollWaiters) > 0 || pollUntil != 0) && atomic.Xchg64(&sched.lastpoll, 0) != 0 {
atomic.Store64(&sched.pollUntil, uint64(pollUntil))
if _g_.m.p != 0 {
@@ -2676,11 +2681,21 @@ stop:
if _g_.m.spinning {
throw("findrunnable: netpoll with spinning")
}
+ delay := int64(-1)
+ if pollUntil != 0 {
+ if now == 0 {
+ now = nanotime()
+ }
+ delay = pollUntil - now
+ if delay < 0 {
+ delay = 0
+ }
+ }
if faketime != 0 {
// When using fake time, just poll.
- delta = 0
+ delay = 0
}
- list := netpoll(delta) // block until new work is available
+ list := netpoll(delay) // block until new work is available
atomic.Store64(&sched.pollUntil, 0)
atomic.Store64(&sched.lastpoll, uint64(nanotime()))
if faketime != 0 && list.empty() {
@@ -2742,6 +2757,178 @@ func pollWork() bool {
return false
}
+// stealWork attempts to steal a runnable goroutine or timer from any P.
+//
+// If newWork is true, new work may have been readied.
+//
+// If now is not 0 it is the current time. stealWork returns the passed time or
+// the current time if now was passed as 0.
+func stealWork(now int64) (gp *g, inheritTime bool, rnow, pollUntil int64, newWork bool) {
+ pp := getg().m.p.ptr()
+
+ ranTimer := false
+
+ const stealTries = 4
+ for i := 0; i < stealTries; i++ {
+ stealTimersOrRunNextG := i == stealTries-1
+
+ for enum := stealOrder.start(fastrand()); !enum.done(); enum.next() {
+ if sched.gcwaiting != 0 {
+ // GC work may be available.
+ return nil, false, now, pollUntil, true
+ }
+ p2 := allp[enum.position()]
+ if pp == p2 {
+ continue
+ }
+
+ // Steal timers from p2. This call to checkTimers is the only place
+ // where we might hold a lock on a different P's timers. We do this
+ // once on the last pass before checking runnext because stealing
+ // from the other P's runnext should be the last resort, so if there
+ // are timers to steal do that first.
+ //
+ // We only check timers on one of the stealing iterations because
+ // the time stored in now doesn't change in this loop and checking
+ // the timers for each P more than once with the same value of now
+ // is probably a waste of time.
+ //
+ // timerpMask tells us whether the P may have timers at all. If it
+ // can't, no need to check at all.
+ if stealTimersOrRunNextG && timerpMask.read(enum.position()) {
+ tnow, w, ran := checkTimers(p2, now)
+ now = tnow
+ if w != 0 && (pollUntil == 0 || w < pollUntil) {
+ pollUntil = w
+ }
+ if ran {
+ // Running the timers may have
+ // made an arbitrary number of G's
+ // ready and added them to this P's
+ // local run queue. That invalidates
+ // the assumption of runqsteal
+ // that it always has room to add
+ // stolen G's. So check now if there
+ // is a local G to run.
+ if gp, inheritTime := runqget(pp); gp != nil {
+ return gp, inheritTime, now, pollUntil, ranTimer
+ }
+ ranTimer = true
+ }
+ }
+
+ // Don't bother to attempt to steal if p2 is idle.
+ if !idlepMask.read(enum.position()) {
+ if gp := runqsteal(pp, p2, stealTimersOrRunNextG); gp != nil {
+ return gp, false, now, pollUntil, ranTimer
+ }
+ }
+ }
+ }
+
+ // No goroutines found to steal. Regardless, running a timer may have
+ // made some goroutine ready that we missed. Indicate the next timer to
+ // wait for.
+ return nil, false, now, pollUntil, ranTimer
+}
+
+// Check all Ps for a runnable G to steal.
+//
+// On entry we have no P. If a G is available to steal and a P is available,
+// the P is returned; the caller should acquire it and then attempt to steal
+// the work onto it.
+func checkRunqsNoP(allpSnapshot []*p, idlepMaskSnapshot pMask) *p {
+ for id, p2 := range allpSnapshot {
+ if !idlepMaskSnapshot.read(uint32(id)) && !runqempty(p2) {
+ lock(&sched.lock)
+ pp := pidleget()
+ unlock(&sched.lock)
+ if pp != nil {
+ return pp
+ }
+
+ // Can't get a P, don't bother checking remaining Ps.
+ break
+ }
+ }
+
+ return nil
+}
+
+// Check all Ps for a timer expiring sooner than pollUntil.
+//
+// Returns updated pollUntil value.
+func checkTimersNoP(allpSnapshot []*p, timerpMaskSnapshot pMask, pollUntil int64) int64 {
+ for id, p2 := range allpSnapshot {
+ if timerpMaskSnapshot.read(uint32(id)) {
+ w := nobarrierWakeTime(p2)
+ if w != 0 && (pollUntil == 0 || w < pollUntil) {
+ pollUntil = w
+ }
+ }
+ }
+
+ return pollUntil
+}
+
+// Check for idle-priority GC, without a P on entry.
+//
+// If some GC work, a P, and a worker G are all available, the P and G will be
+// returned. The returned P has not been wired yet.
+func checkIdleGCNoP() (*p, *g) {
+ // N.B. Since we have no P, gcBlackenEnabled may change at any time; we
+ // must check again after acquiring a P.
+ if atomic.Load(&gcBlackenEnabled) == 0 {
+ return nil, nil
+ }
+ if !gcMarkWorkAvailable(nil) {
+ return nil, nil
+ }
+
+ // Work is available; we can start an idle GC worker only if there is
+ // an available P and available worker G.
+ //
+ // We can attempt to acquire these in either order, though both have
+ // synchronization concerns (see below). Workers are almost always
+ // available (see comment in findRunnableGCWorker for the one case
+ // there may be none). Since we're slightly less likely to find a P,
+ // check for that first.
+ //
+ // Synchronization: note that we must hold sched.lock until we are
+ // committed to keeping it. Otherwise we cannot put the unnecessary P
+ // back in sched.pidle without performing the full set of idle
+ // transition checks.
+ //
+ // If we were to check gcBgMarkWorkerPool first, we must somehow handle
+ // the assumption in gcControllerState.findRunnableGCWorker that an
+ // empty gcBgMarkWorkerPool is only possible if gcMarkDone is running.
+ lock(&sched.lock)
+ pp := pidleget()
+ if pp == nil {
+ unlock(&sched.lock)
+ return nil, nil
+ }
+
+ // Now that we own a P, gcBlackenEnabled can't change (as it requires
+ // STW).
+ if gcBlackenEnabled == 0 {
+ pidleput(pp)
+ unlock(&sched.lock)
+ return nil, nil
+ }
+
+ node := (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop())
+ if node == nil {
+ pidleput(pp)
+ unlock(&sched.lock)
+ return nil, nil
+ }
+
+ unlock(&sched.lock)
+
+ return pp, node.gp.ptr()
+}
+
// wakeNetPoller wakes up the thread sleeping in the network poller if it isn't
// going to wake up before the when argument; or it wakes an idle P to service
// timers and the network poller if there isn't one already.
@@ -2908,7 +3095,9 @@ top:
}
if gp == nil && gcBlackenEnabled != 0 {
gp = gcController.findRunnableGCWorker(_g_.m.p.ptr())
- tryWakeP = tryWakeP || gp != nil
+ if gp != nil {
+ tryWakeP = true
+ }
}
if gp == nil {
// Check the global runnable queue once in a while to ensure fairness.
@@ -2999,7 +3188,7 @@ func dropg() {
// checkTimers runs any timers for the P that are ready.
// If now is not 0 it is the current time.
-// It returns the current time or 0 if it is not known,
+// It returns the passed time or the current time if now was passed as 0.
// and the time when the next timer should run or 0 if there is no next timer,
// and reports whether it ran any timers.
// If the time when the next timer should run is not 0,
@@ -3149,6 +3338,7 @@ func preemptPark(gp *g) {
throw("bad g status")
}
gp.waitreason = waitReasonPreempted
+
// Transition from _Grunning to _Gscan|_Gpreempted. We can't
// be in _Grunning when we dropg because then we'd be running
// without an M, but the moment we're in _Gpreempted,
@@ -3527,20 +3717,28 @@ func exitsyscallfast_pidle() bool {
// exitsyscall slow path on g0.
// Failed to acquire P, enqueue gp as runnable.
//
+// Called via mcall, so gp is the calling g from this M.
+//
//go:nowritebarrierrec
func exitsyscall0(gp *g) {
- _g_ := getg()
-
casgstatus(gp, _Gsyscall, _Gexitingsyscall)
dropg()
casgstatus(gp, _Gexitingsyscall, _Grunnable)
lock(&sched.lock)
var _p_ *p
- if schedEnabled(_g_) {
+ if schedEnabled(gp) {
_p_ = pidleget()
}
+ var locked bool
if _p_ == nil {
globrunqput(gp)
+
+ // Below, we stoplockedm if gp is locked. globrunqput releases
+ // ownership of gp, so we must check if gp is locked prior to
+ // committing the release by unlocking sched.lock, otherwise we
+ // could race with another M transitioning gp from unlocked to
+ // locked.
+ locked = gp.lockedm != 0
} else if atomic.Load(&sched.sysmonwait) != 0 {
atomic.Store(&sched.sysmonwait, 0)
notewakeup(&sched.sysmonnote)
@@ -3550,8 +3748,11 @@ func exitsyscall0(gp *g) {
acquirep(_p_)
execute(gp, false) // Never returns.
}
- if _g_.m.lockedg != 0 {
+ if locked {
// Wait until another thread schedules gp and so m again.
+ //
+ // N.B. lockedm must be this M, as this g was running on this M
+ // before entersyscall.
stoplockedm()
execute(gp, false) // Never returns.
}
@@ -3586,7 +3787,10 @@ func syscall_exitsyscall() {
exitsyscall()
}
-func beforefork() {
+// Called from syscall package before fork.
+//go:linkname syscall_runtime_BeforeFork syscall.runtime__BeforeFork
+//go:nosplit
+func syscall_runtime_BeforeFork() {
gp := getg().m.curg
// Block signals during a fork, so that the child does not run
@@ -3597,14 +3801,10 @@ func beforefork() {
sigblock(false)
}
-// Called from syscall package before fork.
-//go:linkname syscall_runtime_BeforeFork syscall.runtime__BeforeFork
+// Called from syscall package after fork in parent.
+//go:linkname syscall_runtime_AfterFork syscall.runtime__AfterFork
//go:nosplit
-func syscall_runtime_BeforeFork() {
- systemstack(beforefork)
-}
-
-func afterfork() {
+func syscall_runtime_AfterFork() {
gp := getg().m.curg
msigrestore(gp.m.sigmask)
@@ -3612,13 +3812,6 @@ func afterfork() {
gp.m.locks--
}
-// Called from syscall package after fork in parent.
-//go:linkname syscall_runtime_AfterFork syscall.runtime__AfterFork
-//go:nosplit
-func syscall_runtime_AfterFork() {
- systemstack(afterfork)
-}
-
// inForkedChild is true while manipulating signals in the child process.
// This is used to avoid calling libc functions in case we are using vfork.
var inForkedChild bool
@@ -3734,6 +3927,11 @@ func newproc(fn uintptr, arg unsafe.Pointer) *g {
if isSystemGoroutine(newg, false) {
atomic.Xadd(&sched.ngsys, +1)
}
+ // Track initial transition?
+ newg.trackingSeq = uint8(fastrand())
+ if newg.trackingSeq%gTrackingPeriod == 0 {
+ newg.tracking = true
+ }
casgstatus(newg, _Gdead, _Grunnable)
if _p_.goidcache == _p_.goidcacheend {
@@ -3844,13 +4042,19 @@ func gfput(_p_ *p, gp *g) {
_p_.gFree.push(gp)
_p_.gFree.n++
if _p_.gFree.n >= 64 {
- lock(&sched.gFree.lock)
+ var (
+ inc int32
+ noStackQ gQueue
+ )
for _p_.gFree.n >= 32 {
- _p_.gFree.n--
gp = _p_.gFree.pop()
- sched.gFree.list.push(gp)
- sched.gFree.n++
+ _p_.gFree.n--
+ noStackQ.push(gp)
+ inc++
}
+ lock(&sched.gFree.lock)
+ sched.gFree.list.pushAll(noStackQ)
+ sched.gFree.n += inc
unlock(&sched.gFree.lock)
}
}
@@ -3884,13 +4088,19 @@ retry:
// Purge all cached G's from gfree list to the global list.
func gfpurge(_p_ *p) {
- lock(&sched.gFree.lock)
+ var (
+ inc int32
+ noStackQ gQueue
+ )
for !_p_.gFree.empty() {
gp := _p_.gFree.pop()
_p_.gFree.n--
- sched.gFree.list.push(gp)
- sched.gFree.n++
+ noStackQ.push(gp)
+ inc++
}
+ lock(&sched.gFree.lock)
+ sched.gFree.list.pushAll(noStackQ)
+ sched.gFree.n += inc
unlock(&sched.gFree.lock)
}
@@ -4061,71 +4271,6 @@ func sigprof(pc uintptr, gp *g, mp *m) {
// See golang.org/issue/17165.
getg().m.mallocing++
- // Define that a "user g" is a user-created goroutine, and a "system g"
- // is one that is m->g0 or m->gsignal.
- //
- // We might be interrupted for profiling halfway through a
- // goroutine switch. The switch involves updating three (or four) values:
- // g, PC, SP, and (on arm) LR. The PC must be the last to be updated,
- // because once it gets updated the new g is running.
- //
- // When switching from a user g to a system g, LR is not considered live,
- // so the update only affects g, SP, and PC. Since PC must be last, there
- // the possible partial transitions in ordinary execution are (1) g alone is updated,
- // (2) both g and SP are updated, and (3) SP alone is updated.
- // If SP or g alone is updated, we can detect the partial transition by checking
- // whether the SP is within g's stack bounds. (We could also require that SP
- // be changed only after g, but the stack bounds check is needed by other
- // cases, so there is no need to impose an additional requirement.)
- //
- // There is one exceptional transition to a system g, not in ordinary execution.
- // When a signal arrives, the operating system starts the signal handler running
- // with an updated PC and SP. The g is updated last, at the beginning of the
- // handler. There are two reasons this is okay. First, until g is updated the
- // g and SP do not match, so the stack bounds check detects the partial transition.
- // Second, signal handlers currently run with signals disabled, so a profiling
- // signal cannot arrive during the handler.
- //
- // When switching from a system g to a user g, there are three possibilities.
- //
- // First, it may be that the g switch has no PC update, because the SP
- // either corresponds to a user g throughout (as in asmcgocall)
- // or because it has been arranged to look like a user g frame
- // (as in cgocallback). In this case, since the entire
- // transition is a g+SP update, a partial transition updating just one of
- // those will be detected by the stack bounds check.
- //
- // Second, when returning from a signal handler, the PC and SP updates
- // are performed by the operating system in an atomic update, so the g
- // update must be done before them. The stack bounds check detects
- // the partial transition here, and (again) signal handlers run with signals
- // disabled, so a profiling signal cannot arrive then anyway.
- //
- // Third, the common case: it may be that the switch updates g, SP, and PC
- // separately. If the PC is within any of the functions that does this,
- // we don't ask for a traceback. C.F. the function setsSP for more about this.
- //
- // There is another apparently viable approach, recorded here in case
- // the "PC within setsSP function" check turns out not to be usable.
- // It would be possible to delay the update of either g or SP until immediately
- // before the PC update instruction. Then, because of the stack bounds check,
- // the only problematic interrupt point is just before that PC update instruction,
- // and the sigprof handler can detect that instruction and simulate stepping past
- // it in order to reach a consistent state. On ARM, the update of g must be made
- // in two places (in R10 and also in a TLS slot), so the delayed update would
- // need to be the SP update. The sigprof handler must read the instruction at
- // the current PC and if it was the known instruction (for example, JMP BX or
- // MOV R2, PC), use that other register in place of the PC value.
- // The biggest drawback to this solution is that it requires that we can tell
- // whether it's safe to read from the memory pointed at by PC.
- // In a correct program, we can test PC == nil and otherwise read,
- // but if a profiling signal happens at the instant that a program executes
- // a bad jump (before the program manages to handle the resulting fault)
- // the profiling handler could fault trying to read nonexistent memory.
- //
- // To recap, there are no constraints on the assembly being used for the
- // transition. We simply require that g and SP match and that the PC is not
- // in gogo.
traceback := true
// If SIGPROF arrived while already fetching runtime callers
@@ -4351,7 +4496,6 @@ func (pp *p) destroy() {
moveTimers(plocal, pp.timers)
pp.timers = nil
pp.numTimers = 0
- pp.adjustTimers = 0
pp.deletedTimers = 0
atomic.Store64(&pp.timer0When, 0)
unlock(&pp.timersLock)
@@ -4650,11 +4794,9 @@ func checkdead() {
}
grunning := 0
- lock(&allglock)
- for i := 0; i < len(allgs); i++ {
- gp := allgs[i]
+ forEachG(func(gp *g) {
if isSystemGoroutine(gp, false) {
- continue
+ return
}
s := readgstatus(gp)
switch s &^ _Gscan {
@@ -4667,8 +4809,7 @@ func checkdead() {
print("runtime: checkdead: find g ", gp.goid, " in status ", s, "\n")
throw("checkdead: runnable g")
}
- }
- unlock(&allglock)
+ })
if grunning == 0 { // possible if main goroutine calls runtime·Goexit()
unlock(&sched.lock) // unlock so that GODEBUG=scheddetail=1 doesn't hang
throw("no goroutines (main called runtime.Goexit) - deadlock!")
@@ -4972,7 +5113,7 @@ func preemptall() bool {
// Tell the goroutine running on processor P to stop.
// This function is purely best-effort. It can incorrectly fail to inform the
-// goroutine. It can send inform the wrong goroutine. Even if it informs the
+// goroutine. It can inform the wrong goroutine. Even if it informs the
// correct goroutine, that goroutine might ignore the request if it is
// simultaneously executing newstack.
// No lock needs to be held.
@@ -5080,9 +5221,7 @@ func schedtrace(detailed bool) {
print(" M", mp.id, ": p=", id1, " curg=", id2, " mallocing=", mp.mallocing, " throwing=", mp.throwing, " preemptoff=", mp.preemptoff, ""+" locks=", mp.locks, " dying=", mp.dying, " spinning=", mp.spinning, " blocked=", mp.blocked, " lockedg=", id3, "\n")
}
- lock(&allglock)
- for gi := 0; gi < len(allgs); gi++ {
- gp := allgs[gi]
+ forEachG(func(gp *g) {
mp := gp.m
lockedm := gp.lockedm.ptr()
id1 := int64(-1)
@@ -5094,8 +5233,7 @@ func schedtrace(detailed bool) {
id2 = lockedm.id
}
print(" G", gp.goid, ": status=", readgstatus(gp), "(", gp.waitreason.String(), ") m=", id1, " lockedm=", id2, "\n")
- }
- unlock(&allglock)
+ })
unlock(&sched.lock)
}
@@ -5190,6 +5328,8 @@ func globrunqputhead(gp *g) {
// Put a batch of runnable goroutines on the global runnable queue.
// This clears *batch.
// sched.lock must be held.
+// May run during STW, so write barriers are not allowed.
+//go:nowritebarrierrec
func globrunqputbatch(batch *gQueue, n int32) {
assertLockHeld(&sched.lock)
@@ -5505,6 +5645,45 @@ func runqget(_p_ *p) (gp *g, inheritTime bool) {
}
}
+// runqdrain drains the local runnable queue of _p_ and returns all goroutines in it.
+// Executed only by the owner P.
+func runqdrain(_p_ *p) (drainQ gQueue, n uint32) {
+ oldNext := _p_.runnext
+ if oldNext != 0 && _p_.runnext.cas(oldNext, 0) {
+ drainQ.pushBack(oldNext.ptr())
+ n++
+ }
+
+retry:
+ h := atomic.LoadAcq(&_p_.runqhead) // load-acquire, synchronize with other consumers
+ t := _p_.runqtail
+ qn := t - h
+ if qn == 0 {
+ return
+ }
+ if qn > uint32(len(_p_.runq)) { // read inconsistent h and t
+ goto retry
+ }
+
+ if !atomic.CasRel(&_p_.runqhead, h, h+qn) { // cas-release, commits consume
+ goto retry
+ }
+
+ // We've inverted the usual order: the head pointer is advanced before the
+ // G's are drained from the local P's queue into the gQueue. We don't want
+ // to clash with runqsteal() running in parallel and mess up the G's
+ // statuses, so we only update gp.schedlink after taking full ownership of
+ // a G; once the head has moved past them, other P's can no longer reach
+ // these G's in the local run queue to steal them.
+ // See https://groups.google.com/g/golang-dev/c/0pTKxEKhHSc/m/6Q85QjdVBQAJ for more details.
+ for i := uint32(0); i < qn; i++ {
+ gp := _p_.runq[(h+i)%uint32(len(_p_.runq))].ptr()
+ drainQ.pushBack(gp)
+ n++
+ }
+ return
+}
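
A hedged sketch of one possible caller (illustrative only, not taken from this patch): draining the owner P's local queue and handing everything to the global run queue via globrunqputbatch, which per its comment above must be called with sched.lock held and clears the batch.

// Hypothetical: move all runnable Gs from the owner's local queue to the
// global queue. Must be executed by the owner P, like runqdrain itself.
func drainToGlobal(_p_ *p) {
	q, n := runqdrain(_p_)
	if n == 0 {
		return
	}
	lock(&sched.lock)
	globrunqputbatch(&q, int32(n)) // clears q; requires sched.lock
	unlock(&sched.lock)
}
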
+
// Grabs a batch of goroutines from _p_'s runnable queue into batch.
// Batch is a ring buffer starting at batchHead.
// Returns number of grabbed goroutines.
@@ -5698,11 +5877,6 @@ func setMaxThreads(in int) (out int) {
return
}
-func haveexperiment(name string) bool {
- // The gofrontend does not support experiments.
- return false
-}
-
//go:nosplit
func procPin() int {
_g_ := getg()
@@ -5828,7 +6002,7 @@ var inittrace tracestat
type tracestat struct {
active bool // init tracing activation status
- id int64 // init go routine id
+ id int64 // init goroutine id
allocs uint64 // heap allocations
bytes uint64 // heap allocated bytes
}