Diffstat (limited to 'libgo/go/runtime/proc.go')
-rw-r--r--  libgo/go/runtime/proc.go  1285
1 file changed, 1247 insertions(+), 38 deletions(-)
diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go
index ea7f84e..b28e26b 100644
--- a/libgo/go/runtime/proc.go
+++ b/libgo/go/runtime/proc.go
@@ -6,61 +6,128 @@ package runtime
import (
"runtime/internal/atomic"
+ "runtime/internal/sys"
"unsafe"
)
-// Functions temporarily called by C code.
+// Functions called by C code.
+//go:linkname main runtime.main
+//go:linkname goparkunlock runtime.goparkunlock
//go:linkname newextram runtime.newextram
//go:linkname acquirep runtime.acquirep
//go:linkname releasep runtime.releasep
//go:linkname incidlelocked runtime.incidlelocked
-//go:linkname checkdead runtime.checkdead
-//go:linkname sysmon runtime.sysmon
-//go:linkname schedtrace runtime.schedtrace
-//go:linkname allgadd runtime.allgadd
-//go:linkname mcommoninit runtime.mcommoninit
+//go:linkname schedinit runtime.schedinit
//go:linkname ready runtime.ready
//go:linkname gcprocs runtime.gcprocs
-//go:linkname needaddgcproc runtime.needaddgcproc
//go:linkname stopm runtime.stopm
//go:linkname handoffp runtime.handoffp
//go:linkname wakep runtime.wakep
//go:linkname stoplockedm runtime.stoplockedm
//go:linkname schedule runtime.schedule
//go:linkname execute runtime.execute
-//go:linkname gfput runtime.gfput
+//go:linkname goexit1 runtime.goexit1
+//go:linkname reentersyscall runtime.reentersyscall
+//go:linkname reentersyscallblock runtime.reentersyscallblock
+//go:linkname exitsyscall runtime.exitsyscall
//go:linkname gfget runtime.gfget
-//go:linkname lockOSThread runtime.lockOSThread
-//go:linkname unlockOSThread runtime.unlockOSThread
-//go:linkname procresize runtime.procresize
//go:linkname helpgc runtime.helpgc
-//go:linkname stopTheWorldWithSema runtime.stopTheWorldWithSema
-//go:linkname startTheWorldWithSema runtime.startTheWorldWithSema
-//go:linkname mput runtime.mput
-//go:linkname mget runtime.mget
+//go:linkname kickoff runtime.kickoff
+//go:linkname mstart1 runtime.mstart1
//go:linkname globrunqput runtime.globrunqput
//go:linkname pidleget runtime.pidleget
-//go:linkname runqempty runtime.runqempty
-//go:linkname runqput runtime.runqput
// Function called by misc/cgo/test.
//go:linkname lockedOSThread runtime.lockedOSThread
-// Functions temporarily in C that have not yet been ported.
-func allocm(*p, bool, *unsafe.Pointer, *uintptr) *m
+// C functions for thread and context management.
+func newosproc(*m)
func malg(bool, bool, *unsafe.Pointer, *uintptr) *g
-func startm(*p, bool)
-func newm(unsafe.Pointer, *p)
-func gchelper()
-func getfingwait() bool
-func getfingwake() bool
-func wakefing() *g
-
-// C functions for ucontext management.
+func resetNewG(*g, *unsafe.Pointer, *uintptr)
func gogo(*g)
func setGContext()
func makeGContext(*g, unsafe.Pointer, uintptr)
func getTraceback(me, gp *g)
+func gtraceback(*g)
+func _cgo_notify_runtime_init_done()
+func alreadyInCallers() bool
+
+// Functions created by the compiler.
+//extern __go_init_main
+func main_init()
+
+//extern main.main
+func main_main()
+
+var buildVersion = sys.TheVersion
+
+// Goroutine scheduler
+// The scheduler's job is to distribute ready-to-run goroutines over worker threads.
+//
+// The main concepts are:
+// G - goroutine.
+// M - worker thread, or machine.
+// P - processor, a resource that is required to execute Go code.
+// M must have an associated P to execute Go code; however, it can be
+// blocked or in a syscall without an associated P.
+//
+// Design doc at https://golang.org/s/go11sched.
+
+// Worker thread parking/unparking.
+// We need to balance between keeping enough running worker threads to utilize
+// available hardware parallelism and parking excessive running worker threads
+// to conserve CPU resources and power. This is not simple for two reasons:
+// (1) scheduler state is intentionally distributed (in particular, per-P work
+// queues), so it is not possible to compute global predicates on fast paths;
+// (2) for optimal thread management we would need to know the future (don't park
+// a worker thread when a new goroutine will be readied in the near future).
+//
+// Three rejected approaches that would work badly:
+// 1. Centralize all scheduler state (would inhibit scalability).
+// 2. Direct goroutine handoff. That is, when we ready a new goroutine and there
+// is a spare P, unpark a thread and hand it the P and the goroutine.
+// This would lead to thread state thrashing, as the thread that readied the
+// goroutine can be out of work the very next moment, and we would then need to park it.
+// It would also destroy locality of computation, since we want to keep
+// dependent goroutines on the same thread, and it would introduce additional latency.
+// 3. Unpark an additional thread whenever we ready a goroutine and there is an
+// idle P, but don't do handoff. This would lead to excessive thread parking/
+// unparking as the additional threads will instantly park without discovering
+// any work to do.
+//
+// The current approach:
+// We unpark an additional thread when we ready a goroutine if (1) there is an
+// idle P and (2) there are no "spinning" worker threads. A worker thread is considered
+// spinning if it is out of local work and did not find work in the global run queue
+// or the netpoller; the spinning state is denoted in m.spinning and in sched.nmspinning.
+// Threads unparked this way are also considered spinning; we don't do goroutine
+// handoff so such threads are out of work initially. Spinning threads do some
+// spinning looking for work in per-P run queues before parking. If a spinning
+// thread finds work it takes itself out of the spinning state and proceeds to
+// execution. If it does not find work it takes itself out of the spinning state
+// and then parks.
+// If there is at least one spinning thread (sched.nmspinning>0), we don't unpark
+// new threads when readying goroutines. To compensate for that, if the last spinning
+// thread finds work and stops spinning, it must unpark a new spinning thread.
+// This approach smooths out unjustified spikes of thread unparking,
+// but at the same time guarantees eventual maximal CPU parallelism utilization.
+//
+// The main implementation complication is that we need to be very careful during
+// the spinning->non-spinning thread transition. This transition can race with submission
+// of a new goroutine, and either one side or the other needs to unpark another worker
+// thread. If they both fail to do that, we can end up with semi-persistent CPU
+// underutilization. The general pattern for goroutine readying is: submit a goroutine
+// to local work queue, #StoreLoad-style memory barrier, check sched.nmspinning.
+// The general pattern for spinning->non-spinning transition is: decrement nmspinning,
+// #StoreLoad-style memory barrier, check all per-P work queues for new work.
+// Note that all this complexity does not apply to the global run queue, as we are not
+// sloppy about thread unparking when submitting to the global queue. Also see comments
+// for nmspinning manipulation.
+
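
To make the handshake above concrete, here is a minimal, self-contained toy in ordinary Go, not the runtime's own code: submit, worker, work, nspinning, and wake are illustrative names, and sync/atomic operations stand in for the #StoreLoad-style barriers. The producer publishes work and then checks the spinning count; the consumer leaves the spinning state and then re-checks for work before parking, so at least one side always performs the wakeup.

    package main

    import (
        "fmt"
        "sync/atomic"
    )

    var (
        work      int32 // 1 if a work item is pending (stands in for a run queue)
        nspinning int32 // number of "spinning" workers (stands in for sched.nmspinning)
        wake      = make(chan struct{}, 1)
        done      = make(chan struct{})
    )

    // submit publishes work, then checks nspinning: publish -> barrier -> check.
    func submit() {
        atomic.StoreInt32(&work, 1)
        if atomic.LoadInt32(&nspinning) == 0 {
            select {
            case wake <- struct{}{}: // no spinner around: unpark a worker ourselves
            default:
            }
        }
    }

    // worker leaves the spinning state, then re-checks for work before parking:
    // decrement -> barrier -> recheck.
    func worker() {
        atomic.AddInt32(&nspinning, 1)
        for {
            atomic.AddInt32(&nspinning, -1)
            if atomic.CompareAndSwapInt32(&work, 1, 0) {
                fmt.Println("worker: found work")
                close(done)
                return
            }
            <-wake // park; submit is guaranteed to send if it saw nspinning == 0
            atomic.AddInt32(&nspinning, 1)
        }
    }

    func main() {
        go worker()
        submit()
        <-done
    }
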
+var (
+ m0 m
+ g0 g
+)
// main_init_done is a signal used by cgocallbackg that initialization
// has been completed. It is made before _cgo_notify_runtime_init_done,
@@ -68,6 +135,159 @@ func getTraceback(me, gp *g)
// it is closed, meaning cgocallbackg can reliably receive from it.
var main_init_done chan bool
+// runtimeInitTime is the nanotime() at which the runtime started.
+var runtimeInitTime int64
+
+// Value to use for signal mask for newly created M's.
+var initSigmask sigset
+
+// The main goroutine.
+func main() {
+ g := getg()
+
+ // Max stack size is 1 GB on 64-bit, 250 MB on 32-bit.
+ // Using decimal instead of binary GB and MB because
+ // they look nicer in the stack overflow failure message.
+ if sys.PtrSize == 8 {
+ maxstacksize = 1000000000
+ } else {
+ maxstacksize = 250000000
+ }
+
+ // Record when the world started.
+ runtimeInitTime = nanotime()
+
+ systemstack(func() {
+ newm(sysmon, nil)
+ })
+
+ // Lock the main goroutine onto this, the main OS thread,
+ // during initialization. Most programs won't care, but a few
+ // do require certain calls to be made by the main thread.
+ // Those can arrange for main.main to run in the main thread
+ // by calling runtime.LockOSThread during initialization
+ // to preserve the lock.
+ lockOSThread()
+
+ if g.m != &m0 {
+ throw("runtime.main not on m0")
+ }
+
+ // Defer unlock so that runtime.Goexit during init does the unlock too.
+ needUnlock := true
+ defer func() {
+ if needUnlock {
+ unlockOSThread()
+ }
+ }()
+
+ main_init_done = make(chan bool)
+ if iscgo {
+ _cgo_notify_runtime_init_done()
+ }
+
+ fn := main_init // make an indirect call, as the linker doesn't know the address of the main package when laying down the runtime
+ fn()
+ close(main_init_done)
+
+ needUnlock = false
+ unlockOSThread()
+
+ // For gccgo we have to wait until after main is initialized
+ // to enable GC, because initializing main registers the GC roots.
+ gcenable()
+
+ if isarchive || islibrary {
+ // A program compiled with -buildmode=c-archive or c-shared
+ // has a main, but it is not executed.
+ return
+ }
+ fn = main_main // make an indirect call, as the linker doesn't know the address of the main package when laying down the runtime
+ fn()
+ if raceenabled {
+ racefini()
+ }
+
+ // Make racy client program work: if panicking on
+ // another goroutine at the same time as main returns,
+ // let the other goroutine finish printing the panic trace.
+ // Once it does, it will exit. See issue 3934.
+ if panicking != 0 {
+ gopark(nil, nil, "panicwait", traceEvGoStop, 1)
+ }
+
+ exit(0)
+ for {
+ var x *int32
+ *x = 0
+ }
+}
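
As a user-level illustration of the LockOSThread arrangement described in the comment above (a sketch, not part of this patch): calling runtime.LockOSThread from an init function preserves the lock taken here, so main.main keeps running on the main OS thread, which some C libraries (GUI toolkits, certain GL contexts) require.

    package main

    import "runtime"

    func init() {
        // Taken during initialization, so the lock set up by runtime.main
        // is preserved and main() stays on the main OS thread.
        runtime.LockOSThread()
    }

    func main() {
        // Safe to call thread-affine C code from here.
    }
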
+
+// os_beforeExit is called from os.Exit(0).
+//go:linkname os_beforeExit os.runtime_beforeExit
+func os_beforeExit() {
+ if raceenabled {
+ racefini()
+ }
+}
+
+// start forcegc helper goroutine
+func init() {
+ go forcegchelper()
+}
+
+func forcegchelper() {
+ forcegc.g = getg()
+ for {
+ lock(&forcegc.lock)
+ if forcegc.idle != 0 {
+ throw("forcegc: phase error")
+ }
+ atomic.Store(&forcegc.idle, 1)
+ goparkunlock(&forcegc.lock, "force gc (idle)", traceEvGoBlock, 1)
+ // this goroutine is explicitly resumed by sysmon
+ if debug.gctrace > 0 {
+ println("GC forced")
+ }
+ gcStart(gcBackgroundMode, true)
+ }
+}
+
+//go:nosplit
+
+// Gosched yields the processor, allowing other goroutines to run. It does not
+// suspend the current goroutine, so execution resumes automatically.
+func Gosched() {
+ mcall(gosched_m)
+}
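
A small usage sketch (not part of this patch) showing the documented behavior: the goroutine that calls Gosched is not suspended, it merely yields and is rescheduled, so this loop makes progress even with a single P.

    package main

    import (
        "fmt"
        "runtime"
    )

    func main() {
        done := make(chan struct{})
        go func() {
            fmt.Println("other goroutine ran")
            close(done)
        }()
        for {
            select {
            case <-done:
                return
            default:
                runtime.Gosched() // yield, then automatically resume and poll again
            }
        }
    }
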
+
+// Puts the current goroutine into a waiting state and calls unlockf.
+// If unlockf returns false, the goroutine is resumed.
+// unlockf must not access this G's stack, as it may be moved between
+// the call to gopark and the call to unlockf.
+func gopark(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason string, traceEv byte, traceskip int) {
+ mp := acquirem()
+ gp := mp.curg
+ status := readgstatus(gp)
+ if status != _Grunning && status != _Gscanrunning {
+ throw("gopark: bad g status")
+ }
+ mp.waitlock = lock
+ mp.waitunlockf = *(*unsafe.Pointer)(unsafe.Pointer(&unlockf))
+ gp.waitreason = reason
+ mp.waittraceev = traceEv
+ mp.waittraceskip = traceskip
+ releasem(mp)
+ // can't do anything that might move the G between Ms here.
+ mcall(park_m)
+}
+
+// Puts the current goroutine into a waiting state and unlocks the lock.
+// The goroutine can be made runnable again by calling goready(gp).
+func goparkunlock(lock *mutex, reason string, traceEv byte, traceskip int) {
+ gopark(parkunlock_c, unsafe.Pointer(lock), reason, traceEv, traceskip)
+}
+
func goready(gp *g, traceskip int) {
systemstack(func() {
ready(gp, traceskip, true)
@@ -164,12 +384,11 @@ func releaseSudog(s *sudog) {
// funcPC returns the entry PC of the function f.
// It assumes that f is a func value. Otherwise the behavior is undefined.
-// For gccgo here unless and until we port proc.go.
-// Note that this differs from the gc implementation; the gc implementation
-// adds sys.PtrSize to the address of the interface value, but GCC's
-// alias analysis decides that that can not be a reference to the second
-// field of the interface, and in some cases it drops the initialization
-// of the second field as a dead store.
+// For gccgo note that this differs from the gc implementation; the gc
+// implementation adds sys.PtrSize to the address of the interface
+// value, but GCC's alias analysis decides that that can not be a
+// reference to the second field of the interface, and in some cases
+// it drops the initialization of the second field as a dead store.
//go:nosplit
func funcPC(f interface{}) uintptr {
i := (*iface)(unsafe.Pointer(&f))
@@ -207,6 +426,62 @@ func allgadd(gp *g) {
unlock(&allglock)
}
+const (
+ // Number of goroutine ids to grab from sched.goidgen to local per-P cache at once.
+ // 16 seems to provide enough amortization, but other than that it's mostly arbitrary number.
+ _GoidCacheBatch = 16
+)
+
+// The bootstrap sequence is:
+//
+// call osinit
+// call schedinit
+// make & queue new G
+// call runtime·mstart
+//
+// The new G calls runtime·main.
+func schedinit() {
+ _m_ := &m0
+ _g_ := &g0
+ _m_.g0 = _g_
+ _m_.curg = _g_
+ _g_.m = _m_
+ setg(_g_)
+
+ sched.maxmcount = 10000
+
+ tracebackinit()
+ mallocinit()
+ mcommoninit(_g_.m)
+ alginit() // maps must not be used before this call
+
+ msigsave(_g_.m)
+ initSigmask = _g_.m.sigmask
+
+ goargs()
+ goenvs()
+ parsedebugvars()
+ gcinit()
+
+ sched.lastpoll = uint64(nanotime())
+ procs := ncpu
+ if n, ok := atoi32(gogetenv("GOMAXPROCS")); ok && n > 0 {
+ procs = n
+ }
+ if procs > _MaxGomaxprocs {
+ procs = _MaxGomaxprocs
+ }
+ if procresize(procs) != nil {
+ throw("unknown runnable goroutine during bootstrap")
+ }
+
+ if buildVersion == "" {
+ // Condition should never trigger. This code just serves
+ // to ensure runtime·buildVersion is kept in the resulting binary.
+ buildVersion = "unknown"
+ }
+}
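
The GOMAXPROCS handling above can be observed from user code; a minimal sketch (not part of this patch), assuming a standard environment:

    package main

    import (
        "fmt"
        "runtime"
    )

    func main() {
        fmt.Println("NumCPU:    ", runtime.NumCPU())
        fmt.Println("GOMAXPROCS:", runtime.GOMAXPROCS(0)) // 0 queries without changing it
    }

Run as GOMAXPROCS=2 ./prog, the second line should print 2, matching the environment override in schedinit.
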
+
func dumpgstatus(gp *g) {
_g_ := getg()
print("runtime: gp: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
@@ -491,6 +766,122 @@ func casgstatus(gp *g, oldval, newval uint32) {
}
}
+// scang blocks until gp's stack has been scanned.
+// It might be scanned by scang or it might be scanned by the goroutine itself.
+// Either way, the stack scan has completed when scang returns.
+func scang(gp *g, gcw *gcWork) {
+ // Invariant: we (the caller, markroot for a specific goroutine) own gp.gcscandone.
+ // Nothing is racing with us now, but gcscandone might be set to true left over
+ // from an earlier round of stack scanning (we scan twice per GC).
+ // We use gcscandone to record whether the scan has been done during this round.
+ // It is important that the scan happens exactly once: if called twice,
+ // the installation of stack barriers will detect the double scan and die.
+
+ gp.gcscandone = false
+
+ // See http://golang.org/cl/21503 for justification of the yield delay.
+ const yieldDelay = 10 * 1000
+ var nextYield int64
+
+ // Endeavor to get gcscandone set to true,
+ // either by doing the stack scan ourselves or by coercing gp to scan itself.
+ // gp.gcscandone can transition from false to true when we're not looking
+ // (if we asked for preemption), so any time we lock the status using
+ // castogscanstatus we have to double-check that the scan is still not done.
+loop:
+ for i := 0; !gp.gcscandone; i++ {
+ switch s := readgstatus(gp); s {
+ default:
+ dumpgstatus(gp)
+ throw("stopg: invalid status")
+
+ case _Gdead:
+ // No stack.
+ gp.gcscandone = true
+ break loop
+
+ case _Gcopystack:
+ // Stack being switched. Go around again.
+
+ case _Grunnable, _Gsyscall, _Gwaiting:
+ // Claim goroutine by setting scan bit.
+ // Racing with execution or readying of gp.
+ // The scan bit keeps them from running
+ // the goroutine until we're done.
+ if castogscanstatus(gp, s, s|_Gscan) {
+ if gp.scanningself {
+ // Don't try to scan the stack
+ // if the goroutine is going to do
+ // it itself.
+ restartg(gp)
+ break
+ }
+ if !gp.gcscandone {
+ scanstack(gp, gcw)
+ gp.gcscandone = true
+ }
+ restartg(gp)
+ break loop
+ }
+
+ case _Gscanwaiting:
+ // newstack is doing a scan for us right now. Wait.
+
+ case _Gscanrunning:
+ // checkPreempt is scanning. Wait.
+
+ case _Grunning:
+ // Goroutine running. Try to preempt execution so it can scan itself.
+ // The preemption handler (in newstack) does the actual scan.
+
+ // Optimization: if there is already a pending preemption request
+ // (from the previous loop iteration), don't bother with the atomics.
+ if gp.preemptscan && gp.preempt {
+ break
+ }
+
+ // Ask for preemption and self scan.
+ if castogscanstatus(gp, _Grunning, _Gscanrunning) {
+ if !gp.gcscandone {
+ gp.preemptscan = true
+ gp.preempt = true
+ }
+ casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning)
+ }
+ }
+
+ if i == 0 {
+ nextYield = nanotime() + yieldDelay
+ }
+ if nanotime() < nextYield {
+ procyield(10)
+ } else {
+ osyield()
+ nextYield = nanotime() + yieldDelay/2
+ }
+ }
+
+ gp.preemptscan = false // cancel scan request if no longer needed
+}
+
+// The GC requests that this routine be moved from a scanmumble state to a mumble state.
+func restartg(gp *g) {
+ s := readgstatus(gp)
+ switch s {
+ default:
+ dumpgstatus(gp)
+ throw("restartg: unexpected status")
+
+ case _Gdead:
+ // ok
+
+ case _Gscanrunnable,
+ _Gscanwaiting,
+ _Gscansyscall:
+ casfrom_Gscanstatus(gp, s, s&^_Gscan)
+ }
+}
+
// stopTheWorld stops all P's from executing goroutines, interrupting
// all goroutines at GC safe points and records reason as the reason
// for the stop. On return, only the current goroutine's P is running.
@@ -684,11 +1075,64 @@ func startTheWorldWithSema() {
// coordinate. This lazy approach works out in practice:
// we don't mind if the first couple gc rounds don't have quite
// the maximum number of procs.
- newm(unsafe.Pointer(funcPC(mhelpgc)), nil)
+ newm(mhelpgc, nil)
}
_g_.m.locks--
}
+// First function run by a new goroutine.
+// This is passed to makecontext.
+func kickoff() {
+ gp := getg()
+
+ if gp.traceback != nil {
+ gtraceback(gp)
+ }
+
+ fv := gp.entry
+ param := gp.param
+ gp.entry = nil
+ gp.param = nil
+ fv(param)
+ goexit1()
+}
+
+// This is called from mstart.
+func mstart1() {
+ _g_ := getg()
+
+ if _g_ != _g_.m.g0 {
+ throw("bad runtime·mstart")
+ }
+
+ asminit()
+ minit()
+
+ // Install signal handlers; after minit so that minit can
+ // prepare the thread to be able to handle the signals.
+ if _g_.m == &m0 {
+ // Create an extra M for callbacks on threads not created by Go.
+ if iscgo && !cgoHasExtraM {
+ cgoHasExtraM = true
+ newextram()
+ }
+ initsig(false)
+ }
+
+ if fn := _g_.m.mstartfn; fn != nil {
+ fn()
+ }
+
+ if _g_.m.helpgc != 0 {
+ _g_.m.helpgc = 0
+ stopm()
+ } else if _g_.m != &m0 {
+ acquirep(_g_.m.nextp.ptr())
+ _g_.m.nextp = 0
+ }
+ schedule()
+}
+
// forEachP calls fn(p) for every P p when p reaches a GC safe point.
// If a P is currently executing code, this will bring the P to a GC
// safe point and execute fn on that P. If the P is not executing code
@@ -811,6 +1255,35 @@ func runSafePointFn() {
unlock(&sched.lock)
}
+// Allocate a new m unassociated with any thread.
+// Can use p for allocation context if needed.
+// fn is recorded as the new m's m.mstartfn.
+//
+// This function is allowed to have write barriers even if the caller
+// isn't because it borrows _p_.
+//
+//go:yeswritebarrierrec
+func allocm(_p_ *p, fn func(), allocatestack bool) (mp *m, g0Stack unsafe.Pointer, g0StackSize uintptr) {
+ _g_ := getg()
+ _g_.m.locks++ // disable GC because it can be called from sysmon
+ if _g_.m.p == 0 {
+ acquirep(_p_) // temporarily borrow p for mallocs in this function
+ }
+ mp = new(m)
+ mp.mstartfn = fn
+ mcommoninit(mp)
+
+ mp.g0 = malg(allocatestack, false, &g0Stack, &g0StackSize)
+ mp.g0.m = mp
+
+ if _p_ == _g_.m.p.ptr() {
+ releasep()
+ }
+ _g_.m.locks--
+
+ return mp, g0Stack, g0StackSize
+}
+
// needm is called when a cgo callback happens on a
// thread without an m (a thread not created by Go).
// In this case, needm is expected to find an m to use
@@ -884,6 +1357,7 @@ func needm(x byte) {
setGContext()
// Initialize this thread to use the m.
+ asminit()
minit()
}
@@ -915,9 +1389,7 @@ func oneNewExtraM() {
// The sched.pc will never be returned to, but setting it to
// goexit makes clear to the traceback routines where
// the goroutine stack ends.
- var g0SP unsafe.Pointer
- var g0SPSize uintptr
- mp := allocm(nil, true, &g0SP, &g0SPSize)
+ mp, g0SP, g0SPSize := allocm(nil, nil, true)
gp := malg(true, false, nil, nil)
gp.gcscanvalid = true // fresh G, so no dequeueRescan necessary
gp.gcscandone = true
@@ -1051,6 +1523,17 @@ func unlockextra(mp *m) {
atomic.Storeuintptr(&extram, uintptr(unsafe.Pointer(mp)))
}
+// Create a new m. It will start off with a call to fn, or else the scheduler.
+// fn needs to be static and not a heap allocated closure.
+// May run with m.p==nil, so write barriers are not allowed.
+//go:nowritebarrierrec
+func newm(fn func(), _p_ *p) {
+ mp, _, _ := allocm(_p_, fn, false)
+ mp.nextp.set(_p_)
+ mp.sigmask = initSigmask
+ newosproc(mp)
+}
+
// Stops execution of the current m until new work is available.
// Returns with acquired P.
func stopm() {
@@ -1083,6 +1566,59 @@ retry:
_g_.m.nextp = 0
}
+func mspinning() {
+ // startm's caller incremented nmspinning. Set the new M's spinning.
+ getg().m.spinning = true
+}
+
+// Schedules some M to run the p (creates an M if necessary).
+// If p==nil, tries to get an idle P; if there are no idle P's, it does nothing.
+// May run with m.p==nil, so write barriers are not allowed.
+// If spinning is set, the caller has incremented nmspinning and startm will
+// either decrement nmspinning or set m.spinning in the newly started M.
+//go:nowritebarrierrec
+func startm(_p_ *p, spinning bool) {
+ lock(&sched.lock)
+ if _p_ == nil {
+ _p_ = pidleget()
+ if _p_ == nil {
+ unlock(&sched.lock)
+ if spinning {
+ // The caller incremented nmspinning, but there are no idle Ps,
+ // so it's okay to just undo the increment and give up.
+ if int32(atomic.Xadd(&sched.nmspinning, -1)) < 0 {
+ throw("startm: negative nmspinning")
+ }
+ }
+ return
+ }
+ }
+ mp := mget()
+ unlock(&sched.lock)
+ if mp == nil {
+ var fn func()
+ if spinning {
+ // The caller incremented nmspinning, so set m.spinning in the new M.
+ fn = mspinning
+ }
+ newm(fn, _p_)
+ return
+ }
+ if mp.spinning {
+ throw("startm: m is spinning")
+ }
+ if mp.nextp != 0 {
+ throw("startm: m has p")
+ }
+ if spinning && !runqempty(_p_) {
+ throw("startm: p has runnable gs")
+ }
+ // The caller incremented nmspinning, so set m.spinning in the new M.
+ mp.spinning = spinning
+ mp.nextp.set(_p_)
+ notewakeup(&mp.park)
+}
+
// Hands off P from syscall or locked M.
// Always runs without a P, so write barriers are not allowed.
//go:nowritebarrierrec
@@ -1281,7 +1817,7 @@ top:
if _p_.runSafePointFn != 0 {
runSafePointFn()
}
- if getfingwait() && getfingwake() {
+ if fingwait && fingwake {
if gp := wakefing(); gp != nil {
ready(gp, 0, true)
}
@@ -1593,6 +2129,7 @@ top:
// goroutines on the global queue.
// Since we preempt by storing the goroutine on the global
// queue, this is the only place we need to check preempt.
+ // This does not call checkPreempt because gp is not running.
if gp != nil && gp.preempt {
gp.preempt = false
lock(&sched.lock)
@@ -1636,6 +2173,442 @@ func dropg() {
setGNoWB(&_g_.m.curg, nil)
}
+func parkunlock_c(gp *g, lock unsafe.Pointer) bool {
+ unlock((*mutex)(lock))
+ return true
+}
+
+// park continuation on g0.
+func park_m(gp *g) {
+ _g_ := getg()
+
+ if trace.enabled {
+ traceGoPark(_g_.m.waittraceev, _g_.m.waittraceskip, gp)
+ }
+
+ casgstatus(gp, _Grunning, _Gwaiting)
+ dropg()
+
+ if _g_.m.waitunlockf != nil {
+ fn := *(*func(*g, unsafe.Pointer) bool)(unsafe.Pointer(&_g_.m.waitunlockf))
+ ok := fn(gp, _g_.m.waitlock)
+ _g_.m.waitunlockf = nil
+ _g_.m.waitlock = nil
+ if !ok {
+ if trace.enabled {
+ traceGoUnpark(gp, 2)
+ }
+ casgstatus(gp, _Gwaiting, _Grunnable)
+ execute(gp, true) // Schedule it back, never returns.
+ }
+ }
+ schedule()
+}
+
+func goschedImpl(gp *g) {
+ status := readgstatus(gp)
+ if status&^_Gscan != _Grunning {
+ dumpgstatus(gp)
+ throw("bad g status")
+ }
+ casgstatus(gp, _Grunning, _Grunnable)
+ dropg()
+ lock(&sched.lock)
+ globrunqput(gp)
+ unlock(&sched.lock)
+
+ schedule()
+}
+
+// Gosched continuation on g0.
+func gosched_m(gp *g) {
+ if trace.enabled {
+ traceGoSched()
+ }
+ goschedImpl(gp)
+}
+
+func gopreempt_m(gp *g) {
+ if trace.enabled {
+ traceGoPreempt()
+ }
+ goschedImpl(gp)
+}
+
+// Finishes execution of the current goroutine.
+func goexit1() {
+ if trace.enabled {
+ traceGoEnd()
+ }
+ mcall(goexit0)
+}
+
+// goexit continuation on g0.
+func goexit0(gp *g) {
+ _g_ := getg()
+
+ casgstatus(gp, _Grunning, _Gdead)
+ if isSystemGoroutine(gp) {
+ atomic.Xadd(&sched.ngsys, -1)
+ }
+ gp.m = nil
+ gp.lockedm = nil
+ _g_.m.lockedg = nil
+ gp.entry = nil
+ gp.paniconfault = false
+ gp._defer = nil // should be nil already, but just in case.
+ gp._panic = nil // non-nil for Goexit during panic. points at stack-allocated data.
+ gp.writebuf = nil
+ gp.waitreason = ""
+ gp.param = nil
+
+ // Note that gp's stack scan is now "valid" because it has no
+ // stack. We could dequeueRescan, but that takes a lock and
+ // isn't really necessary.
+ gp.gcscanvalid = true
+ dropg()
+
+ if _g_.m.locked&^_LockExternal != 0 {
+ print("invalid m->locked = ", _g_.m.locked, "\n")
+ throw("internal lockOSThread error")
+ }
+ _g_.m.locked = 0
+ gfput(_g_.m.p.ptr(), gp)
+ schedule()
+}
+
+// The goroutine g is about to enter a system call.
+// Record that it's not using the cpu anymore.
+// This is called only from the go syscall library and cgocall,
+// not from the low-level system calls used by the runtime.
+//
+// The entersyscall function is written in C so that it can save the
+// current register context, letting the GC see those registers.
+// It then calls reentersyscall.
+//
+// Syscall tracing:
+// At the start of a syscall we emit traceGoSysCall to capture the stack trace.
+// If the syscall does not block, that is it, we do not emit any other events.
+// If the syscall blocks (that is, P is retaken), retaker emits traceGoSysBlock;
+// when syscall returns we emit traceGoSysExit and when the goroutine starts running
+// (potentially instantly, if exitsyscallfast returns true) we emit traceGoStart.
+// To ensure that traceGoSysExit is emitted strictly after traceGoSysBlock,
+// we remember current value of syscalltick in m (_g_.m.syscalltick = _g_.m.p.ptr().syscalltick),
+// whoever emits traceGoSysBlock increments p.syscalltick afterwards;
+// and we wait for the increment before emitting traceGoSysExit.
+// Note that the increment is done even if tracing is not enabled,
+// because tracing can be enabled in the middle of a syscall. We don't want the wait to hang.
+//
+//go:nosplit
+//go:noinline
+func reentersyscall(pc, sp uintptr) {
+ _g_ := getg()
+
+ // Disable preemption because during this function g is in Gsyscall status,
+ // but can have inconsistent g->sched; do not let the GC observe it.
+ _g_.m.locks++
+
+ _g_.syscallsp = sp
+ _g_.syscallpc = pc
+ casgstatus(_g_, _Grunning, _Gsyscall)
+
+ if trace.enabled {
+ systemstack(traceGoSysCall)
+ }
+
+ if atomic.Load(&sched.sysmonwait) != 0 {
+ systemstack(entersyscall_sysmon)
+ }
+
+ if _g_.m.p.ptr().runSafePointFn != 0 {
+ // runSafePointFn may stack split if run on this stack
+ systemstack(runSafePointFn)
+ }
+
+ _g_.m.syscalltick = _g_.m.p.ptr().syscalltick
+ _g_.sysblocktraced = true
+ _g_.m.mcache = nil
+ _g_.m.p.ptr().m = 0
+ atomic.Store(&_g_.m.p.ptr().status, _Psyscall)
+ if sched.gcwaiting != 0 {
+ systemstack(entersyscall_gcwait)
+ }
+
+ _g_.m.locks--
+}
+
+func entersyscall_sysmon() {
+ lock(&sched.lock)
+ if atomic.Load(&sched.sysmonwait) != 0 {
+ atomic.Store(&sched.sysmonwait, 0)
+ notewakeup(&sched.sysmonnote)
+ }
+ unlock(&sched.lock)
+}
+
+func entersyscall_gcwait() {
+ _g_ := getg()
+ _p_ := _g_.m.p.ptr()
+
+ lock(&sched.lock)
+ if sched.stopwait > 0 && atomic.Cas(&_p_.status, _Psyscall, _Pgcstop) {
+ if trace.enabled {
+ traceGoSysBlock(_p_)
+ traceProcStop(_p_)
+ }
+ _p_.syscalltick++
+ if sched.stopwait--; sched.stopwait == 0 {
+ notewakeup(&sched.stopnote)
+ }
+ }
+ unlock(&sched.lock)
+}
+
+// The same as reentersyscall(), but with a hint that the syscall is blocking.
+//go:nosplit
+func reentersyscallblock(pc, sp uintptr) {
+ _g_ := getg()
+
+ _g_.m.locks++ // see comment in entersyscall
+ _g_.throwsplit = true
+ _g_.m.syscalltick = _g_.m.p.ptr().syscalltick
+ _g_.sysblocktraced = true
+ _g_.m.p.ptr().syscalltick++
+
+ // Leave SP around for GC and traceback.
+ _g_.syscallsp = sp
+ _g_.syscallpc = pc
+ casgstatus(_g_, _Grunning, _Gsyscall)
+ systemstack(entersyscallblock_handoff)
+
+ _g_.m.locks--
+}
+
+func entersyscallblock_handoff() {
+ if trace.enabled {
+ traceGoSysCall()
+ traceGoSysBlock(getg().m.p.ptr())
+ }
+ handoffp(releasep())
+}
+
+// The goroutine g exited its system call.
+// Arrange for it to run on a cpu again.
+// This is called only from the go syscall library, not
+// from the low-level system calls used by the runtime.
+//
+// Write barriers are not allowed because our P may have been stolen.
+//
+//go:nosplit
+//go:nowritebarrierrec
+func exitsyscall(dummy int32) {
+ _g_ := getg()
+
+ _g_.m.locks++ // see comment in entersyscall
+
+ _g_.waitsince = 0
+ oldp := _g_.m.p.ptr()
+ if exitsyscallfast() {
+ if _g_.m.mcache == nil {
+ throw("lost mcache")
+ }
+ if trace.enabled {
+ if oldp != _g_.m.p.ptr() || _g_.m.syscalltick != _g_.m.p.ptr().syscalltick {
+ systemstack(traceGoStart)
+ }
+ }
+ // There's a cpu for us, so we can run.
+ _g_.m.p.ptr().syscalltick++
+ // We need to cas the status and scan before resuming...
+ casgstatus(_g_, _Gsyscall, _Grunning)
+
+ exitsyscallclear(_g_)
+ _g_.m.locks--
+ _g_.throwsplit = false
+ return
+ }
+
+ _g_.sysexitticks = 0
+ if trace.enabled {
+ // Wait till traceGoSysBlock event is emitted.
+ // This ensures consistency of the trace (the goroutine is started after it is blocked).
+ for oldp != nil && oldp.syscalltick == _g_.m.syscalltick {
+ osyield()
+ }
+ // We can't trace syscall exit right now because we don't have a P.
+ // Tracing code can invoke write barriers that cannot run without a P.
+ // So instead we remember the syscall exit time and emit the event
+ // in execute when we have a P.
+ _g_.sysexitticks = cputicks()
+ }
+
+ _g_.m.locks--
+
+ // Call the scheduler.
+ mcall(exitsyscall0)
+
+ if _g_.m.mcache == nil {
+ throw("lost mcache")
+ }
+
+ // Scheduler returned, so we're allowed to run now.
+ // Delete the syscallsp information that we left for
+ // the garbage collector during the system call.
+ // Must wait until now because until gosched returns
+ // we don't know for sure that the garbage collector
+ // is not running.
+ exitsyscallclear(_g_)
+
+ _g_.m.p.ptr().syscalltick++
+ _g_.throwsplit = false
+}
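
The syscalltick handshake described above reentersyscall (and waited on in exitsyscall) can be modeled with a toy program, hedged as an illustration only — the names below are not runtime identifiers. The side that emits the block event advances the tick afterwards, and the exiting side spins until the tick moves past the value it remembered, so the exit event is always ordered after the block event.

    package main

    import (
        "fmt"
        "runtime"
        "sync/atomic"
        "time"
    )

    func main() {
        var syscalltick uint32
        remembered := atomic.LoadUint32(&syscalltick) // like _g_.m.syscalltick = p.syscalltick

        go func() { // stands in for the retaker (sysmon) taking the P away
            time.Sleep(time.Millisecond)
            fmt.Println("traceGoSysBlock")    // emit the block event first...
            atomic.AddUint32(&syscalltick, 1) // ...then increment the tick
        }()

        for atomic.LoadUint32(&syscalltick) == remembered {
            runtime.Gosched() // the runtime calls osyield() here
        }
        fmt.Println("traceGoSysExit")
    }
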
+
+//go:nosplit
+func exitsyscallfast() bool {
+ _g_ := getg()
+
+ // Freezetheworld sets stopwait but does not retake P's.
+ if sched.stopwait == freezeStopWait {
+ _g_.m.mcache = nil
+ _g_.m.p = 0
+ return false
+ }
+
+ // Try to re-acquire the last P.
+ if _g_.m.p != 0 && _g_.m.p.ptr().status == _Psyscall && atomic.Cas(&_g_.m.p.ptr().status, _Psyscall, _Prunning) {
+ // There's a cpu for us, so we can run.
+ exitsyscallfast_reacquired()
+ return true
+ }
+
+ // Try to get any other idle P.
+ oldp := _g_.m.p.ptr()
+ _g_.m.mcache = nil
+ _g_.m.p = 0
+ if sched.pidle != 0 {
+ var ok bool
+ systemstack(func() {
+ ok = exitsyscallfast_pidle()
+ if ok && trace.enabled {
+ if oldp != nil {
+ // Wait till traceGoSysBlock event is emitted.
+ // This ensures consistency of the trace (the goroutine is started after it is blocked).
+ for oldp.syscalltick == _g_.m.syscalltick {
+ osyield()
+ }
+ }
+ traceGoSysExit(0)
+ }
+ })
+ if ok {
+ return true
+ }
+ }
+ return false
+}
+
+// exitsyscallfast_reacquired is the exitsyscall path on which this G
+// has successfully reacquired the P it was running on before the
+// syscall.
+//
+// This function is allowed to have write barriers because exitsyscall
+// has acquired a P at this point.
+//
+//go:yeswritebarrierrec
+//go:nosplit
+func exitsyscallfast_reacquired() {
+ _g_ := getg()
+ _g_.m.mcache = _g_.m.p.ptr().mcache
+ _g_.m.p.ptr().m.set(_g_.m)
+ if _g_.m.syscalltick != _g_.m.p.ptr().syscalltick {
+ if trace.enabled {
+ // The p was retaken and then entered a syscall again (since _g_.m.syscalltick has changed).
+ // traceGoSysBlock for this syscall was already emitted,
+ // but here we effectively retake the p from the new syscall running on the same p.
+ systemstack(func() {
+ // Denote blocking of the new syscall.
+ traceGoSysBlock(_g_.m.p.ptr())
+ // Denote completion of the current syscall.
+ traceGoSysExit(0)
+ })
+ }
+ _g_.m.p.ptr().syscalltick++
+ }
+}
+
+func exitsyscallfast_pidle() bool {
+ lock(&sched.lock)
+ _p_ := pidleget()
+ if _p_ != nil && atomic.Load(&sched.sysmonwait) != 0 {
+ atomic.Store(&sched.sysmonwait, 0)
+ notewakeup(&sched.sysmonnote)
+ }
+ unlock(&sched.lock)
+ if _p_ != nil {
+ acquirep(_p_)
+ return true
+ }
+ return false
+}
+
+// exitsyscall slow path on g0.
+// Failed to acquire P, enqueue gp as runnable.
+//
+//go:nowritebarrierrec
+func exitsyscall0(gp *g) {
+ _g_ := getg()
+
+ casgstatus(gp, _Gsyscall, _Grunnable)
+ dropg()
+ lock(&sched.lock)
+ _p_ := pidleget()
+ if _p_ == nil {
+ globrunqput(gp)
+ } else if atomic.Load(&sched.sysmonwait) != 0 {
+ atomic.Store(&sched.sysmonwait, 0)
+ notewakeup(&sched.sysmonnote)
+ }
+ unlock(&sched.lock)
+ if _p_ != nil {
+ acquirep(_p_)
+ execute(gp, false) // Never returns.
+ }
+ if _g_.m.lockedg != nil {
+ // Wait until another thread schedules gp and so m again.
+ stoplockedm()
+ execute(gp, false) // Never returns.
+ }
+ stopm()
+ schedule() // Never returns.
+}
+
+// exitsyscallclear clears GC-related information that we only track
+// during a syscall.
+func exitsyscallclear(gp *g) {
+ // Garbage collector isn't running (since we are), so okay to
+ // clear syscallsp.
+ gp.syscallsp = 0
+
+ gp.gcstack = nil
+ gp.gcnextsp = nil
+ memclrNoHeapPointers(unsafe.Pointer(&gp.gcregs), unsafe.Sizeof(gp.gcregs))
+}
+
+// Code generated by cgo, and some library code, calls syscall.Entersyscall
+// and syscall.Exitsyscall.
+
+//go:linkname syscall_entersyscall syscall.Entersyscall
+//go:nosplit
+func syscall_entersyscall() {
+ entersyscall(0)
+}
+
+//go:linkname syscall_exitsyscall syscall.Exitsyscall
+//go:nosplit
+func syscall_exitsyscall() {
+ exitsyscall(0)
+}
+
func beforefork() {
gp := getg().m.curg
@@ -1671,6 +2644,91 @@ func syscall_runtime_AfterFork() {
systemstack(afterfork)
}
+// Create a new g running fn passing arg as the single argument.
+// Put it on the queue of g's waiting to run.
+// The compiler turns a go statement into a call to this.
+//go:linkname newproc __go_go
+func newproc(fn uintptr, arg unsafe.Pointer) *g {
+ _g_ := getg()
+
+ if fn == 0 {
+ _g_.m.throwing = -1 // do not dump full stacks
+ throw("go of nil func value")
+ }
+ _g_.m.locks++ // disable preemption because it can be holding p in a local var
+
+ _p_ := _g_.m.p.ptr()
+ newg := gfget(_p_)
+ var (
+ sp unsafe.Pointer
+ spsize uintptr
+ )
+ if newg == nil {
+ newg = malg(true, false, &sp, &spsize)
+ casgstatus(newg, _Gidle, _Gdead)
+ newg.gcRescan = -1
+ allgadd(newg) // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack.
+ } else {
+ resetNewG(newg, &sp, &spsize)
+ }
+ newg.traceback = nil
+
+ if readgstatus(newg) != _Gdead {
+ throw("newproc1: new g is not Gdead")
+ }
+
+ // Store the C function pointer into entryfn, take the address
+ // of entryfn, convert it to a Go function value, and store
+ // that in entry.
+ newg.entryfn = fn
+ var entry func(unsafe.Pointer)
+ *(*unsafe.Pointer)(unsafe.Pointer(&entry)) = unsafe.Pointer(&newg.entryfn)
+ newg.entry = entry
+
+ newg.param = arg
+ newg.gopc = getcallerpc(unsafe.Pointer(&fn))
+ newg.startpc = fn
+ if isSystemGoroutine(newg) {
+ atomic.Xadd(&sched.ngsys, +1)
+ }
+ // The stack is dirty from the argument frame, so queue it for
+ // scanning. Do this before setting it to runnable so we still
+ // own the G. If we're recycling a G, it may already be on the
+ // rescan list.
+ if newg.gcRescan == -1 {
+ queueRescan(newg)
+ } else {
+ // The recycled G is already on the rescan list. Just
+ // mark the stack dirty.
+ newg.gcscanvalid = false
+ }
+ casgstatus(newg, _Gdead, _Grunnable)
+
+ if _p_.goidcache == _p_.goidcacheend {
+ // Sched.goidgen is the last allocated id,
+ // this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch].
+ // At startup sched.goidgen=0, so main goroutine receives goid=1.
+ _p_.goidcache = atomic.Xadd64(&sched.goidgen, _GoidCacheBatch)
+ _p_.goidcache -= _GoidCacheBatch - 1
+ _p_.goidcacheend = _p_.goidcache + _GoidCacheBatch
+ }
+ newg.goid = int64(_p_.goidcache)
+ _p_.goidcache++
+ if trace.enabled {
+ traceGoCreate(newg, newg.startpc)
+ }
+
+ makeGContext(newg, sp, spsize)
+
+ runqput(_p_, newg, true)
+
+ if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 && runtimeInitTime != 0 {
+ wakep()
+ }
+ _g_.m.locks--
+ return newg
+}
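
A worked sketch of the goid-cache arithmetic used above (illustrative code, not part of the patch), assuming sched.goidgen starts at 0: the first Xadd returns 16, so the first batch hands out ids 1..16 — which is why the main goroutine receives goid 1 — and the next batch covers 17..32.

    package main

    import (
        "fmt"
        "sync/atomic"
    )

    const goidCacheBatch = 16 // mirrors _GoidCacheBatch

    func main() {
        var goidgen uint64 // stands in for sched.goidgen (the last allocated id)
        for batch := 0; batch < 2; batch++ {
            hi := atomic.AddUint64(&goidgen, goidCacheBatch) // like atomic.Xadd64
            lo := hi - goidCacheBatch + 1                    // goidcache -= _GoidCacheBatch - 1
            fmt.Printf("batch %d: goids %d..%d\n", batch, lo, hi)
        }
    }
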
+
// Put on gfree list.
// If local list is too long, transfer a batch to the global list.
func gfput(_p_ *p, gp *g) {
@@ -1738,6 +2796,11 @@ func gfpurge(_p_ *p) {
unlock(&sched.gflock)
}
+// Breakpoint executes a breakpoint trap.
+func Breakpoint() {
+ breakpoint()
+}
+
// dolockOSThread is called by LockOSThread and lockOSThread below
// after they modify m.locked. Do not allow preemption during this call,
// or else the m might be different in this function than in the caller.
@@ -1822,6 +2885,152 @@ func mcount() int32 {
return sched.mcount
}
+var prof struct {
+ lock uint32
+ hz int32
+}
+
+func _System() { _System() }
+func _ExternalCode() { _ExternalCode() }
+func _GC() { _GC() }
+
+var _SystemPC = funcPC(_System)
+var _ExternalCodePC = funcPC(_ExternalCode)
+var _GCPC = funcPC(_GC)
+
+// Called if we receive a SIGPROF signal.
+// Called by the signal handler, may run during STW.
+//go:nowritebarrierrec
+func sigprof(pc uintptr, gp *g, mp *m) {
+ if prof.hz == 0 {
+ return
+ }
+
+ // Profiling runs concurrently with GC, so it must not allocate.
+ // Set a trap in case the code does allocate.
+ // Note that on Windows, one thread takes profiles of all the
+ // other threads, so mp is usually not getg().m.
+ // In fact mp may not even be stopped.
+ // See golang.org/issue/17165.
+ getg().m.mallocing++
+
+ traceback := true
+
+ // If SIGPROF arrived while already fetching runtime callers
+ // we can have trouble on older systems because the unwind
+ // library calls dl_iterate_phdr which was not reentrant in
+ // the past. alreadyInCallers checks for that.
+ if gp == nil || alreadyInCallers() {
+ traceback = false
+ }
+
+ var stk [maxCPUProfStack]uintptr
+ n := 0
+ if traceback {
+ var stklocs [maxCPUProfStack]location
+ n = callers(0, stklocs[:])
+
+ for i := 0; i < n; i++ {
+ stk[i] = stklocs[i].pc
+ }
+ }
+
+ if n <= 0 {
+ // Normal traceback is impossible or has failed.
+ // Account it against abstract "System" or "GC".
+ n = 2
+ stk[0] = pc
+ if mp.preemptoff != "" || mp.helpgc != 0 {
+ stk[1] = _GCPC + sys.PCQuantum
+ } else {
+ stk[1] = _SystemPC + sys.PCQuantum
+ }
+ }
+
+ if prof.hz != 0 {
+ // Simple cas-lock to coordinate with setcpuprofilerate.
+ for !atomic.Cas(&prof.lock, 0, 1) {
+ osyield()
+ }
+ if prof.hz != 0 {
+ cpuprof.add(stk[:n])
+ }
+ atomic.Store(&prof.lock, 0)
+ }
+ getg().m.mallocing--
+}
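
The "simple cas-lock" used here and in setcpuprofilerate_m is just a spin lock built from compare-and-swap; a standalone sketch in ordinary Go (names are assumptions, not runtime identifiers):

    package main

    import (
        "fmt"
        "runtime"
        "sync"
        "sync/atomic"
    )

    type casLock struct{ state uint32 }

    func (l *casLock) lock() {
        for !atomic.CompareAndSwapUint32(&l.state, 0, 1) {
            runtime.Gosched() // the runtime uses osyield() while it waits
        }
    }

    func (l *casLock) unlock() { atomic.StoreUint32(&l.state, 0) }

    func main() {
        var (
            l  casLock
            n  int
            wg sync.WaitGroup
        )
        for i := 0; i < 4; i++ {
            wg.Add(1)
            go func() {
                defer wg.Done()
                for j := 0; j < 1000; j++ {
                    l.lock()
                    n++ // protected by the cas-lock
                    l.unlock()
                }
            }()
        }
        wg.Wait()
        fmt.Println("n =", n) // always 4000
    }
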
+
+// Use global arrays rather than using up lots of stack space in the
+// signal handler. This is safe since while we are executing a SIGPROF
+// signal other SIGPROF signals are blocked.
+var nonprofGoStklocs [maxCPUProfStack]location
+var nonprofGoStk [maxCPUProfStack]uintptr
+
+// sigprofNonGo is called if we receive a SIGPROF signal on a non-Go thread,
+// and the signal handler collected a stack trace in sigprofCallers.
+// When this is called, sigprofCallersUse will be non-zero.
+// g is nil, and what we can do is very limited.
+//go:nosplit
+//go:nowritebarrierrec
+func sigprofNonGo(pc uintptr) {
+ if prof.hz != 0 {
+ n := callers(0, nonprofGoStklocs[:])
+
+ for i := 0; i < n; i++ {
+ nonprofGoStk[i] = nonprofGoStklocs[i].pc
+ }
+
+ if n <= 0 {
+ n = 2
+ nonprofGoStk[0] = pc
+ nonprofGoStk[1] = _ExternalCodePC + sys.PCQuantum
+ }
+
+ // Simple cas-lock to coordinate with setcpuprofilerate.
+ for !atomic.Cas(&prof.lock, 0, 1) {
+ osyield()
+ }
+ if prof.hz != 0 {
+ cpuprof.addNonGo(nonprofGoStk[:n])
+ }
+ atomic.Store(&prof.lock, 0)
+ }
+}
+
+// Arrange to call fn with a traceback hz times a second.
+func setcpuprofilerate_m(hz int32) {
+ // Force sane arguments.
+ if hz < 0 {
+ hz = 0
+ }
+
+ // Disable preemption, otherwise we can be rescheduled to another thread
+ // that has profiling enabled.
+ _g_ := getg()
+ _g_.m.locks++
+
+ // Stop profiler on this thread so that it is safe to lock prof.
+ // if a profiling signal came in while we had prof locked,
+ // it would deadlock.
+ resetcpuprofiler(0)
+
+ for !atomic.Cas(&prof.lock, 0, 1) {
+ osyield()
+ }
+ prof.hz = hz
+ atomic.Store(&prof.lock, 0)
+
+ lock(&sched.lock)
+ sched.profilehz = hz
+ unlock(&sched.lock)
+
+ if hz != 0 {
+ resetcpuprofiler(hz)
+ }
+
+ _g_.m.locks--
+}
+
// Change number of processors. The world is stopped, sched is locked.
// gcworkbufs are not being modified by either the GC or
// the write barrier code.