Diffstat (limited to 'libgo/go')
-rw-r--r-- | libgo/go/runtime/debug.go | 20
-rw-r--r-- | libgo/go/runtime/export_test.go | 62
-rw-r--r-- | libgo/go/runtime/lock_futex.go | 21
-rw-r--r-- | libgo/go/runtime/lock_sema.go | 13
-rw-r--r-- | libgo/go/runtime/proc.go | 1744
-rw-r--r-- | libgo/go/runtime/proc_test.go | 10
-rw-r--r-- | libgo/go/runtime/runtime2.go | 36
-rw-r--r-- | libgo/go/runtime/stubs.go | 88
-rw-r--r-- | libgo/go/runtime/trace.go | 75
9 files changed, 1896 insertions, 173 deletions
diff --git a/libgo/go/runtime/debug.go b/libgo/go/runtime/debug.go index 43f6e1e..55937ff 100644 --- a/libgo/go/runtime/debug.go +++ b/libgo/go/runtime/debug.go @@ -14,7 +14,25 @@ import ( // change the current setting. // The number of logical CPUs on the local machine can be queried with NumCPU. // This call will go away when the scheduler improves. -func GOMAXPROCS(n int) int +func GOMAXPROCS(n int) int { + if n > _MaxGomaxprocs { + n = _MaxGomaxprocs + } + lock(&sched.lock) + ret := int(gomaxprocs) + unlock(&sched.lock) + if n <= 0 || n == ret { + return ret + } + + stopTheWorld("GOMAXPROCS") + + // newprocs will be processed by startTheWorld + newprocs = int32(n) + + startTheWorld() + return ret +} // NumCPU returns the number of logical CPUs usable by the current process. // diff --git a/libgo/go/runtime/export_test.go b/libgo/go/runtime/export_test.go index b8b129d..36e6256 100644 --- a/libgo/go/runtime/export_test.go +++ b/libgo/go/runtime/export_test.go @@ -7,6 +7,7 @@ package runtime import ( + "runtime/internal/atomic" "unsafe" ) @@ -47,39 +48,6 @@ func GCMask(x interface{}) (ret []byte) { return nil } -//func testSchedLocalQueue() -//func testSchedLocalQueueSteal() -// -//func RunSchedLocalQueueTest() { -// testSchedLocalQueue() -//} -// -//func RunSchedLocalQueueStealTest() { -// testSchedLocalQueueSteal() -//} - -//var StringHash = stringHash -//var BytesHash = bytesHash -//var Int32Hash = int32Hash -//var Int64Hash = int64Hash -//var EfaceHash = efaceHash -//var IfaceHash = ifaceHash -//var MemclrBytes = memclrBytes - -var HashLoad = &hashLoad - -// entry point for testing -//func GostringW(w []uint16) (s string) { -// s = gostringw(&w[0]) -// return -//} - -//var Gostringnocopy = gostringnocopy -//var Maxstring = &maxstring - -//type Uintreg uintreg - -/* func RunSchedLocalQueueTest() { _p_ := new(p) gs := make([]g, len(_p_.runq)) @@ -177,14 +145,26 @@ func RunSchedLocalQueueEmptyTest(iters int) { } } -var StringHash = stringHash -var BytesHash = bytesHash -var Int32Hash = int32Hash -var Int64Hash = int64Hash -var EfaceHash = efaceHash -var IfaceHash = ifaceHash -var MemclrBytes = memclrBytes -*/ +//var StringHash = stringHash +//var BytesHash = bytesHash +//var Int32Hash = int32Hash +//var Int64Hash = int64Hash +//var EfaceHash = efaceHash +//var IfaceHash = ifaceHash +//var MemclrBytes = memclrBytes + +var HashLoad = &hashLoad + +// entry point for testing +//func GostringW(w []uint16) (s string) { +// s = gostringw(&w[0]) +// return +//} + +//var Gostringnocopy = gostringnocopy +//var Maxstring = &maxstring + +//type Uintreg uintreg var Open = open var Close = closefd diff --git a/libgo/go/runtime/lock_futex.go b/libgo/go/runtime/lock_futex.go index 1ad7911..4d914b2 100644 --- a/libgo/go/runtime/lock_futex.go +++ b/libgo/go/runtime/lock_futex.go @@ -149,13 +149,9 @@ func notewakeup(n *note) { func notesleep(n *note) { gp := getg() - - // Currently OK to sleep in non-g0 for gccgo. It happens in - // stoptheworld because we have not implemented preemption. - // if gp != gp.m.g0 { - // throw("notesleep not on g0") - // } - + if gp != gp.m.g0 { + throw("notesleep not on g0") + } for atomic.Load(key32(&n.key)) == 0 { gp.m.blocked = true futexsleep(key32(&n.key), 0, -1) @@ -202,10 +198,13 @@ func notetsleep_internal(n *note, ns int64) bool { } func notetsleep(n *note, ns int64) bool { - gp := getg() - if gp != gp.m.g0 && gp.m.preemptoff != "" { - throw("notetsleep not on g0") - } + // Currently OK to sleep in non-g0 for gccgo. 
It happens in + // stoptheworld because our version of systemstack does not + // change to g0. + // gp := getg() + // if gp != gp.m.g0 && gp.m.preemptoff != "" { + // throw("notetsleep not on g0") + // } return notetsleep_internal(n, ns) } diff --git a/libgo/go/runtime/lock_sema.go b/libgo/go/runtime/lock_sema.go index eaf938a..5c70a74 100644 --- a/libgo/go/runtime/lock_sema.go +++ b/libgo/go/runtime/lock_sema.go @@ -162,13 +162,9 @@ func notewakeup(n *note) { func notesleep(n *note) { gp := getg() - - // Currently OK to sleep in non-g0 for gccgo. It happens in - // stoptheworld because we have not implemented preemption. - // if gp != gp.m.g0 { - // throw("notesleep not on g0") - // } - + if gp != gp.m.g0 { + throw("notesleep not on g0") + } semacreate(gp.m) if !atomic.Casuintptr(&n.key, 0, uintptr(unsafe.Pointer(gp.m))) { // Must be locked (got wakeup). @@ -257,7 +253,8 @@ func notetsleep(n *note, ns int64) bool { gp := getg() // Currently OK to sleep in non-g0 for gccgo. It happens in - // stoptheworld because we have not implemented preemption. + // stoptheworld because our version of systemstack does not + // change to g0. // if gp != gp.m.g0 && gp.m.preemptoff != "" { // throw("notetsleep not on g0") // } diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go index 78cc6ee..659b17d 100644 --- a/libgo/go/runtime/proc.go +++ b/libgo/go/runtime/proc.go @@ -11,15 +11,45 @@ import ( // Functions temporarily called by C code. //go:linkname newextram runtime.newextram +//go:linkname acquirep runtime.acquirep +//go:linkname releasep runtime.releasep +//go:linkname incidlelocked runtime.incidlelocked //go:linkname checkdead runtime.checkdead +//go:linkname sysmon runtime.sysmon //go:linkname schedtrace runtime.schedtrace //go:linkname allgadd runtime.allgadd +//go:linkname ready runtime.ready +//go:linkname gcprocs runtime.gcprocs +//go:linkname needaddgcproc runtime.needaddgcproc +//go:linkname stopm runtime.stopm +//go:linkname handoffp runtime.handoffp +//go:linkname wakep runtime.wakep +//go:linkname stoplockedm runtime.stoplockedm +//go:linkname schedule runtime.schedule +//go:linkname execute runtime.execute +//go:linkname procresize runtime.procresize +//go:linkname helpgc runtime.helpgc +//go:linkname stopTheWorldWithSema runtime.stopTheWorldWithSema +//go:linkname startTheWorldWithSema runtime.startTheWorldWithSema +//go:linkname mput runtime.mput +//go:linkname mget runtime.mget +//go:linkname globrunqput runtime.globrunqput +//go:linkname pidleget runtime.pidleget +//go:linkname runqempty runtime.runqempty +//go:linkname runqput runtime.runqput // Functions temporarily in C that have not yet been ported. func allocm(*p, bool, *unsafe.Pointer, *uintptr) *m func malg(bool, bool, *unsafe.Pointer, *uintptr) *g +func startm(*p, bool) +func newm(unsafe.Pointer, *p) +func gchelper() +func getfingwait() bool +func getfingwake() bool +func wakefing() *g // C functions for ucontext management. +func gogo(*g) func setGContext() func makeGContext(*g, unsafe.Pointer, uintptr) func getTraceback(me, gp *g) @@ -30,6 +60,12 @@ func getTraceback(me, gp *g) // it is closed, meaning cgocallbackg can reliably receive from it. 
var main_init_done chan bool +func goready(gp *g, traceskip int) { + systemstack(func() { + ready(gp, traceskip, true) + }) +} + var ( allgs []*g allglock mutex @@ -56,6 +92,117 @@ func allgadd(gp *g) { unlock(&allglock) } +func dumpgstatus(gp *g) { + _g_ := getg() + print("runtime: gp: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n") + print("runtime: g: g=", _g_, ", goid=", _g_.goid, ", g->atomicstatus=", readgstatus(_g_), "\n") +} + +// Mark gp ready to run. +func ready(gp *g, traceskip int, next bool) { + if trace.enabled { + traceGoUnpark(gp, traceskip) + } + + status := readgstatus(gp) + + // Mark runnable. + _g_ := getg() + _g_.m.locks++ // disable preemption because it can be holding p in a local var + if status&^_Gscan != _Gwaiting { + dumpgstatus(gp) + throw("bad g->status in ready") + } + + // status is Gwaiting or Gscanwaiting, make Grunnable and put on runq + casgstatus(gp, _Gwaiting, _Grunnable) + runqput(_g_.m.p.ptr(), gp, next) + if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 { // TODO: fast atomic + wakep() + } + _g_.m.locks-- +} + +func gcprocs() int32 { + // Figure out how many CPUs to use during GC. + // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc. + lock(&sched.lock) + n := gomaxprocs + if n > ncpu { + n = ncpu + } + if n > _MaxGcproc { + n = _MaxGcproc + } + if n > sched.nmidle+1 { // one M is currently running + n = sched.nmidle + 1 + } + unlock(&sched.lock) + return n +} + +func needaddgcproc() bool { + lock(&sched.lock) + n := gomaxprocs + if n > ncpu { + n = ncpu + } + if n > _MaxGcproc { + n = _MaxGcproc + } + n -= sched.nmidle + 1 // one M is currently running + unlock(&sched.lock) + return n > 0 +} + +func helpgc(nproc int32) { + _g_ := getg() + lock(&sched.lock) + pos := 0 + for n := int32(1); n < nproc; n++ { // one M is currently running + if allp[pos].mcache == _g_.m.mcache { + pos++ + } + mp := mget() + if mp == nil { + throw("gcprocs inconsistency") + } + mp.helpgc = n + mp.p.set(allp[pos]) + mp.mcache = allp[pos].mcache + pos++ + notewakeup(&mp.park) + } + unlock(&sched.lock) +} + +// freezeStopWait is a large value that freezetheworld sets +// sched.stopwait to in order to request that all Gs permanently stop. +const freezeStopWait = 0x7fffffff + +// Similar to stopTheWorld but best-effort and can be called several times. +// There is no reverse operation, used during crashing. +// This function must not lock any mutexes. +func freezetheworld() { + // stopwait and preemption requests can be lost + // due to races with concurrently executing threads, + // so try several times + for i := 0; i < 5; i++ { + // this should tell the scheduler to not start any new goroutines + sched.stopwait = freezeStopWait + atomic.Store(&sched.gcwaiting, 1) + // this should stop running goroutines + if !preemptall() { + break // no running goroutines + } + usleep(1000) + } + // to be sure + usleep(1000) + preemptall() + usleep(1000) +} + // All reads and writes of g's status go through readgstatus, casgstatus // castogscanstatus, casfrom_Gscanstatus. //go:nosplit @@ -123,6 +270,217 @@ func casgstatus(gp *g, oldval, newval uint32) { } } +// stopTheWorld stops all P's from executing goroutines, interrupting +// all goroutines at GC safe points and records reason as the reason +// for the stop. On return, only the current goroutine's P is running. +// stopTheWorld must not be called from a system stack and the caller +// must not hold worldsema. 
The caller must call startTheWorld when +// other P's should resume execution. +// +// stopTheWorld is safe for multiple goroutines to call at the +// same time. Each will execute its own stop, and the stops will +// be serialized. +// +// This is also used by routines that do stack dumps. If the system is +// in panic or being exited, this may not reliably stop all +// goroutines. +func stopTheWorld(reason string) { + semacquire(&worldsema, false) + getg().m.preemptoff = reason + systemstack(stopTheWorldWithSema) +} + +// startTheWorld undoes the effects of stopTheWorld. +func startTheWorld() { + systemstack(startTheWorldWithSema) + // worldsema must be held over startTheWorldWithSema to ensure + // gomaxprocs cannot change while worldsema is held. + semrelease(&worldsema) + getg().m.preemptoff = "" +} + +// Holding worldsema grants an M the right to try to stop the world +// and prevents gomaxprocs from changing concurrently. +var worldsema uint32 = 1 + +// stopTheWorldWithSema is the core implementation of stopTheWorld. +// The caller is responsible for acquiring worldsema and disabling +// preemption first and then should stopTheWorldWithSema on the system +// stack: +// +// semacquire(&worldsema, false) +// m.preemptoff = "reason" +// systemstack(stopTheWorldWithSema) +// +// When finished, the caller must either call startTheWorld or undo +// these three operations separately: +// +// m.preemptoff = "" +// systemstack(startTheWorldWithSema) +// semrelease(&worldsema) +// +// It is allowed to acquire worldsema once and then execute multiple +// startTheWorldWithSema/stopTheWorldWithSema pairs. +// Other P's are able to execute between successive calls to +// startTheWorldWithSema and stopTheWorldWithSema. +// Holding worldsema causes any other goroutines invoking +// stopTheWorld to block. +func stopTheWorldWithSema() { + _g_ := getg() + + // If we hold a lock, then we won't be able to stop another M + // that is blocked trying to acquire the lock. + if _g_.m.locks > 0 { + throw("stopTheWorld: holding locks") + } + + lock(&sched.lock) + sched.stopwait = gomaxprocs + atomic.Store(&sched.gcwaiting, 1) + preemptall() + // stop current P + _g_.m.p.ptr().status = _Pgcstop // Pgcstop is only diagnostic. 
+ sched.stopwait-- + // try to retake all P's in Psyscall status + for i := 0; i < int(gomaxprocs); i++ { + p := allp[i] + s := p.status + if s == _Psyscall && atomic.Cas(&p.status, s, _Pgcstop) { + if trace.enabled { + traceGoSysBlock(p) + traceProcStop(p) + } + p.syscalltick++ + sched.stopwait-- + } + } + // stop idle P's + for { + p := pidleget() + if p == nil { + break + } + p.status = _Pgcstop + sched.stopwait-- + } + wait := sched.stopwait > 0 + unlock(&sched.lock) + + // wait for remaining P's to stop voluntarily + if wait { + for { + // wait for 100us, then try to re-preempt in case of any races + if notetsleep(&sched.stopnote, 100*1000) { + noteclear(&sched.stopnote) + break + } + preemptall() + } + } + if sched.stopwait != 0 { + throw("stopTheWorld: not stopped") + } + for i := 0; i < int(gomaxprocs); i++ { + p := allp[i] + if p.status != _Pgcstop { + throw("stopTheWorld: not stopped") + } + } +} + +func mhelpgc() { + _g_ := getg() + _g_.m.helpgc = -1 +} + +func startTheWorldWithSema() { + _g_ := getg() + + _g_.m.locks++ // disable preemption because it can be holding p in a local var + gp := netpoll(false) // non-blocking + injectglist(gp) + add := needaddgcproc() + lock(&sched.lock) + + procs := gomaxprocs + if newprocs != 0 { + procs = newprocs + newprocs = 0 + } + p1 := procresize(procs) + sched.gcwaiting = 0 + if sched.sysmonwait != 0 { + sched.sysmonwait = 0 + notewakeup(&sched.sysmonnote) + } + unlock(&sched.lock) + + for p1 != nil { + p := p1 + p1 = p1.link.ptr() + if p.m != 0 { + mp := p.m.ptr() + p.m = 0 + if mp.nextp != 0 { + throw("startTheWorld: inconsistent mp->nextp") + } + mp.nextp.set(p) + notewakeup(&mp.park) + } else { + // Start M to run P. Do not start another M below. + newm(nil, p) + add = false + } + } + + // Wakeup an additional proc in case we have excessive runnable goroutines + // in local queues or in the global queue. If we don't, the proc will park itself. + // If we have lots of excessive work, resetspinning will unpark additional procs as necessary. + if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 { + wakep() + } + + if add { + // If GC could have used another helper proc, start one now, + // in the hope that it will be available next time. + // It would have been even better to start it before the collection, + // but doing so requires allocating memory, so it's tricky to + // coordinate. This lazy approach works out in practice: + // we don't mind if the first couple gc rounds don't have quite + // the maximum number of procs. + newm(unsafe.Pointer(funcPC(mhelpgc)), nil) + } + _g_.m.locks-- +} + +// runSafePointFn runs the safe point function, if any, for this P. +// This should be called like +// +// if getg().m.p.runSafePointFn != 0 { +// runSafePointFn() +// } +// +// runSafePointFn must be checked on any transition in to _Pidle or +// _Psyscall to avoid a race where forEachP sees that the P is running +// just before the P goes into _Pidle/_Psyscall and neither forEachP +// nor the P run the safe-point function. +func runSafePointFn() { + p := getg().m.p.ptr() + // Resolve the race between forEachP running the safe-point + // function on this P's behalf and this P running the + // safe-point function directly. 
+ if !atomic.Cas(&p.runSafePointFn, 1, 0) { + return + } + sched.safePointFn(p) + lock(&sched.lock) + sched.safePointWait-- + if sched.safePointWait == 0 { + notewakeup(&sched.safePointNote) + } + unlock(&sched.lock) +} + // needm is called when a cgo callback happens on a // thread without an m (a thread not created by Go). // In this case, needm is expected to find an m to use @@ -245,9 +603,6 @@ func oneNewExtraM() { mp.lockedg = gp gp.lockedm = mp gp.goid = int64(atomic.Xadd64(&sched.goidgen, 1)) - if raceenabled { - gp.racectx = racegostart(funcPC(newextram)) - } // put on allg for garbage collector allgadd(gp) @@ -365,6 +720,744 @@ func unlockextra(mp *m) { atomic.Storeuintptr(&extram, uintptr(unsafe.Pointer(mp))) } +// Stops execution of the current m until new work is available. +// Returns with acquired P. +func stopm() { + _g_ := getg() + + if _g_.m.locks != 0 { + throw("stopm holding locks") + } + if _g_.m.p != 0 { + throw("stopm holding p") + } + if _g_.m.spinning { + throw("stopm spinning") + } + +retry: + lock(&sched.lock) + mput(_g_.m) + unlock(&sched.lock) + notesleep(&_g_.m.park) + noteclear(&_g_.m.park) + if _g_.m.helpgc != 0 { + gchelper() + _g_.m.helpgc = 0 + _g_.m.mcache = nil + _g_.m.p = 0 + goto retry + } + acquirep(_g_.m.nextp.ptr()) + _g_.m.nextp = 0 +} + +// Hands off P from syscall or locked M. +// Always runs without a P, so write barriers are not allowed. +//go:nowritebarrier +func handoffp(_p_ *p) { + // handoffp must start an M in any situation where + // findrunnable would return a G to run on _p_. + + // if it has local work, start it straight away + if !runqempty(_p_) || sched.runqsize != 0 { + startm(_p_, false) + return + } + // if it has GC work, start it straight away + if gcBlackenEnabled != 0 && gcMarkWorkAvailable(_p_) { + startm(_p_, false) + return + } + // no local work, check that there are no spinning/idle M's, + // otherwise our help is not required + if atomic.Load(&sched.nmspinning)+atomic.Load(&sched.npidle) == 0 && atomic.Cas(&sched.nmspinning, 0, 1) { // TODO: fast atomic + startm(_p_, true) + return + } + lock(&sched.lock) + if sched.gcwaiting != 0 { + _p_.status = _Pgcstop + sched.stopwait-- + if sched.stopwait == 0 { + notewakeup(&sched.stopnote) + } + unlock(&sched.lock) + return + } + if _p_.runSafePointFn != 0 && atomic.Cas(&_p_.runSafePointFn, 1, 0) { + sched.safePointFn(_p_) + sched.safePointWait-- + if sched.safePointWait == 0 { + notewakeup(&sched.safePointNote) + } + } + if sched.runqsize != 0 { + unlock(&sched.lock) + startm(_p_, false) + return + } + // If this is the last running P and nobody is polling network, + // need to wakeup another M to poll network. + if sched.npidle == uint32(gomaxprocs-1) && atomic.Load64(&sched.lastpoll) != 0 { + unlock(&sched.lock) + startm(_p_, false) + return + } + pidleput(_p_) + unlock(&sched.lock) +} + +// Tries to add one more P to execute G's. +// Called when a G is made runnable (newproc, ready). +func wakep() { + // be conservative about spinning threads + if !atomic.Cas(&sched.nmspinning, 0, 1) { + return + } + startm(nil, true) +} + +// Stops execution of the current m that is locked to a g until the g is runnable again. +// Returns with acquired P. +func stoplockedm() { + _g_ := getg() + + if _g_.m.lockedg == nil || _g_.m.lockedg.lockedm != _g_.m { + throw("stoplockedm: inconsistent locking") + } + if _g_.m.p != 0 { + // Schedule another M to run this p. + _p_ := releasep() + handoffp(_p_) + } + incidlelocked(1) + // Wait until another thread schedules lockedg again. 
+ notesleep(&_g_.m.park) + noteclear(&_g_.m.park) + status := readgstatus(_g_.m.lockedg) + if status&^_Gscan != _Grunnable { + print("runtime:stoplockedm: g is not Grunnable or Gscanrunnable\n") + dumpgstatus(_g_) + throw("stoplockedm: not runnable") + } + acquirep(_g_.m.nextp.ptr()) + _g_.m.nextp = 0 +} + +// Schedules the locked m to run the locked gp. +// May run during STW, so write barriers are not allowed. +//go:nowritebarrier +func startlockedm(gp *g) { + _g_ := getg() + + mp := gp.lockedm + if mp == _g_.m { + throw("startlockedm: locked to me") + } + if mp.nextp != 0 { + throw("startlockedm: m has p") + } + // directly handoff current P to the locked m + incidlelocked(-1) + _p_ := releasep() + mp.nextp.set(_p_) + notewakeup(&mp.park) + stopm() +} + +// Stops the current m for stopTheWorld. +// Returns when the world is restarted. +func gcstopm() { + _g_ := getg() + + if sched.gcwaiting == 0 { + throw("gcstopm: not waiting for gc") + } + if _g_.m.spinning { + _g_.m.spinning = false + // OK to just drop nmspinning here, + // startTheWorld will unpark threads as necessary. + if int32(atomic.Xadd(&sched.nmspinning, -1)) < 0 { + throw("gcstopm: negative nmspinning") + } + } + _p_ := releasep() + lock(&sched.lock) + _p_.status = _Pgcstop + sched.stopwait-- + if sched.stopwait == 0 { + notewakeup(&sched.stopnote) + } + unlock(&sched.lock) + stopm() +} + +// Schedules gp to run on the current M. +// If inheritTime is true, gp inherits the remaining time in the +// current time slice. Otherwise, it starts a new time slice. +// Never returns. +func execute(gp *g, inheritTime bool) { + _g_ := getg() + + casgstatus(gp, _Grunnable, _Grunning) + gp.waitsince = 0 + gp.preempt = false + if !inheritTime { + _g_.m.p.ptr().schedtick++ + } + _g_.m.curg = gp + gp.m = _g_.m + + // Check whether the profiler needs to be turned on or off. + hz := sched.profilehz + if _g_.m.profilehz != hz { + resetcpuprofiler(hz) + } + + if trace.enabled { + // GoSysExit has to happen when we have a P, but before GoStart. + // So we emit it here. + if gp.syscallsp != 0 && gp.sysblocktraced { + traceGoSysExit(gp.sysexitticks) + } + traceGoStart() + } + + gogo(gp) +} + +// Finds a runnable goroutine to execute. +// Tries to steal from other P's, get g from global queue, poll network. +func findrunnable() (gp *g, inheritTime bool) { + _g_ := getg() + + // The conditions here and in handoffp must agree: if + // findrunnable would return a G to run, handoffp must start + // an M. + +top: + _p_ := _g_.m.p.ptr() + if sched.gcwaiting != 0 { + gcstopm() + goto top + } + if _p_.runSafePointFn != 0 { + runSafePointFn() + } + if getfingwait() && getfingwake() { + if gp := wakefing(); gp != nil { + ready(gp, 0, true) + } + } + + // local runq + if gp, inheritTime := runqget(_p_); gp != nil { + return gp, inheritTime + } + + // global runq + if sched.runqsize != 0 { + lock(&sched.lock) + gp := globrunqget(_p_, 0) + unlock(&sched.lock) + if gp != nil { + return gp, false + } + } + + // Poll network. + // This netpoll is only an optimization before we resort to stealing. + // We can safely skip it if there a thread blocked in netpoll already. + // If there is any kind of logical race with that blocked thread + // (e.g. it has already returned from netpoll, but does not set lastpoll yet), + // this thread will do blocking netpoll below anyway. + if netpollinited() && sched.lastpoll != 0 { + if gp := netpoll(false); gp != nil { // non-blocking + // netpoll returns list of goroutines linked by schedlink. 
+ injectglist(gp.schedlink.ptr()) + casgstatus(gp, _Gwaiting, _Grunnable) + if trace.enabled { + traceGoUnpark(gp, 0) + } + return gp, false + } + } + + // Steal work from other P's. + procs := uint32(gomaxprocs) + if atomic.Load(&sched.npidle) == procs-1 { + // Either GOMAXPROCS=1 or everybody, except for us, is idle already. + // New work can appear from returning syscall/cgocall, network or timers. + // Neither of that submits to local run queues, so no point in stealing. + goto stop + } + // If number of spinning M's >= number of busy P's, block. + // This is necessary to prevent excessive CPU consumption + // when GOMAXPROCS>>1 but the program parallelism is low. + if !_g_.m.spinning && 2*atomic.Load(&sched.nmspinning) >= procs-atomic.Load(&sched.npidle) { // TODO: fast atomic + goto stop + } + if !_g_.m.spinning { + _g_.m.spinning = true + atomic.Xadd(&sched.nmspinning, 1) + } + for i := 0; i < 4; i++ { + for enum := stealOrder.start(fastrand1()); !enum.done(); enum.next() { + if sched.gcwaiting != 0 { + goto top + } + stealRunNextG := i > 2 // first look for ready queues with more than 1 g + if gp := runqsteal(_p_, allp[enum.position()], stealRunNextG); gp != nil { + return gp, false + } + } + } + +stop: + + // We have nothing to do. If we're in the GC mark phase, can + // safely scan and blacken objects, and have work to do, run + // idle-time marking rather than give up the P. + if gcBlackenEnabled != 0 && _p_.gcBgMarkWorker != 0 && gcMarkWorkAvailable(_p_) { + _p_.gcMarkWorkerMode = gcMarkWorkerIdleMode + gp := _p_.gcBgMarkWorker.ptr() + casgstatus(gp, _Gwaiting, _Grunnable) + if trace.enabled { + traceGoUnpark(gp, 0) + } + return gp, false + } + + // return P and block + lock(&sched.lock) + if sched.gcwaiting != 0 || _p_.runSafePointFn != 0 { + unlock(&sched.lock) + goto top + } + if sched.runqsize != 0 { + gp := globrunqget(_p_, 0) + unlock(&sched.lock) + return gp, false + } + if releasep() != _p_ { + throw("findrunnable: wrong p") + } + pidleput(_p_) + unlock(&sched.lock) + + // Delicate dance: thread transitions from spinning to non-spinning state, + // potentially concurrently with submission of new goroutines. We must + // drop nmspinning first and then check all per-P queues again (with + // #StoreLoad memory barrier in between). If we do it the other way around, + // another thread can submit a goroutine after we've checked all run queues + // but before we drop nmspinning; as the result nobody will unpark a thread + // to run the goroutine. + // If we discover new work below, we need to restore m.spinning as a signal + // for resetspinning to unpark a new worker thread (because there can be more + // than one starving goroutine). However, if after discovering new work + // we also observe no idle Ps, it is OK to just park the current thread: + // the system is fully loaded so no spinning threads are required. + // Also see "Worker thread parking/unparking" comment at the top of the file. 
+ wasSpinning := _g_.m.spinning + if _g_.m.spinning { + _g_.m.spinning = false + if int32(atomic.Xadd(&sched.nmspinning, -1)) < 0 { + throw("findrunnable: negative nmspinning") + } + } + + // check all runqueues once again + for i := 0; i < int(gomaxprocs); i++ { + _p_ := allp[i] + if _p_ != nil && !runqempty(_p_) { + lock(&sched.lock) + _p_ = pidleget() + unlock(&sched.lock) + if _p_ != nil { + acquirep(_p_) + if wasSpinning { + _g_.m.spinning = true + atomic.Xadd(&sched.nmspinning, 1) + } + goto top + } + break + } + } + + // poll network + if netpollinited() && atomic.Xchg64(&sched.lastpoll, 0) != 0 { + if _g_.m.p != 0 { + throw("findrunnable: netpoll with p") + } + if _g_.m.spinning { + throw("findrunnable: netpoll with spinning") + } + gp := netpoll(true) // block until new work is available + atomic.Store64(&sched.lastpoll, uint64(nanotime())) + if gp != nil { + lock(&sched.lock) + _p_ = pidleget() + unlock(&sched.lock) + if _p_ != nil { + acquirep(_p_) + injectglist(gp.schedlink.ptr()) + casgstatus(gp, _Gwaiting, _Grunnable) + if trace.enabled { + traceGoUnpark(gp, 0) + } + return gp, false + } + injectglist(gp) + } + } + stopm() + goto top +} + +func resetspinning() { + _g_ := getg() + if !_g_.m.spinning { + throw("resetspinning: not a spinning m") + } + _g_.m.spinning = false + nmspinning := atomic.Xadd(&sched.nmspinning, -1) + if int32(nmspinning) < 0 { + throw("findrunnable: negative nmspinning") + } + // M wakeup policy is deliberately somewhat conservative, so check if we + // need to wakeup another P here. See "Worker thread parking/unparking" + // comment at the top of the file for details. + if nmspinning == 0 && atomic.Load(&sched.npidle) > 0 { + wakep() + } +} + +// Injects the list of runnable G's into the scheduler. +// Can run concurrently with GC. +func injectglist(glist *g) { + if glist == nil { + return + } + if trace.enabled { + for gp := glist; gp != nil; gp = gp.schedlink.ptr() { + traceGoUnpark(gp, 0) + } + } + lock(&sched.lock) + var n int + for n = 0; glist != nil; n++ { + gp := glist + glist = gp.schedlink.ptr() + casgstatus(gp, _Gwaiting, _Grunnable) + globrunqput(gp) + } + unlock(&sched.lock) + for ; n != 0 && sched.npidle != 0; n-- { + startm(nil, false) + } +} + +// One round of scheduler: find a runnable goroutine and execute it. +// Never returns. +func schedule() { + _g_ := getg() + + if _g_.m.locks != 0 { + throw("schedule: holding locks") + } + + if _g_.m.lockedg != nil { + stoplockedm() + execute(_g_.m.lockedg, false) // Never returns. + } + +top: + if sched.gcwaiting != 0 { + gcstopm() + goto top + } + if _g_.m.p.ptr().runSafePointFn != 0 { + runSafePointFn() + } + + var gp *g + var inheritTime bool + if trace.enabled || trace.shutdown { + gp = traceReader() + if gp != nil { + casgstatus(gp, _Gwaiting, _Grunnable) + traceGoUnpark(gp, 0) + } + } + if gp == nil && gcBlackenEnabled != 0 { + gp = gcController.findRunnableGCWorker(_g_.m.p.ptr()) + } + if gp == nil { + // Check the global runnable queue once in a while to ensure fairness. + // Otherwise two goroutines can completely occupy the local runqueue + // by constantly respawning each other. 
+ if _g_.m.p.ptr().schedtick%61 == 0 && sched.runqsize > 0 { + lock(&sched.lock) + gp = globrunqget(_g_.m.p.ptr(), 1) + unlock(&sched.lock) + } + } + if gp == nil { + gp, inheritTime = runqget(_g_.m.p.ptr()) + if gp != nil && _g_.m.spinning { + throw("schedule: spinning with local work") + } + + // Because gccgo does not implement preemption as a stack check, + // we need to check for preemption here for fairness. + // Otherwise goroutines on the local queue may starve + // goroutines on the global queue. + // Since we preempt by storing the goroutine on the global + // queue, this is the only place we need to check preempt. + if gp != nil && gp.preempt { + gp.preempt = false + lock(&sched.lock) + globrunqput(gp) + unlock(&sched.lock) + goto top + } + } + if gp == nil { + gp, inheritTime = findrunnable() // blocks until work is available + } + + // This thread is going to run a goroutine and is not spinning anymore, + // so if it was marked as spinning we need to reset it now and potentially + // start a new spinning M. + if _g_.m.spinning { + resetspinning() + } + + if gp.lockedm != nil { + // Hands off own p to the locked m, + // then blocks waiting for a new p. + startlockedm(gp) + goto top + } + + execute(gp, inheritTime) +} + +// Purge all cached G's from gfree list to the global list. +func gfpurge(_p_ *p) { + lock(&sched.gflock) + for _p_.gfreecnt != 0 { + _p_.gfreecnt-- + gp := _p_.gfree + _p_.gfree = gp.schedlink.ptr() + gp.schedlink.set(sched.gfree) + sched.gfree = gp + sched.ngfree++ + } + unlock(&sched.gflock) +} + +// Change number of processors. The world is stopped, sched is locked. +// gcworkbufs are not being modified by either the GC or +// the write barrier code. +// Returns list of Ps with local work, they need to be scheduled by the caller. +func procresize(nprocs int32) *p { + old := gomaxprocs + if old < 0 || old > _MaxGomaxprocs || nprocs <= 0 || nprocs > _MaxGomaxprocs { + throw("procresize: invalid arg") + } + if trace.enabled { + traceGomaxprocs(nprocs) + } + + // update statistics + now := nanotime() + if sched.procresizetime != 0 { + sched.totaltime += int64(old) * (now - sched.procresizetime) + } + sched.procresizetime = now + + // initialize new P's + for i := int32(0); i < nprocs; i++ { + pp := allp[i] + if pp == nil { + pp = new(p) + pp.id = i + pp.status = _Pgcstop + pp.sudogcache = pp.sudogbuf[:0] + pp.deferpool = pp.deferpoolbuf[:0] + atomicstorep(unsafe.Pointer(&allp[i]), unsafe.Pointer(pp)) + } + if pp.mcache == nil { + if old == 0 && i == 0 { + if getg().m.mcache == nil { + throw("missing mcache?") + } + pp.mcache = getg().m.mcache // bootstrap + } else { + pp.mcache = allocmcache() + } + } + } + + // free unused P's + for i := nprocs; i < old; i++ { + p := allp[i] + if trace.enabled { + if p == getg().m.p.ptr() { + // moving to p[0], pretend that we were descheduled + // and then scheduled again to keep the trace sane. 
+ traceGoSched() + traceProcStop(p) + } + } + // move all runnable goroutines to the global queue + for p.runqhead != p.runqtail { + // pop from tail of local queue + p.runqtail-- + gp := p.runq[p.runqtail%uint32(len(p.runq))].ptr() + // push onto head of global queue + globrunqputhead(gp) + } + if p.runnext != 0 { + globrunqputhead(p.runnext.ptr()) + p.runnext = 0 + } + // if there's a background worker, make it runnable and put + // it on the global queue so it can clean itself up + if gp := p.gcBgMarkWorker.ptr(); gp != nil { + casgstatus(gp, _Gwaiting, _Grunnable) + if trace.enabled { + traceGoUnpark(gp, 0) + } + globrunqput(gp) + // This assignment doesn't race because the + // world is stopped. + p.gcBgMarkWorker.set(nil) + } + for i := range p.sudogbuf { + p.sudogbuf[i] = nil + } + p.sudogcache = p.sudogbuf[:0] + for i := range p.deferpoolbuf { + p.deferpoolbuf[i] = nil + } + p.deferpool = p.deferpoolbuf[:0] + freemcache(p.mcache) + p.mcache = nil + gfpurge(p) + traceProcFree(p) + p.status = _Pdead + // can't free P itself because it can be referenced by an M in syscall + } + + _g_ := getg() + if _g_.m.p != 0 && _g_.m.p.ptr().id < nprocs { + // continue to use the current P + _g_.m.p.ptr().status = _Prunning + } else { + // release the current P and acquire allp[0] + if _g_.m.p != 0 { + _g_.m.p.ptr().m = 0 + } + _g_.m.p = 0 + _g_.m.mcache = nil + p := allp[0] + p.m = 0 + p.status = _Pidle + acquirep(p) + if trace.enabled { + traceGoStart() + } + } + var runnablePs *p + for i := nprocs - 1; i >= 0; i-- { + p := allp[i] + if _g_.m.p.ptr() == p { + continue + } + p.status = _Pidle + if runqempty(p) { + pidleput(p) + } else { + p.m.set(mget()) + p.link.set(runnablePs) + runnablePs = p + } + } + stealOrder.reset(uint32(nprocs)) + var int32p *int32 = &gomaxprocs // make compiler check that gomaxprocs is an int32 + atomic.Store((*uint32)(unsafe.Pointer(int32p)), uint32(nprocs)) + return runnablePs +} + +// Associate p and the current m. +func acquirep(_p_ *p) { + acquirep1(_p_) + + // have p; write barriers now allowed + _g_ := getg() + _g_.m.mcache = _p_.mcache + + if trace.enabled { + traceProcStart() + } +} + +// May run during STW, so write barriers are not allowed. +//go:nowritebarrier +func acquirep1(_p_ *p) { + _g_ := getg() + + if _g_.m.p != 0 || _g_.m.mcache != nil { + throw("acquirep: already in go") + } + if _p_.m != 0 || _p_.status != _Pidle { + id := int32(0) + if _p_.m != 0 { + id = _p_.m.ptr().id + } + print("acquirep: p->m=", _p_.m, "(", id, ") p->status=", _p_.status, "\n") + throw("acquirep: invalid p state") + } + _g_.m.p.set(_p_) + _p_.m.set(_g_.m) + _p_.status = _Prunning +} + +// Disassociate p and the current m. +func releasep() *p { + _g_ := getg() + + if _g_.m.p == 0 || _g_.m.mcache == nil { + throw("releasep: invalid arg") + } + _p_ := _g_.m.p.ptr() + if _p_.m.ptr() != _g_.m || _p_.mcache != _g_.m.mcache || _p_.status != _Prunning { + print("releasep: m=", _g_.m, " m->p=", _g_.m.p.ptr(), " p->m=", _p_.m, " m->mcache=", _g_.m.mcache, " p->mcache=", _p_.mcache, " p->status=", _p_.status, "\n") + throw("releasep: invalid p state") + } + if trace.enabled { + traceProcStop(_g_.m.p.ptr()) + } + _g_.m.p = 0 + _g_.m.mcache = nil + _p_.m = 0 + _p_.status = _Pidle + return _p_ +} + +func incidlelocked(v int32) { + lock(&sched.lock) + sched.nmidlelocked += v + if v > 0 { + checkdead() + } + unlock(&sched.lock) +} + // Check for deadlock situation. // The check is based on number of running M's, if 0 -> deadlock. 
func checkdead() { @@ -443,6 +1536,236 @@ func checkdead() { throw("all goroutines are asleep - deadlock!") } +// forcegcperiod is the maximum time in nanoseconds between garbage +// collections. If we go this long without a garbage collection, one +// is forced to run. +// +// This is a variable for testing purposes. It normally doesn't change. +var forcegcperiod int64 = 2 * 60 * 1e9 + +// Always runs without a P, so write barriers are not allowed. +// +//go:nowritebarrierrec +func sysmon() { + // If a heap span goes unused for 5 minutes after a garbage collection, + // we hand it back to the operating system. + scavengelimit := int64(5 * 60 * 1e9) + + if debug.scavenge > 0 { + // Scavenge-a-lot for testing. + forcegcperiod = 10 * 1e6 + scavengelimit = 20 * 1e6 + } + + lastscavenge := nanotime() + nscavenge := 0 + + lasttrace := int64(0) + idle := 0 // how many cycles in succession we had not wokeup somebody + delay := uint32(0) + for { + if idle == 0 { // start with 20us sleep... + delay = 20 + } else if idle > 50 { // start doubling the sleep after 1ms... + delay *= 2 + } + if delay > 10*1000 { // up to 10ms + delay = 10 * 1000 + } + usleep(delay) + if debug.schedtrace <= 0 && (sched.gcwaiting != 0 || atomic.Load(&sched.npidle) == uint32(gomaxprocs)) { // TODO: fast atomic + lock(&sched.lock) + if atomic.Load(&sched.gcwaiting) != 0 || atomic.Load(&sched.npidle) == uint32(gomaxprocs) { + atomic.Store(&sched.sysmonwait, 1) + unlock(&sched.lock) + // Make wake-up period small enough + // for the sampling to be correct. + maxsleep := forcegcperiod / 2 + if scavengelimit < forcegcperiod { + maxsleep = scavengelimit / 2 + } + notetsleep(&sched.sysmonnote, maxsleep) + lock(&sched.lock) + atomic.Store(&sched.sysmonwait, 0) + noteclear(&sched.sysmonnote) + idle = 0 + delay = 20 + } + unlock(&sched.lock) + } + // poll network if not polled for more than 10ms + lastpoll := int64(atomic.Load64(&sched.lastpoll)) + now := nanotime() + unixnow := unixnanotime() + if lastpoll != 0 && lastpoll+10*1000*1000 < now { + atomic.Cas64(&sched.lastpoll, uint64(lastpoll), uint64(now)) + gp := netpoll(false) // non-blocking - returns list of goroutines + if gp != nil { + // Need to decrement number of idle locked M's + // (pretending that one more is running) before injectglist. + // Otherwise it can lead to the following situation: + // injectglist grabs all P's but before it starts M's to run the P's, + // another M returns from syscall, finishes running its G, + // observes that there is no work to do and no other running M's + // and reports deadlock. 
+ incidlelocked(-1) + injectglist(gp) + incidlelocked(1) + } + } + // retake P's blocked in syscalls + // and preempt long running G's + if retake(now) != 0 { + idle = 0 + } else { + idle++ + } + // check if we need to force a GC + lastgc := int64(atomic.Load64(&memstats.last_gc)) + if gcphase == _GCoff && lastgc != 0 && unixnow-lastgc > forcegcperiod && atomic.Load(&forcegc.idle) != 0 { + lock(&forcegc.lock) + forcegc.idle = 0 + forcegc.g.schedlink = 0 + injectglist(forcegc.g) + unlock(&forcegc.lock) + } + // scavenge heap once in a while + if lastscavenge+scavengelimit/2 < now { + mheap_.scavenge(int32(nscavenge), uint64(now), uint64(scavengelimit)) + lastscavenge = now + nscavenge++ + } + if debug.schedtrace > 0 && lasttrace+int64(debug.schedtrace)*1000000 <= now { + lasttrace = now + schedtrace(debug.scheddetail > 0) + } + } +} + +var pdesc [_MaxGomaxprocs]struct { + schedtick uint32 + schedwhen int64 + syscalltick uint32 + syscallwhen int64 +} + +// forcePreemptNS is the time slice given to a G before it is +// preempted. +const forcePreemptNS = 10 * 1000 * 1000 // 10ms + +func retake(now int64) uint32 { + n := 0 + for i := int32(0); i < gomaxprocs; i++ { + _p_ := allp[i] + if _p_ == nil { + continue + } + pd := &pdesc[i] + s := _p_.status + if s == _Psyscall { + // Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us). + t := int64(_p_.syscalltick) + if int64(pd.syscalltick) != t { + pd.syscalltick = uint32(t) + pd.syscallwhen = now + continue + } + // On the one hand we don't want to retake Ps if there is no other work to do, + // but on the other hand we want to retake them eventually + // because they can prevent the sysmon thread from deep sleep. + if runqempty(_p_) && atomic.Load(&sched.nmspinning)+atomic.Load(&sched.npidle) > 0 && pd.syscallwhen+10*1000*1000 > now { + continue + } + // Need to decrement number of idle locked M's + // (pretending that one more is running) before the CAS. + // Otherwise the M from which we retake can exit the syscall, + // increment nmidle and report deadlock. + incidlelocked(-1) + if atomic.Cas(&_p_.status, s, _Pidle) { + if trace.enabled { + traceGoSysBlock(_p_) + traceProcStop(_p_) + } + n++ + _p_.syscalltick++ + handoffp(_p_) + } + incidlelocked(1) + } else if s == _Prunning { + // Preempt G if it's running for too long. + t := int64(_p_.schedtick) + if int64(pd.schedtick) != t { + pd.schedtick = uint32(t) + pd.schedwhen = now + continue + } + if pd.schedwhen+forcePreemptNS > now { + continue + } + preemptone(_p_) + } + } + return uint32(n) +} + +// Tell all goroutines that they have been preempted and they should stop. +// This function is purely best-effort. It can fail to inform a goroutine if a +// processor just started running it. +// No locks need to be held. +// Returns true if preemption request was issued to at least one goroutine. +func preemptall() bool { + res := false + for i := int32(0); i < gomaxprocs; i++ { + _p_ := allp[i] + if _p_ == nil || _p_.status != _Prunning { + continue + } + if preemptone(_p_) { + res = true + } + } + return res +} + +// Tell the goroutine running on processor P to stop. +// This function is purely best-effort. It can incorrectly fail to inform the +// goroutine. It can send inform the wrong goroutine. Even if it informs the +// correct goroutine, that goroutine might ignore the request if it is +// simultaneously executing newstack. +// No lock needs to be held. +// Returns true if preemption request was issued. 
+// The actual preemption will happen at some point in the future +// and will be indicated by the gp->status no longer being +// Grunning +func preemptone(_p_ *p) bool { + mp := _p_.m.ptr() + if mp == nil || mp == getg().m { + return false + } + gp := mp.curg + if gp == nil || gp == mp.g0 { + return false + } + + gp.preempt = true + + // At this point the gc implementation sets gp.stackguard0 to + // a value that causes the goroutine to suspend itself. + // gccgo has no support for this, and it's hard to support. + // The split stack code reads a value from its TCB. + // We have no way to set a value in the TCB of a different thread. + // And, of course, not all systems support split stack anyhow. + // Checking the field in the g is expensive, since it requires + // loading the g from TLS. The best mechanism is likely to be + // setting a global variable and figuring out a way to efficiently + // check that global variable. + // + // For now we check gp.preempt in schedule and mallocgc, + // which is at least better than doing nothing at all. + + return true +} + var starttime int64 func schedtrace(detailed bool) { @@ -451,8 +1774,6 @@ func schedtrace(detailed bool) { starttime = now } - gomaxprocs := int32(GOMAXPROCS(0)) - lock(&sched.lock) print("SCHED ", (now-starttime)/1e6, "ms: gomaxprocs=", gomaxprocs, " idleprocs=", sched.npidle, " threads=", sched.mcount, " spinningthreads=", sched.nmspinning, " idlethreads=", sched.nmidle, " runqueue=", sched.runqsize) if detailed { @@ -531,3 +1852,416 @@ func schedtrace(detailed bool) { unlock(&allglock) unlock(&sched.lock) } + +// Put mp on midle list. +// Sched must be locked. +// May run during STW, so write barriers are not allowed. +//go:nowritebarrier +func mput(mp *m) { + mp.schedlink = sched.midle + sched.midle.set(mp) + sched.nmidle++ + checkdead() +} + +// Try to get an m from midle list. +// Sched must be locked. +// May run during STW, so write barriers are not allowed. +//go:nowritebarrier +func mget() *m { + mp := sched.midle.ptr() + if mp != nil { + sched.midle = mp.schedlink + sched.nmidle-- + } + return mp +} + +// Put gp on the global runnable queue. +// Sched must be locked. +// May run during STW, so write barriers are not allowed. +//go:nowritebarrier +func globrunqput(gp *g) { + gp.schedlink = 0 + if sched.runqtail != 0 { + sched.runqtail.ptr().schedlink.set(gp) + } else { + sched.runqhead.set(gp) + } + sched.runqtail.set(gp) + sched.runqsize++ +} + +// Put gp at the head of the global runnable queue. +// Sched must be locked. +// May run during STW, so write barriers are not allowed. +//go:nowritebarrier +func globrunqputhead(gp *g) { + gp.schedlink = sched.runqhead + sched.runqhead.set(gp) + if sched.runqtail == 0 { + sched.runqtail.set(gp) + } + sched.runqsize++ +} + +// Put a batch of runnable goroutines on the global runnable queue. +// Sched must be locked. +func globrunqputbatch(ghead *g, gtail *g, n int32) { + gtail.schedlink = 0 + if sched.runqtail != 0 { + sched.runqtail.ptr().schedlink.set(ghead) + } else { + sched.runqhead.set(ghead) + } + sched.runqtail.set(gtail) + sched.runqsize += n +} + +// Try get a batch of G's from the global runnable queue. +// Sched must be locked. 
+func globrunqget(_p_ *p, max int32) *g { + if sched.runqsize == 0 { + return nil + } + + n := sched.runqsize/gomaxprocs + 1 + if n > sched.runqsize { + n = sched.runqsize + } + if max > 0 && n > max { + n = max + } + if n > int32(len(_p_.runq))/2 { + n = int32(len(_p_.runq)) / 2 + } + + sched.runqsize -= n + if sched.runqsize == 0 { + sched.runqtail = 0 + } + + gp := sched.runqhead.ptr() + sched.runqhead = gp.schedlink + n-- + for ; n > 0; n-- { + gp1 := sched.runqhead.ptr() + sched.runqhead = gp1.schedlink + runqput(_p_, gp1, false) + } + return gp +} + +// Put p to on _Pidle list. +// Sched must be locked. +// May run during STW, so write barriers are not allowed. +//go:nowritebarrier +func pidleput(_p_ *p) { + if !runqempty(_p_) { + throw("pidleput: P has non-empty run queue") + } + _p_.link = sched.pidle + sched.pidle.set(_p_) + atomic.Xadd(&sched.npidle, 1) // TODO: fast atomic +} + +// Try get a p from _Pidle list. +// Sched must be locked. +// May run during STW, so write barriers are not allowed. +//go:nowritebarrier +func pidleget() *p { + _p_ := sched.pidle.ptr() + if _p_ != nil { + sched.pidle = _p_.link + atomic.Xadd(&sched.npidle, -1) // TODO: fast atomic + } + return _p_ +} + +// runqempty returns true if _p_ has no Gs on its local run queue. +// It never returns true spuriously. +func runqempty(_p_ *p) bool { + // Defend against a race where 1) _p_ has G1 in runqnext but runqhead == runqtail, + // 2) runqput on _p_ kicks G1 to the runq, 3) runqget on _p_ empties runqnext. + // Simply observing that runqhead == runqtail and then observing that runqnext == nil + // does not mean the queue is empty. + for { + head := atomic.Load(&_p_.runqhead) + tail := atomic.Load(&_p_.runqtail) + runnext := atomic.Loaduintptr((*uintptr)(unsafe.Pointer(&_p_.runnext))) + if tail == atomic.Load(&_p_.runqtail) { + return head == tail && runnext == 0 + } + } +} + +// To shake out latent assumptions about scheduling order, +// we introduce some randomness into scheduling decisions +// when running with the race detector. +// The need for this was made obvious by changing the +// (deterministic) scheduling order in Go 1.5 and breaking +// many poorly-written tests. +// With the randomness here, as long as the tests pass +// consistently with -race, they shouldn't have latent scheduling +// assumptions. +const randomizeScheduler = raceenabled + +// runqput tries to put g on the local runnable queue. +// If next if false, runqput adds g to the tail of the runnable queue. +// If next is true, runqput puts g in the _p_.runnext slot. +// If the run queue is full, runnext puts g on the global queue. +// Executed only by the owner P. +func runqput(_p_ *p, gp *g, next bool) { + if randomizeScheduler && next && fastrand1()%2 == 0 { + next = false + } + + if next { + retryNext: + oldnext := _p_.runnext + if !_p_.runnext.cas(oldnext, guintptr(unsafe.Pointer(gp))) { + goto retryNext + } + if oldnext == 0 { + return + } + // Kick the old runnext out to the regular run queue. + gp = oldnext.ptr() + } + +retry: + h := atomic.Load(&_p_.runqhead) // load-acquire, synchronize with consumers + t := _p_.runqtail + if t-h < uint32(len(_p_.runq)) { + _p_.runq[t%uint32(len(_p_.runq))].set(gp) + atomic.Store(&_p_.runqtail, t+1) // store-release, makes the item available for consumption + return + } + if runqputslow(_p_, gp, h, t) { + return + } + // the queue is not full, now the put above must succeed + goto retry +} + +// Put g and a batch of work from local runnable queue on global queue. 
+// Executed only by the owner P. +func runqputslow(_p_ *p, gp *g, h, t uint32) bool { + var batch [len(_p_.runq)/2 + 1]*g + + // First, grab a batch from local queue. + n := t - h + n = n / 2 + if n != uint32(len(_p_.runq)/2) { + throw("runqputslow: queue is not full") + } + for i := uint32(0); i < n; i++ { + batch[i] = _p_.runq[(h+i)%uint32(len(_p_.runq))].ptr() + } + if !atomic.Cas(&_p_.runqhead, h, h+n) { // cas-release, commits consume + return false + } + batch[n] = gp + + if randomizeScheduler { + for i := uint32(1); i <= n; i++ { + j := fastrand1() % (i + 1) + batch[i], batch[j] = batch[j], batch[i] + } + } + + // Link the goroutines. + for i := uint32(0); i < n; i++ { + batch[i].schedlink.set(batch[i+1]) + } + + // Now put the batch on global queue. + lock(&sched.lock) + globrunqputbatch(batch[0], batch[n], int32(n+1)) + unlock(&sched.lock) + return true +} + +// Get g from local runnable queue. +// If inheritTime is true, gp should inherit the remaining time in the +// current time slice. Otherwise, it should start a new time slice. +// Executed only by the owner P. +func runqget(_p_ *p) (gp *g, inheritTime bool) { + // If there's a runnext, it's the next G to run. + for { + next := _p_.runnext + if next == 0 { + break + } + if _p_.runnext.cas(next, 0) { + return next.ptr(), true + } + } + + for { + h := atomic.Load(&_p_.runqhead) // load-acquire, synchronize with other consumers + t := _p_.runqtail + if t == h { + return nil, false + } + gp := _p_.runq[h%uint32(len(_p_.runq))].ptr() + if atomic.Cas(&_p_.runqhead, h, h+1) { // cas-release, commits consume + return gp, false + } + } +} + +// Grabs a batch of goroutines from _p_'s runnable queue into batch. +// Batch is a ring buffer starting at batchHead. +// Returns number of grabbed goroutines. +// Can be executed by any P. +func runqgrab(_p_ *p, batch *[256]guintptr, batchHead uint32, stealRunNextG bool) uint32 { + for { + h := atomic.Load(&_p_.runqhead) // load-acquire, synchronize with other consumers + t := atomic.Load(&_p_.runqtail) // load-acquire, synchronize with the producer + n := t - h + n = n - n/2 + if n == 0 { + if stealRunNextG { + // Try to steal from _p_.runnext. + if next := _p_.runnext; next != 0 { + // Sleep to ensure that _p_ isn't about to run the g we + // are about to steal. + // The important use case here is when the g running on _p_ + // ready()s another g and then almost immediately blocks. + // Instead of stealing runnext in this window, back off + // to give _p_ a chance to schedule runnext. This will avoid + // thrashing gs between different Ps. + // A sync chan send/recv takes ~50ns as of time of writing, + // so 3us gives ~50x overshoot. + if GOOS != "windows" { + usleep(3) + } else { + // On windows system timer granularity is 1-15ms, + // which is way too much for this optimization. + // So just yield. + osyield() + } + if !_p_.runnext.cas(next, 0) { + continue + } + batch[batchHead%uint32(len(batch))] = next + return 1 + } + } + return 0 + } + if n > uint32(len(_p_.runq)/2) { // read inconsistent h and t + continue + } + for i := uint32(0); i < n; i++ { + g := _p_.runq[(h+i)%uint32(len(_p_.runq))] + batch[(batchHead+i)%uint32(len(batch))] = g + } + if atomic.Cas(&_p_.runqhead, h, h+n) { // cas-release, commits consume + return n + } + } +} + +// Steal half of elements from local runnable queue of p2 +// and put onto local runnable queue of p. +// Returns one of the stolen elements (or nil if failed). 
+func runqsteal(_p_, p2 *p, stealRunNextG bool) *g { + t := _p_.runqtail + n := runqgrab(p2, &_p_.runq, t, stealRunNextG) + if n == 0 { + return nil + } + n-- + gp := _p_.runq[(t+n)%uint32(len(_p_.runq))].ptr() + if n == 0 { + return gp + } + h := atomic.Load(&_p_.runqhead) // load-acquire, synchronize with consumers + if t-h+n >= uint32(len(_p_.runq)) { + throw("runqsteal: runq overflow") + } + atomic.Store(&_p_.runqtail, t+n) // store-release, makes the item available for consumption + return gp +} + +// Active spinning for sync.Mutex. +//go:linkname sync_runtime_canSpin sync.runtime_canSpin +//go:nosplit +func sync_runtime_canSpin(i int) bool { + // sync.Mutex is cooperative, so we are conservative with spinning. + // Spin only few times and only if running on a multicore machine and + // GOMAXPROCS>1 and there is at least one other running P and local runq is empty. + // As opposed to runtime mutex we don't do passive spinning here, + // because there can be work on global runq on on other Ps. + if i >= active_spin || ncpu <= 1 || gomaxprocs <= int32(sched.npidle+sched.nmspinning)+1 { + return false + } + if p := getg().m.p.ptr(); !runqempty(p) { + return false + } + return true +} + +//go:linkname sync_runtime_doSpin sync.runtime_doSpin +//go:nosplit +func sync_runtime_doSpin() { + procyield(active_spin_cnt) +} + +var stealOrder randomOrder + +// randomOrder/randomEnum are helper types for randomized work stealing. +// They allow to enumerate all Ps in different pseudo-random orders without repetitions. +// The algorithm is based on the fact that if we have X such that X and GOMAXPROCS +// are coprime, then a sequences of (i + X) % GOMAXPROCS gives the required enumeration. +type randomOrder struct { + count uint32 + coprimes []uint32 +} + +type randomEnum struct { + i uint32 + count uint32 + pos uint32 + inc uint32 +} + +func (ord *randomOrder) reset(count uint32) { + ord.count = count + ord.coprimes = ord.coprimes[:0] + for i := uint32(1); i <= count; i++ { + if gcd(i, count) == 1 { + ord.coprimes = append(ord.coprimes, i) + } + } +} + +func (ord *randomOrder) start(i uint32) randomEnum { + return randomEnum{ + count: ord.count, + pos: i % ord.count, + inc: ord.coprimes[i%uint32(len(ord.coprimes))], + } +} + +func (enum *randomEnum) done() bool { + return enum.i == enum.count +} + +func (enum *randomEnum) next() { + enum.i++ + enum.pos = (enum.pos + enum.inc) % enum.count +} + +func (enum *randomEnum) position() uint32 { + return enum.pos +} + +func gcd(a, b uint32) uint32 { + for b != 0 { + a, b = b, a%b + } + return a +} diff --git a/libgo/go/runtime/proc_test.go b/libgo/go/runtime/proc_test.go index cc39017..813c929 100644 --- a/libgo/go/runtime/proc_test.go +++ b/libgo/go/runtime/proc_test.go @@ -556,19 +556,14 @@ func nonleaf(stop chan int) bool { } } -/* func TestSchedLocalQueue(t *testing.T) { - runtime.TestSchedLocalQueue1() + runtime.RunSchedLocalQueueTest() } -*/ -/* func TestSchedLocalQueueSteal(t *testing.T) { - runtime.TestSchedLocalQueueSteal1() + runtime.RunSchedLocalQueueStealTest() } -*/ -/* func TestSchedLocalQueueEmpty(t *testing.T) { if runtime.NumCPU() == 1 { // Takes too long and does not trigger the race. 
@@ -586,7 +581,6 @@ func TestSchedLocalQueueEmpty(t *testing.T) { } runtime.RunSchedLocalQueueEmptyTest(iters) } -*/ func benchmarkStackGrowth(b *testing.B, rec int) { b.RunParallel(func(pb *testing.PB) { diff --git a/libgo/go/runtime/runtime2.go b/libgo/go/runtime/runtime2.go index 571972c..755bc5f 100644 --- a/libgo/go/runtime/runtime2.go +++ b/libgo/go/runtime/runtime2.go @@ -5,6 +5,7 @@ package runtime import ( + "runtime/internal/atomic" "runtime/internal/sys" "unsafe" ) @@ -203,12 +204,10 @@ func (gp guintptr) ptr() *g { return (*g)(unsafe.Pointer(gp)) } //go:nosplit func (gp *guintptr) set(g *g) { *gp = guintptr(unsafe.Pointer(g)) } -/* //go:nosplit func (gp *guintptr) cas(old, new guintptr) bool { return atomic.Casuintptr((*uintptr)(unsafe.Pointer(gp)), uintptr(old), uintptr(new)) } -*/ type puintptr uintptr @@ -358,8 +357,8 @@ type g struct { sigpc uintptr gopc uintptr // pc of go statement that created this goroutine startpc uintptr // pc of goroutine function - racectx uintptr - waiting *sudog // sudog structures this g is waiting on (that have a valid elem ptr); in lock order + // Not for gccgo: racectx uintptr + waiting *sudog // sudog structures this g is waiting on (that have a valid elem ptr); in lock order // Not for gccgo: cgoCtxt []uintptr // cgo traceback context // Per-G GC state @@ -521,16 +520,16 @@ type p struct { gfreecnt int32 sudogcache []*sudog - // Not for gccgo for now: sudogbuf [128]*sudog + sudogbuf [128]*sudog - // Not for gccgo for now: tracebuf traceBufPtr + tracebuf traceBufPtr // Not for gccgo for now: palloc persistentAlloc // per-P to avoid mutex // Per-P GC state - // Not for gccgo for now: gcAssistTime int64 // Nanoseconds in assistAlloc - // Not for gccgo for now: gcBgMarkWorker guintptr - // Not for gccgo for now: gcMarkWorkerMode gcMarkWorkerMode + gcAssistTime int64 // Nanoseconds in assistAlloc + gcBgMarkWorker guintptr + gcMarkWorkerMode gcMarkWorkerMode // gcw is this P's GC work buffer cache. The work buffer is // filled by write barriers, drained by mutator assists, and @@ -760,18 +759,13 @@ var ( // allm *m - allp [_MaxGomaxprocs + 1]*p - - // gomaxprocs int32 - - panicking uint32 - ncpu int32 - - // forcegc forcegcstate - - sched schedt - - // newprocs int32 + allp [_MaxGomaxprocs + 1]*p + gomaxprocs int32 + panicking uint32 + ncpu int32 + forcegc forcegcstate + sched schedt + newprocs int32 // Information about what cpu features are available. // Set on startup. diff --git a/libgo/go/runtime/stubs.go b/libgo/go/runtime/stubs.go index 3d18408..6787476 100644 --- a/libgo/go/runtime/stubs.go +++ b/libgo/go/runtime/stubs.go @@ -304,6 +304,7 @@ const ( _64bit = 1 << (^uintptr(0) >> 63) / 2 _MHeapMap_TotalBits = (_64bit*sys.GoosWindows)*35 + (_64bit*(1-sys.GoosWindows)*(1-sys.GoosDarwin*sys.GoarchArm64))*39 + sys.GoosDarwin*sys.GoarchArm64*31 + (1-_64bit)*32 _MaxMem = uintptr(1<<_MHeapMap_TotalBits - 1) + _MaxGcproc = 32 ) // Here for gccgo until we port malloc.go. @@ -350,7 +351,6 @@ func entersyscallblock(int32) func exitsyscall(int32) func gopark(func(*g, unsafe.Pointer) bool, unsafe.Pointer, string, byte, int) func goparkunlock(*mutex, string, byte, int) -func goready(*g, int) // Temporary hack for gccgo until we port proc.go. //go:nosplit @@ -411,12 +411,6 @@ func roundupsize(uintptr) uintptr // Here for gccgo until we port mgc.go. func GC() -// Here for gccgo until we port proc.go. -var worldsema uint32 = 1 - -func stopTheWorldWithSema() -func startTheWorldWithSema() - // For gccgo to call from C code. 
//go:linkname acquireWorldsema runtime.acquireWorldsema func acquireWorldsema() { @@ -429,26 +423,6 @@ func releaseWorldsema() { semrelease(&worldsema) } -// Here for gccgo until we port proc.go. -func stopTheWorld(reason string) { - semacquire(&worldsema, false) - getg().m.preemptoff = reason - getg().m.gcing = 1 - systemstack(stopTheWorldWithSema) -} - -// Here for gccgo until we port proc.go. -func startTheWorld() { - getg().m.gcing = 0 - getg().m.locks++ - systemstack(startTheWorldWithSema) - // worldsema must be held over startTheWorldWithSema to ensure - // gomaxprocs cannot change while worldsema is held. - semrelease(&worldsema) - getg().m.preemptoff = "" - getg().m.locks-- -} - // For gccgo to call from C code, so that the C code and the Go code // can share the memstats variable for now. //go:linkname getMstats runtime.getMstats @@ -461,6 +435,7 @@ func setcpuprofilerate_m(hz int32) // Temporary for gccgo until we port mem_GOOS.go. func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer +func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) // Temporary for gccgo until we port proc.go, so that the C signal // handler can call into cpuprof. @@ -522,7 +497,6 @@ func getZerobase() *uintptr { func sigprof() func mcount() int32 func goexit1() -func freezetheworld() // Get signal trampoline, written in C. func getSigtramp() uintptr @@ -592,6 +566,7 @@ func getPanicking() uint32 { // Temporary for gccgo until we port mcache.go. func allocmcache() *mcache +func freemcache(*mcache) // Temporary for gccgo until we port mgc.go. // This is just so that allgadd will compile. @@ -616,3 +591,60 @@ func gcount() int32 { unlock(&allglock) return n } + +// Temporary for gccgo until we port mgc.go. +var gcBlackenEnabled uint32 + +// Temporary for gccgo until we port mgc.go. +func gcMarkWorkAvailable(p *p) bool { + return false +} + +// Temporary for gccgo until we port mgc.go. +var gcController gcControllerState + +// Temporary for gccgo until we port mgc.go. +type gcControllerState struct { +} + +// Temporary for gccgo until we port mgc.go. +func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g { + return nil +} + +// Temporary for gccgo until we port mgc.go. +var gcphase uint32 + +// Temporary for gccgo until we port mgc.go. +const ( + _GCoff = iota + _GCmark + _GCmarktermination +) + +// Temporary for gccgo until we port mgc.go. +type gcMarkWorkerMode int + +// Temporary for gccgo until we port mgc.go. +const ( + gcMarkWorkerDedicatedMode gcMarkWorkerMode = iota + gcMarkWorkerFractionalMode + gcMarkWorkerIdleMode +) + +// Temporary for gccgo until we port mheap.go. +type mheap struct { +} + +// Temporary for gccgo until we port mheap.go. +var mheap_ mheap + +// Temporary for gccgo until we port mheap.go. +func (h *mheap) scavenge(k int32, now, limit uint64) { +} + +// Temporary for gccgo until we initialize ncpu in Go. +//go:linkname setncpu runtime.setncpu +func setncpu(n int32) { + ncpu = n +} diff --git a/libgo/go/runtime/trace.go b/libgo/go/runtime/trace.go index 35126f1..09a150f 100644 --- a/libgo/go/runtime/trace.go +++ b/libgo/go/runtime/trace.go @@ -127,10 +127,10 @@ var trace struct { // traceBufHeader is per-P tracing buffer. 
type traceBufHeader struct { - link traceBufPtr // in trace.empty/full - lastTicks uint64 // when we wrote the last event - pos int // next write offset in arr - stk [traceStackSize]uintptr // scratch buffer for traceback + link traceBufPtr // in trace.empty/full + lastTicks uint64 // when we wrote the last event + pos int // next write offset in arr + stk [traceStackSize]location // scratch buffer for traceback } // traceBuf is per-P tracing buffer. @@ -152,9 +152,6 @@ func traceBufPtrOf(b *traceBuf) traceBufPtr { return traceBufPtr(unsafe.Pointer(b)) } -/* -Commented out for gccgo for now. - // StartTrace enables tracing for the current process. // While tracing, the data will be buffered and available via ReadTrace. // StartTrace returns an error if tracing is already enabled. @@ -522,13 +519,7 @@ func traceEvent(ev byte, skip int, args ...uint64) { if gp == _g_ { nstk = callers(skip, buf.stk[:]) } else if gp != nil { - gp = mp.curg - // This may happen when tracing a system call, - // so we must lock the stack. - if gcTryLockStackBarriers(gp) { - nstk = gcallers(gp, skip, buf.stk[:]) - gcUnlockStackBarriers(gp) - } + // FIXME: get stack trace of different goroutine. } if nstk > 0 { nstk-- // skip runtime.goexit @@ -647,8 +638,6 @@ func (buf *traceBuf) byte(v byte) { buf.pos++ } -*/ - // traceStackTable maps stack traces (arrays of PC's) to unique uint32 ids. // It is lock-free for reading. type traceStackTable struct { @@ -664,28 +653,30 @@ type traceStack struct { hash uintptr id uint32 n int - stk [0]uintptr // real type [n]uintptr + stk [0]location // real type [n]location } type traceStackPtr uintptr -/* -Commented out for gccgo for now. - func (tp traceStackPtr) ptr() *traceStack { return (*traceStack)(unsafe.Pointer(tp)) } // stack returns slice of PCs. -func (ts *traceStack) stack() []uintptr { - return (*[traceStackSize]uintptr)(unsafe.Pointer(&ts.stk))[:ts.n] +func (ts *traceStack) stack() []location { + return (*[traceStackSize]location)(unsafe.Pointer(&ts.stk))[:ts.n] } // put returns a unique id for the stack trace pcs and caches it in the table, // if it sees the trace for the first time. -func (tab *traceStackTable) put(pcs []uintptr) uint32 { +func (tab *traceStackTable) put(pcs []location) uint32 { if len(pcs) == 0 { return 0 } - hash := memhash(unsafe.Pointer(&pcs[0]), 0, uintptr(len(pcs))*unsafe.Sizeof(pcs[0])) + var hash uintptr + for _, loc := range pcs { + hash += loc.pc + hash += hash << 10 + hash ^= hash >> 6 + } // First, search the hashtable w/o the mutex. if id := tab.find(pcs, hash); id != 0 { return id @@ -714,7 +705,7 @@ func (tab *traceStackTable) put(pcs []uintptr) uint32 { } // find checks if the stack trace pcs is already present in the table. -func (tab *traceStackTable) find(pcs []uintptr, hash uintptr) uint32 { +func (tab *traceStackTable) find(pcs []location, hash uintptr) uint32 { part := int(hash % uintptr(len(tab.tab))) Search: for stk := tab.tab[part].ptr(); stk != nil; stk = stk.link.ptr() { @@ -732,13 +723,12 @@ Search: // newStack allocates a new stack of size n. func (tab *traceStackTable) newStack(n int) *traceStack { - return (*traceStack)(tab.mem.alloc(unsafe.Sizeof(traceStack{}) + uintptr(n)*sys.PtrSize)) + return (*traceStack)(tab.mem.alloc(unsafe.Sizeof(traceStack{}) + uintptr(n)*unsafe.Sizeof(location{}))) } // dump writes all previously cached stacks to trace buffers, // releases all memory and resets state. 
func (tab *traceStackTable) dump() { - frames := make(map[uintptr]traceFrame) var tmp [(2 + 4*traceStackSize) * traceBytesPerNumber]byte buf := traceFlush(0).ptr() for _, stk := range tab.tab { @@ -749,8 +739,8 @@ func (tab *traceStackTable) dump() { tmpbuf = traceAppend(tmpbuf, uint64(stk.n)) for _, pc := range stk.stack() { var frame traceFrame - frame, buf = traceFrameForPC(buf, frames, pc) - tmpbuf = traceAppend(tmpbuf, uint64(pc)) + frame, buf = traceFrameForPC(buf, pc) + tmpbuf = traceAppend(tmpbuf, uint64(pc.pc)) tmpbuf = traceAppend(tmpbuf, uint64(frame.funcID)) tmpbuf = traceAppend(tmpbuf, uint64(frame.fileID)) tmpbuf = traceAppend(tmpbuf, uint64(frame.line)) @@ -780,25 +770,15 @@ type traceFrame struct { line uint64 } -func traceFrameForPC(buf *traceBuf, frames map[uintptr]traceFrame, pc uintptr) (traceFrame, *traceBuf) { - if frame, ok := frames[pc]; ok { - return frame, buf - } - +func traceFrameForPC(buf *traceBuf, loc location) (traceFrame, *traceBuf) { var frame traceFrame - f := findfunc(pc) - if f == nil { - frames[pc] = frame - return frame, buf - } - - fn := funcname(f) + fn := loc.function const maxLen = 1 << 10 if len(fn) > maxLen { fn = fn[len(fn)-maxLen:] } frame.funcID, buf = traceString(buf, fn) - file, line := funcline(f, pc-sys.PCQuantum) + file, line := loc.filename, loc.lineno frame.line = uint64(line) if len(file) > maxLen { file = file[len(file)-maxLen:] @@ -807,8 +787,6 @@ func traceFrameForPC(buf *traceBuf, frames map[uintptr]traceFrame, pc uintptr) ( return frame, buf } -*/ - // traceAlloc is a non-thread-safe region allocator. // It holds a linked list of traceAllocBlock. type traceAlloc struct { @@ -831,9 +809,6 @@ type traceAllocBlockPtr uintptr func (p traceAllocBlockPtr) ptr() *traceAllocBlock { return (*traceAllocBlock)(unsafe.Pointer(p)) } func (p *traceAllocBlockPtr) set(x *traceAllocBlock) { *p = traceAllocBlockPtr(unsafe.Pointer(x)) } -/* -Commented out for gccgo for now. - // alloc allocates n-byte block. func (a *traceAlloc) alloc(n uintptr) unsafe.Pointer { n = round(n, sys.PtrSize) @@ -841,6 +816,8 @@ func (a *traceAlloc) alloc(n uintptr) unsafe.Pointer { if n > uintptr(len(a.head.ptr().data)) { throw("trace: alloc too large") } + // This is only safe because the strings returned by callers + // are stored in a location that is not in the Go heap. block := (*traceAllocBlock)(sysAlloc(unsafe.Sizeof(traceAllocBlock{}), &memstats.other_sys)) if block == nil { throw("trace: out of memory") @@ -913,7 +890,7 @@ func traceGoCreate(newg *g, pc uintptr) { newg.traceseq = 0 newg.tracelastp = getg().m.p // +PCQuantum because traceFrameForPC expects return PCs and subtracts PCQuantum. - id := trace.stackTab.put([]uintptr{pc + sys.PCQuantum}) + id := trace.stackTab.put([]location{location{pc: pc + sys.PCQuantum}}) traceEvent(traceEvGoCreate, 2, uint64(newg.goid), uint64(id)) } @@ -1004,5 +981,3 @@ func traceHeapAlloc() { func traceNextGC() { traceEvent(traceEvNextGC, -1, memstats.next_gc) } - -*/ |
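Editor's note on the stack hashing in traceStackTable.put above: the upstream code hashed the raw PC slice with memhash, but the gccgo keys are now []location values, so the patch mixes each frame's pc inline instead. The add / shift / xor steps resemble the mixing phase of the Jenkins one-at-a-time hash. A minimal standalone version under that reading (location here is a cut-down stand-in for the runtime type, which in the patch also carries function, filename and lineno):

package main

import "fmt"

// location is a simplified stand-in for the gccgo runtime's frame record;
// only the pc field participates in the hash.
type location struct {
	pc uintptr
}

// stackHash mixes each frame's pc the same way traceStackTable.put does:
// add the value, then fold in hash<<10 and xor in hash>>6 to spread the bits.
func stackHash(stk []location) uintptr {
	var hash uintptr
	for _, loc := range stk {
		hash += loc.pc
		hash += hash << 10
		hash ^= hash >> 6
	}
	return hash
}

func main() {
	// Hypothetical return PCs for a three-frame stack.
	stk := []location{{pc: 0x401000}, {pc: 0x402340}, {pc: 0x4f00a8}}
	fmt.Printf("stack hash: %#x\n", stackHash(stk))
}

One plausible reason for hashing only the pc field is that location carries strings for the function and file names, so hashing the raw struct bytes would mix in pointer values rather than just the program counters.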