Diffstat (limited to 'libgo/runtime/proc.c')
-rw-r--r-- | libgo/runtime/proc.c | 1400
1 file changed, 60 insertions(+), 1340 deletions(-)
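Most of the churn below follows one pattern: scheduler routines that used to be static C functions in proc.c (startm, handoffp, wakep, stopm, procresize, runqput, and so on) are deleted, and the C side instead gains extern declarations whose __asm__ names bind to the Go implementations that now live in proc.go. A minimal sketch of that binding pattern follows; the GOSYM_PREFIX stand-in and the example caller are illustrative assumptions, since in the real tree both the macro and the P type come from the gccgo runtime headers.

/* Sketch of the C-to-Go binding used throughout this patch.
 * Assumption: GOSYM_PREFIX is normally supplied by runtime.h; an empty
 * prefix is used here only so the snippet stands alone. */
#define GOSYM_PREFIX ""

typedef struct P P;   /* opaque processor descriptor; real definition is in runtime.h */

/* Bind the C name startm to the symbol of the Go function runtime.startm. */
extern void startm(P *p, _Bool spinning)
	__asm__ (GOSYM_PREFIX "runtime.startm");

/* Hypothetical caller: C code in the runtime can now call straight into the
 * Go scheduler. Passing a nil P with spinning=true mirrors how the removed
 * wakep() used startm. */
static void wake_idle_worker(void)
{
	startm(0, 1);
}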
diff --git a/libgo/runtime/proc.c b/libgo/runtime/proc.c index 8a7a2d7..c4a5283 100644 --- a/libgo/runtime/proc.c +++ b/libgo/runtime/proc.c @@ -365,9 +365,14 @@ extern P** runtime_getAllP() __asm__ (GOSYM_PREFIX "runtime.getAllP"); extern G* allocg(void) __asm__ (GOSYM_PREFIX "runtime.allocg"); +extern bool needaddgcproc(void) + __asm__ (GOSYM_PREFIX "runtime.needaddgcproc"); +extern void startm(P*, bool) + __asm__(GOSYM_PREFIX "runtime.startm"); +extern void newm(void(*)(void), P*) + __asm__(GOSYM_PREFIX "runtime.newm"); Sched* runtime_sched; -int32 runtime_gomaxprocs; M runtime_m0; G runtime_g0; // idle goroutine for m0 G* runtime_lastg; @@ -376,51 +381,58 @@ P** runtime_allp; int8* runtime_goos; int32 runtime_ncpu; bool runtime_precisestack; -static int32 newprocs; bool runtime_isarchive; void* runtime_mstart(void*); -static void runqput(P*, G*); -static G* runqget(P*); -static bool runqputslow(P*, G*, uint32, uint32); -static G* runqsteal(P*, P*); -static void mput(M*); -static M* mget(void); static void mcommoninit(M*); -static void schedule(void); -static void procresize(int32); -static void acquirep(P*); -static P* releasep(void); -static void newm(void(*)(void), P*); -static void stopm(void); -static void startm(P*, bool); -static void handoffp(P*); -static void wakep(void); -static void stoplockedm(void); -static void startlockedm(G*); -static void sysmon(void); -static uint32 retake(int64); -static void incidlelocked(int32); static void exitsyscall0(G*); static void park0(G*); static void goexit0(G*); static void gfput(P*, G*); static G* gfget(P*); -static void gfpurge(P*); -static void globrunqput(G*); -static void globrunqputbatch(G*, G*, int32); -static G* globrunqget(P*, int32); -static P* pidleget(void); -static void pidleput(P*); -static void injectglist(G*); -static bool preemptall(void); static bool exitsyscallfast(void); -void allgadd(G*) +extern void setncpu(int32) + __asm__(GOSYM_PREFIX "runtime.setncpu"); +extern void allgadd(G*) __asm__(GOSYM_PREFIX "runtime.allgadd"); -void checkdead(void) +extern void stopm(void) + __asm__(GOSYM_PREFIX "runtime.stopm"); +extern void handoffp(P*) + __asm__(GOSYM_PREFIX "runtime.handoffp"); +extern void wakep(void) + __asm__(GOSYM_PREFIX "runtime.wakep"); +extern void stoplockedm(void) + __asm__(GOSYM_PREFIX "runtime.stoplockedm"); +extern void schedule(void) + __asm__(GOSYM_PREFIX "runtime.schedule"); +extern void execute(G*, bool) + __asm__(GOSYM_PREFIX "runtime.execute"); +extern void procresize(int32) + __asm__(GOSYM_PREFIX "runtime.procresize"); +extern void acquirep(P*) + __asm__(GOSYM_PREFIX "runtime.acquirep"); +extern P* releasep(void) + __asm__(GOSYM_PREFIX "runtime.releasep"); +extern void incidlelocked(int32) + __asm__(GOSYM_PREFIX "runtime.incidlelocked"); +extern void checkdead(void) __asm__(GOSYM_PREFIX "runtime.checkdead"); +extern void sysmon(void) + __asm__(GOSYM_PREFIX "runtime.sysmon"); +extern void mput(M*) + __asm__(GOSYM_PREFIX "runtime.mput"); +extern M* mget(void) + __asm__(GOSYM_PREFIX "runtime.mget"); +extern void globrunqput(G*) + __asm__(GOSYM_PREFIX "runtime.globrunqput"); +extern P* pidleget(void) + __asm__(GOSYM_PREFIX "runtime.pidleget"); +extern bool runqempty(P*) + __asm__(GOSYM_PREFIX "runtime.runqempty"); +extern void runqput(P*, G*, bool) + __asm__(GOSYM_PREFIX "runtime.runqput"); bool runtime_isstarted; @@ -441,6 +453,7 @@ runtime_schedinit(void) const byte *p; Eface i; + setncpu(runtime_ncpu); runtime_sched = runtime_getsched(); m = &runtime_m0; @@ -660,234 +673,6 @@ mcommoninit(M *mp) 
runtime_unlock(&runtime_sched->lock); } -// Mark gp ready to run. -void -runtime_ready(G *gp) -{ - // Mark runnable. - g->m->locks++; // disable preemption because it can be holding p in a local var - if(gp->atomicstatus != _Gwaiting) { - runtime_printf("goroutine %D has status %d\n", gp->goid, gp->atomicstatus); - runtime_throw("bad g->atomicstatus in ready"); - } - gp->atomicstatus = _Grunnable; - runqput((P*)g->m->p, gp); - if(runtime_atomicload(&runtime_sched->npidle) != 0 && runtime_atomicload(&runtime_sched->nmspinning) == 0) // TODO: fast atomic - wakep(); - g->m->locks--; -} - -void goready(G*, int) __asm__ (GOSYM_PREFIX "runtime.goready"); - -void -goready(G* gp, int traceskip __attribute__ ((unused))) -{ - runtime_ready(gp); -} - -int32 -runtime_gcprocs(void) -{ - int32 n; - - // Figure out how many CPUs to use during GC. - // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc. - runtime_lock(&runtime_sched->lock); - n = runtime_gomaxprocs; - if(n > runtime_ncpu) - n = runtime_ncpu > 0 ? runtime_ncpu : 1; - if(n > MaxGcproc) - n = MaxGcproc; - if(n > runtime_sched->nmidle+1) // one M is currently running - n = runtime_sched->nmidle+1; - runtime_unlock(&runtime_sched->lock); - return n; -} - -static bool -needaddgcproc(void) -{ - int32 n; - - runtime_lock(&runtime_sched->lock); - n = runtime_gomaxprocs; - if(n > runtime_ncpu) - n = runtime_ncpu; - if(n > MaxGcproc) - n = MaxGcproc; - n -= runtime_sched->nmidle+1; // one M is currently running - runtime_unlock(&runtime_sched->lock); - return n > 0; -} - -void -runtime_helpgc(int32 nproc) -{ - M *mp; - int32 n, pos; - - runtime_lock(&runtime_sched->lock); - pos = 0; - for(n = 1; n < nproc; n++) { // one M is currently running - if(runtime_allp[pos]->mcache == g->m->mcache) - pos++; - mp = mget(); - if(mp == nil) - runtime_throw("runtime_gcprocs inconsistency"); - mp->helpgc = n; - mp->mcache = runtime_allp[pos]->mcache; - pos++; - runtime_notewakeup(&mp->park); - } - runtime_unlock(&runtime_sched->lock); -} - -// Similar to stoptheworld but best-effort and can be called several times. -// There is no reverse operation, used during crashing. -// This function must not lock any mutexes. 
-void -runtime_freezetheworld(void) -{ - int32 i; - - if(runtime_gomaxprocs == 1) - return; - // stopwait and preemption requests can be lost - // due to races with concurrently executing threads, - // so try several times - for(i = 0; i < 5; i++) { - // this should tell the scheduler to not start any new goroutines - runtime_sched->stopwait = 0x7fffffff; - runtime_atomicstore((uint32*)&runtime_sched->gcwaiting, 1); - // this should stop running goroutines - if(!preemptall()) - break; // no running goroutines - runtime_usleep(1000); - } - // to be sure - runtime_usleep(1000); - preemptall(); - runtime_usleep(1000); -} - -void -runtime_stopTheWorldWithSema(void) -{ - int32 i; - uint32 s; - P *p; - bool wait; - - runtime_lock(&runtime_sched->lock); - runtime_sched->stopwait = runtime_gomaxprocs; - runtime_atomicstore((uint32*)&runtime_sched->gcwaiting, 1); - preemptall(); - // stop current P - ((P*)g->m->p)->status = _Pgcstop; - runtime_sched->stopwait--; - // try to retake all P's in _Psyscall status - for(i = 0; i < runtime_gomaxprocs; i++) { - p = runtime_allp[i]; - s = p->status; - if(s == _Psyscall && runtime_cas(&p->status, s, _Pgcstop)) - runtime_sched->stopwait--; - } - // stop idle P's - while((p = pidleget()) != nil) { - p->status = _Pgcstop; - runtime_sched->stopwait--; - } - wait = runtime_sched->stopwait > 0; - runtime_unlock(&runtime_sched->lock); - - // wait for remaining P's to stop voluntarily - if(wait) { - runtime_notesleep(&runtime_sched->stopnote); - runtime_noteclear(&runtime_sched->stopnote); - } - if(runtime_sched->stopwait) - runtime_throw("stoptheworld: not stopped"); - for(i = 0; i < runtime_gomaxprocs; i++) { - p = runtime_allp[i]; - if(p->status != _Pgcstop) - runtime_throw("stoptheworld: not stopped"); - } -} - -static void -mhelpgc(void) -{ - g->m->helpgc = -1; -} - -void -runtime_startTheWorldWithSema(void) -{ - P *p, *p1; - M *mp; - G *gp; - bool add; - - g->m->locks++; // disable preemption because it can be holding p in a local var - gp = runtime_netpoll(false); // non-blocking - injectglist(gp); - add = needaddgcproc(); - runtime_lock(&runtime_sched->lock); - if(newprocs) { - procresize(newprocs); - newprocs = 0; - } else - procresize(runtime_gomaxprocs); - runtime_sched->gcwaiting = 0; - - p1 = nil; - while((p = pidleget()) != nil) { - // procresize() puts p's with work at the beginning of the list. - // Once we reach a p without a run queue, the rest don't have one either. - if(p->runqhead == p->runqtail) { - pidleput(p); - break; - } - p->m = (uintptr)mget(); - p->link = (uintptr)p1; - p1 = p; - } - if(runtime_sched->sysmonwait) { - runtime_sched->sysmonwait = false; - runtime_notewakeup(&runtime_sched->sysmonnote); - } - runtime_unlock(&runtime_sched->lock); - - while(p1) { - p = p1; - p1 = (P*)p1->link; - if(p->m) { - mp = (M*)p->m; - p->m = 0; - if(mp->nextp) - runtime_throw("startTheWorldWithSema: inconsistent mp->nextp"); - mp->nextp = (uintptr)p; - runtime_notewakeup(&mp->park); - } else { - // Start M to run P. Do not start another M below. - newm(nil, p); - add = false; - } - } - - if(add) { - // If GC could have used another helper proc, start one now, - // in the hope that it will be available next time. - // It would have been even better to start it before the collection, - // but doing so requires allocating memory, so it's tricky to - // coordinate. This lazy approach works out in practice: - // we don't mind if the first couple gc rounds don't have quite - // the maximum number of procs. 
- newm(mhelpgc, nil); - } - g->m->locks--; -} - // Called to start an M. void* runtime_mstart(void* mp) @@ -1055,7 +840,7 @@ makeGContext(G* gp, byte* sp, uintptr spsize) { } // Create a new m. It will start off with a call to fn, or else the scheduler. -static void +void newm(void(*fn)(void), P *p) { M *mp; @@ -1067,40 +852,6 @@ newm(void(*fn)(void), P *p) runtime_newosproc(mp); } -// Stops execution of the current m until new work is available. -// Returns with acquired P. -static void -stopm(void) -{ - M* m; - - m = g->m; - if(m->locks) - runtime_throw("stopm holding locks"); - if(m->p) - runtime_throw("stopm holding p"); - if(m->spinning) { - m->spinning = false; - runtime_xadd(&runtime_sched->nmspinning, -1); - } - -retry: - runtime_lock(&runtime_sched->lock); - mput(m); - runtime_unlock(&runtime_sched->lock); - runtime_notesleep(&m->park); - m = g->m; - runtime_noteclear(&m->park); - if(m->helpgc) { - runtime_gchelper(); - m->helpgc = 0; - m->mcache = nil; - goto retry; - } - acquirep((P*)m->nextp); - m->nextp = 0; -} - static void mspinning(void) { @@ -1109,7 +860,7 @@ mspinning(void) // Schedules some M to run the p (creates an M if necessary). // If p==nil, tries to get an idle P, if no idle P's does nothing. -static void +void startm(P *p, bool spinning) { M *mp; @@ -1138,361 +889,12 @@ startm(P *p, bool spinning) runtime_throw("startm: m is spinning"); if(mp->nextp) runtime_throw("startm: m has p"); - mp->spinning = spinning; - mp->nextp = (uintptr)p; - runtime_notewakeup(&mp->park); -} - -// Hands off P from syscall or locked M. -static void -handoffp(P *p) -{ - // if it has local work, start it straight away - if(p->runqhead != p->runqtail || runtime_sched->runqsize) { - startm(p, false); - return; - } - // no local work, check that there are no spinning/idle M's, - // otherwise our help is not required - if(runtime_atomicload(&runtime_sched->nmspinning) + runtime_atomicload(&runtime_sched->npidle) == 0 && // TODO: fast atomic - runtime_cas(&runtime_sched->nmspinning, 0, 1)) { - startm(p, true); - return; - } - runtime_lock(&runtime_sched->lock); - if(runtime_sched->gcwaiting) { - p->status = _Pgcstop; - if(--runtime_sched->stopwait == 0) - runtime_notewakeup(&runtime_sched->stopnote); - runtime_unlock(&runtime_sched->lock); - return; - } - if(runtime_sched->runqsize) { - runtime_unlock(&runtime_sched->lock); - startm(p, false); - return; + if(spinning && !runqempty(p)) { + runtime_throw("startm: p has runnable gs"); } - // If this is the last running P and nobody is polling network, - // need to wakeup another M to poll network. - if(runtime_sched->npidle == (uint32)runtime_gomaxprocs-1 && runtime_atomicload64(&runtime_sched->lastpoll) != 0) { - runtime_unlock(&runtime_sched->lock); - startm(p, false); - return; - } - pidleput(p); - runtime_unlock(&runtime_sched->lock); -} - -// Tries to add one more P to execute G's. -// Called when a G is made runnable (newproc, ready). -static void -wakep(void) -{ - // be conservative about spinning threads - if(!runtime_cas(&runtime_sched->nmspinning, 0, 1)) - return; - startm(nil, true); -} - -// Stops execution of the current m that is locked to a g until the g is runnable again. -// Returns with acquired P. -static void -stoplockedm(void) -{ - M *m; - P *p; - - m = g->m; - if(m->lockedg == nil || m->lockedg->lockedm != m) - runtime_throw("stoplockedm: inconsistent locking"); - if(m->p) { - // Schedule another M to run this p. 
- p = releasep(); - handoffp(p); - } - incidlelocked(1); - // Wait until another thread schedules lockedg again. - runtime_notesleep(&m->park); - m = g->m; - runtime_noteclear(&m->park); - if(m->lockedg->atomicstatus != _Grunnable) - runtime_throw("stoplockedm: not runnable"); - acquirep((P*)m->nextp); - m->nextp = 0; -} - -// Schedules the locked m to run the locked gp. -static void -startlockedm(G *gp) -{ - M *mp; - P *p; - - mp = gp->lockedm; - if(mp == g->m) - runtime_throw("startlockedm: locked to me"); - if(mp->nextp) - runtime_throw("startlockedm: m has p"); - // directly handoff current P to the locked m - incidlelocked(-1); - p = releasep(); + mp->spinning = spinning; mp->nextp = (uintptr)p; runtime_notewakeup(&mp->park); - stopm(); -} - -// Stops the current m for stoptheworld. -// Returns when the world is restarted. -static void -gcstopm(void) -{ - P *p; - - if(!runtime_sched->gcwaiting) - runtime_throw("gcstopm: not waiting for gc"); - if(g->m->spinning) { - g->m->spinning = false; - runtime_xadd(&runtime_sched->nmspinning, -1); - } - p = releasep(); - runtime_lock(&runtime_sched->lock); - p->status = _Pgcstop; - if(--runtime_sched->stopwait == 0) - runtime_notewakeup(&runtime_sched->stopnote); - runtime_unlock(&runtime_sched->lock); - stopm(); -} - -// Schedules gp to run on the current M. -// Never returns. -static void -execute(G *gp) -{ - int32 hz; - - if(gp->atomicstatus != _Grunnable) { - runtime_printf("execute: bad g status %d\n", gp->atomicstatus); - runtime_throw("execute: bad g status"); - } - gp->atomicstatus = _Grunning; - gp->waitsince = 0; - ((P*)g->m->p)->schedtick++; - g->m->curg = gp; - gp->m = g->m; - - // Check whether the profiler needs to be turned on or off. - hz = runtime_sched->profilehz; - if(g->m->profilehz != hz) - runtime_resetcpuprofiler(hz); - - runtime_gogo(gp); -} - -// Finds a runnable goroutine to execute. -// Tries to steal from other P's, get g from global queue, poll network. -static G* -findrunnable(void) -{ - G *gp; - P *p; - int32 i; - -top: - if(runtime_sched->gcwaiting) { - gcstopm(); - goto top; - } - if(runtime_fingwait && runtime_fingwake && (gp = runtime_wakefing()) != nil) - runtime_ready(gp); - // local runq - gp = runqget((P*)g->m->p); - if(gp) - return gp; - // global runq - if(runtime_sched->runqsize) { - runtime_lock(&runtime_sched->lock); - gp = globrunqget((P*)g->m->p, 0); - runtime_unlock(&runtime_sched->lock); - if(gp) - return gp; - } - // poll network - gp = runtime_netpoll(false); // non-blocking - if(gp) { - injectglist((G*)gp->schedlink); - gp->atomicstatus = _Grunnable; - return gp; - } - // If number of spinning M's >= number of busy P's, block. - // This is necessary to prevent excessive CPU consumption - // when GOMAXPROCS>>1 but the program parallelism is low. 
- if(!g->m->spinning && 2 * runtime_atomicload(&runtime_sched->nmspinning) >= runtime_gomaxprocs - runtime_atomicload(&runtime_sched->npidle)) // TODO: fast atomic - goto stop; - if(!g->m->spinning) { - g->m->spinning = true; - runtime_xadd(&runtime_sched->nmspinning, 1); - } - // random steal from other P's - for(i = 0; i < 2*runtime_gomaxprocs; i++) { - if(runtime_sched->gcwaiting) - goto top; - p = runtime_allp[runtime_fastrand1()%runtime_gomaxprocs]; - if(p == (P*)g->m->p) - gp = runqget(p); - else - gp = runqsteal((P*)g->m->p, p); - if(gp) - return gp; - } -stop: - // return P and block - runtime_lock(&runtime_sched->lock); - if(runtime_sched->gcwaiting) { - runtime_unlock(&runtime_sched->lock); - goto top; - } - if(runtime_sched->runqsize) { - gp = globrunqget((P*)g->m->p, 0); - runtime_unlock(&runtime_sched->lock); - return gp; - } - p = releasep(); - pidleput(p); - runtime_unlock(&runtime_sched->lock); - if(g->m->spinning) { - g->m->spinning = false; - runtime_xadd(&runtime_sched->nmspinning, -1); - } - // check all runqueues once again - for(i = 0; i < runtime_gomaxprocs; i++) { - p = runtime_allp[i]; - if(p && p->runqhead != p->runqtail) { - runtime_lock(&runtime_sched->lock); - p = pidleget(); - runtime_unlock(&runtime_sched->lock); - if(p) { - acquirep(p); - goto top; - } - break; - } - } - // poll network - if(runtime_xchg64(&runtime_sched->lastpoll, 0) != 0) { - if(g->m->p) - runtime_throw("findrunnable: netpoll with p"); - if(g->m->spinning) - runtime_throw("findrunnable: netpoll with spinning"); - gp = runtime_netpoll(true); // block until new work is available - runtime_atomicstore64(&runtime_sched->lastpoll, runtime_nanotime()); - if(gp) { - runtime_lock(&runtime_sched->lock); - p = pidleget(); - runtime_unlock(&runtime_sched->lock); - if(p) { - acquirep(p); - injectglist((G*)gp->schedlink); - gp->atomicstatus = _Grunnable; - return gp; - } - injectglist(gp); - } - } - stopm(); - goto top; -} - -static void -resetspinning(void) -{ - int32 nmspinning; - - if(g->m->spinning) { - g->m->spinning = false; - nmspinning = runtime_xadd(&runtime_sched->nmspinning, -1); - if(nmspinning < 0) - runtime_throw("findrunnable: negative nmspinning"); - } else - nmspinning = runtime_atomicload(&runtime_sched->nmspinning); - - // M wakeup policy is deliberately somewhat conservative (see nmspinning handling), - // so see if we need to wakeup another P here. - if (nmspinning == 0 && runtime_atomicload(&runtime_sched->npidle) > 0) - wakep(); -} - -// Injects the list of runnable G's into the scheduler. -// Can run concurrently with GC. -static void -injectglist(G *glist) -{ - int32 n; - G *gp; - - if(glist == nil) - return; - runtime_lock(&runtime_sched->lock); - for(n = 0; glist; n++) { - gp = glist; - glist = (G*)gp->schedlink; - gp->atomicstatus = _Grunnable; - globrunqput(gp); - } - runtime_unlock(&runtime_sched->lock); - - for(; n && runtime_sched->npidle; n--) - startm(nil, false); -} - -// One round of scheduler: find a runnable goroutine and execute it. -// Never returns. -static void -schedule(void) -{ - G *gp; - uint32 tick; - - if(g->m->locks) - runtime_throw("schedule: holding locks"); - -top: - if(runtime_sched->gcwaiting) { - gcstopm(); - goto top; - } - - gp = nil; - // Check the global runnable queue once in a while to ensure fairness. - // Otherwise two goroutines can completely occupy the local runqueue - // by constantly respawning each other. 
- tick = ((P*)g->m->p)->schedtick; - // This is a fancy way to say tick%61==0, - // it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors. - if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime_sched->runqsize > 0) { - runtime_lock(&runtime_sched->lock); - gp = globrunqget((P*)g->m->p, 1); - runtime_unlock(&runtime_sched->lock); - if(gp) - resetspinning(); - } - if(gp == nil) { - gp = runqget((P*)g->m->p); - if(gp && g->m->spinning) - runtime_throw("schedule: spinning with local work"); - } - if(gp == nil) { - gp = findrunnable(); // blocks until work is available - resetspinning(); - } - - if(gp->lockedm) { - // Hands off own p to the locked m, - // then blocks waiting for a new p. - startlockedm(gp); - goto top; - } - - execute(gp); } // Puts the current goroutine into a waiting state and calls unlockf. @@ -1572,12 +974,12 @@ park0(G *gp) m->waitlock = nil; if(!ok) { gp->atomicstatus = _Grunnable; - execute(gp); // Schedule it back, never returns. + execute(gp, true); // Schedule it back, never returns. } } if(m->lockedg) { stoplockedm(); - execute(gp); // Never returns. + execute(gp, true); // Never returns. } schedule(); } @@ -1606,7 +1008,7 @@ runtime_gosched0(G *gp) runtime_unlock(&runtime_sched->lock); if(m->lockedg) { stoplockedm(); - execute(gp); // Never returns. + execute(gp, true); // Never returns. } schedule(); } @@ -1643,6 +1045,7 @@ goexit0(G *gp) gp->writebuf.__capacity = 0; gp->waitreason = runtime_gostringnocopy(nil); gp->param = nil; + m->curg->m = nil; m->curg = nil; m->lockedg = nil; if(m->locked & ~_LockExternal) { @@ -1896,12 +1299,12 @@ exitsyscall0(G *gp) runtime_unlock(&runtime_sched->lock); if(p) { acquirep(p); - execute(gp); // Never returns. + execute(gp, false); // Never returns. } if(m->lockedg) { // Wait until another thread schedules gp and so m again. stoplockedm(); - execute(gp); // Never returns. + execute(gp, false); // Never returns. } stopm(); schedule(); // Never returns. @@ -2069,7 +1472,7 @@ __go_go(void (*fn)(void*), void* arg) makeGContext(newg, sp, (uintptr)spsize); - runqput(p, newg); + runqput(p, newg, true); if(runtime_atomicload(&runtime_sched->npidle) != 0 && runtime_atomicload(&runtime_sched->nmspinning) == 0 && fn != runtime_main) // TODO: fast atomic wakep(); @@ -2126,23 +1529,6 @@ retry: return gp; } -// Purge all cached G's from gfree list to the global list. -static void -gfpurge(P *p) -{ - G *gp; - - runtime_lock(&runtime_sched->gflock); - while(p->gfreecnt) { - p->gfreecnt--; - gp = p->gfree; - p->gfree = (G*)gp->schedlink; - gp->schedlink = (uintptr)runtime_sched->gfree; - runtime_sched->gfree = gp; - } - runtime_unlock(&runtime_sched->gflock); -} - void runtime_Breakpoint(void) { @@ -2157,38 +1543,6 @@ runtime_Gosched(void) runtime_gosched(); } -// Implementation of runtime.GOMAXPROCS. 
-// delete when scheduler is even stronger - -intgo runtime_GOMAXPROCS(intgo) - __asm__(GOSYM_PREFIX "runtime.GOMAXPROCS"); - -intgo -runtime_GOMAXPROCS(intgo n) -{ - intgo ret; - - if(n > _MaxGomaxprocs) - n = _MaxGomaxprocs; - runtime_lock(&runtime_sched->lock); - ret = (intgo)runtime_gomaxprocs; - if(n <= 0 || n == ret) { - runtime_unlock(&runtime_sched->lock); - return ret; - } - runtime_unlock(&runtime_sched->lock); - - runtime_acquireWorldsema(); - g->m->gcing = 1; - runtime_stopTheWorldWithSema(); - newprocs = (int32)n; - g->m->gcing = 0; - runtime_releaseWorldsema(); - runtime_startTheWorldWithSema(); - - return ret; -} - // lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below // after they modify m->locked. Do not allow preemption during this call, // or else the m might be different in this function than in the caller. @@ -2365,599 +1719,6 @@ runtime_setcpuprofilerate_m(int32 hz) g->m->locks--; } -// Change number of processors. The world is stopped, sched is locked. -static void -procresize(int32 new) -{ - int32 i, old; - bool pempty; - G *gp; - P *p; - intgo j; - - old = runtime_gomaxprocs; - if(old < 0 || old > _MaxGomaxprocs || new <= 0 || new >_MaxGomaxprocs) - runtime_throw("procresize: invalid arg"); - // initialize new P's - for(i = 0; i < new; i++) { - p = runtime_allp[i]; - if(p == nil) { - p = (P*)runtime_mallocgc(sizeof(*p), 0, FlagNoInvokeGC); - p->id = i; - p->status = _Pgcstop; - p->deferpool.__values = &p->deferpoolbuf[0]; - p->deferpool.__count = 0; - p->deferpool.__capacity = nelem(p->deferpoolbuf); - runtime_atomicstorep(&runtime_allp[i], p); - } - if(p->mcache == nil) { - if(old==0 && i==0) - p->mcache = g->m->mcache; // bootstrap - else - p->mcache = runtime_allocmcache(); - } - } - - // redistribute runnable G's evenly - // collect all runnable goroutines in global queue preserving FIFO order - // FIFO order is required to ensure fairness even during frequent GCs - // see http://golang.org/issue/7126 - pempty = false; - while(!pempty) { - pempty = true; - for(i = 0; i < old; i++) { - p = runtime_allp[i]; - if(p->runqhead == p->runqtail) - continue; - pempty = false; - // pop from tail of local queue - p->runqtail--; - gp = (G*)p->runq[p->runqtail%nelem(p->runq)]; - // push onto head of global queue - gp->schedlink = runtime_sched->runqhead; - runtime_sched->runqhead = (uintptr)gp; - if(runtime_sched->runqtail == 0) - runtime_sched->runqtail = (uintptr)gp; - runtime_sched->runqsize++; - } - } - // fill local queues with at most nelem(p->runq)/2 goroutines - // start at 1 because current M already executes some G and will acquire allp[0] below, - // so if we have a spare G we want to put it into allp[1]. 
- for(i = 1; (uint32)i < (uint32)new * nelem(p->runq)/2 && runtime_sched->runqsize > 0; i++) { - gp = (G*)runtime_sched->runqhead; - runtime_sched->runqhead = gp->schedlink; - if(runtime_sched->runqhead == 0) - runtime_sched->runqtail = 0; - runtime_sched->runqsize--; - runqput(runtime_allp[i%new], gp); - } - - // free unused P's - for(i = new; i < old; i++) { - p = runtime_allp[i]; - for(j = 0; j < p->deferpool.__count; j++) { - ((struct _defer**)p->deferpool.__values)[j] = nil; - } - p->deferpool.__count = 0; - runtime_freemcache(p->mcache); - p->mcache = nil; - gfpurge(p); - p->status = _Pdead; - // can't free P itself because it can be referenced by an M in syscall - } - - if(g->m->p) - ((P*)g->m->p)->m = 0; - g->m->p = 0; - g->m->mcache = nil; - p = runtime_allp[0]; - p->m = 0; - p->status = _Pidle; - acquirep(p); - for(i = new-1; i > 0; i--) { - p = runtime_allp[i]; - p->status = _Pidle; - pidleput(p); - } - runtime_atomicstore((uint32*)&runtime_gomaxprocs, new); -} - -// Associate p and the current m. -static void -acquirep(P *p) -{ - M *m; - - m = g->m; - if(m->p || m->mcache) - runtime_throw("acquirep: already in go"); - if(p->m || p->status != _Pidle) { - runtime_printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? ((M*)p->m)->id : 0, p->status); - runtime_throw("acquirep: invalid p state"); - } - m->mcache = p->mcache; - m->p = (uintptr)p; - p->m = (uintptr)m; - p->status = _Prunning; -} - -// Disassociate p and the current m. -static P* -releasep(void) -{ - M *m; - P *p; - - m = g->m; - if(m->p == 0 || m->mcache == nil) - runtime_throw("releasep: invalid arg"); - p = (P*)m->p; - if((M*)p->m != m || p->mcache != m->mcache || p->status != _Prunning) { - runtime_printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n", - m, m->p, p->m, m->mcache, p->mcache, p->status); - runtime_throw("releasep: invalid p state"); - } - m->p = 0; - m->mcache = nil; - p->m = 0; - p->status = _Pidle; - return p; -} - -static void -incidlelocked(int32 v) -{ - runtime_lock(&runtime_sched->lock); - runtime_sched->nmidlelocked += v; - if(v > 0) - checkdead(); - runtime_unlock(&runtime_sched->lock); -} - -static void -sysmon(void) -{ - uint32 idle, delay; - int64 now, lastpoll, lasttrace; - G *gp; - - lasttrace = 0; - idle = 0; // how many cycles in succession we had not wokeup somebody - delay = 0; - for(;;) { - if(idle == 0) // start with 20us sleep... - delay = 20; - else if(idle > 50) // start doubling the sleep after 1ms... 
- delay *= 2; - if(delay > 10*1000) // up to 10ms - delay = 10*1000; - runtime_usleep(delay); - if(runtime_debug.schedtrace <= 0 && - (runtime_sched->gcwaiting || runtime_atomicload(&runtime_sched->npidle) == (uint32)runtime_gomaxprocs)) { // TODO: fast atomic - runtime_lock(&runtime_sched->lock); - if(runtime_atomicload(&runtime_sched->gcwaiting) || runtime_atomicload(&runtime_sched->npidle) == (uint32)runtime_gomaxprocs) { - runtime_atomicstore(&runtime_sched->sysmonwait, 1); - runtime_unlock(&runtime_sched->lock); - runtime_notesleep(&runtime_sched->sysmonnote); - runtime_noteclear(&runtime_sched->sysmonnote); - idle = 0; - delay = 20; - } else - runtime_unlock(&runtime_sched->lock); - } - // poll network if not polled for more than 10ms - lastpoll = runtime_atomicload64(&runtime_sched->lastpoll); - now = runtime_nanotime(); - if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) { - runtime_cas64(&runtime_sched->lastpoll, lastpoll, now); - gp = runtime_netpoll(false); // non-blocking - if(gp) { - // Need to decrement number of idle locked M's - // (pretending that one more is running) before injectglist. - // Otherwise it can lead to the following situation: - // injectglist grabs all P's but before it starts M's to run the P's, - // another M returns from syscall, finishes running its G, - // observes that there is no work to do and no other running M's - // and reports deadlock. - incidlelocked(-1); - injectglist(gp); - incidlelocked(1); - } - } - // retake P's blocked in syscalls - // and preempt long running G's - if(retake(now)) - idle = 0; - else - idle++; - - if(runtime_debug.schedtrace > 0 && lasttrace + runtime_debug.schedtrace*1000000ll <= now) { - lasttrace = now; - runtime_schedtrace(runtime_debug.scheddetail); - } - } -} - -typedef struct Pdesc Pdesc; -struct Pdesc -{ - uint32 schedtick; - int64 schedwhen; - uint32 syscalltick; - int64 syscallwhen; -}; -static Pdesc pdesc[_MaxGomaxprocs]; - -static uint32 -retake(int64 now) -{ - uint32 i, s, n; - int64 t; - P *p; - Pdesc *pd; - - n = 0; - for(i = 0; i < (uint32)runtime_gomaxprocs; i++) { - p = runtime_allp[i]; - if(p==nil) - continue; - pd = &pdesc[i]; - s = p->status; - if(s == _Psyscall) { - // Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us). - t = p->syscalltick; - if(pd->syscalltick != t) { - pd->syscalltick = t; - pd->syscallwhen = now; - continue; - } - // On the one hand we don't want to retake Ps if there is no other work to do, - // but on the other hand we want to retake them eventually - // because they can prevent the sysmon thread from deep sleep. - if(p->runqhead == p->runqtail && - runtime_atomicload(&runtime_sched->nmspinning) + runtime_atomicload(&runtime_sched->npidle) > 0 && - pd->syscallwhen + 10*1000*1000 > now) - continue; - // Need to decrement number of idle locked M's - // (pretending that one more is running) before the CAS. - // Otherwise the M from which we retake can exit the syscall, - // increment nmidle and report deadlock. - incidlelocked(-1); - if(runtime_cas(&p->status, s, _Pidle)) { - n++; - handoffp(p); - } - incidlelocked(1); - } else if(s == _Prunning) { - // Preempt G if it's running for more than 10ms. - t = p->schedtick; - if(pd->schedtick != t) { - pd->schedtick = t; - pd->schedwhen = now; - continue; - } - if(pd->schedwhen + 10*1000*1000 > now) - continue; - // preemptone(p); - } - } - return n; -} - -// Tell all goroutines that they have been preempted and they should stop. -// This function is purely best-effort. 
It can fail to inform a goroutine if a -// processor just started running it. -// No locks need to be held. -// Returns true if preemption request was issued to at least one goroutine. -static bool -preemptall(void) -{ - return false; -} - -// Put mp on midle list. -// Sched must be locked. -static void -mput(M *mp) -{ - mp->schedlink = runtime_sched->midle; - runtime_sched->midle = (uintptr)mp; - runtime_sched->nmidle++; - checkdead(); -} - -// Try to get an m from midle list. -// Sched must be locked. -static M* -mget(void) -{ - M *mp; - - if((mp = (M*)runtime_sched->midle) != nil){ - runtime_sched->midle = mp->schedlink; - runtime_sched->nmidle--; - } - return mp; -} - -// Put gp on the global runnable queue. -// Sched must be locked. -static void -globrunqput(G *gp) -{ - gp->schedlink = 0; - if(runtime_sched->runqtail) - ((G*)runtime_sched->runqtail)->schedlink = (uintptr)gp; - else - runtime_sched->runqhead = (uintptr)gp; - runtime_sched->runqtail = (uintptr)gp; - runtime_sched->runqsize++; -} - -// Put a batch of runnable goroutines on the global runnable queue. -// Sched must be locked. -static void -globrunqputbatch(G *ghead, G *gtail, int32 n) -{ - gtail->schedlink = 0; - if(runtime_sched->runqtail) - ((G*)runtime_sched->runqtail)->schedlink = (uintptr)ghead; - else - runtime_sched->runqhead = (uintptr)ghead; - runtime_sched->runqtail = (uintptr)gtail; - runtime_sched->runqsize += n; -} - -// Try get a batch of G's from the global runnable queue. -// Sched must be locked. -static G* -globrunqget(P *p, int32 max) -{ - G *gp, *gp1; - int32 n; - - if(runtime_sched->runqsize == 0) - return nil; - n = runtime_sched->runqsize/runtime_gomaxprocs+1; - if(n > runtime_sched->runqsize) - n = runtime_sched->runqsize; - if(max > 0 && n > max) - n = max; - if((uint32)n > nelem(p->runq)/2) - n = nelem(p->runq)/2; - runtime_sched->runqsize -= n; - if(runtime_sched->runqsize == 0) - runtime_sched->runqtail = 0; - gp = (G*)runtime_sched->runqhead; - runtime_sched->runqhead = gp->schedlink; - n--; - while(n--) { - gp1 = (G*)runtime_sched->runqhead; - runtime_sched->runqhead = gp1->schedlink; - runqput(p, gp1); - } - return gp; -} - -// Put p to on pidle list. -// Sched must be locked. -static void -pidleput(P *p) -{ - p->link = runtime_sched->pidle; - runtime_sched->pidle = (uintptr)p; - runtime_xadd(&runtime_sched->npidle, 1); // TODO: fast atomic -} - -// Try get a p from pidle list. -// Sched must be locked. -static P* -pidleget(void) -{ - P *p; - - p = (P*)runtime_sched->pidle; - if(p) { - runtime_sched->pidle = p->link; - runtime_xadd(&runtime_sched->npidle, -1); // TODO: fast atomic - } - return p; -} - -// Try to put g on local runnable queue. -// If it's full, put onto global queue. -// Executed only by the owner P. -static void -runqput(P *p, G *gp) -{ - uint32 h, t; - -retry: - h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers - t = p->runqtail; - if(t - h < nelem(p->runq)) { - p->runq[t%nelem(p->runq)] = (uintptr)gp; - runtime_atomicstore(&p->runqtail, t+1); // store-release, makes the item available for consumption - return; - } - if(runqputslow(p, gp, h, t)) - return; - // the queue is not full, now the put above must suceed - goto retry; -} - -// Put g and a batch of work from local runnable queue on global queue. -// Executed only by the owner P. -static bool -runqputslow(P *p, G *gp, uint32 h, uint32 t) -{ - G *batch[nelem(p->runq)/2+1]; - uint32 n, i; - - // First, grab a batch from local queue. 
- n = t-h; - n = n/2; - if(n != nelem(p->runq)/2) - runtime_throw("runqputslow: queue is not full"); - for(i=0; i<n; i++) - batch[i] = (G*)p->runq[(h+i)%nelem(p->runq)]; - if(!runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume - return false; - batch[n] = gp; - // Link the goroutines. - for(i=0; i<n; i++) - batch[i]->schedlink = (uintptr)batch[i+1]; - // Now put the batch on global queue. - runtime_lock(&runtime_sched->lock); - globrunqputbatch(batch[0], batch[n], n+1); - runtime_unlock(&runtime_sched->lock); - return true; -} - -// Get g from local runnable queue. -// Executed only by the owner P. -static G* -runqget(P *p) -{ - G *gp; - uint32 t, h; - - for(;;) { - h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers - t = p->runqtail; - if(t == h) - return nil; - gp = (G*)p->runq[h%nelem(p->runq)]; - if(runtime_cas(&p->runqhead, h, h+1)) // cas-release, commits consume - return gp; - } -} - -// Grabs a batch of goroutines from local runnable queue. -// batch array must be of size nelem(p->runq)/2. Returns number of grabbed goroutines. -// Can be executed by any P. -static uint32 -runqgrab(P *p, G **batch) -{ - uint32 t, h, n, i; - - for(;;) { - h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers - t = runtime_atomicload(&p->runqtail); // load-acquire, synchronize with the producer - n = t-h; - n = n - n/2; - if(n == 0) - break; - if(n > nelem(p->runq)/2) // read inconsistent h and t - continue; - for(i=0; i<n; i++) - batch[i] = (G*)p->runq[(h+i)%nelem(p->runq)]; - if(runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume - break; - } - return n; -} - -// Steal half of elements from local runnable queue of p2 -// and put onto local runnable queue of p. -// Returns one of the stolen elements (or nil if failed). 
-static G* -runqsteal(P *p, P *p2) -{ - G *gp; - G *batch[nelem(p->runq)/2]; - uint32 t, h, n, i; - - n = runqgrab(p2, batch); - if(n == 0) - return nil; - n--; - gp = batch[n]; - if(n == 0) - return gp; - h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers - t = p->runqtail; - if(t - h + n >= nelem(p->runq)) - runtime_throw("runqsteal: runq overflow"); - for(i=0; i<n; i++, t++) - p->runq[t%nelem(p->runq)] = (uintptr)batch[i]; - runtime_atomicstore(&p->runqtail, t); // store-release, makes the item available for consumption - return gp; -} - -void runtime_testSchedLocalQueue(void) - __asm__("runtime.testSchedLocalQueue"); - -void -runtime_testSchedLocalQueue(void) -{ - P p; - G gs[nelem(p.runq)]; - int32 i, j; - - runtime_memclr((byte*)&p, sizeof(p)); - - for(i = 0; i < (int32)nelem(gs); i++) { - if(runqget(&p) != nil) - runtime_throw("runq is not empty initially"); - for(j = 0; j < i; j++) - runqput(&p, &gs[i]); - for(j = 0; j < i; j++) { - if(runqget(&p) != &gs[i]) { - runtime_printf("bad element at iter %d/%d\n", i, j); - runtime_throw("bad element"); - } - } - if(runqget(&p) != nil) - runtime_throw("runq is not empty afterwards"); - } -} - -void runtime_testSchedLocalQueueSteal(void) - __asm__("runtime.testSchedLocalQueueSteal"); - -void -runtime_testSchedLocalQueueSteal(void) -{ - P p1, p2; - G gs[nelem(p1.runq)], *gp; - int32 i, j, s; - - runtime_memclr((byte*)&p1, sizeof(p1)); - runtime_memclr((byte*)&p2, sizeof(p2)); - - for(i = 0; i < (int32)nelem(gs); i++) { - for(j = 0; j < i; j++) { - gs[j].sig = 0; - runqput(&p1, &gs[j]); - } - gp = runqsteal(&p2, &p1); - s = 0; - if(gp) { - s++; - gp->sig++; - } - while((gp = runqget(&p2)) != nil) { - s++; - gp->sig++; - } - while((gp = runqget(&p1)) != nil) - gp->sig++; - for(j = 0; j < i; j++) { - if(gs[j].sig != 1) { - runtime_printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i); - runtime_throw("bad element"); - } - } - if(s != i/2 && s != i/2+1) { - runtime_printf("bad steal %d, want %d or %d, iter %d\n", - s, i/2, i/2+1, i); - runtime_throw("bad steal"); - } - } -} - intgo runtime_setmaxthreads(intgo in) { @@ -3041,56 +1802,15 @@ os_beforeExit() { } -// Active spinning for sync.Mutex. -//go:linkname sync_runtime_canSpin sync.runtime_canSpin - -enum -{ - ACTIVE_SPIN = 4, - ACTIVE_SPIN_CNT = 30, -}; - -extern _Bool sync_runtime_canSpin(intgo i) - __asm__ (GOSYM_PREFIX "sync.runtime_canSpin"); - -_Bool -sync_runtime_canSpin(intgo i) -{ - P *p; - - // sync.Mutex is cooperative, so we are conservative with spinning. - // Spin only few times and only if running on a multicore machine and - // GOMAXPROCS>1 and there is at least one other running P and local runq is empty. - // As opposed to runtime mutex we don't do passive spinning here, - // because there can be work on global runq on on other Ps. - if (i >= ACTIVE_SPIN || runtime_ncpu <= 1 || runtime_gomaxprocs <= (int32)(runtime_sched->npidle+runtime_sched->nmspinning)+1) { - return false; - } - p = (P*)g->m->p; - return p != nil && p->runqhead == p->runqtail; -} - -//go:linkname sync_runtime_doSpin sync.runtime_doSpin -//go:nosplit - -extern void sync_runtime_doSpin(void) - __asm__ (GOSYM_PREFIX "sync.runtime_doSpin"); - -void -sync_runtime_doSpin() -{ - runtime_procyield(ACTIVE_SPIN_CNT); -} - // For Go code to look at variables, until we port proc.go. 
-extern M** runtime_go_allm(void) +extern M* runtime_go_allm(void) __asm__ (GOSYM_PREFIX "runtime.allm"); -M** +M* runtime_go_allm() { - return &runtime_allm; + return runtime_allm; } intgo NumCPU(void) __asm__ (GOSYM_PREFIX "runtime.NumCPU"); |
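One detail from the removed schedule() code is worth keeping in mind when reading the Go replacement: the once-per-61-ticks check of the global run queue avoids an integer divide by using a reciprocal multiplication, and the deleted comment asserts that tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 is just a faster spelling of tick%61 == 0. A small standalone program to spot-check that claim (sampled rather than exhaustive, purely illustrative) could look like this:

#include <stdint.h>
#include <stdio.h>

/* Spot-check that the reciprocal-multiply form used by the removed
 * schedule() code agrees with a plain tick % 61 over sampled 32-bit values. */
int main(void)
{
	for (uint64_t t = 0; t <= UINT32_MAX; t += 9973) {   /* sampling stride */
		uint32_t tick = (uint32_t)t;
		uint32_t fast = tick - (uint32_t)((((uint64_t)tick * 0x4325c53fu) >> 36) * 61);
		if (fast != tick % 61) {
			printf("mismatch at %u: fast=%u slow=%u\n",
			       (unsigned)tick, (unsigned)fast, (unsigned)(tick % 61));
			return 1;
		}
	}
	printf("reciprocal form matches tick %% 61 on all sampled values\n");
	return 0;
}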