Diffstat (limited to 'libgo/runtime/proc.c')
-rw-r--r--  libgo/runtime/proc.c  1400
1 file changed, 60 insertions(+), 1340 deletions(-)
diff --git a/libgo/runtime/proc.c b/libgo/runtime/proc.c
index 8a7a2d7..c4a5283 100644
--- a/libgo/runtime/proc.c
+++ b/libgo/runtime/proc.c
@@ -365,9 +365,14 @@ extern P** runtime_getAllP()
__asm__ (GOSYM_PREFIX "runtime.getAllP");
extern G* allocg(void)
__asm__ (GOSYM_PREFIX "runtime.allocg");
+extern bool needaddgcproc(void)
+ __asm__ (GOSYM_PREFIX "runtime.needaddgcproc");
+extern void startm(P*, bool)
+ __asm__(GOSYM_PREFIX "runtime.startm");
+extern void newm(void(*)(void), P*)
+ __asm__(GOSYM_PREFIX "runtime.newm");
Sched* runtime_sched;
-int32 runtime_gomaxprocs;
M runtime_m0;
G runtime_g0; // idle goroutine for m0
G* runtime_lastg;
@@ -376,51 +381,58 @@ P** runtime_allp;
int8* runtime_goos;
int32 runtime_ncpu;
bool runtime_precisestack;
-static int32 newprocs;
bool runtime_isarchive;
void* runtime_mstart(void*);
-static void runqput(P*, G*);
-static G* runqget(P*);
-static bool runqputslow(P*, G*, uint32, uint32);
-static G* runqsteal(P*, P*);
-static void mput(M*);
-static M* mget(void);
static void mcommoninit(M*);
-static void schedule(void);
-static void procresize(int32);
-static void acquirep(P*);
-static P* releasep(void);
-static void newm(void(*)(void), P*);
-static void stopm(void);
-static void startm(P*, bool);
-static void handoffp(P*);
-static void wakep(void);
-static void stoplockedm(void);
-static void startlockedm(G*);
-static void sysmon(void);
-static uint32 retake(int64);
-static void incidlelocked(int32);
static void exitsyscall0(G*);
static void park0(G*);
static void goexit0(G*);
static void gfput(P*, G*);
static G* gfget(P*);
-static void gfpurge(P*);
-static void globrunqput(G*);
-static void globrunqputbatch(G*, G*, int32);
-static G* globrunqget(P*, int32);
-static P* pidleget(void);
-static void pidleput(P*);
-static void injectglist(G*);
-static bool preemptall(void);
static bool exitsyscallfast(void);
-void allgadd(G*)
+extern void setncpu(int32)
+ __asm__(GOSYM_PREFIX "runtime.setncpu");
+extern void allgadd(G*)
__asm__(GOSYM_PREFIX "runtime.allgadd");
-void checkdead(void)
+extern void stopm(void)
+ __asm__(GOSYM_PREFIX "runtime.stopm");
+extern void handoffp(P*)
+ __asm__(GOSYM_PREFIX "runtime.handoffp");
+extern void wakep(void)
+ __asm__(GOSYM_PREFIX "runtime.wakep");
+extern void stoplockedm(void)
+ __asm__(GOSYM_PREFIX "runtime.stoplockedm");
+extern void schedule(void)
+ __asm__(GOSYM_PREFIX "runtime.schedule");
+extern void execute(G*, bool)
+ __asm__(GOSYM_PREFIX "runtime.execute");
+extern void procresize(int32)
+ __asm__(GOSYM_PREFIX "runtime.procresize");
+extern void acquirep(P*)
+ __asm__(GOSYM_PREFIX "runtime.acquirep");
+extern P* releasep(void)
+ __asm__(GOSYM_PREFIX "runtime.releasep");
+extern void incidlelocked(int32)
+ __asm__(GOSYM_PREFIX "runtime.incidlelocked");
+extern void checkdead(void)
__asm__(GOSYM_PREFIX "runtime.checkdead");
+extern void sysmon(void)
+ __asm__(GOSYM_PREFIX "runtime.sysmon");
+extern void mput(M*)
+ __asm__(GOSYM_PREFIX "runtime.mput");
+extern M* mget(void)
+ __asm__(GOSYM_PREFIX "runtime.mget");
+extern void globrunqput(G*)
+ __asm__(GOSYM_PREFIX "runtime.globrunqput");
+extern P* pidleget(void)
+ __asm__(GOSYM_PREFIX "runtime.pidleget");
+extern bool runqempty(P*)
+ __asm__(GOSYM_PREFIX "runtime.runqempty");
+extern void runqput(P*, G*, bool)
+ __asm__(GOSYM_PREFIX "runtime.runqput");
bool runtime_isstarted;
@@ -441,6 +453,7 @@ runtime_schedinit(void)
const byte *p;
Eface i;
+ setncpu(runtime_ncpu);
runtime_sched = runtime_getsched();
m = &runtime_m0;
@@ -660,234 +673,6 @@ mcommoninit(M *mp)
runtime_unlock(&runtime_sched->lock);
}
-// Mark gp ready to run.
-void
-runtime_ready(G *gp)
-{
- // Mark runnable.
- g->m->locks++; // disable preemption because it can be holding p in a local var
- if(gp->atomicstatus != _Gwaiting) {
- runtime_printf("goroutine %D has status %d\n", gp->goid, gp->atomicstatus);
- runtime_throw("bad g->atomicstatus in ready");
- }
- gp->atomicstatus = _Grunnable;
- runqput((P*)g->m->p, gp);
- if(runtime_atomicload(&runtime_sched->npidle) != 0 && runtime_atomicload(&runtime_sched->nmspinning) == 0) // TODO: fast atomic
- wakep();
- g->m->locks--;
-}
-
-void goready(G*, int) __asm__ (GOSYM_PREFIX "runtime.goready");
-
-void
-goready(G* gp, int traceskip __attribute__ ((unused)))
-{
- runtime_ready(gp);
-}
-
-int32
-runtime_gcprocs(void)
-{
- int32 n;
-
- // Figure out how many CPUs to use during GC.
- // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
- runtime_lock(&runtime_sched->lock);
- n = runtime_gomaxprocs;
- if(n > runtime_ncpu)
- n = runtime_ncpu > 0 ? runtime_ncpu : 1;
- if(n > MaxGcproc)
- n = MaxGcproc;
- if(n > runtime_sched->nmidle+1) // one M is currently running
- n = runtime_sched->nmidle+1;
- runtime_unlock(&runtime_sched->lock);
- return n;
-}
-
-static bool
-needaddgcproc(void)
-{
- int32 n;
-
- runtime_lock(&runtime_sched->lock);
- n = runtime_gomaxprocs;
- if(n > runtime_ncpu)
- n = runtime_ncpu;
- if(n > MaxGcproc)
- n = MaxGcproc;
- n -= runtime_sched->nmidle+1; // one M is currently running
- runtime_unlock(&runtime_sched->lock);
- return n > 0;
-}
-
-void
-runtime_helpgc(int32 nproc)
-{
- M *mp;
- int32 n, pos;
-
- runtime_lock(&runtime_sched->lock);
- pos = 0;
- for(n = 1; n < nproc; n++) { // one M is currently running
- if(runtime_allp[pos]->mcache == g->m->mcache)
- pos++;
- mp = mget();
- if(mp == nil)
- runtime_throw("runtime_gcprocs inconsistency");
- mp->helpgc = n;
- mp->mcache = runtime_allp[pos]->mcache;
- pos++;
- runtime_notewakeup(&mp->park);
- }
- runtime_unlock(&runtime_sched->lock);
-}
-
-// Similar to stoptheworld but best-effort and can be called several times.
-// There is no reverse operation, used during crashing.
-// This function must not lock any mutexes.
-void
-runtime_freezetheworld(void)
-{
- int32 i;
-
- if(runtime_gomaxprocs == 1)
- return;
- // stopwait and preemption requests can be lost
- // due to races with concurrently executing threads,
- // so try several times
- for(i = 0; i < 5; i++) {
- // this should tell the scheduler to not start any new goroutines
- runtime_sched->stopwait = 0x7fffffff;
- runtime_atomicstore((uint32*)&runtime_sched->gcwaiting, 1);
- // this should stop running goroutines
- if(!preemptall())
- break; // no running goroutines
- runtime_usleep(1000);
- }
- // to be sure
- runtime_usleep(1000);
- preemptall();
- runtime_usleep(1000);
-}
-
-void
-runtime_stopTheWorldWithSema(void)
-{
- int32 i;
- uint32 s;
- P *p;
- bool wait;
-
- runtime_lock(&runtime_sched->lock);
- runtime_sched->stopwait = runtime_gomaxprocs;
- runtime_atomicstore((uint32*)&runtime_sched->gcwaiting, 1);
- preemptall();
- // stop current P
- ((P*)g->m->p)->status = _Pgcstop;
- runtime_sched->stopwait--;
- // try to retake all P's in _Psyscall status
- for(i = 0; i < runtime_gomaxprocs; i++) {
- p = runtime_allp[i];
- s = p->status;
- if(s == _Psyscall && runtime_cas(&p->status, s, _Pgcstop))
- runtime_sched->stopwait--;
- }
- // stop idle P's
- while((p = pidleget()) != nil) {
- p->status = _Pgcstop;
- runtime_sched->stopwait--;
- }
- wait = runtime_sched->stopwait > 0;
- runtime_unlock(&runtime_sched->lock);
-
- // wait for remaining P's to stop voluntarily
- if(wait) {
- runtime_notesleep(&runtime_sched->stopnote);
- runtime_noteclear(&runtime_sched->stopnote);
- }
- if(runtime_sched->stopwait)
- runtime_throw("stoptheworld: not stopped");
- for(i = 0; i < runtime_gomaxprocs; i++) {
- p = runtime_allp[i];
- if(p->status != _Pgcstop)
- runtime_throw("stoptheworld: not stopped");
- }
-}
-
-static void
-mhelpgc(void)
-{
- g->m->helpgc = -1;
-}
-
-void
-runtime_startTheWorldWithSema(void)
-{
- P *p, *p1;
- M *mp;
- G *gp;
- bool add;
-
- g->m->locks++; // disable preemption because it can be holding p in a local var
- gp = runtime_netpoll(false); // non-blocking
- injectglist(gp);
- add = needaddgcproc();
- runtime_lock(&runtime_sched->lock);
- if(newprocs) {
- procresize(newprocs);
- newprocs = 0;
- } else
- procresize(runtime_gomaxprocs);
- runtime_sched->gcwaiting = 0;
-
- p1 = nil;
- while((p = pidleget()) != nil) {
- // procresize() puts p's with work at the beginning of the list.
- // Once we reach a p without a run queue, the rest don't have one either.
- if(p->runqhead == p->runqtail) {
- pidleput(p);
- break;
- }
- p->m = (uintptr)mget();
- p->link = (uintptr)p1;
- p1 = p;
- }
- if(runtime_sched->sysmonwait) {
- runtime_sched->sysmonwait = false;
- runtime_notewakeup(&runtime_sched->sysmonnote);
- }
- runtime_unlock(&runtime_sched->lock);
-
- while(p1) {
- p = p1;
- p1 = (P*)p1->link;
- if(p->m) {
- mp = (M*)p->m;
- p->m = 0;
- if(mp->nextp)
- runtime_throw("startTheWorldWithSema: inconsistent mp->nextp");
- mp->nextp = (uintptr)p;
- runtime_notewakeup(&mp->park);
- } else {
- // Start M to run P. Do not start another M below.
- newm(nil, p);
- add = false;
- }
- }
-
- if(add) {
- // If GC could have used another helper proc, start one now,
- // in the hope that it will be available next time.
- // It would have been even better to start it before the collection,
- // but doing so requires allocating memory, so it's tricky to
- // coordinate. This lazy approach works out in practice:
- // we don't mind if the first couple gc rounds don't have quite
- // the maximum number of procs.
- newm(mhelpgc, nil);
- }
- g->m->locks--;
-}
-
// Called to start an M.
void*
runtime_mstart(void* mp)
@@ -1055,7 +840,7 @@ makeGContext(G* gp, byte* sp, uintptr spsize) {
}
// Create a new m. It will start off with a call to fn, or else the scheduler.
-static void
+void
newm(void(*fn)(void), P *p)
{
M *mp;
@@ -1067,40 +852,6 @@ newm(void(*fn)(void), P *p)
runtime_newosproc(mp);
}
-// Stops execution of the current m until new work is available.
-// Returns with acquired P.
-static void
-stopm(void)
-{
- M* m;
-
- m = g->m;
- if(m->locks)
- runtime_throw("stopm holding locks");
- if(m->p)
- runtime_throw("stopm holding p");
- if(m->spinning) {
- m->spinning = false;
- runtime_xadd(&runtime_sched->nmspinning, -1);
- }
-
-retry:
- runtime_lock(&runtime_sched->lock);
- mput(m);
- runtime_unlock(&runtime_sched->lock);
- runtime_notesleep(&m->park);
- m = g->m;
- runtime_noteclear(&m->park);
- if(m->helpgc) {
- runtime_gchelper();
- m->helpgc = 0;
- m->mcache = nil;
- goto retry;
- }
- acquirep((P*)m->nextp);
- m->nextp = 0;
-}
-
static void
mspinning(void)
{
@@ -1109,7 +860,7 @@ mspinning(void)
// Schedules some M to run the p (creates an M if necessary).
// If p==nil, tries to get an idle P, if no idle P's does nothing.
-static void
+void
startm(P *p, bool spinning)
{
M *mp;
@@ -1138,361 +889,12 @@ startm(P *p, bool spinning)
runtime_throw("startm: m is spinning");
if(mp->nextp)
runtime_throw("startm: m has p");
- mp->spinning = spinning;
- mp->nextp = (uintptr)p;
- runtime_notewakeup(&mp->park);
-}
-
-// Hands off P from syscall or locked M.
-static void
-handoffp(P *p)
-{
- // if it has local work, start it straight away
- if(p->runqhead != p->runqtail || runtime_sched->runqsize) {
- startm(p, false);
- return;
- }
- // no local work, check that there are no spinning/idle M's,
- // otherwise our help is not required
- if(runtime_atomicload(&runtime_sched->nmspinning) + runtime_atomicload(&runtime_sched->npidle) == 0 && // TODO: fast atomic
- runtime_cas(&runtime_sched->nmspinning, 0, 1)) {
- startm(p, true);
- return;
- }
- runtime_lock(&runtime_sched->lock);
- if(runtime_sched->gcwaiting) {
- p->status = _Pgcstop;
- if(--runtime_sched->stopwait == 0)
- runtime_notewakeup(&runtime_sched->stopnote);
- runtime_unlock(&runtime_sched->lock);
- return;
- }
- if(runtime_sched->runqsize) {
- runtime_unlock(&runtime_sched->lock);
- startm(p, false);
- return;
+ if(spinning && !runqempty(p)) {
+ runtime_throw("startm: p has runnable gs");
}
- // If this is the last running P and nobody is polling network,
- // need to wakeup another M to poll network.
- if(runtime_sched->npidle == (uint32)runtime_gomaxprocs-1 && runtime_atomicload64(&runtime_sched->lastpoll) != 0) {
- runtime_unlock(&runtime_sched->lock);
- startm(p, false);
- return;
- }
- pidleput(p);
- runtime_unlock(&runtime_sched->lock);
-}
-
-// Tries to add one more P to execute G's.
-// Called when a G is made runnable (newproc, ready).
-static void
-wakep(void)
-{
- // be conservative about spinning threads
- if(!runtime_cas(&runtime_sched->nmspinning, 0, 1))
- return;
- startm(nil, true);
-}
-
-// Stops execution of the current m that is locked to a g until the g is runnable again.
-// Returns with acquired P.
-static void
-stoplockedm(void)
-{
- M *m;
- P *p;
-
- m = g->m;
- if(m->lockedg == nil || m->lockedg->lockedm != m)
- runtime_throw("stoplockedm: inconsistent locking");
- if(m->p) {
- // Schedule another M to run this p.
- p = releasep();
- handoffp(p);
- }
- incidlelocked(1);
- // Wait until another thread schedules lockedg again.
- runtime_notesleep(&m->park);
- m = g->m;
- runtime_noteclear(&m->park);
- if(m->lockedg->atomicstatus != _Grunnable)
- runtime_throw("stoplockedm: not runnable");
- acquirep((P*)m->nextp);
- m->nextp = 0;
-}
-
-// Schedules the locked m to run the locked gp.
-static void
-startlockedm(G *gp)
-{
- M *mp;
- P *p;
-
- mp = gp->lockedm;
- if(mp == g->m)
- runtime_throw("startlockedm: locked to me");
- if(mp->nextp)
- runtime_throw("startlockedm: m has p");
- // directly handoff current P to the locked m
- incidlelocked(-1);
- p = releasep();
+ mp->spinning = spinning;
mp->nextp = (uintptr)p;
runtime_notewakeup(&mp->park);
- stopm();
-}
-
-// Stops the current m for stoptheworld.
-// Returns when the world is restarted.
-static void
-gcstopm(void)
-{
- P *p;
-
- if(!runtime_sched->gcwaiting)
- runtime_throw("gcstopm: not waiting for gc");
- if(g->m->spinning) {
- g->m->spinning = false;
- runtime_xadd(&runtime_sched->nmspinning, -1);
- }
- p = releasep();
- runtime_lock(&runtime_sched->lock);
- p->status = _Pgcstop;
- if(--runtime_sched->stopwait == 0)
- runtime_notewakeup(&runtime_sched->stopnote);
- runtime_unlock(&runtime_sched->lock);
- stopm();
-}
-
-// Schedules gp to run on the current M.
-// Never returns.
-static void
-execute(G *gp)
-{
- int32 hz;
-
- if(gp->atomicstatus != _Grunnable) {
- runtime_printf("execute: bad g status %d\n", gp->atomicstatus);
- runtime_throw("execute: bad g status");
- }
- gp->atomicstatus = _Grunning;
- gp->waitsince = 0;
- ((P*)g->m->p)->schedtick++;
- g->m->curg = gp;
- gp->m = g->m;
-
- // Check whether the profiler needs to be turned on or off.
- hz = runtime_sched->profilehz;
- if(g->m->profilehz != hz)
- runtime_resetcpuprofiler(hz);
-
- runtime_gogo(gp);
-}
-
-// Finds a runnable goroutine to execute.
-// Tries to steal from other P's, get g from global queue, poll network.
-static G*
-findrunnable(void)
-{
- G *gp;
- P *p;
- int32 i;
-
-top:
- if(runtime_sched->gcwaiting) {
- gcstopm();
- goto top;
- }
- if(runtime_fingwait && runtime_fingwake && (gp = runtime_wakefing()) != nil)
- runtime_ready(gp);
- // local runq
- gp = runqget((P*)g->m->p);
- if(gp)
- return gp;
- // global runq
- if(runtime_sched->runqsize) {
- runtime_lock(&runtime_sched->lock);
- gp = globrunqget((P*)g->m->p, 0);
- runtime_unlock(&runtime_sched->lock);
- if(gp)
- return gp;
- }
- // poll network
- gp = runtime_netpoll(false); // non-blocking
- if(gp) {
- injectglist((G*)gp->schedlink);
- gp->atomicstatus = _Grunnable;
- return gp;
- }
- // If number of spinning M's >= number of busy P's, block.
- // This is necessary to prevent excessive CPU consumption
- // when GOMAXPROCS>>1 but the program parallelism is low.
- if(!g->m->spinning && 2 * runtime_atomicload(&runtime_sched->nmspinning) >= runtime_gomaxprocs - runtime_atomicload(&runtime_sched->npidle)) // TODO: fast atomic
- goto stop;
- if(!g->m->spinning) {
- g->m->spinning = true;
- runtime_xadd(&runtime_sched->nmspinning, 1);
- }
- // random steal from other P's
- for(i = 0; i < 2*runtime_gomaxprocs; i++) {
- if(runtime_sched->gcwaiting)
- goto top;
- p = runtime_allp[runtime_fastrand1()%runtime_gomaxprocs];
- if(p == (P*)g->m->p)
- gp = runqget(p);
- else
- gp = runqsteal((P*)g->m->p, p);
- if(gp)
- return gp;
- }
-stop:
- // return P and block
- runtime_lock(&runtime_sched->lock);
- if(runtime_sched->gcwaiting) {
- runtime_unlock(&runtime_sched->lock);
- goto top;
- }
- if(runtime_sched->runqsize) {
- gp = globrunqget((P*)g->m->p, 0);
- runtime_unlock(&runtime_sched->lock);
- return gp;
- }
- p = releasep();
- pidleput(p);
- runtime_unlock(&runtime_sched->lock);
- if(g->m->spinning) {
- g->m->spinning = false;
- runtime_xadd(&runtime_sched->nmspinning, -1);
- }
- // check all runqueues once again
- for(i = 0; i < runtime_gomaxprocs; i++) {
- p = runtime_allp[i];
- if(p && p->runqhead != p->runqtail) {
- runtime_lock(&runtime_sched->lock);
- p = pidleget();
- runtime_unlock(&runtime_sched->lock);
- if(p) {
- acquirep(p);
- goto top;
- }
- break;
- }
- }
- // poll network
- if(runtime_xchg64(&runtime_sched->lastpoll, 0) != 0) {
- if(g->m->p)
- runtime_throw("findrunnable: netpoll with p");
- if(g->m->spinning)
- runtime_throw("findrunnable: netpoll with spinning");
- gp = runtime_netpoll(true); // block until new work is available
- runtime_atomicstore64(&runtime_sched->lastpoll, runtime_nanotime());
- if(gp) {
- runtime_lock(&runtime_sched->lock);
- p = pidleget();
- runtime_unlock(&runtime_sched->lock);
- if(p) {
- acquirep(p);
- injectglist((G*)gp->schedlink);
- gp->atomicstatus = _Grunnable;
- return gp;
- }
- injectglist(gp);
- }
- }
- stopm();
- goto top;
-}
-
-static void
-resetspinning(void)
-{
- int32 nmspinning;
-
- if(g->m->spinning) {
- g->m->spinning = false;
- nmspinning = runtime_xadd(&runtime_sched->nmspinning, -1);
- if(nmspinning < 0)
- runtime_throw("findrunnable: negative nmspinning");
- } else
- nmspinning = runtime_atomicload(&runtime_sched->nmspinning);
-
- // M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
- // so see if we need to wakeup another P here.
- if (nmspinning == 0 && runtime_atomicload(&runtime_sched->npidle) > 0)
- wakep();
-}
-
-// Injects the list of runnable G's into the scheduler.
-// Can run concurrently with GC.
-static void
-injectglist(G *glist)
-{
- int32 n;
- G *gp;
-
- if(glist == nil)
- return;
- runtime_lock(&runtime_sched->lock);
- for(n = 0; glist; n++) {
- gp = glist;
- glist = (G*)gp->schedlink;
- gp->atomicstatus = _Grunnable;
- globrunqput(gp);
- }
- runtime_unlock(&runtime_sched->lock);
-
- for(; n && runtime_sched->npidle; n--)
- startm(nil, false);
-}
-
-// One round of scheduler: find a runnable goroutine and execute it.
-// Never returns.
-static void
-schedule(void)
-{
- G *gp;
- uint32 tick;
-
- if(g->m->locks)
- runtime_throw("schedule: holding locks");
-
-top:
- if(runtime_sched->gcwaiting) {
- gcstopm();
- goto top;
- }
-
- gp = nil;
- // Check the global runnable queue once in a while to ensure fairness.
- // Otherwise two goroutines can completely occupy the local runqueue
- // by constantly respawning each other.
- tick = ((P*)g->m->p)->schedtick;
- // This is a fancy way to say tick%61==0,
- // it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors.
- if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime_sched->runqsize > 0) {
- runtime_lock(&runtime_sched->lock);
- gp = globrunqget((P*)g->m->p, 1);
- runtime_unlock(&runtime_sched->lock);
- if(gp)
- resetspinning();
- }
- if(gp == nil) {
- gp = runqget((P*)g->m->p);
- if(gp && g->m->spinning)
- runtime_throw("schedule: spinning with local work");
- }
- if(gp == nil) {
- gp = findrunnable(); // blocks until work is available
- resetspinning();
- }
-
- if(gp->lockedm) {
- // Hands off own p to the locked m,
- // then blocks waiting for a new p.
- startlockedm(gp);
- goto top;
- }
-
- execute(gp);
}
// Puts the current goroutine into a waiting state and calls unlockf.
@@ -1572,12 +974,12 @@ park0(G *gp)
m->waitlock = nil;
if(!ok) {
gp->atomicstatus = _Grunnable;
- execute(gp); // Schedule it back, never returns.
+ execute(gp, true); // Schedule it back, never returns.
}
}
if(m->lockedg) {
stoplockedm();
- execute(gp); // Never returns.
+ execute(gp, true); // Never returns.
}
schedule();
}
@@ -1606,7 +1008,7 @@ runtime_gosched0(G *gp)
runtime_unlock(&runtime_sched->lock);
if(m->lockedg) {
stoplockedm();
- execute(gp); // Never returns.
+ execute(gp, true); // Never returns.
}
schedule();
}
@@ -1643,6 +1045,7 @@ goexit0(G *gp)
gp->writebuf.__capacity = 0;
gp->waitreason = runtime_gostringnocopy(nil);
gp->param = nil;
+ m->curg->m = nil;
m->curg = nil;
m->lockedg = nil;
if(m->locked & ~_LockExternal) {
@@ -1896,12 +1299,12 @@ exitsyscall0(G *gp)
runtime_unlock(&runtime_sched->lock);
if(p) {
acquirep(p);
- execute(gp); // Never returns.
+ execute(gp, false); // Never returns.
}
if(m->lockedg) {
// Wait until another thread schedules gp and so m again.
stoplockedm();
- execute(gp); // Never returns.
+ execute(gp, false); // Never returns.
}
stopm();
schedule(); // Never returns.
@@ -2069,7 +1472,7 @@ __go_go(void (*fn)(void*), void* arg)
makeGContext(newg, sp, (uintptr)spsize);
- runqput(p, newg);
+ runqput(p, newg, true);
if(runtime_atomicload(&runtime_sched->npidle) != 0 && runtime_atomicload(&runtime_sched->nmspinning) == 0 && fn != runtime_main) // TODO: fast atomic
wakep();
@@ -2126,23 +1529,6 @@ retry:
return gp;
}
-// Purge all cached G's from gfree list to the global list.
-static void
-gfpurge(P *p)
-{
- G *gp;
-
- runtime_lock(&runtime_sched->gflock);
- while(p->gfreecnt) {
- p->gfreecnt--;
- gp = p->gfree;
- p->gfree = (G*)gp->schedlink;
- gp->schedlink = (uintptr)runtime_sched->gfree;
- runtime_sched->gfree = gp;
- }
- runtime_unlock(&runtime_sched->gflock);
-}
-
void
runtime_Breakpoint(void)
{
@@ -2157,38 +1543,6 @@ runtime_Gosched(void)
runtime_gosched();
}
-// Implementation of runtime.GOMAXPROCS.
-// delete when scheduler is even stronger
-
-intgo runtime_GOMAXPROCS(intgo)
- __asm__(GOSYM_PREFIX "runtime.GOMAXPROCS");
-
-intgo
-runtime_GOMAXPROCS(intgo n)
-{
- intgo ret;
-
- if(n > _MaxGomaxprocs)
- n = _MaxGomaxprocs;
- runtime_lock(&runtime_sched->lock);
- ret = (intgo)runtime_gomaxprocs;
- if(n <= 0 || n == ret) {
- runtime_unlock(&runtime_sched->lock);
- return ret;
- }
- runtime_unlock(&runtime_sched->lock);
-
- runtime_acquireWorldsema();
- g->m->gcing = 1;
- runtime_stopTheWorldWithSema();
- newprocs = (int32)n;
- g->m->gcing = 0;
- runtime_releaseWorldsema();
- runtime_startTheWorldWithSema();
-
- return ret;
-}
-
// lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below
// after they modify m->locked. Do not allow preemption during this call,
// or else the m might be different in this function than in the caller.
@@ -2365,599 +1719,6 @@ runtime_setcpuprofilerate_m(int32 hz)
g->m->locks--;
}
-// Change number of processors. The world is stopped, sched is locked.
-static void
-procresize(int32 new)
-{
- int32 i, old;
- bool pempty;
- G *gp;
- P *p;
- intgo j;
-
- old = runtime_gomaxprocs;
- if(old < 0 || old > _MaxGomaxprocs || new <= 0 || new >_MaxGomaxprocs)
- runtime_throw("procresize: invalid arg");
- // initialize new P's
- for(i = 0; i < new; i++) {
- p = runtime_allp[i];
- if(p == nil) {
- p = (P*)runtime_mallocgc(sizeof(*p), 0, FlagNoInvokeGC);
- p->id = i;
- p->status = _Pgcstop;
- p->deferpool.__values = &p->deferpoolbuf[0];
- p->deferpool.__count = 0;
- p->deferpool.__capacity = nelem(p->deferpoolbuf);
- runtime_atomicstorep(&runtime_allp[i], p);
- }
- if(p->mcache == nil) {
- if(old==0 && i==0)
- p->mcache = g->m->mcache; // bootstrap
- else
- p->mcache = runtime_allocmcache();
- }
- }
-
- // redistribute runnable G's evenly
- // collect all runnable goroutines in global queue preserving FIFO order
- // FIFO order is required to ensure fairness even during frequent GCs
- // see http://golang.org/issue/7126
- pempty = false;
- while(!pempty) {
- pempty = true;
- for(i = 0; i < old; i++) {
- p = runtime_allp[i];
- if(p->runqhead == p->runqtail)
- continue;
- pempty = false;
- // pop from tail of local queue
- p->runqtail--;
- gp = (G*)p->runq[p->runqtail%nelem(p->runq)];
- // push onto head of global queue
- gp->schedlink = runtime_sched->runqhead;
- runtime_sched->runqhead = (uintptr)gp;
- if(runtime_sched->runqtail == 0)
- runtime_sched->runqtail = (uintptr)gp;
- runtime_sched->runqsize++;
- }
- }
- // fill local queues with at most nelem(p->runq)/2 goroutines
- // start at 1 because current M already executes some G and will acquire allp[0] below,
- // so if we have a spare G we want to put it into allp[1].
- for(i = 1; (uint32)i < (uint32)new * nelem(p->runq)/2 && runtime_sched->runqsize > 0; i++) {
- gp = (G*)runtime_sched->runqhead;
- runtime_sched->runqhead = gp->schedlink;
- if(runtime_sched->runqhead == 0)
- runtime_sched->runqtail = 0;
- runtime_sched->runqsize--;
- runqput(runtime_allp[i%new], gp);
- }
-
- // free unused P's
- for(i = new; i < old; i++) {
- p = runtime_allp[i];
- for(j = 0; j < p->deferpool.__count; j++) {
- ((struct _defer**)p->deferpool.__values)[j] = nil;
- }
- p->deferpool.__count = 0;
- runtime_freemcache(p->mcache);
- p->mcache = nil;
- gfpurge(p);
- p->status = _Pdead;
- // can't free P itself because it can be referenced by an M in syscall
- }
-
- if(g->m->p)
- ((P*)g->m->p)->m = 0;
- g->m->p = 0;
- g->m->mcache = nil;
- p = runtime_allp[0];
- p->m = 0;
- p->status = _Pidle;
- acquirep(p);
- for(i = new-1; i > 0; i--) {
- p = runtime_allp[i];
- p->status = _Pidle;
- pidleput(p);
- }
- runtime_atomicstore((uint32*)&runtime_gomaxprocs, new);
-}
-
-// Associate p and the current m.
-static void
-acquirep(P *p)
-{
- M *m;
-
- m = g->m;
- if(m->p || m->mcache)
- runtime_throw("acquirep: already in go");
- if(p->m || p->status != _Pidle) {
- runtime_printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? ((M*)p->m)->id : 0, p->status);
- runtime_throw("acquirep: invalid p state");
- }
- m->mcache = p->mcache;
- m->p = (uintptr)p;
- p->m = (uintptr)m;
- p->status = _Prunning;
-}
-
-// Disassociate p and the current m.
-static P*
-releasep(void)
-{
- M *m;
- P *p;
-
- m = g->m;
- if(m->p == 0 || m->mcache == nil)
- runtime_throw("releasep: invalid arg");
- p = (P*)m->p;
- if((M*)p->m != m || p->mcache != m->mcache || p->status != _Prunning) {
- runtime_printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n",
- m, m->p, p->m, m->mcache, p->mcache, p->status);
- runtime_throw("releasep: invalid p state");
- }
- m->p = 0;
- m->mcache = nil;
- p->m = 0;
- p->status = _Pidle;
- return p;
-}
-
-static void
-incidlelocked(int32 v)
-{
- runtime_lock(&runtime_sched->lock);
- runtime_sched->nmidlelocked += v;
- if(v > 0)
- checkdead();
- runtime_unlock(&runtime_sched->lock);
-}
-
-static void
-sysmon(void)
-{
- uint32 idle, delay;
- int64 now, lastpoll, lasttrace;
- G *gp;
-
- lasttrace = 0;
- idle = 0; // how many cycles in succession we had not wokeup somebody
- delay = 0;
- for(;;) {
- if(idle == 0) // start with 20us sleep...
- delay = 20;
- else if(idle > 50) // start doubling the sleep after 1ms...
- delay *= 2;
- if(delay > 10*1000) // up to 10ms
- delay = 10*1000;
- runtime_usleep(delay);
- if(runtime_debug.schedtrace <= 0 &&
- (runtime_sched->gcwaiting || runtime_atomicload(&runtime_sched->npidle) == (uint32)runtime_gomaxprocs)) { // TODO: fast atomic
- runtime_lock(&runtime_sched->lock);
- if(runtime_atomicload(&runtime_sched->gcwaiting) || runtime_atomicload(&runtime_sched->npidle) == (uint32)runtime_gomaxprocs) {
- runtime_atomicstore(&runtime_sched->sysmonwait, 1);
- runtime_unlock(&runtime_sched->lock);
- runtime_notesleep(&runtime_sched->sysmonnote);
- runtime_noteclear(&runtime_sched->sysmonnote);
- idle = 0;
- delay = 20;
- } else
- runtime_unlock(&runtime_sched->lock);
- }
- // poll network if not polled for more than 10ms
- lastpoll = runtime_atomicload64(&runtime_sched->lastpoll);
- now = runtime_nanotime();
- if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) {
- runtime_cas64(&runtime_sched->lastpoll, lastpoll, now);
- gp = runtime_netpoll(false); // non-blocking
- if(gp) {
- // Need to decrement number of idle locked M's
- // (pretending that one more is running) before injectglist.
- // Otherwise it can lead to the following situation:
- // injectglist grabs all P's but before it starts M's to run the P's,
- // another M returns from syscall, finishes running its G,
- // observes that there is no work to do and no other running M's
- // and reports deadlock.
- incidlelocked(-1);
- injectglist(gp);
- incidlelocked(1);
- }
- }
- // retake P's blocked in syscalls
- // and preempt long running G's
- if(retake(now))
- idle = 0;
- else
- idle++;
-
- if(runtime_debug.schedtrace > 0 && lasttrace + runtime_debug.schedtrace*1000000ll <= now) {
- lasttrace = now;
- runtime_schedtrace(runtime_debug.scheddetail);
- }
- }
-}
-
-typedef struct Pdesc Pdesc;
-struct Pdesc
-{
- uint32 schedtick;
- int64 schedwhen;
- uint32 syscalltick;
- int64 syscallwhen;
-};
-static Pdesc pdesc[_MaxGomaxprocs];
-
-static uint32
-retake(int64 now)
-{
- uint32 i, s, n;
- int64 t;
- P *p;
- Pdesc *pd;
-
- n = 0;
- for(i = 0; i < (uint32)runtime_gomaxprocs; i++) {
- p = runtime_allp[i];
- if(p==nil)
- continue;
- pd = &pdesc[i];
- s = p->status;
- if(s == _Psyscall) {
- // Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
- t = p->syscalltick;
- if(pd->syscalltick != t) {
- pd->syscalltick = t;
- pd->syscallwhen = now;
- continue;
- }
- // On the one hand we don't want to retake Ps if there is no other work to do,
- // but on the other hand we want to retake them eventually
- // because they can prevent the sysmon thread from deep sleep.
- if(p->runqhead == p->runqtail &&
- runtime_atomicload(&runtime_sched->nmspinning) + runtime_atomicload(&runtime_sched->npidle) > 0 &&
- pd->syscallwhen + 10*1000*1000 > now)
- continue;
- // Need to decrement number of idle locked M's
- // (pretending that one more is running) before the CAS.
- // Otherwise the M from which we retake can exit the syscall,
- // increment nmidle and report deadlock.
- incidlelocked(-1);
- if(runtime_cas(&p->status, s, _Pidle)) {
- n++;
- handoffp(p);
- }
- incidlelocked(1);
- } else if(s == _Prunning) {
- // Preempt G if it's running for more than 10ms.
- t = p->schedtick;
- if(pd->schedtick != t) {
- pd->schedtick = t;
- pd->schedwhen = now;
- continue;
- }
- if(pd->schedwhen + 10*1000*1000 > now)
- continue;
- // preemptone(p);
- }
- }
- return n;
-}
-
-// Tell all goroutines that they have been preempted and they should stop.
-// This function is purely best-effort. It can fail to inform a goroutine if a
-// processor just started running it.
-// No locks need to be held.
-// Returns true if preemption request was issued to at least one goroutine.
-static bool
-preemptall(void)
-{
- return false;
-}
-
-// Put mp on midle list.
-// Sched must be locked.
-static void
-mput(M *mp)
-{
- mp->schedlink = runtime_sched->midle;
- runtime_sched->midle = (uintptr)mp;
- runtime_sched->nmidle++;
- checkdead();
-}
-
-// Try to get an m from midle list.
-// Sched must be locked.
-static M*
-mget(void)
-{
- M *mp;
-
- if((mp = (M*)runtime_sched->midle) != nil){
- runtime_sched->midle = mp->schedlink;
- runtime_sched->nmidle--;
- }
- return mp;
-}
-
-// Put gp on the global runnable queue.
-// Sched must be locked.
-static void
-globrunqput(G *gp)
-{
- gp->schedlink = 0;
- if(runtime_sched->runqtail)
- ((G*)runtime_sched->runqtail)->schedlink = (uintptr)gp;
- else
- runtime_sched->runqhead = (uintptr)gp;
- runtime_sched->runqtail = (uintptr)gp;
- runtime_sched->runqsize++;
-}
-
-// Put a batch of runnable goroutines on the global runnable queue.
-// Sched must be locked.
-static void
-globrunqputbatch(G *ghead, G *gtail, int32 n)
-{
- gtail->schedlink = 0;
- if(runtime_sched->runqtail)
- ((G*)runtime_sched->runqtail)->schedlink = (uintptr)ghead;
- else
- runtime_sched->runqhead = (uintptr)ghead;
- runtime_sched->runqtail = (uintptr)gtail;
- runtime_sched->runqsize += n;
-}
-
-// Try get a batch of G's from the global runnable queue.
-// Sched must be locked.
-static G*
-globrunqget(P *p, int32 max)
-{
- G *gp, *gp1;
- int32 n;
-
- if(runtime_sched->runqsize == 0)
- return nil;
- n = runtime_sched->runqsize/runtime_gomaxprocs+1;
- if(n > runtime_sched->runqsize)
- n = runtime_sched->runqsize;
- if(max > 0 && n > max)
- n = max;
- if((uint32)n > nelem(p->runq)/2)
- n = nelem(p->runq)/2;
- runtime_sched->runqsize -= n;
- if(runtime_sched->runqsize == 0)
- runtime_sched->runqtail = 0;
- gp = (G*)runtime_sched->runqhead;
- runtime_sched->runqhead = gp->schedlink;
- n--;
- while(n--) {
- gp1 = (G*)runtime_sched->runqhead;
- runtime_sched->runqhead = gp1->schedlink;
- runqput(p, gp1);
- }
- return gp;
-}
-
-// Put p to on pidle list.
-// Sched must be locked.
-static void
-pidleput(P *p)
-{
- p->link = runtime_sched->pidle;
- runtime_sched->pidle = (uintptr)p;
- runtime_xadd(&runtime_sched->npidle, 1); // TODO: fast atomic
-}
-
-// Try get a p from pidle list.
-// Sched must be locked.
-static P*
-pidleget(void)
-{
- P *p;
-
- p = (P*)runtime_sched->pidle;
- if(p) {
- runtime_sched->pidle = p->link;
- runtime_xadd(&runtime_sched->npidle, -1); // TODO: fast atomic
- }
- return p;
-}
-
-// Try to put g on local runnable queue.
-// If it's full, put onto global queue.
-// Executed only by the owner P.
-static void
-runqput(P *p, G *gp)
-{
- uint32 h, t;
-
-retry:
- h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers
- t = p->runqtail;
- if(t - h < nelem(p->runq)) {
- p->runq[t%nelem(p->runq)] = (uintptr)gp;
- runtime_atomicstore(&p->runqtail, t+1); // store-release, makes the item available for consumption
- return;
- }
- if(runqputslow(p, gp, h, t))
- return;
- // the queue is not full, now the put above must succeed
- goto retry;
-}
-
-// Put g and a batch of work from local runnable queue on global queue.
-// Executed only by the owner P.
-static bool
-runqputslow(P *p, G *gp, uint32 h, uint32 t)
-{
- G *batch[nelem(p->runq)/2+1];
- uint32 n, i;
-
- // First, grab a batch from local queue.
- n = t-h;
- n = n/2;
- if(n != nelem(p->runq)/2)
- runtime_throw("runqputslow: queue is not full");
- for(i=0; i<n; i++)
- batch[i] = (G*)p->runq[(h+i)%nelem(p->runq)];
- if(!runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume
- return false;
- batch[n] = gp;
- // Link the goroutines.
- for(i=0; i<n; i++)
- batch[i]->schedlink = (uintptr)batch[i+1];
- // Now put the batch on global queue.
- runtime_lock(&runtime_sched->lock);
- globrunqputbatch(batch[0], batch[n], n+1);
- runtime_unlock(&runtime_sched->lock);
- return true;
-}
-
-// Get g from local runnable queue.
-// Executed only by the owner P.
-static G*
-runqget(P *p)
-{
- G *gp;
- uint32 t, h;
-
- for(;;) {
- h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
- t = p->runqtail;
- if(t == h)
- return nil;
- gp = (G*)p->runq[h%nelem(p->runq)];
- if(runtime_cas(&p->runqhead, h, h+1)) // cas-release, commits consume
- return gp;
- }
-}
-
-// Grabs a batch of goroutines from local runnable queue.
-// batch array must be of size nelem(p->runq)/2. Returns number of grabbed goroutines.
-// Can be executed by any P.
-static uint32
-runqgrab(P *p, G **batch)
-{
- uint32 t, h, n, i;
-
- for(;;) {
- h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
- t = runtime_atomicload(&p->runqtail); // load-acquire, synchronize with the producer
- n = t-h;
- n = n - n/2;
- if(n == 0)
- break;
- if(n > nelem(p->runq)/2) // read inconsistent h and t
- continue;
- for(i=0; i<n; i++)
- batch[i] = (G*)p->runq[(h+i)%nelem(p->runq)];
- if(runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume
- break;
- }
- return n;
-}
-
-// Steal half of elements from local runnable queue of p2
-// and put onto local runnable queue of p.
-// Returns one of the stolen elements (or nil if failed).
-static G*
-runqsteal(P *p, P *p2)
-{
- G *gp;
- G *batch[nelem(p->runq)/2];
- uint32 t, h, n, i;
-
- n = runqgrab(p2, batch);
- if(n == 0)
- return nil;
- n--;
- gp = batch[n];
- if(n == 0)
- return gp;
- h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers
- t = p->runqtail;
- if(t - h + n >= nelem(p->runq))
- runtime_throw("runqsteal: runq overflow");
- for(i=0; i<n; i++, t++)
- p->runq[t%nelem(p->runq)] = (uintptr)batch[i];
- runtime_atomicstore(&p->runqtail, t); // store-release, makes the item available for consumption
- return gp;
-}
-
-void runtime_testSchedLocalQueue(void)
- __asm__("runtime.testSchedLocalQueue");
-
-void
-runtime_testSchedLocalQueue(void)
-{
- P p;
- G gs[nelem(p.runq)];
- int32 i, j;
-
- runtime_memclr((byte*)&p, sizeof(p));
-
- for(i = 0; i < (int32)nelem(gs); i++) {
- if(runqget(&p) != nil)
- runtime_throw("runq is not empty initially");
- for(j = 0; j < i; j++)
- runqput(&p, &gs[i]);
- for(j = 0; j < i; j++) {
- if(runqget(&p) != &gs[i]) {
- runtime_printf("bad element at iter %d/%d\n", i, j);
- runtime_throw("bad element");
- }
- }
- if(runqget(&p) != nil)
- runtime_throw("runq is not empty afterwards");
- }
-}
-
-void runtime_testSchedLocalQueueSteal(void)
- __asm__("runtime.testSchedLocalQueueSteal");
-
-void
-runtime_testSchedLocalQueueSteal(void)
-{
- P p1, p2;
- G gs[nelem(p1.runq)], *gp;
- int32 i, j, s;
-
- runtime_memclr((byte*)&p1, sizeof(p1));
- runtime_memclr((byte*)&p2, sizeof(p2));
-
- for(i = 0; i < (int32)nelem(gs); i++) {
- for(j = 0; j < i; j++) {
- gs[j].sig = 0;
- runqput(&p1, &gs[j]);
- }
- gp = runqsteal(&p2, &p1);
- s = 0;
- if(gp) {
- s++;
- gp->sig++;
- }
- while((gp = runqget(&p2)) != nil) {
- s++;
- gp->sig++;
- }
- while((gp = runqget(&p1)) != nil)
- gp->sig++;
- for(j = 0; j < i; j++) {
- if(gs[j].sig != 1) {
- runtime_printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i);
- runtime_throw("bad element");
- }
- }
- if(s != i/2 && s != i/2+1) {
- runtime_printf("bad steal %d, want %d or %d, iter %d\n",
- s, i/2, i/2+1, i);
- runtime_throw("bad steal");
- }
- }
-}
-
intgo
runtime_setmaxthreads(intgo in)
{
@@ -3041,56 +1802,15 @@ os_beforeExit()
{
}
-// Active spinning for sync.Mutex.
-//go:linkname sync_runtime_canSpin sync.runtime_canSpin
-
-enum
-{
- ACTIVE_SPIN = 4,
- ACTIVE_SPIN_CNT = 30,
-};
-
-extern _Bool sync_runtime_canSpin(intgo i)
- __asm__ (GOSYM_PREFIX "sync.runtime_canSpin");
-
-_Bool
-sync_runtime_canSpin(intgo i)
-{
- P *p;
-
- // sync.Mutex is cooperative, so we are conservative with spinning.
- // Spin only few times and only if running on a multicore machine and
- // GOMAXPROCS>1 and there is at least one other running P and local runq is empty.
- // As opposed to runtime mutex we don't do passive spinning here,
- // because there can be work on global runq or on other Ps.
- if (i >= ACTIVE_SPIN || runtime_ncpu <= 1 || runtime_gomaxprocs <= (int32)(runtime_sched->npidle+runtime_sched->nmspinning)+1) {
- return false;
- }
- p = (P*)g->m->p;
- return p != nil && p->runqhead == p->runqtail;
-}
-
-//go:linkname sync_runtime_doSpin sync.runtime_doSpin
-//go:nosplit
-
-extern void sync_runtime_doSpin(void)
- __asm__ (GOSYM_PREFIX "sync.runtime_doSpin");
-
-void
-sync_runtime_doSpin()
-{
- runtime_procyield(ACTIVE_SPIN_CNT);
-}
-
// For Go code to look at variables, until we port proc.go.
-extern M** runtime_go_allm(void)
+extern M* runtime_go_allm(void)
__asm__ (GOSYM_PREFIX "runtime.allm");
-M**
+M*
runtime_go_allm()
{
- return &runtime_allm;
+ return runtime_allm;
}
intgo NumCPU(void) __asm__ (GOSYM_PREFIX "runtime.NumCPU");