author     Ian Lance Taylor <ian@gcc.gnu.org>        2013-07-16 06:54:42 +0000
committer  Ian Lance Taylor <ian@gcc.gnu.org>        2013-07-16 06:54:42 +0000
commit     be47d6eceffd2c5dbbc1566d5eea490527fb2bd4 (patch)
tree       0e8fda573576bb4181dba29d0e88380a8c38fafd /libgo/runtime
parent     efb30cdeb003fd7c585ee0d7657340086abcbd9e (diff)
libgo: Update to Go 1.1.1.
From-SVN: r200974
Diffstat (limited to 'libgo/runtime')
39 files changed, 4078 insertions, 1354 deletions
diff --git a/libgo/runtime/chan.c b/libgo/runtime/chan.c index a79ee9e..6f52a1d 100644 --- a/libgo/runtime/chan.c +++ b/libgo/runtime/chan.c @@ -35,6 +35,8 @@ struct WaitQ SudoG* last; }; +// The garbage collector is assuming that Hchan can only contain pointers into the stack +// and cannot contain pointers into the heap. struct Hchan { uintgo qcount; // total data in the q @@ -49,6 +51,8 @@ struct Hchan Lock; }; +uint32 runtime_Hchansize = sizeof(Hchan); + // Buffer follows Hchan immediately in memory. // chanbuf(c, i) is pointer to the i'th slot in the buffer. #define chanbuf(c, i) ((byte*)((c)+1)+(uintptr)(c)->elemsize*(i)) @@ -107,6 +111,7 @@ runtime_makechan_c(ChanType *t, int64 hint) c->elemsize = elem->__size; c->elemalign = elem->__align; c->dataqsiz = hint; + runtime_settype(c, (uintptr)t | TypeInfo_Chan); if(debug) runtime_printf("makechan: chan=%p; elemsize=%D; elemalign=%d; dataqsiz=%D\n", @@ -875,16 +880,27 @@ sellock(Select *sel) static void selunlock(Select *sel) { - uint32 i; - Hchan *c, *c0; + int32 i, n, r; + Hchan *c; - c = nil; - for(i=sel->ncase; i-->0;) { - c0 = sel->lockorder[i]; - if(c0 && c0 != c) { - c = c0; - runtime_unlock(c); - } + // We must be very careful here to not touch sel after we have unlocked + // the last lock, because sel can be freed right after the last unlock. + // Consider the following situation. + // First M calls runtime_park() in runtime_selectgo() passing the sel. + // Once runtime_park() has unlocked the last lock, another M makes + // the G that calls select runnable again and schedules it for execution. + // When the G runs on another M, it locks all the locks and frees sel. + // Now if the first M touches sel, it will access freed memory. + n = (int32)sel->ncase; + r = 0; + // skip the default case + if(n>0 && sel->lockorder[0] == nil) + r = 1; + for(i = n-1; i >= r; i--) { + c = sel->lockorder[i]; + if(i>0 && sel->lockorder[i-1] == c) + continue; // will unlock it on the next iteration + runtime_unlock(c); } } @@ -910,7 +926,7 @@ static int selectgo(Select **selp) { Select *sel; - uint32 o, i, j; + uint32 o, i, j, k; Scase *cas, *dfl; Hchan *c; SudoG *sg; @@ -946,12 +962,42 @@ selectgo(Select **selp) } // sort the cases by Hchan address to get the locking order. + // simple heap sort, to guarantee n log n time and constant stack footprint. 
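
The new comment above announces that the per-case insertion sort of channel addresses has been replaced by a heap sort with guaranteed n log n time and a constant stack footprint; the sift-up and sift-down passes follow immediately below in the same hunk. As a standalone illustration (not part of the patch), here is a minimal C sketch of that same heap-sort pattern applied to a plain array of pointers, with illustrative names:

/* Standalone sketch: heap-sort an array of pointers in ascending
   order using the same sift-up (build) and sift-down (extract)
   passes as the selectgo() lock-ordering code in the hunk above. */
#include <stdio.h>
#include <stddef.h>

static void
heapsort_ptrs(void **a, size_t n)
{
	size_t i, j, k;
	void *c;

	/* Build a max-heap by sifting each element up. */
	for(i = 0; i < n; i++) {
		j = i;
		c = a[j];
		while(j > 0 && a[k = (j-1)/2] < c) {
			a[j] = a[k];
			j = k;
		}
		a[j] = c;
	}
	/* Repeatedly move the maximum to the end and sift the hole down. */
	for(i = n; i-- > 0; ) {
		c = a[i];
		a[i] = a[0];
		j = 0;
		for(;;) {
			k = j*2 + 1;
			if(k >= i)
				break;
			if(k+1 < i && a[k] < a[k+1])
				k++;
			if(c < a[k]) {
				a[j] = a[k];
				j = k;
				continue;
			}
			break;
		}
		a[j] = c;
	}
}

int
main(void)
{
	int dummy[5];
	void *chans[5] = { &dummy[3], &dummy[0], &dummy[4], &dummy[1], &dummy[2] };
	size_t i;

	heapsort_ptrs(chans, 5);
	for(i = 0; i < 5; i++)
		printf("%p\n", chans[i]);
	return 0;
}

Sorting by address gives every goroutine the same lock-acquisition order, which is what keeps the sellock/selunlock pair above deadlock-free.
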
for(i=0; i<sel->ncase; i++) { - c = sel->scase[i].chan; - for(j=i; j>0 && sel->lockorder[j-1] >= c; j--) - sel->lockorder[j] = sel->lockorder[j-1]; + j = i; + c = sel->scase[j].chan; + while(j > 0 && sel->lockorder[k=(j-1)/2] < c) { + sel->lockorder[j] = sel->lockorder[k]; + j = k; + } sel->lockorder[j] = c; } + for(i=sel->ncase; i-->0; ) { + c = sel->lockorder[i]; + sel->lockorder[i] = sel->lockorder[0]; + j = 0; + for(;;) { + k = j*2+1; + if(k >= i) + break; + if(k+1 < i && sel->lockorder[k] < sel->lockorder[k+1]) + k++; + if(c < sel->lockorder[k]) { + sel->lockorder[j] = sel->lockorder[k]; + j = k; + continue; + } + break; + } + sel->lockorder[j] = c; + } + /* + for(i=0; i+1<sel->ncase; i++) + if(sel->lockorder[i] > sel->lockorder[i+1]) { + runtime_printf("i=%d %p %p\n", i, sel->lockorder[i], sel->lockorder[i+1]); + runtime_throw("select: broken sort"); + } + */ sellock(sel); loop: @@ -1048,7 +1094,7 @@ loop: c = cas->chan; if(c->dataqsiz > 0) - runtime_throw("selectgo: shouldnt happen"); + runtime_throw("selectgo: shouldn't happen"); if(debug) runtime_printf("wait-return: sel=%p c=%p cas=%p kind=%d\n", diff --git a/libgo/runtime/cpuprof.c b/libgo/runtime/cpuprof.c index 3ef08ef..5163873 100644 --- a/libgo/runtime/cpuprof.c +++ b/libgo/runtime/cpuprof.c @@ -121,7 +121,9 @@ static uintptr eod[3] = {0, 1, 0}; // LostProfileData is a no-op function used in profiles // to mark the number of profiling stack traces that were // discarded due to slow data writers. -static void LostProfileData(void) { +static void +LostProfileData(void) +{ } extern void runtime_SetCPUProfileRate(intgo) @@ -365,7 +367,7 @@ getprofile(Profile *p) return ret; // Wait for new log. - runtime_entersyscall(); + runtime_entersyscallblock(); runtime_notesleep(&p->wait); runtime_exitsyscall(); runtime_noteclear(&p->wait); diff --git a/libgo/runtime/go-main.c b/libgo/runtime/go-main.c index 97d1405..77233d3 100644 --- a/libgo/runtime/go-main.c +++ b/libgo/runtime/go-main.c @@ -30,9 +30,6 @@ extern char **environ; -extern void runtime_main (void); -static void mainstart (void *); - /* The main function. 
*/ int @@ -42,13 +39,7 @@ main (int argc, char **argv) runtime_args (argc, (byte **) argv); runtime_osinit (); runtime_schedinit (); - __go_go (mainstart, NULL); + __go_go (runtime_main, NULL); runtime_mstart (runtime_m ()); abort (); } - -static void -mainstart (void *arg __attribute__ ((unused))) -{ - runtime_main (); -} diff --git a/libgo/runtime/go-map-index.c b/libgo/runtime/go-map-index.c index a602d2a..499641c 100644 --- a/libgo/runtime/go-map-index.c +++ b/libgo/runtime/go-map-index.c @@ -98,7 +98,7 @@ __go_map_index (struct __go_map *map, const void *key, _Bool insert) key_descriptor = descriptor->__map_descriptor->__key_type; key_offset = descriptor->__key_offset; key_size = key_descriptor->__size; - __go_assert (key_size != 0 && key_size != -1UL); + __go_assert (key_size != -1UL); equalfn = key_descriptor->__equalfn; key_hash = key_descriptor->__hashfn (key, key_size); diff --git a/libgo/runtime/go-reflect-map.c b/libgo/runtime/go-reflect-map.c index 3697537..1ae7c96 100644 --- a/libgo/runtime/go-reflect-map.c +++ b/libgo/runtime/go-reflect-map.c @@ -238,3 +238,12 @@ makemap (const struct __go_map_type *t) __builtin_memcpy (ret, &map, sizeof (void *)); return (uintptr_t) ret; } + +extern _Bool ismapkey (const struct __go_type_descriptor *) + __asm__ (GOSYM_PREFIX "reflect.ismapkey"); + +_Bool +ismapkey (const struct __go_type_descriptor *typ) +{ + return typ != NULL && typ->__hashfn != __go_type_hash_error; +} diff --git a/libgo/runtime/go-signal.c b/libgo/runtime/go-signal.c index 1965e05..1e80057 100644 --- a/libgo/runtime/go-signal.c +++ b/libgo/runtime/go-signal.c @@ -12,6 +12,7 @@ #include "runtime.h" #include "go-assert.h" #include "go-panic.h" +#include "signal_unix.h" #ifndef SA_RESTART #define SA_RESTART 0 @@ -157,12 +158,15 @@ runtime_badsignal(int32 sig) /* Handle a signal, for cases where we don't panic. We can split the stack here. */ -static void -sig_handler (int sig) +void +runtime_sighandler (int sig, Siginfo *info, + void *context __attribute__ ((unused)), G *gp) { + M *m; int i; - if (runtime_m () == NULL) + m = runtime_m (); + if (m == NULL) { runtime_badsignal (sig); return; @@ -171,7 +175,8 @@ sig_handler (int sig) #ifdef SIGPROF if (sig == SIGPROF) { - runtime_sigprof (); + if (gp != runtime_m ()->g0 && gp != runtime_m ()->gsignal) + runtime_sigprof (); return; } #endif @@ -179,13 +184,18 @@ sig_handler (int sig) for (i = 0; runtime_sigtab[i].sig != -1; ++i) { SigTab *t; + bool notify, crash; t = &runtime_sigtab[i]; if (t->sig != sig) continue; - if ((t->flags & SigNotify) != 0) + notify = false; +#ifdef SA_SIGINFO + notify = info != NULL && info->si_code == SI_USER; +#endif + if (notify || (t->flags & SigNotify) != 0) { if (__go_sigsend (sig)) return; @@ -210,9 +220,15 @@ sig_handler (int sig) runtime_printf ("%s\n", name); } + if (m->lockedg != NULL && m->ncgo > 0 && gp == m->g0) + { + runtime_printf("signal arrived during cgo execution\n"); + gp = m->lockedg; + } + runtime_printf ("\n"); - if (runtime_gotraceback ()) + if (runtime_gotraceback (&crash)) { G *g; @@ -225,6 +241,9 @@ sig_handler (int sig) a readable form. */ } + if (crash) + runtime_crash (); + runtime_exit (2); } @@ -259,15 +278,14 @@ sig_panic_leadin (int sig) permitted to split the stack. 
*/ static void -sig_panic_info_handler (int sig, siginfo_t *info, - void *context __attribute__ ((unused))) +sig_panic_info_handler (int sig, Siginfo *info, void *context) { G *g; g = runtime_g (); if (g == NULL || info->si_code == SI_USER) { - sig_handler (sig); + runtime_sighandler (sig, info, context, g); return; } @@ -331,7 +349,7 @@ sig_panic_handler (int sig) g = runtime_g (); if (g == NULL) { - sig_handler (sig); + runtime_sighandler (sig, NULL, NULL, g); return; } @@ -373,10 +391,10 @@ sig_panic_handler (int sig) the stack. */ static void -sig_tramp (int) __attribute__ ((no_split_stack)); +sig_tramp_info (int, Siginfo *, void *) __attribute__ ((no_split_stack)); static void -sig_tramp (int sig) +sig_tramp_info (int sig, Siginfo *info, void *context) { G *gp; M *mp; @@ -403,7 +421,7 @@ sig_tramp (int sig) #endif } - sig_handler (sig); + runtime_sighandler (sig, info, context, gp); /* We are going to return back to the signal trampoline and then to whatever we were doing before we got the signal. Restore the @@ -418,8 +436,20 @@ sig_tramp (int sig) } } +#ifndef SA_SIGINFO + +static void sig_tramp (int sig) __attribute__ ((no_split_stack)); + +static void +sig_tramp (int sig) +{ + sig_tramp_info (sig, NULL, NULL); +} + +#endif + void -runtime_setsig (int32 i, bool def __attribute__ ((unused)), bool restart) +runtime_setsig (int32 i, GoSighandler *fn, bool restart) { struct sigaction sa; int r; @@ -434,17 +464,30 @@ runtime_setsig (int32 i, bool def __attribute__ ((unused)), bool restart) if ((t->flags & SigPanic) == 0) { +#ifdef SA_SIGINFO + sa.sa_flags = SA_ONSTACK | SA_SIGINFO; + if (fn == runtime_sighandler) + fn = (void *) sig_tramp_info; + sa.sa_sigaction = (void *) fn; +#else sa.sa_flags = SA_ONSTACK; - sa.sa_handler = sig_tramp; + if (fn == runtime_sighandler) + fn = (void *) sig_tramp; + sa.sa_handler = (void *) fn; +#endif } else { #ifdef SA_SIGINFO sa.sa_flags = SA_SIGINFO; - sa.sa_sigaction = sig_panic_info_handler; + if (fn == runtime_sighandler) + fn = (void *) sig_panic_info_handler; + sa.sa_sigaction = (void *) fn; #else sa.sa_flags = 0; - sa.sa_handler = sig_panic_handler; + if (fn == runtime_sighandler) + fn = (void *) sig_panic_handler; + sa.sa_handler = (void *) fn; #endif } @@ -455,6 +498,37 @@ runtime_setsig (int32 i, bool def __attribute__ ((unused)), bool restart) __go_assert (0); } +GoSighandler* +runtime_getsig (int32 i) +{ + struct sigaction sa; + int r; + SigTab *t; + + memset (&sa, 0, sizeof sa); + + r = sigemptyset (&sa.sa_mask); + __go_assert (r == 0); + + t = &runtime_sigtab[i]; + + if (sigaction (t->sig, NULL, &sa) != 0) + runtime_throw ("sigaction read failure"); + + if ((void *) sa.sa_handler == sig_tramp_info) + return runtime_sighandler; +#ifdef SA_SIGINFO + if ((void *) sa.sa_handler == sig_panic_info_handler) + return runtime_sighandler; +#else + if ((void *) sa.sa_handler == sig_tramp + || (void *) sa.sa_handler == sig_panic_handler) + return runtime_sighandler; +#endif + + return (void *) sa.sa_handler; +} + /* Used by the os package to raise SIGPIPE. */ void os_sigpipe (void) __asm__ (GOSYM_PREFIX "os.sigpipe"); diff --git a/libgo/runtime/go-unsafe-new.c b/libgo/runtime/go-unsafe-new.c index 54788f1..7848642 100644 --- a/libgo/runtime/go-unsafe-new.c +++ b/libgo/runtime/go-unsafe-new.c @@ -21,14 +21,5 @@ void *unsafe_New (const struct __go_type_descriptor *) void * unsafe_New (const struct __go_type_descriptor *descriptor) { - uint32 flag; - void *ret; - - flag = (descriptor->__code & GO_NO_POINTERS) != 0 ? 
FlagNoPointers : 0; - ret = runtime_mallocgc (descriptor->__size, flag, 1, 1); - - if (UseSpanType && flag == 0) - runtime_settype (ret, (uintptr) descriptor | TypeInfo_SingleObject); - - return ret; + return runtime_cnew (descriptor); } diff --git a/libgo/runtime/go-unsafe-newarray.c b/libgo/runtime/go-unsafe-newarray.c index e4fb336..f5c5efc 100644 --- a/libgo/runtime/go-unsafe-newarray.c +++ b/libgo/runtime/go-unsafe-newarray.c @@ -21,21 +21,5 @@ void *unsafe_NewArray (const struct __go_type_descriptor *, intgo) void * unsafe_NewArray (const struct __go_type_descriptor *descriptor, intgo n) { - uint64 size; - void *ret; - - size = n * descriptor->__size; - if (size == 0) - ret = &runtime_zerobase; - else if ((descriptor->__code & GO_NO_POINTERS) != 0) - ret = runtime_mallocgc (size, FlagNoPointers, 1, 1); - else - { - ret = runtime_mallocgc (size, 0, 1, 1); - - if (UseSpanType) - runtime_settype (ret, (uintptr) descriptor | TypeInfo_Array); - } - - return ret; + return runtime_cnewarray (descriptor, n); } diff --git a/libgo/runtime/lock_futex.c b/libgo/runtime/lock_futex.c index 5374aff..4b9651a 100644 --- a/libgo/runtime/lock_futex.c +++ b/libgo/runtime/lock_futex.c @@ -41,7 +41,7 @@ runtime_lock(Lock *l) runtime_throw("runtime_lock: lock count"); // Speculative grab for lock. - v = runtime_xchg(&l->key, MUTEX_LOCKED); + v = runtime_xchg((uint32*)&l->key, MUTEX_LOCKED); if(v == MUTEX_UNLOCKED) return; @@ -64,7 +64,7 @@ runtime_lock(Lock *l) // Try for lock, spinning. for(i = 0; i < spin; i++) { while(l->key == MUTEX_UNLOCKED) - if(runtime_cas(&l->key, MUTEX_UNLOCKED, wait)) + if(runtime_cas((uint32*)&l->key, MUTEX_UNLOCKED, wait)) return; runtime_procyield(ACTIVE_SPIN_CNT); } @@ -72,17 +72,17 @@ runtime_lock(Lock *l) // Try for lock, rescheduling. for(i=0; i < PASSIVE_SPIN; i++) { while(l->key == MUTEX_UNLOCKED) - if(runtime_cas(&l->key, MUTEX_UNLOCKED, wait)) + if(runtime_cas((uint32*)&l->key, MUTEX_UNLOCKED, wait)) return; runtime_osyield(); } // Sleep. - v = runtime_xchg(&l->key, MUTEX_SLEEPING); + v = runtime_xchg((uint32*)&l->key, MUTEX_SLEEPING); if(v == MUTEX_UNLOCKED) return; wait = MUTEX_SLEEPING; - runtime_futexsleep(&l->key, MUTEX_SLEEPING, -1); + runtime_futexsleep((uint32*)&l->key, MUTEX_SLEEPING, -1); } } @@ -94,11 +94,11 @@ runtime_unlock(Lock *l) if(--runtime_m()->locks < 0) runtime_throw("runtime_unlock: lock count"); - v = runtime_xchg(&l->key, MUTEX_UNLOCKED); + v = runtime_xchg((uint32*)&l->key, MUTEX_UNLOCKED); if(v == MUTEX_UNLOCKED) runtime_throw("unlock of unlocked lock"); if(v == MUTEX_SLEEPING) - runtime_futexwakeup(&l->key, 1); + runtime_futexwakeup((uint32*)&l->key, 1); } // One-time notifications. 
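
The lock_futex.c hunk above only adds (uint32*) casts (Lock.key is now shared, as a uintptr, with the semaphore-based implementation in lock_sema.c below), but it also shows the shape of the algorithm: a speculative atomic exchange, a bounded spin with compare-and-swap, then sleeping until woken. A minimal, portable C11 sketch of that shape, using sched_yield() as a stand-in for futexsleep/futexwakeup (the names and constants here are illustrative, not the runtime's API):

/* Sketch of the speculative-grab / spin / yield pattern used by
   runtime_lock() above.  The real code sleeps on a futex instead of
   yielding in a loop. */
#include <stdatomic.h>
#include <sched.h>

enum { UNLOCKED = 0, LOCKED = 1, ACTIVE_SPIN = 4, SPIN_CNT = 30 };

typedef struct { atomic_uint key; } SketchLock;

static void
sketch_lock(SketchLock *l)
{
	unsigned v, i;

	/* Speculative grab. */
	v = atomic_exchange(&l->key, LOCKED);
	if(v == UNLOCKED)
		return;

	for(;;) {
		/* Try for the lock, spinning briefly. */
		for(i = 0; i < ACTIVE_SPIN*SPIN_CNT; i++) {
			unsigned expect = UNLOCKED;
			if(atomic_load(&l->key) == UNLOCKED &&
			   atomic_compare_exchange_weak(&l->key, &expect, LOCKED))
				return;
		}
		/* Give up the CPU; the runtime would futexsleep() here. */
		sched_yield();
	}
}

static void
sketch_unlock(SketchLock *l)
{
	atomic_store(&l->key, UNLOCKED);
	/* The runtime would futexwakeup() a sleeper here if one exists. */
}

int
main(void)
{
	SketchLock l;

	atomic_init(&l.key, UNLOCKED);
	sketch_lock(&l);
	sketch_unlock(&l);
	return 0;
}
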
@@ -111,9 +111,9 @@ runtime_noteclear(Note *n) void runtime_notewakeup(Note *n) { - if(runtime_xchg(&n->key, 1)) + if(runtime_xchg((uint32*)&n->key, 1)) runtime_throw("notewakeup - double wakeup"); - runtime_futexwakeup(&n->key, 1); + runtime_futexwakeup((uint32*)&n->key, 1); } void @@ -121,8 +121,8 @@ runtime_notesleep(Note *n) { if(runtime_m()->profilehz > 0) runtime_setprof(false); - while(runtime_atomicload(&n->key) == 0) - runtime_futexsleep(&n->key, 0, -1); + while(runtime_atomicload((uint32*)&n->key) == 0) + runtime_futexsleep((uint32*)&n->key, 0, -1); if(runtime_m()->profilehz > 0) runtime_setprof(true); } @@ -137,15 +137,15 @@ runtime_notetsleep(Note *n, int64 ns) return; } - if(runtime_atomicload(&n->key) != 0) + if(runtime_atomicload((uint32*)&n->key) != 0) return; if(runtime_m()->profilehz > 0) runtime_setprof(false); deadline = runtime_nanotime() + ns; for(;;) { - runtime_futexsleep(&n->key, 0, ns); - if(runtime_atomicload(&n->key) != 0) + runtime_futexsleep((uint32*)&n->key, 0, ns); + if(runtime_atomicload((uint32*)&n->key) != 0) break; now = runtime_nanotime(); if(now >= deadline) diff --git a/libgo/runtime/lock_sema.c b/libgo/runtime/lock_sema.c index 8c4b397..2663c54 100644 --- a/libgo/runtime/lock_sema.c +++ b/libgo/runtime/lock_sema.c @@ -43,7 +43,7 @@ runtime_lock(Lock *l) runtime_throw("runtime_lock: lock count"); // Speculative grab for lock. - if(runtime_casp(&l->waitm, nil, (void*)LOCKED)) + if(runtime_casp((void**)&l->key, nil, (void*)LOCKED)) return; if(m->waitsema == 0) @@ -56,10 +56,10 @@ runtime_lock(Lock *l) spin = ACTIVE_SPIN; for(i=0;; i++) { - v = (uintptr)runtime_atomicloadp(&l->waitm); + v = (uintptr)runtime_atomicloadp((void**)&l->key); if((v&LOCKED) == 0) { unlocked: - if(runtime_casp(&l->waitm, (void*)v, (void*)(v|LOCKED))) + if(runtime_casp((void**)&l->key, (void*)v, (void*)(v|LOCKED))) return; i = 0; } @@ -74,9 +74,9 @@ unlocked: // Queue this M. for(;;) { m->nextwaitm = (void*)(v&~LOCKED); - if(runtime_casp(&l->waitm, (void*)v, (void*)((uintptr)m|LOCKED))) + if(runtime_casp((void**)&l->key, (void*)v, (void*)((uintptr)m|LOCKED))) break; - v = (uintptr)runtime_atomicloadp(&l->waitm); + v = (uintptr)runtime_atomicloadp((void**)&l->key); if((v&LOCKED) == 0) goto unlocked; } @@ -99,15 +99,15 @@ runtime_unlock(Lock *l) runtime_throw("runtime_unlock: lock count"); for(;;) { - v = (uintptr)runtime_atomicloadp(&l->waitm); + v = (uintptr)runtime_atomicloadp((void**)&l->key); if(v == LOCKED) { - if(runtime_casp(&l->waitm, (void*)LOCKED, nil)) + if(runtime_casp((void**)&l->key, (void*)LOCKED, nil)) break; } else { // Other M's are waiting for the lock. // Dequeue an M. mp = (void*)(v&~LOCKED); - if(runtime_casp(&l->waitm, (void*)v, mp->nextwaitm)) { + if(runtime_casp((void**)&l->key, (void*)v, mp->nextwaitm)) { // Dequeued an M. Wake it. runtime_semawakeup(mp); break; @@ -120,7 +120,7 @@ runtime_unlock(Lock *l) void runtime_noteclear(Note *n) { - n->waitm = nil; + n->key = 0; } void @@ -129,8 +129,8 @@ runtime_notewakeup(Note *n) M *mp; do - mp = runtime_atomicloadp(&n->waitm); - while(!runtime_casp(&n->waitm, mp, (void*)LOCKED)); + mp = runtime_atomicloadp((void**)&n->key); + while(!runtime_casp((void**)&n->key, mp, (void*)LOCKED)); // Successfully set waitm to LOCKED. // What was it before? 
@@ -153,8 +153,8 @@ runtime_notesleep(Note *n) m = runtime_m(); if(m->waitsema == 0) m->waitsema = runtime_semacreate(); - if(!runtime_casp(&n->waitm, nil, m)) { // must be LOCKED (got wakeup) - if(n->waitm != (void*)LOCKED) + if(!runtime_casp((void**)&n->key, nil, m)) { // must be LOCKED (got wakeup) + if(n->key != LOCKED) runtime_throw("notesleep - waitm out of sync"); return; } @@ -183,8 +183,8 @@ runtime_notetsleep(Note *n, int64 ns) m->waitsema = runtime_semacreate(); // Register for wakeup on n->waitm. - if(!runtime_casp(&n->waitm, nil, m)) { // must be LOCKED (got wakeup already) - if(n->waitm != (void*)LOCKED) + if(!runtime_casp((void**)&n->key, nil, m)) { // must be LOCKED (got wakeup already) + if(n->key != LOCKED) runtime_throw("notetsleep - waitm out of sync"); return; } @@ -219,10 +219,10 @@ runtime_notetsleep(Note *n, int64 ns) // so that any notewakeup racing with the return does not // try to grant us the semaphore when we don't expect it. for(;;) { - mp = runtime_atomicloadp(&n->waitm); + mp = runtime_atomicloadp((void**)&n->key); if(mp == m) { // No wakeup yet; unregister if possible. - if(runtime_casp(&n->waitm, mp, nil)) + if(runtime_casp((void**)&n->key, mp, nil)) return; } else if(mp == (M*)LOCKED) { // Wakeup happened so semaphore is available. diff --git a/libgo/runtime/malloc.goc b/libgo/runtime/malloc.goc index a484642..dfab683 100644 --- a/libgo/runtime/malloc.goc +++ b/libgo/runtime/malloc.goc @@ -18,7 +18,7 @@ package runtime #include "go-type.h" #include "race.h" -MHeap runtime_mheap; +MHeap *runtime_mheap; int32 runtime_checking; @@ -46,7 +46,7 @@ runtime_mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed) g = runtime_g(); if(g->status == Gsyscall) dogc = 0; - if(runtime_gcwaiting && g != m->g0 && m->locks == 0 && g->status != Gsyscall) { + if(runtime_gcwaiting && g != m->g0 && m->locks == 0 && dogc) { runtime_gosched(); m = runtime_m(); } @@ -78,7 +78,7 @@ runtime_mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed) npages = size >> PageShift; if((size & PageMask) != 0) npages++; - s = runtime_MHeap_Alloc(&runtime_mheap, npages, 0, 1, zeroed); + s = runtime_MHeap_Alloc(runtime_mheap, npages, 0, 1, zeroed); if(s == nil) runtime_throw("out of memory"); size = npages<<PageShift; @@ -92,9 +92,9 @@ runtime_mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed) if (sizeof(void*) == 4 && c->local_total_alloc >= (1<<30)) { // purge cache stats to prevent overflow - runtime_lock(&runtime_mheap); + runtime_lock(runtime_mheap); runtime_purgecachedstats(c); - runtime_unlock(&runtime_mheap); + runtime_unlock(runtime_mheap); } if(!(flag & FlagNoGC)) @@ -175,17 +175,17 @@ __go_free(void *v) if(sizeclass == 0) { // Large object. size = s->npages<<PageShift; - *(uintptr*)(s->start<<PageShift) = 1; // mark as "needs to be zeroed" + *(uintptr*)(s->start<<PageShift) = (uintptr)0xfeedfeedfeedfeedll; // mark as "needs to be zeroed" // Must mark v freed before calling unmarkspan and MHeap_Free: // they might coalesce v into other spans and change the bitmap further. runtime_markfreed(v, size); runtime_unmarkspan(v, 1<<PageShift); - runtime_MHeap_Free(&runtime_mheap, s, 1); + runtime_MHeap_Free(runtime_mheap, s, 1); } else { // Small object. 
size = runtime_class_to_size[sizeclass]; if(size > sizeof(uintptr)) - ((uintptr*)v)[1] = 1; // mark as "needs to be zeroed" + ((uintptr*)v)[1] = (uintptr)0xfeedfeedfeedfeedll; // mark as "needs to be zeroed" // Must mark v freed before calling MCache_Free: // it might coalesce v and other blocks into a bigger span // and change the bitmap further. @@ -213,12 +213,12 @@ runtime_mlookup(void *v, byte **base, uintptr *size, MSpan **sp) m->mcache->local_nlookup++; if (sizeof(void*) == 4 && m->mcache->local_nlookup >= (1<<30)) { // purge cache stats to prevent overflow - runtime_lock(&runtime_mheap); + runtime_lock(runtime_mheap); runtime_purgecachedstats(m->mcache); - runtime_unlock(&runtime_mheap); + runtime_unlock(runtime_mheap); } - s = runtime_MHeap_LookupMaybe(&runtime_mheap, v); + s = runtime_MHeap_LookupMaybe(runtime_mheap, v); if(sp) *sp = s; if(s == nil) { @@ -262,11 +262,11 @@ runtime_allocmcache(void) intgo rate; MCache *c; - runtime_lock(&runtime_mheap); - c = runtime_FixAlloc_Alloc(&runtime_mheap.cachealloc); - mstats.mcache_inuse = runtime_mheap.cachealloc.inuse; - mstats.mcache_sys = runtime_mheap.cachealloc.sys; - runtime_unlock(&runtime_mheap); + runtime_lock(runtime_mheap); + c = runtime_FixAlloc_Alloc(&runtime_mheap->cachealloc); + mstats.mcache_inuse = runtime_mheap->cachealloc.inuse; + mstats.mcache_sys = runtime_mheap->cachealloc.sys; + runtime_unlock(runtime_mheap); runtime_memclr((byte*)c, sizeof(*c)); // Set first allocation sample size. @@ -283,10 +283,10 @@ void runtime_freemcache(MCache *c) { runtime_MCache_ReleaseAll(c); - runtime_lock(&runtime_mheap); + runtime_lock(runtime_mheap); runtime_purgecachedstats(c); - runtime_FixAlloc_Free(&runtime_mheap.cachealloc, c); - runtime_unlock(&runtime_mheap); + runtime_FixAlloc_Free(&runtime_mheap->cachealloc, c); + runtime_unlock(runtime_mheap); } void @@ -334,9 +334,15 @@ runtime_mallocinit(void) USED(arena_size); USED(bitmap_size); + if((runtime_mheap = runtime_SysAlloc(sizeof(*runtime_mheap))) == nil) + runtime_throw("runtime: cannot allocate heap metadata"); + runtime_InitSizes(); - limit = runtime_memlimit(); + // limit = runtime_memlimit(); + // See https://code.google.com/p/go/issues/detail?id=5049 + // TODO(rsc): Fix after 1.1. + limit = 0; // Set up the allocation arena, a contiguous area of memory where // allocated data will be found. The arena begins with a bitmap large @@ -414,13 +420,13 @@ runtime_mallocinit(void) if((uintptr)p & (((uintptr)1<<PageShift)-1)) runtime_throw("runtime: SysReserve returned unaligned address"); - runtime_mheap.bitmap = p; - runtime_mheap.arena_start = p + bitmap_size; - runtime_mheap.arena_used = runtime_mheap.arena_start; - runtime_mheap.arena_end = runtime_mheap.arena_start + arena_size; + runtime_mheap->bitmap = p; + runtime_mheap->arena_start = p + bitmap_size; + runtime_mheap->arena_used = runtime_mheap->arena_start; + runtime_mheap->arena_end = runtime_mheap->arena_start + arena_size; // Initialize the rest of the allocator. - runtime_MHeap_Init(&runtime_mheap, runtime_SysAlloc); + runtime_MHeap_Init(runtime_mheap, runtime_SysAlloc); runtime_m()->mcache = runtime_allocmcache(); // See if it works. 
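
runtime_mallocinit above now allocates the MHeap itself with runtime_SysAlloc and then reserves a single contiguous arena (bitmap followed by heap pages) whose pages are only committed as they are used; the corresponding mmap changes to SysAlloc/SysMap appear in the mem.c hunk later in this diff. A small Linux-flavoured sketch of that reserve-then-commit pattern (sizes and flags here are illustrative):

/* Sketch of the reserve-then-commit arena pattern (SysReserve /
   SysMap): reserve a large range of address space without backing
   memory, then commit pages as they are needed. */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int
main(void)
{
	size_t reserve = 1UL << 30;        /* reserve 1 GB of address space */
	size_t page = 1UL << 12;

	/* PROT_NONE + MAP_NORESERVE: address space only, no memory yet. */
	void *arena = mmap(NULL, reserve, PROT_NONE,
	                   MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0);
	if(arena == MAP_FAILED) {
		perror("mmap reserve");
		return 1;
	}

	/* Commit the first page.  Note PROT_READ|PROT_WRITE only: the
	   mem.c hunk further down also drops PROT_EXEC from the
	   runtime's heap mappings. */
	if(mprotect(arena, page, PROT_READ|PROT_WRITE) != 0) {
		perror("mprotect commit");
		return 1;
	}
	memset(arena, 0, page);
	printf("arena reserved at %p, first page committed\n", arena);
	return 0;
}
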
@@ -519,8 +525,8 @@ runtime_settype_flush(M *mp, bool sysalloc) // (Manually inlined copy of runtime_MHeap_Lookup) p = (uintptr)v>>PageShift; if(sizeof(void*) == 8) - p -= (uintptr)runtime_mheap.arena_start >> PageShift; - s = runtime_mheap.map[p]; + p -= (uintptr)runtime_mheap->arena_start >> PageShift; + s = runtime_mheap->map[p]; if(s->sizeclass == 0) { s->types.compression = MTypes_Single; @@ -537,9 +543,11 @@ runtime_settype_flush(M *mp, bool sysalloc) nbytes3 = 8*sizeof(uintptr) + 1*ntypes; if(!sysalloc) { - data3 = runtime_mallocgc(nbytes3, FlagNoPointers, 0, 1); + data3 = runtime_mallocgc(nbytes3, FlagNoProfiling|FlagNoPointers, 0, 1); } else { data3 = runtime_SysAlloc(nbytes3); + if(data3 == nil) + runtime_throw("runtime: cannot allocate memory"); if(0) runtime_printf("settype(0->3): SysAlloc(%x) --> %p\n", (uint32)nbytes3, data3); } @@ -573,9 +581,11 @@ runtime_settype_flush(M *mp, bool sysalloc) nbytes2 = ntypes * sizeof(uintptr); if(!sysalloc) { - data2 = runtime_mallocgc(nbytes2, FlagNoPointers, 0, 1); + data2 = runtime_mallocgc(nbytes2, FlagNoProfiling|FlagNoPointers, 0, 1); } else { data2 = runtime_SysAlloc(nbytes2); + if(data2 == nil) + runtime_throw("runtime: cannot allocate memory"); if(0) runtime_printf("settype.(3->2): SysAlloc(%x) --> %p\n", (uint32)nbytes2, data2); } @@ -633,7 +643,7 @@ runtime_settype(void *v, uintptr t) } if(DebugTypeAtBlockEnd) { - s = runtime_MHeap_Lookup(&runtime_mheap, v); + s = runtime_MHeap_Lookup(runtime_mheap, v); *(uintptr*)((uintptr)v+s->elemsize-sizeof(uintptr)) = t; } } @@ -672,7 +682,7 @@ runtime_gettype(void *v) uintptr t, ofs; byte *data; - s = runtime_MHeap_LookupMaybe(&runtime_mheap, v); + s = runtime_MHeap_LookupMaybe(runtime_mheap, v); if(s != nil) { t = 0; switch(s->types.compression) { @@ -731,9 +741,8 @@ runtime_new(const Type *typ) ret = runtime_mallocgc(typ->__size, flag, 1, 1); if(UseSpanType && !flag) { - if(false) { + if(false) runtime_printf("new %S: %p\n", *typ->__reflection, ret); - } runtime_settype(ret, (uintptr)typ | TypeInfo_SingleObject); } } @@ -741,6 +750,45 @@ runtime_new(const Type *typ) return ret; } +static void* +cnew(const Type *typ, intgo n, int32 objtyp) +{ + uint32 flag; + void *ret; + + if((objtyp&(PtrSize-1)) != objtyp) + runtime_throw("runtime: invalid objtyp"); + if(n < 0 || (typ->__size > 0 && (uintptr)n > (MaxMem/typ->__size))) + runtime_panicstring("runtime: allocation size out of range"); + if(typ->__size == 0 || n == 0) { + // All 0-length allocations use this pointer. + // The language does not require the allocations to + // have distinct values. + return &runtime_zerobase; + } + flag = typ->__code&GO_NO_POINTERS ? 
FlagNoPointers : 0; + ret = runtime_mallocgc(typ->__size*n, flag, 1, 1); + if(UseSpanType && !flag) { + if(false) + runtime_printf("cnew [%D]%S: %p\n", (int64)n, *typ->__reflection, ret); + runtime_settype(ret, (uintptr)typ | TypeInfo_SingleObject); + } + return ret; +} + +// same as runtime_new, but callable from C +void* +runtime_cnew(const Type *typ) +{ + return cnew(typ, 1, TypeInfo_SingleObject); +} + +void* +runtime_cnewarray(const Type *typ, intgo n) +{ + return cnew(typ, n, TypeInfo_Array); +} + func GC() { runtime_gc(1); } diff --git a/libgo/runtime/malloc.h b/libgo/runtime/malloc.h index 7ebb762..ebea34e 100644 --- a/libgo/runtime/malloc.h +++ b/libgo/runtime/malloc.h @@ -86,6 +86,7 @@ typedef struct MSpan MSpan; typedef struct MStats MStats; typedef struct MLink MLink; typedef struct MTypes MTypes; +typedef struct GCStats GCStats; enum { @@ -114,10 +115,18 @@ enum HeapAllocChunk = 1<<20, // Chunk size for heap growth // Number of bits in page to span calculations (4k pages). - // On 64-bit, we limit the arena to 128GB, or 37 bits. + // On Windows 64-bit we limit the arena to 32GB or 35 bits (see below for reason). + // On other 64-bit platforms, we limit the arena to 128GB, or 37 bits. // On 32-bit, we don't bother limiting anything, so we use the full 32-bit address. #if __SIZEOF_POINTER__ == 8 +#ifdef GOOS_windows + // Windows counts memory used by page table into committed memory + // of the process, so we can't reserve too much memory. + // See http://golang.org/issue/5402 and http://golang.org/issue/5236. + MHeapMap_Bits = 35 - PageShift, +#else MHeapMap_Bits = 37 - PageShift, +#endif #else MHeapMap_Bits = 32 - PageShift, #endif @@ -133,7 +142,7 @@ enum // This must be a #define instead of an enum because it // is so large. #if __SIZEOF_POINTER__ == 8 -#define MaxMem (1ULL<<(MHeapMap_Bits+PageShift)) /* 128 GB */ +#define MaxMem (1ULL<<(MHeapMap_Bits+PageShift)) /* 128 GB or 32 GB */ #else #define MaxMem ((uintptr)-1) #endif @@ -229,7 +238,7 @@ struct MStats uint64 buckhash_sys; // profiling bucket hash table // Statistics about garbage collector. - // Protected by stopping the world during GC. + // Protected by mheap or stopping the world during GC. uint64 next_gc; // next GC (in heap_alloc time) uint64 last_gc; // last GC (in absolute time) uint64 pause_total_ns; @@ -249,7 +258,6 @@ struct MStats extern MStats mstats __asm__ (GOSYM_PREFIX "runtime.VmemStats"); - // Size classes. Computed and initialized by InitSizes. // // SizeToClass(0 <= n <= MaxSmallSize) returns the size class, @@ -416,18 +424,18 @@ struct MHeap byte *arena_end; // central free lists for small size classes. - // the union makes sure that the MCentrals are + // the padding makes sure that the MCentrals are // spaced CacheLineSize bytes apart, so that each MCentral.Lock // gets its own cache line. 
- union { + struct { MCentral; - byte pad[CacheLineSize]; + byte pad[64]; } central[NumSizeClasses]; FixAlloc spanalloc; // allocator for Span* FixAlloc cachealloc; // allocator for MCache* }; -extern MHeap runtime_mheap; +extern MHeap *runtime_mheap; void runtime_MHeap_Init(MHeap *h, void *(*allocator)(uintptr)); MSpan* runtime_MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, int32 acct, int32 zeroed); @@ -452,8 +460,8 @@ void runtime_unmarkspan(void *v, uintptr size); bool runtime_blockspecial(void*); void runtime_setblockspecial(void*, bool); void runtime_purgecachedstats(MCache*); -void* runtime_new(const Type *); -#define runtime_cnew(T) runtime_new(T) +void* runtime_cnew(const Type*); +void* runtime_cnewarray(const Type*, intgo); void runtime_settype(void*, uintptr); void runtime_settype_flush(M*, bool); @@ -493,6 +501,7 @@ enum TypeInfo_SingleObject = 0, TypeInfo_Array = 1, TypeInfo_Map = 2, + TypeInfo_Chan = 3, // Enables type information at the end of blocks allocated from heap DebugTypeAtBlockEnd = 0, @@ -504,4 +513,5 @@ void runtime_gc_itab_ptr(Eface*); void runtime_memorydump(void); +void runtime_proc_scan(void (*)(Obj)); void runtime_time_scan(void (*)(Obj)); diff --git a/libgo/runtime/mcache.c b/libgo/runtime/mcache.c index 570c06a..45bac4f 100644 --- a/libgo/runtime/mcache.c +++ b/libgo/runtime/mcache.c @@ -21,7 +21,7 @@ runtime_MCache_Alloc(MCache *c, int32 sizeclass, uintptr size, int32 zeroed) l = &c->list[sizeclass]; if(l->list == nil) { // Replenish using central lists. - n = runtime_MCentral_AllocList(&runtime_mheap.central[sizeclass], + n = runtime_MCentral_AllocList(&runtime_mheap->central[sizeclass], runtime_class_to_transfercount[sizeclass], &first); if(n == 0) runtime_throw("out of memory"); @@ -69,7 +69,7 @@ ReleaseN(MCache *c, MCacheList *l, int32 n, int32 sizeclass) c->size -= n*runtime_class_to_size[sizeclass]; // Return them to central free list. - runtime_MCentral_FreeList(&runtime_mheap.central[sizeclass], n, first); + runtime_MCentral_FreeList(&runtime_mheap->central[sizeclass], n, first); } void diff --git a/libgo/runtime/mcentral.c b/libgo/runtime/mcentral.c index b405438..b3108a1 100644 --- a/libgo/runtime/mcentral.c +++ b/libgo/runtime/mcentral.c @@ -108,7 +108,7 @@ MCentral_Free(MCentral *c, void *v) int32 size; // Find span for v. 
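
Near the top of the malloc.h hunk above, MHeap.central changes from a union into a struct padded to 64 bytes, so that each per-size-class MCentral and its Lock sit on their own cache line and concurrent allocators do not contend on neighbouring locks (false sharing). A tiny sketch of the same padding trick, assuming a 64-byte cache line:

/* Sketch: pad per-entry state to a cache line so that two CPUs
   updating neighbouring entries do not ping-pong the same line.
   64 is an assumed cache-line size, matching the pad[64] above. */
#include <stdio.h>

#define CACHE_LINE 64

struct counter {
	unsigned long n;
};

struct padded_counter {
	struct counter c;
	char pad[CACHE_LINE - sizeof(struct counter)];
};

static _Alignas(CACHE_LINE) struct padded_counter counters[4];

int
main(void)
{
	printf("sizeof(struct counter)        = %zu\n", sizeof(struct counter));
	printf("sizeof(struct padded_counter) = %zu\n", sizeof(struct padded_counter));
	printf("stride between entries        = %zu\n",
	       (size_t)((char*)&counters[1] - (char*)&counters[0]));
	/* Each counters[i] starts on its own cache line, so concurrent
	   updates to different entries never share a line. */
	return 0;
}
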
- s = runtime_MHeap_Lookup(&runtime_mheap, v); + s = runtime_MHeap_Lookup(runtime_mheap, v); if(s == nil || s->ref == 0) runtime_throw("invalid free"); @@ -133,7 +133,7 @@ MCentral_Free(MCentral *c, void *v) s->freelist = nil; c->nfree -= (s->npages << PageShift) / size; runtime_unlock(c); - runtime_MHeap_Free(&runtime_mheap, s, 0); + runtime_MHeap_Free(runtime_mheap, s, 0); runtime_lock(c); } } @@ -168,7 +168,7 @@ runtime_MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *e c->nfree -= (s->npages << PageShift) / size; runtime_unlock(c); runtime_unmarkspan((byte*)(s->start<<PageShift), s->npages<<PageShift); - runtime_MHeap_Free(&runtime_mheap, s, 0); + runtime_MHeap_Free(runtime_mheap, s, 0); } else { runtime_unlock(c); } @@ -200,7 +200,7 @@ MCentral_Grow(MCentral *c) runtime_unlock(c); runtime_MGetSizeClassInfo(c->sizeclass, &size, &npages, &n); - s = runtime_MHeap_Alloc(&runtime_mheap, npages, c->sizeclass, 0, 1); + s = runtime_MHeap_Alloc(runtime_mheap, npages, c->sizeclass, 0, 1); if(s == nil) { // TODO(rsc): Log out of memory runtime_lock(c); diff --git a/libgo/runtime/mem.c b/libgo/runtime/mem.c index e606bdd..8481e95 100644 --- a/libgo/runtime/mem.c +++ b/libgo/runtime/mem.c @@ -78,7 +78,7 @@ runtime_SysAlloc(uintptr n) fd = dev_zero; #endif - p = runtime_mmap(nil, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, fd, 0); + p = runtime_mmap(nil, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, fd, 0); if (p == MAP_FAILED) { if(errno == EACCES) { runtime_printf("runtime: mmap: access denied\n"); @@ -169,7 +169,7 @@ runtime_SysMap(void *v, uintptr n) // On 64-bit, we don't actually have v reserved, so tread carefully. if(sizeof(void*) == 8 && (uintptr)v >= 0xffffffffU) { - p = mmap_fixed(v, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, fd, 0); + p = mmap_fixed(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, fd, 0); if(p == MAP_FAILED && errno == ENOMEM) runtime_throw("runtime: out of memory"); if(p != v) { @@ -179,7 +179,9 @@ runtime_SysMap(void *v, uintptr n) return; } - p = runtime_mmap(v, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_FIXED|MAP_PRIVATE, fd, 0); + p = runtime_mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_FIXED|MAP_PRIVATE, fd, 0); + if(p == MAP_FAILED && errno == ENOMEM) + runtime_throw("runtime: out of memory"); if(p != v) runtime_throw("runtime: cannot map pages in arena address space"); } diff --git a/libgo/runtime/mfixalloc.c b/libgo/runtime/mfixalloc.c index 109cfe8..6e4f0c6 100644 --- a/libgo/runtime/mfixalloc.c +++ b/libgo/runtime/mfixalloc.c @@ -30,6 +30,11 @@ void* runtime_FixAlloc_Alloc(FixAlloc *f) { void *v; + + if(f->size == 0) { + runtime_printf("runtime: use of FixAlloc_Alloc before FixAlloc_Init\n"); + runtime_throw("runtime: internal error"); + } if(f->list) { v = f->list; diff --git a/libgo/runtime/mgc0.c b/libgo/runtime/mgc0.c index 88283cc..36afd2b 100644 --- a/libgo/runtime/mgc0.c +++ b/libgo/runtime/mgc0.c @@ -21,8 +21,11 @@ #define tab __methods // Eface aka __go_empty_interface. #define type __type_descriptor +// Hmap aka __go_map +typedef struct __go_map Hmap; // Type aka __go_type_descriptor #define kind __code +#define string __reflection #define KindPtr GO_PTR #define KindNoPointers GO_NO_POINTERS // PtrType aka __go_ptr_type @@ -41,6 +44,9 @@ extern void * __splitstack_find_context (void *context[10], size_t *, void **, enum { Debug = 0, DebugMark = 0, // run second pass to check mark + CollectStats = 0, + ScanStackByFrames = 0, + IgnorePreciseGC = 0, // Four bits per word (see #defines below). 
wordsPerBitmapWord = sizeof(void*)*8/4, @@ -147,6 +153,7 @@ static Workbuf* getempty(Workbuf*); static Workbuf* getfull(Workbuf*); static void putempty(Workbuf*); static Workbuf* handoff(Workbuf*); +static void gchelperstart(void); static struct { uint64 full; // lock-free list of full blocks @@ -170,11 +177,114 @@ static struct { } work; enum { - // TODO(atom): to be expanded in a next CL GC_DEFAULT_PTR = GC_NUM_INSTR, + GC_MAP_NEXT, + GC_CHAN, + + GC_NUM_INSTR2 }; -// PtrTarget and BitTarget are structures used by intermediate buffers. +static struct { + struct { + uint64 sum; + uint64 cnt; + } ptr; + uint64 nbytes; + struct { + uint64 sum; + uint64 cnt; + uint64 notype; + uint64 typelookup; + } obj; + uint64 rescan; + uint64 rescanbytes; + uint64 instr[GC_NUM_INSTR2]; + uint64 putempty; + uint64 getfull; +} gcstats; + +// markonly marks an object. It returns true if the object +// has been marked by this function, false otherwise. +// This function doesn't append the object to any buffer. +static bool +markonly(void *obj) +{ + byte *p; + uintptr *bitp, bits, shift, x, xbits, off; + MSpan *s; + PageID k; + + // Words outside the arena cannot be pointers. + if((byte*)obj < runtime_mheap->arena_start || (byte*)obj >= runtime_mheap->arena_used) + return false; + + // obj may be a pointer to a live object. + // Try to find the beginning of the object. + + // Round down to word boundary. + obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1)); + + // Find bits for this word. + off = (uintptr*)obj - (uintptr*)runtime_mheap->arena_start; + bitp = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1; + shift = off % wordsPerBitmapWord; + xbits = *bitp; + bits = xbits >> shift; + + // Pointing at the beginning of a block? + if((bits & (bitAllocated|bitBlockBoundary)) != 0) + goto found; + + // Otherwise consult span table to find beginning. + // (Manually inlined copy of MHeap_LookupMaybe.) + k = (uintptr)obj>>PageShift; + x = k; + if(sizeof(void*) == 8) + x -= (uintptr)runtime_mheap->arena_start>>PageShift; + s = runtime_mheap->map[x]; + if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse) + return false; + p = (byte*)((uintptr)s->start<<PageShift); + if(s->sizeclass == 0) { + obj = p; + } else { + if((byte*)obj >= (byte*)s->limit) + return false; + uintptr size = s->elemsize; + int32 i = ((byte*)obj - p)/size; + obj = p+i*size; + } + + // Now that we know the object header, reload bits. + off = (uintptr*)obj - (uintptr*)runtime_mheap->arena_start; + bitp = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1; + shift = off % wordsPerBitmapWord; + xbits = *bitp; + bits = xbits >> shift; + +found: + // Now we have bits, bitp, and shift correct for + // obj pointing at the base of the object. + // Only care about allocated and not marked. + if((bits & (bitAllocated|bitMarked)) != bitAllocated) + return false; + if(work.nproc == 1) + *bitp |= bitMarked<<shift; + else { + for(;;) { + x = *bitp; + if(x & (bitMarked<<shift)) + return false; + if(runtime_casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift)))) + break; + } + } + + // The object is now marked + return true; +} + +// PtrTarget is a structure used by intermediate buffers. // The intermediate buffers hold GC data before it // is moved/flushed to the work buffer (Workbuf). 
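
The new markonly() above locates the mark bits for an object and then sets bitMarked either directly (single collector thread) or via a compare-and-swap retry loop, so that with several GC workers exactly one of them observes the unmarked-to-marked transition and no update is lost. A self-contained sketch of that loop, using C11 atomics in place of the runtime's casp():

/* Sketch of the lock-free "set a bit, report whether we set it"
   loop used by markonly() above. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool
mark_bit(_Atomic uintptr_t *word, unsigned shift)
{
	uintptr_t old, want;

	for(;;) {
		old = atomic_load(word);
		if(old & ((uintptr_t)1 << shift))
			return false;               /* already marked by someone else */
		want = old | ((uintptr_t)1 << shift);
		if(atomic_compare_exchange_weak(word, &old, want))
			return true;                /* this caller performed the marking */
		/* CAS failed: another thread changed the word; retry. */
	}
}

int
main(void)
{
	_Atomic uintptr_t bitmap = 0;

	printf("first  mark: %d\n", mark_bit(&bitmap, 3));   /* 1 */
	printf("second mark: %d\n", mark_bit(&bitmap, 3));   /* 0 */
	return 0;
}
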
// The size of an intermediate buffer is very small, @@ -186,24 +296,16 @@ struct PtrTarget uintptr ti; }; -typedef struct BitTarget BitTarget; -struct BitTarget -{ - void *p; - uintptr ti; - uintptr *bitp, shift; -}; - typedef struct BufferList BufferList; struct BufferList { PtrTarget ptrtarget[IntermediateBufferCapacity]; - BitTarget bittarget[IntermediateBufferCapacity]; - BufferList *next; + Obj obj[IntermediateBufferCapacity]; + uint32 busy; + byte pad[CacheLineSize]; }; -static BufferList *bufferList; +static BufferList bufferList[MaxGcproc]; -static Lock lock; static Type *itabtype; static void enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj); @@ -214,7 +316,6 @@ static void enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj); // and are prepared to be scanned by the garbage collector. // // _wp, _wbuf, _nobj are input/output parameters and are specifying the work buffer. -// bitbuf holds temporary data generated by this function. // // A simplified drawing explaining how the todo-list moves from a structure to another: // @@ -222,14 +323,12 @@ static void enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj); // (find pointers) // Obj ------> PtrTarget (pointer targets) // ↑ | -// | | flushptrbuf (1st part, -// | | find block start) -// | ↓ -// `--------- BitTarget (pointer targets and the corresponding locations in bitmap) -// flushptrbuf -// (2nd part, mark and enqueue) +// | | +// `----------' +// flushptrbuf +// (find block start, mark and enqueue) static void -flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_nobj, BitTarget *bitbuf) +flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_nobj) { byte *p, *arena_start, *obj; uintptr size, *bitp, bits, shift, j, x, xbits, off, nobj, ti, n; @@ -238,9 +337,8 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf Obj *wp; Workbuf *wbuf; PtrTarget *ptrbuf_end; - BitTarget *bitbufpos, *bt; - arena_start = runtime_mheap.arena_start; + arena_start = runtime_mheap->arena_start; wp = *_wp; wbuf = *_wbuf; @@ -250,6 +348,11 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf n = ptrbuf_end - ptrbuf; *ptrbufpos = ptrbuf; + if(CollectStats) { + runtime_xadd64(&gcstats.ptr.sum, n); + runtime_xadd64(&gcstats.ptr.cnt, 1); + } + // If buffer is nearly full, get a new one. if(wbuf == nil || nobj+n >= nelem(wbuf->obj)) { if(wbuf != nil) @@ -267,8 +370,6 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf { // Multi-threaded version. - bitbufpos = bitbuf; - while(ptrbuf < ptrbuf_end) { obj = ptrbuf->p; ti = ptrbuf->ti; @@ -276,7 +377,7 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf // obj belongs to interval [mheap.arena_start, mheap.arena_used). 
if(Debug > 1) { - if(obj < runtime_mheap.arena_start || obj >= runtime_mheap.arena_used) + if(obj < runtime_mheap->arena_start || obj >= runtime_mheap->arena_used) runtime_throw("object is outside of mheap"); } @@ -319,7 +420,7 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf x = k; if(sizeof(void*) == 8) x -= (uintptr)arena_start>>PageShift; - s = runtime_mheap.map[x]; + s = runtime_mheap->map[x]; if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse) continue; p = (byte*)((uintptr)s->start<<PageShift); @@ -346,40 +447,36 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf // Only care about allocated and not marked. if((bits & (bitAllocated|bitMarked)) != bitAllocated) continue; - - *bitbufpos++ = (BitTarget){obj, ti, bitp, shift}; - } - - runtime_lock(&lock); - for(bt=bitbuf; bt<bitbufpos; bt++){ - xbits = *bt->bitp; - bits = xbits >> bt->shift; - if((bits & bitMarked) != 0) - continue; - - // Mark the block - *bt->bitp = xbits | (bitMarked << bt->shift); + if(work.nproc == 1) + *bitp |= bitMarked<<shift; + else { + for(;;) { + x = *bitp; + if(x & (bitMarked<<shift)) + goto continue_obj; + if(runtime_casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift)))) + break; + } + } // If object has no pointers, don't need to scan further. if((bits & bitNoPointers) != 0) continue; - obj = bt->p; - // Ask span about size class. // (Manually inlined copy of MHeap_Lookup.) x = (uintptr)obj >> PageShift; if(sizeof(void*) == 8) x -= (uintptr)arena_start>>PageShift; - s = runtime_mheap.map[x]; + s = runtime_mheap->map[x]; PREFETCH(obj); - *wp = (Obj){obj, s->elemsize, bt->ti}; + *wp = (Obj){obj, s->elemsize, ti}; wp++; nobj++; + continue_obj:; } - runtime_unlock(&lock); // If another proc wants a pointer, give it some. if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) { @@ -395,9 +492,73 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf *_nobj = nobj; } +static void +flushobjbuf(Obj *objbuf, Obj **objbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_nobj) +{ + uintptr nobj, off; + Obj *wp, obj; + Workbuf *wbuf; + Obj *objbuf_end; + + wp = *_wp; + wbuf = *_wbuf; + nobj = *_nobj; + + objbuf_end = *objbufpos; + *objbufpos = objbuf; + + while(objbuf < objbuf_end) { + obj = *objbuf++; + + // Align obj.b to a word boundary. + off = (uintptr)obj.p & (PtrSize-1); + if(off != 0) { + obj.p += PtrSize - off; + obj.n -= PtrSize - off; + obj.ti = 0; + } + + if(obj.p == nil || obj.n == 0) + continue; + + // If buffer is full, get a new one. + if(wbuf == nil || nobj >= nelem(wbuf->obj)) { + if(wbuf != nil) + wbuf->nobj = nobj; + wbuf = getempty(wbuf); + wp = wbuf->obj; + nobj = 0; + } + + *wp = obj; + wp++; + nobj++; + } + + // If another proc wants a pointer, give it some. 
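
Both flush helpers above finish with the same hand-off rule: if other GC workers are idle (work.nwait > 0) and this worker has accumulated more than handoffThreshold objects while the shared full list is empty, it gives part of its buffer away so the idle workers have something to scan. A schematic sketch of splitting a local buffer for hand-off (the queue and names here are stand-ins, not the runtime's work.full machinery):

/* Schematic sketch of the hand-off idea: when other workers are idle
   and this worker's buffer is large, give half of it away and keep
   scanning the rest. */
#include <stdio.h>
#include <stddef.h>
#include <string.h>

enum { CAP = 512, HANDOFF_THRESHOLD = 4 };

typedef struct {
	void *obj[CAP];
	size_t nobj;
} WorkBuf;

/* Stand-in for publishing a buffer where idle workers can steal it. */
static WorkBuf pending;
static void publish(WorkBuf *b) { pending = *b; }

static void
maybe_handoff(WorkBuf *mine, int idle_workers)
{
	WorkBuf give;
	size_t half;

	if(idle_workers == 0 || mine->nobj <= HANDOFF_THRESHOLD)
		return;                 /* keep everything, nobody is waiting */

	half = mine->nobj / 2;
	give.nobj = half;
	memcpy(give.obj, mine->obj + (mine->nobj - half), half * sizeof give.obj[0]);
	mine->nobj -= half;
	publish(&give);             /* idle workers pick this up */
}

int
main(void)
{
	static WorkBuf b;
	int x[10];

	for(b.nobj = 0; b.nobj < 10; b.nobj++)
		b.obj[b.nobj] = &x[b.nobj];
	maybe_handoff(&b, 1);
	printf("kept %zu, published %zu\n", b.nobj, pending.nobj);   /* kept 5, published 5 */
	return 0;
}
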
+ if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) { + wbuf->nobj = nobj; + wbuf = handoff(wbuf); + nobj = wbuf->nobj; + wp = wbuf->obj + nobj; + } + + *_wp = wp; + *_wbuf = wbuf; + *_nobj = nobj; +} + // Program that scans the whole block and treats every block element as a potential pointer static uintptr defaultProg[2] = {PtrSize, GC_DEFAULT_PTR}; +#if 0 +// Hashmap iterator program +static uintptr mapProg[2] = {0, GC_MAP_NEXT}; + +// Hchan program +static uintptr chanProg[2] = {0, GC_CHAN}; +#endif + // Local variables of a program fragment or loop typedef struct Frame Frame; struct Frame { @@ -405,6 +566,61 @@ struct Frame { uintptr *loop_or_ret; }; +// Sanity check for the derived type info objti. +static void +checkptr(void *obj, uintptr objti) +{ + uintptr type, tisize, i, x; + byte *objstart; + Type *t; + MSpan *s; + + if(!Debug) + runtime_throw("checkptr is debug only"); + + if((byte*)obj < runtime_mheap->arena_start || (byte*)obj >= runtime_mheap->arena_used) + return; + type = runtime_gettype(obj); + t = (Type*)(type & ~(uintptr)(PtrSize-1)); + if(t == nil) + return; + x = (uintptr)obj >> PageShift; + if(sizeof(void*) == 8) + x -= (uintptr)(runtime_mheap->arena_start)>>PageShift; + s = runtime_mheap->map[x]; + objstart = (byte*)((uintptr)s->start<<PageShift); + if(s->sizeclass != 0) { + i = ((byte*)obj - objstart)/s->elemsize; + objstart += i*s->elemsize; + } + tisize = *(uintptr*)objti; + // Sanity check for object size: it should fit into the memory block. + if((byte*)obj + tisize > objstart + s->elemsize) + runtime_throw("invalid gc type info"); + if(obj != objstart) + return; + // If obj points to the beginning of the memory block, + // check type info as well. + if(t->string == nil || + // Gob allocates unsafe pointers for indirection. + (runtime_strcmp((const char *)t->string->str, (const char*)"unsafe.Pointer") && + // Runtime and gc think differently about closures. + runtime_strstr((const char *)t->string->str, (const char*)"struct { F uintptr") != (const char *)t->string->str)) { +#if 0 + pc1 = (uintptr*)objti; + pc2 = (uintptr*)t->gc; + // A simple best-effort check until first GC_END. + for(j = 1; pc1[j] != GC_END && pc2[j] != GC_END; j++) { + if(pc1[j] != pc2[j]) { + runtime_printf("invalid gc type info for '%s' at %p, type info %p, block info %p\n", + t->string ? (const int8*)t->string->str : (const int8*)"?", j, pc1[j], pc2[j]); + runtime_throw("invalid gc type info"); + } + } +#endif + } +} + // scanblock scans a block of n bytes starting at pointer b for references // to other objects, scanning any it finds recursively until there are no // unscanned objects left. 
Instead of using an explicit recursion, it keeps @@ -419,49 +635,64 @@ static void scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking) { byte *b, *arena_start, *arena_used; - uintptr n, i, end_b, elemsize, ti, objti, count /* , type */; + uintptr n, i, end_b, elemsize, size, ti, objti, count /* , type */; uintptr *pc, precise_type, nominal_size; +#if 0 + uintptr *map_ret, mapkey_size, mapval_size, mapkey_ti, mapval_ti, *chan_ret, chancap; +#endif void *obj; const Type *t; Slice *sliceptr; Frame *stack_ptr, stack_top, stack[GC_STACK_CAPACITY+4]; BufferList *scanbuffers; PtrTarget *ptrbuf, *ptrbuf_end, *ptrbufpos; - BitTarget *bitbuf; + Obj *objbuf, *objbuf_end, *objbufpos; Eface *eface; Iface *iface; +#if 0 + Hmap *hmap; + MapType *maptype; + bool mapkey_kind, mapval_kind; + struct hash_gciter map_iter; + struct hash_gciter_data d; + Hchan *chan; + ChanType *chantype; +#endif if(sizeof(Workbuf) % PageSize != 0) runtime_throw("scanblock: size of Workbuf is suboptimal"); // Memory arena parameters. - arena_start = runtime_mheap.arena_start; - arena_used = runtime_mheap.arena_used; + arena_start = runtime_mheap->arena_start; + arena_used = runtime_mheap->arena_used; stack_ptr = stack+nelem(stack)-1; precise_type = false; nominal_size = 0; - // Allocate ptrbuf, bitbuf + // Allocate ptrbuf { - runtime_lock(&lock); - - if(bufferList == nil) { - bufferList = runtime_SysAlloc(sizeof(*bufferList)); - bufferList->next = nil; - } - scanbuffers = bufferList; - bufferList = bufferList->next; - + scanbuffers = &bufferList[runtime_m()->helpgc]; ptrbuf = &scanbuffers->ptrtarget[0]; ptrbuf_end = &scanbuffers->ptrtarget[0] + nelem(scanbuffers->ptrtarget); - bitbuf = &scanbuffers->bittarget[0]; - - runtime_unlock(&lock); + objbuf = &scanbuffers->obj[0]; + objbuf_end = &scanbuffers->obj[0] + nelem(scanbuffers->obj); } ptrbufpos = ptrbuf; + objbufpos = objbuf; + + // (Silence the compiler) +#if 0 + map_ret = nil; + mapkey_size = mapval_size = 0; + mapkey_kind = mapval_kind = false; + mapkey_ti = mapval_ti = 0; + chan = nil; + chantype = nil; + chan_ret = nil; +#endif goto next_block; @@ -472,7 +703,13 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking) runtime_printf("scanblock %p %D\n", b, (int64)n); } - if(ti != 0 && 0) { + if(CollectStats) { + runtime_xadd64(&gcstats.nbytes, n); + runtime_xadd64(&gcstats.obj.sum, nobj); + runtime_xadd64(&gcstats.obj.cnt, 1); + } + + if(ti != 0 && false) { pc = (uintptr*)(ti & ~(uintptr)PC_BITS); precise_type = (ti & PRECISE); stack_top.elemsize = pc[0]; @@ -484,10 +721,27 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking) } else { stack_top.count = 1; } - } else if(UseSpanType && 0) { + if(Debug) { + // Simple sanity check for provided type info ti: + // The declared size of the object must be not larger than the actual size + // (it can be smaller due to inferior pointers). + // It's difficult to make a comprehensive check due to inferior pointers, + // reflection, gob, etc. 
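
As the comment above explains, scanblock() avoids C-stack recursion: every object that still needs scanning is appended to a work buffer and the main loop pops blocks until none remain, so arbitrarily deep object graphs cannot overflow the (possibly split) stack. A compact sketch of that explicit-work-list traversal over a toy node graph:

/* Sketch of the "explicit work list instead of recursion" structure
   used by scanblock(): unscanned nodes are pushed onto a stack and
   popped in a loop. */
#include <stdio.h>
#include <stddef.h>

typedef struct Node Node;
struct Node {
	int visited;
	Node *left, *right;
};

enum { STACK_CAP = 128 };

static void
scan_iterative(Node *root)
{
	Node *stack[STACK_CAP];
	size_t top = 0;
	Node *n;

	if(root != NULL)
		stack[top++] = root;

	while(top > 0) {
		n = stack[--top];
		if(n == NULL || n->visited)
			continue;
		n->visited = 1;              /* "mark" */
		/* Enqueue children instead of recursing into them. */
		if(top+2 <= STACK_CAP) {
			stack[top++] = n->left;
			stack[top++] = n->right;
		}
	}
}

int
main(void)
{
	Node c = {0, NULL, NULL}, b = {0, &c, NULL}, a = {0, &b, &c};

	scan_iterative(&a);
	printf("%d %d %d\n", a.visited, b.visited, c.visited);   /* 1 1 1 */
	return 0;
}
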
+ if(pc[0] > n) { + runtime_printf("invalid gc type info: type info size %p, block size %p\n", pc[0], n); + runtime_throw("invalid gc type info"); + } + } + } else if(UseSpanType && false) { + if(CollectStats) + runtime_xadd64(&gcstats.obj.notype, 1); + #if 0 type = runtime_gettype(b); if(type != 0) { + if(CollectStats) + runtime_xadd64(&gcstats.obj.typelookup, 1); + t = (Type*)(type & ~(uintptr)(PtrSize-1)); switch(type & (PtrSize-1)) { case TypeInfo_SingleObject: @@ -506,8 +760,27 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking) stack_top.loop_or_ret = pc+1; break; case TypeInfo_Map: - // TODO(atom): to be expanded in a next CL - pc = defaultProg; + hmap = (Hmap*)b; + maptype = (MapType*)t; + if(hash_gciter_init(hmap, &map_iter)) { + mapkey_size = maptype->key->size; + mapkey_kind = maptype->key->kind; + mapkey_ti = (uintptr)maptype->key->gc | PRECISE; + mapval_size = maptype->elem->size; + mapval_kind = maptype->elem->kind; + mapval_ti = (uintptr)maptype->elem->gc | PRECISE; + + map_ret = nil; + pc = mapProg; + } else { + goto next_block; + } + break; + case TypeInfo_Chan: + chan = (Hchan*)b; + chantype = (ChanType*)t; + chan_ret = nil; + pc = chanProg; break; default: runtime_throw("scanblock: invalid type"); @@ -521,12 +794,18 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking) pc = defaultProg; } + if(IgnorePreciseGC) + pc = defaultProg; + pc++; stack_top.b = (uintptr)b; end_b = (uintptr)b + n - PtrSize; for(;;) { + if(CollectStats) + runtime_xadd64(&gcstats.instr[pc[0]], 1); + obj = nil; objti = 0; switch(pc[0]) { @@ -534,13 +813,19 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking) obj = *(void**)(stack_top.b + pc[1]); objti = pc[2]; pc += 3; + if(Debug) + checkptr(obj, objti); break; case GC_SLICE: sliceptr = (Slice*)(stack_top.b + pc[1]); if(sliceptr->cap != 0) { obj = sliceptr->array; - objti = pc[2] | PRECISE | LOOP; + // Can't use slice element type for scanning, + // because if it points to an array embedded + // in the beginning of a struct, + // we will scan the whole struct as the slice. + // So just obtain type info from heap. 
} pc += 3; break; @@ -552,17 +837,31 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking) case GC_STRING: obj = *(void**)(stack_top.b + pc[1]); + markonly(obj); pc += 2; - break; + continue; case GC_EFACE: eface = (Eface*)(stack_top.b + pc[1]); pc += 2; - if(eface->type != nil && ((byte*)eface->__object >= arena_start && (byte*)eface->__object < arena_used)) { - t = eface->type; + if(eface->type == nil) + continue; + + // eface->type + t = eface->type; + if((const byte*)t >= arena_start && (const byte*)t < arena_used) { + union { const Type *tc; Type *tr; } u; + u.tc = t; + *ptrbufpos++ = (struct PtrTarget){(void*)u.tr, 0}; + if(ptrbufpos == ptrbuf_end) + flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj); + } + + // eface->__object + if((byte*)eface->__object >= arena_start && (byte*)eface->__object < arena_used) { if(t->__size <= sizeof(void*)) { if((t->kind & KindNoPointers)) - break; + continue; obj = eface->__object; if((t->kind & ~KindNoPointers) == KindPtr) @@ -580,14 +879,14 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking) iface = (Iface*)(stack_top.b + pc[1]); pc += 2; if(iface->tab == nil) - break; + continue; // iface->tab if((byte*)iface->tab >= arena_start && (byte*)iface->tab < arena_used) { // *ptrbufpos++ = (struct PtrTarget){iface->tab, (uintptr)itabtype->gc}; *ptrbufpos++ = (struct PtrTarget){iface->tab, 0}; if(ptrbufpos == ptrbuf_end) - flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj, bitbuf); + flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj); } // iface->data @@ -596,7 +895,7 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking) t = nil; if(t->__size <= sizeof(void*)) { if((t->kind & KindNoPointers)) - break; + continue; obj = iface->__object; if((t->kind & ~KindNoPointers) == KindPtr) @@ -611,13 +910,13 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking) break; case GC_DEFAULT_PTR: - while((i = stack_top.b) <= end_b) { + while(stack_top.b <= end_b) { + obj = *(byte**)stack_top.b; stack_top.b += PtrSize; - obj = *(byte**)i; if((byte*)obj >= arena_start && (byte*)obj < arena_used) { *ptrbufpos++ = (struct PtrTarget){obj, 0}; if(ptrbufpos == ptrbuf_end) - flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj, bitbuf); + flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj); } } goto next_block; @@ -625,9 +924,8 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking) case GC_END: if(--stack_top.count != 0) { // Next iteration of a loop if possible. - elemsize = stack_top.elemsize; - stack_top.b += elemsize; - if(stack_top.b + elemsize <= end_b+PtrSize) { + stack_top.b += stack_top.elemsize; + if(stack_top.b + stack_top.elemsize <= end_b+PtrSize) { pc = stack_top.loop_or_ret; continue; } @@ -648,6 +946,10 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking) // Found a value that may be a pointer. // Do a rescan of the entire block. enqueue((Obj){b, n, 0}, &wbuf, &wp, &nobj); + if(CollectStats) { + runtime_xadd64(&gcstats.rescan, 1); + runtime_xadd64(&gcstats.rescanbytes, n); + } break; } } @@ -680,20 +982,136 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking) // Stack push. *stack_ptr-- = stack_top; stack_top = (Frame){1, 0, stack_top.b + pc[1], pc+3 /*return address*/}; - pc = (uintptr*)pc[2]; // target of the CALL instruction + pc = (uintptr*)((byte*)pc + *(int32*)(pc+2)); // target of the CALL instruction continue; +#if 0 case GC_MAP_PTR: - // TODO(atom): to be expanded in a next CL. Same as GC_APTR for now. 
- obj = *(void**)(stack_top.b + pc[1]); - pc += 3; - break; + hmap = *(Hmap**)(stack_top.b + pc[1]); + if(hmap == nil) { + pc += 3; + continue; + } + if(markonly(hmap)) { + maptype = (MapType*)pc[2]; + if(hash_gciter_init(hmap, &map_iter)) { + mapkey_size = maptype->key->size; + mapkey_kind = maptype->key->kind; + mapkey_ti = (uintptr)maptype->key->gc | PRECISE; + mapval_size = maptype->elem->size; + mapval_kind = maptype->elem->kind; + mapval_ti = (uintptr)maptype->elem->gc | PRECISE; + + // Start mapProg. + map_ret = pc+3; + pc = mapProg+1; + } else { + pc += 3; + } + } else { + pc += 3; + } + continue; + + case GC_MAP_NEXT: + // Add all keys and values to buffers, mark all subtables. + while(hash_gciter_next(&map_iter, &d)) { + // buffers: reserve space for 2 objects. + if(ptrbufpos+2 >= ptrbuf_end) + flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj); + if(objbufpos+2 >= objbuf_end) + flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj); + + if(d.st != nil) + markonly(d.st); + + if(d.key_data != nil) { + if(!(mapkey_kind & KindNoPointers) || d.indirectkey) { + if(!d.indirectkey) + *objbufpos++ = (Obj){d.key_data, mapkey_size, mapkey_ti}; + else { + if(Debug) { + obj = *(void**)d.key_data; + if(!(arena_start <= obj && obj < arena_used)) + runtime_throw("scanblock: inconsistent hashmap"); + } + *ptrbufpos++ = (struct PtrTarget){*(void**)d.key_data, mapkey_ti}; + } + } + if(!(mapval_kind & KindNoPointers) || d.indirectval) { + if(!d.indirectval) + *objbufpos++ = (Obj){d.val_data, mapval_size, mapval_ti}; + else { + if(Debug) { + obj = *(void**)d.val_data; + if(!(arena_start <= obj && obj < arena_used)) + runtime_throw("scanblock: inconsistent hashmap"); + } + *ptrbufpos++ = (struct PtrTarget){*(void**)d.val_data, mapval_ti}; + } + } + } + } + if(map_ret == nil) + goto next_block; + pc = map_ret; + continue; +#endif case GC_REGION: - // TODO(atom): to be expanded in a next CL. Same as GC_APTR for now. obj = (void*)(stack_top.b + pc[1]); + size = pc[2]; + objti = pc[3]; pc += 4; - break; + + *objbufpos++ = (Obj){obj, size, objti}; + if(objbufpos == objbuf_end) + flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj); + continue; + +#if 0 + case GC_CHAN_PTR: + // Similar to GC_MAP_PTR + chan = *(Hchan**)(stack_top.b + pc[1]); + if(chan == nil) { + pc += 3; + continue; + } + if(markonly(chan)) { + chantype = (ChanType*)pc[2]; + if(!(chantype->elem->kind & KindNoPointers)) { + // Start chanProg. + chan_ret = pc+3; + pc = chanProg+1; + continue; + } + } + pc += 3; + continue; + + case GC_CHAN: + // There are no heap pointers in struct Hchan, + // so we can ignore the leading sizeof(Hchan) bytes. + if(!(chantype->elem->kind & KindNoPointers)) { + // Channel's buffer follows Hchan immediately in memory. + // Size of buffer (cap(c)) is second int in the chan struct. + chancap = ((uintgo*)chan)[1]; + if(chancap > 0) { + // TODO(atom): split into two chunks so that only the + // in-use part of the circular buffer is scanned. + // (Channel routines zero the unused part, so the current + // code does not lead to leaks, it's just a little inefficient.) 
+ *objbufpos++ = (Obj){(byte*)chan+runtime_Hchansize, chancap*chantype->elem->size, + (uintptr)chantype->elem->gc | PRECISE | LOOP}; + if(objbufpos == objbuf_end) + flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj); + } + } + if(chan_ret == nil) + goto next_block; + pc = chan_ret; + continue; +#endif default: runtime_throw("scanblock: invalid GC instruction"); @@ -701,9 +1119,9 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking) } if((byte*)obj >= arena_start && (byte*)obj < arena_used) { - *ptrbufpos++ = (PtrTarget){obj, objti}; + *ptrbufpos++ = (struct PtrTarget){obj, objti}; if(ptrbufpos == ptrbuf_end) - flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj, bitbuf); + flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj); } } @@ -712,7 +1130,8 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking) // the loop by setting b, n, ti to the parameters for the next block. if(nobj == 0) { - flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj, bitbuf); + flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj); + flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj); if(nobj == 0) { if(!keepworking) { @@ -737,11 +1156,7 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking) nobj--; } -endscan: - runtime_lock(&lock); - scanbuffers->next = bufferList; - bufferList = scanbuffers; - runtime_unlock(&lock); +endscan:; } // debug_scanblock is the debug copy of scanblock. @@ -776,14 +1191,14 @@ debug_scanblock(byte *b, uintptr n) obj = (byte*)vp[i]; // Words outside the arena cannot be pointers. - if((byte*)obj < runtime_mheap.arena_start || (byte*)obj >= runtime_mheap.arena_used) + if((byte*)obj < runtime_mheap->arena_start || (byte*)obj >= runtime_mheap->arena_used) continue; // Round down to word boundary. obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1)); // Consult span table to find beginning. - s = runtime_MHeap_LookupMaybe(&runtime_mheap, obj); + s = runtime_MHeap_LookupMaybe(runtime_mheap, obj); if(s == nil) continue; @@ -799,8 +1214,8 @@ debug_scanblock(byte *b, uintptr n) } // Now that we know the object header, reload bits. 
- off = (uintptr*)obj - (uintptr*)runtime_mheap.arena_start; - bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; + off = (uintptr*)obj - (uintptr*)runtime_mheap->arena_start; + bitp = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1; shift = off % wordsPerBitmapWord; xbits = *bitp; bits = xbits >> shift; @@ -906,6 +1321,8 @@ getempty(Workbuf *b) if(work.nchunk < sizeof *b) { work.nchunk = 1<<20; work.chunk = runtime_SysAlloc(work.nchunk); + if(work.chunk == nil) + runtime_throw("runtime: cannot allocate memory"); } b = (Workbuf*)work.chunk; work.chunk += sizeof *b; @@ -919,6 +1336,9 @@ getempty(Workbuf *b) static void putempty(Workbuf *b) { + if(CollectStats) + runtime_xadd64(&gcstats.putempty, 1); + runtime_lfstackpush(&work.empty, &b->node); } @@ -929,6 +1349,9 @@ getfull(Workbuf *b) M *m; int32 i; + if(CollectStats) + runtime_xadd64(&gcstats.getfull, 1); + if(b != nil) runtime_lfstackpush(&work.empty, &b->node); b = (Workbuf*)runtime_lfstackpop(&work.full); @@ -994,6 +1417,8 @@ addroot(Obj obj) if(cap < 2*work.rootcap) cap = 2*work.rootcap; new = (Obj*)runtime_SysAlloc(cap*sizeof(Obj)); + if(new == nil) + runtime_throw("runtime: cannot allocate memory"); if(work.roots != nil) { runtime_memmove(new, work.roots, work.rootcap*sizeof(Obj)); runtime_SysFree(work.roots, work.rootcap*sizeof(Obj)); @@ -1081,13 +1506,14 @@ static void addfinroots(void *v) { uintptr size; + void *base; size = 0; - if(!runtime_mlookup(v, (byte**)&v, &size, nil) || !runtime_blockspecial(v)) + if(!runtime_mlookup(v, (byte**)&base, &size, nil) || !runtime_blockspecial(base)) runtime_throw("mark - finalizer inconsistency"); // do not mark the finalizer block itself. just mark the things it points at. - addroot((Obj){v, size, 0}); + addroot((Obj){base, size, 0}); } static struct root_list* roots; @@ -1128,22 +1554,27 @@ addroots(void) addroot((Obj){(byte*)&runtime_g0, sizeof runtime_g0, 0}); addroot((Obj){(byte*)&runtime_allg, sizeof runtime_allg, 0}); addroot((Obj){(byte*)&runtime_allm, sizeof runtime_allm, 0}); + addroot((Obj){(byte*)&runtime_allp, sizeof runtime_allp, 0}); + runtime_proc_scan(addroot); runtime_MProf_Mark(addroot); runtime_time_scan(addroot); // MSpan.types - allspans = runtime_mheap.allspans; - for(spanidx=0; spanidx<runtime_mheap.nspan; spanidx++) { + allspans = runtime_mheap->allspans; + for(spanidx=0; spanidx<runtime_mheap->nspan; spanidx++) { s = allspans[spanidx]; if(s->state == MSpanInUse) { + // The garbage collector ignores type pointers stored in MSpan.types: + // - Compiler-generated types are stored outside of heap. + // - The reflect package has runtime-generated types cached in its data structures. + // The garbage collector relies on finding the references via that cache. 
switch(s->types.compression) { case MTypes_Empty: case MTypes_Single: break; case MTypes_Words: case MTypes_Bytes: - // TODO(atom): consider using defaultProg instead of 0 - addroot((Obj){(byte*)&s->types.data, sizeof(void*), 0}); + markonly((byte*)s->types.data); break; } } @@ -1196,6 +1627,8 @@ handlespecial(byte *p, uintptr size) if(finq == nil || finq->cnt == finq->cap) { if(finc == nil) { finc = runtime_SysAlloc(PageSize); + if(finc == nil) + runtime_throw("runtime: cannot allocate memory"); finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1; finc->alllink = allfin; allfin = finc; @@ -1235,10 +1668,10 @@ sweepspan(ParFor *desc, uint32 idx) m = runtime_m(); USED(&desc); - s = runtime_mheap.allspans[idx]; + s = runtime_mheap->allspans[idx]; if(s->state != MSpanInUse) return; - arena_start = runtime_mheap.arena_start; + arena_start = runtime_mheap->arena_start; p = (byte*)(s->start << PageShift); cl = s->sizeclass; size = s->elemsize; @@ -1301,8 +1734,8 @@ sweepspan(ParFor *desc, uint32 idx) if(cl == 0) { // Free large span. runtime_unmarkspan(p, 1<<PageShift); - *(uintptr*)p = 1; // needs zeroing - runtime_MHeap_Free(&runtime_mheap, s, 1); + *(uintptr*)p = (uintptr)0xdeaddeaddeaddeadll; // needs zeroing + runtime_MHeap_Free(runtime_mheap, s, 1); c->local_alloc -= size; c->local_nfree++; } else { @@ -1316,7 +1749,7 @@ sweepspan(ParFor *desc, uint32 idx) break; } if(size > sizeof(uintptr)) - ((uintptr*)p)[1] = 1; // mark as "needs to be zeroed" + ((uintptr*)p)[1] = (uintptr)0xdeaddeaddeaddeadll; // mark as "needs to be zeroed" end->next = (MLink*)p; end = (MLink*)p; @@ -1330,7 +1763,7 @@ sweepspan(ParFor *desc, uint32 idx) c->local_nfree += nfree; c->local_cachealloc -= nfree * size; c->local_objects -= nfree; - runtime_MCentral_FreeSpan(&runtime_mheap.central[cl], s, nfree, head.next, end); + runtime_MCentral_FreeSpan(&runtime_mheap->central[cl], s, nfree, head.next, end); } } @@ -1344,10 +1777,10 @@ dumpspan(uint32 idx) MSpan *s; bool allocated, special; - s = runtime_mheap.allspans[idx]; + s = runtime_mheap->allspans[idx]; if(s->state != MSpanInUse) return; - arena_start = runtime_mheap.arena_start; + arena_start = runtime_mheap->arena_start; p = (byte*)(s->start << PageShift); sizeclass = s->sizeclass; size = s->elemsize; @@ -1405,7 +1838,7 @@ runtime_memorydump(void) { uint32 spanidx; - for(spanidx=0; spanidx<runtime_mheap.nspan; spanidx++) { + for(spanidx=0; spanidx<runtime_mheap->nspan; spanidx++) { dumpspan(spanidx); } } @@ -1413,6 +1846,8 @@ runtime_memorydump(void) void runtime_gchelper(void) { + gchelperstart(); + // parallel mark for over gc roots runtime_parfordo(work.markfor); @@ -1426,10 +1861,13 @@ runtime_gchelper(void) } runtime_parfordo(work.sweepfor); + bufferList[runtime_m()->helpgc].busy = 0; if(runtime_xadd(&work.ndone, +1) == work.nproc-1) runtime_notewakeup(&work.alldone); } +#define GcpercentUnknown (-2) + // Initialized from $GOGC. GOGC=off means no gc. // // Next gc is after we've allocated an extra amount of @@ -1439,22 +1877,14 @@ runtime_gchelper(void) // proportion to the allocation cost. Adjusting gcpercent // just changes the linear constant (and also the amount of // extra memory used). 
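[Editorial aside, not part of the patch] The comment above describes GOGC's pacing rule, but the actual trigger computation is not shown in these hunks. A minimal sketch of the conventional rule — collect again once the heap has grown by gcpercent% over what was live after the previous collection — assuming the runtime's uint64/int32 typedefs:

	// Illustrative pacing model only; gcpercent < 0 corresponds to GOGC=off.
	static uint64
	next_gc_target(uint64 heap_alloc, int32 gcpercent)
	{
		if(gcpercent < 0)
			return (uint64)-1;	// never trigger automatically
		return heap_alloc + heap_alloc*(uint64)gcpercent/100;
	}

With GOGC=100 and 4 MB live after a collection, the next cycle starts near 8 MB; GOGC=200 would let the heap grow to roughly 12 MB first.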
-static int32 gcpercent = -2; - -static void -stealcache(void) -{ - M *mp; - - for(mp=runtime_allm; mp; mp=mp->alllink) - runtime_MCache_ReleaseAll(mp->mcache); -} +static int32 gcpercent = GcpercentUnknown; static void cachestats(GCStats *stats) { M *mp; MCache *c; + P *p, **pp; uint32 i; uint64 stacks_inuse; uint64 *src, *dst; @@ -1463,9 +1893,7 @@ cachestats(GCStats *stats) runtime_memclr((byte*)stats, sizeof(*stats)); stacks_inuse = 0; for(mp=runtime_allm; mp; mp=mp->alllink) { - c = mp->mcache; - runtime_purgecachedstats(c); - // stacks_inuse += mp->stackinuse*FixedStack; + //stacks_inuse += mp->stackinuse*FixedStack; if(stats) { src = (uint64*)&mp->gcstats; dst = (uint64*)stats; @@ -1473,6 +1901,12 @@ cachestats(GCStats *stats) dst[i] += src[i]; runtime_memclr((byte*)&mp->gcstats, sizeof(mp->gcstats)); } + } + for(pp=runtime_allp; (p=*pp) != nil; pp++) { + c = p->mcache; + if(c==nil) + continue; + runtime_purgecachedstats(c); for(i=0; i<nelem(c->local_by_size); i++) { mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc; c->local_by_size[i].nmalloc = 0; @@ -1492,6 +1926,19 @@ struct gc_args static void gc(struct gc_args *args); +static int32 +readgogc(void) +{ + const byte *p; + + p = runtime_getenv("GOGC"); + if(p == nil || p[0] == '\0') + return 100; + if(runtime_strcmp((const char *)p, "off") == 0) + return -1; + return runtime_atoi(p); +} + void runtime_gc(int32 force) { @@ -1504,6 +1951,8 @@ runtime_gc(int32 force) // a problem in the past. if((((uintptr)&work.empty) & 7) != 0) runtime_throw("runtime: gc work buffer is misaligned"); + if((((uintptr)&work.full) & 7) != 0) + runtime_throw("runtime: gc work buffer is misaligned"); // Make sure all registers are saved on stack so that // scanstack sees them. @@ -1521,14 +1970,8 @@ runtime_gc(int32 force) if(!mstats.enablegc || m->locks > 0 || runtime_panicking) return; - if(gcpercent == -2) { // first time through - p = runtime_getenv("GOGC"); - if(p == nil || p[0] == '\0') - gcpercent = 100; - else if(runtime_strcmp((const char*)p, "off") == 0) - gcpercent = -1; - else - gcpercent = runtime_atoi(p); + if(gcpercent == GcpercentUnknown) { // first time through + gcpercent = readgogc(); p = runtime_getenv("GOGCTRACE"); if(p != nil) @@ -1555,7 +1998,7 @@ gc(struct gc_args *args) { M *m; int64 t0, t1, t2, t3, t4; - uint64 heap0, heap1, obj0, obj1; + uint64 heap0, heap1, obj0, obj1, ninstr; GCStats stats; M *mp; uint32 i; @@ -1574,6 +2017,9 @@ gc(struct gc_args *args) m->gcing = 1; runtime_stoptheworld(); + if(CollectStats) + runtime_memclr((byte*)&gcstats, sizeof(gcstats)); + for(mp=runtime_allm; mp; mp=mp->alllink) runtime_settype_flush(mp, false); @@ -1604,7 +2050,7 @@ gc(struct gc_args *args) work.nproc = runtime_gcprocs(); addroots(); runtime_parforsetup(work.markfor, work.nproc, work.nroot, nil, false, markroot); - runtime_parforsetup(work.sweepfor, work.nproc, runtime_mheap.nspan, nil, true, sweepspan); + runtime_parforsetup(work.sweepfor, work.nproc, runtime_mheap->nspan, nil, true, sweepspan); if(work.nproc > 1) { runtime_noteclear(&work.alldone); runtime_helpgc(work.nproc); @@ -1612,6 +2058,7 @@ gc(struct gc_args *args) t1 = runtime_nanotime(); + gchelperstart(); runtime_parfordo(work.markfor); scanblock(nil, nil, 0, true); @@ -1623,14 +2070,14 @@ gc(struct gc_args *args) t2 = runtime_nanotime(); runtime_parfordo(work.sweepfor); + bufferList[m->helpgc].busy = 0; t3 = runtime_nanotime(); - stealcache(); - cachestats(&stats); - if(work.nproc > 1) runtime_notesleep(&work.alldone); + cachestats(&stats); + stats.nprocyield += 
work.sweepfor->nprocyield; stats.nosyield += work.sweepfor->nosyield; stats.nsleep += work.sweepfor->nsleep; @@ -1670,6 +2117,27 @@ gc(struct gc_args *args) stats.nhandoff, stats.nhandoffcnt, work.sweepfor->nsteal, work.sweepfor->nstealcnt, stats.nprocyield, stats.nosyield, stats.nsleep); + if(CollectStats) { + runtime_printf("scan: %D bytes, %D objects, %D untyped, %D types from MSpan\n", + gcstats.nbytes, gcstats.obj.cnt, gcstats.obj.notype, gcstats.obj.typelookup); + if(gcstats.ptr.cnt != 0) + runtime_printf("avg ptrbufsize: %D (%D/%D)\n", + gcstats.ptr.sum/gcstats.ptr.cnt, gcstats.ptr.sum, gcstats.ptr.cnt); + if(gcstats.obj.cnt != 0) + runtime_printf("avg nobj: %D (%D/%D)\n", + gcstats.obj.sum/gcstats.obj.cnt, gcstats.obj.sum, gcstats.obj.cnt); + runtime_printf("rescans: %D, %D bytes\n", gcstats.rescan, gcstats.rescanbytes); + + runtime_printf("instruction counts:\n"); + ninstr = 0; + for(i=0; i<nelem(gcstats.instr); i++) { + runtime_printf("\t%d:\t%D\n", i, gcstats.instr[i]); + ninstr += gcstats.instr[i]; + } + runtime_printf("\ttotal:\t%D\n", ninstr); + + runtime_printf("putempty: %D, getfull: %D\n", gcstats.putempty, gcstats.getfull); + } } runtime_MProf_GC(); @@ -1704,6 +2172,71 @@ runtime_ReadMemStats(MStats *stats) runtime_starttheworld(); } +void runtime_debug_readGCStats(Slice*) + __asm__("runtime_debug.readGCStats"); + +void +runtime_debug_readGCStats(Slice *pauses) +{ + uint64 *p; + uint32 i, n; + + // Calling code in runtime/debug should make the slice large enough. + if((size_t)pauses->cap < nelem(mstats.pause_ns)+3) + runtime_throw("runtime: short slice passed to readGCStats"); + + // Pass back: pauses, last gc (absolute time), number of gc, total pause ns. + p = (uint64*)pauses->array; + runtime_lock(runtime_mheap); + n = mstats.numgc; + if(n > nelem(mstats.pause_ns)) + n = nelem(mstats.pause_ns); + + // The pause buffer is circular. The most recent pause is at + // pause_ns[(numgc-1)%nelem(pause_ns)], and then backward + // from there to go back farther in time. We deliver the times + // most recent first (in p[0]). 
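A quick worked example of the circular indexing described in the comment above (editorial aside; assumes the 256-entry pause_ns buffer used by MemStats):

	// numgc = 300, nelem(pause_ns) = 256, so n is clamped to 256 entries:
	//   i = 0  -> pause_ns[(300-1-0)%256] = pause_ns[43]   (most recent pause)
	//   i = 1  -> pause_ns[298%256]       = pause_ns[42]
	//   ...
	//   i = 43 -> pause_ns[256%256]       = pause_ns[0]
	//   i = 44 -> pause_ns[255%256]       = pause_ns[255]  (index wraps to the top)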
+ for(i=0; i<n; i++) + p[i] = mstats.pause_ns[(mstats.numgc-1-i)%nelem(mstats.pause_ns)]; + + p[n] = mstats.last_gc; + p[n+1] = mstats.numgc; + p[n+2] = mstats.pause_total_ns; + runtime_unlock(runtime_mheap); + pauses->__count = n+3; +} + +intgo runtime_debug_setGCPercent(intgo) + __asm__("runtime_debug.setGCPercent"); + +intgo +runtime_debug_setGCPercent(intgo in) +{ + intgo out; + + runtime_lock(runtime_mheap); + if(gcpercent == GcpercentUnknown) + gcpercent = readgogc(); + out = gcpercent; + if(in < 0) + in = -1; + gcpercent = in; + runtime_unlock(runtime_mheap); + return out; +} + +static void +gchelperstart(void) +{ + M *m; + + m = runtime_m(); + if(m->helpgc < 0 || m->helpgc >= MaxGcproc) + runtime_throw("gchelperstart: bad m->helpgc"); + if(runtime_xchg(&bufferList[m->helpgc].busy, 1)) + runtime_throw("gchelperstart: already busy"); +} + static void runfinq(void* dummy __attribute__ ((unused))) { @@ -1757,11 +2290,11 @@ runtime_markallocated(void *v, uintptr n, bool noptr) if(0) runtime_printf("markallocated %p+%p\n", v, n); - if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start) + if((byte*)v+n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start) runtime_throw("markallocated: bad pointer"); - off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; // word offset - b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; + off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start; // word offset + b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1; shift = off % wordsPerBitmapWord; for(;;) { @@ -1789,11 +2322,11 @@ runtime_markfreed(void *v, uintptr n) if(0) runtime_printf("markallocated %p+%p\n", v, n); - if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start) + if((byte*)v+n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start) runtime_throw("markallocated: bad pointer"); - off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; // word offset - b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; + off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start; // word offset + b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1; shift = off % wordsPerBitmapWord; for(;;) { @@ -1819,11 +2352,11 @@ runtime_checkfreed(void *v, uintptr n) if(!runtime_checking) return; - if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start) + if((byte*)v+n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start) return; // not allocated, so okay - off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; // word offset - b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; + off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start; // word offset + b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1; shift = off % wordsPerBitmapWord; bits = *b>>shift; @@ -1842,7 +2375,7 @@ runtime_markspan(void *v, uintptr size, uintptr n, bool leftover) uintptr *b, off, shift; byte *p; - if((byte*)v+size*n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start) + if((byte*)v+size*n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start) runtime_throw("markspan: bad pointer"); p = v; @@ -1853,8 +2386,8 @@ runtime_markspan(void *v, uintptr size, uintptr n, bool leftover) // the entire span, and each bitmap word has bits for only // one span, so no other goroutines are changing these // bitmap words. 
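The off/b/shift arithmetic repeated throughout these mark functions follows one addressing scheme: the heap bitmap sits immediately below arena_start and grows downward, with one bitmap word covering wordsPerBitmapWord heap words. A minimal restatement of that calculation (editorial sketch only; wordsPerBitmapWord is passed in here to keep it self-contained, and the runtime's uintptr/byte typedefs are assumed):

	// Return the bitmap word describing the heap word at v, and the bit shift
	// of that heap word's bits within it, mirroring the pattern used above.
	static uintptr*
	bitmapword(void *v, byte *arena_start, uintptr wordsPerBitmapWord, uintptr *shift)
	{
		uintptr off;

		off = (uintptr*)v - (uintptr*)arena_start;	// word offset into the arena
		*shift = off % wordsPerBitmapWord;		// bit position inside the bitmap word
		return (uintptr*)arena_start - off/wordsPerBitmapWord - 1;	// bitmap grows down
	}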
- off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start; // word offset - b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; + off = (uintptr*)p - (uintptr*)runtime_mheap->arena_start; // word offset + b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1; shift = off % wordsPerBitmapWord; *b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift); } @@ -1866,14 +2399,14 @@ runtime_unmarkspan(void *v, uintptr n) { uintptr *p, *b, off; - if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start) + if((byte*)v+n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start) runtime_throw("markspan: bad pointer"); p = v; - off = p - (uintptr*)runtime_mheap.arena_start; // word offset + off = p - (uintptr*)runtime_mheap->arena_start; // word offset if(off % wordsPerBitmapWord != 0) runtime_throw("markspan: unaligned pointer"); - b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; + b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1; n /= PtrSize; if(n%wordsPerBitmapWord != 0) runtime_throw("unmarkspan: unaligned length"); @@ -1894,8 +2427,8 @@ runtime_blockspecial(void *v) if(DebugMark) return true; - off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; - b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; + off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start; + b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1; shift = off % wordsPerBitmapWord; return (*b & (bitSpecial<<shift)) != 0; @@ -1909,8 +2442,8 @@ runtime_setblockspecial(void *v, bool s) if(DebugMark) return; - off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; - b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; + off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start; + b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1; shift = off % wordsPerBitmapWord; for(;;) { diff --git a/libgo/runtime/mgc0.h b/libgo/runtime/mgc0.h index a2798ef..d14fb37 100644 --- a/libgo/runtime/mgc0.h +++ b/libgo/runtime/mgc0.h @@ -12,17 +12,22 @@ // Meaning of arguments: // off Offset (in bytes) from the start of the current object // objgc Pointer to GC info of an object +// objgcrel Offset to GC info of an object // len Length of an array // elemsize Size (in bytes) of an element // size Size (in bytes) +// +// NOTE: There is a copy of these in ../reflect/type.go. +// They must be kept in sync. enum { GC_END, // End of object, loop or subroutine. Args: none GC_PTR, // A typed pointer. Args: (off, objgc) GC_APTR, // Pointer to an arbitrary object. Args: (off) GC_ARRAY_START, // Start an array with a fixed length. Args: (off, len, elemsize) GC_ARRAY_NEXT, // The next element of an array. Args: none - GC_CALL, // Call a subroutine. Args: (off, objgc) + GC_CALL, // Call a subroutine. Args: (off, objgcrel) GC_MAP_PTR, // Go map. Args: (off, MapType*) + GC_CHAN_PTR, // Go channel. Args: (off, ChanType*) GC_STRING, // Go string. Args: (off) GC_EFACE, // interface{}. Args: (off) GC_IFACE, // interface{...}. 
Args: (off) diff --git a/libgo/runtime/mheap.c b/libgo/runtime/mheap.c index 6636b01..b4d94b6 100644 --- a/libgo/runtime/mheap.c +++ b/libgo/runtime/mheap.c @@ -37,6 +37,8 @@ RecordSpan(void *vh, byte *p) if(cap < h->nspancap*3/2) cap = h->nspancap*3/2; all = (MSpan**)runtime_SysAlloc(cap*sizeof(all[0])); + if(all == nil) + runtime_throw("runtime: cannot allocate memory"); if(h->allspans) { runtime_memmove(all, h->allspans, h->nspancap*sizeof(all[0])); runtime_SysFree(h->allspans, h->nspancap*sizeof(all[0])); @@ -119,6 +121,25 @@ HaveSpan: s->state = MSpanInUse; mstats.heap_idle -= s->npages<<PageShift; mstats.heap_released -= s->npreleased<<PageShift; + if(s->npreleased > 0) { + // We have called runtime_SysUnused with these pages, and on + // Unix systems it called madvise. At this point at least + // some BSD-based kernels will return these pages either as + // zeros or with the old data. For our caller, the first word + // in the page indicates whether the span contains zeros or + // not (this word was set when the span was freed by + // MCentral_Free or runtime_MCentral_FreeSpan). If the first + // page in the span is returned as zeros, and some subsequent + // page is returned with the old data, then we will be + // returning a span that is assumed to be all zeros, but the + // actual data will not be all zeros. Avoid that problem by + // explicitly marking the span as not being zeroed, just in + // case. The beadbead constant we use here means nothing, it + // is just a unique constant not seen elsewhere in the + // runtime, as a clue in case it turns up unexpectedly in + // memory or in a stack trace. + *(uintptr*)(s->start<<PageShift) = (uintptr)0xbeadbeadbeadbeadULL; + } s->npreleased = 0; if(s->npages > npage) { @@ -356,23 +377,64 @@ forcegchelper(void *vnote) runtime_notewakeup(note); } +static uintptr +scavengelist(MSpan *list, uint64 now, uint64 limit) +{ + uintptr released, sumreleased; + MSpan *s; + + if(runtime_MSpanList_IsEmpty(list)) + return 0; + + sumreleased = 0; + for(s=list->next; s != list; s=s->next) { + if((now - s->unusedsince) > limit) { + released = (s->npages - s->npreleased) << PageShift; + mstats.heap_released += released; + sumreleased += released; + s->npreleased = s->npages; + runtime_SysUnused((void*)(s->start << PageShift), s->npages << PageShift); + } + } + return sumreleased; +} + +static uintptr +scavenge(uint64 now, uint64 limit) +{ + uint32 i; + uintptr sumreleased; + MHeap *h; + + h = runtime_mheap; + sumreleased = 0; + for(i=0; i < nelem(h->free); i++) + sumreleased += scavengelist(&h->free[i], now, limit); + sumreleased += scavengelist(&h->large, now, limit); + return sumreleased; +} + // Release (part of) unused memory to OS. // Goroutine created at startup. // Loop forever. void runtime_MHeap_Scavenger(void* dummy) { + G *g; MHeap *h; - MSpan *s, *list; uint64 tick, now, forcegc, limit; - uint32 k, i; - uintptr released, sumreleased; + uint32 k; + uintptr sumreleased; const byte *env; bool trace; Note note, *notep; USED(dummy); + g = runtime_g(); + g->issystem = true; + g->isbackground = true; + // If we go two minutes without a garbage collection, force one to run. 
forcegc = 2*60*1e9; // If a span goes unused for 5 minutes after a garbage collection, @@ -389,10 +451,10 @@ runtime_MHeap_Scavenger(void* dummy) if(env != nil) trace = runtime_atoi(env) > 0; - h = &runtime_mheap; + h = runtime_mheap; for(k=0;; k++) { runtime_noteclear(¬e); - runtime_entersyscall(); + runtime_entersyscallblock(); runtime_notetsleep(¬e, tick); runtime_exitsyscall(); @@ -406,7 +468,7 @@ runtime_MHeap_Scavenger(void* dummy) runtime_noteclear(¬e); notep = ¬e; __go_go(forcegchelper, (void*)notep); - runtime_entersyscall(); + runtime_entersyscallblock(); runtime_notesleep(¬e); runtime_exitsyscall(); if(trace) @@ -414,24 +476,7 @@ runtime_MHeap_Scavenger(void* dummy) runtime_lock(h); now = runtime_nanotime(); } - sumreleased = 0; - for(i=0; i < nelem(h->free)+1; i++) { - if(i < nelem(h->free)) - list = &h->free[i]; - else - list = &h->large; - if(runtime_MSpanList_IsEmpty(list)) - continue; - for(s=list->next; s != list; s=s->next) { - if((now - s->unusedsince) > limit) { - released = (s->npages - s->npreleased) << PageShift; - mstats.heap_released += released; - sumreleased += released; - s->npreleased = s->npages; - runtime_SysUnused((void*)(s->start << PageShift), s->npages << PageShift); - } - } - } + sumreleased = scavenge(now, limit); runtime_unlock(h); if(trace) { @@ -444,6 +489,17 @@ runtime_MHeap_Scavenger(void* dummy) } } +void runtime_debug_freeOSMemory(void) __asm__("runtime_debug.freeOSMemory"); + +void +runtime_debug_freeOSMemory(void) +{ + runtime_gc(1); + runtime_lock(runtime_mheap); + scavenge(~(uintptr)0, 0); + runtime_unlock(runtime_mheap); +} + // Initialize a new span with the given start and npages. void runtime_MSpan_Init(MSpan *span, PageID start, uintptr npages) diff --git a/libgo/runtime/mprof.goc b/libgo/runtime/mprof.goc index c1b09be..73d9379 100644 --- a/libgo/runtime/mprof.goc +++ b/libgo/runtime/mprof.goc @@ -14,7 +14,43 @@ package runtime #include "go-string.h" // NOTE(rsc): Everything here could use cas if contention became an issue. -static Lock proflock; +static Lock proflock, alloclock; + +// All memory allocations are local and do not escape outside of the profiler. +// The profiler is forbidden from referring to garbage-collected memory. + +static byte *pool; // memory allocation pool +static uintptr poolfree; // number of bytes left in the pool +enum { + Chunk = 32*PageSize, // initial size of the pool +}; + +// Memory allocation local to this file. +// There is no way to return the allocated memory back to the OS. +static void* +allocate(uintptr size) +{ + void *v; + + if(size == 0) + return nil; + + if(size >= Chunk/2) + return runtime_SysAlloc(size); + + runtime_lock(&alloclock); + if(size > poolfree) { + pool = runtime_SysAlloc(Chunk); + if(pool == nil) + runtime_throw("runtime: cannot allocate memory"); + poolfree = Chunk; + } + v = pool; + pool += size; + poolfree -= size; + runtime_unlock(&alloclock); + return v; +} enum { MProf, BProf }; // profile types @@ -26,6 +62,8 @@ struct Bucket Bucket *next; // next in hash list Bucket *allnext; // next in list of all mbuckets/bbuckets int32 typ; + // Generally unions can break precise GC, + // this one is fine because it does not contain pointers. 
union { struct // typ == MProf @@ -67,6 +105,8 @@ stkbucket(int32 typ, Location *stk, int32 nstk, bool alloc) if(buckhash == nil) { buckhash = runtime_SysAlloc(BuckHashSize*sizeof buckhash[0]); + if(buckhash == nil) + runtime_throw("runtime: cannot allocate memory"); mstats.buckhash_sys += BuckHashSize*sizeof buckhash[0]; } @@ -97,7 +137,9 @@ stkbucket(int32 typ, Location *stk, int32 nstk, bool alloc) if(!alloc) return nil; - b = runtime_mallocgc(sizeof *b + nstk*sizeof stk[0], FlagNoProfiling, 0, 1); + b = allocate(sizeof *b + nstk*sizeof stk[0]); + if(b == nil) + runtime_throw("runtime: cannot allocate memory"); bucketmem += sizeof *b + nstk*sizeof stk[0]; runtime_memmove(b->stk, stk, nstk*sizeof stk[0]); b->typ = typ; @@ -115,13 +157,11 @@ stkbucket(int32 typ, Location *stk, int32 nstk, bool alloc) return b; } -// Record that a gc just happened: all the 'recent' statistics are now real. -void -runtime_MProf_GC(void) +static void +MProf_GC(void) { Bucket *b; - - runtime_lock(&proflock); + for(b=mbuckets; b; b=b->allnext) { b->allocs += b->recent_allocs; b->frees += b->recent_frees; @@ -132,6 +172,14 @@ runtime_MProf_GC(void) b->recent_alloc_bytes = 0; b->recent_free_bytes = 0; } +} + +// Record that a gc just happened: all the 'recent' statistics are now real. +void +runtime_MProf_GC(void) +{ + runtime_lock(&proflock); + MProf_GC(); runtime_unlock(&proflock); } @@ -166,7 +214,7 @@ struct AddrEntry Bucket *b; }; -static AddrHash *addrhash[1<<AddrHashBits]; +static AddrHash **addrhash; // points to (AddrHash*)[1<<AddrHashBits] static AddrEntry *addrfree; static uintptr addrmem; @@ -193,7 +241,7 @@ setaddrbucket(uintptr addr, Bucket *b) if(ah->addr == (addr>>AddrHashShift)) goto found; - ah = runtime_mallocgc(sizeof *ah, FlagNoProfiling, 0, 1); + ah = allocate(sizeof *ah); addrmem += sizeof *ah; ah->next = addrhash[h]; ah->addr = addr>>AddrHashShift; @@ -201,7 +249,7 @@ setaddrbucket(uintptr addr, Bucket *b) found: if((e = addrfree) == nil) { - e = runtime_mallocgc(64*sizeof *e, FlagNoProfiling, 0, 0); + e = allocate(64*sizeof *e); addrmem += 64*sizeof *e; for(i=0; i+1<64; i++) e[i].next = &e[i+1]; @@ -353,12 +401,28 @@ record(Record *r, Bucket *b) func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) { Bucket *b; Record *r; + bool clear; runtime_lock(&proflock); n = 0; - for(b=mbuckets; b; b=b->allnext) + clear = true; + for(b=mbuckets; b; b=b->allnext) { if(include_inuse_zero || b->alloc_bytes != b->free_bytes) n++; + if(b->allocs != 0 || b->frees != 0) + clear = false; + } + if(clear) { + // Absolutely no data, suggesting that a garbage collection + // has not yet happened. In order to allow profiling when + // garbage collection is disabled from the beginning of execution, + // accumulate stats as if a GC just happened, and recount buckets. + MProf_GC(); + n = 0; + for(b=mbuckets; b; b=b->allnext) + if(include_inuse_zero || b->alloc_bytes != b->free_bytes) + n++; + } ok = false; if(n <= p.__count) { ok = true; @@ -531,3 +595,8 @@ func GoroutineProfile(b Slice) (n int, ok bool) { } } +void +runtime_mprofinit(void) +{ + addrhash = allocate((1<<AddrHashBits)*sizeof *addrhash); +} diff --git a/libgo/runtime/netpoll.goc b/libgo/runtime/netpoll.goc new file mode 100644 index 0000000..a0bd735 --- /dev/null +++ b/libgo/runtime/netpoll.goc @@ -0,0 +1,356 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// +build darwin linux + +package net + +#include "runtime.h" +#include "defs.h" +#include "arch.h" +#include "malloc.h" + +// Map gccgo field names to gc field names. +// Eface aka __go_empty_interface. +#define type __type_descriptor +#define data __object + +// Integrated network poller (platform-independent part). +// A particular implementation (epoll/kqueue) must define the following functions: +// void runtime_netpollinit(void); // to initialize the poller +// int32 runtime_netpollopen(int32 fd, PollDesc *pd); // to arm edge-triggered notifications + // and associate fd with pd. +// An implementation must call the following function to denote that the pd is ready. +// void runtime_netpollready(G **gpp, PollDesc *pd, int32 mode); + +#define READY ((G*)1) + +struct PollDesc +{ + PollDesc* link; // in pollcache, protected by pollcache.Lock + Lock; // protectes the following fields + int32 fd; + bool closing; + uintptr seq; // protects from stale timers and ready notifications + G* rg; // G waiting for read or READY (binary semaphore) + Timer rt; // read deadline timer (set if rt.fv != nil) + int64 rd; // read deadline + G* wg; // the same for writes + Timer wt; + int64 wd; +}; + +static struct +{ + Lock; + PollDesc* first; + // PollDesc objects must be type-stable, + // because we can get ready notification from epoll/kqueue + // after the descriptor is closed/reused. + // Stale notifications are detected using seq variable, + // seq is incremented when deadlines are changed or descriptor is reused. +} pollcache; + +static void netpollblock(PollDesc*, int32); +static G* netpollunblock(PollDesc*, int32); +static void deadline(int64, Eface); +static void readDeadline(int64, Eface); +static void writeDeadline(int64, Eface); +static PollDesc* allocPollDesc(void); +static intgo checkerr(PollDesc *pd, int32 mode); + +static FuncVal deadlineFn = {(void(*)(void))deadline}; +static FuncVal readDeadlineFn = {(void(*)(void))readDeadline}; +static FuncVal writeDeadlineFn = {(void(*)(void))writeDeadline}; + +func runtime_pollServerInit() { + runtime_netpollinit(); +} + +func runtime_pollOpen(fd int) (pd *PollDesc, errno int) { + pd = allocPollDesc(); + runtime_lock(pd); + if(pd->wg != nil && pd->wg != READY) + runtime_throw("runtime_pollOpen: blocked write on free descriptor"); + if(pd->rg != nil && pd->rg != READY) + runtime_throw("runtime_pollOpen: blocked read on free descriptor"); + pd->fd = fd; + pd->closing = false; + pd->seq++; + pd->rg = nil; + pd->rd = 0; + pd->wg = nil; + pd->wd = 0; + runtime_unlock(pd); + + errno = runtime_netpollopen(fd, pd); +} + +func runtime_pollClose(pd *PollDesc) { + if(!pd->closing) + runtime_throw("runtime_pollClose: close w/o unblock"); + if(pd->wg != nil && pd->wg != READY) + runtime_throw("runtime_pollClose: blocked write on closing descriptor"); + if(pd->rg != nil && pd->rg != READY) + runtime_throw("runtime_pollClose: blocked read on closing descriptor"); + runtime_netpollclose(pd->fd); + runtime_lock(&pollcache); + pd->link = pollcache.first; + pollcache.first = pd; + runtime_unlock(&pollcache); +} + +func runtime_pollReset(pd *PollDesc, mode int) (err int) { + runtime_lock(pd); + err = checkerr(pd, mode); + if(err) + goto ret; + if(mode == 'r') + pd->rg = nil; + else if(mode == 'w') + pd->wg = nil; +ret: + runtime_unlock(pd); +} + +func runtime_pollWait(pd *PollDesc, mode int) (err int) { + runtime_lock(pd); + err = checkerr(pd, mode); + if(err) + goto ret; + netpollblock(pd, mode); + err = checkerr(pd, mode); +ret: + runtime_unlock(pd); +} + +func 
runtime_pollSetDeadline(pd *PollDesc, d int64, mode int) { + runtime_lock(pd); + if(pd->closing) + goto ret; + pd->seq++; // invalidate current timers + // Reset current timers. + if(pd->rt.fv) { + runtime_deltimer(&pd->rt); + pd->rt.fv = nil; + } + if(pd->wt.fv) { + runtime_deltimer(&pd->wt); + pd->wt.fv = nil; + } + // Setup new timers. + if(d != 0 && d <= runtime_nanotime()) { + d = -1; + } + if(mode == 'r' || mode == 'r'+'w') + pd->rd = d; + if(mode == 'w' || mode == 'r'+'w') + pd->wd = d; + if(pd->rd > 0 && pd->rd == pd->wd) { + pd->rt.fv = &deadlineFn; + pd->rt.when = pd->rd; + // Copy current seq into the timer arg. + // Timer func will check the seq against current descriptor seq, + // if they differ the descriptor was reused or timers were reset. + pd->rt.arg.type = (Type*)pd->seq; + pd->rt.arg.data = pd; + runtime_addtimer(&pd->rt); + } else { + if(pd->rd > 0) { + pd->rt.fv = &readDeadlineFn; + pd->rt.when = pd->rd; + pd->rt.arg.type = (Type*)pd->seq; + pd->rt.arg.data = pd; + runtime_addtimer(&pd->rt); + } + if(pd->wd > 0) { + pd->wt.fv = &writeDeadlineFn; + pd->wt.when = pd->wd; + pd->wt.arg.type = (Type*)pd->seq; + pd->wt.arg.data = pd; + runtime_addtimer(&pd->wt); + } + } +ret: + runtime_unlock(pd); +} + +func runtime_pollUnblock(pd *PollDesc) { + G *rg, *wg; + + runtime_lock(pd); + if(pd->closing) + runtime_throw("runtime_pollUnblock: already closing"); + pd->closing = true; + pd->seq++; + rg = netpollunblock(pd, 'r'); + wg = netpollunblock(pd, 'w'); + if(pd->rt.fv) { + runtime_deltimer(&pd->rt); + pd->rt.fv = nil; + } + if(pd->wt.fv) { + runtime_deltimer(&pd->wt); + pd->wt.fv = nil; + } + runtime_unlock(pd); + if(rg) + runtime_ready(rg); + if(wg) + runtime_ready(wg); +} + +// make pd ready, newly runnable goroutines (if any) are enqueued info gpp list +void +runtime_netpollready(G **gpp, PollDesc *pd, int32 mode) +{ + G *rg, *wg; + + rg = wg = nil; + runtime_lock(pd); + if(mode == 'r' || mode == 'r'+'w') + rg = netpollunblock(pd, 'r'); + if(mode == 'w' || mode == 'r'+'w') + wg = netpollunblock(pd, 'w'); + runtime_unlock(pd); + if(rg) { + rg->schedlink = *gpp; + *gpp = rg; + } + if(wg) { + wg->schedlink = *gpp; + *gpp = wg; + } +} + +static intgo +checkerr(PollDesc *pd, int32 mode) +{ + if(pd->closing) + return 1; // errClosing + if((mode == 'r' && pd->rd < 0) || (mode == 'w' && pd->wd < 0)) + return 2; // errTimeout + return 0; +} + +static void +netpollblock(PollDesc *pd, int32 mode) +{ + G **gpp; + + gpp = &pd->rg; + if(mode == 'w') + gpp = &pd->wg; + if(*gpp == READY) { + *gpp = nil; + return; + } + if(*gpp != nil) + runtime_throw("epoll: double wait"); + *gpp = runtime_g(); + runtime_park(runtime_unlock, &pd->Lock, "IO wait"); + runtime_lock(pd); +} + +static G* +netpollunblock(PollDesc *pd, int32 mode) +{ + G **gpp, *old; + + gpp = &pd->rg; + if(mode == 'w') + gpp = &pd->wg; + if(*gpp == READY) + return nil; + if(*gpp == nil) { + *gpp = READY; + return nil; + } + old = *gpp; + *gpp = nil; + return old; +} + +static void +deadlineimpl(int64 now, Eface arg, bool read, bool write) +{ + PollDesc *pd; + uint32 seq; + G *rg, *wg; + + USED(now); + pd = (PollDesc*)arg.data; + // This is the seq when the timer was set. + // If it's stale, ignore the timer event. + seq = (uintptr)arg.type; + rg = wg = nil; + runtime_lock(pd); + if(seq != pd->seq) { + // The descriptor was reused or timers were reset. 
+ runtime_unlock(pd); + return; + } + if(read) { + if(pd->rd <= 0 || pd->rt.fv == nil) + runtime_throw("deadlineimpl: inconsistent read deadline"); + pd->rd = -1; + pd->rt.fv = nil; + rg = netpollunblock(pd, 'r'); + } + if(write) { + if(pd->wd <= 0 || (pd->wt.fv == nil && !read)) + runtime_throw("deadlineimpl: inconsistent write deadline"); + pd->wd = -1; + pd->wt.fv = nil; + wg = netpollunblock(pd, 'w'); + } + runtime_unlock(pd); + if(rg) + runtime_ready(rg); + if(wg) + runtime_ready(wg); +} + +static void +deadline(int64 now, Eface arg) +{ + deadlineimpl(now, arg, true, true); +} + +static void +readDeadline(int64 now, Eface arg) +{ + deadlineimpl(now, arg, true, false); +} + +static void +writeDeadline(int64 now, Eface arg) +{ + deadlineimpl(now, arg, false, true); +} + +static PollDesc* +allocPollDesc(void) +{ + PollDesc *pd; + uint32 i, n; + + runtime_lock(&pollcache); + if(pollcache.first == nil) { + n = PageSize/sizeof(*pd); + if(n == 0) + n = 1; + // Must be in non-GC memory because can be referenced + // only from epoll/kqueue internals. + pd = runtime_SysAlloc(n*sizeof(*pd)); + for(i = 0; i < n; i++) { + pd[i].link = pollcache.first; + pollcache.first = &pd[i]; + } + } + pd = pollcache.first; + pollcache.first = pd->link; + runtime_unlock(&pollcache); + return pd; +} diff --git a/libgo/runtime/netpoll_epoll.c b/libgo/runtime/netpoll_epoll.c new file mode 100644 index 0000000..04f9c75 --- /dev/null +++ b/libgo/runtime/netpoll_epoll.c @@ -0,0 +1,154 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build linux + +#include <errno.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/epoll.h> + +#include "runtime.h" +#include "defs.h" + +#ifndef EPOLLRDHUP +#define EPOLLRDHUP 0x2000 +#endif + +#ifndef EPOLL_CLOEXEC +#define EPOLL_CLOEXEC 02000000 +#endif + +typedef struct epoll_event EpollEvent; + +static int32 +runtime_epollcreate(int32 size) +{ + int r; + + r = epoll_create(size); + if(r >= 0) + return r; + return - errno; +} + +static int32 +runtime_epollcreate1(int32 flags) +{ + int r; + + r = epoll_create1(flags); + if(r >= 0) + return r; + return - errno; +} + +static int32 +runtime_epollctl(int32 epfd, int32 op, int32 fd, EpollEvent *ev) +{ + int r; + + r = epoll_ctl(epfd, op, fd, ev); + if(r >= 0) + return r; + return - errno; +} + +static int32 +runtime_epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout) +{ + int r; + + r = epoll_wait(epfd, ev, nev, timeout); + if(r >= 0) + return r; + return - errno; +} + +static void +runtime_closeonexec(int32 fd) +{ + fcntl(fd, F_SETFD, FD_CLOEXEC); +} + +static int32 epfd = -1; // epoll descriptor + +void +runtime_netpollinit(void) +{ + epfd = runtime_epollcreate1(EPOLL_CLOEXEC); + if(epfd >= 0) + return; + epfd = runtime_epollcreate(1024); + if(epfd >= 0) { + runtime_closeonexec(epfd); + return; + } + runtime_printf("netpollinit: failed to create descriptor (%d)\n", -epfd); + runtime_throw("netpollinit: failed to create descriptor"); +} + +int32 +runtime_netpollopen(int32 fd, PollDesc *pd) +{ + EpollEvent ev; + int32 res; + + ev.events = EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLET; + ev.data.ptr = (void*)pd; + res = runtime_epollctl(epfd, EPOLL_CTL_ADD, fd, &ev); + return -res; +} + +int32 +runtime_netpollclose(int32 fd) +{ + EpollEvent ev; + int32 res; + + res = runtime_epollctl(epfd, EPOLL_CTL_DEL, fd, &ev); + return -res; +} + +// polls for ready network connections +// returns list of goroutines that 
become runnable +G* +runtime_netpoll(bool block) +{ + static int32 lasterr; + EpollEvent events[128], *ev; + int32 n, i, waitms, mode; + G *gp; + + if(epfd == -1) + return nil; + waitms = -1; + if(!block) + waitms = 0; +retry: + n = runtime_epollwait(epfd, events, nelem(events), waitms); + if(n < 0) { + if(n != -EINTR && n != lasterr) { + lasterr = n; + runtime_printf("runtime: epollwait on fd %d failed with %d\n", epfd, -n); + } + goto retry; + } + gp = nil; + for(i = 0; i < n; i++) { + ev = &events[i]; + if(ev->events == 0) + continue; + mode = 0; + if(ev->events & (EPOLLIN|EPOLLRDHUP|EPOLLHUP|EPOLLERR)) + mode += 'r'; + if(ev->events & (EPOLLOUT|EPOLLHUP|EPOLLERR)) + mode += 'w'; + if(mode) + runtime_netpollready(&gp, (void*)ev->data.ptr, mode); + } + if(block && gp == nil) + goto retry; + return gp; +} diff --git a/libgo/runtime/netpoll_kqueue.c b/libgo/runtime/netpoll_kqueue.c new file mode 100644 index 0000000..9b79b20 --- /dev/null +++ b/libgo/runtime/netpoll_kqueue.c @@ -0,0 +1,108 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build darwin + +#include "runtime.h" +#include "defs_GOOS_GOARCH.h" + +// Integrated network poller (kqueue-based implementation). + +int32 runtime_kqueue(void); +int32 runtime_kevent(int32, Kevent*, int32, Kevent*, int32, Timespec*); +void runtime_closeonexec(int32); + +static int32 kq = -1; + +void +runtime_netpollinit(void) +{ + kq = runtime_kqueue(); + if(kq < 0) { + runtime_printf("netpollinit: kqueue failed with %d\n", -kq); + runtime_throw("netpollinit: kqueue failed"); + } + runtime_closeonexec(kq); +} + +int32 +runtime_netpollopen(int32 fd, PollDesc *pd) +{ + Kevent ev[2]; + int32 n; + + // Arm both EVFILT_READ and EVFILT_WRITE in edge-triggered mode (EV_CLEAR) + // for the whole fd lifetime. The notifications are automatically unregistered + // when fd is closed. + ev[0].ident = fd; + ev[0].filter = EVFILT_READ; + ev[0].flags = EV_ADD|EV_RECEIPT|EV_CLEAR; + ev[0].fflags = 0; + ev[0].data = 0; + ev[0].udata = (byte*)pd; + ev[1] = ev[0]; + ev[1].filter = EVFILT_WRITE; + n = runtime_kevent(kq, ev, 2, ev, 2, nil); + if(n < 0) + return -n; + if(n != 2 || + (ev[0].flags&EV_ERROR) == 0 || ev[0].ident != fd || ev[0].filter != EVFILT_READ || + (ev[1].flags&EV_ERROR) == 0 || ev[1].ident != fd || ev[1].filter != EVFILT_WRITE) + return EFAULT; // just to mark out from other errors + if(ev[0].data != 0) + return ev[0].data; + if(ev[1].data != 0) + return ev[1].data; + return 0; +} + +int32 +runtime_netpollclose(int32 fd) +{ + // Don't need to unregister because calling close() + // on fd will remove any kevents that reference the descriptor. + USED(fd); + return 0; +} + +// Polls for ready network connections. +// Returns list of goroutines that become runnable. 
+G* +runtime_netpoll(bool block) +{ + static int32 lasterr; + Kevent events[64], *ev; + Timespec ts, *tp; + int32 n, i; + G *gp; + + if(kq == -1) + return nil; + tp = nil; + if(!block) { + ts.tv_sec = 0; + ts.tv_nsec = 0; + tp = &ts; + } + gp = nil; +retry: + n = runtime_kevent(kq, nil, 0, events, nelem(events), tp); + if(n < 0) { + if(n != -EINTR && n != lasterr) { + lasterr = n; + runtime_printf("runtime: kevent on fd %d failed with %d\n", kq, -n); + } + goto retry; + } + for(i = 0; i < n; i++) { + ev = &events[i]; + if(ev->filter == EVFILT_READ) + runtime_netpollready(&gp, (PollDesc*)ev->udata, 'r'); + if(ev->filter == EVFILT_WRITE) + runtime_netpollready(&gp, (PollDesc*)ev->udata, 'w'); + } + if(block && gp == nil) + goto retry; + return gp; +} diff --git a/libgo/runtime/netpoll_stub.c b/libgo/runtime/netpoll_stub.c new file mode 100644 index 0000000..e28e38e --- /dev/null +++ b/libgo/runtime/netpoll_stub.c @@ -0,0 +1,18 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build freebsd netbsd openbsd plan9 windows + +#include "runtime.h" + +// Polls for ready network connections. +// Returns list of goroutines that become runnable. +G* +runtime_netpoll(bool block) +{ + // Implementation for platforms that do not support + // integrated network poller. + USED(block); + return nil; +} diff --git a/libgo/runtime/panic.c b/libgo/runtime/panic.c index 7b9b578..7d79256 100644 --- a/libgo/runtime/panic.c +++ b/libgo/runtime/panic.c @@ -3,6 +3,7 @@ // license that can be found in the LICENSE file. #include "runtime.h" +#include "malloc.h" #include "go-defer.h" #include "go-panic.h" @@ -37,6 +38,11 @@ runtime_startpanic(void) M *m; m = runtime_m(); + if(runtime_mheap == 0 || runtime_mheap->cachealloc.size == 0) { // very early + runtime_printf("runtime: panic before malloc heap initialized\n"); + m->mallocing = 1; // tell rest of panic not to try to malloc + } else if(m->mcache == nil) // can happen if called from signal handler or throw + m->mcache = runtime_allocmcache(); if(m->dying) { runtime_printf("panic during panic\n"); runtime_exit(3); @@ -51,13 +57,14 @@ runtime_dopanic(int32 unused __attribute__ ((unused))) { G *g; static bool didothers; + bool crash; g = runtime_g(); if(g->sig != 0) runtime_printf("[signal %x code=%p addr=%p]\n", g->sig, (void*)g->sigcode0, (void*)g->sigcode1); - if(runtime_gotraceback()){ + if(runtime_gotraceback(&crash)){ if(g != runtime_m()->g0) { runtime_printf("\n"); runtime_goroutineheader(g); @@ -79,6 +86,9 @@ runtime_dopanic(int32 unused __attribute__ ((unused))) runtime_lock(&deadlock); runtime_lock(&deadlock); } + + if(crash) + runtime_crash(); runtime_exit(2); } diff --git a/libgo/runtime/parfor.c b/libgo/runtime/parfor.c index 65ca586..c0e40f5 100644 --- a/libgo/runtime/parfor.c +++ b/libgo/runtime/parfor.c @@ -49,6 +49,7 @@ void runtime_parforsetup(ParFor *desc, uint32 nthr, uint32 n, void *ctx, bool wait, void (*body)(ParFor*, uint32)) { uint32 i, begin, end; + uint64 *pos; if(desc == nil || nthr == 0 || nthr > desc->nthrmax || body == nil) { runtime_printf("desc=%p nthr=%d count=%d body=%p\n", desc, nthr, n, body); @@ -70,7 +71,10 @@ runtime_parforsetup(ParFor *desc, uint32 nthr, uint32 n, void *ctx, bool wait, v for(i=0; i<nthr; i++) { begin = (uint64)n*i / nthr; end = (uint64)n*(i+1) / nthr; - desc->thr[i].pos = (uint64)begin | (((uint64)end)<<32); + pos = &desc->thr[i].pos; + if(((uintptr)pos & 7) != 0) + 
runtime_throw("parforsetup: pos is not aligned"); + *pos = (uint64)begin | (((uint64)end)<<32); } } @@ -152,7 +156,7 @@ runtime_parfordo(ParFor *desc) // See if it has any work. begin = (uint32)pos; end = (uint32)(pos>>32); - if(begin >= end-1) { + if(begin+1 >= end) { begin = end = 0; break; } diff --git a/libgo/runtime/print.c b/libgo/runtime/print.c index 9e0c45b..f5c6e82 100644 --- a/libgo/runtime/print.c +++ b/libgo/runtime/print.c @@ -88,6 +88,9 @@ go_vprintf(const char *s, va_list va) case 'a': runtime_printslice(va_arg(va, Slice)); break; + case 'c': + runtime_printbyte(va_arg(va, int32)); + break; case 'd': runtime_printint(va_arg(va, int32)); break; @@ -154,6 +157,12 @@ runtime_printbool(_Bool v) } void +runtime_printbyte(int8 c) +{ + gwrite(&c, 1); +} + +void runtime_printfloat(double v) { byte buf[20]; diff --git a/libgo/runtime/proc.c b/libgo/runtime/proc.c index 9b563a5..9639922 100644 --- a/libgo/runtime/proc.c +++ b/libgo/runtime/proc.c @@ -56,15 +56,8 @@ extern void __splitstack_block_signals_context (void *context[10], int *, uintptr runtime_stacks_sys; -static void schedule(G*); - static void gtraceback(G*); -typedef struct Sched Sched; - -M runtime_m0; -G runtime_g0; // idle goroutine for m0 - #ifdef __rtems__ #define __thread #endif @@ -166,194 +159,61 @@ runtime_m(void) return m; } -int32 runtime_gcwaiting; - -G* runtime_allg; -G* runtime_lastg; -M* runtime_allm; - -int8* runtime_goos; -int32 runtime_ncpu; - -// The static TLS size. See runtime_newm. -static int tlssize; - -#ifdef HAVE_DL_ITERATE_PHDR - -// Called via dl_iterate_phdr. - -static int -addtls(struct dl_phdr_info* info, size_t size __attribute__ ((unused)), void *data) -{ - size_t *total = (size_t *)data; - unsigned int i; - - for(i = 0; i < info->dlpi_phnum; ++i) { - if(info->dlpi_phdr[i].p_type == PT_TLS) - *total += info->dlpi_phdr[i].p_memsz; - } - return 0; -} - -// Set the total TLS size. - -static void -inittlssize() +// Set m and g. +void +runtime_setmg(M* mp, G* gp) { - size_t total = 0; - - dl_iterate_phdr(addtls, (void *)&total); - tlssize = total; + m = mp; + g = gp; } -#else +// The static TLS size. See runtime_newm. +static int tlssize; +// Start a new thread. static void -inittlssize() +runtime_newosproc(M *mp) { -} - -#endif - -// Go scheduler -// -// The go scheduler's job is to match ready-to-run goroutines (`g's) -// with waiting-for-work schedulers (`m's). If there are ready g's -// and no waiting m's, ready() will start a new m running in a new -// OS thread, so that all ready g's can run simultaneously, up to a limit. -// For now, m's never go away. -// -// By default, Go keeps only one kernel thread (m) running user code -// at a single time; other threads may be blocked in the operating system. -// Setting the environment variable $GOMAXPROCS or calling -// runtime.GOMAXPROCS() will change the number of user threads -// allowed to execute simultaneously. $GOMAXPROCS is thus an -// approximation of the maximum number of cores to use. -// -// Even a program that can run without deadlock in a single process -// might use more m's if given the chance. For example, the prime -// sieve will use as many m's as there are primes (up to runtime_sched.mmax), -// allowing different stages of the pipeline to execute in parallel. -// We could revisit this choice, only kicking off new m's for blocking -// system calls, but that would limit the amount of parallel computation -// that go would try to do. 
-// -// In general, one could imagine all sorts of refinements to the -// scheduler, but the goal now is just to get something working on -// Linux and OS X. - -struct Sched { - Lock; - - G *gfree; // available g's (status == Gdead) - int64 goidgen; - - G *ghead; // g's waiting to run - G *gtail; - int32 gwait; // number of g's waiting to run - int32 gcount; // number of g's that are alive - int32 grunning; // number of g's running on cpu or in syscall - - M *mhead; // m's waiting for work - int32 mwait; // number of m's waiting for work - int32 mcount; // number of m's that have been created + pthread_attr_t attr; + size_t stacksize; + sigset_t clear, old; + pthread_t tid; + int ret; - volatile uint32 atomic; // atomic scheduling word (see below) + if(pthread_attr_init(&attr) != 0) + runtime_throw("pthread_attr_init"); + if(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0) + runtime_throw("pthread_attr_setdetachstate"); - int32 profilehz; // cpu profiling rate + stacksize = PTHREAD_STACK_MIN; - bool init; // running initialization - bool lockmain; // init called runtime.LockOSThread + // With glibc before version 2.16 the static TLS size is taken + // out of the stack size, and we get an error or a crash if + // there is not enough stack space left. Add it back in if we + // can, in case the program uses a lot of TLS space. FIXME: + // This can be disabled in glibc 2.16 and later, if the bug is + // indeed fixed then. + stacksize += tlssize; - Note stopped; // one g can set waitstop and wait here for m's to stop -}; + if(pthread_attr_setstacksize(&attr, stacksize) != 0) + runtime_throw("pthread_attr_setstacksize"); -// The atomic word in sched is an atomic uint32 that -// holds these fields. -// -// [15 bits] mcpu number of m's executing on cpu -// [15 bits] mcpumax max number of m's allowed on cpu -// [1 bit] waitstop some g is waiting on stopped -// [1 bit] gwaiting gwait != 0 -// -// These fields are the information needed by entersyscall -// and exitsyscall to decide whether to coordinate with the -// scheduler. Packing them into a single machine word lets -// them use a fast path with a single atomic read/write and -// no lock/unlock. This greatly reduces contention in -// syscall- or cgo-heavy multithreaded programs. -// -// Except for entersyscall and exitsyscall, the manipulations -// to these fields only happen while holding the schedlock, -// so the routines holding schedlock only need to worry about -// what entersyscall and exitsyscall do, not the other routines -// (which also use the schedlock). -// -// In particular, entersyscall and exitsyscall only read mcpumax, -// waitstop, and gwaiting. They never write them. Thus, writes to those -// fields can be done (holding schedlock) without fear of write conflicts. -// There may still be logic conflicts: for example, the set of waitstop must -// be conditioned on mcpu >= mcpumax or else the wait may be a -// spurious sleep. The Promela model in proc.p verifies these accesses. -enum { - mcpuWidth = 15, - mcpuMask = (1<<mcpuWidth) - 1, - mcpuShift = 0, - mcpumaxShift = mcpuShift + mcpuWidth, - waitstopShift = mcpumaxShift + mcpuWidth, - gwaitingShift = waitstopShift+1, - - // The max value of GOMAXPROCS is constrained - // by the max value we can store in the bit fields - // of the atomic word. Reserve a few high values - // so that we can detect accidental decrement - // beyond zero. 
- maxgomaxprocs = mcpuMask - 10, -}; + // Block signals during pthread_create so that the new thread + // starts with signals disabled. It will enable them in minit. + sigfillset(&clear); -#define atomic_mcpu(v) (((v)>>mcpuShift)&mcpuMask) -#define atomic_mcpumax(v) (((v)>>mcpumaxShift)&mcpuMask) -#define atomic_waitstop(v) (((v)>>waitstopShift)&1) -#define atomic_gwaiting(v) (((v)>>gwaitingShift)&1) - -Sched runtime_sched; -int32 runtime_gomaxprocs; -bool runtime_singleproc; - -static bool canaddmcpu(void); - -// An m that is waiting for notewakeup(&m->havenextg). This may -// only be accessed while the scheduler lock is held. This is used to -// minimize the number of times we call notewakeup while the scheduler -// lock is held, since the m will normally move quickly to lock the -// scheduler itself, producing lock contention. -static M* mwakeup; - -// Scheduling helpers. Sched must be locked. -static void gput(G*); // put/get on ghead/gtail -static G* gget(void); -static void mput(M*); // put/get on mhead -static M* mget(G*); -static void gfput(G*); // put/get on gfree -static G* gfget(void); -static void matchmg(void); // match m's to g's -static void readylocked(G*); // ready, but sched is locked -static void mnextg(M*, G*); -static void mcommoninit(M*); +#ifdef SIGTRAP + // Blocking SIGTRAP reportedly breaks gdb on Alpha GNU/Linux. + sigdelset(&clear, SIGTRAP); +#endif -void -setmcpumax(uint32 n) -{ - uint32 v, w; + sigemptyset(&old); + sigprocmask(SIG_BLOCK, &clear, &old); + ret = pthread_create(&tid, &attr, runtime_mstart, mp); + sigprocmask(SIG_SETMASK, &old, nil); - for(;;) { - v = runtime_sched.atomic; - w = v; - w &= ~(mcpuMask<<mcpumaxShift); - w |= n<<mcpumaxShift; - if(runtime_cas(&runtime_sched.atomic, v, w)) - break; - } + if (ret != 0) + runtime_throw("pthread_create"); } // First function run by a new goroutine. This replaces gogocall. @@ -449,8 +309,142 @@ runtime_mcall(void (*pfn)(G*)) } } -// Keep trace of scavenger's goroutine for deadlock detection. -static G *scvg; +#ifdef HAVE_DL_ITERATE_PHDR + +// Called via dl_iterate_phdr. + +static int +addtls(struct dl_phdr_info* info, size_t size __attribute__ ((unused)), void *data) +{ + size_t *total = (size_t *)data; + unsigned int i; + + for(i = 0; i < info->dlpi_phnum; ++i) { + if(info->dlpi_phdr[i].p_type == PT_TLS) + *total += info->dlpi_phdr[i].p_memsz; + } + return 0; +} + +// Set the total TLS size. + +static void +inittlssize() +{ + size_t total = 0; + + dl_iterate_phdr(addtls, (void *)&total); + tlssize = total; +} + +#else + +static void +inittlssize() +{ +} + +#endif + +// Goroutine scheduler +// The scheduler's job is to distribute ready-to-run goroutines over worker threads. +// +// The main concepts are: +// G - goroutine. +// M - worker thread, or machine. +// P - processor, a resource that is required to execute Go code. +// M must have an associated P to execute Go code, however it can be +// blocked or in a syscall w/o an associated P. +// +// Design doc at http://golang.org/s/go11sched. + +typedef struct Sched Sched; +struct Sched { + Lock; + + uint64 goidgen; + M* midle; // idle m's waiting for work + int32 nmidle; // number of idle m's waiting for work + int32 mlocked; // number of locked m's waiting for work + int32 mcount; // number of m's that have been created + + P* pidle; // idle P's + uint32 npidle; + uint32 nmspinning; + + // Global runnable queue. + G* runqhead; + G* runqtail; + int32 runqsize; + + // Global cache of dead G's. 
+ Lock gflock; + G* gfree; + + int32 stopwait; + Note stopnote; + uint32 sysmonwait; + Note sysmonnote; + uint64 lastpoll; + + int32 profilehz; // cpu profiling rate +}; + +// The max value of GOMAXPROCS. +// There are no fundamental restrictions on the value. +enum { MaxGomaxprocs = 1<<8 }; + +Sched runtime_sched; +int32 runtime_gomaxprocs; +bool runtime_singleproc; +bool runtime_iscgo; +uint32 runtime_gcwaiting; +M runtime_m0; +G runtime_g0; // idle goroutine for m0 +G* runtime_allg; +G* runtime_lastg; +M* runtime_allm; +P** runtime_allp; +M* runtime_extram; +int8* runtime_goos; +int32 runtime_ncpu; +static int32 newprocs; + +void* runtime_mstart(void*); +static void runqput(P*, G*); +static G* runqget(P*); +static void runqgrow(P*); +static G* runqsteal(P*, P*); +static void mput(M*); +static M* mget(void); +static void mcommoninit(M*); +static void schedule(void); +static void procresize(int32); +static void acquirep(P*); +static P* releasep(void); +static void newm(void(*)(void), P*); +static void stopm(void); +static void startm(P*, bool); +static void handoffp(P*); +static void wakep(void); +static void stoplockedm(void); +static void startlockedm(G*); +static void sysmon(void); +static uint32 retake(uint32*); +static void inclocked(int32); +static void checkdead(void); +static void exitsyscall0(G*); +static void park0(G*); +static void gosched0(G*); +static void goexit0(G*); +static void gfput(P*, G*); +static G* gfget(P*); +static void gfpurge(P*); +static void globrunqput(G*); +static G* globrunqget(P*); +static P* pidleget(void); +static void pidleput(P*); +static void injectglist(G*); // The bootstrap sequence is: // @@ -463,7 +457,7 @@ static G *scvg; void runtime_schedinit(void) { - int32 n; + int32 n, procs; const byte *p; m = &runtime_m0; @@ -476,6 +470,7 @@ runtime_schedinit(void) inittlssize(); m->nomemprof++; + runtime_mprofinit(); runtime_mallocinit(); mcommoninit(m); @@ -487,28 +482,20 @@ runtime_schedinit(void) // so that we don't need to call malloc when we crash. // runtime_findfunc(0); - runtime_gomaxprocs = 1; + runtime_sched.lastpoll = runtime_nanotime(); + procs = 1; p = runtime_getenv("GOMAXPROCS"); - if(p != nil && (n = runtime_atoi(p)) != 0) { - if(n > maxgomaxprocs) - n = maxgomaxprocs; - runtime_gomaxprocs = n; + if(p != nil && (n = runtime_atoi(p)) > 0) { + if(n > MaxGomaxprocs) + n = MaxGomaxprocs; + procs = n; } - // wait for the main goroutine to start before taking - // GOMAXPROCS into account. - setmcpumax(1); - runtime_singleproc = runtime_gomaxprocs == 1; - - canaddmcpu(); // mcpu++ to account for bootstrap m - m->helpgc = 1; // flag to tell schedule() to mcpu-- - runtime_sched.grunning++; + runtime_allp = runtime_malloc((MaxGomaxprocs+1)*sizeof(runtime_allp[0])); + procresize(procs); // Can not enable GC until all roots are registered. // mstats.enablegc = 1; m->nomemprof--; - - if(raceenabled) - runtime_raceinit(); } extern void main_init(void) __asm__ (GOSYM_PREFIX "__go_init_main"); @@ -516,70 +503,44 @@ extern void main_main(void) __asm__ (GOSYM_PREFIX "main.main"); // The main goroutine. void -runtime_main(void) +runtime_main(void* dummy __attribute__((unused))) { + newm(sysmon, nil); + // Lock the main goroutine onto this, the main OS thread, // during initialization. Most programs won't care, but a few // do require certain calls to be made by the main thread. // Those can arrange for main.main to run in the main thread // by calling runtime.LockOSThread during initialization // to preserve the lock. 
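The comment above is about keeping main.main on the main OS thread. In this patch that promise reduces to a pair of pointers, m->lockedg and g->lockedm, which schedule() later honours by handing a P straight to the locked M (see startlockedm/stoplockedm further down in the diff). A stand-alone sketch of that bookkeeping, with toy types rather than the runtime's M and G, and not code from this patch:

    /* Illustrative sketch only: what thread locking boils down to. */
    #include <stdio.h>
    #include <stddef.h>

    typedef struct ToyG ToyG;
    typedef struct ToyM ToyM;
    struct ToyM { ToyG *lockedg; };
    struct ToyG { ToyM *lockedm; };

    static void
    lock_thread(ToyM *m, ToyG *g)
    {
        m->lockedg = g;         /* this M may only run g ... */
        g->lockedm = m;         /* ... and g may only run on this M */
    }

    static int
    can_run(ToyM *m, ToyG *g)
    {
        /* a scheduler would hand g to g->lockedm instead of running it here */
        return g->lockedm == NULL || g->lockedm == m;
    }

    int
    main(void)
    {
        ToyM m0 = {NULL}, m1 = {NULL};
        ToyG main_g = {NULL};

        lock_thread(&m0, &main_g);
        printf("m0 can run main: %d\n", can_run(&m0, &main_g));  /* 1 */
        printf("m1 can run main: %d\n", can_run(&m1, &main_g));  /* 0 */
        return 0;
    }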
- runtime_LockOSThread(); - // From now on, newgoroutines may use non-main threads. - setmcpumax(runtime_gomaxprocs); - runtime_sched.init = true; - scvg = __go_go(runtime_MHeap_Scavenger, nil); - scvg->issystem = true; + runtime_lockOSThread(); + if(m != &runtime_m0) + runtime_throw("runtime_main not on m0"); + __go_go(runtime_MHeap_Scavenger, nil); main_init(); - runtime_sched.init = false; - if(!runtime_sched.lockmain) - runtime_UnlockOSThread(); + runtime_unlockOSThread(); // For gccgo we have to wait until after main is initialized // to enable GC, because initializing main registers the GC // roots. mstats.enablegc = 1; - // The deadlock detection has false negatives. - // Let scvg start up, to eliminate the false negative - // for the trivial program func main() { select{} }. - runtime_gosched(); - main_main(); if(raceenabled) runtime_racefini(); + + // Make racy client program work: if panicking on + // another goroutine at the same time as main returns, + // let the other goroutine finish printing the panic trace. + // Once it does, it will exit. See issue 3934. + if(runtime_panicking) + runtime_park(nil, nil, "panicwait"); + runtime_exit(0); for(;;) *(int32*)0 = 0; } -// Lock the scheduler. -static void -schedlock(void) -{ - runtime_lock(&runtime_sched); -} - -// Unlock the scheduler. -static void -schedunlock(void) -{ - M *mp; - - mp = mwakeup; - mwakeup = nil; - runtime_unlock(&runtime_sched); - if(mp != nil) - runtime_notewakeup(&mp->havenextg); -} - -void -runtime_goexit(void) -{ - g->status = Gmoribund; - runtime_gosched(); -} - void runtime_goroutineheader(G *gp) { @@ -604,9 +565,6 @@ runtime_goroutineheader(G *gp) else status = "waiting"; break; - case Gmoribund: - status = "moribund"; - break; default: status = "???"; break; @@ -644,7 +602,7 @@ runtime_tracebackothers(G * volatile me) int32 traceback; tb.gp = me; - traceback = runtime_gotraceback(); + traceback = runtime_gotraceback(nil); for(gp = runtime_allg; gp != nil; gp = gp->alllink) { if(gp == me || gp->status == Gdead) continue; @@ -698,28 +656,20 @@ gtraceback(G* gp) runtime_gogo(traceback->gp); } -// Mark this g as m's idle goroutine. -// This functionality might be used in environments where programs -// are limited to a single thread, to simulate a select-driven -// network server. It is not exposed via the standard runtime API. -void -runtime_idlegoroutine(void) -{ - if(g->idlem != nil) - runtime_throw("g is already an idle goroutine"); - g->idlem = m; -} - static void mcommoninit(M *mp) { - mp->id = runtime_sched.mcount++; + // If there is no mcache runtime_callers() will crash, + // and we are most likely in sysmon thread so the stack is senseless anyway. + if(m->mcache) + runtime_callers(1, mp->createstack, nelem(mp->createstack)); + mp->fastrand = 0x49f6428aUL + mp->id + runtime_cputicks(); - if(mp->mcache == nil) - mp->mcache = runtime_allocmcache(); + runtime_lock(&runtime_sched); + mp->id = runtime_sched.mcount++; - runtime_callers(1, mp->createstack, nelem(mp->createstack)); + runtime_mpreinit(mp); // Add to runtime_allm so garbage collector doesn't free m // when it is just in a register or thread-local storage. @@ -727,324 +677,77 @@ mcommoninit(M *mp) // runtime_NumCgoCall() iterates over allm w/o schedlock, // so we need to publish it safely. runtime_atomicstorep(&runtime_allm, mp); + runtime_unlock(&runtime_sched); } -// Try to increment mcpu. Report whether succeeded. 
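mcommoninit above publishes the new M on runtime_allm with runtime_atomicstorep, because runtime_NumCgoCall walks allm without holding the scheduler lock. A minimal sketch of that publish pattern, using GCC's __atomic builtins in place of the runtime's own atomics; the Node type and names here are hypothetical, not patch code:

    /* Sketch: initialize a node fully, then publish it with a release
       store so lock-free readers always see a consistent list. */
    #include <stdio.h>

    typedef struct Node { int id; struct Node *next; } Node;

    static Node *allnodes;          /* stand-in for runtime_allm */

    static void
    publish(Node *n, int id)
    {
        n->id = id;                 /* initialize first ... */
        n->next = allnodes;
        __atomic_store_n(&allnodes, n, __ATOMIC_RELEASE);  /* ... then publish */
    }

    static int
    count(void)                     /* reader, no lock held */
    {
        Node *n;
        int c = 0;

        for(n = __atomic_load_n(&allnodes, __ATOMIC_ACQUIRE); n != NULL; n = n->next)
            c++;
        return c;
    }

    int
    main(void)
    {
        static Node a, b;

        publish(&a, 1);
        publish(&b, 2);
        printf("%d nodes\n", count());
        return 0;
    }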
-static bool -canaddmcpu(void) -{ - uint32 v; - - for(;;) { - v = runtime_sched.atomic; - if(atomic_mcpu(v) >= atomic_mcpumax(v)) - return 0; - if(runtime_cas(&runtime_sched.atomic, v, v+(1<<mcpuShift))) - return 1; - } -} - -// Put on `g' queue. Sched must be locked. -static void -gput(G *gp) -{ - M *mp; - - // If g is wired, hand it off directly. - if((mp = gp->lockedm) != nil && canaddmcpu()) { - mnextg(mp, gp); - return; - } - - // If g is the idle goroutine for an m, hand it off. - if(gp->idlem != nil) { - if(gp->idlem->idleg != nil) { - runtime_printf("m%d idle out of sync: g%D g%D\n", - gp->idlem->id, - gp->idlem->idleg->goid, gp->goid); - runtime_throw("runtime: double idle"); - } - gp->idlem->idleg = gp; - return; - } - - gp->schedlink = nil; - if(runtime_sched.ghead == nil) - runtime_sched.ghead = gp; - else - runtime_sched.gtail->schedlink = gp; - runtime_sched.gtail = gp; - - // increment gwait. - // if it transitions to nonzero, set atomic gwaiting bit. - if(runtime_sched.gwait++ == 0) - runtime_xadd(&runtime_sched.atomic, 1<<gwaitingShift); -} - -// Report whether gget would return something. -static bool -haveg(void) -{ - return runtime_sched.ghead != nil || m->idleg != nil; -} - -// Get from `g' queue. Sched must be locked. -static G* -gget(void) -{ - G *gp; - - gp = runtime_sched.ghead; - if(gp) { - runtime_sched.ghead = gp->schedlink; - if(runtime_sched.ghead == nil) - runtime_sched.gtail = nil; - // decrement gwait. - // if it transitions to zero, clear atomic gwaiting bit. - if(--runtime_sched.gwait == 0) - runtime_xadd(&runtime_sched.atomic, -1<<gwaitingShift); - } else if(m->idleg != nil) { - gp = m->idleg; - m->idleg = nil; - } - return gp; -} - -// Put on `m' list. Sched must be locked. -static void -mput(M *mp) -{ - mp->schedlink = runtime_sched.mhead; - runtime_sched.mhead = mp; - runtime_sched.mwait++; -} - -// Get an `m' to run `g'. Sched must be locked. -static M* -mget(G *gp) -{ - M *mp; - - // if g has its own m, use it. - if(gp && (mp = gp->lockedm) != nil) - return mp; - - // otherwise use general m pool. - if((mp = runtime_sched.mhead) != nil) { - runtime_sched.mhead = mp->schedlink; - runtime_sched.mwait--; - } - return mp; -} - -// Mark g ready to run. +// Mark gp ready to run. void runtime_ready(G *gp) { - schedlock(); - readylocked(gp); - schedunlock(); -} - -// Mark g ready to run. Sched is already locked. -// G might be running already and about to stop. -// The sched lock protects g->status from changing underfoot. -static void -readylocked(G *gp) -{ - if(gp->m) { - // Running on another machine. - // Ready it when it stops. - gp->readyonstop = 1; - return; - } - // Mark runnable. - if(gp->status == Grunnable || gp->status == Grunning) { + if(gp->status != Gwaiting) { runtime_printf("goroutine %D has status %d\n", gp->goid, gp->status); runtime_throw("bad g->status in ready"); } gp->status = Grunnable; - - gput(gp); - matchmg(); -} - -// Same as readylocked but a different symbol so that -// debuggers can set a breakpoint here and catch all -// new goroutines. -static void -newprocreadylocked(G *gp) -{ - readylocked(gp); -} - -// Pass g to m for running. -// Caller has already incremented mcpu. -static void -mnextg(M *mp, G *gp) -{ - runtime_sched.grunning++; - mp->nextg = gp; - if(mp->waitnextg) { - mp->waitnextg = 0; - if(mwakeup != nil) - runtime_notewakeup(&mwakeup->havenextg); - mwakeup = mp; - } -} - -// Get the next goroutine that m should run. -// Sched must be locked on entry, is unlocked on exit. 
-// Makes sure that at most $GOMAXPROCS g's are -// running on cpus (not in system calls) at any given time. -static G* -nextgandunlock(void) -{ - G *gp; - uint32 v; - -top: - if(atomic_mcpu(runtime_sched.atomic) >= maxgomaxprocs) - runtime_throw("negative mcpu"); - - // If there is a g waiting as m->nextg, the mcpu++ - // happened before it was passed to mnextg. - if(m->nextg != nil) { - gp = m->nextg; - m->nextg = nil; - schedunlock(); - return gp; - } - - if(m->lockedg != nil) { - // We can only run one g, and it's not available. - // Make sure some other cpu is running to handle - // the ordinary run queue. - if(runtime_sched.gwait != 0) { - matchmg(); - // m->lockedg might have been on the queue. - if(m->nextg != nil) { - gp = m->nextg; - m->nextg = nil; - schedunlock(); - return gp; - } - } - } else { - // Look for work on global queue. - while(haveg() && canaddmcpu()) { - gp = gget(); - if(gp == nil) - runtime_throw("gget inconsistency"); - - if(gp->lockedm) { - mnextg(gp->lockedm, gp); - continue; - } - runtime_sched.grunning++; - schedunlock(); - return gp; - } - - // The while loop ended either because the g queue is empty - // or because we have maxed out our m procs running go - // code (mcpu >= mcpumax). We need to check that - // concurrent actions by entersyscall/exitsyscall cannot - // invalidate the decision to end the loop. - // - // We hold the sched lock, so no one else is manipulating the - // g queue or changing mcpumax. Entersyscall can decrement - // mcpu, but if does so when there is something on the g queue, - // the gwait bit will be set, so entersyscall will take the slow path - // and use the sched lock. So it cannot invalidate our decision. - // - // Wait on global m queue. - mput(m); - } - - // Look for deadlock situation. - // There is a race with the scavenger that causes false negatives: - // if the scavenger is just starting, then we have - // scvg != nil && grunning == 0 && gwait == 0 - // and we do not detect a deadlock. It is possible that we should - // add that case to the if statement here, but it is too close to Go 1 - // to make such a subtle change. Instead, we work around the - // false negative in trivial programs by calling runtime.gosched - // from the main goroutine just before main.main. - // See runtime_main above. - // - // On a related note, it is also possible that the scvg == nil case is - // wrong and should include gwait, but that does not happen in - // standard Go programs, which all start the scavenger. - // - if((scvg == nil && runtime_sched.grunning == 0) || - (scvg != nil && runtime_sched.grunning == 1 && runtime_sched.gwait == 0 && - (scvg->status == Grunning || scvg->status == Gsyscall))) { - m->throwing = -1; // do not dump full stacks - runtime_throw("all goroutines are asleep - deadlock!"); - } - - m->nextg = nil; - m->waitnextg = 1; - runtime_noteclear(&m->havenextg); - - // Stoptheworld is waiting for all but its cpu to go to stop. - // Entersyscall might have decremented mcpu too, but if so - // it will see the waitstop and take the slow path. - // Exitsyscall never increments mcpu beyond mcpumax. 
- v = runtime_atomicload(&runtime_sched.atomic); - if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) { - // set waitstop = 0 (known to be 1) - runtime_xadd(&runtime_sched.atomic, -1<<waitstopShift); - runtime_notewakeup(&runtime_sched.stopped); - } - schedunlock(); - - runtime_notesleep(&m->havenextg); - if(m->helpgc) { - runtime_gchelper(); - m->helpgc = 0; - runtime_lock(&runtime_sched); - goto top; - } - if((gp = m->nextg) == nil) - runtime_throw("bad m->nextg in nextgoroutine"); - m->nextg = nil; - return gp; + runqput(m->p, gp); + if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0) // TODO: fast atomic + wakep(); } int32 runtime_gcprocs(void) { int32 n; - + // Figure out how many CPUs to use during GC. // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc. + runtime_lock(&runtime_sched); n = runtime_gomaxprocs; if(n > runtime_ncpu) n = runtime_ncpu > 0 ? runtime_ncpu : 1; if(n > MaxGcproc) n = MaxGcproc; - if(n > runtime_sched.mwait+1) // one M is currently running - n = runtime_sched.mwait+1; + if(n > runtime_sched.nmidle+1) // one M is currently running + n = runtime_sched.nmidle+1; + runtime_unlock(&runtime_sched); return n; } +static bool +needaddgcproc(void) +{ + int32 n; + + runtime_lock(&runtime_sched); + n = runtime_gomaxprocs; + if(n > runtime_ncpu) + n = runtime_ncpu; + if(n > MaxGcproc) + n = MaxGcproc; + n -= runtime_sched.nmidle+1; // one M is currently running + runtime_unlock(&runtime_sched); + return n > 0; +} + void runtime_helpgc(int32 nproc) { M *mp; - int32 n; + int32 n, pos; runtime_lock(&runtime_sched); - for(n = 1; n < nproc; n++) { // one M is currently running - mp = mget(nil); + pos = 0; + for(n = 1; n < nproc; n++) { // one M is currently running + if(runtime_allp[pos]->mcache == m->mcache) + pos++; + mp = mget(); if(mp == nil) runtime_throw("runtime_gcprocs inconsistency"); - mp->helpgc = 1; - mp->waitnextg = 0; - runtime_notewakeup(&mp->havenextg); + mp->helpgc = n; + mp->mcache = runtime_allp[pos]->mcache; + pos++; + runtime_notewakeup(&mp->park); } runtime_unlock(&runtime_sched); } @@ -1052,57 +755,104 @@ runtime_helpgc(int32 nproc) void runtime_stoptheworld(void) { - uint32 v; - - schedlock(); - runtime_gcwaiting = 1; - - setmcpumax(1); - - // while mcpu > 1 - for(;;) { - v = runtime_sched.atomic; - if(atomic_mcpu(v) <= 1) - break; - - // It would be unsafe for multiple threads to be using - // the stopped note at once, but there is only - // ever one thread doing garbage collection. - runtime_noteclear(&runtime_sched.stopped); - if(atomic_waitstop(v)) - runtime_throw("invalid waitstop"); + int32 i; + uint32 s; + P *p; + bool wait; - // atomic { waitstop = 1 }, predicated on mcpu <= 1 check above - // still being true. 
- if(!runtime_cas(&runtime_sched.atomic, v, v+(1<<waitstopShift))) - continue; + runtime_lock(&runtime_sched); + runtime_sched.stopwait = runtime_gomaxprocs; + runtime_atomicstore((uint32*)&runtime_gcwaiting, 1); + // stop current P + m->p->status = Pgcstop; + runtime_sched.stopwait--; + // try to retake all P's in Psyscall status + for(i = 0; i < runtime_gomaxprocs; i++) { + p = runtime_allp[i]; + s = p->status; + if(s == Psyscall && runtime_cas(&p->status, s, Pgcstop)) + runtime_sched.stopwait--; + } + // stop idle P's + while((p = pidleget()) != nil) { + p->status = Pgcstop; + runtime_sched.stopwait--; + } + wait = runtime_sched.stopwait > 0; + runtime_unlock(&runtime_sched); - schedunlock(); - runtime_notesleep(&runtime_sched.stopped); - schedlock(); + // wait for remaining P's to stop voluntary + if(wait) { + runtime_notesleep(&runtime_sched.stopnote); + runtime_noteclear(&runtime_sched.stopnote); + } + if(runtime_sched.stopwait) + runtime_throw("stoptheworld: not stopped"); + for(i = 0; i < runtime_gomaxprocs; i++) { + p = runtime_allp[i]; + if(p->status != Pgcstop) + runtime_throw("stoptheworld: not stopped"); } - runtime_singleproc = runtime_gomaxprocs == 1; - schedunlock(); +} + +static void +mhelpgc(void) +{ + m->helpgc = -1; } void runtime_starttheworld(void) { + P *p, *p1; M *mp; - int32 max; - - // Figure out how many CPUs GC could possibly use. - max = runtime_gomaxprocs; - if(max > runtime_ncpu) - max = runtime_ncpu > 0 ? runtime_ncpu : 1; - if(max > MaxGcproc) - max = MaxGcproc; - - schedlock(); + G *gp; + bool add; + + gp = runtime_netpoll(false); // non-blocking + injectglist(gp); + add = needaddgcproc(); + runtime_lock(&runtime_sched); + if(newprocs) { + procresize(newprocs); + newprocs = 0; + } else + procresize(runtime_gomaxprocs); runtime_gcwaiting = 0; - setmcpumax(runtime_gomaxprocs); - matchmg(); - if(runtime_gcprocs() < max && canaddmcpu()) { + + p1 = nil; + while((p = pidleget()) != nil) { + // procresize() puts p's with work at the beginning of the list. + // Once we reach a p without a run queue, the rest don't have one either. + if(p->runqhead == p->runqtail) { + pidleput(p); + break; + } + mp = mget(); + if(mp == nil) { + p->link = p1; + p1 = p; + continue; + } + if(mp->nextp) + runtime_throw("starttheworld: inconsistent mp->nextp"); + mp->nextp = p; + runtime_notewakeup(&mp->park); + } + if(runtime_sched.sysmonwait) { + runtime_sched.sysmonwait = false; + runtime_notewakeup(&runtime_sched.sysmonnote); + } + runtime_unlock(&runtime_sched); + + while(p1) { + p = p1; + p1 = p1->link; + add = false; + newm(nil, p); + } + + if(add) { // If GC could have used another helper proc, start one now, // in the hope that it will be available next time. // It would have been even better to start it before the collection, @@ -1110,17 +860,8 @@ runtime_starttheworld(void) // coordinate. This lazy approach works out in practice: // we don't mind if the first couple gc rounds don't have quite // the maximum number of procs. - // canaddmcpu above did mcpu++ - // (necessary, because m will be doing various - // initialization work so is definitely running), - // but m is not running a specific goroutine, - // so set the helpgc flag as a signal to m's - // first schedule(nil) to mcpu-- and grunning--. - mp = runtime_newm(); - mp->helpgc = 1; - runtime_sched.grunning++; + newm(mhelpgc, nil); } - schedunlock(); } // Called to start an M. 
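The new runtime_stoptheworld above counts the P's that still have to reach a safe point in stopwait and sleeps on stopnote until the last one checks in; starttheworld then reverses the process. The same countdown-and-notify handshake, modelled here with plain pthreads rather than the runtime's Lock/Note primitives (an illustrative sketch, not code from this patch):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t sched = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t stopnote = PTHREAD_COND_INITIALIZER;
    static int stopwait;

    /* Each worker calls this when it reaches a safe point. */
    static void*
    worker_stops(void *arg)
    {
        (void)arg;
        pthread_mutex_lock(&sched);
        if(--stopwait == 0)
            pthread_cond_signal(&stopnote);   /* last one in wakes the stopper */
        pthread_mutex_unlock(&sched);
        return NULL;
    }

    int
    main(void)
    {
        pthread_t tids[3];
        int i;

        stopwait = 3;
        for(i = 0; i < 3; i++)
            pthread_create(&tids[i], NULL, worker_stops, NULL);

        pthread_mutex_lock(&sched);
        while(stopwait > 0)                   /* wait for remaining workers */
            pthread_cond_wait(&stopnote, &sched);
        pthread_mutex_unlock(&sched);
        printf("world stopped\n");

        for(i = 0; i < 3; i++)
            pthread_join(tids[i], NULL);
        return 0;
    }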
@@ -1167,10 +908,23 @@ runtime_mstart(void* mp) // Install signal handlers; after minit so that minit can // prepare the thread to be able to handle the signals. - if(m == &runtime_m0) + if(m == &runtime_m0) { runtime_initsig(); + if(runtime_iscgo) + runtime_newextram(); + } + + if(m->mstartfn) + m->mstartfn(); - schedule(nil); + if(m->helpgc) { + m->helpgc = 0; + stopm(); + } else if(m != &runtime_m0) { + acquirep(m->nextp); + m->nextp = nil; + } + schedule(); // TODO(brainman): This point is never reached, because scheduler // does not release os threads at the moment. But once this path @@ -1187,43 +941,17 @@ struct CgoThreadStart void (*fn)(void); }; -// Kick off new m's as needed (up to mcpumax). -// Sched is locked. -static void -matchmg(void) -{ - G *gp; - M *mp; - - if(m->mallocing || m->gcing) - return; - - while(haveg() && canaddmcpu()) { - gp = gget(); - if(gp == nil) - runtime_throw("gget inconsistency"); - - // Find the m that will run gp. - if((mp = mget(gp)) == nil) - mp = runtime_newm(); - mnextg(mp, gp); - } -} - -// Create a new m. It will start off with a call to runtime_mstart. +// Allocate a new m unassociated with any thread. +// Can use p for allocation context if needed. M* -runtime_newm(void) +runtime_allocm(P *p) { M *mp; - pthread_attr_t attr; - pthread_t tid; - size_t stacksize; - sigset_t clear; - sigset_t old; - int ret; + m->locks++; // disable GC because it can be called from sysmon + if(m->p == nil) + acquirep(p); // temporarily borrow p for mallocs in this function #if 0 - static const Type *mtype; // The Go type M if(mtype == nil) { Eface e; runtime_gc_m_ptr(&e); @@ -1235,112 +963,418 @@ runtime_newm(void) mcommoninit(mp); mp->g0 = runtime_malg(-1, nil, nil); - if(pthread_attr_init(&attr) != 0) - runtime_throw("pthread_attr_init"); - if(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0) - runtime_throw("pthread_attr_setdetachstate"); + if(p == m->p) + releasep(); + m->locks--; - stacksize = PTHREAD_STACK_MIN; + return mp; +} - // With glibc before version 2.16 the static TLS size is taken - // out of the stack size, and we get an error or a crash if - // there is not enough stack space left. Add it back in if we - // can, in case the program uses a lot of TLS space. FIXME: - // This can be disabled in glibc 2.16 and later, if the bug is - // indeed fixed then. - stacksize += tlssize; +static M* lockextra(bool nilokay); +static void unlockextra(M*); - if(pthread_attr_setstacksize(&attr, stacksize) != 0) - runtime_throw("pthread_attr_setstacksize"); +// needm is called when a cgo callback happens on a +// thread without an m (a thread not created by Go). +// In this case, needm is expected to find an m to use +// and return with m, g initialized correctly. +// Since m and g are not set now (likely nil, but see below) +// needm is limited in what routines it can call. In particular +// it can only call nosplit functions (textflag 7) and cannot +// do any scheduling that requires an m. +// +// In order to avoid needing heavy lifting here, we adopt +// the following strategy: there is a stack of available m's +// that can be stolen. Using compare-and-swap +// to pop from the stack has ABA races, so we simulate +// a lock by doing an exchange (via casp) to steal the stack +// head and replace the top pointer with MLOCKED (1). +// This serves as a simple spin lock that we can use even +// without an m. The thread that locks the stack in this way +// unlocks the stack by storing a valid stack head pointer. 
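The locking scheme described above, treating the extra-M list head as a spin lock by swapping in a sentinel pointer, can be sketched in isolation. The following is not the patch's lockextra/unlockextra, just a toy list using GCC's compare-and-swap builtin and a LOCKED sentinel:

    #include <stdio.h>
    #include <sched.h>

    typedef struct Item { int id; struct Item *next; } Item;

    #define LOCKED ((Item*)1)

    static Item *head;

    static Item*
    list_lock(void)
    {
        Item *h;

        for(;;) {
            h = __atomic_load_n(&head, __ATOMIC_ACQUIRE);
            if(h == LOCKED) {       /* someone else holds the "lock" */
                sched_yield();
                continue;
            }
            if(__atomic_compare_exchange_n(&head, &h, LOCKED, 0,
                                           __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
                return h;           /* list is now locked; h is the old head */
        }
    }

    static void
    list_unlock(Item *newhead)
    {
        __atomic_store_n(&head, newhead, __ATOMIC_RELEASE);
    }

    int
    main(void)
    {
        static Item a = { 42, NULL };
        Item *h;

        list_unlock(&a);            /* seed the list */
        h = list_lock();            /* take the head, leaving the list locked */
        list_unlock(h ? h->next : NULL);
        printf("popped %d\n", h ? h->id : -1);
        return 0;
    }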
+// +// In order to make sure that there is always an m structure +// available to be stolen, we maintain the invariant that there +// is always one more than needed. At the beginning of the +// program (if cgo is in use) the list is seeded with a single m. +// If needm finds that it has taken the last m off the list, its job +// is - once it has installed its own m so that it can do things like +// allocate memory - to create a spare m and put it on the list. +// +// Each of these extra m's also has a g0 and a curg that are +// pressed into service as the scheduling stack and current +// goroutine for the duration of the cgo callback. +// +// When the callback is done with the m, it calls dropm to +// put the m back on the list. +void +runtime_needm(void) +{ + M *mp; - // Block signals during pthread_create so that the new thread - // starts with signals disabled. It will enable them in minit. - sigfillset(&clear); + // Lock extra list, take head, unlock popped list. + // nilokay=false is safe here because of the invariant above, + // that the extra list always contains or will soon contain + // at least one m. + mp = lockextra(false); + + // Set needextram when we've just emptied the list, + // so that the eventual call into cgocallbackg will + // allocate a new m for the extra list. We delay the + // allocation until then so that it can be done + // after exitsyscall makes sure it is okay to be + // running at all (that is, there's no garbage collection + // running right now). + mp->needextram = mp->schedlink == nil; + unlockextra(mp->schedlink); + + // Install m and g (= m->g0) and set the stack bounds + // to match the current stack. We don't actually know + // how big the stack is, like we don't know how big any + // scheduling stack is, but we assume there's at least 32 kB, + // which is more than enough for us. + runtime_setmg(mp, mp->g0); + + // We assume that the split stack support has been initialized + // for this new thread. + + // Initialize this thread to use the m. + runtime_minit(); +} -#ifdef SIGTRAP - // Blocking SIGTRAP reportedly breaks gdb on Alpha GNU/Linux. - sigdelset(&clear, SIGTRAP); -#endif +// newextram allocates an m and puts it on the extra list. +// It is called with a working local m, so that it can do things +// like call schedlock and allocate. +void +runtime_newextram(void) +{ + M *mp, *mnext; + G *gp; - sigemptyset(&old); - sigprocmask(SIG_BLOCK, &clear, &old); - ret = pthread_create(&tid, &attr, runtime_mstart, mp); - sigprocmask(SIG_SETMASK, &old, nil); + // Create extra goroutine locked to extra m. + // The goroutine is the context in which the cgo callback will run. + // The sched.pc will never be returned to, but setting it to + // runtime.goexit makes clear to the traceback routines where + // the goroutine stack ends. + mp = runtime_allocm(nil); + gp = runtime_malg(StackMin, nil, nil); + gp->status = Gsyscall; + mp->curg = gp; + mp->locked = LockInternal; + mp->lockedg = gp; + gp->lockedm = mp; + // put on allg for garbage collector + runtime_lock(&runtime_sched); + if(runtime_lastg == nil) + runtime_allg = gp; + else + runtime_lastg->alllink = gp; + runtime_lastg = gp; + runtime_unlock(&runtime_sched); + gp->goid = runtime_xadd64(&runtime_sched.goidgen, 1); - if (ret != 0) - runtime_throw("pthread_create"); + // Add m to the extra list. 
+ mnext = lockextra(true); + mp->schedlink = mnext; + unlockextra(mp); +} - return mp; +// dropm is called when a cgo callback has called needm but is now +// done with the callback and returning back into the non-Go thread. +// It puts the current m back onto the extra list. +// +// The main expense here is the call to signalstack to release the +// m's signal stack, and then the call to needm on the next callback +// from this thread. It is tempting to try to save the m for next time, +// which would eliminate both these costs, but there might not be +// a next time: the current thread (which Go does not control) might exit. +// If we saved the m for that thread, there would be an m leak each time +// such a thread exited. Instead, we acquire and release an m on each +// call. These should typically not be scheduling operations, just a few +// atomics, so the cost should be small. +// +// TODO(rsc): An alternative would be to allocate a dummy pthread per-thread +// variable using pthread_key_create. Unlike the pthread keys we already use +// on OS X, this dummy key would never be read by Go code. It would exist +// only so that we could register at thread-exit-time destructor. +// That destructor would put the m back onto the extra list. +// This is purely a performance optimization. The current version, +// in which dropm happens on each cgo call, is still correct too. +// We may have to keep the current version on systems with cgo +// but without pthreads, like Windows. +void +runtime_dropm(void) +{ + M *mp, *mnext; + + // Undo whatever initialization minit did during needm. + runtime_unminit(); + + // Clear m and g, and return m to the extra list. + // After the call to setmg we can only call nosplit functions. + mp = m; + runtime_setmg(nil, nil); + + mnext = lockextra(true); + mp->schedlink = mnext; + unlockextra(mp); } -// One round of scheduler: find a goroutine and run it. -// The argument is the goroutine that was running before -// schedule was called, or nil if this is the first call. -// Never returns. -static void -schedule(G *gp) +#define MLOCKED ((M*)1) + +// lockextra locks the extra list and returns the list head. +// The caller must unlock the list by storing a new list head +// to runtime.extram. If nilokay is true, then lockextra will +// return a nil list head if that's what it finds. If nilokay is false, +// lockextra will keep waiting until the list head is no longer nil. +static M* +lockextra(bool nilokay) { - int32 hz; - uint32 v; + M *mp; + void (*yield)(void); - schedlock(); - if(gp != nil) { - // Just finished running gp. - gp->m = nil; - runtime_sched.grunning--; - - // atomic { mcpu-- } - v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift); - if(atomic_mcpu(v) > maxgomaxprocs) - runtime_throw("negative mcpu in scheduler"); - - switch(gp->status) { - case Grunnable: - case Gdead: - // Shouldn't have been running! 
- runtime_throw("bad gp->status in sched"); - case Grunning: - gp->status = Grunnable; - gput(gp); - break; - case Gmoribund: - if(raceenabled) - runtime_racegoend(gp->goid); - gp->status = Gdead; - if(gp->lockedm) { - gp->lockedm = nil; - m->lockedg = nil; - } - gp->idlem = nil; - runtime_memclr(&gp->context, sizeof gp->context); - gfput(gp); - if(--runtime_sched.gcount == 0) - runtime_exit(0); - break; + for(;;) { + mp = runtime_atomicloadp(&runtime_extram); + if(mp == MLOCKED) { + yield = runtime_osyield; + yield(); + continue; } - if(gp->readyonstop) { - gp->readyonstop = 0; - readylocked(gp); + if(mp == nil && !nilokay) { + runtime_usleep(1); + continue; } - } else if(m->helpgc) { - // Bootstrap m or new m started by starttheworld. - // atomic { mcpu-- } - v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift); - if(atomic_mcpu(v) > maxgomaxprocs) - runtime_throw("negative mcpu in scheduler"); - // Compensate for increment in starttheworld(). - runtime_sched.grunning--; + if(!runtime_casp(&runtime_extram, mp, MLOCKED)) { + yield = runtime_osyield; + yield(); + continue; + } + break; + } + return mp; +} + +static void +unlockextra(M *mp) +{ + runtime_atomicstorep(&runtime_extram, mp); +} + + +// Create a new m. It will start off with a call to fn, or else the scheduler. +static void +newm(void(*fn)(void), P *p) +{ + M *mp; + + mp = runtime_allocm(p); + mp->nextp = p; + mp->mstartfn = fn; + + runtime_newosproc(mp); +} + +// Stops execution of the current m until new work is available. +// Returns with acquired P. +static void +stopm(void) +{ + if(m->locks) + runtime_throw("stopm holding locks"); + if(m->p) + runtime_throw("stopm holding p"); + if(m->spinning) { + m->spinning = false; + runtime_xadd(&runtime_sched.nmspinning, -1); + } + +retry: + runtime_lock(&runtime_sched); + mput(m); + runtime_unlock(&runtime_sched); + runtime_notesleep(&m->park); + runtime_noteclear(&m->park); + if(m->helpgc) { + runtime_gchelper(); m->helpgc = 0; - } else if(m->nextg != nil) { - // New m started by matchmg. - } else { - runtime_throw("invalid m state in scheduler"); + m->mcache = nil; + goto retry; } + acquirep(m->nextp); + m->nextp = nil; +} + +static void +mspinning(void) +{ + m->spinning = true; +} + +// Schedules some M to run the p (creates an M if necessary). +// If p==nil, tries to get an idle P, if no idle P's returns false. +static void +startm(P *p, bool spinning) +{ + M *mp; + void (*fn)(void); - // Find (or wait for) g to run. Unlocks runtime_sched. - gp = nextgandunlock(); - gp->readyonstop = 0; + runtime_lock(&runtime_sched); + if(p == nil) { + p = pidleget(); + if(p == nil) { + runtime_unlock(&runtime_sched); + if(spinning) + runtime_xadd(&runtime_sched.nmspinning, -1); + return; + } + } + mp = mget(); + runtime_unlock(&runtime_sched); + if(mp == nil) { + fn = nil; + if(spinning) + fn = mspinning; + newm(fn, p); + return; + } + if(mp->spinning) + runtime_throw("startm: m is spinning"); + if(mp->nextp) + runtime_throw("startm: m has p"); + mp->spinning = spinning; + mp->nextp = p; + runtime_notewakeup(&mp->park); +} + +// Hands off P from syscall or locked M. 
+static void +handoffp(P *p) +{ + // if it has local work, start it straight away + if(p->runqhead != p->runqtail || runtime_sched.runqsize) { + startm(p, false); + return; + } + // no local work, check that there are no spinning/idle M's, + // otherwise our help is not required + if(runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) == 0 && // TODO: fast atomic + runtime_cas(&runtime_sched.nmspinning, 0, 1)) { + startm(p, true); + return; + } + runtime_lock(&runtime_sched); + if(runtime_gcwaiting) { + p->status = Pgcstop; + if(--runtime_sched.stopwait == 0) + runtime_notewakeup(&runtime_sched.stopnote); + runtime_unlock(&runtime_sched); + return; + } + if(runtime_sched.runqsize) { + runtime_unlock(&runtime_sched); + startm(p, false); + return; + } + // If this is the last running P and nobody is polling network, + // need to wakeup another M to poll network. + if(runtime_sched.npidle == (uint32)runtime_gomaxprocs-1 && runtime_atomicload64(&runtime_sched.lastpoll) != 0) { + runtime_unlock(&runtime_sched); + startm(p, false); + return; + } + pidleput(p); + runtime_unlock(&runtime_sched); +} + +// Tries to add one more P to execute G's. +// Called when a G is made runnable (newproc, ready). +static void +wakep(void) +{ + // be conservative about spinning threads + if(!runtime_cas(&runtime_sched.nmspinning, 0, 1)) + return; + startm(nil, true); +} + +// Stops execution of the current m that is locked to a g until the g is runnable again. +// Returns with acquired P. +static void +stoplockedm(void) +{ + P *p; + + if(m->lockedg == nil || m->lockedg->lockedm != m) + runtime_throw("stoplockedm: inconsistent locking"); + if(m->p) { + // Schedule another M to run this p. + p = releasep(); + handoffp(p); + } + inclocked(1); + // Wait until another thread schedules lockedg again. + runtime_notesleep(&m->park); + runtime_noteclear(&m->park); + if(m->lockedg->status != Grunnable) + runtime_throw("stoplockedm: not runnable"); + acquirep(m->nextp); + m->nextp = nil; +} + +// Schedules the locked m to run the locked gp. +static void +startlockedm(G *gp) +{ + M *mp; + P *p; + + mp = gp->lockedm; + if(mp == m) + runtime_throw("startlockedm: locked to me"); + if(mp->nextp) + runtime_throw("startlockedm: m has p"); + // directly handoff current P to the locked m + inclocked(-1); + p = releasep(); + mp->nextp = p; + runtime_notewakeup(&mp->park); + stopm(); +} + +// Stops the current m for stoptheworld. +// Returns when the world is restarted. +static void +gcstopm(void) +{ + P *p; + + if(!runtime_gcwaiting) + runtime_throw("gcstopm: not waiting for gc"); + if(m->spinning) { + m->spinning = false; + runtime_xadd(&runtime_sched.nmspinning, -1); + } + p = releasep(); + runtime_lock(&runtime_sched); + p->status = Pgcstop; + if(--runtime_sched.stopwait == 0) + runtime_notewakeup(&runtime_sched.stopnote); + runtime_unlock(&runtime_sched); + stopm(); +} + +// Schedules gp to run on the current M. +// Never returns. +static void +execute(G *gp) +{ + int32 hz; + + if(gp->status != Grunnable) { + runtime_printf("execute: bad g status %d\n", gp->status); + runtime_throw("execute: bad g status"); + } gp->status = Grunning; + m->p->tick++; m->curg = gp; gp->m = m; @@ -1352,30 +1386,261 @@ schedule(G *gp) runtime_gogo(gp); } -// Enter scheduler. If g->status is Grunning, -// re-queues g and runs everyone else who is waiting -// before running g again. If g->status is Gmoribund, -// kills off g. -void -runtime_gosched(void) +// Finds a runnable goroutine to execute. 
+// Tries to steal from other P's, get g from global queue, poll network. +static G* +findrunnable(void) +{ + G *gp; + P *p; + int32 i; + +top: + if(runtime_gcwaiting) { + gcstopm(); + goto top; + } + // local runq + gp = runqget(m->p); + if(gp) + return gp; + // global runq + if(runtime_sched.runqsize) { + runtime_lock(&runtime_sched); + gp = globrunqget(m->p); + runtime_unlock(&runtime_sched); + if(gp) + return gp; + } + // poll network + gp = runtime_netpoll(false); // non-blocking + if(gp) { + injectglist(gp->schedlink); + gp->status = Grunnable; + return gp; + } + // If number of spinning M's >= number of busy P's, block. + // This is necessary to prevent excessive CPU consumption + // when GOMAXPROCS>>1 but the program parallelism is low. + if(!m->spinning && 2 * runtime_atomicload(&runtime_sched.nmspinning) >= runtime_gomaxprocs - runtime_atomicload(&runtime_sched.npidle)) // TODO: fast atomic + goto stop; + if(!m->spinning) { + m->spinning = true; + runtime_xadd(&runtime_sched.nmspinning, 1); + } + // random steal from other P's + for(i = 0; i < 2*runtime_gomaxprocs; i++) { + if(runtime_gcwaiting) + goto top; + p = runtime_allp[runtime_fastrand1()%runtime_gomaxprocs]; + if(p == m->p) + gp = runqget(p); + else + gp = runqsteal(m->p, p); + if(gp) + return gp; + } +stop: + // return P and block + runtime_lock(&runtime_sched); + if(runtime_gcwaiting) { + runtime_unlock(&runtime_sched); + goto top; + } + if(runtime_sched.runqsize) { + gp = globrunqget(m->p); + runtime_unlock(&runtime_sched); + return gp; + } + p = releasep(); + pidleput(p); + runtime_unlock(&runtime_sched); + if(m->spinning) { + m->spinning = false; + runtime_xadd(&runtime_sched.nmspinning, -1); + } + // check all runqueues once again + for(i = 0; i < runtime_gomaxprocs; i++) { + p = runtime_allp[i]; + if(p && p->runqhead != p->runqtail) { + runtime_lock(&runtime_sched); + p = pidleget(); + runtime_unlock(&runtime_sched); + if(p) { + acquirep(p); + goto top; + } + break; + } + } + // poll network + if(runtime_xchg64(&runtime_sched.lastpoll, 0) != 0) { + if(m->p) + runtime_throw("findrunnable: netpoll with p"); + if(m->spinning) + runtime_throw("findrunnable: netpoll with spinning"); + gp = runtime_netpoll(true); // block until new work is available + runtime_atomicstore64(&runtime_sched.lastpoll, runtime_nanotime()); + if(gp) { + runtime_lock(&runtime_sched); + p = pidleget(); + runtime_unlock(&runtime_sched); + if(p) { + acquirep(p); + injectglist(gp->schedlink); + gp->status = Grunnable; + return gp; + } + injectglist(gp); + } + } + stopm(); + goto top; +} + +// Injects the list of runnable G's into the scheduler. +// Can run concurrently with GC. +static void +injectglist(G *glist) { - if(m->locks != 0) - runtime_throw("gosched holding locks"); - if(g == m->g0) - runtime_throw("gosched of g0"); - runtime_mcall(schedule); + int32 n; + G *gp; + + if(glist == nil) + return; + runtime_lock(&runtime_sched); + for(n = 0; glist; n++) { + gp = glist; + glist = gp->schedlink; + gp->status = Grunnable; + globrunqput(gp); + } + runtime_unlock(&runtime_sched); + + for(; n && runtime_sched.npidle; n--) + startm(nil, false); +} + +// One round of scheduler: find a runnable goroutine and execute it. +// Never returns. 
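findrunnable above declines to start spinning once 2*nmspinning >= gomaxprocs - npidle, that is, when the M's already spinning cover at least half of the busy P's. A quick numeric check of that condition; a sketch only, with stand-in names:

    #include <stdio.h>

    static int
    should_block(int nmspinning, int gomaxprocs, int npidle)
    {
        /* busy P's = gomaxprocs - npidle; block if spinners already cover half */
        return 2*nmspinning >= gomaxprocs - npidle;
    }

    int
    main(void)
    {
        /* 8 P's, 2 idle, so 6 busy; 3 spinning M's already cover them. */
        printf("%d\n", should_block(3, 8, 2));   /* prints 1: block */
        printf("%d\n", should_block(1, 8, 2));   /* prints 0: start spinning */
        return 0;
    }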
+static void +schedule(void) +{ + G *gp; + + if(m->locks) + runtime_throw("schedule: holding locks"); + +top: + if(runtime_gcwaiting) { + gcstopm(); + goto top; + } + + gp = runqget(m->p); + if(gp == nil) + gp = findrunnable(); + + if(m->spinning) { + m->spinning = false; + runtime_xadd(&runtime_sched.nmspinning, -1); + } + + // M wakeup policy is deliberately somewhat conservative (see nmspinning handling), + // so see if we need to wakeup another M here. + if (m->p->runqhead != m->p->runqtail && + runtime_atomicload(&runtime_sched.nmspinning) == 0 && + runtime_atomicload(&runtime_sched.npidle) > 0) // TODO: fast atomic + wakep(); + + if(gp->lockedm) { + startlockedm(gp); + goto top; + } + + execute(gp); } // Puts the current goroutine into a waiting state and unlocks the lock. // The goroutine can be made runnable again by calling runtime_ready(gp). void -runtime_park(void (*unlockf)(Lock*), Lock *lock, const char *reason) +runtime_park(void(*unlockf)(Lock*), Lock *lock, const char *reason) { - g->status = Gwaiting; + m->waitlock = lock; + m->waitunlockf = unlockf; g->waitreason = reason; - if(unlockf) - unlockf(lock); - runtime_gosched(); + runtime_mcall(park0); +} + +// runtime_park continuation on g0. +static void +park0(G *gp) +{ + gp->status = Gwaiting; + gp->m = nil; + m->curg = nil; + if(m->waitunlockf) { + m->waitunlockf(m->waitlock); + m->waitunlockf = nil; + m->waitlock = nil; + } + if(m->lockedg) { + stoplockedm(); + execute(gp); // Never returns. + } + schedule(); +} + +// Scheduler yield. +void +runtime_gosched(void) +{ + runtime_mcall(gosched0); +} + +// runtime_gosched continuation on g0. +static void +gosched0(G *gp) +{ + gp->status = Grunnable; + gp->m = nil; + m->curg = nil; + runtime_lock(&runtime_sched); + globrunqput(gp); + runtime_unlock(&runtime_sched); + if(m->lockedg) { + stoplockedm(); + execute(gp); // Never returns. + } + schedule(); +} + +// Finishes execution of the current goroutine. +void +runtime_goexit(void) +{ + if(raceenabled) + runtime_racegoend(); + runtime_mcall(goexit0); +} + +// runtime_goexit continuation on g0. +static void +goexit0(G *gp) +{ + gp->status = Gdead; + gp->entry = nil; + gp->m = nil; + gp->lockedm = nil; + m->curg = nil; + m->lockedg = nil; + if(m->locked & ~LockExternal) { + runtime_printf("invalid m->locked = %d", m->locked); + runtime_throw("internal lockOSThread error"); + } + m->locked = 0; + gfput(m->p, gp); + schedule(); } // The goroutine g is about to enter a system call. @@ -1386,17 +1651,12 @@ runtime_park(void (*unlockf)(Lock*), Lock *lock, const char *reason) // Entersyscall cannot split the stack: the runtime_gosave must // make g->sched refer to the caller's stack segment, because // entersyscall is going to return immediately after. -// It's okay to call matchmg and notewakeup even after -// decrementing mcpu, because we haven't released the -// sched lock yet, so the garbage collector cannot be running. void runtime_entersyscall(void) __attribute__ ((no_split_stack)); void -runtime_entersyscall(void) +runtime_entersyscall() { - uint32 v; - if(m->profilehz > 0) runtime_setprof(false); @@ -1415,30 +1675,57 @@ runtime_entersyscall(void) g->status = Gsyscall; - // Fast path. - // The slow path inside the schedlock/schedunlock will get - // through without stopping if it does: - // mcpu-- - // gwait not true - // waitstop && mcpu <= mcpumax not true - // If we can do the same with a single atomic add, - // then we can skip the locks. 
- v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift); - if(!atomic_gwaiting(v) && (!atomic_waitstop(v) || atomic_mcpu(v) > atomic_mcpumax(v))) - return; - - schedlock(); - v = runtime_atomicload(&runtime_sched.atomic); - if(atomic_gwaiting(v)) { - matchmg(); - v = runtime_atomicload(&runtime_sched.atomic); + if(runtime_atomicload(&runtime_sched.sysmonwait)) { // TODO: fast atomic + runtime_lock(&runtime_sched); + if(runtime_atomicload(&runtime_sched.sysmonwait)) { + runtime_atomicstore(&runtime_sched.sysmonwait, 0); + runtime_notewakeup(&runtime_sched.sysmonnote); + } + runtime_unlock(&runtime_sched); } - if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) { - runtime_xadd(&runtime_sched.atomic, -1<<waitstopShift); - runtime_notewakeup(&runtime_sched.stopped); + + m->mcache = nil; + m->p->tick++; + m->p->m = nil; + runtime_atomicstore(&m->p->status, Psyscall); + if(runtime_gcwaiting) { + runtime_lock(&runtime_sched); + if (runtime_sched.stopwait > 0 && runtime_cas(&m->p->status, Psyscall, Pgcstop)) { + if(--runtime_sched.stopwait == 0) + runtime_notewakeup(&runtime_sched.stopnote); + } + runtime_unlock(&runtime_sched); } +} + +// The same as runtime_entersyscall(), but with a hint that the syscall is blocking. +void +runtime_entersyscallblock(void) +{ + P *p; + + if(m->profilehz > 0) + runtime_setprof(false); + + // Leave SP around for gc and traceback. +#ifdef USING_SPLIT_STACK + g->gcstack = __splitstack_find(nil, nil, &g->gcstack_size, + &g->gcnext_segment, &g->gcnext_sp, + &g->gcinitial_sp); +#else + g->gcnext_sp = (byte *) &v; +#endif + + // Save the registers in the g structure so that any pointers + // held in registers will be seen by the garbage collector. + getcontext(&g->gcregs); - schedunlock(); + g->status = Gsyscall; + + p = releasep(); + handoffp(p); + if(g->isbackground) // do not consider blocked scavenger for deadlock detection + inclocked(1); } // The goroutine g exited its system call. @@ -1449,46 +1736,53 @@ void runtime_exitsyscall(void) { G *gp; - uint32 v; - - // Fast path. - // If we can do the mcpu++ bookkeeping and - // find that we still have mcpu <= mcpumax, then we can - // start executing Go code immediately, without having to - // schedlock/schedunlock. - // Also do fast return if any locks are held, so that - // panic code can use syscalls to open a file. + P *p; + + // Check whether the profiler needs to be turned on. + if(m->profilehz > 0) + runtime_setprof(true); + gp = g; - v = runtime_xadd(&runtime_sched.atomic, (1<<mcpuShift)); - if((m->profilehz == runtime_sched.profilehz && atomic_mcpu(v) <= atomic_mcpumax(v)) || m->locks > 0) { + // Try to re-acquire the last P. + if(m->p && m->p->status == Psyscall && runtime_cas(&m->p->status, Psyscall, Prunning)) { // There's a cpu for us, so we can run. + m->mcache = m->p->mcache; + m->p->m = m; + m->p->tick++; gp->status = Grunning; // Garbage collector isn't running (since we are), - // so okay to clear gcstack. + // so okay to clear gcstack and gcsp. #ifdef USING_SPLIT_STACK gp->gcstack = nil; #endif gp->gcnext_sp = nil; runtime_memclr(&gp->gcregs, sizeof gp->gcregs); - - if(m->profilehz > 0) - runtime_setprof(true); return; } - // Tell scheduler to put g back on the run queue: - // mostly equivalent to g->status = Grunning, - // but keeps the garbage collector from thinking - // that g is running right now, which it's not. - gp->readyonstop = 1; + if(gp->isbackground) // do not consider blocked scavenger for deadlock detection + inclocked(-1); + // Try to get any other idle P. 
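The fast path of runtime_exitsyscall above re-acquires the P it was using before the syscall with a single compare-and-swap on p->status, from Psyscall back to Prunning; if sysmon's retake (or a stop-the-world) got there first, the CAS fails and the slow path continues below. A stand-alone sketch of that status CAS, with toy types rather than the patch's P:

    #include <stdio.h>

    enum { Pidle, Prunning, Psyscall, Pgcstop };

    typedef struct ToyP { unsigned int status; } ToyP;

    static int
    reacquire(ToyP *p)
    {
        unsigned int want = Psyscall;

        /* Succeeds only if nobody changed the status in the meantime. */
        return __atomic_compare_exchange_n(&p->status, &want, Prunning, 0,
                                           __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE);
    }

    int
    main(void)
    {
        ToyP p = { Psyscall };

        printf("reacquired: %d\n", reacquire(&p));  /* 1: fast path */
        p.status = Pidle;                           /* retaken meanwhile */
        printf("reacquired: %d\n", reacquire(&p));  /* 0: take slow path */
        return 0;
    }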
+ m->p = nil; + if(runtime_sched.pidle) { + runtime_lock(&runtime_sched); + p = pidleget(); + runtime_unlock(&runtime_sched); + if(p) { + acquirep(p); +#ifdef USING_SPLIT_STACK + gp->gcstack = nil; +#endif + gp->gcnext_sp = nil; + runtime_memclr(&gp->gcregs, sizeof gp->gcregs); + return; + } + } - // All the cpus are taken. - // The scheduler will ready g and put this m to sleep. - // When the scheduler takes g away from m, - // it will undo the runtime_sched.mcpu++ above. - runtime_gosched(); + // Call the scheduler. + runtime_mcall(exitsyscall0); - // Gosched returned, so we're allowed to run now. + // Scheduler returned, so we're allowed to run now. // Delete the gcstack information that we left for // the garbage collector during the system call. // Must wait until now because until gosched returns @@ -1501,6 +1795,34 @@ runtime_exitsyscall(void) runtime_memclr(&gp->gcregs, sizeof gp->gcregs); } +// runtime_exitsyscall slow path on g0. +// Failed to acquire P, enqueue gp as runnable. +static void +exitsyscall0(G *gp) +{ + P *p; + + gp->status = Grunnable; + gp->m = nil; + m->curg = nil; + runtime_lock(&runtime_sched); + p = pidleget(); + if(p == nil) + globrunqput(gp); + runtime_unlock(&runtime_sched); + if(p) { + acquirep(p); + execute(gp); // Never returns. + } + if(m->lockedg) { + // Wait until another thread schedules gp and so m again. + stoplockedm(); + execute(gp); // Never returns. + } + stopm(); + schedule(); // Never returns. +} + // Allocate a new g, with a stack big enough for stacksize bytes. G* runtime_malg(int32 stacksize, byte** ret_stack, size_t* ret_stacksize) @@ -1554,15 +1876,10 @@ __go_go(void (*fn)(void*), void* arg) byte *sp; size_t spsize; G *newg; - int64 goid; - - goid = runtime_xadd64((uint64*)&runtime_sched.goidgen, 1); - if(raceenabled) - runtime_racegostart(goid, runtime_getcallerpc(&fn)); - schedlock(); + m->locks++; // disable preemption because it can be holding p in a local var - if((newg = gfget()) != nil) { + if((newg = gfget(m->p)) != nil) { #ifdef USING_SPLIT_STACK int dont_block_signals = 0; @@ -1579,24 +1896,20 @@ __go_go(void (*fn)(void*), void* arg) #endif } else { newg = runtime_malg(StackMin, &sp, &spsize); + runtime_lock(&runtime_sched); if(runtime_lastg == nil) runtime_allg = newg; else runtime_lastg->alllink = newg; runtime_lastg = newg; + runtime_unlock(&runtime_sched); } - newg->status = Gwaiting; - newg->waitreason = "new goroutine"; newg->entry = (byte*)fn; newg->param = arg; newg->gopc = (uintptr)__builtin_return_address(0); - - runtime_sched.gcount++; - newg->goid = goid; - - if(sp == nil) - runtime_throw("nil g->stack0"); + newg->status = Grunnable; + newg->goid = runtime_xadd64(&runtime_sched.goidgen, 1); { // Avoid warnings about variables clobbered by @@ -1613,33 +1926,87 @@ __go_go(void (*fn)(void*), void* arg) vnewg->context.uc_stack.ss_size = vspsize; makecontext(&vnewg->context, kickoff, 0); - newprocreadylocked(vnewg); - schedunlock(); + runqput(m->p, vnewg); + if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0 && fn != runtime_main) // TODO: fast atomic + wakep(); + m->locks--; return vnewg; } } -// Put on gfree list. Sched must be locked. +// Put on gfree list. +// If local list is too long, transfer a batch to the global list. 
static void -gfput(G *gp) -{ - gp->schedlink = runtime_sched.gfree; - runtime_sched.gfree = gp; +gfput(P *p, G *gp) +{ + gp->schedlink = p->gfree; + p->gfree = gp; + p->gfreecnt++; + if(p->gfreecnt >= 64) { + runtime_lock(&runtime_sched.gflock); + while(p->gfreecnt >= 32) { + p->gfreecnt--; + gp = p->gfree; + p->gfree = gp->schedlink; + gp->schedlink = runtime_sched.gfree; + runtime_sched.gfree = gp; + } + runtime_unlock(&runtime_sched.gflock); + } } -// Get from gfree list. Sched must be locked. +// Get from gfree list. +// If local list is empty, grab a batch from global list. static G* -gfget(void) +gfget(P *p) { G *gp; - gp = runtime_sched.gfree; - if(gp) - runtime_sched.gfree = gp->schedlink; +retry: + gp = p->gfree; + if(gp == nil && runtime_sched.gfree) { + runtime_lock(&runtime_sched.gflock); + while(p->gfreecnt < 32 && runtime_sched.gfree) { + p->gfreecnt++; + gp = runtime_sched.gfree; + runtime_sched.gfree = gp->schedlink; + gp->schedlink = p->gfree; + p->gfree = gp; + } + runtime_unlock(&runtime_sched.gflock); + goto retry; + } + if(gp) { + p->gfree = gp->schedlink; + p->gfreecnt--; + } return gp; } +// Purge all cached G's from gfree list to the global list. +static void +gfpurge(P *p) +{ + G *gp; + + runtime_lock(&runtime_sched.gflock); + while(p->gfreecnt) { + p->gfreecnt--; + gp = p->gfree; + p->gfree = gp->schedlink; + gp->schedlink = runtime_sched.gfree; + runtime_sched.gfree = gp; + } + runtime_unlock(&runtime_sched.gflock); +} + +void +runtime_Breakpoint(void) +{ + runtime_breakpoint(); +} + void runtime_Gosched (void) __asm__ (GOSYM_PREFIX "runtime.Gosched"); void @@ -1649,67 +2016,82 @@ runtime_Gosched(void) } // Implementation of runtime.GOMAXPROCS. -// delete when scheduler is stronger +// delete when scheduler is even stronger int32 runtime_gomaxprocsfunc(int32 n) { int32 ret; - uint32 v; - schedlock(); + if(n > MaxGomaxprocs) + n = MaxGomaxprocs; + runtime_lock(&runtime_sched); ret = runtime_gomaxprocs; - if(n <= 0) - n = ret; - if(n > maxgomaxprocs) - n = maxgomaxprocs; - runtime_gomaxprocs = n; - if(runtime_gomaxprocs > 1) - runtime_singleproc = false; - if(runtime_gcwaiting != 0) { - if(atomic_mcpumax(runtime_sched.atomic) != 1) - runtime_throw("invalid mcpumax during gc"); - schedunlock(); + if(n <= 0 || n == ret) { + runtime_unlock(&runtime_sched); return ret; } + runtime_unlock(&runtime_sched); - setmcpumax(n); + runtime_semacquire(&runtime_worldsema); + m->gcing = 1; + runtime_stoptheworld(); + newprocs = n; + m->gcing = 0; + runtime_semrelease(&runtime_worldsema); + runtime_starttheworld(); - // If there are now fewer allowed procs - // than procs running, stop. 
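gfput and gfget above keep a per-P cache of dead G's and exchange them with the global free list in batches (spill once the local list reaches 64, refill up to 32), so gflock is only taken for whole batches. A simplified, single-threaded sketch of that batching policy, with toy types and not the patch's code:

    #include <stdio.h>

    typedef struct ToyG { struct ToyG *schedlink; } ToyG;

    typedef struct {
        ToyG *gfree;
        int gfreecnt;
    } LocalCache;

    static ToyG *globalfree;

    static void
    cache_put(LocalCache *c, ToyG *gp)
    {
        gp->schedlink = c->gfree;
        c->gfree = gp;
        c->gfreecnt++;
        if(c->gfreecnt >= 64) {
            while(c->gfreecnt >= 32) {      /* spill a batch to the global list */
                c->gfreecnt--;
                gp = c->gfree;
                c->gfree = gp->schedlink;
                gp->schedlink = globalfree;
                globalfree = gp;
            }
        }
    }

    int
    main(void)
    {
        static ToyG pool[70];
        LocalCache c = { NULL, 0 };
        int i, n;

        for(i = 0; i < 70; i++)
            cache_put(&c, &pool[i]);
        for(n = 0; globalfree; globalfree = globalfree->schedlink)
            n++;
        printf("local %d, global %d\n", c.gfreecnt, n);
        return 0;
    }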
- v = runtime_atomicload(&runtime_sched.atomic); - if((int32)atomic_mcpu(v) > n) { - schedunlock(); - runtime_gosched(); - return ret; - } - // handle more procs - matchmg(); - schedunlock(); return ret; } -void -runtime_LockOSThread(void) +static void +LockOSThread(void) { - if(m == &runtime_m0 && runtime_sched.init) { - runtime_sched.lockmain = true; - return; - } m->lockedg = g; g->lockedm = m; } +void runtime_LockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.LockOSThread"); void -runtime_UnlockOSThread(void) +runtime_LockOSThread(void) { - if(m == &runtime_m0 && runtime_sched.init) { - runtime_sched.lockmain = false; + m->locked |= LockExternal; + LockOSThread(); +} + +void +runtime_lockOSThread(void) +{ + m->locked += LockInternal; + LockOSThread(); +} + +static void +UnlockOSThread(void) +{ + if(m->locked != 0) return; - } m->lockedg = nil; g->lockedm = nil; } +void runtime_UnlockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.UnlockOSThread"); + +void +runtime_UnlockOSThread(void) +{ + m->locked &= ~LockExternal; + UnlockOSThread(); +} + +void +runtime_unlockOSThread(void) +{ + if(m->locked < LockInternal) + runtime_throw("runtime: internal error: misuse of lockOSThread/unlockOSThread"); + m->locked -= LockInternal; + UnlockOSThread(); +} + bool runtime_lockedOSThread(void) { @@ -1740,13 +2122,28 @@ intgo runtime_NumGoroutine (void) intgo runtime_NumGoroutine() { - return runtime_sched.gcount; + return runtime_gcount(); } int32 runtime_gcount(void) { - return runtime_sched.gcount; + G *gp; + int32 n, s; + + n = 0; + runtime_lock(&runtime_sched); + // TODO(dvyukov): runtime.NumGoroutine() is O(N). + // We do not want to increment/decrement centralized counter in newproc/goexit, + // just to make runtime.NumGoroutine() faster. + // Compromise solution is to introduce per-P counters of active goroutines. + for(gp = runtime_allg; gp; gp = gp->alllink) { + s = gp->status; + if(s == Grunnable || s == Grunning || s == Gsyscall || s == Gwaiting) + n++; + } + runtime_unlock(&runtime_sched); + return n; } int32 @@ -1769,6 +2166,9 @@ runtime_sigprof() { int32 n, i; + // Windows does profiling in a dedicated thread w/o m. + if(!Windows && (m == nil || m->mcache == nil)) + return; if(prof.fn == nil || prof.hz == 0) return; @@ -1813,3 +2213,555 @@ runtime_setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz) if(hz != 0) runtime_resetcpuprofiler(hz); } + +// Change number of processors. The world is stopped, sched is locked. +static void +procresize(int32 new) +{ + int32 i, old; + G *gp; + P *p; + + old = runtime_gomaxprocs; + if(old < 0 || old > MaxGomaxprocs || new <= 0 || new >MaxGomaxprocs) + runtime_throw("procresize: invalid arg"); + // initialize new P's + for(i = 0; i < new; i++) { + p = runtime_allp[i]; + if(p == nil) { + p = (P*)runtime_mallocgc(sizeof(*p), 0, 0, 1); + p->status = Pgcstop; + runtime_atomicstorep(&runtime_allp[i], p); + } + if(p->mcache == nil) { + if(old==0 && i==0) + p->mcache = m->mcache; // bootstrap + else + p->mcache = runtime_allocmcache(); + } + if(p->runq == nil) { + p->runqsize = 128; + p->runq = (G**)runtime_mallocgc(p->runqsize*sizeof(G*), 0, 0, 1); + } + } + + // redistribute runnable G's evenly + for(i = 0; i < old; i++) { + p = runtime_allp[i]; + while((gp = runqget(p)) != nil) + globrunqput(gp); + } + // start at 1 because current M already executes some G and will acquire allp[0] below, + // so if we have a spare G we want to put it into allp[1]. 
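The loop that follows deals the remaining globally queued G's out round-robin with i%new, starting at allp[1] for the reason given in the comment above. A toy illustration of how that distribution comes out (not patch code):

    #include <stdio.h>

    int
    main(void)
    {
        int new = 4;            /* new GOMAXPROCS */
        int gqueue = 10;        /* pretend 10 G's sit on the global queue */
        int count[4] = {0};
        int i;

        for(i = 1; gqueue > 0; i++, gqueue--)
            count[i % new]++;   /* start at 1, then wrap around all P's */
        for(i = 0; i < new; i++)
            printf("allp[%d] gets %d\n", i, count[i]);
        return 0;
    }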
+ for(i = 1; runtime_sched.runqhead; i++) { + gp = runtime_sched.runqhead; + runtime_sched.runqhead = gp->schedlink; + runqput(runtime_allp[i%new], gp); + } + runtime_sched.runqtail = nil; + runtime_sched.runqsize = 0; + + // free unused P's + for(i = new; i < old; i++) { + p = runtime_allp[i]; + runtime_freemcache(p->mcache); + p->mcache = nil; + gfpurge(p); + p->status = Pdead; + // can't free P itself because it can be referenced by an M in syscall + } + + if(m->p) + m->p->m = nil; + m->p = nil; + m->mcache = nil; + p = runtime_allp[0]; + p->m = nil; + p->status = Pidle; + acquirep(p); + for(i = new-1; i > 0; i--) { + p = runtime_allp[i]; + p->status = Pidle; + pidleput(p); + } + runtime_singleproc = new == 1; + runtime_atomicstore((uint32*)&runtime_gomaxprocs, new); +} + +// Associate p and the current m. +static void +acquirep(P *p) +{ + if(m->p || m->mcache) + runtime_throw("acquirep: already in go"); + if(p->m || p->status != Pidle) { + runtime_printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? p->m->id : 0, p->status); + runtime_throw("acquirep: invalid p state"); + } + m->mcache = p->mcache; + m->p = p; + p->m = m; + p->status = Prunning; +} + +// Disassociate p and the current m. +static P* +releasep(void) +{ + P *p; + + if(m->p == nil || m->mcache == nil) + runtime_throw("releasep: invalid arg"); + p = m->p; + if(p->m != m || p->mcache != m->mcache || p->status != Prunning) { + runtime_printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n", + m, m->p, p->m, m->mcache, p->mcache, p->status); + runtime_throw("releasep: invalid p state"); + } + m->p = nil; + m->mcache = nil; + p->m = nil; + p->status = Pidle; + return p; +} + +static void +inclocked(int32 v) +{ + runtime_lock(&runtime_sched); + runtime_sched.mlocked += v; + if(v > 0) + checkdead(); + runtime_unlock(&runtime_sched); +} + +// Check for deadlock situation. +// The check is based on number of running M's, if 0 -> deadlock. +static void +checkdead(void) +{ + G *gp; + int32 run, grunning, s; + + // -1 for sysmon + run = runtime_sched.mcount - runtime_sched.nmidle - runtime_sched.mlocked - 1; + if(run > 0) + return; + if(run < 0) { + runtime_printf("checkdead: nmidle=%d mlocked=%d mcount=%d\n", + runtime_sched.nmidle, runtime_sched.mlocked, runtime_sched.mcount); + runtime_throw("checkdead: inconsistent counts"); + } + grunning = 0; + for(gp = runtime_allg; gp; gp = gp->alllink) { + if(gp->isbackground) + continue; + s = gp->status; + if(s == Gwaiting) + grunning++; + else if(s == Grunnable || s == Grunning || s == Gsyscall) { + runtime_printf("checkdead: find g %D in status %d\n", gp->goid, s); + runtime_throw("checkdead: runnable g"); + } + } + if(grunning == 0) // possible if main goroutine calls runtime_Goexit() + runtime_exit(0); + m->throwing = -1; // do not dump full stacks + runtime_throw("all goroutines are asleep - deadlock!"); +} + +static void +sysmon(void) +{ + uint32 idle, delay; + int64 now, lastpoll; + G *gp; + uint32 ticks[MaxGomaxprocs]; + + idle = 0; // how many cycles in succession we had not wokeup somebody + delay = 0; + for(;;) { + if(idle == 0) // start with 20us sleep... + delay = 20; + else if(idle > 50) // start doubling the sleep after 1ms... 
+ delay *= 2; + if(delay > 10*1000) // up to 10ms + delay = 10*1000; + runtime_usleep(delay); + if(runtime_gcwaiting || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs) { // TODO: fast atomic + runtime_lock(&runtime_sched); + if(runtime_atomicload(&runtime_gcwaiting) || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs) { + runtime_atomicstore(&runtime_sched.sysmonwait, 1); + runtime_unlock(&runtime_sched); + runtime_notesleep(&runtime_sched.sysmonnote); + runtime_noteclear(&runtime_sched.sysmonnote); + idle = 0; + delay = 20; + } else + runtime_unlock(&runtime_sched); + } + // poll network if not polled for more than 10ms + lastpoll = runtime_atomicload64(&runtime_sched.lastpoll); + now = runtime_nanotime(); + if(lastpoll != 0 && lastpoll + 10*1000*1000 > now) { + gp = runtime_netpoll(false); // non-blocking + injectglist(gp); + } + // retake P's blocked in syscalls + if(retake(ticks)) + idle = 0; + else + idle++; + } +} + +static uint32 +retake(uint32 *ticks) +{ + uint32 i, s, n; + int64 t; + P *p; + + n = 0; + for(i = 0; i < (uint32)runtime_gomaxprocs; i++) { + p = runtime_allp[i]; + if(p==nil) + continue; + t = p->tick; + if(ticks[i] != t) { + ticks[i] = t; + continue; + } + s = p->status; + if(s != Psyscall) + continue; + if(p->runqhead == p->runqtail && runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) > 0) // TODO: fast atomic + continue; + // Need to increment number of locked M's before the CAS. + // Otherwise the M from which we retake can exit the syscall, + // increment nmidle and report deadlock. + inclocked(-1); + if(runtime_cas(&p->status, s, Pidle)) { + n++; + handoffp(p); + } + inclocked(1); + } + return n; +} + +// Put mp on midle list. +// Sched must be locked. +static void +mput(M *mp) +{ + mp->schedlink = runtime_sched.midle; + runtime_sched.midle = mp; + runtime_sched.nmidle++; + checkdead(); +} + +// Try to get an m from midle list. +// Sched must be locked. +static M* +mget(void) +{ + M *mp; + + if((mp = runtime_sched.midle) != nil){ + runtime_sched.midle = mp->schedlink; + runtime_sched.nmidle--; + } + return mp; +} + +// Put gp on the global runnable queue. +// Sched must be locked. +static void +globrunqput(G *gp) +{ + gp->schedlink = nil; + if(runtime_sched.runqtail) + runtime_sched.runqtail->schedlink = gp; + else + runtime_sched.runqhead = gp; + runtime_sched.runqtail = gp; + runtime_sched.runqsize++; +} + +// Try get a batch of G's from the global runnable queue. +// Sched must be locked. +static G* +globrunqget(P *p) +{ + G *gp, *gp1; + int32 n; + + if(runtime_sched.runqsize == 0) + return nil; + n = runtime_sched.runqsize/runtime_gomaxprocs+1; + if(n > runtime_sched.runqsize) + n = runtime_sched.runqsize; + runtime_sched.runqsize -= n; + if(runtime_sched.runqsize == 0) + runtime_sched.runqtail = nil; + gp = runtime_sched.runqhead; + runtime_sched.runqhead = gp->schedlink; + n--; + while(n--) { + gp1 = runtime_sched.runqhead; + runtime_sched.runqhead = gp1->schedlink; + runqput(p, gp1); + } + return gp; +} + +// Put p to on pidle list. +// Sched must be locked. +static void +pidleput(P *p) +{ + p->link = runtime_sched.pidle; + runtime_sched.pidle = p; + runtime_xadd(&runtime_sched.npidle, 1); // TODO: fast atomic +} + +// Try get a p from pidle list. +// Sched must be locked. 
+static P* +pidleget(void) +{ + P *p; + + p = runtime_sched.pidle; + if(p) { + runtime_sched.pidle = p->link; + runtime_xadd(&runtime_sched.npidle, -1); // TODO: fast atomic + } + return p; +} + +// Put g on local runnable queue. +// TODO(dvyukov): consider using lock-free queue. +static void +runqput(P *p, G *gp) +{ + int32 h, t, s; + + runtime_lock(p); +retry: + h = p->runqhead; + t = p->runqtail; + s = p->runqsize; + if(t == h-1 || (h == 0 && t == s-1)) { + runqgrow(p); + goto retry; + } + p->runq[t++] = gp; + if(t == s) + t = 0; + p->runqtail = t; + runtime_unlock(p); +} + +// Get g from local runnable queue. +static G* +runqget(P *p) +{ + G *gp; + int32 t, h, s; + + if(p->runqhead == p->runqtail) + return nil; + runtime_lock(p); + h = p->runqhead; + t = p->runqtail; + s = p->runqsize; + if(t == h) { + runtime_unlock(p); + return nil; + } + gp = p->runq[h++]; + if(h == s) + h = 0; + p->runqhead = h; + runtime_unlock(p); + return gp; +} + +// Grow local runnable queue. +// TODO(dvyukov): consider using fixed-size array +// and transfer excess to the global list (local queue can grow way too big). +static void +runqgrow(P *p) +{ + G **q; + int32 s, t, h, t2; + + h = p->runqhead; + t = p->runqtail; + s = p->runqsize; + t2 = 0; + q = runtime_malloc(2*s*sizeof(*q)); + while(t != h) { + q[t2++] = p->runq[h++]; + if(h == s) + h = 0; + } + runtime_free(p->runq); + p->runq = q; + p->runqhead = 0; + p->runqtail = t2; + p->runqsize = 2*s; +} + +// Steal half of elements from local runnable queue of p2 +// and put onto local runnable queue of p. +// Returns one of the stolen elements (or nil if failed). +static G* +runqsteal(P *p, P *p2) +{ + G *gp, *gp1; + int32 t, h, s, t2, h2, s2, c, i; + + if(p2->runqhead == p2->runqtail) + return nil; + // sort locks to prevent deadlocks + if(p < p2) + runtime_lock(p); + runtime_lock(p2); + if(p2->runqhead == p2->runqtail) { + runtime_unlock(p2); + if(p < p2) + runtime_unlock(p); + return nil; + } + if(p >= p2) + runtime_lock(p); + // now we've locked both queues and know the victim is not empty + h = p->runqhead; + t = p->runqtail; + s = p->runqsize; + h2 = p2->runqhead; + t2 = p2->runqtail; + s2 = p2->runqsize; + gp = p2->runq[h2++]; // return value + if(h2 == s2) + h2 = 0; + // steal roughly half + if(t2 > h2) + c = (t2 - h2) / 2; + else + c = (s2 - h2 + t2) / 2; + // copy + for(i = 0; i != c; i++) { + // the target queue is full? + if(t == h-1 || (h == 0 && t == s-1)) + break; + // the victim queue is empty? 
+ if(t2 == h2) + break; + gp1 = p2->runq[h2++]; + if(h2 == s2) + h2 = 0; + p->runq[t++] = gp1; + if(t == s) + t = 0; + } + p->runqtail = t; + p2->runqhead = h2; + runtime_unlock(p2); + runtime_unlock(p); + return gp; +} + +void runtime_testSchedLocalQueue(void) + __asm__("runtime.testSchedLocalQueue"); + +void +runtime_testSchedLocalQueue(void) +{ + P p; + G gs[1000]; + int32 i, j; + + runtime_memclr((byte*)&p, sizeof(p)); + p.runqsize = 1; + p.runqhead = 0; + p.runqtail = 0; + p.runq = runtime_malloc(p.runqsize*sizeof(*p.runq)); + + for(i = 0; i < (int32)nelem(gs); i++) { + if(runqget(&p) != nil) + runtime_throw("runq is not empty initially"); + for(j = 0; j < i; j++) + runqput(&p, &gs[i]); + for(j = 0; j < i; j++) { + if(runqget(&p) != &gs[i]) { + runtime_printf("bad element at iter %d/%d\n", i, j); + runtime_throw("bad element"); + } + } + if(runqget(&p) != nil) + runtime_throw("runq is not empty afterwards"); + } +} + +void runtime_testSchedLocalQueueSteal(void) + __asm__("runtime.testSchedLocalQueueSteal"); + +void +runtime_testSchedLocalQueueSteal(void) +{ + P p1, p2; + G gs[1000], *gp; + int32 i, j, s; + + runtime_memclr((byte*)&p1, sizeof(p1)); + p1.runqsize = 1; + p1.runqhead = 0; + p1.runqtail = 0; + p1.runq = runtime_malloc(p1.runqsize*sizeof(*p1.runq)); + + runtime_memclr((byte*)&p2, sizeof(p2)); + p2.runqsize = nelem(gs); + p2.runqhead = 0; + p2.runqtail = 0; + p2.runq = runtime_malloc(p2.runqsize*sizeof(*p2.runq)); + + for(i = 0; i < (int32)nelem(gs); i++) { + for(j = 0; j < i; j++) { + gs[j].sig = 0; + runqput(&p1, &gs[j]); + } + gp = runqsteal(&p2, &p1); + s = 0; + if(gp) { + s++; + gp->sig++; + } + while((gp = runqget(&p2)) != nil) { + s++; + gp->sig++; + } + while((gp = runqget(&p1)) != nil) + gp->sig++; + for(j = 0; j < i; j++) { + if(gs[j].sig != 1) { + runtime_printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i); + runtime_throw("bad element"); + } + } + if(s != i/2 && s != i/2+1) { + runtime_printf("bad steal %d, want %d or %d, iter %d\n", + s, i/2, i/2+1, i); + runtime_throw("bad steal"); + } + } +} + +void +runtime_proc_scan(void (*addroot)(Obj)) +{ + addroot((Obj){(byte*)&runtime_sched, sizeof runtime_sched, 0}); +} diff --git a/libgo/runtime/race.h b/libgo/runtime/race.h index 9f3b3ec..3357bed 100644 --- a/libgo/runtime/race.h +++ b/libgo/runtime/race.h @@ -11,17 +11,19 @@ enum { raceenabled = 0 }; #endif // Initialize race detection subsystem. -void runtime_raceinit(void); +uintptr runtime_raceinit(void); // Finalize race detection subsystem, does not return. 
void runtime_racefini(void); void runtime_racemapshadow(void *addr, uintptr size); void runtime_racemalloc(void *p, uintptr sz, void *pc); void runtime_racefree(void *p); -void runtime_racegostart(int32 goid, void *pc); -void runtime_racegoend(int32 goid); +uintptr runtime_racegostart(void *pc); +void runtime_racegoend(void); void runtime_racewritepc(void *addr, void *callpc, void *pc); void runtime_racereadpc(void *addr, void *callpc, void *pc); +void runtime_racewriterangepc(void *addr, uintptr sz, uintptr step, void *callpc, void *pc); +void runtime_racereadrangepc(void *addr, uintptr sz, uintptr step, void *callpc, void *pc); void runtime_racefingo(void); void runtime_raceacquire(void *addr); void runtime_raceacquireg(G *gp, void *addr); diff --git a/libgo/runtime/runtime.c b/libgo/runtime/runtime.c index 48ece55..138e5af 100644 --- a/libgo/runtime/runtime.c +++ b/libgo/runtime/runtime.c @@ -10,14 +10,27 @@ #include "array.h" #include "go-panic.h" +// The GOTRACEBACK environment variable controls the +// behavior of a Go program that is crashing and exiting. +// GOTRACEBACK=0 suppress all tracebacks +// GOTRACEBACK=1 default behavior - show tracebacks but exclude runtime frames +// GOTRACEBACK=2 show tracebacks including runtime frames +// GOTRACEBACK=crash show tracebacks including runtime frames, then crash (core dump etc) int32 -runtime_gotraceback(void) +runtime_gotraceback(bool *crash) { const byte *p; + if(crash != nil) + *crash = false; p = runtime_getenv("GOTRACEBACK"); if(p == nil || p[0] == '\0') return 1; // default is on + if(runtime_strcmp((const char *)p, "crash") == 0) { + if(crash != nil) + *crash = true; + return 2; // extra information + } return runtime_atoi(p); } @@ -44,6 +57,11 @@ runtime_progname() return argc == 0 ? nil : argv[0]; } +// Information about what cpu features are available. +// Set on startup in asm_{x86/amd64}.s. 
+uint32 runtime_cpuid_ecx; +uint32 runtime_cpuid_edx; + void runtime_goargs(void) { @@ -90,6 +108,52 @@ runtime_atoi(const byte *p) return n; } +static struct root_list runtime_roots = +{ nil, + { { &syscall_Envs, sizeof syscall_Envs }, + { &os_Args, sizeof os_Args }, + { nil, 0 } }, +}; + +static void +TestAtomic64(void) +{ + uint64 z64, x64; + + z64 = 42; + x64 = 0; + PREFETCH(&z64); + if(runtime_cas64(&z64, &x64, 1)) + runtime_throw("cas64 failed"); + if(x64 != 42) + runtime_throw("cas64 failed"); + if(!runtime_cas64(&z64, &x64, 1)) + runtime_throw("cas64 failed"); + if(x64 != 42 || z64 != 1) + runtime_throw("cas64 failed"); + if(runtime_atomicload64(&z64) != 1) + runtime_throw("load64 failed"); + runtime_atomicstore64(&z64, (1ull<<40)+1); + if(runtime_atomicload64(&z64) != (1ull<<40)+1) + runtime_throw("store64 failed"); + if(runtime_xadd64(&z64, (1ull<<40)+1) != (2ull<<40)+2) + runtime_throw("xadd64 failed"); + if(runtime_atomicload64(&z64) != (2ull<<40)+2) + runtime_throw("xadd64 failed"); + if(runtime_xchg64(&z64, (3ull<<40)+3) != (2ull<<40)+2) + runtime_throw("xchg64 failed"); + if(runtime_atomicload64(&z64) != (3ull<<40)+3) + runtime_throw("xchg64 failed"); +} + +void +runtime_check(void) +{ + __go_register_gc_roots(&runtime_roots); + + TestAtomic64(); +} + uint32 runtime_fastrand1(void) { @@ -105,19 +169,6 @@ runtime_fastrand1(void) return x; } -static struct root_list runtime_roots = -{ nil, - { { &syscall_Envs, sizeof syscall_Envs }, - { &os_Args, sizeof os_Args }, - { nil, 0 } }, -}; - -void -runtime_check(void) -{ - __go_register_gc_roots(&runtime_roots); -} - int64 runtime_cputicks(void) { @@ -139,7 +190,7 @@ runtime_showframe(String s, bool current) if(current && runtime_m()->throwing > 0) return 1; if(traceback < 0) - traceback = runtime_gotraceback(); + traceback = runtime_gotraceback(nil); return traceback > 1 || (__builtin_memchr(s.str, '.', s.len) != nil && __builtin_memcmp(s.str, "runtime.", 7) != 0); } diff --git a/libgo/runtime/runtime.h b/libgo/runtime/runtime.h index 959220d..5b2a64f 100644 --- a/libgo/runtime/runtime.h +++ b/libgo/runtime/runtime.h @@ -54,9 +54,11 @@ typedef uint8 bool; typedef uint8 byte; typedef struct Func Func; typedef struct G G; -typedef union Lock Lock; +typedef struct Lock Lock; typedef struct M M; -typedef union Note Note; +typedef struct P P; +typedef struct Note Note; +typedef struct String String; typedef struct FuncVal FuncVal; typedef struct SigTab SigTab; typedef struct MCache MCache; @@ -64,14 +66,14 @@ typedef struct FixAlloc FixAlloc; typedef struct Hchan Hchan; typedef struct Timers Timers; typedef struct Timer Timer; -typedef struct GCStats GCStats; -typedef struct LFNode LFNode; -typedef struct ParFor ParFor; -typedef struct ParForThread ParForThread; -typedef struct CgoMal CgoMal; +typedef struct GCStats GCStats; +typedef struct LFNode LFNode; +typedef struct ParFor ParFor; +typedef struct ParForThread ParForThread; +typedef struct CgoMal CgoMal; +typedef struct PollDesc PollDesc; typedef struct __go_open_array Slice; -typedef struct String String; typedef struct __go_interface Iface; typedef struct __go_empty_interface Eface; typedef struct __go_type_descriptor Type; @@ -81,6 +83,7 @@ typedef struct __go_panic_stack Panic; typedef struct __go_ptr_type PtrType; typedef struct __go_func_type FuncType; typedef struct __go_map_type MapType; +typedef struct __go_channel_type ChanType; typedef struct Traceback Traceback; @@ -110,11 +113,20 @@ enum Grunning, Gsyscall, Gwaiting, - Gmoribund, + Gmoribund_unused, // currently unused, 
but hardcoded in gdb scripts Gdead, }; enum { + // P status + Pidle, + Prunning, + Psyscall, + Pgcstop, + Pdead, +}; +enum +{ true = 1, false = 0, }; @@ -129,19 +141,22 @@ enum // Global <-> per-M stack segment cache transfer batch size. StackCacheBatch = 16, }; - /* * structures */ -union Lock +struct Lock { - uint32 key; // futex-based impl - M* waitm; // linked list of waiting M's (sema-based impl) + // Futex-based impl treats it as uint32 key, + // while sema-based impl as M* waitm. + // Used to be a union, but unions break precise GC. + uintptr key; }; -union Note +struct Note { - uint32 key; // futex-based impl - M* waitm; // waiting M (sema-based impl) + // Futex-based impl treats it as uint32 key, + // while sema-based impl as M* waitm. + // Used to be a union, but unions break precise GC. + uintptr key; }; struct String { @@ -194,13 +209,12 @@ struct G uint32 selgen; // valid sudog pointer const char* waitreason; // if status==Gwaiting G* schedlink; - bool readyonstop; bool ispanic; - bool issystem; - int8 raceignore; // ignore race detection events + bool issystem; // do not output in stack dump + bool isbackground; // ignore in deadlock detector + bool blockingsyscall; // hint that the next syscall will block M* m; // for debuggers, but offset not hard-coded M* lockedm; - M* idlem; int32 sig; int32 writenbuf; byte* writebuf; @@ -224,34 +238,44 @@ struct M { G* g0; // goroutine with scheduling stack G* gsignal; // signal-handling G + byte* gsignalstack; + size_t gsignalstacksize; + void (*mstartfn)(void); G* curg; // current running goroutine + P* p; // attached P for executing Go code (nil if not executing Go code) + P* nextp; int32 id; int32 mallocing; int32 throwing; int32 gcing; int32 locks; int32 nomemprof; - int32 waitnextg; int32 dying; int32 profilehz; int32 helpgc; + bool blockingsyscall; + bool spinning; uint32 fastrand; uint64 ncgocall; // number of cgo calls in total - Note havenextg; - G* nextg; + int32 ncgo; // number of cgo calls currently in progress + CgoMal* cgomal; + Note park; M* alllink; // on allm M* schedlink; MCache *mcache; G* lockedg; - G* idleg; Location createstack[32]; // Stack that created this thread. + uint32 locked; // tracking for LockOSThread M* nextwaitm; // next M waiting for lock uintptr waitsema; // semaphore for parking on locks uint32 waitsemacount; uint32 waitsemalock; GCStats gcstats; bool racecall; + bool needextram; void* racepc; + void (*waitunlockf)(Lock*); + void* waitlock; uintptr settype_buf[1024]; uintptr settype_bufsize; @@ -259,6 +283,38 @@ struct M uintptr end[]; }; +struct P +{ + Lock; + + uint32 status; // one of Pidle/Prunning/... + P* link; + uint32 tick; // incremented on every scheduler or system call + M* m; // back-link to associated M (nil if idle) + MCache* mcache; + + // Queue of runnable goroutines. + G** runq; + int32 runqhead; + int32 runqtail; + int32 runqsize; + + // Available G's (status == Gdead) + G* gfree; + int32 gfreecnt; + + byte pad[64]; +}; + +// The m->locked word holds a single bit saying whether +// external calls to LockOSThread are in effect, and then a counter +// of the internal nesting depth of lockOSThread / unlockOSThread. 
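// Worked example (illustration only, using the values defined just below):
// a single runtime_LockOSThread call sets the LockExternal bit, so
// m->locked == 1; two nested runtime_lockOSThread calls then add
// LockInternal twice, giving m->locked == 1 + 2*2 == 5.  Each
// runtime_unlockOSThread subtracts LockInternal again, and the static
// UnlockOSThread helper clears lockedg/lockedm only once m->locked has
// dropped back to 0.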
+enum +{ + LockExternal = 1, + LockInternal = 2, +}; + struct SigTab { int32 sig; @@ -271,6 +327,8 @@ enum SigThrow = 1<<2, // if signal.Notify doesn't take it, exit loudly SigPanic = 1<<3, // if the signal is from the kernel, panic SigDefault = 1<<4, // if the signal isn't explicitly requested, don't monitor it + SigHandling = 1<<5, // our signal handler is registered + SigIgnored = 1<<6, // the signal was ignored before we registered for it }; #ifndef NSIG @@ -343,6 +401,7 @@ struct ParFor bool wait; // if true, wait while all threads finish processing, // otherwise parfor may return while other threads are still working ParForThread *thr; // array of thread descriptors + uint32 pad; // to align ParForThread.pos for 64-bit atomic operations // stats uint64 nsteal; uint64 nstealcnt; @@ -356,7 +415,7 @@ struct ParFor struct CgoMal { CgoMal *next; - byte *alloc; + void *alloc; }; /* @@ -369,6 +428,19 @@ struct CgoMal #define USED(v) ((void) v) #define ROUND(x, n) (((x)+(n)-1)&~((n)-1)) /* all-caps to mark as macro: it evaluates n twice */ +byte* runtime_startup_random_data; +uint32 runtime_startup_random_data_len; +void runtime_get_random_data(byte**, int32*); + +enum { + // hashinit wants this many random bytes + HashRandomBytes = 32 +}; +void runtime_hashinit(void); + +void runtime_traceback(); +void runtime_tracebackothers(G*); + /* * external data */ @@ -376,21 +448,27 @@ extern uintptr runtime_zerobase; extern G* runtime_allg; extern G* runtime_lastg; extern M* runtime_allm; +extern P** runtime_allp; extern int32 runtime_gomaxprocs; extern bool runtime_singleproc; extern uint32 runtime_panicking; -extern int32 runtime_gcwaiting; // gc is waiting to run +extern uint32 runtime_gcwaiting; // gc is waiting to run +extern int8* runtime_goos; extern int32 runtime_ncpu; +extern void (*runtime_sysargs)(int32, uint8**); /* * common functions and data */ +#define runtime_strcmp(s1, s2) __builtin_strcmp((s1), (s2)) +#define runtime_strstr(s1, s2) __builtin_strstr((s1), (s2)) intgo runtime_findnull(const byte*); void runtime_dump(byte*, int32); /* * very low level c-called */ +struct __go_func_type; void runtime_args(int32, byte**); void runtime_osinit(); void runtime_goargs(void); @@ -400,42 +478,98 @@ void runtime_throw(const char*) __attribute__ ((noreturn)); void runtime_panicstring(const char*) __attribute__ ((noreturn)); void runtime_prints(const char*); void runtime_printf(const char*, ...); +#define runtime_mcmp(a, b, s) __builtin_memcmp((a), (b), (s)) +#define runtime_memmove(a, b, s) __builtin_memmove((a), (b), (s)) void* runtime_mal(uintptr); +String runtime_gostring(const byte*); +String runtime_gostringnocopy(const byte*); void runtime_schedinit(void); void runtime_initsig(void); void runtime_sigenable(uint32 sig); -int32 runtime_gotraceback(void); +void runtime_sigdisable(uint32 sig); +int32 runtime_gotraceback(bool *crash); void runtime_goroutineheader(G*); void runtime_goroutinetrailer(G*); -void runtime_traceback(); -void runtime_tracebackothers(G*); void runtime_printtrace(Location*, int32, bool); -String runtime_gostring(const byte*); -String runtime_gostringnocopy(const byte*); +#define runtime_open(p, f, m) open((p), (f), (m)) +#define runtime_read(d, v, n) read((d), (v), (n)) +#define runtime_write(d, v, n) write((d), (v), (n)) +#define runtime_close(d) close(d) +#define runtime_cas(pval, old, new) __sync_bool_compare_and_swap (pval, old, new) +#define runtime_cas64(pval, pold, new) __atomic_compare_exchange_n (pval, pold, new, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) +#define 
runtime_casp(pval, old, new) __sync_bool_compare_and_swap (pval, old, new) +// Don't confuse with XADD x86 instruction, +// this one is actually 'addx', that is, add-and-fetch. +#define runtime_xadd(p, v) __sync_add_and_fetch (p, v) +#define runtime_xadd64(p, v) __sync_add_and_fetch (p, v) +#define runtime_xchg(p, v) __atomic_exchange_n (p, v, __ATOMIC_SEQ_CST) +#define runtime_xchg64(p, v) __atomic_exchange_n (p, v, __ATOMIC_SEQ_CST) +#define runtime_atomicload(p) __atomic_load_n (p, __ATOMIC_SEQ_CST) +#define runtime_atomicstore(p, v) __atomic_store_n (p, v, __ATOMIC_SEQ_CST) +#define runtime_atomicstore64(p, v) __atomic_store_n (p, v, __ATOMIC_SEQ_CST) +#define runtime_atomicload64(p) __atomic_load_n (p, __ATOMIC_SEQ_CST) +#define runtime_atomicloadp(p) __atomic_load_n (p, __ATOMIC_SEQ_CST) +#define runtime_atomicstorep(p, v) __atomic_store_n (p, v, __ATOMIC_SEQ_CST) +void runtime_ready(G*); +const byte* runtime_getenv(const char*); +int32 runtime_atoi(const byte*); void* runtime_mstart(void*); G* runtime_malg(int32, byte**, size_t*); +void runtime_mpreinit(M*); void runtime_minit(void); +void runtime_unminit(void); +void runtime_signalstack(byte*, int32); +MCache* runtime_allocmcache(void); +void runtime_freemcache(MCache*); void runtime_mallocinit(void); +void runtime_mprofinit(void); +#define runtime_malloc(s) __go_alloc(s) +#define runtime_free(p) __go_free(p) +bool runtime_addfinalizer(void*, FuncVal *fn, const struct __go_func_type *); +#define runtime_getcallersp(p) __builtin_frame_address(1) +int32 runtime_mcount(void); +int32 runtime_gcount(void); +uint32 runtime_fastrand1(void); + +void runtime_setmg(M*, G*); +void runtime_newextram(void); +#define runtime_exit(s) exit(s) +#define runtime_breakpoint() __builtin_trap() void runtime_gosched(void); void runtime_park(void(*)(Lock*), Lock*, const char*); void runtime_tsleep(int64, const char*); M* runtime_newm(void); void runtime_goexit(void); void runtime_entersyscall(void) __asm__ (GOSYM_PREFIX "syscall.Entersyscall"); +void runtime_entersyscallblock(void); void runtime_exitsyscall(void) __asm__ (GOSYM_PREFIX "syscall.Exitsyscall"); +G* __go_go(void (*pfn)(void*), void*); void siginit(void); bool __go_sigsend(int32 sig); int32 runtime_callers(int32, Location*, int32); int64 runtime_nanotime(void); +void runtime_dopanic(int32) __attribute__ ((noreturn)); +void runtime_startpanic(void); +void runtime_sigprof(); +void runtime_resetcpuprofiler(int32); +void runtime_setcpuprofilerate(void(*)(uintptr*, int32), int32); +void runtime_usleep(uint32); int64 runtime_cputicks(void); int64 runtime_tickspersecond(void); void runtime_blockevent(int64, int32); extern int64 runtime_blockprofilerate; +void runtime_addtimer(Timer*); +bool runtime_deltimer(Timer*); +G* runtime_netpoll(bool); +void runtime_netpollinit(void); +int32 runtime_netpollopen(int32, PollDesc*); +int32 runtime_netpollclose(int32); +void runtime_netpollready(G**, PollDesc*, int32); +void runtime_crash(void); void runtime_stoptheworld(void); void runtime_starttheworld(void); extern uint32 runtime_worldsema; -G* __go_go(void (*pfn)(void*), void*); /* * mutual exclusion locks. 
in the uncontended case, @@ -533,6 +667,7 @@ void __wrap_rtems_task_variable_add(void **); * runtime go-called */ void runtime_printbool(_Bool); +void runtime_printbyte(int8); void runtime_printfloat(double); void runtime_printint(int64); void runtime_printiface(Iface); @@ -544,53 +679,10 @@ void runtime_printuint(uint64); void runtime_printhex(uint64); void runtime_printslice(Slice); void runtime_printcomplex(__complex double); - -struct __go_func_type; void reflect_call(const struct __go_func_type *, FuncVal *, _Bool, _Bool, void **, void **) __asm__ (GOSYM_PREFIX "reflect.call"); - -/* Functions. */ #define runtime_panic __go_panic -#define runtime_write(d, v, n) write((d), (v), (n)) -#define runtime_malloc(s) __go_alloc(s) -#define runtime_free(p) __go_free(p) -#define runtime_strcmp(s1, s2) __builtin_strcmp((s1), (s2)) -#define runtime_mcmp(a, b, s) __builtin_memcmp((a), (b), (s)) -#define runtime_memmove(a, b, s) __builtin_memmove((a), (b), (s)) -#define runtime_exit(s) exit(s) -MCache* runtime_allocmcache(void); -void free(void *v); -#define runtime_cas(pval, old, new) __sync_bool_compare_and_swap (pval, old, new) -#define runtime_casp(pval, old, new) __sync_bool_compare_and_swap (pval, old, new) -#define runtime_cas64(pval, pold, new) __atomic_compare_exchange_n (pval, pold, new, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) -#define runtime_xadd(p, v) __sync_add_and_fetch (p, v) -#define runtime_xadd64(p, v) __sync_add_and_fetch (p, v) -#define runtime_xchg(p, v) __atomic_exchange_n (p, v, __ATOMIC_SEQ_CST) -#define runtime_atomicload(p) __atomic_load_n (p, __ATOMIC_SEQ_CST) -#define runtime_atomicstore(p, v) __atomic_store_n (p, v, __ATOMIC_SEQ_CST) -#define runtime_atomicloadp(p) __atomic_load_n (p, __ATOMIC_SEQ_CST) -#define runtime_atomicstorep(p, v) __atomic_store_n (p, v, __ATOMIC_SEQ_CST) -#define runtime_atomicload64(p) __atomic_load_n (p, __ATOMIC_SEQ_CST) -#define runtime_atomicstore64(p, v) __atomic_store_n (p, v, __ATOMIC_SEQ_CST) -#define PREFETCH(p) __builtin_prefetch(p) - -struct __go_func_type; -bool runtime_addfinalizer(void*, FuncVal *fn, const struct __go_func_type *); -#define runtime_getcallersp(p) __builtin_frame_address(1) -int32 runtime_mcount(void); -int32 runtime_gcount(void); -void runtime_dopanic(int32) __attribute__ ((noreturn)); -void runtime_startpanic(void); -void runtime_ready(G*); -const byte* runtime_getenv(const char*); -int32 runtime_atoi(const byte*); -uint32 runtime_fastrand1(void); - -void runtime_sigprof(); -void runtime_resetcpuprofiler(int32); -void runtime_setcpuprofilerate(void(*)(uintptr*, int32), int32); -void runtime_usleep(uint32); /* * runtime c-called (but written in Go) @@ -605,14 +697,13 @@ void runtime_newErrorString(String, Eface*) /* * wrapped for go users */ -#define ISNAN(f) __builtin_isnan(f) void runtime_semacquire(uint32 volatile *); void runtime_semrelease(uint32 volatile *); int32 runtime_gomaxprocsfunc(int32 n); void runtime_procyield(uint32); void runtime_osyield(void); -void runtime_LockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.LockOSThread"); -void runtime_UnlockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.UnlockOSThread"); +void runtime_lockOSThread(void); +void runtime_unlockOSThread(void); bool runtime_showframe(String, bool); @@ -628,12 +719,13 @@ uintptr runtime_memlimit(void); // This is a no-op on other systems. 
void runtime_setprof(bool); +#define ISNAN(f) __builtin_isnan(f) + enum { - UseSpanType = 1, + UseSpanType = 0, }; -void runtime_setsig(int32, bool, bool); #define runtime_setitimer setitimer void runtime_check(void); @@ -658,5 +750,8 @@ struct backtrace_state; extern struct backtrace_state *__go_get_backtrace_state(void); extern _Bool __go_file_line(uintptr, String*, String*, intgo *); extern byte* runtime_progname(); +extern void runtime_main(void*); int32 getproccount(void); + +#define PREFETCH(p) __builtin_prefetch(p) diff --git a/libgo/runtime/sema.goc b/libgo/runtime/sema.goc index 4622f6c..be971bd 100644 --- a/libgo/runtime/sema.goc +++ b/libgo/runtime/sema.goc @@ -44,12 +44,12 @@ struct SemaRoot // Prime to not correlate with any user patterns. #define SEMTABLESZ 251 -union semtable +struct semtable { SemaRoot; - uint8 pad[CacheLineSize]; + uint8 pad[CacheLineSize-sizeof(SemaRoot)]; }; -static union semtable semtable[SEMTABLESZ]; +static struct semtable semtable[SEMTABLESZ]; static SemaRoot* semroot(uint32 volatile *addr) diff --git a/libgo/runtime/signal_unix.c b/libgo/runtime/signal_unix.c index 3b8f439..5a506c8 100644 --- a/libgo/runtime/signal_unix.c +++ b/libgo/runtime/signal_unix.c @@ -8,6 +8,7 @@ #include "runtime.h" #include "defs.h" +#include "signal_unix.h" extern SigTab runtime_sigtab[]; @@ -22,7 +23,21 @@ runtime_initsig(void) t = &runtime_sigtab[i]; if((t->flags == 0) || (t->flags & SigDefault)) continue; - runtime_setsig(i, false, true); + + // For some signals, we respect an inherited SIG_IGN handler + // rather than insist on installing our own default handler. + // Even these signals can be fetched using the os/signal package. + switch(t->sig) { + case SIGHUP: + case SIGINT: + if(runtime_getsig(i) == GO_SIG_IGN) { + t->flags = SigNotify | SigIgnored; + continue; + } + } + + t->flags |= SigHandling; + runtime_setsig(i, runtime_sighandler, true); } } @@ -32,16 +47,49 @@ runtime_sigenable(uint32 sig) int32 i; SigTab *t; + t = nil; for(i = 0; runtime_sigtab[i].sig != -1; i++) { - // ~0 means all signals. - if(~sig == 0 || runtime_sigtab[i].sig == (int32)sig) { + if(runtime_sigtab[i].sig == (int32)sig) { t = &runtime_sigtab[i]; - if(t->flags & SigDefault) { - runtime_setsig(i, false, true); - t->flags &= ~SigDefault; // make this idempotent - } + break; } } + + if(t == nil) + return; + + if((t->flags & SigNotify) && !(t->flags & SigHandling)) { + t->flags |= SigHandling; + if(runtime_getsig(i) == GO_SIG_IGN) + t->flags |= SigIgnored; + runtime_setsig(i, runtime_sighandler, true); + } +} + +void +runtime_sigdisable(uint32 sig) +{ + int32 i; + SigTab *t; + + t = nil; + for(i = 0; runtime_sigtab[i].sig != -1; i++) { + if(runtime_sigtab[i].sig == (int32)sig) { + t = &runtime_sigtab[i]; + break; + } + } + + if(t == nil) + return; + + if((t->flags & SigNotify) && (t->flags & SigHandling)) { + t->flags &= ~SigHandling; + if(t->flags & SigIgnored) + runtime_setsig(i, GO_SIG_IGN, true); + else + runtime_setsig(i, GO_SIG_DFL, true); + } } void @@ -62,3 +110,44 @@ runtime_resetcpuprofiler(int32 hz) } runtime_m()->profilehz = hz; } + +void +os_sigpipe(void) +{ + int32 i; + + for(i = 0; runtime_sigtab[i].sig != -1; i++) + if(runtime_sigtab[i].sig == SIGPIPE) + break; + runtime_setsig(i, GO_SIG_DFL, false); + runtime_raise(SIGPIPE); +} + +void +runtime_crash(void) +{ + int32 i; + +#ifdef GOOS_darwin + // OS X core dumps are linear dumps of the mapped memory, + // from the first virtual byte to the last, with zeros in the gaps. 
+ // Because of the way we arrange the address space on 64-bit systems, + // this means the OS X core file will be >128 GB and even on a zippy + // workstation can take OS X well over an hour to write (uninterruptible). + // Save users from making that mistake. + if(sizeof(void*) == 8) + return; +#endif + + for(i = 0; runtime_sigtab[i].sig != -1; i++) + if(runtime_sigtab[i].sig == SIGABRT) + break; + runtime_setsig(i, GO_SIG_DFL, false); + runtime_raise(SIGABRT); +} + +void +runtime_raise(int32 sig) +{ + raise(sig); +} diff --git a/libgo/runtime/signal_unix.h b/libgo/runtime/signal_unix.h new file mode 100644 index 0000000..1c51740 --- /dev/null +++ b/libgo/runtime/signal_unix.h @@ -0,0 +1,22 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include <signal.h> + +#define GO_SIG_DFL ((void*)SIG_DFL) +#define GO_SIG_IGN ((void*)SIG_IGN) + +#ifdef SA_SIGINFO +typedef siginfo_t Siginfo; +#else +typedef void *Siginfo; +#endif + +typedef void GoSighandler(int32, Siginfo*, void*, G*); +void runtime_setsig(int32, GoSighandler*, bool); +GoSighandler* runtime_getsig(int32); + +void runtime_sighandler(int32 sig, Siginfo *info, void *context, G *gp); +void runtime_raise(int32); + diff --git a/libgo/runtime/sigqueue.goc b/libgo/runtime/sigqueue.goc index 82b0400..8657216 100644 --- a/libgo/runtime/sigqueue.goc +++ b/libgo/runtime/sigqueue.goc @@ -107,7 +107,7 @@ func signal_recv() (m uint32) { new = HASWAITER; if(runtime_cas(&sig.state, old, new)) { if (new == HASWAITER) { - runtime_entersyscall(); + runtime_entersyscallblock(); runtime_notesleep(&sig); runtime_exitsyscall(); runtime_noteclear(&sig); @@ -135,8 +135,6 @@ done:; // Must only be called from a single goroutine at a time. func signal_enable(s uint32) { - int32 i; - if(!sig.inuse) { // The first call to signal_enable is for us // to use for initialization. It does not pass @@ -146,16 +144,16 @@ func signal_enable(s uint32) { return; } - if(~s == 0) { - // Special case: want everything. - for(i=0; (size_t)i<nelem(sig.wanted); i++) - sig.wanted[i] = ~(uint32)0; - runtime_sigenable(s); - return; - } - if(s >= nelem(sig.wanted)*32) return; sig.wanted[s/32] |= 1U<<(s&31); runtime_sigenable(s); } + +// Must only be called from a single goroutine at a time. +func signal_disable(s uint32) { + if(s >= nelem(sig.wanted)*32) + return; + sig.wanted[s/32] &= ~(1U<<(s&31)); + runtime_sigdisable(s); +} diff --git a/libgo/runtime/string.goc b/libgo/runtime/string.goc index 04ecbe6..64ed4f6e 100644 --- a/libgo/runtime/string.goc +++ b/libgo/runtime/string.goc @@ -7,6 +7,7 @@ package runtime #include "arch.h" #include "malloc.h" #include "go-string.h" +#include "race.h" #define charntorune(pv, str, len) __go_get_rune(str, len, pv) diff --git a/libgo/runtime/thread-linux.c b/libgo/runtime/thread-linux.c index 13d23c4..74139ea 100644 --- a/libgo/runtime/thread-linux.c +++ b/libgo/runtime/thread-linux.c @@ -15,6 +15,7 @@ // Futexsleep is allowed to wake up spuriously. #include <errno.h> +#include <signal.h> #include <string.h> #include <time.h> #include <sys/types.h> @@ -83,3 +84,48 @@ runtime_goenvs(void) { runtime_goenvs_unix(); } + +// Called to initialize a new m (including the bootstrap m). +// Called on the parent thread (main thread in case of bootstrap), can allocate memory. 
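// In this port that means allocating the signal-handling G and its stack
// with runtime_malg; the stack is only installed via sigaltstack later,
// on the new thread itself, by runtime_minit/runtime_signalstack below.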
+void +runtime_mpreinit(M *mp) +{ + mp->gsignal = runtime_malg(32*1024, &mp->gsignalstack, &mp->gsignalstacksize); // OS X wants >=8K, Linux >=2K +} + +// Called to initialize a new m (including the bootstrap m). +// Called on the new thread, can not allocate memory. +void +runtime_minit(void) +{ + M* m; + sigset_t sigs; + + // Initialize signal handling. + m = runtime_m(); + runtime_signalstack(m->gsignalstack, m->gsignalstacksize); + if (sigemptyset(&sigs) != 0) + runtime_throw("sigemptyset"); + sigprocmask(SIG_SETMASK, &sigs, nil); +} + +// Called from dropm to undo the effect of an minit. +void +runtime_unminit(void) +{ + runtime_signalstack(nil, 0); +} + +void +runtime_signalstack(byte *p, int32 n) +{ + stack_t st; + + st.ss_sp = p; + st.ss_size = n; + st.ss_flags = 0; + if(p == nil) + st.ss_flags = SS_DISABLE; + if(sigaltstack(&st, nil) < 0) + *(int *)0xf1 = 0xf1; +} diff --git a/libgo/runtime/thread.c b/libgo/runtime/thread.c index 12d0099..83ee006 100644 --- a/libgo/runtime/thread.c +++ b/libgo/runtime/thread.c @@ -133,27 +133,6 @@ __sync_add_and_fetch_8 (uint64* ptr, uint64 add) #endif -// Called to initialize a new m (including the bootstrap m). -void -runtime_minit(void) -{ - byte* stack; - size_t stacksize; - stack_t ss; - sigset_t sigs; - - // Initialize signal handling. - runtime_m()->gsignal = runtime_malg(32*1024, &stack, &stacksize); // OS X wants >=8K, Linux >=2K - ss.ss_sp = stack; - ss.ss_flags = 0; - ss.ss_size = stacksize; - if(sigaltstack(&ss, nil) < 0) - *(int *)0xf1 = 0xf1; - if (sigemptyset(&sigs) != 0) - runtime_throw("sigemptyset"); - sigprocmask(SIG_SETMASK, &sigs, nil); -} - uintptr runtime_memlimit(void) { diff --git a/libgo/runtime/time.goc b/libgo/runtime/time.goc index e9f087a..e06b75c 100644 --- a/libgo/runtime/time.goc +++ b/libgo/runtime/time.goc @@ -14,7 +14,6 @@ package time static Timers timers; static void addtimer(Timer*); -static bool deltimer(Timer*); // Package time APIs. // Godoc uses the comments in package time, not these. @@ -30,15 +29,13 @@ func Sleep(ns int64) { func startTimer(t *Timer) { if(raceenabled) runtime_racerelease(t); - runtime_lock(&timers); - addtimer(t); - runtime_unlock(&timers); + runtime_addtimer(t); } // stopTimer removes t from the timer heap if it is there. // It returns true if t was removed, false if t wasn't even there. func stopTimer(t *Timer) (stopped bool) { - stopped = deltimer(t); + stopped = runtime_deltimer(t); } // C runtime. @@ -80,6 +77,14 @@ runtime_tsleep(int64 ns, const char *reason) runtime_park(runtime_unlock, &timers, reason); } +void +runtime_addtimer(Timer *t) +{ + runtime_lock(&timers); + addtimer(t); + runtime_unlock(&timers); +} + // Add a timer to the heap and start or kick the timer proc // if the new timer is earlier than any of the others. static void @@ -122,8 +127,8 @@ addtimer(Timer *t) // Delete timer t from the heap. // Do not need to update the timerproc: // if it wakes up early, no big deal. -static bool -deltimer(Timer *t) +bool +runtime_deltimer(Timer *t) { int32 i; @@ -205,7 +210,7 @@ timerproc(void* dummy __attribute__ ((unused))) timers.sleeping = true; runtime_noteclear(&timers.waitnote); runtime_unlock(&timers); - runtime_entersyscall(); + runtime_entersyscallblock(); runtime_notetsleep(&timers.waitnote, delta); runtime_exitsyscall(); } |
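The runtime_entersyscallblock() calls introduced above in sigqueue.goc and in
timerproc here replace runtime_entersyscall() around waits that are known to
block, so the P can be handed off immediately instead of waiting for sysmon's
retake loop. A minimal sketch of the resulting pattern, using only entry
points declared in this patch (the helper name and its timeout convention are
illustrative assumptions, not part of the change):

static void
sleep_on_note(Note *n, int64 ns)
{
	runtime_entersyscallblock();	// known to block: give up the P right away
	if(ns < 0)
		runtime_notesleep(n);	// sleep until another thread calls notewakeup
	else
		runtime_notetsleep(n, ns);	// sleep with a timeout, as timerproc does
	runtime_exitsyscall();		// reacquire a P before running Go code again
	runtime_noteclear(n);		// reset the note for the next wait
}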