author     Ian Lance Taylor <ian@gcc.gnu.org>    2013-07-16 06:54:42 +0000
committer  Ian Lance Taylor <ian@gcc.gnu.org>    2013-07-16 06:54:42 +0000
commit     be47d6eceffd2c5dbbc1566d5eea490527fb2bd4 (patch)
tree       0e8fda573576bb4181dba29d0e88380a8c38fafd /libgo/runtime
parent     efb30cdeb003fd7c585ee0d7657340086abcbd9e (diff)
libgo: Update to Go 1.1.1.
From-SVN: r200974
Diffstat (limited to 'libgo/runtime')
-rw-r--r--  libgo/runtime/chan.c                  74
-rw-r--r--  libgo/runtime/cpuprof.c                6
-rw-r--r--  libgo/runtime/go-main.c               11
-rw-r--r--  libgo/runtime/go-map-index.c           2
-rw-r--r--  libgo/runtime/go-reflect-map.c         9
-rw-r--r--  libgo/runtime/go-signal.c            108
-rw-r--r--  libgo/runtime/go-unsafe-new.c         11
-rw-r--r--  libgo/runtime/go-unsafe-newarray.c    18
-rw-r--r--  libgo/runtime/lock_futex.c            28
-rw-r--r--  libgo/runtime/lock_sema.c             34
-rw-r--r--  libgo/runtime/malloc.goc             114
-rw-r--r--  libgo/runtime/malloc.h                30
-rw-r--r--  libgo/runtime/mcache.c                 4
-rw-r--r--  libgo/runtime/mcentral.c               8
-rw-r--r--  libgo/runtime/mem.c                    8
-rw-r--r--  libgo/runtime/mfixalloc.c              5
-rw-r--r--  libgo/runtime/mgc0.c                 853
-rw-r--r--  libgo/runtime/mgc0.h                   7
-rw-r--r--  libgo/runtime/mheap.c                104
-rw-r--r--  libgo/runtime/mprof.goc               91
-rw-r--r--  libgo/runtime/netpoll.goc            356
-rw-r--r--  libgo/runtime/netpoll_epoll.c        154
-rw-r--r--  libgo/runtime/netpoll_kqueue.c       108
-rw-r--r--  libgo/runtime/netpoll_stub.c          18
-rw-r--r--  libgo/runtime/panic.c                 12
-rw-r--r--  libgo/runtime/parfor.c                 8
-rw-r--r--  libgo/runtime/print.c                  9
-rw-r--r--  libgo/runtime/proc.c                2658
-rw-r--r--  libgo/runtime/race.h                   8
-rw-r--r--  libgo/runtime/runtime.c               81
-rw-r--r--  libgo/runtime/runtime.h              255
-rw-r--r--  libgo/runtime/sema.goc                 6
-rw-r--r--  libgo/runtime/signal_unix.c          103
-rw-r--r--  libgo/runtime/signal_unix.h           22
-rw-r--r--  libgo/runtime/sigqueue.goc            20
-rw-r--r--  libgo/runtime/string.goc               1
-rw-r--r--  libgo/runtime/thread-linux.c          46
-rw-r--r--  libgo/runtime/thread.c                21
-rw-r--r--  libgo/runtime/time.goc                21
39 files changed, 4078 insertions, 1354 deletions
diff --git a/libgo/runtime/chan.c b/libgo/runtime/chan.c
index a79ee9e..6f52a1d 100644
--- a/libgo/runtime/chan.c
+++ b/libgo/runtime/chan.c
@@ -35,6 +35,8 @@ struct WaitQ
SudoG* last;
};
+// The garbage collector is assuming that Hchan can only contain pointers into the stack
+// and cannot contain pointers into the heap.
struct Hchan
{
uintgo qcount; // total data in the q
@@ -49,6 +51,8 @@ struct Hchan
Lock;
};
+uint32 runtime_Hchansize = sizeof(Hchan);
+
// Buffer follows Hchan immediately in memory.
// chanbuf(c, i) is pointer to the i'th slot in the buffer.
#define chanbuf(c, i) ((byte*)((c)+1)+(uintptr)(c)->elemsize*(i))
@@ -107,6 +111,7 @@ runtime_makechan_c(ChanType *t, int64 hint)
c->elemsize = elem->__size;
c->elemalign = elem->__align;
c->dataqsiz = hint;
+ runtime_settype(c, (uintptr)t | TypeInfo_Chan);
if(debug)
runtime_printf("makechan: chan=%p; elemsize=%D; elemalign=%d; dataqsiz=%D\n",
@@ -875,16 +880,27 @@ sellock(Select *sel)
static void
selunlock(Select *sel)
{
- uint32 i;
- Hchan *c, *c0;
+ int32 i, n, r;
+ Hchan *c;
- c = nil;
- for(i=sel->ncase; i-->0;) {
- c0 = sel->lockorder[i];
- if(c0 && c0 != c) {
- c = c0;
- runtime_unlock(c);
- }
+ // We must be very careful here to not touch sel after we have unlocked
+ // the last lock, because sel can be freed right after the last unlock.
+ // Consider the following situation.
+ // First M calls runtime_park() in runtime_selectgo() passing the sel.
+ // Once runtime_park() has unlocked the last lock, another M makes
+ // the G that calls select runnable again and schedules it for execution.
+ // When the G runs on another M, it locks all the locks and frees sel.
+ // Now if the first M touches sel, it will access freed memory.
+ n = (int32)sel->ncase;
+ r = 0;
+ // skip the default case
+ if(n>0 && sel->lockorder[0] == nil)
+ r = 1;
+ for(i = n-1; i >= r; i--) {
+ c = sel->lockorder[i];
+ if(i>0 && sel->lockorder[i-1] == c)
+ continue; // will unlock it on the next iteration
+ runtime_unlock(c);
}
}
@@ -910,7 +926,7 @@ static int
selectgo(Select **selp)
{
Select *sel;
- uint32 o, i, j;
+ uint32 o, i, j, k;
Scase *cas, *dfl;
Hchan *c;
SudoG *sg;
@@ -946,12 +962,42 @@ selectgo(Select **selp)
}
// sort the cases by Hchan address to get the locking order.
+ // simple heap sort, to guarantee n log n time and constant stack footprint.
for(i=0; i<sel->ncase; i++) {
- c = sel->scase[i].chan;
- for(j=i; j>0 && sel->lockorder[j-1] >= c; j--)
- sel->lockorder[j] = sel->lockorder[j-1];
+ j = i;
+ c = sel->scase[j].chan;
+ while(j > 0 && sel->lockorder[k=(j-1)/2] < c) {
+ sel->lockorder[j] = sel->lockorder[k];
+ j = k;
+ }
sel->lockorder[j] = c;
}
+ for(i=sel->ncase; i-->0; ) {
+ c = sel->lockorder[i];
+ sel->lockorder[i] = sel->lockorder[0];
+ j = 0;
+ for(;;) {
+ k = j*2+1;
+ if(k >= i)
+ break;
+ if(k+1 < i && sel->lockorder[k] < sel->lockorder[k+1])
+ k++;
+ if(c < sel->lockorder[k]) {
+ sel->lockorder[j] = sel->lockorder[k];
+ j = k;
+ continue;
+ }
+ break;
+ }
+ sel->lockorder[j] = c;
+ }
+ /*
+ for(i=0; i+1<sel->ncase; i++)
+ if(sel->lockorder[i] > sel->lockorder[i+1]) {
+ runtime_printf("i=%d %p %p\n", i, sel->lockorder[i], sel->lockorder[i+1]);
+ runtime_throw("select: broken sort");
+ }
+ */
sellock(sel);
loop:
@@ -1048,7 +1094,7 @@ loop:
c = cas->chan;
if(c->dataqsiz > 0)
- runtime_throw("selectgo: shouldnt happen");
+ runtime_throw("selectgo: shouldn't happen");
if(debug)
runtime_printf("wait-return: sel=%p c=%p cas=%p kind=%d\n",
diff --git a/libgo/runtime/cpuprof.c b/libgo/runtime/cpuprof.c
index 3ef08ef..5163873 100644
--- a/libgo/runtime/cpuprof.c
+++ b/libgo/runtime/cpuprof.c
@@ -121,7 +121,9 @@ static uintptr eod[3] = {0, 1, 0};
// LostProfileData is a no-op function used in profiles
// to mark the number of profiling stack traces that were
// discarded due to slow data writers.
-static void LostProfileData(void) {
+static void
+LostProfileData(void)
+{
}
extern void runtime_SetCPUProfileRate(intgo)
@@ -365,7 +367,7 @@ getprofile(Profile *p)
return ret;
// Wait for new log.
- runtime_entersyscall();
+ runtime_entersyscallblock();
runtime_notesleep(&p->wait);
runtime_exitsyscall();
runtime_noteclear(&p->wait);
diff --git a/libgo/runtime/go-main.c b/libgo/runtime/go-main.c
index 97d1405..77233d3 100644
--- a/libgo/runtime/go-main.c
+++ b/libgo/runtime/go-main.c
@@ -30,9 +30,6 @@
extern char **environ;
-extern void runtime_main (void);
-static void mainstart (void *);
-
/* The main function. */
int
@@ -42,13 +39,7 @@ main (int argc, char **argv)
runtime_args (argc, (byte **) argv);
runtime_osinit ();
runtime_schedinit ();
- __go_go (mainstart, NULL);
+ __go_go (runtime_main, NULL);
runtime_mstart (runtime_m ());
abort ();
}
-
-static void
-mainstart (void *arg __attribute__ ((unused)))
-{
- runtime_main ();
-}
diff --git a/libgo/runtime/go-map-index.c b/libgo/runtime/go-map-index.c
index a602d2a..499641c 100644
--- a/libgo/runtime/go-map-index.c
+++ b/libgo/runtime/go-map-index.c
@@ -98,7 +98,7 @@ __go_map_index (struct __go_map *map, const void *key, _Bool insert)
key_descriptor = descriptor->__map_descriptor->__key_type;
key_offset = descriptor->__key_offset;
key_size = key_descriptor->__size;
- __go_assert (key_size != 0 && key_size != -1UL);
+ __go_assert (key_size != -1UL);
equalfn = key_descriptor->__equalfn;
key_hash = key_descriptor->__hashfn (key, key_size);
diff --git a/libgo/runtime/go-reflect-map.c b/libgo/runtime/go-reflect-map.c
index 3697537..1ae7c96 100644
--- a/libgo/runtime/go-reflect-map.c
+++ b/libgo/runtime/go-reflect-map.c
@@ -238,3 +238,12 @@ makemap (const struct __go_map_type *t)
__builtin_memcpy (ret, &map, sizeof (void *));
return (uintptr_t) ret;
}
+
+extern _Bool ismapkey (const struct __go_type_descriptor *)
+ __asm__ (GOSYM_PREFIX "reflect.ismapkey");
+
+_Bool
+ismapkey (const struct __go_type_descriptor *typ)
+{
+ return typ != NULL && typ->__hashfn != __go_type_hash_error;
+}
diff --git a/libgo/runtime/go-signal.c b/libgo/runtime/go-signal.c
index 1965e05..1e80057 100644
--- a/libgo/runtime/go-signal.c
+++ b/libgo/runtime/go-signal.c
@@ -12,6 +12,7 @@
#include "runtime.h"
#include "go-assert.h"
#include "go-panic.h"
+#include "signal_unix.h"
#ifndef SA_RESTART
#define SA_RESTART 0
@@ -157,12 +158,15 @@ runtime_badsignal(int32 sig)
/* Handle a signal, for cases where we don't panic. We can split the
stack here. */
-static void
-sig_handler (int sig)
+void
+runtime_sighandler (int sig, Siginfo *info,
+ void *context __attribute__ ((unused)), G *gp)
{
+ M *m;
int i;
- if (runtime_m () == NULL)
+ m = runtime_m ();
+ if (m == NULL)
{
runtime_badsignal (sig);
return;
@@ -171,7 +175,8 @@ sig_handler (int sig)
#ifdef SIGPROF
if (sig == SIGPROF)
{
- runtime_sigprof ();
+ if (gp != runtime_m ()->g0 && gp != runtime_m ()->gsignal)
+ runtime_sigprof ();
return;
}
#endif
@@ -179,13 +184,18 @@ sig_handler (int sig)
for (i = 0; runtime_sigtab[i].sig != -1; ++i)
{
SigTab *t;
+ bool notify, crash;
t = &runtime_sigtab[i];
if (t->sig != sig)
continue;
- if ((t->flags & SigNotify) != 0)
+ notify = false;
+#ifdef SA_SIGINFO
+ notify = info != NULL && info->si_code == SI_USER;
+#endif
+ if (notify || (t->flags & SigNotify) != 0)
{
if (__go_sigsend (sig))
return;
@@ -210,9 +220,15 @@ sig_handler (int sig)
runtime_printf ("%s\n", name);
}
+ if (m->lockedg != NULL && m->ncgo > 0 && gp == m->g0)
+ {
+ runtime_printf("signal arrived during cgo execution\n");
+ gp = m->lockedg;
+ }
+
runtime_printf ("\n");
- if (runtime_gotraceback ())
+ if (runtime_gotraceback (&crash))
{
G *g;
@@ -225,6 +241,9 @@ sig_handler (int sig)
a readable form. */
}
+ if (crash)
+ runtime_crash ();
+
runtime_exit (2);
}
@@ -259,15 +278,14 @@ sig_panic_leadin (int sig)
permitted to split the stack. */
static void
-sig_panic_info_handler (int sig, siginfo_t *info,
- void *context __attribute__ ((unused)))
+sig_panic_info_handler (int sig, Siginfo *info, void *context)
{
G *g;
g = runtime_g ();
if (g == NULL || info->si_code == SI_USER)
{
- sig_handler (sig);
+ runtime_sighandler (sig, info, context, g);
return;
}
@@ -331,7 +349,7 @@ sig_panic_handler (int sig)
g = runtime_g ();
if (g == NULL)
{
- sig_handler (sig);
+ runtime_sighandler (sig, NULL, NULL, g);
return;
}
@@ -373,10 +391,10 @@ sig_panic_handler (int sig)
the stack. */
static void
-sig_tramp (int) __attribute__ ((no_split_stack));
+sig_tramp_info (int, Siginfo *, void *) __attribute__ ((no_split_stack));
static void
-sig_tramp (int sig)
+sig_tramp_info (int sig, Siginfo *info, void *context)
{
G *gp;
M *mp;
@@ -403,7 +421,7 @@ sig_tramp (int sig)
#endif
}
- sig_handler (sig);
+ runtime_sighandler (sig, info, context, gp);
/* We are going to return back to the signal trampoline and then to
whatever we were doing before we got the signal. Restore the
@@ -418,8 +436,20 @@ sig_tramp (int sig)
}
}
+#ifndef SA_SIGINFO
+
+static void sig_tramp (int sig) __attribute__ ((no_split_stack));
+
+static void
+sig_tramp (int sig)
+{
+ sig_tramp_info (sig, NULL, NULL);
+}
+
+#endif
+
void
-runtime_setsig (int32 i, bool def __attribute__ ((unused)), bool restart)
+runtime_setsig (int32 i, GoSighandler *fn, bool restart)
{
struct sigaction sa;
int r;
@@ -434,17 +464,30 @@ runtime_setsig (int32 i, bool def __attribute__ ((unused)), bool restart)
if ((t->flags & SigPanic) == 0)
{
+#ifdef SA_SIGINFO
+ sa.sa_flags = SA_ONSTACK | SA_SIGINFO;
+ if (fn == runtime_sighandler)
+ fn = (void *) sig_tramp_info;
+ sa.sa_sigaction = (void *) fn;
+#else
sa.sa_flags = SA_ONSTACK;
- sa.sa_handler = sig_tramp;
+ if (fn == runtime_sighandler)
+ fn = (void *) sig_tramp;
+ sa.sa_handler = (void *) fn;
+#endif
}
else
{
#ifdef SA_SIGINFO
sa.sa_flags = SA_SIGINFO;
- sa.sa_sigaction = sig_panic_info_handler;
+ if (fn == runtime_sighandler)
+ fn = (void *) sig_panic_info_handler;
+ sa.sa_sigaction = (void *) fn;
#else
sa.sa_flags = 0;
- sa.sa_handler = sig_panic_handler;
+ if (fn == runtime_sighandler)
+ fn = (void *) sig_panic_handler;
+ sa.sa_handler = (void *) fn;
#endif
}
@@ -455,6 +498,37 @@ runtime_setsig (int32 i, bool def __attribute__ ((unused)), bool restart)
__go_assert (0);
}
+GoSighandler*
+runtime_getsig (int32 i)
+{
+ struct sigaction sa;
+ int r;
+ SigTab *t;
+
+ memset (&sa, 0, sizeof sa);
+
+ r = sigemptyset (&sa.sa_mask);
+ __go_assert (r == 0);
+
+ t = &runtime_sigtab[i];
+
+ if (sigaction (t->sig, NULL, &sa) != 0)
+ runtime_throw ("sigaction read failure");
+
+ if ((void *) sa.sa_handler == sig_tramp_info)
+ return runtime_sighandler;
+#ifdef SA_SIGINFO
+ if ((void *) sa.sa_handler == sig_panic_info_handler)
+ return runtime_sighandler;
+#else
+ if ((void *) sa.sa_handler == sig_tramp
+ || (void *) sa.sa_handler == sig_panic_handler)
+ return runtime_sighandler;
+#endif
+
+ return (void *) sa.sa_handler;
+}
+
/* Used by the os package to raise SIGPIPE. */
void os_sigpipe (void) __asm__ (GOSYM_PREFIX "os.sigpipe");
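
The runtime_setsig change above follows the standard sigaction pattern: when SA_SIGINFO is available, install a three-argument sa_sigaction trampoline; otherwise fall back to the classic one-argument sa_handler. A hedged standalone sketch of that pattern (handler and function names here are placeholders, not the runtime's):

#include <signal.h>
#include <string.h>

#ifdef SA_SIGINFO
static void
info_handler(int sig, siginfo_t *info, void *ctx)
{
	(void)sig; (void)info; (void)ctx;   // would forward to the real handler
}
#else
static void
plain_handler(int sig)
{
	(void)sig;                          // would forward to the real handler
}
#endif

static int
install_handler(int sig, int restart)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof sa);
	sigemptyset(&sa.sa_mask);           // don't block extra signals in the handler
#ifdef SA_SIGINFO
	sa.sa_flags = SA_ONSTACK | SA_SIGINFO;
	sa.sa_sigaction = info_handler;     // three-argument form, receives siginfo_t
#else
	sa.sa_flags = SA_ONSTACK;
	sa.sa_handler = plain_handler;      // one-argument form
#endif
	if(restart)
		sa.sa_flags |= SA_RESTART;      // restart interrupted system calls
	return sigaction(sig, &sa, NULL);   // 0 on success, -1 on error
}

Having siginfo_t available is what lets runtime_sighandler above treat user-sent signals (si_code == SI_USER) as notifications even for signals that would otherwise panic.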
diff --git a/libgo/runtime/go-unsafe-new.c b/libgo/runtime/go-unsafe-new.c
index 54788f1..7848642 100644
--- a/libgo/runtime/go-unsafe-new.c
+++ b/libgo/runtime/go-unsafe-new.c
@@ -21,14 +21,5 @@ void *unsafe_New (const struct __go_type_descriptor *)
void *
unsafe_New (const struct __go_type_descriptor *descriptor)
{
- uint32 flag;
- void *ret;
-
- flag = (descriptor->__code & GO_NO_POINTERS) != 0 ? FlagNoPointers : 0;
- ret = runtime_mallocgc (descriptor->__size, flag, 1, 1);
-
- if (UseSpanType && flag == 0)
- runtime_settype (ret, (uintptr) descriptor | TypeInfo_SingleObject);
-
- return ret;
+ return runtime_cnew (descriptor);
}
diff --git a/libgo/runtime/go-unsafe-newarray.c b/libgo/runtime/go-unsafe-newarray.c
index e4fb336..f5c5efc 100644
--- a/libgo/runtime/go-unsafe-newarray.c
+++ b/libgo/runtime/go-unsafe-newarray.c
@@ -21,21 +21,5 @@ void *unsafe_NewArray (const struct __go_type_descriptor *, intgo)
void *
unsafe_NewArray (const struct __go_type_descriptor *descriptor, intgo n)
{
- uint64 size;
- void *ret;
-
- size = n * descriptor->__size;
- if (size == 0)
- ret = &runtime_zerobase;
- else if ((descriptor->__code & GO_NO_POINTERS) != 0)
- ret = runtime_mallocgc (size, FlagNoPointers, 1, 1);
- else
- {
- ret = runtime_mallocgc (size, 0, 1, 1);
-
- if (UseSpanType)
- runtime_settype (ret, (uintptr) descriptor | TypeInfo_Array);
- }
-
- return ret;
+ return runtime_cnewarray (descriptor, n);
}
diff --git a/libgo/runtime/lock_futex.c b/libgo/runtime/lock_futex.c
index 5374aff..4b9651a 100644
--- a/libgo/runtime/lock_futex.c
+++ b/libgo/runtime/lock_futex.c
@@ -41,7 +41,7 @@ runtime_lock(Lock *l)
runtime_throw("runtime_lock: lock count");
// Speculative grab for lock.
- v = runtime_xchg(&l->key, MUTEX_LOCKED);
+ v = runtime_xchg((uint32*)&l->key, MUTEX_LOCKED);
if(v == MUTEX_UNLOCKED)
return;
@@ -64,7 +64,7 @@ runtime_lock(Lock *l)
// Try for lock, spinning.
for(i = 0; i < spin; i++) {
while(l->key == MUTEX_UNLOCKED)
- if(runtime_cas(&l->key, MUTEX_UNLOCKED, wait))
+ if(runtime_cas((uint32*)&l->key, MUTEX_UNLOCKED, wait))
return;
runtime_procyield(ACTIVE_SPIN_CNT);
}
@@ -72,17 +72,17 @@ runtime_lock(Lock *l)
// Try for lock, rescheduling.
for(i=0; i < PASSIVE_SPIN; i++) {
while(l->key == MUTEX_UNLOCKED)
- if(runtime_cas(&l->key, MUTEX_UNLOCKED, wait))
+ if(runtime_cas((uint32*)&l->key, MUTEX_UNLOCKED, wait))
return;
runtime_osyield();
}
// Sleep.
- v = runtime_xchg(&l->key, MUTEX_SLEEPING);
+ v = runtime_xchg((uint32*)&l->key, MUTEX_SLEEPING);
if(v == MUTEX_UNLOCKED)
return;
wait = MUTEX_SLEEPING;
- runtime_futexsleep(&l->key, MUTEX_SLEEPING, -1);
+ runtime_futexsleep((uint32*)&l->key, MUTEX_SLEEPING, -1);
}
}
@@ -94,11 +94,11 @@ runtime_unlock(Lock *l)
if(--runtime_m()->locks < 0)
runtime_throw("runtime_unlock: lock count");
- v = runtime_xchg(&l->key, MUTEX_UNLOCKED);
+ v = runtime_xchg((uint32*)&l->key, MUTEX_UNLOCKED);
if(v == MUTEX_UNLOCKED)
runtime_throw("unlock of unlocked lock");
if(v == MUTEX_SLEEPING)
- runtime_futexwakeup(&l->key, 1);
+ runtime_futexwakeup((uint32*)&l->key, 1);
}
// One-time notifications.
@@ -111,9 +111,9 @@ runtime_noteclear(Note *n)
void
runtime_notewakeup(Note *n)
{
- if(runtime_xchg(&n->key, 1))
+ if(runtime_xchg((uint32*)&n->key, 1))
runtime_throw("notewakeup - double wakeup");
- runtime_futexwakeup(&n->key, 1);
+ runtime_futexwakeup((uint32*)&n->key, 1);
}
void
@@ -121,8 +121,8 @@ runtime_notesleep(Note *n)
{
if(runtime_m()->profilehz > 0)
runtime_setprof(false);
- while(runtime_atomicload(&n->key) == 0)
- runtime_futexsleep(&n->key, 0, -1);
+ while(runtime_atomicload((uint32*)&n->key) == 0)
+ runtime_futexsleep((uint32*)&n->key, 0, -1);
if(runtime_m()->profilehz > 0)
runtime_setprof(true);
}
@@ -137,15 +137,15 @@ runtime_notetsleep(Note *n, int64 ns)
return;
}
- if(runtime_atomicload(&n->key) != 0)
+ if(runtime_atomicload((uint32*)&n->key) != 0)
return;
if(runtime_m()->profilehz > 0)
runtime_setprof(false);
deadline = runtime_nanotime() + ns;
for(;;) {
- runtime_futexsleep(&n->key, 0, ns);
- if(runtime_atomicload(&n->key) != 0)
+ runtime_futexsleep((uint32*)&n->key, 0, ns);
+ if(runtime_atomicload((uint32*)&n->key) != 0)
break;
now = runtime_nanotime();
if(now >= deadline)
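
The (uint32*) casts above exist because Lock.key is now a uintptr shared with the semaphore-based implementation, while the futex interface operates on 32-bit words. As a rough illustration of the lock/unlock protocol those casts feed into, here is a simplified sketch using GCC atomics and the raw Linux futex syscall (not the runtime's code; the active and passive spinning phases are omitted):

#include <stdint.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/futex.h>

enum { UNLOCKED = 0, LOCKED = 1, SLEEPING = 2 };

static void
futexsleep(uint32_t *addr, uint32_t val)
{
	// Sleeps only while *addr still equals val, so wakeups cannot be lost.
	syscall(SYS_futex, addr, FUTEX_WAIT, val, NULL, NULL, 0);
}

static void
futexwakeup(uint32_t *addr, uint32_t cnt)
{
	syscall(SYS_futex, addr, FUTEX_WAKE, cnt, NULL, NULL, 0);
}

static void
lock(uint32_t *key)
{
	// Speculative grab, as in runtime_lock above.
	if(__atomic_exchange_n(key, LOCKED, __ATOMIC_ACQUIRE) == UNLOCKED)
		return;
	// Contended: advertise a sleeper and wait until the lock is released.
	while(__atomic_exchange_n(key, SLEEPING, __ATOMIC_ACQUIRE) != UNLOCKED)
		futexsleep(key, SLEEPING);
}

static void
unlock(uint32_t *key)
{
	if(__atomic_exchange_n(key, UNLOCKED, __ATOMIC_RELEASE) == SLEEPING)
		futexwakeup(key, 1);
}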
diff --git a/libgo/runtime/lock_sema.c b/libgo/runtime/lock_sema.c
index 8c4b397..2663c54 100644
--- a/libgo/runtime/lock_sema.c
+++ b/libgo/runtime/lock_sema.c
@@ -43,7 +43,7 @@ runtime_lock(Lock *l)
runtime_throw("runtime_lock: lock count");
// Speculative grab for lock.
- if(runtime_casp(&l->waitm, nil, (void*)LOCKED))
+ if(runtime_casp((void**)&l->key, nil, (void*)LOCKED))
return;
if(m->waitsema == 0)
@@ -56,10 +56,10 @@ runtime_lock(Lock *l)
spin = ACTIVE_SPIN;
for(i=0;; i++) {
- v = (uintptr)runtime_atomicloadp(&l->waitm);
+ v = (uintptr)runtime_atomicloadp((void**)&l->key);
if((v&LOCKED) == 0) {
unlocked:
- if(runtime_casp(&l->waitm, (void*)v, (void*)(v|LOCKED)))
+ if(runtime_casp((void**)&l->key, (void*)v, (void*)(v|LOCKED)))
return;
i = 0;
}
@@ -74,9 +74,9 @@ unlocked:
// Queue this M.
for(;;) {
m->nextwaitm = (void*)(v&~LOCKED);
- if(runtime_casp(&l->waitm, (void*)v, (void*)((uintptr)m|LOCKED)))
+ if(runtime_casp((void**)&l->key, (void*)v, (void*)((uintptr)m|LOCKED)))
break;
- v = (uintptr)runtime_atomicloadp(&l->waitm);
+ v = (uintptr)runtime_atomicloadp((void**)&l->key);
if((v&LOCKED) == 0)
goto unlocked;
}
@@ -99,15 +99,15 @@ runtime_unlock(Lock *l)
runtime_throw("runtime_unlock: lock count");
for(;;) {
- v = (uintptr)runtime_atomicloadp(&l->waitm);
+ v = (uintptr)runtime_atomicloadp((void**)&l->key);
if(v == LOCKED) {
- if(runtime_casp(&l->waitm, (void*)LOCKED, nil))
+ if(runtime_casp((void**)&l->key, (void*)LOCKED, nil))
break;
} else {
// Other M's are waiting for the lock.
// Dequeue an M.
mp = (void*)(v&~LOCKED);
- if(runtime_casp(&l->waitm, (void*)v, mp->nextwaitm)) {
+ if(runtime_casp((void**)&l->key, (void*)v, mp->nextwaitm)) {
// Dequeued an M. Wake it.
runtime_semawakeup(mp);
break;
@@ -120,7 +120,7 @@ runtime_unlock(Lock *l)
void
runtime_noteclear(Note *n)
{
- n->waitm = nil;
+ n->key = 0;
}
void
@@ -129,8 +129,8 @@ runtime_notewakeup(Note *n)
M *mp;
do
- mp = runtime_atomicloadp(&n->waitm);
- while(!runtime_casp(&n->waitm, mp, (void*)LOCKED));
+ mp = runtime_atomicloadp((void**)&n->key);
+ while(!runtime_casp((void**)&n->key, mp, (void*)LOCKED));
// Successfully set waitm to LOCKED.
// What was it before?
@@ -153,8 +153,8 @@ runtime_notesleep(Note *n)
m = runtime_m();
if(m->waitsema == 0)
m->waitsema = runtime_semacreate();
- if(!runtime_casp(&n->waitm, nil, m)) { // must be LOCKED (got wakeup)
- if(n->waitm != (void*)LOCKED)
+ if(!runtime_casp((void**)&n->key, nil, m)) { // must be LOCKED (got wakeup)
+ if(n->key != LOCKED)
runtime_throw("notesleep - waitm out of sync");
return;
}
@@ -183,8 +183,8 @@ runtime_notetsleep(Note *n, int64 ns)
m->waitsema = runtime_semacreate();
// Register for wakeup on n->waitm.
- if(!runtime_casp(&n->waitm, nil, m)) { // must be LOCKED (got wakeup already)
- if(n->waitm != (void*)LOCKED)
+ if(!runtime_casp((void**)&n->key, nil, m)) { // must be LOCKED (got wakeup already)
+ if(n->key != LOCKED)
runtime_throw("notetsleep - waitm out of sync");
return;
}
@@ -219,10 +219,10 @@ runtime_notetsleep(Note *n, int64 ns)
// so that any notewakeup racing with the return does not
// try to grant us the semaphore when we don't expect it.
for(;;) {
- mp = runtime_atomicloadp(&n->waitm);
+ mp = runtime_atomicloadp((void**)&n->key);
if(mp == m) {
// No wakeup yet; unregister if possible.
- if(runtime_casp(&n->waitm, mp, nil))
+ if(runtime_casp((void**)&n->key, mp, nil))
return;
} else if(mp == (M*)LOCKED) {
// Wakeup happened so semaphore is available.
diff --git a/libgo/runtime/malloc.goc b/libgo/runtime/malloc.goc
index a484642..dfab683 100644
--- a/libgo/runtime/malloc.goc
+++ b/libgo/runtime/malloc.goc
@@ -18,7 +18,7 @@ package runtime
#include "go-type.h"
#include "race.h"
-MHeap runtime_mheap;
+MHeap *runtime_mheap;
int32 runtime_checking;
@@ -46,7 +46,7 @@ runtime_mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed)
g = runtime_g();
if(g->status == Gsyscall)
dogc = 0;
- if(runtime_gcwaiting && g != m->g0 && m->locks == 0 && g->status != Gsyscall) {
+ if(runtime_gcwaiting && g != m->g0 && m->locks == 0 && dogc) {
runtime_gosched();
m = runtime_m();
}
@@ -78,7 +78,7 @@ runtime_mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed)
npages = size >> PageShift;
if((size & PageMask) != 0)
npages++;
- s = runtime_MHeap_Alloc(&runtime_mheap, npages, 0, 1, zeroed);
+ s = runtime_MHeap_Alloc(runtime_mheap, npages, 0, 1, zeroed);
if(s == nil)
runtime_throw("out of memory");
size = npages<<PageShift;
@@ -92,9 +92,9 @@ runtime_mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed)
if (sizeof(void*) == 4 && c->local_total_alloc >= (1<<30)) {
// purge cache stats to prevent overflow
- runtime_lock(&runtime_mheap);
+ runtime_lock(runtime_mheap);
runtime_purgecachedstats(c);
- runtime_unlock(&runtime_mheap);
+ runtime_unlock(runtime_mheap);
}
if(!(flag & FlagNoGC))
@@ -175,17 +175,17 @@ __go_free(void *v)
if(sizeclass == 0) {
// Large object.
size = s->npages<<PageShift;
- *(uintptr*)(s->start<<PageShift) = 1; // mark as "needs to be zeroed"
+ *(uintptr*)(s->start<<PageShift) = (uintptr)0xfeedfeedfeedfeedll; // mark as "needs to be zeroed"
// Must mark v freed before calling unmarkspan and MHeap_Free:
// they might coalesce v into other spans and change the bitmap further.
runtime_markfreed(v, size);
runtime_unmarkspan(v, 1<<PageShift);
- runtime_MHeap_Free(&runtime_mheap, s, 1);
+ runtime_MHeap_Free(runtime_mheap, s, 1);
} else {
// Small object.
size = runtime_class_to_size[sizeclass];
if(size > sizeof(uintptr))
- ((uintptr*)v)[1] = 1; // mark as "needs to be zeroed"
+ ((uintptr*)v)[1] = (uintptr)0xfeedfeedfeedfeedll; // mark as "needs to be zeroed"
// Must mark v freed before calling MCache_Free:
// it might coalesce v and other blocks into a bigger span
// and change the bitmap further.
@@ -213,12 +213,12 @@ runtime_mlookup(void *v, byte **base, uintptr *size, MSpan **sp)
m->mcache->local_nlookup++;
if (sizeof(void*) == 4 && m->mcache->local_nlookup >= (1<<30)) {
// purge cache stats to prevent overflow
- runtime_lock(&runtime_mheap);
+ runtime_lock(runtime_mheap);
runtime_purgecachedstats(m->mcache);
- runtime_unlock(&runtime_mheap);
+ runtime_unlock(runtime_mheap);
}
- s = runtime_MHeap_LookupMaybe(&runtime_mheap, v);
+ s = runtime_MHeap_LookupMaybe(runtime_mheap, v);
if(sp)
*sp = s;
if(s == nil) {
@@ -262,11 +262,11 @@ runtime_allocmcache(void)
intgo rate;
MCache *c;
- runtime_lock(&runtime_mheap);
- c = runtime_FixAlloc_Alloc(&runtime_mheap.cachealloc);
- mstats.mcache_inuse = runtime_mheap.cachealloc.inuse;
- mstats.mcache_sys = runtime_mheap.cachealloc.sys;
- runtime_unlock(&runtime_mheap);
+ runtime_lock(runtime_mheap);
+ c = runtime_FixAlloc_Alloc(&runtime_mheap->cachealloc);
+ mstats.mcache_inuse = runtime_mheap->cachealloc.inuse;
+ mstats.mcache_sys = runtime_mheap->cachealloc.sys;
+ runtime_unlock(runtime_mheap);
runtime_memclr((byte*)c, sizeof(*c));
// Set first allocation sample size.
@@ -283,10 +283,10 @@ void
runtime_freemcache(MCache *c)
{
runtime_MCache_ReleaseAll(c);
- runtime_lock(&runtime_mheap);
+ runtime_lock(runtime_mheap);
runtime_purgecachedstats(c);
- runtime_FixAlloc_Free(&runtime_mheap.cachealloc, c);
- runtime_unlock(&runtime_mheap);
+ runtime_FixAlloc_Free(&runtime_mheap->cachealloc, c);
+ runtime_unlock(runtime_mheap);
}
void
@@ -334,9 +334,15 @@ runtime_mallocinit(void)
USED(arena_size);
USED(bitmap_size);
+ if((runtime_mheap = runtime_SysAlloc(sizeof(*runtime_mheap))) == nil)
+ runtime_throw("runtime: cannot allocate heap metadata");
+
runtime_InitSizes();
- limit = runtime_memlimit();
+ // limit = runtime_memlimit();
+ // See https://code.google.com/p/go/issues/detail?id=5049
+ // TODO(rsc): Fix after 1.1.
+ limit = 0;
// Set up the allocation arena, a contiguous area of memory where
// allocated data will be found. The arena begins with a bitmap large
@@ -414,13 +420,13 @@ runtime_mallocinit(void)
if((uintptr)p & (((uintptr)1<<PageShift)-1))
runtime_throw("runtime: SysReserve returned unaligned address");
- runtime_mheap.bitmap = p;
- runtime_mheap.arena_start = p + bitmap_size;
- runtime_mheap.arena_used = runtime_mheap.arena_start;
- runtime_mheap.arena_end = runtime_mheap.arena_start + arena_size;
+ runtime_mheap->bitmap = p;
+ runtime_mheap->arena_start = p + bitmap_size;
+ runtime_mheap->arena_used = runtime_mheap->arena_start;
+ runtime_mheap->arena_end = runtime_mheap->arena_start + arena_size;
// Initialize the rest of the allocator.
- runtime_MHeap_Init(&runtime_mheap, runtime_SysAlloc);
+ runtime_MHeap_Init(runtime_mheap, runtime_SysAlloc);
runtime_m()->mcache = runtime_allocmcache();
// See if it works.
@@ -519,8 +525,8 @@ runtime_settype_flush(M *mp, bool sysalloc)
// (Manually inlined copy of runtime_MHeap_Lookup)
p = (uintptr)v>>PageShift;
if(sizeof(void*) == 8)
- p -= (uintptr)runtime_mheap.arena_start >> PageShift;
- s = runtime_mheap.map[p];
+ p -= (uintptr)runtime_mheap->arena_start >> PageShift;
+ s = runtime_mheap->map[p];
if(s->sizeclass == 0) {
s->types.compression = MTypes_Single;
@@ -537,9 +543,11 @@ runtime_settype_flush(M *mp, bool sysalloc)
nbytes3 = 8*sizeof(uintptr) + 1*ntypes;
if(!sysalloc) {
- data3 = runtime_mallocgc(nbytes3, FlagNoPointers, 0, 1);
+ data3 = runtime_mallocgc(nbytes3, FlagNoProfiling|FlagNoPointers, 0, 1);
} else {
data3 = runtime_SysAlloc(nbytes3);
+ if(data3 == nil)
+ runtime_throw("runtime: cannot allocate memory");
if(0) runtime_printf("settype(0->3): SysAlloc(%x) --> %p\n", (uint32)nbytes3, data3);
}
@@ -573,9 +581,11 @@ runtime_settype_flush(M *mp, bool sysalloc)
nbytes2 = ntypes * sizeof(uintptr);
if(!sysalloc) {
- data2 = runtime_mallocgc(nbytes2, FlagNoPointers, 0, 1);
+ data2 = runtime_mallocgc(nbytes2, FlagNoProfiling|FlagNoPointers, 0, 1);
} else {
data2 = runtime_SysAlloc(nbytes2);
+ if(data2 == nil)
+ runtime_throw("runtime: cannot allocate memory");
if(0) runtime_printf("settype.(3->2): SysAlloc(%x) --> %p\n", (uint32)nbytes2, data2);
}
@@ -633,7 +643,7 @@ runtime_settype(void *v, uintptr t)
}
if(DebugTypeAtBlockEnd) {
- s = runtime_MHeap_Lookup(&runtime_mheap, v);
+ s = runtime_MHeap_Lookup(runtime_mheap, v);
*(uintptr*)((uintptr)v+s->elemsize-sizeof(uintptr)) = t;
}
}
@@ -672,7 +682,7 @@ runtime_gettype(void *v)
uintptr t, ofs;
byte *data;
- s = runtime_MHeap_LookupMaybe(&runtime_mheap, v);
+ s = runtime_MHeap_LookupMaybe(runtime_mheap, v);
if(s != nil) {
t = 0;
switch(s->types.compression) {
@@ -731,9 +741,8 @@ runtime_new(const Type *typ)
ret = runtime_mallocgc(typ->__size, flag, 1, 1);
if(UseSpanType && !flag) {
- if(false) {
+ if(false)
runtime_printf("new %S: %p\n", *typ->__reflection, ret);
- }
runtime_settype(ret, (uintptr)typ | TypeInfo_SingleObject);
}
}
@@ -741,6 +750,45 @@ runtime_new(const Type *typ)
return ret;
}
+static void*
+cnew(const Type *typ, intgo n, int32 objtyp)
+{
+ uint32 flag;
+ void *ret;
+
+ if((objtyp&(PtrSize-1)) != objtyp)
+ runtime_throw("runtime: invalid objtyp");
+ if(n < 0 || (typ->__size > 0 && (uintptr)n > (MaxMem/typ->__size)))
+ runtime_panicstring("runtime: allocation size out of range");
+ if(typ->__size == 0 || n == 0) {
+ // All 0-length allocations use this pointer.
+ // The language does not require the allocations to
+ // have distinct values.
+ return &runtime_zerobase;
+ }
+ flag = typ->__code&GO_NO_POINTERS ? FlagNoPointers : 0;
+ ret = runtime_mallocgc(typ->__size*n, flag, 1, 1);
+ if(UseSpanType && !flag) {
+ if(false)
+ runtime_printf("cnew [%D]%S: %p\n", (int64)n, *typ->__reflection, ret);
+ runtime_settype(ret, (uintptr)typ | TypeInfo_SingleObject);
+ }
+ return ret;
+}
+
+// same as runtime_new, but callable from C
+void*
+runtime_cnew(const Type *typ)
+{
+ return cnew(typ, 1, TypeInfo_SingleObject);
+}
+
+void*
+runtime_cnewarray(const Type *typ, intgo n)
+{
+ return cnew(typ, n, TypeInfo_Array);
+}
+
func GC() {
runtime_gc(1);
}
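
The size check in the new cnew above is the usual overflow-safe pattern: rather than testing typ->__size*n against the limit (the multiplication itself can overflow), it divides the limit by the element size. A hedged sketch of the same idea, assuming a 64-bit target and using MaxMem = 1<<37 as defined in malloc.h:

#include <stdint.h>
#include <stdlib.h>

#define MaxMem (1ULL << 37)           // 128 GB arena limit (see malloc.h)

static char zerobase;                 // all zero-length allocations share one address

static void*
alloc_array(uint64_t elemsize, int64_t n)
{
	// elemsize*n could overflow, so compare n against MaxMem/elemsize instead.
	if(n < 0 || (elemsize > 0 && (uint64_t)n > MaxMem/elemsize))
		return NULL;                  // runtime panics: "allocation size out of range"
	if(elemsize == 0 || n == 0)
		return &zerobase;             // the language allows these to alias
	return malloc((size_t)(elemsize * (uint64_t)n));
}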
diff --git a/libgo/runtime/malloc.h b/libgo/runtime/malloc.h
index 7ebb762..ebea34e 100644
--- a/libgo/runtime/malloc.h
+++ b/libgo/runtime/malloc.h
@@ -86,6 +86,7 @@ typedef struct MSpan MSpan;
typedef struct MStats MStats;
typedef struct MLink MLink;
typedef struct MTypes MTypes;
+typedef struct GCStats GCStats;
enum
{
@@ -114,10 +115,18 @@ enum
HeapAllocChunk = 1<<20, // Chunk size for heap growth
// Number of bits in page to span calculations (4k pages).
- // On 64-bit, we limit the arena to 128GB, or 37 bits.
+ // On Windows 64-bit we limit the arena to 32GB or 35 bits (see below for reason).
+ // On other 64-bit platforms, we limit the arena to 128GB, or 37 bits.
// On 32-bit, we don't bother limiting anything, so we use the full 32-bit address.
#if __SIZEOF_POINTER__ == 8
+#ifdef GOOS_windows
+ // Windows counts memory used by page table into committed memory
+ // of the process, so we can't reserve too much memory.
+ // See http://golang.org/issue/5402 and http://golang.org/issue/5236.
+ MHeapMap_Bits = 35 - PageShift,
+#else
MHeapMap_Bits = 37 - PageShift,
+#endif
#else
MHeapMap_Bits = 32 - PageShift,
#endif
@@ -133,7 +142,7 @@ enum
// This must be a #define instead of an enum because it
// is so large.
#if __SIZEOF_POINTER__ == 8
-#define MaxMem (1ULL<<(MHeapMap_Bits+PageShift)) /* 128 GB */
+#define MaxMem (1ULL<<(MHeapMap_Bits+PageShift)) /* 128 GB or 32 GB */
#else
#define MaxMem ((uintptr)-1)
#endif
@@ -229,7 +238,7 @@ struct MStats
uint64 buckhash_sys; // profiling bucket hash table
// Statistics about garbage collector.
- // Protected by stopping the world during GC.
+ // Protected by mheap or stopping the world during GC.
uint64 next_gc; // next GC (in heap_alloc time)
uint64 last_gc; // last GC (in absolute time)
uint64 pause_total_ns;
@@ -249,7 +258,6 @@ struct MStats
extern MStats mstats
__asm__ (GOSYM_PREFIX "runtime.VmemStats");
-
// Size classes. Computed and initialized by InitSizes.
//
// SizeToClass(0 <= n <= MaxSmallSize) returns the size class,
@@ -416,18 +424,18 @@ struct MHeap
byte *arena_end;
// central free lists for small size classes.
- // the union makes sure that the MCentrals are
+ // the padding makes sure that the MCentrals are
// spaced CacheLineSize bytes apart, so that each MCentral.Lock
// gets its own cache line.
- union {
+ struct {
MCentral;
- byte pad[CacheLineSize];
+ byte pad[64];
} central[NumSizeClasses];
FixAlloc spanalloc; // allocator for Span*
FixAlloc cachealloc; // allocator for MCache*
};
-extern MHeap runtime_mheap;
+extern MHeap *runtime_mheap;
void runtime_MHeap_Init(MHeap *h, void *(*allocator)(uintptr));
MSpan* runtime_MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, int32 acct, int32 zeroed);
@@ -452,8 +460,8 @@ void runtime_unmarkspan(void *v, uintptr size);
bool runtime_blockspecial(void*);
void runtime_setblockspecial(void*, bool);
void runtime_purgecachedstats(MCache*);
-void* runtime_new(const Type *);
-#define runtime_cnew(T) runtime_new(T)
+void* runtime_cnew(const Type*);
+void* runtime_cnewarray(const Type*, intgo);
void runtime_settype(void*, uintptr);
void runtime_settype_flush(M*, bool);
@@ -493,6 +501,7 @@ enum
TypeInfo_SingleObject = 0,
TypeInfo_Array = 1,
TypeInfo_Map = 2,
+ TypeInfo_Chan = 3,
// Enables type information at the end of blocks allocated from heap
DebugTypeAtBlockEnd = 0,
@@ -504,4 +513,5 @@ void runtime_gc_itab_ptr(Eface*);
void runtime_memorydump(void);
+void runtime_proc_scan(void (*)(Obj));
void runtime_time_scan(void (*)(Obj));
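
The new arena numbers above follow directly from the page size. A compile-time check of that arithmetic (sketch only; PageShift is assumed to be 12 for 4 KB pages, as elsewhere in the runtime):

enum { PageShift = 12 };                        // 4 KB pages

enum {
	MHeapMap_Bits_other   = 37 - PageShift,     // 25 bits of span map
	MHeapMap_Bits_windows = 35 - PageShift      // 23 bits of span map
};

// MaxMem = 1 << (MHeapMap_Bits + PageShift): 128 GB in general, 32 GB on Windows.
typedef char check_other  [(1ULL << (MHeapMap_Bits_other   + PageShift)) == (128ULL << 30) ? 1 : -1];
typedef char check_windows[(1ULL << (MHeapMap_Bits_windows + PageShift)) == ( 32ULL << 30) ? 1 : -1];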
diff --git a/libgo/runtime/mcache.c b/libgo/runtime/mcache.c
index 570c06a..45bac4f 100644
--- a/libgo/runtime/mcache.c
+++ b/libgo/runtime/mcache.c
@@ -21,7 +21,7 @@ runtime_MCache_Alloc(MCache *c, int32 sizeclass, uintptr size, int32 zeroed)
l = &c->list[sizeclass];
if(l->list == nil) {
// Replenish using central lists.
- n = runtime_MCentral_AllocList(&runtime_mheap.central[sizeclass],
+ n = runtime_MCentral_AllocList(&runtime_mheap->central[sizeclass],
runtime_class_to_transfercount[sizeclass], &first);
if(n == 0)
runtime_throw("out of memory");
@@ -69,7 +69,7 @@ ReleaseN(MCache *c, MCacheList *l, int32 n, int32 sizeclass)
c->size -= n*runtime_class_to_size[sizeclass];
// Return them to central free list.
- runtime_MCentral_FreeList(&runtime_mheap.central[sizeclass], n, first);
+ runtime_MCentral_FreeList(&runtime_mheap->central[sizeclass], n, first);
}
void
diff --git a/libgo/runtime/mcentral.c b/libgo/runtime/mcentral.c
index b405438..b3108a1 100644
--- a/libgo/runtime/mcentral.c
+++ b/libgo/runtime/mcentral.c
@@ -108,7 +108,7 @@ MCentral_Free(MCentral *c, void *v)
int32 size;
// Find span for v.
- s = runtime_MHeap_Lookup(&runtime_mheap, v);
+ s = runtime_MHeap_Lookup(runtime_mheap, v);
if(s == nil || s->ref == 0)
runtime_throw("invalid free");
@@ -133,7 +133,7 @@ MCentral_Free(MCentral *c, void *v)
s->freelist = nil;
c->nfree -= (s->npages << PageShift) / size;
runtime_unlock(c);
- runtime_MHeap_Free(&runtime_mheap, s, 0);
+ runtime_MHeap_Free(runtime_mheap, s, 0);
runtime_lock(c);
}
}
@@ -168,7 +168,7 @@ runtime_MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *e
c->nfree -= (s->npages << PageShift) / size;
runtime_unlock(c);
runtime_unmarkspan((byte*)(s->start<<PageShift), s->npages<<PageShift);
- runtime_MHeap_Free(&runtime_mheap, s, 0);
+ runtime_MHeap_Free(runtime_mheap, s, 0);
} else {
runtime_unlock(c);
}
@@ -200,7 +200,7 @@ MCentral_Grow(MCentral *c)
runtime_unlock(c);
runtime_MGetSizeClassInfo(c->sizeclass, &size, &npages, &n);
- s = runtime_MHeap_Alloc(&runtime_mheap, npages, c->sizeclass, 0, 1);
+ s = runtime_MHeap_Alloc(runtime_mheap, npages, c->sizeclass, 0, 1);
if(s == nil) {
// TODO(rsc): Log out of memory
runtime_lock(c);
diff --git a/libgo/runtime/mem.c b/libgo/runtime/mem.c
index e606bdd..8481e95 100644
--- a/libgo/runtime/mem.c
+++ b/libgo/runtime/mem.c
@@ -78,7 +78,7 @@ runtime_SysAlloc(uintptr n)
fd = dev_zero;
#endif
- p = runtime_mmap(nil, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, fd, 0);
+ p = runtime_mmap(nil, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, fd, 0);
if (p == MAP_FAILED) {
if(errno == EACCES) {
runtime_printf("runtime: mmap: access denied\n");
@@ -169,7 +169,7 @@ runtime_SysMap(void *v, uintptr n)
// On 64-bit, we don't actually have v reserved, so tread carefully.
if(sizeof(void*) == 8 && (uintptr)v >= 0xffffffffU) {
- p = mmap_fixed(v, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, fd, 0);
+ p = mmap_fixed(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, fd, 0);
if(p == MAP_FAILED && errno == ENOMEM)
runtime_throw("runtime: out of memory");
if(p != v) {
@@ -179,7 +179,9 @@ runtime_SysMap(void *v, uintptr n)
return;
}
- p = runtime_mmap(v, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_FIXED|MAP_PRIVATE, fd, 0);
+ p = runtime_mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_FIXED|MAP_PRIVATE, fd, 0);
+ if(p == MAP_FAILED && errno == ENOMEM)
+ runtime_throw("runtime: out of memory");
if(p != v)
runtime_throw("runtime: cannot map pages in arena address space");
}
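
The mem.c hunks above make two independent changes: heap pages are no longer mapped PROT_EXEC, and a MAP_FIXED mapping that fails with ENOMEM is now reported as out of memory instead of falling through. A reduced sketch of the resulting pattern (illustrative only; errors are collapsed into a bool):

#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <sys/mman.h>

static bool
map_arena_pages(void *v, size_t n)
{
	void *p = mmap(v, n, PROT_READ|PROT_WRITE,             // no PROT_EXEC for heap data
	               MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0);
	if(p == MAP_FAILED && errno == ENOMEM)
		return false;                                      // "runtime: out of memory"
	if(p != v)
		return false;                                      // cannot map pages in the arena
	return true;
}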
diff --git a/libgo/runtime/mfixalloc.c b/libgo/runtime/mfixalloc.c
index 109cfe8..6e4f0c6 100644
--- a/libgo/runtime/mfixalloc.c
+++ b/libgo/runtime/mfixalloc.c
@@ -30,6 +30,11 @@ void*
runtime_FixAlloc_Alloc(FixAlloc *f)
{
void *v;
+
+ if(f->size == 0) {
+ runtime_printf("runtime: use of FixAlloc_Alloc before FixAlloc_Init\n");
+ runtime_throw("runtime: internal error");
+ }
if(f->list) {
v = f->list;
diff --git a/libgo/runtime/mgc0.c b/libgo/runtime/mgc0.c
index 88283cc..36afd2b 100644
--- a/libgo/runtime/mgc0.c
+++ b/libgo/runtime/mgc0.c
@@ -21,8 +21,11 @@
#define tab __methods
// Eface aka __go_empty_interface.
#define type __type_descriptor
+// Hmap aka __go_map
+typedef struct __go_map Hmap;
// Type aka __go_type_descriptor
#define kind __code
+#define string __reflection
#define KindPtr GO_PTR
#define KindNoPointers GO_NO_POINTERS
// PtrType aka __go_ptr_type
@@ -41,6 +44,9 @@ extern void * __splitstack_find_context (void *context[10], size_t *, void **,
enum {
Debug = 0,
DebugMark = 0, // run second pass to check mark
+ CollectStats = 0,
+ ScanStackByFrames = 0,
+ IgnorePreciseGC = 0,
// Four bits per word (see #defines below).
wordsPerBitmapWord = sizeof(void*)*8/4,
@@ -147,6 +153,7 @@ static Workbuf* getempty(Workbuf*);
static Workbuf* getfull(Workbuf*);
static void putempty(Workbuf*);
static Workbuf* handoff(Workbuf*);
+static void gchelperstart(void);
static struct {
uint64 full; // lock-free list of full blocks
@@ -170,11 +177,114 @@ static struct {
} work;
enum {
- // TODO(atom): to be expanded in a next CL
GC_DEFAULT_PTR = GC_NUM_INSTR,
+ GC_MAP_NEXT,
+ GC_CHAN,
+
+ GC_NUM_INSTR2
};
-// PtrTarget and BitTarget are structures used by intermediate buffers.
+static struct {
+ struct {
+ uint64 sum;
+ uint64 cnt;
+ } ptr;
+ uint64 nbytes;
+ struct {
+ uint64 sum;
+ uint64 cnt;
+ uint64 notype;
+ uint64 typelookup;
+ } obj;
+ uint64 rescan;
+ uint64 rescanbytes;
+ uint64 instr[GC_NUM_INSTR2];
+ uint64 putempty;
+ uint64 getfull;
+} gcstats;
+
+// markonly marks an object. It returns true if the object
+// has been marked by this function, false otherwise.
+// This function doesn't append the object to any buffer.
+static bool
+markonly(void *obj)
+{
+ byte *p;
+ uintptr *bitp, bits, shift, x, xbits, off;
+ MSpan *s;
+ PageID k;
+
+ // Words outside the arena cannot be pointers.
+ if((byte*)obj < runtime_mheap->arena_start || (byte*)obj >= runtime_mheap->arena_used)
+ return false;
+
+ // obj may be a pointer to a live object.
+ // Try to find the beginning of the object.
+
+ // Round down to word boundary.
+ obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
+
+ // Find bits for this word.
+ off = (uintptr*)obj - (uintptr*)runtime_mheap->arena_start;
+ bitp = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
+ shift = off % wordsPerBitmapWord;
+ xbits = *bitp;
+ bits = xbits >> shift;
+
+ // Pointing at the beginning of a block?
+ if((bits & (bitAllocated|bitBlockBoundary)) != 0)
+ goto found;
+
+ // Otherwise consult span table to find beginning.
+ // (Manually inlined copy of MHeap_LookupMaybe.)
+ k = (uintptr)obj>>PageShift;
+ x = k;
+ if(sizeof(void*) == 8)
+ x -= (uintptr)runtime_mheap->arena_start>>PageShift;
+ s = runtime_mheap->map[x];
+ if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse)
+ return false;
+ p = (byte*)((uintptr)s->start<<PageShift);
+ if(s->sizeclass == 0) {
+ obj = p;
+ } else {
+ if((byte*)obj >= (byte*)s->limit)
+ return false;
+ uintptr size = s->elemsize;
+ int32 i = ((byte*)obj - p)/size;
+ obj = p+i*size;
+ }
+
+ // Now that we know the object header, reload bits.
+ off = (uintptr*)obj - (uintptr*)runtime_mheap->arena_start;
+ bitp = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
+ shift = off % wordsPerBitmapWord;
+ xbits = *bitp;
+ bits = xbits >> shift;
+
+found:
+ // Now we have bits, bitp, and shift correct for
+ // obj pointing at the base of the object.
+ // Only care about allocated and not marked.
+ if((bits & (bitAllocated|bitMarked)) != bitAllocated)
+ return false;
+ if(work.nproc == 1)
+ *bitp |= bitMarked<<shift;
+ else {
+ for(;;) {
+ x = *bitp;
+ if(x & (bitMarked<<shift))
+ return false;
+ if(runtime_casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
+ break;
+ }
+ }
+
+ // The object is now marked
+ return true;
+}
+
+// PtrTarget is a structure used by intermediate buffers.
// The intermediate buffers hold GC data before it
// is moved/flushed to the work buffer (Workbuf).
// The size of an intermediate buffer is very small,
@@ -186,24 +296,16 @@ struct PtrTarget
uintptr ti;
};
-typedef struct BitTarget BitTarget;
-struct BitTarget
-{
- void *p;
- uintptr ti;
- uintptr *bitp, shift;
-};
-
typedef struct BufferList BufferList;
struct BufferList
{
PtrTarget ptrtarget[IntermediateBufferCapacity];
- BitTarget bittarget[IntermediateBufferCapacity];
- BufferList *next;
+ Obj obj[IntermediateBufferCapacity];
+ uint32 busy;
+ byte pad[CacheLineSize];
};
-static BufferList *bufferList;
+static BufferList bufferList[MaxGcproc];
-static Lock lock;
static Type *itabtype;
static void enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj);
@@ -214,7 +316,6 @@ static void enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj);
// and are prepared to be scanned by the garbage collector.
//
// _wp, _wbuf, _nobj are input/output parameters and are specifying the work buffer.
-// bitbuf holds temporary data generated by this function.
//
// A simplified drawing explaining how the todo-list moves from a structure to another:
//
@@ -222,14 +323,12 @@ static void enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj);
// (find pointers)
// Obj ------> PtrTarget (pointer targets)
// ↑ |
-// | | flushptrbuf (1st part,
-// | | find block start)
-// | ↓
-// `--------- BitTarget (pointer targets and the corresponding locations in bitmap)
-// flushptrbuf
-// (2nd part, mark and enqueue)
+// | |
+// `----------'
+// flushptrbuf
+// (find block start, mark and enqueue)
static void
-flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_nobj, BitTarget *bitbuf)
+flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_nobj)
{
byte *p, *arena_start, *obj;
uintptr size, *bitp, bits, shift, j, x, xbits, off, nobj, ti, n;
@@ -238,9 +337,8 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf
Obj *wp;
Workbuf *wbuf;
PtrTarget *ptrbuf_end;
- BitTarget *bitbufpos, *bt;
- arena_start = runtime_mheap.arena_start;
+ arena_start = runtime_mheap->arena_start;
wp = *_wp;
wbuf = *_wbuf;
@@ -250,6 +348,11 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf
n = ptrbuf_end - ptrbuf;
*ptrbufpos = ptrbuf;
+ if(CollectStats) {
+ runtime_xadd64(&gcstats.ptr.sum, n);
+ runtime_xadd64(&gcstats.ptr.cnt, 1);
+ }
+
// If buffer is nearly full, get a new one.
if(wbuf == nil || nobj+n >= nelem(wbuf->obj)) {
if(wbuf != nil)
@@ -267,8 +370,6 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf
{
// Multi-threaded version.
- bitbufpos = bitbuf;
-
while(ptrbuf < ptrbuf_end) {
obj = ptrbuf->p;
ti = ptrbuf->ti;
@@ -276,7 +377,7 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf
// obj belongs to interval [mheap.arena_start, mheap.arena_used).
if(Debug > 1) {
- if(obj < runtime_mheap.arena_start || obj >= runtime_mheap.arena_used)
+ if(obj < runtime_mheap->arena_start || obj >= runtime_mheap->arena_used)
runtime_throw("object is outside of mheap");
}
@@ -319,7 +420,7 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf
x = k;
if(sizeof(void*) == 8)
x -= (uintptr)arena_start>>PageShift;
- s = runtime_mheap.map[x];
+ s = runtime_mheap->map[x];
if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse)
continue;
p = (byte*)((uintptr)s->start<<PageShift);
@@ -346,40 +447,36 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf
// Only care about allocated and not marked.
if((bits & (bitAllocated|bitMarked)) != bitAllocated)
continue;
-
- *bitbufpos++ = (BitTarget){obj, ti, bitp, shift};
- }
-
- runtime_lock(&lock);
- for(bt=bitbuf; bt<bitbufpos; bt++){
- xbits = *bt->bitp;
- bits = xbits >> bt->shift;
- if((bits & bitMarked) != 0)
- continue;
-
- // Mark the block
- *bt->bitp = xbits | (bitMarked << bt->shift);
+ if(work.nproc == 1)
+ *bitp |= bitMarked<<shift;
+ else {
+ for(;;) {
+ x = *bitp;
+ if(x & (bitMarked<<shift))
+ goto continue_obj;
+ if(runtime_casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
+ break;
+ }
+ }
// If object has no pointers, don't need to scan further.
if((bits & bitNoPointers) != 0)
continue;
- obj = bt->p;
-
// Ask span about size class.
// (Manually inlined copy of MHeap_Lookup.)
x = (uintptr)obj >> PageShift;
if(sizeof(void*) == 8)
x -= (uintptr)arena_start>>PageShift;
- s = runtime_mheap.map[x];
+ s = runtime_mheap->map[x];
PREFETCH(obj);
- *wp = (Obj){obj, s->elemsize, bt->ti};
+ *wp = (Obj){obj, s->elemsize, ti};
wp++;
nobj++;
+ continue_obj:;
}
- runtime_unlock(&lock);
// If another proc wants a pointer, give it some.
if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
@@ -395,9 +492,73 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf
*_nobj = nobj;
}
+static void
+flushobjbuf(Obj *objbuf, Obj **objbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_nobj)
+{
+ uintptr nobj, off;
+ Obj *wp, obj;
+ Workbuf *wbuf;
+ Obj *objbuf_end;
+
+ wp = *_wp;
+ wbuf = *_wbuf;
+ nobj = *_nobj;
+
+ objbuf_end = *objbufpos;
+ *objbufpos = objbuf;
+
+ while(objbuf < objbuf_end) {
+ obj = *objbuf++;
+
+ // Align obj.b to a word boundary.
+ off = (uintptr)obj.p & (PtrSize-1);
+ if(off != 0) {
+ obj.p += PtrSize - off;
+ obj.n -= PtrSize - off;
+ obj.ti = 0;
+ }
+
+ if(obj.p == nil || obj.n == 0)
+ continue;
+
+ // If buffer is full, get a new one.
+ if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
+ if(wbuf != nil)
+ wbuf->nobj = nobj;
+ wbuf = getempty(wbuf);
+ wp = wbuf->obj;
+ nobj = 0;
+ }
+
+ *wp = obj;
+ wp++;
+ nobj++;
+ }
+
+ // If another proc wants a pointer, give it some.
+ if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
+ wbuf->nobj = nobj;
+ wbuf = handoff(wbuf);
+ nobj = wbuf->nobj;
+ wp = wbuf->obj + nobj;
+ }
+
+ *_wp = wp;
+ *_wbuf = wbuf;
+ *_nobj = nobj;
+}
+
// Program that scans the whole block and treats every block element as a potential pointer
static uintptr defaultProg[2] = {PtrSize, GC_DEFAULT_PTR};
+#if 0
+// Hashmap iterator program
+static uintptr mapProg[2] = {0, GC_MAP_NEXT};
+
+// Hchan program
+static uintptr chanProg[2] = {0, GC_CHAN};
+#endif
+
// Local variables of a program fragment or loop
typedef struct Frame Frame;
struct Frame {
@@ -405,6 +566,61 @@ struct Frame {
uintptr *loop_or_ret;
};
+// Sanity check for the derived type info objti.
+static void
+checkptr(void *obj, uintptr objti)
+{
+ uintptr type, tisize, i, x;
+ byte *objstart;
+ Type *t;
+ MSpan *s;
+
+ if(!Debug)
+ runtime_throw("checkptr is debug only");
+
+ if((byte*)obj < runtime_mheap->arena_start || (byte*)obj >= runtime_mheap->arena_used)
+ return;
+ type = runtime_gettype(obj);
+ t = (Type*)(type & ~(uintptr)(PtrSize-1));
+ if(t == nil)
+ return;
+ x = (uintptr)obj >> PageShift;
+ if(sizeof(void*) == 8)
+ x -= (uintptr)(runtime_mheap->arena_start)>>PageShift;
+ s = runtime_mheap->map[x];
+ objstart = (byte*)((uintptr)s->start<<PageShift);
+ if(s->sizeclass != 0) {
+ i = ((byte*)obj - objstart)/s->elemsize;
+ objstart += i*s->elemsize;
+ }
+ tisize = *(uintptr*)objti;
+ // Sanity check for object size: it should fit into the memory block.
+ if((byte*)obj + tisize > objstart + s->elemsize)
+ runtime_throw("invalid gc type info");
+ if(obj != objstart)
+ return;
+ // If obj points to the beginning of the memory block,
+ // check type info as well.
+ if(t->string == nil ||
+ // Gob allocates unsafe pointers for indirection.
+ (runtime_strcmp((const char *)t->string->str, (const char*)"unsafe.Pointer") &&
+ // Runtime and gc think differently about closures.
+ runtime_strstr((const char *)t->string->str, (const char*)"struct { F uintptr") != (const char *)t->string->str)) {
+#if 0
+ pc1 = (uintptr*)objti;
+ pc2 = (uintptr*)t->gc;
+ // A simple best-effort check until first GC_END.
+ for(j = 1; pc1[j] != GC_END && pc2[j] != GC_END; j++) {
+ if(pc1[j] != pc2[j]) {
+ runtime_printf("invalid gc type info for '%s' at %p, type info %p, block info %p\n",
+ t->string ? (const int8*)t->string->str : (const int8*)"?", j, pc1[j], pc2[j]);
+ runtime_throw("invalid gc type info");
+ }
+ }
+#endif
+ }
+}
+
// scanblock scans a block of n bytes starting at pointer b for references
// to other objects, scanning any it finds recursively until there are no
// unscanned objects left. Instead of using an explicit recursion, it keeps
@@ -419,49 +635,64 @@ static void
scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
{
byte *b, *arena_start, *arena_used;
- uintptr n, i, end_b, elemsize, ti, objti, count /* , type */;
+ uintptr n, i, end_b, elemsize, size, ti, objti, count /* , type */;
uintptr *pc, precise_type, nominal_size;
+#if 0
+ uintptr *map_ret, mapkey_size, mapval_size, mapkey_ti, mapval_ti, *chan_ret, chancap;
+#endif
void *obj;
const Type *t;
Slice *sliceptr;
Frame *stack_ptr, stack_top, stack[GC_STACK_CAPACITY+4];
BufferList *scanbuffers;
PtrTarget *ptrbuf, *ptrbuf_end, *ptrbufpos;
- BitTarget *bitbuf;
+ Obj *objbuf, *objbuf_end, *objbufpos;
Eface *eface;
Iface *iface;
+#if 0
+ Hmap *hmap;
+ MapType *maptype;
+ bool mapkey_kind, mapval_kind;
+ struct hash_gciter map_iter;
+ struct hash_gciter_data d;
+ Hchan *chan;
+ ChanType *chantype;
+#endif
if(sizeof(Workbuf) % PageSize != 0)
runtime_throw("scanblock: size of Workbuf is suboptimal");
// Memory arena parameters.
- arena_start = runtime_mheap.arena_start;
- arena_used = runtime_mheap.arena_used;
+ arena_start = runtime_mheap->arena_start;
+ arena_used = runtime_mheap->arena_used;
stack_ptr = stack+nelem(stack)-1;
precise_type = false;
nominal_size = 0;
- // Allocate ptrbuf, bitbuf
+ // Allocate ptrbuf
{
- runtime_lock(&lock);
-
- if(bufferList == nil) {
- bufferList = runtime_SysAlloc(sizeof(*bufferList));
- bufferList->next = nil;
- }
- scanbuffers = bufferList;
- bufferList = bufferList->next;
-
+ scanbuffers = &bufferList[runtime_m()->helpgc];
ptrbuf = &scanbuffers->ptrtarget[0];
ptrbuf_end = &scanbuffers->ptrtarget[0] + nelem(scanbuffers->ptrtarget);
- bitbuf = &scanbuffers->bittarget[0];
-
- runtime_unlock(&lock);
+ objbuf = &scanbuffers->obj[0];
+ objbuf_end = &scanbuffers->obj[0] + nelem(scanbuffers->obj);
}
ptrbufpos = ptrbuf;
+ objbufpos = objbuf;
+
+ // (Silence the compiler)
+#if 0
+ map_ret = nil;
+ mapkey_size = mapval_size = 0;
+ mapkey_kind = mapval_kind = false;
+ mapkey_ti = mapval_ti = 0;
+ chan = nil;
+ chantype = nil;
+ chan_ret = nil;
+#endif
goto next_block;
@@ -472,7 +703,13 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
runtime_printf("scanblock %p %D\n", b, (int64)n);
}
- if(ti != 0 && 0) {
+ if(CollectStats) {
+ runtime_xadd64(&gcstats.nbytes, n);
+ runtime_xadd64(&gcstats.obj.sum, nobj);
+ runtime_xadd64(&gcstats.obj.cnt, 1);
+ }
+
+ if(ti != 0 && false) {
pc = (uintptr*)(ti & ~(uintptr)PC_BITS);
precise_type = (ti & PRECISE);
stack_top.elemsize = pc[0];
@@ -484,10 +721,27 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
} else {
stack_top.count = 1;
}
- } else if(UseSpanType && 0) {
+ if(Debug) {
+ // Simple sanity check for provided type info ti:
+ // The declared size of the object must be not larger than the actual size
+ // (it can be smaller due to inferior pointers).
+ // It's difficult to make a comprehensive check due to inferior pointers,
+ // reflection, gob, etc.
+ if(pc[0] > n) {
+ runtime_printf("invalid gc type info: type info size %p, block size %p\n", pc[0], n);
+ runtime_throw("invalid gc type info");
+ }
+ }
+ } else if(UseSpanType && false) {
+ if(CollectStats)
+ runtime_xadd64(&gcstats.obj.notype, 1);
+
#if 0
type = runtime_gettype(b);
if(type != 0) {
+ if(CollectStats)
+ runtime_xadd64(&gcstats.obj.typelookup, 1);
+
t = (Type*)(type & ~(uintptr)(PtrSize-1));
switch(type & (PtrSize-1)) {
case TypeInfo_SingleObject:
@@ -506,8 +760,27 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
stack_top.loop_or_ret = pc+1;
break;
case TypeInfo_Map:
- // TODO(atom): to be expanded in a next CL
- pc = defaultProg;
+ hmap = (Hmap*)b;
+ maptype = (MapType*)t;
+ if(hash_gciter_init(hmap, &map_iter)) {
+ mapkey_size = maptype->key->size;
+ mapkey_kind = maptype->key->kind;
+ mapkey_ti = (uintptr)maptype->key->gc | PRECISE;
+ mapval_size = maptype->elem->size;
+ mapval_kind = maptype->elem->kind;
+ mapval_ti = (uintptr)maptype->elem->gc | PRECISE;
+
+ map_ret = nil;
+ pc = mapProg;
+ } else {
+ goto next_block;
+ }
+ break;
+ case TypeInfo_Chan:
+ chan = (Hchan*)b;
+ chantype = (ChanType*)t;
+ chan_ret = nil;
+ pc = chanProg;
break;
default:
runtime_throw("scanblock: invalid type");
@@ -521,12 +794,18 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
pc = defaultProg;
}
+ if(IgnorePreciseGC)
+ pc = defaultProg;
+
pc++;
stack_top.b = (uintptr)b;
end_b = (uintptr)b + n - PtrSize;
for(;;) {
+ if(CollectStats)
+ runtime_xadd64(&gcstats.instr[pc[0]], 1);
+
obj = nil;
objti = 0;
switch(pc[0]) {
@@ -534,13 +813,19 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
obj = *(void**)(stack_top.b + pc[1]);
objti = pc[2];
pc += 3;
+ if(Debug)
+ checkptr(obj, objti);
break;
case GC_SLICE:
sliceptr = (Slice*)(stack_top.b + pc[1]);
if(sliceptr->cap != 0) {
obj = sliceptr->array;
- objti = pc[2] | PRECISE | LOOP;
+ // Can't use slice element type for scanning,
+ // because if it points to an array embedded
+ // in the beginning of a struct,
+ // we will scan the whole struct as the slice.
+ // So just obtain type info from heap.
}
pc += 3;
break;
@@ -552,17 +837,31 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
case GC_STRING:
obj = *(void**)(stack_top.b + pc[1]);
+ markonly(obj);
pc += 2;
- break;
+ continue;
case GC_EFACE:
eface = (Eface*)(stack_top.b + pc[1]);
pc += 2;
- if(eface->type != nil && ((byte*)eface->__object >= arena_start && (byte*)eface->__object < arena_used)) {
- t = eface->type;
+ if(eface->type == nil)
+ continue;
+
+ // eface->type
+ t = eface->type;
+ if((const byte*)t >= arena_start && (const byte*)t < arena_used) {
+ union { const Type *tc; Type *tr; } u;
+ u.tc = t;
+ *ptrbufpos++ = (struct PtrTarget){(void*)u.tr, 0};
+ if(ptrbufpos == ptrbuf_end)
+ flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
+ }
+
+ // eface->__object
+ if((byte*)eface->__object >= arena_start && (byte*)eface->__object < arena_used) {
if(t->__size <= sizeof(void*)) {
if((t->kind & KindNoPointers))
- break;
+ continue;
obj = eface->__object;
if((t->kind & ~KindNoPointers) == KindPtr)
@@ -580,14 +879,14 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
iface = (Iface*)(stack_top.b + pc[1]);
pc += 2;
if(iface->tab == nil)
- break;
+ continue;
// iface->tab
if((byte*)iface->tab >= arena_start && (byte*)iface->tab < arena_used) {
// *ptrbufpos++ = (struct PtrTarget){iface->tab, (uintptr)itabtype->gc};
*ptrbufpos++ = (struct PtrTarget){iface->tab, 0};
if(ptrbufpos == ptrbuf_end)
- flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj, bitbuf);
+ flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
}
// iface->data
@@ -596,7 +895,7 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
t = nil;
if(t->__size <= sizeof(void*)) {
if((t->kind & KindNoPointers))
- break;
+ continue;
obj = iface->__object;
if((t->kind & ~KindNoPointers) == KindPtr)
@@ -611,13 +910,13 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
break;
case GC_DEFAULT_PTR:
- while((i = stack_top.b) <= end_b) {
+ while(stack_top.b <= end_b) {
+ obj = *(byte**)stack_top.b;
stack_top.b += PtrSize;
- obj = *(byte**)i;
if((byte*)obj >= arena_start && (byte*)obj < arena_used) {
*ptrbufpos++ = (struct PtrTarget){obj, 0};
if(ptrbufpos == ptrbuf_end)
- flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj, bitbuf);
+ flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
}
}
goto next_block;
@@ -625,9 +924,8 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
case GC_END:
if(--stack_top.count != 0) {
// Next iteration of a loop if possible.
- elemsize = stack_top.elemsize;
- stack_top.b += elemsize;
- if(stack_top.b + elemsize <= end_b+PtrSize) {
+ stack_top.b += stack_top.elemsize;
+ if(stack_top.b + stack_top.elemsize <= end_b+PtrSize) {
pc = stack_top.loop_or_ret;
continue;
}
@@ -648,6 +946,10 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
// Found a value that may be a pointer.
// Do a rescan of the entire block.
enqueue((Obj){b, n, 0}, &wbuf, &wp, &nobj);
+ if(CollectStats) {
+ runtime_xadd64(&gcstats.rescan, 1);
+ runtime_xadd64(&gcstats.rescanbytes, n);
+ }
break;
}
}
@@ -680,20 +982,136 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
// Stack push.
*stack_ptr-- = stack_top;
stack_top = (Frame){1, 0, stack_top.b + pc[1], pc+3 /*return address*/};
- pc = (uintptr*)pc[2]; // target of the CALL instruction
+ pc = (uintptr*)((byte*)pc + *(int32*)(pc+2)); // target of the CALL instruction
continue;
+#if 0
case GC_MAP_PTR:
- // TODO(atom): to be expanded in a next CL. Same as GC_APTR for now.
- obj = *(void**)(stack_top.b + pc[1]);
- pc += 3;
- break;
+ hmap = *(Hmap**)(stack_top.b + pc[1]);
+ if(hmap == nil) {
+ pc += 3;
+ continue;
+ }
+ if(markonly(hmap)) {
+ maptype = (MapType*)pc[2];
+ if(hash_gciter_init(hmap, &map_iter)) {
+ mapkey_size = maptype->key->size;
+ mapkey_kind = maptype->key->kind;
+ mapkey_ti = (uintptr)maptype->key->gc | PRECISE;
+ mapval_size = maptype->elem->size;
+ mapval_kind = maptype->elem->kind;
+ mapval_ti = (uintptr)maptype->elem->gc | PRECISE;
+
+ // Start mapProg.
+ map_ret = pc+3;
+ pc = mapProg+1;
+ } else {
+ pc += 3;
+ }
+ } else {
+ pc += 3;
+ }
+ continue;
+
+ case GC_MAP_NEXT:
+ // Add all keys and values to buffers, mark all subtables.
+ while(hash_gciter_next(&map_iter, &d)) {
+ // buffers: reserve space for 2 objects.
+ if(ptrbufpos+2 >= ptrbuf_end)
+ flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
+ if(objbufpos+2 >= objbuf_end)
+ flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
+
+ if(d.st != nil)
+ markonly(d.st);
+
+ if(d.key_data != nil) {
+ if(!(mapkey_kind & KindNoPointers) || d.indirectkey) {
+ if(!d.indirectkey)
+ *objbufpos++ = (Obj){d.key_data, mapkey_size, mapkey_ti};
+ else {
+ if(Debug) {
+ obj = *(void**)d.key_data;
+ if(!(arena_start <= obj && obj < arena_used))
+ runtime_throw("scanblock: inconsistent hashmap");
+ }
+ *ptrbufpos++ = (struct PtrTarget){*(void**)d.key_data, mapkey_ti};
+ }
+ }
+ if(!(mapval_kind & KindNoPointers) || d.indirectval) {
+ if(!d.indirectval)
+ *objbufpos++ = (Obj){d.val_data, mapval_size, mapval_ti};
+ else {
+ if(Debug) {
+ obj = *(void**)d.val_data;
+ if(!(arena_start <= obj && obj < arena_used))
+ runtime_throw("scanblock: inconsistent hashmap");
+ }
+ *ptrbufpos++ = (struct PtrTarget){*(void**)d.val_data, mapval_ti};
+ }
+ }
+ }
+ }
+ if(map_ret == nil)
+ goto next_block;
+ pc = map_ret;
+ continue;
+#endif
case GC_REGION:
- // TODO(atom): to be expanded in a next CL. Same as GC_APTR for now.
obj = (void*)(stack_top.b + pc[1]);
+ size = pc[2];
+ objti = pc[3];
pc += 4;
- break;
+
+ *objbufpos++ = (Obj){obj, size, objti};
+ if(objbufpos == objbuf_end)
+ flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
+ continue;
+
+#if 0
+ case GC_CHAN_PTR:
+ // Similar to GC_MAP_PTR
+ chan = *(Hchan**)(stack_top.b + pc[1]);
+ if(chan == nil) {
+ pc += 3;
+ continue;
+ }
+ if(markonly(chan)) {
+ chantype = (ChanType*)pc[2];
+ if(!(chantype->elem->kind & KindNoPointers)) {
+ // Start chanProg.
+ chan_ret = pc+3;
+ pc = chanProg+1;
+ continue;
+ }
+ }
+ pc += 3;
+ continue;
+
+ case GC_CHAN:
+ // There are no heap pointers in struct Hchan,
+ // so we can ignore the leading sizeof(Hchan) bytes.
+ if(!(chantype->elem->kind & KindNoPointers)) {
+ // Channel's buffer follows Hchan immediately in memory.
+ // Size of buffer (cap(c)) is second int in the chan struct.
+ chancap = ((uintgo*)chan)[1];
+ if(chancap > 0) {
+ // TODO(atom): split into two chunks so that only the
+ // in-use part of the circular buffer is scanned.
+ // (Channel routines zero the unused part, so the current
+ // code does not lead to leaks, it's just a little inefficient.)
+ *objbufpos++ = (Obj){(byte*)chan+runtime_Hchansize, chancap*chantype->elem->size,
+ (uintptr)chantype->elem->gc | PRECISE | LOOP};
+ if(objbufpos == objbuf_end)
+ flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
+ }
+ }
+ if(chan_ret == nil)
+ goto next_block;
+ pc = chan_ret;
+ continue;
+#endif
default:
runtime_throw("scanblock: invalid GC instruction");
@@ -701,9 +1119,9 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
}
if((byte*)obj >= arena_start && (byte*)obj < arena_used) {
- *ptrbufpos++ = (PtrTarget){obj, objti};
+ *ptrbufpos++ = (struct PtrTarget){obj, objti};
if(ptrbufpos == ptrbuf_end)
- flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj, bitbuf);
+ flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
}
}
@@ -712,7 +1130,8 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
// the loop by setting b, n, ti to the parameters for the next block.
if(nobj == 0) {
- flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj, bitbuf);
+ flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
+ flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
if(nobj == 0) {
if(!keepworking) {
@@ -737,11 +1156,7 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
nobj--;
}
-endscan:
- runtime_lock(&lock);
- scanbuffers->next = bufferList;
- bufferList = scanbuffers;
- runtime_unlock(&lock);
+endscan:;
}
// debug_scanblock is the debug copy of scanblock.
@@ -776,14 +1191,14 @@ debug_scanblock(byte *b, uintptr n)
obj = (byte*)vp[i];
// Words outside the arena cannot be pointers.
- if((byte*)obj < runtime_mheap.arena_start || (byte*)obj >= runtime_mheap.arena_used)
+ if((byte*)obj < runtime_mheap->arena_start || (byte*)obj >= runtime_mheap->arena_used)
continue;
// Round down to word boundary.
obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
// Consult span table to find beginning.
- s = runtime_MHeap_LookupMaybe(&runtime_mheap, obj);
+ s = runtime_MHeap_LookupMaybe(runtime_mheap, obj);
if(s == nil)
continue;
@@ -799,8 +1214,8 @@ debug_scanblock(byte *b, uintptr n)
}
// Now that we know the object header, reload bits.
- off = (uintptr*)obj - (uintptr*)runtime_mheap.arena_start;
- bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
+ off = (uintptr*)obj - (uintptr*)runtime_mheap->arena_start;
+ bitp = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
xbits = *bitp;
bits = xbits >> shift;
@@ -906,6 +1321,8 @@ getempty(Workbuf *b)
if(work.nchunk < sizeof *b) {
work.nchunk = 1<<20;
work.chunk = runtime_SysAlloc(work.nchunk);
+ if(work.chunk == nil)
+ runtime_throw("runtime: cannot allocate memory");
}
b = (Workbuf*)work.chunk;
work.chunk += sizeof *b;
@@ -919,6 +1336,9 @@ getempty(Workbuf *b)
static void
putempty(Workbuf *b)
{
+ if(CollectStats)
+ runtime_xadd64(&gcstats.putempty, 1);
+
runtime_lfstackpush(&work.empty, &b->node);
}
@@ -929,6 +1349,9 @@ getfull(Workbuf *b)
M *m;
int32 i;
+ if(CollectStats)
+ runtime_xadd64(&gcstats.getfull, 1);
+
if(b != nil)
runtime_lfstackpush(&work.empty, &b->node);
b = (Workbuf*)runtime_lfstackpop(&work.full);
@@ -994,6 +1417,8 @@ addroot(Obj obj)
if(cap < 2*work.rootcap)
cap = 2*work.rootcap;
new = (Obj*)runtime_SysAlloc(cap*sizeof(Obj));
+ if(new == nil)
+ runtime_throw("runtime: cannot allocate memory");
if(work.roots != nil) {
runtime_memmove(new, work.roots, work.rootcap*sizeof(Obj));
runtime_SysFree(work.roots, work.rootcap*sizeof(Obj));
@@ -1081,13 +1506,14 @@ static void
addfinroots(void *v)
{
uintptr size;
+ void *base;
size = 0;
- if(!runtime_mlookup(v, (byte**)&v, &size, nil) || !runtime_blockspecial(v))
+ if(!runtime_mlookup(v, (byte**)&base, &size, nil) || !runtime_blockspecial(base))
runtime_throw("mark - finalizer inconsistency");
// do not mark the finalizer block itself. just mark the things it points at.
- addroot((Obj){v, size, 0});
+ addroot((Obj){base, size, 0});
}
static struct root_list* roots;
@@ -1128,22 +1554,27 @@ addroots(void)
addroot((Obj){(byte*)&runtime_g0, sizeof runtime_g0, 0});
addroot((Obj){(byte*)&runtime_allg, sizeof runtime_allg, 0});
addroot((Obj){(byte*)&runtime_allm, sizeof runtime_allm, 0});
+ addroot((Obj){(byte*)&runtime_allp, sizeof runtime_allp, 0});
+ runtime_proc_scan(addroot);
runtime_MProf_Mark(addroot);
runtime_time_scan(addroot);
// MSpan.types
- allspans = runtime_mheap.allspans;
- for(spanidx=0; spanidx<runtime_mheap.nspan; spanidx++) {
+ allspans = runtime_mheap->allspans;
+ for(spanidx=0; spanidx<runtime_mheap->nspan; spanidx++) {
s = allspans[spanidx];
if(s->state == MSpanInUse) {
+ // The garbage collector ignores type pointers stored in MSpan.types:
+ // - Compiler-generated types are stored outside of the heap.
+ // - The reflect package has runtime-generated types cached in its data structures.
+ // The garbage collector relies on finding the references via that cache.
switch(s->types.compression) {
case MTypes_Empty:
case MTypes_Single:
break;
case MTypes_Words:
case MTypes_Bytes:
- // TODO(atom): consider using defaultProg instead of 0
- addroot((Obj){(byte*)&s->types.data, sizeof(void*), 0});
+ markonly((byte*)s->types.data);
break;
}
}
@@ -1196,6 +1627,8 @@ handlespecial(byte *p, uintptr size)
if(finq == nil || finq->cnt == finq->cap) {
if(finc == nil) {
finc = runtime_SysAlloc(PageSize);
+ if(finc == nil)
+ runtime_throw("runtime: cannot allocate memory");
finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
finc->alllink = allfin;
allfin = finc;
@@ -1235,10 +1668,10 @@ sweepspan(ParFor *desc, uint32 idx)
m = runtime_m();
USED(&desc);
- s = runtime_mheap.allspans[idx];
+ s = runtime_mheap->allspans[idx];
if(s->state != MSpanInUse)
return;
- arena_start = runtime_mheap.arena_start;
+ arena_start = runtime_mheap->arena_start;
p = (byte*)(s->start << PageShift);
cl = s->sizeclass;
size = s->elemsize;
@@ -1301,8 +1734,8 @@ sweepspan(ParFor *desc, uint32 idx)
if(cl == 0) {
// Free large span.
runtime_unmarkspan(p, 1<<PageShift);
- *(uintptr*)p = 1; // needs zeroing
- runtime_MHeap_Free(&runtime_mheap, s, 1);
+ *(uintptr*)p = (uintptr)0xdeaddeaddeaddeadll; // needs zeroing
+ runtime_MHeap_Free(runtime_mheap, s, 1);
c->local_alloc -= size;
c->local_nfree++;
} else {
@@ -1316,7 +1749,7 @@ sweepspan(ParFor *desc, uint32 idx)
break;
}
if(size > sizeof(uintptr))
- ((uintptr*)p)[1] = 1; // mark as "needs to be zeroed"
+ ((uintptr*)p)[1] = (uintptr)0xdeaddeaddeaddeadll; // mark as "needs to be zeroed"
end->next = (MLink*)p;
end = (MLink*)p;
@@ -1330,7 +1763,7 @@ sweepspan(ParFor *desc, uint32 idx)
c->local_nfree += nfree;
c->local_cachealloc -= nfree * size;
c->local_objects -= nfree;
- runtime_MCentral_FreeSpan(&runtime_mheap.central[cl], s, nfree, head.next, end);
+ runtime_MCentral_FreeSpan(&runtime_mheap->central[cl], s, nfree, head.next, end);
}
}
@@ -1344,10 +1777,10 @@ dumpspan(uint32 idx)
MSpan *s;
bool allocated, special;
- s = runtime_mheap.allspans[idx];
+ s = runtime_mheap->allspans[idx];
if(s->state != MSpanInUse)
return;
- arena_start = runtime_mheap.arena_start;
+ arena_start = runtime_mheap->arena_start;
p = (byte*)(s->start << PageShift);
sizeclass = s->sizeclass;
size = s->elemsize;
@@ -1405,7 +1838,7 @@ runtime_memorydump(void)
{
uint32 spanidx;
- for(spanidx=0; spanidx<runtime_mheap.nspan; spanidx++) {
+ for(spanidx=0; spanidx<runtime_mheap->nspan; spanidx++) {
dumpspan(spanidx);
}
}
@@ -1413,6 +1846,8 @@ runtime_memorydump(void)
void
runtime_gchelper(void)
{
+ gchelperstart();
+
	// parallel mark over gc roots
runtime_parfordo(work.markfor);
@@ -1426,10 +1861,13 @@ runtime_gchelper(void)
}
runtime_parfordo(work.sweepfor);
+ bufferList[runtime_m()->helpgc].busy = 0;
if(runtime_xadd(&work.ndone, +1) == work.nproc-1)
runtime_notewakeup(&work.alldone);
}
+#define GcpercentUnknown (-2)
+
// Initialized from $GOGC. GOGC=off means no gc.
//
// Next gc is after we've allocated an extra amount of
@@ -1439,22 +1877,14 @@ runtime_gchelper(void)
// proportion to the allocation cost. Adjusting gcpercent
// just changes the linear constant (and also the amount of
// extra memory used).
-static int32 gcpercent = -2;
-
-static void
-stealcache(void)
-{
- M *mp;
-
- for(mp=runtime_allm; mp; mp=mp->alllink)
- runtime_MCache_ReleaseAll(mp->mcache);
-}
+static int32 gcpercent = GcpercentUnknown;
static void
cachestats(GCStats *stats)
{
M *mp;
MCache *c;
+ P *p, **pp;
uint32 i;
uint64 stacks_inuse;
uint64 *src, *dst;
@@ -1463,9 +1893,7 @@ cachestats(GCStats *stats)
runtime_memclr((byte*)stats, sizeof(*stats));
stacks_inuse = 0;
for(mp=runtime_allm; mp; mp=mp->alllink) {
- c = mp->mcache;
- runtime_purgecachedstats(c);
- // stacks_inuse += mp->stackinuse*FixedStack;
+ //stacks_inuse += mp->stackinuse*FixedStack;
if(stats) {
src = (uint64*)&mp->gcstats;
dst = (uint64*)stats;
@@ -1473,6 +1901,12 @@ cachestats(GCStats *stats)
dst[i] += src[i];
runtime_memclr((byte*)&mp->gcstats, sizeof(mp->gcstats));
}
+ }
+ for(pp=runtime_allp; (p=*pp) != nil; pp++) {
+ c = p->mcache;
+ if(c==nil)
+ continue;
+ runtime_purgecachedstats(c);
for(i=0; i<nelem(c->local_by_size); i++) {
mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc;
c->local_by_size[i].nmalloc = 0;
@@ -1492,6 +1926,19 @@ struct gc_args
static void gc(struct gc_args *args);
+static int32
+readgogc(void)
+{
+ const byte *p;
+
+ p = runtime_getenv("GOGC");
+ if(p == nil || p[0] == '\0')
+ return 100;
+ if(runtime_strcmp((const char *)p, "off") == 0)
+ return -1;
+ return runtime_atoi(p);
+}
+
void
runtime_gc(int32 force)
{
@@ -1504,6 +1951,8 @@ runtime_gc(int32 force)
// a problem in the past.
if((((uintptr)&work.empty) & 7) != 0)
runtime_throw("runtime: gc work buffer is misaligned");
+ if((((uintptr)&work.full) & 7) != 0)
+ runtime_throw("runtime: gc work buffer is misaligned");
// Make sure all registers are saved on stack so that
// scanstack sees them.
@@ -1521,14 +1970,8 @@ runtime_gc(int32 force)
if(!mstats.enablegc || m->locks > 0 || runtime_panicking)
return;
- if(gcpercent == -2) { // first time through
- p = runtime_getenv("GOGC");
- if(p == nil || p[0] == '\0')
- gcpercent = 100;
- else if(runtime_strcmp((const char*)p, "off") == 0)
- gcpercent = -1;
- else
- gcpercent = runtime_atoi(p);
+ if(gcpercent == GcpercentUnknown) { // first time through
+ gcpercent = readgogc();
p = runtime_getenv("GOGCTRACE");
if(p != nil)
@@ -1555,7 +1998,7 @@ gc(struct gc_args *args)
{
M *m;
int64 t0, t1, t2, t3, t4;
- uint64 heap0, heap1, obj0, obj1;
+ uint64 heap0, heap1, obj0, obj1, ninstr;
GCStats stats;
M *mp;
uint32 i;
@@ -1574,6 +2017,9 @@ gc(struct gc_args *args)
m->gcing = 1;
runtime_stoptheworld();
+ if(CollectStats)
+ runtime_memclr((byte*)&gcstats, sizeof(gcstats));
+
for(mp=runtime_allm; mp; mp=mp->alllink)
runtime_settype_flush(mp, false);
@@ -1604,7 +2050,7 @@ gc(struct gc_args *args)
work.nproc = runtime_gcprocs();
addroots();
runtime_parforsetup(work.markfor, work.nproc, work.nroot, nil, false, markroot);
- runtime_parforsetup(work.sweepfor, work.nproc, runtime_mheap.nspan, nil, true, sweepspan);
+ runtime_parforsetup(work.sweepfor, work.nproc, runtime_mheap->nspan, nil, true, sweepspan);
if(work.nproc > 1) {
runtime_noteclear(&work.alldone);
runtime_helpgc(work.nproc);
@@ -1612,6 +2058,7 @@ gc(struct gc_args *args)
t1 = runtime_nanotime();
+ gchelperstart();
runtime_parfordo(work.markfor);
scanblock(nil, nil, 0, true);
@@ -1623,14 +2070,14 @@ gc(struct gc_args *args)
t2 = runtime_nanotime();
runtime_parfordo(work.sweepfor);
+ bufferList[m->helpgc].busy = 0;
t3 = runtime_nanotime();
- stealcache();
- cachestats(&stats);
-
if(work.nproc > 1)
runtime_notesleep(&work.alldone);
+ cachestats(&stats);
+
stats.nprocyield += work.sweepfor->nprocyield;
stats.nosyield += work.sweepfor->nosyield;
stats.nsleep += work.sweepfor->nsleep;
@@ -1670,6 +2117,27 @@ gc(struct gc_args *args)
stats.nhandoff, stats.nhandoffcnt,
work.sweepfor->nsteal, work.sweepfor->nstealcnt,
stats.nprocyield, stats.nosyield, stats.nsleep);
+ if(CollectStats) {
+ runtime_printf("scan: %D bytes, %D objects, %D untyped, %D types from MSpan\n",
+ gcstats.nbytes, gcstats.obj.cnt, gcstats.obj.notype, gcstats.obj.typelookup);
+ if(gcstats.ptr.cnt != 0)
+ runtime_printf("avg ptrbufsize: %D (%D/%D)\n",
+ gcstats.ptr.sum/gcstats.ptr.cnt, gcstats.ptr.sum, gcstats.ptr.cnt);
+ if(gcstats.obj.cnt != 0)
+ runtime_printf("avg nobj: %D (%D/%D)\n",
+ gcstats.obj.sum/gcstats.obj.cnt, gcstats.obj.sum, gcstats.obj.cnt);
+ runtime_printf("rescans: %D, %D bytes\n", gcstats.rescan, gcstats.rescanbytes);
+
+ runtime_printf("instruction counts:\n");
+ ninstr = 0;
+ for(i=0; i<nelem(gcstats.instr); i++) {
+ runtime_printf("\t%d:\t%D\n", i, gcstats.instr[i]);
+ ninstr += gcstats.instr[i];
+ }
+ runtime_printf("\ttotal:\t%D\n", ninstr);
+
+ runtime_printf("putempty: %D, getfull: %D\n", gcstats.putempty, gcstats.getfull);
+ }
}
runtime_MProf_GC();
@@ -1704,6 +2172,71 @@ runtime_ReadMemStats(MStats *stats)
runtime_starttheworld();
}
+void runtime_debug_readGCStats(Slice*)
+ __asm__("runtime_debug.readGCStats");
+
+void
+runtime_debug_readGCStats(Slice *pauses)
+{
+ uint64 *p;
+ uint32 i, n;
+
+ // Calling code in runtime/debug should make the slice large enough.
+ if((size_t)pauses->cap < nelem(mstats.pause_ns)+3)
+ runtime_throw("runtime: short slice passed to readGCStats");
+
+ // Pass back: pauses, last gc (absolute time), number of gc, total pause ns.
+ p = (uint64*)pauses->array;
+ runtime_lock(runtime_mheap);
+ n = mstats.numgc;
+ if(n > nelem(mstats.pause_ns))
+ n = nelem(mstats.pause_ns);
+
+ // The pause buffer is circular. The most recent pause is at
+ // pause_ns[(numgc-1)%nelem(pause_ns)]; walking backward from
+ // there goes farther back in time. We deliver the times
+ // most recent first (in p[0]).
+ for(i=0; i<n; i++)
+ p[i] = mstats.pause_ns[(mstats.numgc-1-i)%nelem(mstats.pause_ns)];
+
+ p[n] = mstats.last_gc;
+ p[n+1] = mstats.numgc;
+ p[n+2] = mstats.pause_total_ns;
+ runtime_unlock(runtime_mheap);
+ pauses->__count = n+3;
+}
+
+intgo runtime_debug_setGCPercent(intgo)
+ __asm__("runtime_debug.setGCPercent");
+
+intgo
+runtime_debug_setGCPercent(intgo in)
+{
+ intgo out;
+
+ runtime_lock(runtime_mheap);
+ if(gcpercent == GcpercentUnknown)
+ gcpercent = readgogc();
+ out = gcpercent;
+ if(in < 0)
+ in = -1;
+ gcpercent = in;
+ runtime_unlock(runtime_mheap);
+ return out;
+}
+
+static void
+gchelperstart(void)
+{
+ M *m;
+
+ m = runtime_m();
+ if(m->helpgc < 0 || m->helpgc >= MaxGcproc)
+ runtime_throw("gchelperstart: bad m->helpgc");
+ if(runtime_xchg(&bufferList[m->helpgc].busy, 1))
+ runtime_throw("gchelperstart: already busy");
+}
+
static void
runfinq(void* dummy __attribute__ ((unused)))
{
@@ -1757,11 +2290,11 @@ runtime_markallocated(void *v, uintptr n, bool noptr)
if(0)
runtime_printf("markallocated %p+%p\n", v, n);
- if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
+ if((byte*)v+n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start)
runtime_throw("markallocated: bad pointer");
- off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; // word offset
- b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
+ off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start; // word offset
+ b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
for(;;) {
@@ -1789,11 +2322,11 @@ runtime_markfreed(void *v, uintptr n)
if(0)
runtime_printf("markallocated %p+%p\n", v, n);
- if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
+ if((byte*)v+n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start)
runtime_throw("markallocated: bad pointer");
- off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; // word offset
- b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
+ off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start; // word offset
+ b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
for(;;) {
@@ -1819,11 +2352,11 @@ runtime_checkfreed(void *v, uintptr n)
if(!runtime_checking)
return;
- if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
+ if((byte*)v+n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start)
return; // not allocated, so okay
- off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; // word offset
- b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
+ off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start; // word offset
+ b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
bits = *b>>shift;
@@ -1842,7 +2375,7 @@ runtime_markspan(void *v, uintptr size, uintptr n, bool leftover)
uintptr *b, off, shift;
byte *p;
- if((byte*)v+size*n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
+ if((byte*)v+size*n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start)
runtime_throw("markspan: bad pointer");
p = v;
@@ -1853,8 +2386,8 @@ runtime_markspan(void *v, uintptr size, uintptr n, bool leftover)
// the entire span, and each bitmap word has bits for only
// one span, so no other goroutines are changing these
// bitmap words.
- off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start; // word offset
- b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
+ off = (uintptr*)p - (uintptr*)runtime_mheap->arena_start; // word offset
+ b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
*b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
}
@@ -1866,14 +2399,14 @@ runtime_unmarkspan(void *v, uintptr n)
{
uintptr *p, *b, off;
- if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
+ if((byte*)v+n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start)
runtime_throw("markspan: bad pointer");
p = v;
- off = p - (uintptr*)runtime_mheap.arena_start; // word offset
+ off = p - (uintptr*)runtime_mheap->arena_start; // word offset
if(off % wordsPerBitmapWord != 0)
runtime_throw("markspan: unaligned pointer");
- b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
+ b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
n /= PtrSize;
if(n%wordsPerBitmapWord != 0)
runtime_throw("unmarkspan: unaligned length");
@@ -1894,8 +2427,8 @@ runtime_blockspecial(void *v)
if(DebugMark)
return true;
- off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
- b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
+ off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start;
+ b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
return (*b & (bitSpecial<<shift)) != 0;
@@ -1909,8 +2442,8 @@ runtime_setblockspecial(void *v, bool s)
if(DebugMark)
return;
- off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
- b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
+ off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start;
+ b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
for(;;) {
diff --git a/libgo/runtime/mgc0.h b/libgo/runtime/mgc0.h
index a2798ef..d14fb37 100644
--- a/libgo/runtime/mgc0.h
+++ b/libgo/runtime/mgc0.h
@@ -12,17 +12,22 @@
// Meaning of arguments:
// off Offset (in bytes) from the start of the current object
// objgc Pointer to GC info of an object
+// objgcrel Offset to GC info of an object
// len Length of an array
// elemsize Size (in bytes) of an element
// size Size (in bytes)
+//
+// NOTE: There is a copy of these in ../reflect/type.go.
+// They must be kept in sync.
enum {
GC_END, // End of object, loop or subroutine. Args: none
GC_PTR, // A typed pointer. Args: (off, objgc)
GC_APTR, // Pointer to an arbitrary object. Args: (off)
GC_ARRAY_START, // Start an array with a fixed length. Args: (off, len, elemsize)
GC_ARRAY_NEXT, // The next element of an array. Args: none
- GC_CALL, // Call a subroutine. Args: (off, objgc)
+ GC_CALL, // Call a subroutine. Args: (off, objgcrel)
GC_MAP_PTR, // Go map. Args: (off, MapType*)
+ GC_CHAN_PTR, // Go channel. Args: (off, ChanType*)
GC_STRING, // Go string. Args: (off)
GC_EFACE, // interface{}. Args: (off)
GC_IFACE, // interface{...}. Args: (off)
diff --git a/libgo/runtime/mheap.c b/libgo/runtime/mheap.c
index 6636b01..b4d94b6 100644
--- a/libgo/runtime/mheap.c
+++ b/libgo/runtime/mheap.c
@@ -37,6 +37,8 @@ RecordSpan(void *vh, byte *p)
if(cap < h->nspancap*3/2)
cap = h->nspancap*3/2;
all = (MSpan**)runtime_SysAlloc(cap*sizeof(all[0]));
+ if(all == nil)
+ runtime_throw("runtime: cannot allocate memory");
if(h->allspans) {
runtime_memmove(all, h->allspans, h->nspancap*sizeof(all[0]));
runtime_SysFree(h->allspans, h->nspancap*sizeof(all[0]));
@@ -119,6 +121,25 @@ HaveSpan:
s->state = MSpanInUse;
mstats.heap_idle -= s->npages<<PageShift;
mstats.heap_released -= s->npreleased<<PageShift;
+ if(s->npreleased > 0) {
+ // We have called runtime_SysUnused with these pages, and on
+ // Unix systems it called madvise. At this point at least
+ // some BSD-based kernels will return these pages either as
+ // zeros or with the old data. For our caller, the first word
+ // in the page indicates whether the span contains zeros or
+ // not (this word was set when the span was freed by
+ // MCentral_Free or runtime_MCentral_FreeSpan). If the first
+ // page in the span is returned as zeros, and some subsequent
+ // page is returned with the old data, then we will be
+ // returning a span that is assumed to be all zeros, but the
+ // actual data will not be all zeros. Avoid that problem by
+ // explicitly marking the span as not being zeroed, just in
+ // case. The beadbead constant we use here means nothing; it
+ // is just a unique constant not seen elsewhere in the
+ // runtime, as a clue in case it turns up unexpectedly in
+ // memory or in a stack trace.
+ *(uintptr*)(s->start<<PageShift) = (uintptr)0xbeadbeadbeadbeadULL;
+ }
s->npreleased = 0;
if(s->npages > npage) {
@@ -356,23 +377,64 @@ forcegchelper(void *vnote)
runtime_notewakeup(note);
}
+static uintptr
+scavengelist(MSpan *list, uint64 now, uint64 limit)
+{
+ uintptr released, sumreleased;
+ MSpan *s;
+
+ if(runtime_MSpanList_IsEmpty(list))
+ return 0;
+
+ sumreleased = 0;
+ for(s=list->next; s != list; s=s->next) {
+ if((now - s->unusedsince) > limit) {
+ released = (s->npages - s->npreleased) << PageShift;
+ mstats.heap_released += released;
+ sumreleased += released;
+ s->npreleased = s->npages;
+ runtime_SysUnused((void*)(s->start << PageShift), s->npages << PageShift);
+ }
+ }
+ return sumreleased;
+}
+
+static uintptr
+scavenge(uint64 now, uint64 limit)
+{
+ uint32 i;
+ uintptr sumreleased;
+ MHeap *h;
+
+ h = runtime_mheap;
+ sumreleased = 0;
+ for(i=0; i < nelem(h->free); i++)
+ sumreleased += scavengelist(&h->free[i], now, limit);
+ sumreleased += scavengelist(&h->large, now, limit);
+ return sumreleased;
+}
+
// Release (part of) unused memory to OS.
// Goroutine created at startup.
// Loop forever.
void
runtime_MHeap_Scavenger(void* dummy)
{
+ G *g;
MHeap *h;
- MSpan *s, *list;
uint64 tick, now, forcegc, limit;
- uint32 k, i;
- uintptr released, sumreleased;
+ uint32 k;
+ uintptr sumreleased;
const byte *env;
bool trace;
Note note, *notep;
USED(dummy);
+ g = runtime_g();
+ g->issystem = true;
+ g->isbackground = true;
+
// If we go two minutes without a garbage collection, force one to run.
forcegc = 2*60*1e9;
// If a span goes unused for 5 minutes after a garbage collection,
@@ -389,10 +451,10 @@ runtime_MHeap_Scavenger(void* dummy)
if(env != nil)
trace = runtime_atoi(env) > 0;
- h = &runtime_mheap;
+ h = runtime_mheap;
for(k=0;; k++) {
runtime_noteclear(&note);
- runtime_entersyscall();
+ runtime_entersyscallblock();
runtime_notetsleep(&note, tick);
runtime_exitsyscall();
@@ -406,7 +468,7 @@ runtime_MHeap_Scavenger(void* dummy)
runtime_noteclear(&note);
notep = &note;
__go_go(forcegchelper, (void*)notep);
- runtime_entersyscall();
+ runtime_entersyscallblock();
runtime_notesleep(&note);
runtime_exitsyscall();
if(trace)
@@ -414,24 +476,7 @@ runtime_MHeap_Scavenger(void* dummy)
runtime_lock(h);
now = runtime_nanotime();
}
- sumreleased = 0;
- for(i=0; i < nelem(h->free)+1; i++) {
- if(i < nelem(h->free))
- list = &h->free[i];
- else
- list = &h->large;
- if(runtime_MSpanList_IsEmpty(list))
- continue;
- for(s=list->next; s != list; s=s->next) {
- if((now - s->unusedsince) > limit) {
- released = (s->npages - s->npreleased) << PageShift;
- mstats.heap_released += released;
- sumreleased += released;
- s->npreleased = s->npages;
- runtime_SysUnused((void*)(s->start << PageShift), s->npages << PageShift);
- }
- }
- }
+ sumreleased = scavenge(now, limit);
runtime_unlock(h);
if(trace) {
@@ -444,6 +489,17 @@ runtime_MHeap_Scavenger(void* dummy)
}
}
+void runtime_debug_freeOSMemory(void) __asm__("runtime_debug.freeOSMemory");
+
+void
+runtime_debug_freeOSMemory(void)
+{
+ runtime_gc(1);
+ runtime_lock(runtime_mheap);
+ scavenge(~(uintptr)0, 0);
+ runtime_unlock(runtime_mheap);
+}
+
// Initialize a new span with the given start and npages.
void
runtime_MSpan_Init(MSpan *span, PageID start, uintptr npages)
diff --git a/libgo/runtime/mprof.goc b/libgo/runtime/mprof.goc
index c1b09be..73d9379 100644
--- a/libgo/runtime/mprof.goc
+++ b/libgo/runtime/mprof.goc
@@ -14,7 +14,43 @@ package runtime
#include "go-string.h"
// NOTE(rsc): Everything here could use cas if contention became an issue.
-static Lock proflock;
+static Lock proflock, alloclock;
+
+// All memory allocations are local and do not escape outside of the profiler.
+// The profiler is forbidden from referring to garbage-collected memory.
+
+static byte *pool; // memory allocation pool
+static uintptr poolfree; // number of bytes left in the pool
+enum {
+ Chunk = 32*PageSize, // initial size of the pool
+};
+
+// Memory allocation local to this file.
+// There is no way to return the allocated memory to the OS.
+static void*
+allocate(uintptr size)
+{
+ void *v;
+
+ if(size == 0)
+ return nil;
+
+ if(size >= Chunk/2)
+ return runtime_SysAlloc(size);
+
+ runtime_lock(&alloclock);
+ if(size > poolfree) {
+ pool = runtime_SysAlloc(Chunk);
+ if(pool == nil)
+ runtime_throw("runtime: cannot allocate memory");
+ poolfree = Chunk;
+ }
+ v = pool;
+ pool += size;
+ poolfree -= size;
+ runtime_unlock(&alloclock);
+ return v;
+}
enum { MProf, BProf }; // profile types
@@ -26,6 +62,8 @@ struct Bucket
Bucket *next; // next in hash list
Bucket *allnext; // next in list of all mbuckets/bbuckets
int32 typ;
+ // Generally, unions can break precise GC;
+ // this one is fine because it does not contain pointers.
union
{
struct // typ == MProf
@@ -67,6 +105,8 @@ stkbucket(int32 typ, Location *stk, int32 nstk, bool alloc)
if(buckhash == nil) {
buckhash = runtime_SysAlloc(BuckHashSize*sizeof buckhash[0]);
+ if(buckhash == nil)
+ runtime_throw("runtime: cannot allocate memory");
mstats.buckhash_sys += BuckHashSize*sizeof buckhash[0];
}
@@ -97,7 +137,9 @@ stkbucket(int32 typ, Location *stk, int32 nstk, bool alloc)
if(!alloc)
return nil;
- b = runtime_mallocgc(sizeof *b + nstk*sizeof stk[0], FlagNoProfiling, 0, 1);
+ b = allocate(sizeof *b + nstk*sizeof stk[0]);
+ if(b == nil)
+ runtime_throw("runtime: cannot allocate memory");
bucketmem += sizeof *b + nstk*sizeof stk[0];
runtime_memmove(b->stk, stk, nstk*sizeof stk[0]);
b->typ = typ;
@@ -115,13 +157,11 @@ stkbucket(int32 typ, Location *stk, int32 nstk, bool alloc)
return b;
}
-// Record that a gc just happened: all the 'recent' statistics are now real.
-void
-runtime_MProf_GC(void)
+static void
+MProf_GC(void)
{
Bucket *b;
-
- runtime_lock(&proflock);
+
for(b=mbuckets; b; b=b->allnext) {
b->allocs += b->recent_allocs;
b->frees += b->recent_frees;
@@ -132,6 +172,14 @@ runtime_MProf_GC(void)
b->recent_alloc_bytes = 0;
b->recent_free_bytes = 0;
}
+}
+
+// Record that a gc just happened: all the 'recent' statistics are now real.
+void
+runtime_MProf_GC(void)
+{
+ runtime_lock(&proflock);
+ MProf_GC();
runtime_unlock(&proflock);
}
@@ -166,7 +214,7 @@ struct AddrEntry
Bucket *b;
};
-static AddrHash *addrhash[1<<AddrHashBits];
+static AddrHash **addrhash; // points to (AddrHash*)[1<<AddrHashBits]
static AddrEntry *addrfree;
static uintptr addrmem;
@@ -193,7 +241,7 @@ setaddrbucket(uintptr addr, Bucket *b)
if(ah->addr == (addr>>AddrHashShift))
goto found;
- ah = runtime_mallocgc(sizeof *ah, FlagNoProfiling, 0, 1);
+ ah = allocate(sizeof *ah);
addrmem += sizeof *ah;
ah->next = addrhash[h];
ah->addr = addr>>AddrHashShift;
@@ -201,7 +249,7 @@ setaddrbucket(uintptr addr, Bucket *b)
found:
if((e = addrfree) == nil) {
- e = runtime_mallocgc(64*sizeof *e, FlagNoProfiling, 0, 0);
+ e = allocate(64*sizeof *e);
addrmem += 64*sizeof *e;
for(i=0; i+1<64; i++)
e[i].next = &e[i+1];
@@ -353,12 +401,28 @@ record(Record *r, Bucket *b)
func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) {
Bucket *b;
Record *r;
+ bool clear;
runtime_lock(&proflock);
n = 0;
- for(b=mbuckets; b; b=b->allnext)
+ clear = true;
+ for(b=mbuckets; b; b=b->allnext) {
if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
n++;
+ if(b->allocs != 0 || b->frees != 0)
+ clear = false;
+ }
+ if(clear) {
+ // Absolutely no data, suggesting that a garbage collection
+ // has not yet happened. In order to allow profiling when
+ // garbage collection is disabled from the beginning of execution,
+ // accumulate stats as if a GC just happened, and recount buckets.
+ MProf_GC();
+ n = 0;
+ for(b=mbuckets; b; b=b->allnext)
+ if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
+ n++;
+ }
ok = false;
if(n <= p.__count) {
ok = true;
@@ -531,3 +595,8 @@ func GoroutineProfile(b Slice) (n int, ok bool) {
}
}
+void
+runtime_mprofinit(void)
+{
+ addrhash = allocate((1<<AddrHashBits)*sizeof *addrhash);
+}
diff --git a/libgo/runtime/netpoll.goc b/libgo/runtime/netpoll.goc
new file mode 100644
index 0000000..a0bd735
--- /dev/null
+++ b/libgo/runtime/netpoll.goc
@@ -0,0 +1,356 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build darwin linux
+
+package net
+
+#include "runtime.h"
+#include "defs.h"
+#include "arch.h"
+#include "malloc.h"
+
+// Map gccgo field names to gc field names.
+// Eface aka __go_empty_interface.
+#define type __type_descriptor
+#define data __object
+
+// Integrated network poller (platform-independent part).
+// A particular implementation (epoll/kqueue) must define the following functions:
+// void runtime_netpollinit(void); // to initialize the poller
+// int32 runtime_netpollopen(int32 fd, PollDesc *pd); // to arm edge-triggered notifications
+// and associate fd with pd.
+// An implementation must call the following function to denote that the pd is ready.
+// void runtime_netpollready(G **gpp, PollDesc *pd, int32 mode);
+
+#define READY ((G*)1)
+
+struct PollDesc
+{
+ PollDesc* link; // in pollcache, protected by pollcache.Lock
+ Lock; // protects the following fields
+ int32 fd;
+ bool closing;
+ uintptr seq; // protects from stale timers and ready notifications
+ G* rg; // G waiting for read or READY (binary semaphore)
+ Timer rt; // read deadline timer (set if rt.fv != nil)
+ int64 rd; // read deadline
+ G* wg; // the same for writes
+ Timer wt;
+ int64 wd;
+};
+
+static struct
+{
+ Lock;
+ PollDesc* first;
+ // PollDesc objects must be type-stable,
+ // because we can get ready notifications from epoll/kqueue
+ // after the descriptor is closed/reused.
+ // Stale notifications are detected using the seq variable;
+ // seq is incremented when deadlines are changed or the descriptor is reused.
+} pollcache;
+
+static void netpollblock(PollDesc*, int32);
+static G* netpollunblock(PollDesc*, int32);
+static void deadline(int64, Eface);
+static void readDeadline(int64, Eface);
+static void writeDeadline(int64, Eface);
+static PollDesc* allocPollDesc(void);
+static intgo checkerr(PollDesc *pd, int32 mode);
+
+static FuncVal deadlineFn = {(void(*)(void))deadline};
+static FuncVal readDeadlineFn = {(void(*)(void))readDeadline};
+static FuncVal writeDeadlineFn = {(void(*)(void))writeDeadline};
+
+func runtime_pollServerInit() {
+ runtime_netpollinit();
+}
+
+func runtime_pollOpen(fd int) (pd *PollDesc, errno int) {
+ pd = allocPollDesc();
+ runtime_lock(pd);
+ if(pd->wg != nil && pd->wg != READY)
+ runtime_throw("runtime_pollOpen: blocked write on free descriptor");
+ if(pd->rg != nil && pd->rg != READY)
+ runtime_throw("runtime_pollOpen: blocked read on free descriptor");
+ pd->fd = fd;
+ pd->closing = false;
+ pd->seq++;
+ pd->rg = nil;
+ pd->rd = 0;
+ pd->wg = nil;
+ pd->wd = 0;
+ runtime_unlock(pd);
+
+ errno = runtime_netpollopen(fd, pd);
+}
+
+func runtime_pollClose(pd *PollDesc) {
+ if(!pd->closing)
+ runtime_throw("runtime_pollClose: close w/o unblock");
+ if(pd->wg != nil && pd->wg != READY)
+ runtime_throw("runtime_pollClose: blocked write on closing descriptor");
+ if(pd->rg != nil && pd->rg != READY)
+ runtime_throw("runtime_pollClose: blocked read on closing descriptor");
+ runtime_netpollclose(pd->fd);
+ runtime_lock(&pollcache);
+ pd->link = pollcache.first;
+ pollcache.first = pd;
+ runtime_unlock(&pollcache);
+}
+
+func runtime_pollReset(pd *PollDesc, mode int) (err int) {
+ runtime_lock(pd);
+ err = checkerr(pd, mode);
+ if(err)
+ goto ret;
+ if(mode == 'r')
+ pd->rg = nil;
+ else if(mode == 'w')
+ pd->wg = nil;
+ret:
+ runtime_unlock(pd);
+}
+
+func runtime_pollWait(pd *PollDesc, mode int) (err int) {
+ runtime_lock(pd);
+ err = checkerr(pd, mode);
+ if(err)
+ goto ret;
+ netpollblock(pd, mode);
+ err = checkerr(pd, mode);
+ret:
+ runtime_unlock(pd);
+}
+
+func runtime_pollSetDeadline(pd *PollDesc, d int64, mode int) {
+ runtime_lock(pd);
+ if(pd->closing)
+ goto ret;
+ pd->seq++; // invalidate current timers
+ // Reset current timers.
+ if(pd->rt.fv) {
+ runtime_deltimer(&pd->rt);
+ pd->rt.fv = nil;
+ }
+ if(pd->wt.fv) {
+ runtime_deltimer(&pd->wt);
+ pd->wt.fv = nil;
+ }
+ // Set up new timers.
+ if(d != 0 && d <= runtime_nanotime()) {
+ d = -1;
+ }
+ if(mode == 'r' || mode == 'r'+'w')
+ pd->rd = d;
+ if(mode == 'w' || mode == 'r'+'w')
+ pd->wd = d;
+ if(pd->rd > 0 && pd->rd == pd->wd) {
+ pd->rt.fv = &deadlineFn;
+ pd->rt.when = pd->rd;
+ // Copy current seq into the timer arg.
+ // Timer func will check the seq against the current descriptor seq;
+ // if they differ, the descriptor was reused or the timers were reset.
+ pd->rt.arg.type = (Type*)pd->seq;
+ pd->rt.arg.data = pd;
+ runtime_addtimer(&pd->rt);
+ } else {
+ if(pd->rd > 0) {
+ pd->rt.fv = &readDeadlineFn;
+ pd->rt.when = pd->rd;
+ pd->rt.arg.type = (Type*)pd->seq;
+ pd->rt.arg.data = pd;
+ runtime_addtimer(&pd->rt);
+ }
+ if(pd->wd > 0) {
+ pd->wt.fv = &writeDeadlineFn;
+ pd->wt.when = pd->wd;
+ pd->wt.arg.type = (Type*)pd->seq;
+ pd->wt.arg.data = pd;
+ runtime_addtimer(&pd->wt);
+ }
+ }
+ret:
+ runtime_unlock(pd);
+}
+
+func runtime_pollUnblock(pd *PollDesc) {
+ G *rg, *wg;
+
+ runtime_lock(pd);
+ if(pd->closing)
+ runtime_throw("runtime_pollUnblock: already closing");
+ pd->closing = true;
+ pd->seq++;
+ rg = netpollunblock(pd, 'r');
+ wg = netpollunblock(pd, 'w');
+ if(pd->rt.fv) {
+ runtime_deltimer(&pd->rt);
+ pd->rt.fv = nil;
+ }
+ if(pd->wt.fv) {
+ runtime_deltimer(&pd->wt);
+ pd->wt.fv = nil;
+ }
+ runtime_unlock(pd);
+ if(rg)
+ runtime_ready(rg);
+ if(wg)
+ runtime_ready(wg);
+}
+
+// make pd ready; newly runnable goroutines (if any) are enqueued into the gpp list
+void
+runtime_netpollready(G **gpp, PollDesc *pd, int32 mode)
+{
+ G *rg, *wg;
+
+ rg = wg = nil;
+ runtime_lock(pd);
+ if(mode == 'r' || mode == 'r'+'w')
+ rg = netpollunblock(pd, 'r');
+ if(mode == 'w' || mode == 'r'+'w')
+ wg = netpollunblock(pd, 'w');
+ runtime_unlock(pd);
+ if(rg) {
+ rg->schedlink = *gpp;
+ *gpp = rg;
+ }
+ if(wg) {
+ wg->schedlink = *gpp;
+ *gpp = wg;
+ }
+}
+
+static intgo
+checkerr(PollDesc *pd, int32 mode)
+{
+ if(pd->closing)
+ return 1; // errClosing
+ if((mode == 'r' && pd->rd < 0) || (mode == 'w' && pd->wd < 0))
+ return 2; // errTimeout
+ return 0;
+}
+
+static void
+netpollblock(PollDesc *pd, int32 mode)
+{
+ G **gpp;
+
+ gpp = &pd->rg;
+ if(mode == 'w')
+ gpp = &pd->wg;
+ if(*gpp == READY) {
+ *gpp = nil;
+ return;
+ }
+ if(*gpp != nil)
+ runtime_throw("epoll: double wait");
+ *gpp = runtime_g();
+ runtime_park(runtime_unlock, &pd->Lock, "IO wait");
+ runtime_lock(pd);
+}
+
+static G*
+netpollunblock(PollDesc *pd, int32 mode)
+{
+ G **gpp, *old;
+
+ gpp = &pd->rg;
+ if(mode == 'w')
+ gpp = &pd->wg;
+ if(*gpp == READY)
+ return nil;
+ if(*gpp == nil) {
+ *gpp = READY;
+ return nil;
+ }
+ old = *gpp;
+ *gpp = nil;
+ return old;
+}
+
+static void
+deadlineimpl(int64 now, Eface arg, bool read, bool write)
+{
+ PollDesc *pd;
+ uint32 seq;
+ G *rg, *wg;
+
+ USED(now);
+ pd = (PollDesc*)arg.data;
+ // This is the seq when the timer was set.
+ // If it's stale, ignore the timer event.
+ seq = (uintptr)arg.type;
+ rg = wg = nil;
+ runtime_lock(pd);
+ if(seq != pd->seq) {
+ // The descriptor was reused or timers were reset.
+ runtime_unlock(pd);
+ return;
+ }
+ if(read) {
+ if(pd->rd <= 0 || pd->rt.fv == nil)
+ runtime_throw("deadlineimpl: inconsistent read deadline");
+ pd->rd = -1;
+ pd->rt.fv = nil;
+ rg = netpollunblock(pd, 'r');
+ }
+ if(write) {
+ if(pd->wd <= 0 || (pd->wt.fv == nil && !read))
+ runtime_throw("deadlineimpl: inconsistent write deadline");
+ pd->wd = -1;
+ pd->wt.fv = nil;
+ wg = netpollunblock(pd, 'w');
+ }
+ runtime_unlock(pd);
+ if(rg)
+ runtime_ready(rg);
+ if(wg)
+ runtime_ready(wg);
+}
+
+static void
+deadline(int64 now, Eface arg)
+{
+ deadlineimpl(now, arg, true, true);
+}
+
+static void
+readDeadline(int64 now, Eface arg)
+{
+ deadlineimpl(now, arg, true, false);
+}
+
+static void
+writeDeadline(int64 now, Eface arg)
+{
+ deadlineimpl(now, arg, false, true);
+}
+
+static PollDesc*
+allocPollDesc(void)
+{
+ PollDesc *pd;
+ uint32 i, n;
+
+ runtime_lock(&pollcache);
+ if(pollcache.first == nil) {
+ n = PageSize/sizeof(*pd);
+ if(n == 0)
+ n = 1;
+ // Must be in non-GC memory because it can be referenced
+ // only from epoll/kqueue internals.
+ pd = runtime_SysAlloc(n*sizeof(*pd));
+ for(i = 0; i < n; i++) {
+ pd[i].link = pollcache.first;
+ pollcache.first = &pd[i];
+ }
+ }
+ pd = pollcache.first;
+ pollcache.first = pd->link;
+ runtime_unlock(&pollcache);
+ return pd;
+}
diff --git a/libgo/runtime/netpoll_epoll.c b/libgo/runtime/netpoll_epoll.c
new file mode 100644
index 0000000..04f9c75
--- /dev/null
+++ b/libgo/runtime/netpoll_epoll.c
@@ -0,0 +1,154 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build linux
+
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/epoll.h>
+
+#include "runtime.h"
+#include "defs.h"
+
+#ifndef EPOLLRDHUP
+#define EPOLLRDHUP 0x2000
+#endif
+
+#ifndef EPOLL_CLOEXEC
+#define EPOLL_CLOEXEC 02000000
+#endif
+
+typedef struct epoll_event EpollEvent;
+
+static int32
+runtime_epollcreate(int32 size)
+{
+ int r;
+
+ r = epoll_create(size);
+ if(r >= 0)
+ return r;
+ return - errno;
+}
+
+static int32
+runtime_epollcreate1(int32 flags)
+{
+ int r;
+
+ r = epoll_create1(flags);
+ if(r >= 0)
+ return r;
+ return - errno;
+}
+
+static int32
+runtime_epollctl(int32 epfd, int32 op, int32 fd, EpollEvent *ev)
+{
+ int r;
+
+ r = epoll_ctl(epfd, op, fd, ev);
+ if(r >= 0)
+ return r;
+ return - errno;
+}
+
+static int32
+runtime_epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout)
+{
+ int r;
+
+ r = epoll_wait(epfd, ev, nev, timeout);
+ if(r >= 0)
+ return r;
+ return - errno;
+}
+
+static void
+runtime_closeonexec(int32 fd)
+{
+ fcntl(fd, F_SETFD, FD_CLOEXEC);
+}
+
+static int32 epfd = -1; // epoll descriptor
+
+void
+runtime_netpollinit(void)
+{
+ epfd = runtime_epollcreate1(EPOLL_CLOEXEC);
+ if(epfd >= 0)
+ return;
+ epfd = runtime_epollcreate(1024);
+ if(epfd >= 0) {
+ runtime_closeonexec(epfd);
+ return;
+ }
+ runtime_printf("netpollinit: failed to create descriptor (%d)\n", -epfd);
+ runtime_throw("netpollinit: failed to create descriptor");
+}
+
+int32
+runtime_netpollopen(int32 fd, PollDesc *pd)
+{
+ EpollEvent ev;
+ int32 res;
+
+ ev.events = EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLET;
+ ev.data.ptr = (void*)pd;
+ res = runtime_epollctl(epfd, EPOLL_CTL_ADD, fd, &ev);
+ return -res;
+}
+
+int32
+runtime_netpollclose(int32 fd)
+{
+ EpollEvent ev;
+ int32 res;
+
+ res = runtime_epollctl(epfd, EPOLL_CTL_DEL, fd, &ev);
+ return -res;
+}
+
+// Polls for ready network connections.
+// Returns list of goroutines that become runnable.
+G*
+runtime_netpoll(bool block)
+{
+ static int32 lasterr;
+ EpollEvent events[128], *ev;
+ int32 n, i, waitms, mode;
+ G *gp;
+
+ if(epfd == -1)
+ return nil;
+ waitms = -1;
+ if(!block)
+ waitms = 0;
+retry:
+ n = runtime_epollwait(epfd, events, nelem(events), waitms);
+ if(n < 0) {
+ if(n != -EINTR && n != lasterr) {
+ lasterr = n;
+ runtime_printf("runtime: epollwait on fd %d failed with %d\n", epfd, -n);
+ }
+ goto retry;
+ }
+ gp = nil;
+ for(i = 0; i < n; i++) {
+ ev = &events[i];
+ if(ev->events == 0)
+ continue;
+ mode = 0;
+ if(ev->events & (EPOLLIN|EPOLLRDHUP|EPOLLHUP|EPOLLERR))
+ mode += 'r';
+ if(ev->events & (EPOLLOUT|EPOLLHUP|EPOLLERR))
+ mode += 'w';
+ if(mode)
+ runtime_netpollready(&gp, (void*)ev->data.ptr, mode);
+ }
+ if(block && gp == nil)
+ goto retry;
+ return gp;
+}
diff --git a/libgo/runtime/netpoll_kqueue.c b/libgo/runtime/netpoll_kqueue.c
new file mode 100644
index 0000000..9b79b20
--- /dev/null
+++ b/libgo/runtime/netpoll_kqueue.c
@@ -0,0 +1,108 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build darwin
+
+#include "runtime.h"
+#include "defs_GOOS_GOARCH.h"
+
+// Integrated network poller (kqueue-based implementation).
+
+int32 runtime_kqueue(void);
+int32 runtime_kevent(int32, Kevent*, int32, Kevent*, int32, Timespec*);
+void runtime_closeonexec(int32);
+
+static int32 kq = -1;
+
+void
+runtime_netpollinit(void)
+{
+ kq = runtime_kqueue();
+ if(kq < 0) {
+ runtime_printf("netpollinit: kqueue failed with %d\n", -kq);
+ runtime_throw("netpollinit: kqueue failed");
+ }
+ runtime_closeonexec(kq);
+}
+
+int32
+runtime_netpollopen(int32 fd, PollDesc *pd)
+{
+ Kevent ev[2];
+ int32 n;
+
+ // Arm both EVFILT_READ and EVFILT_WRITE in edge-triggered mode (EV_CLEAR)
+ // for the whole fd lifetime. The notifications are automatically unregistered
+ // when fd is closed.
+ ev[0].ident = fd;
+ ev[0].filter = EVFILT_READ;
+ ev[0].flags = EV_ADD|EV_RECEIPT|EV_CLEAR;
+ ev[0].fflags = 0;
+ ev[0].data = 0;
+ ev[0].udata = (byte*)pd;
+ ev[1] = ev[0];
+ ev[1].filter = EVFILT_WRITE;
+ n = runtime_kevent(kq, ev, 2, ev, 2, nil);
+ if(n < 0)
+ return -n;
+ if(n != 2 ||
+ (ev[0].flags&EV_ERROR) == 0 || ev[0].ident != fd || ev[0].filter != EVFILT_READ ||
+ (ev[1].flags&EV_ERROR) == 0 || ev[1].ident != fd || ev[1].filter != EVFILT_WRITE)
+ return EFAULT; // just to mark out from other errors
+ if(ev[0].data != 0)
+ return ev[0].data;
+ if(ev[1].data != 0)
+ return ev[1].data;
+ return 0;
+}
+
+int32
+runtime_netpollclose(int32 fd)
+{
+ // Don't need to unregister because calling close()
+ // on fd will remove any kevents that reference the descriptor.
+ USED(fd);
+ return 0;
+}
+
+// Polls for ready network connections.
+// Returns list of goroutines that become runnable.
+G*
+runtime_netpoll(bool block)
+{
+ static int32 lasterr;
+ Kevent events[64], *ev;
+ Timespec ts, *tp;
+ int32 n, i;
+ G *gp;
+
+ if(kq == -1)
+ return nil;
+ tp = nil;
+ if(!block) {
+ ts.tv_sec = 0;
+ ts.tv_nsec = 0;
+ tp = &ts;
+ }
+ gp = nil;
+retry:
+ n = runtime_kevent(kq, nil, 0, events, nelem(events), tp);
+ if(n < 0) {
+ if(n != -EINTR && n != lasterr) {
+ lasterr = n;
+ runtime_printf("runtime: kevent on fd %d failed with %d\n", kq, -n);
+ }
+ goto retry;
+ }
+ for(i = 0; i < n; i++) {
+ ev = &events[i];
+ if(ev->filter == EVFILT_READ)
+ runtime_netpollready(&gp, (PollDesc*)ev->udata, 'r');
+ if(ev->filter == EVFILT_WRITE)
+ runtime_netpollready(&gp, (PollDesc*)ev->udata, 'w');
+ }
+ if(block && gp == nil)
+ goto retry;
+ return gp;
+}
diff --git a/libgo/runtime/netpoll_stub.c b/libgo/runtime/netpoll_stub.c
new file mode 100644
index 0000000..e28e38e
--- /dev/null
+++ b/libgo/runtime/netpoll_stub.c
@@ -0,0 +1,18 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build freebsd netbsd openbsd plan9 windows
+
+#include "runtime.h"
+
+// Polls for ready network connections.
+// Returns list of goroutines that become runnable.
+G*
+runtime_netpoll(bool block)
+{
+ // Implementation for platforms that do not support
+ // an integrated network poller.
+ USED(block);
+ return nil;
+}
diff --git a/libgo/runtime/panic.c b/libgo/runtime/panic.c
index 7b9b578..7d79256 100644
--- a/libgo/runtime/panic.c
+++ b/libgo/runtime/panic.c
@@ -3,6 +3,7 @@
// license that can be found in the LICENSE file.
#include "runtime.h"
+#include "malloc.h"
#include "go-defer.h"
#include "go-panic.h"
@@ -37,6 +38,11 @@ runtime_startpanic(void)
M *m;
m = runtime_m();
+ if(runtime_mheap == 0 || runtime_mheap->cachealloc.size == 0) { // very early
+ runtime_printf("runtime: panic before malloc heap initialized\n");
+ m->mallocing = 1; // tell rest of panic not to try to malloc
+ } else if(m->mcache == nil) // can happen if called from signal handler or throw
+ m->mcache = runtime_allocmcache();
if(m->dying) {
runtime_printf("panic during panic\n");
runtime_exit(3);
@@ -51,13 +57,14 @@ runtime_dopanic(int32 unused __attribute__ ((unused)))
{
G *g;
static bool didothers;
+ bool crash;
g = runtime_g();
if(g->sig != 0)
runtime_printf("[signal %x code=%p addr=%p]\n",
g->sig, (void*)g->sigcode0, (void*)g->sigcode1);
- if(runtime_gotraceback()){
+ if(runtime_gotraceback(&crash)){
if(g != runtime_m()->g0) {
runtime_printf("\n");
runtime_goroutineheader(g);
@@ -79,6 +86,9 @@ runtime_dopanic(int32 unused __attribute__ ((unused)))
runtime_lock(&deadlock);
runtime_lock(&deadlock);
}
+
+ if(crash)
+ runtime_crash();
runtime_exit(2);
}
diff --git a/libgo/runtime/parfor.c b/libgo/runtime/parfor.c
index 65ca586..c0e40f5 100644
--- a/libgo/runtime/parfor.c
+++ b/libgo/runtime/parfor.c
@@ -49,6 +49,7 @@ void
runtime_parforsetup(ParFor *desc, uint32 nthr, uint32 n, void *ctx, bool wait, void (*body)(ParFor*, uint32))
{
uint32 i, begin, end;
+ uint64 *pos;
if(desc == nil || nthr == 0 || nthr > desc->nthrmax || body == nil) {
runtime_printf("desc=%p nthr=%d count=%d body=%p\n", desc, nthr, n, body);
@@ -70,7 +71,10 @@ runtime_parforsetup(ParFor *desc, uint32 nthr, uint32 n, void *ctx, bool wait, v
for(i=0; i<nthr; i++) {
begin = (uint64)n*i / nthr;
end = (uint64)n*(i+1) / nthr;
- desc->thr[i].pos = (uint64)begin | (((uint64)end)<<32);
+ pos = &desc->thr[i].pos;
+ if(((uintptr)pos & 7) != 0)
+ runtime_throw("parforsetup: pos is not aligned");
+ *pos = (uint64)begin | (((uint64)end)<<32);
}
}
@@ -152,7 +156,7 @@ runtime_parfordo(ParFor *desc)
// See if it has any work.
begin = (uint32)pos;
end = (uint32)(pos>>32);
- if(begin >= end-1) {
+ if(begin+1 >= end) {
begin = end = 0;
break;
}
diff --git a/libgo/runtime/print.c b/libgo/runtime/print.c
index 9e0c45b..f5c6e82 100644
--- a/libgo/runtime/print.c
+++ b/libgo/runtime/print.c
@@ -88,6 +88,9 @@ go_vprintf(const char *s, va_list va)
case 'a':
runtime_printslice(va_arg(va, Slice));
break;
+ case 'c':
+ runtime_printbyte(va_arg(va, int32));
+ break;
case 'd':
runtime_printint(va_arg(va, int32));
break;
@@ -154,6 +157,12 @@ runtime_printbool(_Bool v)
}
void
+runtime_printbyte(int8 c)
+{
+ gwrite(&c, 1);
+}
+
+void
runtime_printfloat(double v)
{
byte buf[20];
diff --git a/libgo/runtime/proc.c b/libgo/runtime/proc.c
index 9b563a5..9639922 100644
--- a/libgo/runtime/proc.c
+++ b/libgo/runtime/proc.c
@@ -56,15 +56,8 @@ extern void __splitstack_block_signals_context (void *context[10], int *,
uintptr runtime_stacks_sys;
-static void schedule(G*);
-
static void gtraceback(G*);
-typedef struct Sched Sched;
-
-M runtime_m0;
-G runtime_g0; // idle goroutine for m0
-
#ifdef __rtems__
#define __thread
#endif
@@ -166,194 +159,61 @@ runtime_m(void)
return m;
}
-int32 runtime_gcwaiting;
-
-G* runtime_allg;
-G* runtime_lastg;
-M* runtime_allm;
-
-int8* runtime_goos;
-int32 runtime_ncpu;
-
-// The static TLS size. See runtime_newm.
-static int tlssize;
-
-#ifdef HAVE_DL_ITERATE_PHDR
-
-// Called via dl_iterate_phdr.
-
-static int
-addtls(struct dl_phdr_info* info, size_t size __attribute__ ((unused)), void *data)
-{
- size_t *total = (size_t *)data;
- unsigned int i;
-
- for(i = 0; i < info->dlpi_phnum; ++i) {
- if(info->dlpi_phdr[i].p_type == PT_TLS)
- *total += info->dlpi_phdr[i].p_memsz;
- }
- return 0;
-}
-
-// Set the total TLS size.
-
-static void
-inittlssize()
+// Set m and g.
+void
+runtime_setmg(M* mp, G* gp)
{
- size_t total = 0;
-
- dl_iterate_phdr(addtls, (void *)&total);
- tlssize = total;
+ m = mp;
+ g = gp;
}
-#else
+// The static TLS size. See runtime_newm.
+static int tlssize;
+// Start a new thread.
static void
-inittlssize()
+runtime_newosproc(M *mp)
{
-}
-
-#endif
-
-// Go scheduler
-//
-// The go scheduler's job is to match ready-to-run goroutines (`g's)
-// with waiting-for-work schedulers (`m's). If there are ready g's
-// and no waiting m's, ready() will start a new m running in a new
-// OS thread, so that all ready g's can run simultaneously, up to a limit.
-// For now, m's never go away.
-//
-// By default, Go keeps only one kernel thread (m) running user code
-// at a single time; other threads may be blocked in the operating system.
-// Setting the environment variable $GOMAXPROCS or calling
-// runtime.GOMAXPROCS() will change the number of user threads
-// allowed to execute simultaneously. $GOMAXPROCS is thus an
-// approximation of the maximum number of cores to use.
-//
-// Even a program that can run without deadlock in a single process
-// might use more m's if given the chance. For example, the prime
-// sieve will use as many m's as there are primes (up to runtime_sched.mmax),
-// allowing different stages of the pipeline to execute in parallel.
-// We could revisit this choice, only kicking off new m's for blocking
-// system calls, but that would limit the amount of parallel computation
-// that go would try to do.
-//
-// In general, one could imagine all sorts of refinements to the
-// scheduler, but the goal now is just to get something working on
-// Linux and OS X.
-
-struct Sched {
- Lock;
-
- G *gfree; // available g's (status == Gdead)
- int64 goidgen;
-
- G *ghead; // g's waiting to run
- G *gtail;
- int32 gwait; // number of g's waiting to run
- int32 gcount; // number of g's that are alive
- int32 grunning; // number of g's running on cpu or in syscall
-
- M *mhead; // m's waiting for work
- int32 mwait; // number of m's waiting for work
- int32 mcount; // number of m's that have been created
+ pthread_attr_t attr;
+ size_t stacksize;
+ sigset_t clear, old;
+ pthread_t tid;
+ int ret;
- volatile uint32 atomic; // atomic scheduling word (see below)
+ if(pthread_attr_init(&attr) != 0)
+ runtime_throw("pthread_attr_init");
+ if(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0)
+ runtime_throw("pthread_attr_setdetachstate");
- int32 profilehz; // cpu profiling rate
+ stacksize = PTHREAD_STACK_MIN;
- bool init; // running initialization
- bool lockmain; // init called runtime.LockOSThread
+ // With glibc before version 2.16 the static TLS size is taken
+ // out of the stack size, and we get an error or a crash if
+ // there is not enough stack space left. Add it back in if we
+ // can, in case the program uses a lot of TLS space. FIXME:
+ // This can be disabled in glibc 2.16 and later, if the bug is
+ // indeed fixed then.
+ stacksize += tlssize;
- Note stopped; // one g can set waitstop and wait here for m's to stop
-};
+ if(pthread_attr_setstacksize(&attr, stacksize) != 0)
+ runtime_throw("pthread_attr_setstacksize");
-// The atomic word in sched is an atomic uint32 that
-// holds these fields.
-//
-// [15 bits] mcpu number of m's executing on cpu
-// [15 bits] mcpumax max number of m's allowed on cpu
-// [1 bit] waitstop some g is waiting on stopped
-// [1 bit] gwaiting gwait != 0
-//
-// These fields are the information needed by entersyscall
-// and exitsyscall to decide whether to coordinate with the
-// scheduler. Packing them into a single machine word lets
-// them use a fast path with a single atomic read/write and
-// no lock/unlock. This greatly reduces contention in
-// syscall- or cgo-heavy multithreaded programs.
-//
-// Except for entersyscall and exitsyscall, the manipulations
-// to these fields only happen while holding the schedlock,
-// so the routines holding schedlock only need to worry about
-// what entersyscall and exitsyscall do, not the other routines
-// (which also use the schedlock).
-//
-// In particular, entersyscall and exitsyscall only read mcpumax,
-// waitstop, and gwaiting. They never write them. Thus, writes to those
-// fields can be done (holding schedlock) without fear of write conflicts.
-// There may still be logic conflicts: for example, the set of waitstop must
-// be conditioned on mcpu >= mcpumax or else the wait may be a
-// spurious sleep. The Promela model in proc.p verifies these accesses.
-enum {
- mcpuWidth = 15,
- mcpuMask = (1<<mcpuWidth) - 1,
- mcpuShift = 0,
- mcpumaxShift = mcpuShift + mcpuWidth,
- waitstopShift = mcpumaxShift + mcpuWidth,
- gwaitingShift = waitstopShift+1,
-
- // The max value of GOMAXPROCS is constrained
- // by the max value we can store in the bit fields
- // of the atomic word. Reserve a few high values
- // so that we can detect accidental decrement
- // beyond zero.
- maxgomaxprocs = mcpuMask - 10,
-};
+ // Block signals during pthread_create so that the new thread
+ // starts with signals disabled. It will enable them in minit.
+ sigfillset(&clear);
-#define atomic_mcpu(v) (((v)>>mcpuShift)&mcpuMask)
-#define atomic_mcpumax(v) (((v)>>mcpumaxShift)&mcpuMask)
-#define atomic_waitstop(v) (((v)>>waitstopShift)&1)
-#define atomic_gwaiting(v) (((v)>>gwaitingShift)&1)
-
-Sched runtime_sched;
-int32 runtime_gomaxprocs;
-bool runtime_singleproc;
-
-static bool canaddmcpu(void);
-
-// An m that is waiting for notewakeup(&m->havenextg). This may
-// only be accessed while the scheduler lock is held. This is used to
-// minimize the number of times we call notewakeup while the scheduler
-// lock is held, since the m will normally move quickly to lock the
-// scheduler itself, producing lock contention.
-static M* mwakeup;
-
-// Scheduling helpers. Sched must be locked.
-static void gput(G*); // put/get on ghead/gtail
-static G* gget(void);
-static void mput(M*); // put/get on mhead
-static M* mget(G*);
-static void gfput(G*); // put/get on gfree
-static G* gfget(void);
-static void matchmg(void); // match m's to g's
-static void readylocked(G*); // ready, but sched is locked
-static void mnextg(M*, G*);
-static void mcommoninit(M*);
+#ifdef SIGTRAP
+ // Blocking SIGTRAP reportedly breaks gdb on Alpha GNU/Linux.
+ sigdelset(&clear, SIGTRAP);
+#endif
-void
-setmcpumax(uint32 n)
-{
- uint32 v, w;
+ sigemptyset(&old);
+ sigprocmask(SIG_BLOCK, &clear, &old);
+ ret = pthread_create(&tid, &attr, runtime_mstart, mp);
+ sigprocmask(SIG_SETMASK, &old, nil);
- for(;;) {
- v = runtime_sched.atomic;
- w = v;
- w &= ~(mcpuMask<<mcpumaxShift);
- w |= n<<mcpumaxShift;
- if(runtime_cas(&runtime_sched.atomic, v, w))
- break;
- }
+ if (ret != 0)
+ runtime_throw("pthread_create");
}
// First function run by a new goroutine. This replaces gogocall.
@@ -449,8 +309,142 @@ runtime_mcall(void (*pfn)(G*))
}
}
-// Keep trace of scavenger's goroutine for deadlock detection.
-static G *scvg;
+#ifdef HAVE_DL_ITERATE_PHDR
+
+// Called via dl_iterate_phdr.
+
+static int
+addtls(struct dl_phdr_info* info, size_t size __attribute__ ((unused)), void *data)
+{
+ size_t *total = (size_t *)data;
+ unsigned int i;
+
+ for(i = 0; i < info->dlpi_phnum; ++i) {
+ if(info->dlpi_phdr[i].p_type == PT_TLS)
+ *total += info->dlpi_phdr[i].p_memsz;
+ }
+ return 0;
+}
+
+// Set the total TLS size.
+
+static void
+inittlssize()
+{
+ size_t total = 0;
+
+ dl_iterate_phdr(addtls, (void *)&total);
+ tlssize = total;
+}
+
+#else
+
+static void
+inittlssize()
+{
+}
+
+#endif
+
+// Goroutine scheduler
+// The scheduler's job is to distribute ready-to-run goroutines over worker threads.
+//
+// The main concepts are:
+// G - goroutine.
+// M - worker thread, or machine.
+// P - processor, a resource that is required to execute Go code.
+// M must have an associated P to execute Go code; however, it can be
+// blocked or in a syscall without an associated P.
+//
+// Design doc at http://golang.org/s/go11sched.
+
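// Editorial sketch (not part of this patch): the comment above describes the
// new G/M/P model only in prose.  The standalone program below uses invented
// toy types (ToyG, ToyP, ToyM) that merely mirror the shape of G, P and M: a
// worker may only run goroutines while it holds a processor, and it takes
// work from that processor's local run queue before falling back to a shared
// global queue.  It illustrates the idea and is not the runtime's code.
#include <stdio.h>
#include <stddef.h>

typedef struct ToyG { int id; struct ToyG *next; } ToyG;	// goroutine
typedef struct ToyP { ToyG *runq; } ToyP;			// processor: local run queue
typedef struct ToyM { ToyP *p; } ToyM;				// worker thread

static ToyG *globrunq;						// shared global run queue

static ToyG*
toyfindrunnable(ToyM *mp)
{
	ToyG *gp;

	if(mp->p == NULL)
		return NULL;			// no P acquired: must not run Go code
	if((gp = mp->p->runq) != NULL) {	// local run queue first
		mp->p->runq = gp->next;
		return gp;
	}
	if((gp = globrunq) != NULL) {		// then the global run queue
		globrunq = gp->next;
		return gp;
	}
	return NULL;				// the real scheduler would steal or park here
}

int
main(void)
{
	ToyG g1 = {1, NULL}, g2 = {2, &g1}, g3 = {3, NULL};
	ToyP p = {&g2};
	ToyM mach = {&p};
	ToyG *gp;

	globrunq = &g3;
	while((gp = toyfindrunnable(&mach)) != NULL)
		printf("run goroutine %d\n", gp->id);	// prints 2, 1, 3
	return 0;
}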
+typedef struct Sched Sched;
+struct Sched {
+ Lock;
+
+ uint64 goidgen;
+ M* midle; // idle m's waiting for work
+ int32 nmidle; // number of idle m's waiting for work
+ int32 mlocked; // number of locked m's waiting for work
+ int32 mcount; // number of m's that have been created
+
+ P* pidle; // idle P's
+ uint32 npidle;
+ uint32 nmspinning;
+
+ // Global runnable queue.
+ G* runqhead;
+ G* runqtail;
+ int32 runqsize;
+
+ // Global cache of dead G's.
+ Lock gflock;
+ G* gfree;
+
+ int32 stopwait;
+ Note stopnote;
+ uint32 sysmonwait;
+ Note sysmonnote;
+ uint64 lastpoll;
+
+ int32 profilehz; // cpu profiling rate
+};
+
+// The max value of GOMAXPROCS.
+// There are no fundamental restrictions on the value.
+enum { MaxGomaxprocs = 1<<8 };
+
+Sched runtime_sched;
+int32 runtime_gomaxprocs;
+bool runtime_singleproc;
+bool runtime_iscgo;
+uint32 runtime_gcwaiting;
+M runtime_m0;
+G runtime_g0; // idle goroutine for m0
+G* runtime_allg;
+G* runtime_lastg;
+M* runtime_allm;
+P** runtime_allp;
+M* runtime_extram;
+int8* runtime_goos;
+int32 runtime_ncpu;
+static int32 newprocs;
+
+void* runtime_mstart(void*);
+static void runqput(P*, G*);
+static G* runqget(P*);
+static void runqgrow(P*);
+static G* runqsteal(P*, P*);
+static void mput(M*);
+static M* mget(void);
+static void mcommoninit(M*);
+static void schedule(void);
+static void procresize(int32);
+static void acquirep(P*);
+static P* releasep(void);
+static void newm(void(*)(void), P*);
+static void stopm(void);
+static void startm(P*, bool);
+static void handoffp(P*);
+static void wakep(void);
+static void stoplockedm(void);
+static void startlockedm(G*);
+static void sysmon(void);
+static uint32 retake(uint32*);
+static void inclocked(int32);
+static void checkdead(void);
+static void exitsyscall0(G*);
+static void park0(G*);
+static void gosched0(G*);
+static void goexit0(G*);
+static void gfput(P*, G*);
+static G* gfget(P*);
+static void gfpurge(P*);
+static void globrunqput(G*);
+static G* globrunqget(P*);
+static P* pidleget(void);
+static void pidleput(P*);
+static void injectglist(G*);
// The bootstrap sequence is:
//
@@ -463,7 +457,7 @@ static G *scvg;
void
runtime_schedinit(void)
{
- int32 n;
+ int32 n, procs;
const byte *p;
m = &runtime_m0;
@@ -476,6 +470,7 @@ runtime_schedinit(void)
inittlssize();
m->nomemprof++;
+ runtime_mprofinit();
runtime_mallocinit();
mcommoninit(m);
@@ -487,28 +482,20 @@ runtime_schedinit(void)
// so that we don't need to call malloc when we crash.
// runtime_findfunc(0);
- runtime_gomaxprocs = 1;
+ runtime_sched.lastpoll = runtime_nanotime();
+ procs = 1;
p = runtime_getenv("GOMAXPROCS");
- if(p != nil && (n = runtime_atoi(p)) != 0) {
- if(n > maxgomaxprocs)
- n = maxgomaxprocs;
- runtime_gomaxprocs = n;
+ if(p != nil && (n = runtime_atoi(p)) > 0) {
+ if(n > MaxGomaxprocs)
+ n = MaxGomaxprocs;
+ procs = n;
}
- // wait for the main goroutine to start before taking
- // GOMAXPROCS into account.
- setmcpumax(1);
- runtime_singleproc = runtime_gomaxprocs == 1;
-
- canaddmcpu(); // mcpu++ to account for bootstrap m
- m->helpgc = 1; // flag to tell schedule() to mcpu--
- runtime_sched.grunning++;
+ runtime_allp = runtime_malloc((MaxGomaxprocs+1)*sizeof(runtime_allp[0]));
+ procresize(procs);
// Can not enable GC until all roots are registered.
// mstats.enablegc = 1;
m->nomemprof--;
-
- if(raceenabled)
- runtime_raceinit();
}
extern void main_init(void) __asm__ (GOSYM_PREFIX "__go_init_main");
@@ -516,70 +503,44 @@ extern void main_main(void) __asm__ (GOSYM_PREFIX "main.main");
// The main goroutine.
void
-runtime_main(void)
+runtime_main(void* dummy __attribute__((unused)))
{
+ newm(sysmon, nil);
+
// Lock the main goroutine onto this, the main OS thread,
// during initialization. Most programs won't care, but a few
// do require certain calls to be made by the main thread.
// Those can arrange for main.main to run in the main thread
// by calling runtime.LockOSThread during initialization
// to preserve the lock.
- runtime_LockOSThread();
- // From now on, newgoroutines may use non-main threads.
- setmcpumax(runtime_gomaxprocs);
- runtime_sched.init = true;
- scvg = __go_go(runtime_MHeap_Scavenger, nil);
- scvg->issystem = true;
+ runtime_lockOSThread();
+ if(m != &runtime_m0)
+ runtime_throw("runtime_main not on m0");
+ __go_go(runtime_MHeap_Scavenger, nil);
main_init();
- runtime_sched.init = false;
- if(!runtime_sched.lockmain)
- runtime_UnlockOSThread();
+ runtime_unlockOSThread();
// For gccgo we have to wait until after main is initialized
// to enable GC, because initializing main registers the GC
// roots.
mstats.enablegc = 1;
- // The deadlock detection has false negatives.
- // Let scvg start up, to eliminate the false negative
- // for the trivial program func main() { select{} }.
- runtime_gosched();
-
main_main();
if(raceenabled)
runtime_racefini();
+
+ // Make racy client program work: if panicking on
+ // another goroutine at the same time as main returns,
+ // let the other goroutine finish printing the panic trace.
+ // Once it does, it will exit. See issue 3934.
+ if(runtime_panicking)
+ runtime_park(nil, nil, "panicwait");
+
runtime_exit(0);
for(;;)
*(int32*)0 = 0;
}
-// Lock the scheduler.
-static void
-schedlock(void)
-{
- runtime_lock(&runtime_sched);
-}
-
-// Unlock the scheduler.
-static void
-schedunlock(void)
-{
- M *mp;
-
- mp = mwakeup;
- mwakeup = nil;
- runtime_unlock(&runtime_sched);
- if(mp != nil)
- runtime_notewakeup(&mp->havenextg);
-}
-
-void
-runtime_goexit(void)
-{
- g->status = Gmoribund;
- runtime_gosched();
-}
-
void
runtime_goroutineheader(G *gp)
{
@@ -604,9 +565,6 @@ runtime_goroutineheader(G *gp)
else
status = "waiting";
break;
- case Gmoribund:
- status = "moribund";
- break;
default:
status = "???";
break;
@@ -644,7 +602,7 @@ runtime_tracebackothers(G * volatile me)
int32 traceback;
tb.gp = me;
- traceback = runtime_gotraceback();
+ traceback = runtime_gotraceback(nil);
for(gp = runtime_allg; gp != nil; gp = gp->alllink) {
if(gp == me || gp->status == Gdead)
continue;
@@ -698,28 +656,20 @@ gtraceback(G* gp)
runtime_gogo(traceback->gp);
}
-// Mark this g as m's idle goroutine.
-// This functionality might be used in environments where programs
-// are limited to a single thread, to simulate a select-driven
-// network server. It is not exposed via the standard runtime API.
-void
-runtime_idlegoroutine(void)
-{
- if(g->idlem != nil)
- runtime_throw("g is already an idle goroutine");
- g->idlem = m;
-}
-
static void
mcommoninit(M *mp)
{
- mp->id = runtime_sched.mcount++;
+ // If there is no mcache, runtime_callers() will crash, and we are
+ // most likely in the sysmon thread, so the stack is meaningless anyway.
+ if(m->mcache)
+ runtime_callers(1, mp->createstack, nelem(mp->createstack));
+
mp->fastrand = 0x49f6428aUL + mp->id + runtime_cputicks();
- if(mp->mcache == nil)
- mp->mcache = runtime_allocmcache();
+ runtime_lock(&runtime_sched);
+ mp->id = runtime_sched.mcount++;
- runtime_callers(1, mp->createstack, nelem(mp->createstack));
+ runtime_mpreinit(mp);
// Add to runtime_allm so garbage collector doesn't free m
// when it is just in a register or thread-local storage.
@@ -727,324 +677,77 @@ mcommoninit(M *mp)
// runtime_NumCgoCall() iterates over allm w/o schedlock,
// so we need to publish it safely.
runtime_atomicstorep(&runtime_allm, mp);
+ runtime_unlock(&runtime_sched);
}
-// Try to increment mcpu. Report whether succeeded.
-static bool
-canaddmcpu(void)
-{
- uint32 v;
-
- for(;;) {
- v = runtime_sched.atomic;
- if(atomic_mcpu(v) >= atomic_mcpumax(v))
- return 0;
- if(runtime_cas(&runtime_sched.atomic, v, v+(1<<mcpuShift)))
- return 1;
- }
-}
-
-// Put on `g' queue. Sched must be locked.
-static void
-gput(G *gp)
-{
- M *mp;
-
- // If g is wired, hand it off directly.
- if((mp = gp->lockedm) != nil && canaddmcpu()) {
- mnextg(mp, gp);
- return;
- }
-
- // If g is the idle goroutine for an m, hand it off.
- if(gp->idlem != nil) {
- if(gp->idlem->idleg != nil) {
- runtime_printf("m%d idle out of sync: g%D g%D\n",
- gp->idlem->id,
- gp->idlem->idleg->goid, gp->goid);
- runtime_throw("runtime: double idle");
- }
- gp->idlem->idleg = gp;
- return;
- }
-
- gp->schedlink = nil;
- if(runtime_sched.ghead == nil)
- runtime_sched.ghead = gp;
- else
- runtime_sched.gtail->schedlink = gp;
- runtime_sched.gtail = gp;
-
- // increment gwait.
- // if it transitions to nonzero, set atomic gwaiting bit.
- if(runtime_sched.gwait++ == 0)
- runtime_xadd(&runtime_sched.atomic, 1<<gwaitingShift);
-}
-
-// Report whether gget would return something.
-static bool
-haveg(void)
-{
- return runtime_sched.ghead != nil || m->idleg != nil;
-}
-
-// Get from `g' queue. Sched must be locked.
-static G*
-gget(void)
-{
- G *gp;
-
- gp = runtime_sched.ghead;
- if(gp) {
- runtime_sched.ghead = gp->schedlink;
- if(runtime_sched.ghead == nil)
- runtime_sched.gtail = nil;
- // decrement gwait.
- // if it transitions to zero, clear atomic gwaiting bit.
- if(--runtime_sched.gwait == 0)
- runtime_xadd(&runtime_sched.atomic, -1<<gwaitingShift);
- } else if(m->idleg != nil) {
- gp = m->idleg;
- m->idleg = nil;
- }
- return gp;
-}
-
-// Put on `m' list. Sched must be locked.
-static void
-mput(M *mp)
-{
- mp->schedlink = runtime_sched.mhead;
- runtime_sched.mhead = mp;
- runtime_sched.mwait++;
-}
-
-// Get an `m' to run `g'. Sched must be locked.
-static M*
-mget(G *gp)
-{
- M *mp;
-
- // if g has its own m, use it.
- if(gp && (mp = gp->lockedm) != nil)
- return mp;
-
- // otherwise use general m pool.
- if((mp = runtime_sched.mhead) != nil) {
- runtime_sched.mhead = mp->schedlink;
- runtime_sched.mwait--;
- }
- return mp;
-}
-
-// Mark g ready to run.
+// Mark gp ready to run.
void
runtime_ready(G *gp)
{
- schedlock();
- readylocked(gp);
- schedunlock();
-}
-
-// Mark g ready to run. Sched is already locked.
-// G might be running already and about to stop.
-// The sched lock protects g->status from changing underfoot.
-static void
-readylocked(G *gp)
-{
- if(gp->m) {
- // Running on another machine.
- // Ready it when it stops.
- gp->readyonstop = 1;
- return;
- }
-
// Mark runnable.
- if(gp->status == Grunnable || gp->status == Grunning) {
+ if(gp->status != Gwaiting) {
runtime_printf("goroutine %D has status %d\n", gp->goid, gp->status);
runtime_throw("bad g->status in ready");
}
gp->status = Grunnable;
-
- gput(gp);
- matchmg();
-}
-
-// Same as readylocked but a different symbol so that
-// debuggers can set a breakpoint here and catch all
-// new goroutines.
-static void
-newprocreadylocked(G *gp)
-{
- readylocked(gp);
-}
-
-// Pass g to m for running.
-// Caller has already incremented mcpu.
-static void
-mnextg(M *mp, G *gp)
-{
- runtime_sched.grunning++;
- mp->nextg = gp;
- if(mp->waitnextg) {
- mp->waitnextg = 0;
- if(mwakeup != nil)
- runtime_notewakeup(&mwakeup->havenextg);
- mwakeup = mp;
- }
-}
-
-// Get the next goroutine that m should run.
-// Sched must be locked on entry, is unlocked on exit.
-// Makes sure that at most $GOMAXPROCS g's are
-// running on cpus (not in system calls) at any given time.
-static G*
-nextgandunlock(void)
-{
- G *gp;
- uint32 v;
-
-top:
- if(atomic_mcpu(runtime_sched.atomic) >= maxgomaxprocs)
- runtime_throw("negative mcpu");
-
- // If there is a g waiting as m->nextg, the mcpu++
- // happened before it was passed to mnextg.
- if(m->nextg != nil) {
- gp = m->nextg;
- m->nextg = nil;
- schedunlock();
- return gp;
- }
-
- if(m->lockedg != nil) {
- // We can only run one g, and it's not available.
- // Make sure some other cpu is running to handle
- // the ordinary run queue.
- if(runtime_sched.gwait != 0) {
- matchmg();
- // m->lockedg might have been on the queue.
- if(m->nextg != nil) {
- gp = m->nextg;
- m->nextg = nil;
- schedunlock();
- return gp;
- }
- }
- } else {
- // Look for work on global queue.
- while(haveg() && canaddmcpu()) {
- gp = gget();
- if(gp == nil)
- runtime_throw("gget inconsistency");
-
- if(gp->lockedm) {
- mnextg(gp->lockedm, gp);
- continue;
- }
- runtime_sched.grunning++;
- schedunlock();
- return gp;
- }
-
- // The while loop ended either because the g queue is empty
- // or because we have maxed out our m procs running go
- // code (mcpu >= mcpumax). We need to check that
- // concurrent actions by entersyscall/exitsyscall cannot
- // invalidate the decision to end the loop.
- //
- // We hold the sched lock, so no one else is manipulating the
- // g queue or changing mcpumax. Entersyscall can decrement
- // mcpu, but if does so when there is something on the g queue,
- // the gwait bit will be set, so entersyscall will take the slow path
- // and use the sched lock. So it cannot invalidate our decision.
- //
- // Wait on global m queue.
- mput(m);
- }
-
- // Look for deadlock situation.
- // There is a race with the scavenger that causes false negatives:
- // if the scavenger is just starting, then we have
- // scvg != nil && grunning == 0 && gwait == 0
- // and we do not detect a deadlock. It is possible that we should
- // add that case to the if statement here, but it is too close to Go 1
- // to make such a subtle change. Instead, we work around the
- // false negative in trivial programs by calling runtime.gosched
- // from the main goroutine just before main.main.
- // See runtime_main above.
- //
- // On a related note, it is also possible that the scvg == nil case is
- // wrong and should include gwait, but that does not happen in
- // standard Go programs, which all start the scavenger.
- //
- if((scvg == nil && runtime_sched.grunning == 0) ||
- (scvg != nil && runtime_sched.grunning == 1 && runtime_sched.gwait == 0 &&
- (scvg->status == Grunning || scvg->status == Gsyscall))) {
- m->throwing = -1; // do not dump full stacks
- runtime_throw("all goroutines are asleep - deadlock!");
- }
-
- m->nextg = nil;
- m->waitnextg = 1;
- runtime_noteclear(&m->havenextg);
-
- // Stoptheworld is waiting for all but its cpu to go to stop.
- // Entersyscall might have decremented mcpu too, but if so
- // it will see the waitstop and take the slow path.
- // Exitsyscall never increments mcpu beyond mcpumax.
- v = runtime_atomicload(&runtime_sched.atomic);
- if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) {
- // set waitstop = 0 (known to be 1)
- runtime_xadd(&runtime_sched.atomic, -1<<waitstopShift);
- runtime_notewakeup(&runtime_sched.stopped);
- }
- schedunlock();
-
- runtime_notesleep(&m->havenextg);
- if(m->helpgc) {
- runtime_gchelper();
- m->helpgc = 0;
- runtime_lock(&runtime_sched);
- goto top;
- }
- if((gp = m->nextg) == nil)
- runtime_throw("bad m->nextg in nextgoroutine");
- m->nextg = nil;
- return gp;
+ runqput(m->p, gp);
+ if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0) // TODO: fast atomic
+ wakep();
}
int32
runtime_gcprocs(void)
{
int32 n;
-
+
// Figure out how many CPUs to use during GC.
// Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
+ runtime_lock(&runtime_sched);
n = runtime_gomaxprocs;
if(n > runtime_ncpu)
n = runtime_ncpu > 0 ? runtime_ncpu : 1;
if(n > MaxGcproc)
n = MaxGcproc;
- if(n > runtime_sched.mwait+1) // one M is currently running
- n = runtime_sched.mwait+1;
+ if(n > runtime_sched.nmidle+1) // one M is currently running
+ n = runtime_sched.nmidle+1;
+ runtime_unlock(&runtime_sched);
return n;
}
+static bool
+needaddgcproc(void)
+{
+ int32 n;
+
+ runtime_lock(&runtime_sched);
+ n = runtime_gomaxprocs;
+ if(n > runtime_ncpu)
+ n = runtime_ncpu;
+ if(n > MaxGcproc)
+ n = MaxGcproc;
+ n -= runtime_sched.nmidle+1; // one M is currently running
+ runtime_unlock(&runtime_sched);
+ return n > 0;
+}
+
void
runtime_helpgc(int32 nproc)
{
M *mp;
- int32 n;
+ int32 n, pos;
runtime_lock(&runtime_sched);
- for(n = 1; n < nproc; n++) { // one M is currently running
- mp = mget(nil);
+ pos = 0;
+ for(n = 1; n < nproc; n++) { // one M is currently running
+ if(runtime_allp[pos]->mcache == m->mcache)
+ pos++;
+ mp = mget();
if(mp == nil)
runtime_throw("runtime_gcprocs inconsistency");
- mp->helpgc = 1;
- mp->waitnextg = 0;
- runtime_notewakeup(&mp->havenextg);
+ mp->helpgc = n;
+ mp->mcache = runtime_allp[pos]->mcache;
+ pos++;
+ runtime_notewakeup(&mp->park);
}
runtime_unlock(&runtime_sched);
}
@@ -1052,57 +755,104 @@ runtime_helpgc(int32 nproc)
void
runtime_stoptheworld(void)
{
- uint32 v;
-
- schedlock();
- runtime_gcwaiting = 1;
-
- setmcpumax(1);
-
- // while mcpu > 1
- for(;;) {
- v = runtime_sched.atomic;
- if(atomic_mcpu(v) <= 1)
- break;
-
- // It would be unsafe for multiple threads to be using
- // the stopped note at once, but there is only
- // ever one thread doing garbage collection.
- runtime_noteclear(&runtime_sched.stopped);
- if(atomic_waitstop(v))
- runtime_throw("invalid waitstop");
+ int32 i;
+ uint32 s;
+ P *p;
+ bool wait;
- // atomic { waitstop = 1 }, predicated on mcpu <= 1 check above
- // still being true.
- if(!runtime_cas(&runtime_sched.atomic, v, v+(1<<waitstopShift)))
- continue;
+ runtime_lock(&runtime_sched);
+ runtime_sched.stopwait = runtime_gomaxprocs;
+ runtime_atomicstore((uint32*)&runtime_gcwaiting, 1);
+ // stop current P
+ m->p->status = Pgcstop;
+ runtime_sched.stopwait--;
+ // try to retake all P's in Psyscall status
+ for(i = 0; i < runtime_gomaxprocs; i++) {
+ p = runtime_allp[i];
+ s = p->status;
+ if(s == Psyscall && runtime_cas(&p->status, s, Pgcstop))
+ runtime_sched.stopwait--;
+ }
+ // stop idle P's
+ while((p = pidleget()) != nil) {
+ p->status = Pgcstop;
+ runtime_sched.stopwait--;
+ }
+ wait = runtime_sched.stopwait > 0;
+ runtime_unlock(&runtime_sched);
- schedunlock();
- runtime_notesleep(&runtime_sched.stopped);
- schedlock();
+ // wait for remaining P's to stop voluntarily
+ if(wait) {
+ runtime_notesleep(&runtime_sched.stopnote);
+ runtime_noteclear(&runtime_sched.stopnote);
+ }
+ if(runtime_sched.stopwait)
+ runtime_throw("stoptheworld: not stopped");
+ for(i = 0; i < runtime_gomaxprocs; i++) {
+ p = runtime_allp[i];
+ if(p->status != Pgcstop)
+ runtime_throw("stoptheworld: not stopped");
}
- runtime_singleproc = runtime_gomaxprocs == 1;
- schedunlock();
+}
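// Editorial sketch (not part of this patch): the new runtime_stoptheworld above
// counts stopwait down as P's reach Pgcstop and sleeps on stopnote until the
// last straggler wakes it.  The standalone pthread program below (invented
// names: NWORK, worker, stopwait, mu, cond) shows the same countdown-latch
// idea with a mutex and condition variable standing in for Lock and Note; it
// is an illustration only.
#include <pthread.h>
#include <stdio.h>

enum { NWORK = 4 };

static pthread_mutex_t mu = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int stopwait = NWORK;

static void*
worker(void *arg __attribute__ ((unused)))
{
	pthread_mutex_lock(&mu);
	if(--stopwait == 0)			// the last worker to stop wakes the waiter,
		pthread_cond_signal(&cond);	// like notewakeup(&runtime_sched.stopnote)
	pthread_mutex_unlock(&mu);
	return NULL;
}

int
main(void)
{
	pthread_t tid[NWORK];
	int i;

	for(i = 0; i < NWORK; i++)
		pthread_create(&tid[i], NULL, worker, NULL);
	pthread_mutex_lock(&mu);
	while(stopwait > 0)			// analogous to notesleep(&runtime_sched.stopnote)
		pthread_cond_wait(&cond, &mu);
	pthread_mutex_unlock(&mu);
	printf("all workers stopped\n");
	for(i = 0; i < NWORK; i++)
		pthread_join(tid[i], NULL);
	return 0;
}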
+
+static void
+mhelpgc(void)
+{
+ m->helpgc = -1;
}
void
runtime_starttheworld(void)
{
+ P *p, *p1;
M *mp;
- int32 max;
-
- // Figure out how many CPUs GC could possibly use.
- max = runtime_gomaxprocs;
- if(max > runtime_ncpu)
- max = runtime_ncpu > 0 ? runtime_ncpu : 1;
- if(max > MaxGcproc)
- max = MaxGcproc;
-
- schedlock();
+ G *gp;
+ bool add;
+
+ gp = runtime_netpoll(false); // non-blocking
+ injectglist(gp);
+ add = needaddgcproc();
+ runtime_lock(&runtime_sched);
+ if(newprocs) {
+ procresize(newprocs);
+ newprocs = 0;
+ } else
+ procresize(runtime_gomaxprocs);
runtime_gcwaiting = 0;
- setmcpumax(runtime_gomaxprocs);
- matchmg();
- if(runtime_gcprocs() < max && canaddmcpu()) {
+
+ p1 = nil;
+ while((p = pidleget()) != nil) {
+ // procresize() puts p's with work at the beginning of the list.
+ // Once we reach a p without a run queue, the rest don't have one either.
+ if(p->runqhead == p->runqtail) {
+ pidleput(p);
+ break;
+ }
+ mp = mget();
+ if(mp == nil) {
+ p->link = p1;
+ p1 = p;
+ continue;
+ }
+ if(mp->nextp)
+ runtime_throw("starttheworld: inconsistent mp->nextp");
+ mp->nextp = p;
+ runtime_notewakeup(&mp->park);
+ }
+ if(runtime_sched.sysmonwait) {
+ runtime_sched.sysmonwait = false;
+ runtime_notewakeup(&runtime_sched.sysmonnote);
+ }
+ runtime_unlock(&runtime_sched);
+
+ while(p1) {
+ p = p1;
+ p1 = p1->link;
+ add = false;
+ newm(nil, p);
+ }
+
+ if(add) {
// If GC could have used another helper proc, start one now,
// in the hope that it will be available next time.
// It would have been even better to start it before the collection,
@@ -1110,17 +860,8 @@ runtime_starttheworld(void)
// coordinate. This lazy approach works out in practice:
// we don't mind if the first couple gc rounds don't have quite
// the maximum number of procs.
- // canaddmcpu above did mcpu++
- // (necessary, because m will be doing various
- // initialization work so is definitely running),
- // but m is not running a specific goroutine,
- // so set the helpgc flag as a signal to m's
- // first schedule(nil) to mcpu-- and grunning--.
- mp = runtime_newm();
- mp->helpgc = 1;
- runtime_sched.grunning++;
+ newm(mhelpgc, nil);
}
- schedunlock();
}
// Called to start an M.
@@ -1167,10 +908,23 @@ runtime_mstart(void* mp)
// Install signal handlers; after minit so that minit can
// prepare the thread to be able to handle the signals.
- if(m == &runtime_m0)
+ if(m == &runtime_m0) {
runtime_initsig();
+ if(runtime_iscgo)
+ runtime_newextram();
+ }
+
+ if(m->mstartfn)
+ m->mstartfn();
- schedule(nil);
+ if(m->helpgc) {
+ m->helpgc = 0;
+ stopm();
+ } else if(m != &runtime_m0) {
+ acquirep(m->nextp);
+ m->nextp = nil;
+ }
+ schedule();
// TODO(brainman): This point is never reached, because scheduler
// does not release os threads at the moment. But once this path
@@ -1187,43 +941,17 @@ struct CgoThreadStart
void (*fn)(void);
};
-// Kick off new m's as needed (up to mcpumax).
-// Sched is locked.
-static void
-matchmg(void)
-{
- G *gp;
- M *mp;
-
- if(m->mallocing || m->gcing)
- return;
-
- while(haveg() && canaddmcpu()) {
- gp = gget();
- if(gp == nil)
- runtime_throw("gget inconsistency");
-
- // Find the m that will run gp.
- if((mp = mget(gp)) == nil)
- mp = runtime_newm();
- mnextg(mp, gp);
- }
-}
-
-// Create a new m. It will start off with a call to runtime_mstart.
+// Allocate a new m unassociated with any thread.
+// Can use p for allocation context if needed.
M*
-runtime_newm(void)
+runtime_allocm(P *p)
{
M *mp;
- pthread_attr_t attr;
- pthread_t tid;
- size_t stacksize;
- sigset_t clear;
- sigset_t old;
- int ret;
+ m->locks++; // disable GC because it can be called from sysmon
+ if(m->p == nil)
+ acquirep(p); // temporarily borrow p for mallocs in this function
#if 0
- static const Type *mtype; // The Go type M
if(mtype == nil) {
Eface e;
runtime_gc_m_ptr(&e);
@@ -1235,112 +963,418 @@ runtime_newm(void)
mcommoninit(mp);
mp->g0 = runtime_malg(-1, nil, nil);
- if(pthread_attr_init(&attr) != 0)
- runtime_throw("pthread_attr_init");
- if(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0)
- runtime_throw("pthread_attr_setdetachstate");
+ if(p == m->p)
+ releasep();
+ m->locks--;
- stacksize = PTHREAD_STACK_MIN;
+ return mp;
+}
- // With glibc before version 2.16 the static TLS size is taken
- // out of the stack size, and we get an error or a crash if
- // there is not enough stack space left. Add it back in if we
- // can, in case the program uses a lot of TLS space. FIXME:
- // This can be disabled in glibc 2.16 and later, if the bug is
- // indeed fixed then.
- stacksize += tlssize;
+static M* lockextra(bool nilokay);
+static void unlockextra(M*);
- if(pthread_attr_setstacksize(&attr, stacksize) != 0)
- runtime_throw("pthread_attr_setstacksize");
+// needm is called when a cgo callback happens on a
+// thread without an m (a thread not created by Go).
+// In this case, needm is expected to find an m to use
+// and return with m, g initialized correctly.
+// Since m and g are not set now (likely nil, but see below)
+// needm is limited in what routines it can call. In particular
+// it can only call nosplit functions (textflag 7) and cannot
+// do any scheduling that requires an m.
+//
+// In order to avoid needing heavy lifting here, we adopt
+// the following strategy: there is a stack of available m's
+// that can be stolen. Using compare-and-swap
+// to pop from the stack has ABA races, so we simulate
+// a lock by doing an exchange (via casp) to steal the stack
+// head and replace the top pointer with MLOCKED (1).
+// This serves as a simple spin lock that we can use even
+// without an m. The thread that locks the stack in this way
+// unlocks the stack by storing a valid stack head pointer.
+//
+// In order to make sure that there is always an m structure
+// available to be stolen, we maintain the invariant that there
+// is always one more than needed. At the beginning of the
+// program (if cgo is in use) the list is seeded with a single m.
+// If needm finds that it has taken the last m off the list, its job
+// is - once it has installed its own m so that it can do things like
+// allocate memory - to create a spare m and put it on the list.
+//
+// Each of these extra m's also has a g0 and a curg that are
+// pressed into service as the scheduling stack and current
+// goroutine for the duration of the cgo callback.
+//
+// When the callback is done with the m, it calls dropm to
+// put the m back on the list.
+void
+runtime_needm(void)
+{
+ M *mp;
- // Block signals during pthread_create so that the new thread
- // starts with signals disabled. It will enable them in minit.
- sigfillset(&clear);
+ // Lock extra list, take head, unlock popped list.
+ // nilokay=false is safe here because of the invariant above,
+ // that the extra list always contains or will soon contain
+ // at least one m.
+ mp = lockextra(false);
+
+ // Set needextram when we've just emptied the list,
+ // so that the eventual call into cgocallbackg will
+ // allocate a new m for the extra list. We delay the
+ // allocation until then so that it can be done
+ // after exitsyscall makes sure it is okay to be
+ // running at all (that is, there's no garbage collection
+ // running right now).
+ mp->needextram = mp->schedlink == nil;
+ unlockextra(mp->schedlink);
+
+ // Install m and g (= m->g0) and set the stack bounds
+ // to match the current stack. We don't actually know
+ // how big the stack is, like we don't know how big any
+ // scheduling stack is, but we assume there's at least 32 kB,
+ // which is more than enough for us.
+ runtime_setmg(mp, mp->g0);
+
+ // We assume that the split stack support has been initialized
+ // for this new thread.
+
+ // Initialize this thread to use the m.
+ runtime_minit();
+}
-#ifdef SIGTRAP
- // Blocking SIGTRAP reportedly breaks gdb on Alpha GNU/Linux.
- sigdelset(&clear, SIGTRAP);
-#endif
+// newextram allocates an m and puts it on the extra list.
+// It is called with a working local m, so that it can do things
+// like call schedlock and allocate.
+void
+runtime_newextram(void)
+{
+ M *mp, *mnext;
+ G *gp;
- sigemptyset(&old);
- sigprocmask(SIG_BLOCK, &clear, &old);
- ret = pthread_create(&tid, &attr, runtime_mstart, mp);
- sigprocmask(SIG_SETMASK, &old, nil);
+ // Create extra goroutine locked to extra m.
+ // The goroutine is the context in which the cgo callback will run.
+ // The sched.pc will never be returned to, but setting it to
+ // runtime.goexit makes clear to the traceback routines where
+ // the goroutine stack ends.
+ mp = runtime_allocm(nil);
+ gp = runtime_malg(StackMin, nil, nil);
+ gp->status = Gsyscall;
+ mp->curg = gp;
+ mp->locked = LockInternal;
+ mp->lockedg = gp;
+ gp->lockedm = mp;
+ // put on allg for garbage collector
+ runtime_lock(&runtime_sched);
+ if(runtime_lastg == nil)
+ runtime_allg = gp;
+ else
+ runtime_lastg->alllink = gp;
+ runtime_lastg = gp;
+ runtime_unlock(&runtime_sched);
+ gp->goid = runtime_xadd64(&runtime_sched.goidgen, 1);
- if (ret != 0)
- runtime_throw("pthread_create");
+ // Add m to the extra list.
+ mnext = lockextra(true);
+ mp->schedlink = mnext;
+ unlockextra(mp);
+}
- return mp;
+// dropm is called when a cgo callback has called needm but is now
+// done with the callback and returning back into the non-Go thread.
+// It puts the current m back onto the extra list.
+//
+// The main expense here is the call to signalstack to release the
+// m's signal stack, and then the call to needm on the next callback
+// from this thread. It is tempting to try to save the m for next time,
+// which would eliminate both these costs, but there might not be
+// a next time: the current thread (which Go does not control) might exit.
+// If we saved the m for that thread, there would be an m leak each time
+// such a thread exited. Instead, we acquire and release an m on each
+// call. These should typically not be scheduling operations, just a few
+// atomics, so the cost should be small.
+//
+// TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
+// variable using pthread_key_create. Unlike the pthread keys we already use
+// on OS X, this dummy key would never be read by Go code. It would exist
+// only so that we could register a thread-exit-time destructor.
+// That destructor would put the m back onto the extra list.
+// This is purely a performance optimization. The current version,
+// in which dropm happens on each cgo call, is still correct too.
+// We may have to keep the current version on systems with cgo
+// but without pthreads, like Windows.
+void
+runtime_dropm(void)
+{
+ M *mp, *mnext;
+
+ // Undo whatever initialization minit did during needm.
+ runtime_unminit();
+
+ // Clear m and g, and return m to the extra list.
+ // After the call to setmg we can only call nosplit functions.
+ mp = m;
+ runtime_setmg(nil, nil);
+
+ mnext = lockextra(true);
+ mp->schedlink = mnext;
+ unlockextra(mp);
}
-// One round of scheduler: find a goroutine and run it.
-// The argument is the goroutine that was running before
-// schedule was called, or nil if this is the first call.
-// Never returns.
-static void
-schedule(G *gp)
+#define MLOCKED ((M*)1)
+
+// lockextra locks the extra list and returns the list head.
+// The caller must unlock the list by storing a new list head
+// to runtime.extram. If nilokay is true, then lockextra will
+// return a nil list head if that's what it finds. If nilokay is false,
+// lockextra will keep waiting until the list head is no longer nil.
+static M*
+lockextra(bool nilokay)
{
- int32 hz;
- uint32 v;
+ M *mp;
+ void (*yield)(void);
- schedlock();
- if(gp != nil) {
- // Just finished running gp.
- gp->m = nil;
- runtime_sched.grunning--;
-
- // atomic { mcpu-- }
- v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift);
- if(atomic_mcpu(v) > maxgomaxprocs)
- runtime_throw("negative mcpu in scheduler");
-
- switch(gp->status) {
- case Grunnable:
- case Gdead:
- // Shouldn't have been running!
- runtime_throw("bad gp->status in sched");
- case Grunning:
- gp->status = Grunnable;
- gput(gp);
- break;
- case Gmoribund:
- if(raceenabled)
- runtime_racegoend(gp->goid);
- gp->status = Gdead;
- if(gp->lockedm) {
- gp->lockedm = nil;
- m->lockedg = nil;
- }
- gp->idlem = nil;
- runtime_memclr(&gp->context, sizeof gp->context);
- gfput(gp);
- if(--runtime_sched.gcount == 0)
- runtime_exit(0);
- break;
+ for(;;) {
+ mp = runtime_atomicloadp(&runtime_extram);
+ if(mp == MLOCKED) {
+ yield = runtime_osyield;
+ yield();
+ continue;
}
- if(gp->readyonstop) {
- gp->readyonstop = 0;
- readylocked(gp);
+ if(mp == nil && !nilokay) {
+ runtime_usleep(1);
+ continue;
}
- } else if(m->helpgc) {
- // Bootstrap m or new m started by starttheworld.
- // atomic { mcpu-- }
- v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift);
- if(atomic_mcpu(v) > maxgomaxprocs)
- runtime_throw("negative mcpu in scheduler");
- // Compensate for increment in starttheworld().
- runtime_sched.grunning--;
+ if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
+ yield = runtime_osyield;
+ yield();
+ continue;
+ }
+ break;
+ }
+ return mp;
+}
+
+static void
+unlockextra(M *mp)
+{
+ runtime_atomicstorep(&runtime_extram, mp);
+}
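// Editorial sketch (not part of this patch): lockextra/unlockextra above guard
// the extra-M list by swapping the list head for the sentinel MLOCKED, which
// sidesteps the ABA problem of a bare compare-and-swap pop.  The standalone
// program below (invented names: Node, LOCKED, pop_head) shows the same trick
// with GCC's __sync builtins; a concurrent version would use atomic loads and
// stores throughout, and this is an illustration, not the runtime's code.
#include <stdio.h>
#include <stddef.h>

typedef struct Node { int val; struct Node *next; } Node;

#define LOCKED ((Node*)1)

static Node *head;

static Node*
pop_head(void)
{
	Node *n, *newhead;

	for(;;) {
		n = head;
		if(n == LOCKED)
			continue;	// another popper holds the "lock": spin (real code would yield)
		// Exchanging the head for the sentinel both takes the lock and
		// snapshots the list, so a concurrent pop/push cannot confuse us.
		if(__sync_bool_compare_and_swap(&head, n, LOCKED))
			break;
	}
	newhead = (n == NULL) ? NULL : n->next;
	__sync_synchronize();
	head = newhead;			// publishing a real head releases the "lock"
	return n;
}

int
main(void)
{
	Node b = {2, NULL}, a = {1, &b};
	Node *n;

	head = &a;
	while((n = pop_head()) != NULL)
		printf("popped %d\n", n->val);
	return 0;
}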
+
+
+// Create a new m. It will start off with a call to fn, or else the scheduler.
+static void
+newm(void(*fn)(void), P *p)
+{
+ M *mp;
+
+ mp = runtime_allocm(p);
+ mp->nextp = p;
+ mp->mstartfn = fn;
+
+ runtime_newosproc(mp);
+}
+
+// Stops execution of the current m until new work is available.
+// Returns with acquired P.
+static void
+stopm(void)
+{
+ if(m->locks)
+ runtime_throw("stopm holding locks");
+ if(m->p)
+ runtime_throw("stopm holding p");
+ if(m->spinning) {
+ m->spinning = false;
+ runtime_xadd(&runtime_sched.nmspinning, -1);
+ }
+
+retry:
+ runtime_lock(&runtime_sched);
+ mput(m);
+ runtime_unlock(&runtime_sched);
+ runtime_notesleep(&m->park);
+ runtime_noteclear(&m->park);
+ if(m->helpgc) {
+ runtime_gchelper();
m->helpgc = 0;
- } else if(m->nextg != nil) {
- // New m started by matchmg.
- } else {
- runtime_throw("invalid m state in scheduler");
+ m->mcache = nil;
+ goto retry;
}
+ acquirep(m->nextp);
+ m->nextp = nil;
+}
+
+static void
+mspinning(void)
+{
+ m->spinning = true;
+}
+
+// Schedules some M to run the p (creates an M if necessary).
+// If p==nil, tries to get an idle P; if there are no idle P's, does nothing.
+static void
+startm(P *p, bool spinning)
+{
+ M *mp;
+ void (*fn)(void);
- // Find (or wait for) g to run. Unlocks runtime_sched.
- gp = nextgandunlock();
- gp->readyonstop = 0;
+ runtime_lock(&runtime_sched);
+ if(p == nil) {
+ p = pidleget();
+ if(p == nil) {
+ runtime_unlock(&runtime_sched);
+ if(spinning)
+ runtime_xadd(&runtime_sched.nmspinning, -1);
+ return;
+ }
+ }
+ mp = mget();
+ runtime_unlock(&runtime_sched);
+ if(mp == nil) {
+ fn = nil;
+ if(spinning)
+ fn = mspinning;
+ newm(fn, p);
+ return;
+ }
+ if(mp->spinning)
+ runtime_throw("startm: m is spinning");
+ if(mp->nextp)
+ runtime_throw("startm: m has p");
+ mp->spinning = spinning;
+ mp->nextp = p;
+ runtime_notewakeup(&mp->park);
+}
+
+// Hands off P from syscall or locked M.
+static void
+handoffp(P *p)
+{
+ // if it has local work, start it straight away
+ if(p->runqhead != p->runqtail || runtime_sched.runqsize) {
+ startm(p, false);
+ return;
+ }
+ // no local work, check that there are no spinning/idle M's,
+ // otherwise our help is not required
+ if(runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) == 0 && // TODO: fast atomic
+ runtime_cas(&runtime_sched.nmspinning, 0, 1)) {
+ startm(p, true);
+ return;
+ }
+ runtime_lock(&runtime_sched);
+ if(runtime_gcwaiting) {
+ p->status = Pgcstop;
+ if(--runtime_sched.stopwait == 0)
+ runtime_notewakeup(&runtime_sched.stopnote);
+ runtime_unlock(&runtime_sched);
+ return;
+ }
+ if(runtime_sched.runqsize) {
+ runtime_unlock(&runtime_sched);
+ startm(p, false);
+ return;
+ }
+ // If this is the last running P and nobody is polling the network,
+ // we need to wake up another M to poll the network.
+ if(runtime_sched.npidle == (uint32)runtime_gomaxprocs-1 && runtime_atomicload64(&runtime_sched.lastpoll) != 0) {
+ runtime_unlock(&runtime_sched);
+ startm(p, false);
+ return;
+ }
+ pidleput(p);
+ runtime_unlock(&runtime_sched);
+}
+
+// Tries to add one more P to execute G's.
+// Called when a G is made runnable (newproc, ready).
+static void
+wakep(void)
+{
+ // be conservative about spinning threads
+ if(!runtime_cas(&runtime_sched.nmspinning, 0, 1))
+ return;
+ startm(nil, true);
+}
+
+// Stops execution of the current m that is locked to a g until the g is runnable again.
+// Returns with acquired P.
+static void
+stoplockedm(void)
+{
+ P *p;
+
+ if(m->lockedg == nil || m->lockedg->lockedm != m)
+ runtime_throw("stoplockedm: inconsistent locking");
+ if(m->p) {
+ // Schedule another M to run this p.
+ p = releasep();
+ handoffp(p);
+ }
+ inclocked(1);
+ // Wait until another thread schedules lockedg again.
+ runtime_notesleep(&m->park);
+ runtime_noteclear(&m->park);
+ if(m->lockedg->status != Grunnable)
+ runtime_throw("stoplockedm: not runnable");
+ acquirep(m->nextp);
+ m->nextp = nil;
+}
+
+// Schedules the locked m to run the locked gp.
+static void
+startlockedm(G *gp)
+{
+ M *mp;
+ P *p;
+
+ mp = gp->lockedm;
+ if(mp == m)
+ runtime_throw("startlockedm: locked to me");
+ if(mp->nextp)
+ runtime_throw("startlockedm: m has p");
+ // directly handoff current P to the locked m
+ inclocked(-1);
+ p = releasep();
+ mp->nextp = p;
+ runtime_notewakeup(&mp->park);
+ stopm();
+}
+
+// Stops the current m for stoptheworld.
+// Returns when the world is restarted.
+static void
+gcstopm(void)
+{
+ P *p;
+
+ if(!runtime_gcwaiting)
+ runtime_throw("gcstopm: not waiting for gc");
+ if(m->spinning) {
+ m->spinning = false;
+ runtime_xadd(&runtime_sched.nmspinning, -1);
+ }
+ p = releasep();
+ runtime_lock(&runtime_sched);
+ p->status = Pgcstop;
+ if(--runtime_sched.stopwait == 0)
+ runtime_notewakeup(&runtime_sched.stopnote);
+ runtime_unlock(&runtime_sched);
+ stopm();
+}
+
+// Schedules gp to run on the current M.
+// Never returns.
+static void
+execute(G *gp)
+{
+ int32 hz;
+
+ if(gp->status != Grunnable) {
+ runtime_printf("execute: bad g status %d\n", gp->status);
+ runtime_throw("execute: bad g status");
+ }
gp->status = Grunning;
+ m->p->tick++;
m->curg = gp;
gp->m = m;
@@ -1352,30 +1386,261 @@ schedule(G *gp)
runtime_gogo(gp);
}
-// Enter scheduler. If g->status is Grunning,
-// re-queues g and runs everyone else who is waiting
-// before running g again. If g->status is Gmoribund,
-// kills off g.
-void
-runtime_gosched(void)
+// Finds a runnable goroutine to execute.
+// Tries to steal from other P's, get g from global queue, poll network.
+static G*
+findrunnable(void)
+{
+ G *gp;
+ P *p;
+ int32 i;
+
+top:
+ if(runtime_gcwaiting) {
+ gcstopm();
+ goto top;
+ }
+ // local runq
+ gp = runqget(m->p);
+ if(gp)
+ return gp;
+ // global runq
+ if(runtime_sched.runqsize) {
+ runtime_lock(&runtime_sched);
+ gp = globrunqget(m->p);
+ runtime_unlock(&runtime_sched);
+ if(gp)
+ return gp;
+ }
+ // poll network
+ gp = runtime_netpoll(false); // non-blocking
+ if(gp) {
+ injectglist(gp->schedlink);
+ gp->status = Grunnable;
+ return gp;
+ }
+ // If number of spinning M's >= number of busy P's, block.
+ // This is necessary to prevent excessive CPU consumption
+ // when GOMAXPROCS>>1 but the program parallelism is low.
+ if(!m->spinning && 2 * runtime_atomicload(&runtime_sched.nmspinning) >= runtime_gomaxprocs - runtime_atomicload(&runtime_sched.npidle)) // TODO: fast atomic
+ goto stop;
+ if(!m->spinning) {
+ m->spinning = true;
+ runtime_xadd(&runtime_sched.nmspinning, 1);
+ }
+ // random steal from other P's
+ for(i = 0; i < 2*runtime_gomaxprocs; i++) {
+ if(runtime_gcwaiting)
+ goto top;
+ p = runtime_allp[runtime_fastrand1()%runtime_gomaxprocs];
+ if(p == m->p)
+ gp = runqget(p);
+ else
+ gp = runqsteal(m->p, p);
+ if(gp)
+ return gp;
+ }
+stop:
+ // return P and block
+ runtime_lock(&runtime_sched);
+ if(runtime_gcwaiting) {
+ runtime_unlock(&runtime_sched);
+ goto top;
+ }
+ if(runtime_sched.runqsize) {
+ gp = globrunqget(m->p);
+ runtime_unlock(&runtime_sched);
+ return gp;
+ }
+ p = releasep();
+ pidleput(p);
+ runtime_unlock(&runtime_sched);
+ if(m->spinning) {
+ m->spinning = false;
+ runtime_xadd(&runtime_sched.nmspinning, -1);
+ }
+ // check all runqueues once again
+ for(i = 0; i < runtime_gomaxprocs; i++) {
+ p = runtime_allp[i];
+ if(p && p->runqhead != p->runqtail) {
+ runtime_lock(&runtime_sched);
+ p = pidleget();
+ runtime_unlock(&runtime_sched);
+ if(p) {
+ acquirep(p);
+ goto top;
+ }
+ break;
+ }
+ }
+ // poll network
+ if(runtime_xchg64(&runtime_sched.lastpoll, 0) != 0) {
+ if(m->p)
+ runtime_throw("findrunnable: netpoll with p");
+ if(m->spinning)
+ runtime_throw("findrunnable: netpoll with spinning");
+ gp = runtime_netpoll(true); // block until new work is available
+ runtime_atomicstore64(&runtime_sched.lastpoll, runtime_nanotime());
+ if(gp) {
+ runtime_lock(&runtime_sched);
+ p = pidleget();
+ runtime_unlock(&runtime_sched);
+ if(p) {
+ acquirep(p);
+ injectglist(gp->schedlink);
+ gp->status = Grunnable;
+ return gp;
+ }
+ injectglist(gp);
+ }
+ }
+ stopm();
+ goto top;
+}
+
+// Injects the list of runnable G's into the scheduler.
+// Can run concurrently with GC.
+static void
+injectglist(G *glist)
{
- if(m->locks != 0)
- runtime_throw("gosched holding locks");
- if(g == m->g0)
- runtime_throw("gosched of g0");
- runtime_mcall(schedule);
+ int32 n;
+ G *gp;
+
+ if(glist == nil)
+ return;
+ runtime_lock(&runtime_sched);
+ for(n = 0; glist; n++) {
+ gp = glist;
+ glist = gp->schedlink;
+ gp->status = Grunnable;
+ globrunqput(gp);
+ }
+ runtime_unlock(&runtime_sched);
+
+ for(; n && runtime_sched.npidle; n--)
+ startm(nil, false);
+}
+
+// One round of scheduler: find a runnable goroutine and execute it.
+// Never returns.
+static void
+schedule(void)
+{
+ G *gp;
+
+ if(m->locks)
+ runtime_throw("schedule: holding locks");
+
+top:
+ if(runtime_gcwaiting) {
+ gcstopm();
+ goto top;
+ }
+
+ gp = runqget(m->p);
+ if(gp == nil)
+ gp = findrunnable();
+
+ if(m->spinning) {
+ m->spinning = false;
+ runtime_xadd(&runtime_sched.nmspinning, -1);
+ }
+
+ // M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
+ // so see if we need to wake up another M here.
+ if (m->p->runqhead != m->p->runqtail &&
+ runtime_atomicload(&runtime_sched.nmspinning) == 0 &&
+ runtime_atomicload(&runtime_sched.npidle) > 0) // TODO: fast atomic
+ wakep();
+
+ if(gp->lockedm) {
+ startlockedm(gp);
+ goto top;
+ }
+
+ execute(gp);
}
// Puts the current goroutine into a waiting state and unlocks the lock.
// The goroutine can be made runnable again by calling runtime_ready(gp).
void
-runtime_park(void (*unlockf)(Lock*), Lock *lock, const char *reason)
+runtime_park(void(*unlockf)(Lock*), Lock *lock, const char *reason)
{
- g->status = Gwaiting;
+ m->waitlock = lock;
+ m->waitunlockf = unlockf;
g->waitreason = reason;
- if(unlockf)
- unlockf(lock);
- runtime_gosched();
+ runtime_mcall(park0);
+}
+
+// runtime_park continuation on g0.
+static void
+park0(G *gp)
+{
+ gp->status = Gwaiting;
+ gp->m = nil;
+ m->curg = nil;
+ if(m->waitunlockf) {
+ m->waitunlockf(m->waitlock);
+ m->waitunlockf = nil;
+ m->waitlock = nil;
+ }
+ if(m->lockedg) {
+ stoplockedm();
+ execute(gp); // Never returns.
+ }
+ schedule();
+}
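// Editorial sketch (not part of this patch): runtime_park/park0 above and
// runtime_ready earlier are the two halves of the blocking handshake used by
// runtime-internal primitives such as the semaphore code.  The hypothetical
// helper below (invented names: WaitSlot, slotsleep, slotwake) shows the
// intended usage: the sleeper records its g while holding the lock and lets
// park0 drop that lock only after the g is marked Gwaiting, so the waker can
// never runtime_ready() a still-running goroutine.  It assumes the
// surrounding runtime (G, Lock, g, nil, runtime_lock, runtime_unlock) and is
// a sketch of the pattern, not code from this patch.
typedef struct WaitSlot WaitSlot;
struct WaitSlot
{
	Lock	lock;
	G*	waiter;
};

static void
slotsleep(WaitSlot *ws)
{
	runtime_lock(&ws->lock);
	ws->waiter = g;		// publish ourselves while the lock is still held
	// park0 sets g->status = Gwaiting and only then calls
	// runtime_unlock(&ws->lock) on our behalf.
	runtime_park(runtime_unlock, &ws->lock, "slot sleep");
}

static void
slotwake(WaitSlot *ws)
{
	G *gp;

	runtime_lock(&ws->lock);
	gp = ws->waiter;	// a real primitive would also remember a pending
	ws->waiter = nil;	// wakeup in case nobody is sleeping yet
	runtime_unlock(&ws->lock);
	if(gp != nil)
		runtime_ready(gp);	// hand the parked goroutine back to the scheduler
}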
+
+// Scheduler yield.
+void
+runtime_gosched(void)
+{
+ runtime_mcall(gosched0);
+}
+
+// runtime_gosched continuation on g0.
+static void
+gosched0(G *gp)
+{
+ gp->status = Grunnable;
+ gp->m = nil;
+ m->curg = nil;
+ runtime_lock(&runtime_sched);
+ globrunqput(gp);
+ runtime_unlock(&runtime_sched);
+ if(m->lockedg) {
+ stoplockedm();
+ execute(gp); // Never returns.
+ }
+ schedule();
+}
+
+// Finishes execution of the current goroutine.
+void
+runtime_goexit(void)
+{
+ if(raceenabled)
+ runtime_racegoend();
+ runtime_mcall(goexit0);
+}
+
+// runtime_goexit continuation on g0.
+static void
+goexit0(G *gp)
+{
+ gp->status = Gdead;
+ gp->entry = nil;
+ gp->m = nil;
+ gp->lockedm = nil;
+ m->curg = nil;
+ m->lockedg = nil;
+ if(m->locked & ~LockExternal) {
+ runtime_printf("invalid m->locked = %d", m->locked);
+ runtime_throw("internal lockOSThread error");
+ }
+ m->locked = 0;
+ gfput(m->p, gp);
+ schedule();
}
// The goroutine g is about to enter a system call.
@@ -1386,17 +1651,12 @@ runtime_park(void (*unlockf)(Lock*), Lock *lock, const char *reason)
// Entersyscall cannot split the stack: the runtime_gosave must
// make g->sched refer to the caller's stack segment, because
// entersyscall is going to return immediately after.
-// It's okay to call matchmg and notewakeup even after
-// decrementing mcpu, because we haven't released the
-// sched lock yet, so the garbage collector cannot be running.
void runtime_entersyscall(void) __attribute__ ((no_split_stack));
void
-runtime_entersyscall(void)
+runtime_entersyscall()
{
- uint32 v;
-
if(m->profilehz > 0)
runtime_setprof(false);
@@ -1415,30 +1675,57 @@ runtime_entersyscall(void)
g->status = Gsyscall;
- // Fast path.
- // The slow path inside the schedlock/schedunlock will get
- // through without stopping if it does:
- // mcpu--
- // gwait not true
- // waitstop && mcpu <= mcpumax not true
- // If we can do the same with a single atomic add,
- // then we can skip the locks.
- v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift);
- if(!atomic_gwaiting(v) && (!atomic_waitstop(v) || atomic_mcpu(v) > atomic_mcpumax(v)))
- return;
-
- schedlock();
- v = runtime_atomicload(&runtime_sched.atomic);
- if(atomic_gwaiting(v)) {
- matchmg();
- v = runtime_atomicload(&runtime_sched.atomic);
+ if(runtime_atomicload(&runtime_sched.sysmonwait)) { // TODO: fast atomic
+ runtime_lock(&runtime_sched);
+ if(runtime_atomicload(&runtime_sched.sysmonwait)) {
+ runtime_atomicstore(&runtime_sched.sysmonwait, 0);
+ runtime_notewakeup(&runtime_sched.sysmonnote);
+ }
+ runtime_unlock(&runtime_sched);
}
- if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) {
- runtime_xadd(&runtime_sched.atomic, -1<<waitstopShift);
- runtime_notewakeup(&runtime_sched.stopped);
+
+ m->mcache = nil;
+ m->p->tick++;
+ m->p->m = nil;
+ runtime_atomicstore(&m->p->status, Psyscall);
+ if(runtime_gcwaiting) {
+ runtime_lock(&runtime_sched);
+ if (runtime_sched.stopwait > 0 && runtime_cas(&m->p->status, Psyscall, Pgcstop)) {
+ if(--runtime_sched.stopwait == 0)
+ runtime_notewakeup(&runtime_sched.stopnote);
+ }
+ runtime_unlock(&runtime_sched);
}
+}
+
+// The same as runtime_entersyscall(), but with a hint that the syscall is blocking.
+void
+runtime_entersyscallblock(void)
+{
+ P *p;
+
+ if(m->profilehz > 0)
+ runtime_setprof(false);
+
+ // Leave SP around for gc and traceback.
+#ifdef USING_SPLIT_STACK
+ g->gcstack = __splitstack_find(nil, nil, &g->gcstack_size,
+ &g->gcnext_segment, &g->gcnext_sp,
+ &g->gcinitial_sp);
+#else
+ g->gcnext_sp = (byte *) &p;
+#endif
+
+ // Save the registers in the g structure so that any pointers
+ // held in registers will be seen by the garbage collector.
+ getcontext(&g->gcregs);
- schedunlock();
+ g->status = Gsyscall;
+
+ p = releasep();
+ handoffp(p);
+ if(g->isbackground) // do not consider blocked scavenger for deadlock detection
+ inclocked(1);
}
// The goroutine g exited its system call.
@@ -1449,46 +1736,53 @@ void
runtime_exitsyscall(void)
{
G *gp;
- uint32 v;
-
- // Fast path.
- // If we can do the mcpu++ bookkeeping and
- // find that we still have mcpu <= mcpumax, then we can
- // start executing Go code immediately, without having to
- // schedlock/schedunlock.
- // Also do fast return if any locks are held, so that
- // panic code can use syscalls to open a file.
+ P *p;
+
+ // Check whether the profiler needs to be turned on.
+ if(m->profilehz > 0)
+ runtime_setprof(true);
+
gp = g;
- v = runtime_xadd(&runtime_sched.atomic, (1<<mcpuShift));
- if((m->profilehz == runtime_sched.profilehz && atomic_mcpu(v) <= atomic_mcpumax(v)) || m->locks > 0) {
+ // Try to re-acquire the last P.
+ if(m->p && m->p->status == Psyscall && runtime_cas(&m->p->status, Psyscall, Prunning)) {
// There's a cpu for us, so we can run.
+ m->mcache = m->p->mcache;
+ m->p->m = m;
+ m->p->tick++;
gp->status = Grunning;
// Garbage collector isn't running (since we are),
- // so okay to clear gcstack.
+ // so okay to clear gcstack and gcsp.
#ifdef USING_SPLIT_STACK
gp->gcstack = nil;
#endif
gp->gcnext_sp = nil;
runtime_memclr(&gp->gcregs, sizeof gp->gcregs);
-
- if(m->profilehz > 0)
- runtime_setprof(true);
return;
}
- // Tell scheduler to put g back on the run queue:
- // mostly equivalent to g->status = Grunning,
- // but keeps the garbage collector from thinking
- // that g is running right now, which it's not.
- gp->readyonstop = 1;
+ if(gp->isbackground) // do not consider blocked scavenger for deadlock detection
+ inclocked(-1);
+ // Try to get any other idle P.
+ m->p = nil;
+ if(runtime_sched.pidle) {
+ runtime_lock(&runtime_sched);
+ p = pidleget();
+ runtime_unlock(&runtime_sched);
+ if(p) {
+ acquirep(p);
+#ifdef USING_SPLIT_STACK
+ gp->gcstack = nil;
+#endif
+ gp->gcnext_sp = nil;
+ runtime_memclr(&gp->gcregs, sizeof gp->gcregs);
+ return;
+ }
+ }
- // All the cpus are taken.
- // The scheduler will ready g and put this m to sleep.
- // When the scheduler takes g away from m,
- // it will undo the runtime_sched.mcpu++ above.
- runtime_gosched();
+ // Call the scheduler.
+ runtime_mcall(exitsyscall0);
- // Gosched returned, so we're allowed to run now.
+ // Scheduler returned, so we're allowed to run now.
// Delete the gcstack information that we left for
// the garbage collector during the system call.
// Must wait until now because until gosched returns
@@ -1501,6 +1795,34 @@ runtime_exitsyscall(void)
runtime_memclr(&gp->gcregs, sizeof gp->gcregs);
}
+// runtime_exitsyscall slow path on g0.
+// Failed to acquire P, enqueue gp as runnable.
+static void
+exitsyscall0(G *gp)
+{
+ P *p;
+
+ gp->status = Grunnable;
+ gp->m = nil;
+ m->curg = nil;
+ runtime_lock(&runtime_sched);
+ p = pidleget();
+ if(p == nil)
+ globrunqput(gp);
+ runtime_unlock(&runtime_sched);
+ if(p) {
+ acquirep(p);
+ execute(gp); // Never returns.
+ }
+ if(m->lockedg) {
+ // Wait until another thread schedules gp and so m again.
+ stoplockedm();
+ execute(gp); // Never returns.
+ }
+ stopm();
+ schedule(); // Never returns.
+}
+
// Allocate a new g, with a stack big enough for stacksize bytes.
G*
runtime_malg(int32 stacksize, byte** ret_stack, size_t* ret_stacksize)
@@ -1554,15 +1876,10 @@ __go_go(void (*fn)(void*), void* arg)
byte *sp;
size_t spsize;
G *newg;
- int64 goid;
-
- goid = runtime_xadd64((uint64*)&runtime_sched.goidgen, 1);
- if(raceenabled)
- runtime_racegostart(goid, runtime_getcallerpc(&fn));
- schedlock();
+ m->locks++; // disable preemption because it can be holding p in a local var
- if((newg = gfget()) != nil) {
+ if((newg = gfget(m->p)) != nil) {
#ifdef USING_SPLIT_STACK
int dont_block_signals = 0;
@@ -1579,24 +1896,20 @@ __go_go(void (*fn)(void*), void* arg)
#endif
} else {
newg = runtime_malg(StackMin, &sp, &spsize);
+ runtime_lock(&runtime_sched);
if(runtime_lastg == nil)
runtime_allg = newg;
else
runtime_lastg->alllink = newg;
runtime_lastg = newg;
+ runtime_unlock(&runtime_sched);
}
- newg->status = Gwaiting;
- newg->waitreason = "new goroutine";
newg->entry = (byte*)fn;
newg->param = arg;
newg->gopc = (uintptr)__builtin_return_address(0);
-
- runtime_sched.gcount++;
- newg->goid = goid;
-
- if(sp == nil)
- runtime_throw("nil g->stack0");
+ newg->status = Grunnable;
+ newg->goid = runtime_xadd64(&runtime_sched.goidgen, 1);
{
// Avoid warnings about variables clobbered by
@@ -1613,33 +1926,87 @@ __go_go(void (*fn)(void*), void* arg)
vnewg->context.uc_stack.ss_size = vspsize;
makecontext(&vnewg->context, kickoff, 0);
- newprocreadylocked(vnewg);
- schedunlock();
+ runqput(m->p, vnewg);
+ if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0 && fn != runtime_main) // TODO: fast atomic
+ wakep();
+ m->locks--;
return vnewg;
}
}
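// Editorial sketch (not part of this patch): __go_go above is the runtime
// entry point behind a Go "go" statement: the caller hands it a function of
// one void* argument plus that argument, and the new goroutine is created
// Grunnable on the current P's run queue.  The hypothetical fragment below
// (invented names: Job, worker, startworker) only illustrates that calling
// convention; it assumes the surrounding runtime and is not standalone code.
typedef struct Job { int32 n; } Job;

static void
worker(void *arg)
{
	Job *job = (Job*)arg;

	runtime_printf("working on %d\n", job->n);
}

static void
startworker(Job *job)
{
	// Roughly what a statement like "go worker(job)" lowers to.
	__go_go(worker, job);
}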
-// Put on gfree list. Sched must be locked.
+// Put on gfree list.
+// If local list is too long, transfer a batch to the global list.
static void
-gfput(G *gp)
-{
- gp->schedlink = runtime_sched.gfree;
- runtime_sched.gfree = gp;
+gfput(P *p, G *gp)
+{
+ gp->schedlink = p->gfree;
+ p->gfree = gp;
+ p->gfreecnt++;
+ if(p->gfreecnt >= 64) {
+ runtime_lock(&runtime_sched.gflock);
+ while(p->gfreecnt >= 32) {
+ p->gfreecnt--;
+ gp = p->gfree;
+ p->gfree = gp->schedlink;
+ gp->schedlink = runtime_sched.gfree;
+ runtime_sched.gfree = gp;
+ }
+ runtime_unlock(&runtime_sched.gflock);
+ }
}
-// Get from gfree list. Sched must be locked.
+// Get from gfree list.
+// If local list is empty, grab a batch from global list.
static G*
-gfget(void)
+gfget(P *p)
{
G *gp;
- gp = runtime_sched.gfree;
- if(gp)
- runtime_sched.gfree = gp->schedlink;
+retry:
+ gp = p->gfree;
+ if(gp == nil && runtime_sched.gfree) {
+ runtime_lock(&runtime_sched.gflock);
+ while(p->gfreecnt < 32 && runtime_sched.gfree) {
+ p->gfreecnt++;
+ gp = runtime_sched.gfree;
+ runtime_sched.gfree = gp->schedlink;
+ gp->schedlink = p->gfree;
+ p->gfree = gp;
+ }
+ runtime_unlock(&runtime_sched.gflock);
+ goto retry;
+ }
+ if(gp) {
+ p->gfree = gp->schedlink;
+ p->gfreecnt--;
+ }
return gp;
}
+// Purge all cached G's from gfree list to the global list.
+static void
+gfpurge(P *p)
+{
+ G *gp;
+
+ runtime_lock(&runtime_sched.gflock);
+ while(p->gfreecnt) {
+ p->gfreecnt--;
+ gp = p->gfree;
+ p->gfree = gp->schedlink;
+ gp->schedlink = runtime_sched.gfree;
+ runtime_sched.gfree = gp;
+ }
+ runtime_unlock(&runtime_sched.gflock);
+}
+
+void
+runtime_Breakpoint(void)
+{
+ runtime_breakpoint();
+}
+
void runtime_Gosched (void) __asm__ (GOSYM_PREFIX "runtime.Gosched");
void
@@ -1649,67 +2016,82 @@ runtime_Gosched(void)
}
// Implementation of runtime.GOMAXPROCS.
-// delete when scheduler is stronger
+// delete when scheduler is even stronger
int32
runtime_gomaxprocsfunc(int32 n)
{
int32 ret;
- uint32 v;
- schedlock();
+ if(n > MaxGomaxprocs)
+ n = MaxGomaxprocs;
+ runtime_lock(&runtime_sched);
ret = runtime_gomaxprocs;
- if(n <= 0)
- n = ret;
- if(n > maxgomaxprocs)
- n = maxgomaxprocs;
- runtime_gomaxprocs = n;
- if(runtime_gomaxprocs > 1)
- runtime_singleproc = false;
- if(runtime_gcwaiting != 0) {
- if(atomic_mcpumax(runtime_sched.atomic) != 1)
- runtime_throw("invalid mcpumax during gc");
- schedunlock();
+ if(n <= 0 || n == ret) {
+ runtime_unlock(&runtime_sched);
return ret;
}
+ runtime_unlock(&runtime_sched);
- setmcpumax(n);
+ runtime_semacquire(&runtime_worldsema);
+ m->gcing = 1;
+ runtime_stoptheworld();
+ newprocs = n;
+ m->gcing = 0;
+ runtime_semrelease(&runtime_worldsema);
+ runtime_starttheworld();
- // If there are now fewer allowed procs
- // than procs running, stop.
- v = runtime_atomicload(&runtime_sched.atomic);
- if((int32)atomic_mcpu(v) > n) {
- schedunlock();
- runtime_gosched();
- return ret;
- }
- // handle more procs
- matchmg();
- schedunlock();
return ret;
}
-void
-runtime_LockOSThread(void)
+static void
+LockOSThread(void)
{
- if(m == &runtime_m0 && runtime_sched.init) {
- runtime_sched.lockmain = true;
- return;
- }
m->lockedg = g;
g->lockedm = m;
}
+void runtime_LockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.LockOSThread");
void
-runtime_UnlockOSThread(void)
+runtime_LockOSThread(void)
{
- if(m == &runtime_m0 && runtime_sched.init) {
- runtime_sched.lockmain = false;
+ m->locked |= LockExternal;
+ LockOSThread();
+}
+
+void
+runtime_lockOSThread(void)
+{
+ m->locked += LockInternal;
+ LockOSThread();
+}
+
+static void
+UnlockOSThread(void)
+{
+ if(m->locked != 0)
return;
- }
m->lockedg = nil;
g->lockedm = nil;
}
+void runtime_UnlockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.UnlockOSThread");
+
+void
+runtime_UnlockOSThread(void)
+{
+ m->locked &= ~LockExternal;
+ UnlockOSThread();
+}
+
+void
+runtime_unlockOSThread(void)
+{
+ if(m->locked < LockInternal)
+ runtime_throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
+ m->locked -= LockInternal;
+ UnlockOSThread();
+}
+
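m->locked packs two things into one word: a single bit (LockExternal) set by the user-facing runtime.LockOSThread, and a nesting counter in steps of LockInternal for internal lockOSThread calls; the M stays wired to the G until the whole word is zero, and the misuse check above throws if the internal count underflows. A small standalone sketch of that encoding, with hypothetical names (locked_word, lock_external, and so on):

#include <assert.h>
#include <stdint.h>

enum {
	LOCK_EXTERNAL = 1,   /* single bit: external LockOSThread in effect */
	LOCK_INTERNAL = 2,   /* counter, in steps of 2: internal nesting depth */
};

static uint32_t locked_word;

static void lock_external(void)   { locked_word |= LOCK_EXTERNAL; }
static void unlock_external(void) { locked_word &= ~LOCK_EXTERNAL; }
static void lock_internal(void)   { locked_word += LOCK_INTERNAL; }
static void
unlock_internal(void)
{
	assert(locked_word >= LOCK_INTERNAL);   /* misuse check, like the throw above */
	locked_word -= LOCK_INTERNAL;
}
/* The thread may be released from its goroutine only when the word is zero. */
static int may_unwire(void) { return locked_word == 0; }

int
main(void)
{
	lock_external();
	lock_internal();
	lock_internal();          /* nested internal lock */
	unlock_internal();
	unlock_internal();
	assert(!may_unwire());    /* external bit still set */
	unlock_external();
	assert(may_unwire());
	return 0;
}
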
bool
runtime_lockedOSThread(void)
{
@@ -1740,13 +2122,28 @@ intgo runtime_NumGoroutine (void)
intgo
runtime_NumGoroutine()
{
- return runtime_sched.gcount;
+ return runtime_gcount();
}
int32
runtime_gcount(void)
{
- return runtime_sched.gcount;
+ G *gp;
+ int32 n, s;
+
+ n = 0;
+ runtime_lock(&runtime_sched);
+ // TODO(dvyukov): runtime.NumGoroutine() is O(N).
+ // We do not want to increment/decrement centralized counter in newproc/goexit,
+ // just to make runtime.NumGoroutine() faster.
+ // Compromise solution is to introduce per-P counters of active goroutines.
+ for(gp = runtime_allg; gp; gp = gp->alllink) {
+ s = gp->status;
+ if(s == Grunnable || s == Grunning || s == Gsyscall || s == Gwaiting)
+ n++;
+ }
+ runtime_unlock(&runtime_sched);
+ return n;
}
int32
@@ -1769,6 +2166,9 @@ runtime_sigprof()
{
int32 n, i;
+ // Windows does profiling in a dedicated thread w/o m.
+ if(!Windows && (m == nil || m->mcache == nil))
+ return;
if(prof.fn == nil || prof.hz == 0)
return;
@@ -1813,3 +2213,555 @@ runtime_setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
if(hz != 0)
runtime_resetcpuprofiler(hz);
}
+
+// Change number of processors. The world is stopped, sched is locked.
+static void
+procresize(int32 new)
+{
+ int32 i, old;
+ G *gp;
+ P *p;
+
+ old = runtime_gomaxprocs;
+	if(old < 0 || old > MaxGomaxprocs || new <= 0 || new > MaxGomaxprocs)
+ runtime_throw("procresize: invalid arg");
+ // initialize new P's
+ for(i = 0; i < new; i++) {
+ p = runtime_allp[i];
+ if(p == nil) {
+ p = (P*)runtime_mallocgc(sizeof(*p), 0, 0, 1);
+ p->status = Pgcstop;
+ runtime_atomicstorep(&runtime_allp[i], p);
+ }
+ if(p->mcache == nil) {
+ if(old==0 && i==0)
+ p->mcache = m->mcache; // bootstrap
+ else
+ p->mcache = runtime_allocmcache();
+ }
+ if(p->runq == nil) {
+ p->runqsize = 128;
+ p->runq = (G**)runtime_mallocgc(p->runqsize*sizeof(G*), 0, 0, 1);
+ }
+ }
+
+ // redistribute runnable G's evenly
+ for(i = 0; i < old; i++) {
+ p = runtime_allp[i];
+ while((gp = runqget(p)) != nil)
+ globrunqput(gp);
+ }
+ // start at 1 because current M already executes some G and will acquire allp[0] below,
+ // so if we have a spare G we want to put it into allp[1].
+ for(i = 1; runtime_sched.runqhead; i++) {
+ gp = runtime_sched.runqhead;
+ runtime_sched.runqhead = gp->schedlink;
+ runqput(runtime_allp[i%new], gp);
+ }
+ runtime_sched.runqtail = nil;
+ runtime_sched.runqsize = 0;
+
+ // free unused P's
+ for(i = new; i < old; i++) {
+ p = runtime_allp[i];
+ runtime_freemcache(p->mcache);
+ p->mcache = nil;
+ gfpurge(p);
+ p->status = Pdead;
+ // can't free P itself because it can be referenced by an M in syscall
+ }
+
+ if(m->p)
+ m->p->m = nil;
+ m->p = nil;
+ m->mcache = nil;
+ p = runtime_allp[0];
+ p->m = nil;
+ p->status = Pidle;
+ acquirep(p);
+ for(i = new-1; i > 0; i--) {
+ p = runtime_allp[i];
+ p->status = Pidle;
+ pidleput(p);
+ }
+ runtime_singleproc = new == 1;
+ runtime_atomicstore((uint32*)&runtime_gomaxprocs, new);
+}
+
+// Associate p and the current m.
+static void
+acquirep(P *p)
+{
+ if(m->p || m->mcache)
+ runtime_throw("acquirep: already in go");
+ if(p->m || p->status != Pidle) {
+ runtime_printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? p->m->id : 0, p->status);
+ runtime_throw("acquirep: invalid p state");
+ }
+ m->mcache = p->mcache;
+ m->p = p;
+ p->m = m;
+ p->status = Prunning;
+}
+
+// Disassociate p and the current m.
+static P*
+releasep(void)
+{
+ P *p;
+
+ if(m->p == nil || m->mcache == nil)
+ runtime_throw("releasep: invalid arg");
+ p = m->p;
+ if(p->m != m || p->mcache != m->mcache || p->status != Prunning) {
+ runtime_printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n",
+ m, m->p, p->m, m->mcache, p->mcache, p->status);
+ runtime_throw("releasep: invalid p state");
+ }
+ m->p = nil;
+ m->mcache = nil;
+ p->m = nil;
+ p->status = Pidle;
+ return p;
+}
+
+static void
+inclocked(int32 v)
+{
+ runtime_lock(&runtime_sched);
+ runtime_sched.mlocked += v;
+ if(v > 0)
+ checkdead();
+ runtime_unlock(&runtime_sched);
+}
+
+// Check for deadlock situation.
+// The check is based on number of running M's, if 0 -> deadlock.
+static void
+checkdead(void)
+{
+ G *gp;
+ int32 run, grunning, s;
+
+ // -1 for sysmon
+ run = runtime_sched.mcount - runtime_sched.nmidle - runtime_sched.mlocked - 1;
+ if(run > 0)
+ return;
+ if(run < 0) {
+ runtime_printf("checkdead: nmidle=%d mlocked=%d mcount=%d\n",
+ runtime_sched.nmidle, runtime_sched.mlocked, runtime_sched.mcount);
+ runtime_throw("checkdead: inconsistent counts");
+ }
+ grunning = 0;
+ for(gp = runtime_allg; gp; gp = gp->alllink) {
+ if(gp->isbackground)
+ continue;
+ s = gp->status;
+ if(s == Gwaiting)
+ grunning++;
+ else if(s == Grunnable || s == Grunning || s == Gsyscall) {
+ runtime_printf("checkdead: find g %D in status %d\n", gp->goid, s);
+ runtime_throw("checkdead: runnable g");
+ }
+ }
+ if(grunning == 0) // possible if main goroutine calls runtime_Goexit()
+ runtime_exit(0);
+ m->throwing = -1; // do not dump full stacks
+ runtime_throw("all goroutines are asleep - deadlock!");
+}
+
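In other words, checkdead counts the M's that could still make progress as mcount - nmidle - mlocked - 1 (the -1 reserves one slot for sysmon); if that count reaches zero while some goroutine is still waiting, it throws the familiar "all goroutines are asleep - deadlock!". A tiny worked version of that counting, standalone and with stand-in field names:

#include <assert.h>

/* Running M's, mirroring the formula above (the -1 accounts for sysmon). */
static int
running_ms(int mcount, int nmidle, int mlocked)
{
	return mcount - nmidle - mlocked - 1;
}

int
main(void)
{
	/* 5 threads total, 3 parked idle, 1 wired to a blocked goroutine,
	   1 reserved for sysmon: nothing is left to run user code. */
	assert(running_ms(5, 3, 1) == 0);
	/* One more worker still running, so no deadlock is reported. */
	assert(running_ms(6, 3, 1) == 1);
	return 0;
}
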
+static void
+sysmon(void)
+{
+ uint32 idle, delay;
+ int64 now, lastpoll;
+ G *gp;
+ uint32 ticks[MaxGomaxprocs];
+
+	idle = 0; // how many cycles in succession we have not woken anybody up
+ delay = 0;
+ for(;;) {
+ if(idle == 0) // start with 20us sleep...
+ delay = 20;
+ else if(idle > 50) // start doubling the sleep after 1ms...
+ delay *= 2;
+ if(delay > 10*1000) // up to 10ms
+ delay = 10*1000;
+ runtime_usleep(delay);
+ if(runtime_gcwaiting || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs) { // TODO: fast atomic
+ runtime_lock(&runtime_sched);
+ if(runtime_atomicload(&runtime_gcwaiting) || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs) {
+ runtime_atomicstore(&runtime_sched.sysmonwait, 1);
+ runtime_unlock(&runtime_sched);
+ runtime_notesleep(&runtime_sched.sysmonnote);
+ runtime_noteclear(&runtime_sched.sysmonnote);
+ idle = 0;
+ delay = 20;
+ } else
+ runtime_unlock(&runtime_sched);
+ }
+ // poll network if not polled for more than 10ms
+ lastpoll = runtime_atomicload64(&runtime_sched.lastpoll);
+ now = runtime_nanotime();
+ if(lastpoll != 0 && lastpoll + 10*1000*1000 > now) {
+ gp = runtime_netpoll(false); // non-blocking
+ injectglist(gp);
+ }
+ // retake P's blocked in syscalls
+ if(retake(ticks))
+ idle = 0;
+ else
+ idle++;
+ }
+}
+
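sysmon's sleep interval starts at 20us, doubles once more than 50 consecutive rounds have passed without retaking any P, and is capped at 10ms. A tiny sketch of just that backoff curve (idle and delay play the same roles as above; the print loop is only for illustration):

#include <stdint.h>
#include <stdio.h>

/* Reproduce the delay schedule: 20us while busy, doubling after 50 idle
   rounds, never more than 10ms (values are microseconds). */
static uint32_t
next_delay(uint32_t idle, uint32_t delay)
{
	if(idle == 0)
		delay = 20;
	else if(idle > 50)
		delay *= 2;
	if(delay > 10*1000)
		delay = 10*1000;
	return delay;
}

int
main(void)
{
	uint32_t idle, delay = 0;

	for(idle = 0; idle < 70; idle++) {
		delay = next_delay(idle, delay);
		if(idle % 10 == 0)
			printf("idle=%u delay=%uus\n", idle, delay);
	}
	return 0;
}
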
+static uint32
+retake(uint32 *ticks)
+{
+ uint32 i, s, n;
+ int64 t;
+ P *p;
+
+ n = 0;
+ for(i = 0; i < (uint32)runtime_gomaxprocs; i++) {
+ p = runtime_allp[i];
+ if(p==nil)
+ continue;
+ t = p->tick;
+ if(ticks[i] != t) {
+ ticks[i] = t;
+ continue;
+ }
+ s = p->status;
+ if(s != Psyscall)
+ continue;
+ if(p->runqhead == p->runqtail && runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) > 0) // TODO: fast atomic
+ continue;
+ // Need to increment number of locked M's before the CAS.
+ // Otherwise the M from which we retake can exit the syscall,
+ // increment nmidle and report deadlock.
+ inclocked(-1);
+ if(runtime_cas(&p->status, s, Pidle)) {
+ n++;
+ handoffp(p);
+ }
+ inclocked(1);
+ }
+ return n;
+}
+
+// Put mp on midle list.
+// Sched must be locked.
+static void
+mput(M *mp)
+{
+ mp->schedlink = runtime_sched.midle;
+ runtime_sched.midle = mp;
+ runtime_sched.nmidle++;
+ checkdead();
+}
+
+// Try to get an m from midle list.
+// Sched must be locked.
+static M*
+mget(void)
+{
+ M *mp;
+
+ if((mp = runtime_sched.midle) != nil){
+ runtime_sched.midle = mp->schedlink;
+ runtime_sched.nmidle--;
+ }
+ return mp;
+}
+
+// Put gp on the global runnable queue.
+// Sched must be locked.
+static void
+globrunqput(G *gp)
+{
+ gp->schedlink = nil;
+ if(runtime_sched.runqtail)
+ runtime_sched.runqtail->schedlink = gp;
+ else
+ runtime_sched.runqhead = gp;
+ runtime_sched.runqtail = gp;
+ runtime_sched.runqsize++;
+}
+
+// Try to get a batch of G's from the global runnable queue.
+// Sched must be locked.
+static G*
+globrunqget(P *p)
+{
+ G *gp, *gp1;
+ int32 n;
+
+ if(runtime_sched.runqsize == 0)
+ return nil;
+ n = runtime_sched.runqsize/runtime_gomaxprocs+1;
+ if(n > runtime_sched.runqsize)
+ n = runtime_sched.runqsize;
+ runtime_sched.runqsize -= n;
+ if(runtime_sched.runqsize == 0)
+ runtime_sched.runqtail = nil;
+ gp = runtime_sched.runqhead;
+ runtime_sched.runqhead = gp->schedlink;
+ n--;
+ while(n--) {
+ gp1 = runtime_sched.runqhead;
+ runtime_sched.runqhead = gp1->schedlink;
+ runqput(p, gp1);
+ }
+ return gp;
+}
+
+// Put p on the pidle list.
+// Sched must be locked.
+static void
+pidleput(P *p)
+{
+ p->link = runtime_sched.pidle;
+ runtime_sched.pidle = p;
+ runtime_xadd(&runtime_sched.npidle, 1); // TODO: fast atomic
+}
+
+// Try to get a p from the pidle list.
+// Sched must be locked.
+static P*
+pidleget(void)
+{
+ P *p;
+
+ p = runtime_sched.pidle;
+ if(p) {
+ runtime_sched.pidle = p->link;
+ runtime_xadd(&runtime_sched.npidle, -1); // TODO: fast atomic
+ }
+ return p;
+}
+
+// Put g on local runnable queue.
+// TODO(dvyukov): consider using lock-free queue.
+static void
+runqput(P *p, G *gp)
+{
+ int32 h, t, s;
+
+ runtime_lock(p);
+retry:
+ h = p->runqhead;
+ t = p->runqtail;
+ s = p->runqsize;
+ if(t == h-1 || (h == 0 && t == s-1)) {
+ runqgrow(p);
+ goto retry;
+ }
+ p->runq[t++] = gp;
+ if(t == s)
+ t = 0;
+ p->runqtail = t;
+ runtime_unlock(p);
+}
+
+// Get g from local runnable queue.
+static G*
+runqget(P *p)
+{
+ G *gp;
+ int32 t, h, s;
+
+ if(p->runqhead == p->runqtail)
+ return nil;
+ runtime_lock(p);
+ h = p->runqhead;
+ t = p->runqtail;
+ s = p->runqsize;
+ if(t == h) {
+ runtime_unlock(p);
+ return nil;
+ }
+ gp = p->runq[h++];
+ if(h == s)
+ h = 0;
+ p->runqhead = h;
+ runtime_unlock(p);
+ return gp;
+}
+
+// Grow local runnable queue.
+// TODO(dvyukov): consider using fixed-size array
+// and transfer excess to the global list (local queue can grow way too big).
+static void
+runqgrow(P *p)
+{
+ G **q;
+ int32 s, t, h, t2;
+
+ h = p->runqhead;
+ t = p->runqtail;
+ s = p->runqsize;
+ t2 = 0;
+ q = runtime_malloc(2*s*sizeof(*q));
+ while(t != h) {
+ q[t2++] = p->runq[h++];
+ if(h == s)
+ h = 0;
+ }
+ runtime_free(p->runq);
+ p->runq = q;
+ p->runqhead = 0;
+ p->runqtail = t2;
+ p->runqsize = 2*s;
+}
+
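runqput/runqget treat p->runq as a circular buffer in which head == tail means empty, and the put side declares the queue full one slot early (tail+1 == head modulo size), which is what triggers runqgrow. The sketch below is a self-contained ring with the same one-slot-early full test, using modulo arithmetic instead of the explicit wrap checks; ring_* and cap are illustrative names, not runtime ones.

#include <assert.h>
#include <stdlib.h>

struct ring {
	void **q;
	int head, tail, cap;
};

static int ring_empty(struct ring *r) { return r->head == r->tail; }
/* Full one slot early, like the t == h-1 || (h == 0 && t == s-1) test above. */
static int ring_full(struct ring *r)  { return (r->tail+1) % r->cap == r->head; }

static void
ring_grow(struct ring *r)
{
	void **q = malloc(2*r->cap*sizeof(*q));
	int n = 0;

	while(!ring_empty(r)) {                 /* copy out in FIFO order */
		q[n++] = r->q[r->head];
		r->head = (r->head+1) % r->cap;
	}
	free(r->q);
	r->q = q;
	r->head = 0;
	r->tail = n;
	r->cap *= 2;
}

static void
ring_put(struct ring *r, void *v)
{
	if(ring_full(r))
		ring_grow(r);
	r->q[r->tail] = v;
	r->tail = (r->tail+1) % r->cap;
}

static void *
ring_get(struct ring *r)
{
	void *v;

	if(ring_empty(r))
		return NULL;
	v = r->q[r->head];
	r->head = (r->head+1) % r->cap;
	return v;
}

int
main(void)
{
	struct ring r = { malloc(2*sizeof(void*)), 0, 0, 2 };
	int i, vals[100];

	for(i = 0; i < 100; i++)
		ring_put(&r, &vals[i]);
	for(i = 0; i < 100; i++)
		assert(ring_get(&r) == &vals[i]);   /* FIFO order preserved across grows */
	assert(ring_get(&r) == NULL);
	free(r.q);
	return 0;
}
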
+// Steal half of elements from local runnable queue of p2
+// and put onto local runnable queue of p.
+// Returns one of the stolen elements (or nil if failed).
+static G*
+runqsteal(P *p, P *p2)
+{
+ G *gp, *gp1;
+ int32 t, h, s, t2, h2, s2, c, i;
+
+ if(p2->runqhead == p2->runqtail)
+ return nil;
+ // sort locks to prevent deadlocks
+ if(p < p2)
+ runtime_lock(p);
+ runtime_lock(p2);
+ if(p2->runqhead == p2->runqtail) {
+ runtime_unlock(p2);
+ if(p < p2)
+ runtime_unlock(p);
+ return nil;
+ }
+ if(p >= p2)
+ runtime_lock(p);
+ // now we've locked both queues and know the victim is not empty
+ h = p->runqhead;
+ t = p->runqtail;
+ s = p->runqsize;
+ h2 = p2->runqhead;
+ t2 = p2->runqtail;
+ s2 = p2->runqsize;
+ gp = p2->runq[h2++]; // return value
+ if(h2 == s2)
+ h2 = 0;
+ // steal roughly half
+ if(t2 > h2)
+ c = (t2 - h2) / 2;
+ else
+ c = (s2 - h2 + t2) / 2;
+ // copy
+ for(i = 0; i != c; i++) {
+ // the target queue is full?
+ if(t == h-1 || (h == 0 && t == s-1))
+ break;
+ // the victim queue is empty?
+ if(t2 == h2)
+ break;
+ gp1 = p2->runq[h2++];
+ if(h2 == s2)
+ h2 = 0;
+ p->runq[t++] = gp1;
+ if(t == s)
+ t = 0;
+ }
+ p->runqtail = t;
+ p2->runqhead = h2;
+ runtime_unlock(p2);
+ runtime_unlock(p);
+ return gp;
+}
+
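The steal count above is about half of what remains in the victim's ring after the first popped element, computed with wraparound: (t2-h2)/2 when the occupied region is contiguous and (s2-h2+t2)/2 when it wraps around the end of the array. A tiny standalone check of that arithmetic, with illustrative helper names:

#include <assert.h>

/* Elements currently in a ring of size s with head h and tail t. */
static int
occupied(int h, int t, int s)
{
	return t >= h ? t - h : s - h + t;
}

/* Steal count as computed above: about half the victim's queue. */
static int
steal_count(int h, int t, int s)
{
	return t > h ? (t - h) / 2 : (s - h + t) / 2;
}

int
main(void)
{
	/* Contiguous case: 6 goroutines in a 16-slot ring, steal 3. */
	assert(occupied(2, 8, 16) == 6 && steal_count(2, 8, 16) == 3);
	/* Wrapped case: head 14, tail 4, size 16, still 6 queued, steal 3. */
	assert(occupied(14, 4, 16) == 6 && steal_count(14, 4, 16) == 3);
	return 0;
}
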
+void runtime_testSchedLocalQueue(void)
+ __asm__("runtime.testSchedLocalQueue");
+
+void
+runtime_testSchedLocalQueue(void)
+{
+ P p;
+ G gs[1000];
+ int32 i, j;
+
+ runtime_memclr((byte*)&p, sizeof(p));
+ p.runqsize = 1;
+ p.runqhead = 0;
+ p.runqtail = 0;
+ p.runq = runtime_malloc(p.runqsize*sizeof(*p.runq));
+
+ for(i = 0; i < (int32)nelem(gs); i++) {
+ if(runqget(&p) != nil)
+ runtime_throw("runq is not empty initially");
+ for(j = 0; j < i; j++)
+ runqput(&p, &gs[i]);
+ for(j = 0; j < i; j++) {
+ if(runqget(&p) != &gs[i]) {
+ runtime_printf("bad element at iter %d/%d\n", i, j);
+ runtime_throw("bad element");
+ }
+ }
+ if(runqget(&p) != nil)
+ runtime_throw("runq is not empty afterwards");
+ }
+}
+
+void runtime_testSchedLocalQueueSteal(void)
+ __asm__("runtime.testSchedLocalQueueSteal");
+
+void
+runtime_testSchedLocalQueueSteal(void)
+{
+ P p1, p2;
+ G gs[1000], *gp;
+ int32 i, j, s;
+
+ runtime_memclr((byte*)&p1, sizeof(p1));
+ p1.runqsize = 1;
+ p1.runqhead = 0;
+ p1.runqtail = 0;
+ p1.runq = runtime_malloc(p1.runqsize*sizeof(*p1.runq));
+
+ runtime_memclr((byte*)&p2, sizeof(p2));
+ p2.runqsize = nelem(gs);
+ p2.runqhead = 0;
+ p2.runqtail = 0;
+ p2.runq = runtime_malloc(p2.runqsize*sizeof(*p2.runq));
+
+ for(i = 0; i < (int32)nelem(gs); i++) {
+ for(j = 0; j < i; j++) {
+ gs[j].sig = 0;
+ runqput(&p1, &gs[j]);
+ }
+ gp = runqsteal(&p2, &p1);
+ s = 0;
+ if(gp) {
+ s++;
+ gp->sig++;
+ }
+ while((gp = runqget(&p2)) != nil) {
+ s++;
+ gp->sig++;
+ }
+ while((gp = runqget(&p1)) != nil)
+ gp->sig++;
+ for(j = 0; j < i; j++) {
+ if(gs[j].sig != 1) {
+ runtime_printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i);
+ runtime_throw("bad element");
+ }
+ }
+ if(s != i/2 && s != i/2+1) {
+ runtime_printf("bad steal %d, want %d or %d, iter %d\n",
+ s, i/2, i/2+1, i);
+ runtime_throw("bad steal");
+ }
+ }
+}
+
+void
+runtime_proc_scan(void (*addroot)(Obj))
+{
+ addroot((Obj){(byte*)&runtime_sched, sizeof runtime_sched, 0});
+}
diff --git a/libgo/runtime/race.h b/libgo/runtime/race.h
index 9f3b3ec..3357bed 100644
--- a/libgo/runtime/race.h
+++ b/libgo/runtime/race.h
@@ -11,17 +11,19 @@ enum { raceenabled = 0 };
#endif
// Initialize race detection subsystem.
-void runtime_raceinit(void);
+uintptr runtime_raceinit(void);
// Finalize race detection subsystem, does not return.
void runtime_racefini(void);
void runtime_racemapshadow(void *addr, uintptr size);
void runtime_racemalloc(void *p, uintptr sz, void *pc);
void runtime_racefree(void *p);
-void runtime_racegostart(int32 goid, void *pc);
-void runtime_racegoend(int32 goid);
+uintptr runtime_racegostart(void *pc);
+void runtime_racegoend(void);
void runtime_racewritepc(void *addr, void *callpc, void *pc);
void runtime_racereadpc(void *addr, void *callpc, void *pc);
+void runtime_racewriterangepc(void *addr, uintptr sz, uintptr step, void *callpc, void *pc);
+void runtime_racereadrangepc(void *addr, uintptr sz, uintptr step, void *callpc, void *pc);
void runtime_racefingo(void);
void runtime_raceacquire(void *addr);
void runtime_raceacquireg(G *gp, void *addr);
diff --git a/libgo/runtime/runtime.c b/libgo/runtime/runtime.c
index 48ece55..138e5af 100644
--- a/libgo/runtime/runtime.c
+++ b/libgo/runtime/runtime.c
@@ -10,14 +10,27 @@
#include "array.h"
#include "go-panic.h"
+// The GOTRACEBACK environment variable controls the
+// behavior of a Go program that is crashing and exiting.
+// GOTRACEBACK=0 suppress all tracebacks
+// GOTRACEBACK=1 default behavior - show tracebacks but exclude runtime frames
+// GOTRACEBACK=2 show tracebacks including runtime frames
+// GOTRACEBACK=crash show tracebacks including runtime frames, then crash (core dump etc)
int32
-runtime_gotraceback(void)
+runtime_gotraceback(bool *crash)
{
const byte *p;
+ if(crash != nil)
+ *crash = false;
p = runtime_getenv("GOTRACEBACK");
if(p == nil || p[0] == '\0')
return 1; // default is on
+ if(runtime_strcmp((const char *)p, "crash") == 0) {
+ if(crash != nil)
+ *crash = true;
+ return 2; // extra information
+ }
return runtime_atoi(p);
}
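runtime_gotraceback now reports two things: the numeric traceback level (default 1, 2 for "crash", otherwise whatever the variable parses to) and, through the optional crash pointer, whether GOTRACEBACK=crash was requested. The following is a standalone sketch of the same parsing outside the runtime, using libc calls; it is not the runtime function itself.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Standalone equivalent of the parsing above: returns the traceback level
   and sets *crash when GOTRACEBACK=crash. */
static int
gotraceback(int *crash)
{
	const char *p;

	if(crash != NULL)
		*crash = 0;
	p = getenv("GOTRACEBACK");
	if(p == NULL || p[0] == '\0')
		return 1;                       /* default is on */
	if(strcmp(p, "crash") == 0) {
		if(crash != NULL)
			*crash = 1;
		return 2;                       /* extra information */
	}
	return atoi(p);
}

int
main(void)
{
	int crash, level;

	level = gotraceback(&crash);
	printf("traceback level %d%s\n", level, crash ? " (and crash)" : "");
	return 0;
}
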
@@ -44,6 +57,11 @@ runtime_progname()
return argc == 0 ? nil : argv[0];
}
+// Information about what cpu features are available.
+// Set on startup in asm_{x86/amd64}.s.
+uint32 runtime_cpuid_ecx;
+uint32 runtime_cpuid_edx;
+
void
runtime_goargs(void)
{
@@ -90,6 +108,52 @@ runtime_atoi(const byte *p)
return n;
}
+static struct root_list runtime_roots =
+{ nil,
+ { { &syscall_Envs, sizeof syscall_Envs },
+ { &os_Args, sizeof os_Args },
+ { nil, 0 } },
+};
+
+static void
+TestAtomic64(void)
+{
+ uint64 z64, x64;
+
+ z64 = 42;
+ x64 = 0;
+ PREFETCH(&z64);
+ if(runtime_cas64(&z64, &x64, 1))
+ runtime_throw("cas64 failed");
+ if(x64 != 42)
+ runtime_throw("cas64 failed");
+ if(!runtime_cas64(&z64, &x64, 1))
+ runtime_throw("cas64 failed");
+ if(x64 != 42 || z64 != 1)
+ runtime_throw("cas64 failed");
+ if(runtime_atomicload64(&z64) != 1)
+ runtime_throw("load64 failed");
+ runtime_atomicstore64(&z64, (1ull<<40)+1);
+ if(runtime_atomicload64(&z64) != (1ull<<40)+1)
+ runtime_throw("store64 failed");
+ if(runtime_xadd64(&z64, (1ull<<40)+1) != (2ull<<40)+2)
+ runtime_throw("xadd64 failed");
+ if(runtime_atomicload64(&z64) != (2ull<<40)+2)
+ runtime_throw("xadd64 failed");
+ if(runtime_xchg64(&z64, (3ull<<40)+3) != (2ull<<40)+2)
+ runtime_throw("xchg64 failed");
+ if(runtime_atomicload64(&z64) != (3ull<<40)+3)
+ runtime_throw("xchg64 failed");
+}
+
+void
+runtime_check(void)
+{
+ __go_register_gc_roots(&runtime_roots);
+
+ TestAtomic64();
+}
+
uint32
runtime_fastrand1(void)
{
@@ -105,19 +169,6 @@ runtime_fastrand1(void)
return x;
}
-static struct root_list runtime_roots =
-{ nil,
- { { &syscall_Envs, sizeof syscall_Envs },
- { &os_Args, sizeof os_Args },
- { nil, 0 } },
-};
-
-void
-runtime_check(void)
-{
- __go_register_gc_roots(&runtime_roots);
-}
-
int64
runtime_cputicks(void)
{
@@ -139,7 +190,7 @@ runtime_showframe(String s, bool current)
if(current && runtime_m()->throwing > 0)
return 1;
if(traceback < 0)
- traceback = runtime_gotraceback();
+ traceback = runtime_gotraceback(nil);
return traceback > 1 || (__builtin_memchr(s.str, '.', s.len) != nil && __builtin_memcmp(s.str, "runtime.", 7) != 0);
}
diff --git a/libgo/runtime/runtime.h b/libgo/runtime/runtime.h
index 959220d..5b2a64f 100644
--- a/libgo/runtime/runtime.h
+++ b/libgo/runtime/runtime.h
@@ -54,9 +54,11 @@ typedef uint8 bool;
typedef uint8 byte;
typedef struct Func Func;
typedef struct G G;
-typedef union Lock Lock;
+typedef struct Lock Lock;
typedef struct M M;
-typedef union Note Note;
+typedef struct P P;
+typedef struct Note Note;
+typedef struct String String;
typedef struct FuncVal FuncVal;
typedef struct SigTab SigTab;
typedef struct MCache MCache;
@@ -64,14 +66,14 @@ typedef struct FixAlloc FixAlloc;
typedef struct Hchan Hchan;
typedef struct Timers Timers;
typedef struct Timer Timer;
-typedef struct GCStats GCStats;
-typedef struct LFNode LFNode;
-typedef struct ParFor ParFor;
-typedef struct ParForThread ParForThread;
-typedef struct CgoMal CgoMal;
+typedef struct GCStats GCStats;
+typedef struct LFNode LFNode;
+typedef struct ParFor ParFor;
+typedef struct ParForThread ParForThread;
+typedef struct CgoMal CgoMal;
+typedef struct PollDesc PollDesc;
typedef struct __go_open_array Slice;
-typedef struct String String;
typedef struct __go_interface Iface;
typedef struct __go_empty_interface Eface;
typedef struct __go_type_descriptor Type;
@@ -81,6 +83,7 @@ typedef struct __go_panic_stack Panic;
typedef struct __go_ptr_type PtrType;
typedef struct __go_func_type FuncType;
typedef struct __go_map_type MapType;
+typedef struct __go_channel_type ChanType;
typedef struct Traceback Traceback;
@@ -110,11 +113,20 @@ enum
Grunning,
Gsyscall,
Gwaiting,
- Gmoribund,
+ Gmoribund_unused, // currently unused, but hardcoded in gdb scripts
Gdead,
};
enum
{
+ // P status
+ Pidle,
+ Prunning,
+ Psyscall,
+ Pgcstop,
+ Pdead,
+};
+enum
+{
true = 1,
false = 0,
};
@@ -129,19 +141,22 @@ enum
// Global <-> per-M stack segment cache transfer batch size.
StackCacheBatch = 16,
};
-
/*
* structures
*/
-union Lock
+struct Lock
{
- uint32 key; // futex-based impl
- M* waitm; // linked list of waiting M's (sema-based impl)
+ // Futex-based impl treats it as uint32 key,
+ // while sema-based impl as M* waitm.
+ // Used to be a union, but unions break precise GC.
+ uintptr key;
};
-union Note
+struct Note
{
- uint32 key; // futex-based impl
- M* waitm; // waiting M (sema-based impl)
+ // Futex-based impl treats it as uint32 key,
+ // while sema-based impl as M* waitm.
+ // Used to be a union, but unions break precise GC.
+ uintptr key;
};
struct String
{
@@ -194,13 +209,12 @@ struct G
uint32 selgen; // valid sudog pointer
const char* waitreason; // if status==Gwaiting
G* schedlink;
- bool readyonstop;
bool ispanic;
- bool issystem;
- int8 raceignore; // ignore race detection events
+ bool issystem; // do not output in stack dump
+ bool isbackground; // ignore in deadlock detector
+ bool blockingsyscall; // hint that the next syscall will block
M* m; // for debuggers, but offset not hard-coded
M* lockedm;
- M* idlem;
int32 sig;
int32 writenbuf;
byte* writebuf;
@@ -224,34 +238,44 @@ struct M
{
G* g0; // goroutine with scheduling stack
G* gsignal; // signal-handling G
+ byte* gsignalstack;
+ size_t gsignalstacksize;
+ void (*mstartfn)(void);
G* curg; // current running goroutine
+ P* p; // attached P for executing Go code (nil if not executing Go code)
+ P* nextp;
int32 id;
int32 mallocing;
int32 throwing;
int32 gcing;
int32 locks;
int32 nomemprof;
- int32 waitnextg;
int32 dying;
int32 profilehz;
int32 helpgc;
+ bool blockingsyscall;
+ bool spinning;
uint32 fastrand;
uint64 ncgocall; // number of cgo calls in total
- Note havenextg;
- G* nextg;
+ int32 ncgo; // number of cgo calls currently in progress
+ CgoMal* cgomal;
+ Note park;
M* alllink; // on allm
M* schedlink;
MCache *mcache;
G* lockedg;
- G* idleg;
Location createstack[32]; // Stack that created this thread.
+ uint32 locked; // tracking for LockOSThread
M* nextwaitm; // next M waiting for lock
uintptr waitsema; // semaphore for parking on locks
uint32 waitsemacount;
uint32 waitsemalock;
GCStats gcstats;
bool racecall;
+ bool needextram;
void* racepc;
+ void (*waitunlockf)(Lock*);
+ void* waitlock;
uintptr settype_buf[1024];
uintptr settype_bufsize;
@@ -259,6 +283,38 @@ struct M
uintptr end[];
};
+struct P
+{
+ Lock;
+
+ uint32 status; // one of Pidle/Prunning/...
+ P* link;
+ uint32 tick; // incremented on every scheduler or system call
+ M* m; // back-link to associated M (nil if idle)
+ MCache* mcache;
+
+ // Queue of runnable goroutines.
+ G** runq;
+ int32 runqhead;
+ int32 runqtail;
+ int32 runqsize;
+
+ // Available G's (status == Gdead)
+ G* gfree;
+ int32 gfreecnt;
+
+ byte pad[64];
+};
+
+// The m->locked word holds a single bit saying whether
+// external calls to LockOSThread are in effect, and then a counter
+// of the internal nesting depth of lockOSThread / unlockOSThread.
+enum
+{
+ LockExternal = 1,
+ LockInternal = 2,
+};
+
struct SigTab
{
int32 sig;
@@ -271,6 +327,8 @@ enum
SigThrow = 1<<2, // if signal.Notify doesn't take it, exit loudly
SigPanic = 1<<3, // if the signal is from the kernel, panic
SigDefault = 1<<4, // if the signal isn't explicitly requested, don't monitor it
+ SigHandling = 1<<5, // our signal handler is registered
+ SigIgnored = 1<<6, // the signal was ignored before we registered for it
};
#ifndef NSIG
@@ -343,6 +401,7 @@ struct ParFor
bool wait; // if true, wait while all threads finish processing,
// otherwise parfor may return while other threads are still working
ParForThread *thr; // array of thread descriptors
+ uint32 pad; // to align ParForThread.pos for 64-bit atomic operations
// stats
uint64 nsteal;
uint64 nstealcnt;
@@ -356,7 +415,7 @@ struct ParFor
struct CgoMal
{
CgoMal *next;
- byte *alloc;
+ void *alloc;
};
/*
@@ -369,6 +428,19 @@ struct CgoMal
#define USED(v) ((void) v)
#define ROUND(x, n) (((x)+(n)-1)&~((n)-1)) /* all-caps to mark as macro: it evaluates n twice */
+byte* runtime_startup_random_data;
+uint32 runtime_startup_random_data_len;
+void runtime_get_random_data(byte**, int32*);
+
+enum {
+ // hashinit wants this many random bytes
+ HashRandomBytes = 32
+};
+void runtime_hashinit(void);
+
+void runtime_traceback();
+void runtime_tracebackothers(G*);
+
/*
* external data
*/
@@ -376,21 +448,27 @@ extern uintptr runtime_zerobase;
extern G* runtime_allg;
extern G* runtime_lastg;
extern M* runtime_allm;
+extern P** runtime_allp;
extern int32 runtime_gomaxprocs;
extern bool runtime_singleproc;
extern uint32 runtime_panicking;
-extern int32 runtime_gcwaiting; // gc is waiting to run
+extern uint32 runtime_gcwaiting; // gc is waiting to run
+extern int8* runtime_goos;
extern int32 runtime_ncpu;
+extern void (*runtime_sysargs)(int32, uint8**);
/*
* common functions and data
*/
+#define runtime_strcmp(s1, s2) __builtin_strcmp((s1), (s2))
+#define runtime_strstr(s1, s2) __builtin_strstr((s1), (s2))
intgo runtime_findnull(const byte*);
void runtime_dump(byte*, int32);
/*
* very low level c-called
*/
+struct __go_func_type;
void runtime_args(int32, byte**);
void runtime_osinit();
void runtime_goargs(void);
@@ -400,42 +478,98 @@ void runtime_throw(const char*) __attribute__ ((noreturn));
void runtime_panicstring(const char*) __attribute__ ((noreturn));
void runtime_prints(const char*);
void runtime_printf(const char*, ...);
+#define runtime_mcmp(a, b, s) __builtin_memcmp((a), (b), (s))
+#define runtime_memmove(a, b, s) __builtin_memmove((a), (b), (s))
void* runtime_mal(uintptr);
+String runtime_gostring(const byte*);
+String runtime_gostringnocopy(const byte*);
void runtime_schedinit(void);
void runtime_initsig(void);
void runtime_sigenable(uint32 sig);
-int32 runtime_gotraceback(void);
+void runtime_sigdisable(uint32 sig);
+int32 runtime_gotraceback(bool *crash);
void runtime_goroutineheader(G*);
void runtime_goroutinetrailer(G*);
-void runtime_traceback();
-void runtime_tracebackothers(G*);
void runtime_printtrace(Location*, int32, bool);
-String runtime_gostring(const byte*);
-String runtime_gostringnocopy(const byte*);
+#define runtime_open(p, f, m) open((p), (f), (m))
+#define runtime_read(d, v, n) read((d), (v), (n))
+#define runtime_write(d, v, n) write((d), (v), (n))
+#define runtime_close(d) close(d)
+#define runtime_cas(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
+#define runtime_cas64(pval, pold, new) __atomic_compare_exchange_n (pval, pold, new, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
+#define runtime_casp(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
+// Don't confuse with XADD x86 instruction,
+// this one is actually 'addx', that is, add-and-fetch.
+#define runtime_xadd(p, v) __sync_add_and_fetch (p, v)
+#define runtime_xadd64(p, v) __sync_add_and_fetch (p, v)
+#define runtime_xchg(p, v) __atomic_exchange_n (p, v, __ATOMIC_SEQ_CST)
+#define runtime_xchg64(p, v) __atomic_exchange_n (p, v, __ATOMIC_SEQ_CST)
+#define runtime_atomicload(p) __atomic_load_n (p, __ATOMIC_SEQ_CST)
+#define runtime_atomicstore(p, v) __atomic_store_n (p, v, __ATOMIC_SEQ_CST)
+#define runtime_atomicstore64(p, v) __atomic_store_n (p, v, __ATOMIC_SEQ_CST)
+#define runtime_atomicload64(p) __atomic_load_n (p, __ATOMIC_SEQ_CST)
+#define runtime_atomicloadp(p) __atomic_load_n (p, __ATOMIC_SEQ_CST)
+#define runtime_atomicstorep(p, v) __atomic_store_n (p, v, __ATOMIC_SEQ_CST)
+void runtime_ready(G*);
+const byte* runtime_getenv(const char*);
+int32 runtime_atoi(const byte*);
void* runtime_mstart(void*);
G* runtime_malg(int32, byte**, size_t*);
+void runtime_mpreinit(M*);
void runtime_minit(void);
+void runtime_unminit(void);
+void runtime_signalstack(byte*, int32);
+MCache* runtime_allocmcache(void);
+void runtime_freemcache(MCache*);
void runtime_mallocinit(void);
+void runtime_mprofinit(void);
+#define runtime_malloc(s) __go_alloc(s)
+#define runtime_free(p) __go_free(p)
+bool runtime_addfinalizer(void*, FuncVal *fn, const struct __go_func_type *);
+#define runtime_getcallersp(p) __builtin_frame_address(1)
+int32 runtime_mcount(void);
+int32 runtime_gcount(void);
+uint32 runtime_fastrand1(void);
+
+void runtime_setmg(M*, G*);
+void runtime_newextram(void);
+#define runtime_exit(s) exit(s)
+#define runtime_breakpoint() __builtin_trap()
void runtime_gosched(void);
void runtime_park(void(*)(Lock*), Lock*, const char*);
void runtime_tsleep(int64, const char*);
M* runtime_newm(void);
void runtime_goexit(void);
void runtime_entersyscall(void) __asm__ (GOSYM_PREFIX "syscall.Entersyscall");
+void runtime_entersyscallblock(void);
void runtime_exitsyscall(void) __asm__ (GOSYM_PREFIX "syscall.Exitsyscall");
+G* __go_go(void (*pfn)(void*), void*);
void siginit(void);
bool __go_sigsend(int32 sig);
int32 runtime_callers(int32, Location*, int32);
int64 runtime_nanotime(void);
+void runtime_dopanic(int32) __attribute__ ((noreturn));
+void runtime_startpanic(void);
+void runtime_sigprof();
+void runtime_resetcpuprofiler(int32);
+void runtime_setcpuprofilerate(void(*)(uintptr*, int32), int32);
+void runtime_usleep(uint32);
int64 runtime_cputicks(void);
int64 runtime_tickspersecond(void);
void runtime_blockevent(int64, int32);
extern int64 runtime_blockprofilerate;
+void runtime_addtimer(Timer*);
+bool runtime_deltimer(Timer*);
+G* runtime_netpoll(bool);
+void runtime_netpollinit(void);
+int32 runtime_netpollopen(int32, PollDesc*);
+int32 runtime_netpollclose(int32);
+void runtime_netpollready(G**, PollDesc*, int32);
+void runtime_crash(void);
void runtime_stoptheworld(void);
void runtime_starttheworld(void);
extern uint32 runtime_worldsema;
-G* __go_go(void (*pfn)(void*), void*);
/*
* mutual exclusion locks. in the uncontended case,
@@ -533,6 +667,7 @@ void __wrap_rtems_task_variable_add(void **);
* runtime go-called
*/
void runtime_printbool(_Bool);
+void runtime_printbyte(int8);
void runtime_printfloat(double);
void runtime_printint(int64);
void runtime_printiface(Iface);
@@ -544,53 +679,10 @@ void runtime_printuint(uint64);
void runtime_printhex(uint64);
void runtime_printslice(Slice);
void runtime_printcomplex(__complex double);
-
-struct __go_func_type;
void reflect_call(const struct __go_func_type *, FuncVal *, _Bool, _Bool,
void **, void **)
__asm__ (GOSYM_PREFIX "reflect.call");
-
-/* Functions. */
#define runtime_panic __go_panic
-#define runtime_write(d, v, n) write((d), (v), (n))
-#define runtime_malloc(s) __go_alloc(s)
-#define runtime_free(p) __go_free(p)
-#define runtime_strcmp(s1, s2) __builtin_strcmp((s1), (s2))
-#define runtime_mcmp(a, b, s) __builtin_memcmp((a), (b), (s))
-#define runtime_memmove(a, b, s) __builtin_memmove((a), (b), (s))
-#define runtime_exit(s) exit(s)
-MCache* runtime_allocmcache(void);
-void free(void *v);
-#define runtime_cas(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
-#define runtime_casp(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
-#define runtime_cas64(pval, pold, new) __atomic_compare_exchange_n (pval, pold, new, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
-#define runtime_xadd(p, v) __sync_add_and_fetch (p, v)
-#define runtime_xadd64(p, v) __sync_add_and_fetch (p, v)
-#define runtime_xchg(p, v) __atomic_exchange_n (p, v, __ATOMIC_SEQ_CST)
-#define runtime_atomicload(p) __atomic_load_n (p, __ATOMIC_SEQ_CST)
-#define runtime_atomicstore(p, v) __atomic_store_n (p, v, __ATOMIC_SEQ_CST)
-#define runtime_atomicloadp(p) __atomic_load_n (p, __ATOMIC_SEQ_CST)
-#define runtime_atomicstorep(p, v) __atomic_store_n (p, v, __ATOMIC_SEQ_CST)
-#define runtime_atomicload64(p) __atomic_load_n (p, __ATOMIC_SEQ_CST)
-#define runtime_atomicstore64(p, v) __atomic_store_n (p, v, __ATOMIC_SEQ_CST)
-#define PREFETCH(p) __builtin_prefetch(p)
-
-struct __go_func_type;
-bool runtime_addfinalizer(void*, FuncVal *fn, const struct __go_func_type *);
-#define runtime_getcallersp(p) __builtin_frame_address(1)
-int32 runtime_mcount(void);
-int32 runtime_gcount(void);
-void runtime_dopanic(int32) __attribute__ ((noreturn));
-void runtime_startpanic(void);
-void runtime_ready(G*);
-const byte* runtime_getenv(const char*);
-int32 runtime_atoi(const byte*);
-uint32 runtime_fastrand1(void);
-
-void runtime_sigprof();
-void runtime_resetcpuprofiler(int32);
-void runtime_setcpuprofilerate(void(*)(uintptr*, int32), int32);
-void runtime_usleep(uint32);
/*
* runtime c-called (but written in Go)
@@ -605,14 +697,13 @@ void runtime_newErrorString(String, Eface*)
/*
* wrapped for go users
*/
-#define ISNAN(f) __builtin_isnan(f)
void runtime_semacquire(uint32 volatile *);
void runtime_semrelease(uint32 volatile *);
int32 runtime_gomaxprocsfunc(int32 n);
void runtime_procyield(uint32);
void runtime_osyield(void);
-void runtime_LockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.LockOSThread");
-void runtime_UnlockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.UnlockOSThread");
+void runtime_lockOSThread(void);
+void runtime_unlockOSThread(void);
bool runtime_showframe(String, bool);
@@ -628,12 +719,13 @@ uintptr runtime_memlimit(void);
// This is a no-op on other systems.
void runtime_setprof(bool);
+#define ISNAN(f) __builtin_isnan(f)
+
enum
{
- UseSpanType = 1,
+ UseSpanType = 0,
};
-void runtime_setsig(int32, bool, bool);
#define runtime_setitimer setitimer
void runtime_check(void);
@@ -658,5 +750,8 @@ struct backtrace_state;
extern struct backtrace_state *__go_get_backtrace_state(void);
extern _Bool __go_file_line(uintptr, String*, String*, intgo *);
extern byte* runtime_progname();
+extern void runtime_main(void*);
int32 getproccount(void);
+
+#define PREFETCH(p) __builtin_prefetch(p)
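The atomic macros above are thin wrappers over GCC builtins. The one that is easy to misread is runtime_cas64: __atomic_compare_exchange_n returns false on a mismatch and writes the observed value back into *pold, which is exactly the behavior TestAtomic64 in runtime.c exercises, and runtime_xadd is add-and-fetch, not fetch-and-add. A small standalone demonstration in plain C, with no runtime types:

#include <assert.h>
#include <stdint.h>

#define cas64(pval, pold, new) \
	__atomic_compare_exchange_n(pval, pold, new, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
#define xadd64(p, v) __sync_add_and_fetch(p, v)

int
main(void)
{
	uint64_t z = 42, x = 0;

	assert(!cas64(&z, &x, 1));   /* fails: z != x ... */
	assert(x == 42);             /* ...and x now holds the observed value */
	assert(cas64(&z, &x, 1));    /* succeeds: z was 42, becomes 1 */
	assert(z == 1);
	assert(xadd64(&z, 4) == 5);  /* add-and-fetch returns the new value */
	return 0;
}
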
diff --git a/libgo/runtime/sema.goc b/libgo/runtime/sema.goc
index 4622f6c..be971bd 100644
--- a/libgo/runtime/sema.goc
+++ b/libgo/runtime/sema.goc
@@ -44,12 +44,12 @@ struct SemaRoot
// Prime to not correlate with any user patterns.
#define SEMTABLESZ 251
-union semtable
+struct semtable
{
SemaRoot;
- uint8 pad[CacheLineSize];
+ uint8 pad[CacheLineSize-sizeof(SemaRoot)];
};
-static union semtable semtable[SEMTABLESZ];
+static struct semtable semtable[SEMTABLESZ];
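The union becomes a struct here for the same reason Lock and Note did (unions break precise GC), and the pad shrinks to CacheLineSize minus the payload so each table slot still occupies exactly one cache line, presumably to avoid false sharing between adjacent roots. A generic sketch of that padding trick, with an assumed 64-byte line and stand-in types:

#include <assert.h>
#include <pthread.h>
#include <stdint.h>

#define CACHE_LINE 64   /* assumed line size; the runtime takes it from its arch headers */

struct root {           /* stand-in for SemaRoot */
	pthread_mutex_t lock;
	uint32_t nwait;
};

struct padded_root {
	struct root r;
	uint8_t pad[CACHE_LINE - sizeof(struct root) % CACHE_LINE];
};

static struct padded_root table[251];

int
main(void)
{
	/* Each slot is a whole number of cache lines, so neighbors never share one. */
	assert(sizeof(struct padded_root) % CACHE_LINE == 0);
	assert(((uintptr_t)&table[1] - (uintptr_t)&table[0]) % CACHE_LINE == 0);
	return 0;
}
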
static SemaRoot*
semroot(uint32 volatile *addr)
diff --git a/libgo/runtime/signal_unix.c b/libgo/runtime/signal_unix.c
index 3b8f439..5a506c8 100644
--- a/libgo/runtime/signal_unix.c
+++ b/libgo/runtime/signal_unix.c
@@ -8,6 +8,7 @@
#include "runtime.h"
#include "defs.h"
+#include "signal_unix.h"
extern SigTab runtime_sigtab[];
@@ -22,7 +23,21 @@ runtime_initsig(void)
t = &runtime_sigtab[i];
if((t->flags == 0) || (t->flags & SigDefault))
continue;
- runtime_setsig(i, false, true);
+
+ // For some signals, we respect an inherited SIG_IGN handler
+ // rather than insist on installing our own default handler.
+ // Even these signals can be fetched using the os/signal package.
+ switch(t->sig) {
+ case SIGHUP:
+ case SIGINT:
+ if(runtime_getsig(i) == GO_SIG_IGN) {
+ t->flags = SigNotify | SigIgnored;
+ continue;
+ }
+ }
+
+ t->flags |= SigHandling;
+ runtime_setsig(i, runtime_sighandler, true);
}
}
@@ -32,16 +47,49 @@ runtime_sigenable(uint32 sig)
int32 i;
SigTab *t;
+ t = nil;
for(i = 0; runtime_sigtab[i].sig != -1; i++) {
- // ~0 means all signals.
- if(~sig == 0 || runtime_sigtab[i].sig == (int32)sig) {
+ if(runtime_sigtab[i].sig == (int32)sig) {
t = &runtime_sigtab[i];
- if(t->flags & SigDefault) {
- runtime_setsig(i, false, true);
- t->flags &= ~SigDefault; // make this idempotent
- }
+ break;
}
}
+
+ if(t == nil)
+ return;
+
+ if((t->flags & SigNotify) && !(t->flags & SigHandling)) {
+ t->flags |= SigHandling;
+ if(runtime_getsig(i) == GO_SIG_IGN)
+ t->flags |= SigIgnored;
+ runtime_setsig(i, runtime_sighandler, true);
+ }
+}
+
+void
+runtime_sigdisable(uint32 sig)
+{
+ int32 i;
+ SigTab *t;
+
+ t = nil;
+ for(i = 0; runtime_sigtab[i].sig != -1; i++) {
+ if(runtime_sigtab[i].sig == (int32)sig) {
+ t = &runtime_sigtab[i];
+ break;
+ }
+ }
+
+ if(t == nil)
+ return;
+
+ if((t->flags & SigNotify) && (t->flags & SigHandling)) {
+ t->flags &= ~SigHandling;
+ if(t->flags & SigIgnored)
+ runtime_setsig(i, GO_SIG_IGN, true);
+ else
+ runtime_setsig(i, GO_SIG_DFL, true);
+ }
}
void
@@ -62,3 +110,44 @@ runtime_resetcpuprofiler(int32 hz)
}
runtime_m()->profilehz = hz;
}
+
+void
+os_sigpipe(void)
+{
+ int32 i;
+
+ for(i = 0; runtime_sigtab[i].sig != -1; i++)
+ if(runtime_sigtab[i].sig == SIGPIPE)
+ break;
+ runtime_setsig(i, GO_SIG_DFL, false);
+ runtime_raise(SIGPIPE);
+}
+
+void
+runtime_crash(void)
+{
+ int32 i;
+
+#ifdef GOOS_darwin
+ // OS X core dumps are linear dumps of the mapped memory,
+ // from the first virtual byte to the last, with zeros in the gaps.
+ // Because of the way we arrange the address space on 64-bit systems,
+ // this means the OS X core file will be >128 GB and even on a zippy
+ // workstation can take OS X well over an hour to write (uninterruptible).
+ // Save users from making that mistake.
+ if(sizeof(void*) == 8)
+ return;
+#endif
+
+ for(i = 0; runtime_sigtab[i].sig != -1; i++)
+ if(runtime_sigtab[i].sig == SIGABRT)
+ break;
+ runtime_setsig(i, GO_SIG_DFL, false);
+ runtime_raise(SIGABRT);
+}
+
+void
+runtime_raise(int32 sig)
+{
+ raise(sig);
+}
diff --git a/libgo/runtime/signal_unix.h b/libgo/runtime/signal_unix.h
new file mode 100644
index 0000000..1c51740
--- /dev/null
+++ b/libgo/runtime/signal_unix.h
@@ -0,0 +1,22 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <signal.h>
+
+#define GO_SIG_DFL ((void*)SIG_DFL)
+#define GO_SIG_IGN ((void*)SIG_IGN)
+
+#ifdef SA_SIGINFO
+typedef siginfo_t Siginfo;
+#else
+typedef void *Siginfo;
+#endif
+
+typedef void GoSighandler(int32, Siginfo*, void*, G*);
+void runtime_setsig(int32, GoSighandler*, bool);
+GoSighandler* runtime_getsig(int32);
+
+void runtime_sighandler(int32 sig, Siginfo *info, void *context, G *gp);
+void runtime_raise(int32);
+
diff --git a/libgo/runtime/sigqueue.goc b/libgo/runtime/sigqueue.goc
index 82b0400..8657216 100644
--- a/libgo/runtime/sigqueue.goc
+++ b/libgo/runtime/sigqueue.goc
@@ -107,7 +107,7 @@ func signal_recv() (m uint32) {
new = HASWAITER;
if(runtime_cas(&sig.state, old, new)) {
if (new == HASWAITER) {
- runtime_entersyscall();
+ runtime_entersyscallblock();
runtime_notesleep(&sig);
runtime_exitsyscall();
runtime_noteclear(&sig);
@@ -135,8 +135,6 @@ done:;
// Must only be called from a single goroutine at a time.
func signal_enable(s uint32) {
- int32 i;
-
if(!sig.inuse) {
// The first call to signal_enable is for us
// to use for initialization. It does not pass
@@ -146,16 +144,16 @@ func signal_enable(s uint32) {
return;
}
- if(~s == 0) {
- // Special case: want everything.
- for(i=0; (size_t)i<nelem(sig.wanted); i++)
- sig.wanted[i] = ~(uint32)0;
- runtime_sigenable(s);
- return;
- }
-
if(s >= nelem(sig.wanted)*32)
return;
sig.wanted[s/32] |= 1U<<(s&31);
runtime_sigenable(s);
}
+
+// Must only be called from a single goroutine at a time.
+func signal_disable(s uint32) {
+ if(s >= nelem(sig.wanted)*32)
+ return;
+ sig.wanted[s/32] &= ~(1U<<(s&31));
+ runtime_sigdisable(s);
+}
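sig.wanted is a fixed bitmap of 32-bit words: signal s lives in word s/32 at bit s&31, so signal_enable sets that bit and the new signal_disable clears it. A standalone sketch of the same bitset arithmetic; NSIG here is an assumed bound, not the runtime's value.

#include <assert.h>
#include <stdint.h>

#define NSIG 65                          /* assumed number of signals */

static uint32_t wanted[(NSIG+31)/32];    /* same shape as sig.wanted */

static void enable(uint32_t s)    { wanted[s/32] |= (uint32_t)1 << (s&31); }
static void disable(uint32_t s)   { wanted[s/32] &= ~((uint32_t)1 << (s&31)); }
static int  is_wanted(uint32_t s) { return (wanted[s/32] >> (s&31)) & 1; }

int
main(void)
{
	enable(2);          /* SIGINT on most systems */
	enable(34);         /* lands in the second word */
	assert(is_wanted(2) && is_wanted(34));
	disable(2);
	assert(!is_wanted(2) && is_wanted(34));
	return 0;
}
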
diff --git a/libgo/runtime/string.goc b/libgo/runtime/string.goc
index 04ecbe6..64ed4f6e 100644
--- a/libgo/runtime/string.goc
+++ b/libgo/runtime/string.goc
@@ -7,6 +7,7 @@ package runtime
#include "arch.h"
#include "malloc.h"
#include "go-string.h"
+#include "race.h"
#define charntorune(pv, str, len) __go_get_rune(str, len, pv)
diff --git a/libgo/runtime/thread-linux.c b/libgo/runtime/thread-linux.c
index 13d23c4..74139ea 100644
--- a/libgo/runtime/thread-linux.c
+++ b/libgo/runtime/thread-linux.c
@@ -15,6 +15,7 @@
// Futexsleep is allowed to wake up spuriously.
#include <errno.h>
+#include <signal.h>
#include <string.h>
#include <time.h>
#include <sys/types.h>
@@ -83,3 +84,48 @@ runtime_goenvs(void)
{
runtime_goenvs_unix();
}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
+void
+runtime_mpreinit(M *mp)
+{
+ mp->gsignal = runtime_malg(32*1024, &mp->gsignalstack, &mp->gsignalstacksize); // OS X wants >=8K, Linux >=2K
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the new thread, can not allocate memory.
+void
+runtime_minit(void)
+{
+ M* m;
+ sigset_t sigs;
+
+ // Initialize signal handling.
+ m = runtime_m();
+ runtime_signalstack(m->gsignalstack, m->gsignalstacksize);
+ if (sigemptyset(&sigs) != 0)
+ runtime_throw("sigemptyset");
+ sigprocmask(SIG_SETMASK, &sigs, nil);
+}
+
+// Called from dropm to undo the effect of an minit.
+void
+runtime_unminit(void)
+{
+ runtime_signalstack(nil, 0);
+}
+
+void
+runtime_signalstack(byte *p, int32 n)
+{
+ stack_t st;
+
+ st.ss_sp = p;
+ st.ss_size = n;
+ st.ss_flags = 0;
+ if(p == nil)
+ st.ss_flags = SS_DISABLE;
+ if(sigaltstack(&st, nil) < 0)
+ *(int *)0xf1 = 0xf1;
+}
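runtime_signalstack only registers (or, when given nil, disables) the alternate stack for the current thread; handlers actually run on it only if they are installed with SA_ONSTACK, which the runtime's signal installation is expected to request. A minimal standalone pairing of sigaltstack with an SA_ONSTACK handler; all names are local to the example, and printf is used only because the signal is raised synchronously.

#include <signal.h>
#include <stdio.h>
#include <string.h>

static char altstack[64*1024];   /* generously above MINSIGSTKSZ */

static void
handler(int sig)
{
	/* Runs on altstack because the handler was installed with SA_ONSTACK. */
	char c;
	printf("signal %d handled, stack near %p\n", sig, (void*)&c);
}

int
main(void)
{
	stack_t st;
	struct sigaction sa;

	memset(&st, 0, sizeof st);
	st.ss_sp = altstack;
	st.ss_size = sizeof altstack;
	st.ss_flags = 0;
	if(sigaltstack(&st, NULL) < 0) {
		perror("sigaltstack");
		return 1;
	}

	memset(&sa, 0, sizeof sa);
	sa.sa_handler = handler;
	sa.sa_flags = SA_ONSTACK;
	sigemptyset(&sa.sa_mask);
	if(sigaction(SIGUSR1, &sa, NULL) < 0) {
		perror("sigaction");
		return 1;
	}

	raise(SIGUSR1);
	return 0;
}
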
diff --git a/libgo/runtime/thread.c b/libgo/runtime/thread.c
index 12d0099..83ee006 100644
--- a/libgo/runtime/thread.c
+++ b/libgo/runtime/thread.c
@@ -133,27 +133,6 @@ __sync_add_and_fetch_8 (uint64* ptr, uint64 add)
#endif
-// Called to initialize a new m (including the bootstrap m).
-void
-runtime_minit(void)
-{
- byte* stack;
- size_t stacksize;
- stack_t ss;
- sigset_t sigs;
-
- // Initialize signal handling.
- runtime_m()->gsignal = runtime_malg(32*1024, &stack, &stacksize); // OS X wants >=8K, Linux >=2K
- ss.ss_sp = stack;
- ss.ss_flags = 0;
- ss.ss_size = stacksize;
- if(sigaltstack(&ss, nil) < 0)
- *(int *)0xf1 = 0xf1;
- if (sigemptyset(&sigs) != 0)
- runtime_throw("sigemptyset");
- sigprocmask(SIG_SETMASK, &sigs, nil);
-}
-
uintptr
runtime_memlimit(void)
{
diff --git a/libgo/runtime/time.goc b/libgo/runtime/time.goc
index e9f087a..e06b75c 100644
--- a/libgo/runtime/time.goc
+++ b/libgo/runtime/time.goc
@@ -14,7 +14,6 @@ package time
static Timers timers;
static void addtimer(Timer*);
-static bool deltimer(Timer*);
// Package time APIs.
// Godoc uses the comments in package time, not these.
@@ -30,15 +29,13 @@ func Sleep(ns int64) {
func startTimer(t *Timer) {
if(raceenabled)
runtime_racerelease(t);
- runtime_lock(&timers);
- addtimer(t);
- runtime_unlock(&timers);
+ runtime_addtimer(t);
}
// stopTimer removes t from the timer heap if it is there.
// It returns true if t was removed, false if t wasn't even there.
func stopTimer(t *Timer) (stopped bool) {
- stopped = deltimer(t);
+ stopped = runtime_deltimer(t);
}
// C runtime.
@@ -80,6 +77,14 @@ runtime_tsleep(int64 ns, const char *reason)
runtime_park(runtime_unlock, &timers, reason);
}
+void
+runtime_addtimer(Timer *t)
+{
+ runtime_lock(&timers);
+ addtimer(t);
+ runtime_unlock(&timers);
+}
+
// Add a timer to the heap and start or kick the timer proc
// if the new timer is earlier than any of the others.
static void
@@ -122,8 +127,8 @@ addtimer(Timer *t)
// Delete timer t from the heap.
// Do not need to update the timerproc:
// if it wakes up early, no big deal.
-static bool
-deltimer(Timer *t)
+bool
+runtime_deltimer(Timer *t)
{
int32 i;
@@ -205,7 +210,7 @@ timerproc(void* dummy __attribute__ ((unused)))
timers.sleeping = true;
runtime_noteclear(&timers.waitnote);
runtime_unlock(&timers);
- runtime_entersyscall();
+ runtime_entersyscallblock();
runtime_notetsleep(&timers.waitnote, delta);
runtime_exitsyscall();
}