path: root/libgo/runtime
author     Ian Lance Taylor <ian@gcc.gnu.org>   2014-06-06 22:37:27 +0000
committer  Ian Lance Taylor <ian@gcc.gnu.org>   2014-06-06 22:37:27 +0000
commit     6736ef96eab222e58e6294f42be981a5afb59811 (patch)
tree       2bc668fae9bf96f9a3988e0b0a16685bde8c4f0b /libgo/runtime
parent     38a138411da4206c53f9a153ee9c3624fce58a52 (diff)
libgo: Merge to master revision 19184.
The next revision, 19185, renames several runtime files, and will be handled in a separate change.
From-SVN: r211328
Diffstat (limited to 'libgo/runtime')
-rw-r--r--  libgo/runtime/chan.c | 334
-rw-r--r--  libgo/runtime/env_posix.c | 4
-rw-r--r--  libgo/runtime/go-append.c | 5
-rw-r--r--  libgo/runtime/go-defer.c | 10
-rw-r--r--  libgo/runtime/go-defer.h | 10
-rw-r--r--  libgo/runtime/go-panic.c | 6
-rw-r--r--  libgo/runtime/go-setenv.c | 20
-rw-r--r--  libgo/runtime/go-string-to-byte-array.c | 9
-rw-r--r--  libgo/runtime/go-string-to-int-array.c | 13
-rw-r--r--  libgo/runtime/go-unwind.c | 12
-rw-r--r--  libgo/runtime/go-varargs.c | 6
-rw-r--r--  libgo/runtime/lock_futex.c | 16
-rw-r--r--  libgo/runtime/lock_sema.c | 11
-rw-r--r--  libgo/runtime/malloc.goc | 268
-rw-r--r--  libgo/runtime/malloc.h | 121
-rw-r--r--  libgo/runtime/mcentral.c | 84
-rw-r--r--  libgo/runtime/mfinal.c | 218
-rw-r--r--  libgo/runtime/mgc0.c | 1004
-rw-r--r--  libgo/runtime/mheap.c | 372
-rw-r--r--  libgo/runtime/mprof.goc | 226
-rw-r--r--  libgo/runtime/msize.c | 27
-rw-r--r--  libgo/runtime/netpoll.goc | 139
-rw-r--r--  libgo/runtime/netpoll_epoll.c | 13
-rw-r--r--  libgo/runtime/netpoll_kqueue.c | 12
-rw-r--r--  libgo/runtime/netpoll_select.c | 4
-rw-r--r--  libgo/runtime/netpoll_stub.c | 3
-rw-r--r--  libgo/runtime/panic.c | 73
-rw-r--r--  libgo/runtime/proc.c | 517
-rw-r--r--  libgo/runtime/race.h | 4
-rw-r--r--  libgo/runtime/runtime.c | 9
-rw-r--r--  libgo/runtime/runtime.h | 82
-rw-r--r--  libgo/runtime/sema.goc | 6
-rw-r--r--  libgo/runtime/signal_unix.c | 2
-rw-r--r--  libgo/runtime/string.goc | 4
-rw-r--r--  libgo/runtime/time.goc | 16
35 files changed, 2143 insertions, 1517 deletions
diff --git a/libgo/runtime/chan.c b/libgo/runtime/chan.c
index 6bd12e4..cd3a2c5 100644
--- a/libgo/runtime/chan.c
+++ b/libgo/runtime/chan.c
@@ -8,8 +8,6 @@
#include "race.h"
#include "malloc.h"
-#define NOSELGEN 1
-
typedef struct WaitQ WaitQ;
typedef struct SudoG SudoG;
typedef struct Select Select;
@@ -20,8 +18,8 @@ typedef struct __go_channel_type ChanType;
struct SudoG
{
- G* g; // g and selgen constitute
- uint32 selgen; // a weak pointer to g
+ G* g;
+ uint32* selectdone;
SudoG* link;
int64 releasetime;
byte* elem; // data element
@@ -43,6 +41,7 @@ struct Hchan
uint8 elemalign;
uint8 pad; // ensures proper alignment of the buffer that follows Hchan in memory
bool closed;
+ const Type* elemtype; // element type
uintgo sendx; // send index
uintgo recvx; // receive index
WaitQ recvq; // list of recv waiters
@@ -89,8 +88,8 @@ static SudoG* dequeue(WaitQ*);
static void enqueue(WaitQ*, SudoG*);
static void racesync(Hchan*, SudoG*);
-Hchan*
-runtime_makechan_c(ChanType *t, int64 hint)
+static Hchan*
+makechan(ChanType *t, int64 hint)
{
Hchan *c;
uintptr n;
@@ -102,16 +101,16 @@ runtime_makechan_c(ChanType *t, int64 hint)
if(elem->__size >= (1<<16))
runtime_throw("makechan: invalid channel element type");
- if(hint < 0 || (intgo)hint != hint || (elem->__size > 0 && (uintptr)hint > MaxMem / elem->__size))
+ if(hint < 0 || (intgo)hint != hint || (elem->__size > 0 && (uintptr)hint > (MaxMem - sizeof(*c)) / elem->__size))
runtime_panicstring("makechan: size out of range");
n = sizeof(*c);
n = ROUND(n, elem->__align);
// allocate memory in one call
- c = (Hchan*)runtime_mallocgc(n + hint*elem->__size, (uintptr)t | TypeInfo_Chan, 0);
+ c = (Hchan*)runtime_mallocgc(sizeof(*c) + hint*elem->__size, (uintptr)t | TypeInfo_Chan, 0);
c->elemsize = elem->__size;
- c->elemalign = elem->__align;
+ c->elemtype = elem;
c->dataqsiz = hint;
if(debug)
@@ -131,7 +130,7 @@ reflect_makechan(ChanType *t, uint64 size)
{
Hchan *c;
- c = runtime_makechan_c(t, size);
+ c = makechan(t, size);
return c;
}
@@ -139,13 +138,13 @@ reflect_makechan(ChanType *t, uint64 size)
Hchan*
__go_new_channel(ChanType *t, uintptr hint)
{
- return runtime_makechan_c(t, hint);
+ return makechan(t, hint);
}
Hchan*
__go_new_channel_big(ChanType *t, uint64 hint)
{
- return runtime_makechan_c(t, hint);
+ return makechan(t, hint);
}
/*
@@ -162,8 +161,8 @@ __go_new_channel_big(ChanType *t, uint64 hint)
* been closed. it is easiest to loop and re-run
* the operation; we'll see that it's now closed.
*/
-void
-runtime_chansend(ChanType *t, Hchan *c, byte *ep, bool *pres, void *pc)
+static bool
+chansend(ChanType *t, Hchan *c, byte *ep, bool block, void *pc)
{
SudoG *sg;
SudoG mysg;
@@ -173,14 +172,15 @@ runtime_chansend(ChanType *t, Hchan *c, byte *ep, bool *pres, void *pc)
g = runtime_g();
+ if(raceenabled)
+ runtime_racereadobjectpc(ep, t->__element_type, runtime_getcallerpc(&t), chansend);
+
if(c == nil) {
USED(t);
- if(pres != nil) {
- *pres = false;
- return;
- }
+ if(!block)
+ return false;
runtime_park(nil, nil, "chan send (nil chan)");
- return; // not reached
+ return false; // not reached
}
if(runtime_gcwaiting())
@@ -199,7 +199,7 @@ runtime_chansend(ChanType *t, Hchan *c, byte *ep, bool *pres, void *pc)
runtime_lock(c);
if(raceenabled)
- runtime_racereadpc(c, pc, runtime_chansend);
+ runtime_racereadpc(c, pc, chansend);
if(c->closed)
goto closed;
@@ -219,24 +219,20 @@ runtime_chansend(ChanType *t, Hchan *c, byte *ep, bool *pres, void *pc)
if(sg->releasetime)
sg->releasetime = runtime_cputicks();
runtime_ready(gp);
-
- if(pres != nil)
- *pres = true;
- return;
+ return true;
}
- if(pres != nil) {
+ if(!block) {
runtime_unlock(c);
- *pres = false;
- return;
+ return false;
}
mysg.elem = ep;
mysg.g = g;
- mysg.selgen = NOSELGEN;
+ mysg.selectdone = nil;
g->param = nil;
enqueue(&c->sendq, &mysg);
- runtime_park(runtime_unlock, c, "chan send");
+ runtime_parkunlock(c, "chan send");
if(g->param == nil) {
runtime_lock(c);
@@ -248,23 +244,22 @@ runtime_chansend(ChanType *t, Hchan *c, byte *ep, bool *pres, void *pc)
if(mysg.releasetime > 0)
runtime_blockevent(mysg.releasetime - t0, 2);
- return;
+ return true;
asynch:
if(c->closed)
goto closed;
if(c->qcount >= c->dataqsiz) {
- if(pres != nil) {
+ if(!block) {
runtime_unlock(c);
- *pres = false;
- return;
+ return false;
}
mysg.g = g;
mysg.elem = nil;
- mysg.selgen = NOSELGEN;
+ mysg.selectdone = nil;
enqueue(&c->sendq, &mysg);
- runtime_park(runtime_unlock, c, "chan send");
+ runtime_parkunlock(c, "chan send");
runtime_lock(c);
goto asynch;
@@ -287,20 +282,19 @@ asynch:
runtime_ready(gp);
} else
runtime_unlock(c);
- if(pres != nil)
- *pres = true;
if(mysg.releasetime > 0)
runtime_blockevent(mysg.releasetime - t0, 2);
- return;
+ return true;
closed:
runtime_unlock(c);
runtime_panicstring("send on closed channel");
+ return false; // not reached
}
-void
-runtime_chanrecv(ChanType *t, Hchan* c, byte *ep, bool *selected, bool *received)
+static bool
+chanrecv(ChanType *t, Hchan* c, byte *ep, bool block, bool *received)
{
SudoG *sg;
SudoG mysg;
@@ -311,6 +305,8 @@ runtime_chanrecv(ChanType *t, Hchan* c, byte *ep, bool *selected, bool *received
if(runtime_gcwaiting())
runtime_gosched();
+ // raceenabled: don't need to check ep, as it is always on the stack.
+
if(debug)
runtime_printf("chanrecv: chan=%p\n", c);
@@ -318,12 +314,10 @@ runtime_chanrecv(ChanType *t, Hchan* c, byte *ep, bool *selected, bool *received
if(c == nil) {
USED(t);
- if(selected != nil) {
- *selected = false;
- return;
- }
+ if(!block)
+ return false;
runtime_park(nil, nil, "chan receive (nil chan)");
- return; // not reached
+ return false; // not reached
}
t0 = 0;
@@ -354,25 +348,22 @@ runtime_chanrecv(ChanType *t, Hchan* c, byte *ep, bool *selected, bool *received
sg->releasetime = runtime_cputicks();
runtime_ready(gp);
- if(selected != nil)
- *selected = true;
if(received != nil)
*received = true;
- return;
+ return true;
}
- if(selected != nil) {
+ if(!block) {
runtime_unlock(c);
- *selected = false;
- return;
+ return false;
}
mysg.elem = ep;
mysg.g = g;
- mysg.selgen = NOSELGEN;
+ mysg.selectdone = nil;
g->param = nil;
enqueue(&c->recvq, &mysg);
- runtime_park(runtime_unlock, c, "chan receive");
+ runtime_parkunlock(c, "chan receive");
if(g->param == nil) {
runtime_lock(c);
@@ -385,25 +376,24 @@ runtime_chanrecv(ChanType *t, Hchan* c, byte *ep, bool *selected, bool *received
*received = true;
if(mysg.releasetime > 0)
runtime_blockevent(mysg.releasetime - t0, 2);
- return;
+ return true;
asynch:
if(c->qcount <= 0) {
if(c->closed)
goto closed;
- if(selected != nil) {
+ if(!block) {
runtime_unlock(c);
- *selected = false;
if(received != nil)
*received = false;
- return;
+ return false;
}
mysg.g = g;
mysg.elem = nil;
- mysg.selgen = NOSELGEN;
+ mysg.selectdone = nil;
enqueue(&c->recvq, &mysg);
- runtime_park(runtime_unlock, c, "chan receive");
+ runtime_parkunlock(c, "chan receive");
runtime_lock(c);
goto asynch;
@@ -429,19 +419,15 @@ asynch:
} else
runtime_unlock(c);
- if(selected != nil)
- *selected = true;
if(received != nil)
*received = true;
if(mysg.releasetime > 0)
runtime_blockevent(mysg.releasetime - t0, 2);
- return;
+ return true;
closed:
if(ep != nil)
runtime_memclr(ep, c->elemsize);
- if(selected != nil)
- *selected = true;
if(received != nil)
*received = false;
if(raceenabled)
@@ -449,6 +435,7 @@ closed:
runtime_unlock(c);
if(mysg.releasetime > 0)
runtime_blockevent(mysg.releasetime - t0, 2);
+ return true;
}
// The compiler generates a call to __go_send_small to send a value 8
@@ -461,46 +448,46 @@ __go_send_small(ChanType *t, Hchan* c, uint64 val)
byte b[sizeof(uint64)];
uint64 v;
} u;
- byte *p;
+ byte *v;
u.v = val;
#ifndef WORDS_BIGENDIAN
- p = u.b;
+ v = u.b;
#else
- p = u.b + sizeof(uint64) - t->__element_type->__size;
+ v = u.b + sizeof(uint64) - t->__element_type->__size;
#endif
- runtime_chansend(t, c, p, nil, runtime_getcallerpc(&t));
+ chansend(t, c, v, true, runtime_getcallerpc(&t));
}
// The compiler generates a call to __go_send_big to send a value
// larger than 8 bytes or smaller.
void
-__go_send_big(ChanType *t, Hchan* c, byte* p)
+__go_send_big(ChanType *t, Hchan* c, byte* v)
{
- runtime_chansend(t, c, p, nil, runtime_getcallerpc(&t));
+ chansend(t, c, v, true, runtime_getcallerpc(&t));
}
// The compiler generates a call to __go_receive to receive a
// value from a channel.
void
-__go_receive(ChanType *t, Hchan* c, byte* p)
+__go_receive(ChanType *t, Hchan* c, byte* v)
{
- runtime_chanrecv(t, c, p, nil, nil);
+ chanrecv(t, c, v, true, nil);
}
-_Bool runtime_chanrecv2(ChanType *t, Hchan* c, byte* p)
+_Bool runtime_chanrecv2(ChanType *t, Hchan* c, byte* v)
__asm__ (GOSYM_PREFIX "runtime.chanrecv2");
_Bool
-runtime_chanrecv2(ChanType *t, Hchan* c, byte* p)
+runtime_chanrecv2(ChanType *t, Hchan* c, byte* v)
{
bool received;
- runtime_chanrecv(t, c, p, nil, &received);
+ chanrecv(t, c, v, true, &received);
return received;
}
-// func selectnbsend(c chan any, elem any) bool
+// func selectnbsend(c chan any, elem *any) bool
//
// compiler implements
//
@@ -520,12 +507,12 @@ runtime_chanrecv2(ChanType *t, Hchan* c, byte* p)
// }
//
_Bool
-runtime_selectnbsend(ChanType *t, Hchan *c, byte *p)
+runtime_selectnbsend(ChanType *t, Hchan *c, byte *val)
{
bool res;
- runtime_chansend(t, c, p, &res, runtime_getcallerpc(&t));
- return res;
+ res = chansend(t, c, val, false, runtime_getcallerpc(&t));
+ return (_Bool)res;
}
// func selectnbrecv(elem *any, c chan any) bool
@@ -552,8 +539,8 @@ runtime_selectnbrecv(ChanType *t, byte *v, Hchan *c)
{
bool selected;
- runtime_chanrecv(t, c, v, &selected, nil);
- return selected;
+ selected = chanrecv(t, c, v, false, nil);
+ return (_Bool)selected;
}
// func selectnbrecv2(elem *any, ok *bool, c chan any) bool
@@ -582,88 +569,60 @@ runtime_selectnbrecv2(ChanType *t, byte *v, _Bool *received, Hchan *c)
bool r;
r = false;
- runtime_chanrecv(t, c, v, &selected, received == nil ? nil : &r);
+ selected = chanrecv(t, c, v, false, received == nil ? nil : &r);
if(received != nil)
*received = r;
return selected;
}
// For reflect:
-// func chansend(c chan, val iword, nb bool) (selected bool)
-// where an iword is the same word an interface value would use:
-// the actual data if it fits, or else a pointer to the data.
+// func chansend(c chan, val *any, nb bool) (selected bool)
+// where val points to the data to be sent.
+//
+// The "uintptr selected" is really "bool selected" but saying
+// uintptr gets us the right alignment for the output parameter block.
-_Bool reflect_chansend(ChanType *, Hchan *, uintptr, _Bool)
+_Bool reflect_chansend(ChanType *, Hchan *, byte *, _Bool)
__asm__ (GOSYM_PREFIX "reflect.chansend");
_Bool
-reflect_chansend(ChanType *t, Hchan *c, uintptr val, _Bool nb)
+reflect_chansend(ChanType *t, Hchan *c, byte *val, _Bool nb)
{
bool selected;
- bool *sp;
- byte *vp;
- if(nb) {
- selected = false;
- sp = (bool*)&selected;
- } else {
- selected = true;
- sp = nil;
- }
- if(__go_is_pointer_type(t->__element_type))
- vp = (byte*)&val;
- else
- vp = (byte*)val;
- runtime_chansend(t, c, vp, sp, runtime_getcallerpc(&t));
- return selected;
+ selected = chansend(t, c, val, !nb, runtime_getcallerpc(&t));
+ return (_Bool)selected;
}
// For reflect:
-// func chanrecv(c chan, nb bool) (val iword, selected, received bool)
-// where an iword is the same word an interface value would use:
-// the actual data if it fits, or else a pointer to the data.
+// func chanrecv(c chan, nb bool, val *any) (selected, received bool)
+// where val points to a data area that will be filled in with the
+// received value. val must have the size and type of the channel element type.
struct chanrecv_ret
{
- uintptr val;
_Bool selected;
_Bool received;
};
-struct chanrecv_ret reflect_chanrecv(ChanType *, Hchan *, _Bool)
+struct chanrecv_ret reflect_chanrecv(ChanType *, Hchan *, _Bool, byte *val)
__asm__ (GOSYM_PREFIX "reflect.chanrecv");
struct chanrecv_ret
-reflect_chanrecv(ChanType *t, Hchan *c, _Bool nb)
+reflect_chanrecv(ChanType *t, Hchan *c, _Bool nb, byte *val)
{
struct chanrecv_ret ret;
- byte *vp;
- bool *sp;
bool selected;
bool received;
- if(nb) {
- selected = false;
- sp = &selected;
- } else {
- ret.selected = true;
- sp = nil;
- }
received = false;
- if(__go_is_pointer_type(t->__element_type)) {
- vp = (byte*)&ret.val;
- } else {
- vp = runtime_mal(t->__element_type->__size);
- ret.val = (uintptr)vp;
- }
- runtime_chanrecv(t, c, vp, sp, &received);
- if(nb)
- ret.selected = selected;
- ret.received = received;
+ selected = chanrecv(t, c, val, !nb, &received);
+ ret.selected = (_Bool)selected;
+ ret.received = (_Bool)received;
return ret;
}
-static void newselect(int32, Select**);
+static Select* newselect(int32);
// newselect(size uint32) (sel *byte);
@@ -672,14 +631,11 @@ void* runtime_newselect(int32) __asm__ (GOSYM_PREFIX "runtime.newselect");
void*
runtime_newselect(int32 size)
{
- Select *sel;
-
- newselect(size, &sel);
- return (void*)sel;
+ return (void*)newselect(size);
}
-static void
-newselect(int32 size, Select **selp)
+static Select*
+newselect(int32 size)
{
int32 n;
Select *sel;
@@ -701,10 +657,10 @@ newselect(int32 size, Select **selp)
sel->ncase = 0;
sel->lockorder = (void*)(sel->scase + size);
sel->pollorder = (void*)(sel->lockorder + size);
- *selp = sel;
if(debug)
runtime_printf("newselect s=%p size=%d\n", sel, size);
+ return sel;
}
// cut in half to give stack a chance to split
@@ -880,6 +836,14 @@ selunlock(Select *sel)
}
}
+static bool
+selparkcommit(G *gp, void *sel)
+{
+ USED(gp);
+ selunlock(sel);
+ return true;
+}
+
void
runtime_block(void)
{
@@ -902,7 +866,7 @@ static int
selectgo(Select **selp)
{
Select *sel;
- uint32 o, i, j, k;
+ uint32 o, i, j, k, done;
int64 t0;
Scase *cas, *dfl;
Hchan *c;
@@ -1008,7 +972,7 @@ loop:
case CaseSend:
if(raceenabled)
- runtime_racereadpc(c, runtime_selectgo, runtime_chansend);
+ runtime_racereadpc(c, runtime_selectgo, chansend);
if(c->closed)
goto sclose;
if(c->dataqsiz > 0) {
@@ -1035,13 +999,14 @@ loop:
// pass 2 - enqueue on all chans
+ done = 0;
for(i=0; i<sel->ncase; i++) {
o = sel->pollorder[i];
cas = &sel->scase[o];
c = cas->chan;
sg = &cas->sg;
sg->g = g;
- sg->selgen = g->selgen;
+ sg->selectdone = &done;
switch(cas->kind) {
case CaseRecv:
@@ -1055,7 +1020,7 @@ loop:
}
g->param = nil;
- runtime_park((void(*)(Lock*))selunlock, (Lock*)sel, "select");
+ runtime_park(selparkcommit, sel, "select");
sellock(sel);
sg = g->param;
@@ -1091,13 +1056,23 @@ loop:
*cas->receivedp = true;
}
+ if(raceenabled) {
+ if(cas->kind == CaseRecv && cas->sg.elem != nil)
+ runtime_racewriteobjectpc(cas->sg.elem, c->elemtype, selectgo, chanrecv);
+ else if(cas->kind == CaseSend)
+ runtime_racereadobjectpc(cas->sg.elem, c->elemtype, selectgo, chansend);
+ }
+
selunlock(sel);
goto retc;
asyncrecv:
// can receive from buffer
- if(raceenabled)
+ if(raceenabled) {
+ if(cas->sg.elem != nil)
+ runtime_racewriteobjectpc(cas->sg.elem, c->elemtype, selectgo, chanrecv);
runtime_raceacquire(chanbuf(c, c->recvx));
+ }
if(cas->receivedp != nil)
*cas->receivedp = true;
if(cas->sg.elem != nil)
@@ -1120,8 +1095,10 @@ asyncrecv:
asyncsend:
// can send to buffer
- if(raceenabled)
+ if(raceenabled) {
runtime_racerelease(chanbuf(c, c->sendx));
+ runtime_racereadobjectpc(cas->sg.elem, c->elemtype, selectgo, chansend);
+ }
runtime_memmove(chanbuf(c, c->sendx), cas->sg.elem, c->elemsize);
if(++c->sendx == c->dataqsiz)
c->sendx = 0;
@@ -1140,8 +1117,11 @@ asyncsend:
syncrecv:
// can receive from sleeping sender (sg)
- if(raceenabled)
+ if(raceenabled) {
+ if(cas->sg.elem != nil)
+ runtime_racewriteobjectpc(cas->sg.elem, c->elemtype, selectgo, chanrecv);
racesync(c, sg);
+ }
selunlock(sel);
if(debug)
runtime_printf("syncrecv: sel=%p c=%p o=%d\n", sel, c, o);
@@ -1169,8 +1149,10 @@ rclose:
syncsend:
// can send to sleeping receiver (sg)
- if(raceenabled)
+ if(raceenabled) {
+ runtime_racereadobjectpc(cas->sg.elem, c->elemtype, selectgo, chansend);
racesync(c, sg);
+ }
selunlock(sel);
if(debug)
runtime_printf("syncsend: sel=%p c=%p o=%d\n", sel, c, o);
@@ -1204,7 +1186,7 @@ struct runtimeSelect
uintptr dir;
ChanType *typ;
Hchan *ch;
- uintptr val;
+ byte *val;
};
// This enum must match ../reflect/value.go:/SelectDir.
@@ -1214,14 +1196,13 @@ enum SelectDir {
SelectDefault,
};
+// func rselect(cases []runtimeSelect) (chosen int, recvOK bool)
+
struct rselect_ret {
intgo chosen;
- uintptr word;
- bool recvOK;
+ _Bool recvOK;
};
-// func rselect(cases []runtimeSelect) (chosen int, word uintptr, recvOK bool)
-
struct rselect_ret reflect_rselect(Slice)
__asm__ (GOSYM_PREFIX "reflect.rselect");
@@ -1229,36 +1210,18 @@ struct rselect_ret
reflect_rselect(Slice cases)
{
struct rselect_ret ret;
+ intgo chosen;
+ bool recvOK;
int32 i;
Select *sel;
runtimeSelect* rcase, *rc;
- void *elem;
- void *recvptr;
- uintptr maxsize;
- bool onlyptr;
- ret.chosen = -1;
- ret.word = 0;
- ret.recvOK = false;
+ chosen = -1;
+ recvOK = false;
- maxsize = 0;
- onlyptr = true;
rcase = (runtimeSelect*)cases.__values;
- for(i=0; i<cases.__count; i++) {
- rc = &rcase[i];
- if(rc->dir == SelectRecv && rc->ch != nil) {
- if(maxsize < rc->typ->__element_type->__size)
- maxsize = rc->typ->__element_type->__size;
- if(!__go_is_pointer_type(rc->typ->__element_type))
- onlyptr = false;
- }
- }
-
- recvptr = nil;
- if(!onlyptr)
- recvptr = runtime_mal(maxsize);
- newselect(cases.__count, &sel);
+ sel = newselect(cases.__count);
for(i=0; i<cases.__count; i++) {
rc = &rcase[i];
switch(rc->dir) {
@@ -1268,28 +1231,20 @@ reflect_rselect(Slice cases)
case SelectSend:
if(rc->ch == nil)
break;
- if(!__go_is_pointer_type(rc->typ->__element_type))
- elem = (void*)rc->val;
- else
- elem = (void*)&rc->val;
- selectsend(sel, rc->ch, i, elem);
+ selectsend(sel, rc->ch, i, rc->val);
break;
case SelectRecv:
if(rc->ch == nil)
break;
- if(!__go_is_pointer_type(rc->typ->__element_type))
- elem = recvptr;
- else
- elem = &ret.word;
- selectrecv(sel, rc->ch, i, elem, &ret.recvOK);
+ selectrecv(sel, rc->ch, i, rc->val, &recvOK);
break;
}
}
- ret.chosen = (intgo)(uintptr)selectgo(&sel);
- if(rcase[ret.chosen].dir == SelectRecv && !__go_is_pointer_type(rcase[ret.chosen].typ->__element_type))
- ret.word = (uintptr)recvptr;
+ chosen = (intgo)(uintptr)selectgo(&sel);
+ ret.chosen = chosen;
+ ret.recvOK = (_Bool)recvOK;
return ret;
}
@@ -1428,12 +1383,11 @@ loop:
return nil;
q->first = sgp->link;
- // if sgp is stale, ignore it
- if(sgp->selgen != NOSELGEN &&
- (sgp->selgen != sgp->g->selgen ||
- !runtime_cas(&sgp->g->selgen, sgp->selgen, sgp->selgen + 2))) {
- //prints("INVALID PSEUDOG POINTER\n");
- goto loop;
+ // if sgp participates in a select and is already signaled, ignore it
+ if(sgp->selectdone != nil) {
+ // claim the right to signal
+ if(*sgp->selectdone != 0 || !runtime_cas(sgp->selectdone, 0, 1))
+ goto loop;
}
return sgp;
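The chan.c changes above drop the old (g, selgen) weak-pointer scheme: every SudoG queued for one select now points at a single shared selectdone word, and the first waker to flip it from 0 to 1 wins the right to signal, while later wakers treat the stale entry as already claimed. A minimal standalone sketch of that claim pattern, not part of the patch, using GCC's __sync_bool_compare_and_swap as a stand-in for the runtime's runtime_cas:

#include <stdint.h>
#include <stdio.h>

/* Returns 1 if this caller wins the right to signal the select,
   0 if some other waker already claimed it (the entry is stale). */
static int
claim(uint32_t *selectdone)
{
	if (*selectdone != 0)
		return 0;                       /* already signaled */
	return __sync_bool_compare_and_swap(selectdone, 0, 1);
}

int
main(void)
{
	uint32_t done = 0;                      /* shared by all cases of one select */
	printf("first waker claims: %d\n", claim(&done));   /* 1 */
	printf("second waker claims: %d\n", claim(&done));  /* 0 */
	return 0;
}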
diff --git a/libgo/runtime/env_posix.c b/libgo/runtime/env_posix.c
index 3219550..93f90f5 100644
--- a/libgo/runtime/env_posix.c
+++ b/libgo/runtime/env_posix.c
@@ -2,10 +2,12 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build darwin dragonfly freebsd linux netbsd openbsd windows
+// +build darwin dragonfly freebsd linux netbsd openbsd solaris windows
#include "runtime.h"
#include "array.h"
+#include "arch.h"
+#include "malloc.h"
extern Slice syscall_Envs __asm__ (GOSYM_PREFIX "syscall.Envs");
diff --git a/libgo/runtime/go-append.c b/libgo/runtime/go-append.c
index 8d5dee2..1b2d49e 100644
--- a/libgo/runtime/go-append.c
+++ b/libgo/runtime/go-append.c
@@ -37,6 +37,7 @@ __go_append (struct __go_open_array a, void *bvalues, uintptr_t bcount,
if (count > a.__capacity)
{
intgo m;
+ uintptr capmem;
void *n;
m = a.__capacity;
@@ -57,7 +58,9 @@ __go_append (struct __go_open_array a, void *bvalues, uintptr_t bcount,
if (element_size > 0 && (uintptr) m > MaxMem / element_size)
runtime_panicstring ("growslice: cap out of range");
- n = __go_alloc (m * element_size);
+ capmem = runtime_roundupsize (m * element_size);
+
+ n = __go_alloc (capmem);
__builtin_memcpy (n, a.__values, a.__count * element_size);
a.__values = n;
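The growslice change above allocates capmem = runtime_roundupsize (m * element_size) rather than the raw byte count, so the requested size matches the block the allocator will actually hand back. A small standalone sketch of the idea, where next_size_class is a hypothetical stand-in for the runtime's size-class rounding (it is not the real runtime_roundupsize):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical rounding: bump small requests up to a multiple of 16,
   standing in for the allocator's size-class table. */
static uintptr_t
next_size_class(uintptr_t n)
{
	return (n + 15) & ~(uintptr_t)15;
}

int
main(void)
{
	uintptr_t want = 5 * 24;                 /* 5 elements of 24 bytes */
	uintptr_t capmem = next_size_class(want);
	printf("requested %llu, block %llu, slack %llu bytes\n",
	       (unsigned long long)want, (unsigned long long)capmem,
	       (unsigned long long)(capmem - want));
	return 0;
}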
diff --git a/libgo/runtime/go-defer.c b/libgo/runtime/go-defer.c
index 4c61ae7..5dd8c31 100644
--- a/libgo/runtime/go-defer.c
+++ b/libgo/runtime/go-defer.c
@@ -20,7 +20,7 @@ __go_defer (_Bool *frame, void (*pfn) (void *), void *arg)
struct __go_defer_stack *n;
g = runtime_g ();
- n = (struct __go_defer_stack *) __go_alloc (sizeof (struct __go_defer_stack));
+ n = runtime_newdefer ();
n->__next = g->defer;
n->__frame = frame;
n->__panic = g->panic;
@@ -28,7 +28,7 @@ __go_defer (_Bool *frame, void (*pfn) (void *), void *arg)
n->__arg = arg;
n->__retaddr = NULL;
n->__makefunc_can_recover = 0;
- n->__free = 1;
+ n->__special = 0;
g->defer = n;
}
@@ -44,7 +44,6 @@ __go_undefer (_Bool *frame)
{
struct __go_defer_stack *d;
void (*pfn) (void *);
- M *m;
d = g->defer;
pfn = d->__pfn;
@@ -59,9 +58,8 @@ __go_undefer (_Bool *frame)
call to syscall.CgocallBackDone, in which case we will not
have a memory context. Don't try to free anything in that
case--the GC will release it later. */
- m = runtime_m ();
- if (m != NULL && m->mcache != NULL && d->__free)
- __go_free (d);
+ if (runtime_m () != NULL)
+ runtime_freedefer (d);
/* Since we are executing a defer function here, we know we are
returning from the calling function. If the calling
diff --git a/libgo/runtime/go-defer.h b/libgo/runtime/go-defer.h
index d110a87..acf2d40 100644
--- a/libgo/runtime/go-defer.h
+++ b/libgo/runtime/go-defer.h
@@ -20,8 +20,8 @@ struct __go_defer_stack
/* The value of the panic stack when this function is deferred.
This function can not recover this value from the panic stack.
- This can happen if a deferred function uses its own defer
- statement. */
+ This can happen if a deferred function has a defer statement
+ itself. */
struct __go_panic_stack *__panic;
/* The function to call. */
@@ -41,7 +41,7 @@ struct __go_defer_stack
useful. */
_Bool __makefunc_can_recover;
- /* Set to true if this defer stack entry should be freed when
- done. */
- _Bool __free;
+ /* Set to true if this defer stack entry is not part of the defer
+ pool. */
+ _Bool __special;
};
diff --git a/libgo/runtime/go-panic.c b/libgo/runtime/go-panic.c
index 0cacbcd..77975c6 100644
--- a/libgo/runtime/go-panic.c
+++ b/libgo/runtime/go-panic.c
@@ -54,7 +54,6 @@ __go_panic (struct __go_empty_interface arg)
{
struct __go_defer_stack *d;
void (*pfn) (void *);
- M *m;
d = g->defer;
if (d == NULL)
@@ -101,9 +100,8 @@ __go_panic (struct __go_empty_interface arg)
call to syscall.CgocallBackDone, in which case we will not
have a memory context. Don't try to free anything in that
case--the GC will release it later. */
- m = runtime_m ();
- if (m != NULL && m->mcache != NULL && d->__free)
- __go_free (d);
+ if (runtime_m () != NULL)
+ runtime_freedefer (d);
}
/* The panic was not recovered. */
diff --git a/libgo/runtime/go-setenv.c b/libgo/runtime/go-setenv.c
index 6c7378c..a75d7c4 100644
--- a/libgo/runtime/go-setenv.c
+++ b/libgo/runtime/go-setenv.c
@@ -11,6 +11,8 @@
#include "go-alloc.h"
#include "runtime.h"
+#include "arch.h"
+#include "malloc.h"
/* Set the C environment from Go. This is called by syscall.Setenv. */
@@ -23,6 +25,7 @@ setenv_c (String k, String v)
unsigned char *kn;
const byte *vs;
unsigned char *vn;
+ intgo len;
ks = k.str;
if (ks == NULL)
@@ -38,14 +41,22 @@ setenv_c (String k, String v)
if (ks != NULL && ks[k.len] != 0)
{
- kn = __go_alloc (k.len + 1);
+ // Objects that are explicitly freed must be at least 16 bytes in size,
+ // so that they are not allocated using tiny alloc.
+ len = k.len + 1;
+ if (len < TinySize)
+ len = TinySize;
+ kn = __go_alloc (len);
__builtin_memcpy (kn, ks, k.len);
ks = kn;
}
if (vs != NULL && vs[v.len] != 0)
{
- vn = __go_alloc (v.len + 1);
+ len = v.len + 1;
+ if (len < TinySize)
+ len = TinySize;
+ vn = __go_alloc (len);
__builtin_memcpy (vn, vs, v.len);
vs = vn;
}
@@ -54,7 +65,10 @@ setenv_c (String k, String v)
#else /* !defined(HAVE_SETENV) */
- kn = __go_alloc (k.len + v.len + 2);
+ len = k.len + v.len + 2;
+ if (len < TinySize)
+ len = TinySize;
+ kn = __go_alloc (len);
__builtin_memcpy (kn, ks, k.len);
kn[k.len] = '=';
__builtin_memcpy (kn + k.len + 1, vs, v.len);
diff --git a/libgo/runtime/go-string-to-byte-array.c b/libgo/runtime/go-string-to-byte-array.c
index 5e03033..a4edb50 100644
--- a/libgo/runtime/go-string-to-byte-array.c
+++ b/libgo/runtime/go-string-to-byte-array.c
@@ -12,14 +12,17 @@
struct __go_open_array
__go_string_to_byte_array (String str)
{
+ uintptr cap;
unsigned char *data;
struct __go_open_array ret;
- data = (unsigned char *) runtime_mallocgc (str.len, 0,
- FlagNoScan | FlagNoZero);
+ cap = runtime_roundupsize (str.len);
+ data = (unsigned char *) runtime_mallocgc (cap, 0, FlagNoScan | FlagNoZero);
__builtin_memcpy (data, str.str, str.len);
+ if (cap != (uintptr) str.len)
+ __builtin_memset (data + str.len, 0, cap - (uintptr) str.len);
ret.__values = (void *) data;
ret.__count = str.len;
- ret.__capacity = str.len;
+ ret.__capacity = (intgo) cap;
return ret;
}
diff --git a/libgo/runtime/go-string-to-int-array.c b/libgo/runtime/go-string-to-int-array.c
index d91c9e2..5546889 100644
--- a/libgo/runtime/go-string-to-int-array.c
+++ b/libgo/runtime/go-string-to-int-array.c
@@ -17,6 +17,7 @@ __go_string_to_int_array (String str)
size_t c;
const unsigned char *p;
const unsigned char *pend;
+ uintptr mem;
uint32_t *data;
uint32_t *pd;
struct __go_open_array ret;
@@ -32,8 +33,11 @@ __go_string_to_int_array (String str)
p += __go_get_rune (p, pend - p, &rune);
}
- data = (uint32_t *) runtime_mallocgc (c * sizeof (uint32_t), 0,
- FlagNoScan | FlagNoZero);
+ if (c > MaxMem / sizeof (uint32_t))
+ runtime_throw ("out of memory");
+
+ mem = runtime_roundupsize (c * sizeof (uint32_t));
+ data = (uint32_t *) runtime_mallocgc (mem, 0, FlagNoScan | FlagNoZero);
p = str.str;
pd = data;
while (p < pend)
@@ -43,9 +47,10 @@ __go_string_to_int_array (String str)
p += __go_get_rune (p, pend - p, &rune);
*pd++ = rune;
}
-
+ if (mem > (uintptr) c * sizeof (uint32_t))
+ __builtin_memset (data + c, 0, mem - (uintptr) c * sizeof (uint32_t));
ret.__values = (void *) data;
ret.__count = c;
- ret.__capacity = c;
+ ret.__capacity = (intgo) (mem / sizeof (uint32_t));
return ret;
}
diff --git a/libgo/runtime/go-unwind.c b/libgo/runtime/go-unwind.c
index 04b0a28..849256b 100644
--- a/libgo/runtime/go-unwind.c
+++ b/libgo/runtime/go-unwind.c
@@ -80,7 +80,6 @@ __go_check_defer (_Bool *frame)
{
struct __go_defer_stack *d;
void (*pfn) (void *);
- M *m;
d = g->defer;
if (d == NULL || d->__frame != frame || d->__pfn == NULL)
@@ -91,9 +90,8 @@ __go_check_defer (_Bool *frame)
(*pfn) (d->__arg);
- m = runtime_m ();
- if (m != NULL && m->mcache != NULL && d->__free)
- __go_free (d);
+ if (runtime_m () != NULL)
+ runtime_freedefer (d);
if (n->__was_recovered)
{
@@ -122,7 +120,6 @@ __go_check_defer (_Bool *frame)
&& g->defer->__frame == frame)
{
struct __go_defer_stack *d;
- M *m;
/* This is the defer function which called recover. Simply
return to stop the stack unwind, and let the Go code continue
@@ -130,9 +127,8 @@ __go_check_defer (_Bool *frame)
d = g->defer;
g->defer = d->__next;
- m = runtime_m ();
- if (m != NULL && m->mcache != NULL && d->__free)
- __go_free (d);
+ if (runtime_m () != NULL)
+ runtime_freedefer (d);
/* We are returning from this function. */
*frame = 1;
diff --git a/libgo/runtime/go-varargs.c b/libgo/runtime/go-varargs.c
index 682c08d..705f55e 100644
--- a/libgo/runtime/go-varargs.c
+++ b/libgo/runtime/go-varargs.c
@@ -26,6 +26,12 @@ __go_fcntl (int fd, int cmd, int arg)
return fcntl (fd, cmd, arg);
}
+int
+__go_fcntl_flock (int fd, int cmd, struct flock *arg)
+{
+ return fcntl (fd, cmd, arg);
+}
+
#ifdef HAVE_OPEN64
int
diff --git a/libgo/runtime/lock_futex.c b/libgo/runtime/lock_futex.c
index fa27013..33ef073 100644
--- a/libgo/runtime/lock_futex.c
+++ b/libgo/runtime/lock_futex.c
@@ -124,26 +124,36 @@ runtime_notewakeup(Note *n)
void
runtime_notesleep(Note *n)
{
+ M *m = runtime_m();
+
/* For gccgo it's OK to sleep in non-g0, and it happens in
stoptheworld because we have not implemented preemption.
if(runtime_g() != runtime_m()->g0)
runtime_throw("notesleep not on g0");
*/
- while(runtime_atomicload((uint32*)&n->key) == 0)
+ while(runtime_atomicload((uint32*)&n->key) == 0) {
+ m->blocked = true;
runtime_futexsleep((uint32*)&n->key, 0, -1);
+ m->blocked = false;
+ }
}
static bool
notetsleep(Note *n, int64 ns, int64 deadline, int64 now)
{
+ M *m = runtime_m();
+
// Conceptually, deadline and now are local variables.
// They are passed as arguments so that the space for them
// does not count against our nosplit stack sequence.
if(ns < 0) {
- while(runtime_atomicload((uint32*)&n->key) == 0)
+ while(runtime_atomicload((uint32*)&n->key) == 0) {
+ m->blocked = true;
runtime_futexsleep((uint32*)&n->key, 0, -1);
+ m->blocked = false;
+ }
return true;
}
@@ -152,7 +162,9 @@ notetsleep(Note *n, int64 ns, int64 deadline, int64 now)
deadline = runtime_nanotime() + ns;
for(;;) {
+ m->blocked = true;
runtime_futexsleep((uint32*)&n->key, 0, ns);
+ m->blocked = false;
if(runtime_atomicload((uint32*)&n->key) != 0)
break;
now = runtime_nanotime();
diff --git a/libgo/runtime/lock_sema.c b/libgo/runtime/lock_sema.c
index 000b9fc..d0d551d 100644
--- a/libgo/runtime/lock_sema.c
+++ b/libgo/runtime/lock_sema.c
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build darwin netbsd openbsd plan9 windows
+// +build darwin netbsd openbsd plan9 solaris windows
#include "runtime.h"
@@ -167,7 +167,9 @@ runtime_notesleep(Note *n)
return;
}
// Queued. Sleep.
+ m->blocked = true;
runtime_semasleep(-1);
+ m->blocked = false;
}
static bool
@@ -190,18 +192,23 @@ notetsleep(Note *n, int64 ns, int64 deadline, M *mp)
if(ns < 0) {
// Queued. Sleep.
+ m->blocked = true;
runtime_semasleep(-1);
+ m->blocked = false;
return true;
}
deadline = runtime_nanotime() + ns;
for(;;) {
// Registered. Sleep.
+ m->blocked = true;
if(runtime_semasleep(ns) >= 0) {
+ m->blocked = false;
// Acquired semaphore, semawakeup unregistered us.
// Done.
return true;
}
+ m->blocked = false;
// Interrupted or timed out. Still registered. Semaphore not acquired.
ns = deadline - runtime_nanotime();
@@ -223,8 +230,10 @@ notetsleep(Note *n, int64 ns, int64 deadline, M *mp)
} else if(mp == (M*)LOCKED) {
// Wakeup happened so semaphore is available.
// Grab it to avoid getting out of sync.
+ m->blocked = true;
if(runtime_semasleep(-1) < 0)
runtime_throw("runtime: unable to acquire - semaphore out of sync");
+ m->blocked = false;
return true;
} else
runtime_throw("runtime: unexpected waitm - semaphore out of sync");
diff --git a/libgo/runtime/malloc.goc b/libgo/runtime/malloc.goc
index 33d0c39..798d875 100644
--- a/libgo/runtime/malloc.goc
+++ b/libgo/runtime/malloc.goc
@@ -54,6 +54,7 @@ package runtime
// Mark mheap as 'no pointers', it does not contain interesting pointers but occupies ~45K.
MHeap runtime_mheap;
+MStats mstats;
int32 runtime_checking;
@@ -62,6 +63,9 @@ extern MStats mstats; // defined in zruntime_def_$GOOS_$GOARCH.go
extern volatile intgo runtime_MemProfileRate
__asm__ (GOSYM_PREFIX "runtime.MemProfileRate");
+static void* largealloc(uint32, uintptr*);
+static void profilealloc(void *v, uintptr size, uintptr typ);
+
// Allocate an object of at least size bytes.
// Small objects are allocated from the per-thread cache's free lists.
// Large objects (> 32 kB) are allocated straight from the heap.
@@ -72,12 +76,12 @@ runtime_mallocgc(uintptr size, uintptr typ, uint32 flag)
M *m;
G *g;
int32 sizeclass;
+ uintptr tinysize, size1;
intgo rate;
MCache *c;
MCacheList *l;
- uintptr npages;
- MSpan *s;
- MLink *v;
+ MLink *v, *next;
+ byte *tiny;
bool incallback;
if(size == 0) {
@@ -119,6 +123,81 @@ runtime_mallocgc(uintptr size, uintptr typ, uint32 flag)
c = m->mcache;
if(!runtime_debug.efence && size <= MaxSmallSize) {
+ if((flag&(FlagNoScan|FlagNoGC)) == FlagNoScan && size < TinySize) {
+ // Tiny allocator.
+ //
+ // Tiny allocator combines several tiny allocation requests
+ // into a single memory block. The resulting memory block
+ // is freed when all subobjects are unreachable. The subobjects
+ // must be FlagNoScan (don't have pointers), this ensures that
+ // the amount of potentially wasted memory is bounded.
+ //
+ // Size of the memory block used for combining (TinySize) is tunable.
+ // Current setting is 16 bytes, which relates to 2x worst case memory
+ // wastage (when all but one subobjects are unreachable).
+ // 8 bytes would result in no wastage at all, but provides less
+ // opportunities for combining.
+ // 32 bytes provides more opportunities for combining,
+ // but can lead to 4x worst case wastage.
+ // The best case winning is 8x regardless of block size.
+ //
+ // Objects obtained from tiny allocator must not be freed explicitly.
+ // So when an object will be freed explicitly, we ensure that
+ // its size >= TinySize.
+ //
+ // SetFinalizer has a special case for objects potentially coming
+ // from tiny allocator; in such a case it allows setting finalizers

+ // for an inner byte of a memory block.
+ //
+ // The main targets of tiny allocator are small strings and
+ // standalone escaping variables. On a json benchmark
+ // the allocator reduces number of allocations by ~12% and
+ // reduces heap size by ~20%.
+
+ tinysize = c->tinysize;
+ if(size <= tinysize) {
+ tiny = c->tiny;
+ // Align tiny pointer for required (conservative) alignment.
+ if((size&7) == 0)
+ tiny = (byte*)ROUND((uintptr)tiny, 8);
+ else if((size&3) == 0)
+ tiny = (byte*)ROUND((uintptr)tiny, 4);
+ else if((size&1) == 0)
+ tiny = (byte*)ROUND((uintptr)tiny, 2);
+ size1 = size + (tiny - c->tiny);
+ if(size1 <= tinysize) {
+ // The object fits into existing tiny block.
+ v = (MLink*)tiny;
+ c->tiny += size1;
+ c->tinysize -= size1;
+ m->mallocing = 0;
+ m->locks--;
+ if(incallback)
+ runtime_entersyscall();
+ return v;
+ }
+ }
+ // Allocate a new TinySize block.
+ l = &c->list[TinySizeClass];
+ if(l->list == nil)
+ runtime_MCache_Refill(c, TinySizeClass);
+ v = l->list;
+ next = v->next;
+ if(next != nil) // prefetching nil leads to a DTLB miss
+ PREFETCH(next);
+ l->list = next;
+ l->nlist--;
+ ((uint64*)v)[0] = 0;
+ ((uint64*)v)[1] = 0;
+ // See if we need to replace the existing tiny block with the new one
+ // based on amount of remaining free space.
+ if(TinySize-size > tinysize) {
+ c->tiny = (byte*)v + size;
+ c->tinysize = TinySize - size;
+ }
+ size = TinySize;
+ goto done;
+ }
// Allocate from mcache free lists.
// Inlined version of SizeToClass().
if(size <= 1024-8)
@@ -130,31 +209,22 @@ runtime_mallocgc(uintptr size, uintptr typ, uint32 flag)
if(l->list == nil)
runtime_MCache_Refill(c, sizeclass);
v = l->list;
- l->list = v->next;
+ next = v->next;
+ if(next != nil) // prefetching nil leads to a DTLB miss
+ PREFETCH(next);
+ l->list = next;
l->nlist--;
if(!(flag & FlagNoZero)) {
v->next = nil;
// block is zeroed iff second word is zero ...
- if(size > sizeof(uintptr) && ((uintptr*)v)[1] != 0)
+ if(size > 2*sizeof(uintptr) && ((uintptr*)v)[1] != 0)
runtime_memclr((byte*)v, size);
}
+ done:
c->local_cachealloc += size;
} else {
- // TODO(rsc): Report tracebacks for very large allocations.
-
// Allocate directly from heap.
- npages = size >> PageShift;
- if((size & PageMask) != 0)
- npages++;
- s = runtime_MHeap_Alloc(&runtime_mheap, npages, 0, 1, !(flag & FlagNoZero));
- if(s == nil)
- runtime_throw("out of memory");
- s->limit = (byte*)(s->start<<PageShift) + size;
- size = npages<<PageShift;
- v = (void*)(s->start << PageShift);
-
- // setup for mark sweep
- runtime_markspan(v, 0, 0, true);
+ v = largealloc(flag, &size);
}
if(flag & FlagNoGC)
@@ -180,40 +250,83 @@ runtime_mallocgc(uintptr size, uintptr typ, uint32 flag)
m->mallocing = 0;
if(UseSpanType && !(flag & FlagNoScan) && typ != 0 && m->settype_bufsize == nelem(m->settype_buf))
runtime_settype_flush(m);
- m->locks--;
+ if(raceenabled)
+ runtime_racemalloc(v, size);
if(runtime_debug.allocfreetrace)
goto profile;
if(!(flag & FlagNoProfiling) && (rate = runtime_MemProfileRate) > 0) {
- if(size >= (uint32) rate)
- goto profile;
- if((uint32) m->mcache->next_sample > size)
- m->mcache->next_sample -= size;
+ if(size < (uintptr)rate && size < (uintptr)(uint32)c->next_sample)
+ c->next_sample -= size;
else {
- // pick next profile time
- // If you change this, also change allocmcache.
- if(rate > 0x3fffffff) // make 2*rate not overflow
- rate = 0x3fffffff;
- m->mcache->next_sample = runtime_fastrand1() % (2*rate);
profile:
- runtime_setblockspecial(v, true);
- runtime_MProf_Malloc(v, size, typ);
+ profilealloc(v, size, typ);
}
}
+ m->locks--;
+
if(!(flag & FlagNoInvokeGC) && mstats.heap_alloc >= mstats.next_gc)
runtime_gc(0);
- if(raceenabled)
- runtime_racemalloc(v, size);
-
if(incallback)
runtime_entersyscall();
return v;
}
+static void*
+largealloc(uint32 flag, uintptr *sizep)
+{
+ uintptr npages, size;
+ MSpan *s;
+ void *v;
+
+ // Allocate directly from heap.
+ size = *sizep;
+ if(size + PageSize < size)
+ runtime_throw("out of memory");
+ npages = size >> PageShift;
+ if((size & PageMask) != 0)
+ npages++;
+ s = runtime_MHeap_Alloc(&runtime_mheap, npages, 0, 1, !(flag & FlagNoZero));
+ if(s == nil)
+ runtime_throw("out of memory");
+ s->limit = (byte*)(s->start<<PageShift) + size;
+ *sizep = npages<<PageShift;
+ v = (void*)(s->start << PageShift);
+ // setup for mark sweep
+ runtime_markspan(v, 0, 0, true);
+ return v;
+}
+
+static void
+profilealloc(void *v, uintptr size, uintptr typ)
+{
+ uintptr rate;
+ int32 next;
+ MCache *c;
+
+ c = runtime_m()->mcache;
+ rate = runtime_MemProfileRate;
+ if(size < rate) {
+ // pick next profile time
+ // If you change this, also change allocmcache.
+ if(rate > 0x3fffffff) // make 2*rate not overflow
+ rate = 0x3fffffff;
+ next = runtime_fastrand1() % (2*rate);
+ // Subtract the "remainder" of the current allocation.
+ // Otherwise objects that are close in size to sampling rate
+ // will be under-sampled, because we consistently discard this remainder.
+ next -= (size - c->next_sample);
+ if(next < 0)
+ next = 0;
+ c->next_sample = next;
+ }
+ runtime_MProf_Malloc(v, size, typ);
+}
+
void*
__go_alloc(uintptr size)
{
@@ -228,7 +341,6 @@ __go_free(void *v)
int32 sizeclass;
MSpan *s;
MCache *c;
- uint32 prof;
uintptr size;
if(v == nil)
@@ -246,18 +358,27 @@ __go_free(void *v)
runtime_printf("free %p: not an allocated block\n", v);
runtime_throw("free runtime_mlookup");
}
- prof = runtime_blockspecial(v);
+ size = s->elemsize;
+ sizeclass = s->sizeclass;
+ // Objects that are smaller than TinySize can be allocated using tiny alloc,
+ // if such an object is then combined with an object that has a finalizer, we will crash.
+ if(size < TinySize)
+ runtime_throw("freeing too small block");
if(raceenabled)
runtime_racefree(v);
- // Find size class for v.
- sizeclass = s->sizeclass;
+ // Ensure that the span is swept.
+ // If we free into an unswept span, we will corrupt GC bitmaps.
+ runtime_MSpan_EnsureSwept(s);
+
+ if(s->specials != nil)
+ runtime_freeallspecials(s, v, size);
+
c = m->mcache;
if(sizeclass == 0) {
// Large object.
- size = s->npages<<PageShift;
- *(uintptr*)(s->start<<PageShift) = (uintptr)0xfeedfeedfeedfeedll; // mark as "needs to be zeroed"
+ s->needzero = 1;
// Must mark v freed before calling unmarkspan and MHeap_Free:
// they might coalesce v into other spans and change the bitmap further.
runtime_markfreed(v, size);
@@ -270,9 +391,10 @@ __go_free(void *v)
c->local_largefree += size;
} else {
// Small object.
- size = runtime_class_to_size[sizeclass];
- if(size > sizeof(uintptr))
+ if(size > 2*sizeof(uintptr))
((uintptr*)v)[1] = (uintptr)0xfeedfeedfeedfeedll; // mark as "needs to be zeroed"
+ else if(size > sizeof(uintptr))
+ ((uintptr*)v)[1] = 0;
// Must mark v freed before calling MCache_Free:
// it might coalesce v and other blocks into a bigger span
// and change the bitmap further.
@@ -280,8 +402,6 @@ __go_free(void *v)
c->local_nsmallfree[sizeclass]++;
runtime_MCache_Free(c, v, sizeclass, size);
}
- if(prof)
- runtime_MProf_Free(v, size);
m->mallocing = 0;
}
@@ -392,6 +512,12 @@ runtime_purgecachedstats(MCache *c)
extern uintptr runtime_sizeof_C_MStats
__asm__ (GOSYM_PREFIX "runtime.Sizeof_C_MStats");
+// Size of the trailing by_size array differs between Go and C,
+// NumSizeClasses was changed, but we can not change Go struct because of backward compatibility.
+// sizeof_C_MStats is what C thinks about size of Go struct.
+
+// Initialized in mallocinit because it's defined in go/runtime/mem.go.
+
#define MaxArena32 (2U<<30)
void
@@ -400,11 +526,10 @@ runtime_mallocinit(void)
byte *p;
uintptr arena_size, bitmap_size, spans_size;
extern byte _end[];
- byte *want;
uintptr limit;
uint64 i;
- runtime_sizeof_C_MStats = sizeof(MStats);
+ runtime_sizeof_C_MStats = sizeof(MStats) - (NumSizeClasses - 61) * sizeof(mstats.by_size[0]);
p = nil;
arena_size = 0;
@@ -419,6 +544,9 @@ runtime_mallocinit(void)
runtime_InitSizes();
+ if(runtime_class_to_size[TinySizeClass] != TinySize)
+ runtime_throw("bad TinySizeClass");
+
// limit = runtime_memlimit();
// See https://code.google.com/p/go/issues/detail?id=5049
// TODO(rsc): Fix after 1.1.
@@ -457,7 +585,7 @@ runtime_mallocinit(void)
spans_size = arena_size / PageSize * sizeof(runtime_mheap.spans[0]);
spans_size = ROUND(spans_size, PageSize);
for(i = 0; i < HeapBaseOptions; i++) {
- p = runtime_SysReserve(HeapBase(i), bitmap_size + spans_size + arena_size);
+ p = runtime_SysReserve(HeapBase(i), bitmap_size + spans_size + arena_size + PageSize);
if(p != nil)
break;
}
@@ -499,18 +627,16 @@ runtime_mallocinit(void)
// So adjust it upward a little bit ourselves: 1/4 MB to get
// away from the running binary image and then round up
// to a MB boundary.
- want = (byte*)ROUND((uintptr)_end + (1<<18), 1<<20);
- if(0xffffffff - (uintptr)want <= bitmap_size + spans_size + arena_size)
- want = 0;
- p = runtime_SysReserve(want, bitmap_size + spans_size + arena_size);
+ p = (byte*)ROUND((uintptr)_end + (1<<18), 1<<20);
+ p = runtime_SysReserve(p, bitmap_size + spans_size + arena_size + PageSize);
if(p == nil)
runtime_throw("runtime: cannot reserve arena virtual address space");
- if((uintptr)p & (((uintptr)1<<PageShift)-1))
- runtime_printf("runtime: SysReserve returned unaligned address %p; asked for %p", p,
- bitmap_size+spans_size+arena_size);
}
- if((uintptr)p & (((uintptr)1<<PageShift)-1))
- runtime_throw("runtime: SysReserve returned unaligned address");
+
+ // PageSize can be larger than OS definition of page size,
+ // so SysReserve can give us a PageSize-unaligned pointer.
+ // To overcome this we ask for PageSize more and round up the pointer.
+ p = (byte*)ROUND((uintptr)p, PageSize);
runtime_mheap.spans = (MSpan**)p;
runtime_mheap.bitmap = p + spans_size;
@@ -523,7 +649,7 @@ runtime_mallocinit(void)
runtime_m()->mcache = runtime_allocmcache();
// See if it works.
- runtime_free(runtime_malloc(1));
+ runtime_free(runtime_malloc(TinySize));
}
void*
@@ -828,16 +954,18 @@ func SetFinalizer(obj Eface, finalizer Eface) {
goto throw;
}
ot = (const PtrType*)obj.type;
- if(ot->__element_type != nil && ot->__element_type->__size == 0) {
+ // As an implementation detail we do not run finalizers for zero-sized objects,
+ // because we use &runtime_zerobase for all such allocations.
+ if(ot->__element_type != nil && ot->__element_type->__size == 0)
return;
- }
if(!runtime_mlookup(obj.__object, &base, &size, nil) || obj.__object != base) {
- runtime_printf("runtime.SetFinalizer: pointer not at beginning of allocated block\n");
- goto throw;
+ // As an implementation detail we allow to set finalizers for an inner byte
+ // of an object if it could come from tiny alloc (see mallocgc for details).
+ if(ot->__element_type == nil || (ot->__element_type->__code&GO_NO_POINTERS) == 0 || ot->__element_type->__size >= TinySize) {
+ runtime_printf("runtime.SetFinalizer: pointer not at beginning of allocated block\n");
+ goto throw;
+ }
}
- ft = nil;
- ot = (const PtrType*)obj.__type_descriptor;
- fint = nil;
if(finalizer.__type_descriptor != nil) {
if(finalizer.__type_descriptor->__code != GO_FUNC)
goto badfunc;
@@ -856,11 +984,15 @@ func SetFinalizer(obj Eface, finalizer Eface) {
// ok - satisfies non-empty interface
} else
goto badfunc;
- }
- if(!runtime_addfinalizer(obj.__object, finalizer.__type_descriptor != nil ? *(void**)finalizer.__object : nil, ft, ot)) {
- runtime_printf("runtime.SetFinalizer: finalizer already set\n");
- goto throw;
+ ot = (const PtrType*)obj.__type_descriptor;
+ if(!runtime_addfinalizer(obj.__object, *(FuncVal**)finalizer.__object, ft, ot)) {
+ runtime_printf("runtime.SetFinalizer: finalizer already set\n");
+ goto throw;
+ }
+ } else {
+ // NOTE: asking to remove a finalizer when there currently isn't one set is OK.
+ runtime_removefinalizer(obj.__object);
}
return;
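The tiny allocator added above packs several FlagNoScan allocations smaller than 16 bytes into one block, rounding each request up to 2-, 4-, or 8-byte alignment depending on the low bits of its size and falling back to a fresh TinySize block when the rounded request no longer fits. A standalone sketch of just that fit check, not part of the patch, with ROUND and TINYSIZE as local stand-ins for the runtime's macro and TinySize constant:

#include <stdint.h>
#include <stdio.h>

#define TINYSIZE 16
#define ROUND(x, n) (((x) + (n) - 1) & ~(uintptr_t)((n) - 1))

/* Returns the aligned offset at which a size-byte object would start
   inside the current tiny block, or TINYSIZE+1 if it does not fit.
   tiny_off is the current fill offset, tiny_left the bytes remaining. */
static uintptr_t
tiny_fit(uintptr_t tiny_off, uintptr_t tiny_left, uintptr_t size)
{
	uintptr_t off = tiny_off;
	if ((size & 7) == 0)
		off = ROUND(off, 8);
	else if ((size & 3) == 0)
		off = ROUND(off, 4);
	else if ((size & 1) == 0)
		off = ROUND(off, 2);
	/* object plus alignment padding must fit in what is left */
	return (size + (off - tiny_off) <= tiny_left) ? off : TINYSIZE + 1;
}

int
main(void)
{
	/* 3 bytes already used; an 8-byte object is pushed up to offset 8. */
	printf("fits at offset %llu\n", (unsigned long long)tiny_fit(3, 13, 8));
	return 0;
}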
diff --git a/libgo/runtime/malloc.h b/libgo/runtime/malloc.h
index 16f51a5..b5dc5a4 100644
--- a/libgo/runtime/malloc.h
+++ b/libgo/runtime/malloc.h
@@ -66,14 +66,14 @@
//
// The small objects on the MCache and MCentral free lists
// may or may not be zeroed. They are zeroed if and only if
-// the second word of the object is zero. The spans in the
-// page heap are always zeroed. When a span full of objects
-// is returned to the page heap, the objects that need to be
-// are zeroed first. There are two main benefits to delaying the
+// the second word of the object is zero. A span in the
+// page heap is zeroed unless s->needzero is set. When a span
+// is allocated to break into small objects, it is zeroed if needed
+// and s->needzero is set. There are two main benefits to delaying the
// zeroing this way:
//
// 1. stack frames allocated from the small object lists
-// can avoid zeroing altogether.
+// or the page heap can avoid zeroing altogether.
// 2. the cost of zeroing when reusing a small object is
// charged to the mutator, not the garbage collector.
//
@@ -90,7 +90,7 @@ typedef struct GCStats GCStats;
enum
{
- PageShift = 12,
+ PageShift = 13,
PageSize = 1<<PageShift,
PageMask = PageSize - 1,
};
@@ -103,11 +103,15 @@ enum
// size classes. NumSizeClasses is that number. It's needed here
// because there are static arrays of this length; when msize runs its
// size choosing algorithm it double-checks that NumSizeClasses agrees.
- NumSizeClasses = 61,
+ NumSizeClasses = 67,
// Tunable constants.
MaxSmallSize = 32<<10,
+ // Tiny allocator parameters, see "Tiny allocator" comment in malloc.goc.
+ TinySize = 16,
+ TinySizeClass = 2,
+
FixAllocChunk = 16<<10, // Chunk size for FixAlloc
MaxMHeapList = 1<<(20 - PageShift), // Maximum page length for fixed-size list in MHeap.
HeapAllocChunk = 1<<20, // Chunk size for heap growth
@@ -256,7 +260,7 @@ struct MStats
};
extern MStats mstats
- __asm__ (GOSYM_PREFIX "runtime.VmemStats");
+ __asm__ (GOSYM_PREFIX "runtime.memStats");
// Size classes. Computed and initialized by InitSizes.
//
@@ -269,6 +273,7 @@ extern MStats mstats
// making new objects in class i
int32 runtime_SizeToClass(int32);
+uintptr runtime_roundupsize(uintptr);
extern int32 runtime_class_to_size[NumSizeClasses];
extern int32 runtime_class_to_allocnpages[NumSizeClasses];
extern int8 runtime_size_to_class8[1024/8 + 1];
@@ -291,6 +296,10 @@ struct MCache
// so they are grouped here for better caching.
int32 next_sample; // trigger heap sample after allocating this many bytes
intptr local_cachealloc; // bytes allocated (or freed) from cache since last lock of heap
+ // Allocator cache for tiny objects w/o pointers.
+ // See "Tiny allocator" comment in malloc.goc.
+ byte* tiny;
+ uintptr tinysize;
// The rest is not accessed on every malloc.
MCacheList list[NumSizeClasses];
// Local allocator stats, flushed during GC.
@@ -341,6 +350,43 @@ struct MTypes
uintptr data;
};
+enum
+{
+ KindSpecialFinalizer = 1,
+ KindSpecialProfile = 2,
+ // Note: The finalizer special must be first because if we're freeing
+ // an object, a finalizer special will cause the freeing operation
+ // to abort, and we want to keep the other special records around
+ // if that happens.
+};
+
+typedef struct Special Special;
+struct Special
+{
+ Special* next; // linked list in span
+ uint16 offset; // span offset of object
+ byte kind; // kind of Special
+};
+
+// The described object has a finalizer set for it.
+typedef struct SpecialFinalizer SpecialFinalizer;
+struct SpecialFinalizer
+{
+ Special;
+ FuncVal* fn;
+ const FuncType* ft;
+ const PtrType* ot;
+};
+
+// The described object is being heap profiled.
+typedef struct Bucket Bucket; // from mprof.goc
+typedef struct SpecialProfile SpecialProfile;
+struct SpecialProfile
+{
+ Special;
+ Bucket* b;
+};
+
// An MSpan is a run of pages.
enum
{
@@ -356,17 +402,28 @@ struct MSpan
PageID start; // starting page number
uintptr npages; // number of pages in span
MLink *freelist; // list of free objects
- uint32 ref; // number of allocated objects in this span
- int32 sizeclass; // size class
+ // sweep generation:
+ // if sweepgen == h->sweepgen - 2, the span needs sweeping
+ // if sweepgen == h->sweepgen - 1, the span is currently being swept
+ // if sweepgen == h->sweepgen, the span is swept and ready to use
+ // h->sweepgen is incremented by 2 after every GC
+ uint32 sweepgen;
+ uint16 ref; // number of allocated objects in this span
+ uint8 sizeclass; // size class
+ uint8 state; // MSpanInUse etc
+ uint8 needzero; // needs to be zeroed before allocation
uintptr elemsize; // computed from sizeclass or from npages
- uint32 state; // MSpanInUse etc
int64 unusedsince; // First time spotted by GC in MSpanFree state
uintptr npreleased; // number of pages released to the OS
byte *limit; // end of data in span
MTypes types; // types of allocated objects in this span
+ Lock specialLock; // TODO: use to protect types also (instead of settype_lock)
+ Special *specials; // linked list of special records sorted by offset.
};
void runtime_MSpan_Init(MSpan *span, PageID start, uintptr npages);
+void runtime_MSpan_EnsureSwept(MSpan *span);
+bool runtime_MSpan_Sweep(MSpan *span);
// Every MSpan is in one doubly-linked list,
// either one of the MHeap's free lists or one of the
@@ -374,6 +431,7 @@ void runtime_MSpan_Init(MSpan *span, PageID start, uintptr npages);
void runtime_MSpanList_Init(MSpan *list);
bool runtime_MSpanList_IsEmpty(MSpan *list);
void runtime_MSpanList_Insert(MSpan *list, MSpan *span);
+void runtime_MSpanList_InsertBack(MSpan *list, MSpan *span);
void runtime_MSpanList_Remove(MSpan *span); // from whatever list it is in
@@ -390,7 +448,7 @@ struct MCentral
void runtime_MCentral_Init(MCentral *c, int32 sizeclass);
int32 runtime_MCentral_AllocList(MCentral *c, MLink **first);
void runtime_MCentral_FreeList(MCentral *c, MLink *first);
-void runtime_MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end);
+bool runtime_MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end);
// Main malloc heap.
// The heap itself is the "free[]" and "large" arrays,
@@ -399,10 +457,15 @@ struct MHeap
{
Lock;
MSpan free[MaxMHeapList]; // free lists of given length
- MSpan large; // free lists length >= MaxMHeapList
- MSpan **allspans;
+ MSpan freelarge; // free lists length >= MaxMHeapList
+ MSpan busy[MaxMHeapList]; // busy lists of large objects of given length
+ MSpan busylarge; // busy lists of large objects length >= MaxMHeapList
+ MSpan **allspans; // all spans out there
+ MSpan **sweepspans; // copy of allspans referenced by sweeper
uint32 nspan;
uint32 nspancap;
+ uint32 sweepgen; // sweep generation, see comment in MSpan
+ uint32 sweepdone; // all spans are swept
// span lookup
MSpan** spans;
@@ -426,6 +489,9 @@ struct MHeap
FixAlloc spanalloc; // allocator for Span*
FixAlloc cachealloc; // allocator for MCache*
+ FixAlloc specialfinalizeralloc; // allocator for SpecialFinalizer*
+ FixAlloc specialprofilealloc; // allocator for SpecialProfile*
+ Lock speciallock; // lock for special record allocators.
// Malloc stats.
uint64 largefree; // bytes freed for large objects (>MaxSmallSize)
@@ -435,7 +501,7 @@ struct MHeap
extern MHeap runtime_mheap;
void runtime_MHeap_Init(MHeap *h);
-MSpan* runtime_MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, int32 acct, int32 zeroed);
+MSpan* runtime_MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, bool large, bool needzero);
void runtime_MHeap_Free(MHeap *h, MSpan *s, int32 acct);
MSpan* runtime_MHeap_Lookup(MHeap *h, void *v);
MSpan* runtime_MHeap_LookupMaybe(MHeap *h, void *v);
@@ -449,6 +515,7 @@ void* runtime_mallocgc(uintptr size, uintptr typ, uint32 flag);
void* runtime_persistentalloc(uintptr size, uintptr align, uint64 *stat);
int32 runtime_mlookup(void *v, byte **base, uintptr *size, MSpan **s);
void runtime_gc(int32 force);
+uintptr runtime_sweepone(void);
void runtime_markscan(void *v);
void runtime_marknogc(void *v);
void runtime_checkallocated(void *v, uintptr n);
@@ -457,8 +524,6 @@ void runtime_checkfreed(void *v, uintptr n);
extern int32 runtime_checking;
void runtime_markspan(void *v, uintptr size, uintptr n, bool leftover);
void runtime_unmarkspan(void *v, uintptr size);
-bool runtime_blockspecial(void*);
-void runtime_setblockspecial(void*, bool);
void runtime_purgecachedstats(MCache*);
void* runtime_cnew(const Type*);
void* runtime_cnewarray(const Type*, intgo);
@@ -486,17 +551,25 @@ struct Obj
};
void runtime_MProf_Malloc(void*, uintptr, uintptr);
-void runtime_MProf_Free(void*, uintptr);
+void runtime_MProf_Free(Bucket*, void*, uintptr, bool);
void runtime_MProf_GC(void);
-void runtime_MProf_Mark(void (*addroot)(Obj));
+void runtime_MProf_TraceGC(void);
+struct Workbuf;
+void runtime_MProf_Mark(struct Workbuf**, void (*)(struct Workbuf**, Obj));
int32 runtime_gcprocs(void);
void runtime_helpgc(int32 nproc);
void runtime_gchelper(void);
+void runtime_setprofilebucket(void *p, Bucket *b);
+
struct __go_func_type;
struct __go_ptr_type;
-bool runtime_getfinalizer(void *p, bool del, FuncVal **fn, const struct __go_func_type **ft, const struct __go_ptr_type **ot);
-void runtime_walkfintab(void (*fn)(void*), void (*scan)(Obj));
+bool runtime_addfinalizer(void *p, FuncVal *fn, const struct __go_func_type*, const struct __go_ptr_type*);
+void runtime_removefinalizer(void*);
+void runtime_queuefinalizer(void *p, FuncVal *fn, const struct __go_func_type *ft, const struct __go_ptr_type *ot);
+
+void runtime_freeallspecials(MSpan *span, void *p, uintptr size);
+bool runtime_freespecial(Special *s, void *p, uintptr size, bool freed);
enum
{
@@ -514,6 +587,6 @@ void runtime_gc_itab_ptr(Eface*);
void runtime_memorydump(void);
-void runtime_proc_scan(void (*)(Obj));
-void runtime_time_scan(void (*)(Obj));
-void runtime_netpoll_scan(void (*)(Obj));
+void runtime_proc_scan(struct Workbuf**, void (*)(struct Workbuf**, Obj));
+void runtime_time_scan(struct Workbuf**, void (*)(struct Workbuf**, Obj));
+void runtime_netpoll_scan(struct Workbuf**, void (*)(struct Workbuf**, Obj));
diff --git a/libgo/runtime/mcentral.c b/libgo/runtime/mcentral.c
index 8191610..1285336 100644
--- a/libgo/runtime/mcentral.c
+++ b/libgo/runtime/mcentral.c
@@ -39,17 +39,58 @@ runtime_MCentral_AllocList(MCentral *c, MLink **pfirst)
{
MSpan *s;
int32 cap, n;
+ uint32 sg;
runtime_lock(c);
- // Replenish central list if empty.
- if(runtime_MSpanList_IsEmpty(&c->nonempty)) {
- if(!MCentral_Grow(c)) {
+ sg = runtime_mheap.sweepgen;
+retry:
+ for(s = c->nonempty.next; s != &c->nonempty; s = s->next) {
+ if(s->sweepgen == sg-2 && runtime_cas(&s->sweepgen, sg-2, sg-1)) {
runtime_unlock(c);
- *pfirst = nil;
- return 0;
+ runtime_MSpan_Sweep(s);
+ runtime_lock(c);
+ // the span could have been moved to heap, retry
+ goto retry;
+ }
+ if(s->sweepgen == sg-1) {
+ // the span is being swept by background sweeper, skip
+ continue;
+ }
+ // we have a nonempty span that does not require sweeping, allocate from it
+ goto havespan;
+ }
+
+ for(s = c->empty.next; s != &c->empty; s = s->next) {
+ if(s->sweepgen == sg-2 && runtime_cas(&s->sweepgen, sg-2, sg-1)) {
+ // we have an empty span that requires sweeping,
+ // sweep it and see if we can free some space in it
+ runtime_MSpanList_Remove(s);
+ // swept spans are at the end of the list
+ runtime_MSpanList_InsertBack(&c->empty, s);
+ runtime_unlock(c);
+ runtime_MSpan_Sweep(s);
+ runtime_lock(c);
+ // the span could be moved to nonempty or heap, retry
+ goto retry;
+ }
+ if(s->sweepgen == sg-1) {
+ // the span is being swept by background sweeper, skip
+ continue;
}
+ // already swept empty span,
+ // all subsequent ones must also be either swept or in process of sweeping
+ break;
+ }
+
+ // Replenish central list if empty.
+ if(!MCentral_Grow(c)) {
+ runtime_unlock(c);
+ *pfirst = nil;
+ return 0;
}
s = c->nonempty.next;
+
+havespan:
cap = (s->npages << PageShift) / s->elemsize;
n = cap - s->ref;
*pfirst = s->freelist;
@@ -57,7 +98,7 @@ runtime_MCentral_AllocList(MCentral *c, MLink **pfirst)
s->ref += n;
c->nfree -= n;
runtime_MSpanList_Remove(s);
- runtime_MSpanList_Insert(&c->empty, s);
+ runtime_MSpanList_InsertBack(&c->empty, s);
runtime_unlock(c);
return n;
}
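/*
 * Sketch of the sweep-generation protocol used in the loops above
 * (illustrative only, not part of this patch; the helper name is
 * hypothetical). With h->sweepgen == sg:
 *   s->sweepgen == sg-2  the span needs sweeping this cycle
 *   s->sweepgen == sg-1  the span is being swept by someone else
 *   s->sweepgen == sg    the span has already been swept
 * A sweeper claims a span by CASing its sweepgen from sg-2 to sg-1:
 */
static bool
claim_span_for_sweep(MSpan *s, uint32 sg)
{
	// returns true only if this caller won the race to sweep s
	return s->sweepgen == sg-2 && runtime_cas(&s->sweepgen, sg-2, sg-1);
}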
@@ -106,7 +147,7 @@ MCentral_Free(MCentral *c, void *v)
size = runtime_class_to_size[c->sizeclass];
runtime_MSpanList_Remove(s);
runtime_unmarkspan((byte*)(s->start<<PageShift), s->npages<<PageShift);
- *(uintptr*)(s->start<<PageShift) = 1; // needs zeroing
+ s->needzero = 1;
s->freelist = nil;
c->nfree -= (s->npages << PageShift) / size;
runtime_unlock(c);
@@ -116,8 +157,9 @@ MCentral_Free(MCentral *c, void *v)
}
// Free n objects from a span s back into the central free list c.
-// Called from GC.
-void
+// Called during sweep.
+// Returns true if the span was returned to heap.
+bool
runtime_MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end)
{
int32 size;
@@ -136,19 +178,21 @@ runtime_MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *e
s->ref -= n;
c->nfree += n;
- // If s is completely freed, return it to the heap.
- if(s->ref == 0) {
- size = runtime_class_to_size[c->sizeclass];
- runtime_MSpanList_Remove(s);
- *(uintptr*)(s->start<<PageShift) = 1; // needs zeroing
- s->freelist = nil;
- c->nfree -= (s->npages << PageShift) / size;
- runtime_unlock(c);
- runtime_unmarkspan((byte*)(s->start<<PageShift), s->npages<<PageShift);
- runtime_MHeap_Free(&runtime_mheap, s, 0);
- } else {
+ if(s->ref != 0) {
runtime_unlock(c);
+ return false;
}
+
+ // s is completely freed, return it to the heap.
+ size = runtime_class_to_size[c->sizeclass];
+ runtime_MSpanList_Remove(s);
+ s->needzero = 1;
+ s->freelist = nil;
+ c->nfree -= (s->npages << PageShift) / size;
+ runtime_unlock(c);
+ runtime_unmarkspan((byte*)(s->start<<PageShift), s->npages<<PageShift);
+ runtime_MHeap_Free(&runtime_mheap, s, 0);
+ return true;
}
void
diff --git a/libgo/runtime/mfinal.c b/libgo/runtime/mfinal.c
deleted file mode 100644
index 625af52..0000000
--- a/libgo/runtime/mfinal.c
+++ /dev/null
@@ -1,218 +0,0 @@
-// Copyright 2010 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-#include "arch.h"
-#include "malloc.h"
-#include "go-type.h"
-
-enum { debug = 0 };
-
-typedef struct Fin Fin;
-struct Fin
-{
- FuncVal *fn;
- const struct __go_func_type *ft;
- const struct __go_ptr_type *ot;
-};
-
-// Finalizer hash table. Direct hash, linear scan, at most 3/4 full.
-// Table size is power of 3 so that hash can be key % max.
-// Key[i] == (void*)-1 denotes free but formerly occupied entry
-// (doesn't stop the linear scan).
-// Key and val are separate tables because the garbage collector
-// must be instructed to ignore the pointers in key but follow the
-// pointers in val.
-typedef struct Fintab Fintab;
-struct Fintab
-{
- Lock;
- void **fkey;
- Fin *val;
- int32 nkey; // number of non-nil entries in key
- int32 ndead; // number of dead (-1) entries in key
- int32 max; // size of key, val allocations
-};
-
-#define TABSZ 17
-#define TAB(p) (&fintab[((uintptr)(p)>>3)%TABSZ])
-
-static struct {
- Fintab;
- uint8 pad[0 /* CacheLineSize - sizeof(Fintab) */];
-} fintab[TABSZ];
-
-static void
-addfintab(Fintab *t, void *k, FuncVal *fn, const struct __go_func_type *ft, const struct __go_ptr_type *ot)
-{
- int32 i, j;
-
- i = (uintptr)k % (uintptr)t->max;
- for(j=0; j<t->max; j++) {
- if(t->fkey[i] == nil) {
- t->nkey++;
- goto ret;
- }
- if(t->fkey[i] == (void*)-1) {
- t->ndead--;
- goto ret;
- }
- if(++i == t->max)
- i = 0;
- }
-
- // cannot happen - table is known to be non-full
- runtime_throw("finalizer table inconsistent");
-
-ret:
- t->fkey[i] = k;
- t->val[i].fn = fn;
- t->val[i].ft = ft;
- t->val[i].ot = ot;
-}
-
-static bool
-lookfintab(Fintab *t, void *k, bool del, Fin *f)
-{
- int32 i, j;
-
- if(t->max == 0)
- return false;
- i = (uintptr)k % (uintptr)t->max;
- for(j=0; j<t->max; j++) {
- if(t->fkey[i] == nil)
- return false;
- if(t->fkey[i] == k) {
- if(f)
- *f = t->val[i];
- if(del) {
- t->fkey[i] = (void*)-1;
- t->val[i].fn = nil;
- t->val[i].ft = nil;
- t->val[i].ot = nil;
- t->ndead++;
- }
- return true;
- }
- if(++i == t->max)
- i = 0;
- }
-
- // cannot happen - table is known to be non-full
- runtime_throw("finalizer table inconsistent");
- return false;
-}
-
-static void
-resizefintab(Fintab *tab)
-{
- Fintab newtab;
- void *k;
- int32 i;
-
- runtime_memclr((byte*)&newtab, sizeof newtab);
- newtab.max = tab->max;
- if(newtab.max == 0)
- newtab.max = 3*3*3;
- else if(tab->ndead < tab->nkey/2) {
- // grow table if not many dead values.
- // otherwise just rehash into table of same size.
- newtab.max *= 3;
- }
-
- newtab.fkey = runtime_mallocgc(newtab.max*sizeof newtab.fkey[0], 0, FlagNoInvokeGC|FlagNoScan);
- newtab.val = runtime_mallocgc(newtab.max*sizeof newtab.val[0], 0, FlagNoInvokeGC);
-
- for(i=0; i<tab->max; i++) {
- k = tab->fkey[i];
- if(k != nil && k != (void*)-1)
- addfintab(&newtab, k, tab->val[i].fn, tab->val[i].ft, tab->val[i].ot);
- }
-
- runtime_free(tab->fkey);
- runtime_free(tab->val);
-
- tab->fkey = newtab.fkey;
- tab->val = newtab.val;
- tab->nkey = newtab.nkey;
- tab->ndead = newtab.ndead;
- tab->max = newtab.max;
-}
-
-bool
-runtime_addfinalizer(void *p, FuncVal *f, const struct __go_func_type *ft, const struct __go_ptr_type *ot)
-{
- Fintab *tab;
- byte *base;
-
- if(debug) {
- if(!runtime_mlookup(p, &base, nil, nil) || p != base)
- runtime_throw("addfinalizer on invalid pointer");
- }
-
- tab = TAB(p);
- runtime_lock(tab);
- if(f == nil) {
- lookfintab(tab, p, true, nil);
- runtime_unlock(tab);
- return true;
- }
-
- if(lookfintab(tab, p, false, nil)) {
- runtime_unlock(tab);
- return false;
- }
-
- if(tab->nkey >= tab->max/2+tab->max/4) {
- // keep table at most 3/4 full:
- // allocate new table and rehash.
- resizefintab(tab);
- }
-
- addfintab(tab, p, f, ft, ot);
- runtime_setblockspecial(p, true);
- runtime_unlock(tab);
- return true;
-}
-
-// get finalizer; if del, delete finalizer.
-// caller is responsible for updating RefHasFinalizer (special) bit.
-bool
-runtime_getfinalizer(void *p, bool del, FuncVal **fn, const struct __go_func_type **ft, const struct __go_ptr_type **ot)
-{
- Fintab *tab;
- bool res;
- Fin f;
-
- tab = TAB(p);
- runtime_lock(tab);
- res = lookfintab(tab, p, del, &f);
- runtime_unlock(tab);
- if(res==false)
- return false;
- *fn = f.fn;
- *ft = f.ft;
- *ot = f.ot;
- return true;
-}
-
-void
-runtime_walkfintab(void (*fn)(void*), void (*addroot)(Obj))
-{
- void **key;
- void **ekey;
- int32 i;
-
- for(i=0; i<TABSZ; i++) {
- runtime_lock(&fintab[i]);
- key = fintab[i].fkey;
- ekey = key + fintab[i].max;
- for(; key < ekey; key++)
- if(*key != nil && *key != ((void*)-1))
- fn(*key);
- addroot((Obj){(byte*)&fintab[i].fkey, sizeof(void*), 0});
- addroot((Obj){(byte*)&fintab[i].val, sizeof(void*), 0});
- runtime_unlock(&fintab[i]);
- }
-}
diff --git a/libgo/runtime/mgc0.c b/libgo/runtime/mgc0.c
index d665d92..10dd412 100644
--- a/libgo/runtime/mgc0.c
+++ b/libgo/runtime/mgc0.c
@@ -2,7 +2,53 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// Garbage collector.
+// Garbage collector (GC).
+//
+// GC is:
+// - mark&sweep
+// - mostly precise (with the exception of some C-allocated objects, assembly frames/arguments, etc)
+// - parallel (up to MaxGcproc threads)
+// - partially concurrent (mark is stop-the-world, while sweep is concurrent)
+// - non-moving/non-compacting
+// - full (non-partial)
+//
+// GC rate.
+// Next GC is after we've allocated an extra amount of memory proportional to
+// the amount already in use. The proportion is controlled by GOGC environment variable
+// (100 by default). If GOGC=100 and we're using 4M, we'll GC again when we get to 8M
+// (this mark is tracked in next_gc variable). This keeps the GC cost in linear
+// proportion to the allocation cost. Adjusting GOGC just changes the linear constant
+// (and also the amount of extra memory used).
+//
+// Concurrent sweep.
+// The sweep phase proceeds concurrently with normal program execution.
+// The heap is swept span-by-span both lazily (when a goroutine needs another span)
+// and concurrently in a background goroutine (this helps programs that are not CPU bound).
+// However, at the end of the stop-the-world GC phase we don't know the size of the live heap,
+// and so next_gc calculation is tricky and happens as follows.
+// At the end of the stop-the-world phase next_gc is conservatively set based on total
+// heap size; all spans are marked as "needs sweeping".
+// Whenever a span is swept, next_gc is decremented by GOGC*newly_freed_memory.
+// The background sweeper goroutine simply sweeps spans one-by-one bringing next_gc
+// closer to the target value. However, this is not enough to avoid over-allocating memory.
+// Consider that a goroutine wants to allocate a new span for a large object and
+// there are no free swept spans, but there are small-object unswept spans.
+// If the goroutine naively allocates a new span, it can surpass the yet-unknown
+// target next_gc value. In order to prevent such cases (1) when a goroutine needs
+// to allocate a new small-object span, it sweeps small-object spans for the same
+// object size until it frees at least one object; (2) when a goroutine needs to
+// allocate large-object span from heap, it sweeps spans until it frees at least
+// that many pages into heap. Together these two measures ensure that we don't surpass
+// target next_gc value by a large margin. There is an exception: if a goroutine sweeps
+// and frees two nonadjacent one-page spans to the heap, it will allocate a new two-page span,
+// but there can still be other one-page unswept spans which could be combined into a two-page span.
+// It's critical to ensure that no operations proceed on unswept spans (that would corrupt
+// mark bits in GC bitmap). During GC all mcaches are flushed into the central cache,
+// so they are empty. When a goroutine grabs a new span into mcache, it sweeps it.
+// When a goroutine explicitly frees an object or sets a finalizer, it ensures that
+// the span is swept (either by sweeping it, or by waiting for the concurrent sweep to finish).
+// The finalizer goroutine is kicked off only when all spans are swept.
+// When the next GC starts, it sweeps all not-yet-swept spans (if any).
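/*
 * Illustrative sketch only (not part of this patch; the helper name is
 * hypothetical): the pacing rule described above, mirroring the
 * mstats.next_gc update performed in gc() later in this file. With
 * GOGC=gcpercent and a live heap of heap_alloc bytes, the next
 * collection is triggered at heap_alloc*(1 + gcpercent/100) bytes,
 * e.g. GOGC=100 and 4MB live gives an 8MB target.
 */
static uint64
next_gc_target(uint64 heap_alloc, int32 gcpercent)
{
	return heap_alloc + heap_alloc*gcpercent/100;
}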
#include <unistd.h>
@@ -19,12 +65,9 @@
#define cap __capacity
// Iface aka __go_interface
#define tab __methods
-// Eface aka __go_empty_interface.
-#define type __type_descriptor
// Hmap aka __go_map
typedef struct __go_map Hmap;
// Type aka __go_type_descriptor
-#define kind __code
#define string __reflection
#define KindPtr GO_PTR
#define KindNoPointers GO_NO_POINTERS
@@ -43,15 +86,19 @@ extern void * __splitstack_find_context (void *context[10], size_t *, void **,
enum {
Debug = 0,
- DebugMark = 0, // run second pass to check mark
CollectStats = 0,
ScanStackByFrames = 1,
IgnorePreciseGC = 0,
+ ConcurrentSweep = 1,
// Four bits per word (see #defines below).
wordsPerBitmapWord = sizeof(void*)*8/4,
bitShift = sizeof(void*)*8/4,
+ WorkbufSize = 16*1024,
+ RootBlockSize = 4*1024,
+ FinBlockSize = 4*1024,
+
handoffThreshold = 4,
IntermediateBufferCapacity = 64,
@@ -66,8 +113,20 @@ enum {
BitsPointer = 1,
BitsIface = 2,
BitsEface = 3,
+
+ RootData = 0,
+ RootBss = 1,
+ RootFinalizers = 2,
+ RootSpanTypes = 3,
+ RootFlushCaches = 4,
+ RootCount = 5,
};
+#define GcpercentUnknown (-2)
+
+// Initialized from $GOGC. GOGC=off means no gc.
+static int32 gcpercent = GcpercentUnknown;
+
static struct
{
Lock;
@@ -89,16 +148,34 @@ sync_runtime_registerPool(void **p)
static void
clearpools(void)
{
- void **p, **next;
-
- for(p = pools.head; p != nil; p = next) {
- next = p[0];
- p[0] = nil; // next
- p[1] = nil; // slice
- p[2] = nil;
- p[3] = nil;
+ void **pool, **next;
+ P *p, **pp;
+ MCache *c;
+ uintptr off;
+
+ // clear sync.Pool's
+ for(pool = pools.head; pool != nil; pool = next) {
+ next = pool[0];
+ pool[0] = nil; // next
+ pool[1] = nil; // local
+ pool[2] = nil; // localSize
+ off = (uintptr)pool[3] / sizeof(void*);
+ pool[off+0] = nil; // global slice
+ pool[off+1] = nil;
+ pool[off+2] = nil;
}
pools.head = nil;
+
+ for(pp=runtime_allp; (p=*pp) != nil; pp++) {
+ // clear tinyalloc pool
+ c = p->mcache;
+ if(c != nil) {
+ c->tiny = nil;
+ c->tinysize = 0;
+ }
+ // clear defer pools
+ p->deferpool = nil;
+ }
}
// Bits in per-word bitmap.
@@ -149,11 +226,10 @@ clearpools(void)
//
uint32 runtime_worldsema = 1;
-// The size of Workbuf is N*PageSize.
typedef struct Workbuf Workbuf;
struct Workbuf
{
-#define SIZE (2*PageSize-sizeof(LFNode)-sizeof(uintptr))
+#define SIZE (WorkbufSize-sizeof(LFNode)-sizeof(uintptr))
LFNode node; // must be first
uintptr nobj;
Obj obj[SIZE/sizeof(Obj) - 1];
@@ -180,45 +256,42 @@ struct FinBlock
Finalizer fin[1];
};
-static G *fing;
-static FinBlock *finq; // list of finalizers that are to be executed
-static FinBlock *finc; // cache of free blocks
-static FinBlock *allfin; // list of all blocks
-static Lock finlock;
-static int32 fingwait;
+static G *fing;
+static FinBlock *finq; // list of finalizers that are to be executed
+static FinBlock *finc; // cache of free blocks
+static FinBlock *allfin; // list of all blocks
+static int32 fingwait;
+static Lock gclock;
-static void runfinq(void*);
+static void runfinq(void*);
+static void bgsweep(void*);
static Workbuf* getempty(Workbuf*);
static Workbuf* getfull(Workbuf*);
static void putempty(Workbuf*);
static Workbuf* handoff(Workbuf*);
static void gchelperstart(void);
+static void flushallmcaches(void);
+static void addstackroots(G *gp, Workbuf **wbufp);
static struct {
uint64 full; // lock-free list of full blocks
uint64 empty; // lock-free list of empty blocks
byte pad0[CacheLineSize]; // prevents false-sharing between full/empty and nproc/nwait
uint32 nproc;
+ int64 tstart;
volatile uint32 nwait;
volatile uint32 ndone;
- volatile uint32 debugmarkdone;
Note alldone;
ParFor *markfor;
- ParFor *sweepfor;
Lock;
byte *chunk;
uintptr nchunk;
-
- Obj *roots;
- uint32 nroot;
- uint32 rootcap;
} work __attribute__((aligned(8)));
enum {
GC_DEFAULT_PTR = GC_NUM_INSTR,
GC_CHAN,
- GC_G_PTR,
GC_NUM_INSTR2
};
@@ -250,6 +323,8 @@ static struct {
uint64 foundword;
uint64 foundspan;
} markonly;
+ uint32 nbgsweep;
+ uint32 npausesweep;
} gcstats;
// markonly marks an object. It returns true if the object
@@ -638,11 +713,6 @@ static uintptr defaultProg[2] = {PtrSize, GC_DEFAULT_PTR};
static uintptr chanProg[2] = {0, GC_CHAN};
#endif
-#if 0
-// G* program
-static uintptr gptrProg[2] = {0, GC_G_PTR};
-#endif
-
// Local variables of a program fragment or loop
typedef struct Frame Frame;
struct Frame {
@@ -713,15 +783,11 @@ checkptr(void *obj, uintptr objti)
// a work list in the Workbuf* structures and loops in the main function
// body. Keeping an explicit work list is easier on the stack allocator and
// more efficient.
-//
-// wbuf: current work buffer
-// wp: storage for next queued pointer (write pointer)
-// nobj: number of queued objects
static void
-scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
+scanblock(Workbuf *wbuf, bool keepworking)
{
byte *b, *arena_start, *arena_used;
- uintptr n, i, end_b, elemsize, size, ti, objti, count /* , type */;
+ uintptr n, i, end_b, elemsize, size, ti, objti, count, /* type, */ nobj;
uintptr *pc, precise_type, nominal_size;
#if 0
uintptr *chan_ret, chancap;
@@ -738,8 +804,9 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
Hchan *chan;
ChanType *chantype;
#endif
+ Obj *wp;
- if(sizeof(Workbuf) % PageSize != 0)
+ if(sizeof(Workbuf) % WorkbufSize != 0)
runtime_throw("scanblock: size of Workbuf is suboptimal");
// Memory arena parameters.
@@ -751,6 +818,14 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
precise_type = false;
nominal_size = 0;
+ if(wbuf) {
+ nobj = wbuf->nobj;
+ wp = &wbuf->obj[nobj];
+ } else {
+ nobj = 0;
+ wp = nil;
+ }
+
// Initialize sbuf
scanbuffers = &bufferList[runtime_m()->helpgc];
@@ -904,11 +979,11 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
case GC_EFACE:
eface = (Eface*)(stack_top.b + pc[1]);
pc += 2;
- if(eface->type == nil)
+ if(eface->__type_descriptor == nil)
continue;
// eface->type
- t = eface->type;
+ t = eface->__type_descriptor;
if((const byte*)t >= arena_start && (const byte*)t < arena_used) {
union { const Type *tc; Type *tr; } u;
u.tc = t;
@@ -920,11 +995,11 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
// eface->__object
if((byte*)eface->__object >= arena_start && (byte*)eface->__object < arena_used) {
if(t->__size <= sizeof(void*)) {
- if((t->kind & KindNoPointers))
+ if((t->__code & KindNoPointers))
continue;
obj = eface->__object;
- if((t->kind & ~KindNoPointers) == KindPtr)
+ if((t->__code & ~KindNoPointers) == KindPtr)
// objti = (uintptr)((PtrType*)t)->elem->gc;
objti = 0;
} else {
@@ -953,11 +1028,11 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
// t = iface->tab->type;
t = nil;
if(t->__size <= sizeof(void*)) {
- if((t->kind & KindNoPointers))
+ if((t->__code & KindNoPointers))
continue;
obj = iface->__object;
- if((t->kind & ~KindNoPointers) == KindPtr)
+ if((t->__code & ~KindNoPointers) == KindPtr)
// objti = (uintptr)((const PtrType*)t)->elem->gc;
objti = 0;
} else {
@@ -1064,7 +1139,7 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
}
if(markonly(chan)) {
chantype = (ChanType*)pc[2];
- if(!(chantype->elem->kind & KindNoPointers)) {
+ if(!(chantype->elem->__code & KindNoPointers)) {
// Start chanProg.
chan_ret = pc+3;
pc = chanProg+1;
@@ -1077,7 +1152,7 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
case GC_CHAN:
// There are no heap pointers in struct Hchan,
// so we can ignore the leading sizeof(Hchan) bytes.
- if(!(chantype->elem->kind & KindNoPointers)) {
+ if(!(chantype->elem->__code & KindNoPointers)) {
// Channel's buffer follows Hchan immediately in memory.
// Size of buffer (cap(c)) is second int in the chan struct.
chancap = ((uintgo*)chan)[1];
@@ -1098,13 +1173,6 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
continue;
#endif
-#if 0
- case GC_G_PTR:
- obj = (void*)stack_top.b;
- scanstack(obj, &sbuf);
- goto next_block;
-#endif
-
default:
runtime_throw("scanblock: invalid GC instruction");
return;
@@ -1149,80 +1217,15 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
}
}
-// debug_scanblock is the debug copy of scanblock.
-// it is simpler, slower, single-threaded, recursive,
-// and uses bitSpecial as the mark bit.
-static void
-debug_scanblock(byte *b, uintptr n)
-{
- byte *obj, *p;
- void **vp;
- uintptr size, *bitp, bits, shift, i, xbits, off;
- MSpan *s;
-
- if(!DebugMark)
- runtime_throw("debug_scanblock without DebugMark");
-
- if((intptr)n < 0) {
- runtime_printf("debug_scanblock %p %D\n", b, (int64)n);
- runtime_throw("debug_scanblock");
- }
-
- // Align b to a word boundary.
- off = (uintptr)b & (PtrSize-1);
- if(off != 0) {
- b += PtrSize - off;
- n -= PtrSize - off;
- }
-
- vp = (void**)b;
- n /= PtrSize;
- for(i=0; i<(uintptr)n; i++) {
- obj = (byte*)vp[i];
-
- // Words outside the arena cannot be pointers.
- if((byte*)obj < runtime_mheap.arena_start || (byte*)obj >= runtime_mheap.arena_used)
- continue;
-
- // Round down to word boundary.
- obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
-
- // Consult span table to find beginning.
- s = runtime_MHeap_LookupMaybe(&runtime_mheap, obj);
- if(s == nil)
- continue;
-
- p = (byte*)((uintptr)s->start<<PageShift);
- size = s->elemsize;
- if(s->sizeclass == 0) {
- obj = p;
- } else {
- int32 i = ((byte*)obj - p)/size;
- obj = p+i*size;
- }
-
- // Now that we know the object header, reload bits.
- off = (uintptr*)obj - (uintptr*)runtime_mheap.arena_start;
- bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
- shift = off % wordsPerBitmapWord;
- xbits = *bitp;
- bits = xbits >> shift;
-
- // Now we have bits, bitp, and shift correct for
- // obj pointing at the base of the object.
- // If not allocated or already marked, done.
- if((bits & bitAllocated) == 0 || (bits & bitSpecial) != 0) // NOTE: bitSpecial not bitMarked
- continue;
- *bitp |= bitSpecial<<shift;
- if(!(bits & bitMarked))
- runtime_printf("found unmarked block %p in %p\n", obj, vp+i);
-
- // If object has no pointers, don't need to scan further.
- if((bits & bitScan) == 0)
- continue;
+static struct root_list* roots;
- debug_scanblock(obj, size);
- }
+void
+__go_register_gc_roots (struct root_list* r)
+{
+ // FIXME: This needs locking if multiple goroutines can call
+ // dlopen simultaneously.
+ r->next = roots;
+ roots = r;
}
// Append obj to the work buffer.
@@ -1281,18 +1284,125 @@ enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj)
}
static void
+enqueue1(Workbuf **wbufp, Obj obj)
+{
+ Workbuf *wbuf;
+
+ wbuf = *wbufp;
+ if(wbuf->nobj >= nelem(wbuf->obj))
+ *wbufp = wbuf = getempty(wbuf);
+ wbuf->obj[wbuf->nobj++] = obj;
+}
+
+static void
markroot(ParFor *desc, uint32 i)
{
- Obj *wp;
Workbuf *wbuf;
- uintptr nobj;
+ FinBlock *fb;
+ MHeap *h;
+ MSpan **allspans, *s;
+ uint32 spanidx, sg;
+ G *gp;
+ void *p;
USED(&desc);
- wp = nil;
- wbuf = nil;
- nobj = 0;
- enqueue(work.roots[i], &wbuf, &wp, &nobj);
- scanblock(wbuf, wp, nobj, false);
+ wbuf = getempty(nil);
+ switch(i) {
+ case RootData:
+ // For gccgo this is both data and bss.
+ {
+ struct root_list *pl;
+
+ for(pl = roots; pl != nil; pl = pl->next) {
+ struct root *pr = &pl->roots[0];
+ while(1) {
+ void *decl = pr->decl;
+ if(decl == nil)
+ break;
+ enqueue1(&wbuf, (Obj){decl, pr->size, 0});
+ pr++;
+ }
+ }
+ }
+ break;
+
+ case RootBss:
+ // For gccgo we use this for all the other global roots.
+ enqueue1(&wbuf, (Obj){(byte*)&runtime_m0, sizeof runtime_m0, 0});
+ enqueue1(&wbuf, (Obj){(byte*)&runtime_g0, sizeof runtime_g0, 0});
+ enqueue1(&wbuf, (Obj){(byte*)&runtime_allg, sizeof runtime_allg, 0});
+ enqueue1(&wbuf, (Obj){(byte*)&runtime_allm, sizeof runtime_allm, 0});
+ enqueue1(&wbuf, (Obj){(byte*)&runtime_allp, sizeof runtime_allp, 0});
+ enqueue1(&wbuf, (Obj){(byte*)&work, sizeof work, 0});
+ runtime_proc_scan(&wbuf, enqueue1);
+ runtime_MProf_Mark(&wbuf, enqueue1);
+ runtime_time_scan(&wbuf, enqueue1);
+ runtime_netpoll_scan(&wbuf, enqueue1);
+ break;
+
+ case RootFinalizers:
+ for(fb=allfin; fb; fb=fb->alllink)
+ enqueue1(&wbuf, (Obj){(byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), 0});
+ break;
+
+ case RootSpanTypes:
+ // mark span types and MSpan.specials (to walk spans only once)
+ h = &runtime_mheap;
+ sg = h->sweepgen;
+ allspans = h->allspans;
+ for(spanidx=0; spanidx<runtime_mheap.nspan; spanidx++) {
+ Special *sp;
+ SpecialFinalizer *spf;
+
+ s = allspans[spanidx];
+ if(s->sweepgen != sg) {
+ runtime_printf("sweep %d %d\n", s->sweepgen, sg);
+ runtime_throw("gc: unswept span");
+ }
+ if(s->state != MSpanInUse)
+ continue;
+ // The garbage collector ignores type pointers stored in MSpan.types:
+ // - Compiler-generated types are stored outside of heap.
+ // - The reflect package has runtime-generated types cached in its data structures.
+ // The garbage collector relies on finding the references via that cache.
+ if(s->types.compression == MTypes_Words || s->types.compression == MTypes_Bytes)
+ markonly((byte*)s->types.data);
+ for(sp = s->specials; sp != nil; sp = sp->next) {
+ if(sp->kind != KindSpecialFinalizer)
+ continue;
+ // don't mark finalized object, but scan it so we
+ // retain everything it points to.
+ spf = (SpecialFinalizer*)sp;
+ // A finalizer can be set for an inner byte of an object, find object beginning.
+ p = (void*)((s->start << PageShift) + spf->offset/s->elemsize*s->elemsize);
+ enqueue1(&wbuf, (Obj){p, s->elemsize, 0});
+ enqueue1(&wbuf, (Obj){(void*)&spf->fn, PtrSize, 0});
+ enqueue1(&wbuf, (Obj){(void*)&spf->ft, PtrSize, 0});
+ enqueue1(&wbuf, (Obj){(void*)&spf->ot, PtrSize, 0});
+ }
+ }
+ break;
+
+ case RootFlushCaches:
+ flushallmcaches();
+ break;
+
+ default:
+ // the rest is scanning goroutine stacks
+ if(i - RootCount >= runtime_allglen)
+ runtime_throw("markroot: bad index");
+ gp = runtime_allg[i - RootCount];
+ // remember when we've first observed the G blocked
+ // needed only to output in traceback
+ if((gp->status == Gwaiting || gp->status == Gsyscall) && gp->waitsince == 0)
+ gp->waitsince = work.tstart;
+ addstackroots(gp, &wbuf);
+ break;
+
+ }
+
+ if(wbuf)
+ scanblock(wbuf, false);
}
// Get an empty work buffer off the work.empty list,
@@ -1395,32 +1505,22 @@ handoff(Workbuf *b)
}
static void
-addroot(Obj obj)
+addstackroots(G *gp, Workbuf **wbufp)
{
- uint32 cap;
- Obj *new;
-
- if(work.nroot >= work.rootcap) {
- cap = PageSize/sizeof(Obj);
- if(cap < 2*work.rootcap)
- cap = 2*work.rootcap;
- new = (Obj*)runtime_SysAlloc(cap*sizeof(Obj), &mstats.gc_sys);
- if(new == nil)
- runtime_throw("runtime: cannot allocate memory");
- if(work.roots != nil) {
- runtime_memmove(new, work.roots, work.rootcap*sizeof(Obj));
- runtime_SysFree(work.roots, work.rootcap*sizeof(Obj), &mstats.gc_sys);
- }
- work.roots = new;
- work.rootcap = cap;
+ switch(gp->status){
+ default:
+ runtime_printf("unexpected G.status %d (goroutine %p %D)\n", gp->status, gp, gp->goid);
+ runtime_throw("mark - bad status");
+ case Gdead:
+ return;
+ case Grunning:
+ runtime_throw("mark - world not stopped");
+ case Grunnable:
+ case Gsyscall:
+ case Gwaiting:
+ break;
}
- work.roots[work.nroot] = obj;
- work.nroot++;
-}
-static void
-addstackroots(G *gp)
-{
#ifdef USING_SPLIT_STACK
M *mp;
void* sp;
@@ -1458,11 +1558,11 @@ addstackroots(G *gp)
}
}
if(sp != nil) {
- addroot((Obj){sp, spsize, 0});
+ enqueue1(wbufp, (Obj){sp, spsize, 0});
while((sp = __splitstack_find(next_segment, next_sp,
&spsize, &next_segment,
&next_sp, &initial_sp)) != nil)
- addroot((Obj){sp, spsize, 0});
+ enqueue1(wbufp, (Obj){sp, spsize, 0});
}
#else
M *mp;
@@ -1484,159 +1584,23 @@ addstackroots(G *gp)
}
top = (byte*)gp->gcinitial_sp + gp->gcstack_size;
if(top > bottom)
- addroot((Obj){bottom, top - bottom, 0});
+ enqueue1(wbufp, (Obj){bottom, top - bottom, 0});
else
- addroot((Obj){top, bottom - top, 0});
+ enqueue1(wbufp, (Obj){top, bottom - top, 0});
#endif
}
-static void
-addfinroots(void *v)
-{
- uintptr size;
- void *base;
-
- size = 0;
- if(!runtime_mlookup(v, (byte**)&base, &size, nil) || !runtime_blockspecial(base))
- runtime_throw("mark - finalizer inconsistency");
-
- // do not mark the finalizer block itself. just mark the things it points at.
- addroot((Obj){base, size, 0});
-}
-
-static struct root_list* roots;
-
void
-__go_register_gc_roots (struct root_list* r)
-{
- // FIXME: This needs locking if multiple goroutines can call
- // dlopen simultaneously.
- r->next = roots;
- roots = r;
-}
-
-static void
-addroots(void)
-{
- struct root_list *pl;
- G *gp;
- FinBlock *fb;
- MSpan *s, **allspans;
- uint32 spanidx;
-
- work.nroot = 0;
-
- // mark data+bss.
- for(pl = roots; pl != nil; pl = pl->next) {
- struct root* pr = &pl->roots[0];
- while(1) {
- void *decl = pr->decl;
- if(decl == nil)
- break;
- addroot((Obj){decl, pr->size, 0});
- pr++;
- }
- }
-
- addroot((Obj){(byte*)&runtime_m0, sizeof runtime_m0, 0});
- addroot((Obj){(byte*)&runtime_g0, sizeof runtime_g0, 0});
- addroot((Obj){(byte*)&runtime_allg, sizeof runtime_allg, 0});
- addroot((Obj){(byte*)&runtime_allm, sizeof runtime_allm, 0});
- addroot((Obj){(byte*)&runtime_allp, sizeof runtime_allp, 0});
- runtime_proc_scan(addroot);
- runtime_MProf_Mark(addroot);
- runtime_time_scan(addroot);
- runtime_netpoll_scan(addroot);
-
- // MSpan.types
- allspans = runtime_mheap.allspans;
- for(spanidx=0; spanidx<runtime_mheap.nspan; spanidx++) {
- s = allspans[spanidx];
- if(s->state == MSpanInUse) {
- // The garbage collector ignores type pointers stored in MSpan.types:
- // - Compiler-generated types are stored outside of heap.
- // - The reflect package has runtime-generated types cached in its data structures.
- // The garbage collector relies on finding the references via that cache.
- switch(s->types.compression) {
- case MTypes_Empty:
- case MTypes_Single:
- break;
- case MTypes_Words:
- case MTypes_Bytes:
- markonly((byte*)s->types.data);
- break;
- }
- }
- }
-
- // stacks
- for(gp=runtime_allg; gp!=nil; gp=gp->alllink) {
- switch(gp->status){
- default:
- runtime_printf("unexpected G.status %d\n", gp->status);
- runtime_throw("mark - bad status");
- case Gdead:
- break;
- case Grunning:
- runtime_throw("mark - world not stopped");
- case Grunnable:
- case Gsyscall:
- case Gwaiting:
- addstackroots(gp);
- break;
- }
- }
-
- runtime_walkfintab(addfinroots, addroot);
-
- for(fb=allfin; fb; fb=fb->alllink)
- addroot((Obj){(byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), 0});
-
- addroot((Obj){(byte*)&work, sizeof work, 0});
-}
-
-static void
-addfreelists(void)
-{
- int32 i;
- P *p, **pp;
- MCache *c;
- MLink *m;
-
- // Mark objects in the MCache of each P so we don't collect them.
- for(pp=runtime_allp; (p=*pp); pp++) {
- c = p->mcache;
- if(c==nil)
- continue;
- for(i = 0; i < NumSizeClasses; i++) {
- for(m = c->list[i].list; m != nil; m = m->next) {
- markonly(m);
- }
- }
- }
- // Note: the sweeper will mark objects in each span's freelist.
-}
-
-static bool
-handlespecial(byte *p, uintptr size)
+runtime_queuefinalizer(void *p, FuncVal *fn, const FuncType *ft, const PtrType *ot)
{
- FuncVal *fn;
- const struct __go_func_type *ft;
- const struct __go_ptr_type *ot;
FinBlock *block;
Finalizer *f;
-
- if(!runtime_getfinalizer(p, true, &fn, &ft, &ot)) {
- runtime_setblockspecial(p, false);
- runtime_MProf_Free(p, size);
- return false;
- }
- runtime_lock(&finlock);
+ runtime_lock(&gclock);
if(finq == nil || finq->cnt == finq->cap) {
if(finc == nil) {
- finc = runtime_persistentalloc(PageSize, 0, &mstats.gc_sys);
- finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
+ finc = runtime_persistentalloc(FinBlockSize, 0, &mstats.gc_sys);
+ finc->cap = (FinBlockSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
finc->alllink = allfin;
allfin = finc;
}
@@ -1651,37 +1615,64 @@ handlespecial(byte *p, uintptr size)
f->ft = ft;
f->ot = ot;
f->arg = p;
- runtime_unlock(&finlock);
- return true;
+ runtime_unlock(&gclock);
+}
+
+void
+runtime_MSpan_EnsureSwept(MSpan *s)
+{
+ M *m = runtime_m();
+ uint32 sg;
+
+ sg = runtime_mheap.sweepgen;
+ if(runtime_atomicload(&s->sweepgen) == sg)
+ return;
+ m->locks++;
+ if(runtime_cas(&s->sweepgen, sg-2, sg-1)) {
+ runtime_MSpan_Sweep(s);
+ m->locks--;
+ return;
+ }
+ m->locks--;
+ // unfortunate condition, and we don't have efficient means to wait
+ while(runtime_atomicload(&s->sweepgen) != sg)
+ runtime_osyield();
}
// Sweep frees or collects finalizers for blocks not marked in the mark phase.
// It clears the mark bits in preparation for the next GC round.
-static void
-sweepspan(ParFor *desc, uint32 idx)
+// Returns true if the span was returned to heap.
+bool
+runtime_MSpan_Sweep(MSpan *s)
{
M *m;
- int32 cl, n, npages;
- uintptr size, off, *bitp, shift;
+ int32 cl, n, npages, nfree;
+ uintptr size, off, *bitp, shift, bits;
+ uint32 sweepgen;
byte *p;
MCache *c;
byte *arena_start;
MLink head, *end;
- int32 nfree;
byte *type_data;
byte compression;
uintptr type_data_inc;
- MSpan *s;
MLink *x;
+ Special *special, **specialp, *y;
+ bool res, sweepgenset;
m = runtime_m();
- USED(&desc);
- s = runtime_mheap.allspans[idx];
- if(s->state != MSpanInUse)
- return;
+ // It's critical that we enter this function with preemption disabled,
+ // GC must not start while we are in the middle of this function.
+ if(m->locks == 0 && m->mallocing == 0 && runtime_g() != m->g0)
+ runtime_throw("MSpan_Sweep: m is not locked");
+ sweepgen = runtime_mheap.sweepgen;
+ if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
+ runtime_printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
+ s->state, s->sweepgen, sweepgen);
+ runtime_throw("MSpan_Sweep: bad span state");
+ }
arena_start = runtime_mheap.arena_start;
- p = (byte*)(s->start << PageShift);
cl = s->sizeclass;
size = s->elemsize;
if(cl == 0) {
@@ -1691,9 +1682,11 @@ sweepspan(ParFor *desc, uint32 idx)
npages = runtime_class_to_allocnpages[cl];
n = (npages << PageShift) / size;
}
+ res = false;
nfree = 0;
end = &head;
c = m->mcache;
+ sweepgenset = false;
// mark any free objects in this span so we don't collect them
for(x = s->freelist; x != nil; x = x->next) {
@@ -1706,6 +1699,35 @@ sweepspan(ParFor *desc, uint32 idx)
*bitp |= bitMarked<<shift;
}
+ // Unlink & free special records for any objects we're about to free.
+ specialp = &s->specials;
+ special = *specialp;
+ while(special != nil) {
+ // A finalizer can be set for an inner byte of an object, find object beginning.
+ p = (byte*)(s->start << PageShift) + special->offset/size*size;
+ off = (uintptr*)p - (uintptr*)arena_start;
+ bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
+ shift = off % wordsPerBitmapWord;
+ bits = *bitp>>shift;
+ if((bits & (bitAllocated|bitMarked)) == bitAllocated) {
+ // Find the exact byte for which the special was setup
+ // (as opposed to object beginning).
+ p = (byte*)(s->start << PageShift) + special->offset;
+ // about to free object: splice out special record
+ y = special;
+ special = special->next;
+ *specialp = special;
+ if(!runtime_freespecial(y, p, size, false)) {
+ // stop freeing of object if it has a finalizer
+ *bitp |= bitMarked << shift;
+ }
+ } else {
+ // object is still live: keep special record
+ specialp = &special->next;
+ special = *specialp;
+ }
+ }
+
type_data = (byte*)s->types.data;
type_data_inc = sizeof(uintptr);
compression = s->types.compression;
@@ -1719,9 +1741,8 @@ sweepspan(ParFor *desc, uint32 idx)
// Sweep through n objects of given size starting at p.
// This thread owns the span now, so it can manipulate
// the block bitmap without atomic operations.
+ p = (byte*)(s->start << PageShift);
for(; n > 0; n--, p += size, type_data+=type_data_inc) {
- uintptr off, *bitp, shift, bits;
-
off = (uintptr*)p - (uintptr*)arena_start;
bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
@@ -1731,36 +1752,28 @@ sweepspan(ParFor *desc, uint32 idx)
continue;
if((bits & bitMarked) != 0) {
- if(DebugMark) {
- if(!(bits & bitSpecial))
- runtime_printf("found spurious mark on %p\n", p);
- *bitp &= ~(bitSpecial<<shift);
- }
*bitp &= ~(bitMarked<<shift);
continue;
}
- // Special means it has a finalizer or is being profiled.
- // In DebugMark mode, the bit has been coopted so
- // we have to assume all blocks are special.
- if(DebugMark || (bits & bitSpecial) != 0) {
- if(handlespecial(p, size))
- continue;
- }
-
// Clear mark, scan, and special bits.
*bitp &= ~((bitScan|bitMarked|bitSpecial)<<shift);
if(cl == 0) {
// Free large span.
runtime_unmarkspan(p, 1<<PageShift);
- *(uintptr*)p = (uintptr)0xdeaddeaddeaddeadll; // needs zeroing
+ s->needzero = 1;
+ // important to set sweepgen before returning it to heap
+ runtime_atomicstore(&s->sweepgen, sweepgen);
+ sweepgenset = true;
if(runtime_debug.efence)
runtime_SysFree(p, size, &mstats.gc_sys);
else
runtime_MHeap_Free(&runtime_mheap, s, 1);
c->local_nlargefree++;
c->local_largefree += size;
+ runtime_xadd64(&mstats.next_gc, -(uint64)(size * (gcpercent + 100)/100));
+ res = true;
} else {
// Free small object.
switch(compression) {
@@ -1771,19 +1784,106 @@ sweepspan(ParFor *desc, uint32 idx)
*(byte*)type_data = 0;
break;
}
- if(size > sizeof(uintptr))
+ if(size > 2*sizeof(uintptr))
((uintptr*)p)[1] = (uintptr)0xdeaddeaddeaddeadll; // mark as "needs to be zeroed"
-
+ else if(size > sizeof(uintptr))
+ ((uintptr*)p)[1] = 0;
+
end->next = (MLink*)p;
end = (MLink*)p;
nfree++;
}
}
+ if(!sweepgenset) {
+ // The span must be in our exclusive ownership until we update sweepgen,
+ // check for potential races.
+ if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
+ runtime_printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
+ s->state, s->sweepgen, sweepgen);
+ runtime_throw("MSpan_Sweep: bad span state after sweep");
+ }
+ runtime_atomicstore(&s->sweepgen, sweepgen);
+ }
if(nfree) {
c->local_nsmallfree[cl] += nfree;
c->local_cachealloc -= nfree * size;
- runtime_MCentral_FreeSpan(&runtime_mheap.central[cl], s, nfree, head.next, end);
+ runtime_xadd64(&mstats.next_gc, -(uint64)(nfree * size * (gcpercent + 100)/100));
+ res = runtime_MCentral_FreeSpan(&runtime_mheap.central[cl], s, nfree, head.next, end);
+ }
+ return res;
+}
+
+// State of background sweep.
+// Protected by gclock.
+static struct
+{
+ G* g;
+ bool parked;
+
+ MSpan** spans;
+ uint32 nspan;
+ uint32 spanidx;
+} sweep;
+
+// background sweeping goroutine
+static void
+bgsweep(void* dummy __attribute__ ((unused)))
+{
+ runtime_g()->issystem = 1;
+ for(;;) {
+ while(runtime_sweepone() != (uintptr)-1) {
+ gcstats.nbgsweep++;
+ runtime_gosched();
+ }
+ runtime_lock(&gclock);
+ if(finq != nil) {
+ // kick off or wake up goroutine to run queued finalizers
+ if(fing == nil)
+ fing = __go_go(runfinq, nil);
+ else if(fingwait) {
+ fingwait = 0;
+ runtime_ready(fing);
+ }
+ }
+ sweep.parked = true;
+ runtime_parkunlock(&gclock, "GC sweep wait");
+ }
+}
+
+// sweeps one span
+// returns number of pages returned to heap, or -1 if there is nothing to sweep
+uintptr
+runtime_sweepone(void)
+{
+ M *m = runtime_m();
+ MSpan *s;
+ uint32 idx, sg;
+ uintptr npages;
+
+ // increment locks to ensure that the goroutine is not preempted
+ // in the middle of sweep thus leaving the span in an inconsistent state for next GC
+ m->locks++;
+ sg = runtime_mheap.sweepgen;
+ for(;;) {
+ idx = runtime_xadd(&sweep.spanidx, 1) - 1;
+ if(idx >= sweep.nspan) {
+ runtime_mheap.sweepdone = true;
+ m->locks--;
+ return (uintptr)-1;
+ }
+ s = sweep.spans[idx];
+ if(s->state != MSpanInUse) {
+ s->sweepgen = sg;
+ continue;
+ }
+ if(s->sweepgen != sg-2 || !runtime_cas(&s->sweepgen, sg-2, sg-1))
+ continue;
+ npages = s->npages;
+ if(!runtime_MSpan_Sweep(s))
+ npages = 0;
+ m->locks--;
+ return npages;
}
}
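/*
 * Usage sketch (illustrative only, not part of this patch; the helper
 * name is hypothetical): callers such as bgsweep and the stop-the-world
 * path in gc() drain the sweep work by calling runtime_sweepone until
 * it reports -1, i.e. until every span has been swept.
 */
static void
sweep_all_spans(void)
{
	// each successful call sweeps at most one span
	while(runtime_sweepone() != (uintptr)-1)
		;
}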
@@ -1874,34 +1974,14 @@ runtime_gchelper(void)
runtime_parfordo(work.markfor);
// help other threads scan secondary blocks
- scanblock(nil, nil, 0, true);
+ scanblock(nil, true);
- if(DebugMark) {
- // wait while the main thread executes mark(debug_scanblock)
- while(runtime_atomicload(&work.debugmarkdone) == 0)
- runtime_usleep(10);
- }
-
- runtime_parfordo(work.sweepfor);
bufferList[runtime_m()->helpgc].busy = 0;
nproc = work.nproc; // work.nproc can change right after we increment work.ndone
if(runtime_xadd(&work.ndone, +1) == nproc-1)
runtime_notewakeup(&work.alldone);
}
-#define GcpercentUnknown (-2)
-
-// Initialized from $GOGC. GOGC=off means no gc.
-//
-// Next gc is after we've allocated an extra amount of
-// memory proportional to the amount already in use.
-// If gcpercent=100 and we're using 4M, we'll gc again
-// when we get to 8M. This keeps the gc cost in linear
-// proportion to the allocation cost. Adjusting gcpercent
-// just changes the linear constant (and also the amount of
-// extra memory used).
-static int32 gcpercent = GcpercentUnknown;
-
static void
cachestats(void)
{
@@ -1917,12 +1997,25 @@ cachestats(void)
}
static void
+flushallmcaches(void)
+{
+ P *p, **pp;
+ MCache *c;
+
+ // Flush MCache's to MCentral.
+ for(pp=runtime_allp; (p=*pp) != nil; pp++) {
+ c = p->mcache;
+ if(c==nil)
+ continue;
+ runtime_MCache_ReleaseAll(c);
+ }
+}
+
+static void
updatememstats(GCStats *stats)
{
M *mp;
MSpan *s;
- MCache *c;
- P *p, **pp;
uint32 i;
uint64 stacks_inuse, smallfree;
uint64 *src, *dst;
@@ -1963,12 +2056,7 @@ updatememstats(GCStats *stats)
}
// Flush MCache's to MCentral.
- for(pp=runtime_allp; (p=*pp) != nil; pp++) {
- c = p->mcache;
- if(c==nil)
- continue;
- runtime_MCache_ReleaseAll(c);
- }
+ flushallmcaches();
// Aggregate local stats.
cachestats();
@@ -2081,6 +2169,9 @@ runtime_gc(int32 force)
a.start_time = runtime_nanotime();
m->gcing = 1;
runtime_stoptheworld();
+
+ if(runtime_debug.allocfreetrace)
+ runtime_MProf_TraceGC();
clearpools();
@@ -2108,19 +2199,24 @@ runtime_gc(int32 force)
m->locks--;
// now that gc is done, kick off finalizer thread if needed
- if(finq != nil) {
- runtime_lock(&finlock);
- // kick off or wake up goroutine to run queued finalizers
- if(fing == nil)
- fing = __go_go(runfinq, nil);
- else if(fingwait) {
- fingwait = 0;
- runtime_ready(fing);
+ if(!ConcurrentSweep) {
+ if(finq != nil) {
+ runtime_lock(&gclock);
+ // kick off or wake up goroutine to run queued finalizers
+ if(fing == nil)
+ fing = __go_go(runfinq, nil);
+ else if(fingwait) {
+ fingwait = 0;
+ runtime_ready(fing);
+ }
+ runtime_unlock(&gclock);
}
- runtime_unlock(&finlock);
+ // give the queued finalizers, if any, a chance to run
+ runtime_gosched();
+ } else {
+ // For gccgo, let other goroutines run.
+ runtime_gosched();
}
- // give the queued finalizers, if any, a chance to run
- runtime_gosched();
}
static void
@@ -2137,7 +2233,7 @@ gc(struct gc_args *args)
{
M *m;
int64 t0, t1, t2, t3, t4;
- uint64 heap0, heap1, obj0, obj1, ninstr;
+ uint64 heap0, heap1, obj, ninstr;
GCStats stats;
M *mp;
uint32 i;
@@ -2146,6 +2242,7 @@ gc(struct gc_args *args)
m = runtime_m();
t0 = args->start_time;
+ work.tstart = args->start_time;
if(CollectStats)
runtime_memclr((byte*)&gcstats, sizeof(gcstats));
@@ -2153,61 +2250,50 @@ gc(struct gc_args *args)
for(mp=runtime_allm; mp; mp=mp->alllink)
runtime_settype_flush(mp);
- heap0 = 0;
- obj0 = 0;
- if(runtime_debug.gctrace) {
- updatememstats(nil);
- heap0 = mstats.heap_alloc;
- obj0 = mstats.nmalloc - mstats.nfree;
- }
-
m->locks++; // disable gc during mallocs in parforalloc
if(work.markfor == nil)
work.markfor = runtime_parforalloc(MaxGcproc);
- if(work.sweepfor == nil)
- work.sweepfor = runtime_parforalloc(MaxGcproc);
m->locks--;
if(itabtype == nil) {
// get C pointer to the Go type "itab"
// runtime_gc_itab_ptr(&eface);
- // itabtype = ((PtrType*)eface.type)->elem;
+ // itabtype = ((PtrType*)eface.__type_descriptor)->elem;
}
+ t1 = runtime_nanotime();
+
+ // Sweep what is not swept by bgsweep.
+ while(runtime_sweepone() != (uintptr)-1)
+ gcstats.npausesweep++;
+
work.nwait = 0;
work.ndone = 0;
- work.debugmarkdone = 0;
work.nproc = runtime_gcprocs();
- addroots();
- addfreelists();
- runtime_parforsetup(work.markfor, work.nproc, work.nroot, nil, false, markroot);
- runtime_parforsetup(work.sweepfor, work.nproc, runtime_mheap.nspan, nil, true, sweepspan);
+ runtime_parforsetup(work.markfor, work.nproc, RootCount + runtime_allglen, nil, false, markroot);
if(work.nproc > 1) {
runtime_noteclear(&work.alldone);
runtime_helpgc(work.nproc);
}
- t1 = runtime_nanotime();
+ t2 = runtime_nanotime();
gchelperstart();
runtime_parfordo(work.markfor);
- scanblock(nil, nil, 0, true);
-
- if(DebugMark) {
- for(i=0; i<work.nroot; i++)
- debug_scanblock(work.roots[i].p, work.roots[i].n);
- runtime_atomicstore(&work.debugmarkdone, 1);
- }
- t2 = runtime_nanotime();
+ scanblock(nil, true);
- runtime_parfordo(work.sweepfor);
- bufferList[m->helpgc].busy = 0;
t3 = runtime_nanotime();
+ bufferList[m->helpgc].busy = 0;
if(work.nproc > 1)
runtime_notesleep(&work.alldone);
cachestats();
+ // next_gc calculation is tricky with concurrent sweep since we don't know size of live heap
+ // estimate what was live heap size after previous GC (for tracing only)
+ heap0 = mstats.next_gc*100/(gcpercent+100);
+ // conservatively set next_gc to high value assuming that everything is live
+ // concurrent/lazy sweep will reduce this number while discovering new garbage
mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;
t4 = runtime_nanotime();
@@ -2221,20 +2307,23 @@ gc(struct gc_args *args)
if(runtime_debug.gctrace) {
updatememstats(&stats);
heap1 = mstats.heap_alloc;
- obj1 = mstats.nmalloc - mstats.nfree;
+ obj = mstats.nmalloc - mstats.nfree;
- stats.nprocyield += work.sweepfor->nprocyield;
- stats.nosyield += work.sweepfor->nosyield;
- stats.nsleep += work.sweepfor->nsleep;
+ stats.nprocyield += work.markfor->nprocyield;
+ stats.nosyield += work.markfor->nosyield;
+ stats.nsleep += work.markfor->nsleep;
- runtime_printf("gc%d(%d): %D+%D+%D ms, %D -> %D MB %D -> %D (%D-%D) objects,"
+ runtime_printf("gc%d(%d): %D+%D+%D ms, %D -> %D MB, %D (%D-%D) objects,"
+ " %d/%d/%d sweeps,"
" %D(%D) handoff, %D(%D) steal, %D/%D/%D yields\n",
- mstats.numgc, work.nproc, (t2-t1)/1000000, (t3-t2)/1000000, (t1-t0+t4-t3)/1000000,
- heap0>>20, heap1>>20, obj0, obj1,
+ mstats.numgc, work.nproc, (t3-t2)/1000000, (t2-t1)/1000000, (t1-t0+t4-t3)/1000000,
+ heap0>>20, heap1>>20, obj,
mstats.nmalloc, mstats.nfree,
+ sweep.nspan, gcstats.nbgsweep, gcstats.npausesweep,
stats.nhandoff, stats.nhandoffcnt,
- work.sweepfor->nsteal, work.sweepfor->nstealcnt,
+ work.markfor->nsteal, work.markfor->nstealcnt,
stats.nprocyield, stats.nosyield, stats.nsleep);
+ gcstats.nbgsweep = gcstats.npausesweep = 0;
if(CollectStats) {
runtime_printf("scan: %D bytes, %D objects, %D untyped, %D types from MSpan\n",
gcstats.nbytes, gcstats.obj.cnt, gcstats.obj.notype, gcstats.obj.typelookup);
@@ -2261,9 +2350,44 @@ gc(struct gc_args *args)
}
}
+ // We cache current runtime_mheap.allspans array in sweep.spans,
+ // because the former can be resized and freed.
+ // Otherwise we would need to take heap lock every time
+ // we want to convert span index to span pointer.
+
+ // Free the old cached array if necessary.
+ if(sweep.spans && sweep.spans != runtime_mheap.allspans)
+ runtime_SysFree(sweep.spans, sweep.nspan*sizeof(sweep.spans[0]), &mstats.other_sys);
+ // Cache the current array.
+ runtime_mheap.sweepspans = runtime_mheap.allspans;
+ runtime_mheap.sweepgen += 2;
+ runtime_mheap.sweepdone = false;
+ sweep.spans = runtime_mheap.allspans;
+ sweep.nspan = runtime_mheap.nspan;
+ sweep.spanidx = 0;
+
+ // Temporarily disable concurrent sweep, because we see failures on builders.
+ if(ConcurrentSweep) {
+ runtime_lock(&gclock);
+ if(sweep.g == nil)
+ sweep.g = __go_go(bgsweep, nil);
+ else if(sweep.parked) {
+ sweep.parked = false;
+ runtime_ready(sweep.g);
+ }
+ runtime_unlock(&gclock);
+ } else {
+ // Sweep all spans eagerly.
+ while(runtime_sweepone() != (uintptr)-1)
+ gcstats.npausesweep++;
+ }
+
runtime_MProf_GC();
}
+extern uintptr runtime_sizeof_C_MStats
+ __asm__ (GOSYM_PREFIX "runtime.Sizeof_C_MStats");
+
void runtime_ReadMemStats(MStats *)
__asm__ (GOSYM_PREFIX "runtime.ReadMemStats");
@@ -2281,7 +2405,9 @@ runtime_ReadMemStats(MStats *stats)
m->gcing = 1;
runtime_stoptheworld();
updatememstats(nil);
- *stats = mstats;
+ // Size of the trailing by_size array differs between Go and C;
+ // NumSizeClasses was changed, but we cannot change the Go struct because of backward compatibility.
+ runtime_memmove(stats, &mstats, runtime_sizeof_C_MStats);
m->gcing = 0;
m->locks++;
runtime_semrelease(&runtime_worldsema);
@@ -2366,15 +2492,15 @@ runfinq(void* dummy __attribute__ ((unused)))
Iface iface;
for(;;) {
- runtime_lock(&finlock);
+ runtime_lock(&gclock);
fb = finq;
finq = nil;
if(fb == nil) {
fingwait = 1;
- runtime_park(runtime_unlock, &finlock, "finalizer wait");
+ runtime_parkunlock(&gclock, "finalizer wait");
continue;
}
- runtime_unlock(&finlock);
+ runtime_unlock(&gclock);
if(raceenabled)
runtime_racefingo();
for(; fb; fb=next) {
@@ -2385,12 +2511,12 @@ runfinq(void* dummy __attribute__ ((unused)))
f = &fb->fin[i];
fint = ((const Type**)f->ft->__in.array)[0];
- if(fint->kind == KindPtr) {
+ if(fint->__code == KindPtr) {
// direct use of pointer
param = &f->arg;
} else if(((const InterfaceType*)fint)->__methods.__count == 0) {
// convert to empty interface
- ef.type = (const Type*)f->ot;
+ ef.__type_descriptor = (const Type*)f->ot;
ef.__object = f->arg;
param = &ef;
} else {
@@ -2585,50 +2711,6 @@ runtime_unmarkspan(void *v, uintptr n)
*b-- = 0;
}
-bool
-runtime_blockspecial(void *v)
-{
- uintptr *b, off, shift;
-
- if(DebugMark)
- return true;
-
- off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
- b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
- shift = off % wordsPerBitmapWord;
-
- return (*b & (bitSpecial<<shift)) != 0;
-}
-
-void
-runtime_setblockspecial(void *v, bool s)
-{
- uintptr *b, off, shift, bits, obits;
-
- if(DebugMark)
- return;
-
- off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
- b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
- shift = off % wordsPerBitmapWord;
-
- for(;;) {
- obits = *b;
- if(s)
- bits = obits | (bitSpecial<<shift);
- else
- bits = obits & ~(bitSpecial<<shift);
- if(runtime_gomaxprocs == 1) {
- *b = bits;
- break;
- } else {
- // more than one goroutine is potentially running: use atomic op
- if(runtime_casp((void**)b, (void*)obits, (void*)bits))
- break;
- }
- }
-}
-
void
runtime_MHeap_MapBits(MHeap *h)
{
diff --git a/libgo/runtime/mheap.c b/libgo/runtime/mheap.c
index 1b8ab79..3a5eb15 100644
--- a/libgo/runtime/mheap.c
+++ b/libgo/runtime/mheap.c
@@ -41,7 +41,10 @@ RecordSpan(void *vh, byte *p)
runtime_throw("runtime: cannot allocate memory");
if(h->allspans) {
runtime_memmove(all, h->allspans, h->nspancap*sizeof(all[0]));
- runtime_SysFree(h->allspans, h->nspancap*sizeof(all[0]), &mstats.other_sys);
+ // Don't free the old array if it's referenced by sweep.
+ // See the comment in mgc0.c.
+ if(h->allspans != runtime_mheap.sweepspans)
+ runtime_SysFree(h->allspans, h->nspancap*sizeof(all[0]), &mstats.other_sys);
}
h->allspans = all;
h->nspancap = cap;
@@ -57,10 +60,15 @@ runtime_MHeap_Init(MHeap *h)
runtime_FixAlloc_Init(&h->spanalloc, sizeof(MSpan), RecordSpan, h, &mstats.mspan_sys);
runtime_FixAlloc_Init(&h->cachealloc, sizeof(MCache), nil, nil, &mstats.mcache_sys);
+ runtime_FixAlloc_Init(&h->specialfinalizeralloc, sizeof(SpecialFinalizer), nil, nil, &mstats.other_sys);
+ runtime_FixAlloc_Init(&h->specialprofilealloc, sizeof(SpecialProfile), nil, nil, &mstats.other_sys);
// h->mapcache needs no init
- for(i=0; i<nelem(h->free); i++)
+ for(i=0; i<nelem(h->free); i++) {
runtime_MSpanList_Init(&h->free[i]);
- runtime_MSpanList_Init(&h->large);
+ runtime_MSpanList_Init(&h->busy[i]);
+ }
+ runtime_MSpanList_Init(&h->freelarge);
+ runtime_MSpanList_Init(&h->busylarge);
for(i=0; i<nelem(h->central); i++)
runtime_MCentral_Init(&h->central[i], i);
}
@@ -84,10 +92,86 @@ runtime_MHeap_MapSpans(MHeap *h)
h->spans_mapped = n;
}
+// Sweeps spans in list until reclaims at least npages into heap.
+// Returns the actual number of pages reclaimed.
+static uintptr
+MHeap_ReclaimList(MHeap *h, MSpan *list, uintptr npages)
+{
+ MSpan *s;
+ uintptr n;
+ uint32 sg;
+
+ n = 0;
+ sg = runtime_mheap.sweepgen;
+retry:
+ for(s = list->next; s != list; s = s->next) {
+ if(s->sweepgen == sg-2 && runtime_cas(&s->sweepgen, sg-2, sg-1)) {
+ runtime_MSpanList_Remove(s);
+ // swept spans are at the end of the list
+ runtime_MSpanList_InsertBack(list, s);
+ runtime_unlock(h);
+ n += runtime_MSpan_Sweep(s);
+ runtime_lock(h);
+ if(n >= npages)
+ return n;
+ // the span could have been moved elsewhere
+ goto retry;
+ }
+ if(s->sweepgen == sg-1) {
+ // the span is being swept by background sweeper, skip
+ continue;
+ }
+ // already swept empty span,
+ // all subsequent ones must also be either swept or in process of sweeping
+ break;
+ }
+ return n;
+}
+
+// Sweeps and reclaims at least npage pages into heap.
+// Called before allocating npage pages.
+static void
+MHeap_Reclaim(MHeap *h, uintptr npage)
+{
+ uintptr reclaimed, n;
+
+ // First try to sweep busy spans with large objects of size >= npage,
+ // this has good chances of reclaiming the necessary space.
+ for(n=npage; n < nelem(h->busy); n++) {
+ if(MHeap_ReclaimList(h, &h->busy[n], npage))
+ return; // Bingo!
+ }
+
+ // Then -- even larger objects.
+ if(MHeap_ReclaimList(h, &h->busylarge, npage))
+ return; // Bingo!
+
+ // Now try smaller objects.
+ // One such object is not enough, so we need to reclaim several of them.
+ reclaimed = 0;
+ for(n=0; n < npage && n < nelem(h->busy); n++) {
+ reclaimed += MHeap_ReclaimList(h, &h->busy[n], npage-reclaimed);
+ if(reclaimed >= npage)
+ return;
+ }
+
+ // Now sweep everything that is not yet swept.
+ runtime_unlock(h);
+ for(;;) {
+ n = runtime_sweepone();
+ if(n == (uintptr)-1) // all spans are swept
+ break;
+ reclaimed += n;
+ if(reclaimed >= npage)
+ break;
+ }
+ runtime_lock(h);
+}
+
// Allocate a new span of npage pages from the heap
// and record its size class in the HeapMap and HeapMapCache.
MSpan*
-runtime_MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, int32 acct, int32 zeroed)
+runtime_MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, bool large, bool needzero)
{
MSpan *s;
@@ -97,14 +181,22 @@ runtime_MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, int32 acct, int32
s = MHeap_AllocLocked(h, npage, sizeclass);
if(s != nil) {
mstats.heap_inuse += npage<<PageShift;
- if(acct) {
+ if(large) {
mstats.heap_objects++;
mstats.heap_alloc += npage<<PageShift;
+ // Swept spans are at the end of lists.
+ if(s->npages < nelem(h->free))
+ runtime_MSpanList_InsertBack(&h->busy[s->npages], s);
+ else
+ runtime_MSpanList_InsertBack(&h->busylarge, s);
}
}
runtime_unlock(h);
- if(s != nil && *(uintptr*)(s->start<<PageShift) != 0 && zeroed)
- runtime_memclr((byte*)(s->start<<PageShift), s->npages<<PageShift);
+ if(s != nil) {
+ if(needzero && s->needzero)
+ runtime_memclr((byte*)(s->start<<PageShift), s->npages<<PageShift);
+ s->needzero = 0;
+ }
return s;
}
@@ -115,6 +207,11 @@ MHeap_AllocLocked(MHeap *h, uintptr npage, int32 sizeclass)
MSpan *s, *t;
PageID p;
+ // To prevent excessive heap growth, before allocating n pages
+ // we need to sweep and reclaim at least n pages.
+ if(!h->sweepdone)
+ MHeap_Reclaim(h, npage);
+
// Try in fixed-size lists up to max.
for(n=npage; n < nelem(h->free); n++) {
if(!runtime_MSpanList_IsEmpty(&h->free[n])) {
@@ -138,29 +235,12 @@ HaveSpan:
if(s->npages < npage)
runtime_throw("MHeap_AllocLocked - bad npages");
runtime_MSpanList_Remove(s);
+ runtime_atomicstore(&s->sweepgen, h->sweepgen);
s->state = MSpanInUse;
mstats.heap_idle -= s->npages<<PageShift;
mstats.heap_released -= s->npreleased<<PageShift;
- if(s->npreleased > 0) {
- // We have called runtime_SysUnused with these pages, and on
- // Unix systems it called madvise. At this point at least
- // some BSD-based kernels will return these pages either as
- // zeros or with the old data. For our caller, the first word
- // in the page indicates whether the span contains zeros or
- // not (this word was set when the span was freed by
- // MCentral_Free or runtime_MCentral_FreeSpan). If the first
- // page in the span is returned as zeros, and some subsequent
- // page is returned with the old data, then we will be
- // returning a span that is assumed to be all zeros, but the
- // actual data will not be all zeros. Avoid that problem by
- // explicitly marking the span as not being zeroed, just in
- // case. The beadbead constant we use here means nothing, it
- // is just a unique constant not seen elsewhere in the
- // runtime, as a clue in case it turns up unexpectedly in
- // memory or in a stack trace.
+ if(s->npreleased > 0)
runtime_SysUsed((void*)(s->start<<PageShift), s->npages<<PageShift);
- *(uintptr*)(s->start<<PageShift) = (uintptr)0xbeadbeadbeadbeadULL;
- }
s->npreleased = 0;
if(s->npages > npage) {
@@ -174,7 +254,8 @@ HaveSpan:
h->spans[p-1] = s;
h->spans[p] = t;
h->spans[p+t->npages-1] = t;
- *(uintptr*)(t->start<<PageShift) = *(uintptr*)(s->start<<PageShift); // copy "needs zeroing" mark
+ t->needzero = s->needzero;
+ runtime_atomicstore(&t->sweepgen, h->sweepgen);
t->state = MSpanInUse;
MHeap_FreeLocked(h, t);
t->unusedsince = s->unusedsince; // preserve age
@@ -197,7 +278,7 @@ HaveSpan:
static MSpan*
MHeap_AllocLarge(MHeap *h, uintptr npage)
{
- return BestFit(&h->large, npage, nil);
+ return BestFit(&h->freelarge, npage, nil);
}
// Search list for smallest span with >= npage pages.
@@ -258,6 +339,7 @@ MHeap_Grow(MHeap *h, uintptr npage)
p -= ((uintptr)h->arena_start>>PageShift);
h->spans[p] = s;
h->spans[p + s->npages - 1] = s;
+ runtime_atomicstore(&s->sweepgen, h->sweepgen);
s->state = MSpanInUse;
MHeap_FreeLocked(h, s);
return true;
@@ -319,20 +401,19 @@ runtime_MHeap_Free(MHeap *h, MSpan *s, int32 acct)
static void
MHeap_FreeLocked(MHeap *h, MSpan *s)
{
- uintptr *sp, *tp;
MSpan *t;
PageID p;
s->types.compression = MTypes_Empty;
- if(s->state != MSpanInUse || s->ref != 0) {
- runtime_printf("MHeap_FreeLocked - span %p ptr %p state %d ref %d\n", s, s->start<<PageShift, s->state, s->ref);
+ if(s->state != MSpanInUse || s->ref != 0 || s->sweepgen != h->sweepgen) {
+ runtime_printf("MHeap_FreeLocked - span %p ptr %p state %d ref %d sweepgen %d/%d\n",
+ s, s->start<<PageShift, s->state, s->ref, s->sweepgen, h->sweepgen);
runtime_throw("MHeap_FreeLocked - invalid free");
}
mstats.heap_idle += s->npages<<PageShift;
s->state = MSpanFree;
runtime_MSpanList_Remove(s);
- sp = (uintptr*)(s->start<<PageShift);
// Stamp newly unused spans. The scavenger will use that
// info to potentially give back some pages to the OS.
s->unusedsince = runtime_nanotime();
@@ -342,13 +423,10 @@ MHeap_FreeLocked(MHeap *h, MSpan *s)
p = s->start;
p -= (uintptr)h->arena_start >> PageShift;
if(p > 0 && (t = h->spans[p-1]) != nil && t->state != MSpanInUse) {
- if(t->npreleased == 0) { // cant't touch this otherwise
- tp = (uintptr*)(t->start<<PageShift);
- *tp |= *sp; // propagate "needs zeroing" mark
- }
s->start = t->start;
s->npages += t->npages;
s->npreleased = t->npreleased; // absorb released pages
+ s->needzero |= t->needzero;
p -= t->npages;
h->spans[p] = s;
runtime_MSpanList_Remove(t);
@@ -356,12 +434,9 @@ MHeap_FreeLocked(MHeap *h, MSpan *s)
runtime_FixAlloc_Free(&h->spanalloc, t);
}
if((p+s->npages)*sizeof(h->spans[0]) < h->spans_mapped && (t = h->spans[p+s->npages]) != nil && t->state != MSpanInUse) {
- if(t->npreleased == 0) { // cant't touch this otherwise
- tp = (uintptr*)(t->start<<PageShift);
- *sp |= *tp; // propagate "needs zeroing" mark
- }
s->npages += t->npages;
s->npreleased += t->npreleased;
+ s->needzero |= t->needzero;
h->spans[p + s->npages - 1] = s;
runtime_MSpanList_Remove(t);
t->state = MSpanDead;
@@ -372,7 +447,7 @@ MHeap_FreeLocked(MHeap *h, MSpan *s)
if(s->npages < nelem(h->free))
runtime_MSpanList_Insert(&h->free[s->npages], s);
else
- runtime_MSpanList_Insert(&h->large, s);
+ runtime_MSpanList_Insert(&h->freelarge, s);
}
static void
@@ -427,7 +502,7 @@ scavenge(int32 k, uint64 now, uint64 limit)
sumreleased = 0;
for(i=0; i < nelem(h->free); i++)
sumreleased += scavengelist(&h->free[i], now, limit);
- sumreleased += scavengelist(&h->large, now, limit);
+ sumreleased += scavengelist(&h->freelarge, now, limit);
if(runtime_debug.gctrace > 0) {
if(sumreleased > 0)
@@ -516,10 +591,13 @@ runtime_MSpan_Init(MSpan *span, PageID start, uintptr npages)
span->ref = 0;
span->sizeclass = 0;
span->elemsize = 0;
- span->state = 0;
+ span->state = MSpanDead;
span->unusedsince = 0;
span->npreleased = 0;
span->types.compression = MTypes_Empty;
+ span->specialLock.key = 0;
+ span->specials = nil;
+ span->needzero = 0;
}
// Initialize an empty doubly-linked list.
@@ -561,4 +639,212 @@ runtime_MSpanList_Insert(MSpan *list, MSpan *span)
span->prev->next = span;
}
+void
+runtime_MSpanList_InsertBack(MSpan *list, MSpan *span)
+{
+ if(span->next != nil || span->prev != nil) {
+ runtime_printf("failed MSpanList_Insert %p %p %p\n", span, span->next, span->prev);
+ runtime_throw("MSpanList_Insert");
+ }
+ span->next = list;
+ span->prev = list->prev;
+ span->next->prev = span;
+ span->prev->next = span;
+}
+
+// Adds the special record s to the list of special records for
+// the object p. All fields of s should be filled in except for
+// offset & next, which this routine will fill in.
+// Returns true if the special was successfully added, false otherwise.
+// (The add will fail only if a record with the same p and s->kind
+// already exists.)
+static bool
+addspecial(void *p, Special *s)
+{
+ MSpan *span;
+ Special **t, *x;
+ uintptr offset;
+ byte kind;
+
+ span = runtime_MHeap_LookupMaybe(&runtime_mheap, p);
+ if(span == nil)
+ runtime_throw("addspecial on invalid pointer");
+
+ // Ensure that the span is swept.
+ // GC accesses specials list w/o locks. And it's just much safer.
+ runtime_MSpan_EnsureSwept(span);
+
+ offset = (uintptr)p - (span->start << PageShift);
+ kind = s->kind;
+
+ runtime_lock(&span->specialLock);
+
+ // Find splice point, check for existing record.
+ t = &span->specials;
+ while((x = *t) != nil) {
+ if(offset == x->offset && kind == x->kind) {
+ runtime_unlock(&span->specialLock);
+ return false; // already exists
+ }
+ if(offset < x->offset || (offset == x->offset && kind < x->kind))
+ break;
+ t = &x->next;
+ }
+ // Splice in record, fill in offset.
+ s->offset = offset;
+ s->next = x;
+ *t = s;
+ runtime_unlock(&span->specialLock);
+ return true;
+}
+// Removes the Special record of the given kind for the object p.
+// Returns the record if the record existed, nil otherwise.
+// The caller must FixAlloc_Free the result.
+static Special*
+removespecial(void *p, byte kind)
+{
+ MSpan *span;
+ Special *s, **t;
+ uintptr offset;
+
+ span = runtime_MHeap_LookupMaybe(&runtime_mheap, p);
+ if(span == nil)
+ runtime_throw("removespecial on invalid pointer");
+
+ // Ensure that the span is swept.
+ // GC accesses specials list w/o locks. And it's just much safer.
+ runtime_MSpan_EnsureSwept(span);
+
+ offset = (uintptr)p - (span->start << PageShift);
+
+ runtime_lock(&span->specialLock);
+ t = &span->specials;
+ while((s = *t) != nil) {
+ // This function is used for finalizers only, so we don't check for
+ // "interior" specials (p must be exactly equal to s->offset).
+ if(offset == s->offset && kind == s->kind) {
+ *t = s->next;
+ runtime_unlock(&span->specialLock);
+ return s;
+ }
+ t = &s->next;
+ }
+ runtime_unlock(&span->specialLock);
+ return nil;
+}
+
+// Adds a finalizer to the object p. Returns true if it succeeded.
+bool
+runtime_addfinalizer(void *p, FuncVal *f, const FuncType *ft, const PtrType *ot)
+{
+ SpecialFinalizer *s;
+
+ runtime_lock(&runtime_mheap.speciallock);
+ s = runtime_FixAlloc_Alloc(&runtime_mheap.specialfinalizeralloc);
+ runtime_unlock(&runtime_mheap.speciallock);
+ s->kind = KindSpecialFinalizer;
+ s->fn = f;
+ s->ft = ft;
+ s->ot = ot;
+ if(addspecial(p, s))
+ return true;
+
+ // There was an old finalizer
+ runtime_lock(&runtime_mheap.speciallock);
+ runtime_FixAlloc_Free(&runtime_mheap.specialfinalizeralloc, s);
+ runtime_unlock(&runtime_mheap.speciallock);
+ return false;
+}
+
+// Removes the finalizer (if any) from the object p.
+void
+runtime_removefinalizer(void *p)
+{
+ SpecialFinalizer *s;
+
+ s = (SpecialFinalizer*)removespecial(p, KindSpecialFinalizer);
+ if(s == nil)
+ return; // there wasn't a finalizer to remove
+ runtime_lock(&runtime_mheap.speciallock);
+ runtime_FixAlloc_Free(&runtime_mheap.specialfinalizeralloc, s);
+ runtime_unlock(&runtime_mheap.speciallock);
+}
+
+// Set the heap profile bucket associated with addr to b.
+void
+runtime_setprofilebucket(void *p, Bucket *b)
+{
+ SpecialProfile *s;
+
+ runtime_lock(&runtime_mheap.speciallock);
+ s = runtime_FixAlloc_Alloc(&runtime_mheap.specialprofilealloc);
+ runtime_unlock(&runtime_mheap.speciallock);
+ s->kind = KindSpecialProfile;
+ s->b = b;
+ if(!addspecial(p, s))
+ runtime_throw("setprofilebucket: profile already set");
+}
+
+// Do whatever cleanup needs to be done to deallocate s. It has
+// already been unlinked from the MSpan specials list.
+// Returns true if we should keep working on deallocating p.
+bool
+runtime_freespecial(Special *s, void *p, uintptr size, bool freed)
+{
+ SpecialFinalizer *sf;
+ SpecialProfile *sp;
+
+ switch(s->kind) {
+ case KindSpecialFinalizer:
+ sf = (SpecialFinalizer*)s;
+ runtime_queuefinalizer(p, sf->fn, sf->ft, sf->ot);
+ runtime_lock(&runtime_mheap.speciallock);
+ runtime_FixAlloc_Free(&runtime_mheap.specialfinalizeralloc, sf);
+ runtime_unlock(&runtime_mheap.speciallock);
+ return false; // don't free p until finalizer is done
+ case KindSpecialProfile:
+ sp = (SpecialProfile*)s;
+ runtime_MProf_Free(sp->b, p, size, freed);
+ runtime_lock(&runtime_mheap.speciallock);
+ runtime_FixAlloc_Free(&runtime_mheap.specialprofilealloc, sp);
+ runtime_unlock(&runtime_mheap.speciallock);
+ return true;
+ default:
+ runtime_throw("bad special kind");
+ return true;
+ }
+}
+
+// Free all special records for p.
+void
+runtime_freeallspecials(MSpan *span, void *p, uintptr size)
+{
+ Special *s, **t, *list;
+ uintptr offset;
+
+	// First, collect all specials into the list; then free them.
+	// This is required to avoid a deadlock between span->specialLock and proflock.
+ list = nil;
+ offset = (uintptr)p - (span->start << PageShift);
+ runtime_lock(&span->specialLock);
+ t = &span->specials;
+ while((s = *t) != nil) {
+ if(offset + size <= s->offset)
+ break;
+ if(offset <= s->offset) {
+ *t = s->next;
+ s->next = list;
+ list = s;
+ } else
+ t = &s->next;
+ }
+ runtime_unlock(&span->specialLock);
+
+ while(list != nil) {
+ s = list;
+ list = s->next;
+ if(!runtime_freespecial(s, p, size, true))
+ runtime_throw("can't explicitly free an object with a finalizer");
+ }
+}
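
The specials machinery added above keeps, per span, a singly-linked list sorted by (offset, kind), so duplicate detection and removal stay cheap. The self-contained C sketch below (not part of the patch) shows just that splice logic; the field widths are invented, and the span lookup, sweeping and locking that the real addspecial/removespecial perform are omitted.

    /* Ordered singly-linked list keyed by (offset, kind), in the spirit of
       addspecial/removespecial. */
    #include <stdio.h>
    #include <stdint.h>
    #include <stdbool.h>
    #include <stddef.h>

    typedef struct Special Special;
    struct Special {
        Special  *next;
        uint16_t  offset;
        uint8_t   kind;
    };

    /* Insert s keeping the list sorted by (offset, kind); reject duplicates. */
    static bool addspecial(Special **head, Special *s)
    {
        Special **t = head, *x;

        while ((x = *t) != NULL) {
            if (s->offset == x->offset && s->kind == x->kind)
                return false;                   /* already exists */
            if (s->offset < x->offset ||
                (s->offset == x->offset && s->kind < x->kind))
                break;                          /* splice point found */
            t = &x->next;
        }
        s->next = x;
        *t = s;
        return true;
    }

    /* Remove and return the record with exactly this offset and kind. */
    static Special *removespecial(Special **head, uint16_t offset, uint8_t kind)
    {
        Special **t = head, *s;

        while ((s = *t) != NULL) {
            if (s->offset == offset && s->kind == kind) {
                *t = s->next;
                return s;
            }
            t = &s->next;
        }
        return NULL;
    }

    int main(void)
    {
        Special *head = NULL;
        Special a = {NULL, 64, 1}, b = {NULL, 16, 2}, dup = {NULL, 64, 1};

        printf("add a: %d, add b: %d, add duplicate: %d\n",
               addspecial(&head, &a), addspecial(&head, &b), addspecial(&head, &dup));
        printf("removed record at offset 16: %p\n", (void*)removespecial(&head, 16, 2));
        return 0;
    }
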
diff --git a/libgo/runtime/mprof.goc b/libgo/runtime/mprof.goc
index 469ddfe..24f8fe5 100644
--- a/libgo/runtime/mprof.goc
+++ b/libgo/runtime/mprof.goc
@@ -23,7 +23,6 @@ enum { MProf, BProf }; // profile types
// Per-call-stack profiling information.
// Lookup by hashing call stack into a linked-list hash table.
-typedef struct Bucket Bucket;
struct Bucket
{
Bucket *next; // next in hash list
@@ -35,14 +34,33 @@ struct Bucket
{
struct // typ == MProf
{
+ // The following complex 3-stage scheme of stats accumulation
+ // is required to obtain a consistent picture of mallocs and frees
+ // for some point in time.
+ // The problem is that mallocs come in real time, while frees
+	// come only after a GC, during concurrent sweeping. So if we counted
+	// them naively, we would get a skew toward mallocs.
+ //
+ // Mallocs are accounted in recent stats.
+ // Explicit frees are accounted in recent stats.
+ // GC frees are accounted in prev stats.
+ // After GC prev stats are added to final stats and
+ // recent stats are moved into prev stats.
uintptr allocs;
uintptr frees;
uintptr alloc_bytes;
uintptr free_bytes;
- uintptr recent_allocs; // since last gc
+
+ uintptr prev_allocs; // since last but one till last gc
+ uintptr prev_frees;
+ uintptr prev_alloc_bytes;
+ uintptr prev_free_bytes;
+
+ uintptr recent_allocs; // since last gc till now
uintptr recent_frees;
uintptr recent_alloc_bytes;
uintptr recent_free_bytes;
+
};
struct // typ == BProf
{
@@ -50,7 +68,8 @@ struct Bucket
int64 cycles;
};
};
- uintptr hash;
+ uintptr hash; // hash of size + stk
+ uintptr size;
uintptr nstk;
Location stk[1];
};
@@ -64,7 +83,7 @@ static uintptr bucketmem;
// Return the bucket for stk[0:nstk], allocating new bucket if needed.
static Bucket*
-stkbucket(int32 typ, Location *stk, int32 nstk, bool alloc)
+stkbucket(int32 typ, uintptr size, Location *stk, int32 nstk, bool alloc)
{
int32 i, j;
uintptr h;
@@ -83,12 +102,17 @@ stkbucket(int32 typ, Location *stk, int32 nstk, bool alloc)
h += h<<10;
h ^= h>>6;
}
+ // hash in size
+ h += size;
+ h += h<<10;
+ h ^= h>>6;
+ // finalize
h += h<<3;
h ^= h>>11;
i = h%BuckHashSize;
for(b = buckhash[i]; b; b=b->next) {
- if(b->typ == typ && b->hash == h && b->nstk == (uintptr)nstk) {
+ if(b->typ == typ && b->hash == h && b->size == size && b->nstk == (uintptr)nstk) {
for(j = 0; j < nstk; j++) {
if(b->stk[j].pc != stk[j].pc ||
b->stk[j].lineno != stk[j].lineno ||
@@ -108,6 +132,7 @@ stkbucket(int32 typ, Location *stk, int32 nstk, bool alloc)
runtime_memmove(b->stk, stk, nstk*sizeof stk[0]);
b->typ = typ;
b->hash = h;
+ b->size = size;
b->nstk = nstk;
b->next = buckhash[i];
buckhash[i] = b;
@@ -127,10 +152,16 @@ MProf_GC(void)
Bucket *b;
for(b=mbuckets; b; b=b->allnext) {
- b->allocs += b->recent_allocs;
- b->frees += b->recent_frees;
- b->alloc_bytes += b->recent_alloc_bytes;
- b->free_bytes += b->recent_free_bytes;
+ b->allocs += b->prev_allocs;
+ b->frees += b->prev_frees;
+ b->alloc_bytes += b->prev_alloc_bytes;
+ b->free_bytes += b->prev_free_bytes;
+
+ b->prev_allocs = b->recent_allocs;
+ b->prev_frees = b->recent_frees;
+ b->prev_alloc_bytes = b->recent_alloc_bytes;
+ b->prev_free_bytes = b->recent_free_bytes;
+
b->recent_allocs = 0;
b->recent_frees = 0;
b->recent_alloc_bytes = 0;
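
The rotation above is easier to follow in isolation. The small standalone C model below (not part of the patch) captures the recent/prev/final accounting: mallocs are counted in recent, GC frees in prev, and each MProf_GC folds prev into the published totals while moving recent into prev, so the totals only reflect completed GC cycles. The Bucket here is trimmed to bare counters.

    /* Three-stage malloc/free accounting, as described in the comment above. */
    #include <stdio.h>
    #include <stddef.h>

    typedef struct {
        size_t allocs, frees;                 /* consistent, published totals */
        size_t prev_allocs, prev_frees;       /* since last-but-one GC */
        size_t recent_allocs, recent_frees;   /* since last GC */
    } Bucket;

    static void record_malloc(Bucket *b)  { b->recent_allocs++; }
    static void record_gc_free(Bucket *b) { b->prev_frees++; }

    /* Rotation performed once per GC cycle. */
    static void mprof_gc(Bucket *b)
    {
        b->allocs += b->prev_allocs;
        b->frees  += b->prev_frees;
        b->prev_allocs = b->recent_allocs;
        b->prev_frees  = b->recent_frees;
        b->recent_allocs = 0;
        b->recent_frees  = 0;
    }

    int main(void)
    {
        Bucket b = {0};
        record_malloc(&b);
        record_malloc(&b);
        mprof_gc(&b);            /* the two mallocs move from recent to prev */
        record_gc_free(&b);      /* the concurrent sweep frees one of them */
        mprof_gc(&b);            /* prev (2 allocs, 1 free) becomes final */
        printf("allocs=%zu frees=%zu in-use=%zu\n",
               b.allocs, b.frees, b.allocs - b.frees);
        return 0;
    }
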
@@ -147,115 +178,6 @@ runtime_MProf_GC(void)
runtime_unlock(&proflock);
}
-// Map from pointer to Bucket* that allocated it.
-// Three levels:
-// Linked-list hash table for top N-AddrHashShift bits.
-// Array index for next AddrDenseBits bits.
-// Linked list for next AddrHashShift-AddrDenseBits bits.
-// This is more efficient than using a general map,
-// because of the typical clustering of the pointer keys.
-
-typedef struct AddrHash AddrHash;
-typedef struct AddrEntry AddrEntry;
-
-enum {
- AddrHashBits = 12, // good for 4GB of used address space
- AddrHashShift = 20, // each AddrHash knows about 1MB of address space
- AddrDenseBits = 8, // good for a profiling rate of 4096 bytes
-};
-
-struct AddrHash
-{
- AddrHash *next; // next in top-level hash table linked list
- uintptr addr; // addr>>20
- AddrEntry *dense[1<<AddrDenseBits];
-};
-
-struct AddrEntry
-{
- AddrEntry *next; // next in bottom-level linked list
- uint32 addr;
- Bucket *b;
-};
-
-static AddrHash **addrhash; // points to (AddrHash*)[1<<AddrHashBits]
-static AddrEntry *addrfree;
-static uintptr addrmem;
-
-// Multiplicative hash function:
-// hashMultiplier is the bottom 32 bits of int((sqrt(5)-1)/2 * (1<<32)).
-// This is a good multiplier as suggested in CLR, Knuth. The hash
-// value is taken to be the top AddrHashBits bits of the bottom 32 bits
-// of the multiplied value.
-enum {
- HashMultiplier = 2654435769U
-};
-
-// Set the bucket associated with addr to b.
-static void
-setaddrbucket(uintptr addr, Bucket *b)
-{
- int32 i;
- uint32 h;
- AddrHash *ah;
- AddrEntry *e;
-
- h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
- for(ah=addrhash[h]; ah; ah=ah->next)
- if(ah->addr == (addr>>AddrHashShift))
- goto found;
-
- ah = runtime_persistentalloc(sizeof *ah, 0, &mstats.buckhash_sys);
- addrmem += sizeof *ah;
- ah->next = addrhash[h];
- ah->addr = addr>>AddrHashShift;
- addrhash[h] = ah;
-
-found:
- if((e = addrfree) == nil) {
- e = runtime_persistentalloc(64*sizeof *e, 0, &mstats.buckhash_sys);
- addrmem += 64*sizeof *e;
- for(i=0; i+1<64; i++)
- e[i].next = &e[i+1];
- e[63].next = nil;
- }
- addrfree = e->next;
- e->addr = (uint32)~(addr & ((1<<AddrHashShift)-1));
- e->b = b;
- h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1); // entry in dense is top 8 bits of low 20.
- e->next = ah->dense[h];
- ah->dense[h] = e;
-}
-
-// Get the bucket associated with addr and clear the association.
-static Bucket*
-getaddrbucket(uintptr addr)
-{
- uint32 h;
- AddrHash *ah;
- AddrEntry *e, **l;
- Bucket *b;
-
- h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
- for(ah=addrhash[h]; ah; ah=ah->next)
- if(ah->addr == (addr>>AddrHashShift))
- goto found;
- return nil;
-
-found:
- h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1); // entry in dense is top 8 bits of low 20.
- for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) {
- if(e->addr == (uint32)~(addr & ((1<<AddrHashShift)-1))) {
- *l = e->next;
- b = e->b;
- e->next = addrfree;
- addrfree = e;
- return b;
- }
- }
- return nil;
-}
-
static const char*
typeinfoname(int32 typeinfo)
{
@@ -285,6 +207,18 @@ printstackframes(Location *stk, int32 nstk)
}
}
+// Called by collector to report a gc in allocfreetrace mode.
+void
+runtime_MProf_TraceGC(void)
+{
+ Location stk[32];
+ int32 nstk;
+
+ nstk = runtime_callers(1, stk, nelem(stk));
+ runtime_printf("MProf_TraceGC\n");
+ printstackframes(stk, nstk);
+}
+
// Called by malloc to record a profiled block.
void
runtime_MProf_Malloc(void *p, uintptr size, uintptr typ)
@@ -295,39 +229,44 @@ runtime_MProf_Malloc(void *p, uintptr size, uintptr typ)
const char *name;
int32 nstk;
- nstk = runtime_callers(1, stk, 32);
+ nstk = runtime_callers(1, stk, nelem(stk));
runtime_lock(&proflock);
- if(runtime_debug.allocfreetrace) {
+ if(runtime_debug.allocfreetrace) {
type = (Type*)(typ & ~3);
name = typeinfoname(typ & 3);
runtime_printf("MProf_Malloc(p=%p, size=%p, type=%p <%s", p, size, type, name);
if(type != nil)
- runtime_printf(" of %S", *type->__reflection);
+ runtime_printf(" of %S", *type->__reflection);
runtime_printf(">)\n");
printstackframes(stk, nstk);
}
- b = stkbucket(MProf, stk, nstk, true);
+ b = stkbucket(MProf, size, stk, nstk, true);
b->recent_allocs++;
b->recent_alloc_bytes += size;
- setaddrbucket((uintptr)p, b);
runtime_unlock(&proflock);
+
+ // Setprofilebucket locks a bunch of other mutexes, so we call it outside of proflock.
+ // This reduces potential contention and chances of deadlocks.
+ // Since the object must be alive during call to MProf_Malloc,
+ // it's fine to do this non-atomically.
+ runtime_setprofilebucket(p, b);
}
// Called when freeing a profiled block.
void
-runtime_MProf_Free(void *p, uintptr size)
+runtime_MProf_Free(Bucket *b, void *p, uintptr size, bool freed)
{
- Bucket *b;
-
runtime_lock(&proflock);
- b = getaddrbucket((uintptr)p);
- if(b != nil) {
+ if(freed) {
b->recent_frees++;
b->recent_free_bytes += size;
- if(runtime_debug.allocfreetrace) {
- runtime_printf("MProf_Free(p=%p, size=%p)\n", p, size);
- printstackframes(b->stk, b->nstk);
- }
+ } else {
+ b->prev_frees++;
+ b->prev_free_bytes += size;
+ }
+ if(runtime_debug.allocfreetrace) {
+ runtime_printf("MProf_Free(p=%p, size=%p)\n", p, size);
+ printstackframes(b->stk, b->nstk);
}
runtime_unlock(&proflock);
}
@@ -366,9 +305,9 @@ runtime_blockevent(int64 cycles, int32 skip)
if(rate <= 0 || (rate > cycles && runtime_fastrand1()%rate > cycles))
return;
- nstk = runtime_callers(skip, stk, 32);
+ nstk = runtime_callers(skip, stk, nelem(stk));
runtime_lock(&proflock);
- b = stkbucket(BProf, stk, nstk, true);
+ b = stkbucket(BProf, 0, stk, nstk, true);
b->count++;
b->cycles += cycles;
runtime_unlock(&proflock);
@@ -420,6 +359,7 @@ func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) {
// garbage collection is disabled from the beginning of execution,
// accumulate stats as if a GC just happened, and recount buckets.
MProf_GC();
+ MProf_GC();
n = 0;
for(b=mbuckets; b; b=b->allnext)
if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
@@ -437,13 +377,11 @@ func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) {
}
void
-runtime_MProf_Mark(void (*addroot)(Obj))
+runtime_MProf_Mark(struct Workbuf **wbufp, void (*enqueue1)(struct Workbuf**, Obj))
{
// buckhash is not allocated via mallocgc.
- addroot((Obj){(byte*)&mbuckets, sizeof mbuckets, 0});
- addroot((Obj){(byte*)&bbuckets, sizeof bbuckets, 0});
- addroot((Obj){(byte*)&addrhash, sizeof addrhash, 0});
- addroot((Obj){(byte*)&addrfree, sizeof addrfree, 0});
+ enqueue1(wbufp, (Obj){(byte*)&mbuckets, sizeof mbuckets, 0});
+ enqueue1(wbufp, (Obj){(byte*)&bbuckets, sizeof bbuckets, 0});
}
// Must match BlockProfileRecord in debug.go.
@@ -568,6 +506,7 @@ saveg(G *gp, TRecord *r)
}
func GoroutineProfile(b Slice) (n int, ok bool) {
+ uintptr i;
TRecord *r;
G *gp;
@@ -584,7 +523,8 @@ func GoroutineProfile(b Slice) (n int, ok bool) {
ok = true;
r = (TRecord*)b.__values;
saveg(g, r++);
- for(gp = runtime_allg; gp != nil; gp = gp->alllink) {
+ for(i = 0; i < runtime_allglen; i++) {
+ gp = runtime_allg[i];
if(gp == g || gp->status == Gdead)
continue;
saveg(gp, r++);
@@ -596,9 +536,3 @@ func GoroutineProfile(b Slice) (n int, ok bool) {
runtime_starttheworld();
}
}
-
-void
-runtime_mprofinit(void)
-{
- addrhash = runtime_persistentalloc((1<<AddrHashBits)*sizeof *addrhash, 0, &mstats.buckhash_sys);
-}
diff --git a/libgo/runtime/msize.c b/libgo/runtime/msize.c
index 745a769..34509d0 100644
--- a/libgo/runtime/msize.c
+++ b/libgo/runtime/msize.c
@@ -44,8 +44,8 @@ int32 runtime_class_to_allocnpages[NumSizeClasses];
int8 runtime_size_to_class8[1024/8 + 1];
int8 runtime_size_to_class128[(MaxSmallSize-1024)/128 + 1];
-static int32
-SizeToClass(int32 size)
+int32
+runtime_SizeToClass(int32 size)
{
if(size > MaxSmallSize)
runtime_throw("SizeToClass - invalid size");
@@ -90,9 +90,9 @@ runtime_InitSizes(void)
// objects into the page, we might as well
// use just this size instead of having two
// different sizes.
- if(sizeclass > 1
- && (int32)npages == runtime_class_to_allocnpages[sizeclass-1]
- && allocsize/size == allocsize/runtime_class_to_size[sizeclass-1]) {
+ if(sizeclass > 1 &&
+ (int32)npages == runtime_class_to_allocnpages[sizeclass-1] &&
+ allocsize/size == allocsize/runtime_class_to_size[sizeclass-1]) {
runtime_class_to_size[sizeclass-1] = size;
continue;
}
@@ -119,7 +119,7 @@ runtime_InitSizes(void)
// Double-check SizeToClass.
if(0) {
for(n=0; n < MaxSmallSize; n++) {
- sizeclass = SizeToClass(n);
+ sizeclass = runtime_SizeToClass(n);
if(sizeclass < 1 || sizeclass >= NumSizeClasses || runtime_class_to_size[sizeclass] < n) {
runtime_printf("size=%d sizeclass=%d runtime_class_to_size=%d\n", n, sizeclass, runtime_class_to_size[sizeclass]);
runtime_printf("incorrect SizeToClass");
@@ -158,3 +158,18 @@ dump:
}
runtime_throw("InitSizes failed");
}
+
+// Returns size of the memory block that mallocgc will allocate if you ask for the size.
+uintptr
+runtime_roundupsize(uintptr size)
+{
+ if(size < MaxSmallSize) {
+ if(size <= 1024-8)
+ return runtime_class_to_size[runtime_size_to_class8[(size+7)>>3]];
+ else
+ return runtime_class_to_size[runtime_size_to_class128[(size-1024+127) >> 7]];
+ }
+ if(size + PageSize < size)
+ return size;
+ return ROUND(size, PageSize);
+}
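
As a side note on the large-size path of runtime_roundupsize, the sketch below (not part of the patch) shows the page rounding together with the overflow guard: if adding a page would wrap the unsigned size around, the request is returned unchanged rather than being rounded down to a tiny value. The 4096-byte page size and the hand-expanded ROUND macro are assumptions of the sketch; the real function also consults the size-class tables for small sizes.

    /* Large-size rounding with an overflow check. */
    #include <stdio.h>
    #include <stdint.h>

    #define PAGESIZE ((uintptr_t)4096)                                /* assumed page size */
    #define ROUND(x, n) (((x) + (n) - 1) & ~((uintptr_t)(n) - 1))     /* n: power of two */

    static uintptr_t roundup_large(uintptr_t size)
    {
        if (size + PAGESIZE < size)
            return size;            /* would overflow: hand the size back unchanged */
        return ROUND(size, PAGESIZE);
    }

    int main(void)
    {
        printf("5000 -> %llu\n", (unsigned long long)roundup_large(5000));
        printf("near-overflow: %llu -> %llu\n",
               (unsigned long long)(uintptr_t)-10,
               (unsigned long long)roundup_large((uintptr_t)-10));
        return 0;
    }
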
diff --git a/libgo/runtime/netpoll.goc b/libgo/runtime/netpoll.goc
index 0270573..15dd58c 100644
--- a/libgo/runtime/netpoll.goc
+++ b/libgo/runtime/netpoll.goc
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build darwin dragonfly freebsd linux netbsd openbsd windows
+// +build darwin dragonfly freebsd linux netbsd openbsd solaris windows
package net
@@ -24,21 +24,45 @@ package net
// An implementation must call the following function to denote that the pd is ready.
// void runtime_netpollready(G **gpp, PollDesc *pd, int32 mode);
+// PollDesc contains 2 binary semaphores, rg and wg, to park reader and writer
+// goroutines respectively. The semaphore can be in the following states:
+// READY - io readiness notification is pending;
+// a goroutine consumes the notification by changing the state to nil.
+// WAIT - a goroutine prepares to park on the semaphore, but not yet parked;
+// the goroutine commits to park by changing the state to G pointer,
+// or, alternatively, concurrent io notification changes the state to READY,
+// or, alternatively, concurrent timeout/close changes the state to nil.
+// G pointer - the goroutine is blocked on the semaphore;
+// io notification or timeout/close changes the state to READY or nil respectively
+// and unparks the goroutine.
+// nil - nothing of the above.
#define READY ((G*)1)
+#define WAIT ((G*)2)
+
+enum
+{
+ PollBlockSize = 4*1024,
+};
struct PollDesc
{
PollDesc* link; // in pollcache, protected by pollcache.Lock
+
+ // The lock protects pollOpen, pollSetDeadline, pollUnblock and deadlineimpl operations.
+ // This fully covers seq, rt and wt variables. fd is constant throughout the PollDesc lifetime.
+	// pollReset, pollWait, pollWaitCanceled and runtime_netpollready (IO readiness notification)
+ // proceed w/o taking the lock. So closing, rg, rd, wg and wd are manipulated
+ // in a lock-free way by all operations.
	Lock;		// protects the following fields
uintptr fd;
bool closing;
uintptr seq; // protects from stale timers and ready notifications
- G* rg; // G waiting for read or READY (binary semaphore)
+ G* rg; // READY, WAIT, G waiting for read or nil
Timer rt; // read deadline timer (set if rt.fv != nil)
int64 rd; // read deadline
- G* wg; // the same for writes
- Timer wt;
- int64 wd;
+ G* wg; // READY, WAIT, G waiting for write or nil
+ Timer wt; // write deadline timer
+ int64 wd; // write deadline
};
static struct
@@ -52,7 +76,7 @@ static struct
// seq is incremented when deadlines are changed or descriptor is reused.
} pollcache;
-static bool netpollblock(PollDesc*, int32);
+static bool netpollblock(PollDesc*, int32, bool);
static G* netpollunblock(PollDesc*, int32, bool);
static void deadline(int64, Eface);
static void readDeadline(int64, Eface);
@@ -102,7 +126,6 @@ func runtime_pollClose(pd *PollDesc) {
}
func runtime_pollReset(pd *PollDesc, mode int) (err int) {
- runtime_lock(pd);
err = checkerr(pd, mode);
if(err)
goto ret;
@@ -111,14 +134,15 @@ func runtime_pollReset(pd *PollDesc, mode int) (err int) {
else if(mode == 'w')
pd->wg = nil;
ret:
- runtime_unlock(pd);
}
func runtime_pollWait(pd *PollDesc, mode int) (err int) {
- runtime_lock(pd);
err = checkerr(pd, mode);
if(err == 0) {
- while(!netpollblock(pd, mode)) {
+	// For now, only Solaris uses level-triggered IO.
+ if(Solaris)
+ runtime_netpollarm(pd->fd, mode);
+ while(!netpollblock(pd, mode, false)) {
err = checkerr(pd, mode);
if(err != 0)
break;
@@ -127,15 +151,13 @@ func runtime_pollWait(pd *PollDesc, mode int) (err int) {
// Pretend it has not happened and retry.
}
}
- runtime_unlock(pd);
}
func runtime_pollWaitCanceled(pd *PollDesc, mode int) {
- runtime_lock(pd);
- // wait for ioready, ignore closing or timeouts.
- while(!netpollblock(pd, mode))
+ // This function is used only on windows after a failed attempt to cancel
+ // a pending async IO operation. Wait for ioready, ignore closing or timeouts.
+ while(!netpollblock(pd, mode, true))
;
- runtime_unlock(pd);
}
func runtime_pollSetDeadline(pd *PollDesc, d int64, mode int) {
@@ -190,7 +212,7 @@ func runtime_pollSetDeadline(pd *PollDesc, d int64, mode int) {
}
// If we set the new deadline in the past, unblock currently pending IO if any.
rg = nil;
- wg = nil;
+ runtime_atomicstorep(&wg, nil); // full memory barrier between stores to rd/wd and load of rg/wg in netpollunblock
if(pd->rd < 0)
rg = netpollunblock(pd, 'r', false);
if(pd->wd < 0)
@@ -210,6 +232,7 @@ func runtime_pollUnblock(pd *PollDesc) {
runtime_throw("runtime_pollUnblock: already closing");
pd->closing = true;
pd->seq++;
+ runtime_atomicstorep(&rg, nil); // full memory barrier between store to closing and read of rg/wg in netpollunblock
rg = netpollunblock(pd, 'r', false);
wg = netpollunblock(pd, 'w', false);
if(pd->rt.fv) {
@@ -240,12 +263,10 @@ runtime_netpollready(G **gpp, PollDesc *pd, int32 mode)
G *rg, *wg;
rg = wg = nil;
- runtime_lock(pd);
if(mode == 'r' || mode == 'r'+'w')
rg = netpollunblock(pd, 'r', true);
if(mode == 'w' || mode == 'r'+'w')
wg = netpollunblock(pd, 'w', true);
- runtime_unlock(pd);
if(rg) {
rg->schedlink = *gpp;
*gpp = rg;
@@ -266,51 +287,75 @@ checkerr(PollDesc *pd, int32 mode)
return 0;
}
+static bool
+blockcommit(G *gp, G **gpp)
+{
+ return runtime_casp(gpp, WAIT, gp);
+}
+
// returns true if IO is ready, or false if timedout or closed
+// waitio - wait only for completed IO, ignore errors
static bool
-netpollblock(PollDesc *pd, int32 mode)
+netpollblock(PollDesc *pd, int32 mode, bool waitio)
{
- G **gpp;
+ G **gpp, *old;
gpp = &pd->rg;
if(mode == 'w')
gpp = &pd->wg;
- if(*gpp == READY) {
- *gpp = nil;
- return true;
+
+ // set the gpp semaphore to WAIT
+ for(;;) {
+ old = *gpp;
+ if(old == READY) {
+ *gpp = nil;
+ return true;
+ }
+ if(old != nil)
+ runtime_throw("netpollblock: double wait");
+ if(runtime_casp(gpp, nil, WAIT))
+ break;
}
- if(*gpp != nil)
- runtime_throw("netpollblock: double wait");
- *gpp = runtime_g();
- runtime_park(runtime_unlock, &pd->Lock, "IO wait");
- runtime_lock(pd);
- if(runtime_g()->param)
- return true;
- return false;
+
+ // need to recheck error states after setting gpp to WAIT
+ // this is necessary because runtime_pollUnblock/runtime_pollSetDeadline/deadlineimpl
+ // do the opposite: store to closing/rd/wd, membarrier, load of rg/wg
+ if(waitio || checkerr(pd, mode) == 0)
+ runtime_park((bool(*)(G*, void*))blockcommit, gpp, "IO wait");
+ // be careful to not lose concurrent READY notification
+ old = runtime_xchgp(gpp, nil);
+ if(old > WAIT)
+ runtime_throw("netpollblock: corrupted state");
+ return old == READY;
}
static G*
netpollunblock(PollDesc *pd, int32 mode, bool ioready)
{
- G **gpp, *old;
+ G **gpp, *old, *new;
gpp = &pd->rg;
if(mode == 'w')
gpp = &pd->wg;
- if(*gpp == READY)
- return nil;
- if(*gpp == nil) {
- // Only set READY for ioready. runtime_pollWait
- // will check for timeout/cancel before waiting.
+
+ for(;;) {
+ old = *gpp;
+ if(old == READY)
+ return nil;
+ if(old == nil && !ioready) {
+ // Only set READY for ioready. runtime_pollWait
+ // will check for timeout/cancel before waiting.
+ return nil;
+ }
+ new = nil;
if(ioready)
- *gpp = READY;
- return nil;
+ new = READY;
+ if(runtime_casp(gpp, old, new))
+ break;
}
- old = *gpp;
- // pass unblock reason onto blocked g
- old->param = (void*)(uintptr)ioready;
- *gpp = nil;
- return old;
+ if(old > WAIT)
+ return old; // must be G*
+ return nil;
}
static void
@@ -336,14 +381,14 @@ deadlineimpl(int64 now, Eface arg, bool read, bool write)
if(pd->rd <= 0 || pd->rt.fv == nil)
runtime_throw("deadlineimpl: inconsistent read deadline");
pd->rd = -1;
- pd->rt.fv = nil;
+ runtime_atomicstorep(&pd->rt.fv, nil); // full memory barrier between store to rd and load of rg in netpollunblock
rg = netpollunblock(pd, 'r', false);
}
if(write) {
if(pd->wd <= 0 || (pd->wt.fv == nil && !read))
runtime_throw("deadlineimpl: inconsistent write deadline");
pd->wd = -1;
- pd->wt.fv = nil;
+ runtime_atomicstorep(&pd->wt.fv, nil); // full memory barrier between store to wd and load of wg in netpollunblock
wg = netpollunblock(pd, 'w', false);
}
runtime_unlock(pd);
@@ -379,7 +424,7 @@ allocPollDesc(void)
runtime_lock(&pollcache);
if(pollcache.first == nil) {
- n = PageSize/sizeof(*pd);
+ n = PollBlockSize/sizeof(*pd);
if(n == 0)
n = 1;
// Must be in non-GC memory because can be referenced
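
The rg/wg protocol documented above can be modeled with C11 atomics. The sketch below (not part of the patch) drives one blocked reader through the nil -> WAIT -> G-pointer -> wakeup sequence in a single thread; it always treats the notification as IO readiness, skips the double-wait and error rechecks, and does not actually park a goroutine, so it only illustrates the compare-and-swap state machine rather than the full netpollblock/netpollunblock logic.

    /* Toy nil/READY/WAIT/G-pointer semaphore. */
    #include <stdio.h>
    #include <stdint.h>
    #include <stdbool.h>
    #include <stdatomic.h>

    typedef struct { const char *name; } G;

    #define READY ((G*)1)
    #define WAIT  ((G*)2)

    /* Consume a pending READY notification, or register gp as the waiter. */
    static bool semblock(_Atomic(G*) *gpp, G *gp)
    {
        for (;;) {
            G *old = atomic_load(gpp);
            if (old == READY) {
                atomic_store(gpp, NULL);
                return true;                /* IO was already ready */
            }
            /* The real code throws on double wait; that check is omitted. */
            if (atomic_compare_exchange_weak(gpp, &old, WAIT))
                break;
        }
        /* Commit to parking: WAIT -> G pointer. */
        G *expect = WAIT;
        atomic_compare_exchange_strong(gpp, &expect, gp);
        return false;                       /* the caller would park here */
    }

    /* Deliver an IO-readiness notification; return a parked waiter, if any. */
    static G *semunblock(_Atomic(G*) *gpp)
    {
        for (;;) {
            G *old = atomic_load(gpp);
            if (old == READY)
                return NULL;                /* notification already pending */
            /* Leave READY behind when nobody has parked yet. */
            G *new = (old == NULL || old == WAIT) ? READY : NULL;
            if (atomic_compare_exchange_weak(gpp, &old, new))
                return ((uintptr_t)old > (uintptr_t)WAIT) ? old : NULL;
        }
    }

    int main(void)
    {
        _Atomic(G*) sem = NULL;
        G reader = { "reader" };

        semblock(&sem, &reader);            /* reader registers and "parks" */
        G *woken = semunblock(&sem);        /* readiness notification arrives */
        printf("woken: %s\n", woken ? woken->name : "(none)");
        return 0;
    }
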
diff --git a/libgo/runtime/netpoll_epoll.c b/libgo/runtime/netpoll_epoll.c
index 2acbca3..fe534c9 100644
--- a/libgo/runtime/netpoll_epoll.c
+++ b/libgo/runtime/netpoll_epoll.c
@@ -116,6 +116,14 @@ runtime_netpollclose(uintptr fd)
return -res;
}
+void
+runtime_netpollarm(uintptr fd, int32 mode)
+{
+ USED(fd);
+ USED(mode);
+ runtime_throw("unused");
+}
+
// polls for ready network connections
// returns list of goroutines that become runnable
G*
@@ -159,7 +167,8 @@ retry:
}
void
-runtime_netpoll_scan(void (*addroot)(Obj))
+runtime_netpoll_scan(struct Workbuf** wbufp, void (*enqueue1)(struct Workbuf**, Obj))
{
- USED(addroot);
+ USED(wbufp);
+ USED(enqueue1);
}
diff --git a/libgo/runtime/netpoll_kqueue.c b/libgo/runtime/netpoll_kqueue.c
index 5d3f856..bc38644 100644
--- a/libgo/runtime/netpoll_kqueue.c
+++ b/libgo/runtime/netpoll_kqueue.c
@@ -59,6 +59,13 @@ runtime_netpollclose(uintptr fd)
return 0;
}
+void
+runtime_netpollarm(uintptr fd, int32 mode)
+{
+ USED(fd, mode);
+ runtime_throw("unused");
+}
+
// Polls for ready network connections.
// Returns list of goroutines that become runnable.
G*
@@ -104,7 +111,8 @@ retry:
}
void
-runtime_netpoll_scan(void (*addroot)(Obj))
+runtime_netpoll_scan(struct Workbuf** wbufp, void (*enqueue1)(struct Workbuf**, Obj))
{
- USED(addroot);
+ USED(wbufp);
+ USED(enqueue1);
}
diff --git a/libgo/runtime/netpoll_select.c b/libgo/runtime/netpoll_select.c
index 788d19f..b461335 100644
--- a/libgo/runtime/netpoll_select.c
+++ b/libgo/runtime/netpoll_select.c
@@ -246,7 +246,7 @@ runtime_netpoll(bool block)
}
void
-runtime_netpoll_scan(void (*addroot)(Obj))
+runtime_netpoll_scan(struct Workbuf** wbufp, void (*enqueue1)(struct Workbuf**, Obj))
{
- addroot((Obj){(byte*)&data, sizeof data, 0});
+ enqueue1(wbufp, (Obj){(byte*)&data, sizeof data, 0});
}
diff --git a/libgo/runtime/netpoll_stub.c b/libgo/runtime/netpoll_stub.c
index a88c9f5..468a610 100644
--- a/libgo/runtime/netpoll_stub.c
+++ b/libgo/runtime/netpoll_stub.c
@@ -19,7 +19,8 @@ runtime_netpoll(bool block)
}
void
-runtime_netpoll_scan(void (*addroot)(Obj))
+runtime_netpoll_scan(struct Workbuf** wbufp, void (*enqueue1)(struct Workbuf**, Obj))
{
+	USED(wbufp);
-	USED(addroot);
+	USED(enqueue1);
}
diff --git a/libgo/runtime/panic.c b/libgo/runtime/panic.c
index 8fe321f..78d4dd9 100644
--- a/libgo/runtime/panic.c
+++ b/libgo/runtime/panic.c
@@ -12,6 +12,42 @@
uint32 runtime_panicking;
static Lock paniclk;
+// Allocate a Defer, usually using per-P pool.
+// Each defer must be released with freedefer.
+Defer*
+runtime_newdefer()
+{
+ Defer *d;
+ P *p;
+
+ d = nil;
+ p = runtime_m()->p;
+ d = p->deferpool;
+ if(d)
+ p->deferpool = d->__next;
+ if(d == nil) {
+ // deferpool is empty
+ d = runtime_malloc(sizeof(Defer));
+ }
+ return d;
+}
+
+// Free the given defer.
+// The defer cannot be used after this call.
+void
+runtime_freedefer(Defer *d)
+{
+ P *p;
+
+ if(d->__special)
+ return;
+ p = runtime_m()->p;
+ d->__next = p->deferpool;
+ p->deferpool = d;
+ // No need to wipe out pointers in argp/pc/fn/args,
+ // because we empty the pool before GC.
+}
+
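
The newdefer/freedefer pair above is a per-P free list in front of the allocator. The standalone C sketch below (not part of the patch) shows the reuse pattern with heavily trimmed P and Defer structures, plain malloc standing in for the runtime allocator, and without the __special escape hatch or the pool flush before GC.

    /* Per-P free-list reuse of Defer records. */
    #include <stdio.h>
    #include <stdlib.h>

    typedef struct Defer Defer;
    struct Defer { Defer *next; /* real defers also carry fn/arg/frame */ };

    typedef struct { Defer *deferpool; } P;

    /* Pop from the per-P pool; fall back to the allocator when it is empty. */
    static Defer *newdefer(P *p)
    {
        Defer *d = p->deferpool;
        if (d != NULL)
            p->deferpool = d->next;
        else
            d = malloc(sizeof(Defer));
        return d;
    }

    /* Push the record back onto the owning P's pool for reuse. */
    static void freedefer(P *p, Defer *d)
    {
        d->next = p->deferpool;
        p->deferpool = d;
    }

    int main(void)
    {
        P p = { NULL };
        Defer *d1 = newdefer(&p);
        freedefer(&p, d1);
        Defer *d2 = newdefer(&p);           /* reuses d1 without calling malloc */
        printf("reused: %s\n", d1 == d2 ? "yes" : "no");
        free(d2);
        return 0;
    }
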
// Run all deferred functions for the current goroutine.
static void
rundefer(void)
@@ -28,8 +64,7 @@ rundefer(void)
d->__pfn = nil;
if (pfn != nil)
(*pfn)(d->__arg);
- if (d->__free)
- runtime_free(d);
+ runtime_freedefer(d);
}
}
@@ -44,18 +79,34 @@ runtime_startpanic(void)
m->mallocing = 1; // tell rest of panic not to try to malloc
} else if(m->mcache == nil) // can happen if called from signal handler or throw
m->mcache = runtime_allocmcache();
- if(m->dying) {
+ switch(m->dying) {
+ case 0:
+ m->dying = 1;
+ if(runtime_g() != nil)
+ runtime_g()->writebuf = nil;
+ runtime_xadd(&runtime_panicking, 1);
+ runtime_lock(&paniclk);
+ if(runtime_debug.schedtrace > 0 || runtime_debug.scheddetail > 0)
+ runtime_schedtrace(true);
+ runtime_freezetheworld();
+ return;
+ case 1:
+	// Something failed while panicking, probably the print of the
+ // argument to panic(). Just print a stack trace and exit.
+ m->dying = 2;
runtime_printf("panic during panic\n");
+ runtime_dopanic(0);
runtime_exit(3);
+ case 2:
+ // This is a genuine bug in the runtime, we couldn't even
+ // print the stack trace successfully.
+ m->dying = 3;
+ runtime_printf("stack trace unavailable\n");
+ runtime_exit(4);
+ default:
+ // Can't even print! Just exit.
+ runtime_exit(5);
}
- m->dying = 1;
- if(runtime_g() != nil)
- runtime_g()->writebuf = nil;
- runtime_xadd(&runtime_panicking, 1);
- runtime_lock(&paniclk);
- if(runtime_debug.schedtrace > 0 || runtime_debug.scheddetail > 0)
- runtime_schedtrace(true);
- runtime_freezetheworld();
}
void
diff --git a/libgo/runtime/proc.c b/libgo/runtime/proc.c
index c627ac1..1e15519 100644
--- a/libgo/runtime/proc.c
+++ b/libgo/runtime/proc.c
@@ -392,17 +392,23 @@ struct Sched {
int32 profilehz; // cpu profiling rate
};
-// The max value of GOMAXPROCS.
-// There are no fundamental restrictions on the value.
-enum { MaxGomaxprocs = 1<<8 };
+enum
+{
+ // The max value of GOMAXPROCS.
+ // There are no fundamental restrictions on the value.
+ MaxGomaxprocs = 1<<8,
+
+ // Number of goroutine ids to grab from runtime_sched.goidgen to local per-P cache at once.
+	// 16 seems to provide enough amortization, but other than that it's a mostly arbitrary number.
+ GoidCacheBatch = 16,
+};
Sched runtime_sched;
int32 runtime_gomaxprocs;
uint32 runtime_needextram = 1;
bool runtime_iscgo = true;
M runtime_m0;
-G runtime_g0; // idle goroutine for m0
-G* runtime_allg;
+G runtime_g0; // idle goroutine for m0
G* runtime_lastg;
M* runtime_allm;
P** runtime_allp;
@@ -412,10 +418,15 @@ int32 runtime_ncpu;
bool runtime_precisestack;
static int32 newprocs;
+static Lock allglock; // the following vars are protected by this lock or by stoptheworld
+G** runtime_allg;
+uintptr runtime_allglen;
+static uintptr allgcap;
+
void* runtime_mstart(void*);
static void runqput(P*, G*);
static G* runqget(P*);
-static void runqgrow(P*);
+static bool runqputslow(P*, G*, uint32, uint32);
static G* runqsteal(P*, P*);
static void mput(M*);
static M* mget(void);
@@ -442,12 +453,14 @@ static void gfput(P*, G*);
static G* gfget(P*);
static void gfpurge(P*);
static void globrunqput(G*);
+static void globrunqputbatch(G*, G*, int32);
static G* globrunqget(P*, int32);
static P* pidleget(void);
static void pidleput(P*);
static void injectglist(G*);
static bool preemptall(void);
static bool exitsyscallfast(void);
+static void allgadd(G*);
// The bootstrap sequence is:
//
@@ -476,7 +489,6 @@ runtime_schedinit(void)
runtime_sched.maxmcount = 10000;
runtime_precisestack = 0;
- runtime_mprofinit();
runtime_mallocinit();
mcommoninit(m);
@@ -541,7 +553,7 @@ runtime_main(void* dummy __attribute__((unused)))
d.__retaddr = nil;
d.__makefunc_can_recover = 0;
d.__frame = &frame;
- d.__free = 0;
+ d.__special = true;
g->defer = &d;
if(m != &runtime_m0)
@@ -579,6 +591,7 @@ void
runtime_goroutineheader(G *gp)
{
const char *status;
+ int64 waitfor;
switch(gp->status) {
case Gidle:
@@ -603,7 +616,16 @@ runtime_goroutineheader(G *gp)
status = "???";
break;
}
- runtime_printf("goroutine %D [%s]:\n", gp->goid, status);
+
+ // approx time the G is blocked, in minutes
+ waitfor = 0;
+ if((gp->status == Gwaiting || gp->status == Gsyscall) && gp->waitsince != 0)
+ waitfor = (runtime_nanotime() - gp->waitsince) / (60LL*1000*1000*1000);
+
+ if(waitfor < 1)
+ runtime_printf("goroutine %D [%s]:\n", gp->goid, status);
+ else
+ runtime_printf("goroutine %D [%s, %D minutes]:\n", gp->goid, status, waitfor);
}
void
@@ -624,7 +646,7 @@ runtime_printcreatedby(G *g)
struct Traceback
{
G* gp;
- Location locbuf[100];
+ Location locbuf[TracebackMaxFrames];
int32 c;
};
@@ -634,6 +656,7 @@ runtime_tracebackothers(G * volatile me)
G * volatile gp;
Traceback tb;
int32 traceback;
+ volatile uintptr i;
tb.gp = me;
traceback = runtime_gotraceback(nil);
@@ -657,7 +680,9 @@ runtime_tracebackothers(G * volatile me)
runtime_printcreatedby(gp);
}
- for(gp = runtime_allg; gp != nil; gp = gp->alllink) {
+ runtime_lock(&allglock);
+ for(i = 0; i < runtime_allglen; i++) {
+ gp = runtime_allg[i];
if(gp == me || gp == m->curg || gp->status == Gdead)
continue;
if(gp->issystem && traceback < 2)
@@ -696,6 +721,7 @@ runtime_tracebackothers(G * volatile me)
runtime_printcreatedby(gp);
}
}
+ runtime_unlock(&allglock);
}
static void
@@ -1038,6 +1064,7 @@ struct CgoThreadStart
{
M *m;
G *g;
+ uintptr *tls;
void (*fn)(void);
};
@@ -1200,14 +1227,7 @@ runtime_newextram(void)
gp->lockedm = mp;
gp->goid = runtime_xadd64(&runtime_sched.goidgen, 1);
// put on allg for garbage collector
- runtime_lock(&runtime_sched);
- if(runtime_lastg == nil)
- runtime_allg = gp;
- else
- runtime_lastg->alllink = gp;
- runtime_lastg = gp;
- runtime_unlock(&runtime_sched);
- gp->goid = runtime_xadd64(&runtime_sched.goidgen, 1);
+ allgadd(gp);
// The context for gp will be set up in runtime_needm. But
// here we need to set up the context for g0.
@@ -1379,7 +1399,7 @@ mspinning(void)
}
// Schedules some M to run the p (creates an M if necessary).
-// If p==nil, tries to get an idle P, if no idle P's returns false.
+// If p==nil, tries to get an idle P; if there are no idle P's, does nothing.
static void
startm(P *p, bool spinning)
{
@@ -1543,6 +1563,7 @@ execute(G *gp)
runtime_throw("execute: bad g status");
}
gp->status = Grunning;
+ gp->waitsince = 0;
m->p->schedtick++;
m->curg = gp;
gp->m = m;
@@ -1760,10 +1781,10 @@ top:
execute(gp);
}
-// Puts the current goroutine into a waiting state and unlocks the lock.
-// The goroutine can be made runnable again by calling runtime_ready(gp).
+// Puts the current goroutine into a waiting state and calls unlockf.
+// If unlockf returns false, the goroutine is resumed.
void
-runtime_park(void(*unlockf)(Lock*), Lock *lock, const char *reason)
+runtime_park(bool(*unlockf)(G*, void*), void *lock, const char *reason)
{
m->waitlock = lock;
m->waitunlockf = unlockf;
@@ -1771,17 +1792,39 @@ runtime_park(void(*unlockf)(Lock*), Lock *lock, const char *reason)
runtime_mcall(park0);
}
+static bool
+parkunlock(G *gp, void *lock)
+{
+ USED(gp);
+ runtime_unlock(lock);
+ return true;
+}
+
+// Puts the current goroutine into a waiting state and unlocks the lock.
+// The goroutine can be made runnable again by calling runtime_ready(gp).
+void
+runtime_parkunlock(Lock *lock, const char *reason)
+{
+ runtime_park(parkunlock, lock, reason);
+}
+
// runtime_park continuation on g0.
static void
park0(G *gp)
{
+ bool ok;
+
gp->status = Gwaiting;
gp->m = nil;
m->curg = nil;
if(m->waitunlockf) {
- m->waitunlockf(m->waitlock);
+ ok = m->waitunlockf(gp, m->waitlock);
m->waitunlockf = nil;
m->waitlock = nil;
+ if(!ok) {
+ gp->status = Grunnable;
+ execute(gp); // Schedule it back, never returns.
+ }
}
if(m->lockedg) {
stoplockedm();
@@ -1968,6 +2011,7 @@ runtime_exitsyscall(void)
if(gp->isbackground) // do not consider blocked scavenger for deadlock detection
incidlelocked(-1);
+ g->waitsince = 0;
if(exitsyscallfast()) {
// There's a cpu for us, so we can run.
m->p->syscalltick++;
@@ -2160,11 +2204,13 @@ __go_go(void (*fn)(void*), void* arg)
byte *sp;
size_t spsize;
G *newg;
+ P *p;
//runtime_printf("newproc1 %p %p narg=%d nret=%d\n", fn->fn, argp, narg, nret);
m->locks++; // disable preemption because it can be holding p in a local var
- if((newg = gfget(m->p)) != nil) {
+ p = m->p;
+ if((newg = gfget(p)) != nil) {
#ifdef USING_SPLIT_STACK
int dont_block_signals = 0;
@@ -2181,20 +2227,18 @@ __go_go(void (*fn)(void*), void* arg)
#endif
} else {
newg = runtime_malg(StackMin, &sp, &spsize);
- runtime_lock(&runtime_sched);
- if(runtime_lastg == nil)
- runtime_allg = newg;
- else
- runtime_lastg->alllink = newg;
- runtime_lastg = newg;
- runtime_unlock(&runtime_sched);
+ allgadd(newg);
}
newg->entry = (byte*)fn;
newg->param = arg;
newg->gopc = (uintptr)__builtin_return_address(0);
newg->status = Grunnable;
- newg->goid = runtime_xadd64(&runtime_sched.goidgen, 1);
+ if(p->goidcache == p->goidcacheend) {
+ p->goidcache = runtime_xadd64(&runtime_sched.goidgen, GoidCacheBatch);
+ p->goidcacheend = p->goidcache + GoidCacheBatch;
+ }
+ newg->goid = p->goidcache++;
{
// Avoid warnings about variables clobbered by
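
The goid cache added above trades one shared atomic add for GoidCacheBatch purely local increments. The small C11 sketch below (not part of the patch) shows the batching; note that C11 atomic_fetch_add returns the old value while runtime_xadd64 returns the new one, so the sketch adds 1 to keep ids starting at 1, and the P structure is reduced to the two cache fields.

    /* Batched id allocation from a shared counter. */
    #include <stdio.h>
    #include <stdint.h>
    #include <stdatomic.h>

    enum { GoidCacheBatch = 16 };

    static _Atomic uint64_t goidgen;                /* shared id generator */

    typedef struct { uint64_t goidcache, goidcacheend; } P;

    static uint64_t nextgoid(P *p)
    {
        if (p->goidcache == p->goidcacheend) {
            /* Reserve a whole batch with a single atomic add. */
            p->goidcache = atomic_fetch_add(&goidgen, GoidCacheBatch) + 1;
            p->goidcacheend = p->goidcache + GoidCacheBatch;
        }
        return p->goidcache++;
    }

    int main(void)
    {
        P p = {0, 0};
        for (int i = 0; i < 3; i++)
            printf("goid %llu\n", (unsigned long long)nextgoid(&p));
        return 0;
    }
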
@@ -2211,7 +2255,7 @@ __go_go(void (*fn)(void*), void* arg)
vnewg->context.uc_stack.ss_size = vspsize;
makecontext(&vnewg->context, kickoff, 0);
- runqput(m->p, vnewg);
+ runqput(p, vnewg);
if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0 && fn != runtime_main) // TODO: fast atomic
wakep();
@@ -2220,6 +2264,31 @@ __go_go(void (*fn)(void*), void* arg)
}
}
+static void
+allgadd(G *gp)
+{
+ G **new;
+ uintptr cap;
+
+ runtime_lock(&allglock);
+ if(runtime_allglen >= allgcap) {
+ cap = 4096/sizeof(new[0]);
+ if(cap < 2*allgcap)
+ cap = 2*allgcap;
+ new = runtime_malloc(cap*sizeof(new[0]));
+ if(new == nil)
+ runtime_throw("runtime: cannot allocate memory");
+ if(runtime_allg != nil) {
+ runtime_memmove(new, runtime_allg, runtime_allglen*sizeof(new[0]));
+ runtime_free(runtime_allg);
+ }
+ runtime_allg = new;
+ allgcap = cap;
+ }
+ runtime_allg[runtime_allglen++] = gp;
+ runtime_unlock(&allglock);
+}
+
// Put on gfree list.
// If local list is too long, transfer a batch to the global list.
static void
@@ -2415,19 +2484,21 @@ runtime_gcount(void)
{
G *gp;
int32 n, s;
+ uintptr i;
n = 0;
- runtime_lock(&runtime_sched);
+ runtime_lock(&allglock);
// TODO(dvyukov): runtime.NumGoroutine() is O(N).
// We do not want to increment/decrement centralized counter in newproc/goexit,
// just to make runtime.NumGoroutine() faster.
// Compromise solution is to introduce per-P counters of active goroutines.
- for(gp = runtime_allg; gp; gp = gp->alllink) {
+ for(i = 0; i < runtime_allglen; i++) {
+ gp = runtime_allg[i];
s = gp->status;
if(s == Grunnable || s == Grunning || s == Gsyscall || s == Gwaiting)
n++;
}
- runtime_unlock(&runtime_sched);
+ runtime_unlock(&allglock);
return n;
}
@@ -2441,32 +2512,39 @@ static struct {
Lock;
void (*fn)(uintptr*, int32);
int32 hz;
- uintptr pcbuf[100];
- Location locbuf[100];
+ uintptr pcbuf[TracebackMaxFrames];
+ Location locbuf[TracebackMaxFrames];
} prof;
-static void
-System(void)
-{
-}
+static void System(void) {}
+static void GC(void) {}
// Called if we receive a SIGPROF signal.
void
runtime_sigprof()
{
+ M *mp = m;
int32 n, i;
bool traceback;
if(prof.fn == nil || prof.hz == 0)
return;
+
+ if(mp == nil)
+ return;
+
traceback = true;
- // Windows does profiling in a dedicated thread w/o m.
- if(!Windows && (m == nil || m->mcache == nil))
+
+ if(mp->mcache == nil)
traceback = false;
-
+
+ // Profiling runs concurrently with GC, so it must not allocate.
+ mp->mallocing++;
+
runtime_lock(&prof);
if(prof.fn == nil) {
runtime_unlock(&prof);
+ mp->mallocing--;
return;
}
n = 0;
@@ -2484,13 +2562,17 @@ runtime_sigprof()
for(i = 0; i < n; i++)
prof.pcbuf[i] = prof.locbuf[i].pc;
}
- if (!traceback || n <= 0) {
+ if(!traceback || n <= 0) {
n = 2;
prof.pcbuf[0] = (uintptr)runtime_getcallerpc(&n);
- prof.pcbuf[1] = (uintptr)System + 1;
+ if(mp->gcing || mp->helpgc)
+ prof.pcbuf[1] = (uintptr)GC;
+ else
+ prof.pcbuf[1] = (uintptr)System;
}
prof.fn(prof.pcbuf, n);
runtime_unlock(&prof);
+ mp->mallocing--;
}
// Arrange to call fn with a traceback hz times a second.
@@ -2533,6 +2615,7 @@ static void
procresize(int32 new)
{
int32 i, old;
+ bool empty;
G *gp;
P *p;
@@ -2554,27 +2637,42 @@ procresize(int32 new)
else
p->mcache = runtime_allocmcache();
}
- if(p->runq == nil) {
- p->runqsize = 128;
- p->runq = (G**)runtime_mallocgc(p->runqsize*sizeof(G*), 0, FlagNoInvokeGC);
- }
}
// redistribute runnable G's evenly
- for(i = 0; i < old; i++) {
- p = runtime_allp[i];
- while((gp = runqget(p)) != nil)
- globrunqput(gp);
+ // collect all runnable goroutines in global queue preserving FIFO order
+ // FIFO order is required to ensure fairness even during frequent GCs
+ // see http://golang.org/issue/7126
+ empty = false;
+ while(!empty) {
+ empty = true;
+ for(i = 0; i < old; i++) {
+ p = runtime_allp[i];
+ if(p->runqhead == p->runqtail)
+ continue;
+ empty = false;
+ // pop from tail of local queue
+ p->runqtail--;
+ gp = p->runq[p->runqtail%nelem(p->runq)];
+ // push onto head of global queue
+ gp->schedlink = runtime_sched.runqhead;
+ runtime_sched.runqhead = gp;
+ if(runtime_sched.runqtail == nil)
+ runtime_sched.runqtail = gp;
+ runtime_sched.runqsize++;
+ }
}
+ // fill local queues with at most nelem(p->runq)/2 goroutines
// start at 1 because current M already executes some G and will acquire allp[0] below,
// so if we have a spare G we want to put it into allp[1].
- for(i = 1; runtime_sched.runqhead; i++) {
+ for(i = 1; (uint32)i < (uint32)new * nelem(p->runq)/2 && runtime_sched.runqsize > 0; i++) {
gp = runtime_sched.runqhead;
runtime_sched.runqhead = gp->schedlink;
+ if(runtime_sched.runqhead == nil)
+ runtime_sched.runqtail = nil;
+ runtime_sched.runqsize--;
runqput(runtime_allp[i%new], gp);
}
- runtime_sched.runqtail = nil;
- runtime_sched.runqsize = 0;
// free unused P's
for(i = new; i < old; i++) {
@@ -2656,28 +2754,39 @@ checkdead(void)
{
G *gp;
int32 run, grunning, s;
+ uintptr i;
// -1 for sysmon
run = runtime_sched.mcount - runtime_sched.nmidle - runtime_sched.nmidlelocked - 1 - countextra();
if(run > 0)
return;
+ // If we are dying because of a signal caught on an already idle thread,
+ // freezetheworld will cause all running threads to block.
+ // And runtime will essentially enter into deadlock state,
+ // except that there is a thread that will call runtime_exit soon.
+ if(runtime_panicking > 0)
+ return;
if(run < 0) {
- runtime_printf("checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n",
+ runtime_printf("runtime: checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n",
runtime_sched.nmidle, runtime_sched.nmidlelocked, runtime_sched.mcount);
runtime_throw("checkdead: inconsistent counts");
}
grunning = 0;
- for(gp = runtime_allg; gp; gp = gp->alllink) {
+ runtime_lock(&allglock);
+ for(i = 0; i < runtime_allglen; i++) {
+ gp = runtime_allg[i];
if(gp->isbackground)
continue;
s = gp->status;
if(s == Gwaiting)
grunning++;
else if(s == Grunnable || s == Grunning || s == Gsyscall) {
- runtime_printf("checkdead: find g %D in status %d\n", gp->goid, s);
+ runtime_unlock(&allglock);
+ runtime_printf("runtime: checkdead: find g %D in status %d\n", gp->goid, s);
runtime_throw("checkdead: runnable g");
}
}
+ runtime_unlock(&allglock);
if(grunning == 0) // possible if main goroutine calls runtime_Goexit()
runtime_exit(0);
m->throwing = -1; // do not dump full stacks
@@ -2774,16 +2883,19 @@ retake(int64 now)
pd = &pdesc[i];
s = p->status;
if(s == Psyscall) {
- // Retake P from syscall if it's there for more than 1 sysmon tick (20us).
- // But only if there is other work to do.
+ // Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
t = p->syscalltick;
if(pd->syscalltick != t) {
pd->syscalltick = t;
pd->syscallwhen = now;
continue;
}
+ // On the one hand we don't want to retake Ps if there is no other work to do,
+ // but on the other hand we want to retake them eventually
+ // because they can prevent the sysmon thread from deep sleep.
if(p->runqhead == p->runqtail &&
- runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) > 0)
+ runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) > 0 &&
+ pd->syscallwhen + 10*1000*1000 > now)
continue;
// Need to decrement number of idle locked M's
// (pretending that one more is running) before the CAS.
@@ -2828,7 +2940,8 @@ runtime_schedtrace(bool detailed)
static int64 starttime;
int64 now;
int64 id1, id2, id3;
- int32 i, q, t, h, s;
+ int32 i, t, h;
+ uintptr gi;
const char *fmt;
M *mp, *lockedm;
G *gp, *lockedg;
@@ -2855,15 +2968,11 @@ runtime_schedtrace(bool detailed)
if(p == nil)
continue;
mp = p->m;
- t = p->runqtail;
- h = p->runqhead;
- s = p->runqsize;
- q = t - h;
- if(q < 0)
- q += s;
+ h = runtime_atomicload(&p->runqhead);
+ t = runtime_atomicload(&p->runqtail);
if(detailed)
- runtime_printf(" P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d/%d gfreecnt=%d\n",
- i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, q, s, p->gfreecnt);
+ runtime_printf(" P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d gfreecnt=%d\n",
+ i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, t-h, p->gfreecnt);
else {
// In non-detailed mode format lengths of per-P run queues as:
// [len1 len2 len3 len4]
@@ -2874,7 +2983,7 @@ runtime_schedtrace(bool detailed)
fmt = " [%d";
else if(i == runtime_gomaxprocs-1)
fmt = " %d]\n";
- runtime_printf(fmt, q);
+ runtime_printf(fmt, t-h);
}
}
if(!detailed) {
@@ -2895,18 +3004,21 @@ runtime_schedtrace(bool detailed)
if(lockedg)
id3 = lockedg->goid;
runtime_printf(" M%d: p=%D curg=%D mallocing=%d throwing=%d gcing=%d"
- " locks=%d dying=%d helpgc=%d spinning=%d lockedg=%D\n",
+ " locks=%d dying=%d helpgc=%d spinning=%d blocked=%d lockedg=%D\n",
mp->id, id1, id2,
mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc,
- mp->spinning, id3);
+		mp->spinning, mp->blocked, id3);
}
- for(gp = runtime_allg; gp; gp = gp->alllink) {
+ runtime_lock(&allglock);
+ for(gi = 0; gi < runtime_allglen; gi++) {
+ gp = runtime_allg[gi];
mp = gp->m;
lockedm = gp->lockedm;
runtime_printf(" G%D: status=%d(%s) m=%d lockedm=%d\n",
gp->goid, gp->status, gp->waitreason, mp ? mp->id : -1,
lockedm ? lockedm->id : -1);
}
+ runtime_unlock(&allglock);
runtime_unlock(&runtime_sched);
}
@@ -2949,6 +3061,20 @@ globrunqput(G *gp)
runtime_sched.runqsize++;
}
+// Put a batch of runnable goroutines on the global runnable queue.
+// Sched must be locked.
+static void
+globrunqputbatch(G *ghead, G *gtail, int32 n)
+{
+ gtail->schedlink = nil;
+ if(runtime_sched.runqtail)
+ runtime_sched.runqtail->schedlink = ghead;
+ else
+ runtime_sched.runqhead = ghead;
+ runtime_sched.runqtail = gtail;
+ runtime_sched.runqsize += n;
+}
+
// Try get a batch of G's from the global runnable queue.
// Sched must be locked.
static G*
@@ -2964,6 +3090,8 @@ globrunqget(P *p, int32 max)
n = runtime_sched.runqsize;
if(max > 0 && n > max)
n = max;
+ if((uint32)n > nelem(p->runq)/2)
+ n = nelem(p->runq)/2;
runtime_sched.runqsize -= n;
if(runtime_sched.runqsize == 0)
runtime_sched.runqtail = nil;
@@ -3003,78 +3131,98 @@ pidleget(void)
return p;
}
-// Put g on local runnable queue.
-// TODO(dvyukov): consider using lock-free queue.
+// Try to put g on local runnable queue.
+// If it's full, put onto global queue.
+// Executed only by the owner P.
static void
runqput(P *p, G *gp)
{
- int32 h, t, s;
+ uint32 h, t;
- runtime_lock(p);
retry:
- h = p->runqhead;
+ h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers
t = p->runqtail;
- s = p->runqsize;
- if(t == h-1 || (h == 0 && t == s-1)) {
- runqgrow(p);
- goto retry;
+ if(t - h < nelem(p->runq)) {
+ p->runq[t%nelem(p->runq)] = gp;
+ runtime_atomicstore(&p->runqtail, t+1); // store-release, makes the item available for consumption
+ return;
}
- p->runq[t++] = gp;
- if(t == s)
- t = 0;
- p->runqtail = t;
- runtime_unlock(p);
+ if(runqputslow(p, gp, h, t))
+ return;
+	// the queue is not full now, so the put above must succeed
+ goto retry;
+}
+
+// Put g and a batch of work from local runnable queue on global queue.
+// Executed only by the owner P.
+static bool
+runqputslow(P *p, G *gp, uint32 h, uint32 t)
+{
+ G *batch[nelem(p->runq)/2+1];
+ uint32 n, i;
+
+ // First, grab a batch from local queue.
+ n = t-h;
+ n = n/2;
+ if(n != nelem(p->runq)/2)
+ runtime_throw("runqputslow: queue is not full");
+ for(i=0; i<n; i++)
+ batch[i] = p->runq[(h+i)%nelem(p->runq)];
+ if(!runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume
+ return false;
+ batch[n] = gp;
+ // Link the goroutines.
+ for(i=0; i<n; i++)
+ batch[i]->schedlink = batch[i+1];
+ // Now put the batch on global queue.
+ runtime_lock(&runtime_sched);
+ globrunqputbatch(batch[0], batch[n], n+1);
+ runtime_unlock(&runtime_sched);
+ return true;
}
// Get g from local runnable queue.
+// Executed only by the owner P.
static G*
runqget(P *p)
{
G *gp;
- int32 t, h, s;
+ uint32 t, h;
- if(p->runqhead == p->runqtail)
- return nil;
- runtime_lock(p);
- h = p->runqhead;
- t = p->runqtail;
- s = p->runqsize;
- if(t == h) {
- runtime_unlock(p);
- return nil;
+ for(;;) {
+ h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
+ t = p->runqtail;
+ if(t == h)
+ return nil;
+ gp = p->runq[h%nelem(p->runq)];
+ if(runtime_cas(&p->runqhead, h, h+1)) // cas-release, commits consume
+ return gp;
}
- gp = p->runq[h++];
- if(h == s)
- h = 0;
- p->runqhead = h;
- runtime_unlock(p);
- return gp;
}
-// Grow local runnable queue.
-// TODO(dvyukov): consider using fixed-size array
-// and transfer excess to the global list (local queue can grow way too big).
-static void
-runqgrow(P *p)
+// Grabs a batch of goroutines from local runnable queue.
+// batch array must be of size nelem(p->runq)/2. Returns number of grabbed goroutines.
+// Can be executed by any P.
+static uint32
+runqgrab(P *p, G **batch)
{
- G **q;
- int32 s, t, h, t2;
+ uint32 t, h, n, i;
- h = p->runqhead;
- t = p->runqtail;
- s = p->runqsize;
- t2 = 0;
- q = runtime_malloc(2*s*sizeof(*q));
- while(t != h) {
- q[t2++] = p->runq[h++];
- if(h == s)
- h = 0;
+ for(;;) {
+ h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
+ t = runtime_atomicload(&p->runqtail); // load-acquire, synchronize with the producer
+ n = t-h;
+ n = n - n/2;
+ if(n == 0)
+ break;
+ if(n > nelem(p->runq)/2) // read inconsistent h and t
+ continue;
+ for(i=0; i<n; i++)
+ batch[i] = p->runq[(h+i)%nelem(p->runq)];
+ if(runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume
+ break;
}
- runtime_free(p->runq);
- p->runq = q;
- p->runqhead = 0;
- p->runqtail = t2;
- p->runqsize = 2*s;
+ return n;
}
// Steal half of elements from local runnable queue of p2
@@ -3083,57 +3231,24 @@ runqgrow(P *p)
static G*
runqsteal(P *p, P *p2)
{
- G *gp, *gp1;
- int32 t, h, s, t2, h2, s2, c, i;
+ G *gp;
+ G *batch[nelem(p->runq)/2];
+ uint32 t, h, n, i;
- if(p2->runqhead == p2->runqtail)
+ n = runqgrab(p2, batch);
+ if(n == 0)
return nil;
- // sort locks to prevent deadlocks
- if(p < p2)
- runtime_lock(p);
- runtime_lock(p2);
- if(p2->runqhead == p2->runqtail) {
- runtime_unlock(p2);
- if(p < p2)
- runtime_unlock(p);
- return nil;
- }
- if(p >= p2)
- runtime_lock(p);
- // now we've locked both queues and know the victim is not empty
- h = p->runqhead;
+ n--;
+ gp = batch[n];
+ if(n == 0)
+ return gp;
+ h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers
t = p->runqtail;
- s = p->runqsize;
- h2 = p2->runqhead;
- t2 = p2->runqtail;
- s2 = p2->runqsize;
- gp = p2->runq[h2++]; // return value
- if(h2 == s2)
- h2 = 0;
- // steal roughly half
- if(t2 > h2)
- c = (t2 - h2) / 2;
- else
- c = (s2 - h2 + t2) / 2;
- // copy
- for(i = 0; i != c; i++) {
- // the target queue is full?
- if(t == h-1 || (h == 0 && t == s-1))
- break;
- // the victim queue is empty?
- if(t2 == h2)
- break;
- gp1 = p2->runq[h2++];
- if(h2 == s2)
- h2 = 0;
- p->runq[t++] = gp1;
- if(t == s)
- t = 0;
- }
- p->runqtail = t;
- p2->runqhead = h2;
- runtime_unlock(p2);
- runtime_unlock(p);
+ if(t - h + n >= nelem(p->runq))
+ runtime_throw("runqsteal: runq overflow");
+ for(i=0; i<n; i++, t++)
+ p->runq[t%nelem(p->runq)] = batch[i];
+ runtime_atomicstore(&p->runqtail, t); // store-release, makes the items available for consumption
return gp;
}
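
The steal path above relies on runqgrab's optimistic snapshot-then-CAS. Here is a standalone sketch of just that batch grab, under the same illustrative naming as the previous sketch (not libgo code): reading head and tail with separate acquire loads can observe an inconsistent pair, which is why an apparent size above half the capacity is simply retried.

#include <stdint.h>
#include <stdio.h>

#define CAP 256

typedef struct Ring {
    uint32_t head, tail;
    int items[CAP];
} Ring;

/* grab roughly half of the victim's queue; any thread may call this */
static uint32_t grab_half(Ring *r, int *batch) {
    uint32_t h, t, n, i;

    for (;;) {
        h = __atomic_load_n(&r->head, __ATOMIC_ACQUIRE);
        t = __atomic_load_n(&r->tail, __ATOMIC_ACQUIRE);
        n = t - h;
        n = n - n/2;          /* take the larger half */
        if (n == 0)
            return 0;
        if (n > CAP/2)        /* h and t were read inconsistently; retry */
            continue;
        for (i = 0; i < n; i++)
            batch[i] = r->items[(h + i) % CAP];
        if (__sync_bool_compare_and_swap(&r->head, h, h + n))
            return n;         /* the CAS commits consumption of those slots */
    }
}

int main(void) {
    static Ring victim;
    int batch[CAP/2], i;
    uint32_t n;

    for (i = 0; i < 8; i++)
        victim.items[victim.tail++ % CAP] = i;
    n = grab_half(&victim, batch);
    printf("grabbed %u of 8 items, victim head=%u\n", n, victim.head);
    return 0;
}

The real runqsteal then copies the grabbed batch into the thief's own ring and publishes it with a release store on its tail, as the hunk above does.
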
@@ -3144,14 +3259,10 @@ void
runtime_testSchedLocalQueue(void)
{
P p;
- G gs[1000];
+ G gs[nelem(p.runq)];
int32 i, j;
runtime_memclr((byte*)&p, sizeof(p));
- p.runqsize = 1;
- p.runqhead = 0;
- p.runqtail = 0;
- p.runq = runtime_malloc(p.runqsize*sizeof(*p.runq));
for(i = 0; i < (int32)nelem(gs); i++) {
if(runqget(&p) != nil)
@@ -3176,20 +3287,11 @@ void
runtime_testSchedLocalQueueSteal(void)
{
P p1, p2;
- G gs[1000], *gp;
+ G gs[nelem(p1.runq)], *gp;
int32 i, j, s;
runtime_memclr((byte*)&p1, sizeof(p1));
- p1.runqsize = 1;
- p1.runqhead = 0;
- p1.runqtail = 0;
- p1.runq = runtime_malloc(p1.runqsize*sizeof(*p1.runq));
-
runtime_memclr((byte*)&p2, sizeof(p2));
- p2.runqsize = nelem(gs);
- p2.runqhead = 0;
- p2.runqtail = 0;
- p2.runq = runtime_malloc(p2.runqsize*sizeof(*p2.runq));
for(i = 0; i < (int32)nelem(gs); i++) {
for(j = 0; j < i; j++) {
@@ -3239,9 +3341,9 @@ runtime_debug_setMaxThreads(intgo in)
}
void
-runtime_proc_scan(void (*addroot)(Obj))
+runtime_proc_scan(struct Workbuf** wbufp, void (*enqueue1)(struct Workbuf**, Obj))
{
- addroot((Obj){(byte*)&runtime_sched, sizeof runtime_sched, 0});
+ enqueue1(wbufp, (Obj){(byte*)&runtime_sched, sizeof runtime_sched, 0});
}
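
The root-scanning hook now threads a work-buffer pointer through the callback instead of taking a bare addroot(Obj). A standalone sketch of that calling shape follows; the Obj and Workbuf definitions below are simplified stand-ins for illustration, not the GC's real structures.

#include <stddef.h>
#include <stdio.h>

typedef struct Obj { void *p; size_t n; } Obj;
typedef struct Workbuf { int nobj; Obj obj[4]; } Workbuf;

/* collector side: append a root to the caller's current work buffer */
static void enqueue1(Workbuf **wbufp, Obj obj) {
    Workbuf *w = *wbufp;
    if (w->nobj < 4)
        w->obj[w->nobj++] = obj;
    /* a real collector would swap in a fresh buffer via *wbufp when full */
}

static int some_root;

/* scanner side: same shape as the new runtime_proc_scan/runtime_time_scan */
static void proc_scan(Workbuf **wbufp, void (*enq)(Workbuf**, Obj)) {
    enq(wbufp, (Obj){ &some_root, sizeof some_root });
}

int main(void) {
    Workbuf buf = { 0 };
    Workbuf *w = &buf;

    proc_scan(&w, enqueue1);
    printf("queued %d root(s)\n", w->nobj);
    return 0;
}
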
// When a function calls a closure, it passes the closure value to
@@ -3271,3 +3373,30 @@ runtime_gcwaiting(void)
{
return runtime_sched.gcwaiting;
}
+
+// func runtime_procPin() int
+
+intgo sync_runtime_procPin(void)
+ __asm__(GOSYM_PREFIX "sync.runtime_procPin");
+
+intgo
+sync_runtime_procPin()
+{
+ M *mp;
+
+ mp = m;
+ // Disable preemption.
+ mp->locks++;
+ return mp->p->id;
+}
+
+// func runtime_procUnpin()
+
+void sync_runtime_procUnpin(void)
+ __asm__ (GOSYM_PREFIX "sync.runtime_procUnpin");
+
+void
+sync_runtime_procUnpin(void)
+{
+ m->locks--;
+}
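
The new pin/unpin pair is small but worth spelling out: pinning bumps m->locks (the diff's own comment: "Disable preemption.") and reports the current P's id, so the caller can index per-P state while the goroutine cannot be migrated. A standalone model of that idea, with illustrative names and a mocked M/P rather than the runtime's structures:

#include <stdio.h>

typedef struct P { int id; } P;
typedef struct M { int locks; P *p; } M;

static P p0 = { 0 };
static __thread M m = { 0, &p0 };     /* per-thread M, as in the runtime */

static int proc_pin(void) {
    m.locks++;                        /* model: no preemption while locks > 0 */
    return m.p->id;
}

static void proc_unpin(void) {
    m.locks--;
}

int main(void) {
    int pid = proc_pin();
    printf("pinned to P %d (locks=%d)\n", pid, m.locks);
    /* ... touch per-P state indexed by pid here ... */
    proc_unpin();
    printf("unpinned (locks=%d)\n", m.locks);
    return 0;
}
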
diff --git a/libgo/runtime/race.h b/libgo/runtime/race.h
index c97e672..e84c5d4 100644
--- a/libgo/runtime/race.h
+++ b/libgo/runtime/race.h
@@ -24,8 +24,8 @@ void runtime_racewritepc(void *addr, void *callpc, void *pc);
void runtime_racereadpc(void *addr, void *callpc, void *pc);
void runtime_racewriterangepc(void *addr, uintptr sz, void *callpc, void *pc);
void runtime_racereadrangepc(void *addr, uintptr sz, void *callpc, void *pc);
-void runtime_racereadobjectpc(void *addr, Type *t, void *callpc, void *pc);
-void runtime_racewriteobjectpc(void *addr, Type *t, void *callpc, void *pc);
+void runtime_racereadobjectpc(void *addr, const Type *t, void *callpc, void *pc);
+void runtime_racewriteobjectpc(void *addr, const Type *t, void *callpc, void *pc);
void runtime_racefingo(void);
void runtime_raceacquire(void *addr);
void runtime_raceacquireg(G *gp, void *addr);
diff --git a/libgo/runtime/runtime.c b/libgo/runtime/runtime.c
index 4f9909b..eb32a8d 100644
--- a/libgo/runtime/runtime.c
+++ b/libgo/runtime/runtime.c
@@ -353,3 +353,12 @@ runtime_debug_setMaxStack(intgo in)
runtime_maxstacksize = in;
return out;
}
+
+void memclrBytes(Slice)
+ __asm__ (GOSYM_PREFIX "runtime.memclrBytes");
+
+void
+memclrBytes(Slice s)
+{
+ runtime_memclr(s.__values, s.__count);
+}
diff --git a/libgo/runtime/runtime.h b/libgo/runtime/runtime.h
index ef6090f..9d5e42f 100644
--- a/libgo/runtime/runtime.h
+++ b/libgo/runtime/runtime.h
@@ -205,12 +205,12 @@ struct G
void* gcinitial_sp;
ucontext_t gcregs;
byte* entry; // initial function
- G* alllink; // on allg
void* param; // passed parameter on wakeup
bool fromgogo; // reached from gogo
int16 status;
uint32 selgen; // valid sudog pointer
int64 goid;
+ int64 waitsince; // approx time when the G became blocked
const char* waitreason; // if status==Gwaiting
G* schedlink;
bool ispanic;
@@ -221,8 +221,6 @@ struct G
int32 sig;
int32 writenbuf;
byte* writebuf;
- // DeferChunk* dchunk;
- // DeferChunk* dchunknext;
uintptr sigcode0;
uintptr sigcode1;
// uintptr sigpc;
@@ -256,7 +254,8 @@ struct M
int32 dying;
int32 profilehz;
int32 helpgc;
- bool spinning;
+ bool spinning; // M is out of work and is actively looking for work
+ bool blocked; // M is blocked on a Note
uint32 fastrand;
uint64 ncgocall; // number of cgo calls in total
int32 ncgo; // number of cgo calls currently in progress
@@ -276,8 +275,7 @@ struct M
bool racecall;
bool needextram;
bool dropextram; // for gccgo: drop after call is done.
- void* racepc;
- void (*waitunlockf)(Lock*);
+ bool (*waitunlockf)(G*, void*);
void* waitlock;
uintptr settype_buf[1024];
@@ -297,12 +295,16 @@ struct P
uint32 syscalltick; // incremented on every system call
M* m; // back-link to associated M (nil if idle)
MCache* mcache;
+ Defer* deferpool; // pool of available Defer structs (see panic.c)
+
+ // Cache of goroutine ids, amortizes accesses to runtime_sched.goidgen.
+ uint64 goidcache;
+ uint64 goidcacheend;
// Queue of runnable goroutines.
- G** runq;
- int32 runqhead;
- int32 runqtail;
- int32 runqsize;
+ uint32 runqhead;
+ uint32 runqtail;
+ G* runq[256];
// Available G's (status == Gdead)
G* gfree;
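
The new goidcache/goidcacheend pair amortizes id allocation: instead of hitting the shared runtime_sched.goidgen once per new goroutine, a P reserves a block of ids with one atomic add and hands them out locally. A standalone sketch of that scheme follows; the batch size and names are illustrative, not the runtime's.

#include <stdint.h>
#include <stdio.h>

#define GOID_BATCH 16

static uint64_t goidgen;                 /* stands in for runtime_sched.goidgen */

typedef struct P {
    uint64_t goidcache, goidcacheend;    /* half-open range of reserved ids */
} P;

static uint64_t next_goid(P *p) {
    if (p->goidcache == p->goidcacheend) {
        /* one atomic add reserves GOID_BATCH ids for this P */
        p->goidcacheend = __sync_add_and_fetch(&goidgen, GOID_BATCH);
        p->goidcache = p->goidcacheend - GOID_BATCH;
    }
    return ++p->goidcache;               /* common path is purely P-local */
}

int main(void) {
    P p = { 0, 0 };
    int i;

    for (i = 0; i < 3; i++)
        printf("goid %llu\n", (unsigned long long)next_goid(&p));
    return 0;
}
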
@@ -359,6 +361,15 @@ enum {
Windows = 0
};
#endif
+#ifdef GOOS_solaris
+enum {
+ Solaris = 1
+};
+#else
+enum {
+ Solaris = 0
+};
+#endif
struct Timers
{
@@ -458,12 +469,18 @@ void runtime_hashinit(void);
void runtime_traceback(void);
void runtime_tracebackothers(G*);
+enum
+{
+ // The maximum number of frames we print for a traceback
+ TracebackMaxFrames = 100,
+};
/*
* external data
*/
extern uintptr runtime_zerobase;
-extern G* runtime_allg;
+extern G** runtime_allg;
+extern uintptr runtime_allglen;
extern G* runtime_lastg;
extern M* runtime_allm;
extern P** runtime_allp;
@@ -514,21 +531,6 @@ void runtime_printtrace(Location*, int32, bool);
#define runtime_read(d, v, n) read((d), (v), (n))
#define runtime_write(d, v, n) write((d), (v), (n))
#define runtime_close(d) close(d)
-#define runtime_cas(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
-#define runtime_cas64(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
-#define runtime_casp(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
-// Don't confuse with XADD x86 instruction,
-// this one is actually 'addx', that is, add-and-fetch.
-#define runtime_xadd(p, v) __sync_add_and_fetch (p, v)
-#define runtime_xadd64(p, v) __sync_add_and_fetch (p, v)
-#define runtime_xchg(p, v) __atomic_exchange_n (p, v, __ATOMIC_SEQ_CST)
-#define runtime_xchg64(p, v) __atomic_exchange_n (p, v, __ATOMIC_SEQ_CST)
-#define runtime_atomicload(p) __atomic_load_n (p, __ATOMIC_SEQ_CST)
-#define runtime_atomicstore(p, v) __atomic_store_n (p, v, __ATOMIC_SEQ_CST)
-#define runtime_atomicstore64(p, v) __atomic_store_n (p, v, __ATOMIC_SEQ_CST)
-#define runtime_atomicload64(p) __atomic_load_n (p, __ATOMIC_SEQ_CST)
-#define runtime_atomicloadp(p) __atomic_load_n (p, __ATOMIC_SEQ_CST)
-#define runtime_atomicstorep(p, v) __atomic_store_n (p, v, __ATOMIC_SEQ_CST)
void runtime_ready(G*);
const byte* runtime_getenv(const char*);
int32 runtime_atoi(const byte*);
@@ -546,7 +548,6 @@ void runtime_mallocinit(void);
void runtime_mprofinit(void);
#define runtime_malloc(s) __go_alloc(s)
#define runtime_free(p) __go_free(p)
-bool runtime_addfinalizer(void*, FuncVal *fn, const struct __go_func_type *, const struct __go_ptr_type *);
#define runtime_getcallersp(p) __builtin_frame_address(1)
int32 runtime_mcount(void);
int32 runtime_gcount(void);
@@ -554,6 +555,24 @@ void runtime_mcall(void(*)(G*));
uint32 runtime_fastrand1(void);
int32 runtime_timediv(int64, int32, int32*);
+// atomic operations
+#define runtime_cas(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
+#define runtime_cas64(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
+#define runtime_casp(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
+// Don't confuse with XADD x86 instruction,
+// this one is actually 'addx', that is, add-and-fetch.
+#define runtime_xadd(p, v) __sync_add_and_fetch (p, v)
+#define runtime_xadd64(p, v) __sync_add_and_fetch (p, v)
+#define runtime_xchg(p, v) __atomic_exchange_n (p, v, __ATOMIC_SEQ_CST)
+#define runtime_xchg64(p, v) __atomic_exchange_n (p, v, __ATOMIC_SEQ_CST)
+#define runtime_xchgp(p, v) __atomic_exchange_n (p, v, __ATOMIC_SEQ_CST)
+#define runtime_atomicload(p) __atomic_load_n (p, __ATOMIC_SEQ_CST)
+#define runtime_atomicstore(p, v) __atomic_store_n (p, v, __ATOMIC_SEQ_CST)
+#define runtime_atomicstore64(p, v) __atomic_store_n (p, v, __ATOMIC_SEQ_CST)
+#define runtime_atomicload64(p) __atomic_load_n (p, __ATOMIC_SEQ_CST)
+#define runtime_atomicloadp(p) __atomic_load_n (p, __ATOMIC_SEQ_CST)
+#define runtime_atomicstorep(p, v) __atomic_store_n (p, v, __ATOMIC_SEQ_CST)
+
void runtime_setmg(M*, G*);
void runtime_newextram(void);
#define runtime_exit(s) exit(s)
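
Since the atomic wrappers moved rather than changed, a brief reminder of their semantics may help: the xadd family is add-and-fetch (it returns the new value, unlike x86 XADD, which returns the old one), and the cas family reports whether the swap won. A quick standalone check using the same GCC builtins the macros expand to:

#include <stdint.h>
#include <stdio.h>

int main(void) {
    uint32_t v = 41;
    uint32_t n = __sync_add_and_fetch(&v, 1);             /* runtime_xadd: returns 42 */
    int won = __sync_bool_compare_and_swap(&v, 42, 100);  /* runtime_cas: 1 on success */

    printf("xadd -> %u, cas won=%d, v=%u\n", n, won, v);  /* 42, 1, 100 */
    return 0;
}
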
@@ -561,7 +580,8 @@ void runtime_newextram(void);
void runtime_gosched(void);
void runtime_gosched0(G*);
void runtime_schedtrace(bool);
-void runtime_park(void(*)(Lock*), Lock*, const char*);
+void runtime_park(bool(*)(G*, void*), void*, const char*);
+void runtime_parkunlock(Lock*, const char*);
void runtime_tsleep(int64, const char*);
M* runtime_newm(void);
void runtime_goexit(void);
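
runtime_park now takes a bool-returning callback instead of a bare unlock function, matching the new waitunlockf field in struct M: the scheduler runs the callback after switching away, and its bool return lets it veto the sleep, in which case the goroutine is made runnable again; runtime_parkunlock is then just park with an unlock-this-Lock callback. The following is an illustrative standalone model of that contract only, not the libgo scheduler: the mock park never actually blocks or switches stacks, and Lock, G, park and parkunlock_cb are stand-in names.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

typedef pthread_mutex_t Lock;
typedef struct G { const char *waitreason; } G;

/* mock park: record the reason, run the callback, and honor a false return
   by treating the goroutine as still runnable (a real scheduler would block here) */
static void park(G *gp, bool (*unlockf)(G*, void*), void *lock, const char *reason) {
    gp->waitreason = reason;
    if (unlockf != NULL && !unlockf(gp, lock))
        gp->waitreason = NULL;    /* callback vetoed the sleep */
}

/* the callback parkunlock would install: just drop the lock */
static bool parkunlock_cb(G *gp, void *lock) {
    (void)gp;
    pthread_mutex_unlock((Lock*)lock);
    return true;
}

static void parkunlock(G *gp, Lock *l, const char *reason) {
    park(gp, parkunlock_cb, l, reason);
}

int main(void) {
    static Lock l = PTHREAD_MUTEX_INITIALIZER;
    G g = { NULL };

    pthread_mutex_lock(&l);
    parkunlock(&g, &l, "semacquire");
    printf("parked for: %s\n", g.waitreason ? g.waitreason : "(not parked)");
    return 0;
}
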
@@ -593,6 +613,7 @@ int32 runtime_netpollopen(uintptr, PollDesc*);
int32 runtime_netpollclose(uintptr);
void runtime_netpollready(G**, PollDesc*, int32);
uintptr runtime_netpollfd(PollDesc*);
+void runtime_netpollarm(uintptr, int32);
void runtime_crash(void);
void runtime_parsedebugvars(void);
void _rt0_go(void);
@@ -743,9 +764,6 @@ void runtime_lockOSThread(void);
void runtime_unlockOSThread(void);
bool runtime_showframe(String, bool);
-Hchan* runtime_makechan_c(ChanType*, int64);
-void runtime_chansend(ChanType*, Hchan*, byte*, bool*, void*);
-void runtime_chanrecv(ChanType*, Hchan*, byte*, bool*, bool*);
void runtime_printcreatedby(G*);
uintptr runtime_memlimit(void);
@@ -793,3 +811,5 @@ void* __go_get_closure(void);
bool runtime_gcwaiting(void);
void runtime_badsignal(int);
+Defer* runtime_newdefer(void);
+void runtime_freedefer(Defer*);
diff --git a/libgo/runtime/sema.goc b/libgo/runtime/sema.goc
index f5d5bc8..50f0e97 100644
--- a/libgo/runtime/sema.goc
+++ b/libgo/runtime/sema.goc
@@ -136,7 +136,7 @@ runtime_semacquire(uint32 volatile *addr, bool profile)
// Any semrelease after the cansemacquire knows we're waiting
// (we set nwait above), so go to sleep.
semqueue(root, addr, &s);
- runtime_park(runtime_unlock, root, "semacquire");
+ runtime_parkunlock(root, "semacquire");
if(cansemacquire(addr)) {
if(t0)
runtime_blockevent(s.releasetime - t0, 3);
@@ -259,7 +259,7 @@ func runtime_Syncsemacquire(s *SyncSema) {
else
s->tail->next = &w;
s->tail = &w;
- runtime_park(runtime_unlock, s, "semacquire");
+ runtime_parkunlock(s, "semacquire");
if(t0)
runtime_blockevent(w.releasetime - t0, 2);
}
@@ -293,7 +293,7 @@ func runtime_Syncsemrelease(s *SyncSema, n uint32) {
else
s->tail->next = &w;
s->tail = &w;
- runtime_park(runtime_unlock, s, "semarelease");
+ runtime_parkunlock(s, "semarelease");
} else
runtime_unlock(s);
}
diff --git a/libgo/runtime/signal_unix.c b/libgo/runtime/signal_unix.c
index 6c191d0..66638de 100644
--- a/libgo/runtime/signal_unix.c
+++ b/libgo/runtime/signal_unix.c
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build darwin dragonfly freebsd linux openbsd netbsd
+// +build darwin dragonfly freebsd linux netbsd openbsd solaris
#include <sys/time.h>
diff --git a/libgo/runtime/string.goc b/libgo/runtime/string.goc
index a7446e9..a0e80cc 100644
--- a/libgo/runtime/string.goc
+++ b/libgo/runtime/string.goc
@@ -43,11 +43,9 @@ gostringsize(intgo l, byte** pmem)
*pmem = nil;
return runtime_emptystring;
}
- // leave room for NUL for C runtime (e.g., callers of getenv)
- mem = runtime_mallocgc(l+1, 0, FlagNoScan|FlagNoZero);
+ mem = runtime_mallocgc(l, 0, FlagNoScan|FlagNoZero);
s.str = mem;
s.len = l;
- mem[l] = 0;
*pmem = mem;
return s;
}
diff --git a/libgo/runtime/time.goc b/libgo/runtime/time.goc
index e4e35ec..13ce41f 100644
--- a/libgo/runtime/time.goc
+++ b/libgo/runtime/time.goc
@@ -78,7 +78,7 @@ runtime_tsleep(int64 ns, const char *reason)
t.arg.__object = g;
runtime_lock(&timers);
addtimer(&t);
- runtime_park(runtime_unlock, &timers, reason);
+ runtime_parkunlock(&timers, reason);
}
void
@@ -221,12 +221,20 @@ timerproc(void* dummy __attribute__ ((unused)))
runtime_raceacquire(t);
__go_set_closure(t->fv);
f(now, arg);
+
+ // clear f and arg to avoid a leak while sleeping for the next timer
+ f = nil;
+ USED(f);
+ arg.__type_descriptor = nil;
+ arg.__object = nil;
+ USED(&arg);
+
runtime_lock(&timers);
}
if(delta < 0) {
// No timers left - put goroutine to sleep.
timers.rescheduling = true;
- runtime_park(runtime_unlock, &timers, "timer goroutine (idle)");
+ runtime_parkunlock(&timers, "timer goroutine (idle)");
continue;
}
// At least one timer pending. Sleep until then.
@@ -320,7 +328,7 @@ dumptimers(const char *msg)
}
void
-runtime_time_scan(void (*addroot)(Obj))
+runtime_time_scan(struct Workbuf** wbufp, void (*enqueue1)(struct Workbuf**, Obj))
{
- addroot((Obj){(byte*)&timers, sizeof timers, 0});
+ enqueue1(wbufp, (Obj){(byte*)&timers, sizeof timers, 0});
}