author | Ian Lance Taylor <ian@gcc.gnu.org> | 2011-10-26 23:57:58 +0000
committer | Ian Lance Taylor <ian@gcc.gnu.org> | 2011-10-26 23:57:58 +0000
commit | d8f412571f8768df2d3239e72392dfeabbad1559 (patch)
tree | 19d182df05ead7ff8ba7ee00a7d57555e1383fdf /libgo/runtime
parent | e0c39d66d4f0607177b1cf8995dda56a667e07b3 (diff)
Update Go library to last weekly.
From-SVN: r180552
Diffstat (limited to 'libgo/runtime')
35 files changed, 831 insertions, 272 deletions
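Before the diff itself: this patch adds a shared arch.h defining CacheLineSize and uses it in malloc.h to pad each per-size-class MCentral to a full cache line, so that each MCentral lock gets its own line instead of the hard-coded 64-byte pad. Below is a minimal, self-contained sketch of that padding idiom under the same constant; the Counter struct and the demo main() are illustrative stand-ins, not part of the runtime.

/* Sketch of the cache-line padding idiom factored into arch.h/malloc.h.
   CacheLineSize mirrors the patch; Counter and main() are made up. */

#include <stdio.h>

enum { CacheLineSize = 64 };  /* FIXME in the patch: ideally per-architecture */

struct Counter {
	long n;
};

/* Pad each element to a full cache line so two threads updating
   neighbouring slots do not false-share the same line. */
union PaddedCounter {
	struct Counter c;
	char pad[CacheLineSize];
};

int main(void)
{
	union PaddedCounter counters[4];

	printf("sizeof(struct Counter)      = %zu\n", sizeof(struct Counter));
	printf("sizeof(union PaddedCounter) = %zu\n", sizeof(union PaddedCounter));
	printf("stride between slots        = %zu\n",
	       (size_t)((char *)&counters[1] - (char *)&counters[0]));
	return 0;
}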
diff --git a/libgo/runtime/arch.h b/libgo/runtime/arch.h new file mode 100644 index 0000000..0546a5d --- /dev/null +++ b/libgo/runtime/arch.h @@ -0,0 +1,8 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// FIXME: Ideally CacheLineSize would be dependent on the host architecture. +enum { + CacheLineSize = 64 +}; diff --git a/libgo/runtime/cpuprof.c b/libgo/runtime/cpuprof.c index 3797e1c..bec15ae 100644 --- a/libgo/runtime/cpuprof.c +++ b/libgo/runtime/cpuprof.c @@ -49,6 +49,7 @@ // in the situation when normally the goroutine "owns" handoff. #include "runtime.h" +#include "arch.h" #include "malloc.h" #include "array.h" diff --git a/libgo/runtime/go-append.c b/libgo/runtime/go-append.c index 261d85b..b1e882c 100644 --- a/libgo/runtime/go-append.c +++ b/libgo/runtime/go-append.c @@ -8,6 +8,7 @@ #include "go-panic.h" #include "array.h" #include "runtime.h" +#include "arch.h" #include "malloc.h" /* We should be OK if we don't split the stack here, since the only diff --git a/libgo/runtime/go-byte-array-to-string.c b/libgo/runtime/go-byte-array-to-string.c index ab9e283..cfe1906 100644 --- a/libgo/runtime/go-byte-array-to-string.c +++ b/libgo/runtime/go-byte-array-to-string.c @@ -6,6 +6,7 @@ #include "go-string.h" #include "runtime.h" +#include "arch.h" #include "malloc.h" struct __go_string diff --git a/libgo/runtime/go-go.c b/libgo/runtime/go-go.c index 1391620..d56b8b1 100644 --- a/libgo/runtime/go-go.c +++ b/libgo/runtime/go-go.c @@ -17,6 +17,7 @@ #include "go-panic.h" #include "go-alloc.h" #include "runtime.h" +#include "arch.h" #include "malloc.h" #ifdef USING_SPLIT_STACK @@ -561,7 +562,7 @@ __go_cachestats (void) /* Start the other threads after garbage collection. 
*/ void -runtime_starttheworld (void) +runtime_starttheworld (bool extra __attribute__ ((unused))) { int i; pthread_t me; diff --git a/libgo/runtime/go-int-array-to-string.c b/libgo/runtime/go-int-array-to-string.c index ec07b87..1a37879 100644 --- a/libgo/runtime/go-int-array-to-string.c +++ b/libgo/runtime/go-int-array-to-string.c @@ -7,6 +7,7 @@ #include "go-assert.h" #include "go-string.h" #include "runtime.h" +#include "arch.h" #include "malloc.h" struct __go_string diff --git a/libgo/runtime/go-int-to-string.c b/libgo/runtime/go-int-to-string.c index af58015..e9645bf 100644 --- a/libgo/runtime/go-int-to-string.c +++ b/libgo/runtime/go-int-to-string.c @@ -6,6 +6,7 @@ #include "go-string.h" #include "runtime.h" +#include "arch.h" #include "malloc.h" struct __go_string diff --git a/libgo/runtime/go-main.c b/libgo/runtime/go-main.c index 37956d5..927a36c 100644 --- a/libgo/runtime/go-main.c +++ b/libgo/runtime/go-main.c @@ -19,6 +19,7 @@ #include "go-string.h" #include "runtime.h" +#include "arch.h" #include "malloc.h" #undef int diff --git a/libgo/runtime/go-make-slice.c b/libgo/runtime/go-make-slice.c index d0e8369..a818a7f 100644 --- a/libgo/runtime/go-make-slice.c +++ b/libgo/runtime/go-make-slice.c @@ -12,6 +12,7 @@ #include "go-type.h" #include "array.h" #include "runtime.h" +#include "arch.h" #include "malloc.h" struct __go_open_array diff --git a/libgo/runtime/go-new.c b/libgo/runtime/go-new.c index 657978c..b1af5f2 100644 --- a/libgo/runtime/go-new.c +++ b/libgo/runtime/go-new.c @@ -6,6 +6,7 @@ #include "go-alloc.h" #include "runtime.h" +#include "arch.h" #include "malloc.h" void * diff --git a/libgo/runtime/go-note.c b/libgo/runtime/go-note.c index 2b80b9b..62c229f 100644 --- a/libgo/runtime/go-note.c +++ b/libgo/runtime/go-note.c @@ -12,7 +12,7 @@ #include "runtime.h" /* We use a single global lock and condition variable. It would be - better to use a futex on Linux. */ + better to use a futex on GNU/Linux. */ static pthread_mutex_t note_lock = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t note_cond = PTHREAD_COND_INITIALIZER; diff --git a/libgo/runtime/go-panic.c b/libgo/runtime/go-panic.c index c39ea9f..f3e182d 100644 --- a/libgo/runtime/go-panic.c +++ b/libgo/runtime/go-panic.c @@ -8,6 +8,7 @@ #include <stdlib.h> #include "runtime.h" +#include "arch.h" #include "malloc.h" #include "go-alloc.h" #include "go-defer.h" diff --git a/libgo/runtime/go-semacquire.c b/libgo/runtime/go-semacquire.c index 40fe2af..05b6377 100644 --- a/libgo/runtime/go-semacquire.c +++ b/libgo/runtime/go-semacquire.c @@ -13,9 +13,9 @@ /* We use a single global lock and condition variable. This is painful, since it will cause unnecessary contention, but is hard to - avoid in a portable manner. On Linux we can use futexes, but they - are unfortunately not exposed by libc and are thus also hard to use - portably. */ + avoid in a portable manner. On GNU/Linux we can use futexes, but + they are unfortunately not exposed by libc and are thus also hard + to use portably. 
*/ static pthread_mutex_t sem_lock = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t sem_cond = PTHREAD_COND_INITIALIZER; diff --git a/libgo/runtime/go-string-to-byte-array.c b/libgo/runtime/go-string-to-byte-array.c index 10c565e..8bae54b 100644 --- a/libgo/runtime/go-string-to-byte-array.c +++ b/libgo/runtime/go-string-to-byte-array.c @@ -7,6 +7,7 @@ #include "go-string.h" #include "array.h" #include "runtime.h" +#include "arch.h" #include "malloc.h" struct __go_open_array diff --git a/libgo/runtime/go-string-to-int-array.c b/libgo/runtime/go-string-to-int-array.c index f59df67..aff1468 100644 --- a/libgo/runtime/go-string-to-int-array.c +++ b/libgo/runtime/go-string-to-int-array.c @@ -8,6 +8,7 @@ #include "go-string.h" #include "array.h" #include "runtime.h" +#include "arch.h" #include "malloc.h" struct __go_open_array diff --git a/libgo/runtime/go-strplus.c b/libgo/runtime/go-strplus.c index e4dea9c..bfbe341 100644 --- a/libgo/runtime/go-strplus.c +++ b/libgo/runtime/go-strplus.c @@ -6,6 +6,7 @@ #include "go-string.h" #include "runtime.h" +#include "arch.h" #include "malloc.h" struct __go_string diff --git a/libgo/runtime/go-strslice.c b/libgo/runtime/go-strslice.c index 94ecee9..40ccac6 100644 --- a/libgo/runtime/go-strslice.c +++ b/libgo/runtime/go-strslice.c @@ -7,6 +7,7 @@ #include "go-string.h" #include "go-panic.h" #include "runtime.h" +#include "arch.h" #include "malloc.h" struct __go_string diff --git a/libgo/runtime/goc2c.c b/libgo/runtime/goc2c.c index 32fbceb..fe413fe 100644 --- a/libgo/runtime/goc2c.c +++ b/libgo/runtime/goc2c.c @@ -219,13 +219,14 @@ getchar_skipping_comments(void) } /* - * Read and return a token. Tokens are delimited by whitespace or by - * [(),{}]. The latter are all returned as single characters. + * Read and return a token. Tokens are string or character literals + * or else delimited by whitespace or by [(),{}]. + * The latter are all returned as single characters. */ static char * read_token(void) { - int c; + int c, q; char *buf; unsigned int alc, off; const char* delims = "(),{}"; @@ -240,7 +241,26 @@ read_token(void) alc = 16; buf = xmalloc(alc + 1); off = 0; - if (strchr(delims, c) != NULL) { + if(c == '"' || c == '\'') { + q = c; + buf[off] = c; + ++off; + while (1) { + if (off+2 >= alc) { // room for c and maybe next char + alc *= 2; + buf = xrealloc(buf, alc + 1); + } + c = getchar_no_eof(); + buf[off] = c; + ++off; + if(c == q) + break; + if(c == '\\') { + buf[off] = getchar_no_eof(); + ++off; + } + } + } else if (strchr(delims, c) != NULL) { buf[off] = c; ++off; } else { diff --git a/libgo/runtime/malloc.goc b/libgo/runtime/malloc.goc index 2ea69ee..f8d4327 100644 --- a/libgo/runtime/malloc.goc +++ b/libgo/runtime/malloc.goc @@ -12,6 +12,7 @@ package runtime #include <stdlib.h> #include "go-alloc.h" #include "runtime.h" +#include "arch.h" #include "malloc.h" #include "go-string.h" #include "interface.h" @@ -96,11 +97,12 @@ runtime_mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed) m->mcache->next_sample -= size; else { // pick next profile time + // If you change this, also change allocmcache. 
if(rate > 0x3fffffff) // make 2*rate not overflow rate = 0x3fffffff; m->mcache->next_sample = runtime_fastrand1() % (2*rate); profile: - runtime_setblockspecial(v); + runtime_setblockspecial(v, true); runtime_MProf_Malloc(v, size); } } @@ -224,6 +226,7 @@ runtime_mlookup(void *v, byte **base, uintptr *size, MSpan **sp) MCache* runtime_allocmcache(void) { + int32 rate; MCache *c; if(!__sync_bool_compare_and_swap(&m->mallocing, 0, 1)) @@ -239,6 +242,13 @@ runtime_allocmcache(void) mstats.mcache_sys = runtime_mheap.cachealloc.sys; runtime_unlock(&runtime_mheap); + // Set first allocation sample size. + rate = runtime_MemProfileRate; + if(rate > 0x3fffffff) // make 2*rate not overflow + rate = 0x3fffffff; + if(rate != 0) + c->next_sample = runtime_fastrand1() % (2*rate); + __sync_bool_compare_and_swap(&m->mallocing, 1, 0); if(__sync_bool_compare_and_swap(&m->gcing, 1, 0)) __go_run_goroutine_gc(2); @@ -280,6 +290,7 @@ runtime_mallocinit(void) byte *p; uintptr arena_size, bitmap_size; extern byte end[]; + byte *want; runtime_sizeof_C_MStats = sizeof(MStats); @@ -341,9 +352,13 @@ runtime_mallocinit(void) // not as an absolute requirement. If we ask for the end // of the data segment but the operating system requires // a little more space before we can start allocating, it will - // give out a slightly higher pointer. That's fine. - // Run with what we get back. - p = runtime_SysReserve(end, bitmap_size + arena_size); + // give out a slightly higher pointer. Except QEMU, which + // is buggy, as usual: it won't adjust the pointer upward. + // So adjust it upward a little bit ourselves: 1/4 MB to get + // away from the running binary image and then round up + // to a MB boundary. + want = (byte*)(((uintptr)end + (1<<18) + (1<<20) - 1)&~((1<<20)-1)); + p = runtime_SysReserve(want, bitmap_size + arena_size); if(p == nil) runtime_throw("runtime: cannot reserve arena virtual address space"); } @@ -418,8 +433,9 @@ runtime_mal(uintptr n) return runtime_mallocgc(n, 0, 1, 1); } -func new(n uint32) (ret *uint8) { - ret = runtime_mal(n); +func new(typ *Type) (ret *uint8) { + uint32 flag = typ->__code&GO_NO_POINTERS ? FlagNoPointers : 0; + ret = runtime_mallocgc(typ->__size, flag, 1, 1); } func Alloc(n uintptr) (p *byte) { @@ -444,9 +460,8 @@ func SetFinalizer(obj Eface, finalizer Eface) { const FuncType *ft; if(obj.__type_descriptor == nil) { - // runtime_printf("runtime.SetFinalizer: first argument is nil interface\n"); - throw: - runtime_throw("runtime.SetFinalizer"); + // runtime·printf("runtime.SetFinalizer: first argument is nil interface\n"); + goto throw; } if(obj.__type_descriptor->__code != GO_PTR) { // runtime_printf("runtime.SetFinalizer: first argument is %S, not pointer\n", *obj.type->string); @@ -458,19 +473,21 @@ func SetFinalizer(obj Eface, finalizer Eface) { } ft = nil; if(finalizer.__type_descriptor != nil) { - if(finalizer.__type_descriptor->__code != GO_FUNC) { - badfunc: - // runtime_printf("runtime.SetFinalizer: second argument is %S, not func(%S)\n", *finalizer.type->string, *obj.type->string); - goto throw; - } + if(finalizer.__type_descriptor->__code != GO_FUNC) + goto badfunc; ft = (const FuncType*)finalizer.__type_descriptor; if(ft->__dotdotdot || ft->__in.__count != 1 || !__go_type_descriptors_equal(*(Type**)ft->__in.__values, obj.__type_descriptor)) goto badfunc; + } - if(runtime_getfinalizer(obj.__object, 0)) { - // runtime_printf("runtime.SetFinalizer: finalizer already set"); - goto throw; - } + if(!runtime_addfinalizer(obj.__object, finalizer.__type_descriptor != nil ? 
*(void**)finalizer.__object : nil, ft)) { + runtime_printf("runtime.SetFinalizer: finalizer already set\n"); + goto throw; } - runtime_addfinalizer(obj.__object, finalizer.__type_descriptor != nil ? *(void**)finalizer.__object : nil, ft); + return; + +badfunc: + // runtime_printf("runtime.SetFinalizer: second argument is %S, not func(%S)\n", *finalizer.type->string, *obj.type->string); +throw: + runtime_throw("runtime.SetFinalizer"); } diff --git a/libgo/runtime/malloc.h b/libgo/runtime/malloc.h index 3e813bb..1ccc2f0 100644 --- a/libgo/runtime/malloc.h +++ b/libgo/runtime/malloc.h @@ -120,6 +120,13 @@ enum #else MHeapMap_Bits = 20, #endif + + // Max number of threads to run garbage collection. + // 2, 3, and 4 are all plausible maximums depending + // on the hardware details of the machine. The second + // proc is the one that helps the most (after the first), + // so start with just 2 for now. + MaxGcproc = 2, }; // A generic linked list of blocks. (Typically the block is bigger than sizeof(MLink).) @@ -192,7 +199,7 @@ struct MStats uint64 nlookup; // number of pointer lookups uint64 nmalloc; // number of mallocs uint64 nfree; // number of frees - + // Statistics about malloc heap. // protected by mheap.Lock uint64 heap_alloc; // bytes allocated and still in use @@ -210,7 +217,7 @@ struct MStats uint64 mcache_inuse; // MCache structures uint64 mcache_sys; uint64 buckhash_sys; // profiling bucket hash table - + // Statistics about garbage collector. // Protected by stopping the world during GC. uint64 next_gc; // next GC (in heap_alloc time) @@ -219,7 +226,7 @@ struct MStats uint32 numgc; bool enablegc; bool debuggc; - + // Statistics about allocation size classes. struct { uint32 size; @@ -240,7 +247,7 @@ extern MStats mstats // // class_to_size[i] = largest size in class i // class_to_allocnpages[i] = number of pages to allocate when -// making new objects in class i +// making new objects in class i // class_to_transfercount[i] = number of objects to move when // taking a bunch of objects out of the central lists // and putting them in the thread free list. @@ -279,7 +286,7 @@ struct MCache int64 nmalloc; int64 nfree; } local_by_size[NumSizeClasses]; - + }; void* runtime_MCache_Alloc(MCache *c, int32 sizeclass, uintptr size, int32 zeroed); @@ -352,14 +359,14 @@ struct MHeap byte *arena_start; byte *arena_used; byte *arena_end; - + // central free lists for small size classes. // the union makes sure that the MCentrals are - // spaced 64 bytes apart, so that each MCentral.Lock + // spaced CacheLineSize bytes apart, so that each MCentral.Lock // gets its own cache line. union { MCentral; - byte pad[64]; + byte pad[CacheLineSize]; } central[NumSizeClasses]; FixAlloc spanalloc; // allocator for Span* @@ -387,7 +394,7 @@ int32 runtime_checking; void runtime_markspan(void *v, uintptr size, uintptr n, bool leftover); void runtime_unmarkspan(void *v, uintptr size); bool runtime_blockspecial(void*); -void runtime_setblockspecial(void*); +void runtime_setblockspecial(void*, bool); void runtime_purgecachedstats(M*); enum @@ -402,6 +409,8 @@ void runtime_Mprof_Init(void); void runtime_MProf_Malloc(void*, uintptr); void runtime_MProf_Free(void*, uintptr); void runtime_MProf_Mark(void (*scan)(byte *, int64)); +int32 runtime_helpgc(bool*); +void runtime_gchelper(void); // Malloc profiling settings. // Must match definition in extern.go. 
@@ -412,13 +421,6 @@ enum { }; extern int32 runtime_malloc_profile; -typedef struct Finalizer Finalizer; -struct Finalizer -{ - Finalizer *next; // for use by caller of getfinalizer - void (*fn)(void*); - void *arg; - const struct __go_func_type *ft; -}; - -Finalizer* runtime_getfinalizer(void*, bool); +struct __go_func_type; +bool runtime_getfinalizer(void *p, bool del, void (**fn)(void*), const struct __go_func_type **ft); +void runtime_walkfintab(void (*fn)(void*), void (*scan)(byte*, int64)); diff --git a/libgo/runtime/mcache.c b/libgo/runtime/mcache.c index 191b0d1..6c60aeb 100644 --- a/libgo/runtime/mcache.c +++ b/libgo/runtime/mcache.c @@ -7,6 +7,7 @@ // See malloc.h for an overview. #include "runtime.h" +#include "arch.h" #include "malloc.h" void* diff --git a/libgo/runtime/mcentral.c b/libgo/runtime/mcentral.c index cd3d6ca..b98a8d3 100644 --- a/libgo/runtime/mcentral.c +++ b/libgo/runtime/mcentral.c @@ -15,6 +15,7 @@ // so that it is faster to move those lists between MCaches and MCentrals. #include "runtime.h" +#include "arch.h" #include "malloc.h" static bool MCentral_Grow(MCentral *c); diff --git a/libgo/runtime/mem.c b/libgo/runtime/mem.c index 90c2c61..4267c55 100644 --- a/libgo/runtime/mem.c +++ b/libgo/runtime/mem.c @@ -2,6 +2,7 @@ #include <unistd.h> #include "runtime.h" +#include "arch.h" #include "malloc.h" #ifndef MAP_ANON diff --git a/libgo/runtime/mem_posix_memalign.c b/libgo/runtime/mem_posix_memalign.c index 2318be8..7d04f99 100644 --- a/libgo/runtime/mem_posix_memalign.c +++ b/libgo/runtime/mem_posix_memalign.c @@ -1,6 +1,7 @@ #include <errno.h> #include "runtime.h" +#include "arch.h" #include "malloc.h" void* diff --git a/libgo/runtime/mfinal.c b/libgo/runtime/mfinal.c index 04d58dd..db9a4fd 100644 --- a/libgo/runtime/mfinal.c +++ b/libgo/runtime/mfinal.c @@ -3,18 +3,17 @@ // license that can be found in the LICENSE file. #include "runtime.h" +#include "arch.h" #include "malloc.h" -// Lock to protect finalizer data structures. -// Cannot reuse mheap.Lock because the finalizer -// maintenance requires allocation. -static Lock finlock; +enum { debug = 0 }; -void -runtime_initfintab() +typedef struct Fin Fin; +struct Fin { - runtime_initlock(&finlock); -} + void (*fn)(void*); + const struct __go_func_type *ft; +}; // Finalizer hash table. Direct hash, linear scan, at most 3/4 full. // Table size is power of 3 so that hash can be key % max. 
@@ -26,25 +25,43 @@ runtime_initfintab() typedef struct Fintab Fintab; struct Fintab { - void **key; - Finalizer **val; + Lock; + void **fkey; + Fin *val; int32 nkey; // number of non-nil entries in key int32 ndead; // number of dead (-1) entries in key int32 max; // size of key, val allocations }; +#define TABSZ 17 +#define TAB(p) (&fintab[((uintptr)(p)>>3)%TABSZ]) + +static struct { + Fintab; + uint8 pad[0 /* CacheLineSize - sizeof(Fintab) */]; +} fintab[TABSZ]; + +void +runtime_initfintab() +{ + int32 i; + + for(i=0; i<TABSZ; i++) + runtime_initlock(&fintab[i]); +} + static void -addfintab(Fintab *t, void *k, Finalizer *v) +addfintab(Fintab *t, void *k, void (*fn)(void*), const struct __go_func_type *ft) { int32 i, j; i = (uintptr)k % (uintptr)t->max; for(j=0; j<t->max; j++) { - if(t->key[i] == nil) { + if(t->fkey[i] == nil) { t->nkey++; goto ret; } - if(t->key[i] == (void*)-1) { + if(t->fkey[i] == (void*)-1) { t->ndead--; goto ret; } @@ -56,30 +73,32 @@ addfintab(Fintab *t, void *k, Finalizer *v) runtime_throw("finalizer table inconsistent"); ret: - t->key[i] = k; - t->val[i] = v; + t->fkey[i] = k; + t->val[i].fn = fn; + t->val[i].ft = ft; } -static Finalizer* -lookfintab(Fintab *t, void *k, bool del) +static bool +lookfintab(Fintab *t, void *k, bool del, Fin *f) { int32 i, j; - Finalizer *v; if(t->max == 0) - return nil; + return false; i = (uintptr)k % (uintptr)t->max; for(j=0; j<t->max; j++) { - if(t->key[i] == nil) - return nil; - if(t->key[i] == k) { - v = t->val[i]; + if(t->fkey[i] == nil) + return false; + if(t->fkey[i] == k) { + if(f) + *f = t->val[i]; if(del) { - t->key[i] = (void*)-1; - t->val[i] = nil; + t->fkey[i] = (void*)-1; + t->val[i].fn = nil; + t->val[i].ft = nil; t->ndead++; } - return v; + return true; } if(++i == t->max) i = 0; @@ -87,108 +106,123 @@ lookfintab(Fintab *t, void *k, bool del) // cannot happen - table is known to be non-full runtime_throw("finalizer table inconsistent"); - return nil; + return false; } -static Fintab fintab; - -// add finalizer; caller is responsible for making sure not already in table -void -runtime_addfinalizer(void *p, void (*f)(void*), const struct __go_func_type *ft) +static void +resizefintab(Fintab *tab) { Fintab newtab; + void *k; int32 i; - byte *base; - Finalizer *e; + + runtime_memclr((byte*)&newtab, sizeof newtab); + newtab.max = tab->max; + if(newtab.max == 0) + newtab.max = 3*3*3; + else if(tab->ndead < tab->nkey/2) { + // grow table if not many dead values. + // otherwise just rehash into table of same size. 
+ newtab.max *= 3; + } + + newtab.fkey = runtime_mallocgc(newtab.max*sizeof newtab.fkey[0], FlagNoPointers, 0, 1); + newtab.val = runtime_mallocgc(newtab.max*sizeof newtab.val[0], 0, 0, 1); - e = nil; - if(f != nil) { - e = runtime_mal(sizeof *e); - e->fn = f; - e->ft = ft; + for(i=0; i<tab->max; i++) { + k = tab->fkey[i]; + if(k != nil && k != (void*)-1) + addfintab(&newtab, k, tab->val[i].fn, tab->val[i].ft); } + + runtime_free(tab->fkey); + runtime_free(tab->val); + + tab->fkey = newtab.fkey; + tab->val = newtab.val; + tab->nkey = newtab.nkey; + tab->ndead = newtab.ndead; + tab->max = newtab.max; +} +bool +runtime_addfinalizer(void *p, void (*f)(void*), const struct __go_func_type *ft) +{ + Fintab *tab; + byte *base; + bool ret = false; + + if(debug) { + if(!runtime_mlookup(p, &base, nil, nil) || p != base) + runtime_throw("addfinalizer on invalid pointer"); + } + if(!__sync_bool_compare_and_swap(&m->holds_finlock, 0, 1)) runtime_throw("finalizer deadlock"); - runtime_lock(&finlock); - if(!runtime_mlookup(p, &base, nil, nil) || p != base) { - runtime_unlock(&finlock); - __sync_bool_compare_and_swap(&m->holds_finlock, 1, 0); - runtime_throw("addfinalizer on invalid pointer"); - } + tab = TAB(p); + runtime_lock(tab); if(f == nil) { - lookfintab(&fintab, p, 1); + if(lookfintab(tab, p, true, nil)) + runtime_setblockspecial(p, false); + ret = true; goto unlock; } - if(lookfintab(&fintab, p, 0)) { - runtime_unlock(&finlock); - __sync_bool_compare_and_swap(&m->holds_finlock, 1, 0); - runtime_throw("double finalizer"); + if(lookfintab(tab, p, false, nil)) { + ret = false; + goto unlock; } - runtime_setblockspecial(p); - if(fintab.nkey >= fintab.max/2+fintab.max/4) { + if(tab->nkey >= tab->max/2+tab->max/4) { // keep table at most 3/4 full: // allocate new table and rehash. - - runtime_memclr((byte*)&newtab, sizeof newtab); - newtab.max = fintab.max; - if(newtab.max == 0) - newtab.max = 3*3*3; - else if(fintab.ndead < fintab.nkey/2) { - // grow table if not many dead values. - // otherwise just rehash into table of same size. - newtab.max *= 3; - } - - newtab.key = runtime_mallocgc(newtab.max*sizeof newtab.key[0], FlagNoPointers, 0, 1); - newtab.val = runtime_mallocgc(newtab.max*sizeof newtab.val[0], 0, 0, 1); - - for(i=0; i<fintab.max; i++) { - void *k; - - k = fintab.key[i]; - if(k != nil && k != (void*)-1) - addfintab(&newtab, k, fintab.val[i]); - } - runtime_free(fintab.key); - runtime_free(fintab.val); - fintab = newtab; + resizefintab(tab); } - addfintab(&fintab, p, e); + addfintab(tab, p, f, ft); + runtime_setblockspecial(p, true); + ret = true; + unlock: - runtime_unlock(&finlock); + runtime_unlock(tab); __sync_bool_compare_and_swap(&m->holds_finlock, 1, 0); if(__sync_bool_compare_and_swap(&m->gcing_for_finlock, 1, 0)) { __go_run_goroutine_gc(200); } + + return ret; } // get finalizer; if del, delete finalizer. -// caller is responsible for updating RefHasFinalizer bit. -Finalizer* -runtime_getfinalizer(void *p, bool del) +// caller is responsible for updating RefHasFinalizer (special) bit. 
+bool +runtime_getfinalizer(void *p, bool del, void (**fn)(void*), const struct __go_func_type **ft) { - Finalizer *f; + Fintab *tab; + bool res; + Fin f; if(!__sync_bool_compare_and_swap(&m->holds_finlock, 0, 1)) runtime_throw("finalizer deadlock"); - runtime_lock(&finlock); - f = lookfintab(&fintab, p, del); - runtime_unlock(&finlock); + tab = TAB(p); + runtime_lock(tab); + res = lookfintab(tab, p, del, &f); + runtime_unlock(tab); __sync_bool_compare_and_swap(&m->holds_finlock, 1, 0); if(__sync_bool_compare_and_swap(&m->gcing_for_finlock, 1, 0)) { __go_run_goroutine_gc(201); } - return f; + if(res==false) + return false; + *fn = f.fn; + *ft = f.ft; + return true; } void @@ -196,18 +230,22 @@ runtime_walkfintab(void (*fn)(void*), void (*scan)(byte *, int64)) { void **key; void **ekey; + int32 i; if(!__sync_bool_compare_and_swap(&m->holds_finlock, 0, 1)) runtime_throw("finalizer deadlock"); - scan((byte*)&fintab, sizeof fintab); - runtime_lock(&finlock); - key = fintab.key; - ekey = key + fintab.max; - for(; key < ekey; key++) - if(*key != nil && *key != ((void*)-1)) - fn(*key); - runtime_unlock(&finlock); + for(i=0; i<TABSZ; i++) { + runtime_lock(&fintab[i]); + key = fintab[i].fkey; + ekey = key + fintab[i].max; + for(; key < ekey; key++) + if(*key != nil && *key != ((void*)-1)) + fn(*key); + scan((byte*)&fintab[i].fkey, sizeof(void*)); + scan((byte*)&fintab[i].val, sizeof(void*)); + runtime_unlock(&fintab[i]); + } __sync_bool_compare_and_swap(&m->holds_finlock, 1, 0); if(__sync_bool_compare_and_swap(&m->gcing_for_finlock, 1, 0)) { diff --git a/libgo/runtime/mfixalloc.c b/libgo/runtime/mfixalloc.c index c05583d..109cfe8 100644 --- a/libgo/runtime/mfixalloc.c +++ b/libgo/runtime/mfixalloc.c @@ -7,6 +7,7 @@ // See malloc.h for overview. #include "runtime.h" +#include "arch.h" #include "malloc.h" // Initialize f to allocate objects of the given size, diff --git a/libgo/runtime/mgc0.c b/libgo/runtime/mgc0.c index 900ebde..cb58525 100644 --- a/libgo/runtime/mgc0.c +++ b/libgo/runtime/mgc0.c @@ -5,13 +5,14 @@ // Garbage collector. #include "runtime.h" +#include "arch.h" #include "malloc.h" enum { Debug = 0, - UseCas = 1, PtrSize = sizeof(void*), - + DebugMark = 0, // run second pass to check mark + // Four bits per word (see #defines below). wordsPerBitmapWord = sizeof(void*)*8/4, bitShift = sizeof(void*)*8/4, @@ -50,28 +51,72 @@ enum { #define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial) +// TODO: Make these per-M. 
static uint64 nlookup; static uint64 nsizelookup; static uint64 naddrlookup; +static uint64 nhandoff; + static int32 gctrace; typedef struct Workbuf Workbuf; struct Workbuf { Workbuf *next; - uintptr nw; - byte *w[2048-2]; + uintptr nobj; + byte *obj[512-2]; +}; + +typedef struct Finalizer Finalizer; +struct Finalizer +{ + void (*fn)(void*); + void *arg; + const struct __go_func_type *ft; +}; + +typedef struct FinBlock FinBlock; +struct FinBlock +{ + FinBlock *alllink; + FinBlock *next; + int32 cnt; + int32 cap; + Finalizer fin[1]; }; static bool finstarted; static pthread_mutex_t finqlock = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t finqcond = PTHREAD_COND_INITIALIZER; -static Finalizer *finq; +static FinBlock *finq; // list of finalizers that are to be executed +static FinBlock *finc; // cache of free blocks +static FinBlock *allfin; // list of all blocks +static Lock finlock; static int32 fingwait; static void runfinq(void*); static Workbuf* getempty(Workbuf*); static Workbuf* getfull(Workbuf*); +static void putempty(Workbuf*); +static Workbuf* handoff(Workbuf*); + +static struct { + Lock fmu; + Workbuf *full; + Lock emu; + Workbuf *empty; + uint32 nproc; + volatile uint32 nwait; + volatile uint32 ndone; + Note alldone; + Lock markgate; + Lock sweepgate; + MSpan *spans; + + Lock; + byte *chunk; + uintptr nchunk; +} work; // scanblock scans a block of n bytes starting at pointer b for references // to other objects, scanning any it finds recursively until there are no @@ -82,13 +127,14 @@ static Workbuf* getfull(Workbuf*); static void scanblock(byte *b, int64 n) { - byte *obj, *arena_start, *p; + byte *obj, *arena_start, *arena_used, *p; void **vp; - uintptr size, *bitp, bits, shift, i, j, x, xbits, off; + uintptr size, *bitp, bits, shift, i, j, x, xbits, off, nobj, nproc; MSpan *s; PageID k; - void **bw, **w, **ew; + void **wp; Workbuf *wbuf; + bool keepworking; if((int64)(uintptr)n != n || n < 0) { // runtime_printf("scanblock %p %lld\n", b, (long long)n); @@ -97,11 +143,19 @@ scanblock(byte *b, int64 n) // Memory arena parameters. arena_start = runtime_mheap.arena_start; - + arena_used = runtime_mheap.arena_used; + nproc = work.nproc; + wbuf = nil; // current work buffer - ew = nil; // end of work buffer - bw = nil; // beginning of work buffer - w = nil; // current pointer into work buffer + wp = nil; // storage for next queued pointer (write pointer) + nobj = 0; // number of queued objects + + // Scanblock helpers pass b==nil. + // The main proc needs to return to make more + // calls to scanblock. But if work.nproc==1 then + // might as well process blocks as soon as we + // have them. + keepworking = b == nil || work.nproc == 1; // Align b to a word boundary. off = (uintptr)b & (PtrSize-1); @@ -117,17 +171,17 @@ scanblock(byte *b, int64 n) runtime_printf("scanblock %p %lld\n", b, (long long) n); vp = (void**)b; - n /= PtrSize; + n >>= (2+PtrSize/8); /* n /= PtrSize (4 or 8) */ for(i=0; i<(uintptr)n; i++) { obj = (byte*)vp[i]; - + // Words outside the arena cannot be pointers. - if((byte*)obj < arena_start || (byte*)obj >= runtime_mheap.arena_used) + if((byte*)obj < arena_start || (byte*)obj >= arena_used) continue; - + // obj may be a pointer to a live object. // Try to find the beginning of the object. - + // Round down to word boundary. obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1)); @@ -185,47 +239,72 @@ scanblock(byte *b, int64 n) found: // Now we have bits, bitp, and shift correct for // obj pointing at the base of the object. 
- // If not allocated or already marked, done. - if((bits & bitAllocated) == 0 || (bits & bitMarked) != 0) + // Only care about allocated and not marked. + if((bits & (bitAllocated|bitMarked)) != bitAllocated) continue; - *bitp |= bitMarked<<shift; + if(nproc == 1) + *bitp |= bitMarked<<shift; + else { + for(;;) { + x = *bitp; + if(x & (bitMarked<<shift)) + goto continue_obj; + if(runtime_casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift)))) + break; + } + } // If object has no pointers, don't need to scan further. if((bits & bitNoPointers) != 0) continue; + // If another proc wants a pointer, give it some. + if(nobj > 4 && work.nwait > 0 && work.full == nil) { + wbuf->nobj = nobj; + wbuf = handoff(wbuf); + nobj = wbuf->nobj; + wp = (void**)(wbuf->obj + nobj); + } + // If buffer is full, get a new one. - if(w >= ew) { + if(wbuf == nil || nobj >= nelem(wbuf->obj)) { + if(wbuf != nil) + wbuf->nobj = nobj; wbuf = getempty(wbuf); - bw = (void**)wbuf->w; - w = bw; - ew = bw + nelem(wbuf->w); + wp = (void**)(wbuf->obj); + nobj = 0; } - *w++ = obj; + *wp++ = obj; + nobj++; + continue_obj:; } - + // Done scanning [b, b+n). Prepare for the next iteration of // the loop by setting b and n to the parameters for the next block. - // Fetch b from the work buffers. - if(w <= bw) { + // Fetch b from the work buffer. + if(nobj == 0) { + if(!keepworking) { + putempty(wbuf); + return; + } // Emptied our buffer: refill. wbuf = getfull(wbuf); if(wbuf == nil) - break; - bw = (void**)wbuf->w; - ew = (void**)(wbuf->w + nelem(wbuf->w)); - w = bw+wbuf->nw; + return; + nobj = wbuf->nobj; + wp = (void**)(wbuf->obj + wbuf->nobj); } - b = *--w; - + b = *--wp; + nobj--; + // Figure out n = size of b. Start by loading bits for b. off = (uintptr*)b - (uintptr*)arena_start; bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; shift = off % wordsPerBitmapWord; xbits = *bitp; bits = xbits >> shift; - + // Might be small; look for nearby block boundary. // A block boundary is marked by either bitBlockBoundary // or bitAllocated being set (see notes near their definition). @@ -244,12 +323,12 @@ scanblock(byte *b, int64 n) // apply a mask to keep only the bits corresponding // to shift+j < bitShift aka j < bitShift-shift. bits &= (boundary<<(bitShift-shift)) - boundary; - + // A block boundary j words before b is indicated by // xbits>>(shift-j) & boundary // (assuming shift >= j). There is no cleverness here // avoid the test, because when j gets too large the shift - // turns negative, which is undefined in C. + // turns negative, which is undefined in C. for(j=1; j<bitShift; j++) { if(((bits>>j)&boundary) != 0 || (shift>=j && ((xbits>>(shift-j))&boundary) != 0)) { @@ -257,7 +336,7 @@ scanblock(byte *b, int64 n) goto scan; } } - + // Fall back to asking span about size class. // (Manually inlined copy of MHeap_Lookup.) nlookup++; @@ -274,29 +353,123 @@ scanblock(byte *b, int64 n) } } -static struct { - Workbuf *full; - Workbuf *empty; - byte *chunk; - uintptr nchunk; -} work; +// debug_scanblock is the debug copy of scanblock. +// it is simpler, slower, single-threaded, recursive, +// and uses bitSpecial as the mark bit. +static void +debug_scanblock(byte *b, int64 n) +{ + byte *obj, *p; + void **vp; + uintptr size, *bitp, bits, shift, i, xbits, off; + MSpan *s; + + if(!DebugMark) + runtime_throw("debug_scanblock without DebugMark"); + + if((int64)(uintptr)n != n || n < 0) { + //runtime_printf("debug_scanblock %p %D\n", b, n); + runtime_throw("debug_scanblock"); + } + + // Align b to a word boundary. 
+ off = (uintptr)b & (PtrSize-1); + if(off != 0) { + b += PtrSize - off; + n -= PtrSize - off; + } + + vp = (void**)b; + n /= PtrSize; + for(i=0; i<(uintptr)n; i++) { + obj = (byte*)vp[i]; + + // Words outside the arena cannot be pointers. + if((byte*)obj < runtime_mheap.arena_start || (byte*)obj >= runtime_mheap.arena_used) + continue; + + // Round down to word boundary. + obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1)); + + // Consult span table to find beginning. + s = runtime_MHeap_LookupMaybe(&runtime_mheap, obj); + if(s == nil) + continue; + + + p = (byte*)((uintptr)s->start<<PageShift); + if(s->sizeclass == 0) { + obj = p; + size = (uintptr)s->npages<<PageShift; + } else { + if((byte*)obj >= (byte*)s->limit) + continue; + size = runtime_class_to_size[s->sizeclass]; + int32 i = ((byte*)obj - p)/size; + obj = p+i*size; + } + + // Now that we know the object header, reload bits. + off = (uintptr*)obj - (uintptr*)runtime_mheap.arena_start; + bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; + shift = off % wordsPerBitmapWord; + xbits = *bitp; + bits = xbits >> shift; + + // Now we have bits, bitp, and shift correct for + // obj pointing at the base of the object. + // If not allocated or already marked, done. + if((bits & bitAllocated) == 0 || (bits & bitSpecial) != 0) // NOTE: bitSpecial not bitMarked + continue; + *bitp |= bitSpecial<<shift; + if(!(bits & bitMarked)) + runtime_printf("found unmarked block %p in %p\n", obj, vp+i); + + // If object has no pointers, don't need to scan further. + if((bits & bitNoPointers) != 0) + continue; + + debug_scanblock(obj, size); + } +} // Get an empty work buffer off the work.empty list, // allocating new buffers as needed. static Workbuf* getempty(Workbuf *b) { - if(b != nil) { - b->nw = nelem(b->w); - b->next = work.full; - work.full = b; - } - b = work.empty; - if(b != nil) { - work.empty = b->next; - return b; + if(work.nproc == 1) { + // Put b on full list. + if(b != nil) { + b->next = work.full; + work.full = b; + } + // Grab from empty list if possible. + b = work.empty; + if(b != nil) { + work.empty = b->next; + goto haveb; + } + } else { + // Put b on full list. + if(b != nil) { + runtime_lock(&work.fmu); + b->next = work.full; + work.full = b; + runtime_unlock(&work.fmu); + } + // Grab from empty list if possible. + runtime_lock(&work.emu); + b = work.empty; + if(b != nil) + work.empty = b->next; + runtime_unlock(&work.emu); + if(b != nil) + goto haveb; } - + + // Need to allocate. + runtime_lock(&work); if(work.nchunk < sizeof *b) { work.nchunk = 1<<20; work.chunk = runtime_SysAlloc(work.nchunk); @@ -304,25 +477,121 @@ getempty(Workbuf *b) b = (Workbuf*)work.chunk; work.chunk += sizeof *b; work.nchunk -= sizeof *b; + runtime_unlock(&work); + +haveb: + b->nobj = 0; return b; } +static void +putempty(Workbuf *b) +{ + if(b == nil) + return; + + if(work.nproc == 1) { + b->next = work.empty; + work.empty = b; + return; + } + + runtime_lock(&work.emu); + b->next = work.empty; + work.empty = b; + runtime_unlock(&work.emu); +} + // Get a full work buffer off the work.full list, or return nil. static Workbuf* getfull(Workbuf *b) { - if(b != nil) { - b->nw = 0; - b->next = work.empty; - work.empty = b; + int32 i; + Workbuf *b1; + + if(work.nproc == 1) { + // Put b on empty list. + if(b != nil) { + b->next = work.empty; + work.empty = b; + } + // Grab from full list if possible. + // Since work.nproc==1, no one else is + // going to give us work. 
+ b = work.full; + if(b != nil) + work.full = b->next; + return b; + } + + putempty(b); + + // Grab buffer from full list if possible. + for(;;) { + b1 = work.full; + if(b1 == nil) + break; + runtime_lock(&work.fmu); + if(work.full != nil) { + b1 = work.full; + work.full = b1->next; + runtime_unlock(&work.fmu); + return b1; + } + runtime_unlock(&work.fmu); + } + + runtime_xadd(&work.nwait, +1); + for(i=0;; i++) { + b1 = work.full; + if(b1 != nil) { + runtime_lock(&work.fmu); + if(work.full != nil) { + runtime_xadd(&work.nwait, -1); + b1 = work.full; + work.full = b1->next; + runtime_unlock(&work.fmu); + return b1; + } + runtime_unlock(&work.fmu); + continue; + } + if(work.nwait == work.nproc) + return nil; + if(i < 10) + runtime_procyield(20); + else if(i < 20) + runtime_osyield(); + else + runtime_usleep(100); } - b = work.full; - if(b != nil) - work.full = b->next; - return b; } -// Scanstack calls scanblock on each of gp's stack segments. +static Workbuf* +handoff(Workbuf *b) +{ + int32 n; + Workbuf *b1; + + // Make new buffer with half of b's pointers. + b1 = getempty(nil); + n = b->nobj/2; + b->nobj -= n; + b1->nobj = n; + runtime_memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]); + nhandoff += n; + + // Put b on full list - let first half of b get stolen. + runtime_lock(&work.fmu); + b->next = work.full; + work.full = b; + runtime_unlock(&work.fmu); + + return b1; +} + +// Markfin calls scanblock on the blocks that have finalizers: +// the things pointed at cannot be freed until the finalizers have run. static void markfin(void *v) { @@ -355,11 +624,22 @@ __go_register_gc_roots (struct root_list* r) roots = r; } -// Mark static void -mark(void) +debug_markfin(void *v) +{ + uintptr size; + + if(!runtime_mlookup(v, (byte**)&v, &size, nil)) + runtime_throw("debug_mark - finalizer inconsistency"); + debug_scanblock(v, size); +} + +// Mark +static void +mark(void (*scan)(byte*, int64)) { struct root_list *pl; + FinBlock *fb; for(pl = roots; pl != nil; pl = pl->next) { struct root* pr = &pl->roots[0]; @@ -372,18 +652,63 @@ mark(void) } } - scanblock((byte*)&m0, sizeof m0); - scanblock((byte*)&finq, sizeof finq); - runtime_MProf_Mark(scanblock); + scan((byte*)&m0, sizeof m0); + scan((byte*)&finq, sizeof finq); + runtime_MProf_Mark(scan); // mark stacks - __go_scanstacks(scanblock); + __go_scanstacks(scan); // mark things pointed at by objects with finalizers - runtime_walkfintab(markfin, scanblock); + if(scan == debug_scanblock) + runtime_walkfintab(debug_markfin, scan); + else + runtime_walkfintab(markfin, scan); + + for(fb=allfin; fb; fb=fb->alllink) + scanblock((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0])); + + // in multiproc mode, join in the queued work. + scan(nil, 0); } -// Sweep frees or calls finalizers for blocks not marked in the mark phase. 
+static bool +handlespecial(byte *p, uintptr size) +{ + void (*fn)(void*); + const struct __go_func_type *ft; + FinBlock *block; + Finalizer *f; + + if(!runtime_getfinalizer(p, true, &fn, &ft)) { + runtime_setblockspecial(p, false); + runtime_MProf_Free(p, size); + return false; + } + + runtime_lock(&finlock); + if(finq == nil || finq->cnt == finq->cap) { + if(finc == nil) { + finc = runtime_SysAlloc(PageSize); + finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1; + finc->alllink = allfin; + allfin = finc; + } + block = finc; + finc = block->next; + block->next = finq; + finq = block; + } + f = &finq->fin[finq->cnt]; + finq->cnt++; + f->fn = fn; + f->ft = ft; + f->arg = p; + runtime_unlock(&finlock); + return true; +} + +// Sweep frees or collects finalizers for blocks not marked in the mark phase. // It clears the mark bits in preparation for the next GC round. static void sweep(void) @@ -393,9 +718,17 @@ sweep(void) uintptr size; byte *p; MCache *c; - Finalizer *f; + byte *arena_start; + + arena_start = runtime_mheap.arena_start; + + for(;;) { + s = work.spans; + if(s == nil) + break; + if(!runtime_casp(&work.spans, s, s->allnext)) + continue; - for(s = runtime_mheap.allspans; s != nil; s = s->allnext) { if(s->state != MSpanInUse) continue; @@ -410,13 +743,15 @@ sweep(void) npages = runtime_class_to_allocnpages[cl]; n = (npages << PageShift) / size; } - - // sweep through n objects of given size starting at p. + + // Sweep through n objects of given size starting at p. + // This thread owns the span now, so it can manipulate + // the block bitmap without atomic operations. for(; n > 0; n--, p += size) { uintptr off, *bitp, shift, bits; - off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start; - bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; + off = (uintptr*)p - (uintptr*)arena_start; + bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; shift = off % wordsPerBitmapWord; bits = *bitp>>shift; @@ -424,20 +759,21 @@ sweep(void) continue; if((bits & bitMarked) != 0) { + if(DebugMark) { + if(!(bits & bitSpecial)) + runtime_printf("found spurious mark on %p\n", p); + *bitp &= ~(bitSpecial<<shift); + } *bitp &= ~(bitMarked<<shift); continue; } - if((bits & bitSpecial) != 0) { - // Special means it has a finalizer or is being profiled. - f = runtime_getfinalizer(p, 1); - if(f != nil) { - f->arg = p; - f->next = finq; - finq = f; + // Special means it has a finalizer or is being profiled. + // In DebugMark mode, the bit has been coopted so + // we have to assume all blocks are special. + if(DebugMark || (bits & bitSpecial) != 0) { + if(handlespecial(p, size)) continue; - } - runtime_MProf_Free(p, size); } // Mark freed; restore block boundary bit. @@ -464,6 +800,23 @@ sweep(void) static pthread_mutex_t gcsema = PTHREAD_MUTEX_INITIALIZER; +void +runtime_gchelper(void) +{ + // Wait until main proc is ready for mark help. + runtime_lock(&work.markgate); + runtime_unlock(&work.markgate); + scanblock(nil, 0); + + // Wait until main proc is ready for sweep help. + runtime_lock(&work.sweepgate); + runtime_unlock(&work.sweepgate); + sweep(); + + if(runtime_xadd(&work.ndone, +1) == work.nproc-1) + runtime_notewakeup(&work.alldone); +} + // Initialized from $GOGC. GOGC=off means no gc. 
// // Next gc is after we've allocated an extra amount of @@ -481,7 +834,7 @@ runtime_gc(int32 force __attribute__ ((unused))) int64 t0, t1, t2, t3; uint64 heap0, heap1, obj0, obj1; char *p; - Finalizer *fp; + bool extra; // The gc is turned off (via enablegc) until // the bootstrap has completed. @@ -502,10 +855,16 @@ runtime_gc(int32 force __attribute__ ((unused))) gcpercent = -1; else gcpercent = runtime_atoi(p); - + p = runtime_getenv("GOGCTRACE"); if(p != nil) gctrace = runtime_atoi(p); + + runtime_initlock(&work.fmu); + runtime_initlock(&work.emu); + runtime_initlock(&work.markgate); + runtime_initlock(&work.sweepgate); + runtime_initlock(&work.Lock); } if(gcpercent < 0) return; @@ -522,20 +881,42 @@ runtime_gc(int32 force __attribute__ ((unused))) nlookup = 0; nsizelookup = 0; naddrlookup = 0; + nhandoff = 0; m->gcing = 1; runtime_stoptheworld(); - if(runtime_mheap.Lock.key != 0) - runtime_throw("runtime_mheap locked during gc"); __go_cachestats(); heap0 = mstats.heap_alloc; obj0 = mstats.nmalloc - mstats.nfree; - mark(); + runtime_lock(&work.markgate); + runtime_lock(&work.sweepgate); + + extra = false; + work.nproc = 1; +#if 0 + if(runtime_gomaxprocs > 1 && runtime_ncpu > 1) { + runtime_noteclear(&work.alldone); + work.nproc += runtime_helpgc(&extra); + } +#endif + work.nwait = 0; + work.ndone = 0; + + runtime_unlock(&work.markgate); // let the helpers in + mark(scanblock); + if(DebugMark) + mark(debug_scanblock); t1 = runtime_nanotime(); + + work.spans = runtime_mheap.allspans; + runtime_unlock(&work.sweepgate); // let the helpers in sweep(); + if(work.nproc > 1) + runtime_notesleep(&work.alldone); t2 = runtime_nanotime(); + __go_stealcache(); __go_cachestats(); @@ -553,21 +934,28 @@ runtime_gc(int32 force __attribute__ ((unused))) mstats.numgc++; if(mstats.debuggc) runtime_printf("pause %llu\n", (unsigned long long)t3-t0); - + if(gctrace) { - runtime_printf("gc%d: %llu+%llu+%llu ms %llu -> %llu MB %llu -> %llu (%llu-%llu) objects %llu pointer lookups (%llu size, %llu addr)\n", + runtime_printf("gc%d: %llu+%llu+%llu ms %llu -> %llu MB %llu -> %llu (%llu-%llu) objects %llu pointer lookups (%llu size, %llu addr) %llu handoff\n", mstats.numgc, (unsigned long long)(t1-t0)/1000000, (unsigned long long)(t2-t1)/1000000, (unsigned long long)(t3-t2)/1000000, (unsigned long long)heap0>>20, (unsigned long long)heap1>>20, (unsigned long long)obj0, (unsigned long long)obj1, (unsigned long long)mstats.nmalloc, (unsigned long long)mstats.nfree, - (unsigned long long)nlookup, (unsigned long long)nsizelookup, (unsigned long long)naddrlookup); + (unsigned long long)nlookup, (unsigned long long)nsizelookup, (unsigned long long)naddrlookup, (unsigned long long) nhandoff); } pthread_mutex_unlock(&gcsema); - runtime_starttheworld(); + + // If we could have used another helper proc, start one now, + // in the hope that it will be available next time. + // It would have been even better to start it before the collection, + // but doing so requires allocating memory, so it's tricky to + // coordinate. This lazy approach works out in practice: + // we don't mind if the first couple gc rounds don't have quite + // the maximum number of procs. + runtime_starttheworld(extra); // finqlock is still held. 
- fp = finq; - if(fp != nil) { + if(finq != nil) { // kick off or wake up goroutine to run queued finalizers if(!finstarted) { __go_go(runfinq, nil); @@ -601,37 +989,44 @@ runtime_UpdateMemStats(void) __go_cachestats(); m->gcing = 0; pthread_mutex_unlock(&gcsema); - runtime_starttheworld(); + runtime_starttheworld(false); } static void runfinq(void* dummy) { - Finalizer *f, *next; + Finalizer *f; + FinBlock *fb, *next; + uint32 i; USED(dummy); for(;;) { pthread_mutex_lock(&finqlock); - f = finq; + fb = finq; finq = nil; - if(f == nil) { + if(fb == nil) { fingwait = 1; pthread_cond_wait(&finqcond, &finqlock); pthread_mutex_unlock(&finqlock); continue; } pthread_mutex_unlock(&finqlock); - for(; f; f=next) { - void *params[1]; - - next = f->next; - params[0] = &f->arg; - reflect_call(f->ft, (void*)f->fn, 0, 0, params, nil); - f->fn = nil; - f->arg = nil; - f->next = nil; - runtime_free(f); + for(; fb; fb=next) { + next = fb->next; + for(i=0; i<(uint32)fb->cnt; i++) { + void *params[1]; + + f = &fb->fin[i]; + params[0] = &f->arg; + runtime_setblockspecial(f->arg, false); + reflect_call(f->ft, (void*)f->fn, 0, 0, params, nil); + f->fn = nil; + f->arg = nil; + } + fb->cnt = 0; + fb->next = finc; + finc = fb; } runtime_gc(1); // trigger another gc to clean up the finalized objects, if possible } @@ -783,6 +1178,9 @@ runtime_blockspecial(void *v) { uintptr *b, off, shift; + if(DebugMark) + return true; + off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; shift = off % wordsPerBitmapWord; @@ -791,17 +1189,23 @@ runtime_blockspecial(void *v) } void -runtime_setblockspecial(void *v) +runtime_setblockspecial(void *v, bool s) { uintptr *b, off, shift, bits, obits; + if(DebugMark) + return; + off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; shift = off % wordsPerBitmapWord; for(;;) { obits = *b; - bits = obits | (bitSpecial<<shift); + if(s) + bits = obits | (bitSpecial<<shift); + else + bits = obits & ~(bitSpecial<<shift); if(runtime_singleproc) { *b = bits; break; @@ -812,7 +1216,7 @@ runtime_setblockspecial(void *v) } } } - + void runtime_MHeap_MapBits(MHeap *h) { @@ -823,7 +1227,7 @@ runtime_MHeap_MapBits(MHeap *h) bitmapChunk = 8192 }; uintptr n; - + n = (h->arena_used - h->arena_start) / wordsPerBitmapWord; n = (n+bitmapChunk-1) & ~(bitmapChunk-1); if(h->bitmap_mapped >= n) diff --git a/libgo/runtime/mheap.c b/libgo/runtime/mheap.c index cacac7d..a49b405 100644 --- a/libgo/runtime/mheap.c +++ b/libgo/runtime/mheap.c @@ -13,6 +13,7 @@ // and heapmap(i) == span for all s->start <= i < s->start+s->npages. #include "runtime.h" +#include "arch.h" #include "malloc.h" static MSpan *MHeap_AllocLocked(MHeap*, uintptr, int32); @@ -102,6 +103,7 @@ HaveSpan: runtime_throw("MHeap_AllocLocked - bad npages"); runtime_MSpanList_Remove(s); s->state = MSpanInUse; + mstats.heap_idle -= s->npages<<PageShift; if(s->npages > npage) { // Trim extra and put it back in the heap. 
@@ -277,6 +279,7 @@ MHeap_FreeLocked(MHeap *h, MSpan *s) // runtime_printf("MHeap_FreeLocked - span %p ptr %p state %d ref %d\n", s, s->start<<PageShift, s->state, s->ref); runtime_throw("MHeap_FreeLocked - invalid free"); } + mstats.heap_idle += s->npages<<PageShift; s->state = MSpanFree; runtime_MSpanList_Remove(s); sp = (uintptr*)(s->start<<PageShift); diff --git a/libgo/runtime/mprof.goc b/libgo/runtime/mprof.goc index d87be42..23c4f90 100644 --- a/libgo/runtime/mprof.goc +++ b/libgo/runtime/mprof.goc @@ -7,6 +7,7 @@ package runtime #include "runtime.h" +#include "arch.h" #include "malloc.h" #include "defs.h" #include "go-type.h" diff --git a/libgo/runtime/msize.c b/libgo/runtime/msize.c index 6e82885..e2672b0 100644 --- a/libgo/runtime/msize.c +++ b/libgo/runtime/msize.c @@ -26,6 +26,7 @@ // TODO(rsc): Compute max waste for any given size. #include "runtime.h" +#include "arch.h" #include "malloc.h" int32 runtime_class_to_size[NumSizeClasses]; diff --git a/libgo/runtime/proc.c b/libgo/runtime/proc.c index 521bcd64..8af6935 100644 --- a/libgo/runtime/proc.c +++ b/libgo/runtime/proc.c @@ -3,6 +3,7 @@ // license that can be found in the LICENSE file. #include "runtime.h" +#include "arch.h" #include "malloc.h" /* so that acid generated from proc.c includes malloc data structures */ typedef struct Sched Sched; diff --git a/libgo/runtime/runtime.h b/libgo/runtime/runtime.h index ddc99eb..2767dd8 100644 --- a/libgo/runtime/runtime.h +++ b/libgo/runtime/runtime.h @@ -136,7 +136,7 @@ bool __go_sigsend(int32 sig); int64 runtime_nanotime(void); void runtime_stoptheworld(void); -void runtime_starttheworld(void); +void runtime_starttheworld(bool); void __go_go(void (*pfn)(void*), void*); void __go_gc_goroutine_init(void*); void __go_enable_gc(void); @@ -184,18 +184,21 @@ void runtime_notewakeup(Note*); MCache* runtime_allocmcache(void); void free(void *v); struct __go_func_type; -void runtime_addfinalizer(void*, void(*fn)(void*), const struct __go_func_type *); -void runtime_walkfintab(void (*fn)(void*), void (*scan)(byte *, int64)); +bool runtime_addfinalizer(void*, void(*fn)(void*), const struct __go_func_type *); #define runtime_mmap mmap #define runtime_munmap(p, s) munmap((p), (s)) #define runtime_cas(pval, old, new) __sync_bool_compare_and_swap (pval, old, new) #define runtime_casp(pval, old, new) __sync_bool_compare_and_swap (pval, old, new) +#define runtime_xadd(p, v) __sync_add_and_fetch (p, v) void runtime_sigprof(uint8 *pc, uint8 *sp, uint8 *lr); void runtime_cpuprofinit(void); void runtime_resetcpuprofiler(int32); void runtime_setcpuprofilerate(void(*)(uintptr*, int32), int32); uint32 runtime_fastrand1(void); +void runtime_procyield(uint32); +void runtime_osyield(void); +void runtime_usleep(uint32); struct __go_func_type; void reflect_call(const struct __go_func_type *, const void *, _Bool, _Bool, diff --git a/libgo/runtime/sigqueue.goc b/libgo/runtime/sigqueue.goc index 2e47222..3a90868 100644 --- a/libgo/runtime/sigqueue.goc +++ b/libgo/runtime/sigqueue.goc @@ -39,6 +39,7 @@ package runtime #include "config.h" #include "runtime.h" +#include "arch.h" #include "malloc.h" #include "defs.h" diff --git a/libgo/runtime/thread.c b/libgo/runtime/thread.c index bac3f7d..822d5da 100644 --- a/libgo/runtime/thread.c +++ b/libgo/runtime/thread.c @@ -14,19 +14,6 @@ runtime_initlock(Lock *l) runtime_throw("sem_init failed"); } -static uint32 -runtime_xadd(uint32 volatile *val, int32 delta) -{ - uint32 oval, nval; - - for(;;){ - oval = *val; - nval = oval + delta; - if(runtime_cas(val, oval, nval)) - 
return nval; - } -} - // noinline so that runtime_lock doesn't have to split the stack. static void runtime_lock_full(Lock *l) __attribute__ ((noinline)); diff --git a/libgo/runtime/yield.c b/libgo/runtime/yield.c new file mode 100644 index 0000000..3ebc4a4 --- /dev/null +++ b/libgo/runtime/yield.c @@ -0,0 +1,54 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#define _GNU_SOURCE + +#include "config.h" + +#include <stddef.h> +#include <sys/types.h> +#include <sys/time.h> +#include <pthread.h> +#include <unistd.h> + +#ifdef HAVE_SYS_SELECT_H +#include <sys/select.h> +#endif + +#include "runtime.h" + +/* Spin wait. */ + +void +runtime_procyield (uint32 cnt) +{ + volatile uint32 i; + + for (i = 0; i < cnt; ++i) + { +#if defined (__i386__) || defined (__x86_64__) + __builtin_ia32_pause (); +#endif + } +} + +/* Ask the OS to reschedule this thread. */ + +void +runtime_osyield (void) +{ + pthread_yield (); +} + +/* Sleep for some number of microseconds. */ + +void +runtime_usleep (uint32 us) +{ + struct timeval tv; + + tv.tv_sec = us / 1000000; + tv.tv_usec = us % 1000000; + select (0, NULL, NULL, NULL, &tv); +} |
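The new yield.c helpers above are consumed by getfull() in mgc0.c: a GC proc waiting for a full work buffer spins briefly with runtime_procyield, then yields the CPU with runtime_osyield, then sleeps with runtime_usleep. The following is a hedged, self-contained sketch of that escalating backoff; the wait_for_flag() wrapper and the sched_yield/select stand-ins are assumptions for illustration, not the patch's code (yield.c itself uses pthread_yield).

/* Escalating backoff in the style of getfull() in mgc0.c. */

#include <stddef.h>
#include <stdint.h>
#include <sched.h>
#include <sys/select.h>
#include <sys/time.h>

static void procyield(uint32_t cnt)    /* spin wait, as in yield.c */
{
	volatile uint32_t i;

	for (i = 0; i < cnt; ++i) {
#if defined(__i386__) || defined(__x86_64__)
		__builtin_ia32_pause();
#endif
	}
}

static void osyield(void)              /* ask the OS to reschedule this thread */
{
	sched_yield();
}

static void usleep_us(uint32_t us)     /* sleep via select(), as in yield.c */
{
	struct timeval tv;

	tv.tv_sec = us / 1000000;
	tv.tv_usec = us % 1000000;
	select(0, NULL, NULL, NULL, &tv);
}

/* Poll a flag using the same 10/20 iteration thresholds as getfull(). */
static void wait_for_flag(volatile int *flag)
{
	int i;

	for (i = 0; *flag == 0; i++) {
		if (i < 10)
			procyield(20);
		else if (i < 20)
			osyield();
		else
			usleep_us(100);
	}
}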