author     Ian Lance Taylor <ian@gcc.gnu.org>   2011-10-26 23:57:58 +0000
committer  Ian Lance Taylor <ian@gcc.gnu.org>   2011-10-26 23:57:58 +0000
commit     d8f412571f8768df2d3239e72392dfeabbad1559 (patch)
tree       19d182df05ead7ff8ba7ee00a7d57555e1383fdf /libgo/runtime
parent     e0c39d66d4f0607177b1cf8995dda56a667e07b3 (diff)
Update Go library to last weekly.
From-SVN: r180552
Diffstat (limited to 'libgo/runtime')
-rw-r--r--  libgo/runtime/arch.h | 8
-rw-r--r--  libgo/runtime/cpuprof.c | 1
-rw-r--r--  libgo/runtime/go-append.c | 1
-rw-r--r--  libgo/runtime/go-byte-array-to-string.c | 1
-rw-r--r--  libgo/runtime/go-go.c | 3
-rw-r--r--  libgo/runtime/go-int-array-to-string.c | 1
-rw-r--r--  libgo/runtime/go-int-to-string.c | 1
-rw-r--r--  libgo/runtime/go-main.c | 1
-rw-r--r--  libgo/runtime/go-make-slice.c | 1
-rw-r--r--  libgo/runtime/go-new.c | 1
-rw-r--r--  libgo/runtime/go-note.c | 2
-rw-r--r--  libgo/runtime/go-panic.c | 1
-rw-r--r--  libgo/runtime/go-semacquire.c | 6
-rw-r--r--  libgo/runtime/go-string-to-byte-array.c | 1
-rw-r--r--  libgo/runtime/go-string-to-int-array.c | 1
-rw-r--r--  libgo/runtime/go-strplus.c | 1
-rw-r--r--  libgo/runtime/go-strslice.c | 1
-rw-r--r--  libgo/runtime/goc2c.c | 28
-rw-r--r--  libgo/runtime/malloc.goc | 55
-rw-r--r--  libgo/runtime/malloc.h | 40
-rw-r--r--  libgo/runtime/mcache.c | 1
-rw-r--r--  libgo/runtime/mcentral.c | 1
-rw-r--r--  libgo/runtime/mem.c | 1
-rw-r--r--  libgo/runtime/mem_posix_memalign.c | 1
-rw-r--r--  libgo/runtime/mfinal.c | 226
-rw-r--r--  libgo/runtime/mfixalloc.c | 1
-rw-r--r--  libgo/runtime/mgc0.c | 634
-rw-r--r--  libgo/runtime/mheap.c | 3
-rw-r--r--  libgo/runtime/mprof.goc | 1
-rw-r--r--  libgo/runtime/msize.c | 1
-rw-r--r--  libgo/runtime/proc.c | 1
-rw-r--r--  libgo/runtime/runtime.h | 9
-rw-r--r--  libgo/runtime/sigqueue.goc | 1
-rw-r--r--  libgo/runtime/thread.c | 13
-rw-r--r--  libgo/runtime/yield.c | 54
35 files changed, 831 insertions, 272 deletions
diff --git a/libgo/runtime/arch.h b/libgo/runtime/arch.h
new file mode 100644
index 0000000..0546a5d
--- /dev/null
+++ b/libgo/runtime/arch.h
@@ -0,0 +1,8 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// FIXME: Ideally CacheLineSize would be dependent on the host architecture.
+enum {
+ CacheLineSize = 64
+};
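As the FIXME notes, CacheLineSize is simply hard-coded to 64 bytes here. A hedged sketch of how the constant could be keyed to the target architecture instead; the architectures and sizes below are illustrative assumptions, not the actual libgo configuration:

// Sketch only: choose the cache line size per target.  The values and
// the preprocessor tests are assumptions for illustration.
#if defined(__powerpc64__)
enum { CacheLineSize = 128 };   /* POWER cores commonly use 128-byte lines */
#elif defined(__s390x__)
enum { CacheLineSize = 256 };
#else
enum { CacheLineSize = 64 };    /* typical for x86 and many others */
#endif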
diff --git a/libgo/runtime/cpuprof.c b/libgo/runtime/cpuprof.c
index 3797e1c..bec15ae 100644
--- a/libgo/runtime/cpuprof.c
+++ b/libgo/runtime/cpuprof.c
@@ -49,6 +49,7 @@
// in the situation when normally the goroutine "owns" handoff.
#include "runtime.h"
+#include "arch.h"
#include "malloc.h"
#include "array.h"
diff --git a/libgo/runtime/go-append.c b/libgo/runtime/go-append.c
index 261d85b..b1e882c 100644
--- a/libgo/runtime/go-append.c
+++ b/libgo/runtime/go-append.c
@@ -8,6 +8,7 @@
#include "go-panic.h"
#include "array.h"
#include "runtime.h"
+#include "arch.h"
#include "malloc.h"
/* We should be OK if we don't split the stack here, since the only
diff --git a/libgo/runtime/go-byte-array-to-string.c b/libgo/runtime/go-byte-array-to-string.c
index ab9e283..cfe1906 100644
--- a/libgo/runtime/go-byte-array-to-string.c
+++ b/libgo/runtime/go-byte-array-to-string.c
@@ -6,6 +6,7 @@
#include "go-string.h"
#include "runtime.h"
+#include "arch.h"
#include "malloc.h"
struct __go_string
diff --git a/libgo/runtime/go-go.c b/libgo/runtime/go-go.c
index 1391620..d56b8b1 100644
--- a/libgo/runtime/go-go.c
+++ b/libgo/runtime/go-go.c
@@ -17,6 +17,7 @@
#include "go-panic.h"
#include "go-alloc.h"
#include "runtime.h"
+#include "arch.h"
#include "malloc.h"
#ifdef USING_SPLIT_STACK
@@ -561,7 +562,7 @@ __go_cachestats (void)
/* Start the other threads after garbage collection. */
void
-runtime_starttheworld (void)
+runtime_starttheworld (bool extra __attribute__ ((unused)))
{
int i;
pthread_t me;
diff --git a/libgo/runtime/go-int-array-to-string.c b/libgo/runtime/go-int-array-to-string.c
index ec07b87..1a37879 100644
--- a/libgo/runtime/go-int-array-to-string.c
+++ b/libgo/runtime/go-int-array-to-string.c
@@ -7,6 +7,7 @@
#include "go-assert.h"
#include "go-string.h"
#include "runtime.h"
+#include "arch.h"
#include "malloc.h"
struct __go_string
diff --git a/libgo/runtime/go-int-to-string.c b/libgo/runtime/go-int-to-string.c
index af58015..e9645bf 100644
--- a/libgo/runtime/go-int-to-string.c
+++ b/libgo/runtime/go-int-to-string.c
@@ -6,6 +6,7 @@
#include "go-string.h"
#include "runtime.h"
+#include "arch.h"
#include "malloc.h"
struct __go_string
diff --git a/libgo/runtime/go-main.c b/libgo/runtime/go-main.c
index 37956d5..927a36c 100644
--- a/libgo/runtime/go-main.c
+++ b/libgo/runtime/go-main.c
@@ -19,6 +19,7 @@
#include "go-string.h"
#include "runtime.h"
+#include "arch.h"
#include "malloc.h"
#undef int
diff --git a/libgo/runtime/go-make-slice.c b/libgo/runtime/go-make-slice.c
index d0e8369..a818a7f 100644
--- a/libgo/runtime/go-make-slice.c
+++ b/libgo/runtime/go-make-slice.c
@@ -12,6 +12,7 @@
#include "go-type.h"
#include "array.h"
#include "runtime.h"
+#include "arch.h"
#include "malloc.h"
struct __go_open_array
diff --git a/libgo/runtime/go-new.c b/libgo/runtime/go-new.c
index 657978c..b1af5f2 100644
--- a/libgo/runtime/go-new.c
+++ b/libgo/runtime/go-new.c
@@ -6,6 +6,7 @@
#include "go-alloc.h"
#include "runtime.h"
+#include "arch.h"
#include "malloc.h"
void *
diff --git a/libgo/runtime/go-note.c b/libgo/runtime/go-note.c
index 2b80b9b..62c229f 100644
--- a/libgo/runtime/go-note.c
+++ b/libgo/runtime/go-note.c
@@ -12,7 +12,7 @@
#include "runtime.h"
/* We use a single global lock and condition variable. It would be
- better to use a futex on Linux. */
+ better to use a futex on GNU/Linux. */
static pthread_mutex_t note_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t note_cond = PTHREAD_COND_INITIALIZER;
diff --git a/libgo/runtime/go-panic.c b/libgo/runtime/go-panic.c
index c39ea9f..f3e182d 100644
--- a/libgo/runtime/go-panic.c
+++ b/libgo/runtime/go-panic.c
@@ -8,6 +8,7 @@
#include <stdlib.h>
#include "runtime.h"
+#include "arch.h"
#include "malloc.h"
#include "go-alloc.h"
#include "go-defer.h"
diff --git a/libgo/runtime/go-semacquire.c b/libgo/runtime/go-semacquire.c
index 40fe2af..05b6377 100644
--- a/libgo/runtime/go-semacquire.c
+++ b/libgo/runtime/go-semacquire.c
@@ -13,9 +13,9 @@
/* We use a single global lock and condition variable. This is
painful, since it will cause unnecessary contention, but is hard to
- avoid in a portable manner. On Linux we can use futexes, but they
- are unfortunately not exposed by libc and are thus also hard to use
- portably. */
+ avoid in a portable manner. On GNU/Linux we can use futexes, but
+ they are unfortunately not exposed by libc and are thus also hard
+ to use portably. */
static pthread_mutex_t sem_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t sem_cond = PTHREAD_COND_INITIALIZER;
diff --git a/libgo/runtime/go-string-to-byte-array.c b/libgo/runtime/go-string-to-byte-array.c
index 10c565e..8bae54b 100644
--- a/libgo/runtime/go-string-to-byte-array.c
+++ b/libgo/runtime/go-string-to-byte-array.c
@@ -7,6 +7,7 @@
#include "go-string.h"
#include "array.h"
#include "runtime.h"
+#include "arch.h"
#include "malloc.h"
struct __go_open_array
diff --git a/libgo/runtime/go-string-to-int-array.c b/libgo/runtime/go-string-to-int-array.c
index f59df67..aff1468 100644
--- a/libgo/runtime/go-string-to-int-array.c
+++ b/libgo/runtime/go-string-to-int-array.c
@@ -8,6 +8,7 @@
#include "go-string.h"
#include "array.h"
#include "runtime.h"
+#include "arch.h"
#include "malloc.h"
struct __go_open_array
diff --git a/libgo/runtime/go-strplus.c b/libgo/runtime/go-strplus.c
index e4dea9c..bfbe341 100644
--- a/libgo/runtime/go-strplus.c
+++ b/libgo/runtime/go-strplus.c
@@ -6,6 +6,7 @@
#include "go-string.h"
#include "runtime.h"
+#include "arch.h"
#include "malloc.h"
struct __go_string
diff --git a/libgo/runtime/go-strslice.c b/libgo/runtime/go-strslice.c
index 94ecee9..40ccac6 100644
--- a/libgo/runtime/go-strslice.c
+++ b/libgo/runtime/go-strslice.c
@@ -7,6 +7,7 @@
#include "go-string.h"
#include "go-panic.h"
#include "runtime.h"
+#include "arch.h"
#include "malloc.h"
struct __go_string
diff --git a/libgo/runtime/goc2c.c b/libgo/runtime/goc2c.c
index 32fbceb..fe413fe 100644
--- a/libgo/runtime/goc2c.c
+++ b/libgo/runtime/goc2c.c
@@ -219,13 +219,14 @@ getchar_skipping_comments(void)
}
/*
- * Read and return a token. Tokens are delimited by whitespace or by
- * [(),{}]. The latter are all returned as single characters.
+ * Read and return a token. Tokens are string or character literals
+ * or else delimited by whitespace or by [(),{}].
+ * The latter are all returned as single characters.
*/
static char *
read_token(void)
{
- int c;
+ int c, q;
char *buf;
unsigned int alc, off;
const char* delims = "(),{}";
@@ -240,7 +241,26 @@ read_token(void)
alc = 16;
buf = xmalloc(alc + 1);
off = 0;
- if (strchr(delims, c) != NULL) {
+ if(c == '"' || c == '\'') {
+ q = c;
+ buf[off] = c;
+ ++off;
+ while (1) {
+ if (off+2 >= alc) { // room for c and maybe next char
+ alc *= 2;
+ buf = xrealloc(buf, alc + 1);
+ }
+ c = getchar_no_eof();
+ buf[off] = c;
+ ++off;
+ if(c == q)
+ break;
+ if(c == '\\') {
+ buf[off] = getchar_no_eof();
+ ++off;
+ }
+ }
+ } else if (strchr(delims, c) != NULL) {
buf[off] = c;
++off;
} else {
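The new branch above makes a whole string or character literal one token: it copies bytes through the matching close quote and, after a backslash, copies the next byte unconditionally so an escaped quote cannot terminate the token. A self-contained sketch of the same scanning loop over an in-memory buffer (the function and buffer names are made up; bounds checks are omitted for brevity):

#include <stdio.h>

/* Copy one quoted literal starting at in[0] ('"' or '\'') into out and
   return a pointer just past it.  Mirrors the goc2c loop above. */
static const char *
scan_literal(const char *in, char *out)
{
	char q, c;

	q = *in;
	*out++ = *in++;
	for(;;) {
		c = *in++;
		*out++ = c;
		if(c == q)
			break;
		if(c == '\\')
			*out++ = *in++;	/* keep the escaped character too */
	}
	*out = '\0';
	return in;
}

int
main(void)
{
	char tok[64];

	scan_literal("\"hi \\\"there\\\"\" rest", tok);
	printf("%s\n", tok);	/* prints: "hi \"there\"" */
	return 0;
}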
diff --git a/libgo/runtime/malloc.goc b/libgo/runtime/malloc.goc
index 2ea69ee..f8d4327 100644
--- a/libgo/runtime/malloc.goc
+++ b/libgo/runtime/malloc.goc
@@ -12,6 +12,7 @@ package runtime
#include <stdlib.h>
#include "go-alloc.h"
#include "runtime.h"
+#include "arch.h"
#include "malloc.h"
#include "go-string.h"
#include "interface.h"
@@ -96,11 +97,12 @@ runtime_mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed)
m->mcache->next_sample -= size;
else {
// pick next profile time
+ // If you change this, also change allocmcache.
if(rate > 0x3fffffff) // make 2*rate not overflow
rate = 0x3fffffff;
m->mcache->next_sample = runtime_fastrand1() % (2*rate);
profile:
- runtime_setblockspecial(v);
+ runtime_setblockspecial(v, true);
runtime_MProf_Malloc(v, size);
}
}
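The profiling path above implements byte-based sampling: next_sample counts allocated bytes down, and when it runs out the allocation is recorded and the counter is re-armed with a value drawn uniformly from [0, 2*rate), giving an average sampling interval of roughly MemProfileRate bytes; the 0x3fffffff clamp keeps 2*rate inside a signed 32-bit range. A minimal sketch of the same idea outside the runtime (rand() stands in for runtime_fastrand1 and the names are illustrative):

#include <stdint.h>
#include <stdlib.h>

static int32_t next_sample;	/* bytes left until the next sampled allocation */

/* Return 1 if an allocation of `size` bytes should be profiled,
   re-arming the countdown when it fires. */
static int
should_sample(uintptr_t size, int32_t rate)
{
	if(rate <= 0)
		return 0;
	if(size < (uintptr_t)next_sample) {
		next_sample -= size;
		return 0;
	}
	if(rate > 0x3fffffff)			/* make 2*rate not overflow */
		rate = 0x3fffffff;
	next_sample = rand() % (2*rate);	/* uniform in [0, 2*rate) */
	return 1;
}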
@@ -224,6 +226,7 @@ runtime_mlookup(void *v, byte **base, uintptr *size, MSpan **sp)
MCache*
runtime_allocmcache(void)
{
+ int32 rate;
MCache *c;
if(!__sync_bool_compare_and_swap(&m->mallocing, 0, 1))
@@ -239,6 +242,13 @@ runtime_allocmcache(void)
mstats.mcache_sys = runtime_mheap.cachealloc.sys;
runtime_unlock(&runtime_mheap);
+ // Set first allocation sample size.
+ rate = runtime_MemProfileRate;
+ if(rate > 0x3fffffff) // make 2*rate not overflow
+ rate = 0x3fffffff;
+ if(rate != 0)
+ c->next_sample = runtime_fastrand1() % (2*rate);
+
__sync_bool_compare_and_swap(&m->mallocing, 1, 0);
if(__sync_bool_compare_and_swap(&m->gcing, 1, 0))
__go_run_goroutine_gc(2);
@@ -280,6 +290,7 @@ runtime_mallocinit(void)
byte *p;
uintptr arena_size, bitmap_size;
extern byte end[];
+ byte *want;
runtime_sizeof_C_MStats = sizeof(MStats);
@@ -341,9 +352,13 @@ runtime_mallocinit(void)
// not as an absolute requirement. If we ask for the end
// of the data segment but the operating system requires
// a little more space before we can start allocating, it will
- // give out a slightly higher pointer. That's fine.
- // Run with what we get back.
- p = runtime_SysReserve(end, bitmap_size + arena_size);
+ // give out a slightly higher pointer. Except QEMU, which
+ // is buggy, as usual: it won't adjust the pointer upward.
+ // So adjust it upward a little bit ourselves: 1/4 MB to get
+ // away from the running binary image and then round up
+ // to a MB boundary.
+ want = (byte*)(((uintptr)end + (1<<18) + (1<<20) - 1)&~((1<<20)-1));
+ p = runtime_SysReserve(want, bitmap_size + arena_size);
if(p == nil)
runtime_throw("runtime: cannot reserve arena virtual address space");
}
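The new `want` computation nudges the reservation hint 1/4 MB (1<<18) past the end of the data segment and then rounds up to a 1 MB (1<<20) boundary, so a hint-ignoring QEMU still gets an address clear of the binary image. A small worked sketch of the arithmetic; the sample end address is made up:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uintptr_t end  = 0x601234;	/* pretend the data segment ends here */
	uintptr_t want = (end + (1<<18) + (1<<20) - 1) & ~(uintptr_t)((1<<20) - 1);

	/* 0x601234 + 0x40000 = 0x641234, rounded up to 1 MB -> 0x700000 */
	printf("end=%#lx want=%#lx\n", (unsigned long)end, (unsigned long)want);
	return 0;
}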
@@ -418,8 +433,9 @@ runtime_mal(uintptr n)
return runtime_mallocgc(n, 0, 1, 1);
}
-func new(n uint32) (ret *uint8) {
- ret = runtime_mal(n);
+func new(typ *Type) (ret *uint8) {
+ uint32 flag = typ->__code&GO_NO_POINTERS ? FlagNoPointers : 0;
+ ret = runtime_mallocgc(typ->__size, flag, 1, 1);
}
func Alloc(n uintptr) (p *byte) {
@@ -444,9 +460,8 @@ func SetFinalizer(obj Eface, finalizer Eface) {
const FuncType *ft;
if(obj.__type_descriptor == nil) {
- // runtime_printf("runtime.SetFinalizer: first argument is nil interface\n");
- throw:
- runtime_throw("runtime.SetFinalizer");
+ // runtime·printf("runtime.SetFinalizer: first argument is nil interface\n");
+ goto throw;
}
if(obj.__type_descriptor->__code != GO_PTR) {
// runtime_printf("runtime.SetFinalizer: first argument is %S, not pointer\n", *obj.type->string);
@@ -458,19 +473,21 @@ func SetFinalizer(obj Eface, finalizer Eface) {
}
ft = nil;
if(finalizer.__type_descriptor != nil) {
- if(finalizer.__type_descriptor->__code != GO_FUNC) {
- badfunc:
- // runtime_printf("runtime.SetFinalizer: second argument is %S, not func(%S)\n", *finalizer.type->string, *obj.type->string);
- goto throw;
- }
+ if(finalizer.__type_descriptor->__code != GO_FUNC)
+ goto badfunc;
ft = (const FuncType*)finalizer.__type_descriptor;
if(ft->__dotdotdot || ft->__in.__count != 1 || !__go_type_descriptors_equal(*(Type**)ft->__in.__values, obj.__type_descriptor))
goto badfunc;
+ }
- if(runtime_getfinalizer(obj.__object, 0)) {
- // runtime_printf("runtime.SetFinalizer: finalizer already set");
- goto throw;
- }
+ if(!runtime_addfinalizer(obj.__object, finalizer.__type_descriptor != nil ? *(void**)finalizer.__object : nil, ft)) {
+ runtime_printf("runtime.SetFinalizer: finalizer already set\n");
+ goto throw;
}
- runtime_addfinalizer(obj.__object, finalizer.__type_descriptor != nil ? *(void**)finalizer.__object : nil, ft);
+ return;
+
+badfunc:
+ // runtime_printf("runtime.SetFinalizer: second argument is %S, not func(%S)\n", *finalizer.type->string, *obj.type->string);
+throw:
+ runtime_throw("runtime.SetFinalizer");
}
diff --git a/libgo/runtime/malloc.h b/libgo/runtime/malloc.h
index 3e813bb..1ccc2f0 100644
--- a/libgo/runtime/malloc.h
+++ b/libgo/runtime/malloc.h
@@ -120,6 +120,13 @@ enum
#else
MHeapMap_Bits = 20,
#endif
+
+ // Max number of threads to run garbage collection.
+ // 2, 3, and 4 are all plausible maximums depending
+ // on the hardware details of the machine. The second
+ // proc is the one that helps the most (after the first),
+ // so start with just 2 for now.
+ MaxGcproc = 2,
};
// A generic linked list of blocks. (Typically the block is bigger than sizeof(MLink).)
@@ -192,7 +199,7 @@ struct MStats
uint64 nlookup; // number of pointer lookups
uint64 nmalloc; // number of mallocs
uint64 nfree; // number of frees
-
+
// Statistics about malloc heap.
// protected by mheap.Lock
uint64 heap_alloc; // bytes allocated and still in use
@@ -210,7 +217,7 @@ struct MStats
uint64 mcache_inuse; // MCache structures
uint64 mcache_sys;
uint64 buckhash_sys; // profiling bucket hash table
-
+
// Statistics about garbage collector.
// Protected by stopping the world during GC.
uint64 next_gc; // next GC (in heap_alloc time)
@@ -219,7 +226,7 @@ struct MStats
uint32 numgc;
bool enablegc;
bool debuggc;
-
+
// Statistics about allocation size classes.
struct {
uint32 size;
@@ -240,7 +247,7 @@ extern MStats mstats
//
// class_to_size[i] = largest size in class i
// class_to_allocnpages[i] = number of pages to allocate when
-// making new objects in class i
+// making new objects in class i
// class_to_transfercount[i] = number of objects to move when
// taking a bunch of objects out of the central lists
// and putting them in the thread free list.
@@ -279,7 +286,7 @@ struct MCache
int64 nmalloc;
int64 nfree;
} local_by_size[NumSizeClasses];
-
+
};
void* runtime_MCache_Alloc(MCache *c, int32 sizeclass, uintptr size, int32 zeroed);
@@ -352,14 +359,14 @@ struct MHeap
byte *arena_start;
byte *arena_used;
byte *arena_end;
-
+
// central free lists for small size classes.
// the union makes sure that the MCentrals are
- // spaced 64 bytes apart, so that each MCentral.Lock
+ // spaced CacheLineSize bytes apart, so that each MCentral.Lock
// gets its own cache line.
union {
MCentral;
- byte pad[64];
+ byte pad[CacheLineSize];
} central[NumSizeClasses];
FixAlloc spanalloc; // allocator for Span*
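The union above pads each MCentral to CacheLineSize bytes so that every per-size-class lock lives on its own cache line and contention on one class does not false-share with its neighbours. A stripped-down sketch of the same padding idiom; the struct names are placeholders, and the member is named here for portability where the runtime uses an unnamed field:

enum { CacheLineSize = 64 };	/* assumption, as in arch.h */

typedef struct Counter Counter;
struct Counter
{
	int lock;	/* stand-in for a real lock */
	long value;
};

/* Each element occupies a whole cache line, so threads updating
   pads[i].c and pads[j].c never contend on the same line. */
static union {
	Counter c;
	char pad[CacheLineSize];
} pads[8];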
@@ -387,7 +394,7 @@ int32 runtime_checking;
void runtime_markspan(void *v, uintptr size, uintptr n, bool leftover);
void runtime_unmarkspan(void *v, uintptr size);
bool runtime_blockspecial(void*);
-void runtime_setblockspecial(void*);
+void runtime_setblockspecial(void*, bool);
void runtime_purgecachedstats(M*);
enum
@@ -402,6 +409,8 @@ void runtime_Mprof_Init(void);
void runtime_MProf_Malloc(void*, uintptr);
void runtime_MProf_Free(void*, uintptr);
void runtime_MProf_Mark(void (*scan)(byte *, int64));
+int32 runtime_helpgc(bool*);
+void runtime_gchelper(void);
// Malloc profiling settings.
// Must match definition in extern.go.
@@ -412,13 +421,6 @@ enum {
};
extern int32 runtime_malloc_profile;
-typedef struct Finalizer Finalizer;
-struct Finalizer
-{
- Finalizer *next; // for use by caller of getfinalizer
- void (*fn)(void*);
- void *arg;
- const struct __go_func_type *ft;
-};
-
-Finalizer* runtime_getfinalizer(void*, bool);
+struct __go_func_type;
+bool runtime_getfinalizer(void *p, bool del, void (**fn)(void*), const struct __go_func_type **ft);
+void runtime_walkfintab(void (*fn)(void*), void (*scan)(byte*, int64));
diff --git a/libgo/runtime/mcache.c b/libgo/runtime/mcache.c
index 191b0d1..6c60aeb 100644
--- a/libgo/runtime/mcache.c
+++ b/libgo/runtime/mcache.c
@@ -7,6 +7,7 @@
// See malloc.h for an overview.
#include "runtime.h"
+#include "arch.h"
#include "malloc.h"
void*
diff --git a/libgo/runtime/mcentral.c b/libgo/runtime/mcentral.c
index cd3d6ca..b98a8d3 100644
--- a/libgo/runtime/mcentral.c
+++ b/libgo/runtime/mcentral.c
@@ -15,6 +15,7 @@
// so that it is faster to move those lists between MCaches and MCentrals.
#include "runtime.h"
+#include "arch.h"
#include "malloc.h"
static bool MCentral_Grow(MCentral *c);
diff --git a/libgo/runtime/mem.c b/libgo/runtime/mem.c
index 90c2c61..4267c55 100644
--- a/libgo/runtime/mem.c
+++ b/libgo/runtime/mem.c
@@ -2,6 +2,7 @@
#include <unistd.h>
#include "runtime.h"
+#include "arch.h"
#include "malloc.h"
#ifndef MAP_ANON
diff --git a/libgo/runtime/mem_posix_memalign.c b/libgo/runtime/mem_posix_memalign.c
index 2318be8..7d04f99 100644
--- a/libgo/runtime/mem_posix_memalign.c
+++ b/libgo/runtime/mem_posix_memalign.c
@@ -1,6 +1,7 @@
#include <errno.h>
#include "runtime.h"
+#include "arch.h"
#include "malloc.h"
void*
diff --git a/libgo/runtime/mfinal.c b/libgo/runtime/mfinal.c
index 04d58dd..db9a4fd 100644
--- a/libgo/runtime/mfinal.c
+++ b/libgo/runtime/mfinal.c
@@ -3,18 +3,17 @@
// license that can be found in the LICENSE file.
#include "runtime.h"
+#include "arch.h"
#include "malloc.h"
-// Lock to protect finalizer data structures.
-// Cannot reuse mheap.Lock because the finalizer
-// maintenance requires allocation.
-static Lock finlock;
+enum { debug = 0 };
-void
-runtime_initfintab()
+typedef struct Fin Fin;
+struct Fin
{
- runtime_initlock(&finlock);
-}
+ void (*fn)(void*);
+ const struct __go_func_type *ft;
+};
// Finalizer hash table. Direct hash, linear scan, at most 3/4 full.
// Table size is power of 3 so that hash can be key % max.
@@ -26,25 +25,43 @@ runtime_initfintab()
typedef struct Fintab Fintab;
struct Fintab
{
- void **key;
- Finalizer **val;
+ Lock;
+ void **fkey;
+ Fin *val;
int32 nkey; // number of non-nil entries in key
int32 ndead; // number of dead (-1) entries in key
int32 max; // size of key, val allocations
};
+#define TABSZ 17
+#define TAB(p) (&fintab[((uintptr)(p)>>3)%TABSZ])
+
+static struct {
+ Fintab;
+ uint8 pad[0 /* CacheLineSize - sizeof(Fintab) */];
+} fintab[TABSZ];
+
+void
+runtime_initfintab()
+{
+ int32 i;
+
+ for(i=0; i<TABSZ; i++)
+ runtime_initlock(&fintab[i]);
+}
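Instead of one global finlock, the finalizer table is now split into TABSZ (17) shards, each a Fintab with its own embedded Lock; TAB(p) picks the shard by hashing the pointer, shifting off the low alignment bits before the modulo, so finalizer operations on unrelated objects proceed in parallel. A hedged sketch of the shard-selection pattern with generic names:

#include <stdint.h>
#include <pthread.h>

#define NSHARD 17	/* small prime, as in TABSZ */

struct shard {
	pthread_mutex_t lock;
	/* ...per-shard hash table would live here... */
};

static struct shard shards[NSHARD];

/* Shift off the low alignment bits, then pick a shard by remainder. */
static struct shard *
shard_for(void *p)
{
	return &shards[((uintptr_t)p >> 3) % NSHARD];
}

/* Callers lock only the shard that owns p. */
static void
with_shard(void *p, void (*fn)(struct shard *, void *))
{
	struct shard *s = shard_for(p);

	pthread_mutex_lock(&s->lock);
	fn(s, p);
	pthread_mutex_unlock(&s->lock);
}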
+
static void
-addfintab(Fintab *t, void *k, Finalizer *v)
+addfintab(Fintab *t, void *k, void (*fn)(void*), const struct __go_func_type *ft)
{
int32 i, j;
i = (uintptr)k % (uintptr)t->max;
for(j=0; j<t->max; j++) {
- if(t->key[i] == nil) {
+ if(t->fkey[i] == nil) {
t->nkey++;
goto ret;
}
- if(t->key[i] == (void*)-1) {
+ if(t->fkey[i] == (void*)-1) {
t->ndead--;
goto ret;
}
@@ -56,30 +73,32 @@ addfintab(Fintab *t, void *k, Finalizer *v)
runtime_throw("finalizer table inconsistent");
ret:
- t->key[i] = k;
- t->val[i] = v;
+ t->fkey[i] = k;
+ t->val[i].fn = fn;
+ t->val[i].ft = ft;
}
-static Finalizer*
-lookfintab(Fintab *t, void *k, bool del)
+static bool
+lookfintab(Fintab *t, void *k, bool del, Fin *f)
{
int32 i, j;
- Finalizer *v;
if(t->max == 0)
- return nil;
+ return false;
i = (uintptr)k % (uintptr)t->max;
for(j=0; j<t->max; j++) {
- if(t->key[i] == nil)
- return nil;
- if(t->key[i] == k) {
- v = t->val[i];
+ if(t->fkey[i] == nil)
+ return false;
+ if(t->fkey[i] == k) {
+ if(f)
+ *f = t->val[i];
if(del) {
- t->key[i] = (void*)-1;
- t->val[i] = nil;
+ t->fkey[i] = (void*)-1;
+ t->val[i].fn = nil;
+ t->val[i].ft = nil;
t->ndead++;
}
- return v;
+ return true;
}
if(++i == t->max)
i = 0;
@@ -87,108 +106,123 @@ lookfintab(Fintab *t, void *k, bool del)
// cannot happen - table is known to be non-full
runtime_throw("finalizer table inconsistent");
- return nil;
+ return false;
}
-static Fintab fintab;
-
-// add finalizer; caller is responsible for making sure not already in table
-void
-runtime_addfinalizer(void *p, void (*f)(void*), const struct __go_func_type *ft)
+static void
+resizefintab(Fintab *tab)
{
Fintab newtab;
+ void *k;
int32 i;
- byte *base;
- Finalizer *e;
+
+ runtime_memclr((byte*)&newtab, sizeof newtab);
+ newtab.max = tab->max;
+ if(newtab.max == 0)
+ newtab.max = 3*3*3;
+ else if(tab->ndead < tab->nkey/2) {
+ // grow table if not many dead values.
+ // otherwise just rehash into table of same size.
+ newtab.max *= 3;
+ }
+
+ newtab.fkey = runtime_mallocgc(newtab.max*sizeof newtab.fkey[0], FlagNoPointers, 0, 1);
+ newtab.val = runtime_mallocgc(newtab.max*sizeof newtab.val[0], 0, 0, 1);
- e = nil;
- if(f != nil) {
- e = runtime_mal(sizeof *e);
- e->fn = f;
- e->ft = ft;
+ for(i=0; i<tab->max; i++) {
+ k = tab->fkey[i];
+ if(k != nil && k != (void*)-1)
+ addfintab(&newtab, k, tab->val[i].fn, tab->val[i].ft);
}
+
+ runtime_free(tab->fkey);
+ runtime_free(tab->val);
+
+ tab->fkey = newtab.fkey;
+ tab->val = newtab.val;
+ tab->nkey = newtab.nkey;
+ tab->ndead = newtab.ndead;
+ tab->max = newtab.max;
+}
+bool
+runtime_addfinalizer(void *p, void (*f)(void*), const struct __go_func_type *ft)
+{
+ Fintab *tab;
+ byte *base;
+ bool ret = false;
+
+ if(debug) {
+ if(!runtime_mlookup(p, &base, nil, nil) || p != base)
+ runtime_throw("addfinalizer on invalid pointer");
+ }
+
if(!__sync_bool_compare_and_swap(&m->holds_finlock, 0, 1))
runtime_throw("finalizer deadlock");
- runtime_lock(&finlock);
- if(!runtime_mlookup(p, &base, nil, nil) || p != base) {
- runtime_unlock(&finlock);
- __sync_bool_compare_and_swap(&m->holds_finlock, 1, 0);
- runtime_throw("addfinalizer on invalid pointer");
- }
+ tab = TAB(p);
+ runtime_lock(tab);
if(f == nil) {
- lookfintab(&fintab, p, 1);
+ if(lookfintab(tab, p, true, nil))
+ runtime_setblockspecial(p, false);
+ ret = true;
goto unlock;
}
- if(lookfintab(&fintab, p, 0)) {
- runtime_unlock(&finlock);
- __sync_bool_compare_and_swap(&m->holds_finlock, 1, 0);
- runtime_throw("double finalizer");
+ if(lookfintab(tab, p, false, nil)) {
+ ret = false;
+ goto unlock;
}
- runtime_setblockspecial(p);
- if(fintab.nkey >= fintab.max/2+fintab.max/4) {
+ if(tab->nkey >= tab->max/2+tab->max/4) {
// keep table at most 3/4 full:
// allocate new table and rehash.
-
- runtime_memclr((byte*)&newtab, sizeof newtab);
- newtab.max = fintab.max;
- if(newtab.max == 0)
- newtab.max = 3*3*3;
- else if(fintab.ndead < fintab.nkey/2) {
- // grow table if not many dead values.
- // otherwise just rehash into table of same size.
- newtab.max *= 3;
- }
-
- newtab.key = runtime_mallocgc(newtab.max*sizeof newtab.key[0], FlagNoPointers, 0, 1);
- newtab.val = runtime_mallocgc(newtab.max*sizeof newtab.val[0], 0, 0, 1);
-
- for(i=0; i<fintab.max; i++) {
- void *k;
-
- k = fintab.key[i];
- if(k != nil && k != (void*)-1)
- addfintab(&newtab, k, fintab.val[i]);
- }
- runtime_free(fintab.key);
- runtime_free(fintab.val);
- fintab = newtab;
+ resizefintab(tab);
}
- addfintab(&fintab, p, e);
+ addfintab(tab, p, f, ft);
+ runtime_setblockspecial(p, true);
+ ret = true;
+
unlock:
- runtime_unlock(&finlock);
+ runtime_unlock(tab);
__sync_bool_compare_and_swap(&m->holds_finlock, 1, 0);
if(__sync_bool_compare_and_swap(&m->gcing_for_finlock, 1, 0)) {
__go_run_goroutine_gc(200);
}
+
+ return ret;
}
// get finalizer; if del, delete finalizer.
-// caller is responsible for updating RefHasFinalizer bit.
-Finalizer*
-runtime_getfinalizer(void *p, bool del)
+// caller is responsible for updating RefHasFinalizer (special) bit.
+bool
+runtime_getfinalizer(void *p, bool del, void (**fn)(void*), const struct __go_func_type **ft)
{
- Finalizer *f;
+ Fintab *tab;
+ bool res;
+ Fin f;
if(!__sync_bool_compare_and_swap(&m->holds_finlock, 0, 1))
runtime_throw("finalizer deadlock");
- runtime_lock(&finlock);
- f = lookfintab(&fintab, p, del);
- runtime_unlock(&finlock);
+ tab = TAB(p);
+ runtime_lock(tab);
+ res = lookfintab(tab, p, del, &f);
+ runtime_unlock(tab);
__sync_bool_compare_and_swap(&m->holds_finlock, 1, 0);
if(__sync_bool_compare_and_swap(&m->gcing_for_finlock, 1, 0)) {
__go_run_goroutine_gc(201);
}
- return f;
+ if(res==false)
+ return false;
+ *fn = f.fn;
+ *ft = f.ft;
+ return true;
}
void
@@ -196,18 +230,22 @@ runtime_walkfintab(void (*fn)(void*), void (*scan)(byte *, int64))
{
void **key;
void **ekey;
+ int32 i;
if(!__sync_bool_compare_and_swap(&m->holds_finlock, 0, 1))
runtime_throw("finalizer deadlock");
- scan((byte*)&fintab, sizeof fintab);
- runtime_lock(&finlock);
- key = fintab.key;
- ekey = key + fintab.max;
- for(; key < ekey; key++)
- if(*key != nil && *key != ((void*)-1))
- fn(*key);
- runtime_unlock(&finlock);
+ for(i=0; i<TABSZ; i++) {
+ runtime_lock(&fintab[i]);
+ key = fintab[i].fkey;
+ ekey = key + fintab[i].max;
+ for(; key < ekey; key++)
+ if(*key != nil && *key != ((void*)-1))
+ fn(*key);
+ scan((byte*)&fintab[i].fkey, sizeof(void*));
+ scan((byte*)&fintab[i].val, sizeof(void*));
+ runtime_unlock(&fintab[i]);
+ }
__sync_bool_compare_and_swap(&m->holds_finlock, 1, 0);
if(__sync_bool_compare_and_swap(&m->gcing_for_finlock, 1, 0)) {
diff --git a/libgo/runtime/mfixalloc.c b/libgo/runtime/mfixalloc.c
index c05583d..109cfe8 100644
--- a/libgo/runtime/mfixalloc.c
+++ b/libgo/runtime/mfixalloc.c
@@ -7,6 +7,7 @@
// See malloc.h for overview.
#include "runtime.h"
+#include "arch.h"
#include "malloc.h"
// Initialize f to allocate objects of the given size,
diff --git a/libgo/runtime/mgc0.c b/libgo/runtime/mgc0.c
index 900ebde..cb58525 100644
--- a/libgo/runtime/mgc0.c
+++ b/libgo/runtime/mgc0.c
@@ -5,13 +5,14 @@
// Garbage collector.
#include "runtime.h"
+#include "arch.h"
#include "malloc.h"
enum {
Debug = 0,
- UseCas = 1,
PtrSize = sizeof(void*),
-
+ DebugMark = 0, // run second pass to check mark
+
// Four bits per word (see #defines below).
wordsPerBitmapWord = sizeof(void*)*8/4,
bitShift = sizeof(void*)*8/4,
@@ -50,28 +51,72 @@ enum {
#define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
+// TODO: Make these per-M.
static uint64 nlookup;
static uint64 nsizelookup;
static uint64 naddrlookup;
+static uint64 nhandoff;
+
static int32 gctrace;
typedef struct Workbuf Workbuf;
struct Workbuf
{
Workbuf *next;
- uintptr nw;
- byte *w[2048-2];
+ uintptr nobj;
+ byte *obj[512-2];
+};
+
+typedef struct Finalizer Finalizer;
+struct Finalizer
+{
+ void (*fn)(void*);
+ void *arg;
+ const struct __go_func_type *ft;
+};
+
+typedef struct FinBlock FinBlock;
+struct FinBlock
+{
+ FinBlock *alllink;
+ FinBlock *next;
+ int32 cnt;
+ int32 cap;
+ Finalizer fin[1];
};
static bool finstarted;
static pthread_mutex_t finqlock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t finqcond = PTHREAD_COND_INITIALIZER;
-static Finalizer *finq;
+static FinBlock *finq; // list of finalizers that are to be executed
+static FinBlock *finc; // cache of free blocks
+static FinBlock *allfin; // list of all blocks
+static Lock finlock;
static int32 fingwait;
static void runfinq(void*);
static Workbuf* getempty(Workbuf*);
static Workbuf* getfull(Workbuf*);
+static void putempty(Workbuf*);
+static Workbuf* handoff(Workbuf*);
+
+static struct {
+ Lock fmu;
+ Workbuf *full;
+ Lock emu;
+ Workbuf *empty;
+ uint32 nproc;
+ volatile uint32 nwait;
+ volatile uint32 ndone;
+ Note alldone;
+ Lock markgate;
+ Lock sweepgate;
+ MSpan *spans;
+
+ Lock;
+ byte *chunk;
+ uintptr nchunk;
+} work;
// scanblock scans a block of n bytes starting at pointer b for references
// to other objects, scanning any it finds recursively until there are no
@@ -82,13 +127,14 @@ static Workbuf* getfull(Workbuf*);
static void
scanblock(byte *b, int64 n)
{
- byte *obj, *arena_start, *p;
+ byte *obj, *arena_start, *arena_used, *p;
void **vp;
- uintptr size, *bitp, bits, shift, i, j, x, xbits, off;
+ uintptr size, *bitp, bits, shift, i, j, x, xbits, off, nobj, nproc;
MSpan *s;
PageID k;
- void **bw, **w, **ew;
+ void **wp;
Workbuf *wbuf;
+ bool keepworking;
if((int64)(uintptr)n != n || n < 0) {
// runtime_printf("scanblock %p %lld\n", b, (long long)n);
@@ -97,11 +143,19 @@ scanblock(byte *b, int64 n)
// Memory arena parameters.
arena_start = runtime_mheap.arena_start;
-
+ arena_used = runtime_mheap.arena_used;
+ nproc = work.nproc;
+
wbuf = nil; // current work buffer
- ew = nil; // end of work buffer
- bw = nil; // beginning of work buffer
- w = nil; // current pointer into work buffer
+ wp = nil; // storage for next queued pointer (write pointer)
+ nobj = 0; // number of queued objects
+
+ // Scanblock helpers pass b==nil.
+ // The main proc needs to return to make more
+ // calls to scanblock. But if work.nproc==1 then
+ // might as well process blocks as soon as we
+ // have them.
+ keepworking = b == nil || work.nproc == 1;
// Align b to a word boundary.
off = (uintptr)b & (PtrSize-1);
@@ -117,17 +171,17 @@ scanblock(byte *b, int64 n)
runtime_printf("scanblock %p %lld\n", b, (long long) n);
vp = (void**)b;
- n /= PtrSize;
+ n >>= (2+PtrSize/8); /* n /= PtrSize (4 or 8) */
for(i=0; i<(uintptr)n; i++) {
obj = (byte*)vp[i];
-
+
// Words outside the arena cannot be pointers.
- if((byte*)obj < arena_start || (byte*)obj >= runtime_mheap.arena_used)
+ if((byte*)obj < arena_start || (byte*)obj >= arena_used)
continue;
-
+
// obj may be a pointer to a live object.
// Try to find the beginning of the object.
-
+
// Round down to word boundary.
obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
@@ -185,47 +239,72 @@ scanblock(byte *b, int64 n)
found:
// Now we have bits, bitp, and shift correct for
// obj pointing at the base of the object.
- // If not allocated or already marked, done.
- if((bits & bitAllocated) == 0 || (bits & bitMarked) != 0)
+ // Only care about allocated and not marked.
+ if((bits & (bitAllocated|bitMarked)) != bitAllocated)
continue;
- *bitp |= bitMarked<<shift;
+ if(nproc == 1)
+ *bitp |= bitMarked<<shift;
+ else {
+ for(;;) {
+ x = *bitp;
+ if(x & (bitMarked<<shift))
+ goto continue_obj;
+ if(runtime_casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
+ break;
+ }
+ }
// If object has no pointers, don't need to scan further.
if((bits & bitNoPointers) != 0)
continue;
+ // If another proc wants a pointer, give it some.
+ if(nobj > 4 && work.nwait > 0 && work.full == nil) {
+ wbuf->nobj = nobj;
+ wbuf = handoff(wbuf);
+ nobj = wbuf->nobj;
+ wp = (void**)(wbuf->obj + nobj);
+ }
+
// If buffer is full, get a new one.
- if(w >= ew) {
+ if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
+ if(wbuf != nil)
+ wbuf->nobj = nobj;
wbuf = getempty(wbuf);
- bw = (void**)wbuf->w;
- w = bw;
- ew = bw + nelem(wbuf->w);
+ wp = (void**)(wbuf->obj);
+ nobj = 0;
}
- *w++ = obj;
+ *wp++ = obj;
+ nobj++;
+ continue_obj:;
}
-
+
// Done scanning [b, b+n). Prepare for the next iteration of
// the loop by setting b and n to the parameters for the next block.
- // Fetch b from the work buffers.
- if(w <= bw) {
+ // Fetch b from the work buffer.
+ if(nobj == 0) {
+ if(!keepworking) {
+ putempty(wbuf);
+ return;
+ }
// Emptied our buffer: refill.
wbuf = getfull(wbuf);
if(wbuf == nil)
- break;
- bw = (void**)wbuf->w;
- ew = (void**)(wbuf->w + nelem(wbuf->w));
- w = bw+wbuf->nw;
+ return;
+ nobj = wbuf->nobj;
+ wp = (void**)(wbuf->obj + wbuf->nobj);
}
- b = *--w;
-
+ b = *--wp;
+ nobj--;
+
// Figure out n = size of b. Start by loading bits for b.
off = (uintptr*)b - (uintptr*)arena_start;
bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
xbits = *bitp;
bits = xbits >> shift;
-
+
// Might be small; look for nearby block boundary.
// A block boundary is marked by either bitBlockBoundary
// or bitAllocated being set (see notes near their definition).
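When work.nproc is greater than one, the mark bit above is set with a compare-and-swap loop rather than a plain OR, so two collectors racing on the same bitmap word cannot overwrite each other's marks; the loop also bails out if it observes the bit already set. A minimal sketch of that pattern using the same GCC builtin the runtime maps runtime_casp to:

#include <stdint.h>

/* Atomically set `bit` in *word.  Returns 1 if this caller set it,
   0 if it was already set by someone else. */
static int
mark_bit_atomic(uintptr_t *word, uintptr_t bit)
{
	uintptr_t x;

	for(;;) {
		x = *word;
		if(x & bit)
			return 0;		/* already marked */
		if(__sync_bool_compare_and_swap(word, x, x | bit))
			return 1;		/* our CAS installed the mark */
	}
}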
@@ -244,12 +323,12 @@ scanblock(byte *b, int64 n)
// apply a mask to keep only the bits corresponding
// to shift+j < bitShift aka j < bitShift-shift.
bits &= (boundary<<(bitShift-shift)) - boundary;
-
+
// A block boundary j words before b is indicated by
// xbits>>(shift-j) & boundary
// (assuming shift >= j). There is no cleverness here
// avoid the test, because when j gets too large the shift
- // turns negative, which is undefined in C.
+ // turns negative, which is undefined in C.
for(j=1; j<bitShift; j++) {
if(((bits>>j)&boundary) != 0 || (shift>=j && ((xbits>>(shift-j))&boundary) != 0)) {
@@ -257,7 +336,7 @@ scanblock(byte *b, int64 n)
goto scan;
}
}
-
+
// Fall back to asking span about size class.
// (Manually inlined copy of MHeap_Lookup.)
nlookup++;
@@ -274,29 +353,123 @@ scanblock(byte *b, int64 n)
}
}
-static struct {
- Workbuf *full;
- Workbuf *empty;
- byte *chunk;
- uintptr nchunk;
-} work;
+// debug_scanblock is the debug copy of scanblock.
+// it is simpler, slower, single-threaded, recursive,
+// and uses bitSpecial as the mark bit.
+static void
+debug_scanblock(byte *b, int64 n)
+{
+ byte *obj, *p;
+ void **vp;
+ uintptr size, *bitp, bits, shift, i, xbits, off;
+ MSpan *s;
+
+ if(!DebugMark)
+ runtime_throw("debug_scanblock without DebugMark");
+
+ if((int64)(uintptr)n != n || n < 0) {
+ //runtime_printf("debug_scanblock %p %D\n", b, n);
+ runtime_throw("debug_scanblock");
+ }
+
+ // Align b to a word boundary.
+ off = (uintptr)b & (PtrSize-1);
+ if(off != 0) {
+ b += PtrSize - off;
+ n -= PtrSize - off;
+ }
+
+ vp = (void**)b;
+ n /= PtrSize;
+ for(i=0; i<(uintptr)n; i++) {
+ obj = (byte*)vp[i];
+
+ // Words outside the arena cannot be pointers.
+ if((byte*)obj < runtime_mheap.arena_start || (byte*)obj >= runtime_mheap.arena_used)
+ continue;
+
+ // Round down to word boundary.
+ obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
+
+ // Consult span table to find beginning.
+ s = runtime_MHeap_LookupMaybe(&runtime_mheap, obj);
+ if(s == nil)
+ continue;
+
+
+ p = (byte*)((uintptr)s->start<<PageShift);
+ if(s->sizeclass == 0) {
+ obj = p;
+ size = (uintptr)s->npages<<PageShift;
+ } else {
+ if((byte*)obj >= (byte*)s->limit)
+ continue;
+ size = runtime_class_to_size[s->sizeclass];
+ int32 i = ((byte*)obj - p)/size;
+ obj = p+i*size;
+ }
+
+ // Now that we know the object header, reload bits.
+ off = (uintptr*)obj - (uintptr*)runtime_mheap.arena_start;
+ bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
+ shift = off % wordsPerBitmapWord;
+ xbits = *bitp;
+ bits = xbits >> shift;
+
+ // Now we have bits, bitp, and shift correct for
+ // obj pointing at the base of the object.
+ // If not allocated or already marked, done.
+ if((bits & bitAllocated) == 0 || (bits & bitSpecial) != 0) // NOTE: bitSpecial not bitMarked
+ continue;
+ *bitp |= bitSpecial<<shift;
+ if(!(bits & bitMarked))
+ runtime_printf("found unmarked block %p in %p\n", obj, vp+i);
+
+ // If object has no pointers, don't need to scan further.
+ if((bits & bitNoPointers) != 0)
+ continue;
+
+ debug_scanblock(obj, size);
+ }
+}
// Get an empty work buffer off the work.empty list,
// allocating new buffers as needed.
static Workbuf*
getempty(Workbuf *b)
{
- if(b != nil) {
- b->nw = nelem(b->w);
- b->next = work.full;
- work.full = b;
- }
- b = work.empty;
- if(b != nil) {
- work.empty = b->next;
- return b;
+ if(work.nproc == 1) {
+ // Put b on full list.
+ if(b != nil) {
+ b->next = work.full;
+ work.full = b;
+ }
+ // Grab from empty list if possible.
+ b = work.empty;
+ if(b != nil) {
+ work.empty = b->next;
+ goto haveb;
+ }
+ } else {
+ // Put b on full list.
+ if(b != nil) {
+ runtime_lock(&work.fmu);
+ b->next = work.full;
+ work.full = b;
+ runtime_unlock(&work.fmu);
+ }
+ // Grab from empty list if possible.
+ runtime_lock(&work.emu);
+ b = work.empty;
+ if(b != nil)
+ work.empty = b->next;
+ runtime_unlock(&work.emu);
+ if(b != nil)
+ goto haveb;
}
-
+
+ // Need to allocate.
+ runtime_lock(&work);
if(work.nchunk < sizeof *b) {
work.nchunk = 1<<20;
work.chunk = runtime_SysAlloc(work.nchunk);
@@ -304,25 +477,121 @@ getempty(Workbuf *b)
b = (Workbuf*)work.chunk;
work.chunk += sizeof *b;
work.nchunk -= sizeof *b;
+ runtime_unlock(&work);
+
+haveb:
+ b->nobj = 0;
return b;
}
+static void
+putempty(Workbuf *b)
+{
+ if(b == nil)
+ return;
+
+ if(work.nproc == 1) {
+ b->next = work.empty;
+ work.empty = b;
+ return;
+ }
+
+ runtime_lock(&work.emu);
+ b->next = work.empty;
+ work.empty = b;
+ runtime_unlock(&work.emu);
+}
+
// Get a full work buffer off the work.full list, or return nil.
static Workbuf*
getfull(Workbuf *b)
{
- if(b != nil) {
- b->nw = 0;
- b->next = work.empty;
- work.empty = b;
+ int32 i;
+ Workbuf *b1;
+
+ if(work.nproc == 1) {
+ // Put b on empty list.
+ if(b != nil) {
+ b->next = work.empty;
+ work.empty = b;
+ }
+ // Grab from full list if possible.
+ // Since work.nproc==1, no one else is
+ // going to give us work.
+ b = work.full;
+ if(b != nil)
+ work.full = b->next;
+ return b;
+ }
+
+ putempty(b);
+
+ // Grab buffer from full list if possible.
+ for(;;) {
+ b1 = work.full;
+ if(b1 == nil)
+ break;
+ runtime_lock(&work.fmu);
+ if(work.full != nil) {
+ b1 = work.full;
+ work.full = b1->next;
+ runtime_unlock(&work.fmu);
+ return b1;
+ }
+ runtime_unlock(&work.fmu);
+ }
+
+ runtime_xadd(&work.nwait, +1);
+ for(i=0;; i++) {
+ b1 = work.full;
+ if(b1 != nil) {
+ runtime_lock(&work.fmu);
+ if(work.full != nil) {
+ runtime_xadd(&work.nwait, -1);
+ b1 = work.full;
+ work.full = b1->next;
+ runtime_unlock(&work.fmu);
+ return b1;
+ }
+ runtime_unlock(&work.fmu);
+ continue;
+ }
+ if(work.nwait == work.nproc)
+ return nil;
+ if(i < 10)
+ runtime_procyield(20);
+ else if(i < 20)
+ runtime_osyield();
+ else
+ runtime_usleep(100);
}
- b = work.full;
- if(b != nil)
- work.full = b->next;
- return b;
}
-// Scanstack calls scanblock on each of gp's stack segments.
+static Workbuf*
+handoff(Workbuf *b)
+{
+ int32 n;
+ Workbuf *b1;
+
+ // Make new buffer with half of b's pointers.
+ b1 = getempty(nil);
+ n = b->nobj/2;
+ b->nobj -= n;
+ b1->nobj = n;
+ runtime_memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
+ nhandoff += n;
+
+ // Put b on full list - let first half of b get stolen.
+ runtime_lock(&work.fmu);
+ b->next = work.full;
+ work.full = b;
+ runtime_unlock(&work.fmu);
+
+ return b1;
+}
+
+// Markfin calls scanblock on the blocks that have finalizers:
+// the things pointed at cannot be freed until the finalizers have run.
static void
markfin(void *v)
{
@@ -355,11 +624,22 @@ __go_register_gc_roots (struct root_list* r)
roots = r;
}
-// Mark
static void
-mark(void)
+debug_markfin(void *v)
+{
+ uintptr size;
+
+ if(!runtime_mlookup(v, (byte**)&v, &size, nil))
+ runtime_throw("debug_mark - finalizer inconsistency");
+ debug_scanblock(v, size);
+}
+
+// Mark
+static void
+mark(void (*scan)(byte*, int64))
{
struct root_list *pl;
+ FinBlock *fb;
for(pl = roots; pl != nil; pl = pl->next) {
struct root* pr = &pl->roots[0];
@@ -372,18 +652,63 @@ mark(void)
}
}
- scanblock((byte*)&m0, sizeof m0);
- scanblock((byte*)&finq, sizeof finq);
- runtime_MProf_Mark(scanblock);
+ scan((byte*)&m0, sizeof m0);
+ scan((byte*)&finq, sizeof finq);
+ runtime_MProf_Mark(scan);
// mark stacks
- __go_scanstacks(scanblock);
+ __go_scanstacks(scan);
// mark things pointed at by objects with finalizers
- runtime_walkfintab(markfin, scanblock);
+ if(scan == debug_scanblock)
+ runtime_walkfintab(debug_markfin, scan);
+ else
+ runtime_walkfintab(markfin, scan);
+
+ for(fb=allfin; fb; fb=fb->alllink)
+ scanblock((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]));
+
+ // in multiproc mode, join in the queued work.
+ scan(nil, 0);
}
-// Sweep frees or calls finalizers for blocks not marked in the mark phase.
+static bool
+handlespecial(byte *p, uintptr size)
+{
+ void (*fn)(void*);
+ const struct __go_func_type *ft;
+ FinBlock *block;
+ Finalizer *f;
+
+ if(!runtime_getfinalizer(p, true, &fn, &ft)) {
+ runtime_setblockspecial(p, false);
+ runtime_MProf_Free(p, size);
+ return false;
+ }
+
+ runtime_lock(&finlock);
+ if(finq == nil || finq->cnt == finq->cap) {
+ if(finc == nil) {
+ finc = runtime_SysAlloc(PageSize);
+ finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
+ finc->alllink = allfin;
+ allfin = finc;
+ }
+ block = finc;
+ finc = block->next;
+ block->next = finq;
+ finq = block;
+ }
+ f = &finq->fin[finq->cnt];
+ finq->cnt++;
+ f->fn = fn;
+ f->ft = ft;
+ f->arg = p;
+ runtime_unlock(&finlock);
+ return true;
+}
+
+// Sweep frees or collects finalizers for blocks not marked in the mark phase.
// It clears the mark bits in preparation for the next GC round.
static void
sweep(void)
@@ -393,9 +718,17 @@ sweep(void)
uintptr size;
byte *p;
MCache *c;
- Finalizer *f;
+ byte *arena_start;
+
+ arena_start = runtime_mheap.arena_start;
+
+ for(;;) {
+ s = work.spans;
+ if(s == nil)
+ break;
+ if(!runtime_casp(&work.spans, s, s->allnext))
+ continue;
- for(s = runtime_mheap.allspans; s != nil; s = s->allnext) {
if(s->state != MSpanInUse)
continue;
@@ -410,13 +743,15 @@ sweep(void)
npages = runtime_class_to_allocnpages[cl];
n = (npages << PageShift) / size;
}
-
- // sweep through n objects of given size starting at p.
+
+ // Sweep through n objects of given size starting at p.
+ // This thread owns the span now, so it can manipulate
+ // the block bitmap without atomic operations.
for(; n > 0; n--, p += size) {
uintptr off, *bitp, shift, bits;
- off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start;
- bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
+ off = (uintptr*)p - (uintptr*)arena_start;
+ bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
bits = *bitp>>shift;
@@ -424,20 +759,21 @@ sweep(void)
continue;
if((bits & bitMarked) != 0) {
+ if(DebugMark) {
+ if(!(bits & bitSpecial))
+ runtime_printf("found spurious mark on %p\n", p);
+ *bitp &= ~(bitSpecial<<shift);
+ }
*bitp &= ~(bitMarked<<shift);
continue;
}
- if((bits & bitSpecial) != 0) {
- // Special means it has a finalizer or is being profiled.
- f = runtime_getfinalizer(p, 1);
- if(f != nil) {
- f->arg = p;
- f->next = finq;
- finq = f;
+ // Special means it has a finalizer or is being profiled.
+ // In DebugMark mode, the bit has been coopted so
+ // we have to assume all blocks are special.
+ if(DebugMark || (bits & bitSpecial) != 0) {
+ if(handlespecial(p, size))
continue;
- }
- runtime_MProf_Free(p, size);
}
// Mark freed; restore block boundary bit.
@@ -464,6 +800,23 @@ sweep(void)
static pthread_mutex_t gcsema = PTHREAD_MUTEX_INITIALIZER;
+void
+runtime_gchelper(void)
+{
+ // Wait until main proc is ready for mark help.
+ runtime_lock(&work.markgate);
+ runtime_unlock(&work.markgate);
+ scanblock(nil, 0);
+
+ // Wait until main proc is ready for sweep help.
+ runtime_lock(&work.sweepgate);
+ runtime_unlock(&work.sweepgate);
+ sweep();
+
+ if(runtime_xadd(&work.ndone, +1) == work.nproc-1)
+ runtime_notewakeup(&work.alldone);
+}
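runtime_gchelper uses work.markgate and work.sweepgate as gates: the coordinating proc holds each lock while it sets up the phase, and a helper "passes through" by locking and immediately unlocking it, so helpers block until the coordinator opens the gate. A minimal sketch of the pattern with a pthread mutex; the function names are illustrative:

#include <pthread.h>

static pthread_mutex_t gate = PTHREAD_MUTEX_INITIALIZER;

/* Coordinator: close the gate before helpers may proceed... */
static void
gate_close(void) { pthread_mutex_lock(&gate); }

/* ...and open it once the phase is ready (same thread unlocks). */
static void
gate_open(void) { pthread_mutex_unlock(&gate); }

/* Helper: wait at the gate, then let the next helper through. */
static void
gate_pass(void)
{
	pthread_mutex_lock(&gate);
	pthread_mutex_unlock(&gate);
}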
+
// Initialized from $GOGC. GOGC=off means no gc.
//
// Next gc is after we've allocated an extra amount of
@@ -481,7 +834,7 @@ runtime_gc(int32 force __attribute__ ((unused)))
int64 t0, t1, t2, t3;
uint64 heap0, heap1, obj0, obj1;
char *p;
- Finalizer *fp;
+ bool extra;
// The gc is turned off (via enablegc) until
// the bootstrap has completed.
@@ -502,10 +855,16 @@ runtime_gc(int32 force __attribute__ ((unused)))
gcpercent = -1;
else
gcpercent = runtime_atoi(p);
-
+
p = runtime_getenv("GOGCTRACE");
if(p != nil)
gctrace = runtime_atoi(p);
+
+ runtime_initlock(&work.fmu);
+ runtime_initlock(&work.emu);
+ runtime_initlock(&work.markgate);
+ runtime_initlock(&work.sweepgate);
+ runtime_initlock(&work.Lock);
}
if(gcpercent < 0)
return;
@@ -522,20 +881,42 @@ runtime_gc(int32 force __attribute__ ((unused)))
nlookup = 0;
nsizelookup = 0;
naddrlookup = 0;
+ nhandoff = 0;
m->gcing = 1;
runtime_stoptheworld();
- if(runtime_mheap.Lock.key != 0)
- runtime_throw("runtime_mheap locked during gc");
__go_cachestats();
heap0 = mstats.heap_alloc;
obj0 = mstats.nmalloc - mstats.nfree;
- mark();
+ runtime_lock(&work.markgate);
+ runtime_lock(&work.sweepgate);
+
+ extra = false;
+ work.nproc = 1;
+#if 0
+ if(runtime_gomaxprocs > 1 && runtime_ncpu > 1) {
+ runtime_noteclear(&work.alldone);
+ work.nproc += runtime_helpgc(&extra);
+ }
+#endif
+ work.nwait = 0;
+ work.ndone = 0;
+
+ runtime_unlock(&work.markgate); // let the helpers in
+ mark(scanblock);
+ if(DebugMark)
+ mark(debug_scanblock);
t1 = runtime_nanotime();
+
+ work.spans = runtime_mheap.allspans;
+ runtime_unlock(&work.sweepgate); // let the helpers in
sweep();
+ if(work.nproc > 1)
+ runtime_notesleep(&work.alldone);
t2 = runtime_nanotime();
+
__go_stealcache();
__go_cachestats();
@@ -553,21 +934,28 @@ runtime_gc(int32 force __attribute__ ((unused)))
mstats.numgc++;
if(mstats.debuggc)
runtime_printf("pause %llu\n", (unsigned long long)t3-t0);
-
+
if(gctrace) {
- runtime_printf("gc%d: %llu+%llu+%llu ms %llu -> %llu MB %llu -> %llu (%llu-%llu) objects %llu pointer lookups (%llu size, %llu addr)\n",
+ runtime_printf("gc%d: %llu+%llu+%llu ms %llu -> %llu MB %llu -> %llu (%llu-%llu) objects %llu pointer lookups (%llu size, %llu addr) %llu handoff\n",
mstats.numgc, (unsigned long long)(t1-t0)/1000000, (unsigned long long)(t2-t1)/1000000, (unsigned long long)(t3-t2)/1000000,
(unsigned long long)heap0>>20, (unsigned long long)heap1>>20, (unsigned long long)obj0, (unsigned long long)obj1,
(unsigned long long)mstats.nmalloc, (unsigned long long)mstats.nfree,
- (unsigned long long)nlookup, (unsigned long long)nsizelookup, (unsigned long long)naddrlookup);
+ (unsigned long long)nlookup, (unsigned long long)nsizelookup, (unsigned long long)naddrlookup, (unsigned long long) nhandoff);
}
pthread_mutex_unlock(&gcsema);
- runtime_starttheworld();
+
+ // If we could have used another helper proc, start one now,
+ // in the hope that it will be available next time.
+ // It would have been even better to start it before the collection,
+ // but doing so requires allocating memory, so it's tricky to
+ // coordinate. This lazy approach works out in practice:
+ // we don't mind if the first couple gc rounds don't have quite
+ // the maximum number of procs.
+ runtime_starttheworld(extra);
// finqlock is still held.
- fp = finq;
- if(fp != nil) {
+ if(finq != nil) {
// kick off or wake up goroutine to run queued finalizers
if(!finstarted) {
__go_go(runfinq, nil);
@@ -601,37 +989,44 @@ runtime_UpdateMemStats(void)
__go_cachestats();
m->gcing = 0;
pthread_mutex_unlock(&gcsema);
- runtime_starttheworld();
+ runtime_starttheworld(false);
}
static void
runfinq(void* dummy)
{
- Finalizer *f, *next;
+ Finalizer *f;
+ FinBlock *fb, *next;
+ uint32 i;
USED(dummy);
for(;;) {
pthread_mutex_lock(&finqlock);
- f = finq;
+ fb = finq;
finq = nil;
- if(f == nil) {
+ if(fb == nil) {
fingwait = 1;
pthread_cond_wait(&finqcond, &finqlock);
pthread_mutex_unlock(&finqlock);
continue;
}
pthread_mutex_unlock(&finqlock);
- for(; f; f=next) {
- void *params[1];
-
- next = f->next;
- params[0] = &f->arg;
- reflect_call(f->ft, (void*)f->fn, 0, 0, params, nil);
- f->fn = nil;
- f->arg = nil;
- f->next = nil;
- runtime_free(f);
+ for(; fb; fb=next) {
+ next = fb->next;
+ for(i=0; i<(uint32)fb->cnt; i++) {
+ void *params[1];
+
+ f = &fb->fin[i];
+ params[0] = &f->arg;
+ runtime_setblockspecial(f->arg, false);
+ reflect_call(f->ft, (void*)f->fn, 0, 0, params, nil);
+ f->fn = nil;
+ f->arg = nil;
+ }
+ fb->cnt = 0;
+ fb->next = finc;
+ finc = fb;
}
runtime_gc(1); // trigger another gc to clean up the finalized objects, if possible
}
@@ -783,6 +1178,9 @@ runtime_blockspecial(void *v)
{
uintptr *b, off, shift;
+ if(DebugMark)
+ return true;
+
off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
@@ -791,17 +1189,23 @@ runtime_blockspecial(void *v)
}
void
-runtime_setblockspecial(void *v)
+runtime_setblockspecial(void *v, bool s)
{
uintptr *b, off, shift, bits, obits;
+ if(DebugMark)
+ return;
+
off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
for(;;) {
obits = *b;
- bits = obits | (bitSpecial<<shift);
+ if(s)
+ bits = obits | (bitSpecial<<shift);
+ else
+ bits = obits & ~(bitSpecial<<shift);
if(runtime_singleproc) {
*b = bits;
break;
@@ -812,7 +1216,7 @@ runtime_setblockspecial(void *v)
}
}
}
-
+
void
runtime_MHeap_MapBits(MHeap *h)
{
@@ -823,7 +1227,7 @@ runtime_MHeap_MapBits(MHeap *h)
bitmapChunk = 8192
};
uintptr n;
-
+
n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
n = (n+bitmapChunk-1) & ~(bitmapChunk-1);
if(h->bitmap_mapped >= n)
diff --git a/libgo/runtime/mheap.c b/libgo/runtime/mheap.c
index cacac7d..a49b405 100644
--- a/libgo/runtime/mheap.c
+++ b/libgo/runtime/mheap.c
@@ -13,6 +13,7 @@
// and heapmap(i) == span for all s->start <= i < s->start+s->npages.
#include "runtime.h"
+#include "arch.h"
#include "malloc.h"
static MSpan *MHeap_AllocLocked(MHeap*, uintptr, int32);
@@ -102,6 +103,7 @@ HaveSpan:
runtime_throw("MHeap_AllocLocked - bad npages");
runtime_MSpanList_Remove(s);
s->state = MSpanInUse;
+ mstats.heap_idle -= s->npages<<PageShift;
if(s->npages > npage) {
// Trim extra and put it back in the heap.
@@ -277,6 +279,7 @@ MHeap_FreeLocked(MHeap *h, MSpan *s)
// runtime_printf("MHeap_FreeLocked - span %p ptr %p state %d ref %d\n", s, s->start<<PageShift, s->state, s->ref);
runtime_throw("MHeap_FreeLocked - invalid free");
}
+ mstats.heap_idle += s->npages<<PageShift;
s->state = MSpanFree;
runtime_MSpanList_Remove(s);
sp = (uintptr*)(s->start<<PageShift);
diff --git a/libgo/runtime/mprof.goc b/libgo/runtime/mprof.goc
index d87be42..23c4f90 100644
--- a/libgo/runtime/mprof.goc
+++ b/libgo/runtime/mprof.goc
@@ -7,6 +7,7 @@
package runtime
#include "runtime.h"
+#include "arch.h"
#include "malloc.h"
#include "defs.h"
#include "go-type.h"
diff --git a/libgo/runtime/msize.c b/libgo/runtime/msize.c
index 6e82885..e2672b0 100644
--- a/libgo/runtime/msize.c
+++ b/libgo/runtime/msize.c
@@ -26,6 +26,7 @@
// TODO(rsc): Compute max waste for any given size.
#include "runtime.h"
+#include "arch.h"
#include "malloc.h"
int32 runtime_class_to_size[NumSizeClasses];
diff --git a/libgo/runtime/proc.c b/libgo/runtime/proc.c
index 521bcd64..8af6935 100644
--- a/libgo/runtime/proc.c
+++ b/libgo/runtime/proc.c
@@ -3,6 +3,7 @@
// license that can be found in the LICENSE file.
#include "runtime.h"
+#include "arch.h"
#include "malloc.h" /* so that acid generated from proc.c includes malloc data structures */
typedef struct Sched Sched;
diff --git a/libgo/runtime/runtime.h b/libgo/runtime/runtime.h
index ddc99eb..2767dd8 100644
--- a/libgo/runtime/runtime.h
+++ b/libgo/runtime/runtime.h
@@ -136,7 +136,7 @@ bool __go_sigsend(int32 sig);
int64 runtime_nanotime(void);
void runtime_stoptheworld(void);
-void runtime_starttheworld(void);
+void runtime_starttheworld(bool);
void __go_go(void (*pfn)(void*), void*);
void __go_gc_goroutine_init(void*);
void __go_enable_gc(void);
@@ -184,18 +184,21 @@ void runtime_notewakeup(Note*);
MCache* runtime_allocmcache(void);
void free(void *v);
struct __go_func_type;
-void runtime_addfinalizer(void*, void(*fn)(void*), const struct __go_func_type *);
-void runtime_walkfintab(void (*fn)(void*), void (*scan)(byte *, int64));
+bool runtime_addfinalizer(void*, void(*fn)(void*), const struct __go_func_type *);
#define runtime_mmap mmap
#define runtime_munmap(p, s) munmap((p), (s))
#define runtime_cas(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
#define runtime_casp(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
+#define runtime_xadd(p, v) __sync_add_and_fetch (p, v)
void runtime_sigprof(uint8 *pc, uint8 *sp, uint8 *lr);
void runtime_cpuprofinit(void);
void runtime_resetcpuprofiler(int32);
void runtime_setcpuprofilerate(void(*)(uintptr*, int32), int32);
uint32 runtime_fastrand1(void);
+void runtime_procyield(uint32);
+void runtime_osyield(void);
+void runtime_usleep(uint32);
struct __go_func_type;
void reflect_call(const struct __go_func_type *, const void *, _Bool, _Bool,
diff --git a/libgo/runtime/sigqueue.goc b/libgo/runtime/sigqueue.goc
index 2e47222..3a90868 100644
--- a/libgo/runtime/sigqueue.goc
+++ b/libgo/runtime/sigqueue.goc
@@ -39,6 +39,7 @@
package runtime
#include "config.h"
#include "runtime.h"
+#include "arch.h"
#include "malloc.h"
#include "defs.h"
diff --git a/libgo/runtime/thread.c b/libgo/runtime/thread.c
index bac3f7d..822d5da 100644
--- a/libgo/runtime/thread.c
+++ b/libgo/runtime/thread.c
@@ -14,19 +14,6 @@ runtime_initlock(Lock *l)
runtime_throw("sem_init failed");
}
-static uint32
-runtime_xadd(uint32 volatile *val, int32 delta)
-{
- uint32 oval, nval;
-
- for(;;){
- oval = *val;
- nval = oval + delta;
- if(runtime_cas(val, oval, nval))
- return nval;
- }
-}
-
// noinline so that runtime_lock doesn't have to split the stack.
static void runtime_lock_full(Lock *l) __attribute__ ((noinline));
diff --git a/libgo/runtime/yield.c b/libgo/runtime/yield.c
new file mode 100644
index 0000000..3ebc4a4
--- /dev/null
+++ b/libgo/runtime/yield.c
@@ -0,0 +1,54 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define _GNU_SOURCE
+
+#include "config.h"
+
+#include <stddef.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <pthread.h>
+#include <unistd.h>
+
+#ifdef HAVE_SYS_SELECT_H
+#include <sys/select.h>
+#endif
+
+#include "runtime.h"
+
+/* Spin wait. */
+
+void
+runtime_procyield (uint32 cnt)
+{
+ volatile uint32 i;
+
+ for (i = 0; i < cnt; ++i)
+ {
+#if defined (__i386__) || defined (__x86_64__)
+ __builtin_ia32_pause ();
+#endif
+ }
+}
+
+/* Ask the OS to reschedule this thread. */
+
+void
+runtime_osyield (void)
+{
+ pthread_yield ();
+}
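Together with runtime_usleep below, these primitives give the parallel collector's getfull loop its escalating wait: spin briefly with runtime_procyield, then yield the CPU with runtime_osyield, then sleep. A hedged sketch of that escalation around a generic flag; the thresholds mirror the GC loop, but the helper itself is illustrative and assumes the declarations from runtime.h:

/* Wait for *flag to become nonzero, escalating from spinning to
   yielding to sleeping, as getfull() in mgc0.c does. */
static void
wait_for (volatile uint32 *flag)
{
  int32 i;

  for (i = 0; *flag == 0; i++)
    {
      if (i < 10)
	runtime_procyield (20);    /* a few pause instructions */
      else if (i < 20)
	runtime_osyield ();        /* give up the time slice */
      else
	runtime_usleep (100);      /* back off 100 microseconds */
    }
}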
+
+/* Sleep for some number of microseconds. */
+
+void
+runtime_usleep (uint32 us)
+{
+ struct timeval tv;
+
+ tv.tv_sec = us / 1000000;
+ tv.tv_usec = us % 1000000;
+ select (0, NULL, NULL, NULL, &tv);
+}