aboutsummaryrefslogtreecommitdiff
path: root/libgo/runtime/malloc.goc
diff options
context:
space:
mode:
Diffstat (limited to 'libgo/runtime/malloc.goc')
-rw-r--r--libgo/runtime/malloc.goc268
1 files changed, 200 insertions, 68 deletions
diff --git a/libgo/runtime/malloc.goc b/libgo/runtime/malloc.goc
index 33d0c39..798d875 100644
--- a/libgo/runtime/malloc.goc
+++ b/libgo/runtime/malloc.goc
@@ -54,6 +54,7 @@ package runtime
// Mark mheap as 'no pointers', it does not contain interesting pointers but occupies ~45K.
MHeap runtime_mheap;
+MStats mstats;
int32 runtime_checking;
@@ -62,6 +63,9 @@ extern MStats mstats; // defined in zruntime_def_$GOOS_$GOARCH.go
extern volatile intgo runtime_MemProfileRate
__asm__ (GOSYM_PREFIX "runtime.MemProfileRate");
+static void* largealloc(uint32, uintptr*);
+static void profilealloc(void *v, uintptr size, uintptr typ);
+
// Allocate an object of at least size bytes.
// Small objects are allocated from the per-thread cache's free lists.
// Large objects (> 32 kB) are allocated straight from the heap.
@@ -72,12 +76,12 @@ runtime_mallocgc(uintptr size, uintptr typ, uint32 flag)
M *m;
G *g;
int32 sizeclass;
+ uintptr tinysize, size1;
intgo rate;
MCache *c;
MCacheList *l;
- uintptr npages;
- MSpan *s;
- MLink *v;
+ MLink *v, *next;
+ byte *tiny;
bool incallback;
if(size == 0) {
@@ -119,6 +123,81 @@ runtime_mallocgc(uintptr size, uintptr typ, uint32 flag)
c = m->mcache;
if(!runtime_debug.efence && size <= MaxSmallSize) {
+ if((flag&(FlagNoScan|FlagNoGC)) == FlagNoScan && size < TinySize) {
+ // Tiny allocator.
+ //
+ // Tiny allocator combines several tiny allocation requests
+ // into a single memory block. The resulting memory block
+ // is freed when all subobjects are unreachable. The subobjects
+ // must be FlagNoScan (don't have pointers), this ensures that
+ // the amount of potentially wasted memory is bounded.
+ //
+ // Size of the memory block used for combining (TinySize) is tunable.
+ // Current setting is 16 bytes, which relates to 2x worst case memory
+ // wastage (when all but one subobjects are unreachable).
+ // 8 bytes would result in no wastage at all, but provides fewer
+ // opportunities for combining.
+ // 32 bytes provides more opportunities for combining,
+ // but can lead to 4x worst case wastage.
+ // The best-case gain is 8x regardless of block size.
+ //
+ // Objects obtained from tiny allocator must not be freed explicitly.
+ // So when an object will be freed explicitly, we ensure that
+ // its size >= TinySize.
+ //
+ // SetFinalizer has a special case for objects potentially coming
+ // from tiny allocator; in such a case it allows setting finalizers
+ // for an inner byte of a memory block.
+ //
+ // The main targets of tiny allocator are small strings and
+ // standalone escaping variables. On a json benchmark
+ // the allocator reduces number of allocations by ~12% and
+ // reduces heap size by ~20%.
+
+ tinysize = c->tinysize;
+ if(size <= tinysize) {
+ tiny = c->tiny;
+ // Align tiny pointer for required (conservative) alignment.
+ if((size&7) == 0)
+ tiny = (byte*)ROUND((uintptr)tiny, 8);
+ else if((size&3) == 0)
+ tiny = (byte*)ROUND((uintptr)tiny, 4);
+ else if((size&1) == 0)
+ tiny = (byte*)ROUND((uintptr)tiny, 2);
+ size1 = size + (tiny - c->tiny);
+ if(size1 <= tinysize) {
+ // The object fits into existing tiny block.
+ v = (MLink*)tiny;
+ c->tiny += size1;
+ c->tinysize -= size1;
+ m->mallocing = 0;
+ m->locks--;
+ if(incallback)
+ runtime_entersyscall();
+ return v;
+ }
+ }
+ // Allocate a new TinySize block.
+ l = &c->list[TinySizeClass];
+ if(l->list == nil)
+ runtime_MCache_Refill(c, TinySizeClass);
+ v = l->list;
+ next = v->next;
+ if(next != nil) // prefetching nil leads to a DTLB miss
+ PREFETCH(next);
+ l->list = next;
+ l->nlist--;
+ ((uint64*)v)[0] = 0;
+ ((uint64*)v)[1] = 0;
+ // See if we need to replace the existing tiny block with the new one
+ // based on amount of remaining free space.
+ if(TinySize-size > tinysize) {
+ c->tiny = (byte*)v + size;
+ c->tinysize = TinySize - size;
+ }
+ size = TinySize;
+ goto done;
+ }
// Allocate from mcache free lists.
// Inlined version of SizeToClass().
if(size <= 1024-8)
@@ -130,31 +209,22 @@ runtime_mallocgc(uintptr size, uintptr typ, uint32 flag)
if(l->list == nil)
runtime_MCache_Refill(c, sizeclass);
v = l->list;
- l->list = v->next;
+ next = v->next;
+ if(next != nil) // prefetching nil leads to a DTLB miss
+ PREFETCH(next);
+ l->list = next;
l->nlist--;
if(!(flag & FlagNoZero)) {
v->next = nil;
// block is zeroed iff second word is zero ...
- if(size > sizeof(uintptr) && ((uintptr*)v)[1] != 0)
+ if(size > 2*sizeof(uintptr) && ((uintptr*)v)[1] != 0)
runtime_memclr((byte*)v, size);
}
+ done:
c->local_cachealloc += size;
} else {
- // TODO(rsc): Report tracebacks for very large allocations.
-
// Allocate directly from heap.
- npages = size >> PageShift;
- if((size & PageMask) != 0)
- npages++;
- s = runtime_MHeap_Alloc(&runtime_mheap, npages, 0, 1, !(flag & FlagNoZero));
- if(s == nil)
- runtime_throw("out of memory");
- s->limit = (byte*)(s->start<<PageShift) + size;
- size = npages<<PageShift;
- v = (void*)(s->start << PageShift);
-
- // setup for mark sweep
- runtime_markspan(v, 0, 0, true);
+ v = largealloc(flag, &size);
}
if(flag & FlagNoGC)
@@ -180,40 +250,83 @@ runtime_mallocgc(uintptr size, uintptr typ, uint32 flag)
m->mallocing = 0;
if(UseSpanType && !(flag & FlagNoScan) && typ != 0 && m->settype_bufsize == nelem(m->settype_buf))
runtime_settype_flush(m);
- m->locks--;
+ if(raceenabled)
+ runtime_racemalloc(v, size);
if(runtime_debug.allocfreetrace)
goto profile;
if(!(flag & FlagNoProfiling) && (rate = runtime_MemProfileRate) > 0) {
- if(size >= (uint32) rate)
- goto profile;
- if((uint32) m->mcache->next_sample > size)
- m->mcache->next_sample -= size;
+ if(size < (uintptr)rate && size < (uintptr)(uint32)c->next_sample)
+ c->next_sample -= size;
else {
- // pick next profile time
- // If you change this, also change allocmcache.
- if(rate > 0x3fffffff) // make 2*rate not overflow
- rate = 0x3fffffff;
- m->mcache->next_sample = runtime_fastrand1() % (2*rate);
profile:
- runtime_setblockspecial(v, true);
- runtime_MProf_Malloc(v, size, typ);
+ profilealloc(v, size, typ);
}
}
+ m->locks--;
+
if(!(flag & FlagNoInvokeGC) && mstats.heap_alloc >= mstats.next_gc)
runtime_gc(0);
- if(raceenabled)
- runtime_racemalloc(v, size);
-
if(incallback)
runtime_entersyscall();
return v;
}
+static void*
+largealloc(uint32 flag, uintptr *sizep)
+{
+ uintptr npages, size;
+ MSpan *s;
+ void *v;
+
+ // Allocate directly from heap: *sizep is the requested byte count on entry and the page-rounded byte count on return.
+ size = *sizep;
+ if(size + PageSize < size) // the addition wrapped around: size is too large to be rounded up to whole pages
+ runtime_throw("out of memory");
+ npages = size >> PageShift; // whole pages contained in size ...
+ if((size & PageMask) != 0) // ... plus one more page for any partial remainder
+ npages++;
+ s = runtime_MHeap_Alloc(&runtime_mheap, npages, 0, 1, !(flag & FlagNoZero)); // last argument is false only when FlagNoZero is set (presumably "zero the span" -- confirm against MHeap_Alloc)
+ if(s == nil)
+ runtime_throw("out of memory");
+ s->limit = (byte*)(s->start<<PageShift) + size; // limit is the end of the requested bytes, not the end of the last page
+ *sizep = npages<<PageShift; // report the page-rounded size back to the caller
+ v = (void*)(s->start << PageShift); // s->start shifted by PageShift is the address handed back to the caller
+ // setup for mark sweep
+ runtime_markspan(v, 0, 0, true);
+ return v;
+}
+
+static void
+profilealloc(void *v, uintptr size, uintptr typ)
+{
+ uintptr rate;
+ int32 next;
+ MCache *c;
+
+ c = runtime_m()->mcache; // mcache of the M returned by runtime_m()
+ rate = runtime_MemProfileRate; // current value of the runtime.MemProfileRate symbol
+ if(size < rate) { // allocations of at least rate bytes leave next_sample untouched and are only recorded below
+ // pick next profile time
+ // If you change this, also change allocmcache.
+ if(rate > 0x3fffffff) // make 2*rate not overflow
+ rate = 0x3fffffff;
+ next = runtime_fastrand1() % (2*rate); // new countdown drawn from [0, 2*rate)
+ // Subtract the "remainder" of the current allocation.
+ // Otherwise objects that are close in size to sampling rate
+ // will be under-sampled, because we consistently discard this remainder.
+ next -= (size - c->next_sample);
+ if(next < 0) // clamp: a negative countdown is stored as zero
+ next = 0;
+ c->next_sample = next;
+ }
+ runtime_MProf_Malloc(v, size, typ); // always hand the allocation (pointer, size, type word) to runtime_MProf_Malloc
+}
+
void*
__go_alloc(uintptr size)
{
@@ -228,7 +341,6 @@ __go_free(void *v)
int32 sizeclass;
MSpan *s;
MCache *c;
- uint32 prof;
uintptr size;
if(v == nil)
@@ -246,18 +358,27 @@ __go_free(void *v)
runtime_printf("free %p: not an allocated block\n", v);
runtime_throw("free runtime_mlookup");
}
- prof = runtime_blockspecial(v);
+ size = s->elemsize;
+ sizeclass = s->sizeclass;
+ // Objects that are smaller than TinySize can be allocated using tiny alloc,
+ // if such an object is then combined with an object that has a finalizer, we will crash.
+ if(size < TinySize)
+ runtime_throw("freeing too small block");
if(raceenabled)
runtime_racefree(v);
- // Find size class for v.
- sizeclass = s->sizeclass;
+ // Ensure that the span is swept.
+ // If we free into an unswept span, we will corrupt GC bitmaps.
+ runtime_MSpan_EnsureSwept(s);
+
+ if(s->specials != nil)
+ runtime_freeallspecials(s, v, size);
+
c = m->mcache;
if(sizeclass == 0) {
// Large object.
- size = s->npages<<PageShift;
- *(uintptr*)(s->start<<PageShift) = (uintptr)0xfeedfeedfeedfeedll; // mark as "needs to be zeroed"
+ s->needzero = 1;
// Must mark v freed before calling unmarkspan and MHeap_Free:
// they might coalesce v into other spans and change the bitmap further.
runtime_markfreed(v, size);
@@ -270,9 +391,10 @@ __go_free(void *v)
c->local_largefree += size;
} else {
// Small object.
- size = runtime_class_to_size[sizeclass];
- if(size > sizeof(uintptr))
+ if(size > 2*sizeof(uintptr))
((uintptr*)v)[1] = (uintptr)0xfeedfeedfeedfeedll; // mark as "needs to be zeroed"
+ else if(size > sizeof(uintptr))
+ ((uintptr*)v)[1] = 0;
// Must mark v freed before calling MCache_Free:
// it might coalesce v and other blocks into a bigger span
// and change the bitmap further.
@@ -280,8 +402,6 @@ __go_free(void *v)
c->local_nsmallfree[sizeclass]++;
runtime_MCache_Free(c, v, sizeclass, size);
}
- if(prof)
- runtime_MProf_Free(v, size);
m->mallocing = 0;
}
@@ -392,6 +512,12 @@ runtime_purgecachedstats(MCache *c)
extern uintptr runtime_sizeof_C_MStats
__asm__ (GOSYM_PREFIX "runtime.Sizeof_C_MStats");
+// Size of the trailing by_size array differs between Go and C:
+// NumSizeClasses was changed, but we cannot change the Go struct because of backward compatibility.
+// sizeof_C_MStats is what C thinks the size of the Go struct is.
+
+// Initialized in mallocinit because it's defined in go/runtime/mem.go.
+
#define MaxArena32 (2U<<30)
void
@@ -400,11 +526,10 @@ runtime_mallocinit(void)
byte *p;
uintptr arena_size, bitmap_size, spans_size;
extern byte _end[];
- byte *want;
uintptr limit;
uint64 i;
- runtime_sizeof_C_MStats = sizeof(MStats);
+ runtime_sizeof_C_MStats = sizeof(MStats) - (NumSizeClasses - 61) * sizeof(mstats.by_size[0]);
p = nil;
arena_size = 0;
@@ -419,6 +544,9 @@ runtime_mallocinit(void)
runtime_InitSizes();
+ if(runtime_class_to_size[TinySizeClass] != TinySize)
+ runtime_throw("bad TinySizeClass");
+
// limit = runtime_memlimit();
// See https://code.google.com/p/go/issues/detail?id=5049
// TODO(rsc): Fix after 1.1.
@@ -457,7 +585,7 @@ runtime_mallocinit(void)
spans_size = arena_size / PageSize * sizeof(runtime_mheap.spans[0]);
spans_size = ROUND(spans_size, PageSize);
for(i = 0; i < HeapBaseOptions; i++) {
- p = runtime_SysReserve(HeapBase(i), bitmap_size + spans_size + arena_size);
+ p = runtime_SysReserve(HeapBase(i), bitmap_size + spans_size + arena_size + PageSize);
if(p != nil)
break;
}
@@ -499,18 +627,16 @@ runtime_mallocinit(void)
// So adjust it upward a little bit ourselves: 1/4 MB to get
// away from the running binary image and then round up
// to a MB boundary.
- want = (byte*)ROUND((uintptr)_end + (1<<18), 1<<20);
- if(0xffffffff - (uintptr)want <= bitmap_size + spans_size + arena_size)
- want = 0;
- p = runtime_SysReserve(want, bitmap_size + spans_size + arena_size);
+ p = (byte*)ROUND((uintptr)_end + (1<<18), 1<<20);
+ p = runtime_SysReserve(p, bitmap_size + spans_size + arena_size + PageSize);
if(p == nil)
runtime_throw("runtime: cannot reserve arena virtual address space");
- if((uintptr)p & (((uintptr)1<<PageShift)-1))
- runtime_printf("runtime: SysReserve returned unaligned address %p; asked for %p", p,
- bitmap_size+spans_size+arena_size);
}
- if((uintptr)p & (((uintptr)1<<PageShift)-1))
- runtime_throw("runtime: SysReserve returned unaligned address");
+
+ // PageSize can be larger than OS definition of page size,
+ // so SysReserve can give us a PageSize-unaligned pointer.
+ // To overcome this we ask for PageSize more and round up the pointer.
+ p = (byte*)ROUND((uintptr)p, PageSize);
runtime_mheap.spans = (MSpan**)p;
runtime_mheap.bitmap = p + spans_size;
@@ -523,7 +649,7 @@ runtime_mallocinit(void)
runtime_m()->mcache = runtime_allocmcache();
// See if it works.
- runtime_free(runtime_malloc(1));
+ runtime_free(runtime_malloc(TinySize));
}
void*
@@ -828,16 +954,18 @@ func SetFinalizer(obj Eface, finalizer Eface) {
goto throw;
}
ot = (const PtrType*)obj.type;
- if(ot->__element_type != nil && ot->__element_type->__size == 0) {
+ // As an implementation detail we do not run finalizers for zero-sized objects,
+ // because we use &runtime_zerobase for all such allocations.
+ if(ot->__element_type != nil && ot->__element_type->__size == 0)
return;
- }
if(!runtime_mlookup(obj.__object, &base, &size, nil) || obj.__object != base) {
- runtime_printf("runtime.SetFinalizer: pointer not at beginning of allocated block\n");
- goto throw;
+ // As an implementation detail we allow setting finalizers for an inner byte
+ // of an object if it could come from tiny alloc (see mallocgc for details).
+ if(ot->__element_type == nil || (ot->__element_type->__code&GO_NO_POINTERS) == 0 || ot->__element_type->__size >= TinySize) {
+ runtime_printf("runtime.SetFinalizer: pointer not at beginning of allocated block\n");
+ goto throw;
+ }
}
- ft = nil;
- ot = (const PtrType*)obj.__type_descriptor;
- fint = nil;
if(finalizer.__type_descriptor != nil) {
if(finalizer.__type_descriptor->__code != GO_FUNC)
goto badfunc;
@@ -856,11 +984,15 @@ func SetFinalizer(obj Eface, finalizer Eface) {
// ok - satisfies non-empty interface
} else
goto badfunc;
- }
- if(!runtime_addfinalizer(obj.__object, finalizer.__type_descriptor != nil ? *(void**)finalizer.__object : nil, ft, ot)) {
- runtime_printf("runtime.SetFinalizer: finalizer already set\n");
- goto throw;
+ ot = (const PtrType*)obj.__type_descriptor;
+ if(!runtime_addfinalizer(obj.__object, *(FuncVal**)finalizer.__object, ft, ot)) {
+ runtime_printf("runtime.SetFinalizer: finalizer already set\n");
+ goto throw;
+ }
+ } else {
+ // NOTE: asking to remove a finalizer when there currently isn't one set is OK.
+ runtime_removefinalizer(obj.__object);
}
return;