author     DJ Delorie <dj@delorie.com>  2016-11-03 19:58:43 -0400
committer  DJ Delorie <dj@delorie.com>  2016-11-03 19:58:43 -0400
commit     ba163b353ee76e9dad24246cb07e47a6840cfbae (patch)
tree       1ef5d932c94efc52628a7cbfde99ab3cf517ae6e /malloc/malloc.c
parent     6adaeadf95fa5cc68e92b07456bb82dbd85afd06 (diff)
malloc per-thread cache, separated out
This is the per-thread cache feature from the dj/malloc branch, broken out as a separate feature for review. It is enabled by default; configure with --disable-experimental-malloc to turn it off. mallopt() support for M_TCACHE_COUNT and M_TCACHE_MAX is added for tuning.
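For reviewers who want to exercise the new tuning knobs, here is a minimal sketch of a test program. It assumes a glibc built with this patch applied and the feature enabled (the default), and it defines M_TCACHE_COUNT and M_TCACHE_MAX locally with the values -9 and -10 taken from the patch, since they are non-public and not exported through <malloc.h>; the chosen limits (16 chunks per bucket, 256-byte maximum) are arbitrary illustration values.

#include <malloc.h>
#include <stdlib.h>
#include <stdio.h>

/* Mirror the non-public mallopt parameters defined in this patch.  */
#define M_TCACHE_COUNT -9   /* maximum number of chunks per tcache bucket */
#define M_TCACHE_MAX   -10  /* largest request size (bytes) to cache */

int
main (void)
{
  /* Arbitrary tuning values, chosen for illustration only.  */
  if (mallopt (M_TCACHE_COUNT, 16) == 0)
    fprintf (stderr, "mallopt (M_TCACHE_COUNT) failed\n");
  if (mallopt (M_TCACHE_MAX, 256) == 0)
    fprintf (stderr, "mallopt (M_TCACHE_MAX) failed\n");

  /* Free a small allocation and request the same size again; with the
     patch in place the second request should be served from this
     thread's cache rather than from the arena.  */
  void *p = malloc (32);
  free (p);
  p = malloc (32);
  free (p);
  return 0;
}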
Diffstat (limited to 'malloc/malloc.c')
-rw-r--r--  malloc/malloc.c  243
1 file changed, 243 insertions, 0 deletions
diff --git a/malloc/malloc.c b/malloc/malloc.c
index 584edbf..e57d0cd 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -297,6 +297,25 @@ __malloc_assert (const char *assertion, const char *file, unsigned int line,
}
#endif
+#ifndef USE_TCACHE
+#define USE_TCACHE 0
+#endif
+#if USE_TCACHE
+/* we want 64 entries */
+#define MAX_TCACHE_SIZE (MALLOC_ALIGNMENT * 63)
+#define TCACHE_IDX ((MAX_TCACHE_SIZE / MALLOC_ALIGNMENT) + 1)
+#define size2tidx(bytes) (((bytes) + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)
+
+/* Rounds up, so...
+ idx 0 bytes 0
+ idx 1 bytes 1..8
+ idx 2 bytes 9..16
+ etc
+*/
+
+#define TCACHE_FILL_COUNT 7
+#endif
+
/*
REALLOC_ZERO_BYTES_FREES should be set if a call to
@@ -1711,6 +1730,13 @@ struct malloc_par
/* First address handed out by MORECORE/sbrk. */
char *sbrk_base;
+
+#if USE_TCACHE
+ /* Maximum number of buckets to use. */
+ size_t tcache_max;
+ /* Maximum number of chunks in each bucket. */
+ size_t tcache_count;
+#endif
};
/* There are several instances of this struct ("arenas") in this
@@ -1749,8 +1775,19 @@ static struct malloc_par mp_ =
.trim_threshold = DEFAULT_TRIM_THRESHOLD,
#define NARENAS_FROM_NCORES(n) ((n) * (sizeof (long) == 4 ? 2 : 8))
.arena_test = NARENAS_FROM_NCORES (1)
+#if USE_TCACHE
+ ,
+ .tcache_count = TCACHE_FILL_COUNT,
+ .tcache_max = TCACHE_IDX-1
+#endif
};
+/* Non public mallopt parameters. */
+#if USE_TCACHE
+#define M_TCACHE_COUNT -9
+#define M_TCACHE_MAX -10
+#endif
+
/* Maximum size of memory handled in fastbins. */
static INTERNAL_SIZE_T global_max_fast;
@@ -2874,6 +2911,44 @@ mremap_chunk (mchunkptr p, size_t new_size)
/*------------------------ Public wrappers. --------------------------------*/
+#if USE_TCACHE
+
+typedef struct TCacheEntry {
+ struct TCacheEntry *next;
+} TCacheEntry;
+
+typedef struct TCache {
+ struct TCache *prev, *next;
+ char initted; /* 0 = uninitted, 1 = normal, anything else = shutting down */
+ char counts[TCACHE_IDX];
+ TCacheEntry *entries[TCACHE_IDX];
+} TCache;
+
+static TCache *tcache_list = NULL;
+__libc_lock_define_initialized (static, tcache_mutex);
+
+static __thread TCache tcache = {0,0,0,{0},{0}};
+
+static void __attribute__ ((section ("__libc_thread_freeres_fn")))
+tcache_thread_freeres (void)
+{
+ if (tcache.initted == 1)
+ {
+ __libc_lock_lock (tcache_mutex);
+ tcache.initted = 2;
+ if (tcache.next)
+ tcache.next->prev = tcache.prev;
+ if (tcache.prev)
+ tcache.prev->next = tcache.next;
+ else
+ tcache_list = tcache.next;
+ __libc_lock_unlock (tcache_mutex);
+ }
+}
+text_set_element (__libc_thread_subfreeres, tcache_thread_freeres);
+
+#endif
+
void *
__libc_malloc (size_t bytes)
{
@@ -2884,6 +2959,33 @@ __libc_malloc (size_t bytes)
= atomic_forced_read (__malloc_hook);
if (__builtin_expect (hook != NULL, 0))
return (*hook)(bytes, RETURN_ADDRESS (0));
+#if USE_TCACHE
+ /* _int_free also calls request2size; be careful not to pad twice. */
+ size_t tbytes = request2size(bytes);
+ size_t tc_idx = size2tidx (tbytes);
+
+ if (tcache.initted == 0)
+ {
+ tcache.initted = 1;
+ __libc_lock_lock (tcache_mutex);
+ tcache.next = tcache_list;
+ if (tcache.next)
+ tcache.next->prev = &tcache;
+ tcache_list = &tcache;
+ __libc_lock_unlock (tcache_mutex);
+ }
+
+ if (tc_idx < mp_.tcache_max
+ && tc_idx < TCACHE_IDX /* to appease gcc */
+ && tcache.entries[tc_idx] != NULL
+ && tcache.initted == 1)
+ {
+ TCacheEntry *e = tcache.entries[tc_idx];
+ tcache.entries[tc_idx] = e->next;
+ tcache.counts[tc_idx] --;
+ return (void *) e;
+ }
+#endif
arena_get (ar_ptr, bytes);
@@ -3387,6 +3489,38 @@ _int_malloc (mstate av, size_t bytes)
return NULL;
}
check_remalloced_chunk (av, victim, nb);
+#if USE_TCACHE
+ /* While we're here, if we see other chunks of the same size,
+ stash them in the tcache. */
+ size_t tc_idx = size2tidx (nb-SIZE_SZ);
+ if (tc_idx < mp_.tcache_max)
+ {
+ mchunkptr tc_victim;
+ int found = 0;
+
+ /* While bin not empty and tcache not full, copy chunks over. */
+ while (tcache.counts[tc_idx] < mp_.tcache_count
+ && (pp = *fb) != NULL)
+ {
+ do
+ {
+ tc_victim = pp;
+ if (tc_victim == NULL)
+ break;
+ }
+ while ((pp = catomic_compare_and_exchange_val_acq (fb, tc_victim->fd, tc_victim))
+ != tc_victim);
+ if (tc_victim != 0)
+ {
+ TCacheEntry *e = (TCacheEntry *) chunk2mem(tc_victim);
+ e->next = tcache.entries[tc_idx];
+ tcache.entries[tc_idx] = e;
+ tcache.counts[tc_idx] ++;
+ found ++;
+ }
+ }
+ }
+#endif
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
@@ -3425,6 +3559,40 @@ _int_malloc (mstate av, size_t bytes)
if (av != &main_arena)
set_non_main_arena (victim);
check_malloced_chunk (av, victim, nb);
+#if USE_TCACHE
+ /* While we're here, if we see other chunks of the same size,
+ stash them in the tcache. */
+ size_t tc_idx = size2tidx (nb-SIZE_SZ);
+ if (tc_idx < mp_.tcache_max)
+ {
+ mchunkptr tc_victim;
+ int found = 0;
+
+ /* While bin not empty and tcache not full, copy chunks over. */
+ while (tcache.counts[tc_idx] < mp_.tcache_count
+ && (tc_victim = last(bin)) != bin)
+ {
+ if (tc_victim != 0)
+ {
+ bck = tc_victim->bk;
+ set_inuse_bit_at_offset (tc_victim, nb);
+ if (av != &main_arena)
+ set_non_main_arena (tc_victim);
+ bin->bk = bck;
+ bck->fd = bin;
+
+ TCacheEntry *e = (TCacheEntry *) chunk2mem(tc_victim);
+ e->next = tcache.entries[tc_idx];
+ tcache.entries[tc_idx] = e;
+ tcache.counts[tc_idx] ++;
+ found ++;
+ //_m_printf("snarf chunk %p %lx %p %lx\n", tc_victim, nb,
+ // chunk_at_offset(tc_victim, nb), chunk_at_offset(tc_victim, nb)->size);
+ }
+ }
+ //_m_printf("%d chunks found in smallbin\n", found);
+ }
+#endif
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
@@ -3463,6 +3631,14 @@ _int_malloc (mstate av, size_t bytes)
otherwise need to expand memory to service a "small" request.
*/
+#if USE_TCACHE
+ INTERNAL_SIZE_T tcache_nb = 0;
+ if (size2tidx (nb-SIZE_SZ) <= mp_.tcache_max)
+ tcache_nb = nb;
+ size_t tc_idx = size2tidx (nb-SIZE_SZ);
+ int return_cached = 0;
+#endif
+
for (;; )
{
int iters = 0;
@@ -3523,10 +3699,29 @@ _int_malloc (mstate av, size_t bytes)
set_inuse_bit_at_offset (victim, size);
if (av != &main_arena)
set_non_main_arena (victim);
+#if USE_TCACHE
+ /* Fill cache first, return to user only if cache fills.
+ We may return one of these chunks later. */
+ if (tcache_nb
+ && tcache.counts[tc_idx] < mp_.tcache_count)
+ {
+ TCacheEntry *e = (TCacheEntry *) chunk2mem(victim);
+ e->next = tcache.entries[tc_idx];
+ tcache.entries[tc_idx] = e;
+ tcache.counts[tc_idx] ++;
+ return_cached = 1;
+ continue;
+ }
+ else
+ {
+#endif
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
+#if USE_TCACHE
+ }
+#endif
}
/* place chunk in bin */
@@ -3598,6 +3793,17 @@ _int_malloc (mstate av, size_t bytes)
break;
}
+#if USE_TCACHE
+ /* If all the small chunks we found ended up cached, return one now. */
+ if (return_cached)
+ {
+ TCacheEntry *e = tcache.entries[tc_idx];
+ tcache.entries[tc_idx] = e->next;
+ tcache.counts[tc_idx] --;
+ return (void *) e;
+ }
+#endif
+
/*
If a large request, scan through the chunks of current bin in
sorted order to find smallest that fits. Use the skip list for this.
@@ -3883,6 +4089,23 @@ _int_free (mstate av, mchunkptr p, int have_lock)
check_inuse_chunk(av, p);
+#if USE_TCACHE
+ {
+ size_t tc_idx = size2tidx (size - SIZE_SZ);
+
+ if (tc_idx < mp_.tcache_max
+ && tcache.counts[tc_idx] < mp_.tcache_count
+ && tcache.initted == 1)
+ {
+ TCacheEntry *e = (TCacheEntry *) chunk2mem (p);
+ e->next = tcache.entries[tc_idx];
+ tcache.entries[tc_idx] = e;
+ tcache.counts[tc_idx] ++;
+ return;
+ }
+ }
+#endif
+
/*
If eligible, place chunk on a fastbin so it can be found
and used quickly in malloc.
@@ -4904,6 +5127,26 @@ __libc_mallopt (int param_number, int value)
if (value > 0)
do_set_arena_test (value);
break;
+#if USE_TCACHE
+ case M_TCACHE_COUNT:
+ if (value >= 0)
+ {
+ LIBC_PROBE (memory_mallopt_tcache_count, 2, value, mp_.tcache_count);
+ mp_.tcache_count = value;
+ }
+ break;
+ case M_TCACHE_MAX:
+ if (value >= 0)
+ {
+ value = size2tidx (value);
+ if (value < TCACHE_IDX)
+ {
+ LIBC_PROBE (memory_mallopt_tcache_max, 2, value, mp_.tcache_max);
+ mp_.tcache_max = value;
+ }
+ }
+ break;
+#endif
}
__libc_lock_unlock (av->mutex);
return res;
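As a side note for review, the bucket-index arithmetic introduced near the top of the patch (size2tidx and the "Rounds up" comment) can be checked with a small standalone sketch, not part of the patch itself. MALLOC_ALIGNMENT is hard-coded to 8 below to match the patch's illustrative comment; a real build derives it from the target (commonly 2 * SIZE_SZ = 16 on 64-bit), which doubles the width of every bucket.

/* Standalone sketch of the tcache bucket mapping; not part of the patch.
   MALLOC_ALIGNMENT is assumed to be 8 here, matching the comment in the
   patch, whereas an actual build takes it from the platform.  */
#include <stdio.h>
#include <stddef.h>

#define MALLOC_ALIGNMENT 8
#define MAX_TCACHE_SIZE (MALLOC_ALIGNMENT * 63)
#define TCACHE_IDX ((MAX_TCACHE_SIZE / MALLOC_ALIGNMENT) + 1)
#define size2tidx(bytes) (((bytes) + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)

int
main (void)
{
  /* bytes 0 -> idx 0, bytes 1..8 -> idx 1, bytes 9..16 -> idx 2, ...  */
  size_t sizes[] = { 0, 1, 8, 9, 16, 17, 100, MAX_TCACHE_SIZE };
  for (size_t i = 0; i < sizeof sizes / sizeof sizes[0]; i++)
    printf ("bytes %3zu -> tcache index %2zu (of %d buckets)\n",
            sizes[i], (size_t) size2tidx (sizes[i]), TCACHE_IDX);
  return 0;
}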