-rw-r--r--  malloc/Makefile           3
-rw-r--r--  malloc/malloc.c         260
-rw-r--r--  malloc/tst-memalign-2.c 155
3 files changed, 390 insertions(+), 28 deletions(-)
diff --git a/malloc/Makefile b/malloc/Makefile
index 0717df6..f496758 100644
--- a/malloc/Makefile
+++ b/malloc/Makefile
@@ -43,6 +43,7 @@ tests := mallocbug tst-malloc tst-valloc tst-calloc tst-obstack \
tst-tcfree1 tst-tcfree2 tst-tcfree3 \
tst-safe-linking \
tst-mallocalign1 \
+ tst-memalign-2
tests-static := \
tst-interpose-static-nothread \
@@ -70,7 +71,7 @@ test-srcs = tst-mtrace
# with MALLOC_CHECK_=3 because they expect a specific failure.
tests-exclude-malloc-check = tst-malloc-check tst-malloc-usable \
tst-mxfast tst-safe-linking \
- tst-compathooks-off tst-compathooks-on
+ tst-compathooks-off tst-compathooks-on tst-memalign-2
# Run all tests with MALLOC_CHECK_=3
tests-malloc-check = $(filter-out $(tests-exclude-malloc-check) \
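
The hunks above register tst-memalign-2 with the malloc test list and keep it out of the MALLOC_CHECK_=3 rerun, presumably because MALLOC_CHECK_ disables the tcache fast path the test relies on. As a usage sketch, assuming the usual glibc test-harness conventions (the exact invocation depends on the build tree), the new test could be run on its own from the build directory with:

    make test t=malloc/tst-memalign-2
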
diff --git a/malloc/malloc.c b/malloc/malloc.c
index 05e65a2..0315ac5 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -3156,19 +3156,44 @@ tcache_put (mchunkptr chunk, size_t tc_idx)
}
/* Caller must ensure that we know tc_idx is valid and there's
- available chunks to remove. */
+ available chunks to remove. Removes chunk from the middle of the
+ list. */
static __always_inline void *
-tcache_get (size_t tc_idx)
+tcache_get_n (size_t tc_idx, tcache_entry **ep)
{
- tcache_entry *e = tcache->entries[tc_idx];
+ tcache_entry *e;
+ if (ep == &(tcache->entries[tc_idx]))
+ e = *ep;
+ else
+ e = REVEAL_PTR (*ep);
+
if (__glibc_unlikely (!aligned_OK (e)))
malloc_printerr ("malloc(): unaligned tcache chunk detected");
- tcache->entries[tc_idx] = REVEAL_PTR (e->next);
+
+ if (ep == &(tcache->entries[tc_idx]))
+ *ep = REVEAL_PTR (e->next);
+ else
+ *ep = PROTECT_PTR (ep, REVEAL_PTR (e->next));
+
--(tcache->counts[tc_idx]);
e->key = 0;
return (void *) e;
}
+/* Like the above, but removes from the head of the list. */
+static __always_inline void *
+tcache_get (size_t tc_idx)
+{
+ return tcache_get_n (tc_idx, & tcache->entries[tc_idx]);
+}
+
+/* Iterates through the tcache linked list. */
+static __always_inline tcache_entry *
+tcache_next (tcache_entry *e)
+{
+ return (tcache_entry *) REVEAL_PTR (e->next);
+}
+
static void
tcache_thread_shutdown (void)
{
@@ -3277,7 +3302,7 @@ __libc_malloc (size_t bytes)
DIAG_PUSH_NEEDS_COMMENT;
if (tc_idx < mp_.tcache_bins
- && tcache
+ && tcache != NULL
&& tcache->counts[tc_idx] > 0)
{
victim = tcache_get (tc_idx);
@@ -3536,6 +3561,38 @@ _mid_memalign (size_t alignment, size_t bytes, void *address)
alignment = a;
}
+#if USE_TCACHE
+ {
+ size_t tbytes;
+ tbytes = checked_request2size (bytes);
+ if (tbytes == 0)
+ {
+ __set_errno (ENOMEM);
+ return NULL;
+ }
+ size_t tc_idx = csize2tidx (tbytes);
+
+ if (tc_idx < mp_.tcache_bins
+ && tcache != NULL
+ && tcache->counts[tc_idx] > 0)
+ {
+ /* The tcache itself isn't encoded, but the chain is. */
+ tcache_entry **tep = & tcache->entries[tc_idx];
+ tcache_entry *te = *tep;
+ while (te != NULL && !PTR_IS_ALIGNED (te, alignment))
+ {
+ tep = & (te->next);
+ te = tcache_next (te);
+ }
+ if (te != NULL)
+ {
+ void *victim = tcache_get_n (tc_idx, tep);
+ return tag_new_usable (victim);
+ }
+ }
+ }
+#endif
+
if (SINGLE_THREAD_P)
{
p = _int_memalign (&main_arena, alignment, bytes);
@@ -3841,7 +3898,7 @@ _int_malloc (mstate av, size_t bytes)
/* While we're here, if we see other chunks of the same size,
stash them in the tcache. */
size_t tc_idx = csize2tidx (nb);
- if (tcache && tc_idx < mp_.tcache_bins)
+ if (tcache != NULL && tc_idx < mp_.tcache_bins)
{
mchunkptr tc_victim;
@@ -3899,7 +3956,7 @@ _int_malloc (mstate av, size_t bytes)
/* While we're here, if we see other chunks of the same size,
stash them in the tcache. */
size_t tc_idx = csize2tidx (nb);
- if (tcache && tc_idx < mp_.tcache_bins)
+ if (tcache != NULL && tc_idx < mp_.tcache_bins)
{
mchunkptr tc_victim;
@@ -3961,7 +4018,7 @@ _int_malloc (mstate av, size_t bytes)
#if USE_TCACHE
INTERNAL_SIZE_T tcache_nb = 0;
size_t tc_idx = csize2tidx (nb);
- if (tcache && tc_idx < mp_.tcache_bins)
+ if (tcache != NULL && tc_idx < mp_.tcache_bins)
tcache_nb = nb;
int return_cached = 0;
@@ -4041,7 +4098,7 @@ _int_malloc (mstate av, size_t bytes)
#if USE_TCACHE
/* Fill cache first, return to user only if cache fills.
We may return one of these chunks later. */
- if (tcache_nb
+ if (tcache_nb > 0
&& tcache->counts[tc_idx] < mp_.tcache_count)
{
tcache_put (victim, tc_idx);
@@ -4915,6 +4972,43 @@ _int_realloc (mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize,
------------------------------ memalign ------------------------------
*/
+/* Returns 0 if the chunk is not and does not contain the requested
+ aligned sub-chunk, else returns the amount of "waste" from
+ trimming. BYTES is the *user* byte size, not the chunk byte
+ size. */
+static size_t
+chunk_ok_for_memalign (mchunkptr p, size_t alignment, size_t bytes)
+{
+ void *m = chunk2mem (p);
+ INTERNAL_SIZE_T size = memsize (p);
+ void *aligned_m = m;
+
+ if (__glibc_unlikely (misaligned_chunk (p)))
+ malloc_printerr ("_int_memalign(): unaligned chunk detected");
+
+ aligned_m = PTR_ALIGN_UP (m, alignment);
+
+ INTERNAL_SIZE_T front_extra = (intptr_t) aligned_m - (intptr_t) m;
+
+ /* We can't trim off the front as it's too small. */
+ if (front_extra > 0 && front_extra < MINSIZE)
+ return 0;
+
+ /* If it's a perfect fit, it's an exception to the return value rule
+ (we would return zero waste, which looks like "not usable"), so
+ handle it here by returning a small non-zero value instead. */
+ if (size == bytes && front_extra == 0)
+ return 1;
+
+ /* If the block we need fits in the chunk, calculate total waste. */
+ if (size > bytes + front_extra)
+ return size - bytes;
+
+ /* Can't use this chunk. */
+ return 0;
+}
+
+/* BYTES is user requested bytes, not requested chunksize bytes. */
static void *
_int_memalign (mstate av, size_t alignment, size_t bytes)
{
@@ -4928,8 +5022,7 @@ _int_memalign (mstate av, size_t alignment, size_t bytes)
mchunkptr remainder; /* spare room at end to split off */
unsigned long remainder_size; /* its size */
INTERNAL_SIZE_T size;
-
-
+ mchunkptr victim;
nb = checked_request2size (bytes);
if (nb == 0)
@@ -4938,29 +5031,142 @@ _int_memalign (mstate av, size_t alignment, size_t bytes)
return NULL;
}
- /*
- Strategy: find a spot within that chunk that meets the alignment
+ /* We can't check tcache here because we hold the arena lock, which
+ tcache doesn't expect. We expect it has been checked
+ earlier. */
+
+ /* Strategy: search the bins looking for an existing block that
+ meets our needs. We scan a range of bins from "exact size" to
+ "just under 2x", spanning the small/large barrier if needed. If
+ we don't find anything in those bins, the common malloc code will
+ scan starting at 2x. */
+
+ /* This will be set if we found a candidate chunk. */
+ victim = NULL;
+
+ /* Fast bins are singly-linked, hard to remove a chunk from the middle
+ and unlikely to meet our alignment requirements. We have not done
+ any experimentation with searching for aligned fastbins. */
+
+ int first_bin_index;
+ int first_largebin_index;
+ int last_bin_index;
+
+ if (in_smallbin_range (nb))
+ first_bin_index = smallbin_index (nb);
+ else
+ first_bin_index = largebin_index (nb);
+
+ if (in_smallbin_range (nb * 2))
+ last_bin_index = smallbin_index (nb * 2);
+ else
+ last_bin_index = largebin_index (nb * 2);
+
+ first_largebin_index = largebin_index (MIN_LARGE_SIZE);
+
+ int victim_index; /* its bin index */
+
+ for (victim_index = first_bin_index;
+ victim_index < last_bin_index;
+ victim_index ++)
+ {
+ victim = NULL;
+
+ if (victim_index < first_largebin_index)
+ {
+ /* Check small bins. Small bin chunks are doubly-linked despite
+ being the same size. */
+
+ mchunkptr fwd; /* misc temp for linking */
+ mchunkptr bck; /* misc temp for linking */
+
+ bck = bin_at (av, victim_index);
+ fwd = bck->fd;
+ while (fwd != bck)
+ {
+ if (chunk_ok_for_memalign (fwd, alignment, bytes) > 0)
+ {
+ victim = fwd;
+
+ /* Unlink it */
+ victim->fd->bk = victim->bk;
+ victim->bk->fd = victim->fd;
+ break;
+ }
+
+ fwd = fwd->fd;
+ }
+ }
+ else
+ {
+ /* Check large bins. */
+ mchunkptr fwd; /* misc temp for linking */
+ mchunkptr bck; /* misc temp for linking */
+ mchunkptr best = NULL;
+ size_t best_size = 0;
+
+ bck = bin_at (av, victim_index);
+ fwd = bck->fd;
+
+ while (fwd != bck)
+ {
+ int extra;
+
+ if (chunksize (fwd) < nb)
+ break;
+ extra = chunk_ok_for_memalign (fwd, alignment, bytes);
+ if (extra > 0
+ && (extra <= best_size || best == NULL))
+ {
+ best = fwd;
+ best_size = extra;
+ }
+
+ fwd = fwd->fd;
+ }
+ victim = best;
+
+ if (victim != NULL)
+ {
+ unlink_chunk (av, victim);
+ break;
+ }
+ }
+
+ if (victim != NULL)
+ break;
+ }
+
+ /* Strategy: find a spot within that chunk that meets the alignment
request, and then possibly free the leading and trailing space.
- */
+ This strategy is incredibly costly and can lead to external
+ fragmentation if header and footer chunks are unused. */
- /* Call malloc with worst case padding to hit alignment. */
+ if (victim != NULL)
+ {
+ p = victim;
+ m = chunk2mem (p);
+ set_inuse (p);
+ }
+ else
+ {
+ /* Call malloc with worst case padding to hit alignment. */
- m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
+ m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
- if (m == 0)
- return 0; /* propagate failure */
+ if (m == 0)
+ return 0; /* propagate failure */
- p = mem2chunk (m);
+ p = mem2chunk (m);
+ }
if ((((unsigned long) (m)) % alignment) != 0) /* misaligned */
-
- { /*
- Find an aligned spot inside chunk. Since we need to give back
- leading space in a chunk of at least MINSIZE, if the first
- calculation places us at a spot with less than MINSIZE leader,
- we can move to the next aligned spot -- we've allocated enough
- total room so that this is always possible.
- */
+ {
+ /* Find an aligned spot inside chunk. Since we need to give back
+ leading space in a chunk of at least MINSIZE, if the first
+ calculation places us at a spot with less than MINSIZE leader,
+ we can move to the next aligned spot -- we've allocated enough
+ total room so that this is always possible. */
brk = (char *) mem2chunk (((unsigned long) (m + alignment - 1)) &
- ((signed long) alignment));
if ((unsigned long) (brk - (char *) (p)) < MINSIZE)
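
The comment blocks above describe two pieces of new machinery: a bin scan in _int_memalign that looks for an existing free chunk between "exact size" and "just under 2x" the request, and a helper, chunk_ok_for_memalign, that scores each candidate by how many bytes would be wasted after trimming. The following is a minimal standalone sketch of that scoring rule on plain integers, not the actual glibc code; waste_if_usable and MIN_LEADER are made-up names standing in for the chunk-header arithmetic and MINSIZE.

/* Model of the waste computation: given a free block of SIZE usable
   bytes whose user data starts at address M, return 0 if an
   ALIGNMENT-aligned allocation of BYTES bytes cannot be carved out of
   it, otherwise the number of bytes that would be trimmed away.  */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define MIN_LEADER 32	/* stand-in for MINSIZE; the real value differs */

static size_t
waste_if_usable (uintptr_t m, size_t size, size_t alignment, size_t bytes)
{
  /* Round M up to the next multiple of ALIGNMENT (a power of two).  */
  uintptr_t aligned_m = (m + alignment - 1) & ~(uintptr_t) (alignment - 1);
  size_t front_extra = aligned_m - m;

  /* A nonzero leader must be big enough to become a chunk of its own.  */
  if (front_extra > 0 && front_extra < MIN_LEADER)
    return 0;

  /* Perfect fit: return a small nonzero value so it is not mistaken
     for "unusable".  */
  if (size == bytes && front_extra == 0)
    return 1;

  /* The request fits after the leader; waste is leader plus trailer.  */
  if (size > bytes + front_extra)
    return size - bytes;

  return 0;
}

int
main (void)
{
  /* 200 usable bytes at 0x1008: aligning to 64 skips 56 bytes, which is
     enough for a leader chunk, and the 64 requested bytes still fit.  */
  printf ("waste = %zu\n", waste_if_usable (0x1008, 200, 64, 64));
  return 0;
}
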
diff --git a/malloc/tst-memalign-2.c b/malloc/tst-memalign-2.c
new file mode 100644
index 0000000..4996578
--- /dev/null
+++ b/malloc/tst-memalign-2.c
@@ -0,0 +1,155 @@
+/* Test for memalign chunk reuse.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <malloc.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <array_length.h>
+#include <libc-pointer-arith.h>
+#include <support/check.h>
+
+typedef struct TestCase {
+ size_t size;
+ size_t alignment;
+ void *ptr1;
+ void *ptr2;
+} TestCase;
+
+static TestCase tcache_allocs[] = {
+ { 24, 8, NULL, NULL },
+ { 24, 16, NULL, NULL },
+ { 128, 32, NULL, NULL }
+};
+#define TN array_length (tcache_allocs)
+
+static TestCase large_allocs[] = {
+ { 23450, 64, NULL, NULL },
+ { 23450, 64, NULL, NULL },
+ { 23550, 64, NULL, NULL },
+ { 23550, 64, NULL, NULL },
+ { 23650, 64, NULL, NULL },
+ { 23650, 64, NULL, NULL },
+ { 33650, 64, NULL, NULL },
+ { 33650, 64, NULL, NULL }
+};
+#define LN array_length (large_allocs)
+
+void *p;
+
+/* Sanity checks, ancillary to the actual test. */
+#define CHECK(p,a) \
+ if (p == NULL || !PTR_IS_ALIGNED (p, a)) \
+ FAIL_EXIT1 ("NULL or misaligned memory detected.\n");
+
+static int
+do_test (void)
+{
+ int i, j;
+ int count;
+ void *ptr[10];
+ void *p;
+
+ /* TCache test. */
+
+ for (i = 0; i < TN; ++ i)
+ {
+ tcache_allocs[i].ptr1 = memalign (tcache_allocs[i].alignment, tcache_allocs[i].size);
+ CHECK (tcache_allocs[i].ptr1, tcache_allocs[i].alignment);
+ free (tcache_allocs[i].ptr1);
+ /* This should return the same chunk as was just free'd. */
+ tcache_allocs[i].ptr2 = memalign (tcache_allocs[i].alignment, tcache_allocs[i].size);
+ CHECK (tcache_allocs[i].ptr2, tcache_allocs[i].alignment);
+ free (tcache_allocs[i].ptr2);
+
+ TEST_VERIFY (tcache_allocs[i].ptr1 == tcache_allocs[i].ptr2);
+ }
+
+ /* Test for non-head tcache hits. */
+ for (i = 0; i < array_length (ptr); ++ i)
+ {
+ if (i == 4)
+ {
+ ptr[i] = memalign (64, 256);
+ CHECK (ptr[i], 64);
+ }
+ else
+ {
+ ptr[i] = malloc (256);
+ CHECK (ptr[i], 4);
+ }
+ }
+ for (i = 0; i < array_length (ptr); ++ i)
+ free (ptr[i]);
+
+ p = memalign (64, 256);
+ CHECK (p, 64);
+
+ count = 0;
+ for (i = 0; i < 10; ++ i)
+ if (ptr[i] == p)
+ ++ count;
+ free (p);
+ TEST_VERIFY (count > 0);
+
+ /* Large bins test. */
+
+ for (i = 0; i < LN; ++ i)
+ {
+ large_allocs[i].ptr1 = memalign (large_allocs[i].alignment, large_allocs[i].size);
+ CHECK (large_allocs[i].ptr1, large_allocs[i].alignment);
+ /* Keep chunks from combining by fragmenting the heap. */
+ p = malloc (512);
+ CHECK (p, 4);
+ }
+
+ for (i = 0; i < LN; ++ i)
+ free (large_allocs[i].ptr1);
+
+ /* Force the unsorted bins to be scanned and moved to small/large
+ bins. */
+ p = malloc (60000);
+
+ for (i = 0; i < LN; ++ i)
+ {
+ large_allocs[i].ptr2 = memalign (large_allocs[i].alignment, large_allocs[i].size);
+ CHECK (large_allocs[i].ptr2, large_allocs[i].alignment);
+ }
+
+ count = 0;
+ for (i = 0; i < LN; ++ i)
+ {
+ int ok = 0;
+ for (j = 0; j < LN; ++ j)
+ if (large_allocs[i].ptr1 == large_allocs[j].ptr2)
+ ok = 1;
+ if (ok == 1)
+ count ++;
+ }
+
+ /* The allocation algorithm is complicated outside of the memalign
+ logic, so just make sure it's working for most of the
+ allocations. This avoids possible boundary conditions with
+ empty/full heaps. */
+ TEST_VERIFY (count > LN / 2);
+
+ return 0;
+}
+
+#include <support/test-driver.c>
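
For readers outside the glibc tree, the core behavior the tcache part of this test asserts can be shown with a short program: free an aligned allocation and immediately request the same size and alignment again; with this patch the allocator is expected to hand back the same chunk. This is an illustration only, not part of the patch: pointer reuse is an implementation detail of this allocator rather than a C-standard guarantee, and comparing a pointer after it has been freed is done purely for demonstration, just as the test itself does.

#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>

int
main (void)
{
  void *a = memalign (64, 256);
  if (a == NULL)
    return 1;
  free (a);

  /* With the tcache-aware memalign, this is expected to return the
     chunk that was just freed.  */
  void *b = memalign (64, 256);
  if (b == NULL)
    return 1;

  printf ("same chunk reused: %s\n", a == b ? "yes" : "no");
  free (b);
  return 0;
}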