diff options
author | Tobias Burnus <tobias@codesourcery.com> | 2023-07-12 13:50:21 +0200 |
---|---|---|
committer | Tobias Burnus <tobias@codesourcery.com> | 2023-07-12 13:50:21 +0200 |
commit | 450b05ce54d3f08c583c3b5341233ce0df99725b (patch) | |
tree | 9b2a66884505fc73d6ca4ca4de6ab52232749b37 /libgomp/allocator.c | |
parent | f9182da3213aa57c16dd0b52862126de4a259f6a (diff) | |
download | gcc-450b05ce54d3f08c583c3b5341233ce0df99725b.zip gcc-450b05ce54d3f08c583c3b5341233ce0df99725b.tar.gz gcc-450b05ce54d3f08c583c3b5341233ce0df99725b.tar.bz2 |
libgomp: Use libnuma for OpenMP's partition=nearest allocation trait
As with the memkind library, it is only used when found at runtime;
it does not need to be present when building GCC.
The included testcase does not check whether the memory has actually been
placed on the nearest node, because the Linux kernel's memory handling too
often ignores that hint and uses a different node for the allocation.
However, when running with 'numactl --preferred=<node> ./executable', it is
clearly visible that the feature works when comparing malloc/default vs.
nearest placement (using get_mempolicy to obtain the node for a memory address).
libgomp/ChangeLog:
* allocator.c: Add ifdef for LIBGOMP_USE_LIBNUMA.
(enum gomp_numa_memkind_kind): Renamed from gomp_memkind_kind;
add GOMP_MEMKIND_LIBNUMA.
(struct gomp_libnuma_data, gomp_init_libnuma, gomp_get_libnuma): New.
(omp_init_allocator): Handle partition=nearest with libnuma if avail.
(omp_aligned_alloc, omp_free, omp_aligned_calloc, omp_realloc): Add
numa_alloc_local (+ memset), numa_free, and numa_realloc calls as
needed.
* config/linux/allocator.c (LIBGOMP_USE_LIBNUMA): Define.
* libgomp.texi: Fix a typo; use 'fi' instead of its ligature char.
(Memory allocation): Renamed from 'Memory allocation with libmemkind';
updated for libnuma usage.
* testsuite/libgomp.c-c++-common/alloc-11.c: New test.
* testsuite/libgomp.c-c++-common/alloc-12.c: New test.
Diffstat (limited to 'libgomp/allocator.c')
-rw-r--r-- | libgomp/allocator.c | 202 |
1 files changed, 174 insertions, 28 deletions
diff --git a/libgomp/allocator.c b/libgomp/allocator.c index 25c0f15..b3187ab 100644 --- a/libgomp/allocator.c +++ b/libgomp/allocator.c @@ -31,13 +31,13 @@ #include "libgomp.h" #include <stdlib.h> #include <string.h> -#ifdef LIBGOMP_USE_MEMKIND +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) #include <dlfcn.h> #endif #define omp_max_predefined_alloc omp_thread_mem_alloc -enum gomp_memkind_kind +enum gomp_numa_memkind_kind { GOMP_MEMKIND_NONE = 0, #define GOMP_MEMKIND_KINDS \ @@ -50,7 +50,8 @@ enum gomp_memkind_kind #define GOMP_MEMKIND_KIND(kind) GOMP_MEMKIND_##kind GOMP_MEMKIND_KINDS, #undef GOMP_MEMKIND_KIND - GOMP_MEMKIND_COUNT + GOMP_MEMKIND_COUNT, + GOMP_MEMKIND_LIBNUMA = GOMP_MEMKIND_COUNT }; struct omp_allocator_data @@ -65,7 +66,7 @@ struct omp_allocator_data unsigned int fallback : 8; unsigned int pinned : 1; unsigned int partition : 7; -#ifdef LIBGOMP_USE_MEMKIND +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) unsigned int memkind : 8; #endif #ifndef HAVE_SYNC_BUILTINS @@ -81,6 +82,14 @@ struct omp_mem_header void *pad; }; +struct gomp_libnuma_data +{ + void *numa_handle; + void *(*numa_alloc_local) (size_t); + void *(*numa_realloc) (void *, size_t, size_t); + void (*numa_free) (void *, size_t); +}; + struct gomp_memkind_data { void *memkind_handle; @@ -92,6 +101,50 @@ struct gomp_memkind_data void **kinds[GOMP_MEMKIND_COUNT]; }; +#ifdef LIBGOMP_USE_LIBNUMA +static struct gomp_libnuma_data *libnuma_data; +static pthread_once_t libnuma_data_once = PTHREAD_ONCE_INIT; + +static void +gomp_init_libnuma (void) +{ + void *handle = dlopen ("libnuma.so.1", RTLD_LAZY); + struct gomp_libnuma_data *data; + + data = calloc (1, sizeof (struct gomp_libnuma_data)); + if (data == NULL) + { + if (handle) + dlclose (handle); + return; + } + if (!handle) + { + __atomic_store_n (&libnuma_data, data, MEMMODEL_RELEASE); + return; + } + data->numa_handle = handle; + data->numa_alloc_local + = (__typeof (data->numa_alloc_local)) dlsym 
(handle, "numa_alloc_local"); + data->numa_realloc + = (__typeof (data->numa_realloc)) dlsym (handle, "numa_realloc"); + data->numa_free + = (__typeof (data->numa_free)) dlsym (handle, "numa_free"); + __atomic_store_n (&libnuma_data, data, MEMMODEL_RELEASE); +} + +static struct gomp_libnuma_data * +gomp_get_libnuma (void) +{ + struct gomp_libnuma_data *data + = __atomic_load_n (&libnuma_data, MEMMODEL_ACQUIRE); + if (data) + return data; + pthread_once (&libnuma_data_once, gomp_init_libnuma); + return __atomic_load_n (&libnuma_data, MEMMODEL_ACQUIRE); +} +#endif + #ifdef LIBGOMP_USE_MEMKIND static struct gomp_memkind_data *memkind_data; static pthread_once_t memkind_data_once = PTHREAD_ONCE_INIT; @@ -166,7 +219,7 @@ omp_init_allocator (omp_memspace_handle_t memspace, int ntraits, struct omp_allocator_data data = { memspace, 1, ~(uintptr_t) 0, 0, 0, omp_atv_contended, omp_atv_all, omp_atv_default_mem_fb, omp_atv_false, omp_atv_environment, -#ifdef LIBGOMP_USE_MEMKIND +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) GOMP_MEMKIND_NONE #endif }; @@ -285,8 +338,8 @@ omp_init_allocator (omp_memspace_handle_t memspace, int ntraits, switch (memspace) { - case omp_high_bw_mem_space: #ifdef LIBGOMP_USE_MEMKIND + case omp_high_bw_mem_space: struct gomp_memkind_data *memkind_data; memkind_data = gomp_get_memkind (); if (data.partition == omp_atv_interleaved @@ -300,17 +353,15 @@ omp_init_allocator (omp_memspace_handle_t memspace, int ntraits, data.memkind = GOMP_MEMKIND_HBW_PREFERRED; break; } -#endif break; case omp_large_cap_mem_space: -#ifdef LIBGOMP_USE_MEMKIND memkind_data = gomp_get_memkind (); if (memkind_data->kinds[GOMP_MEMKIND_DAX_KMEM_ALL]) data.memkind = GOMP_MEMKIND_DAX_KMEM_ALL; else if (memkind_data->kinds[GOMP_MEMKIND_DAX_KMEM]) data.memkind = GOMP_MEMKIND_DAX_KMEM; -#endif break; +#endif default: #ifdef LIBGOMP_USE_MEMKIND if (data.partition == omp_atv_interleaved) @@ -323,6 +374,14 @@ omp_init_allocator (omp_memspace_handle_t memspace, int 
ntraits, break; } +#ifdef LIBGOMP_USE_LIBNUMA + if (data.memkind == GOMP_MEMKIND_NONE && data.partition == omp_atv_nearest) + { + data.memkind = GOMP_MEMKIND_LIBNUMA; + libnuma_data = gomp_get_libnuma (); + } +#endif + /* No support for this so far. */ if (data.pinned) return omp_null_allocator; @@ -357,8 +416,8 @@ omp_aligned_alloc (size_t alignment, size_t size, struct omp_allocator_data *allocator_data; size_t new_size, new_alignment; void *ptr, *ret; -#ifdef LIBGOMP_USE_MEMKIND - enum gomp_memkind_kind memkind; +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) + enum gomp_numa_memkind_kind memkind; #endif if (__builtin_expect (size == 0, 0)) @@ -379,7 +438,7 @@ retry: allocator_data = (struct omp_allocator_data *) allocator; if (new_alignment < allocator_data->alignment) new_alignment = allocator_data->alignment; -#ifdef LIBGOMP_USE_MEMKIND +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) memkind = allocator_data->memkind; #endif } @@ -388,8 +447,10 @@ retry: allocator_data = NULL; if (new_alignment < sizeof (void *)) new_alignment = sizeof (void *); -#ifdef LIBGOMP_USE_MEMKIND +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) memkind = GOMP_MEMKIND_NONE; +#endif +#ifdef LIBGOMP_USE_MEMKIND if (allocator == omp_high_bw_mem_alloc) memkind = GOMP_MEMKIND_HBW_PREFERRED; else if (allocator == omp_large_cap_mem_alloc) @@ -444,6 +505,13 @@ retry: allocator_data->used_pool_size = used_pool_size; gomp_mutex_unlock (&allocator_data->lock); #endif +#ifdef LIBGOMP_USE_LIBNUMA + if (memkind == GOMP_MEMKIND_LIBNUMA) + ptr = libnuma_data->numa_alloc_local (new_size); +# ifdef LIBGOMP_USE_MEMKIND + else +# endif +#endif #ifdef LIBGOMP_USE_MEMKIND if (memkind) { @@ -469,6 +537,13 @@ retry: } else { +#ifdef LIBGOMP_USE_LIBNUMA + if (memkind == GOMP_MEMKIND_LIBNUMA) + ptr = libnuma_data->numa_alloc_local (new_size); +# ifdef LIBGOMP_USE_MEMKIND + else +# endif +#endif #ifdef LIBGOMP_USE_MEMKIND if (memkind) { @@ -502,7 +577,7 @@ 
fail: { case omp_atv_default_mem_fb: if ((new_alignment > sizeof (void *) && new_alignment > alignment) -#ifdef LIBGOMP_USE_MEMKIND +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) || memkind #endif || (allocator_data @@ -577,6 +652,16 @@ omp_free (void *ptr, omp_allocator_handle_t allocator) gomp_mutex_unlock (&allocator_data->lock); #endif } +#ifdef LIBGOMP_USE_LIBNUMA + if (allocator_data->memkind == GOMP_MEMKIND_LIBNUMA) + { + libnuma_data->numa_free (data->ptr, data->size); + return; + } +# ifdef LIBGOMP_USE_MEMKIND + else +# endif +#endif #ifdef LIBGOMP_USE_MEMKIND if (allocator_data->memkind) { @@ -590,7 +675,7 @@ omp_free (void *ptr, omp_allocator_handle_t allocator) #ifdef LIBGOMP_USE_MEMKIND else { - enum gomp_memkind_kind memkind = GOMP_MEMKIND_NONE; + enum gomp_numa_memkind_kind memkind = GOMP_MEMKIND_NONE; if (data->allocator == omp_high_bw_mem_alloc) memkind = GOMP_MEMKIND_HBW_PREFERRED; else if (data->allocator == omp_large_cap_mem_alloc) @@ -625,8 +710,8 @@ omp_aligned_calloc (size_t alignment, size_t nmemb, size_t size, struct omp_allocator_data *allocator_data; size_t new_size, size_temp, new_alignment; void *ptr, *ret; -#ifdef LIBGOMP_USE_MEMKIND - enum gomp_memkind_kind memkind; +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) + enum gomp_numa_memkind_kind memkind; #endif if (__builtin_expect (size == 0 || nmemb == 0, 0)) @@ -647,7 +732,7 @@ retry: allocator_data = (struct omp_allocator_data *) allocator; if (new_alignment < allocator_data->alignment) new_alignment = allocator_data->alignment; -#ifdef LIBGOMP_USE_MEMKIND +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) memkind = allocator_data->memkind; #endif } @@ -656,8 +741,10 @@ retry: allocator_data = NULL; if (new_alignment < sizeof (void *)) new_alignment = sizeof (void *); -#ifdef LIBGOMP_USE_MEMKIND +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) memkind = GOMP_MEMKIND_NONE; +#endif +#ifdef LIBGOMP_USE_MEMKIND if 
(allocator == omp_high_bw_mem_alloc) memkind = GOMP_MEMKIND_HBW_PREFERRED; else if (allocator == omp_large_cap_mem_alloc) @@ -714,6 +801,15 @@ retry: allocator_data->used_pool_size = used_pool_size; gomp_mutex_unlock (&allocator_data->lock); #endif +#ifdef LIBGOMP_USE_LIBNUMA + if (memkind == GOMP_MEMKIND_LIBNUMA) + /* numa_alloc_local uses mmap with MAP_ANONYMOUS, returning + memory that is initialized to zero. */ + ptr = libnuma_data->numa_alloc_local (new_size); +# ifdef LIBGOMP_USE_MEMKIND + else +# endif +#endif #ifdef LIBGOMP_USE_MEMKIND if (memkind) { @@ -739,6 +835,15 @@ retry: } else { +#ifdef LIBGOMP_USE_LIBNUMA + if (memkind == GOMP_MEMKIND_LIBNUMA) + /* numa_alloc_local uses mmap with MAP_ANONYMOUS, returning + memory that is initialized to zero. */ + ptr = libnuma_data->numa_alloc_local (new_size); +# ifdef LIBGOMP_USE_MEMKIND + else +# endif +#endif #ifdef LIBGOMP_USE_MEMKIND if (memkind) { @@ -772,7 +877,7 @@ fail: { case omp_atv_default_mem_fb: if ((new_alignment > sizeof (void *) && new_alignment > alignment) -#ifdef LIBGOMP_USE_MEMKIND +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) || memkind #endif || (allocator_data @@ -815,8 +920,8 @@ omp_realloc (void *ptr, size_t size, omp_allocator_handle_t allocator, size_t new_size, old_size, new_alignment, old_alignment; void *new_ptr, *ret; struct omp_mem_header *data; -#ifdef LIBGOMP_USE_MEMKIND - enum gomp_memkind_kind memkind, free_memkind; +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) + enum gomp_numa_memkind_kind memkind, free_memkind; #endif if (__builtin_expect (ptr == NULL, 0)) @@ -841,15 +946,17 @@ retry: allocator_data = (struct omp_allocator_data *) allocator; if (new_alignment < allocator_data->alignment) new_alignment = allocator_data->alignment; -#ifdef LIBGOMP_USE_MEMKIND +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) memkind = allocator_data->memkind; #endif } else { allocator_data = NULL; -#ifdef LIBGOMP_USE_MEMKIND +#if 
defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) memkind = GOMP_MEMKIND_NONE; +#endif +#ifdef LIBGOMP_USE_MEMKIND if (allocator == omp_high_bw_mem_alloc) memkind = GOMP_MEMKIND_HBW_PREFERRED; else if (allocator == omp_large_cap_mem_alloc) @@ -865,15 +972,17 @@ retry: if (free_allocator > omp_max_predefined_alloc) { free_allocator_data = (struct omp_allocator_data *) free_allocator; -#ifdef LIBGOMP_USE_MEMKIND +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) free_memkind = free_allocator_data->memkind; #endif } else { free_allocator_data = NULL; -#ifdef LIBGOMP_USE_MEMKIND +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) free_memkind = GOMP_MEMKIND_NONE; +#endif +#ifdef LIBGOMP_USE_MEMKIND if (free_allocator == omp_high_bw_mem_alloc) free_memkind = GOMP_MEMKIND_HBW_PREFERRED; else if (free_allocator == omp_large_cap_mem_alloc) @@ -953,6 +1062,19 @@ retry: allocator_data->used_pool_size = used_pool_size; gomp_mutex_unlock (&allocator_data->lock); #endif +#ifdef LIBGOMP_USE_LIBNUMA + if (memkind == GOMP_MEMKIND_LIBNUMA) + { + if (prev_size) + new_ptr = libnuma_data->numa_realloc (data->ptr, data->size, + new_size); + else + new_ptr = libnuma_data->numa_alloc_local (new_size); + } +# ifdef LIBGOMP_USE_MEMKIND + else +# endif +#endif #ifdef LIBGOMP_USE_MEMKIND if (memkind) { @@ -994,12 +1116,19 @@ retry: } else if (new_alignment == sizeof (void *) && old_alignment == sizeof (struct omp_mem_header) -#ifdef LIBGOMP_USE_MEMKIND +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) && memkind == free_memkind #endif && (free_allocator_data == NULL || free_allocator_data->pool_size == ~(uintptr_t) 0)) { +#ifdef LIBGOMP_USE_LIBNUMA + if (memkind == GOMP_MEMKIND_LIBNUMA) + new_ptr = libnuma_data->numa_realloc (data->ptr, data->size, new_size); +# ifdef LIBGOMP_USE_MEMKIND + else +# endif +#endif #ifdef LIBGOMP_USE_MEMKIND if (memkind) { @@ -1021,6 +1150,13 @@ retry: } else { +#ifdef LIBGOMP_USE_LIBNUMA + if (memkind == 
GOMP_MEMKIND_LIBNUMA) + new_ptr = libnuma_data->numa_alloc_local (new_size); +# ifdef LIBGOMP_USE_MEMKIND + else +# endif +#endif #ifdef LIBGOMP_USE_MEMKIND if (memkind) { @@ -1060,6 +1196,16 @@ retry: gomp_mutex_unlock (&free_allocator_data->lock); #endif } +#ifdef LIBGOMP_USE_LIBNUMA + if (free_memkind == GOMP_MEMKIND_LIBNUMA) + { + libnuma_data->numa_free (data->ptr, data->size); + return ret; + } +# ifdef LIBGOMP_USE_MEMKIND + else +# endif +#endif #ifdef LIBGOMP_USE_MEMKIND if (free_memkind) { @@ -1079,7 +1225,7 @@ fail: { case omp_atv_default_mem_fb: if (new_alignment > sizeof (void *) -#ifdef LIBGOMP_USE_MEMKIND +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) || memkind #endif || (allocator_data |