diff options
author | Andrew Stubbs <ams@codesourcery.com> | 2021-12-03 17:46:41 +0000 |
---|---|---|
committer | Andrew Stubbs <ams@codesourcery.com> | 2023-12-06 16:48:57 +0000 |
commit | 30486fab717a90dc7516722c24ef9c5ea246c350 (patch) | |
tree | 473b0117e6eb85aa3d358ef4bf9ccdc3cff5ec6b /libgomp/allocator.c | |
parent | 458e7c937924bbcef80eb006af0b61420dbfc1c1 (diff) | |
download | gcc-30486fab717a90dc7516722c24ef9c5ea246c350.zip gcc-30486fab717a90dc7516722c24ef9c5ea246c350.tar.gz gcc-30486fab717a90dc7516722c24ef9c5ea246c350.tar.bz2 |
libgomp, nvptx: low-latency memory allocator
This patch adds support for allocating low-latency ".shared" memory on
NVPTX GPU devices, via omp_low_lat_mem_space and omp_alloc. The memory
can be allocated, reallocated, and freed using a basic but fast algorithm.
The allocator is thread safe, and the size of the low-latency heap can be
configured using the GOMP_NVPTX_LOWLAT_POOL environment variable.
The use of the PTX dynamic_smem_size feature means that the low-latency
allocator will not work with the PTX 3.1 multilib.
For now, the omp_low_lat_mem_alloc allocator also works, but that will change
when I implement the access traits.
libgomp/ChangeLog:
* allocator.c (MEMSPACE_ALLOC): New macro.
(MEMSPACE_CALLOC): New macro.
(MEMSPACE_REALLOC): New macro.
(MEMSPACE_FREE): New macro.
(predefined_alloc_mapping): New array. Add _Static_assert to match.
(ARRAY_SIZE): New macro.
(omp_aligned_alloc): Use MEMSPACE_ALLOC.
Implement fall-backs for predefined allocators. Simplify existing
fall-backs.
(omp_free): Use MEMSPACE_FREE.
(omp_calloc): Use MEMSPACE_CALLOC. Implement fall-backs for
predefined allocators. Simplify existing fall-backs.
(omp_realloc): Use MEMSPACE_REALLOC, MEMSPACE_ALLOC, and MEMSPACE_FREE.
Implement fall-backs for predefined allocators. Simplify existing
fall-backs.
* config/nvptx/team.c (__nvptx_lowlat_pool): New asm variable.
(__nvptx_lowlat_init): New prototype.
(gomp_nvptx_main): Call __nvptx_lowlat_init.
* libgomp.texi: Update memory space table.
* plugin/plugin-nvptx.c (lowlat_pool_size): New variable.
(GOMP_OFFLOAD_init_device): Read the GOMP_NVPTX_LOWLAT_POOL envvar.
(GOMP_OFFLOAD_run): Apply lowlat_pool_size.
* basic-allocator.c: New file.
* config/nvptx/allocator.c: New file.
* testsuite/libgomp.c/omp_alloc-1.c: New test.
* testsuite/libgomp.c/omp_alloc-2.c: New test.
* testsuite/libgomp.c/omp_alloc-3.c: New test.
* testsuite/libgomp.c/omp_alloc-4.c: New test.
* testsuite/libgomp.c/omp_alloc-5.c: New test.
* testsuite/libgomp.c/omp_alloc-6.c: New test.
Co-authored-by: Kwok Cheung Yeung <kcy@codesourcery.com>
Co-Authored-By: Thomas Schwinge <thomas@codesourcery.com>
Diffstat (limited to 'libgomp/allocator.c')
-rw-r--r-- | libgomp/allocator.c | 246 |
1 files changed, 147 insertions, 99 deletions
diff --git a/libgomp/allocator.c b/libgomp/allocator.c index b4e50e2..fa39812 100644 --- a/libgomp/allocator.c +++ b/libgomp/allocator.c @@ -37,6 +37,47 @@ #define omp_max_predefined_alloc omp_thread_mem_alloc +/* These macros may be overridden in config/<target>/allocator.c. + The following definitions (ab)use comma operators to avoid unused + variable errors. */ +#ifndef MEMSPACE_ALLOC +#define MEMSPACE_ALLOC(MEMSPACE, SIZE) \ + malloc (((void)(MEMSPACE), (SIZE))) +#endif +#ifndef MEMSPACE_CALLOC +#define MEMSPACE_CALLOC(MEMSPACE, SIZE) \ + calloc (1, (((void)(MEMSPACE), (SIZE)))) +#endif +#ifndef MEMSPACE_REALLOC +#define MEMSPACE_REALLOC(MEMSPACE, ADDR, OLDSIZE, SIZE) \ + realloc (ADDR, (((void)(MEMSPACE), (void)(OLDSIZE), (SIZE)))) +#endif +#ifndef MEMSPACE_FREE +#define MEMSPACE_FREE(MEMSPACE, ADDR, SIZE) \ + free (((void)(MEMSPACE), (void)(SIZE), (ADDR))) +#endif + +/* Map the predefined allocators to the correct memory space. + The index to this table is the omp_allocator_handle_t enum value. + When the user calls omp_alloc with a predefined allocator this + table determines what memory they get. */ +static const omp_memspace_handle_t predefined_alloc_mapping[] = { + omp_default_mem_space, /* omp_null_allocator doesn't actually use this. */ + omp_default_mem_space, /* omp_default_mem_alloc. */ + omp_large_cap_mem_space, /* omp_large_cap_mem_alloc. */ + omp_const_mem_space, /* omp_const_mem_alloc. */ + omp_high_bw_mem_space, /* omp_high_bw_mem_alloc. */ + omp_low_lat_mem_space, /* omp_low_lat_mem_alloc. */ + omp_low_lat_mem_space, /* omp_cgroup_mem_alloc (implementation defined). */ + omp_low_lat_mem_space, /* omp_pteam_mem_alloc (implementation defined). */ + omp_low_lat_mem_space, /* omp_thread_mem_alloc (implementation defined). 
*/ +}; + +#define ARRAY_SIZE(A) (sizeof (A) / sizeof ((A)[0])) +_Static_assert (ARRAY_SIZE (predefined_alloc_mapping) + == omp_max_predefined_alloc + 1, + "predefined_alloc_mapping must match omp_memspace_handle_t"); + enum gomp_numa_memkind_kind { GOMP_MEMKIND_NONE = 0, @@ -533,7 +574,7 @@ retry: } else #endif - ptr = malloc (new_size); + ptr = MEMSPACE_ALLOC (allocator_data->memspace, new_size); if (ptr == NULL) { #ifdef HAVE_SYNC_BUILTINS @@ -565,7 +606,13 @@ retry: } else #endif - ptr = malloc (new_size); + { + omp_memspace_handle_t memspace; + memspace = (allocator_data + ? allocator_data->memspace + : predefined_alloc_mapping[allocator]); + ptr = MEMSPACE_ALLOC (memspace, new_size); + } if (ptr == NULL) goto fail; } @@ -582,36 +629,26 @@ retry: ((struct omp_mem_header *) ret)[-1].allocator = allocator; return ret; -fail: - if (allocator_data) +fail:; + int fallback = (allocator_data + ? allocator_data->fallback + : allocator == omp_default_mem_alloc + ? omp_atv_null_fb + : omp_atv_default_mem_fb); + switch (fallback) { - switch (allocator_data->fallback) - { - case omp_atv_default_mem_fb: - if ((new_alignment > sizeof (void *) && new_alignment > alignment) -#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) - || memkind -#endif - || (allocator_data - && allocator_data->pool_size < ~(uintptr_t) 0)) - { - allocator = omp_default_mem_alloc; - goto retry; - } - /* Otherwise, we've already performed default mem allocation - and if that failed, it won't succeed again (unless it was - intermittent. Return NULL then, as that is the fallback. 
*/ - break; - case omp_atv_null_fb: - break; - default: - case omp_atv_abort_fb: - gomp_fatal ("Out of memory allocating %lu bytes", - (unsigned long) size); - case omp_atv_allocator_fb: - allocator = allocator_data->fb_data; - goto retry; - } + case omp_atv_default_mem_fb: + allocator = omp_default_mem_alloc; + goto retry; + case omp_atv_null_fb: + break; + default: + case omp_atv_abort_fb: + gomp_fatal ("Out of memory allocating %lu bytes", + (unsigned long) size); + case omp_atv_allocator_fb: + allocator = allocator_data->fb_data; + goto retry; } return NULL; } @@ -644,6 +681,7 @@ void omp_free (void *ptr, omp_allocator_handle_t allocator) { struct omp_mem_header *data; + omp_memspace_handle_t memspace = omp_default_mem_space; if (ptr == NULL) return; @@ -683,10 +721,12 @@ omp_free (void *ptr, omp_allocator_handle_t allocator) return; } #endif + + memspace = allocator_data->memspace; } -#ifdef LIBGOMP_USE_MEMKIND else { +#ifdef LIBGOMP_USE_MEMKIND enum gomp_numa_memkind_kind memkind = GOMP_MEMKIND_NONE; if (data->allocator == omp_high_bw_mem_alloc) memkind = GOMP_MEMKIND_HBW_PREFERRED; @@ -702,9 +742,12 @@ omp_free (void *ptr, omp_allocator_handle_t allocator) return; } } - } #endif - free (data->ptr); + + memspace = predefined_alloc_mapping[data->allocator]; + } + + MEMSPACE_FREE (memspace, data->ptr, data->size); } ialias (omp_free) @@ -831,7 +874,7 @@ retry: } else #endif - ptr = calloc (1, new_size); + ptr = MEMSPACE_CALLOC (allocator_data->memspace, new_size); if (ptr == NULL) { #ifdef HAVE_SYNC_BUILTINS @@ -865,7 +908,13 @@ retry: } else #endif - ptr = calloc (1, new_size); + { + omp_memspace_handle_t memspace; + memspace = (allocator_data + ? 
allocator_data->memspace + : predefined_alloc_mapping[allocator]); + ptr = MEMSPACE_CALLOC (memspace, new_size); + } if (ptr == NULL) goto fail; } @@ -882,36 +931,26 @@ retry: ((struct omp_mem_header *) ret)[-1].allocator = allocator; return ret; -fail: - if (allocator_data) +fail:; + int fallback = (allocator_data + ? allocator_data->fallback + : allocator == omp_default_mem_alloc + ? omp_atv_null_fb + : omp_atv_default_mem_fb); + switch (fallback) { - switch (allocator_data->fallback) - { - case omp_atv_default_mem_fb: - if ((new_alignment > sizeof (void *) && new_alignment > alignment) -#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) - || memkind -#endif - || (allocator_data - && allocator_data->pool_size < ~(uintptr_t) 0)) - { - allocator = omp_default_mem_alloc; - goto retry; - } - /* Otherwise, we've already performed default mem allocation - and if that failed, it won't succeed again (unless it was - intermittent. Return NULL then, as that is the fallback. */ - break; - case omp_atv_null_fb: - break; - default: - case omp_atv_abort_fb: - gomp_fatal ("Out of memory allocating %lu bytes", - (unsigned long) (size * nmemb)); - case omp_atv_allocator_fb: - allocator = allocator_data->fb_data; - goto retry; - } + case omp_atv_default_mem_fb: + allocator = omp_default_mem_alloc; + goto retry; + case omp_atv_null_fb: + break; + default: + case omp_atv_abort_fb: + gomp_fatal ("Out of memory allocating %lu bytes", + (unsigned long) (size * nmemb)); + case omp_atv_allocator_fb: + allocator = allocator_data->fb_data; + goto retry; } return NULL; } @@ -1101,9 +1140,10 @@ retry: else #endif if (prev_size) - new_ptr = realloc (data->ptr, new_size); + new_ptr = MEMSPACE_REALLOC (allocator_data->memspace, data->ptr, + data->size, new_size); else - new_ptr = malloc (new_size); + new_ptr = MEMSPACE_ALLOC (allocator_data->memspace, new_size); if (new_ptr == NULL) { #ifdef HAVE_SYNC_BUILTINS @@ -1151,7 +1191,13 @@ retry: } else #endif - new_ptr = realloc 
(data->ptr, new_size); + { + omp_memspace_handle_t memspace; + memspace = (allocator_data + ? allocator_data->memspace + : predefined_alloc_mapping[allocator]); + new_ptr = MEMSPACE_REALLOC (memspace, data->ptr, data->size, new_size); + } if (new_ptr == NULL) goto fail; ret = (char *) new_ptr + sizeof (struct omp_mem_header); @@ -1178,7 +1224,13 @@ retry: } else #endif - new_ptr = malloc (new_size); + { + omp_memspace_handle_t memspace; + memspace = (allocator_data + ? allocator_data->memspace + : predefined_alloc_mapping[allocator]); + new_ptr = MEMSPACE_ALLOC (memspace, new_size); + } if (new_ptr == NULL) goto fail; } @@ -1227,39 +1279,35 @@ retry: return ret; } #endif - free (data->ptr); + { + omp_memspace_handle_t was_memspace; + was_memspace = (free_allocator_data + ? free_allocator_data->memspace + : predefined_alloc_mapping[free_allocator]); + MEMSPACE_FREE (was_memspace, data->ptr, data->size); + } return ret; -fail: - if (allocator_data) +fail:; + int fallback = (allocator_data + ? allocator_data->fallback + : allocator == omp_default_mem_alloc + ? omp_atv_null_fb + : omp_atv_default_mem_fb); + switch (fallback) { - switch (allocator_data->fallback) - { - case omp_atv_default_mem_fb: - if (new_alignment > sizeof (void *) -#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) - || memkind -#endif - || (allocator_data - && allocator_data->pool_size < ~(uintptr_t) 0)) - { - allocator = omp_default_mem_alloc; - goto retry; - } - /* Otherwise, we've already performed default mem allocation - and if that failed, it won't succeed again (unless it was - intermittent. Return NULL then, as that is the fallback. 
*/ - break; - case omp_atv_null_fb: - break; - default: - case omp_atv_abort_fb: - gomp_fatal ("Out of memory allocating %lu bytes", - (unsigned long) size); - case omp_atv_allocator_fb: - allocator = allocator_data->fb_data; - goto retry; - } + case omp_atv_default_mem_fb: + allocator = omp_default_mem_alloc; + goto retry; + case omp_atv_null_fb: + break; + default: + case omp_atv_abort_fb: + gomp_fatal ("Out of memory allocating %lu bytes", + (unsigned long) size); + case omp_atv_allocator_fb: + allocator = allocator_data->fb_data; + goto retry; } return NULL; } |