/* Copyright (C) 2023-2024 Free Software Foundation, Inc.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* The low-latency allocators use space reserved in .shared memory when the
kernel is launched. The heap is initialized in gomp_nvptx_main and all
allocations are forgotten when the kernel exits. Allocations to other
memory spaces all use the system malloc syscall.
The root heap descriptor is stored elsewhere in shared memory, and each
free chunk contains a similar descriptor for the next free chunk in the
chain.
The descriptor is two 16-bit values: offset and size, which describe the
location of a chunk of memory available for allocation. The offset is
relative to the base of the heap. The special value 0xffff, 0xffff
indicates that the heap is locked. The descriptor is encoded into a
single 32-bit integer so that it may be easily accessed atomically.
Memory is allocated to the first free chunk that fits. The free chain
is always stored in order of the offset to assist coalescing adjacent
chunks. */
#include "libgomp.h"
#include <stdlib.h>
/* Name prefix supplied to the generic basic allocator before it is
included. The __nvptx_lowlat_alloc, __nvptx_lowlat_calloc,
__nvptx_lowlat_realloc and __nvptx_lowlat_free routines called later in
this file are presumably generated from this prefix -- confirm in
basic-allocator.c. */
#define BASIC_ALLOC_PREFIX __nvptx_lowlat
#include "../../basic-allocator.c"
/* There should be some .shared space reserved for us. There's no way to
express this magic extern sizeless array in C so use asm. The symbol
declared here, __nvptx_lowlat_pool, is the one whose address the
functions below obtain with cvta.shared. */
asm (".extern .shared .u8 __nvptx_lowlat_pool[];\n");
/* Allocate SIZE bytes on behalf of MEMSPACE.

   A request for omp_low_lat_mem_space is handed to __nvptx_lowlat_alloc
   along with the address of __nvptx_lowlat_pool; a request for any other
   memory space goes to malloc.  The result of whichever allocator was
   chosen is returned unchanged.  */

static void *
nvptx_memspace_alloc (omp_memspace_handle_t memspace, size_t size)
{
  char *pool_base;

  /* Everything except the low-latency space is served by malloc.  */
  if (memspace != omp_low_lat_mem_space)
    return malloc (size);

  /* Fetch the address of the .shared pool via cvta.shared.  */
  asm ("cvta.shared.u64\t%0, __nvptx_lowlat_pool;" : "=r" (pool_base));
  return __nvptx_lowlat_alloc (pool_base, size);
}
/* Zero-initialising counterpart of nvptx_memspace_alloc.

   A request for omp_low_lat_mem_space is handed to __nvptx_lowlat_calloc
   along with the address of __nvptx_lowlat_pool; any other memory space
   is served by calloc (1, SIZE).  The result of whichever allocator was
   chosen is returned unchanged.  */

static void *
nvptx_memspace_calloc (omp_memspace_handle_t memspace, size_t size)
{
  char *pool_base;

  /* Everything except the low-latency space is served by calloc.  */
  if (memspace != omp_low_lat_mem_space)
    return calloc (1, size);

  /* Fetch the address of the .shared pool via cvta.shared.  */
  asm ("cvta.shared.u64\t%0, __nvptx_lowlat_pool;" : "=r" (pool_base));
  return __nvptx_lowlat_calloc (pool_base, size);
}
/* Release the block at ADDR, of SIZE bytes, that belongs to MEMSPACE.

   Blocks from omp_low_lat_mem_space are handed to __nvptx_lowlat_free
   along with the address of __nvptx_lowlat_pool and SIZE; blocks from
   any other memory space are passed to free, which does not use SIZE.  */

static void
nvptx_memspace_free (omp_memspace_handle_t memspace, void *addr, size_t size)
{
  char *pool_base;

  /* Everything except the low-latency space is released with free.  */
  if (memspace != omp_low_lat_mem_space)
    {
      free (addr);
      return;
    }

  /* Fetch the address of the .shared pool via cvta.shared.  */
  asm ("cvta.shared.u64\t%0, __nvptx_lowlat_pool;" : "=r" (pool_base));
  __nvptx_lowlat_free (pool_base, addr, size);
}
/* Resize the block at ADDR, belonging to MEMSPACE, from OLDSIZE to SIZE
   bytes.

   Blocks from omp_low_lat_mem_space are handed to __nvptx_lowlat_realloc
   along with the address of __nvptx_lowlat_pool and both sizes; blocks
   from any other memory space are passed to realloc, which does not use
   OLDSIZE.  The result of whichever routine was chosen is returned
   unchanged.  */

static void *
nvptx_memspace_realloc (omp_memspace_handle_t memspace, void *addr,
			size_t oldsize, size_t size)
{
  char *pool_base;

  /* Everything except the low-latency space is resized with realloc.  */
  if (memspace != omp_low_lat_mem_space)
    return realloc (addr, size);

  /* Fetch the address of the .shared pool via cvta.shared.  */
  asm ("cvta.shared.u64\t%0, __nvptx_lowlat_pool;" : "=r" (pool_base));
  return __nvptx_lowlat_realloc (pool_base, addr, oldsize, size);
}
/* Report whether MEMSPACE may be combined with the access trait ACCESS.
   Returns nonzero when the combination is acceptable and zero when it
   must be rejected.

   Only omp_low_lat_mem_space is ever rejected: always when targeting a
   PTX ISA older than 4.1, and otherwise only when ACCESS is omp_atv_all.

   The version test must use the double-underscore-terminated macro
   __PTX_ISA_VERSION_MINOR__.  A misspelt (undefined) identifier silently
   evaluates to 0 inside #if, which would make the "== 4 && minor >= 1"
   arm always false.  */

static inline int
nvptx_memspace_validate (omp_memspace_handle_t memspace, unsigned access)
{
#if __PTX_ISA_VERSION_MAJOR__ > 4 \
    || (__PTX_ISA_VERSION_MAJOR__ == 4 && __PTX_ISA_VERSION_MINOR__ >= 1)
  /* Disallow use of low-latency memory when it must be accessible by
     all threads.  */
  return (memspace != omp_low_lat_mem_space
	  || access != omp_atv_all);
#else
  /* Low-latency memory is not available before PTX 4.1.  */
  return (memspace != omp_low_lat_mem_space);
#endif
}
/* Memory-space hook macros. Each one forwards to the matching
nvptx_memspace_* function defined above. The PIN (and OLDPIN) arguments
are evaluated and then discarded by the comma expression, so they never
influence which routine is called or what it is passed.
NOTE(review): these are presumably consumed by ../../allocator.c, which is
included at the end of this file -- confirm there. */
#define MEMSPACE_ALLOC(MEMSPACE, SIZE, PIN) \
nvptx_memspace_alloc (MEMSPACE, ((void)(PIN), (SIZE)))
#define MEMSPACE_CALLOC(MEMSPACE, SIZE, PIN) \
nvptx_memspace_calloc (MEMSPACE, ((void)(PIN), (SIZE)))
#define MEMSPACE_REALLOC(MEMSPACE, ADDR, OLDSIZE, SIZE, OLDPIN, PIN) \
nvptx_memspace_realloc (MEMSPACE, ADDR, OLDSIZE, \
((void)(OLDPIN), (void)(PIN), (SIZE)))
#define MEMSPACE_FREE(MEMSPACE, ADDR, SIZE, PIN) \
nvptx_memspace_free (MEMSPACE, ADDR, ((void)(PIN), (SIZE)))
#define MEMSPACE_VALIDATE(MEMSPACE, ACCESS, PIN) \
nvptx_memspace_validate (MEMSPACE, ((void)(PIN), (ACCESS)))
/* The default low-latency memspace implies omp_atv_all, which is incompatible
with the .shared memory space. This is defined ahead of the #include of
../../allocator.c; presumably that file tests the macro to treat the
predefined low-latency allocator as invalid -- confirm there. */
#define OMP_LOW_LAT_MEM_ALLOC_INVALID 1
#include "../../allocator.c"