diff options
Diffstat (limited to 'openmp/device/src')
| -rw-r--r-- | openmp/device/src/Allocator.cpp | 67 | ||||
| -rw-r--r-- | openmp/device/src/Kernel.cpp | 1 | ||||
| -rw-r--r-- | openmp/device/src/Misc.cpp | 4 | ||||
| -rw-r--r-- | openmp/device/src/State.cpp | 24 |
4 files changed, 38 insertions, 58 deletions
diff --git a/openmp/device/src/Allocator.cpp b/openmp/device/src/Allocator.cpp index aac2a60..34c945c 100644 --- a/openmp/device/src/Allocator.cpp +++ b/openmp/device/src/Allocator.cpp @@ -18,42 +18,36 @@ #include "Synchronization.h" using namespace ompx; +using namespace allocator; + +// Provide a default implementation of malloc / free for AMDGPU platforms built +// without 'libc' support. +extern "C" { +#if defined(__AMDGPU__) && !defined(OMPTARGET_HAS_LIBC) +[[gnu::weak]] void *malloc(size_t Size) { return allocator::alloc(Size); } +[[gnu::weak]] void free(void *Ptr) { allocator::free(Ptr); } +#else +[[gnu::leaf]] void *malloc(size_t Size); +[[gnu::leaf]] void free(void *Ptr); +#endif +} -[[gnu::used, gnu::retain, gnu::weak, - gnu::visibility( - "protected")]] DeviceMemoryPoolTy __omp_rtl_device_memory_pool; -[[gnu::used, gnu::retain, gnu::weak, - gnu::visibility("protected")]] DeviceMemoryPoolTrackingTy - __omp_rtl_device_memory_pool_tracker; +static constexpr uint64_t MEMORY_SIZE = /* 1 MiB */ 1024 * 1024; +alignas(ALIGNMENT) static uint8_t Memory[MEMORY_SIZE] = {0}; -/// Stateless bump allocator that uses the __omp_rtl_device_memory_pool -/// directly. +// Fallback bump pointer interface for platforms without a functioning +// allocator. struct BumpAllocatorTy final { + uint64_t Offset = 0; void *alloc(uint64_t Size) { Size = utils::roundUp(Size, uint64_t(allocator::ALIGNMENT)); - if (config::isDebugMode(DeviceDebugKind::AllocationTracker)) { - atomic::add(&__omp_rtl_device_memory_pool_tracker.NumAllocations, 1, - atomic::seq_cst); - atomic::add(&__omp_rtl_device_memory_pool_tracker.AllocationTotal, Size, - atomic::seq_cst); - atomic::min(&__omp_rtl_device_memory_pool_tracker.AllocationMin, Size, - atomic::seq_cst); - atomic::max(&__omp_rtl_device_memory_pool_tracker.AllocationMax, Size, - atomic::seq_cst); - } - - uint64_t *Data = - reinterpret_cast<uint64_t *>(&__omp_rtl_device_memory_pool.Ptr); - uint64_t End = - reinterpret_cast<uint64_t>(Data) + __omp_rtl_device_memory_pool.Size; - - uint64_t OldData = atomic::add(Data, Size, atomic::seq_cst); - if (OldData + Size > End) + uint64_t OldData = atomic::add(&Offset, Size, atomic::seq_cst); + if (OldData + Size >= MEMORY_SIZE) __builtin_trap(); - return reinterpret_cast<void *>(OldData); + return &Memory[OldData]; } void free(void *) {} @@ -65,13 +59,20 @@ BumpAllocatorTy BumpAllocator; /// ///{ -void allocator::init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment) { - // TODO: Check KernelEnvironment for an allocator choice as soon as we have - // more than one. +void *allocator::alloc(uint64_t Size) { +#if defined(__AMDGPU__) && !defined(OMPTARGET_HAS_LIBC) + return BumpAllocator.alloc(Size); +#else + return ::malloc(Size); +#endif } -void *allocator::alloc(uint64_t Size) { return BumpAllocator.alloc(Size); } - -void allocator::free(void *Ptr) { BumpAllocator.free(Ptr); } +void allocator::free(void *Ptr) { +#if defined(__AMDGPU__) && !defined(OMPTARGET_HAS_LIBC) + BumpAllocator.free(Ptr); +#else + ::free(Ptr); +#endif +} ///} diff --git a/openmp/device/src/Kernel.cpp b/openmp/device/src/Kernel.cpp index 8c2828b..05af35d 100644 --- a/openmp/device/src/Kernel.cpp +++ b/openmp/device/src/Kernel.cpp @@ -41,7 +41,6 @@ inititializeRuntime(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment, synchronize::init(IsSPMD); mapping::init(IsSPMD); state::init(IsSPMD, KernelEnvironment, KernelLaunchEnvironment); - allocator::init(IsSPMD, KernelEnvironment); workshare::init(IsSPMD); } diff --git a/openmp/device/src/Misc.cpp b/openmp/device/src/Misc.cpp index 563f674..a53fb43 100644 --- a/openmp/device/src/Misc.cpp +++ b/openmp/device/src/Misc.cpp @@ -100,7 +100,7 @@ void *omp_alloc(size_t size, omp_allocator_handle_t allocator) { case omp_const_mem_alloc: case omp_high_bw_mem_alloc: case omp_low_lat_mem_alloc: - return malloc(size); + return ompx::allocator::alloc(size); default: return nullptr; } @@ -113,7 +113,7 @@ void omp_free(void *ptr, omp_allocator_handle_t allocator) { case omp_const_mem_alloc: case omp_high_bw_mem_alloc: case omp_low_lat_mem_alloc: - free(ptr); + ompx::allocator::free(ptr); return; case omp_null_allocator: default: diff --git a/openmp/device/src/State.cpp b/openmp/device/src/State.cpp index 4753951..9f38cf2 100644 --- a/openmp/device/src/State.cpp +++ b/openmp/device/src/State.cpp @@ -44,26 +44,6 @@ using namespace ompx; namespace { -/// Fallback implementations are missing to trigger a link time error. -/// Implementations for new devices, including the host, should go into a -/// dedicated begin/end declare variant. -/// -///{ -extern "C" { -#if defined(__AMDGPU__) && !defined(OMPTARGET_HAS_LIBC) - -[[gnu::weak]] void *malloc(size_t Size) { return allocator::alloc(Size); } -[[gnu::weak]] void free(void *Ptr) { allocator::free(Ptr); } - -#else - -[[gnu::weak, gnu::leaf]] void *malloc(size_t Size); -[[gnu::weak, gnu::leaf]] void free(void *Ptr); - -#endif -} -///} - /// A "smart" stack in shared memory. /// /// The stack exposes a malloc/free interface but works like a stack internally. @@ -171,13 +151,13 @@ void memory::freeShared(void *Ptr, uint64_t Bytes, const char *Reason) { } void *memory::allocGlobal(uint64_t Bytes, const char *Reason) { - void *Ptr = malloc(Bytes); + void *Ptr = allocator::alloc(Bytes); if (config::isDebugMode(DeviceDebugKind::CommonIssues) && Ptr == nullptr) printf("nullptr returned by malloc!\n"); return Ptr; } -void memory::freeGlobal(void *Ptr, const char *Reason) { free(Ptr); } +void memory::freeGlobal(void *Ptr, const char *Reason) { allocator::free(Ptr); } ///} |
