aboutsummaryrefslogtreecommitdiff
path: root/openmp/device/src
diff options
context:
space:
mode:
Diffstat (limited to 'openmp/device/src')
-rw-r--r--openmp/device/src/Allocator.cpp67
-rw-r--r--openmp/device/src/Kernel.cpp1
-rw-r--r--openmp/device/src/Misc.cpp4
-rw-r--r--openmp/device/src/State.cpp24
4 files changed, 38 insertions, 58 deletions
diff --git a/openmp/device/src/Allocator.cpp b/openmp/device/src/Allocator.cpp
index aac2a60..34c945c 100644
--- a/openmp/device/src/Allocator.cpp
+++ b/openmp/device/src/Allocator.cpp
@@ -18,42 +18,36 @@
#include "Synchronization.h"
using namespace ompx;
+using namespace allocator;
+
+// Provide a default implementation of malloc / free for AMDGPU platforms built
+// without 'libc' support.
+extern "C" {
+#if defined(__AMDGPU__) && !defined(OMPTARGET_HAS_LIBC)
+[[gnu::weak]] void *malloc(size_t Size) { return allocator::alloc(Size); }
+[[gnu::weak]] void free(void *Ptr) { allocator::free(Ptr); }
+#else
+[[gnu::leaf]] void *malloc(size_t Size);
+[[gnu::leaf]] void free(void *Ptr);
+#endif
+}
-[[gnu::used, gnu::retain, gnu::weak,
- gnu::visibility(
- "protected")]] DeviceMemoryPoolTy __omp_rtl_device_memory_pool;
-[[gnu::used, gnu::retain, gnu::weak,
- gnu::visibility("protected")]] DeviceMemoryPoolTrackingTy
- __omp_rtl_device_memory_pool_tracker;
+static constexpr uint64_t MEMORY_SIZE = /* 1 MiB */ 1024 * 1024;
+alignas(ALIGNMENT) static uint8_t Memory[MEMORY_SIZE] = {0};
-/// Stateless bump allocator that uses the __omp_rtl_device_memory_pool
-/// directly.
+// Fallback bump pointer interface for platforms without a functioning
+// allocator.
struct BumpAllocatorTy final {
+ uint64_t Offset = 0;
void *alloc(uint64_t Size) {
Size = utils::roundUp(Size, uint64_t(allocator::ALIGNMENT));
- if (config::isDebugMode(DeviceDebugKind::AllocationTracker)) {
- atomic::add(&__omp_rtl_device_memory_pool_tracker.NumAllocations, 1,
- atomic::seq_cst);
- atomic::add(&__omp_rtl_device_memory_pool_tracker.AllocationTotal, Size,
- atomic::seq_cst);
- atomic::min(&__omp_rtl_device_memory_pool_tracker.AllocationMin, Size,
- atomic::seq_cst);
- atomic::max(&__omp_rtl_device_memory_pool_tracker.AllocationMax, Size,
- atomic::seq_cst);
- }
-
- uint64_t *Data =
- reinterpret_cast<uint64_t *>(&__omp_rtl_device_memory_pool.Ptr);
- uint64_t End =
- reinterpret_cast<uint64_t>(Data) + __omp_rtl_device_memory_pool.Size;
-
- uint64_t OldData = atomic::add(Data, Size, atomic::seq_cst);
- if (OldData + Size > End)
+ uint64_t OldData = atomic::add(&Offset, Size, atomic::seq_cst);
+ if (OldData + Size >= MEMORY_SIZE)
__builtin_trap();
- return reinterpret_cast<void *>(OldData);
+ return &Memory[OldData];
}
void free(void *) {}
@@ -65,13 +59,20 @@ BumpAllocatorTy BumpAllocator;
///
///{
-void allocator::init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment) {
- // TODO: Check KernelEnvironment for an allocator choice as soon as we have
- // more than one.
+void *allocator::alloc(uint64_t Size) {
+#if defined(__AMDGPU__) && !defined(OMPTARGET_HAS_LIBC)
+ return BumpAllocator.alloc(Size);
+#else
+ return ::malloc(Size);
+#endif
}
-void *allocator::alloc(uint64_t Size) { return BumpAllocator.alloc(Size); }
-
-void allocator::free(void *Ptr) { BumpAllocator.free(Ptr); }
+void allocator::free(void *Ptr) {
+#if defined(__AMDGPU__) && !defined(OMPTARGET_HAS_LIBC)
+ BumpAllocator.free(Ptr);
+#else
+ ::free(Ptr);
+#endif
+}
///}
diff --git a/openmp/device/src/Kernel.cpp b/openmp/device/src/Kernel.cpp
index 8c2828b..05af35d 100644
--- a/openmp/device/src/Kernel.cpp
+++ b/openmp/device/src/Kernel.cpp
@@ -41,7 +41,6 @@ inititializeRuntime(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
synchronize::init(IsSPMD);
mapping::init(IsSPMD);
state::init(IsSPMD, KernelEnvironment, KernelLaunchEnvironment);
- allocator::init(IsSPMD, KernelEnvironment);
workshare::init(IsSPMD);
}
diff --git a/openmp/device/src/Misc.cpp b/openmp/device/src/Misc.cpp
index 563f674..a53fb43 100644
--- a/openmp/device/src/Misc.cpp
+++ b/openmp/device/src/Misc.cpp
@@ -100,7 +100,7 @@ void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
case omp_const_mem_alloc:
case omp_high_bw_mem_alloc:
case omp_low_lat_mem_alloc:
- return malloc(size);
+ return ompx::allocator::alloc(size);
default:
return nullptr;
}
@@ -113,7 +113,7 @@ void omp_free(void *ptr, omp_allocator_handle_t allocator) {
case omp_const_mem_alloc:
case omp_high_bw_mem_alloc:
case omp_low_lat_mem_alloc:
- free(ptr);
+ ompx::allocator::free(ptr);
return;
case omp_null_allocator:
default:
diff --git a/openmp/device/src/State.cpp b/openmp/device/src/State.cpp
index 4753951..9f38cf2 100644
--- a/openmp/device/src/State.cpp
+++ b/openmp/device/src/State.cpp
@@ -44,26 +44,6 @@ using namespace ompx;
namespace {
-/// Fallback implementations are missing to trigger a link time error.
-/// Implementations for new devices, including the host, should go into a
-/// dedicated begin/end declare variant.
-///
-///{
-extern "C" {
-#if defined(__AMDGPU__) && !defined(OMPTARGET_HAS_LIBC)
-
-[[gnu::weak]] void *malloc(size_t Size) { return allocator::alloc(Size); }
-[[gnu::weak]] void free(void *Ptr) { allocator::free(Ptr); }
-
-#else
-
-[[gnu::weak, gnu::leaf]] void *malloc(size_t Size);
-[[gnu::weak, gnu::leaf]] void free(void *Ptr);
-
-#endif
-}
-///}
-
/// A "smart" stack in shared memory.
///
/// The stack exposes a malloc/free interface but works like a stack internally.
@@ -171,13 +151,13 @@ void memory::freeShared(void *Ptr, uint64_t Bytes, const char *Reason) {
}
void *memory::allocGlobal(uint64_t Bytes, const char *Reason) {
- void *Ptr = malloc(Bytes);
+ void *Ptr = allocator::alloc(Bytes);
if (config::isDebugMode(DeviceDebugKind::CommonIssues) && Ptr == nullptr)
printf("nullptr returned by malloc!\n");
return Ptr;
}
-void memory::freeGlobal(void *Ptr, const char *Reason) { free(Ptr); }
+void memory::freeGlobal(void *Ptr, const char *Reason) { allocator::free(Ptr); }
///}