aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKevin Sala <salapenades1@llnl.gov>2025-08-09 22:50:23 -0700
committerKevin Sala <salapenades1@llnl.gov>2025-08-10 12:16:05 -0700
commitf2c6f976c58937a1d48bd0fb7f21b9a36d0452d6 (patch)
tree44a1f595dfd54386a685e00f3f6ec1a20fea1cad
parentfa3c7425ae9e5ffea83841f2be61b0f494b99038 (diff)
downloadllvm-users/kevinsala/omp-dyn-groupprivate-rt-pr.zip
llvm-users/kevinsala/omp-dyn-groupprivate-rt-pr.tar.gz
llvm-users/kevinsala/omp-dyn-groupprivate-rt-pr.tar.bz2
-rw-r--r--offload/DeviceRTL/include/DeviceTypes.h5
-rw-r--r--offload/DeviceRTL/src/State.cpp19
-rw-r--r--offload/include/omptarget.h15
-rw-r--r--offload/plugins-nextgen/amdgpu/src/rtl.cpp4
-rw-r--r--offload/plugins-nextgen/common/src/PluginInterface.cpp4
-rw-r--r--offload/plugins-nextgen/cuda/src/rtl.cpp6
-rw-r--r--openmp/runtime/src/kmp_csupport.cpp2
7 files changed, 38 insertions, 17 deletions
diff --git a/offload/DeviceRTL/include/DeviceTypes.h b/offload/DeviceRTL/include/DeviceTypes.h
index a43b506..042fef4 100644
--- a/offload/DeviceRTL/include/DeviceTypes.h
+++ b/offload/DeviceRTL/include/DeviceTypes.h
@@ -163,8 +163,13 @@ typedef enum omp_allocator_handle_t {
///}
+/// The OpenMP access group type. The criterion for grouping tasks using a
+/// specific grouping property.
enum omp_access_t {
+ /// Groups the tasks based on the contention group to which they belong.
omp_access_cgroup = 0,
+ /// Groups the tasks based on the parallel region to which they bind.
+ omp_access_pteam = 1,
};
#endif
diff --git a/offload/DeviceRTL/src/State.cpp b/offload/DeviceRTL/src/State.cpp
index 9e2a999..c6bc6a1 100644
--- a/offload/DeviceRTL/src/State.cpp
+++ b/offload/DeviceRTL/src/State.cpp
@@ -163,14 +163,15 @@ struct DynCGroupMemTy {
Size = 0;
Ptr = nullptr;
IsFallback = false;
- if (KLE) {
- Size = KLE->DynCGroupMemSize;
- if (void *Fallback = KLE->DynCGroupMemFallback) {
- Ptr = static_cast<char *>(Fallback) + Size * omp_get_team_num();
- IsFallback = true;
- } else {
- Ptr = static_cast<char *>(NativeDynCGroup);
- }
+ if (!KLE)
+ return;
+
+ Size = KLE->DynCGroupMemSize;
+ if (void *Fallback = KLE->DynCGroupMemFallback) {
+ Ptr = static_cast<char *>(Fallback) + Size * omp_get_team_num();
+ IsFallback = true;
+ } else {
+ Ptr = static_cast<char *>(NativeDynCGroup);
}
}
@@ -466,7 +467,7 @@ int omp_is_initial_device(void) { return 0; }
void *omp_get_dyn_groupprivate_ptr(size_t Offset, int *IsFallback,
omp_access_t) {
- if (IsFallback != NULL)
+ if (IsFallback != nullptr)
*IsFallback = DynCGroupMem.isFallback();
return DynCGroupMem.getPtr(Offset);
}
diff --git a/offload/include/omptarget.h b/offload/include/omptarget.h
index 45bb74e..ddb0f7f 100644
--- a/offload/include/omptarget.h
+++ b/offload/include/omptarget.h
@@ -273,15 +273,22 @@ struct __tgt_target_non_contig {
extern "C" {
#endif
-typedef enum {
- omp_access_cgroup = 0,
-} omp_access_t;
+/// The OpenMP access group type. The criterion for grouping tasks using a
+/// specific grouping property.
+enum omp_access_t {
+ /// Groups the tasks based on the contention group to which they belong.
+ omp_access_cgroup = 0,
+ /// Groups the tasks based on the parallel region to which they bind.
+ omp_access_pteam = 1,
+};
void ompx_dump_mapping_tables(void);
int omp_get_num_devices(void);
int omp_get_device_num(void);
int omp_get_initial_device(void);
-size_t omp_get_groupprivate_limit(int device_num, omp_access_t access_group = omp_access_cgroup);
+size_t
+omp_get_groupprivate_limit(int device_num,
+ omp_access_t access_group = omp_access_cgroup);
void *omp_target_alloc(size_t Size, int DeviceNum);
void omp_target_free(void *DevicePtr, int DeviceNum);
int omp_target_is_present(const void *Ptr, int DeviceNum);
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index fa373c2..9751169 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -3441,6 +3441,10 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
KernelArgs.DynCGroupMem);
}
+ // Increase to the requested dynamic memory size for the device if needed.
+ DynBlockMemSize =
+ std::max(DynBlockMemSize, GenericDevice.getDynamicMemorySize());
+
// Push the kernel launch into the stream.
return Stream->pushKernelLaunch(*this, AllArgs, NumThreads, NumBlocks,
getStaticBlockMemSize() + DynBlockMemSize,
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index 2997585..dc66b2c 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -579,12 +579,12 @@ Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs,
uint32_t TotalBlockMemSize = StaticBlockMemSize + DynBlockMemSize;
if (StaticBlockMemSize > MaxBlockMemSize)
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
- "Static block memory size exceeds maximum");
+ "static block memory size exceeds maximum");
else if (!KernelArgs.Flags.AllowDynCGroupMemFallback &&
TotalBlockMemSize > MaxBlockMemSize)
return Plugin::error(
ErrorCode::INVALID_ARGUMENT,
- "Static and dynamic block memory size exceeds maximum");
+ "static and dynamic block memory size exceeds maximum");
void *FallbackBlockMem = nullptr;
if (DynBlockMemSize && (!GenericDevice.hasNativeBlockSharedMem() ||
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index eda7a85..af192af 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -151,7 +151,7 @@ struct CUDAKernelTy : public GenericKernelTy {
int SharedMemSize;
Res = cuFuncGetAttribute(&SharedMemSize,
CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, Func);
- if (auto Err = Plugin::check(Res, "Error in cuFuncGetAttribute: %s"))
+ if (auto Err = Plugin::check(Res, "error in cuFuncGetAttribute: %s"))
return Err;
StaticBlockMemSize = SharedMemSize;
@@ -1322,6 +1322,10 @@ Error CUDAKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
if (GenericDevice.getRPCServer())
GenericDevice.Plugin.getRPCServer().Thread->notify();
+ // Increase to the requested dynamic memory size for the device if needed.
+ DynBlockMemSize =
+ std::max(DynBlockMemSize, GenericDevice.getDynamicMemorySize());
+
// In case we require more memory than the current limit.
if (DynBlockMemSize >= MaxDynBlockMemSize) {
CUresult AttrResult = cuFuncSetAttribute(
diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp
index 9605bad..3ac62e5 100644
--- a/openmp/runtime/src/kmp_csupport.cpp
+++ b/openmp/runtime/src/kmp_csupport.cpp
@@ -4517,7 +4517,7 @@ void omp_free(void *ptr, omp_allocator_handle_t allocator) {
void *omp_get_dyn_groupprivate_ptr(size_t offset, int *is_fallback,
omp_access_t access_group) {
- if (is_fallback != NULL)
+ if (is_fallback != nullptr)
*is_fallback = 0;
return NULL;
}