Add fixes (branch: users/kevinsala/omp-dyn-groupprivate-rt-pr)

author: Kevin Sala <salapenades1@llnl.gov> 2025-08-09 22:50:23 -0700
committer: Kevin Sala <salapenades1@llnl.gov> 2025-08-10 12:16:05 -0700
commit: f2c6f976c58937a1d48bd0fb7f21b9a36d0452d6 (patch)
tree: 44a1f595dfd54386a685e00f3f6ec1a20fea1cad
parent: fa3c7425ae9e5ffea83841f2be61b0f494b99038 (diff)
download: llvm-users/kevinsala/omp-dyn-groupprivate-rt-pr.zip
llvm-users/kevinsala/omp-dyn-groupprivate-rt-pr.tar.gz
llvm-users/kevinsala/omp-dyn-groupprivate-rt-pr.tar.bz2
7 files changed, 38 insertions, 17 deletions
diff --git a/offload/DeviceRTL/include/DeviceTypes.h b/offload/DeviceRTL/include/DeviceTypes.h
index a43b506..042fef4 100644
--- a/offload/DeviceRTL/include/DeviceTypes.h
+++ b/offload/DeviceRTL/include/DeviceTypes.h
@@ -163,8 +163,13 @@ typedef enum omp_allocator_handle_t {
 
 ///}
 
+/// The OpenMP access group type. The criterion for grouping tasks using a
+/// specific grouping property.
 enum omp_access_t {
+  /// Groups the tasks based on the contention group to which they belong.
   omp_access_cgroup = 0,
+  /// Groups the tasks based on the parallel region to which they bind.
+  omp_access_pteam = 1,
 };
 
 #endif
diff --git a/offload/DeviceRTL/src/State.cpp b/offload/DeviceRTL/src/State.cpp
index 9e2a999..c6bc6a1 100644
--- a/offload/DeviceRTL/src/State.cpp
+++ b/offload/DeviceRTL/src/State.cpp
@@ -163,14 +163,15 @@ struct DynCGroupMemTy {
     Size = 0;
     Ptr = nullptr;
     IsFallback = false;
-    if (KLE) {
-      Size = KLE->DynCGroupMemSize;
-      if (void *Fallback = KLE->DynCGroupMemFallback) {
-        Ptr = static_cast<char *>(Fallback) + Size * omp_get_team_num();
-        IsFallback = true;
-      } else {
-        Ptr = static_cast<char *>(NativeDynCGroup);
-      }
+    if (!KLE)
+      return;
+
+    Size = KLE->DynCGroupMemSize;
+    if (void *Fallback = KLE->DynCGroupMemFallback) {
+      Ptr = static_cast<char *>(Fallback) + Size * omp_get_team_num();
+      IsFallback = true;
+    } else {
+      Ptr = static_cast<char *>(NativeDynCGroup);
     }
   }
 
@@ -466,7 +467,7 @@ int omp_is_initial_device(void) { return 0; }
 
 void *omp_get_dyn_groupprivate_ptr(size_t Offset, int *IsFallback,
                                    omp_access_t) {
-  if (IsFallback != NULL)
+  if (IsFallback != nullptr)
     *IsFallback = DynCGroupMem.isFallback();
   return DynCGroupMem.getPtr(Offset);
 }
diff --git a/offload/include/omptarget.h b/offload/include/omptarget.h
index 45bb74e..ddb0f7f 100644
--- a/offload/include/omptarget.h
+++ b/offload/include/omptarget.h
@@ -273,15 +273,22 @@ struct __tgt_target_non_contig {
 extern "C" {
 #endif
 
-typedef enum {
-    omp_access_cgroup = 0,
-} omp_access_t;
+/// The OpenMP access group type. The criterion for grouping tasks using a
+/// specific grouping property.
+enum omp_access_t {
+  /// Groups the tasks based on the contention group to which they belong.
+  omp_access_cgroup = 0,
+  /// Groups the tasks based on the parallel region to which they bind.
+  omp_access_pteam = 1,
+};
 
 void ompx_dump_mapping_tables(void);
 int omp_get_num_devices(void);
 int omp_get_device_num(void);
 int omp_get_initial_device(void);
-size_t omp_get_groupprivate_limit(int device_num, omp_access_t access_group = omp_access_cgroup);
+size_t
+omp_get_groupprivate_limit(int device_num,
+                           omp_access_t access_group = omp_access_cgroup);
 void *omp_target_alloc(size_t Size, int DeviceNum);
 void omp_target_free(void *DevicePtr, int DeviceNum);
 int omp_target_is_present(const void *Ptr, int DeviceNum);
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index fa373c2..9751169 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -3441,6 +3441,10 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
                            KernelArgs.DynCGroupMem);
   }
 
+  // Increase to the requested dynamic memory size for the device if needed.
+  DynBlockMemSize =
+      std::max(DynBlockMemSize, GenericDevice.getDynamicMemorySize());
+
   // Push the kernel launch into the stream.
   return Stream->pushKernelLaunch(*this, AllArgs, NumThreads, NumBlocks,
                                   getStaticBlockMemSize() + DynBlockMemSize,
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index 2997585..dc66b2c 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -579,12 +579,12 @@ Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs,
   uint32_t TotalBlockMemSize = StaticBlockMemSize + DynBlockMemSize;
   if (StaticBlockMemSize > MaxBlockMemSize)
     return Plugin::error(ErrorCode::INVALID_ARGUMENT,
-                         "Static block memory size exceeds maximum");
+                         "static block memory size exceeds maximum");
   else if (!KernelArgs.Flags.AllowDynCGroupMemFallback &&
            TotalBlockMemSize > MaxBlockMemSize)
     return Plugin::error(
         ErrorCode::INVALID_ARGUMENT,
-        "Static and dynamic block memory size exceeds maximum");
+        "static and dynamic block memory size exceeds maximum");
 
   void *FallbackBlockMem = nullptr;
   if (DynBlockMemSize && (!GenericDevice.hasNativeBlockSharedMem() ||
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index eda7a85..af192af 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -151,7 +151,7 @@ struct CUDAKernelTy : public GenericKernelTy {
     int SharedMemSize;
     Res = cuFuncGetAttribute(&SharedMemSize,
                              CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, Func);
-    if (auto Err = Plugin::check(Res, "Error in cuFuncGetAttribute: %s"))
+    if (auto Err = Plugin::check(Res, "error in cuFuncGetAttribute: %s"))
       return Err;
 
     StaticBlockMemSize = SharedMemSize;
@@ -1322,6 +1322,10 @@ Error CUDAKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
   if (GenericDevice.getRPCServer())
     GenericDevice.Plugin.getRPCServer().Thread->notify();
 
+  // Increase to the requested dynamic memory size for the device if needed.
+  DynBlockMemSize =
+      std::max(DynBlockMemSize, GenericDevice.getDynamicMemorySize());
+
   // In case we require more memory than the current limit.
   if (DynBlockMemSize >= MaxDynBlockMemSize) {
     CUresult AttrResult = cuFuncSetAttribute(
diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp
index 9605bad..3ac62e5 100644
--- a/openmp/runtime/src/kmp_csupport.cpp
+++ b/openmp/runtime/src/kmp_csupport.cpp
@@ -4517,7 +4517,7 @@ void omp_free(void *ptr, omp_allocator_handle_t allocator) {
 
 void *omp_get_dyn_groupprivate_ptr(size_t offset, int *is_fallback,
                                    omp_access_t access_group) {
-  if (is_fallback != NULL)
+  if (is_fallback != nullptr)
     *is_fallback = 0;
   return NULL;
 }
author	Kevin Sala <salapenades1@llnl.gov>	2025-08-09 22:50:23 -0700
committer	Kevin Sala <salapenades1@llnl.gov>	2025-08-10 12:16:05 -0700
commit	f2c6f976c58937a1d48bd0fb7f21b9a36d0452d6 (patch)
tree	44a1f595dfd54386a685e00f3f6ec1a20fea1cad
parent	fa3c7425ae9e5ffea83841f2be61b0f494b99038 (diff)
download	llvm-users/kevinsala/omp-dyn-groupprivate-rt-pr.zip llvm-users/kevinsala/omp-dyn-groupprivate-rt-pr.tar.gz llvm-users/kevinsala/omp-dyn-groupprivate-rt-pr.tar.bz2