diff options
author | Kevin Sala <salapenades1@llnl.gov> | 2025-08-09 22:50:23 -0700 |
---|---|---|
committer | Kevin Sala <salapenades1@llnl.gov> | 2025-08-10 12:16:05 -0700 |
commit | f2c6f976c58937a1d48bd0fb7f21b9a36d0452d6 (patch) | |
tree | 44a1f595dfd54386a685e00f3f6ec1a20fea1cad | |
parent | fa3c7425ae9e5ffea83841f2be61b0f494b99038 (diff) | |
download | llvm-users/kevinsala/omp-dyn-groupprivate-rt-pr.zip llvm-users/kevinsala/omp-dyn-groupprivate-rt-pr.tar.gz llvm-users/kevinsala/omp-dyn-groupprivate-rt-pr.tar.bz2 |
-rw-r--r-- | offload/DeviceRTL/include/DeviceTypes.h | 5 | ||||
-rw-r--r-- | offload/DeviceRTL/src/State.cpp | 19 | ||||
-rw-r--r-- | offload/include/omptarget.h | 15 | ||||
-rw-r--r-- | offload/plugins-nextgen/amdgpu/src/rtl.cpp | 4 | ||||
-rw-r--r-- | offload/plugins-nextgen/common/src/PluginInterface.cpp | 4 | ||||
-rw-r--r-- | offload/plugins-nextgen/cuda/src/rtl.cpp | 6 | ||||
-rw-r--r-- | openmp/runtime/src/kmp_csupport.cpp | 2 |
7 files changed, 38 insertions, 17 deletions
diff --git a/offload/DeviceRTL/include/DeviceTypes.h b/offload/DeviceRTL/include/DeviceTypes.h index a43b506..042fef4 100644 --- a/offload/DeviceRTL/include/DeviceTypes.h +++ b/offload/DeviceRTL/include/DeviceTypes.h @@ -163,8 +163,13 @@ typedef enum omp_allocator_handle_t { ///} +/// The OpenMP access group type. The criterion for grouping tasks using a +/// specific grouping property. enum omp_access_t { + /// Groups the tasks based on the contention group to which they belong. omp_access_cgroup = 0, + /// Groups the tasks based on the parallel region to which they bind. + omp_access_pteam = 1, }; #endif diff --git a/offload/DeviceRTL/src/State.cpp b/offload/DeviceRTL/src/State.cpp index 9e2a999..c6bc6a1 100644 --- a/offload/DeviceRTL/src/State.cpp +++ b/offload/DeviceRTL/src/State.cpp @@ -163,14 +163,15 @@ struct DynCGroupMemTy { Size = 0; Ptr = nullptr; IsFallback = false; - if (KLE) { - Size = KLE->DynCGroupMemSize; - if (void *Fallback = KLE->DynCGroupMemFallback) { - Ptr = static_cast<char *>(Fallback) + Size * omp_get_team_num(); - IsFallback = true; - } else { - Ptr = static_cast<char *>(NativeDynCGroup); - } + if (!KLE) + return; + + Size = KLE->DynCGroupMemSize; + if (void *Fallback = KLE->DynCGroupMemFallback) { + Ptr = static_cast<char *>(Fallback) + Size * omp_get_team_num(); + IsFallback = true; + } else { + Ptr = static_cast<char *>(NativeDynCGroup); } } @@ -466,7 +467,7 @@ int omp_is_initial_device(void) { return 0; } void *omp_get_dyn_groupprivate_ptr(size_t Offset, int *IsFallback, omp_access_t) { - if (IsFallback != NULL) + if (IsFallback != nullptr) *IsFallback = DynCGroupMem.isFallback(); return DynCGroupMem.getPtr(Offset); } diff --git a/offload/include/omptarget.h b/offload/include/omptarget.h index 45bb74e..ddb0f7f 100644 --- a/offload/include/omptarget.h +++ b/offload/include/omptarget.h @@ -273,15 +273,22 @@ struct __tgt_target_non_contig { extern "C" { #endif -typedef enum { - omp_access_cgroup = 0, -} omp_access_t; +/// The OpenMP 
access group type. The criterion for grouping tasks using a +/// specific grouping property. +enum omp_access_t { + /// Groups the tasks based on the contention group to which they belong. + omp_access_cgroup = 0, + /// Groups the tasks based on the parallel region to which they bind. + omp_access_pteam = 1, +}; void ompx_dump_mapping_tables(void); int omp_get_num_devices(void); int omp_get_device_num(void); int omp_get_initial_device(void); -size_t omp_get_groupprivate_limit(int device_num, omp_access_t access_group = omp_access_cgroup); +size_t +omp_get_groupprivate_limit(int device_num, + omp_access_t access_group = omp_access_cgroup); void *omp_target_alloc(size_t Size, int DeviceNum); void omp_target_free(void *DevicePtr, int DeviceNum); int omp_target_is_present(const void *Ptr, int DeviceNum); diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index fa373c2..9751169 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -3441,6 +3441,10 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice, KernelArgs.DynCGroupMem); } + // Increase to the requested dynamic memory size for the device if needed. + DynBlockMemSize = + std::max(DynBlockMemSize, GenericDevice.getDynamicMemorySize()); + // Push the kernel launch into the stream. 
return Stream->pushKernelLaunch(*this, AllArgs, NumThreads, NumBlocks, getStaticBlockMemSize() + DynBlockMemSize, diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp index 2997585..dc66b2c 100644 --- a/offload/plugins-nextgen/common/src/PluginInterface.cpp +++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp @@ -579,12 +579,12 @@ Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs, uint32_t TotalBlockMemSize = StaticBlockMemSize + DynBlockMemSize; if (StaticBlockMemSize > MaxBlockMemSize) return Plugin::error(ErrorCode::INVALID_ARGUMENT, - "Static block memory size exceeds maximum"); + "static block memory size exceeds maximum"); else if (!KernelArgs.Flags.AllowDynCGroupMemFallback && TotalBlockMemSize > MaxBlockMemSize) return Plugin::error( ErrorCode::INVALID_ARGUMENT, - "Static and dynamic block memory size exceeds maximum"); + "static and dynamic block memory size exceeds maximum"); void *FallbackBlockMem = nullptr; if (DynBlockMemSize && (!GenericDevice.hasNativeBlockSharedMem() || diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp index eda7a85..af192af 100644 --- a/offload/plugins-nextgen/cuda/src/rtl.cpp +++ b/offload/plugins-nextgen/cuda/src/rtl.cpp @@ -151,7 +151,7 @@ struct CUDAKernelTy : public GenericKernelTy { int SharedMemSize; Res = cuFuncGetAttribute(&SharedMemSize, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, Func); - if (auto Err = Plugin::check(Res, "Error in cuFuncGetAttribute: %s")) + if (auto Err = Plugin::check(Res, "error in cuFuncGetAttribute: %s")) return Err; StaticBlockMemSize = SharedMemSize; @@ -1322,6 +1322,10 @@ Error CUDAKernelTy::launchImpl(GenericDeviceTy &GenericDevice, if (GenericDevice.getRPCServer()) GenericDevice.Plugin.getRPCServer().Thread->notify(); + // Increase to the requested dynamic memory size for the device if needed. 
+ DynBlockMemSize = + std::max(DynBlockMemSize, GenericDevice.getDynamicMemorySize()); + // In case we require more memory than the current limit. if (DynBlockMemSize >= MaxDynBlockMemSize) { CUresult AttrResult = cuFuncSetAttribute( diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp index 9605bad..3ac62e5 100644 --- a/openmp/runtime/src/kmp_csupport.cpp +++ b/openmp/runtime/src/kmp_csupport.cpp @@ -4517,7 +4517,7 @@ void omp_free(void *ptr, omp_allocator_handle_t allocator) { void *omp_get_dyn_groupprivate_ptr(size_t offset, int *is_fallback, omp_access_t access_group) { - if (is_fallback != NULL) + if (is_fallback != nullptr) *is_fallback = 0; return NULL; } |