diff options
Diffstat (limited to 'offload/plugins-nextgen/amdgpu/src/rtl.cpp')
-rw-r--r-- | offload/plugins-nextgen/amdgpu/src/rtl.cpp | 70 |
1 files changed, 57 insertions, 13 deletions
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index b7bfa89..7961820 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2232,16 +2232,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
   /// Get the stream of the asynchronous info structure or get a new one.
   Error getStream(AsyncInfoWrapperTy &AsyncInfoWrapper,
                   AMDGPUStreamTy *&Stream) {
-    // Get the stream (if any) from the async info.
-    Stream = AsyncInfoWrapper.getQueueAs<AMDGPUStreamTy *>();
-    if (!Stream) {
-      // There was no stream; get an idle one.
-      if (auto Err = AMDGPUStreamManager.getResource(Stream))
-        return Err;
-
-      // Modify the async info's stream.
-      AsyncInfoWrapper.setQueueAs<AMDGPUStreamTy *>(Stream);
-    }
+    auto WrapperStream =
+        AsyncInfoWrapper.getOrInitQueue<AMDGPUStreamTy *>(AMDGPUStreamManager);
+    if (!WrapperStream)
+      return WrapperStream.takeError();
+    Stream = *WrapperStream;
     return Plugin::success();
   }
 
@@ -2296,7 +2291,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
   }
 
   /// Synchronize current thread with the pending operations on the async info.
-  Error synchronizeImpl(__tgt_async_info &AsyncInfo) override {
+  Error synchronizeImpl(__tgt_async_info &AsyncInfo,
+                        bool ReleaseQueue) override {
     AMDGPUStreamTy *Stream =
         reinterpret_cast<AMDGPUStreamTy *>(AsyncInfo.Queue);
     assert(Stream && "Invalid stream");
@@ -2307,8 +2303,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
     // Once the stream is synchronized, return it to stream pool and reset
     // AsyncInfo. This is to make sure the synchronization only works for its
     // own tasks.
-    AsyncInfo.Queue = nullptr;
-    return AMDGPUStreamManager.returnResource(Stream);
+    if (ReleaseQueue) {
+      AsyncInfo.Queue = nullptr;
+      return AMDGPUStreamManager.returnResource(Stream);
+    }
+    return Plugin::success();
   }
 
   /// Query for the completion of the pending operations on the async info.
@@ -2591,6 +2590,17 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
     return Event->wait(*Stream);
   }
 
+  Expected<bool> hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfo) override {
+    auto Stream = AsyncInfo.getQueueAs<AMDGPUStreamTy *>();
+    if (!Stream)
+      return false;
+
+    auto Query = Stream->query();
+    if (Query)
+      return !*Query;
+    return Query.takeError();
+  }
+
   /// Synchronize the current thread with the event.
   Error syncEventImpl(void *EventPtr) override {
     AMDGPUEventTy *Event = reinterpret_cast<AMDGPUEventTy *>(EventPtr);
@@ -2945,6 +2955,40 @@ private:
     return Plugin::success();
   }
 
+  bool checkIfCoarseGrainMemoryNearOrAbove64GB() {
+    for (AMDGPUMemoryPoolTy *Pool : AllMemoryPools) {
+      if (!Pool->isGlobal() || !Pool->isCoarseGrained())
+        continue;
+      uint64_t Value;
+      hsa_status_t Status =
+          Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_SIZE, Value);
+      if (Status != HSA_STATUS_SUCCESS)
+        continue;
+      constexpr uint64_t Almost64Gig = 0xFF0000000;
+      if (Value >= Almost64Gig)
+        return true;
+    }
+    return false; // CoarseGrain pool w/ 64GB or more capacity not found
+  }
+
+  size_t getMemoryManagerSizeThreshold() override {
+    // Targeting high memory capacity GPUs such as
+    // data center GPUs.
+    if (checkIfCoarseGrainMemoryNearOrAbove64GB()) {
+      // Set GenericDeviceTy::MemoryManager's Threshold to 3GiB,
+      // if threshold is not already set by ENV var
+      // LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD.
+      // This MemoryManager is used for omp_target_alloc(), OpenMP
+      // (non-usm) map clause, etc.
+      //
+      // Ideally, this kind of pooling is best performed at
+      // a common level (e.g, user side of HSA) between OpenMP and HIP
+      // but that feature does not exist (yet).
+      return 3ul * 1024 * 1024 * 1024 /* 3 GiB */;
+    }
+    return 0;
+  }
+
   /// Envar for controlling the number of HSA queues per device. High number of
   /// queues may degrade performance.
   UInt32Envar OMPX_NumQueues;