aboutsummaryrefslogtreecommitdiff
path: root/offload/plugins-nextgen/amdgpu/src/rtl.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'offload/plugins-nextgen/amdgpu/src/rtl.cpp')
-rw-r--r--offload/plugins-nextgen/amdgpu/src/rtl.cpp45
1 file changed, 45 insertions, 0 deletions
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index b7bfa89..852c0e9 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2591,6 +2591,17 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
return Event->wait(*Stream);
}
+ /// Report whether the stream attached to \p AsyncInfo still has work in
+ /// flight.
+ ///
+ /// Returns false when no AMDGPU stream was ever attached to the async info
+ /// (nothing was enqueued, so nothing can be pending). Otherwise the result
+ /// is the negation of the stream's query() — query() apparently reports
+ /// "stream has completed" (TODO confirm against AMDGPUStreamTy::query) —
+ /// and any error from query() is propagated to the caller.
+ Expected<bool> hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfo) override {
+ auto Stream = AsyncInfo.getQueueAs<AMDGPUStreamTy *>();
+ if (!Stream)
+ return false;
+
+ auto Query = Stream->query();
+ if (Query)
+ return !*Query;
+ return Query.takeError();
+ }
+
/// Synchronize the current thread with the event.
Error syncEventImpl(void *EventPtr) override {
AMDGPUEventTy *Event = reinterpret_cast<AMDGPUEventTy *>(EventPtr);
@@ -2945,6 +2956,40 @@ private:
return Plugin::success();
}
+ /// Scan the device's memory pools and return true if any global,
+ /// coarse-grained pool has a capacity of (nearly) 64GB or more.
+ ///
+ /// Pools that are not global+coarse-grained, or whose size attribute
+ /// cannot be read, are skipped — the check is best-effort and a failed
+ /// HSA query simply means that pool does not count toward the result.
+ bool checkIfCoarseGrainMemoryNearOrAbove64GB() {
+ for (AMDGPUMemoryPoolTy *Pool : AllMemoryPools) {
+ if (!Pool->isGlobal() || !Pool->isCoarseGrained())
+ continue;
+ uint64_t Value;
+ hsa_status_t Status =
+ Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_SIZE, Value);
+ if (Status != HSA_STATUS_SUCCESS)
+ continue;
+ // 0xFF0000000 bytes ~= 63.75 GiB, i.e. just under a full 64 GiB —
+ // presumably chosen so pools reporting slightly less than the nominal
+ // 64 GiB (reserved carve-outs, etc.) still qualify. TODO confirm.
+ constexpr uint64_t Almost64Gig = 0xFF0000000;
+ if (Value >= Almost64Gig)
+ return true;
+ }
+ return false; // CoarseGrain pool w/ 64GB or more capacity not found
+ }
+
+ /// Pick the allocation-size threshold for GenericDeviceTy's MemoryManager
+ /// based on device memory capacity.
+ ///
+ /// Returns 3 GiB on devices with a (near) 64GB+ coarse-grained pool, and
+ /// 0 otherwise — 0 presumably leaves the generic default in place (TODO
+ /// confirm against the GenericDeviceTy base implementation).
+ size_t getMemoryManagerSizeThreshold() override {
+ // Targeting high memory capacity GPUs such as
+ // data center GPUs.
+ if (checkIfCoarseGrainMemoryNearOrAbove64GB()) {
+ // Set GenericDeviceTy::MemoryManager's Threshold to 3GiB,
+ // if threshold is not already set by ENV var
+ // LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD.
+ // This MemoryManager is used for omp_target_alloc(), OpenMP
+ // (non-usm) map clause, etc.
+ //
+ // Ideally, this kind of pooling is best performed at
+ // a common level (e.g, user side of HSA) between OpenMP and HIP
+ // but that feature does not exist (yet).
+ return 3ul * 1024 * 1024 * 1024 /* 3 GiB */;
+ }
+ return 0;
+ }
+
/// Envar for controlling the number of HSA queues per device. High number of
/// queues may degrade performance.
UInt32Envar OMPX_NumQueues;