diff options
author | Ross Brunton <ross@codeplay.com> | 2025-08-07 10:16:33 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-08-07 10:16:33 +0100 |
commit | a44532544bd96c68ce2bc885d0cc0c4c9116f8b1 (patch) | |
tree | 7c930ddc0b2b4bfe3332588ee3a096216f08101c | |
parent | b9e133d5b6e41b652ba579bcb8850c00f72d0f01 (diff) | |
download | llvm-a44532544bd96c68ce2bc885d0cc0c4c9116f8b1.zip llvm-a44532544bd96c68ce2bc885d0cc0c4c9116f8b1.tar.gz llvm-a44532544bd96c68ce2bc885d0cc0c4c9116f8b1.tar.bz2 |
[Offload] Don't create events for empty queues (#152304)
Add a device function to check if a device queue is empty. If liboffload
tries to create an event for an empty queue, we create an "empty" event
that is already complete.
This allows `olCreateEvent`, `olSyncEvent` and `olWaitEvents` to run
quickly for empty queues.
-rw-r--r-- | offload/liboffload/src/OffloadImpl.cpp | 24 | ||||
-rw-r--r-- | offload/plugins-nextgen/amdgpu/src/rtl.cpp | 11 | ||||
-rw-r--r-- | offload/plugins-nextgen/common/include/PluginInterface.h | 8 | ||||
-rw-r--r-- | offload/plugins-nextgen/common/src/PluginInterface.cpp | 15 | ||||
-rw-r--r-- | offload/plugins-nextgen/cuda/src/rtl.cpp | 5 | ||||
-rw-r--r-- | offload/plugins-nextgen/host/src/rtl.cpp | 3 |
6 files changed, 62 insertions, 4 deletions
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp index 6486b2b..272a12a 100644 --- a/offload/liboffload/src/OffloadImpl.cpp +++ b/offload/liboffload/src/OffloadImpl.cpp @@ -72,6 +72,8 @@ struct ol_queue_impl_t { struct ol_event_impl_t { ol_event_impl_t(void *EventInfo, ol_queue_handle_t Queue) : EventInfo(EventInfo), Queue(Queue) {} + // EventInfo may be null, in which case the event should be considered always + // complete void *EventInfo; ol_queue_handle_t Queue; }; @@ -509,8 +511,8 @@ Error olWaitEvents_impl(ol_queue_handle_t Queue, ol_event_handle_t *Events, return Plugin::error(ErrorCode::INVALID_NULL_HANDLE, "olWaitEvents asked to wait on a NULL event"); - // Do nothing if the event is for this queue - if (Event->Queue == Queue) + // Do nothing if the event is for this queue or the event is always complete + if (Event->Queue == Queue || !Event->EventInfo) continue; if (auto Err = Device->waitEvent(Event->EventInfo, Queue->AsyncInfo)) @@ -548,6 +550,10 @@ Error olGetQueueInfoSize_impl(ol_queue_handle_t Queue, ol_queue_info_t PropName, } Error olSyncEvent_impl(ol_event_handle_t Event) { + if (!Event->EventInfo) + // Event always complete + return Plugin::success(); + if (auto Res = Event->Queue->Device->Device->syncEvent(Event->EventInfo)) return Res; @@ -555,8 +561,9 @@ Error olSyncEvent_impl(ol_event_handle_t Event) { } Error olDestroyEvent_impl(ol_event_handle_t Event) { - if (auto Res = Event->Queue->Device->Device->destroyEvent(Event->EventInfo)) - return Res; + if (Event->EventInfo) + if (auto Res = Event->Queue->Device->Device->destroyEvent(Event->EventInfo)) + return Res; return olDestroy(Event); } @@ -590,7 +597,16 @@ Error olGetEventInfoSize_impl(ol_event_handle_t Event, ol_event_info_t PropName, } Error olCreateEvent_impl(ol_queue_handle_t Queue, ol_event_handle_t *EventOut) { + auto Pending = Queue->Device->Device->hasPendingWork(Queue->AsyncInfo); + if (auto Err = Pending.takeError()) + return Err; + 
*EventOut = new ol_event_impl_t(nullptr, Queue); + if (!*Pending) + // Queue is empty, don't record an event and consider the event always + // complete + return Plugin::success(); + if (auto Res = Queue->Device->Device->createEvent(&(*EventOut)->EventInfo)) return Res; diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index 82deeb4..852c0e9 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -2591,6 +2591,17 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { return Event->wait(*Stream); } + Expected<bool> hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfo) override { + auto Stream = AsyncInfo.getQueueAs<AMDGPUStreamTy *>(); + if (!Stream) + return false; + + auto Query = Stream->query(); + if (Query) + return !*Query; + return Query.takeError(); + } + /// Synchronize the current thread with the event. Error syncEventImpl(void *EventPtr) override { AMDGPUEventTy *Event = reinterpret_cast<AMDGPUEventTy *>(EventPtr); diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index 4b7d410..1d64193 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -973,6 +973,14 @@ struct GenericDeviceTy : public DeviceAllocatorTy { Error printInfo(); virtual Expected<InfoTreeNode> obtainInfoImpl() = 0; + /// Return true if the device has work that is either queued or currently + /// running + /// + /// Devices which cannot report this information should always return true + Expected<bool> hasPendingWork(__tgt_async_info *AsyncInfo); + virtual Expected<bool> + hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfoWrapper) = 0; + /// Getters of the grid values. 
uint32_t getWarpSize() const { return GridValues.GV_Warp_Size; } uint32_t getThreadLimit() const { return GridValues.GV_Max_WG_Size; } diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp index 0a57046..bcc9179 100644 --- a/offload/plugins-nextgen/common/src/PluginInterface.cpp +++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp @@ -1626,6 +1626,21 @@ Error GenericDeviceTy::waitEvent(void *EventPtr, __tgt_async_info *AsyncInfo) { return Err; } +Expected<bool> GenericDeviceTy::hasPendingWork(__tgt_async_info *AsyncInfo) { + AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo); + auto Res = hasPendingWorkImpl(AsyncInfoWrapper); + if (auto Err = Res.takeError()) { + AsyncInfoWrapper.finalize(Err); + return Err; + } + + auto Err = Plugin::success(); + AsyncInfoWrapper.finalize(Err); + if (Err) + return Err; + return Res; +} + Error GenericDeviceTy::syncEvent(void *EventPtr) { return syncEventImpl(EventPtr); } diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp index c5f3167..7649fd9 100644 --- a/offload/plugins-nextgen/cuda/src/rtl.cpp +++ b/offload/plugins-nextgen/cuda/src/rtl.cpp @@ -916,6 +916,11 @@ struct CUDADeviceTy : public GenericDeviceTy { return Plugin::check(Res, "error in cuStreamWaitEvent: %s"); } + // TODO: This should be implementable on CUDA + Expected<bool> hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfo) override { + return true; + } + /// Synchronize the current thread with the event. 
Error syncEventImpl(void *EventPtr) override { CUevent Event = reinterpret_cast<CUevent>(EventPtr); diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp index d950572..9abc350 100644 --- a/offload/plugins-nextgen/host/src/rtl.cpp +++ b/offload/plugins-nextgen/host/src/rtl.cpp @@ -333,6 +333,9 @@ struct GenELF64DeviceTy : public GenericDeviceTy { AsyncInfoWrapperTy &AsyncInfoWrapper) override { return Plugin::success(); } + Expected<bool> hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfo) override { + return true; + } Error syncEventImpl(void *EventPtr) override { return Plugin::success(); } /// Print information about the device. |