aboutsummaryrefslogtreecommitdiff
path: root/libc
diff options
context:
space:
mode:
authorJoseph Huber <jhuber6@vols.utk.edu>2023-03-10 16:48:53 -0600
committerJoseph Huber <jhuber6@vols.utk.edu>2023-03-17 12:55:31 -0500
commit8e4f9b1fcbfd5e747b0df9a2de511b43bfa13348 (patch)
tree4ab2a512d0d60cdd064638424f7852ed5b64f719 /libc
parent8c040d0f4941d3557affda3550428b8712b69c92 (diff)
downloadllvm-8e4f9b1fcbfd5e747b0df9a2de511b43bfa13348.zip
llvm-8e4f9b1fcbfd5e747b0df9a2de511b43bfa13348.tar.gz
llvm-8e4f9b1fcbfd5e747b0df9a2de511b43bfa13348.tar.bz2
[libc] Add initial support for an RPC mechanism for the GPU
This patch adds initial support for an RPC client / server architecture. The GPU is unable to perform several system utilities on its own, so in order to implement features like printing or memory allocation we need to be able to communicate with the executing process. This is done via a buffer of "sharable" memory. That is, a buffer with a unified pointer that both the client and server can use to communicate. The implementation here is based off of Jon Chesterfield's minimal RPC example in his work. We use an `inbox` and `outbox` to communicate whether there is an RPC request and to signify when work is done. We use a fixed-size buffer for the communication channel. This is fixed size so that we can ensure that there is enough space for all compute-units on the GPU to issue work to any of the ports. Right now the implementation is single threaded so there is only a single buffer that is not shared. This implementation still has several features missing to be complete, such as multi-threaded support and asynchronous calls. Depends on D145912 Reviewed By: sivachandra Differential Revision: https://reviews.llvm.org/D145913
Diffstat (limited to 'libc')
-rw-r--r--libc/src/__support/CMakeLists.txt1
-rw-r--r--libc/src/__support/OSUtil/CMakeLists.txt29
-rw-r--r--libc/src/__support/OSUtil/gpu/CMakeLists.txt3
-rw-r--r--libc/src/__support/OSUtil/gpu/io.cpp29
-rw-r--r--libc/src/__support/OSUtil/gpu/io.h18
-rw-r--r--libc/src/__support/OSUtil/gpu/quick_exit.cpp10
-rw-r--r--libc/src/__support/OSUtil/io.h6
-rw-r--r--libc/src/__support/RPC/CMakeLists.txt18
-rw-r--r--libc/src/__support/RPC/rpc.h140
-rw-r--r--libc/src/__support/RPC/rpc_client.cpp27
-rw-r--r--libc/src/__support/RPC/rpc_client.h23
-rw-r--r--libc/startup/gpu/amdgpu/CMakeLists.txt3
-rw-r--r--libc/startup/gpu/amdgpu/start.cpp6
-rw-r--r--libc/utils/gpu/loader/amdgpu/CMakeLists.txt3
-rw-r--r--libc/utils/gpu/loader/amdgpu/Loader.cpp66
15 files changed, 365 insertions, 17 deletions
diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt
index 8329a80..e4eb354 100644
--- a/libc/src/__support/CMakeLists.txt
+++ b/libc/src/__support/CMakeLists.txt
@@ -204,6 +204,7 @@ add_header_library(
add_subdirectory(FPUtil)
add_subdirectory(OSUtil)
add_subdirectory(StringUtil)
+add_subdirectory(RPC)
# Thread support is used by other "File". So, we add the "threads"
# before "File".
diff --git a/libc/src/__support/OSUtil/CMakeLists.txt b/libc/src/__support/OSUtil/CMakeLists.txt
index 50aad32..c196775 100644
--- a/libc/src/__support/OSUtil/CMakeLists.txt
+++ b/libc/src/__support/OSUtil/CMakeLists.txt
@@ -8,12 +8,23 @@ if(NOT TARGET ${target_os_util})
return()
endif()
-add_header_library(
- osutil
- HDRS
- io.h
- quick_exit.h
- syscall.h
- DEPENDS
- ${target_os_util}
-)
+# The OSUtil is an object library in GPU mode.
+# For every other target it stays a header-only library that forwards its
+# dependencies to the target-specific utility library.
+if(NOT LIBC_TARGET_ARCHITECTURE_IS_GPU)
+ add_header_library(
+ osutil
+ HDRS
+ io.h
+ quick_exit.h
+ syscall.h
+ DEPENDS
+ ${target_os_util}
+ )
+else()
+ # Make the generic `osutil` name refer to the target-specific object library.
+ add_object_library(
+ osutil
+ ALIAS
+ ${target_os_util}
+ DEPENDS
+ ${target_os_util}
+ )
+endif()
diff --git a/libc/src/__support/OSUtil/gpu/CMakeLists.txt b/libc/src/__support/OSUtil/gpu/CMakeLists.txt
index eb6e86f..d1aa696 100644
--- a/libc/src/__support/OSUtil/gpu/CMakeLists.txt
+++ b/libc/src/__support/OSUtil/gpu/CMakeLists.txt
@@ -2,8 +2,11 @@ add_object_library(
gpu_util
SRCS
quick_exit.cpp
+ io.cpp
HDRS
quick_exit.h
+ io.h
DEPENDS
libc.src.__support.common
+ libc.src.__support.RPC.rpc_client
)
diff --git a/libc/src/__support/OSUtil/gpu/io.cpp b/libc/src/__support/OSUtil/gpu/io.cpp
new file mode 100644
index 0000000..75ac83a
--- /dev/null
+++ b/libc/src/__support/OSUtil/gpu/io.cpp
@@ -0,0 +1,29 @@
+//===-------------- GPU implementation of IO utils --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "io.h"
+
+#include "src/__support/RPC/rpc_client.h"
+#include "src/string/string_utils.h"
+
+namespace __llvm_libc {
+
+/// Sends \p msg to the RPC server so that it can be printed on the host.
+///
+/// The shared buffer has a fixed size, so the message is split into chunks
+/// that are sent with one RPC call each. The first word of the buffer carries
+/// the opcode and the remaining bytes carry a null-terminated piece of \p msg.
+/// An empty message issues no RPC call since there is nothing to print.
+void write_to_stderr(const char *msg) {
+  // Payload bytes available after the opcode word. One byte is held back so
+  // that every chunk can be null-terminated for the server.
+  constexpr uint64_t chunk_capacity =
+      sizeof(rpc::Buffer) - sizeof(uint64_t) - 1;
+  const uint64_t length = internal::string_length(msg);
+  for (uint64_t i = 0; i < length; i += chunk_capacity) {
+    // Copy only what is left of the message so we never read past its end.
+    const uint64_t remaining = length - i;
+    const uint64_t chunk_size =
+        remaining > chunk_capacity ? chunk_capacity : remaining;
+    rpc::client.run(
+        [&](rpc::Buffer *buffer) {
+          char *payload = reinterpret_cast<char *>(&buffer->data[1]);
+          buffer->data[0] = rpc::Opcode::PRINT_TO_STDERR;
+          inline_memcpy(payload, &msg[i], chunk_size);
+          payload[chunk_size] = '\0';
+        },
+        [](rpc::Buffer *) {});
+  }
+}
+
+} // namespace __llvm_libc
diff --git a/libc/src/__support/OSUtil/gpu/io.h b/libc/src/__support/OSUtil/gpu/io.h
new file mode 100644
index 0000000..e9a4ebf
--- /dev/null
+++ b/libc/src/__support/OSUtil/gpu/io.h
@@ -0,0 +1,18 @@
+//===-------------- GPU implementation of IO utils --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_SUPPORT_OSUTIL_GPU_IO_H
+#define LLVM_LIBC_SRC_SUPPORT_OSUTIL_GPU_IO_H
+
+namespace __llvm_libc {
+
+/// Forwards the null-terminated string \p msg to the RPC server using the
+/// PRINT_TO_STDERR opcode. Implemented in io.cpp.
+void write_to_stderr(const char *msg);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_SUPPORT_OSUTIL_GPU_IO_H
diff --git a/libc/src/__support/OSUtil/gpu/quick_exit.cpp b/libc/src/__support/OSUtil/gpu/quick_exit.cpp
index d8c8f03..9be7095 100644
--- a/libc/src/__support/OSUtil/gpu/quick_exit.cpp
+++ b/libc/src/__support/OSUtil/gpu/quick_exit.cpp
@@ -11,11 +11,21 @@
#include "quick_exit.h"
+#include "src/__support/RPC/rpc_client.h"
#include "src/__support/macros/properties/architectures.h"
namespace __llvm_libc {
void quick_exit(int status) {
+ // TODO: Support asynchronous calls so we don't wait and exit from the GPU
+ // immediately.
+ rpc::client.run(
+ [&](rpc::Buffer *buffer) {
+ buffer->data[0] = rpc::Opcode::EXIT;
+ buffer->data[1] = status;
+ },
+ [](rpc::Buffer *) {});
+
#if defined(LIBC_TARGET_ARCH_IS_NVPTX)
asm("exit" ::: "memory");
#elif defined(LIBC_TARGET_ARCH_IS_AMDGPU)
diff --git a/libc/src/__support/OSUtil/io.h b/libc/src/__support/OSUtil/io.h
index dbf92dc..e2eee08 100644
--- a/libc/src/__support/OSUtil/io.h
+++ b/libc/src/__support/OSUtil/io.h
@@ -9,7 +9,11 @@
#ifndef LLVM_LIBC_SRC_SUPPORT_OSUTIL_IO_H
#define LLVM_LIBC_SRC_SUPPORT_OSUTIL_IO_H
-#ifdef __unix__
+#include "src/__support/macros/properties/architectures.h"
+
+#if defined(LIBC_TARGET_ARCH_IS_GPU)
+#include "gpu/io.h"
+#elif defined(__unix__)
#include "linux/io.h"
#endif
diff --git a/libc/src/__support/RPC/CMakeLists.txt b/libc/src/__support/RPC/CMakeLists.txt
new file mode 100644
index 0000000..f583762
--- /dev/null
+++ b/libc/src/__support/RPC/CMakeLists.txt
@@ -0,0 +1,18 @@
+# Header-only RPC interface (rpc.h) containing the client and server protocol.
+add_header_library(
+ rpc
+ HDRS
+ rpc.h
+ DEPENDS
+ libc.src.__support.common
+ libc.src.__support.CPP.atomic
+)
+
+# Object library that provides the definition of the libc RPC client instance.
+add_object_library(
+ rpc_client
+ SRCS
+ rpc_client.cpp
+ HDRS
+ rpc_client.h
+ DEPENDS
+ .rpc
+)
diff --git a/libc/src/__support/RPC/rpc.h b/libc/src/__support/RPC/rpc.h
new file mode 100644
index 0000000..c3df09e
--- /dev/null
+++ b/libc/src/__support/RPC/rpc.h
@@ -0,0 +1,140 @@
+//===-- Shared memory RPC client / server interface -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_SUPPORT_RPC_RPC_H
+#define LLVM_LIBC_SRC_SUPPORT_RPC_RPC_H
+
+#include "src/__support/CPP/atomic.h"
+
+#include <stdint.h>
+
+namespace __llvm_libc {
+namespace rpc {
+
+/// A list of opcodes that we use to invoke certain actions on the server. We
+/// reserve the values up to and including LIBC_LAST (255) for internal libc
+/// usage.
+enum Opcode : uint64_t {
+ // No action requested.
+ NOOP = 0,
+ // The buffer payload is a string to be printed to stderr by the server.
+ PRINT_TO_STDERR = 1,
+ // The buffer payload is an exit status for the server process.
+ EXIT = 2,
+ // Largest opcode value reserved for libc.
+ LIBC_LAST = (1UL << 8) - 1,
+};
+
+/// A fixed size channel used to communicate between the RPC client and server.
+/// It holds eight 64-bit words. The users visible in this change store the
+/// opcode in data[0] and the payload in the words that follow.
+struct Buffer {
+ uint64_t data[8];
+};
+
+/// State common to both ends of the RPC channel. Each side keeps pointers to
+/// the inbox and outbox flags it uses to signal ownership of the shared buffer,
+/// together with a pointer to that buffer.
+struct Process {
+  cpp::Atomic<uint32_t> *inbox;
+  cpp::Atomic<uint32_t> *outbox;
+  Buffer *buffer;
+
+  /// Point this process at the given mailboxes and shared buffer.
+  void reset(void *inbox, void *outbox, void *buffer) {
+    using Mailbox = cpp::Atomic<uint32_t>;
+    // The parameters shadow the members, so the members are named explicitly.
+    this->inbox = static_cast<Mailbox *>(inbox);
+    this->outbox = static_cast<Mailbox *>(outbox);
+    this->buffer = static_cast<Buffer *>(buffer);
+  }
+};
+
+/// The RPC client used to make requests to the server.
+struct Client : public Process {
+ /// Performs one complete request: \p fill writes the request into the shared
+ /// buffer and \p use reads the server's response from it. Both are invoked
+ /// with a pointer to the shared buffer.
+ template <typename F, typename U> void run(F fill, U use);
+};
+
+/// The RPC server used to respond to the client.
+struct Server : public Process {
+ /// Checks for a pending request. \p work handles the request found in the
+ /// shared buffer and \p clean runs once the client has read the result.
+ /// Returns false if there was nothing to do and true otherwise.
+ template <typename W, typename C> bool run(W work, C clean);
+};
+
+/// Run the RPC client protocol to communicate with the server. We perform the
+/// following high level actions to complete a communication:
+/// - Apply \p fill to the shared buffer and write 1 to the outbox.
+/// - Wait until the inbox is 1.
+/// - Apply \p use to the shared buffer and write 0 to the outbox.
+/// - Wait until the inbox is 0.
+///
+/// The steps are written as a chain of independent checks on a local snapshot
+/// of the two flags, so the call continues from whichever state the flags are
+/// in on entry. Both waits are busy loops on the inbox.
+template <typename F, typename U> void Client::run(F fill, U use) {
+ // Snapshot both flags. The fence orders the relaxed loads before any
+ // subsequent access to the shared buffer.
+ bool in = inbox->load(cpp::MemoryOrder::RELAXED);
+ bool out = outbox->load(cpp::MemoryOrder::RELAXED);
+ atomic_thread_fence(cpp::MemoryOrder::ACQUIRE);
+ // Write to buffer then to the outbox.
+ if (!in & !out) {
+ fill(buffer);
+ atomic_thread_fence(cpp::MemoryOrder::RELEASE);
+ outbox->store(1, cpp::MemoryOrder::RELEASE);
+ out = 1;
+ }
+ // Wait for the result from the server.
+ if (!in & out) {
+ // Spin until the server raises its flag.
+ while (!in)
+ in = inbox->load(cpp::MemoryOrder::RELAXED);
+ atomic_thread_fence(cpp::MemoryOrder::ACQUIRE);
+ }
+ // Read from the buffer and then write to outbox.
+ if (in & out) {
+ use(buffer);
+ atomic_thread_fence(cpp::MemoryOrder::RELEASE);
+ outbox->store(0, cpp::MemoryOrder::RELEASE);
+ out = 0;
+ }
+ // Wait for server to complete the communication.
+ if (in & !out) {
+ // Spin until the server lowers its flag again.
+ while (in)
+ in = inbox->load(cpp::MemoryOrder::RELAXED);
+ atomic_thread_fence(cpp::MemoryOrder::ACQUIRE);
+ }
+}
+
+/// Run the RPC server protocol to communicate with the client. This is
+/// non-blocking and only checks the server a single time. We perform the
+/// following high level actions to complete a communication:
+/// - Query if the inbox is 1 and exit if there is no work to do.
+/// - Apply \p work to the shared buffer and write 1 to the outbox.
+/// - Wait until the inbox is 0.
+/// - Apply \p clean to the shared buffer and write 0 to the outbox.
+///
+/// Only the initial query returns without waiting. Once a request has been
+/// picked up this busy-waits until the client clears its flag.
+///
+/// \returns false if both flags were clear on entry, true otherwise.
+template <typename W, typename C> bool Server::run(W work, C clean) {
+ // Snapshot both flags. The fence orders the relaxed loads before any
+ // subsequent access to the shared buffer.
+ bool in = inbox->load(cpp::MemoryOrder::RELAXED);
+ bool out = outbox->load(cpp::MemoryOrder::RELAXED);
+ atomic_thread_fence(cpp::MemoryOrder::ACQUIRE);
+ // No work to do, exit.
+ if (!in & !out)
+ return false;
+ // Do work then write to the outbox.
+ if (in & !out) {
+ work(buffer);
+ atomic_thread_fence(cpp::MemoryOrder::RELEASE);
+ outbox->store(1, cpp::MemoryOrder::RELEASE);
+ out = 1;
+ }
+ // Wait for the client to read the result.
+ if (in & out) {
+ // Spin until the client lowers its flag.
+ while (in)
+ in = inbox->load(cpp::MemoryOrder::RELAXED);
+ atomic_thread_fence(cpp::MemoryOrder::ACQUIRE);
+ }
+ // Clean up the buffer and signal the client.
+ if (!in & out) {
+ clean(buffer);
+ atomic_thread_fence(cpp::MemoryOrder::RELEASE);
+ outbox->store(0, cpp::MemoryOrder::RELEASE);
+ out = 0;
+ }
+
+ return true;
+}
+
+} // namespace rpc
+} // namespace __llvm_libc
+
+#endif
diff --git a/libc/src/__support/RPC/rpc_client.cpp b/libc/src/__support/RPC/rpc_client.cpp
new file mode 100644
index 0000000..3e64fe5
--- /dev/null
+++ b/libc/src/__support/RPC/rpc_client.cpp
@@ -0,0 +1,27 @@
+//===-- Shared memory RPC client instantiation ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// A source file needs no include guard. Including the matching header instead
+// lets the compiler check this definition against the `extern` declaration.
+#include "rpc_client.h"
+#include "rpc.h"
+
+namespace __llvm_libc {
+namespace rpc {
+
+/// The libc client instance used to communicate with the server.
+Client client;
+
+/// Externally visible symbol to signify the usage of an RPC client to
+/// whomever needs to run the server.
+extern "C" [[gnu::visibility("protected")]] const bool __llvm_libc_rpc = false;
+
+} // namespace rpc
+} // namespace __llvm_libc
diff --git a/libc/src/__support/RPC/rpc_client.h b/libc/src/__support/RPC/rpc_client.h
new file mode 100644
index 0000000..509ec2f
--- /dev/null
+++ b/libc/src/__support/RPC/rpc_client.h
@@ -0,0 +1,23 @@
+//===-- Shared memory RPC client instantiation ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_SUPPORT_RPC_RPC_CLIENT_H
+#define LLVM_LIBC_SRC_SUPPORT_RPC_RPC_CLIENT_H
+
+#include "rpc.h"
+
+namespace __llvm_libc {
+namespace rpc {
+
+/// The libc client instance used to communicate with the server. This is only
+/// the declaration; the definition lives in rpc_client.cpp.
+extern Client client;
+
+} // namespace rpc
+} // namespace __llvm_libc
+
+#endif
diff --git a/libc/startup/gpu/amdgpu/CMakeLists.txt b/libc/startup/gpu/amdgpu/CMakeLists.txt
index be20237..d1c6fc7 100644
--- a/libc/startup/gpu/amdgpu/CMakeLists.txt
+++ b/libc/startup/gpu/amdgpu/CMakeLists.txt
@@ -2,11 +2,12 @@ add_startup_object(
crt1
SRC
start.cpp
+ DEPENDS
+ libc.src.__support.RPC.rpc_client
COMPILE_OPTIONS
-ffreestanding # To avoid compiler warnings about calling the main function.
-fno-builtin
-nogpulib # Do not include any GPU vendor libraries.
- -nostdinc
-mcpu=${LIBC_GPU_TARGET_ARCHITECTURE}
-emit-llvm # AMDGPU's intermediate object file format is bitcode.
--target=${LIBC_GPU_TARGET_TRIPLE}
diff --git a/libc/startup/gpu/amdgpu/start.cpp b/libc/startup/gpu/amdgpu/start.cpp
index 3be3745..cc30982 100644
--- a/libc/startup/gpu/amdgpu/start.cpp
+++ b/libc/startup/gpu/amdgpu/start.cpp
@@ -6,9 +6,13 @@
//
//===----------------------------------------------------------------------===//
+#include "src/__support/RPC/rpc_client.h"
+
extern "C" int main(int argc, char **argv);
extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel]] void
-_start(int argc, char **argv, int *ret) {
+_start(int argc, char **argv, int *ret, void *in, void *out, void *buffer) {
+ __llvm_libc::rpc::client.reset(in, out, buffer);
+
__atomic_fetch_or(ret, main(argc, argv), __ATOMIC_RELAXED);
}
diff --git a/libc/utils/gpu/loader/amdgpu/CMakeLists.txt b/libc/utils/gpu/loader/amdgpu/CMakeLists.txt
index 524e808..bef97af 100644
--- a/libc/utils/gpu/loader/amdgpu/CMakeLists.txt
+++ b/libc/utils/gpu/loader/amdgpu/CMakeLists.txt
@@ -1,4 +1,7 @@
add_executable(amdhsa_loader Loader.cpp)
+add_dependencies(amdhsa_loader libc.src.__support.RPC.rpc)
+
+target_include_directories(amdhsa_loader PRIVATE ${LIBC_SOURCE_DIR})
target_link_libraries(amdhsa_loader
PRIVATE
hsa-runtime64::hsa-runtime64
diff --git a/libc/utils/gpu/loader/amdgpu/Loader.cpp b/libc/utils/gpu/loader/amdgpu/Loader.cpp
index 5f444d8..3136dc2 100644
--- a/libc/utils/gpu/loader/amdgpu/Loader.cpp
+++ b/libc/utils/gpu/loader/amdgpu/Loader.cpp
@@ -15,6 +15,8 @@
#include "Loader.h"
+#include "src/__support/RPC/rpc.h"
+
#include <hsa/hsa.h>
#include <hsa/hsa_ext_amd.h>
@@ -31,8 +33,35 @@ struct kernel_args_t {
int argc;
void *argv;
void *ret;
+ void *inbox;
+ void *outbox;
+ void *buffer;
};
+static __llvm_libc::rpc::Server server;
+
+/// Queries the RPC client at least once and performs server-side work if there
+/// are any active requests. Polling continues for as long as the server
+/// reports that it handled something.
+void handle_server() {
+  while (server.run(
+      [&](__llvm_libc::rpc::Buffer *buffer) {
+        switch (static_cast<__llvm_libc::rpc::Opcode>(buffer->data[0])) {
+        case __llvm_libc::rpc::Opcode::PRINT_TO_STDERR: {
+          // The payload is shared memory written by the device. Do not rely
+          // on it being null-terminated and never read past the buffer.
+          const char *str = reinterpret_cast<const char *>(&buffer->data[1]);
+          const size_t capacity = sizeof(*buffer) - sizeof(buffer->data[0]);
+          const void *terminator = std::memchr(str, '\0', capacity);
+          const size_t length =
+              terminator ? static_cast<size_t>(
+                               static_cast<const char *>(terminator) - str)
+                         : capacity;
+          fwrite(str, 1, length, stderr);
+          break;
+        }
+        case __llvm_libc::rpc::Opcode::EXIT: {
+          // The status is transported in a 64-bit word; narrow it explicitly.
+          exit(static_cast<int>(buffer->data[1]));
+          break;
+        }
+        default:
+          return;
+        }
+      },
+      [](__llvm_libc::rpc::Buffer *) {}))
+    ;
+}
+
/// Print the error code and exit if \p code indicates an error.
static void handle_error(hsa_status_t code) {
if (code == HSA_STATUS_SUCCESS || code == HSA_STATUS_INFO_BREAK)
@@ -278,6 +307,26 @@ int load(int argc, char **argv, void *image, size_t size) {
handle_error(err);
hsa_amd_memory_fill(dev_ret, 0, sizeof(int));
+ // Allocate finegrained memory for the RPC server and client to share.
+ void *server_inbox;
+ void *server_outbox;
+ void *buffer;
+ if (hsa_status_t err = hsa_amd_memory_pool_allocate(
+ finegrained_pool, sizeof(__llvm_libc::cpp::Atomic<int>),
+ /*flags=*/0, &server_inbox))
+ handle_error(err);
+ if (hsa_status_t err = hsa_amd_memory_pool_allocate(
+ finegrained_pool, sizeof(__llvm_libc::cpp::Atomic<int>),
+ /*flags=*/0, &server_outbox))
+ handle_error(err);
+ if (hsa_status_t err = hsa_amd_memory_pool_allocate(
+ finegrained_pool, sizeof(__llvm_libc::rpc::Buffer),
+ /*flags=*/0, &buffer))
+ handle_error(err);
+ hsa_amd_agents_allow_access(1, &dev_agent, nullptr, server_inbox);
+ hsa_amd_agents_allow_access(1, &dev_agent, nullptr, server_outbox);
+ hsa_amd_agents_allow_access(1, &dev_agent, nullptr, buffer);
+
// Initialize all the arguments (explicit and implicit) to zero, then set the
// explicit arguments to the values created above.
std::memset(args, 0, args_size);
@@ -285,6 +334,9 @@ int load(int argc, char **argv, void *image, size_t size) {
kernel_args->argc = argc;
kernel_args->argv = dev_argv;
kernel_args->ret = dev_ret;
+ kernel_args->inbox = server_outbox;
+ kernel_args->outbox = server_inbox;
+ kernel_args->buffer = buffer;
// Obtain a packet from the queue.
uint64_t packet_id = hsa_queue_add_write_index_relaxed(queue, 1);
@@ -316,6 +368,9 @@ int load(int argc, char **argv, void *image, size_t size) {
hsa_signal_create(1, 0, nullptr, &packet->completion_signal))
handle_error(err);
+ // Initialize the RPC server's buffer for host-device communication.
+ server.reset(server_inbox, server_outbox, buffer);
+
// Initialize the packet header and set the doorbell signal to begin execution
// by the HSA runtime.
uint16_t header =
@@ -326,11 +381,12 @@ int load(int argc, char **argv, void *image, size_t size) {
__ATOMIC_RELEASE);
hsa_signal_store_relaxed(queue->doorbell_signal, packet_id);
- // Wait until the kernel has completed execution on the device.
- while (hsa_signal_wait_scacquire(packet->completion_signal,
- HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX,
- HSA_WAIT_STATE_ACTIVE) != 0)
- ;
+ // Wait until the kernel has completed execution on the device. Periodically
+ // check the RPC client for work to be performed on the server.
+ while (hsa_signal_wait_scacquire(
+ packet->completion_signal, HSA_SIGNAL_CONDITION_EQ, 0,
+ /*timeout_hint=*/1024, HSA_WAIT_STATE_ACTIVE) != 0)
+ handle_server();
// Create a memory signal and copy the return value back from the device into
// a new buffer.