[libc] Scan the ports more fairly in the RPC server (#66680)

Summary: Currently, we use the RPC server to respond to different ports, each of which contains a request from some client thread wishing to do work on the server. The scan starts at zero and continues until it has checked all ports, at which point it resets. If we find an active port, we service it and then restart the search. This is bad for two reasons. First, it means that we will always be biased toward the lower ports. If a thread grabs a high port, it will be stuck for a very long time until all the other work is done. Second, it means that the `handle_server` function can technically run indefinitely as long as the client is always pushing new work. Because the OpenMP implementation uses the user thread to service the kernel, this means that it could be stalled servicing another asynchronous device's kernels. This patch addresses this by making the server restart at the next port over. This means we will always do a full scan of the ports before quitting.
author: Joseph Huber <35342157+jhuber6@users.noreply.github.com> 2023-09-26 16:09:48 -0500
committer: GitHub <noreply@github.com> 2023-09-26 16:09:48 -0500
commit: 1a5d3b6cda2c56a39bbe2a529db4d3ac3d5ffa0f (patch)
tree: 416123f63f14326fda52fd17e951de3389538a0c /libc/utils
parent: 0f88be77ea59360ae43f4e5683e23f7edce6aceb (diff)
download: llvm-1a5d3b6cda2c56a39bbe2a529db4d3ac3d5ffa0f.zip
llvm-1a5d3b6cda2c56a39bbe2a529db4d3ac3d5ffa0f.tar.gz
llvm-1a5d3b6cda2c56a39bbe2a529db4d3ac3d5ffa0f.tar.bz2
1 files changed, 12 insertions, 6 deletions
diff --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp
index 6395a80..1c1c9f1 100644
--- a/libc/utils/gpu/server/rpc_server.cpp
+++ b/libc/utils/gpu/server/rpc_server.cpp
@@ -36,11 +36,12 @@ struct Server {
 
   rpc_status_t handle_server(
       const std::unordered_map<rpc_opcode_t, rpc_opcode_callback_ty> &callbacks,
-      const std::unordered_map<rpc_opcode_t, void *> &callback_data) {
+      const std::unordered_map<rpc_opcode_t, void *> &callback_data,
+      uint32_t &index) {
     rpc_status_t ret = RPC_STATUS_SUCCESS;
     std::visit(
         [&](auto &server) {
-          ret = handle_server(*server, callbacks, callback_data);
+          ret = handle_server(*server, callbacks, callback_data, index);
         },
         server);
     return ret;
@@ -51,8 +52,9 @@ private:
   rpc_status_t handle_server(
       rpc::Server<lane_size> &server,
       const std::unordered_map<rpc_opcode_t, rpc_opcode_callback_ty> &callbacks,
-      const std::unordered_map<rpc_opcode_t, void *> &callback_data) {
-    auto port = server.try_open();
+      const std::unordered_map<rpc_opcode_t, void *> &callback_data,
+      uint32_t &index) {
+    auto port = server.try_open(index);
     if (!port)
       return RPC_STATUS_SUCCESS;
 
@@ -203,6 +205,9 @@ private:
       (handler->second)(port_ref, data);
     }
     }
+
+    // Increment the index so we start the scan after this port.
+    index = port->get_index() + 1;
     port->close();
     return RPC_STATUS_CONTINUE;
   }
@@ -333,10 +338,11 @@ rpc_status_t rpc_handle_server(uint32_t device_id) {
   if (!state->devices[device_id])
     return RPC_STATUS_ERROR;
 
+  uint32_t index = 0;
   for (;;) {
     auto &device = *state->devices[device_id];
-    rpc_status_t status =
-        device.server.handle_server(device.callbacks, device.callback_data);
+    rpc_status_t status = device.server.handle_server(
+        device.callbacks, device.callback_data, index);
     if (status != RPC_STATUS_CONTINUE)
       return status;
   }
author	Joseph Huber <35342157+jhuber6@users.noreply.github.com>	2023-09-26 16:09:48 -0500
committer	GitHub <noreply@github.com>	2023-09-26 16:09:48 -0500
commit	1a5d3b6cda2c56a39bbe2a529db4d3ac3d5ffa0f (patch)
tree	416123f63f14326fda52fd17e951de3389538a0c /libc/utils
parent	0f88be77ea59360ae43f4e5683e23f7edce6aceb (diff)
download	llvm-1a5d3b6cda2c56a39bbe2a529db4d3ac3d5ffa0f.zip llvm-1a5d3b6cda2c56a39bbe2a529db4d3ac3d5ffa0f.tar.gz llvm-1a5d3b6cda2c56a39bbe2a529db4d3ac3d5ffa0f.tar.bz2