[libc] Round up time for GPU nanosleep implementation (#81630)

Summary: The GPU `nanosleep` tests would occasionally fail. The cause was that we used integer division to determine how many ticks we had to sleep for. That division truncates, leaving us with a tick count that corresponds to slightly less than the requested time. As a result, the final elapsed-time check would occasionally find that we had slept for less than requested, and we would return `-1`. This patch changes the code to round the tick count up to the next whole tick (ceiling division) so we always sleep for at least the requested time.
author: Joseph Huber <huberjn@outlook.com> 2024-02-13 10:55:28 -0600
committer: GitHub <noreply@github.com> 2024-02-13 10:55:28 -0600
commit: 1dacfd119071af50eaef21a97a46076ee6ff20fd (patch)
tree: bacdd41a842972bf87b30b050c1bf0da8ef52f03
parent: d79c3c50c45f2bd0acc0269dbedde9ddeed2d50e (diff)
download: llvm-1dacfd119071af50eaef21a97a46076ee6ff20fd.zip
llvm-1dacfd119071af50eaef21a97a46076ee6ff20fd.tar.gz
llvm-1dacfd119071af50eaef21a97a46076ee6ff20fd.tar.bz2
1 files changed, 8 insertions, 7 deletions
diff --git a/libc/src/time/gpu/nanosleep.cpp b/libc/src/time/gpu/nanosleep.cpp
index e84fe62..34ff904 100644
--- a/libc/src/time/gpu/nanosleep.cpp
+++ b/libc/src/time/gpu/nanosleep.cpp
@@ -12,18 +12,19 @@
 
 namespace LIBC_NAMESPACE {
 
-constexpr uint64_t TICKS_PER_NS = 1000000000UL;
+constexpr uint64_t TICKS_PER_SEC = 1000000000UL;
 
 LLVM_LIBC_FUNCTION(int, nanosleep,
                    (const struct timespec *req, struct timespec *rem)) {
   if (!GPU_CLOCKS_PER_SEC || !req)
     return -1;
 
-  uint64_t nsecs = req->tv_nsec + req->tv_sec * TICKS_PER_NS;
+  uint64_t nsecs = req->tv_nsec + req->tv_sec * TICKS_PER_SEC;
+  uint64_t tick_rate = TICKS_PER_SEC / GPU_CLOCKS_PER_SEC;
 
   uint64_t start = gpu::fixed_frequency_clock();
 #if defined(LIBC_TARGET_ARCH_IS_NVPTX) && __CUDA_ARCH__ >= 700
-  uint64_t end = start + nsecs / (TICKS_PER_NS / GPU_CLOCKS_PER_SEC);
+  uint64_t end = start + (nsecs + tick_rate - 1) / tick_rate;
   uint64_t cur = gpu::fixed_frequency_clock();
   // The NVPTX architecture supports sleeping and guaruntees the actual time
   // slept will be somewhere between zero and twice the requested amount. Here
@@ -34,7 +35,7 @@ LLVM_LIBC_FUNCTION(int, nanosleep,
     nsecs -= nsecs > cur - start ? cur - start : 0;
   }
 #elif defined(LIBC_TARGET_ARCH_IS_AMDGPU)
-  uint64_t end = start + nsecs / (TICKS_PER_NS / GPU_CLOCKS_PER_SEC);
+  uint64_t end = start + (nsecs + tick_rate - 1) / tick_rate;
   uint64_t cur = gpu::fixed_frequency_clock();
   // The AMDGPU architecture does not provide a sleep implementation with a
   // known delay so we simply repeatedly sleep with a large value of ~960 clock
@@ -56,11 +57,11 @@ LLVM_LIBC_FUNCTION(int, nanosleep,
 
   // Check to make sure we slept for at least the desired duration and set the
   // remaining time if not.
-  uint64_t elapsed = (stop - start) * (TICKS_PER_NS / GPU_CLOCKS_PER_SEC);
+  uint64_t elapsed = (stop - start) * tick_rate;
   if (elapsed < nsecs) {
     if (rem) {
-      rem->tv_sec = (nsecs - elapsed) / TICKS_PER_NS;
-      rem->tv_nsec = (nsecs - elapsed) % TICKS_PER_NS;
+      rem->tv_sec = (nsecs - elapsed) / TICKS_PER_SEC;
+      rem->tv_nsec = (nsecs - elapsed) % TICKS_PER_SEC;
     }
     return -1;
   }
author	Joseph Huber <huberjn@outlook.com>	2024-02-13 10:55:28 -0600
committer	GitHub <noreply@github.com>	2024-02-13 10:55:28 -0600
commit	1dacfd119071af50eaef21a97a46076ee6ff20fd (patch)
tree	bacdd41a842972bf87b30b050c1bf0da8ef52f03
parent	d79c3c50c45f2bd0acc0269dbedde9ddeed2d50e (diff)
download	llvm-1dacfd119071af50eaef21a97a46076ee6ff20fd.zip llvm-1dacfd119071af50eaef21a97a46076ee6ff20fd.tar.gz llvm-1dacfd119071af50eaef21a97a46076ee6ff20fd.tar.bz2