aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoseph Huber <huberjn@outlook.com>2024-02-13 12:26:45 -0600
committerGitHub <noreply@github.com>2024-02-13 12:26:45 -0600
commit63198e0682058c81cd546cb1851e785cec1387ef (patch)
treedb231e23f83f306de4ed8e46f23ca065335efb76
parent502a88bae799694d0ed90e1839cd7a0aacb6bc9d (diff)
downloadllvm-63198e0682058c81cd546cb1851e785cec1387ef.zip
llvm-63198e0682058c81cd546cb1851e785cec1387ef.tar.gz
llvm-63198e0682058c81cd546cb1851e785cec1387ef.tar.bz2
[libc] Remove remaining GPU architecture dependent instructions (#81612)
Summary: Recent patches have added solutions to the remaining sources of divergence. This patch simply removes the last occures of things like `has_builtin`, `ifdef` or builtins with feature requirements. The one exception here is `nanosleep`, but I made changes in the `__nvvm_reflect` pass to make usage like this actually work at O0. Depends on https://github.com/llvm/llvm-project/pull/81331
-rw-r--r--libc/src/__support/GPU/amdgpu/utils.h9
-rw-r--r--libc/src/__support/GPU/nvptx/utils.h6
-rw-r--r--libc/src/__support/RPC/rpc_util.h5
-rw-r--r--libc/src/time/gpu/nanosleep.cpp5
-rw-r--r--libc/src/time/gpu/time_utils.h21
5 files changed, 14 insertions, 32 deletions
diff --git a/libc/src/__support/GPU/amdgpu/utils.h b/libc/src/__support/GPU/amdgpu/utils.h
index 9432b7b..75f0b57 100644
--- a/libc/src/__support/GPU/amdgpu/utils.h
+++ b/libc/src/__support/GPU/amdgpu/utils.h
@@ -152,14 +152,7 @@ LIBC_INLINE uint64_t processor_clock() { return __builtin_readcyclecounter(); }
/// Returns a fixed-frequency timestamp. The actual frequency is dependent on
/// the card and can only be queried via the driver.
LIBC_INLINE uint64_t fixed_frequency_clock() {
- if constexpr (LIBC_HAS_BUILTIN(__builtin_amdgcn_s_sendmsg_rtnl))
- return __builtin_amdgcn_s_sendmsg_rtnl(0x83);
- else if constexpr (LIBC_HAS_BUILTIN(__builtin_amdgcn_s_memrealtime))
- return __builtin_amdgcn_s_memrealtime();
- else if constexpr (LIBC_HAS_BUILTIN(__builtin_amdgcn_s_memtime))
- return __builtin_amdgcn_s_memtime();
- else
- return 0;
+ return __builtin_readsteadycounter();
}
/// Terminates execution of the associated wavefront.
diff --git a/libc/src/__support/GPU/nvptx/utils.h b/libc/src/__support/GPU/nvptx/utils.h
index 6c4bb5a..22a46e8 100644
--- a/libc/src/__support/GPU/nvptx/utils.h
+++ b/libc/src/__support/GPU/nvptx/utils.h
@@ -135,13 +135,11 @@ LIBC_INLINE uint32_t get_lane_size() { return 32; }
}
/// Returns the current value of the GPU's processor clock.
-LIBC_INLINE uint64_t processor_clock() {
- return __nvvm_read_ptx_sreg_clock64();
-}
+LIBC_INLINE uint64_t processor_clock() { return __builtin_readcyclecounter(); }
/// Returns a global fixed-frequency timer at nanosecond frequency.
LIBC_INLINE uint64_t fixed_frequency_clock() {
- return __nvvm_read_ptx_sreg_globaltimer();
+ return __builtin_readsteadycounter();
}
/// Terminates execution of the calling thread.
diff --git a/libc/src/__support/RPC/rpc_util.h b/libc/src/__support/RPC/rpc_util.h
index ff95692..cc2a11a 100644
--- a/libc/src/__support/RPC/rpc_util.h
+++ b/libc/src/__support/RPC/rpc_util.h
@@ -21,8 +21,9 @@ namespace rpc {
/// Suspend the thread briefly to assist the thread scheduler during busy loops.
LIBC_INLINE void sleep_briefly() {
-#if defined(LIBC_TARGET_ARCH_IS_NVPTX) && __CUDA_ARCH__ >= 700
- __nvvm_nanosleep(64);
+#if defined(LIBC_TARGET_ARCH_IS_NVPTX)
+ if (__nvvm_reflect("__CUDA_ARCH") >= 700)
+ LIBC_INLINE_ASM("nanosleep.u32 64;" ::: "memory");
#elif defined(LIBC_TARGET_ARCH_IS_AMDGPU)
__builtin_amdgcn_s_sleep(2);
#elif defined(LIBC_TARGET_ARCH_IS_X86)
diff --git a/libc/src/time/gpu/nanosleep.cpp b/libc/src/time/gpu/nanosleep.cpp
index 34ff904..dd669ff 100644
--- a/libc/src/time/gpu/nanosleep.cpp
+++ b/libc/src/time/gpu/nanosleep.cpp
@@ -23,14 +23,15 @@ LLVM_LIBC_FUNCTION(int, nanosleep,
uint64_t tick_rate = TICKS_PER_SEC / GPU_CLOCKS_PER_SEC;
uint64_t start = gpu::fixed_frequency_clock();
-#if defined(LIBC_TARGET_ARCH_IS_NVPTX) && __CUDA_ARCH__ >= 700
+#if defined(LIBC_TARGET_ARCH_IS_NVPTX)
uint64_t end = start + (nsecs + tick_rate - 1) / tick_rate;
uint64_t cur = gpu::fixed_frequency_clock();
// The NVPTX architecture supports sleeping and guaruntees the actual time
// slept will be somewhere between zero and twice the requested amount. Here
// we will sleep again if we undershot the time.
while (cur < end) {
- __nvvm_nanosleep(static_cast<uint32_t>(nsecs));
+ if (__nvvm_reflect("__CUDA_ARCH") >= 700)
+ LIBC_INLINE_ASM("nanosleep.u32 %0;" ::"r"(nsecs));
cur = gpu::fixed_frequency_clock();
nsecs -= nsecs > cur - start ? cur - start : 0;
}
diff --git a/libc/src/time/gpu/time_utils.h b/libc/src/time/gpu/time_utils.h
index 531a748..8a9a5f0 100644
--- a/libc/src/time/gpu/time_utils.h
+++ b/libc/src/time/gpu/time_utils.h
@@ -15,24 +15,13 @@ namespace LIBC_NAMESPACE {
#if defined(LIBC_TARGET_ARCH_IS_AMDGPU)
// AMDGPU does not have a single set frequency. Different architectures and
-// cards can have vary values. Here we default to a few known values, but for
-// complete support the frequency needs to be read from the kernel driver.
-#if defined(__GFX10__) || defined(__GFX11__) || defined(__GFX12__) || \
- defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)
-// These architectures use a 100 MHz fixed frequency clock.
-constexpr uint64_t clock_freq = 100000000;
-#elif defined(__GFX9__)
-// These architectures use a 25 MHz fixed frequency clock expect for Vega 10
-// which is actually 27 Mhz. We default to 25 MHz in all cases anyway.
-constexpr uint64_t clock_freq = 25000000;
-#else
-// The frequency for these architecture is unknown. We simply default to zero.
-constexpr uint64_t clock_freq = 0;
-#endif
+// cards can have different values. The actualy frequency needs to be read from
+// the kernel driver and will be between 25 MHz and 100 MHz on most cards. All
+// cards following the GFX9 ISAs use a 100 MHz clock so we will default to that.
+constexpr uint64_t clock_freq = 100000000UL;
// We provide an externally visible symbol such that the runtime can set
-// this to the correct value. If it is not set we try to default to the
-// known values.
+// this to the correct value.
extern "C" [[gnu::visibility("protected")]] uint64_t
[[clang::address_space(4)]] __llvm_libc_clock_freq;
#define GPU_CLOCKS_PER_SEC static_cast<clock_t>(__llvm_libc_clock_freq)