about | summary | refs | log | tree | commit | diff
path: root/clang/lib
diff options
context:
space:
mode:
author: Johannes Doerfert <johannes@jdoerfert.de> 2022-07-11 21:42:16 -0500
committer: Johannes Doerfert <johannes@jdoerfert.de> 2022-07-21 12:36:54 -0500
commit: 48d6f5240187573881f96cc9574ea09592f50723 (patch)
tree: f3a782e9031cf73f6cc625c440ff75a1be682091 /clang/lib
parent: d150152615074190d20492512da439cd5820b04a (diff)
download: llvm-48d6f5240187573881f96cc9574ea09592f50723.zip
download: llvm-48d6f5240187573881f96cc9574ea09592f50723.tar.gz
download: llvm-48d6f5240187573881f96cc9574ea09592f50723.tar.bz2
[CUDA][FIX] Make shfl[_sync] for unsigned long long non-recursive
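A copy-paste error caused UB in the definition of the unsigned long long versions of the shfl intrinsics: each unsigned long long overload cast its argument to unsigned long long and then called the function of the same name, so it invoked itself unconditionally instead of forwarding to the long long overload. The overloads now cast the value to long long before forwarding and cast the result back to unsigned long long. Reported and diagnosed by @trws.

Differential Revision: https://reviews.llvm.org/D129536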
Diffstat (limited to 'clang/lib')
-rw-r--r--  clang/lib/Headers/__clang_cuda_intrinsics.h  8
1 file changed, 4 insertions, 4 deletions
diff --git a/clang/lib/Headers/__clang_cuda_intrinsics.h b/clang/lib/Headers/__clang_cuda_intrinsics.h
index cfd5eb8..b87413e 100644
--- a/clang/lib/Headers/__clang_cuda_intrinsics.h
+++ b/clang/lib/Headers/__clang_cuda_intrinsics.h
@@ -71,8 +71,8 @@
} \
inline __device__ unsigned long long __FnName( \
unsigned long long __val, __Type __offset, int __width = warpSize) { \
- return static_cast<unsigned long long>(::__FnName( \
- static_cast<unsigned long long>(__val), __offset, __width)); \
+ return static_cast<unsigned long long>( \
+ ::__FnName(static_cast<long long>(__val), __offset, __width)); \
} \
inline __device__ double __FnName(double __val, __Type __offset, \
int __width = warpSize) { \
@@ -139,8 +139,8 @@ __MAKE_SHUFFLES(__shfl_xor, __nvvm_shfl_bfly_i32, __nvvm_shfl_bfly_f32, 0x1f,
inline __device__ unsigned long long __FnName( \
unsigned int __mask, unsigned long long __val, __Type __offset, \
int __width = warpSize) { \
- return static_cast<unsigned long long>(::__FnName( \
- __mask, static_cast<unsigned long long>(__val), __offset, __width)); \
+ return static_cast<unsigned long long>( \
+ ::__FnName(__mask, static_cast<long long>(__val), __offset, __width)); \
} \
inline __device__ long __FnName(unsigned int __mask, long __val, \
__Type __offset, int __width = warpSize) { \