aboutsummaryrefslogtreecommitdiff
path: root/openmp
diff options
context:
space:
mode:
authorShilei Tian <i@tianshilei.me>2024-05-26 00:57:50 -0400
committerShilei Tian <i@tianshilei.me>2024-05-26 00:57:50 -0400
commit7b4865582299294455bc816358fd88a9c6e5e0be (patch)
treeccf01a80a61d58bc1fbd4a01ec39e65017c26979 /openmp
parentac17fbc07682c40a08a429cd061a2c57662570a4 (diff)
downloadllvm-7b4865582299294455bc816358fd88a9c6e5e0be.zip
llvm-7b4865582299294455bc816358fd88a9c6e5e0be.tar.gz
llvm-7b4865582299294455bc816358fd88a9c6e5e0be.tar.bz2
Reapply "[OpenMP][OMPX] Add shfl_down_sync (#93311)"
This reverts commit 9b31cc71d66064dfaf2afabf4a835211321bb4a0.
Diffstat (limited to 'openmp')
-rw-r--r--openmp/runtime/src/include/ompx.h.var68
1 files changed, 60 insertions, 8 deletions
diff --git a/openmp/runtime/src/include/ompx.h.var b/openmp/runtime/src/include/ompx.h.var
index 1985188..623f0b9 100644
--- a/openmp/runtime/src/include/ompx.h.var
+++ b/openmp/runtime/src/include/ompx.h.var
@@ -9,6 +9,12 @@
#ifndef __OMPX_H
#define __OMPX_H
+#ifdef __AMDGCN_WAVEFRONT_SIZE
+#define __WARP_SIZE __AMDGCN_WAVEFRONT_SIZE
+#else
+#define __WARP_SIZE 32
+#endif
+
typedef unsigned long uint64_t;
#ifdef __cplusplus
@@ -75,11 +81,11 @@ _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(grid_dim, 1)
static inline RETTY ompx_##NAME(ARGS) { BODY; }
_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block, int Ordering,
- _Pragma("omp barrier"));
+ _Pragma("omp barrier"))
_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block_acq_rel, void,
- ompx_sync_block(ompx_acq_rel));
+ ompx_sync_block(ompx_acq_rel))
_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block_divergent, int Ordering,
- ompx_sync_block(Ordering));
+ ompx_sync_block(Ordering))
#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C
///}
@@ -87,6 +93,22 @@ static inline uint64_t ompx_ballot_sync(uint64_t mask, int pred) {
__builtin_trap();
}
+/// ompx_shfl_down_sync_{i,f,l,d}
+///{
+#define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(TYPE, TY) \
+ static inline TYPE ompx_shfl_down_sync_##TY(uint64_t mask, TYPE var, \
+ unsigned delta, int width) { \
+ __builtin_trap(); \
+ }
+
+_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(int, i)
+_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(float, f)
+_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(long, l)
+_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(double, d)
+
+#undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL
+///}
+
#pragma omp end declare variant
/// ompx_{sync_block}_{,divergent}
@@ -94,9 +116,9 @@ static inline uint64_t ompx_ballot_sync(uint64_t mask, int pred) {
#define _TGT_KERNEL_LANGUAGE_DECL_SYNC_C(RETTY, NAME, ARGS) \
RETTY ompx_##NAME(ARGS);
-_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block, int Ordering);
-_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_acq_rel, void);
-_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_divergent, int Ordering);
+_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block, int Ordering)
+_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_acq_rel, void)
+_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_divergent, int Ordering)
#undef _TGT_KERNEL_LANGUAGE_DECL_SYNC_C
///}
@@ -117,6 +139,20 @@ _TGT_KERNEL_LANGUAGE_DECL_GRID_C(grid_dim)
uint64_t ompx_ballot_sync(uint64_t mask, int pred);
+/// ompx_shfl_down_sync_{i,f,l,d}
+///{
+#define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(TYPE, TY) \
+ TYPE ompx_shfl_down_sync_##TY(uint64_t mask, TYPE var, unsigned delta, \
+ int width);
+
+_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(int, i)
+_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(float, f)
+_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(long, l)
+_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(double, d)
+
+#undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC
+///}
+
#ifdef __cplusplus
}
#endif
@@ -162,9 +198,9 @@ _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(grid_dim)
}
_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block, int Ordering = acc_rel,
- Ordering);
+ Ordering)
_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block_divergent,
- int Ordering = acc_rel, Ordering);
+ int Ordering = acc_rel, Ordering)
#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX
///}
@@ -172,6 +208,22 @@ static inline uint64_t ballot_sync(uint64_t mask, int pred) {
return ompx_ballot_sync(mask, pred);
}
+/// shfl_down_sync
+///{
+#define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(TYPE, TY) \
+ static inline TYPE shfl_down_sync(uint64_t mask, TYPE var, unsigned delta, \
+ int width = __WARP_SIZE) { \
+ return ompx_shfl_down_sync_##TY(mask, var, delta, width); \
+ }
+
+_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(int, i)
+_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(float, f)
+_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(long, l)
+_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(double, d)
+
+#undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC
+///}
+
} // namespace ompx
#endif