aboutsummaryrefslogtreecommitdiff
path: root/libclc
diff options
context:
space:
mode:
authorDaniel Stone <daniels@collabora.com>2020-08-17 13:44:49 -0700
committerTom Stellard <tstellar@redhat.com>2020-08-17 13:55:48 -0700
commit3d21fa56f5f5afbbf16b35b199480af71e1189a3 (patch)
tree8314d6f2ac8ae9695f19bac8a92016b22d83b344 /libclc
parent3a7051d9c28e3dd6da5048d91b74fad830728e93 (diff)
downloadllvm-3d21fa56f5f5afbbf16b35b199480af71e1189a3.zip
llvm-3d21fa56f5f5afbbf16b35b199480af71e1189a3.tar.gz
llvm-3d21fa56f5f5afbbf16b35b199480af71e1189a3.tar.bz2
libclc: Make all built-ins overloadable
The SPIR spec states that all OpenCL built-in functions should be overloadable and mangled, to ensure consistency. Add the overload attribute to functions which were missing them: work dimensions, memory barriers and fences, and events. Reviewed By: tstellar, jenatali Differential Revision: https://reviews.llvm.org/D82078
Diffstat (limited to 'libclc')
-rw-r--r--libclc/amdgcn-amdhsa/lib/workitem/get_global_size.cl11
-rw-r--r--libclc/amdgcn-amdhsa/lib/workitem/get_local_size.cl23
-rw-r--r--libclc/amdgcn-amdhsa/lib/workitem/get_num_groups.cl2
-rw-r--r--libclc/amdgcn/lib/mem_fence/fence.cl25
-rw-r--r--libclc/amdgcn/lib/synchronization/barrier.cl7
-rw-r--r--libclc/amdgcn/lib/workitem/get_global_offset.cl12
-rw-r--r--libclc/amdgcn/lib/workitem/get_global_size.cl19
-rw-r--r--libclc/amdgcn/lib/workitem/get_group_id.cl19
-rw-r--r--libclc/amdgcn/lib/workitem/get_local_id.cl19
-rw-r--r--libclc/amdgcn/lib/workitem/get_local_size.cl19
-rw-r--r--libclc/amdgcn/lib/workitem/get_num_groups.cl19
-rw-r--r--libclc/amdgcn/lib/workitem/get_work_dim.cl8
-rw-r--r--libclc/generic/include/clc/async/wait_group_events.h3
-rw-r--r--libclc/generic/include/clc/explicit_fence/explicit_memory_fence.h6
-rw-r--r--libclc/generic/include/clc/synchronization/barrier.h2
-rw-r--r--libclc/generic/include/clc/workitem/get_global_id.h2
-rw-r--r--libclc/generic/include/clc/workitem/get_global_offset.h2
-rw-r--r--libclc/generic/include/clc/workitem/get_global_size.h2
-rw-r--r--libclc/generic/include/clc/workitem/get_group_id.h2
-rw-r--r--libclc/generic/include/clc/workitem/get_local_id.h2
-rw-r--r--libclc/generic/include/clc/workitem/get_local_size.h2
-rw-r--r--libclc/generic/include/clc/workitem/get_num_groups.h2
-rw-r--r--libclc/generic/include/clc/workitem/get_work_dim.h2
-rw-r--r--libclc/generic/lib/async/wait_group_events.cl3
-rw-r--r--libclc/generic/lib/workitem/get_global_id.cl2
-rw-r--r--libclc/generic/lib/workitem/get_global_size.cl2
-rw-r--r--libclc/ptx-nvidiacl/lib/mem_fence/fence.cl10
-rw-r--r--libclc/ptx-nvidiacl/lib/synchronization/barrier.cl3
-rw-r--r--libclc/ptx-nvidiacl/lib/workitem/get_global_id.cl2
-rw-r--r--libclc/ptx-nvidiacl/lib/workitem/get_group_id.cl2
-rw-r--r--libclc/ptx-nvidiacl/lib/workitem/get_local_id.cl2
-rw-r--r--libclc/ptx-nvidiacl/lib/workitem/get_local_size.cl2
-rw-r--r--libclc/ptx-nvidiacl/lib/workitem/get_num_groups.cl2
-rw-r--r--libclc/r600/lib/synchronization/barrier.cl3
-rw-r--r--libclc/r600/lib/workitem/get_global_offset.cl15
-rw-r--r--libclc/r600/lib/workitem/get_global_size.cl19
-rw-r--r--libclc/r600/lib/workitem/get_group_id.cl19
-rw-r--r--libclc/r600/lib/workitem/get_local_id.cl19
-rw-r--r--libclc/r600/lib/workitem/get_local_size.cl19
-rw-r--r--libclc/r600/lib/workitem/get_num_groups.cl19
-rw-r--r--libclc/r600/lib/workitem/get_work_dim.cl11
41 files changed, 191 insertions, 173 deletions
diff --git a/libclc/amdgcn-amdhsa/lib/workitem/get_global_size.cl b/libclc/amdgcn-amdhsa/lib/workitem/get_global_size.cl
index 2f95f99..62bd2ba2 100644
--- a/libclc/amdgcn-amdhsa/lib/workitem/get_global_size.cl
+++ b/libclc/amdgcn-amdhsa/lib/workitem/get_global_size.cl
@@ -15,10 +15,9 @@
CONST_AS uchar * __clc_amdgcn_dispatch_ptr(void) __asm("llvm.amdgcn.dispatch.ptr");
#endif
-_CLC_DEF size_t get_global_size(uint dim)
-{
- CONST_AS uint * ptr = (CONST_AS uint *) __dispatch_ptr();
- if (dim < 3)
- return ptr[3 + dim];
- return 1;
+_CLC_DEF _CLC_OVERLOAD size_t get_global_size(uint dim) {
+ CONST_AS uint *ptr = (CONST_AS uint *)__dispatch_ptr();
+ if (dim < 3)
+ return ptr[3 + dim];
+ return 1;
}
diff --git a/libclc/amdgcn-amdhsa/lib/workitem/get_local_size.cl b/libclc/amdgcn-amdhsa/lib/workitem/get_local_size.cl
index 9f208d8..9f09fd5 100644
--- a/libclc/amdgcn-amdhsa/lib/workitem/get_local_size.cl
+++ b/libclc/amdgcn-amdhsa/lib/workitem/get_local_size.cl
@@ -15,16 +15,15 @@
CONST_AS char * __clc_amdgcn_dispatch_ptr(void) __asm("llvm.amdgcn.dispatch.ptr");
#endif
-_CLC_DEF size_t get_local_size(uint dim)
-{
- CONST_AS uint * ptr = (CONST_AS uint *) __dispatch_ptr();
- switch (dim) {
- case 0:
- return ptr[1] & 0xffffu;
- case 1:
- return ptr[1] >> 16;
- case 2:
- return ptr[2] & 0xffffu;
- }
- return 1;
+_CLC_DEF _CLC_OVERLOAD size_t get_local_size(uint dim) {
+ CONST_AS uint *ptr = (CONST_AS uint *)__dispatch_ptr();
+ switch (dim) {
+ case 0:
+ return ptr[1] & 0xffffu;
+ case 1:
+ return ptr[1] >> 16;
+ case 2:
+ return ptr[2] & 0xffffu;
+ }
+ return 1;
}
diff --git a/libclc/amdgcn-amdhsa/lib/workitem/get_num_groups.cl b/libclc/amdgcn-amdhsa/lib/workitem/get_num_groups.cl
index 946b526..35dc221 100644
--- a/libclc/amdgcn-amdhsa/lib/workitem/get_num_groups.cl
+++ b/libclc/amdgcn-amdhsa/lib/workitem/get_num_groups.cl
@@ -1,7 +1,7 @@
#include <clc/clc.h>
-_CLC_DEF size_t get_num_groups(uint dim) {
+_CLC_DEF _CLC_OVERLOAD size_t get_num_groups(uint dim) {
size_t global_size = get_global_size(dim);
size_t local_size = get_local_size(dim);
size_t num_groups = global_size / local_size;
diff --git a/libclc/amdgcn/lib/mem_fence/fence.cl b/libclc/amdgcn/lib/mem_fence/fence.cl
index b85baf7..c7a10bb 100644
--- a/libclc/amdgcn/lib/mem_fence/fence.cl
+++ b/libclc/amdgcn/lib/mem_fence/fence.cl
@@ -17,24 +17,21 @@ void __clc_amdgcn_s_waitcnt(unsigned flags);
_CLC_DEF void __clc_amdgcn_s_waitcnt(unsigned) __asm("llvm.amdgcn.s.waitcnt");
#endif
-_CLC_DEF void mem_fence(cl_mem_fence_flags flags)
-{
- if (flags & CLK_GLOBAL_MEM_FENCE) {
- // scalar loads are counted with LGKM but we don't know whether
- // the compiler turned any loads to scalar
- __waitcnt(0);
- } else if (flags & CLK_LOCAL_MEM_FENCE)
- __waitcnt(0xff); // LGKM is [12:8]
+_CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
+ if (flags & CLK_GLOBAL_MEM_FENCE) {
+ // scalar loads are counted with LGKM but we don't know whether
+ // the compiler turned any loads to scalar
+ __waitcnt(0);
+ } else if (flags & CLK_LOCAL_MEM_FENCE)
+ __waitcnt(0xff); // LGKM is [12:8]
}
#undef __waitcnt
// We don't have separate mechanism for read and write fences
-_CLC_DEF void read_mem_fence(cl_mem_fence_flags flags)
-{
- mem_fence(flags);
+_CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) {
+ mem_fence(flags);
}
-_CLC_DEF void write_mem_fence(cl_mem_fence_flags flags)
-{
- mem_fence(flags);
+_CLC_DEF _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags) {
+ mem_fence(flags);
}
diff --git a/libclc/amdgcn/lib/synchronization/barrier.cl b/libclc/amdgcn/lib/synchronization/barrier.cl
index e2f3c13..82bbd4b 100644
--- a/libclc/amdgcn/lib/synchronization/barrier.cl
+++ b/libclc/amdgcn/lib/synchronization/barrier.cl
@@ -1,7 +1,6 @@
#include <clc/clc.h>
-_CLC_DEF void barrier(cl_mem_fence_flags flags)
-{
- mem_fence(flags);
- __builtin_amdgcn_s_barrier();
+_CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) {
+ mem_fence(flags);
+ __builtin_amdgcn_s_barrier();
}
diff --git a/libclc/amdgcn/lib/workitem/get_global_offset.cl b/libclc/amdgcn/lib/workitem/get_global_offset.cl
index 0a87cd2..73d5694 100644
--- a/libclc/amdgcn/lib/workitem/get_global_offset.cl
+++ b/libclc/amdgcn/lib/workitem/get_global_offset.cl
@@ -8,11 +8,9 @@
#define CONST_AS __attribute__((address_space(2)))
#endif
-_CLC_DEF size_t get_global_offset(uint dim)
-{
- CONST_AS uint * ptr =
- (CONST_AS uint *) __builtin_amdgcn_implicitarg_ptr();
- if (dim < 3)
- return ptr[dim + 1];
- return 0;
+_CLC_DEF _CLC_OVERLOAD size_t get_global_offset(uint dim) {
+ CONST_AS uint *ptr = (CONST_AS uint *)__builtin_amdgcn_implicitarg_ptr();
+ if (dim < 3)
+ return ptr[dim + 1];
+ return 0;
}
diff --git a/libclc/amdgcn/lib/workitem/get_global_size.cl b/libclc/amdgcn/lib/workitem/get_global_size.cl
index c1e3894..2f28ca6 100644
--- a/libclc/amdgcn/lib/workitem/get_global_size.cl
+++ b/libclc/amdgcn/lib/workitem/get_global_size.cl
@@ -4,12 +4,15 @@ uint __clc_amdgcn_get_global_size_x(void) __asm("llvm.r600.read.global.size.x");
uint __clc_amdgcn_get_global_size_y(void) __asm("llvm.r600.read.global.size.y");
uint __clc_amdgcn_get_global_size_z(void) __asm("llvm.r600.read.global.size.z");
-_CLC_DEF size_t get_global_size(uint dim)
-{
- switch (dim) {
- case 0: return __clc_amdgcn_get_global_size_x();
- case 1: return __clc_amdgcn_get_global_size_y();
- case 2: return __clc_amdgcn_get_global_size_z();
- default: return 1;
- }
+_CLC_DEF _CLC_OVERLOAD size_t get_global_size(uint dim) {
+ switch (dim) {
+ case 0:
+ return __clc_amdgcn_get_global_size_x();
+ case 1:
+ return __clc_amdgcn_get_global_size_y();
+ case 2:
+ return __clc_amdgcn_get_global_size_z();
+ default:
+ return 1;
+ }
}
diff --git a/libclc/amdgcn/lib/workitem/get_group_id.cl b/libclc/amdgcn/lib/workitem/get_group_id.cl
index eb57b3e..211c86e 100644
--- a/libclc/amdgcn/lib/workitem/get_group_id.cl
+++ b/libclc/amdgcn/lib/workitem/get_group_id.cl
@@ -1,11 +1,14 @@
#include <clc/clc.h>
-_CLC_DEF size_t get_group_id(uint dim)
-{
- switch(dim) {
- case 0: return __builtin_amdgcn_workgroup_id_x();
- case 1: return __builtin_amdgcn_workgroup_id_y();
- case 2: return __builtin_amdgcn_workgroup_id_z();
- default: return 1;
- }
+_CLC_DEF _CLC_OVERLOAD size_t get_group_id(uint dim) {
+ switch (dim) {
+ case 0:
+ return __builtin_amdgcn_workgroup_id_x();
+ case 1:
+ return __builtin_amdgcn_workgroup_id_y();
+ case 2:
+ return __builtin_amdgcn_workgroup_id_z();
+ default:
+ return 1;
+ }
}
diff --git a/libclc/amdgcn/lib/workitem/get_local_id.cl b/libclc/amdgcn/lib/workitem/get_local_id.cl
index 9f666de..073ecfa 100644
--- a/libclc/amdgcn/lib/workitem/get_local_id.cl
+++ b/libclc/amdgcn/lib/workitem/get_local_id.cl
@@ -1,11 +1,14 @@
#include <clc/clc.h>
-_CLC_DEF size_t get_local_id(uint dim)
-{
- switch(dim) {
- case 0: return __builtin_amdgcn_workitem_id_x();
- case 1: return __builtin_amdgcn_workitem_id_y();
- case 2: return __builtin_amdgcn_workitem_id_z();
- default: return 1;
- }
+_CLC_DEF _CLC_OVERLOAD size_t get_local_id(uint dim) {
+ switch (dim) {
+ case 0:
+ return __builtin_amdgcn_workitem_id_x();
+ case 1:
+ return __builtin_amdgcn_workitem_id_y();
+ case 2:
+ return __builtin_amdgcn_workitem_id_z();
+ default:
+ return 1;
+ }
}
diff --git a/libclc/amdgcn/lib/workitem/get_local_size.cl b/libclc/amdgcn/lib/workitem/get_local_size.cl
index 9b19f6b..c398b7e 100644
--- a/libclc/amdgcn/lib/workitem/get_local_size.cl
+++ b/libclc/amdgcn/lib/workitem/get_local_size.cl
@@ -4,12 +4,15 @@ uint __clc_amdgcn_get_local_size_x(void) __asm("llvm.r600.read.local.size.x");
uint __clc_amdgcn_get_local_size_y(void) __asm("llvm.r600.read.local.size.y");
uint __clc_amdgcn_get_local_size_z(void) __asm("llvm.r600.read.local.size.z");
-_CLC_DEF size_t get_local_size(uint dim)
-{
- switch (dim) {
- case 0: return __clc_amdgcn_get_local_size_x();
- case 1: return __clc_amdgcn_get_local_size_y();
- case 2: return __clc_amdgcn_get_local_size_z();
- default: return 1;
- }
+_CLC_DEF _CLC_OVERLOAD size_t get_local_size(uint dim) {
+ switch (dim) {
+ case 0:
+ return __clc_amdgcn_get_local_size_x();
+ case 1:
+ return __clc_amdgcn_get_local_size_y();
+ case 2:
+ return __clc_amdgcn_get_local_size_z();
+ default:
+ return 1;
+ }
}
diff --git a/libclc/amdgcn/lib/workitem/get_num_groups.cl b/libclc/amdgcn/lib/workitem/get_num_groups.cl
index f921414..020741e 100644
--- a/libclc/amdgcn/lib/workitem/get_num_groups.cl
+++ b/libclc/amdgcn/lib/workitem/get_num_groups.cl
@@ -4,12 +4,15 @@ uint __clc_amdgcn_get_num_groups_x(void) __asm("llvm.r600.read.ngroups.x");
uint __clc_amdgcn_get_num_groups_y(void) __asm("llvm.r600.read.ngroups.y");
uint __clc_amdgcn_get_num_groups_z(void) __asm("llvm.r600.read.ngroups.z");
-_CLC_DEF size_t get_num_groups(uint dim)
-{
- switch (dim) {
- case 0: return __clc_amdgcn_get_num_groups_x();
- case 1: return __clc_amdgcn_get_num_groups_y();
- case 2: return __clc_amdgcn_get_num_groups_z();
- default: return 1;
- }
+_CLC_DEF _CLC_OVERLOAD size_t get_num_groups(uint dim) {
+ switch (dim) {
+ case 0:
+ return __clc_amdgcn_get_num_groups_x();
+ case 1:
+ return __clc_amdgcn_get_num_groups_y();
+ case 2:
+ return __clc_amdgcn_get_num_groups_z();
+ default:
+ return 1;
+ }
}
diff --git a/libclc/amdgcn/lib/workitem/get_work_dim.cl b/libclc/amdgcn/lib/workitem/get_work_dim.cl
index 3add9b6..cb8cf83 100644
--- a/libclc/amdgcn/lib/workitem/get_work_dim.cl
+++ b/libclc/amdgcn/lib/workitem/get_work_dim.cl
@@ -8,9 +8,7 @@
#define CONST_AS __attribute__((address_space(2)))
#endif
-_CLC_DEF uint get_work_dim(void)
-{
- CONST_AS uint * ptr =
- (CONST_AS uint *) __builtin_amdgcn_implicitarg_ptr();
- return ptr[0];
+_CLC_DEF _CLC_OVERLOAD uint get_work_dim(void) {
+ CONST_AS uint *ptr = (CONST_AS uint *)__builtin_amdgcn_implicitarg_ptr();
+ return ptr[0];
}
diff --git a/libclc/generic/include/clc/async/wait_group_events.h b/libclc/generic/include/clc/async/wait_group_events.h
index 799efa0..d707f4c 100644
--- a/libclc/generic/include/clc/async/wait_group_events.h
+++ b/libclc/generic/include/clc/async/wait_group_events.h
@@ -1 +1,2 @@
-void wait_group_events(int num_events, event_t *event_list);
+_CLC_DECL _CLC_OVERLOAD void wait_group_events(int num_events,
+ event_t *event_list);
diff --git a/libclc/generic/include/clc/explicit_fence/explicit_memory_fence.h b/libclc/generic/include/clc/explicit_fence/explicit_memory_fence.h
index 8e046b1..05c6d79 100644
--- a/libclc/generic/include/clc/explicit_fence/explicit_memory_fence.h
+++ b/libclc/generic/include/clc/explicit_fence/explicit_memory_fence.h
@@ -1,3 +1,3 @@
-_CLC_DECL void mem_fence(cl_mem_fence_flags flags);
-_CLC_DECL void read_mem_fence(cl_mem_fence_flags flags);
-_CLC_DECL void write_mem_fence(cl_mem_fence_flags flags);
+_CLC_DECL _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags);
+_CLC_DECL _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags);
+_CLC_DECL _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags);
diff --git a/libclc/generic/include/clc/synchronization/barrier.h b/libclc/generic/include/clc/synchronization/barrier.h
index 7167a3d..63e3ac5 100644
--- a/libclc/generic/include/clc/synchronization/barrier.h
+++ b/libclc/generic/include/clc/synchronization/barrier.h
@@ -1 +1 @@
-_CLC_DECL void barrier(cl_mem_fence_flags flags);
+_CLC_DECL _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags);
diff --git a/libclc/generic/include/clc/workitem/get_global_id.h b/libclc/generic/include/clc/workitem/get_global_id.h
index 92759f1..3bbace0 100644
--- a/libclc/generic/include/clc/workitem/get_global_id.h
+++ b/libclc/generic/include/clc/workitem/get_global_id.h
@@ -1 +1 @@
-_CLC_DECL size_t get_global_id(uint dim);
+_CLC_DECL _CLC_OVERLOAD size_t get_global_id(uint dim);
diff --git a/libclc/generic/include/clc/workitem/get_global_offset.h b/libclc/generic/include/clc/workitem/get_global_offset.h
index 7f4f603..ad7b441 100644
--- a/libclc/generic/include/clc/workitem/get_global_offset.h
+++ b/libclc/generic/include/clc/workitem/get_global_offset.h
@@ -1 +1 @@
-_CLC_DECL size_t get_global_offset(uint dim);
+_CLC_DECL _CLC_OVERLOAD size_t get_global_offset(uint dim);
diff --git a/libclc/generic/include/clc/workitem/get_global_size.h b/libclc/generic/include/clc/workitem/get_global_size.h
index 2f83705..1b7ccf7 100644
--- a/libclc/generic/include/clc/workitem/get_global_size.h
+++ b/libclc/generic/include/clc/workitem/get_global_size.h
@@ -1 +1 @@
-_CLC_DECL size_t get_global_size(uint dim);
+_CLC_DECL _CLC_OVERLOAD size_t get_global_size(uint dim);
diff --git a/libclc/generic/include/clc/workitem/get_group_id.h b/libclc/generic/include/clc/workitem/get_group_id.h
index 346c82c..b71fbc1 100644
--- a/libclc/generic/include/clc/workitem/get_group_id.h
+++ b/libclc/generic/include/clc/workitem/get_group_id.h
@@ -1 +1 @@
-_CLC_DECL size_t get_group_id(uint dim);
+_CLC_DECL _CLC_OVERLOAD size_t get_group_id(uint dim);
diff --git a/libclc/generic/include/clc/workitem/get_local_id.h b/libclc/generic/include/clc/workitem/get_local_id.h
index 169aeed..60aa1ec 100644
--- a/libclc/generic/include/clc/workitem/get_local_id.h
+++ b/libclc/generic/include/clc/workitem/get_local_id.h
@@ -1 +1 @@
-_CLC_DECL size_t get_local_id(uint dim);
+_CLC_DECL _CLC_OVERLOAD size_t get_local_id(uint dim);
diff --git a/libclc/generic/include/clc/workitem/get_local_size.h b/libclc/generic/include/clc/workitem/get_local_size.h
index 040ec58..808730f 100644
--- a/libclc/generic/include/clc/workitem/get_local_size.h
+++ b/libclc/generic/include/clc/workitem/get_local_size.h
@@ -1 +1 @@
-_CLC_DECL size_t get_local_size(uint dim);
+_CLC_DECL _CLC_OVERLOAD size_t get_local_size(uint dim);
diff --git a/libclc/generic/include/clc/workitem/get_num_groups.h b/libclc/generic/include/clc/workitem/get_num_groups.h
index e555c7e..8657eb7f 100644
--- a/libclc/generic/include/clc/workitem/get_num_groups.h
+++ b/libclc/generic/include/clc/workitem/get_num_groups.h
@@ -1 +1 @@
-_CLC_DECL size_t get_num_groups(uint dim);
+_CLC_DECL _CLC_OVERLOAD size_t get_num_groups(uint dim);
diff --git a/libclc/generic/include/clc/workitem/get_work_dim.h b/libclc/generic/include/clc/workitem/get_work_dim.h
index ae08ba9..8781b2a 100644
--- a/libclc/generic/include/clc/workitem/get_work_dim.h
+++ b/libclc/generic/include/clc/workitem/get_work_dim.h
@@ -1 +1 @@
-_CLC_DECL uint get_work_dim(void);
+_CLC_DECL _CLC_OVERLOAD uint get_work_dim(void);
diff --git a/libclc/generic/lib/async/wait_group_events.cl b/libclc/generic/lib/async/wait_group_events.cl
index 05c9d58..5f4eec3 100644
--- a/libclc/generic/lib/async/wait_group_events.cl
+++ b/libclc/generic/lib/async/wait_group_events.cl
@@ -1,5 +1,6 @@
#include <clc/clc.h>
-_CLC_DEF void wait_group_events(int num_events, event_t *event_list) {
+_CLC_DEF _CLC_OVERLOAD void wait_group_events(int num_events,
+ event_t *event_list) {
barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
}
diff --git a/libclc/generic/lib/workitem/get_global_id.cl b/libclc/generic/lib/workitem/get_global_id.cl
index b6c2ea1..ccd84d9 100644
--- a/libclc/generic/lib/workitem/get_global_id.cl
+++ b/libclc/generic/lib/workitem/get_global_id.cl
@@ -1,5 +1,5 @@
#include <clc/clc.h>
-_CLC_DEF size_t get_global_id(uint dim) {
+_CLC_DEF _CLC_OVERLOAD size_t get_global_id(uint dim) {
return get_group_id(dim) * get_local_size(dim) + get_local_id(dim) + get_global_offset(dim);
}
diff --git a/libclc/generic/lib/workitem/get_global_size.cl b/libclc/generic/lib/workitem/get_global_size.cl
index 5ae649e..9bc2607 100644
--- a/libclc/generic/lib/workitem/get_global_size.cl
+++ b/libclc/generic/lib/workitem/get_global_size.cl
@@ -1,5 +1,5 @@
#include <clc/clc.h>
-_CLC_DEF size_t get_global_size(uint dim) {
+_CLC_DEF _CLC_OVERLOAD size_t get_global_size(uint dim) {
return get_num_groups(dim)*get_local_size(dim);
}
diff --git a/libclc/ptx-nvidiacl/lib/mem_fence/fence.cl b/libclc/ptx-nvidiacl/lib/mem_fence/fence.cl
index 16b0391..de078b5 100644
--- a/libclc/ptx-nvidiacl/lib/mem_fence/fence.cl
+++ b/libclc/ptx-nvidiacl/lib/mem_fence/fence.cl
@@ -1,15 +1,15 @@
#include <clc/clc.h>
-_CLC_DEF void mem_fence(cl_mem_fence_flags flags) {
- if (flags & (CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE))
- __nvvm_membar_cta();
+_CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
+ if (flags & (CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE))
+ __nvvm_membar_cta();
}
// We do not have separate mechanism for read and write fences.
-_CLC_DEF void read_mem_fence(cl_mem_fence_flags flags) {
+_CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) {
mem_fence(flags);
}
-_CLC_DEF void write_mem_fence(cl_mem_fence_flags flags) {
+_CLC_DEF _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags) {
mem_fence(flags);
}
diff --git a/libclc/ptx-nvidiacl/lib/synchronization/barrier.cl b/libclc/ptx-nvidiacl/lib/synchronization/barrier.cl
index 930e36a..b3d99d7 100644
--- a/libclc/ptx-nvidiacl/lib/synchronization/barrier.cl
+++ b/libclc/ptx-nvidiacl/lib/synchronization/barrier.cl
@@ -1,6 +1,5 @@
#include <clc/clc.h>
-_CLC_DEF void barrier(cl_mem_fence_flags flags) {
+_CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) {
__syncthreads();
}
-
diff --git a/libclc/ptx-nvidiacl/lib/workitem/get_global_id.cl b/libclc/ptx-nvidiacl/lib/workitem/get_global_id.cl
index 19bc195..a7f5f59 100644
--- a/libclc/ptx-nvidiacl/lib/workitem/get_global_id.cl
+++ b/libclc/ptx-nvidiacl/lib/workitem/get_global_id.cl
@@ -1,5 +1,5 @@
#include <clc/clc.h>
-_CLC_DEF size_t get_global_id(uint dim) {
+_CLC_DEF _CLC_OVERLOAD size_t get_global_id(uint dim) {
return get_group_id(dim) * get_local_size(dim) + get_local_id(dim);
}
diff --git a/libclc/ptx-nvidiacl/lib/workitem/get_group_id.cl b/libclc/ptx-nvidiacl/lib/workitem/get_group_id.cl
index dbc4784..bbbf106 100644
--- a/libclc/ptx-nvidiacl/lib/workitem/get_group_id.cl
+++ b/libclc/ptx-nvidiacl/lib/workitem/get_group_id.cl
@@ -1,6 +1,6 @@
#include <clc/clc.h>
-_CLC_DEF size_t get_group_id(uint dim) {
+_CLC_DEF _CLC_OVERLOAD size_t get_group_id(uint dim) {
switch (dim) {
case 0: return __nvvm_read_ptx_sreg_ctaid_x();
case 1: return __nvvm_read_ptx_sreg_ctaid_y();
diff --git a/libclc/ptx-nvidiacl/lib/workitem/get_local_id.cl b/libclc/ptx-nvidiacl/lib/workitem/get_local_id.cl
index f31581a1..a6770f2 100644
--- a/libclc/ptx-nvidiacl/lib/workitem/get_local_id.cl
+++ b/libclc/ptx-nvidiacl/lib/workitem/get_local_id.cl
@@ -1,6 +1,6 @@
#include <clc/clc.h>
-_CLC_DEF size_t get_local_id(uint dim) {
+_CLC_DEF _CLC_OVERLOAD size_t get_local_id(uint dim) {
switch (dim) {
case 0: return __nvvm_read_ptx_sreg_tid_x();
case 1: return __nvvm_read_ptx_sreg_tid_y();
diff --git a/libclc/ptx-nvidiacl/lib/workitem/get_local_size.cl b/libclc/ptx-nvidiacl/lib/workitem/get_local_size.cl
index d00b0d6..5960d5d7 100644
--- a/libclc/ptx-nvidiacl/lib/workitem/get_local_size.cl
+++ b/libclc/ptx-nvidiacl/lib/workitem/get_local_size.cl
@@ -1,6 +1,6 @@
#include <clc/clc.h>
-_CLC_DEF size_t get_local_size(uint dim) {
+_CLC_DEF _CLC_OVERLOAD size_t get_local_size(uint dim) {
switch (dim) {
case 0: return __nvvm_read_ptx_sreg_ntid_x();
case 1: return __nvvm_read_ptx_sreg_ntid_y();
diff --git a/libclc/ptx-nvidiacl/lib/workitem/get_num_groups.cl b/libclc/ptx-nvidiacl/lib/workitem/get_num_groups.cl
index d7abf3f..f0e52f1 100644
--- a/libclc/ptx-nvidiacl/lib/workitem/get_num_groups.cl
+++ b/libclc/ptx-nvidiacl/lib/workitem/get_num_groups.cl
@@ -1,6 +1,6 @@
#include <clc/clc.h>
-_CLC_DEF size_t get_num_groups(uint dim) {
+_CLC_DEF _CLC_OVERLOAD size_t get_num_groups(uint dim) {
switch (dim) {
case 0: return __nvvm_read_ptx_sreg_nctaid_x();
case 1: return __nvvm_read_ptx_sreg_nctaid_y();
diff --git a/libclc/r600/lib/synchronization/barrier.cl b/libclc/r600/lib/synchronization/barrier.cl
index 98200e7..6a28ee3 100644
--- a/libclc/r600/lib/synchronization/barrier.cl
+++ b/libclc/r600/lib/synchronization/barrier.cl
@@ -2,8 +2,7 @@
_CLC_DEF void __clc_r600_barrier(void) __asm("llvm.r600.group.barrier");
-_CLC_DEF void barrier(uint flags)
-{
+_CLC_DEF _CLC_OVERLOAD void barrier(uint flags) {
// We should call mem_fence here, but that is not implemented for r600 yet
__clc_r600_barrier();
}
diff --git a/libclc/r600/lib/workitem/get_global_offset.cl b/libclc/r600/lib/workitem/get_global_offset.cl
index b38ae33..7c2e403 100644
--- a/libclc/r600/lib/workitem/get_global_offset.cl
+++ b/libclc/r600/lib/workitem/get_global_offset.cl
@@ -1,11 +1,10 @@
#include <clc/clc.h>
-_CLC_DEF uint get_global_offset(uint dim)
-{
- __attribute__((address_space(7))) uint * ptr =
- (__attribute__((address_space(7))) uint *)
- __builtin_r600_implicitarg_ptr();
- if (dim < 3)
- return ptr[dim + 1];
- return 0;
+_CLC_DEF _CLC_OVERLOAD uint get_global_offset(uint dim) {
+ __attribute__((address_space(7))) uint *ptr =
+ (__attribute__((address_space(7)))
+ uint *)__builtin_r600_implicitarg_ptr();
+ if (dim < 3)
+ return ptr[dim + 1];
+ return 0;
}
diff --git a/libclc/r600/lib/workitem/get_global_size.cl b/libclc/r600/lib/workitem/get_global_size.cl
index d356929c..6281361 100644
--- a/libclc/r600/lib/workitem/get_global_size.cl
+++ b/libclc/r600/lib/workitem/get_global_size.cl
@@ -4,12 +4,15 @@ uint __clc_r600_get_global_size_x(void) __asm("llvm.r600.read.global.size.x");
uint __clc_r600_get_global_size_y(void) __asm("llvm.r600.read.global.size.y");
uint __clc_r600_get_global_size_z(void) __asm("llvm.r600.read.global.size.z");
-_CLC_DEF size_t get_global_size(uint dim)
-{
- switch (dim) {
- case 0: return __clc_r600_get_global_size_x();
- case 1: return __clc_r600_get_global_size_y();
- case 2: return __clc_r600_get_global_size_z();
- default: return 1;
- }
+_CLC_DEF _CLC_OVERLOAD size_t get_global_size(uint dim) {
+ switch (dim) {
+ case 0:
+ return __clc_r600_get_global_size_x();
+ case 1:
+ return __clc_r600_get_global_size_y();
+ case 2:
+ return __clc_r600_get_global_size_z();
+ default:
+ return 1;
+ }
}
diff --git a/libclc/r600/lib/workitem/get_group_id.cl b/libclc/r600/lib/workitem/get_group_id.cl
index e5efc0a..1fb993a 100644
--- a/libclc/r600/lib/workitem/get_group_id.cl
+++ b/libclc/r600/lib/workitem/get_group_id.cl
@@ -1,11 +1,14 @@
#include <clc/clc.h>
-_CLC_DEF uint get_group_id(uint dim)
-{
- switch(dim) {
- case 0: return __builtin_r600_read_tgid_x();
- case 1: return __builtin_r600_read_tgid_y();
- case 2: return __builtin_r600_read_tgid_z();
- default: return 1;
- }
+_CLC_DEF _CLC_OVERLOAD uint get_group_id(uint dim) {
+ switch (dim) {
+ case 0:
+ return __builtin_r600_read_tgid_x();
+ case 1:
+ return __builtin_r600_read_tgid_y();
+ case 2:
+ return __builtin_r600_read_tgid_z();
+ default:
+ return 1;
+ }
}
diff --git a/libclc/r600/lib/workitem/get_local_id.cl b/libclc/r600/lib/workitem/get_local_id.cl
index a871a5d..80fdc34 100644
--- a/libclc/r600/lib/workitem/get_local_id.cl
+++ b/libclc/r600/lib/workitem/get_local_id.cl
@@ -1,11 +1,14 @@
#include <clc/clc.h>
-_CLC_DEF uint get_local_id(uint dim)
-{
- switch(dim) {
- case 0: return __builtin_r600_read_tidig_x();
- case 1: return __builtin_r600_read_tidig_y();
- case 2: return __builtin_r600_read_tidig_z();
- default: return 1;
- }
+_CLC_DEF _CLC_OVERLOAD uint get_local_id(uint dim) {
+ switch (dim) {
+ case 0:
+ return __builtin_r600_read_tidig_x();
+ case 1:
+ return __builtin_r600_read_tidig_y();
+ case 2:
+ return __builtin_r600_read_tidig_z();
+ default:
+ return 1;
+ }
}
diff --git a/libclc/r600/lib/workitem/get_local_size.cl b/libclc/r600/lib/workitem/get_local_size.cl
index 89e2612..6edab7c 100644
--- a/libclc/r600/lib/workitem/get_local_size.cl
+++ b/libclc/r600/lib/workitem/get_local_size.cl
@@ -4,12 +4,15 @@ uint __clc_r600_get_local_size_x(void) __asm("llvm.r600.read.local.size.x");
uint __clc_r600_get_local_size_y(void) __asm("llvm.r600.read.local.size.y");
uint __clc_r600_get_local_size_z(void) __asm("llvm.r600.read.local.size.z");
-_CLC_DEF size_t get_local_size(uint dim)
-{
- switch (dim) {
- case 0: return __clc_r600_get_local_size_x();
- case 1: return __clc_r600_get_local_size_y();
- case 2: return __clc_r600_get_local_size_z();
- default: return 1;
- }
+_CLC_DEF _CLC_OVERLOAD size_t get_local_size(uint dim) {
+ switch (dim) {
+ case 0:
+ return __clc_r600_get_local_size_x();
+ case 1:
+ return __clc_r600_get_local_size_y();
+ case 2:
+ return __clc_r600_get_local_size_z();
+ default:
+ return 1;
+ }
}
diff --git a/libclc/r600/lib/workitem/get_num_groups.cl b/libclc/r600/lib/workitem/get_num_groups.cl
index dfe6cef..ab4f5f6 100644
--- a/libclc/r600/lib/workitem/get_num_groups.cl
+++ b/libclc/r600/lib/workitem/get_num_groups.cl
@@ -4,12 +4,15 @@ uint __clc_r600_get_num_groups_x(void) __asm("llvm.r600.read.ngroups.x");
uint __clc_r600_get_num_groups_y(void) __asm("llvm.r600.read.ngroups.y");
uint __clc_r600_get_num_groups_z(void) __asm("llvm.r600.read.ngroups.z");
-_CLC_DEF size_t get_num_groups(uint dim)
-{
- switch (dim) {
- case 0: return __clc_r600_get_num_groups_x();
- case 1: return __clc_r600_get_num_groups_y();
- case 2: return __clc_r600_get_num_groups_z();
- default: return 1;
- }
+_CLC_DEF _CLC_OVERLOAD size_t get_num_groups(uint dim) {
+ switch (dim) {
+ case 0:
+ return __clc_r600_get_num_groups_x();
+ case 1:
+ return __clc_r600_get_num_groups_y();
+ case 2:
+ return __clc_r600_get_num_groups_z();
+ default:
+ return 1;
+ }
}
diff --git a/libclc/r600/lib/workitem/get_work_dim.cl b/libclc/r600/lib/workitem/get_work_dim.cl
index fccf716..e18a83b 100644
--- a/libclc/r600/lib/workitem/get_work_dim.cl
+++ b/libclc/r600/lib/workitem/get_work_dim.cl
@@ -1,9 +1,8 @@
#include <clc/clc.h>
-_CLC_DEF uint get_work_dim(void)
-{
- __attribute__((address_space(7))) uint * ptr =
- (__attribute__((address_space(7))) uint *)
- __builtin_r600_implicitarg_ptr();
- return ptr[0];
+_CLC_DEF _CLC_OVERLOAD uint get_work_dim(void) {
+ __attribute__((address_space(7))) uint *ptr =
+ (__attribute__((address_space(7)))
+ uint *)__builtin_r600_implicitarg_ptr();
+ return ptr[0];
}