aboutsummaryrefslogtreecommitdiff
path: root/libclc/r600
diff options
context:
space:
mode:
author Tom Stellard <thomas.stellard@amd.com> 2013-10-31 15:50:48 +0000
committer Tom Stellard <thomas.stellard@amd.com> 2013-10-31 15:50:48 +0000
commit d2e83929a96630c8d790843865e6671d1addbae2 (patch)
tree 2648e776f75de96c11754e5692a1107584287f00 /libclc/r600
parent 74e1d0a0a0510ffd965a0db8bafa4a549c2e32bd (diff)
download llvm-d2e83929a96630c8d790843865e6671d1addbae2.zip
llvm-d2e83929a96630c8d790843865e6671d1addbae2.tar.gz
llvm-d2e83929a96630c8d790843865e6671d1addbae2.tar.bz2
R600: Set the noduplicate attribute on barrier() intrinsics
This will prevent LLVM optimization passes from creating illegal uses of the barrier() intrinsic (e.g. calling barrier() from a conditional that is not executed by all threads).

llvm-svn: 193753
Diffstat (limited to 'libclc/r600')
-rw-r--r-- libclc/r600/lib/SOURCES 1
-rw-r--r-- libclc/r600/lib/synchronization/barrier.cl 15
-rw-r--r-- libclc/r600/lib/synchronization/barrier_impl.ll 33
3 files changed, 30 insertions, 19 deletions
diff --git a/libclc/r600/lib/SOURCES b/libclc/r600/lib/SOURCES
index aac6d8f..d9fc897 100644
--- a/libclc/r600/lib/SOURCES
+++ b/libclc/r600/lib/SOURCES
@@ -8,4 +8,3 @@ workitem/get_global_size.ll
synchronization/barrier.cl
synchronization/barrier_impl.ll
shared/vload.cl
-shared/vstore.cl
\ No newline at end of file
diff --git a/libclc/r600/lib/synchronization/barrier.cl b/libclc/r600/lib/synchronization/barrier.cl
index ac0b4b3..6f2900b 100644
--- a/libclc/r600/lib/synchronization/barrier.cl
+++ b/libclc/r600/lib/synchronization/barrier.cl
@@ -1,15 +1,10 @@
#include <clc/clc.h>
-void barrier_local(void);
-void barrier_global(void);
-
-void barrier(cl_mem_fence_flags flags) {
- if (flags & CLK_LOCAL_MEM_FENCE) {
- barrier_local();
- }
+_CLC_DEF int __clc_clk_local_mem_fence() {
+ return CLK_LOCAL_MEM_FENCE;
+}
- if (flags & CLK_GLOBAL_MEM_FENCE) {
- barrier_global();
- }
+_CLC_DEF int __clc_clk_global_mem_fence() {
+ return CLK_GLOBAL_MEM_FENCE;
}
diff --git a/libclc/r600/lib/synchronization/barrier_impl.ll b/libclc/r600/lib/synchronization/barrier_impl.ll
index 99ac018..3d8ee66 100644
--- a/libclc/r600/lib/synchronization/barrier_impl.ll
+++ b/libclc/r600/lib/synchronization/barrier_impl.ll
@@ -1,12 +1,29 @@
-declare void @llvm.AMDGPU.barrier.local() nounwind
-declare void @llvm.AMDGPU.barrier.global() nounwind
+declare i32 @__clc_clk_local_mem_fence() nounwind alwaysinline
+declare i32 @__clc_clk_global_mem_fence() nounwind alwaysinline
+declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
+declare void @llvm.AMDGPU.barrier.global() nounwind noduplicate
-define void @barrier_local() nounwind alwaysinline {
- call void @llvm.AMDGPU.barrier.local()
- ret void
-}
+define void @barrier(i32 %flags) nounwind noduplicate alwaysinline {
+barrier_local_test:
+ %CLK_LOCAL_MEM_FENCE = call i32 @__clc_clk_local_mem_fence()
+ %0 = and i32 %flags, %CLK_LOCAL_MEM_FENCE
+ %1 = icmp ne i32 %0, 0
+ br i1 %1, label %barrier_local, label %barrier_global_test
+
+barrier_local:
+ call void @llvm.AMDGPU.barrier.local() noduplicate
+ br label %barrier_global_test
+
+barrier_global_test:
+ %CLK_GLOBAL_MEM_FENCE = call i32 @__clc_clk_global_mem_fence()
+ %2 = and i32 %flags, %CLK_GLOBAL_MEM_FENCE
+ %3 = icmp ne i32 %2, 0
+ br i1 %3, label %barrier_global, label %done
+
+barrier_global:
+ call void @llvm.AMDGPU.barrier.global() noduplicate
+ br label %done
-define void @barrier_global() nounwind alwaysinline {
- call void @llvm.AMDGPU.barrier.global()
+done:
ret void
}