diff options
| | | |
|---|---|---|
| author | Tom Stellard <thomas.stellard@amd.com> | 2013-10-31 15:50:48 +0000 |
| committer | Tom Stellard <thomas.stellard@amd.com> | 2013-10-31 15:50:48 +0000 |
| commit | d2e83929a96630c8d790843865e6671d1addbae2 (patch) | |
| tree | 2648e776f75de96c11754e5692a1107584287f00 /libclc/r600 | |
| parent | 74e1d0a0a0510ffd965a0db8bafa4a549c2e32bd (diff) | |
| download | llvm-d2e83929a96630c8d790843865e6671d1addbae2.zip llvm-d2e83929a96630c8d790843865e6671d1addbae2.tar.gz llvm-d2e83929a96630c8d790843865e6671d1addbae2.tar.bz2 | |
R600: Set the noduplicate attribute on barrier() intrinsics

This will prevent LLVM optimization passes from creating illegal uses
of the barrier() intrinsic (e.g. calling barrier() from a conditional
that is not executed by all threads).

llvm-svn: 193753
Diffstat (limited to 'libclc/r600')

| | | |
|---|---|---|
| -rw-r--r-- | libclc/r600/lib/SOURCES | 1 |
| -rw-r--r-- | libclc/r600/lib/synchronization/barrier.cl | 15 |
| -rw-r--r-- | libclc/r600/lib/synchronization/barrier_impl.ll | 33 |

3 files changed, 30 insertions, 19 deletions
```diff
diff --git a/libclc/r600/lib/SOURCES b/libclc/r600/lib/SOURCES
index aac6d8f..d9fc897 100644
--- a/libclc/r600/lib/SOURCES
+++ b/libclc/r600/lib/SOURCES
@@ -8,4 +8,3 @@ workitem/get_global_size.ll
 synchronization/barrier.cl
 synchronization/barrier_impl.ll
 shared/vload.cl
-shared/vstore.cl
\ No newline at end of file
diff --git a/libclc/r600/lib/synchronization/barrier.cl b/libclc/r600/lib/synchronization/barrier.cl
index ac0b4b3..6f2900b 100644
--- a/libclc/r600/lib/synchronization/barrier.cl
+++ b/libclc/r600/lib/synchronization/barrier.cl
@@ -1,15 +1,10 @@
 
 #include <clc/clc.h>
 
-void barrier_local(void);
-void barrier_global(void);
-
-void barrier(cl_mem_fence_flags flags) {
-  if (flags & CLK_LOCAL_MEM_FENCE) {
-    barrier_local();
-  }
+_CLC_DEF int __clc_clk_local_mem_fence() {
+  return CLK_LOCAL_MEM_FENCE;
+}
 
-  if (flags & CLK_GLOBAL_MEM_FENCE) {
-    barrier_global();
-  }
+_CLC_DEF int __clc_clk_global_mem_fence() {
+  return CLK_GLOBAL_MEM_FENCE;
 }
diff --git a/libclc/r600/lib/synchronization/barrier_impl.ll b/libclc/r600/lib/synchronization/barrier_impl.ll
index 99ac018..3d8ee66 100644
--- a/libclc/r600/lib/synchronization/barrier_impl.ll
+++ b/libclc/r600/lib/synchronization/barrier_impl.ll
@@ -1,12 +1,29 @@
-declare void @llvm.AMDGPU.barrier.local() nounwind
-declare void @llvm.AMDGPU.barrier.global() nounwind
+declare i32 @__clc_clk_local_mem_fence() nounwind alwaysinline
+declare i32 @__clc_clk_global_mem_fence() nounwind alwaysinline
+declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
+declare void @llvm.AMDGPU.barrier.global() nounwind noduplicate
 
-define void @barrier_local() nounwind alwaysinline {
-  call void @llvm.AMDGPU.barrier.local()
-  ret void
-}
+define void @barrier(i32 %flags) nounwind noduplicate alwaysinline {
+barrier_local_test:
+  %CLK_LOCAL_MEM_FENCE = call i32 @__clc_clk_local_mem_fence()
+  %0 = and i32 %flags, %CLK_LOCAL_MEM_FENCE
+  %1 = icmp ne i32 %0, 0
+  br i1 %1, label %barrier_local, label %barrier_global_test
+
+barrier_local:
+  call void @llvm.AMDGPU.barrier.local() noduplicate
+  br label %barrier_global_test
+
+barrier_global_test:
+  %CLK_GLOBAL_MEM_FENCE = call i32 @__clc_clk_global_mem_fence()
+  %2 = and i32 %flags, %CLK_GLOBAL_MEM_FENCE
+  %3 = icmp ne i32 %2, 0
+  br i1 %3, label %barrier_global, label %done
+
+barrier_global:
+  call void @llvm.AMDGPU.barrier.global() noduplicate
+  br label %done
 
-define void @barrier_global() nounwind alwaysinline {
-  call void @llvm.AMDGPU.barrier.global()
+done:
   ret void
 }
```