R600: Set the noduplicate attribute on barrier() intrinsics

This will prevent LLVM optimization passes from creating illegal uses
of the barrier() intrinsic (e.g. calling barrier() from a conditional
that is not executed by all threads).

llvm-svn: 193753
This commit is contained in:
Tom Stellard 2013-10-31 15:50:48 +00:00
parent 74e1d0a0a0
commit d2e83929a9
3 changed files with 32 additions and 21 deletions

View File

@ -8,4 +8,3 @@ workitem/get_global_size.ll
synchronization/barrier.cl
synchronization/barrier_impl.ll
shared/vload.cl
shared/vstore.cl

View File

@ -1,15 +1,10 @@
#include <clc/clc.h>
void barrier_local(void);
void barrier_global(void);
void barrier(cl_mem_fence_flags flags) {
if (flags & CLK_LOCAL_MEM_FENCE) {
barrier_local();
}
if (flags & CLK_GLOBAL_MEM_FENCE) {
barrier_global();
}
/* Returns the CLK_LOCAL_MEM_FENCE flag value as an int.
 * The LLVM IR barrier() implementation calls this function to obtain the
 * flag constant, which it then ANDs with the caller's flags argument. */
_CLC_DEF int __clc_clk_local_mem_fence() {
return CLK_LOCAL_MEM_FENCE;
}
/* Returns the CLK_GLOBAL_MEM_FENCE flag value as an int.
 * The LLVM IR barrier() implementation calls this function to obtain the
 * flag constant, which it then ANDs with the caller's flags argument. */
_CLC_DEF int __clc_clk_global_mem_fence() {
return CLK_GLOBAL_MEM_FENCE;
}

View File

@ -1,12 +1,29 @@
declare void @llvm.AMDGPU.barrier.local() nounwind
declare void @llvm.AMDGPU.barrier.global() nounwind
declare i32 @__clc_clk_local_mem_fence() nounwind alwaysinline
declare i32 @__clc_clk_global_mem_fence() nounwind alwaysinline
declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
declare void @llvm.AMDGPU.barrier.global() nounwind noduplicate
; Thin wrapper: issues the llvm.AMDGPU.barrier.local intrinsic and returns.
; Declared nounwind and alwaysinline.
define void @barrier_local() nounwind alwaysinline {
call void @llvm.AMDGPU.barrier.local()
ret void
}
define void @barrier_global() nounwind alwaysinline {
call void @llvm.AMDGPU.barrier.global()
; barrier(flags): issues the AMDGPU local and/or global barrier intrinsic
; depending on which fence bits are set in %flags.
;
; %flags is ANDed with the values returned by __clc_clk_local_mem_fence() and
; __clc_clk_global_mem_fence(); each non-zero result triggers the matching
; intrinsic call. The function and both intrinsic call sites carry the
; noduplicate attribute, which the accompanying commit message says is there
; to stop LLVM optimization passes from creating illegal uses of the barrier
; (e.g. a barrier inside a conditional not executed by all threads).
define void @barrier(i32 %flags) nounwind noduplicate alwaysinline {
; Test the local-fence bit of %flags.
barrier_local_test:
%CLK_LOCAL_MEM_FENCE = call i32 @__clc_clk_local_mem_fence()
%0 = and i32 %flags, %CLK_LOCAL_MEM_FENCE
%1 = icmp ne i32 %0, 0
br i1 %1, label %barrier_local, label %barrier_global_test
; Local-fence bit set: issue the local barrier, then fall through to the global test.
barrier_local:
call void @llvm.AMDGPU.barrier.local() noduplicate
br label %barrier_global_test
; Test the global-fence bit of %flags (reached whether or not the local barrier ran).
barrier_global_test:
%CLK_GLOBAL_MEM_FENCE = call i32 @__clc_clk_global_mem_fence()
%2 = and i32 %flags, %CLK_GLOBAL_MEM_FENCE
%3 = icmp ne i32 %2, 0
br i1 %3, label %barrier_global, label %done
; Global-fence bit set: issue the global barrier.
barrier_global:
call void @llvm.AMDGPU.barrier.global() noduplicate
br label %done
; Common exit.
done:
ret void
}