forked from OSchip/llvm-project
R600: Set the noduplicate attribute on barrier() intrinsics
This will prevent LLVM optimization passes from creating illegal uses of the barrier() intrinsic (e.g. calling barrier() from a conditional that is not executed by all threads).

llvm-svn: 193753
This commit is contained in:
parent
74e1d0a0a0
commit
d2e83929a9
|
@ -8,4 +8,3 @@ workitem/get_global_size.ll
|
|||
synchronization/barrier.cl
|
||||
synchronization/barrier_impl.ll
|
||||
shared/vload.cl
|
||||
shared/vstore.cl
|
|
@ -1,15 +1,10 @@
|
|||
|
||||
#include <clc/clc.h>
|
||||
|
||||
void barrier_local(void);
|
||||
void barrier_global(void);
|
||||
|
||||
void barrier(cl_mem_fence_flags flags) {
|
||||
if (flags & CLK_LOCAL_MEM_FENCE) {
|
||||
barrier_local();
|
||||
}
|
||||
|
||||
if (flags & CLK_GLOBAL_MEM_FENCE) {
|
||||
barrier_global();
|
||||
}
|
||||
_CLC_DEF int __clc_clk_local_mem_fence() {
|
||||
return CLK_LOCAL_MEM_FENCE;
|
||||
}
|
||||
|
||||
_CLC_DEF int __clc_clk_global_mem_fence() {
|
||||
return CLK_GLOBAL_MEM_FENCE;
|
||||
}
|
||||
|
|
|
@ -1,12 +1,29 @@
|
|||
declare void @llvm.AMDGPU.barrier.local() nounwind
|
||||
declare void @llvm.AMDGPU.barrier.global() nounwind
|
||||
declare i32 @__clc_clk_local_mem_fence() nounwind alwaysinline
|
||||
declare i32 @__clc_clk_global_mem_fence() nounwind alwaysinline
|
||||
declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
|
||||
declare void @llvm.AMDGPU.barrier.global() nounwind noduplicate
|
||||
|
||||
; Thin wrapper: invokes the llvm.AMDGPU.barrier.local intrinsic and returns.
define void @barrier_local() nounwind alwaysinline {
entry:
  call void @llvm.AMDGPU.barrier.local()
  ret void
}
|
||||
|
||||
define void @barrier_global() nounwind alwaysinline {
|
||||
call void @llvm.AMDGPU.barrier.global()
|
||||
; barrier(flags): issue the AMDGPU barrier intrinsic(s) selected by %flags.
;
; %flags is treated as a bitmask. It is ANDed with the value returned by
; @__clc_clk_local_mem_fence and, afterwards, with the value returned by
; @__clc_clk_global_mem_fence. Each non-zero result triggers the matching
; llvm.AMDGPU.barrier.* call. The local test always runs before the global
; test, and both may fire in a single invocation.
;
; The function and both intrinsic call sites carry the noduplicate attribute.
define void @barrier(i32 %flags) nounwind noduplicate alwaysinline {
check_local:
  ; Is the local-fence bit set in %flags?
  %local_bit = call i32 @__clc_clk_local_mem_fence()
  %local_masked = and i32 %flags, %local_bit
  %want_local = icmp ne i32 %local_masked, 0
  br i1 %want_local, label %emit_local, label %check_global

emit_local:
  call void @llvm.AMDGPU.barrier.local() noduplicate
  br label %check_global

check_global:
  ; Is the global-fence bit set in %flags?
  %global_bit = call i32 @__clc_clk_global_mem_fence()
  %global_masked = and i32 %flags, %global_bit
  %want_global = icmp ne i32 %global_masked, 0
  br i1 %want_global, label %emit_global, label %exit

emit_global:
  call void @llvm.AMDGPU.barrier.global() noduplicate
  br label %exit

exit:
  ret void
}
|
||||
|
|
Loading…
Reference in New Issue