amdgcn: rewrite barrier() using fence and clang __builtin_amdgcn_s_barrier

Specs require using fences when barrier() is invoked:
"The barrier function will either flush any variables stored in local memory
or queue a memory fence to ensure correct ordering of memory operations to local memory."
and
"The barrier function will queue a memory fence to ensure correct ordering
of memory operations to global memory."

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
Reviewed-by: Aaron Watry <awatry@gmail.com>
Tested-by: Aaron Watry <awatry@gmail.com>
llvm-svn: 311022
This commit is contained in:
Jan Vesely 2017-08-16 17:09:00 +00:00
parent 1977092dc3
commit 999b1d9426
3 changed files with 8 additions and 33 deletions

View File

@ -1,7 +1,7 @@
math/ldexp.cl
mem_fence/fence.cl
mem_fence/waitcnt.ll
synchronization/barrier_impl.ll
synchronization/barrier.cl
workitem/get_global_offset.cl
workitem/get_group_id.cl
workitem/get_global_size.ll

View File

@ -0,0 +1,7 @@
#include <clc/clc.h>
/*
 * OpenCL barrier() for amdgcn.
 *
 * flags is forwarded unchanged to mem_fence(); the barrier builtin is then
 * invoked unconditionally, so it runs regardless of the value of flags
 * (including 0).
 *
 * The fence is issued before the barrier builtin. Presumably this ensures a
 * work-item's earlier memory operations are ordered before it waits at the
 * barrier -- the exact guarantee depends on mem_fence(), which is defined
 * outside this file; confirm there.
 */
_CLC_DEF void barrier(cl_mem_fence_flags flags)
{
/* Memory-ordering half: delegate the handling of flags to mem_fence(). */
mem_fence(flags);
/* Execution half: clang builtin, invoked exactly once on every call. */
__builtin_amdgcn_s_barrier();
}

View File

@ -1,32 +0,0 @@
; Work-group barrier for amdgcn.
;
; OpenCL C entry point: void barrier(cl_mem_fence_flags flags)
;   %flags - mem-fence flag bits, passed through untouched to mem_fence.
;
; Behaviour:
;   1. A memory fence is requested for %flags on every call.
;   2. Exactly one s.barrier is executed on every call.
;
; The previous body branched on the individual flag bits and had three
; problems: it executed no barrier at all when neither bit was set, executed
; the barrier twice when both bits were set, and never issued a memory fence.
;
; NOTE(review): assumes mem_fence is linked in as an unmangled symbol with
; signature void(i32), the same way barrier itself is exported here --
; confirm against the mem_fence implementation before merging.

declare void @mem_fence(i32) #1
declare void @llvm.amdgcn.s.barrier() #0

define void @barrier(i32 %flags) #2 {
entry:
  ; Fence first, so the flag handling lives in one place (mem_fence).
  call void @mem_fence(i32 %flags)
  ; Unconditional: synchronization must not depend on which bits are set.
  call void @llvm.amdgcn.s.barrier()
  ret void
}

; #0: the barrier intrinsic is convergent.
attributes #0 = { nounwind convergent }
; #1: external fence helper declaration.
attributes #1 = { nounwind }
; #2: barrier is convergent as well (it calls a convergent intrinsic) and is
;     always inlined into its callers.
attributes #2 = { nounwind convergent alwaysinline }