forked from OSchip/llvm-project
amdgcn: Implement {read_,write_,}mem_fence builtin
v2: add more detailed comment about waitcnt instruction Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu> Reviewed-by: Aaron Watry <awatry@gmail.com> Tested-by: Aaron Watry <awatry@gmail.com> llvm-svn: 311021
This commit is contained in:
parent
fec506daaa
commit
1977092dc3
|
@ -1,4 +1,6 @@
|
|||
math/ldexp.cl
|
||||
mem_fence/fence.cl
|
||||
mem_fence/waitcnt.ll
|
||||
synchronization/barrier_impl.ll
|
||||
workitem/get_global_offset.cl
|
||||
workitem/get_group_id.cl
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
#include <clc/clc.h>
|
||||
|
||||
void __clc_amdgcn_s_waitcnt(unsigned flags);
|
||||
|
||||
// s_waitcnt takes a 16-bit argument with a combined number of maximum allowed
|
||||
// pending operations:
|
||||
// [12:8] LGKM -- LDS, GDS, Konstant (SMRD), Messages
|
||||
// [7] -- undefined
|
||||
// [6:4] -- exports, GDS, and mem write
|
||||
// [3:0] -- vector memory operations
|
||||
|
||||
// Newer clang supports __builtin_amdgcn_s_waitcnt
|
||||
// Select the s_waitcnt entry point by compiler version: clang releases older
// than 5 go through the __clc_amdgcn_s_waitcnt wrapper function, while clang 5
// and later call the compiler builtin directly. An undefined __clang_major__
// evaluates to 0 in the preprocessor and therefore also selects the wrapper.
#if __clang_major__ < 5
#  define __waitcnt(x) __clc_amdgcn_s_waitcnt(x)
#else
#  define __waitcnt(x) __builtin_amdgcn_s_waitcnt(x)
#endif
|
||||
|
||||
// Memory fence built on s_waitcnt.
//
// flags: bitwise combination of CLK_GLOBAL_MEM_FENCE / CLK_LOCAL_MEM_FENCE.
// The global bit takes precedence: when it is set, the local bit is not
// examined. If neither bit is set, no wait is issued.
_CLC_DEF void mem_fence(cl_mem_fence_flags flags)
{
  if (flags & CLK_GLOBAL_MEM_FENCE) {
    // An argument of 0 allows zero pending operations on every counter.
    // LGKM has to be included here: scalar loads are counted with LGKM, and
    // we cannot know whether the compiler turned any loads into scalar ones.
    __waitcnt(0);
    return;
  }

  if (flags & CLK_LOCAL_MEM_FENCE) {
    // LGKM occupies bits [12:8]; 0xff sets every bit below that field, so
    // only the LGKM count is required to reach zero.
    __waitcnt(0xff);
  }
}
|
||||
#undef __waitcnt
|
||||
|
||||
// We don't have separate mechanisms for read and write fences
|
||||
// Read fence: forwards to mem_fence() with the same flags, so it behaves
// exactly like the full fence.
_CLC_DEF void read_mem_fence(cl_mem_fence_flags flags) {
  mem_fence(flags);
}
|
||||
|
||||
// Write fence: forwards to mem_fence() with the same flags, so it behaves
// exactly like the full fence.
_CLC_DEF void write_mem_fence(cl_mem_fence_flags flags) {
  mem_fence(flags);
}
|
|
@ -0,0 +1,11 @@
|
|||
declare void @llvm.amdgcn.s.waitcnt(i32) #0
|
||||
|
||||
; Export waitcnt intrinsic for clang < 5
|
||||
; Plain function wrapper around the llvm.amdgcn.s.waitcnt intrinsic: passes
; %flags through unchanged and returns nothing. Uses attribute group #1.
define void @__clc_amdgcn_s_waitcnt(i32 %flags) #1 {
body:
  tail call void @llvm.amdgcn.s.waitcnt(i32 %flags)
  ret void
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind alwaysinline }
|
|
@ -179,6 +179,9 @@
|
|||
#include <clc/synchronization/cl_mem_fence_flags.h>
|
||||
#include <clc/synchronization/barrier.h>
|
||||
|
||||
/* 6.11.9 Explicit Memory Fence Functions */
|
||||
#include <clc/explicit_fence/explicit_memory_fence.h>
|
||||
|
||||
/* 6.11.10 Async Copy and Prefetch Functions */
|
||||
#include <clc/async/async_work_group_copy.h>
|
||||
#include <clc/async/async_work_group_strided_copy.h>
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
_CLC_DECL void mem_fence(cl_mem_fence_flags flags);
|
||||
_CLC_DECL void read_mem_fence(cl_mem_fence_flags flags);
|
||||
_CLC_DECL void write_mem_fence(cl_mem_fence_flags flags);
|
Loading…
Reference in New Issue