forked from OSchip/llvm-project
[libomptarget] Implement atomic inc and fence functions for AMDGCN using clang builtins
The following function is now implemented with __builtin_amdgcn_atomic_inc32(): uint32_t atomicInc(uint32_t *address, uint32_t max). The following functions are now implemented with __builtin_amdgcn_fence(): __kmpc_impl_threadfence(), __kmpc_impl_threadfence_block(), and __kmpc_impl_threadfence_system(). They take the place of the current mechanism of directly calling IR functions. Reviewed By: JonChesterfield. Differential Revision: https://reviews.llvm.org/D83132
This commit is contained in:
parent
0882c9d4fc
commit
38d6640ba5
|
@ -11,11 +11,6 @@
|
|||
|
||||
#include "target_impl.h"
|
||||
|
||||
// inc requires an amdgcn specific intrinsic which is not yet available
// Forward declarations only: three atomicInc overloads (unsigned with and
// without an explicit max, and int). No definitions for them appear in this
// view.
// NOTE(review): the hunk header shrinks this region from 11 lines to 6, which
// suggests this comment and the three declarations are the lines this commit
// removes -- confirm against the raw diff.
|
||||
DEVICE unsigned atomicInc(unsigned *address);
|
||||
DEVICE unsigned atomicInc(unsigned *address, unsigned max);
|
||||
DEVICE int atomicInc(int *address);
|
||||
|
||||
namespace {
|
||||
|
||||
template <typename T> DEVICE T atomicAdd(T *address, T val) {
|
||||
|
@ -38,5 +33,9 @@ template <typename T> DEVICE T atomicCAS(T *address, T compare, T val) {
|
|||
return compare;
|
||||
}
|
||||
|
||||
// Atomic increment wrapper for AMDGCN.
// Forwards `address` and `max` to the clang builtin
// __builtin_amdgcn_atomic_inc32, requesting sequentially consistent ordering
// and passing an empty sync-scope string, then hands back whatever the builtin
// returns.
// NOTE(review): the wrap-around rule at `max` and the meaning of the returned
// value are defined by the builtin, not by this wrapper -- see the clang docs.
INLINE uint32_t atomicInc(uint32_t *address, uint32_t max) {
  uint32_t result =
      __builtin_amdgcn_atomic_inc32(address, max, __ATOMIC_SEQ_CST, "");
  return result;
}
|
||||
|
||||
} // namespace
|
||||
#endif
|
||||
|
|
|
@ -120,9 +120,17 @@ INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) {
|
|||
__builtin_amdgcn_s_barrier();
|
||||
}
|
||||
|
||||
// Forward declarations of the three fence entry points (default, block and
// system variants); each takes no arguments and returns nothing.
// NOTE(review): the hunk header grows this region from 9 lines to 17 and
// INLINE definitions with the same names follow, which suggests these
// declarations are the lines this commit removes -- confirm against the raw
// diff.
DEVICE void __kmpc_impl_threadfence(void);
|
||||
DEVICE void __kmpc_impl_threadfence_block(void);
|
||||
DEVICE void __kmpc_impl_threadfence_system(void);
|
||||
// Issues a sequentially consistent fence through the clang AMDGCN fence
// builtin using the "agent" sync-scope string.
// NOTE(review): "agent" is presumably the device-wide scope of the AMDGPU
// memory model -- confirm against the LLVM AMDGPU usage guide.
INLINE void __kmpc_impl_threadfence() { __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent"); }
|
||||
|
||||
// Issues a sequentially consistent fence through the clang AMDGCN fence
// builtin using the "workgroup" sync-scope string.
// NOTE(review): "workgroup" presumably limits the fence to the calling
// work-group (the "block" in the function name) -- confirm against the LLVM
// AMDGPU usage guide.
INLINE void __kmpc_impl_threadfence_block() { __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup"); }
|
||||
|
||||
// Issues a sequentially consistent fence through the clang AMDGCN fence
// builtin using an empty sync-scope string.
// NOTE(review): the empty scope string presumably selects the widest (system)
// scope, matching the function name -- confirm against the LLVM AMDGPU usage
// guide.
INLINE void __kmpc_impl_threadfence_system() { __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, ""); }
|
||||
|
||||
// Calls to the AMDGCN layer (assuming 1D layout)
|
||||
// Returns the value of the clang AMDGCN workitem-id builtin for the x
// dimension, converted to int by the return type.
INLINE int GetThreadIdInBlock() {
  return __builtin_amdgcn_workitem_id_x();
}
|
||||
|
|
Loading…
Reference in New Issue