[libomptarget] Implement atomic inc and fence functions for AMDGCN using clang builtins

This function uses __builtin_amdgcn_atomic_inc32():
  uint32_t atomicInc(uint32_t *address, uint32_t max);

These functions use __builtin_amdgcn_fence():
__kmpc_impl_threadfence()
__kmpc_impl_threadfence_block()
__kmpc_impl_threadfence_system()

They replace the current mechanism of directly calling IR functions.

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D83132
This commit is contained in:
Saiyedul Islam 2020-07-07 06:15:26 +00:00
parent 0882c9d4fc
commit 38d6640ba5
2 changed files with 15 additions and 8 deletions

View File

@ -11,11 +11,6 @@
#include "target_impl.h"
// inc requires an amdgcn specific intrinsic which is not yet available
// NOTE(review): an INLINE atomicInc(uint32_t *, uint32_t) built on
// __builtin_amdgcn_atomic_inc32 is defined further down in this listing, so
// the "not yet available" remark and these three external declarations look
// like the pre-change side of the diff -- confirm they are absent from the
// resulting header.
DEVICE unsigned atomicInc(unsigned *address);
DEVICE unsigned atomicInc(unsigned *address, unsigned max);
DEVICE int atomicInc(int *address);
namespace {
template <typename T> DEVICE T atomicAdd(T *address, T val) {
@ -38,5 +33,9 @@ template <typename T> DEVICE T atomicCAS(T *address, T compare, T val) {
return compare;
}
// Atomic increment of the 32-bit value at `address`, implemented with the
// clang builtin __builtin_amdgcn_atomic_inc32. `max` is forwarded unchanged as
// the builtin's bound operand, and the builtin's result is returned as-is.
// The operation uses sequentially consistent ordering and the default ("")
// synchronization scope.
INLINE uint32_t atomicInc(uint32_t *address, uint32_t max) {
  const uint32_t result =
      __builtin_amdgcn_atomic_inc32(address, max, __ATOMIC_SEQ_CST, "");
  return result;
}
} // namespace
#endif

View File

@ -120,9 +120,17 @@ INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) {
__builtin_amdgcn_s_barrier();
}
// External declarations of the three memory-fence entry points.
// NOTE(review): INLINE definitions with the same names follow immediately in
// this listing; these DEVICE declarations appear to be the pre-change side of
// the diff -- confirm they are removed in the resulting header.
DEVICE void __kmpc_impl_threadfence(void);
DEVICE void __kmpc_impl_threadfence_block(void);
DEVICE void __kmpc_impl_threadfence_system(void);
// Memory fence with sequentially consistent ordering at the "agent"
// synchronization scope, issued through __builtin_amdgcn_fence.
// Takes no arguments and returns nothing.
INLINE void __kmpc_impl_threadfence() {
// Both arguments are passed as literal constants (ordering, scope string).
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent");
}
// Memory fence with sequentially consistent ordering at the "workgroup"
// synchronization scope, issued through __builtin_amdgcn_fence.
// Takes no arguments and returns nothing.
INLINE void __kmpc_impl_threadfence_block() {
// Both arguments are passed as literal constants (ordering, scope string).
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
}
// Memory fence with sequentially consistent ordering, issued through
// __builtin_amdgcn_fence with an empty scope string.
// NOTE(review): the empty string is presumably the default (system-wide)
// synchronization scope, matching the function name -- confirm against the
// AMDGPU memory-model documentation.
INLINE void __kmpc_impl_threadfence_system() {
// Both arguments are passed as literal constants (ordering, scope string).
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "");
}
// Calls to the AMDGCN layer (assuming 1D layout)
// Returns the x-dimension work-item id reported by
// __builtin_amdgcn_workitem_id_x(), converted to int.
INLINE int GetThreadIdInBlock() {
  const int id_x = __builtin_amdgcn_workitem_id_x();
  return id_x;
}