forked from OSchip/llvm-project
[libomptarget][nfc] Introduce atomic wrapper function
Summary: Wraps the target atomic functions in templates prefixed __kmpc_atomic_ that dispatch to the CUDA or HIP atomic functions. Intended to be easily extended to dispatch to OpenCL or C++ atomics for a third target. Reviewers: ABataev, jdoerfert, grokos Reviewed By: jdoerfert Subscribers: Anastasia, jvesely, mgrang, dexonsmith, llvm-commits, mgorny, jfb, openmp-commits Tags: #openmp, #llvm Differential Revision: https://reviews.llvm.org/D71404
This commit is contained in:
parent
3db1cf7a1e
commit
2caeaf2f45
|
@ -76,6 +76,7 @@ set(h_files
|
|||
${devicertl_base_directory}/common/omptarget.h
|
||||
${devicertl_base_directory}/common/omptargeti.h
|
||||
${devicertl_base_directory}/common/state-queue.h
|
||||
${devicertl_base_directory}/common/target_atomic.h
|
||||
${devicertl_base_directory}/common/state-queuei.h
|
||||
${devicertl_base_directory}/common/support.h)
|
||||
|
||||
|
|
|
@ -11,6 +11,8 @@
|
|||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "common/target_atomic.h"
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Task Descriptor
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -207,7 +209,7 @@ INLINE void omptarget_nvptx_SimpleMemoryManager::Release() {
|
|||
ASSERT0(LT_FUSSY, usedMemIdx < OMP_STATE_COUNT,
|
||||
"MemIdx is too big or uninitialized.");
|
||||
MemDataTy &MD = MemData[usedSlotIdx];
|
||||
atomicExch((unsigned *)&MD.keys[usedMemIdx], 0);
|
||||
__kmpc_atomic_exchange((unsigned *)&MD.keys[usedMemIdx], 0u);
|
||||
}
|
||||
|
||||
INLINE const void *omptarget_nvptx_SimpleMemoryManager::Acquire(const void *buf,
|
||||
|
@ -217,7 +219,7 @@ INLINE const void *omptarget_nvptx_SimpleMemoryManager::Acquire(const void *buf,
|
|||
const unsigned sm = usedSlotIdx;
|
||||
MemDataTy &MD = MemData[sm];
|
||||
unsigned i = hash(GetBlockIdInKernel());
|
||||
while (atomicCAS((unsigned *)&MD.keys[i], 0, 1) != 0) {
|
||||
while (__kmpc_atomic_cas((unsigned *)&MD.keys[i], 0u, 1u) != 0) {
|
||||
i = hash(i + 1);
|
||||
}
|
||||
usedSlotIdx = sm;
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "common/omptarget.h"
|
||||
#include "common/target_atomic.h"
|
||||
#include "target_impl.h"
|
||||
|
||||
EXTERN double omp_get_wtick(void) {
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
|
||||
#include "common/omptarget.h"
|
||||
#include "target_impl.h"
|
||||
#include "common/target_atomic.h"
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -397,9 +398,9 @@ public:
|
|||
unsigned int rank = __kmpc_impl_popc(active & lane_mask_lt);
|
||||
uint64_t warp_res;
|
||||
if (rank == 0) {
|
||||
warp_res = atomicAdd(
|
||||
warp_res = __kmpc_atomic_add(
|
||||
(unsigned long long *)&omptarget_nvptx_threadPrivateContext->Cnt(),
|
||||
change);
|
||||
(unsigned long long)change);
|
||||
}
|
||||
warp_res = Shuffle(active, warp_res, leader);
|
||||
return warp_res + rank;
|
||||
|
@ -792,8 +793,8 @@ EXTERN void __kmpc_reduce_conditional_lastprivate(kmp_Ident *loc, int32_t gtid,
|
|||
// Atomic max of iterations.
|
||||
uint64_t *varArray = (uint64_t *)array;
|
||||
uint64_t elem = varArray[i];
|
||||
(void)atomicMax((unsigned long long int *)Buffer,
|
||||
(unsigned long long int)elem);
|
||||
(void)__kmpc_atomic_max((unsigned long long int *)Buffer,
|
||||
(unsigned long long int)elem);
|
||||
|
||||
// Barrier.
|
||||
syncWorkersInGenericMode(NumThreads);
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "common/omptarget.h"
|
||||
#include "common/target_atomic.h"
|
||||
#include "target_impl.h"
|
||||
|
||||
EXTERN
|
||||
|
@ -242,7 +243,7 @@ static int32_t nvptx_teams_reduce_nowait(int32_t global_tid, int32_t num_vars,
|
|||
// atomicInc increments 'timestamp' and has a range [0, NumTeams-1].
|
||||
// It resets 'timestamp' back to 0 once the last team increments
|
||||
// this counter.
|
||||
unsigned val = atomicInc(timestamp, NumTeams - 1);
|
||||
unsigned val = __kmpc_atomic_inc(timestamp, NumTeams - 1);
|
||||
IsLastTeam = val == NumTeams - 1;
|
||||
}
|
||||
|
||||
|
@ -377,7 +378,7 @@ EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_simple(kmp_Ident *loc,
|
|||
if (checkSPMDMode(loc) && GetThreadIdInBlock() != 0)
|
||||
return 0;
|
||||
// The master thread of the team actually does the reduction.
|
||||
while (atomicCAS((uint32_t *)crit, 0, 1))
|
||||
while (__kmpc_atomic_cas((uint32_t *)crit, 0u, 1u))
|
||||
;
|
||||
return 1;
|
||||
}
|
||||
|
@ -386,7 +387,7 @@ EXTERN void
|
|||
__kmpc_nvptx_teams_end_reduce_nowait_simple(kmp_Ident *loc, int32_t global_tid,
|
||||
kmp_CriticalName *crit) {
|
||||
__kmpc_impl_threadfence_system();
|
||||
(void)atomicExch((uint32_t *)crit, 0);
|
||||
(void)__kmpc_atomic_exchange((uint32_t *)crit, 0u);
|
||||
}
|
||||
|
||||
INLINE static bool isMaster(kmp_Ident *loc, uint32_t ThreadId) {
|
||||
|
@ -431,7 +432,7 @@ EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
|
|||
bool IsMaster = isMaster(loc, ThreadId);
|
||||
while (IsMaster) {
|
||||
// Atomic read
|
||||
Bound = atomicAdd((uint32_t *)&IterCnt, 0);
|
||||
Bound = __kmpc_atomic_add((uint32_t *)&IterCnt, 0u);
|
||||
if (TeamId < Bound + num_of_records)
|
||||
break;
|
||||
}
|
||||
|
@ -447,7 +448,7 @@ EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
|
|||
// Increment team counter.
|
||||
// This counter is incremented by all teams in the current
|
||||
// BUFFER_SIZE chunk.
|
||||
ChunkTeamCount = atomicInc((uint32_t *)&Cnt, num_of_records - 1);
|
||||
ChunkTeamCount = __kmpc_atomic_inc((uint32_t *)&Cnt, num_of_records - 1u);
|
||||
}
|
||||
// Synchronize
|
||||
if (checkSPMDMode(loc))
|
||||
|
@ -522,7 +523,7 @@ EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
|
|||
if (IsMaster && ChunkTeamCount == num_of_records - 1) {
|
||||
// Allow SIZE number of teams to proceed writing their
|
||||
// intermediate results to the global buffer.
|
||||
atomicAdd((uint32_t *)&IterCnt, num_of_records);
|
||||
__kmpc_atomic_add((uint32_t *)&IterCnt, uint32_t(num_of_records));
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
//===------- state-queue.cu - NVPTX OpenMP GPU State Queue ------- CUDA -*-===//
|
||||
//===------- state-queuei.h - OpenMP GPU State Queue ------------- CUDA -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
|
@ -17,15 +17,16 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "state-queue.h"
|
||||
#include "common/target_atomic.h"
|
||||
|
||||
template <typename ElementType, uint32_t SIZE>
|
||||
INLINE uint32_t omptarget_nvptx_Queue<ElementType, SIZE>::ENQUEUE_TICKET() {
|
||||
return atomicAdd((unsigned int *)&tail, 1);
|
||||
return __kmpc_atomic_add((unsigned int *)&tail, 1u);
|
||||
}
|
||||
|
||||
template <typename ElementType, uint32_t SIZE>
|
||||
INLINE uint32_t omptarget_nvptx_Queue<ElementType, SIZE>::DEQUEUE_TICKET() {
|
||||
return atomicAdd((unsigned int *)&head, 1);
|
||||
return __kmpc_atomic_add((unsigned int *)&head, 1u);
|
||||
}
|
||||
|
||||
template <typename ElementType, uint32_t SIZE>
|
||||
|
@ -37,28 +38,28 @@ omptarget_nvptx_Queue<ElementType, SIZE>::ID(uint32_t ticket) {
|
|||
template <typename ElementType, uint32_t SIZE>
|
||||
INLINE bool omptarget_nvptx_Queue<ElementType, SIZE>::IsServing(uint32_t slot,
|
||||
uint32_t id) {
|
||||
return atomicAdd((unsigned int *)&ids[slot], 0) == id;
|
||||
return __kmpc_atomic_add((unsigned int *)&ids[slot], 0u) == id;
|
||||
}
|
||||
|
||||
template <typename ElementType, uint32_t SIZE>
|
||||
INLINE void
|
||||
omptarget_nvptx_Queue<ElementType, SIZE>::PushElement(uint32_t slot,
|
||||
ElementType *element) {
|
||||
atomicExch((unsigned long long *)&elementQueue[slot],
|
||||
(unsigned long long)element);
|
||||
__kmpc_atomic_exchange((unsigned long long *)&elementQueue[slot],
|
||||
(unsigned long long)element);
|
||||
}
|
||||
|
||||
template <typename ElementType, uint32_t SIZE>
|
||||
INLINE ElementType *
|
||||
omptarget_nvptx_Queue<ElementType, SIZE>::PopElement(uint32_t slot) {
|
||||
return (ElementType *)atomicAdd((unsigned long long *)&elementQueue[slot],
|
||||
(unsigned long long)0);
|
||||
return (ElementType *)__kmpc_atomic_add(
|
||||
(unsigned long long *)&elementQueue[slot], (unsigned long long)0);
|
||||
}
|
||||
|
||||
template <typename ElementType, uint32_t SIZE>
|
||||
INLINE void omptarget_nvptx_Queue<ElementType, SIZE>::DoneServing(uint32_t slot,
|
||||
uint32_t id) {
|
||||
atomicExch((unsigned int *)&ids[slot], (id + 1) % MAX_ID);
|
||||
__kmpc_atomic_exchange((unsigned int *)&ids[slot], (id + 1) % MAX_ID);
|
||||
}
|
||||
|
||||
template <typename ElementType, uint32_t SIZE>
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
//===---- target_atomic.h - OpenMP GPU target atomic functions ---- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Wrappers around the atomic functions provided by each target
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef OMPTARGET_TARGET_ATOMIC_H
|
||||
#define OMPTARGET_TARGET_ATOMIC_H
|
||||
|
||||
#include "target_impl.h"
|
||||
|
||||
// Atomic add wrapper: forwards `address` and `val` unchanged to the target's
// atomicAdd and returns whatever that call returns.
// T is deduced from both arguments, so the pointee type and the value type
// must match exactly (call sites pass e.g. 0u / 1u or cast the value to
// unsigned long long).
template <typename T> INLINE T __kmpc_atomic_add(T *address, T val) {
|
||||
return atomicAdd(address, val);
|
||||
}
|
||||
|
||||
// Atomic increment wrapper: forwards `address` and `val` unchanged to the
// target's atomicInc and returns whatever that call returns.
// `val` is atomicInc's second argument; the visible call sites pass an upper
// bound such as NumTeams - 1 or num_of_records - 1u.
// T is deduced from both arguments, so they must have the same type.
template <typename T> INLINE T __kmpc_atomic_inc(T *address, T val) {
|
||||
return atomicInc(address, val);
|
||||
}
|
||||
|
||||
// Atomic max wrapper: forwards `address` and `val` unchanged to the target's
// atomicMax and returns whatever that call returns.
// T is deduced from both arguments, so they must have the same type (the
// visible call site casts both to unsigned long long int).
template <typename T> INLINE T __kmpc_atomic_max(T *address, T val) {
|
||||
return atomicMax(address, val);
|
||||
}
|
||||
|
||||
// Atomic exchange wrapper: forwards `address` and `val` unchanged to the
// target's atomicExch and returns whatever that call returns.
// T is deduced from both arguments, so they must have the same type (call
// sites pass unsigned literals such as 0u when the pointee is unsigned).
template <typename T> INLINE T __kmpc_atomic_exchange(T *address, T val) {
|
||||
return atomicExch(address, val);
|
||||
}
|
||||
|
||||
// Atomic compare-and-swap wrapper: forwards `address`, `compare` and `val`
// unchanged to the target's atomicCAS and returns whatever that call returns.
// The visible call sites spin on the result (e.g. until it equals 0 / UNSET)
// to acquire a lock.
// T is deduced from all three arguments, so they must have the same type;
// this is why callers pass 0u / 1u rather than plain int literals.
template <typename T> INLINE T __kmpc_atomic_cas(T *address, T compare, T val) {
|
||||
return atomicCAS(address, compare, val);
|
||||
}
|
||||
|
||||
#endif
|
|
@ -12,10 +12,11 @@
|
|||
|
||||
#include "target_impl.h"
|
||||
#include "common/debug.h"
|
||||
#include "common/target_atomic.h"
|
||||
|
||||
#define __OMP_SPIN 1000
|
||||
#define UNSET 0
|
||||
#define SET 1
|
||||
#define UNSET 0u
|
||||
#define SET 1u
|
||||
|
||||
EXTERN void __kmpc_impl_init_lock(omp_lock_t *lock) {
|
||||
omp_unset_lock(lock);
|
||||
|
@ -30,7 +31,7 @@ EXTERN void __kmpc_impl_set_lock(omp_lock_t *lock) {
|
|||
// (old == compare ? val : old)
|
||||
|
||||
// TODO: not sure spinning is a good idea here..
|
||||
while (atomicCAS(lock, UNSET, SET) != UNSET) {
|
||||
while (__kmpc_atomic_cas(lock, UNSET, SET) != UNSET) {
|
||||
clock_t start = clock();
|
||||
clock_t now;
|
||||
for (;;) {
|
||||
|
@ -44,7 +45,7 @@ EXTERN void __kmpc_impl_set_lock(omp_lock_t *lock) {
|
|||
}
|
||||
|
||||
EXTERN void __kmpc_impl_unset_lock(omp_lock_t *lock) {
|
||||
(void)atomicExch(lock, UNSET);
|
||||
(void)__kmpc_atomic_exchange(lock, UNSET);
|
||||
}
|
||||
|
||||
EXTERN int __kmpc_impl_test_lock(omp_lock_t *lock) {
|
||||
|
|
Loading…
Reference in New Issue