[libomptarget][nfc] Introduce atomic wrapper function

Summary:
[libomptarget][nfc] Introduce atomic wrapper function

Wraps the atomic functions in a set of templates prefixed with __kmpc_atomic
that dispatch to the CUDA or HIP atomic intrinsics. Intended to be easily
extended to dispatch to OpenCL or C++ atomics for a third target.
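
For illustration only, a minimal sketch of the wrapper shape and of how a third
target might later be slotted in. The __KMPC_GENERIC_ATOMICS guard and the use
of the Clang/GCC __atomic builtins are hypothetical assumptions, not part of
this patch, which only forwards to the CUDA/HIP intrinsics (INLINE comes from
target_impl.h):

template <typename T> INLINE T __kmpc_atomic_add(T *address, T val) {
#ifdef __KMPC_GENERIC_ATOMICS
  // Hypothetical non-CUDA/HIP path: Clang/GCC atomic builtin. The ordering is
  // a conservative placeholder; a real port would choose it deliberately.
  return __atomic_fetch_add(address, val, __ATOMIC_SEQ_CST);
#else
  // In-tree behaviour: forward to the CUDA/HIP device intrinsic.
  return atomicAdd(address, val);
#endif
}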

Reviewers: ABataev, jdoerfert, grokos

Reviewed By: jdoerfert

Subscribers: Anastasia, jvesely, mgrang, dexonsmith, llvm-commits, mgorny, jfb, openmp-commits

Tags: #openmp, #llvm

Differential Revision: https://reviews.llvm.org/D71404
Jon Chesterfield, 2019-12-18 20:06:16 +00:00
commit 2caeaf2f45, parent 3db1cf7a1e
8 changed files with 71 additions and 25 deletions


@@ -76,6 +76,7 @@ set(h_files
${devicertl_base_directory}/common/omptarget.h
${devicertl_base_directory}/common/omptargeti.h
${devicertl_base_directory}/common/state-queue.h
${devicertl_base_directory}/common/target_atomic.h
${devicertl_base_directory}/common/state-queuei.h
${devicertl_base_directory}/common/support.h)


@@ -11,6 +11,8 @@
//
//===----------------------------------------------------------------------===//
#include "common/target_atomic.h"
////////////////////////////////////////////////////////////////////////////////
// Task Descriptor
////////////////////////////////////////////////////////////////////////////////
@@ -207,7 +209,7 @@ INLINE void omptarget_nvptx_SimpleMemoryManager::Release() {
ASSERT0(LT_FUSSY, usedMemIdx < OMP_STATE_COUNT,
"MemIdx is too big or uninitialized.");
MemDataTy &MD = MemData[usedSlotIdx];
atomicExch((unsigned *)&MD.keys[usedMemIdx], 0);
__kmpc_atomic_exchange((unsigned *)&MD.keys[usedMemIdx], 0u);
}
INLINE const void *omptarget_nvptx_SimpleMemoryManager::Acquire(const void *buf,
@@ -217,7 +219,7 @@ INLINE const void *omptarget_nvptx_SimpleMemoryManager::Acquire(const void *buf,
const unsigned sm = usedSlotIdx;
MemDataTy &MD = MemData[sm];
unsigned i = hash(GetBlockIdInKernel());
while (atomicCAS((unsigned *)&MD.keys[i], 0, 1) != 0) {
while (__kmpc_atomic_cas((unsigned *)&MD.keys[i], 0u, 1u) != 0) {
i = hash(i + 1);
}
usedSlotIdx = sm;


@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "common/omptarget.h"
#include "common/target_atomic.h"
#include "target_impl.h"
EXTERN double omp_get_wtick(void) {


@@ -14,6 +14,7 @@
#include "common/omptarget.h"
#include "target_impl.h"
#include "common/target_atomic.h"
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
@@ -397,9 +398,9 @@ public:
unsigned int rank = __kmpc_impl_popc(active & lane_mask_lt);
uint64_t warp_res;
if (rank == 0) {
warp_res = atomicAdd(
warp_res = __kmpc_atomic_add(
(unsigned long long *)&omptarget_nvptx_threadPrivateContext->Cnt(),
change);
(unsigned long long)change);
}
warp_res = Shuffle(active, warp_res, leader);
return warp_res + rank;
@@ -792,8 +793,8 @@ EXTERN void __kmpc_reduce_conditional_lastprivate(kmp_Ident *loc, int32_t gtid,
// Atomic max of iterations.
uint64_t *varArray = (uint64_t *)array;
uint64_t elem = varArray[i];
(void)atomicMax((unsigned long long int *)Buffer,
(unsigned long long int)elem);
(void)__kmpc_atomic_max((unsigned long long int *)Buffer,
(unsigned long long int)elem);
// Barrier.
syncWorkersInGenericMode(NumThreads);


@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "common/omptarget.h"
#include "common/target_atomic.h"
#include "target_impl.h"
EXTERN
@@ -242,7 +243,7 @@ static int32_t nvptx_teams_reduce_nowait(int32_t global_tid, int32_t num_vars,
// atomicInc increments 'timestamp' and has a range [0, NumTeams-1].
// It resets 'timestamp' back to 0 once the last team increments
// this counter.
unsigned val = atomicInc(timestamp, NumTeams - 1);
unsigned val = __kmpc_atomic_inc(timestamp, NumTeams - 1);
IsLastTeam = val == NumTeams - 1;
}
@@ -377,7 +378,7 @@ EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_simple(kmp_Ident *loc,
if (checkSPMDMode(loc) && GetThreadIdInBlock() != 0)
return 0;
// The master thread of the team actually does the reduction.
while (atomicCAS((uint32_t *)crit, 0, 1))
while (__kmpc_atomic_cas((uint32_t *)crit, 0u, 1u))
;
return 1;
}
@@ -386,7 +387,7 @@ EXTERN void
__kmpc_nvptx_teams_end_reduce_nowait_simple(kmp_Ident *loc, int32_t global_tid,
kmp_CriticalName *crit) {
__kmpc_impl_threadfence_system();
(void)atomicExch((uint32_t *)crit, 0);
(void)__kmpc_atomic_exchange((uint32_t *)crit, 0u);
}
INLINE static bool isMaster(kmp_Ident *loc, uint32_t ThreadId) {
@@ -431,7 +432,7 @@ EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
bool IsMaster = isMaster(loc, ThreadId);
while (IsMaster) {
// Atomic read
Bound = atomicAdd((uint32_t *)&IterCnt, 0);
Bound = __kmpc_atomic_add((uint32_t *)&IterCnt, 0u);
if (TeamId < Bound + num_of_records)
break;
}
@@ -447,7 +448,7 @@ EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
// Increment team counter.
// This counter is incremented by all teams in the current
// BUFFER_SIZE chunk.
ChunkTeamCount = atomicInc((uint32_t *)&Cnt, num_of_records - 1);
ChunkTeamCount = __kmpc_atomic_inc((uint32_t *)&Cnt, num_of_records - 1u);
}
// Synchronize
if (checkSPMDMode(loc))
@@ -522,7 +523,7 @@ EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
if (IsMaster && ChunkTeamCount == num_of_records - 1) {
// Allow SIZE number of teams to proceed writing their
// intermediate results to the global buffer.
atomicAdd((uint32_t *)&IterCnt, num_of_records);
__kmpc_atomic_add((uint32_t *)&IterCnt, uint32_t(num_of_records));
}
return 0;


@@ -1,4 +1,4 @@
//===------- state-queue.cu - NVPTX OpenMP GPU State Queue ------- CUDA -*-===//
//===------- state-queuei.h - OpenMP GPU State Queue ------------- CUDA -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -17,15 +17,16 @@
//===----------------------------------------------------------------------===//
#include "state-queue.h"
#include "common/target_atomic.h"
template <typename ElementType, uint32_t SIZE>
INLINE uint32_t omptarget_nvptx_Queue<ElementType, SIZE>::ENQUEUE_TICKET() {
return atomicAdd((unsigned int *)&tail, 1);
return __kmpc_atomic_add((unsigned int *)&tail, 1u);
}
template <typename ElementType, uint32_t SIZE>
INLINE uint32_t omptarget_nvptx_Queue<ElementType, SIZE>::DEQUEUE_TICKET() {
return atomicAdd((unsigned int *)&head, 1);
return __kmpc_atomic_add((unsigned int *)&head, 1u);
}
template <typename ElementType, uint32_t SIZE>
@@ -37,28 +38,28 @@ omptarget_nvptx_Queue<ElementType, SIZE>::ID(uint32_t ticket) {
template <typename ElementType, uint32_t SIZE>
INLINE bool omptarget_nvptx_Queue<ElementType, SIZE>::IsServing(uint32_t slot,
uint32_t id) {
return atomicAdd((unsigned int *)&ids[slot], 0) == id;
return __kmpc_atomic_add((unsigned int *)&ids[slot], 0u) == id;
}
template <typename ElementType, uint32_t SIZE>
INLINE void
omptarget_nvptx_Queue<ElementType, SIZE>::PushElement(uint32_t slot,
ElementType *element) {
atomicExch((unsigned long long *)&elementQueue[slot],
(unsigned long long)element);
__kmpc_atomic_exchange((unsigned long long *)&elementQueue[slot],
(unsigned long long)element);
}
template <typename ElementType, uint32_t SIZE>
INLINE ElementType *
omptarget_nvptx_Queue<ElementType, SIZE>::PopElement(uint32_t slot) {
return (ElementType *)atomicAdd((unsigned long long *)&elementQueue[slot],
(unsigned long long)0);
return (ElementType *)__kmpc_atomic_add(
(unsigned long long *)&elementQueue[slot], (unsigned long long)0);
}
template <typename ElementType, uint32_t SIZE>
INLINE void omptarget_nvptx_Queue<ElementType, SIZE>::DoneServing(uint32_t slot,
uint32_t id) {
atomicExch((unsigned int *)&ids[slot], (id + 1) % MAX_ID);
__kmpc_atomic_exchange((unsigned int *)&ids[slot], (id + 1) % MAX_ID);
}
template <typename ElementType, uint32_t SIZE>


@@ -0,0 +1,38 @@
//===---- target_atomic.h - OpenMP GPU target atomic functions ---- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Declarations of atomic functions provided by each target
//
//===----------------------------------------------------------------------===//
#ifndef OMPTARGET_TARGET_ATOMIC_H
#define OMPTARGET_TARGET_ATOMIC_H
#include "target_impl.h"
template <typename T> INLINE T __kmpc_atomic_add(T *address, T val) {
return atomicAdd(address, val);
}
template <typename T> INLINE T __kmpc_atomic_inc(T *address, T val) {
return atomicInc(address, val);
}
template <typename T> INLINE T __kmpc_atomic_max(T *address, T val) {
return atomicMax(address, val);
}
template <typename T> INLINE T __kmpc_atomic_exchange(T *address, T val) {
return atomicExch(address, val);
}
template <typename T> INLINE T __kmpc_atomic_cas(T *address, T compare, T val) {
return atomicCAS(address, compare, val);
}
#endif
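
A note on the call sites above, as an observation about the diff rather than
text from the patch: because each wrapper takes T * and T, template argument
deduction requires both arguments to name the same type. That is why plain
integer literals pick up unsigned suffixes (0u, 1u) and why operands such as
change are cast explicitly at the call sites, for example:

  unsigned counter = 0;
  __kmpc_atomic_add(&counter, 1u);   // T deduced as unsigned: OK
  // __kmpc_atomic_add(&counter, 1); // error: T deduced as both unsigned and int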


@@ -12,10 +12,11 @@
#include "target_impl.h"
#include "common/debug.h"
#include "common/target_atomic.h"
#define __OMP_SPIN 1000
#define UNSET 0
#define SET 1
#define UNSET 0u
#define SET 1u
EXTERN void __kmpc_impl_init_lock(omp_lock_t *lock) {
omp_unset_lock(lock);
@@ -30,7 +31,7 @@ EXTERN void __kmpc_impl_set_lock(omp_lock_t *lock) {
// (old == compare ? val : old)
// TODO: not sure spinning is a good idea here..
while (atomicCAS(lock, UNSET, SET) != UNSET) {
while (__kmpc_atomic_cas(lock, UNSET, SET) != UNSET) {
clock_t start = clock();
clock_t now;
for (;;) {
@@ -44,7 +45,7 @@ EXTERN void __kmpc_impl_set_lock(omp_lock_t *lock) {
}
EXTERN void __kmpc_impl_unset_lock(omp_lock_t *lock) {
(void)atomicExch(lock, UNSET);
(void)__kmpc_atomic_exchange(lock, UNSET);
}
EXTERN int __kmpc_impl_test_lock(omp_lock_t *lock) {