forked from OSchip/llvm-project
[libomptarget][nfc] Use cuda variable wrappers from support.h
Summary: [libomptarget][nfc] Use cuda variable wrappers from support.h Reimplementation of D69693, after the revert of D69885 Use the wrappers in support.h for cuda builtin variables at all call sites. Localises use of cuda and removes WARPSIZE==32 assumption in debug.h. Reviewers: ABataev, jdoerfert, grokos Reviewed By: jdoerfert Subscribers: openmp-commits Tags: #openmp Differential Revision: https://reviews.llvm.org/D70186
This commit is contained in:
parent
4787c6e2f3
commit
32dfbd131d
|
@ -13,11 +13,6 @@
|
|||
#include "target_impl.h"
|
||||
#include <stdio.h>
|
||||
|
||||
// Warp ID in the CUDA block
|
||||
INLINE static unsigned getWarpId() { return threadIdx.x / WARPSIZE; }
|
||||
// Lane ID in the CUDA warp.
|
||||
INLINE static unsigned getLaneId() { return threadIdx.x % WARPSIZE; }
|
||||
|
||||
// Return true if this is the first active thread in the warp.
|
||||
INLINE static bool IsWarpMasterActiveThread() {
|
||||
unsigned long long Mask = __kmpc_impl_activemask();
|
||||
|
@ -67,7 +62,7 @@ __kmpc_initialize_data_sharing_environment(__kmpc_data_sharing_slot *rootS,
|
|||
DSPRINT0(DSFLAG_INIT,
|
||||
"Entering __kmpc_initialize_data_sharing_environment\n");
|
||||
|
||||
unsigned WID = getWarpId();
|
||||
unsigned WID = GetWarpId();
|
||||
DSPRINT(DSFLAG_INIT, "Warp ID: %u\n", WID);
|
||||
|
||||
omptarget_nvptx_TeamDescr *teamDescr =
|
||||
|
@ -111,7 +106,7 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
|
|||
DSPRINT(DSFLAG, "Default Data Size %016llx\n",
|
||||
(unsigned long long)SharingDefaultDataSize);
|
||||
|
||||
unsigned WID = getWarpId();
|
||||
unsigned WID = GetWarpId();
|
||||
__kmpc_impl_lanemask_t CurActiveThreads = __kmpc_impl_activemask();
|
||||
|
||||
__kmpc_data_sharing_slot *&SlotP = DataSharingState.SlotPtr[WID];
|
||||
|
@ -231,7 +226,7 @@ EXTERN void __kmpc_data_sharing_environment_end(
|
|||
|
||||
DSPRINT0(DSFLAG, "Entering __kmpc_data_sharing_environment_end\n");
|
||||
|
||||
unsigned WID = getWarpId();
|
||||
unsigned WID = GetWarpId();
|
||||
|
||||
if (IsEntryPoint) {
|
||||
if (IsWarpMasterActiveThread()) {
|
||||
|
@ -359,7 +354,7 @@ EXTERN void __kmpc_data_sharing_init_stack_spmd() {
|
|||
// This function initializes the stack pointer with the pointer to the
|
||||
// statically allocated shared memory slots. The size of a shared memory
|
||||
// slot is pre-determined to be 256 bytes.
|
||||
if (threadIdx.x == 0)
|
||||
if (GetThreadIdInBlock() == 0)
|
||||
data_sharing_init_stack_common();
|
||||
|
||||
__threadfence_block();
|
||||
|
@ -377,7 +372,7 @@ INLINE static void* data_sharing_push_stack_common(size_t PushSize) {
|
|||
PushSize = (PushSize + (Alignment - 1)) / Alignment * Alignment;
|
||||
|
||||
// Frame pointer must be visible to all workers in the same warp.
|
||||
const unsigned WID = getWarpId();
|
||||
const unsigned WID = GetWarpId();
|
||||
void *FrameP = 0;
|
||||
__kmpc_impl_lanemask_t CurActive = __kmpc_impl_activemask();
|
||||
|
||||
|
@ -467,7 +462,7 @@ EXTERN void *__kmpc_data_sharing_push_stack(size_t DataSize,
|
|||
// Compute the start address of the frame of each thread in the warp.
|
||||
uintptr_t FrameStartAddress =
|
||||
(uintptr_t) data_sharing_push_stack_common(PushSize);
|
||||
FrameStartAddress += (uintptr_t) (getLaneId() * DataSize);
|
||||
FrameStartAddress += (uintptr_t) (GetLaneId() * DataSize);
|
||||
return (void *)FrameStartAddress;
|
||||
}
|
||||
|
||||
|
@ -482,7 +477,7 @@ EXTERN void __kmpc_data_sharing_pop_stack(void *FrameStart) {
|
|||
__threadfence_block();
|
||||
|
||||
if (GetThreadIdInBlock() % WARPSIZE == 0) {
|
||||
unsigned WID = getWarpId();
|
||||
unsigned WID = GetWarpId();
|
||||
|
||||
// Current slot
|
||||
__kmpc_data_sharing_slot *&SlotP = DataSharingState.SlotPtr[WID];
|
||||
|
|
|
@ -128,12 +128,12 @@
|
|||
|
||||
#if OMPTARGET_NVPTX_DEBUG || OMPTARGET_NVPTX_TEST || OMPTARGET_NVPTX_WARNING
|
||||
#include <stdio.h>
|
||||
#include "target_impl.h"
|
||||
#include "support.h"
|
||||
|
||||
template <typename... Arguments>
|
||||
NOINLINE static void log(const char *fmt, Arguments... parameters) {
|
||||
printf(fmt, (int)blockIdx.x, (int)threadIdx.x, (int)(threadIdx.x / WARPSIZE),
|
||||
(int)(threadIdx.x & 0x1F), parameters...);
|
||||
printf(fmt, (int)GetBlockIdInKernel(), (int)GetThreadIdInBlock(),
|
||||
(int)GetWarpId(), (int)GetLaneId(), parameters...);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -144,9 +144,8 @@ template <typename... Arguments>
|
|||
NOINLINE static void check(bool cond, const char *fmt,
|
||||
Arguments... parameters) {
|
||||
if (!cond)
|
||||
printf(fmt, (int)blockIdx.x, (int)threadIdx.x,
|
||||
(int)(threadIdx.x / WARPSIZE), (int)(threadIdx.x & 0x1F),
|
||||
parameters...);
|
||||
printf(fmt, (int)GetBlockIdInKernel(), (int)GetThreadIdInBlock(),
|
||||
(int)GetWarpId(), (int)GetLaneId(), parameters...);
|
||||
assert(cond);
|
||||
}
|
||||
|
||||
|
|
|
@ -364,7 +364,7 @@ EXTERN void omp_set_lock(omp_lock_t *lock) {
|
|||
for (;;) {
|
||||
now = clock();
|
||||
clock_t cycles = now > start ? now - start : now + (0xffffffff - start);
|
||||
if (cycles >= __OMP_SPIN * blockIdx.x) {
|
||||
if (cycles >= __OMP_SPIN * GetBlockIdInKernel()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -106,9 +106,9 @@ DEVICE int GetNumberOfBlocksInKernel() { return gridDim.x; }
|
|||
|
||||
DEVICE int GetNumberOfThreadsInBlock() { return blockDim.x; }
|
||||
|
||||
DEVICE unsigned GetWarpId() { return threadIdx.x / WARPSIZE; }
|
||||
DEVICE unsigned GetWarpId() { return GetThreadIdInBlock() / WARPSIZE; }
|
||||
|
||||
DEVICE unsigned GetLaneId() { return threadIdx.x & (WARPSIZE - 1); }
|
||||
DEVICE unsigned GetLaneId() { return GetThreadIdInBlock() & (WARPSIZE - 1); }
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
|
@ -124,7 +124,7 @@ DEVICE unsigned GetLaneId() { return threadIdx.x & (WARPSIZE - 1); }
|
|||
// If NumThreads is 1024, master id is 992.
|
||||
//
|
||||
// Called in Generic Execution Mode only.
|
||||
DEVICE int GetMasterThreadID() { return (blockDim.x - 1) & ~(WARPSIZE - 1); }
|
||||
DEVICE int GetMasterThreadID() { return (GetNumberOfThreadsInBlock() - 1) & ~(WARPSIZE - 1); }
|
||||
|
||||
// The last warp is reserved for the master; other warps are workers.
|
||||
// Called in Generic Execution Mode only.
|
||||
|
|
Loading…
Reference in New Issue