forked from OSchip/llvm-project
[OpenMP] Create and use `__kmpc_is_generic_main_thread`
In order to fold calls based on high-level knowledge and control flow tracking, it helps to expose the information as a runtime call. The logic `!SPMD && getTID() == getMasterTID()` was used in various places and is now encapsulated in `__kmpc_is_generic_main_thread`. As part of this rewrite we replaced eager computation of arguments with on-demand computation, which is especially helpful when the calls can be folded and the arguments consequently do not need to be computed at all. Differential Revision: https://reviews.llvm.org/D105768
This commit is contained in:
parent
1ab1f04a2b
commit
a7b7b5dfe5
|
@ -192,7 +192,7 @@ INLINE omptarget_nvptx_TaskDescr *getMyTopTaskDescriptor(int threadId) {
|
|||
|
||||
INLINE omptarget_nvptx_TaskDescr *
|
||||
getMyTopTaskDescriptor(bool isSPMDExecutionMode) {
|
||||
return getMyTopTaskDescriptor(GetLogicalThreadIdInBlock(isSPMDExecutionMode));
|
||||
return getMyTopTaskDescriptor(GetLogicalThreadIdInBlock());
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -15,11 +15,6 @@
|
|||
#include "target/shuffle.h"
|
||||
#include "target_impl.h"
|
||||
|
||||
// Return true if this is the master thread.
|
||||
INLINE static bool IsMasterThread(bool isSPMDExecutionMode) {
|
||||
return !isSPMDExecutionMode && GetMasterThreadID() == GetThreadIdInBlock();
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Runtime functions for trunk data sharing scheme.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -66,7 +61,8 @@ static void *__kmpc_alloc_for_warp(AllocTy Alloc, unsigned Bytes,
|
|||
|
||||
EXTERN void *__kmpc_alloc_shared(size_t Bytes) {
|
||||
Bytes = Bytes + (Bytes % MinBytes);
|
||||
if (IsMasterThread(__kmpc_is_spmd_exec_mode())) {
|
||||
int TID = GetThreadIdInBlock();
|
||||
if (__kmpc_is_generic_main_thread(TID)) {
|
||||
// Main thread alone, use shared memory if space is available.
|
||||
if (MainSharedStack.Usage[0] + Bytes <= MainSharedStack.MaxSize) {
|
||||
void *Ptr = &MainSharedStack.Data[MainSharedStack.Usage[0]];
|
||||
|
@ -75,7 +71,6 @@ EXTERN void *__kmpc_alloc_shared(size_t Bytes) {
|
|||
return Ptr;
|
||||
}
|
||||
} else {
|
||||
int TID = GetThreadIdInBlock();
|
||||
int WID = GetWarpId();
|
||||
unsigned WarpBytes = Bytes * WARPSIZE;
|
||||
auto AllocSharedStack = [&]() {
|
||||
|
@ -92,7 +87,6 @@ EXTERN void *__kmpc_alloc_shared(size_t Bytes) {
|
|||
return __kmpc_alloc_for_warp(AllocSharedStack, Bytes, WarpBytes);
|
||||
}
|
||||
// Fallback to malloc
|
||||
int TID = GetThreadIdInBlock();
|
||||
unsigned WarpBytes = Bytes * WARPSIZE;
|
||||
auto AllocGlobal = [&] {
|
||||
return SafeMalloc(WarpBytes, "AllocGlobalFallback");
|
||||
|
|
|
@ -68,9 +68,7 @@ EXTERN int omp_get_thread_limit(void) {
|
|||
}
|
||||
|
||||
EXTERN int omp_get_thread_num() {
|
||||
bool isSPMDExecutionMode = __kmpc_is_spmd_exec_mode();
|
||||
int tid = GetLogicalThreadIdInBlock(isSPMDExecutionMode);
|
||||
int rc = GetOmpThreadId(tid, isSPMDExecutionMode);
|
||||
int rc = GetOmpThreadId();
|
||||
PRINT(LD_IO, "call omp_get_thread_num() returns %d\n", rc);
|
||||
return rc;
|
||||
}
|
||||
|
|
|
@ -210,7 +210,7 @@ public:
|
|||
ASSERT0(LT_FUSSY, __kmpc_is_spmd_exec_mode(), "Expected non-SPMD mode.");
|
||||
return;
|
||||
}
|
||||
int tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());
|
||||
int tid = GetLogicalThreadIdInBlock();
|
||||
omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor(tid);
|
||||
T tnum = GetNumberOfOmpThreads(__kmpc_is_spmd_exec_mode());
|
||||
T tripCount = ub - lb + 1; // +1 because ub is inclusive
|
||||
|
@ -453,7 +453,7 @@ public:
|
|||
// ID of a thread in its own warp
|
||||
|
||||
// automatically selects thread or warp ID based on selected implementation
|
||||
int tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());
|
||||
int tid = GetLogicalThreadIdInBlock();
|
||||
ASSERT0(LT_FUSSY, gtid < GetNumberOfOmpThreads(__kmpc_is_spmd_exec_mode()),
|
||||
"current thread is not needed here; error");
|
||||
// retrieve schedule
|
||||
|
|
|
@ -160,6 +160,10 @@ EXTERN int8_t __kmpc_is_spmd_exec_mode() {
|
|||
return (execution_param & ModeMask) == Spmd;
|
||||
}
|
||||
|
||||
EXTERN int8_t __kmpc_is_generic_main_thread(kmp_int32 Tid) {
|
||||
return !__kmpc_is_spmd_exec_mode() && GetMasterThreadID() == Tid;
|
||||
}
|
||||
|
||||
EXTERN bool __kmpc_kernel_parallel(void**WorkFn);
|
||||
|
||||
static void __kmpc_target_region_state_machine(ident_t *Ident) {
|
||||
|
|
|
@ -188,7 +188,7 @@ EXTERN void __kmpc_serialized_parallel(kmp_Ident *loc, uint32_t global_tid) {
|
|||
}
|
||||
|
||||
// assume this is only called for nested parallel
|
||||
int threadId = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());
|
||||
int threadId = GetLogicalThreadIdInBlock();
|
||||
|
||||
// unlike actual parallel, threads in the same team do not share
|
||||
// the workTaskDescr in this case and num threads is fixed to 1
|
||||
|
@ -227,7 +227,7 @@ EXTERN void __kmpc_end_serialized_parallel(kmp_Ident *loc,
|
|||
}
|
||||
|
||||
// pop stack
|
||||
int threadId = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());
|
||||
int threadId = GetLogicalThreadIdInBlock();
|
||||
omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor(threadId);
|
||||
// set new top
|
||||
omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(
|
||||
|
@ -249,8 +249,7 @@ EXTERN uint16_t __kmpc_parallel_level(kmp_Ident *loc, uint32_t global_tid) {
|
|||
// it's cheap to recalculate this value so we never use the result
|
||||
// of this call.
|
||||
EXTERN int32_t __kmpc_global_thread_num(kmp_Ident *loc) {
|
||||
int tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());
|
||||
return GetOmpThreadId(tid, __kmpc_is_spmd_exec_mode());
|
||||
return GetOmpThreadId();
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -262,7 +261,7 @@ EXTERN void __kmpc_push_num_threads(kmp_Ident *loc, int32_t tid,
|
|||
PRINT(LD_IO, "call kmpc_push_num_threads %d\n", num_threads);
|
||||
ASSERT0(LT_FUSSY, isRuntimeInitialized(),
|
||||
"Runtime must be initialized.");
|
||||
tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());
|
||||
tid = GetLogicalThreadIdInBlock();
|
||||
omptarget_nvptx_threadPrivateContext->NumThreadsForNextParallel(tid) =
|
||||
num_threads;
|
||||
}
|
||||
|
|
|
@ -69,7 +69,7 @@ static int32_t nvptx_parallel_reduce_nowait(
|
|||
int32_t global_tid, int32_t num_vars, size_t reduce_size, void *reduce_data,
|
||||
kmp_ShuffleReductFctPtr shflFct, kmp_InterWarpCopyFctPtr cpyFct,
|
||||
bool isSPMDExecutionMode, bool isRuntimeUninitialized) {
|
||||
uint32_t BlockThreadId = GetLogicalThreadIdInBlock(isSPMDExecutionMode);
|
||||
uint32_t BlockThreadId = GetLogicalThreadIdInBlock();
|
||||
uint32_t NumThreads = GetNumberOfOmpThreads(isSPMDExecutionMode);
|
||||
if (NumThreads == 1)
|
||||
return 1;
|
||||
|
@ -184,10 +184,11 @@ EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
|
|||
kmp_ListGlobalFctPtr glredFct) {
|
||||
|
||||
// Terminate all threads in non-SPMD mode except for the master thread.
|
||||
if (!__kmpc_is_spmd_exec_mode() && GetThreadIdInBlock() != GetMasterThreadID())
|
||||
if (!__kmpc_is_spmd_exec_mode() &&
|
||||
!__kmpc_is_generic_main_thread(GetThreadIdInBlock()))
|
||||
return 0;
|
||||
|
||||
uint32_t ThreadId = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());
|
||||
uint32_t ThreadId = GetLogicalThreadIdInBlock();
|
||||
|
||||
// In non-generic mode all workers participate in the teams reduction.
|
||||
// In generic mode only the team master participates in the teams
|
||||
|
|
|
@ -67,11 +67,11 @@ int GetNumberOfWorkersInTeam() { return GetMasterThreadID(); }
|
|||
// or a serial region by the master. If the master (whose CUDA thread
|
||||
// id is GetMasterThreadID()) calls this routine, we return 0 because
|
||||
// it is a shadow for the first worker.
|
||||
int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode) {
|
||||
int GetLogicalThreadIdInBlock() {
|
||||
// Implemented using control flow (predication) instead of with a modulo
|
||||
// operation.
|
||||
int tid = GetThreadIdInBlock();
|
||||
if (!isSPMDExecutionMode && tid >= GetMasterThreadID())
|
||||
if (__kmpc_is_generic_main_thread(tid))
|
||||
return 0;
|
||||
else
|
||||
return tid;
|
||||
|
@ -83,16 +83,19 @@ int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode) {
|
|||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
int GetOmpThreadId(int threadId, bool isSPMDExecutionMode) {
|
||||
int GetOmpThreadId() {
|
||||
int tid = GetThreadIdInBlock();
|
||||
if (__kmpc_is_generic_main_thread(tid))
|
||||
return 0;
|
||||
// omp_thread_num
|
||||
int rc;
|
||||
if ((parallelLevel[GetWarpId()] & (OMP_ACTIVE_PARALLEL_LEVEL - 1)) > 1) {
|
||||
rc = 0;
|
||||
} else if (isSPMDExecutionMode) {
|
||||
rc = GetThreadIdInBlock();
|
||||
} else if (__kmpc_is_spmd_exec_mode()) {
|
||||
rc = tid;
|
||||
} else {
|
||||
omptarget_nvptx_TaskDescr *currTaskDescr =
|
||||
omptarget_nvptx_threadPrivateContext->GetTopLevelTaskDescr(threadId);
|
||||
omptarget_nvptx_threadPrivateContext->GetTopLevelTaskDescr(tid);
|
||||
ASSERT0(LT_FUSSY, currTaskDescr, "expected a top task descr");
|
||||
rc = currTaskDescr->ThreadId();
|
||||
}
|
||||
|
|
|
@ -47,7 +47,7 @@ EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid) {
|
|||
"Expected SPMD mode with uninitialized runtime.");
|
||||
__kmpc_barrier_simple_spmd(loc_ref, tid);
|
||||
} else {
|
||||
tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());
|
||||
tid = GetLogicalThreadIdInBlock();
|
||||
int numberOfActiveOMPThreads =
|
||||
GetNumberOfOmpThreads(__kmpc_is_spmd_exec_mode());
|
||||
if (numberOfActiveOMPThreads > 1) {
|
||||
|
|
|
@ -96,7 +96,7 @@ EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Ident *loc, uint32_t global_tid,
|
|||
"bad assumptions");
|
||||
|
||||
// 2. push new context: update new task descriptor
|
||||
int tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());
|
||||
int tid = GetLogicalThreadIdInBlock();
|
||||
omptarget_nvptx_TaskDescr *parentTaskDescr = getMyTopTaskDescriptor(tid);
|
||||
newTaskDescr->CopyForExplicitTask(parentTaskDescr);
|
||||
// set new task descriptor as top
|
||||
|
@ -135,7 +135,7 @@ EXTERN void __kmpc_omp_task_begin_if0(kmp_Ident *loc, uint32_t global_tid,
|
|||
"bad assumptions");
|
||||
|
||||
// 2. push new context: update new task descriptor
|
||||
int tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());
|
||||
int tid = GetLogicalThreadIdInBlock();
|
||||
omptarget_nvptx_TaskDescr *parentTaskDescr = getMyTopTaskDescriptor(tid);
|
||||
newTaskDescr->CopyForExplicitTask(parentTaskDescr);
|
||||
// set new task descriptor as top
|
||||
|
@ -163,7 +163,7 @@ EXTERN void __kmpc_omp_task_complete_if0(kmp_Ident *loc, uint32_t global_tid,
|
|||
omptarget_nvptx_TaskDescr *parentTaskDescr = newTaskDescr->GetPrevTaskDescr();
|
||||
// 3... nothing to call... is inline
|
||||
// 4. pop context
|
||||
int tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());
|
||||
int tid = GetLogicalThreadIdInBlock();
|
||||
omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(tid,
|
||||
parentTaskDescr);
|
||||
// 5. free
|
||||
|
|
|
@ -41,13 +41,12 @@ bool isRuntimeInitialized();
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// get global ids to locate thread/team info (constant regardless of OMP)
|
||||
int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode);
|
||||
int GetLogicalThreadIdInBlock();
|
||||
int GetMasterThreadID();
|
||||
int GetNumberOfWorkersInTeam();
|
||||
|
||||
// get OpenMP thread and team ids
|
||||
int GetOmpThreadId(int threadId,
|
||||
bool isSPMDExecutionMode); // omp_thread_num
|
||||
int GetOmpThreadId(); // omp_thread_num
|
||||
int GetOmpTeamId(); // omp_team_num
|
||||
|
||||
// get OpenMP number of threads and team
|
||||
|
|
|
@ -449,6 +449,10 @@ EXTERN void __kmpc_parallel_51(ident_t *ident, kmp_int32 global_tid,
|
|||
// SPMD execution mode interrogation function.
|
||||
EXTERN int8_t __kmpc_is_spmd_exec_mode();
|
||||
|
||||
/// Return true if the hardware thread id \p Tid represents the OpenMP main
|
||||
/// thread in generic mode outside of a parallel region.
|
||||
EXTERN int8_t __kmpc_is_generic_main_thread(kmp_int32 Tid);
|
||||
|
||||
EXTERN void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode,
|
||||
const void *buf, size_t size,
|
||||
int16_t is_shared, const void **res);
|
||||
|
|
Loading…
Reference in New Issue