[OpenMP] Avoid reading uninitialized parallel level values

In a last minute change request for a2dbfb6b72 we introduced a
read of the uninitialized parallel level value in SPMD-mode.
We go back to initializing the array early and checking for an
adjusted level.

Found by the miniqmc unit tests:
  https://cdash.qmcpack.org/CDash/viewTest.php?onlyfailed&buildid=203434

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D101123
This commit is contained in:
Johannes Doerfert 2021-04-23 01:36:18 +00:00
parent cbe8b57a67
commit 17330a3cb1
2 changed files with 10 additions and 16 deletions

View File

@ -88,6 +88,11 @@ EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit,
int threadId = GetThreadIdInBlock(); int threadId = GetThreadIdInBlock();
if (threadId == 0) { if (threadId == 0) {
usedSlotIdx = __kmpc_impl_smid() % MAX_SM; usedSlotIdx = __kmpc_impl_smid() % MAX_SM;
parallelLevel[0] =
1 + (GetNumberOfThreadsInBlock() > 1 ? OMP_ACTIVE_PARALLEL_LEVEL : 0);
} else if (GetLaneId() == 0) {
parallelLevel[GetWarpId()] =
1 + (GetNumberOfThreadsInBlock() > 1 ? OMP_ACTIVE_PARALLEL_LEVEL : 0);
} }
if (!RequiresOMPRuntime) { if (!RequiresOMPRuntime) {
// Runtime is not required - exit. // Runtime is not required - exit.

View File

@ -289,31 +289,20 @@ EXTERN void __kmpc_parallel_51(kmp_Ident *ident, kmp_int32 global_tid,
int proc_bind, void *fn, void *wrapper_fn, int proc_bind, void *fn, void *wrapper_fn,
void **args, size_t nargs) { void **args, size_t nargs) {
// Handle the serialized case first, same for SPMD/non-SPMD. // Handle the serialized case first, same for SPMD/non-SPMD except that in
// TODO: Add UNLIKELY to optimize? // SPMD mode we already incremented the parallel level counter, account for
bool InParallelRegion = (__kmpc_parallel_level(ident, global_tid) > 0); // that.
bool InParallelRegion =
(__kmpc_parallel_level(ident, global_tid) > __kmpc_is_spmd_exec_mode());
if (!if_expr || InParallelRegion) { if (!if_expr || InParallelRegion) {
__kmpc_serialized_parallel(ident, global_tid); __kmpc_serialized_parallel(ident, global_tid);
__kmp_invoke_microtask(global_tid, 0, fn, args, nargs); __kmp_invoke_microtask(global_tid, 0, fn, args, nargs);
__kmpc_end_serialized_parallel(ident, global_tid); __kmpc_end_serialized_parallel(ident, global_tid);
return; return;
} }
if (__kmpc_is_spmd_exec_mode()) { if (__kmpc_is_spmd_exec_mode()) {
// Increment parallel level for SPMD warps.
if (GetLaneId() == 0)
parallelLevel[GetWarpId()] =
1 + (GetNumberOfThreadsInBlock() > 1 ? OMP_ACTIVE_PARALLEL_LEVEL : 0);
// TODO: Is that synchronization correct/needed? Can only using a memory
// fence ensure consistency?
__kmpc_impl_syncthreads();
__kmp_invoke_microtask(global_tid, 0, fn, args, nargs); __kmp_invoke_microtask(global_tid, 0, fn, args, nargs);
// Decrement (zero out) parallel level for SPMD warps.
if (GetLaneId() == 0)
parallelLevel[GetWarpId()] = 0;
return; return;
} }