forked from OSchip/llvm-project
[OpenMP] Avoid reading uninitialized parallel level values
In a last minute change request for a2dbfb6b72
we introduced a
read of the uninitialized parallel level value in SPMD-mode.
We go back to initializing the array early and checking for an
adjusted level.
Found by the miniqmc unit tests:
https://cdash.qmcpack.org/CDash/viewTest.php?onlyfailed&buildid=203434
Reviewed By: JonChesterfield
Differential Revision: https://reviews.llvm.org/D101123
This commit is contained in:
parent
cbe8b57a67
commit
17330a3cb1
|
@ -88,6 +88,11 @@ EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit,
|
||||||
int threadId = GetThreadIdInBlock();
|
int threadId = GetThreadIdInBlock();
|
||||||
if (threadId == 0) {
|
if (threadId == 0) {
|
||||||
usedSlotIdx = __kmpc_impl_smid() % MAX_SM;
|
usedSlotIdx = __kmpc_impl_smid() % MAX_SM;
|
||||||
|
parallelLevel[0] =
|
||||||
|
1 + (GetNumberOfThreadsInBlock() > 1 ? OMP_ACTIVE_PARALLEL_LEVEL : 0);
|
||||||
|
} else if (GetLaneId() == 0) {
|
||||||
|
parallelLevel[GetWarpId()] =
|
||||||
|
1 + (GetNumberOfThreadsInBlock() > 1 ? OMP_ACTIVE_PARALLEL_LEVEL : 0);
|
||||||
}
|
}
|
||||||
if (!RequiresOMPRuntime) {
|
if (!RequiresOMPRuntime) {
|
||||||
// Runtime is not required - exit.
|
// Runtime is not required - exit.
|
||||||
|
|
|
@ -289,31 +289,20 @@ EXTERN void __kmpc_parallel_51(kmp_Ident *ident, kmp_int32 global_tid,
|
||||||
int proc_bind, void *fn, void *wrapper_fn,
|
int proc_bind, void *fn, void *wrapper_fn,
|
||||||
void **args, size_t nargs) {
|
void **args, size_t nargs) {
|
||||||
|
|
||||||
// Handle the serialized case first, same for SPMD/non-SPMD.
|
// Handle the serialized case first, same for SPMD/non-SPMD except that in
|
||||||
// TODO: Add UNLIKELY to optimize?
|
// SPMD mode we already incremented the parallel level counter, account for
|
||||||
bool InParallelRegion = (__kmpc_parallel_level(ident, global_tid) > 0);
|
// that.
|
||||||
|
bool InParallelRegion =
|
||||||
|
(__kmpc_parallel_level(ident, global_tid) > __kmpc_is_spmd_exec_mode());
|
||||||
if (!if_expr || InParallelRegion) {
|
if (!if_expr || InParallelRegion) {
|
||||||
__kmpc_serialized_parallel(ident, global_tid);
|
__kmpc_serialized_parallel(ident, global_tid);
|
||||||
__kmp_invoke_microtask(global_tid, 0, fn, args, nargs);
|
__kmp_invoke_microtask(global_tid, 0, fn, args, nargs);
|
||||||
__kmpc_end_serialized_parallel(ident, global_tid);
|
__kmpc_end_serialized_parallel(ident, global_tid);
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (__kmpc_is_spmd_exec_mode()) {
|
if (__kmpc_is_spmd_exec_mode()) {
|
||||||
// Increment parallel level for SPMD warps.
|
|
||||||
if (GetLaneId() == 0)
|
|
||||||
parallelLevel[GetWarpId()] =
|
|
||||||
1 + (GetNumberOfThreadsInBlock() > 1 ? OMP_ACTIVE_PARALLEL_LEVEL : 0);
|
|
||||||
// TODO: Is that synchronization correct/needed? Can only using a memory
|
|
||||||
// fence ensure consistency?
|
|
||||||
__kmpc_impl_syncthreads();
|
|
||||||
|
|
||||||
__kmp_invoke_microtask(global_tid, 0, fn, args, nargs);
|
__kmp_invoke_microtask(global_tid, 0, fn, args, nargs);
|
||||||
|
|
||||||
// Decrement (zero out) parallel level for SPMD warps.
|
|
||||||
if (GetLaneId() == 0)
|
|
||||||
parallelLevel[GetWarpId()] = 0;
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue