forked from OSchip/llvm-project
Fix PR30890: Reduction across teams hangs
__kmpc_reduce_nowait() correctly swapped the teams for reductions in a teams construct. Apply the same logic to __kmpc_reduce() and __kmpc_end_reduce().

Differential Revision: https://reviews.llvm.org/D40753
llvm-svn: 319788
This commit is contained in:
parent
7e7566323d
commit
a4ca525c1b
openmp/runtime
|
@ -3202,6 +3202,43 @@ __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
|
|||
#endif // KMP_USE_DYNAMIC_LOCK
|
||||
} // __kmp_end_critical_section_reduce_block
|
||||
|
||||
#if OMP_40_ENABLED
|
||||
static __forceinline int
|
||||
__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
|
||||
int *task_state) {
|
||||
kmp_team_t *team;
|
||||
|
||||
// Check if we are inside the teams construct?
|
||||
if (th->th.th_teams_microtask) {
|
||||
*team_p = team = th->th.th_team;
|
||||
if (team->t.t_level == th->th.th_teams_level) {
|
||||
// This is reduction at teams construct.
|
||||
KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
|
||||
// Let's swap teams temporarily for the reduction.
|
||||
th->th.th_info.ds.ds_tid = team->t.t_master_tid;
|
||||
th->th.th_team = team->t.t_parent;
|
||||
th->th.th_team_nproc = th->th.th_team->t.t_nproc;
|
||||
th->th.th_task_team = th->th.th_team->t.t_task_team[0];
|
||||
*task_state = th->th.th_task_state;
|
||||
th->th.th_task_state = 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static __forceinline void
|
||||
__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
|
||||
// Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
|
||||
th->th.th_info.ds.ds_tid = 0;
|
||||
th->th.th_team = team;
|
||||
th->th.th_team_nproc = team->t.t_nproc;
|
||||
th->th.th_task_team = team->t.t_task_team[task_state];
|
||||
th->th.th_task_state = task_state;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* 2.a.i. Reduce Block without a terminating barrier */
|
||||
/*!
|
||||
@ingroup SYNCHRONIZATION
|
||||
|
@ -3228,8 +3265,8 @@ __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
|
|||
int retval = 0;
|
||||
PACKED_REDUCTION_METHOD_T packed_reduction_method;
|
||||
#if OMP_40_ENABLED
|
||||
kmp_team_t *team;
|
||||
kmp_info_t *th;
|
||||
kmp_team_t *team;
|
||||
int teams_swapped = 0, task_state;
|
||||
#endif
|
||||
KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
|
||||
|
@ -3254,22 +3291,7 @@ __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
|
|||
|
||||
#if OMP_40_ENABLED
|
||||
th = __kmp_thread_from_gtid(global_tid);
|
||||
if (th->th.th_teams_microtask) { // AC: check if we are inside the teams
|
||||
// construct?
|
||||
team = th->th.th_team;
|
||||
if (team->t.t_level == th->th.th_teams_level) {
|
||||
// this is reduction at teams construct
|
||||
KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
|
||||
// Let's swap teams temporarily for the reduction barrier
|
||||
teams_swapped = 1;
|
||||
th->th.th_info.ds.ds_tid = team->t.t_master_tid;
|
||||
th->th.th_team = team->t.t_parent;
|
||||
th->th.th_team_nproc = th->th.th_team->t.t_nproc;
|
||||
th->th.th_task_team = th->th.th_team->t.t_task_team[0];
|
||||
task_state = th->th.th_task_state;
|
||||
th->th.th_task_state = 0;
|
||||
}
|
||||
}
|
||||
teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
|
||||
#endif // OMP_40_ENABLED
|
||||
|
||||
// packed_reduction_method value will be reused by __kmp_end_reduce* function,
|
||||
|
@ -3373,12 +3395,7 @@ __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
|
|||
}
|
||||
#if OMP_40_ENABLED
|
||||
if (teams_swapped) {
|
||||
// Restore thread structure
|
||||
th->th.th_info.ds.ds_tid = 0;
|
||||
th->th.th_team = team;
|
||||
th->th.th_team_nproc = team->t.t_nproc;
|
||||
th->th.th_task_team = team->t.t_task_team[task_state];
|
||||
th->th.th_task_state = task_state;
|
||||
__kmp_restore_swapped_teams(th, team, task_state);
|
||||
}
|
||||
#endif
|
||||
KA_TRACE(
|
||||
|
@ -3466,6 +3483,11 @@ kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
|
|||
KMP_COUNT_BLOCK(REDUCE_wait);
|
||||
int retval = 0;
|
||||
PACKED_REDUCTION_METHOD_T packed_reduction_method;
|
||||
#if OMP_40_ENABLED
|
||||
kmp_info_t *th;
|
||||
kmp_team_t *team;
|
||||
int teams_swapped = 0, task_state;
|
||||
#endif
|
||||
|
||||
KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
|
||||
|
||||
|
@ -3487,6 +3509,11 @@ kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
|
|||
__kmp_push_sync(global_tid, ct_reduce, loc, NULL);
|
||||
#endif
|
||||
|
||||
#if OMP_40_ENABLED
|
||||
th = __kmp_thread_from_gtid(global_tid);
|
||||
teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
|
||||
#endif // OMP_40_ENABLED
|
||||
|
||||
packed_reduction_method = __kmp_determine_reduction_method(
|
||||
loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
|
||||
__KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
|
||||
|
@ -3548,6 +3575,11 @@ kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
|
|||
// should never reach this block
|
||||
KMP_ASSERT(0); // "unexpected method"
|
||||
}
|
||||
#if OMP_40_ENABLED
|
||||
if (teams_swapped) {
|
||||
__kmp_restore_swapped_teams(th, team, task_state);
|
||||
}
|
||||
#endif
|
||||
|
||||
KA_TRACE(10,
|
||||
("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
|
||||
|
@ -3570,9 +3602,19 @@ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
|
|||
kmp_critical_name *lck) {
|
||||
|
||||
PACKED_REDUCTION_METHOD_T packed_reduction_method;
|
||||
#if OMP_40_ENABLED
|
||||
kmp_info_t *th;
|
||||
kmp_team_t *team;
|
||||
int teams_swapped = 0, task_state;
|
||||
#endif
|
||||
|
||||
KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
|
||||
|
||||
#if OMP_40_ENABLED
|
||||
th = __kmp_thread_from_gtid(global_tid);
|
||||
teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
|
||||
#endif // OMP_40_ENABLED
|
||||
|
||||
packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
|
||||
|
||||
// this barrier should be visible to a customer and to the threading profile
|
||||
|
@ -3660,6 +3702,11 @@ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
|
|||
// should never reach this block
|
||||
KMP_ASSERT(0); // "unexpected method"
|
||||
}
|
||||
#if OMP_40_ENABLED
|
||||
if (teams_swapped) {
|
||||
__kmp_restore_swapped_teams(th, team, task_state);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (__kmp_env_consistency_check)
|
||||
__kmp_pop_sync(global_tid, ct_reduce, loc);
|
||||
|
|
|
@ -0,0 +1,63 @@
|
|||
// RUN: %libomp-compile-and-run
|
||||
//
|
||||
// The test checks the teams construct with reduction executed on the host.
|
||||
//
|
||||
|
||||
#include <stdio.h>
|
||||
#include <omp.h>
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifndef N_TEAMS
|
||||
#define N_TEAMS 4
|
||||
#endif
|
||||
#ifndef N_THR
|
||||
#define N_THR 3
|
||||
#endif
|
||||
|
||||
// Internal library stuff to emulate compiler's code generation:
|
||||
#ifdef __cplusplus
extern "C" {
#endif

// Source-location descriptor passed as the first argument of every runtime
// entry point declared below.
// NOTE(review): presumably must stay layout-compatible with the runtime's own
// ident_t -- confirm against the runtime headers.
typedef struct {
  int32_t reserved_1;
  int32_t flags;
  int32_t reserved_2;
  int32_t reserved_3;
  const char *psource;
} ident_t;

// Single location record shared by all runtime calls in this test.
static ident_t dummy_loc = {0, 2, 0, 0, ";dummyFile;dummyFunc;0;0;;"};

// Storage handed to __kmpc_reduce/__kmpc_end_reduce as their `lck` argument.
typedef int32_t kmp_critical_name[8];
kmp_critical_name crit;

// Runtime entry points called directly by this test.
int32_t __kmpc_global_thread_num(ident_t *loc);
void __kmpc_push_num_teams(ident_t *loc, int32_t global_tid, int32_t num_teams,
                           int32_t num_threads);
void __kmpc_fork_teams(ident_t *loc, int32_t argc, void *microtask, ...);
int32_t __kmpc_reduce(ident_t *loc, int32_t global_tid, int32_t num_vars,
                      size_t reduce_size, void *reduce_data, void *reduce_func,
                      kmp_critical_name *lck);
void __kmpc_end_reduce(ident_t *loc, int32_t global_tid,
                       kmp_critical_name *lck);

#ifdef __cplusplus
}
#endif
|
||||
|
||||
// Outlined entry point:
|
||||
void outlined(int32_t *gtid, int32_t *tid) {
|
||||
int32_t ret = __kmpc_reduce(&dummy_loc, *gtid, 0, 0, NULL, NULL, &crit);
|
||||
__kmpc_end_reduce(&dummy_loc, *gtid, &crit);
|
||||
}
|
||||
|
||||
int main() {
|
||||
int32_t th = __kmpc_global_thread_num(NULL); // registers initial thread
|
||||
__kmpc_push_num_teams(&dummy_loc, th, N_TEAMS, N_THR);
|
||||
__kmpc_fork_teams(&dummy_loc, 0, &outlined);
|
||||
|
||||
// Test did not hang -> passed!
|
||||
printf("passed\n");
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue