Revert "[OpenMP] Add Two-level Distributed Barrier"
This reverts commit 25073a4ecf.
This has been breaking non-x86 OpenMP builds for a while now. Until a solution
is ready to be upstreamed, we revert the feature and unblock those builds.
See:
https://reviews.llvm.org/rG25073a4ecfc9b2e3cb76776185e63bfdb094cd98#1005821
The currently proposed fix (D104788) seems not to be ready yet:
https://reviews.llvm.org/D104788#2841928
parent bc8bb3df35
commit 4eb90e893f
@@ -269,7 +269,6 @@ Using_int_Value "%1$s value \"%2$d\" will be used."
Using_uint_Value "%1$s value \"%2$u\" will be used."
Using_uint64_Value "%1$s value \"%2$s\" will be used."
Using_str_Value "%1$s value \"%2$s\" will be used."
BarrierPatternOverride "Mixing other barrier patterns with dist is prohibited. Using dist for all barrier patterns."
MaxValueUsing "%1$s maximum value \"%2$d\" will be used."
MinValueUsing "%1$s minimum value \"%2$d\" will be used."
MemoryAllocFailed "Memory allocation failed."

@@ -115,7 +115,6 @@ typedef unsigned int kmp_hwloc_depth_t;
#include "kmp_debug.h"
#include "kmp_lock.h"
#include "kmp_version.h"
#include "kmp_barrier.h"
#if USE_DEBUGGER
#include "kmp_debugger.h"
#endif

@@ -264,7 +263,6 @@ typedef union kmp_root kmp_root_p;

template <bool C = false, bool S = true> class kmp_flag_32;
template <bool C = false, bool S = true> class kmp_flag_64;
template <bool C = false, bool S = true> class kmp_atomic_flag_64;
class kmp_flag_oncore;

#ifdef __cplusplus

@@ -1881,15 +1879,6 @@ typedef struct kmp_disp {
  0 // Thread th_reap_state: not safe to reap (tasking)
#define KMP_SAFE_TO_REAP 1 // Thread th_reap_state: safe to reap (not tasking)

// The flag_type describes the storage used for the flag.
enum flag_type {
  flag32, /**< atomic 32 bit flags */
  flag64, /**< 64 bit flags */
  atomic_flag64, /**< atomic 64 bit flags */
  flag_oncore, /**< special 64-bit flag for on-core barrier (hierarchical) */
  flag_unset
};

enum barrier_type {
  bs_plain_barrier = 0, /* 0, All non-fork/join barriers (except reduction
                           barriers if enabled) */

@@ -1913,7 +1902,6 @@ typedef enum kmp_bar_pat { /* Barrier communication patterns */
  bp_hyper_bar = 2, /* Hypercube-embedded tree with min
                       branching factor 2^n */
  bp_hierarchical_bar = 3, /* Machine hierarchy tree */
  bp_dist_bar = 4, /* Distributed barrier */
  bp_last_bar /* Placeholder to mark the end */
} kmp_bar_pat_e;

@@ -2638,7 +2626,6 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {
  /* while awaiting queuing lock acquire */

  volatile void *th_sleep_loc; // this points at a kmp_flag<T>
  flag_type th_sleep_loc_type; // enum type of flag stored in th_sleep_loc

  ident_t *th_ident;
  unsigned th_x; // Random number generator data

@@ -2659,9 +2646,6 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {
     written by the worker thread) */
  kmp_uint8 th_active_in_pool; // included in count of #active threads in pool
  int th_active; // ! sleeping; 32 bits for TCR/TCW
  std::atomic<kmp_uint32> th_used_in_team; // Flag indicating use in team
  // 0 = not used in team; 1 = used in team;
  // 2 = transitioning to not used in team; 3 = transitioning to used in team
  struct cons_header *th_cons; // used for consistency check
#if KMP_USE_HIER_SCHED
  // used for hierarchical scheduling

@@ -2841,7 +2825,6 @@ typedef struct KMP_ALIGN_CACHE kmp_base_team {
#if USE_ITT_BUILD
  void *t_stack_id; // team specific stack stitching id (for ittnotify)
#endif /* USE_ITT_BUILD */
  distributedBarrier *b; // Distributed barrier data associated with team
} kmp_base_team_t;

union KMP_ALIGN_CACHE kmp_team {

@@ -4143,26 +4126,18 @@ template <bool C, bool S>
extern void __kmp_suspend_32(int th_gtid, kmp_flag_32<C, S> *flag);
template <bool C, bool S>
extern void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag);
template <bool C, bool S>
extern void __kmp_atomic_suspend_64(int th_gtid,
                                    kmp_atomic_flag_64<C, S> *flag);
extern void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag);
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
template <bool C, bool S>
extern void __kmp_mwait_32(int th_gtid, kmp_flag_32<C, S> *flag);
template <bool C, bool S>
extern void __kmp_mwait_64(int th_gtid, kmp_flag_64<C, S> *flag);
template <bool C, bool S>
extern void __kmp_atomic_mwait_64(int th_gtid, kmp_atomic_flag_64<C, S> *flag);
extern void __kmp_mwait_oncore(int th_gtid, kmp_flag_oncore *flag);
#endif
template <bool C, bool S>
extern void __kmp_resume_32(int target_gtid, kmp_flag_32<C, S> *flag);
template <bool C, bool S>
extern void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag);
template <bool C, bool S>
extern void __kmp_atomic_resume_64(int target_gtid,
                                   kmp_atomic_flag_64<C, S> *flag);
extern void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag);

template <bool C, bool S>

@@ -4181,14 +4156,6 @@ int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid,
                           void *itt_sync_obj,
#endif /* USE_ITT_BUILD */
                           kmp_int32 is_constrained);
template <bool C, bool S>
int __kmp_atomic_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid,
                                  kmp_atomic_flag_64<C, S> *flag,
                                  int final_spin, int *thread_finished,
#if USE_ITT_BUILD
                                  void *itt_sync_obj,
#endif /* USE_ITT_BUILD */
                                  kmp_int32 is_constrained);
int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid,
                               kmp_flag_oncore *flag, int final_spin,
                               int *thread_finished,

@@ -732,7 +732,7 @@ static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,

#define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  (*lhs) = (TYPE)((*lhs)OP rhs); \
  (*lhs) = (TYPE)((*lhs)OP((TYPE)rhs)); \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// ------------------------------------------------------------------------

@@ -791,14 +791,14 @@ static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,
  { \
    TYPE old_value, new_value; \
    old_value = *(TYPE volatile *)lhs; \
    new_value = (TYPE)(old_value OP rhs); \
    new_value = (TYPE)(old_value OP((TYPE)rhs)); \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      KMP_DO_PAUSE; \
      \
      old_value = *(TYPE volatile *)lhs; \
      new_value = (TYPE)(old_value OP rhs); \
      new_value = (TYPE)(old_value OP((TYPE)rhs)); \
    } \
  }

@@ -10,14 +10,12 @@
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_wait_release.h"
#include "kmp_barrier.h"
#include "kmp_itt.h"
#include "kmp_os.h"
#include "kmp_stats.h"
#include "ompt-specific.h"
// for distributed barrier
#include "kmp_affinity.h"

#if KMP_MIC
#include <immintrin.h>

@@ -42,517 +40,6 @@
void __kmp_print_structure(void); // Forward declaration

// ---------------------------- Barrier Algorithms ----------------------------
// Distributed barrier

// Compute how many threads to have polling each cache-line.
// We want to limit the number of writes to IDEAL_GO_RESOLUTION.
void distributedBarrier::computeVarsForN(size_t n) {
  int nsockets = 1;
  if (__kmp_topology) {
    int socket_level = __kmp_topology->get_level(KMP_HW_SOCKET);
    int core_level = __kmp_topology->get_level(KMP_HW_CORE);
    int ncores_per_socket =
        __kmp_topology->calculate_ratio(core_level, socket_level);
    nsockets = __kmp_topology->get_count(socket_level);

    if (nsockets <= 0)
      nsockets = 1;
    if (ncores_per_socket <= 0)
      ncores_per_socket = 1;

    threads_per_go = ncores_per_socket >> 1;
    if (!fix_threads_per_go) {
      // Minimize num_gos
      if (threads_per_go > 4) {
        if (KMP_OPTIMIZE_FOR_REDUCTIONS) {
          threads_per_go = threads_per_go >> 1;
        }
        if (threads_per_go > 4 && nsockets == 1)
          threads_per_go = threads_per_go >> 1;
      }
    }
    if (threads_per_go == 0)
      threads_per_go = 1;
    fix_threads_per_go = true;
    num_gos = n / threads_per_go;
    if (n % threads_per_go)
      num_gos++;
    if (nsockets == 1 || num_gos == 1)
      num_groups = 1;
    else {
      num_groups = num_gos / nsockets;
      if (num_gos % nsockets)
        num_groups++;
    }
    if (num_groups <= 0)
      num_groups = 1;
    gos_per_group = num_gos / num_groups;
    if (num_gos % num_groups)
      gos_per_group++;
    threads_per_group = threads_per_go * gos_per_group;
  } else {
    num_gos = n / threads_per_go;
    if (n % threads_per_go)
      num_gos++;
    if (num_gos == 1)
      num_groups = 1;
    else {
      num_groups = num_gos / 2;
      if (num_gos % 2)
        num_groups++;
    }
    gos_per_group = num_gos / num_groups;
    if (num_gos % num_groups)
      gos_per_group++;
    threads_per_group = threads_per_go * gos_per_group;
  }
}

void distributedBarrier::computeGo(size_t n) {
  // Minimize num_gos
  for (num_gos = 1;; num_gos++)
    if (IDEAL_CONTENTION * num_gos >= n)
      break;
  threads_per_go = n / num_gos;
  if (n % num_gos)
    threads_per_go++;
  while (num_gos > MAX_GOS) {
    threads_per_go++;
    num_gos = n / threads_per_go;
    if (n % threads_per_go)
      num_gos++;
  }
  computeVarsForN(n);
}

// This function is to resize the barrier arrays when the new number of threads
// exceeds max_threads, which is the current size of all the arrays
void distributedBarrier::resize(size_t nthr) {
  KMP_DEBUG_ASSERT(nthr > max_threads);

  // expand to requested size * 2
  max_threads = nthr * 2;

  // allocate arrays to new max threads
  for (int i = 0; i < MAX_ITERS; ++i) {
    if (flags[i])
      flags[i] = (flags_s *)KMP_INTERNAL_REALLOC(flags[i],
                                                 max_threads * sizeof(flags_s));
    else
      flags[i] = (flags_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(flags_s));
  }

  if (go)
    go = (go_s *)KMP_INTERNAL_REALLOC(go, max_threads * sizeof(go_s));
  else
    go = (go_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(go_s));

  if (iter)
    iter = (iter_s *)KMP_INTERNAL_REALLOC(iter, max_threads * sizeof(iter_s));
  else
    iter = (iter_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(iter_s));

  if (sleep)
    sleep =
        (sleep_s *)KMP_INTERNAL_REALLOC(sleep, max_threads * sizeof(sleep_s));
  else
    sleep = (sleep_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(sleep_s));
}

// This function is to set all the go flags that threads might be waiting
// on, and when blocktime is not infinite, it should be followed by a wake-up
// call to each thread
kmp_uint64 distributedBarrier::go_release() {
  kmp_uint64 next_go = iter[0].iter + distributedBarrier::MAX_ITERS;
  for (size_t j = 0; j < num_gos; j++) {
    go[j].go.store(next_go);
  }
  return next_go;
}

void distributedBarrier::go_reset() {
  for (size_t j = 0; j < max_threads; ++j) {
    for (size_t i = 0; i < distributedBarrier::MAX_ITERS; ++i) {
      flags[i][j].stillNeed = 1;
    }
    go[j].go.store(0);
    iter[j].iter = 0;
  }
}

// This function inits/re-inits the distributed barrier for a particular number
// of threads. If a resize of arrays is needed, it calls the resize function.
void distributedBarrier::init(size_t nthr) {
  size_t old_max = max_threads;
  if (nthr > max_threads) { // need more space in arrays
    resize(nthr);
  }

  for (size_t i = 0; i < max_threads; i++) {
    for (size_t j = 0; j < distributedBarrier::MAX_ITERS; j++) {
      flags[j][i].stillNeed = 1;
    }
    go[i].go.store(0);
    iter[i].iter = 0;
    if (i >= old_max)
      sleep[i].sleep = false;
  }

  // Recalculate num_gos, etc. based on new nthr
  computeVarsForN(nthr);

  num_threads = nthr;

  if (team_icvs == NULL)
    team_icvs = __kmp_allocate(sizeof(kmp_internal_control_t));
}

// This function is used only when KMP_BLOCKTIME is not infinite.
// static
void __kmp_dist_barrier_wakeup(enum barrier_type bt, kmp_team_t *team,
                               size_t start, size_t stop, size_t inc,
                               size_t tid) {
  KMP_DEBUG_ASSERT(__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME);
  if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
    return;

  kmp_info_t **other_threads = team->t.t_threads;
  for (size_t thr = start; thr < stop; thr += inc) {
    KMP_DEBUG_ASSERT(other_threads[thr]);
    int gtid = other_threads[thr]->th.th_info.ds.ds_gtid;
    // Wake up worker regardless of if it appears to be sleeping or not
    __kmp_atomic_resume_64(gtid, (kmp_atomic_flag_64<> *)NULL);
  }
}

static void
__kmp_dist_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
                          int tid, void (*reduce)(void *, void *)
                              USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_dist_gather);
  kmp_team_t *team;
  distributedBarrier *b;
  kmp_info_t **other_threads;
  kmp_uint64 my_current_iter, my_next_iter;
  kmp_uint32 nproc;
  bool group_leader;

  team = this_thr->th.th_team;
  nproc = this_thr->th.th_team_nproc;
  other_threads = team->t.t_threads;
  b = team->t.b;
  my_current_iter = b->iter[tid].iter;
  my_next_iter = (my_current_iter + 1) % distributedBarrier::MAX_ITERS;
  group_leader = ((tid % b->threads_per_group) == 0);

  KA_TRACE(20,
           ("__kmp_dist_barrier_gather: T#%d(%d:%d) enter; barrier type %d\n",
            gtid, team->t.t_id, tid, bt));

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif

  if (group_leader) {
    // Start from the thread after the group leader
    size_t group_start = tid + 1;
    size_t group_end = tid + b->threads_per_group;
    size_t threads_pending = 0;

    if (group_end > nproc)
      group_end = nproc;
    do { // wait for threads in my group
      threads_pending = 0;
      // Check all the flags every time to avoid branch misspredict
      for (size_t thr = group_start; thr < group_end; thr++) {
        // Each thread uses a different cache line
        threads_pending += b->flags[my_current_iter][thr].stillNeed;
      }
      // Execute tasks here
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        kmp_task_team_t *task_team = this_thr->th.th_task_team;
        if (task_team != NULL) {
          if (TCR_SYNC_4(task_team->tt.tt_active)) {
            if (KMP_TASKING_ENABLED(task_team)) {
              int tasks_completed = FALSE;
              __kmp_atomic_execute_tasks_64(
                  this_thr, gtid, (kmp_atomic_flag_64<> *)NULL, FALSE,
                  &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
            } else
              this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
          }
        } else {
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        } // if
      }
      if (TCR_4(__kmp_global.g.g_done)) {
        if (__kmp_global.g.g_abort)
          __kmp_abort_thread();
        break;
      } else if (__kmp_tasking_mode != tskm_immediate_exec &&
                 this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
        this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
      }
    } while (threads_pending > 0);

    if (reduce) { // Perform reduction if needed
      OMPT_REDUCTION_DECL(this_thr, gtid);
      OMPT_REDUCTION_BEGIN;
      // Group leader reduces all threads in group
      for (size_t thr = group_start; thr < group_end; thr++) {
        (*reduce)(this_thr->th.th_local.reduce_data,
                  other_threads[thr]->th.th_local.reduce_data);
      }
      OMPT_REDUCTION_END;
    }

    // Set flag for next iteration
    b->flags[my_next_iter][tid].stillNeed = 1;
    // Each thread uses a different cache line; resets stillNeed to 0 to
    // indicate it has reached the barrier
    b->flags[my_current_iter][tid].stillNeed = 0;

    do { // wait for all group leaders
      threads_pending = 0;
      for (size_t thr = 0; thr < nproc; thr += b->threads_per_group) {
        threads_pending += b->flags[my_current_iter][thr].stillNeed;
      }
      // Execute tasks here
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        kmp_task_team_t *task_team = this_thr->th.th_task_team;
        if (task_team != NULL) {
          if (TCR_SYNC_4(task_team->tt.tt_active)) {
            if (KMP_TASKING_ENABLED(task_team)) {
              int tasks_completed = FALSE;
              __kmp_atomic_execute_tasks_64(
                  this_thr, gtid, (kmp_atomic_flag_64<> *)NULL, FALSE,
                  &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
            } else
              this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
          }
        } else {
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        } // if
      }
      if (TCR_4(__kmp_global.g.g_done)) {
        if (__kmp_global.g.g_abort)
          __kmp_abort_thread();
        break;
      } else if (__kmp_tasking_mode != tskm_immediate_exec &&
                 this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
        this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
      }
    } while (threads_pending > 0);

    if (reduce) { // Perform reduction if needed
      if (KMP_MASTER_TID(tid)) { // Master reduces over group leaders
        OMPT_REDUCTION_DECL(this_thr, gtid);
        OMPT_REDUCTION_BEGIN;
        for (size_t thr = b->threads_per_group; thr < nproc;
             thr += b->threads_per_group) {
          (*reduce)(this_thr->th.th_local.reduce_data,
                    other_threads[thr]->th.th_local.reduce_data);
        }
        OMPT_REDUCTION_END;
      }
    }
  } else {
    // Set flag for next iteration
    b->flags[my_next_iter][tid].stillNeed = 1;
    // Each thread uses a different cache line; resets stillNeed to 0 to
    // indicate it has reached the barrier
    b->flags[my_current_iter][tid].stillNeed = 0;
  }

  KMP_MFENCE();

  KA_TRACE(20,
           ("__kmp_dist_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
            gtid, team->t.t_id, tid, bt));
}

static void __kmp_dist_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_dist_release);
  kmp_team_t *team;
  distributedBarrier *b;
  kmp_bstate_t *thr_bar;
  kmp_uint64 my_current_iter, next_go;
  size_t my_go_index;
  bool group_leader;

  KA_TRACE(20, ("__kmp_dist_barrier_release: T#%d(%d) enter; barrier type %d\n",
                gtid, tid, bt));

  thr_bar = &this_thr->th.th_bar[bt].bb;

  if (!KMP_MASTER_TID(tid)) {
    // workers and non-master group leaders need to check their presence in team
    do {
      if (this_thr->th.th_used_in_team.load() != 1 &&
          this_thr->th.th_used_in_team.load() != 3) {
        // Thread is not in use in a team. Wait on location in tid's thread
        // struct. The 0 value tells anyone looking that this thread is spinning
        // or sleeping until this location becomes 3 again; 3 is the transition
        // state to get to 1 which is waiting on go and being in the team
        kmp_flag_32<false, false> my_flag(&(this_thr->th.th_used_in_team), 3);
        if (KMP_COMPARE_AND_STORE_ACQ32(&(this_thr->th.th_used_in_team), 2,
                                        0) ||
            this_thr->th.th_used_in_team.load() == 0) {
          my_flag.wait(this_thr, true, itt_sync_obj);
        }
#if USE_ITT_BUILD && USE_ITT_NOTIFY
        if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
          // In fork barrier where we could not get the object reliably
          itt_sync_obj =
              __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
          // Cancel wait on previous parallel region...
          __kmp_itt_task_starting(itt_sync_obj);

          if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
            return;

          itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
          if (itt_sync_obj != NULL)
            // Call prepare as early as possible for "new" barrier
            __kmp_itt_task_finished(itt_sync_obj);
        } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
            if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
          return;
      }
      if (this_thr->th.th_used_in_team.load() != 1 &&
          this_thr->th.th_used_in_team.load() != 3) // spurious wake-up?
        continue;
      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

      // At this point, the thread thinks it is in use in a team, or in
      // transition to be used in a team, but it might have reached this barrier
      // before it was marked unused by the team. Unused threads are awoken and
      // shifted to wait on local thread struct elsewhere. It also might reach
      // this point by being picked up for use by a different team. Either way,
      // we need to update the tid.
      tid = __kmp_tid_from_gtid(gtid);
      team = this_thr->th.th_team;
      KMP_DEBUG_ASSERT(tid >= 0);
      KMP_DEBUG_ASSERT(team);
      b = team->t.b;
      my_current_iter = b->iter[tid].iter;
      next_go = my_current_iter + distributedBarrier::MAX_ITERS;
      my_go_index = tid / b->threads_per_go;
      if (this_thr->th.th_used_in_team.load() == 3) {
        KMP_COMPARE_AND_STORE_ACQ32(&(this_thr->th.th_used_in_team), 3, 1);
      }
      // Check if go flag is set
      if (b->go[my_go_index].go.load() != next_go) {
        // Wait on go flag on team
        kmp_atomic_flag_64<false, true> my_flag(
            &(b->go[my_go_index].go), next_go, &(b->sleep[tid].sleep));
        my_flag.wait(this_thr, true, itt_sync_obj);
        KMP_DEBUG_ASSERT(my_current_iter == b->iter[tid].iter ||
                         b->iter[tid].iter == 0);
        KMP_DEBUG_ASSERT(b->sleep[tid].sleep == false);
      }

      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;
      // At this point, the thread's go location was set. This means the primary
      // thread is safely in the barrier, and so this thread's data is
      // up-to-date, but we should check again that this thread is really in
      // use in the team, as it could have been woken up for the purpose of
      // changing team size, or reaping threads at shutdown.
      if (this_thr->th.th_used_in_team.load() == 1)
        break;
    } while (1);

    if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
      return;

    group_leader = ((tid % b->threads_per_group) == 0);
    if (group_leader) {
      // Tell all the threads in my group they can go!
      for (size_t go_idx = my_go_index + 1;
           go_idx < my_go_index + b->gos_per_group; go_idx++) {
        b->go[go_idx].go.store(next_go);
      }
      // Fence added so that workers can see changes to go. sfence inadequate.
      KMP_MFENCE();
    }

#if KMP_BARRIER_ICV_PUSH
    if (propagate_icvs) { // copy ICVs to final dest
      __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team,
                               tid, FALSE);
      copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                (kmp_internal_control_t *)team->t.b->team_icvs);
      copy_icvs(&thr_bar->th_fixed_icvs,
                &team->t.t_implicit_task_taskdata[tid].td_icvs);
    }
#endif
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && group_leader) {
      // This thread is now awake and participating in the barrier;
      // wake up the other threads in the group
      size_t nproc = this_thr->th.th_team_nproc;
      size_t group_end = tid + b->threads_per_group;
      if (nproc < group_end)
        group_end = nproc;
      __kmp_dist_barrier_wakeup(bt, team, tid + 1, group_end, 1, tid);
    }
  } else { // Primary thread
    team = this_thr->th.th_team;
    b = team->t.b;
    my_current_iter = b->iter[tid].iter;
    next_go = my_current_iter + distributedBarrier::MAX_ITERS;
#if KMP_BARRIER_ICV_PUSH
    if (propagate_icvs) {
      // primary thread has ICVs in final destination; copy
      copy_icvs(&thr_bar->th_fixed_icvs,
                &team->t.t_implicit_task_taskdata[tid].td_icvs);
    }
#endif
    // Tell all the group leaders they can go!
    for (size_t go_idx = 0; go_idx < b->num_gos; go_idx += b->gos_per_group) {
      b->go[go_idx].go.store(next_go);
    }

    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
      // Wake-up the group leaders
      size_t nproc = this_thr->th.th_team_nproc;
      __kmp_dist_barrier_wakeup(bt, team, tid + b->threads_per_group, nproc,
                                b->threads_per_group, tid);
    }

    // Tell all the threads in my group they can go!
    for (size_t go_idx = 1; go_idx < b->gos_per_group; go_idx++) {
      b->go[go_idx].go.store(next_go);
    }

    // Fence added so that workers can see changes to go. sfence inadequate.
    KMP_MFENCE();

    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
      // Wake-up the other threads in my group
      size_t nproc = this_thr->th.th_team_nproc;
      size_t group_end = tid + b->threads_per_group;
      if (nproc < group_end)
        group_end = nproc;
      __kmp_dist_barrier_wakeup(bt, team, tid + 1, group_end, 1, tid);
    }
  }
  // Update to next iteration
  KMP_ASSERT(my_current_iter == b->iter[tid].iter);
  b->iter[tid].iter = (b->iter[tid].iter + 1) % distributedBarrier::MAX_ITERS;

  KA_TRACE(
      20, ("__kmp_dist_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
           gtid, team->t.t_id, tid, bt));
}

// Linear Barrier
template <bool cancellable = false>

@@ -1907,11 +1394,6 @@ static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split,
        bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj));
  } else {
    switch (__kmp_barrier_gather_pattern[bt]) {
    case bp_dist_bar: {
      __kmp_dist_barrier_gather(bt, this_thr, gtid, tid,
                                reduce USE_ITT_BUILD_ARG(itt_sync_obj));
      break;
    }
    case bp_hyper_bar: {
      // don't set branch bits to 0; use linear
      KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]);

@@ -2025,12 +1507,6 @@ static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split,
        bt, this_thr, gtid, tid, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
  } else {
    switch (__kmp_barrier_release_pattern[bt]) {
    case bp_dist_bar: {
      KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
      __kmp_dist_barrier_release(bt, this_thr, gtid, tid,
                                 FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
      break;
    }
    case bp_hyper_bar: {
      KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
      __kmp_hyper_barrier_release(bt, this_thr, gtid, tid,

@@ -2162,11 +1638,6 @@ void __kmp_end_split_barrier(enum barrier_type bt, int gtid) {
  if (!team->t.t_serialized) {
    if (KMP_MASTER_GTID(gtid)) {
      switch (__kmp_barrier_release_pattern[bt]) {
      case bp_dist_bar: {
        __kmp_dist_barrier_release(bt, this_thr, gtid, tid,
                                   FALSE USE_ITT_BUILD_ARG(NULL));
        break;
      }
      case bp_hyper_bar: {
        KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
        __kmp_hyper_barrier_release(bt, this_thr, gtid, tid,

@@ -2278,8 +1749,8 @@ void __kmp_join_barrier(int gtid) {

  if (__kmp_tasking_mode == tskm_extra_barrier) {
    __kmp_tasking_barrier(team, this_thr, gtid);
    KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n",
                  gtid, team_id, tid));
    KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past taking barrier\n", gtid,
                  team_id, tid));
  }
#ifdef KMP_DEBUG
  if (__kmp_tasking_mode != tskm_immediate_exec) {

@@ -2288,9 +1759,8 @@ void __kmp_join_barrier(int gtid) {
                  __kmp_gtid_from_thread(this_thr), team_id,
                  team->t.t_task_team[this_thr->th.th_task_state],
                  this_thr->th.th_task_team));
    if (this_thr->th.th_task_team)
      KMP_DEBUG_ASSERT(this_thr->th.th_task_team ==
                       team->t.t_task_team[this_thr->th.th_task_state]);
    KMP_DEBUG_ASSERT(this_thr->th.th_task_team ==
                     team->t.t_task_team[this_thr->th.th_task_state]);
  }
#endif /* KMP_DEBUG */

@@ -2316,11 +1786,6 @@ void __kmp_join_barrier(int gtid) {
#endif /* USE_ITT_BUILD */

  switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) {
  case bp_dist_bar: {
    __kmp_dist_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
                              NULL USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  case bp_hyper_bar: {
    KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
    __kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,

@@ -2366,7 +1831,8 @@ void __kmp_join_barrier(int gtid) {
        team_thread->th.th_stats->setIdleFlag();
        if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME &&
            team_thread->th.th_sleep_loc != NULL)
          __kmp_null_resume_wrapper(team_thread);
          __kmp_null_resume_wrapper(__kmp_gtid_from_thread(team_thread),
                                    team_thread->th.th_sleep_loc);
      }
#endif
#if USE_ITT_BUILD

@@ -2513,11 +1979,6 @@ void __kmp_fork_barrier(int gtid, int tid) {
  } // primary thread

  switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) {
  case bp_dist_bar: {
    __kmp_dist_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                               TRUE USE_ITT_BUILD_ARG(NULL));
    break;
  }
  case bp_hyper_bar: {
    KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
    __kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,

@@ -1,109 +0,0 @@
/*
 * kmp_barrier.h
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_BARRIER_H
#define KMP_BARRIER_H

#include "kmp.h"

// Use four cache lines: MLC tends to prefetch the next or previous cache line
// creating a possible fake conflict between cores, so this is the only way to
// guarantee that no such prefetch can happen.
#ifndef KMP_FOURLINE_ALIGN_CACHE
#define KMP_FOURLINE_ALIGN_CACHE KMP_ALIGN(4 * CACHE_LINE)
#endif

#define KMP_OPTIMIZE_FOR_REDUCTIONS 0

class distributedBarrier {
  struct flags_s {
    kmp_uint32 volatile KMP_FOURLINE_ALIGN_CACHE stillNeed;
  };

  struct go_s {
    std::atomic<kmp_uint64> KMP_FOURLINE_ALIGN_CACHE go;
  };

  struct iter_s {
    kmp_uint64 volatile KMP_FOURLINE_ALIGN_CACHE iter;
  };

  struct sleep_s {
    std::atomic<bool> KMP_FOURLINE_ALIGN_CACHE sleep;
  };

  void init(size_t nthr);
  void resize(size_t nthr);
  void computeGo(size_t n);
  void computeVarsForN(size_t n);

public:
  enum {
    MAX_ITERS = 3,
    MAX_GOS = 8,
    IDEAL_GOS = 4,
    IDEAL_CONTENTION = 16,
  };

  flags_s *flags[MAX_ITERS];
  go_s *go;
  iter_s *iter;
  sleep_s *sleep;

  size_t KMP_ALIGN_CACHE num_threads; // number of threads in barrier
  size_t KMP_ALIGN_CACHE max_threads; // size of arrays in data structure
  // number of go signals each requiring one write per iteration
  size_t KMP_ALIGN_CACHE num_gos;
  // number of groups of gos
  size_t KMP_ALIGN_CACHE num_groups;
  // threads per go signal
  size_t KMP_ALIGN_CACHE threads_per_go;
  bool KMP_ALIGN_CACHE fix_threads_per_go;
  // threads per group
  size_t KMP_ALIGN_CACHE threads_per_group;
  // number of go signals in a group
  size_t KMP_ALIGN_CACHE gos_per_group;
  void *team_icvs;

  distributedBarrier() = delete;
  ~distributedBarrier() = delete;

  // Used instead of constructor to create aligned data
  static distributedBarrier *allocate(int nThreads) {
    distributedBarrier *d = (distributedBarrier *)_mm_malloc(
        sizeof(distributedBarrier), 4 * CACHE_LINE);
    d->num_threads = 0;
    d->max_threads = 0;
    for (int i = 0; i < MAX_ITERS; ++i)
      d->flags[i] = NULL;
    d->go = NULL;
    d->iter = NULL;
    d->sleep = NULL;
    d->team_icvs = NULL;
    d->fix_threads_per_go = false;
    // calculate gos and groups ONCE on base size
    d->computeGo(nThreads);
    d->init(nThreads);
    return d;
  }

  static void deallocate(distributedBarrier *db) { _mm_free(db); }

  void update_num_threads(size_t nthr) { init(nthr); }

  bool need_resize(size_t new_nthr) { return (new_nthr > max_threads); }
  size_t get_num_threads() { return num_threads; }
  kmp_uint64 go_release();
  void go_reset();
};

#endif // KMP_BARRIER_H

@@ -110,8 +110,8 @@ char const *__kmp_barrier_type_name[bs_last_barrier] = {"plain", "forkjoin"
                                                        "reduction"
#endif // KMP_FAST_REDUCTION_BARRIER
};
char const *__kmp_barrier_pattern_name[bp_last_bar] = {
    "linear", "tree", "hyper", "hierarchical", "dist"};
char const *__kmp_barrier_pattern_name[bp_last_bar] = {"linear", "tree",
                                                       "hyper", "hierarchical"};

int __kmp_allThreadsSpecified = 0;
size_t __kmp_align_alloc = CACHE_LINE;

@@ -1019,27 +1019,6 @@ extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v);
#define KMP_MB() /* nothing to do */
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#if KMP_COMPILER_ICC
#define KMP_MFENCE_() _mm_mfence()
#define KMP_SFENCE_() _mm_sfence()
#elif KMP_COMPILER_MSVC
#define KMP_MFENCE_() MemoryBarrier()
#define KMP_SFENCE_() MemoryBarrier()
#else
#define KMP_MFENCE_() __sync_synchronize()
#define KMP_SFENCE_() __sync_synchronize()
#endif
#define KMP_MFENCE() \
  if (UNLIKELY(!__kmp_cpuinfo.initialized)) { \
    __kmp_query_cpuid(&__kmp_cpuinfo); \
  } \
  if (__kmp_cpuinfo.sse2) { \
    KMP_MFENCE_(); \
  }
#define KMP_SFENCE() KMP_SFENCE_()
#endif

#ifndef KMP_IMB
#define KMP_IMB() /* nothing to do */
#endif

@@ -109,10 +109,6 @@ static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads);
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);

/* Calculate the identifier of the current thread */
/* fast (and somewhat portable) way to get unique identifier of executing
   thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */

@@ -1210,7 +1206,7 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  this_thr->th.th_team = serial_team;
  serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

  KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
  KF_TRACE(10, ("__kmpc_serialized_parallel: T#d curtask=%p\n", global_tid,
                this_thr->th.th_current_task));
  KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
  this_thr->th.th_current_task->td_flags.executing = 0;

@@ -1569,24 +1565,15 @@ int __kmp_fork_call(ident_t *loc, int gtid,

    /* Change number of threads in the team if requested */
    if (master_set_numthreads) { // The parallel has num_threads clause
      if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
      if (master_set_numthreads < master_th->th.th_teams_size.nth) {
        // AC: only can reduce number of threads dynamically, can't increase
        kmp_info_t **other_threads = parent_team->t.t_threads;
        // NOTE: if using distributed barrier, we need to run this code block
        // even when the team size appears not to have changed from the max.
        int old_proc = master_th->th.th_teams_size.nth;
        if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
            bp_dist_bar) {
          __kmp_resize_dist_barrier(parent_team, old_proc,
                                    master_set_numthreads);
          __kmp_add_threads_to_team(parent_team, master_set_numthreads);
        }
        parent_team->t.t_nproc = master_set_numthreads;
        for (i = 0; i < master_set_numthreads; ++i) {
          other_threads[i]->th.th_team_nproc = master_set_numthreads;
        }
        // Keep extra threads hot in the team for possible next parallels
      }
      // Keep extra threads hot in the team for possible next parallels
      master_th->th.th_set_nproc = 0;
    }

@@ -1650,9 +1637,6 @@ int __kmp_fork_call(ident_t *loc, int gtid,
    }
#endif

    // Need this to happen before we determine the number of threads, not while
    // we are allocating the team
    //__kmp_push_current_task_to_thread(master_th, parent_team, 0);
    int enter_teams = 0;
    if (parent_team->t.t_active_level >=
        master_th->th.th_current_task->td_icvs.max_active_levels) {

@@ -1660,10 +1644,13 @@ int __kmp_fork_call(ident_t *loc, int gtid,
    } else {
      enter_teams = ((ap == NULL && active_level == 0) ||
                     (ap && teams_level > 0 && teams_level == level));
      nthreads = master_set_numthreads
                     ? master_set_numthreads
                     // TODO: get nproc directly from current task
                     : get__nproc_2(parent_team, master_tid);
      nthreads =
          master_set_numthreads
              ? master_set_numthreads
              : get__nproc_2(
                    parent_team,
                    master_tid); // TODO: get nproc directly from current task

      // Check if we need to take forkjoin lock? (no need for serialized
      // parallel out of teams construct). This code moved here from
      // __kmp_reserve_threads() to speedup nested serialized parallels.

@@ -1998,8 +1985,6 @@ int __kmp_fork_call(ident_t *loc, int gtid,
#endif
                                 proc_bind, &new_icvs,
                                 argc USE_NESTED_HOT_ARG(master_th));
      if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
        copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);
    } else {
      /* allocate a new parallel team */
      KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));

@@ -2010,9 +1995,6 @@ int __kmp_fork_call(ident_t *loc, int gtid,
                                 proc_bind,
                                 &master_th->th.th_current_task->td_icvs,
                                 argc USE_NESTED_HOT_ARG(master_th));
      if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
        copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
                  &master_th->th.th_current_task->td_icvs);
    }
    KF_TRACE(
        10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));

@@ -2379,12 +2361,6 @@ void __kmp_join_call(ident_t *loc, int gtid
      parent_team->t.t_stack_id = NULL;
    }
#endif

    if (team->t.t_nproc > 1 &&
        __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      team->t.b->update_num_threads(team->t.t_nproc);
      __kmp_add_threads_to_team(team, team->t.t_nproc);
    }
  }

  KMP_MB();

@@ -2672,9 +2648,6 @@ void __kmp_set_num_threads(int new_nth, int gtid) {

    __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
    }
    // Release the extra threads we don't need any more.
    for (f = new_nth; f < hot_team->t.t_nproc; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);

@@ -2694,11 +2667,6 @@ void __kmp_set_num_threads(int new_nth, int gtid) {
    }
#endif

    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      hot_team->t.b->update_num_threads(new_nth);
      __kmp_add_threads_to_team(hot_team, new_nth);
    }

    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

    // Update the t_nproc field in the threads that are still active.

@@ -4146,6 +4114,7 @@ static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
  this_thr->th.th_team_nproc = team->t.t_nproc;
  this_thr->th.th_team_master = master;
  this_thr->th.th_team_serialized = team->t.t_serialized;
  TCW_PTR(this_thr->th.th_sleep_loc, NULL);

  KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);

@@ -4314,12 +4283,6 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
    new_thr->th.th_task_state_top = 0;
    new_thr->th.th_task_state_stack_sz = 4;

    if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      // Make sure pool thread has transitioned to waiting on own thread struct
      KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
      // Thread activated in __kmp_allocate_team when increasing team size
    }

#ifdef KMP_ADJUST_BLOCKTIME
    /* Adjust blocktime back to zero if necessary */
    /* Middle initialization might not have occurred yet */

@@ -4487,9 +4450,6 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
    balign[b].bb.use_oncore_barrier = 0;
  }

  TCW_PTR(new_thr->th.th_sleep_loc, NULL);
  new_thr->th.th_sleep_loc_type = flag_unset;

  new_thr->th.th_spin_here = FALSE;
  new_thr->th.th_next_waiting = 0;
#if KMP_OS_UNIX

@@ -5069,13 +5029,6 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
    }
#endif

    if (team->t.t_nproc != new_nproc &&
        __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      // Distributed barrier may need a resize
      int old_nthr = team->t.t_nproc;
      __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
    }

    // Has the number of threads changed?
    /* Let's assume the most common case is that the number of threads is
       unchanged, and put that case first. */

@@ -5125,11 +5078,6 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
                    new_nproc));

      team->t.t_size_changed = 1;
      if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        // Barrier size already reduced earlier in this function
        // Activate team threads via th_used_in_team
        __kmp_add_threads_to_team(team, new_nproc);
      }
#if KMP_NESTED_HOT_TEAMS
      if (__kmp_hot_teams_mode == 0) {
        // AC: saved number of threads should correspond to team's value in this

@@ -5206,7 +5154,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
      KA_TRACE(20,
               ("__kmp_allocate_team: increasing hot team thread count to %d\n",
                new_nproc));
      int old_nproc = team->t.t_nproc; // save old value and use to update only

      team->t.t_size_changed = 1;

#if KMP_NESTED_HOT_TEAMS

@@ -5233,9 +5181,10 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
        KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
        team->t.t_nproc = new_nproc; // just get reserved threads involved
      } else {
        // We may have some threads in reserve, but not enough;
        // get reserved threads involved if any.
        team->t.t_nproc = hot_teams[level].hot_team_nth;
        // we may have some threads in reserve, but not enough
        team->t.t_nproc =
            hot_teams[level]
                .hot_team_nth; // get reserved threads involved if any
        hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
#endif // KMP_NESTED_HOT_TEAMS
      if (team->t.t_max_nproc < new_nproc) {

@@ -5290,12 +5239,8 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#if KMP_NESTED_HOT_TEAMS
      } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
#endif // KMP_NESTED_HOT_TEAMS
      if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        // Barrier size already increased earlier in this function
        // Activate team threads via th_used_in_team
        __kmp_add_threads_to_team(team, new_nproc);
      }
      /* make sure everyone is syncronized */
      int old_nproc = team->t.t_nproc; // save old value and use to update only
      // new threads below
      __kmp_initialize_team(team, new_nproc, new_icvs,
                            root->r.r_uber_thread->th.th_ident);

@@ -5399,13 +5344,6 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
      /* take this team from the team pool */
      __kmp_team_pool = team->t.t_next_pool;

      if (max_nproc > 1 &&
          __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        if (!team->t.b) { // Allocate barrier structure
          team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
        }
      }

      /* setup the team for fresh use */
      __kmp_initialize_team(team, new_nproc, new_icvs, NULL);

@@ -5461,12 +5399,6 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,

  /* and set it up */
  team->t.t_max_nproc = max_nproc;
  if (max_nproc > 1 &&
      __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
    // Allocate barrier structure
    team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
  }

  /* NOTE well, for some reason allocating one big buffer and dividing it up
     seems to really hurt performance a lot on the P4, so, let's not use this */
  __kmp_allocate_team_arrays(team, max_nproc);

@@ -5623,43 +5555,10 @@ void __kmp_free_team(kmp_root_t *root,
    /* free the worker threads */
    for (f = 1; f < team->t.t_nproc; ++f) {
      KMP_DEBUG_ASSERT(team->t.t_threads[f]);
      if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
                                    1, 2);
      }
      __kmp_free_thread(team->t.t_threads[f]);
    }

    if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      if (team->t.b) {
        // wake up thread at old location
        team->t.b->go_release();
        if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
          for (f = 1; f < team->t.t_nproc; ++f) {
            if (team->t.b->sleep[f].sleep) {
              __kmp_atomic_resume_64(
                  team->t.t_threads[f]->th.th_info.ds.ds_gtid,
                  (kmp_atomic_flag_64<> *)NULL);
            }
          }
        }
        // Wait for threads to be removed from team
        for (int f = 1; f < team->t.t_nproc; ++f) {
          while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
            KMP_CPU_PAUSE();
        }
      }
    }

    for (f = 1; f < team->t.t_nproc; ++f) {
      team->t.t_threads[f] = NULL;
    }

    if (team->t.t_max_nproc > 1 &&
        __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      distributedBarrier::deallocate(team->t.b);
      team->t.b = NULL;
    }
    /* put the team back in the team pool */
    /* TODO limit size of team pool, call reap_team if pool too large */
    team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);

@@ -6058,19 +5957,12 @@ static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
    KA_TRACE(
        20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
             gtid));
    if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      while (
          !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
        KMP_CPU_PAUSE();
      __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL);
    } else {
      /* Need release fence here to prevent seg faults for tree forkjoin
         barrier (GEH) */
      ANNOTATE_HAPPENS_BEFORE(thread);
      kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                         thread);
      __kmp_release_64(&flag);
    }
    /* Need release fence here to prevent seg faults for tree forkjoin barrier
     * (GEH) */
    ANNOTATE_HAPPENS_BEFORE(thread);
    kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                       thread);
    __kmp_release_64(&flag);
  }

  // Terminate OS thread.

@@ -6944,8 +6836,8 @@ static void __kmp_do_serial_initialize(void) {
#if KMP_FAST_REDUCTION_BARRIER
#define kmp_reduction_barrier_gather_bb ((int)1)
#define kmp_reduction_barrier_release_bb ((int)1)
#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
#define kmp_reduction_barrier_gather_pat bp_hyper_bar
#define kmp_reduction_barrier_release_pat bp_hyper_bar
#endif // KMP_FAST_REDUCTION_BARRIER
  for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
    __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;

@@ -8802,96 +8694,6 @@ void __kmp_omp_display_env(int verbose) {
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}

// The team size is changing, so distributed barrier must be modified
void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads) {
  KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
                   bp_dist_bar);
  kmp_info_t **other_threads = team->t.t_threads;

  // We want all the workers to stop waiting on the barrier while we adjust the
  // size of the team.
  for (int f = 1; f < old_nthreads; ++f) {
    KMP_DEBUG_ASSERT(other_threads[f] != NULL);
    // Ignore threads that are already inactive or not present in the team
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
      // teams construct causes thread_limit to get passed in, and some of
      // those could be inactive; just ignore them
      continue;
    }
    // If thread is transitioning still to in_use state, wait for it
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
      while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
        KMP_CPU_PAUSE();
    }
    // The thread should be in_use now
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
    // Transition to unused state
    team->t.t_threads[f]->th.th_used_in_team.store(2);
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
  }
  // Release all the workers
  kmp_uint64 new_value; // new value for go
  new_value = team->t.b->go_release();

  KMP_MFENCE();

  // Workers should see transition status 2 and move to 0; but may need to be
  // woken up first
  size_t my_go_index;
  int count = old_nthreads - 1;
  while (count > 0) {
    count = old_nthreads - 1;
    for (int f = 1; f < old_nthreads; ++f) {
      my_go_index = f / team->t.b->threads_per_go;
      if (other_threads[f]->th.th_used_in_team.load() != 0) {
        if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up the workers
          kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
              void *, other_threads[f]->th.th_sleep_loc);
          __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
        }
      } else {
        KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
        count--;
      }
    }
  }
  // Now update the barrier size
  team->t.b->update_num_threads(new_nthreads);
  team->t.b->go_reset();
}

void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
  // Add the threads back to the team
  KMP_DEBUG_ASSERT(team);
  // Threads were paused and pointed at th_used_in_team temporarily during a
  // resize of the team. We're going to set th_used_in_team to 3 to indicate to
  // the thread that it should transition itself back into the team. Then, if
  // blocktime isn't infinite, the thread could be sleeping, so we send a resume
  // to wake it up.
  for (int f = 1; f < new_nthreads; ++f) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f]);
    KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,
                                3);
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up sleeping threads
      __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
                      (kmp_flag_32<false, false> *)NULL);
    }
  }
  // The threads should be transitioning to the team; when they are done, they
  // should have set th_used_in_team to 1. This loop forces master to wait until
  // all threads have moved into the team and are waiting in the barrier.
  int count = new_nthreads - 1;
  while (count > 0) {
    count = new_nthreads - 1;
    for (int f = 1; f < new_nthreads; ++f) {
      if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
        count--;
      }
    }
  }
}

// Globals and functions for hidden helper task
kmp_info_t **__kmp_hidden_helper_threads;
kmp_info_t *__kmp_hidden_helper_main_thread;

@ -1684,8 +1684,6 @@ static void __kmp_stg_parse_barrier_pattern(char const *name, char const *value,
|
|||
const char *var;
|
||||
/* ---------- Barrier method control ------------ */
|
||||
|
||||
static int dist_req = 0, non_dist_req = 0;
|
||||
static bool warn = 1;
|
||||
for (int i = bs_plain_barrier; i < bs_last_barrier; i++) {
|
||||
var = __kmp_barrier_pattern_env_name[i];
|
||||
|
||||
|
@ -1697,11 +1695,6 @@ static void __kmp_stg_parse_barrier_pattern(char const *name, char const *value,
|
|||
for (j = bp_linear_bar; j < bp_last_bar; j++) {
|
||||
if (__kmp_match_with_sentinel(__kmp_barrier_pattern_name[j], value, 1,
|
||||
',')) {
|
||||
if (j == bp_dist_bar) {
|
||||
dist_req++;
|
||||
} else {
|
||||
non_dist_req++;
|
||||
}
|
||||
__kmp_barrier_gather_pattern[i] = (kmp_bar_pat_e)j;
|
||||
break;
|
||||
}
|
||||
|
@ -1716,11 +1709,6 @@ static void __kmp_stg_parse_barrier_pattern(char const *name, char const *value,
|
|||
if (comma != NULL) {
|
||||
for (j = bp_linear_bar; j < bp_last_bar; j++) {
|
||||
if (__kmp_str_match(__kmp_barrier_pattern_name[j], 1, comma + 1)) {
|
||||
if (j == bp_dist_bar) {
|
||||
dist_req++;
|
||||
} else {
|
||||
non_dist_req++;
|
||||
}
|
||||
__kmp_barrier_release_pattern[i] = (kmp_bar_pat_e)j;
|
||||
break;
|
||||
}
|
||||
|
@ -1735,28 +1723,6 @@ static void __kmp_stg_parse_barrier_pattern(char const *name, char const *value,
|
|||
}
|
||||
}
|
||||
}
|
||||
if ((dist_req == 0) && (non_dist_req != 0)) {
|
||||
// Something was set to a barrier other than dist; set all others to hyper
|
||||
for (int i = bs_plain_barrier; i < bs_last_barrier; i++) {
|
||||
if (__kmp_barrier_release_pattern[i] == bp_dist_bar)
|
||||
__kmp_barrier_release_pattern[i] = bp_hyper_bar;
|
||||
if (__kmp_barrier_gather_pattern[i] == bp_dist_bar)
|
||||
__kmp_barrier_gather_pattern[i] = bp_hyper_bar;
|
||||
}
|
||||
} else if (non_dist_req != 0) {
|
||||
// some requests for dist, plus requests for others; set all to dist
|
||||
if (non_dist_req > 0 && dist_req > 0 && warn) {
|
||||
KMP_INFORM(BarrierPatternOverride, name,
|
||||
__kmp_barrier_pattern_name[bp_dist_bar]);
|
||||
warn = 0;
|
||||
}
|
||||
for (int i = bs_plain_barrier; i < bs_last_barrier; i++) {
|
||||
if (__kmp_barrier_release_pattern[i] != bp_dist_bar)
|
||||
__kmp_barrier_release_pattern[i] = bp_dist_bar;
|
||||
if (__kmp_barrier_gather_pattern[i] != bp_dist_bar)
|
||||
__kmp_barrier_gather_pattern[i] = bp_dist_bar;
|
||||
}
|
||||
}
|
||||
} // __kmp_stg_parse_barrier_pattern
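The tail of the removed hunk encodes a precedence rule for the barrier-pattern settings: if no variable asked for dist but something else was requested, any leftover dist defaults are rewritten to hyper; if dist was requested together with other patterns, everything is forced to dist and a one-time informational message is printed. A hedged sketch of just that decision, using hypothetical arrays in place of __kmp_barrier_gather_pattern/__kmp_barrier_release_pattern:

enum pat_sketch { bar_linear, bar_tree, bar_hyper, bar_hierarchical, bar_dist };

// Sketch of the override rule only, not the runtime's parser.
void resolve_patterns_sketch(pat_sketch *gather, pat_sketch *release, int nbar,
                             int dist_req, int non_dist_req) {
  if (dist_req == 0 && non_dist_req != 0) {
    // Nobody asked for dist: fall any dist defaults back to hyper.
    for (int i = 0; i < nbar; ++i) {
      if (gather[i] == bar_dist) gather[i] = bar_hyper;
      if (release[i] == bar_dist) release[i] = bar_hyper;
    }
  } else if (non_dist_req != 0) {
    // Mixed requests (dist plus something else): dist wins everywhere.
    for (int i = 0; i < nbar; ++i) {
      gather[i] = bar_dist;
      release[i] = bar_dist;
    }
  }
  // Only-dist requests need no rewrite: the patterns are already dist.
}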
static void __kmp_stg_print_barrier_pattern(kmp_str_buf_t *buffer,

@ -1773,7 +1739,7 @@ static void __kmp_stg_print_barrier_pattern(kmp_str_buf_t *buffer,
__kmp_str_buf_print(buffer, " %s='",
__kmp_barrier_pattern_env_name[i]);
}
KMP_DEBUG_ASSERT(j < bp_last_bar && k < bp_last_bar);
KMP_DEBUG_ASSERT(j < bs_last_barrier && k < bs_last_barrier);
__kmp_str_buf_print(buffer, "%s,%s'\n", __kmp_barrier_pattern_name[j],
__kmp_barrier_pattern_name[k]);
}

@ -246,8 +246,6 @@ enum stats_state_e {
// KMP_tree_release -- time in __kmp_tree_barrier_release
// KMP_hyper_gather -- time in __kmp_hyper_barrier_gather
// KMP_hyper_release -- time in __kmp_hyper_barrier_release
// KMP_dist_gather -- time in __kmp_dist_barrier_gather
// KMP_dist_release -- time in __kmp_dist_barrier_release
// clang-format off
#define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \
macro(KMP_fork_call, 0, arg) \

@ -257,8 +255,6 @@ enum stats_state_e {
macro(KMP_hier_release, 0, arg) \
macro(KMP_hyper_gather, 0, arg) \
macro(KMP_hyper_release, 0, arg) \
macro(KMP_dist_gather, 0, arg) \
macro(KMP_dist_release, 0, arg) \
macro(KMP_linear_gather, 0, arg) \
macro(KMP_linear_release, 0, arg) \
macro(KMP_tree_gather, 0, arg) \

@ -515,31 +515,6 @@ int __kmp_str_match(char const *target, int len, char const *data) {
return ((len > 0) ? i >= len : (!target[i] && (len || !data[i])));
} // __kmp_str_match
// If data contains all of target, returns true, otherwise returns false.
// len should be the length of target
bool __kmp_str_contains(char const *target, int len, char const *data) {
int i = 0, j = 0, start = 0;
if (target == NULL || data == NULL) {
return FALSE;
}
while (target[i]) {
if (!data[j])
return FALSE;
if (TOLOWER(target[i]) != TOLOWER(data[j])) {
j = start + 1;
start = j;
i = 0;
} else {
if (i == 0)
start = j;
j++;
i++;
}
}
return i == len;
} // __kmp_str_contains
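The removed helper does a case-insensitive scan for target as a contiguous substring of data, and callers pass strlen(target) as len. A small usage sketch of its expected behaviour, assuming the runtime's kmp_str.h (and its dependencies) is on the include path; the inputs are illustrative values, not anything from the patch:

// Usage sketch only; builds against the libomp source tree.
#include "kmp_str.h"
#include <cassert>
#include <cstring>

int main() {
  const char *t = "dist";
  assert(__kmp_str_contains(t, (int)strlen(t), "bar=DIST,hyper")); // case-insensitive hit
  assert(!__kmp_str_contains(t, (int)strlen(t), "d-i-s-t"));       // match must be contiguous
  assert(!__kmp_str_contains(t, (int)strlen(t), NULL));            // NULL data is rejected
  return 0;
}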
int __kmp_str_match_false(char const *data) {
int result =
__kmp_str_match("false", 1, data) || __kmp_str_match("off", 2, data) ||

@ -106,7 +106,6 @@ int __kmp_str_eqf(char const *lhs, char const *rhs);
char *__kmp_str_format(char const *format, ...);
void __kmp_str_free(char **str);
int __kmp_str_match(char const *target, int len, char const *data);
bool __kmp_str_contains(char const *target, int len, char const *data);
int __kmp_str_match_false(char const *data);
int __kmp_str_match_true(char const *data);
void __kmp_str_replace(char *str, char search_for, char replace_with);

@ -2963,7 +2963,8 @@ static inline int __kmp_execute_tasks_template(
(TCR_PTR(CCAST(void *, other_thread->th.th_sleep_loc)) !=
NULL)) {
asleep = 1;
__kmp_null_resume_wrapper(other_thread);
__kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread),
other_thread->th.th_sleep_loc);
// A sleeping thread should not have any tasks on it's queue.
// There is a slight possibility that it resumes, steals a task
// from another thread, which spawns more tasks, all in the time

@ -3112,16 +3113,6 @@ int __kmp_execute_tasks_64(
thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}
template <bool C, bool S>
int __kmp_atomic_execute_tasks_64(
kmp_info_t *thread, kmp_int32 gtid, kmp_atomic_flag_64<C, S> *flag,
int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
kmp_int32 is_constrained) {
return __kmp_execute_tasks_template(
thread, gtid, flag, final_spin,
thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}
int __kmp_execute_tasks_oncore(
kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),

@ -3148,14 +3139,6 @@ template int __kmp_execute_tasks_64<true, false>(kmp_info_t *, kmp_int32,
int *USE_ITT_BUILD_ARG(void *),
kmp_int32);
template int __kmp_atomic_execute_tasks_64<false, true>(
kmp_info_t *, kmp_int32, kmp_atomic_flag_64<false, true> *, int,
int *USE_ITT_BUILD_ARG(void *), kmp_int32);
template int __kmp_atomic_execute_tasks_64<true, false>(
kmp_info_t *, kmp_int32, kmp_atomic_flag_64<true, false> *, int,
int *USE_ITT_BUILD_ARG(void *), kmp_int32);
// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
// next barrier so they can assist in executing enqueued tasks.
// First thread in allocates the task team atomically.

@ -3194,7 +3177,7 @@ static void __kmp_enable_tasking(kmp_task_team_t *task_team,
// tasks and execute them. In extra barrier mode, tasks do not sleep
// at the separate tasking barrier, so this isn't a problem.
for (i = 0; i < nthreads; i++) {
void *sleep_loc;
volatile void *sleep_loc;
kmp_info_t *thread = threads_data[i].td.td_thr;
if (i == this_thr->th.th_info.ds.ds_tid) {

@ -3211,7 +3194,7 @@ static void __kmp_enable_tasking(kmp_task_team_t *task_team,
KF_TRACE(50, ("__kmp_enable_tasking: T#%d waking up thread T#%d\n",
__kmp_gtid_from_thread(this_thr),
__kmp_gtid_from_thread(thread)));
__kmp_null_resume_wrapper(thread);
__kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
} else {
KF_TRACE(50, ("__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
__kmp_gtid_from_thread(this_thr),

@ -3581,7 +3564,7 @@ void __kmp_wait_to_unref_task_teams(void) {
__kmp_gtid_from_thread(thread)));
if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
void *sleep_loc;
volatile void *sleep_loc;
// If the thread is sleeping, awaken it.
if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
NULL) {

@ -3589,7 +3572,7 @@ void __kmp_wait_to_unref_task_teams(void) {
10,
("__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
__kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread)));
__kmp_null_resume_wrapper(thread);
__kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
}
}
}

@ -33,10 +33,6 @@ template <bool C, bool S>
void __kmp_mwait_64(int th_gtid, kmp_flag_64<C, S> *flag) {
__kmp_mwait_template(th_gtid, flag);
}
template <bool C, bool S>
void __kmp_atomic_mwait_64(int th_gtid, kmp_atomic_flag_64<C, S> *flag) {
__kmp_mwait_template(th_gtid, flag);
}
void __kmp_mwait_oncore(int th_gtid, kmp_flag_oncore *flag) {
__kmp_mwait_template(th_gtid, flag);
}

@ -44,8 +40,4 @@ void __kmp_mwait_oncore(int th_gtid, kmp_flag_oncore *flag) {
template void __kmp_mwait_32<false, false>(int, kmp_flag_32<false, false> *);
template void __kmp_mwait_64<false, true>(int, kmp_flag_64<false, true> *);
template void __kmp_mwait_64<true, false>(int, kmp_flag_64<true, false> *);
template void
__kmp_atomic_mwait_64<false, true>(int, kmp_atomic_flag_64<false, true> *);
template void
__kmp_atomic_mwait_64<true, false>(int, kmp_atomic_flag_64<true, false> *);
#endif

@ -33,285 +33,96 @@ higher level operations such as barriers and fork/join.
@{
*/
/*!
 * The flag_type describes the storage used for the flag.
 */
enum flag_type {
flag32, /**< 32 bit flags */
flag64, /**< 64 bit flags */
flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */
};
struct flag_properties {
unsigned int type : 16;
unsigned int reserved : 16;
};
template <enum flag_type FlagType> struct flag_traits {};
template <> struct flag_traits<flag32> {
typedef kmp_uint32 flag_t;
static const flag_type t = flag32;
static inline flag_t tcr(flag_t f) { return TCR_4(f); }
static inline flag_t test_then_add4(volatile flag_t *f) {
return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
}
static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
return KMP_TEST_THEN_OR32(f, v);
}
static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
return KMP_TEST_THEN_AND32(f, v);
}
};
template <> struct flag_traits<atomic_flag64> {
typedef kmp_uint64 flag_t;
static const flag_type t = atomic_flag64;
static inline flag_t tcr(flag_t f) { return TCR_8(f); }
static inline flag_t test_then_add4(volatile flag_t *f) {
return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
}
static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
return KMP_TEST_THEN_OR64(f, v);
}
static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
return KMP_TEST_THEN_AND64(f, v);
}
};
template <> struct flag_traits<flag64> {
typedef kmp_uint64 flag_t;
static const flag_type t = flag64;
static inline flag_t tcr(flag_t f) { return TCR_8(f); }
static inline flag_t test_then_add4(volatile flag_t *f) {
return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
}
static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
return KMP_TEST_THEN_OR64(f, v);
}
static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
return KMP_TEST_THEN_AND64(f, v);
}
};
template <> struct flag_traits<flag_oncore> {
typedef kmp_uint64 flag_t;
static const flag_type t = flag_oncore;
static inline flag_t tcr(flag_t f) { return TCR_8(f); }
static inline flag_t test_then_add4(volatile flag_t *f) {
return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
}
static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
return KMP_TEST_THEN_OR64(f, v);
}
static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
return KMP_TEST_THEN_AND64(f, v);
}
};
/*! Base class for all flags */
template <flag_type FlagType> class kmp_flag {
protected:
flag_properties t; /**< "Type" of the flag in loc */
kmp_info_t *waiting_threads[1]; /**< Threads sleeping on this thread. */
kmp_uint32 num_waiting_threads; /**< #threads sleeping on this thread. */
std::atomic<bool> *sleepLoc;
/*!
 * Base class for wait/release volatile flag
 */
template <typename P> class kmp_flag_native {
volatile P *loc;
flag_properties t;
public:
typedef flag_traits<FlagType> traits_type;
kmp_flag() : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(nullptr) {}
kmp_flag(int nwaiters)
: t({FlagType, 0U}), num_waiting_threads(nwaiters), sleepLoc(nullptr) {}
kmp_flag(std::atomic<bool> *sloc)
: t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(sloc) {}
/*! @result the flag_type */
typedef P flag_t;
kmp_flag_native(volatile P *p, flag_type ft)
: loc(p), t({(short unsigned int)ft, 0U}) {}
volatile P *get() { return loc; }
void *get_void_p() { return RCAST(void *, CCAST(P *, loc)); }
void set(volatile P *new_loc) { loc = new_loc; }
flag_type get_type() { return (flag_type)(t.type); }
/*! param i in index into waiting_threads
 * @result the thread that is waiting at index i */
kmp_info_t *get_waiter(kmp_uint32 i) {
KMP_DEBUG_ASSERT(i < num_waiting_threads);
return waiting_threads[i];
}
/*! @result num_waiting_threads */
kmp_uint32 get_num_waiters() { return num_waiting_threads; }
/*! @param thr in the thread which is now waiting
 * Insert a waiting thread at index 0. */
void set_waiter(kmp_info_t *thr) {
waiting_threads[0] = thr;
num_waiting_threads = 1;
}
enum barrier_type get_bt() { return bs_last_barrier; }
P load() { return *loc; }
void store(P val) { *loc = val; }
};
/*! Base class for wait/release volatile flag */
template <typename PtrType, flag_type FlagType, bool Sleepable>
class kmp_flag_native : public kmp_flag<FlagType> {
protected:
volatile PtrType *loc;
PtrType checker; /**< When flag==checker, it has been released. */
typedef flag_traits<FlagType> traits_type;
/*!
 * Base class for wait/release atomic flag
 */
template <typename P> class kmp_flag {
std::atomic<P>
*loc; /**< Pointer to the flag storage that is modified by another thread
*/
flag_properties t; /**< "Type" of the flag in loc */
public:
typedef PtrType flag_t;
kmp_flag_native(volatile PtrType *p) : kmp_flag<FlagType>(), loc(p) {}
kmp_flag_native(volatile PtrType *p, kmp_info_t *thr)
: kmp_flag<FlagType>(1), loc(p) {
this->waiting_threads[0] = thr;
}
kmp_flag_native(volatile PtrType *p, PtrType c)
: kmp_flag<FlagType>(), loc(p), checker(c) {}
kmp_flag_native(volatile PtrType *p, PtrType c, std::atomic<bool> *sloc)
: kmp_flag<FlagType>(sloc), loc(p), checker(c) {}
volatile PtrType *get() { return loc; }
void *get_void_p() { return RCAST(void *, CCAST(PtrType *, loc)); }
void set(volatile PtrType *new_loc) { loc = new_loc; }
PtrType load() { return *loc; }
void store(PtrType val) { *loc = val; }
/*! @result true if the flag object has been released. */
virtual bool done_check() {
if (Sleepable && !(this->sleepLoc))
return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE) ==
checker;
else
return traits_type::tcr(*(this->get())) == checker;
}
/*! @param old_loc in old value of flag
 * @result true if the flag's old value indicates it was released. */
virtual bool done_check_val(PtrType old_loc) { return old_loc == checker; }
/*! @result true if the flag object is not yet released.
 * Used in __kmp_wait_template like:
 * @code
 * while (flag.notdone_check()) { pause(); }
 * @endcode */
virtual bool notdone_check() {
return traits_type::tcr(*(this->get())) != checker;
}
/*! @result Actual flag value before release was applied.
 * Trigger all waiting threads to run by modifying flag to release state. */
void internal_release() {
(void)traits_type::test_then_add4((volatile PtrType *)this->get());
}
/*! @result Actual flag value before sleep bit(s) set.
 * Notes that there is at least one thread sleeping on the flag by setting
 * sleep bit(s). */
PtrType set_sleeping() {
if (this->sleepLoc) {
this->sleepLoc->store(true);
return *(this->get());
}
return traits_type::test_then_or((volatile PtrType *)this->get(),
KMP_BARRIER_SLEEP_STATE);
}
/*! @result Actual flag value before sleep bit(s) cleared.
 * Notes that there are no longer threads sleeping on the flag by clearing
 * sleep bit(s). */
void unset_sleeping() {
if (this->sleepLoc) {
this->sleepLoc->store(false);
return;
}
traits_type::test_then_and((volatile PtrType *)this->get(),
~KMP_BARRIER_SLEEP_STATE);
}
/*! @param old_loc in old value of flag
 * Test if there are threads sleeping on the flag's old value in old_loc. */
bool is_sleeping_val(PtrType old_loc) {
if (this->sleepLoc)
return this->sleepLoc->load();
return old_loc & KMP_BARRIER_SLEEP_STATE;
}
/*! Test whether there are threads sleeping on the flag. */
bool is_sleeping() {
if (this->sleepLoc)
return this->sleepLoc->load();
return is_sleeping_val(*(this->get()));
}
bool is_any_sleeping() {
if (this->sleepLoc)
return this->sleepLoc->load();
return is_sleeping_val(*(this->get()));
}
kmp_uint8 *get_stolen() { return NULL; }
};
/*! Base class for wait/release atomic flag */
template <typename PtrType, flag_type FlagType, bool Sleepable>
class kmp_flag_atomic : public kmp_flag<FlagType> {
protected:
std::atomic<PtrType> *loc; /**< Pointer to flag location to wait on */
PtrType checker; /**< Flag == checker means it has been released. */
public:
typedef flag_traits<FlagType> traits_type;
typedef PtrType flag_t;
kmp_flag_atomic(std::atomic<PtrType> *p) : kmp_flag<FlagType>(), loc(p) {}
kmp_flag_atomic(std::atomic<PtrType> *p, kmp_info_t *thr)
: kmp_flag<FlagType>(1), loc(p) {
this->waiting_threads[0] = thr;
}
kmp_flag_atomic(std::atomic<PtrType> *p, PtrType c)
: kmp_flag<FlagType>(), loc(p), checker(c) {}
kmp_flag_atomic(std::atomic<PtrType> *p, PtrType c, std::atomic<bool> *sloc)
: kmp_flag<FlagType>(sloc), loc(p), checker(c) {}
/*! @result the pointer to the actual flag */
std::atomic<PtrType> *get() { return loc; }
/*! @result void* pointer to the actual flag */
typedef P flag_t;
kmp_flag(std::atomic<P> *p, flag_type ft)
: loc(p), t({(short unsigned int)ft, 0U}) {}
/*!
 * @result the pointer to the actual flag
 */
std::atomic<P> *get() { return loc; }
/*!
 * @result void* pointer to the actual flag
 */
void *get_void_p() { return RCAST(void *, loc); }
/*! @param new_loc in set loc to point at new_loc */
void set(std::atomic<PtrType> *new_loc) { loc = new_loc; }
/*! @result flag value */
PtrType load() { return loc->load(std::memory_order_acquire); }
/*! @param val the new flag value to be stored */
void store(PtrType val) { loc->store(val, std::memory_order_release); }
/*! @result true if the flag object has been released. */
bool done_check() {
if (Sleepable && !(this->sleepLoc))
return (this->load() & ~KMP_BARRIER_SLEEP_STATE) == checker;
else
return this->load() == checker;
}
/*! @param old_loc in old value of flag
 * @result true if the flag's old value indicates it was released. */
bool done_check_val(PtrType old_loc) { return old_loc == checker; }
/*! @result true if the flag object is not yet released.
 * Used in __kmp_wait_template like:
 * @code
 * while (flag.notdone_check()) { pause(); }
 * @endcode */
bool notdone_check() { return this->load() != checker; }
/*! @result Actual flag value before release was applied.
 * Trigger all waiting threads to run by modifying flag to release state. */
void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
/*! @result Actual flag value before sleep bit(s) set.
 * Notes that there is at least one thread sleeping on the flag by setting
 * sleep bit(s). */
PtrType set_sleeping() {
if (this->sleepLoc) {
this->sleepLoc->store(true);
return *(this->get());
}
return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
}
/*! @result Actual flag value before sleep bit(s) cleared.
 * Notes that there are no longer threads sleeping on the flag by clearing
 * sleep bit(s). */
void unset_sleeping() {
if (this->sleepLoc) {
this->sleepLoc->store(false);
return;
}
KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
}
/*! @param old_loc in old value of flag
 * Test whether there are threads sleeping on flag's old value in old_loc. */
bool is_sleeping_val(PtrType old_loc) {
if (this->sleepLoc)
return this->sleepLoc->load();
return old_loc & KMP_BARRIER_SLEEP_STATE;
}
/*! Test whether there are threads sleeping on the flag. */
bool is_sleeping() {
if (this->sleepLoc)
return this->sleepLoc->load();
return is_sleeping_val(this->load());
}
bool is_any_sleeping() {
if (this->sleepLoc)
return this->sleepLoc->load();
return is_sleeping_val(this->load());
}
kmp_uint8 *get_stolen() { return NULL; }
/*!
 * @param new_loc in set loc to point at new_loc
 */
void set(std::atomic<P> *new_loc) { loc = new_loc; }
/*!
 * @result the flag_type
 */
flag_type get_type() { return (flag_type)(t.type); }
/*!
 * @result flag value
 */
P load() { return loc->load(std::memory_order_acquire); }
/*!
 * @param val the new flag value to be stored
 */
void store(P val) { loc->store(val, std::memory_order_release); }
// Derived classes must provide the following:
/*
kmp_info_t * get_waiter(kmp_uint32 i);
kmp_uint32 get_num_waiters();
bool done_check();
bool done_check_val(P old_loc);
bool notdone_check();
P internal_release();
void suspend(int th_gtid);
void mwait(int th_gtid);
void resume(int th_gtid);
P set_sleeping();
P unset_sleeping();
bool is_sleeping();
bool is_any_sleeping();
bool is_sleeping_val(P old_loc);
int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
int *thread_finished
USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32
is_constrained);
*/
};
#if OMPT_SUPPORT

@ -453,9 +264,8 @@ final_spin=FALSE)
ompt_entry_state = this_thr->th.ompt_thread_info.state;
if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
ompt_lw_taskteam_t *team = NULL;
if (this_thr->th.th_team)
team = this_thr->th.th_team->t.ompt_serialized_team_info;
ompt_lw_taskteam_t *team =
this_thr->th.th_team->t.ompt_serialized_team_info;
if (team) {
tId = &(team->ompt_task_info.task_data);
} else {

@ -530,11 +340,11 @@ final_spin=FALSE)
disabled (KMP_TASKING=0). */
if (task_team != NULL) {
if (TCR_SYNC_4(task_team->tt.tt_active)) {
if (KMP_TASKING_ENABLED(task_team)) {
if (KMP_TASKING_ENABLED(task_team))
flag->execute_tasks(
this_thr, th_gtid, final_spin,
&tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
} else
else
this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
} else {
KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));

@ -747,7 +557,6 @@ static inline void __kmp_mwait_template(int th_gtid, C *flag) {
else {
// if flag changes here, wake-up happens immediately
TCW_PTR(th->th.th_sleep_loc, (void *)flag);
th->th.th_sleep_loc_type = flag->get_type();
__kmp_unlock_suspend_mx(th);
KF_TRACE(50, ("__kmp_mwait_template: T#%d calling mwait\n", th_gtid));
#if KMP_HAVE_UMWAIT

@ -765,7 +574,6 @@ static inline void __kmp_mwait_template(int th_gtid, C *flag) {
if (flag->is_sleeping())
flag->unset_sleeping();
TCW_PTR(th->th.th_sleep_loc, NULL);
th->th.th_sleep_loc_type = flag_unset;
}
// Mark thread as active again
th->th.th_active = TRUE;

@ -816,15 +624,251 @@ template <class C> static inline void __kmp_release_template(C *flag) {
}
}
template <typename FlagType> struct flag_traits {};
template <> struct flag_traits<kmp_uint32> {
typedef kmp_uint32 flag_t;
static const flag_type t = flag32;
static inline flag_t tcr(flag_t f) { return TCR_4(f); }
static inline flag_t test_then_add4(volatile flag_t *f) {
return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
}
static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
return KMP_TEST_THEN_OR32(f, v);
}
static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
return KMP_TEST_THEN_AND32(f, v);
}
};
template <> struct flag_traits<kmp_uint64> {
typedef kmp_uint64 flag_t;
static const flag_type t = flag64;
static inline flag_t tcr(flag_t f) { return TCR_8(f); }
static inline flag_t test_then_add4(volatile flag_t *f) {
return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
}
static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
return KMP_TEST_THEN_OR64(f, v);
}
static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
return KMP_TEST_THEN_AND64(f, v);
}
};
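These traits specializations are what let the wait/release templates below stay type-generic: the traits pick the right 32- or 64-bit primitive for the flag's storage type. A hedged sketch of how such traits are typically consumed inside the runtime's headers (the helper name is hypothetical, not part of the patch):

// Hypothetical helper: bump a flag using whichever atomic primitive the
// traits specialization supplies; FlagType is kmp_uint32 or kmp_uint64.
template <typename FlagType>
static void sketch_release(volatile FlagType *loc) {
  typedef flag_traits<FlagType> traits;
  // Move the flag past any waiter's checker value, mirroring internal_release().
  (void)traits::test_then_add4(loc);
}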
// Basic flag that does not use C11 Atomics
template <typename FlagType, bool Sleepable>
class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
typedef flag_traits<FlagType> traits_type;
FlagType checker; /**< Value to compare flag to to check if flag has been
released. */
kmp_info_t
*waiting_threads[1]; /**< Array of threads sleeping on this thread. */
kmp_uint32
num_waiting_threads; /**< Number of threads sleeping on this thread. */
public:
kmp_basic_flag_native(volatile FlagType *p)
: kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr)
: kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(1) {
waiting_threads[0] = thr;
}
kmp_basic_flag_native(volatile FlagType *p, FlagType c)
: kmp_flag_native<FlagType>(p, traits_type::t), checker(c),
num_waiting_threads(0) {}
/*!
 * param i in index into waiting_threads
 * @result the thread that is waiting at index i
 */
kmp_info_t *get_waiter(kmp_uint32 i) {
KMP_DEBUG_ASSERT(i < num_waiting_threads);
return waiting_threads[i];
}
/*!
 * @result num_waiting_threads
 */
kmp_uint32 get_num_waiters() { return num_waiting_threads; }
/*!
 * @param thr in the thread which is now waiting
 *
 * Insert a waiting thread at index 0.
 */
void set_waiter(kmp_info_t *thr) {
waiting_threads[0] = thr;
num_waiting_threads = 1;
}
/*!
 * @result true if the flag object has been released.
 */
bool done_check() {
if (Sleepable)
return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE) ==
checker;
else
return traits_type::tcr(*(this->get())) == checker;
}
/*!
 * @param old_loc in old value of flag
 * @result true if the flag's old value indicates it was released.
 */
bool done_check_val(FlagType old_loc) { return old_loc == checker; }
/*!
 * @result true if the flag object is not yet released.
 * Used in __kmp_wait_template like:
 * @code
 * while (flag.notdone_check()) { pause(); }
 * @endcode
 */
bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
/*!
 * @result Actual flag value before release was applied.
 * Trigger all waiting threads to run by modifying flag to release state.
 */
void internal_release() {
(void)traits_type::test_then_add4((volatile FlagType *)this->get());
}
/*!
 * @result Actual flag value before sleep bit(s) set.
 * Notes that there is at least one thread sleeping on the flag by setting
 * sleep bit(s).
 */
FlagType set_sleeping() {
return traits_type::test_then_or((volatile FlagType *)this->get(),
KMP_BARRIER_SLEEP_STATE);
}
/*!
 * @result Actual flag value before sleep bit(s) cleared.
 * Notes that there are no longer threads sleeping on the flag by clearing
 * sleep bit(s).
 */
FlagType unset_sleeping() {
return traits_type::test_then_and((volatile FlagType *)this->get(),
~KMP_BARRIER_SLEEP_STATE);
}
/*!
 * @param old_loc in old value of flag
 * Test whether there are threads sleeping on the flag's old value in old_loc.
 */
bool is_sleeping_val(FlagType old_loc) {
return old_loc & KMP_BARRIER_SLEEP_STATE;
}
/*!
 * Test whether there are threads sleeping on the flag.
 */
bool is_sleeping() { return is_sleeping_val(*(this->get())); }
bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
kmp_uint8 *get_stolen() { return NULL; }
enum barrier_type get_bt() { return bs_last_barrier; }
};
template <typename FlagType, bool Sleepable>
class kmp_basic_flag : public kmp_flag<FlagType> {
typedef flag_traits<FlagType> traits_type;
FlagType checker; /**< Value to compare flag to to check if flag has been
released. */
kmp_info_t
*waiting_threads[1]; /**< Array of threads sleeping on this thread. */
kmp_uint32
num_waiting_threads; /**< Number of threads sleeping on this thread. */
public:
kmp_basic_flag(std::atomic<FlagType> *p)
: kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
kmp_basic_flag(std::atomic<FlagType> *p, kmp_info_t *thr)
: kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
waiting_threads[0] = thr;
}
kmp_basic_flag(std::atomic<FlagType> *p, FlagType c)
: kmp_flag<FlagType>(p, traits_type::t), checker(c),
num_waiting_threads(0) {}
/*!
 * param i in index into waiting_threads
 * @result the thread that is waiting at index i
 */
kmp_info_t *get_waiter(kmp_uint32 i) {
KMP_DEBUG_ASSERT(i < num_waiting_threads);
return waiting_threads[i];
}
/*!
 * @result num_waiting_threads
 */
kmp_uint32 get_num_waiters() { return num_waiting_threads; }
/*!
 * @param thr in the thread which is now waiting
 *
 * Insert a waiting thread at index 0.
 */
void set_waiter(kmp_info_t *thr) {
waiting_threads[0] = thr;
num_waiting_threads = 1;
}
/*!
 * @result true if the flag object has been released.
 */
bool done_check() {
if (Sleepable)
return (this->load() & ~KMP_BARRIER_SLEEP_STATE) == checker;
else
return this->load() == checker;
}
/*!
 * @param old_loc in old value of flag
 * @result true if the flag's old value indicates it was released.
 */
bool done_check_val(FlagType old_loc) { return old_loc == checker; }
/*!
 * @result true if the flag object is not yet released.
 * Used in __kmp_wait_template like:
 * @code
 * while (flag.notdone_check()) { pause(); }
 * @endcode
 */
bool notdone_check() { return this->load() != checker; }
/*!
 * @result Actual flag value before release was applied.
 * Trigger all waiting threads to run by modifying flag to release state.
 */
void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
/*!
 * @result Actual flag value before sleep bit(s) set.
 * Notes that there is at least one thread sleeping on the flag by setting
 * sleep bit(s).
 */
FlagType set_sleeping() {
return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
}
/*!
 * @result Actual flag value before sleep bit(s) cleared.
 * Notes that there are no longer threads sleeping on the flag by clearing
 * sleep bit(s).
 */
FlagType unset_sleeping() {
return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
}
/*!
 * @param old_loc in old value of flag
 * Test whether there are threads sleeping on the flag's old value in old_loc.
 */
bool is_sleeping_val(FlagType old_loc) {
return old_loc & KMP_BARRIER_SLEEP_STATE;
}
/*!
 * Test whether there are threads sleeping on the flag.
 */
bool is_sleeping() { return is_sleeping_val(this->load()); }
bool is_any_sleeping() { return is_sleeping_val(this->load()); }
kmp_uint8 *get_stolen() { return NULL; }
enum barrier_type get_bt() { return bs_last_barrier; }
};
template <bool Cancellable, bool Sleepable>
class kmp_flag_32 : public kmp_flag_atomic<kmp_uint32, flag32, Sleepable> {
class kmp_flag_32 : public kmp_basic_flag<kmp_uint32, Sleepable> {
public:
kmp_flag_32(std::atomic<kmp_uint32> *p)
: kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p) {}
: kmp_basic_flag<kmp_uint32, Sleepable>(p) {}
kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
: kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, thr) {}
: kmp_basic_flag<kmp_uint32, Sleepable>(p, thr) {}
kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
: kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, c) {}
: kmp_basic_flag<kmp_uint32, Sleepable>(p, c) {}
void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
void mwait(int th_gtid) { __kmp_mwait_32(th_gtid, this); }

@ -851,16 +895,14 @@ public:
};
template <bool Cancellable, bool Sleepable>
class kmp_flag_64 : public kmp_flag_native<kmp_uint64, flag64, Sleepable> {
class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64, Sleepable> {
public:
kmp_flag_64(volatile kmp_uint64 *p)
: kmp_flag_native<kmp_uint64, flag64, Sleepable>(p) {}
: kmp_basic_flag_native<kmp_uint64, Sleepable>(p) {}
kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
: kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, thr) {}
: kmp_basic_flag_native<kmp_uint64, Sleepable>(p, thr) {}
kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
: kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c) {}
kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c, std::atomic<bool> *loc)
: kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c, loc) {}
: kmp_basic_flag_native<kmp_uint64, Sleepable>(p, c) {}
void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
void mwait(int th_gtid) { __kmp_mwait_64(th_gtid, this); }

@ -886,52 +928,20 @@ public:
flag_type get_ptr_type() { return flag64; }
};
template <bool Cancellable, bool Sleepable>
class kmp_atomic_flag_64
: public kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable> {
public:
kmp_atomic_flag_64(std::atomic<kmp_uint64> *p)
: kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p) {}
kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_info_t *thr)
: kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, thr) {}
kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c)
: kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c) {}
kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c,
std::atomic<bool> *loc)
: kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c, loc) {}
void suspend(int th_gtid) { __kmp_atomic_suspend_64(th_gtid, this); }
void mwait(int th_gtid) { __kmp_atomic_mwait_64(th_gtid, this); }
void resume(int th_gtid) { __kmp_atomic_resume_64(th_gtid, this); }
int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
kmp_int32 is_constrained) {
return __kmp_atomic_execute_tasks_64(
this_thr, gtid, this, final_spin,
thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}
bool wait(kmp_info_t *this_thr,
int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
if (final_spin)
return __kmp_wait_template<kmp_atomic_flag_64, TRUE, Cancellable,
Sleepable>(
this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
else
return __kmp_wait_template<kmp_atomic_flag_64, FALSE, Cancellable,
Sleepable>(
this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
}
void release() { __kmp_release_template(this); }
flag_type get_ptr_type() { return atomic_flag64; }
};
// Hierarchical 64-bit on-core barrier instantiation
class kmp_flag_oncore : public kmp_flag_native<kmp_uint64, flag_oncore, false> {
kmp_uint32 offset; /**< Portion of flag of interest for an operation. */
class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
kmp_uint64 checker;
kmp_info_t *waiting_threads[1];
kmp_uint32 num_waiting_threads;
kmp_uint32
offset; /**< Portion of flag that is of interest for an operation. */
bool flag_switch; /**< Indicates a switch in flag location. */
enum barrier_type bt; /**< Barrier type. */
kmp_info_t *this_thr; /**< Thread to redirect to different flag location. */
kmp_info_t *this_thr; /**< Thread that may be redirected to different flag
location. */
#if USE_ITT_BUILD
void *itt_sync_obj; /**< ITT object to pass to new flag location. */
void *
itt_sync_obj; /**< ITT object that must be passed to new flag location. */
#endif
unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];

@ -939,17 +949,26 @@ class kmp_flag_oncore : public kmp_flag_native<kmp_uint64, flag_oncore, false> {
public:
kmp_flag_oncore(volatile kmp_uint64 *p)
: kmp_flag_native<kmp_uint64, flag_oncore, false>(p), flag_switch(false) {
}
: kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
flag_switch(false) {}
kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
: kmp_flag_native<kmp_uint64, flag_oncore, false>(p), offset(idx),
flag_switch(false), bt(bs_last_barrier), itt_sync_obj(nullptr) {}
: kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
offset(idx), flag_switch(false) {}
kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
enum barrier_type bar_t,
kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
: kmp_flag_native<kmp_uint64, flag_oncore, false>(p, c), offset(idx),
flag_switch(false), bt(bar_t),
: kmp_flag_native<kmp_uint64>(p, flag_oncore), checker(c),
num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
kmp_info_t *get_waiter(kmp_uint32 i) {
KMP_DEBUG_ASSERT(i < num_waiting_threads);
return waiting_threads[i];
}
kmp_uint32 get_num_waiters() { return num_waiting_threads; }
void set_waiter(kmp_info_t *thr) {
waiting_threads[0] = thr;
num_waiting_threads = 1;
}
bool done_check_val(kmp_uint64 old_loc) {
return byteref(&old_loc, offset) == checker;
}

@ -978,6 +997,17 @@ public:
KMP_TEST_THEN_OR64(get(), mask);
}
}
kmp_uint64 set_sleeping() {
return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
}
kmp_uint64 unset_sleeping() {
return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
}
bool is_sleeping_val(kmp_uint64 old_loc) {
return old_loc & KMP_BARRIER_SLEEP_STATE;
}
bool is_sleeping() { return is_sleeping_val(*get()); }
bool is_any_sleeping() { return is_sleeping_val(*get()); }
void wait(kmp_info_t *this_thr, int final_spin) {
if (final_spin)
__kmp_wait_template<kmp_flag_oncore, TRUE>(

@ -1008,39 +1038,27 @@ public:
thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
#endif
}
kmp_uint8 *get_stolen() { return NULL; }
enum barrier_type get_bt() { return bt; }
flag_type get_ptr_type() { return flag_oncore; }
};
static inline void __kmp_null_resume_wrapper(kmp_info_t *thr) {
int gtid = __kmp_gtid_from_thread(thr);
void *flag = CCAST(void *, thr->th.th_sleep_loc);
flag_type type = thr->th.th_sleep_loc_type;
// Used to wake up threads, volatile void* flag is usually the th_sleep_loc
// associated with int gtid.
static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
if (!flag)
return;
// Attempt to wake up a thread: examine its type and call appropriate template
switch (type) {
switch (RCAST(kmp_flag_64<> *, CCAST(void *, flag))->get_type()) {
case flag32:
__kmp_resume_32(gtid, RCAST(kmp_flag_32<> *, flag));
__kmp_resume_32(gtid, (kmp_flag_32<> *)NULL);
break;
case flag64:
__kmp_resume_64(gtid, RCAST(kmp_flag_64<> *, flag));
break;
case atomic_flag64:
__kmp_atomic_resume_64(gtid, RCAST(kmp_atomic_flag_64<> *, flag));
__kmp_resume_64(gtid, (kmp_flag_64<> *)NULL);
break;
case flag_oncore:
__kmp_resume_oncore(gtid, RCAST(kmp_flag_oncore *, flag));
__kmp_resume_oncore(gtid, (kmp_flag_oncore *)NULL);
break;
#ifdef KMP_DEBUG
case flag_unset:
KF_TRACE(100, ("__kmp_null_resume_wrapper: flag type %d is unset\n", type));
break;
default:
KF_TRACE(100, ("__kmp_null_resume_wrapper: flag type %d does not match any "
"known flag type\n",
type));
#endif
}
}
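The suspend and resume paths below both revolve around the same invariant: a thread publishes the flag it sleeps on in th_sleep_loc and sets the flag's sleep bit before blocking on a condition variable, while a resumer clears the bit, nulls th_sleep_loc, and signals. A compressed, self-contained sketch of that ordering, with illustrative names (SLEEP_BIT stands in for KMP_BARRIER_SLEEP_STATE; this is not the runtime's API):

#include <atomic>
#include <condition_variable>
#include <mutex>

struct sketch_flag {
  static const unsigned SLEEP_BIT = 1u;
  std::atomic<unsigned> word{0};
  std::mutex mx;
  std::condition_variable cv;

  void suspend(unsigned done_value) {
    std::unique_lock<std::mutex> lk(mx);
    unsigned old_w = word.fetch_or(SLEEP_BIT);  // advertise "I am going to sleep"
    if ((old_w & ~SLEEP_BIT) == done_value) {   // false alarm: already released
      word.fetch_and(~SLEEP_BIT);
      return;
    }
    cv.wait(lk, [&] { return (word.load() & SLEEP_BIT) == 0; });
  }
  void resume() {
    std::lock_guard<std::mutex> lk(mx);
    word.fetch_and(~SLEEP_BIT); // clear the sleep bit, then wake the sleeper
    cv.notify_one();
  }
};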
@ -1409,13 +1409,9 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
/* TODO: shouldn't this use release semantics to ensure that
__kmp_suspend_initialize_thread gets called first? */
old_spin = flag->set_sleeping();
TCW_PTR(th->th.th_sleep_loc, (void *)flag);
th->th.th_sleep_loc_type = flag->get_type();
if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
__kmp_pause_status != kmp_soft_paused) {
flag->unset_sleeping();
TCW_PTR(th->th.th_sleep_loc, NULL);
th->th.th_sleep_loc_type = flag_unset;
__kmp_unlock_suspend_mx(th);
return;
}

@ -1423,10 +1419,8 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
" was %x\n",
th_gtid, flag->get(), flag->load(), old_spin));
if (flag->done_check_val(old_spin) || flag->done_check()) {
flag->unset_sleeping();
TCW_PTR(th->th.th_sleep_loc, NULL);
th->th.th_sleep_loc_type = flag_unset;
if (flag->done_check_val(old_spin)) {
old_spin = flag->unset_sleeping();
KF_TRACE(5, ("__kmp_suspend_template: T#%d false alarm, reset sleep bit "
"for spin(%p)\n",
th_gtid, flag->get()));

@ -1435,6 +1429,7 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
"with low probability" return when the condition variable has
not been signaled or broadcast */
int deactivated = FALSE;
TCW_PTR(th->th.th_sleep_loc, (void *)flag);
while (flag->is_sleeping()) {
#ifdef DEBUG_SUSPEND

@ -1456,9 +1451,6 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
deactivated = TRUE;
}
KMP_DEBUG_ASSERT(th->th.th_sleep_loc);
KMP_DEBUG_ASSERT(flag->get_type() == th->th.th_sleep_loc_type);
#if USE_SUSPEND_TIMEOUT
struct timespec now;
struct timeval tval;

@ -1488,18 +1480,6 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
if ((status != 0) && (status != EINTR) && (status != ETIMEDOUT)) {
KMP_SYSFAIL("pthread_cond_wait", status);
}
KMP_DEBUG_ASSERT(flag->get_type() == flag->get_ptr_type());
if (!flag->is_sleeping() &&
((status == EINTR) || (status == ETIMEDOUT))) {
// if interrupt or timeout, and thread is no longer sleeping, we need to
// make sure sleep_loc gets reset; however, this shouldn't be needed if
// we woke up with resume
flag->unset_sleeping();
TCW_PTR(th->th.th_sleep_loc, NULL);
th->th.th_sleep_loc_type = flag_unset;
}
#ifdef KMP_DEBUG
if (status == ETIMEDOUT) {
if (flag->is_sleeping()) {

@ -1509,8 +1489,6 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
KF_TRACE(2, ("__kmp_suspend_template: T#%d timeout wakeup, sleep bit "
"not set!\n",
th_gtid));
TCW_PTR(th->th.th_sleep_loc, NULL);
th->th.th_sleep_loc_type = flag_unset;
}
} else if (flag->is_sleeping()) {
KF_TRACE(100,

@ -1528,13 +1506,6 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
}
}
}
// We may have had the loop variable set before entering the loop body;
// so we need to reset sleep_loc.
TCW_PTR(th->th.th_sleep_loc, NULL);
th->th.th_sleep_loc_type = flag_unset;
KMP_DEBUG_ASSERT(!flag->is_sleeping());
KMP_DEBUG_ASSERT(!th->th.th_sleep_loc);
#ifdef DEBUG_SUSPEND
{
char buffer[128];

@ -1556,10 +1527,6 @@ template <bool C, bool S>
void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag) {
__kmp_suspend_template(th_gtid, flag);
}
template <bool C, bool S>
void __kmp_atomic_suspend_64(int th_gtid, kmp_atomic_flag_64<C, S> *flag) {
__kmp_suspend_template(th_gtid, flag);
}
void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
__kmp_suspend_template(th_gtid, flag);
}

@ -1567,10 +1534,6 @@ void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
template void __kmp_suspend_32<false, false>(int, kmp_flag_32<false, false> *);
template void __kmp_suspend_64<false, true>(int, kmp_flag_64<false, true> *);
template void __kmp_suspend_64<true, false>(int, kmp_flag_64<true, false> *);
template void
__kmp_atomic_suspend_64<false, true>(int, kmp_atomic_flag_64<false, true> *);
template void
__kmp_atomic_suspend_64<true, false>(int, kmp_atomic_flag_64<true, false> *);
/* This routine signals the thread specified by target_gtid to wake up
after setting the sleep bit indicated by the flag argument to FALSE.

@ -1593,50 +1556,36 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) {
__kmp_lock_suspend_mx(th);
if (!flag || flag != th->th.th_sleep_loc) {
// coming from __kmp_null_resume_wrapper, or thread is now sleeping on a
// different location; wake up at new location
if (!flag) { // coming from __kmp_null_resume_wrapper
flag = (C *)CCAST(void *, th->th.th_sleep_loc);
}
// First, check if the flag is null or its type has changed. If so, someone
// else woke it up.
if (!flag) { // Thread doesn't appear to be sleeping on anything
if (!flag || flag->get_type() != flag->get_ptr_type()) { // get_ptr_type
// simply shows what flag was cast to
KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
"awake: flag(%p)\n",
gtid, target_gtid, (void *)NULL));
gtid, target_gtid, NULL));
__kmp_unlock_suspend_mx(th);
return;
} else if (flag->get_type() != th->th.th_sleep_loc_type) {
// Flag type does not appear to match this function template; possibly the
// thread is sleeping on something else. Try null resume again.
KF_TRACE(
5,
("__kmp_resume_template: T#%d retrying, thread T#%d Mismatch flag(%p), "
"spin(%p) type=%d ptr_type=%d\n",
gtid, target_gtid, flag, flag->get(), flag->get_type(),
th->th.th_sleep_loc_type));
__kmp_unlock_suspend_mx(th);
__kmp_null_resume_wrapper(th);
return;
} else { // if multiple threads are sleeping, flag should be internally
// referring to a specific thread here
if (!flag->is_sleeping()) {
typename C::flag_t old_spin = flag->unset_sleeping();
if (!flag->is_sleeping_val(old_spin)) {
KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
"awake: flag(%p): %u\n",
gtid, target_gtid, flag->get(), (unsigned int)flag->load()));
"awake: flag(%p): "
"%u => %u\n",
gtid, target_gtid, flag->get(), old_spin, flag->load()));
__kmp_unlock_suspend_mx(th);
return;
}
KF_TRACE(5, ("__kmp_resume_template: T#%d about to wakeup T#%d, reset "
"sleep bit for flag's loc(%p): "
"%u => %u\n",
gtid, target_gtid, flag->get(), old_spin, flag->load()));
}
KMP_DEBUG_ASSERT(flag);
flag->unset_sleeping();
TCW_PTR(th->th.th_sleep_loc, NULL);
th->th.th_sleep_loc_type = flag_unset;
KF_TRACE(5, ("__kmp_resume_template: T#%d about to wakeup T#%d, reset "
"sleep bit for flag's loc(%p): %u\n",
gtid, target_gtid, flag->get(), (unsigned int)flag->load()));
#ifdef DEBUG_SUSPEND
{

@ -1662,19 +1611,12 @@ template <bool C, bool S>
void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag) {
__kmp_resume_template(target_gtid, flag);
}
template <bool C, bool S>
void __kmp_atomic_resume_64(int target_gtid, kmp_atomic_flag_64<C, S> *flag) {
__kmp_resume_template(target_gtid, flag);
}
void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {
__kmp_resume_template(target_gtid, flag);
}
template void __kmp_resume_32<false, true>(int, kmp_flag_32<false, true> *);
template void __kmp_resume_32<false, false>(int, kmp_flag_32<false, false> *);
template void __kmp_resume_64<false, true>(int, kmp_flag_64<false, true> *);
template void
__kmp_atomic_resume_64<false, true>(int, kmp_atomic_flag_64<false, true> *);
#if KMP_USE_MONITOR
void __kmp_resume_monitor() {
@ -240,12 +240,13 @@ static void __kmp_win32_cond_wait(kmp_win32_cond_t *cv, kmp_win32_mutex_t *mx,
|
|||
continue;
|
||||
}
|
||||
// condition fulfilled, exiting
|
||||
flag->unset_sleeping();
|
||||
old_f = flag->unset_sleeping();
|
||||
KMP_DEBUG_ASSERT(old_f & KMP_BARRIER_SLEEP_STATE);
|
||||
TCW_PTR(th->th.th_sleep_loc, NULL);
|
||||
th->th.th_sleep_loc_type = flag_unset;
|
||||
KF_TRACE(50, ("__kmp_win32_cond_wait: exiting, condition "
|
||||
"fulfilled: flag's loc(%p): %u\n",
|
||||
flag->get(), (unsigned int)flag->load()));
|
||||
KF_TRACE(50,
|
||||
("__kmp_win32_cond_wait: exiting, condition "
|
||||
"fulfilled: flag's loc(%p): %u => %u\n",
|
||||
flag->get(), (unsigned int)old_f, (unsigned int)flag->load()));
|
||||
|
||||
__kmp_win32_mutex_lock(&cv->waiters_count_lock_);
|
||||
KMP_DEBUG_ASSERT(cv->waiters_count_ > 0);
|
||||
|
@ -375,13 +376,9 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
|
|||
/* TODO: shouldn't this use release semantics to ensure that
|
||||
__kmp_suspend_initialize_thread gets called first? */
|
||||
old_spin = flag->set_sleeping();
|
||||
TCW_PTR(th->th.th_sleep_loc, (void *)flag);
|
||||
th->th.th_sleep_loc_type = flag->get_type();
|
||||
if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
|
||||
__kmp_pause_status != kmp_soft_paused) {
|
||||
flag->unset_sleeping();
|
||||
TCW_PTR(th->th.th_sleep_loc, NULL);
|
||||
th->th.th_sleep_loc_type = flag_unset;
|
||||
__kmp_unlock_suspend_mx(th);
|
||||
return;
|
||||
}
|
||||
|
@ -390,10 +387,8 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
|
|||
" loc(%p)==%u\n",
|
||||
th_gtid, flag->get(), (unsigned int)flag->load()));
|
||||
|
||||
if (flag->done_check_val(old_spin) || flag->done_check()) {
|
||||
flag->unset_sleeping();
|
||||
TCW_PTR(th->th.th_sleep_loc, NULL);
|
||||
th->th.th_sleep_loc_type = flag_unset;
|
||||
if (flag->done_check_val(old_spin)) {
|
||||
old_spin = flag->unset_sleeping();
|
||||
KF_TRACE(5, ("__kmp_suspend_template: T#%d false alarm, reset sleep bit "
|
||||
"for flag's loc(%p)\n",
|
||||
th_gtid, flag->get()));
|
||||
|
@ -405,7 +400,7 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
|
|||
low probability" return when the condition variable has not been signaled
|
||||
or broadcast */
|
||||
int deactivated = FALSE;
|
||||
|
||||
TCW_PTR(th->th.th_sleep_loc, (void *)flag);
|
||||
while (flag->is_sleeping()) {
|
||||
KF_TRACE(15, ("__kmp_suspend_template: T#%d about to perform "
|
||||
"kmp_win32_cond_wait()\n",
|
||||
|
@ -420,14 +415,13 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
|
|||
KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
|
||||
}
|
||||
deactivated = TRUE;
|
||||
__kmp_win32_cond_wait(&th->th.th_suspend_cv, &th->th.th_suspend_mx, th,
|
||||
flag);
|
||||
} else {
|
||||
__kmp_win32_cond_wait(&th->th.th_suspend_cv, &th->th.th_suspend_mx, th,
|
||||
flag);
|
||||
}
|
||||
|
||||
KMP_DEBUG_ASSERT(th->th.th_sleep_loc);
|
||||
KMP_DEBUG_ASSERT(th->th.th_sleep_loc_type == flag->get_type());
|
||||
|
||||
__kmp_win32_cond_wait(&th->th.th_suspend_cv, &th->th.th_suspend_mx, th,
|
||||
flag);
|
||||
|
||||
#ifdef KMP_DEBUG
|
||||
if (flag->is_sleeping()) {
|
||||
KF_TRACE(100,
|
||||
|
@ -437,14 +431,6 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
|
|||
|
||||
} // while
|
||||
|
||||
// We may have had the loop variable set before entering the loop body;
|
||||
// so we need to reset sleep_loc.
|
||||
TCW_PTR(th->th.th_sleep_loc, NULL);
|
||||
th->th.th_sleep_loc_type = flag_unset;
|
||||
|
||||
KMP_DEBUG_ASSERT(!flag->is_sleeping());
|
||||
KMP_DEBUG_ASSERT(!th->th.th_sleep_loc);
|
||||
|
||||
// Mark the thread as active again (if it was previous marked as inactive)
|
||||
if (deactivated) {
|
||||
th->th.th_active = TRUE;
|
||||
|
@ -467,10 +453,6 @@ template <bool C, bool S>
void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag) {
  __kmp_suspend_template(th_gtid, flag);
}
template <bool C, bool S>
void __kmp_atomic_suspend_64(int th_gtid, kmp_atomic_flag_64<C, S> *flag) {
  __kmp_suspend_template(th_gtid, flag);
}
void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
  __kmp_suspend_template(th_gtid, flag);
}
@ -478,10 +460,6 @@ void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
template void __kmp_suspend_32<false, false>(int, kmp_flag_32<false, false> *);
template void __kmp_suspend_64<false, true>(int, kmp_flag_64<false, true> *);
template void __kmp_suspend_64<true, false>(int, kmp_flag_64<true, false> *);
template void
__kmp_atomic_suspend_64<false, true>(int, kmp_atomic_flag_64<false, true> *);
template void
__kmp_atomic_suspend_64<true, false>(int, kmp_atomic_flag_64<true, false> *);

/* This routine signals the thread specified by target_gtid to wake up
   after setting the sleep bit indicated by the flag argument to FALSE */
@ -499,35 +477,32 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) {
__kmp_suspend_initialize_thread(th);
__kmp_lock_suspend_mx(th);

if (!flag || flag != th->th.th_sleep_loc) {
  // coming from __kmp_null_resume_wrapper, or thread is now sleeping on a
  // different location; wake up at new location
if (!flag) { // coming from __kmp_null_resume_wrapper
  flag = (C *)th->th.th_sleep_loc;
}

// First, check if the flag is null or its type has changed. If so, someone
// else woke it up.
if (!flag || flag->get_type() != th->th.th_sleep_loc_type) {
  // simply shows what flag was cast to
if (!flag || flag->get_type() != flag->get_ptr_type()) { // get_ptr_type
                                                         // simply shows what
                                                         // flag was cast to
  KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
               "awake: flag's loc(%p)\n",
               gtid, target_gtid, NULL));
  __kmp_unlock_suspend_mx(th);
  return;
} else {
  if (!flag->is_sleeping()) {
  typename C::flag_t old_spin = flag->unset_sleeping();
  if (!flag->is_sleeping_val(old_spin)) {
    KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
                 "awake: flag's loc(%p): %u\n",
                 gtid, target_gtid, flag->get(), (unsigned int)flag->load()));
                 "awake: flag's loc(%p): %u => %u\n",
                 gtid, target_gtid, flag->get(), (unsigned int)old_spin,
                 (unsigned int)flag->load()));
    __kmp_unlock_suspend_mx(th);
    return;
  }
}
KMP_DEBUG_ASSERT(flag);
flag->unset_sleeping();
TCW_PTR(th->th.th_sleep_loc, NULL);
th->th.th_sleep_loc_type = flag_unset;

KF_TRACE(5, ("__kmp_resume_template: T#%d about to wakeup T#%d, reset sleep "
             "bit for flag's loc(%p)\n",
             gtid, target_gtid, flag->get()));
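The early-exit checks in this hunk handle the race with the sleeper: a null flag means the caller came through __kmp_null_resume_wrapper and should wake whatever th_sleep_loc holds, while a sleep bit that is already clear means the target woke up on its own and needs no signal. A hypothetical condensation of that decision flow (self-contained; kSleepBit and demo_resume are illustrative names, not runtime symbols):

#include <atomic>
#include <cstdint>

// Illustrative sleep bit, matching the earlier sketch; not the runtime's value.
constexpr uint64_t kSleepBit = 1ULL << 63;

// Hypothetical condensation of the resume-side checks; not the actual
// __kmp_resume_template.
bool demo_resume(std::atomic<uint64_t> *flag,
                 std::atomic<uint64_t> *registered_sleep_loc) {
  if (!flag)                      // null-resume case: wake whatever the
    flag = registered_sleep_loc;  // target thread registered as th_sleep_loc
  if (!flag)                      // nothing registered: target already awake
    return false;

  uint64_t old = flag->fetch_and(~kSleepBit); // unset_sleeping()
  if (!(old & kSleepBit))         // sleep bit was already clear:
    return false;                 // the sleeper woke itself, nothing to signal

  // A real resume would now clear th_sleep_loc and signal th_suspend_cv.
  return true;
}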
@ -548,19 +523,12 @@ template <bool C, bool S>
void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag) {
  __kmp_resume_template(target_gtid, flag);
}
template <bool C, bool S>
void __kmp_atomic_resume_64(int target_gtid, kmp_atomic_flag_64<C, S> *flag) {
  __kmp_resume_template(target_gtid, flag);
}
void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {
  __kmp_resume_template(target_gtid, flag);
}

template void __kmp_resume_32<false, true>(int, kmp_flag_32<false, true> *);
template void __kmp_resume_32<false, false>(int, kmp_flag_32<false, false> *);
template void __kmp_resume_64<false, true>(int, kmp_flag_64<false, true> *);
template void
__kmp_atomic_resume_64<false, true>(int, kmp_atomic_flag_64<false, true> *);

void __kmp_yield() { Sleep(0); }
@ -2,8 +2,6 @@
// RUN: %libomp-compile && env KMP_BLOCKTIME=infinite %libomp-run
// RUN: %libomp-compile && env KMP_PLAIN_BARRIER_PATTERN='hierarchical,hierarchical' KMP_FORKJOIN_BARRIER_PATTERN='hierarchical,hierarchical' %libomp-run
// RUN: %libomp-compile && env KMP_BLOCKTIME=infinite KMP_PLAIN_BARRIER_PATTERN='hierarchical,hierarchical' KMP_FORKJOIN_BARRIER_PATTERN='hierarchical,hierarchical' %libomp-run
// RUN: %libomp-compile && env KMP_PLAIN_BARRIER_PATTERN='dist,dist' KMP_FORKJOIN_BARRIER_PATTERN='dist,dist' KMP_REDUCTION_BARRIER_PATTERN='dist,dist' %libomp-run
// RUN: %libomp-compile && env KMP_BLOCKTIME=infinite KMP_PLAIN_BARRIER_PATTERN='dist,dist' KMP_FORKJOIN_BARRIER_PATTERN='dist,dist' KMP_REDUCTION_BARRIER_PATTERN='dist,dist' %libomp-run
#include <stdio.h>
#include "omp_testsuite.h"
#include "omp_my_sleep.h"
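The RUN lines above re-run one test body under different barrier patterns selected through KMP_* environment variables; the two dist lines are what this revert drops. As a rough illustration of the kind of check such a barrier test can make (the body below is a guess at the shape of the test, not the actual source of this file; it compiles as plain OpenMP C/C++ and assumes libomp's pattern variables shown above):

// Hypothetical, self-contained barrier check in the spirit of this test;
// the real test body lives in the file this hunk belongs to.
#include <stdio.h>
#include <omp.h>

int main(void) {
  int nthreads = 0, arrived = 0, errors = 0;
  #pragma omp parallel shared(nthreads, arrived, errors)
  {
    #pragma omp single
    nthreads = omp_get_num_threads();
    // The implicit barrier at the end of 'single' publishes nthreads.

    #pragma omp atomic
    arrived++;
    #pragma omp barrier // the barrier pattern selected via KMP_*_BARRIER_PATTERN
    int seen;
    #pragma omp atomic read
    seen = arrived;
    if (seen != nthreads) { // a thread passed the barrier before all arrived
      #pragma omp atomic
      errors++;
    }
  }
  printf("errors=%d\n", errors);
  return errors != 0;
}

Run under the same environment variables as the RUN lines, e.g. KMP_PLAIN_BARRIER_PATTERN='hierarchical,hierarchical', to exercise a particular pattern.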