Revert "[OpenMP] Add support for Intel's umonitor/umwait"
This reverts commit 9cfad5f9c5
.
This commit is contained in:
parent
0c101c9cbc
commit
5644f734d6
|
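Summary of what the revert removes, drawn from the hunks below: the EnvMwaitWarn catalog message; the __kmp_tpause/__kmp_umonitor/__kmp_umwait wrappers and their ring-3 _mm_monitor/_mm_mwait fallbacks in kmp.h; the KMP_HAVE_MWAIT/KMP_HAVE_UMWAIT gates in kmp_os.h; the CPUID/getauxval detection in kmp_runtime.cpp; the KMP_USER_LEVEL_MWAIT and KMP_MWAIT_HINTS settings; and the mwait branch of __kmp_wait_template. It also returns kmp_flag_32/kmp_flag_64 from class templates over <bool C, bool S> (mirroring the Cancellable/Sleepable parameters of __kmp_wait_template) to plain classes, which accounts for the many kmp_flag_64<> -> kmp_flag_64 edits throughout.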
openmp/runtime/src/i18n/en_US.txt
@@ -417,8 +417,6 @@ AffUsingHwloc "%1$s: Affinity capable, using hwloc."
 AffIgnoringHwloc "%1$s: Ignoring hwloc mechanism."
 AffHwlocErrorOccurred "%1$s: Hwloc failed in %2$s. Relying on internal affinity mechanisms."
 EnvSerialWarn "%1$s must be set prior to OpenMP runtime library initialization; ignored."
-EnvMwaitWarn "You have enabled the use of umonitor/umwait. If the CPU doesn't have that enabled "
-             "you'll get an illegal instruction exception."
 EnvVarDeprecated "%1$s variable deprecated, please use %2$s instead."
 RedMethodNotSupported "KMP_FORCE_REDUCTION: %1$s method is not supported; using critical."
 AffHWSubsetNoHWLOC "KMP_HW_SUBSET ignored: unsupported item requested for non-HWLOC topology method (KMP_TOPOLOGY_METHOD)"

openmp/runtime/src/kmp.h
@@ -255,10 +255,6 @@ typedef union kmp_team kmp_team_p;
 typedef union kmp_info kmp_info_p;
 typedef union kmp_root kmp_root_p;
 
-template <bool C = false, bool S = true> class kmp_flag_32;
-template <bool C = false, bool S = true> class kmp_flag_64;
-class kmp_flag_oncore;
-
 #ifdef __cplusplus
 extern "C" {
 #endif

@@ -1322,96 +1318,6 @@ static inline void __kmp_x86_pause(void) { _mm_pause(); }
   } \
 }
 
-// User-level Monitor/Mwait
-#if KMP_HAVE_UMWAIT
-// We always try for UMWAIT first
-#if (KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300) ||                          \
-    (KMP_COMPILER_MSVC && _MSC_VER >= 1700) ||                                 \
-    (KMP_COMPILER_CLANG && (KMP_MSVC_COMPAT || __MINGW32__)) ||                \
-    (KMP_COMPILER_GCC && __MINGW32__)
-#if KMP_OS_UNIX
-#include <immintrin.h>
-#else
-#include <intrin.h>
-#endif // KMP_OS_UNIX
-#else
-#define USE_MWAIT_ASM                                                          \
-  KMP_OS_UNIX && (!KMP_COMPILER_ICC || __INTEL_COMPILER < 1900)
-#endif // KMP_COMPILER_ICC etc.
-#if KMP_OS_UNIX && 0 // "waitpkg" not recognized yet
-__attribute__((target("waitpkg")))
-#endif
-static inline int
-__kmp_tpause(uint32_t hint, uint64_t counter) {
-#if (USE_MWAIT_ASM)
-  uint32_t timeHi = uint32_t(counter >> 32);
-  uint32_t timeLo = uint32_t(counter & 0xffffffff);
-  char flag;
-  __asm__ volatile("#tpause\n.byte 0x66, 0x0F, 0xAE, 0xF1\n"
-                   "setb %0"
-                   : "=r"(flag)
-                   : "a"(timeLo), "d"(timeHi), "c"(hint)
-                   :);
-  return flag;
-#else
-  return _tpause(hint, counter);
-#endif
-}
-#if KMP_OS_UNIX && 0 // "waitpkg" not recognized on our build machine
-__attribute__((target("waitpkg")))
-#endif
-static inline void
-__kmp_umonitor(void *cacheline) {
-#if (USE_MWAIT_ASM)
-  __asm__ volatile("# umonitor\n.byte 0xF3, 0x0F, 0xAE, 0x01 "
-                   :
-                   : "a"(cacheline)
-                   :);
-#else
-  _umonitor(cacheline);
-#endif
-}
-#if KMP_OS_UNIX && 0 // "waitpkg" not recognized on our build machine
-__attribute__((target("waitpkg")))
-#endif
-static inline int
-__kmp_umwait(uint32_t hint, uint64_t counter) {
-#if (USE_MWAIT_ASM)
-  uint32_t timeHi = uint32_t(counter >> 32);
-  uint32_t timeLo = uint32_t(counter & 0xffffffff);
-  char flag;
-  __asm__ volatile("#umwait\n.byte 0xF2, 0x0F, 0xAE, 0xF1\n"
-                   "setb %0"
-                   : "=r"(flag)
-                   : "a"(timeLo), "d"(timeHi), "c"(hint)
-                   :);
-  return flag;
-#else
-  return _umwait(hint, counter);
-#endif
-}
-#elif KMP_HAVE_MWAIT
-#if KMP_OS_UNIX
-#include <pmmintrin.h>
-#else
-#include <intrin.h>
-#endif
-#if KMP_OS_UNIX
-__attribute__((target("sse3")))
-#endif
-static inline void
-__kmp_mm_monitor(void *cacheline, unsigned extensions, unsigned hints) {
-  _mm_monitor(cacheline, extensions, hints);
-}
-#if KMP_OS_UNIX
-__attribute__((target("sse3")))
-#endif
-static inline void
-__kmp_mm_mwait(unsigned extensions, unsigned hints) {
-  _mm_mwait(extensions, hints);
-}
-#endif // KMP_HAVE_UMWAIT
-
 /* ------------------------------------------------------------------------ */
 /* Support datatypes for the orphaned construct nesting checks.             */
 /* ------------------------------------------------------------------------ */

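The helpers deleted above emit raw opcode bytes (66/F3/F2 0F AE /6) because assemblers of the time did not yet accept the tpause/umonitor/umwait mnemonics; ECX carries the hint, EDX:EAX the TSC deadline, and setb captures CF, which reports whether the wait timed out. A minimal sketch of the same wait written against the compiler intrinsics instead — assuming a toolchain whose <immintrin.h> provides _umonitor/_umwait and compiling with -mwaitpkg; the function and variable names are illustrative, not part of the runtime:

#include <immintrin.h> // _umonitor/_umwait, requires -mwaitpkg (GCC/Clang)
#include <cstdint>

// Illustrative only: wait until *addr changes or the TSC deadline passes.
static inline bool wait_for_store(volatile std::uint32_t *addr,
                                  std::uint32_t old_val,
                                  std::uint64_t tsc_deadline) {
  while (*addr == old_val) {
    _umonitor(const_cast<std::uint32_t *>(addr)); // arm monitor on the line
    if (*addr != old_val) // re-check: a store may have landed before UMONITOR
      return true;
    if (_umwait(0, tsc_deadline)) // nonzero (CF set) => deadline expired first
      return false;
  }
  return true;
}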
@@ -3188,13 +3094,6 @@ static inline void __kmp_assert_valid_gtid(kmp_int32 gtid) {
     KMP_FATAL(ThreadIdentInvalid);
 }
 
-#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
-extern int __kmp_user_level_mwait; // TRUE or FALSE; from KMP_USER_LEVEL_MWAIT
-extern int __kmp_umwait_enabled; // Runtime check if user-level mwait enabled
-extern int __kmp_mwait_enabled; // Runtime check if ring3 mwait is enabled
-extern int __kmp_mwait_hints; // Hints to pass in to mwait
-#endif
-
 /* ------------------------------------------------------------------------- */
 
 extern kmp_global_t __kmp_global; /* global status */

@@ -3396,14 +3295,17 @@ extern kmp_uint32 __kmp_wait_4(kmp_uint32 volatile *spinner, kmp_uint32 checker,
 extern void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker,
                              kmp_uint32 (*pred)(void *, kmp_uint32), void *obj);
 
-extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64<> *flag,
+class kmp_flag_32;
+class kmp_flag_64;
+class kmp_flag_oncore;
+extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag,
                           int final_spin
 #if USE_ITT_BUILD
                           ,
                           void *itt_sync_obj
 #endif
                           );
-extern void __kmp_release_64(kmp_flag_64<> *flag);
+extern void __kmp_release_64(kmp_flag_64 *flag);
 
 extern void __kmp_infinite_loop(void);
 

@@ -3501,6 +3403,13 @@ extern int __kmp_try_suspend_mx(kmp_info_t *th);
 extern void __kmp_lock_suspend_mx(kmp_info_t *th);
 extern void __kmp_unlock_suspend_mx(kmp_info_t *th);
 
+extern void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag);
+extern void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag);
+extern void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag);
+extern void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag);
+extern void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag);
+extern void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag);
+
 extern void __kmp_elapsed(double *);
 extern void __kmp_elapsed_tick(double *);
 

@@ -3625,6 +3534,28 @@ extern kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
                                                        kmp_task_t *task);
 extern void __kmp_fulfill_event(kmp_event_t *event);
 
+int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid,
+                           kmp_flag_32 *flag, int final_spin,
+                           int *thread_finished,
+#if USE_ITT_BUILD
+                           void *itt_sync_obj,
+#endif /* USE_ITT_BUILD */
+                           kmp_int32 is_constrained);
+int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid,
+                           kmp_flag_64 *flag, int final_spin,
+                           int *thread_finished,
+#if USE_ITT_BUILD
+                           void *itt_sync_obj,
+#endif /* USE_ITT_BUILD */
+                           kmp_int32 is_constrained);
+int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid,
+                               kmp_flag_oncore *flag, int final_spin,
+                               int *thread_finished,
+#if USE_ITT_BUILD
+                               void *itt_sync_obj,
+#endif /* USE_ITT_BUILD */
+                               kmp_int32 is_constrained);
+
 extern void __kmp_free_task_team(kmp_info_t *thread,
                                  kmp_task_team_t *task_team);
 extern void __kmp_reap_task_teams(void);

@@ -3988,46 +3919,4 @@ extern void __kmp_omp_display_env(int verbose);
 }
 #endif
 
-template <bool C, bool S>
-extern void __kmp_suspend_32(int th_gtid, kmp_flag_32<C, S> *flag);
-template <bool C, bool S>
-extern void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag);
-extern void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag);
-#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
-template <bool C, bool S>
-extern void __kmp_mwait_32(int th_gtid, kmp_flag_32<C, S> *flag);
-template <bool C, bool S>
-extern void __kmp_mwait_64(int th_gtid, kmp_flag_64<C, S> *flag);
-extern void __kmp_mwait_oncore(int th_gtid, kmp_flag_oncore *flag);
-#endif
-template <bool C, bool S>
-extern void __kmp_resume_32(int target_gtid, kmp_flag_32<C, S> *flag);
-template <bool C, bool S>
-extern void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag);
-extern void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag);
-
-template <bool C, bool S>
-int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid,
-                           kmp_flag_32<C, S> *flag, int final_spin,
-                           int *thread_finished,
-#if USE_ITT_BUILD
-                           void *itt_sync_obj,
-#endif /* USE_ITT_BUILD */
-                           kmp_int32 is_constrained);
-template <bool C, bool S>
-int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid,
-                           kmp_flag_64<C, S> *flag, int final_spin,
-                           int *thread_finished,
-#if USE_ITT_BUILD
-                           void *itt_sync_obj,
-#endif /* USE_ITT_BUILD */
-                           kmp_int32 is_constrained);
-int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid,
-                               kmp_flag_oncore *flag, int final_spin,
-                               int *thread_finished,
-#if USE_ITT_BUILD
-                               void *itt_sync_obj,
-#endif /* USE_ITT_BUILD */
-                               kmp_int32 is_constrained);
-
 #endif /* KMP_H */

openmp/runtime/src/kmp_barrier.cpp
@@ -78,7 +78,7 @@ static bool __kmp_linear_barrier_gather_template(
        is valid any more - it could be deallocated by the master thread at any
        time. */
     ANNOTATE_BARRIER_BEGIN(this_thr);
-    kmp_flag_64<> flag(&thr_bar->b_arrived, other_threads[0]);
+    kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[0]);
     flag.release();
   } else {
     kmp_balign_team_t *team_bar = &team->t.t_bar[bt];

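Every remaining hunk in this file makes the same mechanical change: kmp_flag_64<> becomes kmp_flag_64 once the flag classes stop being templates. For readers unfamiliar with the <> spelling, a sketch of the pattern being reverted, using the default arguments from the declarations removed in kmp.h above (the type name here is hypothetical):

// Default template arguments make "flag64_like<>" the common specialization.
template <bool C = false, bool S = true> class flag64_like { /* ... */ };

flag64_like<> default_flag;           // same type as flag64_like<false, true>
flag64_like<true, false> cancellable; // variant used on the cancellable path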
@@ -101,14 +101,14 @@ static bool __kmp_linear_barrier_gather_template(
            &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state));
 
       // Wait for worker thread to arrive
+      kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived,
+                       new_state);
       if (cancellable) {
-        kmp_flag_64<true, false> flag(
-            &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state);
-        if (flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)))
+        bool cancelled = flag.wait_cancellable_nosleep(
+            this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
+        if (cancelled)
           return true;
       } else {
-        kmp_flag_64<> flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived,
-                           new_state);
         flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
       }
       ANNOTATE_BARRIER_END(other_threads[i]);

@@ -203,20 +203,22 @@ static bool __kmp_linear_barrier_release_template(
             other_threads[i]->th.th_bar[bt].bb.b_go,
             other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP));
         ANNOTATE_BARRIER_BEGIN(other_threads[i]);
-        kmp_flag_64<> flag(&other_threads[i]->th.th_bar[bt].bb.b_go,
+        kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_go,
                          other_threads[i]);
         flag.release();
       }
     }
   } else { // Wait for the MASTER thread to release us
     KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n",
                   gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
+    kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
     if (cancellable) {
-      kmp_flag_64<true, false> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
-      if (flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj)))
+      bool cancelled = flag.wait_cancellable_nosleep(
+          this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
+      if (cancelled) {
         return true;
+      }
     } else {
-      kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
       flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
     }
     ANNOTATE_BARRIER_END(this_thr);

@@ -337,7 +339,7 @@ __kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
                 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                 team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
       // Wait for child to arrive
-      kmp_flag_64<> flag(&child_bar->b_arrived, new_state);
+      kmp_flag_64 flag(&child_bar->b_arrived, new_state);
       flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
       ANNOTATE_BARRIER_END(child_thr);
 #if USE_ITT_BUILD && USE_ITT_NOTIFY

@@ -382,7 +384,7 @@ __kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
        is valid any more - it could be deallocated by the master thread at any
        time. */
     ANNOTATE_BARRIER_BEGIN(this_thr);
-    kmp_flag_64<> flag(&thr_bar->b_arrived, other_threads[parent_tid]);
+    kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[parent_tid]);
     flag.release();
   } else {
     // Need to update the team arrived pointer if we are the master thread

@@ -418,7 +420,7 @@ static void __kmp_tree_barrier_release(
     KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n", gtid,
                   &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
     // Wait for parent thread to release us
-    kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
+    kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
     flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
     ANNOTATE_BARRIER_END(this_thr);
 #if USE_ITT_BUILD && USE_ITT_NOTIFY

@@ -496,7 +498,7 @@ static void __kmp_tree_barrier_release(
                     child_bar->b_go + KMP_BARRIER_STATE_BUMP));
       // Release child from barrier
       ANNOTATE_BARRIER_BEGIN(child_thr);
-      kmp_flag_64<> flag(&child_bar->b_go, child_thr);
+      kmp_flag_64 flag(&child_bar->b_go, child_thr);
       flag.release();
       child++;
       child_tid++;

@@ -538,7 +540,7 @@ __kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
 #endif
   /* Perform a hypercube-embedded tree gather to wait until all of the threads
      have arrived, and reduce any required data as we go. */
-  kmp_flag_64<> p_flag(&thr_bar->b_arrived);
+  kmp_flag_64 p_flag(&thr_bar->b_arrived);
   for (level = 0, offset = 1; offset < num_threads;
        level += branch_bits, offset <<= branch_bits) {
     kmp_uint32 child;

@@ -586,7 +588,7 @@ __kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
               gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
               team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
         // Wait for child to arrive
-        kmp_flag_64<> c_flag(&child_bar->b_arrived, new_state);
+        kmp_flag_64 c_flag(&child_bar->b_arrived, new_state);
         c_flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
         ANNOTATE_BARRIER_END(child_thr);
         KMP_MB(); // Synchronize parent and child threads.

@@ -668,7 +670,7 @@ static void __kmp_hyper_barrier_release(
     KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n", gtid,
                   &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
     // Wait for parent thread to release us
-    kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
+    kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
     flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
     ANNOTATE_BARRIER_END(this_thr);
 #if USE_ITT_BUILD && USE_ITT_NOTIFY

@@ -770,7 +772,7 @@ static void __kmp_hyper_barrier_release(
                     child_bar->b_go + KMP_BARRIER_STATE_BUMP));
       // Release child from barrier
       ANNOTATE_BARRIER_BEGIN(child_thr);
-      kmp_flag_64<> flag(&child_bar->b_go, child_thr);
+      kmp_flag_64 flag(&child_bar->b_go, child_thr);
       flag.release();
     }
   }

@@ -915,7 +917,7 @@ static void __kmp_hierarchical_barrier_gather(
     KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting "
                   "for leaf kids\n",
                   gtid, team->t.t_id, tid));
-    kmp_flag_64<> flag(&thr_bar->b_arrived, leaf_state);
+    kmp_flag_64 flag(&thr_bar->b_arrived, leaf_state);
     flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
     if (reduce) {
       ANNOTATE_REDUCE_AFTER(reduce);

@@ -955,7 +957,7 @@ static void __kmp_hierarchical_barrier_gather(
                       gtid, team->t.t_id, tid,
                       __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                       child_tid, &child_bar->b_arrived, new_state));
-        kmp_flag_64<> flag(&child_bar->b_arrived, new_state);
+        kmp_flag_64 flag(&child_bar->b_arrived, new_state);
         flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
         ANNOTATE_BARRIER_END(child_thr);
         if (reduce) {

@@ -988,7 +990,7 @@ static void __kmp_hierarchical_barrier_gather(
                       gtid, team->t.t_id, tid,
                       __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                       child_tid, &child_bar->b_arrived, new_state));
-        kmp_flag_64<> flag(&child_bar->b_arrived, new_state);
+        kmp_flag_64 flag(&child_bar->b_arrived, new_state);
         flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
         ANNOTATE_BARRIER_END(child_thr);
         if (reduce) {

@@ -1023,8 +1025,7 @@ static void __kmp_hierarchical_barrier_gather(
         !thr_bar->use_oncore_barrier) { // Parent is waiting on my b_arrived
                                         // flag; release it
       ANNOTATE_BARRIER_BEGIN(this_thr);
-      kmp_flag_64<> flag(&thr_bar->b_arrived,
-                         other_threads[thr_bar->parent_tid]);
+      kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[thr_bar->parent_tid]);
       flag.release();
     } else {
       // Leaf does special release on "offset" bits of parent's b_arrived flag

@@ -1068,7 +1069,7 @@ static void __kmp_hierarchical_barrier_release(
       thr_bar->team == NULL) {
     // Use traditional method of waiting on my own b_go flag
     thr_bar->wait_flag = KMP_BARRIER_OWN_FLAG;
-    kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
+    kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
     flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
     ANNOTATE_BARRIER_END(this_thr);
     TCW_8(thr_bar->b_go,

@@ -1217,7 +1218,7 @@ static void __kmp_hierarchical_barrier_release(
                       child_bar->b_go + KMP_BARRIER_STATE_BUMP));
         // Release child using child's b_go flag
         ANNOTATE_BARRIER_BEGIN(child_thr);
-        kmp_flag_64<> flag(&child_bar->b_go, child_thr);
+        kmp_flag_64 flag(&child_bar->b_go, child_thr);
         flag.release();
       }
     } else { // Release all children at once with leaf_state bits on my own

@@ -1243,7 +1244,7 @@ static void __kmp_hierarchical_barrier_release(
                       child_bar->b_go + KMP_BARRIER_STATE_BUMP));
         // Release child using child's b_go flag
         ANNOTATE_BARRIER_BEGIN(child_thr);
-        kmp_flag_64<> flag(&child_bar->b_go, child_thr);
+        kmp_flag_64 flag(&child_bar->b_go, child_thr);
         flag.release();
       }
     }

openmp/runtime/src/kmp_global.cpp
@@ -206,13 +206,6 @@ int __kmp_display_env = FALSE;
 int __kmp_display_env_verbose = FALSE;
 int __kmp_omp_cancellation = FALSE;
 
-#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
-int __kmp_user_level_mwait = FALSE;
-int __kmp_umwait_enabled = FALSE;
-int __kmp_mwait_enabled = FALSE;
-int __kmp_mwait_hints = 0;
-#endif
-
 /* map OMP 3.0 schedule types with our internal schedule types */
 enum sched_type __kmp_sch_map[kmp_sched_upper - kmp_sched_lower_ext +
                               kmp_sched_upper_std - kmp_sched_lower - 2] = {

openmp/runtime/src/kmp_os.h
@@ -281,16 +281,6 @@ template <> struct traits_t<unsigned long long> {
 #define __forceinline __inline
 #endif
 
-/* Check if the OS/arch can support user-level mwait */
-// All mwait code tests for UMWAIT first, so it should only fall back to ring3
-// MWAIT for KNL.
-#define KMP_HAVE_MWAIT                                                         \
-  ((KMP_ARCH_X86 || KMP_ARCH_X86_64) && (KMP_OS_LINUX || KMP_OS_WINDOWS) &&    \
-   !KMP_MIC2)
-#define KMP_HAVE_UMWAIT                                                        \
-  ((KMP_ARCH_X86 || KMP_ARCH_X86_64) && (KMP_OS_LINUX || KMP_OS_WINDOWS) &&    \
-   !KMP_MIC)
-
 #if KMP_OS_WINDOWS
 #include <windows.h>
 

openmp/runtime/src/kmp_runtime.cpp
@@ -5458,7 +5458,7 @@ void __kmp_free_team(kmp_root_t *root,
         }
 #endif
         // first check if thread is sleeping
-        kmp_flag_64<> fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
+        kmp_flag_64 fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
         if (fl.is_sleeping())
           fl.resume(__kmp_gtid_from_thread(th));
         KMP_CPU_PAUSE();

@@ -5885,7 +5885,7 @@ static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
     /* Need release fence here to prevent seg faults for tree forkjoin barrier
      * (GEH) */
     ANNOTATE_HAPPENS_BEFORE(thread);
-    kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
+    kmp_flag_64 flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
     __kmp_release_64(&flag);
   }
 

@@ -6579,48 +6579,6 @@ static void __kmp_check_mic_type() {
 
 #endif /* KMP_MIC_SUPPORTED */
 
-#if KMP_HAVE_UMWAIT
-static void __kmp_user_level_mwait_init() {
-  struct kmp_cpuid buf;
-  __kmp_x86_cpuid(7, 0, &buf);
-  __kmp_umwait_enabled = ((buf.ecx >> 5) & 1) && __kmp_user_level_mwait;
-  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
-                __kmp_umwait_enabled));
-}
-#elif KMP_HAVE_MWAIT
-#ifndef AT_INTELPHIUSERMWAIT
-// Spurious, non-existent value that should always fail to return anything.
-// Will be replaced with the correct value when we know that.
-#define AT_INTELPHIUSERMWAIT 10000
-#endif
-// getauxval() function is available in RHEL7 and SLES12. If a system with an
-// earlier OS is used to build the RTL, we'll use the following internal
-// function when the entry is not found.
-unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
-unsigned long getauxval(unsigned long) { return 0; }
-
-static void __kmp_user_level_mwait_init() {
-  // When getauxval() and correct value of AT_INTELPHIUSERMWAIT are available
-  // use them to find if the user-level mwait is enabled. Otherwise, forcibly
-  // set __kmp_mwait_enabled=TRUE on Intel MIC if the environment variable
-  // KMP_USER_LEVEL_MWAIT was set to TRUE.
-  if (__kmp_mic_type == mic3) {
-    unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
-    if ((res & 0x1) || __kmp_user_level_mwait) {
-      __kmp_mwait_enabled = TRUE;
-      if (__kmp_user_level_mwait) {
-        KMP_INFORM(EnvMwaitWarn);
-      }
-    } else {
-      __kmp_mwait_enabled = FALSE;
-    }
-  }
-  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
-                "__kmp_mwait_enabled = %d\n",
-                __kmp_mic_type, __kmp_mwait_enabled));
-}
-#endif /* KMP_HAVE_UMWAIT */
-
 static void __kmp_do_serial_initialize(void) {
   int i, gtid;
   int size;

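The first branch of the deleted initializer reads CPUID leaf 7, subleaf 0 and tests ECX bit 5, the architectural WAITPKG feature flag, and enables user-level mwait only when KMP_USER_LEVEL_MWAIT also asked for it. A stand-alone sketch of that detection, using GCC/Clang's <cpuid.h> helper (the function name here is illustrative):

#if defined(__x86_64__) || defined(__i386__)
#include <cpuid.h> // __get_cpuid_count (GCC/Clang)

// WAITPKG support is reported in CPUID.(EAX=7,ECX=0):ECX[5].
static bool cpu_has_waitpkg() {
  unsigned eax = 0, ebx = 0, ecx = 0, edx = 0;
  if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
    return false;          // leaf 7 not supported on this CPU
  return (ecx >> 5) & 1;   // bit 5 = WAITPKG (umonitor/umwait/tpause)
}
#endif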
@@ -6795,9 +6753,6 @@ static void __kmp_do_serial_initialize(void) {
 
   __kmp_env_initialize(NULL);
 
-#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
-  __kmp_user_level_mwait_init();
-#endif
 // Print all messages in message catalog for testing purposes.
 #ifdef KMP_DEBUG
   char const *val = __kmp_env_get("KMP_DUMP_CATALOG");

@@ -8398,8 +8353,7 @@ void __kmp_resume_if_soft_paused() {
     for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
       kmp_info_t *thread = __kmp_threads[gtid];
       if (thread) { // Wake it if sleeping
-        kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
-                         thread);
+        kmp_flag_64 fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
         if (fl.is_sleeping())
           fl.resume(gtid);
         else if (__kmp_try_suspend_mx(thread)) { // got suspend lock

openmp/runtime/src/kmp_settings.cpp
@@ -4621,35 +4621,6 @@ static void __kmp_stg_print_task_throttling(kmp_str_buf_t *buffer,
   __kmp_stg_print_bool(buffer, name, __kmp_enable_task_throttling);
 } // __kmp_stg_print_task_throttling
 
-#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
-// -----------------------------------------------------------------------------
-// KMP_USER_LEVEL_MWAIT
-
-static void __kmp_stg_parse_user_level_mwait(char const *name,
-                                             char const *value, void *data) {
-  __kmp_stg_parse_bool(name, value, &__kmp_user_level_mwait);
-} // __kmp_stg_parse_user_level_mwait
-
-static void __kmp_stg_print_user_level_mwait(kmp_str_buf_t *buffer,
-                                             char const *name, void *data) {
-  __kmp_stg_print_bool(buffer, name, __kmp_user_level_mwait);
-} // __kmp_stg_print_user_level_mwait
-
-// -----------------------------------------------------------------------------
-// KMP_MWAIT_HINTS
-
-static void __kmp_stg_parse_mwait_hints(char const *name, char const *value,
-                                        void *data) {
-  __kmp_stg_parse_int(name, value, 0, INT_MAX, &__kmp_mwait_hints);
-} // __kmp_stg_parse_mwait_hints
-
-static void __kmp_stg_print_mwait_hints(kmp_str_buf_t *buffer, char const *name,
-                                        void *data) {
-  __kmp_stg_print_int(buffer, name, __kmp_mwait_hints);
-} // __kmp_stg_print_mwait_hints
-
-#endif // KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
-
 // -----------------------------------------------------------------------------
 // OMP_DISPLAY_ENV
 

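The two settings removed above are a boolean (KMP_USER_LEVEL_MWAIT) and an integer clamped to [0, INT_MAX] (KMP_MWAIT_HINTS, forwarded as the hints operand of the mwait call). Per EnvSerialWarn in the message catalog, such variables must be in the environment before the runtime initializes; a hedged usage sketch (the test program is hypothetical, and setenv is POSIX-only):

#include <cstdlib>
#include <omp.h>

int main() {
  // Must happen before the first parallel region triggers serial init.
  setenv("KMP_USER_LEVEL_MWAIT", "1", /*overwrite=*/1);
  setenv("KMP_MWAIT_HINTS", "0", 1);
#pragma omp parallel
  { (void)omp_get_thread_num(); }
  return 0;
}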
@@ -4968,12 +4939,6 @@ static kmp_setting_t __kmp_stg_table[] = {
      __kmp_stg_print_omp_tool_libraries, NULL, 0, 0},
 #endif
 
-#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
-    {"KMP_USER_LEVEL_MWAIT", __kmp_stg_parse_user_level_mwait,
-     __kmp_stg_print_user_level_mwait, NULL, 0, 0},
-    {"KMP_MWAIT_HINTS", __kmp_stg_parse_mwait_hints,
-     __kmp_stg_print_mwait_hints, NULL, 0, 0},
-#endif
     {"", NULL, NULL, NULL, 0, 0}}; // settings
 
 static int const __kmp_stg_count =

openmp/runtime/src/kmp_stats.h
@@ -258,7 +258,6 @@ enum stats_state_e {
   macro(KMP_tree_release, 0, arg) \
   macro(USER_resume, 0, arg) \
   macro(USER_suspend, 0, arg) \
-  macro(USER_mwait, 0, arg) \
   macro(KMP_allocate_team, 0, arg) \
   macro(KMP_setup_icv_copy, 0, arg) \
   macro(USER_icv_copy, 0, arg) \

openmp/runtime/src/kmp_taskdeps.cpp
@@ -786,8 +786,7 @@ void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
   }
 
   int thread_finished = FALSE;
-  kmp_flag_32<false, false> flag(
-      (std::atomic<kmp_uint32> *)&node.dn.npredecessors, 0U);
+  kmp_flag_32 flag((std::atomic<kmp_uint32> *)&node.dn.npredecessors, 0U);
   while (node.dn.npredecessors > 0) {
     flag.execute_tasks(thread, gtid, FALSE,
                        &thread_finished USE_ITT_BUILD_ARG(NULL),

openmp/runtime/src/kmp_tasking.cpp
@@ -1876,10 +1876,9 @@ static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
     must_wait = must_wait || (thread->th.th_task_team != NULL &&
                               thread->th.th_task_team->tt.tt_found_proxy_tasks);
     if (must_wait) {
-      kmp_flag_32<false, false> flag(
-          RCAST(std::atomic<kmp_uint32> *,
-                &(taskdata->td_incomplete_child_tasks)),
-          0U);
+      kmp_flag_32 flag(RCAST(std::atomic<kmp_uint32> *,
+                             &(taskdata->td_incomplete_child_tasks)),
+                       0U);
       while (KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) != 0) {
         flag.execute_tasks(thread, gtid, FALSE,
                            &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),

@@ -1985,7 +1984,7 @@ kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part) {
       thread->th.ompt_thread_info.ompt_task_yielded = 1;
 #endif
       __kmp_execute_tasks_32(
-          thread, gtid, (kmp_flag_32<> *)NULL, FALSE,
+          thread, gtid, NULL, FALSE,
           &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
           __kmp_task_stealing_constraint);
 #if OMPT_SUPPORT

@@ -2513,8 +2512,8 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
     if (!taskdata->td_flags.team_serial ||
         (thread->th.th_task_team != NULL &&
          thread->th.th_task_team->tt.tt_found_proxy_tasks)) {
-      kmp_flag_32<false, false> flag(
-          RCAST(std::atomic<kmp_uint32> *, &(taskgroup->count)), 0U);
+      kmp_flag_32 flag(RCAST(std::atomic<kmp_uint32> *, &(taskgroup->count)),
+                       0U);
       while (KMP_ATOMIC_LD_ACQ(&taskgroup->count) != 0) {
         flag.execute_tasks(thread, gtid, FALSE,
                            &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),

@@ -3022,9 +3021,8 @@ static inline int __kmp_execute_tasks_template(
   }
 }
 
-template <bool C, bool S>
 int __kmp_execute_tasks_32(
-    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32<C, S> *flag, int final_spin,
+    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
     int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
     kmp_int32 is_constrained) {
   return __kmp_execute_tasks_template(

@@ -3032,9 +3030,8 @@ int __kmp_execute_tasks_32(
       thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
 }
 
-template <bool C, bool S>
 int __kmp_execute_tasks_64(
-    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64<C, S> *flag, int final_spin,
+    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
     int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
     kmp_int32 is_constrained) {
   return __kmp_execute_tasks_template(

@@ -3051,23 +3048,6 @@ int __kmp_execute_tasks_oncore(
       thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
 }
 
-template int
-__kmp_execute_tasks_32<false, false>(kmp_info_t *, kmp_int32,
-                                     kmp_flag_32<false, false> *, int,
-                                     int *USE_ITT_BUILD_ARG(void *), kmp_int32);
-
-template int __kmp_execute_tasks_64<false, true>(kmp_info_t *, kmp_int32,
-                                                 kmp_flag_64<false, true> *,
-                                                 int,
-                                                 int *USE_ITT_BUILD_ARG(void *),
-                                                 kmp_int32);
-
-template int __kmp_execute_tasks_64<true, false>(kmp_info_t *, kmp_int32,
-                                                 kmp_flag_64<true, false> *,
-                                                 int,
-                                                 int *USE_ITT_BUILD_ARG(void *),
-                                                 kmp_int32);
-
 // __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
 // next barrier so they can assist in executing enqueued tasks.
 // First thread in allocates the task team atomically.

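The block deleted above consists of explicit instantiation definitions: the bodies of the templated __kmp_execute_tasks_* live in this .cpp, and only the specializations the runtime actually links are emitted. A minimal illustration of the idiom with hypothetical names:

// header (declaration only):
template <bool C, bool S> int execute_tasks(int gtid);

// source file (definition plus the specializations other TUs may link to):
template <bool C, bool S> int execute_tasks(int gtid) {
  return gtid + (C ? 1 : 0) + (S ? 1 : 0); // placeholder body
}
template int execute_tasks<false, false>(int); // emit needed specializations
template int execute_tasks<false, true>(int);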
@@ -3617,10 +3597,9 @@ void __kmp_task_team_wait(
       // Worker threads may have dropped through to release phase, but could
       // still be executing tasks. Wait here for tasks to complete. To avoid
       // memory contention, only master thread checks termination condition.
-      kmp_flag_32<false, false> flag(
-          RCAST(std::atomic<kmp_uint32> *,
-                &task_team->tt.tt_unfinished_threads),
-          0U);
+      kmp_flag_32 flag(RCAST(std::atomic<kmp_uint32> *,
+                             &task_team->tt.tt_unfinished_threads),
+                       0U);
       flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
     }
     // Deactivate the old task team, so that the worker threads will stop

@@ -3642,7 +3621,7 @@ void __kmp_task_team_wait(
 }
 
 // __kmp_tasking_barrier:
-// This routine is called only when __kmp_tasking_mode == tskm_extra_barrier.
+// This routine may only called when __kmp_tasking_mode == tskm_extra_barrier.
 // Internal function to execute all tasks prior to a regular barrier or a join
 // barrier. It is a full barrier itself, which unfortunately turns regular
 // barriers into double barriers and join barriers into 1 1/2 barriers.

@@ -3656,7 +3635,7 @@ void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread, int gtid) {
 #if USE_ITT_BUILD
   KMP_FSYNC_SPIN_INIT(spin, NULL);
 #endif /* USE_ITT_BUILD */
-  kmp_flag_32<false, false> spin_flag(spin, 0U);
+  kmp_flag_32 spin_flag(spin, 0U);
   while (!spin_flag.execute_tasks(thread, gtid, TRUE,
                                   &flag USE_ITT_BUILD_ARG(NULL), 0)) {
 #if USE_ITT_BUILD

openmp/runtime/src/kmp_wait_release.cpp
@@ -12,32 +12,14 @@
 
 #include "kmp_wait_release.h"
 
-void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64<> *flag,
+void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag,
                    int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
   if (final_spin)
-    __kmp_wait_template<kmp_flag_64<>, TRUE>(
+    __kmp_wait_template<kmp_flag_64, TRUE>(
         this_thr, flag USE_ITT_BUILD_ARG(itt_sync_obj));
   else
-    __kmp_wait_template<kmp_flag_64<>, FALSE>(
+    __kmp_wait_template<kmp_flag_64, FALSE>(
         this_thr, flag USE_ITT_BUILD_ARG(itt_sync_obj));
 }
 
-void __kmp_release_64(kmp_flag_64<> *flag) { __kmp_release_template(flag); }
-
-#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
-template <bool C, bool S>
-void __kmp_mwait_32(int th_gtid, kmp_flag_32<C, S> *flag) {
-  __kmp_mwait_template(th_gtid, flag);
-}
-template <bool C, bool S>
-void __kmp_mwait_64(int th_gtid, kmp_flag_64<C, S> *flag) {
-  __kmp_mwait_template(th_gtid, flag);
-}
-void __kmp_mwait_oncore(int th_gtid, kmp_flag_oncore *flag) {
-  __kmp_mwait_template(th_gtid, flag);
-}
-
-template void __kmp_mwait_32<false, false>(int, kmp_flag_32<false, false> *);
-template void __kmp_mwait_64<false, true>(int, kmp_flag_64<false, true> *);
-template void __kmp_mwait_64<true, false>(int, kmp_flag_64<true, false> *);
-#endif
+void __kmp_release_64(kmp_flag_64 *flag) { __kmp_release_template(flag); }

openmp/runtime/src/kmp_wait_release.h
@@ -42,26 +42,20 @@ enum flag_type {
   flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */
 };
 
-struct flag_properties {
-  unsigned int type : 16;
-  unsigned int reserved : 16;
-};
-
 /*!
  * Base class for wait/release volatile flag
  */
 template <typename P> class kmp_flag_native {
   volatile P *loc;
-  flag_properties t;
+  flag_type t;
 
 public:
   typedef P flag_t;
-  kmp_flag_native(volatile P *p, flag_type ft)
-      : loc(p), t({(unsigned int)ft, 0U}) {}
+  kmp_flag_native(volatile P *p, flag_type ft) : loc(p), t(ft) {}
   volatile P *get() { return loc; }
   void *get_void_p() { return RCAST(void *, CCAST(P *, loc)); }
   void set(volatile P *new_loc) { loc = new_loc; }
-  flag_type get_type() { return (flag_type)(t.type); }
+  flag_type get_type() { return t; }
   P load() { return *loc; }
   void store(P val) { *loc = val; }
 };

@@ -73,12 +67,10 @@ template <typename P> class kmp_flag {
   std::atomic<P>
       *loc; /**< Pointer to the flag storage that is modified by another thread
              */
-  flag_properties t; /**< "Type" of the flag in loc */
+  flag_type t; /**< "Type" of the flag in loc */
 
 public:
   typedef P flag_t;
-  kmp_flag(std::atomic<P> *p, flag_type ft)
-      : loc(p), t({(unsigned int)ft, 0U}) {}
+  kmp_flag(std::atomic<P> *p, flag_type ft) : loc(p), t(ft) {}
   /*!
    * @result the pointer to the actual flag
    */

@@ -94,7 +86,7 @@ public:
   /*!
    * @result the flag_type
    */
-  flag_type get_type() { return (flag_type)(t.type); }
+  flag_type get_type() { return t; }
   /*!
    * @result flag value
    */

@@ -112,7 +104,6 @@ public:
   bool notdone_check();
   P internal_release();
   void suspend(int th_gtid);
-  void mwait(int th_gtid);
   void resume(int th_gtid);
   P set_sleeping();
   P unset_sleeping();

@@ -169,8 +160,8 @@ static void __ompt_implicit_task_end(kmp_info_t *this_thr,
    to wake it back up to prevent deadlocks!
 
    NOTE: We may not belong to a team at this point. */
-template <class C, bool final_spin, bool Cancellable = false,
-          bool Sleepable = true>
+template <class C, int final_spin, bool cancellable = false,
+          bool sleepable = true>
 static inline bool
 __kmp_wait_template(kmp_info_t *this_thr,
                     C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {

|
@ -194,7 +185,7 @@ __kmp_wait_template(kmp_info_t *this_thr,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
th_gtid = this_thr->th.th_info.ds.ds_gtid;
|
th_gtid = this_thr->th.th_info.ds.ds_gtid;
|
||||||
if (Cancellable) {
|
if (cancellable) {
|
||||||
kmp_team_t *team = this_thr->th.th_team;
|
kmp_team_t *team = this_thr->th.th_team;
|
||||||
if (team && team->t.t_cancel_request == cancel_parallel)
|
if (team && team->t.t_cancel_request == cancel_parallel)
|
||||||
return true;
|
return true;
|
||||||
|
@@ -384,7 +375,7 @@ final_spin=FALSE)
     }
 #endif
     // Check if the barrier surrounding this wait loop has been cancelled
-    if (Cancellable) {
+    if (cancellable) {
       kmp_team_t *team = this_thr->th.th_team;
       if (team && team->t.t_cancel_request == cancel_parallel)
         break;

@@ -409,31 +400,23 @@ final_spin=FALSE)
 #endif
     // Don't suspend if wait loop designated non-sleepable
     // in template parameters
-    if (!Sleepable)
+    if (!sleepable)
       continue;
 
     if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
         __kmp_pause_status != kmp_soft_paused)
       continue;
 
-#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
-    if (__kmp_mwait_enabled || __kmp_umwait_enabled) {
-      KF_TRACE(50, ("__kmp_wait_sleep: T#%d using monitor/mwait\n", th_gtid));
-      flag->mwait(th_gtid);
-    } else {
-#endif
-      KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
+    KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
 #if KMP_OS_UNIX
     if (final_spin)
       KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
 #endif
     flag->suspend(th_gtid);
 #if KMP_OS_UNIX
     if (final_spin)
       KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
-#endif
-#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
-    }
 #endif
 
     if (TCR_4(__kmp_global.g.g_done)) {

@@ -475,7 +458,7 @@ final_spin=FALSE)
     KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
 #endif
   KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
-  if (Cancellable) {
+  if (cancellable) {
     kmp_team_t *team = this_thr->th.th_team;
     if (team && team->t.t_cancel_request == cancel_parallel) {
       if (tasks_completed) {

@@ -492,83 +475,6 @@ final_spin=FALSE)
   return false;
 }

-#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
-// Set up a monitor on the flag variable causing the calling thread to wait in
-// a less active state until the flag variable is modified.
-template <class C>
-static inline void __kmp_mwait_template(int th_gtid, C *flag) {
-  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_mwait);
-  kmp_info_t *th = __kmp_threads[th_gtid];
-
-  KF_TRACE(30, ("__kmp_mwait_template: T#%d enter for flag = %p\n", th_gtid,
-                flag->get()));
-
-  // User-level mwait is available
-  KMP_DEBUG_ASSERT(__kmp_mwait_enabled || __kmp_umwait_enabled);
-
-  __kmp_suspend_initialize_thread(th);
-  __kmp_lock_suspend_mx(th);
-
-  volatile void *spin = flag->get();
-  void *cacheline = (void *)(kmp_uint64(spin) & ~(CACHE_LINE - 1));
-
-  if (!flag->done_check()) {
-    // Mark thread as no longer active
-    th->th.th_active = FALSE;
-    if (th->th.th_active_in_pool) {
-      th->th.th_active_in_pool = FALSE;
-      KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
-      KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
-    }
-    flag->set_sleeping();
-    KF_TRACE(50, ("__kmp_mwait_template: T#%d calling monitor\n", th_gtid));
-#if KMP_HAVE_UMWAIT
-    if (__kmp_umwait_enabled) {
-      __kmp_umonitor(cacheline);
-    }
-#elif KMP_HAVE_MWAIT
-    if (__kmp_mwait_enabled) {
-      __kmp_mm_monitor(cacheline, 0, 0);
-    }
-#endif
-    // To avoid a race, check flag between 'monitor' and 'mwait'. A write to
-    // the address could happen after the last time we checked and before
-    // monitoring started, in which case monitor can't detect the change.
-    if (flag->done_check())
-      flag->unset_sleeping();
-    else {
-      // if flag changes here, wake-up happens immediately
-      TCW_PTR(th->th.th_sleep_loc, (void *)flag);
-      __kmp_unlock_suspend_mx(th);
-      KF_TRACE(50, ("__kmp_mwait_template: T#%d calling mwait\n", th_gtid));
-#if KMP_HAVE_UMWAIT
-      if (__kmp_umwait_enabled) {
-        __kmp_umwait(1, 100); // to do: enable ctrl via hints, backoff counter
-      }
-#elif KMP_HAVE_MWAIT
-      if (__kmp_mwait_enabled) {
-        __kmp_mm_mwait(0, __kmp_mwait_hints);
-      }
-#endif
-      KF_TRACE(50, ("__kmp_mwait_template: T#%d mwait done\n", th_gtid));
-      __kmp_lock_suspend_mx(th);
-      // Clean up sleep info; doesn't matter how/why this thread stopped waiting
-      if (flag->is_sleeping())
-        flag->unset_sleeping();
-      TCW_PTR(th->th.th_sleep_loc, NULL);
-    }
-    // Mark thread as active again
-    th->th.th_active = TRUE;
-    if (TCR_4(th->th.th_in_pool)) {
-      KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
-      th->th.th_active_in_pool = TRUE;
-    }
-  } // Drop out to main wait loop to check flag, handle tasks, etc.
-  __kmp_unlock_suspend_mx(th);
-  KF_TRACE(30, ("__kmp_mwait_template: T#%d exit\n", th_gtid));
-}
-#endif // KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
-
 /* Release any threads specified as waiting on the flag by releasing the flag
    and resume the waiting thread if indicated by the sleep bit(s). A thread that
    calls __kmp_wait_template must call this function to wake up the potentially
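The deleted __kmp_mwait_template above is the heart of the reverted feature: arm a monitor on the flag's cache line, re-check the flag, and only then issue the low-power wait. Below is a minimal stand-alone analogue of that protocol, with a mutex and condition variable standing in for the UMONITOR/UMWAIT (or MONITOR/MWAIT) instructions; every name in the sketch is illustrative, not taken from the runtime.

// Analogue of the arm/recheck/wait protocol in __kmp_mwait_template.
#include <atomic>
#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <thread>

struct demo_flag {
  std::atomic<unsigned> value{0};
  unsigned checker{1};        // value that means "released"
  std::mutex mx;              // plays the role of th_suspend_mx
  std::condition_variable cv; // plays the role of the monitored cache line

  bool done_check() { return value.load(std::memory_order_acquire) == checker; }

  void wait_until_done() {
    std::unique_lock<std::mutex> lk(mx);
    // Same shape as the runtime: re-check the flag after "arming" the wait
    // and before blocking, so a release that landed in between is not lost.
    // The hardware path does this by calling done_check() between umonitor
    // and umwait; here the mutex makes check-then-wait atomic.
    while (!done_check())
      cv.wait(lk);
  }

  void release() {
    value.store(checker, std::memory_order_release);
    std::lock_guard<std::mutex> lk(mx);
    cv.notify_all(); // UMWAIT wakes on the monitored write; a cv needs notify
  }
};

int main() {
  demo_flag f;
  std::thread waiter([&] {
    f.wait_until_done();
    std::puts("woke up");
  });
  f.release();
  waiter.join();
}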
@@ -639,7 +545,7 @@ template <> struct flag_traits<kmp_uint64> {
 };

 // Basic flag that does not use C11 Atomics
-template <typename FlagType, bool Sleepable>
+template <typename FlagType>
 class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
   typedef flag_traits<FlagType> traits_type;
   FlagType checker; /**< Value to compare flag to to check if flag has been
@@ -682,13 +588,7 @@ public:
   /*!
    * @result true if the flag object has been released.
    */
-  bool done_check() {
-    if (Sleepable)
-      return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE) ==
-             checker;
-    else
-      return traits_type::tcr(*(this->get())) == checker;
-  }
+  bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
   /*!
    * @param old_loc in old value of flag
    * @result true if the flag's old value indicates it was released.
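This done_check() hunk (and its twin below for the atomic flag) removes the Sleepable specialization: when a flag can be slept on, a waiter may set a sleep bit in the very word it spins on, so the released-value comparison has to mask that bit off first. A small self-contained sketch, assuming the sleep bit is bit 0 (as KMP_BARRIER_SLEEP_BIT suggests in kmp.h; treat the exact position as illustrative):

#include <cstdint>
#include <cstdio>

// Assumption: bit 0 is the sleep bit; everything here is illustrative.
constexpr uint64_t SLEEP_STATE = 1ull << 0;

template <bool Sleepable>
bool done_check(uint64_t flag_value, uint64_t checker) {
  if (Sleepable)
    // A waiter may have set the sleep bit on the flag word; strip it so a
    // "released but marked sleeping" value still compares equal to checker.
    return (flag_value & ~SLEEP_STATE) == checker;
  else
    return flag_value == checker;
}

int main() {
  uint64_t released_and_sleeping = 4 | SLEEP_STATE; // released value 4 + bit
  std::printf("%d %d\n", done_check<true>(released_and_sleeping, 4),
              done_check<false>(released_and_sleeping, 4)); // prints "1 0"
}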
@@ -743,8 +643,7 @@ public:
   enum barrier_type get_bt() { return bs_last_barrier; }
 };

-template <typename FlagType, bool Sleepable>
-class kmp_basic_flag : public kmp_flag<FlagType> {
+template <typename FlagType> class kmp_basic_flag : public kmp_flag<FlagType> {
   typedef flag_traits<FlagType> traits_type;
   FlagType checker; /**< Value to compare flag to to check if flag has been
                        released. */
@@ -786,12 +685,7 @@ public:
   /*!
    * @result true if the flag object has been released.
    */
-  bool done_check() {
-    if (Sleepable)
-      return (this->load() & ~KMP_BARRIER_SLEEP_STATE) == checker;
-    else
-      return this->load() == checker;
-  }
+  bool done_check() { return this->load() == checker; }
   /*!
    * @param old_loc in old value of flag
    * @result true if the flag's old value indicates it was released.
@@ -842,19 +736,14 @@ public:
   enum barrier_type get_bt() { return bs_last_barrier; }
 };

-template <bool Cancellable, bool Sleepable>
-class kmp_flag_32 : public kmp_basic_flag<kmp_uint32, Sleepable> {
+class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
 public:
-  kmp_flag_32(std::atomic<kmp_uint32> *p)
-      : kmp_basic_flag<kmp_uint32, Sleepable>(p) {}
+  kmp_flag_32(std::atomic<kmp_uint32> *p) : kmp_basic_flag<kmp_uint32>(p) {}
   kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
-      : kmp_basic_flag<kmp_uint32, Sleepable>(p, thr) {}
+      : kmp_basic_flag<kmp_uint32>(p, thr) {}
   kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
-      : kmp_basic_flag<kmp_uint32, Sleepable>(p, c) {}
+      : kmp_basic_flag<kmp_uint32>(p, c) {}
   void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
-#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
-  void mwait(int th_gtid) { __kmp_mwait_32(th_gtid, this); }
-#endif
   void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
   int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                     int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
@@ -863,32 +752,27 @@ public:
         this_thr, gtid, this, final_spin,
         thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
   }
-  bool wait(kmp_info_t *this_thr,
+  void wait(kmp_info_t *this_thr,
             int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
     if (final_spin)
-      return __kmp_wait_template<kmp_flag_32, TRUE, Cancellable, Sleepable>(
+      __kmp_wait_template<kmp_flag_32, TRUE>(
           this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
     else
-      return __kmp_wait_template<kmp_flag_32, FALSE, Cancellable, Sleepable>(
+      __kmp_wait_template<kmp_flag_32, FALSE>(
           this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
   }
   void release() { __kmp_release_template(this); }
   flag_type get_ptr_type() { return flag32; }
 };

-template <bool Cancellable, bool Sleepable>
-class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64, Sleepable> {
+class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64> {
 public:
-  kmp_flag_64(volatile kmp_uint64 *p)
-      : kmp_basic_flag_native<kmp_uint64, Sleepable>(p) {}
+  kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag_native<kmp_uint64>(p) {}
   kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
-      : kmp_basic_flag_native<kmp_uint64, Sleepable>(p, thr) {}
+      : kmp_basic_flag_native<kmp_uint64>(p, thr) {}
   kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
-      : kmp_basic_flag_native<kmp_uint64, Sleepable>(p, c) {}
+      : kmp_basic_flag_native<kmp_uint64>(p, c) {}
   void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
-#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
-  void mwait(int th_gtid) { __kmp_mwait_64(th_gtid, this); }
-#endif
   void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
   int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                     int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
@@ -897,15 +781,27 @@ public:
         this_thr, gtid, this, final_spin,
         thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
   }
-  bool wait(kmp_info_t *this_thr,
+  void wait(kmp_info_t *this_thr,
             int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
     if (final_spin)
-      return __kmp_wait_template<kmp_flag_64, TRUE, Cancellable, Sleepable>(
+      __kmp_wait_template<kmp_flag_64, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
     else
-      return __kmp_wait_template<kmp_flag_64, FALSE, Cancellable, Sleepable>(
+      __kmp_wait_template<kmp_flag_64, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
   }
+  bool wait_cancellable_nosleep(kmp_info_t *this_thr,
+                                int final_spin
+                                    USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
+    bool retval = false;
+    if (final_spin)
+      retval = __kmp_wait_template<kmp_flag_64, TRUE, true, false>(
+          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
+    else
+      retval = __kmp_wait_template<kmp_flag_64, FALSE, true, false>(
+          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
+    return retval;
+  }
   void release() { __kmp_release_template(this); }
   flag_type get_ptr_type() { return flag64; }
 };
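With the class-level Cancellable/Sleepable parameters gone, cancellable waiting moves back into the dedicated wait_cancellable_nosleep entry point restored above, which passes the two bools straight to __kmp_wait_template. The sketch below shows the underlying pattern, compile-time behavior selection through trailing bool template parameters; the wait body is a stand-in, not the runtime's spin loop.

#include <cstdio>

struct demo_flag64 { unsigned long long v = 0; };

// Trailing bool parameters select behavior at compile time; the branch on a
// false Cancellable is constant-folded away entirely.
template <class FlagType, bool FinalSpin, bool Cancellable = false,
          bool Sleepable = true>
bool wait_template(FlagType *flag) {
  (void)flag; // ... spin/suspend on the flag here ...
  if (Cancellable) {
    // check the team's cancellation request and report it to the caller
    return false; // "not cancelled" in this sketch
  }
  return false;
}

int main() {
  demo_flag64 f;
  wait_template<demo_flag64, true>(&f); // plain wait, result unused
  bool cancelled =
      wait_template<demo_flag64, true, true, false>(&f); // cancellable, no sleep
  std::printf("cancelled = %d\n", (int)cancelled);
}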
@@ -963,8 +859,8 @@ public:
       return true;
     else if (flag_switch) {
       this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
-      kmp_flag_64<> flag(&this_thr->th.th_bar[bt].bb.b_go,
+      kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go,
                          (kmp_uint64)KMP_BARRIER_STATE_BUMP);
       __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
     }
     return false;
@@ -1000,9 +896,6 @@ public:
   }
   void release() { __kmp_release_template(this); }
   void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
-#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
-  void mwait(int th_gtid) { __kmp_mwait_oncore(th_gtid, this); }
-#endif
   void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
   int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                     int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
@@ -1022,15 +915,15 @@ static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
   if (!flag)
     return;

-  switch (RCAST(kmp_flag_64<> *, CCAST(void *, flag))->get_type()) {
+  switch (RCAST(kmp_flag_64 *, CCAST(void *, flag))->get_type()) {
   case flag32:
-    __kmp_resume_32(gtid, (kmp_flag_32<> *)NULL);
+    __kmp_resume_32(gtid, NULL);
     break;
   case flag64:
-    __kmp_resume_64(gtid, (kmp_flag_64<> *)NULL);
+    __kmp_resume_64(gtid, NULL);
     break;
   case flag_oncore:
-    __kmp_resume_oncore(gtid, (kmp_flag_oncore *)NULL);
+    __kmp_resume_oncore(gtid, NULL);
     break;
   }
 }
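__kmp_null_resume_wrapper recovers the concrete flag kind from a type tag stored in the object, then calls the matching typed resume routine with a null flag, which makes the resume path re-read the target thread's own sleep location. An illustrative reconstruction of that dispatch, with stand-in types and resume functions:

#include <cstdio>

enum flag_type { flag32, flag64, flag_oncore }; // names as in the diff

struct flag_base { flag_type type; };

static void resume_32(void *) { std::puts("resume 32-bit waiter"); }
static void resume_64(void *) { std::puts("resume 64-bit waiter"); }
static void resume_oncore(void *) { std::puts("resume oncore waiter"); }

static void null_resume_wrapper(void *flag) {
  if (!flag)
    return;
  // The stored pointer is typeless; a leading type tag picks the concrete
  // resume routine. Passing a null flag tells the resume path to re-read
  // the sleeping thread's own sleep location instead.
  switch (static_cast<flag_base *>(flag)->type) {
  case flag32:
    resume_32(nullptr);
    break;
  case flag64:
    resume_64(nullptr);
    break;
  case flag_oncore:
    resume_oncore(nullptr);
    break;
  }
}

int main() {
  flag_base f{flag64};
  null_resume_wrapper(&f);
}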
@@ -1459,7 +1459,8 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {

   __kmp_suspend_initialize_thread(th);

-  __kmp_lock_suspend_mx(th);
+  status = pthread_mutex_lock(&th->th.th_suspend_mx.m_mutex);
+  KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);

   KF_TRACE(10, ("__kmp_suspend_template: T#%d setting sleep bit for spin(%p)\n",
                 th_gtid, flag->get()));
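From this hunk on, the revert also swaps the __kmp_lock_suspend_mx/__kmp_unlock_suspend_mx wrappers back to direct pthread calls whose return status is checked. A minimal sketch of that checked-lock idiom, with KMP_CHECK_SYSFAIL reduced to an abort-on-error helper:

#include <cstdio>
#include <cstdlib>
#include <pthread.h>

// Simplified stand-in for KMP_CHECK_SYSFAIL: report and abort on error.
static void check_sysfail(const char *func, int status) {
  if (status != 0) {
    std::fprintf(stderr, "%s failed: %d\n", func, status);
    std::abort();
  }
}

static pthread_mutex_t suspend_mx = PTHREAD_MUTEX_INITIALIZER;

int main() {
  int status = pthread_mutex_lock(&suspend_mx);
  check_sysfail("pthread_mutex_lock", status);
  // ... critical section: inspect/modify the sleeping thread's state ...
  status = pthread_mutex_unlock(&suspend_mx);
  check_sysfail("pthread_mutex_unlock", status);
  return 0;
}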
@@ -1470,7 +1471,8 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
   if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
       __kmp_pause_status != kmp_soft_paused) {
     flag->unset_sleeping();
-    __kmp_unlock_suspend_mx(th);
+    status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
+    KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
     return;
   }
   KF_TRACE(5, ("__kmp_suspend_template: T#%d set sleep bit for spin(%p)==%x,"
@@ -1533,7 +1535,7 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
                   th_gtid));
     status = pthread_cond_wait(&th->th.th_suspend_cv.c_cond,
                                &th->th.th_suspend_mx.m_mutex);
-#endif // USE_SUSPEND_TIMEOUT
+#endif

     if ((status != 0) && (status != EINTR) && (status != ETIMEDOUT)) {
       KMP_SYSFAIL("pthread_cond_wait", status);
@@ -1573,26 +1575,21 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
   }
 #endif

-  __kmp_unlock_suspend_mx(th);
+  status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
+  KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
   KF_TRACE(30, ("__kmp_suspend_template: T#%d exit\n", th_gtid));
 }

-template <bool C, bool S>
-void __kmp_suspend_32(int th_gtid, kmp_flag_32<C, S> *flag) {
+void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag) {
   __kmp_suspend_template(th_gtid, flag);
 }
-template <bool C, bool S>
-void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag) {
+void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag) {
   __kmp_suspend_template(th_gtid, flag);
 }
 void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
   __kmp_suspend_template(th_gtid, flag);
 }

-template void __kmp_suspend_32<false, false>(int, kmp_flag_32<false, false> *);
-template void __kmp_suspend_64<false, true>(int, kmp_flag_64<false, true> *);
-template void __kmp_suspend_64<true, false>(int, kmp_flag_64<true, false> *);
-
 /* This routine signals the thread specified by target_gtid to wake up
    after setting the sleep bit indicated by the flag argument to FALSE.
    The target thread must already have called __kmp_suspend_template() */
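The three deleted "template void __kmp_suspend_*" lines are explicit instantiations: they pin down which <Cancellable, Sleepable> combinations get compiled into this translation unit, since the template definitions are not visible to other files. A sketch of the idiom with illustrative types:

// The template definition lives in one .cpp file; these directives force the
// handful of specializations callers actually use to be emitted there.
template <bool C, bool S> struct demo_flag32 { int dummy = 0; };

template <bool C, bool S>
void demo_suspend(int gtid, demo_flag32<C, S> *flag) {
  (void)gtid;
  (void)flag;
}

// Only these specializations exist in the object file; linking a use of any
// other <C, S> pair against this file would fail.
template void demo_suspend<false, false>(int, demo_flag32<false, false> *);
template void demo_suspend<false, true>(int, demo_flag32<false, true> *);

int main() {
  demo_flag32<false, true> f;
  demo_suspend(0, &f);
}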
@@ -1611,7 +1608,9 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) {
   KMP_DEBUG_ASSERT(gtid != target_gtid);

   __kmp_suspend_initialize_thread(th);
-  __kmp_lock_suspend_mx(th);
+
+  status = pthread_mutex_lock(&th->th.th_suspend_mx.m_mutex);
+  KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);

   if (!flag) { // coming from __kmp_null_resume_wrapper
     flag = (C *)CCAST(void *, th->th.th_sleep_loc);
@@ -1620,11 +1619,13 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) {
   // First, check if the flag is null or its type has changed. If so, someone
   // else woke it up.
   if (!flag || flag->get_type() != flag->get_ptr_type()) { // get_ptr_type
-    // simply shows what flag was cast to
+    // simply shows what
+    // flag was cast to
     KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
                  "awake: flag(%p)\n",
                  gtid, target_gtid, NULL));
-    __kmp_unlock_suspend_mx(th);
+    status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
+    KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
     return;
   } else { // if multiple threads are sleeping, flag should be internally
            // referring to a specific thread here
@@ -1634,7 +1635,8 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) {
                    "awake: flag(%p): "
                    "%u => %u\n",
                    gtid, target_gtid, flag->get(), old_spin, flag->load()));
-      __kmp_unlock_suspend_mx(th);
+      status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
+      KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
       return;
     }
   }
   KF_TRACE(5, ("__kmp_resume_template: T#%d about to wakeup T#%d, reset "
@@ -1654,27 +1656,23 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) {
 #endif
   status = pthread_cond_signal(&th->th.th_suspend_cv.c_cond);
   KMP_CHECK_SYSFAIL("pthread_cond_signal", status);
-  __kmp_unlock_suspend_mx(th);
+  status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
+  KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
   KF_TRACE(30, ("__kmp_resume_template: T#%d exiting after signaling wake up"
                 " for T#%d\n",
                 gtid, target_gtid));
 }

-template <bool C, bool S>
-void __kmp_resume_32(int target_gtid, kmp_flag_32<C, S> *flag) {
+void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag) {
   __kmp_resume_template(target_gtid, flag);
 }
-template <bool C, bool S>
-void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag) {
+void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag) {
   __kmp_resume_template(target_gtid, flag);
 }
 void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {
   __kmp_resume_template(target_gtid, flag);
 }

-template void __kmp_resume_32<false, true>(int, kmp_flag_32<false, true> *);
-template void __kmp_resume_64<false, true>(int, kmp_flag_64<false, true> *);
-
 #if KMP_USE_MONITOR
 void __kmp_resume_monitor() {
   KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume);
@@ -363,7 +363,7 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
                th_gtid, flag->get()));

   __kmp_suspend_initialize_thread(th);
-  __kmp_lock_suspend_mx(th);
+  __kmp_win32_mutex_lock(&th->th.th_suspend_mx);

   KF_TRACE(10, ("__kmp_suspend_template: T#%d setting sleep bit for flag's"
                 " loc(%p)\n",
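The Windows side of the revert makes the same wrapper-to-direct-call substitution, using the runtime's Win32 mutex. A Windows-only sketch in the same spirit, built on a plain CRITICAL_SECTION rather than the kmp_win32 types:

#include <windows.h>

// Illustrative RAII wrapper; the runtime's __kmp_win32_mutex_lock/unlock
// operate on its own kmp_win32_mutex_t, not reproduced here.
struct win32_mutex {
  CRITICAL_SECTION cs;
  win32_mutex() { InitializeCriticalSection(&cs); }
  ~win32_mutex() { DeleteCriticalSection(&cs); }
  void lock() { EnterCriticalSection(&cs); }
  void unlock() { LeaveCriticalSection(&cs); }
};

int main() {
  win32_mutex mx;
  mx.lock();
  // ... touch the suspended thread's wait state under the lock ...
  mx.unlock();
  return 0;
}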
@@ -375,7 +375,7 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
   if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
       __kmp_pause_status != kmp_soft_paused) {
     flag->unset_sleeping();
-    __kmp_unlock_suspend_mx(th);
+    __kmp_win32_mutex_unlock(&th->th.th_suspend_mx);
     return;
   }

@@ -437,26 +437,21 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
     }
   }

-  __kmp_unlock_suspend_mx(th);
+  __kmp_win32_mutex_unlock(&th->th.th_suspend_mx);

   KF_TRACE(30, ("__kmp_suspend_template: T#%d exit\n", th_gtid));
 }

-template <bool C, bool S>
-void __kmp_suspend_32(int th_gtid, kmp_flag_32<C, S> *flag) {
+void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag) {
   __kmp_suspend_template(th_gtid, flag);
 }
-template <bool C, bool S>
-void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag) {
+void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag) {
   __kmp_suspend_template(th_gtid, flag);
 }
 void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
   __kmp_suspend_template(th_gtid, flag);
 }

-template void __kmp_suspend_32<false, false>(int, kmp_flag_32<false, false> *);
-template void __kmp_suspend_64<false, true>(int, kmp_flag_64<false, true> *);
-template void __kmp_suspend_64<true, false>(int, kmp_flag_64<true, false> *);
-
 /* This routine signals the thread specified by target_gtid to wake up
    after setting the sleep bit indicated by the flag argument to FALSE */
 template <class C>
@@ -472,7 +467,7 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) {
                gtid, target_gtid));

   __kmp_suspend_initialize_thread(th);
-  __kmp_lock_suspend_mx(th);
+  __kmp_win32_mutex_lock(&th->th.th_suspend_mx);

   if (!flag) { // coming from __kmp_null_resume_wrapper
     flag = (C *)th->th.th_sleep_loc;
@@ -486,7 +481,7 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) {
     KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
                  "awake: flag's loc(%p)\n",
                  gtid, target_gtid, NULL));
-    __kmp_unlock_suspend_mx(th);
+    __kmp_win32_mutex_unlock(&th->th.th_suspend_mx);
     return;
   } else {
     typename C::flag_t old_spin = flag->unset_sleeping();
@@ -494,7 +489,7 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) {
       KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
                    "awake: flag's loc(%p): %u => %u\n",
                    gtid, target_gtid, flag->get(), old_spin, *(flag->get())));
-      __kmp_unlock_suspend_mx(th);
+      __kmp_win32_mutex_unlock(&th->th.th_suspend_mx);
       return;
     }
   }
@@ -504,28 +499,23 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) {
                gtid, target_gtid, flag->get()));

   __kmp_win32_cond_signal(&th->th.th_suspend_cv);
-  __kmp_unlock_suspend_mx(th);
+  __kmp_win32_mutex_unlock(&th->th.th_suspend_mx);

   KF_TRACE(30, ("__kmp_resume_template: T#%d exiting after signaling wake up"
                 " for T#%d\n",
                 gtid, target_gtid));
 }

-template <bool C, bool S>
-void __kmp_resume_32(int target_gtid, kmp_flag_32<C, S> *flag) {
+void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag) {
   __kmp_resume_template(target_gtid, flag);
 }
-template <bool C, bool S>
-void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag) {
+void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag) {
   __kmp_resume_template(target_gtid, flag);
 }
 void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {
   __kmp_resume_template(target_gtid, flag);
 }

-template void __kmp_resume_32<false, true>(int, kmp_flag_32<false, true> *);
-template void __kmp_resume_64<false, true>(int, kmp_flag_64<false, true> *);
-
 void __kmp_yield() { Sleep(0); }

 void __kmp_gtid_set_specific(int gtid) {